{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.984308131241086, "eval_steps": 500, "global_step": 11025, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0019020446980504042, "grad_norm": 0.9932524561882019, "learning_rate": 2e-05, "loss": 1.3348, "step": 1 }, { "epoch": 0.0038040893961008085, "grad_norm": 0.9241018295288086, "learning_rate": 4e-05, "loss": 1.3131, "step": 2 }, { "epoch": 0.005706134094151213, "grad_norm": 1.1556137800216675, "learning_rate": 6e-05, "loss": 1.5644, "step": 3 }, { "epoch": 0.007608178792201617, "grad_norm": 0.8612737059593201, "learning_rate": 8e-05, "loss": 1.2192, "step": 4 }, { "epoch": 0.009510223490252021, "grad_norm": 0.8998388648033142, "learning_rate": 0.0001, "loss": 1.3651, "step": 5 }, { "epoch": 0.011412268188302425, "grad_norm": 0.7211980819702148, "learning_rate": 9.999364877738964e-05, "loss": 1.2525, "step": 6 }, { "epoch": 0.01331431288635283, "grad_norm": 0.44894707202911377, "learning_rate": 9.998729755477931e-05, "loss": 1.1999, "step": 7 }, { "epoch": 0.015216357584403234, "grad_norm": 0.4338511824607849, "learning_rate": 9.998094633216895e-05, "loss": 1.0147, "step": 8 }, { "epoch": 0.017118402282453638, "grad_norm": 0.5658989548683167, "learning_rate": 9.99745951095586e-05, "loss": 1.1997, "step": 9 }, { "epoch": 0.019020446980504042, "grad_norm": 0.4467356503009796, "learning_rate": 9.996824388694824e-05, "loss": 1.0424, "step": 10 }, { "epoch": 0.020922491678554447, "grad_norm": 0.3743385374546051, "learning_rate": 9.996189266433789e-05, "loss": 1.0902, "step": 11 }, { "epoch": 0.02282453637660485, "grad_norm": 0.30667275190353394, "learning_rate": 9.995554144172754e-05, "loss": 0.8736, "step": 12 }, { "epoch": 0.024726581074655255, "grad_norm": 0.48634254932403564, "learning_rate": 9.994919021911718e-05, "loss": 0.977, "step": 13 }, { "epoch": 0.02662862577270566, "grad_norm": 0.4229658246040344, "learning_rate": 9.994283899650683e-05, "loss": 0.9673, "step": 14 }, { "epoch": 0.028530670470756064, "grad_norm": 0.39269882440567017, "learning_rate": 9.993648777389648e-05, "loss": 1.0001, "step": 15 }, { "epoch": 0.030432715168806468, "grad_norm": 0.38597363233566284, "learning_rate": 9.993013655128612e-05, "loss": 0.9705, "step": 16 }, { "epoch": 0.03233475986685687, "grad_norm": 0.40809136629104614, "learning_rate": 9.992378532867577e-05, "loss": 0.9246, "step": 17 }, { "epoch": 0.034236804564907276, "grad_norm": 0.4431133270263672, "learning_rate": 9.991743410606542e-05, "loss": 1.0409, "step": 18 }, { "epoch": 0.03613884926295768, "grad_norm": 0.5659255981445312, "learning_rate": 9.991108288345506e-05, "loss": 1.1118, "step": 19 }, { "epoch": 0.038040893961008085, "grad_norm": 0.4943106472492218, "learning_rate": 9.990473166084471e-05, "loss": 0.9213, "step": 20 }, { "epoch": 0.039942938659058486, "grad_norm": 0.48820945620536804, "learning_rate": 9.989838043823437e-05, "loss": 0.9108, "step": 21 }, { "epoch": 0.04184498335710889, "grad_norm": 0.4464576542377472, "learning_rate": 9.989202921562402e-05, "loss": 0.8959, "step": 22 }, { "epoch": 0.043747028055159294, "grad_norm": 0.3870016038417816, "learning_rate": 9.988567799301366e-05, "loss": 0.8013, "step": 23 }, { "epoch": 0.0456490727532097, "grad_norm": 0.42381179332733154, "learning_rate": 9.987932677040331e-05, "loss": 0.8584, "step": 24 }, { "epoch": 0.0475511174512601, "grad_norm": 0.37170907855033875, "learning_rate": 9.987297554779296e-05, "loss": 0.7849, "step": 25 }, { "epoch": 0.04945316214931051, "grad_norm": 0.4516700506210327, "learning_rate": 9.98666243251826e-05, "loss": 0.8902, "step": 26 }, { "epoch": 0.05135520684736091, "grad_norm": 0.3525027334690094, "learning_rate": 9.986027310257225e-05, "loss": 0.6029, "step": 27 }, { "epoch": 0.05325725154541132, "grad_norm": 0.437707781791687, "learning_rate": 9.98539218799619e-05, "loss": 0.7387, "step": 28 }, { "epoch": 0.05515929624346172, "grad_norm": 0.45205071568489075, "learning_rate": 9.984757065735154e-05, "loss": 0.7468, "step": 29 }, { "epoch": 0.05706134094151213, "grad_norm": 0.3709086775779724, "learning_rate": 9.984121943474119e-05, "loss": 0.7365, "step": 30 }, { "epoch": 0.05896338563956253, "grad_norm": 0.4089844822883606, "learning_rate": 9.983486821213084e-05, "loss": 0.6563, "step": 31 }, { "epoch": 0.060865430337612936, "grad_norm": 0.45955532789230347, "learning_rate": 9.982851698952048e-05, "loss": 0.8021, "step": 32 }, { "epoch": 0.06276747503566334, "grad_norm": 0.5240988731384277, "learning_rate": 9.982216576691013e-05, "loss": 0.6933, "step": 33 }, { "epoch": 0.06466951973371374, "grad_norm": 0.4703526496887207, "learning_rate": 9.981581454429977e-05, "loss": 0.7339, "step": 34 }, { "epoch": 0.06657156443176415, "grad_norm": 0.5659805536270142, "learning_rate": 9.980946332168944e-05, "loss": 0.8139, "step": 35 }, { "epoch": 0.06847360912981455, "grad_norm": 0.39259326457977295, "learning_rate": 9.980311209907908e-05, "loss": 0.5838, "step": 36 }, { "epoch": 0.07037565382786495, "grad_norm": 0.4165003001689911, "learning_rate": 9.979676087646871e-05, "loss": 0.674, "step": 37 }, { "epoch": 0.07227769852591535, "grad_norm": 0.4533802568912506, "learning_rate": 9.979040965385838e-05, "loss": 0.6974, "step": 38 }, { "epoch": 0.07417974322396577, "grad_norm": 0.5213814973831177, "learning_rate": 9.978405843124802e-05, "loss": 0.7896, "step": 39 }, { "epoch": 0.07608178792201617, "grad_norm": 0.3241259753704071, "learning_rate": 9.977770720863767e-05, "loss": 0.5895, "step": 40 }, { "epoch": 0.07798383262006657, "grad_norm": 0.34446167945861816, "learning_rate": 9.977135598602731e-05, "loss": 0.6222, "step": 41 }, { "epoch": 0.07988587731811697, "grad_norm": 0.49035167694091797, "learning_rate": 9.976500476341696e-05, "loss": 0.6978, "step": 42 }, { "epoch": 0.08178792201616739, "grad_norm": 0.4795296788215637, "learning_rate": 9.975865354080661e-05, "loss": 0.7368, "step": 43 }, { "epoch": 0.08368996671421779, "grad_norm": 0.44959381222724915, "learning_rate": 9.975230231819625e-05, "loss": 0.57, "step": 44 }, { "epoch": 0.08559201141226819, "grad_norm": 0.4577605426311493, "learning_rate": 9.974595109558592e-05, "loss": 0.691, "step": 45 }, { "epoch": 0.08749405611031859, "grad_norm": 0.41654840111732483, "learning_rate": 9.973959987297555e-05, "loss": 0.6346, "step": 46 }, { "epoch": 0.089396100808369, "grad_norm": 0.6599829196929932, "learning_rate": 9.973324865036519e-05, "loss": 0.6358, "step": 47 }, { "epoch": 0.0912981455064194, "grad_norm": 0.38539162278175354, "learning_rate": 9.972689742775484e-05, "loss": 0.5723, "step": 48 }, { "epoch": 0.0932001902044698, "grad_norm": 0.4626316428184509, "learning_rate": 9.97205462051445e-05, "loss": 0.6845, "step": 49 }, { "epoch": 0.0951022349025202, "grad_norm": 0.348387211561203, "learning_rate": 9.971419498253413e-05, "loss": 0.4857, "step": 50 }, { "epoch": 0.09700427960057062, "grad_norm": 0.4964020252227783, "learning_rate": 9.970784375992379e-05, "loss": 0.7141, "step": 51 }, { "epoch": 0.09890632429862102, "grad_norm": 0.4282241463661194, "learning_rate": 9.970149253731344e-05, "loss": 0.6619, "step": 52 }, { "epoch": 0.10080836899667142, "grad_norm": 0.35991716384887695, "learning_rate": 9.969514131470309e-05, "loss": 0.4727, "step": 53 }, { "epoch": 0.10271041369472182, "grad_norm": 0.3936012387275696, "learning_rate": 9.968879009209273e-05, "loss": 0.5644, "step": 54 }, { "epoch": 0.10461245839277224, "grad_norm": 0.39267924427986145, "learning_rate": 9.968243886948238e-05, "loss": 0.5126, "step": 55 }, { "epoch": 0.10651450309082264, "grad_norm": 0.4119136333465576, "learning_rate": 9.967608764687203e-05, "loss": 0.471, "step": 56 }, { "epoch": 0.10841654778887304, "grad_norm": 0.5160384178161621, "learning_rate": 9.966973642426167e-05, "loss": 0.6555, "step": 57 }, { "epoch": 0.11031859248692344, "grad_norm": 0.4742174744606018, "learning_rate": 9.966338520165132e-05, "loss": 0.6093, "step": 58 }, { "epoch": 0.11222063718497385, "grad_norm": 0.3615169823169708, "learning_rate": 9.965703397904097e-05, "loss": 0.5527, "step": 59 }, { "epoch": 0.11412268188302425, "grad_norm": 0.5700575113296509, "learning_rate": 9.965068275643061e-05, "loss": 0.5713, "step": 60 }, { "epoch": 0.11602472658107466, "grad_norm": 0.4825727939605713, "learning_rate": 9.964433153382026e-05, "loss": 0.5142, "step": 61 }, { "epoch": 0.11792677127912506, "grad_norm": 0.392088919878006, "learning_rate": 9.963798031120992e-05, "loss": 0.513, "step": 62 }, { "epoch": 0.11982881597717546, "grad_norm": 0.35883110761642456, "learning_rate": 9.963162908859957e-05, "loss": 0.501, "step": 63 }, { "epoch": 0.12173086067522587, "grad_norm": 0.39946749806404114, "learning_rate": 9.96252778659892e-05, "loss": 0.5532, "step": 64 }, { "epoch": 0.12363290537327627, "grad_norm": 0.4191288352012634, "learning_rate": 9.961892664337886e-05, "loss": 0.5258, "step": 65 }, { "epoch": 0.12553495007132667, "grad_norm": 0.3662487268447876, "learning_rate": 9.961257542076851e-05, "loss": 0.5121, "step": 66 }, { "epoch": 0.1274369947693771, "grad_norm": 0.5582164525985718, "learning_rate": 9.960622419815815e-05, "loss": 0.6494, "step": 67 }, { "epoch": 0.12933903946742747, "grad_norm": 0.485128790140152, "learning_rate": 9.959987297554779e-05, "loss": 0.6022, "step": 68 }, { "epoch": 0.1312410841654779, "grad_norm": 0.3816944360733032, "learning_rate": 9.959352175293745e-05, "loss": 0.4851, "step": 69 }, { "epoch": 0.1331431288635283, "grad_norm": 0.3637336194515228, "learning_rate": 9.958717053032709e-05, "loss": 0.4344, "step": 70 }, { "epoch": 0.1350451735615787, "grad_norm": 0.4418705105781555, "learning_rate": 9.958081930771674e-05, "loss": 0.6008, "step": 71 }, { "epoch": 0.1369472182596291, "grad_norm": 0.44138631224632263, "learning_rate": 9.95744680851064e-05, "loss": 0.5319, "step": 72 }, { "epoch": 0.1388492629576795, "grad_norm": 0.37523001432418823, "learning_rate": 9.956811686249603e-05, "loss": 0.657, "step": 73 }, { "epoch": 0.1407513076557299, "grad_norm": 0.4489665627479553, "learning_rate": 9.956176563988568e-05, "loss": 0.5526, "step": 74 }, { "epoch": 0.14265335235378032, "grad_norm": 0.39318791031837463, "learning_rate": 9.955541441727532e-05, "loss": 0.6046, "step": 75 }, { "epoch": 0.1445553970518307, "grad_norm": 0.4817538261413574, "learning_rate": 9.954906319466499e-05, "loss": 0.5149, "step": 76 }, { "epoch": 0.14645744174988112, "grad_norm": 0.4451163411140442, "learning_rate": 9.954271197205463e-05, "loss": 0.4892, "step": 77 }, { "epoch": 0.14835948644793154, "grad_norm": 0.29836660623550415, "learning_rate": 9.953636074944426e-05, "loss": 0.4005, "step": 78 }, { "epoch": 0.15026153114598192, "grad_norm": 0.3185100555419922, "learning_rate": 9.953000952683393e-05, "loss": 0.4168, "step": 79 }, { "epoch": 0.15216357584403234, "grad_norm": 0.26550424098968506, "learning_rate": 9.952365830422357e-05, "loss": 0.39, "step": 80 }, { "epoch": 0.15406562054208273, "grad_norm": 0.4328240156173706, "learning_rate": 9.951730708161322e-05, "loss": 0.5041, "step": 81 }, { "epoch": 0.15596766524013314, "grad_norm": 0.5178936123847961, "learning_rate": 9.951095585900286e-05, "loss": 0.6017, "step": 82 }, { "epoch": 0.15786970993818356, "grad_norm": 0.45657551288604736, "learning_rate": 9.950460463639251e-05, "loss": 0.5734, "step": 83 }, { "epoch": 0.15977175463623394, "grad_norm": 0.5482913851737976, "learning_rate": 9.949825341378216e-05, "loss": 0.6015, "step": 84 }, { "epoch": 0.16167379933428436, "grad_norm": 0.39362308382987976, "learning_rate": 9.94919021911718e-05, "loss": 0.5712, "step": 85 }, { "epoch": 0.16357584403233477, "grad_norm": 0.4381113350391388, "learning_rate": 9.948555096856145e-05, "loss": 0.5194, "step": 86 }, { "epoch": 0.16547788873038516, "grad_norm": 0.5021312236785889, "learning_rate": 9.94791997459511e-05, "loss": 0.5279, "step": 87 }, { "epoch": 0.16737993342843557, "grad_norm": 0.4364267587661743, "learning_rate": 9.947284852334074e-05, "loss": 0.5892, "step": 88 }, { "epoch": 0.16928197812648596, "grad_norm": 0.37873050570487976, "learning_rate": 9.94664973007304e-05, "loss": 0.5328, "step": 89 }, { "epoch": 0.17118402282453637, "grad_norm": 0.4768919050693512, "learning_rate": 9.946014607812005e-05, "loss": 0.4889, "step": 90 }, { "epoch": 0.1730860675225868, "grad_norm": 0.3834541440010071, "learning_rate": 9.945379485550968e-05, "loss": 0.4642, "step": 91 }, { "epoch": 0.17498811222063718, "grad_norm": 0.48581764101982117, "learning_rate": 9.944744363289934e-05, "loss": 0.4741, "step": 92 }, { "epoch": 0.1768901569186876, "grad_norm": 0.39364808797836304, "learning_rate": 9.944109241028899e-05, "loss": 0.5684, "step": 93 }, { "epoch": 0.178792201616738, "grad_norm": 0.4657204747200012, "learning_rate": 9.943474118767864e-05, "loss": 0.609, "step": 94 }, { "epoch": 0.1806942463147884, "grad_norm": 0.40989887714385986, "learning_rate": 9.942838996506828e-05, "loss": 0.4319, "step": 95 }, { "epoch": 0.1825962910128388, "grad_norm": 0.43797624111175537, "learning_rate": 9.942203874245793e-05, "loss": 0.4997, "step": 96 }, { "epoch": 0.1844983357108892, "grad_norm": 0.3887675106525421, "learning_rate": 9.941568751984758e-05, "loss": 0.5548, "step": 97 }, { "epoch": 0.1864003804089396, "grad_norm": 0.39017003774642944, "learning_rate": 9.940933629723722e-05, "loss": 0.5113, "step": 98 }, { "epoch": 0.18830242510699002, "grad_norm": 0.41409194469451904, "learning_rate": 9.940298507462687e-05, "loss": 0.5496, "step": 99 }, { "epoch": 0.1902044698050404, "grad_norm": 0.34578803181648254, "learning_rate": 9.939663385201652e-05, "loss": 0.4048, "step": 100 }, { "epoch": 0.19210651450309082, "grad_norm": 0.32233092188835144, "learning_rate": 9.939028262940616e-05, "loss": 0.4442, "step": 101 }, { "epoch": 0.19400855920114124, "grad_norm": 0.45841965079307556, "learning_rate": 9.938393140679581e-05, "loss": 0.5646, "step": 102 }, { "epoch": 0.19591060389919163, "grad_norm": 0.3825596272945404, "learning_rate": 9.937758018418547e-05, "loss": 0.4583, "step": 103 }, { "epoch": 0.19781264859724204, "grad_norm": 0.44690102338790894, "learning_rate": 9.93712289615751e-05, "loss": 0.5799, "step": 104 }, { "epoch": 0.19971469329529243, "grad_norm": 0.4881773591041565, "learning_rate": 9.936487773896476e-05, "loss": 0.4094, "step": 105 }, { "epoch": 0.20161673799334284, "grad_norm": 0.4745669960975647, "learning_rate": 9.93585265163544e-05, "loss": 0.6068, "step": 106 }, { "epoch": 0.20351878269139326, "grad_norm": 0.5497081279754639, "learning_rate": 9.935217529374406e-05, "loss": 0.4654, "step": 107 }, { "epoch": 0.20542082738944364, "grad_norm": 0.3564707636833191, "learning_rate": 9.93458240711337e-05, "loss": 0.5678, "step": 108 }, { "epoch": 0.20732287208749406, "grad_norm": 0.446321964263916, "learning_rate": 9.933947284852334e-05, "loss": 0.4503, "step": 109 }, { "epoch": 0.20922491678554447, "grad_norm": 0.4253140389919281, "learning_rate": 9.9333121625913e-05, "loss": 0.538, "step": 110 }, { "epoch": 0.21112696148359486, "grad_norm": 0.4123047888278961, "learning_rate": 9.932677040330264e-05, "loss": 0.4359, "step": 111 }, { "epoch": 0.21302900618164528, "grad_norm": 0.3887772262096405, "learning_rate": 9.932041918069229e-05, "loss": 0.5534, "step": 112 }, { "epoch": 0.21493105087969566, "grad_norm": 0.38153669238090515, "learning_rate": 9.931406795808193e-05, "loss": 0.4296, "step": 113 }, { "epoch": 0.21683309557774608, "grad_norm": 0.43017521500587463, "learning_rate": 9.930771673547158e-05, "loss": 0.5899, "step": 114 }, { "epoch": 0.2187351402757965, "grad_norm": 0.40156394243240356, "learning_rate": 9.930136551286123e-05, "loss": 0.3917, "step": 115 }, { "epoch": 0.22063718497384688, "grad_norm": 0.3576590120792389, "learning_rate": 9.929501429025087e-05, "loss": 0.3908, "step": 116 }, { "epoch": 0.2225392296718973, "grad_norm": 0.33245769143104553, "learning_rate": 9.928866306764054e-05, "loss": 0.4043, "step": 117 }, { "epoch": 0.2244412743699477, "grad_norm": 0.43169739842414856, "learning_rate": 9.928231184503018e-05, "loss": 0.5569, "step": 118 }, { "epoch": 0.2263433190679981, "grad_norm": 0.4004412293434143, "learning_rate": 9.927596062241981e-05, "loss": 0.4931, "step": 119 }, { "epoch": 0.2282453637660485, "grad_norm": 0.3550797998905182, "learning_rate": 9.926960939980947e-05, "loss": 0.4505, "step": 120 }, { "epoch": 0.2301474084640989, "grad_norm": 0.3701287508010864, "learning_rate": 9.926325817719912e-05, "loss": 0.4967, "step": 121 }, { "epoch": 0.2320494531621493, "grad_norm": 0.4120308756828308, "learning_rate": 9.925690695458876e-05, "loss": 0.4408, "step": 122 }, { "epoch": 0.23395149786019973, "grad_norm": 0.4737403392791748, "learning_rate": 9.925055573197841e-05, "loss": 0.7221, "step": 123 }, { "epoch": 0.2358535425582501, "grad_norm": 0.37103158235549927, "learning_rate": 9.924420450936806e-05, "loss": 0.4419, "step": 124 }, { "epoch": 0.23775558725630053, "grad_norm": 0.48644623160362244, "learning_rate": 9.923785328675771e-05, "loss": 0.5006, "step": 125 }, { "epoch": 0.2396576319543509, "grad_norm": 0.3381918966770172, "learning_rate": 9.923150206414735e-05, "loss": 0.4786, "step": 126 }, { "epoch": 0.24155967665240133, "grad_norm": 0.4500490128993988, "learning_rate": 9.9225150841537e-05, "loss": 0.4984, "step": 127 }, { "epoch": 0.24346172135045174, "grad_norm": 0.5506143569946289, "learning_rate": 9.921879961892665e-05, "loss": 0.4857, "step": 128 }, { "epoch": 0.24536376604850213, "grad_norm": 0.4111080467700958, "learning_rate": 9.921244839631629e-05, "loss": 0.4464, "step": 129 }, { "epoch": 0.24726581074655254, "grad_norm": 0.52936851978302, "learning_rate": 9.920609717370594e-05, "loss": 0.5664, "step": 130 }, { "epoch": 0.24916785544460296, "grad_norm": 0.465009480714798, "learning_rate": 9.91997459510956e-05, "loss": 0.4318, "step": 131 }, { "epoch": 0.25106990014265335, "grad_norm": 0.3044665455818176, "learning_rate": 9.919339472848523e-05, "loss": 0.4284, "step": 132 }, { "epoch": 0.25297194484070373, "grad_norm": 0.4849638342857361, "learning_rate": 9.918704350587488e-05, "loss": 0.5956, "step": 133 }, { "epoch": 0.2548739895387542, "grad_norm": 0.4701893925666809, "learning_rate": 9.918069228326454e-05, "loss": 0.4541, "step": 134 }, { "epoch": 0.25677603423680456, "grad_norm": 0.42524924874305725, "learning_rate": 9.917434106065419e-05, "loss": 0.4991, "step": 135 }, { "epoch": 0.25867807893485495, "grad_norm": 0.46284592151641846, "learning_rate": 9.916798983804383e-05, "loss": 0.453, "step": 136 }, { "epoch": 0.2605801236329054, "grad_norm": 0.40281572937965393, "learning_rate": 9.916163861543348e-05, "loss": 0.4771, "step": 137 }, { "epoch": 0.2624821683309558, "grad_norm": 0.425214558839798, "learning_rate": 9.915528739282313e-05, "loss": 0.4665, "step": 138 }, { "epoch": 0.26438421302900617, "grad_norm": 0.4181045889854431, "learning_rate": 9.914893617021277e-05, "loss": 0.5014, "step": 139 }, { "epoch": 0.2662862577270566, "grad_norm": 0.4024779498577118, "learning_rate": 9.914258494760241e-05, "loss": 0.5905, "step": 140 }, { "epoch": 0.268188302425107, "grad_norm": 0.3768770694732666, "learning_rate": 9.913623372499207e-05, "loss": 0.408, "step": 141 }, { "epoch": 0.2700903471231574, "grad_norm": 0.4033905267715454, "learning_rate": 9.912988250238171e-05, "loss": 0.4511, "step": 142 }, { "epoch": 0.2719923918212078, "grad_norm": 0.32505708932876587, "learning_rate": 9.912353127977136e-05, "loss": 0.4395, "step": 143 }, { "epoch": 0.2738944365192582, "grad_norm": 0.3487790822982788, "learning_rate": 9.9117180057161e-05, "loss": 0.3601, "step": 144 }, { "epoch": 0.2757964812173086, "grad_norm": 0.30558326840400696, "learning_rate": 9.911082883455065e-05, "loss": 0.4607, "step": 145 }, { "epoch": 0.277698525915359, "grad_norm": 0.3752080500125885, "learning_rate": 9.91044776119403e-05, "loss": 0.3957, "step": 146 }, { "epoch": 0.2796005706134094, "grad_norm": 0.3506644368171692, "learning_rate": 9.909812638932994e-05, "loss": 0.366, "step": 147 }, { "epoch": 0.2815026153114598, "grad_norm": 0.43430307507514954, "learning_rate": 9.909177516671961e-05, "loss": 0.4542, "step": 148 }, { "epoch": 0.2834046600095102, "grad_norm": 0.41930171847343445, "learning_rate": 9.908542394410925e-05, "loss": 0.709, "step": 149 }, { "epoch": 0.28530670470756064, "grad_norm": 0.3717108964920044, "learning_rate": 9.907907272149888e-05, "loss": 0.4701, "step": 150 }, { "epoch": 0.28720874940561103, "grad_norm": 0.4177984595298767, "learning_rate": 9.907272149888854e-05, "loss": 0.6189, "step": 151 }, { "epoch": 0.2891107941036614, "grad_norm": 0.37706881761550903, "learning_rate": 9.906637027627819e-05, "loss": 0.4546, "step": 152 }, { "epoch": 0.29101283880171186, "grad_norm": 0.4210599660873413, "learning_rate": 9.906001905366784e-05, "loss": 0.4716, "step": 153 }, { "epoch": 0.29291488349976225, "grad_norm": 0.3707990050315857, "learning_rate": 9.905366783105748e-05, "loss": 0.4644, "step": 154 }, { "epoch": 0.29481692819781263, "grad_norm": 0.36913537979125977, "learning_rate": 9.904731660844713e-05, "loss": 0.4605, "step": 155 }, { "epoch": 0.2967189728958631, "grad_norm": 0.41291072964668274, "learning_rate": 9.904096538583678e-05, "loss": 0.4294, "step": 156 }, { "epoch": 0.29862101759391346, "grad_norm": 0.30809640884399414, "learning_rate": 9.903461416322642e-05, "loss": 0.4369, "step": 157 }, { "epoch": 0.30052306229196385, "grad_norm": 0.4266267716884613, "learning_rate": 9.902826294061607e-05, "loss": 0.456, "step": 158 }, { "epoch": 0.3024251069900143, "grad_norm": 0.37408629059791565, "learning_rate": 9.902191171800572e-05, "loss": 0.4359, "step": 159 }, { "epoch": 0.3043271516880647, "grad_norm": 0.40199100971221924, "learning_rate": 9.901556049539536e-05, "loss": 0.4433, "step": 160 }, { "epoch": 0.30622919638611507, "grad_norm": 0.3430602252483368, "learning_rate": 9.900920927278501e-05, "loss": 0.4317, "step": 161 }, { "epoch": 0.30813124108416545, "grad_norm": 0.5091786980628967, "learning_rate": 9.900285805017467e-05, "loss": 0.5824, "step": 162 }, { "epoch": 0.3100332857822159, "grad_norm": 0.34287527203559875, "learning_rate": 9.89965068275643e-05, "loss": 0.4025, "step": 163 }, { "epoch": 0.3119353304802663, "grad_norm": 0.4919246733188629, "learning_rate": 9.899015560495396e-05, "loss": 0.5612, "step": 164 }, { "epoch": 0.31383737517831667, "grad_norm": 0.35404297709465027, "learning_rate": 9.898380438234361e-05, "loss": 0.4731, "step": 165 }, { "epoch": 0.3157394198763671, "grad_norm": 0.3590085506439209, "learning_rate": 9.897745315973326e-05, "loss": 0.4365, "step": 166 }, { "epoch": 0.3176414645744175, "grad_norm": 0.4132196605205536, "learning_rate": 9.89711019371229e-05, "loss": 0.3485, "step": 167 }, { "epoch": 0.3195435092724679, "grad_norm": 0.46459728479385376, "learning_rate": 9.896475071451255e-05, "loss": 0.4327, "step": 168 }, { "epoch": 0.3214455539705183, "grad_norm": 0.435651957988739, "learning_rate": 9.89583994919022e-05, "loss": 0.4684, "step": 169 }, { "epoch": 0.3233475986685687, "grad_norm": 0.38278958201408386, "learning_rate": 9.895204826929184e-05, "loss": 0.4265, "step": 170 }, { "epoch": 0.3252496433666191, "grad_norm": 0.31499558687210083, "learning_rate": 9.894569704668149e-05, "loss": 0.4099, "step": 171 }, { "epoch": 0.32715168806466954, "grad_norm": 0.40141284465789795, "learning_rate": 9.893934582407114e-05, "loss": 0.4461, "step": 172 }, { "epoch": 0.32905373276271993, "grad_norm": 0.42945384979248047, "learning_rate": 9.893299460146078e-05, "loss": 0.4379, "step": 173 }, { "epoch": 0.3309557774607703, "grad_norm": 0.5186269283294678, "learning_rate": 9.892664337885043e-05, "loss": 0.5134, "step": 174 }, { "epoch": 0.33285782215882076, "grad_norm": 0.3771612048149109, "learning_rate": 9.892029215624009e-05, "loss": 0.4617, "step": 175 }, { "epoch": 0.33475986685687115, "grad_norm": 0.48396849632263184, "learning_rate": 9.891394093362972e-05, "loss": 0.4944, "step": 176 }, { "epoch": 0.33666191155492153, "grad_norm": 0.5303121209144592, "learning_rate": 9.890758971101938e-05, "loss": 0.4049, "step": 177 }, { "epoch": 0.3385639562529719, "grad_norm": 0.33063024282455444, "learning_rate": 9.890123848840901e-05, "loss": 0.401, "step": 178 }, { "epoch": 0.34046600095102236, "grad_norm": 0.3764759302139282, "learning_rate": 9.889488726579868e-05, "loss": 0.4222, "step": 179 }, { "epoch": 0.34236804564907275, "grad_norm": 0.27206951379776, "learning_rate": 9.888853604318832e-05, "loss": 0.3206, "step": 180 }, { "epoch": 0.34427009034712314, "grad_norm": 0.3893122971057892, "learning_rate": 9.888218482057796e-05, "loss": 0.3558, "step": 181 }, { "epoch": 0.3461721350451736, "grad_norm": 0.42340540885925293, "learning_rate": 9.887583359796762e-05, "loss": 0.3948, "step": 182 }, { "epoch": 0.34807417974322397, "grad_norm": 0.4103796184062958, "learning_rate": 9.886948237535726e-05, "loss": 0.4769, "step": 183 }, { "epoch": 0.34997622444127435, "grad_norm": 0.39225244522094727, "learning_rate": 9.886313115274691e-05, "loss": 0.441, "step": 184 }, { "epoch": 0.3518782691393248, "grad_norm": 0.3774043023586273, "learning_rate": 9.885677993013655e-05, "loss": 0.3018, "step": 185 }, { "epoch": 0.3537803138373752, "grad_norm": 0.4012366235256195, "learning_rate": 9.88504287075262e-05, "loss": 0.4217, "step": 186 }, { "epoch": 0.35568235853542557, "grad_norm": 0.37299972772598267, "learning_rate": 9.884407748491585e-05, "loss": 0.4518, "step": 187 }, { "epoch": 0.357584403233476, "grad_norm": 0.34713125228881836, "learning_rate": 9.883772626230549e-05, "loss": 0.3882, "step": 188 }, { "epoch": 0.3594864479315264, "grad_norm": 0.4148958623409271, "learning_rate": 9.883137503969516e-05, "loss": 0.4979, "step": 189 }, { "epoch": 0.3613884926295768, "grad_norm": 0.3979155421257019, "learning_rate": 9.88250238170848e-05, "loss": 0.3854, "step": 190 }, { "epoch": 0.36329053732762717, "grad_norm": 0.42723751068115234, "learning_rate": 9.881867259447443e-05, "loss": 0.4325, "step": 191 }, { "epoch": 0.3651925820256776, "grad_norm": 0.4195951521396637, "learning_rate": 9.881232137186409e-05, "loss": 0.3917, "step": 192 }, { "epoch": 0.367094626723728, "grad_norm": 0.43937554955482483, "learning_rate": 9.880597014925374e-05, "loss": 0.3907, "step": 193 }, { "epoch": 0.3689966714217784, "grad_norm": 0.3176072835922241, "learning_rate": 9.879961892664338e-05, "loss": 0.3581, "step": 194 }, { "epoch": 0.37089871611982883, "grad_norm": 0.39909854531288147, "learning_rate": 9.879326770403303e-05, "loss": 0.5881, "step": 195 }, { "epoch": 0.3728007608178792, "grad_norm": 0.35058659315109253, "learning_rate": 9.878691648142268e-05, "loss": 0.4753, "step": 196 }, { "epoch": 0.3747028055159296, "grad_norm": 0.3353765904903412, "learning_rate": 9.878056525881233e-05, "loss": 0.4014, "step": 197 }, { "epoch": 0.37660485021398005, "grad_norm": 0.4102007746696472, "learning_rate": 9.877421403620197e-05, "loss": 0.4841, "step": 198 }, { "epoch": 0.37850689491203043, "grad_norm": 0.45450812578201294, "learning_rate": 9.876786281359162e-05, "loss": 0.4655, "step": 199 }, { "epoch": 0.3804089396100808, "grad_norm": 0.32525572180747986, "learning_rate": 9.876151159098127e-05, "loss": 0.3869, "step": 200 }, { "epoch": 0.38231098430813126, "grad_norm": 0.4488207697868347, "learning_rate": 9.875516036837091e-05, "loss": 0.4743, "step": 201 }, { "epoch": 0.38421302900618165, "grad_norm": 0.432962030172348, "learning_rate": 9.874880914576056e-05, "loss": 0.4171, "step": 202 }, { "epoch": 0.38611507370423204, "grad_norm": 0.4264095723628998, "learning_rate": 9.874245792315022e-05, "loss": 0.4344, "step": 203 }, { "epoch": 0.3880171184022825, "grad_norm": 0.43752139806747437, "learning_rate": 9.873610670053985e-05, "loss": 0.5248, "step": 204 }, { "epoch": 0.38991916310033287, "grad_norm": 0.42547503113746643, "learning_rate": 9.87297554779295e-05, "loss": 0.4011, "step": 205 }, { "epoch": 0.39182120779838325, "grad_norm": 0.34600159525871277, "learning_rate": 9.872340425531916e-05, "loss": 0.3444, "step": 206 }, { "epoch": 0.39372325249643364, "grad_norm": 0.3614776134490967, "learning_rate": 9.871705303270881e-05, "loss": 0.4784, "step": 207 }, { "epoch": 0.3956252971944841, "grad_norm": 0.47591882944107056, "learning_rate": 9.871070181009845e-05, "loss": 0.5159, "step": 208 }, { "epoch": 0.39752734189253447, "grad_norm": 0.3321515917778015, "learning_rate": 9.870435058748809e-05, "loss": 0.4382, "step": 209 }, { "epoch": 0.39942938659058486, "grad_norm": 0.45849499106407166, "learning_rate": 9.869799936487775e-05, "loss": 0.4269, "step": 210 }, { "epoch": 0.4013314312886353, "grad_norm": 0.3666900098323822, "learning_rate": 9.869164814226739e-05, "loss": 0.4077, "step": 211 }, { "epoch": 0.4032334759866857, "grad_norm": 0.3387741446495056, "learning_rate": 9.868529691965703e-05, "loss": 0.4485, "step": 212 }, { "epoch": 0.4051355206847361, "grad_norm": 0.3360239267349243, "learning_rate": 9.86789456970467e-05, "loss": 0.4042, "step": 213 }, { "epoch": 0.4070375653827865, "grad_norm": 0.40923500061035156, "learning_rate": 9.867259447443633e-05, "loss": 0.5001, "step": 214 }, { "epoch": 0.4089396100808369, "grad_norm": 0.3974573314189911, "learning_rate": 9.866624325182598e-05, "loss": 0.4984, "step": 215 }, { "epoch": 0.4108416547788873, "grad_norm": 0.4095960557460785, "learning_rate": 9.865989202921562e-05, "loss": 0.3837, "step": 216 }, { "epoch": 0.41274369947693773, "grad_norm": 0.3334168493747711, "learning_rate": 9.865354080660527e-05, "loss": 0.3935, "step": 217 }, { "epoch": 0.4146457441749881, "grad_norm": 0.5007266998291016, "learning_rate": 9.864718958399493e-05, "loss": 0.4443, "step": 218 }, { "epoch": 0.4165477888730385, "grad_norm": 0.35881495475769043, "learning_rate": 9.864083836138456e-05, "loss": 0.3835, "step": 219 }, { "epoch": 0.41844983357108895, "grad_norm": 0.3785092830657959, "learning_rate": 9.863448713877423e-05, "loss": 0.3884, "step": 220 }, { "epoch": 0.42035187826913933, "grad_norm": 0.41435107588768005, "learning_rate": 9.862813591616387e-05, "loss": 0.4116, "step": 221 }, { "epoch": 0.4222539229671897, "grad_norm": 0.41338756680488586, "learning_rate": 9.86217846935535e-05, "loss": 0.5235, "step": 222 }, { "epoch": 0.4241559676652401, "grad_norm": 0.4335710406303406, "learning_rate": 9.861543347094316e-05, "loss": 0.516, "step": 223 }, { "epoch": 0.42605801236329055, "grad_norm": 0.37374967336654663, "learning_rate": 9.860908224833281e-05, "loss": 0.4663, "step": 224 }, { "epoch": 0.42796005706134094, "grad_norm": 0.3213825821876526, "learning_rate": 9.860273102572246e-05, "loss": 0.3636, "step": 225 }, { "epoch": 0.4298621017593913, "grad_norm": 0.41535523533821106, "learning_rate": 9.85963798031121e-05, "loss": 0.3677, "step": 226 }, { "epoch": 0.43176414645744177, "grad_norm": 0.3543884754180908, "learning_rate": 9.859002858050175e-05, "loss": 0.376, "step": 227 }, { "epoch": 0.43366619115549215, "grad_norm": 0.4012312889099121, "learning_rate": 9.85836773578914e-05, "loss": 0.4886, "step": 228 }, { "epoch": 0.43556823585354254, "grad_norm": 0.3928169310092926, "learning_rate": 9.857732613528104e-05, "loss": 0.3741, "step": 229 }, { "epoch": 0.437470280551593, "grad_norm": 0.4982980191707611, "learning_rate": 9.85709749126707e-05, "loss": 0.5704, "step": 230 }, { "epoch": 0.43937232524964337, "grad_norm": 0.356545090675354, "learning_rate": 9.856462369006035e-05, "loss": 0.3618, "step": 231 }, { "epoch": 0.44127436994769376, "grad_norm": 0.5087487697601318, "learning_rate": 9.855827246744998e-05, "loss": 0.4733, "step": 232 }, { "epoch": 0.4431764146457442, "grad_norm": 0.3566097021102905, "learning_rate": 9.855192124483964e-05, "loss": 0.3771, "step": 233 }, { "epoch": 0.4450784593437946, "grad_norm": 0.3210541605949402, "learning_rate": 9.854557002222929e-05, "loss": 0.4341, "step": 234 }, { "epoch": 0.446980504041845, "grad_norm": 0.25422924757003784, "learning_rate": 9.853921879961893e-05, "loss": 0.3987, "step": 235 }, { "epoch": 0.4488825487398954, "grad_norm": 0.39164894819259644, "learning_rate": 9.853286757700858e-05, "loss": 0.4149, "step": 236 }, { "epoch": 0.4507845934379458, "grad_norm": 0.37471455335617065, "learning_rate": 9.852651635439823e-05, "loss": 0.4471, "step": 237 }, { "epoch": 0.4526866381359962, "grad_norm": 0.37678262591362, "learning_rate": 9.852016513178788e-05, "loss": 0.3943, "step": 238 }, { "epoch": 0.4545886828340466, "grad_norm": 0.4653976857662201, "learning_rate": 9.851381390917752e-05, "loss": 0.4848, "step": 239 }, { "epoch": 0.456490727532097, "grad_norm": 0.46764564514160156, "learning_rate": 9.850746268656717e-05, "loss": 0.4624, "step": 240 }, { "epoch": 0.4583927722301474, "grad_norm": 0.3803463876247406, "learning_rate": 9.850111146395682e-05, "loss": 0.442, "step": 241 }, { "epoch": 0.4602948169281978, "grad_norm": 0.33662229776382446, "learning_rate": 9.849476024134646e-05, "loss": 0.4564, "step": 242 }, { "epoch": 0.46219686162624823, "grad_norm": 0.42181041836738586, "learning_rate": 9.848840901873611e-05, "loss": 0.4702, "step": 243 }, { "epoch": 0.4640989063242986, "grad_norm": 0.40373390913009644, "learning_rate": 9.848205779612576e-05, "loss": 0.3745, "step": 244 }, { "epoch": 0.466000951022349, "grad_norm": 0.36634379625320435, "learning_rate": 9.84757065735154e-05, "loss": 0.428, "step": 245 }, { "epoch": 0.46790299572039945, "grad_norm": 0.35369235277175903, "learning_rate": 9.846935535090506e-05, "loss": 0.3986, "step": 246 }, { "epoch": 0.46980504041844984, "grad_norm": 0.4154004454612732, "learning_rate": 9.846300412829471e-05, "loss": 0.3512, "step": 247 }, { "epoch": 0.4717070851165002, "grad_norm": 0.3689868450164795, "learning_rate": 9.845665290568435e-05, "loss": 0.3708, "step": 248 }, { "epoch": 0.47360912981455067, "grad_norm": 0.38414841890335083, "learning_rate": 9.8450301683074e-05, "loss": 0.3401, "step": 249 }, { "epoch": 0.47551117451260105, "grad_norm": 0.39936143159866333, "learning_rate": 9.844395046046364e-05, "loss": 0.4328, "step": 250 }, { "epoch": 0.47741321921065144, "grad_norm": 0.30578187108039856, "learning_rate": 9.84375992378533e-05, "loss": 0.3694, "step": 251 }, { "epoch": 0.4793152639087018, "grad_norm": 0.39497658610343933, "learning_rate": 9.843124801524294e-05, "loss": 0.3945, "step": 252 }, { "epoch": 0.48121730860675227, "grad_norm": 0.44466689229011536, "learning_rate": 9.842489679263258e-05, "loss": 0.4485, "step": 253 }, { "epoch": 0.48311935330480266, "grad_norm": 0.3614617586135864, "learning_rate": 9.841854557002223e-05, "loss": 0.3701, "step": 254 }, { "epoch": 0.48502139800285304, "grad_norm": 0.3102608621120453, "learning_rate": 9.841219434741188e-05, "loss": 0.3677, "step": 255 }, { "epoch": 0.4869234427009035, "grad_norm": 0.36049678921699524, "learning_rate": 9.840584312480153e-05, "loss": 0.411, "step": 256 }, { "epoch": 0.4888254873989539, "grad_norm": 0.4025668501853943, "learning_rate": 9.839949190219117e-05, "loss": 0.433, "step": 257 }, { "epoch": 0.49072753209700426, "grad_norm": 0.4131562113761902, "learning_rate": 9.839314067958082e-05, "loss": 0.4818, "step": 258 }, { "epoch": 0.4926295767950547, "grad_norm": 0.481468141078949, "learning_rate": 9.838678945697047e-05, "loss": 0.5226, "step": 259 }, { "epoch": 0.4945316214931051, "grad_norm": 0.2845190167427063, "learning_rate": 9.838043823436011e-05, "loss": 0.3323, "step": 260 }, { "epoch": 0.4964336661911555, "grad_norm": 0.40381497144699097, "learning_rate": 9.837408701174976e-05, "loss": 0.4025, "step": 261 }, { "epoch": 0.4983357108892059, "grad_norm": 0.4109043478965759, "learning_rate": 9.836773578913942e-05, "loss": 0.4429, "step": 262 }, { "epoch": 0.5002377555872562, "grad_norm": 0.4256783425807953, "learning_rate": 9.836138456652906e-05, "loss": 0.3994, "step": 263 }, { "epoch": 0.5021398002853067, "grad_norm": 0.35044407844543457, "learning_rate": 9.835503334391871e-05, "loss": 0.4431, "step": 264 }, { "epoch": 0.5040418449833571, "grad_norm": 0.4456939697265625, "learning_rate": 9.834868212130836e-05, "loss": 0.5424, "step": 265 }, { "epoch": 0.5059438896814075, "grad_norm": 0.36340197920799255, "learning_rate": 9.8342330898698e-05, "loss": 0.4199, "step": 266 }, { "epoch": 0.5078459343794579, "grad_norm": 0.4018803536891937, "learning_rate": 9.833597967608765e-05, "loss": 0.4132, "step": 267 }, { "epoch": 0.5097479790775084, "grad_norm": 0.3372616469860077, "learning_rate": 9.83296284534773e-05, "loss": 0.3239, "step": 268 }, { "epoch": 0.5116500237755587, "grad_norm": 0.4497722387313843, "learning_rate": 9.832327723086695e-05, "loss": 0.4019, "step": 269 }, { "epoch": 0.5135520684736091, "grad_norm": 0.422269344329834, "learning_rate": 9.831692600825659e-05, "loss": 0.45, "step": 270 }, { "epoch": 0.5154541131716596, "grad_norm": 0.4167305529117584, "learning_rate": 9.831057478564624e-05, "loss": 0.4172, "step": 271 }, { "epoch": 0.5173561578697099, "grad_norm": 0.4340919554233551, "learning_rate": 9.83042235630359e-05, "loss": 0.5042, "step": 272 }, { "epoch": 0.5192582025677603, "grad_norm": 0.4179072380065918, "learning_rate": 9.829787234042553e-05, "loss": 0.3499, "step": 273 }, { "epoch": 0.5211602472658108, "grad_norm": 0.39216554164886475, "learning_rate": 9.829152111781518e-05, "loss": 0.4729, "step": 274 }, { "epoch": 0.5230622919638611, "grad_norm": 0.4485825002193451, "learning_rate": 9.828516989520484e-05, "loss": 0.4449, "step": 275 }, { "epoch": 0.5249643366619116, "grad_norm": 0.3843270242214203, "learning_rate": 9.827881867259447e-05, "loss": 0.5416, "step": 276 }, { "epoch": 0.526866381359962, "grad_norm": 0.30829140543937683, "learning_rate": 9.827246744998413e-05, "loss": 0.4004, "step": 277 }, { "epoch": 0.5287684260580123, "grad_norm": 0.2905525863170624, "learning_rate": 9.826611622737378e-05, "loss": 0.3574, "step": 278 }, { "epoch": 0.5306704707560628, "grad_norm": 0.3848637342453003, "learning_rate": 9.825976500476343e-05, "loss": 0.4021, "step": 279 }, { "epoch": 0.5325725154541132, "grad_norm": 0.32691988348960876, "learning_rate": 9.825341378215307e-05, "loss": 0.4317, "step": 280 }, { "epoch": 0.5344745601521635, "grad_norm": 0.3506065011024475, "learning_rate": 9.824706255954271e-05, "loss": 0.329, "step": 281 }, { "epoch": 0.536376604850214, "grad_norm": 0.3102387487888336, "learning_rate": 9.824071133693237e-05, "loss": 0.3695, "step": 282 }, { "epoch": 0.5382786495482644, "grad_norm": 0.45750680565834045, "learning_rate": 9.823436011432201e-05, "loss": 0.4232, "step": 283 }, { "epoch": 0.5401806942463148, "grad_norm": 0.297134131193161, "learning_rate": 9.822800889171165e-05, "loss": 0.4137, "step": 284 }, { "epoch": 0.5420827389443652, "grad_norm": 0.3696708679199219, "learning_rate": 9.822165766910131e-05, "loss": 0.4598, "step": 285 }, { "epoch": 0.5439847836424156, "grad_norm": 0.31236112117767334, "learning_rate": 9.821530644649095e-05, "loss": 0.314, "step": 286 }, { "epoch": 0.545886828340466, "grad_norm": 0.3596087694168091, "learning_rate": 9.82089552238806e-05, "loss": 0.4164, "step": 287 }, { "epoch": 0.5477888730385164, "grad_norm": 0.33347079157829285, "learning_rate": 9.820260400127024e-05, "loss": 0.3915, "step": 288 }, { "epoch": 0.5496909177365669, "grad_norm": 0.37818920612335205, "learning_rate": 9.81962527786599e-05, "loss": 0.3994, "step": 289 }, { "epoch": 0.5515929624346172, "grad_norm": 0.3968106806278229, "learning_rate": 9.818990155604955e-05, "loss": 0.3611, "step": 290 }, { "epoch": 0.5534950071326676, "grad_norm": 0.34991270303726196, "learning_rate": 9.818355033343918e-05, "loss": 0.3703, "step": 291 }, { "epoch": 0.555397051830718, "grad_norm": 0.4046263098716736, "learning_rate": 9.817719911082885e-05, "loss": 0.3302, "step": 292 }, { "epoch": 0.5572990965287684, "grad_norm": 0.35804587602615356, "learning_rate": 9.817084788821849e-05, "loss": 0.373, "step": 293 }, { "epoch": 0.5592011412268189, "grad_norm": 0.3538301885128021, "learning_rate": 9.816449666560813e-05, "loss": 0.3482, "step": 294 }, { "epoch": 0.5611031859248692, "grad_norm": 0.36835455894470215, "learning_rate": 9.815814544299778e-05, "loss": 0.3393, "step": 295 }, { "epoch": 0.5630052306229196, "grad_norm": 0.48919835686683655, "learning_rate": 9.815179422038743e-05, "loss": 0.4213, "step": 296 }, { "epoch": 0.5649072753209701, "grad_norm": 0.3472330570220947, "learning_rate": 9.814544299777708e-05, "loss": 0.3996, "step": 297 }, { "epoch": 0.5668093200190204, "grad_norm": 0.428611159324646, "learning_rate": 9.813909177516672e-05, "loss": 0.4524, "step": 298 }, { "epoch": 0.5687113647170708, "grad_norm": 0.4176979959011078, "learning_rate": 9.813274055255637e-05, "loss": 0.3787, "step": 299 }, { "epoch": 0.5706134094151213, "grad_norm": 0.41548797488212585, "learning_rate": 9.812638932994602e-05, "loss": 0.4758, "step": 300 }, { "epoch": 0.5725154541131716, "grad_norm": 0.3926902413368225, "learning_rate": 9.812003810733566e-05, "loss": 0.434, "step": 301 }, { "epoch": 0.5744174988112221, "grad_norm": 0.392846018075943, "learning_rate": 9.811368688472531e-05, "loss": 0.3928, "step": 302 }, { "epoch": 0.5763195435092725, "grad_norm": 0.36347585916519165, "learning_rate": 9.810733566211497e-05, "loss": 0.4264, "step": 303 }, { "epoch": 0.5782215882073228, "grad_norm": 0.4314410090446472, "learning_rate": 9.81009844395046e-05, "loss": 0.4199, "step": 304 }, { "epoch": 0.5801236329053733, "grad_norm": 0.337494820356369, "learning_rate": 9.809463321689426e-05, "loss": 0.4181, "step": 305 }, { "epoch": 0.5820256776034237, "grad_norm": 0.27786335349082947, "learning_rate": 9.808828199428391e-05, "loss": 0.3, "step": 306 }, { "epoch": 0.583927722301474, "grad_norm": 0.37235599756240845, "learning_rate": 9.808193077167355e-05, "loss": 0.3927, "step": 307 }, { "epoch": 0.5858297669995245, "grad_norm": 0.37353670597076416, "learning_rate": 9.80755795490632e-05, "loss": 0.4146, "step": 308 }, { "epoch": 0.5877318116975749, "grad_norm": 0.3919946551322937, "learning_rate": 9.806922832645285e-05, "loss": 0.5055, "step": 309 }, { "epoch": 0.5896338563956253, "grad_norm": 0.45411062240600586, "learning_rate": 9.80628771038425e-05, "loss": 0.5347, "step": 310 }, { "epoch": 0.5915359010936757, "grad_norm": 0.4087005853652954, "learning_rate": 9.805652588123214e-05, "loss": 0.3732, "step": 311 }, { "epoch": 0.5934379457917262, "grad_norm": 0.313297837972641, "learning_rate": 9.805017465862178e-05, "loss": 0.3093, "step": 312 }, { "epoch": 0.5953399904897765, "grad_norm": 0.40149226784706116, "learning_rate": 9.804382343601144e-05, "loss": 0.4404, "step": 313 }, { "epoch": 0.5972420351878269, "grad_norm": 0.34245574474334717, "learning_rate": 9.803747221340108e-05, "loss": 0.4036, "step": 314 }, { "epoch": 0.5991440798858774, "grad_norm": 0.38059449195861816, "learning_rate": 9.803112099079073e-05, "loss": 0.3763, "step": 315 }, { "epoch": 0.6010461245839277, "grad_norm": 0.4539381265640259, "learning_rate": 9.802476976818039e-05, "loss": 0.4551, "step": 316 }, { "epoch": 0.6029481692819781, "grad_norm": 0.4077235460281372, "learning_rate": 9.801841854557002e-05, "loss": 0.4641, "step": 317 }, { "epoch": 0.6048502139800286, "grad_norm": 0.3426643908023834, "learning_rate": 9.801206732295968e-05, "loss": 0.3684, "step": 318 }, { "epoch": 0.6067522586780789, "grad_norm": 0.3042270839214325, "learning_rate": 9.800571610034931e-05, "loss": 0.373, "step": 319 }, { "epoch": 0.6086543033761294, "grad_norm": 0.4373973309993744, "learning_rate": 9.799936487773897e-05, "loss": 0.5442, "step": 320 }, { "epoch": 0.6105563480741797, "grad_norm": 0.385797917842865, "learning_rate": 9.799301365512862e-05, "loss": 0.4218, "step": 321 }, { "epoch": 0.6124583927722301, "grad_norm": 0.33210891485214233, "learning_rate": 9.798666243251826e-05, "loss": 0.3062, "step": 322 }, { "epoch": 0.6143604374702806, "grad_norm": 0.3997063636779785, "learning_rate": 9.798031120990792e-05, "loss": 0.4104, "step": 323 }, { "epoch": 0.6162624821683309, "grad_norm": 0.4837460219860077, "learning_rate": 9.797395998729756e-05, "loss": 0.5271, "step": 324 }, { "epoch": 0.6181645268663813, "grad_norm": 0.36420971155166626, "learning_rate": 9.79676087646872e-05, "loss": 0.4033, "step": 325 }, { "epoch": 0.6200665715644318, "grad_norm": 0.33610865473747253, "learning_rate": 9.796125754207685e-05, "loss": 0.3992, "step": 326 }, { "epoch": 0.6219686162624821, "grad_norm": 0.28999099135398865, "learning_rate": 9.79549063194665e-05, "loss": 0.3675, "step": 327 }, { "epoch": 0.6238706609605326, "grad_norm": 0.359401673078537, "learning_rate": 9.794855509685615e-05, "loss": 0.4363, "step": 328 }, { "epoch": 0.625772705658583, "grad_norm": 0.3948569595813751, "learning_rate": 9.794220387424579e-05, "loss": 0.3698, "step": 329 }, { "epoch": 0.6276747503566333, "grad_norm": 0.3753513991832733, "learning_rate": 9.793585265163544e-05, "loss": 0.4397, "step": 330 }, { "epoch": 0.6295767950546838, "grad_norm": 0.32612451910972595, "learning_rate": 9.79295014290251e-05, "loss": 0.3846, "step": 331 }, { "epoch": 0.6314788397527342, "grad_norm": 0.40796539187431335, "learning_rate": 9.792315020641473e-05, "loss": 0.371, "step": 332 }, { "epoch": 0.6333808844507846, "grad_norm": 0.4358294904232025, "learning_rate": 9.791679898380439e-05, "loss": 0.4052, "step": 333 }, { "epoch": 0.635282929148835, "grad_norm": 0.39615437388420105, "learning_rate": 9.791044776119404e-05, "loss": 0.3686, "step": 334 }, { "epoch": 0.6371849738468854, "grad_norm": 0.32977715134620667, "learning_rate": 9.790409653858368e-05, "loss": 0.4404, "step": 335 }, { "epoch": 0.6390870185449358, "grad_norm": 0.38361093401908875, "learning_rate": 9.789774531597333e-05, "loss": 0.3709, "step": 336 }, { "epoch": 0.6409890632429862, "grad_norm": 0.40280988812446594, "learning_rate": 9.789139409336298e-05, "loss": 0.3322, "step": 337 }, { "epoch": 0.6428911079410367, "grad_norm": 0.3682766854763031, "learning_rate": 9.788504287075262e-05, "loss": 0.4144, "step": 338 }, { "epoch": 0.644793152639087, "grad_norm": 0.39864271879196167, "learning_rate": 9.787869164814227e-05, "loss": 0.4404, "step": 339 }, { "epoch": 0.6466951973371374, "grad_norm": 0.3244321048259735, "learning_rate": 9.787234042553192e-05, "loss": 0.3541, "step": 340 }, { "epoch": 0.6485972420351879, "grad_norm": 0.323403924703598, "learning_rate": 9.786598920292157e-05, "loss": 0.3374, "step": 341 }, { "epoch": 0.6504992867332382, "grad_norm": 0.3881044387817383, "learning_rate": 9.785963798031121e-05, "loss": 0.4415, "step": 342 }, { "epoch": 0.6524013314312886, "grad_norm": 0.35189467668533325, "learning_rate": 9.785328675770086e-05, "loss": 0.401, "step": 343 }, { "epoch": 0.6543033761293391, "grad_norm": 0.3553767800331116, "learning_rate": 9.784693553509052e-05, "loss": 0.456, "step": 344 }, { "epoch": 0.6562054208273894, "grad_norm": 0.3302605152130127, "learning_rate": 9.784058431248015e-05, "loss": 0.472, "step": 345 }, { "epoch": 0.6581074655254399, "grad_norm": 0.4526873826980591, "learning_rate": 9.78342330898698e-05, "loss": 0.3908, "step": 346 }, { "epoch": 0.6600095102234903, "grad_norm": 0.3232348561286926, "learning_rate": 9.782788186725946e-05, "loss": 0.3421, "step": 347 }, { "epoch": 0.6619115549215406, "grad_norm": 0.38508203625679016, "learning_rate": 9.78215306446491e-05, "loss": 0.4093, "step": 348 }, { "epoch": 0.6638135996195911, "grad_norm": 0.3187748193740845, "learning_rate": 9.781517942203875e-05, "loss": 0.4319, "step": 349 }, { "epoch": 0.6657156443176415, "grad_norm": 0.2614807188510895, "learning_rate": 9.78088281994284e-05, "loss": 0.314, "step": 350 }, { "epoch": 0.6676176890156919, "grad_norm": 0.40218180418014526, "learning_rate": 9.780247697681805e-05, "loss": 0.4404, "step": 351 }, { "epoch": 0.6695197337137423, "grad_norm": 0.4016517996788025, "learning_rate": 9.779612575420769e-05, "loss": 0.5063, "step": 352 }, { "epoch": 0.6714217784117926, "grad_norm": 0.3333278000354767, "learning_rate": 9.778977453159733e-05, "loss": 0.2966, "step": 353 }, { "epoch": 0.6733238231098431, "grad_norm": 0.4535547196865082, "learning_rate": 9.778342330898699e-05, "loss": 0.4077, "step": 354 }, { "epoch": 0.6752258678078935, "grad_norm": 0.4180653393268585, "learning_rate": 9.777707208637663e-05, "loss": 0.4554, "step": 355 }, { "epoch": 0.6771279125059438, "grad_norm": 0.43454670906066895, "learning_rate": 9.777072086376627e-05, "loss": 0.4403, "step": 356 }, { "epoch": 0.6790299572039943, "grad_norm": 0.45290321111679077, "learning_rate": 9.776436964115594e-05, "loss": 0.4037, "step": 357 }, { "epoch": 0.6809320019020447, "grad_norm": 0.34165212512016296, "learning_rate": 9.775801841854557e-05, "loss": 0.3044, "step": 358 }, { "epoch": 0.6828340466000951, "grad_norm": 0.435138463973999, "learning_rate": 9.775166719593523e-05, "loss": 0.4293, "step": 359 }, { "epoch": 0.6847360912981455, "grad_norm": 0.36061882972717285, "learning_rate": 9.774531597332486e-05, "loss": 0.4052, "step": 360 }, { "epoch": 0.6866381359961959, "grad_norm": 0.4023354947566986, "learning_rate": 9.773896475071452e-05, "loss": 0.4232, "step": 361 }, { "epoch": 0.6885401806942463, "grad_norm": 0.39200109243392944, "learning_rate": 9.773261352810417e-05, "loss": 0.3882, "step": 362 }, { "epoch": 0.6904422253922967, "grad_norm": 0.34504035115242004, "learning_rate": 9.77262623054938e-05, "loss": 0.4063, "step": 363 }, { "epoch": 0.6923442700903472, "grad_norm": 0.31081900000572205, "learning_rate": 9.771991108288346e-05, "loss": 0.251, "step": 364 }, { "epoch": 0.6942463147883975, "grad_norm": 0.3800300061702728, "learning_rate": 9.771355986027311e-05, "loss": 0.3722, "step": 365 }, { "epoch": 0.6961483594864479, "grad_norm": 0.3476494550704956, "learning_rate": 9.770720863766275e-05, "loss": 0.382, "step": 366 }, { "epoch": 0.6980504041844984, "grad_norm": 0.38069918751716614, "learning_rate": 9.77008574150524e-05, "loss": 0.4329, "step": 367 }, { "epoch": 0.6999524488825487, "grad_norm": 0.4034759998321533, "learning_rate": 9.769450619244205e-05, "loss": 0.4112, "step": 368 }, { "epoch": 0.7018544935805991, "grad_norm": 0.4232093393802643, "learning_rate": 9.76881549698317e-05, "loss": 0.4524, "step": 369 }, { "epoch": 0.7037565382786496, "grad_norm": 0.40627321600914, "learning_rate": 9.768180374722134e-05, "loss": 0.388, "step": 370 }, { "epoch": 0.7056585829766999, "grad_norm": 0.41021519899368286, "learning_rate": 9.767545252461099e-05, "loss": 0.3741, "step": 371 }, { "epoch": 0.7075606276747504, "grad_norm": 0.3615809679031372, "learning_rate": 9.766910130200065e-05, "loss": 0.4432, "step": 372 }, { "epoch": 0.7094626723728008, "grad_norm": 0.3088645935058594, "learning_rate": 9.766275007939028e-05, "loss": 0.3343, "step": 373 }, { "epoch": 0.7113647170708511, "grad_norm": 0.380659818649292, "learning_rate": 9.765639885677994e-05, "loss": 0.4092, "step": 374 }, { "epoch": 0.7132667617689016, "grad_norm": 0.28462380170822144, "learning_rate": 9.765004763416959e-05, "loss": 0.31, "step": 375 }, { "epoch": 0.715168806466952, "grad_norm": 0.3215513229370117, "learning_rate": 9.764369641155923e-05, "loss": 0.4115, "step": 376 }, { "epoch": 0.7170708511650024, "grad_norm": 0.397651731967926, "learning_rate": 9.763734518894888e-05, "loss": 0.4369, "step": 377 }, { "epoch": 0.7189728958630528, "grad_norm": 0.31436121463775635, "learning_rate": 9.763099396633853e-05, "loss": 0.4339, "step": 378 }, { "epoch": 0.7208749405611032, "grad_norm": 0.4024806320667267, "learning_rate": 9.762464274372817e-05, "loss": 0.4252, "step": 379 }, { "epoch": 0.7227769852591536, "grad_norm": 0.37994107604026794, "learning_rate": 9.761829152111782e-05, "loss": 0.3483, "step": 380 }, { "epoch": 0.724679029957204, "grad_norm": 0.44616061449050903, "learning_rate": 9.761194029850747e-05, "loss": 0.3809, "step": 381 }, { "epoch": 0.7265810746552543, "grad_norm": 0.3396744728088379, "learning_rate": 9.760558907589712e-05, "loss": 0.3382, "step": 382 }, { "epoch": 0.7284831193533048, "grad_norm": 0.334839791059494, "learning_rate": 9.759923785328676e-05, "loss": 0.3465, "step": 383 }, { "epoch": 0.7303851640513552, "grad_norm": 0.417478084564209, "learning_rate": 9.75928866306764e-05, "loss": 0.3191, "step": 384 }, { "epoch": 0.7322872087494056, "grad_norm": 0.30790823698043823, "learning_rate": 9.758653540806606e-05, "loss": 0.3139, "step": 385 }, { "epoch": 0.734189253447456, "grad_norm": 0.4008057415485382, "learning_rate": 9.75801841854557e-05, "loss": 0.419, "step": 386 }, { "epoch": 0.7360912981455064, "grad_norm": 0.42966723442077637, "learning_rate": 9.757383296284535e-05, "loss": 0.3634, "step": 387 }, { "epoch": 0.7379933428435568, "grad_norm": 0.33789002895355225, "learning_rate": 9.7567481740235e-05, "loss": 0.3966, "step": 388 }, { "epoch": 0.7398953875416072, "grad_norm": 0.35244229435920715, "learning_rate": 9.756113051762464e-05, "loss": 0.3991, "step": 389 }, { "epoch": 0.7417974322396577, "grad_norm": 0.3581864833831787, "learning_rate": 9.75547792950143e-05, "loss": 0.347, "step": 390 }, { "epoch": 0.743699476937708, "grad_norm": 0.30788975954055786, "learning_rate": 9.754842807240394e-05, "loss": 0.3485, "step": 391 }, { "epoch": 0.7456015216357584, "grad_norm": 0.5155593156814575, "learning_rate": 9.754207684979359e-05, "loss": 0.4793, "step": 392 }, { "epoch": 0.7475035663338089, "grad_norm": 0.4183029532432556, "learning_rate": 9.753572562718324e-05, "loss": 0.4064, "step": 393 }, { "epoch": 0.7494056110318592, "grad_norm": 0.36132046580314636, "learning_rate": 9.752937440457288e-05, "loss": 0.3539, "step": 394 }, { "epoch": 0.7513076557299097, "grad_norm": 0.4269217252731323, "learning_rate": 9.752302318196254e-05, "loss": 0.4358, "step": 395 }, { "epoch": 0.7532097004279601, "grad_norm": 0.38872459530830383, "learning_rate": 9.751667195935218e-05, "loss": 0.3238, "step": 396 }, { "epoch": 0.7551117451260104, "grad_norm": 0.4668743312358856, "learning_rate": 9.751032073674182e-05, "loss": 0.4218, "step": 397 }, { "epoch": 0.7570137898240609, "grad_norm": 0.3817143738269806, "learning_rate": 9.750396951413147e-05, "loss": 0.4332, "step": 398 }, { "epoch": 0.7589158345221113, "grad_norm": 0.4089401960372925, "learning_rate": 9.749761829152112e-05, "loss": 0.319, "step": 399 }, { "epoch": 0.7608178792201616, "grad_norm": 0.36516866087913513, "learning_rate": 9.749126706891077e-05, "loss": 0.3858, "step": 400 }, { "epoch": 0.7627199239182121, "grad_norm": 0.3843027949333191, "learning_rate": 9.748491584630041e-05, "loss": 0.4682, "step": 401 }, { "epoch": 0.7646219686162625, "grad_norm": 0.36987295746803284, "learning_rate": 9.747856462369006e-05, "loss": 0.3328, "step": 402 }, { "epoch": 0.7665240133143129, "grad_norm": 0.4972301721572876, "learning_rate": 9.747221340107972e-05, "loss": 0.3939, "step": 403 }, { "epoch": 0.7684260580123633, "grad_norm": 0.4319972097873688, "learning_rate": 9.746586217846935e-05, "loss": 0.3918, "step": 404 }, { "epoch": 0.7703281027104137, "grad_norm": 0.364364892244339, "learning_rate": 9.7459510955859e-05, "loss": 0.3871, "step": 405 }, { "epoch": 0.7722301474084641, "grad_norm": 0.43767908215522766, "learning_rate": 9.745315973324866e-05, "loss": 0.3973, "step": 406 }, { "epoch": 0.7741321921065145, "grad_norm": 0.44734928011894226, "learning_rate": 9.74468085106383e-05, "loss": 0.3884, "step": 407 }, { "epoch": 0.776034236804565, "grad_norm": 0.3817954957485199, "learning_rate": 9.744045728802795e-05, "loss": 0.3647, "step": 408 }, { "epoch": 0.7779362815026153, "grad_norm": 0.3619462251663208, "learning_rate": 9.74341060654176e-05, "loss": 0.4994, "step": 409 }, { "epoch": 0.7798383262006657, "grad_norm": 0.38225993514060974, "learning_rate": 9.742775484280724e-05, "loss": 0.4116, "step": 410 }, { "epoch": 0.7817403708987162, "grad_norm": 0.39784252643585205, "learning_rate": 9.742140362019689e-05, "loss": 0.3729, "step": 411 }, { "epoch": 0.7836424155967665, "grad_norm": 0.3188072443008423, "learning_rate": 9.741505239758654e-05, "loss": 0.3767, "step": 412 }, { "epoch": 0.785544460294817, "grad_norm": 0.4509223401546478, "learning_rate": 9.74087011749762e-05, "loss": 0.4595, "step": 413 }, { "epoch": 0.7874465049928673, "grad_norm": 0.40249937772750854, "learning_rate": 9.740234995236583e-05, "loss": 0.3761, "step": 414 }, { "epoch": 0.7893485496909177, "grad_norm": 0.3387410044670105, "learning_rate": 9.739599872975547e-05, "loss": 0.401, "step": 415 }, { "epoch": 0.7912505943889682, "grad_norm": 0.47670629620552063, "learning_rate": 9.738964750714514e-05, "loss": 0.3656, "step": 416 }, { "epoch": 0.7931526390870185, "grad_norm": 0.37239211797714233, "learning_rate": 9.738329628453477e-05, "loss": 0.4885, "step": 417 }, { "epoch": 0.7950546837850689, "grad_norm": 0.3347351849079132, "learning_rate": 9.737694506192443e-05, "loss": 0.291, "step": 418 }, { "epoch": 0.7969567284831194, "grad_norm": 0.3727717399597168, "learning_rate": 9.737059383931408e-05, "loss": 0.3506, "step": 419 }, { "epoch": 0.7988587731811697, "grad_norm": 0.3866841793060303, "learning_rate": 9.736424261670372e-05, "loss": 0.4355, "step": 420 }, { "epoch": 0.8007608178792202, "grad_norm": 0.39670372009277344, "learning_rate": 9.735789139409337e-05, "loss": 0.4041, "step": 421 }, { "epoch": 0.8026628625772706, "grad_norm": 0.35946765542030334, "learning_rate": 9.7351540171483e-05, "loss": 0.3378, "step": 422 }, { "epoch": 0.8045649072753209, "grad_norm": 0.24180381000041962, "learning_rate": 9.734518894887267e-05, "loss": 0.3133, "step": 423 }, { "epoch": 0.8064669519733714, "grad_norm": 0.4238085150718689, "learning_rate": 9.733883772626231e-05, "loss": 0.3968, "step": 424 }, { "epoch": 0.8083689966714218, "grad_norm": 0.35451412200927734, "learning_rate": 9.733248650365195e-05, "loss": 0.3456, "step": 425 }, { "epoch": 0.8102710413694721, "grad_norm": 0.49277418851852417, "learning_rate": 9.732613528104161e-05, "loss": 0.3916, "step": 426 }, { "epoch": 0.8121730860675226, "grad_norm": 0.34536874294281006, "learning_rate": 9.731978405843125e-05, "loss": 0.537, "step": 427 }, { "epoch": 0.814075130765573, "grad_norm": 0.3002311885356903, "learning_rate": 9.731343283582089e-05, "loss": 0.3842, "step": 428 }, { "epoch": 0.8159771754636234, "grad_norm": 0.29766812920570374, "learning_rate": 9.730708161321054e-05, "loss": 0.2979, "step": 429 }, { "epoch": 0.8178792201616738, "grad_norm": 0.34347230195999146, "learning_rate": 9.73007303906002e-05, "loss": 0.3996, "step": 430 }, { "epoch": 0.8197812648597242, "grad_norm": 0.42430102825164795, "learning_rate": 9.729437916798985e-05, "loss": 0.4677, "step": 431 }, { "epoch": 0.8216833095577746, "grad_norm": 0.3375668227672577, "learning_rate": 9.728802794537948e-05, "loss": 0.4257, "step": 432 }, { "epoch": 0.823585354255825, "grad_norm": 0.3718586266040802, "learning_rate": 9.728167672276914e-05, "loss": 0.3555, "step": 433 }, { "epoch": 0.8254873989538755, "grad_norm": 0.4310496151447296, "learning_rate": 9.727532550015879e-05, "loss": 0.4026, "step": 434 }, { "epoch": 0.8273894436519258, "grad_norm": 0.43832001090049744, "learning_rate": 9.726897427754843e-05, "loss": 0.4421, "step": 435 }, { "epoch": 0.8292914883499762, "grad_norm": 0.42209911346435547, "learning_rate": 9.726262305493808e-05, "loss": 0.397, "step": 436 }, { "epoch": 0.8311935330480267, "grad_norm": 0.4297396242618561, "learning_rate": 9.725627183232773e-05, "loss": 0.4244, "step": 437 }, { "epoch": 0.833095577746077, "grad_norm": 0.40587079524993896, "learning_rate": 9.724992060971737e-05, "loss": 0.3753, "step": 438 }, { "epoch": 0.8349976224441275, "grad_norm": 0.4127040505409241, "learning_rate": 9.724356938710702e-05, "loss": 0.3926, "step": 439 }, { "epoch": 0.8368996671421779, "grad_norm": 0.3734678030014038, "learning_rate": 9.723721816449667e-05, "loss": 0.3338, "step": 440 }, { "epoch": 0.8388017118402282, "grad_norm": 0.38152286410331726, "learning_rate": 9.723086694188632e-05, "loss": 0.3893, "step": 441 }, { "epoch": 0.8407037565382787, "grad_norm": 0.4234791398048401, "learning_rate": 9.722451571927596e-05, "loss": 0.3104, "step": 442 }, { "epoch": 0.842605801236329, "grad_norm": 0.49204525351524353, "learning_rate": 9.721816449666561e-05, "loss": 0.3698, "step": 443 }, { "epoch": 0.8445078459343794, "grad_norm": 0.40980932116508484, "learning_rate": 9.721181327405527e-05, "loss": 0.3901, "step": 444 }, { "epoch": 0.8464098906324299, "grad_norm": 0.3330426514148712, "learning_rate": 9.72054620514449e-05, "loss": 0.3118, "step": 445 }, { "epoch": 0.8483119353304802, "grad_norm": 0.3042624890804291, "learning_rate": 9.719911082883456e-05, "loss": 0.3003, "step": 446 }, { "epoch": 0.8502139800285307, "grad_norm": 0.34576475620269775, "learning_rate": 9.719275960622421e-05, "loss": 0.3332, "step": 447 }, { "epoch": 0.8521160247265811, "grad_norm": 0.2980082035064697, "learning_rate": 9.718640838361385e-05, "loss": 0.3285, "step": 448 }, { "epoch": 0.8540180694246314, "grad_norm": 0.31439459323883057, "learning_rate": 9.71800571610035e-05, "loss": 0.3178, "step": 449 }, { "epoch": 0.8559201141226819, "grad_norm": 0.37447845935821533, "learning_rate": 9.717370593839315e-05, "loss": 0.3861, "step": 450 }, { "epoch": 0.8578221588207323, "grad_norm": 0.4261024594306946, "learning_rate": 9.716735471578279e-05, "loss": 0.4377, "step": 451 }, { "epoch": 0.8597242035187826, "grad_norm": 0.3328630328178406, "learning_rate": 9.716100349317244e-05, "loss": 0.2791, "step": 452 }, { "epoch": 0.8616262482168331, "grad_norm": 0.41943463683128357, "learning_rate": 9.715465227056209e-05, "loss": 0.4693, "step": 453 }, { "epoch": 0.8635282929148835, "grad_norm": 0.4295640289783478, "learning_rate": 9.714830104795174e-05, "loss": 0.4105, "step": 454 }, { "epoch": 0.8654303376129339, "grad_norm": 0.3548508882522583, "learning_rate": 9.714194982534138e-05, "loss": 0.3024, "step": 455 }, { "epoch": 0.8673323823109843, "grad_norm": 0.5577777624130249, "learning_rate": 9.713559860273102e-05, "loss": 0.3961, "step": 456 }, { "epoch": 0.8692344270090347, "grad_norm": 0.4119040071964264, "learning_rate": 9.712924738012069e-05, "loss": 0.3143, "step": 457 }, { "epoch": 0.8711364717070851, "grad_norm": 0.40272560715675354, "learning_rate": 9.712289615751032e-05, "loss": 0.3452, "step": 458 }, { "epoch": 0.8730385164051355, "grad_norm": 0.456386536359787, "learning_rate": 9.711654493489998e-05, "loss": 0.403, "step": 459 }, { "epoch": 0.874940561103186, "grad_norm": 0.3982544541358948, "learning_rate": 9.711019371228963e-05, "loss": 0.4498, "step": 460 }, { "epoch": 0.8768426058012363, "grad_norm": 0.29361623525619507, "learning_rate": 9.710384248967927e-05, "loss": 0.3724, "step": 461 }, { "epoch": 0.8787446504992867, "grad_norm": 0.3854773938655853, "learning_rate": 9.709749126706892e-05, "loss": 0.4162, "step": 462 }, { "epoch": 0.8806466951973372, "grad_norm": 0.3760225474834442, "learning_rate": 9.709114004445856e-05, "loss": 0.4335, "step": 463 }, { "epoch": 0.8825487398953875, "grad_norm": 0.4936290383338928, "learning_rate": 9.708478882184821e-05, "loss": 0.3522, "step": 464 }, { "epoch": 0.884450784593438, "grad_norm": 0.3584468364715576, "learning_rate": 9.707843759923786e-05, "loss": 0.552, "step": 465 }, { "epoch": 0.8863528292914884, "grad_norm": 0.3523949086666107, "learning_rate": 9.70720863766275e-05, "loss": 0.3498, "step": 466 }, { "epoch": 0.8882548739895387, "grad_norm": 0.42082804441452026, "learning_rate": 9.706573515401716e-05, "loss": 0.4863, "step": 467 }, { "epoch": 0.8901569186875892, "grad_norm": 0.4284763038158417, "learning_rate": 9.70593839314068e-05, "loss": 0.4737, "step": 468 }, { "epoch": 0.8920589633856396, "grad_norm": 0.3609261214733124, "learning_rate": 9.705303270879644e-05, "loss": 0.3208, "step": 469 }, { "epoch": 0.89396100808369, "grad_norm": 0.31832849979400635, "learning_rate": 9.704668148618609e-05, "loss": 0.2545, "step": 470 }, { "epoch": 0.8958630527817404, "grad_norm": 0.38202738761901855, "learning_rate": 9.704033026357574e-05, "loss": 0.3952, "step": 471 }, { "epoch": 0.8977650974797908, "grad_norm": 0.347649484872818, "learning_rate": 9.70339790409654e-05, "loss": 0.3776, "step": 472 }, { "epoch": 0.8996671421778412, "grad_norm": 0.41626760363578796, "learning_rate": 9.702762781835503e-05, "loss": 0.4152, "step": 473 }, { "epoch": 0.9015691868758916, "grad_norm": 0.4042579233646393, "learning_rate": 9.702127659574469e-05, "loss": 0.3813, "step": 474 }, { "epoch": 0.9034712315739419, "grad_norm": 0.38196825981140137, "learning_rate": 9.701492537313434e-05, "loss": 0.4398, "step": 475 }, { "epoch": 0.9053732762719924, "grad_norm": 0.3867753744125366, "learning_rate": 9.700857415052398e-05, "loss": 0.4995, "step": 476 }, { "epoch": 0.9072753209700428, "grad_norm": 0.34228166937828064, "learning_rate": 9.700222292791363e-05, "loss": 0.284, "step": 477 }, { "epoch": 0.9091773656680932, "grad_norm": 0.3962937593460083, "learning_rate": 9.699587170530328e-05, "loss": 0.3501, "step": 478 }, { "epoch": 0.9110794103661436, "grad_norm": 0.3665268123149872, "learning_rate": 9.698952048269292e-05, "loss": 0.2737, "step": 479 }, { "epoch": 0.912981455064194, "grad_norm": 0.3775653839111328, "learning_rate": 9.698316926008257e-05, "loss": 0.3173, "step": 480 }, { "epoch": 0.9148834997622444, "grad_norm": 0.3584369421005249, "learning_rate": 9.697681803747222e-05, "loss": 0.3055, "step": 481 }, { "epoch": 0.9167855444602948, "grad_norm": 0.3510100245475769, "learning_rate": 9.697046681486186e-05, "loss": 0.3278, "step": 482 }, { "epoch": 0.9186875891583453, "grad_norm": 0.33394765853881836, "learning_rate": 9.696411559225151e-05, "loss": 0.2954, "step": 483 }, { "epoch": 0.9205896338563956, "grad_norm": 0.437014102935791, "learning_rate": 9.695776436964116e-05, "loss": 0.3797, "step": 484 }, { "epoch": 0.922491678554446, "grad_norm": 0.37421244382858276, "learning_rate": 9.695141314703082e-05, "loss": 0.3521, "step": 485 }, { "epoch": 0.9243937232524965, "grad_norm": 0.37696099281311035, "learning_rate": 9.694506192442045e-05, "loss": 0.3455, "step": 486 }, { "epoch": 0.9262957679505468, "grad_norm": 0.5452500581741333, "learning_rate": 9.693871070181009e-05, "loss": 0.3624, "step": 487 }, { "epoch": 0.9281978126485972, "grad_norm": 0.4049624502658844, "learning_rate": 9.693235947919976e-05, "loss": 0.4017, "step": 488 }, { "epoch": 0.9300998573466477, "grad_norm": 0.32757866382598877, "learning_rate": 9.69260082565894e-05, "loss": 0.3536, "step": 489 }, { "epoch": 0.932001902044698, "grad_norm": 0.298367977142334, "learning_rate": 9.691965703397905e-05, "loss": 0.3374, "step": 490 }, { "epoch": 0.9339039467427485, "grad_norm": 0.22035005688667297, "learning_rate": 9.69133058113687e-05, "loss": 0.2855, "step": 491 }, { "epoch": 0.9358059914407989, "grad_norm": 0.43000441789627075, "learning_rate": 9.690695458875834e-05, "loss": 0.4544, "step": 492 }, { "epoch": 0.9377080361388492, "grad_norm": 0.28024253249168396, "learning_rate": 9.690060336614799e-05, "loss": 0.308, "step": 493 }, { "epoch": 0.9396100808368997, "grad_norm": 0.53145432472229, "learning_rate": 9.689425214353763e-05, "loss": 0.4569, "step": 494 }, { "epoch": 0.9415121255349501, "grad_norm": 0.4006127715110779, "learning_rate": 9.688790092092729e-05, "loss": 0.419, "step": 495 }, { "epoch": 0.9434141702330004, "grad_norm": 0.4057261645793915, "learning_rate": 9.688154969831693e-05, "loss": 0.3553, "step": 496 }, { "epoch": 0.9453162149310509, "grad_norm": 0.40803465247154236, "learning_rate": 9.687519847570657e-05, "loss": 0.3735, "step": 497 }, { "epoch": 0.9472182596291013, "grad_norm": 0.34222155809402466, "learning_rate": 9.686884725309623e-05, "loss": 0.367, "step": 498 }, { "epoch": 0.9491203043271517, "grad_norm": 0.40403544902801514, "learning_rate": 9.686249603048587e-05, "loss": 0.416, "step": 499 }, { "epoch": 0.9510223490252021, "grad_norm": 0.33636951446533203, "learning_rate": 9.685614480787551e-05, "loss": 0.3423, "step": 500 }, { "epoch": 0.9529243937232525, "grad_norm": 0.3394258916378021, "learning_rate": 9.684979358526516e-05, "loss": 0.3282, "step": 501 }, { "epoch": 0.9548264384213029, "grad_norm": 0.3682473599910736, "learning_rate": 9.684344236265482e-05, "loss": 0.406, "step": 502 }, { "epoch": 0.9567284831193533, "grad_norm": 0.35073623061180115, "learning_rate": 9.683709114004447e-05, "loss": 0.376, "step": 503 }, { "epoch": 0.9586305278174037, "grad_norm": 0.36000022292137146, "learning_rate": 9.68307399174341e-05, "loss": 0.3969, "step": 504 }, { "epoch": 0.9605325725154541, "grad_norm": 0.361158162355423, "learning_rate": 9.682438869482376e-05, "loss": 0.347, "step": 505 }, { "epoch": 0.9624346172135045, "grad_norm": 0.3075178265571594, "learning_rate": 9.681803747221341e-05, "loss": 0.4362, "step": 506 }, { "epoch": 0.9643366619115549, "grad_norm": 0.30084747076034546, "learning_rate": 9.681168624960305e-05, "loss": 0.3563, "step": 507 }, { "epoch": 0.9662387066096053, "grad_norm": 0.3221014440059662, "learning_rate": 9.68053350269927e-05, "loss": 0.3366, "step": 508 }, { "epoch": 0.9681407513076558, "grad_norm": 0.36464688181877136, "learning_rate": 9.679898380438235e-05, "loss": 0.3992, "step": 509 }, { "epoch": 0.9700427960057061, "grad_norm": 0.32443803548812866, "learning_rate": 9.679263258177199e-05, "loss": 0.3293, "step": 510 }, { "epoch": 0.9719448407037565, "grad_norm": 0.3689454197883606, "learning_rate": 9.678628135916164e-05, "loss": 0.3546, "step": 511 }, { "epoch": 0.973846885401807, "grad_norm": 0.3754975199699402, "learning_rate": 9.677993013655129e-05, "loss": 0.3856, "step": 512 }, { "epoch": 0.9757489300998573, "grad_norm": 0.3642953634262085, "learning_rate": 9.677357891394094e-05, "loss": 0.4326, "step": 513 }, { "epoch": 0.9776509747979077, "grad_norm": 0.43278223276138306, "learning_rate": 9.676722769133058e-05, "loss": 0.3964, "step": 514 }, { "epoch": 0.9795530194959582, "grad_norm": 0.43771886825561523, "learning_rate": 9.676087646872023e-05, "loss": 0.3861, "step": 515 }, { "epoch": 0.9814550641940085, "grad_norm": 0.34908977150917053, "learning_rate": 9.675452524610989e-05, "loss": 0.3981, "step": 516 }, { "epoch": 0.983357108892059, "grad_norm": 0.35733312368392944, "learning_rate": 9.674817402349953e-05, "loss": 0.3636, "step": 517 }, { "epoch": 0.9852591535901094, "grad_norm": 0.3636298179626465, "learning_rate": 9.674182280088918e-05, "loss": 0.4336, "step": 518 }, { "epoch": 0.9871611982881597, "grad_norm": 0.32771605253219604, "learning_rate": 9.673547157827883e-05, "loss": 0.3481, "step": 519 }, { "epoch": 0.9890632429862102, "grad_norm": 0.40213117003440857, "learning_rate": 9.672912035566847e-05, "loss": 0.3707, "step": 520 }, { "epoch": 0.9909652876842606, "grad_norm": 0.3386654257774353, "learning_rate": 9.672276913305812e-05, "loss": 0.3384, "step": 521 }, { "epoch": 0.992867332382311, "grad_norm": 0.3965696096420288, "learning_rate": 9.671641791044777e-05, "loss": 0.3595, "step": 522 }, { "epoch": 0.9947693770803614, "grad_norm": 0.38238459825515747, "learning_rate": 9.671006668783741e-05, "loss": 0.3714, "step": 523 }, { "epoch": 0.9966714217784118, "grad_norm": 0.3248405456542969, "learning_rate": 9.670371546522706e-05, "loss": 0.394, "step": 524 }, { "epoch": 0.9985734664764622, "grad_norm": 0.3902266323566437, "learning_rate": 9.66973642426167e-05, "loss": 0.4115, "step": 525 }, { "epoch": 1.0004755111745125, "grad_norm": 0.4164808392524719, "learning_rate": 9.669101302000636e-05, "loss": 0.2972, "step": 526 }, { "epoch": 1.002377555872563, "grad_norm": 0.33123117685317993, "learning_rate": 9.6684661797396e-05, "loss": 0.3211, "step": 527 }, { "epoch": 1.0042796005706134, "grad_norm": 0.322803258895874, "learning_rate": 9.667831057478564e-05, "loss": 0.3424, "step": 528 }, { "epoch": 1.0061816452686638, "grad_norm": 0.29135918617248535, "learning_rate": 9.66719593521753e-05, "loss": 0.2882, "step": 529 }, { "epoch": 1.0080836899667143, "grad_norm": 0.3367983400821686, "learning_rate": 9.666560812956494e-05, "loss": 0.2776, "step": 530 }, { "epoch": 1.0099857346647647, "grad_norm": 0.304070383310318, "learning_rate": 9.66592569069546e-05, "loss": 0.249, "step": 531 }, { "epoch": 1.011887779362815, "grad_norm": 0.3832727372646332, "learning_rate": 9.665290568434423e-05, "loss": 0.3118, "step": 532 }, { "epoch": 1.0137898240608654, "grad_norm": 0.3365418612957001, "learning_rate": 9.664655446173389e-05, "loss": 0.197, "step": 533 }, { "epoch": 1.0156918687589158, "grad_norm": 0.4367881119251251, "learning_rate": 9.664020323912354e-05, "loss": 0.3121, "step": 534 }, { "epoch": 1.0175939134569663, "grad_norm": 0.43158653378486633, "learning_rate": 9.663385201651318e-05, "loss": 0.3543, "step": 535 }, { "epoch": 1.0194959581550167, "grad_norm": 0.43556904792785645, "learning_rate": 9.662750079390283e-05, "loss": 0.3121, "step": 536 }, { "epoch": 1.0213980028530671, "grad_norm": 0.31828534603118896, "learning_rate": 9.662114957129248e-05, "loss": 0.24, "step": 537 }, { "epoch": 1.0233000475511174, "grad_norm": 0.3935330808162689, "learning_rate": 9.661479834868212e-05, "loss": 0.2548, "step": 538 }, { "epoch": 1.0252020922491678, "grad_norm": 0.3288602828979492, "learning_rate": 9.660844712607177e-05, "loss": 0.2219, "step": 539 }, { "epoch": 1.0271041369472182, "grad_norm": 0.36314669251441956, "learning_rate": 9.660209590346142e-05, "loss": 0.2817, "step": 540 }, { "epoch": 1.0290061816452687, "grad_norm": 0.3528159558773041, "learning_rate": 9.659574468085106e-05, "loss": 0.2989, "step": 541 }, { "epoch": 1.0309082263433191, "grad_norm": 0.3235621750354767, "learning_rate": 9.658939345824071e-05, "loss": 0.2443, "step": 542 }, { "epoch": 1.0328102710413696, "grad_norm": 0.3819037675857544, "learning_rate": 9.658304223563036e-05, "loss": 0.3494, "step": 543 }, { "epoch": 1.0347123157394198, "grad_norm": 0.3885079324245453, "learning_rate": 9.657669101302002e-05, "loss": 0.3033, "step": 544 }, { "epoch": 1.0366143604374702, "grad_norm": 0.3339099884033203, "learning_rate": 9.657033979040965e-05, "loss": 0.2673, "step": 545 }, { "epoch": 1.0385164051355207, "grad_norm": 0.37009695172309875, "learning_rate": 9.65639885677993e-05, "loss": 0.3715, "step": 546 }, { "epoch": 1.0404184498335711, "grad_norm": 0.3462003171443939, "learning_rate": 9.655763734518896e-05, "loss": 0.2664, "step": 547 }, { "epoch": 1.0423204945316216, "grad_norm": 0.3916226327419281, "learning_rate": 9.65512861225786e-05, "loss": 0.3804, "step": 548 }, { "epoch": 1.044222539229672, "grad_norm": 0.3801763951778412, "learning_rate": 9.654493489996825e-05, "loss": 0.2672, "step": 549 }, { "epoch": 1.0461245839277222, "grad_norm": 0.37406545877456665, "learning_rate": 9.65385836773579e-05, "loss": 0.6203, "step": 550 }, { "epoch": 1.0480266286257727, "grad_norm": 0.43677276372909546, "learning_rate": 9.653223245474754e-05, "loss": 0.3866, "step": 551 }, { "epoch": 1.0499286733238231, "grad_norm": 0.26939406991004944, "learning_rate": 9.652588123213719e-05, "loss": 0.2169, "step": 552 }, { "epoch": 1.0518307180218736, "grad_norm": 0.41554608941078186, "learning_rate": 9.651953000952684e-05, "loss": 0.3705, "step": 553 }, { "epoch": 1.053732762719924, "grad_norm": 0.3090009391307831, "learning_rate": 9.651317878691648e-05, "loss": 0.2471, "step": 554 }, { "epoch": 1.0556348074179742, "grad_norm": 0.36705514788627625, "learning_rate": 9.650682756430613e-05, "loss": 0.2764, "step": 555 }, { "epoch": 1.0575368521160247, "grad_norm": 0.39900127053260803, "learning_rate": 9.650047634169578e-05, "loss": 0.2836, "step": 556 }, { "epoch": 1.059438896814075, "grad_norm": 0.31405431032180786, "learning_rate": 9.649412511908544e-05, "loss": 0.2464, "step": 557 }, { "epoch": 1.0613409415121255, "grad_norm": 0.39795488119125366, "learning_rate": 9.648777389647507e-05, "loss": 0.283, "step": 558 }, { "epoch": 1.063242986210176, "grad_norm": 0.36270254850387573, "learning_rate": 9.648142267386471e-05, "loss": 0.26, "step": 559 }, { "epoch": 1.0651450309082264, "grad_norm": 0.42650437355041504, "learning_rate": 9.647507145125438e-05, "loss": 0.2693, "step": 560 }, { "epoch": 1.0670470756062767, "grad_norm": 0.3075532019138336, "learning_rate": 9.646872022864402e-05, "loss": 0.2941, "step": 561 }, { "epoch": 1.068949120304327, "grad_norm": 0.4509059190750122, "learning_rate": 9.646236900603367e-05, "loss": 0.3525, "step": 562 }, { "epoch": 1.0708511650023775, "grad_norm": 0.3420471251010895, "learning_rate": 9.645601778342332e-05, "loss": 0.2601, "step": 563 }, { "epoch": 1.072753209700428, "grad_norm": 0.422493577003479, "learning_rate": 9.644966656081296e-05, "loss": 0.3441, "step": 564 }, { "epoch": 1.0746552543984784, "grad_norm": 0.3960445821285248, "learning_rate": 9.644331533820261e-05, "loss": 0.3049, "step": 565 }, { "epoch": 1.0765572990965289, "grad_norm": 0.32367074489593506, "learning_rate": 9.643696411559225e-05, "loss": 0.2694, "step": 566 }, { "epoch": 1.078459343794579, "grad_norm": 0.3480624258518219, "learning_rate": 9.643061289298191e-05, "loss": 0.2667, "step": 567 }, { "epoch": 1.0803613884926295, "grad_norm": 0.37603023648262024, "learning_rate": 9.642426167037155e-05, "loss": 0.2875, "step": 568 }, { "epoch": 1.08226343319068, "grad_norm": 0.391438752412796, "learning_rate": 9.641791044776119e-05, "loss": 0.2844, "step": 569 }, { "epoch": 1.0841654778887304, "grad_norm": 0.42726075649261475, "learning_rate": 9.641155922515086e-05, "loss": 0.3092, "step": 570 }, { "epoch": 1.0860675225867809, "grad_norm": 0.4007676839828491, "learning_rate": 9.64052080025405e-05, "loss": 0.2405, "step": 571 }, { "epoch": 1.0879695672848313, "grad_norm": 0.401592493057251, "learning_rate": 9.639885677993013e-05, "loss": 0.297, "step": 572 }, { "epoch": 1.0898716119828815, "grad_norm": 0.3883298635482788, "learning_rate": 9.639250555731978e-05, "loss": 0.3201, "step": 573 }, { "epoch": 1.091773656680932, "grad_norm": 0.41852253675460815, "learning_rate": 9.638615433470944e-05, "loss": 0.259, "step": 574 }, { "epoch": 1.0936757013789824, "grad_norm": 0.4559331238269806, "learning_rate": 9.637980311209909e-05, "loss": 0.3204, "step": 575 }, { "epoch": 1.0955777460770328, "grad_norm": 0.4163438379764557, "learning_rate": 9.637345188948873e-05, "loss": 0.267, "step": 576 }, { "epoch": 1.0974797907750833, "grad_norm": 0.38813936710357666, "learning_rate": 9.636710066687838e-05, "loss": 0.2653, "step": 577 }, { "epoch": 1.0993818354731335, "grad_norm": 0.373047798871994, "learning_rate": 9.636074944426803e-05, "loss": 0.2995, "step": 578 }, { "epoch": 1.101283880171184, "grad_norm": 0.39488789439201355, "learning_rate": 9.635439822165767e-05, "loss": 0.2972, "step": 579 }, { "epoch": 1.1031859248692344, "grad_norm": 0.37775856256484985, "learning_rate": 9.634804699904732e-05, "loss": 0.2833, "step": 580 }, { "epoch": 1.1050879695672848, "grad_norm": 0.3843298554420471, "learning_rate": 9.634169577643697e-05, "loss": 0.3413, "step": 581 }, { "epoch": 1.1069900142653353, "grad_norm": 0.3834189176559448, "learning_rate": 9.633534455382661e-05, "loss": 0.2792, "step": 582 }, { "epoch": 1.1088920589633857, "grad_norm": 0.37232789397239685, "learning_rate": 9.632899333121626e-05, "loss": 0.2724, "step": 583 }, { "epoch": 1.1107941036614362, "grad_norm": 0.2608899772167206, "learning_rate": 9.632264210860591e-05, "loss": 0.1966, "step": 584 }, { "epoch": 1.1126961483594864, "grad_norm": 0.2676723301410675, "learning_rate": 9.631629088599557e-05, "loss": 0.2149, "step": 585 }, { "epoch": 1.1145981930575368, "grad_norm": 0.40126022696495056, "learning_rate": 9.63099396633852e-05, "loss": 0.2937, "step": 586 }, { "epoch": 1.1165002377555873, "grad_norm": 0.3493163287639618, "learning_rate": 9.630358844077486e-05, "loss": 0.2461, "step": 587 }, { "epoch": 1.1184022824536377, "grad_norm": 0.39294591546058655, "learning_rate": 9.629723721816451e-05, "loss": 0.2922, "step": 588 }, { "epoch": 1.1203043271516882, "grad_norm": 0.3855053186416626, "learning_rate": 9.629088599555415e-05, "loss": 0.2541, "step": 589 }, { "epoch": 1.1222063718497384, "grad_norm": 0.3388477861881256, "learning_rate": 9.628453477294378e-05, "loss": 0.2234, "step": 590 }, { "epoch": 1.1241084165477888, "grad_norm": 0.3856431841850281, "learning_rate": 9.627818355033345e-05, "loss": 0.2836, "step": 591 }, { "epoch": 1.1260104612458393, "grad_norm": 0.39824768900871277, "learning_rate": 9.627183232772309e-05, "loss": 0.2562, "step": 592 }, { "epoch": 1.1279125059438897, "grad_norm": 0.44484448432922363, "learning_rate": 9.626548110511274e-05, "loss": 0.2685, "step": 593 }, { "epoch": 1.1298145506419401, "grad_norm": 0.4581182599067688, "learning_rate": 9.625912988250239e-05, "loss": 0.3208, "step": 594 }, { "epoch": 1.1317165953399906, "grad_norm": 0.3560565412044525, "learning_rate": 9.625277865989203e-05, "loss": 0.2834, "step": 595 }, { "epoch": 1.1336186400380408, "grad_norm": 0.4423635005950928, "learning_rate": 9.624642743728168e-05, "loss": 0.3154, "step": 596 }, { "epoch": 1.1355206847360912, "grad_norm": 0.3797377943992615, "learning_rate": 9.624007621467132e-05, "loss": 0.28, "step": 597 }, { "epoch": 1.1374227294341417, "grad_norm": 0.29780030250549316, "learning_rate": 9.623372499206099e-05, "loss": 0.2209, "step": 598 }, { "epoch": 1.1393247741321921, "grad_norm": 0.3372732996940613, "learning_rate": 9.622737376945062e-05, "loss": 0.2502, "step": 599 }, { "epoch": 1.1412268188302426, "grad_norm": 0.36365967988967896, "learning_rate": 9.622102254684026e-05, "loss": 0.2804, "step": 600 }, { "epoch": 1.1431288635282928, "grad_norm": 0.40790894627571106, "learning_rate": 9.621467132422993e-05, "loss": 0.3633, "step": 601 }, { "epoch": 1.1450309082263432, "grad_norm": 0.35693496465682983, "learning_rate": 9.620832010161957e-05, "loss": 0.3193, "step": 602 }, { "epoch": 1.1469329529243937, "grad_norm": 0.3701719045639038, "learning_rate": 9.620196887900922e-05, "loss": 0.2937, "step": 603 }, { "epoch": 1.1488349976224441, "grad_norm": 0.4299123287200928, "learning_rate": 9.619561765639886e-05, "loss": 0.2732, "step": 604 }, { "epoch": 1.1507370423204946, "grad_norm": 0.4082129895687103, "learning_rate": 9.618926643378851e-05, "loss": 0.2867, "step": 605 }, { "epoch": 1.152639087018545, "grad_norm": 0.49353981018066406, "learning_rate": 9.618291521117816e-05, "loss": 0.266, "step": 606 }, { "epoch": 1.1545411317165954, "grad_norm": 0.3889831006526947, "learning_rate": 9.61765639885678e-05, "loss": 0.2732, "step": 607 }, { "epoch": 1.1564431764146457, "grad_norm": 0.3464524745941162, "learning_rate": 9.617021276595745e-05, "loss": 0.2616, "step": 608 }, { "epoch": 1.158345221112696, "grad_norm": 0.3498656153678894, "learning_rate": 9.61638615433471e-05, "loss": 0.2538, "step": 609 }, { "epoch": 1.1602472658107466, "grad_norm": 0.31552717089653015, "learning_rate": 9.615751032073674e-05, "loss": 0.2283, "step": 610 }, { "epoch": 1.162149310508797, "grad_norm": 0.3225223422050476, "learning_rate": 9.615115909812639e-05, "loss": 0.2428, "step": 611 }, { "epoch": 1.1640513552068474, "grad_norm": 0.3108568489551544, "learning_rate": 9.614480787551604e-05, "loss": 0.2207, "step": 612 }, { "epoch": 1.1659533999048977, "grad_norm": 0.42909371852874756, "learning_rate": 9.613845665290568e-05, "loss": 0.3285, "step": 613 }, { "epoch": 1.167855444602948, "grad_norm": 0.3831368088722229, "learning_rate": 9.613210543029533e-05, "loss": 0.2425, "step": 614 }, { "epoch": 1.1697574893009985, "grad_norm": 0.3891592025756836, "learning_rate": 9.612575420768499e-05, "loss": 0.2849, "step": 615 }, { "epoch": 1.171659533999049, "grad_norm": 0.5383257865905762, "learning_rate": 9.611940298507464e-05, "loss": 0.3444, "step": 616 }, { "epoch": 1.1735615786970994, "grad_norm": 0.4203440845012665, "learning_rate": 9.611305176246428e-05, "loss": 0.3198, "step": 617 }, { "epoch": 1.1754636233951499, "grad_norm": 0.42422881722450256, "learning_rate": 9.610670053985393e-05, "loss": 0.3873, "step": 618 }, { "epoch": 1.1773656680932003, "grad_norm": 0.34799742698669434, "learning_rate": 9.610034931724358e-05, "loss": 0.2645, "step": 619 }, { "epoch": 1.1792677127912505, "grad_norm": 0.37579119205474854, "learning_rate": 9.609399809463322e-05, "loss": 0.3379, "step": 620 }, { "epoch": 1.181169757489301, "grad_norm": 0.3958894610404968, "learning_rate": 9.608764687202287e-05, "loss": 0.2792, "step": 621 }, { "epoch": 1.1830718021873514, "grad_norm": 0.30366870760917664, "learning_rate": 9.608129564941252e-05, "loss": 0.1871, "step": 622 }, { "epoch": 1.1849738468854019, "grad_norm": 0.39878007769584656, "learning_rate": 9.607494442680216e-05, "loss": 0.2675, "step": 623 }, { "epoch": 1.1868758915834523, "grad_norm": 0.35332080721855164, "learning_rate": 9.606859320419181e-05, "loss": 0.2856, "step": 624 }, { "epoch": 1.1887779362815025, "grad_norm": 0.3391731381416321, "learning_rate": 9.606224198158146e-05, "loss": 0.254, "step": 625 }, { "epoch": 1.190679980979553, "grad_norm": 0.39363861083984375, "learning_rate": 9.60558907589711e-05, "loss": 0.2447, "step": 626 }, { "epoch": 1.1925820256776034, "grad_norm": 0.4773564040660858, "learning_rate": 9.604953953636075e-05, "loss": 0.3447, "step": 627 }, { "epoch": 1.1944840703756539, "grad_norm": 0.34327152371406555, "learning_rate": 9.60431883137504e-05, "loss": 0.2353, "step": 628 }, { "epoch": 1.1963861150737043, "grad_norm": 0.37386631965637207, "learning_rate": 9.603683709114006e-05, "loss": 0.2792, "step": 629 }, { "epoch": 1.1982881597717547, "grad_norm": 0.4061308801174164, "learning_rate": 9.60304858685297e-05, "loss": 0.3216, "step": 630 }, { "epoch": 1.200190204469805, "grad_norm": 0.3440467417240143, "learning_rate": 9.602413464591933e-05, "loss": 0.2653, "step": 631 }, { "epoch": 1.2020922491678554, "grad_norm": 0.36648881435394287, "learning_rate": 9.6017783423309e-05, "loss": 0.2471, "step": 632 }, { "epoch": 1.2039942938659058, "grad_norm": 0.3737157881259918, "learning_rate": 9.601143220069864e-05, "loss": 0.3255, "step": 633 }, { "epoch": 1.2058963385639563, "grad_norm": 0.3840744197368622, "learning_rate": 9.600508097808829e-05, "loss": 0.2457, "step": 634 }, { "epoch": 1.2077983832620067, "grad_norm": 0.34374961256980896, "learning_rate": 9.599872975547793e-05, "loss": 0.2705, "step": 635 }, { "epoch": 1.209700427960057, "grad_norm": 0.3460882306098938, "learning_rate": 9.599237853286758e-05, "loss": 0.2308, "step": 636 }, { "epoch": 1.2116024726581074, "grad_norm": 0.33316507935523987, "learning_rate": 9.598602731025723e-05, "loss": 0.2562, "step": 637 }, { "epoch": 1.2135045173561578, "grad_norm": 0.3132528066635132, "learning_rate": 9.597967608764687e-05, "loss": 0.2331, "step": 638 }, { "epoch": 1.2154065620542083, "grad_norm": 0.3329333961009979, "learning_rate": 9.597332486503653e-05, "loss": 0.2224, "step": 639 }, { "epoch": 1.2173086067522587, "grad_norm": 0.35949432849884033, "learning_rate": 9.596697364242617e-05, "loss": 0.2337, "step": 640 }, { "epoch": 1.2192106514503092, "grad_norm": 0.33591121435165405, "learning_rate": 9.596062241981581e-05, "loss": 0.2441, "step": 641 }, { "epoch": 1.2211126961483596, "grad_norm": 0.38212794065475464, "learning_rate": 9.595427119720546e-05, "loss": 0.2569, "step": 642 }, { "epoch": 1.2230147408464098, "grad_norm": 0.4124354124069214, "learning_rate": 9.594791997459512e-05, "loss": 0.3143, "step": 643 }, { "epoch": 1.2249167855444603, "grad_norm": 0.4712159037590027, "learning_rate": 9.594156875198475e-05, "loss": 0.3153, "step": 644 }, { "epoch": 1.2268188302425107, "grad_norm": 0.3652181923389435, "learning_rate": 9.59352175293744e-05, "loss": 0.2448, "step": 645 }, { "epoch": 1.2287208749405611, "grad_norm": 0.40058213472366333, "learning_rate": 9.592886630676406e-05, "loss": 0.304, "step": 646 }, { "epoch": 1.2306229196386116, "grad_norm": 0.4105280041694641, "learning_rate": 9.592251508415371e-05, "loss": 0.251, "step": 647 }, { "epoch": 1.2325249643366618, "grad_norm": 0.3609527349472046, "learning_rate": 9.591616386154335e-05, "loss": 0.2311, "step": 648 }, { "epoch": 1.2344270090347123, "grad_norm": 0.3686671257019043, "learning_rate": 9.5909812638933e-05, "loss": 0.2214, "step": 649 }, { "epoch": 1.2363290537327627, "grad_norm": 0.27986517548561096, "learning_rate": 9.590346141632265e-05, "loss": 0.2531, "step": 650 }, { "epoch": 1.2382310984308131, "grad_norm": 0.4477519690990448, "learning_rate": 9.589711019371229e-05, "loss": 0.3039, "step": 651 }, { "epoch": 1.2401331431288636, "grad_norm": 0.33017873764038086, "learning_rate": 9.589075897110194e-05, "loss": 0.205, "step": 652 }, { "epoch": 1.242035187826914, "grad_norm": 0.31245800852775574, "learning_rate": 9.588440774849159e-05, "loss": 0.2493, "step": 653 }, { "epoch": 1.2439372325249642, "grad_norm": 0.33620285987854004, "learning_rate": 9.587805652588123e-05, "loss": 0.2629, "step": 654 }, { "epoch": 1.2458392772230147, "grad_norm": 0.34820401668548584, "learning_rate": 9.587170530327088e-05, "loss": 0.2446, "step": 655 }, { "epoch": 1.2477413219210651, "grad_norm": 0.4110179543495178, "learning_rate": 9.586535408066053e-05, "loss": 0.3345, "step": 656 }, { "epoch": 1.2496433666191156, "grad_norm": 0.3637439012527466, "learning_rate": 9.585900285805019e-05, "loss": 0.2052, "step": 657 }, { "epoch": 1.251545411317166, "grad_norm": 0.39023682475090027, "learning_rate": 9.585265163543982e-05, "loss": 0.2841, "step": 658 }, { "epoch": 1.2534474560152162, "grad_norm": 0.3623685836791992, "learning_rate": 9.584630041282948e-05, "loss": 0.2286, "step": 659 }, { "epoch": 1.2553495007132667, "grad_norm": 0.38151344656944275, "learning_rate": 9.583994919021913e-05, "loss": 0.2357, "step": 660 }, { "epoch": 1.2572515454113171, "grad_norm": 0.38236725330352783, "learning_rate": 9.583359796760877e-05, "loss": 0.2966, "step": 661 }, { "epoch": 1.2591535901093676, "grad_norm": 0.38568076491355896, "learning_rate": 9.58272467449984e-05, "loss": 0.3018, "step": 662 }, { "epoch": 1.261055634807418, "grad_norm": 0.3488738238811493, "learning_rate": 9.582089552238807e-05, "loss": 0.354, "step": 663 }, { "epoch": 1.2629576795054684, "grad_norm": 0.352860689163208, "learning_rate": 9.581454429977771e-05, "loss": 0.2143, "step": 664 }, { "epoch": 1.2648597242035189, "grad_norm": 0.3734944760799408, "learning_rate": 9.580819307716736e-05, "loss": 0.3486, "step": 665 }, { "epoch": 1.266761768901569, "grad_norm": 0.4024759531021118, "learning_rate": 9.580184185455701e-05, "loss": 0.2922, "step": 666 }, { "epoch": 1.2686638135996195, "grad_norm": 0.37389662861824036, "learning_rate": 9.579549063194665e-05, "loss": 0.2545, "step": 667 }, { "epoch": 1.27056585829767, "grad_norm": 0.42338186502456665, "learning_rate": 9.57891394093363e-05, "loss": 0.2961, "step": 668 }, { "epoch": 1.2724679029957204, "grad_norm": 0.3795355260372162, "learning_rate": 9.578278818672594e-05, "loss": 0.2777, "step": 669 }, { "epoch": 1.2743699476937709, "grad_norm": 0.3439030945301056, "learning_rate": 9.57764369641156e-05, "loss": 0.2179, "step": 670 }, { "epoch": 1.276271992391821, "grad_norm": 0.39637741446495056, "learning_rate": 9.577008574150524e-05, "loss": 0.2701, "step": 671 }, { "epoch": 1.2781740370898715, "grad_norm": 0.3348701298236847, "learning_rate": 9.576373451889488e-05, "loss": 0.2632, "step": 672 }, { "epoch": 1.280076081787922, "grad_norm": 0.3696272671222687, "learning_rate": 9.575738329628455e-05, "loss": 0.2228, "step": 673 }, { "epoch": 1.2819781264859724, "grad_norm": 0.3261694610118866, "learning_rate": 9.575103207367419e-05, "loss": 0.2589, "step": 674 }, { "epoch": 1.2838801711840229, "grad_norm": 0.39266085624694824, "learning_rate": 9.574468085106384e-05, "loss": 0.2893, "step": 675 }, { "epoch": 1.2857822158820733, "grad_norm": 0.4356357157230377, "learning_rate": 9.573832962845348e-05, "loss": 0.3249, "step": 676 }, { "epoch": 1.2876842605801238, "grad_norm": 0.38992395997047424, "learning_rate": 9.573197840584313e-05, "loss": 0.2697, "step": 677 }, { "epoch": 1.289586305278174, "grad_norm": 0.35415610671043396, "learning_rate": 9.572562718323278e-05, "loss": 0.2538, "step": 678 }, { "epoch": 1.2914883499762244, "grad_norm": 0.38410142064094543, "learning_rate": 9.571927596062242e-05, "loss": 0.2325, "step": 679 }, { "epoch": 1.2933903946742749, "grad_norm": 0.36036771535873413, "learning_rate": 9.571292473801207e-05, "loss": 0.242, "step": 680 }, { "epoch": 1.2952924393723253, "grad_norm": 0.3901429772377014, "learning_rate": 9.570657351540172e-05, "loss": 0.3141, "step": 681 }, { "epoch": 1.2971944840703755, "grad_norm": 0.3684573769569397, "learning_rate": 9.570022229279136e-05, "loss": 0.2725, "step": 682 }, { "epoch": 1.299096528768426, "grad_norm": 0.44199153780937195, "learning_rate": 9.569387107018101e-05, "loss": 0.2938, "step": 683 }, { "epoch": 1.3009985734664764, "grad_norm": 0.4435335695743561, "learning_rate": 9.568751984757066e-05, "loss": 0.3454, "step": 684 }, { "epoch": 1.3029006181645268, "grad_norm": 0.3713487386703491, "learning_rate": 9.56811686249603e-05, "loss": 0.25, "step": 685 }, { "epoch": 1.3048026628625773, "grad_norm": 0.394452840089798, "learning_rate": 9.567481740234995e-05, "loss": 0.3062, "step": 686 }, { "epoch": 1.3067047075606277, "grad_norm": 0.47593292593955994, "learning_rate": 9.56684661797396e-05, "loss": 0.3131, "step": 687 }, { "epoch": 1.3086067522586782, "grad_norm": 0.39060479402542114, "learning_rate": 9.566211495712926e-05, "loss": 0.3267, "step": 688 }, { "epoch": 1.3105087969567286, "grad_norm": 0.40931451320648193, "learning_rate": 9.56557637345189e-05, "loss": 0.2979, "step": 689 }, { "epoch": 1.3124108416547788, "grad_norm": 0.3557567000389099, "learning_rate": 9.564941251190855e-05, "loss": 0.213, "step": 690 }, { "epoch": 1.3143128863528293, "grad_norm": 0.43843701481819153, "learning_rate": 9.56430612892982e-05, "loss": 0.2835, "step": 691 }, { "epoch": 1.3162149310508797, "grad_norm": 0.33530867099761963, "learning_rate": 9.563671006668784e-05, "loss": 0.2392, "step": 692 }, { "epoch": 1.3181169757489302, "grad_norm": 0.35071656107902527, "learning_rate": 9.563035884407749e-05, "loss": 0.1916, "step": 693 }, { "epoch": 1.3200190204469804, "grad_norm": 0.3808371126651764, "learning_rate": 9.562400762146714e-05, "loss": 0.2426, "step": 694 }, { "epoch": 1.3219210651450308, "grad_norm": 0.46641990542411804, "learning_rate": 9.561765639885678e-05, "loss": 0.3399, "step": 695 }, { "epoch": 1.3238231098430813, "grad_norm": 0.4153888523578644, "learning_rate": 9.561130517624643e-05, "loss": 0.4152, "step": 696 }, { "epoch": 1.3257251545411317, "grad_norm": 0.4004898965358734, "learning_rate": 9.560495395363608e-05, "loss": 0.3637, "step": 697 }, { "epoch": 1.3276271992391822, "grad_norm": 0.421058714389801, "learning_rate": 9.559860273102572e-05, "loss": 0.2625, "step": 698 }, { "epoch": 1.3295292439372326, "grad_norm": 0.39722004532814026, "learning_rate": 9.559225150841537e-05, "loss": 0.3563, "step": 699 }, { "epoch": 1.331431288635283, "grad_norm": 0.3793489634990692, "learning_rate": 9.558590028580501e-05, "loss": 0.2306, "step": 700 }, { "epoch": 1.3333333333333333, "grad_norm": 0.43592244386672974, "learning_rate": 9.557954906319468e-05, "loss": 0.4354, "step": 701 }, { "epoch": 1.3352353780313837, "grad_norm": 0.30159738659858704, "learning_rate": 9.557319784058432e-05, "loss": 0.2062, "step": 702 }, { "epoch": 1.3371374227294341, "grad_norm": 0.34011465311050415, "learning_rate": 9.556684661797395e-05, "loss": 0.2363, "step": 703 }, { "epoch": 1.3390394674274846, "grad_norm": 0.41224443912506104, "learning_rate": 9.556049539536362e-05, "loss": 0.2913, "step": 704 }, { "epoch": 1.340941512125535, "grad_norm": 0.4105536937713623, "learning_rate": 9.555414417275326e-05, "loss": 0.2459, "step": 705 }, { "epoch": 1.3428435568235852, "grad_norm": 0.3158798813819885, "learning_rate": 9.554779295014291e-05, "loss": 0.1921, "step": 706 }, { "epoch": 1.3447456015216357, "grad_norm": 0.4023972451686859, "learning_rate": 9.554144172753255e-05, "loss": 0.2406, "step": 707 }, { "epoch": 1.3466476462196861, "grad_norm": 0.4204084277153015, "learning_rate": 9.55350905049222e-05, "loss": 0.2977, "step": 708 }, { "epoch": 1.3485496909177366, "grad_norm": 0.4853519797325134, "learning_rate": 9.552873928231185e-05, "loss": 0.3871, "step": 709 }, { "epoch": 1.350451735615787, "grad_norm": 0.3755006194114685, "learning_rate": 9.552238805970149e-05, "loss": 0.2399, "step": 710 }, { "epoch": 1.3523537803138375, "grad_norm": 0.37587347626686096, "learning_rate": 9.551603683709116e-05, "loss": 0.3029, "step": 711 }, { "epoch": 1.354255825011888, "grad_norm": 0.4257625937461853, "learning_rate": 9.55096856144808e-05, "loss": 0.2541, "step": 712 }, { "epoch": 1.3561578697099381, "grad_norm": 0.29570913314819336, "learning_rate": 9.550333439187043e-05, "loss": 0.1668, "step": 713 }, { "epoch": 1.3580599144079886, "grad_norm": 0.5089273452758789, "learning_rate": 9.549698316926008e-05, "loss": 0.4006, "step": 714 }, { "epoch": 1.359961959106039, "grad_norm": 0.43584999442100525, "learning_rate": 9.549063194664974e-05, "loss": 0.2996, "step": 715 }, { "epoch": 1.3618640038040895, "grad_norm": 0.4071057140827179, "learning_rate": 9.548428072403937e-05, "loss": 0.308, "step": 716 }, { "epoch": 1.3637660485021397, "grad_norm": 0.37772196531295776, "learning_rate": 9.547792950142903e-05, "loss": 0.2235, "step": 717 }, { "epoch": 1.3656680932001901, "grad_norm": 0.44488438963890076, "learning_rate": 9.547157827881868e-05, "loss": 0.2748, "step": 718 }, { "epoch": 1.3675701378982406, "grad_norm": 0.3227798640727997, "learning_rate": 9.546522705620833e-05, "loss": 0.2609, "step": 719 }, { "epoch": 1.369472182596291, "grad_norm": 0.3742448389530182, "learning_rate": 9.545887583359797e-05, "loss": 0.2417, "step": 720 }, { "epoch": 1.3713742272943414, "grad_norm": 0.3582020699977875, "learning_rate": 9.545252461098762e-05, "loss": 0.2688, "step": 721 }, { "epoch": 1.3732762719923919, "grad_norm": 0.3762567341327667, "learning_rate": 9.544617338837727e-05, "loss": 0.2939, "step": 722 }, { "epoch": 1.3751783166904423, "grad_norm": 0.38103973865509033, "learning_rate": 9.543982216576691e-05, "loss": 0.3335, "step": 723 }, { "epoch": 1.3770803613884925, "grad_norm": 0.3109844923019409, "learning_rate": 9.543347094315656e-05, "loss": 0.2094, "step": 724 }, { "epoch": 1.378982406086543, "grad_norm": 0.3642789125442505, "learning_rate": 9.542711972054621e-05, "loss": 0.2879, "step": 725 }, { "epoch": 1.3808844507845934, "grad_norm": 0.3879150152206421, "learning_rate": 9.542076849793585e-05, "loss": 0.2567, "step": 726 }, { "epoch": 1.3827864954826439, "grad_norm": 0.3364320993423462, "learning_rate": 9.54144172753255e-05, "loss": 0.2773, "step": 727 }, { "epoch": 1.3846885401806943, "grad_norm": 0.5071269273757935, "learning_rate": 9.540806605271516e-05, "loss": 0.2916, "step": 728 }, { "epoch": 1.3865905848787445, "grad_norm": 0.425793319940567, "learning_rate": 9.540171483010481e-05, "loss": 0.2948, "step": 729 }, { "epoch": 1.388492629576795, "grad_norm": 0.38478776812553406, "learning_rate": 9.539536360749445e-05, "loss": 0.2493, "step": 730 }, { "epoch": 1.3903946742748454, "grad_norm": 0.4016847014427185, "learning_rate": 9.53890123848841e-05, "loss": 0.3038, "step": 731 }, { "epoch": 1.3922967189728959, "grad_norm": 0.2799355983734131, "learning_rate": 9.538266116227375e-05, "loss": 0.2964, "step": 732 }, { "epoch": 1.3941987636709463, "grad_norm": 0.3720659613609314, "learning_rate": 9.537630993966339e-05, "loss": 0.2528, "step": 733 }, { "epoch": 1.3961008083689967, "grad_norm": 0.2954385578632355, "learning_rate": 9.536995871705303e-05, "loss": 0.2119, "step": 734 }, { "epoch": 1.3980028530670472, "grad_norm": 0.35636264085769653, "learning_rate": 9.536360749444269e-05, "loss": 0.3042, "step": 735 }, { "epoch": 1.3999048977650974, "grad_norm": 0.3219160735607147, "learning_rate": 9.535725627183233e-05, "loss": 0.2977, "step": 736 }, { "epoch": 1.4018069424631479, "grad_norm": 0.32340940833091736, "learning_rate": 9.535090504922198e-05, "loss": 0.2295, "step": 737 }, { "epoch": 1.4037089871611983, "grad_norm": 0.3884155750274658, "learning_rate": 9.534455382661163e-05, "loss": 0.2367, "step": 738 }, { "epoch": 1.4056110318592487, "grad_norm": 0.3708769381046295, "learning_rate": 9.533820260400127e-05, "loss": 0.2807, "step": 739 }, { "epoch": 1.407513076557299, "grad_norm": 0.3377797603607178, "learning_rate": 9.533185138139092e-05, "loss": 0.2459, "step": 740 }, { "epoch": 1.4094151212553494, "grad_norm": 0.542662501335144, "learning_rate": 9.532550015878056e-05, "loss": 0.3883, "step": 741 }, { "epoch": 1.4113171659533998, "grad_norm": 0.36908188462257385, "learning_rate": 9.531914893617023e-05, "loss": 0.2239, "step": 742 }, { "epoch": 1.4132192106514503, "grad_norm": 0.2898438572883606, "learning_rate": 9.531279771355987e-05, "loss": 0.1929, "step": 743 }, { "epoch": 1.4151212553495007, "grad_norm": 0.361965537071228, "learning_rate": 9.53064464909495e-05, "loss": 0.2758, "step": 744 }, { "epoch": 1.4170233000475512, "grad_norm": 0.42736831307411194, "learning_rate": 9.530009526833916e-05, "loss": 0.3103, "step": 745 }, { "epoch": 1.4189253447456016, "grad_norm": 0.3411954641342163, "learning_rate": 9.529374404572881e-05, "loss": 0.2498, "step": 746 }, { "epoch": 1.420827389443652, "grad_norm": 0.3671089708805084, "learning_rate": 9.528739282311846e-05, "loss": 0.2961, "step": 747 }, { "epoch": 1.4227294341417023, "grad_norm": 0.35021135210990906, "learning_rate": 9.52810416005081e-05, "loss": 0.2422, "step": 748 }, { "epoch": 1.4246314788397527, "grad_norm": 0.3203287422657013, "learning_rate": 9.527469037789775e-05, "loss": 0.2377, "step": 749 }, { "epoch": 1.4265335235378032, "grad_norm": 0.32512807846069336, "learning_rate": 9.52683391552874e-05, "loss": 0.2533, "step": 750 }, { "epoch": 1.4284355682358536, "grad_norm": 0.39963454008102417, "learning_rate": 9.526198793267704e-05, "loss": 0.3191, "step": 751 }, { "epoch": 1.4303376129339038, "grad_norm": 0.3722153306007385, "learning_rate": 9.525563671006669e-05, "loss": 0.2134, "step": 752 }, { "epoch": 1.4322396576319543, "grad_norm": 0.3429708182811737, "learning_rate": 9.524928548745634e-05, "loss": 0.2221, "step": 753 }, { "epoch": 1.4341417023300047, "grad_norm": 0.4014436602592468, "learning_rate": 9.524293426484598e-05, "loss": 0.2638, "step": 754 }, { "epoch": 1.4360437470280552, "grad_norm": 0.38329729437828064, "learning_rate": 9.523658304223563e-05, "loss": 0.25, "step": 755 }, { "epoch": 1.4379457917261056, "grad_norm": 0.37710002064704895, "learning_rate": 9.523023181962529e-05, "loss": 0.2623, "step": 756 }, { "epoch": 1.439847836424156, "grad_norm": 0.4223197102546692, "learning_rate": 9.522388059701492e-05, "loss": 0.408, "step": 757 }, { "epoch": 1.4417498811222065, "grad_norm": 0.45707425475120544, "learning_rate": 9.521752937440458e-05, "loss": 0.3491, "step": 758 }, { "epoch": 1.4436519258202567, "grad_norm": 0.39775991439819336, "learning_rate": 9.521117815179423e-05, "loss": 0.2498, "step": 759 }, { "epoch": 1.4455539705183071, "grad_norm": 0.3113288879394531, "learning_rate": 9.520482692918388e-05, "loss": 0.2191, "step": 760 }, { "epoch": 1.4474560152163576, "grad_norm": 0.35126394033432007, "learning_rate": 9.519847570657352e-05, "loss": 0.2689, "step": 761 }, { "epoch": 1.449358059914408, "grad_norm": 0.42121708393096924, "learning_rate": 9.519212448396317e-05, "loss": 0.2859, "step": 762 }, { "epoch": 1.4512601046124585, "grad_norm": 0.37913796305656433, "learning_rate": 9.518577326135282e-05, "loss": 0.2676, "step": 763 }, { "epoch": 1.4531621493105087, "grad_norm": 0.3767364025115967, "learning_rate": 9.517942203874246e-05, "loss": 0.2298, "step": 764 }, { "epoch": 1.4550641940085591, "grad_norm": 0.3317908048629761, "learning_rate": 9.517307081613211e-05, "loss": 0.2439, "step": 765 }, { "epoch": 1.4569662387066096, "grad_norm": 0.28014522790908813, "learning_rate": 9.516671959352176e-05, "loss": 0.207, "step": 766 }, { "epoch": 1.45886828340466, "grad_norm": 0.4119054675102234, "learning_rate": 9.51603683709114e-05, "loss": 0.2969, "step": 767 }, { "epoch": 1.4607703281027105, "grad_norm": 0.3351030647754669, "learning_rate": 9.515401714830105e-05, "loss": 0.2925, "step": 768 }, { "epoch": 1.462672372800761, "grad_norm": 0.5204692482948303, "learning_rate": 9.51476659256907e-05, "loss": 0.3546, "step": 769 }, { "epoch": 1.4645744174988113, "grad_norm": 0.42994043231010437, "learning_rate": 9.514131470308034e-05, "loss": 0.3284, "step": 770 }, { "epoch": 1.4664764621968616, "grad_norm": 0.3580436408519745, "learning_rate": 9.513496348047e-05, "loss": 0.2639, "step": 771 }, { "epoch": 1.468378506894912, "grad_norm": 0.37151291966438293, "learning_rate": 9.512861225785963e-05, "loss": 0.2556, "step": 772 }, { "epoch": 1.4702805515929624, "grad_norm": 0.33122384548187256, "learning_rate": 9.51222610352493e-05, "loss": 0.2565, "step": 773 }, { "epoch": 1.472182596291013, "grad_norm": 0.3718935251235962, "learning_rate": 9.511590981263894e-05, "loss": 0.2348, "step": 774 }, { "epoch": 1.474084640989063, "grad_norm": 0.3752667009830475, "learning_rate": 9.510955859002858e-05, "loss": 0.2933, "step": 775 }, { "epoch": 1.4759866856871136, "grad_norm": 0.44539371132850647, "learning_rate": 9.510320736741824e-05, "loss": 0.2699, "step": 776 }, { "epoch": 1.477888730385164, "grad_norm": 0.5468220114707947, "learning_rate": 9.509685614480788e-05, "loss": 0.4141, "step": 777 }, { "epoch": 1.4797907750832144, "grad_norm": 0.5036222338676453, "learning_rate": 9.509050492219753e-05, "loss": 0.3463, "step": 778 }, { "epoch": 1.4816928197812649, "grad_norm": 0.3742172420024872, "learning_rate": 9.508415369958717e-05, "loss": 0.3104, "step": 779 }, { "epoch": 1.4835948644793153, "grad_norm": 0.38696351647377014, "learning_rate": 9.507780247697682e-05, "loss": 0.2406, "step": 780 }, { "epoch": 1.4854969091773658, "grad_norm": 0.43431171774864197, "learning_rate": 9.507145125436647e-05, "loss": 0.307, "step": 781 }, { "epoch": 1.4873989538754162, "grad_norm": 0.3814404606819153, "learning_rate": 9.506510003175611e-05, "loss": 0.2681, "step": 782 }, { "epoch": 1.4893009985734664, "grad_norm": 0.350359708070755, "learning_rate": 9.505874880914578e-05, "loss": 0.2408, "step": 783 }, { "epoch": 1.4912030432715169, "grad_norm": 0.4443821609020233, "learning_rate": 9.505239758653541e-05, "loss": 0.3358, "step": 784 }, { "epoch": 1.4931050879695673, "grad_norm": 0.2963017225265503, "learning_rate": 9.504604636392505e-05, "loss": 0.2085, "step": 785 }, { "epoch": 1.4950071326676178, "grad_norm": 0.4765385389328003, "learning_rate": 9.50396951413147e-05, "loss": 0.396, "step": 786 }, { "epoch": 1.496909177365668, "grad_norm": 0.3389003574848175, "learning_rate": 9.503334391870436e-05, "loss": 0.327, "step": 787 }, { "epoch": 1.4988112220637184, "grad_norm": 0.42218640446662903, "learning_rate": 9.5026992696094e-05, "loss": 0.3078, "step": 788 }, { "epoch": 1.5007132667617689, "grad_norm": 0.4693278670310974, "learning_rate": 9.502064147348365e-05, "loss": 0.2853, "step": 789 }, { "epoch": 1.5026153114598193, "grad_norm": 0.3891851305961609, "learning_rate": 9.50142902508733e-05, "loss": 0.2493, "step": 790 }, { "epoch": 1.5045173561578697, "grad_norm": 0.3862535357475281, "learning_rate": 9.500793902826295e-05, "loss": 0.2673, "step": 791 }, { "epoch": 1.5064194008559202, "grad_norm": 0.34803205728530884, "learning_rate": 9.500158780565259e-05, "loss": 0.2814, "step": 792 }, { "epoch": 1.5083214455539706, "grad_norm": 0.3963899314403534, "learning_rate": 9.499523658304224e-05, "loss": 0.3018, "step": 793 }, { "epoch": 1.510223490252021, "grad_norm": 0.4004577398300171, "learning_rate": 9.498888536043189e-05, "loss": 0.313, "step": 794 }, { "epoch": 1.5121255349500713, "grad_norm": 0.32212579250335693, "learning_rate": 9.498253413782153e-05, "loss": 0.2081, "step": 795 }, { "epoch": 1.5140275796481217, "grad_norm": 0.32745805382728577, "learning_rate": 9.497618291521118e-05, "loss": 0.231, "step": 796 }, { "epoch": 1.5159296243461722, "grad_norm": 0.40773364901542664, "learning_rate": 9.496983169260083e-05, "loss": 0.2804, "step": 797 }, { "epoch": 1.5178316690442224, "grad_norm": 0.3848927319049835, "learning_rate": 9.496348046999047e-05, "loss": 0.288, "step": 798 }, { "epoch": 1.5197337137422728, "grad_norm": 0.317124605178833, "learning_rate": 9.495712924738012e-05, "loss": 0.2202, "step": 799 }, { "epoch": 1.5216357584403233, "grad_norm": 0.3564606010913849, "learning_rate": 9.495077802476978e-05, "loss": 0.2594, "step": 800 }, { "epoch": 1.5235378031383737, "grad_norm": 0.3151964545249939, "learning_rate": 9.494442680215943e-05, "loss": 0.2138, "step": 801 }, { "epoch": 1.5254398478364242, "grad_norm": 0.4009242057800293, "learning_rate": 9.493807557954907e-05, "loss": 0.3157, "step": 802 }, { "epoch": 1.5273418925344746, "grad_norm": 0.36916011571884155, "learning_rate": 9.49317243569387e-05, "loss": 0.2478, "step": 803 }, { "epoch": 1.529243937232525, "grad_norm": 0.372277170419693, "learning_rate": 9.492537313432837e-05, "loss": 0.2912, "step": 804 }, { "epoch": 1.5311459819305755, "grad_norm": 0.42100057005882263, "learning_rate": 9.491902191171801e-05, "loss": 0.2938, "step": 805 }, { "epoch": 1.533048026628626, "grad_norm": 0.3528178334236145, "learning_rate": 9.491267068910765e-05, "loss": 0.2519, "step": 806 }, { "epoch": 1.5349500713266762, "grad_norm": 0.3655840754508972, "learning_rate": 9.490631946649731e-05, "loss": 0.2685, "step": 807 }, { "epoch": 1.5368521160247266, "grad_norm": 0.34080174565315247, "learning_rate": 9.489996824388695e-05, "loss": 0.2339, "step": 808 }, { "epoch": 1.5387541607227768, "grad_norm": 0.3532484173774719, "learning_rate": 9.48936170212766e-05, "loss": 0.2448, "step": 809 }, { "epoch": 1.5406562054208273, "grad_norm": 0.33115965127944946, "learning_rate": 9.488726579866624e-05, "loss": 0.2549, "step": 810 }, { "epoch": 1.5425582501188777, "grad_norm": 0.40624433755874634, "learning_rate": 9.488091457605589e-05, "loss": 0.2847, "step": 811 }, { "epoch": 1.5444602948169281, "grad_norm": 0.35374221205711365, "learning_rate": 9.487456335344554e-05, "loss": 0.2704, "step": 812 }, { "epoch": 1.5463623395149786, "grad_norm": 0.3859337568283081, "learning_rate": 9.486821213083518e-05, "loss": 0.2969, "step": 813 }, { "epoch": 1.548264384213029, "grad_norm": 0.37984946370124817, "learning_rate": 9.486186090822485e-05, "loss": 0.2908, "step": 814 }, { "epoch": 1.5501664289110795, "grad_norm": 0.34984755516052246, "learning_rate": 9.485550968561449e-05, "loss": 0.2247, "step": 815 }, { "epoch": 1.55206847360913, "grad_norm": 0.32592761516571045, "learning_rate": 9.484915846300412e-05, "loss": 0.1985, "step": 816 }, { "epoch": 1.5539705183071804, "grad_norm": 0.4273107945919037, "learning_rate": 9.484280724039378e-05, "loss": 0.2875, "step": 817 }, { "epoch": 1.5558725630052306, "grad_norm": 0.35476601123809814, "learning_rate": 9.483645601778343e-05, "loss": 0.2721, "step": 818 }, { "epoch": 1.557774607703281, "grad_norm": 0.30542057752609253, "learning_rate": 9.483010479517308e-05, "loss": 0.1966, "step": 819 }, { "epoch": 1.5596766524013315, "grad_norm": 0.44310665130615234, "learning_rate": 9.482375357256272e-05, "loss": 0.2533, "step": 820 }, { "epoch": 1.5615786970993817, "grad_norm": 0.39837488532066345, "learning_rate": 9.481740234995237e-05, "loss": 0.3045, "step": 821 }, { "epoch": 1.5634807417974321, "grad_norm": 0.33650925755500793, "learning_rate": 9.481105112734202e-05, "loss": 0.3626, "step": 822 }, { "epoch": 1.5653827864954826, "grad_norm": 0.39762622117996216, "learning_rate": 9.480469990473166e-05, "loss": 0.2862, "step": 823 }, { "epoch": 1.567284831193533, "grad_norm": 0.36138975620269775, "learning_rate": 9.479834868212131e-05, "loss": 0.2434, "step": 824 }, { "epoch": 1.5691868758915835, "grad_norm": 0.37878358364105225, "learning_rate": 9.479199745951096e-05, "loss": 0.2421, "step": 825 }, { "epoch": 1.571088920589634, "grad_norm": 0.4009093642234802, "learning_rate": 9.47856462369006e-05, "loss": 0.2561, "step": 826 }, { "epoch": 1.5729909652876843, "grad_norm": 0.3085389733314514, "learning_rate": 9.477929501429025e-05, "loss": 0.2293, "step": 827 }, { "epoch": 1.5748930099857348, "grad_norm": 0.48082223534584045, "learning_rate": 9.47729437916799e-05, "loss": 0.3193, "step": 828 }, { "epoch": 1.5767950546837852, "grad_norm": 0.42938464879989624, "learning_rate": 9.476659256906954e-05, "loss": 0.3319, "step": 829 }, { "epoch": 1.5786970993818354, "grad_norm": 0.32788941264152527, "learning_rate": 9.47602413464592e-05, "loss": 0.2432, "step": 830 }, { "epoch": 1.5805991440798859, "grad_norm": 0.38157737255096436, "learning_rate": 9.475389012384885e-05, "loss": 0.3165, "step": 831 }, { "epoch": 1.5825011887779363, "grad_norm": 0.38666632771492004, "learning_rate": 9.47475389012385e-05, "loss": 0.2554, "step": 832 }, { "epoch": 1.5844032334759865, "grad_norm": 0.3475115895271301, "learning_rate": 9.474118767862814e-05, "loss": 0.2679, "step": 833 }, { "epoch": 1.586305278174037, "grad_norm": 0.35684680938720703, "learning_rate": 9.473483645601779e-05, "loss": 0.2574, "step": 834 }, { "epoch": 1.5882073228720874, "grad_norm": 0.5205959677696228, "learning_rate": 9.472848523340744e-05, "loss": 0.3646, "step": 835 }, { "epoch": 1.5901093675701379, "grad_norm": 0.37549740076065063, "learning_rate": 9.472213401079708e-05, "loss": 0.2741, "step": 836 }, { "epoch": 1.5920114122681883, "grad_norm": 0.5251928567886353, "learning_rate": 9.471578278818673e-05, "loss": 0.3799, "step": 837 }, { "epoch": 1.5939134569662388, "grad_norm": 0.42622271180152893, "learning_rate": 9.470943156557638e-05, "loss": 0.2991, "step": 838 }, { "epoch": 1.5958155016642892, "grad_norm": 0.3737063407897949, "learning_rate": 9.470308034296602e-05, "loss": 0.288, "step": 839 }, { "epoch": 1.5977175463623396, "grad_norm": 0.4851538836956024, "learning_rate": 9.469672912035567e-05, "loss": 0.3293, "step": 840 }, { "epoch": 1.5996195910603899, "grad_norm": 0.3662918508052826, "learning_rate": 9.469037789774533e-05, "loss": 0.2338, "step": 841 }, { "epoch": 1.6015216357584403, "grad_norm": 0.3263486325740814, "learning_rate": 9.468402667513496e-05, "loss": 0.2228, "step": 842 }, { "epoch": 1.6034236804564908, "grad_norm": 0.4000779092311859, "learning_rate": 9.467767545252462e-05, "loss": 0.2635, "step": 843 }, { "epoch": 1.605325725154541, "grad_norm": 0.4274492859840393, "learning_rate": 9.467132422991425e-05, "loss": 0.3063, "step": 844 }, { "epoch": 1.6072277698525914, "grad_norm": 0.4486158490180969, "learning_rate": 9.466497300730392e-05, "loss": 0.3039, "step": 845 }, { "epoch": 1.6091298145506419, "grad_norm": 0.48109135031700134, "learning_rate": 9.465862178469356e-05, "loss": 0.3471, "step": 846 }, { "epoch": 1.6110318592486923, "grad_norm": 0.41299277544021606, "learning_rate": 9.46522705620832e-05, "loss": 0.2896, "step": 847 }, { "epoch": 1.6129339039467427, "grad_norm": 0.4177182614803314, "learning_rate": 9.464591933947286e-05, "loss": 0.2519, "step": 848 }, { "epoch": 1.6148359486447932, "grad_norm": 0.36468592286109924, "learning_rate": 9.46395681168625e-05, "loss": 0.275, "step": 849 }, { "epoch": 1.6167379933428436, "grad_norm": 0.33025646209716797, "learning_rate": 9.463321689425215e-05, "loss": 0.234, "step": 850 }, { "epoch": 1.618640038040894, "grad_norm": 0.4377218186855316, "learning_rate": 9.462686567164179e-05, "loss": 0.2939, "step": 851 }, { "epoch": 1.6205420827389445, "grad_norm": 0.34059834480285645, "learning_rate": 9.462051444903144e-05, "loss": 0.2559, "step": 852 }, { "epoch": 1.6224441274369947, "grad_norm": 0.36525094509124756, "learning_rate": 9.46141632264211e-05, "loss": 0.2638, "step": 853 }, { "epoch": 1.6243461721350452, "grad_norm": 0.344927042722702, "learning_rate": 9.460781200381073e-05, "loss": 0.1906, "step": 854 }, { "epoch": 1.6262482168330956, "grad_norm": 0.4097568988800049, "learning_rate": 9.460146078120038e-05, "loss": 0.3143, "step": 855 }, { "epoch": 1.6281502615311458, "grad_norm": 0.32290300726890564, "learning_rate": 9.459510955859004e-05, "loss": 0.2734, "step": 856 }, { "epoch": 1.6300523062291963, "grad_norm": 0.3865107595920563, "learning_rate": 9.458875833597967e-05, "loss": 0.3012, "step": 857 }, { "epoch": 1.6319543509272467, "grad_norm": 0.3034641444683075, "learning_rate": 9.458240711336933e-05, "loss": 0.2164, "step": 858 }, { "epoch": 1.6338563956252972, "grad_norm": 0.3896719217300415, "learning_rate": 9.457605589075898e-05, "loss": 0.2577, "step": 859 }, { "epoch": 1.6357584403233476, "grad_norm": 0.35619622468948364, "learning_rate": 9.456970466814862e-05, "loss": 0.3076, "step": 860 }, { "epoch": 1.637660485021398, "grad_norm": 0.39600345492362976, "learning_rate": 9.456335344553827e-05, "loss": 0.4003, "step": 861 }, { "epoch": 1.6395625297194485, "grad_norm": 0.3511577248573303, "learning_rate": 9.455700222292792e-05, "loss": 0.2603, "step": 862 }, { "epoch": 1.641464574417499, "grad_norm": 0.44329899549484253, "learning_rate": 9.455065100031757e-05, "loss": 0.2921, "step": 863 }, { "epoch": 1.6433666191155494, "grad_norm": 0.3798992931842804, "learning_rate": 9.454429977770721e-05, "loss": 0.2897, "step": 864 }, { "epoch": 1.6452686638135996, "grad_norm": 0.38711193203926086, "learning_rate": 9.453794855509686e-05, "loss": 0.2791, "step": 865 }, { "epoch": 1.64717070851165, "grad_norm": 0.3537624478340149, "learning_rate": 9.453159733248651e-05, "loss": 0.2207, "step": 866 }, { "epoch": 1.6490727532097005, "grad_norm": 0.350455641746521, "learning_rate": 9.452524610987615e-05, "loss": 0.2595, "step": 867 }, { "epoch": 1.6509747979077507, "grad_norm": 0.35781386494636536, "learning_rate": 9.45188948872658e-05, "loss": 0.2618, "step": 868 }, { "epoch": 1.6528768426058011, "grad_norm": 0.4823295772075653, "learning_rate": 9.451254366465546e-05, "loss": 0.3174, "step": 869 }, { "epoch": 1.6547788873038516, "grad_norm": 0.31698495149612427, "learning_rate": 9.45061924420451e-05, "loss": 0.2165, "step": 870 }, { "epoch": 1.656680932001902, "grad_norm": 0.4576948583126068, "learning_rate": 9.449984121943475e-05, "loss": 0.2937, "step": 871 }, { "epoch": 1.6585829766999525, "grad_norm": 0.4196888506412506, "learning_rate": 9.44934899968244e-05, "loss": 0.2876, "step": 872 }, { "epoch": 1.660485021398003, "grad_norm": 0.48588597774505615, "learning_rate": 9.448713877421405e-05, "loss": 0.3433, "step": 873 }, { "epoch": 1.6623870660960534, "grad_norm": 0.427946537733078, "learning_rate": 9.448078755160369e-05, "loss": 0.3184, "step": 874 }, { "epoch": 1.6642891107941038, "grad_norm": 0.4138951897621155, "learning_rate": 9.447443632899333e-05, "loss": 0.2738, "step": 875 }, { "epoch": 1.666191155492154, "grad_norm": 0.36560842394828796, "learning_rate": 9.446808510638299e-05, "loss": 0.3029, "step": 876 }, { "epoch": 1.6680932001902045, "grad_norm": 0.42942315340042114, "learning_rate": 9.446173388377263e-05, "loss": 0.2888, "step": 877 }, { "epoch": 1.669995244888255, "grad_norm": 0.21167854964733124, "learning_rate": 9.445538266116227e-05, "loss": 0.1919, "step": 878 }, { "epoch": 1.6718972895863051, "grad_norm": 0.41339564323425293, "learning_rate": 9.444903143855193e-05, "loss": 0.2482, "step": 879 }, { "epoch": 1.6737993342843556, "grad_norm": 0.47189727425575256, "learning_rate": 9.444268021594157e-05, "loss": 0.328, "step": 880 }, { "epoch": 1.675701378982406, "grad_norm": 0.32868659496307373, "learning_rate": 9.443632899333122e-05, "loss": 0.1985, "step": 881 }, { "epoch": 1.6776034236804565, "grad_norm": 0.3501724898815155, "learning_rate": 9.442997777072086e-05, "loss": 0.2733, "step": 882 }, { "epoch": 1.679505468378507, "grad_norm": 0.37144583463668823, "learning_rate": 9.442362654811051e-05, "loss": 0.2293, "step": 883 }, { "epoch": 1.6814075130765573, "grad_norm": 0.36318424344062805, "learning_rate": 9.441727532550017e-05, "loss": 0.3521, "step": 884 }, { "epoch": 1.6833095577746078, "grad_norm": 0.4295286238193512, "learning_rate": 9.44109241028898e-05, "loss": 0.3113, "step": 885 }, { "epoch": 1.6852116024726582, "grad_norm": 0.3312181830406189, "learning_rate": 9.440457288027947e-05, "loss": 0.2818, "step": 886 }, { "epoch": 1.6871136471707087, "grad_norm": 0.3743634819984436, "learning_rate": 9.439822165766911e-05, "loss": 0.245, "step": 887 }, { "epoch": 1.6890156918687589, "grad_norm": 0.5934861898422241, "learning_rate": 9.439187043505875e-05, "loss": 0.3654, "step": 888 }, { "epoch": 1.6909177365668093, "grad_norm": 0.4149317741394043, "learning_rate": 9.43855192124484e-05, "loss": 0.2584, "step": 889 }, { "epoch": 1.6928197812648598, "grad_norm": 0.40615764260292053, "learning_rate": 9.437916798983805e-05, "loss": 0.2986, "step": 890 }, { "epoch": 1.69472182596291, "grad_norm": 0.37536385655403137, "learning_rate": 9.43728167672277e-05, "loss": 0.2813, "step": 891 }, { "epoch": 1.6966238706609604, "grad_norm": 0.41415923833847046, "learning_rate": 9.436646554461734e-05, "loss": 0.3333, "step": 892 }, { "epoch": 1.6985259153590109, "grad_norm": 0.30747082829475403, "learning_rate": 9.436011432200699e-05, "loss": 0.2143, "step": 893 }, { "epoch": 1.7004279600570613, "grad_norm": 0.44593873620033264, "learning_rate": 9.435376309939664e-05, "loss": 0.2834, "step": 894 }, { "epoch": 1.7023300047551118, "grad_norm": 0.3417704403400421, "learning_rate": 9.434741187678628e-05, "loss": 0.2265, "step": 895 }, { "epoch": 1.7042320494531622, "grad_norm": 0.3436511754989624, "learning_rate": 9.434106065417593e-05, "loss": 0.249, "step": 896 }, { "epoch": 1.7061340941512126, "grad_norm": 0.4569544494152069, "learning_rate": 9.433470943156559e-05, "loss": 0.3271, "step": 897 }, { "epoch": 1.708036138849263, "grad_norm": 0.3883751630783081, "learning_rate": 9.432835820895522e-05, "loss": 0.2673, "step": 898 }, { "epoch": 1.7099381835473135, "grad_norm": 0.3915776014328003, "learning_rate": 9.432200698634488e-05, "loss": 0.2313, "step": 899 }, { "epoch": 1.7118402282453637, "grad_norm": 0.3450072407722473, "learning_rate": 9.431565576373453e-05, "loss": 0.2726, "step": 900 }, { "epoch": 1.7137422729434142, "grad_norm": 0.3894912004470825, "learning_rate": 9.430930454112417e-05, "loss": 0.2607, "step": 901 }, { "epoch": 1.7156443176414644, "grad_norm": 0.3509180545806885, "learning_rate": 9.430295331851382e-05, "loss": 0.2781, "step": 902 }, { "epoch": 1.7175463623395149, "grad_norm": 0.5164948105812073, "learning_rate": 9.429660209590347e-05, "loss": 0.3619, "step": 903 }, { "epoch": 1.7194484070375653, "grad_norm": 0.4074023962020874, "learning_rate": 9.429025087329312e-05, "loss": 0.3116, "step": 904 }, { "epoch": 1.7213504517356157, "grad_norm": 0.4034394323825836, "learning_rate": 9.428389965068276e-05, "loss": 0.3155, "step": 905 }, { "epoch": 1.7232524964336662, "grad_norm": 0.32292982935905457, "learning_rate": 9.427754842807241e-05, "loss": 0.2171, "step": 906 }, { "epoch": 1.7251545411317166, "grad_norm": 0.368856817483902, "learning_rate": 9.427119720546206e-05, "loss": 0.3021, "step": 907 }, { "epoch": 1.727056585829767, "grad_norm": 0.34953123331069946, "learning_rate": 9.42648459828517e-05, "loss": 0.2701, "step": 908 }, { "epoch": 1.7289586305278175, "grad_norm": 0.37510743737220764, "learning_rate": 9.425849476024135e-05, "loss": 0.3216, "step": 909 }, { "epoch": 1.730860675225868, "grad_norm": 0.31331393122673035, "learning_rate": 9.4252143537631e-05, "loss": 0.2855, "step": 910 }, { "epoch": 1.7327627199239182, "grad_norm": 0.3806105852127075, "learning_rate": 9.424579231502064e-05, "loss": 0.3216, "step": 911 }, { "epoch": 1.7346647646219686, "grad_norm": 0.3693408668041229, "learning_rate": 9.42394410924103e-05, "loss": 0.2473, "step": 912 }, { "epoch": 1.736566809320019, "grad_norm": 0.2931939959526062, "learning_rate": 9.423308986979993e-05, "loss": 0.1873, "step": 913 }, { "epoch": 1.7384688540180693, "grad_norm": 0.4330272972583771, "learning_rate": 9.422673864718959e-05, "loss": 0.3078, "step": 914 }, { "epoch": 1.7403708987161197, "grad_norm": 0.4881534278392792, "learning_rate": 9.422038742457924e-05, "loss": 0.3771, "step": 915 }, { "epoch": 1.7422729434141702, "grad_norm": 0.3158344328403473, "learning_rate": 9.421403620196888e-05, "loss": 0.2813, "step": 916 }, { "epoch": 1.7441749881122206, "grad_norm": 0.4482041299343109, "learning_rate": 9.420768497935854e-05, "loss": 0.3872, "step": 917 }, { "epoch": 1.746077032810271, "grad_norm": 0.3493407070636749, "learning_rate": 9.420133375674818e-05, "loss": 0.2284, "step": 918 }, { "epoch": 1.7479790775083215, "grad_norm": 0.3753608763217926, "learning_rate": 9.419498253413782e-05, "loss": 0.254, "step": 919 }, { "epoch": 1.749881122206372, "grad_norm": 0.4550943374633789, "learning_rate": 9.418863131152747e-05, "loss": 0.3073, "step": 920 }, { "epoch": 1.7517831669044224, "grad_norm": 0.3239607810974121, "learning_rate": 9.418228008891712e-05, "loss": 0.2087, "step": 921 }, { "epoch": 1.7536852116024728, "grad_norm": 0.4610382616519928, "learning_rate": 9.417592886630677e-05, "loss": 0.3104, "step": 922 }, { "epoch": 1.755587256300523, "grad_norm": 0.4382965862751007, "learning_rate": 9.416957764369641e-05, "loss": 0.2583, "step": 923 }, { "epoch": 1.7574893009985735, "grad_norm": 0.31299924850463867, "learning_rate": 9.416322642108606e-05, "loss": 0.2033, "step": 924 }, { "epoch": 1.759391345696624, "grad_norm": 0.33872106671333313, "learning_rate": 9.415687519847571e-05, "loss": 0.2366, "step": 925 }, { "epoch": 1.7612933903946741, "grad_norm": 0.33771976828575134, "learning_rate": 9.415052397586535e-05, "loss": 0.3062, "step": 926 }, { "epoch": 1.7631954350927246, "grad_norm": 0.32810178399086, "learning_rate": 9.4144172753255e-05, "loss": 0.2264, "step": 927 }, { "epoch": 1.765097479790775, "grad_norm": 0.41518697142601013, "learning_rate": 9.413782153064466e-05, "loss": 0.2747, "step": 928 }, { "epoch": 1.7669995244888255, "grad_norm": 0.43647775053977966, "learning_rate": 9.41314703080343e-05, "loss": 0.3439, "step": 929 }, { "epoch": 1.768901569186876, "grad_norm": 0.2905902564525604, "learning_rate": 9.412511908542395e-05, "loss": 0.2327, "step": 930 }, { "epoch": 1.7708036138849264, "grad_norm": 0.38527336716651917, "learning_rate": 9.41187678628136e-05, "loss": 0.264, "step": 931 }, { "epoch": 1.7727056585829768, "grad_norm": 0.4135185182094574, "learning_rate": 9.411241664020324e-05, "loss": 0.3075, "step": 932 }, { "epoch": 1.7746077032810272, "grad_norm": 0.30278775095939636, "learning_rate": 9.410606541759289e-05, "loss": 0.1831, "step": 933 }, { "epoch": 1.7765097479790775, "grad_norm": 0.3687085509300232, "learning_rate": 9.409971419498254e-05, "loss": 0.2862, "step": 934 }, { "epoch": 1.778411792677128, "grad_norm": 0.3217594623565674, "learning_rate": 9.409336297237219e-05, "loss": 0.1975, "step": 935 }, { "epoch": 1.7803138373751783, "grad_norm": 0.3583223223686218, "learning_rate": 9.408701174976183e-05, "loss": 0.2345, "step": 936 }, { "epoch": 1.7822158820732286, "grad_norm": 0.4119435250759125, "learning_rate": 9.408066052715148e-05, "loss": 0.2916, "step": 937 }, { "epoch": 1.784117926771279, "grad_norm": 0.400728315114975, "learning_rate": 9.407430930454113e-05, "loss": 0.4505, "step": 938 }, { "epoch": 1.7860199714693294, "grad_norm": 0.3988611698150635, "learning_rate": 9.406795808193077e-05, "loss": 0.286, "step": 939 }, { "epoch": 1.78792201616738, "grad_norm": 0.4544796347618103, "learning_rate": 9.406160685932042e-05, "loss": 0.3268, "step": 940 }, { "epoch": 1.7898240608654303, "grad_norm": 0.3785744905471802, "learning_rate": 9.405525563671008e-05, "loss": 0.2532, "step": 941 }, { "epoch": 1.7917261055634808, "grad_norm": 0.4459128975868225, "learning_rate": 9.404890441409971e-05, "loss": 0.3348, "step": 942 }, { "epoch": 1.7936281502615312, "grad_norm": 0.3253449499607086, "learning_rate": 9.404255319148937e-05, "loss": 0.1945, "step": 943 }, { "epoch": 1.7955301949595817, "grad_norm": 0.4977390468120575, "learning_rate": 9.403620196887902e-05, "loss": 0.3, "step": 944 }, { "epoch": 1.797432239657632, "grad_norm": 0.46191859245300293, "learning_rate": 9.402985074626867e-05, "loss": 0.3638, "step": 945 }, { "epoch": 1.7993342843556823, "grad_norm": 0.38492342829704285, "learning_rate": 9.402349952365831e-05, "loss": 0.2566, "step": 946 }, { "epoch": 1.8012363290537328, "grad_norm": 0.34863540530204773, "learning_rate": 9.401714830104795e-05, "loss": 0.2321, "step": 947 }, { "epoch": 1.8031383737517832, "grad_norm": 0.3839346766471863, "learning_rate": 9.401079707843761e-05, "loss": 0.2751, "step": 948 }, { "epoch": 1.8050404184498334, "grad_norm": 0.36121171712875366, "learning_rate": 9.400444585582725e-05, "loss": 0.2492, "step": 949 }, { "epoch": 1.8069424631478839, "grad_norm": 0.3479311466217041, "learning_rate": 9.399809463321689e-05, "loss": 0.2436, "step": 950 }, { "epoch": 1.8088445078459343, "grad_norm": 0.35279884934425354, "learning_rate": 9.399174341060655e-05, "loss": 0.2718, "step": 951 }, { "epoch": 1.8107465525439848, "grad_norm": 0.43152448534965515, "learning_rate": 9.398539218799619e-05, "loss": 0.2739, "step": 952 }, { "epoch": 1.8126485972420352, "grad_norm": 0.3631283938884735, "learning_rate": 9.397904096538584e-05, "loss": 0.2239, "step": 953 }, { "epoch": 1.8145506419400856, "grad_norm": 0.4698762595653534, "learning_rate": 9.397268974277548e-05, "loss": 0.3247, "step": 954 }, { "epoch": 1.816452686638136, "grad_norm": 0.36629432439804077, "learning_rate": 9.396633852016513e-05, "loss": 0.2778, "step": 955 }, { "epoch": 1.8183547313361865, "grad_norm": 0.34220409393310547, "learning_rate": 9.395998729755479e-05, "loss": 0.2466, "step": 956 }, { "epoch": 1.820256776034237, "grad_norm": 0.3768969178199768, "learning_rate": 9.395363607494442e-05, "loss": 0.334, "step": 957 }, { "epoch": 1.8221588207322872, "grad_norm": 0.2891027629375458, "learning_rate": 9.394728485233409e-05, "loss": 0.206, "step": 958 }, { "epoch": 1.8240608654303376, "grad_norm": 0.2802363634109497, "learning_rate": 9.394093362972373e-05, "loss": 0.2566, "step": 959 }, { "epoch": 1.825962910128388, "grad_norm": 0.38722601532936096, "learning_rate": 9.393458240711337e-05, "loss": 0.2615, "step": 960 }, { "epoch": 1.8278649548264383, "grad_norm": 0.45663881301879883, "learning_rate": 9.392823118450302e-05, "loss": 0.3521, "step": 961 }, { "epoch": 1.8297669995244887, "grad_norm": 0.36096152663230896, "learning_rate": 9.392187996189267e-05, "loss": 0.2429, "step": 962 }, { "epoch": 1.8316690442225392, "grad_norm": 0.3237638473510742, "learning_rate": 9.391552873928232e-05, "loss": 0.2874, "step": 963 }, { "epoch": 1.8335710889205896, "grad_norm": 0.379863440990448, "learning_rate": 9.390917751667196e-05, "loss": 0.2504, "step": 964 }, { "epoch": 1.83547313361864, "grad_norm": 0.40816691517829895, "learning_rate": 9.390282629406161e-05, "loss": 0.2614, "step": 965 }, { "epoch": 1.8373751783166905, "grad_norm": 0.38382720947265625, "learning_rate": 9.389647507145126e-05, "loss": 0.2282, "step": 966 }, { "epoch": 1.839277223014741, "grad_norm": 0.328861266374588, "learning_rate": 9.38901238488409e-05, "loss": 0.1763, "step": 967 }, { "epoch": 1.8411792677127914, "grad_norm": 0.3471934497356415, "learning_rate": 9.388377262623055e-05, "loss": 0.2348, "step": 968 }, { "epoch": 1.8430813124108416, "grad_norm": 0.44112637639045715, "learning_rate": 9.38774214036202e-05, "loss": 0.3496, "step": 969 }, { "epoch": 1.844983357108892, "grad_norm": 0.4357364773750305, "learning_rate": 9.387107018100984e-05, "loss": 0.2832, "step": 970 }, { "epoch": 1.8468854018069425, "grad_norm": 0.4502738118171692, "learning_rate": 9.38647189583995e-05, "loss": 0.2862, "step": 971 }, { "epoch": 1.8487874465049927, "grad_norm": 0.3577602505683899, "learning_rate": 9.385836773578915e-05, "loss": 0.2019, "step": 972 }, { "epoch": 1.8506894912030432, "grad_norm": 0.36250707507133484, "learning_rate": 9.385201651317879e-05, "loss": 0.2936, "step": 973 }, { "epoch": 1.8525915359010936, "grad_norm": 0.44027233123779297, "learning_rate": 9.384566529056844e-05, "loss": 0.3004, "step": 974 }, { "epoch": 1.854493580599144, "grad_norm": 0.4500497877597809, "learning_rate": 9.383931406795809e-05, "loss": 0.3, "step": 975 }, { "epoch": 1.8563956252971945, "grad_norm": 0.3777524530887604, "learning_rate": 9.383296284534774e-05, "loss": 0.2535, "step": 976 }, { "epoch": 1.858297669995245, "grad_norm": 0.3377416431903839, "learning_rate": 9.382661162273738e-05, "loss": 0.2767, "step": 977 }, { "epoch": 1.8601997146932954, "grad_norm": 0.34563374519348145, "learning_rate": 9.382026040012702e-05, "loss": 0.1923, "step": 978 }, { "epoch": 1.8621017593913458, "grad_norm": 0.3025479018688202, "learning_rate": 9.381390917751668e-05, "loss": 0.2214, "step": 979 }, { "epoch": 1.8640038040893963, "grad_norm": 0.3614577054977417, "learning_rate": 9.380755795490632e-05, "loss": 0.299, "step": 980 }, { "epoch": 1.8659058487874465, "grad_norm": 0.34508028626441956, "learning_rate": 9.380120673229597e-05, "loss": 0.2201, "step": 981 }, { "epoch": 1.867807893485497, "grad_norm": 0.33169567584991455, "learning_rate": 9.379485550968563e-05, "loss": 0.2298, "step": 982 }, { "epoch": 1.8697099381835474, "grad_norm": 0.4361656904220581, "learning_rate": 9.378850428707526e-05, "loss": 0.3109, "step": 983 }, { "epoch": 1.8716119828815976, "grad_norm": 0.3832654654979706, "learning_rate": 9.378215306446492e-05, "loss": 0.2877, "step": 984 }, { "epoch": 1.873514027579648, "grad_norm": 0.3991541862487793, "learning_rate": 9.377580184185455e-05, "loss": 0.2755, "step": 985 }, { "epoch": 1.8754160722776985, "grad_norm": 0.6057716012001038, "learning_rate": 9.37694506192442e-05, "loss": 0.3665, "step": 986 }, { "epoch": 1.877318116975749, "grad_norm": 0.2887308895587921, "learning_rate": 9.376309939663386e-05, "loss": 0.2414, "step": 987 }, { "epoch": 1.8792201616737993, "grad_norm": 0.28379005193710327, "learning_rate": 9.37567481740235e-05, "loss": 0.1895, "step": 988 }, { "epoch": 1.8811222063718498, "grad_norm": 0.36071258783340454, "learning_rate": 9.375039695141316e-05, "loss": 0.2855, "step": 989 }, { "epoch": 1.8830242510699002, "grad_norm": 0.3872823119163513, "learning_rate": 9.37440457288028e-05, "loss": 0.3112, "step": 990 }, { "epoch": 1.8849262957679507, "grad_norm": 0.3761101961135864, "learning_rate": 9.373769450619244e-05, "loss": 0.2291, "step": 991 }, { "epoch": 1.886828340466001, "grad_norm": 0.404000461101532, "learning_rate": 9.373134328358209e-05, "loss": 0.2349, "step": 992 }, { "epoch": 1.8887303851640513, "grad_norm": 0.4787864089012146, "learning_rate": 9.372499206097174e-05, "loss": 0.3447, "step": 993 }, { "epoch": 1.8906324298621018, "grad_norm": 0.4898964762687683, "learning_rate": 9.37186408383614e-05, "loss": 0.3306, "step": 994 }, { "epoch": 1.892534474560152, "grad_norm": 0.3915330767631531, "learning_rate": 9.371228961575103e-05, "loss": 0.2896, "step": 995 }, { "epoch": 1.8944365192582024, "grad_norm": 0.4643494486808777, "learning_rate": 9.370593839314068e-05, "loss": 0.3131, "step": 996 }, { "epoch": 1.8963385639562529, "grad_norm": 0.39880135655403137, "learning_rate": 9.369958717053034e-05, "loss": 0.2598, "step": 997 }, { "epoch": 1.8982406086543033, "grad_norm": 0.3153114318847656, "learning_rate": 9.369323594791997e-05, "loss": 0.2429, "step": 998 }, { "epoch": 1.9001426533523538, "grad_norm": 0.4997500479221344, "learning_rate": 9.368688472530963e-05, "loss": 0.4179, "step": 999 }, { "epoch": 1.9020446980504042, "grad_norm": 0.3919009566307068, "learning_rate": 9.368053350269928e-05, "loss": 0.2468, "step": 1000 }, { "epoch": 1.9039467427484547, "grad_norm": 0.48444265127182007, "learning_rate": 9.367418228008892e-05, "loss": 0.3191, "step": 1001 }, { "epoch": 1.905848787446505, "grad_norm": 0.38168856501579285, "learning_rate": 9.366783105747857e-05, "loss": 0.2658, "step": 1002 }, { "epoch": 1.9077508321445555, "grad_norm": 0.47058162093162537, "learning_rate": 9.366147983486822e-05, "loss": 0.3392, "step": 1003 }, { "epoch": 1.9096528768426058, "grad_norm": 0.40145471692085266, "learning_rate": 9.365512861225786e-05, "loss": 0.2619, "step": 1004 }, { "epoch": 1.9115549215406562, "grad_norm": 0.6980530619621277, "learning_rate": 9.364877738964751e-05, "loss": 0.3111, "step": 1005 }, { "epoch": 1.9134569662387066, "grad_norm": 0.35878410935401917, "learning_rate": 9.364242616703716e-05, "loss": 0.3026, "step": 1006 }, { "epoch": 1.9153590109367569, "grad_norm": 0.3291071653366089, "learning_rate": 9.363607494442681e-05, "loss": 0.2813, "step": 1007 }, { "epoch": 1.9172610556348073, "grad_norm": 0.4286592900753021, "learning_rate": 9.362972372181645e-05, "loss": 0.2921, "step": 1008 }, { "epoch": 1.9191631003328578, "grad_norm": 0.2965177893638611, "learning_rate": 9.36233724992061e-05, "loss": 0.2373, "step": 1009 }, { "epoch": 1.9210651450309082, "grad_norm": 0.3153838515281677, "learning_rate": 9.361702127659576e-05, "loss": 0.2195, "step": 1010 }, { "epoch": 1.9229671897289586, "grad_norm": 0.4827108085155487, "learning_rate": 9.36106700539854e-05, "loss": 0.3127, "step": 1011 }, { "epoch": 1.924869234427009, "grad_norm": 0.43089860677719116, "learning_rate": 9.360431883137505e-05, "loss": 0.2687, "step": 1012 }, { "epoch": 1.9267712791250595, "grad_norm": 0.43147915601730347, "learning_rate": 9.35979676087647e-05, "loss": 0.3953, "step": 1013 }, { "epoch": 1.92867332382311, "grad_norm": 0.37924453616142273, "learning_rate": 9.359161638615434e-05, "loss": 0.2522, "step": 1014 }, { "epoch": 1.9305753685211604, "grad_norm": 0.34664931893348694, "learning_rate": 9.358526516354399e-05, "loss": 0.2048, "step": 1015 }, { "epoch": 1.9324774132192106, "grad_norm": 0.2877664566040039, "learning_rate": 9.357891394093364e-05, "loss": 0.1794, "step": 1016 }, { "epoch": 1.934379457917261, "grad_norm": 0.4924784302711487, "learning_rate": 9.357256271832329e-05, "loss": 0.2737, "step": 1017 }, { "epoch": 1.9362815026153115, "grad_norm": 0.36828553676605225, "learning_rate": 9.356621149571293e-05, "loss": 0.2761, "step": 1018 }, { "epoch": 1.9381835473133617, "grad_norm": 0.355372816324234, "learning_rate": 9.355986027310257e-05, "loss": 0.2647, "step": 1019 }, { "epoch": 1.9400855920114122, "grad_norm": 0.37469297647476196, "learning_rate": 9.355350905049223e-05, "loss": 0.2347, "step": 1020 }, { "epoch": 1.9419876367094626, "grad_norm": 0.44890064001083374, "learning_rate": 9.354715782788187e-05, "loss": 0.2581, "step": 1021 }, { "epoch": 1.943889681407513, "grad_norm": 0.355234295129776, "learning_rate": 9.354080660527151e-05, "loss": 0.2467, "step": 1022 }, { "epoch": 1.9457917261055635, "grad_norm": 0.463871568441391, "learning_rate": 9.353445538266116e-05, "loss": 0.2338, "step": 1023 }, { "epoch": 1.947693770803614, "grad_norm": 0.38206830620765686, "learning_rate": 9.352810416005081e-05, "loss": 0.2353, "step": 1024 }, { "epoch": 1.9495958155016644, "grad_norm": 0.37627413868904114, "learning_rate": 9.352175293744047e-05, "loss": 0.2375, "step": 1025 }, { "epoch": 1.9514978601997148, "grad_norm": 0.4191925823688507, "learning_rate": 9.35154017148301e-05, "loss": 0.2444, "step": 1026 }, { "epoch": 1.953399904897765, "grad_norm": 0.41149812936782837, "learning_rate": 9.350905049221976e-05, "loss": 0.2905, "step": 1027 }, { "epoch": 1.9553019495958155, "grad_norm": 0.329313725233078, "learning_rate": 9.350269926960941e-05, "loss": 0.2293, "step": 1028 }, { "epoch": 1.957203994293866, "grad_norm": 0.4160427749156952, "learning_rate": 9.349634804699905e-05, "loss": 0.2512, "step": 1029 }, { "epoch": 1.9591060389919162, "grad_norm": 0.4005848467350006, "learning_rate": 9.34899968243887e-05, "loss": 0.2446, "step": 1030 }, { "epoch": 1.9610080836899666, "grad_norm": 0.4497627019882202, "learning_rate": 9.348364560177835e-05, "loss": 0.3265, "step": 1031 }, { "epoch": 1.962910128388017, "grad_norm": 0.4275449216365814, "learning_rate": 9.347729437916799e-05, "loss": 0.302, "step": 1032 }, { "epoch": 1.9648121730860675, "grad_norm": 0.33947649598121643, "learning_rate": 9.347094315655764e-05, "loss": 0.1903, "step": 1033 }, { "epoch": 1.966714217784118, "grad_norm": 0.38422051072120667, "learning_rate": 9.346459193394729e-05, "loss": 0.2595, "step": 1034 }, { "epoch": 1.9686162624821684, "grad_norm": 0.35371389985084534, "learning_rate": 9.345824071133694e-05, "loss": 0.2284, "step": 1035 }, { "epoch": 1.9705183071802188, "grad_norm": 0.38803884387016296, "learning_rate": 9.345188948872658e-05, "loss": 0.3021, "step": 1036 }, { "epoch": 1.9724203518782693, "grad_norm": 0.38203269243240356, "learning_rate": 9.344553826611623e-05, "loss": 0.2863, "step": 1037 }, { "epoch": 1.9743223965763197, "grad_norm": 0.3267860412597656, "learning_rate": 9.343918704350588e-05, "loss": 0.226, "step": 1038 }, { "epoch": 1.97622444127437, "grad_norm": 0.39556884765625, "learning_rate": 9.343283582089552e-05, "loss": 0.2727, "step": 1039 }, { "epoch": 1.9781264859724204, "grad_norm": 0.4278768301010132, "learning_rate": 9.342648459828517e-05, "loss": 0.2723, "step": 1040 }, { "epoch": 1.9800285306704708, "grad_norm": 0.37279701232910156, "learning_rate": 9.342013337567483e-05, "loss": 0.2685, "step": 1041 }, { "epoch": 1.981930575368521, "grad_norm": 0.4421425759792328, "learning_rate": 9.341378215306447e-05, "loss": 0.2793, "step": 1042 }, { "epoch": 1.9838326200665715, "grad_norm": 0.4341887831687927, "learning_rate": 9.340743093045412e-05, "loss": 0.2752, "step": 1043 }, { "epoch": 1.985734664764622, "grad_norm": 0.42935600876808167, "learning_rate": 9.340107970784377e-05, "loss": 0.3127, "step": 1044 }, { "epoch": 1.9876367094626723, "grad_norm": 0.29476839303970337, "learning_rate": 9.339472848523341e-05, "loss": 0.1855, "step": 1045 }, { "epoch": 1.9895387541607228, "grad_norm": 0.43286338448524475, "learning_rate": 9.338837726262306e-05, "loss": 0.3109, "step": 1046 }, { "epoch": 1.9914407988587732, "grad_norm": 0.35097062587738037, "learning_rate": 9.338202604001271e-05, "loss": 0.2178, "step": 1047 }, { "epoch": 1.9933428435568237, "grad_norm": 0.3497145175933838, "learning_rate": 9.337567481740236e-05, "loss": 0.2372, "step": 1048 }, { "epoch": 1.9952448882548741, "grad_norm": 0.4399060904979706, "learning_rate": 9.3369323594792e-05, "loss": 0.3065, "step": 1049 }, { "epoch": 1.9971469329529246, "grad_norm": 0.43642693758010864, "learning_rate": 9.336297237218164e-05, "loss": 0.3099, "step": 1050 }, { "epoch": 1.9990489776509748, "grad_norm": 0.42969372868537903, "learning_rate": 9.33566211495713e-05, "loss": 0.2899, "step": 1051 }, { "epoch": 2.000951022349025, "grad_norm": 0.324709951877594, "learning_rate": 9.335026992696094e-05, "loss": 0.1977, "step": 1052 }, { "epoch": 2.0028530670470754, "grad_norm": 0.2254759967327118, "learning_rate": 9.33439187043506e-05, "loss": 0.1513, "step": 1053 }, { "epoch": 2.004755111745126, "grad_norm": 0.29324305057525635, "learning_rate": 9.333756748174025e-05, "loss": 0.1739, "step": 1054 }, { "epoch": 2.0066571564431763, "grad_norm": 0.2934301495552063, "learning_rate": 9.333121625912988e-05, "loss": 0.1788, "step": 1055 }, { "epoch": 2.0085592011412268, "grad_norm": 0.3355758786201477, "learning_rate": 9.332486503651954e-05, "loss": 0.1829, "step": 1056 }, { "epoch": 2.010461245839277, "grad_norm": 0.4047424793243408, "learning_rate": 9.331851381390917e-05, "loss": 0.2256, "step": 1057 }, { "epoch": 2.0123632905373277, "grad_norm": 0.38155117630958557, "learning_rate": 9.331216259129883e-05, "loss": 0.1992, "step": 1058 }, { "epoch": 2.014265335235378, "grad_norm": 0.4122423827648163, "learning_rate": 9.330581136868848e-05, "loss": 0.2222, "step": 1059 }, { "epoch": 2.0161673799334285, "grad_norm": 0.4098420739173889, "learning_rate": 9.329946014607812e-05, "loss": 0.1495, "step": 1060 }, { "epoch": 2.018069424631479, "grad_norm": 0.37494683265686035, "learning_rate": 9.329310892346778e-05, "loss": 0.1955, "step": 1061 }, { "epoch": 2.0199714693295294, "grad_norm": 0.4210919439792633, "learning_rate": 9.328675770085742e-05, "loss": 0.1851, "step": 1062 }, { "epoch": 2.02187351402758, "grad_norm": 0.415770560503006, "learning_rate": 9.328040647824706e-05, "loss": 0.209, "step": 1063 }, { "epoch": 2.02377555872563, "grad_norm": 0.38957807421684265, "learning_rate": 9.327405525563671e-05, "loss": 0.1597, "step": 1064 }, { "epoch": 2.0256776034236803, "grad_norm": 0.3568849563598633, "learning_rate": 9.326770403302636e-05, "loss": 0.1564, "step": 1065 }, { "epoch": 2.0275796481217307, "grad_norm": 0.4151419699192047, "learning_rate": 9.326135281041601e-05, "loss": 0.2213, "step": 1066 }, { "epoch": 2.029481692819781, "grad_norm": 0.437418669462204, "learning_rate": 9.325500158780565e-05, "loss": 0.2091, "step": 1067 }, { "epoch": 2.0313837375178316, "grad_norm": 0.45977523922920227, "learning_rate": 9.32486503651953e-05, "loss": 0.2044, "step": 1068 }, { "epoch": 2.033285782215882, "grad_norm": 0.3634967803955078, "learning_rate": 9.324229914258496e-05, "loss": 0.1575, "step": 1069 }, { "epoch": 2.0351878269139325, "grad_norm": 0.4348776638507843, "learning_rate": 9.32359479199746e-05, "loss": 0.1892, "step": 1070 }, { "epoch": 2.037089871611983, "grad_norm": 0.39220520853996277, "learning_rate": 9.322959669736425e-05, "loss": 0.1962, "step": 1071 }, { "epoch": 2.0389919163100334, "grad_norm": 0.4379669725894928, "learning_rate": 9.32232454747539e-05, "loss": 0.2201, "step": 1072 }, { "epoch": 2.040893961008084, "grad_norm": 0.31880828738212585, "learning_rate": 9.321689425214354e-05, "loss": 0.1471, "step": 1073 }, { "epoch": 2.0427960057061343, "grad_norm": 0.31966346502304077, "learning_rate": 9.321054302953319e-05, "loss": 0.1688, "step": 1074 }, { "epoch": 2.0446980504041843, "grad_norm": 0.38291382789611816, "learning_rate": 9.320419180692284e-05, "loss": 0.1797, "step": 1075 }, { "epoch": 2.0466000951022347, "grad_norm": 0.3871828615665436, "learning_rate": 9.319784058431248e-05, "loss": 0.2201, "step": 1076 }, { "epoch": 2.048502139800285, "grad_norm": 0.35201162099838257, "learning_rate": 9.319148936170213e-05, "loss": 0.1759, "step": 1077 }, { "epoch": 2.0504041844983356, "grad_norm": 0.32999902963638306, "learning_rate": 9.318513813909178e-05, "loss": 0.1676, "step": 1078 }, { "epoch": 2.052306229196386, "grad_norm": 0.38137802481651306, "learning_rate": 9.317878691648143e-05, "loss": 0.181, "step": 1079 }, { "epoch": 2.0542082738944365, "grad_norm": 0.28507858514785767, "learning_rate": 9.317243569387107e-05, "loss": 0.1333, "step": 1080 }, { "epoch": 2.056110318592487, "grad_norm": 0.511489987373352, "learning_rate": 9.316608447126071e-05, "loss": 0.271, "step": 1081 }, { "epoch": 2.0580123632905374, "grad_norm": 0.37042170763015747, "learning_rate": 9.315973324865038e-05, "loss": 0.2733, "step": 1082 }, { "epoch": 2.059914407988588, "grad_norm": 0.3986508548259735, "learning_rate": 9.315338202604001e-05, "loss": 0.1964, "step": 1083 }, { "epoch": 2.0618164526866383, "grad_norm": 0.37804266810417175, "learning_rate": 9.314703080342967e-05, "loss": 0.1601, "step": 1084 }, { "epoch": 2.0637184973846887, "grad_norm": 0.32077136635780334, "learning_rate": 9.314067958081932e-05, "loss": 0.1462, "step": 1085 }, { "epoch": 2.065620542082739, "grad_norm": 0.2813294231891632, "learning_rate": 9.313432835820896e-05, "loss": 0.1321, "step": 1086 }, { "epoch": 2.067522586780789, "grad_norm": 0.40840163826942444, "learning_rate": 9.312797713559861e-05, "loss": 0.1892, "step": 1087 }, { "epoch": 2.0694246314788396, "grad_norm": 0.3264133334159851, "learning_rate": 9.312162591298825e-05, "loss": 0.1415, "step": 1088 }, { "epoch": 2.07132667617689, "grad_norm": 0.4274674952030182, "learning_rate": 9.311527469037791e-05, "loss": 0.1813, "step": 1089 }, { "epoch": 2.0732287208749405, "grad_norm": 0.37283292412757874, "learning_rate": 9.310892346776755e-05, "loss": 0.1753, "step": 1090 }, { "epoch": 2.075130765572991, "grad_norm": 0.32638901472091675, "learning_rate": 9.310257224515719e-05, "loss": 0.1731, "step": 1091 }, { "epoch": 2.0770328102710414, "grad_norm": 0.3295043408870697, "learning_rate": 9.309622102254685e-05, "loss": 0.1934, "step": 1092 }, { "epoch": 2.078934854969092, "grad_norm": 0.34605681896209717, "learning_rate": 9.308986979993649e-05, "loss": 0.2556, "step": 1093 }, { "epoch": 2.0808368996671422, "grad_norm": 0.35646018385887146, "learning_rate": 9.308351857732613e-05, "loss": 0.1508, "step": 1094 }, { "epoch": 2.0827389443651927, "grad_norm": 0.3224691152572632, "learning_rate": 9.307716735471578e-05, "loss": 0.1592, "step": 1095 }, { "epoch": 2.084640989063243, "grad_norm": 0.3692566156387329, "learning_rate": 9.307081613210543e-05, "loss": 0.1555, "step": 1096 }, { "epoch": 2.0865430337612936, "grad_norm": 0.46436119079589844, "learning_rate": 9.306446490949509e-05, "loss": 0.2176, "step": 1097 }, { "epoch": 2.088445078459344, "grad_norm": 0.3176686465740204, "learning_rate": 9.305811368688472e-05, "loss": 0.1763, "step": 1098 }, { "epoch": 2.090347123157394, "grad_norm": 0.29192522168159485, "learning_rate": 9.305176246427438e-05, "loss": 0.1485, "step": 1099 }, { "epoch": 2.0922491678554445, "grad_norm": 0.34905532002449036, "learning_rate": 9.304541124166403e-05, "loss": 0.1657, "step": 1100 }, { "epoch": 2.094151212553495, "grad_norm": 0.4198562800884247, "learning_rate": 9.303906001905367e-05, "loss": 0.2077, "step": 1101 }, { "epoch": 2.0960532572515453, "grad_norm": 0.35974305868148804, "learning_rate": 9.303270879644332e-05, "loss": 0.1776, "step": 1102 }, { "epoch": 2.097955301949596, "grad_norm": 0.35371047258377075, "learning_rate": 9.302635757383297e-05, "loss": 0.1887, "step": 1103 }, { "epoch": 2.0998573466476462, "grad_norm": 0.30068957805633545, "learning_rate": 9.302000635122261e-05, "loss": 0.14, "step": 1104 }, { "epoch": 2.1017593913456967, "grad_norm": 0.31092819571495056, "learning_rate": 9.301365512861226e-05, "loss": 0.1603, "step": 1105 }, { "epoch": 2.103661436043747, "grad_norm": 0.3615265190601349, "learning_rate": 9.300730390600191e-05, "loss": 0.1791, "step": 1106 }, { "epoch": 2.1055634807417976, "grad_norm": 0.2767830491065979, "learning_rate": 9.300095268339156e-05, "loss": 0.1243, "step": 1107 }, { "epoch": 2.107465525439848, "grad_norm": 0.36988285183906555, "learning_rate": 9.29946014607812e-05, "loss": 0.1619, "step": 1108 }, { "epoch": 2.1093675701378984, "grad_norm": 0.6014404892921448, "learning_rate": 9.298825023817085e-05, "loss": 0.2635, "step": 1109 }, { "epoch": 2.1112696148359484, "grad_norm": 0.3621249794960022, "learning_rate": 9.29818990155605e-05, "loss": 0.1749, "step": 1110 }, { "epoch": 2.113171659533999, "grad_norm": 0.2977392077445984, "learning_rate": 9.297554779295014e-05, "loss": 0.1582, "step": 1111 }, { "epoch": 2.1150737042320493, "grad_norm": 0.3253994286060333, "learning_rate": 9.29691965703398e-05, "loss": 0.1787, "step": 1112 }, { "epoch": 2.1169757489300998, "grad_norm": 0.34662213921546936, "learning_rate": 9.296284534772945e-05, "loss": 0.1923, "step": 1113 }, { "epoch": 2.11887779362815, "grad_norm": 0.416458398103714, "learning_rate": 9.295649412511909e-05, "loss": 0.1941, "step": 1114 }, { "epoch": 2.1207798383262007, "grad_norm": 0.36649563908576965, "learning_rate": 9.295014290250874e-05, "loss": 0.2233, "step": 1115 }, { "epoch": 2.122681883024251, "grad_norm": 0.3445313274860382, "learning_rate": 9.294379167989839e-05, "loss": 0.1701, "step": 1116 }, { "epoch": 2.1245839277223015, "grad_norm": 0.38747549057006836, "learning_rate": 9.293744045728803e-05, "loss": 0.1707, "step": 1117 }, { "epoch": 2.126485972420352, "grad_norm": 0.4027896225452423, "learning_rate": 9.293108923467768e-05, "loss": 0.2086, "step": 1118 }, { "epoch": 2.1283880171184024, "grad_norm": 0.3629845976829529, "learning_rate": 9.292473801206733e-05, "loss": 0.1743, "step": 1119 }, { "epoch": 2.130290061816453, "grad_norm": 0.39419326186180115, "learning_rate": 9.291838678945698e-05, "loss": 0.1907, "step": 1120 }, { "epoch": 2.132192106514503, "grad_norm": 0.36944523453712463, "learning_rate": 9.291203556684662e-05, "loss": 0.1631, "step": 1121 }, { "epoch": 2.1340941512125533, "grad_norm": 0.4214774966239929, "learning_rate": 9.290568434423626e-05, "loss": 0.2397, "step": 1122 }, { "epoch": 2.1359961959106037, "grad_norm": 0.3092084228992462, "learning_rate": 9.289933312162593e-05, "loss": 0.1396, "step": 1123 }, { "epoch": 2.137898240608654, "grad_norm": 0.3649998605251312, "learning_rate": 9.289298189901556e-05, "loss": 0.1677, "step": 1124 }, { "epoch": 2.1398002853067046, "grad_norm": 0.4131282567977905, "learning_rate": 9.288663067640522e-05, "loss": 0.2049, "step": 1125 }, { "epoch": 2.141702330004755, "grad_norm": 0.4324544668197632, "learning_rate": 9.288027945379485e-05, "loss": 0.1757, "step": 1126 }, { "epoch": 2.1436043747028055, "grad_norm": 0.4258798658847809, "learning_rate": 9.28739282311845e-05, "loss": 0.199, "step": 1127 }, { "epoch": 2.145506419400856, "grad_norm": 0.4244062602519989, "learning_rate": 9.286757700857416e-05, "loss": 0.2006, "step": 1128 }, { "epoch": 2.1474084640989064, "grad_norm": 0.4003104865550995, "learning_rate": 9.28612257859638e-05, "loss": 0.2098, "step": 1129 }, { "epoch": 2.149310508796957, "grad_norm": 0.36191633343696594, "learning_rate": 9.285487456335345e-05, "loss": 0.1821, "step": 1130 }, { "epoch": 2.1512125534950073, "grad_norm": 0.47675448656082153, "learning_rate": 9.28485233407431e-05, "loss": 0.2083, "step": 1131 }, { "epoch": 2.1531145981930577, "grad_norm": 0.4418546259403229, "learning_rate": 9.284217211813274e-05, "loss": 0.2228, "step": 1132 }, { "epoch": 2.155016642891108, "grad_norm": 0.31201982498168945, "learning_rate": 9.283582089552239e-05, "loss": 0.1326, "step": 1133 }, { "epoch": 2.156918687589158, "grad_norm": 0.30012449622154236, "learning_rate": 9.282946967291204e-05, "loss": 0.1376, "step": 1134 }, { "epoch": 2.1588207322872086, "grad_norm": 0.3705848455429077, "learning_rate": 9.282311845030168e-05, "loss": 0.1719, "step": 1135 }, { "epoch": 2.160722776985259, "grad_norm": 0.4028238356113434, "learning_rate": 9.281676722769133e-05, "loss": 0.178, "step": 1136 }, { "epoch": 2.1626248216833095, "grad_norm": 0.38973838090896606, "learning_rate": 9.281041600508098e-05, "loss": 0.1875, "step": 1137 }, { "epoch": 2.16452686638136, "grad_norm": 0.3756285309791565, "learning_rate": 9.280406478247064e-05, "loss": 0.1883, "step": 1138 }, { "epoch": 2.1664289110794104, "grad_norm": 0.2721819579601288, "learning_rate": 9.279771355986027e-05, "loss": 0.1468, "step": 1139 }, { "epoch": 2.168330955777461, "grad_norm": 0.34547916054725647, "learning_rate": 9.279136233724993e-05, "loss": 0.2043, "step": 1140 }, { "epoch": 2.1702330004755113, "grad_norm": 0.44819575548171997, "learning_rate": 9.278501111463958e-05, "loss": 0.2029, "step": 1141 }, { "epoch": 2.1721350451735617, "grad_norm": 0.36632853746414185, "learning_rate": 9.277865989202922e-05, "loss": 0.1884, "step": 1142 }, { "epoch": 2.174037089871612, "grad_norm": 0.37020185589790344, "learning_rate": 9.277230866941887e-05, "loss": 0.1819, "step": 1143 }, { "epoch": 2.1759391345696626, "grad_norm": 0.4174460470676422, "learning_rate": 9.276595744680852e-05, "loss": 0.1918, "step": 1144 }, { "epoch": 2.1778411792677126, "grad_norm": 0.4120714068412781, "learning_rate": 9.275960622419816e-05, "loss": 0.2496, "step": 1145 }, { "epoch": 2.179743223965763, "grad_norm": 0.4350152909755707, "learning_rate": 9.275325500158781e-05, "loss": 0.1981, "step": 1146 }, { "epoch": 2.1816452686638135, "grad_norm": 0.35637348890304565, "learning_rate": 9.274690377897746e-05, "loss": 0.1639, "step": 1147 }, { "epoch": 2.183547313361864, "grad_norm": 0.34323298931121826, "learning_rate": 9.27405525563671e-05, "loss": 0.1761, "step": 1148 }, { "epoch": 2.1854493580599144, "grad_norm": 0.30730780959129333, "learning_rate": 9.273420133375675e-05, "loss": 0.1623, "step": 1149 }, { "epoch": 2.187351402757965, "grad_norm": 0.32239773869514465, "learning_rate": 9.27278501111464e-05, "loss": 0.1238, "step": 1150 }, { "epoch": 2.1892534474560152, "grad_norm": 0.35441848635673523, "learning_rate": 9.272149888853606e-05, "loss": 0.1578, "step": 1151 }, { "epoch": 2.1911554921540657, "grad_norm": 0.33287835121154785, "learning_rate": 9.27151476659257e-05, "loss": 0.1726, "step": 1152 }, { "epoch": 2.193057536852116, "grad_norm": 0.3281983435153961, "learning_rate": 9.270879644331533e-05, "loss": 0.1435, "step": 1153 }, { "epoch": 2.1949595815501666, "grad_norm": 0.31831398606300354, "learning_rate": 9.2702445220705e-05, "loss": 0.1585, "step": 1154 }, { "epoch": 2.196861626248217, "grad_norm": 0.43460169434547424, "learning_rate": 9.269609399809464e-05, "loss": 0.2121, "step": 1155 }, { "epoch": 2.198763670946267, "grad_norm": 0.3470516502857208, "learning_rate": 9.268974277548429e-05, "loss": 0.157, "step": 1156 }, { "epoch": 2.2006657156443175, "grad_norm": 0.3971126079559326, "learning_rate": 9.268339155287394e-05, "loss": 0.1738, "step": 1157 }, { "epoch": 2.202567760342368, "grad_norm": 0.39526277780532837, "learning_rate": 9.267704033026358e-05, "loss": 0.2117, "step": 1158 }, { "epoch": 2.2044698050404183, "grad_norm": 0.31649425625801086, "learning_rate": 9.267068910765323e-05, "loss": 0.1966, "step": 1159 }, { "epoch": 2.206371849738469, "grad_norm": 0.4104944169521332, "learning_rate": 9.266433788504287e-05, "loss": 0.2178, "step": 1160 }, { "epoch": 2.2082738944365192, "grad_norm": 0.3751467168331146, "learning_rate": 9.265798666243253e-05, "loss": 0.1921, "step": 1161 }, { "epoch": 2.2101759391345697, "grad_norm": 0.3348170816898346, "learning_rate": 9.265163543982217e-05, "loss": 0.1533, "step": 1162 }, { "epoch": 2.21207798383262, "grad_norm": 0.39907872676849365, "learning_rate": 9.264528421721181e-05, "loss": 0.1733, "step": 1163 }, { "epoch": 2.2139800285306706, "grad_norm": 0.45442381501197815, "learning_rate": 9.263893299460147e-05, "loss": 0.2065, "step": 1164 }, { "epoch": 2.215882073228721, "grad_norm": 0.37475696206092834, "learning_rate": 9.263258177199111e-05, "loss": 0.1914, "step": 1165 }, { "epoch": 2.2177841179267714, "grad_norm": 0.3757840394973755, "learning_rate": 9.262623054938075e-05, "loss": 0.1781, "step": 1166 }, { "epoch": 2.219686162624822, "grad_norm": 0.3655502200126648, "learning_rate": 9.26198793267704e-05, "loss": 0.1814, "step": 1167 }, { "epoch": 2.2215882073228723, "grad_norm": 0.4219561219215393, "learning_rate": 9.261352810416006e-05, "loss": 0.213, "step": 1168 }, { "epoch": 2.2234902520209223, "grad_norm": 0.3741750419139862, "learning_rate": 9.260717688154971e-05, "loss": 0.1782, "step": 1169 }, { "epoch": 2.2253922967189728, "grad_norm": 0.37189987301826477, "learning_rate": 9.260082565893935e-05, "loss": 0.1783, "step": 1170 }, { "epoch": 2.227294341417023, "grad_norm": 0.2988317608833313, "learning_rate": 9.2594474436329e-05, "loss": 0.1481, "step": 1171 }, { "epoch": 2.2291963861150736, "grad_norm": 0.38000479340553284, "learning_rate": 9.258812321371865e-05, "loss": 0.1843, "step": 1172 }, { "epoch": 2.231098430813124, "grad_norm": 0.30989545583724976, "learning_rate": 9.258177199110829e-05, "loss": 0.1487, "step": 1173 }, { "epoch": 2.2330004755111745, "grad_norm": 0.27984580397605896, "learning_rate": 9.257542076849794e-05, "loss": 0.1445, "step": 1174 }, { "epoch": 2.234902520209225, "grad_norm": 0.3828918933868408, "learning_rate": 9.256906954588759e-05, "loss": 0.1709, "step": 1175 }, { "epoch": 2.2368045649072754, "grad_norm": 0.33677807450294495, "learning_rate": 9.256271832327723e-05, "loss": 0.1656, "step": 1176 }, { "epoch": 2.238706609605326, "grad_norm": 0.37769967317581177, "learning_rate": 9.255636710066688e-05, "loss": 0.2101, "step": 1177 }, { "epoch": 2.2406086543033763, "grad_norm": 0.3978733420372009, "learning_rate": 9.255001587805653e-05, "loss": 0.215, "step": 1178 }, { "epoch": 2.2425106990014267, "grad_norm": 0.3774537146091461, "learning_rate": 9.254366465544618e-05, "loss": 0.1778, "step": 1179 }, { "epoch": 2.2444127436994767, "grad_norm": 0.4117525815963745, "learning_rate": 9.253731343283582e-05, "loss": 0.1801, "step": 1180 }, { "epoch": 2.246314788397527, "grad_norm": 0.41460955142974854, "learning_rate": 9.253096221022547e-05, "loss": 0.1939, "step": 1181 }, { "epoch": 2.2482168330955776, "grad_norm": 0.41124284267425537, "learning_rate": 9.252461098761513e-05, "loss": 0.1944, "step": 1182 }, { "epoch": 2.250118877793628, "grad_norm": 0.39252787828445435, "learning_rate": 9.251825976500476e-05, "loss": 0.2037, "step": 1183 }, { "epoch": 2.2520209224916785, "grad_norm": 0.4118300676345825, "learning_rate": 9.25119085423944e-05, "loss": 0.2067, "step": 1184 }, { "epoch": 2.253922967189729, "grad_norm": 0.43823009729385376, "learning_rate": 9.250555731978407e-05, "loss": 0.2093, "step": 1185 }, { "epoch": 2.2558250118877794, "grad_norm": 0.41397175192832947, "learning_rate": 9.249920609717371e-05, "loss": 0.195, "step": 1186 }, { "epoch": 2.25772705658583, "grad_norm": 0.4286901652812958, "learning_rate": 9.249285487456336e-05, "loss": 0.1777, "step": 1187 }, { "epoch": 2.2596291012838803, "grad_norm": 0.373329758644104, "learning_rate": 9.248650365195301e-05, "loss": 0.1759, "step": 1188 }, { "epoch": 2.2615311459819307, "grad_norm": 0.4786781072616577, "learning_rate": 9.248015242934265e-05, "loss": 0.2509, "step": 1189 }, { "epoch": 2.263433190679981, "grad_norm": 0.41533464193344116, "learning_rate": 9.24738012067323e-05, "loss": 0.1595, "step": 1190 }, { "epoch": 2.265335235378031, "grad_norm": 0.37687090039253235, "learning_rate": 9.246744998412194e-05, "loss": 0.19, "step": 1191 }, { "epoch": 2.2672372800760816, "grad_norm": 0.3623497188091278, "learning_rate": 9.24610987615116e-05, "loss": 0.1723, "step": 1192 }, { "epoch": 2.269139324774132, "grad_norm": 0.378251850605011, "learning_rate": 9.245474753890124e-05, "loss": 0.1773, "step": 1193 }, { "epoch": 2.2710413694721825, "grad_norm": 0.3755147457122803, "learning_rate": 9.244839631629088e-05, "loss": 0.1685, "step": 1194 }, { "epoch": 2.272943414170233, "grad_norm": 0.5196719765663147, "learning_rate": 9.244204509368055e-05, "loss": 0.2665, "step": 1195 }, { "epoch": 2.2748454588682834, "grad_norm": 0.4404764473438263, "learning_rate": 9.243569387107018e-05, "loss": 0.1956, "step": 1196 }, { "epoch": 2.276747503566334, "grad_norm": 0.47750818729400635, "learning_rate": 9.242934264845984e-05, "loss": 0.2164, "step": 1197 }, { "epoch": 2.2786495482643843, "grad_norm": 0.3968189060688019, "learning_rate": 9.242299142584947e-05, "loss": 0.2299, "step": 1198 }, { "epoch": 2.2805515929624347, "grad_norm": 0.4168682396411896, "learning_rate": 9.241664020323913e-05, "loss": 0.1924, "step": 1199 }, { "epoch": 2.282453637660485, "grad_norm": 0.3767165541648865, "learning_rate": 9.241028898062878e-05, "loss": 0.1868, "step": 1200 }, { "epoch": 2.2843556823585356, "grad_norm": 0.37699073553085327, "learning_rate": 9.240393775801842e-05, "loss": 0.1968, "step": 1201 }, { "epoch": 2.2862577270565856, "grad_norm": 0.4355759620666504, "learning_rate": 9.239758653540807e-05, "loss": 0.1988, "step": 1202 }, { "epoch": 2.2881597717546365, "grad_norm": 0.42668578028678894, "learning_rate": 9.239123531279772e-05, "loss": 0.1988, "step": 1203 }, { "epoch": 2.2900618164526865, "grad_norm": 0.44233736395835876, "learning_rate": 9.238488409018736e-05, "loss": 0.2128, "step": 1204 }, { "epoch": 2.291963861150737, "grad_norm": 0.31429731845855713, "learning_rate": 9.237853286757701e-05, "loss": 0.1527, "step": 1205 }, { "epoch": 2.2938659058487874, "grad_norm": 0.38366618752479553, "learning_rate": 9.237218164496666e-05, "loss": 0.1747, "step": 1206 }, { "epoch": 2.295767950546838, "grad_norm": 0.3685773015022278, "learning_rate": 9.23658304223563e-05, "loss": 0.183, "step": 1207 }, { "epoch": 2.2976699952448882, "grad_norm": 0.349924772977829, "learning_rate": 9.235947919974595e-05, "loss": 0.1641, "step": 1208 }, { "epoch": 2.2995720399429387, "grad_norm": 0.3128054738044739, "learning_rate": 9.23531279771356e-05, "loss": 0.1682, "step": 1209 }, { "epoch": 2.301474084640989, "grad_norm": 0.4457269608974457, "learning_rate": 9.234677675452526e-05, "loss": 0.1888, "step": 1210 }, { "epoch": 2.3033761293390396, "grad_norm": 0.37438902258872986, "learning_rate": 9.23404255319149e-05, "loss": 0.1612, "step": 1211 }, { "epoch": 2.30527817403709, "grad_norm": 0.3830793499946594, "learning_rate": 9.233407430930455e-05, "loss": 0.1825, "step": 1212 }, { "epoch": 2.3071802187351405, "grad_norm": 0.4047216773033142, "learning_rate": 9.23277230866942e-05, "loss": 0.1874, "step": 1213 }, { "epoch": 2.309082263433191, "grad_norm": 0.400716096162796, "learning_rate": 9.232137186408384e-05, "loss": 0.165, "step": 1214 }, { "epoch": 2.310984308131241, "grad_norm": 0.35491228103637695, "learning_rate": 9.231502064147349e-05, "loss": 0.1428, "step": 1215 }, { "epoch": 2.3128863528292913, "grad_norm": 0.3040875494480133, "learning_rate": 9.230866941886314e-05, "loss": 0.1315, "step": 1216 }, { "epoch": 2.314788397527342, "grad_norm": 0.40058350563049316, "learning_rate": 9.230231819625278e-05, "loss": 0.2016, "step": 1217 }, { "epoch": 2.316690442225392, "grad_norm": 0.33165568113327026, "learning_rate": 9.229596697364243e-05, "loss": 0.1668, "step": 1218 }, { "epoch": 2.3185924869234427, "grad_norm": 0.29281625151634216, "learning_rate": 9.228961575103208e-05, "loss": 0.1577, "step": 1219 }, { "epoch": 2.320494531621493, "grad_norm": 0.4083446264266968, "learning_rate": 9.228326452842172e-05, "loss": 0.174, "step": 1220 }, { "epoch": 2.3223965763195435, "grad_norm": 0.3308553695678711, "learning_rate": 9.227691330581137e-05, "loss": 0.21, "step": 1221 }, { "epoch": 2.324298621017594, "grad_norm": 0.4102175831794739, "learning_rate": 9.227056208320102e-05, "loss": 0.205, "step": 1222 }, { "epoch": 2.3262006657156444, "grad_norm": 0.48705750703811646, "learning_rate": 9.226421086059068e-05, "loss": 0.2544, "step": 1223 }, { "epoch": 2.328102710413695, "grad_norm": 0.3305780291557312, "learning_rate": 9.225785963798031e-05, "loss": 0.1786, "step": 1224 }, { "epoch": 2.3300047551117453, "grad_norm": 0.3046979308128357, "learning_rate": 9.225150841536995e-05, "loss": 0.1325, "step": 1225 }, { "epoch": 2.3319067998097953, "grad_norm": 0.4403087794780731, "learning_rate": 9.224515719275962e-05, "loss": 0.2288, "step": 1226 }, { "epoch": 2.3338088445078458, "grad_norm": 0.3797864317893982, "learning_rate": 9.223880597014926e-05, "loss": 0.2068, "step": 1227 }, { "epoch": 2.335710889205896, "grad_norm": 0.34793582558631897, "learning_rate": 9.223245474753891e-05, "loss": 0.182, "step": 1228 }, { "epoch": 2.3376129339039466, "grad_norm": 0.30754920840263367, "learning_rate": 9.222610352492856e-05, "loss": 0.144, "step": 1229 }, { "epoch": 2.339514978601997, "grad_norm": 0.4364961087703705, "learning_rate": 9.22197523023182e-05, "loss": 0.1824, "step": 1230 }, { "epoch": 2.3414170233000475, "grad_norm": 0.3395443260669708, "learning_rate": 9.221340107970785e-05, "loss": 0.1691, "step": 1231 }, { "epoch": 2.343319067998098, "grad_norm": 0.34626251459121704, "learning_rate": 9.220704985709749e-05, "loss": 0.2285, "step": 1232 }, { "epoch": 2.3452211126961484, "grad_norm": 0.316518098115921, "learning_rate": 9.220069863448715e-05, "loss": 0.1469, "step": 1233 }, { "epoch": 2.347123157394199, "grad_norm": 0.38813212513923645, "learning_rate": 9.219434741187679e-05, "loss": 0.1907, "step": 1234 }, { "epoch": 2.3490252020922493, "grad_norm": 0.3442121744155884, "learning_rate": 9.218799618926643e-05, "loss": 0.1398, "step": 1235 }, { "epoch": 2.3509272467902997, "grad_norm": 0.3373865783214569, "learning_rate": 9.218164496665608e-05, "loss": 0.1477, "step": 1236 }, { "epoch": 2.3528292914883497, "grad_norm": 0.39781641960144043, "learning_rate": 9.217529374404573e-05, "loss": 0.1766, "step": 1237 }, { "epoch": 2.3547313361864006, "grad_norm": 0.25478801131248474, "learning_rate": 9.216894252143537e-05, "loss": 0.1301, "step": 1238 }, { "epoch": 2.3566333808844506, "grad_norm": 0.350087970495224, "learning_rate": 9.216259129882502e-05, "loss": 0.161, "step": 1239 }, { "epoch": 2.358535425582501, "grad_norm": 0.4105963408946991, "learning_rate": 9.215624007621468e-05, "loss": 0.1887, "step": 1240 }, { "epoch": 2.3604374702805515, "grad_norm": 0.4141649007797241, "learning_rate": 9.214988885360433e-05, "loss": 0.333, "step": 1241 }, { "epoch": 2.362339514978602, "grad_norm": 0.4416482448577881, "learning_rate": 9.214353763099397e-05, "loss": 0.2329, "step": 1242 }, { "epoch": 2.3642415596766524, "grad_norm": 0.4285755753517151, "learning_rate": 9.213718640838362e-05, "loss": 0.2194, "step": 1243 }, { "epoch": 2.366143604374703, "grad_norm": 0.33636924624443054, "learning_rate": 9.213083518577327e-05, "loss": 0.1853, "step": 1244 }, { "epoch": 2.3680456490727533, "grad_norm": 0.40267783403396606, "learning_rate": 9.212448396316291e-05, "loss": 0.1837, "step": 1245 }, { "epoch": 2.3699476937708037, "grad_norm": 0.3251781463623047, "learning_rate": 9.211813274055256e-05, "loss": 0.1853, "step": 1246 }, { "epoch": 2.371849738468854, "grad_norm": 0.3559510111808777, "learning_rate": 9.211178151794221e-05, "loss": 0.1735, "step": 1247 }, { "epoch": 2.3737517831669046, "grad_norm": 0.3483911454677582, "learning_rate": 9.210543029533185e-05, "loss": 0.156, "step": 1248 }, { "epoch": 2.375653827864955, "grad_norm": 0.4093637764453888, "learning_rate": 9.20990790727215e-05, "loss": 0.2013, "step": 1249 }, { "epoch": 2.377555872563005, "grad_norm": 0.38886240124702454, "learning_rate": 9.209272785011115e-05, "loss": 0.1723, "step": 1250 }, { "epoch": 2.3794579172610555, "grad_norm": 0.3627004325389862, "learning_rate": 9.20863766275008e-05, "loss": 0.1639, "step": 1251 }, { "epoch": 2.381359961959106, "grad_norm": 0.33721840381622314, "learning_rate": 9.208002540489044e-05, "loss": 0.1613, "step": 1252 }, { "epoch": 2.3832620066571564, "grad_norm": 0.4337291121482849, "learning_rate": 9.20736741822801e-05, "loss": 0.2036, "step": 1253 }, { "epoch": 2.385164051355207, "grad_norm": 0.43212467432022095, "learning_rate": 9.206732295966975e-05, "loss": 0.1925, "step": 1254 }, { "epoch": 2.3870660960532573, "grad_norm": 0.3450334966182709, "learning_rate": 9.206097173705939e-05, "loss": 0.1489, "step": 1255 }, { "epoch": 2.3889681407513077, "grad_norm": 0.36295151710510254, "learning_rate": 9.205462051444902e-05, "loss": 0.1801, "step": 1256 }, { "epoch": 2.390870185449358, "grad_norm": 0.469532310962677, "learning_rate": 9.204826929183869e-05, "loss": 0.2163, "step": 1257 }, { "epoch": 2.3927722301474086, "grad_norm": 0.4618028402328491, "learning_rate": 9.204191806922833e-05, "loss": 0.2175, "step": 1258 }, { "epoch": 2.394674274845459, "grad_norm": 0.3891139030456543, "learning_rate": 9.203556684661798e-05, "loss": 0.1585, "step": 1259 }, { "epoch": 2.3965763195435095, "grad_norm": 0.4574741721153259, "learning_rate": 9.202921562400763e-05, "loss": 0.2545, "step": 1260 }, { "epoch": 2.3984783642415595, "grad_norm": 0.49759337306022644, "learning_rate": 9.202286440139727e-05, "loss": 0.2208, "step": 1261 }, { "epoch": 2.40038040893961, "grad_norm": 0.3180585503578186, "learning_rate": 9.201651317878692e-05, "loss": 0.157, "step": 1262 }, { "epoch": 2.4022824536376604, "grad_norm": 0.3678848147392273, "learning_rate": 9.201016195617656e-05, "loss": 0.1891, "step": 1263 }, { "epoch": 2.404184498335711, "grad_norm": 0.3016449809074402, "learning_rate": 9.200381073356623e-05, "loss": 0.1295, "step": 1264 }, { "epoch": 2.4060865430337612, "grad_norm": 0.522779643535614, "learning_rate": 9.199745951095586e-05, "loss": 0.2814, "step": 1265 }, { "epoch": 2.4079885877318117, "grad_norm": 0.45210519433021545, "learning_rate": 9.19911082883455e-05, "loss": 0.234, "step": 1266 }, { "epoch": 2.409890632429862, "grad_norm": 0.3812367022037506, "learning_rate": 9.198475706573517e-05, "loss": 0.2104, "step": 1267 }, { "epoch": 2.4117926771279126, "grad_norm": 0.3120013177394867, "learning_rate": 9.19784058431248e-05, "loss": 0.1511, "step": 1268 }, { "epoch": 2.413694721825963, "grad_norm": 0.34164851903915405, "learning_rate": 9.197205462051446e-05, "loss": 0.1607, "step": 1269 }, { "epoch": 2.4155967665240135, "grad_norm": 0.3127415180206299, "learning_rate": 9.19657033979041e-05, "loss": 0.143, "step": 1270 }, { "epoch": 2.417498811222064, "grad_norm": 0.4628545641899109, "learning_rate": 9.195935217529375e-05, "loss": 0.2187, "step": 1271 }, { "epoch": 2.419400855920114, "grad_norm": 0.3645714223384857, "learning_rate": 9.19530009526834e-05, "loss": 0.1648, "step": 1272 }, { "epoch": 2.4213029006181643, "grad_norm": 0.41127142310142517, "learning_rate": 9.194664973007304e-05, "loss": 0.1712, "step": 1273 }, { "epoch": 2.4232049453162148, "grad_norm": 0.48663556575775146, "learning_rate": 9.194029850746269e-05, "loss": 0.2713, "step": 1274 }, { "epoch": 2.425106990014265, "grad_norm": 0.3965604305267334, "learning_rate": 9.193394728485234e-05, "loss": 0.1766, "step": 1275 }, { "epoch": 2.4270090347123157, "grad_norm": 0.4565601646900177, "learning_rate": 9.192759606224198e-05, "loss": 0.1827, "step": 1276 }, { "epoch": 2.428911079410366, "grad_norm": 0.4272227883338928, "learning_rate": 9.192124483963163e-05, "loss": 0.1874, "step": 1277 }, { "epoch": 2.4308131241084165, "grad_norm": 0.42560452222824097, "learning_rate": 9.191489361702128e-05, "loss": 0.1829, "step": 1278 }, { "epoch": 2.432715168806467, "grad_norm": 0.30827009677886963, "learning_rate": 9.190854239441092e-05, "loss": 0.1747, "step": 1279 }, { "epoch": 2.4346172135045174, "grad_norm": 0.3780437707901001, "learning_rate": 9.190219117180057e-05, "loss": 0.1955, "step": 1280 }, { "epoch": 2.436519258202568, "grad_norm": 0.32639580965042114, "learning_rate": 9.189583994919023e-05, "loss": 0.1568, "step": 1281 }, { "epoch": 2.4384213029006183, "grad_norm": 0.37228289246559143, "learning_rate": 9.188948872657988e-05, "loss": 0.1871, "step": 1282 }, { "epoch": 2.4403233475986688, "grad_norm": 0.4045466482639313, "learning_rate": 9.188313750396952e-05, "loss": 0.2237, "step": 1283 }, { "epoch": 2.442225392296719, "grad_norm": 0.40609246492385864, "learning_rate": 9.187678628135917e-05, "loss": 0.2313, "step": 1284 }, { "epoch": 2.444127436994769, "grad_norm": 0.36473485827445984, "learning_rate": 9.187043505874882e-05, "loss": 0.2528, "step": 1285 }, { "epoch": 2.4460294816928196, "grad_norm": 0.4154009222984314, "learning_rate": 9.186408383613846e-05, "loss": 0.215, "step": 1286 }, { "epoch": 2.44793152639087, "grad_norm": 0.33488062024116516, "learning_rate": 9.185773261352811e-05, "loss": 0.1666, "step": 1287 }, { "epoch": 2.4498335710889205, "grad_norm": 0.392004132270813, "learning_rate": 9.185138139091776e-05, "loss": 0.2127, "step": 1288 }, { "epoch": 2.451735615786971, "grad_norm": 0.32925739884376526, "learning_rate": 9.18450301683074e-05, "loss": 0.1459, "step": 1289 }, { "epoch": 2.4536376604850214, "grad_norm": 0.3380909264087677, "learning_rate": 9.183867894569705e-05, "loss": 0.1482, "step": 1290 }, { "epoch": 2.455539705183072, "grad_norm": 0.47436705231666565, "learning_rate": 9.18323277230867e-05, "loss": 0.2652, "step": 1291 }, { "epoch": 2.4574417498811223, "grad_norm": 0.39543116092681885, "learning_rate": 9.182597650047634e-05, "loss": 0.1762, "step": 1292 }, { "epoch": 2.4593437945791727, "grad_norm": 0.4776802659034729, "learning_rate": 9.181962527786599e-05, "loss": 0.1967, "step": 1293 }, { "epoch": 2.461245839277223, "grad_norm": 0.37519994378089905, "learning_rate": 9.181327405525563e-05, "loss": 0.1909, "step": 1294 }, { "epoch": 2.4631478839752736, "grad_norm": 0.37666913866996765, "learning_rate": 9.18069228326453e-05, "loss": 0.1477, "step": 1295 }, { "epoch": 2.4650499286733236, "grad_norm": 0.3830261528491974, "learning_rate": 9.180057161003494e-05, "loss": 0.1825, "step": 1296 }, { "epoch": 2.466951973371374, "grad_norm": 0.4064732789993286, "learning_rate": 9.179422038742457e-05, "loss": 0.2, "step": 1297 }, { "epoch": 2.4688540180694245, "grad_norm": 0.318314790725708, "learning_rate": 9.178786916481424e-05, "loss": 0.1543, "step": 1298 }, { "epoch": 2.470756062767475, "grad_norm": 0.3804973065853119, "learning_rate": 9.178151794220388e-05, "loss": 0.2248, "step": 1299 }, { "epoch": 2.4726581074655254, "grad_norm": 0.4222256541252136, "learning_rate": 9.177516671959353e-05, "loss": 0.2037, "step": 1300 }, { "epoch": 2.474560152163576, "grad_norm": 0.4317629337310791, "learning_rate": 9.176881549698317e-05, "loss": 0.1914, "step": 1301 }, { "epoch": 2.4764621968616263, "grad_norm": 0.4674796760082245, "learning_rate": 9.176246427437282e-05, "loss": 0.212, "step": 1302 }, { "epoch": 2.4783642415596767, "grad_norm": 0.40157684683799744, "learning_rate": 9.175611305176247e-05, "loss": 0.1948, "step": 1303 }, { "epoch": 2.480266286257727, "grad_norm": 0.37824416160583496, "learning_rate": 9.174976182915211e-05, "loss": 0.1849, "step": 1304 }, { "epoch": 2.4821683309557776, "grad_norm": 0.5870863199234009, "learning_rate": 9.174341060654177e-05, "loss": 0.1586, "step": 1305 }, { "epoch": 2.484070375653828, "grad_norm": 0.3794877529144287, "learning_rate": 9.173705938393141e-05, "loss": 0.2162, "step": 1306 }, { "epoch": 2.485972420351878, "grad_norm": 0.40509578585624695, "learning_rate": 9.173070816132105e-05, "loss": 0.1895, "step": 1307 }, { "epoch": 2.4878744650499285, "grad_norm": 0.37314295768737793, "learning_rate": 9.17243569387107e-05, "loss": 0.1926, "step": 1308 }, { "epoch": 2.489776509747979, "grad_norm": 0.32264095544815063, "learning_rate": 9.171800571610035e-05, "loss": 0.1385, "step": 1309 }, { "epoch": 2.4916785544460294, "grad_norm": 0.43269702792167664, "learning_rate": 9.171165449348999e-05, "loss": 0.2189, "step": 1310 }, { "epoch": 2.49358059914408, "grad_norm": 0.330098956823349, "learning_rate": 9.170530327087964e-05, "loss": 0.168, "step": 1311 }, { "epoch": 2.4954826438421303, "grad_norm": 0.2726501524448395, "learning_rate": 9.16989520482693e-05, "loss": 0.1306, "step": 1312 }, { "epoch": 2.4973846885401807, "grad_norm": 0.27615344524383545, "learning_rate": 9.169260082565895e-05, "loss": 0.1361, "step": 1313 }, { "epoch": 2.499286733238231, "grad_norm": 0.3685866594314575, "learning_rate": 9.168624960304859e-05, "loss": 0.1901, "step": 1314 }, { "epoch": 2.5011887779362816, "grad_norm": 0.323897123336792, "learning_rate": 9.167989838043824e-05, "loss": 0.2608, "step": 1315 }, { "epoch": 2.503090822634332, "grad_norm": 0.6715079545974731, "learning_rate": 9.167354715782789e-05, "loss": 0.199, "step": 1316 }, { "epoch": 2.5049928673323825, "grad_norm": 0.32039186358451843, "learning_rate": 9.166719593521753e-05, "loss": 0.1723, "step": 1317 }, { "epoch": 2.5068949120304325, "grad_norm": 0.3974270224571228, "learning_rate": 9.166084471260718e-05, "loss": 0.1659, "step": 1318 }, { "epoch": 2.5087969567284834, "grad_norm": 0.3953278362751007, "learning_rate": 9.165449348999683e-05, "loss": 0.1879, "step": 1319 }, { "epoch": 2.5106990014265333, "grad_norm": 0.4061002731323242, "learning_rate": 9.164814226738647e-05, "loss": 0.1858, "step": 1320 }, { "epoch": 2.512601046124584, "grad_norm": 0.3816406726837158, "learning_rate": 9.164179104477612e-05, "loss": 0.1899, "step": 1321 }, { "epoch": 2.5145030908226342, "grad_norm": 0.3856441378593445, "learning_rate": 9.163543982216577e-05, "loss": 0.1727, "step": 1322 }, { "epoch": 2.5164051355206847, "grad_norm": 0.47267359495162964, "learning_rate": 9.162908859955543e-05, "loss": 0.2137, "step": 1323 }, { "epoch": 2.518307180218735, "grad_norm": 0.41764524579048157, "learning_rate": 9.162273737694506e-05, "loss": 0.2138, "step": 1324 }, { "epoch": 2.5202092249167856, "grad_norm": 0.42864158749580383, "learning_rate": 9.161638615433472e-05, "loss": 0.1919, "step": 1325 }, { "epoch": 2.522111269614836, "grad_norm": 0.5067504048347473, "learning_rate": 9.161003493172437e-05, "loss": 0.2068, "step": 1326 }, { "epoch": 2.5240133143128864, "grad_norm": 0.430951863527298, "learning_rate": 9.1603683709114e-05, "loss": 0.2195, "step": 1327 }, { "epoch": 2.525915359010937, "grad_norm": 0.37973999977111816, "learning_rate": 9.159733248650364e-05, "loss": 0.1799, "step": 1328 }, { "epoch": 2.527817403708987, "grad_norm": 0.362768292427063, "learning_rate": 9.159098126389331e-05, "loss": 0.1555, "step": 1329 }, { "epoch": 2.5297194484070378, "grad_norm": 0.41433513164520264, "learning_rate": 9.158463004128295e-05, "loss": 0.1958, "step": 1330 }, { "epoch": 2.5316214931050878, "grad_norm": 0.3091717064380646, "learning_rate": 9.15782788186726e-05, "loss": 0.1622, "step": 1331 }, { "epoch": 2.533523537803138, "grad_norm": 0.35242778062820435, "learning_rate": 9.157192759606225e-05, "loss": 0.1627, "step": 1332 }, { "epoch": 2.5354255825011887, "grad_norm": 0.38102760910987854, "learning_rate": 9.156557637345189e-05, "loss": 0.1663, "step": 1333 }, { "epoch": 2.537327627199239, "grad_norm": 0.4313855469226837, "learning_rate": 9.155922515084154e-05, "loss": 0.208, "step": 1334 }, { "epoch": 2.5392296718972895, "grad_norm": 0.33921730518341064, "learning_rate": 9.155287392823118e-05, "loss": 0.1572, "step": 1335 }, { "epoch": 2.54113171659534, "grad_norm": 0.3824930489063263, "learning_rate": 9.154652270562085e-05, "loss": 0.1986, "step": 1336 }, { "epoch": 2.5430337612933904, "grad_norm": 0.33059945702552795, "learning_rate": 9.154017148301048e-05, "loss": 0.156, "step": 1337 }, { "epoch": 2.544935805991441, "grad_norm": 0.4880346357822418, "learning_rate": 9.153382026040012e-05, "loss": 0.2319, "step": 1338 }, { "epoch": 2.5468378506894913, "grad_norm": 0.27151229977607727, "learning_rate": 9.152746903778979e-05, "loss": 0.128, "step": 1339 }, { "epoch": 2.5487398953875418, "grad_norm": 0.35515275597572327, "learning_rate": 9.152111781517943e-05, "loss": 0.1685, "step": 1340 }, { "epoch": 2.550641940085592, "grad_norm": 0.41455206274986267, "learning_rate": 9.151476659256908e-05, "loss": 0.2354, "step": 1341 }, { "epoch": 2.552543984783642, "grad_norm": 0.3215075731277466, "learning_rate": 9.150841536995872e-05, "loss": 0.1653, "step": 1342 }, { "epoch": 2.554446029481693, "grad_norm": 0.34158623218536377, "learning_rate": 9.150206414734837e-05, "loss": 0.1598, "step": 1343 }, { "epoch": 2.556348074179743, "grad_norm": 0.4195705056190491, "learning_rate": 9.149571292473802e-05, "loss": 0.228, "step": 1344 }, { "epoch": 2.5582501188777935, "grad_norm": 0.34753212332725525, "learning_rate": 9.148936170212766e-05, "loss": 0.1948, "step": 1345 }, { "epoch": 2.560152163575844, "grad_norm": 0.43792131543159485, "learning_rate": 9.148301047951731e-05, "loss": 0.2191, "step": 1346 }, { "epoch": 2.5620542082738944, "grad_norm": 0.35464513301849365, "learning_rate": 9.147665925690696e-05, "loss": 0.1555, "step": 1347 }, { "epoch": 2.563956252971945, "grad_norm": 0.50618976354599, "learning_rate": 9.14703080342966e-05, "loss": 0.2262, "step": 1348 }, { "epoch": 2.5658582976699953, "grad_norm": 0.3603616952896118, "learning_rate": 9.146395681168625e-05, "loss": 0.1647, "step": 1349 }, { "epoch": 2.5677603423680457, "grad_norm": 0.486316978931427, "learning_rate": 9.14576055890759e-05, "loss": 0.2052, "step": 1350 }, { "epoch": 2.569662387066096, "grad_norm": 0.45915400981903076, "learning_rate": 9.145125436646554e-05, "loss": 0.218, "step": 1351 }, { "epoch": 2.5715644317641466, "grad_norm": 0.3178432583808899, "learning_rate": 9.14449031438552e-05, "loss": 0.1453, "step": 1352 }, { "epoch": 2.5734664764621966, "grad_norm": 0.3939111828804016, "learning_rate": 9.143855192124485e-05, "loss": 0.1784, "step": 1353 }, { "epoch": 2.5753685211602475, "grad_norm": 0.3399297595024109, "learning_rate": 9.14322006986345e-05, "loss": 0.1644, "step": 1354 }, { "epoch": 2.5772705658582975, "grad_norm": 0.39880868792533875, "learning_rate": 9.142584947602414e-05, "loss": 0.2139, "step": 1355 }, { "epoch": 2.579172610556348, "grad_norm": 0.40534335374832153, "learning_rate": 9.141949825341379e-05, "loss": 0.1872, "step": 1356 }, { "epoch": 2.5810746552543984, "grad_norm": 0.3201380968093872, "learning_rate": 9.141314703080344e-05, "loss": 0.1557, "step": 1357 }, { "epoch": 2.582976699952449, "grad_norm": 0.31011682748794556, "learning_rate": 9.140679580819308e-05, "loss": 0.1301, "step": 1358 }, { "epoch": 2.5848787446504993, "grad_norm": 0.3697820007801056, "learning_rate": 9.140044458558273e-05, "loss": 0.1856, "step": 1359 }, { "epoch": 2.5867807893485497, "grad_norm": 0.291369765996933, "learning_rate": 9.139409336297238e-05, "loss": 0.1323, "step": 1360 }, { "epoch": 2.5886828340466, "grad_norm": 0.4111400842666626, "learning_rate": 9.138774214036202e-05, "loss": 0.2271, "step": 1361 }, { "epoch": 2.5905848787446506, "grad_norm": 0.4169454872608185, "learning_rate": 9.138139091775167e-05, "loss": 0.199, "step": 1362 }, { "epoch": 2.592486923442701, "grad_norm": 0.4209660589694977, "learning_rate": 9.137503969514132e-05, "loss": 0.2296, "step": 1363 }, { "epoch": 2.594388968140751, "grad_norm": 0.3968026041984558, "learning_rate": 9.136868847253096e-05, "loss": 0.2174, "step": 1364 }, { "epoch": 2.596291012838802, "grad_norm": 0.3477707803249359, "learning_rate": 9.136233724992061e-05, "loss": 0.1818, "step": 1365 }, { "epoch": 2.598193057536852, "grad_norm": 0.3979746699333191, "learning_rate": 9.135598602731025e-05, "loss": 0.2373, "step": 1366 }, { "epoch": 2.6000951022349024, "grad_norm": 0.32050615549087524, "learning_rate": 9.134963480469992e-05, "loss": 0.1562, "step": 1367 }, { "epoch": 2.601997146932953, "grad_norm": 0.4675930142402649, "learning_rate": 9.134328358208956e-05, "loss": 0.2942, "step": 1368 }, { "epoch": 2.6038991916310033, "grad_norm": 0.32259052991867065, "learning_rate": 9.13369323594792e-05, "loss": 0.1411, "step": 1369 }, { "epoch": 2.6058012363290537, "grad_norm": 0.3838285803794861, "learning_rate": 9.133058113686886e-05, "loss": 0.2098, "step": 1370 }, { "epoch": 2.607703281027104, "grad_norm": 0.4749825596809387, "learning_rate": 9.13242299142585e-05, "loss": 0.2621, "step": 1371 }, { "epoch": 2.6096053257251546, "grad_norm": 0.3093271255493164, "learning_rate": 9.131787869164815e-05, "loss": 0.1389, "step": 1372 }, { "epoch": 2.611507370423205, "grad_norm": 0.4896688461303711, "learning_rate": 9.131152746903779e-05, "loss": 0.2347, "step": 1373 }, { "epoch": 2.6134094151212555, "grad_norm": 0.39409998059272766, "learning_rate": 9.130517624642744e-05, "loss": 0.2224, "step": 1374 }, { "epoch": 2.615311459819306, "grad_norm": 0.39578184485435486, "learning_rate": 9.129882502381709e-05, "loss": 0.1963, "step": 1375 }, { "epoch": 2.6172135045173563, "grad_norm": 0.34999507665634155, "learning_rate": 9.129247380120673e-05, "loss": 0.1612, "step": 1376 }, { "epoch": 2.6191155492154063, "grad_norm": 0.33919695019721985, "learning_rate": 9.12861225785964e-05, "loss": 0.1813, "step": 1377 }, { "epoch": 2.6210175939134572, "grad_norm": 0.3273175060749054, "learning_rate": 9.127977135598603e-05, "loss": 0.1436, "step": 1378 }, { "epoch": 2.6229196386115072, "grad_norm": 0.4175270199775696, "learning_rate": 9.127342013337567e-05, "loss": 0.1832, "step": 1379 }, { "epoch": 2.6248216833095577, "grad_norm": 0.3580436408519745, "learning_rate": 9.126706891076532e-05, "loss": 0.1569, "step": 1380 }, { "epoch": 2.626723728007608, "grad_norm": 0.3683449625968933, "learning_rate": 9.126071768815498e-05, "loss": 0.1955, "step": 1381 }, { "epoch": 2.6286257727056586, "grad_norm": 0.3830251395702362, "learning_rate": 9.125436646554461e-05, "loss": 0.1626, "step": 1382 }, { "epoch": 2.630527817403709, "grad_norm": 0.3428569734096527, "learning_rate": 9.124801524293427e-05, "loss": 0.1477, "step": 1383 }, { "epoch": 2.6324298621017594, "grad_norm": 0.4621574878692627, "learning_rate": 9.124166402032392e-05, "loss": 0.1675, "step": 1384 }, { "epoch": 2.63433190679981, "grad_norm": 0.40000998973846436, "learning_rate": 9.123531279771357e-05, "loss": 0.1751, "step": 1385 }, { "epoch": 2.6362339514978603, "grad_norm": 0.4612349271774292, "learning_rate": 9.122896157510321e-05, "loss": 0.2165, "step": 1386 }, { "epoch": 2.6381359961959108, "grad_norm": 0.47919005155563354, "learning_rate": 9.122261035249286e-05, "loss": 0.2, "step": 1387 }, { "epoch": 2.6400380408939608, "grad_norm": 0.5020009875297546, "learning_rate": 9.121625912988251e-05, "loss": 0.1997, "step": 1388 }, { "epoch": 2.6419400855920117, "grad_norm": 0.4959258437156677, "learning_rate": 9.120990790727215e-05, "loss": 0.1903, "step": 1389 }, { "epoch": 2.6438421302900617, "grad_norm": 0.4882603585720062, "learning_rate": 9.12035566846618e-05, "loss": 0.2082, "step": 1390 }, { "epoch": 2.645744174988112, "grad_norm": 0.37479934096336365, "learning_rate": 9.119720546205145e-05, "loss": 0.179, "step": 1391 }, { "epoch": 2.6476462196861625, "grad_norm": 0.5104106068611145, "learning_rate": 9.119085423944109e-05, "loss": 0.2281, "step": 1392 }, { "epoch": 2.649548264384213, "grad_norm": 0.3893817663192749, "learning_rate": 9.118450301683074e-05, "loss": 0.2324, "step": 1393 }, { "epoch": 2.6514503090822634, "grad_norm": 0.35762450098991394, "learning_rate": 9.11781517942204e-05, "loss": 0.1933, "step": 1394 }, { "epoch": 2.653352353780314, "grad_norm": 0.37635737657546997, "learning_rate": 9.117180057161005e-05, "loss": 0.1869, "step": 1395 }, { "epoch": 2.6552543984783643, "grad_norm": 0.3230188488960266, "learning_rate": 9.116544934899969e-05, "loss": 0.1576, "step": 1396 }, { "epoch": 2.6571564431764148, "grad_norm": 0.3708724081516266, "learning_rate": 9.115909812638934e-05, "loss": 0.168, "step": 1397 }, { "epoch": 2.659058487874465, "grad_norm": 0.34403741359710693, "learning_rate": 9.115274690377899e-05, "loss": 0.2721, "step": 1398 }, { "epoch": 2.660960532572515, "grad_norm": 0.2812383770942688, "learning_rate": 9.114639568116863e-05, "loss": 0.1605, "step": 1399 }, { "epoch": 2.662862577270566, "grad_norm": 0.39116060733795166, "learning_rate": 9.114004445855827e-05, "loss": 0.1843, "step": 1400 }, { "epoch": 2.664764621968616, "grad_norm": 0.3641309440135956, "learning_rate": 9.113369323594793e-05, "loss": 0.1818, "step": 1401 }, { "epoch": 2.6666666666666665, "grad_norm": 0.4198780953884125, "learning_rate": 9.112734201333757e-05, "loss": 0.2044, "step": 1402 }, { "epoch": 2.668568711364717, "grad_norm": 0.3912922739982605, "learning_rate": 9.112099079072722e-05, "loss": 0.1881, "step": 1403 }, { "epoch": 2.6704707560627674, "grad_norm": 0.4235263764858246, "learning_rate": 9.111463956811686e-05, "loss": 0.2034, "step": 1404 }, { "epoch": 2.672372800760818, "grad_norm": 0.3731124699115753, "learning_rate": 9.110828834550651e-05, "loss": 0.1803, "step": 1405 }, { "epoch": 2.6742748454588683, "grad_norm": 0.3907954692840576, "learning_rate": 9.110193712289616e-05, "loss": 0.2074, "step": 1406 }, { "epoch": 2.6761768901569187, "grad_norm": 0.3954913914203644, "learning_rate": 9.10955859002858e-05, "loss": 0.1797, "step": 1407 }, { "epoch": 2.678078934854969, "grad_norm": 0.5066515207290649, "learning_rate": 9.108923467767547e-05, "loss": 0.2096, "step": 1408 }, { "epoch": 2.6799809795530196, "grad_norm": 0.4380313456058502, "learning_rate": 9.10828834550651e-05, "loss": 0.2064, "step": 1409 }, { "epoch": 2.68188302425107, "grad_norm": 0.3758976459503174, "learning_rate": 9.107653223245474e-05, "loss": 0.2076, "step": 1410 }, { "epoch": 2.6837850689491205, "grad_norm": 0.38098809123039246, "learning_rate": 9.10701810098444e-05, "loss": 0.1727, "step": 1411 }, { "epoch": 2.6856871136471705, "grad_norm": 0.4027041792869568, "learning_rate": 9.106382978723405e-05, "loss": 0.154, "step": 1412 }, { "epoch": 2.6875891583452214, "grad_norm": 0.307954877614975, "learning_rate": 9.10574785646237e-05, "loss": 0.1766, "step": 1413 }, { "epoch": 2.6894912030432714, "grad_norm": 0.4232465326786041, "learning_rate": 9.105112734201334e-05, "loss": 0.1866, "step": 1414 }, { "epoch": 2.691393247741322, "grad_norm": 0.4296838641166687, "learning_rate": 9.104477611940299e-05, "loss": 0.1813, "step": 1415 }, { "epoch": 2.6932952924393723, "grad_norm": 0.3334490954875946, "learning_rate": 9.103842489679264e-05, "loss": 0.1576, "step": 1416 }, { "epoch": 2.6951973371374227, "grad_norm": 0.42984020709991455, "learning_rate": 9.103207367418228e-05, "loss": 0.1945, "step": 1417 }, { "epoch": 2.697099381835473, "grad_norm": 0.4306494891643524, "learning_rate": 9.102572245157193e-05, "loss": 0.179, "step": 1418 }, { "epoch": 2.6990014265335236, "grad_norm": 0.38049131631851196, "learning_rate": 9.101937122896158e-05, "loss": 0.1951, "step": 1419 }, { "epoch": 2.700903471231574, "grad_norm": 0.3691817820072174, "learning_rate": 9.101302000635122e-05, "loss": 0.1725, "step": 1420 }, { "epoch": 2.7028055159296245, "grad_norm": 0.32240816950798035, "learning_rate": 9.100666878374087e-05, "loss": 0.1852, "step": 1421 }, { "epoch": 2.704707560627675, "grad_norm": 0.3735920488834381, "learning_rate": 9.100031756113053e-05, "loss": 0.1857, "step": 1422 }, { "epoch": 2.706609605325725, "grad_norm": 0.3693629801273346, "learning_rate": 9.099396633852016e-05, "loss": 0.1698, "step": 1423 }, { "epoch": 2.708511650023776, "grad_norm": 0.40846189856529236, "learning_rate": 9.098761511590982e-05, "loss": 0.2531, "step": 1424 }, { "epoch": 2.710413694721826, "grad_norm": 0.3387136161327362, "learning_rate": 9.098126389329947e-05, "loss": 0.152, "step": 1425 }, { "epoch": 2.7123157394198762, "grad_norm": 0.43113890290260315, "learning_rate": 9.097491267068912e-05, "loss": 0.1939, "step": 1426 }, { "epoch": 2.7142177841179267, "grad_norm": 0.45811060070991516, "learning_rate": 9.096856144807876e-05, "loss": 0.2217, "step": 1427 }, { "epoch": 2.716119828815977, "grad_norm": 0.3742765486240387, "learning_rate": 9.096221022546841e-05, "loss": 0.183, "step": 1428 }, { "epoch": 2.7180218735140276, "grad_norm": 0.39835286140441895, "learning_rate": 9.095585900285806e-05, "loss": 0.248, "step": 1429 }, { "epoch": 2.719923918212078, "grad_norm": 0.38528379797935486, "learning_rate": 9.09495077802477e-05, "loss": 0.1642, "step": 1430 }, { "epoch": 2.7218259629101285, "grad_norm": 0.4142857789993286, "learning_rate": 9.094315655763735e-05, "loss": 0.1817, "step": 1431 }, { "epoch": 2.723728007608179, "grad_norm": 0.4072723388671875, "learning_rate": 9.0936805335027e-05, "loss": 0.2017, "step": 1432 }, { "epoch": 2.7256300523062293, "grad_norm": 0.37081822752952576, "learning_rate": 9.093045411241664e-05, "loss": 0.2262, "step": 1433 }, { "epoch": 2.7275320970042793, "grad_norm": 0.3628768026828766, "learning_rate": 9.092410288980629e-05, "loss": 0.1714, "step": 1434 }, { "epoch": 2.7294341417023302, "grad_norm": 0.46637046337127686, "learning_rate": 9.091775166719594e-05, "loss": 0.3189, "step": 1435 }, { "epoch": 2.7313361864003802, "grad_norm": 0.2643025517463684, "learning_rate": 9.091140044458558e-05, "loss": 0.234, "step": 1436 }, { "epoch": 2.7332382310984307, "grad_norm": 0.36125344038009644, "learning_rate": 9.090504922197523e-05, "loss": 0.1981, "step": 1437 }, { "epoch": 2.735140275796481, "grad_norm": 0.3064311742782593, "learning_rate": 9.089869799936487e-05, "loss": 0.1644, "step": 1438 }, { "epoch": 2.7370423204945316, "grad_norm": 0.372164249420166, "learning_rate": 9.089234677675454e-05, "loss": 0.2023, "step": 1439 }, { "epoch": 2.738944365192582, "grad_norm": 0.346431165933609, "learning_rate": 9.088599555414418e-05, "loss": 0.1913, "step": 1440 }, { "epoch": 2.7408464098906324, "grad_norm": 0.3421470522880554, "learning_rate": 9.087964433153382e-05, "loss": 0.1599, "step": 1441 }, { "epoch": 2.742748454588683, "grad_norm": 0.33351296186447144, "learning_rate": 9.087329310892348e-05, "loss": 0.1775, "step": 1442 }, { "epoch": 2.7446504992867333, "grad_norm": 0.3450356721878052, "learning_rate": 9.086694188631312e-05, "loss": 0.199, "step": 1443 }, { "epoch": 2.7465525439847838, "grad_norm": 0.34339770674705505, "learning_rate": 9.086059066370277e-05, "loss": 0.1608, "step": 1444 }, { "epoch": 2.748454588682834, "grad_norm": 0.35941675305366516, "learning_rate": 9.085423944109241e-05, "loss": 0.1566, "step": 1445 }, { "epoch": 2.7503566333808847, "grad_norm": 0.396847665309906, "learning_rate": 9.084788821848206e-05, "loss": 0.1829, "step": 1446 }, { "epoch": 2.7522586780789347, "grad_norm": 0.3818894028663635, "learning_rate": 9.084153699587171e-05, "loss": 0.2017, "step": 1447 }, { "epoch": 2.754160722776985, "grad_norm": 0.46124422550201416, "learning_rate": 9.083518577326135e-05, "loss": 0.185, "step": 1448 }, { "epoch": 2.7560627674750355, "grad_norm": 0.4047834575176239, "learning_rate": 9.082883455065102e-05, "loss": 0.1848, "step": 1449 }, { "epoch": 2.757964812173086, "grad_norm": 0.5650888085365295, "learning_rate": 9.082248332804065e-05, "loss": 0.2274, "step": 1450 }, { "epoch": 2.7598668568711364, "grad_norm": 0.35878250002861023, "learning_rate": 9.081613210543029e-05, "loss": 0.1414, "step": 1451 }, { "epoch": 2.761768901569187, "grad_norm": 0.37223199009895325, "learning_rate": 9.080978088281994e-05, "loss": 0.1718, "step": 1452 }, { "epoch": 2.7636709462672373, "grad_norm": 0.34717050194740295, "learning_rate": 9.08034296602096e-05, "loss": 0.1719, "step": 1453 }, { "epoch": 2.7655729909652877, "grad_norm": 0.4706629812717438, "learning_rate": 9.079707843759923e-05, "loss": 0.1953, "step": 1454 }, { "epoch": 2.767475035663338, "grad_norm": 0.40658390522003174, "learning_rate": 9.079072721498889e-05, "loss": 0.1723, "step": 1455 }, { "epoch": 2.7693770803613886, "grad_norm": 0.5025349855422974, "learning_rate": 9.078437599237854e-05, "loss": 0.2122, "step": 1456 }, { "epoch": 2.771279125059439, "grad_norm": 0.4134734272956848, "learning_rate": 9.077802476976819e-05, "loss": 0.1872, "step": 1457 }, { "epoch": 2.773181169757489, "grad_norm": 0.4301147162914276, "learning_rate": 9.077167354715783e-05, "loss": 0.2102, "step": 1458 }, { "epoch": 2.77508321445554, "grad_norm": 0.4295254051685333, "learning_rate": 9.076532232454748e-05, "loss": 0.2132, "step": 1459 }, { "epoch": 2.77698525915359, "grad_norm": 0.40130358934402466, "learning_rate": 9.075897110193713e-05, "loss": 0.1891, "step": 1460 }, { "epoch": 2.7788873038516404, "grad_norm": 0.4124513268470764, "learning_rate": 9.075261987932677e-05, "loss": 0.204, "step": 1461 }, { "epoch": 2.780789348549691, "grad_norm": 0.3976169526576996, "learning_rate": 9.074626865671642e-05, "loss": 0.2016, "step": 1462 }, { "epoch": 2.7826913932477413, "grad_norm": 0.3949052095413208, "learning_rate": 9.073991743410607e-05, "loss": 0.1924, "step": 1463 }, { "epoch": 2.7845934379457917, "grad_norm": 0.4033919870853424, "learning_rate": 9.073356621149571e-05, "loss": 0.1967, "step": 1464 }, { "epoch": 2.786495482643842, "grad_norm": 0.32922443747520447, "learning_rate": 9.072721498888536e-05, "loss": 0.1639, "step": 1465 }, { "epoch": 2.7883975273418926, "grad_norm": 0.372179239988327, "learning_rate": 9.072086376627502e-05, "loss": 0.1783, "step": 1466 }, { "epoch": 2.790299572039943, "grad_norm": 0.45123547315597534, "learning_rate": 9.071451254366467e-05, "loss": 0.2573, "step": 1467 }, { "epoch": 2.7922016167379935, "grad_norm": 0.33130937814712524, "learning_rate": 9.07081613210543e-05, "loss": 0.1427, "step": 1468 }, { "epoch": 2.7941036614360435, "grad_norm": 0.4377565085887909, "learning_rate": 9.070181009844394e-05, "loss": 0.1915, "step": 1469 }, { "epoch": 2.7960057061340944, "grad_norm": 0.555698037147522, "learning_rate": 9.069545887583361e-05, "loss": 0.2495, "step": 1470 }, { "epoch": 2.7979077508321444, "grad_norm": 0.4749322831630707, "learning_rate": 9.068910765322325e-05, "loss": 0.1944, "step": 1471 }, { "epoch": 2.799809795530195, "grad_norm": 0.3543435335159302, "learning_rate": 9.068275643061289e-05, "loss": 0.1669, "step": 1472 }, { "epoch": 2.8017118402282453, "grad_norm": 0.33086055517196655, "learning_rate": 9.067640520800255e-05, "loss": 0.1792, "step": 1473 }, { "epoch": 2.8036138849262957, "grad_norm": 0.3898443281650543, "learning_rate": 9.067005398539219e-05, "loss": 0.171, "step": 1474 }, { "epoch": 2.805515929624346, "grad_norm": 0.4169894754886627, "learning_rate": 9.066370276278184e-05, "loss": 0.2057, "step": 1475 }, { "epoch": 2.8074179743223966, "grad_norm": 0.37259283661842346, "learning_rate": 9.065735154017148e-05, "loss": 0.1799, "step": 1476 }, { "epoch": 2.809320019020447, "grad_norm": 0.3892917037010193, "learning_rate": 9.065100031756113e-05, "loss": 0.1847, "step": 1477 }, { "epoch": 2.8112220637184975, "grad_norm": 0.5309971570968628, "learning_rate": 9.064464909495078e-05, "loss": 0.2462, "step": 1478 }, { "epoch": 2.813124108416548, "grad_norm": 0.3646765351295471, "learning_rate": 9.063829787234042e-05, "loss": 0.168, "step": 1479 }, { "epoch": 2.815026153114598, "grad_norm": 0.3424735963344574, "learning_rate": 9.063194664973009e-05, "loss": 0.1547, "step": 1480 }, { "epoch": 2.816928197812649, "grad_norm": 0.38415202498435974, "learning_rate": 9.062559542711973e-05, "loss": 0.2186, "step": 1481 }, { "epoch": 2.818830242510699, "grad_norm": 0.4032725393772125, "learning_rate": 9.061924420450936e-05, "loss": 0.1802, "step": 1482 }, { "epoch": 2.8207322872087492, "grad_norm": 0.35286685824394226, "learning_rate": 9.061289298189902e-05, "loss": 0.139, "step": 1483 }, { "epoch": 2.8226343319067997, "grad_norm": 0.35866954922676086, "learning_rate": 9.060654175928867e-05, "loss": 0.2022, "step": 1484 }, { "epoch": 2.82453637660485, "grad_norm": 0.36488500237464905, "learning_rate": 9.060019053667832e-05, "loss": 0.1816, "step": 1485 }, { "epoch": 2.8264384213029006, "grad_norm": 0.4557202160358429, "learning_rate": 9.059383931406796e-05, "loss": 0.1975, "step": 1486 }, { "epoch": 2.828340466000951, "grad_norm": 0.32717350125312805, "learning_rate": 9.058748809145761e-05, "loss": 0.1639, "step": 1487 }, { "epoch": 2.8302425106990015, "grad_norm": 0.41179734468460083, "learning_rate": 9.058113686884726e-05, "loss": 0.1841, "step": 1488 }, { "epoch": 2.832144555397052, "grad_norm": 0.3747973144054413, "learning_rate": 9.05747856462369e-05, "loss": 0.1678, "step": 1489 }, { "epoch": 2.8340466000951023, "grad_norm": 0.41899365186691284, "learning_rate": 9.056843442362655e-05, "loss": 0.2753, "step": 1490 }, { "epoch": 2.835948644793153, "grad_norm": 0.397416889667511, "learning_rate": 9.05620832010162e-05, "loss": 0.1658, "step": 1491 }, { "epoch": 2.8378506894912032, "grad_norm": 0.3874271810054779, "learning_rate": 9.055573197840584e-05, "loss": 0.1808, "step": 1492 }, { "epoch": 2.8397527341892532, "grad_norm": 0.3698302209377289, "learning_rate": 9.05493807557955e-05, "loss": 0.1869, "step": 1493 }, { "epoch": 2.841654778887304, "grad_norm": 0.3908369541168213, "learning_rate": 9.054302953318515e-05, "loss": 0.1866, "step": 1494 }, { "epoch": 2.843556823585354, "grad_norm": 0.5696883201599121, "learning_rate": 9.053667831057478e-05, "loss": 0.2083, "step": 1495 }, { "epoch": 2.8454588682834046, "grad_norm": 0.3560580611228943, "learning_rate": 9.053032708796444e-05, "loss": 0.1829, "step": 1496 }, { "epoch": 2.847360912981455, "grad_norm": 0.4369358718395233, "learning_rate": 9.052397586535409e-05, "loss": 0.2302, "step": 1497 }, { "epoch": 2.8492629576795054, "grad_norm": 0.4240768551826477, "learning_rate": 9.051762464274374e-05, "loss": 0.2204, "step": 1498 }, { "epoch": 2.851165002377556, "grad_norm": 0.4078483581542969, "learning_rate": 9.051127342013338e-05, "loss": 0.181, "step": 1499 }, { "epoch": 2.8530670470756063, "grad_norm": 0.4196905195713043, "learning_rate": 9.050492219752303e-05, "loss": 0.2147, "step": 1500 }, { "epoch": 2.8549690917736568, "grad_norm": 0.3858025372028351, "learning_rate": 9.049857097491268e-05, "loss": 0.1719, "step": 1501 }, { "epoch": 2.856871136471707, "grad_norm": 0.3923434019088745, "learning_rate": 9.049221975230232e-05, "loss": 0.1966, "step": 1502 }, { "epoch": 2.8587731811697576, "grad_norm": 0.38231122493743896, "learning_rate": 9.048586852969197e-05, "loss": 0.186, "step": 1503 }, { "epoch": 2.8606752258678076, "grad_norm": 0.3579331040382385, "learning_rate": 9.047951730708162e-05, "loss": 0.1777, "step": 1504 }, { "epoch": 2.8625772705658585, "grad_norm": 0.2968972623348236, "learning_rate": 9.047316608447126e-05, "loss": 0.1456, "step": 1505 }, { "epoch": 2.8644793152639085, "grad_norm": 0.3534374535083771, "learning_rate": 9.046681486186091e-05, "loss": 0.178, "step": 1506 }, { "epoch": 2.866381359961959, "grad_norm": 0.4368778169155121, "learning_rate": 9.046046363925057e-05, "loss": 0.2349, "step": 1507 }, { "epoch": 2.8682834046600094, "grad_norm": 0.43825942277908325, "learning_rate": 9.04541124166402e-05, "loss": 0.1857, "step": 1508 }, { "epoch": 2.87018544935806, "grad_norm": 0.35765841603279114, "learning_rate": 9.044776119402986e-05, "loss": 0.1787, "step": 1509 }, { "epoch": 2.8720874940561103, "grad_norm": 0.35496601462364197, "learning_rate": 9.04414099714195e-05, "loss": 0.1776, "step": 1510 }, { "epoch": 2.8739895387541607, "grad_norm": 0.39673030376434326, "learning_rate": 9.043505874880916e-05, "loss": 0.1916, "step": 1511 }, { "epoch": 2.875891583452211, "grad_norm": 0.3670983612537384, "learning_rate": 9.04287075261988e-05, "loss": 0.1726, "step": 1512 }, { "epoch": 2.8777936281502616, "grad_norm": 0.4254002273082733, "learning_rate": 9.042235630358844e-05, "loss": 0.224, "step": 1513 }, { "epoch": 2.879695672848312, "grad_norm": 0.37891489267349243, "learning_rate": 9.041600508097809e-05, "loss": 0.1805, "step": 1514 }, { "epoch": 2.881597717546362, "grad_norm": 0.33309099078178406, "learning_rate": 9.040965385836774e-05, "loss": 0.1442, "step": 1515 }, { "epoch": 2.883499762244413, "grad_norm": 0.4709990918636322, "learning_rate": 9.040330263575739e-05, "loss": 0.2342, "step": 1516 }, { "epoch": 2.885401806942463, "grad_norm": 0.41639766097068787, "learning_rate": 9.039695141314703e-05, "loss": 0.1772, "step": 1517 }, { "epoch": 2.8873038516405134, "grad_norm": 0.37914562225341797, "learning_rate": 9.039060019053668e-05, "loss": 0.1632, "step": 1518 }, { "epoch": 2.889205896338564, "grad_norm": 0.4000544250011444, "learning_rate": 9.038424896792633e-05, "loss": 0.1927, "step": 1519 }, { "epoch": 2.8911079410366143, "grad_norm": 0.42467859387397766, "learning_rate": 9.037789774531597e-05, "loss": 0.1789, "step": 1520 }, { "epoch": 2.8930099857346647, "grad_norm": 0.46945691108703613, "learning_rate": 9.037154652270562e-05, "loss": 0.197, "step": 1521 }, { "epoch": 2.894912030432715, "grad_norm": 0.43455827236175537, "learning_rate": 9.036519530009528e-05, "loss": 0.1877, "step": 1522 }, { "epoch": 2.8968140751307656, "grad_norm": 0.5169146656990051, "learning_rate": 9.035884407748491e-05, "loss": 0.2039, "step": 1523 }, { "epoch": 2.898716119828816, "grad_norm": 0.42767763137817383, "learning_rate": 9.035249285487457e-05, "loss": 0.2123, "step": 1524 }, { "epoch": 2.9006181645268665, "grad_norm": 0.40808382630348206, "learning_rate": 9.034614163226422e-05, "loss": 0.2617, "step": 1525 }, { "epoch": 2.902520209224917, "grad_norm": 0.3179365396499634, "learning_rate": 9.033979040965386e-05, "loss": 0.1548, "step": 1526 }, { "epoch": 2.9044222539229674, "grad_norm": 0.259781152009964, "learning_rate": 9.033343918704351e-05, "loss": 0.1299, "step": 1527 }, { "epoch": 2.9063242986210174, "grad_norm": 0.40235599875450134, "learning_rate": 9.032708796443316e-05, "loss": 0.1957, "step": 1528 }, { "epoch": 2.9082263433190683, "grad_norm": 0.3170933127403259, "learning_rate": 9.032073674182281e-05, "loss": 0.1594, "step": 1529 }, { "epoch": 2.9101283880171183, "grad_norm": 0.31572115421295166, "learning_rate": 9.031438551921245e-05, "loss": 0.1922, "step": 1530 }, { "epoch": 2.9120304327151687, "grad_norm": 0.4456964433193207, "learning_rate": 9.03080342966021e-05, "loss": 0.2459, "step": 1531 }, { "epoch": 2.913932477413219, "grad_norm": 0.3345606327056885, "learning_rate": 9.030168307399175e-05, "loss": 0.1708, "step": 1532 }, { "epoch": 2.9158345221112696, "grad_norm": 0.4247712790966034, "learning_rate": 9.029533185138139e-05, "loss": 0.227, "step": 1533 }, { "epoch": 2.91773656680932, "grad_norm": 0.3642347455024719, "learning_rate": 9.028898062877104e-05, "loss": 0.1971, "step": 1534 }, { "epoch": 2.9196386115073705, "grad_norm": 0.40530455112457275, "learning_rate": 9.02826294061607e-05, "loss": 0.1574, "step": 1535 }, { "epoch": 2.921540656205421, "grad_norm": 0.5143640637397766, "learning_rate": 9.027627818355033e-05, "loss": 0.207, "step": 1536 }, { "epoch": 2.9234427009034714, "grad_norm": 0.4270274043083191, "learning_rate": 9.026992696093999e-05, "loss": 0.1971, "step": 1537 }, { "epoch": 2.925344745601522, "grad_norm": 0.5170589685440063, "learning_rate": 9.026357573832964e-05, "loss": 0.2768, "step": 1538 }, { "epoch": 2.927246790299572, "grad_norm": 0.41313278675079346, "learning_rate": 9.025722451571929e-05, "loss": 0.1765, "step": 1539 }, { "epoch": 2.9291488349976227, "grad_norm": 0.4040130078792572, "learning_rate": 9.025087329310893e-05, "loss": 0.2002, "step": 1540 }, { "epoch": 2.9310508796956727, "grad_norm": 0.37281498312950134, "learning_rate": 9.024452207049857e-05, "loss": 0.1542, "step": 1541 }, { "epoch": 2.932952924393723, "grad_norm": 0.5352873802185059, "learning_rate": 9.023817084788823e-05, "loss": 0.2437, "step": 1542 }, { "epoch": 2.9348549690917736, "grad_norm": 0.4044128358364105, "learning_rate": 9.023181962527787e-05, "loss": 0.1721, "step": 1543 }, { "epoch": 2.936757013789824, "grad_norm": 0.35553574562072754, "learning_rate": 9.022546840266751e-05, "loss": 0.1838, "step": 1544 }, { "epoch": 2.9386590584878745, "grad_norm": 0.42568060755729675, "learning_rate": 9.021911718005717e-05, "loss": 0.2022, "step": 1545 }, { "epoch": 2.940561103185925, "grad_norm": 0.453700453042984, "learning_rate": 9.021276595744681e-05, "loss": 0.1866, "step": 1546 }, { "epoch": 2.9424631478839753, "grad_norm": 0.3909238576889038, "learning_rate": 9.020641473483646e-05, "loss": 0.1628, "step": 1547 }, { "epoch": 2.944365192582026, "grad_norm": 0.39725926518440247, "learning_rate": 9.02000635122261e-05, "loss": 0.217, "step": 1548 }, { "epoch": 2.9462672372800762, "grad_norm": 0.34860628843307495, "learning_rate": 9.019371228961575e-05, "loss": 0.1724, "step": 1549 }, { "epoch": 2.948169281978126, "grad_norm": 0.38813674449920654, "learning_rate": 9.01873610670054e-05, "loss": 0.2047, "step": 1550 }, { "epoch": 2.950071326676177, "grad_norm": 0.37160560488700867, "learning_rate": 9.018100984439504e-05, "loss": 0.2119, "step": 1551 }, { "epoch": 2.951973371374227, "grad_norm": 0.4166210889816284, "learning_rate": 9.017465862178471e-05, "loss": 0.2215, "step": 1552 }, { "epoch": 2.9538754160722775, "grad_norm": 0.3657042980194092, "learning_rate": 9.016830739917435e-05, "loss": 0.1924, "step": 1553 }, { "epoch": 2.955777460770328, "grad_norm": 0.37292999029159546, "learning_rate": 9.016195617656399e-05, "loss": 0.2329, "step": 1554 }, { "epoch": 2.9576795054683784, "grad_norm": 0.3373647928237915, "learning_rate": 9.015560495395364e-05, "loss": 0.2034, "step": 1555 }, { "epoch": 2.959581550166429, "grad_norm": 0.31643402576446533, "learning_rate": 9.014925373134329e-05, "loss": 0.1713, "step": 1556 }, { "epoch": 2.9614835948644793, "grad_norm": 0.3107222318649292, "learning_rate": 9.014290250873294e-05, "loss": 0.1511, "step": 1557 }, { "epoch": 2.9633856395625298, "grad_norm": 0.32063353061676025, "learning_rate": 9.013655128612258e-05, "loss": 0.1581, "step": 1558 }, { "epoch": 2.96528768426058, "grad_norm": 0.4035079777240753, "learning_rate": 9.013020006351223e-05, "loss": 0.2036, "step": 1559 }, { "epoch": 2.9671897289586306, "grad_norm": 0.28573077917099, "learning_rate": 9.012384884090188e-05, "loss": 0.1388, "step": 1560 }, { "epoch": 2.969091773656681, "grad_norm": 0.38853904604911804, "learning_rate": 9.011749761829152e-05, "loss": 0.1981, "step": 1561 }, { "epoch": 2.9709938183547315, "grad_norm": 0.39904823899269104, "learning_rate": 9.011114639568117e-05, "loss": 0.2249, "step": 1562 }, { "epoch": 2.9728958630527815, "grad_norm": 0.3704228103160858, "learning_rate": 9.010479517307082e-05, "loss": 0.2176, "step": 1563 }, { "epoch": 2.9747979077508324, "grad_norm": 0.3712176978588104, "learning_rate": 9.009844395046046e-05, "loss": 0.1685, "step": 1564 }, { "epoch": 2.9766999524488824, "grad_norm": 0.47927892208099365, "learning_rate": 9.009209272785011e-05, "loss": 0.2027, "step": 1565 }, { "epoch": 2.978601997146933, "grad_norm": 0.4230005443096161, "learning_rate": 9.008574150523977e-05, "loss": 0.212, "step": 1566 }, { "epoch": 2.9805040418449833, "grad_norm": 0.32152169942855835, "learning_rate": 9.00793902826294e-05, "loss": 0.1639, "step": 1567 }, { "epoch": 2.9824060865430337, "grad_norm": 0.42794153094291687, "learning_rate": 9.007303906001906e-05, "loss": 0.2143, "step": 1568 }, { "epoch": 2.984308131241084, "grad_norm": 0.37590306997299194, "learning_rate": 9.006668783740871e-05, "loss": 0.189, "step": 1569 }, { "epoch": 2.9862101759391346, "grad_norm": 0.3247901201248169, "learning_rate": 9.006033661479836e-05, "loss": 0.1616, "step": 1570 }, { "epoch": 2.988112220637185, "grad_norm": 0.36269792914390564, "learning_rate": 9.0053985392188e-05, "loss": 0.2037, "step": 1571 }, { "epoch": 2.9900142653352355, "grad_norm": 0.4436742067337036, "learning_rate": 9.004763416957764e-05, "loss": 0.202, "step": 1572 }, { "epoch": 2.991916310033286, "grad_norm": 0.45660001039505005, "learning_rate": 9.00412829469673e-05, "loss": 0.2298, "step": 1573 }, { "epoch": 2.993818354731336, "grad_norm": 0.3276821970939636, "learning_rate": 9.003493172435694e-05, "loss": 0.158, "step": 1574 }, { "epoch": 2.995720399429387, "grad_norm": 0.3427131175994873, "learning_rate": 9.002858050174659e-05, "loss": 0.1781, "step": 1575 }, { "epoch": 2.997622444127437, "grad_norm": 0.38842669129371643, "learning_rate": 9.002222927913624e-05, "loss": 0.1905, "step": 1576 }, { "epoch": 2.9995244888254873, "grad_norm": 0.4034234285354614, "learning_rate": 9.001587805652588e-05, "loss": 0.1989, "step": 1577 }, { "epoch": 3.0014265335235377, "grad_norm": 0.23682546615600586, "learning_rate": 9.000952683391553e-05, "loss": 0.0968, "step": 1578 }, { "epoch": 3.003328578221588, "grad_norm": 0.23321636021137238, "learning_rate": 9.000317561130517e-05, "loss": 0.1278, "step": 1579 }, { "epoch": 3.0052306229196386, "grad_norm": 0.2891576290130615, "learning_rate": 8.999682438869482e-05, "loss": 0.1297, "step": 1580 }, { "epoch": 3.007132667617689, "grad_norm": 0.30067315697669983, "learning_rate": 8.999047316608448e-05, "loss": 0.1216, "step": 1581 }, { "epoch": 3.0090347123157395, "grad_norm": 0.25676554441452026, "learning_rate": 8.998412194347411e-05, "loss": 0.1167, "step": 1582 }, { "epoch": 3.01093675701379, "grad_norm": 0.30124133825302124, "learning_rate": 8.997777072086378e-05, "loss": 0.1243, "step": 1583 }, { "epoch": 3.0128388017118404, "grad_norm": 0.30313733220100403, "learning_rate": 8.997141949825342e-05, "loss": 0.127, "step": 1584 }, { "epoch": 3.014740846409891, "grad_norm": 0.36067837476730347, "learning_rate": 8.996506827564306e-05, "loss": 0.1331, "step": 1585 }, { "epoch": 3.0166428911079413, "grad_norm": 0.3327738642692566, "learning_rate": 8.995871705303271e-05, "loss": 0.1304, "step": 1586 }, { "epoch": 3.0185449358059913, "grad_norm": 0.2918979525566101, "learning_rate": 8.995236583042236e-05, "loss": 0.1127, "step": 1587 }, { "epoch": 3.0204469805040417, "grad_norm": 0.40982192754745483, "learning_rate": 8.994601460781201e-05, "loss": 0.1283, "step": 1588 }, { "epoch": 3.022349025202092, "grad_norm": 0.37201565504074097, "learning_rate": 8.993966338520165e-05, "loss": 0.1198, "step": 1589 }, { "epoch": 3.0242510699001426, "grad_norm": 0.4271756708621979, "learning_rate": 8.99333121625913e-05, "loss": 0.1218, "step": 1590 }, { "epoch": 3.026153114598193, "grad_norm": 0.3430047035217285, "learning_rate": 8.992696093998095e-05, "loss": 0.1213, "step": 1591 }, { "epoch": 3.0280551592962435, "grad_norm": 0.3253467381000519, "learning_rate": 8.992060971737059e-05, "loss": 0.1124, "step": 1592 }, { "epoch": 3.029957203994294, "grad_norm": 0.38685157895088196, "learning_rate": 8.991425849476024e-05, "loss": 0.112, "step": 1593 }, { "epoch": 3.0318592486923444, "grad_norm": 0.36162498593330383, "learning_rate": 8.99079072721499e-05, "loss": 0.1061, "step": 1594 }, { "epoch": 3.033761293390395, "grad_norm": 0.32084980607032776, "learning_rate": 8.990155604953953e-05, "loss": 0.0965, "step": 1595 }, { "epoch": 3.0356633380884452, "grad_norm": 0.4037097096443176, "learning_rate": 8.989520482692919e-05, "loss": 0.1237, "step": 1596 }, { "epoch": 3.0375653827864957, "grad_norm": 0.23668204247951508, "learning_rate": 8.988885360431884e-05, "loss": 0.1778, "step": 1597 }, { "epoch": 3.0394674274845457, "grad_norm": 0.3448043167591095, "learning_rate": 8.988250238170848e-05, "loss": 0.1349, "step": 1598 }, { "epoch": 3.041369472182596, "grad_norm": 0.39455583691596985, "learning_rate": 8.987615115909813e-05, "loss": 0.1175, "step": 1599 }, { "epoch": 3.0432715168806466, "grad_norm": 0.39552587270736694, "learning_rate": 8.986979993648778e-05, "loss": 0.1296, "step": 1600 }, { "epoch": 3.045173561578697, "grad_norm": 0.36603817343711853, "learning_rate": 8.986344871387743e-05, "loss": 0.1392, "step": 1601 }, { "epoch": 3.0470756062767475, "grad_norm": 0.34084847569465637, "learning_rate": 8.985709749126707e-05, "loss": 0.1155, "step": 1602 }, { "epoch": 3.048977650974798, "grad_norm": 0.36548131704330444, "learning_rate": 8.985074626865672e-05, "loss": 0.1381, "step": 1603 }, { "epoch": 3.0508796956728483, "grad_norm": 0.30957910418510437, "learning_rate": 8.984439504604637e-05, "loss": 0.1123, "step": 1604 }, { "epoch": 3.0527817403708988, "grad_norm": 0.38922393321990967, "learning_rate": 8.983804382343601e-05, "loss": 0.1588, "step": 1605 }, { "epoch": 3.054683785068949, "grad_norm": 0.3416849672794342, "learning_rate": 8.983169260082566e-05, "loss": 0.1236, "step": 1606 }, { "epoch": 3.0565858297669997, "grad_norm": 0.31353771686553955, "learning_rate": 8.982534137821532e-05, "loss": 0.1025, "step": 1607 }, { "epoch": 3.05848787446505, "grad_norm": 0.36878702044487, "learning_rate": 8.981899015560495e-05, "loss": 0.1421, "step": 1608 }, { "epoch": 3.0603899191631005, "grad_norm": 0.38487425446510315, "learning_rate": 8.98126389329946e-05, "loss": 0.1223, "step": 1609 }, { "epoch": 3.0622919638611505, "grad_norm": 0.3435547649860382, "learning_rate": 8.980628771038426e-05, "loss": 0.1105, "step": 1610 }, { "epoch": 3.064194008559201, "grad_norm": 0.422198086977005, "learning_rate": 8.979993648777391e-05, "loss": 0.1368, "step": 1611 }, { "epoch": 3.0660960532572514, "grad_norm": 0.43352290987968445, "learning_rate": 8.979358526516355e-05, "loss": 0.1743, "step": 1612 }, { "epoch": 3.067998097955302, "grad_norm": 0.3885476887226105, "learning_rate": 8.978723404255319e-05, "loss": 0.1979, "step": 1613 }, { "epoch": 3.0699001426533523, "grad_norm": 0.3135451376438141, "learning_rate": 8.978088281994285e-05, "loss": 0.1105, "step": 1614 }, { "epoch": 3.0718021873514028, "grad_norm": 0.4184531271457672, "learning_rate": 8.977453159733249e-05, "loss": 0.1335, "step": 1615 }, { "epoch": 3.073704232049453, "grad_norm": 0.35463500022888184, "learning_rate": 8.976818037472213e-05, "loss": 0.1384, "step": 1616 }, { "epoch": 3.0756062767475036, "grad_norm": 0.33959662914276123, "learning_rate": 8.97618291521118e-05, "loss": 0.118, "step": 1617 }, { "epoch": 3.077508321445554, "grad_norm": 0.3295678198337555, "learning_rate": 8.975547792950143e-05, "loss": 0.1073, "step": 1618 }, { "epoch": 3.0794103661436045, "grad_norm": 0.32906121015548706, "learning_rate": 8.974912670689108e-05, "loss": 0.0992, "step": 1619 }, { "epoch": 3.081312410841655, "grad_norm": 0.2967415750026703, "learning_rate": 8.974277548428072e-05, "loss": 0.0901, "step": 1620 }, { "epoch": 3.0832144555397054, "grad_norm": 0.3415001928806305, "learning_rate": 8.973642426167037e-05, "loss": 0.1248, "step": 1621 }, { "epoch": 3.0851165002377554, "grad_norm": 0.2587614357471466, "learning_rate": 8.973007303906003e-05, "loss": 0.0872, "step": 1622 }, { "epoch": 3.087018544935806, "grad_norm": 0.3469274640083313, "learning_rate": 8.972372181644966e-05, "loss": 0.1147, "step": 1623 }, { "epoch": 3.0889205896338563, "grad_norm": 0.28534063696861267, "learning_rate": 8.971737059383932e-05, "loss": 0.1377, "step": 1624 }, { "epoch": 3.0908226343319067, "grad_norm": 0.3836195170879364, "learning_rate": 8.971101937122897e-05, "loss": 0.1242, "step": 1625 }, { "epoch": 3.092724679029957, "grad_norm": 0.40428081154823303, "learning_rate": 8.97046681486186e-05, "loss": 0.1017, "step": 1626 }, { "epoch": 3.0946267237280076, "grad_norm": 0.37237152457237244, "learning_rate": 8.969831692600826e-05, "loss": 0.1318, "step": 1627 }, { "epoch": 3.096528768426058, "grad_norm": 0.3669044077396393, "learning_rate": 8.969196570339791e-05, "loss": 0.1191, "step": 1628 }, { "epoch": 3.0984308131241085, "grad_norm": 0.36814671754837036, "learning_rate": 8.968561448078756e-05, "loss": 0.1227, "step": 1629 }, { "epoch": 3.100332857822159, "grad_norm": 0.3883667290210724, "learning_rate": 8.96792632581772e-05, "loss": 0.1556, "step": 1630 }, { "epoch": 3.1022349025202094, "grad_norm": 0.44517648220062256, "learning_rate": 8.967291203556685e-05, "loss": 0.1439, "step": 1631 }, { "epoch": 3.10413694721826, "grad_norm": 0.3230499029159546, "learning_rate": 8.96665608129565e-05, "loss": 0.117, "step": 1632 }, { "epoch": 3.10603899191631, "grad_norm": 0.2505279779434204, "learning_rate": 8.966020959034614e-05, "loss": 0.0945, "step": 1633 }, { "epoch": 3.1079410366143603, "grad_norm": 0.31753817200660706, "learning_rate": 8.96538583677358e-05, "loss": 0.1119, "step": 1634 }, { "epoch": 3.1098430813124107, "grad_norm": 0.34199607372283936, "learning_rate": 8.964750714512545e-05, "loss": 0.1508, "step": 1635 }, { "epoch": 3.111745126010461, "grad_norm": 0.39167290925979614, "learning_rate": 8.964115592251508e-05, "loss": 0.1422, "step": 1636 }, { "epoch": 3.1136471707085116, "grad_norm": 0.28108343482017517, "learning_rate": 8.963480469990474e-05, "loss": 0.0981, "step": 1637 }, { "epoch": 3.115549215406562, "grad_norm": 0.2806454598903656, "learning_rate": 8.962845347729439e-05, "loss": 0.1227, "step": 1638 }, { "epoch": 3.1174512601046125, "grad_norm": 0.3393970727920532, "learning_rate": 8.962210225468403e-05, "loss": 0.1419, "step": 1639 }, { "epoch": 3.119353304802663, "grad_norm": 0.3800428509712219, "learning_rate": 8.961575103207368e-05, "loss": 0.1323, "step": 1640 }, { "epoch": 3.1212553495007134, "grad_norm": 0.3849729299545288, "learning_rate": 8.960939980946333e-05, "loss": 0.1505, "step": 1641 }, { "epoch": 3.123157394198764, "grad_norm": 0.38189247250556946, "learning_rate": 8.960304858685298e-05, "loss": 0.1303, "step": 1642 }, { "epoch": 3.1250594388968143, "grad_norm": 0.3030915856361389, "learning_rate": 8.959669736424262e-05, "loss": 0.1141, "step": 1643 }, { "epoch": 3.1269614835948643, "grad_norm": 0.3842359185218811, "learning_rate": 8.959034614163226e-05, "loss": 0.1124, "step": 1644 }, { "epoch": 3.1288635282929147, "grad_norm": 0.3637976348400116, "learning_rate": 8.958399491902192e-05, "loss": 0.1275, "step": 1645 }, { "epoch": 3.130765572990965, "grad_norm": 0.2884964346885681, "learning_rate": 8.957764369641156e-05, "loss": 0.1065, "step": 1646 }, { "epoch": 3.1326676176890156, "grad_norm": 0.3866124749183655, "learning_rate": 8.957129247380121e-05, "loss": 0.1389, "step": 1647 }, { "epoch": 3.134569662387066, "grad_norm": 0.418950617313385, "learning_rate": 8.956494125119087e-05, "loss": 0.1406, "step": 1648 }, { "epoch": 3.1364717070851165, "grad_norm": 0.37514927983283997, "learning_rate": 8.95585900285805e-05, "loss": 0.1239, "step": 1649 }, { "epoch": 3.138373751783167, "grad_norm": 0.29558438062667847, "learning_rate": 8.955223880597016e-05, "loss": 0.1077, "step": 1650 }, { "epoch": 3.1402757964812174, "grad_norm": 0.3241124749183655, "learning_rate": 8.95458875833598e-05, "loss": 0.1254, "step": 1651 }, { "epoch": 3.142177841179268, "grad_norm": 0.40942251682281494, "learning_rate": 8.953953636074945e-05, "loss": 0.1388, "step": 1652 }, { "epoch": 3.1440798858773182, "grad_norm": 0.3899609446525574, "learning_rate": 8.95331851381391e-05, "loss": 0.1279, "step": 1653 }, { "epoch": 3.1459819305753687, "grad_norm": 0.37820303440093994, "learning_rate": 8.952683391552874e-05, "loss": 0.1146, "step": 1654 }, { "epoch": 3.147883975273419, "grad_norm": 0.3521963059902191, "learning_rate": 8.95204826929184e-05, "loss": 0.1337, "step": 1655 }, { "epoch": 3.1497860199714696, "grad_norm": 0.3292877674102783, "learning_rate": 8.951413147030804e-05, "loss": 0.1225, "step": 1656 }, { "epoch": 3.1516880646695196, "grad_norm": 0.28479406237602234, "learning_rate": 8.950778024769768e-05, "loss": 0.1006, "step": 1657 }, { "epoch": 3.15359010936757, "grad_norm": 0.2883979380130768, "learning_rate": 8.950142902508733e-05, "loss": 0.1114, "step": 1658 }, { "epoch": 3.1554921540656204, "grad_norm": 0.33744558691978455, "learning_rate": 8.949507780247698e-05, "loss": 0.1263, "step": 1659 }, { "epoch": 3.157394198763671, "grad_norm": 0.2845192551612854, "learning_rate": 8.948872657986663e-05, "loss": 0.1047, "step": 1660 }, { "epoch": 3.1592962434617213, "grad_norm": 0.3539939224720001, "learning_rate": 8.948237535725627e-05, "loss": 0.1183, "step": 1661 }, { "epoch": 3.1611982881597718, "grad_norm": 0.24927809834480286, "learning_rate": 8.947602413464592e-05, "loss": 0.0825, "step": 1662 }, { "epoch": 3.163100332857822, "grad_norm": 0.4059623181819916, "learning_rate": 8.946967291203558e-05, "loss": 0.1457, "step": 1663 }, { "epoch": 3.1650023775558727, "grad_norm": 0.3298782706260681, "learning_rate": 8.946332168942521e-05, "loss": 0.1226, "step": 1664 }, { "epoch": 3.166904422253923, "grad_norm": 0.3750251829624176, "learning_rate": 8.945697046681487e-05, "loss": 0.144, "step": 1665 }, { "epoch": 3.1688064669519735, "grad_norm": 0.40858665108680725, "learning_rate": 8.945061924420452e-05, "loss": 0.1426, "step": 1666 }, { "epoch": 3.170708511650024, "grad_norm": 0.38032254576683044, "learning_rate": 8.944426802159416e-05, "loss": 0.1479, "step": 1667 }, { "epoch": 3.172610556348074, "grad_norm": 0.3702940046787262, "learning_rate": 8.943791679898381e-05, "loss": 0.1262, "step": 1668 }, { "epoch": 3.1745126010461244, "grad_norm": 0.43061700463294983, "learning_rate": 8.943156557637346e-05, "loss": 0.1463, "step": 1669 }, { "epoch": 3.176414645744175, "grad_norm": 0.2968880832195282, "learning_rate": 8.94252143537631e-05, "loss": 0.1135, "step": 1670 }, { "epoch": 3.1783166904422253, "grad_norm": 0.28398388624191284, "learning_rate": 8.941886313115275e-05, "loss": 0.1137, "step": 1671 }, { "epoch": 3.1802187351402758, "grad_norm": 0.2764633595943451, "learning_rate": 8.94125119085424e-05, "loss": 0.0974, "step": 1672 }, { "epoch": 3.182120779838326, "grad_norm": 0.39509302377700806, "learning_rate": 8.940616068593205e-05, "loss": 0.1491, "step": 1673 }, { "epoch": 3.1840228245363766, "grad_norm": 0.2926827669143677, "learning_rate": 8.939980946332169e-05, "loss": 0.1207, "step": 1674 }, { "epoch": 3.185924869234427, "grad_norm": 0.35445713996887207, "learning_rate": 8.939345824071133e-05, "loss": 0.1252, "step": 1675 }, { "epoch": 3.1878269139324775, "grad_norm": 0.3183155059814453, "learning_rate": 8.9387107018101e-05, "loss": 0.1178, "step": 1676 }, { "epoch": 3.189728958630528, "grad_norm": 0.40158188343048096, "learning_rate": 8.938075579549063e-05, "loss": 0.1266, "step": 1677 }, { "epoch": 3.1916310033285784, "grad_norm": 0.33932897448539734, "learning_rate": 8.937440457288029e-05, "loss": 0.1321, "step": 1678 }, { "epoch": 3.1935330480266284, "grad_norm": 0.3436925411224365, "learning_rate": 8.936805335026994e-05, "loss": 0.1204, "step": 1679 }, { "epoch": 3.195435092724679, "grad_norm": 0.32970649003982544, "learning_rate": 8.936170212765958e-05, "loss": 0.1023, "step": 1680 }, { "epoch": 3.1973371374227293, "grad_norm": 0.3206690549850464, "learning_rate": 8.935535090504923e-05, "loss": 0.1011, "step": 1681 }, { "epoch": 3.1992391821207797, "grad_norm": 0.39323487877845764, "learning_rate": 8.934899968243887e-05, "loss": 0.1263, "step": 1682 }, { "epoch": 3.20114122681883, "grad_norm": 0.3755662143230438, "learning_rate": 8.934264845982853e-05, "loss": 0.1345, "step": 1683 }, { "epoch": 3.2030432715168806, "grad_norm": 0.3337384760379791, "learning_rate": 8.933629723721817e-05, "loss": 0.1094, "step": 1684 }, { "epoch": 3.204945316214931, "grad_norm": 0.35307517647743225, "learning_rate": 8.932994601460781e-05, "loss": 0.1244, "step": 1685 }, { "epoch": 3.2068473609129815, "grad_norm": 0.2809374928474426, "learning_rate": 8.932359479199747e-05, "loss": 0.0961, "step": 1686 }, { "epoch": 3.208749405611032, "grad_norm": 0.35939821600914, "learning_rate": 8.931724356938711e-05, "loss": 0.1294, "step": 1687 }, { "epoch": 3.2106514503090824, "grad_norm": 0.36626148223876953, "learning_rate": 8.931089234677675e-05, "loss": 0.141, "step": 1688 }, { "epoch": 3.212553495007133, "grad_norm": 0.31976842880249023, "learning_rate": 8.93045411241664e-05, "loss": 0.1058, "step": 1689 }, { "epoch": 3.2144555397051833, "grad_norm": 0.40340307354927063, "learning_rate": 8.929818990155605e-05, "loss": 0.142, "step": 1690 }, { "epoch": 3.2163575844032333, "grad_norm": 0.3481243848800659, "learning_rate": 8.92918386789457e-05, "loss": 0.1301, "step": 1691 }, { "epoch": 3.2182596291012837, "grad_norm": 0.41779786348342896, "learning_rate": 8.928548745633534e-05, "loss": 0.1531, "step": 1692 }, { "epoch": 3.220161673799334, "grad_norm": 0.33376792073249817, "learning_rate": 8.9279136233725e-05, "loss": 0.1397, "step": 1693 }, { "epoch": 3.2220637184973846, "grad_norm": 0.42083820700645447, "learning_rate": 8.927278501111465e-05, "loss": 0.1456, "step": 1694 }, { "epoch": 3.223965763195435, "grad_norm": 0.23268885910511017, "learning_rate": 8.926643378850429e-05, "loss": 0.1261, "step": 1695 }, { "epoch": 3.2258678078934855, "grad_norm": 0.3965808153152466, "learning_rate": 8.926008256589394e-05, "loss": 0.1454, "step": 1696 }, { "epoch": 3.227769852591536, "grad_norm": 0.40782594680786133, "learning_rate": 8.925373134328359e-05, "loss": 0.137, "step": 1697 }, { "epoch": 3.2296718972895864, "grad_norm": 0.37247705459594727, "learning_rate": 8.924738012067323e-05, "loss": 0.1227, "step": 1698 }, { "epoch": 3.231573941987637, "grad_norm": 0.5225626230239868, "learning_rate": 8.924102889806288e-05, "loss": 0.1596, "step": 1699 }, { "epoch": 3.2334759866856873, "grad_norm": 0.35236862301826477, "learning_rate": 8.923467767545253e-05, "loss": 0.1576, "step": 1700 }, { "epoch": 3.2353780313837377, "grad_norm": 0.3305290639400482, "learning_rate": 8.922832645284218e-05, "loss": 0.1114, "step": 1701 }, { "epoch": 3.237280076081788, "grad_norm": 0.37631455063819885, "learning_rate": 8.922197523023182e-05, "loss": 0.1278, "step": 1702 }, { "epoch": 3.239182120779838, "grad_norm": 0.3439154624938965, "learning_rate": 8.921562400762147e-05, "loss": 0.1658, "step": 1703 }, { "epoch": 3.2410841654778886, "grad_norm": 0.4184103310108185, "learning_rate": 8.920927278501112e-05, "loss": 0.1754, "step": 1704 }, { "epoch": 3.242986210175939, "grad_norm": 0.3708958029747009, "learning_rate": 8.920292156240076e-05, "loss": 0.148, "step": 1705 }, { "epoch": 3.2448882548739895, "grad_norm": 0.36626115441322327, "learning_rate": 8.919657033979041e-05, "loss": 0.152, "step": 1706 }, { "epoch": 3.24679029957204, "grad_norm": 0.3738412857055664, "learning_rate": 8.919021911718007e-05, "loss": 0.1432, "step": 1707 }, { "epoch": 3.2486923442700903, "grad_norm": 0.4470990002155304, "learning_rate": 8.91838678945697e-05, "loss": 0.1639, "step": 1708 }, { "epoch": 3.250594388968141, "grad_norm": 0.3332229554653168, "learning_rate": 8.917751667195936e-05, "loss": 0.1257, "step": 1709 }, { "epoch": 3.2524964336661912, "grad_norm": 0.3853921890258789, "learning_rate": 8.917116544934901e-05, "loss": 0.1262, "step": 1710 }, { "epoch": 3.2543984783642417, "grad_norm": 0.32993221282958984, "learning_rate": 8.916481422673865e-05, "loss": 0.1231, "step": 1711 }, { "epoch": 3.256300523062292, "grad_norm": 0.3631759285926819, "learning_rate": 8.91584630041283e-05, "loss": 0.148, "step": 1712 }, { "epoch": 3.2582025677603426, "grad_norm": 0.40394118428230286, "learning_rate": 8.915211178151795e-05, "loss": 0.1542, "step": 1713 }, { "epoch": 3.2601046124583926, "grad_norm": 0.3267883360385895, "learning_rate": 8.91457605589076e-05, "loss": 0.1411, "step": 1714 }, { "epoch": 3.262006657156443, "grad_norm": 0.3076201379299164, "learning_rate": 8.913940933629724e-05, "loss": 0.1189, "step": 1715 }, { "epoch": 3.2639087018544934, "grad_norm": 0.43854421377182007, "learning_rate": 8.913305811368688e-05, "loss": 0.1806, "step": 1716 }, { "epoch": 3.265810746552544, "grad_norm": 0.2679373621940613, "learning_rate": 8.912670689107654e-05, "loss": 0.1251, "step": 1717 }, { "epoch": 3.2677127912505943, "grad_norm": 0.35840150713920593, "learning_rate": 8.912035566846618e-05, "loss": 0.1276, "step": 1718 }, { "epoch": 3.2696148359486448, "grad_norm": 0.368457168340683, "learning_rate": 8.911400444585583e-05, "loss": 0.1312, "step": 1719 }, { "epoch": 3.271516880646695, "grad_norm": 0.3617841303348541, "learning_rate": 8.910765322324549e-05, "loss": 0.1165, "step": 1720 }, { "epoch": 3.2734189253447457, "grad_norm": 0.34482330083847046, "learning_rate": 8.910130200063512e-05, "loss": 0.1246, "step": 1721 }, { "epoch": 3.275320970042796, "grad_norm": 0.27358710765838623, "learning_rate": 8.909495077802478e-05, "loss": 0.1093, "step": 1722 }, { "epoch": 3.2772230147408465, "grad_norm": 0.40264174342155457, "learning_rate": 8.908859955541441e-05, "loss": 0.146, "step": 1723 }, { "epoch": 3.279125059438897, "grad_norm": 0.45845937728881836, "learning_rate": 8.908224833280407e-05, "loss": 0.1457, "step": 1724 }, { "epoch": 3.281027104136947, "grad_norm": 0.34490594267845154, "learning_rate": 8.907589711019372e-05, "loss": 0.1247, "step": 1725 }, { "epoch": 3.282929148834998, "grad_norm": 0.4256596267223358, "learning_rate": 8.906954588758336e-05, "loss": 0.1563, "step": 1726 }, { "epoch": 3.284831193533048, "grad_norm": 0.3607080280780792, "learning_rate": 8.906319466497302e-05, "loss": 0.1279, "step": 1727 }, { "epoch": 3.2867332382310983, "grad_norm": 0.30969080328941345, "learning_rate": 8.905684344236266e-05, "loss": 0.1238, "step": 1728 }, { "epoch": 3.2886352829291488, "grad_norm": 0.34044647216796875, "learning_rate": 8.90504922197523e-05, "loss": 0.1237, "step": 1729 }, { "epoch": 3.290537327627199, "grad_norm": 0.40037238597869873, "learning_rate": 8.904414099714195e-05, "loss": 0.1509, "step": 1730 }, { "epoch": 3.2924393723252496, "grad_norm": 0.3565572500228882, "learning_rate": 8.90377897745316e-05, "loss": 0.1251, "step": 1731 }, { "epoch": 3.2943414170233, "grad_norm": 0.33730757236480713, "learning_rate": 8.903143855192125e-05, "loss": 0.1527, "step": 1732 }, { "epoch": 3.2962434617213505, "grad_norm": 0.4168394207954407, "learning_rate": 8.902508732931089e-05, "loss": 0.1429, "step": 1733 }, { "epoch": 3.298145506419401, "grad_norm": 0.40814298391342163, "learning_rate": 8.901873610670054e-05, "loss": 0.1588, "step": 1734 }, { "epoch": 3.3000475511174514, "grad_norm": 0.42030104994773865, "learning_rate": 8.90123848840902e-05, "loss": 0.1495, "step": 1735 }, { "epoch": 3.301949595815502, "grad_norm": 0.3305467367172241, "learning_rate": 8.900603366147983e-05, "loss": 0.1239, "step": 1736 }, { "epoch": 3.3038516405135523, "grad_norm": 0.31360068917274475, "learning_rate": 8.899968243886949e-05, "loss": 0.108, "step": 1737 }, { "epoch": 3.3057536852116023, "grad_norm": 0.42463186383247375, "learning_rate": 8.899333121625914e-05, "loss": 0.1451, "step": 1738 }, { "epoch": 3.3076557299096527, "grad_norm": 0.3854060471057892, "learning_rate": 8.898697999364878e-05, "loss": 0.1638, "step": 1739 }, { "epoch": 3.309557774607703, "grad_norm": 0.46821728348731995, "learning_rate": 8.898062877103843e-05, "loss": 0.1718, "step": 1740 }, { "epoch": 3.3114598193057536, "grad_norm": 0.33078089356422424, "learning_rate": 8.897427754842808e-05, "loss": 0.1153, "step": 1741 }, { "epoch": 3.313361864003804, "grad_norm": 0.3746374249458313, "learning_rate": 8.896792632581772e-05, "loss": 0.1387, "step": 1742 }, { "epoch": 3.3152639087018545, "grad_norm": 0.33252257108688354, "learning_rate": 8.896157510320737e-05, "loss": 0.1218, "step": 1743 }, { "epoch": 3.317165953399905, "grad_norm": 0.3421841561794281, "learning_rate": 8.895522388059702e-05, "loss": 0.1376, "step": 1744 }, { "epoch": 3.3190679980979554, "grad_norm": 0.3410481810569763, "learning_rate": 8.894887265798667e-05, "loss": 0.1174, "step": 1745 }, { "epoch": 3.320970042796006, "grad_norm": 0.3556031882762909, "learning_rate": 8.894252143537631e-05, "loss": 0.1612, "step": 1746 }, { "epoch": 3.3228720874940563, "grad_norm": 0.35139304399490356, "learning_rate": 8.893617021276595e-05, "loss": 0.1371, "step": 1747 }, { "epoch": 3.3247741321921067, "grad_norm": 0.38646724820137024, "learning_rate": 8.892981899015562e-05, "loss": 0.1472, "step": 1748 }, { "epoch": 3.3266761768901567, "grad_norm": 0.40337100625038147, "learning_rate": 8.892346776754525e-05, "loss": 0.1938, "step": 1749 }, { "epoch": 3.328578221588207, "grad_norm": 0.2508182227611542, "learning_rate": 8.89171165449349e-05, "loss": 0.0987, "step": 1750 }, { "epoch": 3.3304802662862576, "grad_norm": 0.392284631729126, "learning_rate": 8.891076532232456e-05, "loss": 0.1448, "step": 1751 }, { "epoch": 3.332382310984308, "grad_norm": 0.25311291217803955, "learning_rate": 8.89044140997142e-05, "loss": 0.1227, "step": 1752 }, { "epoch": 3.3342843556823585, "grad_norm": 0.38591787219047546, "learning_rate": 8.889806287710385e-05, "loss": 0.1251, "step": 1753 }, { "epoch": 3.336186400380409, "grad_norm": 0.3149789869785309, "learning_rate": 8.889171165449349e-05, "loss": 0.1282, "step": 1754 }, { "epoch": 3.3380884450784594, "grad_norm": 0.4134093225002289, "learning_rate": 8.888536043188315e-05, "loss": 0.1509, "step": 1755 }, { "epoch": 3.33999048977651, "grad_norm": 0.3769814074039459, "learning_rate": 8.887900920927279e-05, "loss": 0.1283, "step": 1756 }, { "epoch": 3.3418925344745603, "grad_norm": 0.42259126901626587, "learning_rate": 8.887265798666243e-05, "loss": 0.1319, "step": 1757 }, { "epoch": 3.3437945791726107, "grad_norm": 0.4603644609451294, "learning_rate": 8.88663067640521e-05, "loss": 0.1427, "step": 1758 }, { "epoch": 3.345696623870661, "grad_norm": 0.3804812431335449, "learning_rate": 8.885995554144173e-05, "loss": 0.1479, "step": 1759 }, { "epoch": 3.347598668568711, "grad_norm": 0.42290598154067993, "learning_rate": 8.885360431883137e-05, "loss": 0.17, "step": 1760 }, { "epoch": 3.3495007132667616, "grad_norm": 0.3739291727542877, "learning_rate": 8.884725309622102e-05, "loss": 0.1297, "step": 1761 }, { "epoch": 3.351402757964812, "grad_norm": 0.36516469717025757, "learning_rate": 8.884090187361067e-05, "loss": 0.1294, "step": 1762 }, { "epoch": 3.3533048026628625, "grad_norm": 0.32364609837532043, "learning_rate": 8.883455065100033e-05, "loss": 0.1211, "step": 1763 }, { "epoch": 3.355206847360913, "grad_norm": 0.3903793394565582, "learning_rate": 8.882819942838996e-05, "loss": 0.1339, "step": 1764 }, { "epoch": 3.3571088920589633, "grad_norm": 0.3321349322795868, "learning_rate": 8.882184820577962e-05, "loss": 0.1229, "step": 1765 }, { "epoch": 3.359010936757014, "grad_norm": 0.3843282163143158, "learning_rate": 8.881549698316927e-05, "loss": 0.1425, "step": 1766 }, { "epoch": 3.3609129814550642, "grad_norm": 0.34259116649627686, "learning_rate": 8.88091457605589e-05, "loss": 0.1275, "step": 1767 }, { "epoch": 3.3628150261531147, "grad_norm": 0.335219144821167, "learning_rate": 8.880279453794856e-05, "loss": 0.1273, "step": 1768 }, { "epoch": 3.364717070851165, "grad_norm": 0.3495425879955292, "learning_rate": 8.879644331533821e-05, "loss": 0.1112, "step": 1769 }, { "epoch": 3.3666191155492156, "grad_norm": 0.430451899766922, "learning_rate": 8.879009209272785e-05, "loss": 0.1404, "step": 1770 }, { "epoch": 3.368521160247266, "grad_norm": 0.24980789422988892, "learning_rate": 8.87837408701175e-05, "loss": 0.1034, "step": 1771 }, { "epoch": 3.3704232049453164, "grad_norm": 0.4349839687347412, "learning_rate": 8.877738964750715e-05, "loss": 0.1371, "step": 1772 }, { "epoch": 3.3723252496433664, "grad_norm": 0.3427116572856903, "learning_rate": 8.87710384248968e-05, "loss": 0.1224, "step": 1773 }, { "epoch": 3.374227294341417, "grad_norm": 0.3835298418998718, "learning_rate": 8.876468720228644e-05, "loss": 0.1576, "step": 1774 }, { "epoch": 3.3761293390394673, "grad_norm": 0.3284079432487488, "learning_rate": 8.87583359796761e-05, "loss": 0.1039, "step": 1775 }, { "epoch": 3.3780313837375178, "grad_norm": 0.32109662890434265, "learning_rate": 8.875198475706575e-05, "loss": 0.1079, "step": 1776 }, { "epoch": 3.379933428435568, "grad_norm": 0.27259504795074463, "learning_rate": 8.874563353445538e-05, "loss": 0.0983, "step": 1777 }, { "epoch": 3.3818354731336187, "grad_norm": 0.3639247417449951, "learning_rate": 8.873928231184504e-05, "loss": 0.1297, "step": 1778 }, { "epoch": 3.383737517831669, "grad_norm": 0.3729754388332367, "learning_rate": 8.873293108923469e-05, "loss": 0.1419, "step": 1779 }, { "epoch": 3.3856395625297195, "grad_norm": 0.44657668471336365, "learning_rate": 8.872657986662433e-05, "loss": 0.1299, "step": 1780 }, { "epoch": 3.38754160722777, "grad_norm": 0.2924906611442566, "learning_rate": 8.872022864401398e-05, "loss": 0.109, "step": 1781 }, { "epoch": 3.3894436519258204, "grad_norm": 0.3643059730529785, "learning_rate": 8.871387742140363e-05, "loss": 0.1217, "step": 1782 }, { "epoch": 3.391345696623871, "grad_norm": 0.31588301062583923, "learning_rate": 8.870752619879327e-05, "loss": 0.1309, "step": 1783 }, { "epoch": 3.393247741321921, "grad_norm": 0.5099390149116516, "learning_rate": 8.870117497618292e-05, "loss": 0.3371, "step": 1784 }, { "epoch": 3.3951497860199713, "grad_norm": 0.3374120891094208, "learning_rate": 8.869482375357256e-05, "loss": 0.1341, "step": 1785 }, { "epoch": 3.3970518307180217, "grad_norm": 0.36739760637283325, "learning_rate": 8.868847253096222e-05, "loss": 0.135, "step": 1786 }, { "epoch": 3.398953875416072, "grad_norm": 0.36785241961479187, "learning_rate": 8.868212130835186e-05, "loss": 0.1402, "step": 1787 }, { "epoch": 3.4008559201141226, "grad_norm": 0.3834420442581177, "learning_rate": 8.86757700857415e-05, "loss": 0.132, "step": 1788 }, { "epoch": 3.402757964812173, "grad_norm": 0.40532076358795166, "learning_rate": 8.866941886313117e-05, "loss": 0.1491, "step": 1789 }, { "epoch": 3.4046600095102235, "grad_norm": 0.3840698003768921, "learning_rate": 8.86630676405208e-05, "loss": 0.1238, "step": 1790 }, { "epoch": 3.406562054208274, "grad_norm": 0.3948921859264374, "learning_rate": 8.865671641791046e-05, "loss": 0.1452, "step": 1791 }, { "epoch": 3.4084640989063244, "grad_norm": 0.30841973423957825, "learning_rate": 8.86503651953001e-05, "loss": 0.1152, "step": 1792 }, { "epoch": 3.410366143604375, "grad_norm": 0.3028883635997772, "learning_rate": 8.864401397268975e-05, "loss": 0.103, "step": 1793 }, { "epoch": 3.4122681883024253, "grad_norm": 0.3348149359226227, "learning_rate": 8.86376627500794e-05, "loss": 0.124, "step": 1794 }, { "epoch": 3.4141702330004753, "grad_norm": 0.397709459066391, "learning_rate": 8.863131152746904e-05, "loss": 0.1489, "step": 1795 }, { "epoch": 3.4160722776985257, "grad_norm": 0.33986514806747437, "learning_rate": 8.862496030485869e-05, "loss": 0.1243, "step": 1796 }, { "epoch": 3.417974322396576, "grad_norm": 0.3443019688129425, "learning_rate": 8.861860908224834e-05, "loss": 0.1206, "step": 1797 }, { "epoch": 3.4198763670946266, "grad_norm": 0.2696784734725952, "learning_rate": 8.861225785963798e-05, "loss": 0.0978, "step": 1798 }, { "epoch": 3.421778411792677, "grad_norm": 0.3711314797401428, "learning_rate": 8.860590663702763e-05, "loss": 0.1416, "step": 1799 }, { "epoch": 3.4236804564907275, "grad_norm": 0.4727902114391327, "learning_rate": 8.859955541441728e-05, "loss": 0.1749, "step": 1800 }, { "epoch": 3.425582501188778, "grad_norm": 0.39370161294937134, "learning_rate": 8.859320419180692e-05, "loss": 0.1516, "step": 1801 }, { "epoch": 3.4274845458868284, "grad_norm": 0.36975982785224915, "learning_rate": 8.858685296919657e-05, "loss": 0.1185, "step": 1802 }, { "epoch": 3.429386590584879, "grad_norm": 0.30827558040618896, "learning_rate": 8.858050174658622e-05, "loss": 0.1292, "step": 1803 }, { "epoch": 3.4312886352829293, "grad_norm": 0.3955543339252472, "learning_rate": 8.857415052397588e-05, "loss": 0.1484, "step": 1804 }, { "epoch": 3.4331906799809797, "grad_norm": 0.35280320048332214, "learning_rate": 8.856779930136551e-05, "loss": 0.1241, "step": 1805 }, { "epoch": 3.4350927246790297, "grad_norm": 0.4241807460784912, "learning_rate": 8.856144807875517e-05, "loss": 0.1663, "step": 1806 }, { "epoch": 3.4369947693770806, "grad_norm": 0.41491755843162537, "learning_rate": 8.855509685614482e-05, "loss": 0.1465, "step": 1807 }, { "epoch": 3.4388968140751306, "grad_norm": 0.3022492229938507, "learning_rate": 8.854874563353446e-05, "loss": 0.1132, "step": 1808 }, { "epoch": 3.440798858773181, "grad_norm": 0.3701956570148468, "learning_rate": 8.854239441092411e-05, "loss": 0.1525, "step": 1809 }, { "epoch": 3.4427009034712315, "grad_norm": 0.3692464232444763, "learning_rate": 8.853604318831376e-05, "loss": 0.1364, "step": 1810 }, { "epoch": 3.444602948169282, "grad_norm": 0.2783905267715454, "learning_rate": 8.85296919657034e-05, "loss": 0.1112, "step": 1811 }, { "epoch": 3.4465049928673324, "grad_norm": 0.26422539353370667, "learning_rate": 8.852334074309305e-05, "loss": 0.0871, "step": 1812 }, { "epoch": 3.448407037565383, "grad_norm": 0.3428441882133484, "learning_rate": 8.85169895204827e-05, "loss": 0.1397, "step": 1813 }, { "epoch": 3.4503090822634332, "grad_norm": 0.43042463064193726, "learning_rate": 8.851063829787234e-05, "loss": 0.1524, "step": 1814 }, { "epoch": 3.4522111269614837, "grad_norm": 0.4124317765235901, "learning_rate": 8.850428707526199e-05, "loss": 0.165, "step": 1815 }, { "epoch": 3.454113171659534, "grad_norm": 0.38967373967170715, "learning_rate": 8.849793585265164e-05, "loss": 0.129, "step": 1816 }, { "epoch": 3.4560152163575846, "grad_norm": 0.3426058292388916, "learning_rate": 8.84915846300413e-05, "loss": 0.1229, "step": 1817 }, { "epoch": 3.457917261055635, "grad_norm": 0.4571113884449005, "learning_rate": 8.848523340743093e-05, "loss": 0.1428, "step": 1818 }, { "epoch": 3.459819305753685, "grad_norm": 0.43344834446907043, "learning_rate": 8.847888218482057e-05, "loss": 0.1561, "step": 1819 }, { "epoch": 3.4617213504517355, "grad_norm": 0.36749354004859924, "learning_rate": 8.847253096221024e-05, "loss": 0.1313, "step": 1820 }, { "epoch": 3.463623395149786, "grad_norm": 0.36647292971611023, "learning_rate": 8.846617973959988e-05, "loss": 0.1278, "step": 1821 }, { "epoch": 3.4655254398478363, "grad_norm": 0.3204960525035858, "learning_rate": 8.845982851698953e-05, "loss": 0.11, "step": 1822 }, { "epoch": 3.467427484545887, "grad_norm": 0.366187185049057, "learning_rate": 8.845347729437918e-05, "loss": 0.1443, "step": 1823 }, { "epoch": 3.4693295292439372, "grad_norm": 0.4711836874485016, "learning_rate": 8.844712607176882e-05, "loss": 0.151, "step": 1824 }, { "epoch": 3.4712315739419877, "grad_norm": 0.35596373677253723, "learning_rate": 8.844077484915847e-05, "loss": 0.1246, "step": 1825 }, { "epoch": 3.473133618640038, "grad_norm": 0.41798681020736694, "learning_rate": 8.843442362654811e-05, "loss": 0.1575, "step": 1826 }, { "epoch": 3.4750356633380886, "grad_norm": 0.3631289303302765, "learning_rate": 8.842807240393777e-05, "loss": 0.1105, "step": 1827 }, { "epoch": 3.476937708036139, "grad_norm": 0.36891433596611023, "learning_rate": 8.842172118132741e-05, "loss": 0.146, "step": 1828 }, { "epoch": 3.4788397527341894, "grad_norm": 0.33271533250808716, "learning_rate": 8.841536995871705e-05, "loss": 0.1246, "step": 1829 }, { "epoch": 3.4807417974322394, "grad_norm": 0.2956920266151428, "learning_rate": 8.840901873610671e-05, "loss": 0.1181, "step": 1830 }, { "epoch": 3.48264384213029, "grad_norm": 0.3685608506202698, "learning_rate": 8.840266751349635e-05, "loss": 0.1338, "step": 1831 }, { "epoch": 3.4845458868283403, "grad_norm": 0.35031598806381226, "learning_rate": 8.839631629088599e-05, "loss": 0.1166, "step": 1832 }, { "epoch": 3.4864479315263908, "grad_norm": 0.5173628330230713, "learning_rate": 8.838996506827564e-05, "loss": 0.157, "step": 1833 }, { "epoch": 3.488349976224441, "grad_norm": 0.4643428921699524, "learning_rate": 8.83836138456653e-05, "loss": 0.1842, "step": 1834 }, { "epoch": 3.4902520209224916, "grad_norm": 0.3688521981239319, "learning_rate": 8.837726262305495e-05, "loss": 0.1375, "step": 1835 }, { "epoch": 3.492154065620542, "grad_norm": 0.3947365880012512, "learning_rate": 8.837091140044458e-05, "loss": 0.149, "step": 1836 }, { "epoch": 3.4940561103185925, "grad_norm": 0.35394486784935, "learning_rate": 8.836456017783424e-05, "loss": 0.1252, "step": 1837 }, { "epoch": 3.495958155016643, "grad_norm": 0.37168943881988525, "learning_rate": 8.835820895522389e-05, "loss": 0.1318, "step": 1838 }, { "epoch": 3.4978601997146934, "grad_norm": 0.37239521741867065, "learning_rate": 8.835185773261353e-05, "loss": 0.1214, "step": 1839 }, { "epoch": 3.499762244412744, "grad_norm": 0.36515411734580994, "learning_rate": 8.834550651000318e-05, "loss": 0.1412, "step": 1840 }, { "epoch": 3.501664289110794, "grad_norm": 0.38534054160118103, "learning_rate": 8.833915528739283e-05, "loss": 0.1334, "step": 1841 }, { "epoch": 3.5035663338088447, "grad_norm": 0.36949092149734497, "learning_rate": 8.833280406478247e-05, "loss": 0.1283, "step": 1842 }, { "epoch": 3.5054683785068947, "grad_norm": 0.39546898007392883, "learning_rate": 8.832645284217212e-05, "loss": 0.1471, "step": 1843 }, { "epoch": 3.507370423204945, "grad_norm": 0.34906435012817383, "learning_rate": 8.832010161956177e-05, "loss": 0.1386, "step": 1844 }, { "epoch": 3.5092724679029956, "grad_norm": 0.44590094685554504, "learning_rate": 8.831375039695142e-05, "loss": 0.157, "step": 1845 }, { "epoch": 3.511174512601046, "grad_norm": 0.3336107134819031, "learning_rate": 8.830739917434106e-05, "loss": 0.1435, "step": 1846 }, { "epoch": 3.5130765572990965, "grad_norm": 0.4013485610485077, "learning_rate": 8.830104795173071e-05, "loss": 0.1209, "step": 1847 }, { "epoch": 3.514978601997147, "grad_norm": 0.30285441875457764, "learning_rate": 8.829469672912037e-05, "loss": 0.108, "step": 1848 }, { "epoch": 3.5168806466951974, "grad_norm": 0.440489798784256, "learning_rate": 8.828834550651e-05, "loss": 0.1514, "step": 1849 }, { "epoch": 3.518782691393248, "grad_norm": 0.26309430599212646, "learning_rate": 8.828199428389964e-05, "loss": 0.0953, "step": 1850 }, { "epoch": 3.5206847360912983, "grad_norm": 0.548433244228363, "learning_rate": 8.827564306128931e-05, "loss": 0.1977, "step": 1851 }, { "epoch": 3.5225867807893483, "grad_norm": 0.4941021203994751, "learning_rate": 8.826929183867895e-05, "loss": 0.1268, "step": 1852 }, { "epoch": 3.524488825487399, "grad_norm": 0.3945002555847168, "learning_rate": 8.82629406160686e-05, "loss": 0.1304, "step": 1853 }, { "epoch": 3.526390870185449, "grad_norm": 0.3647942841053009, "learning_rate": 8.825658939345825e-05, "loss": 0.1454, "step": 1854 }, { "epoch": 3.5282929148834996, "grad_norm": 0.3890063762664795, "learning_rate": 8.825023817084789e-05, "loss": 0.1384, "step": 1855 }, { "epoch": 3.53019495958155, "grad_norm": 0.4001372456550598, "learning_rate": 8.824388694823754e-05, "loss": 0.1429, "step": 1856 }, { "epoch": 3.5320970042796005, "grad_norm": 0.407721608877182, "learning_rate": 8.823753572562718e-05, "loss": 0.1374, "step": 1857 }, { "epoch": 3.533999048977651, "grad_norm": 0.37832140922546387, "learning_rate": 8.823118450301684e-05, "loss": 0.1236, "step": 1858 }, { "epoch": 3.5359010936757014, "grad_norm": 0.35406047105789185, "learning_rate": 8.822483328040648e-05, "loss": 0.1306, "step": 1859 }, { "epoch": 3.537803138373752, "grad_norm": 0.2923578917980194, "learning_rate": 8.821848205779612e-05, "loss": 0.0986, "step": 1860 }, { "epoch": 3.5397051830718023, "grad_norm": 0.3824620544910431, "learning_rate": 8.821213083518579e-05, "loss": 0.1492, "step": 1861 }, { "epoch": 3.5416072277698527, "grad_norm": 0.38851413130760193, "learning_rate": 8.820577961257542e-05, "loss": 0.1612, "step": 1862 }, { "epoch": 3.543509272467903, "grad_norm": 0.3961692154407501, "learning_rate": 8.819942838996508e-05, "loss": 0.1525, "step": 1863 }, { "epoch": 3.5454113171659536, "grad_norm": 0.423235684633255, "learning_rate": 8.819307716735471e-05, "loss": 0.1514, "step": 1864 }, { "epoch": 3.5473133618640036, "grad_norm": 0.3355453610420227, "learning_rate": 8.818672594474437e-05, "loss": 0.1183, "step": 1865 }, { "epoch": 3.5492154065620545, "grad_norm": 0.44291865825653076, "learning_rate": 8.818037472213402e-05, "loss": 0.1457, "step": 1866 }, { "epoch": 3.5511174512601045, "grad_norm": 0.39356529712677, "learning_rate": 8.817402349952366e-05, "loss": 0.146, "step": 1867 }, { "epoch": 3.553019495958155, "grad_norm": 0.28863412141799927, "learning_rate": 8.816767227691331e-05, "loss": 0.1113, "step": 1868 }, { "epoch": 3.5549215406562054, "grad_norm": 0.3859669268131256, "learning_rate": 8.816132105430296e-05, "loss": 0.1234, "step": 1869 }, { "epoch": 3.556823585354256, "grad_norm": 0.3483799993991852, "learning_rate": 8.81549698316926e-05, "loss": 0.1324, "step": 1870 }, { "epoch": 3.5587256300523062, "grad_norm": 0.3053433299064636, "learning_rate": 8.814861860908225e-05, "loss": 0.1252, "step": 1871 }, { "epoch": 3.5606276747503567, "grad_norm": 0.44125038385391235, "learning_rate": 8.81422673864719e-05, "loss": 0.1627, "step": 1872 }, { "epoch": 3.562529719448407, "grad_norm": 0.35409316420555115, "learning_rate": 8.813591616386154e-05, "loss": 0.1312, "step": 1873 }, { "epoch": 3.5644317641464576, "grad_norm": 0.4219510853290558, "learning_rate": 8.812956494125119e-05, "loss": 0.1522, "step": 1874 }, { "epoch": 3.566333808844508, "grad_norm": 0.4153057932853699, "learning_rate": 8.812321371864084e-05, "loss": 0.1272, "step": 1875 }, { "epoch": 3.568235853542558, "grad_norm": 0.3225264549255371, "learning_rate": 8.81168624960305e-05, "loss": 0.1461, "step": 1876 }, { "epoch": 3.570137898240609, "grad_norm": 0.41065141558647156, "learning_rate": 8.811051127342013e-05, "loss": 0.1466, "step": 1877 }, { "epoch": 3.572039942938659, "grad_norm": 0.33854374289512634, "learning_rate": 8.810416005080979e-05, "loss": 0.2636, "step": 1878 }, { "epoch": 3.5739419876367093, "grad_norm": 0.4266054034233093, "learning_rate": 8.809780882819944e-05, "loss": 0.1546, "step": 1879 }, { "epoch": 3.57584403233476, "grad_norm": 0.32462188601493835, "learning_rate": 8.809145760558908e-05, "loss": 0.0992, "step": 1880 }, { "epoch": 3.5777460770328102, "grad_norm": 0.3243044912815094, "learning_rate": 8.808510638297873e-05, "loss": 0.127, "step": 1881 }, { "epoch": 3.5796481217308607, "grad_norm": 0.36742255091667175, "learning_rate": 8.807875516036838e-05, "loss": 0.1648, "step": 1882 }, { "epoch": 3.581550166428911, "grad_norm": 0.47478726506233215, "learning_rate": 8.807240393775802e-05, "loss": 0.1402, "step": 1883 }, { "epoch": 3.5834522111269616, "grad_norm": 0.29675087332725525, "learning_rate": 8.806605271514767e-05, "loss": 0.1102, "step": 1884 }, { "epoch": 3.585354255825012, "grad_norm": 0.26269370317459106, "learning_rate": 8.805970149253732e-05, "loss": 0.0926, "step": 1885 }, { "epoch": 3.5872563005230624, "grad_norm": 0.42690059542655945, "learning_rate": 8.805335026992696e-05, "loss": 0.1663, "step": 1886 }, { "epoch": 3.5891583452211124, "grad_norm": 0.4843170940876007, "learning_rate": 8.804699904731661e-05, "loss": 0.156, "step": 1887 }, { "epoch": 3.5910603899191633, "grad_norm": 0.4166446030139923, "learning_rate": 8.804064782470626e-05, "loss": 0.1556, "step": 1888 }, { "epoch": 3.5929624346172133, "grad_norm": 0.3265363872051239, "learning_rate": 8.803429660209592e-05, "loss": 0.122, "step": 1889 }, { "epoch": 3.5948644793152638, "grad_norm": 0.4674152433872223, "learning_rate": 8.802794537948555e-05, "loss": 0.1706, "step": 1890 }, { "epoch": 3.596766524013314, "grad_norm": 0.4072030782699585, "learning_rate": 8.802159415687519e-05, "loss": 0.1465, "step": 1891 }, { "epoch": 3.5986685687113646, "grad_norm": 0.4924727976322174, "learning_rate": 8.801524293426486e-05, "loss": 0.153, "step": 1892 }, { "epoch": 3.600570613409415, "grad_norm": 0.34262821078300476, "learning_rate": 8.80088917116545e-05, "loss": 0.1221, "step": 1893 }, { "epoch": 3.6024726581074655, "grad_norm": 0.3641190528869629, "learning_rate": 8.800254048904415e-05, "loss": 0.1146, "step": 1894 }, { "epoch": 3.604374702805516, "grad_norm": 0.3594358265399933, "learning_rate": 8.799618926643379e-05, "loss": 0.1198, "step": 1895 }, { "epoch": 3.6062767475035664, "grad_norm": 0.40045297145843506, "learning_rate": 8.798983804382344e-05, "loss": 0.2122, "step": 1896 }, { "epoch": 3.608178792201617, "grad_norm": 0.40417537093162537, "learning_rate": 8.798348682121309e-05, "loss": 0.1523, "step": 1897 }, { "epoch": 3.6100808368996673, "grad_norm": 0.3493559658527374, "learning_rate": 8.797713559860273e-05, "loss": 0.1105, "step": 1898 }, { "epoch": 3.6119828815977177, "grad_norm": 0.3540056645870209, "learning_rate": 8.79707843759924e-05, "loss": 0.1205, "step": 1899 }, { "epoch": 3.6138849262957677, "grad_norm": 0.4836410582065582, "learning_rate": 8.796443315338203e-05, "loss": 0.184, "step": 1900 }, { "epoch": 3.6157869709938186, "grad_norm": 0.34036317467689514, "learning_rate": 8.795808193077167e-05, "loss": 0.1313, "step": 1901 }, { "epoch": 3.6176890156918686, "grad_norm": 0.34924453496932983, "learning_rate": 8.795173070816132e-05, "loss": 0.1018, "step": 1902 }, { "epoch": 3.619591060389919, "grad_norm": 0.4308503270149231, "learning_rate": 8.794537948555097e-05, "loss": 0.1396, "step": 1903 }, { "epoch": 3.6214931050879695, "grad_norm": 0.44268596172332764, "learning_rate": 8.793902826294061e-05, "loss": 0.1377, "step": 1904 }, { "epoch": 3.62339514978602, "grad_norm": 0.36984702944755554, "learning_rate": 8.793267704033026e-05, "loss": 0.1343, "step": 1905 }, { "epoch": 3.6252971944840704, "grad_norm": 0.3913877606391907, "learning_rate": 8.792632581771992e-05, "loss": 0.1443, "step": 1906 }, { "epoch": 3.627199239182121, "grad_norm": 0.4213595986366272, "learning_rate": 8.791997459510957e-05, "loss": 0.1537, "step": 1907 }, { "epoch": 3.6291012838801713, "grad_norm": 0.4095703959465027, "learning_rate": 8.79136233724992e-05, "loss": 0.151, "step": 1908 }, { "epoch": 3.6310033285782217, "grad_norm": 0.366328626871109, "learning_rate": 8.790727214988886e-05, "loss": 0.1198, "step": 1909 }, { "epoch": 3.632905373276272, "grad_norm": 0.4124557375907898, "learning_rate": 8.790092092727851e-05, "loss": 0.1408, "step": 1910 }, { "epoch": 3.634807417974322, "grad_norm": 0.36249884963035583, "learning_rate": 8.789456970466815e-05, "loss": 0.2058, "step": 1911 }, { "epoch": 3.636709462672373, "grad_norm": 0.40580618381500244, "learning_rate": 8.78882184820578e-05, "loss": 0.1247, "step": 1912 }, { "epoch": 3.638611507370423, "grad_norm": 0.30640462040901184, "learning_rate": 8.788186725944745e-05, "loss": 0.1078, "step": 1913 }, { "epoch": 3.6405135520684735, "grad_norm": 0.4200808107852936, "learning_rate": 8.787551603683709e-05, "loss": 0.1572, "step": 1914 }, { "epoch": 3.642415596766524, "grad_norm": 0.43338900804519653, "learning_rate": 8.786916481422674e-05, "loss": 0.1606, "step": 1915 }, { "epoch": 3.6443176414645744, "grad_norm": 0.4340536296367645, "learning_rate": 8.78628135916164e-05, "loss": 0.1711, "step": 1916 }, { "epoch": 3.646219686162625, "grad_norm": 0.3239591419696808, "learning_rate": 8.785646236900605e-05, "loss": 0.1166, "step": 1917 }, { "epoch": 3.6481217308606753, "grad_norm": 0.3957262933254242, "learning_rate": 8.785011114639568e-05, "loss": 0.1605, "step": 1918 }, { "epoch": 3.6500237755587257, "grad_norm": 0.4386723041534424, "learning_rate": 8.784375992378534e-05, "loss": 0.1595, "step": 1919 }, { "epoch": 3.651925820256776, "grad_norm": 0.376113623380661, "learning_rate": 8.783740870117499e-05, "loss": 0.1708, "step": 1920 }, { "epoch": 3.6538278649548266, "grad_norm": 0.2861535847187042, "learning_rate": 8.783105747856463e-05, "loss": 0.1134, "step": 1921 }, { "epoch": 3.6557299096528766, "grad_norm": 0.3381497263908386, "learning_rate": 8.782470625595426e-05, "loss": 0.1522, "step": 1922 }, { "epoch": 3.6576319543509275, "grad_norm": 0.2682400047779083, "learning_rate": 8.781835503334393e-05, "loss": 0.1007, "step": 1923 }, { "epoch": 3.6595339990489775, "grad_norm": 0.4277699887752533, "learning_rate": 8.781200381073357e-05, "loss": 0.1757, "step": 1924 }, { "epoch": 3.661436043747028, "grad_norm": 0.3176470696926117, "learning_rate": 8.780565258812322e-05, "loss": 0.1186, "step": 1925 }, { "epoch": 3.6633380884450784, "grad_norm": 0.32315725088119507, "learning_rate": 8.779930136551287e-05, "loss": 0.1353, "step": 1926 }, { "epoch": 3.665240133143129, "grad_norm": 0.44492077827453613, "learning_rate": 8.779295014290251e-05, "loss": 0.1689, "step": 1927 }, { "epoch": 3.6671421778411792, "grad_norm": 0.33450883626937866, "learning_rate": 8.778659892029216e-05, "loss": 0.1171, "step": 1928 }, { "epoch": 3.6690442225392297, "grad_norm": 0.45678386092185974, "learning_rate": 8.77802476976818e-05, "loss": 0.1547, "step": 1929 }, { "epoch": 3.67094626723728, "grad_norm": 0.3756123185157776, "learning_rate": 8.777389647507147e-05, "loss": 0.1441, "step": 1930 }, { "epoch": 3.6728483119353306, "grad_norm": 0.30440792441368103, "learning_rate": 8.77675452524611e-05, "loss": 0.1034, "step": 1931 }, { "epoch": 3.674750356633381, "grad_norm": 0.38540956377983093, "learning_rate": 8.776119402985074e-05, "loss": 0.1456, "step": 1932 }, { "epoch": 3.6766524013314315, "grad_norm": 0.42409566044807434, "learning_rate": 8.775484280724041e-05, "loss": 0.1445, "step": 1933 }, { "epoch": 3.678554446029482, "grad_norm": 0.3903610408306122, "learning_rate": 8.774849158463005e-05, "loss": 0.1428, "step": 1934 }, { "epoch": 3.680456490727532, "grad_norm": 0.4002249836921692, "learning_rate": 8.77421403620197e-05, "loss": 0.1328, "step": 1935 }, { "epoch": 3.6823585354255823, "grad_norm": 0.37625521421432495, "learning_rate": 8.773578913940934e-05, "loss": 0.1271, "step": 1936 }, { "epoch": 3.6842605801236328, "grad_norm": 0.333882600069046, "learning_rate": 8.772943791679899e-05, "loss": 0.1209, "step": 1937 }, { "epoch": 3.686162624821683, "grad_norm": 0.3934018313884735, "learning_rate": 8.772308669418864e-05, "loss": 0.1383, "step": 1938 }, { "epoch": 3.6880646695197337, "grad_norm": 0.3329316973686218, "learning_rate": 8.771673547157828e-05, "loss": 0.1334, "step": 1939 }, { "epoch": 3.689966714217784, "grad_norm": 0.3686552047729492, "learning_rate": 8.771038424896793e-05, "loss": 0.1163, "step": 1940 }, { "epoch": 3.6918687589158345, "grad_norm": 0.35531577467918396, "learning_rate": 8.770403302635758e-05, "loss": 0.114, "step": 1941 }, { "epoch": 3.693770803613885, "grad_norm": 0.4164102375507355, "learning_rate": 8.769768180374722e-05, "loss": 0.1271, "step": 1942 }, { "epoch": 3.6956728483119354, "grad_norm": 0.4182850420475006, "learning_rate": 8.769133058113687e-05, "loss": 0.1343, "step": 1943 }, { "epoch": 3.697574893009986, "grad_norm": 0.3373199701309204, "learning_rate": 8.768497935852652e-05, "loss": 0.1424, "step": 1944 }, { "epoch": 3.6994769377080363, "grad_norm": 0.44398215413093567, "learning_rate": 8.767862813591616e-05, "loss": 0.1626, "step": 1945 }, { "epoch": 3.7013789824060863, "grad_norm": 0.2877051830291748, "learning_rate": 8.767227691330581e-05, "loss": 0.0941, "step": 1946 }, { "epoch": 3.703281027104137, "grad_norm": 0.30384746193885803, "learning_rate": 8.766592569069547e-05, "loss": 0.1239, "step": 1947 }, { "epoch": 3.705183071802187, "grad_norm": 0.41360363364219666, "learning_rate": 8.765957446808512e-05, "loss": 0.1567, "step": 1948 }, { "epoch": 3.7070851165002376, "grad_norm": 0.28865674138069153, "learning_rate": 8.765322324547476e-05, "loss": 0.1165, "step": 1949 }, { "epoch": 3.708987161198288, "grad_norm": 0.341654509305954, "learning_rate": 8.764687202286441e-05, "loss": 0.1199, "step": 1950 }, { "epoch": 3.7108892058963385, "grad_norm": 0.33211663365364075, "learning_rate": 8.764052080025406e-05, "loss": 0.1386, "step": 1951 }, { "epoch": 3.712791250594389, "grad_norm": 0.37999534606933594, "learning_rate": 8.76341695776437e-05, "loss": 0.1411, "step": 1952 }, { "epoch": 3.7146932952924394, "grad_norm": 0.3158533573150635, "learning_rate": 8.762781835503335e-05, "loss": 0.1082, "step": 1953 }, { "epoch": 3.71659533999049, "grad_norm": 0.42071765661239624, "learning_rate": 8.7621467132423e-05, "loss": 0.2395, "step": 1954 }, { "epoch": 3.7184973846885403, "grad_norm": 0.3723015785217285, "learning_rate": 8.761511590981264e-05, "loss": 0.1427, "step": 1955 }, { "epoch": 3.7203994293865907, "grad_norm": 0.31827929615974426, "learning_rate": 8.760876468720229e-05, "loss": 0.0983, "step": 1956 }, { "epoch": 3.7223014740846407, "grad_norm": 0.45022010803222656, "learning_rate": 8.760241346459194e-05, "loss": 0.1658, "step": 1957 }, { "epoch": 3.7242035187826916, "grad_norm": 0.4069976508617401, "learning_rate": 8.759606224198158e-05, "loss": 0.1277, "step": 1958 }, { "epoch": 3.7261055634807416, "grad_norm": 0.3239624500274658, "learning_rate": 8.758971101937123e-05, "loss": 0.1204, "step": 1959 }, { "epoch": 3.728007608178792, "grad_norm": 0.38038089871406555, "learning_rate": 8.758335979676087e-05, "loss": 0.1305, "step": 1960 }, { "epoch": 3.7299096528768425, "grad_norm": 0.44531160593032837, "learning_rate": 8.757700857415054e-05, "loss": 0.1504, "step": 1961 }, { "epoch": 3.731811697574893, "grad_norm": 0.380256712436676, "learning_rate": 8.757065735154017e-05, "loss": 0.1213, "step": 1962 }, { "epoch": 3.7337137422729434, "grad_norm": 0.39982911944389343, "learning_rate": 8.756430612892981e-05, "loss": 0.1255, "step": 1963 }, { "epoch": 3.735615786970994, "grad_norm": 0.39186495542526245, "learning_rate": 8.755795490631948e-05, "loss": 0.1459, "step": 1964 }, { "epoch": 3.7375178316690443, "grad_norm": 0.4191820025444031, "learning_rate": 8.755160368370912e-05, "loss": 0.1269, "step": 1965 }, { "epoch": 3.7394198763670947, "grad_norm": 0.3438499867916107, "learning_rate": 8.754525246109877e-05, "loss": 0.124, "step": 1966 }, { "epoch": 3.741321921065145, "grad_norm": 0.3626823127269745, "learning_rate": 8.753890123848841e-05, "loss": 0.1326, "step": 1967 }, { "epoch": 3.743223965763195, "grad_norm": 0.3823707103729248, "learning_rate": 8.753255001587806e-05, "loss": 0.1351, "step": 1968 }, { "epoch": 3.745126010461246, "grad_norm": 0.3537774980068207, "learning_rate": 8.752619879326771e-05, "loss": 0.1079, "step": 1969 }, { "epoch": 3.747028055159296, "grad_norm": 0.4008922576904297, "learning_rate": 8.751984757065735e-05, "loss": 0.1752, "step": 1970 }, { "epoch": 3.7489300998573465, "grad_norm": 0.3501138687133789, "learning_rate": 8.751349634804701e-05, "loss": 0.1296, "step": 1971 }, { "epoch": 3.750832144555397, "grad_norm": 0.3441070318222046, "learning_rate": 8.750714512543665e-05, "loss": 0.1161, "step": 1972 }, { "epoch": 3.7527341892534474, "grad_norm": 0.42847099900245667, "learning_rate": 8.750079390282629e-05, "loss": 0.1483, "step": 1973 }, { "epoch": 3.754636233951498, "grad_norm": 0.4879817068576813, "learning_rate": 8.749444268021594e-05, "loss": 0.1725, "step": 1974 }, { "epoch": 3.7565382786495483, "grad_norm": 0.32576873898506165, "learning_rate": 8.74880914576056e-05, "loss": 0.1211, "step": 1975 }, { "epoch": 3.7584403233475987, "grad_norm": 0.4470548927783966, "learning_rate": 8.748174023499523e-05, "loss": 0.155, "step": 1976 }, { "epoch": 3.760342368045649, "grad_norm": 0.506020724773407, "learning_rate": 8.747538901238488e-05, "loss": 0.1924, "step": 1977 }, { "epoch": 3.7622444127436996, "grad_norm": 0.3949258625507355, "learning_rate": 8.746903778977454e-05, "loss": 0.1365, "step": 1978 }, { "epoch": 3.76414645744175, "grad_norm": 0.381511390209198, "learning_rate": 8.746268656716419e-05, "loss": 0.1706, "step": 1979 }, { "epoch": 3.7660485021398005, "grad_norm": 0.32848381996154785, "learning_rate": 8.745633534455383e-05, "loss": 0.1302, "step": 1980 }, { "epoch": 3.7679505468378505, "grad_norm": 0.39011678099632263, "learning_rate": 8.744998412194348e-05, "loss": 0.1501, "step": 1981 }, { "epoch": 3.7698525915359014, "grad_norm": 0.35527095198631287, "learning_rate": 8.744363289933313e-05, "loss": 0.1218, "step": 1982 }, { "epoch": 3.7717546362339514, "grad_norm": 0.4448065459728241, "learning_rate": 8.743728167672277e-05, "loss": 0.1527, "step": 1983 }, { "epoch": 3.773656680932002, "grad_norm": 0.45173025131225586, "learning_rate": 8.743093045411242e-05, "loss": 0.1546, "step": 1984 }, { "epoch": 3.7755587256300522, "grad_norm": 0.3051410913467407, "learning_rate": 8.742457923150207e-05, "loss": 0.1176, "step": 1985 }, { "epoch": 3.7774607703281027, "grad_norm": 0.4559077322483063, "learning_rate": 8.741822800889171e-05, "loss": 0.1466, "step": 1986 }, { "epoch": 3.779362815026153, "grad_norm": 0.33901482820510864, "learning_rate": 8.741187678628136e-05, "loss": 0.1263, "step": 1987 }, { "epoch": 3.7812648597242036, "grad_norm": 0.3377963900566101, "learning_rate": 8.740552556367101e-05, "loss": 0.1029, "step": 1988 }, { "epoch": 3.783166904422254, "grad_norm": 0.3285292088985443, "learning_rate": 8.739917434106067e-05, "loss": 0.1256, "step": 1989 }, { "epoch": 3.7850689491203044, "grad_norm": 0.4042280614376068, "learning_rate": 8.73928231184503e-05, "loss": 0.1554, "step": 1990 }, { "epoch": 3.786970993818355, "grad_norm": 0.374153733253479, "learning_rate": 8.738647189583996e-05, "loss": 0.1109, "step": 1991 }, { "epoch": 3.788873038516405, "grad_norm": 0.3667593002319336, "learning_rate": 8.738012067322961e-05, "loss": 0.1014, "step": 1992 }, { "epoch": 3.7907750832144558, "grad_norm": 0.40893805027008057, "learning_rate": 8.737376945061925e-05, "loss": 0.137, "step": 1993 }, { "epoch": 3.7926771279125058, "grad_norm": 0.4428877830505371, "learning_rate": 8.736741822800888e-05, "loss": 0.1516, "step": 1994 }, { "epoch": 3.794579172610556, "grad_norm": 0.4404061734676361, "learning_rate": 8.736106700539855e-05, "loss": 0.155, "step": 1995 }, { "epoch": 3.7964812173086067, "grad_norm": 0.3298742473125458, "learning_rate": 8.735471578278819e-05, "loss": 0.1244, "step": 1996 }, { "epoch": 3.798383262006657, "grad_norm": 0.36190545558929443, "learning_rate": 8.734836456017784e-05, "loss": 0.148, "step": 1997 }, { "epoch": 3.8002853067047075, "grad_norm": 0.34386786818504333, "learning_rate": 8.734201333756749e-05, "loss": 0.1479, "step": 1998 }, { "epoch": 3.802187351402758, "grad_norm": 0.434257835149765, "learning_rate": 8.733566211495713e-05, "loss": 0.1624, "step": 1999 }, { "epoch": 3.8040893961008084, "grad_norm": 0.369232177734375, "learning_rate": 8.732931089234678e-05, "loss": 0.1297, "step": 2000 }, { "epoch": 3.805991440798859, "grad_norm": 0.31438469886779785, "learning_rate": 8.732295966973642e-05, "loss": 0.1074, "step": 2001 }, { "epoch": 3.8078934854969093, "grad_norm": 0.4128814935684204, "learning_rate": 8.731660844712609e-05, "loss": 0.1489, "step": 2002 }, { "epoch": 3.8097955301949593, "grad_norm": 0.2960624694824219, "learning_rate": 8.731025722451572e-05, "loss": 0.1063, "step": 2003 }, { "epoch": 3.81169757489301, "grad_norm": 0.35740041732788086, "learning_rate": 8.730390600190536e-05, "loss": 0.1438, "step": 2004 }, { "epoch": 3.81359961959106, "grad_norm": 0.3402657210826874, "learning_rate": 8.729755477929501e-05, "loss": 0.151, "step": 2005 }, { "epoch": 3.8155016642891106, "grad_norm": 0.3280869722366333, "learning_rate": 8.729120355668467e-05, "loss": 0.112, "step": 2006 }, { "epoch": 3.817403708987161, "grad_norm": 0.3747129440307617, "learning_rate": 8.728485233407432e-05, "loss": 0.1191, "step": 2007 }, { "epoch": 3.8193057536852115, "grad_norm": 0.3609796464443207, "learning_rate": 8.727850111146396e-05, "loss": 0.1373, "step": 2008 }, { "epoch": 3.821207798383262, "grad_norm": 0.38992708921432495, "learning_rate": 8.727214988885361e-05, "loss": 0.1474, "step": 2009 }, { "epoch": 3.8231098430813124, "grad_norm": 0.3531118929386139, "learning_rate": 8.726579866624326e-05, "loss": 0.1188, "step": 2010 }, { "epoch": 3.825011887779363, "grad_norm": 0.30585137009620667, "learning_rate": 8.72594474436329e-05, "loss": 0.1072, "step": 2011 }, { "epoch": 3.8269139324774133, "grad_norm": 0.40438538789749146, "learning_rate": 8.725309622102255e-05, "loss": 0.1527, "step": 2012 }, { "epoch": 3.8288159771754637, "grad_norm": 0.31290772557258606, "learning_rate": 8.72467449984122e-05, "loss": 0.1251, "step": 2013 }, { "epoch": 3.830718021873514, "grad_norm": 0.389160692691803, "learning_rate": 8.724039377580184e-05, "loss": 0.1387, "step": 2014 }, { "epoch": 3.8326200665715646, "grad_norm": 0.34139397740364075, "learning_rate": 8.723404255319149e-05, "loss": 0.1205, "step": 2015 }, { "epoch": 3.8345221112696146, "grad_norm": 0.4144088923931122, "learning_rate": 8.722769133058114e-05, "loss": 0.1493, "step": 2016 }, { "epoch": 3.8364241559676655, "grad_norm": 0.3793914318084717, "learning_rate": 8.722134010797078e-05, "loss": 0.1379, "step": 2017 }, { "epoch": 3.8383262006657155, "grad_norm": 0.3809344470500946, "learning_rate": 8.721498888536043e-05, "loss": 0.196, "step": 2018 }, { "epoch": 3.840228245363766, "grad_norm": 0.3764810860157013, "learning_rate": 8.720863766275009e-05, "loss": 0.1096, "step": 2019 }, { "epoch": 3.8421302900618164, "grad_norm": 0.47973567247390747, "learning_rate": 8.720228644013974e-05, "loss": 0.1195, "step": 2020 }, { "epoch": 3.844032334759867, "grad_norm": 0.4527863562107086, "learning_rate": 8.719593521752938e-05, "loss": 0.2112, "step": 2021 }, { "epoch": 3.8459343794579173, "grad_norm": 0.39066699147224426, "learning_rate": 8.718958399491903e-05, "loss": 0.1281, "step": 2022 }, { "epoch": 3.8478364241559677, "grad_norm": 0.37056446075439453, "learning_rate": 8.718323277230868e-05, "loss": 0.1519, "step": 2023 }, { "epoch": 3.849738468854018, "grad_norm": 0.516057550907135, "learning_rate": 8.717688154969832e-05, "loss": 0.1657, "step": 2024 }, { "epoch": 3.8516405135520686, "grad_norm": 0.3468872010707855, "learning_rate": 8.717053032708797e-05, "loss": 0.1408, "step": 2025 }, { "epoch": 3.853542558250119, "grad_norm": 0.5452744364738464, "learning_rate": 8.716417910447762e-05, "loss": 0.3173, "step": 2026 }, { "epoch": 3.855444602948169, "grad_norm": 0.4378301501274109, "learning_rate": 8.715782788186726e-05, "loss": 0.136, "step": 2027 }, { "epoch": 3.85734664764622, "grad_norm": 0.49818679690361023, "learning_rate": 8.715147665925691e-05, "loss": 0.233, "step": 2028 }, { "epoch": 3.85924869234427, "grad_norm": 0.4228188693523407, "learning_rate": 8.714512543664656e-05, "loss": 0.1485, "step": 2029 }, { "epoch": 3.8611507370423204, "grad_norm": 0.34110891819000244, "learning_rate": 8.71387742140362e-05, "loss": 0.1455, "step": 2030 }, { "epoch": 3.863052781740371, "grad_norm": 0.38667479157447815, "learning_rate": 8.713242299142585e-05, "loss": 0.1302, "step": 2031 }, { "epoch": 3.8649548264384213, "grad_norm": 0.3971845805644989, "learning_rate": 8.712607176881549e-05, "loss": 0.1562, "step": 2032 }, { "epoch": 3.8668568711364717, "grad_norm": 0.32637760043144226, "learning_rate": 8.711972054620516e-05, "loss": 0.1213, "step": 2033 }, { "epoch": 3.868758915834522, "grad_norm": 0.3475836217403412, "learning_rate": 8.71133693235948e-05, "loss": 0.1514, "step": 2034 }, { "epoch": 3.8706609605325726, "grad_norm": 0.37775367498397827, "learning_rate": 8.710701810098443e-05, "loss": 0.1672, "step": 2035 }, { "epoch": 3.872563005230623, "grad_norm": 0.4611580967903137, "learning_rate": 8.71006668783741e-05, "loss": 0.1977, "step": 2036 }, { "epoch": 3.8744650499286735, "grad_norm": 0.34681427478790283, "learning_rate": 8.709431565576374e-05, "loss": 0.127, "step": 2037 }, { "epoch": 3.8763670946267235, "grad_norm": 0.3547581732273102, "learning_rate": 8.708796443315339e-05, "loss": 0.1432, "step": 2038 }, { "epoch": 3.8782691393247744, "grad_norm": 0.3560992479324341, "learning_rate": 8.708161321054303e-05, "loss": 0.1269, "step": 2039 }, { "epoch": 3.8801711840228243, "grad_norm": 0.48965948820114136, "learning_rate": 8.707526198793268e-05, "loss": 0.1694, "step": 2040 }, { "epoch": 3.882073228720875, "grad_norm": 0.4042951464653015, "learning_rate": 8.706891076532233e-05, "loss": 0.1432, "step": 2041 }, { "epoch": 3.8839752734189252, "grad_norm": 0.40321534872055054, "learning_rate": 8.706255954271197e-05, "loss": 0.1206, "step": 2042 }, { "epoch": 3.8858773181169757, "grad_norm": 0.5154759883880615, "learning_rate": 8.705620832010164e-05, "loss": 0.2034, "step": 2043 }, { "epoch": 3.887779362815026, "grad_norm": 0.3707939684391022, "learning_rate": 8.704985709749127e-05, "loss": 0.1408, "step": 2044 }, { "epoch": 3.8896814075130766, "grad_norm": 0.46117648482322693, "learning_rate": 8.704350587488091e-05, "loss": 0.1921, "step": 2045 }, { "epoch": 3.891583452211127, "grad_norm": 0.4917357265949249, "learning_rate": 8.703715465227056e-05, "loss": 0.1684, "step": 2046 }, { "epoch": 3.8934854969091774, "grad_norm": 0.36523228883743286, "learning_rate": 8.703080342966022e-05, "loss": 0.1977, "step": 2047 }, { "epoch": 3.895387541607228, "grad_norm": 0.3557770550251007, "learning_rate": 8.702445220704985e-05, "loss": 0.1326, "step": 2048 }, { "epoch": 3.8972895863052783, "grad_norm": 0.2716139853000641, "learning_rate": 8.70181009844395e-05, "loss": 0.1119, "step": 2049 }, { "epoch": 3.8991916310033288, "grad_norm": 0.3266098201274872, "learning_rate": 8.701174976182916e-05, "loss": 0.1355, "step": 2050 }, { "epoch": 3.9010936757013788, "grad_norm": 0.4549683928489685, "learning_rate": 8.700539853921881e-05, "loss": 0.174, "step": 2051 }, { "epoch": 3.9029957203994297, "grad_norm": 0.3865867555141449, "learning_rate": 8.699904731660845e-05, "loss": 0.131, "step": 2052 }, { "epoch": 3.9048977650974797, "grad_norm": 0.4354785084724426, "learning_rate": 8.69926960939981e-05, "loss": 0.1497, "step": 2053 }, { "epoch": 3.90679980979553, "grad_norm": 0.38822686672210693, "learning_rate": 8.698634487138775e-05, "loss": 0.1272, "step": 2054 }, { "epoch": 3.9087018544935805, "grad_norm": 0.4395056366920471, "learning_rate": 8.697999364877739e-05, "loss": 0.1801, "step": 2055 }, { "epoch": 3.910603899191631, "grad_norm": 0.4310166835784912, "learning_rate": 8.697364242616704e-05, "loss": 0.1457, "step": 2056 }, { "epoch": 3.9125059438896814, "grad_norm": 0.42527538537979126, "learning_rate": 8.69672912035567e-05, "loss": 0.1827, "step": 2057 }, { "epoch": 3.914407988587732, "grad_norm": 0.41284388303756714, "learning_rate": 8.696093998094633e-05, "loss": 0.1588, "step": 2058 }, { "epoch": 3.9163100332857823, "grad_norm": 0.3561374247074127, "learning_rate": 8.695458875833598e-05, "loss": 0.138, "step": 2059 }, { "epoch": 3.9182120779838328, "grad_norm": 0.4057970941066742, "learning_rate": 8.694823753572564e-05, "loss": 0.1504, "step": 2060 }, { "epoch": 3.920114122681883, "grad_norm": 0.47292712330818176, "learning_rate": 8.694188631311529e-05, "loss": 0.1417, "step": 2061 }, { "epoch": 3.922016167379933, "grad_norm": 0.4207940995693207, "learning_rate": 8.693553509050493e-05, "loss": 0.1372, "step": 2062 }, { "epoch": 3.923918212077984, "grad_norm": 0.5482998490333557, "learning_rate": 8.692918386789456e-05, "loss": 0.1917, "step": 2063 }, { "epoch": 3.925820256776034, "grad_norm": 0.41113635897636414, "learning_rate": 8.692283264528423e-05, "loss": 0.1479, "step": 2064 }, { "epoch": 3.9277223014740845, "grad_norm": 0.3470059037208557, "learning_rate": 8.691648142267387e-05, "loss": 0.1235, "step": 2065 }, { "epoch": 3.929624346172135, "grad_norm": 0.4131185710430145, "learning_rate": 8.69101302000635e-05, "loss": 0.1476, "step": 2066 }, { "epoch": 3.9315263908701854, "grad_norm": 0.3750738501548767, "learning_rate": 8.690377897745317e-05, "loss": 0.1517, "step": 2067 }, { "epoch": 3.933428435568236, "grad_norm": 0.37411704659461975, "learning_rate": 8.689742775484281e-05, "loss": 0.1493, "step": 2068 }, { "epoch": 3.9353304802662863, "grad_norm": 0.4208986759185791, "learning_rate": 8.689107653223246e-05, "loss": 0.1558, "step": 2069 }, { "epoch": 3.9372325249643367, "grad_norm": 0.36959660053253174, "learning_rate": 8.68847253096221e-05, "loss": 0.1247, "step": 2070 }, { "epoch": 3.939134569662387, "grad_norm": 0.3977148234844208, "learning_rate": 8.687837408701175e-05, "loss": 0.1428, "step": 2071 }, { "epoch": 3.9410366143604376, "grad_norm": 0.40076392889022827, "learning_rate": 8.68720228644014e-05, "loss": 0.1652, "step": 2072 }, { "epoch": 3.9429386590584876, "grad_norm": 0.3828325569629669, "learning_rate": 8.686567164179104e-05, "loss": 0.1518, "step": 2073 }, { "epoch": 3.9448407037565385, "grad_norm": 0.35112518072128296, "learning_rate": 8.685932041918071e-05, "loss": 0.1303, "step": 2074 }, { "epoch": 3.9467427484545885, "grad_norm": 0.31564921140670776, "learning_rate": 8.685296919657035e-05, "loss": 0.1325, "step": 2075 }, { "epoch": 3.948644793152639, "grad_norm": 0.3110829293727875, "learning_rate": 8.684661797395998e-05, "loss": 0.0958, "step": 2076 }, { "epoch": 3.9505468378506894, "grad_norm": 0.41574040055274963, "learning_rate": 8.684026675134964e-05, "loss": 0.142, "step": 2077 }, { "epoch": 3.95244888254874, "grad_norm": 0.4371127188205719, "learning_rate": 8.683391552873929e-05, "loss": 0.1699, "step": 2078 }, { "epoch": 3.9543509272467903, "grad_norm": 0.41888341307640076, "learning_rate": 8.682756430612894e-05, "loss": 0.1467, "step": 2079 }, { "epoch": 3.9562529719448407, "grad_norm": 0.4013144373893738, "learning_rate": 8.682121308351858e-05, "loss": 0.1541, "step": 2080 }, { "epoch": 3.958155016642891, "grad_norm": 0.3627847135066986, "learning_rate": 8.681486186090823e-05, "loss": 0.1412, "step": 2081 }, { "epoch": 3.9600570613409416, "grad_norm": 0.34517934918403625, "learning_rate": 8.680851063829788e-05, "loss": 0.1302, "step": 2082 }, { "epoch": 3.961959106038992, "grad_norm": 0.409612238407135, "learning_rate": 8.680215941568752e-05, "loss": 0.1806, "step": 2083 }, { "epoch": 3.9638611507370425, "grad_norm": 0.37562572956085205, "learning_rate": 8.679580819307717e-05, "loss": 0.1305, "step": 2084 }, { "epoch": 3.965763195435093, "grad_norm": 0.30839917063713074, "learning_rate": 8.678945697046682e-05, "loss": 0.1179, "step": 2085 }, { "epoch": 3.967665240133143, "grad_norm": 0.4009683430194855, "learning_rate": 8.678310574785646e-05, "loss": 0.1392, "step": 2086 }, { "epoch": 3.969567284831194, "grad_norm": 0.5373052358627319, "learning_rate": 8.677675452524611e-05, "loss": 0.2366, "step": 2087 }, { "epoch": 3.971469329529244, "grad_norm": 0.44061073660850525, "learning_rate": 8.677040330263576e-05, "loss": 0.1541, "step": 2088 }, { "epoch": 3.9733713742272943, "grad_norm": 0.6880194544792175, "learning_rate": 8.67640520800254e-05, "loss": 0.1822, "step": 2089 }, { "epoch": 3.9752734189253447, "grad_norm": 0.4342186450958252, "learning_rate": 8.675770085741505e-05, "loss": 0.1398, "step": 2090 }, { "epoch": 3.977175463623395, "grad_norm": 0.3437482714653015, "learning_rate": 8.675134963480471e-05, "loss": 0.1407, "step": 2091 }, { "epoch": 3.9790775083214456, "grad_norm": 0.43729832768440247, "learning_rate": 8.674499841219436e-05, "loss": 0.1604, "step": 2092 }, { "epoch": 3.980979553019496, "grad_norm": 0.36654895544052124, "learning_rate": 8.6738647189584e-05, "loss": 0.1261, "step": 2093 }, { "epoch": 3.9828815977175465, "grad_norm": 0.40422323346138, "learning_rate": 8.673229596697365e-05, "loss": 0.1463, "step": 2094 }, { "epoch": 3.984783642415597, "grad_norm": 0.37436428666114807, "learning_rate": 8.67259447443633e-05, "loss": 0.1283, "step": 2095 }, { "epoch": 3.9866856871136473, "grad_norm": 0.4568138122558594, "learning_rate": 8.671959352175294e-05, "loss": 0.1735, "step": 2096 }, { "epoch": 3.9885877318116973, "grad_norm": 0.3864310681819916, "learning_rate": 8.671324229914259e-05, "loss": 0.1458, "step": 2097 }, { "epoch": 3.9904897765097482, "grad_norm": 0.3622378408908844, "learning_rate": 8.670689107653224e-05, "loss": 0.1333, "step": 2098 }, { "epoch": 3.9923918212077982, "grad_norm": 0.5126944780349731, "learning_rate": 8.670053985392188e-05, "loss": 0.1897, "step": 2099 }, { "epoch": 3.9942938659058487, "grad_norm": 0.3905584216117859, "learning_rate": 8.669418863131153e-05, "loss": 0.1743, "step": 2100 }, { "epoch": 3.996195910603899, "grad_norm": 0.4149746298789978, "learning_rate": 8.668783740870118e-05, "loss": 0.1686, "step": 2101 }, { "epoch": 3.9980979553019496, "grad_norm": 0.30447009205818176, "learning_rate": 8.668148618609082e-05, "loss": 0.1079, "step": 2102 }, { "epoch": 4.0, "grad_norm": 0.533173143863678, "learning_rate": 8.667513496348047e-05, "loss": 0.1652, "step": 2103 }, { "epoch": 4.00190204469805, "grad_norm": 0.26669684052467346, "learning_rate": 8.666878374087011e-05, "loss": 0.1105, "step": 2104 }, { "epoch": 4.003804089396101, "grad_norm": 0.2511195242404938, "learning_rate": 8.666243251825978e-05, "loss": 0.1018, "step": 2105 }, { "epoch": 4.005706134094151, "grad_norm": 0.2838079035282135, "learning_rate": 8.665608129564942e-05, "loss": 0.0979, "step": 2106 }, { "epoch": 4.007608178792202, "grad_norm": 0.3789231479167938, "learning_rate": 8.664973007303905e-05, "loss": 0.1216, "step": 2107 }, { "epoch": 4.009510223490252, "grad_norm": 0.36412686109542847, "learning_rate": 8.664337885042872e-05, "loss": 0.0924, "step": 2108 }, { "epoch": 4.011412268188303, "grad_norm": 0.3399736285209656, "learning_rate": 8.663702762781836e-05, "loss": 0.1007, "step": 2109 }, { "epoch": 4.013314312886353, "grad_norm": 0.3104216456413269, "learning_rate": 8.663067640520801e-05, "loss": 0.1146, "step": 2110 }, { "epoch": 4.0152163575844035, "grad_norm": 0.33002039790153503, "learning_rate": 8.662432518259765e-05, "loss": 0.1112, "step": 2111 }, { "epoch": 4.0171184022824535, "grad_norm": 0.3158220946788788, "learning_rate": 8.66179739599873e-05, "loss": 0.0983, "step": 2112 }, { "epoch": 4.019020446980504, "grad_norm": 0.3281852900981903, "learning_rate": 8.661162273737695e-05, "loss": 0.1002, "step": 2113 }, { "epoch": 4.020922491678554, "grad_norm": 0.42810752987861633, "learning_rate": 8.660527151476659e-05, "loss": 0.145, "step": 2114 }, { "epoch": 4.022824536376604, "grad_norm": 0.343757301568985, "learning_rate": 8.659892029215624e-05, "loss": 0.1046, "step": 2115 }, { "epoch": 4.024726581074655, "grad_norm": 0.3978208601474762, "learning_rate": 8.65925690695459e-05, "loss": 0.1232, "step": 2116 }, { "epoch": 4.026628625772705, "grad_norm": 0.3716939687728882, "learning_rate": 8.658621784693553e-05, "loss": 0.1073, "step": 2117 }, { "epoch": 4.028530670470756, "grad_norm": 0.3938986659049988, "learning_rate": 8.657986662432518e-05, "loss": 0.1162, "step": 2118 }, { "epoch": 4.030432715168806, "grad_norm": 0.26515620946884155, "learning_rate": 8.657351540171484e-05, "loss": 0.0927, "step": 2119 }, { "epoch": 4.032334759866857, "grad_norm": 0.4481755197048187, "learning_rate": 8.656716417910447e-05, "loss": 0.1192, "step": 2120 }, { "epoch": 4.034236804564907, "grad_norm": 0.2902253568172455, "learning_rate": 8.656081295649413e-05, "loss": 0.0972, "step": 2121 }, { "epoch": 4.036138849262958, "grad_norm": 0.3764674961566925, "learning_rate": 8.655446173388378e-05, "loss": 0.1242, "step": 2122 }, { "epoch": 4.038040893961008, "grad_norm": 0.4040977954864502, "learning_rate": 8.654811051127343e-05, "loss": 0.1053, "step": 2123 }, { "epoch": 4.039942938659059, "grad_norm": 0.3967365026473999, "learning_rate": 8.654175928866307e-05, "loss": 0.1132, "step": 2124 }, { "epoch": 4.041844983357109, "grad_norm": 0.4135635197162628, "learning_rate": 8.653540806605272e-05, "loss": 0.1171, "step": 2125 }, { "epoch": 4.04374702805516, "grad_norm": 0.43473535776138306, "learning_rate": 8.652905684344237e-05, "loss": 0.1227, "step": 2126 }, { "epoch": 4.04564907275321, "grad_norm": 0.30436238646507263, "learning_rate": 8.652270562083201e-05, "loss": 0.0853, "step": 2127 }, { "epoch": 4.04755111745126, "grad_norm": 0.3265203535556793, "learning_rate": 8.651635439822166e-05, "loss": 0.1007, "step": 2128 }, { "epoch": 4.049453162149311, "grad_norm": 0.3733639121055603, "learning_rate": 8.651000317561131e-05, "loss": 0.1164, "step": 2129 }, { "epoch": 4.051355206847361, "grad_norm": 0.3707481324672699, "learning_rate": 8.650365195300095e-05, "loss": 0.1225, "step": 2130 }, { "epoch": 4.0532572515454115, "grad_norm": 0.39869242906570435, "learning_rate": 8.64973007303906e-05, "loss": 0.1127, "step": 2131 }, { "epoch": 4.0551592962434615, "grad_norm": 0.31656894087791443, "learning_rate": 8.649094950778026e-05, "loss": 0.0936, "step": 2132 }, { "epoch": 4.057061340941512, "grad_norm": 0.32848450541496277, "learning_rate": 8.648459828516991e-05, "loss": 0.1192, "step": 2133 }, { "epoch": 4.058963385639562, "grad_norm": 0.41309690475463867, "learning_rate": 8.647824706255955e-05, "loss": 0.1224, "step": 2134 }, { "epoch": 4.060865430337613, "grad_norm": 0.30171439051628113, "learning_rate": 8.647189583994918e-05, "loss": 0.1108, "step": 2135 }, { "epoch": 4.062767475035663, "grad_norm": 0.31793013215065, "learning_rate": 8.646554461733885e-05, "loss": 0.0958, "step": 2136 }, { "epoch": 4.064669519733714, "grad_norm": 0.3515986502170563, "learning_rate": 8.645919339472849e-05, "loss": 0.098, "step": 2137 }, { "epoch": 4.066571564431764, "grad_norm": 0.2572970390319824, "learning_rate": 8.645284217211813e-05, "loss": 0.0782, "step": 2138 }, { "epoch": 4.068473609129814, "grad_norm": 0.40460988879203796, "learning_rate": 8.644649094950779e-05, "loss": 0.111, "step": 2139 }, { "epoch": 4.070375653827865, "grad_norm": 0.25654932856559753, "learning_rate": 8.644013972689743e-05, "loss": 0.078, "step": 2140 }, { "epoch": 4.072277698525915, "grad_norm": 0.3793332278728485, "learning_rate": 8.643378850428708e-05, "loss": 0.1113, "step": 2141 }, { "epoch": 4.074179743223966, "grad_norm": 0.3457014560699463, "learning_rate": 8.642743728167672e-05, "loss": 0.1016, "step": 2142 }, { "epoch": 4.076081787922016, "grad_norm": 0.41619420051574707, "learning_rate": 8.642108605906637e-05, "loss": 0.1379, "step": 2143 }, { "epoch": 4.077983832620067, "grad_norm": 0.3582102656364441, "learning_rate": 8.641473483645602e-05, "loss": 0.1068, "step": 2144 }, { "epoch": 4.079885877318117, "grad_norm": 0.4142124652862549, "learning_rate": 8.640838361384566e-05, "loss": 0.1155, "step": 2145 }, { "epoch": 4.081787922016168, "grad_norm": 0.3544979393482208, "learning_rate": 8.640203239123533e-05, "loss": 0.0969, "step": 2146 }, { "epoch": 4.083689966714218, "grad_norm": 0.37561002373695374, "learning_rate": 8.639568116862497e-05, "loss": 0.1218, "step": 2147 }, { "epoch": 4.085592011412269, "grad_norm": 0.3568158447742462, "learning_rate": 8.63893299460146e-05, "loss": 0.1225, "step": 2148 }, { "epoch": 4.087494056110319, "grad_norm": 0.3126932382583618, "learning_rate": 8.638297872340426e-05, "loss": 0.084, "step": 2149 }, { "epoch": 4.089396100808369, "grad_norm": 0.4232020378112793, "learning_rate": 8.637662750079391e-05, "loss": 0.1155, "step": 2150 }, { "epoch": 4.0912981455064195, "grad_norm": 0.4121897518634796, "learning_rate": 8.637027627818356e-05, "loss": 0.1352, "step": 2151 }, { "epoch": 4.0932001902044695, "grad_norm": 0.3292025923728943, "learning_rate": 8.63639250555732e-05, "loss": 0.115, "step": 2152 }, { "epoch": 4.09510223490252, "grad_norm": 0.3273860514163971, "learning_rate": 8.635757383296285e-05, "loss": 0.1087, "step": 2153 }, { "epoch": 4.09700427960057, "grad_norm": 0.36760157346725464, "learning_rate": 8.63512226103525e-05, "loss": 0.1206, "step": 2154 }, { "epoch": 4.098906324298621, "grad_norm": 0.3717329502105713, "learning_rate": 8.634487138774214e-05, "loss": 0.1244, "step": 2155 }, { "epoch": 4.100808368996671, "grad_norm": 0.379068523645401, "learning_rate": 8.633852016513179e-05, "loss": 0.1048, "step": 2156 }, { "epoch": 4.102710413694722, "grad_norm": 0.30912551283836365, "learning_rate": 8.633216894252144e-05, "loss": 0.0838, "step": 2157 }, { "epoch": 4.104612458392772, "grad_norm": 0.3093559741973877, "learning_rate": 8.632581771991108e-05, "loss": 0.0948, "step": 2158 }, { "epoch": 4.106514503090823, "grad_norm": 0.2924623489379883, "learning_rate": 8.631946649730073e-05, "loss": 0.085, "step": 2159 }, { "epoch": 4.108416547788873, "grad_norm": 0.335437536239624, "learning_rate": 8.631311527469039e-05, "loss": 0.102, "step": 2160 }, { "epoch": 4.110318592486923, "grad_norm": 0.37450480461120605, "learning_rate": 8.630676405208002e-05, "loss": 0.1102, "step": 2161 }, { "epoch": 4.112220637184974, "grad_norm": 0.40548086166381836, "learning_rate": 8.630041282946968e-05, "loss": 0.1122, "step": 2162 }, { "epoch": 4.114122681883024, "grad_norm": 0.2255704551935196, "learning_rate": 8.629406160685933e-05, "loss": 0.0875, "step": 2163 }, { "epoch": 4.116024726581075, "grad_norm": 0.3774515390396118, "learning_rate": 8.628771038424898e-05, "loss": 0.1007, "step": 2164 }, { "epoch": 4.117926771279125, "grad_norm": 0.4410356879234314, "learning_rate": 8.628135916163862e-05, "loss": 0.1238, "step": 2165 }, { "epoch": 4.119828815977176, "grad_norm": 0.3007069230079651, "learning_rate": 8.627500793902826e-05, "loss": 0.0849, "step": 2166 }, { "epoch": 4.121730860675226, "grad_norm": 0.3165019750595093, "learning_rate": 8.626865671641792e-05, "loss": 0.0959, "step": 2167 }, { "epoch": 4.1236329053732765, "grad_norm": 0.3213941752910614, "learning_rate": 8.626230549380756e-05, "loss": 0.1011, "step": 2168 }, { "epoch": 4.1255349500713265, "grad_norm": 0.2742742598056793, "learning_rate": 8.625595427119721e-05, "loss": 0.0855, "step": 2169 }, { "epoch": 4.127436994769377, "grad_norm": 0.35063308477401733, "learning_rate": 8.624960304858686e-05, "loss": 0.1115, "step": 2170 }, { "epoch": 4.129339039467427, "grad_norm": 0.4272489845752716, "learning_rate": 8.62432518259765e-05, "loss": 0.1162, "step": 2171 }, { "epoch": 4.131241084165478, "grad_norm": 0.27256911993026733, "learning_rate": 8.623690060336615e-05, "loss": 0.1066, "step": 2172 }, { "epoch": 4.133143128863528, "grad_norm": 0.275309294462204, "learning_rate": 8.623054938075579e-05, "loss": 0.1029, "step": 2173 }, { "epoch": 4.135045173561578, "grad_norm": 0.2678431570529938, "learning_rate": 8.622419815814544e-05, "loss": 0.0836, "step": 2174 }, { "epoch": 4.136947218259629, "grad_norm": 0.3313474953174591, "learning_rate": 8.62178469355351e-05, "loss": 0.0925, "step": 2175 }, { "epoch": 4.138849262957679, "grad_norm": 0.2514117658138275, "learning_rate": 8.621149571292473e-05, "loss": 0.0905, "step": 2176 }, { "epoch": 4.14075130765573, "grad_norm": 0.2868940532207489, "learning_rate": 8.62051444903144e-05, "loss": 0.1057, "step": 2177 }, { "epoch": 4.14265335235378, "grad_norm": 0.3867243826389313, "learning_rate": 8.619879326770404e-05, "loss": 0.1151, "step": 2178 }, { "epoch": 4.144555397051831, "grad_norm": 0.3011827766895294, "learning_rate": 8.619244204509368e-05, "loss": 0.1152, "step": 2179 }, { "epoch": 4.146457441749881, "grad_norm": 0.33059659600257874, "learning_rate": 8.618609082248333e-05, "loss": 0.1121, "step": 2180 }, { "epoch": 4.148359486447932, "grad_norm": 0.45777612924575806, "learning_rate": 8.617973959987298e-05, "loss": 0.133, "step": 2181 }, { "epoch": 4.150261531145982, "grad_norm": 0.39224299788475037, "learning_rate": 8.617338837726263e-05, "loss": 0.1381, "step": 2182 }, { "epoch": 4.152163575844033, "grad_norm": 0.2813168168067932, "learning_rate": 8.616703715465227e-05, "loss": 0.0939, "step": 2183 }, { "epoch": 4.154065620542083, "grad_norm": 0.30850479006767273, "learning_rate": 8.616068593204192e-05, "loss": 0.1016, "step": 2184 }, { "epoch": 4.155967665240133, "grad_norm": 0.2755066156387329, "learning_rate": 8.615433470943157e-05, "loss": 0.1253, "step": 2185 }, { "epoch": 4.157869709938184, "grad_norm": 0.25375935435295105, "learning_rate": 8.614798348682121e-05, "loss": 0.088, "step": 2186 }, { "epoch": 4.159771754636234, "grad_norm": 0.27644097805023193, "learning_rate": 8.614163226421086e-05, "loss": 0.1053, "step": 2187 }, { "epoch": 4.1616737993342845, "grad_norm": 0.30916059017181396, "learning_rate": 8.613528104160052e-05, "loss": 0.1075, "step": 2188 }, { "epoch": 4.1635758440323345, "grad_norm": 0.3316441476345062, "learning_rate": 8.612892981899015e-05, "loss": 0.1087, "step": 2189 }, { "epoch": 4.165477888730385, "grad_norm": 0.27464917302131653, "learning_rate": 8.61225785963798e-05, "loss": 0.079, "step": 2190 }, { "epoch": 4.167379933428435, "grad_norm": 0.3684466779232025, "learning_rate": 8.611622737376946e-05, "loss": 0.1312, "step": 2191 }, { "epoch": 4.169281978126486, "grad_norm": 0.33914482593536377, "learning_rate": 8.61098761511591e-05, "loss": 0.0991, "step": 2192 }, { "epoch": 4.171184022824536, "grad_norm": 0.3610948324203491, "learning_rate": 8.610352492854875e-05, "loss": 0.1068, "step": 2193 }, { "epoch": 4.173086067522587, "grad_norm": 0.2824098765850067, "learning_rate": 8.60971737059384e-05, "loss": 0.0913, "step": 2194 }, { "epoch": 4.174988112220637, "grad_norm": 0.28685760498046875, "learning_rate": 8.609082248332805e-05, "loss": 0.098, "step": 2195 }, { "epoch": 4.176890156918688, "grad_norm": 0.44503989815711975, "learning_rate": 8.608447126071769e-05, "loss": 0.1441, "step": 2196 }, { "epoch": 4.178792201616738, "grad_norm": 0.4228593409061432, "learning_rate": 8.607812003810734e-05, "loss": 0.1228, "step": 2197 }, { "epoch": 4.180694246314788, "grad_norm": 0.34366467595100403, "learning_rate": 8.607176881549699e-05, "loss": 0.0969, "step": 2198 }, { "epoch": 4.182596291012839, "grad_norm": 0.3302469849586487, "learning_rate": 8.606541759288663e-05, "loss": 0.1093, "step": 2199 }, { "epoch": 4.184498335710889, "grad_norm": 0.316914826631546, "learning_rate": 8.605906637027628e-05, "loss": 0.096, "step": 2200 }, { "epoch": 4.18640038040894, "grad_norm": 0.3100655972957611, "learning_rate": 8.605271514766594e-05, "loss": 0.0902, "step": 2201 }, { "epoch": 4.18830242510699, "grad_norm": 0.2934771776199341, "learning_rate": 8.604636392505557e-05, "loss": 0.1011, "step": 2202 }, { "epoch": 4.190204469805041, "grad_norm": 0.32837802171707153, "learning_rate": 8.604001270244523e-05, "loss": 0.1284, "step": 2203 }, { "epoch": 4.192106514503091, "grad_norm": 0.3842618465423584, "learning_rate": 8.603366147983488e-05, "loss": 0.1072, "step": 2204 }, { "epoch": 4.194008559201142, "grad_norm": 0.29006102681159973, "learning_rate": 8.602731025722453e-05, "loss": 0.0919, "step": 2205 }, { "epoch": 4.195910603899192, "grad_norm": 0.31507110595703125, "learning_rate": 8.602095903461417e-05, "loss": 0.1103, "step": 2206 }, { "epoch": 4.1978126485972425, "grad_norm": 0.35961470007896423, "learning_rate": 8.60146078120038e-05, "loss": 0.1738, "step": 2207 }, { "epoch": 4.1997146932952925, "grad_norm": 0.34587833285331726, "learning_rate": 8.600825658939347e-05, "loss": 0.1096, "step": 2208 }, { "epoch": 4.2016167379933425, "grad_norm": 0.37271326780319214, "learning_rate": 8.600190536678311e-05, "loss": 0.1186, "step": 2209 }, { "epoch": 4.203518782691393, "grad_norm": 0.31880611181259155, "learning_rate": 8.599555414417275e-05, "loss": 0.1046, "step": 2210 }, { "epoch": 4.205420827389443, "grad_norm": 0.28906506299972534, "learning_rate": 8.598920292156241e-05, "loss": 0.0988, "step": 2211 }, { "epoch": 4.207322872087494, "grad_norm": 0.33470967411994934, "learning_rate": 8.598285169895205e-05, "loss": 0.1056, "step": 2212 }, { "epoch": 4.209224916785544, "grad_norm": 0.3186233341693878, "learning_rate": 8.59765004763417e-05, "loss": 0.1203, "step": 2213 }, { "epoch": 4.211126961483595, "grad_norm": 0.3465280532836914, "learning_rate": 8.597014925373134e-05, "loss": 0.1073, "step": 2214 }, { "epoch": 4.213029006181645, "grad_norm": 0.27451473474502563, "learning_rate": 8.596379803112099e-05, "loss": 0.0965, "step": 2215 }, { "epoch": 4.214931050879696, "grad_norm": 0.35004234313964844, "learning_rate": 8.595744680851064e-05, "loss": 0.1003, "step": 2216 }, { "epoch": 4.216833095577746, "grad_norm": 0.36494818329811096, "learning_rate": 8.595109558590028e-05, "loss": 0.1143, "step": 2217 }, { "epoch": 4.218735140275797, "grad_norm": 0.4278135597705841, "learning_rate": 8.594474436328995e-05, "loss": 0.1234, "step": 2218 }, { "epoch": 4.220637184973847, "grad_norm": 0.5124382972717285, "learning_rate": 8.593839314067959e-05, "loss": 0.1158, "step": 2219 }, { "epoch": 4.222539229671897, "grad_norm": 0.39850741624832153, "learning_rate": 8.593204191806923e-05, "loss": 0.1295, "step": 2220 }, { "epoch": 4.224441274369948, "grad_norm": 0.4141925573348999, "learning_rate": 8.592569069545888e-05, "loss": 0.1103, "step": 2221 }, { "epoch": 4.226343319067998, "grad_norm": 0.274980366230011, "learning_rate": 8.591933947284853e-05, "loss": 0.0927, "step": 2222 }, { "epoch": 4.228245363766049, "grad_norm": 0.4274260103702545, "learning_rate": 8.591298825023818e-05, "loss": 0.1248, "step": 2223 }, { "epoch": 4.230147408464099, "grad_norm": 0.39051416516304016, "learning_rate": 8.590663702762782e-05, "loss": 0.1068, "step": 2224 }, { "epoch": 4.2320494531621495, "grad_norm": 0.3913654685020447, "learning_rate": 8.590028580501747e-05, "loss": 0.1212, "step": 2225 }, { "epoch": 4.2339514978601995, "grad_norm": 0.33034393191337585, "learning_rate": 8.589393458240712e-05, "loss": 0.0875, "step": 2226 }, { "epoch": 4.23585354255825, "grad_norm": 0.405618280172348, "learning_rate": 8.588758335979676e-05, "loss": 0.1228, "step": 2227 }, { "epoch": 4.2377555872563, "grad_norm": 0.3220268189907074, "learning_rate": 8.588123213718641e-05, "loss": 0.1046, "step": 2228 }, { "epoch": 4.239657631954351, "grad_norm": 0.32537737488746643, "learning_rate": 8.587488091457606e-05, "loss": 0.0901, "step": 2229 }, { "epoch": 4.241559676652401, "grad_norm": 0.3968732953071594, "learning_rate": 8.58685296919657e-05, "loss": 0.1753, "step": 2230 }, { "epoch": 4.243461721350451, "grad_norm": 0.3441084325313568, "learning_rate": 8.586217846935535e-05, "loss": 0.1181, "step": 2231 }, { "epoch": 4.245363766048502, "grad_norm": 0.4014514684677124, "learning_rate": 8.5855827246745e-05, "loss": 0.1067, "step": 2232 }, { "epoch": 4.247265810746552, "grad_norm": 0.40167930722236633, "learning_rate": 8.584947602413464e-05, "loss": 0.1142, "step": 2233 }, { "epoch": 4.249167855444603, "grad_norm": 0.3604772984981537, "learning_rate": 8.58431248015243e-05, "loss": 0.108, "step": 2234 }, { "epoch": 4.251069900142653, "grad_norm": 0.4210832118988037, "learning_rate": 8.583677357891395e-05, "loss": 0.1161, "step": 2235 }, { "epoch": 4.252971944840704, "grad_norm": 0.34467047452926636, "learning_rate": 8.58304223563036e-05, "loss": 0.1187, "step": 2236 }, { "epoch": 4.254873989538754, "grad_norm": 0.8141130805015564, "learning_rate": 8.582407113369324e-05, "loss": 0.1766, "step": 2237 }, { "epoch": 4.256776034236805, "grad_norm": 0.28791263699531555, "learning_rate": 8.581771991108288e-05, "loss": 0.0953, "step": 2238 }, { "epoch": 4.258678078934855, "grad_norm": 0.2527415454387665, "learning_rate": 8.581136868847254e-05, "loss": 0.0847, "step": 2239 }, { "epoch": 4.260580123632906, "grad_norm": 0.2793647050857544, "learning_rate": 8.580501746586218e-05, "loss": 0.116, "step": 2240 }, { "epoch": 4.262482168330956, "grad_norm": 0.5324682593345642, "learning_rate": 8.579866624325183e-05, "loss": 0.1357, "step": 2241 }, { "epoch": 4.264384213029006, "grad_norm": 0.31979575753211975, "learning_rate": 8.579231502064148e-05, "loss": 0.1004, "step": 2242 }, { "epoch": 4.266286257727057, "grad_norm": 0.453645795583725, "learning_rate": 8.578596379803112e-05, "loss": 0.121, "step": 2243 }, { "epoch": 4.268188302425107, "grad_norm": 0.2688881754875183, "learning_rate": 8.577961257542077e-05, "loss": 0.0935, "step": 2244 }, { "epoch": 4.2700903471231575, "grad_norm": 0.30262473225593567, "learning_rate": 8.577326135281041e-05, "loss": 0.086, "step": 2245 }, { "epoch": 4.2719923918212075, "grad_norm": 0.4076935648918152, "learning_rate": 8.576691013020006e-05, "loss": 0.1075, "step": 2246 }, { "epoch": 4.273894436519258, "grad_norm": 0.5229641199111938, "learning_rate": 8.576055890758972e-05, "loss": 0.1585, "step": 2247 }, { "epoch": 4.275796481217308, "grad_norm": 0.3732607960700989, "learning_rate": 8.575420768497935e-05, "loss": 0.1065, "step": 2248 }, { "epoch": 4.277698525915359, "grad_norm": 0.39624014496803284, "learning_rate": 8.574785646236902e-05, "loss": 0.1229, "step": 2249 }, { "epoch": 4.279600570613409, "grad_norm": 0.47354966402053833, "learning_rate": 8.574150523975866e-05, "loss": 0.1574, "step": 2250 }, { "epoch": 4.28150261531146, "grad_norm": 0.35089337825775146, "learning_rate": 8.57351540171483e-05, "loss": 0.1098, "step": 2251 }, { "epoch": 4.28340466000951, "grad_norm": 0.3599602282047272, "learning_rate": 8.572880279453795e-05, "loss": 0.1136, "step": 2252 }, { "epoch": 4.285306704707561, "grad_norm": 0.4661259949207306, "learning_rate": 8.57224515719276e-05, "loss": 0.1297, "step": 2253 }, { "epoch": 4.287208749405611, "grad_norm": 0.27821779251098633, "learning_rate": 8.571610034931725e-05, "loss": 0.0974, "step": 2254 }, { "epoch": 4.289110794103661, "grad_norm": 0.3892570436000824, "learning_rate": 8.570974912670689e-05, "loss": 0.1362, "step": 2255 }, { "epoch": 4.291012838801712, "grad_norm": 0.3612288534641266, "learning_rate": 8.570339790409654e-05, "loss": 0.121, "step": 2256 }, { "epoch": 4.292914883499762, "grad_norm": 0.3542415499687195, "learning_rate": 8.56970466814862e-05, "loss": 0.1004, "step": 2257 }, { "epoch": 4.294816928197813, "grad_norm": 0.3457956910133362, "learning_rate": 8.569069545887583e-05, "loss": 0.1035, "step": 2258 }, { "epoch": 4.296718972895863, "grad_norm": 0.42984023690223694, "learning_rate": 8.568434423626548e-05, "loss": 0.1236, "step": 2259 }, { "epoch": 4.298621017593914, "grad_norm": 0.3002376854419708, "learning_rate": 8.567799301365514e-05, "loss": 0.0867, "step": 2260 }, { "epoch": 4.300523062291964, "grad_norm": 0.3134646415710449, "learning_rate": 8.567164179104477e-05, "loss": 0.0928, "step": 2261 }, { "epoch": 4.302425106990015, "grad_norm": 0.35177892446517944, "learning_rate": 8.566529056843443e-05, "loss": 0.1072, "step": 2262 }, { "epoch": 4.304327151688065, "grad_norm": 0.40704670548439026, "learning_rate": 8.565893934582408e-05, "loss": 0.1216, "step": 2263 }, { "epoch": 4.3062291963861155, "grad_norm": 0.40002110600471497, "learning_rate": 8.565258812321372e-05, "loss": 0.1153, "step": 2264 }, { "epoch": 4.3081312410841655, "grad_norm": 0.28185611963272095, "learning_rate": 8.564623690060337e-05, "loss": 0.0815, "step": 2265 }, { "epoch": 4.310033285782216, "grad_norm": 0.45204728841781616, "learning_rate": 8.563988567799302e-05, "loss": 0.1285, "step": 2266 }, { "epoch": 4.311935330480266, "grad_norm": 0.39130833745002747, "learning_rate": 8.563353445538267e-05, "loss": 0.1235, "step": 2267 }, { "epoch": 4.313837375178316, "grad_norm": 0.29855722188949585, "learning_rate": 8.562718323277231e-05, "loss": 0.0943, "step": 2268 }, { "epoch": 4.315739419876367, "grad_norm": 0.2964162826538086, "learning_rate": 8.562083201016196e-05, "loss": 0.1056, "step": 2269 }, { "epoch": 4.317641464574417, "grad_norm": 0.3408963978290558, "learning_rate": 8.561448078755161e-05, "loss": 0.1096, "step": 2270 }, { "epoch": 4.319543509272468, "grad_norm": 0.26335135102272034, "learning_rate": 8.560812956494125e-05, "loss": 0.1258, "step": 2271 }, { "epoch": 4.321445553970518, "grad_norm": 0.45781078934669495, "learning_rate": 8.56017783423309e-05, "loss": 0.1441, "step": 2272 }, { "epoch": 4.323347598668569, "grad_norm": 0.30225613713264465, "learning_rate": 8.559542711972056e-05, "loss": 0.0886, "step": 2273 }, { "epoch": 4.325249643366619, "grad_norm": 0.39499637484550476, "learning_rate": 8.55890758971102e-05, "loss": 0.108, "step": 2274 }, { "epoch": 4.32715168806467, "grad_norm": 0.25995761156082153, "learning_rate": 8.558272467449985e-05, "loss": 0.0832, "step": 2275 }, { "epoch": 4.32905373276272, "grad_norm": 0.4667019248008728, "learning_rate": 8.557637345188948e-05, "loss": 0.1376, "step": 2276 }, { "epoch": 4.330955777460771, "grad_norm": 0.6616588830947876, "learning_rate": 8.557002222927915e-05, "loss": 0.1402, "step": 2277 }, { "epoch": 4.332857822158821, "grad_norm": 0.362642765045166, "learning_rate": 8.556367100666879e-05, "loss": 0.1036, "step": 2278 }, { "epoch": 4.334759866856871, "grad_norm": 0.34205347299575806, "learning_rate": 8.555731978405843e-05, "loss": 0.0901, "step": 2279 }, { "epoch": 4.336661911554922, "grad_norm": 0.428653746843338, "learning_rate": 8.555096856144809e-05, "loss": 0.1291, "step": 2280 }, { "epoch": 4.338563956252972, "grad_norm": 0.31291234493255615, "learning_rate": 8.554461733883773e-05, "loss": 0.091, "step": 2281 }, { "epoch": 4.3404660009510225, "grad_norm": 0.33913081884384155, "learning_rate": 8.553826611622737e-05, "loss": 0.0844, "step": 2282 }, { "epoch": 4.3423680456490725, "grad_norm": 0.3302326500415802, "learning_rate": 8.553191489361702e-05, "loss": 0.0894, "step": 2283 }, { "epoch": 4.344270090347123, "grad_norm": 0.39421653747558594, "learning_rate": 8.552556367100667e-05, "loss": 0.1173, "step": 2284 }, { "epoch": 4.346172135045173, "grad_norm": 0.35651376843452454, "learning_rate": 8.551921244839632e-05, "loss": 0.0945, "step": 2285 }, { "epoch": 4.348074179743224, "grad_norm": 0.37059125304222107, "learning_rate": 8.551286122578596e-05, "loss": 0.1223, "step": 2286 }, { "epoch": 4.349976224441274, "grad_norm": 0.31241846084594727, "learning_rate": 8.550651000317561e-05, "loss": 0.1057, "step": 2287 }, { "epoch": 4.351878269139325, "grad_norm": 0.29532214999198914, "learning_rate": 8.550015878056527e-05, "loss": 0.1008, "step": 2288 }, { "epoch": 4.353780313837375, "grad_norm": 0.435973584651947, "learning_rate": 8.54938075579549e-05, "loss": 0.1258, "step": 2289 }, { "epoch": 4.355682358535425, "grad_norm": 0.3240755498409271, "learning_rate": 8.548745633534456e-05, "loss": 0.1383, "step": 2290 }, { "epoch": 4.357584403233476, "grad_norm": 0.3592849373817444, "learning_rate": 8.548110511273421e-05, "loss": 0.118, "step": 2291 }, { "epoch": 4.359486447931526, "grad_norm": 0.3495205342769623, "learning_rate": 8.547475389012385e-05, "loss": 0.1182, "step": 2292 }, { "epoch": 4.361388492629577, "grad_norm": 0.35103073716163635, "learning_rate": 8.54684026675135e-05, "loss": 0.1075, "step": 2293 }, { "epoch": 4.363290537327627, "grad_norm": 0.4233345091342926, "learning_rate": 8.546205144490315e-05, "loss": 0.1111, "step": 2294 }, { "epoch": 4.365192582025678, "grad_norm": 0.3999617099761963, "learning_rate": 8.54557002222928e-05, "loss": 0.1172, "step": 2295 }, { "epoch": 4.367094626723728, "grad_norm": 0.3122519254684448, "learning_rate": 8.544934899968244e-05, "loss": 0.0973, "step": 2296 }, { "epoch": 4.368996671421779, "grad_norm": 0.2844139039516449, "learning_rate": 8.544299777707209e-05, "loss": 0.0972, "step": 2297 }, { "epoch": 4.370898716119829, "grad_norm": 0.3841843008995056, "learning_rate": 8.543664655446174e-05, "loss": 0.1145, "step": 2298 }, { "epoch": 4.37280076081788, "grad_norm": 0.35272732377052307, "learning_rate": 8.543029533185138e-05, "loss": 0.1, "step": 2299 }, { "epoch": 4.37470280551593, "grad_norm": 0.3861033618450165, "learning_rate": 8.542394410924103e-05, "loss": 0.12, "step": 2300 }, { "epoch": 4.37660485021398, "grad_norm": 0.2895589768886566, "learning_rate": 8.541759288663069e-05, "loss": 0.0857, "step": 2301 }, { "epoch": 4.3785068949120305, "grad_norm": 0.4067385792732239, "learning_rate": 8.541124166402032e-05, "loss": 0.114, "step": 2302 }, { "epoch": 4.3804089396100805, "grad_norm": 0.3439483642578125, "learning_rate": 8.540489044140998e-05, "loss": 0.1218, "step": 2303 }, { "epoch": 4.382310984308131, "grad_norm": 0.273703396320343, "learning_rate": 8.539853921879963e-05, "loss": 0.0919, "step": 2304 }, { "epoch": 4.384213029006181, "grad_norm": 0.2975528836250305, "learning_rate": 8.539218799618927e-05, "loss": 0.0786, "step": 2305 }, { "epoch": 4.386115073704232, "grad_norm": 0.3109762370586395, "learning_rate": 8.538583677357892e-05, "loss": 0.1043, "step": 2306 }, { "epoch": 4.388017118402282, "grad_norm": 0.30896326899528503, "learning_rate": 8.537948555096857e-05, "loss": 0.0986, "step": 2307 }, { "epoch": 4.389919163100333, "grad_norm": 0.24300821125507355, "learning_rate": 8.537313432835822e-05, "loss": 0.0821, "step": 2308 }, { "epoch": 4.391821207798383, "grad_norm": 0.2907545566558838, "learning_rate": 8.536678310574786e-05, "loss": 0.0943, "step": 2309 }, { "epoch": 4.393723252496434, "grad_norm": 0.4220617115497589, "learning_rate": 8.53604318831375e-05, "loss": 0.1359, "step": 2310 }, { "epoch": 4.395625297194484, "grad_norm": 0.3436138331890106, "learning_rate": 8.535408066052716e-05, "loss": 0.1106, "step": 2311 }, { "epoch": 4.397527341892534, "grad_norm": 0.36533981561660767, "learning_rate": 8.53477294379168e-05, "loss": 0.1194, "step": 2312 }, { "epoch": 4.399429386590585, "grad_norm": 0.3554334044456482, "learning_rate": 8.534137821530645e-05, "loss": 0.1571, "step": 2313 }, { "epoch": 4.401331431288635, "grad_norm": 0.3670365512371063, "learning_rate": 8.53350269926961e-05, "loss": 0.1299, "step": 2314 }, { "epoch": 4.403233475986686, "grad_norm": 0.4539790451526642, "learning_rate": 8.532867577008574e-05, "loss": 0.1348, "step": 2315 }, { "epoch": 4.405135520684736, "grad_norm": 0.29808804392814636, "learning_rate": 8.53223245474754e-05, "loss": 0.1046, "step": 2316 }, { "epoch": 4.407037565382787, "grad_norm": 0.3486464321613312, "learning_rate": 8.531597332486503e-05, "loss": 0.1047, "step": 2317 }, { "epoch": 4.408939610080837, "grad_norm": 0.2947161793708801, "learning_rate": 8.530962210225469e-05, "loss": 0.0814, "step": 2318 }, { "epoch": 4.410841654778888, "grad_norm": 0.3321152627468109, "learning_rate": 8.530327087964434e-05, "loss": 0.1068, "step": 2319 }, { "epoch": 4.412743699476938, "grad_norm": 0.2441323846578598, "learning_rate": 8.529691965703398e-05, "loss": 0.0813, "step": 2320 }, { "epoch": 4.4146457441749885, "grad_norm": 0.37151622772216797, "learning_rate": 8.529056843442364e-05, "loss": 0.0995, "step": 2321 }, { "epoch": 4.4165477888730384, "grad_norm": 0.330240398645401, "learning_rate": 8.528421721181328e-05, "loss": 0.0999, "step": 2322 }, { "epoch": 4.418449833571089, "grad_norm": 0.38048794865608215, "learning_rate": 8.527786598920292e-05, "loss": 0.1065, "step": 2323 }, { "epoch": 4.420351878269139, "grad_norm": 0.3825136423110962, "learning_rate": 8.527151476659257e-05, "loss": 0.1021, "step": 2324 }, { "epoch": 4.422253922967189, "grad_norm": 0.3410681486129761, "learning_rate": 8.526516354398222e-05, "loss": 0.0899, "step": 2325 }, { "epoch": 4.42415596766524, "grad_norm": 0.33466002345085144, "learning_rate": 8.525881232137187e-05, "loss": 0.1051, "step": 2326 }, { "epoch": 4.42605801236329, "grad_norm": 0.3932620584964752, "learning_rate": 8.525246109876151e-05, "loss": 0.1156, "step": 2327 }, { "epoch": 4.427960057061341, "grad_norm": 0.31098031997680664, "learning_rate": 8.524610987615116e-05, "loss": 0.1026, "step": 2328 }, { "epoch": 4.429862101759391, "grad_norm": 0.3773583471775055, "learning_rate": 8.523975865354082e-05, "loss": 0.1113, "step": 2329 }, { "epoch": 4.431764146457442, "grad_norm": 0.33763033151626587, "learning_rate": 8.523340743093045e-05, "loss": 0.0941, "step": 2330 }, { "epoch": 4.433666191155492, "grad_norm": 0.23584803938865662, "learning_rate": 8.52270562083201e-05, "loss": 0.0777, "step": 2331 }, { "epoch": 4.435568235853543, "grad_norm": 0.3598161041736603, "learning_rate": 8.522070498570976e-05, "loss": 0.1173, "step": 2332 }, { "epoch": 4.437470280551593, "grad_norm": 0.3960074484348297, "learning_rate": 8.52143537630994e-05, "loss": 0.119, "step": 2333 }, { "epoch": 4.439372325249644, "grad_norm": 0.3260672092437744, "learning_rate": 8.520800254048905e-05, "loss": 0.1107, "step": 2334 }, { "epoch": 4.441274369947694, "grad_norm": 0.3651185929775238, "learning_rate": 8.52016513178787e-05, "loss": 0.0993, "step": 2335 }, { "epoch": 4.443176414645745, "grad_norm": 0.39154887199401855, "learning_rate": 8.519530009526834e-05, "loss": 0.1168, "step": 2336 }, { "epoch": 4.445078459343795, "grad_norm": 0.3429001569747925, "learning_rate": 8.518894887265799e-05, "loss": 0.1111, "step": 2337 }, { "epoch": 4.446980504041845, "grad_norm": 0.3407055735588074, "learning_rate": 8.518259765004764e-05, "loss": 0.1032, "step": 2338 }, { "epoch": 4.4488825487398955, "grad_norm": 0.3813023567199707, "learning_rate": 8.517624642743729e-05, "loss": 0.1077, "step": 2339 }, { "epoch": 4.4507845934379455, "grad_norm": 0.2836807370185852, "learning_rate": 8.516989520482693e-05, "loss": 0.0833, "step": 2340 }, { "epoch": 4.452686638135996, "grad_norm": 0.4083840250968933, "learning_rate": 8.516354398221657e-05, "loss": 0.1254, "step": 2341 }, { "epoch": 4.454588682834046, "grad_norm": 0.29835161566734314, "learning_rate": 8.515719275960623e-05, "loss": 0.1207, "step": 2342 }, { "epoch": 4.456490727532097, "grad_norm": 0.30677247047424316, "learning_rate": 8.515084153699587e-05, "loss": 0.0807, "step": 2343 }, { "epoch": 4.458392772230147, "grad_norm": 0.312853068113327, "learning_rate": 8.514449031438552e-05, "loss": 0.1174, "step": 2344 }, { "epoch": 4.460294816928198, "grad_norm": 0.431356281042099, "learning_rate": 8.513813909177518e-05, "loss": 0.1324, "step": 2345 }, { "epoch": 4.462196861626248, "grad_norm": 0.2785525918006897, "learning_rate": 8.513178786916482e-05, "loss": 0.1025, "step": 2346 }, { "epoch": 4.464098906324299, "grad_norm": 0.2919105291366577, "learning_rate": 8.512543664655447e-05, "loss": 0.1154, "step": 2347 }, { "epoch": 4.466000951022349, "grad_norm": 0.4356403350830078, "learning_rate": 8.51190854239441e-05, "loss": 0.1161, "step": 2348 }, { "epoch": 4.467902995720399, "grad_norm": 0.3411230146884918, "learning_rate": 8.511273420133377e-05, "loss": 0.1032, "step": 2349 }, { "epoch": 4.46980504041845, "grad_norm": 0.3335597515106201, "learning_rate": 8.510638297872341e-05, "loss": 0.1427, "step": 2350 }, { "epoch": 4.4717070851165, "grad_norm": 0.3813069760799408, "learning_rate": 8.510003175611305e-05, "loss": 0.1214, "step": 2351 }, { "epoch": 4.473609129814551, "grad_norm": 0.2616579830646515, "learning_rate": 8.509368053350271e-05, "loss": 0.0914, "step": 2352 }, { "epoch": 4.475511174512601, "grad_norm": 0.24161195755004883, "learning_rate": 8.508732931089235e-05, "loss": 0.0806, "step": 2353 }, { "epoch": 4.477413219210652, "grad_norm": 0.41089168190956116, "learning_rate": 8.508097808828199e-05, "loss": 0.1095, "step": 2354 }, { "epoch": 4.479315263908702, "grad_norm": 0.2930002510547638, "learning_rate": 8.507462686567164e-05, "loss": 0.0851, "step": 2355 }, { "epoch": 4.481217308606753, "grad_norm": 0.38217440247535706, "learning_rate": 8.506827564306129e-05, "loss": 0.106, "step": 2356 }, { "epoch": 4.483119353304803, "grad_norm": 0.4617588520050049, "learning_rate": 8.506192442045094e-05, "loss": 0.1269, "step": 2357 }, { "epoch": 4.4850213980028535, "grad_norm": 0.33491015434265137, "learning_rate": 8.505557319784058e-05, "loss": 0.1086, "step": 2358 }, { "epoch": 4.4869234427009035, "grad_norm": 0.31024834513664246, "learning_rate": 8.504922197523023e-05, "loss": 0.1039, "step": 2359 }, { "epoch": 4.4888254873989535, "grad_norm": 0.36780717968940735, "learning_rate": 8.504287075261989e-05, "loss": 0.1102, "step": 2360 }, { "epoch": 4.490727532097004, "grad_norm": 0.40606439113616943, "learning_rate": 8.503651953000952e-05, "loss": 0.13, "step": 2361 }, { "epoch": 4.492629576795054, "grad_norm": 0.4511033296585083, "learning_rate": 8.503016830739918e-05, "loss": 0.1182, "step": 2362 }, { "epoch": 4.494531621493105, "grad_norm": 0.36328256130218506, "learning_rate": 8.502381708478883e-05, "loss": 0.1024, "step": 2363 }, { "epoch": 4.496433666191155, "grad_norm": 0.3860591650009155, "learning_rate": 8.501746586217847e-05, "loss": 0.1019, "step": 2364 }, { "epoch": 4.498335710889206, "grad_norm": 0.46222564578056335, "learning_rate": 8.501111463956812e-05, "loss": 0.1132, "step": 2365 }, { "epoch": 4.500237755587256, "grad_norm": 0.3612005412578583, "learning_rate": 8.500476341695777e-05, "loss": 0.0963, "step": 2366 }, { "epoch": 4.502139800285307, "grad_norm": 0.43513086438179016, "learning_rate": 8.499841219434742e-05, "loss": 0.1109, "step": 2367 }, { "epoch": 4.504041844983357, "grad_norm": 0.2950316071510315, "learning_rate": 8.499206097173706e-05, "loss": 0.1124, "step": 2368 }, { "epoch": 4.505943889681408, "grad_norm": 0.36488962173461914, "learning_rate": 8.498570974912671e-05, "loss": 0.1, "step": 2369 }, { "epoch": 4.507845934379458, "grad_norm": 0.3592323064804077, "learning_rate": 8.497935852651636e-05, "loss": 0.0995, "step": 2370 }, { "epoch": 4.509747979077508, "grad_norm": 0.34753555059432983, "learning_rate": 8.4973007303906e-05, "loss": 0.1026, "step": 2371 }, { "epoch": 4.511650023775559, "grad_norm": 0.39495691657066345, "learning_rate": 8.496665608129565e-05, "loss": 0.1272, "step": 2372 }, { "epoch": 4.513552068473609, "grad_norm": 0.3553752601146698, "learning_rate": 8.49603048586853e-05, "loss": 0.1136, "step": 2373 }, { "epoch": 4.51545411317166, "grad_norm": 0.37848785519599915, "learning_rate": 8.495395363607494e-05, "loss": 0.1069, "step": 2374 }, { "epoch": 4.51735615786971, "grad_norm": 0.33565762639045715, "learning_rate": 8.49476024134646e-05, "loss": 0.1075, "step": 2375 }, { "epoch": 4.519258202567761, "grad_norm": 0.3359149694442749, "learning_rate": 8.494125119085425e-05, "loss": 0.098, "step": 2376 }, { "epoch": 4.521160247265811, "grad_norm": 0.3218232989311218, "learning_rate": 8.493489996824389e-05, "loss": 0.096, "step": 2377 }, { "epoch": 4.5230622919638614, "grad_norm": 0.3153054714202881, "learning_rate": 8.492854874563354e-05, "loss": 0.1015, "step": 2378 }, { "epoch": 4.5249643366619114, "grad_norm": 0.37637823820114136, "learning_rate": 8.492219752302319e-05, "loss": 0.1164, "step": 2379 }, { "epoch": 4.526866381359962, "grad_norm": 0.3270327150821686, "learning_rate": 8.491584630041284e-05, "loss": 0.1084, "step": 2380 }, { "epoch": 4.528768426058012, "grad_norm": 0.23998558521270752, "learning_rate": 8.490949507780248e-05, "loss": 0.0777, "step": 2381 }, { "epoch": 4.530670470756062, "grad_norm": 0.31294015049934387, "learning_rate": 8.490314385519212e-05, "loss": 0.0807, "step": 2382 }, { "epoch": 4.532572515454113, "grad_norm": 0.3305555582046509, "learning_rate": 8.489679263258178e-05, "loss": 0.1011, "step": 2383 }, { "epoch": 4.534474560152163, "grad_norm": 0.35641244053840637, "learning_rate": 8.489044140997142e-05, "loss": 0.11, "step": 2384 }, { "epoch": 4.536376604850214, "grad_norm": 0.3511948883533478, "learning_rate": 8.488409018736107e-05, "loss": 0.1009, "step": 2385 }, { "epoch": 4.538278649548264, "grad_norm": 0.3899917006492615, "learning_rate": 8.487773896475071e-05, "loss": 0.1285, "step": 2386 }, { "epoch": 4.540180694246315, "grad_norm": 0.4415057897567749, "learning_rate": 8.487138774214036e-05, "loss": 0.1434, "step": 2387 }, { "epoch": 4.542082738944365, "grad_norm": 0.42669907212257385, "learning_rate": 8.486503651953002e-05, "loss": 0.1201, "step": 2388 }, { "epoch": 4.543984783642416, "grad_norm": 0.27351129055023193, "learning_rate": 8.485868529691965e-05, "loss": 0.0761, "step": 2389 }, { "epoch": 4.545886828340466, "grad_norm": 0.31243595480918884, "learning_rate": 8.48523340743093e-05, "loss": 0.0909, "step": 2390 }, { "epoch": 4.547788873038517, "grad_norm": 0.36273542046546936, "learning_rate": 8.484598285169896e-05, "loss": 0.1156, "step": 2391 }, { "epoch": 4.549690917736567, "grad_norm": 0.3167242109775543, "learning_rate": 8.48396316290886e-05, "loss": 0.2065, "step": 2392 }, { "epoch": 4.551592962434617, "grad_norm": 0.3072797358036041, "learning_rate": 8.483328040647825e-05, "loss": 0.0939, "step": 2393 }, { "epoch": 4.553495007132668, "grad_norm": 0.32601553201675415, "learning_rate": 8.48269291838679e-05, "loss": 0.1052, "step": 2394 }, { "epoch": 4.555397051830718, "grad_norm": 0.41232773661613464, "learning_rate": 8.482057796125754e-05, "loss": 0.1207, "step": 2395 }, { "epoch": 4.5572990965287685, "grad_norm": 0.46499213576316833, "learning_rate": 8.481422673864719e-05, "loss": 0.1251, "step": 2396 }, { "epoch": 4.5592011412268185, "grad_norm": 0.3984009325504303, "learning_rate": 8.480787551603684e-05, "loss": 0.1317, "step": 2397 }, { "epoch": 4.561103185924869, "grad_norm": 0.3825131356716156, "learning_rate": 8.48015242934265e-05, "loss": 0.1273, "step": 2398 }, { "epoch": 4.563005230622919, "grad_norm": 0.39657148718833923, "learning_rate": 8.479517307081613e-05, "loss": 0.145, "step": 2399 }, { "epoch": 4.56490727532097, "grad_norm": 0.3764631748199463, "learning_rate": 8.478882184820578e-05, "loss": 0.1133, "step": 2400 }, { "epoch": 4.56680932001902, "grad_norm": 0.2968275249004364, "learning_rate": 8.478247062559544e-05, "loss": 0.0885, "step": 2401 }, { "epoch": 4.568711364717071, "grad_norm": 0.326856791973114, "learning_rate": 8.477611940298507e-05, "loss": 0.0923, "step": 2402 }, { "epoch": 4.570613409415121, "grad_norm": 0.38287606835365295, "learning_rate": 8.476976818037473e-05, "loss": 0.141, "step": 2403 }, { "epoch": 4.572515454113171, "grad_norm": 0.47493815422058105, "learning_rate": 8.476341695776438e-05, "loss": 0.1146, "step": 2404 }, { "epoch": 4.574417498811222, "grad_norm": 0.35078614950180054, "learning_rate": 8.475706573515402e-05, "loss": 0.1153, "step": 2405 }, { "epoch": 4.576319543509273, "grad_norm": 0.3837313950061798, "learning_rate": 8.475071451254367e-05, "loss": 0.1408, "step": 2406 }, { "epoch": 4.578221588207323, "grad_norm": 0.3800102472305298, "learning_rate": 8.474436328993332e-05, "loss": 0.1224, "step": 2407 }, { "epoch": 4.580123632905373, "grad_norm": 0.40831804275512695, "learning_rate": 8.473801206732296e-05, "loss": 0.1283, "step": 2408 }, { "epoch": 4.582025677603424, "grad_norm": 0.34854429960250854, "learning_rate": 8.473166084471261e-05, "loss": 0.101, "step": 2409 }, { "epoch": 4.583927722301474, "grad_norm": 0.3317374885082245, "learning_rate": 8.472530962210226e-05, "loss": 0.0986, "step": 2410 }, { "epoch": 4.585829766999525, "grad_norm": 0.3316230773925781, "learning_rate": 8.471895839949191e-05, "loss": 0.0955, "step": 2411 }, { "epoch": 4.587731811697575, "grad_norm": 0.3458825945854187, "learning_rate": 8.471260717688155e-05, "loss": 0.1246, "step": 2412 }, { "epoch": 4.589633856395626, "grad_norm": 0.2985215187072754, "learning_rate": 8.470625595427119e-05, "loss": 0.0904, "step": 2413 }, { "epoch": 4.591535901093676, "grad_norm": 0.5128130912780762, "learning_rate": 8.469990473166086e-05, "loss": 0.1119, "step": 2414 }, { "epoch": 4.5934379457917265, "grad_norm": 0.3538981080055237, "learning_rate": 8.46935535090505e-05, "loss": 0.1276, "step": 2415 }, { "epoch": 4.5953399904897765, "grad_norm": 0.24112893640995026, "learning_rate": 8.468720228644015e-05, "loss": 0.0813, "step": 2416 }, { "epoch": 4.597242035187827, "grad_norm": 0.34151947498321533, "learning_rate": 8.46808510638298e-05, "loss": 0.1214, "step": 2417 }, { "epoch": 4.599144079885877, "grad_norm": 0.3011094629764557, "learning_rate": 8.467449984121944e-05, "loss": 0.0955, "step": 2418 }, { "epoch": 4.601046124583927, "grad_norm": 0.45026248693466187, "learning_rate": 8.466814861860909e-05, "loss": 0.1309, "step": 2419 }, { "epoch": 4.602948169281978, "grad_norm": 0.38199952244758606, "learning_rate": 8.466179739599873e-05, "loss": 0.1229, "step": 2420 }, { "epoch": 4.604850213980028, "grad_norm": 0.44846484065055847, "learning_rate": 8.465544617338839e-05, "loss": 0.1254, "step": 2421 }, { "epoch": 4.606752258678079, "grad_norm": 0.29512494802474976, "learning_rate": 8.464909495077803e-05, "loss": 0.0874, "step": 2422 }, { "epoch": 4.608654303376129, "grad_norm": 0.34601306915283203, "learning_rate": 8.464274372816767e-05, "loss": 0.0928, "step": 2423 }, { "epoch": 4.61055634807418, "grad_norm": 0.4081529378890991, "learning_rate": 8.463639250555733e-05, "loss": 0.1161, "step": 2424 }, { "epoch": 4.61245839277223, "grad_norm": 0.39208075404167175, "learning_rate": 8.463004128294697e-05, "loss": 0.1124, "step": 2425 }, { "epoch": 4.614360437470281, "grad_norm": 0.2740732431411743, "learning_rate": 8.462369006033661e-05, "loss": 0.0698, "step": 2426 }, { "epoch": 4.616262482168331, "grad_norm": 0.37493231892585754, "learning_rate": 8.461733883772626e-05, "loss": 0.089, "step": 2427 }, { "epoch": 4.618164526866382, "grad_norm": 0.4912300407886505, "learning_rate": 8.461098761511591e-05, "loss": 0.1374, "step": 2428 }, { "epoch": 4.620066571564432, "grad_norm": 0.44587963819503784, "learning_rate": 8.460463639250557e-05, "loss": 0.1207, "step": 2429 }, { "epoch": 4.621968616262482, "grad_norm": 0.4140859544277191, "learning_rate": 8.45982851698952e-05, "loss": 0.1333, "step": 2430 }, { "epoch": 4.623870660960533, "grad_norm": 0.3500138223171234, "learning_rate": 8.459193394728486e-05, "loss": 0.1032, "step": 2431 }, { "epoch": 4.625772705658583, "grad_norm": 0.3875083327293396, "learning_rate": 8.458558272467451e-05, "loss": 0.1018, "step": 2432 }, { "epoch": 4.627674750356634, "grad_norm": 0.5065046548843384, "learning_rate": 8.457923150206415e-05, "loss": 0.125, "step": 2433 }, { "epoch": 4.629576795054684, "grad_norm": 0.2707502841949463, "learning_rate": 8.45728802794538e-05, "loss": 0.1002, "step": 2434 }, { "epoch": 4.6314788397527344, "grad_norm": 0.38502418994903564, "learning_rate": 8.456652905684345e-05, "loss": 0.1264, "step": 2435 }, { "epoch": 4.633380884450784, "grad_norm": 0.34822702407836914, "learning_rate": 8.456017783423309e-05, "loss": 0.1184, "step": 2436 }, { "epoch": 4.635282929148835, "grad_norm": 0.33620592951774597, "learning_rate": 8.455382661162274e-05, "loss": 0.1264, "step": 2437 }, { "epoch": 4.637184973846885, "grad_norm": 0.3064115345478058, "learning_rate": 8.454747538901239e-05, "loss": 0.1122, "step": 2438 }, { "epoch": 4.639087018544936, "grad_norm": 0.34428808093070984, "learning_rate": 8.454112416640204e-05, "loss": 0.1083, "step": 2439 }, { "epoch": 4.640989063242986, "grad_norm": 0.3312735855579376, "learning_rate": 8.453477294379168e-05, "loss": 0.1046, "step": 2440 }, { "epoch": 4.642891107941036, "grad_norm": 0.42405757308006287, "learning_rate": 8.452842172118133e-05, "loss": 0.1364, "step": 2441 }, { "epoch": 4.644793152639087, "grad_norm": 0.39682331681251526, "learning_rate": 8.452207049857099e-05, "loss": 0.1262, "step": 2442 }, { "epoch": 4.646695197337137, "grad_norm": 0.3447044789791107, "learning_rate": 8.451571927596062e-05, "loss": 0.1158, "step": 2443 }, { "epoch": 4.648597242035188, "grad_norm": 0.40121355652809143, "learning_rate": 8.450936805335026e-05, "loss": 0.1246, "step": 2444 }, { "epoch": 4.650499286733238, "grad_norm": 0.3898472785949707, "learning_rate": 8.450301683073993e-05, "loss": 0.1244, "step": 2445 }, { "epoch": 4.652401331431289, "grad_norm": 0.2964152991771698, "learning_rate": 8.449666560812957e-05, "loss": 0.0925, "step": 2446 }, { "epoch": 4.654303376129339, "grad_norm": 0.2836705446243286, "learning_rate": 8.449031438551922e-05, "loss": 0.101, "step": 2447 }, { "epoch": 4.65620542082739, "grad_norm": 0.3003692030906677, "learning_rate": 8.448396316290887e-05, "loss": 0.0922, "step": 2448 }, { "epoch": 4.65810746552544, "grad_norm": 0.5348609089851379, "learning_rate": 8.447761194029851e-05, "loss": 0.1735, "step": 2449 }, { "epoch": 4.660009510223491, "grad_norm": 0.3387379050254822, "learning_rate": 8.447126071768816e-05, "loss": 0.1126, "step": 2450 }, { "epoch": 4.661911554921541, "grad_norm": 0.30646830797195435, "learning_rate": 8.44649094950778e-05, "loss": 0.085, "step": 2451 }, { "epoch": 4.663813599619591, "grad_norm": 0.34434470534324646, "learning_rate": 8.445855827246746e-05, "loss": 0.1113, "step": 2452 }, { "epoch": 4.6657156443176415, "grad_norm": 0.38273414969444275, "learning_rate": 8.44522070498571e-05, "loss": 0.1135, "step": 2453 }, { "epoch": 4.6676176890156915, "grad_norm": 0.44843336939811707, "learning_rate": 8.444585582724674e-05, "loss": 0.1497, "step": 2454 }, { "epoch": 4.669519733713742, "grad_norm": 0.4575416147708893, "learning_rate": 8.44395046046364e-05, "loss": 0.1082, "step": 2455 }, { "epoch": 4.671421778411792, "grad_norm": 0.38473185896873474, "learning_rate": 8.443315338202604e-05, "loss": 0.1255, "step": 2456 }, { "epoch": 4.673323823109843, "grad_norm": 0.3839578926563263, "learning_rate": 8.44268021594157e-05, "loss": 0.1106, "step": 2457 }, { "epoch": 4.675225867807893, "grad_norm": 0.35472893714904785, "learning_rate": 8.442045093680533e-05, "loss": 0.1122, "step": 2458 }, { "epoch": 4.677127912505944, "grad_norm": 0.34224382042884827, "learning_rate": 8.441409971419499e-05, "loss": 0.0963, "step": 2459 }, { "epoch": 4.679029957203994, "grad_norm": 0.3992440104484558, "learning_rate": 8.440774849158464e-05, "loss": 0.1234, "step": 2460 }, { "epoch": 4.680932001902045, "grad_norm": 0.39441943168640137, "learning_rate": 8.440139726897428e-05, "loss": 0.11, "step": 2461 }, { "epoch": 4.682834046600095, "grad_norm": 0.43852171301841736, "learning_rate": 8.439504604636393e-05, "loss": 0.1361, "step": 2462 }, { "epoch": 4.684736091298145, "grad_norm": 0.35047483444213867, "learning_rate": 8.438869482375358e-05, "loss": 0.0981, "step": 2463 }, { "epoch": 4.686638135996196, "grad_norm": 0.3970755934715271, "learning_rate": 8.438234360114322e-05, "loss": 0.1196, "step": 2464 }, { "epoch": 4.688540180694246, "grad_norm": 0.2760510742664337, "learning_rate": 8.437599237853287e-05, "loss": 0.1035, "step": 2465 }, { "epoch": 4.690442225392297, "grad_norm": 0.26530909538269043, "learning_rate": 8.436964115592252e-05, "loss": 0.1589, "step": 2466 }, { "epoch": 4.692344270090347, "grad_norm": 0.2989928126335144, "learning_rate": 8.436328993331216e-05, "loss": 0.0945, "step": 2467 }, { "epoch": 4.694246314788398, "grad_norm": 0.42447128891944885, "learning_rate": 8.435693871070181e-05, "loss": 0.1433, "step": 2468 }, { "epoch": 4.696148359486448, "grad_norm": 0.4014334976673126, "learning_rate": 8.435058748809146e-05, "loss": 0.1242, "step": 2469 }, { "epoch": 4.698050404184499, "grad_norm": 0.3872852921485901, "learning_rate": 8.434423626548111e-05, "loss": 0.1195, "step": 2470 }, { "epoch": 4.699952448882549, "grad_norm": 0.3857705891132355, "learning_rate": 8.433788504287075e-05, "loss": 0.108, "step": 2471 }, { "epoch": 4.7018544935805995, "grad_norm": 0.3534420430660248, "learning_rate": 8.43315338202604e-05, "loss": 0.1218, "step": 2472 }, { "epoch": 4.7037565382786495, "grad_norm": 0.32009604573249817, "learning_rate": 8.432518259765006e-05, "loss": 0.1053, "step": 2473 }, { "epoch": 4.7056585829766995, "grad_norm": 0.2501387894153595, "learning_rate": 8.43188313750397e-05, "loss": 0.0668, "step": 2474 }, { "epoch": 4.70756062767475, "grad_norm": 0.3360025882720947, "learning_rate": 8.431248015242935e-05, "loss": 0.1119, "step": 2475 }, { "epoch": 4.709462672372801, "grad_norm": 0.31509891152381897, "learning_rate": 8.4306128929819e-05, "loss": 0.0955, "step": 2476 }, { "epoch": 4.711364717070851, "grad_norm": 0.42007285356521606, "learning_rate": 8.429977770720864e-05, "loss": 0.1441, "step": 2477 }, { "epoch": 4.713266761768901, "grad_norm": 0.39764338731765747, "learning_rate": 8.429342648459829e-05, "loss": 0.1175, "step": 2478 }, { "epoch": 4.715168806466952, "grad_norm": 0.33381861448287964, "learning_rate": 8.428707526198794e-05, "loss": 0.1199, "step": 2479 }, { "epoch": 4.717070851165002, "grad_norm": 0.2918257415294647, "learning_rate": 8.428072403937758e-05, "loss": 0.0796, "step": 2480 }, { "epoch": 4.718972895863053, "grad_norm": 0.42560750246047974, "learning_rate": 8.427437281676723e-05, "loss": 0.114, "step": 2481 }, { "epoch": 4.720874940561103, "grad_norm": 0.3700113594532013, "learning_rate": 8.426802159415688e-05, "loss": 0.1145, "step": 2482 }, { "epoch": 4.722776985259154, "grad_norm": 0.39171457290649414, "learning_rate": 8.426167037154653e-05, "loss": 0.128, "step": 2483 }, { "epoch": 4.724679029957204, "grad_norm": 0.3000270426273346, "learning_rate": 8.425531914893617e-05, "loss": 0.0932, "step": 2484 }, { "epoch": 4.726581074655254, "grad_norm": 0.2848623991012573, "learning_rate": 8.424896792632581e-05, "loss": 0.086, "step": 2485 }, { "epoch": 4.728483119353305, "grad_norm": 0.3404539227485657, "learning_rate": 8.424261670371548e-05, "loss": 0.0934, "step": 2486 }, { "epoch": 4.730385164051356, "grad_norm": 0.31609418988227844, "learning_rate": 8.423626548110511e-05, "loss": 0.0985, "step": 2487 }, { "epoch": 4.732287208749406, "grad_norm": 0.34037312865257263, "learning_rate": 8.422991425849477e-05, "loss": 0.1193, "step": 2488 }, { "epoch": 4.734189253447456, "grad_norm": 0.31899651885032654, "learning_rate": 8.422356303588442e-05, "loss": 0.1137, "step": 2489 }, { "epoch": 4.736091298145507, "grad_norm": 0.39307737350463867, "learning_rate": 8.421721181327406e-05, "loss": 0.1452, "step": 2490 }, { "epoch": 4.7379933428435566, "grad_norm": 0.26885175704956055, "learning_rate": 8.421086059066371e-05, "loss": 0.1025, "step": 2491 }, { "epoch": 4.739895387541607, "grad_norm": 0.23492799699306488, "learning_rate": 8.420450936805335e-05, "loss": 0.0821, "step": 2492 }, { "epoch": 4.741797432239657, "grad_norm": 0.30144715309143066, "learning_rate": 8.419815814544301e-05, "loss": 0.0924, "step": 2493 }, { "epoch": 4.743699476937708, "grad_norm": 0.3370392322540283, "learning_rate": 8.419180692283265e-05, "loss": 0.1281, "step": 2494 }, { "epoch": 4.745601521635758, "grad_norm": 0.3939819633960724, "learning_rate": 8.418545570022229e-05, "loss": 0.1115, "step": 2495 }, { "epoch": 4.747503566333809, "grad_norm": 0.7242825627326965, "learning_rate": 8.417910447761194e-05, "loss": 0.1038, "step": 2496 }, { "epoch": 4.749405611031859, "grad_norm": 0.3430320620536804, "learning_rate": 8.417275325500159e-05, "loss": 0.107, "step": 2497 }, { "epoch": 4.75130765572991, "grad_norm": 0.37956321239471436, "learning_rate": 8.416640203239123e-05, "loss": 0.1203, "step": 2498 }, { "epoch": 4.75320970042796, "grad_norm": 0.3118121027946472, "learning_rate": 8.416005080978088e-05, "loss": 0.0961, "step": 2499 }, { "epoch": 4.75511174512601, "grad_norm": 0.3842122554779053, "learning_rate": 8.415369958717053e-05, "loss": 0.1095, "step": 2500 }, { "epoch": 4.757013789824061, "grad_norm": 0.36103618144989014, "learning_rate": 8.414734836456019e-05, "loss": 0.107, "step": 2501 }, { "epoch": 4.758915834522111, "grad_norm": 0.4404369592666626, "learning_rate": 8.414099714194982e-05, "loss": 0.0972, "step": 2502 }, { "epoch": 4.760817879220162, "grad_norm": 0.45303696393966675, "learning_rate": 8.413464591933948e-05, "loss": 0.1286, "step": 2503 }, { "epoch": 4.762719923918212, "grad_norm": 0.36196044087409973, "learning_rate": 8.412829469672913e-05, "loss": 0.1095, "step": 2504 }, { "epoch": 4.764621968616263, "grad_norm": 0.49001795053482056, "learning_rate": 8.412194347411877e-05, "loss": 0.1578, "step": 2505 }, { "epoch": 4.766524013314313, "grad_norm": 0.32446369528770447, "learning_rate": 8.411559225150842e-05, "loss": 0.0991, "step": 2506 }, { "epoch": 4.768426058012364, "grad_norm": 0.3021388053894043, "learning_rate": 8.410924102889807e-05, "loss": 0.0902, "step": 2507 }, { "epoch": 4.770328102710414, "grad_norm": 0.28912147879600525, "learning_rate": 8.410288980628771e-05, "loss": 0.106, "step": 2508 }, { "epoch": 4.7722301474084645, "grad_norm": 0.40766748785972595, "learning_rate": 8.409653858367736e-05, "loss": 0.1155, "step": 2509 }, { "epoch": 4.7741321921065145, "grad_norm": 0.5005617737770081, "learning_rate": 8.409018736106701e-05, "loss": 0.1674, "step": 2510 }, { "epoch": 4.7760342368045645, "grad_norm": 0.4575154781341553, "learning_rate": 8.408383613845666e-05, "loss": 0.1639, "step": 2511 }, { "epoch": 4.777936281502615, "grad_norm": 0.4962354302406311, "learning_rate": 8.40774849158463e-05, "loss": 0.1336, "step": 2512 }, { "epoch": 4.779838326200665, "grad_norm": 0.4569809138774872, "learning_rate": 8.407113369323595e-05, "loss": 0.1323, "step": 2513 }, { "epoch": 4.781740370898716, "grad_norm": 0.34369999170303345, "learning_rate": 8.40647824706256e-05, "loss": 0.1171, "step": 2514 }, { "epoch": 4.783642415596766, "grad_norm": 0.3565669655799866, "learning_rate": 8.405843124801524e-05, "loss": 0.1159, "step": 2515 }, { "epoch": 4.785544460294817, "grad_norm": 0.24039465188980103, "learning_rate": 8.405208002540488e-05, "loss": 0.0976, "step": 2516 }, { "epoch": 4.787446504992867, "grad_norm": 0.37532779574394226, "learning_rate": 8.404572880279455e-05, "loss": 0.1129, "step": 2517 }, { "epoch": 4.789348549690918, "grad_norm": 0.334505170583725, "learning_rate": 8.403937758018419e-05, "loss": 0.1016, "step": 2518 }, { "epoch": 4.791250594388968, "grad_norm": 0.43082761764526367, "learning_rate": 8.403302635757384e-05, "loss": 0.1307, "step": 2519 }, { "epoch": 4.793152639087019, "grad_norm": 0.4381292760372162, "learning_rate": 8.402667513496349e-05, "loss": 0.1137, "step": 2520 }, { "epoch": 4.795054683785069, "grad_norm": 0.4337981045246124, "learning_rate": 8.402032391235313e-05, "loss": 0.1281, "step": 2521 }, { "epoch": 4.796956728483119, "grad_norm": 0.4429587721824646, "learning_rate": 8.401397268974278e-05, "loss": 0.1191, "step": 2522 }, { "epoch": 4.79885877318117, "grad_norm": 0.4298746883869171, "learning_rate": 8.400762146713242e-05, "loss": 0.1367, "step": 2523 }, { "epoch": 4.80076081787922, "grad_norm": 0.42826715111732483, "learning_rate": 8.400127024452208e-05, "loss": 0.1222, "step": 2524 }, { "epoch": 4.802662862577271, "grad_norm": 0.37338751554489136, "learning_rate": 8.399491902191172e-05, "loss": 0.1048, "step": 2525 }, { "epoch": 4.804564907275321, "grad_norm": 0.38671061396598816, "learning_rate": 8.398856779930136e-05, "loss": 0.1154, "step": 2526 }, { "epoch": 4.806466951973372, "grad_norm": 0.3544102907180786, "learning_rate": 8.398221657669103e-05, "loss": 0.1055, "step": 2527 }, { "epoch": 4.808368996671422, "grad_norm": 0.38023364543914795, "learning_rate": 8.397586535408066e-05, "loss": 0.1117, "step": 2528 }, { "epoch": 4.8102710413694725, "grad_norm": 0.3622092008590698, "learning_rate": 8.396951413147032e-05, "loss": 0.1099, "step": 2529 }, { "epoch": 4.8121730860675225, "grad_norm": 0.692039966583252, "learning_rate": 8.396316290885995e-05, "loss": 0.1335, "step": 2530 }, { "epoch": 4.814075130765573, "grad_norm": 0.35321712493896484, "learning_rate": 8.39568116862496e-05, "loss": 0.1175, "step": 2531 }, { "epoch": 4.815977175463623, "grad_norm": 0.37036386132240295, "learning_rate": 8.395046046363926e-05, "loss": 0.1253, "step": 2532 }, { "epoch": 4.817879220161673, "grad_norm": 0.42249128222465515, "learning_rate": 8.39441092410289e-05, "loss": 0.1163, "step": 2533 }, { "epoch": 4.819781264859724, "grad_norm": 0.3563583195209503, "learning_rate": 8.393775801841855e-05, "loss": 0.1597, "step": 2534 }, { "epoch": 4.821683309557774, "grad_norm": 0.39946305751800537, "learning_rate": 8.39314067958082e-05, "loss": 0.1156, "step": 2535 }, { "epoch": 4.823585354255825, "grad_norm": 0.31761807203292847, "learning_rate": 8.392505557319784e-05, "loss": 0.0946, "step": 2536 }, { "epoch": 4.825487398953875, "grad_norm": 0.4180295765399933, "learning_rate": 8.391870435058749e-05, "loss": 0.1271, "step": 2537 }, { "epoch": 4.827389443651926, "grad_norm": 0.36158043146133423, "learning_rate": 8.391235312797714e-05, "loss": 0.106, "step": 2538 }, { "epoch": 4.829291488349976, "grad_norm": 0.4044169783592224, "learning_rate": 8.390600190536678e-05, "loss": 0.1094, "step": 2539 }, { "epoch": 4.831193533048027, "grad_norm": 0.3362937569618225, "learning_rate": 8.389965068275643e-05, "loss": 0.078, "step": 2540 }, { "epoch": 4.833095577746077, "grad_norm": 0.3558341860771179, "learning_rate": 8.389329946014608e-05, "loss": 0.1125, "step": 2541 }, { "epoch": 4.834997622444128, "grad_norm": 0.44893354177474976, "learning_rate": 8.388694823753574e-05, "loss": 0.1393, "step": 2542 }, { "epoch": 4.836899667142178, "grad_norm": 0.3790888488292694, "learning_rate": 8.388059701492537e-05, "loss": 0.1312, "step": 2543 }, { "epoch": 4.838801711840228, "grad_norm": 0.24070213735103607, "learning_rate": 8.387424579231503e-05, "loss": 0.0772, "step": 2544 }, { "epoch": 4.840703756538279, "grad_norm": 0.4367123246192932, "learning_rate": 8.386789456970468e-05, "loss": 0.1227, "step": 2545 }, { "epoch": 4.842605801236329, "grad_norm": 0.3168450891971588, "learning_rate": 8.386154334709432e-05, "loss": 0.0928, "step": 2546 }, { "epoch": 4.8445078459343796, "grad_norm": 0.36236846446990967, "learning_rate": 8.385519212448397e-05, "loss": 0.0997, "step": 2547 }, { "epoch": 4.8464098906324296, "grad_norm": 0.31763169169425964, "learning_rate": 8.384884090187362e-05, "loss": 0.1093, "step": 2548 }, { "epoch": 4.84831193533048, "grad_norm": 0.3502260148525238, "learning_rate": 8.384248967926326e-05, "loss": 0.1299, "step": 2549 }, { "epoch": 4.85021398002853, "grad_norm": 0.3593395948410034, "learning_rate": 8.383613845665291e-05, "loss": 0.1066, "step": 2550 }, { "epoch": 4.852116024726581, "grad_norm": 0.39665883779525757, "learning_rate": 8.382978723404256e-05, "loss": 0.1267, "step": 2551 }, { "epoch": 4.854018069424631, "grad_norm": 0.4395765960216522, "learning_rate": 8.38234360114322e-05, "loss": 0.174, "step": 2552 }, { "epoch": 4.855920114122682, "grad_norm": 0.3507075607776642, "learning_rate": 8.381708478882185e-05, "loss": 0.0953, "step": 2553 }, { "epoch": 4.857822158820732, "grad_norm": 0.3769589364528656, "learning_rate": 8.381073356621149e-05, "loss": 0.1395, "step": 2554 }, { "epoch": 4.859724203518782, "grad_norm": 0.30503159761428833, "learning_rate": 8.380438234360116e-05, "loss": 0.0937, "step": 2555 }, { "epoch": 4.861626248216833, "grad_norm": 0.39943060278892517, "learning_rate": 8.37980311209908e-05, "loss": 0.1103, "step": 2556 }, { "epoch": 4.863528292914884, "grad_norm": 0.36200422048568726, "learning_rate": 8.379167989838043e-05, "loss": 0.1135, "step": 2557 }, { "epoch": 4.865430337612934, "grad_norm": 0.3811735510826111, "learning_rate": 8.37853286757701e-05, "loss": 0.1265, "step": 2558 }, { "epoch": 4.867332382310984, "grad_norm": 0.42090871930122375, "learning_rate": 8.377897745315974e-05, "loss": 0.1339, "step": 2559 }, { "epoch": 4.869234427009035, "grad_norm": 0.41796380281448364, "learning_rate": 8.377262623054939e-05, "loss": 0.1136, "step": 2560 }, { "epoch": 4.871136471707085, "grad_norm": 0.33189094066619873, "learning_rate": 8.376627500793903e-05, "loss": 0.0923, "step": 2561 }, { "epoch": 4.873038516405136, "grad_norm": 0.46369072794914246, "learning_rate": 8.375992378532868e-05, "loss": 0.1236, "step": 2562 }, { "epoch": 4.874940561103186, "grad_norm": 0.27973759174346924, "learning_rate": 8.375357256271833e-05, "loss": 0.0933, "step": 2563 }, { "epoch": 4.876842605801237, "grad_norm": 0.39309409260749817, "learning_rate": 8.374722134010797e-05, "loss": 0.1135, "step": 2564 }, { "epoch": 4.878744650499287, "grad_norm": 0.43652641773223877, "learning_rate": 8.374087011749763e-05, "loss": 0.136, "step": 2565 }, { "epoch": 4.8806466951973375, "grad_norm": 0.30485180020332336, "learning_rate": 8.373451889488727e-05, "loss": 0.0894, "step": 2566 }, { "epoch": 4.8825487398953875, "grad_norm": 0.40164196491241455, "learning_rate": 8.372816767227691e-05, "loss": 0.1235, "step": 2567 }, { "epoch": 4.884450784593438, "grad_norm": 0.3442533314228058, "learning_rate": 8.372181644966656e-05, "loss": 0.1222, "step": 2568 }, { "epoch": 4.886352829291488, "grad_norm": 0.38092851638793945, "learning_rate": 8.371546522705621e-05, "loss": 0.1135, "step": 2569 }, { "epoch": 4.888254873989538, "grad_norm": 0.37114188075065613, "learning_rate": 8.370911400444585e-05, "loss": 0.1181, "step": 2570 }, { "epoch": 4.890156918687589, "grad_norm": 0.35971492528915405, "learning_rate": 8.37027627818355e-05, "loss": 0.1247, "step": 2571 }, { "epoch": 4.892058963385639, "grad_norm": 0.25756967067718506, "learning_rate": 8.369641155922516e-05, "loss": 0.0929, "step": 2572 }, { "epoch": 4.89396100808369, "grad_norm": 0.4541129171848297, "learning_rate": 8.369006033661481e-05, "loss": 0.142, "step": 2573 }, { "epoch": 4.89586305278174, "grad_norm": 0.48526903986930847, "learning_rate": 8.368370911400445e-05, "loss": 0.1612, "step": 2574 }, { "epoch": 4.897765097479791, "grad_norm": 0.31703343987464905, "learning_rate": 8.36773578913941e-05, "loss": 0.1135, "step": 2575 }, { "epoch": 4.899667142177841, "grad_norm": 0.2969724237918854, "learning_rate": 8.367100666878375e-05, "loss": 0.1148, "step": 2576 }, { "epoch": 4.901569186875892, "grad_norm": 0.37165188789367676, "learning_rate": 8.366465544617339e-05, "loss": 0.1066, "step": 2577 }, { "epoch": 4.903471231573942, "grad_norm": 0.2899304926395416, "learning_rate": 8.365830422356304e-05, "loss": 0.0896, "step": 2578 }, { "epoch": 4.905373276271993, "grad_norm": 0.3420521914958954, "learning_rate": 8.365195300095269e-05, "loss": 0.0929, "step": 2579 }, { "epoch": 4.907275320970043, "grad_norm": 0.48174387216567993, "learning_rate": 8.364560177834233e-05, "loss": 0.1422, "step": 2580 }, { "epoch": 4.909177365668093, "grad_norm": 0.3492242693901062, "learning_rate": 8.363925055573198e-05, "loss": 0.1116, "step": 2581 }, { "epoch": 4.911079410366144, "grad_norm": 0.367914080619812, "learning_rate": 8.363289933312163e-05, "loss": 0.1139, "step": 2582 }, { "epoch": 4.912981455064194, "grad_norm": 0.32939612865448, "learning_rate": 8.362654811051129e-05, "loss": 0.1175, "step": 2583 }, { "epoch": 4.914883499762245, "grad_norm": 0.3939587473869324, "learning_rate": 8.362019688790092e-05, "loss": 0.1263, "step": 2584 }, { "epoch": 4.916785544460295, "grad_norm": 0.36641520261764526, "learning_rate": 8.361384566529058e-05, "loss": 0.1219, "step": 2585 }, { "epoch": 4.9186875891583455, "grad_norm": 0.2804834544658661, "learning_rate": 8.360749444268023e-05, "loss": 0.0839, "step": 2586 }, { "epoch": 4.9205896338563955, "grad_norm": 0.310461163520813, "learning_rate": 8.360114322006987e-05, "loss": 0.0949, "step": 2587 }, { "epoch": 4.922491678554446, "grad_norm": 0.34361201524734497, "learning_rate": 8.35947919974595e-05, "loss": 0.1167, "step": 2588 }, { "epoch": 4.924393723252496, "grad_norm": 0.3348811864852905, "learning_rate": 8.358844077484917e-05, "loss": 0.1035, "step": 2589 }, { "epoch": 4.926295767950547, "grad_norm": 0.24014593660831451, "learning_rate": 8.358208955223881e-05, "loss": 0.1413, "step": 2590 }, { "epoch": 4.928197812648597, "grad_norm": 0.4338441491127014, "learning_rate": 8.357573832962846e-05, "loss": 0.1186, "step": 2591 }, { "epoch": 4.930099857346647, "grad_norm": 0.3601210415363312, "learning_rate": 8.356938710701811e-05, "loss": 0.1014, "step": 2592 }, { "epoch": 4.932001902044698, "grad_norm": 0.2996499538421631, "learning_rate": 8.356303588440775e-05, "loss": 0.0906, "step": 2593 }, { "epoch": 4.933903946742748, "grad_norm": 0.30851230025291443, "learning_rate": 8.35566846617974e-05, "loss": 0.0806, "step": 2594 }, { "epoch": 4.935805991440799, "grad_norm": 0.22290165722370148, "learning_rate": 8.355033343918704e-05, "loss": 0.0728, "step": 2595 }, { "epoch": 4.937708036138849, "grad_norm": 0.28518247604370117, "learning_rate": 8.35439822165767e-05, "loss": 0.0894, "step": 2596 }, { "epoch": 4.9396100808369, "grad_norm": 0.424231618642807, "learning_rate": 8.353763099396634e-05, "loss": 0.1157, "step": 2597 }, { "epoch": 4.94151212553495, "grad_norm": 0.5748564600944519, "learning_rate": 8.353127977135598e-05, "loss": 0.1777, "step": 2598 }, { "epoch": 4.943414170233001, "grad_norm": 0.39010798931121826, "learning_rate": 8.352492854874565e-05, "loss": 0.104, "step": 2599 }, { "epoch": 4.945316214931051, "grad_norm": 0.40491625666618347, "learning_rate": 8.351857732613529e-05, "loss": 0.115, "step": 2600 }, { "epoch": 4.947218259629102, "grad_norm": 0.3881874084472656, "learning_rate": 8.351222610352494e-05, "loss": 0.1125, "step": 2601 }, { "epoch": 4.949120304327152, "grad_norm": 0.4075947403907776, "learning_rate": 8.350587488091458e-05, "loss": 0.1376, "step": 2602 }, { "epoch": 4.951022349025202, "grad_norm": 0.4263762831687927, "learning_rate": 8.349952365830423e-05, "loss": 0.1214, "step": 2603 }, { "epoch": 4.9529243937232525, "grad_norm": 0.4403824806213379, "learning_rate": 8.349317243569388e-05, "loss": 0.1212, "step": 2604 }, { "epoch": 4.9548264384213025, "grad_norm": 0.41958004236221313, "learning_rate": 8.348682121308352e-05, "loss": 0.1197, "step": 2605 }, { "epoch": 4.956728483119353, "grad_norm": 0.3664645850658417, "learning_rate": 8.348046999047317e-05, "loss": 0.1208, "step": 2606 }, { "epoch": 4.958630527817403, "grad_norm": 0.3618158996105194, "learning_rate": 8.347411876786282e-05, "loss": 0.1241, "step": 2607 }, { "epoch": 4.960532572515454, "grad_norm": 0.3135223686695099, "learning_rate": 8.346776754525246e-05, "loss": 0.0807, "step": 2608 }, { "epoch": 4.962434617213504, "grad_norm": 0.3673211932182312, "learning_rate": 8.346141632264211e-05, "loss": 0.1188, "step": 2609 }, { "epoch": 4.964336661911555, "grad_norm": 0.34168919920921326, "learning_rate": 8.345506510003176e-05, "loss": 0.1113, "step": 2610 }, { "epoch": 4.966238706609605, "grad_norm": 0.3807981312274933, "learning_rate": 8.34487138774214e-05, "loss": 0.1243, "step": 2611 }, { "epoch": 4.968140751307656, "grad_norm": 0.35833629965782166, "learning_rate": 8.344236265481105e-05, "loss": 0.1175, "step": 2612 }, { "epoch": 4.970042796005706, "grad_norm": 0.4410795569419861, "learning_rate": 8.34360114322007e-05, "loss": 0.1174, "step": 2613 }, { "epoch": 4.971944840703756, "grad_norm": 0.27122291922569275, "learning_rate": 8.342966020959036e-05, "loss": 0.1062, "step": 2614 }, { "epoch": 4.973846885401807, "grad_norm": 0.3411978483200073, "learning_rate": 8.342330898698e-05, "loss": 0.1274, "step": 2615 }, { "epoch": 4.975748930099857, "grad_norm": 0.36536306142807007, "learning_rate": 8.341695776436965e-05, "loss": 0.1182, "step": 2616 }, { "epoch": 4.977650974797908, "grad_norm": 0.3873109221458435, "learning_rate": 8.34106065417593e-05, "loss": 0.1043, "step": 2617 }, { "epoch": 4.979553019495958, "grad_norm": 0.30192115902900696, "learning_rate": 8.340425531914894e-05, "loss": 0.0984, "step": 2618 }, { "epoch": 4.981455064194009, "grad_norm": 0.37886565923690796, "learning_rate": 8.339790409653859e-05, "loss": 0.1161, "step": 2619 }, { "epoch": 4.983357108892059, "grad_norm": 0.34957846999168396, "learning_rate": 8.339155287392824e-05, "loss": 0.1083, "step": 2620 }, { "epoch": 4.98525915359011, "grad_norm": 0.3169527053833008, "learning_rate": 8.338520165131788e-05, "loss": 0.088, "step": 2621 }, { "epoch": 4.98716119828816, "grad_norm": 0.41983914375305176, "learning_rate": 8.337885042870753e-05, "loss": 0.1158, "step": 2622 }, { "epoch": 4.9890632429862105, "grad_norm": 0.3467552661895752, "learning_rate": 8.337249920609718e-05, "loss": 0.0958, "step": 2623 }, { "epoch": 4.9909652876842605, "grad_norm": 0.3872130513191223, "learning_rate": 8.336614798348682e-05, "loss": 0.1012, "step": 2624 }, { "epoch": 4.9928673323823105, "grad_norm": 0.2966238856315613, "learning_rate": 8.335979676087647e-05, "loss": 0.0913, "step": 2625 }, { "epoch": 4.994769377080361, "grad_norm": 0.4195917248725891, "learning_rate": 8.335344553826611e-05, "loss": 0.1124, "step": 2626 }, { "epoch": 4.996671421778412, "grad_norm": 0.39411017298698425, "learning_rate": 8.334709431565578e-05, "loss": 0.1286, "step": 2627 }, { "epoch": 4.998573466476462, "grad_norm": 0.3783339262008667, "learning_rate": 8.334074309304541e-05, "loss": 0.1084, "step": 2628 }, { "epoch": 5.000475511174512, "grad_norm": 0.4462081789970398, "learning_rate": 8.333439187043505e-05, "loss": 0.118, "step": 2629 }, { "epoch": 5.002377555872563, "grad_norm": 0.26820147037506104, "learning_rate": 8.332804064782472e-05, "loss": 0.1318, "step": 2630 }, { "epoch": 5.004279600570613, "grad_norm": 0.27382394671440125, "learning_rate": 8.332168942521436e-05, "loss": 0.0847, "step": 2631 }, { "epoch": 5.006181645268664, "grad_norm": 0.2846938371658325, "learning_rate": 8.331533820260401e-05, "loss": 0.0897, "step": 2632 }, { "epoch": 5.008083689966714, "grad_norm": 0.27744022011756897, "learning_rate": 8.330898697999365e-05, "loss": 0.0809, "step": 2633 }, { "epoch": 5.009985734664765, "grad_norm": 0.23713688552379608, "learning_rate": 8.33026357573833e-05, "loss": 0.0768, "step": 2634 }, { "epoch": 5.011887779362815, "grad_norm": 0.4143311381340027, "learning_rate": 8.329628453477295e-05, "loss": 0.0924, "step": 2635 }, { "epoch": 5.013789824060866, "grad_norm": 0.315565288066864, "learning_rate": 8.328993331216259e-05, "loss": 0.082, "step": 2636 }, { "epoch": 5.015691868758916, "grad_norm": 0.3401367664337158, "learning_rate": 8.328358208955225e-05, "loss": 0.1119, "step": 2637 }, { "epoch": 5.017593913456966, "grad_norm": 0.30635321140289307, "learning_rate": 8.327723086694189e-05, "loss": 0.1126, "step": 2638 }, { "epoch": 5.019495958155017, "grad_norm": 0.28435221314430237, "learning_rate": 8.327087964433153e-05, "loss": 0.0782, "step": 2639 }, { "epoch": 5.021398002853067, "grad_norm": 0.36568841338157654, "learning_rate": 8.326452842172118e-05, "loss": 0.1068, "step": 2640 }, { "epoch": 5.023300047551118, "grad_norm": 0.31536993384361267, "learning_rate": 8.325817719911083e-05, "loss": 0.0999, "step": 2641 }, { "epoch": 5.025202092249168, "grad_norm": 0.30222904682159424, "learning_rate": 8.325182597650047e-05, "loss": 0.09, "step": 2642 }, { "epoch": 5.0271041369472185, "grad_norm": 0.27157366275787354, "learning_rate": 8.324547475389012e-05, "loss": 0.0877, "step": 2643 }, { "epoch": 5.0290061816452685, "grad_norm": 0.239767923951149, "learning_rate": 8.323912353127978e-05, "loss": 0.0774, "step": 2644 }, { "epoch": 5.030908226343319, "grad_norm": 0.3949924111366272, "learning_rate": 8.323277230866943e-05, "loss": 0.0971, "step": 2645 }, { "epoch": 5.032810271041369, "grad_norm": 0.384383887052536, "learning_rate": 8.322642108605907e-05, "loss": 0.093, "step": 2646 }, { "epoch": 5.03471231573942, "grad_norm": 0.38407453894615173, "learning_rate": 8.322006986344872e-05, "loss": 0.097, "step": 2647 }, { "epoch": 5.03661436043747, "grad_norm": 0.2176978439092636, "learning_rate": 8.321371864083837e-05, "loss": 0.061, "step": 2648 }, { "epoch": 5.038516405135521, "grad_norm": 0.4389532208442688, "learning_rate": 8.320736741822801e-05, "loss": 0.1065, "step": 2649 }, { "epoch": 5.040418449833571, "grad_norm": 0.2318810671567917, "learning_rate": 8.320101619561766e-05, "loss": 0.0698, "step": 2650 }, { "epoch": 5.042320494531621, "grad_norm": 0.3554401993751526, "learning_rate": 8.319466497300731e-05, "loss": 0.1052, "step": 2651 }, { "epoch": 5.044222539229672, "grad_norm": 0.4346822500228882, "learning_rate": 8.318831375039695e-05, "loss": 0.1108, "step": 2652 }, { "epoch": 5.046124583927722, "grad_norm": 0.34418416023254395, "learning_rate": 8.31819625277866e-05, "loss": 0.1003, "step": 2653 }, { "epoch": 5.048026628625773, "grad_norm": 0.35901421308517456, "learning_rate": 8.317561130517625e-05, "loss": 0.0786, "step": 2654 }, { "epoch": 5.049928673323823, "grad_norm": 0.2725570797920227, "learning_rate": 8.31692600825659e-05, "loss": 0.0901, "step": 2655 }, { "epoch": 5.051830718021874, "grad_norm": 0.3747507333755493, "learning_rate": 8.316290885995554e-05, "loss": 0.0922, "step": 2656 }, { "epoch": 5.053732762719924, "grad_norm": 0.3192732036113739, "learning_rate": 8.315655763734518e-05, "loss": 0.0834, "step": 2657 }, { "epoch": 5.055634807417975, "grad_norm": 0.32220616936683655, "learning_rate": 8.315020641473485e-05, "loss": 0.0976, "step": 2658 }, { "epoch": 5.057536852116025, "grad_norm": 0.29179829359054565, "learning_rate": 8.314385519212449e-05, "loss": 0.0865, "step": 2659 }, { "epoch": 5.0594388968140755, "grad_norm": 0.337753564119339, "learning_rate": 8.313750396951412e-05, "loss": 0.0934, "step": 2660 }, { "epoch": 5.0613409415121255, "grad_norm": 0.2385604828596115, "learning_rate": 8.313115274690379e-05, "loss": 0.0897, "step": 2661 }, { "epoch": 5.0632429862101755, "grad_norm": 0.3223874568939209, "learning_rate": 8.312480152429343e-05, "loss": 0.113, "step": 2662 }, { "epoch": 5.065145030908226, "grad_norm": 0.2641155421733856, "learning_rate": 8.311845030168308e-05, "loss": 0.0784, "step": 2663 }, { "epoch": 5.067047075606276, "grad_norm": 0.3063553273677826, "learning_rate": 8.311209907907272e-05, "loss": 0.0895, "step": 2664 }, { "epoch": 5.068949120304327, "grad_norm": 0.28479015827178955, "learning_rate": 8.310574785646237e-05, "loss": 0.0781, "step": 2665 }, { "epoch": 5.070851165002377, "grad_norm": 0.25443270802497864, "learning_rate": 8.309939663385202e-05, "loss": 0.0816, "step": 2666 }, { "epoch": 5.072753209700428, "grad_norm": 0.3532758951187134, "learning_rate": 8.309304541124166e-05, "loss": 0.1004, "step": 2667 }, { "epoch": 5.074655254398478, "grad_norm": 0.26470455527305603, "learning_rate": 8.308669418863133e-05, "loss": 0.0959, "step": 2668 }, { "epoch": 5.076557299096529, "grad_norm": 0.3608281910419464, "learning_rate": 8.308034296602096e-05, "loss": 0.0925, "step": 2669 }, { "epoch": 5.078459343794579, "grad_norm": 0.223192036151886, "learning_rate": 8.30739917434106e-05, "loss": 0.0652, "step": 2670 }, { "epoch": 5.08036138849263, "grad_norm": 0.36757364869117737, "learning_rate": 8.306764052080025e-05, "loss": 0.0847, "step": 2671 }, { "epoch": 5.08226343319068, "grad_norm": 0.3613258898258209, "learning_rate": 8.30612892981899e-05, "loss": 0.1133, "step": 2672 }, { "epoch": 5.08416547788873, "grad_norm": 0.3451091945171356, "learning_rate": 8.305493807557956e-05, "loss": 0.0851, "step": 2673 }, { "epoch": 5.086067522586781, "grad_norm": 0.39230048656463623, "learning_rate": 8.30485868529692e-05, "loss": 0.1264, "step": 2674 }, { "epoch": 5.087969567284831, "grad_norm": 0.30938684940338135, "learning_rate": 8.304223563035885e-05, "loss": 0.1009, "step": 2675 }, { "epoch": 5.089871611982882, "grad_norm": 0.29073426127433777, "learning_rate": 8.30358844077485e-05, "loss": 0.0884, "step": 2676 }, { "epoch": 5.091773656680932, "grad_norm": 0.32096439599990845, "learning_rate": 8.302953318513814e-05, "loss": 0.0944, "step": 2677 }, { "epoch": 5.093675701378983, "grad_norm": 0.3321041464805603, "learning_rate": 8.302318196252779e-05, "loss": 0.0945, "step": 2678 }, { "epoch": 5.095577746077033, "grad_norm": 0.4814242720603943, "learning_rate": 8.301683073991744e-05, "loss": 0.1222, "step": 2679 }, { "epoch": 5.0974797907750835, "grad_norm": 0.5268080830574036, "learning_rate": 8.301047951730708e-05, "loss": 0.1096, "step": 2680 }, { "epoch": 5.0993818354731335, "grad_norm": 0.29475218057632446, "learning_rate": 8.300412829469673e-05, "loss": 0.0794, "step": 2681 }, { "epoch": 5.101283880171184, "grad_norm": 0.2624375820159912, "learning_rate": 8.299777707208638e-05, "loss": 0.0745, "step": 2682 }, { "epoch": 5.103185924869234, "grad_norm": 0.3305892050266266, "learning_rate": 8.299142584947602e-05, "loss": 0.0931, "step": 2683 }, { "epoch": 5.105087969567284, "grad_norm": 0.3345329165458679, "learning_rate": 8.298507462686567e-05, "loss": 0.0979, "step": 2684 }, { "epoch": 5.106990014265335, "grad_norm": 0.37109294533729553, "learning_rate": 8.297872340425533e-05, "loss": 0.0908, "step": 2685 }, { "epoch": 5.108892058963385, "grad_norm": 0.28300270438194275, "learning_rate": 8.297237218164498e-05, "loss": 0.0817, "step": 2686 }, { "epoch": 5.110794103661436, "grad_norm": 0.29176023602485657, "learning_rate": 8.296602095903462e-05, "loss": 0.0818, "step": 2687 }, { "epoch": 5.112696148359486, "grad_norm": 0.38012629747390747, "learning_rate": 8.295966973642427e-05, "loss": 0.1113, "step": 2688 }, { "epoch": 5.114598193057537, "grad_norm": 0.3183780610561371, "learning_rate": 8.295331851381392e-05, "loss": 0.114, "step": 2689 }, { "epoch": 5.116500237755587, "grad_norm": 0.34422457218170166, "learning_rate": 8.294696729120356e-05, "loss": 0.1029, "step": 2690 }, { "epoch": 5.118402282453638, "grad_norm": 0.29582858085632324, "learning_rate": 8.294061606859321e-05, "loss": 0.0921, "step": 2691 }, { "epoch": 5.120304327151688, "grad_norm": 0.29475873708724976, "learning_rate": 8.293426484598286e-05, "loss": 0.0699, "step": 2692 }, { "epoch": 5.122206371849739, "grad_norm": 0.24619872868061066, "learning_rate": 8.29279136233725e-05, "loss": 0.0729, "step": 2693 }, { "epoch": 5.124108416547789, "grad_norm": 0.2696808874607086, "learning_rate": 8.292156240076215e-05, "loss": 0.0907, "step": 2694 }, { "epoch": 5.12601046124584, "grad_norm": 0.3869519829750061, "learning_rate": 8.29152111781518e-05, "loss": 0.1007, "step": 2695 }, { "epoch": 5.12791250594389, "grad_norm": 0.3821418881416321, "learning_rate": 8.290885995554144e-05, "loss": 0.1093, "step": 2696 }, { "epoch": 5.12981455064194, "grad_norm": 0.5121551752090454, "learning_rate": 8.29025087329311e-05, "loss": 0.1344, "step": 2697 }, { "epoch": 5.131716595339991, "grad_norm": 0.2786102890968323, "learning_rate": 8.289615751032073e-05, "loss": 0.0883, "step": 2698 }, { "epoch": 5.133618640038041, "grad_norm": 0.31683072447776794, "learning_rate": 8.28898062877104e-05, "loss": 0.0901, "step": 2699 }, { "epoch": 5.1355206847360915, "grad_norm": 0.318406879901886, "learning_rate": 8.288345506510004e-05, "loss": 0.0924, "step": 2700 }, { "epoch": 5.1374227294341415, "grad_norm": 0.3161453604698181, "learning_rate": 8.287710384248967e-05, "loss": 0.0863, "step": 2701 }, { "epoch": 5.139324774132192, "grad_norm": 0.4410136342048645, "learning_rate": 8.287075261987934e-05, "loss": 0.1014, "step": 2702 }, { "epoch": 5.141226818830242, "grad_norm": 0.3554822504520416, "learning_rate": 8.286440139726898e-05, "loss": 0.0905, "step": 2703 }, { "epoch": 5.143128863528293, "grad_norm": 0.3166615664958954, "learning_rate": 8.285805017465863e-05, "loss": 0.0814, "step": 2704 }, { "epoch": 5.145030908226343, "grad_norm": 0.314028263092041, "learning_rate": 8.285169895204827e-05, "loss": 0.0819, "step": 2705 }, { "epoch": 5.146932952924394, "grad_norm": 0.3088028132915497, "learning_rate": 8.284534772943792e-05, "loss": 0.1025, "step": 2706 }, { "epoch": 5.148834997622444, "grad_norm": 0.30459243059158325, "learning_rate": 8.283899650682757e-05, "loss": 0.0919, "step": 2707 }, { "epoch": 5.150737042320494, "grad_norm": 0.31617748737335205, "learning_rate": 8.283264528421721e-05, "loss": 0.0828, "step": 2708 }, { "epoch": 5.152639087018545, "grad_norm": 0.38467937707901, "learning_rate": 8.282629406160688e-05, "loss": 0.1875, "step": 2709 }, { "epoch": 5.154541131716595, "grad_norm": 0.31181344389915466, "learning_rate": 8.281994283899651e-05, "loss": 0.0836, "step": 2710 }, { "epoch": 5.156443176414646, "grad_norm": 0.2717238962650299, "learning_rate": 8.281359161638615e-05, "loss": 0.1023, "step": 2711 }, { "epoch": 5.158345221112696, "grad_norm": 0.24646538496017456, "learning_rate": 8.28072403937758e-05, "loss": 0.073, "step": 2712 }, { "epoch": 5.160247265810747, "grad_norm": 0.28425633907318115, "learning_rate": 8.280088917116546e-05, "loss": 0.082, "step": 2713 }, { "epoch": 5.162149310508797, "grad_norm": 0.24602612853050232, "learning_rate": 8.27945379485551e-05, "loss": 0.0748, "step": 2714 }, { "epoch": 5.164051355206848, "grad_norm": 0.3181002736091614, "learning_rate": 8.278818672594475e-05, "loss": 0.0915, "step": 2715 }, { "epoch": 5.165953399904898, "grad_norm": 0.3696095049381256, "learning_rate": 8.27818355033344e-05, "loss": 0.1079, "step": 2716 }, { "epoch": 5.1678554446029485, "grad_norm": 0.3242207467556, "learning_rate": 8.277548428072405e-05, "loss": 0.0963, "step": 2717 }, { "epoch": 5.1697574893009985, "grad_norm": 0.3968127965927124, "learning_rate": 8.276913305811369e-05, "loss": 0.0799, "step": 2718 }, { "epoch": 5.171659533999049, "grad_norm": 0.2539953887462616, "learning_rate": 8.276278183550334e-05, "loss": 0.0678, "step": 2719 }, { "epoch": 5.173561578697099, "grad_norm": 0.34455937147140503, "learning_rate": 8.275643061289299e-05, "loss": 0.1237, "step": 2720 }, { "epoch": 5.175463623395149, "grad_norm": 0.29677414894104004, "learning_rate": 8.275007939028263e-05, "loss": 0.1007, "step": 2721 }, { "epoch": 5.1773656680932, "grad_norm": 0.2782600224018097, "learning_rate": 8.274372816767228e-05, "loss": 0.0945, "step": 2722 }, { "epoch": 5.17926771279125, "grad_norm": 0.3580763339996338, "learning_rate": 8.273737694506193e-05, "loss": 0.0931, "step": 2723 }, { "epoch": 5.181169757489301, "grad_norm": 0.19728906452655792, "learning_rate": 8.273102572245157e-05, "loss": 0.0695, "step": 2724 }, { "epoch": 5.183071802187351, "grad_norm": 0.2223612666130066, "learning_rate": 8.272467449984122e-05, "loss": 0.0631, "step": 2725 }, { "epoch": 5.184973846885402, "grad_norm": 0.2530241906642914, "learning_rate": 8.271832327723088e-05, "loss": 0.068, "step": 2726 }, { "epoch": 5.186875891583452, "grad_norm": 0.605760931968689, "learning_rate": 8.271197205462053e-05, "loss": 0.1698, "step": 2727 }, { "epoch": 5.188777936281503, "grad_norm": 0.39937227964401245, "learning_rate": 8.270562083201017e-05, "loss": 0.1197, "step": 2728 }, { "epoch": 5.190679980979553, "grad_norm": 0.380604088306427, "learning_rate": 8.26992696093998e-05, "loss": 0.1024, "step": 2729 }, { "epoch": 5.192582025677604, "grad_norm": 0.3906427323818207, "learning_rate": 8.269291838678947e-05, "loss": 0.1103, "step": 2730 }, { "epoch": 5.194484070375654, "grad_norm": 0.25769877433776855, "learning_rate": 8.268656716417911e-05, "loss": 0.0716, "step": 2731 }, { "epoch": 5.196386115073704, "grad_norm": 0.27115434408187866, "learning_rate": 8.268021594156875e-05, "loss": 0.0748, "step": 2732 }, { "epoch": 5.198288159771755, "grad_norm": 0.31713467836380005, "learning_rate": 8.267386471895841e-05, "loss": 0.1119, "step": 2733 }, { "epoch": 5.200190204469805, "grad_norm": 0.27098706364631653, "learning_rate": 8.266751349634805e-05, "loss": 0.0916, "step": 2734 }, { "epoch": 5.202092249167856, "grad_norm": 0.26350903511047363, "learning_rate": 8.26611622737377e-05, "loss": 0.1107, "step": 2735 }, { "epoch": 5.203994293865906, "grad_norm": 0.3183554410934448, "learning_rate": 8.265481105112734e-05, "loss": 0.0864, "step": 2736 }, { "epoch": 5.2058963385639565, "grad_norm": 0.33910661935806274, "learning_rate": 8.264845982851699e-05, "loss": 0.0975, "step": 2737 }, { "epoch": 5.2077983832620065, "grad_norm": 0.23509973287582397, "learning_rate": 8.264210860590664e-05, "loss": 0.0637, "step": 2738 }, { "epoch": 5.209700427960057, "grad_norm": 0.31973764300346375, "learning_rate": 8.263575738329628e-05, "loss": 0.0974, "step": 2739 }, { "epoch": 5.211602472658107, "grad_norm": 0.42649564146995544, "learning_rate": 8.262940616068595e-05, "loss": 0.1126, "step": 2740 }, { "epoch": 5.213504517356158, "grad_norm": 0.2706199288368225, "learning_rate": 8.262305493807558e-05, "loss": 0.0855, "step": 2741 }, { "epoch": 5.215406562054208, "grad_norm": 0.4610327184200287, "learning_rate": 8.261670371546522e-05, "loss": 0.1214, "step": 2742 }, { "epoch": 5.217308606752258, "grad_norm": 0.2792705297470093, "learning_rate": 8.261035249285488e-05, "loss": 0.0798, "step": 2743 }, { "epoch": 5.219210651450309, "grad_norm": 0.2638397514820099, "learning_rate": 8.260400127024453e-05, "loss": 0.0837, "step": 2744 }, { "epoch": 5.221112696148359, "grad_norm": 0.2619345188140869, "learning_rate": 8.259765004763418e-05, "loss": 0.0781, "step": 2745 }, { "epoch": 5.22301474084641, "grad_norm": 0.32482966780662537, "learning_rate": 8.259129882502382e-05, "loss": 0.0867, "step": 2746 }, { "epoch": 5.22491678554446, "grad_norm": 0.3487076759338379, "learning_rate": 8.258494760241347e-05, "loss": 0.1038, "step": 2747 }, { "epoch": 5.226818830242511, "grad_norm": 0.37396374344825745, "learning_rate": 8.257859637980312e-05, "loss": 0.1041, "step": 2748 }, { "epoch": 5.228720874940561, "grad_norm": 0.310883492231369, "learning_rate": 8.257224515719276e-05, "loss": 0.0958, "step": 2749 }, { "epoch": 5.230622919638612, "grad_norm": 0.33855023980140686, "learning_rate": 8.256589393458241e-05, "loss": 0.1001, "step": 2750 }, { "epoch": 5.232524964336662, "grad_norm": 0.295162558555603, "learning_rate": 8.255954271197206e-05, "loss": 0.084, "step": 2751 }, { "epoch": 5.234427009034713, "grad_norm": 0.22703345119953156, "learning_rate": 8.25531914893617e-05, "loss": 0.0777, "step": 2752 }, { "epoch": 5.236329053732763, "grad_norm": 0.23589177429676056, "learning_rate": 8.254684026675135e-05, "loss": 0.0781, "step": 2753 }, { "epoch": 5.238231098430813, "grad_norm": 0.2537785768508911, "learning_rate": 8.2540489044141e-05, "loss": 0.1024, "step": 2754 }, { "epoch": 5.240133143128864, "grad_norm": 0.3413544297218323, "learning_rate": 8.253413782153064e-05, "loss": 0.0792, "step": 2755 }, { "epoch": 5.242035187826914, "grad_norm": 0.28834807872772217, "learning_rate": 8.25277865989203e-05, "loss": 0.0969, "step": 2756 }, { "epoch": 5.2439372325249645, "grad_norm": 0.2617645263671875, "learning_rate": 8.252143537630995e-05, "loss": 0.0906, "step": 2757 }, { "epoch": 5.2458392772230145, "grad_norm": 0.283772736787796, "learning_rate": 8.25150841536996e-05, "loss": 0.085, "step": 2758 }, { "epoch": 5.247741321921065, "grad_norm": 0.2892938256263733, "learning_rate": 8.250873293108924e-05, "loss": 0.0971, "step": 2759 }, { "epoch": 5.249643366619115, "grad_norm": 0.30989378690719604, "learning_rate": 8.250238170847889e-05, "loss": 0.0832, "step": 2760 }, { "epoch": 5.251545411317166, "grad_norm": 0.3129521906375885, "learning_rate": 8.249603048586854e-05, "loss": 0.0767, "step": 2761 }, { "epoch": 5.253447456015216, "grad_norm": 0.2838345170021057, "learning_rate": 8.248967926325818e-05, "loss": 0.0864, "step": 2762 }, { "epoch": 5.255349500713267, "grad_norm": 0.3472948968410492, "learning_rate": 8.248332804064783e-05, "loss": 0.0927, "step": 2763 }, { "epoch": 5.257251545411317, "grad_norm": 0.32811862230300903, "learning_rate": 8.247697681803748e-05, "loss": 0.1022, "step": 2764 }, { "epoch": 5.259153590109367, "grad_norm": 0.2726902961730957, "learning_rate": 8.247062559542712e-05, "loss": 0.0989, "step": 2765 }, { "epoch": 5.261055634807418, "grad_norm": 0.26436948776245117, "learning_rate": 8.246427437281677e-05, "loss": 0.087, "step": 2766 }, { "epoch": 5.262957679505468, "grad_norm": 0.25681155920028687, "learning_rate": 8.245792315020641e-05, "loss": 0.0844, "step": 2767 }, { "epoch": 5.264859724203519, "grad_norm": 0.3114239275455475, "learning_rate": 8.245157192759606e-05, "loss": 0.1081, "step": 2768 }, { "epoch": 5.266761768901569, "grad_norm": 0.31467998027801514, "learning_rate": 8.244522070498571e-05, "loss": 0.103, "step": 2769 }, { "epoch": 5.26866381359962, "grad_norm": 0.32647088170051575, "learning_rate": 8.243886948237535e-05, "loss": 0.0987, "step": 2770 }, { "epoch": 5.27056585829767, "grad_norm": 0.3176961839199066, "learning_rate": 8.243251825976502e-05, "loss": 0.0995, "step": 2771 }, { "epoch": 5.272467902995721, "grad_norm": 0.29948559403419495, "learning_rate": 8.242616703715466e-05, "loss": 0.0886, "step": 2772 }, { "epoch": 5.274369947693771, "grad_norm": 0.2150421142578125, "learning_rate": 8.24198158145443e-05, "loss": 0.0859, "step": 2773 }, { "epoch": 5.2762719923918215, "grad_norm": 0.276875764131546, "learning_rate": 8.241346459193395e-05, "loss": 0.0985, "step": 2774 }, { "epoch": 5.2781740370898715, "grad_norm": 0.2445184886455536, "learning_rate": 8.24071133693236e-05, "loss": 0.1443, "step": 2775 }, { "epoch": 5.280076081787922, "grad_norm": 0.2786940932273865, "learning_rate": 8.240076214671325e-05, "loss": 0.0922, "step": 2776 }, { "epoch": 5.281978126485972, "grad_norm": 0.330771267414093, "learning_rate": 8.239441092410289e-05, "loss": 0.082, "step": 2777 }, { "epoch": 5.283880171184022, "grad_norm": 0.3194374442100525, "learning_rate": 8.238805970149254e-05, "loss": 0.0972, "step": 2778 }, { "epoch": 5.285782215882073, "grad_norm": 0.27137935161590576, "learning_rate": 8.238170847888219e-05, "loss": 0.0731, "step": 2779 }, { "epoch": 5.287684260580123, "grad_norm": 0.3671146035194397, "learning_rate": 8.237535725627183e-05, "loss": 0.0755, "step": 2780 }, { "epoch": 5.289586305278174, "grad_norm": 0.4442805051803589, "learning_rate": 8.236900603366148e-05, "loss": 0.1041, "step": 2781 }, { "epoch": 5.291488349976224, "grad_norm": 0.3038801848888397, "learning_rate": 8.236265481105113e-05, "loss": 0.0921, "step": 2782 }, { "epoch": 5.293390394674275, "grad_norm": 0.31325793266296387, "learning_rate": 8.235630358844077e-05, "loss": 0.0972, "step": 2783 }, { "epoch": 5.295292439372325, "grad_norm": 0.28008419275283813, "learning_rate": 8.234995236583042e-05, "loss": 0.0797, "step": 2784 }, { "epoch": 5.297194484070376, "grad_norm": 0.3274953067302704, "learning_rate": 8.234360114322008e-05, "loss": 0.0863, "step": 2785 }, { "epoch": 5.299096528768426, "grad_norm": 0.40671971440315247, "learning_rate": 8.233724992060971e-05, "loss": 0.1018, "step": 2786 }, { "epoch": 5.300998573466477, "grad_norm": 0.43156853318214417, "learning_rate": 8.233089869799937e-05, "loss": 0.1142, "step": 2787 }, { "epoch": 5.302900618164527, "grad_norm": 0.28940603137016296, "learning_rate": 8.232454747538902e-05, "loss": 0.0647, "step": 2788 }, { "epoch": 5.304802662862578, "grad_norm": 0.4310309886932373, "learning_rate": 8.231819625277867e-05, "loss": 0.1124, "step": 2789 }, { "epoch": 5.306704707560628, "grad_norm": 0.3912397027015686, "learning_rate": 8.231184503016831e-05, "loss": 0.0995, "step": 2790 }, { "epoch": 5.308606752258678, "grad_norm": 0.33847618103027344, "learning_rate": 8.230549380755796e-05, "loss": 0.0926, "step": 2791 }, { "epoch": 5.310508796956729, "grad_norm": 0.30807194113731384, "learning_rate": 8.229914258494761e-05, "loss": 0.0961, "step": 2792 }, { "epoch": 5.312410841654779, "grad_norm": 0.29250118136405945, "learning_rate": 8.229279136233725e-05, "loss": 0.0774, "step": 2793 }, { "epoch": 5.3143128863528295, "grad_norm": 0.34272050857543945, "learning_rate": 8.22864401397269e-05, "loss": 0.0756, "step": 2794 }, { "epoch": 5.3162149310508795, "grad_norm": 0.3028480112552643, "learning_rate": 8.228008891711655e-05, "loss": 0.0936, "step": 2795 }, { "epoch": 5.31811697574893, "grad_norm": 0.35978344082832336, "learning_rate": 8.227373769450619e-05, "loss": 0.0856, "step": 2796 }, { "epoch": 5.32001902044698, "grad_norm": 0.24763593077659607, "learning_rate": 8.226738647189584e-05, "loss": 0.0683, "step": 2797 }, { "epoch": 5.321921065145031, "grad_norm": 0.34561780095100403, "learning_rate": 8.22610352492855e-05, "loss": 0.0887, "step": 2798 }, { "epoch": 5.323823109843081, "grad_norm": 0.34023576974868774, "learning_rate": 8.225468402667515e-05, "loss": 0.1141, "step": 2799 }, { "epoch": 5.325725154541132, "grad_norm": 0.32317525148391724, "learning_rate": 8.224833280406479e-05, "loss": 0.0902, "step": 2800 }, { "epoch": 5.327627199239182, "grad_norm": 0.3947696089744568, "learning_rate": 8.224198158145442e-05, "loss": 0.1152, "step": 2801 }, { "epoch": 5.329529243937232, "grad_norm": 0.2948441803455353, "learning_rate": 8.223563035884409e-05, "loss": 0.1001, "step": 2802 }, { "epoch": 5.331431288635283, "grad_norm": 0.42355984449386597, "learning_rate": 8.222927913623373e-05, "loss": 0.0999, "step": 2803 }, { "epoch": 5.333333333333333, "grad_norm": 0.22205117344856262, "learning_rate": 8.222292791362337e-05, "loss": 0.0788, "step": 2804 }, { "epoch": 5.335235378031384, "grad_norm": 0.28190529346466064, "learning_rate": 8.221657669101303e-05, "loss": 0.0857, "step": 2805 }, { "epoch": 5.337137422729434, "grad_norm": 0.272397518157959, "learning_rate": 8.221022546840267e-05, "loss": 0.0804, "step": 2806 }, { "epoch": 5.339039467427485, "grad_norm": 0.2642618417739868, "learning_rate": 8.220387424579232e-05, "loss": 0.0768, "step": 2807 }, { "epoch": 5.340941512125535, "grad_norm": 0.3142416477203369, "learning_rate": 8.219752302318196e-05, "loss": 0.0985, "step": 2808 }, { "epoch": 5.342843556823586, "grad_norm": 0.3162345588207245, "learning_rate": 8.219117180057161e-05, "loss": 0.0842, "step": 2809 }, { "epoch": 5.344745601521636, "grad_norm": 0.3445020318031311, "learning_rate": 8.218482057796126e-05, "loss": 0.0948, "step": 2810 }, { "epoch": 5.346647646219687, "grad_norm": 0.3281369209289551, "learning_rate": 8.21784693553509e-05, "loss": 0.1059, "step": 2811 }, { "epoch": 5.348549690917737, "grad_norm": 0.36346635222435, "learning_rate": 8.217211813274057e-05, "loss": 0.1098, "step": 2812 }, { "epoch": 5.350451735615787, "grad_norm": 0.29653385281562805, "learning_rate": 8.21657669101302e-05, "loss": 0.0994, "step": 2813 }, { "epoch": 5.3523537803138375, "grad_norm": 0.19076651334762573, "learning_rate": 8.215941568751984e-05, "loss": 0.0636, "step": 2814 }, { "epoch": 5.3542558250118875, "grad_norm": 0.23119299113750458, "learning_rate": 8.21530644649095e-05, "loss": 0.0755, "step": 2815 }, { "epoch": 5.356157869709938, "grad_norm": 0.21026362478733063, "learning_rate": 8.214671324229915e-05, "loss": 0.0714, "step": 2816 }, { "epoch": 5.358059914407988, "grad_norm": 0.19502770900726318, "learning_rate": 8.21403620196888e-05, "loss": 0.0817, "step": 2817 }, { "epoch": 5.359961959106039, "grad_norm": 0.2644445598125458, "learning_rate": 8.213401079707844e-05, "loss": 0.0723, "step": 2818 }, { "epoch": 5.361864003804089, "grad_norm": 0.34537041187286377, "learning_rate": 8.212765957446809e-05, "loss": 0.1039, "step": 2819 }, { "epoch": 5.36376604850214, "grad_norm": 0.28133493661880493, "learning_rate": 8.212130835185774e-05, "loss": 0.0906, "step": 2820 }, { "epoch": 5.36566809320019, "grad_norm": 0.2598632872104645, "learning_rate": 8.211495712924738e-05, "loss": 0.0848, "step": 2821 }, { "epoch": 5.367570137898241, "grad_norm": 0.2941516041755676, "learning_rate": 8.210860590663703e-05, "loss": 0.0951, "step": 2822 }, { "epoch": 5.369472182596291, "grad_norm": 0.617328941822052, "learning_rate": 8.210225468402668e-05, "loss": 0.1256, "step": 2823 }, { "epoch": 5.371374227294341, "grad_norm": 0.3117159903049469, "learning_rate": 8.209590346141632e-05, "loss": 0.0971, "step": 2824 }, { "epoch": 5.373276271992392, "grad_norm": 0.28506991267204285, "learning_rate": 8.208955223880597e-05, "loss": 0.0706, "step": 2825 }, { "epoch": 5.375178316690442, "grad_norm": 0.3072415888309479, "learning_rate": 8.208320101619563e-05, "loss": 0.0939, "step": 2826 }, { "epoch": 5.377080361388493, "grad_norm": 0.3019329309463501, "learning_rate": 8.207684979358526e-05, "loss": 0.117, "step": 2827 }, { "epoch": 5.378982406086543, "grad_norm": 0.3263597786426544, "learning_rate": 8.207049857097492e-05, "loss": 0.1013, "step": 2828 }, { "epoch": 5.380884450784594, "grad_norm": 0.2426205277442932, "learning_rate": 8.206414734836457e-05, "loss": 0.09, "step": 2829 }, { "epoch": 5.382786495482644, "grad_norm": 0.3131352663040161, "learning_rate": 8.205779612575422e-05, "loss": 0.0831, "step": 2830 }, { "epoch": 5.3846885401806945, "grad_norm": 0.2705948054790497, "learning_rate": 8.205144490314386e-05, "loss": 0.0825, "step": 2831 }, { "epoch": 5.3865905848787445, "grad_norm": 0.24609902501106262, "learning_rate": 8.20450936805335e-05, "loss": 0.0941, "step": 2832 }, { "epoch": 5.388492629576795, "grad_norm": 0.2822064757347107, "learning_rate": 8.203874245792316e-05, "loss": 0.0868, "step": 2833 }, { "epoch": 5.390394674274845, "grad_norm": 0.3406517207622528, "learning_rate": 8.20323912353128e-05, "loss": 0.083, "step": 2834 }, { "epoch": 5.392296718972895, "grad_norm": 0.38472169637680054, "learning_rate": 8.202604001270245e-05, "loss": 0.101, "step": 2835 }, { "epoch": 5.394198763670946, "grad_norm": 0.39235028624534607, "learning_rate": 8.20196887900921e-05, "loss": 0.1109, "step": 2836 }, { "epoch": 5.396100808368996, "grad_norm": 0.2799689769744873, "learning_rate": 8.201333756748174e-05, "loss": 0.087, "step": 2837 }, { "epoch": 5.398002853067047, "grad_norm": 0.32398489117622375, "learning_rate": 8.20069863448714e-05, "loss": 0.0994, "step": 2838 }, { "epoch": 5.399904897765097, "grad_norm": 0.29752808809280396, "learning_rate": 8.200063512226103e-05, "loss": 0.093, "step": 2839 }, { "epoch": 5.401806942463148, "grad_norm": 0.2725158631801605, "learning_rate": 8.199428389965068e-05, "loss": 0.1066, "step": 2840 }, { "epoch": 5.403708987161198, "grad_norm": 0.2493102252483368, "learning_rate": 8.198793267704034e-05, "loss": 0.0659, "step": 2841 }, { "epoch": 5.405611031859249, "grad_norm": 0.3313886225223541, "learning_rate": 8.198158145442997e-05, "loss": 0.093, "step": 2842 }, { "epoch": 5.407513076557299, "grad_norm": 0.3171900808811188, "learning_rate": 8.197523023181964e-05, "loss": 0.1016, "step": 2843 }, { "epoch": 5.40941512125535, "grad_norm": 0.2522033452987671, "learning_rate": 8.196887900920928e-05, "loss": 0.0713, "step": 2844 }, { "epoch": 5.4113171659534, "grad_norm": 0.26137956976890564, "learning_rate": 8.196252778659892e-05, "loss": 0.0756, "step": 2845 }, { "epoch": 5.41321921065145, "grad_norm": 0.30233171582221985, "learning_rate": 8.195617656398857e-05, "loss": 0.0849, "step": 2846 }, { "epoch": 5.415121255349501, "grad_norm": 0.3242833614349365, "learning_rate": 8.194982534137822e-05, "loss": 0.1057, "step": 2847 }, { "epoch": 5.417023300047551, "grad_norm": 0.28022679686546326, "learning_rate": 8.194347411876787e-05, "loss": 0.0612, "step": 2848 }, { "epoch": 5.418925344745602, "grad_norm": 0.31969156861305237, "learning_rate": 8.193712289615751e-05, "loss": 0.0869, "step": 2849 }, { "epoch": 5.420827389443652, "grad_norm": 0.3627922832965851, "learning_rate": 8.193077167354716e-05, "loss": 0.0886, "step": 2850 }, { "epoch": 5.4227294341417025, "grad_norm": 0.4317481219768524, "learning_rate": 8.192442045093681e-05, "loss": 0.1014, "step": 2851 }, { "epoch": 5.4246314788397525, "grad_norm": 0.3192225992679596, "learning_rate": 8.191806922832645e-05, "loss": 0.084, "step": 2852 }, { "epoch": 5.426533523537803, "grad_norm": 0.28119418025016785, "learning_rate": 8.19117180057161e-05, "loss": 0.0849, "step": 2853 }, { "epoch": 5.428435568235853, "grad_norm": 0.25419628620147705, "learning_rate": 8.190536678310576e-05, "loss": 0.0849, "step": 2854 }, { "epoch": 5.430337612933904, "grad_norm": 0.3957049250602722, "learning_rate": 8.18990155604954e-05, "loss": 0.1057, "step": 2855 }, { "epoch": 5.432239657631954, "grad_norm": 0.31903624534606934, "learning_rate": 8.189266433788505e-05, "loss": 0.0958, "step": 2856 }, { "epoch": 5.434141702330005, "grad_norm": 0.2692996561527252, "learning_rate": 8.18863131152747e-05, "loss": 0.0982, "step": 2857 }, { "epoch": 5.436043747028055, "grad_norm": 0.3083006739616394, "learning_rate": 8.187996189266434e-05, "loss": 0.1022, "step": 2858 }, { "epoch": 5.437945791726105, "grad_norm": 0.4186379909515381, "learning_rate": 8.187361067005399e-05, "loss": 0.1278, "step": 2859 }, { "epoch": 5.439847836424156, "grad_norm": 0.3124418258666992, "learning_rate": 8.186725944744364e-05, "loss": 0.0892, "step": 2860 }, { "epoch": 5.441749881122206, "grad_norm": 0.2959184944629669, "learning_rate": 8.186090822483329e-05, "loss": 0.0897, "step": 2861 }, { "epoch": 5.443651925820257, "grad_norm": 0.3044806718826294, "learning_rate": 8.185455700222293e-05, "loss": 0.0902, "step": 2862 }, { "epoch": 5.445553970518307, "grad_norm": 0.3201037347316742, "learning_rate": 8.184820577961258e-05, "loss": 0.1028, "step": 2863 }, { "epoch": 5.447456015216358, "grad_norm": 0.226481631398201, "learning_rate": 8.184185455700223e-05, "loss": 0.0701, "step": 2864 }, { "epoch": 5.449358059914408, "grad_norm": 0.29510584473609924, "learning_rate": 8.183550333439187e-05, "loss": 0.0954, "step": 2865 }, { "epoch": 5.451260104612459, "grad_norm": 0.3167766332626343, "learning_rate": 8.182915211178152e-05, "loss": 0.0907, "step": 2866 }, { "epoch": 5.453162149310509, "grad_norm": 0.3047981858253479, "learning_rate": 8.182280088917117e-05, "loss": 0.0978, "step": 2867 }, { "epoch": 5.45506419400856, "grad_norm": 0.3672785460948944, "learning_rate": 8.181644966656081e-05, "loss": 0.0992, "step": 2868 }, { "epoch": 5.45696623870661, "grad_norm": 0.3375677466392517, "learning_rate": 8.181009844395046e-05, "loss": 0.0933, "step": 2869 }, { "epoch": 5.4588682834046605, "grad_norm": 0.3718927204608917, "learning_rate": 8.180374722134012e-05, "loss": 0.0983, "step": 2870 }, { "epoch": 5.4607703281027105, "grad_norm": 0.2880299687385559, "learning_rate": 8.179739599872977e-05, "loss": 0.0937, "step": 2871 }, { "epoch": 5.4626723728007605, "grad_norm": 0.5487256646156311, "learning_rate": 8.179104477611941e-05, "loss": 0.0926, "step": 2872 }, { "epoch": 5.464574417498811, "grad_norm": 0.29540562629699707, "learning_rate": 8.178469355350905e-05, "loss": 0.0827, "step": 2873 }, { "epoch": 5.466476462196861, "grad_norm": 0.3226901590824127, "learning_rate": 8.177834233089871e-05, "loss": 0.0718, "step": 2874 }, { "epoch": 5.468378506894912, "grad_norm": 0.311902791261673, "learning_rate": 8.177199110828835e-05, "loss": 0.0761, "step": 2875 }, { "epoch": 5.470280551592962, "grad_norm": 0.39249876141548157, "learning_rate": 8.176563988567799e-05, "loss": 0.1199, "step": 2876 }, { "epoch": 5.472182596291013, "grad_norm": 0.32737264037132263, "learning_rate": 8.175928866306764e-05, "loss": 0.0756, "step": 2877 }, { "epoch": 5.474084640989063, "grad_norm": 0.4456403851509094, "learning_rate": 8.175293744045729e-05, "loss": 0.0903, "step": 2878 }, { "epoch": 5.475986685687114, "grad_norm": 0.2985632121562958, "learning_rate": 8.174658621784694e-05, "loss": 0.0854, "step": 2879 }, { "epoch": 5.477888730385164, "grad_norm": 0.34640341997146606, "learning_rate": 8.174023499523658e-05, "loss": 0.1007, "step": 2880 }, { "epoch": 5.479790775083215, "grad_norm": 0.3404572010040283, "learning_rate": 8.173388377262623e-05, "loss": 0.126, "step": 2881 }, { "epoch": 5.481692819781265, "grad_norm": 0.3574541509151459, "learning_rate": 8.172753255001588e-05, "loss": 0.102, "step": 2882 }, { "epoch": 5.483594864479315, "grad_norm": 0.23681578040122986, "learning_rate": 8.172118132740552e-05, "loss": 0.0768, "step": 2883 }, { "epoch": 5.485496909177366, "grad_norm": 0.30854761600494385, "learning_rate": 8.171483010479517e-05, "loss": 0.0777, "step": 2884 }, { "epoch": 5.487398953875416, "grad_norm": 0.35504138469696045, "learning_rate": 8.170847888218483e-05, "loss": 0.1114, "step": 2885 }, { "epoch": 5.489300998573467, "grad_norm": 0.26217421889305115, "learning_rate": 8.170212765957446e-05, "loss": 0.0982, "step": 2886 }, { "epoch": 5.491203043271517, "grad_norm": 0.3383858799934387, "learning_rate": 8.169577643696412e-05, "loss": 0.1318, "step": 2887 }, { "epoch": 5.4931050879695675, "grad_norm": 0.33602502942085266, "learning_rate": 8.168942521435377e-05, "loss": 0.1095, "step": 2888 }, { "epoch": 5.4950071326676175, "grad_norm": 0.3534327745437622, "learning_rate": 8.168307399174342e-05, "loss": 0.0891, "step": 2889 }, { "epoch": 5.496909177365668, "grad_norm": 0.28556129336357117, "learning_rate": 8.167672276913306e-05, "loss": 0.0984, "step": 2890 }, { "epoch": 5.498811222063718, "grad_norm": 0.32809069752693176, "learning_rate": 8.167037154652271e-05, "loss": 0.1075, "step": 2891 }, { "epoch": 5.500713266761769, "grad_norm": 0.608711838722229, "learning_rate": 8.166402032391236e-05, "loss": 0.0796, "step": 2892 }, { "epoch": 5.502615311459819, "grad_norm": 0.20012949407100677, "learning_rate": 8.1657669101302e-05, "loss": 0.0573, "step": 2893 }, { "epoch": 5.504517356157869, "grad_norm": 0.2836569547653198, "learning_rate": 8.165131787869165e-05, "loss": 0.1029, "step": 2894 }, { "epoch": 5.50641940085592, "grad_norm": 0.28194811940193176, "learning_rate": 8.16449666560813e-05, "loss": 0.0995, "step": 2895 }, { "epoch": 5.50832144555397, "grad_norm": 0.28990745544433594, "learning_rate": 8.163861543347094e-05, "loss": 0.1038, "step": 2896 }, { "epoch": 5.510223490252021, "grad_norm": 0.3534923791885376, "learning_rate": 8.16322642108606e-05, "loss": 0.0933, "step": 2897 }, { "epoch": 5.512125534950071, "grad_norm": 0.4764708876609802, "learning_rate": 8.162591298825025e-05, "loss": 0.1351, "step": 2898 }, { "epoch": 5.514027579648122, "grad_norm": 0.2705288827419281, "learning_rate": 8.161956176563988e-05, "loss": 0.1027, "step": 2899 }, { "epoch": 5.515929624346172, "grad_norm": 0.36976736783981323, "learning_rate": 8.161321054302954e-05, "loss": 0.0959, "step": 2900 }, { "epoch": 5.517831669044223, "grad_norm": 0.3459687829017639, "learning_rate": 8.160685932041919e-05, "loss": 0.1124, "step": 2901 }, { "epoch": 5.519733713742273, "grad_norm": 0.4014488458633423, "learning_rate": 8.160050809780884e-05, "loss": 0.1079, "step": 2902 }, { "epoch": 5.521635758440324, "grad_norm": 0.39544427394866943, "learning_rate": 8.159415687519848e-05, "loss": 0.1004, "step": 2903 }, { "epoch": 5.523537803138374, "grad_norm": 0.30223849415779114, "learning_rate": 8.158780565258812e-05, "loss": 0.0859, "step": 2904 }, { "epoch": 5.525439847836424, "grad_norm": 0.2351481020450592, "learning_rate": 8.158145442997778e-05, "loss": 0.0716, "step": 2905 }, { "epoch": 5.527341892534475, "grad_norm": 0.40507906675338745, "learning_rate": 8.157510320736742e-05, "loss": 0.1118, "step": 2906 }, { "epoch": 5.529243937232525, "grad_norm": 0.2664635181427002, "learning_rate": 8.156875198475707e-05, "loss": 0.0956, "step": 2907 }, { "epoch": 5.5311459819305755, "grad_norm": 0.296220988035202, "learning_rate": 8.156240076214672e-05, "loss": 0.1011, "step": 2908 }, { "epoch": 5.5330480266286255, "grad_norm": 0.34762975573539734, "learning_rate": 8.155604953953636e-05, "loss": 0.0936, "step": 2909 }, { "epoch": 5.534950071326676, "grad_norm": 0.42042768001556396, "learning_rate": 8.154969831692601e-05, "loss": 0.1104, "step": 2910 }, { "epoch": 5.536852116024726, "grad_norm": 0.33530357480049133, "learning_rate": 8.154334709431565e-05, "loss": 0.1481, "step": 2911 }, { "epoch": 5.538754160722777, "grad_norm": 0.37931686639785767, "learning_rate": 8.15369958717053e-05, "loss": 0.114, "step": 2912 }, { "epoch": 5.540656205420827, "grad_norm": 0.3276258707046509, "learning_rate": 8.153064464909496e-05, "loss": 0.1009, "step": 2913 }, { "epoch": 5.542558250118878, "grad_norm": 0.29436194896698, "learning_rate": 8.15242934264846e-05, "loss": 0.0914, "step": 2914 }, { "epoch": 5.544460294816928, "grad_norm": 0.32761478424072266, "learning_rate": 8.151794220387426e-05, "loss": 0.1162, "step": 2915 }, { "epoch": 5.546362339514978, "grad_norm": 0.6013909578323364, "learning_rate": 8.15115909812639e-05, "loss": 0.1271, "step": 2916 }, { "epoch": 5.548264384213029, "grad_norm": 0.2207658290863037, "learning_rate": 8.150523975865354e-05, "loss": 0.0861, "step": 2917 }, { "epoch": 5.550166428911079, "grad_norm": 0.25360438227653503, "learning_rate": 8.149888853604319e-05, "loss": 0.055, "step": 2918 }, { "epoch": 5.55206847360913, "grad_norm": 0.2537856101989746, "learning_rate": 8.149253731343284e-05, "loss": 0.0749, "step": 2919 }, { "epoch": 5.55397051830718, "grad_norm": 0.29756224155426025, "learning_rate": 8.148618609082249e-05, "loss": 0.082, "step": 2920 }, { "epoch": 5.555872563005231, "grad_norm": 0.41644203662872314, "learning_rate": 8.147983486821213e-05, "loss": 0.1885, "step": 2921 }, { "epoch": 5.557774607703281, "grad_norm": 0.32786139845848083, "learning_rate": 8.147348364560178e-05, "loss": 0.0822, "step": 2922 }, { "epoch": 5.559676652401332, "grad_norm": 0.30312976241111755, "learning_rate": 8.146713242299143e-05, "loss": 0.0863, "step": 2923 }, { "epoch": 5.561578697099382, "grad_norm": 0.34702423214912415, "learning_rate": 8.146078120038107e-05, "loss": 0.1012, "step": 2924 }, { "epoch": 5.563480741797433, "grad_norm": 0.24299529194831848, "learning_rate": 8.145442997777072e-05, "loss": 0.0739, "step": 2925 }, { "epoch": 5.565382786495483, "grad_norm": 0.30519744753837585, "learning_rate": 8.144807875516038e-05, "loss": 0.0953, "step": 2926 }, { "epoch": 5.567284831193533, "grad_norm": 0.32798048853874207, "learning_rate": 8.144172753255001e-05, "loss": 0.1077, "step": 2927 }, { "epoch": 5.5691868758915835, "grad_norm": 0.38108792901039124, "learning_rate": 8.143537630993967e-05, "loss": 0.1246, "step": 2928 }, { "epoch": 5.571088920589634, "grad_norm": 0.480277955532074, "learning_rate": 8.142902508732932e-05, "loss": 0.0909, "step": 2929 }, { "epoch": 5.572990965287684, "grad_norm": 0.3235543668270111, "learning_rate": 8.142267386471896e-05, "loss": 0.0925, "step": 2930 }, { "epoch": 5.574893009985734, "grad_norm": 0.34970083832740784, "learning_rate": 8.141632264210861e-05, "loss": 0.0908, "step": 2931 }, { "epoch": 5.576795054683785, "grad_norm": 0.2239646017551422, "learning_rate": 8.140997141949826e-05, "loss": 0.0723, "step": 2932 }, { "epoch": 5.578697099381835, "grad_norm": 0.33661478757858276, "learning_rate": 8.140362019688791e-05, "loss": 0.0922, "step": 2933 }, { "epoch": 5.580599144079886, "grad_norm": 0.4126195013523102, "learning_rate": 8.139726897427755e-05, "loss": 0.0943, "step": 2934 }, { "epoch": 5.582501188777936, "grad_norm": 0.3538368046283722, "learning_rate": 8.139091775166719e-05, "loss": 0.0955, "step": 2935 }, { "epoch": 5.584403233475987, "grad_norm": 0.3369925320148468, "learning_rate": 8.138456652905685e-05, "loss": 0.1166, "step": 2936 }, { "epoch": 5.586305278174037, "grad_norm": 0.27559757232666016, "learning_rate": 8.137821530644649e-05, "loss": 0.0808, "step": 2937 }, { "epoch": 5.588207322872088, "grad_norm": 0.30875301361083984, "learning_rate": 8.137186408383614e-05, "loss": 0.1024, "step": 2938 }, { "epoch": 5.590109367570138, "grad_norm": 0.2719765305519104, "learning_rate": 8.13655128612258e-05, "loss": 0.0897, "step": 2939 }, { "epoch": 5.592011412268189, "grad_norm": 0.3717488646507263, "learning_rate": 8.135916163861543e-05, "loss": 0.0966, "step": 2940 }, { "epoch": 5.593913456966239, "grad_norm": 0.2868727743625641, "learning_rate": 8.135281041600509e-05, "loss": 0.0802, "step": 2941 }, { "epoch": 5.595815501664289, "grad_norm": 0.28469622135162354, "learning_rate": 8.134645919339472e-05, "loss": 0.0989, "step": 2942 }, { "epoch": 5.59771754636234, "grad_norm": 0.34950777888298035, "learning_rate": 8.134010797078439e-05, "loss": 0.0939, "step": 2943 }, { "epoch": 5.59961959106039, "grad_norm": 0.23884734511375427, "learning_rate": 8.133375674817403e-05, "loss": 0.0845, "step": 2944 }, { "epoch": 5.6015216357584405, "grad_norm": 0.34531524777412415, "learning_rate": 8.132740552556367e-05, "loss": 0.0978, "step": 2945 }, { "epoch": 5.6034236804564905, "grad_norm": 0.26655009388923645, "learning_rate": 8.132105430295333e-05, "loss": 0.0723, "step": 2946 }, { "epoch": 5.605325725154541, "grad_norm": 0.23067669570446014, "learning_rate": 8.131470308034297e-05, "loss": 0.078, "step": 2947 }, { "epoch": 5.607227769852591, "grad_norm": 0.4221946597099304, "learning_rate": 8.130835185773261e-05, "loss": 0.1085, "step": 2948 }, { "epoch": 5.609129814550642, "grad_norm": 0.3162672519683838, "learning_rate": 8.130200063512226e-05, "loss": 0.0893, "step": 2949 }, { "epoch": 5.611031859248692, "grad_norm": 0.32246506214141846, "learning_rate": 8.129564941251191e-05, "loss": 0.0836, "step": 2950 }, { "epoch": 5.612933903946743, "grad_norm": 0.31152433156967163, "learning_rate": 8.128929818990156e-05, "loss": 0.0806, "step": 2951 }, { "epoch": 5.614835948644793, "grad_norm": 0.3357599377632141, "learning_rate": 8.12829469672912e-05, "loss": 0.0958, "step": 2952 }, { "epoch": 5.616737993342843, "grad_norm": 0.3181150555610657, "learning_rate": 8.127659574468085e-05, "loss": 0.0855, "step": 2953 }, { "epoch": 5.618640038040894, "grad_norm": 0.3397297263145447, "learning_rate": 8.12702445220705e-05, "loss": 0.1022, "step": 2954 }, { "epoch": 5.620542082738944, "grad_norm": 0.31479981541633606, "learning_rate": 8.126389329946014e-05, "loss": 0.0774, "step": 2955 }, { "epoch": 5.622444127436995, "grad_norm": 0.26667311787605286, "learning_rate": 8.12575420768498e-05, "loss": 0.0756, "step": 2956 }, { "epoch": 5.624346172135045, "grad_norm": 0.2729688882827759, "learning_rate": 8.125119085423945e-05, "loss": 0.0721, "step": 2957 }, { "epoch": 5.626248216833096, "grad_norm": 0.24858340620994568, "learning_rate": 8.124483963162909e-05, "loss": 0.0711, "step": 2958 }, { "epoch": 5.628150261531146, "grad_norm": 0.3526616096496582, "learning_rate": 8.123848840901874e-05, "loss": 0.084, "step": 2959 }, { "epoch": 5.630052306229197, "grad_norm": 0.2841814458370209, "learning_rate": 8.123213718640839e-05, "loss": 0.0999, "step": 2960 }, { "epoch": 5.631954350927247, "grad_norm": 0.2419266402721405, "learning_rate": 8.122578596379804e-05, "loss": 0.0662, "step": 2961 }, { "epoch": 5.633856395625298, "grad_norm": 0.34861576557159424, "learning_rate": 8.121943474118768e-05, "loss": 0.0804, "step": 2962 }, { "epoch": 5.635758440323348, "grad_norm": 0.42378073930740356, "learning_rate": 8.121308351857733e-05, "loss": 0.1006, "step": 2963 }, { "epoch": 5.637660485021398, "grad_norm": 0.41002216935157776, "learning_rate": 8.120673229596698e-05, "loss": 0.101, "step": 2964 }, { "epoch": 5.6395625297194485, "grad_norm": 0.2810782194137573, "learning_rate": 8.120038107335662e-05, "loss": 0.0702, "step": 2965 }, { "epoch": 5.6414645744174985, "grad_norm": 0.5979880094528198, "learning_rate": 8.119402985074627e-05, "loss": 0.149, "step": 2966 }, { "epoch": 5.643366619115549, "grad_norm": 0.26545101404190063, "learning_rate": 8.118767862813593e-05, "loss": 0.0998, "step": 2967 }, { "epoch": 5.645268663813599, "grad_norm": 0.37219372391700745, "learning_rate": 8.118132740552556e-05, "loss": 0.0984, "step": 2968 }, { "epoch": 5.64717070851165, "grad_norm": 0.3255815804004669, "learning_rate": 8.117497618291522e-05, "loss": 0.0931, "step": 2969 }, { "epoch": 5.6490727532097, "grad_norm": 0.4053998589515686, "learning_rate": 8.116862496030487e-05, "loss": 0.1134, "step": 2970 }, { "epoch": 5.650974797907751, "grad_norm": 0.3078075647354126, "learning_rate": 8.11622737376945e-05, "loss": 0.0908, "step": 2971 }, { "epoch": 5.652876842605801, "grad_norm": 0.28989338874816895, "learning_rate": 8.115592251508416e-05, "loss": 0.1144, "step": 2972 }, { "epoch": 5.654778887303852, "grad_norm": 0.3299599289894104, "learning_rate": 8.114957129247381e-05, "loss": 0.1064, "step": 2973 }, { "epoch": 5.656680932001902, "grad_norm": 0.4091484844684601, "learning_rate": 8.114322006986346e-05, "loss": 0.1126, "step": 2974 }, { "epoch": 5.658582976699952, "grad_norm": 0.264202356338501, "learning_rate": 8.11368688472531e-05, "loss": 0.0796, "step": 2975 }, { "epoch": 5.660485021398003, "grad_norm": 0.32306942343711853, "learning_rate": 8.113051762464274e-05, "loss": 0.0955, "step": 2976 }, { "epoch": 5.662387066096053, "grad_norm": 0.3152141273021698, "learning_rate": 8.11241664020324e-05, "loss": 0.1098, "step": 2977 }, { "epoch": 5.664289110794104, "grad_norm": 0.35286226868629456, "learning_rate": 8.111781517942204e-05, "loss": 0.1081, "step": 2978 }, { "epoch": 5.666191155492154, "grad_norm": 0.35619062185287476, "learning_rate": 8.111146395681169e-05, "loss": 0.11, "step": 2979 }, { "epoch": 5.668093200190205, "grad_norm": 0.4397338330745697, "learning_rate": 8.110511273420135e-05, "loss": 0.0886, "step": 2980 }, { "epoch": 5.669995244888255, "grad_norm": 0.23792682588100433, "learning_rate": 8.109876151159098e-05, "loss": 0.0834, "step": 2981 }, { "epoch": 5.671897289586306, "grad_norm": 0.30805590748786926, "learning_rate": 8.109241028898064e-05, "loss": 0.0816, "step": 2982 }, { "epoch": 5.673799334284356, "grad_norm": 0.3652699589729309, "learning_rate": 8.108605906637027e-05, "loss": 0.0995, "step": 2983 }, { "epoch": 5.6757013789824065, "grad_norm": 0.2952606976032257, "learning_rate": 8.107970784375993e-05, "loss": 0.102, "step": 2984 }, { "epoch": 5.6776034236804565, "grad_norm": 0.3017944395542145, "learning_rate": 8.107335662114958e-05, "loss": 0.0939, "step": 2985 }, { "epoch": 5.6795054683785064, "grad_norm": 0.3887818157672882, "learning_rate": 8.106700539853922e-05, "loss": 0.1168, "step": 2986 }, { "epoch": 5.681407513076557, "grad_norm": 0.3510635793209076, "learning_rate": 8.106065417592887e-05, "loss": 0.0877, "step": 2987 }, { "epoch": 5.683309557774607, "grad_norm": 0.4994543790817261, "learning_rate": 8.105430295331852e-05, "loss": 0.1296, "step": 2988 }, { "epoch": 5.685211602472658, "grad_norm": 0.43380653858184814, "learning_rate": 8.104795173070816e-05, "loss": 0.1293, "step": 2989 }, { "epoch": 5.687113647170708, "grad_norm": 0.33307018876075745, "learning_rate": 8.104160050809781e-05, "loss": 0.1161, "step": 2990 }, { "epoch": 5.689015691868759, "grad_norm": 0.2537522315979004, "learning_rate": 8.103524928548746e-05, "loss": 0.0864, "step": 2991 }, { "epoch": 5.690917736566809, "grad_norm": 0.269766628742218, "learning_rate": 8.102889806287711e-05, "loss": 0.0895, "step": 2992 }, { "epoch": 5.69281978126486, "grad_norm": 0.4431968331336975, "learning_rate": 8.102254684026675e-05, "loss": 0.1144, "step": 2993 }, { "epoch": 5.69472182596291, "grad_norm": 0.3709297180175781, "learning_rate": 8.10161956176564e-05, "loss": 0.1371, "step": 2994 }, { "epoch": 5.696623870660961, "grad_norm": 0.23933501541614532, "learning_rate": 8.100984439504605e-05, "loss": 0.0765, "step": 2995 }, { "epoch": 5.698525915359011, "grad_norm": 0.2999648451805115, "learning_rate": 8.100349317243569e-05, "loss": 0.0925, "step": 2996 }, { "epoch": 5.700427960057061, "grad_norm": 0.3433458209037781, "learning_rate": 8.099714194982535e-05, "loss": 0.1018, "step": 2997 }, { "epoch": 5.702330004755112, "grad_norm": 0.30743271112442017, "learning_rate": 8.0990790727215e-05, "loss": 0.0818, "step": 2998 }, { "epoch": 5.704232049453163, "grad_norm": 0.28858688473701477, "learning_rate": 8.098443950460464e-05, "loss": 0.089, "step": 2999 }, { "epoch": 5.706134094151213, "grad_norm": 0.3105423152446747, "learning_rate": 8.097808828199429e-05, "loss": 0.0859, "step": 3000 }, { "epoch": 5.708036138849263, "grad_norm": 0.348749041557312, "learning_rate": 8.097173705938394e-05, "loss": 0.1047, "step": 3001 }, { "epoch": 5.7099381835473135, "grad_norm": 0.2492302805185318, "learning_rate": 8.096538583677358e-05, "loss": 0.0874, "step": 3002 }, { "epoch": 5.7118402282453635, "grad_norm": 0.351367324590683, "learning_rate": 8.095903461416323e-05, "loss": 0.104, "step": 3003 }, { "epoch": 5.713742272943414, "grad_norm": 0.31233423948287964, "learning_rate": 8.095268339155288e-05, "loss": 0.0812, "step": 3004 }, { "epoch": 5.715644317641464, "grad_norm": 0.3879316747188568, "learning_rate": 8.094633216894253e-05, "loss": 0.1078, "step": 3005 }, { "epoch": 5.717546362339515, "grad_norm": 0.3155204653739929, "learning_rate": 8.093998094633217e-05, "loss": 0.0831, "step": 3006 }, { "epoch": 5.719448407037565, "grad_norm": 0.37463921308517456, "learning_rate": 8.093362972372181e-05, "loss": 0.1045, "step": 3007 }, { "epoch": 5.721350451735615, "grad_norm": 0.37191277742385864, "learning_rate": 8.092727850111147e-05, "loss": 0.1002, "step": 3008 }, { "epoch": 5.723252496433666, "grad_norm": 0.3215421736240387, "learning_rate": 8.092092727850111e-05, "loss": 0.1033, "step": 3009 }, { "epoch": 5.725154541131717, "grad_norm": 0.290348619222641, "learning_rate": 8.091457605589076e-05, "loss": 0.0767, "step": 3010 }, { "epoch": 5.727056585829767, "grad_norm": 0.2508927285671234, "learning_rate": 8.090822483328042e-05, "loss": 0.0676, "step": 3011 }, { "epoch": 5.728958630527817, "grad_norm": 0.7272295355796814, "learning_rate": 8.090187361067005e-05, "loss": 0.1755, "step": 3012 }, { "epoch": 5.730860675225868, "grad_norm": 0.3547666072845459, "learning_rate": 8.08955223880597e-05, "loss": 0.1002, "step": 3013 }, { "epoch": 5.732762719923918, "grad_norm": 0.26749446988105774, "learning_rate": 8.088917116544935e-05, "loss": 0.0795, "step": 3014 }, { "epoch": 5.734664764621969, "grad_norm": 0.2167988121509552, "learning_rate": 8.088281994283901e-05, "loss": 0.0704, "step": 3015 }, { "epoch": 5.736566809320019, "grad_norm": 0.23982734978199005, "learning_rate": 8.087646872022865e-05, "loss": 0.0724, "step": 3016 }, { "epoch": 5.73846885401807, "grad_norm": 0.3214990794658661, "learning_rate": 8.087011749761829e-05, "loss": 0.0884, "step": 3017 }, { "epoch": 5.74037089871612, "grad_norm": 0.37602269649505615, "learning_rate": 8.086376627500795e-05, "loss": 0.0956, "step": 3018 }, { "epoch": 5.742272943414171, "grad_norm": 0.28181731700897217, "learning_rate": 8.085741505239759e-05, "loss": 0.0787, "step": 3019 }, { "epoch": 5.744174988112221, "grad_norm": 0.24445931613445282, "learning_rate": 8.085106382978723e-05, "loss": 0.0804, "step": 3020 }, { "epoch": 5.7460770328102715, "grad_norm": 0.3743366003036499, "learning_rate": 8.084471260717688e-05, "loss": 0.1172, "step": 3021 }, { "epoch": 5.7479790775083215, "grad_norm": 0.2813834547996521, "learning_rate": 8.083836138456653e-05, "loss": 0.0849, "step": 3022 }, { "epoch": 5.7498811222063715, "grad_norm": 0.35081392526626587, "learning_rate": 8.083201016195618e-05, "loss": 0.1163, "step": 3023 }, { "epoch": 5.751783166904422, "grad_norm": 0.4236094653606415, "learning_rate": 8.082565893934582e-05, "loss": 0.0978, "step": 3024 }, { "epoch": 5.753685211602472, "grad_norm": 0.38664132356643677, "learning_rate": 8.081930771673547e-05, "loss": 0.1077, "step": 3025 }, { "epoch": 5.755587256300523, "grad_norm": 0.3874824047088623, "learning_rate": 8.081295649412513e-05, "loss": 0.0991, "step": 3026 }, { "epoch": 5.757489300998573, "grad_norm": 0.32273393869400024, "learning_rate": 8.080660527151476e-05, "loss": 0.0859, "step": 3027 }, { "epoch": 5.759391345696624, "grad_norm": 0.39935457706451416, "learning_rate": 8.080025404890442e-05, "loss": 0.108, "step": 3028 }, { "epoch": 5.761293390394674, "grad_norm": 0.24157053232192993, "learning_rate": 8.079390282629407e-05, "loss": 0.0774, "step": 3029 }, { "epoch": 5.763195435092725, "grad_norm": 0.38274866342544556, "learning_rate": 8.07875516036837e-05, "loss": 0.1171, "step": 3030 }, { "epoch": 5.765097479790775, "grad_norm": 0.34954944252967834, "learning_rate": 8.078120038107336e-05, "loss": 0.0714, "step": 3031 }, { "epoch": 5.766999524488826, "grad_norm": 0.35465356707572937, "learning_rate": 8.077484915846301e-05, "loss": 0.0857, "step": 3032 }, { "epoch": 5.768901569186876, "grad_norm": 0.32265377044677734, "learning_rate": 8.076849793585266e-05, "loss": 0.0933, "step": 3033 }, { "epoch": 5.770803613884926, "grad_norm": 0.3461415469646454, "learning_rate": 8.07621467132423e-05, "loss": 0.0872, "step": 3034 }, { "epoch": 5.772705658582977, "grad_norm": 0.358465313911438, "learning_rate": 8.075579549063195e-05, "loss": 0.1205, "step": 3035 }, { "epoch": 5.774607703281027, "grad_norm": 0.3661046326160431, "learning_rate": 8.07494442680216e-05, "loss": 0.1167, "step": 3036 }, { "epoch": 5.776509747979078, "grad_norm": 0.31511178612709045, "learning_rate": 8.074309304541124e-05, "loss": 0.1017, "step": 3037 }, { "epoch": 5.778411792677128, "grad_norm": 0.27341797947883606, "learning_rate": 8.07367418228009e-05, "loss": 0.0708, "step": 3038 }, { "epoch": 5.780313837375179, "grad_norm": 0.3324090242385864, "learning_rate": 8.073039060019055e-05, "loss": 0.0835, "step": 3039 }, { "epoch": 5.782215882073229, "grad_norm": 0.2953234314918518, "learning_rate": 8.072403937758018e-05, "loss": 0.0991, "step": 3040 }, { "epoch": 5.7841179267712794, "grad_norm": 0.324093759059906, "learning_rate": 8.071768815496984e-05, "loss": 0.0999, "step": 3041 }, { "epoch": 5.7860199714693294, "grad_norm": 0.4137260317802429, "learning_rate": 8.071133693235949e-05, "loss": 0.1338, "step": 3042 }, { "epoch": 5.78792201616738, "grad_norm": 0.247357577085495, "learning_rate": 8.070498570974913e-05, "loss": 0.0792, "step": 3043 }, { "epoch": 5.78982406086543, "grad_norm": 0.40995171666145325, "learning_rate": 8.069863448713878e-05, "loss": 0.136, "step": 3044 }, { "epoch": 5.79172610556348, "grad_norm": 0.34627994894981384, "learning_rate": 8.069228326452842e-05, "loss": 0.0996, "step": 3045 }, { "epoch": 5.793628150261531, "grad_norm": 0.25772425532341003, "learning_rate": 8.068593204191808e-05, "loss": 0.0858, "step": 3046 }, { "epoch": 5.795530194959581, "grad_norm": 0.3984861671924591, "learning_rate": 8.067958081930772e-05, "loss": 0.1134, "step": 3047 }, { "epoch": 5.797432239657632, "grad_norm": 0.35580113530158997, "learning_rate": 8.067322959669736e-05, "loss": 0.0912, "step": 3048 }, { "epoch": 5.799334284355682, "grad_norm": 0.2826536297798157, "learning_rate": 8.066687837408702e-05, "loss": 0.1028, "step": 3049 }, { "epoch": 5.801236329053733, "grad_norm": 0.26871412992477417, "learning_rate": 8.066052715147666e-05, "loss": 0.0884, "step": 3050 }, { "epoch": 5.803138373751783, "grad_norm": 0.3068493902683258, "learning_rate": 8.065417592886631e-05, "loss": 0.0921, "step": 3051 }, { "epoch": 5.805040418449834, "grad_norm": 0.31987568736076355, "learning_rate": 8.064782470625595e-05, "loss": 0.0899, "step": 3052 }, { "epoch": 5.806942463147884, "grad_norm": 0.25358253717422485, "learning_rate": 8.06414734836456e-05, "loss": 0.0961, "step": 3053 }, { "epoch": 5.808844507845935, "grad_norm": 0.2783609926700592, "learning_rate": 8.063512226103526e-05, "loss": 0.0943, "step": 3054 }, { "epoch": 5.810746552543985, "grad_norm": 0.30739203095436096, "learning_rate": 8.06287710384249e-05, "loss": 0.0962, "step": 3055 }, { "epoch": 5.812648597242035, "grad_norm": 0.2969946265220642, "learning_rate": 8.062241981581455e-05, "loss": 0.0924, "step": 3056 }, { "epoch": 5.814550641940086, "grad_norm": 0.4024634063243866, "learning_rate": 8.06160685932042e-05, "loss": 0.0973, "step": 3057 }, { "epoch": 5.816452686638136, "grad_norm": 0.390010267496109, "learning_rate": 8.060971737059384e-05, "loss": 0.1082, "step": 3058 }, { "epoch": 5.8183547313361865, "grad_norm": 0.45062586665153503, "learning_rate": 8.060336614798349e-05, "loss": 0.1212, "step": 3059 }, { "epoch": 5.8202567760342365, "grad_norm": 0.26907825469970703, "learning_rate": 8.059701492537314e-05, "loss": 0.0929, "step": 3060 }, { "epoch": 5.822158820732287, "grad_norm": 0.2780208885669708, "learning_rate": 8.059066370276278e-05, "loss": 0.0931, "step": 3061 }, { "epoch": 5.824060865430337, "grad_norm": 0.3321477472782135, "learning_rate": 8.058431248015243e-05, "loss": 0.106, "step": 3062 }, { "epoch": 5.825962910128388, "grad_norm": 0.2702338397502899, "learning_rate": 8.057796125754208e-05, "loss": 0.0804, "step": 3063 }, { "epoch": 5.827864954826438, "grad_norm": 0.27758532762527466, "learning_rate": 8.057161003493173e-05, "loss": 0.1032, "step": 3064 }, { "epoch": 5.829766999524489, "grad_norm": 0.39256051182746887, "learning_rate": 8.056525881232137e-05, "loss": 0.1187, "step": 3065 }, { "epoch": 5.831669044222539, "grad_norm": 0.3004806637763977, "learning_rate": 8.055890758971102e-05, "loss": 0.0988, "step": 3066 }, { "epoch": 5.833571088920589, "grad_norm": 0.3810843229293823, "learning_rate": 8.055255636710068e-05, "loss": 0.1141, "step": 3067 }, { "epoch": 5.83547313361864, "grad_norm": 0.27720025181770325, "learning_rate": 8.054620514449031e-05, "loss": 0.0898, "step": 3068 }, { "epoch": 5.83737517831669, "grad_norm": 0.3405880630016327, "learning_rate": 8.053985392187997e-05, "loss": 0.1195, "step": 3069 }, { "epoch": 5.839277223014741, "grad_norm": 0.23189480602741241, "learning_rate": 8.053350269926962e-05, "loss": 0.0824, "step": 3070 }, { "epoch": 5.841179267712791, "grad_norm": 0.2764407694339752, "learning_rate": 8.052715147665926e-05, "loss": 0.1146, "step": 3071 }, { "epoch": 5.843081312410842, "grad_norm": 0.34092894196510315, "learning_rate": 8.052080025404891e-05, "loss": 0.0994, "step": 3072 }, { "epoch": 5.844983357108892, "grad_norm": 0.46230098605155945, "learning_rate": 8.051444903143856e-05, "loss": 0.136, "step": 3073 }, { "epoch": 5.846885401806943, "grad_norm": 0.2688174545764923, "learning_rate": 8.05080978088282e-05, "loss": 0.1094, "step": 3074 }, { "epoch": 5.848787446504993, "grad_norm": 0.3978007137775421, "learning_rate": 8.050174658621785e-05, "loss": 0.1037, "step": 3075 }, { "epoch": 5.850689491203044, "grad_norm": 0.29960164427757263, "learning_rate": 8.04953953636075e-05, "loss": 0.0939, "step": 3076 }, { "epoch": 5.852591535901094, "grad_norm": 0.3292900025844574, "learning_rate": 8.048904414099715e-05, "loss": 0.0874, "step": 3077 }, { "epoch": 5.854493580599144, "grad_norm": 0.33958300948143005, "learning_rate": 8.048269291838679e-05, "loss": 0.0976, "step": 3078 }, { "epoch": 5.8563956252971945, "grad_norm": 0.3058733642101288, "learning_rate": 8.047634169577643e-05, "loss": 0.0968, "step": 3079 }, { "epoch": 5.858297669995245, "grad_norm": 0.3476194143295288, "learning_rate": 8.04699904731661e-05, "loss": 0.0853, "step": 3080 }, { "epoch": 5.860199714693295, "grad_norm": 0.34195569157600403, "learning_rate": 8.046363925055573e-05, "loss": 0.0905, "step": 3081 }, { "epoch": 5.862101759391345, "grad_norm": 0.3744758367538452, "learning_rate": 8.045728802794539e-05, "loss": 0.103, "step": 3082 }, { "epoch": 5.864003804089396, "grad_norm": 0.3824380934238434, "learning_rate": 8.045093680533504e-05, "loss": 0.0941, "step": 3083 }, { "epoch": 5.865905848787446, "grad_norm": 0.33374378085136414, "learning_rate": 8.044458558272468e-05, "loss": 0.1087, "step": 3084 }, { "epoch": 5.867807893485497, "grad_norm": 0.33335942029953003, "learning_rate": 8.043823436011433e-05, "loss": 0.0997, "step": 3085 }, { "epoch": 5.869709938183547, "grad_norm": 0.3410753309726715, "learning_rate": 8.043188313750397e-05, "loss": 0.0801, "step": 3086 }, { "epoch": 5.871611982881598, "grad_norm": 0.31030407547950745, "learning_rate": 8.042553191489363e-05, "loss": 0.0963, "step": 3087 }, { "epoch": 5.873514027579648, "grad_norm": 0.29095837473869324, "learning_rate": 8.041918069228327e-05, "loss": 0.0929, "step": 3088 }, { "epoch": 5.875416072277699, "grad_norm": 0.31219327449798584, "learning_rate": 8.041282946967291e-05, "loss": 0.0989, "step": 3089 }, { "epoch": 5.877318116975749, "grad_norm": 0.3598634898662567, "learning_rate": 8.040647824706257e-05, "loss": 0.0974, "step": 3090 }, { "epoch": 5.8792201616738, "grad_norm": 0.4326852858066559, "learning_rate": 8.040012702445221e-05, "loss": 0.1409, "step": 3091 }, { "epoch": 5.88112220637185, "grad_norm": 0.5164662003517151, "learning_rate": 8.039377580184185e-05, "loss": 0.126, "step": 3092 }, { "epoch": 5.8830242510699, "grad_norm": 0.26032760739326477, "learning_rate": 8.03874245792315e-05, "loss": 0.0768, "step": 3093 }, { "epoch": 5.884926295767951, "grad_norm": 0.3444868326187134, "learning_rate": 8.038107335662115e-05, "loss": 0.1065, "step": 3094 }, { "epoch": 5.886828340466001, "grad_norm": 0.37405380606651306, "learning_rate": 8.03747221340108e-05, "loss": 0.0984, "step": 3095 }, { "epoch": 5.888730385164052, "grad_norm": 0.36833861470222473, "learning_rate": 8.036837091140044e-05, "loss": 0.0999, "step": 3096 }, { "epoch": 5.890632429862102, "grad_norm": 0.3146866261959076, "learning_rate": 8.03620196887901e-05, "loss": 0.0915, "step": 3097 }, { "epoch": 5.8925344745601524, "grad_norm": 0.3376007378101349, "learning_rate": 8.035566846617975e-05, "loss": 0.0781, "step": 3098 }, { "epoch": 5.8944365192582024, "grad_norm": 0.3204367458820343, "learning_rate": 8.034931724356939e-05, "loss": 0.1133, "step": 3099 }, { "epoch": 5.896338563956253, "grad_norm": 0.21730051934719086, "learning_rate": 8.034296602095904e-05, "loss": 0.061, "step": 3100 }, { "epoch": 5.898240608654303, "grad_norm": 0.26351940631866455, "learning_rate": 8.033661479834869e-05, "loss": 0.0789, "step": 3101 }, { "epoch": 5.900142653352354, "grad_norm": 0.3514474034309387, "learning_rate": 8.033026357573833e-05, "loss": 0.1141, "step": 3102 }, { "epoch": 5.902044698050404, "grad_norm": 0.351646363735199, "learning_rate": 8.032391235312798e-05, "loss": 0.089, "step": 3103 }, { "epoch": 5.903946742748454, "grad_norm": 0.3225652873516083, "learning_rate": 8.031756113051763e-05, "loss": 0.1012, "step": 3104 }, { "epoch": 5.905848787446505, "grad_norm": 0.3405236601829529, "learning_rate": 8.031120990790728e-05, "loss": 0.0851, "step": 3105 }, { "epoch": 5.907750832144555, "grad_norm": 0.39399632811546326, "learning_rate": 8.030485868529692e-05, "loss": 0.0915, "step": 3106 }, { "epoch": 5.909652876842606, "grad_norm": 0.4024227559566498, "learning_rate": 8.029850746268657e-05, "loss": 0.1077, "step": 3107 }, { "epoch": 5.911554921540656, "grad_norm": 0.3052838146686554, "learning_rate": 8.029215624007623e-05, "loss": 0.0787, "step": 3108 }, { "epoch": 5.913456966238707, "grad_norm": 0.4465295970439911, "learning_rate": 8.028580501746586e-05, "loss": 0.0963, "step": 3109 }, { "epoch": 5.915359010936757, "grad_norm": 0.28723543882369995, "learning_rate": 8.02794537948555e-05, "loss": 0.0752, "step": 3110 }, { "epoch": 5.917261055634808, "grad_norm": 0.4216603934764862, "learning_rate": 8.027310257224517e-05, "loss": 0.1018, "step": 3111 }, { "epoch": 5.919163100332858, "grad_norm": 0.2752178907394409, "learning_rate": 8.02667513496348e-05, "loss": 0.0817, "step": 3112 }, { "epoch": 5.921065145030909, "grad_norm": 0.27371007204055786, "learning_rate": 8.026040012702446e-05, "loss": 0.0783, "step": 3113 }, { "epoch": 5.922967189728959, "grad_norm": 0.3314353823661804, "learning_rate": 8.025404890441411e-05, "loss": 0.0891, "step": 3114 }, { "epoch": 5.924869234427009, "grad_norm": 0.35603460669517517, "learning_rate": 8.024769768180375e-05, "loss": 0.104, "step": 3115 }, { "epoch": 5.9267712791250595, "grad_norm": 0.3468729853630066, "learning_rate": 8.02413464591934e-05, "loss": 0.0974, "step": 3116 }, { "epoch": 5.9286733238231095, "grad_norm": 0.3321313261985779, "learning_rate": 8.023499523658304e-05, "loss": 0.093, "step": 3117 }, { "epoch": 5.93057536852116, "grad_norm": 0.36478671431541443, "learning_rate": 8.02286440139727e-05, "loss": 0.0979, "step": 3118 }, { "epoch": 5.93247741321921, "grad_norm": 0.32760557532310486, "learning_rate": 8.022229279136234e-05, "loss": 0.1148, "step": 3119 }, { "epoch": 5.934379457917261, "grad_norm": 0.3271568715572357, "learning_rate": 8.021594156875198e-05, "loss": 0.0959, "step": 3120 }, { "epoch": 5.936281502615311, "grad_norm": 0.3014872968196869, "learning_rate": 8.020959034614164e-05, "loss": 0.0942, "step": 3121 }, { "epoch": 5.938183547313362, "grad_norm": 0.30579501390457153, "learning_rate": 8.020323912353128e-05, "loss": 0.0826, "step": 3122 }, { "epoch": 5.940085592011412, "grad_norm": 0.42316779494285583, "learning_rate": 8.019688790092093e-05, "loss": 0.1025, "step": 3123 }, { "epoch": 5.941987636709463, "grad_norm": 0.30180487036705017, "learning_rate": 8.019053667831057e-05, "loss": 0.0905, "step": 3124 }, { "epoch": 5.943889681407513, "grad_norm": 0.3880978226661682, "learning_rate": 8.018418545570023e-05, "loss": 0.1066, "step": 3125 }, { "epoch": 5.945791726105563, "grad_norm": 0.3437277674674988, "learning_rate": 8.017783423308988e-05, "loss": 0.0973, "step": 3126 }, { "epoch": 5.947693770803614, "grad_norm": 0.3143709897994995, "learning_rate": 8.017148301047952e-05, "loss": 0.0947, "step": 3127 }, { "epoch": 5.949595815501664, "grad_norm": 0.2899588644504547, "learning_rate": 8.016513178786917e-05, "loss": 0.0979, "step": 3128 }, { "epoch": 5.951497860199715, "grad_norm": 0.3774805963039398, "learning_rate": 8.015878056525882e-05, "loss": 0.2451, "step": 3129 }, { "epoch": 5.953399904897765, "grad_norm": 0.38204115629196167, "learning_rate": 8.015242934264846e-05, "loss": 0.1064, "step": 3130 }, { "epoch": 5.955301949595816, "grad_norm": 0.25636038184165955, "learning_rate": 8.014607812003811e-05, "loss": 0.0767, "step": 3131 }, { "epoch": 5.957203994293866, "grad_norm": 0.4159661829471588, "learning_rate": 8.013972689742776e-05, "loss": 0.0946, "step": 3132 }, { "epoch": 5.959106038991917, "grad_norm": 0.38069942593574524, "learning_rate": 8.01333756748174e-05, "loss": 0.1257, "step": 3133 }, { "epoch": 5.961008083689967, "grad_norm": 0.3810632526874542, "learning_rate": 8.012702445220705e-05, "loss": 0.0761, "step": 3134 }, { "epoch": 5.9629101283880175, "grad_norm": 0.30504754185676575, "learning_rate": 8.01206732295967e-05, "loss": 0.0933, "step": 3135 }, { "epoch": 5.9648121730860675, "grad_norm": 0.24171409010887146, "learning_rate": 8.011432200698635e-05, "loss": 0.0851, "step": 3136 }, { "epoch": 5.9667142177841175, "grad_norm": 0.32692912220954895, "learning_rate": 8.010797078437599e-05, "loss": 0.0836, "step": 3137 }, { "epoch": 5.968616262482168, "grad_norm": 0.2763676643371582, "learning_rate": 8.010161956176564e-05, "loss": 0.0778, "step": 3138 }, { "epoch": 5.970518307180218, "grad_norm": 0.42881324887275696, "learning_rate": 8.00952683391553e-05, "loss": 0.1088, "step": 3139 }, { "epoch": 5.972420351878269, "grad_norm": 0.3442386984825134, "learning_rate": 8.008891711654493e-05, "loss": 0.0993, "step": 3140 }, { "epoch": 5.974322396576319, "grad_norm": 0.3938843607902527, "learning_rate": 8.008256589393459e-05, "loss": 0.1337, "step": 3141 }, { "epoch": 5.97622444127437, "grad_norm": 0.33586129546165466, "learning_rate": 8.007621467132424e-05, "loss": 0.1002, "step": 3142 }, { "epoch": 5.97812648597242, "grad_norm": 0.3166608214378357, "learning_rate": 8.006986344871388e-05, "loss": 0.099, "step": 3143 }, { "epoch": 5.980028530670471, "grad_norm": 0.31059524416923523, "learning_rate": 8.006351222610353e-05, "loss": 0.1011, "step": 3144 }, { "epoch": 5.981930575368521, "grad_norm": 0.33092477917671204, "learning_rate": 8.005716100349318e-05, "loss": 0.0785, "step": 3145 }, { "epoch": 5.983832620066572, "grad_norm": 0.4692544639110565, "learning_rate": 8.005080978088282e-05, "loss": 0.1377, "step": 3146 }, { "epoch": 5.985734664764622, "grad_norm": 0.42482489347457886, "learning_rate": 8.004445855827247e-05, "loss": 0.1286, "step": 3147 }, { "epoch": 5.987636709462672, "grad_norm": 0.3942713737487793, "learning_rate": 8.003810733566212e-05, "loss": 0.1085, "step": 3148 }, { "epoch": 5.989538754160723, "grad_norm": 0.4134069085121155, "learning_rate": 8.003175611305177e-05, "loss": 0.1149, "step": 3149 }, { "epoch": 5.991440798858774, "grad_norm": 0.30162832140922546, "learning_rate": 8.002540489044141e-05, "loss": 0.0945, "step": 3150 }, { "epoch": 5.993342843556824, "grad_norm": 0.38140755891799927, "learning_rate": 8.001905366783105e-05, "loss": 0.103, "step": 3151 }, { "epoch": 5.995244888254874, "grad_norm": 0.2811982035636902, "learning_rate": 8.001270244522072e-05, "loss": 0.0877, "step": 3152 }, { "epoch": 5.997146932952925, "grad_norm": 0.3001856207847595, "learning_rate": 8.000635122261035e-05, "loss": 0.0895, "step": 3153 }, { "epoch": 5.999048977650975, "grad_norm": 0.2948606610298157, "learning_rate": 8e-05, "loss": 0.1035, "step": 3154 }, { "epoch": 6.000951022349025, "grad_norm": 0.3746136426925659, "learning_rate": 7.999364877738964e-05, "loss": 0.1152, "step": 3155 }, { "epoch": 6.002853067047075, "grad_norm": 0.21500763297080994, "learning_rate": 7.99872975547793e-05, "loss": 0.0769, "step": 3156 }, { "epoch": 6.004755111745126, "grad_norm": 0.249894380569458, "learning_rate": 7.998094633216895e-05, "loss": 0.0665, "step": 3157 }, { "epoch": 6.006657156443176, "grad_norm": 0.3538912832736969, "learning_rate": 7.997459510955859e-05, "loss": 0.1028, "step": 3158 }, { "epoch": 6.008559201141227, "grad_norm": 0.20869600772857666, "learning_rate": 7.996824388694825e-05, "loss": 0.0594, "step": 3159 }, { "epoch": 6.010461245839277, "grad_norm": 0.38950517773628235, "learning_rate": 7.996189266433789e-05, "loss": 0.0932, "step": 3160 }, { "epoch": 6.012363290537327, "grad_norm": 0.33621397614479065, "learning_rate": 7.995554144172753e-05, "loss": 0.0939, "step": 3161 }, { "epoch": 6.014265335235378, "grad_norm": 0.3625282943248749, "learning_rate": 7.994919021911718e-05, "loss": 0.107, "step": 3162 }, { "epoch": 6.016167379933428, "grad_norm": 0.32474133372306824, "learning_rate": 7.994283899650683e-05, "loss": 0.1042, "step": 3163 }, { "epoch": 6.018069424631479, "grad_norm": 0.1935519427061081, "learning_rate": 7.993648777389647e-05, "loss": 0.0735, "step": 3164 }, { "epoch": 6.019971469329529, "grad_norm": 0.3521823287010193, "learning_rate": 7.993013655128612e-05, "loss": 0.0945, "step": 3165 }, { "epoch": 6.02187351402758, "grad_norm": 0.22658848762512207, "learning_rate": 7.992378532867577e-05, "loss": 0.059, "step": 3166 }, { "epoch": 6.02377555872563, "grad_norm": 0.2659030556678772, "learning_rate": 7.991743410606543e-05, "loss": 0.0803, "step": 3167 }, { "epoch": 6.025677603423681, "grad_norm": 0.3843698799610138, "learning_rate": 7.991108288345506e-05, "loss": 0.1053, "step": 3168 }, { "epoch": 6.027579648121731, "grad_norm": 0.23756776750087738, "learning_rate": 7.990473166084472e-05, "loss": 0.0749, "step": 3169 }, { "epoch": 6.029481692819782, "grad_norm": 0.25975948572158813, "learning_rate": 7.989838043823437e-05, "loss": 0.0879, "step": 3170 }, { "epoch": 6.031383737517832, "grad_norm": 0.32162702083587646, "learning_rate": 7.9892029215624e-05, "loss": 0.0886, "step": 3171 }, { "epoch": 6.0332857822158825, "grad_norm": 0.3066456615924835, "learning_rate": 7.988567799301366e-05, "loss": 0.0712, "step": 3172 }, { "epoch": 6.0351878269139325, "grad_norm": 0.2621491253376007, "learning_rate": 7.987932677040331e-05, "loss": 0.0594, "step": 3173 }, { "epoch": 6.0370898716119825, "grad_norm": 0.16599467396736145, "learning_rate": 7.987297554779295e-05, "loss": 0.0584, "step": 3174 }, { "epoch": 6.038991916310033, "grad_norm": 0.36765870451927185, "learning_rate": 7.98666243251826e-05, "loss": 0.0789, "step": 3175 }, { "epoch": 6.040893961008083, "grad_norm": 0.3242380619049072, "learning_rate": 7.986027310257225e-05, "loss": 0.0977, "step": 3176 }, { "epoch": 6.042796005706134, "grad_norm": 0.4075121283531189, "learning_rate": 7.98539218799619e-05, "loss": 0.09, "step": 3177 }, { "epoch": 6.044698050404184, "grad_norm": 0.28770434856414795, "learning_rate": 7.984757065735154e-05, "loss": 0.0748, "step": 3178 }, { "epoch": 6.046600095102235, "grad_norm": 0.32635626196861267, "learning_rate": 7.98412194347412e-05, "loss": 0.088, "step": 3179 }, { "epoch": 6.048502139800285, "grad_norm": 0.254139244556427, "learning_rate": 7.983486821213085e-05, "loss": 0.0718, "step": 3180 }, { "epoch": 6.050404184498336, "grad_norm": 0.349336713552475, "learning_rate": 7.982851698952048e-05, "loss": 0.118, "step": 3181 }, { "epoch": 6.052306229196386, "grad_norm": 0.33583179116249084, "learning_rate": 7.982216576691012e-05, "loss": 0.1126, "step": 3182 }, { "epoch": 6.054208273894437, "grad_norm": 0.25572314858436584, "learning_rate": 7.981581454429979e-05, "loss": 0.1009, "step": 3183 }, { "epoch": 6.056110318592487, "grad_norm": 0.2622678577899933, "learning_rate": 7.980946332168943e-05, "loss": 0.0628, "step": 3184 }, { "epoch": 6.058012363290537, "grad_norm": 0.24747587740421295, "learning_rate": 7.980311209907908e-05, "loss": 0.0657, "step": 3185 }, { "epoch": 6.059914407988588, "grad_norm": 0.27714699506759644, "learning_rate": 7.979676087646873e-05, "loss": 0.062, "step": 3186 }, { "epoch": 6.061816452686638, "grad_norm": 0.27411553263664246, "learning_rate": 7.979040965385837e-05, "loss": 0.082, "step": 3187 }, { "epoch": 6.063718497384689, "grad_norm": 0.3901764452457428, "learning_rate": 7.978405843124802e-05, "loss": 0.1028, "step": 3188 }, { "epoch": 6.065620542082739, "grad_norm": 0.20082591474056244, "learning_rate": 7.977770720863766e-05, "loss": 0.0619, "step": 3189 }, { "epoch": 6.06752258678079, "grad_norm": 0.30728980898857117, "learning_rate": 7.977135598602732e-05, "loss": 0.0786, "step": 3190 }, { "epoch": 6.06942463147884, "grad_norm": 0.3012641966342926, "learning_rate": 7.976500476341696e-05, "loss": 0.0725, "step": 3191 }, { "epoch": 6.0713266761768905, "grad_norm": 0.21619060635566711, "learning_rate": 7.97586535408066e-05, "loss": 0.0595, "step": 3192 }, { "epoch": 6.0732287208749405, "grad_norm": 0.2747598886489868, "learning_rate": 7.975230231819627e-05, "loss": 0.0675, "step": 3193 }, { "epoch": 6.075130765572991, "grad_norm": 0.3214286267757416, "learning_rate": 7.97459510955859e-05, "loss": 0.0902, "step": 3194 }, { "epoch": 6.077032810271041, "grad_norm": 0.37599408626556396, "learning_rate": 7.973959987297556e-05, "loss": 0.0794, "step": 3195 }, { "epoch": 6.078934854969091, "grad_norm": 0.2802245318889618, "learning_rate": 7.97332486503652e-05, "loss": 0.0691, "step": 3196 }, { "epoch": 6.080836899667142, "grad_norm": 0.22154775261878967, "learning_rate": 7.972689742775485e-05, "loss": 0.0848, "step": 3197 }, { "epoch": 6.082738944365192, "grad_norm": 0.4010308086872101, "learning_rate": 7.97205462051445e-05, "loss": 0.0838, "step": 3198 }, { "epoch": 6.084640989063243, "grad_norm": 0.32006803154945374, "learning_rate": 7.971419498253414e-05, "loss": 0.1033, "step": 3199 }, { "epoch": 6.086543033761293, "grad_norm": 0.2201933115720749, "learning_rate": 7.970784375992379e-05, "loss": 0.0648, "step": 3200 }, { "epoch": 6.088445078459344, "grad_norm": 0.2613593637943268, "learning_rate": 7.970149253731344e-05, "loss": 0.0743, "step": 3201 }, { "epoch": 6.090347123157394, "grad_norm": 0.26748836040496826, "learning_rate": 7.969514131470308e-05, "loss": 0.0774, "step": 3202 }, { "epoch": 6.092249167855445, "grad_norm": 0.26437023282051086, "learning_rate": 7.968879009209273e-05, "loss": 0.0915, "step": 3203 }, { "epoch": 6.094151212553495, "grad_norm": 0.30303144454956055, "learning_rate": 7.968243886948238e-05, "loss": 0.1109, "step": 3204 }, { "epoch": 6.096053257251546, "grad_norm": 0.22862769663333893, "learning_rate": 7.967608764687202e-05, "loss": 0.0659, "step": 3205 }, { "epoch": 6.097955301949596, "grad_norm": 0.22851070761680603, "learning_rate": 7.966973642426167e-05, "loss": 0.0664, "step": 3206 }, { "epoch": 6.099857346647646, "grad_norm": 0.3395095467567444, "learning_rate": 7.966338520165132e-05, "loss": 0.1202, "step": 3207 }, { "epoch": 6.101759391345697, "grad_norm": 0.33721911907196045, "learning_rate": 7.965703397904098e-05, "loss": 0.0971, "step": 3208 }, { "epoch": 6.103661436043747, "grad_norm": 0.28860366344451904, "learning_rate": 7.965068275643061e-05, "loss": 0.0876, "step": 3209 }, { "epoch": 6.1055634807417976, "grad_norm": 0.3022976815700531, "learning_rate": 7.964433153382027e-05, "loss": 0.072, "step": 3210 }, { "epoch": 6.1074655254398476, "grad_norm": 0.27039656043052673, "learning_rate": 7.963798031120992e-05, "loss": 0.0857, "step": 3211 }, { "epoch": 6.109367570137898, "grad_norm": 0.28560107946395874, "learning_rate": 7.963162908859956e-05, "loss": 0.0703, "step": 3212 }, { "epoch": 6.111269614835948, "grad_norm": 0.16948960721492767, "learning_rate": 7.962527786598921e-05, "loss": 0.0574, "step": 3213 }, { "epoch": 6.113171659533999, "grad_norm": 0.31261399388313293, "learning_rate": 7.961892664337886e-05, "loss": 0.098, "step": 3214 }, { "epoch": 6.115073704232049, "grad_norm": 0.2879314720630646, "learning_rate": 7.96125754207685e-05, "loss": 0.0768, "step": 3215 }, { "epoch": 6.1169757489301, "grad_norm": 0.3212367296218872, "learning_rate": 7.960622419815815e-05, "loss": 0.0816, "step": 3216 }, { "epoch": 6.11887779362815, "grad_norm": 0.306856244802475, "learning_rate": 7.95998729755478e-05, "loss": 0.0706, "step": 3217 }, { "epoch": 6.120779838326201, "grad_norm": 0.30228865146636963, "learning_rate": 7.959352175293744e-05, "loss": 0.0842, "step": 3218 }, { "epoch": 6.122681883024251, "grad_norm": 0.40820634365081787, "learning_rate": 7.958717053032709e-05, "loss": 0.0831, "step": 3219 }, { "epoch": 6.124583927722301, "grad_norm": 0.2787196934223175, "learning_rate": 7.958081930771673e-05, "loss": 0.0817, "step": 3220 }, { "epoch": 6.126485972420352, "grad_norm": 0.26536598801612854, "learning_rate": 7.95744680851064e-05, "loss": 0.0868, "step": 3221 }, { "epoch": 6.128388017118402, "grad_norm": 0.33307042717933655, "learning_rate": 7.956811686249603e-05, "loss": 0.1104, "step": 3222 }, { "epoch": 6.130290061816453, "grad_norm": 0.29908856749534607, "learning_rate": 7.956176563988567e-05, "loss": 0.0981, "step": 3223 }, { "epoch": 6.132192106514503, "grad_norm": 0.25544753670692444, "learning_rate": 7.955541441727534e-05, "loss": 0.0635, "step": 3224 }, { "epoch": 6.134094151212554, "grad_norm": 0.458103746175766, "learning_rate": 7.954906319466498e-05, "loss": 0.1149, "step": 3225 }, { "epoch": 6.135996195910604, "grad_norm": 0.23199333250522614, "learning_rate": 7.954271197205463e-05, "loss": 0.0806, "step": 3226 }, { "epoch": 6.137898240608655, "grad_norm": 0.25189074873924255, "learning_rate": 7.953636074944427e-05, "loss": 0.0782, "step": 3227 }, { "epoch": 6.139800285306705, "grad_norm": 0.3281899094581604, "learning_rate": 7.953000952683392e-05, "loss": 0.0706, "step": 3228 }, { "epoch": 6.1417023300047555, "grad_norm": 0.309741348028183, "learning_rate": 7.952365830422357e-05, "loss": 0.0676, "step": 3229 }, { "epoch": 6.1436043747028055, "grad_norm": 0.4716850519180298, "learning_rate": 7.951730708161321e-05, "loss": 0.1398, "step": 3230 }, { "epoch": 6.1455064194008555, "grad_norm": 0.28038209676742554, "learning_rate": 7.951095585900287e-05, "loss": 0.1256, "step": 3231 }, { "epoch": 6.147408464098906, "grad_norm": 0.2801377773284912, "learning_rate": 7.950460463639251e-05, "loss": 0.063, "step": 3232 }, { "epoch": 6.149310508796956, "grad_norm": 0.2686994671821594, "learning_rate": 7.949825341378215e-05, "loss": 0.0735, "step": 3233 }, { "epoch": 6.151212553495007, "grad_norm": 0.30351752042770386, "learning_rate": 7.94919021911718e-05, "loss": 0.0856, "step": 3234 }, { "epoch": 6.153114598193057, "grad_norm": 0.2922990024089813, "learning_rate": 7.948555096856145e-05, "loss": 0.0883, "step": 3235 }, { "epoch": 6.155016642891108, "grad_norm": 0.41719111800193787, "learning_rate": 7.947919974595109e-05, "loss": 0.0905, "step": 3236 }, { "epoch": 6.156918687589158, "grad_norm": 0.24950455129146576, "learning_rate": 7.947284852334074e-05, "loss": 0.0596, "step": 3237 }, { "epoch": 6.158820732287209, "grad_norm": 0.2344481348991394, "learning_rate": 7.94664973007304e-05, "loss": 0.0789, "step": 3238 }, { "epoch": 6.160722776985259, "grad_norm": 0.21180817484855652, "learning_rate": 7.946014607812005e-05, "loss": 0.0616, "step": 3239 }, { "epoch": 6.16262482168331, "grad_norm": 0.2386842519044876, "learning_rate": 7.945379485550969e-05, "loss": 0.086, "step": 3240 }, { "epoch": 6.16452686638136, "grad_norm": 0.21530573070049286, "learning_rate": 7.944744363289934e-05, "loss": 0.0704, "step": 3241 }, { "epoch": 6.166428911079411, "grad_norm": 0.24444977939128876, "learning_rate": 7.944109241028899e-05, "loss": 0.0711, "step": 3242 }, { "epoch": 6.168330955777461, "grad_norm": 0.24185262620449066, "learning_rate": 7.943474118767863e-05, "loss": 0.0705, "step": 3243 }, { "epoch": 6.170233000475511, "grad_norm": 0.28293880820274353, "learning_rate": 7.942838996506828e-05, "loss": 0.0709, "step": 3244 }, { "epoch": 6.172135045173562, "grad_norm": 0.2779384255409241, "learning_rate": 7.942203874245793e-05, "loss": 0.0821, "step": 3245 }, { "epoch": 6.174037089871612, "grad_norm": 0.509385883808136, "learning_rate": 7.941568751984757e-05, "loss": 0.1144, "step": 3246 }, { "epoch": 6.175939134569663, "grad_norm": 0.2250889390707016, "learning_rate": 7.940933629723722e-05, "loss": 0.0576, "step": 3247 }, { "epoch": 6.177841179267713, "grad_norm": 0.21779869496822357, "learning_rate": 7.940298507462687e-05, "loss": 0.0871, "step": 3248 }, { "epoch": 6.1797432239657635, "grad_norm": 0.3144400715827942, "learning_rate": 7.939663385201652e-05, "loss": 0.0832, "step": 3249 }, { "epoch": 6.1816452686638135, "grad_norm": 0.2595573663711548, "learning_rate": 7.939028262940616e-05, "loss": 0.078, "step": 3250 }, { "epoch": 6.183547313361864, "grad_norm": 0.4201616942882538, "learning_rate": 7.938393140679582e-05, "loss": 0.1123, "step": 3251 }, { "epoch": 6.185449358059914, "grad_norm": 0.2986547350883484, "learning_rate": 7.937758018418547e-05, "loss": 0.0911, "step": 3252 }, { "epoch": 6.187351402757965, "grad_norm": 0.3912739157676697, "learning_rate": 7.93712289615751e-05, "loss": 0.0904, "step": 3253 }, { "epoch": 6.189253447456015, "grad_norm": 0.2589069902896881, "learning_rate": 7.936487773896474e-05, "loss": 0.081, "step": 3254 }, { "epoch": 6.191155492154065, "grad_norm": 0.3324070870876312, "learning_rate": 7.935852651635441e-05, "loss": 0.0897, "step": 3255 }, { "epoch": 6.193057536852116, "grad_norm": 0.2300262153148651, "learning_rate": 7.935217529374405e-05, "loss": 0.0658, "step": 3256 }, { "epoch": 6.194959581550166, "grad_norm": 0.19878318905830383, "learning_rate": 7.93458240711337e-05, "loss": 0.0579, "step": 3257 }, { "epoch": 6.196861626248217, "grad_norm": 0.3048309087753296, "learning_rate": 7.933947284852334e-05, "loss": 0.0719, "step": 3258 }, { "epoch": 6.198763670946267, "grad_norm": 0.3125574290752411, "learning_rate": 7.933312162591299e-05, "loss": 0.0707, "step": 3259 }, { "epoch": 6.200665715644318, "grad_norm": 0.2814660668373108, "learning_rate": 7.932677040330264e-05, "loss": 0.0761, "step": 3260 }, { "epoch": 6.202567760342368, "grad_norm": 0.22039173543453217, "learning_rate": 7.932041918069228e-05, "loss": 0.0634, "step": 3261 }, { "epoch": 6.204469805040419, "grad_norm": 0.2500283420085907, "learning_rate": 7.931406795808194e-05, "loss": 0.0746, "step": 3262 }, { "epoch": 6.206371849738469, "grad_norm": 0.28570613265037537, "learning_rate": 7.930771673547158e-05, "loss": 0.0809, "step": 3263 }, { "epoch": 6.20827389443652, "grad_norm": 0.4383977949619293, "learning_rate": 7.930136551286122e-05, "loss": 0.1058, "step": 3264 }, { "epoch": 6.21017593913457, "grad_norm": 0.24185074865818024, "learning_rate": 7.929501429025087e-05, "loss": 0.0737, "step": 3265 }, { "epoch": 6.21207798383262, "grad_norm": 0.3835535943508148, "learning_rate": 7.928866306764052e-05, "loss": 0.1063, "step": 3266 }, { "epoch": 6.2139800285306706, "grad_norm": 0.23498846590518951, "learning_rate": 7.928231184503018e-05, "loss": 0.0784, "step": 3267 }, { "epoch": 6.2158820732287206, "grad_norm": 0.3388371467590332, "learning_rate": 7.927596062241982e-05, "loss": 0.0836, "step": 3268 }, { "epoch": 6.217784117926771, "grad_norm": 0.24535390734672546, "learning_rate": 7.926960939980947e-05, "loss": 0.0799, "step": 3269 }, { "epoch": 6.219686162624821, "grad_norm": 0.2897215485572815, "learning_rate": 7.926325817719912e-05, "loss": 0.0736, "step": 3270 }, { "epoch": 6.221588207322872, "grad_norm": 0.3779149353504181, "learning_rate": 7.925690695458876e-05, "loss": 0.0846, "step": 3271 }, { "epoch": 6.223490252020922, "grad_norm": 0.29152947664260864, "learning_rate": 7.925055573197841e-05, "loss": 0.0868, "step": 3272 }, { "epoch": 6.225392296718973, "grad_norm": 0.3078393042087555, "learning_rate": 7.924420450936806e-05, "loss": 0.1035, "step": 3273 }, { "epoch": 6.227294341417023, "grad_norm": 0.21019497513771057, "learning_rate": 7.92378532867577e-05, "loss": 0.0668, "step": 3274 }, { "epoch": 6.229196386115074, "grad_norm": 0.27748751640319824, "learning_rate": 7.923150206414735e-05, "loss": 0.0835, "step": 3275 }, { "epoch": 6.231098430813124, "grad_norm": 0.24059148132801056, "learning_rate": 7.9225150841537e-05, "loss": 0.0613, "step": 3276 }, { "epoch": 6.233000475511174, "grad_norm": 0.2641827166080475, "learning_rate": 7.921879961892664e-05, "loss": 0.0821, "step": 3277 }, { "epoch": 6.234902520209225, "grad_norm": 0.3308301866054535, "learning_rate": 7.921244839631629e-05, "loss": 0.0858, "step": 3278 }, { "epoch": 6.236804564907275, "grad_norm": 0.3596150875091553, "learning_rate": 7.920609717370594e-05, "loss": 0.0991, "step": 3279 }, { "epoch": 6.238706609605326, "grad_norm": 0.23002928495407104, "learning_rate": 7.91997459510956e-05, "loss": 0.0692, "step": 3280 }, { "epoch": 6.240608654303376, "grad_norm": 0.38528430461883545, "learning_rate": 7.919339472848523e-05, "loss": 0.112, "step": 3281 }, { "epoch": 6.242510699001427, "grad_norm": 0.3260602355003357, "learning_rate": 7.918704350587489e-05, "loss": 0.087, "step": 3282 }, { "epoch": 6.244412743699477, "grad_norm": 0.2542354464530945, "learning_rate": 7.918069228326454e-05, "loss": 0.0701, "step": 3283 }, { "epoch": 6.246314788397528, "grad_norm": 0.3322354257106781, "learning_rate": 7.917434106065418e-05, "loss": 0.0704, "step": 3284 }, { "epoch": 6.248216833095578, "grad_norm": 0.31040525436401367, "learning_rate": 7.916798983804383e-05, "loss": 0.0925, "step": 3285 }, { "epoch": 6.2501188777936285, "grad_norm": 0.23275534808635712, "learning_rate": 7.916163861543348e-05, "loss": 0.0662, "step": 3286 }, { "epoch": 6.2520209224916785, "grad_norm": 0.2919691801071167, "learning_rate": 7.915528739282312e-05, "loss": 0.0799, "step": 3287 }, { "epoch": 6.2539229671897285, "grad_norm": 0.2902929186820984, "learning_rate": 7.914893617021277e-05, "loss": 0.0904, "step": 3288 }, { "epoch": 6.255825011887779, "grad_norm": 0.3168974220752716, "learning_rate": 7.914258494760242e-05, "loss": 0.0925, "step": 3289 }, { "epoch": 6.257727056585829, "grad_norm": 0.49860697984695435, "learning_rate": 7.913623372499206e-05, "loss": 0.1138, "step": 3290 }, { "epoch": 6.25962910128388, "grad_norm": 0.35882389545440674, "learning_rate": 7.912988250238171e-05, "loss": 0.1377, "step": 3291 }, { "epoch": 6.26153114598193, "grad_norm": 0.40341517329216003, "learning_rate": 7.912353127977135e-05, "loss": 0.0884, "step": 3292 }, { "epoch": 6.263433190679981, "grad_norm": 0.18433208763599396, "learning_rate": 7.911718005716102e-05, "loss": 0.055, "step": 3293 }, { "epoch": 6.265335235378031, "grad_norm": 0.39911824464797974, "learning_rate": 7.911082883455065e-05, "loss": 0.0915, "step": 3294 }, { "epoch": 6.267237280076082, "grad_norm": 0.32022351026535034, "learning_rate": 7.910447761194029e-05, "loss": 0.1122, "step": 3295 }, { "epoch": 6.269139324774132, "grad_norm": 0.30021417140960693, "learning_rate": 7.909812638932996e-05, "loss": 0.0918, "step": 3296 }, { "epoch": 6.271041369472183, "grad_norm": 0.3369525074958801, "learning_rate": 7.90917751667196e-05, "loss": 0.0921, "step": 3297 }, { "epoch": 6.272943414170233, "grad_norm": 0.28983327746391296, "learning_rate": 7.908542394410925e-05, "loss": 0.0581, "step": 3298 }, { "epoch": 6.274845458868284, "grad_norm": 0.30698540806770325, "learning_rate": 7.907907272149889e-05, "loss": 0.092, "step": 3299 }, { "epoch": 6.276747503566334, "grad_norm": 0.2095540314912796, "learning_rate": 7.907272149888854e-05, "loss": 0.0724, "step": 3300 }, { "epoch": 6.278649548264384, "grad_norm": 0.2522066831588745, "learning_rate": 7.906637027627819e-05, "loss": 0.0857, "step": 3301 }, { "epoch": 6.280551592962435, "grad_norm": 0.23340962827205658, "learning_rate": 7.906001905366783e-05, "loss": 0.0811, "step": 3302 }, { "epoch": 6.282453637660485, "grad_norm": 0.3235514760017395, "learning_rate": 7.90536678310575e-05, "loss": 0.1103, "step": 3303 }, { "epoch": 6.284355682358536, "grad_norm": 0.2366112768650055, "learning_rate": 7.904731660844713e-05, "loss": 0.0849, "step": 3304 }, { "epoch": 6.286257727056586, "grad_norm": 0.35640236735343933, "learning_rate": 7.904096538583677e-05, "loss": 0.0973, "step": 3305 }, { "epoch": 6.2881597717546365, "grad_norm": 0.2726953625679016, "learning_rate": 7.903461416322642e-05, "loss": 0.0867, "step": 3306 }, { "epoch": 6.2900618164526865, "grad_norm": 0.2399393916130066, "learning_rate": 7.902826294061607e-05, "loss": 0.0804, "step": 3307 }, { "epoch": 6.291963861150737, "grad_norm": 0.3313315510749817, "learning_rate": 7.902191171800571e-05, "loss": 0.1023, "step": 3308 }, { "epoch": 6.293865905848787, "grad_norm": 0.30253294110298157, "learning_rate": 7.901556049539536e-05, "loss": 0.0954, "step": 3309 }, { "epoch": 6.295767950546838, "grad_norm": 0.25532180070877075, "learning_rate": 7.900920927278502e-05, "loss": 0.0708, "step": 3310 }, { "epoch": 6.297669995244888, "grad_norm": 0.2780250012874603, "learning_rate": 7.900285805017467e-05, "loss": 0.0719, "step": 3311 }, { "epoch": 6.299572039942939, "grad_norm": 0.31086266040802, "learning_rate": 7.89965068275643e-05, "loss": 0.0848, "step": 3312 }, { "epoch": 6.301474084640989, "grad_norm": 0.2522285580635071, "learning_rate": 7.899015560495396e-05, "loss": 0.1076, "step": 3313 }, { "epoch": 6.303376129339039, "grad_norm": 0.3242189288139343, "learning_rate": 7.898380438234361e-05, "loss": 0.0824, "step": 3314 }, { "epoch": 6.30527817403709, "grad_norm": 0.19826237857341766, "learning_rate": 7.897745315973325e-05, "loss": 0.0506, "step": 3315 }, { "epoch": 6.30718021873514, "grad_norm": 0.295257568359375, "learning_rate": 7.89711019371229e-05, "loss": 0.0662, "step": 3316 }, { "epoch": 6.309082263433191, "grad_norm": 0.2735994756221771, "learning_rate": 7.896475071451255e-05, "loss": 0.0918, "step": 3317 }, { "epoch": 6.310984308131241, "grad_norm": 0.32766851782798767, "learning_rate": 7.895839949190219e-05, "loss": 0.087, "step": 3318 }, { "epoch": 6.312886352829292, "grad_norm": 0.3935610353946686, "learning_rate": 7.895204826929184e-05, "loss": 0.1329, "step": 3319 }, { "epoch": 6.314788397527342, "grad_norm": 0.2454293966293335, "learning_rate": 7.89456970466815e-05, "loss": 0.0798, "step": 3320 }, { "epoch": 6.316690442225393, "grad_norm": 0.20663952827453613, "learning_rate": 7.893934582407115e-05, "loss": 0.0635, "step": 3321 }, { "epoch": 6.318592486923443, "grad_norm": 0.3088918924331665, "learning_rate": 7.893299460146078e-05, "loss": 0.1099, "step": 3322 }, { "epoch": 6.3204945316214936, "grad_norm": 0.3906470239162445, "learning_rate": 7.892664337885042e-05, "loss": 0.0799, "step": 3323 }, { "epoch": 6.3223965763195435, "grad_norm": 0.2616283893585205, "learning_rate": 7.892029215624009e-05, "loss": 0.0622, "step": 3324 }, { "epoch": 6.3242986210175935, "grad_norm": 0.2966347932815552, "learning_rate": 7.891394093362973e-05, "loss": 0.1009, "step": 3325 }, { "epoch": 6.326200665715644, "grad_norm": 0.347552090883255, "learning_rate": 7.890758971101936e-05, "loss": 0.0819, "step": 3326 }, { "epoch": 6.328102710413694, "grad_norm": 0.30803078413009644, "learning_rate": 7.890123848840903e-05, "loss": 0.0717, "step": 3327 }, { "epoch": 6.330004755111745, "grad_norm": 0.4039364755153656, "learning_rate": 7.889488726579867e-05, "loss": 0.0984, "step": 3328 }, { "epoch": 6.331906799809795, "grad_norm": 0.39296120405197144, "learning_rate": 7.888853604318832e-05, "loss": 0.1066, "step": 3329 }, { "epoch": 6.333808844507846, "grad_norm": 0.25951987504959106, "learning_rate": 7.888218482057796e-05, "loss": 0.0894, "step": 3330 }, { "epoch": 6.335710889205896, "grad_norm": 0.3936977982521057, "learning_rate": 7.887583359796761e-05, "loss": 0.0938, "step": 3331 }, { "epoch": 6.337612933903947, "grad_norm": 0.17758914828300476, "learning_rate": 7.886948237535726e-05, "loss": 0.0618, "step": 3332 }, { "epoch": 6.339514978601997, "grad_norm": 0.27204954624176025, "learning_rate": 7.88631311527469e-05, "loss": 0.0906, "step": 3333 }, { "epoch": 6.341417023300048, "grad_norm": 0.3029746115207672, "learning_rate": 7.885677993013657e-05, "loss": 0.0834, "step": 3334 }, { "epoch": 6.343319067998098, "grad_norm": 0.33268237113952637, "learning_rate": 7.88504287075262e-05, "loss": 0.0922, "step": 3335 }, { "epoch": 6.345221112696148, "grad_norm": 0.37622734904289246, "learning_rate": 7.884407748491584e-05, "loss": 0.096, "step": 3336 }, { "epoch": 6.347123157394199, "grad_norm": 0.24578069150447845, "learning_rate": 7.88377262623055e-05, "loss": 0.0699, "step": 3337 }, { "epoch": 6.349025202092249, "grad_norm": 0.33049342036247253, "learning_rate": 7.883137503969515e-05, "loss": 0.0881, "step": 3338 }, { "epoch": 6.3509272467903, "grad_norm": 0.2978206276893616, "learning_rate": 7.88250238170848e-05, "loss": 0.0876, "step": 3339 }, { "epoch": 6.35282929148835, "grad_norm": 0.21608392894268036, "learning_rate": 7.881867259447444e-05, "loss": 0.0643, "step": 3340 }, { "epoch": 6.354731336186401, "grad_norm": 0.28561514616012573, "learning_rate": 7.881232137186409e-05, "loss": 0.0827, "step": 3341 }, { "epoch": 6.356633380884451, "grad_norm": 0.2658670246601105, "learning_rate": 7.880597014925374e-05, "loss": 0.0918, "step": 3342 }, { "epoch": 6.3585354255825015, "grad_norm": 0.31455472111701965, "learning_rate": 7.879961892664338e-05, "loss": 0.1127, "step": 3343 }, { "epoch": 6.3604374702805515, "grad_norm": 0.2520543038845062, "learning_rate": 7.879326770403303e-05, "loss": 0.074, "step": 3344 }, { "epoch": 6.362339514978602, "grad_norm": 0.3586767911911011, "learning_rate": 7.878691648142268e-05, "loss": 0.1097, "step": 3345 }, { "epoch": 6.364241559676652, "grad_norm": 0.23053278028964996, "learning_rate": 7.878056525881232e-05, "loss": 0.0923, "step": 3346 }, { "epoch": 6.366143604374702, "grad_norm": 0.31203171610832214, "learning_rate": 7.877421403620197e-05, "loss": 0.1194, "step": 3347 }, { "epoch": 6.368045649072753, "grad_norm": 0.22267888486385345, "learning_rate": 7.876786281359162e-05, "loss": 0.0686, "step": 3348 }, { "epoch": 6.369947693770803, "grad_norm": 0.30080437660217285, "learning_rate": 7.876151159098126e-05, "loss": 0.0883, "step": 3349 }, { "epoch": 6.371849738468854, "grad_norm": 0.3382682502269745, "learning_rate": 7.875516036837091e-05, "loss": 0.089, "step": 3350 }, { "epoch": 6.373751783166904, "grad_norm": 0.23919892311096191, "learning_rate": 7.874880914576057e-05, "loss": 0.0693, "step": 3351 }, { "epoch": 6.375653827864955, "grad_norm": 0.25442051887512207, "learning_rate": 7.874245792315022e-05, "loss": 0.0783, "step": 3352 }, { "epoch": 6.377555872563005, "grad_norm": 0.24807670712471008, "learning_rate": 7.873610670053986e-05, "loss": 0.0805, "step": 3353 }, { "epoch": 6.379457917261056, "grad_norm": 0.3315785229206085, "learning_rate": 7.872975547792951e-05, "loss": 0.0812, "step": 3354 }, { "epoch": 6.381359961959106, "grad_norm": 0.281994104385376, "learning_rate": 7.872340425531916e-05, "loss": 0.0945, "step": 3355 }, { "epoch": 6.383262006657157, "grad_norm": 0.4207977056503296, "learning_rate": 7.87170530327088e-05, "loss": 0.1041, "step": 3356 }, { "epoch": 6.385164051355207, "grad_norm": 0.2093304991722107, "learning_rate": 7.871070181009845e-05, "loss": 0.0682, "step": 3357 }, { "epoch": 6.387066096053257, "grad_norm": 0.29875820875167847, "learning_rate": 7.87043505874881e-05, "loss": 0.089, "step": 3358 }, { "epoch": 6.388968140751308, "grad_norm": 0.331050843000412, "learning_rate": 7.869799936487774e-05, "loss": 0.076, "step": 3359 }, { "epoch": 6.390870185449358, "grad_norm": 0.32891416549682617, "learning_rate": 7.869164814226739e-05, "loss": 0.0821, "step": 3360 }, { "epoch": 6.392772230147409, "grad_norm": 0.23887665569782257, "learning_rate": 7.868529691965704e-05, "loss": 0.0703, "step": 3361 }, { "epoch": 6.394674274845459, "grad_norm": 0.3243681490421295, "learning_rate": 7.867894569704668e-05, "loss": 0.0893, "step": 3362 }, { "epoch": 6.3965763195435095, "grad_norm": 0.3317023813724518, "learning_rate": 7.867259447443633e-05, "loss": 0.0913, "step": 3363 }, { "epoch": 6.3984783642415595, "grad_norm": 0.4032711982727051, "learning_rate": 7.866624325182597e-05, "loss": 0.1019, "step": 3364 }, { "epoch": 6.40038040893961, "grad_norm": 0.30233386158943176, "learning_rate": 7.865989202921564e-05, "loss": 0.0817, "step": 3365 }, { "epoch": 6.40228245363766, "grad_norm": 0.25192874670028687, "learning_rate": 7.865354080660528e-05, "loss": 0.0863, "step": 3366 }, { "epoch": 6.404184498335711, "grad_norm": 0.3983643651008606, "learning_rate": 7.864718958399491e-05, "loss": 0.1088, "step": 3367 }, { "epoch": 6.406086543033761, "grad_norm": 0.3095570206642151, "learning_rate": 7.864083836138457e-05, "loss": 0.0808, "step": 3368 }, { "epoch": 6.407988587731811, "grad_norm": 0.2907126843929291, "learning_rate": 7.863448713877422e-05, "loss": 0.07, "step": 3369 }, { "epoch": 6.409890632429862, "grad_norm": 0.2748839557170868, "learning_rate": 7.862813591616387e-05, "loss": 0.0884, "step": 3370 }, { "epoch": 6.411792677127912, "grad_norm": 0.23985274136066437, "learning_rate": 7.862178469355351e-05, "loss": 0.0732, "step": 3371 }, { "epoch": 6.413694721825963, "grad_norm": 0.2567084729671478, "learning_rate": 7.861543347094316e-05, "loss": 0.0873, "step": 3372 }, { "epoch": 6.415596766524013, "grad_norm": 0.3082403838634491, "learning_rate": 7.860908224833281e-05, "loss": 0.0741, "step": 3373 }, { "epoch": 6.417498811222064, "grad_norm": 0.1999639868736267, "learning_rate": 7.860273102572245e-05, "loss": 0.0628, "step": 3374 }, { "epoch": 6.419400855920114, "grad_norm": 0.34851139783859253, "learning_rate": 7.85963798031121e-05, "loss": 0.0915, "step": 3375 }, { "epoch": 6.421302900618165, "grad_norm": 0.2502918541431427, "learning_rate": 7.859002858050175e-05, "loss": 0.0695, "step": 3376 }, { "epoch": 6.423204945316215, "grad_norm": 0.2936602532863617, "learning_rate": 7.858367735789139e-05, "loss": 0.0759, "step": 3377 }, { "epoch": 6.425106990014266, "grad_norm": 0.3128640353679657, "learning_rate": 7.857732613528104e-05, "loss": 0.0733, "step": 3378 }, { "epoch": 6.427009034712316, "grad_norm": 0.26009759306907654, "learning_rate": 7.85709749126707e-05, "loss": 0.0962, "step": 3379 }, { "epoch": 6.4289110794103665, "grad_norm": 0.28206828236579895, "learning_rate": 7.856462369006033e-05, "loss": 0.0901, "step": 3380 }, { "epoch": 6.4308131241084165, "grad_norm": 0.2728763818740845, "learning_rate": 7.855827246744999e-05, "loss": 0.0657, "step": 3381 }, { "epoch": 6.4327151688064665, "grad_norm": 0.2516337037086487, "learning_rate": 7.855192124483964e-05, "loss": 0.0774, "step": 3382 }, { "epoch": 6.434617213504517, "grad_norm": 0.310434490442276, "learning_rate": 7.854557002222929e-05, "loss": 0.0999, "step": 3383 }, { "epoch": 6.436519258202567, "grad_norm": 0.2987268567085266, "learning_rate": 7.853921879961893e-05, "loss": 0.0902, "step": 3384 }, { "epoch": 6.438421302900618, "grad_norm": 0.2936643362045288, "learning_rate": 7.853286757700858e-05, "loss": 0.0761, "step": 3385 }, { "epoch": 6.440323347598668, "grad_norm": 0.292655348777771, "learning_rate": 7.852651635439823e-05, "loss": 0.0755, "step": 3386 }, { "epoch": 6.442225392296719, "grad_norm": 0.24866874516010284, "learning_rate": 7.852016513178787e-05, "loss": 0.0846, "step": 3387 }, { "epoch": 6.444127436994769, "grad_norm": 0.29063111543655396, "learning_rate": 7.851381390917752e-05, "loss": 0.0897, "step": 3388 }, { "epoch": 6.44602948169282, "grad_norm": 0.41345977783203125, "learning_rate": 7.850746268656717e-05, "loss": 0.0922, "step": 3389 }, { "epoch": 6.44793152639087, "grad_norm": 0.2677401006221771, "learning_rate": 7.850111146395681e-05, "loss": 0.0764, "step": 3390 }, { "epoch": 6.449833571088921, "grad_norm": 0.2827543318271637, "learning_rate": 7.849476024134646e-05, "loss": 0.0838, "step": 3391 }, { "epoch": 6.451735615786971, "grad_norm": 0.241709366440773, "learning_rate": 7.848840901873611e-05, "loss": 0.0747, "step": 3392 }, { "epoch": 6.453637660485022, "grad_norm": 0.3448995053768158, "learning_rate": 7.848205779612577e-05, "loss": 0.0882, "step": 3393 }, { "epoch": 6.455539705183072, "grad_norm": 0.2734489440917969, "learning_rate": 7.84757065735154e-05, "loss": 0.0764, "step": 3394 }, { "epoch": 6.457441749881122, "grad_norm": 0.21918603777885437, "learning_rate": 7.846935535090504e-05, "loss": 0.0809, "step": 3395 }, { "epoch": 6.459343794579173, "grad_norm": 0.2477739155292511, "learning_rate": 7.846300412829471e-05, "loss": 0.0807, "step": 3396 }, { "epoch": 6.461245839277223, "grad_norm": 0.21536344289779663, "learning_rate": 7.845665290568435e-05, "loss": 0.0632, "step": 3397 }, { "epoch": 6.463147883975274, "grad_norm": 0.2905837297439575, "learning_rate": 7.845030168307399e-05, "loss": 0.1073, "step": 3398 }, { "epoch": 6.465049928673324, "grad_norm": 0.29049018025398254, "learning_rate": 7.844395046046365e-05, "loss": 0.0915, "step": 3399 }, { "epoch": 6.4669519733713745, "grad_norm": 0.21640491485595703, "learning_rate": 7.843759923785329e-05, "loss": 0.0597, "step": 3400 }, { "epoch": 6.4688540180694245, "grad_norm": 0.255434513092041, "learning_rate": 7.843124801524294e-05, "loss": 0.0772, "step": 3401 }, { "epoch": 6.470756062767475, "grad_norm": 0.1592421680688858, "learning_rate": 7.842489679263258e-05, "loss": 0.0605, "step": 3402 }, { "epoch": 6.472658107465525, "grad_norm": 0.285553902387619, "learning_rate": 7.841854557002223e-05, "loss": 0.0683, "step": 3403 }, { "epoch": 6.474560152163576, "grad_norm": 0.20020133256912231, "learning_rate": 7.841219434741188e-05, "loss": 0.1138, "step": 3404 }, { "epoch": 6.476462196861626, "grad_norm": 0.3204385042190552, "learning_rate": 7.840584312480152e-05, "loss": 0.0766, "step": 3405 }, { "epoch": 6.478364241559676, "grad_norm": 0.28798025846481323, "learning_rate": 7.839949190219119e-05, "loss": 0.0883, "step": 3406 }, { "epoch": 6.480266286257727, "grad_norm": 0.3688175082206726, "learning_rate": 7.839314067958082e-05, "loss": 0.1036, "step": 3407 }, { "epoch": 6.482168330955777, "grad_norm": 0.35191601514816284, "learning_rate": 7.838678945697046e-05, "loss": 0.0864, "step": 3408 }, { "epoch": 6.484070375653828, "grad_norm": 0.2684312164783478, "learning_rate": 7.838043823436011e-05, "loss": 0.0712, "step": 3409 }, { "epoch": 6.485972420351878, "grad_norm": 0.39929521083831787, "learning_rate": 7.837408701174977e-05, "loss": 0.0861, "step": 3410 }, { "epoch": 6.487874465049929, "grad_norm": 0.26154014468193054, "learning_rate": 7.836773578913942e-05, "loss": 0.0704, "step": 3411 }, { "epoch": 6.489776509747979, "grad_norm": 0.2874952554702759, "learning_rate": 7.836138456652906e-05, "loss": 0.0804, "step": 3412 }, { "epoch": 6.49167855444603, "grad_norm": 0.2434820979833603, "learning_rate": 7.835503334391871e-05, "loss": 0.0797, "step": 3413 }, { "epoch": 6.49358059914408, "grad_norm": 0.23986493051052094, "learning_rate": 7.834868212130836e-05, "loss": 0.0598, "step": 3414 }, { "epoch": 6.495482643842131, "grad_norm": 0.2969447672367096, "learning_rate": 7.8342330898698e-05, "loss": 0.0827, "step": 3415 }, { "epoch": 6.497384688540181, "grad_norm": 0.27279725670814514, "learning_rate": 7.833597967608765e-05, "loss": 0.094, "step": 3416 }, { "epoch": 6.499286733238231, "grad_norm": 0.29611873626708984, "learning_rate": 7.83296284534773e-05, "loss": 0.0643, "step": 3417 }, { "epoch": 6.501188777936282, "grad_norm": 0.28763914108276367, "learning_rate": 7.832327723086694e-05, "loss": 0.0707, "step": 3418 }, { "epoch": 6.503090822634332, "grad_norm": 0.23590637743473053, "learning_rate": 7.831692600825659e-05, "loss": 0.0741, "step": 3419 }, { "epoch": 6.5049928673323825, "grad_norm": 0.3088235557079315, "learning_rate": 7.831057478564624e-05, "loss": 0.0766, "step": 3420 }, { "epoch": 6.5068949120304325, "grad_norm": 0.24730335175991058, "learning_rate": 7.830422356303588e-05, "loss": 0.0712, "step": 3421 }, { "epoch": 6.508796956728483, "grad_norm": 0.29086196422576904, "learning_rate": 7.829787234042553e-05, "loss": 0.0842, "step": 3422 }, { "epoch": 6.510699001426533, "grad_norm": 0.27357545495033264, "learning_rate": 7.829152111781519e-05, "loss": 0.0693, "step": 3423 }, { "epoch": 6.512601046124584, "grad_norm": 0.2787666618824005, "learning_rate": 7.828516989520484e-05, "loss": 0.0762, "step": 3424 }, { "epoch": 6.514503090822634, "grad_norm": 0.27191832661628723, "learning_rate": 7.827881867259448e-05, "loss": 0.0744, "step": 3425 }, { "epoch": 6.516405135520685, "grad_norm": 0.3606361448764801, "learning_rate": 7.827246744998411e-05, "loss": 0.0976, "step": 3426 }, { "epoch": 6.518307180218735, "grad_norm": 0.25823327898979187, "learning_rate": 7.826611622737378e-05, "loss": 0.0733, "step": 3427 }, { "epoch": 6.520209224916785, "grad_norm": 0.38170698285102844, "learning_rate": 7.825976500476342e-05, "loss": 0.0961, "step": 3428 }, { "epoch": 6.522111269614836, "grad_norm": 0.27328750491142273, "learning_rate": 7.825341378215307e-05, "loss": 0.0809, "step": 3429 }, { "epoch": 6.524013314312886, "grad_norm": 0.2610822916030884, "learning_rate": 7.824706255954272e-05, "loss": 0.0631, "step": 3430 }, { "epoch": 6.525915359010937, "grad_norm": 0.21520115435123444, "learning_rate": 7.824071133693236e-05, "loss": 0.0815, "step": 3431 }, { "epoch": 6.527817403708987, "grad_norm": 0.36525583267211914, "learning_rate": 7.823436011432201e-05, "loss": 0.0979, "step": 3432 }, { "epoch": 6.529719448407038, "grad_norm": 0.31419306993484497, "learning_rate": 7.822800889171165e-05, "loss": 0.1211, "step": 3433 }, { "epoch": 6.531621493105088, "grad_norm": 0.2525794804096222, "learning_rate": 7.82216576691013e-05, "loss": 0.0918, "step": 3434 }, { "epoch": 6.533523537803139, "grad_norm": 0.22929687798023224, "learning_rate": 7.821530644649095e-05, "loss": 0.058, "step": 3435 }, { "epoch": 6.535425582501189, "grad_norm": 0.36891230940818787, "learning_rate": 7.820895522388059e-05, "loss": 0.1044, "step": 3436 }, { "epoch": 6.5373276271992395, "grad_norm": 0.2763042151927948, "learning_rate": 7.820260400127026e-05, "loss": 0.1427, "step": 3437 }, { "epoch": 6.5392296718972895, "grad_norm": 0.34488481283187866, "learning_rate": 7.81962527786599e-05, "loss": 0.0815, "step": 3438 }, { "epoch": 6.5411317165953395, "grad_norm": 0.28253301978111267, "learning_rate": 7.818990155604953e-05, "loss": 0.0802, "step": 3439 }, { "epoch": 6.54303376129339, "grad_norm": 0.24336297810077667, "learning_rate": 7.818355033343919e-05, "loss": 0.0762, "step": 3440 }, { "epoch": 6.54493580599144, "grad_norm": 0.2575397491455078, "learning_rate": 7.817719911082884e-05, "loss": 0.0927, "step": 3441 }, { "epoch": 6.546837850689491, "grad_norm": 0.3071526885032654, "learning_rate": 7.817084788821849e-05, "loss": 0.0873, "step": 3442 }, { "epoch": 6.548739895387541, "grad_norm": 0.3540725111961365, "learning_rate": 7.816449666560813e-05, "loss": 0.0962, "step": 3443 }, { "epoch": 6.550641940085592, "grad_norm": 0.20820970833301544, "learning_rate": 7.815814544299778e-05, "loss": 0.0623, "step": 3444 }, { "epoch": 6.552543984783642, "grad_norm": 0.24989038705825806, "learning_rate": 7.815179422038743e-05, "loss": 0.0983, "step": 3445 }, { "epoch": 6.554446029481693, "grad_norm": 0.2294490784406662, "learning_rate": 7.814544299777707e-05, "loss": 0.0822, "step": 3446 }, { "epoch": 6.556348074179743, "grad_norm": 0.25477972626686096, "learning_rate": 7.813909177516672e-05, "loss": 0.0552, "step": 3447 }, { "epoch": 6.558250118877794, "grad_norm": 0.2907105088233948, "learning_rate": 7.813274055255637e-05, "loss": 0.0881, "step": 3448 }, { "epoch": 6.560152163575844, "grad_norm": 0.3188037574291229, "learning_rate": 7.812638932994601e-05, "loss": 0.0877, "step": 3449 }, { "epoch": 6.562054208273894, "grad_norm": 0.3072590231895447, "learning_rate": 7.812003810733566e-05, "loss": 0.0989, "step": 3450 }, { "epoch": 6.563956252971945, "grad_norm": 0.44314712285995483, "learning_rate": 7.811368688472532e-05, "loss": 0.0977, "step": 3451 }, { "epoch": 6.565858297669996, "grad_norm": 0.2874956727027893, "learning_rate": 7.810733566211495e-05, "loss": 0.0909, "step": 3452 }, { "epoch": 6.567760342368046, "grad_norm": 0.32697319984436035, "learning_rate": 7.81009844395046e-05, "loss": 0.1043, "step": 3453 }, { "epoch": 6.569662387066096, "grad_norm": 0.28816571831703186, "learning_rate": 7.809463321689426e-05, "loss": 0.0944, "step": 3454 }, { "epoch": 6.571564431764147, "grad_norm": 0.4285229742527008, "learning_rate": 7.808828199428391e-05, "loss": 0.1251, "step": 3455 }, { "epoch": 6.573466476462197, "grad_norm": 0.30767038464546204, "learning_rate": 7.808193077167355e-05, "loss": 0.0864, "step": 3456 }, { "epoch": 6.5753685211602475, "grad_norm": 0.3375704884529114, "learning_rate": 7.80755795490632e-05, "loss": 0.0861, "step": 3457 }, { "epoch": 6.5772705658582975, "grad_norm": 0.31623944640159607, "learning_rate": 7.806922832645285e-05, "loss": 0.0706, "step": 3458 }, { "epoch": 6.579172610556348, "grad_norm": 0.3341452181339264, "learning_rate": 7.806287710384249e-05, "loss": 0.0946, "step": 3459 }, { "epoch": 6.581074655254398, "grad_norm": 0.32458797097206116, "learning_rate": 7.805652588123214e-05, "loss": 0.0976, "step": 3460 }, { "epoch": 6.582976699952448, "grad_norm": 0.2971061170101166, "learning_rate": 7.80501746586218e-05, "loss": 0.0838, "step": 3461 }, { "epoch": 6.584878744650499, "grad_norm": 0.29806768894195557, "learning_rate": 7.804382343601143e-05, "loss": 0.0719, "step": 3462 }, { "epoch": 6.58678078934855, "grad_norm": 0.268477201461792, "learning_rate": 7.803747221340108e-05, "loss": 0.0819, "step": 3463 }, { "epoch": 6.5886828340466, "grad_norm": 0.3286268413066864, "learning_rate": 7.803112099079074e-05, "loss": 0.0919, "step": 3464 }, { "epoch": 6.59058487874465, "grad_norm": 0.30549558997154236, "learning_rate": 7.802476976818039e-05, "loss": 0.0784, "step": 3465 }, { "epoch": 6.592486923442701, "grad_norm": 0.3088127374649048, "learning_rate": 7.801841854557003e-05, "loss": 0.0894, "step": 3466 }, { "epoch": 6.594388968140751, "grad_norm": 0.29529303312301636, "learning_rate": 7.801206732295966e-05, "loss": 0.0945, "step": 3467 }, { "epoch": 6.596291012838802, "grad_norm": 0.33194759488105774, "learning_rate": 7.800571610034933e-05, "loss": 0.0999, "step": 3468 }, { "epoch": 6.598193057536852, "grad_norm": 0.3072373867034912, "learning_rate": 7.799936487773897e-05, "loss": 0.0819, "step": 3469 }, { "epoch": 6.600095102234903, "grad_norm": 0.3042803704738617, "learning_rate": 7.79930136551286e-05, "loss": 0.0919, "step": 3470 }, { "epoch": 6.601997146932953, "grad_norm": 0.3292810618877411, "learning_rate": 7.798666243251827e-05, "loss": 0.0915, "step": 3471 }, { "epoch": 6.603899191631004, "grad_norm": 0.36252084374427795, "learning_rate": 7.798031120990791e-05, "loss": 0.0707, "step": 3472 }, { "epoch": 6.605801236329054, "grad_norm": 0.23440448939800262, "learning_rate": 7.797395998729756e-05, "loss": 0.0692, "step": 3473 }, { "epoch": 6.607703281027105, "grad_norm": 0.3150375187397003, "learning_rate": 7.79676087646872e-05, "loss": 0.0979, "step": 3474 }, { "epoch": 6.609605325725155, "grad_norm": 0.2594108581542969, "learning_rate": 7.796125754207685e-05, "loss": 0.1073, "step": 3475 }, { "epoch": 6.611507370423205, "grad_norm": 0.25529977679252625, "learning_rate": 7.79549063194665e-05, "loss": 0.0907, "step": 3476 }, { "epoch": 6.6134094151212555, "grad_norm": 0.2744307219982147, "learning_rate": 7.794855509685614e-05, "loss": 0.066, "step": 3477 }, { "epoch": 6.6153114598193055, "grad_norm": 0.26056280732154846, "learning_rate": 7.79422038742458e-05, "loss": 0.0883, "step": 3478 }, { "epoch": 6.617213504517356, "grad_norm": 0.27727293968200684, "learning_rate": 7.793585265163545e-05, "loss": 0.1024, "step": 3479 }, { "epoch": 6.619115549215406, "grad_norm": 0.31950679421424866, "learning_rate": 7.792950142902508e-05, "loss": 0.0799, "step": 3480 }, { "epoch": 6.621017593913457, "grad_norm": 0.301362007856369, "learning_rate": 7.792315020641474e-05, "loss": 0.0885, "step": 3481 }, { "epoch": 6.622919638611507, "grad_norm": 0.3070707321166992, "learning_rate": 7.791679898380439e-05, "loss": 0.0851, "step": 3482 }, { "epoch": 6.624821683309558, "grad_norm": 0.39499345421791077, "learning_rate": 7.791044776119404e-05, "loss": 0.1236, "step": 3483 }, { "epoch": 6.626723728007608, "grad_norm": 0.30052968859672546, "learning_rate": 7.790409653858368e-05, "loss": 0.0761, "step": 3484 }, { "epoch": 6.628625772705659, "grad_norm": 0.2883196175098419, "learning_rate": 7.789774531597333e-05, "loss": 0.0718, "step": 3485 }, { "epoch": 6.630527817403709, "grad_norm": 0.24781768023967743, "learning_rate": 7.789139409336298e-05, "loss": 0.0866, "step": 3486 }, { "epoch": 6.632429862101759, "grad_norm": 0.3658462464809418, "learning_rate": 7.788504287075262e-05, "loss": 0.0763, "step": 3487 }, { "epoch": 6.63433190679981, "grad_norm": 0.24517424404621124, "learning_rate": 7.787869164814227e-05, "loss": 0.0734, "step": 3488 }, { "epoch": 6.63623395149786, "grad_norm": 0.36587032675743103, "learning_rate": 7.787234042553192e-05, "loss": 0.1221, "step": 3489 }, { "epoch": 6.638135996195911, "grad_norm": 0.3201904892921448, "learning_rate": 7.786598920292156e-05, "loss": 0.0748, "step": 3490 }, { "epoch": 6.640038040893961, "grad_norm": 0.3514649569988251, "learning_rate": 7.785963798031121e-05, "loss": 0.0971, "step": 3491 }, { "epoch": 6.641940085592012, "grad_norm": 0.25201430916786194, "learning_rate": 7.785328675770087e-05, "loss": 0.071, "step": 3492 }, { "epoch": 6.643842130290062, "grad_norm": 0.21259263157844543, "learning_rate": 7.78469355350905e-05, "loss": 0.0692, "step": 3493 }, { "epoch": 6.6457441749881125, "grad_norm": 0.32301944494247437, "learning_rate": 7.784058431248016e-05, "loss": 0.0798, "step": 3494 }, { "epoch": 6.6476462196861625, "grad_norm": 0.32042115926742554, "learning_rate": 7.783423308986981e-05, "loss": 0.0846, "step": 3495 }, { "epoch": 6.649548264384213, "grad_norm": 0.294508695602417, "learning_rate": 7.782788186725946e-05, "loss": 0.0849, "step": 3496 }, { "epoch": 6.651450309082263, "grad_norm": 0.3282386064529419, "learning_rate": 7.78215306446491e-05, "loss": 0.0823, "step": 3497 }, { "epoch": 6.653352353780313, "grad_norm": 0.3124896287918091, "learning_rate": 7.781517942203874e-05, "loss": 0.0835, "step": 3498 }, { "epoch": 6.655254398478364, "grad_norm": 0.38605186343193054, "learning_rate": 7.78088281994284e-05, "loss": 0.0977, "step": 3499 }, { "epoch": 6.657156443176414, "grad_norm": 0.2927373945713043, "learning_rate": 7.780247697681804e-05, "loss": 0.0835, "step": 3500 }, { "epoch": 6.659058487874465, "grad_norm": 0.2956550121307373, "learning_rate": 7.779612575420769e-05, "loss": 0.0895, "step": 3501 }, { "epoch": 6.660960532572515, "grad_norm": 0.3334302008152008, "learning_rate": 7.778977453159734e-05, "loss": 0.0699, "step": 3502 }, { "epoch": 6.662862577270566, "grad_norm": 0.2574530839920044, "learning_rate": 7.778342330898698e-05, "loss": 0.0815, "step": 3503 }, { "epoch": 6.664764621968616, "grad_norm": 0.2664775252342224, "learning_rate": 7.777707208637663e-05, "loss": 0.0837, "step": 3504 }, { "epoch": 6.666666666666667, "grad_norm": 0.25906041264533997, "learning_rate": 7.777072086376627e-05, "loss": 0.0719, "step": 3505 }, { "epoch": 6.668568711364717, "grad_norm": 0.373308390378952, "learning_rate": 7.776436964115592e-05, "loss": 0.1048, "step": 3506 }, { "epoch": 6.670470756062768, "grad_norm": 0.2534428536891937, "learning_rate": 7.775801841854558e-05, "loss": 0.0784, "step": 3507 }, { "epoch": 6.672372800760818, "grad_norm": 0.33284425735473633, "learning_rate": 7.775166719593521e-05, "loss": 0.0953, "step": 3508 }, { "epoch": 6.674274845458868, "grad_norm": 0.3962637484073639, "learning_rate": 7.774531597332488e-05, "loss": 0.0963, "step": 3509 }, { "epoch": 6.676176890156919, "grad_norm": 0.2589205503463745, "learning_rate": 7.773896475071452e-05, "loss": 0.0747, "step": 3510 }, { "epoch": 6.678078934854969, "grad_norm": 0.3683101534843445, "learning_rate": 7.773261352810416e-05, "loss": 0.1112, "step": 3511 }, { "epoch": 6.67998097955302, "grad_norm": 0.3012533187866211, "learning_rate": 7.772626230549381e-05, "loss": 0.0823, "step": 3512 }, { "epoch": 6.68188302425107, "grad_norm": 0.23834310472011566, "learning_rate": 7.771991108288346e-05, "loss": 0.0791, "step": 3513 }, { "epoch": 6.6837850689491205, "grad_norm": 0.28065311908721924, "learning_rate": 7.771355986027311e-05, "loss": 0.0816, "step": 3514 }, { "epoch": 6.6856871136471705, "grad_norm": 0.3004380166530609, "learning_rate": 7.770720863766275e-05, "loss": 0.0958, "step": 3515 }, { "epoch": 6.687589158345221, "grad_norm": 0.32143503427505493, "learning_rate": 7.77008574150524e-05, "loss": 0.0897, "step": 3516 }, { "epoch": 6.689491203043271, "grad_norm": 0.3012734055519104, "learning_rate": 7.769450619244205e-05, "loss": 0.0783, "step": 3517 }, { "epoch": 6.691393247741322, "grad_norm": 0.2757457494735718, "learning_rate": 7.768815496983169e-05, "loss": 0.0877, "step": 3518 }, { "epoch": 6.693295292439372, "grad_norm": 0.3059716820716858, "learning_rate": 7.768180374722134e-05, "loss": 0.0805, "step": 3519 }, { "epoch": 6.695197337137422, "grad_norm": 0.26920875906944275, "learning_rate": 7.7675452524611e-05, "loss": 0.0753, "step": 3520 }, { "epoch": 6.697099381835473, "grad_norm": 0.30633947253227234, "learning_rate": 7.766910130200063e-05, "loss": 0.071, "step": 3521 }, { "epoch": 6.699001426533523, "grad_norm": 0.39395442605018616, "learning_rate": 7.766275007939029e-05, "loss": 0.1016, "step": 3522 }, { "epoch": 6.700903471231574, "grad_norm": 0.40755128860473633, "learning_rate": 7.765639885677994e-05, "loss": 0.0955, "step": 3523 }, { "epoch": 6.702805515929624, "grad_norm": 0.26808515191078186, "learning_rate": 7.765004763416958e-05, "loss": 0.0732, "step": 3524 }, { "epoch": 6.704707560627675, "grad_norm": 0.30976927280426025, "learning_rate": 7.764369641155923e-05, "loss": 0.0895, "step": 3525 }, { "epoch": 6.706609605325725, "grad_norm": 0.3644852936267853, "learning_rate": 7.763734518894888e-05, "loss": 0.1035, "step": 3526 }, { "epoch": 6.708511650023776, "grad_norm": 0.37868261337280273, "learning_rate": 7.763099396633853e-05, "loss": 0.1074, "step": 3527 }, { "epoch": 6.710413694721826, "grad_norm": 0.3462066948413849, "learning_rate": 7.762464274372817e-05, "loss": 0.1004, "step": 3528 }, { "epoch": 6.712315739419877, "grad_norm": 0.4035661518573761, "learning_rate": 7.761829152111782e-05, "loss": 0.0891, "step": 3529 }, { "epoch": 6.714217784117927, "grad_norm": 0.26760900020599365, "learning_rate": 7.761194029850747e-05, "loss": 0.0834, "step": 3530 }, { "epoch": 6.716119828815977, "grad_norm": 0.3930485248565674, "learning_rate": 7.760558907589711e-05, "loss": 0.0976, "step": 3531 }, { "epoch": 6.718021873514028, "grad_norm": 0.24880844354629517, "learning_rate": 7.759923785328676e-05, "loss": 0.0886, "step": 3532 }, { "epoch": 6.7199239182120785, "grad_norm": 0.3307024836540222, "learning_rate": 7.759288663067641e-05, "loss": 0.0947, "step": 3533 }, { "epoch": 6.7218259629101285, "grad_norm": 0.34674742817878723, "learning_rate": 7.758653540806605e-05, "loss": 0.0987, "step": 3534 }, { "epoch": 6.7237280076081785, "grad_norm": 0.2853250801563263, "learning_rate": 7.75801841854557e-05, "loss": 0.0748, "step": 3535 }, { "epoch": 6.725630052306229, "grad_norm": 0.23212608695030212, "learning_rate": 7.757383296284534e-05, "loss": 0.0716, "step": 3536 }, { "epoch": 6.727532097004279, "grad_norm": 0.3743533194065094, "learning_rate": 7.756748174023501e-05, "loss": 0.0903, "step": 3537 }, { "epoch": 6.72943414170233, "grad_norm": 0.2562249004840851, "learning_rate": 7.756113051762465e-05, "loss": 0.0719, "step": 3538 }, { "epoch": 6.73133618640038, "grad_norm": 0.29872533679008484, "learning_rate": 7.755477929501429e-05, "loss": 0.0983, "step": 3539 }, { "epoch": 6.733238231098431, "grad_norm": 0.46953633427619934, "learning_rate": 7.754842807240395e-05, "loss": 0.1268, "step": 3540 }, { "epoch": 6.735140275796481, "grad_norm": 0.3572865128517151, "learning_rate": 7.754207684979359e-05, "loss": 0.1016, "step": 3541 }, { "epoch": 6.737042320494532, "grad_norm": 0.27556324005126953, "learning_rate": 7.753572562718323e-05, "loss": 0.0873, "step": 3542 }, { "epoch": 6.738944365192582, "grad_norm": 0.3115139305591583, "learning_rate": 7.752937440457288e-05, "loss": 0.0879, "step": 3543 }, { "epoch": 6.740846409890633, "grad_norm": 0.2544573247432709, "learning_rate": 7.752302318196253e-05, "loss": 0.0824, "step": 3544 }, { "epoch": 6.742748454588683, "grad_norm": 0.3366973400115967, "learning_rate": 7.751667195935218e-05, "loss": 0.0912, "step": 3545 }, { "epoch": 6.744650499286733, "grad_norm": 0.23436006903648376, "learning_rate": 7.751032073674182e-05, "loss": 0.0882, "step": 3546 }, { "epoch": 6.746552543984784, "grad_norm": 0.41576170921325684, "learning_rate": 7.750396951413147e-05, "loss": 0.098, "step": 3547 }, { "epoch": 6.748454588682834, "grad_norm": 0.23372437059879303, "learning_rate": 7.749761829152112e-05, "loss": 0.0604, "step": 3548 }, { "epoch": 6.750356633380885, "grad_norm": 0.42242592573165894, "learning_rate": 7.749126706891076e-05, "loss": 0.1247, "step": 3549 }, { "epoch": 6.752258678078935, "grad_norm": 0.19408640265464783, "learning_rate": 7.748491584630041e-05, "loss": 0.0514, "step": 3550 }, { "epoch": 6.7541607227769855, "grad_norm": 0.30913442373275757, "learning_rate": 7.747856462369007e-05, "loss": 0.0785, "step": 3551 }, { "epoch": 6.7560627674750355, "grad_norm": 0.3456955850124359, "learning_rate": 7.74722134010797e-05, "loss": 0.0922, "step": 3552 }, { "epoch": 6.757964812173086, "grad_norm": 0.2908725142478943, "learning_rate": 7.746586217846936e-05, "loss": 0.0837, "step": 3553 }, { "epoch": 6.759866856871136, "grad_norm": 0.32404351234436035, "learning_rate": 7.745951095585901e-05, "loss": 0.0813, "step": 3554 }, { "epoch": 6.761768901569187, "grad_norm": 0.3528265357017517, "learning_rate": 7.745315973324866e-05, "loss": 0.0952, "step": 3555 }, { "epoch": 6.763670946267237, "grad_norm": 0.3190256655216217, "learning_rate": 7.74468085106383e-05, "loss": 0.0753, "step": 3556 }, { "epoch": 6.765572990965287, "grad_norm": 0.36666131019592285, "learning_rate": 7.744045728802795e-05, "loss": 0.0986, "step": 3557 }, { "epoch": 6.767475035663338, "grad_norm": 0.3192061185836792, "learning_rate": 7.74341060654176e-05, "loss": 0.0659, "step": 3558 }, { "epoch": 6.769377080361388, "grad_norm": 0.3112170696258545, "learning_rate": 7.742775484280724e-05, "loss": 0.0783, "step": 3559 }, { "epoch": 6.771279125059439, "grad_norm": 0.30128562450408936, "learning_rate": 7.742140362019689e-05, "loss": 0.078, "step": 3560 }, { "epoch": 6.773181169757489, "grad_norm": 0.31869345903396606, "learning_rate": 7.741505239758654e-05, "loss": 0.0948, "step": 3561 }, { "epoch": 6.77508321445554, "grad_norm": 0.23667073249816895, "learning_rate": 7.740870117497618e-05, "loss": 0.0778, "step": 3562 }, { "epoch": 6.77698525915359, "grad_norm": 0.26983892917633057, "learning_rate": 7.740234995236583e-05, "loss": 0.0779, "step": 3563 }, { "epoch": 6.778887303851641, "grad_norm": 0.3361709415912628, "learning_rate": 7.739599872975549e-05, "loss": 0.0911, "step": 3564 }, { "epoch": 6.780789348549691, "grad_norm": 0.25900062918663025, "learning_rate": 7.738964750714512e-05, "loss": 0.0728, "step": 3565 }, { "epoch": 6.782691393247742, "grad_norm": 0.2520677447319031, "learning_rate": 7.738329628453478e-05, "loss": 0.0704, "step": 3566 }, { "epoch": 6.784593437945792, "grad_norm": 0.29848986864089966, "learning_rate": 7.737694506192443e-05, "loss": 0.0829, "step": 3567 }, { "epoch": 6.786495482643842, "grad_norm": 0.27131152153015137, "learning_rate": 7.737059383931408e-05, "loss": 0.0651, "step": 3568 }, { "epoch": 6.788397527341893, "grad_norm": 0.2900139093399048, "learning_rate": 7.736424261670372e-05, "loss": 0.0684, "step": 3569 }, { "epoch": 6.790299572039943, "grad_norm": 0.29935070872306824, "learning_rate": 7.735789139409336e-05, "loss": 0.0741, "step": 3570 }, { "epoch": 6.7922016167379935, "grad_norm": 0.3142547905445099, "learning_rate": 7.735154017148302e-05, "loss": 0.0993, "step": 3571 }, { "epoch": 6.7941036614360435, "grad_norm": 0.2870892286300659, "learning_rate": 7.734518894887266e-05, "loss": 0.0725, "step": 3572 }, { "epoch": 6.796005706134094, "grad_norm": 0.4500356912612915, "learning_rate": 7.733883772626231e-05, "loss": 0.1104, "step": 3573 }, { "epoch": 6.797907750832144, "grad_norm": 0.4293663501739502, "learning_rate": 7.733248650365196e-05, "loss": 0.0867, "step": 3574 }, { "epoch": 6.799809795530195, "grad_norm": 0.33508118987083435, "learning_rate": 7.73261352810416e-05, "loss": 0.0853, "step": 3575 }, { "epoch": 6.801711840228245, "grad_norm": 0.30892953276634216, "learning_rate": 7.731978405843125e-05, "loss": 0.092, "step": 3576 }, { "epoch": 6.803613884926296, "grad_norm": 0.3197793960571289, "learning_rate": 7.731343283582089e-05, "loss": 0.0783, "step": 3577 }, { "epoch": 6.805515929624346, "grad_norm": 0.29358264803886414, "learning_rate": 7.730708161321054e-05, "loss": 0.0805, "step": 3578 }, { "epoch": 6.807417974322396, "grad_norm": 0.3208252489566803, "learning_rate": 7.73007303906002e-05, "loss": 0.0774, "step": 3579 }, { "epoch": 6.809320019020447, "grad_norm": 0.3243962824344635, "learning_rate": 7.729437916798983e-05, "loss": 0.0807, "step": 3580 }, { "epoch": 6.811222063718497, "grad_norm": 0.3556419909000397, "learning_rate": 7.72880279453795e-05, "loss": 0.0796, "step": 3581 }, { "epoch": 6.813124108416548, "grad_norm": 0.2571510076522827, "learning_rate": 7.728167672276914e-05, "loss": 0.0754, "step": 3582 }, { "epoch": 6.815026153114598, "grad_norm": 0.29990503191947937, "learning_rate": 7.727532550015878e-05, "loss": 0.0907, "step": 3583 }, { "epoch": 6.816928197812649, "grad_norm": 0.3055148720741272, "learning_rate": 7.726897427754843e-05, "loss": 0.0645, "step": 3584 }, { "epoch": 6.818830242510699, "grad_norm": 0.30374523997306824, "learning_rate": 7.726262305493808e-05, "loss": 0.0777, "step": 3585 }, { "epoch": 6.82073228720875, "grad_norm": 0.3372064530849457, "learning_rate": 7.725627183232773e-05, "loss": 0.0998, "step": 3586 }, { "epoch": 6.8226343319068, "grad_norm": 0.3002014756202698, "learning_rate": 7.724992060971737e-05, "loss": 0.069, "step": 3587 }, { "epoch": 6.824536376604851, "grad_norm": 0.26137155294418335, "learning_rate": 7.724356938710702e-05, "loss": 0.1057, "step": 3588 }, { "epoch": 6.826438421302901, "grad_norm": 0.3956218361854553, "learning_rate": 7.723721816449667e-05, "loss": 0.0795, "step": 3589 }, { "epoch": 6.828340466000951, "grad_norm": 0.2980736792087555, "learning_rate": 7.723086694188631e-05, "loss": 0.0934, "step": 3590 }, { "epoch": 6.8302425106990015, "grad_norm": 0.37110257148742676, "learning_rate": 7.722451571927596e-05, "loss": 0.0931, "step": 3591 }, { "epoch": 6.8321445553970515, "grad_norm": 0.2767345905303955, "learning_rate": 7.721816449666562e-05, "loss": 0.0736, "step": 3592 }, { "epoch": 6.834046600095102, "grad_norm": 0.31996482610702515, "learning_rate": 7.721181327405525e-05, "loss": 0.0842, "step": 3593 }, { "epoch": 6.835948644793152, "grad_norm": 0.303419291973114, "learning_rate": 7.72054620514449e-05, "loss": 0.1011, "step": 3594 }, { "epoch": 6.837850689491203, "grad_norm": 0.2984336316585541, "learning_rate": 7.719911082883456e-05, "loss": 0.0876, "step": 3595 }, { "epoch": 6.839752734189253, "grad_norm": 0.32384997606277466, "learning_rate": 7.71927596062242e-05, "loss": 0.0788, "step": 3596 }, { "epoch": 6.841654778887304, "grad_norm": 0.23781158030033112, "learning_rate": 7.718640838361385e-05, "loss": 0.0861, "step": 3597 }, { "epoch": 6.843556823585354, "grad_norm": 0.22147323191165924, "learning_rate": 7.71800571610035e-05, "loss": 0.0614, "step": 3598 }, { "epoch": 6.845458868283405, "grad_norm": 0.3343324363231659, "learning_rate": 7.717370593839315e-05, "loss": 0.0906, "step": 3599 }, { "epoch": 6.847360912981455, "grad_norm": 0.4509906470775604, "learning_rate": 7.716735471578279e-05, "loss": 0.0859, "step": 3600 }, { "epoch": 6.849262957679505, "grad_norm": 0.27889499068260193, "learning_rate": 7.716100349317243e-05, "loss": 0.0832, "step": 3601 }, { "epoch": 6.851165002377556, "grad_norm": 0.24554187059402466, "learning_rate": 7.71546522705621e-05, "loss": 0.061, "step": 3602 }, { "epoch": 6.853067047075607, "grad_norm": 0.307670533657074, "learning_rate": 7.714830104795173e-05, "loss": 0.0853, "step": 3603 }, { "epoch": 6.854969091773657, "grad_norm": 0.29913416504859924, "learning_rate": 7.714194982534138e-05, "loss": 0.064, "step": 3604 }, { "epoch": 6.856871136471707, "grad_norm": 0.2952573001384735, "learning_rate": 7.713559860273104e-05, "loss": 0.0974, "step": 3605 }, { "epoch": 6.858773181169758, "grad_norm": 0.3110623359680176, "learning_rate": 7.712924738012067e-05, "loss": 0.1036, "step": 3606 }, { "epoch": 6.860675225867808, "grad_norm": 0.25095778703689575, "learning_rate": 7.712289615751033e-05, "loss": 0.0867, "step": 3607 }, { "epoch": 6.8625772705658585, "grad_norm": 0.23761174082756042, "learning_rate": 7.711654493489996e-05, "loss": 0.0719, "step": 3608 }, { "epoch": 6.8644793152639085, "grad_norm": 0.39075493812561035, "learning_rate": 7.711019371228963e-05, "loss": 0.0942, "step": 3609 }, { "epoch": 6.866381359961959, "grad_norm": 0.20142102241516113, "learning_rate": 7.710384248967927e-05, "loss": 0.0753, "step": 3610 }, { "epoch": 6.868283404660009, "grad_norm": 0.3020033836364746, "learning_rate": 7.70974912670689e-05, "loss": 0.1073, "step": 3611 }, { "epoch": 6.870185449358059, "grad_norm": 0.5035987496376038, "learning_rate": 7.709114004445857e-05, "loss": 0.0936, "step": 3612 }, { "epoch": 6.87208749405611, "grad_norm": 0.25016555190086365, "learning_rate": 7.708478882184821e-05, "loss": 0.0853, "step": 3613 }, { "epoch": 6.873989538754161, "grad_norm": 0.3226104974746704, "learning_rate": 7.707843759923785e-05, "loss": 0.0984, "step": 3614 }, { "epoch": 6.875891583452211, "grad_norm": 0.3413734436035156, "learning_rate": 7.70720863766275e-05, "loss": 0.0967, "step": 3615 }, { "epoch": 6.877793628150261, "grad_norm": 0.2394518107175827, "learning_rate": 7.706573515401715e-05, "loss": 0.0699, "step": 3616 }, { "epoch": 6.879695672848312, "grad_norm": 0.3109191358089447, "learning_rate": 7.70593839314068e-05, "loss": 0.0941, "step": 3617 }, { "epoch": 6.881597717546362, "grad_norm": 0.24442525207996368, "learning_rate": 7.705303270879644e-05, "loss": 0.0993, "step": 3618 }, { "epoch": 6.883499762244413, "grad_norm": 0.350276380777359, "learning_rate": 7.70466814861861e-05, "loss": 0.1212, "step": 3619 }, { "epoch": 6.885401806942463, "grad_norm": 0.3216733932495117, "learning_rate": 7.704033026357575e-05, "loss": 0.1079, "step": 3620 }, { "epoch": 6.887303851640514, "grad_norm": 0.5144888162612915, "learning_rate": 7.703397904096538e-05, "loss": 0.1056, "step": 3621 }, { "epoch": 6.889205896338564, "grad_norm": 0.2998292148113251, "learning_rate": 7.702762781835504e-05, "loss": 0.0891, "step": 3622 }, { "epoch": 6.891107941036615, "grad_norm": 0.39780697226524353, "learning_rate": 7.702127659574469e-05, "loss": 0.1039, "step": 3623 }, { "epoch": 6.893009985734665, "grad_norm": 0.30607423186302185, "learning_rate": 7.701492537313433e-05, "loss": 0.0864, "step": 3624 }, { "epoch": 6.894912030432716, "grad_norm": 0.2678467631340027, "learning_rate": 7.700857415052398e-05, "loss": 0.0956, "step": 3625 }, { "epoch": 6.896814075130766, "grad_norm": 0.2922561764717102, "learning_rate": 7.700222292791363e-05, "loss": 0.0904, "step": 3626 }, { "epoch": 6.898716119828816, "grad_norm": 0.34059253334999084, "learning_rate": 7.699587170530328e-05, "loss": 0.1142, "step": 3627 }, { "epoch": 6.9006181645268665, "grad_norm": 0.34268996119499207, "learning_rate": 7.698952048269292e-05, "loss": 0.0999, "step": 3628 }, { "epoch": 6.9025202092249165, "grad_norm": 0.2850574851036072, "learning_rate": 7.698316926008257e-05, "loss": 0.0827, "step": 3629 }, { "epoch": 6.904422253922967, "grad_norm": 0.44454216957092285, "learning_rate": 7.697681803747222e-05, "loss": 0.1169, "step": 3630 }, { "epoch": 6.906324298621017, "grad_norm": 0.3208524286746979, "learning_rate": 7.697046681486186e-05, "loss": 0.0989, "step": 3631 }, { "epoch": 6.908226343319068, "grad_norm": 0.36460134387016296, "learning_rate": 7.696411559225151e-05, "loss": 0.0775, "step": 3632 }, { "epoch": 6.910128388017118, "grad_norm": 0.34505489468574524, "learning_rate": 7.695776436964117e-05, "loss": 0.0959, "step": 3633 }, { "epoch": 6.912030432715169, "grad_norm": 0.28789353370666504, "learning_rate": 7.69514131470308e-05, "loss": 0.0862, "step": 3634 }, { "epoch": 6.913932477413219, "grad_norm": 0.3089391589164734, "learning_rate": 7.694506192442046e-05, "loss": 0.0838, "step": 3635 }, { "epoch": 6.91583452211127, "grad_norm": 0.38007378578186035, "learning_rate": 7.693871070181011e-05, "loss": 0.0811, "step": 3636 }, { "epoch": 6.91773656680932, "grad_norm": 0.24695605039596558, "learning_rate": 7.693235947919975e-05, "loss": 0.0886, "step": 3637 }, { "epoch": 6.91963861150737, "grad_norm": 0.31701305508613586, "learning_rate": 7.69260082565894e-05, "loss": 0.0814, "step": 3638 }, { "epoch": 6.921540656205421, "grad_norm": 0.2539271116256714, "learning_rate": 7.691965703397905e-05, "loss": 0.0948, "step": 3639 }, { "epoch": 6.923442700903471, "grad_norm": 0.3465598523616791, "learning_rate": 7.69133058113687e-05, "loss": 0.0983, "step": 3640 }, { "epoch": 6.925344745601522, "grad_norm": 0.30124831199645996, "learning_rate": 7.690695458875834e-05, "loss": 0.0687, "step": 3641 }, { "epoch": 6.927246790299572, "grad_norm": 0.3128988444805145, "learning_rate": 7.690060336614798e-05, "loss": 0.091, "step": 3642 }, { "epoch": 6.929148834997623, "grad_norm": 0.30787718296051025, "learning_rate": 7.689425214353764e-05, "loss": 0.08, "step": 3643 }, { "epoch": 6.931050879695673, "grad_norm": 0.26531967520713806, "learning_rate": 7.688790092092728e-05, "loss": 0.0903, "step": 3644 }, { "epoch": 6.932952924393724, "grad_norm": 0.28860077261924744, "learning_rate": 7.688154969831693e-05, "loss": 0.1021, "step": 3645 }, { "epoch": 6.934854969091774, "grad_norm": 0.3609332740306854, "learning_rate": 7.687519847570657e-05, "loss": 0.1108, "step": 3646 }, { "epoch": 6.9367570137898245, "grad_norm": 0.2961665987968445, "learning_rate": 7.686884725309622e-05, "loss": 0.0732, "step": 3647 }, { "epoch": 6.9386590584878745, "grad_norm": 0.38632073998451233, "learning_rate": 7.686249603048587e-05, "loss": 0.0999, "step": 3648 }, { "epoch": 6.9405611031859245, "grad_norm": 0.26912546157836914, "learning_rate": 7.685614480787551e-05, "loss": 0.0803, "step": 3649 }, { "epoch": 6.942463147883975, "grad_norm": 0.505782961845398, "learning_rate": 7.684979358526517e-05, "loss": 0.1164, "step": 3650 }, { "epoch": 6.944365192582025, "grad_norm": 0.202500119805336, "learning_rate": 7.684344236265482e-05, "loss": 0.0714, "step": 3651 }, { "epoch": 6.946267237280076, "grad_norm": 0.3188091814517975, "learning_rate": 7.683709114004446e-05, "loss": 0.0758, "step": 3652 }, { "epoch": 6.948169281978126, "grad_norm": 0.34093478322029114, "learning_rate": 7.683073991743411e-05, "loss": 0.0787, "step": 3653 }, { "epoch": 6.950071326676177, "grad_norm": 0.4178665280342102, "learning_rate": 7.682438869482376e-05, "loss": 0.1045, "step": 3654 }, { "epoch": 6.951973371374227, "grad_norm": 0.29486292600631714, "learning_rate": 7.68180374722134e-05, "loss": 0.081, "step": 3655 }, { "epoch": 6.953875416072278, "grad_norm": 0.23391938209533691, "learning_rate": 7.681168624960305e-05, "loss": 0.0653, "step": 3656 }, { "epoch": 6.955777460770328, "grad_norm": 0.26563167572021484, "learning_rate": 7.68053350269927e-05, "loss": 0.0815, "step": 3657 }, { "epoch": 6.957679505468379, "grad_norm": 0.25844621658325195, "learning_rate": 7.679898380438235e-05, "loss": 0.0657, "step": 3658 }, { "epoch": 6.959581550166429, "grad_norm": 0.2926645874977112, "learning_rate": 7.679263258177199e-05, "loss": 0.0807, "step": 3659 }, { "epoch": 6.961483594864479, "grad_norm": 0.2784111499786377, "learning_rate": 7.678628135916164e-05, "loss": 0.0973, "step": 3660 }, { "epoch": 6.96338563956253, "grad_norm": 0.3077068030834198, "learning_rate": 7.67799301365513e-05, "loss": 0.0902, "step": 3661 }, { "epoch": 6.96528768426058, "grad_norm": 0.3007901906967163, "learning_rate": 7.677357891394093e-05, "loss": 0.093, "step": 3662 }, { "epoch": 6.967189728958631, "grad_norm": 0.3273557126522064, "learning_rate": 7.676722769133058e-05, "loss": 0.0958, "step": 3663 }, { "epoch": 6.969091773656681, "grad_norm": 0.2853504419326782, "learning_rate": 7.676087646872024e-05, "loss": 0.0949, "step": 3664 }, { "epoch": 6.9709938183547315, "grad_norm": 0.30276769399642944, "learning_rate": 7.675452524610987e-05, "loss": 0.0615, "step": 3665 }, { "epoch": 6.9728958630527815, "grad_norm": 0.23220020532608032, "learning_rate": 7.674817402349953e-05, "loss": 0.0605, "step": 3666 }, { "epoch": 6.974797907750832, "grad_norm": 0.26779958605766296, "learning_rate": 7.674182280088918e-05, "loss": 0.0844, "step": 3667 }, { "epoch": 6.976699952448882, "grad_norm": 0.32445698976516724, "learning_rate": 7.673547157827882e-05, "loss": 0.0944, "step": 3668 }, { "epoch": 6.978601997146933, "grad_norm": 0.36110180616378784, "learning_rate": 7.672912035566847e-05, "loss": 0.083, "step": 3669 }, { "epoch": 6.980504041844983, "grad_norm": 0.3307937681674957, "learning_rate": 7.672276913305812e-05, "loss": 0.0843, "step": 3670 }, { "epoch": 6.982406086543033, "grad_norm": 0.32733941078186035, "learning_rate": 7.671641791044777e-05, "loss": 0.0837, "step": 3671 }, { "epoch": 6.984308131241084, "grad_norm": 0.8110985159873962, "learning_rate": 7.671006668783741e-05, "loss": 0.1877, "step": 3672 }, { "epoch": 6.986210175939134, "grad_norm": 0.41781920194625854, "learning_rate": 7.670371546522705e-05, "loss": 0.1121, "step": 3673 }, { "epoch": 6.988112220637185, "grad_norm": 0.2781015932559967, "learning_rate": 7.669736424261671e-05, "loss": 0.0735, "step": 3674 }, { "epoch": 6.990014265335235, "grad_norm": 0.38082799315452576, "learning_rate": 7.669101302000635e-05, "loss": 0.0951, "step": 3675 }, { "epoch": 6.991916310033286, "grad_norm": 0.3210826814174652, "learning_rate": 7.6684661797396e-05, "loss": 0.0947, "step": 3676 }, { "epoch": 6.993818354731336, "grad_norm": 0.2684693932533264, "learning_rate": 7.667831057478566e-05, "loss": 0.1023, "step": 3677 }, { "epoch": 6.995720399429387, "grad_norm": 0.4197976589202881, "learning_rate": 7.66719593521753e-05, "loss": 0.0941, "step": 3678 }, { "epoch": 6.997622444127437, "grad_norm": 0.3351534605026245, "learning_rate": 7.666560812956495e-05, "loss": 0.0748, "step": 3679 }, { "epoch": 6.999524488825488, "grad_norm": 0.3510059118270874, "learning_rate": 7.665925690695458e-05, "loss": 0.0993, "step": 3680 }, { "epoch": 7.001426533523538, "grad_norm": 0.2770400643348694, "learning_rate": 7.665290568434425e-05, "loss": 0.0962, "step": 3681 }, { "epoch": 7.003328578221589, "grad_norm": 0.21430467069149017, "learning_rate": 7.664655446173389e-05, "loss": 0.0689, "step": 3682 }, { "epoch": 7.005230622919639, "grad_norm": 0.3444157540798187, "learning_rate": 7.664020323912353e-05, "loss": 0.061, "step": 3683 }, { "epoch": 7.007132667617689, "grad_norm": 0.3022604286670685, "learning_rate": 7.663385201651319e-05, "loss": 0.0792, "step": 3684 }, { "epoch": 7.0090347123157395, "grad_norm": 0.233037069439888, "learning_rate": 7.662750079390283e-05, "loss": 0.0821, "step": 3685 }, { "epoch": 7.0109367570137895, "grad_norm": 0.254637211561203, "learning_rate": 7.662114957129247e-05, "loss": 0.0957, "step": 3686 }, { "epoch": 7.01283880171184, "grad_norm": 0.30084213614463806, "learning_rate": 7.661479834868212e-05, "loss": 0.0813, "step": 3687 }, { "epoch": 7.01474084640989, "grad_norm": 0.30689504742622375, "learning_rate": 7.660844712607177e-05, "loss": 0.0863, "step": 3688 }, { "epoch": 7.016642891107941, "grad_norm": 0.24477942287921906, "learning_rate": 7.660209590346142e-05, "loss": 0.071, "step": 3689 }, { "epoch": 7.018544935805991, "grad_norm": 0.25171712040901184, "learning_rate": 7.659574468085106e-05, "loss": 0.0641, "step": 3690 }, { "epoch": 7.020446980504042, "grad_norm": 0.21781741082668304, "learning_rate": 7.658939345824071e-05, "loss": 0.0676, "step": 3691 }, { "epoch": 7.022349025202092, "grad_norm": 0.23356683552265167, "learning_rate": 7.658304223563037e-05, "loss": 0.0654, "step": 3692 }, { "epoch": 7.024251069900143, "grad_norm": 0.2565113604068756, "learning_rate": 7.657669101302e-05, "loss": 0.0753, "step": 3693 }, { "epoch": 7.026153114598193, "grad_norm": 0.3282324969768524, "learning_rate": 7.657033979040966e-05, "loss": 0.0725, "step": 3694 }, { "epoch": 7.028055159296243, "grad_norm": 0.2607407867908478, "learning_rate": 7.656398856779931e-05, "loss": 0.0962, "step": 3695 }, { "epoch": 7.029957203994294, "grad_norm": 0.18478083610534668, "learning_rate": 7.655763734518895e-05, "loss": 0.0694, "step": 3696 }, { "epoch": 7.031859248692344, "grad_norm": 0.21331697702407837, "learning_rate": 7.65512861225786e-05, "loss": 0.059, "step": 3697 }, { "epoch": 7.033761293390395, "grad_norm": 0.20701389014720917, "learning_rate": 7.654493489996825e-05, "loss": 0.0745, "step": 3698 }, { "epoch": 7.035663338088445, "grad_norm": 0.35444629192352295, "learning_rate": 7.65385836773579e-05, "loss": 0.1039, "step": 3699 }, { "epoch": 7.037565382786496, "grad_norm": 0.24495595693588257, "learning_rate": 7.653223245474754e-05, "loss": 0.0695, "step": 3700 }, { "epoch": 7.039467427484546, "grad_norm": 0.24500828981399536, "learning_rate": 7.652588123213719e-05, "loss": 0.0663, "step": 3701 }, { "epoch": 7.041369472182597, "grad_norm": 0.2540830075740814, "learning_rate": 7.651953000952684e-05, "loss": 0.0546, "step": 3702 }, { "epoch": 7.043271516880647, "grad_norm": 0.3355015814304352, "learning_rate": 7.651317878691648e-05, "loss": 0.0744, "step": 3703 }, { "epoch": 7.0451735615786975, "grad_norm": 0.2634127736091614, "learning_rate": 7.650682756430612e-05, "loss": 0.0752, "step": 3704 }, { "epoch": 7.0470756062767475, "grad_norm": 0.21350815892219543, "learning_rate": 7.650047634169579e-05, "loss": 0.0743, "step": 3705 }, { "epoch": 7.048977650974798, "grad_norm": 0.3560217320919037, "learning_rate": 7.649412511908542e-05, "loss": 0.0894, "step": 3706 }, { "epoch": 7.050879695672848, "grad_norm": 0.2929129898548126, "learning_rate": 7.648777389647508e-05, "loss": 0.0632, "step": 3707 }, { "epoch": 7.052781740370898, "grad_norm": 0.34552210569381714, "learning_rate": 7.648142267386473e-05, "loss": 0.0909, "step": 3708 }, { "epoch": 7.054683785068949, "grad_norm": 0.21399307250976562, "learning_rate": 7.647507145125437e-05, "loss": 0.0687, "step": 3709 }, { "epoch": 7.056585829766999, "grad_norm": 0.22795552015304565, "learning_rate": 7.646872022864402e-05, "loss": 0.0694, "step": 3710 }, { "epoch": 7.05848787446505, "grad_norm": 0.4743165075778961, "learning_rate": 7.646236900603366e-05, "loss": 0.1167, "step": 3711 }, { "epoch": 7.0603899191631, "grad_norm": 0.25579768419265747, "learning_rate": 7.645601778342332e-05, "loss": 0.052, "step": 3712 }, { "epoch": 7.062291963861151, "grad_norm": 0.41287311911582947, "learning_rate": 7.644966656081296e-05, "loss": 0.0891, "step": 3713 }, { "epoch": 7.064194008559201, "grad_norm": 0.4381440281867981, "learning_rate": 7.64433153382026e-05, "loss": 0.1165, "step": 3714 }, { "epoch": 7.066096053257252, "grad_norm": 0.4422372281551361, "learning_rate": 7.643696411559226e-05, "loss": 0.0979, "step": 3715 }, { "epoch": 7.067998097955302, "grad_norm": 0.3064451813697815, "learning_rate": 7.64306128929819e-05, "loss": 0.0781, "step": 3716 }, { "epoch": 7.069900142653353, "grad_norm": 0.30070748925209045, "learning_rate": 7.642426167037155e-05, "loss": 0.0737, "step": 3717 }, { "epoch": 7.071802187351403, "grad_norm": 0.34751248359680176, "learning_rate": 7.641791044776119e-05, "loss": 0.0682, "step": 3718 }, { "epoch": 7.073704232049453, "grad_norm": 0.33590537309646606, "learning_rate": 7.641155922515084e-05, "loss": 0.0944, "step": 3719 }, { "epoch": 7.075606276747504, "grad_norm": 0.4045461416244507, "learning_rate": 7.64052080025405e-05, "loss": 0.0846, "step": 3720 }, { "epoch": 7.077508321445554, "grad_norm": 0.189460888504982, "learning_rate": 7.639885677993013e-05, "loss": 0.0715, "step": 3721 }, { "epoch": 7.0794103661436045, "grad_norm": 0.2401493638753891, "learning_rate": 7.639250555731979e-05, "loss": 0.0558, "step": 3722 }, { "epoch": 7.0813124108416545, "grad_norm": 0.265419602394104, "learning_rate": 7.638615433470944e-05, "loss": 0.0761, "step": 3723 }, { "epoch": 7.083214455539705, "grad_norm": 0.22794272005558014, "learning_rate": 7.637980311209908e-05, "loss": 0.0594, "step": 3724 }, { "epoch": 7.085116500237755, "grad_norm": 0.2641700804233551, "learning_rate": 7.637345188948873e-05, "loss": 0.0789, "step": 3725 }, { "epoch": 7.087018544935806, "grad_norm": 0.29633039236068726, "learning_rate": 7.636710066687838e-05, "loss": 0.0788, "step": 3726 }, { "epoch": 7.088920589633856, "grad_norm": 0.3040051758289337, "learning_rate": 7.636074944426802e-05, "loss": 0.0853, "step": 3727 }, { "epoch": 7.090822634331907, "grad_norm": 0.24118536710739136, "learning_rate": 7.635439822165767e-05, "loss": 0.0745, "step": 3728 }, { "epoch": 7.092724679029957, "grad_norm": 0.26473772525787354, "learning_rate": 7.634804699904732e-05, "loss": 0.0654, "step": 3729 }, { "epoch": 7.094626723728007, "grad_norm": 0.3334522247314453, "learning_rate": 7.634169577643697e-05, "loss": 0.0896, "step": 3730 }, { "epoch": 7.096528768426058, "grad_norm": 0.3315863311290741, "learning_rate": 7.633534455382661e-05, "loss": 0.0878, "step": 3731 }, { "epoch": 7.098430813124108, "grad_norm": 0.2381017655134201, "learning_rate": 7.632899333121626e-05, "loss": 0.07, "step": 3732 }, { "epoch": 7.100332857822159, "grad_norm": 0.25926443934440613, "learning_rate": 7.632264210860592e-05, "loss": 0.0786, "step": 3733 }, { "epoch": 7.102234902520209, "grad_norm": 0.38049688935279846, "learning_rate": 7.631629088599555e-05, "loss": 0.0953, "step": 3734 }, { "epoch": 7.10413694721826, "grad_norm": 0.36096832156181335, "learning_rate": 7.63099396633852e-05, "loss": 0.0894, "step": 3735 }, { "epoch": 7.10603899191631, "grad_norm": 0.2273632436990738, "learning_rate": 7.630358844077486e-05, "loss": 0.071, "step": 3736 }, { "epoch": 7.107941036614361, "grad_norm": 0.24754773080348969, "learning_rate": 7.62972372181645e-05, "loss": 0.0881, "step": 3737 }, { "epoch": 7.109843081312411, "grad_norm": 0.3349929749965668, "learning_rate": 7.629088599555415e-05, "loss": 0.0845, "step": 3738 }, { "epoch": 7.111745126010462, "grad_norm": 0.21691037714481354, "learning_rate": 7.62845347729438e-05, "loss": 0.0733, "step": 3739 }, { "epoch": 7.113647170708512, "grad_norm": 0.2048906683921814, "learning_rate": 7.627818355033344e-05, "loss": 0.0492, "step": 3740 }, { "epoch": 7.1155492154065625, "grad_norm": 0.2179352343082428, "learning_rate": 7.627183232772309e-05, "loss": 0.0759, "step": 3741 }, { "epoch": 7.1174512601046125, "grad_norm": 0.24795210361480713, "learning_rate": 7.626548110511274e-05, "loss": 0.0731, "step": 3742 }, { "epoch": 7.1193533048026625, "grad_norm": 0.28693392872810364, "learning_rate": 7.62591298825024e-05, "loss": 0.0865, "step": 3743 }, { "epoch": 7.121255349500713, "grad_norm": 0.2696365416049957, "learning_rate": 7.625277865989203e-05, "loss": 0.0723, "step": 3744 }, { "epoch": 7.123157394198763, "grad_norm": 0.3165305256843567, "learning_rate": 7.624642743728167e-05, "loss": 0.0955, "step": 3745 }, { "epoch": 7.125059438896814, "grad_norm": 0.2257310301065445, "learning_rate": 7.624007621467134e-05, "loss": 0.0699, "step": 3746 }, { "epoch": 7.126961483594864, "grad_norm": 0.14401155710220337, "learning_rate": 7.623372499206097e-05, "loss": 0.0582, "step": 3747 }, { "epoch": 7.128863528292915, "grad_norm": 0.40427732467651367, "learning_rate": 7.622737376945063e-05, "loss": 0.1004, "step": 3748 }, { "epoch": 7.130765572990965, "grad_norm": 0.3528768718242645, "learning_rate": 7.622102254684028e-05, "loss": 0.0902, "step": 3749 }, { "epoch": 7.132667617689016, "grad_norm": 0.2220061868429184, "learning_rate": 7.621467132422992e-05, "loss": 0.0517, "step": 3750 }, { "epoch": 7.134569662387066, "grad_norm": 0.3189872205257416, "learning_rate": 7.620832010161957e-05, "loss": 0.0837, "step": 3751 }, { "epoch": 7.136471707085117, "grad_norm": 0.21646323800086975, "learning_rate": 7.62019688790092e-05, "loss": 0.0546, "step": 3752 }, { "epoch": 7.138373751783167, "grad_norm": 0.3528130054473877, "learning_rate": 7.619561765639887e-05, "loss": 0.0781, "step": 3753 }, { "epoch": 7.140275796481217, "grad_norm": 0.2020796537399292, "learning_rate": 7.618926643378851e-05, "loss": 0.0652, "step": 3754 }, { "epoch": 7.142177841179268, "grad_norm": 0.24950513243675232, "learning_rate": 7.618291521117815e-05, "loss": 0.0741, "step": 3755 }, { "epoch": 7.144079885877318, "grad_norm": 0.24914821982383728, "learning_rate": 7.61765639885678e-05, "loss": 0.0734, "step": 3756 }, { "epoch": 7.145981930575369, "grad_norm": 0.2292976677417755, "learning_rate": 7.617021276595745e-05, "loss": 0.0754, "step": 3757 }, { "epoch": 7.147883975273419, "grad_norm": 0.3274495005607605, "learning_rate": 7.616386154334709e-05, "loss": 0.1309, "step": 3758 }, { "epoch": 7.14978601997147, "grad_norm": 0.28548160195350647, "learning_rate": 7.615751032073674e-05, "loss": 0.0801, "step": 3759 }, { "epoch": 7.15168806466952, "grad_norm": 0.20921234786510468, "learning_rate": 7.61511590981264e-05, "loss": 0.0686, "step": 3760 }, { "epoch": 7.1535901093675704, "grad_norm": 0.2667579650878906, "learning_rate": 7.614480787551605e-05, "loss": 0.0831, "step": 3761 }, { "epoch": 7.1554921540656204, "grad_norm": 0.4038238227367401, "learning_rate": 7.613845665290568e-05, "loss": 0.0965, "step": 3762 }, { "epoch": 7.157394198763671, "grad_norm": 0.2721848785877228, "learning_rate": 7.613210543029534e-05, "loss": 0.0752, "step": 3763 }, { "epoch": 7.159296243461721, "grad_norm": 0.2838439345359802, "learning_rate": 7.612575420768499e-05, "loss": 0.0758, "step": 3764 }, { "epoch": 7.161198288159771, "grad_norm": 0.3026263117790222, "learning_rate": 7.611940298507463e-05, "loss": 0.0913, "step": 3765 }, { "epoch": 7.163100332857822, "grad_norm": 0.7450515031814575, "learning_rate": 7.611305176246428e-05, "loss": 0.0912, "step": 3766 }, { "epoch": 7.165002377555872, "grad_norm": 0.2942456007003784, "learning_rate": 7.610670053985393e-05, "loss": 0.0644, "step": 3767 }, { "epoch": 7.166904422253923, "grad_norm": 0.24996860325336456, "learning_rate": 7.610034931724357e-05, "loss": 0.0906, "step": 3768 }, { "epoch": 7.168806466951973, "grad_norm": 0.23108260333538055, "learning_rate": 7.609399809463322e-05, "loss": 0.069, "step": 3769 }, { "epoch": 7.170708511650024, "grad_norm": 0.28415048122406006, "learning_rate": 7.608764687202287e-05, "loss": 0.0727, "step": 3770 }, { "epoch": 7.172610556348074, "grad_norm": 0.2941918969154358, "learning_rate": 7.608129564941252e-05, "loss": 0.0749, "step": 3771 }, { "epoch": 7.174512601046125, "grad_norm": 0.2570197284221649, "learning_rate": 7.607494442680216e-05, "loss": 0.0798, "step": 3772 }, { "epoch": 7.176414645744175, "grad_norm": 0.20420850813388824, "learning_rate": 7.606859320419181e-05, "loss": 0.061, "step": 3773 }, { "epoch": 7.178316690442226, "grad_norm": 0.31141209602355957, "learning_rate": 7.606224198158146e-05, "loss": 0.0708, "step": 3774 }, { "epoch": 7.180218735140276, "grad_norm": 0.2804230749607086, "learning_rate": 7.60558907589711e-05, "loss": 0.1223, "step": 3775 }, { "epoch": 7.182120779838327, "grad_norm": 0.28173887729644775, "learning_rate": 7.604953953636074e-05, "loss": 0.0847, "step": 3776 }, { "epoch": 7.184022824536377, "grad_norm": 0.2961934208869934, "learning_rate": 7.604318831375041e-05, "loss": 0.0675, "step": 3777 }, { "epoch": 7.185924869234427, "grad_norm": 0.3397238850593567, "learning_rate": 7.603683709114005e-05, "loss": 0.0896, "step": 3778 }, { "epoch": 7.1878269139324775, "grad_norm": 0.3696925938129425, "learning_rate": 7.60304858685297e-05, "loss": 0.1039, "step": 3779 }, { "epoch": 7.1897289586305275, "grad_norm": 0.2568362355232239, "learning_rate": 7.602413464591935e-05, "loss": 0.0764, "step": 3780 }, { "epoch": 7.191631003328578, "grad_norm": 0.23331722617149353, "learning_rate": 7.601778342330899e-05, "loss": 0.0799, "step": 3781 }, { "epoch": 7.193533048026628, "grad_norm": 0.2694757878780365, "learning_rate": 7.601143220069864e-05, "loss": 0.0986, "step": 3782 }, { "epoch": 7.195435092724679, "grad_norm": 0.3996799886226654, "learning_rate": 7.600508097808828e-05, "loss": 0.0824, "step": 3783 }, { "epoch": 7.197337137422729, "grad_norm": 0.341553270816803, "learning_rate": 7.599872975547794e-05, "loss": 0.0935, "step": 3784 }, { "epoch": 7.19923918212078, "grad_norm": 0.41450145840644836, "learning_rate": 7.599237853286758e-05, "loss": 0.0944, "step": 3785 }, { "epoch": 7.20114122681883, "grad_norm": 0.30726349353790283, "learning_rate": 7.598602731025722e-05, "loss": 0.0811, "step": 3786 }, { "epoch": 7.203043271516881, "grad_norm": 0.29750320315361023, "learning_rate": 7.597967608764688e-05, "loss": 0.0706, "step": 3787 }, { "epoch": 7.204945316214931, "grad_norm": 0.37351077795028687, "learning_rate": 7.597332486503652e-05, "loss": 0.1056, "step": 3788 }, { "epoch": 7.206847360912981, "grad_norm": 0.2162342220544815, "learning_rate": 7.596697364242617e-05, "loss": 0.0732, "step": 3789 }, { "epoch": 7.208749405611032, "grad_norm": 0.2836562693119049, "learning_rate": 7.596062241981581e-05, "loss": 0.0779, "step": 3790 }, { "epoch": 7.210651450309082, "grad_norm": 0.23386134207248688, "learning_rate": 7.595427119720546e-05, "loss": 0.0741, "step": 3791 }, { "epoch": 7.212553495007133, "grad_norm": 0.2197110950946808, "learning_rate": 7.594791997459512e-05, "loss": 0.0628, "step": 3792 }, { "epoch": 7.214455539705183, "grad_norm": 0.2751940190792084, "learning_rate": 7.594156875198476e-05, "loss": 0.0873, "step": 3793 }, { "epoch": 7.216357584403234, "grad_norm": 0.2839178740978241, "learning_rate": 7.593521752937441e-05, "loss": 0.0861, "step": 3794 }, { "epoch": 7.218259629101284, "grad_norm": 0.22591270506381989, "learning_rate": 7.592886630676406e-05, "loss": 0.083, "step": 3795 }, { "epoch": 7.220161673799335, "grad_norm": 0.20070408284664154, "learning_rate": 7.59225150841537e-05, "loss": 0.0663, "step": 3796 }, { "epoch": 7.222063718497385, "grad_norm": 0.2874523401260376, "learning_rate": 7.591616386154335e-05, "loss": 0.0729, "step": 3797 }, { "epoch": 7.2239657631954355, "grad_norm": 0.2688714563846588, "learning_rate": 7.5909812638933e-05, "loss": 0.0757, "step": 3798 }, { "epoch": 7.2258678078934855, "grad_norm": 0.19655251502990723, "learning_rate": 7.590346141632264e-05, "loss": 0.0566, "step": 3799 }, { "epoch": 7.2277698525915355, "grad_norm": 0.27301642298698425, "learning_rate": 7.589711019371229e-05, "loss": 0.0846, "step": 3800 }, { "epoch": 7.229671897289586, "grad_norm": 0.24339912831783295, "learning_rate": 7.589075897110194e-05, "loss": 0.0689, "step": 3801 }, { "epoch": 7.231573941987636, "grad_norm": 0.22319412231445312, "learning_rate": 7.58844077484916e-05, "loss": 0.0671, "step": 3802 }, { "epoch": 7.233475986685687, "grad_norm": 0.2316451221704483, "learning_rate": 7.587805652588123e-05, "loss": 0.0924, "step": 3803 }, { "epoch": 7.235378031383737, "grad_norm": 0.24457992613315582, "learning_rate": 7.587170530327088e-05, "loss": 0.0694, "step": 3804 }, { "epoch": 7.237280076081788, "grad_norm": 0.2121913731098175, "learning_rate": 7.586535408066054e-05, "loss": 0.1148, "step": 3805 }, { "epoch": 7.239182120779838, "grad_norm": 0.19238021969795227, "learning_rate": 7.585900285805017e-05, "loss": 0.0518, "step": 3806 }, { "epoch": 7.241084165477889, "grad_norm": 0.2916553318500519, "learning_rate": 7.585265163543983e-05, "loss": 0.0851, "step": 3807 }, { "epoch": 7.242986210175939, "grad_norm": 0.263918399810791, "learning_rate": 7.584630041282948e-05, "loss": 0.0828, "step": 3808 }, { "epoch": 7.24488825487399, "grad_norm": 0.2717205286026001, "learning_rate": 7.583994919021912e-05, "loss": 0.0808, "step": 3809 }, { "epoch": 7.24679029957204, "grad_norm": 0.37340980768203735, "learning_rate": 7.583359796760877e-05, "loss": 0.1054, "step": 3810 }, { "epoch": 7.24869234427009, "grad_norm": 0.3086722195148468, "learning_rate": 7.582724674499842e-05, "loss": 0.0744, "step": 3811 }, { "epoch": 7.250594388968141, "grad_norm": 0.3170717656612396, "learning_rate": 7.582089552238806e-05, "loss": 0.0709, "step": 3812 }, { "epoch": 7.252496433666191, "grad_norm": 0.3427761495113373, "learning_rate": 7.581454429977771e-05, "loss": 0.0853, "step": 3813 }, { "epoch": 7.254398478364242, "grad_norm": 0.36829718947410583, "learning_rate": 7.580819307716735e-05, "loss": 0.0911, "step": 3814 }, { "epoch": 7.256300523062292, "grad_norm": 0.2933104634284973, "learning_rate": 7.580184185455701e-05, "loss": 0.0788, "step": 3815 }, { "epoch": 7.258202567760343, "grad_norm": 0.30918237566947937, "learning_rate": 7.579549063194665e-05, "loss": 0.0897, "step": 3816 }, { "epoch": 7.260104612458393, "grad_norm": 0.27912604808807373, "learning_rate": 7.578913940933629e-05, "loss": 0.0781, "step": 3817 }, { "epoch": 7.2620066571564434, "grad_norm": 0.3042118549346924, "learning_rate": 7.578278818672596e-05, "loss": 0.0768, "step": 3818 }, { "epoch": 7.263908701854493, "grad_norm": 0.34230148792266846, "learning_rate": 7.57764369641156e-05, "loss": 0.0985, "step": 3819 }, { "epoch": 7.265810746552544, "grad_norm": 0.3558116853237152, "learning_rate": 7.577008574150525e-05, "loss": 0.0907, "step": 3820 }, { "epoch": 7.267712791250594, "grad_norm": 0.25006282329559326, "learning_rate": 7.576373451889488e-05, "loss": 0.0704, "step": 3821 }, { "epoch": 7.269614835948644, "grad_norm": 0.2956601679325104, "learning_rate": 7.575738329628454e-05, "loss": 0.1006, "step": 3822 }, { "epoch": 7.271516880646695, "grad_norm": 0.23409853875637054, "learning_rate": 7.575103207367419e-05, "loss": 0.0847, "step": 3823 }, { "epoch": 7.273418925344745, "grad_norm": 0.3059806227684021, "learning_rate": 7.574468085106383e-05, "loss": 0.0726, "step": 3824 }, { "epoch": 7.275320970042796, "grad_norm": 0.22190988063812256, "learning_rate": 7.573832962845349e-05, "loss": 0.083, "step": 3825 }, { "epoch": 7.277223014740846, "grad_norm": 0.25523841381073, "learning_rate": 7.573197840584313e-05, "loss": 0.1011, "step": 3826 }, { "epoch": 7.279125059438897, "grad_norm": 0.2877829372882843, "learning_rate": 7.572562718323277e-05, "loss": 0.1145, "step": 3827 }, { "epoch": 7.281027104136947, "grad_norm": 0.28623905777931213, "learning_rate": 7.571927596062242e-05, "loss": 0.0988, "step": 3828 }, { "epoch": 7.282929148834998, "grad_norm": 0.26607346534729004, "learning_rate": 7.571292473801207e-05, "loss": 0.0683, "step": 3829 }, { "epoch": 7.284831193533048, "grad_norm": 0.26464006304740906, "learning_rate": 7.570657351540171e-05, "loss": 0.0813, "step": 3830 }, { "epoch": 7.286733238231099, "grad_norm": 0.37109822034835815, "learning_rate": 7.570022229279136e-05, "loss": 0.0709, "step": 3831 }, { "epoch": 7.288635282929149, "grad_norm": 0.2537643313407898, "learning_rate": 7.569387107018101e-05, "loss": 0.0907, "step": 3832 }, { "epoch": 7.2905373276272, "grad_norm": 0.2658579647541046, "learning_rate": 7.568751984757067e-05, "loss": 0.0712, "step": 3833 }, { "epoch": 7.29243937232525, "grad_norm": 0.29602423310279846, "learning_rate": 7.56811686249603e-05, "loss": 0.0741, "step": 3834 }, { "epoch": 7.2943414170233, "grad_norm": 0.25278082489967346, "learning_rate": 7.567481740234996e-05, "loss": 0.0975, "step": 3835 }, { "epoch": 7.2962434617213505, "grad_norm": 0.2224166989326477, "learning_rate": 7.566846617973961e-05, "loss": 0.0606, "step": 3836 }, { "epoch": 7.2981455064194005, "grad_norm": 0.2645518481731415, "learning_rate": 7.566211495712925e-05, "loss": 0.0691, "step": 3837 }, { "epoch": 7.300047551117451, "grad_norm": 0.2529650628566742, "learning_rate": 7.56557637345189e-05, "loss": 0.0823, "step": 3838 }, { "epoch": 7.301949595815501, "grad_norm": 0.3536492586135864, "learning_rate": 7.564941251190855e-05, "loss": 0.0881, "step": 3839 }, { "epoch": 7.303851640513552, "grad_norm": 0.270697683095932, "learning_rate": 7.564306128929819e-05, "loss": 0.0852, "step": 3840 }, { "epoch": 7.305753685211602, "grad_norm": 0.18432219326496124, "learning_rate": 7.563671006668784e-05, "loss": 0.0491, "step": 3841 }, { "epoch": 7.307655729909653, "grad_norm": 0.2726588547229767, "learning_rate": 7.563035884407749e-05, "loss": 0.0813, "step": 3842 }, { "epoch": 7.309557774607703, "grad_norm": 0.2138698250055313, "learning_rate": 7.562400762146714e-05, "loss": 0.0522, "step": 3843 }, { "epoch": 7.311459819305754, "grad_norm": 0.24019719660282135, "learning_rate": 7.561765639885678e-05, "loss": 0.0901, "step": 3844 }, { "epoch": 7.313361864003804, "grad_norm": 0.4069961905479431, "learning_rate": 7.561130517624643e-05, "loss": 0.0882, "step": 3845 }, { "epoch": 7.315263908701855, "grad_norm": 0.29907310009002686, "learning_rate": 7.560495395363609e-05, "loss": 0.0733, "step": 3846 }, { "epoch": 7.317165953399905, "grad_norm": 0.29490938782691956, "learning_rate": 7.559860273102572e-05, "loss": 0.0554, "step": 3847 }, { "epoch": 7.319067998097955, "grad_norm": 0.2691021263599396, "learning_rate": 7.559225150841536e-05, "loss": 0.0824, "step": 3848 }, { "epoch": 7.320970042796006, "grad_norm": 0.2889997959136963, "learning_rate": 7.558590028580503e-05, "loss": 0.0751, "step": 3849 }, { "epoch": 7.322872087494056, "grad_norm": 0.27042216062545776, "learning_rate": 7.557954906319467e-05, "loss": 0.0683, "step": 3850 }, { "epoch": 7.324774132192107, "grad_norm": 0.2516496181488037, "learning_rate": 7.557319784058432e-05, "loss": 0.0821, "step": 3851 }, { "epoch": 7.326676176890157, "grad_norm": 0.25301793217658997, "learning_rate": 7.556684661797397e-05, "loss": 0.0684, "step": 3852 }, { "epoch": 7.328578221588208, "grad_norm": 0.20801399648189545, "learning_rate": 7.556049539536361e-05, "loss": 0.0594, "step": 3853 }, { "epoch": 7.330480266286258, "grad_norm": 0.2855316400527954, "learning_rate": 7.555414417275326e-05, "loss": 0.1444, "step": 3854 }, { "epoch": 7.3323823109843085, "grad_norm": 0.23078607022762299, "learning_rate": 7.55477929501429e-05, "loss": 0.0847, "step": 3855 }, { "epoch": 7.3342843556823585, "grad_norm": 0.29791387915611267, "learning_rate": 7.554144172753256e-05, "loss": 0.0811, "step": 3856 }, { "epoch": 7.336186400380409, "grad_norm": 0.3947182297706604, "learning_rate": 7.55350905049222e-05, "loss": 0.0792, "step": 3857 }, { "epoch": 7.338088445078459, "grad_norm": 0.29231664538383484, "learning_rate": 7.552873928231184e-05, "loss": 0.0867, "step": 3858 }, { "epoch": 7.339990489776509, "grad_norm": 0.27032309770584106, "learning_rate": 7.55223880597015e-05, "loss": 0.0728, "step": 3859 }, { "epoch": 7.34189253447456, "grad_norm": 0.27698999643325806, "learning_rate": 7.551603683709114e-05, "loss": 0.0661, "step": 3860 }, { "epoch": 7.34379457917261, "grad_norm": 0.2770868241786957, "learning_rate": 7.55096856144808e-05, "loss": 0.0598, "step": 3861 }, { "epoch": 7.345696623870661, "grad_norm": 0.26074889302253723, "learning_rate": 7.550333439187043e-05, "loss": 0.0703, "step": 3862 }, { "epoch": 7.347598668568711, "grad_norm": 0.1882147341966629, "learning_rate": 7.549698316926009e-05, "loss": 0.0684, "step": 3863 }, { "epoch": 7.349500713266762, "grad_norm": 0.3019726276397705, "learning_rate": 7.549063194664974e-05, "loss": 0.0762, "step": 3864 }, { "epoch": 7.351402757964812, "grad_norm": 0.32880455255508423, "learning_rate": 7.548428072403938e-05, "loss": 0.0786, "step": 3865 }, { "epoch": 7.353304802662863, "grad_norm": 0.280349463224411, "learning_rate": 7.547792950142903e-05, "loss": 0.0682, "step": 3866 }, { "epoch": 7.355206847360913, "grad_norm": 0.3259483277797699, "learning_rate": 7.547157827881868e-05, "loss": 0.0867, "step": 3867 }, { "epoch": 7.357108892058964, "grad_norm": 0.2592945992946625, "learning_rate": 7.546522705620832e-05, "loss": 0.0587, "step": 3868 }, { "epoch": 7.359010936757014, "grad_norm": 0.2538476288318634, "learning_rate": 7.545887583359797e-05, "loss": 0.0635, "step": 3869 }, { "epoch": 7.360912981455064, "grad_norm": 0.38723552227020264, "learning_rate": 7.545252461098762e-05, "loss": 0.0843, "step": 3870 }, { "epoch": 7.362815026153115, "grad_norm": 0.20521502196788788, "learning_rate": 7.544617338837726e-05, "loss": 0.0608, "step": 3871 }, { "epoch": 7.364717070851165, "grad_norm": 0.26786184310913086, "learning_rate": 7.543982216576691e-05, "loss": 0.0798, "step": 3872 }, { "epoch": 7.366619115549216, "grad_norm": 0.30265524983406067, "learning_rate": 7.543347094315656e-05, "loss": 0.0761, "step": 3873 }, { "epoch": 7.3685211602472656, "grad_norm": 0.383024126291275, "learning_rate": 7.542711972054622e-05, "loss": 0.1, "step": 3874 }, { "epoch": 7.370423204945316, "grad_norm": 0.3774031400680542, "learning_rate": 7.542076849793585e-05, "loss": 0.1151, "step": 3875 }, { "epoch": 7.372325249643366, "grad_norm": 0.1787770837545395, "learning_rate": 7.54144172753255e-05, "loss": 0.0774, "step": 3876 }, { "epoch": 7.374227294341417, "grad_norm": 0.3548794686794281, "learning_rate": 7.540806605271516e-05, "loss": 0.0982, "step": 3877 }, { "epoch": 7.376129339039467, "grad_norm": 0.4059542119503021, "learning_rate": 7.54017148301048e-05, "loss": 0.0912, "step": 3878 }, { "epoch": 7.378031383737518, "grad_norm": 0.5270293951034546, "learning_rate": 7.539536360749445e-05, "loss": 0.1042, "step": 3879 }, { "epoch": 7.379933428435568, "grad_norm": 0.3405616581439972, "learning_rate": 7.53890123848841e-05, "loss": 0.0899, "step": 3880 }, { "epoch": 7.381835473133618, "grad_norm": 0.3558886647224426, "learning_rate": 7.538266116227374e-05, "loss": 0.091, "step": 3881 }, { "epoch": 7.383737517831669, "grad_norm": 0.28821122646331787, "learning_rate": 7.537630993966339e-05, "loss": 0.0819, "step": 3882 }, { "epoch": 7.385639562529719, "grad_norm": 0.3417015075683594, "learning_rate": 7.536995871705304e-05, "loss": 0.078, "step": 3883 }, { "epoch": 7.38754160722777, "grad_norm": 0.32220759987831116, "learning_rate": 7.536360749444268e-05, "loss": 0.0865, "step": 3884 }, { "epoch": 7.38944365192582, "grad_norm": 0.3623081147670746, "learning_rate": 7.535725627183233e-05, "loss": 0.0844, "step": 3885 }, { "epoch": 7.391345696623871, "grad_norm": 0.2569948732852936, "learning_rate": 7.535090504922197e-05, "loss": 0.0932, "step": 3886 }, { "epoch": 7.393247741321921, "grad_norm": 0.33517327904701233, "learning_rate": 7.534455382661164e-05, "loss": 0.102, "step": 3887 }, { "epoch": 7.395149786019972, "grad_norm": 0.3194878101348877, "learning_rate": 7.533820260400127e-05, "loss": 0.0942, "step": 3888 }, { "epoch": 7.397051830718022, "grad_norm": 0.28731876611709595, "learning_rate": 7.533185138139091e-05, "loss": 0.0881, "step": 3889 }, { "epoch": 7.398953875416073, "grad_norm": 0.2113313227891922, "learning_rate": 7.532550015878058e-05, "loss": 0.0554, "step": 3890 }, { "epoch": 7.400855920114123, "grad_norm": 0.30572062730789185, "learning_rate": 7.531914893617022e-05, "loss": 0.0849, "step": 3891 }, { "epoch": 7.402757964812173, "grad_norm": 0.17145153880119324, "learning_rate": 7.531279771355987e-05, "loss": 0.07, "step": 3892 }, { "epoch": 7.4046600095102235, "grad_norm": 0.30596452951431274, "learning_rate": 7.53064464909495e-05, "loss": 0.0752, "step": 3893 }, { "epoch": 7.4065620542082735, "grad_norm": 0.2836361825466156, "learning_rate": 7.530009526833916e-05, "loss": 0.0864, "step": 3894 }, { "epoch": 7.408464098906324, "grad_norm": 0.37935930490493774, "learning_rate": 7.529374404572881e-05, "loss": 0.097, "step": 3895 }, { "epoch": 7.410366143604374, "grad_norm": 0.24081386625766754, "learning_rate": 7.528739282311845e-05, "loss": 0.0643, "step": 3896 }, { "epoch": 7.412268188302425, "grad_norm": 0.324744313955307, "learning_rate": 7.528104160050811e-05, "loss": 0.0892, "step": 3897 }, { "epoch": 7.414170233000475, "grad_norm": 0.29037338495254517, "learning_rate": 7.527469037789775e-05, "loss": 0.0792, "step": 3898 }, { "epoch": 7.416072277698526, "grad_norm": 0.20849809050559998, "learning_rate": 7.526833915528739e-05, "loss": 0.0688, "step": 3899 }, { "epoch": 7.417974322396576, "grad_norm": 0.4752673804759979, "learning_rate": 7.526198793267704e-05, "loss": 0.0848, "step": 3900 }, { "epoch": 7.419876367094627, "grad_norm": 0.20970579981803894, "learning_rate": 7.525563671006669e-05, "loss": 0.0685, "step": 3901 }, { "epoch": 7.421778411792677, "grad_norm": 0.2748827338218689, "learning_rate": 7.524928548745633e-05, "loss": 0.0869, "step": 3902 }, { "epoch": 7.423680456490728, "grad_norm": 0.4105650782585144, "learning_rate": 7.524293426484598e-05, "loss": 0.0907, "step": 3903 }, { "epoch": 7.425582501188778, "grad_norm": 0.4483408033847809, "learning_rate": 7.523658304223564e-05, "loss": 0.0933, "step": 3904 }, { "epoch": 7.427484545886828, "grad_norm": 0.2609012722969055, "learning_rate": 7.523023181962529e-05, "loss": 0.0711, "step": 3905 }, { "epoch": 7.429386590584879, "grad_norm": 0.2896825075149536, "learning_rate": 7.522388059701493e-05, "loss": 0.0669, "step": 3906 }, { "epoch": 7.431288635282929, "grad_norm": 0.3085344731807709, "learning_rate": 7.521752937440458e-05, "loss": 0.0827, "step": 3907 }, { "epoch": 7.43319067998098, "grad_norm": 0.3237287104129791, "learning_rate": 7.521117815179423e-05, "loss": 0.0827, "step": 3908 }, { "epoch": 7.43509272467903, "grad_norm": 0.27414318919181824, "learning_rate": 7.520482692918387e-05, "loss": 0.0728, "step": 3909 }, { "epoch": 7.436994769377081, "grad_norm": 0.23370763659477234, "learning_rate": 7.519847570657352e-05, "loss": 0.067, "step": 3910 }, { "epoch": 7.438896814075131, "grad_norm": 0.20903317630290985, "learning_rate": 7.519212448396317e-05, "loss": 0.057, "step": 3911 }, { "epoch": 7.4407988587731815, "grad_norm": 0.2630857527256012, "learning_rate": 7.518577326135281e-05, "loss": 0.0713, "step": 3912 }, { "epoch": 7.4427009034712315, "grad_norm": 0.27731242775917053, "learning_rate": 7.517942203874246e-05, "loss": 0.0804, "step": 3913 }, { "epoch": 7.444602948169282, "grad_norm": 0.2399328500032425, "learning_rate": 7.517307081613211e-05, "loss": 0.0696, "step": 3914 }, { "epoch": 7.446504992867332, "grad_norm": 0.3606988787651062, "learning_rate": 7.516671959352176e-05, "loss": 0.0662, "step": 3915 }, { "epoch": 7.448407037565383, "grad_norm": 0.30132564902305603, "learning_rate": 7.51603683709114e-05, "loss": 0.0925, "step": 3916 }, { "epoch": 7.450309082263433, "grad_norm": 0.36007988452911377, "learning_rate": 7.515401714830104e-05, "loss": 0.1102, "step": 3917 }, { "epoch": 7.452211126961483, "grad_norm": 0.2463337630033493, "learning_rate": 7.51476659256907e-05, "loss": 0.0769, "step": 3918 }, { "epoch": 7.454113171659534, "grad_norm": 0.21934674680233002, "learning_rate": 7.514131470308034e-05, "loss": 0.0603, "step": 3919 }, { "epoch": 7.456015216357584, "grad_norm": 0.3735393285751343, "learning_rate": 7.513496348046998e-05, "loss": 0.088, "step": 3920 }, { "epoch": 7.457917261055635, "grad_norm": 0.3590213656425476, "learning_rate": 7.512861225785965e-05, "loss": 0.0892, "step": 3921 }, { "epoch": 7.459819305753685, "grad_norm": 0.2909318506717682, "learning_rate": 7.512226103524929e-05, "loss": 0.0743, "step": 3922 }, { "epoch": 7.461721350451736, "grad_norm": 0.2760285437107086, "learning_rate": 7.511590981263894e-05, "loss": 0.0743, "step": 3923 }, { "epoch": 7.463623395149786, "grad_norm": 0.20594924688339233, "learning_rate": 7.510955859002858e-05, "loss": 0.0712, "step": 3924 }, { "epoch": 7.465525439847837, "grad_norm": 0.36622461676597595, "learning_rate": 7.510320736741823e-05, "loss": 0.0796, "step": 3925 }, { "epoch": 7.467427484545887, "grad_norm": 0.47549140453338623, "learning_rate": 7.509685614480788e-05, "loss": 0.1338, "step": 3926 }, { "epoch": 7.469329529243938, "grad_norm": 0.22030964493751526, "learning_rate": 7.509050492219752e-05, "loss": 0.0696, "step": 3927 }, { "epoch": 7.471231573941988, "grad_norm": 0.3158624470233917, "learning_rate": 7.508415369958718e-05, "loss": 0.0804, "step": 3928 }, { "epoch": 7.473133618640038, "grad_norm": 0.28002792596817017, "learning_rate": 7.507780247697682e-05, "loss": 0.0859, "step": 3929 }, { "epoch": 7.4750356633380886, "grad_norm": 0.42471325397491455, "learning_rate": 7.507145125436646e-05, "loss": 0.0943, "step": 3930 }, { "epoch": 7.4769377080361386, "grad_norm": 0.25244924426078796, "learning_rate": 7.506510003175611e-05, "loss": 0.0683, "step": 3931 }, { "epoch": 7.478839752734189, "grad_norm": 0.22861167788505554, "learning_rate": 7.505874880914576e-05, "loss": 0.0601, "step": 3932 }, { "epoch": 7.480741797432239, "grad_norm": 0.2393944412469864, "learning_rate": 7.505239758653542e-05, "loss": 0.0565, "step": 3933 }, { "epoch": 7.48264384213029, "grad_norm": 0.22203993797302246, "learning_rate": 7.504604636392505e-05, "loss": 0.078, "step": 3934 }, { "epoch": 7.48454588682834, "grad_norm": 0.22644877433776855, "learning_rate": 7.50396951413147e-05, "loss": 0.0793, "step": 3935 }, { "epoch": 7.486447931526391, "grad_norm": 0.2989737093448639, "learning_rate": 7.503334391870436e-05, "loss": 0.0893, "step": 3936 }, { "epoch": 7.488349976224441, "grad_norm": 0.2532472014427185, "learning_rate": 7.5026992696094e-05, "loss": 0.0892, "step": 3937 }, { "epoch": 7.490252020922492, "grad_norm": 0.29805564880371094, "learning_rate": 7.502064147348365e-05, "loss": 0.0744, "step": 3938 }, { "epoch": 7.492154065620542, "grad_norm": 0.2866828441619873, "learning_rate": 7.50142902508733e-05, "loss": 0.0854, "step": 3939 }, { "epoch": 7.494056110318592, "grad_norm": 0.4060494005680084, "learning_rate": 7.500793902826294e-05, "loss": 0.1008, "step": 3940 }, { "epoch": 7.495958155016643, "grad_norm": 0.24941779673099518, "learning_rate": 7.500158780565259e-05, "loss": 0.07, "step": 3941 }, { "epoch": 7.497860199714693, "grad_norm": 0.260382741689682, "learning_rate": 7.499523658304224e-05, "loss": 0.0799, "step": 3942 }, { "epoch": 7.499762244412744, "grad_norm": 0.24394255876541138, "learning_rate": 7.498888536043188e-05, "loss": 0.0607, "step": 3943 }, { "epoch": 7.501664289110794, "grad_norm": 0.3588791489601135, "learning_rate": 7.498253413782153e-05, "loss": 0.0862, "step": 3944 }, { "epoch": 7.503566333808845, "grad_norm": 0.29977500438690186, "learning_rate": 7.497618291521118e-05, "loss": 0.0885, "step": 3945 }, { "epoch": 7.505468378506895, "grad_norm": 0.37303635478019714, "learning_rate": 7.496983169260084e-05, "loss": 0.0879, "step": 3946 }, { "epoch": 7.507370423204946, "grad_norm": 0.2976795434951782, "learning_rate": 7.496348046999047e-05, "loss": 0.0796, "step": 3947 }, { "epoch": 7.509272467902996, "grad_norm": 0.2982828617095947, "learning_rate": 7.495712924738013e-05, "loss": 0.0813, "step": 3948 }, { "epoch": 7.5111745126010465, "grad_norm": 0.2461419552564621, "learning_rate": 7.495077802476978e-05, "loss": 0.0843, "step": 3949 }, { "epoch": 7.5130765572990965, "grad_norm": 0.3808850646018982, "learning_rate": 7.494442680215942e-05, "loss": 0.0855, "step": 3950 }, { "epoch": 7.5149786019971465, "grad_norm": 0.5055833458900452, "learning_rate": 7.493807557954907e-05, "loss": 0.1101, "step": 3951 }, { "epoch": 7.516880646695197, "grad_norm": 0.22496581077575684, "learning_rate": 7.493172435693872e-05, "loss": 0.124, "step": 3952 }, { "epoch": 7.518782691393247, "grad_norm": 0.29180511832237244, "learning_rate": 7.492537313432836e-05, "loss": 0.0865, "step": 3953 }, { "epoch": 7.520684736091298, "grad_norm": 0.35880160331726074, "learning_rate": 7.491902191171801e-05, "loss": 0.1123, "step": 3954 }, { "epoch": 7.522586780789348, "grad_norm": 0.36466071009635925, "learning_rate": 7.491267068910766e-05, "loss": 0.1021, "step": 3955 }, { "epoch": 7.524488825487399, "grad_norm": 0.3240335285663605, "learning_rate": 7.49063194664973e-05, "loss": 0.0757, "step": 3956 }, { "epoch": 7.526390870185449, "grad_norm": 0.28873270750045776, "learning_rate": 7.489996824388695e-05, "loss": 0.1093, "step": 3957 }, { "epoch": 7.5282929148835, "grad_norm": 0.3459244668483734, "learning_rate": 7.489361702127659e-05, "loss": 0.0997, "step": 3958 }, { "epoch": 7.53019495958155, "grad_norm": 0.3284059762954712, "learning_rate": 7.488726579866626e-05, "loss": 0.0948, "step": 3959 }, { "epoch": 7.532097004279601, "grad_norm": 0.3020002245903015, "learning_rate": 7.48809145760559e-05, "loss": 0.0847, "step": 3960 }, { "epoch": 7.533999048977651, "grad_norm": 0.2214907556772232, "learning_rate": 7.487456335344553e-05, "loss": 0.0719, "step": 3961 }, { "epoch": 7.535901093675701, "grad_norm": 0.19691269099712372, "learning_rate": 7.48682121308352e-05, "loss": 0.0491, "step": 3962 }, { "epoch": 7.537803138373752, "grad_norm": 0.2436630129814148, "learning_rate": 7.486186090822484e-05, "loss": 0.0696, "step": 3963 }, { "epoch": 7.539705183071802, "grad_norm": 0.3241187632083893, "learning_rate": 7.485550968561449e-05, "loss": 0.1074, "step": 3964 }, { "epoch": 7.541607227769853, "grad_norm": 0.2550383508205414, "learning_rate": 7.484915846300413e-05, "loss": 0.0729, "step": 3965 }, { "epoch": 7.543509272467903, "grad_norm": 0.28291863203048706, "learning_rate": 7.484280724039378e-05, "loss": 0.0704, "step": 3966 }, { "epoch": 7.545411317165954, "grad_norm": 0.3558233380317688, "learning_rate": 7.483645601778343e-05, "loss": 0.0924, "step": 3967 }, { "epoch": 7.547313361864004, "grad_norm": 0.29661667346954346, "learning_rate": 7.483010479517307e-05, "loss": 0.0755, "step": 3968 }, { "epoch": 7.5492154065620545, "grad_norm": 0.2704700231552124, "learning_rate": 7.482375357256272e-05, "loss": 0.0869, "step": 3969 }, { "epoch": 7.5511174512601045, "grad_norm": 0.23262625932693481, "learning_rate": 7.481740234995237e-05, "loss": 0.06, "step": 3970 }, { "epoch": 7.553019495958155, "grad_norm": 0.24703703820705414, "learning_rate": 7.481105112734201e-05, "loss": 0.0617, "step": 3971 }, { "epoch": 7.554921540656205, "grad_norm": 0.3128037750720978, "learning_rate": 7.480469990473166e-05, "loss": 0.085, "step": 3972 }, { "epoch": 7.556823585354255, "grad_norm": 0.27781766653060913, "learning_rate": 7.479834868212131e-05, "loss": 0.0731, "step": 3973 }, { "epoch": 7.558725630052306, "grad_norm": 0.21794871985912323, "learning_rate": 7.479199745951095e-05, "loss": 0.064, "step": 3974 }, { "epoch": 7.560627674750357, "grad_norm": 0.332007497549057, "learning_rate": 7.47856462369006e-05, "loss": 0.1261, "step": 3975 }, { "epoch": 7.562529719448407, "grad_norm": 0.36492758989334106, "learning_rate": 7.477929501429026e-05, "loss": 0.0869, "step": 3976 }, { "epoch": 7.564431764146457, "grad_norm": 0.36482545733451843, "learning_rate": 7.477294379167991e-05, "loss": 0.0956, "step": 3977 }, { "epoch": 7.566333808844508, "grad_norm": 0.3136618137359619, "learning_rate": 7.476659256906955e-05, "loss": 0.0724, "step": 3978 }, { "epoch": 7.568235853542558, "grad_norm": 0.1828143447637558, "learning_rate": 7.47602413464592e-05, "loss": 0.0656, "step": 3979 }, { "epoch": 7.570137898240609, "grad_norm": 0.2850959599018097, "learning_rate": 7.475389012384885e-05, "loss": 0.0807, "step": 3980 }, { "epoch": 7.572039942938659, "grad_norm": 0.23250633478164673, "learning_rate": 7.474753890123849e-05, "loss": 0.0879, "step": 3981 }, { "epoch": 7.57394198763671, "grad_norm": 0.22114038467407227, "learning_rate": 7.474118767862814e-05, "loss": 0.0881, "step": 3982 }, { "epoch": 7.57584403233476, "grad_norm": 0.304195374250412, "learning_rate": 7.473483645601779e-05, "loss": 0.0854, "step": 3983 }, { "epoch": 7.57774607703281, "grad_norm": 0.3512796461582184, "learning_rate": 7.472848523340743e-05, "loss": 0.0984, "step": 3984 }, { "epoch": 7.579648121730861, "grad_norm": 0.19406934082508087, "learning_rate": 7.472213401079708e-05, "loss": 0.0527, "step": 3985 }, { "epoch": 7.5815501664289116, "grad_norm": 0.35052844882011414, "learning_rate": 7.471578278818673e-05, "loss": 0.093, "step": 3986 }, { "epoch": 7.5834522111269616, "grad_norm": 0.3175174593925476, "learning_rate": 7.470943156557639e-05, "loss": 0.0765, "step": 3987 }, { "epoch": 7.5853542558250115, "grad_norm": 0.2843180298805237, "learning_rate": 7.470308034296602e-05, "loss": 0.0794, "step": 3988 }, { "epoch": 7.587256300523062, "grad_norm": 0.25421711802482605, "learning_rate": 7.469672912035566e-05, "loss": 0.0559, "step": 3989 }, { "epoch": 7.589158345221112, "grad_norm": 0.26305899024009705, "learning_rate": 7.469037789774533e-05, "loss": 0.083, "step": 3990 }, { "epoch": 7.591060389919163, "grad_norm": 0.2685282528400421, "learning_rate": 7.468402667513497e-05, "loss": 0.0735, "step": 3991 }, { "epoch": 7.592962434617213, "grad_norm": 0.2922079265117645, "learning_rate": 7.46776754525246e-05, "loss": 0.0752, "step": 3992 }, { "epoch": 7.594864479315264, "grad_norm": 0.4285946190357208, "learning_rate": 7.467132422991427e-05, "loss": 0.0875, "step": 3993 }, { "epoch": 7.596766524013314, "grad_norm": 0.3181219696998596, "learning_rate": 7.466497300730391e-05, "loss": 0.0664, "step": 3994 }, { "epoch": 7.598668568711365, "grad_norm": 0.2952035069465637, "learning_rate": 7.465862178469356e-05, "loss": 0.0814, "step": 3995 }, { "epoch": 7.600570613409415, "grad_norm": 0.2553558647632599, "learning_rate": 7.46522705620832e-05, "loss": 0.066, "step": 3996 }, { "epoch": 7.602472658107466, "grad_norm": 0.23761653900146484, "learning_rate": 7.464591933947285e-05, "loss": 0.0642, "step": 3997 }, { "epoch": 7.604374702805516, "grad_norm": 0.2953890562057495, "learning_rate": 7.46395681168625e-05, "loss": 0.0888, "step": 3998 }, { "epoch": 7.606276747503566, "grad_norm": 0.29672572016716003, "learning_rate": 7.463321689425214e-05, "loss": 0.0856, "step": 3999 }, { "epoch": 7.608178792201617, "grad_norm": 0.48846060037612915, "learning_rate": 7.46268656716418e-05, "loss": 0.1094, "step": 4000 }, { "epoch": 7.610080836899667, "grad_norm": 0.27442026138305664, "learning_rate": 7.462051444903144e-05, "loss": 0.0699, "step": 4001 }, { "epoch": 7.611982881597718, "grad_norm": 0.27723994851112366, "learning_rate": 7.461416322642108e-05, "loss": 0.0826, "step": 4002 }, { "epoch": 7.613884926295768, "grad_norm": 0.19016140699386597, "learning_rate": 7.460781200381073e-05, "loss": 0.0863, "step": 4003 }, { "epoch": 7.615786970993819, "grad_norm": 0.2775978744029999, "learning_rate": 7.460146078120039e-05, "loss": 0.0775, "step": 4004 }, { "epoch": 7.617689015691869, "grad_norm": 0.23393043875694275, "learning_rate": 7.459510955859004e-05, "loss": 0.0774, "step": 4005 }, { "epoch": 7.6195910603899195, "grad_norm": 0.3676578104496002, "learning_rate": 7.458875833597968e-05, "loss": 0.0849, "step": 4006 }, { "epoch": 7.6214931050879695, "grad_norm": 0.22967612743377686, "learning_rate": 7.458240711336933e-05, "loss": 0.0585, "step": 4007 }, { "epoch": 7.62339514978602, "grad_norm": 0.20074373483657837, "learning_rate": 7.457605589075898e-05, "loss": 0.0756, "step": 4008 }, { "epoch": 7.62529719448407, "grad_norm": 0.32885944843292236, "learning_rate": 7.456970466814862e-05, "loss": 0.094, "step": 4009 }, { "epoch": 7.62719923918212, "grad_norm": 0.34579211473464966, "learning_rate": 7.456335344553827e-05, "loss": 0.0667, "step": 4010 }, { "epoch": 7.629101283880171, "grad_norm": 0.26802295446395874, "learning_rate": 7.455700222292792e-05, "loss": 0.073, "step": 4011 }, { "epoch": 7.631003328578221, "grad_norm": 0.315384179353714, "learning_rate": 7.455065100031756e-05, "loss": 0.0792, "step": 4012 }, { "epoch": 7.632905373276272, "grad_norm": 0.35064810514450073, "learning_rate": 7.454429977770721e-05, "loss": 0.0859, "step": 4013 }, { "epoch": 7.634807417974322, "grad_norm": 0.3940977156162262, "learning_rate": 7.453794855509686e-05, "loss": 0.0899, "step": 4014 }, { "epoch": 7.636709462672373, "grad_norm": 0.28424572944641113, "learning_rate": 7.45315973324865e-05, "loss": 0.0826, "step": 4015 }, { "epoch": 7.638611507370423, "grad_norm": 0.2842091917991638, "learning_rate": 7.452524610987615e-05, "loss": 0.0727, "step": 4016 }, { "epoch": 7.640513552068474, "grad_norm": 0.2675850987434387, "learning_rate": 7.45188948872658e-05, "loss": 0.085, "step": 4017 }, { "epoch": 7.642415596766524, "grad_norm": 0.20844288170337677, "learning_rate": 7.451254366465546e-05, "loss": 0.0662, "step": 4018 }, { "epoch": 7.644317641464575, "grad_norm": 0.1705649346113205, "learning_rate": 7.45061924420451e-05, "loss": 0.0673, "step": 4019 }, { "epoch": 7.646219686162625, "grad_norm": 0.22716879844665527, "learning_rate": 7.449984121943475e-05, "loss": 0.0737, "step": 4020 }, { "epoch": 7.648121730860675, "grad_norm": 0.22440075874328613, "learning_rate": 7.44934899968244e-05, "loss": 0.082, "step": 4021 }, { "epoch": 7.650023775558726, "grad_norm": 0.18389202654361725, "learning_rate": 7.448713877421404e-05, "loss": 0.056, "step": 4022 }, { "epoch": 7.651925820256776, "grad_norm": 0.32997074723243713, "learning_rate": 7.448078755160369e-05, "loss": 0.0852, "step": 4023 }, { "epoch": 7.653827864954827, "grad_norm": 0.2446168065071106, "learning_rate": 7.447443632899334e-05, "loss": 0.0612, "step": 4024 }, { "epoch": 7.655729909652877, "grad_norm": 0.2455371469259262, "learning_rate": 7.446808510638298e-05, "loss": 0.0595, "step": 4025 }, { "epoch": 7.6576319543509275, "grad_norm": 0.3117963373661041, "learning_rate": 7.446173388377263e-05, "loss": 0.0869, "step": 4026 }, { "epoch": 7.6595339990489775, "grad_norm": 0.2107362449169159, "learning_rate": 7.445538266116227e-05, "loss": 0.0726, "step": 4027 }, { "epoch": 7.661436043747028, "grad_norm": 0.21624886989593506, "learning_rate": 7.444903143855192e-05, "loss": 0.0721, "step": 4028 }, { "epoch": 7.663338088445078, "grad_norm": 0.30223172903060913, "learning_rate": 7.444268021594157e-05, "loss": 0.1137, "step": 4029 }, { "epoch": 7.665240133143129, "grad_norm": 0.3354337513446808, "learning_rate": 7.443632899333121e-05, "loss": 0.0718, "step": 4030 }, { "epoch": 7.667142177841179, "grad_norm": 0.20831426978111267, "learning_rate": 7.442997777072088e-05, "loss": 0.0696, "step": 4031 }, { "epoch": 7.669044222539229, "grad_norm": 0.222997784614563, "learning_rate": 7.442362654811052e-05, "loss": 0.0716, "step": 4032 }, { "epoch": 7.67094626723728, "grad_norm": 0.29635244607925415, "learning_rate": 7.441727532550015e-05, "loss": 0.1017, "step": 4033 }, { "epoch": 7.67284831193533, "grad_norm": 0.24566298723220825, "learning_rate": 7.44109241028898e-05, "loss": 0.0757, "step": 4034 }, { "epoch": 7.674750356633381, "grad_norm": 0.21996475756168365, "learning_rate": 7.440457288027946e-05, "loss": 0.0899, "step": 4035 }, { "epoch": 7.676652401331431, "grad_norm": 0.36381396651268005, "learning_rate": 7.439822165766911e-05, "loss": 0.0827, "step": 4036 }, { "epoch": 7.678554446029482, "grad_norm": 0.2817314863204956, "learning_rate": 7.439187043505875e-05, "loss": 0.071, "step": 4037 }, { "epoch": 7.680456490727532, "grad_norm": 0.24392902851104736, "learning_rate": 7.43855192124484e-05, "loss": 0.0575, "step": 4038 }, { "epoch": 7.682358535425583, "grad_norm": 0.22329150140285492, "learning_rate": 7.437916798983805e-05, "loss": 0.0659, "step": 4039 }, { "epoch": 7.684260580123633, "grad_norm": 0.302490770816803, "learning_rate": 7.437281676722769e-05, "loss": 0.0863, "step": 4040 }, { "epoch": 7.686162624821684, "grad_norm": 0.2640133202075958, "learning_rate": 7.436646554461734e-05, "loss": 0.0977, "step": 4041 }, { "epoch": 7.688064669519734, "grad_norm": 0.26939862966537476, "learning_rate": 7.436011432200699e-05, "loss": 0.0791, "step": 4042 }, { "epoch": 7.689966714217784, "grad_norm": 0.2610327899456024, "learning_rate": 7.435376309939663e-05, "loss": 0.0539, "step": 4043 }, { "epoch": 7.6918687589158345, "grad_norm": 0.2487347424030304, "learning_rate": 7.434741187678628e-05, "loss": 0.0661, "step": 4044 }, { "epoch": 7.6937708036138845, "grad_norm": 0.34148916602134705, "learning_rate": 7.434106065417593e-05, "loss": 0.083, "step": 4045 }, { "epoch": 7.695672848311935, "grad_norm": 0.28194597363471985, "learning_rate": 7.433470943156557e-05, "loss": 0.0681, "step": 4046 }, { "epoch": 7.697574893009985, "grad_norm": 0.2983434200286865, "learning_rate": 7.432835820895523e-05, "loss": 0.1014, "step": 4047 }, { "epoch": 7.699476937708036, "grad_norm": 0.27884411811828613, "learning_rate": 7.432200698634488e-05, "loss": 0.0711, "step": 4048 }, { "epoch": 7.701378982406086, "grad_norm": 0.27753472328186035, "learning_rate": 7.431565576373453e-05, "loss": 0.0815, "step": 4049 }, { "epoch": 7.703281027104137, "grad_norm": 0.33670538663864136, "learning_rate": 7.430930454112417e-05, "loss": 0.0829, "step": 4050 }, { "epoch": 7.705183071802187, "grad_norm": 0.38544511795043945, "learning_rate": 7.430295331851382e-05, "loss": 0.0764, "step": 4051 }, { "epoch": 7.707085116500238, "grad_norm": 0.2954690158367157, "learning_rate": 7.429660209590347e-05, "loss": 0.0779, "step": 4052 }, { "epoch": 7.708987161198288, "grad_norm": 0.2750054597854614, "learning_rate": 7.429025087329311e-05, "loss": 0.0668, "step": 4053 }, { "epoch": 7.710889205896338, "grad_norm": 0.27135616540908813, "learning_rate": 7.428389965068276e-05, "loss": 0.0936, "step": 4054 }, { "epoch": 7.712791250594389, "grad_norm": 0.25627055764198303, "learning_rate": 7.427754842807241e-05, "loss": 0.0585, "step": 4055 }, { "epoch": 7.71469329529244, "grad_norm": 0.3443787395954132, "learning_rate": 7.427119720546205e-05, "loss": 0.0825, "step": 4056 }, { "epoch": 7.71659533999049, "grad_norm": 0.23502713441848755, "learning_rate": 7.42648459828517e-05, "loss": 0.0772, "step": 4057 }, { "epoch": 7.71849738468854, "grad_norm": 0.28659313917160034, "learning_rate": 7.425849476024135e-05, "loss": 0.1007, "step": 4058 }, { "epoch": 7.720399429386591, "grad_norm": 0.2622101902961731, "learning_rate": 7.4252143537631e-05, "loss": 0.0751, "step": 4059 }, { "epoch": 7.722301474084641, "grad_norm": 0.2769562304019928, "learning_rate": 7.424579231502064e-05, "loss": 0.0744, "step": 4060 }, { "epoch": 7.724203518782692, "grad_norm": 0.29820317029953003, "learning_rate": 7.423944109241028e-05, "loss": 0.0968, "step": 4061 }, { "epoch": 7.726105563480742, "grad_norm": 0.1930849850177765, "learning_rate": 7.423308986979995e-05, "loss": 0.0622, "step": 4062 }, { "epoch": 7.7280076081787925, "grad_norm": 0.3451099395751953, "learning_rate": 7.422673864718959e-05, "loss": 0.1056, "step": 4063 }, { "epoch": 7.7299096528768425, "grad_norm": 0.4290725588798523, "learning_rate": 7.422038742457923e-05, "loss": 0.0813, "step": 4064 }, { "epoch": 7.731811697574893, "grad_norm": 0.25179606676101685, "learning_rate": 7.421403620196889e-05, "loss": 0.0713, "step": 4065 }, { "epoch": 7.733713742272943, "grad_norm": 0.30341050028800964, "learning_rate": 7.420768497935853e-05, "loss": 0.0797, "step": 4066 }, { "epoch": 7.735615786970994, "grad_norm": 0.2818777561187744, "learning_rate": 7.420133375674818e-05, "loss": 0.0844, "step": 4067 }, { "epoch": 7.737517831669044, "grad_norm": 0.2710643410682678, "learning_rate": 7.419498253413782e-05, "loss": 0.085, "step": 4068 }, { "epoch": 7.739419876367094, "grad_norm": 0.29590675234794617, "learning_rate": 7.418863131152747e-05, "loss": 0.0844, "step": 4069 }, { "epoch": 7.741321921065145, "grad_norm": 0.26093828678131104, "learning_rate": 7.418228008891712e-05, "loss": 0.0927, "step": 4070 }, { "epoch": 7.743223965763195, "grad_norm": 0.20662081241607666, "learning_rate": 7.417592886630676e-05, "loss": 0.0842, "step": 4071 }, { "epoch": 7.745126010461246, "grad_norm": 0.2244255244731903, "learning_rate": 7.416957764369643e-05, "loss": 0.0691, "step": 4072 }, { "epoch": 7.747028055159296, "grad_norm": 0.1915290802717209, "learning_rate": 7.416322642108606e-05, "loss": 0.0765, "step": 4073 }, { "epoch": 7.748930099857347, "grad_norm": 0.34651103615760803, "learning_rate": 7.41568751984757e-05, "loss": 0.0809, "step": 4074 }, { "epoch": 7.750832144555397, "grad_norm": 0.2221180945634842, "learning_rate": 7.415052397586535e-05, "loss": 0.0618, "step": 4075 }, { "epoch": 7.752734189253448, "grad_norm": 0.2723255455493927, "learning_rate": 7.4144172753255e-05, "loss": 0.0921, "step": 4076 }, { "epoch": 7.754636233951498, "grad_norm": 0.2689955234527588, "learning_rate": 7.413782153064466e-05, "loss": 0.06, "step": 4077 }, { "epoch": 7.756538278649549, "grad_norm": 0.2710064649581909, "learning_rate": 7.41314703080343e-05, "loss": 0.0786, "step": 4078 }, { "epoch": 7.758440323347599, "grad_norm": 0.2953267991542816, "learning_rate": 7.412511908542395e-05, "loss": 0.0662, "step": 4079 }, { "epoch": 7.760342368045649, "grad_norm": 0.40185266733169556, "learning_rate": 7.41187678628136e-05, "loss": 0.0758, "step": 4080 }, { "epoch": 7.7622444127437, "grad_norm": 0.3208361268043518, "learning_rate": 7.411241664020324e-05, "loss": 0.091, "step": 4081 }, { "epoch": 7.76414645744175, "grad_norm": 0.20456193387508392, "learning_rate": 7.410606541759289e-05, "loss": 0.062, "step": 4082 }, { "epoch": 7.7660485021398005, "grad_norm": 0.38072583079338074, "learning_rate": 7.409971419498254e-05, "loss": 0.0859, "step": 4083 }, { "epoch": 7.7679505468378505, "grad_norm": 0.19965219497680664, "learning_rate": 7.409336297237218e-05, "loss": 0.0632, "step": 4084 }, { "epoch": 7.769852591535901, "grad_norm": 0.3247738778591156, "learning_rate": 7.408701174976183e-05, "loss": 0.0902, "step": 4085 }, { "epoch": 7.771754636233951, "grad_norm": 0.3265827000141144, "learning_rate": 7.408066052715148e-05, "loss": 0.0849, "step": 4086 }, { "epoch": 7.773656680932002, "grad_norm": 0.254866361618042, "learning_rate": 7.407430930454112e-05, "loss": 0.0721, "step": 4087 }, { "epoch": 7.775558725630052, "grad_norm": 0.24622495472431183, "learning_rate": 7.406795808193077e-05, "loss": 0.0744, "step": 4088 }, { "epoch": 7.777460770328103, "grad_norm": 0.3360063135623932, "learning_rate": 7.406160685932043e-05, "loss": 0.0869, "step": 4089 }, { "epoch": 7.779362815026153, "grad_norm": 0.23800595104694366, "learning_rate": 7.405525563671008e-05, "loss": 0.0662, "step": 4090 }, { "epoch": 7.781264859724203, "grad_norm": 0.2821212112903595, "learning_rate": 7.404890441409972e-05, "loss": 0.0739, "step": 4091 }, { "epoch": 7.783166904422254, "grad_norm": 0.293625146150589, "learning_rate": 7.404255319148935e-05, "loss": 0.0781, "step": 4092 }, { "epoch": 7.785068949120304, "grad_norm": 0.26291313767433167, "learning_rate": 7.403620196887902e-05, "loss": 0.0708, "step": 4093 }, { "epoch": 7.786970993818355, "grad_norm": 0.20752035081386566, "learning_rate": 7.402985074626866e-05, "loss": 0.0638, "step": 4094 }, { "epoch": 7.788873038516405, "grad_norm": 0.36044710874557495, "learning_rate": 7.402349952365831e-05, "loss": 0.079, "step": 4095 }, { "epoch": 7.790775083214456, "grad_norm": 0.32189956307411194, "learning_rate": 7.401714830104796e-05, "loss": 0.0754, "step": 4096 }, { "epoch": 7.792677127912506, "grad_norm": 0.259915828704834, "learning_rate": 7.40107970784376e-05, "loss": 0.083, "step": 4097 }, { "epoch": 7.794579172610557, "grad_norm": 0.2856104373931885, "learning_rate": 7.400444585582725e-05, "loss": 0.0742, "step": 4098 }, { "epoch": 7.796481217308607, "grad_norm": 0.27823686599731445, "learning_rate": 7.399809463321689e-05, "loss": 0.0697, "step": 4099 }, { "epoch": 7.7983832620066575, "grad_norm": 0.291198194026947, "learning_rate": 7.399174341060654e-05, "loss": 0.0771, "step": 4100 }, { "epoch": 7.8002853067047075, "grad_norm": 0.322517067193985, "learning_rate": 7.39853921879962e-05, "loss": 0.0585, "step": 4101 }, { "epoch": 7.8021873514027575, "grad_norm": 0.3800067603588104, "learning_rate": 7.397904096538583e-05, "loss": 0.0941, "step": 4102 }, { "epoch": 7.804089396100808, "grad_norm": 0.33139878511428833, "learning_rate": 7.39726897427755e-05, "loss": 0.0879, "step": 4103 }, { "epoch": 7.805991440798858, "grad_norm": 0.3596518039703369, "learning_rate": 7.396633852016514e-05, "loss": 0.1046, "step": 4104 }, { "epoch": 7.807893485496909, "grad_norm": 0.24730372428894043, "learning_rate": 7.395998729755477e-05, "loss": 0.0727, "step": 4105 }, { "epoch": 7.809795530194959, "grad_norm": 0.24898457527160645, "learning_rate": 7.395363607494443e-05, "loss": 0.0787, "step": 4106 }, { "epoch": 7.81169757489301, "grad_norm": 0.27778348326683044, "learning_rate": 7.394728485233408e-05, "loss": 0.0789, "step": 4107 }, { "epoch": 7.81359961959106, "grad_norm": 0.4483128488063812, "learning_rate": 7.394093362972373e-05, "loss": 0.0903, "step": 4108 }, { "epoch": 7.815501664289111, "grad_norm": 0.25830453634262085, "learning_rate": 7.393458240711337e-05, "loss": 0.0776, "step": 4109 }, { "epoch": 7.817403708987161, "grad_norm": 0.26678788661956787, "learning_rate": 7.392823118450302e-05, "loss": 0.0689, "step": 4110 }, { "epoch": 7.819305753685212, "grad_norm": 0.3530676066875458, "learning_rate": 7.392187996189267e-05, "loss": 0.0877, "step": 4111 }, { "epoch": 7.821207798383262, "grad_norm": 0.27258220314979553, "learning_rate": 7.391552873928231e-05, "loss": 0.0849, "step": 4112 }, { "epoch": 7.823109843081312, "grad_norm": 0.3042837679386139, "learning_rate": 7.390917751667196e-05, "loss": 0.0764, "step": 4113 }, { "epoch": 7.825011887779363, "grad_norm": 0.26109039783477783, "learning_rate": 7.390282629406161e-05, "loss": 0.0687, "step": 4114 }, { "epoch": 7.826913932477413, "grad_norm": 0.36038345098495483, "learning_rate": 7.389647507145125e-05, "loss": 0.1176, "step": 4115 }, { "epoch": 7.828815977175464, "grad_norm": 0.24119070172309875, "learning_rate": 7.38901238488409e-05, "loss": 0.0803, "step": 4116 }, { "epoch": 7.830718021873514, "grad_norm": 0.279041588306427, "learning_rate": 7.388377262623056e-05, "loss": 0.0748, "step": 4117 }, { "epoch": 7.832620066571565, "grad_norm": 0.24732699990272522, "learning_rate": 7.38774214036202e-05, "loss": 0.0749, "step": 4118 }, { "epoch": 7.834522111269615, "grad_norm": 0.19942957162857056, "learning_rate": 7.387107018100985e-05, "loss": 0.058, "step": 4119 }, { "epoch": 7.8364241559676655, "grad_norm": 0.3800085484981537, "learning_rate": 7.38647189583995e-05, "loss": 0.0959, "step": 4120 }, { "epoch": 7.8383262006657155, "grad_norm": 0.24655404686927795, "learning_rate": 7.385836773578915e-05, "loss": 0.0704, "step": 4121 }, { "epoch": 7.840228245363766, "grad_norm": 0.29220011830329895, "learning_rate": 7.385201651317879e-05, "loss": 0.0686, "step": 4122 }, { "epoch": 7.842130290061816, "grad_norm": 0.29641228914260864, "learning_rate": 7.384566529056844e-05, "loss": 0.1011, "step": 4123 }, { "epoch": 7.844032334759866, "grad_norm": 0.3333776593208313, "learning_rate": 7.383931406795809e-05, "loss": 0.0656, "step": 4124 }, { "epoch": 7.845934379457917, "grad_norm": 0.3494468331336975, "learning_rate": 7.383296284534773e-05, "loss": 0.0884, "step": 4125 }, { "epoch": 7.847836424155968, "grad_norm": 0.2233295738697052, "learning_rate": 7.382661162273738e-05, "loss": 0.0835, "step": 4126 }, { "epoch": 7.849738468854018, "grad_norm": 0.26915204524993896, "learning_rate": 7.382026040012703e-05, "loss": 0.0748, "step": 4127 }, { "epoch": 7.851640513552068, "grad_norm": 0.3454749286174774, "learning_rate": 7.381390917751667e-05, "loss": 0.0839, "step": 4128 }, { "epoch": 7.853542558250119, "grad_norm": 0.23676830530166626, "learning_rate": 7.380755795490632e-05, "loss": 0.0802, "step": 4129 }, { "epoch": 7.855444602948169, "grad_norm": 0.38189131021499634, "learning_rate": 7.380120673229598e-05, "loss": 0.0968, "step": 4130 }, { "epoch": 7.85734664764622, "grad_norm": 0.40328049659729004, "learning_rate": 7.379485550968563e-05, "loss": 0.0985, "step": 4131 }, { "epoch": 7.85924869234427, "grad_norm": 0.2955927848815918, "learning_rate": 7.378850428707527e-05, "loss": 0.0761, "step": 4132 }, { "epoch": 7.861150737042321, "grad_norm": 0.34690070152282715, "learning_rate": 7.37821530644649e-05, "loss": 0.0956, "step": 4133 }, { "epoch": 7.863052781740371, "grad_norm": 0.18155311048030853, "learning_rate": 7.377580184185457e-05, "loss": 0.0389, "step": 4134 }, { "epoch": 7.864954826438421, "grad_norm": 0.23770956695079803, "learning_rate": 7.376945061924421e-05, "loss": 0.0744, "step": 4135 }, { "epoch": 7.866856871136472, "grad_norm": 0.32734113931655884, "learning_rate": 7.376309939663385e-05, "loss": 0.0786, "step": 4136 }, { "epoch": 7.868758915834523, "grad_norm": 0.24840562045574188, "learning_rate": 7.37567481740235e-05, "loss": 0.0702, "step": 4137 }, { "epoch": 7.870660960532573, "grad_norm": 0.3209112882614136, "learning_rate": 7.375039695141315e-05, "loss": 0.084, "step": 4138 }, { "epoch": 7.872563005230623, "grad_norm": 0.34609726071357727, "learning_rate": 7.37440457288028e-05, "loss": 0.086, "step": 4139 }, { "epoch": 7.8744650499286735, "grad_norm": 0.24158047139644623, "learning_rate": 7.373769450619244e-05, "loss": 0.0799, "step": 4140 }, { "epoch": 7.8763670946267235, "grad_norm": 0.2257915884256363, "learning_rate": 7.373134328358209e-05, "loss": 0.0644, "step": 4141 }, { "epoch": 7.878269139324774, "grad_norm": 0.2671010196208954, "learning_rate": 7.372499206097174e-05, "loss": 0.0786, "step": 4142 }, { "epoch": 7.880171184022824, "grad_norm": 0.4526705741882324, "learning_rate": 7.371864083836138e-05, "loss": 0.1118, "step": 4143 }, { "epoch": 7.882073228720875, "grad_norm": 0.2693113684654236, "learning_rate": 7.371228961575103e-05, "loss": 0.0588, "step": 4144 }, { "epoch": 7.883975273418925, "grad_norm": 0.20709961652755737, "learning_rate": 7.370593839314069e-05, "loss": 0.0651, "step": 4145 }, { "epoch": 7.885877318116976, "grad_norm": 0.21224233508110046, "learning_rate": 7.369958717053032e-05, "loss": 0.0664, "step": 4146 }, { "epoch": 7.887779362815026, "grad_norm": 0.3241201639175415, "learning_rate": 7.369323594791998e-05, "loss": 0.0787, "step": 4147 }, { "epoch": 7.889681407513077, "grad_norm": 0.24492232501506805, "learning_rate": 7.368688472530963e-05, "loss": 0.0791, "step": 4148 }, { "epoch": 7.891583452211127, "grad_norm": 0.33030468225479126, "learning_rate": 7.368053350269928e-05, "loss": 0.0738, "step": 4149 }, { "epoch": 7.893485496909177, "grad_norm": 0.2864938974380493, "learning_rate": 7.367418228008892e-05, "loss": 0.0818, "step": 4150 }, { "epoch": 7.895387541607228, "grad_norm": 0.2391495555639267, "learning_rate": 7.366783105747857e-05, "loss": 0.0678, "step": 4151 }, { "epoch": 7.897289586305278, "grad_norm": 0.3550390303134918, "learning_rate": 7.366147983486822e-05, "loss": 0.0835, "step": 4152 }, { "epoch": 7.899191631003329, "grad_norm": 0.26836395263671875, "learning_rate": 7.365512861225786e-05, "loss": 0.0757, "step": 4153 }, { "epoch": 7.901093675701379, "grad_norm": 0.2554214894771576, "learning_rate": 7.364877738964751e-05, "loss": 0.0888, "step": 4154 }, { "epoch": 7.90299572039943, "grad_norm": 0.2723173499107361, "learning_rate": 7.364242616703716e-05, "loss": 0.0765, "step": 4155 }, { "epoch": 7.90489776509748, "grad_norm": 0.3314184844493866, "learning_rate": 7.36360749444268e-05, "loss": 0.1036, "step": 4156 }, { "epoch": 7.9067998097955305, "grad_norm": 0.2639399766921997, "learning_rate": 7.362972372181645e-05, "loss": 0.0573, "step": 4157 }, { "epoch": 7.9087018544935805, "grad_norm": 0.229736328125, "learning_rate": 7.36233724992061e-05, "loss": 0.0736, "step": 4158 }, { "epoch": 7.910603899191631, "grad_norm": 0.34588804841041565, "learning_rate": 7.361702127659574e-05, "loss": 0.0606, "step": 4159 }, { "epoch": 7.912505943889681, "grad_norm": 0.37485557794570923, "learning_rate": 7.36106700539854e-05, "loss": 0.0861, "step": 4160 }, { "epoch": 7.914407988587731, "grad_norm": 0.23467861115932465, "learning_rate": 7.360431883137505e-05, "loss": 0.0909, "step": 4161 }, { "epoch": 7.916310033285782, "grad_norm": 0.2061249315738678, "learning_rate": 7.35979676087647e-05, "loss": 0.0689, "step": 4162 }, { "epoch": 7.918212077983832, "grad_norm": 0.31142982840538025, "learning_rate": 7.359161638615434e-05, "loss": 0.0897, "step": 4163 }, { "epoch": 7.920114122681883, "grad_norm": 0.21099460124969482, "learning_rate": 7.358526516354398e-05, "loss": 0.0689, "step": 4164 }, { "epoch": 7.922016167379933, "grad_norm": 0.2100033015012741, "learning_rate": 7.357891394093364e-05, "loss": 0.0565, "step": 4165 }, { "epoch": 7.923918212077984, "grad_norm": 0.26403024792671204, "learning_rate": 7.357256271832328e-05, "loss": 0.0674, "step": 4166 }, { "epoch": 7.925820256776034, "grad_norm": 0.26634496450424194, "learning_rate": 7.356621149571293e-05, "loss": 0.0657, "step": 4167 }, { "epoch": 7.927722301474085, "grad_norm": 0.1982484757900238, "learning_rate": 7.355986027310258e-05, "loss": 0.0554, "step": 4168 }, { "epoch": 7.929624346172135, "grad_norm": 0.24268779158592224, "learning_rate": 7.355350905049222e-05, "loss": 0.0669, "step": 4169 }, { "epoch": 7.931526390870186, "grad_norm": 0.4950437545776367, "learning_rate": 7.354715782788187e-05, "loss": 0.101, "step": 4170 }, { "epoch": 7.933428435568236, "grad_norm": 0.39911749958992004, "learning_rate": 7.354080660527151e-05, "loss": 0.0784, "step": 4171 }, { "epoch": 7.935330480266286, "grad_norm": 0.3811649680137634, "learning_rate": 7.353445538266116e-05, "loss": 0.0884, "step": 4172 }, { "epoch": 7.937232524964337, "grad_norm": 0.20468108355998993, "learning_rate": 7.352810416005081e-05, "loss": 0.0578, "step": 4173 }, { "epoch": 7.939134569662387, "grad_norm": 0.34287476539611816, "learning_rate": 7.352175293744045e-05, "loss": 0.0692, "step": 4174 }, { "epoch": 7.941036614360438, "grad_norm": 0.23349088430404663, "learning_rate": 7.351540171483012e-05, "loss": 0.0706, "step": 4175 }, { "epoch": 7.942938659058488, "grad_norm": 0.33331581950187683, "learning_rate": 7.350905049221976e-05, "loss": 0.0788, "step": 4176 }, { "epoch": 7.9448407037565385, "grad_norm": 0.2772316038608551, "learning_rate": 7.35026992696094e-05, "loss": 0.0766, "step": 4177 }, { "epoch": 7.9467427484545885, "grad_norm": 0.23243772983551025, "learning_rate": 7.349634804699905e-05, "loss": 0.0588, "step": 4178 }, { "epoch": 7.948644793152639, "grad_norm": 0.2223922461271286, "learning_rate": 7.34899968243887e-05, "loss": 0.051, "step": 4179 }, { "epoch": 7.950546837850689, "grad_norm": 0.29967790842056274, "learning_rate": 7.348364560177835e-05, "loss": 0.0754, "step": 4180 }, { "epoch": 7.95244888254874, "grad_norm": 0.317216694355011, "learning_rate": 7.347729437916799e-05, "loss": 0.0873, "step": 4181 }, { "epoch": 7.95435092724679, "grad_norm": 0.28003180027008057, "learning_rate": 7.347094315655764e-05, "loss": 0.0795, "step": 4182 }, { "epoch": 7.95625297194484, "grad_norm": 0.38882380723953247, "learning_rate": 7.346459193394729e-05, "loss": 0.0874, "step": 4183 }, { "epoch": 7.958155016642891, "grad_norm": 0.21491193771362305, "learning_rate": 7.345824071133693e-05, "loss": 0.0829, "step": 4184 }, { "epoch": 7.960057061340941, "grad_norm": 0.2690449655056, "learning_rate": 7.345188948872658e-05, "loss": 0.0613, "step": 4185 }, { "epoch": 7.961959106038992, "grad_norm": 0.27885550260543823, "learning_rate": 7.344553826611623e-05, "loss": 0.0692, "step": 4186 }, { "epoch": 7.963861150737042, "grad_norm": 0.20747528970241547, "learning_rate": 7.343918704350587e-05, "loss": 0.069, "step": 4187 }, { "epoch": 7.965763195435093, "grad_norm": 0.33041733503341675, "learning_rate": 7.343283582089552e-05, "loss": 0.099, "step": 4188 }, { "epoch": 7.967665240133143, "grad_norm": 0.2511996626853943, "learning_rate": 7.342648459828518e-05, "loss": 0.0775, "step": 4189 }, { "epoch": 7.969567284831194, "grad_norm": 0.2743570804595947, "learning_rate": 7.342013337567481e-05, "loss": 0.0877, "step": 4190 }, { "epoch": 7.971469329529244, "grad_norm": 0.2678762972354889, "learning_rate": 7.341378215306447e-05, "loss": 0.0848, "step": 4191 }, { "epoch": 7.973371374227295, "grad_norm": 0.23124247789382935, "learning_rate": 7.340743093045412e-05, "loss": 0.0725, "step": 4192 }, { "epoch": 7.975273418925345, "grad_norm": 0.2571721374988556, "learning_rate": 7.340107970784377e-05, "loss": 0.0806, "step": 4193 }, { "epoch": 7.977175463623395, "grad_norm": 0.2708531618118286, "learning_rate": 7.339472848523341e-05, "loss": 0.073, "step": 4194 }, { "epoch": 7.979077508321446, "grad_norm": 0.17527925968170166, "learning_rate": 7.338837726262305e-05, "loss": 0.0977, "step": 4195 }, { "epoch": 7.980979553019496, "grad_norm": 0.23408342897891998, "learning_rate": 7.338202604001271e-05, "loss": 0.0769, "step": 4196 }, { "epoch": 7.9828815977175465, "grad_norm": 0.27631494402885437, "learning_rate": 7.337567481740235e-05, "loss": 0.0878, "step": 4197 }, { "epoch": 7.9847836424155965, "grad_norm": 0.28769686818122864, "learning_rate": 7.3369323594792e-05, "loss": 0.0979, "step": 4198 }, { "epoch": 7.986685687113647, "grad_norm": 0.2594468593597412, "learning_rate": 7.336297237218165e-05, "loss": 0.0829, "step": 4199 }, { "epoch": 7.988587731811697, "grad_norm": 0.46053510904312134, "learning_rate": 7.335662114957129e-05, "loss": 0.0974, "step": 4200 }, { "epoch": 7.990489776509748, "grad_norm": 0.29801467061042786, "learning_rate": 7.335026992696094e-05, "loss": 0.0948, "step": 4201 }, { "epoch": 7.992391821207798, "grad_norm": 0.374401330947876, "learning_rate": 7.334391870435058e-05, "loss": 0.1119, "step": 4202 }, { "epoch": 7.994293865905849, "grad_norm": 0.3322446644306183, "learning_rate": 7.333756748174025e-05, "loss": 0.0921, "step": 4203 }, { "epoch": 7.996195910603899, "grad_norm": 0.3914404511451721, "learning_rate": 7.333121625912989e-05, "loss": 0.0879, "step": 4204 }, { "epoch": 7.998097955301949, "grad_norm": 0.3230751156806946, "learning_rate": 7.332486503651952e-05, "loss": 0.0785, "step": 4205 }, { "epoch": 8.0, "grad_norm": 0.3116169571876526, "learning_rate": 7.331851381390919e-05, "loss": 0.1309, "step": 4206 }, { "epoch": 8.00190204469805, "grad_norm": 0.1682499498128891, "learning_rate": 7.331216259129883e-05, "loss": 0.0519, "step": 4207 }, { "epoch": 8.0038040893961, "grad_norm": 0.31888794898986816, "learning_rate": 7.330581136868847e-05, "loss": 0.0693, "step": 4208 }, { "epoch": 8.00570613409415, "grad_norm": 0.1675415188074112, "learning_rate": 7.329946014607812e-05, "loss": 0.049, "step": 4209 }, { "epoch": 8.007608178792202, "grad_norm": 0.2735571265220642, "learning_rate": 7.329310892346777e-05, "loss": 0.0682, "step": 4210 }, { "epoch": 8.009510223490253, "grad_norm": 0.2755778133869171, "learning_rate": 7.328675770085742e-05, "loss": 0.072, "step": 4211 }, { "epoch": 8.011412268188302, "grad_norm": 0.2477390468120575, "learning_rate": 7.328040647824706e-05, "loss": 0.0613, "step": 4212 }, { "epoch": 8.013314312886353, "grad_norm": 0.21356599032878876, "learning_rate": 7.327405525563671e-05, "loss": 0.0637, "step": 4213 }, { "epoch": 8.015216357584404, "grad_norm": 0.2848338782787323, "learning_rate": 7.326770403302636e-05, "loss": 0.0742, "step": 4214 }, { "epoch": 8.017118402282454, "grad_norm": 0.30361059308052063, "learning_rate": 7.3261352810416e-05, "loss": 0.0722, "step": 4215 }, { "epoch": 8.019020446980504, "grad_norm": 0.2148815244436264, "learning_rate": 7.325500158780565e-05, "loss": 0.072, "step": 4216 }, { "epoch": 8.020922491678554, "grad_norm": 0.3315644860267639, "learning_rate": 7.32486503651953e-05, "loss": 0.1286, "step": 4217 }, { "epoch": 8.022824536376605, "grad_norm": 0.30502501130104065, "learning_rate": 7.324229914258494e-05, "loss": 0.0857, "step": 4218 }, { "epoch": 8.024726581074654, "grad_norm": 0.32918721437454224, "learning_rate": 7.32359479199746e-05, "loss": 0.0695, "step": 4219 }, { "epoch": 8.026628625772705, "grad_norm": 0.19223280251026154, "learning_rate": 7.322959669736425e-05, "loss": 0.0594, "step": 4220 }, { "epoch": 8.028530670470756, "grad_norm": 0.3204280138015747, "learning_rate": 7.32232454747539e-05, "loss": 0.0752, "step": 4221 }, { "epoch": 8.030432715168807, "grad_norm": 0.27245232462882996, "learning_rate": 7.321689425214354e-05, "loss": 0.0617, "step": 4222 }, { "epoch": 8.032334759866856, "grad_norm": 0.26873305439949036, "learning_rate": 7.321054302953319e-05, "loss": 0.1057, "step": 4223 }, { "epoch": 8.034236804564907, "grad_norm": 0.36406102776527405, "learning_rate": 7.320419180692284e-05, "loss": 0.0583, "step": 4224 }, { "epoch": 8.036138849262958, "grad_norm": 0.25883886218070984, "learning_rate": 7.319784058431248e-05, "loss": 0.0634, "step": 4225 }, { "epoch": 8.038040893961009, "grad_norm": 0.30319416522979736, "learning_rate": 7.319148936170213e-05, "loss": 0.0706, "step": 4226 }, { "epoch": 8.039942938659058, "grad_norm": 0.3315434753894806, "learning_rate": 7.318513813909178e-05, "loss": 0.0611, "step": 4227 }, { "epoch": 8.041844983357109, "grad_norm": 0.22568808495998383, "learning_rate": 7.317878691648142e-05, "loss": 0.0847, "step": 4228 }, { "epoch": 8.04374702805516, "grad_norm": 0.3340080976486206, "learning_rate": 7.317243569387107e-05, "loss": 0.0767, "step": 4229 }, { "epoch": 8.045649072753209, "grad_norm": 0.20436561107635498, "learning_rate": 7.316608447126073e-05, "loss": 0.0849, "step": 4230 }, { "epoch": 8.04755111745126, "grad_norm": 0.2909284830093384, "learning_rate": 7.315973324865036e-05, "loss": 0.055, "step": 4231 }, { "epoch": 8.04945316214931, "grad_norm": 0.24360747635364532, "learning_rate": 7.315338202604002e-05, "loss": 0.0852, "step": 4232 }, { "epoch": 8.051355206847362, "grad_norm": 0.26607367396354675, "learning_rate": 7.314703080342967e-05, "loss": 0.0739, "step": 4233 }, { "epoch": 8.05325725154541, "grad_norm": 0.3718374967575073, "learning_rate": 7.314067958081932e-05, "loss": 0.074, "step": 4234 }, { "epoch": 8.055159296243461, "grad_norm": 0.2629009485244751, "learning_rate": 7.313432835820896e-05, "loss": 0.0966, "step": 4235 }, { "epoch": 8.057061340941512, "grad_norm": 0.24625834822654724, "learning_rate": 7.31279771355986e-05, "loss": 0.0733, "step": 4236 }, { "epoch": 8.058963385639563, "grad_norm": 0.2897358238697052, "learning_rate": 7.312162591298826e-05, "loss": 0.0497, "step": 4237 }, { "epoch": 8.060865430337612, "grad_norm": 0.23193830251693726, "learning_rate": 7.31152746903779e-05, "loss": 0.0612, "step": 4238 }, { "epoch": 8.062767475035663, "grad_norm": 0.22554795444011688, "learning_rate": 7.310892346776755e-05, "loss": 0.0819, "step": 4239 }, { "epoch": 8.064669519733714, "grad_norm": 0.2712651491165161, "learning_rate": 7.31025722451572e-05, "loss": 0.079, "step": 4240 }, { "epoch": 8.066571564431765, "grad_norm": 0.2655814290046692, "learning_rate": 7.309622102254684e-05, "loss": 0.0824, "step": 4241 }, { "epoch": 8.068473609129814, "grad_norm": 0.29547134041786194, "learning_rate": 7.30898697999365e-05, "loss": 0.0776, "step": 4242 }, { "epoch": 8.070375653827865, "grad_norm": 0.19004866480827332, "learning_rate": 7.308351857732613e-05, "loss": 0.0631, "step": 4243 }, { "epoch": 8.072277698525916, "grad_norm": 0.2522698640823364, "learning_rate": 7.307716735471578e-05, "loss": 0.0882, "step": 4244 }, { "epoch": 8.074179743223965, "grad_norm": 0.1891438066959381, "learning_rate": 7.307081613210544e-05, "loss": 0.0744, "step": 4245 }, { "epoch": 8.076081787922016, "grad_norm": 0.18746241927146912, "learning_rate": 7.306446490949507e-05, "loss": 0.0534, "step": 4246 }, { "epoch": 8.077983832620067, "grad_norm": 0.2318631261587143, "learning_rate": 7.305811368688473e-05, "loss": 0.0749, "step": 4247 }, { "epoch": 8.079885877318118, "grad_norm": 0.2980051040649414, "learning_rate": 7.305176246427438e-05, "loss": 0.0996, "step": 4248 }, { "epoch": 8.081787922016167, "grad_norm": 0.26681259274482727, "learning_rate": 7.304541124166402e-05, "loss": 0.0759, "step": 4249 }, { "epoch": 8.083689966714218, "grad_norm": 0.27438244223594666, "learning_rate": 7.303906001905367e-05, "loss": 0.1157, "step": 4250 }, { "epoch": 8.085592011412269, "grad_norm": 0.3032076060771942, "learning_rate": 7.303270879644332e-05, "loss": 0.0696, "step": 4251 }, { "epoch": 8.08749405611032, "grad_norm": 0.23937378823757172, "learning_rate": 7.302635757383297e-05, "loss": 0.0784, "step": 4252 }, { "epoch": 8.089396100808369, "grad_norm": 0.24228748679161072, "learning_rate": 7.302000635122261e-05, "loss": 0.0677, "step": 4253 }, { "epoch": 8.09129814550642, "grad_norm": 0.14060363173484802, "learning_rate": 7.301365512861226e-05, "loss": 0.0618, "step": 4254 }, { "epoch": 8.09320019020447, "grad_norm": 0.23839721083641052, "learning_rate": 7.300730390600191e-05, "loss": 0.0686, "step": 4255 }, { "epoch": 8.09510223490252, "grad_norm": 0.41366299986839294, "learning_rate": 7.300095268339155e-05, "loss": 0.0925, "step": 4256 }, { "epoch": 8.09700427960057, "grad_norm": 0.2656678855419159, "learning_rate": 7.29946014607812e-05, "loss": 0.0579, "step": 4257 }, { "epoch": 8.098906324298621, "grad_norm": 0.25097447633743286, "learning_rate": 7.298825023817086e-05, "loss": 0.0733, "step": 4258 }, { "epoch": 8.100808368996672, "grad_norm": 0.30444836616516113, "learning_rate": 7.29818990155605e-05, "loss": 0.0576, "step": 4259 }, { "epoch": 8.102710413694721, "grad_norm": 0.2674120366573334, "learning_rate": 7.297554779295015e-05, "loss": 0.0662, "step": 4260 }, { "epoch": 8.104612458392772, "grad_norm": 0.30446937680244446, "learning_rate": 7.29691965703398e-05, "loss": 0.064, "step": 4261 }, { "epoch": 8.106514503090823, "grad_norm": 0.20771685242652893, "learning_rate": 7.296284534772944e-05, "loss": 0.0689, "step": 4262 }, { "epoch": 8.108416547788874, "grad_norm": 0.30618518590927124, "learning_rate": 7.295649412511909e-05, "loss": 0.0701, "step": 4263 }, { "epoch": 8.110318592486923, "grad_norm": 0.22746066749095917, "learning_rate": 7.295014290250874e-05, "loss": 0.0888, "step": 4264 }, { "epoch": 8.112220637184974, "grad_norm": 0.19552601873874664, "learning_rate": 7.294379167989839e-05, "loss": 0.0638, "step": 4265 }, { "epoch": 8.114122681883025, "grad_norm": 0.2784650921821594, "learning_rate": 7.293744045728803e-05, "loss": 0.0715, "step": 4266 }, { "epoch": 8.116024726581074, "grad_norm": 0.28357699513435364, "learning_rate": 7.293108923467767e-05, "loss": 0.0779, "step": 4267 }, { "epoch": 8.117926771279125, "grad_norm": 0.3271869421005249, "learning_rate": 7.292473801206733e-05, "loss": 0.0788, "step": 4268 }, { "epoch": 8.119828815977176, "grad_norm": 0.29517510533332825, "learning_rate": 7.291838678945697e-05, "loss": 0.0851, "step": 4269 }, { "epoch": 8.121730860675227, "grad_norm": 0.1606910526752472, "learning_rate": 7.291203556684662e-05, "loss": 0.0703, "step": 4270 }, { "epoch": 8.123632905373276, "grad_norm": 0.2820865213871002, "learning_rate": 7.290568434423628e-05, "loss": 0.0785, "step": 4271 }, { "epoch": 8.125534950071327, "grad_norm": 0.2265879064798355, "learning_rate": 7.289933312162591e-05, "loss": 0.0593, "step": 4272 }, { "epoch": 8.127436994769377, "grad_norm": 0.25888168811798096, "learning_rate": 7.289298189901557e-05, "loss": 0.08, "step": 4273 }, { "epoch": 8.129339039467428, "grad_norm": 0.3094843924045563, "learning_rate": 7.28866306764052e-05, "loss": 0.0724, "step": 4274 }, { "epoch": 8.131241084165477, "grad_norm": 0.29158642888069153, "learning_rate": 7.288027945379487e-05, "loss": 0.0704, "step": 4275 }, { "epoch": 8.133143128863528, "grad_norm": 0.4067235291004181, "learning_rate": 7.287392823118451e-05, "loss": 0.0848, "step": 4276 }, { "epoch": 8.13504517356158, "grad_norm": 0.31271129846572876, "learning_rate": 7.286757700857415e-05, "loss": 0.0753, "step": 4277 }, { "epoch": 8.136947218259628, "grad_norm": 0.2223985642194748, "learning_rate": 7.286122578596381e-05, "loss": 0.0619, "step": 4278 }, { "epoch": 8.13884926295768, "grad_norm": 0.19151456654071808, "learning_rate": 7.285487456335345e-05, "loss": 0.0774, "step": 4279 }, { "epoch": 8.14075130765573, "grad_norm": 0.2343282848596573, "learning_rate": 7.284852334074309e-05, "loss": 0.0662, "step": 4280 }, { "epoch": 8.142653352353781, "grad_norm": 0.22527043521404266, "learning_rate": 7.284217211813274e-05, "loss": 0.0779, "step": 4281 }, { "epoch": 8.14455539705183, "grad_norm": 0.13312441110610962, "learning_rate": 7.283582089552239e-05, "loss": 0.0523, "step": 4282 }, { "epoch": 8.146457441749881, "grad_norm": 0.18979710340499878, "learning_rate": 7.282946967291204e-05, "loss": 0.0811, "step": 4283 }, { "epoch": 8.148359486447932, "grad_norm": 0.31543537974357605, "learning_rate": 7.282311845030168e-05, "loss": 0.0756, "step": 4284 }, { "epoch": 8.150261531145983, "grad_norm": 0.23982515931129456, "learning_rate": 7.281676722769133e-05, "loss": 0.0726, "step": 4285 }, { "epoch": 8.152163575844032, "grad_norm": 0.26044005155563354, "learning_rate": 7.281041600508099e-05, "loss": 0.0571, "step": 4286 }, { "epoch": 8.154065620542083, "grad_norm": 0.26153257489204407, "learning_rate": 7.280406478247062e-05, "loss": 0.0729, "step": 4287 }, { "epoch": 8.155967665240134, "grad_norm": 0.3593063950538635, "learning_rate": 7.279771355986028e-05, "loss": 0.1001, "step": 4288 }, { "epoch": 8.157869709938183, "grad_norm": 0.2667100429534912, "learning_rate": 7.279136233724993e-05, "loss": 0.0642, "step": 4289 }, { "epoch": 8.159771754636234, "grad_norm": 0.2793238162994385, "learning_rate": 7.278501111463957e-05, "loss": 0.0629, "step": 4290 }, { "epoch": 8.161673799334284, "grad_norm": 0.20465384423732758, "learning_rate": 7.277865989202922e-05, "loss": 0.0547, "step": 4291 }, { "epoch": 8.163575844032335, "grad_norm": 0.19889406859874725, "learning_rate": 7.277230866941887e-05, "loss": 0.0595, "step": 4292 }, { "epoch": 8.165477888730384, "grad_norm": 0.3476722836494446, "learning_rate": 7.276595744680852e-05, "loss": 0.0899, "step": 4293 }, { "epoch": 8.167379933428435, "grad_norm": 0.3568250834941864, "learning_rate": 7.275960622419816e-05, "loss": 0.0834, "step": 4294 }, { "epoch": 8.169281978126486, "grad_norm": 0.37351176142692566, "learning_rate": 7.275325500158781e-05, "loss": 0.0731, "step": 4295 }, { "epoch": 8.171184022824537, "grad_norm": 0.2659788429737091, "learning_rate": 7.274690377897746e-05, "loss": 0.0664, "step": 4296 }, { "epoch": 8.173086067522586, "grad_norm": 0.29493045806884766, "learning_rate": 7.27405525563671e-05, "loss": 0.0903, "step": 4297 }, { "epoch": 8.174988112220637, "grad_norm": 0.23891212046146393, "learning_rate": 7.273420133375674e-05, "loss": 0.0754, "step": 4298 }, { "epoch": 8.176890156918688, "grad_norm": 0.3760624825954437, "learning_rate": 7.27278501111464e-05, "loss": 0.0863, "step": 4299 }, { "epoch": 8.178792201616737, "grad_norm": 0.23999914526939392, "learning_rate": 7.272149888853604e-05, "loss": 0.0622, "step": 4300 }, { "epoch": 8.180694246314788, "grad_norm": 0.3227231204509735, "learning_rate": 7.27151476659257e-05, "loss": 0.0695, "step": 4301 }, { "epoch": 8.182596291012839, "grad_norm": 0.20113304257392883, "learning_rate": 7.270879644331535e-05, "loss": 0.0687, "step": 4302 }, { "epoch": 8.18449833571089, "grad_norm": 0.31149378418922424, "learning_rate": 7.270244522070499e-05, "loss": 0.069, "step": 4303 }, { "epoch": 8.186400380408939, "grad_norm": 0.2091590166091919, "learning_rate": 7.269609399809464e-05, "loss": 0.0623, "step": 4304 }, { "epoch": 8.18830242510699, "grad_norm": 0.1678960770368576, "learning_rate": 7.268974277548428e-05, "loss": 0.0634, "step": 4305 }, { "epoch": 8.19020446980504, "grad_norm": 0.1816246211528778, "learning_rate": 7.268339155287394e-05, "loss": 0.0619, "step": 4306 }, { "epoch": 8.192106514503092, "grad_norm": 0.21957528591156006, "learning_rate": 7.267704033026358e-05, "loss": 0.0648, "step": 4307 }, { "epoch": 8.19400855920114, "grad_norm": 0.24338406324386597, "learning_rate": 7.267068910765322e-05, "loss": 0.0674, "step": 4308 }, { "epoch": 8.195910603899192, "grad_norm": 0.29942211508750916, "learning_rate": 7.266433788504288e-05, "loss": 0.0993, "step": 4309 }, { "epoch": 8.197812648597242, "grad_norm": 0.3957151770591736, "learning_rate": 7.265798666243252e-05, "loss": 0.0981, "step": 4310 }, { "epoch": 8.199714693295292, "grad_norm": 0.5199161767959595, "learning_rate": 7.265163543982217e-05, "loss": 0.0858, "step": 4311 }, { "epoch": 8.201616737993342, "grad_norm": 0.3623242974281311, "learning_rate": 7.264528421721181e-05, "loss": 0.0801, "step": 4312 }, { "epoch": 8.203518782691393, "grad_norm": 0.29007983207702637, "learning_rate": 7.263893299460146e-05, "loss": 0.0754, "step": 4313 }, { "epoch": 8.205420827389444, "grad_norm": 0.21117328107357025, "learning_rate": 7.263258177199111e-05, "loss": 0.0665, "step": 4314 }, { "epoch": 8.207322872087493, "grad_norm": 0.3226154148578644, "learning_rate": 7.262623054938075e-05, "loss": 0.0776, "step": 4315 }, { "epoch": 8.209224916785544, "grad_norm": 0.23427355289459229, "learning_rate": 7.26198793267704e-05, "loss": 0.0711, "step": 4316 }, { "epoch": 8.211126961483595, "grad_norm": 0.26657065749168396, "learning_rate": 7.261352810416006e-05, "loss": 0.0693, "step": 4317 }, { "epoch": 8.213029006181646, "grad_norm": 0.2999359965324402, "learning_rate": 7.26071768815497e-05, "loss": 0.0762, "step": 4318 }, { "epoch": 8.214931050879695, "grad_norm": 0.2279568612575531, "learning_rate": 7.260082565893935e-05, "loss": 0.0802, "step": 4319 }, { "epoch": 8.216833095577746, "grad_norm": 0.17314577102661133, "learning_rate": 7.2594474436329e-05, "loss": 0.0658, "step": 4320 }, { "epoch": 8.218735140275797, "grad_norm": 0.3194338381290436, "learning_rate": 7.258812321371864e-05, "loss": 0.0891, "step": 4321 }, { "epoch": 8.220637184973846, "grad_norm": 0.32515963912010193, "learning_rate": 7.258177199110829e-05, "loss": 0.0813, "step": 4322 }, { "epoch": 8.222539229671897, "grad_norm": 0.25820595026016235, "learning_rate": 7.257542076849794e-05, "loss": 0.0851, "step": 4323 }, { "epoch": 8.224441274369948, "grad_norm": 0.27671653032302856, "learning_rate": 7.256906954588759e-05, "loss": 0.075, "step": 4324 }, { "epoch": 8.226343319067999, "grad_norm": 0.3091162145137787, "learning_rate": 7.256271832327723e-05, "loss": 0.0876, "step": 4325 }, { "epoch": 8.228245363766048, "grad_norm": 0.2991148829460144, "learning_rate": 7.255636710066688e-05, "loss": 0.0826, "step": 4326 }, { "epoch": 8.230147408464099, "grad_norm": 0.2437266856431961, "learning_rate": 7.255001587805653e-05, "loss": 0.0801, "step": 4327 }, { "epoch": 8.23204945316215, "grad_norm": 0.3508904278278351, "learning_rate": 7.254366465544617e-05, "loss": 0.1039, "step": 4328 }, { "epoch": 8.2339514978602, "grad_norm": 0.21462726593017578, "learning_rate": 7.253731343283582e-05, "loss": 0.0528, "step": 4329 }, { "epoch": 8.23585354255825, "grad_norm": 0.24533016979694366, "learning_rate": 7.253096221022548e-05, "loss": 0.1122, "step": 4330 }, { "epoch": 8.2377555872563, "grad_norm": 0.36849719285964966, "learning_rate": 7.252461098761511e-05, "loss": 0.1083, "step": 4331 }, { "epoch": 8.239657631954351, "grad_norm": 0.1650083214044571, "learning_rate": 7.251825976500477e-05, "loss": 0.0499, "step": 4332 }, { "epoch": 8.241559676652402, "grad_norm": 0.27161771059036255, "learning_rate": 7.251190854239442e-05, "loss": 0.0839, "step": 4333 }, { "epoch": 8.243461721350451, "grad_norm": 0.43029820919036865, "learning_rate": 7.250555731978406e-05, "loss": 0.099, "step": 4334 }, { "epoch": 8.245363766048502, "grad_norm": 0.19337327778339386, "learning_rate": 7.249920609717371e-05, "loss": 0.0653, "step": 4335 }, { "epoch": 8.247265810746553, "grad_norm": 0.24718555808067322, "learning_rate": 7.249285487456336e-05, "loss": 0.0796, "step": 4336 }, { "epoch": 8.249167855444602, "grad_norm": 0.20396992564201355, "learning_rate": 7.248650365195301e-05, "loss": 0.0651, "step": 4337 }, { "epoch": 8.251069900142653, "grad_norm": 0.3806256055831909, "learning_rate": 7.248015242934265e-05, "loss": 0.0906, "step": 4338 }, { "epoch": 8.252971944840704, "grad_norm": 0.2927926778793335, "learning_rate": 7.247380120673229e-05, "loss": 0.0719, "step": 4339 }, { "epoch": 8.254873989538755, "grad_norm": 0.2693699300289154, "learning_rate": 7.246744998412195e-05, "loss": 0.0835, "step": 4340 }, { "epoch": 8.256776034236804, "grad_norm": 0.22693417966365814, "learning_rate": 7.246109876151159e-05, "loss": 0.0642, "step": 4341 }, { "epoch": 8.258678078934855, "grad_norm": 0.25865957140922546, "learning_rate": 7.245474753890124e-05, "loss": 0.0855, "step": 4342 }, { "epoch": 8.260580123632906, "grad_norm": 0.21432361006736755, "learning_rate": 7.24483963162909e-05, "loss": 0.0497, "step": 4343 }, { "epoch": 8.262482168330957, "grad_norm": 0.21584494411945343, "learning_rate": 7.244204509368053e-05, "loss": 0.0741, "step": 4344 }, { "epoch": 8.264384213029006, "grad_norm": 0.26091018319129944, "learning_rate": 7.243569387107019e-05, "loss": 0.0681, "step": 4345 }, { "epoch": 8.266286257727057, "grad_norm": 0.1850460171699524, "learning_rate": 7.242934264845982e-05, "loss": 0.075, "step": 4346 }, { "epoch": 8.268188302425107, "grad_norm": 0.21847312152385712, "learning_rate": 7.242299142584949e-05, "loss": 0.0798, "step": 4347 }, { "epoch": 8.270090347123157, "grad_norm": 0.1917494237422943, "learning_rate": 7.241664020323913e-05, "loss": 0.0556, "step": 4348 }, { "epoch": 8.271992391821207, "grad_norm": 0.320363849401474, "learning_rate": 7.241028898062877e-05, "loss": 0.0649, "step": 4349 }, { "epoch": 8.273894436519258, "grad_norm": 0.2682509422302246, "learning_rate": 7.240393775801843e-05, "loss": 0.0865, "step": 4350 }, { "epoch": 8.27579648121731, "grad_norm": 0.2643677294254303, "learning_rate": 7.239758653540807e-05, "loss": 0.0794, "step": 4351 }, { "epoch": 8.277698525915358, "grad_norm": 0.21975865960121155, "learning_rate": 7.239123531279771e-05, "loss": 0.0756, "step": 4352 }, { "epoch": 8.27960057061341, "grad_norm": 0.4391152560710907, "learning_rate": 7.238488409018736e-05, "loss": 0.0836, "step": 4353 }, { "epoch": 8.28150261531146, "grad_norm": 0.2395821362733841, "learning_rate": 7.237853286757701e-05, "loss": 0.0619, "step": 4354 }, { "epoch": 8.283404660009511, "grad_norm": 0.24098101258277893, "learning_rate": 7.237218164496666e-05, "loss": 0.0827, "step": 4355 }, { "epoch": 8.28530670470756, "grad_norm": 0.2938069701194763, "learning_rate": 7.23658304223563e-05, "loss": 0.0726, "step": 4356 }, { "epoch": 8.287208749405611, "grad_norm": 0.24898219108581543, "learning_rate": 7.235947919974595e-05, "loss": 0.0709, "step": 4357 }, { "epoch": 8.289110794103662, "grad_norm": 0.361019104719162, "learning_rate": 7.23531279771356e-05, "loss": 0.0702, "step": 4358 }, { "epoch": 8.291012838801711, "grad_norm": 0.2318553328514099, "learning_rate": 7.234677675452524e-05, "loss": 0.0546, "step": 4359 }, { "epoch": 8.292914883499762, "grad_norm": 0.23865951597690582, "learning_rate": 7.23404255319149e-05, "loss": 0.0737, "step": 4360 }, { "epoch": 8.294816928197813, "grad_norm": 0.21517156064510345, "learning_rate": 7.233407430930455e-05, "loss": 0.0627, "step": 4361 }, { "epoch": 8.296718972895864, "grad_norm": 0.30916860699653625, "learning_rate": 7.232772308669419e-05, "loss": 0.0637, "step": 4362 }, { "epoch": 8.298621017593913, "grad_norm": 0.16915148496627808, "learning_rate": 7.232137186408384e-05, "loss": 0.061, "step": 4363 }, { "epoch": 8.300523062291964, "grad_norm": 0.19052615761756897, "learning_rate": 7.231502064147349e-05, "loss": 0.0504, "step": 4364 }, { "epoch": 8.302425106990015, "grad_norm": 0.35525116324424744, "learning_rate": 7.230866941886314e-05, "loss": 0.0802, "step": 4365 }, { "epoch": 8.304327151688065, "grad_norm": 0.2397274225950241, "learning_rate": 7.230231819625278e-05, "loss": 0.088, "step": 4366 }, { "epoch": 8.306229196386115, "grad_norm": 0.30947449803352356, "learning_rate": 7.229596697364243e-05, "loss": 0.0997, "step": 4367 }, { "epoch": 8.308131241084165, "grad_norm": 0.3109225630760193, "learning_rate": 7.228961575103208e-05, "loss": 0.0631, "step": 4368 }, { "epoch": 8.310033285782216, "grad_norm": 0.27324262261390686, "learning_rate": 7.228326452842172e-05, "loss": 0.0741, "step": 4369 }, { "epoch": 8.311935330480265, "grad_norm": 0.3617977201938629, "learning_rate": 7.227691330581136e-05, "loss": 0.0787, "step": 4370 }, { "epoch": 8.313837375178316, "grad_norm": 0.2727469205856323, "learning_rate": 7.227056208320103e-05, "loss": 0.0584, "step": 4371 }, { "epoch": 8.315739419876367, "grad_norm": 0.24662989377975464, "learning_rate": 7.226421086059066e-05, "loss": 0.0556, "step": 4372 }, { "epoch": 8.317641464574418, "grad_norm": 0.264032244682312, "learning_rate": 7.225785963798032e-05, "loss": 0.067, "step": 4373 }, { "epoch": 8.319543509272467, "grad_norm": 0.19273340702056885, "learning_rate": 7.225150841536997e-05, "loss": 0.0626, "step": 4374 }, { "epoch": 8.321445553970518, "grad_norm": 0.2576651871204376, "learning_rate": 7.22451571927596e-05, "loss": 0.0613, "step": 4375 }, { "epoch": 8.323347598668569, "grad_norm": 0.40740740299224854, "learning_rate": 7.223880597014926e-05, "loss": 0.0928, "step": 4376 }, { "epoch": 8.32524964336662, "grad_norm": 0.30207934975624084, "learning_rate": 7.22324547475389e-05, "loss": 0.0837, "step": 4377 }, { "epoch": 8.327151688064669, "grad_norm": 0.3514809012413025, "learning_rate": 7.222610352492856e-05, "loss": 0.1216, "step": 4378 }, { "epoch": 8.32905373276272, "grad_norm": 0.3978719711303711, "learning_rate": 7.22197523023182e-05, "loss": 0.0879, "step": 4379 }, { "epoch": 8.33095577746077, "grad_norm": 0.23223471641540527, "learning_rate": 7.221340107970784e-05, "loss": 0.0664, "step": 4380 }, { "epoch": 8.332857822158822, "grad_norm": 0.31868988275527954, "learning_rate": 7.22070498570975e-05, "loss": 0.0947, "step": 4381 }, { "epoch": 8.33475986685687, "grad_norm": 0.23482367396354675, "learning_rate": 7.220069863448714e-05, "loss": 0.0642, "step": 4382 }, { "epoch": 8.336661911554922, "grad_norm": 0.2527036964893341, "learning_rate": 7.21943474118768e-05, "loss": 0.0639, "step": 4383 }, { "epoch": 8.338563956252973, "grad_norm": 0.3074730932712555, "learning_rate": 7.218799618926643e-05, "loss": 0.0575, "step": 4384 }, { "epoch": 8.340466000951022, "grad_norm": 0.253987193107605, "learning_rate": 7.218164496665608e-05, "loss": 0.0787, "step": 4385 }, { "epoch": 8.342368045649073, "grad_norm": 0.19843977689743042, "learning_rate": 7.217529374404574e-05, "loss": 0.0733, "step": 4386 }, { "epoch": 8.344270090347123, "grad_norm": 0.3194156587123871, "learning_rate": 7.216894252143537e-05, "loss": 0.0811, "step": 4387 }, { "epoch": 8.346172135045174, "grad_norm": 0.3143181800842285, "learning_rate": 7.216259129882503e-05, "loss": 0.0753, "step": 4388 }, { "epoch": 8.348074179743223, "grad_norm": 0.24854514002799988, "learning_rate": 7.215624007621468e-05, "loss": 0.0545, "step": 4389 }, { "epoch": 8.349976224441274, "grad_norm": 0.30429551005363464, "learning_rate": 7.214988885360432e-05, "loss": 0.0629, "step": 4390 }, { "epoch": 8.351878269139325, "grad_norm": 0.23647554218769073, "learning_rate": 7.214353763099397e-05, "loss": 0.0614, "step": 4391 }, { "epoch": 8.353780313837376, "grad_norm": 0.2253941148519516, "learning_rate": 7.213718640838362e-05, "loss": 0.0715, "step": 4392 }, { "epoch": 8.355682358535425, "grad_norm": 0.24400204420089722, "learning_rate": 7.213083518577326e-05, "loss": 0.074, "step": 4393 }, { "epoch": 8.357584403233476, "grad_norm": 0.27485939860343933, "learning_rate": 7.212448396316291e-05, "loss": 0.075, "step": 4394 }, { "epoch": 8.359486447931527, "grad_norm": 0.25887802243232727, "learning_rate": 7.211813274055256e-05, "loss": 0.073, "step": 4395 }, { "epoch": 8.361388492629576, "grad_norm": 0.25531402230262756, "learning_rate": 7.211178151794221e-05, "loss": 0.0914, "step": 4396 }, { "epoch": 8.363290537327627, "grad_norm": 0.3148556649684906, "learning_rate": 7.210543029533185e-05, "loss": 0.0757, "step": 4397 }, { "epoch": 8.365192582025678, "grad_norm": 0.19946114718914032, "learning_rate": 7.20990790727215e-05, "loss": 0.073, "step": 4398 }, { "epoch": 8.367094626723729, "grad_norm": 0.30881115794181824, "learning_rate": 7.209272785011116e-05, "loss": 0.0966, "step": 4399 }, { "epoch": 8.368996671421778, "grad_norm": 0.20002616941928864, "learning_rate": 7.20863766275008e-05, "loss": 0.0611, "step": 4400 }, { "epoch": 8.370898716119829, "grad_norm": 0.34020841121673584, "learning_rate": 7.208002540489045e-05, "loss": 0.0885, "step": 4401 }, { "epoch": 8.37280076081788, "grad_norm": 0.36568552255630493, "learning_rate": 7.20736741822801e-05, "loss": 0.0942, "step": 4402 }, { "epoch": 8.37470280551593, "grad_norm": 0.25258252024650574, "learning_rate": 7.206732295966974e-05, "loss": 0.0684, "step": 4403 }, { "epoch": 8.37660485021398, "grad_norm": 0.34854474663734436, "learning_rate": 7.206097173705939e-05, "loss": 0.0822, "step": 4404 }, { "epoch": 8.37850689491203, "grad_norm": 0.2970679700374603, "learning_rate": 7.205462051444904e-05, "loss": 0.0773, "step": 4405 }, { "epoch": 8.380408939610081, "grad_norm": 0.2980916500091553, "learning_rate": 7.204826929183868e-05, "loss": 0.0983, "step": 4406 }, { "epoch": 8.38231098430813, "grad_norm": 0.21473757922649384, "learning_rate": 7.204191806922833e-05, "loss": 0.0624, "step": 4407 }, { "epoch": 8.384213029006181, "grad_norm": 0.41438958048820496, "learning_rate": 7.203556684661797e-05, "loss": 0.0941, "step": 4408 }, { "epoch": 8.386115073704232, "grad_norm": 0.30517882108688354, "learning_rate": 7.202921562400763e-05, "loss": 0.0812, "step": 4409 }, { "epoch": 8.388017118402283, "grad_norm": 0.2721097469329834, "learning_rate": 7.202286440139727e-05, "loss": 0.0696, "step": 4410 }, { "epoch": 8.389919163100332, "grad_norm": 0.1893528401851654, "learning_rate": 7.201651317878691e-05, "loss": 0.0823, "step": 4411 }, { "epoch": 8.391821207798383, "grad_norm": 0.33133113384246826, "learning_rate": 7.201016195617658e-05, "loss": 0.0812, "step": 4412 }, { "epoch": 8.393723252496434, "grad_norm": 0.35746121406555176, "learning_rate": 7.200381073356621e-05, "loss": 0.093, "step": 4413 }, { "epoch": 8.395625297194485, "grad_norm": 0.20589201152324677, "learning_rate": 7.199745951095587e-05, "loss": 0.0832, "step": 4414 }, { "epoch": 8.397527341892534, "grad_norm": 0.29606151580810547, "learning_rate": 7.19911082883455e-05, "loss": 0.0726, "step": 4415 }, { "epoch": 8.399429386590585, "grad_norm": 0.17773225903511047, "learning_rate": 7.198475706573516e-05, "loss": 0.0609, "step": 4416 }, { "epoch": 8.401331431288636, "grad_norm": 0.2160770446062088, "learning_rate": 7.197840584312481e-05, "loss": 0.093, "step": 4417 }, { "epoch": 8.403233475986685, "grad_norm": 0.2273487150669098, "learning_rate": 7.197205462051445e-05, "loss": 0.0617, "step": 4418 }, { "epoch": 8.405135520684736, "grad_norm": 0.30558666586875916, "learning_rate": 7.196570339790411e-05, "loss": 0.0875, "step": 4419 }, { "epoch": 8.407037565382787, "grad_norm": 0.208307147026062, "learning_rate": 7.195935217529375e-05, "loss": 0.0832, "step": 4420 }, { "epoch": 8.408939610080838, "grad_norm": 0.2679693400859833, "learning_rate": 7.195300095268339e-05, "loss": 0.0654, "step": 4421 }, { "epoch": 8.410841654778887, "grad_norm": 0.2670406997203827, "learning_rate": 7.194664973007304e-05, "loss": 0.0612, "step": 4422 }, { "epoch": 8.412743699476938, "grad_norm": 0.21855837106704712, "learning_rate": 7.194029850746269e-05, "loss": 0.0712, "step": 4423 }, { "epoch": 8.414645744174988, "grad_norm": 0.2817660868167877, "learning_rate": 7.193394728485233e-05, "loss": 0.0824, "step": 4424 }, { "epoch": 8.41654778887304, "grad_norm": 0.26271113753318787, "learning_rate": 7.192759606224198e-05, "loss": 0.067, "step": 4425 }, { "epoch": 8.418449833571088, "grad_norm": 0.23378713428974152, "learning_rate": 7.192124483963163e-05, "loss": 0.0664, "step": 4426 }, { "epoch": 8.42035187826914, "grad_norm": 0.3081713616847992, "learning_rate": 7.191489361702129e-05, "loss": 0.0784, "step": 4427 }, { "epoch": 8.42225392296719, "grad_norm": 0.3694870173931122, "learning_rate": 7.190854239441092e-05, "loss": 0.0782, "step": 4428 }, { "epoch": 8.42415596766524, "grad_norm": 0.273703396320343, "learning_rate": 7.190219117180058e-05, "loss": 0.0975, "step": 4429 }, { "epoch": 8.42605801236329, "grad_norm": 0.21073737740516663, "learning_rate": 7.189583994919023e-05, "loss": 0.0639, "step": 4430 }, { "epoch": 8.427960057061341, "grad_norm": 0.273165225982666, "learning_rate": 7.188948872657987e-05, "loss": 0.0653, "step": 4431 }, { "epoch": 8.429862101759392, "grad_norm": 0.23675204813480377, "learning_rate": 7.188313750396952e-05, "loss": 0.0555, "step": 4432 }, { "epoch": 8.431764146457441, "grad_norm": 0.35497403144836426, "learning_rate": 7.187678628135917e-05, "loss": 0.0789, "step": 4433 }, { "epoch": 8.433666191155492, "grad_norm": 0.24616917967796326, "learning_rate": 7.187043505874881e-05, "loss": 0.0532, "step": 4434 }, { "epoch": 8.435568235853543, "grad_norm": 0.2543208599090576, "learning_rate": 7.186408383613846e-05, "loss": 0.0732, "step": 4435 }, { "epoch": 8.437470280551594, "grad_norm": 0.378913551568985, "learning_rate": 7.185773261352811e-05, "loss": 0.0916, "step": 4436 }, { "epoch": 8.439372325249643, "grad_norm": 0.19589996337890625, "learning_rate": 7.185138139091776e-05, "loss": 0.0573, "step": 4437 }, { "epoch": 8.441274369947694, "grad_norm": 0.27976134419441223, "learning_rate": 7.18450301683074e-05, "loss": 0.0723, "step": 4438 }, { "epoch": 8.443176414645745, "grad_norm": 0.1991090476512909, "learning_rate": 7.183867894569705e-05, "loss": 0.0618, "step": 4439 }, { "epoch": 8.445078459343794, "grad_norm": 0.18157024681568146, "learning_rate": 7.18323277230867e-05, "loss": 0.0744, "step": 4440 }, { "epoch": 8.446980504041845, "grad_norm": 0.2461085170507431, "learning_rate": 7.182597650047634e-05, "loss": 0.0622, "step": 4441 }, { "epoch": 8.448882548739896, "grad_norm": 0.35755425691604614, "learning_rate": 7.181962527786598e-05, "loss": 0.0822, "step": 4442 }, { "epoch": 8.450784593437946, "grad_norm": 0.2072787582874298, "learning_rate": 7.181327405525565e-05, "loss": 0.0664, "step": 4443 }, { "epoch": 8.452686638135996, "grad_norm": 0.3196927607059479, "learning_rate": 7.180692283264528e-05, "loss": 0.0717, "step": 4444 }, { "epoch": 8.454588682834046, "grad_norm": 0.23601149022579193, "learning_rate": 7.180057161003494e-05, "loss": 0.1213, "step": 4445 }, { "epoch": 8.456490727532097, "grad_norm": 0.18101097643375397, "learning_rate": 7.179422038742459e-05, "loss": 0.0645, "step": 4446 }, { "epoch": 8.458392772230148, "grad_norm": 0.29692402482032776, "learning_rate": 7.178786916481423e-05, "loss": 0.0667, "step": 4447 }, { "epoch": 8.460294816928197, "grad_norm": 0.32833683490753174, "learning_rate": 7.178151794220388e-05, "loss": 0.1109, "step": 4448 }, { "epoch": 8.462196861626248, "grad_norm": 0.4002103805541992, "learning_rate": 7.177516671959352e-05, "loss": 0.0804, "step": 4449 }, { "epoch": 8.464098906324299, "grad_norm": 0.40316230058670044, "learning_rate": 7.176881549698318e-05, "loss": 0.1148, "step": 4450 }, { "epoch": 8.466000951022348, "grad_norm": 0.24910256266593933, "learning_rate": 7.176246427437282e-05, "loss": 0.089, "step": 4451 }, { "epoch": 8.467902995720399, "grad_norm": 0.31366637349128723, "learning_rate": 7.175611305176246e-05, "loss": 0.0822, "step": 4452 }, { "epoch": 8.46980504041845, "grad_norm": 0.21720853447914124, "learning_rate": 7.174976182915212e-05, "loss": 0.0842, "step": 4453 }, { "epoch": 8.4717070851165, "grad_norm": 0.4722314476966858, "learning_rate": 7.174341060654176e-05, "loss": 0.0882, "step": 4454 }, { "epoch": 8.47360912981455, "grad_norm": 0.2937127649784088, "learning_rate": 7.173705938393141e-05, "loss": 0.0752, "step": 4455 }, { "epoch": 8.4755111745126, "grad_norm": 0.3475329577922821, "learning_rate": 7.173070816132105e-05, "loss": 0.0652, "step": 4456 }, { "epoch": 8.477413219210652, "grad_norm": 0.32552871108055115, "learning_rate": 7.17243569387107e-05, "loss": 0.0824, "step": 4457 }, { "epoch": 8.479315263908703, "grad_norm": 0.3245328664779663, "learning_rate": 7.171800571610036e-05, "loss": 0.078, "step": 4458 }, { "epoch": 8.481217308606752, "grad_norm": 0.3682286739349365, "learning_rate": 7.171165449349e-05, "loss": 0.0672, "step": 4459 }, { "epoch": 8.483119353304803, "grad_norm": 0.37250223755836487, "learning_rate": 7.170530327087965e-05, "loss": 0.073, "step": 4460 }, { "epoch": 8.485021398002853, "grad_norm": 0.3498341739177704, "learning_rate": 7.16989520482693e-05, "loss": 0.0817, "step": 4461 }, { "epoch": 8.486923442700903, "grad_norm": 0.2839718759059906, "learning_rate": 7.169260082565894e-05, "loss": 0.0652, "step": 4462 }, { "epoch": 8.488825487398953, "grad_norm": 0.2696433663368225, "learning_rate": 7.168624960304859e-05, "loss": 0.0693, "step": 4463 }, { "epoch": 8.490727532097004, "grad_norm": 0.3266785144805908, "learning_rate": 7.167989838043824e-05, "loss": 0.0927, "step": 4464 }, { "epoch": 8.492629576795055, "grad_norm": 0.275698184967041, "learning_rate": 7.167354715782788e-05, "loss": 0.068, "step": 4465 }, { "epoch": 8.494531621493104, "grad_norm": 0.24140353500843048, "learning_rate": 7.166719593521753e-05, "loss": 0.0543, "step": 4466 }, { "epoch": 8.496433666191155, "grad_norm": 0.2588673233985901, "learning_rate": 7.166084471260718e-05, "loss": 0.0983, "step": 4467 }, { "epoch": 8.498335710889206, "grad_norm": 0.2226792275905609, "learning_rate": 7.165449348999683e-05, "loss": 0.0728, "step": 4468 }, { "epoch": 8.500237755587257, "grad_norm": 0.2721460461616516, "learning_rate": 7.164814226738647e-05, "loss": 0.0807, "step": 4469 }, { "epoch": 8.502139800285306, "grad_norm": 0.2585650384426117, "learning_rate": 7.164179104477612e-05, "loss": 0.0973, "step": 4470 }, { "epoch": 8.504041844983357, "grad_norm": 0.20774824917316437, "learning_rate": 7.163543982216578e-05, "loss": 0.0598, "step": 4471 }, { "epoch": 8.505943889681408, "grad_norm": 0.22531108558177948, "learning_rate": 7.162908859955541e-05, "loss": 0.0658, "step": 4472 }, { "epoch": 8.507845934379457, "grad_norm": 0.23569051921367645, "learning_rate": 7.162273737694507e-05, "loss": 0.0778, "step": 4473 }, { "epoch": 8.509747979077508, "grad_norm": 0.28449130058288574, "learning_rate": 7.161638615433472e-05, "loss": 0.0868, "step": 4474 }, { "epoch": 8.511650023775559, "grad_norm": 0.2436315268278122, "learning_rate": 7.161003493172436e-05, "loss": 0.071, "step": 4475 }, { "epoch": 8.51355206847361, "grad_norm": 0.2228497564792633, "learning_rate": 7.160368370911401e-05, "loss": 0.0742, "step": 4476 }, { "epoch": 8.515454113171659, "grad_norm": 0.2390798032283783, "learning_rate": 7.159733248650366e-05, "loss": 0.0694, "step": 4477 }, { "epoch": 8.51735615786971, "grad_norm": 0.15328125655651093, "learning_rate": 7.15909812638933e-05, "loss": 0.0558, "step": 4478 }, { "epoch": 8.51925820256776, "grad_norm": 0.1791938692331314, "learning_rate": 7.158463004128295e-05, "loss": 0.0662, "step": 4479 }, { "epoch": 8.521160247265811, "grad_norm": 0.20867963135242462, "learning_rate": 7.157827881867259e-05, "loss": 0.0658, "step": 4480 }, { "epoch": 8.52306229196386, "grad_norm": 0.21594001352787018, "learning_rate": 7.157192759606225e-05, "loss": 0.053, "step": 4481 }, { "epoch": 8.524964336661911, "grad_norm": 0.18117885291576385, "learning_rate": 7.156557637345189e-05, "loss": 0.0497, "step": 4482 }, { "epoch": 8.526866381359962, "grad_norm": 0.3606829345226288, "learning_rate": 7.155922515084153e-05, "loss": 0.0831, "step": 4483 }, { "epoch": 8.528768426058011, "grad_norm": 0.26378872990608215, "learning_rate": 7.15528739282312e-05, "loss": 0.081, "step": 4484 }, { "epoch": 8.530670470756062, "grad_norm": 0.38574284315109253, "learning_rate": 7.154652270562083e-05, "loss": 0.0995, "step": 4485 }, { "epoch": 8.532572515454113, "grad_norm": 0.241069957613945, "learning_rate": 7.154017148301049e-05, "loss": 0.0672, "step": 4486 }, { "epoch": 8.534474560152164, "grad_norm": 0.32855287194252014, "learning_rate": 7.153382026040012e-05, "loss": 0.0887, "step": 4487 }, { "epoch": 8.536376604850213, "grad_norm": 0.2607368230819702, "learning_rate": 7.152746903778978e-05, "loss": 0.0835, "step": 4488 }, { "epoch": 8.538278649548264, "grad_norm": 0.23832017183303833, "learning_rate": 7.152111781517943e-05, "loss": 0.07, "step": 4489 }, { "epoch": 8.540180694246315, "grad_norm": 0.3213230073451996, "learning_rate": 7.151476659256907e-05, "loss": 0.0777, "step": 4490 }, { "epoch": 8.542082738944366, "grad_norm": 0.36647650599479675, "learning_rate": 7.150841536995873e-05, "loss": 0.0909, "step": 4491 }, { "epoch": 8.543984783642415, "grad_norm": 0.20226097106933594, "learning_rate": 7.150206414734837e-05, "loss": 0.0679, "step": 4492 }, { "epoch": 8.545886828340466, "grad_norm": 0.2921244502067566, "learning_rate": 7.149571292473801e-05, "loss": 0.0736, "step": 4493 }, { "epoch": 8.547788873038517, "grad_norm": 0.24473552405834198, "learning_rate": 7.148936170212766e-05, "loss": 0.0706, "step": 4494 }, { "epoch": 8.549690917736568, "grad_norm": 0.5814130306243896, "learning_rate": 7.148301047951731e-05, "loss": 0.1231, "step": 4495 }, { "epoch": 8.551592962434617, "grad_norm": 0.33063530921936035, "learning_rate": 7.147665925690695e-05, "loss": 0.0891, "step": 4496 }, { "epoch": 8.553495007132668, "grad_norm": 0.3366851508617401, "learning_rate": 7.14703080342966e-05, "loss": 0.0909, "step": 4497 }, { "epoch": 8.555397051830719, "grad_norm": 0.2970556914806366, "learning_rate": 7.146395681168625e-05, "loss": 0.0826, "step": 4498 }, { "epoch": 8.557299096528768, "grad_norm": 0.2414432168006897, "learning_rate": 7.14576055890759e-05, "loss": 0.0778, "step": 4499 }, { "epoch": 8.559201141226819, "grad_norm": 0.22268299758434296, "learning_rate": 7.145125436646554e-05, "loss": 0.0621, "step": 4500 }, { "epoch": 8.56110318592487, "grad_norm": 0.2911908030509949, "learning_rate": 7.14449031438552e-05, "loss": 0.058, "step": 4501 }, { "epoch": 8.56300523062292, "grad_norm": 0.17397868633270264, "learning_rate": 7.143855192124485e-05, "loss": 0.0554, "step": 4502 }, { "epoch": 8.56490727532097, "grad_norm": 0.2673262357711792, "learning_rate": 7.143220069863449e-05, "loss": 0.0591, "step": 4503 }, { "epoch": 8.56680932001902, "grad_norm": 0.29203251004219055, "learning_rate": 7.142584947602414e-05, "loss": 0.0919, "step": 4504 }, { "epoch": 8.568711364717071, "grad_norm": 0.22142189741134644, "learning_rate": 7.141949825341379e-05, "loss": 0.0674, "step": 4505 }, { "epoch": 8.570613409415122, "grad_norm": 0.34359076619148254, "learning_rate": 7.141314703080343e-05, "loss": 0.0677, "step": 4506 }, { "epoch": 8.572515454113171, "grad_norm": 0.23393453657627106, "learning_rate": 7.140679580819308e-05, "loss": 0.0712, "step": 4507 }, { "epoch": 8.574417498811222, "grad_norm": 0.2184855043888092, "learning_rate": 7.140044458558273e-05, "loss": 0.0737, "step": 4508 }, { "epoch": 8.576319543509273, "grad_norm": 0.26712971925735474, "learning_rate": 7.139409336297238e-05, "loss": 0.0863, "step": 4509 }, { "epoch": 8.578221588207322, "grad_norm": 0.22099913656711578, "learning_rate": 7.138774214036202e-05, "loss": 0.0679, "step": 4510 }, { "epoch": 8.580123632905373, "grad_norm": 0.389970600605011, "learning_rate": 7.138139091775167e-05, "loss": 0.0776, "step": 4511 }, { "epoch": 8.582025677603424, "grad_norm": 0.2818027138710022, "learning_rate": 7.137503969514133e-05, "loss": 0.0751, "step": 4512 }, { "epoch": 8.583927722301475, "grad_norm": 0.2537659704685211, "learning_rate": 7.136868847253096e-05, "loss": 0.0708, "step": 4513 }, { "epoch": 8.585829766999524, "grad_norm": 0.2738887369632721, "learning_rate": 7.13623372499206e-05, "loss": 0.0544, "step": 4514 }, { "epoch": 8.587731811697575, "grad_norm": 0.21738064289093018, "learning_rate": 7.135598602731027e-05, "loss": 0.0663, "step": 4515 }, { "epoch": 8.589633856395626, "grad_norm": 0.2945701479911804, "learning_rate": 7.13496348046999e-05, "loss": 0.0558, "step": 4516 }, { "epoch": 8.591535901093676, "grad_norm": 0.2870883345603943, "learning_rate": 7.134328358208956e-05, "loss": 0.0695, "step": 4517 }, { "epoch": 8.593437945791726, "grad_norm": 0.28586509823799133, "learning_rate": 7.13369323594792e-05, "loss": 0.0715, "step": 4518 }, { "epoch": 8.595339990489776, "grad_norm": 0.25800830125808716, "learning_rate": 7.133058113686885e-05, "loss": 0.0791, "step": 4519 }, { "epoch": 8.597242035187827, "grad_norm": 0.23650942742824554, "learning_rate": 7.13242299142585e-05, "loss": 0.0628, "step": 4520 }, { "epoch": 8.599144079885878, "grad_norm": 0.5239331126213074, "learning_rate": 7.131787869164814e-05, "loss": 0.0821, "step": 4521 }, { "epoch": 8.601046124583927, "grad_norm": 0.2804233431816101, "learning_rate": 7.13115274690378e-05, "loss": 0.087, "step": 4522 }, { "epoch": 8.602948169281978, "grad_norm": 0.29622703790664673, "learning_rate": 7.130517624642744e-05, "loss": 0.0746, "step": 4523 }, { "epoch": 8.60485021398003, "grad_norm": 0.2862629294395447, "learning_rate": 7.129882502381708e-05, "loss": 0.0854, "step": 4524 }, { "epoch": 8.606752258678078, "grad_norm": 0.23067869246006012, "learning_rate": 7.129247380120673e-05, "loss": 0.0726, "step": 4525 }, { "epoch": 8.60865430337613, "grad_norm": 0.2480565309524536, "learning_rate": 7.128612257859638e-05, "loss": 0.0594, "step": 4526 }, { "epoch": 8.61055634807418, "grad_norm": 0.22137805819511414, "learning_rate": 7.127977135598604e-05, "loss": 0.06, "step": 4527 }, { "epoch": 8.612458392772231, "grad_norm": 0.1964171975851059, "learning_rate": 7.127342013337567e-05, "loss": 0.0529, "step": 4528 }, { "epoch": 8.61436043747028, "grad_norm": 0.2646423280239105, "learning_rate": 7.126706891076533e-05, "loss": 0.0757, "step": 4529 }, { "epoch": 8.616262482168331, "grad_norm": 0.25427189469337463, "learning_rate": 7.126071768815498e-05, "loss": 0.0652, "step": 4530 }, { "epoch": 8.618164526866382, "grad_norm": 0.22125300765037537, "learning_rate": 7.125436646554462e-05, "loss": 0.0838, "step": 4531 }, { "epoch": 8.620066571564433, "grad_norm": 0.33113163709640503, "learning_rate": 7.124801524293427e-05, "loss": 0.0735, "step": 4532 }, { "epoch": 8.621968616262482, "grad_norm": 0.21249188482761383, "learning_rate": 7.124166402032392e-05, "loss": 0.0655, "step": 4533 }, { "epoch": 8.623870660960533, "grad_norm": 0.3268553912639618, "learning_rate": 7.123531279771356e-05, "loss": 0.0619, "step": 4534 }, { "epoch": 8.625772705658584, "grad_norm": 0.3507033586502075, "learning_rate": 7.122896157510321e-05, "loss": 0.0734, "step": 4535 }, { "epoch": 8.627674750356633, "grad_norm": 0.20632556080818176, "learning_rate": 7.122261035249286e-05, "loss": 0.0614, "step": 4536 }, { "epoch": 8.629576795054684, "grad_norm": 0.23562835156917572, "learning_rate": 7.12162591298825e-05, "loss": 0.0592, "step": 4537 }, { "epoch": 8.631478839752734, "grad_norm": 0.2938970625400543, "learning_rate": 7.120990790727215e-05, "loss": 0.0793, "step": 4538 }, { "epoch": 8.633380884450785, "grad_norm": 0.25379982590675354, "learning_rate": 7.12035566846618e-05, "loss": 0.0658, "step": 4539 }, { "epoch": 8.635282929148834, "grad_norm": 0.32854923605918884, "learning_rate": 7.119720546205146e-05, "loss": 0.0785, "step": 4540 }, { "epoch": 8.637184973846885, "grad_norm": 0.25158101320266724, "learning_rate": 7.11908542394411e-05, "loss": 0.0885, "step": 4541 }, { "epoch": 8.639087018544936, "grad_norm": 0.3338216543197632, "learning_rate": 7.118450301683075e-05, "loss": 0.103, "step": 4542 }, { "epoch": 8.640989063242987, "grad_norm": 0.24056802690029144, "learning_rate": 7.11781517942204e-05, "loss": 0.0738, "step": 4543 }, { "epoch": 8.642891107941036, "grad_norm": 0.27864179015159607, "learning_rate": 7.117180057161004e-05, "loss": 0.0782, "step": 4544 }, { "epoch": 8.644793152639087, "grad_norm": 0.33596450090408325, "learning_rate": 7.116544934899969e-05, "loss": 0.0737, "step": 4545 }, { "epoch": 8.646695197337138, "grad_norm": 0.3385752737522125, "learning_rate": 7.115909812638934e-05, "loss": 0.0962, "step": 4546 }, { "epoch": 8.648597242035187, "grad_norm": 0.24388425052165985, "learning_rate": 7.115274690377898e-05, "loss": 0.0548, "step": 4547 }, { "epoch": 8.650499286733238, "grad_norm": 0.22161011397838593, "learning_rate": 7.114639568116863e-05, "loss": 0.0614, "step": 4548 }, { "epoch": 8.652401331431289, "grad_norm": 0.26100924611091614, "learning_rate": 7.114004445855828e-05, "loss": 0.0645, "step": 4549 }, { "epoch": 8.65430337612934, "grad_norm": 0.26137739419937134, "learning_rate": 7.113369323594792e-05, "loss": 0.0793, "step": 4550 }, { "epoch": 8.656205420827389, "grad_norm": 0.40450796484947205, "learning_rate": 7.112734201333757e-05, "loss": 0.08, "step": 4551 }, { "epoch": 8.65810746552544, "grad_norm": 0.3062434196472168, "learning_rate": 7.112099079072721e-05, "loss": 0.0827, "step": 4552 }, { "epoch": 8.66000951022349, "grad_norm": 0.3404581844806671, "learning_rate": 7.111463956811687e-05, "loss": 0.0839, "step": 4553 }, { "epoch": 8.661911554921542, "grad_norm": 0.24370938539505005, "learning_rate": 7.110828834550651e-05, "loss": 0.0686, "step": 4554 }, { "epoch": 8.66381359961959, "grad_norm": 0.24574245512485504, "learning_rate": 7.110193712289615e-05, "loss": 0.0696, "step": 4555 }, { "epoch": 8.665715644317642, "grad_norm": 0.26258182525634766, "learning_rate": 7.109558590028582e-05, "loss": 0.0828, "step": 4556 }, { "epoch": 8.667617689015692, "grad_norm": 0.3371654748916626, "learning_rate": 7.108923467767546e-05, "loss": 0.0709, "step": 4557 }, { "epoch": 8.669519733713742, "grad_norm": 0.2799132168292999, "learning_rate": 7.108288345506511e-05, "loss": 0.0739, "step": 4558 }, { "epoch": 8.671421778411792, "grad_norm": 0.6267777681350708, "learning_rate": 7.107653223245475e-05, "loss": 0.0934, "step": 4559 }, { "epoch": 8.673323823109843, "grad_norm": 0.28459432721138, "learning_rate": 7.10701810098444e-05, "loss": 0.0906, "step": 4560 }, { "epoch": 8.675225867807894, "grad_norm": 0.29621008038520813, "learning_rate": 7.106382978723405e-05, "loss": 0.0561, "step": 4561 }, { "epoch": 8.677127912505943, "grad_norm": 0.20961788296699524, "learning_rate": 7.105747856462369e-05, "loss": 0.067, "step": 4562 }, { "epoch": 8.679029957203994, "grad_norm": 0.37413978576660156, "learning_rate": 7.105112734201335e-05, "loss": 0.0912, "step": 4563 }, { "epoch": 8.680932001902045, "grad_norm": 0.2561793029308319, "learning_rate": 7.104477611940299e-05, "loss": 0.0665, "step": 4564 }, { "epoch": 8.682834046600096, "grad_norm": 0.3220752477645874, "learning_rate": 7.103842489679263e-05, "loss": 0.0621, "step": 4565 }, { "epoch": 8.684736091298145, "grad_norm": 0.18698498606681824, "learning_rate": 7.103207367418228e-05, "loss": 0.0548, "step": 4566 }, { "epoch": 8.686638135996196, "grad_norm": 0.3139233887195587, "learning_rate": 7.102572245157193e-05, "loss": 0.0971, "step": 4567 }, { "epoch": 8.688540180694247, "grad_norm": 0.2640330195426941, "learning_rate": 7.101937122896157e-05, "loss": 0.08, "step": 4568 }, { "epoch": 8.690442225392296, "grad_norm": 0.2758345901966095, "learning_rate": 7.101302000635122e-05, "loss": 0.0663, "step": 4569 }, { "epoch": 8.692344270090347, "grad_norm": 0.23548677563667297, "learning_rate": 7.100666878374087e-05, "loss": 0.0753, "step": 4570 }, { "epoch": 8.694246314788398, "grad_norm": 0.4258490204811096, "learning_rate": 7.100031756113053e-05, "loss": 0.1119, "step": 4571 }, { "epoch": 8.696148359486449, "grad_norm": 0.25525277853012085, "learning_rate": 7.099396633852017e-05, "loss": 0.0762, "step": 4572 }, { "epoch": 8.698050404184498, "grad_norm": 0.34199386835098267, "learning_rate": 7.098761511590982e-05, "loss": 0.0828, "step": 4573 }, { "epoch": 8.699952448882549, "grad_norm": 0.31173181533813477, "learning_rate": 7.098126389329947e-05, "loss": 0.0855, "step": 4574 }, { "epoch": 8.7018544935806, "grad_norm": 0.20065343379974365, "learning_rate": 7.097491267068911e-05, "loss": 0.0639, "step": 4575 }, { "epoch": 8.70375653827865, "grad_norm": 0.32433775067329407, "learning_rate": 7.096856144807876e-05, "loss": 0.0799, "step": 4576 }, { "epoch": 8.7056585829767, "grad_norm": 0.43522873520851135, "learning_rate": 7.096221022546841e-05, "loss": 0.0882, "step": 4577 }, { "epoch": 8.70756062767475, "grad_norm": 0.33370131254196167, "learning_rate": 7.095585900285805e-05, "loss": 0.0996, "step": 4578 }, { "epoch": 8.709462672372801, "grad_norm": 0.3209904730319977, "learning_rate": 7.09495077802477e-05, "loss": 0.0722, "step": 4579 }, { "epoch": 8.71136471707085, "grad_norm": 0.2941846549510956, "learning_rate": 7.094315655763735e-05, "loss": 0.0746, "step": 4580 }, { "epoch": 8.713266761768901, "grad_norm": 0.27437055110931396, "learning_rate": 7.0936805335027e-05, "loss": 0.0772, "step": 4581 }, { "epoch": 8.715168806466952, "grad_norm": 0.19886012375354767, "learning_rate": 7.093045411241664e-05, "loss": 0.0569, "step": 4582 }, { "epoch": 8.717070851165003, "grad_norm": 0.2602776885032654, "learning_rate": 7.092410288980628e-05, "loss": 0.0843, "step": 4583 }, { "epoch": 8.718972895863052, "grad_norm": 0.20333616435527802, "learning_rate": 7.091775166719595e-05, "loss": 0.1018, "step": 4584 }, { "epoch": 8.720874940561103, "grad_norm": 0.2698846757411957, "learning_rate": 7.091140044458558e-05, "loss": 0.0777, "step": 4585 }, { "epoch": 8.722776985259154, "grad_norm": 0.18672236800193787, "learning_rate": 7.090504922197522e-05, "loss": 0.0608, "step": 4586 }, { "epoch": 8.724679029957205, "grad_norm": 0.2933334410190582, "learning_rate": 7.089869799936489e-05, "loss": 0.0783, "step": 4587 }, { "epoch": 8.726581074655254, "grad_norm": 0.2911440432071686, "learning_rate": 7.089234677675453e-05, "loss": 0.0637, "step": 4588 }, { "epoch": 8.728483119353305, "grad_norm": 0.19904588162899017, "learning_rate": 7.088599555414418e-05, "loss": 0.0629, "step": 4589 }, { "epoch": 8.730385164051356, "grad_norm": 0.22173476219177246, "learning_rate": 7.087964433153382e-05, "loss": 0.0611, "step": 4590 }, { "epoch": 8.732287208749405, "grad_norm": 0.3003878891468048, "learning_rate": 7.087329310892347e-05, "loss": 0.0939, "step": 4591 }, { "epoch": 8.734189253447456, "grad_norm": 0.27259764075279236, "learning_rate": 7.086694188631312e-05, "loss": 0.0798, "step": 4592 }, { "epoch": 8.736091298145507, "grad_norm": 0.25377964973449707, "learning_rate": 7.086059066370276e-05, "loss": 0.0756, "step": 4593 }, { "epoch": 8.737993342843557, "grad_norm": 0.2532641887664795, "learning_rate": 7.085423944109242e-05, "loss": 0.0767, "step": 4594 }, { "epoch": 8.739895387541607, "grad_norm": 0.33837416768074036, "learning_rate": 7.084788821848206e-05, "loss": 0.0906, "step": 4595 }, { "epoch": 8.741797432239657, "grad_norm": 0.20276691019535065, "learning_rate": 7.08415369958717e-05, "loss": 0.0744, "step": 4596 }, { "epoch": 8.743699476937708, "grad_norm": 0.2975214123725891, "learning_rate": 7.083518577326135e-05, "loss": 0.0771, "step": 4597 }, { "epoch": 8.74560152163576, "grad_norm": 0.3634226322174072, "learning_rate": 7.0828834550651e-05, "loss": 0.0946, "step": 4598 }, { "epoch": 8.747503566333808, "grad_norm": 0.1982811689376831, "learning_rate": 7.082248332804066e-05, "loss": 0.057, "step": 4599 }, { "epoch": 8.74940561103186, "grad_norm": 0.2674190104007721, "learning_rate": 7.08161321054303e-05, "loss": 0.0695, "step": 4600 }, { "epoch": 8.75130765572991, "grad_norm": 0.40828996896743774, "learning_rate": 7.080978088281995e-05, "loss": 0.0814, "step": 4601 }, { "epoch": 8.75320970042796, "grad_norm": 0.2784786522388458, "learning_rate": 7.08034296602096e-05, "loss": 0.0791, "step": 4602 }, { "epoch": 8.75511174512601, "grad_norm": 0.26131200790405273, "learning_rate": 7.079707843759924e-05, "loss": 0.0668, "step": 4603 }, { "epoch": 8.757013789824061, "grad_norm": 0.34598174691200256, "learning_rate": 7.079072721498889e-05, "loss": 0.0671, "step": 4604 }, { "epoch": 8.758915834522112, "grad_norm": 0.28290489315986633, "learning_rate": 7.078437599237854e-05, "loss": 0.0901, "step": 4605 }, { "epoch": 8.760817879220161, "grad_norm": 0.4226842522621155, "learning_rate": 7.077802476976818e-05, "loss": 0.0848, "step": 4606 }, { "epoch": 8.762719923918212, "grad_norm": 0.23531925678253174, "learning_rate": 7.077167354715783e-05, "loss": 0.0738, "step": 4607 }, { "epoch": 8.764621968616263, "grad_norm": 0.3046768605709076, "learning_rate": 7.076532232454748e-05, "loss": 0.0723, "step": 4608 }, { "epoch": 8.766524013314314, "grad_norm": 0.22470355033874512, "learning_rate": 7.075897110193712e-05, "loss": 0.0493, "step": 4609 }, { "epoch": 8.768426058012363, "grad_norm": 0.23630782961845398, "learning_rate": 7.075261987932677e-05, "loss": 0.0627, "step": 4610 }, { "epoch": 8.770328102710414, "grad_norm": 0.38506048917770386, "learning_rate": 7.074626865671642e-05, "loss": 0.1092, "step": 4611 }, { "epoch": 8.772230147408465, "grad_norm": 0.23271790146827698, "learning_rate": 7.073991743410608e-05, "loss": 0.0726, "step": 4612 }, { "epoch": 8.774132192106514, "grad_norm": 0.22639422118663788, "learning_rate": 7.073356621149571e-05, "loss": 0.064, "step": 4613 }, { "epoch": 8.776034236804565, "grad_norm": 0.3787291347980499, "learning_rate": 7.072721498888537e-05, "loss": 0.0871, "step": 4614 }, { "epoch": 8.777936281502615, "grad_norm": 0.23703598976135254, "learning_rate": 7.072086376627502e-05, "loss": 0.0677, "step": 4615 }, { "epoch": 8.779838326200666, "grad_norm": 0.3365970551967621, "learning_rate": 7.071451254366466e-05, "loss": 0.0949, "step": 4616 }, { "epoch": 8.781740370898715, "grad_norm": 0.25305354595184326, "learning_rate": 7.070816132105431e-05, "loss": 0.0612, "step": 4617 }, { "epoch": 8.783642415596766, "grad_norm": 0.23190952837467194, "learning_rate": 7.070181009844396e-05, "loss": 0.0649, "step": 4618 }, { "epoch": 8.785544460294817, "grad_norm": 0.2549265921115875, "learning_rate": 7.06954588758336e-05, "loss": 0.0695, "step": 4619 }, { "epoch": 8.787446504992868, "grad_norm": 0.26542478799819946, "learning_rate": 7.068910765322325e-05, "loss": 0.0776, "step": 4620 }, { "epoch": 8.789348549690917, "grad_norm": 0.27121585607528687, "learning_rate": 7.06827564306129e-05, "loss": 0.1015, "step": 4621 }, { "epoch": 8.791250594388968, "grad_norm": 0.1988026648759842, "learning_rate": 7.067640520800254e-05, "loss": 0.058, "step": 4622 }, { "epoch": 8.793152639087019, "grad_norm": 0.386599063873291, "learning_rate": 7.067005398539219e-05, "loss": 0.1015, "step": 4623 }, { "epoch": 8.795054683785068, "grad_norm": 0.28242504596710205, "learning_rate": 7.066370276278183e-05, "loss": 0.0683, "step": 4624 }, { "epoch": 8.796956728483119, "grad_norm": 0.2033800631761551, "learning_rate": 7.06573515401715e-05, "loss": 0.0734, "step": 4625 }, { "epoch": 8.79885877318117, "grad_norm": 0.25961750745773315, "learning_rate": 7.065100031756113e-05, "loss": 0.0564, "step": 4626 }, { "epoch": 8.80076081787922, "grad_norm": 0.26695361733436584, "learning_rate": 7.064464909495077e-05, "loss": 0.0815, "step": 4627 }, { "epoch": 8.80266286257727, "grad_norm": 0.3340332508087158, "learning_rate": 7.063829787234042e-05, "loss": 0.0598, "step": 4628 }, { "epoch": 8.80456490727532, "grad_norm": 0.3151671290397644, "learning_rate": 7.063194664973008e-05, "loss": 0.1035, "step": 4629 }, { "epoch": 8.806466951973372, "grad_norm": 0.31824958324432373, "learning_rate": 7.062559542711973e-05, "loss": 0.0666, "step": 4630 }, { "epoch": 8.808368996671422, "grad_norm": 0.24082911014556885, "learning_rate": 7.061924420450937e-05, "loss": 0.065, "step": 4631 }, { "epoch": 8.810271041369472, "grad_norm": 0.2710180878639221, "learning_rate": 7.061289298189902e-05, "loss": 0.074, "step": 4632 }, { "epoch": 8.812173086067522, "grad_norm": 0.31997546553611755, "learning_rate": 7.060654175928867e-05, "loss": 0.0841, "step": 4633 }, { "epoch": 8.814075130765573, "grad_norm": 0.40407681465148926, "learning_rate": 7.060019053667831e-05, "loss": 0.0784, "step": 4634 }, { "epoch": 8.815977175463622, "grad_norm": 0.2711647152900696, "learning_rate": 7.059383931406796e-05, "loss": 0.0959, "step": 4635 }, { "epoch": 8.817879220161673, "grad_norm": 0.25653815269470215, "learning_rate": 7.058748809145761e-05, "loss": 0.0697, "step": 4636 }, { "epoch": 8.819781264859724, "grad_norm": 0.17264167964458466, "learning_rate": 7.058113686884725e-05, "loss": 0.0484, "step": 4637 }, { "epoch": 8.821683309557775, "grad_norm": 0.37043115496635437, "learning_rate": 7.05747856462369e-05, "loss": 0.0829, "step": 4638 }, { "epoch": 8.823585354255824, "grad_norm": 0.24396714568138123, "learning_rate": 7.056843442362655e-05, "loss": 0.0732, "step": 4639 }, { "epoch": 8.825487398953875, "grad_norm": 0.33010244369506836, "learning_rate": 7.056208320101619e-05, "loss": 0.0742, "step": 4640 }, { "epoch": 8.827389443651926, "grad_norm": 0.2644335925579071, "learning_rate": 7.055573197840584e-05, "loss": 0.0699, "step": 4641 }, { "epoch": 8.829291488349977, "grad_norm": 0.2645286023616791, "learning_rate": 7.05493807557955e-05, "loss": 0.0773, "step": 4642 }, { "epoch": 8.831193533048026, "grad_norm": 0.21194998919963837, "learning_rate": 7.054302953318515e-05, "loss": 0.1088, "step": 4643 }, { "epoch": 8.833095577746077, "grad_norm": 0.36791402101516724, "learning_rate": 7.053667831057479e-05, "loss": 0.0897, "step": 4644 }, { "epoch": 8.834997622444128, "grad_norm": 0.44055166840553284, "learning_rate": 7.053032708796444e-05, "loss": 0.099, "step": 4645 }, { "epoch": 8.836899667142179, "grad_norm": 0.2763393819332123, "learning_rate": 7.052397586535409e-05, "loss": 0.0786, "step": 4646 }, { "epoch": 8.838801711840228, "grad_norm": 0.2285812348127365, "learning_rate": 7.051762464274373e-05, "loss": 0.0465, "step": 4647 }, { "epoch": 8.840703756538279, "grad_norm": 0.266569048166275, "learning_rate": 7.051127342013338e-05, "loss": 0.0778, "step": 4648 }, { "epoch": 8.84260580123633, "grad_norm": 0.29792171716690063, "learning_rate": 7.050492219752303e-05, "loss": 0.1043, "step": 4649 }, { "epoch": 8.844507845934379, "grad_norm": 0.27587541937828064, "learning_rate": 7.049857097491267e-05, "loss": 0.0901, "step": 4650 }, { "epoch": 8.84640989063243, "grad_norm": 0.2589787542819977, "learning_rate": 7.049221975230232e-05, "loss": 0.0597, "step": 4651 }, { "epoch": 8.84831193533048, "grad_norm": 0.1944485306739807, "learning_rate": 7.048586852969197e-05, "loss": 0.0744, "step": 4652 }, { "epoch": 8.850213980028531, "grad_norm": 0.27121320366859436, "learning_rate": 7.047951730708163e-05, "loss": 0.0826, "step": 4653 }, { "epoch": 8.85211602472658, "grad_norm": 0.28086745738983154, "learning_rate": 7.047316608447126e-05, "loss": 0.0855, "step": 4654 }, { "epoch": 8.854018069424631, "grad_norm": 0.2198602259159088, "learning_rate": 7.04668148618609e-05, "loss": 0.0641, "step": 4655 }, { "epoch": 8.855920114122682, "grad_norm": 0.3515179455280304, "learning_rate": 7.046046363925057e-05, "loss": 0.0929, "step": 4656 }, { "epoch": 8.857822158820733, "grad_norm": 0.294048935174942, "learning_rate": 7.04541124166402e-05, "loss": 0.0705, "step": 4657 }, { "epoch": 8.859724203518782, "grad_norm": 0.25119510293006897, "learning_rate": 7.044776119402984e-05, "loss": 0.0622, "step": 4658 }, { "epoch": 8.861626248216833, "grad_norm": 0.2803022265434265, "learning_rate": 7.044140997141951e-05, "loss": 0.0543, "step": 4659 }, { "epoch": 8.863528292914884, "grad_norm": 0.25134971737861633, "learning_rate": 7.043505874880915e-05, "loss": 0.0848, "step": 4660 }, { "epoch": 8.865430337612933, "grad_norm": 0.23869898915290833, "learning_rate": 7.04287075261988e-05, "loss": 0.0786, "step": 4661 }, { "epoch": 8.867332382310984, "grad_norm": 0.19293729960918427, "learning_rate": 7.042235630358844e-05, "loss": 0.0615, "step": 4662 }, { "epoch": 8.869234427009035, "grad_norm": 0.23141486942768097, "learning_rate": 7.041600508097809e-05, "loss": 0.0871, "step": 4663 }, { "epoch": 8.871136471707086, "grad_norm": 0.28251227736473083, "learning_rate": 7.040965385836774e-05, "loss": 0.0707, "step": 4664 }, { "epoch": 8.873038516405135, "grad_norm": 0.1875603348016739, "learning_rate": 7.040330263575738e-05, "loss": 0.0693, "step": 4665 }, { "epoch": 8.874940561103186, "grad_norm": 0.24016056954860687, "learning_rate": 7.039695141314705e-05, "loss": 0.0656, "step": 4666 }, { "epoch": 8.876842605801237, "grad_norm": 0.4740009605884552, "learning_rate": 7.039060019053668e-05, "loss": 0.0807, "step": 4667 }, { "epoch": 8.878744650499288, "grad_norm": 0.24484196305274963, "learning_rate": 7.038424896792632e-05, "loss": 0.0679, "step": 4668 }, { "epoch": 8.880646695197337, "grad_norm": 0.46116209030151367, "learning_rate": 7.037789774531597e-05, "loss": 0.0982, "step": 4669 }, { "epoch": 8.882548739895388, "grad_norm": 0.3259490728378296, "learning_rate": 7.037154652270563e-05, "loss": 0.1054, "step": 4670 }, { "epoch": 8.884450784593438, "grad_norm": 0.27219080924987793, "learning_rate": 7.036519530009528e-05, "loss": 0.0712, "step": 4671 }, { "epoch": 8.88635282929149, "grad_norm": 0.2472408562898636, "learning_rate": 7.035884407748492e-05, "loss": 0.0564, "step": 4672 }, { "epoch": 8.888254873989538, "grad_norm": 0.26915243268013, "learning_rate": 7.035249285487457e-05, "loss": 0.0743, "step": 4673 }, { "epoch": 8.89015691868759, "grad_norm": 0.27465927600860596, "learning_rate": 7.034614163226422e-05, "loss": 0.0679, "step": 4674 }, { "epoch": 8.89205896338564, "grad_norm": 0.2622419595718384, "learning_rate": 7.033979040965386e-05, "loss": 0.0762, "step": 4675 }, { "epoch": 8.89396100808369, "grad_norm": 0.32345646619796753, "learning_rate": 7.033343918704351e-05, "loss": 0.0816, "step": 4676 }, { "epoch": 8.89586305278174, "grad_norm": 0.2678157389163971, "learning_rate": 7.032708796443316e-05, "loss": 0.0672, "step": 4677 }, { "epoch": 8.897765097479791, "grad_norm": 0.3485300838947296, "learning_rate": 7.03207367418228e-05, "loss": 0.0953, "step": 4678 }, { "epoch": 8.899667142177842, "grad_norm": 0.2810485363006592, "learning_rate": 7.031438551921245e-05, "loss": 0.069, "step": 4679 }, { "epoch": 8.901569186875891, "grad_norm": 0.3006494343280792, "learning_rate": 7.03080342966021e-05, "loss": 0.0717, "step": 4680 }, { "epoch": 8.903471231573942, "grad_norm": 0.29095882177352905, "learning_rate": 7.030168307399174e-05, "loss": 0.0762, "step": 4681 }, { "epoch": 8.905373276271993, "grad_norm": 0.21266767382621765, "learning_rate": 7.02953318513814e-05, "loss": 0.0771, "step": 4682 }, { "epoch": 8.907275320970044, "grad_norm": 0.23947753012180328, "learning_rate": 7.028898062877105e-05, "loss": 0.0641, "step": 4683 }, { "epoch": 8.909177365668093, "grad_norm": 0.24538327753543854, "learning_rate": 7.02826294061607e-05, "loss": 0.0538, "step": 4684 }, { "epoch": 8.911079410366144, "grad_norm": 0.45514872670173645, "learning_rate": 7.027627818355034e-05, "loss": 0.0759, "step": 4685 }, { "epoch": 8.912981455064195, "grad_norm": 0.31402572989463806, "learning_rate": 7.026992696093997e-05, "loss": 0.0654, "step": 4686 }, { "epoch": 8.914883499762244, "grad_norm": 0.2629324793815613, "learning_rate": 7.026357573832964e-05, "loss": 0.0729, "step": 4687 }, { "epoch": 8.916785544460295, "grad_norm": 0.34424546360969543, "learning_rate": 7.025722451571928e-05, "loss": 0.0643, "step": 4688 }, { "epoch": 8.918687589158345, "grad_norm": 0.45988425612449646, "learning_rate": 7.025087329310893e-05, "loss": 0.0942, "step": 4689 }, { "epoch": 8.920589633856396, "grad_norm": 0.2842889428138733, "learning_rate": 7.024452207049858e-05, "loss": 0.0679, "step": 4690 }, { "epoch": 8.922491678554445, "grad_norm": 0.3553121089935303, "learning_rate": 7.023817084788822e-05, "loss": 0.0778, "step": 4691 }, { "epoch": 8.924393723252496, "grad_norm": 0.2820618152618408, "learning_rate": 7.023181962527787e-05, "loss": 0.072, "step": 4692 }, { "epoch": 8.926295767950547, "grad_norm": 0.23294129967689514, "learning_rate": 7.022546840266751e-05, "loss": 0.0628, "step": 4693 }, { "epoch": 8.928197812648598, "grad_norm": 0.3666904866695404, "learning_rate": 7.021911718005716e-05, "loss": 0.0983, "step": 4694 }, { "epoch": 8.930099857346647, "grad_norm": 0.22570650279521942, "learning_rate": 7.021276595744681e-05, "loss": 0.0984, "step": 4695 }, { "epoch": 8.932001902044698, "grad_norm": 0.26434043049812317, "learning_rate": 7.020641473483645e-05, "loss": 0.0852, "step": 4696 }, { "epoch": 8.933903946742749, "grad_norm": 0.32865387201309204, "learning_rate": 7.020006351222612e-05, "loss": 0.0922, "step": 4697 }, { "epoch": 8.935805991440798, "grad_norm": 0.36958640813827515, "learning_rate": 7.019371228961576e-05, "loss": 0.0797, "step": 4698 }, { "epoch": 8.937708036138849, "grad_norm": 0.3567565679550171, "learning_rate": 7.01873610670054e-05, "loss": 0.1245, "step": 4699 }, { "epoch": 8.9396100808369, "grad_norm": 0.2572322189807892, "learning_rate": 7.018100984439505e-05, "loss": 0.0662, "step": 4700 }, { "epoch": 8.94151212553495, "grad_norm": 0.30103451013565063, "learning_rate": 7.01746586217847e-05, "loss": 0.0751, "step": 4701 }, { "epoch": 8.943414170233, "grad_norm": 0.2606084644794464, "learning_rate": 7.016830739917435e-05, "loss": 0.0679, "step": 4702 }, { "epoch": 8.94531621493105, "grad_norm": 0.2592700719833374, "learning_rate": 7.016195617656399e-05, "loss": 0.057, "step": 4703 }, { "epoch": 8.947218259629102, "grad_norm": 0.3322167694568634, "learning_rate": 7.015560495395364e-05, "loss": 0.0798, "step": 4704 }, { "epoch": 8.949120304327153, "grad_norm": 0.2863213121891022, "learning_rate": 7.014925373134329e-05, "loss": 0.0744, "step": 4705 }, { "epoch": 8.951022349025202, "grad_norm": 0.3357941806316376, "learning_rate": 7.014290250873293e-05, "loss": 0.0797, "step": 4706 }, { "epoch": 8.952924393723253, "grad_norm": 0.21185575425624847, "learning_rate": 7.013655128612258e-05, "loss": 0.0689, "step": 4707 }, { "epoch": 8.954826438421303, "grad_norm": 0.414275199174881, "learning_rate": 7.013020006351223e-05, "loss": 0.0863, "step": 4708 }, { "epoch": 8.956728483119353, "grad_norm": 0.3176036477088928, "learning_rate": 7.012384884090187e-05, "loss": 0.0641, "step": 4709 }, { "epoch": 8.958630527817403, "grad_norm": 0.2829712927341461, "learning_rate": 7.011749761829152e-05, "loss": 0.0722, "step": 4710 }, { "epoch": 8.960532572515454, "grad_norm": 0.2736256718635559, "learning_rate": 7.011114639568117e-05, "loss": 0.1003, "step": 4711 }, { "epoch": 8.962434617213505, "grad_norm": 0.2921677827835083, "learning_rate": 7.010479517307081e-05, "loss": 0.0662, "step": 4712 }, { "epoch": 8.964336661911554, "grad_norm": 0.3244280219078064, "learning_rate": 7.009844395046046e-05, "loss": 0.0774, "step": 4713 }, { "epoch": 8.966238706609605, "grad_norm": 0.3001691997051239, "learning_rate": 7.009209272785012e-05, "loss": 0.0896, "step": 4714 }, { "epoch": 8.968140751307656, "grad_norm": 0.2865767776966095, "learning_rate": 7.008574150523977e-05, "loss": 0.0636, "step": 4715 }, { "epoch": 8.970042796005707, "grad_norm": 0.28869691491127014, "learning_rate": 7.007939028262941e-05, "loss": 0.077, "step": 4716 }, { "epoch": 8.971944840703756, "grad_norm": 0.22912059724330902, "learning_rate": 7.007303906001906e-05, "loss": 0.0701, "step": 4717 }, { "epoch": 8.973846885401807, "grad_norm": 0.2385643869638443, "learning_rate": 7.006668783740871e-05, "loss": 0.0729, "step": 4718 }, { "epoch": 8.975748930099858, "grad_norm": 0.26056259870529175, "learning_rate": 7.006033661479835e-05, "loss": 0.0752, "step": 4719 }, { "epoch": 8.977650974797907, "grad_norm": 0.21698705852031708, "learning_rate": 7.0053985392188e-05, "loss": 0.0684, "step": 4720 }, { "epoch": 8.979553019495958, "grad_norm": 0.30771854519844055, "learning_rate": 7.004763416957765e-05, "loss": 0.0815, "step": 4721 }, { "epoch": 8.981455064194009, "grad_norm": 0.2765178978443146, "learning_rate": 7.004128294696729e-05, "loss": 0.064, "step": 4722 }, { "epoch": 8.98335710889206, "grad_norm": 0.31932932138442993, "learning_rate": 7.003493172435694e-05, "loss": 0.085, "step": 4723 }, { "epoch": 8.985259153590109, "grad_norm": 0.23978936672210693, "learning_rate": 7.00285805017466e-05, "loss": 0.0767, "step": 4724 }, { "epoch": 8.98716119828816, "grad_norm": 0.3429613411426544, "learning_rate": 7.002222927913625e-05, "loss": 0.0924, "step": 4725 }, { "epoch": 8.98906324298621, "grad_norm": 0.2052287459373474, "learning_rate": 7.001587805652588e-05, "loss": 0.0552, "step": 4726 }, { "epoch": 8.990965287684261, "grad_norm": 0.31074854731559753, "learning_rate": 7.000952683391552e-05, "loss": 0.0966, "step": 4727 }, { "epoch": 8.99286733238231, "grad_norm": 0.274406760931015, "learning_rate": 7.000317561130519e-05, "loss": 0.066, "step": 4728 }, { "epoch": 8.994769377080361, "grad_norm": 0.3344108462333679, "learning_rate": 6.999682438869483e-05, "loss": 0.0894, "step": 4729 }, { "epoch": 8.996671421778412, "grad_norm": 0.263041615486145, "learning_rate": 6.999047316608446e-05, "loss": 0.0786, "step": 4730 }, { "epoch": 8.998573466476461, "grad_norm": 0.33452650904655457, "learning_rate": 6.998412194347413e-05, "loss": 0.0856, "step": 4731 }, { "epoch": 9.000475511174512, "grad_norm": 0.3001817464828491, "learning_rate": 6.997777072086377e-05, "loss": 0.0864, "step": 4732 }, { "epoch": 9.002377555872563, "grad_norm": 0.541235089302063, "learning_rate": 6.997141949825342e-05, "loss": 0.1138, "step": 4733 }, { "epoch": 9.004279600570614, "grad_norm": 0.1997150033712387, "learning_rate": 6.996506827564306e-05, "loss": 0.069, "step": 4734 }, { "epoch": 9.006181645268663, "grad_norm": 0.2090476155281067, "learning_rate": 6.995871705303271e-05, "loss": 0.0624, "step": 4735 }, { "epoch": 9.008083689966714, "grad_norm": 0.15848073363304138, "learning_rate": 6.995236583042236e-05, "loss": 0.0836, "step": 4736 }, { "epoch": 9.009985734664765, "grad_norm": 0.15844178199768066, "learning_rate": 6.9946014607812e-05, "loss": 0.0624, "step": 4737 }, { "epoch": 9.011887779362816, "grad_norm": 0.2605695426464081, "learning_rate": 6.993966338520165e-05, "loss": 0.0645, "step": 4738 }, { "epoch": 9.013789824060865, "grad_norm": 0.25842902064323425, "learning_rate": 6.99333121625913e-05, "loss": 0.0806, "step": 4739 }, { "epoch": 9.015691868758916, "grad_norm": 0.18746209144592285, "learning_rate": 6.992696093998094e-05, "loss": 0.0649, "step": 4740 }, { "epoch": 9.017593913456967, "grad_norm": 0.3465903699398041, "learning_rate": 6.99206097173706e-05, "loss": 0.0652, "step": 4741 }, { "epoch": 9.019495958155016, "grad_norm": 0.14225976169109344, "learning_rate": 6.991425849476025e-05, "loss": 0.05, "step": 4742 }, { "epoch": 9.021398002853067, "grad_norm": 0.1438269317150116, "learning_rate": 6.99079072721499e-05, "loss": 0.0581, "step": 4743 }, { "epoch": 9.023300047551118, "grad_norm": 0.2879999279975891, "learning_rate": 6.990155604953954e-05, "loss": 0.078, "step": 4744 }, { "epoch": 9.025202092249168, "grad_norm": 0.25889408588409424, "learning_rate": 6.989520482692919e-05, "loss": 0.0631, "step": 4745 }, { "epoch": 9.027104136947218, "grad_norm": 0.2674861252307892, "learning_rate": 6.988885360431884e-05, "loss": 0.0626, "step": 4746 }, { "epoch": 9.029006181645268, "grad_norm": 0.2341015636920929, "learning_rate": 6.988250238170848e-05, "loss": 0.072, "step": 4747 }, { "epoch": 9.03090822634332, "grad_norm": 0.2379583716392517, "learning_rate": 6.987615115909813e-05, "loss": 0.059, "step": 4748 }, { "epoch": 9.03281027104137, "grad_norm": 0.24688489735126495, "learning_rate": 6.986979993648778e-05, "loss": 0.0767, "step": 4749 }, { "epoch": 9.03471231573942, "grad_norm": 0.33658576011657715, "learning_rate": 6.986344871387742e-05, "loss": 0.0893, "step": 4750 }, { "epoch": 9.03661436043747, "grad_norm": 0.28995007276535034, "learning_rate": 6.985709749126707e-05, "loss": 0.0602, "step": 4751 }, { "epoch": 9.038516405135521, "grad_norm": 0.23219893872737885, "learning_rate": 6.985074626865672e-05, "loss": 0.0603, "step": 4752 }, { "epoch": 9.04041844983357, "grad_norm": 0.23928780853748322, "learning_rate": 6.984439504604636e-05, "loss": 0.0714, "step": 4753 }, { "epoch": 9.042320494531621, "grad_norm": 0.3113102912902832, "learning_rate": 6.983804382343601e-05, "loss": 0.0856, "step": 4754 }, { "epoch": 9.044222539229672, "grad_norm": 0.2090459018945694, "learning_rate": 6.983169260082567e-05, "loss": 0.0649, "step": 4755 }, { "epoch": 9.046124583927723, "grad_norm": 0.31228578090667725, "learning_rate": 6.982534137821532e-05, "loss": 0.078, "step": 4756 }, { "epoch": 9.048026628625772, "grad_norm": 0.26794952154159546, "learning_rate": 6.981899015560496e-05, "loss": 0.1032, "step": 4757 }, { "epoch": 9.049928673323823, "grad_norm": 0.20607098937034607, "learning_rate": 6.98126389329946e-05, "loss": 0.0652, "step": 4758 }, { "epoch": 9.051830718021874, "grad_norm": 0.23113508522510529, "learning_rate": 6.980628771038426e-05, "loss": 0.0701, "step": 4759 }, { "epoch": 9.053732762719925, "grad_norm": 0.35468852519989014, "learning_rate": 6.97999364877739e-05, "loss": 0.117, "step": 4760 }, { "epoch": 9.055634807417974, "grad_norm": 0.271899551153183, "learning_rate": 6.979358526516355e-05, "loss": 0.0856, "step": 4761 }, { "epoch": 9.057536852116025, "grad_norm": 0.19537365436553955, "learning_rate": 6.97872340425532e-05, "loss": 0.0458, "step": 4762 }, { "epoch": 9.059438896814076, "grad_norm": 0.27871695160865784, "learning_rate": 6.978088281994284e-05, "loss": 0.0541, "step": 4763 }, { "epoch": 9.061340941512125, "grad_norm": 0.30046334862709045, "learning_rate": 6.977453159733249e-05, "loss": 0.0952, "step": 4764 }, { "epoch": 9.063242986210176, "grad_norm": 0.2028159648180008, "learning_rate": 6.976818037472213e-05, "loss": 0.0916, "step": 4765 }, { "epoch": 9.065145030908226, "grad_norm": 0.24774155020713806, "learning_rate": 6.976182915211178e-05, "loss": 0.054, "step": 4766 }, { "epoch": 9.067047075606277, "grad_norm": 0.16707736253738403, "learning_rate": 6.975547792950143e-05, "loss": 0.0524, "step": 4767 }, { "epoch": 9.068949120304326, "grad_norm": 0.16940726339817047, "learning_rate": 6.974912670689107e-05, "loss": 0.0475, "step": 4768 }, { "epoch": 9.070851165002377, "grad_norm": 0.25898852944374084, "learning_rate": 6.974277548428074e-05, "loss": 0.0697, "step": 4769 }, { "epoch": 9.072753209700428, "grad_norm": 0.27324533462524414, "learning_rate": 6.973642426167038e-05, "loss": 0.0858, "step": 4770 }, { "epoch": 9.074655254398479, "grad_norm": 0.20153963565826416, "learning_rate": 6.973007303906001e-05, "loss": 0.0829, "step": 4771 }, { "epoch": 9.076557299096528, "grad_norm": 0.2274760901927948, "learning_rate": 6.972372181644967e-05, "loss": 0.0609, "step": 4772 }, { "epoch": 9.078459343794579, "grad_norm": 0.2810436189174652, "learning_rate": 6.971737059383932e-05, "loss": 0.074, "step": 4773 }, { "epoch": 9.08036138849263, "grad_norm": 0.24662359058856964, "learning_rate": 6.971101937122897e-05, "loss": 0.0644, "step": 4774 }, { "epoch": 9.08226343319068, "grad_norm": 0.17531703412532806, "learning_rate": 6.970466814861861e-05, "loss": 0.0588, "step": 4775 }, { "epoch": 9.08416547788873, "grad_norm": 0.3015911877155304, "learning_rate": 6.969831692600826e-05, "loss": 0.0759, "step": 4776 }, { "epoch": 9.08606752258678, "grad_norm": 0.31380707025527954, "learning_rate": 6.969196570339791e-05, "loss": 0.0656, "step": 4777 }, { "epoch": 9.087969567284832, "grad_norm": 0.1681576669216156, "learning_rate": 6.968561448078755e-05, "loss": 0.0623, "step": 4778 }, { "epoch": 9.08987161198288, "grad_norm": 0.23140835762023926, "learning_rate": 6.96792632581772e-05, "loss": 0.0744, "step": 4779 }, { "epoch": 9.091773656680932, "grad_norm": 0.1460658758878708, "learning_rate": 6.967291203556685e-05, "loss": 0.054, "step": 4780 }, { "epoch": 9.093675701378983, "grad_norm": 0.1995638608932495, "learning_rate": 6.966656081295649e-05, "loss": 0.0617, "step": 4781 }, { "epoch": 9.095577746077034, "grad_norm": 0.2675533890724182, "learning_rate": 6.966020959034614e-05, "loss": 0.0667, "step": 4782 }, { "epoch": 9.097479790775083, "grad_norm": 0.3835360109806061, "learning_rate": 6.96538583677358e-05, "loss": 0.0763, "step": 4783 }, { "epoch": 9.099381835473134, "grad_norm": 0.28690552711486816, "learning_rate": 6.964750714512543e-05, "loss": 0.0822, "step": 4784 }, { "epoch": 9.101283880171184, "grad_norm": 0.18676474690437317, "learning_rate": 6.964115592251509e-05, "loss": 0.0521, "step": 4785 }, { "epoch": 9.103185924869235, "grad_norm": 0.28001606464385986, "learning_rate": 6.963480469990474e-05, "loss": 0.0596, "step": 4786 }, { "epoch": 9.105087969567284, "grad_norm": 0.34591495990753174, "learning_rate": 6.962845347729439e-05, "loss": 0.0739, "step": 4787 }, { "epoch": 9.106990014265335, "grad_norm": 0.2598930299282074, "learning_rate": 6.962210225468403e-05, "loss": 0.0648, "step": 4788 }, { "epoch": 9.108892058963386, "grad_norm": 0.17923042178153992, "learning_rate": 6.961575103207367e-05, "loss": 0.0624, "step": 4789 }, { "epoch": 9.110794103661435, "grad_norm": 0.30225229263305664, "learning_rate": 6.960939980946333e-05, "loss": 0.0735, "step": 4790 }, { "epoch": 9.112696148359486, "grad_norm": 0.20305217802524567, "learning_rate": 6.960304858685297e-05, "loss": 0.0654, "step": 4791 }, { "epoch": 9.114598193057537, "grad_norm": 0.24463124573230743, "learning_rate": 6.959669736424262e-05, "loss": 0.0885, "step": 4792 }, { "epoch": 9.116500237755588, "grad_norm": 0.2814115881919861, "learning_rate": 6.959034614163227e-05, "loss": 0.0716, "step": 4793 }, { "epoch": 9.118402282453637, "grad_norm": 0.1762290596961975, "learning_rate": 6.958399491902191e-05, "loss": 0.0552, "step": 4794 }, { "epoch": 9.120304327151688, "grad_norm": 0.30058228969573975, "learning_rate": 6.957764369641156e-05, "loss": 0.0728, "step": 4795 }, { "epoch": 9.122206371849739, "grad_norm": 0.25093328952789307, "learning_rate": 6.95712924738012e-05, "loss": 0.0598, "step": 4796 }, { "epoch": 9.12410841654779, "grad_norm": 0.2654763460159302, "learning_rate": 6.956494125119087e-05, "loss": 0.0685, "step": 4797 }, { "epoch": 9.126010461245839, "grad_norm": 0.22402413189411163, "learning_rate": 6.95585900285805e-05, "loss": 0.0561, "step": 4798 }, { "epoch": 9.12791250594389, "grad_norm": 0.22796908020973206, "learning_rate": 6.955223880597014e-05, "loss": 0.0689, "step": 4799 }, { "epoch": 9.12981455064194, "grad_norm": 0.26183420419692993, "learning_rate": 6.954588758335981e-05, "loss": 0.0693, "step": 4800 }, { "epoch": 9.13171659533999, "grad_norm": 0.21732604503631592, "learning_rate": 6.953953636074945e-05, "loss": 0.0572, "step": 4801 }, { "epoch": 9.13361864003804, "grad_norm": 0.244783416390419, "learning_rate": 6.953318513813909e-05, "loss": 0.0714, "step": 4802 }, { "epoch": 9.135520684736091, "grad_norm": 0.25078529119491577, "learning_rate": 6.952683391552874e-05, "loss": 0.0651, "step": 4803 }, { "epoch": 9.137422729434142, "grad_norm": 0.2012512981891632, "learning_rate": 6.952048269291839e-05, "loss": 0.0621, "step": 4804 }, { "epoch": 9.139324774132191, "grad_norm": 0.26933446526527405, "learning_rate": 6.951413147030804e-05, "loss": 0.0636, "step": 4805 }, { "epoch": 9.141226818830242, "grad_norm": 0.18506479263305664, "learning_rate": 6.950778024769768e-05, "loss": 0.0628, "step": 4806 }, { "epoch": 9.143128863528293, "grad_norm": 0.2639950215816498, "learning_rate": 6.950142902508733e-05, "loss": 0.0595, "step": 4807 }, { "epoch": 9.145030908226344, "grad_norm": 0.2577735483646393, "learning_rate": 6.949507780247698e-05, "loss": 0.0602, "step": 4808 }, { "epoch": 9.146932952924393, "grad_norm": 0.20550990104675293, "learning_rate": 6.948872657986662e-05, "loss": 0.0731, "step": 4809 }, { "epoch": 9.148834997622444, "grad_norm": 0.18981294333934784, "learning_rate": 6.948237535725627e-05, "loss": 0.0567, "step": 4810 }, { "epoch": 9.150737042320495, "grad_norm": 0.2644405961036682, "learning_rate": 6.947602413464593e-05, "loss": 0.0631, "step": 4811 }, { "epoch": 9.152639087018544, "grad_norm": 0.17926043272018433, "learning_rate": 6.946967291203556e-05, "loss": 0.0567, "step": 4812 }, { "epoch": 9.154541131716595, "grad_norm": 0.2050900012254715, "learning_rate": 6.946332168942522e-05, "loss": 0.0968, "step": 4813 }, { "epoch": 9.156443176414646, "grad_norm": 0.29818764328956604, "learning_rate": 6.945697046681487e-05, "loss": 0.0643, "step": 4814 }, { "epoch": 9.158345221112697, "grad_norm": 0.24317260086536407, "learning_rate": 6.945061924420452e-05, "loss": 0.0799, "step": 4815 }, { "epoch": 9.160247265810746, "grad_norm": 0.16913026571273804, "learning_rate": 6.944426802159416e-05, "loss": 0.0799, "step": 4816 }, { "epoch": 9.162149310508797, "grad_norm": 0.14065365493297577, "learning_rate": 6.943791679898381e-05, "loss": 0.0666, "step": 4817 }, { "epoch": 9.164051355206848, "grad_norm": 0.16402816772460938, "learning_rate": 6.943156557637346e-05, "loss": 0.0589, "step": 4818 }, { "epoch": 9.165953399904899, "grad_norm": 0.2175387740135193, "learning_rate": 6.94252143537631e-05, "loss": 0.0647, "step": 4819 }, { "epoch": 9.167855444602948, "grad_norm": 0.3297087848186493, "learning_rate": 6.941886313115275e-05, "loss": 0.0875, "step": 4820 }, { "epoch": 9.169757489300999, "grad_norm": 0.14931192994117737, "learning_rate": 6.94125119085424e-05, "loss": 0.058, "step": 4821 }, { "epoch": 9.17165953399905, "grad_norm": 0.21912738680839539, "learning_rate": 6.940616068593204e-05, "loss": 0.0648, "step": 4822 }, { "epoch": 9.173561578697099, "grad_norm": 0.27302685379981995, "learning_rate": 6.939980946332169e-05, "loss": 0.0711, "step": 4823 }, { "epoch": 9.17546362339515, "grad_norm": 0.23038659989833832, "learning_rate": 6.939345824071134e-05, "loss": 0.0741, "step": 4824 }, { "epoch": 9.1773656680932, "grad_norm": 0.24258142709732056, "learning_rate": 6.938710701810098e-05, "loss": 0.0857, "step": 4825 }, { "epoch": 9.179267712791251, "grad_norm": 0.2215842455625534, "learning_rate": 6.938075579549064e-05, "loss": 0.0856, "step": 4826 }, { "epoch": 9.1811697574893, "grad_norm": 0.2076491415500641, "learning_rate": 6.937440457288029e-05, "loss": 0.0711, "step": 4827 }, { "epoch": 9.183071802187351, "grad_norm": 0.4951212704181671, "learning_rate": 6.936805335026994e-05, "loss": 0.1213, "step": 4828 }, { "epoch": 9.184973846885402, "grad_norm": 0.1944156438112259, "learning_rate": 6.936170212765958e-05, "loss": 0.0682, "step": 4829 }, { "epoch": 9.186875891583453, "grad_norm": 0.17844007909297943, "learning_rate": 6.935535090504922e-05, "loss": 0.0624, "step": 4830 }, { "epoch": 9.188777936281502, "grad_norm": 0.47420743107795715, "learning_rate": 6.934899968243888e-05, "loss": 0.0875, "step": 4831 }, { "epoch": 9.190679980979553, "grad_norm": 0.2568189799785614, "learning_rate": 6.934264845982852e-05, "loss": 0.0715, "step": 4832 }, { "epoch": 9.192582025677604, "grad_norm": 0.16426631808280945, "learning_rate": 6.933629723721817e-05, "loss": 0.0776, "step": 4833 }, { "epoch": 9.194484070375653, "grad_norm": 0.2905197739601135, "learning_rate": 6.932994601460782e-05, "loss": 0.0875, "step": 4834 }, { "epoch": 9.196386115073704, "grad_norm": 0.1947975754737854, "learning_rate": 6.932359479199746e-05, "loss": 0.0528, "step": 4835 }, { "epoch": 9.198288159771755, "grad_norm": 0.24473321437835693, "learning_rate": 6.931724356938711e-05, "loss": 0.047, "step": 4836 }, { "epoch": 9.200190204469806, "grad_norm": 0.2849128544330597, "learning_rate": 6.931089234677675e-05, "loss": 0.0708, "step": 4837 }, { "epoch": 9.202092249167855, "grad_norm": 0.3052026033401489, "learning_rate": 6.93045411241664e-05, "loss": 0.0761, "step": 4838 }, { "epoch": 9.203994293865906, "grad_norm": 0.1367628425359726, "learning_rate": 6.929818990155605e-05, "loss": 0.0465, "step": 4839 }, { "epoch": 9.205896338563957, "grad_norm": 0.2635146975517273, "learning_rate": 6.929183867894569e-05, "loss": 0.0715, "step": 4840 }, { "epoch": 9.207798383262007, "grad_norm": 0.21499952673912048, "learning_rate": 6.928548745633536e-05, "loss": 0.0593, "step": 4841 }, { "epoch": 9.209700427960057, "grad_norm": 0.20185910165309906, "learning_rate": 6.9279136233725e-05, "loss": 0.0777, "step": 4842 }, { "epoch": 9.211602472658107, "grad_norm": 0.20574524998664856, "learning_rate": 6.927278501111464e-05, "loss": 0.0682, "step": 4843 }, { "epoch": 9.213504517356158, "grad_norm": 0.30292728543281555, "learning_rate": 6.926643378850429e-05, "loss": 0.0913, "step": 4844 }, { "epoch": 9.21540656205421, "grad_norm": 0.21002617478370667, "learning_rate": 6.926008256589394e-05, "loss": 0.0655, "step": 4845 }, { "epoch": 9.217308606752258, "grad_norm": 0.30659377574920654, "learning_rate": 6.925373134328359e-05, "loss": 0.0746, "step": 4846 }, { "epoch": 9.21921065145031, "grad_norm": 0.3024582266807556, "learning_rate": 6.924738012067323e-05, "loss": 0.0754, "step": 4847 }, { "epoch": 9.22111269614836, "grad_norm": 0.20387329161167145, "learning_rate": 6.924102889806288e-05, "loss": 0.0538, "step": 4848 }, { "epoch": 9.22301474084641, "grad_norm": 0.19420789182186127, "learning_rate": 6.923467767545253e-05, "loss": 0.0668, "step": 4849 }, { "epoch": 9.22491678554446, "grad_norm": 0.18014901876449585, "learning_rate": 6.922832645284217e-05, "loss": 0.0643, "step": 4850 }, { "epoch": 9.226818830242511, "grad_norm": 0.27916622161865234, "learning_rate": 6.922197523023182e-05, "loss": 0.097, "step": 4851 }, { "epoch": 9.228720874940562, "grad_norm": 0.38052618503570557, "learning_rate": 6.921562400762147e-05, "loss": 0.0773, "step": 4852 }, { "epoch": 9.230622919638611, "grad_norm": 0.2899182438850403, "learning_rate": 6.920927278501111e-05, "loss": 0.0739, "step": 4853 }, { "epoch": 9.232524964336662, "grad_norm": 0.16164492070674896, "learning_rate": 6.920292156240076e-05, "loss": 0.0683, "step": 4854 }, { "epoch": 9.234427009034713, "grad_norm": 0.38230976462364197, "learning_rate": 6.919657033979042e-05, "loss": 0.0665, "step": 4855 }, { "epoch": 9.236329053732764, "grad_norm": 0.21252687275409698, "learning_rate": 6.919021911718005e-05, "loss": 0.0693, "step": 4856 }, { "epoch": 9.238231098430813, "grad_norm": 0.31365564465522766, "learning_rate": 6.91838678945697e-05, "loss": 0.0898, "step": 4857 }, { "epoch": 9.240133143128864, "grad_norm": 0.25702884793281555, "learning_rate": 6.917751667195936e-05, "loss": 0.0934, "step": 4858 }, { "epoch": 9.242035187826914, "grad_norm": 0.17167243361473083, "learning_rate": 6.917116544934901e-05, "loss": 0.0664, "step": 4859 }, { "epoch": 9.243937232524964, "grad_norm": 0.2831684648990631, "learning_rate": 6.916481422673865e-05, "loss": 0.0836, "step": 4860 }, { "epoch": 9.245839277223014, "grad_norm": 0.32651644945144653, "learning_rate": 6.915846300412829e-05, "loss": 0.0855, "step": 4861 }, { "epoch": 9.247741321921065, "grad_norm": 0.5113350749015808, "learning_rate": 6.915211178151795e-05, "loss": 0.097, "step": 4862 }, { "epoch": 9.249643366619116, "grad_norm": 0.23918244242668152, "learning_rate": 6.914576055890759e-05, "loss": 0.0676, "step": 4863 }, { "epoch": 9.251545411317165, "grad_norm": 0.24368615448474884, "learning_rate": 6.913940933629724e-05, "loss": 0.0679, "step": 4864 }, { "epoch": 9.253447456015216, "grad_norm": 0.19113053381443024, "learning_rate": 6.91330581136869e-05, "loss": 0.0661, "step": 4865 }, { "epoch": 9.255349500713267, "grad_norm": 0.3215082585811615, "learning_rate": 6.912670689107653e-05, "loss": 0.0782, "step": 4866 }, { "epoch": 9.257251545411318, "grad_norm": 0.18558810651302338, "learning_rate": 6.912035566846618e-05, "loss": 0.0771, "step": 4867 }, { "epoch": 9.259153590109367, "grad_norm": 0.25669926404953003, "learning_rate": 6.911400444585582e-05, "loss": 0.0877, "step": 4868 }, { "epoch": 9.261055634807418, "grad_norm": 0.37997862696647644, "learning_rate": 6.910765322324549e-05, "loss": 0.0919, "step": 4869 }, { "epoch": 9.262957679505469, "grad_norm": 0.27809038758277893, "learning_rate": 6.910130200063513e-05, "loss": 0.0598, "step": 4870 }, { "epoch": 9.264859724203518, "grad_norm": 0.1689881980419159, "learning_rate": 6.909495077802476e-05, "loss": 0.0479, "step": 4871 }, { "epoch": 9.266761768901569, "grad_norm": 0.14065071940422058, "learning_rate": 6.908859955541443e-05, "loss": 0.0515, "step": 4872 }, { "epoch": 9.26866381359962, "grad_norm": 0.3398038148880005, "learning_rate": 6.908224833280407e-05, "loss": 0.1203, "step": 4873 }, { "epoch": 9.27056585829767, "grad_norm": 0.21026046574115753, "learning_rate": 6.90758971101937e-05, "loss": 0.0633, "step": 4874 }, { "epoch": 9.27246790299572, "grad_norm": 0.16988354921340942, "learning_rate": 6.906954588758336e-05, "loss": 0.0663, "step": 4875 }, { "epoch": 9.27436994769377, "grad_norm": 0.18398746848106384, "learning_rate": 6.906319466497301e-05, "loss": 0.0744, "step": 4876 }, { "epoch": 9.276271992391822, "grad_norm": 0.20699293911457062, "learning_rate": 6.905684344236266e-05, "loss": 0.0801, "step": 4877 }, { "epoch": 9.278174037089872, "grad_norm": 0.2684108316898346, "learning_rate": 6.90504922197523e-05, "loss": 0.0823, "step": 4878 }, { "epoch": 9.280076081787922, "grad_norm": 0.3421522378921509, "learning_rate": 6.904414099714195e-05, "loss": 0.0975, "step": 4879 }, { "epoch": 9.281978126485972, "grad_norm": 0.216671422123909, "learning_rate": 6.90377897745316e-05, "loss": 0.0657, "step": 4880 }, { "epoch": 9.283880171184023, "grad_norm": 0.23740790784358978, "learning_rate": 6.903143855192124e-05, "loss": 0.0921, "step": 4881 }, { "epoch": 9.285782215882072, "grad_norm": 0.2520216107368469, "learning_rate": 6.90250873293109e-05, "loss": 0.0746, "step": 4882 }, { "epoch": 9.287684260580123, "grad_norm": 0.22703856229782104, "learning_rate": 6.901873610670055e-05, "loss": 0.0669, "step": 4883 }, { "epoch": 9.289586305278174, "grad_norm": 0.20447954535484314, "learning_rate": 6.901238488409018e-05, "loss": 0.0625, "step": 4884 }, { "epoch": 9.291488349976225, "grad_norm": 0.2351393699645996, "learning_rate": 6.900603366147984e-05, "loss": 0.0561, "step": 4885 }, { "epoch": 9.293390394674274, "grad_norm": 0.19731055200099945, "learning_rate": 6.899968243886949e-05, "loss": 0.0675, "step": 4886 }, { "epoch": 9.295292439372325, "grad_norm": 0.29714658856391907, "learning_rate": 6.899333121625914e-05, "loss": 0.0762, "step": 4887 }, { "epoch": 9.297194484070376, "grad_norm": 0.37264585494995117, "learning_rate": 6.898697999364878e-05, "loss": 0.0869, "step": 4888 }, { "epoch": 9.299096528768427, "grad_norm": 0.16094455122947693, "learning_rate": 6.898062877103843e-05, "loss": 0.0754, "step": 4889 }, { "epoch": 9.300998573466476, "grad_norm": 0.3849022686481476, "learning_rate": 6.897427754842808e-05, "loss": 0.1015, "step": 4890 }, { "epoch": 9.302900618164527, "grad_norm": 0.2579275667667389, "learning_rate": 6.896792632581772e-05, "loss": 0.0725, "step": 4891 }, { "epoch": 9.304802662862578, "grad_norm": 0.2276908904314041, "learning_rate": 6.896157510320737e-05, "loss": 0.0586, "step": 4892 }, { "epoch": 9.306704707560627, "grad_norm": 0.2620905637741089, "learning_rate": 6.895522388059702e-05, "loss": 0.0504, "step": 4893 }, { "epoch": 9.308606752258678, "grad_norm": 0.21400023996829987, "learning_rate": 6.894887265798666e-05, "loss": 0.0581, "step": 4894 }, { "epoch": 9.310508796956729, "grad_norm": 0.3381659984588623, "learning_rate": 6.894252143537631e-05, "loss": 0.0745, "step": 4895 }, { "epoch": 9.31241084165478, "grad_norm": 0.3350960910320282, "learning_rate": 6.893617021276597e-05, "loss": 0.0796, "step": 4896 }, { "epoch": 9.314312886352829, "grad_norm": 0.14392538368701935, "learning_rate": 6.89298189901556e-05, "loss": 0.0615, "step": 4897 }, { "epoch": 9.31621493105088, "grad_norm": 0.19645485281944275, "learning_rate": 6.892346776754526e-05, "loss": 0.0698, "step": 4898 }, { "epoch": 9.31811697574893, "grad_norm": 0.2611951231956482, "learning_rate": 6.89171165449349e-05, "loss": 0.0731, "step": 4899 }, { "epoch": 9.320019020446981, "grad_norm": 0.20194761455059052, "learning_rate": 6.891076532232456e-05, "loss": 0.0649, "step": 4900 }, { "epoch": 9.32192106514503, "grad_norm": 0.17792201042175293, "learning_rate": 6.89044140997142e-05, "loss": 0.0809, "step": 4901 }, { "epoch": 9.323823109843081, "grad_norm": 0.26867902278900146, "learning_rate": 6.889806287710384e-05, "loss": 0.0715, "step": 4902 }, { "epoch": 9.325725154541132, "grad_norm": 0.2845976948738098, "learning_rate": 6.88917116544935e-05, "loss": 0.0738, "step": 4903 }, { "epoch": 9.327627199239181, "grad_norm": 0.39770546555519104, "learning_rate": 6.888536043188314e-05, "loss": 0.0905, "step": 4904 }, { "epoch": 9.329529243937232, "grad_norm": 0.27722543478012085, "learning_rate": 6.887900920927279e-05, "loss": 0.063, "step": 4905 }, { "epoch": 9.331431288635283, "grad_norm": 0.23620283603668213, "learning_rate": 6.887265798666243e-05, "loss": 0.0701, "step": 4906 }, { "epoch": 9.333333333333334, "grad_norm": 0.1973799765110016, "learning_rate": 6.886630676405208e-05, "loss": 0.0704, "step": 4907 }, { "epoch": 9.335235378031383, "grad_norm": 0.29582053422927856, "learning_rate": 6.885995554144173e-05, "loss": 0.0702, "step": 4908 }, { "epoch": 9.337137422729434, "grad_norm": 0.32822665572166443, "learning_rate": 6.885360431883137e-05, "loss": 0.0773, "step": 4909 }, { "epoch": 9.339039467427485, "grad_norm": 0.2938401401042938, "learning_rate": 6.884725309622102e-05, "loss": 0.0686, "step": 4910 }, { "epoch": 9.340941512125536, "grad_norm": 0.23710989952087402, "learning_rate": 6.884090187361068e-05, "loss": 0.071, "step": 4911 }, { "epoch": 9.342843556823585, "grad_norm": 0.37398549914360046, "learning_rate": 6.883455065100031e-05, "loss": 0.088, "step": 4912 }, { "epoch": 9.344745601521636, "grad_norm": 0.3170340657234192, "learning_rate": 6.882819942838997e-05, "loss": 0.0736, "step": 4913 }, { "epoch": 9.346647646219687, "grad_norm": 0.21240757405757904, "learning_rate": 6.882184820577962e-05, "loss": 0.0569, "step": 4914 }, { "epoch": 9.348549690917736, "grad_norm": 0.2711277902126312, "learning_rate": 6.881549698316926e-05, "loss": 0.0644, "step": 4915 }, { "epoch": 9.350451735615787, "grad_norm": 0.3614808917045593, "learning_rate": 6.880914576055891e-05, "loss": 0.0888, "step": 4916 }, { "epoch": 9.352353780313837, "grad_norm": 0.2747959792613983, "learning_rate": 6.880279453794856e-05, "loss": 0.0933, "step": 4917 }, { "epoch": 9.354255825011888, "grad_norm": 0.20947587490081787, "learning_rate": 6.879644331533821e-05, "loss": 0.0658, "step": 4918 }, { "epoch": 9.356157869709937, "grad_norm": 0.19765761494636536, "learning_rate": 6.879009209272785e-05, "loss": 0.0739, "step": 4919 }, { "epoch": 9.358059914407988, "grad_norm": 0.22963549196720123, "learning_rate": 6.87837408701175e-05, "loss": 0.0485, "step": 4920 }, { "epoch": 9.35996195910604, "grad_norm": 0.22210000455379486, "learning_rate": 6.877738964750715e-05, "loss": 0.0731, "step": 4921 }, { "epoch": 9.36186400380409, "grad_norm": 0.22614774107933044, "learning_rate": 6.877103842489679e-05, "loss": 0.1153, "step": 4922 }, { "epoch": 9.36376604850214, "grad_norm": 0.20579391717910767, "learning_rate": 6.876468720228644e-05, "loss": 0.0631, "step": 4923 }, { "epoch": 9.36566809320019, "grad_norm": 0.25111064314842224, "learning_rate": 6.87583359796761e-05, "loss": 0.0606, "step": 4924 }, { "epoch": 9.367570137898241, "grad_norm": 0.20417334139347076, "learning_rate": 6.875198475706573e-05, "loss": 0.0904, "step": 4925 }, { "epoch": 9.36947218259629, "grad_norm": 0.21653708815574646, "learning_rate": 6.874563353445539e-05, "loss": 0.0585, "step": 4926 }, { "epoch": 9.371374227294341, "grad_norm": 0.28104594349861145, "learning_rate": 6.873928231184504e-05, "loss": 0.064, "step": 4927 }, { "epoch": 9.373276271992392, "grad_norm": 0.275823712348938, "learning_rate": 6.873293108923468e-05, "loss": 0.0749, "step": 4928 }, { "epoch": 9.375178316690443, "grad_norm": 0.16498956084251404, "learning_rate": 6.872657986662433e-05, "loss": 0.0601, "step": 4929 }, { "epoch": 9.377080361388492, "grad_norm": 0.278196781873703, "learning_rate": 6.872022864401398e-05, "loss": 0.0765, "step": 4930 }, { "epoch": 9.378982406086543, "grad_norm": 0.2505214512348175, "learning_rate": 6.871387742140363e-05, "loss": 0.0612, "step": 4931 }, { "epoch": 9.380884450784594, "grad_norm": 0.26770439743995667, "learning_rate": 6.870752619879327e-05, "loss": 0.0572, "step": 4932 }, { "epoch": 9.382786495482645, "grad_norm": 0.24536767601966858, "learning_rate": 6.870117497618291e-05, "loss": 0.0579, "step": 4933 }, { "epoch": 9.384688540180694, "grad_norm": 0.26408347487449646, "learning_rate": 6.869482375357257e-05, "loss": 0.0767, "step": 4934 }, { "epoch": 9.386590584878745, "grad_norm": 0.2711765766143799, "learning_rate": 6.868847253096221e-05, "loss": 0.0797, "step": 4935 }, { "epoch": 9.388492629576795, "grad_norm": 0.2707313299179077, "learning_rate": 6.868212130835186e-05, "loss": 0.0687, "step": 4936 }, { "epoch": 9.390394674274846, "grad_norm": 0.21455048024654388, "learning_rate": 6.867577008574152e-05, "loss": 0.0737, "step": 4937 }, { "epoch": 9.392296718972895, "grad_norm": 0.24911226332187653, "learning_rate": 6.866941886313115e-05, "loss": 0.0589, "step": 4938 }, { "epoch": 9.394198763670946, "grad_norm": 0.17448081076145172, "learning_rate": 6.86630676405208e-05, "loss": 0.0682, "step": 4939 }, { "epoch": 9.396100808368997, "grad_norm": 0.3200829327106476, "learning_rate": 6.865671641791044e-05, "loss": 0.0846, "step": 4940 }, { "epoch": 9.398002853067046, "grad_norm": 0.18561430275440216, "learning_rate": 6.865036519530011e-05, "loss": 0.0685, "step": 4941 }, { "epoch": 9.399904897765097, "grad_norm": 0.2448703497648239, "learning_rate": 6.864401397268975e-05, "loss": 0.0912, "step": 4942 }, { "epoch": 9.401806942463148, "grad_norm": 0.19484390318393707, "learning_rate": 6.863766275007939e-05, "loss": 0.0543, "step": 4943 }, { "epoch": 9.403708987161199, "grad_norm": 0.1991858035326004, "learning_rate": 6.863131152746905e-05, "loss": 0.0716, "step": 4944 }, { "epoch": 9.405611031859248, "grad_norm": 0.2221297323703766, "learning_rate": 6.862496030485869e-05, "loss": 0.0621, "step": 4945 }, { "epoch": 9.407513076557299, "grad_norm": 0.26933857798576355, "learning_rate": 6.861860908224833e-05, "loss": 0.0525, "step": 4946 }, { "epoch": 9.40941512125535, "grad_norm": 0.275558203458786, "learning_rate": 6.861225785963798e-05, "loss": 0.0717, "step": 4947 }, { "epoch": 9.4113171659534, "grad_norm": 0.24430836737155914, "learning_rate": 6.860590663702763e-05, "loss": 0.0624, "step": 4948 }, { "epoch": 9.41321921065145, "grad_norm": 0.19468329846858978, "learning_rate": 6.859955541441728e-05, "loss": 0.0546, "step": 4949 }, { "epoch": 9.4151212553495, "grad_norm": 0.1639413833618164, "learning_rate": 6.859320419180692e-05, "loss": 0.0617, "step": 4950 }, { "epoch": 9.417023300047552, "grad_norm": 0.25814270973205566, "learning_rate": 6.858685296919657e-05, "loss": 0.0654, "step": 4951 }, { "epoch": 9.4189253447456, "grad_norm": 0.18765553832054138, "learning_rate": 6.858050174658623e-05, "loss": 0.0757, "step": 4952 }, { "epoch": 9.420827389443652, "grad_norm": 0.17281563580036163, "learning_rate": 6.857415052397586e-05, "loss": 0.063, "step": 4953 }, { "epoch": 9.422729434141702, "grad_norm": 0.1971518099308014, "learning_rate": 6.856779930136552e-05, "loss": 0.0695, "step": 4954 }, { "epoch": 9.424631478839753, "grad_norm": 0.20317022502422333, "learning_rate": 6.856144807875517e-05, "loss": 0.0531, "step": 4955 }, { "epoch": 9.426533523537802, "grad_norm": 0.2848222851753235, "learning_rate": 6.85550968561448e-05, "loss": 0.067, "step": 4956 }, { "epoch": 9.428435568235853, "grad_norm": 0.20473995804786682, "learning_rate": 6.854874563353446e-05, "loss": 0.0723, "step": 4957 }, { "epoch": 9.430337612933904, "grad_norm": 0.20673026144504547, "learning_rate": 6.854239441092411e-05, "loss": 0.054, "step": 4958 }, { "epoch": 9.432239657631955, "grad_norm": 0.20065629482269287, "learning_rate": 6.853604318831376e-05, "loss": 0.0723, "step": 4959 }, { "epoch": 9.434141702330004, "grad_norm": 0.2596694529056549, "learning_rate": 6.85296919657034e-05, "loss": 0.045, "step": 4960 }, { "epoch": 9.436043747028055, "grad_norm": 0.22267058491706848, "learning_rate": 6.852334074309305e-05, "loss": 0.0486, "step": 4961 }, { "epoch": 9.437945791726106, "grad_norm": 0.18863868713378906, "learning_rate": 6.85169895204827e-05, "loss": 0.0638, "step": 4962 }, { "epoch": 9.439847836424155, "grad_norm": 0.23035591840744019, "learning_rate": 6.851063829787234e-05, "loss": 0.0731, "step": 4963 }, { "epoch": 9.441749881122206, "grad_norm": 0.19516980648040771, "learning_rate": 6.850428707526198e-05, "loss": 0.0589, "step": 4964 }, { "epoch": 9.443651925820257, "grad_norm": 0.22958572208881378, "learning_rate": 6.849793585265164e-05, "loss": 0.0682, "step": 4965 }, { "epoch": 9.445553970518308, "grad_norm": 0.33866357803344727, "learning_rate": 6.849158463004128e-05, "loss": 0.089, "step": 4966 }, { "epoch": 9.447456015216357, "grad_norm": 0.27389445900917053, "learning_rate": 6.848523340743093e-05, "loss": 0.0699, "step": 4967 }, { "epoch": 9.449358059914408, "grad_norm": 0.28872737288475037, "learning_rate": 6.847888218482059e-05, "loss": 0.0646, "step": 4968 }, { "epoch": 9.451260104612459, "grad_norm": 0.22543446719646454, "learning_rate": 6.847253096221023e-05, "loss": 0.0639, "step": 4969 }, { "epoch": 9.45316214931051, "grad_norm": 0.23369933664798737, "learning_rate": 6.846617973959988e-05, "loss": 0.0697, "step": 4970 }, { "epoch": 9.455064194008559, "grad_norm": 0.3129672110080719, "learning_rate": 6.845982851698952e-05, "loss": 0.0638, "step": 4971 }, { "epoch": 9.45696623870661, "grad_norm": 0.5016154646873474, "learning_rate": 6.845347729437918e-05, "loss": 0.0995, "step": 4972 }, { "epoch": 9.45886828340466, "grad_norm": 0.2865123748779297, "learning_rate": 6.844712607176882e-05, "loss": 0.0482, "step": 4973 }, { "epoch": 9.46077032810271, "grad_norm": 0.23946310579776764, "learning_rate": 6.844077484915846e-05, "loss": 0.0602, "step": 4974 }, { "epoch": 9.46267237280076, "grad_norm": 0.3832375705242157, "learning_rate": 6.843442362654812e-05, "loss": 0.1025, "step": 4975 }, { "epoch": 9.464574417498811, "grad_norm": 0.2058791220188141, "learning_rate": 6.842807240393776e-05, "loss": 0.0605, "step": 4976 }, { "epoch": 9.466476462196862, "grad_norm": 0.24849610030651093, "learning_rate": 6.842172118132741e-05, "loss": 0.0699, "step": 4977 }, { "epoch": 9.468378506894911, "grad_norm": 0.2340250462293625, "learning_rate": 6.841536995871705e-05, "loss": 0.0606, "step": 4978 }, { "epoch": 9.470280551592962, "grad_norm": 0.2013719379901886, "learning_rate": 6.84090187361067e-05, "loss": 0.0658, "step": 4979 }, { "epoch": 9.472182596291013, "grad_norm": 0.24580608308315277, "learning_rate": 6.840266751349635e-05, "loss": 0.0722, "step": 4980 }, { "epoch": 9.474084640989064, "grad_norm": 0.23459140956401825, "learning_rate": 6.839631629088599e-05, "loss": 0.0578, "step": 4981 }, { "epoch": 9.475986685687113, "grad_norm": 0.2569440007209778, "learning_rate": 6.838996506827564e-05, "loss": 0.0775, "step": 4982 }, { "epoch": 9.477888730385164, "grad_norm": 0.3572919964790344, "learning_rate": 6.83836138456653e-05, "loss": 0.0848, "step": 4983 }, { "epoch": 9.479790775083215, "grad_norm": 0.252504825592041, "learning_rate": 6.837726262305493e-05, "loss": 0.0838, "step": 4984 }, { "epoch": 9.481692819781266, "grad_norm": 0.2865563929080963, "learning_rate": 6.837091140044459e-05, "loss": 0.07, "step": 4985 }, { "epoch": 9.483594864479315, "grad_norm": 0.32399818301200867, "learning_rate": 6.836456017783424e-05, "loss": 0.0794, "step": 4986 }, { "epoch": 9.485496909177366, "grad_norm": 0.1883072704076767, "learning_rate": 6.835820895522388e-05, "loss": 0.0694, "step": 4987 }, { "epoch": 9.487398953875417, "grad_norm": 0.3224576413631439, "learning_rate": 6.835185773261353e-05, "loss": 0.0734, "step": 4988 }, { "epoch": 9.489300998573466, "grad_norm": 0.2641156315803528, "learning_rate": 6.834550651000318e-05, "loss": 0.0738, "step": 4989 }, { "epoch": 9.491203043271517, "grad_norm": 0.3000323176383972, "learning_rate": 6.833915528739283e-05, "loss": 0.0646, "step": 4990 }, { "epoch": 9.493105087969568, "grad_norm": 0.20200790464878082, "learning_rate": 6.833280406478247e-05, "loss": 0.0732, "step": 4991 }, { "epoch": 9.495007132667618, "grad_norm": 0.13494519889354706, "learning_rate": 6.832645284217212e-05, "loss": 0.0533, "step": 4992 }, { "epoch": 9.496909177365668, "grad_norm": 0.22805655002593994, "learning_rate": 6.832010161956177e-05, "loss": 0.0506, "step": 4993 }, { "epoch": 9.498811222063718, "grad_norm": 0.2714557349681854, "learning_rate": 6.831375039695141e-05, "loss": 0.0798, "step": 4994 }, { "epoch": 9.50071326676177, "grad_norm": 0.2856943607330322, "learning_rate": 6.830739917434106e-05, "loss": 0.0755, "step": 4995 }, { "epoch": 9.50261531145982, "grad_norm": 0.39549219608306885, "learning_rate": 6.830104795173072e-05, "loss": 0.0854, "step": 4996 }, { "epoch": 9.50451735615787, "grad_norm": 0.24781033396720886, "learning_rate": 6.829469672912035e-05, "loss": 0.0784, "step": 4997 }, { "epoch": 9.50641940085592, "grad_norm": 0.2739943861961365, "learning_rate": 6.828834550651e-05, "loss": 0.0627, "step": 4998 }, { "epoch": 9.508321445553971, "grad_norm": 0.3200620710849762, "learning_rate": 6.828199428389966e-05, "loss": 0.1221, "step": 4999 }, { "epoch": 9.51022349025202, "grad_norm": 0.2922022342681885, "learning_rate": 6.82756430612893e-05, "loss": 0.0765, "step": 5000 }, { "epoch": 9.512125534950071, "grad_norm": 0.2247658371925354, "learning_rate": 6.826929183867895e-05, "loss": 0.0586, "step": 5001 }, { "epoch": 9.514027579648122, "grad_norm": 0.18211832642555237, "learning_rate": 6.82629406160686e-05, "loss": 0.0559, "step": 5002 }, { "epoch": 9.515929624346173, "grad_norm": 0.4612724483013153, "learning_rate": 6.825658939345825e-05, "loss": 0.1092, "step": 5003 }, { "epoch": 9.517831669044222, "grad_norm": 0.2759299874305725, "learning_rate": 6.825023817084789e-05, "loss": 0.0885, "step": 5004 }, { "epoch": 9.519733713742273, "grad_norm": 0.28181999921798706, "learning_rate": 6.824388694823753e-05, "loss": 0.0585, "step": 5005 }, { "epoch": 9.521635758440324, "grad_norm": 0.249153271317482, "learning_rate": 6.82375357256272e-05, "loss": 0.0625, "step": 5006 }, { "epoch": 9.523537803138375, "grad_norm": 0.23787033557891846, "learning_rate": 6.823118450301683e-05, "loss": 0.0599, "step": 5007 }, { "epoch": 9.525439847836424, "grad_norm": 0.30787795782089233, "learning_rate": 6.822483328040648e-05, "loss": 0.0652, "step": 5008 }, { "epoch": 9.527341892534475, "grad_norm": 0.21903198957443237, "learning_rate": 6.821848205779612e-05, "loss": 0.0689, "step": 5009 }, { "epoch": 9.529243937232525, "grad_norm": 0.2730168402194977, "learning_rate": 6.821213083518577e-05, "loss": 0.088, "step": 5010 }, { "epoch": 9.531145981930575, "grad_norm": 0.2813446819782257, "learning_rate": 6.820577961257543e-05, "loss": 0.0734, "step": 5011 }, { "epoch": 9.533048026628625, "grad_norm": 0.22539003193378448, "learning_rate": 6.819942838996506e-05, "loss": 0.0781, "step": 5012 }, { "epoch": 9.534950071326676, "grad_norm": 0.24514015018939972, "learning_rate": 6.819307716735473e-05, "loss": 0.0629, "step": 5013 }, { "epoch": 9.536852116024727, "grad_norm": 0.2931705415248871, "learning_rate": 6.818672594474437e-05, "loss": 0.0696, "step": 5014 }, { "epoch": 9.538754160722776, "grad_norm": 0.16477547585964203, "learning_rate": 6.8180374722134e-05, "loss": 0.0619, "step": 5015 }, { "epoch": 9.540656205420827, "grad_norm": 0.27690985798835754, "learning_rate": 6.817402349952366e-05, "loss": 0.0736, "step": 5016 }, { "epoch": 9.542558250118878, "grad_norm": 0.27963119745254517, "learning_rate": 6.816767227691331e-05, "loss": 0.0947, "step": 5017 }, { "epoch": 9.544460294816929, "grad_norm": 0.2638995945453644, "learning_rate": 6.816132105430295e-05, "loss": 0.0891, "step": 5018 }, { "epoch": 9.546362339514978, "grad_norm": 0.2374192625284195, "learning_rate": 6.81549698316926e-05, "loss": 0.0668, "step": 5019 }, { "epoch": 9.548264384213029, "grad_norm": 0.16655726730823517, "learning_rate": 6.814861860908225e-05, "loss": 0.0516, "step": 5020 }, { "epoch": 9.55016642891108, "grad_norm": 0.2931881844997406, "learning_rate": 6.81422673864719e-05, "loss": 0.08, "step": 5021 }, { "epoch": 9.552068473609129, "grad_norm": 0.257124125957489, "learning_rate": 6.813591616386154e-05, "loss": 0.0693, "step": 5022 }, { "epoch": 9.55397051830718, "grad_norm": 0.20207662880420685, "learning_rate": 6.81295649412512e-05, "loss": 0.0619, "step": 5023 }, { "epoch": 9.55587256300523, "grad_norm": 0.21767112612724304, "learning_rate": 6.812321371864085e-05, "loss": 0.0612, "step": 5024 }, { "epoch": 9.557774607703282, "grad_norm": 0.21762673556804657, "learning_rate": 6.811686249603048e-05, "loss": 0.073, "step": 5025 }, { "epoch": 9.55967665240133, "grad_norm": 0.2587988078594208, "learning_rate": 6.811051127342014e-05, "loss": 0.0646, "step": 5026 }, { "epoch": 9.561578697099382, "grad_norm": 0.27404069900512695, "learning_rate": 6.810416005080979e-05, "loss": 0.0675, "step": 5027 }, { "epoch": 9.563480741797433, "grad_norm": 0.2376728504896164, "learning_rate": 6.809780882819943e-05, "loss": 0.0646, "step": 5028 }, { "epoch": 9.565382786495483, "grad_norm": 0.20966622233390808, "learning_rate": 6.809145760558908e-05, "loss": 0.0772, "step": 5029 }, { "epoch": 9.567284831193533, "grad_norm": 0.3153587579727173, "learning_rate": 6.808510638297873e-05, "loss": 0.0896, "step": 5030 }, { "epoch": 9.569186875891583, "grad_norm": 0.4038897156715393, "learning_rate": 6.807875516036838e-05, "loss": 0.1006, "step": 5031 }, { "epoch": 9.571088920589634, "grad_norm": 0.16164061427116394, "learning_rate": 6.807240393775802e-05, "loss": 0.0567, "step": 5032 }, { "epoch": 9.572990965287683, "grad_norm": 0.33663511276245117, "learning_rate": 6.806605271514767e-05, "loss": 0.0767, "step": 5033 }, { "epoch": 9.574893009985734, "grad_norm": 0.2418556958436966, "learning_rate": 6.805970149253732e-05, "loss": 0.0766, "step": 5034 }, { "epoch": 9.576795054683785, "grad_norm": 0.26614710688591003, "learning_rate": 6.805335026992696e-05, "loss": 0.0691, "step": 5035 }, { "epoch": 9.578697099381836, "grad_norm": 0.35526224970817566, "learning_rate": 6.80469990473166e-05, "loss": 0.0751, "step": 5036 }, { "epoch": 9.580599144079885, "grad_norm": 0.16759097576141357, "learning_rate": 6.804064782470627e-05, "loss": 0.0548, "step": 5037 }, { "epoch": 9.582501188777936, "grad_norm": 0.3385069668292999, "learning_rate": 6.80342966020959e-05, "loss": 0.0734, "step": 5038 }, { "epoch": 9.584403233475987, "grad_norm": 0.24936793744564056, "learning_rate": 6.802794537948556e-05, "loss": 0.0846, "step": 5039 }, { "epoch": 9.586305278174038, "grad_norm": 0.22732244431972504, "learning_rate": 6.802159415687521e-05, "loss": 0.0588, "step": 5040 }, { "epoch": 9.588207322872087, "grad_norm": 0.2636946141719818, "learning_rate": 6.801524293426485e-05, "loss": 0.0652, "step": 5041 }, { "epoch": 9.590109367570138, "grad_norm": 0.25417861342430115, "learning_rate": 6.80088917116545e-05, "loss": 0.0701, "step": 5042 }, { "epoch": 9.592011412268189, "grad_norm": 0.2900865375995636, "learning_rate": 6.800254048904414e-05, "loss": 0.1018, "step": 5043 }, { "epoch": 9.593913456966238, "grad_norm": 0.21319755911827087, "learning_rate": 6.79961892664338e-05, "loss": 0.0673, "step": 5044 }, { "epoch": 9.595815501664289, "grad_norm": 0.4287901222705841, "learning_rate": 6.798983804382344e-05, "loss": 0.1022, "step": 5045 }, { "epoch": 9.59771754636234, "grad_norm": 0.22494946420192719, "learning_rate": 6.798348682121308e-05, "loss": 0.0494, "step": 5046 }, { "epoch": 9.59961959106039, "grad_norm": 0.3113909065723419, "learning_rate": 6.797713559860274e-05, "loss": 0.2004, "step": 5047 }, { "epoch": 9.60152163575844, "grad_norm": 0.3399859368801117, "learning_rate": 6.797078437599238e-05, "loss": 0.0865, "step": 5048 }, { "epoch": 9.60342368045649, "grad_norm": 0.3913676142692566, "learning_rate": 6.796443315338203e-05, "loss": 0.0732, "step": 5049 }, { "epoch": 9.605325725154541, "grad_norm": 0.2704809308052063, "learning_rate": 6.795808193077167e-05, "loss": 0.0718, "step": 5050 }, { "epoch": 9.607227769852592, "grad_norm": 0.1767328977584839, "learning_rate": 6.795173070816132e-05, "loss": 0.0618, "step": 5051 }, { "epoch": 9.609129814550641, "grad_norm": 0.3268260061740875, "learning_rate": 6.794537948555098e-05, "loss": 0.081, "step": 5052 }, { "epoch": 9.611031859248692, "grad_norm": 0.24618151783943176, "learning_rate": 6.793902826294061e-05, "loss": 0.0791, "step": 5053 }, { "epoch": 9.612933903946743, "grad_norm": 0.3275710940361023, "learning_rate": 6.793267704033027e-05, "loss": 0.0838, "step": 5054 }, { "epoch": 9.614835948644792, "grad_norm": 0.2791629135608673, "learning_rate": 6.792632581771992e-05, "loss": 0.0446, "step": 5055 }, { "epoch": 9.616737993342843, "grad_norm": 0.2838995158672333, "learning_rate": 6.791997459510956e-05, "loss": 0.0627, "step": 5056 }, { "epoch": 9.618640038040894, "grad_norm": 0.32235971093177795, "learning_rate": 6.791362337249921e-05, "loss": 0.0832, "step": 5057 }, { "epoch": 9.620542082738945, "grad_norm": 0.3361624777317047, "learning_rate": 6.790727214988886e-05, "loss": 0.0731, "step": 5058 }, { "epoch": 9.622444127436994, "grad_norm": 0.31241574883461, "learning_rate": 6.79009209272785e-05, "loss": 0.0823, "step": 5059 }, { "epoch": 9.624346172135045, "grad_norm": 0.5286279916763306, "learning_rate": 6.789456970466815e-05, "loss": 0.0851, "step": 5060 }, { "epoch": 9.626248216833096, "grad_norm": 0.2209668606519699, "learning_rate": 6.78882184820578e-05, "loss": 0.0851, "step": 5061 }, { "epoch": 9.628150261531147, "grad_norm": 0.23965579271316528, "learning_rate": 6.788186725944745e-05, "loss": 0.0752, "step": 5062 }, { "epoch": 9.630052306229196, "grad_norm": 0.320737361907959, "learning_rate": 6.787551603683709e-05, "loss": 0.0671, "step": 5063 }, { "epoch": 9.631954350927247, "grad_norm": 0.23596954345703125, "learning_rate": 6.786916481422674e-05, "loss": 0.0661, "step": 5064 }, { "epoch": 9.633856395625298, "grad_norm": 0.24826346337795258, "learning_rate": 6.78628135916164e-05, "loss": 0.0673, "step": 5065 }, { "epoch": 9.635758440323347, "grad_norm": 0.19784359633922577, "learning_rate": 6.785646236900603e-05, "loss": 0.0644, "step": 5066 }, { "epoch": 9.637660485021398, "grad_norm": 0.2708280682563782, "learning_rate": 6.785011114639569e-05, "loss": 0.0962, "step": 5067 }, { "epoch": 9.639562529719448, "grad_norm": 0.22095151245594025, "learning_rate": 6.784375992378534e-05, "loss": 0.0792, "step": 5068 }, { "epoch": 9.6414645744175, "grad_norm": 0.3527117967605591, "learning_rate": 6.783740870117498e-05, "loss": 0.0867, "step": 5069 }, { "epoch": 9.643366619115548, "grad_norm": 0.27398914098739624, "learning_rate": 6.783105747856463e-05, "loss": 0.0707, "step": 5070 }, { "epoch": 9.6452686638136, "grad_norm": 0.251521497964859, "learning_rate": 6.782470625595428e-05, "loss": 0.0494, "step": 5071 }, { "epoch": 9.64717070851165, "grad_norm": 0.23466455936431885, "learning_rate": 6.781835503334392e-05, "loss": 0.0762, "step": 5072 }, { "epoch": 9.649072753209701, "grad_norm": 0.25730809569358826, "learning_rate": 6.781200381073357e-05, "loss": 0.0598, "step": 5073 }, { "epoch": 9.65097479790775, "grad_norm": 0.23233462870121002, "learning_rate": 6.780565258812321e-05, "loss": 0.0696, "step": 5074 }, { "epoch": 9.652876842605801, "grad_norm": 0.34221500158309937, "learning_rate": 6.779930136551287e-05, "loss": 0.0859, "step": 5075 }, { "epoch": 9.654778887303852, "grad_norm": 0.24130195379257202, "learning_rate": 6.779295014290251e-05, "loss": 0.0625, "step": 5076 }, { "epoch": 9.656680932001901, "grad_norm": 0.27201899886131287, "learning_rate": 6.778659892029215e-05, "loss": 0.0839, "step": 5077 }, { "epoch": 9.658582976699952, "grad_norm": 0.2817753255367279, "learning_rate": 6.778024769768181e-05, "loss": 0.0709, "step": 5078 }, { "epoch": 9.660485021398003, "grad_norm": 0.3388703465461731, "learning_rate": 6.777389647507145e-05, "loss": 0.0655, "step": 5079 }, { "epoch": 9.662387066096054, "grad_norm": 0.3778761625289917, "learning_rate": 6.77675452524611e-05, "loss": 0.0941, "step": 5080 }, { "epoch": 9.664289110794103, "grad_norm": 0.23203331232070923, "learning_rate": 6.776119402985074e-05, "loss": 0.0725, "step": 5081 }, { "epoch": 9.666191155492154, "grad_norm": 0.32718273997306824, "learning_rate": 6.77548428072404e-05, "loss": 0.061, "step": 5082 }, { "epoch": 9.668093200190205, "grad_norm": 0.21871528029441833, "learning_rate": 6.774849158463005e-05, "loss": 0.0615, "step": 5083 }, { "epoch": 9.669995244888256, "grad_norm": 0.20493032038211823, "learning_rate": 6.774214036201969e-05, "loss": 0.0708, "step": 5084 }, { "epoch": 9.671897289586305, "grad_norm": 0.2302158623933792, "learning_rate": 6.773578913940935e-05, "loss": 0.0857, "step": 5085 }, { "epoch": 9.673799334284356, "grad_norm": 0.2125237137079239, "learning_rate": 6.772943791679899e-05, "loss": 0.0888, "step": 5086 }, { "epoch": 9.675701378982406, "grad_norm": 0.2613074779510498, "learning_rate": 6.772308669418863e-05, "loss": 0.0651, "step": 5087 }, { "epoch": 9.677603423680456, "grad_norm": 0.3250272274017334, "learning_rate": 6.771673547157828e-05, "loss": 0.099, "step": 5088 }, { "epoch": 9.679505468378506, "grad_norm": 0.18376415967941284, "learning_rate": 6.771038424896793e-05, "loss": 0.0609, "step": 5089 }, { "epoch": 9.681407513076557, "grad_norm": 0.14176584780216217, "learning_rate": 6.770403302635757e-05, "loss": 0.0511, "step": 5090 }, { "epoch": 9.683309557774608, "grad_norm": 0.21321751177310944, "learning_rate": 6.769768180374722e-05, "loss": 0.0586, "step": 5091 }, { "epoch": 9.685211602472657, "grad_norm": 0.19097277522087097, "learning_rate": 6.769133058113687e-05, "loss": 0.069, "step": 5092 }, { "epoch": 9.687113647170708, "grad_norm": 0.3134055733680725, "learning_rate": 6.768497935852652e-05, "loss": 0.0799, "step": 5093 }, { "epoch": 9.689015691868759, "grad_norm": 0.19542521238327026, "learning_rate": 6.767862813591616e-05, "loss": 0.0513, "step": 5094 }, { "epoch": 9.69091773656681, "grad_norm": 0.2252933830022812, "learning_rate": 6.767227691330581e-05, "loss": 0.0717, "step": 5095 }, { "epoch": 9.692819781264859, "grad_norm": 0.24214401841163635, "learning_rate": 6.766592569069547e-05, "loss": 0.0713, "step": 5096 }, { "epoch": 9.69472182596291, "grad_norm": 0.38027697801589966, "learning_rate": 6.76595744680851e-05, "loss": 0.0793, "step": 5097 }, { "epoch": 9.69662387066096, "grad_norm": 0.2558886706829071, "learning_rate": 6.765322324547476e-05, "loss": 0.0595, "step": 5098 }, { "epoch": 9.698525915359012, "grad_norm": 0.22915878891944885, "learning_rate": 6.764687202286441e-05, "loss": 0.0804, "step": 5099 }, { "epoch": 9.70042796005706, "grad_norm": 0.2833732068538666, "learning_rate": 6.764052080025405e-05, "loss": 0.0802, "step": 5100 }, { "epoch": 9.702330004755112, "grad_norm": 0.20914685726165771, "learning_rate": 6.76341695776437e-05, "loss": 0.0546, "step": 5101 }, { "epoch": 9.704232049453163, "grad_norm": 0.26040971279144287, "learning_rate": 6.762781835503335e-05, "loss": 0.0718, "step": 5102 }, { "epoch": 9.706134094151212, "grad_norm": 0.21980637311935425, "learning_rate": 6.7621467132423e-05, "loss": 0.0758, "step": 5103 }, { "epoch": 9.708036138849263, "grad_norm": 0.24792368710041046, "learning_rate": 6.761511590981264e-05, "loss": 0.0799, "step": 5104 }, { "epoch": 9.709938183547314, "grad_norm": 0.23755225539207458, "learning_rate": 6.760876468720229e-05, "loss": 0.0606, "step": 5105 }, { "epoch": 9.711840228245364, "grad_norm": 0.14159350097179413, "learning_rate": 6.760241346459194e-05, "loss": 0.0666, "step": 5106 }, { "epoch": 9.713742272943414, "grad_norm": 0.29056262969970703, "learning_rate": 6.759606224198158e-05, "loss": 0.0782, "step": 5107 }, { "epoch": 9.715644317641464, "grad_norm": 0.23274298012256622, "learning_rate": 6.758971101937122e-05, "loss": 0.0672, "step": 5108 }, { "epoch": 9.717546362339515, "grad_norm": 0.37022560834884644, "learning_rate": 6.758335979676089e-05, "loss": 0.0746, "step": 5109 }, { "epoch": 9.719448407037566, "grad_norm": 0.18731588125228882, "learning_rate": 6.757700857415052e-05, "loss": 0.0537, "step": 5110 }, { "epoch": 9.721350451735615, "grad_norm": 0.33824440836906433, "learning_rate": 6.757065735154018e-05, "loss": 0.0741, "step": 5111 }, { "epoch": 9.723252496433666, "grad_norm": 0.3501187860965729, "learning_rate": 6.756430612892983e-05, "loss": 0.0853, "step": 5112 }, { "epoch": 9.725154541131717, "grad_norm": 0.25903284549713135, "learning_rate": 6.755795490631947e-05, "loss": 0.0572, "step": 5113 }, { "epoch": 9.727056585829766, "grad_norm": 0.19478189945220947, "learning_rate": 6.755160368370912e-05, "loss": 0.0643, "step": 5114 }, { "epoch": 9.728958630527817, "grad_norm": 0.249579519033432, "learning_rate": 6.754525246109876e-05, "loss": 0.0737, "step": 5115 }, { "epoch": 9.730860675225868, "grad_norm": 0.2832339107990265, "learning_rate": 6.753890123848842e-05, "loss": 0.093, "step": 5116 }, { "epoch": 9.732762719923919, "grad_norm": 0.22517451643943787, "learning_rate": 6.753255001587806e-05, "loss": 0.0696, "step": 5117 }, { "epoch": 9.734664764621968, "grad_norm": 0.28813955187797546, "learning_rate": 6.75261987932677e-05, "loss": 0.0872, "step": 5118 }, { "epoch": 9.736566809320019, "grad_norm": 0.36511409282684326, "learning_rate": 6.751984757065735e-05, "loss": 0.087, "step": 5119 }, { "epoch": 9.73846885401807, "grad_norm": 0.3195239305496216, "learning_rate": 6.7513496348047e-05, "loss": 0.0863, "step": 5120 }, { "epoch": 9.74037089871612, "grad_norm": 0.39804649353027344, "learning_rate": 6.750714512543665e-05, "loss": 0.077, "step": 5121 }, { "epoch": 9.74227294341417, "grad_norm": 0.20853570103645325, "learning_rate": 6.750079390282629e-05, "loss": 0.0538, "step": 5122 }, { "epoch": 9.74417498811222, "grad_norm": 0.29665881395339966, "learning_rate": 6.749444268021594e-05, "loss": 0.0811, "step": 5123 }, { "epoch": 9.746077032810271, "grad_norm": 0.2606349289417267, "learning_rate": 6.74880914576056e-05, "loss": 0.0701, "step": 5124 }, { "epoch": 9.747979077508322, "grad_norm": 0.297983855009079, "learning_rate": 6.748174023499523e-05, "loss": 0.0837, "step": 5125 }, { "epoch": 9.749881122206371, "grad_norm": 0.4061759412288666, "learning_rate": 6.747538901238489e-05, "loss": 0.0913, "step": 5126 }, { "epoch": 9.751783166904422, "grad_norm": 0.26573270559310913, "learning_rate": 6.746903778977454e-05, "loss": 0.1043, "step": 5127 }, { "epoch": 9.753685211602473, "grad_norm": 0.3666708767414093, "learning_rate": 6.746268656716418e-05, "loss": 0.0852, "step": 5128 }, { "epoch": 9.755587256300522, "grad_norm": 0.29040297865867615, "learning_rate": 6.745633534455383e-05, "loss": 0.0786, "step": 5129 }, { "epoch": 9.757489300998573, "grad_norm": 0.2016884982585907, "learning_rate": 6.744998412194348e-05, "loss": 0.0695, "step": 5130 }, { "epoch": 9.759391345696624, "grad_norm": 0.28894975781440735, "learning_rate": 6.744363289933312e-05, "loss": 0.0689, "step": 5131 }, { "epoch": 9.761293390394675, "grad_norm": 0.3561621606349945, "learning_rate": 6.743728167672277e-05, "loss": 0.0942, "step": 5132 }, { "epoch": 9.763195435092724, "grad_norm": 0.23403863608837128, "learning_rate": 6.743093045411242e-05, "loss": 0.0946, "step": 5133 }, { "epoch": 9.765097479790775, "grad_norm": 0.18953385949134827, "learning_rate": 6.742457923150207e-05, "loss": 0.0621, "step": 5134 }, { "epoch": 9.766999524488826, "grad_norm": 0.2510560154914856, "learning_rate": 6.741822800889171e-05, "loss": 0.076, "step": 5135 }, { "epoch": 9.768901569186877, "grad_norm": 0.23607076704502106, "learning_rate": 6.741187678628136e-05, "loss": 0.0554, "step": 5136 }, { "epoch": 9.770803613884926, "grad_norm": 0.26703163981437683, "learning_rate": 6.740552556367102e-05, "loss": 0.0917, "step": 5137 }, { "epoch": 9.772705658582977, "grad_norm": 0.3677026033401489, "learning_rate": 6.739917434106065e-05, "loss": 0.0853, "step": 5138 }, { "epoch": 9.774607703281028, "grad_norm": 0.22919195890426636, "learning_rate": 6.73928231184503e-05, "loss": 0.0548, "step": 5139 }, { "epoch": 9.776509747979077, "grad_norm": 0.24974371492862701, "learning_rate": 6.738647189583996e-05, "loss": 0.0657, "step": 5140 }, { "epoch": 9.778411792677128, "grad_norm": 0.23522870242595673, "learning_rate": 6.73801206732296e-05, "loss": 0.0765, "step": 5141 }, { "epoch": 9.780313837375179, "grad_norm": 0.3335094451904297, "learning_rate": 6.737376945061925e-05, "loss": 0.0584, "step": 5142 }, { "epoch": 9.78221588207323, "grad_norm": 0.24172602593898773, "learning_rate": 6.73674182280089e-05, "loss": 0.0613, "step": 5143 }, { "epoch": 9.784117926771279, "grad_norm": 0.24692966043949127, "learning_rate": 6.736106700539854e-05, "loss": 0.0681, "step": 5144 }, { "epoch": 9.78601997146933, "grad_norm": 0.1990744024515152, "learning_rate": 6.735471578278819e-05, "loss": 0.0643, "step": 5145 }, { "epoch": 9.78792201616738, "grad_norm": 0.182316854596138, "learning_rate": 6.734836456017783e-05, "loss": 0.0611, "step": 5146 }, { "epoch": 9.789824060865431, "grad_norm": 0.2731354832649231, "learning_rate": 6.73420133375675e-05, "loss": 0.0725, "step": 5147 }, { "epoch": 9.79172610556348, "grad_norm": 0.23140563070774078, "learning_rate": 6.733566211495713e-05, "loss": 0.075, "step": 5148 }, { "epoch": 9.793628150261531, "grad_norm": 0.2696152329444885, "learning_rate": 6.732931089234677e-05, "loss": 0.0788, "step": 5149 }, { "epoch": 9.795530194959582, "grad_norm": 0.22241799533367157, "learning_rate": 6.732295966973644e-05, "loss": 0.0661, "step": 5150 }, { "epoch": 9.797432239657631, "grad_norm": 0.24715057015419006, "learning_rate": 6.731660844712607e-05, "loss": 0.0768, "step": 5151 }, { "epoch": 9.799334284355682, "grad_norm": 0.16379965841770172, "learning_rate": 6.731025722451573e-05, "loss": 0.0515, "step": 5152 }, { "epoch": 9.801236329053733, "grad_norm": 0.25936359167099, "learning_rate": 6.730390600190536e-05, "loss": 0.0655, "step": 5153 }, { "epoch": 9.803138373751784, "grad_norm": 0.29590755701065063, "learning_rate": 6.729755477929502e-05, "loss": 0.0761, "step": 5154 }, { "epoch": 9.805040418449833, "grad_norm": 0.2646804749965668, "learning_rate": 6.729120355668467e-05, "loss": 0.0717, "step": 5155 }, { "epoch": 9.806942463147884, "grad_norm": 0.2943807542324066, "learning_rate": 6.72848523340743e-05, "loss": 0.0979, "step": 5156 }, { "epoch": 9.808844507845935, "grad_norm": 0.25199463963508606, "learning_rate": 6.727850111146397e-05, "loss": 0.0617, "step": 5157 }, { "epoch": 9.810746552543986, "grad_norm": 0.23185868561267853, "learning_rate": 6.727214988885361e-05, "loss": 0.0615, "step": 5158 }, { "epoch": 9.812648597242035, "grad_norm": 0.2331693172454834, "learning_rate": 6.726579866624325e-05, "loss": 0.0535, "step": 5159 }, { "epoch": 9.814550641940086, "grad_norm": 0.23456302285194397, "learning_rate": 6.72594474436329e-05, "loss": 0.0796, "step": 5160 }, { "epoch": 9.816452686638137, "grad_norm": 0.18812789022922516, "learning_rate": 6.725309622102255e-05, "loss": 0.0626, "step": 5161 }, { "epoch": 9.818354731336186, "grad_norm": 0.21228209137916565, "learning_rate": 6.724674499841219e-05, "loss": 0.0669, "step": 5162 }, { "epoch": 9.820256776034237, "grad_norm": 0.24619713425636292, "learning_rate": 6.724039377580184e-05, "loss": 0.0628, "step": 5163 }, { "epoch": 9.822158820732287, "grad_norm": 0.26102298498153687, "learning_rate": 6.72340425531915e-05, "loss": 0.069, "step": 5164 }, { "epoch": 9.824060865430338, "grad_norm": 0.271749883890152, "learning_rate": 6.722769133058115e-05, "loss": 0.0904, "step": 5165 }, { "epoch": 9.825962910128387, "grad_norm": 0.43275517225265503, "learning_rate": 6.722134010797078e-05, "loss": 0.1015, "step": 5166 }, { "epoch": 9.827864954826438, "grad_norm": 0.36582517623901367, "learning_rate": 6.721498888536044e-05, "loss": 0.0702, "step": 5167 }, { "epoch": 9.82976699952449, "grad_norm": 0.24312864243984222, "learning_rate": 6.720863766275009e-05, "loss": 0.076, "step": 5168 }, { "epoch": 9.83166904422254, "grad_norm": 0.26955533027648926, "learning_rate": 6.720228644013973e-05, "loss": 0.064, "step": 5169 }, { "epoch": 9.83357108892059, "grad_norm": 0.24358266592025757, "learning_rate": 6.719593521752938e-05, "loss": 0.0541, "step": 5170 }, { "epoch": 9.83547313361864, "grad_norm": 0.2266244739294052, "learning_rate": 6.718958399491903e-05, "loss": 0.0716, "step": 5171 }, { "epoch": 9.837375178316691, "grad_norm": 0.34034213423728943, "learning_rate": 6.718323277230867e-05, "loss": 0.0819, "step": 5172 }, { "epoch": 9.83927722301474, "grad_norm": 0.2445746511220932, "learning_rate": 6.717688154969832e-05, "loss": 0.0671, "step": 5173 }, { "epoch": 9.841179267712791, "grad_norm": 0.3341140151023865, "learning_rate": 6.717053032708797e-05, "loss": 0.0818, "step": 5174 }, { "epoch": 9.843081312410842, "grad_norm": 0.38900166749954224, "learning_rate": 6.716417910447762e-05, "loss": 0.0942, "step": 5175 }, { "epoch": 9.844983357108893, "grad_norm": 0.3027609884738922, "learning_rate": 6.715782788186726e-05, "loss": 0.075, "step": 5176 }, { "epoch": 9.846885401806942, "grad_norm": 0.29892697930336, "learning_rate": 6.71514766592569e-05, "loss": 0.0619, "step": 5177 }, { "epoch": 9.848787446504993, "grad_norm": 0.2879316508769989, "learning_rate": 6.714512543664657e-05, "loss": 0.0737, "step": 5178 }, { "epoch": 9.850689491203044, "grad_norm": 0.31047359108924866, "learning_rate": 6.71387742140362e-05, "loss": 0.0809, "step": 5179 }, { "epoch": 9.852591535901094, "grad_norm": 0.19660452008247375, "learning_rate": 6.713242299142584e-05, "loss": 0.0569, "step": 5180 }, { "epoch": 9.854493580599144, "grad_norm": 0.2426648586988449, "learning_rate": 6.712607176881551e-05, "loss": 0.0609, "step": 5181 }, { "epoch": 9.856395625297194, "grad_norm": 0.24112775921821594, "learning_rate": 6.711972054620515e-05, "loss": 0.0585, "step": 5182 }, { "epoch": 9.858297669995245, "grad_norm": 0.17509464919567108, "learning_rate": 6.71133693235948e-05, "loss": 0.0757, "step": 5183 }, { "epoch": 9.860199714693294, "grad_norm": 0.23438102006912231, "learning_rate": 6.710701810098444e-05, "loss": 0.0597, "step": 5184 }, { "epoch": 9.862101759391345, "grad_norm": 0.23365238308906555, "learning_rate": 6.710066687837409e-05, "loss": 0.081, "step": 5185 }, { "epoch": 9.864003804089396, "grad_norm": 0.28201591968536377, "learning_rate": 6.709431565576374e-05, "loss": 0.0711, "step": 5186 }, { "epoch": 9.865905848787447, "grad_norm": 0.2844151556491852, "learning_rate": 6.708796443315338e-05, "loss": 0.0793, "step": 5187 }, { "epoch": 9.867807893485496, "grad_norm": 0.2788445055484772, "learning_rate": 6.708161321054304e-05, "loss": 0.079, "step": 5188 }, { "epoch": 9.869709938183547, "grad_norm": 0.17613370716571808, "learning_rate": 6.707526198793268e-05, "loss": 0.054, "step": 5189 }, { "epoch": 9.871611982881598, "grad_norm": 0.40397071838378906, "learning_rate": 6.706891076532232e-05, "loss": 0.0819, "step": 5190 }, { "epoch": 9.873514027579649, "grad_norm": 0.21067620813846588, "learning_rate": 6.706255954271197e-05, "loss": 0.0609, "step": 5191 }, { "epoch": 9.875416072277698, "grad_norm": 0.1924947053194046, "learning_rate": 6.705620832010162e-05, "loss": 0.0729, "step": 5192 }, { "epoch": 9.877318116975749, "grad_norm": 0.317019522190094, "learning_rate": 6.704985709749128e-05, "loss": 0.0915, "step": 5193 }, { "epoch": 9.8792201616738, "grad_norm": 0.27511605620384216, "learning_rate": 6.704350587488091e-05, "loss": 0.0993, "step": 5194 }, { "epoch": 9.881122206371849, "grad_norm": 0.24651335179805756, "learning_rate": 6.703715465227057e-05, "loss": 0.045, "step": 5195 }, { "epoch": 9.8830242510699, "grad_norm": 0.3148553669452667, "learning_rate": 6.703080342966022e-05, "loss": 0.0787, "step": 5196 }, { "epoch": 9.88492629576795, "grad_norm": 0.3132942020893097, "learning_rate": 6.702445220704986e-05, "loss": 0.0865, "step": 5197 }, { "epoch": 9.886828340466002, "grad_norm": 0.23067975044250488, "learning_rate": 6.701810098443951e-05, "loss": 0.0631, "step": 5198 }, { "epoch": 9.88873038516405, "grad_norm": 0.3110620677471161, "learning_rate": 6.701174976182916e-05, "loss": 0.0849, "step": 5199 }, { "epoch": 9.890632429862102, "grad_norm": 0.3161821663379669, "learning_rate": 6.70053985392188e-05, "loss": 0.1031, "step": 5200 }, { "epoch": 9.892534474560152, "grad_norm": 0.36968421936035156, "learning_rate": 6.699904731660845e-05, "loss": 0.0807, "step": 5201 }, { "epoch": 9.894436519258203, "grad_norm": 0.3091064393520355, "learning_rate": 6.69926960939981e-05, "loss": 0.0816, "step": 5202 }, { "epoch": 9.896338563956252, "grad_norm": 0.24623756110668182, "learning_rate": 6.698634487138774e-05, "loss": 0.0966, "step": 5203 }, { "epoch": 9.898240608654303, "grad_norm": 0.21923215687274933, "learning_rate": 6.697999364877739e-05, "loss": 0.0677, "step": 5204 }, { "epoch": 9.900142653352354, "grad_norm": 0.32677069306373596, "learning_rate": 6.697364242616704e-05, "loss": 0.0863, "step": 5205 }, { "epoch": 9.902044698050403, "grad_norm": 0.32407426834106445, "learning_rate": 6.69672912035567e-05, "loss": 0.0815, "step": 5206 }, { "epoch": 9.903946742748454, "grad_norm": 0.20180168747901917, "learning_rate": 6.696093998094633e-05, "loss": 0.0588, "step": 5207 }, { "epoch": 9.905848787446505, "grad_norm": 0.2846551537513733, "learning_rate": 6.695458875833599e-05, "loss": 0.0707, "step": 5208 }, { "epoch": 9.907750832144556, "grad_norm": 0.22882051765918732, "learning_rate": 6.694823753572564e-05, "loss": 0.0459, "step": 5209 }, { "epoch": 9.909652876842605, "grad_norm": 0.2983953356742859, "learning_rate": 6.694188631311528e-05, "loss": 0.0781, "step": 5210 }, { "epoch": 9.911554921540656, "grad_norm": 0.3955616354942322, "learning_rate": 6.693553509050493e-05, "loss": 0.0782, "step": 5211 }, { "epoch": 9.913456966238707, "grad_norm": 0.22890278697013855, "learning_rate": 6.692918386789458e-05, "loss": 0.0545, "step": 5212 }, { "epoch": 9.915359010936758, "grad_norm": 0.2330084592103958, "learning_rate": 6.692283264528422e-05, "loss": 0.0612, "step": 5213 }, { "epoch": 9.917261055634807, "grad_norm": 0.3311925530433655, "learning_rate": 6.691648142267387e-05, "loss": 0.074, "step": 5214 }, { "epoch": 9.919163100332858, "grad_norm": 0.48154211044311523, "learning_rate": 6.691013020006352e-05, "loss": 0.0739, "step": 5215 }, { "epoch": 9.921065145030909, "grad_norm": 0.3412367105484009, "learning_rate": 6.690377897745316e-05, "loss": 0.0766, "step": 5216 }, { "epoch": 9.922967189728958, "grad_norm": 0.18190982937812805, "learning_rate": 6.689742775484281e-05, "loss": 0.0509, "step": 5217 }, { "epoch": 9.924869234427009, "grad_norm": 0.23118863999843597, "learning_rate": 6.689107653223245e-05, "loss": 0.0655, "step": 5218 }, { "epoch": 9.92677127912506, "grad_norm": 0.2357853502035141, "learning_rate": 6.688472530962211e-05, "loss": 0.0689, "step": 5219 }, { "epoch": 9.92867332382311, "grad_norm": 0.3301222026348114, "learning_rate": 6.687837408701175e-05, "loss": 0.079, "step": 5220 }, { "epoch": 9.93057536852116, "grad_norm": 0.3085179030895233, "learning_rate": 6.687202286440139e-05, "loss": 0.0826, "step": 5221 }, { "epoch": 9.93247741321921, "grad_norm": 0.23810113966464996, "learning_rate": 6.686567164179106e-05, "loss": 0.0773, "step": 5222 }, { "epoch": 9.934379457917261, "grad_norm": 0.38858041167259216, "learning_rate": 6.68593204191807e-05, "loss": 0.0957, "step": 5223 }, { "epoch": 9.936281502615312, "grad_norm": 0.15044887363910675, "learning_rate": 6.685296919657035e-05, "loss": 0.0512, "step": 5224 }, { "epoch": 9.938183547313361, "grad_norm": 0.23409341275691986, "learning_rate": 6.684661797395999e-05, "loss": 0.0822, "step": 5225 }, { "epoch": 9.940085592011412, "grad_norm": 0.2580219507217407, "learning_rate": 6.684026675134964e-05, "loss": 0.0806, "step": 5226 }, { "epoch": 9.941987636709463, "grad_norm": 0.21620117127895355, "learning_rate": 6.683391552873929e-05, "loss": 0.0722, "step": 5227 }, { "epoch": 9.943889681407512, "grad_norm": 0.33486407995224, "learning_rate": 6.682756430612893e-05, "loss": 0.0661, "step": 5228 }, { "epoch": 9.945791726105563, "grad_norm": 0.21919208765029907, "learning_rate": 6.682121308351858e-05, "loss": 0.062, "step": 5229 }, { "epoch": 9.947693770803614, "grad_norm": 0.30199721455574036, "learning_rate": 6.681486186090823e-05, "loss": 0.0926, "step": 5230 }, { "epoch": 9.949595815501665, "grad_norm": 0.3589622974395752, "learning_rate": 6.680851063829787e-05, "loss": 0.0914, "step": 5231 }, { "epoch": 9.951497860199714, "grad_norm": 0.32676953077316284, "learning_rate": 6.680215941568752e-05, "loss": 0.0971, "step": 5232 }, { "epoch": 9.953399904897765, "grad_norm": 0.23442213237285614, "learning_rate": 6.679580819307717e-05, "loss": 0.0597, "step": 5233 }, { "epoch": 9.955301949595816, "grad_norm": 0.29047128558158875, "learning_rate": 6.678945697046681e-05, "loss": 0.0693, "step": 5234 }, { "epoch": 9.957203994293867, "grad_norm": 0.17525753378868103, "learning_rate": 6.678310574785646e-05, "loss": 0.0552, "step": 5235 }, { "epoch": 9.959106038991916, "grad_norm": 0.276344358921051, "learning_rate": 6.677675452524611e-05, "loss": 0.0784, "step": 5236 }, { "epoch": 9.961008083689967, "grad_norm": 0.323887437582016, "learning_rate": 6.677040330263577e-05, "loss": 0.0734, "step": 5237 }, { "epoch": 9.962910128388017, "grad_norm": 0.25159624218940735, "learning_rate": 6.67640520800254e-05, "loss": 0.0683, "step": 5238 }, { "epoch": 9.964812173086067, "grad_norm": 0.23952990770339966, "learning_rate": 6.675770085741506e-05, "loss": 0.0635, "step": 5239 }, { "epoch": 9.966714217784117, "grad_norm": 0.21993881464004517, "learning_rate": 6.675134963480471e-05, "loss": 0.0741, "step": 5240 }, { "epoch": 9.968616262482168, "grad_norm": 0.2130298614501953, "learning_rate": 6.674499841219435e-05, "loss": 0.0696, "step": 5241 }, { "epoch": 9.97051830718022, "grad_norm": 0.3035607933998108, "learning_rate": 6.6738647189584e-05, "loss": 0.0913, "step": 5242 }, { "epoch": 9.972420351878268, "grad_norm": 0.34198522567749023, "learning_rate": 6.673229596697365e-05, "loss": 0.1082, "step": 5243 }, { "epoch": 9.97432239657632, "grad_norm": 0.2956897020339966, "learning_rate": 6.672594474436329e-05, "loss": 0.074, "step": 5244 }, { "epoch": 9.97622444127437, "grad_norm": 0.43758097290992737, "learning_rate": 6.671959352175294e-05, "loss": 0.0786, "step": 5245 }, { "epoch": 9.978126485972421, "grad_norm": 0.17064926028251648, "learning_rate": 6.671324229914259e-05, "loss": 0.0571, "step": 5246 }, { "epoch": 9.98002853067047, "grad_norm": 0.3719223737716675, "learning_rate": 6.670689107653224e-05, "loss": 0.0961, "step": 5247 }, { "epoch": 9.981930575368521, "grad_norm": 0.16983850300312042, "learning_rate": 6.670053985392188e-05, "loss": 0.0459, "step": 5248 }, { "epoch": 9.983832620066572, "grad_norm": 0.3689050078392029, "learning_rate": 6.669418863131152e-05, "loss": 0.099, "step": 5249 }, { "epoch": 9.985734664764623, "grad_norm": 0.35418006777763367, "learning_rate": 6.668783740870119e-05, "loss": 0.0761, "step": 5250 }, { "epoch": 10.00190204469805, "grad_norm": 0.39694878458976746, "learning_rate": 6.668148618609082e-05, "loss": 0.1313, "step": 5251 }, { "epoch": 10.0038040893961, "grad_norm": 0.6753419041633606, "learning_rate": 6.667513496348046e-05, "loss": 0.1505, "step": 5252 }, { "epoch": 10.00570613409415, "grad_norm": 0.46378642320632935, "learning_rate": 6.666878374087013e-05, "loss": 0.1148, "step": 5253 }, { "epoch": 10.007608178792202, "grad_norm": 0.342328280210495, "learning_rate": 6.666243251825977e-05, "loss": 0.1214, "step": 5254 }, { "epoch": 10.009510223490253, "grad_norm": 0.41636717319488525, "learning_rate": 6.665608129564942e-05, "loss": 0.1316, "step": 5255 }, { "epoch": 10.011412268188302, "grad_norm": 0.5482467412948608, "learning_rate": 6.664973007303906e-05, "loss": 0.1513, "step": 5256 }, { "epoch": 10.013314312886353, "grad_norm": 0.28404325246810913, "learning_rate": 6.664337885042871e-05, "loss": 0.1009, "step": 5257 }, { "epoch": 10.015216357584404, "grad_norm": 0.14674490690231323, "learning_rate": 6.663702762781836e-05, "loss": 0.0506, "step": 5258 }, { "epoch": 10.017118402282454, "grad_norm": 0.15933150053024292, "learning_rate": 6.6630676405208e-05, "loss": 0.0569, "step": 5259 }, { "epoch": 10.019020446980504, "grad_norm": 0.29720667004585266, "learning_rate": 6.662432518259766e-05, "loss": 0.0712, "step": 5260 }, { "epoch": 10.020922491678554, "grad_norm": 0.22701764106750488, "learning_rate": 6.66179739599873e-05, "loss": 0.0773, "step": 5261 }, { "epoch": 10.022824536376605, "grad_norm": 0.15102310478687286, "learning_rate": 6.661162273737694e-05, "loss": 0.057, "step": 5262 }, { "epoch": 10.024726581074654, "grad_norm": 0.184609517455101, "learning_rate": 6.660527151476659e-05, "loss": 0.0584, "step": 5263 }, { "epoch": 10.026628625772705, "grad_norm": 0.223251610994339, "learning_rate": 6.659892029215624e-05, "loss": 0.0465, "step": 5264 }, { "epoch": 10.028530670470756, "grad_norm": 0.2808699905872345, "learning_rate": 6.65925690695459e-05, "loss": 0.0836, "step": 5265 }, { "epoch": 10.030432715168807, "grad_norm": 0.3460269272327423, "learning_rate": 6.658621784693553e-05, "loss": 0.0767, "step": 5266 }, { "epoch": 10.032334759866856, "grad_norm": 0.24595172703266144, "learning_rate": 6.657986662432519e-05, "loss": 0.06, "step": 5267 }, { "epoch": 10.034236804564907, "grad_norm": 0.252651572227478, "learning_rate": 6.657351540171484e-05, "loss": 0.0565, "step": 5268 }, { "epoch": 10.036138849262958, "grad_norm": 0.23152634501457214, "learning_rate": 6.656716417910448e-05, "loss": 0.0701, "step": 5269 }, { "epoch": 10.038040893961009, "grad_norm": 0.17529435455799103, "learning_rate": 6.656081295649413e-05, "loss": 0.0527, "step": 5270 }, { "epoch": 10.039942938659058, "grad_norm": 0.1904911994934082, "learning_rate": 6.655446173388378e-05, "loss": 0.0861, "step": 5271 }, { "epoch": 10.041844983357109, "grad_norm": 0.35613206028938293, "learning_rate": 6.654811051127342e-05, "loss": 0.074, "step": 5272 }, { "epoch": 10.04374702805516, "grad_norm": 0.2638046145439148, "learning_rate": 6.654175928866307e-05, "loss": 0.117, "step": 5273 }, { "epoch": 10.045649072753209, "grad_norm": 0.21620406210422516, "learning_rate": 6.653540806605272e-05, "loss": 0.0586, "step": 5274 }, { "epoch": 10.04755111745126, "grad_norm": 0.16865353286266327, "learning_rate": 6.652905684344236e-05, "loss": 0.0665, "step": 5275 }, { "epoch": 10.04945316214931, "grad_norm": 0.15396840870380402, "learning_rate": 6.652270562083201e-05, "loss": 0.0542, "step": 5276 }, { "epoch": 10.051355206847362, "grad_norm": 0.3527628779411316, "learning_rate": 6.651635439822166e-05, "loss": 0.0813, "step": 5277 }, { "epoch": 10.05325725154541, "grad_norm": 0.16249553859233856, "learning_rate": 6.651000317561132e-05, "loss": 0.0546, "step": 5278 }, { "epoch": 10.055159296243461, "grad_norm": 0.38107484579086304, "learning_rate": 6.650365195300095e-05, "loss": 0.0803, "step": 5279 }, { "epoch": 10.057061340941512, "grad_norm": 0.14425760507583618, "learning_rate": 6.64973007303906e-05, "loss": 0.0411, "step": 5280 }, { "epoch": 10.058963385639563, "grad_norm": 0.20022647082805634, "learning_rate": 6.649094950778026e-05, "loss": 0.0641, "step": 5281 }, { "epoch": 10.060865430337612, "grad_norm": 0.2576226592063904, "learning_rate": 6.64845982851699e-05, "loss": 0.0779, "step": 5282 }, { "epoch": 10.062767475035663, "grad_norm": 0.23341166973114014, "learning_rate": 6.647824706255955e-05, "loss": 0.0769, "step": 5283 }, { "epoch": 10.064669519733714, "grad_norm": 0.20656423270702362, "learning_rate": 6.64718958399492e-05, "loss": 0.07, "step": 5284 }, { "epoch": 10.066571564431765, "grad_norm": 0.26069366931915283, "learning_rate": 6.646554461733884e-05, "loss": 0.0513, "step": 5285 }, { "epoch": 10.068473609129814, "grad_norm": 0.3721386194229126, "learning_rate": 6.645919339472849e-05, "loss": 0.0896, "step": 5286 }, { "epoch": 10.070375653827865, "grad_norm": 0.36856430768966675, "learning_rate": 6.645284217211813e-05, "loss": 0.0785, "step": 5287 }, { "epoch": 10.072277698525916, "grad_norm": 0.2893162965774536, "learning_rate": 6.644649094950778e-05, "loss": 0.0697, "step": 5288 }, { "epoch": 10.074179743223965, "grad_norm": 0.21396392583847046, "learning_rate": 6.644013972689743e-05, "loss": 0.0694, "step": 5289 }, { "epoch": 10.076081787922016, "grad_norm": 0.49254268407821655, "learning_rate": 6.643378850428707e-05, "loss": 0.0813, "step": 5290 }, { "epoch": 10.077983832620067, "grad_norm": 0.22749567031860352, "learning_rate": 6.642743728167674e-05, "loss": 0.059, "step": 5291 }, { "epoch": 10.079885877318118, "grad_norm": 0.25905197858810425, "learning_rate": 6.642108605906637e-05, "loss": 0.0504, "step": 5292 }, { "epoch": 10.081787922016167, "grad_norm": 0.24032166600227356, "learning_rate": 6.641473483645601e-05, "loss": 0.0757, "step": 5293 }, { "epoch": 10.083689966714218, "grad_norm": 0.34126365184783936, "learning_rate": 6.640838361384566e-05, "loss": 0.076, "step": 5294 }, { "epoch": 10.085592011412269, "grad_norm": 0.12431490421295166, "learning_rate": 6.640203239123532e-05, "loss": 0.0373, "step": 5295 }, { "epoch": 10.08749405611032, "grad_norm": 0.3214389681816101, "learning_rate": 6.639568116862497e-05, "loss": 0.0746, "step": 5296 }, { "epoch": 10.089396100808369, "grad_norm": 0.20561222732067108, "learning_rate": 6.63893299460146e-05, "loss": 0.0598, "step": 5297 }, { "epoch": 10.09129814550642, "grad_norm": 0.19043764472007751, "learning_rate": 6.638297872340426e-05, "loss": 0.0781, "step": 5298 }, { "epoch": 10.09320019020447, "grad_norm": 0.23050783574581146, "learning_rate": 6.637662750079391e-05, "loss": 0.0877, "step": 5299 }, { "epoch": 10.09510223490252, "grad_norm": 0.23546096682548523, "learning_rate": 6.637027627818355e-05, "loss": 0.063, "step": 5300 }, { "epoch": 10.09700427960057, "grad_norm": 0.1960730254650116, "learning_rate": 6.63639250555732e-05, "loss": 0.0688, "step": 5301 }, { "epoch": 10.098906324298621, "grad_norm": 0.2317197173833847, "learning_rate": 6.635757383296285e-05, "loss": 0.0726, "step": 5302 }, { "epoch": 10.100808368996672, "grad_norm": 0.14227913320064545, "learning_rate": 6.635122261035249e-05, "loss": 0.0646, "step": 5303 }, { "epoch": 10.102710413694721, "grad_norm": 0.17636631429195404, "learning_rate": 6.634487138774214e-05, "loss": 0.0486, "step": 5304 }, { "epoch": 10.104612458392772, "grad_norm": 0.2111407071352005, "learning_rate": 6.63385201651318e-05, "loss": 0.0566, "step": 5305 }, { "epoch": 10.106514503090823, "grad_norm": 0.17445050179958344, "learning_rate": 6.633216894252143e-05, "loss": 0.0696, "step": 5306 }, { "epoch": 10.108416547788874, "grad_norm": 0.20955027639865875, "learning_rate": 6.632581771991108e-05, "loss": 0.0662, "step": 5307 }, { "epoch": 10.110318592486923, "grad_norm": 0.2103997766971588, "learning_rate": 6.631946649730074e-05, "loss": 0.0875, "step": 5308 }, { "epoch": 10.112220637184974, "grad_norm": 0.1805890053510666, "learning_rate": 6.631311527469039e-05, "loss": 0.0602, "step": 5309 }, { "epoch": 10.114122681883025, "grad_norm": 0.28872668743133545, "learning_rate": 6.630676405208003e-05, "loss": 0.065, "step": 5310 }, { "epoch": 10.116024726581074, "grad_norm": 0.2273848056793213, "learning_rate": 6.630041282946968e-05, "loss": 0.0679, "step": 5311 }, { "epoch": 10.117926771279125, "grad_norm": 0.2823135554790497, "learning_rate": 6.629406160685933e-05, "loss": 0.0738, "step": 5312 }, { "epoch": 10.119828815977176, "grad_norm": 0.2553102672100067, "learning_rate": 6.628771038424897e-05, "loss": 0.0748, "step": 5313 }, { "epoch": 10.121730860675227, "grad_norm": 0.2882140278816223, "learning_rate": 6.628135916163862e-05, "loss": 0.0672, "step": 5314 }, { "epoch": 10.123632905373276, "grad_norm": 0.2157353013753891, "learning_rate": 6.627500793902827e-05, "loss": 0.0663, "step": 5315 }, { "epoch": 10.125534950071327, "grad_norm": 0.20802216231822968, "learning_rate": 6.626865671641791e-05, "loss": 0.067, "step": 5316 }, { "epoch": 10.127436994769377, "grad_norm": 0.20221203565597534, "learning_rate": 6.626230549380756e-05, "loss": 0.0742, "step": 5317 }, { "epoch": 10.129339039467428, "grad_norm": 0.290554404258728, "learning_rate": 6.625595427119721e-05, "loss": 0.0563, "step": 5318 }, { "epoch": 10.131241084165477, "grad_norm": 0.2569425106048584, "learning_rate": 6.624960304858687e-05, "loss": 0.0677, "step": 5319 }, { "epoch": 10.133143128863528, "grad_norm": 0.20562361180782318, "learning_rate": 6.62432518259765e-05, "loss": 0.0607, "step": 5320 }, { "epoch": 10.13504517356158, "grad_norm": 0.17843393981456757, "learning_rate": 6.623690060336614e-05, "loss": 0.0944, "step": 5321 }, { "epoch": 10.136947218259628, "grad_norm": 0.2664887309074402, "learning_rate": 6.623054938075581e-05, "loss": 0.0878, "step": 5322 }, { "epoch": 10.13884926295768, "grad_norm": 0.2143498659133911, "learning_rate": 6.622419815814545e-05, "loss": 0.0678, "step": 5323 }, { "epoch": 10.14075130765573, "grad_norm": 0.14991843700408936, "learning_rate": 6.621784693553508e-05, "loss": 0.0695, "step": 5324 }, { "epoch": 10.142653352353781, "grad_norm": 0.22047267854213715, "learning_rate": 6.621149571292475e-05, "loss": 0.0817, "step": 5325 }, { "epoch": 10.14455539705183, "grad_norm": 0.20937331020832062, "learning_rate": 6.620514449031439e-05, "loss": 0.0592, "step": 5326 }, { "epoch": 10.146457441749881, "grad_norm": 0.2746511399745941, "learning_rate": 6.619879326770404e-05, "loss": 0.0583, "step": 5327 }, { "epoch": 10.148359486447932, "grad_norm": 0.23711417615413666, "learning_rate": 6.619244204509368e-05, "loss": 0.0739, "step": 5328 }, { "epoch": 10.150261531145983, "grad_norm": 0.4201519787311554, "learning_rate": 6.618609082248333e-05, "loss": 0.0908, "step": 5329 }, { "epoch": 10.152163575844032, "grad_norm": 0.1867586076259613, "learning_rate": 6.617973959987298e-05, "loss": 0.0488, "step": 5330 }, { "epoch": 10.154065620542083, "grad_norm": 0.22101399302482605, "learning_rate": 6.617338837726262e-05, "loss": 0.0532, "step": 5331 }, { "epoch": 10.155967665240134, "grad_norm": 0.18868492543697357, "learning_rate": 6.616703715465228e-05, "loss": 0.0724, "step": 5332 }, { "epoch": 10.157869709938183, "grad_norm": 0.23109176754951477, "learning_rate": 6.616068593204192e-05, "loss": 0.074, "step": 5333 }, { "epoch": 10.159771754636234, "grad_norm": 0.23569205403327942, "learning_rate": 6.615433470943156e-05, "loss": 0.0833, "step": 5334 }, { "epoch": 10.161673799334284, "grad_norm": 0.350978285074234, "learning_rate": 6.614798348682121e-05, "loss": 0.0692, "step": 5335 }, { "epoch": 10.163575844032335, "grad_norm": 0.365357905626297, "learning_rate": 6.614163226421087e-05, "loss": 0.0927, "step": 5336 }, { "epoch": 10.165477888730384, "grad_norm": 0.1944890320301056, "learning_rate": 6.613528104160052e-05, "loss": 0.0858, "step": 5337 }, { "epoch": 10.167379933428435, "grad_norm": 0.20101040601730347, "learning_rate": 6.612892981899016e-05, "loss": 0.0656, "step": 5338 }, { "epoch": 10.169281978126486, "grad_norm": 0.2939762473106384, "learning_rate": 6.612257859637981e-05, "loss": 0.0918, "step": 5339 }, { "epoch": 10.171184022824537, "grad_norm": 0.22608570754528046, "learning_rate": 6.611622737376946e-05, "loss": 0.0675, "step": 5340 }, { "epoch": 10.173086067522586, "grad_norm": 0.49089139699935913, "learning_rate": 6.61098761511591e-05, "loss": 0.072, "step": 5341 }, { "epoch": 10.174988112220637, "grad_norm": 0.28007015585899353, "learning_rate": 6.610352492854875e-05, "loss": 0.0615, "step": 5342 }, { "epoch": 10.176890156918688, "grad_norm": 0.21805545687675476, "learning_rate": 6.60971737059384e-05, "loss": 0.061, "step": 5343 }, { "epoch": 10.178792201616737, "grad_norm": 0.2612074613571167, "learning_rate": 6.609082248332804e-05, "loss": 0.0716, "step": 5344 }, { "epoch": 10.180694246314788, "grad_norm": 0.1996293067932129, "learning_rate": 6.608447126071769e-05, "loss": 0.0743, "step": 5345 }, { "epoch": 10.182596291012839, "grad_norm": 0.18069012463092804, "learning_rate": 6.607812003810734e-05, "loss": 0.0694, "step": 5346 }, { "epoch": 10.18449833571089, "grad_norm": 0.35819369554519653, "learning_rate": 6.607176881549698e-05, "loss": 0.0903, "step": 5347 }, { "epoch": 10.186400380408939, "grad_norm": 0.292188435792923, "learning_rate": 6.606541759288663e-05, "loss": 0.0868, "step": 5348 }, { "epoch": 10.18830242510699, "grad_norm": 0.2514616847038269, "learning_rate": 6.605906637027628e-05, "loss": 0.0717, "step": 5349 }, { "epoch": 10.19020446980504, "grad_norm": 0.2554888427257538, "learning_rate": 6.605271514766594e-05, "loss": 0.072, "step": 5350 }, { "epoch": 10.192106514503092, "grad_norm": 0.24336287379264832, "learning_rate": 6.604636392505558e-05, "loss": 0.0741, "step": 5351 }, { "epoch": 10.19400855920114, "grad_norm": 0.21117272973060608, "learning_rate": 6.604001270244521e-05, "loss": 0.0476, "step": 5352 }, { "epoch": 10.195910603899192, "grad_norm": 0.22045139968395233, "learning_rate": 6.603366147983488e-05, "loss": 0.0531, "step": 5353 }, { "epoch": 10.197812648597242, "grad_norm": 0.19895130395889282, "learning_rate": 6.602731025722452e-05, "loss": 0.0552, "step": 5354 }, { "epoch": 10.199714693295292, "grad_norm": 0.22562842071056366, "learning_rate": 6.602095903461417e-05, "loss": 0.0652, "step": 5355 }, { "epoch": 10.201616737993342, "grad_norm": 0.2797020375728607, "learning_rate": 6.601460781200382e-05, "loss": 0.0789, "step": 5356 }, { "epoch": 10.203518782691393, "grad_norm": 0.1523316651582718, "learning_rate": 6.600825658939346e-05, "loss": 0.0426, "step": 5357 }, { "epoch": 10.205420827389444, "grad_norm": 0.2565470337867737, "learning_rate": 6.600190536678311e-05, "loss": 0.0659, "step": 5358 }, { "epoch": 10.207322872087493, "grad_norm": 0.18802964687347412, "learning_rate": 6.599555414417275e-05, "loss": 0.0657, "step": 5359 }, { "epoch": 10.209224916785544, "grad_norm": 0.20242714881896973, "learning_rate": 6.59892029215624e-05, "loss": 0.0735, "step": 5360 }, { "epoch": 10.211126961483595, "grad_norm": 0.11940667778253555, "learning_rate": 6.598285169895205e-05, "loss": 0.0642, "step": 5361 }, { "epoch": 10.213029006181646, "grad_norm": 0.18162617087364197, "learning_rate": 6.597650047634169e-05, "loss": 0.0558, "step": 5362 }, { "epoch": 10.214931050879695, "grad_norm": 0.24901486933231354, "learning_rate": 6.597014925373136e-05, "loss": 0.0716, "step": 5363 }, { "epoch": 10.216833095577746, "grad_norm": 0.21373523771762848, "learning_rate": 6.5963798031121e-05, "loss": 0.07, "step": 5364 }, { "epoch": 10.218735140275797, "grad_norm": 0.3318414092063904, "learning_rate": 6.595744680851063e-05, "loss": 0.0876, "step": 5365 }, { "epoch": 10.220637184973846, "grad_norm": 0.25907251238822937, "learning_rate": 6.595109558590028e-05, "loss": 0.0788, "step": 5366 }, { "epoch": 10.222539229671897, "grad_norm": 0.16159944236278534, "learning_rate": 6.594474436328994e-05, "loss": 0.0694, "step": 5367 }, { "epoch": 10.224441274369948, "grad_norm": 0.21074874699115753, "learning_rate": 6.593839314067959e-05, "loss": 0.0784, "step": 5368 }, { "epoch": 10.226343319067999, "grad_norm": 0.2328321486711502, "learning_rate": 6.593204191806923e-05, "loss": 0.0671, "step": 5369 }, { "epoch": 10.228245363766048, "grad_norm": 0.39634034037590027, "learning_rate": 6.592569069545888e-05, "loss": 0.0758, "step": 5370 }, { "epoch": 10.230147408464099, "grad_norm": 0.25715863704681396, "learning_rate": 6.591933947284853e-05, "loss": 0.0527, "step": 5371 }, { "epoch": 10.23204945316215, "grad_norm": 0.2564283013343811, "learning_rate": 6.591298825023817e-05, "loss": 0.0619, "step": 5372 }, { "epoch": 10.2339514978602, "grad_norm": 0.24235618114471436, "learning_rate": 6.590663702762782e-05, "loss": 0.0626, "step": 5373 }, { "epoch": 10.23585354255825, "grad_norm": 0.2732139825820923, "learning_rate": 6.590028580501747e-05, "loss": 0.068, "step": 5374 }, { "epoch": 10.2377555872563, "grad_norm": 0.2638366222381592, "learning_rate": 6.589393458240711e-05, "loss": 0.0578, "step": 5375 }, { "epoch": 10.239657631954351, "grad_norm": 0.2025076001882553, "learning_rate": 6.588758335979676e-05, "loss": 0.0549, "step": 5376 }, { "epoch": 10.241559676652402, "grad_norm": 0.24390022456645966, "learning_rate": 6.588123213718641e-05, "loss": 0.0881, "step": 5377 }, { "epoch": 10.243461721350451, "grad_norm": 0.43563568592071533, "learning_rate": 6.587488091457605e-05, "loss": 0.0741, "step": 5378 }, { "epoch": 10.245363766048502, "grad_norm": 0.18210797011852264, "learning_rate": 6.58685296919657e-05, "loss": 0.0585, "step": 5379 }, { "epoch": 10.247265810746553, "grad_norm": 0.17295345664024353, "learning_rate": 6.586217846935536e-05, "loss": 0.0496, "step": 5380 }, { "epoch": 10.249167855444602, "grad_norm": 0.20748892426490784, "learning_rate": 6.585582724674501e-05, "loss": 0.0505, "step": 5381 }, { "epoch": 10.251069900142653, "grad_norm": 0.3153904378414154, "learning_rate": 6.584947602413465e-05, "loss": 0.0653, "step": 5382 }, { "epoch": 10.252971944840704, "grad_norm": 0.16658660769462585, "learning_rate": 6.58431248015243e-05, "loss": 0.0669, "step": 5383 }, { "epoch": 10.254873989538755, "grad_norm": 0.22813019156455994, "learning_rate": 6.583677357891395e-05, "loss": 0.0519, "step": 5384 }, { "epoch": 10.256776034236804, "grad_norm": 0.19745177030563354, "learning_rate": 6.583042235630359e-05, "loss": 0.0759, "step": 5385 }, { "epoch": 10.258678078934855, "grad_norm": 0.2388034611940384, "learning_rate": 6.582407113369324e-05, "loss": 0.0714, "step": 5386 }, { "epoch": 10.260580123632906, "grad_norm": 0.2057228535413742, "learning_rate": 6.581771991108289e-05, "loss": 0.0641, "step": 5387 }, { "epoch": 10.262482168330957, "grad_norm": 0.2861918807029724, "learning_rate": 6.581136868847253e-05, "loss": 0.082, "step": 5388 }, { "epoch": 10.264384213029006, "grad_norm": 0.26108086109161377, "learning_rate": 6.580501746586218e-05, "loss": 0.0703, "step": 5389 }, { "epoch": 10.266286257727057, "grad_norm": 0.331620454788208, "learning_rate": 6.579866624325182e-05, "loss": 0.0836, "step": 5390 }, { "epoch": 10.268188302425107, "grad_norm": 0.24640053510665894, "learning_rate": 6.579231502064149e-05, "loss": 0.051, "step": 5391 }, { "epoch": 10.270090347123157, "grad_norm": 0.27249953150749207, "learning_rate": 6.578596379803112e-05, "loss": 0.0718, "step": 5392 }, { "epoch": 10.271992391821207, "grad_norm": 0.28086280822753906, "learning_rate": 6.577961257542076e-05, "loss": 0.083, "step": 5393 }, { "epoch": 10.273894436519258, "grad_norm": 0.3512130081653595, "learning_rate": 6.577326135281043e-05, "loss": 0.0763, "step": 5394 }, { "epoch": 10.27579648121731, "grad_norm": 0.14599667489528656, "learning_rate": 6.576691013020007e-05, "loss": 0.0707, "step": 5395 }, { "epoch": 10.277698525915358, "grad_norm": 0.19556157290935516, "learning_rate": 6.57605589075897e-05, "loss": 0.052, "step": 5396 }, { "epoch": 10.27960057061341, "grad_norm": 0.24162036180496216, "learning_rate": 6.575420768497936e-05, "loss": 0.0641, "step": 5397 }, { "epoch": 10.28150261531146, "grad_norm": 0.22074343264102936, "learning_rate": 6.574785646236901e-05, "loss": 0.0683, "step": 5398 }, { "epoch": 10.283404660009511, "grad_norm": 0.18643182516098022, "learning_rate": 6.574150523975866e-05, "loss": 0.0964, "step": 5399 }, { "epoch": 10.28530670470756, "grad_norm": 0.22997966408729553, "learning_rate": 6.57351540171483e-05, "loss": 0.0814, "step": 5400 }, { "epoch": 10.287208749405611, "grad_norm": 0.11969192326068878, "learning_rate": 6.572880279453795e-05, "loss": 0.0678, "step": 5401 }, { "epoch": 10.289110794103662, "grad_norm": 0.3286897838115692, "learning_rate": 6.57224515719276e-05, "loss": 0.0916, "step": 5402 }, { "epoch": 10.291012838801711, "grad_norm": 0.2948076128959656, "learning_rate": 6.571610034931724e-05, "loss": 0.0763, "step": 5403 }, { "epoch": 10.292914883499762, "grad_norm": 0.2712877094745636, "learning_rate": 6.570974912670689e-05, "loss": 0.0816, "step": 5404 }, { "epoch": 10.294816928197813, "grad_norm": 0.19706730544567108, "learning_rate": 6.570339790409654e-05, "loss": 0.0604, "step": 5405 }, { "epoch": 10.296718972895864, "grad_norm": 0.27862757444381714, "learning_rate": 6.569704668148618e-05, "loss": 0.0643, "step": 5406 }, { "epoch": 10.298621017593913, "grad_norm": 0.3971151113510132, "learning_rate": 6.569069545887583e-05, "loss": 0.093, "step": 5407 }, { "epoch": 10.300523062291964, "grad_norm": 0.2490602731704712, "learning_rate": 6.568434423626549e-05, "loss": 0.0675, "step": 5408 }, { "epoch": 10.302425106990015, "grad_norm": 0.19942264258861542, "learning_rate": 6.567799301365514e-05, "loss": 0.0596, "step": 5409 }, { "epoch": 10.304327151688065, "grad_norm": 0.17037898302078247, "learning_rate": 6.567164179104478e-05, "loss": 0.076, "step": 5410 }, { "epoch": 10.306229196386115, "grad_norm": 0.20393238961696625, "learning_rate": 6.566529056843443e-05, "loss": 0.0594, "step": 5411 }, { "epoch": 10.308131241084165, "grad_norm": 0.2775140106678009, "learning_rate": 6.565893934582408e-05, "loss": 0.0589, "step": 5412 }, { "epoch": 10.310033285782216, "grad_norm": 0.25280818343162537, "learning_rate": 6.565258812321372e-05, "loss": 0.0616, "step": 5413 }, { "epoch": 10.311935330480265, "grad_norm": 0.22769461572170258, "learning_rate": 6.564623690060337e-05, "loss": 0.0916, "step": 5414 }, { "epoch": 10.313837375178316, "grad_norm": 0.2882596552371979, "learning_rate": 6.563988567799302e-05, "loss": 0.0752, "step": 5415 }, { "epoch": 10.315739419876367, "grad_norm": 0.43357282876968384, "learning_rate": 6.563353445538266e-05, "loss": 0.0798, "step": 5416 }, { "epoch": 10.317641464574418, "grad_norm": 0.26240670680999756, "learning_rate": 6.562718323277231e-05, "loss": 0.0672, "step": 5417 }, { "epoch": 10.319543509272467, "grad_norm": 0.24738280475139618, "learning_rate": 6.562083201016196e-05, "loss": 0.0774, "step": 5418 }, { "epoch": 10.321445553970518, "grad_norm": 0.1921759992837906, "learning_rate": 6.56144807875516e-05, "loss": 0.0616, "step": 5419 }, { "epoch": 10.323347598668569, "grad_norm": 0.1570550799369812, "learning_rate": 6.560812956494125e-05, "loss": 0.0592, "step": 5420 }, { "epoch": 10.32524964336662, "grad_norm": 0.21802720427513123, "learning_rate": 6.56017783423309e-05, "loss": 0.0929, "step": 5421 }, { "epoch": 10.327151688064669, "grad_norm": 0.17816253006458282, "learning_rate": 6.559542711972056e-05, "loss": 0.072, "step": 5422 }, { "epoch": 10.32905373276272, "grad_norm": 0.14564338326454163, "learning_rate": 6.55890758971102e-05, "loss": 0.0412, "step": 5423 }, { "epoch": 10.33095577746077, "grad_norm": 0.1930164396762848, "learning_rate": 6.558272467449983e-05, "loss": 0.0659, "step": 5424 }, { "epoch": 10.332857822158822, "grad_norm": 0.3501245379447937, "learning_rate": 6.55763734518895e-05, "loss": 0.0617, "step": 5425 }, { "epoch": 10.33475986685687, "grad_norm": 0.27747026085853577, "learning_rate": 6.557002222927914e-05, "loss": 0.0842, "step": 5426 }, { "epoch": 10.336661911554922, "grad_norm": 0.24909347295761108, "learning_rate": 6.556367100666879e-05, "loss": 0.0702, "step": 5427 }, { "epoch": 10.338563956252973, "grad_norm": 0.2601378560066223, "learning_rate": 6.555731978405844e-05, "loss": 0.0732, "step": 5428 }, { "epoch": 10.340466000951022, "grad_norm": 0.18222792446613312, "learning_rate": 6.555096856144808e-05, "loss": 0.0512, "step": 5429 }, { "epoch": 10.342368045649073, "grad_norm": 0.1804533451795578, "learning_rate": 6.554461733883773e-05, "loss": 0.0601, "step": 5430 }, { "epoch": 10.344270090347123, "grad_norm": 0.239121213555336, "learning_rate": 6.553826611622737e-05, "loss": 0.0744, "step": 5431 }, { "epoch": 10.346172135045174, "grad_norm": 0.25100961327552795, "learning_rate": 6.553191489361702e-05, "loss": 0.07, "step": 5432 }, { "epoch": 10.348074179743223, "grad_norm": 0.19875814020633698, "learning_rate": 6.552556367100667e-05, "loss": 0.0781, "step": 5433 }, { "epoch": 10.349976224441274, "grad_norm": 0.19966357946395874, "learning_rate": 6.551921244839631e-05, "loss": 0.0744, "step": 5434 }, { "epoch": 10.351878269139325, "grad_norm": 0.2350044846534729, "learning_rate": 6.551286122578598e-05, "loss": 0.0707, "step": 5435 }, { "epoch": 10.353780313837376, "grad_norm": 0.21448549628257751, "learning_rate": 6.550651000317562e-05, "loss": 0.0725, "step": 5436 }, { "epoch": 10.355682358535425, "grad_norm": 0.26761433482170105, "learning_rate": 6.550015878056525e-05, "loss": 0.0523, "step": 5437 }, { "epoch": 10.357584403233476, "grad_norm": 0.30378443002700806, "learning_rate": 6.54938075579549e-05, "loss": 0.0584, "step": 5438 }, { "epoch": 10.359486447931527, "grad_norm": 0.2591283321380615, "learning_rate": 6.548745633534456e-05, "loss": 0.078, "step": 5439 }, { "epoch": 10.361388492629576, "grad_norm": 0.2300068587064743, "learning_rate": 6.548110511273421e-05, "loss": 0.0706, "step": 5440 }, { "epoch": 10.363290537327627, "grad_norm": 0.2989126741886139, "learning_rate": 6.547475389012385e-05, "loss": 0.0587, "step": 5441 }, { "epoch": 10.365192582025678, "grad_norm": 0.2825726568698883, "learning_rate": 6.54684026675135e-05, "loss": 0.0589, "step": 5442 }, { "epoch": 10.367094626723729, "grad_norm": 0.24061256647109985, "learning_rate": 6.546205144490315e-05, "loss": 0.0759, "step": 5443 }, { "epoch": 10.368996671421778, "grad_norm": 0.31294533610343933, "learning_rate": 6.545570022229279e-05, "loss": 0.0846, "step": 5444 }, { "epoch": 10.370898716119829, "grad_norm": 0.13745763897895813, "learning_rate": 6.544934899968244e-05, "loss": 0.065, "step": 5445 }, { "epoch": 10.37280076081788, "grad_norm": 0.33087965846061707, "learning_rate": 6.54429977770721e-05, "loss": 0.0744, "step": 5446 }, { "epoch": 10.37470280551593, "grad_norm": 0.16575999557971954, "learning_rate": 6.543664655446173e-05, "loss": 0.0637, "step": 5447 }, { "epoch": 10.37660485021398, "grad_norm": 0.17727252840995789, "learning_rate": 6.543029533185138e-05, "loss": 0.0883, "step": 5448 }, { "epoch": 10.37850689491203, "grad_norm": 0.29931455850601196, "learning_rate": 6.542394410924104e-05, "loss": 0.0877, "step": 5449 }, { "epoch": 10.380408939610081, "grad_norm": 0.1964782327413559, "learning_rate": 6.541759288663067e-05, "loss": 0.0585, "step": 5450 }, { "epoch": 10.38231098430813, "grad_norm": 0.2581721842288971, "learning_rate": 6.541124166402033e-05, "loss": 0.0549, "step": 5451 }, { "epoch": 10.384213029006181, "grad_norm": 0.2323419451713562, "learning_rate": 6.540489044140998e-05, "loss": 0.0675, "step": 5452 }, { "epoch": 10.386115073704232, "grad_norm": 0.3017885684967041, "learning_rate": 6.539853921879963e-05, "loss": 0.0737, "step": 5453 }, { "epoch": 10.388017118402283, "grad_norm": 0.2838028073310852, "learning_rate": 6.539218799618927e-05, "loss": 0.0706, "step": 5454 }, { "epoch": 10.389919163100332, "grad_norm": 0.20796798169612885, "learning_rate": 6.53858367735789e-05, "loss": 0.0546, "step": 5455 }, { "epoch": 10.391821207798383, "grad_norm": 0.25365984439849854, "learning_rate": 6.537948555096857e-05, "loss": 0.0914, "step": 5456 }, { "epoch": 10.393723252496434, "grad_norm": 0.2627136707305908, "learning_rate": 6.537313432835821e-05, "loss": 0.0732, "step": 5457 }, { "epoch": 10.395625297194485, "grad_norm": 0.16268092393875122, "learning_rate": 6.536678310574786e-05, "loss": 0.0554, "step": 5458 }, { "epoch": 10.397527341892534, "grad_norm": 0.1628711223602295, "learning_rate": 6.536043188313751e-05, "loss": 0.0719, "step": 5459 }, { "epoch": 10.399429386590585, "grad_norm": 0.19809551537036896, "learning_rate": 6.535408066052715e-05, "loss": 0.0709, "step": 5460 }, { "epoch": 10.401331431288636, "grad_norm": 0.14292262494564056, "learning_rate": 6.53477294379168e-05, "loss": 0.0445, "step": 5461 }, { "epoch": 10.403233475986685, "grad_norm": 0.25781774520874023, "learning_rate": 6.534137821530644e-05, "loss": 0.0749, "step": 5462 }, { "epoch": 10.405135520684736, "grad_norm": 0.3060111701488495, "learning_rate": 6.533502699269611e-05, "loss": 0.0776, "step": 5463 }, { "epoch": 10.407037565382787, "grad_norm": 0.2065560519695282, "learning_rate": 6.532867577008575e-05, "loss": 0.0903, "step": 5464 }, { "epoch": 10.408939610080838, "grad_norm": 0.3038816452026367, "learning_rate": 6.532232454747538e-05, "loss": 0.0803, "step": 5465 }, { "epoch": 10.410841654778887, "grad_norm": 0.19374610483646393, "learning_rate": 6.531597332486505e-05, "loss": 0.0586, "step": 5466 }, { "epoch": 10.412743699476938, "grad_norm": 0.3853093981742859, "learning_rate": 6.530962210225469e-05, "loss": 0.0857, "step": 5467 }, { "epoch": 10.414645744174988, "grad_norm": 0.2604793608188629, "learning_rate": 6.530327087964433e-05, "loss": 0.0683, "step": 5468 }, { "epoch": 10.41654778887304, "grad_norm": 0.2732802629470825, "learning_rate": 6.529691965703398e-05, "loss": 0.079, "step": 5469 }, { "epoch": 10.418449833571088, "grad_norm": 0.2541634738445282, "learning_rate": 6.529056843442363e-05, "loss": 0.0939, "step": 5470 }, { "epoch": 10.42035187826914, "grad_norm": 0.23742996156215668, "learning_rate": 6.528421721181328e-05, "loss": 0.0937, "step": 5471 }, { "epoch": 10.42225392296719, "grad_norm": 0.3552636206150055, "learning_rate": 6.527786598920292e-05, "loss": 0.108, "step": 5472 }, { "epoch": 10.42415596766524, "grad_norm": 0.17072491347789764, "learning_rate": 6.527151476659257e-05, "loss": 0.0529, "step": 5473 }, { "epoch": 10.42605801236329, "grad_norm": 0.36401453614234924, "learning_rate": 6.526516354398222e-05, "loss": 0.0853, "step": 5474 }, { "epoch": 10.427960057061341, "grad_norm": 0.27907583117485046, "learning_rate": 6.525881232137186e-05, "loss": 0.0653, "step": 5475 }, { "epoch": 10.429862101759392, "grad_norm": 0.13606156408786774, "learning_rate": 6.525246109876151e-05, "loss": 0.0469, "step": 5476 }, { "epoch": 10.431764146457441, "grad_norm": 0.23634037375450134, "learning_rate": 6.524610987615117e-05, "loss": 0.0691, "step": 5477 }, { "epoch": 10.433666191155492, "grad_norm": 0.35563454031944275, "learning_rate": 6.52397586535408e-05, "loss": 0.0719, "step": 5478 }, { "epoch": 10.435568235853543, "grad_norm": 0.3277428150177002, "learning_rate": 6.523340743093046e-05, "loss": 0.0521, "step": 5479 }, { "epoch": 10.437470280551594, "grad_norm": 0.32706040143966675, "learning_rate": 6.522705620832011e-05, "loss": 0.0743, "step": 5480 }, { "epoch": 10.439372325249643, "grad_norm": 0.44523194432258606, "learning_rate": 6.522070498570976e-05, "loss": 0.0683, "step": 5481 }, { "epoch": 10.441274369947694, "grad_norm": 0.24652712047100067, "learning_rate": 6.52143537630994e-05, "loss": 0.0589, "step": 5482 }, { "epoch": 10.443176414645745, "grad_norm": 0.27444836497306824, "learning_rate": 6.520800254048905e-05, "loss": 0.0553, "step": 5483 }, { "epoch": 10.445078459343794, "grad_norm": 0.17975351214408875, "learning_rate": 6.52016513178787e-05, "loss": 0.0702, "step": 5484 }, { "epoch": 10.446980504041845, "grad_norm": 0.3462238311767578, "learning_rate": 6.519530009526834e-05, "loss": 0.0834, "step": 5485 }, { "epoch": 10.448882548739896, "grad_norm": 0.18925482034683228, "learning_rate": 6.518894887265799e-05, "loss": 0.0534, "step": 5486 }, { "epoch": 10.450784593437946, "grad_norm": 0.2295648753643036, "learning_rate": 6.518259765004764e-05, "loss": 0.0593, "step": 5487 }, { "epoch": 10.452686638135996, "grad_norm": 0.19527499377727509, "learning_rate": 6.517624642743728e-05, "loss": 0.0415, "step": 5488 }, { "epoch": 10.454588682834046, "grad_norm": 0.27798953652381897, "learning_rate": 6.516989520482693e-05, "loss": 0.0614, "step": 5489 }, { "epoch": 10.456490727532097, "grad_norm": 0.3666345477104187, "learning_rate": 6.516354398221658e-05, "loss": 0.0757, "step": 5490 }, { "epoch": 10.458392772230148, "grad_norm": 0.19281333684921265, "learning_rate": 6.515719275960622e-05, "loss": 0.055, "step": 5491 }, { "epoch": 10.460294816928197, "grad_norm": 0.25162625312805176, "learning_rate": 6.515084153699587e-05, "loss": 0.0727, "step": 5492 }, { "epoch": 10.462196861626248, "grad_norm": 0.32369402050971985, "learning_rate": 6.514449031438553e-05, "loss": 0.0632, "step": 5493 }, { "epoch": 10.464098906324299, "grad_norm": 0.3831532299518585, "learning_rate": 6.513813909177518e-05, "loss": 0.0735, "step": 5494 }, { "epoch": 10.466000951022348, "grad_norm": 0.2529776692390442, "learning_rate": 6.513178786916482e-05, "loss": 0.0838, "step": 5495 }, { "epoch": 10.467902995720399, "grad_norm": 0.22363333404064178, "learning_rate": 6.512543664655446e-05, "loss": 0.071, "step": 5496 }, { "epoch": 10.46980504041845, "grad_norm": 0.20970891416072845, "learning_rate": 6.511908542394412e-05, "loss": 0.0565, "step": 5497 }, { "epoch": 10.4717070851165, "grad_norm": 0.14854298532009125, "learning_rate": 6.511273420133376e-05, "loss": 0.0577, "step": 5498 }, { "epoch": 10.47360912981455, "grad_norm": 0.200281023979187, "learning_rate": 6.510638297872341e-05, "loss": 0.0807, "step": 5499 }, { "epoch": 10.4755111745126, "grad_norm": 0.24579498171806335, "learning_rate": 6.510003175611305e-05, "loss": 0.0698, "step": 5500 }, { "epoch": 10.477413219210652, "grad_norm": 0.28687208890914917, "learning_rate": 6.50936805335027e-05, "loss": 0.0542, "step": 5501 }, { "epoch": 10.479315263908703, "grad_norm": 0.2897108793258667, "learning_rate": 6.508732931089235e-05, "loss": 0.0845, "step": 5502 }, { "epoch": 10.481217308606752, "grad_norm": 0.18317654728889465, "learning_rate": 6.508097808828199e-05, "loss": 0.0556, "step": 5503 }, { "epoch": 10.483119353304803, "grad_norm": 0.20223313570022583, "learning_rate": 6.507462686567164e-05, "loss": 0.076, "step": 5504 }, { "epoch": 10.485021398002853, "grad_norm": 0.19548408687114716, "learning_rate": 6.50682756430613e-05, "loss": 0.0516, "step": 5505 }, { "epoch": 10.486923442700903, "grad_norm": 0.24220460653305054, "learning_rate": 6.506192442045093e-05, "loss": 0.0452, "step": 5506 }, { "epoch": 10.488825487398953, "grad_norm": 0.19592654705047607, "learning_rate": 6.505557319784058e-05, "loss": 0.0606, "step": 5507 }, { "epoch": 10.490727532097004, "grad_norm": 0.16772393882274628, "learning_rate": 6.504922197523024e-05, "loss": 0.0512, "step": 5508 }, { "epoch": 10.492629576795055, "grad_norm": 0.17901265621185303, "learning_rate": 6.504287075261987e-05, "loss": 0.0523, "step": 5509 }, { "epoch": 10.494531621493104, "grad_norm": 0.2328658252954483, "learning_rate": 6.503651953000953e-05, "loss": 0.0514, "step": 5510 }, { "epoch": 10.496433666191155, "grad_norm": 0.16231556236743927, "learning_rate": 6.503016830739918e-05, "loss": 0.0529, "step": 5511 }, { "epoch": 10.498335710889206, "grad_norm": 0.2349044382572174, "learning_rate": 6.502381708478883e-05, "loss": 0.0639, "step": 5512 }, { "epoch": 10.500237755587257, "grad_norm": 0.22910793125629425, "learning_rate": 6.501746586217847e-05, "loss": 0.0819, "step": 5513 }, { "epoch": 10.502139800285306, "grad_norm": 0.2594580054283142, "learning_rate": 6.501111463956812e-05, "loss": 0.0713, "step": 5514 }, { "epoch": 10.504041844983357, "grad_norm": 0.21220757067203522, "learning_rate": 6.500476341695777e-05, "loss": 0.0646, "step": 5515 }, { "epoch": 10.505943889681408, "grad_norm": 0.16294394433498383, "learning_rate": 6.499841219434741e-05, "loss": 0.0406, "step": 5516 }, { "epoch": 10.507845934379457, "grad_norm": 0.25099077820777893, "learning_rate": 6.499206097173706e-05, "loss": 0.0731, "step": 5517 }, { "epoch": 10.509747979077508, "grad_norm": 0.2558063268661499, "learning_rate": 6.498570974912671e-05, "loss": 0.0783, "step": 5518 }, { "epoch": 10.511650023775559, "grad_norm": 0.3749690353870392, "learning_rate": 6.497935852651635e-05, "loss": 0.0638, "step": 5519 }, { "epoch": 10.51355206847361, "grad_norm": 0.31419336795806885, "learning_rate": 6.4973007303906e-05, "loss": 0.085, "step": 5520 }, { "epoch": 10.515454113171659, "grad_norm": 0.18491724133491516, "learning_rate": 6.496665608129566e-05, "loss": 0.0617, "step": 5521 }, { "epoch": 10.51735615786971, "grad_norm": 0.1913200169801712, "learning_rate": 6.49603048586853e-05, "loss": 0.0755, "step": 5522 }, { "epoch": 10.51925820256776, "grad_norm": 0.21209709346294403, "learning_rate": 6.495395363607495e-05, "loss": 0.0809, "step": 5523 }, { "epoch": 10.521160247265811, "grad_norm": 0.2636054754257202, "learning_rate": 6.49476024134646e-05, "loss": 0.0664, "step": 5524 }, { "epoch": 10.52306229196386, "grad_norm": 0.27395957708358765, "learning_rate": 6.494125119085425e-05, "loss": 0.0678, "step": 5525 }, { "epoch": 10.524964336661911, "grad_norm": 0.3728583753108978, "learning_rate": 6.493489996824389e-05, "loss": 0.0793, "step": 5526 }, { "epoch": 10.526866381359962, "grad_norm": 0.22992080450057983, "learning_rate": 6.492854874563353e-05, "loss": 0.09, "step": 5527 }, { "epoch": 10.528768426058011, "grad_norm": 0.25271546840667725, "learning_rate": 6.492219752302319e-05, "loss": 0.0687, "step": 5528 }, { "epoch": 10.530670470756062, "grad_norm": 0.2486717402935028, "learning_rate": 6.491584630041283e-05, "loss": 0.0778, "step": 5529 }, { "epoch": 10.532572515454113, "grad_norm": 0.14895717799663544, "learning_rate": 6.490949507780248e-05, "loss": 0.0693, "step": 5530 }, { "epoch": 10.534474560152164, "grad_norm": 0.24672646820545197, "learning_rate": 6.490314385519213e-05, "loss": 0.0641, "step": 5531 }, { "epoch": 10.536376604850213, "grad_norm": 0.21848620474338531, "learning_rate": 6.489679263258177e-05, "loss": 0.0832, "step": 5532 }, { "epoch": 10.538278649548264, "grad_norm": 0.2144383043050766, "learning_rate": 6.489044140997142e-05, "loss": 0.0599, "step": 5533 }, { "epoch": 10.540180694246315, "grad_norm": 0.44709065556526184, "learning_rate": 6.488409018736106e-05, "loss": 0.0932, "step": 5534 }, { "epoch": 10.542082738944366, "grad_norm": 0.23711533844470978, "learning_rate": 6.487773896475073e-05, "loss": 0.0606, "step": 5535 }, { "epoch": 10.543984783642415, "grad_norm": 0.29541435837745667, "learning_rate": 6.487138774214037e-05, "loss": 0.0881, "step": 5536 }, { "epoch": 10.545886828340466, "grad_norm": 0.26311227679252625, "learning_rate": 6.486503651953e-05, "loss": 0.0698, "step": 5537 }, { "epoch": 10.547788873038517, "grad_norm": 0.23279817402362823, "learning_rate": 6.485868529691967e-05, "loss": 0.0666, "step": 5538 }, { "epoch": 10.549690917736568, "grad_norm": 0.1474435180425644, "learning_rate": 6.485233407430931e-05, "loss": 0.0659, "step": 5539 }, { "epoch": 10.551592962434617, "grad_norm": 0.18819576501846313, "learning_rate": 6.484598285169895e-05, "loss": 0.0546, "step": 5540 }, { "epoch": 10.553495007132668, "grad_norm": 0.27992531657218933, "learning_rate": 6.48396316290886e-05, "loss": 0.0632, "step": 5541 }, { "epoch": 10.555397051830719, "grad_norm": 0.22803568840026855, "learning_rate": 6.483328040647825e-05, "loss": 0.0617, "step": 5542 }, { "epoch": 10.557299096528768, "grad_norm": 0.29736748337745667, "learning_rate": 6.48269291838679e-05, "loss": 0.0773, "step": 5543 }, { "epoch": 10.559201141226819, "grad_norm": 0.19592848420143127, "learning_rate": 6.482057796125754e-05, "loss": 0.086, "step": 5544 }, { "epoch": 10.56110318592487, "grad_norm": 0.27602100372314453, "learning_rate": 6.481422673864719e-05, "loss": 0.0846, "step": 5545 }, { "epoch": 10.56300523062292, "grad_norm": 0.2776179015636444, "learning_rate": 6.480787551603684e-05, "loss": 0.0869, "step": 5546 }, { "epoch": 10.56490727532097, "grad_norm": 0.1875569075345993, "learning_rate": 6.480152429342648e-05, "loss": 0.0682, "step": 5547 }, { "epoch": 10.56680932001902, "grad_norm": 0.2264152467250824, "learning_rate": 6.479517307081613e-05, "loss": 0.0718, "step": 5548 }, { "epoch": 10.568711364717071, "grad_norm": 0.19574370980262756, "learning_rate": 6.478882184820579e-05, "loss": 0.0676, "step": 5549 }, { "epoch": 10.570613409415122, "grad_norm": 0.3385010361671448, "learning_rate": 6.478247062559542e-05, "loss": 0.0978, "step": 5550 }, { "epoch": 10.572515454113171, "grad_norm": 0.2784387767314911, "learning_rate": 6.477611940298508e-05, "loss": 0.0919, "step": 5551 }, { "epoch": 10.574417498811222, "grad_norm": 0.23461949825286865, "learning_rate": 6.476976818037473e-05, "loss": 0.0713, "step": 5552 }, { "epoch": 10.576319543509273, "grad_norm": 0.2040696144104004, "learning_rate": 6.476341695776438e-05, "loss": 0.0631, "step": 5553 }, { "epoch": 10.578221588207322, "grad_norm": 0.23008395731449127, "learning_rate": 6.475706573515402e-05, "loss": 0.0627, "step": 5554 }, { "epoch": 10.580123632905373, "grad_norm": 0.20021569728851318, "learning_rate": 6.475071451254367e-05, "loss": 0.0577, "step": 5555 }, { "epoch": 10.582025677603424, "grad_norm": 0.24329201877117157, "learning_rate": 6.474436328993332e-05, "loss": 0.0774, "step": 5556 }, { "epoch": 10.583927722301475, "grad_norm": 0.2651795446872711, "learning_rate": 6.473801206732296e-05, "loss": 0.0833, "step": 5557 }, { "epoch": 10.585829766999524, "grad_norm": 0.2536809742450714, "learning_rate": 6.47316608447126e-05, "loss": 0.1058, "step": 5558 }, { "epoch": 10.587731811697575, "grad_norm": 0.18420420587062836, "learning_rate": 6.472530962210226e-05, "loss": 0.0821, "step": 5559 }, { "epoch": 10.589633856395626, "grad_norm": 0.2313283234834671, "learning_rate": 6.47189583994919e-05, "loss": 0.0621, "step": 5560 }, { "epoch": 10.591535901093676, "grad_norm": 0.28729113936424255, "learning_rate": 6.471260717688155e-05, "loss": 0.0571, "step": 5561 }, { "epoch": 10.593437945791726, "grad_norm": 0.3117208480834961, "learning_rate": 6.47062559542712e-05, "loss": 0.0824, "step": 5562 }, { "epoch": 10.595339990489776, "grad_norm": 0.23070599138736725, "learning_rate": 6.469990473166084e-05, "loss": 0.0682, "step": 5563 }, { "epoch": 10.597242035187827, "grad_norm": 0.18042044341564178, "learning_rate": 6.46935535090505e-05, "loss": 0.0487, "step": 5564 }, { "epoch": 10.599144079885878, "grad_norm": 0.26043298840522766, "learning_rate": 6.468720228644013e-05, "loss": 0.063, "step": 5565 }, { "epoch": 10.601046124583927, "grad_norm": 0.2567049562931061, "learning_rate": 6.46808510638298e-05, "loss": 0.0643, "step": 5566 }, { "epoch": 10.602948169281978, "grad_norm": 0.2641132175922394, "learning_rate": 6.467449984121944e-05, "loss": 0.0621, "step": 5567 }, { "epoch": 10.60485021398003, "grad_norm": 0.24984419345855713, "learning_rate": 6.466814861860908e-05, "loss": 0.071, "step": 5568 }, { "epoch": 10.606752258678078, "grad_norm": 0.29163748025894165, "learning_rate": 6.466179739599874e-05, "loss": 0.0933, "step": 5569 }, { "epoch": 10.60865430337613, "grad_norm": 0.28160735964775085, "learning_rate": 6.465544617338838e-05, "loss": 0.0745, "step": 5570 }, { "epoch": 10.61055634807418, "grad_norm": 0.20157352089881897, "learning_rate": 6.464909495077803e-05, "loss": 0.0731, "step": 5571 }, { "epoch": 10.612458392772231, "grad_norm": 0.3122706115245819, "learning_rate": 6.464274372816767e-05, "loss": 0.0787, "step": 5572 }, { "epoch": 10.61436043747028, "grad_norm": 0.20753252506256104, "learning_rate": 6.463639250555732e-05, "loss": 0.0654, "step": 5573 }, { "epoch": 10.616262482168331, "grad_norm": 0.2078036218881607, "learning_rate": 6.463004128294697e-05, "loss": 0.0568, "step": 5574 }, { "epoch": 10.618164526866382, "grad_norm": 0.2810681462287903, "learning_rate": 6.462369006033661e-05, "loss": 0.0729, "step": 5575 }, { "epoch": 10.620066571564433, "grad_norm": 0.1743522733449936, "learning_rate": 6.461733883772626e-05, "loss": 0.0479, "step": 5576 }, { "epoch": 10.621968616262482, "grad_norm": 0.19856297969818115, "learning_rate": 6.461098761511592e-05, "loss": 0.0566, "step": 5577 }, { "epoch": 10.623870660960533, "grad_norm": 0.25219857692718506, "learning_rate": 6.460463639250555e-05, "loss": 0.0662, "step": 5578 }, { "epoch": 10.625772705658584, "grad_norm": 0.39976876974105835, "learning_rate": 6.45982851698952e-05, "loss": 0.072, "step": 5579 }, { "epoch": 10.627674750356633, "grad_norm": 0.2829294800758362, "learning_rate": 6.459193394728486e-05, "loss": 0.0574, "step": 5580 }, { "epoch": 10.629576795054684, "grad_norm": 0.22536730766296387, "learning_rate": 6.45855827246745e-05, "loss": 0.0733, "step": 5581 }, { "epoch": 10.631478839752734, "grad_norm": 0.1678084135055542, "learning_rate": 6.457923150206415e-05, "loss": 0.0599, "step": 5582 }, { "epoch": 10.633380884450785, "grad_norm": 0.22193792462348938, "learning_rate": 6.45728802794538e-05, "loss": 0.0646, "step": 5583 }, { "epoch": 10.635282929148834, "grad_norm": 0.3831283152103424, "learning_rate": 6.456652905684345e-05, "loss": 0.0839, "step": 5584 }, { "epoch": 10.637184973846885, "grad_norm": 0.17634892463684082, "learning_rate": 6.456017783423309e-05, "loss": 0.0586, "step": 5585 }, { "epoch": 10.639087018544936, "grad_norm": 0.18664374947547913, "learning_rate": 6.455382661162274e-05, "loss": 0.0496, "step": 5586 }, { "epoch": 10.640989063242987, "grad_norm": 0.21163462102413177, "learning_rate": 6.45474753890124e-05, "loss": 0.0616, "step": 5587 }, { "epoch": 10.642891107941036, "grad_norm": 0.1956004649400711, "learning_rate": 6.454112416640203e-05, "loss": 0.0695, "step": 5588 }, { "epoch": 10.644793152639087, "grad_norm": 0.2948111891746521, "learning_rate": 6.453477294379168e-05, "loss": 0.0684, "step": 5589 }, { "epoch": 10.646695197337138, "grad_norm": 0.18460431694984436, "learning_rate": 6.452842172118134e-05, "loss": 0.0606, "step": 5590 }, { "epoch": 10.648597242035187, "grad_norm": 0.2511366903781891, "learning_rate": 6.452207049857097e-05, "loss": 0.0946, "step": 5591 }, { "epoch": 10.650499286733238, "grad_norm": 0.21115030348300934, "learning_rate": 6.451571927596063e-05, "loss": 0.0552, "step": 5592 }, { "epoch": 10.652401331431289, "grad_norm": 0.2667510211467743, "learning_rate": 6.450936805335028e-05, "loss": 0.0698, "step": 5593 }, { "epoch": 10.65430337612934, "grad_norm": 0.35384124517440796, "learning_rate": 6.450301683073992e-05, "loss": 0.0746, "step": 5594 }, { "epoch": 10.656205420827389, "grad_norm": 0.26212817430496216, "learning_rate": 6.449666560812957e-05, "loss": 0.0589, "step": 5595 }, { "epoch": 10.65810746552544, "grad_norm": 0.22370542585849762, "learning_rate": 6.449031438551922e-05, "loss": 0.0526, "step": 5596 }, { "epoch": 10.66000951022349, "grad_norm": 0.362678587436676, "learning_rate": 6.448396316290887e-05, "loss": 0.1082, "step": 5597 }, { "epoch": 10.661911554921542, "grad_norm": 0.3471091389656067, "learning_rate": 6.447761194029851e-05, "loss": 0.0884, "step": 5598 }, { "epoch": 10.66381359961959, "grad_norm": 0.29880112409591675, "learning_rate": 6.447126071768815e-05, "loss": 0.0734, "step": 5599 }, { "epoch": 10.665715644317642, "grad_norm": 0.1709068864583969, "learning_rate": 6.446490949507781e-05, "loss": 0.0461, "step": 5600 }, { "epoch": 10.667617689015692, "grad_norm": 0.23527254164218903, "learning_rate": 6.445855827246745e-05, "loss": 0.0599, "step": 5601 }, { "epoch": 10.669519733713742, "grad_norm": 0.27498099207878113, "learning_rate": 6.44522070498571e-05, "loss": 0.062, "step": 5602 }, { "epoch": 10.671421778411792, "grad_norm": 0.2606363296508789, "learning_rate": 6.444585582724675e-05, "loss": 0.0702, "step": 5603 }, { "epoch": 10.673323823109843, "grad_norm": 0.28660082817077637, "learning_rate": 6.44395046046364e-05, "loss": 0.0805, "step": 5604 }, { "epoch": 10.675225867807894, "grad_norm": 0.22156530618667603, "learning_rate": 6.443315338202605e-05, "loss": 0.061, "step": 5605 }, { "epoch": 10.677127912505943, "grad_norm": 0.20593293011188507, "learning_rate": 6.442680215941568e-05, "loss": 0.0662, "step": 5606 }, { "epoch": 10.679029957203994, "grad_norm": 0.22477205097675323, "learning_rate": 6.442045093680535e-05, "loss": 0.0728, "step": 5607 }, { "epoch": 10.680932001902045, "grad_norm": 0.18077880144119263, "learning_rate": 6.441409971419499e-05, "loss": 0.06, "step": 5608 }, { "epoch": 10.682834046600096, "grad_norm": 0.2662803828716278, "learning_rate": 6.440774849158463e-05, "loss": 0.0629, "step": 5609 }, { "epoch": 10.684736091298145, "grad_norm": 0.26029613614082336, "learning_rate": 6.440139726897428e-05, "loss": 0.0676, "step": 5610 }, { "epoch": 10.686638135996196, "grad_norm": 0.26586633920669556, "learning_rate": 6.439504604636393e-05, "loss": 0.0532, "step": 5611 }, { "epoch": 10.688540180694247, "grad_norm": 0.2061486840248108, "learning_rate": 6.438869482375357e-05, "loss": 0.0744, "step": 5612 }, { "epoch": 10.690442225392296, "grad_norm": 0.2764222025871277, "learning_rate": 6.438234360114322e-05, "loss": 0.0821, "step": 5613 }, { "epoch": 10.692344270090347, "grad_norm": 0.2235095053911209, "learning_rate": 6.437599237853287e-05, "loss": 0.0598, "step": 5614 }, { "epoch": 10.694246314788398, "grad_norm": 0.12553450465202332, "learning_rate": 6.436964115592252e-05, "loss": 0.0619, "step": 5615 }, { "epoch": 10.696148359486449, "grad_norm": 0.23471371829509735, "learning_rate": 6.436328993331216e-05, "loss": 0.0574, "step": 5616 }, { "epoch": 10.698050404184498, "grad_norm": 0.24933548271656036, "learning_rate": 6.435693871070181e-05, "loss": 0.0638, "step": 5617 }, { "epoch": 10.699952448882549, "grad_norm": 0.2832605242729187, "learning_rate": 6.435058748809146e-05, "loss": 0.0682, "step": 5618 }, { "epoch": 10.7018544935806, "grad_norm": 0.3140818178653717, "learning_rate": 6.43442362654811e-05, "loss": 0.0647, "step": 5619 }, { "epoch": 10.70375653827865, "grad_norm": 0.28693559765815735, "learning_rate": 6.433788504287075e-05, "loss": 0.077, "step": 5620 }, { "epoch": 10.7056585829767, "grad_norm": 0.1607866883277893, "learning_rate": 6.433153382026041e-05, "loss": 0.0549, "step": 5621 }, { "epoch": 10.70756062767475, "grad_norm": 0.32727786898612976, "learning_rate": 6.432518259765005e-05, "loss": 0.0836, "step": 5622 }, { "epoch": 10.709462672372801, "grad_norm": 0.26169657707214355, "learning_rate": 6.43188313750397e-05, "loss": 0.0551, "step": 5623 }, { "epoch": 10.71136471707085, "grad_norm": 0.17940069735050201, "learning_rate": 6.431248015242935e-05, "loss": 0.0548, "step": 5624 }, { "epoch": 10.713266761768901, "grad_norm": 0.3701190650463104, "learning_rate": 6.4306128929819e-05, "loss": 0.0721, "step": 5625 }, { "epoch": 10.715168806466952, "grad_norm": 0.15953880548477173, "learning_rate": 6.429977770720864e-05, "loss": 0.0582, "step": 5626 }, { "epoch": 10.717070851165003, "grad_norm": 0.2598779797554016, "learning_rate": 6.429342648459829e-05, "loss": 0.066, "step": 5627 }, { "epoch": 10.718972895863052, "grad_norm": 0.19500410556793213, "learning_rate": 6.428707526198794e-05, "loss": 0.049, "step": 5628 }, { "epoch": 10.720874940561103, "grad_norm": 0.2729705572128296, "learning_rate": 6.428072403937758e-05, "loss": 0.0742, "step": 5629 }, { "epoch": 10.722776985259154, "grad_norm": 0.3123151957988739, "learning_rate": 6.427437281676722e-05, "loss": 0.0629, "step": 5630 }, { "epoch": 10.724679029957205, "grad_norm": 0.23334093391895294, "learning_rate": 6.426802159415688e-05, "loss": 0.0698, "step": 5631 }, { "epoch": 10.726581074655254, "grad_norm": 0.18635810911655426, "learning_rate": 6.426167037154652e-05, "loss": 0.0712, "step": 5632 }, { "epoch": 10.728483119353305, "grad_norm": 0.22240066528320312, "learning_rate": 6.425531914893617e-05, "loss": 0.0766, "step": 5633 }, { "epoch": 10.730385164051356, "grad_norm": 0.1942574381828308, "learning_rate": 6.424896792632583e-05, "loss": 0.0598, "step": 5634 }, { "epoch": 10.732287208749405, "grad_norm": 0.22906853258609772, "learning_rate": 6.424261670371546e-05, "loss": 0.059, "step": 5635 }, { "epoch": 10.734189253447456, "grad_norm": 0.257289856672287, "learning_rate": 6.423626548110512e-05, "loss": 0.1054, "step": 5636 }, { "epoch": 10.736091298145507, "grad_norm": 0.2559765875339508, "learning_rate": 6.422991425849475e-05, "loss": 0.1258, "step": 5637 }, { "epoch": 10.737993342843557, "grad_norm": 0.21526873111724854, "learning_rate": 6.422356303588442e-05, "loss": 0.0699, "step": 5638 }, { "epoch": 10.739895387541607, "grad_norm": 0.23773230612277985, "learning_rate": 6.421721181327406e-05, "loss": 0.0752, "step": 5639 }, { "epoch": 10.741797432239657, "grad_norm": 0.23262368142604828, "learning_rate": 6.42108605906637e-05, "loss": 0.0627, "step": 5640 }, { "epoch": 10.743699476937708, "grad_norm": 0.22525426745414734, "learning_rate": 6.420450936805336e-05, "loss": 0.0594, "step": 5641 }, { "epoch": 10.74560152163576, "grad_norm": 0.2492980659008026, "learning_rate": 6.4198158145443e-05, "loss": 0.0666, "step": 5642 }, { "epoch": 10.747503566333808, "grad_norm": 0.1995389312505722, "learning_rate": 6.419180692283265e-05, "loss": 0.0621, "step": 5643 }, { "epoch": 10.74940561103186, "grad_norm": 0.15982617437839508, "learning_rate": 6.418545570022229e-05, "loss": 0.068, "step": 5644 }, { "epoch": 10.75130765572991, "grad_norm": 0.17556272447109222, "learning_rate": 6.417910447761194e-05, "loss": 0.0539, "step": 5645 }, { "epoch": 10.75320970042796, "grad_norm": 0.2797108292579651, "learning_rate": 6.41727532550016e-05, "loss": 0.0602, "step": 5646 }, { "epoch": 10.75511174512601, "grad_norm": 0.17692141234874725, "learning_rate": 6.416640203239123e-05, "loss": 0.0541, "step": 5647 }, { "epoch": 10.757013789824061, "grad_norm": 0.26082196831703186, "learning_rate": 6.416005080978088e-05, "loss": 0.0697, "step": 5648 }, { "epoch": 10.758915834522112, "grad_norm": 0.2215351164340973, "learning_rate": 6.415369958717054e-05, "loss": 0.0417, "step": 5649 }, { "epoch": 10.760817879220161, "grad_norm": 0.3061333894729614, "learning_rate": 6.414734836456017e-05, "loss": 0.0884, "step": 5650 }, { "epoch": 10.762719923918212, "grad_norm": 0.21420468389987946, "learning_rate": 6.414099714194983e-05, "loss": 0.0751, "step": 5651 }, { "epoch": 10.764621968616263, "grad_norm": 0.23848441243171692, "learning_rate": 6.413464591933948e-05, "loss": 0.0738, "step": 5652 }, { "epoch": 10.766524013314314, "grad_norm": 0.26106590032577515, "learning_rate": 6.412829469672912e-05, "loss": 0.0667, "step": 5653 }, { "epoch": 10.768426058012363, "grad_norm": 0.22626987099647522, "learning_rate": 6.412194347411877e-05, "loss": 0.0683, "step": 5654 }, { "epoch": 10.770328102710414, "grad_norm": 0.19195275008678436, "learning_rate": 6.411559225150842e-05, "loss": 0.0563, "step": 5655 }, { "epoch": 10.772230147408465, "grad_norm": 0.2996932566165924, "learning_rate": 6.410924102889807e-05, "loss": 0.0789, "step": 5656 }, { "epoch": 10.774132192106514, "grad_norm": 0.2504531741142273, "learning_rate": 6.410288980628771e-05, "loss": 0.0795, "step": 5657 }, { "epoch": 10.776034236804565, "grad_norm": 0.2465498298406601, "learning_rate": 6.409653858367736e-05, "loss": 0.0638, "step": 5658 }, { "epoch": 10.777936281502615, "grad_norm": 0.21041840314865112, "learning_rate": 6.409018736106701e-05, "loss": 0.0669, "step": 5659 }, { "epoch": 10.779838326200666, "grad_norm": 0.22026972472667694, "learning_rate": 6.408383613845665e-05, "loss": 0.0612, "step": 5660 }, { "epoch": 10.781740370898715, "grad_norm": 0.22157369554042816, "learning_rate": 6.40774849158463e-05, "loss": 0.0672, "step": 5661 }, { "epoch": 10.783642415596766, "grad_norm": 0.2333994209766388, "learning_rate": 6.407113369323596e-05, "loss": 0.0834, "step": 5662 }, { "epoch": 10.785544460294817, "grad_norm": 0.24471452832221985, "learning_rate": 6.40647824706256e-05, "loss": 0.0599, "step": 5663 }, { "epoch": 10.787446504992868, "grad_norm": 0.1821519434452057, "learning_rate": 6.405843124801525e-05, "loss": 0.0832, "step": 5664 }, { "epoch": 10.789348549690917, "grad_norm": 0.26059895753860474, "learning_rate": 6.40520800254049e-05, "loss": 0.0892, "step": 5665 }, { "epoch": 10.791250594388968, "grad_norm": 0.2928067445755005, "learning_rate": 6.404572880279454e-05, "loss": 0.0778, "step": 5666 }, { "epoch": 10.793152639087019, "grad_norm": 0.28212589025497437, "learning_rate": 6.403937758018419e-05, "loss": 0.0631, "step": 5667 }, { "epoch": 10.795054683785068, "grad_norm": 0.3376213610172272, "learning_rate": 6.403302635757383e-05, "loss": 0.0674, "step": 5668 }, { "epoch": 10.796956728483119, "grad_norm": 0.28411048650741577, "learning_rate": 6.402667513496349e-05, "loss": 0.0912, "step": 5669 }, { "epoch": 10.79885877318117, "grad_norm": 0.20652949810028076, "learning_rate": 6.402032391235313e-05, "loss": 0.0612, "step": 5670 }, { "epoch": 10.80076081787922, "grad_norm": 0.2766312062740326, "learning_rate": 6.401397268974277e-05, "loss": 0.0801, "step": 5671 }, { "epoch": 10.80266286257727, "grad_norm": 0.22918127477169037, "learning_rate": 6.400762146713243e-05, "loss": 0.0766, "step": 5672 }, { "epoch": 10.80456490727532, "grad_norm": 0.13110528886318207, "learning_rate": 6.400127024452207e-05, "loss": 0.0531, "step": 5673 }, { "epoch": 10.806466951973372, "grad_norm": 0.34798091650009155, "learning_rate": 6.399491902191172e-05, "loss": 0.0846, "step": 5674 }, { "epoch": 10.808368996671422, "grad_norm": 0.21080365777015686, "learning_rate": 6.398856779930136e-05, "loss": 0.0669, "step": 5675 }, { "epoch": 10.810271041369472, "grad_norm": 0.18395227193832397, "learning_rate": 6.398221657669101e-05, "loss": 0.0515, "step": 5676 }, { "epoch": 10.812173086067522, "grad_norm": 0.2864764630794525, "learning_rate": 6.397586535408067e-05, "loss": 0.0655, "step": 5677 }, { "epoch": 10.814075130765573, "grad_norm": 0.24587604403495789, "learning_rate": 6.39695141314703e-05, "loss": 0.0723, "step": 5678 }, { "epoch": 10.815977175463622, "grad_norm": 0.26868876814842224, "learning_rate": 6.396316290885997e-05, "loss": 0.0668, "step": 5679 }, { "epoch": 10.817879220161673, "grad_norm": 0.24933819472789764, "learning_rate": 6.395681168624961e-05, "loss": 0.0811, "step": 5680 }, { "epoch": 10.819781264859724, "grad_norm": 0.26463747024536133, "learning_rate": 6.395046046363925e-05, "loss": 0.0582, "step": 5681 }, { "epoch": 10.821683309557775, "grad_norm": 0.19800810515880585, "learning_rate": 6.39441092410289e-05, "loss": 0.0624, "step": 5682 }, { "epoch": 10.823585354255824, "grad_norm": 0.230701744556427, "learning_rate": 6.393775801841855e-05, "loss": 0.0566, "step": 5683 }, { "epoch": 10.825487398953875, "grad_norm": 0.2717706263065338, "learning_rate": 6.393140679580819e-05, "loss": 0.0842, "step": 5684 }, { "epoch": 10.827389443651926, "grad_norm": 0.20510073006153107, "learning_rate": 6.392505557319784e-05, "loss": 0.0735, "step": 5685 }, { "epoch": 10.829291488349977, "grad_norm": 0.17784926295280457, "learning_rate": 6.391870435058749e-05, "loss": 0.0746, "step": 5686 }, { "epoch": 10.831193533048026, "grad_norm": 0.30288398265838623, "learning_rate": 6.391235312797714e-05, "loss": 0.0667, "step": 5687 }, { "epoch": 10.833095577746077, "grad_norm": 0.10946258902549744, "learning_rate": 6.390600190536678e-05, "loss": 0.0515, "step": 5688 }, { "epoch": 10.834997622444128, "grad_norm": 0.24755284190177917, "learning_rate": 6.389965068275643e-05, "loss": 0.0555, "step": 5689 }, { "epoch": 10.836899667142179, "grad_norm": 0.231001615524292, "learning_rate": 6.389329946014609e-05, "loss": 0.0629, "step": 5690 }, { "epoch": 10.838801711840228, "grad_norm": 0.2223047912120819, "learning_rate": 6.388694823753572e-05, "loss": 0.0638, "step": 5691 }, { "epoch": 10.840703756538279, "grad_norm": 0.40585601329803467, "learning_rate": 6.388059701492538e-05, "loss": 0.0897, "step": 5692 }, { "epoch": 10.84260580123633, "grad_norm": 0.3459436893463135, "learning_rate": 6.387424579231503e-05, "loss": 0.08, "step": 5693 }, { "epoch": 10.844507845934379, "grad_norm": 0.19375737011432648, "learning_rate": 6.386789456970467e-05, "loss": 0.0645, "step": 5694 }, { "epoch": 10.84640989063243, "grad_norm": 0.280331552028656, "learning_rate": 6.386154334709432e-05, "loss": 0.092, "step": 5695 }, { "epoch": 10.84831193533048, "grad_norm": 0.24692851305007935, "learning_rate": 6.385519212448397e-05, "loss": 0.0631, "step": 5696 }, { "epoch": 10.850213980028531, "grad_norm": 0.23048484325408936, "learning_rate": 6.384884090187362e-05, "loss": 0.0675, "step": 5697 }, { "epoch": 10.85211602472658, "grad_norm": 0.17280757427215576, "learning_rate": 6.384248967926326e-05, "loss": 0.0642, "step": 5698 }, { "epoch": 10.854018069424631, "grad_norm": 0.18811771273612976, "learning_rate": 6.383613845665291e-05, "loss": 0.0718, "step": 5699 }, { "epoch": 10.855920114122682, "grad_norm": 0.21706104278564453, "learning_rate": 6.382978723404256e-05, "loss": 0.0615, "step": 5700 }, { "epoch": 10.857822158820733, "grad_norm": 0.34146109223365784, "learning_rate": 6.38234360114322e-05, "loss": 0.0715, "step": 5701 }, { "epoch": 10.859724203518782, "grad_norm": 0.1904754787683487, "learning_rate": 6.381708478882184e-05, "loss": 0.068, "step": 5702 }, { "epoch": 10.861626248216833, "grad_norm": 0.26520657539367676, "learning_rate": 6.38107335662115e-05, "loss": 0.0644, "step": 5703 }, { "epoch": 10.863528292914884, "grad_norm": 0.18787312507629395, "learning_rate": 6.380438234360114e-05, "loss": 0.048, "step": 5704 }, { "epoch": 10.865430337612933, "grad_norm": 0.22588405013084412, "learning_rate": 6.37980311209908e-05, "loss": 0.0581, "step": 5705 }, { "epoch": 10.867332382310984, "grad_norm": 0.2537688910961151, "learning_rate": 6.379167989838045e-05, "loss": 0.0779, "step": 5706 }, { "epoch": 10.869234427009035, "grad_norm": 0.25890594720840454, "learning_rate": 6.378532867577009e-05, "loss": 0.0649, "step": 5707 }, { "epoch": 10.871136471707086, "grad_norm": 0.14498251676559448, "learning_rate": 6.377897745315974e-05, "loss": 0.0734, "step": 5708 }, { "epoch": 10.873038516405135, "grad_norm": 0.2548612654209137, "learning_rate": 6.377262623054938e-05, "loss": 0.0813, "step": 5709 }, { "epoch": 10.874940561103186, "grad_norm": 0.19444790482521057, "learning_rate": 6.376627500793904e-05, "loss": 0.067, "step": 5710 }, { "epoch": 10.876842605801237, "grad_norm": 0.22690126299858093, "learning_rate": 6.375992378532868e-05, "loss": 0.0743, "step": 5711 }, { "epoch": 10.878744650499288, "grad_norm": 0.2915293872356415, "learning_rate": 6.375357256271832e-05, "loss": 0.0657, "step": 5712 }, { "epoch": 10.880646695197337, "grad_norm": 0.3559345006942749, "learning_rate": 6.374722134010798e-05, "loss": 0.0864, "step": 5713 }, { "epoch": 10.882548739895388, "grad_norm": 0.16378241777420044, "learning_rate": 6.374087011749762e-05, "loss": 0.0379, "step": 5714 }, { "epoch": 10.884450784593438, "grad_norm": 0.43481773138046265, "learning_rate": 6.373451889488727e-05, "loss": 0.1046, "step": 5715 }, { "epoch": 10.88635282929149, "grad_norm": 0.31856125593185425, "learning_rate": 6.372816767227691e-05, "loss": 0.0795, "step": 5716 }, { "epoch": 10.888254873989538, "grad_norm": 0.2719641327857971, "learning_rate": 6.372181644966656e-05, "loss": 0.0866, "step": 5717 }, { "epoch": 10.89015691868759, "grad_norm": 0.25337544083595276, "learning_rate": 6.371546522705622e-05, "loss": 0.063, "step": 5718 }, { "epoch": 10.89205896338564, "grad_norm": 0.18715976178646088, "learning_rate": 6.370911400444585e-05, "loss": 0.0703, "step": 5719 }, { "epoch": 10.89396100808369, "grad_norm": 0.17753618955612183, "learning_rate": 6.37027627818355e-05, "loss": 0.0657, "step": 5720 }, { "epoch": 10.89586305278174, "grad_norm": 0.17351071536540985, "learning_rate": 6.369641155922516e-05, "loss": 0.0625, "step": 5721 }, { "epoch": 10.897765097479791, "grad_norm": 0.3891054391860962, "learning_rate": 6.36900603366148e-05, "loss": 0.0823, "step": 5722 }, { "epoch": 10.899667142177842, "grad_norm": 0.3254895806312561, "learning_rate": 6.368370911400445e-05, "loss": 0.0696, "step": 5723 }, { "epoch": 10.901569186875891, "grad_norm": 0.3056901693344116, "learning_rate": 6.36773578913941e-05, "loss": 0.0859, "step": 5724 }, { "epoch": 10.903471231573942, "grad_norm": 0.4118444621562958, "learning_rate": 6.367100666878374e-05, "loss": 0.0926, "step": 5725 }, { "epoch": 10.905373276271993, "grad_norm": 0.23973733186721802, "learning_rate": 6.366465544617339e-05, "loss": 0.0759, "step": 5726 }, { "epoch": 10.907275320970044, "grad_norm": 0.2237926423549652, "learning_rate": 6.365830422356304e-05, "loss": 0.061, "step": 5727 }, { "epoch": 10.909177365668093, "grad_norm": 0.28075718879699707, "learning_rate": 6.365195300095269e-05, "loss": 0.083, "step": 5728 }, { "epoch": 10.911079410366144, "grad_norm": 0.24081255495548248, "learning_rate": 6.364560177834233e-05, "loss": 0.0543, "step": 5729 }, { "epoch": 10.912981455064195, "grad_norm": 0.1605367660522461, "learning_rate": 6.363925055573198e-05, "loss": 0.0598, "step": 5730 }, { "epoch": 10.914883499762244, "grad_norm": 0.2056272178888321, "learning_rate": 6.363289933312164e-05, "loss": 0.0569, "step": 5731 }, { "epoch": 10.916785544460295, "grad_norm": 0.1978829950094223, "learning_rate": 6.362654811051127e-05, "loss": 0.06, "step": 5732 }, { "epoch": 10.918687589158345, "grad_norm": 0.27313923835754395, "learning_rate": 6.362019688790093e-05, "loss": 0.0939, "step": 5733 }, { "epoch": 10.920589633856396, "grad_norm": 0.4229736030101776, "learning_rate": 6.361384566529058e-05, "loss": 0.0779, "step": 5734 }, { "epoch": 10.922491678554445, "grad_norm": 0.23305979371070862, "learning_rate": 6.360749444268022e-05, "loss": 0.0556, "step": 5735 }, { "epoch": 10.924393723252496, "grad_norm": 0.2642943561077118, "learning_rate": 6.360114322006987e-05, "loss": 0.0831, "step": 5736 }, { "epoch": 10.926295767950547, "grad_norm": 0.2663883864879608, "learning_rate": 6.359479199745952e-05, "loss": 0.072, "step": 5737 }, { "epoch": 10.928197812648598, "grad_norm": 0.25676098465919495, "learning_rate": 6.358844077484916e-05, "loss": 0.0689, "step": 5738 }, { "epoch": 10.930099857346647, "grad_norm": 0.28164270520210266, "learning_rate": 6.358208955223881e-05, "loss": 0.0707, "step": 5739 }, { "epoch": 10.932001902044698, "grad_norm": 0.19156311452388763, "learning_rate": 6.357573832962845e-05, "loss": 0.0643, "step": 5740 }, { "epoch": 10.933903946742749, "grad_norm": 0.37269774079322815, "learning_rate": 6.356938710701811e-05, "loss": 0.0854, "step": 5741 }, { "epoch": 10.935805991440798, "grad_norm": 0.19018201529979706, "learning_rate": 6.356303588440775e-05, "loss": 0.0746, "step": 5742 }, { "epoch": 10.937708036138849, "grad_norm": 0.272897869348526, "learning_rate": 6.355668466179739e-05, "loss": 0.0653, "step": 5743 }, { "epoch": 10.9396100808369, "grad_norm": 0.24475888907909393, "learning_rate": 6.355033343918705e-05, "loss": 0.062, "step": 5744 }, { "epoch": 10.94151212553495, "grad_norm": 0.21947970986366272, "learning_rate": 6.354398221657669e-05, "loss": 0.0675, "step": 5745 }, { "epoch": 10.943414170233, "grad_norm": 0.41693615913391113, "learning_rate": 6.353763099396634e-05, "loss": 0.0798, "step": 5746 }, { "epoch": 10.94531621493105, "grad_norm": 0.2103748619556427, "learning_rate": 6.353127977135598e-05, "loss": 0.0561, "step": 5747 }, { "epoch": 10.947218259629102, "grad_norm": 0.40578147768974304, "learning_rate": 6.352492854874564e-05, "loss": 0.0809, "step": 5748 }, { "epoch": 10.949120304327153, "grad_norm": 0.22502633929252625, "learning_rate": 6.351857732613529e-05, "loss": 0.0503, "step": 5749 }, { "epoch": 10.951022349025202, "grad_norm": 0.2478155642747879, "learning_rate": 6.351222610352493e-05, "loss": 0.0542, "step": 5750 }, { "epoch": 10.952924393723253, "grad_norm": 0.22941410541534424, "learning_rate": 6.350587488091459e-05, "loss": 0.0712, "step": 5751 }, { "epoch": 10.954826438421303, "grad_norm": 0.2138836532831192, "learning_rate": 6.349952365830423e-05, "loss": 0.0617, "step": 5752 }, { "epoch": 10.956728483119353, "grad_norm": 0.1656169891357422, "learning_rate": 6.349317243569387e-05, "loss": 0.0573, "step": 5753 }, { "epoch": 10.958630527817403, "grad_norm": 0.20173387229442596, "learning_rate": 6.348682121308352e-05, "loss": 0.0667, "step": 5754 }, { "epoch": 10.960532572515454, "grad_norm": 0.23104602098464966, "learning_rate": 6.348046999047317e-05, "loss": 0.0728, "step": 5755 }, { "epoch": 10.962434617213505, "grad_norm": 0.2905726134777069, "learning_rate": 6.347411876786281e-05, "loss": 0.0786, "step": 5756 }, { "epoch": 10.964336661911554, "grad_norm": 0.24987512826919556, "learning_rate": 6.346776754525246e-05, "loss": 0.1001, "step": 5757 }, { "epoch": 10.966238706609605, "grad_norm": 0.2912735939025879, "learning_rate": 6.346141632264211e-05, "loss": 0.1025, "step": 5758 }, { "epoch": 10.968140751307656, "grad_norm": 0.40985098481178284, "learning_rate": 6.345506510003176e-05, "loss": 0.0943, "step": 5759 }, { "epoch": 10.970042796005707, "grad_norm": 0.26428380608558655, "learning_rate": 6.34487138774214e-05, "loss": 0.0693, "step": 5760 }, { "epoch": 10.971944840703756, "grad_norm": 0.29792144894599915, "learning_rate": 6.344236265481105e-05, "loss": 0.1097, "step": 5761 }, { "epoch": 10.973846885401807, "grad_norm": 0.21788249909877777, "learning_rate": 6.34360114322007e-05, "loss": 0.0709, "step": 5762 }, { "epoch": 10.975748930099858, "grad_norm": 0.16237157583236694, "learning_rate": 6.342966020959034e-05, "loss": 0.0689, "step": 5763 }, { "epoch": 10.977650974797907, "grad_norm": 0.16543160378932953, "learning_rate": 6.342330898698e-05, "loss": 0.0638, "step": 5764 }, { "epoch": 10.979553019495958, "grad_norm": 0.24611502885818481, "learning_rate": 6.341695776436965e-05, "loss": 0.0938, "step": 5765 }, { "epoch": 10.981455064194009, "grad_norm": 0.17132847011089325, "learning_rate": 6.341060654175929e-05, "loss": 0.0471, "step": 5766 }, { "epoch": 10.98335710889206, "grad_norm": 0.3223244845867157, "learning_rate": 6.340425531914894e-05, "loss": 0.2433, "step": 5767 }, { "epoch": 10.985259153590109, "grad_norm": 0.37807127833366394, "learning_rate": 6.339790409653859e-05, "loss": 0.0779, "step": 5768 }, { "epoch": 10.98716119828816, "grad_norm": 0.30722638964653015, "learning_rate": 6.339155287392824e-05, "loss": 0.0833, "step": 5769 }, { "epoch": 10.98906324298621, "grad_norm": 0.16426853835582733, "learning_rate": 6.338520165131788e-05, "loss": 0.0615, "step": 5770 }, { "epoch": 10.990965287684261, "grad_norm": 0.17217318713665009, "learning_rate": 6.337885042870753e-05, "loss": 0.0548, "step": 5771 }, { "epoch": 10.99286733238231, "grad_norm": 0.24564428627490997, "learning_rate": 6.337249920609718e-05, "loss": 0.0566, "step": 5772 }, { "epoch": 10.994769377080361, "grad_norm": 0.26936957240104675, "learning_rate": 6.336614798348682e-05, "loss": 0.0648, "step": 5773 }, { "epoch": 10.996671421778412, "grad_norm": 0.21403786540031433, "learning_rate": 6.335979676087646e-05, "loss": 0.049, "step": 5774 }, { "epoch": 10.998573466476461, "grad_norm": 0.2741341292858124, "learning_rate": 6.335344553826613e-05, "loss": 0.0736, "step": 5775 }, { "epoch": 11.000475511174512, "grad_norm": 0.2551502287387848, "learning_rate": 6.334709431565576e-05, "loss": 0.0683, "step": 5776 }, { "epoch": 11.002377555872563, "grad_norm": 0.23173901438713074, "learning_rate": 6.334074309304542e-05, "loss": 0.072, "step": 5777 }, { "epoch": 11.004279600570614, "grad_norm": 0.1508857011795044, "learning_rate": 6.333439187043505e-05, "loss": 0.0782, "step": 5778 }, { "epoch": 11.006181645268663, "grad_norm": 0.17948079109191895, "learning_rate": 6.33280406478247e-05, "loss": 0.0658, "step": 5779 }, { "epoch": 11.008083689966714, "grad_norm": 0.1505243331193924, "learning_rate": 6.332168942521436e-05, "loss": 0.0351, "step": 5780 }, { "epoch": 11.009985734664765, "grad_norm": 0.19849568605422974, "learning_rate": 6.3315338202604e-05, "loss": 0.0602, "step": 5781 }, { "epoch": 11.011887779362816, "grad_norm": 0.2975490093231201, "learning_rate": 6.330898697999366e-05, "loss": 0.0693, "step": 5782 }, { "epoch": 11.013789824060865, "grad_norm": 0.2738147974014282, "learning_rate": 6.33026357573833e-05, "loss": 0.0974, "step": 5783 }, { "epoch": 11.015691868758916, "grad_norm": 0.11051417142152786, "learning_rate": 6.329628453477294e-05, "loss": 0.0597, "step": 5784 }, { "epoch": 11.017593913456967, "grad_norm": 0.28424665331840515, "learning_rate": 6.328993331216259e-05, "loss": 0.0704, "step": 5785 }, { "epoch": 11.019495958155016, "grad_norm": 0.1699250191450119, "learning_rate": 6.328358208955224e-05, "loss": 0.0625, "step": 5786 }, { "epoch": 11.021398002853067, "grad_norm": 0.23104004561901093, "learning_rate": 6.32772308669419e-05, "loss": 0.0612, "step": 5787 }, { "epoch": 11.023300047551118, "grad_norm": 0.22150813043117523, "learning_rate": 6.327087964433153e-05, "loss": 0.0616, "step": 5788 }, { "epoch": 11.025202092249168, "grad_norm": 0.24865034222602844, "learning_rate": 6.326452842172118e-05, "loss": 0.0589, "step": 5789 }, { "epoch": 11.027104136947218, "grad_norm": 0.14965787529945374, "learning_rate": 6.325817719911084e-05, "loss": 0.0652, "step": 5790 }, { "epoch": 11.029006181645268, "grad_norm": 0.22783611714839935, "learning_rate": 6.325182597650047e-05, "loss": 0.0545, "step": 5791 }, { "epoch": 11.03090822634332, "grad_norm": 0.34645095467567444, "learning_rate": 6.324547475389013e-05, "loss": 0.0643, "step": 5792 }, { "epoch": 11.03281027104137, "grad_norm": 0.18645666539669037, "learning_rate": 6.323912353127978e-05, "loss": 0.0603, "step": 5793 }, { "epoch": 11.03471231573942, "grad_norm": 0.13025419414043427, "learning_rate": 6.323277230866942e-05, "loss": 0.0434, "step": 5794 }, { "epoch": 11.03661436043747, "grad_norm": 0.24453479051589966, "learning_rate": 6.322642108605907e-05, "loss": 0.0606, "step": 5795 }, { "epoch": 11.038516405135521, "grad_norm": 0.29672566056251526, "learning_rate": 6.322006986344872e-05, "loss": 0.0697, "step": 5796 }, { "epoch": 11.04041844983357, "grad_norm": 0.2354295700788498, "learning_rate": 6.321371864083836e-05, "loss": 0.061, "step": 5797 }, { "epoch": 11.042320494531621, "grad_norm": 0.2746586799621582, "learning_rate": 6.320736741822801e-05, "loss": 0.0486, "step": 5798 }, { "epoch": 11.044222539229672, "grad_norm": 0.5878947973251343, "learning_rate": 6.320101619561766e-05, "loss": 0.1112, "step": 5799 }, { "epoch": 11.046124583927723, "grad_norm": 0.19655099511146545, "learning_rate": 6.319466497300731e-05, "loss": 0.0582, "step": 5800 }, { "epoch": 11.048026628625772, "grad_norm": 0.17464180290699005, "learning_rate": 6.318831375039695e-05, "loss": 0.0595, "step": 5801 }, { "epoch": 11.049928673323823, "grad_norm": 0.31295058131217957, "learning_rate": 6.31819625277866e-05, "loss": 0.0695, "step": 5802 }, { "epoch": 11.051830718021874, "grad_norm": 0.18527528643608093, "learning_rate": 6.317561130517626e-05, "loss": 0.0813, "step": 5803 }, { "epoch": 11.053732762719925, "grad_norm": 0.24729035794734955, "learning_rate": 6.31692600825659e-05, "loss": 0.053, "step": 5804 }, { "epoch": 11.055634807417974, "grad_norm": 0.24672453105449677, "learning_rate": 6.316290885995555e-05, "loss": 0.0678, "step": 5805 }, { "epoch": 11.057536852116025, "grad_norm": 0.3050698935985565, "learning_rate": 6.31565576373452e-05, "loss": 0.0641, "step": 5806 }, { "epoch": 11.059438896814076, "grad_norm": 0.20497417449951172, "learning_rate": 6.315020641473484e-05, "loss": 0.0874, "step": 5807 }, { "epoch": 11.061340941512125, "grad_norm": 0.2367674559354782, "learning_rate": 6.314385519212449e-05, "loss": 0.0716, "step": 5808 }, { "epoch": 11.063242986210176, "grad_norm": 0.315449595451355, "learning_rate": 6.313750396951414e-05, "loss": 0.0681, "step": 5809 }, { "epoch": 11.065145030908226, "grad_norm": 0.19548985362052917, "learning_rate": 6.313115274690378e-05, "loss": 0.0749, "step": 5810 }, { "epoch": 11.067047075606277, "grad_norm": 0.2502439320087433, "learning_rate": 6.312480152429343e-05, "loss": 0.0923, "step": 5811 }, { "epoch": 11.068949120304326, "grad_norm": 0.27582550048828125, "learning_rate": 6.311845030168307e-05, "loss": 0.0694, "step": 5812 }, { "epoch": 11.070851165002377, "grad_norm": 0.19255176186561584, "learning_rate": 6.311209907907273e-05, "loss": 0.0664, "step": 5813 }, { "epoch": 11.072753209700428, "grad_norm": 0.316190630197525, "learning_rate": 6.310574785646237e-05, "loss": 0.1079, "step": 5814 }, { "epoch": 11.074655254398479, "grad_norm": 0.20605596899986267, "learning_rate": 6.309939663385201e-05, "loss": 0.0579, "step": 5815 }, { "epoch": 11.076557299096528, "grad_norm": 0.23222453892230988, "learning_rate": 6.309304541124168e-05, "loss": 0.051, "step": 5816 }, { "epoch": 11.078459343794579, "grad_norm": 0.24020570516586304, "learning_rate": 6.308669418863131e-05, "loss": 0.0639, "step": 5817 }, { "epoch": 11.08036138849263, "grad_norm": 0.25853022933006287, "learning_rate": 6.308034296602097e-05, "loss": 0.0623, "step": 5818 }, { "epoch": 11.08226343319068, "grad_norm": 0.20045647025108337, "learning_rate": 6.30739917434106e-05, "loss": 0.0501, "step": 5819 }, { "epoch": 11.08416547788873, "grad_norm": 0.22093622386455536, "learning_rate": 6.306764052080026e-05, "loss": 0.0672, "step": 5820 }, { "epoch": 11.08606752258678, "grad_norm": 0.21147139370441437, "learning_rate": 6.306128929818991e-05, "loss": 0.0595, "step": 5821 }, { "epoch": 11.087969567284832, "grad_norm": 0.19494277238845825, "learning_rate": 6.305493807557955e-05, "loss": 0.0687, "step": 5822 }, { "epoch": 11.08987161198288, "grad_norm": 0.2994452118873596, "learning_rate": 6.304858685296921e-05, "loss": 0.0588, "step": 5823 }, { "epoch": 11.091773656680932, "grad_norm": 0.3022996485233307, "learning_rate": 6.304223563035885e-05, "loss": 0.0742, "step": 5824 }, { "epoch": 11.093675701378983, "grad_norm": 0.1606903374195099, "learning_rate": 6.303588440774849e-05, "loss": 0.0511, "step": 5825 }, { "epoch": 11.095577746077034, "grad_norm": 0.2217331975698471, "learning_rate": 6.302953318513814e-05, "loss": 0.0645, "step": 5826 }, { "epoch": 11.097479790775083, "grad_norm": 0.18725329637527466, "learning_rate": 6.302318196252779e-05, "loss": 0.066, "step": 5827 }, { "epoch": 11.099381835473134, "grad_norm": 0.17725598812103271, "learning_rate": 6.301683073991743e-05, "loss": 0.0502, "step": 5828 }, { "epoch": 11.101283880171184, "grad_norm": 0.1918100267648697, "learning_rate": 6.301047951730708e-05, "loss": 0.0616, "step": 5829 }, { "epoch": 11.103185924869235, "grad_norm": 0.2146424800157547, "learning_rate": 6.300412829469673e-05, "loss": 0.0629, "step": 5830 }, { "epoch": 11.105087969567284, "grad_norm": 0.20890381932258606, "learning_rate": 6.299777707208639e-05, "loss": 0.0567, "step": 5831 }, { "epoch": 11.106990014265335, "grad_norm": 0.19044464826583862, "learning_rate": 6.299142584947602e-05, "loss": 0.0497, "step": 5832 }, { "epoch": 11.108892058963386, "grad_norm": 0.2516787350177765, "learning_rate": 6.298507462686568e-05, "loss": 0.0568, "step": 5833 }, { "epoch": 11.110794103661435, "grad_norm": 0.23537124693393707, "learning_rate": 6.297872340425533e-05, "loss": 0.0664, "step": 5834 }, { "epoch": 11.112696148359486, "grad_norm": 0.27227261662483215, "learning_rate": 6.297237218164497e-05, "loss": 0.0628, "step": 5835 }, { "epoch": 11.114598193057537, "grad_norm": 0.24858912825584412, "learning_rate": 6.296602095903462e-05, "loss": 0.0837, "step": 5836 }, { "epoch": 11.116500237755588, "grad_norm": 0.23548686504364014, "learning_rate": 6.295966973642427e-05, "loss": 0.0812, "step": 5837 }, { "epoch": 11.118402282453637, "grad_norm": 0.2094622701406479, "learning_rate": 6.295331851381391e-05, "loss": 0.0523, "step": 5838 }, { "epoch": 11.120304327151688, "grad_norm": 0.2200409173965454, "learning_rate": 6.294696729120356e-05, "loss": 0.0663, "step": 5839 }, { "epoch": 11.122206371849739, "grad_norm": 0.2630188465118408, "learning_rate": 6.294061606859321e-05, "loss": 0.0799, "step": 5840 }, { "epoch": 11.12410841654779, "grad_norm": 0.17814132571220398, "learning_rate": 6.293426484598286e-05, "loss": 0.0582, "step": 5841 }, { "epoch": 11.126010461245839, "grad_norm": 0.18654607236385345, "learning_rate": 6.29279136233725e-05, "loss": 0.0812, "step": 5842 }, { "epoch": 11.12791250594389, "grad_norm": 0.2401699423789978, "learning_rate": 6.292156240076214e-05, "loss": 0.0606, "step": 5843 }, { "epoch": 11.12981455064194, "grad_norm": 0.15061615407466888, "learning_rate": 6.29152111781518e-05, "loss": 0.0574, "step": 5844 }, { "epoch": 11.13171659533999, "grad_norm": 0.096590057015419, "learning_rate": 6.290885995554144e-05, "loss": 0.0534, "step": 5845 }, { "epoch": 11.13361864003804, "grad_norm": 0.13583779335021973, "learning_rate": 6.290250873293108e-05, "loss": 0.0677, "step": 5846 }, { "epoch": 11.135520684736091, "grad_norm": 0.2538928687572479, "learning_rate": 6.289615751032075e-05, "loss": 0.0754, "step": 5847 }, { "epoch": 11.137422729434142, "grad_norm": 0.19595909118652344, "learning_rate": 6.288980628771039e-05, "loss": 0.0668, "step": 5848 }, { "epoch": 11.139324774132191, "grad_norm": 0.2578146159648895, "learning_rate": 6.288345506510004e-05, "loss": 0.0577, "step": 5849 }, { "epoch": 11.141226818830242, "grad_norm": 0.13636615872383118, "learning_rate": 6.287710384248968e-05, "loss": 0.0453, "step": 5850 }, { "epoch": 11.143128863528293, "grad_norm": 0.26093125343322754, "learning_rate": 6.287075261987933e-05, "loss": 0.0587, "step": 5851 }, { "epoch": 11.145030908226344, "grad_norm": 0.356971800327301, "learning_rate": 6.286440139726898e-05, "loss": 0.0575, "step": 5852 }, { "epoch": 11.146932952924393, "grad_norm": 0.25038501620292664, "learning_rate": 6.285805017465862e-05, "loss": 0.067, "step": 5853 }, { "epoch": 11.148834997622444, "grad_norm": 0.19863051176071167, "learning_rate": 6.285169895204828e-05, "loss": 0.0696, "step": 5854 }, { "epoch": 11.150737042320495, "grad_norm": 0.183120995759964, "learning_rate": 6.284534772943792e-05, "loss": 0.0579, "step": 5855 }, { "epoch": 11.152639087018544, "grad_norm": 0.16368988156318665, "learning_rate": 6.283899650682756e-05, "loss": 0.0694, "step": 5856 }, { "epoch": 11.154541131716595, "grad_norm": 0.4161554276943207, "learning_rate": 6.283264528421721e-05, "loss": 0.0811, "step": 5857 }, { "epoch": 11.156443176414646, "grad_norm": 0.2229108363389969, "learning_rate": 6.282629406160686e-05, "loss": 0.0694, "step": 5858 }, { "epoch": 11.158345221112697, "grad_norm": 0.39521241188049316, "learning_rate": 6.281994283899652e-05, "loss": 0.0755, "step": 5859 }, { "epoch": 11.160247265810746, "grad_norm": 0.17866304516792297, "learning_rate": 6.281359161638615e-05, "loss": 0.0602, "step": 5860 }, { "epoch": 11.162149310508797, "grad_norm": 0.21596479415893555, "learning_rate": 6.28072403937758e-05, "loss": 0.0337, "step": 5861 }, { "epoch": 11.164051355206848, "grad_norm": 0.21442416310310364, "learning_rate": 6.280088917116546e-05, "loss": 0.0639, "step": 5862 }, { "epoch": 11.165953399904899, "grad_norm": 0.24515525996685028, "learning_rate": 6.27945379485551e-05, "loss": 0.0851, "step": 5863 }, { "epoch": 11.167855444602948, "grad_norm": 0.3923701047897339, "learning_rate": 6.278818672594475e-05, "loss": 0.0787, "step": 5864 }, { "epoch": 11.169757489300999, "grad_norm": 0.13907523453235626, "learning_rate": 6.27818355033344e-05, "loss": 0.0994, "step": 5865 }, { "epoch": 11.17165953399905, "grad_norm": 0.22324611246585846, "learning_rate": 6.277548428072404e-05, "loss": 0.0684, "step": 5866 }, { "epoch": 11.173561578697099, "grad_norm": 0.20631495118141174, "learning_rate": 6.276913305811369e-05, "loss": 0.0605, "step": 5867 }, { "epoch": 11.17546362339515, "grad_norm": 0.15408509969711304, "learning_rate": 6.276278183550334e-05, "loss": 0.057, "step": 5868 }, { "epoch": 11.1773656680932, "grad_norm": 0.2680682837963104, "learning_rate": 6.275643061289298e-05, "loss": 0.0606, "step": 5869 }, { "epoch": 11.179267712791251, "grad_norm": 0.22061650454998016, "learning_rate": 6.275007939028263e-05, "loss": 0.0615, "step": 5870 }, { "epoch": 11.1811697574893, "grad_norm": 0.23415584862232208, "learning_rate": 6.274372816767228e-05, "loss": 0.067, "step": 5871 }, { "epoch": 11.183071802187351, "grad_norm": 0.25447916984558105, "learning_rate": 6.273737694506193e-05, "loss": 0.0595, "step": 5872 }, { "epoch": 11.184973846885402, "grad_norm": 0.21616342663764954, "learning_rate": 6.273102572245157e-05, "loss": 0.0573, "step": 5873 }, { "epoch": 11.186875891583453, "grad_norm": 0.2780592739582062, "learning_rate": 6.272467449984122e-05, "loss": 0.1027, "step": 5874 }, { "epoch": 11.188777936281502, "grad_norm": 0.1951284557580948, "learning_rate": 6.271832327723088e-05, "loss": 0.0686, "step": 5875 }, { "epoch": 11.190679980979553, "grad_norm": 0.25119706988334656, "learning_rate": 6.271197205462052e-05, "loss": 0.0727, "step": 5876 }, { "epoch": 11.192582025677604, "grad_norm": 0.17263376712799072, "learning_rate": 6.270562083201017e-05, "loss": 0.0717, "step": 5877 }, { "epoch": 11.194484070375653, "grad_norm": 0.24591860175132751, "learning_rate": 6.269926960939982e-05, "loss": 0.0794, "step": 5878 }, { "epoch": 11.196386115073704, "grad_norm": 0.21584731340408325, "learning_rate": 6.269291838678946e-05, "loss": 0.0759, "step": 5879 }, { "epoch": 11.198288159771755, "grad_norm": 0.20340055227279663, "learning_rate": 6.268656716417911e-05, "loss": 0.0579, "step": 5880 }, { "epoch": 11.200190204469806, "grad_norm": 0.23323297500610352, "learning_rate": 6.268021594156876e-05, "loss": 0.0683, "step": 5881 }, { "epoch": 11.202092249167855, "grad_norm": 0.26051902770996094, "learning_rate": 6.26738647189584e-05, "loss": 0.0615, "step": 5882 }, { "epoch": 11.203994293865906, "grad_norm": 0.1682775467634201, "learning_rate": 6.266751349634805e-05, "loss": 0.0721, "step": 5883 }, { "epoch": 11.205896338563957, "grad_norm": 0.13243019580841064, "learning_rate": 6.266116227373769e-05, "loss": 0.0512, "step": 5884 }, { "epoch": 11.207798383262007, "grad_norm": 0.18601296842098236, "learning_rate": 6.265481105112735e-05, "loss": 0.053, "step": 5885 }, { "epoch": 11.209700427960057, "grad_norm": 0.14149025082588196, "learning_rate": 6.264845982851699e-05, "loss": 0.0629, "step": 5886 }, { "epoch": 11.211602472658107, "grad_norm": 0.12751762568950653, "learning_rate": 6.264210860590663e-05, "loss": 0.0582, "step": 5887 }, { "epoch": 11.213504517356158, "grad_norm": 0.20913732051849365, "learning_rate": 6.263575738329628e-05, "loss": 0.0564, "step": 5888 }, { "epoch": 11.21540656205421, "grad_norm": 0.2129901647567749, "learning_rate": 6.262940616068593e-05, "loss": 0.0887, "step": 5889 }, { "epoch": 11.217308606752258, "grad_norm": 0.2636778652667999, "learning_rate": 6.262305493807559e-05, "loss": 0.0714, "step": 5890 }, { "epoch": 11.21921065145031, "grad_norm": 0.20317071676254272, "learning_rate": 6.261670371546522e-05, "loss": 0.0539, "step": 5891 }, { "epoch": 11.22111269614836, "grad_norm": 0.13354192674160004, "learning_rate": 6.261035249285488e-05, "loss": 0.0637, "step": 5892 }, { "epoch": 11.22301474084641, "grad_norm": 0.17662078142166138, "learning_rate": 6.260400127024453e-05, "loss": 0.0594, "step": 5893 }, { "epoch": 11.22491678554446, "grad_norm": 0.22467391192913055, "learning_rate": 6.259765004763417e-05, "loss": 0.0635, "step": 5894 }, { "epoch": 11.226818830242511, "grad_norm": 0.23158535361289978, "learning_rate": 6.259129882502382e-05, "loss": 0.0479, "step": 5895 }, { "epoch": 11.228720874940562, "grad_norm": 0.2320852428674698, "learning_rate": 6.258494760241347e-05, "loss": 0.0787, "step": 5896 }, { "epoch": 11.230622919638611, "grad_norm": 0.26288601756095886, "learning_rate": 6.257859637980311e-05, "loss": 0.0807, "step": 5897 }, { "epoch": 11.232524964336662, "grad_norm": 0.15741541981697083, "learning_rate": 6.257224515719276e-05, "loss": 0.0616, "step": 5898 }, { "epoch": 11.234427009034713, "grad_norm": 0.17461591958999634, "learning_rate": 6.256589393458241e-05, "loss": 0.0524, "step": 5899 }, { "epoch": 11.236329053732764, "grad_norm": 0.2302362471818924, "learning_rate": 6.255954271197205e-05, "loss": 0.0652, "step": 5900 }, { "epoch": 11.238231098430813, "grad_norm": 0.18687783181667328, "learning_rate": 6.25531914893617e-05, "loss": 0.0526, "step": 5901 }, { "epoch": 11.240133143128864, "grad_norm": 0.22332634031772614, "learning_rate": 6.254684026675135e-05, "loss": 0.0668, "step": 5902 }, { "epoch": 11.242035187826914, "grad_norm": 0.2076207846403122, "learning_rate": 6.2540489044141e-05, "loss": 0.0583, "step": 5903 }, { "epoch": 11.243937232524964, "grad_norm": 0.2083384096622467, "learning_rate": 6.253413782153064e-05, "loss": 0.0623, "step": 5904 }, { "epoch": 11.245839277223014, "grad_norm": 0.28720328211784363, "learning_rate": 6.25277865989203e-05, "loss": 0.0853, "step": 5905 }, { "epoch": 11.247741321921065, "grad_norm": 0.24125772714614868, "learning_rate": 6.252143537630995e-05, "loss": 0.0643, "step": 5906 }, { "epoch": 11.249643366619116, "grad_norm": 0.13032442331314087, "learning_rate": 6.251508415369959e-05, "loss": 0.0647, "step": 5907 }, { "epoch": 11.251545411317165, "grad_norm": 0.25721585750579834, "learning_rate": 6.250873293108924e-05, "loss": 0.0678, "step": 5908 }, { "epoch": 11.253447456015216, "grad_norm": 0.28258436918258667, "learning_rate": 6.250238170847889e-05, "loss": 0.0573, "step": 5909 }, { "epoch": 11.255349500713267, "grad_norm": 0.20877690613269806, "learning_rate": 6.249603048586853e-05, "loss": 0.0682, "step": 5910 }, { "epoch": 11.257251545411318, "grad_norm": 0.2083483189344406, "learning_rate": 6.248967926325818e-05, "loss": 0.068, "step": 5911 }, { "epoch": 11.259153590109367, "grad_norm": 0.1268092691898346, "learning_rate": 6.248332804064783e-05, "loss": 0.0624, "step": 5912 }, { "epoch": 11.261055634807418, "grad_norm": 0.18287968635559082, "learning_rate": 6.247697681803748e-05, "loss": 0.0581, "step": 5913 }, { "epoch": 11.262957679505469, "grad_norm": 0.27067553997039795, "learning_rate": 6.247062559542712e-05, "loss": 0.0549, "step": 5914 }, { "epoch": 11.264859724203518, "grad_norm": 0.23166772723197937, "learning_rate": 6.246427437281676e-05, "loss": 0.0617, "step": 5915 }, { "epoch": 11.266761768901569, "grad_norm": 0.2581332325935364, "learning_rate": 6.245792315020643e-05, "loss": 0.0632, "step": 5916 }, { "epoch": 11.26866381359962, "grad_norm": 0.14740905165672302, "learning_rate": 6.245157192759606e-05, "loss": 0.0505, "step": 5917 }, { "epoch": 11.27056585829767, "grad_norm": 0.2767736613750458, "learning_rate": 6.24452207049857e-05, "loss": 0.0766, "step": 5918 }, { "epoch": 11.27246790299572, "grad_norm": 0.2171592116355896, "learning_rate": 6.243886948237537e-05, "loss": 0.0722, "step": 5919 }, { "epoch": 11.27436994769377, "grad_norm": 0.2533954381942749, "learning_rate": 6.2432518259765e-05, "loss": 0.0748, "step": 5920 }, { "epoch": 11.276271992391822, "grad_norm": 0.30213838815689087, "learning_rate": 6.242616703715466e-05, "loss": 0.0741, "step": 5921 }, { "epoch": 11.278174037089872, "grad_norm": 0.2629832327365875, "learning_rate": 6.24198158145443e-05, "loss": 0.0613, "step": 5922 }, { "epoch": 11.280076081787922, "grad_norm": 0.16284258663654327, "learning_rate": 6.241346459193395e-05, "loss": 0.0562, "step": 5923 }, { "epoch": 11.281978126485972, "grad_norm": 0.23348303139209747, "learning_rate": 6.24071133693236e-05, "loss": 0.0698, "step": 5924 }, { "epoch": 11.283880171184023, "grad_norm": 0.2706508934497833, "learning_rate": 6.240076214671324e-05, "loss": 0.0806, "step": 5925 }, { "epoch": 11.285782215882072, "grad_norm": 0.14358559250831604, "learning_rate": 6.23944109241029e-05, "loss": 0.0485, "step": 5926 }, { "epoch": 11.287684260580123, "grad_norm": 0.19689619541168213, "learning_rate": 6.238805970149254e-05, "loss": 0.0785, "step": 5927 }, { "epoch": 11.289586305278174, "grad_norm": 0.2577039301395416, "learning_rate": 6.238170847888218e-05, "loss": 0.1031, "step": 5928 }, { "epoch": 11.291488349976225, "grad_norm": 0.18427234888076782, "learning_rate": 6.237535725627183e-05, "loss": 0.0651, "step": 5929 }, { "epoch": 11.293390394674274, "grad_norm": 0.2167913019657135, "learning_rate": 6.236900603366148e-05, "loss": 0.0556, "step": 5930 }, { "epoch": 11.295292439372325, "grad_norm": 0.1679394543170929, "learning_rate": 6.236265481105114e-05, "loss": 0.0697, "step": 5931 }, { "epoch": 11.297194484070376, "grad_norm": 0.15036652982234955, "learning_rate": 6.235630358844077e-05, "loss": 0.0572, "step": 5932 }, { "epoch": 11.299096528768427, "grad_norm": 0.285196989774704, "learning_rate": 6.234995236583043e-05, "loss": 0.0848, "step": 5933 }, { "epoch": 11.300998573466476, "grad_norm": 0.17883095145225525, "learning_rate": 6.234360114322008e-05, "loss": 0.0508, "step": 5934 }, { "epoch": 11.302900618164527, "grad_norm": 0.21533527970314026, "learning_rate": 6.233724992060972e-05, "loss": 0.0493, "step": 5935 }, { "epoch": 11.304802662862578, "grad_norm": 0.19913125038146973, "learning_rate": 6.233089869799937e-05, "loss": 0.0754, "step": 5936 }, { "epoch": 11.306704707560627, "grad_norm": 0.20028865337371826, "learning_rate": 6.232454747538902e-05, "loss": 0.0516, "step": 5937 }, { "epoch": 11.308606752258678, "grad_norm": 0.22111880779266357, "learning_rate": 6.231819625277866e-05, "loss": 0.0631, "step": 5938 }, { "epoch": 11.310508796956729, "grad_norm": 0.24731788039207458, "learning_rate": 6.231184503016831e-05, "loss": 0.0715, "step": 5939 }, { "epoch": 11.31241084165478, "grad_norm": 0.13716444373130798, "learning_rate": 6.230549380755796e-05, "loss": 0.0511, "step": 5940 }, { "epoch": 11.314312886352829, "grad_norm": 0.19190862774848938, "learning_rate": 6.22991425849476e-05, "loss": 0.0606, "step": 5941 }, { "epoch": 11.31621493105088, "grad_norm": 0.2019043266773224, "learning_rate": 6.229279136233725e-05, "loss": 0.0483, "step": 5942 }, { "epoch": 11.31811697574893, "grad_norm": 0.17971554398536682, "learning_rate": 6.22864401397269e-05, "loss": 0.0536, "step": 5943 }, { "epoch": 11.320019020446981, "grad_norm": 0.19725970923900604, "learning_rate": 6.228008891711656e-05, "loss": 0.0824, "step": 5944 }, { "epoch": 11.32192106514503, "grad_norm": 0.3006778955459595, "learning_rate": 6.22737376945062e-05, "loss": 0.0767, "step": 5945 }, { "epoch": 11.323823109843081, "grad_norm": 0.1861002892255783, "learning_rate": 6.226738647189583e-05, "loss": 0.0626, "step": 5946 }, { "epoch": 11.325725154541132, "grad_norm": 0.22243209183216095, "learning_rate": 6.22610352492855e-05, "loss": 0.0664, "step": 5947 }, { "epoch": 11.327627199239181, "grad_norm": 0.20367178320884705, "learning_rate": 6.225468402667514e-05, "loss": 0.0541, "step": 5948 }, { "epoch": 11.329529243937232, "grad_norm": 0.23790258169174194, "learning_rate": 6.224833280406479e-05, "loss": 0.0684, "step": 5949 }, { "epoch": 11.331431288635283, "grad_norm": 0.2381439507007599, "learning_rate": 6.224198158145444e-05, "loss": 0.0833, "step": 5950 }, { "epoch": 11.333333333333334, "grad_norm": 0.2214234173297882, "learning_rate": 6.223563035884408e-05, "loss": 0.0623, "step": 5951 }, { "epoch": 11.335235378031383, "grad_norm": 0.24603192508220673, "learning_rate": 6.222927913623373e-05, "loss": 0.0916, "step": 5952 }, { "epoch": 11.337137422729434, "grad_norm": 0.23510603606700897, "learning_rate": 6.222292791362337e-05, "loss": 0.0894, "step": 5953 }, { "epoch": 11.339039467427485, "grad_norm": 0.2977652847766876, "learning_rate": 6.221657669101302e-05, "loss": 0.0824, "step": 5954 }, { "epoch": 11.340941512125536, "grad_norm": 0.15970540046691895, "learning_rate": 6.221022546840267e-05, "loss": 0.0599, "step": 5955 }, { "epoch": 11.342843556823585, "grad_norm": 0.12921825051307678, "learning_rate": 6.220387424579231e-05, "loss": 0.0651, "step": 5956 }, { "epoch": 11.344745601521636, "grad_norm": 0.16822974383831024, "learning_rate": 6.219752302318198e-05, "loss": 0.0669, "step": 5957 }, { "epoch": 11.346647646219687, "grad_norm": 0.1776653677225113, "learning_rate": 6.219117180057161e-05, "loss": 0.0584, "step": 5958 }, { "epoch": 11.348549690917736, "grad_norm": 0.19017675518989563, "learning_rate": 6.218482057796125e-05, "loss": 0.0642, "step": 5959 }, { "epoch": 11.350451735615787, "grad_norm": 0.2337440699338913, "learning_rate": 6.21784693553509e-05, "loss": 0.0648, "step": 5960 }, { "epoch": 11.352353780313837, "grad_norm": 0.2026948630809784, "learning_rate": 6.217211813274056e-05, "loss": 0.0603, "step": 5961 }, { "epoch": 11.354255825011888, "grad_norm": 0.3038780391216278, "learning_rate": 6.216576691013021e-05, "loss": 0.2317, "step": 5962 }, { "epoch": 11.356157869709937, "grad_norm": 0.2820383906364441, "learning_rate": 6.215941568751985e-05, "loss": 0.1064, "step": 5963 }, { "epoch": 11.358059914407988, "grad_norm": 0.361411452293396, "learning_rate": 6.21530644649095e-05, "loss": 0.1318, "step": 5964 }, { "epoch": 11.35996195910604, "grad_norm": 0.17481759190559387, "learning_rate": 6.214671324229915e-05, "loss": 0.0419, "step": 5965 }, { "epoch": 11.36186400380409, "grad_norm": 0.3438234329223633, "learning_rate": 6.214036201968879e-05, "loss": 0.0683, "step": 5966 }, { "epoch": 11.36376604850214, "grad_norm": 0.2617831826210022, "learning_rate": 6.213401079707844e-05, "loss": 0.0683, "step": 5967 }, { "epoch": 11.36566809320019, "grad_norm": 0.21333356201648712, "learning_rate": 6.212765957446809e-05, "loss": 0.0591, "step": 5968 }, { "epoch": 11.367570137898241, "grad_norm": 0.16227330267429352, "learning_rate": 6.212130835185773e-05, "loss": 0.061, "step": 5969 }, { "epoch": 11.36947218259629, "grad_norm": 0.13485564291477203, "learning_rate": 6.211495712924738e-05, "loss": 0.0865, "step": 5970 }, { "epoch": 11.371374227294341, "grad_norm": 0.16355431079864502, "learning_rate": 6.210860590663703e-05, "loss": 0.1023, "step": 5971 }, { "epoch": 11.373276271992392, "grad_norm": 0.20975372195243835, "learning_rate": 6.210225468402667e-05, "loss": 0.061, "step": 5972 }, { "epoch": 11.375178316690443, "grad_norm": 0.22960126399993896, "learning_rate": 6.209590346141632e-05, "loss": 0.0706, "step": 5973 }, { "epoch": 11.377080361388492, "grad_norm": 0.19620640575885773, "learning_rate": 6.208955223880598e-05, "loss": 0.0595, "step": 5974 }, { "epoch": 11.378982406086543, "grad_norm": 0.17666909098625183, "learning_rate": 6.208320101619563e-05, "loss": 0.0581, "step": 5975 }, { "epoch": 11.380884450784594, "grad_norm": 0.18484234809875488, "learning_rate": 6.207684979358527e-05, "loss": 0.0614, "step": 5976 }, { "epoch": 11.382786495482645, "grad_norm": 0.18495036661624908, "learning_rate": 6.207049857097492e-05, "loss": 0.061, "step": 5977 }, { "epoch": 11.384688540180694, "grad_norm": 0.19153162837028503, "learning_rate": 6.206414734836457e-05, "loss": 0.0624, "step": 5978 }, { "epoch": 11.386590584878745, "grad_norm": 0.20312979817390442, "learning_rate": 6.205779612575421e-05, "loss": 0.0728, "step": 5979 }, { "epoch": 11.388492629576795, "grad_norm": 0.19322583079338074, "learning_rate": 6.205144490314386e-05, "loss": 0.0545, "step": 5980 }, { "epoch": 11.390394674274846, "grad_norm": 0.16400569677352905, "learning_rate": 6.204509368053351e-05, "loss": 0.0508, "step": 5981 }, { "epoch": 11.392296718972895, "grad_norm": 0.19000458717346191, "learning_rate": 6.203874245792315e-05, "loss": 0.0747, "step": 5982 }, { "epoch": 11.394198763670946, "grad_norm": 0.2547617554664612, "learning_rate": 6.20323912353128e-05, "loss": 0.0712, "step": 5983 }, { "epoch": 11.396100808368997, "grad_norm": 0.23105841875076294, "learning_rate": 6.202604001270245e-05, "loss": 0.0629, "step": 5984 }, { "epoch": 11.398002853067046, "grad_norm": 0.2565494477748871, "learning_rate": 6.20196887900921e-05, "loss": 0.0691, "step": 5985 }, { "epoch": 11.399904897765097, "grad_norm": 0.35790374875068665, "learning_rate": 6.201333756748174e-05, "loss": 0.1012, "step": 5986 }, { "epoch": 11.401806942463148, "grad_norm": 0.21136760711669922, "learning_rate": 6.200698634487138e-05, "loss": 0.0689, "step": 5987 }, { "epoch": 11.403708987161199, "grad_norm": 0.3775070011615753, "learning_rate": 6.200063512226105e-05, "loss": 0.0807, "step": 5988 }, { "epoch": 11.405611031859248, "grad_norm": 0.1303168386220932, "learning_rate": 6.199428389965069e-05, "loss": 0.0422, "step": 5989 }, { "epoch": 11.407513076557299, "grad_norm": 0.233234703540802, "learning_rate": 6.198793267704032e-05, "loss": 0.0829, "step": 5990 }, { "epoch": 11.40941512125535, "grad_norm": 0.18227219581604004, "learning_rate": 6.198158145442999e-05, "loss": 0.0711, "step": 5991 }, { "epoch": 11.4113171659534, "grad_norm": 0.18231059610843658, "learning_rate": 6.197523023181963e-05, "loss": 0.0441, "step": 5992 }, { "epoch": 11.41321921065145, "grad_norm": 0.2130158692598343, "learning_rate": 6.196887900920928e-05, "loss": 0.083, "step": 5993 }, { "epoch": 11.4151212553495, "grad_norm": 0.18165136873722076, "learning_rate": 6.196252778659892e-05, "loss": 0.0659, "step": 5994 }, { "epoch": 11.417023300047552, "grad_norm": 0.1659289002418518, "learning_rate": 6.195617656398857e-05, "loss": 0.0444, "step": 5995 }, { "epoch": 11.4189253447456, "grad_norm": 0.2109125852584839, "learning_rate": 6.194982534137822e-05, "loss": 0.0707, "step": 5996 }, { "epoch": 11.420827389443652, "grad_norm": 0.24702899158000946, "learning_rate": 6.194347411876786e-05, "loss": 0.0764, "step": 5997 }, { "epoch": 11.422729434141702, "grad_norm": 0.26356109976768494, "learning_rate": 6.193712289615751e-05, "loss": 0.0847, "step": 5998 }, { "epoch": 11.424631478839753, "grad_norm": 0.2942047119140625, "learning_rate": 6.193077167354716e-05, "loss": 0.069, "step": 5999 }, { "epoch": 11.426533523537802, "grad_norm": 0.19051453471183777, "learning_rate": 6.19244204509368e-05, "loss": 0.0714, "step": 6000 }, { "epoch": 11.428435568235853, "grad_norm": 0.24365760385990143, "learning_rate": 6.191806922832645e-05, "loss": 0.0772, "step": 6001 }, { "epoch": 11.430337612933904, "grad_norm": 0.19398783147335052, "learning_rate": 6.19117180057161e-05, "loss": 0.0523, "step": 6002 }, { "epoch": 11.432239657631955, "grad_norm": 0.23657700419425964, "learning_rate": 6.190536678310576e-05, "loss": 0.0653, "step": 6003 }, { "epoch": 11.434141702330004, "grad_norm": 0.21983860433101654, "learning_rate": 6.18990155604954e-05, "loss": 0.0706, "step": 6004 }, { "epoch": 11.436043747028055, "grad_norm": 0.17263579368591309, "learning_rate": 6.189266433788505e-05, "loss": 0.0641, "step": 6005 }, { "epoch": 11.437945791726106, "grad_norm": 0.45329514145851135, "learning_rate": 6.18863131152747e-05, "loss": 0.0845, "step": 6006 }, { "epoch": 11.439847836424155, "grad_norm": 0.2627212703227997, "learning_rate": 6.187996189266434e-05, "loss": 0.0679, "step": 6007 }, { "epoch": 11.441749881122206, "grad_norm": 0.2166578322649002, "learning_rate": 6.187361067005399e-05, "loss": 0.0726, "step": 6008 }, { "epoch": 11.443651925820257, "grad_norm": 0.23665811121463776, "learning_rate": 6.186725944744364e-05, "loss": 0.055, "step": 6009 }, { "epoch": 11.445553970518308, "grad_norm": 0.19395120441913605, "learning_rate": 6.186090822483328e-05, "loss": 0.0775, "step": 6010 }, { "epoch": 11.447456015216357, "grad_norm": 0.2175108790397644, "learning_rate": 6.185455700222293e-05, "loss": 0.061, "step": 6011 }, { "epoch": 11.449358059914408, "grad_norm": 0.25349605083465576, "learning_rate": 6.184820577961258e-05, "loss": 0.0624, "step": 6012 }, { "epoch": 11.451260104612459, "grad_norm": 0.22870026528835297, "learning_rate": 6.184185455700222e-05, "loss": 0.0701, "step": 6013 }, { "epoch": 11.45316214931051, "grad_norm": 0.2290843427181244, "learning_rate": 6.183550333439187e-05, "loss": 0.0703, "step": 6014 }, { "epoch": 11.455064194008559, "grad_norm": 0.2366465926170349, "learning_rate": 6.182915211178152e-05, "loss": 0.0896, "step": 6015 }, { "epoch": 11.45696623870661, "grad_norm": 0.18010316789150238, "learning_rate": 6.182280088917118e-05, "loss": 0.0661, "step": 6016 }, { "epoch": 11.45886828340466, "grad_norm": 0.1060408502817154, "learning_rate": 6.181644966656081e-05, "loss": 0.0629, "step": 6017 }, { "epoch": 11.46077032810271, "grad_norm": 0.24757403135299683, "learning_rate": 6.181009844395045e-05, "loss": 0.0557, "step": 6018 }, { "epoch": 11.46267237280076, "grad_norm": 0.21169930696487427, "learning_rate": 6.180374722134012e-05, "loss": 0.0692, "step": 6019 }, { "epoch": 11.464574417498811, "grad_norm": 0.19375531375408173, "learning_rate": 6.179739599872976e-05, "loss": 0.0625, "step": 6020 }, { "epoch": 11.466476462196862, "grad_norm": 0.2409812957048416, "learning_rate": 6.179104477611941e-05, "loss": 0.0438, "step": 6021 }, { "epoch": 11.468378506894911, "grad_norm": 0.18499095737934113, "learning_rate": 6.178469355350906e-05, "loss": 0.0741, "step": 6022 }, { "epoch": 11.470280551592962, "grad_norm": 0.26087823510169983, "learning_rate": 6.17783423308987e-05, "loss": 0.1246, "step": 6023 }, { "epoch": 11.472182596291013, "grad_norm": 0.21440978348255157, "learning_rate": 6.177199110828835e-05, "loss": 0.058, "step": 6024 }, { "epoch": 11.474084640989064, "grad_norm": 0.17176677286624908, "learning_rate": 6.176563988567799e-05, "loss": 0.0616, "step": 6025 }, { "epoch": 11.475986685687113, "grad_norm": 0.3351137042045593, "learning_rate": 6.175928866306764e-05, "loss": 0.0703, "step": 6026 }, { "epoch": 11.477888730385164, "grad_norm": 0.22019176185131073, "learning_rate": 6.175293744045729e-05, "loss": 0.0614, "step": 6027 }, { "epoch": 11.479790775083215, "grad_norm": 0.2089587152004242, "learning_rate": 6.174658621784693e-05, "loss": 0.0662, "step": 6028 }, { "epoch": 11.481692819781266, "grad_norm": 0.21846778690814972, "learning_rate": 6.17402349952366e-05, "loss": 0.0718, "step": 6029 }, { "epoch": 11.483594864479315, "grad_norm": 0.235796257853508, "learning_rate": 6.173388377262623e-05, "loss": 0.0806, "step": 6030 }, { "epoch": 11.485496909177366, "grad_norm": 0.17812518775463104, "learning_rate": 6.172753255001587e-05, "loss": 0.0638, "step": 6031 }, { "epoch": 11.487398953875417, "grad_norm": 0.3472003638744354, "learning_rate": 6.172118132740552e-05, "loss": 0.0726, "step": 6032 }, { "epoch": 11.489300998573466, "grad_norm": 0.22589357197284698, "learning_rate": 6.171483010479518e-05, "loss": 0.0789, "step": 6033 }, { "epoch": 11.491203043271517, "grad_norm": 0.15773439407348633, "learning_rate": 6.170847888218483e-05, "loss": 0.0628, "step": 6034 }, { "epoch": 11.493105087969568, "grad_norm": 0.20131023228168488, "learning_rate": 6.170212765957447e-05, "loss": 0.0671, "step": 6035 }, { "epoch": 11.495007132667618, "grad_norm": 0.27972444891929626, "learning_rate": 6.169577643696412e-05, "loss": 0.0747, "step": 6036 }, { "epoch": 11.496909177365668, "grad_norm": 0.14555224776268005, "learning_rate": 6.168942521435377e-05, "loss": 0.0625, "step": 6037 }, { "epoch": 11.498811222063718, "grad_norm": 0.1408931314945221, "learning_rate": 6.168307399174341e-05, "loss": 0.0775, "step": 6038 }, { "epoch": 11.50071326676177, "grad_norm": 0.2279130518436432, "learning_rate": 6.167672276913306e-05, "loss": 0.0592, "step": 6039 }, { "epoch": 11.50261531145982, "grad_norm": 0.33479738235473633, "learning_rate": 6.167037154652271e-05, "loss": 0.0841, "step": 6040 }, { "epoch": 11.50451735615787, "grad_norm": 0.34034156799316406, "learning_rate": 6.166402032391235e-05, "loss": 0.0792, "step": 6041 }, { "epoch": 11.50641940085592, "grad_norm": 0.3499704599380493, "learning_rate": 6.1657669101302e-05, "loss": 0.0841, "step": 6042 }, { "epoch": 11.508321445553971, "grad_norm": 0.2372085303068161, "learning_rate": 6.165131787869165e-05, "loss": 0.0597, "step": 6043 }, { "epoch": 11.51022349025202, "grad_norm": 0.2807712256908417, "learning_rate": 6.164496665608129e-05, "loss": 0.0565, "step": 6044 }, { "epoch": 11.512125534950071, "grad_norm": 0.2463337928056717, "learning_rate": 6.163861543347094e-05, "loss": 0.069, "step": 6045 }, { "epoch": 11.514027579648122, "grad_norm": 0.19078849256038666, "learning_rate": 6.16322642108606e-05, "loss": 0.0503, "step": 6046 }, { "epoch": 11.515929624346173, "grad_norm": 0.2622719705104828, "learning_rate": 6.162591298825025e-05, "loss": 0.0877, "step": 6047 }, { "epoch": 11.517831669044222, "grad_norm": 0.16791850328445435, "learning_rate": 6.161956176563989e-05, "loss": 0.0764, "step": 6048 }, { "epoch": 11.519733713742273, "grad_norm": 0.15910501778125763, "learning_rate": 6.161321054302952e-05, "loss": 0.0728, "step": 6049 }, { "epoch": 11.521635758440324, "grad_norm": 0.13227106630802155, "learning_rate": 6.160685932041919e-05, "loss": 0.0574, "step": 6050 }, { "epoch": 11.523537803138375, "grad_norm": 0.20476002991199493, "learning_rate": 6.160050809780883e-05, "loss": 0.0523, "step": 6051 }, { "epoch": 11.525439847836424, "grad_norm": 0.20992450416088104, "learning_rate": 6.159415687519848e-05, "loss": 0.0625, "step": 6052 }, { "epoch": 11.527341892534475, "grad_norm": 0.19773100316524506, "learning_rate": 6.158780565258813e-05, "loss": 0.0705, "step": 6053 }, { "epoch": 11.529243937232525, "grad_norm": 0.20119865238666534, "learning_rate": 6.158145442997777e-05, "loss": 0.0512, "step": 6054 }, { "epoch": 11.531145981930575, "grad_norm": 0.1957743763923645, "learning_rate": 6.157510320736742e-05, "loss": 0.0553, "step": 6055 }, { "epoch": 11.533048026628625, "grad_norm": 0.18962952494621277, "learning_rate": 6.156875198475706e-05, "loss": 0.0679, "step": 6056 }, { "epoch": 11.534950071326676, "grad_norm": 0.29589298367500305, "learning_rate": 6.156240076214673e-05, "loss": 0.0769, "step": 6057 }, { "epoch": 11.536852116024727, "grad_norm": 0.22001637518405914, "learning_rate": 6.155604953953636e-05, "loss": 0.0712, "step": 6058 }, { "epoch": 11.538754160722776, "grad_norm": 0.16552984714508057, "learning_rate": 6.1549698316926e-05, "loss": 0.0724, "step": 6059 }, { "epoch": 11.540656205420827, "grad_norm": 0.27614063024520874, "learning_rate": 6.154334709431567e-05, "loss": 0.0616, "step": 6060 }, { "epoch": 11.542558250118878, "grad_norm": 0.3362590968608856, "learning_rate": 6.15369958717053e-05, "loss": 0.076, "step": 6061 }, { "epoch": 11.544460294816929, "grad_norm": 0.31461089849472046, "learning_rate": 6.153064464909494e-05, "loss": 0.0668, "step": 6062 }, { "epoch": 11.546362339514978, "grad_norm": 0.2893616557121277, "learning_rate": 6.15242934264846e-05, "loss": 0.0526, "step": 6063 }, { "epoch": 11.548264384213029, "grad_norm": 0.19768260419368744, "learning_rate": 6.151794220387425e-05, "loss": 0.0504, "step": 6064 }, { "epoch": 11.55016642891108, "grad_norm": 0.21805433928966522, "learning_rate": 6.15115909812639e-05, "loss": 0.0522, "step": 6065 }, { "epoch": 11.552068473609129, "grad_norm": 0.2028207778930664, "learning_rate": 6.150523975865354e-05, "loss": 0.0526, "step": 6066 }, { "epoch": 11.55397051830718, "grad_norm": 0.11325007677078247, "learning_rate": 6.149888853604319e-05, "loss": 0.0516, "step": 6067 }, { "epoch": 11.55587256300523, "grad_norm": 0.24469703435897827, "learning_rate": 6.149253731343284e-05, "loss": 0.059, "step": 6068 }, { "epoch": 11.557774607703282, "grad_norm": 0.21558788418769836, "learning_rate": 6.148618609082248e-05, "loss": 0.0621, "step": 6069 }, { "epoch": 11.55967665240133, "grad_norm": 0.17346946895122528, "learning_rate": 6.147983486821213e-05, "loss": 0.06, "step": 6070 }, { "epoch": 11.561578697099382, "grad_norm": 0.24504558742046356, "learning_rate": 6.147348364560178e-05, "loss": 0.0723, "step": 6071 }, { "epoch": 11.563480741797433, "grad_norm": 0.1529906839132309, "learning_rate": 6.146713242299142e-05, "loss": 0.0782, "step": 6072 }, { "epoch": 11.565382786495483, "grad_norm": 0.2217208594083786, "learning_rate": 6.146078120038107e-05, "loss": 0.0485, "step": 6073 }, { "epoch": 11.567284831193533, "grad_norm": 0.23646852374076843, "learning_rate": 6.145442997777073e-05, "loss": 0.0632, "step": 6074 }, { "epoch": 11.569186875891583, "grad_norm": 0.24585440754890442, "learning_rate": 6.144807875516038e-05, "loss": 0.0679, "step": 6075 }, { "epoch": 11.571088920589634, "grad_norm": 0.22161781787872314, "learning_rate": 6.144172753255002e-05, "loss": 0.0743, "step": 6076 }, { "epoch": 11.572990965287683, "grad_norm": 0.20924946665763855, "learning_rate": 6.143537630993967e-05, "loss": 0.0625, "step": 6077 }, { "epoch": 11.574893009985734, "grad_norm": 0.1991322785615921, "learning_rate": 6.142902508732932e-05, "loss": 0.0647, "step": 6078 }, { "epoch": 11.576795054683785, "grad_norm": 0.14237317442893982, "learning_rate": 6.142267386471896e-05, "loss": 0.059, "step": 6079 }, { "epoch": 11.578697099381836, "grad_norm": 0.17688287794589996, "learning_rate": 6.141632264210861e-05, "loss": 0.071, "step": 6080 }, { "epoch": 11.580599144079885, "grad_norm": 0.3290722966194153, "learning_rate": 6.140997141949826e-05, "loss": 0.1133, "step": 6081 }, { "epoch": 11.582501188777936, "grad_norm": 0.2535814642906189, "learning_rate": 6.14036201968879e-05, "loss": 0.0844, "step": 6082 }, { "epoch": 11.584403233475987, "grad_norm": 0.17662294209003448, "learning_rate": 6.139726897427755e-05, "loss": 0.088, "step": 6083 }, { "epoch": 11.586305278174038, "grad_norm": 0.42101120948791504, "learning_rate": 6.13909177516672e-05, "loss": 0.0983, "step": 6084 }, { "epoch": 11.588207322872087, "grad_norm": 0.15525983273983002, "learning_rate": 6.138456652905684e-05, "loss": 0.0685, "step": 6085 }, { "epoch": 11.590109367570138, "grad_norm": 0.19920846819877625, "learning_rate": 6.13782153064465e-05, "loss": 0.0664, "step": 6086 }, { "epoch": 11.592011412268189, "grad_norm": 0.17457710206508636, "learning_rate": 6.137186408383615e-05, "loss": 0.0697, "step": 6087 }, { "epoch": 11.593913456966238, "grad_norm": 0.12437298893928528, "learning_rate": 6.13655128612258e-05, "loss": 0.0557, "step": 6088 }, { "epoch": 11.595815501664289, "grad_norm": 0.25111961364746094, "learning_rate": 6.135916163861544e-05, "loss": 0.0669, "step": 6089 }, { "epoch": 11.59771754636234, "grad_norm": 0.15608292818069458, "learning_rate": 6.135281041600507e-05, "loss": 0.0836, "step": 6090 }, { "epoch": 11.59961959106039, "grad_norm": 0.24482889473438263, "learning_rate": 6.134645919339474e-05, "loss": 0.0685, "step": 6091 }, { "epoch": 11.60152163575844, "grad_norm": 0.27814391255378723, "learning_rate": 6.134010797078438e-05, "loss": 0.083, "step": 6092 }, { "epoch": 11.60342368045649, "grad_norm": 0.25812262296676636, "learning_rate": 6.133375674817403e-05, "loss": 0.0642, "step": 6093 }, { "epoch": 11.605325725154541, "grad_norm": 0.14833666384220123, "learning_rate": 6.132740552556368e-05, "loss": 0.0636, "step": 6094 }, { "epoch": 11.607227769852592, "grad_norm": 0.21950408816337585, "learning_rate": 6.132105430295332e-05, "loss": 0.052, "step": 6095 }, { "epoch": 11.609129814550641, "grad_norm": 0.2043406218290329, "learning_rate": 6.131470308034297e-05, "loss": 0.0664, "step": 6096 }, { "epoch": 11.611031859248692, "grad_norm": 0.12655547261238098, "learning_rate": 6.130835185773261e-05, "loss": 0.044, "step": 6097 }, { "epoch": 11.612933903946743, "grad_norm": 0.30291444063186646, "learning_rate": 6.130200063512226e-05, "loss": 0.0844, "step": 6098 }, { "epoch": 11.614835948644792, "grad_norm": 0.18023452162742615, "learning_rate": 6.129564941251191e-05, "loss": 0.065, "step": 6099 }, { "epoch": 11.616737993342843, "grad_norm": 0.1753673255443573, "learning_rate": 6.128929818990155e-05, "loss": 0.0577, "step": 6100 }, { "epoch": 11.618640038040894, "grad_norm": 0.2921854853630066, "learning_rate": 6.12829469672912e-05, "loss": 0.0798, "step": 6101 }, { "epoch": 11.620542082738945, "grad_norm": 0.2377692610025406, "learning_rate": 6.127659574468086e-05, "loss": 0.0592, "step": 6102 }, { "epoch": 11.622444127436994, "grad_norm": 0.1325494349002838, "learning_rate": 6.12702445220705e-05, "loss": 0.0464, "step": 6103 }, { "epoch": 11.624346172135045, "grad_norm": 0.2446260154247284, "learning_rate": 6.126389329946015e-05, "loss": 0.0591, "step": 6104 }, { "epoch": 11.626248216833096, "grad_norm": 0.24112503230571747, "learning_rate": 6.12575420768498e-05, "loss": 0.0636, "step": 6105 }, { "epoch": 11.628150261531147, "grad_norm": 0.21624989807605743, "learning_rate": 6.125119085423945e-05, "loss": 0.0573, "step": 6106 }, { "epoch": 11.630052306229196, "grad_norm": 0.29112082719802856, "learning_rate": 6.124483963162909e-05, "loss": 0.055, "step": 6107 }, { "epoch": 11.631954350927247, "grad_norm": 0.256797730922699, "learning_rate": 6.123848840901874e-05, "loss": 0.0586, "step": 6108 }, { "epoch": 11.633856395625298, "grad_norm": 0.2393050193786621, "learning_rate": 6.123213718640839e-05, "loss": 0.0776, "step": 6109 }, { "epoch": 11.635758440323347, "grad_norm": 0.1504524052143097, "learning_rate": 6.122578596379803e-05, "loss": 0.0749, "step": 6110 }, { "epoch": 11.637660485021398, "grad_norm": 0.3078169822692871, "learning_rate": 6.121943474118768e-05, "loss": 0.0655, "step": 6111 }, { "epoch": 11.639562529719448, "grad_norm": 0.33488377928733826, "learning_rate": 6.121308351857733e-05, "loss": 0.0708, "step": 6112 }, { "epoch": 11.6414645744175, "grad_norm": 0.23643803596496582, "learning_rate": 6.120673229596697e-05, "loss": 0.0711, "step": 6113 }, { "epoch": 11.643366619115548, "grad_norm": 0.22984230518341064, "learning_rate": 6.120038107335662e-05, "loss": 0.0676, "step": 6114 }, { "epoch": 11.6452686638136, "grad_norm": 0.25781843066215515, "learning_rate": 6.119402985074628e-05, "loss": 0.0803, "step": 6115 }, { "epoch": 11.64717070851165, "grad_norm": 0.2645505964756012, "learning_rate": 6.118767862813591e-05, "loss": 0.0491, "step": 6116 }, { "epoch": 11.649072753209701, "grad_norm": 0.18775779008865356, "learning_rate": 6.118132740552557e-05, "loss": 0.0639, "step": 6117 }, { "epoch": 11.65097479790775, "grad_norm": 0.21034850180149078, "learning_rate": 6.117497618291522e-05, "loss": 0.0883, "step": 6118 }, { "epoch": 11.652876842605801, "grad_norm": 0.2566367983818054, "learning_rate": 6.116862496030487e-05, "loss": 0.0591, "step": 6119 }, { "epoch": 11.654778887303852, "grad_norm": 0.22888152301311493, "learning_rate": 6.116227373769451e-05, "loss": 0.0591, "step": 6120 }, { "epoch": 11.656680932001901, "grad_norm": 0.3078417181968689, "learning_rate": 6.115592251508415e-05, "loss": 0.0904, "step": 6121 }, { "epoch": 11.658582976699952, "grad_norm": 0.3688507676124573, "learning_rate": 6.114957129247381e-05, "loss": 0.0792, "step": 6122 }, { "epoch": 11.660485021398003, "grad_norm": 0.15711909532546997, "learning_rate": 6.114322006986345e-05, "loss": 0.0576, "step": 6123 }, { "epoch": 11.662387066096054, "grad_norm": 0.22273437678813934, "learning_rate": 6.11368688472531e-05, "loss": 0.0745, "step": 6124 }, { "epoch": 11.664289110794103, "grad_norm": 0.3121403753757477, "learning_rate": 6.113051762464275e-05, "loss": 0.0654, "step": 6125 }, { "epoch": 11.666191155492154, "grad_norm": 0.3767518997192383, "learning_rate": 6.112416640203239e-05, "loss": 0.0829, "step": 6126 }, { "epoch": 11.668093200190205, "grad_norm": 0.2748688757419586, "learning_rate": 6.111781517942204e-05, "loss": 0.0773, "step": 6127 }, { "epoch": 11.669995244888256, "grad_norm": 0.24371102452278137, "learning_rate": 6.111146395681168e-05, "loss": 0.0689, "step": 6128 }, { "epoch": 11.671897289586305, "grad_norm": 0.2633253335952759, "learning_rate": 6.110511273420135e-05, "loss": 0.0924, "step": 6129 }, { "epoch": 11.673799334284356, "grad_norm": 0.1922878921031952, "learning_rate": 6.109876151159099e-05, "loss": 0.0653, "step": 6130 }, { "epoch": 11.675701378982406, "grad_norm": 0.305594801902771, "learning_rate": 6.109241028898062e-05, "loss": 0.0625, "step": 6131 }, { "epoch": 11.677603423680456, "grad_norm": 0.1557915359735489, "learning_rate": 6.108605906637029e-05, "loss": 0.0555, "step": 6132 }, { "epoch": 11.679505468378506, "grad_norm": 0.2857607901096344, "learning_rate": 6.107970784375993e-05, "loss": 0.0755, "step": 6133 }, { "epoch": 11.681407513076557, "grad_norm": 0.2326517254114151, "learning_rate": 6.107335662114957e-05, "loss": 0.0717, "step": 6134 }, { "epoch": 11.683309557774608, "grad_norm": 0.3538796305656433, "learning_rate": 6.106700539853922e-05, "loss": 0.0812, "step": 6135 }, { "epoch": 11.685211602472657, "grad_norm": 0.17662444710731506, "learning_rate": 6.106065417592887e-05, "loss": 0.0792, "step": 6136 }, { "epoch": 11.687113647170708, "grad_norm": 0.3378899097442627, "learning_rate": 6.105430295331852e-05, "loss": 0.0608, "step": 6137 }, { "epoch": 11.689015691868759, "grad_norm": 0.14349856972694397, "learning_rate": 6.104795173070816e-05, "loss": 0.0428, "step": 6138 }, { "epoch": 11.69091773656681, "grad_norm": 0.35674959421157837, "learning_rate": 6.104160050809781e-05, "loss": 0.0936, "step": 6139 }, { "epoch": 11.692819781264859, "grad_norm": 0.2726203203201294, "learning_rate": 6.103524928548746e-05, "loss": 0.0628, "step": 6140 }, { "epoch": 11.69472182596291, "grad_norm": 0.24450047314167023, "learning_rate": 6.102889806287711e-05, "loss": 0.0539, "step": 6141 }, { "epoch": 11.69662387066096, "grad_norm": 0.223331019282341, "learning_rate": 6.102254684026676e-05, "loss": 0.058, "step": 6142 }, { "epoch": 11.698525915359012, "grad_norm": 0.22839614748954773, "learning_rate": 6.1016195617656405e-05, "loss": 0.058, "step": 6143 }, { "epoch": 11.70042796005706, "grad_norm": 0.21672922372817993, "learning_rate": 6.100984439504604e-05, "loss": 0.0782, "step": 6144 }, { "epoch": 11.702330004755112, "grad_norm": 0.18630997836589813, "learning_rate": 6.10034931724357e-05, "loss": 0.0767, "step": 6145 }, { "epoch": 11.704232049453163, "grad_norm": 0.20281566679477692, "learning_rate": 6.099714194982534e-05, "loss": 0.0924, "step": 6146 }, { "epoch": 11.706134094151212, "grad_norm": 0.2289772927761078, "learning_rate": 6.0990790727215e-05, "loss": 0.0603, "step": 6147 }, { "epoch": 11.708036138849263, "grad_norm": 0.3022541403770447, "learning_rate": 6.098443950460464e-05, "loss": 0.094, "step": 6148 }, { "epoch": 11.709938183547314, "grad_norm": 0.23966433107852936, "learning_rate": 6.097808828199428e-05, "loss": 0.0481, "step": 6149 }, { "epoch": 11.711840228245364, "grad_norm": 0.2377452850341797, "learning_rate": 6.097173705938394e-05, "loss": 0.0634, "step": 6150 }, { "epoch": 11.713742272943414, "grad_norm": 0.24796810746192932, "learning_rate": 6.096538583677358e-05, "loss": 0.078, "step": 6151 }, { "epoch": 11.715644317641464, "grad_norm": 0.18740686774253845, "learning_rate": 6.0959034614163224e-05, "loss": 0.0541, "step": 6152 }, { "epoch": 11.717546362339515, "grad_norm": 0.18637046217918396, "learning_rate": 6.0952683391552876e-05, "loss": 0.0589, "step": 6153 }, { "epoch": 11.719448407037566, "grad_norm": 0.19956031441688538, "learning_rate": 6.094633216894252e-05, "loss": 0.0686, "step": 6154 }, { "epoch": 11.721350451735615, "grad_norm": 0.25902166962623596, "learning_rate": 6.093998094633217e-05, "loss": 0.0509, "step": 6155 }, { "epoch": 11.723252496433666, "grad_norm": 0.18729211390018463, "learning_rate": 6.093362972372182e-05, "loss": 0.0846, "step": 6156 }, { "epoch": 11.725154541131717, "grad_norm": 0.22573569416999817, "learning_rate": 6.092727850111146e-05, "loss": 0.071, "step": 6157 }, { "epoch": 11.727056585829766, "grad_norm": 0.3560294806957245, "learning_rate": 6.0920927278501115e-05, "loss": 0.0857, "step": 6158 }, { "epoch": 11.728958630527817, "grad_norm": 0.15637195110321045, "learning_rate": 6.091457605589076e-05, "loss": 0.0748, "step": 6159 }, { "epoch": 11.730860675225868, "grad_norm": 0.24328051507472992, "learning_rate": 6.090822483328041e-05, "loss": 0.0702, "step": 6160 }, { "epoch": 11.732762719923919, "grad_norm": 0.17262868583202362, "learning_rate": 6.090187361067006e-05, "loss": 0.0601, "step": 6161 }, { "epoch": 11.734664764621968, "grad_norm": 0.21686933934688568, "learning_rate": 6.08955223880597e-05, "loss": 0.0809, "step": 6162 }, { "epoch": 11.736566809320019, "grad_norm": 0.2194422036409378, "learning_rate": 6.0889171165449354e-05, "loss": 0.0534, "step": 6163 }, { "epoch": 11.73846885401807, "grad_norm": 0.2666342854499817, "learning_rate": 6.0882819942839e-05, "loss": 0.0611, "step": 6164 }, { "epoch": 11.74037089871612, "grad_norm": 0.20401927828788757, "learning_rate": 6.087646872022865e-05, "loss": 0.0613, "step": 6165 }, { "epoch": 11.74227294341417, "grad_norm": 0.156899556517601, "learning_rate": 6.0870117497618296e-05, "loss": 0.0698, "step": 6166 }, { "epoch": 11.74417498811222, "grad_norm": 0.39890050888061523, "learning_rate": 6.086376627500794e-05, "loss": 0.0778, "step": 6167 }, { "epoch": 11.746077032810271, "grad_norm": 0.2203887552022934, "learning_rate": 6.085741505239759e-05, "loss": 0.071, "step": 6168 }, { "epoch": 11.747979077508322, "grad_norm": 0.22864332795143127, "learning_rate": 6.085106382978724e-05, "loss": 0.0784, "step": 6169 }, { "epoch": 11.749881122206371, "grad_norm": 0.20026814937591553, "learning_rate": 6.0844712607176876e-05, "loss": 0.0475, "step": 6170 }, { "epoch": 11.751783166904422, "grad_norm": 0.1660257875919342, "learning_rate": 6.0838361384566534e-05, "loss": 0.0513, "step": 6171 }, { "epoch": 11.753685211602473, "grad_norm": 0.17982488870620728, "learning_rate": 6.083201016195618e-05, "loss": 0.0601, "step": 6172 }, { "epoch": 11.755587256300522, "grad_norm": 0.20312677323818207, "learning_rate": 6.082565893934583e-05, "loss": 0.0605, "step": 6173 }, { "epoch": 11.757489300998573, "grad_norm": 0.2112540304660797, "learning_rate": 6.0819307716735476e-05, "loss": 0.0544, "step": 6174 }, { "epoch": 11.759391345696624, "grad_norm": 0.16244660317897797, "learning_rate": 6.0812956494125115e-05, "loss": 0.0588, "step": 6175 }, { "epoch": 11.761293390394675, "grad_norm": 0.17628826200962067, "learning_rate": 6.080660527151477e-05, "loss": 0.0646, "step": 6176 }, { "epoch": 11.763195435092724, "grad_norm": 0.11459421366453171, "learning_rate": 6.080025404890441e-05, "loss": 0.0431, "step": 6177 }, { "epoch": 11.765097479790775, "grad_norm": 0.2594488263130188, "learning_rate": 6.079390282629407e-05, "loss": 0.07, "step": 6178 }, { "epoch": 11.766999524488826, "grad_norm": 0.16236400604248047, "learning_rate": 6.0787551603683715e-05, "loss": 0.0516, "step": 6179 }, { "epoch": 11.768901569186877, "grad_norm": 0.22279828786849976, "learning_rate": 6.0781200381073354e-05, "loss": 0.0665, "step": 6180 }, { "epoch": 11.770803613884926, "grad_norm": 0.3085854947566986, "learning_rate": 6.077484915846301e-05, "loss": 0.0654, "step": 6181 }, { "epoch": 11.772705658582977, "grad_norm": 0.23716288805007935, "learning_rate": 6.076849793585265e-05, "loss": 0.0664, "step": 6182 }, { "epoch": 11.774607703281028, "grad_norm": 0.3094969689846039, "learning_rate": 6.076214671324231e-05, "loss": 0.0782, "step": 6183 }, { "epoch": 11.776509747979077, "grad_norm": 0.23545250296592712, "learning_rate": 6.075579549063195e-05, "loss": 0.0704, "step": 6184 }, { "epoch": 11.778411792677128, "grad_norm": 0.2429591715335846, "learning_rate": 6.074944426802159e-05, "loss": 0.0588, "step": 6185 }, { "epoch": 11.780313837375179, "grad_norm": 0.13099047541618347, "learning_rate": 6.074309304541125e-05, "loss": 0.0624, "step": 6186 }, { "epoch": 11.78221588207323, "grad_norm": 0.18458104133605957, "learning_rate": 6.073674182280089e-05, "loss": 0.05, "step": 6187 }, { "epoch": 11.784117926771279, "grad_norm": 0.16507314145565033, "learning_rate": 6.0730390600190534e-05, "loss": 0.0724, "step": 6188 }, { "epoch": 11.78601997146933, "grad_norm": 0.29678866267204285, "learning_rate": 6.0724039377580186e-05, "loss": 0.0681, "step": 6189 }, { "epoch": 11.78792201616738, "grad_norm": 0.2282082587480545, "learning_rate": 6.071768815496983e-05, "loss": 0.0717, "step": 6190 }, { "epoch": 11.789824060865431, "grad_norm": 0.25922587513923645, "learning_rate": 6.071133693235948e-05, "loss": 0.091, "step": 6191 }, { "epoch": 11.79172610556348, "grad_norm": 0.14782756567001343, "learning_rate": 6.070498570974913e-05, "loss": 0.0469, "step": 6192 }, { "epoch": 11.793628150261531, "grad_norm": 0.1892451047897339, "learning_rate": 6.069863448713877e-05, "loss": 0.0502, "step": 6193 }, { "epoch": 11.795530194959582, "grad_norm": 0.1491728127002716, "learning_rate": 6.0692283264528425e-05, "loss": 0.0513, "step": 6194 }, { "epoch": 11.797432239657631, "grad_norm": 0.20024257898330688, "learning_rate": 6.068593204191807e-05, "loss": 0.0671, "step": 6195 }, { "epoch": 11.799334284355682, "grad_norm": 0.2266501933336258, "learning_rate": 6.067958081930772e-05, "loss": 0.0704, "step": 6196 }, { "epoch": 11.801236329053733, "grad_norm": 0.15230099856853485, "learning_rate": 6.067322959669737e-05, "loss": 0.0547, "step": 6197 }, { "epoch": 11.803138373751784, "grad_norm": 0.33129194378852844, "learning_rate": 6.066687837408701e-05, "loss": 0.0669, "step": 6198 }, { "epoch": 11.805040418449833, "grad_norm": 0.1871880143880844, "learning_rate": 6.0660527151476664e-05, "loss": 0.0597, "step": 6199 }, { "epoch": 11.806942463147884, "grad_norm": 0.2125813364982605, "learning_rate": 6.065417592886631e-05, "loss": 0.0713, "step": 6200 }, { "epoch": 11.808844507845935, "grad_norm": 0.1894565224647522, "learning_rate": 6.064782470625596e-05, "loss": 0.053, "step": 6201 }, { "epoch": 11.810746552543986, "grad_norm": 0.18981118500232697, "learning_rate": 6.0641473483645606e-05, "loss": 0.066, "step": 6202 }, { "epoch": 11.812648597242035, "grad_norm": 0.1811751425266266, "learning_rate": 6.063512226103525e-05, "loss": 0.0551, "step": 6203 }, { "epoch": 11.814550641940086, "grad_norm": 0.23820361495018005, "learning_rate": 6.06287710384249e-05, "loss": 0.072, "step": 6204 }, { "epoch": 11.816452686638137, "grad_norm": 0.14921443164348602, "learning_rate": 6.062241981581455e-05, "loss": 0.0919, "step": 6205 }, { "epoch": 11.818354731336186, "grad_norm": 0.23790062963962555, "learning_rate": 6.0616068593204186e-05, "loss": 0.0573, "step": 6206 }, { "epoch": 11.820256776034237, "grad_norm": 0.18912047147750854, "learning_rate": 6.0609717370593845e-05, "loss": 0.0564, "step": 6207 }, { "epoch": 11.822158820732287, "grad_norm": 0.2074270099401474, "learning_rate": 6.060336614798349e-05, "loss": 0.0583, "step": 6208 }, { "epoch": 11.824060865430338, "grad_norm": 0.15312808752059937, "learning_rate": 6.059701492537314e-05, "loss": 0.0617, "step": 6209 }, { "epoch": 11.825962910128387, "grad_norm": 0.2151595503091812, "learning_rate": 6.059066370276279e-05, "loss": 0.0597, "step": 6210 }, { "epoch": 11.827864954826438, "grad_norm": 0.2581579089164734, "learning_rate": 6.0584312480152425e-05, "loss": 0.0678, "step": 6211 }, { "epoch": 11.82976699952449, "grad_norm": 0.16070884466171265, "learning_rate": 6.0577961257542084e-05, "loss": 0.0555, "step": 6212 }, { "epoch": 11.83166904422254, "grad_norm": 0.1928481012582779, "learning_rate": 6.057161003493172e-05, "loss": 0.0547, "step": 6213 }, { "epoch": 11.83357108892059, "grad_norm": 0.15289796888828278, "learning_rate": 6.056525881232138e-05, "loss": 0.0473, "step": 6214 }, { "epoch": 11.83547313361864, "grad_norm": 0.2093266397714615, "learning_rate": 6.0558907589711026e-05, "loss": 0.0844, "step": 6215 }, { "epoch": 11.837375178316691, "grad_norm": 0.259589821100235, "learning_rate": 6.0552556367100664e-05, "loss": 0.0646, "step": 6216 }, { "epoch": 11.83927722301474, "grad_norm": 0.20682299137115479, "learning_rate": 6.054620514449032e-05, "loss": 0.0715, "step": 6217 }, { "epoch": 11.841179267712791, "grad_norm": 0.16990284621715546, "learning_rate": 6.053985392187996e-05, "loss": 0.0527, "step": 6218 }, { "epoch": 11.843081312410842, "grad_norm": 0.1800818145275116, "learning_rate": 6.053350269926962e-05, "loss": 0.0584, "step": 6219 }, { "epoch": 11.844983357108893, "grad_norm": 0.31309738755226135, "learning_rate": 6.052715147665926e-05, "loss": 0.0688, "step": 6220 }, { "epoch": 11.846885401806942, "grad_norm": 0.23532579839229584, "learning_rate": 6.05208002540489e-05, "loss": 0.0611, "step": 6221 }, { "epoch": 11.848787446504993, "grad_norm": 0.20161910355091095, "learning_rate": 6.051444903143856e-05, "loss": 0.0703, "step": 6222 }, { "epoch": 11.850689491203044, "grad_norm": 0.21405024826526642, "learning_rate": 6.05080978088282e-05, "loss": 0.0669, "step": 6223 }, { "epoch": 11.852591535901094, "grad_norm": 0.33080706000328064, "learning_rate": 6.0501746586217845e-05, "loss": 0.0689, "step": 6224 }, { "epoch": 11.854493580599144, "grad_norm": 0.29687973856925964, "learning_rate": 6.04953953636075e-05, "loss": 0.0697, "step": 6225 }, { "epoch": 11.856395625297194, "grad_norm": 0.3404380679130554, "learning_rate": 6.048904414099714e-05, "loss": 0.0576, "step": 6226 }, { "epoch": 11.858297669995245, "grad_norm": 0.26443350315093994, "learning_rate": 6.0482692918386794e-05, "loss": 0.0718, "step": 6227 }, { "epoch": 11.860199714693294, "grad_norm": 0.22917382419109344, "learning_rate": 6.047634169577644e-05, "loss": 0.0833, "step": 6228 }, { "epoch": 11.862101759391345, "grad_norm": 0.3504857122898102, "learning_rate": 6.0469990473166084e-05, "loss": 0.0833, "step": 6229 }, { "epoch": 11.864003804089396, "grad_norm": 0.2867469787597656, "learning_rate": 6.0463639250555736e-05, "loss": 0.0707, "step": 6230 }, { "epoch": 11.865905848787447, "grad_norm": 0.22201254963874817, "learning_rate": 6.045728802794538e-05, "loss": 0.0761, "step": 6231 }, { "epoch": 11.867807893485496, "grad_norm": 0.20512932538986206, "learning_rate": 6.045093680533503e-05, "loss": 0.0685, "step": 6232 }, { "epoch": 11.869709938183547, "grad_norm": 0.23396115005016327, "learning_rate": 6.044458558272468e-05, "loss": 0.0619, "step": 6233 }, { "epoch": 11.871611982881598, "grad_norm": 0.19524352252483368, "learning_rate": 6.043823436011432e-05, "loss": 0.0484, "step": 6234 }, { "epoch": 11.873514027579649, "grad_norm": 0.3588017523288727, "learning_rate": 6.0431883137503974e-05, "loss": 0.0832, "step": 6235 }, { "epoch": 11.875416072277698, "grad_norm": 0.24636587500572205, "learning_rate": 6.042553191489362e-05, "loss": 0.0983, "step": 6236 }, { "epoch": 11.877318116975749, "grad_norm": 0.17110472917556763, "learning_rate": 6.041918069228327e-05, "loss": 0.0779, "step": 6237 }, { "epoch": 11.8792201616738, "grad_norm": 0.2017090618610382, "learning_rate": 6.0412829469672916e-05, "loss": 0.0645, "step": 6238 }, { "epoch": 11.881122206371849, "grad_norm": 0.26978799700737, "learning_rate": 6.040647824706256e-05, "loss": 0.0542, "step": 6239 }, { "epoch": 11.8830242510699, "grad_norm": 0.16964603960514069, "learning_rate": 6.040012702445221e-05, "loss": 0.064, "step": 6240 }, { "epoch": 11.88492629576795, "grad_norm": 0.22872652113437653, "learning_rate": 6.039377580184186e-05, "loss": 0.0655, "step": 6241 }, { "epoch": 11.886828340466002, "grad_norm": 0.2875925600528717, "learning_rate": 6.03874245792315e-05, "loss": 0.0754, "step": 6242 }, { "epoch": 11.88873038516405, "grad_norm": 0.2056693732738495, "learning_rate": 6.0381073356621155e-05, "loss": 0.0781, "step": 6243 }, { "epoch": 11.890632429862102, "grad_norm": 0.18020100891590118, "learning_rate": 6.03747221340108e-05, "loss": 0.047, "step": 6244 }, { "epoch": 11.892534474560152, "grad_norm": 0.20994068682193756, "learning_rate": 6.036837091140045e-05, "loss": 0.0528, "step": 6245 }, { "epoch": 11.894436519258203, "grad_norm": 0.2020370066165924, "learning_rate": 6.03620196887901e-05, "loss": 0.0607, "step": 6246 }, { "epoch": 11.896338563956252, "grad_norm": 0.22844986617565155, "learning_rate": 6.0355668466179736e-05, "loss": 0.0939, "step": 6247 }, { "epoch": 11.898240608654303, "grad_norm": 0.2161705046892166, "learning_rate": 6.0349317243569394e-05, "loss": 0.0408, "step": 6248 }, { "epoch": 11.900142653352354, "grad_norm": 0.202499121427536, "learning_rate": 6.034296602095903e-05, "loss": 0.063, "step": 6249 }, { "epoch": 11.902044698050403, "grad_norm": 0.2268448770046234, "learning_rate": 6.033661479834869e-05, "loss": 0.0916, "step": 6250 }, { "epoch": 11.903946742748454, "grad_norm": 0.20890720188617706, "learning_rate": 6.0330263575738336e-05, "loss": 0.0673, "step": 6251 }, { "epoch": 11.905848787446505, "grad_norm": 0.16075345873832703, "learning_rate": 6.0323912353127974e-05, "loss": 0.0671, "step": 6252 }, { "epoch": 11.907750832144556, "grad_norm": 0.19686825573444366, "learning_rate": 6.031756113051763e-05, "loss": 0.0554, "step": 6253 }, { "epoch": 11.909652876842605, "grad_norm": 0.15151557326316833, "learning_rate": 6.031120990790727e-05, "loss": 0.0482, "step": 6254 }, { "epoch": 11.911554921540656, "grad_norm": 0.3714834451675415, "learning_rate": 6.030485868529693e-05, "loss": 0.0697, "step": 6255 }, { "epoch": 11.913456966238707, "grad_norm": 0.18377116322517395, "learning_rate": 6.029850746268657e-05, "loss": 0.066, "step": 6256 }, { "epoch": 11.915359010936758, "grad_norm": 0.21345947682857513, "learning_rate": 6.029215624007621e-05, "loss": 0.0698, "step": 6257 }, { "epoch": 11.917261055634807, "grad_norm": 0.215050607919693, "learning_rate": 6.0285805017465865e-05, "loss": 0.0755, "step": 6258 }, { "epoch": 11.919163100332858, "grad_norm": 0.1479867845773697, "learning_rate": 6.027945379485551e-05, "loss": 0.0519, "step": 6259 }, { "epoch": 11.921065145030909, "grad_norm": 0.2630639374256134, "learning_rate": 6.0273102572245155e-05, "loss": 0.0523, "step": 6260 }, { "epoch": 11.922967189728958, "grad_norm": 0.2974599003791809, "learning_rate": 6.026675134963481e-05, "loss": 0.0531, "step": 6261 }, { "epoch": 11.924869234427009, "grad_norm": 0.09849124401807785, "learning_rate": 6.026040012702445e-05, "loss": 0.047, "step": 6262 }, { "epoch": 11.92677127912506, "grad_norm": 0.18132846057415009, "learning_rate": 6.0254048904414104e-05, "loss": 0.063, "step": 6263 }, { "epoch": 11.92867332382311, "grad_norm": 0.2807028591632843, "learning_rate": 6.024769768180375e-05, "loss": 0.0774, "step": 6264 }, { "epoch": 11.93057536852116, "grad_norm": 0.2209707349538803, "learning_rate": 6.0241346459193394e-05, "loss": 0.0665, "step": 6265 }, { "epoch": 11.93247741321921, "grad_norm": 0.21792303025722504, "learning_rate": 6.0234995236583046e-05, "loss": 0.0612, "step": 6266 }, { "epoch": 11.934379457917261, "grad_norm": 0.216604083776474, "learning_rate": 6.022864401397269e-05, "loss": 0.0543, "step": 6267 }, { "epoch": 11.936281502615312, "grad_norm": 0.40272215008735657, "learning_rate": 6.022229279136234e-05, "loss": 0.0773, "step": 6268 }, { "epoch": 11.938183547313361, "grad_norm": 0.2663206160068512, "learning_rate": 6.021594156875199e-05, "loss": 0.0746, "step": 6269 }, { "epoch": 11.940085592011412, "grad_norm": 0.2070496529340744, "learning_rate": 6.020959034614163e-05, "loss": 0.0441, "step": 6270 }, { "epoch": 11.941987636709463, "grad_norm": 0.11446495354175568, "learning_rate": 6.0203239123531285e-05, "loss": 0.028, "step": 6271 }, { "epoch": 11.943889681407512, "grad_norm": 0.2797466516494751, "learning_rate": 6.019688790092093e-05, "loss": 0.0703, "step": 6272 }, { "epoch": 11.945791726105563, "grad_norm": 0.1635390669107437, "learning_rate": 6.019053667831058e-05, "loss": 0.0625, "step": 6273 }, { "epoch": 11.947693770803614, "grad_norm": 0.24397385120391846, "learning_rate": 6.018418545570023e-05, "loss": 0.07, "step": 6274 }, { "epoch": 11.949595815501665, "grad_norm": 0.19531305134296417, "learning_rate": 6.017783423308987e-05, "loss": 0.0726, "step": 6275 }, { "epoch": 11.951497860199714, "grad_norm": 0.23823881149291992, "learning_rate": 6.0171483010479524e-05, "loss": 0.066, "step": 6276 }, { "epoch": 11.953399904897765, "grad_norm": 0.28544214367866516, "learning_rate": 6.016513178786917e-05, "loss": 0.0846, "step": 6277 }, { "epoch": 11.955301949595816, "grad_norm": 0.3207830488681793, "learning_rate": 6.015878056525881e-05, "loss": 0.0746, "step": 6278 }, { "epoch": 11.957203994293867, "grad_norm": 0.22940942645072937, "learning_rate": 6.0152429342648466e-05, "loss": 0.0648, "step": 6279 }, { "epoch": 11.959106038991916, "grad_norm": 0.24860794842243195, "learning_rate": 6.0146078120038104e-05, "loss": 0.0766, "step": 6280 }, { "epoch": 11.961008083689967, "grad_norm": 0.1024889349937439, "learning_rate": 6.013972689742776e-05, "loss": 0.0379, "step": 6281 }, { "epoch": 11.962910128388017, "grad_norm": 0.22909225523471832, "learning_rate": 6.013337567481741e-05, "loss": 0.0703, "step": 6282 }, { "epoch": 11.964812173086067, "grad_norm": 0.18012385070323944, "learning_rate": 6.0127024452207046e-05, "loss": 0.0596, "step": 6283 }, { "epoch": 11.966714217784117, "grad_norm": 0.31610363721847534, "learning_rate": 6.0120673229596705e-05, "loss": 0.0758, "step": 6284 }, { "epoch": 11.968616262482168, "grad_norm": 0.3586494028568268, "learning_rate": 6.011432200698634e-05, "loss": 0.0977, "step": 6285 }, { "epoch": 11.97051830718022, "grad_norm": 0.24757806956768036, "learning_rate": 6.0107970784376e-05, "loss": 0.0829, "step": 6286 }, { "epoch": 11.972420351878268, "grad_norm": 0.2095833718776703, "learning_rate": 6.010161956176564e-05, "loss": 0.0691, "step": 6287 }, { "epoch": 11.97432239657632, "grad_norm": 0.337371826171875, "learning_rate": 6.0095268339155285e-05, "loss": 0.0835, "step": 6288 }, { "epoch": 11.97622444127437, "grad_norm": 0.1434415727853775, "learning_rate": 6.0088917116544944e-05, "loss": 0.0393, "step": 6289 }, { "epoch": 11.978126485972421, "grad_norm": 0.1812358945608139, "learning_rate": 6.008256589393458e-05, "loss": 0.076, "step": 6290 }, { "epoch": 11.98002853067047, "grad_norm": 0.21688790619373322, "learning_rate": 6.007621467132424e-05, "loss": 0.0581, "step": 6291 }, { "epoch": 11.981930575368521, "grad_norm": 0.227838397026062, "learning_rate": 6.006986344871388e-05, "loss": 0.0694, "step": 6292 }, { "epoch": 11.983832620066572, "grad_norm": 0.29435136914253235, "learning_rate": 6.0063512226103524e-05, "loss": 0.0856, "step": 6293 }, { "epoch": 11.985734664764623, "grad_norm": 0.36537912487983704, "learning_rate": 6.0057161003493176e-05, "loss": 0.0635, "step": 6294 }, { "epoch": 11.987636709462672, "grad_norm": 0.26229700446128845, "learning_rate": 6.005080978088282e-05, "loss": 0.0579, "step": 6295 }, { "epoch": 11.989538754160723, "grad_norm": 0.17395451664924622, "learning_rate": 6.0044458558272466e-05, "loss": 0.0638, "step": 6296 }, { "epoch": 11.991440798858774, "grad_norm": 0.16267430782318115, "learning_rate": 6.003810733566212e-05, "loss": 0.0652, "step": 6297 }, { "epoch": 11.993342843556823, "grad_norm": 0.28298768401145935, "learning_rate": 6.003175611305176e-05, "loss": 0.0639, "step": 6298 }, { "epoch": 11.995244888254874, "grad_norm": 0.23265522718429565, "learning_rate": 6.0025404890441414e-05, "loss": 0.0675, "step": 6299 }, { "epoch": 11.997146932952925, "grad_norm": 0.1655040979385376, "learning_rate": 6.001905366783106e-05, "loss": 0.0492, "step": 6300 }, { "epoch": 11.999048977650975, "grad_norm": 0.27211880683898926, "learning_rate": 6.0012702445220705e-05, "loss": 0.0648, "step": 6301 }, { "epoch": 12.000951022349025, "grad_norm": 0.29517486691474915, "learning_rate": 6.0006351222610356e-05, "loss": 0.0727, "step": 6302 }, { "epoch": 12.002853067047075, "grad_norm": 0.19214580953121185, "learning_rate": 6e-05, "loss": 0.0641, "step": 6303 }, { "epoch": 12.004755111745126, "grad_norm": 0.1292438805103302, "learning_rate": 5.999364877738965e-05, "loss": 0.0493, "step": 6304 }, { "epoch": 12.006657156443177, "grad_norm": 0.19623462855815887, "learning_rate": 5.99872975547793e-05, "loss": 0.078, "step": 6305 }, { "epoch": 12.008559201141226, "grad_norm": 0.3401356041431427, "learning_rate": 5.9980946332168943e-05, "loss": 0.0795, "step": 6306 }, { "epoch": 12.010461245839277, "grad_norm": 0.20049375295639038, "learning_rate": 5.9974595109558595e-05, "loss": 0.0629, "step": 6307 }, { "epoch": 12.012363290537328, "grad_norm": 0.13026028871536255, "learning_rate": 5.996824388694824e-05, "loss": 0.0499, "step": 6308 }, { "epoch": 12.014265335235377, "grad_norm": 0.245549276471138, "learning_rate": 5.996189266433789e-05, "loss": 0.0634, "step": 6309 }, { "epoch": 12.016167379933428, "grad_norm": 0.12406376004219055, "learning_rate": 5.995554144172754e-05, "loss": 0.0408, "step": 6310 }, { "epoch": 12.018069424631479, "grad_norm": 0.16947674751281738, "learning_rate": 5.994919021911718e-05, "loss": 0.065, "step": 6311 }, { "epoch": 12.01997146932953, "grad_norm": 0.14021891355514526, "learning_rate": 5.9942838996506834e-05, "loss": 0.0771, "step": 6312 }, { "epoch": 12.021873514027579, "grad_norm": 0.12413173168897629, "learning_rate": 5.993648777389648e-05, "loss": 0.054, "step": 6313 }, { "epoch": 12.02377555872563, "grad_norm": 0.11317545920610428, "learning_rate": 5.993013655128612e-05, "loss": 0.0738, "step": 6314 }, { "epoch": 12.02567760342368, "grad_norm": 0.1977335810661316, "learning_rate": 5.9923785328675776e-05, "loss": 0.0588, "step": 6315 }, { "epoch": 12.027579648121732, "grad_norm": 0.23832173645496368, "learning_rate": 5.9917434106065414e-05, "loss": 0.0626, "step": 6316 }, { "epoch": 12.02948169281978, "grad_norm": 0.23073810338974, "learning_rate": 5.991108288345507e-05, "loss": 0.0534, "step": 6317 }, { "epoch": 12.031383737517832, "grad_norm": 0.1403868943452835, "learning_rate": 5.990473166084472e-05, "loss": 0.0786, "step": 6318 }, { "epoch": 12.033285782215883, "grad_norm": 0.25834405422210693, "learning_rate": 5.9898380438234356e-05, "loss": 0.0566, "step": 6319 }, { "epoch": 12.035187826913932, "grad_norm": 0.18973225355148315, "learning_rate": 5.9892029215624015e-05, "loss": 0.0484, "step": 6320 }, { "epoch": 12.037089871611983, "grad_norm": 0.18137171864509583, "learning_rate": 5.988567799301365e-05, "loss": 0.0717, "step": 6321 }, { "epoch": 12.038991916310033, "grad_norm": 0.23322740197181702, "learning_rate": 5.987932677040331e-05, "loss": 0.2109, "step": 6322 }, { "epoch": 12.040893961008084, "grad_norm": 0.20067892968654633, "learning_rate": 5.987297554779295e-05, "loss": 0.0475, "step": 6323 }, { "epoch": 12.042796005706133, "grad_norm": 0.13799051940441132, "learning_rate": 5.9866624325182595e-05, "loss": 0.0529, "step": 6324 }, { "epoch": 12.044698050404184, "grad_norm": 0.21583084762096405, "learning_rate": 5.9860273102572254e-05, "loss": 0.0732, "step": 6325 }, { "epoch": 12.046600095102235, "grad_norm": 0.21806849539279938, "learning_rate": 5.985392187996189e-05, "loss": 0.0911, "step": 6326 }, { "epoch": 12.048502139800286, "grad_norm": 0.33009856939315796, "learning_rate": 5.984757065735155e-05, "loss": 0.0738, "step": 6327 }, { "epoch": 12.050404184498335, "grad_norm": 0.14276517927646637, "learning_rate": 5.984121943474119e-05, "loss": 0.0441, "step": 6328 }, { "epoch": 12.052306229196386, "grad_norm": 0.14218777418136597, "learning_rate": 5.9834868212130834e-05, "loss": 0.0672, "step": 6329 }, { "epoch": 12.054208273894437, "grad_norm": 0.09867583215236664, "learning_rate": 5.9828516989520486e-05, "loss": 0.0498, "step": 6330 }, { "epoch": 12.056110318592486, "grad_norm": 0.11911500245332718, "learning_rate": 5.982216576691013e-05, "loss": 0.0503, "step": 6331 }, { "epoch": 12.058012363290537, "grad_norm": 0.21429555118083954, "learning_rate": 5.9815814544299776e-05, "loss": 0.0746, "step": 6332 }, { "epoch": 12.059914407988588, "grad_norm": 0.21537581086158752, "learning_rate": 5.980946332168943e-05, "loss": 0.0896, "step": 6333 }, { "epoch": 12.061816452686639, "grad_norm": 0.11404319852590561, "learning_rate": 5.980311209907907e-05, "loss": 0.0468, "step": 6334 }, { "epoch": 12.063718497384688, "grad_norm": 0.1308046579360962, "learning_rate": 5.9796760876468725e-05, "loss": 0.0697, "step": 6335 }, { "epoch": 12.065620542082739, "grad_norm": 0.15974490344524384, "learning_rate": 5.979040965385837e-05, "loss": 0.0586, "step": 6336 }, { "epoch": 12.06752258678079, "grad_norm": 0.19112728536128998, "learning_rate": 5.9784058431248015e-05, "loss": 0.0471, "step": 6337 }, { "epoch": 12.06942463147884, "grad_norm": 0.1520841419696808, "learning_rate": 5.977770720863767e-05, "loss": 0.0383, "step": 6338 }, { "epoch": 12.07132667617689, "grad_norm": 0.2817116379737854, "learning_rate": 5.977135598602731e-05, "loss": 0.0721, "step": 6339 }, { "epoch": 12.07322872087494, "grad_norm": 0.13829343020915985, "learning_rate": 5.9765004763416964e-05, "loss": 0.06, "step": 6340 }, { "epoch": 12.075130765572991, "grad_norm": 0.1855231076478958, "learning_rate": 5.975865354080661e-05, "loss": 0.0683, "step": 6341 }, { "epoch": 12.077032810271042, "grad_norm": 0.19953955709934235, "learning_rate": 5.9752302318196254e-05, "loss": 0.0674, "step": 6342 }, { "epoch": 12.078934854969091, "grad_norm": 0.1148846447467804, "learning_rate": 5.9745951095585906e-05, "loss": 0.0501, "step": 6343 }, { "epoch": 12.080836899667142, "grad_norm": 0.1192585900425911, "learning_rate": 5.973959987297555e-05, "loss": 0.0706, "step": 6344 }, { "epoch": 12.082738944365193, "grad_norm": 0.20792797207832336, "learning_rate": 5.97332486503652e-05, "loss": 0.0679, "step": 6345 }, { "epoch": 12.084640989063242, "grad_norm": 0.30921128392219543, "learning_rate": 5.972689742775485e-05, "loss": 0.0922, "step": 6346 }, { "epoch": 12.086543033761293, "grad_norm": 0.18978790938854218, "learning_rate": 5.972054620514449e-05, "loss": 0.0555, "step": 6347 }, { "epoch": 12.088445078459344, "grad_norm": 0.156063511967659, "learning_rate": 5.9714194982534145e-05, "loss": 0.069, "step": 6348 }, { "epoch": 12.090347123157395, "grad_norm": 0.10468289256095886, "learning_rate": 5.970784375992379e-05, "loss": 0.0391, "step": 6349 }, { "epoch": 12.092249167855444, "grad_norm": 0.26243624091148376, "learning_rate": 5.970149253731343e-05, "loss": 0.0671, "step": 6350 }, { "epoch": 12.094151212553495, "grad_norm": 0.149452343583107, "learning_rate": 5.969514131470309e-05, "loss": 0.0625, "step": 6351 }, { "epoch": 12.096053257251546, "grad_norm": 0.15380904078483582, "learning_rate": 5.9688790092092725e-05, "loss": 0.0701, "step": 6352 }, { "epoch": 12.097955301949597, "grad_norm": 0.25332167744636536, "learning_rate": 5.9682438869482384e-05, "loss": 0.0597, "step": 6353 }, { "epoch": 12.099857346647646, "grad_norm": 0.20952828228473663, "learning_rate": 5.967608764687203e-05, "loss": 0.0715, "step": 6354 }, { "epoch": 12.101759391345697, "grad_norm": 0.19237349927425385, "learning_rate": 5.966973642426167e-05, "loss": 0.0607, "step": 6355 }, { "epoch": 12.103661436043748, "grad_norm": 0.1403336375951767, "learning_rate": 5.9663385201651326e-05, "loss": 0.0667, "step": 6356 }, { "epoch": 12.105563480741797, "grad_norm": 0.2567223310470581, "learning_rate": 5.9657033979040964e-05, "loss": 0.0587, "step": 6357 }, { "epoch": 12.107465525439848, "grad_norm": 0.15880762040615082, "learning_rate": 5.965068275643062e-05, "loss": 0.0588, "step": 6358 }, { "epoch": 12.109367570137898, "grad_norm": 0.2392554134130478, "learning_rate": 5.964433153382026e-05, "loss": 0.0647, "step": 6359 }, { "epoch": 12.11126961483595, "grad_norm": 0.22182698547840118, "learning_rate": 5.9637980311209906e-05, "loss": 0.0631, "step": 6360 }, { "epoch": 12.113171659533998, "grad_norm": 0.1435650885105133, "learning_rate": 5.963162908859956e-05, "loss": 0.0505, "step": 6361 }, { "epoch": 12.11507370423205, "grad_norm": 0.17061443626880646, "learning_rate": 5.96252778659892e-05, "loss": 0.0682, "step": 6362 }, { "epoch": 12.1169757489301, "grad_norm": 0.14622712135314941, "learning_rate": 5.961892664337886e-05, "loss": 0.0528, "step": 6363 }, { "epoch": 12.118877793628151, "grad_norm": 0.16312119364738464, "learning_rate": 5.96125754207685e-05, "loss": 0.058, "step": 6364 }, { "epoch": 12.1207798383262, "grad_norm": 0.13628754019737244, "learning_rate": 5.9606224198158145e-05, "loss": 0.0492, "step": 6365 }, { "epoch": 12.122681883024251, "grad_norm": 0.1798691302537918, "learning_rate": 5.9599872975547796e-05, "loss": 0.0829, "step": 6366 }, { "epoch": 12.124583927722302, "grad_norm": 0.3143334686756134, "learning_rate": 5.959352175293744e-05, "loss": 0.0772, "step": 6367 }, { "epoch": 12.126485972420351, "grad_norm": 0.1629781275987625, "learning_rate": 5.958717053032709e-05, "loss": 0.0538, "step": 6368 }, { "epoch": 12.128388017118402, "grad_norm": 0.15789663791656494, "learning_rate": 5.958081930771674e-05, "loss": 0.0534, "step": 6369 }, { "epoch": 12.130290061816453, "grad_norm": 0.30617648363113403, "learning_rate": 5.9574468085106384e-05, "loss": 0.107, "step": 6370 }, { "epoch": 12.132192106514504, "grad_norm": 0.24667520821094513, "learning_rate": 5.9568116862496035e-05, "loss": 0.0722, "step": 6371 }, { "epoch": 12.134094151212553, "grad_norm": 0.1449286937713623, "learning_rate": 5.956176563988568e-05, "loss": 0.088, "step": 6372 }, { "epoch": 12.135996195910604, "grad_norm": 0.13355699181556702, "learning_rate": 5.9555414417275325e-05, "loss": 0.066, "step": 6373 }, { "epoch": 12.137898240608655, "grad_norm": 0.20543327927589417, "learning_rate": 5.954906319466498e-05, "loss": 0.0674, "step": 6374 }, { "epoch": 12.139800285306706, "grad_norm": 0.18319639563560486, "learning_rate": 5.954271197205462e-05, "loss": 0.0745, "step": 6375 }, { "epoch": 12.141702330004755, "grad_norm": 0.18459048867225647, "learning_rate": 5.9536360749444274e-05, "loss": 0.064, "step": 6376 }, { "epoch": 12.143604374702806, "grad_norm": 0.16675764322280884, "learning_rate": 5.953000952683392e-05, "loss": 0.052, "step": 6377 }, { "epoch": 12.145506419400856, "grad_norm": 0.12792012095451355, "learning_rate": 5.9523658304223564e-05, "loss": 0.0523, "step": 6378 }, { "epoch": 12.147408464098906, "grad_norm": 0.1535326987504959, "learning_rate": 5.9517307081613216e-05, "loss": 0.0552, "step": 6379 }, { "epoch": 12.149310508796956, "grad_norm": 0.15143269300460815, "learning_rate": 5.951095585900286e-05, "loss": 0.06, "step": 6380 }, { "epoch": 12.151212553495007, "grad_norm": 0.20260484516620636, "learning_rate": 5.950460463639251e-05, "loss": 0.0664, "step": 6381 }, { "epoch": 12.153114598193058, "grad_norm": 0.19385959208011627, "learning_rate": 5.949825341378216e-05, "loss": 0.0573, "step": 6382 }, { "epoch": 12.155016642891107, "grad_norm": 0.23560649156570435, "learning_rate": 5.94919021911718e-05, "loss": 0.0446, "step": 6383 }, { "epoch": 12.156918687589158, "grad_norm": 0.20179638266563416, "learning_rate": 5.9485550968561455e-05, "loss": 0.0603, "step": 6384 }, { "epoch": 12.158820732287209, "grad_norm": 0.21200603246688843, "learning_rate": 5.94791997459511e-05, "loss": 0.0679, "step": 6385 }, { "epoch": 12.16072277698526, "grad_norm": 0.1812458634376526, "learning_rate": 5.947284852334074e-05, "loss": 0.0648, "step": 6386 }, { "epoch": 12.162624821683309, "grad_norm": 0.23138156533241272, "learning_rate": 5.94664973007304e-05, "loss": 0.076, "step": 6387 }, { "epoch": 12.16452686638136, "grad_norm": 0.12320508062839508, "learning_rate": 5.9460146078120035e-05, "loss": 0.0624, "step": 6388 }, { "epoch": 12.16642891107941, "grad_norm": 0.24851442873477936, "learning_rate": 5.9453794855509694e-05, "loss": 0.0432, "step": 6389 }, { "epoch": 12.16833095577746, "grad_norm": 0.14283230900764465, "learning_rate": 5.944744363289933e-05, "loss": 0.0561, "step": 6390 }, { "epoch": 12.17023300047551, "grad_norm": 0.3072549104690552, "learning_rate": 5.944109241028898e-05, "loss": 0.1046, "step": 6391 }, { "epoch": 12.172135045173562, "grad_norm": 0.1524524837732315, "learning_rate": 5.9434741187678636e-05, "loss": 0.0736, "step": 6392 }, { "epoch": 12.174037089871613, "grad_norm": 0.24835297465324402, "learning_rate": 5.9428389965068274e-05, "loss": 0.0675, "step": 6393 }, { "epoch": 12.175939134569662, "grad_norm": 0.19676776230335236, "learning_rate": 5.942203874245793e-05, "loss": 0.062, "step": 6394 }, { "epoch": 12.177841179267713, "grad_norm": 0.3479282855987549, "learning_rate": 5.941568751984757e-05, "loss": 0.085, "step": 6395 }, { "epoch": 12.179743223965763, "grad_norm": 0.3322353661060333, "learning_rate": 5.9409336297237216e-05, "loss": 0.0578, "step": 6396 }, { "epoch": 12.181645268663814, "grad_norm": 0.164231076836586, "learning_rate": 5.940298507462687e-05, "loss": 0.049, "step": 6397 }, { "epoch": 12.183547313361863, "grad_norm": 0.21767644584178925, "learning_rate": 5.939663385201651e-05, "loss": 0.0578, "step": 6398 }, { "epoch": 12.185449358059914, "grad_norm": 0.16830454766750336, "learning_rate": 5.939028262940617e-05, "loss": 0.0664, "step": 6399 }, { "epoch": 12.187351402757965, "grad_norm": 0.19520054757595062, "learning_rate": 5.938393140679581e-05, "loss": 0.0614, "step": 6400 }, { "epoch": 12.189253447456014, "grad_norm": 0.1620379537343979, "learning_rate": 5.9377580184185455e-05, "loss": 0.0724, "step": 6401 }, { "epoch": 12.191155492154065, "grad_norm": 0.1629243791103363, "learning_rate": 5.937122896157511e-05, "loss": 0.0479, "step": 6402 }, { "epoch": 12.193057536852116, "grad_norm": 0.13102157413959503, "learning_rate": 5.936487773896475e-05, "loss": 0.0568, "step": 6403 }, { "epoch": 12.194959581550167, "grad_norm": 0.30659785866737366, "learning_rate": 5.93585265163544e-05, "loss": 0.066, "step": 6404 }, { "epoch": 12.196861626248216, "grad_norm": 0.3091185986995697, "learning_rate": 5.935217529374405e-05, "loss": 0.0935, "step": 6405 }, { "epoch": 12.198763670946267, "grad_norm": 0.34564849734306335, "learning_rate": 5.9345824071133694e-05, "loss": 0.0775, "step": 6406 }, { "epoch": 12.200665715644318, "grad_norm": 0.21233195066452026, "learning_rate": 5.9339472848523346e-05, "loss": 0.0528, "step": 6407 }, { "epoch": 12.202567760342369, "grad_norm": 0.12436842918395996, "learning_rate": 5.933312162591299e-05, "loss": 0.0616, "step": 6408 }, { "epoch": 12.204469805040418, "grad_norm": 0.1318405121564865, "learning_rate": 5.9326770403302636e-05, "loss": 0.0406, "step": 6409 }, { "epoch": 12.206371849738469, "grad_norm": 0.16262254118919373, "learning_rate": 5.932041918069229e-05, "loss": 0.0573, "step": 6410 }, { "epoch": 12.20827389443652, "grad_norm": 0.26165643334388733, "learning_rate": 5.931406795808193e-05, "loss": 0.0639, "step": 6411 }, { "epoch": 12.210175939134569, "grad_norm": 0.1864023208618164, "learning_rate": 5.9307716735471585e-05, "loss": 0.0692, "step": 6412 }, { "epoch": 12.21207798383262, "grad_norm": 0.20596599578857422, "learning_rate": 5.930136551286123e-05, "loss": 0.0654, "step": 6413 }, { "epoch": 12.21398002853067, "grad_norm": 0.1777569204568863, "learning_rate": 5.9295014290250875e-05, "loss": 0.061, "step": 6414 }, { "epoch": 12.215882073228721, "grad_norm": 0.24012942612171173, "learning_rate": 5.928866306764053e-05, "loss": 0.0711, "step": 6415 }, { "epoch": 12.21778411792677, "grad_norm": 0.19146688282489777, "learning_rate": 5.928231184503017e-05, "loss": 0.0627, "step": 6416 }, { "epoch": 12.219686162624821, "grad_norm": 0.17938733100891113, "learning_rate": 5.9275960622419824e-05, "loss": 0.0672, "step": 6417 }, { "epoch": 12.221588207322872, "grad_norm": 0.1901213824748993, "learning_rate": 5.926960939980947e-05, "loss": 0.0539, "step": 6418 }, { "epoch": 12.223490252020923, "grad_norm": 0.22269144654273987, "learning_rate": 5.926325817719911e-05, "loss": 0.0457, "step": 6419 }, { "epoch": 12.225392296718972, "grad_norm": 0.2517926096916199, "learning_rate": 5.9256906954588766e-05, "loss": 0.0483, "step": 6420 }, { "epoch": 12.227294341417023, "grad_norm": 0.2087641805410385, "learning_rate": 5.925055573197841e-05, "loss": 0.0624, "step": 6421 }, { "epoch": 12.229196386115074, "grad_norm": 0.3165057599544525, "learning_rate": 5.924420450936805e-05, "loss": 0.0733, "step": 6422 }, { "epoch": 12.231098430813125, "grad_norm": 0.1193968653678894, "learning_rate": 5.923785328675771e-05, "loss": 0.0496, "step": 6423 }, { "epoch": 12.233000475511174, "grad_norm": 0.21130363643169403, "learning_rate": 5.9231502064147346e-05, "loss": 0.0672, "step": 6424 }, { "epoch": 12.234902520209225, "grad_norm": 0.1780078411102295, "learning_rate": 5.9225150841537004e-05, "loss": 0.0556, "step": 6425 }, { "epoch": 12.236804564907276, "grad_norm": 0.1848141849040985, "learning_rate": 5.921879961892664e-05, "loss": 0.0528, "step": 6426 }, { "epoch": 12.238706609605325, "grad_norm": 0.2436516433954239, "learning_rate": 5.921244839631629e-05, "loss": 0.0728, "step": 6427 }, { "epoch": 12.240608654303376, "grad_norm": 0.14385049045085907, "learning_rate": 5.9206097173705946e-05, "loss": 0.0684, "step": 6428 }, { "epoch": 12.242510699001427, "grad_norm": 0.10670439153909683, "learning_rate": 5.9199745951095585e-05, "loss": 0.0511, "step": 6429 }, { "epoch": 12.244412743699478, "grad_norm": 0.13781152665615082, "learning_rate": 5.919339472848524e-05, "loss": 0.0619, "step": 6430 }, { "epoch": 12.246314788397527, "grad_norm": 0.18462249636650085, "learning_rate": 5.918704350587488e-05, "loss": 0.0663, "step": 6431 }, { "epoch": 12.248216833095578, "grad_norm": 0.17309071123600006, "learning_rate": 5.918069228326453e-05, "loss": 0.0647, "step": 6432 }, { "epoch": 12.250118877793629, "grad_norm": 0.11195661127567291, "learning_rate": 5.917434106065418e-05, "loss": 0.0505, "step": 6433 }, { "epoch": 12.25202092249168, "grad_norm": 0.21312302350997925, "learning_rate": 5.9167989838043824e-05, "loss": 0.0622, "step": 6434 }, { "epoch": 12.253922967189729, "grad_norm": 0.14799503982067108, "learning_rate": 5.916163861543348e-05, "loss": 0.0512, "step": 6435 }, { "epoch": 12.25582501188778, "grad_norm": 0.24153608083724976, "learning_rate": 5.915528739282312e-05, "loss": 0.0662, "step": 6436 }, { "epoch": 12.25772705658583, "grad_norm": 0.1948433816432953, "learning_rate": 5.9148936170212766e-05, "loss": 0.0518, "step": 6437 }, { "epoch": 12.25962910128388, "grad_norm": 0.11079234629869461, "learning_rate": 5.914258494760242e-05, "loss": 0.0561, "step": 6438 }, { "epoch": 12.26153114598193, "grad_norm": 0.23933576047420502, "learning_rate": 5.913623372499206e-05, "loss": 0.0755, "step": 6439 }, { "epoch": 12.263433190679981, "grad_norm": 0.2561612129211426, "learning_rate": 5.912988250238171e-05, "loss": 0.0776, "step": 6440 }, { "epoch": 12.265335235378032, "grad_norm": 0.19443702697753906, "learning_rate": 5.912353127977136e-05, "loss": 0.0593, "step": 6441 }, { "epoch": 12.267237280076081, "grad_norm": 0.2021740823984146, "learning_rate": 5.9117180057161004e-05, "loss": 0.0532, "step": 6442 }, { "epoch": 12.269139324774132, "grad_norm": 0.3714454472064972, "learning_rate": 5.9110828834550656e-05, "loss": 0.095, "step": 6443 }, { "epoch": 12.271041369472183, "grad_norm": 0.1737510859966278, "learning_rate": 5.91044776119403e-05, "loss": 0.061, "step": 6444 }, { "epoch": 12.272943414170234, "grad_norm": 0.19303299486637115, "learning_rate": 5.9098126389329946e-05, "loss": 0.056, "step": 6445 }, { "epoch": 12.274845458868283, "grad_norm": 0.23309148848056793, "learning_rate": 5.90917751667196e-05, "loss": 0.0765, "step": 6446 }, { "epoch": 12.276747503566334, "grad_norm": 0.1525864601135254, "learning_rate": 5.908542394410924e-05, "loss": 0.053, "step": 6447 }, { "epoch": 12.278649548264385, "grad_norm": 0.18437476456165314, "learning_rate": 5.9079072721498895e-05, "loss": 0.0576, "step": 6448 }, { "epoch": 12.280551592962434, "grad_norm": 0.200849249958992, "learning_rate": 5.907272149888854e-05, "loss": 0.0605, "step": 6449 }, { "epoch": 12.282453637660485, "grad_norm": 0.16408275067806244, "learning_rate": 5.9066370276278185e-05, "loss": 0.0857, "step": 6450 }, { "epoch": 12.284355682358536, "grad_norm": 0.19141173362731934, "learning_rate": 5.906001905366784e-05, "loss": 0.0515, "step": 6451 }, { "epoch": 12.286257727056586, "grad_norm": 0.1545351892709732, "learning_rate": 5.905366783105748e-05, "loss": 0.0498, "step": 6452 }, { "epoch": 12.288159771754636, "grad_norm": 0.1542082577943802, "learning_rate": 5.9047316608447134e-05, "loss": 0.0617, "step": 6453 }, { "epoch": 12.290061816452686, "grad_norm": 0.2996247112751007, "learning_rate": 5.904096538583678e-05, "loss": 0.0759, "step": 6454 }, { "epoch": 12.291963861150737, "grad_norm": 0.09020984172821045, "learning_rate": 5.903461416322642e-05, "loss": 0.046, "step": 6455 }, { "epoch": 12.293865905848788, "grad_norm": 0.1882646679878235, "learning_rate": 5.9028262940616076e-05, "loss": 0.046, "step": 6456 }, { "epoch": 12.295767950546837, "grad_norm": 0.17779584228992462, "learning_rate": 5.902191171800572e-05, "loss": 0.0586, "step": 6457 }, { "epoch": 12.297669995244888, "grad_norm": 0.21012863516807556, "learning_rate": 5.901556049539536e-05, "loss": 0.0503, "step": 6458 }, { "epoch": 12.29957203994294, "grad_norm": 0.2423000931739807, "learning_rate": 5.900920927278502e-05, "loss": 0.0584, "step": 6459 }, { "epoch": 12.301474084640988, "grad_norm": 0.2120276242494583, "learning_rate": 5.9002858050174656e-05, "loss": 0.067, "step": 6460 }, { "epoch": 12.30337612933904, "grad_norm": 0.20125135779380798, "learning_rate": 5.8996506827564315e-05, "loss": 0.0686, "step": 6461 }, { "epoch": 12.30527817403709, "grad_norm": 0.13728126883506775, "learning_rate": 5.899015560495395e-05, "loss": 0.0547, "step": 6462 }, { "epoch": 12.307180218735141, "grad_norm": 0.1871999353170395, "learning_rate": 5.89838043823436e-05, "loss": 0.0634, "step": 6463 }, { "epoch": 12.30908226343319, "grad_norm": 0.38858938217163086, "learning_rate": 5.897745315973326e-05, "loss": 0.1177, "step": 6464 }, { "epoch": 12.310984308131241, "grad_norm": 0.1638013869524002, "learning_rate": 5.8971101937122895e-05, "loss": 0.1012, "step": 6465 }, { "epoch": 12.312886352829292, "grad_norm": 0.2869760990142822, "learning_rate": 5.8964750714512554e-05, "loss": 0.1207, "step": 6466 }, { "epoch": 12.314788397527343, "grad_norm": 0.15024712681770325, "learning_rate": 5.895839949190219e-05, "loss": 0.059, "step": 6467 }, { "epoch": 12.316690442225392, "grad_norm": 0.21937395632266998, "learning_rate": 5.895204826929184e-05, "loss": 0.0839, "step": 6468 }, { "epoch": 12.318592486923443, "grad_norm": 0.0890878215432167, "learning_rate": 5.894569704668149e-05, "loss": 0.0606, "step": 6469 }, { "epoch": 12.320494531621494, "grad_norm": 0.1961527168750763, "learning_rate": 5.8939345824071134e-05, "loss": 0.0759, "step": 6470 }, { "epoch": 12.322396576319543, "grad_norm": 0.14583544433116913, "learning_rate": 5.8932994601460786e-05, "loss": 0.0622, "step": 6471 }, { "epoch": 12.324298621017594, "grad_norm": 0.2313392609357834, "learning_rate": 5.892664337885043e-05, "loss": 0.0693, "step": 6472 }, { "epoch": 12.326200665715644, "grad_norm": 0.16019153594970703, "learning_rate": 5.8920292156240076e-05, "loss": 0.0538, "step": 6473 }, { "epoch": 12.328102710413695, "grad_norm": 0.22677358984947205, "learning_rate": 5.891394093362973e-05, "loss": 0.1244, "step": 6474 }, { "epoch": 12.330004755111744, "grad_norm": 0.20695747435092926, "learning_rate": 5.890758971101937e-05, "loss": 0.066, "step": 6475 }, { "epoch": 12.331906799809795, "grad_norm": 0.1801384836435318, "learning_rate": 5.890123848840902e-05, "loss": 0.0492, "step": 6476 }, { "epoch": 12.333808844507846, "grad_norm": 0.23628447949886322, "learning_rate": 5.889488726579867e-05, "loss": 0.0686, "step": 6477 }, { "epoch": 12.335710889205897, "grad_norm": 0.1047058179974556, "learning_rate": 5.8888536043188315e-05, "loss": 0.0622, "step": 6478 }, { "epoch": 12.337612933903946, "grad_norm": 0.19160684943199158, "learning_rate": 5.888218482057797e-05, "loss": 0.0791, "step": 6479 }, { "epoch": 12.339514978601997, "grad_norm": 0.19324207305908203, "learning_rate": 5.887583359796761e-05, "loss": 0.0835, "step": 6480 }, { "epoch": 12.341417023300048, "grad_norm": 0.18310590088367462, "learning_rate": 5.886948237535726e-05, "loss": 0.0604, "step": 6481 }, { "epoch": 12.343319067998099, "grad_norm": 0.2560455799102783, "learning_rate": 5.886313115274691e-05, "loss": 0.0815, "step": 6482 }, { "epoch": 12.345221112696148, "grad_norm": 0.37622329592704773, "learning_rate": 5.8856779930136554e-05, "loss": 0.0725, "step": 6483 }, { "epoch": 12.347123157394199, "grad_norm": 0.14701174199581146, "learning_rate": 5.8850428707526206e-05, "loss": 0.0511, "step": 6484 }, { "epoch": 12.34902520209225, "grad_norm": 0.22057802975177765, "learning_rate": 5.884407748491585e-05, "loss": 0.0583, "step": 6485 }, { "epoch": 12.350927246790299, "grad_norm": 0.16882608830928802, "learning_rate": 5.8837726262305496e-05, "loss": 0.0721, "step": 6486 }, { "epoch": 12.35282929148835, "grad_norm": 0.21905134618282318, "learning_rate": 5.883137503969515e-05, "loss": 0.0457, "step": 6487 }, { "epoch": 12.3547313361864, "grad_norm": 0.28024494647979736, "learning_rate": 5.882502381708479e-05, "loss": 0.0851, "step": 6488 }, { "epoch": 12.356633380884452, "grad_norm": 0.12102214246988297, "learning_rate": 5.8818672594474444e-05, "loss": 0.0646, "step": 6489 }, { "epoch": 12.3585354255825, "grad_norm": 0.21057242155075073, "learning_rate": 5.881232137186409e-05, "loss": 0.0577, "step": 6490 }, { "epoch": 12.360437470280552, "grad_norm": 0.20247074961662292, "learning_rate": 5.880597014925373e-05, "loss": 0.0555, "step": 6491 }, { "epoch": 12.362339514978602, "grad_norm": 0.36881357431411743, "learning_rate": 5.8799618926643386e-05, "loss": 0.0832, "step": 6492 }, { "epoch": 12.364241559676653, "grad_norm": 0.19223707914352417, "learning_rate": 5.879326770403303e-05, "loss": 0.0571, "step": 6493 }, { "epoch": 12.366143604374702, "grad_norm": 0.14942830801010132, "learning_rate": 5.878691648142267e-05, "loss": 0.0565, "step": 6494 }, { "epoch": 12.368045649072753, "grad_norm": 0.14054648578166962, "learning_rate": 5.878056525881233e-05, "loss": 0.0504, "step": 6495 }, { "epoch": 12.369947693770804, "grad_norm": 0.22053012251853943, "learning_rate": 5.877421403620197e-05, "loss": 0.0552, "step": 6496 }, { "epoch": 12.371849738468853, "grad_norm": 0.23704233765602112, "learning_rate": 5.8767862813591625e-05, "loss": 0.0667, "step": 6497 }, { "epoch": 12.373751783166904, "grad_norm": 0.17568421363830566, "learning_rate": 5.8761511590981264e-05, "loss": 0.0699, "step": 6498 }, { "epoch": 12.375653827864955, "grad_norm": 0.27614712715148926, "learning_rate": 5.875516036837091e-05, "loss": 0.0682, "step": 6499 }, { "epoch": 12.377555872563006, "grad_norm": 0.2671426832675934, "learning_rate": 5.874880914576056e-05, "loss": 0.0776, "step": 6500 }, { "epoch": 12.379457917261055, "grad_norm": 0.19971761107444763, "learning_rate": 5.8742457923150206e-05, "loss": 0.0601, "step": 6501 }, { "epoch": 12.381359961959106, "grad_norm": 0.15626168251037598, "learning_rate": 5.8736106700539864e-05, "loss": 0.0683, "step": 6502 }, { "epoch": 12.383262006657157, "grad_norm": 0.1768084019422531, "learning_rate": 5.87297554779295e-05, "loss": 0.061, "step": 6503 }, { "epoch": 12.385164051355208, "grad_norm": 0.17245520651340485, "learning_rate": 5.872340425531915e-05, "loss": 0.0494, "step": 6504 }, { "epoch": 12.387066096053257, "grad_norm": 0.14765174686908722, "learning_rate": 5.87170530327088e-05, "loss": 0.1017, "step": 6505 }, { "epoch": 12.388968140751308, "grad_norm": 0.21613261103630066, "learning_rate": 5.8710701810098444e-05, "loss": 0.0665, "step": 6506 }, { "epoch": 12.390870185449359, "grad_norm": 0.10022646188735962, "learning_rate": 5.8704350587488096e-05, "loss": 0.0818, "step": 6507 }, { "epoch": 12.392772230147408, "grad_norm": 0.1353398710489273, "learning_rate": 5.869799936487774e-05, "loss": 0.0521, "step": 6508 }, { "epoch": 12.394674274845459, "grad_norm": 0.291027307510376, "learning_rate": 5.8691648142267386e-05, "loss": 0.0542, "step": 6509 }, { "epoch": 12.39657631954351, "grad_norm": 0.2245619297027588, "learning_rate": 5.868529691965704e-05, "loss": 0.0711, "step": 6510 }, { "epoch": 12.39847836424156, "grad_norm": 0.11868350952863693, "learning_rate": 5.867894569704668e-05, "loss": 0.0782, "step": 6511 }, { "epoch": 12.40038040893961, "grad_norm": 0.24280130863189697, "learning_rate": 5.867259447443633e-05, "loss": 0.0843, "step": 6512 }, { "epoch": 12.40228245363766, "grad_norm": 0.16018423438072205, "learning_rate": 5.866624325182598e-05, "loss": 0.0559, "step": 6513 }, { "epoch": 12.404184498335711, "grad_norm": 0.25252264738082886, "learning_rate": 5.8659892029215625e-05, "loss": 0.0612, "step": 6514 }, { "epoch": 12.406086543033762, "grad_norm": 0.19043385982513428, "learning_rate": 5.865354080660528e-05, "loss": 0.0699, "step": 6515 }, { "epoch": 12.407988587731811, "grad_norm": 0.16969317197799683, "learning_rate": 5.864718958399492e-05, "loss": 0.0805, "step": 6516 }, { "epoch": 12.409890632429862, "grad_norm": 0.20662227272987366, "learning_rate": 5.864083836138457e-05, "loss": 0.0657, "step": 6517 }, { "epoch": 12.411792677127913, "grad_norm": 0.1850903332233429, "learning_rate": 5.863448713877422e-05, "loss": 0.0698, "step": 6518 }, { "epoch": 12.413694721825962, "grad_norm": 0.2603982388973236, "learning_rate": 5.8628135916163864e-05, "loss": 0.0693, "step": 6519 }, { "epoch": 12.415596766524013, "grad_norm": 0.25423145294189453, "learning_rate": 5.8621784693553516e-05, "loss": 0.0576, "step": 6520 }, { "epoch": 12.417498811222064, "grad_norm": 0.24416786432266235, "learning_rate": 5.861543347094316e-05, "loss": 0.0623, "step": 6521 }, { "epoch": 12.419400855920115, "grad_norm": 0.11997820436954498, "learning_rate": 5.8609082248332806e-05, "loss": 0.0576, "step": 6522 }, { "epoch": 12.421302900618164, "grad_norm": 0.20214514434337616, "learning_rate": 5.860273102572246e-05, "loss": 0.0598, "step": 6523 }, { "epoch": 12.423204945316215, "grad_norm": 0.21587751805782318, "learning_rate": 5.85963798031121e-05, "loss": 0.0655, "step": 6524 }, { "epoch": 12.425106990014266, "grad_norm": 0.11338116228580475, "learning_rate": 5.8590028580501755e-05, "loss": 0.0439, "step": 6525 }, { "epoch": 12.427009034712317, "grad_norm": 0.26029443740844727, "learning_rate": 5.85836773578914e-05, "loss": 0.0662, "step": 6526 }, { "epoch": 12.428911079410366, "grad_norm": 0.19062893092632294, "learning_rate": 5.857732613528104e-05, "loss": 0.084, "step": 6527 }, { "epoch": 12.430813124108417, "grad_norm": 0.16516171395778656, "learning_rate": 5.85709749126707e-05, "loss": 0.0629, "step": 6528 }, { "epoch": 12.432715168806467, "grad_norm": 0.20229387283325195, "learning_rate": 5.8564623690060335e-05, "loss": 0.0525, "step": 6529 }, { "epoch": 12.434617213504517, "grad_norm": 0.16246093809604645, "learning_rate": 5.855827246744998e-05, "loss": 0.0539, "step": 6530 }, { "epoch": 12.436519258202567, "grad_norm": 0.20271427929401398, "learning_rate": 5.855192124483964e-05, "loss": 0.0703, "step": 6531 }, { "epoch": 12.438421302900618, "grad_norm": 0.38267767429351807, "learning_rate": 5.854557002222928e-05, "loss": 0.0817, "step": 6532 }, { "epoch": 12.44032334759867, "grad_norm": 0.17112848162651062, "learning_rate": 5.8539218799618936e-05, "loss": 0.065, "step": 6533 }, { "epoch": 12.442225392296718, "grad_norm": 0.29853564500808716, "learning_rate": 5.8532867577008574e-05, "loss": 0.0604, "step": 6534 }, { "epoch": 12.44412743699477, "grad_norm": 0.1856275498867035, "learning_rate": 5.852651635439822e-05, "loss": 0.0523, "step": 6535 }, { "epoch": 12.44602948169282, "grad_norm": 0.2045893520116806, "learning_rate": 5.852016513178787e-05, "loss": 0.0572, "step": 6536 }, { "epoch": 12.447931526390871, "grad_norm": 0.1976349800825119, "learning_rate": 5.8513813909177516e-05, "loss": 0.0583, "step": 6537 }, { "epoch": 12.44983357108892, "grad_norm": 0.2721671164035797, "learning_rate": 5.8507462686567175e-05, "loss": 0.059, "step": 6538 }, { "epoch": 12.451735615786971, "grad_norm": 0.22028490900993347, "learning_rate": 5.850111146395681e-05, "loss": 0.0497, "step": 6539 }, { "epoch": 12.453637660485022, "grad_norm": 0.18933860957622528, "learning_rate": 5.849476024134646e-05, "loss": 0.0444, "step": 6540 }, { "epoch": 12.455539705183071, "grad_norm": 0.13993880152702332, "learning_rate": 5.848840901873611e-05, "loss": 0.046, "step": 6541 }, { "epoch": 12.457441749881122, "grad_norm": 0.1706465780735016, "learning_rate": 5.8482057796125755e-05, "loss": 0.0635, "step": 6542 }, { "epoch": 12.459343794579173, "grad_norm": 0.10777102410793304, "learning_rate": 5.847570657351541e-05, "loss": 0.0568, "step": 6543 }, { "epoch": 12.461245839277224, "grad_norm": 0.2629159390926361, "learning_rate": 5.846935535090505e-05, "loss": 0.0549, "step": 6544 }, { "epoch": 12.463147883975273, "grad_norm": 0.2961212992668152, "learning_rate": 5.84630041282947e-05, "loss": 0.0595, "step": 6545 }, { "epoch": 12.465049928673324, "grad_norm": 0.22317998111248016, "learning_rate": 5.845665290568435e-05, "loss": 0.0729, "step": 6546 }, { "epoch": 12.466951973371375, "grad_norm": 0.24215160310268402, "learning_rate": 5.8450301683073994e-05, "loss": 0.0706, "step": 6547 }, { "epoch": 12.468854018069425, "grad_norm": 0.22650720179080963, "learning_rate": 5.844395046046364e-05, "loss": 0.0717, "step": 6548 }, { "epoch": 12.470756062767475, "grad_norm": 0.1432311236858368, "learning_rate": 5.843759923785329e-05, "loss": 0.0609, "step": 6549 }, { "epoch": 12.472658107465525, "grad_norm": 0.2215825915336609, "learning_rate": 5.8431248015242936e-05, "loss": 0.0709, "step": 6550 }, { "epoch": 12.474560152163576, "grad_norm": 0.31182411313056946, "learning_rate": 5.842489679263259e-05, "loss": 0.0952, "step": 6551 }, { "epoch": 12.476462196861625, "grad_norm": 0.1482132524251938, "learning_rate": 5.841854557002223e-05, "loss": 0.0996, "step": 6552 }, { "epoch": 12.478364241559676, "grad_norm": 0.17739084362983704, "learning_rate": 5.841219434741188e-05, "loss": 0.07, "step": 6553 }, { "epoch": 12.480266286257727, "grad_norm": 0.2121700793504715, "learning_rate": 5.840584312480153e-05, "loss": 0.0768, "step": 6554 }, { "epoch": 12.482168330955778, "grad_norm": 0.1372910887002945, "learning_rate": 5.8399491902191175e-05, "loss": 0.053, "step": 6555 }, { "epoch": 12.484070375653827, "grad_norm": 0.1774810403585434, "learning_rate": 5.8393140679580826e-05, "loss": 0.0661, "step": 6556 }, { "epoch": 12.485972420351878, "grad_norm": 0.25764548778533936, "learning_rate": 5.838678945697047e-05, "loss": 0.0706, "step": 6557 }, { "epoch": 12.487874465049929, "grad_norm": 0.16082552075386047, "learning_rate": 5.838043823436011e-05, "loss": 0.0528, "step": 6558 }, { "epoch": 12.48977650974798, "grad_norm": 0.27846723794937134, "learning_rate": 5.837408701174977e-05, "loss": 0.0806, "step": 6559 }, { "epoch": 12.491678554446029, "grad_norm": 0.1597774624824524, "learning_rate": 5.8367735789139413e-05, "loss": 0.0466, "step": 6560 }, { "epoch": 12.49358059914408, "grad_norm": 0.18083947896957397, "learning_rate": 5.8361384566529065e-05, "loss": 0.0633, "step": 6561 }, { "epoch": 12.49548264384213, "grad_norm": 0.14837045967578888, "learning_rate": 5.835503334391871e-05, "loss": 0.0658, "step": 6562 }, { "epoch": 12.49738468854018, "grad_norm": 0.20318514108657837, "learning_rate": 5.834868212130835e-05, "loss": 0.0727, "step": 6563 }, { "epoch": 12.49928673323823, "grad_norm": 0.23750798404216766, "learning_rate": 5.834233089869801e-05, "loss": 0.066, "step": 6564 }, { "epoch": 12.501188777936282, "grad_norm": 0.1972920298576355, "learning_rate": 5.8335979676087646e-05, "loss": 0.063, "step": 6565 }, { "epoch": 12.503090822634332, "grad_norm": 0.1612776815891266, "learning_rate": 5.832962845347729e-05, "loss": 0.0388, "step": 6566 }, { "epoch": 12.504992867332382, "grad_norm": 0.22913222014904022, "learning_rate": 5.832327723086695e-05, "loss": 0.0666, "step": 6567 }, { "epoch": 12.506894912030432, "grad_norm": 0.1786653995513916, "learning_rate": 5.831692600825659e-05, "loss": 0.0828, "step": 6568 }, { "epoch": 12.508796956728483, "grad_norm": 0.16014467179775238, "learning_rate": 5.8310574785646246e-05, "loss": 0.0473, "step": 6569 }, { "epoch": 12.510699001426534, "grad_norm": 0.1647956818342209, "learning_rate": 5.8304223563035884e-05, "loss": 0.0628, "step": 6570 }, { "epoch": 12.512601046124583, "grad_norm": 0.3335977792739868, "learning_rate": 5.829787234042553e-05, "loss": 0.0541, "step": 6571 }, { "epoch": 12.514503090822634, "grad_norm": 0.19307781755924225, "learning_rate": 5.829152111781518e-05, "loss": 0.054, "step": 6572 }, { "epoch": 12.516405135520685, "grad_norm": 0.20400182902812958, "learning_rate": 5.8285169895204826e-05, "loss": 0.0475, "step": 6573 }, { "epoch": 12.518307180218734, "grad_norm": 0.2718433737754822, "learning_rate": 5.8278818672594485e-05, "loss": 0.071, "step": 6574 }, { "epoch": 12.520209224916785, "grad_norm": 0.26126694679260254, "learning_rate": 5.827246744998412e-05, "loss": 0.1035, "step": 6575 }, { "epoch": 12.522111269614836, "grad_norm": 0.2177870124578476, "learning_rate": 5.826611622737377e-05, "loss": 0.0665, "step": 6576 }, { "epoch": 12.524013314312887, "grad_norm": 0.2201864868402481, "learning_rate": 5.825976500476342e-05, "loss": 0.0589, "step": 6577 }, { "epoch": 12.525915359010936, "grad_norm": 0.264030396938324, "learning_rate": 5.8253413782153065e-05, "loss": 0.0553, "step": 6578 }, { "epoch": 12.527817403708987, "grad_norm": 0.24069593846797943, "learning_rate": 5.824706255954272e-05, "loss": 0.0669, "step": 6579 }, { "epoch": 12.529719448407038, "grad_norm": 0.16773609817028046, "learning_rate": 5.824071133693236e-05, "loss": 0.0736, "step": 6580 }, { "epoch": 12.531621493105089, "grad_norm": 0.2015453428030014, "learning_rate": 5.823436011432201e-05, "loss": 0.0606, "step": 6581 }, { "epoch": 12.533523537803138, "grad_norm": 0.17908424139022827, "learning_rate": 5.822800889171166e-05, "loss": 0.0692, "step": 6582 }, { "epoch": 12.535425582501189, "grad_norm": 0.11076918244361877, "learning_rate": 5.8221657669101304e-05, "loss": 0.0622, "step": 6583 }, { "epoch": 12.53732762719924, "grad_norm": 0.24794691801071167, "learning_rate": 5.821530644649095e-05, "loss": 0.0702, "step": 6584 }, { "epoch": 12.539229671897289, "grad_norm": 0.15396280586719513, "learning_rate": 5.82089552238806e-05, "loss": 0.0575, "step": 6585 }, { "epoch": 12.54113171659534, "grad_norm": 0.2552478611469269, "learning_rate": 5.8202604001270246e-05, "loss": 0.057, "step": 6586 }, { "epoch": 12.54303376129339, "grad_norm": 0.14009839296340942, "learning_rate": 5.81962527786599e-05, "loss": 0.0487, "step": 6587 }, { "epoch": 12.544935805991441, "grad_norm": 0.2083524912595749, "learning_rate": 5.818990155604954e-05, "loss": 0.0589, "step": 6588 }, { "epoch": 12.54683785068949, "grad_norm": 0.24637573957443237, "learning_rate": 5.818355033343919e-05, "loss": 0.0678, "step": 6589 }, { "epoch": 12.548739895387541, "grad_norm": 0.23935122787952423, "learning_rate": 5.817719911082884e-05, "loss": 0.0725, "step": 6590 }, { "epoch": 12.550641940085592, "grad_norm": 0.1685015857219696, "learning_rate": 5.8170847888218485e-05, "loss": 0.0625, "step": 6591 }, { "epoch": 12.552543984783643, "grad_norm": 0.26168474555015564, "learning_rate": 5.816449666560814e-05, "loss": 0.0683, "step": 6592 }, { "epoch": 12.554446029481692, "grad_norm": 0.2873637080192566, "learning_rate": 5.815814544299778e-05, "loss": 0.0743, "step": 6593 }, { "epoch": 12.556348074179743, "grad_norm": 0.1569848358631134, "learning_rate": 5.815179422038742e-05, "loss": 0.0919, "step": 6594 }, { "epoch": 12.558250118877794, "grad_norm": 0.18452291190624237, "learning_rate": 5.814544299777708e-05, "loss": 0.0589, "step": 6595 }, { "epoch": 12.560152163575845, "grad_norm": 0.1471710354089737, "learning_rate": 5.8139091775166724e-05, "loss": 0.0641, "step": 6596 }, { "epoch": 12.562054208273894, "grad_norm": 0.1817297339439392, "learning_rate": 5.8132740552556376e-05, "loss": 0.1083, "step": 6597 }, { "epoch": 12.563956252971945, "grad_norm": 0.21600444614887238, "learning_rate": 5.812638932994602e-05, "loss": 0.0774, "step": 6598 }, { "epoch": 12.565858297669996, "grad_norm": 0.1959545612335205, "learning_rate": 5.812003810733566e-05, "loss": 0.0751, "step": 6599 }, { "epoch": 12.567760342368045, "grad_norm": 0.23535875976085663, "learning_rate": 5.811368688472532e-05, "loss": 0.0791, "step": 6600 }, { "epoch": 12.569662387066096, "grad_norm": 0.2223137617111206, "learning_rate": 5.8107335662114956e-05, "loss": 0.0687, "step": 6601 }, { "epoch": 12.571564431764147, "grad_norm": 0.258556067943573, "learning_rate": 5.81009844395046e-05, "loss": 0.1151, "step": 6602 }, { "epoch": 12.573466476462198, "grad_norm": 0.15880589187145233, "learning_rate": 5.809463321689426e-05, "loss": 0.0607, "step": 6603 }, { "epoch": 12.575368521160247, "grad_norm": 0.2969416379928589, "learning_rate": 5.80882819942839e-05, "loss": 0.0591, "step": 6604 }, { "epoch": 12.577270565858298, "grad_norm": 0.2408699095249176, "learning_rate": 5.808193077167356e-05, "loss": 0.0465, "step": 6605 }, { "epoch": 12.579172610556348, "grad_norm": 0.14702194929122925, "learning_rate": 5.8075579549063195e-05, "loss": 0.0376, "step": 6606 }, { "epoch": 12.5810746552544, "grad_norm": 0.2562587559223175, "learning_rate": 5.806922832645284e-05, "loss": 0.0612, "step": 6607 }, { "epoch": 12.582976699952448, "grad_norm": 0.17348787188529968, "learning_rate": 5.806287710384249e-05, "loss": 0.0527, "step": 6608 }, { "epoch": 12.5848787446505, "grad_norm": 0.3318137526512146, "learning_rate": 5.805652588123214e-05, "loss": 0.0837, "step": 6609 }, { "epoch": 12.58678078934855, "grad_norm": 0.16464082896709442, "learning_rate": 5.805017465862179e-05, "loss": 0.0611, "step": 6610 }, { "epoch": 12.5886828340466, "grad_norm": 0.18480993807315826, "learning_rate": 5.8043823436011434e-05, "loss": 0.0562, "step": 6611 }, { "epoch": 12.59058487874465, "grad_norm": 0.2223837673664093, "learning_rate": 5.803747221340108e-05, "loss": 0.0498, "step": 6612 }, { "epoch": 12.592486923442701, "grad_norm": 0.242923304438591, "learning_rate": 5.803112099079073e-05, "loss": 0.0685, "step": 6613 }, { "epoch": 12.594388968140752, "grad_norm": 0.2212572693824768, "learning_rate": 5.8024769768180376e-05, "loss": 0.0659, "step": 6614 }, { "epoch": 12.596291012838801, "grad_norm": 0.13670332729816437, "learning_rate": 5.801841854557003e-05, "loss": 0.0418, "step": 6615 }, { "epoch": 12.598193057536852, "grad_norm": 0.13037995994091034, "learning_rate": 5.801206732295967e-05, "loss": 0.0492, "step": 6616 }, { "epoch": 12.600095102234903, "grad_norm": 0.21356116235256195, "learning_rate": 5.800571610034932e-05, "loss": 0.0579, "step": 6617 }, { "epoch": 12.601997146932954, "grad_norm": 0.20228992402553558, "learning_rate": 5.799936487773897e-05, "loss": 0.0575, "step": 6618 }, { "epoch": 12.603899191631003, "grad_norm": 0.24074339866638184, "learning_rate": 5.7993013655128615e-05, "loss": 0.0601, "step": 6619 }, { "epoch": 12.605801236329054, "grad_norm": 0.18884117901325226, "learning_rate": 5.798666243251826e-05, "loss": 0.0941, "step": 6620 }, { "epoch": 12.607703281027105, "grad_norm": 0.29533296823501587, "learning_rate": 5.798031120990791e-05, "loss": 0.0597, "step": 6621 }, { "epoch": 12.609605325725155, "grad_norm": 0.3215848207473755, "learning_rate": 5.797395998729756e-05, "loss": 0.076, "step": 6622 }, { "epoch": 12.611507370423205, "grad_norm": 0.20919732749462128, "learning_rate": 5.796760876468721e-05, "loss": 0.0765, "step": 6623 }, { "epoch": 12.613409415121255, "grad_norm": 0.1770048439502716, "learning_rate": 5.7961257542076854e-05, "loss": 0.0747, "step": 6624 }, { "epoch": 12.615311459819306, "grad_norm": 0.17611941695213318, "learning_rate": 5.79549063194665e-05, "loss": 0.0725, "step": 6625 }, { "epoch": 12.617213504517355, "grad_norm": 0.21046479046344757, "learning_rate": 5.794855509685615e-05, "loss": 0.0916, "step": 6626 }, { "epoch": 12.619115549215406, "grad_norm": 0.11053504049777985, "learning_rate": 5.7942203874245796e-05, "loss": 0.0519, "step": 6627 }, { "epoch": 12.621017593913457, "grad_norm": 0.0993650034070015, "learning_rate": 5.793585265163545e-05, "loss": 0.0418, "step": 6628 }, { "epoch": 12.622919638611508, "grad_norm": 0.19049400091171265, "learning_rate": 5.792950142902509e-05, "loss": 0.0824, "step": 6629 }, { "epoch": 12.624821683309557, "grad_norm": 0.21655261516571045, "learning_rate": 5.792315020641473e-05, "loss": 0.0589, "step": 6630 }, { "epoch": 12.626723728007608, "grad_norm": 0.12360772490501404, "learning_rate": 5.791679898380439e-05, "loss": 0.0578, "step": 6631 }, { "epoch": 12.628625772705659, "grad_norm": 0.16151952743530273, "learning_rate": 5.7910447761194034e-05, "loss": 0.0813, "step": 6632 }, { "epoch": 12.63052781740371, "grad_norm": 0.25301963090896606, "learning_rate": 5.7904096538583686e-05, "loss": 0.0563, "step": 6633 }, { "epoch": 12.632429862101759, "grad_norm": 0.2590464651584625, "learning_rate": 5.789774531597333e-05, "loss": 0.0685, "step": 6634 }, { "epoch": 12.63433190679981, "grad_norm": 0.18846406042575836, "learning_rate": 5.789139409336297e-05, "loss": 0.0678, "step": 6635 }, { "epoch": 12.63623395149786, "grad_norm": 0.21730905771255493, "learning_rate": 5.788504287075263e-05, "loss": 0.072, "step": 6636 }, { "epoch": 12.63813599619591, "grad_norm": 0.23395763337612152, "learning_rate": 5.7878691648142266e-05, "loss": 0.0682, "step": 6637 }, { "epoch": 12.64003804089396, "grad_norm": 0.23463581502437592, "learning_rate": 5.787234042553191e-05, "loss": 0.0764, "step": 6638 }, { "epoch": 12.641940085592012, "grad_norm": 0.1801515519618988, "learning_rate": 5.7865989202921563e-05, "loss": 0.0494, "step": 6639 }, { "epoch": 12.643842130290063, "grad_norm": 0.30223360657691956, "learning_rate": 5.785963798031121e-05, "loss": 0.0686, "step": 6640 }, { "epoch": 12.645744174988112, "grad_norm": 0.17319241166114807, "learning_rate": 5.785328675770087e-05, "loss": 0.0631, "step": 6641 }, { "epoch": 12.647646219686163, "grad_norm": 0.17781338095664978, "learning_rate": 5.7846935535090505e-05, "loss": 0.061, "step": 6642 }, { "epoch": 12.649548264384213, "grad_norm": 0.34193697571754456, "learning_rate": 5.784058431248015e-05, "loss": 0.0789, "step": 6643 }, { "epoch": 12.651450309082264, "grad_norm": 0.2082579880952835, "learning_rate": 5.78342330898698e-05, "loss": 0.0496, "step": 6644 }, { "epoch": 12.653352353780313, "grad_norm": 0.36952418088912964, "learning_rate": 5.782788186725945e-05, "loss": 0.0874, "step": 6645 }, { "epoch": 12.655254398478364, "grad_norm": 0.23420372605323792, "learning_rate": 5.78215306446491e-05, "loss": 0.0723, "step": 6646 }, { "epoch": 12.657156443176415, "grad_norm": 0.19578108191490173, "learning_rate": 5.7815179422038744e-05, "loss": 0.0692, "step": 6647 }, { "epoch": 12.659058487874464, "grad_norm": 0.24436496198177338, "learning_rate": 5.780882819942839e-05, "loss": 0.0659, "step": 6648 }, { "epoch": 12.660960532572515, "grad_norm": 0.2588810622692108, "learning_rate": 5.780247697681804e-05, "loss": 0.059, "step": 6649 }, { "epoch": 12.662862577270566, "grad_norm": 0.19169163703918457, "learning_rate": 5.7796125754207686e-05, "loss": 0.0457, "step": 6650 }, { "epoch": 12.664764621968617, "grad_norm": 0.31427711248397827, "learning_rate": 5.778977453159734e-05, "loss": 0.0721, "step": 6651 }, { "epoch": 12.666666666666666, "grad_norm": 0.28431782126426697, "learning_rate": 5.778342330898698e-05, "loss": 0.0636, "step": 6652 }, { "epoch": 12.668568711364717, "grad_norm": 0.19275827705860138, "learning_rate": 5.777707208637663e-05, "loss": 0.0598, "step": 6653 }, { "epoch": 12.670470756062768, "grad_norm": 0.28678253293037415, "learning_rate": 5.777072086376628e-05, "loss": 0.0799, "step": 6654 }, { "epoch": 12.672372800760819, "grad_norm": 0.279844731092453, "learning_rate": 5.7764369641155925e-05, "loss": 0.0634, "step": 6655 }, { "epoch": 12.674274845458868, "grad_norm": 0.16320635378360748, "learning_rate": 5.775801841854557e-05, "loss": 0.0355, "step": 6656 }, { "epoch": 12.676176890156919, "grad_norm": 0.30712923407554626, "learning_rate": 5.775166719593522e-05, "loss": 0.0888, "step": 6657 }, { "epoch": 12.67807893485497, "grad_norm": 0.1668136864900589, "learning_rate": 5.774531597332487e-05, "loss": 0.0527, "step": 6658 }, { "epoch": 12.679980979553019, "grad_norm": 0.17858512699604034, "learning_rate": 5.773896475071452e-05, "loss": 0.0638, "step": 6659 }, { "epoch": 12.68188302425107, "grad_norm": 0.19142888486385345, "learning_rate": 5.7732613528104164e-05, "loss": 0.0601, "step": 6660 }, { "epoch": 12.68378506894912, "grad_norm": 0.2255232036113739, "learning_rate": 5.77262623054938e-05, "loss": 0.0577, "step": 6661 }, { "epoch": 12.685687113647171, "grad_norm": 0.2920807898044586, "learning_rate": 5.771991108288346e-05, "loss": 0.078, "step": 6662 }, { "epoch": 12.68758915834522, "grad_norm": 0.1767326146364212, "learning_rate": 5.7713559860273106e-05, "loss": 0.0508, "step": 6663 }, { "epoch": 12.689491203043271, "grad_norm": 0.14078831672668457, "learning_rate": 5.770720863766276e-05, "loss": 0.0664, "step": 6664 }, { "epoch": 12.691393247741322, "grad_norm": 0.2606064975261688, "learning_rate": 5.77008574150524e-05, "loss": 0.0693, "step": 6665 }, { "epoch": 12.693295292439373, "grad_norm": 0.22922249138355255, "learning_rate": 5.769450619244204e-05, "loss": 0.0546, "step": 6666 }, { "epoch": 12.695197337137422, "grad_norm": 0.14279182255268097, "learning_rate": 5.76881549698317e-05, "loss": 0.0531, "step": 6667 }, { "epoch": 12.697099381835473, "grad_norm": 0.1705865114927292, "learning_rate": 5.768180374722134e-05, "loss": 0.0628, "step": 6668 }, { "epoch": 12.699001426533524, "grad_norm": 0.2879997193813324, "learning_rate": 5.7675452524611e-05, "loss": 0.065, "step": 6669 }, { "epoch": 12.700903471231573, "grad_norm": 0.2573727071285248, "learning_rate": 5.766910130200064e-05, "loss": 0.0716, "step": 6670 }, { "epoch": 12.702805515929624, "grad_norm": 0.17233164608478546, "learning_rate": 5.766275007939028e-05, "loss": 0.0493, "step": 6671 }, { "epoch": 12.704707560627675, "grad_norm": 0.16349801421165466, "learning_rate": 5.765639885677994e-05, "loss": 0.0523, "step": 6672 }, { "epoch": 12.706609605325726, "grad_norm": 0.14094343781471252, "learning_rate": 5.765004763416958e-05, "loss": 0.0527, "step": 6673 }, { "epoch": 12.708511650023775, "grad_norm": 0.16286692023277283, "learning_rate": 5.764369641155922e-05, "loss": 0.0644, "step": 6674 }, { "epoch": 12.710413694721826, "grad_norm": 0.3345188796520233, "learning_rate": 5.7637345188948874e-05, "loss": 0.0684, "step": 6675 }, { "epoch": 12.712315739419877, "grad_norm": 0.2787039875984192, "learning_rate": 5.763099396633852e-05, "loss": 0.0881, "step": 6676 }, { "epoch": 12.714217784117928, "grad_norm": 0.46529752016067505, "learning_rate": 5.762464274372818e-05, "loss": 0.1022, "step": 6677 }, { "epoch": 12.716119828815977, "grad_norm": 0.33916109800338745, "learning_rate": 5.7618291521117816e-05, "loss": 0.0715, "step": 6678 }, { "epoch": 12.718021873514028, "grad_norm": 0.22834204137325287, "learning_rate": 5.761194029850746e-05, "loss": 0.0581, "step": 6679 }, { "epoch": 12.719923918212078, "grad_norm": 0.26732945442199707, "learning_rate": 5.760558907589711e-05, "loss": 0.0597, "step": 6680 }, { "epoch": 12.721825962910128, "grad_norm": 0.18561658263206482, "learning_rate": 5.759923785328676e-05, "loss": 0.0624, "step": 6681 }, { "epoch": 12.723728007608178, "grad_norm": 0.2095700353384018, "learning_rate": 5.759288663067641e-05, "loss": 0.0563, "step": 6682 }, { "epoch": 12.72563005230623, "grad_norm": 0.20030710101127625, "learning_rate": 5.7586535408066055e-05, "loss": 0.0558, "step": 6683 }, { "epoch": 12.72753209700428, "grad_norm": 0.25263649225234985, "learning_rate": 5.75801841854557e-05, "loss": 0.0831, "step": 6684 }, { "epoch": 12.72943414170233, "grad_norm": 0.2224830538034439, "learning_rate": 5.757383296284535e-05, "loss": 0.0692, "step": 6685 }, { "epoch": 12.73133618640038, "grad_norm": 0.25267601013183594, "learning_rate": 5.7567481740235e-05, "loss": 0.052, "step": 6686 }, { "epoch": 12.733238231098431, "grad_norm": 0.23251911997795105, "learning_rate": 5.756113051762465e-05, "loss": 0.0375, "step": 6687 }, { "epoch": 12.735140275796482, "grad_norm": 0.1670525074005127, "learning_rate": 5.7554779295014294e-05, "loss": 0.0453, "step": 6688 }, { "epoch": 12.737042320494531, "grad_norm": 0.25991198420524597, "learning_rate": 5.754842807240394e-05, "loss": 0.0634, "step": 6689 }, { "epoch": 12.738944365192582, "grad_norm": 0.25706109404563904, "learning_rate": 5.754207684979359e-05, "loss": 0.0636, "step": 6690 }, { "epoch": 12.740846409890633, "grad_norm": 0.18260982632637024, "learning_rate": 5.7535725627183236e-05, "loss": 0.0675, "step": 6691 }, { "epoch": 12.742748454588682, "grad_norm": 0.18274103105068207, "learning_rate": 5.752937440457288e-05, "loss": 0.0686, "step": 6692 }, { "epoch": 12.744650499286733, "grad_norm": 0.13347792625427246, "learning_rate": 5.752302318196253e-05, "loss": 0.0592, "step": 6693 }, { "epoch": 12.746552543984784, "grad_norm": 0.2516426742076874, "learning_rate": 5.751667195935218e-05, "loss": 0.0571, "step": 6694 }, { "epoch": 12.748454588682835, "grad_norm": 0.1681799739599228, "learning_rate": 5.751032073674183e-05, "loss": 0.0779, "step": 6695 }, { "epoch": 12.750356633380884, "grad_norm": 0.14614692330360413, "learning_rate": 5.7503969514131474e-05, "loss": 0.0715, "step": 6696 }, { "epoch": 12.752258678078935, "grad_norm": 0.17188875377178192, "learning_rate": 5.749761829152111e-05, "loss": 0.0705, "step": 6697 }, { "epoch": 12.754160722776986, "grad_norm": 0.16121084988117218, "learning_rate": 5.749126706891077e-05, "loss": 0.0606, "step": 6698 }, { "epoch": 12.756062767475036, "grad_norm": 0.1163937896490097, "learning_rate": 5.7484915846300416e-05, "loss": 0.0499, "step": 6699 }, { "epoch": 12.757964812173086, "grad_norm": 0.2049596607685089, "learning_rate": 5.747856462369007e-05, "loss": 0.0616, "step": 6700 }, { "epoch": 12.759866856871136, "grad_norm": 0.13135495781898499, "learning_rate": 5.747221340107971e-05, "loss": 0.0565, "step": 6701 }, { "epoch": 12.761768901569187, "grad_norm": 0.1636398732662201, "learning_rate": 5.746586217846935e-05, "loss": 0.0517, "step": 6702 }, { "epoch": 12.763670946267236, "grad_norm": 0.15648093819618225, "learning_rate": 5.745951095585901e-05, "loss": 0.0503, "step": 6703 }, { "epoch": 12.765572990965287, "grad_norm": 0.15833771228790283, "learning_rate": 5.745315973324865e-05, "loss": 0.0375, "step": 6704 }, { "epoch": 12.767475035663338, "grad_norm": 0.2346632480621338, "learning_rate": 5.744680851063831e-05, "loss": 0.074, "step": 6705 }, { "epoch": 12.769377080361389, "grad_norm": 0.11698388308286667, "learning_rate": 5.744045728802795e-05, "loss": 0.0629, "step": 6706 }, { "epoch": 12.771279125059438, "grad_norm": 0.21215496957302094, "learning_rate": 5.743410606541759e-05, "loss": 0.0593, "step": 6707 }, { "epoch": 12.773181169757489, "grad_norm": 0.18357087671756744, "learning_rate": 5.742775484280725e-05, "loss": 0.0698, "step": 6708 }, { "epoch": 12.77508321445554, "grad_norm": 0.283631294965744, "learning_rate": 5.742140362019689e-05, "loss": 0.0672, "step": 6709 }, { "epoch": 12.77698525915359, "grad_norm": 0.15704897046089172, "learning_rate": 5.741505239758653e-05, "loss": 0.0568, "step": 6710 }, { "epoch": 12.77888730385164, "grad_norm": 0.23621056973934174, "learning_rate": 5.7408701174976184e-05, "loss": 0.0824, "step": 6711 }, { "epoch": 12.78078934854969, "grad_norm": 0.25481176376342773, "learning_rate": 5.740234995236583e-05, "loss": 0.0598, "step": 6712 }, { "epoch": 12.782691393247742, "grad_norm": 0.17756560444831848, "learning_rate": 5.739599872975549e-05, "loss": 0.0736, "step": 6713 }, { "epoch": 12.78459343794579, "grad_norm": 0.23665587604045868, "learning_rate": 5.7389647507145126e-05, "loss": 0.0666, "step": 6714 }, { "epoch": 12.786495482643842, "grad_norm": 0.25118178129196167, "learning_rate": 5.738329628453477e-05, "loss": 0.0633, "step": 6715 }, { "epoch": 12.788397527341893, "grad_norm": 0.16724514961242676, "learning_rate": 5.737694506192442e-05, "loss": 0.0587, "step": 6716 }, { "epoch": 12.790299572039943, "grad_norm": 0.24320180714130402, "learning_rate": 5.737059383931407e-05, "loss": 0.0912, "step": 6717 }, { "epoch": 12.792201616737993, "grad_norm": 0.27849307656288147, "learning_rate": 5.736424261670372e-05, "loss": 0.0874, "step": 6718 }, { "epoch": 12.794103661436043, "grad_norm": 0.12784165143966675, "learning_rate": 5.7357891394093365e-05, "loss": 0.0605, "step": 6719 }, { "epoch": 12.796005706134094, "grad_norm": 0.1868617981672287, "learning_rate": 5.735154017148301e-05, "loss": 0.0909, "step": 6720 }, { "epoch": 12.797907750832145, "grad_norm": 0.1971626728773117, "learning_rate": 5.734518894887266e-05, "loss": 0.0585, "step": 6721 }, { "epoch": 12.799809795530194, "grad_norm": 0.37201204895973206, "learning_rate": 5.733883772626231e-05, "loss": 0.0801, "step": 6722 }, { "epoch": 12.801711840228245, "grad_norm": 0.34986621141433716, "learning_rate": 5.733248650365196e-05, "loss": 0.0757, "step": 6723 }, { "epoch": 12.803613884926296, "grad_norm": 0.18505094945430756, "learning_rate": 5.7326135281041604e-05, "loss": 0.0591, "step": 6724 }, { "epoch": 12.805515929624345, "grad_norm": 0.19197897613048553, "learning_rate": 5.731978405843125e-05, "loss": 0.0509, "step": 6725 }, { "epoch": 12.807417974322396, "grad_norm": 0.17455685138702393, "learning_rate": 5.73134328358209e-05, "loss": 0.0522, "step": 6726 }, { "epoch": 12.809320019020447, "grad_norm": 0.1809726059436798, "learning_rate": 5.7307081613210546e-05, "loss": 0.0662, "step": 6727 }, { "epoch": 12.811222063718498, "grad_norm": 0.2538421154022217, "learning_rate": 5.730073039060019e-05, "loss": 0.0511, "step": 6728 }, { "epoch": 12.813124108416547, "grad_norm": 0.35618454217910767, "learning_rate": 5.729437916798984e-05, "loss": 0.0665, "step": 6729 }, { "epoch": 12.815026153114598, "grad_norm": 0.21073471009731293, "learning_rate": 5.728802794537949e-05, "loss": 0.0613, "step": 6730 }, { "epoch": 12.816928197812649, "grad_norm": 0.17091955244541168, "learning_rate": 5.728167672276914e-05, "loss": 0.0684, "step": 6731 }, { "epoch": 12.8188302425107, "grad_norm": 0.30342546105384827, "learning_rate": 5.7275325500158785e-05, "loss": 0.1016, "step": 6732 }, { "epoch": 12.820732287208749, "grad_norm": 0.16343329846858978, "learning_rate": 5.726897427754842e-05, "loss": 0.0558, "step": 6733 }, { "epoch": 12.8226343319068, "grad_norm": 0.20818068087100983, "learning_rate": 5.726262305493808e-05, "loss": 0.0525, "step": 6734 }, { "epoch": 12.82453637660485, "grad_norm": 0.1833098977804184, "learning_rate": 5.725627183232773e-05, "loss": 0.0445, "step": 6735 }, { "epoch": 12.8264384213029, "grad_norm": 0.23190821707248688, "learning_rate": 5.724992060971738e-05, "loss": 0.0761, "step": 6736 }, { "epoch": 12.82834046600095, "grad_norm": 0.25338640809059143, "learning_rate": 5.7243569387107024e-05, "loss": 0.0685, "step": 6737 }, { "epoch": 12.830242510699001, "grad_norm": 0.15732528269290924, "learning_rate": 5.723721816449666e-05, "loss": 0.0626, "step": 6738 }, { "epoch": 12.832144555397052, "grad_norm": 0.20255662500858307, "learning_rate": 5.723086694188632e-05, "loss": 0.0554, "step": 6739 }, { "epoch": 12.834046600095101, "grad_norm": 0.22862756252288818, "learning_rate": 5.722451571927596e-05, "loss": 0.056, "step": 6740 }, { "epoch": 12.835948644793152, "grad_norm": 0.164798304438591, "learning_rate": 5.721816449666562e-05, "loss": 0.048, "step": 6741 }, { "epoch": 12.837850689491203, "grad_norm": 0.23823440074920654, "learning_rate": 5.721181327405526e-05, "loss": 0.0574, "step": 6742 }, { "epoch": 12.839752734189254, "grad_norm": 0.18618272244930267, "learning_rate": 5.72054620514449e-05, "loss": 0.054, "step": 6743 }, { "epoch": 12.841654778887303, "grad_norm": 0.2012937366962433, "learning_rate": 5.719911082883456e-05, "loss": 0.0629, "step": 6744 }, { "epoch": 12.843556823585354, "grad_norm": 0.26844924688339233, "learning_rate": 5.71927596062242e-05, "loss": 0.0771, "step": 6745 }, { "epoch": 12.845458868283405, "grad_norm": 0.14869700372219086, "learning_rate": 5.718640838361384e-05, "loss": 0.0462, "step": 6746 }, { "epoch": 12.847360912981456, "grad_norm": 0.24835607409477234, "learning_rate": 5.7180057161003495e-05, "loss": 0.0575, "step": 6747 }, { "epoch": 12.849262957679505, "grad_norm": 0.1934400200843811, "learning_rate": 5.717370593839314e-05, "loss": 0.0588, "step": 6748 }, { "epoch": 12.851165002377556, "grad_norm": 0.10977878421545029, "learning_rate": 5.716735471578279e-05, "loss": 0.0605, "step": 6749 }, { "epoch": 12.853067047075607, "grad_norm": 0.24564503133296967, "learning_rate": 5.716100349317244e-05, "loss": 0.0702, "step": 6750 }, { "epoch": 12.854969091773656, "grad_norm": 0.2185717672109604, "learning_rate": 5.715465227056208e-05, "loss": 0.0549, "step": 6751 }, { "epoch": 12.856871136471707, "grad_norm": 0.19493113458156586, "learning_rate": 5.7148301047951734e-05, "loss": 0.0528, "step": 6752 }, { "epoch": 12.858773181169758, "grad_norm": 0.2067340910434723, "learning_rate": 5.714194982534138e-05, "loss": 0.0603, "step": 6753 }, { "epoch": 12.860675225867809, "grad_norm": 0.21597956120967865, "learning_rate": 5.713559860273103e-05, "loss": 0.0724, "step": 6754 }, { "epoch": 12.862577270565858, "grad_norm": 0.1443902999162674, "learning_rate": 5.7129247380120676e-05, "loss": 0.0477, "step": 6755 }, { "epoch": 12.864479315263909, "grad_norm": 0.2689155042171478, "learning_rate": 5.712289615751032e-05, "loss": 0.0632, "step": 6756 }, { "epoch": 12.86638135996196, "grad_norm": 0.2500086724758148, "learning_rate": 5.711654493489997e-05, "loss": 0.0685, "step": 6757 }, { "epoch": 12.86828340466001, "grad_norm": 0.2317550778388977, "learning_rate": 5.711019371228962e-05, "loss": 0.0762, "step": 6758 }, { "epoch": 12.87018544935806, "grad_norm": 0.1884494423866272, "learning_rate": 5.710384248967927e-05, "loss": 0.0441, "step": 6759 }, { "epoch": 12.87208749405611, "grad_norm": 0.22511914372444153, "learning_rate": 5.7097491267068914e-05, "loss": 0.0659, "step": 6760 }, { "epoch": 12.873989538754161, "grad_norm": 0.15401434898376465, "learning_rate": 5.709114004445856e-05, "loss": 0.0706, "step": 6761 }, { "epoch": 12.87589158345221, "grad_norm": 0.32594338059425354, "learning_rate": 5.708478882184821e-05, "loss": 0.0572, "step": 6762 }, { "epoch": 12.877793628150261, "grad_norm": 0.2316208928823471, "learning_rate": 5.7078437599237856e-05, "loss": 0.0639, "step": 6763 }, { "epoch": 12.879695672848312, "grad_norm": 0.17722217738628387, "learning_rate": 5.70720863766275e-05, "loss": 0.0673, "step": 6764 }, { "epoch": 12.881597717546363, "grad_norm": 0.1882850080728531, "learning_rate": 5.706573515401715e-05, "loss": 0.0526, "step": 6765 }, { "epoch": 12.883499762244412, "grad_norm": 0.2745272219181061, "learning_rate": 5.70593839314068e-05, "loss": 0.0669, "step": 6766 }, { "epoch": 12.885401806942463, "grad_norm": 0.11121740192174911, "learning_rate": 5.705303270879645e-05, "loss": 0.0524, "step": 6767 }, { "epoch": 12.887303851640514, "grad_norm": 0.25695016980171204, "learning_rate": 5.7046681486186095e-05, "loss": 0.0619, "step": 6768 }, { "epoch": 12.889205896338565, "grad_norm": 0.2739970088005066, "learning_rate": 5.7040330263575734e-05, "loss": 0.0784, "step": 6769 }, { "epoch": 12.891107941036614, "grad_norm": 0.35055750608444214, "learning_rate": 5.703397904096539e-05, "loss": 0.0932, "step": 6770 }, { "epoch": 12.893009985734665, "grad_norm": 0.16527678072452545, "learning_rate": 5.702762781835503e-05, "loss": 0.044, "step": 6771 }, { "epoch": 12.894912030432716, "grad_norm": 0.19253818690776825, "learning_rate": 5.702127659574469e-05, "loss": 0.0612, "step": 6772 }, { "epoch": 12.896814075130766, "grad_norm": 0.24265630543231964, "learning_rate": 5.7014925373134334e-05, "loss": 0.0736, "step": 6773 }, { "epoch": 12.898716119828816, "grad_norm": 0.31677743792533875, "learning_rate": 5.700857415052397e-05, "loss": 0.0684, "step": 6774 }, { "epoch": 12.900618164526866, "grad_norm": 0.18557855486869812, "learning_rate": 5.700222292791363e-05, "loss": 0.0661, "step": 6775 }, { "epoch": 12.902520209224917, "grad_norm": 0.13152940571308136, "learning_rate": 5.699587170530327e-05, "loss": 0.0682, "step": 6776 }, { "epoch": 12.904422253922966, "grad_norm": 0.16339555382728577, "learning_rate": 5.698952048269293e-05, "loss": 0.0729, "step": 6777 }, { "epoch": 12.906324298621017, "grad_norm": 0.14895136654376984, "learning_rate": 5.6983169260082566e-05, "loss": 0.0647, "step": 6778 }, { "epoch": 12.908226343319068, "grad_norm": 0.18370980024337769, "learning_rate": 5.697681803747221e-05, "loss": 0.0676, "step": 6779 }, { "epoch": 12.91012838801712, "grad_norm": 0.2819598317146301, "learning_rate": 5.697046681486187e-05, "loss": 0.0698, "step": 6780 }, { "epoch": 12.912030432715168, "grad_norm": 0.2503798305988312, "learning_rate": 5.696411559225151e-05, "loss": 0.0609, "step": 6781 }, { "epoch": 12.91393247741322, "grad_norm": 0.21035026013851166, "learning_rate": 5.695776436964115e-05, "loss": 0.0779, "step": 6782 }, { "epoch": 12.91583452211127, "grad_norm": 0.20218412578105927, "learning_rate": 5.6951413147030805e-05, "loss": 0.0544, "step": 6783 }, { "epoch": 12.917736566809321, "grad_norm": 0.1813918650150299, "learning_rate": 5.694506192442045e-05, "loss": 0.0736, "step": 6784 }, { "epoch": 12.91963861150737, "grad_norm": 0.24606718122959137, "learning_rate": 5.69387107018101e-05, "loss": 0.0602, "step": 6785 }, { "epoch": 12.921540656205421, "grad_norm": 0.26272520422935486, "learning_rate": 5.693235947919975e-05, "loss": 0.119, "step": 6786 }, { "epoch": 12.923442700903472, "grad_norm": 0.236623153090477, "learning_rate": 5.692600825658939e-05, "loss": 0.0748, "step": 6787 }, { "epoch": 12.925344745601521, "grad_norm": 0.20204751193523407, "learning_rate": 5.6919657033979044e-05, "loss": 0.0672, "step": 6788 }, { "epoch": 12.927246790299572, "grad_norm": 0.20411472022533417, "learning_rate": 5.691330581136869e-05, "loss": 0.0616, "step": 6789 }, { "epoch": 12.929148834997623, "grad_norm": 0.2538439929485321, "learning_rate": 5.690695458875834e-05, "loss": 0.0664, "step": 6790 }, { "epoch": 12.931050879695674, "grad_norm": 0.14548756182193756, "learning_rate": 5.6900603366147986e-05, "loss": 0.0713, "step": 6791 }, { "epoch": 12.932952924393723, "grad_norm": 0.2531229555606842, "learning_rate": 5.689425214353763e-05, "loss": 0.0797, "step": 6792 }, { "epoch": 12.934854969091774, "grad_norm": 0.14427652955055237, "learning_rate": 5.688790092092728e-05, "loss": 0.0643, "step": 6793 }, { "epoch": 12.936757013789824, "grad_norm": 0.14956879615783691, "learning_rate": 5.688154969831693e-05, "loss": 0.078, "step": 6794 }, { "epoch": 12.938659058487875, "grad_norm": 0.2142200469970703, "learning_rate": 5.687519847570658e-05, "loss": 0.0764, "step": 6795 }, { "epoch": 12.940561103185924, "grad_norm": 0.3186730146408081, "learning_rate": 5.6868847253096225e-05, "loss": 0.0628, "step": 6796 }, { "epoch": 12.942463147883975, "grad_norm": 0.20074574649333954, "learning_rate": 5.686249603048587e-05, "loss": 0.0576, "step": 6797 }, { "epoch": 12.944365192582026, "grad_norm": 0.18254826962947845, "learning_rate": 5.685614480787552e-05, "loss": 0.0641, "step": 6798 }, { "epoch": 12.946267237280075, "grad_norm": 0.16693158447742462, "learning_rate": 5.684979358526517e-05, "loss": 0.0701, "step": 6799 }, { "epoch": 12.948169281978126, "grad_norm": 0.16787877678871155, "learning_rate": 5.6843442362654805e-05, "loss": 0.0484, "step": 6800 }, { "epoch": 12.950071326676177, "grad_norm": 0.24660000205039978, "learning_rate": 5.6837091140044464e-05, "loss": 0.0587, "step": 6801 }, { "epoch": 12.951973371374228, "grad_norm": 0.18229947984218597, "learning_rate": 5.683073991743411e-05, "loss": 0.073, "step": 6802 }, { "epoch": 12.953875416072277, "grad_norm": 0.16272900998592377, "learning_rate": 5.682438869482376e-05, "loss": 0.0604, "step": 6803 }, { "epoch": 12.955777460770328, "grad_norm": 0.34907060861587524, "learning_rate": 5.6818037472213406e-05, "loss": 0.0667, "step": 6804 }, { "epoch": 12.957679505468379, "grad_norm": 0.18044497072696686, "learning_rate": 5.6811686249603044e-05, "loss": 0.0738, "step": 6805 }, { "epoch": 12.95958155016643, "grad_norm": 0.22940124571323395, "learning_rate": 5.68053350269927e-05, "loss": 0.0549, "step": 6806 }, { "epoch": 12.961483594864479, "grad_norm": 0.2308824360370636, "learning_rate": 5.679898380438234e-05, "loss": 0.0707, "step": 6807 }, { "epoch": 12.96338563956253, "grad_norm": 0.16795487701892853, "learning_rate": 5.6792632581772e-05, "loss": 0.0697, "step": 6808 }, { "epoch": 12.96528768426058, "grad_norm": 0.12602196633815765, "learning_rate": 5.6786281359161645e-05, "loss": 0.0684, "step": 6809 }, { "epoch": 12.96718972895863, "grad_norm": 0.3651646673679352, "learning_rate": 5.677993013655128e-05, "loss": 0.0826, "step": 6810 }, { "epoch": 12.96909177365668, "grad_norm": 0.1852756291627884, "learning_rate": 5.677357891394094e-05, "loss": 0.0611, "step": 6811 }, { "epoch": 12.970993818354732, "grad_norm": 0.18153737485408783, "learning_rate": 5.676722769133058e-05, "loss": 0.0456, "step": 6812 }, { "epoch": 12.972895863052782, "grad_norm": 0.2669750154018402, "learning_rate": 5.676087646872024e-05, "loss": 0.0751, "step": 6813 }, { "epoch": 12.974797907750832, "grad_norm": 0.1596618890762329, "learning_rate": 5.675452524610988e-05, "loss": 0.0502, "step": 6814 }, { "epoch": 12.976699952448882, "grad_norm": 0.28879284858703613, "learning_rate": 5.674817402349952e-05, "loss": 0.0732, "step": 6815 }, { "epoch": 12.978601997146933, "grad_norm": 0.14341185986995697, "learning_rate": 5.674182280088918e-05, "loss": 0.0654, "step": 6816 }, { "epoch": 12.980504041844984, "grad_norm": 0.27027440071105957, "learning_rate": 5.673547157827882e-05, "loss": 0.0882, "step": 6817 }, { "epoch": 12.982406086543033, "grad_norm": 0.18623539805412292, "learning_rate": 5.6729120355668464e-05, "loss": 0.0525, "step": 6818 }, { "epoch": 12.984308131241084, "grad_norm": 0.3178105354309082, "learning_rate": 5.6722769133058116e-05, "loss": 0.0671, "step": 6819 }, { "epoch": 12.986210175939135, "grad_norm": 0.17404824495315552, "learning_rate": 5.671641791044776e-05, "loss": 0.0524, "step": 6820 }, { "epoch": 12.988112220637184, "grad_norm": 0.26026493310928345, "learning_rate": 5.671006668783741e-05, "loss": 0.0612, "step": 6821 }, { "epoch": 12.990014265335235, "grad_norm": 0.22481663525104523, "learning_rate": 5.670371546522706e-05, "loss": 0.0582, "step": 6822 }, { "epoch": 12.991916310033286, "grad_norm": 0.2761218547821045, "learning_rate": 5.66973642426167e-05, "loss": 0.0805, "step": 6823 }, { "epoch": 12.993818354731337, "grad_norm": 0.28394970297813416, "learning_rate": 5.6691013020006354e-05, "loss": 0.0675, "step": 6824 }, { "epoch": 12.995720399429386, "grad_norm": 0.2090967446565628, "learning_rate": 5.6684661797396e-05, "loss": 0.0471, "step": 6825 }, { "epoch": 12.997622444127437, "grad_norm": 0.13171760737895966, "learning_rate": 5.667831057478565e-05, "loss": 0.0535, "step": 6826 }, { "epoch": 12.999524488825488, "grad_norm": 0.2904289662837982, "learning_rate": 5.6671959352175296e-05, "loss": 0.0671, "step": 6827 }, { "epoch": 13.001426533523539, "grad_norm": 0.19046546518802643, "learning_rate": 5.666560812956494e-05, "loss": 0.0539, "step": 6828 }, { "epoch": 13.003328578221588, "grad_norm": 0.22456832230091095, "learning_rate": 5.665925690695459e-05, "loss": 0.0717, "step": 6829 }, { "epoch": 13.005230622919639, "grad_norm": 0.1796262115240097, "learning_rate": 5.665290568434424e-05, "loss": 0.0637, "step": 6830 }, { "epoch": 13.00713266761769, "grad_norm": 0.15445412695407867, "learning_rate": 5.664655446173389e-05, "loss": 0.0533, "step": 6831 }, { "epoch": 13.009034712315739, "grad_norm": 0.1539810299873352, "learning_rate": 5.6640203239123535e-05, "loss": 0.0606, "step": 6832 }, { "epoch": 13.01093675701379, "grad_norm": 0.12736746668815613, "learning_rate": 5.663385201651318e-05, "loss": 0.059, "step": 6833 }, { "epoch": 13.01283880171184, "grad_norm": 0.14891792833805084, "learning_rate": 5.662750079390283e-05, "loss": 0.0619, "step": 6834 }, { "epoch": 13.014740846409891, "grad_norm": 0.2501888871192932, "learning_rate": 5.662114957129248e-05, "loss": 0.0684, "step": 6835 }, { "epoch": 13.01664289110794, "grad_norm": 0.1664845049381256, "learning_rate": 5.6614798348682116e-05, "loss": 0.038, "step": 6836 }, { "epoch": 13.018544935805991, "grad_norm": 0.23480184376239777, "learning_rate": 5.6608447126071774e-05, "loss": 0.0641, "step": 6837 }, { "epoch": 13.020446980504042, "grad_norm": 0.43155115842819214, "learning_rate": 5.660209590346142e-05, "loss": 0.0728, "step": 6838 }, { "epoch": 13.022349025202093, "grad_norm": 0.13202181458473206, "learning_rate": 5.659574468085107e-05, "loss": 0.0662, "step": 6839 }, { "epoch": 13.024251069900142, "grad_norm": 0.22883914411067963, "learning_rate": 5.6589393458240716e-05, "loss": 0.0761, "step": 6840 }, { "epoch": 13.026153114598193, "grad_norm": 0.2755950391292572, "learning_rate": 5.6583042235630354e-05, "loss": 0.0561, "step": 6841 }, { "epoch": 13.028055159296244, "grad_norm": 0.264454185962677, "learning_rate": 5.657669101302001e-05, "loss": 0.0748, "step": 6842 }, { "epoch": 13.029957203994293, "grad_norm": 0.14492832124233246, "learning_rate": 5.657033979040965e-05, "loss": 0.0572, "step": 6843 }, { "epoch": 13.031859248692344, "grad_norm": 0.20079435408115387, "learning_rate": 5.656398856779931e-05, "loss": 0.0594, "step": 6844 }, { "epoch": 13.033761293390395, "grad_norm": 0.26874688267707825, "learning_rate": 5.6557637345188955e-05, "loss": 0.0679, "step": 6845 }, { "epoch": 13.035663338088446, "grad_norm": 0.12486004829406738, "learning_rate": 5.655128612257859e-05, "loss": 0.0768, "step": 6846 }, { "epoch": 13.037565382786495, "grad_norm": 0.1992534101009369, "learning_rate": 5.654493489996825e-05, "loss": 0.0716, "step": 6847 }, { "epoch": 13.039467427484546, "grad_norm": 0.20536714792251587, "learning_rate": 5.653858367735789e-05, "loss": 0.0653, "step": 6848 }, { "epoch": 13.041369472182597, "grad_norm": 0.18057265877723694, "learning_rate": 5.653223245474755e-05, "loss": 0.0879, "step": 6849 }, { "epoch": 13.043271516880647, "grad_norm": 0.2120429128408432, "learning_rate": 5.652588123213719e-05, "loss": 0.0556, "step": 6850 }, { "epoch": 13.045173561578697, "grad_norm": 0.08657226711511612, "learning_rate": 5.651953000952683e-05, "loss": 0.0394, "step": 6851 }, { "epoch": 13.047075606276747, "grad_norm": 0.2886129915714264, "learning_rate": 5.651317878691649e-05, "loss": 0.0573, "step": 6852 }, { "epoch": 13.048977650974798, "grad_norm": 0.14705555140972137, "learning_rate": 5.650682756430613e-05, "loss": 0.0598, "step": 6853 }, { "epoch": 13.050879695672847, "grad_norm": 0.13532306253910065, "learning_rate": 5.6500476341695774e-05, "loss": 0.046, "step": 6854 }, { "epoch": 13.052781740370898, "grad_norm": 0.1289561241865158, "learning_rate": 5.6494125119085426e-05, "loss": 0.055, "step": 6855 }, { "epoch": 13.05468378506895, "grad_norm": 0.22951653599739075, "learning_rate": 5.648777389647507e-05, "loss": 0.0565, "step": 6856 }, { "epoch": 13.056585829767, "grad_norm": 0.2005440890789032, "learning_rate": 5.648142267386472e-05, "loss": 0.0723, "step": 6857 }, { "epoch": 13.05848787446505, "grad_norm": 0.18460001051425934, "learning_rate": 5.647507145125437e-05, "loss": 0.0673, "step": 6858 }, { "epoch": 13.0603899191631, "grad_norm": 0.16628994047641754, "learning_rate": 5.646872022864401e-05, "loss": 0.0655, "step": 6859 }, { "epoch": 13.062291963861151, "grad_norm": 0.11353509873151779, "learning_rate": 5.6462369006033665e-05, "loss": 0.0724, "step": 6860 }, { "epoch": 13.064194008559202, "grad_norm": 0.16154246032238007, "learning_rate": 5.645601778342331e-05, "loss": 0.0549, "step": 6861 }, { "epoch": 13.066096053257251, "grad_norm": 0.20058204233646393, "learning_rate": 5.644966656081296e-05, "loss": 0.0529, "step": 6862 }, { "epoch": 13.067998097955302, "grad_norm": 0.09383098036050797, "learning_rate": 5.644331533820261e-05, "loss": 0.0483, "step": 6863 }, { "epoch": 13.069900142653353, "grad_norm": 0.23607304692268372, "learning_rate": 5.643696411559225e-05, "loss": 0.0793, "step": 6864 }, { "epoch": 13.071802187351402, "grad_norm": 0.1318606436252594, "learning_rate": 5.6430612892981904e-05, "loss": 0.0509, "step": 6865 }, { "epoch": 13.073704232049453, "grad_norm": 0.25798821449279785, "learning_rate": 5.642426167037155e-05, "loss": 0.0718, "step": 6866 }, { "epoch": 13.075606276747504, "grad_norm": 0.21423642337322235, "learning_rate": 5.64179104477612e-05, "loss": 0.0569, "step": 6867 }, { "epoch": 13.077508321445555, "grad_norm": 0.16581445932388306, "learning_rate": 5.6411559225150846e-05, "loss": 0.0542, "step": 6868 }, { "epoch": 13.079410366143604, "grad_norm": 0.15126816928386688, "learning_rate": 5.640520800254049e-05, "loss": 0.0478, "step": 6869 }, { "epoch": 13.081312410841655, "grad_norm": 0.1429680585861206, "learning_rate": 5.639885677993014e-05, "loss": 0.062, "step": 6870 }, { "epoch": 13.083214455539705, "grad_norm": 0.049759067595005035, "learning_rate": 5.639250555731979e-05, "loss": 0.0326, "step": 6871 }, { "epoch": 13.085116500237756, "grad_norm": 0.19529815018177032, "learning_rate": 5.6386154334709426e-05, "loss": 0.0561, "step": 6872 }, { "epoch": 13.087018544935805, "grad_norm": 0.17486746609210968, "learning_rate": 5.6379803112099085e-05, "loss": 0.0596, "step": 6873 }, { "epoch": 13.088920589633856, "grad_norm": 0.12108680605888367, "learning_rate": 5.637345188948873e-05, "loss": 0.0475, "step": 6874 }, { "epoch": 13.090822634331907, "grad_norm": 0.16674591600894928, "learning_rate": 5.636710066687838e-05, "loss": 0.0877, "step": 6875 }, { "epoch": 13.092724679029958, "grad_norm": 0.14456391334533691, "learning_rate": 5.636074944426803e-05, "loss": 0.0416, "step": 6876 }, { "epoch": 13.094626723728007, "grad_norm": 0.22981326282024384, "learning_rate": 5.6354398221657665e-05, "loss": 0.0885, "step": 6877 }, { "epoch": 13.096528768426058, "grad_norm": 0.14590546488761902, "learning_rate": 5.6348046999047324e-05, "loss": 0.0377, "step": 6878 }, { "epoch": 13.098430813124109, "grad_norm": 0.18224582076072693, "learning_rate": 5.634169577643696e-05, "loss": 0.059, "step": 6879 }, { "epoch": 13.100332857822158, "grad_norm": 0.26473188400268555, "learning_rate": 5.633534455382662e-05, "loss": 0.0562, "step": 6880 }, { "epoch": 13.102234902520209, "grad_norm": 0.2478216588497162, "learning_rate": 5.632899333121626e-05, "loss": 0.0563, "step": 6881 }, { "epoch": 13.10413694721826, "grad_norm": 0.29242587089538574, "learning_rate": 5.6322642108605904e-05, "loss": 0.0608, "step": 6882 }, { "epoch": 13.10603899191631, "grad_norm": 0.16479073464870453, "learning_rate": 5.631629088599556e-05, "loss": 0.0678, "step": 6883 }, { "epoch": 13.10794103661436, "grad_norm": 0.23486103117465973, "learning_rate": 5.63099396633852e-05, "loss": 0.0652, "step": 6884 }, { "epoch": 13.10984308131241, "grad_norm": 0.154318168759346, "learning_rate": 5.630358844077486e-05, "loss": 0.1446, "step": 6885 }, { "epoch": 13.111745126010462, "grad_norm": 0.14569495618343353, "learning_rate": 5.62972372181645e-05, "loss": 0.053, "step": 6886 }, { "epoch": 13.113647170708512, "grad_norm": 0.17716017365455627, "learning_rate": 5.629088599555414e-05, "loss": 0.0676, "step": 6887 }, { "epoch": 13.115549215406562, "grad_norm": 0.20627303421497345, "learning_rate": 5.6284534772943795e-05, "loss": 0.0589, "step": 6888 }, { "epoch": 13.117451260104612, "grad_norm": 0.1511334627866745, "learning_rate": 5.627818355033344e-05, "loss": 0.058, "step": 6889 }, { "epoch": 13.119353304802663, "grad_norm": 0.17161938548088074, "learning_rate": 5.6271832327723085e-05, "loss": 0.0459, "step": 6890 }, { "epoch": 13.121255349500712, "grad_norm": 0.193283349275589, "learning_rate": 5.6265481105112736e-05, "loss": 0.0838, "step": 6891 }, { "epoch": 13.123157394198763, "grad_norm": 0.12341416627168655, "learning_rate": 5.625912988250238e-05, "loss": 0.0658, "step": 6892 }, { "epoch": 13.125059438896814, "grad_norm": 0.1398099958896637, "learning_rate": 5.6252778659892033e-05, "loss": 0.0475, "step": 6893 }, { "epoch": 13.126961483594865, "grad_norm": 0.1959352046251297, "learning_rate": 5.624642743728168e-05, "loss": 0.053, "step": 6894 }, { "epoch": 13.128863528292914, "grad_norm": 0.13947531580924988, "learning_rate": 5.6240076214671324e-05, "loss": 0.0479, "step": 6895 }, { "epoch": 13.130765572990965, "grad_norm": 0.15292446315288544, "learning_rate": 5.6233724992060975e-05, "loss": 0.052, "step": 6896 }, { "epoch": 13.132667617689016, "grad_norm": 0.20733055472373962, "learning_rate": 5.622737376945062e-05, "loss": 0.0528, "step": 6897 }, { "epoch": 13.134569662387067, "grad_norm": 0.18862584233283997, "learning_rate": 5.622102254684027e-05, "loss": 0.0677, "step": 6898 }, { "epoch": 13.136471707085116, "grad_norm": 0.15024633705615997, "learning_rate": 5.621467132422992e-05, "loss": 0.0485, "step": 6899 }, { "epoch": 13.138373751783167, "grad_norm": 0.2352614849805832, "learning_rate": 5.620832010161956e-05, "loss": 0.0782, "step": 6900 }, { "epoch": 13.140275796481218, "grad_norm": 0.23415392637252808, "learning_rate": 5.6201968879009214e-05, "loss": 0.064, "step": 6901 }, { "epoch": 13.142177841179267, "grad_norm": 0.11979921162128448, "learning_rate": 5.619561765639886e-05, "loss": 0.0543, "step": 6902 }, { "epoch": 13.144079885877318, "grad_norm": 0.14752525091171265, "learning_rate": 5.618926643378851e-05, "loss": 0.0593, "step": 6903 }, { "epoch": 13.145981930575369, "grad_norm": 0.19310492277145386, "learning_rate": 5.6182915211178156e-05, "loss": 0.0586, "step": 6904 }, { "epoch": 13.14788397527342, "grad_norm": 0.12309253960847855, "learning_rate": 5.61765639885678e-05, "loss": 0.0548, "step": 6905 }, { "epoch": 13.149786019971469, "grad_norm": 0.09493910521268845, "learning_rate": 5.617021276595745e-05, "loss": 0.0878, "step": 6906 }, { "epoch": 13.15168806466952, "grad_norm": 0.161015585064888, "learning_rate": 5.61638615433471e-05, "loss": 0.0655, "step": 6907 }, { "epoch": 13.15359010936757, "grad_norm": 0.31709450483322144, "learning_rate": 5.6157510320736736e-05, "loss": 0.0841, "step": 6908 }, { "epoch": 13.155492154065621, "grad_norm": 0.20902904868125916, "learning_rate": 5.6151159098126395e-05, "loss": 0.0471, "step": 6909 }, { "epoch": 13.15739419876367, "grad_norm": 0.13743600249290466, "learning_rate": 5.6144807875516033e-05, "loss": 0.058, "step": 6910 }, { "epoch": 13.159296243461721, "grad_norm": 0.1610523909330368, "learning_rate": 5.613845665290569e-05, "loss": 0.0612, "step": 6911 }, { "epoch": 13.161198288159772, "grad_norm": 0.13575279712677002, "learning_rate": 5.613210543029534e-05, "loss": 0.0683, "step": 6912 }, { "epoch": 13.163100332857821, "grad_norm": 0.15719729661941528, "learning_rate": 5.6125754207684975e-05, "loss": 0.0565, "step": 6913 }, { "epoch": 13.165002377555872, "grad_norm": 0.17677704989910126, "learning_rate": 5.6119402985074634e-05, "loss": 0.0441, "step": 6914 }, { "epoch": 13.166904422253923, "grad_norm": 0.11826075613498688, "learning_rate": 5.611305176246427e-05, "loss": 0.0513, "step": 6915 }, { "epoch": 13.168806466951974, "grad_norm": 0.13202832639217377, "learning_rate": 5.610670053985393e-05, "loss": 0.0492, "step": 6916 }, { "epoch": 13.170708511650023, "grad_norm": 0.264354944229126, "learning_rate": 5.610034931724357e-05, "loss": 0.1007, "step": 6917 }, { "epoch": 13.172610556348074, "grad_norm": 0.19659414887428284, "learning_rate": 5.6093998094633214e-05, "loss": 0.0618, "step": 6918 }, { "epoch": 13.174512601046125, "grad_norm": 0.2893676459789276, "learning_rate": 5.608764687202287e-05, "loss": 0.0606, "step": 6919 }, { "epoch": 13.176414645744176, "grad_norm": 0.12958897650241852, "learning_rate": 5.608129564941251e-05, "loss": 0.0488, "step": 6920 }, { "epoch": 13.178316690442225, "grad_norm": 0.18396779894828796, "learning_rate": 5.607494442680217e-05, "loss": 0.0595, "step": 6921 }, { "epoch": 13.180218735140276, "grad_norm": 0.14515328407287598, "learning_rate": 5.606859320419181e-05, "loss": 0.0506, "step": 6922 }, { "epoch": 13.182120779838327, "grad_norm": 0.21712662279605865, "learning_rate": 5.606224198158145e-05, "loss": 0.0807, "step": 6923 }, { "epoch": 13.184022824536376, "grad_norm": 0.10604386031627655, "learning_rate": 5.6055890758971105e-05, "loss": 0.0654, "step": 6924 }, { "epoch": 13.185924869234427, "grad_norm": 0.25209611654281616, "learning_rate": 5.604953953636075e-05, "loss": 0.0643, "step": 6925 }, { "epoch": 13.187826913932478, "grad_norm": 0.22648891806602478, "learning_rate": 5.6043188313750395e-05, "loss": 0.0522, "step": 6926 }, { "epoch": 13.189728958630528, "grad_norm": 0.202748104929924, "learning_rate": 5.603683709114005e-05, "loss": 0.0352, "step": 6927 }, { "epoch": 13.191631003328578, "grad_norm": 0.23080334067344666, "learning_rate": 5.603048586852969e-05, "loss": 0.0616, "step": 6928 }, { "epoch": 13.193533048026628, "grad_norm": 0.09643086045980453, "learning_rate": 5.6024134645919344e-05, "loss": 0.0688, "step": 6929 }, { "epoch": 13.19543509272468, "grad_norm": 0.3101188540458679, "learning_rate": 5.601778342330899e-05, "loss": 0.0709, "step": 6930 }, { "epoch": 13.19733713742273, "grad_norm": 0.21519017219543457, "learning_rate": 5.6011432200698634e-05, "loss": 0.0642, "step": 6931 }, { "epoch": 13.19923918212078, "grad_norm": 0.34213313460350037, "learning_rate": 5.6005080978088286e-05, "loss": 0.0737, "step": 6932 }, { "epoch": 13.20114122681883, "grad_norm": 0.24912647902965546, "learning_rate": 5.599872975547793e-05, "loss": 0.0525, "step": 6933 }, { "epoch": 13.203043271516881, "grad_norm": 0.29063260555267334, "learning_rate": 5.599237853286758e-05, "loss": 0.0818, "step": 6934 }, { "epoch": 13.204945316214932, "grad_norm": 0.38843879103660583, "learning_rate": 5.598602731025723e-05, "loss": 0.0916, "step": 6935 }, { "epoch": 13.206847360912981, "grad_norm": 0.13748934864997864, "learning_rate": 5.597967608764687e-05, "loss": 0.0387, "step": 6936 }, { "epoch": 13.208749405611032, "grad_norm": 0.23910416662693024, "learning_rate": 5.5973324865036525e-05, "loss": 0.0579, "step": 6937 }, { "epoch": 13.210651450309083, "grad_norm": 0.12952692806720734, "learning_rate": 5.596697364242617e-05, "loss": 0.0485, "step": 6938 }, { "epoch": 13.212553495007132, "grad_norm": 0.14758580923080444, "learning_rate": 5.596062241981582e-05, "loss": 0.063, "step": 6939 }, { "epoch": 13.214455539705183, "grad_norm": 0.19575528800487518, "learning_rate": 5.595427119720547e-05, "loss": 0.0773, "step": 6940 }, { "epoch": 13.216357584403234, "grad_norm": 0.15974241495132446, "learning_rate": 5.594791997459511e-05, "loss": 0.0873, "step": 6941 }, { "epoch": 13.218259629101285, "grad_norm": 0.145432248711586, "learning_rate": 5.5941568751984764e-05, "loss": 0.0524, "step": 6942 }, { "epoch": 13.220161673799334, "grad_norm": 0.21533656120300293, "learning_rate": 5.593521752937441e-05, "loss": 0.0599, "step": 6943 }, { "epoch": 13.222063718497385, "grad_norm": 0.16924172639846802, "learning_rate": 5.592886630676405e-05, "loss": 0.0677, "step": 6944 }, { "epoch": 13.223965763195435, "grad_norm": 0.18145938217639923, "learning_rate": 5.5922515084153706e-05, "loss": 0.0612, "step": 6945 }, { "epoch": 13.225867807893486, "grad_norm": 0.10504286736249924, "learning_rate": 5.5916163861543344e-05, "loss": 0.0781, "step": 6946 }, { "epoch": 13.227769852591535, "grad_norm": 0.18427816033363342, "learning_rate": 5.5909812638933e-05, "loss": 0.0584, "step": 6947 }, { "epoch": 13.229671897289586, "grad_norm": 0.1417822390794754, "learning_rate": 5.590346141632265e-05, "loss": 0.0591, "step": 6948 }, { "epoch": 13.231573941987637, "grad_norm": 0.19255954027175903, "learning_rate": 5.5897110193712286e-05, "loss": 0.0752, "step": 6949 }, { "epoch": 13.233475986685686, "grad_norm": 0.1782093495130539, "learning_rate": 5.5890758971101944e-05, "loss": 0.0452, "step": 6950 }, { "epoch": 13.235378031383737, "grad_norm": 0.2388116419315338, "learning_rate": 5.588440774849158e-05, "loss": 0.0738, "step": 6951 }, { "epoch": 13.237280076081788, "grad_norm": 0.2225741297006607, "learning_rate": 5.587805652588124e-05, "loss": 0.0606, "step": 6952 }, { "epoch": 13.239182120779839, "grad_norm": 0.12010703235864639, "learning_rate": 5.587170530327088e-05, "loss": 0.0701, "step": 6953 }, { "epoch": 13.241084165477888, "grad_norm": 0.10702623426914215, "learning_rate": 5.5865354080660525e-05, "loss": 0.0665, "step": 6954 }, { "epoch": 13.242986210175939, "grad_norm": 0.20028775930404663, "learning_rate": 5.585900285805018e-05, "loss": 0.0596, "step": 6955 }, { "epoch": 13.24488825487399, "grad_norm": 0.12375194579362869, "learning_rate": 5.585265163543982e-05, "loss": 0.0528, "step": 6956 }, { "epoch": 13.24679029957204, "grad_norm": 0.15475822985172272, "learning_rate": 5.584630041282948e-05, "loss": 0.0643, "step": 6957 }, { "epoch": 13.24869234427009, "grad_norm": 0.14023445546627045, "learning_rate": 5.583994919021912e-05, "loss": 0.0493, "step": 6958 }, { "epoch": 13.25059438896814, "grad_norm": 0.34267377853393555, "learning_rate": 5.5833597967608764e-05, "loss": 0.0888, "step": 6959 }, { "epoch": 13.252496433666192, "grad_norm": 0.22459900379180908, "learning_rate": 5.5827246744998415e-05, "loss": 0.072, "step": 6960 }, { "epoch": 13.25439847836424, "grad_norm": 0.16262765228748322, "learning_rate": 5.582089552238806e-05, "loss": 0.041, "step": 6961 }, { "epoch": 13.256300523062292, "grad_norm": 0.16136904060840607, "learning_rate": 5.5814544299777706e-05, "loss": 0.0512, "step": 6962 }, { "epoch": 13.258202567760343, "grad_norm": 0.24696044623851776, "learning_rate": 5.580819307716736e-05, "loss": 0.0577, "step": 6963 }, { "epoch": 13.260104612458393, "grad_norm": 0.27188289165496826, "learning_rate": 5.5801841854557e-05, "loss": 0.0564, "step": 6964 }, { "epoch": 13.262006657156443, "grad_norm": 0.2794724106788635, "learning_rate": 5.5795490631946654e-05, "loss": 0.0614, "step": 6965 }, { "epoch": 13.263908701854493, "grad_norm": 0.181308776140213, "learning_rate": 5.57891394093363e-05, "loss": 0.0531, "step": 6966 }, { "epoch": 13.265810746552544, "grad_norm": 0.2799053490161896, "learning_rate": 5.5782788186725944e-05, "loss": 0.0701, "step": 6967 }, { "epoch": 13.267712791250595, "grad_norm": 0.34972909092903137, "learning_rate": 5.5776436964115596e-05, "loss": 0.1403, "step": 6968 }, { "epoch": 13.269614835948644, "grad_norm": 0.31298545002937317, "learning_rate": 5.577008574150524e-05, "loss": 0.1189, "step": 6969 }, { "epoch": 13.271516880646695, "grad_norm": 0.15706603229045868, "learning_rate": 5.576373451889489e-05, "loss": 0.0698, "step": 6970 }, { "epoch": 13.273418925344746, "grad_norm": 0.20245127379894257, "learning_rate": 5.575738329628454e-05, "loss": 0.0658, "step": 6971 }, { "epoch": 13.275320970042795, "grad_norm": 0.2303195297718048, "learning_rate": 5.575103207367418e-05, "loss": 0.0678, "step": 6972 }, { "epoch": 13.277223014740846, "grad_norm": 0.16143448650836945, "learning_rate": 5.5744680851063835e-05, "loss": 0.053, "step": 6973 }, { "epoch": 13.279125059438897, "grad_norm": 0.14536364376544952, "learning_rate": 5.573832962845348e-05, "loss": 0.0762, "step": 6974 }, { "epoch": 13.281027104136948, "grad_norm": 0.11136563122272491, "learning_rate": 5.573197840584313e-05, "loss": 0.0643, "step": 6975 }, { "epoch": 13.282929148834997, "grad_norm": 0.11188220232725143, "learning_rate": 5.572562718323278e-05, "loss": 0.0507, "step": 6976 }, { "epoch": 13.284831193533048, "grad_norm": 0.1301429122686386, "learning_rate": 5.571927596062242e-05, "loss": 0.0502, "step": 6977 }, { "epoch": 13.286733238231099, "grad_norm": 0.21348558366298676, "learning_rate": 5.5712924738012074e-05, "loss": 0.0819, "step": 6978 }, { "epoch": 13.28863528292915, "grad_norm": 0.19746661186218262, "learning_rate": 5.570657351540172e-05, "loss": 0.0736, "step": 6979 }, { "epoch": 13.290537327627199, "grad_norm": 0.21061228215694427, "learning_rate": 5.570022229279136e-05, "loss": 0.0724, "step": 6980 }, { "epoch": 13.29243937232525, "grad_norm": 0.19609305262565613, "learning_rate": 5.5693871070181016e-05, "loss": 0.0768, "step": 6981 }, { "epoch": 13.2943414170233, "grad_norm": 0.3237515985965729, "learning_rate": 5.5687519847570654e-05, "loss": 0.0907, "step": 6982 }, { "epoch": 13.29624346172135, "grad_norm": 0.2335703819990158, "learning_rate": 5.568116862496031e-05, "loss": 0.0758, "step": 6983 }, { "epoch": 13.2981455064194, "grad_norm": 0.19985871016979218, "learning_rate": 5.567481740234996e-05, "loss": 0.0549, "step": 6984 }, { "epoch": 13.300047551117451, "grad_norm": 0.1393788903951645, "learning_rate": 5.5668466179739596e-05, "loss": 0.0535, "step": 6985 }, { "epoch": 13.301949595815502, "grad_norm": 0.21756048500537872, "learning_rate": 5.5662114957129255e-05, "loss": 0.0675, "step": 6986 }, { "epoch": 13.303851640513551, "grad_norm": 0.3030356466770172, "learning_rate": 5.565576373451889e-05, "loss": 0.0777, "step": 6987 }, { "epoch": 13.305753685211602, "grad_norm": 0.22126442193984985, "learning_rate": 5.564941251190855e-05, "loss": 0.059, "step": 6988 }, { "epoch": 13.307655729909653, "grad_norm": 0.18576373159885406, "learning_rate": 5.564306128929819e-05, "loss": 0.0573, "step": 6989 }, { "epoch": 13.309557774607704, "grad_norm": 0.11999959498643875, "learning_rate": 5.5636710066687835e-05, "loss": 0.0502, "step": 6990 }, { "epoch": 13.311459819305753, "grad_norm": 0.1743020862340927, "learning_rate": 5.563035884407749e-05, "loss": 0.0431, "step": 6991 }, { "epoch": 13.313361864003804, "grad_norm": 0.1762196123600006, "learning_rate": 5.562400762146713e-05, "loss": 0.0785, "step": 6992 }, { "epoch": 13.315263908701855, "grad_norm": 0.17211808264255524, "learning_rate": 5.561765639885679e-05, "loss": 0.062, "step": 6993 }, { "epoch": 13.317165953399904, "grad_norm": 0.18189425766468048, "learning_rate": 5.561130517624643e-05, "loss": 0.0509, "step": 6994 }, { "epoch": 13.319067998097955, "grad_norm": 0.25210216641426086, "learning_rate": 5.5604953953636074e-05, "loss": 0.0502, "step": 6995 }, { "epoch": 13.320970042796006, "grad_norm": 0.16832081973552704, "learning_rate": 5.5598602731025726e-05, "loss": 0.0775, "step": 6996 }, { "epoch": 13.322872087494057, "grad_norm": 0.2224736213684082, "learning_rate": 5.559225150841537e-05, "loss": 0.058, "step": 6997 }, { "epoch": 13.324774132192106, "grad_norm": 0.17822198569774628, "learning_rate": 5.5585900285805016e-05, "loss": 0.0648, "step": 6998 }, { "epoch": 13.326676176890157, "grad_norm": 0.12076257914304733, "learning_rate": 5.557954906319467e-05, "loss": 0.0414, "step": 6999 }, { "epoch": 13.328578221588208, "grad_norm": 0.2498164027929306, "learning_rate": 5.557319784058431e-05, "loss": 0.0601, "step": 7000 }, { "epoch": 13.330480266286258, "grad_norm": 0.128321573138237, "learning_rate": 5.5566846617973965e-05, "loss": 0.0526, "step": 7001 }, { "epoch": 13.332382310984308, "grad_norm": 0.4024381935596466, "learning_rate": 5.556049539536361e-05, "loss": 0.0775, "step": 7002 }, { "epoch": 13.334284355682358, "grad_norm": 0.15158173441886902, "learning_rate": 5.5554144172753255e-05, "loss": 0.046, "step": 7003 }, { "epoch": 13.33618640038041, "grad_norm": 0.20958949625492096, "learning_rate": 5.554779295014291e-05, "loss": 0.0629, "step": 7004 }, { "epoch": 13.338088445078458, "grad_norm": 0.20752930641174316, "learning_rate": 5.554144172753255e-05, "loss": 0.0659, "step": 7005 }, { "epoch": 13.33999048977651, "grad_norm": 0.18724681437015533, "learning_rate": 5.5535090504922204e-05, "loss": 0.051, "step": 7006 }, { "epoch": 13.34189253447456, "grad_norm": 0.1965479701757431, "learning_rate": 5.552873928231185e-05, "loss": 0.0591, "step": 7007 }, { "epoch": 13.343794579172611, "grad_norm": 0.12148141860961914, "learning_rate": 5.5522388059701494e-05, "loss": 0.0607, "step": 7008 }, { "epoch": 13.34569662387066, "grad_norm": 0.19634923338890076, "learning_rate": 5.5516036837091146e-05, "loss": 0.0603, "step": 7009 }, { "epoch": 13.347598668568711, "grad_norm": 0.16275207698345184, "learning_rate": 5.550968561448079e-05, "loss": 0.0504, "step": 7010 }, { "epoch": 13.349500713266762, "grad_norm": 0.1558467447757721, "learning_rate": 5.550333439187044e-05, "loss": 0.0836, "step": 7011 }, { "epoch": 13.351402757964813, "grad_norm": 0.12177915871143341, "learning_rate": 5.549698316926009e-05, "loss": 0.0455, "step": 7012 }, { "epoch": 13.353304802662862, "grad_norm": 0.341022789478302, "learning_rate": 5.549063194664973e-05, "loss": 0.092, "step": 7013 }, { "epoch": 13.355206847360913, "grad_norm": 0.21205639839172363, "learning_rate": 5.5484280724039384e-05, "loss": 0.0562, "step": 7014 }, { "epoch": 13.357108892058964, "grad_norm": 0.2380506545305252, "learning_rate": 5.547792950142903e-05, "loss": 0.0593, "step": 7015 }, { "epoch": 13.359010936757013, "grad_norm": 0.24583934247493744, "learning_rate": 5.547157827881867e-05, "loss": 0.0501, "step": 7016 }, { "epoch": 13.360912981455064, "grad_norm": 0.11172381788492203, "learning_rate": 5.5465227056208326e-05, "loss": 0.0679, "step": 7017 }, { "epoch": 13.362815026153115, "grad_norm": 0.1802452802658081, "learning_rate": 5.5458875833597965e-05, "loss": 0.0647, "step": 7018 }, { "epoch": 13.364717070851166, "grad_norm": 0.22848699986934662, "learning_rate": 5.545252461098762e-05, "loss": 0.0521, "step": 7019 }, { "epoch": 13.366619115549215, "grad_norm": 0.16188445687294006, "learning_rate": 5.544617338837726e-05, "loss": 0.0473, "step": 7020 }, { "epoch": 13.368521160247266, "grad_norm": 0.2097487598657608, "learning_rate": 5.543982216576691e-05, "loss": 0.0594, "step": 7021 }, { "epoch": 13.370423204945316, "grad_norm": 0.24954724311828613, "learning_rate": 5.5433470943156565e-05, "loss": 0.0532, "step": 7022 }, { "epoch": 13.372325249643367, "grad_norm": 0.1943424791097641, "learning_rate": 5.5427119720546204e-05, "loss": 0.0583, "step": 7023 }, { "epoch": 13.374227294341416, "grad_norm": 0.21266767382621765, "learning_rate": 5.542076849793586e-05, "loss": 0.0446, "step": 7024 }, { "epoch": 13.376129339039467, "grad_norm": 0.24260656535625458, "learning_rate": 5.54144172753255e-05, "loss": 0.0572, "step": 7025 }, { "epoch": 13.378031383737518, "grad_norm": 0.16277165710926056, "learning_rate": 5.5408066052715146e-05, "loss": 0.0468, "step": 7026 }, { "epoch": 13.379933428435567, "grad_norm": 0.33338937163352966, "learning_rate": 5.54017148301048e-05, "loss": 0.0645, "step": 7027 }, { "epoch": 13.381835473133618, "grad_norm": 0.19156119227409363, "learning_rate": 5.539536360749444e-05, "loss": 0.0589, "step": 7028 }, { "epoch": 13.383737517831669, "grad_norm": 0.18192563951015472, "learning_rate": 5.53890123848841e-05, "loss": 0.0793, "step": 7029 }, { "epoch": 13.38563956252972, "grad_norm": 0.10455767810344696, "learning_rate": 5.538266116227374e-05, "loss": 0.0547, "step": 7030 }, { "epoch": 13.387541607227769, "grad_norm": 0.2381904125213623, "learning_rate": 5.5376309939663384e-05, "loss": 0.0556, "step": 7031 }, { "epoch": 13.38944365192582, "grad_norm": 0.1895589977502823, "learning_rate": 5.5369958717053036e-05, "loss": 0.0442, "step": 7032 }, { "epoch": 13.39134569662387, "grad_norm": 0.11963589489459991, "learning_rate": 5.536360749444268e-05, "loss": 0.0476, "step": 7033 }, { "epoch": 13.393247741321922, "grad_norm": 0.13365913927555084, "learning_rate": 5.5357256271832326e-05, "loss": 0.0399, "step": 7034 }, { "epoch": 13.39514978601997, "grad_norm": 0.2600623369216919, "learning_rate": 5.535090504922198e-05, "loss": 0.0673, "step": 7035 }, { "epoch": 13.397051830718022, "grad_norm": 0.20314230024814606, "learning_rate": 5.534455382661162e-05, "loss": 0.0698, "step": 7036 }, { "epoch": 13.398953875416073, "grad_norm": 0.29874828457832336, "learning_rate": 5.5338202604001275e-05, "loss": 0.075, "step": 7037 }, { "epoch": 13.400855920114124, "grad_norm": 0.18833285570144653, "learning_rate": 5.533185138139092e-05, "loss": 0.0596, "step": 7038 }, { "epoch": 13.402757964812173, "grad_norm": 0.1932767629623413, "learning_rate": 5.5325500158780565e-05, "loss": 0.0531, "step": 7039 }, { "epoch": 13.404660009510224, "grad_norm": 0.2641963064670563, "learning_rate": 5.531914893617022e-05, "loss": 0.0631, "step": 7040 }, { "epoch": 13.406562054208274, "grad_norm": 0.1732291430234909, "learning_rate": 5.531279771355986e-05, "loss": 0.0409, "step": 7041 }, { "epoch": 13.408464098906324, "grad_norm": 0.13920660316944122, "learning_rate": 5.5306446490949514e-05, "loss": 0.0634, "step": 7042 }, { "epoch": 13.410366143604374, "grad_norm": 0.20135769248008728, "learning_rate": 5.530009526833916e-05, "loss": 0.0524, "step": 7043 }, { "epoch": 13.412268188302425, "grad_norm": 0.2123275250196457, "learning_rate": 5.5293744045728804e-05, "loss": 0.0617, "step": 7044 }, { "epoch": 13.414170233000476, "grad_norm": 0.18639571964740753, "learning_rate": 5.5287392823118456e-05, "loss": 0.0567, "step": 7045 }, { "epoch": 13.416072277698525, "grad_norm": 0.1802852600812912, "learning_rate": 5.52810416005081e-05, "loss": 0.0557, "step": 7046 }, { "epoch": 13.417974322396576, "grad_norm": 0.2503332793712616, "learning_rate": 5.527469037789775e-05, "loss": 0.0819, "step": 7047 }, { "epoch": 13.419876367094627, "grad_norm": 0.1591678261756897, "learning_rate": 5.52683391552874e-05, "loss": 0.0587, "step": 7048 }, { "epoch": 13.421778411792678, "grad_norm": 0.10254660993814468, "learning_rate": 5.5261987932677036e-05, "loss": 0.0609, "step": 7049 }, { "epoch": 13.423680456490727, "grad_norm": 0.24069607257843018, "learning_rate": 5.5255636710066695e-05, "loss": 0.0776, "step": 7050 }, { "epoch": 13.425582501188778, "grad_norm": 0.13990217447280884, "learning_rate": 5.524928548745634e-05, "loss": 0.0618, "step": 7051 }, { "epoch": 13.427484545886829, "grad_norm": 0.15880687534809113, "learning_rate": 5.524293426484598e-05, "loss": 0.041, "step": 7052 }, { "epoch": 13.429386590584878, "grad_norm": 0.1640283614397049, "learning_rate": 5.523658304223564e-05, "loss": 0.0569, "step": 7053 }, { "epoch": 13.431288635282929, "grad_norm": 0.11456301063299179, "learning_rate": 5.5230231819625275e-05, "loss": 0.0604, "step": 7054 }, { "epoch": 13.43319067998098, "grad_norm": 0.18163856863975525, "learning_rate": 5.5223880597014934e-05, "loss": 0.0525, "step": 7055 }, { "epoch": 13.43509272467903, "grad_norm": 0.172135591506958, "learning_rate": 5.521752937440457e-05, "loss": 0.0732, "step": 7056 }, { "epoch": 13.43699476937708, "grad_norm": 0.14534039795398712, "learning_rate": 5.521117815179422e-05, "loss": 0.0527, "step": 7057 }, { "epoch": 13.43889681407513, "grad_norm": 0.10526961088180542, "learning_rate": 5.5204826929183876e-05, "loss": 0.0452, "step": 7058 }, { "epoch": 13.440798858773181, "grad_norm": 0.2756957411766052, "learning_rate": 5.5198475706573514e-05, "loss": 0.1274, "step": 7059 }, { "epoch": 13.442700903471232, "grad_norm": 0.21903552114963531, "learning_rate": 5.519212448396317e-05, "loss": 0.0579, "step": 7060 }, { "epoch": 13.444602948169281, "grad_norm": 0.16684231162071228, "learning_rate": 5.518577326135281e-05, "loss": 0.0581, "step": 7061 }, { "epoch": 13.446504992867332, "grad_norm": 0.14972932636737823, "learning_rate": 5.5179422038742456e-05, "loss": 0.0484, "step": 7062 }, { "epoch": 13.448407037565383, "grad_norm": 0.19577839970588684, "learning_rate": 5.517307081613211e-05, "loss": 0.0649, "step": 7063 }, { "epoch": 13.450309082263432, "grad_norm": 0.21453331410884857, "learning_rate": 5.516671959352175e-05, "loss": 0.0846, "step": 7064 }, { "epoch": 13.452211126961483, "grad_norm": 0.13317766785621643, "learning_rate": 5.516036837091141e-05, "loss": 0.0544, "step": 7065 }, { "epoch": 13.454113171659534, "grad_norm": 0.15833154320716858, "learning_rate": 5.515401714830105e-05, "loss": 0.0639, "step": 7066 }, { "epoch": 13.456015216357585, "grad_norm": 0.15371407568454742, "learning_rate": 5.5147665925690695e-05, "loss": 0.0665, "step": 7067 }, { "epoch": 13.457917261055634, "grad_norm": 0.15756915509700775, "learning_rate": 5.514131470308035e-05, "loss": 0.0502, "step": 7068 }, { "epoch": 13.459819305753685, "grad_norm": 0.15613050758838654, "learning_rate": 5.513496348046999e-05, "loss": 0.0805, "step": 7069 }, { "epoch": 13.461721350451736, "grad_norm": 0.2498931884765625, "learning_rate": 5.512861225785964e-05, "loss": 0.0624, "step": 7070 }, { "epoch": 13.463623395149787, "grad_norm": 0.1730266809463501, "learning_rate": 5.512226103524929e-05, "loss": 0.0419, "step": 7071 }, { "epoch": 13.465525439847836, "grad_norm": 0.15835736691951752, "learning_rate": 5.5115909812638934e-05, "loss": 0.0533, "step": 7072 }, { "epoch": 13.467427484545887, "grad_norm": 0.22773846983909607, "learning_rate": 5.5109558590028586e-05, "loss": 0.0575, "step": 7073 }, { "epoch": 13.469329529243938, "grad_norm": 0.2337859719991684, "learning_rate": 5.510320736741823e-05, "loss": 0.0611, "step": 7074 }, { "epoch": 13.471231573941987, "grad_norm": 0.31625816226005554, "learning_rate": 5.5096856144807876e-05, "loss": 0.0967, "step": 7075 }, { "epoch": 13.473133618640038, "grad_norm": 0.18602289259433746, "learning_rate": 5.509050492219753e-05, "loss": 0.055, "step": 7076 }, { "epoch": 13.475035663338089, "grad_norm": 0.16424252092838287, "learning_rate": 5.508415369958717e-05, "loss": 0.0407, "step": 7077 }, { "epoch": 13.47693770803614, "grad_norm": 0.1765555590391159, "learning_rate": 5.5077802476976824e-05, "loss": 0.0674, "step": 7078 }, { "epoch": 13.478839752734189, "grad_norm": 0.26882824301719666, "learning_rate": 5.507145125436647e-05, "loss": 0.0779, "step": 7079 }, { "epoch": 13.48074179743224, "grad_norm": 0.320591002702713, "learning_rate": 5.5065100031756115e-05, "loss": 0.1318, "step": 7080 }, { "epoch": 13.48264384213029, "grad_norm": 0.1425531655550003, "learning_rate": 5.5058748809145766e-05, "loss": 0.0624, "step": 7081 }, { "epoch": 13.484545886828341, "grad_norm": 0.2594924569129944, "learning_rate": 5.505239758653541e-05, "loss": 0.0806, "step": 7082 }, { "epoch": 13.48644793152639, "grad_norm": 0.18772482872009277, "learning_rate": 5.504604636392506e-05, "loss": 0.0777, "step": 7083 }, { "epoch": 13.488349976224441, "grad_norm": 0.29713594913482666, "learning_rate": 5.503969514131471e-05, "loss": 0.102, "step": 7084 }, { "epoch": 13.490252020922492, "grad_norm": 0.21502874791622162, "learning_rate": 5.503334391870435e-05, "loss": 0.0693, "step": 7085 }, { "epoch": 13.492154065620543, "grad_norm": 0.162057027220726, "learning_rate": 5.5026992696094005e-05, "loss": 0.0544, "step": 7086 }, { "epoch": 13.494056110318592, "grad_norm": 0.22181911766529083, "learning_rate": 5.502064147348365e-05, "loss": 0.0667, "step": 7087 }, { "epoch": 13.495958155016643, "grad_norm": 0.15365485846996307, "learning_rate": 5.501429025087329e-05, "loss": 0.0605, "step": 7088 }, { "epoch": 13.497860199714694, "grad_norm": 0.10755611211061478, "learning_rate": 5.500793902826295e-05, "loss": 0.0389, "step": 7089 }, { "epoch": 13.499762244412743, "grad_norm": 0.2021094262599945, "learning_rate": 5.5001587805652586e-05, "loss": 0.06, "step": 7090 }, { "epoch": 13.501664289110794, "grad_norm": 0.19792579114437103, "learning_rate": 5.4995236583042244e-05, "loss": 0.0547, "step": 7091 }, { "epoch": 13.503566333808845, "grad_norm": 0.20431144535541534, "learning_rate": 5.498888536043188e-05, "loss": 0.0752, "step": 7092 }, { "epoch": 13.505468378506896, "grad_norm": 0.22182844579219818, "learning_rate": 5.498253413782153e-05, "loss": 0.0724, "step": 7093 }, { "epoch": 13.507370423204945, "grad_norm": 0.17189350724220276, "learning_rate": 5.4976182915211186e-05, "loss": 0.0493, "step": 7094 }, { "epoch": 13.509272467902996, "grad_norm": 0.27690860629081726, "learning_rate": 5.4969831692600824e-05, "loss": 0.0584, "step": 7095 }, { "epoch": 13.511174512601047, "grad_norm": 0.15486986935138702, "learning_rate": 5.496348046999048e-05, "loss": 0.0559, "step": 7096 }, { "epoch": 13.513076557299097, "grad_norm": 0.2064022719860077, "learning_rate": 5.495712924738012e-05, "loss": 0.0689, "step": 7097 }, { "epoch": 13.514978601997147, "grad_norm": 0.22928853332996368, "learning_rate": 5.4950778024769766e-05, "loss": 0.0589, "step": 7098 }, { "epoch": 13.516880646695197, "grad_norm": 0.20014768838882446, "learning_rate": 5.494442680215942e-05, "loss": 0.0652, "step": 7099 }, { "epoch": 13.518782691393248, "grad_norm": 0.17018628120422363, "learning_rate": 5.493807557954906e-05, "loss": 0.0513, "step": 7100 }, { "epoch": 13.520684736091297, "grad_norm": 0.23920871317386627, "learning_rate": 5.4931724356938715e-05, "loss": 0.0442, "step": 7101 }, { "epoch": 13.522586780789348, "grad_norm": 0.21291278302669525, "learning_rate": 5.492537313432836e-05, "loss": 0.0662, "step": 7102 }, { "epoch": 13.5244888254874, "grad_norm": 0.17782357335090637, "learning_rate": 5.4919021911718005e-05, "loss": 0.0644, "step": 7103 }, { "epoch": 13.52639087018545, "grad_norm": 0.26954326033592224, "learning_rate": 5.491267068910766e-05, "loss": 0.0803, "step": 7104 }, { "epoch": 13.5282929148835, "grad_norm": 0.14195388555526733, "learning_rate": 5.49063194664973e-05, "loss": 0.0536, "step": 7105 }, { "epoch": 13.53019495958155, "grad_norm": 0.22725562751293182, "learning_rate": 5.489996824388695e-05, "loss": 0.0708, "step": 7106 }, { "epoch": 13.532097004279601, "grad_norm": 0.2652048170566559, "learning_rate": 5.48936170212766e-05, "loss": 0.0842, "step": 7107 }, { "epoch": 13.533999048977652, "grad_norm": 0.1272173821926117, "learning_rate": 5.4887265798666244e-05, "loss": 0.0599, "step": 7108 }, { "epoch": 13.535901093675701, "grad_norm": 0.15636959671974182, "learning_rate": 5.4880914576055896e-05, "loss": 0.0456, "step": 7109 }, { "epoch": 13.537803138373752, "grad_norm": 0.22358985245227814, "learning_rate": 5.487456335344554e-05, "loss": 0.0626, "step": 7110 }, { "epoch": 13.539705183071803, "grad_norm": 0.2860788404941559, "learning_rate": 5.4868212130835186e-05, "loss": 0.0769, "step": 7111 }, { "epoch": 13.541607227769852, "grad_norm": 0.25277575850486755, "learning_rate": 5.486186090822484e-05, "loss": 0.0626, "step": 7112 }, { "epoch": 13.543509272467903, "grad_norm": 0.16722799837589264, "learning_rate": 5.485550968561448e-05, "loss": 0.0617, "step": 7113 }, { "epoch": 13.545411317165954, "grad_norm": 0.22804458439350128, "learning_rate": 5.4849158463004135e-05, "loss": 0.0553, "step": 7114 }, { "epoch": 13.547313361864004, "grad_norm": 0.18392793834209442, "learning_rate": 5.484280724039378e-05, "loss": 0.0818, "step": 7115 }, { "epoch": 13.549215406562054, "grad_norm": 0.13310854136943817, "learning_rate": 5.4836456017783425e-05, "loss": 0.0487, "step": 7116 }, { "epoch": 13.551117451260104, "grad_norm": 0.1897313892841339, "learning_rate": 5.483010479517308e-05, "loss": 0.0733, "step": 7117 }, { "epoch": 13.553019495958155, "grad_norm": 0.15019789338111877, "learning_rate": 5.482375357256272e-05, "loss": 0.0605, "step": 7118 }, { "epoch": 13.554921540656206, "grad_norm": 0.17896315455436707, "learning_rate": 5.4817402349952374e-05, "loss": 0.0587, "step": 7119 }, { "epoch": 13.556823585354255, "grad_norm": 0.16350199282169342, "learning_rate": 5.481105112734202e-05, "loss": 0.0338, "step": 7120 }, { "epoch": 13.558725630052306, "grad_norm": 0.28143972158432007, "learning_rate": 5.480469990473166e-05, "loss": 0.0888, "step": 7121 }, { "epoch": 13.560627674750357, "grad_norm": 0.18174341320991516, "learning_rate": 5.4798348682121316e-05, "loss": 0.067, "step": 7122 }, { "epoch": 13.562529719448406, "grad_norm": 0.12471450120210648, "learning_rate": 5.479199745951096e-05, "loss": 0.0483, "step": 7123 }, { "epoch": 13.564431764146457, "grad_norm": 0.20541246235370636, "learning_rate": 5.47856462369006e-05, "loss": 0.0655, "step": 7124 }, { "epoch": 13.566333808844508, "grad_norm": 0.16435562074184418, "learning_rate": 5.477929501429026e-05, "loss": 0.0533, "step": 7125 }, { "epoch": 13.568235853542559, "grad_norm": 0.14860351383686066, "learning_rate": 5.4772943791679896e-05, "loss": 0.053, "step": 7126 }, { "epoch": 13.570137898240608, "grad_norm": 0.1912887692451477, "learning_rate": 5.4766592569069555e-05, "loss": 0.0588, "step": 7127 }, { "epoch": 13.572039942938659, "grad_norm": 0.17781256139278412, "learning_rate": 5.476024134645919e-05, "loss": 0.0578, "step": 7128 }, { "epoch": 13.57394198763671, "grad_norm": 0.18353131413459778, "learning_rate": 5.475389012384884e-05, "loss": 0.0493, "step": 7129 }, { "epoch": 13.57584403233476, "grad_norm": 0.1685384213924408, "learning_rate": 5.474753890123849e-05, "loss": 0.0658, "step": 7130 }, { "epoch": 13.57774607703281, "grad_norm": 0.18952107429504395, "learning_rate": 5.4741187678628135e-05, "loss": 0.0557, "step": 7131 }, { "epoch": 13.57964812173086, "grad_norm": 0.1624719798564911, "learning_rate": 5.4734836456017794e-05, "loss": 0.0479, "step": 7132 }, { "epoch": 13.581550166428912, "grad_norm": 0.14944607019424438, "learning_rate": 5.472848523340743e-05, "loss": 0.0606, "step": 7133 }, { "epoch": 13.58345221112696, "grad_norm": 0.27262911200523376, "learning_rate": 5.472213401079708e-05, "loss": 0.0751, "step": 7134 }, { "epoch": 13.585354255825012, "grad_norm": 0.26825249195098877, "learning_rate": 5.471578278818673e-05, "loss": 0.0594, "step": 7135 }, { "epoch": 13.587256300523062, "grad_norm": 0.20710812509059906, "learning_rate": 5.4709431565576374e-05, "loss": 0.0744, "step": 7136 }, { "epoch": 13.589158345221113, "grad_norm": 0.27290740609169006, "learning_rate": 5.4703080342966026e-05, "loss": 0.0731, "step": 7137 }, { "epoch": 13.591060389919162, "grad_norm": 0.3027440905570984, "learning_rate": 5.469672912035567e-05, "loss": 0.0826, "step": 7138 }, { "epoch": 13.592962434617213, "grad_norm": 0.13028496503829956, "learning_rate": 5.4690377897745316e-05, "loss": 0.0417, "step": 7139 }, { "epoch": 13.594864479315264, "grad_norm": 0.22668135166168213, "learning_rate": 5.468402667513497e-05, "loss": 0.0577, "step": 7140 }, { "epoch": 13.596766524013315, "grad_norm": 0.20752792060375214, "learning_rate": 5.467767545252461e-05, "loss": 0.0867, "step": 7141 }, { "epoch": 13.598668568711364, "grad_norm": 0.18179014325141907, "learning_rate": 5.467132422991426e-05, "loss": 0.0604, "step": 7142 }, { "epoch": 13.600570613409415, "grad_norm": 0.2199116200208664, "learning_rate": 5.466497300730391e-05, "loss": 0.0552, "step": 7143 }, { "epoch": 13.602472658107466, "grad_norm": 0.36437416076660156, "learning_rate": 5.4658621784693555e-05, "loss": 0.0819, "step": 7144 }, { "epoch": 13.604374702805515, "grad_norm": 0.17574447393417358, "learning_rate": 5.4652270562083207e-05, "loss": 0.0728, "step": 7145 }, { "epoch": 13.606276747503566, "grad_norm": 0.13370124995708466, "learning_rate": 5.464591933947285e-05, "loss": 0.065, "step": 7146 }, { "epoch": 13.608178792201617, "grad_norm": 0.18489646911621094, "learning_rate": 5.46395681168625e-05, "loss": 0.0566, "step": 7147 }, { "epoch": 13.610080836899668, "grad_norm": 0.08928623795509338, "learning_rate": 5.463321689425215e-05, "loss": 0.0459, "step": 7148 }, { "epoch": 13.611982881597717, "grad_norm": 0.11430780589580536, "learning_rate": 5.4626865671641794e-05, "loss": 0.0611, "step": 7149 }, { "epoch": 13.613884926295768, "grad_norm": 0.2574712038040161, "learning_rate": 5.4620514449031445e-05, "loss": 0.0737, "step": 7150 }, { "epoch": 13.615786970993819, "grad_norm": 0.2580837607383728, "learning_rate": 5.461416322642109e-05, "loss": 0.063, "step": 7151 }, { "epoch": 13.61768901569187, "grad_norm": 0.2556533217430115, "learning_rate": 5.4607812003810736e-05, "loss": 0.0767, "step": 7152 }, { "epoch": 13.619591060389919, "grad_norm": 0.2444048970937729, "learning_rate": 5.460146078120039e-05, "loss": 0.0945, "step": 7153 }, { "epoch": 13.62149310508797, "grad_norm": 0.2365688532590866, "learning_rate": 5.459510955859003e-05, "loss": 0.0624, "step": 7154 }, { "epoch": 13.62339514978602, "grad_norm": 0.24796128273010254, "learning_rate": 5.4588758335979684e-05, "loss": 0.0612, "step": 7155 }, { "epoch": 13.62529719448407, "grad_norm": 0.32208365201950073, "learning_rate": 5.458240711336933e-05, "loss": 0.0815, "step": 7156 }, { "epoch": 13.62719923918212, "grad_norm": 0.2308531254529953, "learning_rate": 5.457605589075897e-05, "loss": 0.0671, "step": 7157 }, { "epoch": 13.629101283880171, "grad_norm": 0.13749493658542633, "learning_rate": 5.4569704668148626e-05, "loss": 0.0526, "step": 7158 }, { "epoch": 13.631003328578222, "grad_norm": 0.24286620318889618, "learning_rate": 5.4563353445538265e-05, "loss": 0.0526, "step": 7159 }, { "epoch": 13.632905373276271, "grad_norm": 0.17536072432994843, "learning_rate": 5.455700222292791e-05, "loss": 0.049, "step": 7160 }, { "epoch": 13.634807417974322, "grad_norm": 0.1444406658411026, "learning_rate": 5.455065100031757e-05, "loss": 0.051, "step": 7161 }, { "epoch": 13.636709462672373, "grad_norm": 0.2647935450077057, "learning_rate": 5.4544299777707206e-05, "loss": 0.0689, "step": 7162 }, { "epoch": 13.638611507370424, "grad_norm": 0.14098279178142548, "learning_rate": 5.4537948555096865e-05, "loss": 0.0579, "step": 7163 }, { "epoch": 13.640513552068473, "grad_norm": 0.18277288973331451, "learning_rate": 5.4531597332486503e-05, "loss": 0.096, "step": 7164 }, { "epoch": 13.642415596766524, "grad_norm": 0.17182549834251404, "learning_rate": 5.452524610987615e-05, "loss": 0.0591, "step": 7165 }, { "epoch": 13.644317641464575, "grad_norm": 0.1310691237449646, "learning_rate": 5.45188948872658e-05, "loss": 0.0724, "step": 7166 }, { "epoch": 13.646219686162624, "grad_norm": 0.16267623007297516, "learning_rate": 5.4512543664655445e-05, "loss": 0.078, "step": 7167 }, { "epoch": 13.648121730860675, "grad_norm": 0.15424087643623352, "learning_rate": 5.4506192442045104e-05, "loss": 0.0704, "step": 7168 }, { "epoch": 13.650023775558726, "grad_norm": 0.13465258479118347, "learning_rate": 5.449984121943474e-05, "loss": 0.0765, "step": 7169 }, { "epoch": 13.651925820256777, "grad_norm": 0.19921620190143585, "learning_rate": 5.449348999682439e-05, "loss": 0.0626, "step": 7170 }, { "epoch": 13.653827864954826, "grad_norm": 0.1628495454788208, "learning_rate": 5.448713877421404e-05, "loss": 0.0812, "step": 7171 }, { "epoch": 13.655729909652877, "grad_norm": 0.3022763729095459, "learning_rate": 5.4480787551603684e-05, "loss": 0.0574, "step": 7172 }, { "epoch": 13.657631954350927, "grad_norm": 0.14462687075138092, "learning_rate": 5.4474436328993336e-05, "loss": 0.0693, "step": 7173 }, { "epoch": 13.659533999048978, "grad_norm": 0.24949683248996735, "learning_rate": 5.446808510638298e-05, "loss": 0.0732, "step": 7174 }, { "epoch": 13.661436043747027, "grad_norm": 0.11945445835590363, "learning_rate": 5.4461733883772626e-05, "loss": 0.0692, "step": 7175 }, { "epoch": 13.663338088445078, "grad_norm": 0.22510592639446259, "learning_rate": 5.445538266116228e-05, "loss": 0.0531, "step": 7176 }, { "epoch": 13.66524013314313, "grad_norm": 0.1811244785785675, "learning_rate": 5.444903143855192e-05, "loss": 0.0602, "step": 7177 }, { "epoch": 13.667142177841178, "grad_norm": 0.299470454454422, "learning_rate": 5.444268021594157e-05, "loss": 0.056, "step": 7178 }, { "epoch": 13.66904422253923, "grad_norm": 0.15481477975845337, "learning_rate": 5.443632899333122e-05, "loss": 0.046, "step": 7179 }, { "epoch": 13.67094626723728, "grad_norm": 0.15388630330562592, "learning_rate": 5.4429977770720865e-05, "loss": 0.0484, "step": 7180 }, { "epoch": 13.672848311935331, "grad_norm": 0.25386807322502136, "learning_rate": 5.442362654811052e-05, "loss": 0.0654, "step": 7181 }, { "epoch": 13.67475035663338, "grad_norm": 0.18215690553188324, "learning_rate": 5.441727532550016e-05, "loss": 0.0591, "step": 7182 }, { "epoch": 13.676652401331431, "grad_norm": 0.1830330491065979, "learning_rate": 5.441092410288981e-05, "loss": 0.0577, "step": 7183 }, { "epoch": 13.678554446029482, "grad_norm": 0.21742892265319824, "learning_rate": 5.440457288027946e-05, "loss": 0.0697, "step": 7184 }, { "epoch": 13.680456490727533, "grad_norm": 0.34482452273368835, "learning_rate": 5.4398221657669104e-05, "loss": 0.0628, "step": 7185 }, { "epoch": 13.682358535425582, "grad_norm": 0.1803099662065506, "learning_rate": 5.4391870435058756e-05, "loss": 0.0586, "step": 7186 }, { "epoch": 13.684260580123633, "grad_norm": 0.2071414440870285, "learning_rate": 5.43855192124484e-05, "loss": 0.0537, "step": 7187 }, { "epoch": 13.686162624821684, "grad_norm": 0.14682170748710632, "learning_rate": 5.437916798983804e-05, "loss": 0.0621, "step": 7188 }, { "epoch": 13.688064669519733, "grad_norm": 0.19556795060634613, "learning_rate": 5.43728167672277e-05, "loss": 0.0524, "step": 7189 }, { "epoch": 13.689966714217784, "grad_norm": 0.2657491862773895, "learning_rate": 5.436646554461734e-05, "loss": 0.0598, "step": 7190 }, { "epoch": 13.691868758915835, "grad_norm": 0.28120002150535583, "learning_rate": 5.4360114322006995e-05, "loss": 0.0694, "step": 7191 }, { "epoch": 13.693770803613885, "grad_norm": 0.1710984855890274, "learning_rate": 5.435376309939664e-05, "loss": 0.0557, "step": 7192 }, { "epoch": 13.695672848311935, "grad_norm": 0.1776714026927948, "learning_rate": 5.434741187678628e-05, "loss": 0.0666, "step": 7193 }, { "epoch": 13.697574893009985, "grad_norm": 0.1420143097639084, "learning_rate": 5.434106065417594e-05, "loss": 0.0581, "step": 7194 }, { "epoch": 13.699476937708036, "grad_norm": 0.2275141328573227, "learning_rate": 5.4334709431565575e-05, "loss": 0.0704, "step": 7195 }, { "epoch": 13.701378982406087, "grad_norm": 0.20450417697429657, "learning_rate": 5.432835820895522e-05, "loss": 0.0861, "step": 7196 }, { "epoch": 13.703281027104136, "grad_norm": 0.2755592167377472, "learning_rate": 5.432200698634488e-05, "loss": 0.0699, "step": 7197 }, { "epoch": 13.705183071802187, "grad_norm": 0.1973126381635666, "learning_rate": 5.431565576373452e-05, "loss": 0.0798, "step": 7198 }, { "epoch": 13.707085116500238, "grad_norm": 0.3487046957015991, "learning_rate": 5.4309304541124176e-05, "loss": 0.0843, "step": 7199 }, { "epoch": 13.708987161198289, "grad_norm": 0.2203672230243683, "learning_rate": 5.4302953318513814e-05, "loss": 0.0599, "step": 7200 }, { "epoch": 13.710889205896338, "grad_norm": 0.29556646943092346, "learning_rate": 5.429660209590346e-05, "loss": 0.076, "step": 7201 }, { "epoch": 13.712791250594389, "grad_norm": 0.16539618372917175, "learning_rate": 5.429025087329311e-05, "loss": 0.0591, "step": 7202 }, { "epoch": 13.71469329529244, "grad_norm": 0.2217624932527542, "learning_rate": 5.4283899650682756e-05, "loss": 0.0483, "step": 7203 }, { "epoch": 13.716595339990489, "grad_norm": 0.25081562995910645, "learning_rate": 5.4277548428072414e-05, "loss": 0.0707, "step": 7204 }, { "epoch": 13.71849738468854, "grad_norm": 0.17247477173805237, "learning_rate": 5.427119720546205e-05, "loss": 0.0644, "step": 7205 }, { "epoch": 13.72039942938659, "grad_norm": 0.11861587315797806, "learning_rate": 5.42648459828517e-05, "loss": 0.0671, "step": 7206 }, { "epoch": 13.722301474084642, "grad_norm": 0.20958885550498962, "learning_rate": 5.425849476024135e-05, "loss": 0.0687, "step": 7207 }, { "epoch": 13.72420351878269, "grad_norm": 0.20735900104045868, "learning_rate": 5.4252143537630995e-05, "loss": 0.0612, "step": 7208 }, { "epoch": 13.726105563480742, "grad_norm": 0.16221395134925842, "learning_rate": 5.4245792315020647e-05, "loss": 0.0706, "step": 7209 }, { "epoch": 13.728007608178793, "grad_norm": 0.24386169016361237, "learning_rate": 5.423944109241029e-05, "loss": 0.0941, "step": 7210 }, { "epoch": 13.729909652876843, "grad_norm": 0.12547758221626282, "learning_rate": 5.423308986979994e-05, "loss": 0.0663, "step": 7211 }, { "epoch": 13.731811697574893, "grad_norm": 0.21748408675193787, "learning_rate": 5.422673864718959e-05, "loss": 0.0669, "step": 7212 }, { "epoch": 13.733713742272943, "grad_norm": 0.3474538028240204, "learning_rate": 5.4220387424579234e-05, "loss": 0.0571, "step": 7213 }, { "epoch": 13.735615786970994, "grad_norm": 0.20235182344913483, "learning_rate": 5.421403620196888e-05, "loss": 0.0567, "step": 7214 }, { "epoch": 13.737517831669045, "grad_norm": 0.19568586349487305, "learning_rate": 5.420768497935853e-05, "loss": 0.0629, "step": 7215 }, { "epoch": 13.739419876367094, "grad_norm": 0.238690584897995, "learning_rate": 5.4201333756748176e-05, "loss": 0.0619, "step": 7216 }, { "epoch": 13.741321921065145, "grad_norm": 0.0995822325348854, "learning_rate": 5.419498253413783e-05, "loss": 0.0699, "step": 7217 }, { "epoch": 13.743223965763196, "grad_norm": 0.18771526217460632, "learning_rate": 5.418863131152747e-05, "loss": 0.0622, "step": 7218 }, { "epoch": 13.745126010461245, "grad_norm": 0.2403172105550766, "learning_rate": 5.418228008891712e-05, "loss": 0.061, "step": 7219 }, { "epoch": 13.747028055159296, "grad_norm": 0.3270457983016968, "learning_rate": 5.417592886630677e-05, "loss": 0.0787, "step": 7220 }, { "epoch": 13.748930099857347, "grad_norm": 0.2265048325061798, "learning_rate": 5.4169577643696414e-05, "loss": 0.06, "step": 7221 }, { "epoch": 13.750832144555398, "grad_norm": 0.18946008384227753, "learning_rate": 5.4163226421086066e-05, "loss": 0.0538, "step": 7222 }, { "epoch": 13.752734189253447, "grad_norm": 0.22583557665348053, "learning_rate": 5.415687519847571e-05, "loss": 0.0624, "step": 7223 }, { "epoch": 13.754636233951498, "grad_norm": 0.2988790273666382, "learning_rate": 5.415052397586535e-05, "loss": 0.0755, "step": 7224 }, { "epoch": 13.756538278649549, "grad_norm": 0.24329178035259247, "learning_rate": 5.414417275325501e-05, "loss": 0.049, "step": 7225 }, { "epoch": 13.7584403233476, "grad_norm": 0.13913269340991974, "learning_rate": 5.413782153064465e-05, "loss": 0.0712, "step": 7226 }, { "epoch": 13.760342368045649, "grad_norm": 0.23898306488990784, "learning_rate": 5.4131470308034305e-05, "loss": 0.0507, "step": 7227 }, { "epoch": 13.7622444127437, "grad_norm": 0.14138242602348328, "learning_rate": 5.412511908542395e-05, "loss": 0.0596, "step": 7228 }, { "epoch": 13.76414645744175, "grad_norm": 0.20261605083942413, "learning_rate": 5.411876786281359e-05, "loss": 0.0539, "step": 7229 }, { "epoch": 13.7660485021398, "grad_norm": 0.16496528685092926, "learning_rate": 5.411241664020325e-05, "loss": 0.0649, "step": 7230 }, { "epoch": 13.76795054683785, "grad_norm": 0.3304959535598755, "learning_rate": 5.4106065417592885e-05, "loss": 0.0644, "step": 7231 }, { "epoch": 13.769852591535901, "grad_norm": 0.2712630033493042, "learning_rate": 5.409971419498253e-05, "loss": 0.0699, "step": 7232 }, { "epoch": 13.771754636233952, "grad_norm": 0.21757693588733673, "learning_rate": 5.409336297237219e-05, "loss": 0.0727, "step": 7233 }, { "epoch": 13.773656680932001, "grad_norm": 0.1744980663061142, "learning_rate": 5.408701174976183e-05, "loss": 0.037, "step": 7234 }, { "epoch": 13.775558725630052, "grad_norm": 0.23627837002277374, "learning_rate": 5.4080660527151486e-05, "loss": 0.0585, "step": 7235 }, { "epoch": 13.777460770328103, "grad_norm": 0.19850653409957886, "learning_rate": 5.4074309304541124e-05, "loss": 0.0629, "step": 7236 }, { "epoch": 13.779362815026154, "grad_norm": 0.18019446730613708, "learning_rate": 5.406795808193077e-05, "loss": 0.0442, "step": 7237 }, { "epoch": 13.781264859724203, "grad_norm": 0.26067209243774414, "learning_rate": 5.406160685932042e-05, "loss": 0.064, "step": 7238 }, { "epoch": 13.783166904422254, "grad_norm": 0.27970629930496216, "learning_rate": 5.4055255636710066e-05, "loss": 0.0782, "step": 7239 }, { "epoch": 13.785068949120305, "grad_norm": 0.2066178172826767, "learning_rate": 5.404890441409972e-05, "loss": 0.0663, "step": 7240 }, { "epoch": 13.786970993818354, "grad_norm": 0.18839718401432037, "learning_rate": 5.404255319148936e-05, "loss": 0.0667, "step": 7241 }, { "epoch": 13.788873038516405, "grad_norm": 0.274984747171402, "learning_rate": 5.403620196887901e-05, "loss": 0.0826, "step": 7242 }, { "epoch": 13.790775083214456, "grad_norm": 0.1409456431865692, "learning_rate": 5.402985074626866e-05, "loss": 0.0455, "step": 7243 }, { "epoch": 13.792677127912507, "grad_norm": 0.2696741819381714, "learning_rate": 5.4023499523658305e-05, "loss": 0.063, "step": 7244 }, { "epoch": 13.794579172610556, "grad_norm": 0.21434366703033447, "learning_rate": 5.401714830104796e-05, "loss": 0.0543, "step": 7245 }, { "epoch": 13.796481217308607, "grad_norm": 0.20650018751621246, "learning_rate": 5.40107970784376e-05, "loss": 0.0513, "step": 7246 }, { "epoch": 13.798383262006658, "grad_norm": 0.18018142879009247, "learning_rate": 5.400444585582725e-05, "loss": 0.0466, "step": 7247 }, { "epoch": 13.800285306704708, "grad_norm": 0.14728957414627075, "learning_rate": 5.39980946332169e-05, "loss": 0.0455, "step": 7248 }, { "epoch": 13.802187351402758, "grad_norm": 0.18369527161121368, "learning_rate": 5.3991743410606544e-05, "loss": 0.049, "step": 7249 }, { "epoch": 13.804089396100808, "grad_norm": 0.15343110263347626, "learning_rate": 5.398539218799619e-05, "loss": 0.044, "step": 7250 }, { "epoch": 13.80599144079886, "grad_norm": 0.19930998980998993, "learning_rate": 5.397904096538584e-05, "loss": 0.0623, "step": 7251 }, { "epoch": 13.807893485496908, "grad_norm": 0.15696053206920624, "learning_rate": 5.3972689742775486e-05, "loss": 0.0719, "step": 7252 }, { "epoch": 13.80979553019496, "grad_norm": 0.19954028725624084, "learning_rate": 5.396633852016514e-05, "loss": 0.0411, "step": 7253 }, { "epoch": 13.81169757489301, "grad_norm": 0.3664581775665283, "learning_rate": 5.395998729755478e-05, "loss": 0.1245, "step": 7254 }, { "epoch": 13.813599619591061, "grad_norm": 0.2724677622318268, "learning_rate": 5.395363607494443e-05, "loss": 0.0596, "step": 7255 }, { "epoch": 13.81550166428911, "grad_norm": 0.2526918053627014, "learning_rate": 5.394728485233408e-05, "loss": 0.0606, "step": 7256 }, { "epoch": 13.817403708987161, "grad_norm": 0.2746874988079071, "learning_rate": 5.3940933629723725e-05, "loss": 0.1144, "step": 7257 }, { "epoch": 13.819305753685212, "grad_norm": 0.2281782627105713, "learning_rate": 5.393458240711338e-05, "loss": 0.0913, "step": 7258 }, { "epoch": 13.821207798383263, "grad_norm": 0.2360704243183136, "learning_rate": 5.392823118450302e-05, "loss": 0.0608, "step": 7259 }, { "epoch": 13.823109843081312, "grad_norm": 0.23050999641418457, "learning_rate": 5.392187996189266e-05, "loss": 0.0475, "step": 7260 }, { "epoch": 13.825011887779363, "grad_norm": 0.16015152633190155, "learning_rate": 5.391552873928232e-05, "loss": 0.0437, "step": 7261 }, { "epoch": 13.826913932477414, "grad_norm": 0.12719005346298218, "learning_rate": 5.3909177516671964e-05, "loss": 0.0702, "step": 7262 }, { "epoch": 13.828815977175463, "grad_norm": 0.21993082761764526, "learning_rate": 5.3902826294061616e-05, "loss": 0.0671, "step": 7263 }, { "epoch": 13.830718021873514, "grad_norm": 0.20380781590938568, "learning_rate": 5.389647507145126e-05, "loss": 0.055, "step": 7264 }, { "epoch": 13.832620066571565, "grad_norm": 0.29961729049682617, "learning_rate": 5.38901238488409e-05, "loss": 0.0689, "step": 7265 }, { "epoch": 13.834522111269616, "grad_norm": 0.2058323621749878, "learning_rate": 5.388377262623056e-05, "loss": 0.0671, "step": 7266 }, { "epoch": 13.836424155967665, "grad_norm": 0.33653509616851807, "learning_rate": 5.3877421403620196e-05, "loss": 0.0624, "step": 7267 }, { "epoch": 13.838326200665716, "grad_norm": 0.13654020428657532, "learning_rate": 5.387107018100984e-05, "loss": 0.0493, "step": 7268 }, { "epoch": 13.840228245363766, "grad_norm": 0.31805330514907837, "learning_rate": 5.386471895839949e-05, "loss": 0.0672, "step": 7269 }, { "epoch": 13.842130290061817, "grad_norm": 0.1884007602930069, "learning_rate": 5.385836773578914e-05, "loss": 0.0721, "step": 7270 }, { "epoch": 13.844032334759866, "grad_norm": 0.2302660197019577, "learning_rate": 5.3852016513178796e-05, "loss": 0.0655, "step": 7271 }, { "epoch": 13.845934379457917, "grad_norm": 0.11893751472234726, "learning_rate": 5.3845665290568435e-05, "loss": 0.05, "step": 7272 }, { "epoch": 13.847836424155968, "grad_norm": 0.18107381463050842, "learning_rate": 5.383931406795808e-05, "loss": 0.0645, "step": 7273 }, { "epoch": 13.849738468854017, "grad_norm": 0.2402561604976654, "learning_rate": 5.383296284534773e-05, "loss": 0.0692, "step": 7274 }, { "epoch": 13.851640513552068, "grad_norm": 0.1792139858007431, "learning_rate": 5.382661162273738e-05, "loss": 0.0536, "step": 7275 }, { "epoch": 13.853542558250119, "grad_norm": 0.2682904005050659, "learning_rate": 5.382026040012703e-05, "loss": 0.0549, "step": 7276 }, { "epoch": 13.85544460294817, "grad_norm": 0.2266082614660263, "learning_rate": 5.3813909177516674e-05, "loss": 0.069, "step": 7277 }, { "epoch": 13.857346647646219, "grad_norm": 0.2881041467189789, "learning_rate": 5.380755795490632e-05, "loss": 0.0659, "step": 7278 }, { "epoch": 13.85924869234427, "grad_norm": 0.30093881487846375, "learning_rate": 5.380120673229597e-05, "loss": 0.0783, "step": 7279 }, { "epoch": 13.86115073704232, "grad_norm": 0.2137882113456726, "learning_rate": 5.3794855509685616e-05, "loss": 0.0688, "step": 7280 }, { "epoch": 13.863052781740372, "grad_norm": 0.2526339888572693, "learning_rate": 5.378850428707527e-05, "loss": 0.0603, "step": 7281 }, { "epoch": 13.86495482643842, "grad_norm": 0.20394502580165863, "learning_rate": 5.378215306446491e-05, "loss": 0.0677, "step": 7282 }, { "epoch": 13.866856871136472, "grad_norm": 0.15528029203414917, "learning_rate": 5.377580184185456e-05, "loss": 0.0619, "step": 7283 }, { "epoch": 13.868758915834523, "grad_norm": 0.21263182163238525, "learning_rate": 5.376945061924421e-05, "loss": 0.0735, "step": 7284 }, { "epoch": 13.870660960532572, "grad_norm": 0.2879989743232727, "learning_rate": 5.3763099396633854e-05, "loss": 0.0747, "step": 7285 }, { "epoch": 13.872563005230623, "grad_norm": 0.25435084104537964, "learning_rate": 5.37567481740235e-05, "loss": 0.058, "step": 7286 }, { "epoch": 13.874465049928673, "grad_norm": 0.15193717181682587, "learning_rate": 5.375039695141315e-05, "loss": 0.0609, "step": 7287 }, { "epoch": 13.876367094626724, "grad_norm": 0.20760442316532135, "learning_rate": 5.3744045728802796e-05, "loss": 0.0512, "step": 7288 }, { "epoch": 13.878269139324773, "grad_norm": 0.16267850995063782, "learning_rate": 5.373769450619245e-05, "loss": 0.0602, "step": 7289 }, { "epoch": 13.880171184022824, "grad_norm": 0.2610892951488495, "learning_rate": 5.373134328358209e-05, "loss": 0.0633, "step": 7290 }, { "epoch": 13.882073228720875, "grad_norm": 0.22512303292751312, "learning_rate": 5.372499206097173e-05, "loss": 0.0598, "step": 7291 }, { "epoch": 13.883975273418926, "grad_norm": 0.23303070664405823, "learning_rate": 5.371864083836139e-05, "loss": 0.0702, "step": 7292 }, { "epoch": 13.885877318116975, "grad_norm": 0.19913730025291443, "learning_rate": 5.3712289615751035e-05, "loss": 0.0662, "step": 7293 }, { "epoch": 13.887779362815026, "grad_norm": 0.30238837003707886, "learning_rate": 5.370593839314069e-05, "loss": 0.0923, "step": 7294 }, { "epoch": 13.889681407513077, "grad_norm": 0.22719161212444305, "learning_rate": 5.369958717053033e-05, "loss": 0.0577, "step": 7295 }, { "epoch": 13.891583452211126, "grad_norm": 0.19003400206565857, "learning_rate": 5.369323594791997e-05, "loss": 0.06, "step": 7296 }, { "epoch": 13.893485496909177, "grad_norm": 0.20794767141342163, "learning_rate": 5.368688472530963e-05, "loss": 0.0596, "step": 7297 }, { "epoch": 13.895387541607228, "grad_norm": 0.14656628668308258, "learning_rate": 5.368053350269927e-05, "loss": 0.0594, "step": 7298 }, { "epoch": 13.897289586305279, "grad_norm": 0.1808067113161087, "learning_rate": 5.3674182280088926e-05, "loss": 0.0743, "step": 7299 }, { "epoch": 13.899191631003328, "grad_norm": 0.27814120054244995, "learning_rate": 5.366783105747857e-05, "loss": 0.0865, "step": 7300 }, { "epoch": 13.901093675701379, "grad_norm": 0.17773757874965668, "learning_rate": 5.366147983486821e-05, "loss": 0.0633, "step": 7301 }, { "epoch": 13.90299572039943, "grad_norm": 0.171430766582489, "learning_rate": 5.365512861225787e-05, "loss": 0.0538, "step": 7302 }, { "epoch": 13.90489776509748, "grad_norm": 0.16702894866466522, "learning_rate": 5.3648777389647506e-05, "loss": 0.0557, "step": 7303 }, { "epoch": 13.90679980979553, "grad_norm": 0.241920605301857, "learning_rate": 5.364242616703715e-05, "loss": 0.0567, "step": 7304 }, { "epoch": 13.90870185449358, "grad_norm": 0.1949584186077118, "learning_rate": 5.36360749444268e-05, "loss": 0.0855, "step": 7305 }, { "epoch": 13.910603899191631, "grad_norm": 0.17606988549232483, "learning_rate": 5.362972372181645e-05, "loss": 0.0715, "step": 7306 }, { "epoch": 13.91250594388968, "grad_norm": 0.2018020749092102, "learning_rate": 5.362337249920611e-05, "loss": 0.0672, "step": 7307 }, { "epoch": 13.914407988587731, "grad_norm": 0.20741713047027588, "learning_rate": 5.3617021276595745e-05, "loss": 0.067, "step": 7308 }, { "epoch": 13.916310033285782, "grad_norm": 0.22029127180576324, "learning_rate": 5.361067005398539e-05, "loss": 0.0438, "step": 7309 }, { "epoch": 13.918212077983833, "grad_norm": 0.13996699452400208, "learning_rate": 5.360431883137504e-05, "loss": 0.0601, "step": 7310 }, { "epoch": 13.920114122681882, "grad_norm": 0.23882152140140533, "learning_rate": 5.359796760876469e-05, "loss": 0.0614, "step": 7311 }, { "epoch": 13.922016167379933, "grad_norm": 0.3080902099609375, "learning_rate": 5.359161638615434e-05, "loss": 0.0838, "step": 7312 }, { "epoch": 13.923918212077984, "grad_norm": 0.24677343666553497, "learning_rate": 5.3585265163543984e-05, "loss": 0.0611, "step": 7313 }, { "epoch": 13.925820256776035, "grad_norm": 0.4021768569946289, "learning_rate": 5.357891394093363e-05, "loss": 0.0867, "step": 7314 }, { "epoch": 13.927722301474084, "grad_norm": 0.23158122599124908, "learning_rate": 5.357256271832328e-05, "loss": 0.0553, "step": 7315 }, { "epoch": 13.929624346172135, "grad_norm": 0.2736777067184448, "learning_rate": 5.3566211495712926e-05, "loss": 0.0649, "step": 7316 }, { "epoch": 13.931526390870186, "grad_norm": 0.12983861565589905, "learning_rate": 5.355986027310258e-05, "loss": 0.0553, "step": 7317 }, { "epoch": 13.933428435568235, "grad_norm": 0.20654582977294922, "learning_rate": 5.355350905049222e-05, "loss": 0.0512, "step": 7318 }, { "epoch": 13.935330480266286, "grad_norm": 0.2009374499320984, "learning_rate": 5.354715782788187e-05, "loss": 0.0513, "step": 7319 }, { "epoch": 13.937232524964337, "grad_norm": 0.2418518364429474, "learning_rate": 5.354080660527152e-05, "loss": 0.0585, "step": 7320 }, { "epoch": 13.939134569662388, "grad_norm": 0.17909492552280426, "learning_rate": 5.3534455382661165e-05, "loss": 0.0595, "step": 7321 }, { "epoch": 13.941036614360437, "grad_norm": 0.2896093726158142, "learning_rate": 5.352810416005081e-05, "loss": 0.1046, "step": 7322 }, { "epoch": 13.942938659058488, "grad_norm": 0.3157602846622467, "learning_rate": 5.352175293744046e-05, "loss": 0.0576, "step": 7323 }, { "epoch": 13.944840703756539, "grad_norm": 0.2978362441062927, "learning_rate": 5.351540171483011e-05, "loss": 0.0701, "step": 7324 }, { "epoch": 13.94674274845459, "grad_norm": 0.13338571786880493, "learning_rate": 5.350905049221976e-05, "loss": 0.0548, "step": 7325 }, { "epoch": 13.948644793152638, "grad_norm": 0.5071760416030884, "learning_rate": 5.3502699269609404e-05, "loss": 0.0801, "step": 7326 }, { "epoch": 13.95054683785069, "grad_norm": 0.20635323226451874, "learning_rate": 5.349634804699904e-05, "loss": 0.0495, "step": 7327 }, { "epoch": 13.95244888254874, "grad_norm": 0.3134024441242218, "learning_rate": 5.34899968243887e-05, "loss": 0.0615, "step": 7328 }, { "epoch": 13.95435092724679, "grad_norm": 0.16929863393306732, "learning_rate": 5.3483645601778346e-05, "loss": 0.0657, "step": 7329 }, { "epoch": 13.95625297194484, "grad_norm": 0.23720704019069672, "learning_rate": 5.3477294379168e-05, "loss": 0.0664, "step": 7330 }, { "epoch": 13.958155016642891, "grad_norm": 0.30032214522361755, "learning_rate": 5.347094315655764e-05, "loss": 0.0661, "step": 7331 }, { "epoch": 13.960057061340942, "grad_norm": 0.212426096200943, "learning_rate": 5.346459193394728e-05, "loss": 0.0686, "step": 7332 }, { "epoch": 13.961959106038991, "grad_norm": 0.3285209536552429, "learning_rate": 5.345824071133694e-05, "loss": 0.0782, "step": 7333 }, { "epoch": 13.963861150737042, "grad_norm": 0.19874542951583862, "learning_rate": 5.345188948872658e-05, "loss": 0.0994, "step": 7334 }, { "epoch": 13.965763195435093, "grad_norm": 0.1890346109867096, "learning_rate": 5.3445538266116236e-05, "loss": 0.0763, "step": 7335 }, { "epoch": 13.967665240133144, "grad_norm": 0.20329201221466064, "learning_rate": 5.343918704350588e-05, "loss": 0.0609, "step": 7336 }, { "epoch": 13.969567284831193, "grad_norm": 0.20286491513252258, "learning_rate": 5.343283582089552e-05, "loss": 0.0491, "step": 7337 }, { "epoch": 13.971469329529244, "grad_norm": 0.1681714653968811, "learning_rate": 5.342648459828518e-05, "loss": 0.0587, "step": 7338 }, { "epoch": 13.973371374227295, "grad_norm": 0.23216994106769562, "learning_rate": 5.342013337567482e-05, "loss": 0.0584, "step": 7339 }, { "epoch": 13.975273418925344, "grad_norm": 0.1894179880619049, "learning_rate": 5.341378215306446e-05, "loss": 0.0499, "step": 7340 }, { "epoch": 13.977175463623395, "grad_norm": 0.2528207302093506, "learning_rate": 5.3407430930454114e-05, "loss": 0.0671, "step": 7341 }, { "epoch": 13.979077508321446, "grad_norm": 0.21460577845573425, "learning_rate": 5.340107970784376e-05, "loss": 0.0538, "step": 7342 }, { "epoch": 13.980979553019496, "grad_norm": 0.23168377578258514, "learning_rate": 5.339472848523342e-05, "loss": 0.066, "step": 7343 }, { "epoch": 13.982881597717546, "grad_norm": 0.2557571828365326, "learning_rate": 5.3388377262623056e-05, "loss": 0.069, "step": 7344 }, { "epoch": 13.984783642415596, "grad_norm": 0.28882935643196106, "learning_rate": 5.33820260400127e-05, "loss": 0.0541, "step": 7345 }, { "epoch": 13.986685687113647, "grad_norm": 0.13144026696681976, "learning_rate": 5.337567481740235e-05, "loss": 0.0678, "step": 7346 }, { "epoch": 13.988587731811698, "grad_norm": 0.17657329142093658, "learning_rate": 5.3369323594792e-05, "loss": 0.0612, "step": 7347 }, { "epoch": 13.990489776509747, "grad_norm": 0.1429637223482132, "learning_rate": 5.336297237218165e-05, "loss": 0.0561, "step": 7348 }, { "epoch": 13.992391821207798, "grad_norm": 0.12323079258203506, "learning_rate": 5.3356621149571294e-05, "loss": 0.059, "step": 7349 }, { "epoch": 13.99429386590585, "grad_norm": 0.17069973051548004, "learning_rate": 5.335026992696094e-05, "loss": 0.0675, "step": 7350 }, { "epoch": 13.9961959106039, "grad_norm": 0.16346120834350586, "learning_rate": 5.334391870435059e-05, "loss": 0.0456, "step": 7351 }, { "epoch": 13.99809795530195, "grad_norm": 0.1862858533859253, "learning_rate": 5.3337567481740236e-05, "loss": 0.0584, "step": 7352 }, { "epoch": 14.0, "grad_norm": 0.2782989740371704, "learning_rate": 5.333121625912989e-05, "loss": 0.0682, "step": 7353 }, { "epoch": 14.00190204469805, "grad_norm": 0.2528998851776123, "learning_rate": 5.332486503651953e-05, "loss": 0.0594, "step": 7354 }, { "epoch": 14.0038040893961, "grad_norm": 0.19271168112754822, "learning_rate": 5.331851381390918e-05, "loss": 0.0616, "step": 7355 }, { "epoch": 14.00570613409415, "grad_norm": 0.14756593108177185, "learning_rate": 5.331216259129883e-05, "loss": 0.0566, "step": 7356 }, { "epoch": 14.007608178792202, "grad_norm": 0.2509566843509674, "learning_rate": 5.3305811368688475e-05, "loss": 0.0683, "step": 7357 }, { "epoch": 14.009510223490253, "grad_norm": 0.11840216815471649, "learning_rate": 5.329946014607812e-05, "loss": 0.0585, "step": 7358 }, { "epoch": 14.011412268188302, "grad_norm": 0.15667672455310822, "learning_rate": 5.329310892346777e-05, "loss": 0.0677, "step": 7359 }, { "epoch": 14.013314312886353, "grad_norm": 0.11608525365591049, "learning_rate": 5.328675770085742e-05, "loss": 0.0521, "step": 7360 }, { "epoch": 14.015216357584404, "grad_norm": 0.09074059873819351, "learning_rate": 5.328040647824707e-05, "loss": 0.0405, "step": 7361 }, { "epoch": 14.017118402282454, "grad_norm": 0.20417606830596924, "learning_rate": 5.3274055255636714e-05, "loss": 0.0486, "step": 7362 }, { "epoch": 14.019020446980504, "grad_norm": 0.15287773311138153, "learning_rate": 5.326770403302635e-05, "loss": 0.0691, "step": 7363 }, { "epoch": 14.020922491678554, "grad_norm": 0.2926754057407379, "learning_rate": 5.326135281041601e-05, "loss": 0.0764, "step": 7364 }, { "epoch": 14.022824536376605, "grad_norm": 0.16718968749046326, "learning_rate": 5.3255001587805656e-05, "loss": 0.0601, "step": 7365 }, { "epoch": 14.024726581074654, "grad_norm": 0.18837334215641022, "learning_rate": 5.324865036519531e-05, "loss": 0.054, "step": 7366 }, { "epoch": 14.026628625772705, "grad_norm": 0.1734861582517624, "learning_rate": 5.324229914258495e-05, "loss": 0.07, "step": 7367 }, { "epoch": 14.028530670470756, "grad_norm": 0.38013237714767456, "learning_rate": 5.323594791997459e-05, "loss": 0.0718, "step": 7368 }, { "epoch": 14.030432715168807, "grad_norm": 0.19275639951229095, "learning_rate": 5.322959669736425e-05, "loss": 0.071, "step": 7369 }, { "epoch": 14.032334759866856, "grad_norm": 0.09969597309827805, "learning_rate": 5.322324547475389e-05, "loss": 0.0414, "step": 7370 }, { "epoch": 14.034236804564907, "grad_norm": 0.10100686550140381, "learning_rate": 5.321689425214355e-05, "loss": 0.0508, "step": 7371 }, { "epoch": 14.036138849262958, "grad_norm": 0.11903412640094757, "learning_rate": 5.3210543029533185e-05, "loss": 0.0839, "step": 7372 }, { "epoch": 14.038040893961009, "grad_norm": 0.14304274320602417, "learning_rate": 5.320419180692283e-05, "loss": 0.0638, "step": 7373 }, { "epoch": 14.039942938659058, "grad_norm": 0.24439594149589539, "learning_rate": 5.319784058431249e-05, "loss": 0.0555, "step": 7374 }, { "epoch": 14.041844983357109, "grad_norm": 0.2599331736564636, "learning_rate": 5.319148936170213e-05, "loss": 0.0546, "step": 7375 }, { "epoch": 14.04374702805516, "grad_norm": 0.12374287098646164, "learning_rate": 5.318513813909177e-05, "loss": 0.0649, "step": 7376 }, { "epoch": 14.045649072753209, "grad_norm": 0.21068157255649567, "learning_rate": 5.3178786916481424e-05, "loss": 0.0727, "step": 7377 }, { "epoch": 14.04755111745126, "grad_norm": 0.19516895711421967, "learning_rate": 5.317243569387107e-05, "loss": 0.0606, "step": 7378 }, { "epoch": 14.04945316214931, "grad_norm": 0.27725622057914734, "learning_rate": 5.316608447126072e-05, "loss": 0.0722, "step": 7379 }, { "epoch": 14.051355206847362, "grad_norm": 0.1234855204820633, "learning_rate": 5.3159733248650366e-05, "loss": 0.0543, "step": 7380 }, { "epoch": 14.05325725154541, "grad_norm": 0.23500555753707886, "learning_rate": 5.315338202604001e-05, "loss": 0.0493, "step": 7381 }, { "epoch": 14.055159296243461, "grad_norm": 0.1624249815940857, "learning_rate": 5.314703080342966e-05, "loss": 0.0442, "step": 7382 }, { "epoch": 14.057061340941512, "grad_norm": 0.2046501189470291, "learning_rate": 5.314067958081931e-05, "loss": 0.0673, "step": 7383 }, { "epoch": 14.058963385639563, "grad_norm": 0.1192425861954689, "learning_rate": 5.313432835820896e-05, "loss": 0.0319, "step": 7384 }, { "epoch": 14.060865430337612, "grad_norm": 0.1112857535481453, "learning_rate": 5.3127977135598605e-05, "loss": 0.0332, "step": 7385 }, { "epoch": 14.062767475035663, "grad_norm": 0.26829591393470764, "learning_rate": 5.312162591298825e-05, "loss": 0.0663, "step": 7386 }, { "epoch": 14.064669519733714, "grad_norm": 0.1901654750108719, "learning_rate": 5.31152746903779e-05, "loss": 0.0653, "step": 7387 }, { "epoch": 14.066571564431765, "grad_norm": 0.14392554759979248, "learning_rate": 5.310892346776755e-05, "loss": 0.0616, "step": 7388 }, { "epoch": 14.068473609129814, "grad_norm": 0.144301638007164, "learning_rate": 5.31025722451572e-05, "loss": 0.0503, "step": 7389 }, { "epoch": 14.070375653827865, "grad_norm": 0.2335568219423294, "learning_rate": 5.3096221022546844e-05, "loss": 0.0583, "step": 7390 }, { "epoch": 14.072277698525916, "grad_norm": 0.164510577917099, "learning_rate": 5.308986979993649e-05, "loss": 0.0557, "step": 7391 }, { "epoch": 14.074179743223965, "grad_norm": 0.3761489987373352, "learning_rate": 5.308351857732614e-05, "loss": 0.0658, "step": 7392 }, { "epoch": 14.076081787922016, "grad_norm": 0.2669321596622467, "learning_rate": 5.3077167354715786e-05, "loss": 0.0635, "step": 7393 }, { "epoch": 14.077983832620067, "grad_norm": 0.21432504057884216, "learning_rate": 5.307081613210543e-05, "loss": 0.0519, "step": 7394 }, { "epoch": 14.079885877318118, "grad_norm": 0.316077321767807, "learning_rate": 5.306446490949508e-05, "loss": 0.0809, "step": 7395 }, { "epoch": 14.081787922016167, "grad_norm": 0.14039736986160278, "learning_rate": 5.305811368688473e-05, "loss": 0.047, "step": 7396 }, { "epoch": 14.083689966714218, "grad_norm": 0.1503455489873886, "learning_rate": 5.305176246427438e-05, "loss": 0.0539, "step": 7397 }, { "epoch": 14.085592011412269, "grad_norm": 0.16759897768497467, "learning_rate": 5.3045411241664025e-05, "loss": 0.0611, "step": 7398 }, { "epoch": 14.08749405611032, "grad_norm": 0.13561630249023438, "learning_rate": 5.303906001905366e-05, "loss": 0.0541, "step": 7399 }, { "epoch": 14.089396100808369, "grad_norm": 0.17430227994918823, "learning_rate": 5.303270879644332e-05, "loss": 0.054, "step": 7400 }, { "epoch": 14.09129814550642, "grad_norm": 0.16151241958141327, "learning_rate": 5.302635757383296e-05, "loss": 0.0471, "step": 7401 }, { "epoch": 14.09320019020447, "grad_norm": 0.13555926084518433, "learning_rate": 5.302000635122262e-05, "loss": 0.0454, "step": 7402 }, { "epoch": 14.09510223490252, "grad_norm": 0.19882944226264954, "learning_rate": 5.3013655128612264e-05, "loss": 0.0769, "step": 7403 }, { "epoch": 14.09700427960057, "grad_norm": 0.19104546308517456, "learning_rate": 5.30073039060019e-05, "loss": 0.0748, "step": 7404 }, { "epoch": 14.098906324298621, "grad_norm": 0.24949337542057037, "learning_rate": 5.300095268339156e-05, "loss": 0.0753, "step": 7405 }, { "epoch": 14.100808368996672, "grad_norm": 0.22298850119113922, "learning_rate": 5.29946014607812e-05, "loss": 0.0556, "step": 7406 }, { "epoch": 14.102710413694721, "grad_norm": 0.12767939269542694, "learning_rate": 5.298825023817086e-05, "loss": 0.0413, "step": 7407 }, { "epoch": 14.104612458392772, "grad_norm": 0.3507814407348633, "learning_rate": 5.2981899015560496e-05, "loss": 0.0677, "step": 7408 }, { "epoch": 14.106514503090823, "grad_norm": 0.21291838586330414, "learning_rate": 5.297554779295014e-05, "loss": 0.0709, "step": 7409 }, { "epoch": 14.108416547788874, "grad_norm": 0.20955125987529755, "learning_rate": 5.29691965703398e-05, "loss": 0.0555, "step": 7410 }, { "epoch": 14.110318592486923, "grad_norm": 0.38880980014801025, "learning_rate": 5.296284534772944e-05, "loss": 0.0605, "step": 7411 }, { "epoch": 14.112220637184974, "grad_norm": 0.16443461179733276, "learning_rate": 5.295649412511908e-05, "loss": 0.0902, "step": 7412 }, { "epoch": 14.114122681883025, "grad_norm": 0.11160407215356827, "learning_rate": 5.2950142902508735e-05, "loss": 0.0431, "step": 7413 }, { "epoch": 14.116024726581074, "grad_norm": 0.26912933588027954, "learning_rate": 5.294379167989838e-05, "loss": 0.0733, "step": 7414 }, { "epoch": 14.117926771279125, "grad_norm": 0.11232420057058334, "learning_rate": 5.293744045728803e-05, "loss": 0.0342, "step": 7415 }, { "epoch": 14.119828815977176, "grad_norm": 0.13175956904888153, "learning_rate": 5.2931089234677677e-05, "loss": 0.0538, "step": 7416 }, { "epoch": 14.121730860675227, "grad_norm": 0.1904328614473343, "learning_rate": 5.292473801206732e-05, "loss": 0.0576, "step": 7417 }, { "epoch": 14.123632905373276, "grad_norm": 0.13842704892158508, "learning_rate": 5.2918386789456973e-05, "loss": 0.036, "step": 7418 }, { "epoch": 14.125534950071327, "grad_norm": 0.1916932314634323, "learning_rate": 5.291203556684662e-05, "loss": 0.0411, "step": 7419 }, { "epoch": 14.127436994769377, "grad_norm": 0.1814490705728531, "learning_rate": 5.290568434423627e-05, "loss": 0.0365, "step": 7420 }, { "epoch": 14.129339039467428, "grad_norm": 0.24279388785362244, "learning_rate": 5.2899333121625915e-05, "loss": 0.0803, "step": 7421 }, { "epoch": 14.131241084165477, "grad_norm": 0.2318277359008789, "learning_rate": 5.289298189901556e-05, "loss": 0.1126, "step": 7422 }, { "epoch": 14.133143128863528, "grad_norm": 0.17640984058380127, "learning_rate": 5.288663067640521e-05, "loss": 0.0591, "step": 7423 }, { "epoch": 14.13504517356158, "grad_norm": 0.1813698261976242, "learning_rate": 5.288027945379486e-05, "loss": 0.0864, "step": 7424 }, { "epoch": 14.136947218259628, "grad_norm": 0.23784920573234558, "learning_rate": 5.287392823118451e-05, "loss": 0.0632, "step": 7425 }, { "epoch": 14.13884926295768, "grad_norm": 0.24212495982646942, "learning_rate": 5.2867577008574154e-05, "loss": 0.0776, "step": 7426 }, { "epoch": 14.14075130765573, "grad_norm": 0.10199500620365143, "learning_rate": 5.28612257859638e-05, "loss": 0.0574, "step": 7427 }, { "epoch": 14.142653352353781, "grad_norm": 0.07228594273328781, "learning_rate": 5.285487456335345e-05, "loss": 0.0407, "step": 7428 }, { "epoch": 14.14455539705183, "grad_norm": 0.11618991196155548, "learning_rate": 5.2848523340743096e-05, "loss": 0.0706, "step": 7429 }, { "epoch": 14.146457441749881, "grad_norm": 0.2242993712425232, "learning_rate": 5.2842172118132735e-05, "loss": 0.0634, "step": 7430 }, { "epoch": 14.148359486447932, "grad_norm": 0.19708840548992157, "learning_rate": 5.283582089552239e-05, "loss": 0.0505, "step": 7431 }, { "epoch": 14.150261531145983, "grad_norm": 0.2558425962924957, "learning_rate": 5.282946967291204e-05, "loss": 0.084, "step": 7432 }, { "epoch": 14.152163575844032, "grad_norm": 0.12093882262706757, "learning_rate": 5.282311845030169e-05, "loss": 0.0448, "step": 7433 }, { "epoch": 14.154065620542083, "grad_norm": 0.18094058334827423, "learning_rate": 5.2816767227691335e-05, "loss": 0.0795, "step": 7434 }, { "epoch": 14.155967665240134, "grad_norm": 0.20837660133838654, "learning_rate": 5.2810416005080973e-05, "loss": 0.0533, "step": 7435 }, { "epoch": 14.157869709938183, "grad_norm": 0.20709973573684692, "learning_rate": 5.280406478247063e-05, "loss": 0.0697, "step": 7436 }, { "epoch": 14.159771754636234, "grad_norm": 0.2028302401304245, "learning_rate": 5.279771355986027e-05, "loss": 0.0565, "step": 7437 }, { "epoch": 14.161673799334284, "grad_norm": 0.15181556344032288, "learning_rate": 5.279136233724993e-05, "loss": 0.0526, "step": 7438 }, { "epoch": 14.163575844032335, "grad_norm": 0.16445064544677734, "learning_rate": 5.2785011114639574e-05, "loss": 0.0502, "step": 7439 }, { "epoch": 14.165477888730384, "grad_norm": 0.26812055706977844, "learning_rate": 5.277865989202921e-05, "loss": 0.0699, "step": 7440 }, { "epoch": 14.167379933428435, "grad_norm": 0.16392607986927032, "learning_rate": 5.277230866941887e-05, "loss": 0.0524, "step": 7441 }, { "epoch": 14.169281978126486, "grad_norm": 0.1482769399881363, "learning_rate": 5.276595744680851e-05, "loss": 0.0901, "step": 7442 }, { "epoch": 14.171184022824537, "grad_norm": 0.38180655241012573, "learning_rate": 5.275960622419817e-05, "loss": 0.0734, "step": 7443 }, { "epoch": 14.173086067522586, "grad_norm": 0.13449139893054962, "learning_rate": 5.2753255001587806e-05, "loss": 0.0537, "step": 7444 }, { "epoch": 14.174988112220637, "grad_norm": 0.21906720101833344, "learning_rate": 5.274690377897745e-05, "loss": 0.0648, "step": 7445 }, { "epoch": 14.176890156918688, "grad_norm": 0.23385153710842133, "learning_rate": 5.274055255636711e-05, "loss": 0.0568, "step": 7446 }, { "epoch": 14.178792201616737, "grad_norm": 0.18561255931854248, "learning_rate": 5.273420133375675e-05, "loss": 0.0571, "step": 7447 }, { "epoch": 14.180694246314788, "grad_norm": 0.356733500957489, "learning_rate": 5.272785011114639e-05, "loss": 0.0734, "step": 7448 }, { "epoch": 14.182596291012839, "grad_norm": 0.2717702090740204, "learning_rate": 5.2721498888536045e-05, "loss": 0.0531, "step": 7449 }, { "epoch": 14.18449833571089, "grad_norm": 0.12378834187984467, "learning_rate": 5.271514766592569e-05, "loss": 0.0587, "step": 7450 }, { "epoch": 14.186400380408939, "grad_norm": 0.22417086362838745, "learning_rate": 5.270879644331534e-05, "loss": 0.0912, "step": 7451 }, { "epoch": 14.18830242510699, "grad_norm": 0.13545849919319153, "learning_rate": 5.270244522070499e-05, "loss": 0.0547, "step": 7452 }, { "epoch": 14.19020446980504, "grad_norm": 0.28296709060668945, "learning_rate": 5.269609399809463e-05, "loss": 0.0721, "step": 7453 }, { "epoch": 14.192106514503092, "grad_norm": 0.17048925161361694, "learning_rate": 5.2689742775484284e-05, "loss": 0.0699, "step": 7454 }, { "epoch": 14.19400855920114, "grad_norm": 0.16482289135456085, "learning_rate": 5.268339155287393e-05, "loss": 0.0499, "step": 7455 }, { "epoch": 14.195910603899192, "grad_norm": 0.241130992770195, "learning_rate": 5.267704033026358e-05, "loss": 0.0873, "step": 7456 }, { "epoch": 14.197812648597242, "grad_norm": 0.15253259241580963, "learning_rate": 5.2670689107653226e-05, "loss": 0.053, "step": 7457 }, { "epoch": 14.199714693295292, "grad_norm": 0.19944600760936737, "learning_rate": 5.266433788504287e-05, "loss": 0.0746, "step": 7458 }, { "epoch": 14.201616737993342, "grad_norm": 0.17144067585468292, "learning_rate": 5.265798666243252e-05, "loss": 0.0691, "step": 7459 }, { "epoch": 14.203518782691393, "grad_norm": 0.20575584471225739, "learning_rate": 5.265163543982217e-05, "loss": 0.0449, "step": 7460 }, { "epoch": 14.205420827389444, "grad_norm": 0.11989787220954895, "learning_rate": 5.264528421721182e-05, "loss": 0.0442, "step": 7461 }, { "epoch": 14.207322872087493, "grad_norm": 0.19378036260604858, "learning_rate": 5.2638932994601465e-05, "loss": 0.0527, "step": 7462 }, { "epoch": 14.209224916785544, "grad_norm": 0.1528719812631607, "learning_rate": 5.263258177199111e-05, "loss": 0.0606, "step": 7463 }, { "epoch": 14.211126961483595, "grad_norm": 0.18818438053131104, "learning_rate": 5.262623054938076e-05, "loss": 0.0509, "step": 7464 }, { "epoch": 14.213029006181646, "grad_norm": 0.14130130410194397, "learning_rate": 5.261987932677041e-05, "loss": 0.0517, "step": 7465 }, { "epoch": 14.214931050879695, "grad_norm": 0.1270495057106018, "learning_rate": 5.2613528104160045e-05, "loss": 0.0651, "step": 7466 }, { "epoch": 14.216833095577746, "grad_norm": 0.22663429379463196, "learning_rate": 5.2607176881549704e-05, "loss": 0.0492, "step": 7467 }, { "epoch": 14.218735140275797, "grad_norm": 0.18520976603031158, "learning_rate": 5.260082565893935e-05, "loss": 0.076, "step": 7468 }, { "epoch": 14.220637184973846, "grad_norm": 0.14399489760398865, "learning_rate": 5.2594474436329e-05, "loss": 0.0581, "step": 7469 }, { "epoch": 14.222539229671897, "grad_norm": 0.12707483768463135, "learning_rate": 5.2588123213718646e-05, "loss": 0.0513, "step": 7470 }, { "epoch": 14.224441274369948, "grad_norm": 0.1769390106201172, "learning_rate": 5.2581771991108284e-05, "loss": 0.0819, "step": 7471 }, { "epoch": 14.226343319067999, "grad_norm": 0.17762137949466705, "learning_rate": 5.257542076849794e-05, "loss": 0.0649, "step": 7472 }, { "epoch": 14.228245363766048, "grad_norm": 0.2458849549293518, "learning_rate": 5.256906954588758e-05, "loss": 0.0641, "step": 7473 }, { "epoch": 14.230147408464099, "grad_norm": 0.12928763031959534, "learning_rate": 5.256271832327724e-05, "loss": 0.0564, "step": 7474 }, { "epoch": 14.23204945316215, "grad_norm": 0.11910723894834518, "learning_rate": 5.2556367100666884e-05, "loss": 0.064, "step": 7475 }, { "epoch": 14.2339514978602, "grad_norm": 0.14299996197223663, "learning_rate": 5.255001587805652e-05, "loss": 0.0482, "step": 7476 }, { "epoch": 14.23585354255825, "grad_norm": 0.36283624172210693, "learning_rate": 5.254366465544618e-05, "loss": 0.0642, "step": 7477 }, { "epoch": 14.2377555872563, "grad_norm": 0.19579343497753143, "learning_rate": 5.253731343283582e-05, "loss": 0.0679, "step": 7478 }, { "epoch": 14.239657631954351, "grad_norm": 0.12536804378032684, "learning_rate": 5.253096221022548e-05, "loss": 0.0553, "step": 7479 }, { "epoch": 14.241559676652402, "grad_norm": 0.16742166876792908, "learning_rate": 5.2524610987615117e-05, "loss": 0.0534, "step": 7480 }, { "epoch": 14.243461721350451, "grad_norm": 0.32988205552101135, "learning_rate": 5.251825976500476e-05, "loss": 0.072, "step": 7481 }, { "epoch": 14.245363766048502, "grad_norm": 0.19991889595985413, "learning_rate": 5.2511908542394413e-05, "loss": 0.0632, "step": 7482 }, { "epoch": 14.247265810746553, "grad_norm": 0.23099440336227417, "learning_rate": 5.250555731978406e-05, "loss": 0.0702, "step": 7483 }, { "epoch": 14.249167855444602, "grad_norm": 0.20775353908538818, "learning_rate": 5.2499206097173704e-05, "loss": 0.0734, "step": 7484 }, { "epoch": 14.251069900142653, "grad_norm": 0.20609457790851593, "learning_rate": 5.2492854874563355e-05, "loss": 0.0596, "step": 7485 }, { "epoch": 14.252971944840704, "grad_norm": 0.22942540049552917, "learning_rate": 5.2486503651953e-05, "loss": 0.0551, "step": 7486 }, { "epoch": 14.254873989538755, "grad_norm": 0.19878560304641724, "learning_rate": 5.248015242934265e-05, "loss": 0.0576, "step": 7487 }, { "epoch": 14.256776034236804, "grad_norm": 0.19331955909729004, "learning_rate": 5.24738012067323e-05, "loss": 0.0868, "step": 7488 }, { "epoch": 14.258678078934855, "grad_norm": 0.24519440531730652, "learning_rate": 5.246744998412194e-05, "loss": 0.0642, "step": 7489 }, { "epoch": 14.260580123632906, "grad_norm": 0.23775117099285126, "learning_rate": 5.2461098761511594e-05, "loss": 0.0739, "step": 7490 }, { "epoch": 14.262482168330957, "grad_norm": 0.17239496111869812, "learning_rate": 5.245474753890124e-05, "loss": 0.0525, "step": 7491 }, { "epoch": 14.264384213029006, "grad_norm": 0.09015030413866043, "learning_rate": 5.244839631629089e-05, "loss": 0.0641, "step": 7492 }, { "epoch": 14.266286257727057, "grad_norm": 0.14684611558914185, "learning_rate": 5.2442045093680536e-05, "loss": 0.0573, "step": 7493 }, { "epoch": 14.268188302425107, "grad_norm": 0.20221078395843506, "learning_rate": 5.243569387107018e-05, "loss": 0.0613, "step": 7494 }, { "epoch": 14.270090347123157, "grad_norm": 0.38689568638801575, "learning_rate": 5.242934264845983e-05, "loss": 0.0707, "step": 7495 }, { "epoch": 14.271992391821207, "grad_norm": 0.17096443474292755, "learning_rate": 5.242299142584948e-05, "loss": 0.0508, "step": 7496 }, { "epoch": 14.273894436519258, "grad_norm": 0.14185024797916412, "learning_rate": 5.241664020323913e-05, "loss": 0.0541, "step": 7497 }, { "epoch": 14.27579648121731, "grad_norm": 0.18281078338623047, "learning_rate": 5.2410288980628775e-05, "loss": 0.0711, "step": 7498 }, { "epoch": 14.277698525915358, "grad_norm": 0.17065037786960602, "learning_rate": 5.240393775801842e-05, "loss": 0.0425, "step": 7499 }, { "epoch": 14.27960057061341, "grad_norm": 0.240870401263237, "learning_rate": 5.239758653540807e-05, "loss": 0.0556, "step": 7500 }, { "epoch": 14.28150261531146, "grad_norm": 0.14593961834907532, "learning_rate": 5.239123531279772e-05, "loss": 0.0551, "step": 7501 }, { "epoch": 14.283404660009511, "grad_norm": 0.21113112568855286, "learning_rate": 5.2384884090187355e-05, "loss": 0.0401, "step": 7502 }, { "epoch": 14.28530670470756, "grad_norm": 0.24402189254760742, "learning_rate": 5.2378532867577014e-05, "loss": 0.0692, "step": 7503 }, { "epoch": 14.287208749405611, "grad_norm": 0.290025532245636, "learning_rate": 5.237218164496666e-05, "loss": 0.0754, "step": 7504 }, { "epoch": 14.289110794103662, "grad_norm": 0.21898028254508972, "learning_rate": 5.236583042235631e-05, "loss": 0.0673, "step": 7505 }, { "epoch": 14.291012838801711, "grad_norm": 0.2041853815317154, "learning_rate": 5.2359479199745956e-05, "loss": 0.054, "step": 7506 }, { "epoch": 14.292914883499762, "grad_norm": 0.2848202586174011, "learning_rate": 5.2353127977135594e-05, "loss": 0.0649, "step": 7507 }, { "epoch": 14.294816928197813, "grad_norm": 0.13204796612262726, "learning_rate": 5.234677675452525e-05, "loss": 0.0733, "step": 7508 }, { "epoch": 14.296718972895864, "grad_norm": 0.14004924893379211, "learning_rate": 5.234042553191489e-05, "loss": 0.0585, "step": 7509 }, { "epoch": 14.298621017593913, "grad_norm": 0.1074732318520546, "learning_rate": 5.233407430930455e-05, "loss": 0.0531, "step": 7510 }, { "epoch": 14.300523062291964, "grad_norm": 0.2174833118915558, "learning_rate": 5.232772308669419e-05, "loss": 0.0768, "step": 7511 }, { "epoch": 14.302425106990015, "grad_norm": 0.2654950022697449, "learning_rate": 5.232137186408383e-05, "loss": 0.0579, "step": 7512 }, { "epoch": 14.304327151688065, "grad_norm": 0.14409427344799042, "learning_rate": 5.231502064147349e-05, "loss": 0.0472, "step": 7513 }, { "epoch": 14.306229196386115, "grad_norm": 0.12296228855848312, "learning_rate": 5.230866941886313e-05, "loss": 0.0449, "step": 7514 }, { "epoch": 14.308131241084165, "grad_norm": 0.14399056136608124, "learning_rate": 5.230231819625279e-05, "loss": 0.0451, "step": 7515 }, { "epoch": 14.310033285782216, "grad_norm": 0.17021867632865906, "learning_rate": 5.229596697364243e-05, "loss": 0.0513, "step": 7516 }, { "epoch": 14.311935330480265, "grad_norm": 0.15062189102172852, "learning_rate": 5.228961575103207e-05, "loss": 0.0484, "step": 7517 }, { "epoch": 14.313837375178316, "grad_norm": 0.18984819948673248, "learning_rate": 5.2283264528421724e-05, "loss": 0.0725, "step": 7518 }, { "epoch": 14.315739419876367, "grad_norm": 0.15147118270397186, "learning_rate": 5.227691330581137e-05, "loss": 0.0559, "step": 7519 }, { "epoch": 14.317641464574418, "grad_norm": 0.23060442507266998, "learning_rate": 5.2270562083201014e-05, "loss": 0.0501, "step": 7520 }, { "epoch": 14.319543509272467, "grad_norm": 0.14676730334758759, "learning_rate": 5.2264210860590666e-05, "loss": 0.0599, "step": 7521 }, { "epoch": 14.321445553970518, "grad_norm": 0.21464918553829193, "learning_rate": 5.225785963798031e-05, "loss": 0.0643, "step": 7522 }, { "epoch": 14.323347598668569, "grad_norm": 0.2580917775630951, "learning_rate": 5.225150841536996e-05, "loss": 0.067, "step": 7523 }, { "epoch": 14.32524964336662, "grad_norm": 0.23342084884643555, "learning_rate": 5.224515719275961e-05, "loss": 0.0746, "step": 7524 }, { "epoch": 14.327151688064669, "grad_norm": 0.1253678947687149, "learning_rate": 5.223880597014925e-05, "loss": 0.0436, "step": 7525 }, { "epoch": 14.32905373276272, "grad_norm": 0.20070791244506836, "learning_rate": 5.2232454747538905e-05, "loss": 0.0628, "step": 7526 }, { "epoch": 14.33095577746077, "grad_norm": 0.32024890184402466, "learning_rate": 5.222610352492855e-05, "loss": 0.0637, "step": 7527 }, { "epoch": 14.332857822158822, "grad_norm": 0.17475807666778564, "learning_rate": 5.22197523023182e-05, "loss": 0.0512, "step": 7528 }, { "epoch": 14.33475986685687, "grad_norm": 0.2347768247127533, "learning_rate": 5.221340107970785e-05, "loss": 0.0553, "step": 7529 }, { "epoch": 14.336661911554922, "grad_norm": 0.17313428223133087, "learning_rate": 5.220704985709749e-05, "loss": 0.0519, "step": 7530 }, { "epoch": 14.338563956252973, "grad_norm": 0.1019507348537445, "learning_rate": 5.2200698634487144e-05, "loss": 0.0738, "step": 7531 }, { "epoch": 14.340466000951022, "grad_norm": 0.18478825688362122, "learning_rate": 5.219434741187679e-05, "loss": 0.0468, "step": 7532 }, { "epoch": 14.342368045649073, "grad_norm": 0.33729398250579834, "learning_rate": 5.218799618926644e-05, "loss": 0.069, "step": 7533 }, { "epoch": 14.344270090347123, "grad_norm": 0.18752019107341766, "learning_rate": 5.2181644966656086e-05, "loss": 0.0614, "step": 7534 }, { "epoch": 14.346172135045174, "grad_norm": 0.10709106922149658, "learning_rate": 5.217529374404573e-05, "loss": 0.0418, "step": 7535 }, { "epoch": 14.348074179743223, "grad_norm": 0.2352564036846161, "learning_rate": 5.216894252143538e-05, "loss": 0.0649, "step": 7536 }, { "epoch": 14.349976224441274, "grad_norm": 0.2340080589056015, "learning_rate": 5.216259129882503e-05, "loss": 0.051, "step": 7537 }, { "epoch": 14.351878269139325, "grad_norm": 0.23616208136081696, "learning_rate": 5.2156240076214666e-05, "loss": 0.0653, "step": 7538 }, { "epoch": 14.353780313837376, "grad_norm": 0.1928998827934265, "learning_rate": 5.2149888853604324e-05, "loss": 0.0623, "step": 7539 }, { "epoch": 14.355682358535425, "grad_norm": 0.17965523898601532, "learning_rate": 5.214353763099396e-05, "loss": 0.0604, "step": 7540 }, { "epoch": 14.357584403233476, "grad_norm": 0.15935584902763367, "learning_rate": 5.213718640838362e-05, "loss": 0.0581, "step": 7541 }, { "epoch": 14.359486447931527, "grad_norm": 0.11977368593215942, "learning_rate": 5.2130835185773266e-05, "loss": 0.0569, "step": 7542 }, { "epoch": 14.361388492629576, "grad_norm": 0.11723438650369644, "learning_rate": 5.2124483963162905e-05, "loss": 0.0492, "step": 7543 }, { "epoch": 14.363290537327627, "grad_norm": 0.21849237382411957, "learning_rate": 5.211813274055256e-05, "loss": 0.0542, "step": 7544 }, { "epoch": 14.365192582025678, "grad_norm": 0.14745013415813446, "learning_rate": 5.21117815179422e-05, "loss": 0.0627, "step": 7545 }, { "epoch": 14.367094626723729, "grad_norm": 0.24485322833061218, "learning_rate": 5.210543029533186e-05, "loss": 0.0658, "step": 7546 }, { "epoch": 14.368996671421778, "grad_norm": 0.2918044626712799, "learning_rate": 5.20990790727215e-05, "loss": 0.0848, "step": 7547 }, { "epoch": 14.370898716119829, "grad_norm": 0.13040199875831604, "learning_rate": 5.2092727850111144e-05, "loss": 0.0835, "step": 7548 }, { "epoch": 14.37280076081788, "grad_norm": 0.22240321338176727, "learning_rate": 5.20863766275008e-05, "loss": 0.0753, "step": 7549 }, { "epoch": 14.37470280551593, "grad_norm": 0.1147005707025528, "learning_rate": 5.208002540489044e-05, "loss": 0.0646, "step": 7550 }, { "epoch": 14.37660485021398, "grad_norm": 0.17129144072532654, "learning_rate": 5.20736741822801e-05, "loss": 0.0577, "step": 7551 }, { "epoch": 14.37850689491203, "grad_norm": 0.2121477574110031, "learning_rate": 5.206732295966974e-05, "loss": 0.0514, "step": 7552 }, { "epoch": 14.380408939610081, "grad_norm": 0.29183366894721985, "learning_rate": 5.206097173705938e-05, "loss": 0.0778, "step": 7553 }, { "epoch": 14.38231098430813, "grad_norm": 0.2168087661266327, "learning_rate": 5.2054620514449034e-05, "loss": 0.0673, "step": 7554 }, { "epoch": 14.384213029006181, "grad_norm": 0.29269078373908997, "learning_rate": 5.204826929183868e-05, "loss": 0.0895, "step": 7555 }, { "epoch": 14.386115073704232, "grad_norm": 0.08655479550361633, "learning_rate": 5.2041918069228324e-05, "loss": 0.0473, "step": 7556 }, { "epoch": 14.388017118402283, "grad_norm": 0.2127612829208374, "learning_rate": 5.2035566846617976e-05, "loss": 0.0725, "step": 7557 }, { "epoch": 14.389919163100332, "grad_norm": 0.16400641202926636, "learning_rate": 5.202921562400762e-05, "loss": 0.0633, "step": 7558 }, { "epoch": 14.391821207798383, "grad_norm": 0.30889225006103516, "learning_rate": 5.202286440139727e-05, "loss": 0.0838, "step": 7559 }, { "epoch": 14.393723252496434, "grad_norm": 0.2411080151796341, "learning_rate": 5.201651317878692e-05, "loss": 0.062, "step": 7560 }, { "epoch": 14.395625297194485, "grad_norm": 0.14351652562618256, "learning_rate": 5.201016195617656e-05, "loss": 0.038, "step": 7561 }, { "epoch": 14.397527341892534, "grad_norm": 0.27990370988845825, "learning_rate": 5.2003810733566215e-05, "loss": 0.0642, "step": 7562 }, { "epoch": 14.399429386590585, "grad_norm": 0.22874848544597626, "learning_rate": 5.199745951095586e-05, "loss": 0.0659, "step": 7563 }, { "epoch": 14.401331431288636, "grad_norm": 0.1129019483923912, "learning_rate": 5.199110828834551e-05, "loss": 0.0415, "step": 7564 }, { "epoch": 14.403233475986685, "grad_norm": 0.25754669308662415, "learning_rate": 5.198475706573516e-05, "loss": 0.0684, "step": 7565 }, { "epoch": 14.405135520684736, "grad_norm": 0.19301319122314453, "learning_rate": 5.19784058431248e-05, "loss": 0.0739, "step": 7566 }, { "epoch": 14.407037565382787, "grad_norm": 0.27009403705596924, "learning_rate": 5.1972054620514454e-05, "loss": 0.0858, "step": 7567 }, { "epoch": 14.408939610080838, "grad_norm": 0.17593978345394135, "learning_rate": 5.19657033979041e-05, "loss": 0.053, "step": 7568 }, { "epoch": 14.410841654778887, "grad_norm": 0.14010390639305115, "learning_rate": 5.195935217529375e-05, "loss": 0.0478, "step": 7569 }, { "epoch": 14.412743699476938, "grad_norm": 0.2071506828069687, "learning_rate": 5.1953000952683396e-05, "loss": 0.0533, "step": 7570 }, { "epoch": 14.414645744174988, "grad_norm": 0.13032926619052887, "learning_rate": 5.194664973007304e-05, "loss": 0.0418, "step": 7571 }, { "epoch": 14.41654778887304, "grad_norm": 0.10803449153900146, "learning_rate": 5.194029850746269e-05, "loss": 0.0495, "step": 7572 }, { "epoch": 14.418449833571088, "grad_norm": 0.30941590666770935, "learning_rate": 5.193394728485234e-05, "loss": 0.0693, "step": 7573 }, { "epoch": 14.42035187826914, "grad_norm": 0.13794384896755219, "learning_rate": 5.1927596062241976e-05, "loss": 0.066, "step": 7574 }, { "epoch": 14.42225392296719, "grad_norm": 0.10141877830028534, "learning_rate": 5.1921244839631635e-05, "loss": 0.0432, "step": 7575 }, { "epoch": 14.42415596766524, "grad_norm": 0.2614104747772217, "learning_rate": 5.191489361702127e-05, "loss": 0.0686, "step": 7576 }, { "epoch": 14.42605801236329, "grad_norm": 0.2342391312122345, "learning_rate": 5.190854239441093e-05, "loss": 0.0546, "step": 7577 }, { "epoch": 14.427960057061341, "grad_norm": 0.2741243839263916, "learning_rate": 5.190219117180058e-05, "loss": 0.0693, "step": 7578 }, { "epoch": 14.429862101759392, "grad_norm": 0.14258788526058197, "learning_rate": 5.1895839949190215e-05, "loss": 0.0844, "step": 7579 }, { "epoch": 14.431764146457441, "grad_norm": 0.19403590261936188, "learning_rate": 5.1889488726579874e-05, "loss": 0.049, "step": 7580 }, { "epoch": 14.433666191155492, "grad_norm": 0.10535039007663727, "learning_rate": 5.188313750396951e-05, "loss": 0.0468, "step": 7581 }, { "epoch": 14.435568235853543, "grad_norm": 0.27161017060279846, "learning_rate": 5.187678628135917e-05, "loss": 0.0733, "step": 7582 }, { "epoch": 14.437470280551594, "grad_norm": 0.1395357847213745, "learning_rate": 5.187043505874881e-05, "loss": 0.0582, "step": 7583 }, { "epoch": 14.439372325249643, "grad_norm": 0.12294315546751022, "learning_rate": 5.1864083836138454e-05, "loss": 0.0635, "step": 7584 }, { "epoch": 14.441274369947694, "grad_norm": 0.24253630638122559, "learning_rate": 5.185773261352811e-05, "loss": 0.0739, "step": 7585 }, { "epoch": 14.443176414645745, "grad_norm": 0.1546248495578766, "learning_rate": 5.185138139091775e-05, "loss": 0.065, "step": 7586 }, { "epoch": 14.445078459343794, "grad_norm": 0.15066717565059662, "learning_rate": 5.184503016830741e-05, "loss": 0.0621, "step": 7587 }, { "epoch": 14.446980504041845, "grad_norm": 0.1923573911190033, "learning_rate": 5.183867894569705e-05, "loss": 0.049, "step": 7588 }, { "epoch": 14.448882548739896, "grad_norm": 0.17305736243724823, "learning_rate": 5.183232772308669e-05, "loss": 0.0524, "step": 7589 }, { "epoch": 14.450784593437946, "grad_norm": 0.17465534806251526, "learning_rate": 5.1825976500476345e-05, "loss": 0.0536, "step": 7590 }, { "epoch": 14.452686638135996, "grad_norm": 0.15740615129470825, "learning_rate": 5.181962527786599e-05, "loss": 0.0552, "step": 7591 }, { "epoch": 14.454588682834046, "grad_norm": 0.16666673123836517, "learning_rate": 5.1813274055255635e-05, "loss": 0.0615, "step": 7592 }, { "epoch": 14.456490727532097, "grad_norm": 0.18835856020450592, "learning_rate": 5.180692283264529e-05, "loss": 0.0486, "step": 7593 }, { "epoch": 14.458392772230148, "grad_norm": 0.18493612110614777, "learning_rate": 5.180057161003493e-05, "loss": 0.0487, "step": 7594 }, { "epoch": 14.460294816928197, "grad_norm": 0.15935729444026947, "learning_rate": 5.1794220387424584e-05, "loss": 0.0569, "step": 7595 }, { "epoch": 14.462196861626248, "grad_norm": 0.1528821438550949, "learning_rate": 5.178786916481423e-05, "loss": 0.0562, "step": 7596 }, { "epoch": 14.464098906324299, "grad_norm": 0.11653603613376617, "learning_rate": 5.1781517942203874e-05, "loss": 0.0651, "step": 7597 }, { "epoch": 14.466000951022348, "grad_norm": 0.16568215191364288, "learning_rate": 5.1775166719593526e-05, "loss": 0.0581, "step": 7598 }, { "epoch": 14.467902995720399, "grad_norm": 0.27015024423599243, "learning_rate": 5.176881549698317e-05, "loss": 0.0625, "step": 7599 }, { "epoch": 14.46980504041845, "grad_norm": 0.16504183411598206, "learning_rate": 5.176246427437282e-05, "loss": 0.0597, "step": 7600 }, { "epoch": 14.4717070851165, "grad_norm": 0.1252370923757553, "learning_rate": 5.175611305176247e-05, "loss": 0.0489, "step": 7601 }, { "epoch": 14.47360912981455, "grad_norm": 0.22102156281471252, "learning_rate": 5.174976182915211e-05, "loss": 0.071, "step": 7602 }, { "epoch": 14.4755111745126, "grad_norm": 0.13126230239868164, "learning_rate": 5.1743410606541765e-05, "loss": 0.0585, "step": 7603 }, { "epoch": 14.477413219210652, "grad_norm": 0.17570582032203674, "learning_rate": 5.173705938393141e-05, "loss": 0.0693, "step": 7604 }, { "epoch": 14.479315263908703, "grad_norm": 0.20770826935768127, "learning_rate": 5.173070816132106e-05, "loss": 0.0689, "step": 7605 }, { "epoch": 14.481217308606752, "grad_norm": 0.36989128589630127, "learning_rate": 5.1724356938710706e-05, "loss": 0.0721, "step": 7606 }, { "epoch": 14.483119353304803, "grad_norm": 0.1427404284477234, "learning_rate": 5.171800571610035e-05, "loss": 0.0496, "step": 7607 }, { "epoch": 14.485021398002853, "grad_norm": 0.2404860109090805, "learning_rate": 5.171165449349e-05, "loss": 0.0785, "step": 7608 }, { "epoch": 14.486923442700903, "grad_norm": 0.18874046206474304, "learning_rate": 5.170530327087965e-05, "loss": 0.0505, "step": 7609 }, { "epoch": 14.488825487398953, "grad_norm": 0.17922984063625336, "learning_rate": 5.169895204826929e-05, "loss": 0.052, "step": 7610 }, { "epoch": 14.490727532097004, "grad_norm": 0.1344066709280014, "learning_rate": 5.1692600825658945e-05, "loss": 0.0449, "step": 7611 }, { "epoch": 14.492629576795055, "grad_norm": 0.271064817905426, "learning_rate": 5.1686249603048584e-05, "loss": 0.0413, "step": 7612 }, { "epoch": 14.494531621493104, "grad_norm": 0.29216963052749634, "learning_rate": 5.167989838043824e-05, "loss": 0.0618, "step": 7613 }, { "epoch": 14.496433666191155, "grad_norm": 0.297928124666214, "learning_rate": 5.167354715782789e-05, "loss": 0.0727, "step": 7614 }, { "epoch": 14.498335710889206, "grad_norm": 0.1421874612569809, "learning_rate": 5.1667195935217526e-05, "loss": 0.0647, "step": 7615 }, { "epoch": 14.500237755587257, "grad_norm": 0.17354488372802734, "learning_rate": 5.1660844712607184e-05, "loss": 0.0541, "step": 7616 }, { "epoch": 14.502139800285306, "grad_norm": 0.22090236842632294, "learning_rate": 5.165449348999682e-05, "loss": 0.0583, "step": 7617 }, { "epoch": 14.504041844983357, "grad_norm": 0.2978653907775879, "learning_rate": 5.164814226738648e-05, "loss": 0.0683, "step": 7618 }, { "epoch": 14.505943889681408, "grad_norm": 0.11627073585987091, "learning_rate": 5.164179104477612e-05, "loss": 0.0542, "step": 7619 }, { "epoch": 14.507845934379457, "grad_norm": 0.24588021636009216, "learning_rate": 5.1635439822165764e-05, "loss": 0.0701, "step": 7620 }, { "epoch": 14.509747979077508, "grad_norm": 0.16642743349075317, "learning_rate": 5.1629088599555416e-05, "loss": 0.0641, "step": 7621 }, { "epoch": 14.511650023775559, "grad_norm": 0.16874666512012482, "learning_rate": 5.162273737694506e-05, "loss": 0.0705, "step": 7622 }, { "epoch": 14.51355206847361, "grad_norm": 0.09476254135370255, "learning_rate": 5.161638615433472e-05, "loss": 0.0532, "step": 7623 }, { "epoch": 14.515454113171659, "grad_norm": 0.39242368936538696, "learning_rate": 5.161003493172436e-05, "loss": 0.0843, "step": 7624 }, { "epoch": 14.51735615786971, "grad_norm": 0.22530074417591095, "learning_rate": 5.1603683709114e-05, "loss": 0.0662, "step": 7625 }, { "epoch": 14.51925820256776, "grad_norm": 0.19328154623508453, "learning_rate": 5.1597332486503655e-05, "loss": 0.0612, "step": 7626 }, { "epoch": 14.521160247265811, "grad_norm": 0.2952597737312317, "learning_rate": 5.15909812638933e-05, "loss": 0.0762, "step": 7627 }, { "epoch": 14.52306229196386, "grad_norm": 0.11258865147829056, "learning_rate": 5.1584630041282945e-05, "loss": 0.0689, "step": 7628 }, { "epoch": 14.524964336661911, "grad_norm": 0.12267016619443893, "learning_rate": 5.15782788186726e-05, "loss": 0.0461, "step": 7629 }, { "epoch": 14.526866381359962, "grad_norm": 0.13929691910743713, "learning_rate": 5.157192759606224e-05, "loss": 0.0439, "step": 7630 }, { "epoch": 14.528768426058011, "grad_norm": 0.17429441213607788, "learning_rate": 5.1565576373451894e-05, "loss": 0.073, "step": 7631 }, { "epoch": 14.530670470756062, "grad_norm": 0.15891039371490479, "learning_rate": 5.155922515084154e-05, "loss": 0.0536, "step": 7632 }, { "epoch": 14.532572515454113, "grad_norm": 0.155000239610672, "learning_rate": 5.1552873928231184e-05, "loss": 0.0483, "step": 7633 }, { "epoch": 14.534474560152164, "grad_norm": 0.16273196041584015, "learning_rate": 5.1546522705620836e-05, "loss": 0.0548, "step": 7634 }, { "epoch": 14.536376604850213, "grad_norm": 0.19954794645309448, "learning_rate": 5.154017148301048e-05, "loss": 0.0578, "step": 7635 }, { "epoch": 14.538278649548264, "grad_norm": 0.2829608917236328, "learning_rate": 5.153382026040013e-05, "loss": 0.071, "step": 7636 }, { "epoch": 14.540180694246315, "grad_norm": 0.3026480972766876, "learning_rate": 5.152746903778978e-05, "loss": 0.0726, "step": 7637 }, { "epoch": 14.542082738944366, "grad_norm": 0.22207795083522797, "learning_rate": 5.152111781517942e-05, "loss": 0.0604, "step": 7638 }, { "epoch": 14.543984783642415, "grad_norm": 0.19725818932056427, "learning_rate": 5.1514766592569075e-05, "loss": 0.0538, "step": 7639 }, { "epoch": 14.545886828340466, "grad_norm": 0.31464138627052307, "learning_rate": 5.150841536995872e-05, "loss": 0.0738, "step": 7640 }, { "epoch": 14.547788873038517, "grad_norm": 0.11473199725151062, "learning_rate": 5.150206414734837e-05, "loss": 0.0624, "step": 7641 }, { "epoch": 14.549690917736568, "grad_norm": 0.23509199917316437, "learning_rate": 5.149571292473802e-05, "loss": 0.0675, "step": 7642 }, { "epoch": 14.551592962434617, "grad_norm": 0.0961693674325943, "learning_rate": 5.148936170212766e-05, "loss": 0.0411, "step": 7643 }, { "epoch": 14.553495007132668, "grad_norm": 0.14130799472332, "learning_rate": 5.1483010479517314e-05, "loss": 0.0665, "step": 7644 }, { "epoch": 14.555397051830719, "grad_norm": 0.16295112669467926, "learning_rate": 5.147665925690696e-05, "loss": 0.0545, "step": 7645 }, { "epoch": 14.557299096528768, "grad_norm": 0.19805748760700226, "learning_rate": 5.14703080342966e-05, "loss": 0.037, "step": 7646 }, { "epoch": 14.559201141226819, "grad_norm": 0.24183066189289093, "learning_rate": 5.1463956811686256e-05, "loss": 0.0659, "step": 7647 }, { "epoch": 14.56110318592487, "grad_norm": 0.14304210245609283, "learning_rate": 5.1457605589075894e-05, "loss": 0.0733, "step": 7648 }, { "epoch": 14.56300523062292, "grad_norm": 0.08641630411148071, "learning_rate": 5.145125436646555e-05, "loss": 0.0552, "step": 7649 }, { "epoch": 14.56490727532097, "grad_norm": 0.18774019181728363, "learning_rate": 5.144490314385519e-05, "loss": 0.0556, "step": 7650 }, { "epoch": 14.56680932001902, "grad_norm": 0.1754588782787323, "learning_rate": 5.1438551921244836e-05, "loss": 0.069, "step": 7651 }, { "epoch": 14.568711364717071, "grad_norm": 0.20813584327697754, "learning_rate": 5.1432200698634495e-05, "loss": 0.0744, "step": 7652 }, { "epoch": 14.570613409415122, "grad_norm": 0.22223742306232452, "learning_rate": 5.142584947602413e-05, "loss": 0.0642, "step": 7653 }, { "epoch": 14.572515454113171, "grad_norm": 0.2160334438085556, "learning_rate": 5.141949825341379e-05, "loss": 0.0521, "step": 7654 }, { "epoch": 14.574417498811222, "grad_norm": 0.37020984292030334, "learning_rate": 5.141314703080343e-05, "loss": 0.0743, "step": 7655 }, { "epoch": 14.576319543509273, "grad_norm": 0.18275423347949982, "learning_rate": 5.1406795808193075e-05, "loss": 0.0774, "step": 7656 }, { "epoch": 14.578221588207322, "grad_norm": 0.13438047468662262, "learning_rate": 5.140044458558273e-05, "loss": 0.0702, "step": 7657 }, { "epoch": 14.580123632905373, "grad_norm": 0.1055048257112503, "learning_rate": 5.139409336297237e-05, "loss": 0.0482, "step": 7658 }, { "epoch": 14.582025677603424, "grad_norm": 0.1880851536989212, "learning_rate": 5.138774214036203e-05, "loss": 0.0708, "step": 7659 }, { "epoch": 14.583927722301475, "grad_norm": 0.26509201526641846, "learning_rate": 5.138139091775167e-05, "loss": 0.0708, "step": 7660 }, { "epoch": 14.585829766999524, "grad_norm": 0.26800549030303955, "learning_rate": 5.1375039695141314e-05, "loss": 0.0592, "step": 7661 }, { "epoch": 14.587731811697575, "grad_norm": 0.16615542769432068, "learning_rate": 5.1368688472530966e-05, "loss": 0.0532, "step": 7662 }, { "epoch": 14.589633856395626, "grad_norm": 0.29129597544670105, "learning_rate": 5.136233724992061e-05, "loss": 0.0684, "step": 7663 }, { "epoch": 14.591535901093676, "grad_norm": 0.274124413728714, "learning_rate": 5.1355986027310256e-05, "loss": 0.0641, "step": 7664 }, { "epoch": 14.593437945791726, "grad_norm": 0.20047277212142944, "learning_rate": 5.134963480469991e-05, "loss": 0.0563, "step": 7665 }, { "epoch": 14.595339990489776, "grad_norm": 0.2891108989715576, "learning_rate": 5.134328358208955e-05, "loss": 0.0593, "step": 7666 }, { "epoch": 14.597242035187827, "grad_norm": 0.14130154252052307, "learning_rate": 5.1336932359479205e-05, "loss": 0.0878, "step": 7667 }, { "epoch": 14.599144079885878, "grad_norm": 0.13794413208961487, "learning_rate": 5.133058113686885e-05, "loss": 0.0724, "step": 7668 }, { "epoch": 14.601046124583927, "grad_norm": 0.08242025971412659, "learning_rate": 5.1324229914258495e-05, "loss": 0.0497, "step": 7669 }, { "epoch": 14.602948169281978, "grad_norm": 0.15497428178787231, "learning_rate": 5.1317878691648147e-05, "loss": 0.0648, "step": 7670 }, { "epoch": 14.60485021398003, "grad_norm": 0.1163494810461998, "learning_rate": 5.131152746903779e-05, "loss": 0.0691, "step": 7671 }, { "epoch": 14.606752258678078, "grad_norm": 0.1807105541229248, "learning_rate": 5.1305176246427443e-05, "loss": 0.0554, "step": 7672 }, { "epoch": 14.60865430337613, "grad_norm": 0.19974341988563538, "learning_rate": 5.129882502381709e-05, "loss": 0.0519, "step": 7673 }, { "epoch": 14.61055634807418, "grad_norm": 0.30416256189346313, "learning_rate": 5.1292473801206734e-05, "loss": 0.0661, "step": 7674 }, { "epoch": 14.612458392772231, "grad_norm": 0.1608065962791443, "learning_rate": 5.1286122578596385e-05, "loss": 0.0703, "step": 7675 }, { "epoch": 14.61436043747028, "grad_norm": 0.2223321944475174, "learning_rate": 5.127977135598603e-05, "loss": 0.0649, "step": 7676 }, { "epoch": 14.616262482168331, "grad_norm": 0.19103401899337769, "learning_rate": 5.127342013337568e-05, "loss": 0.0378, "step": 7677 }, { "epoch": 14.618164526866382, "grad_norm": 0.20109812915325165, "learning_rate": 5.126706891076533e-05, "loss": 0.0795, "step": 7678 }, { "epoch": 14.620066571564433, "grad_norm": 0.09337878972291946, "learning_rate": 5.1260717688154966e-05, "loss": 0.0605, "step": 7679 }, { "epoch": 14.621968616262482, "grad_norm": 0.20119719207286835, "learning_rate": 5.1254366465544624e-05, "loss": 0.0554, "step": 7680 }, { "epoch": 14.623870660960533, "grad_norm": 0.1166246086359024, "learning_rate": 5.124801524293427e-05, "loss": 0.0616, "step": 7681 }, { "epoch": 14.625772705658584, "grad_norm": 0.1417192667722702, "learning_rate": 5.124166402032391e-05, "loss": 0.0679, "step": 7682 }, { "epoch": 14.627674750356633, "grad_norm": 0.2099301666021347, "learning_rate": 5.1235312797713566e-05, "loss": 0.0563, "step": 7683 }, { "epoch": 14.629576795054684, "grad_norm": 0.1745438575744629, "learning_rate": 5.1228961575103205e-05, "loss": 0.0482, "step": 7684 }, { "epoch": 14.631478839752734, "grad_norm": 0.1499902904033661, "learning_rate": 5.122261035249286e-05, "loss": 0.0668, "step": 7685 }, { "epoch": 14.633380884450785, "grad_norm": 0.15425054728984833, "learning_rate": 5.12162591298825e-05, "loss": 0.0446, "step": 7686 }, { "epoch": 14.635282929148834, "grad_norm": 0.2625950276851654, "learning_rate": 5.1209907907272147e-05, "loss": 0.0621, "step": 7687 }, { "epoch": 14.637184973846885, "grad_norm": 0.1606355756521225, "learning_rate": 5.1203556684661805e-05, "loss": 0.0551, "step": 7688 }, { "epoch": 14.639087018544936, "grad_norm": 0.16646242141723633, "learning_rate": 5.1197205462051443e-05, "loss": 0.0491, "step": 7689 }, { "epoch": 14.640989063242987, "grad_norm": 0.26278549432754517, "learning_rate": 5.11908542394411e-05, "loss": 0.0761, "step": 7690 }, { "epoch": 14.642891107941036, "grad_norm": 0.11060616374015808, "learning_rate": 5.118450301683074e-05, "loss": 0.0639, "step": 7691 }, { "epoch": 14.644793152639087, "grad_norm": 0.2710588276386261, "learning_rate": 5.1178151794220385e-05, "loss": 0.0704, "step": 7692 }, { "epoch": 14.646695197337138, "grad_norm": 0.2786422371864319, "learning_rate": 5.117180057161004e-05, "loss": 0.0718, "step": 7693 }, { "epoch": 14.648597242035187, "grad_norm": 0.16812217235565186, "learning_rate": 5.116544934899968e-05, "loss": 0.0705, "step": 7694 }, { "epoch": 14.650499286733238, "grad_norm": 0.14156568050384521, "learning_rate": 5.115909812638934e-05, "loss": 0.0712, "step": 7695 }, { "epoch": 14.652401331431289, "grad_norm": 0.1136791855096817, "learning_rate": 5.115274690377898e-05, "loss": 0.0477, "step": 7696 }, { "epoch": 14.65430337612934, "grad_norm": 0.3072340488433838, "learning_rate": 5.1146395681168624e-05, "loss": 0.0911, "step": 7697 }, { "epoch": 14.656205420827389, "grad_norm": 0.2108566164970398, "learning_rate": 5.1140044458558276e-05, "loss": 0.0518, "step": 7698 }, { "epoch": 14.65810746552544, "grad_norm": 0.20374548435211182, "learning_rate": 5.113369323594792e-05, "loss": 0.0459, "step": 7699 }, { "epoch": 14.66000951022349, "grad_norm": 0.20516937971115112, "learning_rate": 5.1127342013337566e-05, "loss": 0.0601, "step": 7700 }, { "epoch": 14.661911554921542, "grad_norm": 0.19593757390975952, "learning_rate": 5.112099079072722e-05, "loss": 0.0688, "step": 7701 }, { "epoch": 14.66381359961959, "grad_norm": 0.13337576389312744, "learning_rate": 5.111463956811686e-05, "loss": 0.0534, "step": 7702 }, { "epoch": 14.665715644317642, "grad_norm": 0.1885136514902115, "learning_rate": 5.1108288345506515e-05, "loss": 0.0588, "step": 7703 }, { "epoch": 14.667617689015692, "grad_norm": 0.2416466921567917, "learning_rate": 5.110193712289616e-05, "loss": 0.0813, "step": 7704 }, { "epoch": 14.669519733713742, "grad_norm": 0.13200879096984863, "learning_rate": 5.1095585900285805e-05, "loss": 0.063, "step": 7705 }, { "epoch": 14.671421778411792, "grad_norm": 0.2144845873117447, "learning_rate": 5.108923467767546e-05, "loss": 0.0629, "step": 7706 }, { "epoch": 14.673323823109843, "grad_norm": 0.12554292380809784, "learning_rate": 5.10828834550651e-05, "loss": 0.0569, "step": 7707 }, { "epoch": 14.675225867807894, "grad_norm": 0.18374310433864594, "learning_rate": 5.1076532232454754e-05, "loss": 0.0688, "step": 7708 }, { "epoch": 14.677127912505943, "grad_norm": 0.17229317128658295, "learning_rate": 5.10701810098444e-05, "loss": 0.0593, "step": 7709 }, { "epoch": 14.679029957203994, "grad_norm": 0.18080811202526093, "learning_rate": 5.1063829787234044e-05, "loss": 0.0567, "step": 7710 }, { "epoch": 14.680932001902045, "grad_norm": 0.19398432970046997, "learning_rate": 5.1057478564623696e-05, "loss": 0.0641, "step": 7711 }, { "epoch": 14.682834046600096, "grad_norm": 0.37756291031837463, "learning_rate": 5.105112734201334e-05, "loss": 0.0759, "step": 7712 }, { "epoch": 14.684736091298145, "grad_norm": 0.1937006264925003, "learning_rate": 5.104477611940299e-05, "loss": 0.0867, "step": 7713 }, { "epoch": 14.686638135996196, "grad_norm": 0.20375970005989075, "learning_rate": 5.103842489679264e-05, "loss": 0.0539, "step": 7714 }, { "epoch": 14.688540180694247, "grad_norm": 0.19085298478603363, "learning_rate": 5.1032073674182276e-05, "loss": 0.0603, "step": 7715 }, { "epoch": 14.690442225392296, "grad_norm": 0.24104918539524078, "learning_rate": 5.1025722451571935e-05, "loss": 0.0486, "step": 7716 }, { "epoch": 14.692344270090347, "grad_norm": 0.279154509305954, "learning_rate": 5.101937122896158e-05, "loss": 0.0793, "step": 7717 }, { "epoch": 14.694246314788398, "grad_norm": 0.1371929794549942, "learning_rate": 5.101302000635122e-05, "loss": 0.0387, "step": 7718 }, { "epoch": 14.696148359486449, "grad_norm": 0.22420379519462585, "learning_rate": 5.100666878374088e-05, "loss": 0.0554, "step": 7719 }, { "epoch": 14.698050404184498, "grad_norm": 0.3003183901309967, "learning_rate": 5.1000317561130515e-05, "loss": 0.0717, "step": 7720 }, { "epoch": 14.699952448882549, "grad_norm": 0.18145035207271576, "learning_rate": 5.0993966338520174e-05, "loss": 0.0686, "step": 7721 }, { "epoch": 14.7018544935806, "grad_norm": 0.23177501559257507, "learning_rate": 5.098761511590981e-05, "loss": 0.0844, "step": 7722 }, { "epoch": 14.70375653827865, "grad_norm": 0.213920459151268, "learning_rate": 5.098126389329946e-05, "loss": 0.0715, "step": 7723 }, { "epoch": 14.7056585829767, "grad_norm": 0.2284982055425644, "learning_rate": 5.0974912670689116e-05, "loss": 0.0515, "step": 7724 }, { "epoch": 14.70756062767475, "grad_norm": 0.2825984060764313, "learning_rate": 5.0968561448078754e-05, "loss": 0.0701, "step": 7725 }, { "epoch": 14.709462672372801, "grad_norm": 0.19270162284374237, "learning_rate": 5.096221022546841e-05, "loss": 0.0891, "step": 7726 }, { "epoch": 14.71136471707085, "grad_norm": 0.12928995490074158, "learning_rate": 5.095585900285805e-05, "loss": 0.054, "step": 7727 }, { "epoch": 14.713266761768901, "grad_norm": 0.1467064470052719, "learning_rate": 5.0949507780247696e-05, "loss": 0.0634, "step": 7728 }, { "epoch": 14.715168806466952, "grad_norm": 0.28742527961730957, "learning_rate": 5.094315655763735e-05, "loss": 0.0631, "step": 7729 }, { "epoch": 14.717070851165003, "grad_norm": 0.22021564841270447, "learning_rate": 5.093680533502699e-05, "loss": 0.0653, "step": 7730 }, { "epoch": 14.718972895863052, "grad_norm": 0.21744635701179504, "learning_rate": 5.0930454112416645e-05, "loss": 0.0552, "step": 7731 }, { "epoch": 14.720874940561103, "grad_norm": 0.10791151225566864, "learning_rate": 5.092410288980629e-05, "loss": 0.0515, "step": 7732 }, { "epoch": 14.722776985259154, "grad_norm": 0.16238145530223846, "learning_rate": 5.0917751667195935e-05, "loss": 0.0537, "step": 7733 }, { "epoch": 14.724679029957205, "grad_norm": 0.22757276892662048, "learning_rate": 5.0911400444585587e-05, "loss": 0.0653, "step": 7734 }, { "epoch": 14.726581074655254, "grad_norm": 0.17570172250270844, "learning_rate": 5.090504922197523e-05, "loss": 0.0597, "step": 7735 }, { "epoch": 14.728483119353305, "grad_norm": 0.174399271607399, "learning_rate": 5.089869799936488e-05, "loss": 0.0593, "step": 7736 }, { "epoch": 14.730385164051356, "grad_norm": 0.14591853320598602, "learning_rate": 5.089234677675453e-05, "loss": 0.0717, "step": 7737 }, { "epoch": 14.732287208749405, "grad_norm": 0.17062704265117645, "learning_rate": 5.0885995554144174e-05, "loss": 0.0635, "step": 7738 }, { "epoch": 14.734189253447456, "grad_norm": 0.24444295465946198, "learning_rate": 5.0879644331533825e-05, "loss": 0.0645, "step": 7739 }, { "epoch": 14.736091298145507, "grad_norm": 0.1436753273010254, "learning_rate": 5.087329310892347e-05, "loss": 0.0571, "step": 7740 }, { "epoch": 14.737993342843557, "grad_norm": 0.18597128987312317, "learning_rate": 5.0866941886313116e-05, "loss": 0.0626, "step": 7741 }, { "epoch": 14.739895387541607, "grad_norm": 0.1565275639295578, "learning_rate": 5.086059066370277e-05, "loss": 0.0503, "step": 7742 }, { "epoch": 14.741797432239657, "grad_norm": 0.20083200931549072, "learning_rate": 5.085423944109241e-05, "loss": 0.0663, "step": 7743 }, { "epoch": 14.743699476937708, "grad_norm": 0.17310886085033417, "learning_rate": 5.0847888218482064e-05, "loss": 0.0729, "step": 7744 }, { "epoch": 14.74560152163576, "grad_norm": 0.1712331473827362, "learning_rate": 5.084153699587171e-05, "loss": 0.0628, "step": 7745 }, { "epoch": 14.747503566333808, "grad_norm": 0.29841336607933044, "learning_rate": 5.0835185773261354e-05, "loss": 0.0576, "step": 7746 }, { "epoch": 14.74940561103186, "grad_norm": 0.27211645245552063, "learning_rate": 5.0828834550651006e-05, "loss": 0.05, "step": 7747 }, { "epoch": 14.75130765572991, "grad_norm": 0.1731298565864563, "learning_rate": 5.082248332804065e-05, "loss": 0.0679, "step": 7748 }, { "epoch": 14.75320970042796, "grad_norm": 0.1546994149684906, "learning_rate": 5.08161321054303e-05, "loss": 0.0518, "step": 7749 }, { "epoch": 14.75511174512601, "grad_norm": 0.20409636199474335, "learning_rate": 5.080978088281995e-05, "loss": 0.0883, "step": 7750 }, { "epoch": 14.757013789824061, "grad_norm": 0.194264218211174, "learning_rate": 5.0803429660209587e-05, "loss": 0.0677, "step": 7751 }, { "epoch": 14.758915834522112, "grad_norm": 0.209025040268898, "learning_rate": 5.0797078437599245e-05, "loss": 0.0578, "step": 7752 }, { "epoch": 14.760817879220161, "grad_norm": 0.24536709487438202, "learning_rate": 5.079072721498889e-05, "loss": 0.051, "step": 7753 }, { "epoch": 14.762719923918212, "grad_norm": 0.2700499892234802, "learning_rate": 5.078437599237853e-05, "loss": 0.0642, "step": 7754 }, { "epoch": 14.764621968616263, "grad_norm": 0.16249141097068787, "learning_rate": 5.077802476976819e-05, "loss": 0.061, "step": 7755 }, { "epoch": 14.766524013314314, "grad_norm": 0.10296684503555298, "learning_rate": 5.0771673547157825e-05, "loss": 0.0474, "step": 7756 }, { "epoch": 14.768426058012363, "grad_norm": 0.29506006836891174, "learning_rate": 5.0765322324547484e-05, "loss": 0.0731, "step": 7757 }, { "epoch": 14.770328102710414, "grad_norm": 0.21537403762340546, "learning_rate": 5.075897110193712e-05, "loss": 0.0649, "step": 7758 }, { "epoch": 14.772230147408465, "grad_norm": 0.2675336003303528, "learning_rate": 5.075261987932677e-05, "loss": 0.0781, "step": 7759 }, { "epoch": 14.774132192106514, "grad_norm": 0.14951659739017487, "learning_rate": 5.074626865671642e-05, "loss": 0.055, "step": 7760 }, { "epoch": 14.776034236804565, "grad_norm": 0.13454633951187134, "learning_rate": 5.0739917434106064e-05, "loss": 0.0498, "step": 7761 }, { "epoch": 14.777936281502615, "grad_norm": 0.08589652925729752, "learning_rate": 5.073356621149572e-05, "loss": 0.0652, "step": 7762 }, { "epoch": 14.779838326200666, "grad_norm": 0.1903010755777359, "learning_rate": 5.072721498888536e-05, "loss": 0.0731, "step": 7763 }, { "epoch": 14.781740370898715, "grad_norm": 0.15340054035186768, "learning_rate": 5.0720863766275006e-05, "loss": 0.0498, "step": 7764 }, { "epoch": 14.783642415596766, "grad_norm": 0.1111394464969635, "learning_rate": 5.071451254366466e-05, "loss": 0.033, "step": 7765 }, { "epoch": 14.785544460294817, "grad_norm": 0.1255098134279251, "learning_rate": 5.07081613210543e-05, "loss": 0.0609, "step": 7766 }, { "epoch": 14.787446504992868, "grad_norm": 0.26106560230255127, "learning_rate": 5.0701810098443955e-05, "loss": 0.0579, "step": 7767 }, { "epoch": 14.789348549690917, "grad_norm": 0.23423247039318085, "learning_rate": 5.06954588758336e-05, "loss": 0.0514, "step": 7768 }, { "epoch": 14.791250594388968, "grad_norm": 0.12845613062381744, "learning_rate": 5.0689107653223245e-05, "loss": 0.055, "step": 7769 }, { "epoch": 14.793152639087019, "grad_norm": 0.17294085025787354, "learning_rate": 5.06827564306129e-05, "loss": 0.066, "step": 7770 }, { "epoch": 14.795054683785068, "grad_norm": 0.18765872716903687, "learning_rate": 5.067640520800254e-05, "loss": 0.0577, "step": 7771 }, { "epoch": 14.796956728483119, "grad_norm": 0.17650574445724487, "learning_rate": 5.067005398539219e-05, "loss": 0.0841, "step": 7772 }, { "epoch": 14.79885877318117, "grad_norm": 0.19177024066448212, "learning_rate": 5.066370276278184e-05, "loss": 0.0681, "step": 7773 }, { "epoch": 14.80076081787922, "grad_norm": 0.2309020757675171, "learning_rate": 5.0657351540171484e-05, "loss": 0.0791, "step": 7774 }, { "epoch": 14.80266286257727, "grad_norm": 0.17799893021583557, "learning_rate": 5.0651000317561136e-05, "loss": 0.0553, "step": 7775 }, { "epoch": 14.80456490727532, "grad_norm": 0.25767970085144043, "learning_rate": 5.064464909495078e-05, "loss": 0.0489, "step": 7776 }, { "epoch": 14.806466951973372, "grad_norm": 0.11283757537603378, "learning_rate": 5.0638297872340426e-05, "loss": 0.0544, "step": 7777 }, { "epoch": 14.808368996671422, "grad_norm": 0.15177033841609955, "learning_rate": 5.063194664973008e-05, "loss": 0.0546, "step": 7778 }, { "epoch": 14.810271041369472, "grad_norm": 0.1496088206768036, "learning_rate": 5.062559542711972e-05, "loss": 0.0604, "step": 7779 }, { "epoch": 14.812173086067522, "grad_norm": 0.21631433069705963, "learning_rate": 5.0619244204509375e-05, "loss": 0.0499, "step": 7780 }, { "epoch": 14.814075130765573, "grad_norm": 0.22041334211826324, "learning_rate": 5.061289298189902e-05, "loss": 0.0393, "step": 7781 }, { "epoch": 14.815977175463622, "grad_norm": 0.18054498732089996, "learning_rate": 5.060654175928866e-05, "loss": 0.0626, "step": 7782 }, { "epoch": 14.817879220161673, "grad_norm": 0.22049325704574585, "learning_rate": 5.060019053667832e-05, "loss": 0.0625, "step": 7783 }, { "epoch": 14.819781264859724, "grad_norm": 0.19691422581672668, "learning_rate": 5.059383931406796e-05, "loss": 0.0509, "step": 7784 }, { "epoch": 14.821683309557775, "grad_norm": 0.20794346928596497, "learning_rate": 5.0587488091457614e-05, "loss": 0.0625, "step": 7785 }, { "epoch": 14.823585354255824, "grad_norm": 0.18965652585029602, "learning_rate": 5.058113686884726e-05, "loss": 0.0545, "step": 7786 }, { "epoch": 14.825487398953875, "grad_norm": 0.13831253349781036, "learning_rate": 5.05747856462369e-05, "loss": 0.0694, "step": 7787 }, { "epoch": 14.827389443651926, "grad_norm": 0.34300696849823, "learning_rate": 5.0568434423626556e-05, "loss": 0.0767, "step": 7788 }, { "epoch": 14.829291488349977, "grad_norm": 0.2049691379070282, "learning_rate": 5.0562083201016194e-05, "loss": 0.0651, "step": 7789 }, { "epoch": 14.831193533048026, "grad_norm": 0.11393442749977112, "learning_rate": 5.055573197840584e-05, "loss": 0.0624, "step": 7790 }, { "epoch": 14.833095577746077, "grad_norm": 0.17523103952407837, "learning_rate": 5.05493807557955e-05, "loss": 0.0665, "step": 7791 }, { "epoch": 14.834997622444128, "grad_norm": 0.24641826748847961, "learning_rate": 5.0543029533185136e-05, "loss": 0.0644, "step": 7792 }, { "epoch": 14.836899667142179, "grad_norm": 0.24968832731246948, "learning_rate": 5.0536678310574794e-05, "loss": 0.0612, "step": 7793 }, { "epoch": 14.838801711840228, "grad_norm": 0.18396158516407013, "learning_rate": 5.053032708796443e-05, "loss": 0.0635, "step": 7794 }, { "epoch": 14.840703756538279, "grad_norm": 0.21605823934078217, "learning_rate": 5.052397586535408e-05, "loss": 0.0754, "step": 7795 }, { "epoch": 14.84260580123633, "grad_norm": 0.08425106108188629, "learning_rate": 5.051762464274373e-05, "loss": 0.054, "step": 7796 }, { "epoch": 14.844507845934379, "grad_norm": 0.13125087320804596, "learning_rate": 5.0511273420133375e-05, "loss": 0.0476, "step": 7797 }, { "epoch": 14.84640989063243, "grad_norm": 0.33811628818511963, "learning_rate": 5.050492219752303e-05, "loss": 0.0699, "step": 7798 }, { "epoch": 14.84831193533048, "grad_norm": 0.16803185641765594, "learning_rate": 5.049857097491267e-05, "loss": 0.0601, "step": 7799 }, { "epoch": 14.850213980028531, "grad_norm": 0.16089555621147156, "learning_rate": 5.049221975230232e-05, "loss": 0.0646, "step": 7800 }, { "epoch": 14.85211602472658, "grad_norm": 0.1432085484266281, "learning_rate": 5.048586852969197e-05, "loss": 0.0605, "step": 7801 }, { "epoch": 14.854018069424631, "grad_norm": 0.1569204181432724, "learning_rate": 5.0479517307081614e-05, "loss": 0.056, "step": 7802 }, { "epoch": 14.855920114122682, "grad_norm": 0.28553643822669983, "learning_rate": 5.0473166084471265e-05, "loss": 0.062, "step": 7803 }, { "epoch": 14.857822158820733, "grad_norm": 0.1551143079996109, "learning_rate": 5.046681486186091e-05, "loss": 0.0694, "step": 7804 }, { "epoch": 14.859724203518782, "grad_norm": 0.21383574604988098, "learning_rate": 5.0460463639250556e-05, "loss": 0.0762, "step": 7805 }, { "epoch": 14.861626248216833, "grad_norm": 0.14724215865135193, "learning_rate": 5.045411241664021e-05, "loss": 0.0593, "step": 7806 }, { "epoch": 14.863528292914884, "grad_norm": 0.1819068044424057, "learning_rate": 5.044776119402985e-05, "loss": 0.0648, "step": 7807 }, { "epoch": 14.865430337612933, "grad_norm": 0.2799537479877472, "learning_rate": 5.04414099714195e-05, "loss": 0.0537, "step": 7808 }, { "epoch": 14.867332382310984, "grad_norm": 0.2654231786727905, "learning_rate": 5.043505874880915e-05, "loss": 0.0573, "step": 7809 }, { "epoch": 14.869234427009035, "grad_norm": 0.2610599994659424, "learning_rate": 5.0428707526198794e-05, "loss": 0.065, "step": 7810 }, { "epoch": 14.871136471707086, "grad_norm": 0.19513475894927979, "learning_rate": 5.0422356303588446e-05, "loss": 0.0696, "step": 7811 }, { "epoch": 14.873038516405135, "grad_norm": 0.1932954043149948, "learning_rate": 5.041600508097809e-05, "loss": 0.0477, "step": 7812 }, { "epoch": 14.874940561103186, "grad_norm": 0.19325312972068787, "learning_rate": 5.0409653858367736e-05, "loss": 0.0793, "step": 7813 }, { "epoch": 14.876842605801237, "grad_norm": 0.14803767204284668, "learning_rate": 5.040330263575739e-05, "loss": 0.057, "step": 7814 }, { "epoch": 14.878744650499288, "grad_norm": 0.18834733963012695, "learning_rate": 5.039695141314703e-05, "loss": 0.0967, "step": 7815 }, { "epoch": 14.880646695197337, "grad_norm": 0.21755129098892212, "learning_rate": 5.0390600190536685e-05, "loss": 0.0525, "step": 7816 }, { "epoch": 14.882548739895388, "grad_norm": 0.12757804989814758, "learning_rate": 5.038424896792633e-05, "loss": 0.0598, "step": 7817 }, { "epoch": 14.884450784593438, "grad_norm": 0.29833969473838806, "learning_rate": 5.037789774531597e-05, "loss": 0.0691, "step": 7818 }, { "epoch": 14.88635282929149, "grad_norm": 0.20505289733409882, "learning_rate": 5.037154652270563e-05, "loss": 0.0533, "step": 7819 }, { "epoch": 14.888254873989538, "grad_norm": 0.25543922185897827, "learning_rate": 5.036519530009527e-05, "loss": 0.0661, "step": 7820 }, { "epoch": 14.89015691868759, "grad_norm": 0.3089730441570282, "learning_rate": 5.0358844077484924e-05, "loss": 0.0728, "step": 7821 }, { "epoch": 14.89205896338564, "grad_norm": 0.14130769670009613, "learning_rate": 5.035249285487457e-05, "loss": 0.0575, "step": 7822 }, { "epoch": 14.89396100808369, "grad_norm": 0.24397386610507965, "learning_rate": 5.034614163226421e-05, "loss": 0.0647, "step": 7823 }, { "epoch": 14.89586305278174, "grad_norm": 0.18317031860351562, "learning_rate": 5.0339790409653866e-05, "loss": 0.0417, "step": 7824 }, { "epoch": 14.897765097479791, "grad_norm": 0.2685723602771759, "learning_rate": 5.0333439187043504e-05, "loss": 0.095, "step": 7825 }, { "epoch": 14.899667142177842, "grad_norm": 0.3372928500175476, "learning_rate": 5.032708796443315e-05, "loss": 0.0587, "step": 7826 }, { "epoch": 14.901569186875891, "grad_norm": 0.4275154769420624, "learning_rate": 5.032073674182281e-05, "loss": 0.0754, "step": 7827 }, { "epoch": 14.903471231573942, "grad_norm": 0.15616847574710846, "learning_rate": 5.0314385519212446e-05, "loss": 0.0596, "step": 7828 }, { "epoch": 14.905373276271993, "grad_norm": 0.12086091190576553, "learning_rate": 5.0308034296602105e-05, "loss": 0.0386, "step": 7829 }, { "epoch": 14.907275320970044, "grad_norm": 0.16133488714694977, "learning_rate": 5.030168307399174e-05, "loss": 0.0453, "step": 7830 }, { "epoch": 14.909177365668093, "grad_norm": 0.43575403094291687, "learning_rate": 5.029533185138139e-05, "loss": 0.0724, "step": 7831 }, { "epoch": 14.911079410366144, "grad_norm": 0.4218147099018097, "learning_rate": 5.028898062877104e-05, "loss": 0.0725, "step": 7832 }, { "epoch": 14.912981455064195, "grad_norm": 0.2845557630062103, "learning_rate": 5.0282629406160685e-05, "loss": 0.0721, "step": 7833 }, { "epoch": 14.914883499762244, "grad_norm": 0.16808871924877167, "learning_rate": 5.0276278183550344e-05, "loss": 0.0776, "step": 7834 }, { "epoch": 14.916785544460295, "grad_norm": 0.3421449661254883, "learning_rate": 5.026992696093998e-05, "loss": 0.0713, "step": 7835 }, { "epoch": 14.918687589158345, "grad_norm": 0.22598189115524292, "learning_rate": 5.026357573832963e-05, "loss": 0.0606, "step": 7836 }, { "epoch": 14.920589633856396, "grad_norm": 0.2761426568031311, "learning_rate": 5.025722451571928e-05, "loss": 0.0753, "step": 7837 }, { "epoch": 14.922491678554445, "grad_norm": 0.2742650806903839, "learning_rate": 5.0250873293108924e-05, "loss": 0.1, "step": 7838 }, { "epoch": 14.924393723252496, "grad_norm": 0.12055804580450058, "learning_rate": 5.0244522070498576e-05, "loss": 0.0651, "step": 7839 }, { "epoch": 14.926295767950547, "grad_norm": 0.1872662454843521, "learning_rate": 5.023817084788822e-05, "loss": 0.056, "step": 7840 }, { "epoch": 14.928197812648598, "grad_norm": 0.19098106026649475, "learning_rate": 5.0231819625277866e-05, "loss": 0.0653, "step": 7841 }, { "epoch": 14.930099857346647, "grad_norm": 0.18531818687915802, "learning_rate": 5.022546840266752e-05, "loss": 0.0553, "step": 7842 }, { "epoch": 14.932001902044698, "grad_norm": 0.48915934562683105, "learning_rate": 5.021911718005716e-05, "loss": 0.076, "step": 7843 }, { "epoch": 14.933903946742749, "grad_norm": 0.18385091423988342, "learning_rate": 5.021276595744681e-05, "loss": 0.1163, "step": 7844 }, { "epoch": 14.935805991440798, "grad_norm": 0.19059492647647858, "learning_rate": 5.020641473483646e-05, "loss": 0.0706, "step": 7845 }, { "epoch": 14.937708036138849, "grad_norm": 0.17051951587200165, "learning_rate": 5.0200063512226105e-05, "loss": 0.0592, "step": 7846 }, { "epoch": 14.9396100808369, "grad_norm": 0.13963809609413147, "learning_rate": 5.019371228961576e-05, "loss": 0.038, "step": 7847 }, { "epoch": 14.94151212553495, "grad_norm": 0.16215941309928894, "learning_rate": 5.01873610670054e-05, "loss": 0.0542, "step": 7848 }, { "epoch": 14.943414170233, "grad_norm": 0.25809141993522644, "learning_rate": 5.018100984439505e-05, "loss": 0.095, "step": 7849 }, { "epoch": 14.94531621493105, "grad_norm": 0.16990099847316742, "learning_rate": 5.01746586217847e-05, "loss": 0.0739, "step": 7850 }, { "epoch": 14.947218259629102, "grad_norm": 0.22535528242588043, "learning_rate": 5.0168307399174344e-05, "loss": 0.065, "step": 7851 }, { "epoch": 14.949120304327153, "grad_norm": 0.13045746088027954, "learning_rate": 5.0161956176563996e-05, "loss": 0.0592, "step": 7852 }, { "epoch": 14.951022349025202, "grad_norm": 0.14321979880332947, "learning_rate": 5.015560495395364e-05, "loss": 0.0344, "step": 7853 }, { "epoch": 14.952924393723253, "grad_norm": 0.20798403024673462, "learning_rate": 5.014925373134328e-05, "loss": 0.0543, "step": 7854 }, { "epoch": 14.954826438421303, "grad_norm": 0.35874882340431213, "learning_rate": 5.014290250873294e-05, "loss": 0.0715, "step": 7855 }, { "epoch": 14.956728483119353, "grad_norm": 0.2704923748970032, "learning_rate": 5.013655128612258e-05, "loss": 0.0597, "step": 7856 }, { "epoch": 14.958630527817403, "grad_norm": 0.18080563843250275, "learning_rate": 5.0130200063512235e-05, "loss": 0.0629, "step": 7857 }, { "epoch": 14.960532572515454, "grad_norm": 0.3157869577407837, "learning_rate": 5.012384884090188e-05, "loss": 0.0878, "step": 7858 }, { "epoch": 14.962434617213505, "grad_norm": 0.3106921315193176, "learning_rate": 5.011749761829152e-05, "loss": 0.0566, "step": 7859 }, { "epoch": 14.964336661911554, "grad_norm": 0.20733977854251862, "learning_rate": 5.0111146395681176e-05, "loss": 0.0507, "step": 7860 }, { "epoch": 14.966238706609605, "grad_norm": 0.14127065241336823, "learning_rate": 5.0104795173070815e-05, "loss": 0.0591, "step": 7861 }, { "epoch": 14.968140751307656, "grad_norm": 0.1346455067396164, "learning_rate": 5.009844395046046e-05, "loss": 0.0573, "step": 7862 }, { "epoch": 14.970042796005707, "grad_norm": 0.18170590698719025, "learning_rate": 5.009209272785012e-05, "loss": 0.0428, "step": 7863 }, { "epoch": 14.971944840703756, "grad_norm": 0.32324326038360596, "learning_rate": 5.008574150523976e-05, "loss": 0.0801, "step": 7864 }, { "epoch": 14.973846885401807, "grad_norm": 0.1589668244123459, "learning_rate": 5.0079390282629415e-05, "loss": 0.0513, "step": 7865 }, { "epoch": 14.975748930099858, "grad_norm": 0.1323167234659195, "learning_rate": 5.0073039060019054e-05, "loss": 0.0581, "step": 7866 }, { "epoch": 14.977650974797907, "grad_norm": 0.28893229365348816, "learning_rate": 5.00666878374087e-05, "loss": 0.0721, "step": 7867 }, { "epoch": 14.979553019495958, "grad_norm": 0.2315034717321396, "learning_rate": 5.006033661479835e-05, "loss": 0.0652, "step": 7868 }, { "epoch": 14.981455064194009, "grad_norm": 0.2682128846645355, "learning_rate": 5.0053985392187996e-05, "loss": 0.07, "step": 7869 }, { "epoch": 14.98335710889206, "grad_norm": 0.1930389106273651, "learning_rate": 5.004763416957765e-05, "loss": 0.0521, "step": 7870 }, { "epoch": 14.985259153590109, "grad_norm": 0.13474808633327484, "learning_rate": 5.004128294696729e-05, "loss": 0.0716, "step": 7871 }, { "epoch": 14.98716119828816, "grad_norm": 0.1883508861064911, "learning_rate": 5.003493172435694e-05, "loss": 0.0628, "step": 7872 }, { "epoch": 14.98906324298621, "grad_norm": 0.1612243503332138, "learning_rate": 5.002858050174659e-05, "loss": 0.0585, "step": 7873 }, { "epoch": 14.990965287684261, "grad_norm": 0.3346334397792816, "learning_rate": 5.0022229279136235e-05, "loss": 0.0723, "step": 7874 }, { "epoch": 14.99286733238231, "grad_norm": 0.26011380553245544, "learning_rate": 5.0015878056525886e-05, "loss": 0.0682, "step": 7875 }, { "epoch": 14.994769377080361, "grad_norm": 0.23277519643306732, "learning_rate": 5.000952683391553e-05, "loss": 0.0716, "step": 7876 }, { "epoch": 14.996671421778412, "grad_norm": 0.12851858139038086, "learning_rate": 5.0003175611305176e-05, "loss": 0.0756, "step": 7877 }, { "epoch": 14.998573466476461, "grad_norm": 0.16410908102989197, "learning_rate": 4.999682438869482e-05, "loss": 0.0611, "step": 7878 }, { "epoch": 15.000475511174512, "grad_norm": 0.3350670635700226, "learning_rate": 4.999047316608447e-05, "loss": 0.0883, "step": 7879 }, { "epoch": 15.002377555872563, "grad_norm": 0.26460301876068115, "learning_rate": 4.998412194347412e-05, "loss": 0.0813, "step": 7880 }, { "epoch": 15.004279600570614, "grad_norm": 0.14847742021083832, "learning_rate": 4.997777072086377e-05, "loss": 0.0532, "step": 7881 }, { "epoch": 15.006181645268663, "grad_norm": 0.25994306802749634, "learning_rate": 4.9971419498253415e-05, "loss": 0.0624, "step": 7882 }, { "epoch": 15.008083689966714, "grad_norm": 0.10206472128629684, "learning_rate": 4.996506827564306e-05, "loss": 0.0463, "step": 7883 }, { "epoch": 15.009985734664765, "grad_norm": 0.10814502090215683, "learning_rate": 4.995871705303271e-05, "loss": 0.0604, "step": 7884 }, { "epoch": 15.011887779362816, "grad_norm": 0.23959451913833618, "learning_rate": 4.995236583042236e-05, "loss": 0.0629, "step": 7885 }, { "epoch": 15.013789824060865, "grad_norm": 0.1656973958015442, "learning_rate": 4.994601460781201e-05, "loss": 0.0659, "step": 7886 }, { "epoch": 15.015691868758916, "grad_norm": 0.18013009428977966, "learning_rate": 4.9939663385201654e-05, "loss": 0.0403, "step": 7887 }, { "epoch": 15.017593913456967, "grad_norm": 0.19583438336849213, "learning_rate": 4.99333121625913e-05, "loss": 0.0482, "step": 7888 }, { "epoch": 15.019495958155016, "grad_norm": 0.07162532955408096, "learning_rate": 4.992696093998095e-05, "loss": 0.0537, "step": 7889 }, { "epoch": 15.021398002853067, "grad_norm": 0.1543794721364975, "learning_rate": 4.9920609717370596e-05, "loss": 0.0523, "step": 7890 }, { "epoch": 15.023300047551118, "grad_norm": 0.22192519903182983, "learning_rate": 4.991425849476024e-05, "loss": 0.071, "step": 7891 }, { "epoch": 15.025202092249168, "grad_norm": 0.13602390885353088, "learning_rate": 4.9907907272149886e-05, "loss": 0.0617, "step": 7892 }, { "epoch": 15.027104136947218, "grad_norm": 0.1319774091243744, "learning_rate": 4.990155604953954e-05, "loss": 0.0683, "step": 7893 }, { "epoch": 15.029006181645268, "grad_norm": 0.2293819636106491, "learning_rate": 4.989520482692919e-05, "loss": 0.0622, "step": 7894 }, { "epoch": 15.03090822634332, "grad_norm": 0.15792877972126007, "learning_rate": 4.9888853604318835e-05, "loss": 0.0599, "step": 7895 }, { "epoch": 15.03281027104137, "grad_norm": 0.13609178364276886, "learning_rate": 4.988250238170848e-05, "loss": 0.046, "step": 7896 }, { "epoch": 15.03471231573942, "grad_norm": 0.1835666298866272, "learning_rate": 4.9876151159098125e-05, "loss": 0.056, "step": 7897 }, { "epoch": 15.03661436043747, "grad_norm": 0.10541585832834244, "learning_rate": 4.986979993648778e-05, "loss": 0.0478, "step": 7898 }, { "epoch": 15.038516405135521, "grad_norm": 0.1154666617512703, "learning_rate": 4.986344871387742e-05, "loss": 0.0488, "step": 7899 }, { "epoch": 15.04041844983357, "grad_norm": 0.2595585584640503, "learning_rate": 4.985709749126707e-05, "loss": 0.0593, "step": 7900 }, { "epoch": 15.042320494531621, "grad_norm": 0.22063757479190826, "learning_rate": 4.985074626865672e-05, "loss": 0.0664, "step": 7901 }, { "epoch": 15.044222539229672, "grad_norm": 0.1932714283466339, "learning_rate": 4.9844395046046364e-05, "loss": 0.0789, "step": 7902 }, { "epoch": 15.046124583927723, "grad_norm": 0.1839333027601242, "learning_rate": 4.9838043823436016e-05, "loss": 0.0453, "step": 7903 }, { "epoch": 15.048026628625772, "grad_norm": 0.10126302391290665, "learning_rate": 4.983169260082566e-05, "loss": 0.0509, "step": 7904 }, { "epoch": 15.049928673323823, "grad_norm": 0.07132267951965332, "learning_rate": 4.9825341378215306e-05, "loss": 0.0514, "step": 7905 }, { "epoch": 15.051830718021874, "grad_norm": 0.2043127417564392, "learning_rate": 4.981899015560496e-05, "loss": 0.0846, "step": 7906 }, { "epoch": 15.053732762719925, "grad_norm": 0.11934273689985275, "learning_rate": 4.98126389329946e-05, "loss": 0.0515, "step": 7907 }, { "epoch": 15.055634807417974, "grad_norm": 0.14380639791488647, "learning_rate": 4.9806287710384255e-05, "loss": 0.0551, "step": 7908 }, { "epoch": 15.057536852116025, "grad_norm": 0.13530582189559937, "learning_rate": 4.979993648777389e-05, "loss": 0.0457, "step": 7909 }, { "epoch": 15.059438896814076, "grad_norm": 0.18853814899921417, "learning_rate": 4.9793585265163545e-05, "loss": 0.0644, "step": 7910 }, { "epoch": 15.061340941512125, "grad_norm": 0.21197612583637238, "learning_rate": 4.97872340425532e-05, "loss": 0.0527, "step": 7911 }, { "epoch": 15.063242986210176, "grad_norm": 0.11743347346782684, "learning_rate": 4.978088281994284e-05, "loss": 0.0429, "step": 7912 }, { "epoch": 15.065145030908226, "grad_norm": 0.2725050151348114, "learning_rate": 4.9774531597332494e-05, "loss": 0.0604, "step": 7913 }, { "epoch": 15.067047075606277, "grad_norm": 0.16864003241062164, "learning_rate": 4.976818037472213e-05, "loss": 0.0524, "step": 7914 }, { "epoch": 15.068949120304326, "grad_norm": 0.0748053789138794, "learning_rate": 4.9761829152111784e-05, "loss": 0.0272, "step": 7915 }, { "epoch": 15.070851165002377, "grad_norm": 0.14363184571266174, "learning_rate": 4.975547792950143e-05, "loss": 0.0596, "step": 7916 }, { "epoch": 15.072753209700428, "grad_norm": 0.15277619659900665, "learning_rate": 4.974912670689108e-05, "loss": 0.063, "step": 7917 }, { "epoch": 15.074655254398479, "grad_norm": 0.2070416361093521, "learning_rate": 4.9742775484280726e-05, "loss": 0.0563, "step": 7918 }, { "epoch": 15.076557299096528, "grad_norm": 0.19699743390083313, "learning_rate": 4.973642426167037e-05, "loss": 0.0574, "step": 7919 }, { "epoch": 15.078459343794579, "grad_norm": 0.15078143775463104, "learning_rate": 4.973007303906002e-05, "loss": 0.0576, "step": 7920 }, { "epoch": 15.08036138849263, "grad_norm": 0.1462477743625641, "learning_rate": 4.972372181644967e-05, "loss": 0.0408, "step": 7921 }, { "epoch": 15.08226343319068, "grad_norm": 0.23796884715557098, "learning_rate": 4.971737059383932e-05, "loss": 0.0638, "step": 7922 }, { "epoch": 15.08416547788873, "grad_norm": 0.10526379942893982, "learning_rate": 4.9711019371228965e-05, "loss": 0.0419, "step": 7923 }, { "epoch": 15.08606752258678, "grad_norm": 0.23168741166591644, "learning_rate": 4.970466814861861e-05, "loss": 0.0708, "step": 7924 }, { "epoch": 15.087969567284832, "grad_norm": 0.13456536829471588, "learning_rate": 4.969831692600826e-05, "loss": 0.0524, "step": 7925 }, { "epoch": 15.08987161198288, "grad_norm": 0.16766615211963654, "learning_rate": 4.969196570339791e-05, "loss": 0.0508, "step": 7926 }, { "epoch": 15.091773656680932, "grad_norm": 0.20241063833236694, "learning_rate": 4.968561448078755e-05, "loss": 0.0604, "step": 7927 }, { "epoch": 15.093675701378983, "grad_norm": 0.14176540076732635, "learning_rate": 4.96792632581772e-05, "loss": 0.0463, "step": 7928 }, { "epoch": 15.095577746077034, "grad_norm": 0.19786062836647034, "learning_rate": 4.967291203556685e-05, "loss": 0.0519, "step": 7929 }, { "epoch": 15.097479790775083, "grad_norm": 0.18619826436042786, "learning_rate": 4.96665608129565e-05, "loss": 0.0574, "step": 7930 }, { "epoch": 15.099381835473134, "grad_norm": 0.17755287885665894, "learning_rate": 4.9660209590346146e-05, "loss": 0.0739, "step": 7931 }, { "epoch": 15.101283880171184, "grad_norm": 0.18910397589206696, "learning_rate": 4.965385836773579e-05, "loss": 0.0707, "step": 7932 }, { "epoch": 15.103185924869235, "grad_norm": 0.16724318265914917, "learning_rate": 4.9647507145125436e-05, "loss": 0.049, "step": 7933 }, { "epoch": 15.105087969567284, "grad_norm": 0.20525459945201874, "learning_rate": 4.964115592251509e-05, "loss": 0.0656, "step": 7934 }, { "epoch": 15.106990014265335, "grad_norm": 0.14179904758930206, "learning_rate": 4.963480469990473e-05, "loss": 0.0757, "step": 7935 }, { "epoch": 15.108892058963386, "grad_norm": 0.1320018470287323, "learning_rate": 4.962845347729438e-05, "loss": 0.0577, "step": 7936 }, { "epoch": 15.110794103661435, "grad_norm": 0.11483266949653625, "learning_rate": 4.962210225468403e-05, "loss": 0.07, "step": 7937 }, { "epoch": 15.112696148359486, "grad_norm": 0.13130733370780945, "learning_rate": 4.9615751032073675e-05, "loss": 0.0609, "step": 7938 }, { "epoch": 15.114598193057537, "grad_norm": 0.14458687603473663, "learning_rate": 4.9609399809463326e-05, "loss": 0.0496, "step": 7939 }, { "epoch": 15.116500237755588, "grad_norm": 0.1600898802280426, "learning_rate": 4.960304858685297e-05, "loss": 0.0697, "step": 7940 }, { "epoch": 15.118402282453637, "grad_norm": 0.09131373465061188, "learning_rate": 4.9596697364242617e-05, "loss": 0.0401, "step": 7941 }, { "epoch": 15.120304327151688, "grad_norm": 0.20547789335250854, "learning_rate": 4.959034614163227e-05, "loss": 0.0555, "step": 7942 }, { "epoch": 15.122206371849739, "grad_norm": 0.15025202929973602, "learning_rate": 4.9583994919021913e-05, "loss": 0.0505, "step": 7943 }, { "epoch": 15.12410841654779, "grad_norm": 0.06455320864915848, "learning_rate": 4.9577643696411565e-05, "loss": 0.0634, "step": 7944 }, { "epoch": 15.126010461245839, "grad_norm": 0.12017594277858734, "learning_rate": 4.9571292473801204e-05, "loss": 0.0486, "step": 7945 }, { "epoch": 15.12791250594389, "grad_norm": 0.18531164526939392, "learning_rate": 4.9564941251190855e-05, "loss": 0.0552, "step": 7946 }, { "epoch": 15.12981455064194, "grad_norm": 0.07348939776420593, "learning_rate": 4.95585900285805e-05, "loss": 0.0596, "step": 7947 }, { "epoch": 15.13171659533999, "grad_norm": 0.3035662770271301, "learning_rate": 4.955223880597015e-05, "loss": 0.074, "step": 7948 }, { "epoch": 15.13361864003804, "grad_norm": 0.14867135882377625, "learning_rate": 4.9545887583359804e-05, "loss": 0.0563, "step": 7949 }, { "epoch": 15.135520684736091, "grad_norm": 0.1565437614917755, "learning_rate": 4.953953636074944e-05, "loss": 0.059, "step": 7950 }, { "epoch": 15.137422729434142, "grad_norm": 0.14548735320568085, "learning_rate": 4.9533185138139094e-05, "loss": 0.0612, "step": 7951 }, { "epoch": 15.139324774132191, "grad_norm": 0.1496451199054718, "learning_rate": 4.952683391552874e-05, "loss": 0.062, "step": 7952 }, { "epoch": 15.141226818830242, "grad_norm": 0.1259707808494568, "learning_rate": 4.952048269291839e-05, "loss": 0.0517, "step": 7953 }, { "epoch": 15.143128863528293, "grad_norm": 0.19889900088310242, "learning_rate": 4.9514131470308036e-05, "loss": 0.0544, "step": 7954 }, { "epoch": 15.145030908226344, "grad_norm": 0.1751696914434433, "learning_rate": 4.950778024769768e-05, "loss": 0.0821, "step": 7955 }, { "epoch": 15.146932952924393, "grad_norm": 0.17510373890399933, "learning_rate": 4.950142902508733e-05, "loss": 0.0643, "step": 7956 }, { "epoch": 15.148834997622444, "grad_norm": 0.11843284219503403, "learning_rate": 4.949507780247698e-05, "loss": 0.0621, "step": 7957 }, { "epoch": 15.150737042320495, "grad_norm": 0.1398295760154724, "learning_rate": 4.948872657986663e-05, "loss": 0.0846, "step": 7958 }, { "epoch": 15.152639087018544, "grad_norm": 0.19116874039173126, "learning_rate": 4.9482375357256275e-05, "loss": 0.0497, "step": 7959 }, { "epoch": 15.154541131716595, "grad_norm": 0.1140001118183136, "learning_rate": 4.947602413464592e-05, "loss": 0.0514, "step": 7960 }, { "epoch": 15.156443176414646, "grad_norm": 0.22718724608421326, "learning_rate": 4.946967291203557e-05, "loss": 0.0547, "step": 7961 }, { "epoch": 15.158345221112697, "grad_norm": 0.20831739902496338, "learning_rate": 4.946332168942522e-05, "loss": 0.0428, "step": 7962 }, { "epoch": 15.160247265810746, "grad_norm": 0.16769562661647797, "learning_rate": 4.945697046681486e-05, "loss": 0.0577, "step": 7963 }, { "epoch": 15.162149310508797, "grad_norm": 0.2267942875623703, "learning_rate": 4.945061924420451e-05, "loss": 0.0531, "step": 7964 }, { "epoch": 15.164051355206848, "grad_norm": 0.166763037443161, "learning_rate": 4.944426802159416e-05, "loss": 0.0585, "step": 7965 }, { "epoch": 15.165953399904899, "grad_norm": 0.13724614679813385, "learning_rate": 4.943791679898381e-05, "loss": 0.0518, "step": 7966 }, { "epoch": 15.167855444602948, "grad_norm": 0.1825903356075287, "learning_rate": 4.9431565576373456e-05, "loss": 0.0529, "step": 7967 }, { "epoch": 15.169757489300999, "grad_norm": 0.12881925702095032, "learning_rate": 4.94252143537631e-05, "loss": 0.0609, "step": 7968 }, { "epoch": 15.17165953399905, "grad_norm": 0.16033561527729034, "learning_rate": 4.9418863131152746e-05, "loss": 0.0783, "step": 7969 }, { "epoch": 15.173561578697099, "grad_norm": 0.14918583631515503, "learning_rate": 4.94125119085424e-05, "loss": 0.059, "step": 7970 }, { "epoch": 15.17546362339515, "grad_norm": 0.1096171960234642, "learning_rate": 4.940616068593204e-05, "loss": 0.0643, "step": 7971 }, { "epoch": 15.1773656680932, "grad_norm": 0.12285967171192169, "learning_rate": 4.939980946332169e-05, "loss": 0.0533, "step": 7972 }, { "epoch": 15.179267712791251, "grad_norm": 0.20297501981258392, "learning_rate": 4.939345824071134e-05, "loss": 0.0588, "step": 7973 }, { "epoch": 15.1811697574893, "grad_norm": 0.1980786919593811, "learning_rate": 4.9387107018100985e-05, "loss": 0.0553, "step": 7974 }, { "epoch": 15.183071802187351, "grad_norm": 0.2785045802593231, "learning_rate": 4.938075579549064e-05, "loss": 0.0489, "step": 7975 }, { "epoch": 15.184973846885402, "grad_norm": 0.1846664994955063, "learning_rate": 4.937440457288028e-05, "loss": 0.0403, "step": 7976 }, { "epoch": 15.186875891583453, "grad_norm": 0.27118930220603943, "learning_rate": 4.936805335026993e-05, "loss": 0.0696, "step": 7977 }, { "epoch": 15.188777936281502, "grad_norm": 0.16328345239162445, "learning_rate": 4.936170212765958e-05, "loss": 0.0848, "step": 7978 }, { "epoch": 15.190679980979553, "grad_norm": 0.1698354184627533, "learning_rate": 4.9355350905049224e-05, "loss": 0.0666, "step": 7979 }, { "epoch": 15.192582025677604, "grad_norm": 0.15171551704406738, "learning_rate": 4.9348999682438876e-05, "loss": 0.0768, "step": 7980 }, { "epoch": 15.194484070375653, "grad_norm": 0.22728462517261505, "learning_rate": 4.9342648459828514e-05, "loss": 0.069, "step": 7981 }, { "epoch": 15.196386115073704, "grad_norm": 0.08936372399330139, "learning_rate": 4.9336297237218166e-05, "loss": 0.0534, "step": 7982 }, { "epoch": 15.198288159771755, "grad_norm": 0.24935117363929749, "learning_rate": 4.932994601460781e-05, "loss": 0.0567, "step": 7983 }, { "epoch": 15.200190204469806, "grad_norm": 0.1630091816186905, "learning_rate": 4.932359479199746e-05, "loss": 0.0627, "step": 7984 }, { "epoch": 15.202092249167855, "grad_norm": 0.1229170486330986, "learning_rate": 4.9317243569387115e-05, "loss": 0.0909, "step": 7985 }, { "epoch": 15.203994293865906, "grad_norm": 0.16645725071430206, "learning_rate": 4.931089234677675e-05, "loss": 0.0611, "step": 7986 }, { "epoch": 15.205896338563957, "grad_norm": 0.12142518907785416, "learning_rate": 4.9304541124166405e-05, "loss": 0.0431, "step": 7987 }, { "epoch": 15.207798383262007, "grad_norm": 0.34056565165519714, "learning_rate": 4.929818990155605e-05, "loss": 0.0636, "step": 7988 }, { "epoch": 15.209700427960057, "grad_norm": 0.09468982368707657, "learning_rate": 4.92918386789457e-05, "loss": 0.0418, "step": 7989 }, { "epoch": 15.211602472658107, "grad_norm": 0.14799001812934875, "learning_rate": 4.928548745633535e-05, "loss": 0.0498, "step": 7990 }, { "epoch": 15.213504517356158, "grad_norm": 0.25312280654907227, "learning_rate": 4.927913623372499e-05, "loss": 0.0479, "step": 7991 }, { "epoch": 15.21540656205421, "grad_norm": 0.19187159836292267, "learning_rate": 4.9272785011114644e-05, "loss": 0.0559, "step": 7992 }, { "epoch": 15.217308606752258, "grad_norm": 0.200269877910614, "learning_rate": 4.926643378850429e-05, "loss": 0.0629, "step": 7993 }, { "epoch": 15.21921065145031, "grad_norm": 0.15146945416927338, "learning_rate": 4.926008256589394e-05, "loss": 0.0495, "step": 7994 }, { "epoch": 15.22111269614836, "grad_norm": 0.1498185396194458, "learning_rate": 4.9253731343283586e-05, "loss": 0.0619, "step": 7995 }, { "epoch": 15.22301474084641, "grad_norm": 0.16426420211791992, "learning_rate": 4.924738012067323e-05, "loss": 0.0545, "step": 7996 }, { "epoch": 15.22491678554446, "grad_norm": 0.2681984007358551, "learning_rate": 4.924102889806288e-05, "loss": 0.0562, "step": 7997 }, { "epoch": 15.226818830242511, "grad_norm": 0.17494945228099823, "learning_rate": 4.923467767545253e-05, "loss": 0.1054, "step": 7998 }, { "epoch": 15.228720874940562, "grad_norm": 0.08447415381669998, "learning_rate": 4.922832645284217e-05, "loss": 0.0652, "step": 7999 }, { "epoch": 15.230622919638611, "grad_norm": 0.17718258500099182, "learning_rate": 4.922197523023182e-05, "loss": 0.045, "step": 8000 }, { "epoch": 15.232524964336662, "grad_norm": 0.1426248401403427, "learning_rate": 4.921562400762147e-05, "loss": 0.0454, "step": 8001 }, { "epoch": 15.234427009034713, "grad_norm": 0.14476601779460907, "learning_rate": 4.9209272785011115e-05, "loss": 0.0538, "step": 8002 }, { "epoch": 15.236329053732764, "grad_norm": 0.14318709075450897, "learning_rate": 4.9202921562400766e-05, "loss": 0.0609, "step": 8003 }, { "epoch": 15.238231098430813, "grad_norm": 0.18595252931118011, "learning_rate": 4.919657033979041e-05, "loss": 0.0583, "step": 8004 }, { "epoch": 15.240133143128864, "grad_norm": 0.11332142353057861, "learning_rate": 4.9190219117180057e-05, "loss": 0.0462, "step": 8005 }, { "epoch": 15.242035187826914, "grad_norm": 0.1512480229139328, "learning_rate": 4.918386789456971e-05, "loss": 0.0585, "step": 8006 }, { "epoch": 15.243937232524964, "grad_norm": 0.09532251209020615, "learning_rate": 4.9177516671959353e-05, "loss": 0.0483, "step": 8007 }, { "epoch": 15.245839277223014, "grad_norm": 0.1605871617794037, "learning_rate": 4.9171165449349e-05, "loss": 0.0598, "step": 8008 }, { "epoch": 15.247741321921065, "grad_norm": 0.22983098030090332, "learning_rate": 4.916481422673865e-05, "loss": 0.0743, "step": 8009 }, { "epoch": 15.249643366619116, "grad_norm": 0.17360562086105347, "learning_rate": 4.9158463004128295e-05, "loss": 0.049, "step": 8010 }, { "epoch": 15.251545411317165, "grad_norm": 0.2593297064304352, "learning_rate": 4.915211178151795e-05, "loss": 0.0762, "step": 8011 }, { "epoch": 15.253447456015216, "grad_norm": 0.1943679302930832, "learning_rate": 4.914576055890759e-05, "loss": 0.0656, "step": 8012 }, { "epoch": 15.255349500713267, "grad_norm": 0.10750841349363327, "learning_rate": 4.913940933629724e-05, "loss": 0.0611, "step": 8013 }, { "epoch": 15.257251545411318, "grad_norm": 0.2768067419528961, "learning_rate": 4.913305811368689e-05, "loss": 0.0768, "step": 8014 }, { "epoch": 15.259153590109367, "grad_norm": 0.12465161085128784, "learning_rate": 4.9126706891076534e-05, "loss": 0.0536, "step": 8015 }, { "epoch": 15.261055634807418, "grad_norm": 0.07539068162441254, "learning_rate": 4.9120355668466186e-05, "loss": 0.0594, "step": 8016 }, { "epoch": 15.262957679505469, "grad_norm": 0.13377007842063904, "learning_rate": 4.9114004445855824e-05, "loss": 0.0507, "step": 8017 }, { "epoch": 15.264859724203518, "grad_norm": 0.11560075730085373, "learning_rate": 4.9107653223245476e-05, "loss": 0.0618, "step": 8018 }, { "epoch": 15.266761768901569, "grad_norm": 0.08740713447332382, "learning_rate": 4.910130200063512e-05, "loss": 0.0544, "step": 8019 }, { "epoch": 15.26866381359962, "grad_norm": 0.23278751969337463, "learning_rate": 4.909495077802477e-05, "loss": 0.0591, "step": 8020 }, { "epoch": 15.27056585829767, "grad_norm": 0.2627377510070801, "learning_rate": 4.9088599555414425e-05, "loss": 0.0638, "step": 8021 }, { "epoch": 15.27246790299572, "grad_norm": 0.1532537043094635, "learning_rate": 4.908224833280406e-05, "loss": 0.0487, "step": 8022 }, { "epoch": 15.27436994769377, "grad_norm": 0.11168943345546722, "learning_rate": 4.9075897110193715e-05, "loss": 0.0487, "step": 8023 }, { "epoch": 15.276271992391822, "grad_norm": 0.24777059257030487, "learning_rate": 4.906954588758336e-05, "loss": 0.0719, "step": 8024 }, { "epoch": 15.278174037089872, "grad_norm": 0.15639886260032654, "learning_rate": 4.906319466497301e-05, "loss": 0.0588, "step": 8025 }, { "epoch": 15.280076081787922, "grad_norm": 0.12868893146514893, "learning_rate": 4.905684344236266e-05, "loss": 0.0494, "step": 8026 }, { "epoch": 15.281978126485972, "grad_norm": 0.1784045547246933, "learning_rate": 4.90504922197523e-05, "loss": 0.0617, "step": 8027 }, { "epoch": 15.283880171184023, "grad_norm": 0.13852459192276, "learning_rate": 4.9044140997141954e-05, "loss": 0.0685, "step": 8028 }, { "epoch": 15.285782215882072, "grad_norm": 0.09702124446630478, "learning_rate": 4.90377897745316e-05, "loss": 0.0598, "step": 8029 }, { "epoch": 15.287684260580123, "grad_norm": 0.23854145407676697, "learning_rate": 4.903143855192125e-05, "loss": 0.0529, "step": 8030 }, { "epoch": 15.289586305278174, "grad_norm": 0.1306440234184265, "learning_rate": 4.902508732931089e-05, "loss": 0.0705, "step": 8031 }, { "epoch": 15.291488349976225, "grad_norm": 0.17986299097537994, "learning_rate": 4.901873610670054e-05, "loss": 0.0456, "step": 8032 }, { "epoch": 15.293390394674274, "grad_norm": 0.26708826422691345, "learning_rate": 4.901238488409019e-05, "loss": 0.0586, "step": 8033 }, { "epoch": 15.295292439372325, "grad_norm": 0.18249185383319855, "learning_rate": 4.900603366147984e-05, "loss": 0.058, "step": 8034 }, { "epoch": 15.297194484070376, "grad_norm": 0.16154305636882782, "learning_rate": 4.899968243886948e-05, "loss": 0.0452, "step": 8035 }, { "epoch": 15.299096528768427, "grad_norm": 0.1726539582014084, "learning_rate": 4.899333121625913e-05, "loss": 0.0538, "step": 8036 }, { "epoch": 15.300998573466476, "grad_norm": 0.20630763471126556, "learning_rate": 4.898697999364878e-05, "loss": 0.067, "step": 8037 }, { "epoch": 15.302900618164527, "grad_norm": 0.1309700757265091, "learning_rate": 4.8980628771038425e-05, "loss": 0.051, "step": 8038 }, { "epoch": 15.304802662862578, "grad_norm": 0.10560029000043869, "learning_rate": 4.897427754842808e-05, "loss": 0.057, "step": 8039 }, { "epoch": 15.306704707560627, "grad_norm": 0.0757644772529602, "learning_rate": 4.896792632581772e-05, "loss": 0.0491, "step": 8040 }, { "epoch": 15.308606752258678, "grad_norm": 0.21504676342010498, "learning_rate": 4.896157510320737e-05, "loss": 0.0705, "step": 8041 }, { "epoch": 15.310508796956729, "grad_norm": 0.06593248248100281, "learning_rate": 4.895522388059702e-05, "loss": 0.0298, "step": 8042 }, { "epoch": 15.31241084165478, "grad_norm": 0.13523049652576447, "learning_rate": 4.8948872657986664e-05, "loss": 0.0665, "step": 8043 }, { "epoch": 15.314312886352829, "grad_norm": 0.1855146586894989, "learning_rate": 4.894252143537631e-05, "loss": 0.0624, "step": 8044 }, { "epoch": 15.31621493105088, "grad_norm": 0.11019548773765564, "learning_rate": 4.893617021276596e-05, "loss": 0.0486, "step": 8045 }, { "epoch": 15.31811697574893, "grad_norm": 0.17465780675411224, "learning_rate": 4.8929818990155606e-05, "loss": 0.048, "step": 8046 }, { "epoch": 15.320019020446981, "grad_norm": 0.1513073593378067, "learning_rate": 4.892346776754526e-05, "loss": 0.058, "step": 8047 }, { "epoch": 15.32192106514503, "grad_norm": 0.13659310340881348, "learning_rate": 4.89171165449349e-05, "loss": 0.0431, "step": 8048 }, { "epoch": 15.323823109843081, "grad_norm": 0.15410743653774261, "learning_rate": 4.891076532232455e-05, "loss": 0.0499, "step": 8049 }, { "epoch": 15.325725154541132, "grad_norm": 0.3651988208293915, "learning_rate": 4.89044140997142e-05, "loss": 0.0694, "step": 8050 }, { "epoch": 15.327627199239181, "grad_norm": 0.2175668627023697, "learning_rate": 4.8898062877103845e-05, "loss": 0.0703, "step": 8051 }, { "epoch": 15.329529243937232, "grad_norm": 0.33220532536506653, "learning_rate": 4.8891711654493497e-05, "loss": 0.0674, "step": 8052 }, { "epoch": 15.331431288635283, "grad_norm": 0.19759467244148254, "learning_rate": 4.8885360431883135e-05, "loss": 0.0684, "step": 8053 }, { "epoch": 15.333333333333334, "grad_norm": 0.20885705947875977, "learning_rate": 4.887900920927279e-05, "loss": 0.0602, "step": 8054 }, { "epoch": 15.335235378031383, "grad_norm": 0.3209524154663086, "learning_rate": 4.887265798666243e-05, "loss": 0.0743, "step": 8055 }, { "epoch": 15.337137422729434, "grad_norm": 0.12623853981494904, "learning_rate": 4.8866306764052084e-05, "loss": 0.0669, "step": 8056 }, { "epoch": 15.339039467427485, "grad_norm": 0.06359723210334778, "learning_rate": 4.885995554144173e-05, "loss": 0.0423, "step": 8057 }, { "epoch": 15.340941512125536, "grad_norm": 0.12852895259857178, "learning_rate": 4.8853604318831374e-05, "loss": 0.0558, "step": 8058 }, { "epoch": 15.342843556823585, "grad_norm": 0.10829676687717438, "learning_rate": 4.8847253096221026e-05, "loss": 0.0444, "step": 8059 }, { "epoch": 15.344745601521636, "grad_norm": 0.17335738241672516, "learning_rate": 4.884090187361067e-05, "loss": 0.0437, "step": 8060 }, { "epoch": 15.346647646219687, "grad_norm": 0.13086184859275818, "learning_rate": 4.883455065100032e-05, "loss": 0.0584, "step": 8061 }, { "epoch": 15.348549690917736, "grad_norm": 0.1777004450559616, "learning_rate": 4.882819942838997e-05, "loss": 0.048, "step": 8062 }, { "epoch": 15.350451735615787, "grad_norm": 0.12129975855350494, "learning_rate": 4.882184820577961e-05, "loss": 0.052, "step": 8063 }, { "epoch": 15.352353780313837, "grad_norm": 0.1653653234243393, "learning_rate": 4.8815496983169264e-05, "loss": 0.0477, "step": 8064 }, { "epoch": 15.354255825011888, "grad_norm": 0.07352624088525772, "learning_rate": 4.880914576055891e-05, "loss": 0.0561, "step": 8065 }, { "epoch": 15.356157869709937, "grad_norm": 0.21875706315040588, "learning_rate": 4.880279453794856e-05, "loss": 0.0555, "step": 8066 }, { "epoch": 15.358059914407988, "grad_norm": 0.12528656423091888, "learning_rate": 4.87964433153382e-05, "loss": 0.0596, "step": 8067 }, { "epoch": 15.35996195910604, "grad_norm": 0.08361958712339401, "learning_rate": 4.879009209272785e-05, "loss": 0.0494, "step": 8068 }, { "epoch": 15.36186400380409, "grad_norm": 0.13227033615112305, "learning_rate": 4.87837408701175e-05, "loss": 0.0487, "step": 8069 }, { "epoch": 15.36376604850214, "grad_norm": 0.21816705167293549, "learning_rate": 4.877738964750715e-05, "loss": 0.0605, "step": 8070 }, { "epoch": 15.36566809320019, "grad_norm": 0.1681966334581375, "learning_rate": 4.8771038424896793e-05, "loss": 0.0535, "step": 8071 }, { "epoch": 15.367570137898241, "grad_norm": 0.11082461476325989, "learning_rate": 4.876468720228644e-05, "loss": 0.0536, "step": 8072 }, { "epoch": 15.36947218259629, "grad_norm": 0.1294272541999817, "learning_rate": 4.875833597967609e-05, "loss": 0.0673, "step": 8073 }, { "epoch": 15.371374227294341, "grad_norm": 0.14270833134651184, "learning_rate": 4.8751984757065735e-05, "loss": 0.0466, "step": 8074 }, { "epoch": 15.373276271992392, "grad_norm": 0.20790541172027588, "learning_rate": 4.874563353445539e-05, "loss": 0.0535, "step": 8075 }, { "epoch": 15.375178316690443, "grad_norm": 0.1547214835882187, "learning_rate": 4.873928231184503e-05, "loss": 0.0605, "step": 8076 }, { "epoch": 15.377080361388492, "grad_norm": 0.1377478688955307, "learning_rate": 4.873293108923468e-05, "loss": 0.059, "step": 8077 }, { "epoch": 15.378982406086543, "grad_norm": 0.23683491349220276, "learning_rate": 4.872657986662433e-05, "loss": 0.062, "step": 8078 }, { "epoch": 15.380884450784594, "grad_norm": 0.14311161637306213, "learning_rate": 4.8720228644013974e-05, "loss": 0.0723, "step": 8079 }, { "epoch": 15.382786495482645, "grad_norm": 0.13526876270771027, "learning_rate": 4.871387742140362e-05, "loss": 0.0622, "step": 8080 }, { "epoch": 15.384688540180694, "grad_norm": 0.1406906694173813, "learning_rate": 4.870752619879327e-05, "loss": 0.0476, "step": 8081 }, { "epoch": 15.386590584878745, "grad_norm": 0.11449279636144638, "learning_rate": 4.8701174976182916e-05, "loss": 0.0657, "step": 8082 }, { "epoch": 15.388492629576795, "grad_norm": 0.1769600659608841, "learning_rate": 4.869482375357257e-05, "loss": 0.0764, "step": 8083 }, { "epoch": 15.390394674274846, "grad_norm": 0.1439645141363144, "learning_rate": 4.868847253096221e-05, "loss": 0.0559, "step": 8084 }, { "epoch": 15.392296718972895, "grad_norm": 0.2625383734703064, "learning_rate": 4.868212130835186e-05, "loss": 0.0797, "step": 8085 }, { "epoch": 15.394198763670946, "grad_norm": 0.15457496047019958, "learning_rate": 4.86757700857415e-05, "loss": 0.0652, "step": 8086 }, { "epoch": 15.396100808368997, "grad_norm": 0.07629223167896271, "learning_rate": 4.8669418863131155e-05, "loss": 0.0427, "step": 8087 }, { "epoch": 15.398002853067046, "grad_norm": 0.09746023267507553, "learning_rate": 4.866306764052081e-05, "loss": 0.0361, "step": 8088 }, { "epoch": 15.399904897765097, "grad_norm": 0.11538923531770706, "learning_rate": 4.8656716417910445e-05, "loss": 0.0504, "step": 8089 }, { "epoch": 15.401806942463148, "grad_norm": 0.16864782571792603, "learning_rate": 4.86503651953001e-05, "loss": 0.0672, "step": 8090 }, { "epoch": 15.403708987161199, "grad_norm": 0.23421289026737213, "learning_rate": 4.864401397268974e-05, "loss": 0.0648, "step": 8091 }, { "epoch": 15.405611031859248, "grad_norm": 0.20838795602321625, "learning_rate": 4.8637662750079394e-05, "loss": 0.0694, "step": 8092 }, { "epoch": 15.407513076557299, "grad_norm": 0.24948616325855255, "learning_rate": 4.863131152746904e-05, "loss": 0.0723, "step": 8093 }, { "epoch": 15.40941512125535, "grad_norm": 0.10712212324142456, "learning_rate": 4.8624960304858684e-05, "loss": 0.0559, "step": 8094 }, { "epoch": 15.4113171659534, "grad_norm": 0.18813996016979218, "learning_rate": 4.8618609082248336e-05, "loss": 0.0636, "step": 8095 }, { "epoch": 15.41321921065145, "grad_norm": 0.10939308255910873, "learning_rate": 4.861225785963798e-05, "loss": 0.069, "step": 8096 }, { "epoch": 15.4151212553495, "grad_norm": 0.24954181909561157, "learning_rate": 4.860590663702763e-05, "loss": 0.0737, "step": 8097 }, { "epoch": 15.417023300047552, "grad_norm": 0.20011039078235626, "learning_rate": 4.859955541441728e-05, "loss": 0.0501, "step": 8098 }, { "epoch": 15.4189253447456, "grad_norm": 0.2183552086353302, "learning_rate": 4.859320419180692e-05, "loss": 0.0688, "step": 8099 }, { "epoch": 15.420827389443652, "grad_norm": 0.0974702537059784, "learning_rate": 4.8586852969196575e-05, "loss": 0.0511, "step": 8100 }, { "epoch": 15.422729434141702, "grad_norm": 0.3261789083480835, "learning_rate": 4.858050174658622e-05, "loss": 0.0713, "step": 8101 }, { "epoch": 15.424631478839753, "grad_norm": 0.1563624143600464, "learning_rate": 4.857415052397587e-05, "loss": 0.0435, "step": 8102 }, { "epoch": 15.426533523537802, "grad_norm": 0.34335121512413025, "learning_rate": 4.856779930136551e-05, "loss": 0.0841, "step": 8103 }, { "epoch": 15.428435568235853, "grad_norm": 0.1576901227235794, "learning_rate": 4.856144807875516e-05, "loss": 0.0632, "step": 8104 }, { "epoch": 15.430337612933904, "grad_norm": 0.31260785460472107, "learning_rate": 4.8555096856144814e-05, "loss": 0.0439, "step": 8105 }, { "epoch": 15.432239657631955, "grad_norm": 0.1318841129541397, "learning_rate": 4.854874563353446e-05, "loss": 0.0638, "step": 8106 }, { "epoch": 15.434141702330004, "grad_norm": 0.143747016787529, "learning_rate": 4.8542394410924104e-05, "loss": 0.0488, "step": 8107 }, { "epoch": 15.436043747028055, "grad_norm": 0.23869341611862183, "learning_rate": 4.853604318831375e-05, "loss": 0.058, "step": 8108 }, { "epoch": 15.437945791726106, "grad_norm": 0.178604394197464, "learning_rate": 4.85296919657034e-05, "loss": 0.0514, "step": 8109 }, { "epoch": 15.439847836424155, "grad_norm": 0.18076927959918976, "learning_rate": 4.8523340743093046e-05, "loss": 0.0688, "step": 8110 }, { "epoch": 15.441749881122206, "grad_norm": 0.2589855194091797, "learning_rate": 4.85169895204827e-05, "loss": 0.0541, "step": 8111 }, { "epoch": 15.443651925820257, "grad_norm": 0.11828634887933731, "learning_rate": 4.851063829787234e-05, "loss": 0.1125, "step": 8112 }, { "epoch": 15.445553970518308, "grad_norm": 0.1342373639345169, "learning_rate": 4.850428707526199e-05, "loss": 0.0607, "step": 8113 }, { "epoch": 15.447456015216357, "grad_norm": 0.12266510725021362, "learning_rate": 4.849793585265164e-05, "loss": 0.0397, "step": 8114 }, { "epoch": 15.449358059914408, "grad_norm": 0.26072725653648376, "learning_rate": 4.8491584630041285e-05, "loss": 0.0711, "step": 8115 }, { "epoch": 15.451260104612459, "grad_norm": 0.06860945373773575, "learning_rate": 4.848523340743093e-05, "loss": 0.03, "step": 8116 }, { "epoch": 15.45316214931051, "grad_norm": 0.09830333292484283, "learning_rate": 4.847888218482058e-05, "loss": 0.0584, "step": 8117 }, { "epoch": 15.455064194008559, "grad_norm": 0.2472551167011261, "learning_rate": 4.847253096221023e-05, "loss": 0.0856, "step": 8118 }, { "epoch": 15.45696623870661, "grad_norm": 0.15534979104995728, "learning_rate": 4.846617973959988e-05, "loss": 0.0657, "step": 8119 }, { "epoch": 15.45886828340466, "grad_norm": 0.3664383590221405, "learning_rate": 4.8459828516989524e-05, "loss": 0.0638, "step": 8120 }, { "epoch": 15.46077032810271, "grad_norm": 0.16775761544704437, "learning_rate": 4.845347729437917e-05, "loss": 0.0504, "step": 8121 }, { "epoch": 15.46267237280076, "grad_norm": 0.1890975385904312, "learning_rate": 4.8447126071768814e-05, "loss": 0.0579, "step": 8122 }, { "epoch": 15.464574417498811, "grad_norm": 0.28263619542121887, "learning_rate": 4.8440774849158466e-05, "loss": 0.0999, "step": 8123 }, { "epoch": 15.466476462196862, "grad_norm": 0.20361609756946564, "learning_rate": 4.843442362654812e-05, "loss": 0.0689, "step": 8124 }, { "epoch": 15.468378506894911, "grad_norm": 0.159327894449234, "learning_rate": 4.8428072403937756e-05, "loss": 0.0544, "step": 8125 }, { "epoch": 15.470280551592962, "grad_norm": 0.07594948261976242, "learning_rate": 4.842172118132741e-05, "loss": 0.0528, "step": 8126 }, { "epoch": 15.472182596291013, "grad_norm": 0.12283579260110855, "learning_rate": 4.841536995871705e-05, "loss": 0.0477, "step": 8127 }, { "epoch": 15.474084640989064, "grad_norm": 0.10897643864154816, "learning_rate": 4.8409018736106705e-05, "loss": 0.0499, "step": 8128 }, { "epoch": 15.475986685687113, "grad_norm": 0.08511614054441452, "learning_rate": 4.840266751349635e-05, "loss": 0.0664, "step": 8129 }, { "epoch": 15.477888730385164, "grad_norm": 0.12299087643623352, "learning_rate": 4.8396316290885995e-05, "loss": 0.076, "step": 8130 }, { "epoch": 15.479790775083215, "grad_norm": 0.20369911193847656, "learning_rate": 4.8389965068275646e-05, "loss": 0.0533, "step": 8131 }, { "epoch": 15.481692819781266, "grad_norm": 0.1380971223115921, "learning_rate": 4.838361384566529e-05, "loss": 0.0486, "step": 8132 }, { "epoch": 15.483594864479315, "grad_norm": 0.15812714397907257, "learning_rate": 4.8377262623054943e-05, "loss": 0.0564, "step": 8133 }, { "epoch": 15.485496909177366, "grad_norm": 0.1546802818775177, "learning_rate": 4.837091140044459e-05, "loss": 0.0524, "step": 8134 }, { "epoch": 15.487398953875417, "grad_norm": 0.156464621424675, "learning_rate": 4.8364560177834234e-05, "loss": 0.0834, "step": 8135 }, { "epoch": 15.489300998573466, "grad_norm": 0.2060707062482834, "learning_rate": 4.8358208955223885e-05, "loss": 0.0568, "step": 8136 }, { "epoch": 15.491203043271517, "grad_norm": 0.25839850306510925, "learning_rate": 4.835185773261353e-05, "loss": 0.068, "step": 8137 }, { "epoch": 15.493105087969568, "grad_norm": 0.2130364328622818, "learning_rate": 4.834550651000318e-05, "loss": 0.0546, "step": 8138 }, { "epoch": 15.495007132667618, "grad_norm": 0.2569848299026489, "learning_rate": 4.833915528739282e-05, "loss": 0.071, "step": 8139 }, { "epoch": 15.496909177365668, "grad_norm": 0.15983575582504272, "learning_rate": 4.833280406478247e-05, "loss": 0.0456, "step": 8140 }, { "epoch": 15.498811222063718, "grad_norm": 0.1227196604013443, "learning_rate": 4.832645284217212e-05, "loss": 0.0511, "step": 8141 }, { "epoch": 15.50071326676177, "grad_norm": 0.13389058411121368, "learning_rate": 4.832010161956177e-05, "loss": 0.0554, "step": 8142 }, { "epoch": 15.50261531145982, "grad_norm": 0.21467959880828857, "learning_rate": 4.8313750396951414e-05, "loss": 0.0679, "step": 8143 }, { "epoch": 15.50451735615787, "grad_norm": 0.13476331532001495, "learning_rate": 4.830739917434106e-05, "loss": 0.0491, "step": 8144 }, { "epoch": 15.50641940085592, "grad_norm": 0.14584288001060486, "learning_rate": 4.830104795173071e-05, "loss": 0.048, "step": 8145 }, { "epoch": 15.508321445553971, "grad_norm": 0.1891852468252182, "learning_rate": 4.8294696729120356e-05, "loss": 0.0643, "step": 8146 }, { "epoch": 15.51022349025202, "grad_norm": 0.1584828644990921, "learning_rate": 4.828834550651001e-05, "loss": 0.0559, "step": 8147 }, { "epoch": 15.512125534950071, "grad_norm": 0.10334078967571259, "learning_rate": 4.828199428389965e-05, "loss": 0.0486, "step": 8148 }, { "epoch": 15.514027579648122, "grad_norm": 0.20074616372585297, "learning_rate": 4.82756430612893e-05, "loss": 0.08, "step": 8149 }, { "epoch": 15.515929624346173, "grad_norm": 0.2370256930589676, "learning_rate": 4.826929183867895e-05, "loss": 0.087, "step": 8150 }, { "epoch": 15.517831669044222, "grad_norm": 0.15915529429912567, "learning_rate": 4.8262940616068595e-05, "loss": 0.0686, "step": 8151 }, { "epoch": 15.519733713742273, "grad_norm": 0.14760880172252655, "learning_rate": 4.825658939345824e-05, "loss": 0.0624, "step": 8152 }, { "epoch": 15.521635758440324, "grad_norm": 0.1157349944114685, "learning_rate": 4.825023817084789e-05, "loss": 0.0578, "step": 8153 }, { "epoch": 15.523537803138375, "grad_norm": 0.1379801630973816, "learning_rate": 4.824388694823754e-05, "loss": 0.0583, "step": 8154 }, { "epoch": 15.525439847836424, "grad_norm": 0.17183905839920044, "learning_rate": 4.823753572562719e-05, "loss": 0.0585, "step": 8155 }, { "epoch": 15.527341892534475, "grad_norm": 0.1357431262731552, "learning_rate": 4.8231184503016834e-05, "loss": 0.0647, "step": 8156 }, { "epoch": 15.529243937232525, "grad_norm": 0.1262008398771286, "learning_rate": 4.822483328040648e-05, "loss": 0.0488, "step": 8157 }, { "epoch": 15.531145981930575, "grad_norm": 0.1317247599363327, "learning_rate": 4.8218482057796124e-05, "loss": 0.0543, "step": 8158 }, { "epoch": 15.533048026628625, "grad_norm": 0.16477461159229279, "learning_rate": 4.8212130835185776e-05, "loss": 0.0701, "step": 8159 }, { "epoch": 15.534950071326676, "grad_norm": 0.2284783273935318, "learning_rate": 4.820577961257543e-05, "loss": 0.076, "step": 8160 }, { "epoch": 15.536852116024727, "grad_norm": 0.10719820857048035, "learning_rate": 4.8199428389965066e-05, "loss": 0.0649, "step": 8161 }, { "epoch": 15.538754160722776, "grad_norm": 0.18569643795490265, "learning_rate": 4.819307716735472e-05, "loss": 0.09, "step": 8162 }, { "epoch": 15.540656205420827, "grad_norm": 0.15722434222698212, "learning_rate": 4.818672594474436e-05, "loss": 0.0623, "step": 8163 }, { "epoch": 15.542558250118878, "grad_norm": 0.19346733391284943, "learning_rate": 4.8180374722134015e-05, "loss": 0.053, "step": 8164 }, { "epoch": 15.544460294816929, "grad_norm": 0.11424779891967773, "learning_rate": 4.817402349952366e-05, "loss": 0.0659, "step": 8165 }, { "epoch": 15.546362339514978, "grad_norm": 0.2630043923854828, "learning_rate": 4.8167672276913305e-05, "loss": 0.0562, "step": 8166 }, { "epoch": 15.548264384213029, "grad_norm": 0.12630194425582886, "learning_rate": 4.816132105430296e-05, "loss": 0.0722, "step": 8167 }, { "epoch": 15.55016642891108, "grad_norm": 0.1517813354730606, "learning_rate": 4.81549698316926e-05, "loss": 0.0565, "step": 8168 }, { "epoch": 15.552068473609129, "grad_norm": 0.11152484267950058, "learning_rate": 4.8148618609082254e-05, "loss": 0.048, "step": 8169 }, { "epoch": 15.55397051830718, "grad_norm": 0.13790766894817352, "learning_rate": 4.814226738647189e-05, "loss": 0.0543, "step": 8170 }, { "epoch": 15.55587256300523, "grad_norm": 0.1103183776140213, "learning_rate": 4.8135916163861544e-05, "loss": 0.0769, "step": 8171 }, { "epoch": 15.557774607703282, "grad_norm": 0.24293473362922668, "learning_rate": 4.8129564941251196e-05, "loss": 0.0493, "step": 8172 }, { "epoch": 15.55967665240133, "grad_norm": 0.17838114500045776, "learning_rate": 4.812321371864084e-05, "loss": 0.061, "step": 8173 }, { "epoch": 15.561578697099382, "grad_norm": 0.272238552570343, "learning_rate": 4.811686249603049e-05, "loss": 0.0522, "step": 8174 }, { "epoch": 15.563480741797433, "grad_norm": 0.18040479719638824, "learning_rate": 4.811051127342013e-05, "loss": 0.0627, "step": 8175 }, { "epoch": 15.565382786495483, "grad_norm": 0.09657354652881622, "learning_rate": 4.810416005080978e-05, "loss": 0.0507, "step": 8176 }, { "epoch": 15.567284831193533, "grad_norm": 0.120743028819561, "learning_rate": 4.809780882819943e-05, "loss": 0.0567, "step": 8177 }, { "epoch": 15.569186875891583, "grad_norm": 0.20059272646903992, "learning_rate": 4.809145760558908e-05, "loss": 0.0366, "step": 8178 }, { "epoch": 15.571088920589634, "grad_norm": 0.13962025940418243, "learning_rate": 4.8085106382978725e-05, "loss": 0.0628, "step": 8179 }, { "epoch": 15.572990965287683, "grad_norm": 0.1431548148393631, "learning_rate": 4.807875516036837e-05, "loss": 0.0571, "step": 8180 }, { "epoch": 15.574893009985734, "grad_norm": 0.17564918100833893, "learning_rate": 4.807240393775802e-05, "loss": 0.043, "step": 8181 }, { "epoch": 15.576795054683785, "grad_norm": 0.1403467208147049, "learning_rate": 4.806605271514767e-05, "loss": 0.0506, "step": 8182 }, { "epoch": 15.578697099381836, "grad_norm": 0.24989385902881622, "learning_rate": 4.805970149253732e-05, "loss": 0.0742, "step": 8183 }, { "epoch": 15.580599144079885, "grad_norm": 0.16122576594352722, "learning_rate": 4.8053350269926964e-05, "loss": 0.0517, "step": 8184 }, { "epoch": 15.582501188777936, "grad_norm": 0.19307172298431396, "learning_rate": 4.804699904731661e-05, "loss": 0.0508, "step": 8185 }, { "epoch": 15.584403233475987, "grad_norm": 0.2435937225818634, "learning_rate": 4.804064782470626e-05, "loss": 0.071, "step": 8186 }, { "epoch": 15.586305278174038, "grad_norm": 0.20309656858444214, "learning_rate": 4.8034296602095906e-05, "loss": 0.0686, "step": 8187 }, { "epoch": 15.588207322872087, "grad_norm": 0.12786833941936493, "learning_rate": 4.802794537948555e-05, "loss": 0.0542, "step": 8188 }, { "epoch": 15.590109367570138, "grad_norm": 0.21476751565933228, "learning_rate": 4.80215941568752e-05, "loss": 0.063, "step": 8189 }, { "epoch": 15.592011412268189, "grad_norm": 0.17351064085960388, "learning_rate": 4.801524293426485e-05, "loss": 0.0668, "step": 8190 }, { "epoch": 15.593913456966238, "grad_norm": 0.12176606804132462, "learning_rate": 4.80088917116545e-05, "loss": 0.0661, "step": 8191 }, { "epoch": 15.595815501664289, "grad_norm": 0.2677692174911499, "learning_rate": 4.8002540489044145e-05, "loss": 0.0556, "step": 8192 }, { "epoch": 15.59771754636234, "grad_norm": 0.23667269945144653, "learning_rate": 4.799618926643379e-05, "loss": 0.0625, "step": 8193 }, { "epoch": 15.59961959106039, "grad_norm": 0.18815861642360687, "learning_rate": 4.7989838043823435e-05, "loss": 0.0651, "step": 8194 }, { "epoch": 15.60152163575844, "grad_norm": 0.11295945942401886, "learning_rate": 4.7983486821213087e-05, "loss": 0.0463, "step": 8195 }, { "epoch": 15.60342368045649, "grad_norm": 0.25460055470466614, "learning_rate": 4.797713559860273e-05, "loss": 0.1022, "step": 8196 }, { "epoch": 15.605325725154541, "grad_norm": 0.15465350449085236, "learning_rate": 4.797078437599238e-05, "loss": 0.0502, "step": 8197 }, { "epoch": 15.607227769852592, "grad_norm": 0.21603916585445404, "learning_rate": 4.796443315338203e-05, "loss": 0.051, "step": 8198 }, { "epoch": 15.609129814550641, "grad_norm": 0.22721683979034424, "learning_rate": 4.7958081930771674e-05, "loss": 0.0852, "step": 8199 }, { "epoch": 15.611031859248692, "grad_norm": 0.11398090422153473, "learning_rate": 4.7951730708161325e-05, "loss": 0.0467, "step": 8200 }, { "epoch": 15.612933903946743, "grad_norm": 0.19684173166751862, "learning_rate": 4.794537948555097e-05, "loss": 0.0827, "step": 8201 }, { "epoch": 15.614835948644792, "grad_norm": 0.12274474650621414, "learning_rate": 4.7939028262940616e-05, "loss": 0.0463, "step": 8202 }, { "epoch": 15.616737993342843, "grad_norm": 0.12376654893159866, "learning_rate": 4.793267704033027e-05, "loss": 0.0533, "step": 8203 }, { "epoch": 15.618640038040894, "grad_norm": 0.17132851481437683, "learning_rate": 4.792632581771991e-05, "loss": 0.0524, "step": 8204 }, { "epoch": 15.620542082738945, "grad_norm": 0.2081228792667389, "learning_rate": 4.7919974595109564e-05, "loss": 0.0686, "step": 8205 }, { "epoch": 15.622444127436994, "grad_norm": 0.18067365884780884, "learning_rate": 4.79136233724992e-05, "loss": 0.0492, "step": 8206 }, { "epoch": 15.624346172135045, "grad_norm": 0.24764224886894226, "learning_rate": 4.7907272149888854e-05, "loss": 0.0579, "step": 8207 }, { "epoch": 15.626248216833096, "grad_norm": 0.2526845932006836, "learning_rate": 4.7900920927278506e-05, "loss": 0.0663, "step": 8208 }, { "epoch": 15.628150261531147, "grad_norm": 0.1451825648546219, "learning_rate": 4.789456970466815e-05, "loss": 0.0677, "step": 8209 }, { "epoch": 15.630052306229196, "grad_norm": 0.1915256679058075, "learning_rate": 4.78882184820578e-05, "loss": 0.06, "step": 8210 }, { "epoch": 15.631954350927247, "grad_norm": 0.27265143394470215, "learning_rate": 4.788186725944744e-05, "loss": 0.0555, "step": 8211 }, { "epoch": 15.633856395625298, "grad_norm": 0.19599300622940063, "learning_rate": 4.787551603683709e-05, "loss": 0.0525, "step": 8212 }, { "epoch": 15.635758440323347, "grad_norm": 0.24216334521770477, "learning_rate": 4.786916481422674e-05, "loss": 0.0639, "step": 8213 }, { "epoch": 15.637660485021398, "grad_norm": 0.22544489800930023, "learning_rate": 4.786281359161639e-05, "loss": 0.0661, "step": 8214 }, { "epoch": 15.639562529719448, "grad_norm": 0.15521033108234406, "learning_rate": 4.7856462369006035e-05, "loss": 0.0526, "step": 8215 }, { "epoch": 15.6414645744175, "grad_norm": 0.20516115427017212, "learning_rate": 4.785011114639568e-05, "loss": 0.0704, "step": 8216 }, { "epoch": 15.643366619115548, "grad_norm": 0.19163091480731964, "learning_rate": 4.784375992378533e-05, "loss": 0.0649, "step": 8217 }, { "epoch": 15.6452686638136, "grad_norm": 0.2147558182477951, "learning_rate": 4.783740870117498e-05, "loss": 0.0473, "step": 8218 }, { "epoch": 15.64717070851165, "grad_norm": 0.1446150541305542, "learning_rate": 4.783105747856463e-05, "loss": 0.1167, "step": 8219 }, { "epoch": 15.649072753209701, "grad_norm": 0.2023557424545288, "learning_rate": 4.7824706255954274e-05, "loss": 0.0642, "step": 8220 }, { "epoch": 15.65097479790775, "grad_norm": 0.1383785754442215, "learning_rate": 4.781835503334392e-05, "loss": 0.0345, "step": 8221 }, { "epoch": 15.652876842605801, "grad_norm": 0.15243691205978394, "learning_rate": 4.781200381073357e-05, "loss": 0.053, "step": 8222 }, { "epoch": 15.654778887303852, "grad_norm": 0.11674341559410095, "learning_rate": 4.7805652588123216e-05, "loss": 0.0624, "step": 8223 }, { "epoch": 15.656680932001901, "grad_norm": 0.2048717737197876, "learning_rate": 4.779930136551286e-05, "loss": 0.0743, "step": 8224 }, { "epoch": 15.658582976699952, "grad_norm": 0.3081852197647095, "learning_rate": 4.7792950142902506e-05, "loss": 0.0547, "step": 8225 }, { "epoch": 15.660485021398003, "grad_norm": 0.20836210250854492, "learning_rate": 4.778659892029216e-05, "loss": 0.0607, "step": 8226 }, { "epoch": 15.662387066096054, "grad_norm": 0.22958265244960785, "learning_rate": 4.778024769768181e-05, "loss": 0.0693, "step": 8227 }, { "epoch": 15.664289110794103, "grad_norm": 0.18431368470191956, "learning_rate": 4.7773896475071455e-05, "loss": 0.0737, "step": 8228 }, { "epoch": 15.666191155492154, "grad_norm": 0.18889397382736206, "learning_rate": 4.77675452524611e-05, "loss": 0.0923, "step": 8229 }, { "epoch": 15.668093200190205, "grad_norm": 0.17911040782928467, "learning_rate": 4.7761194029850745e-05, "loss": 0.0811, "step": 8230 }, { "epoch": 15.669995244888256, "grad_norm": 0.13110655546188354, "learning_rate": 4.77548428072404e-05, "loss": 0.0603, "step": 8231 }, { "epoch": 15.671897289586305, "grad_norm": 0.08929801732301712, "learning_rate": 4.774849158463004e-05, "loss": 0.043, "step": 8232 }, { "epoch": 15.673799334284356, "grad_norm": 0.2299952208995819, "learning_rate": 4.774214036201969e-05, "loss": 0.0659, "step": 8233 }, { "epoch": 15.675701378982406, "grad_norm": 0.3628097176551819, "learning_rate": 4.773578913940934e-05, "loss": 0.2409, "step": 8234 }, { "epoch": 15.677603423680456, "grad_norm": 0.08881036937236786, "learning_rate": 4.7729437916798984e-05, "loss": 0.0798, "step": 8235 }, { "epoch": 15.679505468378506, "grad_norm": 0.14017745852470398, "learning_rate": 4.7723086694188636e-05, "loss": 0.0633, "step": 8236 }, { "epoch": 15.681407513076557, "grad_norm": 0.13978202641010284, "learning_rate": 4.771673547157828e-05, "loss": 0.0595, "step": 8237 }, { "epoch": 15.683309557774608, "grad_norm": 0.28331878781318665, "learning_rate": 4.7710384248967926e-05, "loss": 0.0717, "step": 8238 }, { "epoch": 15.685211602472657, "grad_norm": 0.1580718457698822, "learning_rate": 4.770403302635758e-05, "loss": 0.0481, "step": 8239 }, { "epoch": 15.687113647170708, "grad_norm": 0.2504642605781555, "learning_rate": 4.769768180374722e-05, "loss": 0.0909, "step": 8240 }, { "epoch": 15.689015691868759, "grad_norm": 0.16315074265003204, "learning_rate": 4.7691330581136875e-05, "loss": 0.0507, "step": 8241 }, { "epoch": 15.69091773656681, "grad_norm": 0.28321632742881775, "learning_rate": 4.768497935852651e-05, "loss": 0.0744, "step": 8242 }, { "epoch": 15.692819781264859, "grad_norm": 0.2194773256778717, "learning_rate": 4.7678628135916165e-05, "loss": 0.0617, "step": 8243 }, { "epoch": 15.69472182596291, "grad_norm": 0.17935332655906677, "learning_rate": 4.767227691330582e-05, "loss": 0.0623, "step": 8244 }, { "epoch": 15.69662387066096, "grad_norm": 0.17090705037117004, "learning_rate": 4.766592569069546e-05, "loss": 0.0495, "step": 8245 }, { "epoch": 15.698525915359012, "grad_norm": 0.2691819965839386, "learning_rate": 4.7659574468085114e-05, "loss": 0.0541, "step": 8246 }, { "epoch": 15.70042796005706, "grad_norm": 0.12617425620555878, "learning_rate": 4.765322324547475e-05, "loss": 0.0743, "step": 8247 }, { "epoch": 15.702330004755112, "grad_norm": 0.14223243296146393, "learning_rate": 4.7646872022864404e-05, "loss": 0.0489, "step": 8248 }, { "epoch": 15.704232049453163, "grad_norm": 0.13523830473423004, "learning_rate": 4.764052080025405e-05, "loss": 0.0681, "step": 8249 }, { "epoch": 15.706134094151212, "grad_norm": 0.19689877331256866, "learning_rate": 4.76341695776437e-05, "loss": 0.0675, "step": 8250 }, { "epoch": 15.708036138849263, "grad_norm": 0.313142329454422, "learning_rate": 4.7627818355033346e-05, "loss": 0.0875, "step": 8251 }, { "epoch": 15.709938183547314, "grad_norm": 0.12644127011299133, "learning_rate": 4.762146713242299e-05, "loss": 0.056, "step": 8252 }, { "epoch": 15.711840228245364, "grad_norm": 0.11934950947761536, "learning_rate": 4.761511590981264e-05, "loss": 0.0783, "step": 8253 }, { "epoch": 15.713742272943414, "grad_norm": 0.33159083127975464, "learning_rate": 4.760876468720229e-05, "loss": 0.0543, "step": 8254 }, { "epoch": 15.715644317641464, "grad_norm": 0.18594646453857422, "learning_rate": 4.760241346459194e-05, "loss": 0.0666, "step": 8255 }, { "epoch": 15.717546362339515, "grad_norm": 0.13968947529792786, "learning_rate": 4.7596062241981585e-05, "loss": 0.0574, "step": 8256 }, { "epoch": 15.719448407037566, "grad_norm": 0.18439897894859314, "learning_rate": 4.758971101937123e-05, "loss": 0.0547, "step": 8257 }, { "epoch": 15.721350451735615, "grad_norm": 0.25268253684043884, "learning_rate": 4.758335979676088e-05, "loss": 0.064, "step": 8258 }, { "epoch": 15.723252496433666, "grad_norm": 0.15728792548179626, "learning_rate": 4.7577008574150527e-05, "loss": 0.073, "step": 8259 }, { "epoch": 15.725154541131717, "grad_norm": 0.2027149647474289, "learning_rate": 4.757065735154017e-05, "loss": 0.0667, "step": 8260 }, { "epoch": 15.727056585829766, "grad_norm": 0.12121910601854324, "learning_rate": 4.756430612892982e-05, "loss": 0.0549, "step": 8261 }, { "epoch": 15.728958630527817, "grad_norm": 0.09283791482448578, "learning_rate": 4.755795490631947e-05, "loss": 0.0552, "step": 8262 }, { "epoch": 15.730860675225868, "grad_norm": 0.1861935704946518, "learning_rate": 4.755160368370912e-05, "loss": 0.0749, "step": 8263 }, { "epoch": 15.732762719923919, "grad_norm": 0.14479346573352814, "learning_rate": 4.7545252461098765e-05, "loss": 0.0667, "step": 8264 }, { "epoch": 15.734664764621968, "grad_norm": 0.08699013292789459, "learning_rate": 4.753890123848841e-05, "loss": 0.0642, "step": 8265 }, { "epoch": 15.736566809320019, "grad_norm": 0.11008250713348389, "learning_rate": 4.7532550015878056e-05, "loss": 0.05, "step": 8266 }, { "epoch": 15.73846885401807, "grad_norm": 0.17910899221897125, "learning_rate": 4.752619879326771e-05, "loss": 0.0991, "step": 8267 }, { "epoch": 15.74037089871612, "grad_norm": 0.15026019513607025, "learning_rate": 4.751984757065735e-05, "loss": 0.0391, "step": 8268 }, { "epoch": 15.74227294341417, "grad_norm": 0.2448321282863617, "learning_rate": 4.7513496348047e-05, "loss": 0.0644, "step": 8269 }, { "epoch": 15.74417498811222, "grad_norm": 0.2921144962310791, "learning_rate": 4.750714512543665e-05, "loss": 0.0709, "step": 8270 }, { "epoch": 15.746077032810271, "grad_norm": 0.14855507016181946, "learning_rate": 4.7500793902826294e-05, "loss": 0.0529, "step": 8271 }, { "epoch": 15.747979077508322, "grad_norm": 0.17389892041683197, "learning_rate": 4.7494442680215946e-05, "loss": 0.0653, "step": 8272 }, { "epoch": 15.749881122206371, "grad_norm": 0.204713374376297, "learning_rate": 4.748809145760559e-05, "loss": 0.0572, "step": 8273 }, { "epoch": 15.751783166904422, "grad_norm": 0.22001193463802338, "learning_rate": 4.7481740234995236e-05, "loss": 0.0698, "step": 8274 }, { "epoch": 15.753685211602473, "grad_norm": 0.15850436687469482, "learning_rate": 4.747538901238489e-05, "loss": 0.0673, "step": 8275 }, { "epoch": 15.755587256300522, "grad_norm": 0.11191987991333008, "learning_rate": 4.746903778977453e-05, "loss": 0.0405, "step": 8276 }, { "epoch": 15.757489300998573, "grad_norm": 0.08066005259752274, "learning_rate": 4.7462686567164185e-05, "loss": 0.0773, "step": 8277 }, { "epoch": 15.759391345696624, "grad_norm": 0.11931698024272919, "learning_rate": 4.7456335344553823e-05, "loss": 0.06, "step": 8278 }, { "epoch": 15.761293390394675, "grad_norm": 0.16694439947605133, "learning_rate": 4.7449984121943475e-05, "loss": 0.0635, "step": 8279 }, { "epoch": 15.763195435092724, "grad_norm": 0.2340782880783081, "learning_rate": 4.744363289933312e-05, "loss": 0.0654, "step": 8280 }, { "epoch": 15.765097479790775, "grad_norm": 0.15663287043571472, "learning_rate": 4.743728167672277e-05, "loss": 0.058, "step": 8281 }, { "epoch": 15.766999524488826, "grad_norm": 0.19084382057189941, "learning_rate": 4.7430930454112424e-05, "loss": 0.0742, "step": 8282 }, { "epoch": 15.768901569186877, "grad_norm": 0.21217328310012817, "learning_rate": 4.742457923150206e-05, "loss": 0.0877, "step": 8283 }, { "epoch": 15.770803613884926, "grad_norm": 0.1141515001654625, "learning_rate": 4.7418228008891714e-05, "loss": 0.0577, "step": 8284 }, { "epoch": 15.772705658582977, "grad_norm": 0.26023924350738525, "learning_rate": 4.741187678628136e-05, "loss": 0.0671, "step": 8285 }, { "epoch": 15.774607703281028, "grad_norm": 0.057885292917490005, "learning_rate": 4.740552556367101e-05, "loss": 0.0479, "step": 8286 }, { "epoch": 15.776509747979077, "grad_norm": 0.16264577209949493, "learning_rate": 4.7399174341060656e-05, "loss": 0.0642, "step": 8287 }, { "epoch": 15.778411792677128, "grad_norm": 0.18721486628055573, "learning_rate": 4.73928231184503e-05, "loss": 0.0642, "step": 8288 }, { "epoch": 15.780313837375179, "grad_norm": 0.15313813090324402, "learning_rate": 4.738647189583995e-05, "loss": 0.0543, "step": 8289 }, { "epoch": 15.78221588207323, "grad_norm": 0.13567408919334412, "learning_rate": 4.73801206732296e-05, "loss": 0.0395, "step": 8290 }, { "epoch": 15.784117926771279, "grad_norm": 0.14394468069076538, "learning_rate": 4.737376945061925e-05, "loss": 0.0514, "step": 8291 }, { "epoch": 15.78601997146933, "grad_norm": 0.1596677452325821, "learning_rate": 4.7367418228008895e-05, "loss": 0.0505, "step": 8292 }, { "epoch": 15.78792201616738, "grad_norm": 0.19569194316864014, "learning_rate": 4.736106700539854e-05, "loss": 0.0541, "step": 8293 }, { "epoch": 15.789824060865431, "grad_norm": 0.14746233820915222, "learning_rate": 4.735471578278819e-05, "loss": 0.0654, "step": 8294 }, { "epoch": 15.79172610556348, "grad_norm": 0.13105404376983643, "learning_rate": 4.734836456017784e-05, "loss": 0.0886, "step": 8295 }, { "epoch": 15.793628150261531, "grad_norm": 0.18374143540859222, "learning_rate": 4.734201333756748e-05, "loss": 0.0623, "step": 8296 }, { "epoch": 15.795530194959582, "grad_norm": 0.11379442363977432, "learning_rate": 4.733566211495713e-05, "loss": 0.0496, "step": 8297 }, { "epoch": 15.797432239657631, "grad_norm": 0.20474670827388763, "learning_rate": 4.732931089234678e-05, "loss": 0.0627, "step": 8298 }, { "epoch": 15.799334284355682, "grad_norm": 0.2521989941596985, "learning_rate": 4.732295966973643e-05, "loss": 0.0693, "step": 8299 }, { "epoch": 15.801236329053733, "grad_norm": 0.19155509769916534, "learning_rate": 4.7316608447126076e-05, "loss": 0.0747, "step": 8300 }, { "epoch": 15.803138373751784, "grad_norm": 0.2652791440486908, "learning_rate": 4.731025722451572e-05, "loss": 0.0554, "step": 8301 }, { "epoch": 15.805040418449833, "grad_norm": 0.20905011892318726, "learning_rate": 4.7303906001905366e-05, "loss": 0.0778, "step": 8302 }, { "epoch": 15.806942463147884, "grad_norm": 0.25589483976364136, "learning_rate": 4.729755477929502e-05, "loss": 0.0563, "step": 8303 }, { "epoch": 15.808844507845935, "grad_norm": 0.1645364761352539, "learning_rate": 4.729120355668466e-05, "loss": 0.0592, "step": 8304 }, { "epoch": 15.810746552543986, "grad_norm": 0.12126277387142181, "learning_rate": 4.728485233407431e-05, "loss": 0.0513, "step": 8305 }, { "epoch": 15.812648597242035, "grad_norm": 0.21371135115623474, "learning_rate": 4.727850111146396e-05, "loss": 0.0693, "step": 8306 }, { "epoch": 15.814550641940086, "grad_norm": 0.19225803017616272, "learning_rate": 4.7272149888853605e-05, "loss": 0.0603, "step": 8307 }, { "epoch": 15.816452686638137, "grad_norm": 0.24223649501800537, "learning_rate": 4.726579866624326e-05, "loss": 0.0554, "step": 8308 }, { "epoch": 15.818354731336186, "grad_norm": 0.1132277324795723, "learning_rate": 4.72594474436329e-05, "loss": 0.0642, "step": 8309 }, { "epoch": 15.820256776034237, "grad_norm": 0.2975490391254425, "learning_rate": 4.725309622102255e-05, "loss": 0.0759, "step": 8310 }, { "epoch": 15.822158820732287, "grad_norm": 0.19871625304222107, "learning_rate": 4.72467449984122e-05, "loss": 0.0671, "step": 8311 }, { "epoch": 15.824060865430338, "grad_norm": 0.29103484749794006, "learning_rate": 4.7240393775801844e-05, "loss": 0.0596, "step": 8312 }, { "epoch": 15.825962910128387, "grad_norm": 0.11662924289703369, "learning_rate": 4.7234042553191496e-05, "loss": 0.064, "step": 8313 }, { "epoch": 15.827864954826438, "grad_norm": 0.18209360539913177, "learning_rate": 4.7227691330581134e-05, "loss": 0.0703, "step": 8314 }, { "epoch": 15.82976699952449, "grad_norm": 0.14348343014717102, "learning_rate": 4.7221340107970786e-05, "loss": 0.0554, "step": 8315 }, { "epoch": 15.83166904422254, "grad_norm": 0.23151347041130066, "learning_rate": 4.721498888536043e-05, "loss": 0.0703, "step": 8316 }, { "epoch": 15.83357108892059, "grad_norm": 0.16555550694465637, "learning_rate": 4.720863766275008e-05, "loss": 0.0641, "step": 8317 }, { "epoch": 15.83547313361864, "grad_norm": 0.1724887490272522, "learning_rate": 4.7202286440139734e-05, "loss": 0.0795, "step": 8318 }, { "epoch": 15.837375178316691, "grad_norm": 0.1328611522912979, "learning_rate": 4.719593521752937e-05, "loss": 0.0583, "step": 8319 }, { "epoch": 15.83927722301474, "grad_norm": 0.3197205066680908, "learning_rate": 4.7189583994919025e-05, "loss": 0.0638, "step": 8320 }, { "epoch": 15.841179267712791, "grad_norm": 0.25061967968940735, "learning_rate": 4.718323277230867e-05, "loss": 0.0644, "step": 8321 }, { "epoch": 15.843081312410842, "grad_norm": 0.26406586170196533, "learning_rate": 4.717688154969832e-05, "loss": 0.0502, "step": 8322 }, { "epoch": 15.844983357108893, "grad_norm": 0.15122944116592407, "learning_rate": 4.7170530327087967e-05, "loss": 0.0611, "step": 8323 }, { "epoch": 15.846885401806942, "grad_norm": 0.1766766756772995, "learning_rate": 4.716417910447761e-05, "loss": 0.0659, "step": 8324 }, { "epoch": 15.848787446504993, "grad_norm": 0.20306788384914398, "learning_rate": 4.7157827881867263e-05, "loss": 0.0619, "step": 8325 }, { "epoch": 15.850689491203044, "grad_norm": 0.13225118815898895, "learning_rate": 4.715147665925691e-05, "loss": 0.0666, "step": 8326 }, { "epoch": 15.852591535901094, "grad_norm": 0.19744844734668732, "learning_rate": 4.714512543664656e-05, "loss": 0.0541, "step": 8327 }, { "epoch": 15.854493580599144, "grad_norm": 0.2040027529001236, "learning_rate": 4.7138774214036205e-05, "loss": 0.0675, "step": 8328 }, { "epoch": 15.856395625297194, "grad_norm": 0.28461000323295593, "learning_rate": 4.713242299142585e-05, "loss": 0.0574, "step": 8329 }, { "epoch": 15.858297669995245, "grad_norm": 0.13227978348731995, "learning_rate": 4.71260717688155e-05, "loss": 0.0588, "step": 8330 }, { "epoch": 15.860199714693294, "grad_norm": 0.16374631226062775, "learning_rate": 4.711972054620515e-05, "loss": 0.0591, "step": 8331 }, { "epoch": 15.862101759391345, "grad_norm": 0.22766095399856567, "learning_rate": 4.711336932359479e-05, "loss": 0.0668, "step": 8332 }, { "epoch": 15.864003804089396, "grad_norm": 0.19878962635993958, "learning_rate": 4.710701810098444e-05, "loss": 0.053, "step": 8333 }, { "epoch": 15.865905848787447, "grad_norm": 0.14449188113212585, "learning_rate": 4.710066687837409e-05, "loss": 0.0646, "step": 8334 }, { "epoch": 15.867807893485496, "grad_norm": 0.1568823605775833, "learning_rate": 4.7094315655763734e-05, "loss": 0.0569, "step": 8335 }, { "epoch": 15.869709938183547, "grad_norm": 0.14062386751174927, "learning_rate": 4.7087964433153386e-05, "loss": 0.0646, "step": 8336 }, { "epoch": 15.871611982881598, "grad_norm": 0.17233572900295258, "learning_rate": 4.708161321054303e-05, "loss": 0.0679, "step": 8337 }, { "epoch": 15.873514027579649, "grad_norm": 0.12581130862236023, "learning_rate": 4.7075261987932676e-05, "loss": 0.0428, "step": 8338 }, { "epoch": 15.875416072277698, "grad_norm": 0.161576509475708, "learning_rate": 4.706891076532233e-05, "loss": 0.0585, "step": 8339 }, { "epoch": 15.877318116975749, "grad_norm": 0.22900627553462982, "learning_rate": 4.706255954271197e-05, "loss": 0.0615, "step": 8340 }, { "epoch": 15.8792201616738, "grad_norm": 0.17060215771198273, "learning_rate": 4.705620832010162e-05, "loss": 0.0928, "step": 8341 }, { "epoch": 15.881122206371849, "grad_norm": 0.3081725835800171, "learning_rate": 4.704985709749127e-05, "loss": 0.0605, "step": 8342 }, { "epoch": 15.8830242510699, "grad_norm": 0.24117806553840637, "learning_rate": 4.7043505874880915e-05, "loss": 0.0799, "step": 8343 }, { "epoch": 15.88492629576795, "grad_norm": 0.1827300786972046, "learning_rate": 4.703715465227057e-05, "loss": 0.077, "step": 8344 }, { "epoch": 15.886828340466002, "grad_norm": 0.11569055914878845, "learning_rate": 4.703080342966021e-05, "loss": 0.0562, "step": 8345 }, { "epoch": 15.88873038516405, "grad_norm": 0.17954939603805542, "learning_rate": 4.702445220704986e-05, "loss": 0.0658, "step": 8346 }, { "epoch": 15.890632429862102, "grad_norm": 0.26830634474754333, "learning_rate": 4.701810098443951e-05, "loss": 0.0567, "step": 8347 }, { "epoch": 15.892534474560152, "grad_norm": 0.14093312621116638, "learning_rate": 4.7011749761829154e-05, "loss": 0.064, "step": 8348 }, { "epoch": 15.894436519258203, "grad_norm": 0.12327145040035248, "learning_rate": 4.7005398539218806e-05, "loss": 0.0519, "step": 8349 }, { "epoch": 15.896338563956252, "grad_norm": 0.29526394605636597, "learning_rate": 4.6999047316608444e-05, "loss": 0.0596, "step": 8350 }, { "epoch": 15.898240608654303, "grad_norm": 0.16847003996372223, "learning_rate": 4.6992696093998096e-05, "loss": 0.0742, "step": 8351 }, { "epoch": 15.900142653352354, "grad_norm": 0.10561301559209824, "learning_rate": 4.698634487138774e-05, "loss": 0.0378, "step": 8352 }, { "epoch": 15.902044698050403, "grad_norm": 0.15217702090740204, "learning_rate": 4.697999364877739e-05, "loss": 0.0701, "step": 8353 }, { "epoch": 15.903946742748454, "grad_norm": 0.15947742760181427, "learning_rate": 4.6973642426167045e-05, "loss": 0.0618, "step": 8354 }, { "epoch": 15.905848787446505, "grad_norm": 0.08902820944786072, "learning_rate": 4.696729120355668e-05, "loss": 0.041, "step": 8355 }, { "epoch": 15.907750832144556, "grad_norm": 0.11983463168144226, "learning_rate": 4.6960939980946335e-05, "loss": 0.0615, "step": 8356 }, { "epoch": 15.909652876842605, "grad_norm": 0.3213430941104889, "learning_rate": 4.695458875833598e-05, "loss": 0.0635, "step": 8357 }, { "epoch": 15.911554921540656, "grad_norm": 0.10112504661083221, "learning_rate": 4.694823753572563e-05, "loss": 0.0647, "step": 8358 }, { "epoch": 15.913456966238707, "grad_norm": 0.1713016778230667, "learning_rate": 4.694188631311528e-05, "loss": 0.0705, "step": 8359 }, { "epoch": 15.915359010936758, "grad_norm": 0.18278667330741882, "learning_rate": 4.693553509050492e-05, "loss": 0.0858, "step": 8360 }, { "epoch": 15.917261055634807, "grad_norm": 0.23004773259162903, "learning_rate": 4.6929183867894574e-05, "loss": 0.0774, "step": 8361 }, { "epoch": 15.919163100332858, "grad_norm": 0.13863714039325714, "learning_rate": 4.692283264528422e-05, "loss": 0.056, "step": 8362 }, { "epoch": 15.921065145030909, "grad_norm": 0.17896798253059387, "learning_rate": 4.691648142267387e-05, "loss": 0.0583, "step": 8363 }, { "epoch": 15.922967189728958, "grad_norm": 0.1130218356847763, "learning_rate": 4.691013020006351e-05, "loss": 0.0527, "step": 8364 }, { "epoch": 15.924869234427009, "grad_norm": 0.15107358992099762, "learning_rate": 4.690377897745316e-05, "loss": 0.0662, "step": 8365 }, { "epoch": 15.92677127912506, "grad_norm": 0.1794845461845398, "learning_rate": 4.689742775484281e-05, "loss": 0.0569, "step": 8366 }, { "epoch": 15.92867332382311, "grad_norm": 0.17926360666751862, "learning_rate": 4.689107653223246e-05, "loss": 0.0607, "step": 8367 }, { "epoch": 15.93057536852116, "grad_norm": 0.19702889025211334, "learning_rate": 4.68847253096221e-05, "loss": 0.0728, "step": 8368 }, { "epoch": 15.93247741321921, "grad_norm": 0.13820995390415192, "learning_rate": 4.687837408701175e-05, "loss": 0.0469, "step": 8369 }, { "epoch": 15.934379457917261, "grad_norm": 0.16547589004039764, "learning_rate": 4.68720228644014e-05, "loss": 0.0393, "step": 8370 }, { "epoch": 15.936281502615312, "grad_norm": 0.17976239323616028, "learning_rate": 4.6865671641791045e-05, "loss": 0.0542, "step": 8371 }, { "epoch": 15.938183547313361, "grad_norm": 0.24729850888252258, "learning_rate": 4.68593204191807e-05, "loss": 0.0729, "step": 8372 }, { "epoch": 15.940085592011412, "grad_norm": 0.09225702285766602, "learning_rate": 4.685296919657034e-05, "loss": 0.0748, "step": 8373 }, { "epoch": 15.941987636709463, "grad_norm": 0.27477720379829407, "learning_rate": 4.684661797395999e-05, "loss": 0.0441, "step": 8374 }, { "epoch": 15.943889681407512, "grad_norm": 0.10589617490768433, "learning_rate": 4.684026675134964e-05, "loss": 0.0509, "step": 8375 }, { "epoch": 15.945791726105563, "grad_norm": 0.21618284285068512, "learning_rate": 4.6833915528739284e-05, "loss": 0.066, "step": 8376 }, { "epoch": 15.947693770803614, "grad_norm": 0.11763995885848999, "learning_rate": 4.682756430612893e-05, "loss": 0.0516, "step": 8377 }, { "epoch": 15.949595815501665, "grad_norm": 0.10359404236078262, "learning_rate": 4.682121308351858e-05, "loss": 0.054, "step": 8378 }, { "epoch": 15.951497860199714, "grad_norm": 0.2101137787103653, "learning_rate": 4.6814861860908226e-05, "loss": 0.0736, "step": 8379 }, { "epoch": 15.953399904897765, "grad_norm": 0.14261220395565033, "learning_rate": 4.680851063829788e-05, "loss": 0.058, "step": 8380 }, { "epoch": 15.955301949595816, "grad_norm": 0.18092776834964752, "learning_rate": 4.680215941568752e-05, "loss": 0.0661, "step": 8381 }, { "epoch": 15.957203994293867, "grad_norm": 0.15513984858989716, "learning_rate": 4.679580819307717e-05, "loss": 0.056, "step": 8382 }, { "epoch": 15.959106038991916, "grad_norm": 0.26116275787353516, "learning_rate": 4.678945697046682e-05, "loss": 0.0685, "step": 8383 }, { "epoch": 15.961008083689967, "grad_norm": 0.22781796753406525, "learning_rate": 4.6783105747856465e-05, "loss": 0.0521, "step": 8384 }, { "epoch": 15.962910128388017, "grad_norm": 0.26958703994750977, "learning_rate": 4.6776754525246116e-05, "loss": 0.0458, "step": 8385 }, { "epoch": 15.964812173086067, "grad_norm": 0.14877475798130035, "learning_rate": 4.6770403302635755e-05, "loss": 0.0831, "step": 8386 }, { "epoch": 15.966714217784117, "grad_norm": 0.2953677773475647, "learning_rate": 4.676405208002541e-05, "loss": 0.0814, "step": 8387 }, { "epoch": 15.968616262482168, "grad_norm": 0.12530766427516937, "learning_rate": 4.675770085741505e-05, "loss": 0.0578, "step": 8388 }, { "epoch": 15.97051830718022, "grad_norm": 0.1906973272562027, "learning_rate": 4.6751349634804704e-05, "loss": 0.063, "step": 8389 }, { "epoch": 15.972420351878268, "grad_norm": 0.15123619139194489, "learning_rate": 4.674499841219435e-05, "loss": 0.0526, "step": 8390 }, { "epoch": 15.97432239657632, "grad_norm": 0.2910292148590088, "learning_rate": 4.6738647189583994e-05, "loss": 0.0715, "step": 8391 }, { "epoch": 15.97622444127437, "grad_norm": 0.13333997130393982, "learning_rate": 4.6732295966973646e-05, "loss": 0.041, "step": 8392 }, { "epoch": 15.978126485972421, "grad_norm": 0.16418004035949707, "learning_rate": 4.672594474436329e-05, "loss": 0.0784, "step": 8393 }, { "epoch": 15.98002853067047, "grad_norm": 0.19421927630901337, "learning_rate": 4.671959352175294e-05, "loss": 0.0501, "step": 8394 }, { "epoch": 15.981930575368521, "grad_norm": 0.2116997241973877, "learning_rate": 4.671324229914259e-05, "loss": 0.0704, "step": 8395 }, { "epoch": 15.983832620066572, "grad_norm": 0.12056510895490646, "learning_rate": 4.670689107653223e-05, "loss": 0.0904, "step": 8396 }, { "epoch": 15.985734664764623, "grad_norm": 0.15489526093006134, "learning_rate": 4.6700539853921884e-05, "loss": 0.0567, "step": 8397 }, { "epoch": 15.987636709462672, "grad_norm": 0.25165241956710815, "learning_rate": 4.669418863131153e-05, "loss": 0.0539, "step": 8398 }, { "epoch": 15.989538754160723, "grad_norm": 0.16446714103221893, "learning_rate": 4.668783740870118e-05, "loss": 0.068, "step": 8399 }, { "epoch": 15.991440798858774, "grad_norm": 0.18781155347824097, "learning_rate": 4.668148618609082e-05, "loss": 0.0507, "step": 8400 }, { "epoch": 15.993342843556823, "grad_norm": 0.14649319648742676, "learning_rate": 4.667513496348047e-05, "loss": 0.0435, "step": 8401 }, { "epoch": 15.995244888254874, "grad_norm": 0.11849051713943481, "learning_rate": 4.666878374087012e-05, "loss": 0.0543, "step": 8402 }, { "epoch": 15.997146932952925, "grad_norm": 0.21823644638061523, "learning_rate": 4.666243251825977e-05, "loss": 0.0669, "step": 8403 }, { "epoch": 15.999048977650975, "grad_norm": 0.15036791563034058, "learning_rate": 4.6656081295649413e-05, "loss": 0.0434, "step": 8404 }, { "epoch": 16.000951022349025, "grad_norm": 0.1519092172384262, "learning_rate": 4.664973007303906e-05, "loss": 0.0591, "step": 8405 }, { "epoch": 16.002853067047077, "grad_norm": 0.20130537450313568, "learning_rate": 4.664337885042871e-05, "loss": 0.0477, "step": 8406 }, { "epoch": 16.004755111745126, "grad_norm": 0.1303798258304596, "learning_rate": 4.6637027627818355e-05, "loss": 0.0667, "step": 8407 }, { "epoch": 16.006657156443175, "grad_norm": 0.051126472651958466, "learning_rate": 4.663067640520801e-05, "loss": 0.0407, "step": 8408 }, { "epoch": 16.008559201141228, "grad_norm": 0.10072528570890427, "learning_rate": 4.662432518259765e-05, "loss": 0.0503, "step": 8409 }, { "epoch": 16.010461245839277, "grad_norm": 0.11757353693246841, "learning_rate": 4.66179739599873e-05, "loss": 0.0611, "step": 8410 }, { "epoch": 16.012363290537326, "grad_norm": 0.15045824646949768, "learning_rate": 4.661162273737695e-05, "loss": 0.051, "step": 8411 }, { "epoch": 16.01426533523538, "grad_norm": 0.1323220580816269, "learning_rate": 4.6605271514766594e-05, "loss": 0.0475, "step": 8412 }, { "epoch": 16.016167379933428, "grad_norm": 0.11407540738582611, "learning_rate": 4.659892029215624e-05, "loss": 0.064, "step": 8413 }, { "epoch": 16.018069424631477, "grad_norm": 0.07627911120653152, "learning_rate": 4.659256906954589e-05, "loss": 0.0529, "step": 8414 }, { "epoch": 16.01997146932953, "grad_norm": 0.08195697516202927, "learning_rate": 4.6586217846935536e-05, "loss": 0.0388, "step": 8415 }, { "epoch": 16.02187351402758, "grad_norm": 0.0576738566160202, "learning_rate": 4.657986662432519e-05, "loss": 0.0462, "step": 8416 }, { "epoch": 16.02377555872563, "grad_norm": 0.09754709899425507, "learning_rate": 4.657351540171483e-05, "loss": 0.0645, "step": 8417 }, { "epoch": 16.02567760342368, "grad_norm": 0.09327246248722076, "learning_rate": 4.656716417910448e-05, "loss": 0.0589, "step": 8418 }, { "epoch": 16.02757964812173, "grad_norm": 0.1296929121017456, "learning_rate": 4.656081295649412e-05, "loss": 0.0468, "step": 8419 }, { "epoch": 16.029481692819783, "grad_norm": 0.09633622318506241, "learning_rate": 4.6554461733883775e-05, "loss": 0.0647, "step": 8420 }, { "epoch": 16.03138373751783, "grad_norm": 0.2070033997297287, "learning_rate": 4.654811051127343e-05, "loss": 0.0568, "step": 8421 }, { "epoch": 16.03328578221588, "grad_norm": 0.17608699202537537, "learning_rate": 4.6541759288663065e-05, "loss": 0.0567, "step": 8422 }, { "epoch": 16.035187826913933, "grad_norm": 0.11420154571533203, "learning_rate": 4.653540806605272e-05, "loss": 0.0836, "step": 8423 }, { "epoch": 16.037089871611983, "grad_norm": 0.20214304327964783, "learning_rate": 4.652905684344236e-05, "loss": 0.0551, "step": 8424 }, { "epoch": 16.03899191631003, "grad_norm": 0.3227376341819763, "learning_rate": 4.6522705620832014e-05, "loss": 0.0788, "step": 8425 }, { "epoch": 16.040893961008084, "grad_norm": 0.185341477394104, "learning_rate": 4.651635439822166e-05, "loss": 0.0539, "step": 8426 }, { "epoch": 16.042796005706133, "grad_norm": 0.09545671194791794, "learning_rate": 4.6510003175611304e-05, "loss": 0.0532, "step": 8427 }, { "epoch": 16.044698050404186, "grad_norm": 0.11102737486362457, "learning_rate": 4.6503651953000956e-05, "loss": 0.0509, "step": 8428 }, { "epoch": 16.046600095102235, "grad_norm": 0.1727059930562973, "learning_rate": 4.64973007303906e-05, "loss": 0.0613, "step": 8429 }, { "epoch": 16.048502139800284, "grad_norm": 0.06402069330215454, "learning_rate": 4.649094950778025e-05, "loss": 0.0619, "step": 8430 }, { "epoch": 16.050404184498337, "grad_norm": 0.16342858970165253, "learning_rate": 4.64845982851699e-05, "loss": 0.063, "step": 8431 }, { "epoch": 16.052306229196386, "grad_norm": 0.15879632532596588, "learning_rate": 4.647824706255954e-05, "loss": 0.0719, "step": 8432 }, { "epoch": 16.054208273894435, "grad_norm": 0.07986364513635635, "learning_rate": 4.6471895839949195e-05, "loss": 0.056, "step": 8433 }, { "epoch": 16.056110318592488, "grad_norm": 0.17455364763736725, "learning_rate": 4.646554461733884e-05, "loss": 0.0622, "step": 8434 }, { "epoch": 16.058012363290537, "grad_norm": 0.1904873251914978, "learning_rate": 4.645919339472849e-05, "loss": 0.0711, "step": 8435 }, { "epoch": 16.059914407988586, "grad_norm": 0.2067917138338089, "learning_rate": 4.645284217211813e-05, "loss": 0.0474, "step": 8436 }, { "epoch": 16.06181645268664, "grad_norm": 0.10311882197856903, "learning_rate": 4.644649094950778e-05, "loss": 0.0616, "step": 8437 }, { "epoch": 16.063718497384688, "grad_norm": 0.20031601190567017, "learning_rate": 4.644013972689743e-05, "loss": 0.0613, "step": 8438 }, { "epoch": 16.06562054208274, "grad_norm": 0.1746343970298767, "learning_rate": 4.643378850428708e-05, "loss": 0.0796, "step": 8439 }, { "epoch": 16.06752258678079, "grad_norm": 0.16077488660812378, "learning_rate": 4.6427437281676724e-05, "loss": 0.0494, "step": 8440 }, { "epoch": 16.06942463147884, "grad_norm": 0.2355642169713974, "learning_rate": 4.642108605906637e-05, "loss": 0.0686, "step": 8441 }, { "epoch": 16.07132667617689, "grad_norm": 0.16697682440280914, "learning_rate": 4.641473483645602e-05, "loss": 0.0622, "step": 8442 }, { "epoch": 16.07322872087494, "grad_norm": 0.2681947350502014, "learning_rate": 4.6408383613845666e-05, "loss": 0.0696, "step": 8443 }, { "epoch": 16.07513076557299, "grad_norm": 0.0967094749212265, "learning_rate": 4.640203239123532e-05, "loss": 0.0476, "step": 8444 }, { "epoch": 16.077032810271042, "grad_norm": 0.2911648154258728, "learning_rate": 4.639568116862496e-05, "loss": 0.077, "step": 8445 }, { "epoch": 16.07893485496909, "grad_norm": 0.16672903299331665, "learning_rate": 4.638932994601461e-05, "loss": 0.0451, "step": 8446 }, { "epoch": 16.08083689966714, "grad_norm": 0.15733489394187927, "learning_rate": 4.638297872340426e-05, "loss": 0.0595, "step": 8447 }, { "epoch": 16.082738944365193, "grad_norm": 0.25597983598709106, "learning_rate": 4.6376627500793905e-05, "loss": 0.0717, "step": 8448 }, { "epoch": 16.084640989063242, "grad_norm": 0.13080979883670807, "learning_rate": 4.637027627818355e-05, "loss": 0.055, "step": 8449 }, { "epoch": 16.086543033761295, "grad_norm": 0.23891276121139526, "learning_rate": 4.63639250555732e-05, "loss": 0.057, "step": 8450 }, { "epoch": 16.088445078459344, "grad_norm": 0.13582190871238708, "learning_rate": 4.635757383296285e-05, "loss": 0.0529, "step": 8451 }, { "epoch": 16.090347123157393, "grad_norm": 0.22233571112155914, "learning_rate": 4.63512226103525e-05, "loss": 0.0618, "step": 8452 }, { "epoch": 16.092249167855446, "grad_norm": 0.186944842338562, "learning_rate": 4.6344871387742144e-05, "loss": 0.0605, "step": 8453 }, { "epoch": 16.094151212553495, "grad_norm": 0.13477811217308044, "learning_rate": 4.633852016513179e-05, "loss": 0.0682, "step": 8454 }, { "epoch": 16.096053257251544, "grad_norm": 0.15073803067207336, "learning_rate": 4.6332168942521434e-05, "loss": 0.0628, "step": 8455 }, { "epoch": 16.097955301949597, "grad_norm": 0.25887051224708557, "learning_rate": 4.6325817719911086e-05, "loss": 0.062, "step": 8456 }, { "epoch": 16.099857346647646, "grad_norm": 0.07140891999006271, "learning_rate": 4.631946649730074e-05, "loss": 0.0562, "step": 8457 }, { "epoch": 16.101759391345695, "grad_norm": 0.09805332124233246, "learning_rate": 4.6313115274690376e-05, "loss": 0.0426, "step": 8458 }, { "epoch": 16.103661436043748, "grad_norm": 0.1520451009273529, "learning_rate": 4.630676405208003e-05, "loss": 0.089, "step": 8459 }, { "epoch": 16.105563480741797, "grad_norm": 0.1416059285402298, "learning_rate": 4.630041282946967e-05, "loss": 0.0585, "step": 8460 }, { "epoch": 16.10746552543985, "grad_norm": 0.2078314870595932, "learning_rate": 4.6294061606859324e-05, "loss": 0.0668, "step": 8461 }, { "epoch": 16.1093675701379, "grad_norm": 0.1390911340713501, "learning_rate": 4.628771038424897e-05, "loss": 0.0512, "step": 8462 }, { "epoch": 16.111269614835948, "grad_norm": 0.16498126089572906, "learning_rate": 4.6281359161638615e-05, "loss": 0.0442, "step": 8463 }, { "epoch": 16.113171659534, "grad_norm": 0.15705092251300812, "learning_rate": 4.6275007939028266e-05, "loss": 0.0542, "step": 8464 }, { "epoch": 16.11507370423205, "grad_norm": 0.12232176959514618, "learning_rate": 4.626865671641791e-05, "loss": 0.0575, "step": 8465 }, { "epoch": 16.1169757489301, "grad_norm": 0.10896648466587067, "learning_rate": 4.626230549380756e-05, "loss": 0.0608, "step": 8466 }, { "epoch": 16.11887779362815, "grad_norm": 0.09567489475011826, "learning_rate": 4.62559542711972e-05, "loss": 0.0568, "step": 8467 }, { "epoch": 16.1207798383262, "grad_norm": 0.19636806845664978, "learning_rate": 4.6249603048586853e-05, "loss": 0.0514, "step": 8468 }, { "epoch": 16.12268188302425, "grad_norm": 0.18234984576702118, "learning_rate": 4.6243251825976505e-05, "loss": 0.0504, "step": 8469 }, { "epoch": 16.124583927722302, "grad_norm": 0.15296286344528198, "learning_rate": 4.623690060336615e-05, "loss": 0.0376, "step": 8470 }, { "epoch": 16.12648597242035, "grad_norm": 0.11294987797737122, "learning_rate": 4.62305493807558e-05, "loss": 0.0489, "step": 8471 }, { "epoch": 16.128388017118404, "grad_norm": 0.13020162284374237, "learning_rate": 4.622419815814544e-05, "loss": 0.0525, "step": 8472 }, { "epoch": 16.130290061816453, "grad_norm": 0.13996310532093048, "learning_rate": 4.621784693553509e-05, "loss": 0.0467, "step": 8473 }, { "epoch": 16.132192106514502, "grad_norm": 0.10891363024711609, "learning_rate": 4.621149571292474e-05, "loss": 0.0486, "step": 8474 }, { "epoch": 16.134094151212555, "grad_norm": 0.17132295668125153, "learning_rate": 4.620514449031439e-05, "loss": 0.0704, "step": 8475 }, { "epoch": 16.135996195910604, "grad_norm": 0.18272633850574493, "learning_rate": 4.6198793267704034e-05, "loss": 0.0472, "step": 8476 }, { "epoch": 16.137898240608653, "grad_norm": 0.1485610157251358, "learning_rate": 4.619244204509368e-05, "loss": 0.0559, "step": 8477 }, { "epoch": 16.139800285306706, "grad_norm": 0.13875699043273926, "learning_rate": 4.618609082248333e-05, "loss": 0.0592, "step": 8478 }, { "epoch": 16.141702330004755, "grad_norm": 0.1410178542137146, "learning_rate": 4.6179739599872976e-05, "loss": 0.0647, "step": 8479 }, { "epoch": 16.143604374702804, "grad_norm": 0.230701744556427, "learning_rate": 4.617338837726263e-05, "loss": 0.0528, "step": 8480 }, { "epoch": 16.145506419400856, "grad_norm": 0.23617468774318695, "learning_rate": 4.616703715465227e-05, "loss": 0.0594, "step": 8481 }, { "epoch": 16.147408464098906, "grad_norm": 0.18603870272636414, "learning_rate": 4.616068593204192e-05, "loss": 0.0433, "step": 8482 }, { "epoch": 16.149310508796958, "grad_norm": 0.18782128393650055, "learning_rate": 4.615433470943157e-05, "loss": 0.0566, "step": 8483 }, { "epoch": 16.151212553495007, "grad_norm": 0.25286000967025757, "learning_rate": 4.6147983486821215e-05, "loss": 0.0785, "step": 8484 }, { "epoch": 16.153114598193056, "grad_norm": 0.0904570072889328, "learning_rate": 4.614163226421086e-05, "loss": 0.0508, "step": 8485 }, { "epoch": 16.15501664289111, "grad_norm": 0.2545378506183624, "learning_rate": 4.613528104160051e-05, "loss": 0.0783, "step": 8486 }, { "epoch": 16.156918687589158, "grad_norm": 0.191772922873497, "learning_rate": 4.612892981899016e-05, "loss": 0.0683, "step": 8487 }, { "epoch": 16.158820732287207, "grad_norm": 0.1491425633430481, "learning_rate": 4.612257859637981e-05, "loss": 0.0737, "step": 8488 }, { "epoch": 16.16072277698526, "grad_norm": 0.0845363661646843, "learning_rate": 4.6116227373769454e-05, "loss": 0.061, "step": 8489 }, { "epoch": 16.16262482168331, "grad_norm": 0.24539388716220856, "learning_rate": 4.61098761511591e-05, "loss": 0.0675, "step": 8490 }, { "epoch": 16.16452686638136, "grad_norm": 0.10510896891355515, "learning_rate": 4.6103524928548744e-05, "loss": 0.0387, "step": 8491 }, { "epoch": 16.16642891107941, "grad_norm": 0.13409045338630676, "learning_rate": 4.6097173705938396e-05, "loss": 0.05, "step": 8492 }, { "epoch": 16.16833095577746, "grad_norm": 0.14605560898780823, "learning_rate": 4.609082248332804e-05, "loss": 0.0747, "step": 8493 }, { "epoch": 16.170233000475513, "grad_norm": 0.10427720844745636, "learning_rate": 4.6084471260717686e-05, "loss": 0.0689, "step": 8494 }, { "epoch": 16.17213504517356, "grad_norm": 0.1358361840248108, "learning_rate": 4.607812003810734e-05, "loss": 0.053, "step": 8495 }, { "epoch": 16.17403708987161, "grad_norm": 0.1160115897655487, "learning_rate": 4.607176881549698e-05, "loss": 0.0422, "step": 8496 }, { "epoch": 16.175939134569663, "grad_norm": 0.11981600522994995, "learning_rate": 4.6065417592886635e-05, "loss": 0.0553, "step": 8497 }, { "epoch": 16.177841179267713, "grad_norm": 0.15992315113544464, "learning_rate": 4.605906637027628e-05, "loss": 0.053, "step": 8498 }, { "epoch": 16.17974322396576, "grad_norm": 0.14129142463207245, "learning_rate": 4.6052715147665925e-05, "loss": 0.0587, "step": 8499 }, { "epoch": 16.181645268663814, "grad_norm": 0.12155603617429733, "learning_rate": 4.604636392505558e-05, "loss": 0.0488, "step": 8500 }, { "epoch": 16.183547313361863, "grad_norm": 0.19479072093963623, "learning_rate": 4.604001270244522e-05, "loss": 0.0592, "step": 8501 }, { "epoch": 16.185449358059916, "grad_norm": 0.16191153228282928, "learning_rate": 4.6033661479834874e-05, "loss": 0.0874, "step": 8502 }, { "epoch": 16.187351402757965, "grad_norm": 0.2593137323856354, "learning_rate": 4.602731025722451e-05, "loss": 0.0732, "step": 8503 }, { "epoch": 16.189253447456014, "grad_norm": 0.07688070833683014, "learning_rate": 4.6020959034614164e-05, "loss": 0.0646, "step": 8504 }, { "epoch": 16.191155492154067, "grad_norm": 0.15450550615787506, "learning_rate": 4.6014607812003816e-05, "loss": 0.0603, "step": 8505 }, { "epoch": 16.193057536852116, "grad_norm": 0.31790322065353394, "learning_rate": 4.600825658939346e-05, "loss": 0.0793, "step": 8506 }, { "epoch": 16.194959581550165, "grad_norm": 0.12882187962532043, "learning_rate": 4.600190536678311e-05, "loss": 0.0524, "step": 8507 }, { "epoch": 16.196861626248218, "grad_norm": 0.25104114413261414, "learning_rate": 4.599555414417275e-05, "loss": 0.0679, "step": 8508 }, { "epoch": 16.198763670946267, "grad_norm": 0.13157489895820618, "learning_rate": 4.59892029215624e-05, "loss": 0.0562, "step": 8509 }, { "epoch": 16.200665715644316, "grad_norm": 0.1363213062286377, "learning_rate": 4.598285169895205e-05, "loss": 0.06, "step": 8510 }, { "epoch": 16.20256776034237, "grad_norm": 0.15622791647911072, "learning_rate": 4.59765004763417e-05, "loss": 0.0722, "step": 8511 }, { "epoch": 16.204469805040418, "grad_norm": 0.1738666146993637, "learning_rate": 4.5970149253731345e-05, "loss": 0.0496, "step": 8512 }, { "epoch": 16.20637184973847, "grad_norm": 0.1258222460746765, "learning_rate": 4.596379803112099e-05, "loss": 0.0528, "step": 8513 }, { "epoch": 16.20827389443652, "grad_norm": 0.1232779324054718, "learning_rate": 4.595744680851064e-05, "loss": 0.0535, "step": 8514 }, { "epoch": 16.21017593913457, "grad_norm": 0.1608402281999588, "learning_rate": 4.595109558590029e-05, "loss": 0.0465, "step": 8515 }, { "epoch": 16.21207798383262, "grad_norm": 0.16735756397247314, "learning_rate": 4.594474436328994e-05, "loss": 0.0594, "step": 8516 }, { "epoch": 16.21398002853067, "grad_norm": 0.18840819597244263, "learning_rate": 4.5938393140679584e-05, "loss": 0.0588, "step": 8517 }, { "epoch": 16.21588207322872, "grad_norm": 0.12657614052295685, "learning_rate": 4.593204191806923e-05, "loss": 0.0669, "step": 8518 }, { "epoch": 16.217784117926772, "grad_norm": 0.18073607981204987, "learning_rate": 4.592569069545888e-05, "loss": 0.0598, "step": 8519 }, { "epoch": 16.21968616262482, "grad_norm": 0.14543786644935608, "learning_rate": 4.5919339472848526e-05, "loss": 0.074, "step": 8520 }, { "epoch": 16.22158820732287, "grad_norm": 0.09334524720907211, "learning_rate": 4.591298825023817e-05, "loss": 0.055, "step": 8521 }, { "epoch": 16.223490252020923, "grad_norm": 0.13787609338760376, "learning_rate": 4.5906637027627816e-05, "loss": 0.0417, "step": 8522 }, { "epoch": 16.225392296718972, "grad_norm": 0.204119011759758, "learning_rate": 4.590028580501747e-05, "loss": 0.0559, "step": 8523 }, { "epoch": 16.227294341417025, "grad_norm": 0.12836776673793793, "learning_rate": 4.589393458240712e-05, "loss": 0.0465, "step": 8524 }, { "epoch": 16.229196386115074, "grad_norm": 0.12782390415668488, "learning_rate": 4.5887583359796764e-05, "loss": 0.0561, "step": 8525 }, { "epoch": 16.231098430813123, "grad_norm": 0.2859640419483185, "learning_rate": 4.588123213718641e-05, "loss": 0.058, "step": 8526 }, { "epoch": 16.233000475511176, "grad_norm": 0.11105810105800629, "learning_rate": 4.5874880914576055e-05, "loss": 0.0688, "step": 8527 }, { "epoch": 16.234902520209225, "grad_norm": 0.22457514703273773, "learning_rate": 4.5868529691965706e-05, "loss": 0.0796, "step": 8528 }, { "epoch": 16.236804564907274, "grad_norm": 0.09397421032190323, "learning_rate": 4.586217846935535e-05, "loss": 0.0595, "step": 8529 }, { "epoch": 16.238706609605327, "grad_norm": 0.28852370381355286, "learning_rate": 4.5855827246744997e-05, "loss": 0.0549, "step": 8530 }, { "epoch": 16.240608654303376, "grad_norm": 0.1166771650314331, "learning_rate": 4.584947602413465e-05, "loss": 0.0493, "step": 8531 }, { "epoch": 16.242510699001425, "grad_norm": 0.2033771574497223, "learning_rate": 4.5843124801524293e-05, "loss": 0.059, "step": 8532 }, { "epoch": 16.244412743699478, "grad_norm": 0.10996011644601822, "learning_rate": 4.5836773578913945e-05, "loss": 0.0445, "step": 8533 }, { "epoch": 16.246314788397527, "grad_norm": 0.11977210640907288, "learning_rate": 4.583042235630359e-05, "loss": 0.0556, "step": 8534 }, { "epoch": 16.24821683309558, "grad_norm": 0.2993720769882202, "learning_rate": 4.5824071133693235e-05, "loss": 0.0709, "step": 8535 }, { "epoch": 16.25011887779363, "grad_norm": 0.12694013118743896, "learning_rate": 4.581771991108289e-05, "loss": 0.0655, "step": 8536 }, { "epoch": 16.252020922491678, "grad_norm": 0.1645960509777069, "learning_rate": 4.581136868847253e-05, "loss": 0.0639, "step": 8537 }, { "epoch": 16.25392296718973, "grad_norm": 0.09638939052820206, "learning_rate": 4.5805017465862184e-05, "loss": 0.0475, "step": 8538 }, { "epoch": 16.25582501188778, "grad_norm": 0.20398004353046417, "learning_rate": 4.579866624325182e-05, "loss": 0.0381, "step": 8539 }, { "epoch": 16.25772705658583, "grad_norm": 0.20141464471817017, "learning_rate": 4.5792315020641474e-05, "loss": 0.0594, "step": 8540 }, { "epoch": 16.25962910128388, "grad_norm": 0.16405193507671356, "learning_rate": 4.5785963798031126e-05, "loss": 0.0421, "step": 8541 }, { "epoch": 16.26153114598193, "grad_norm": 0.29997482895851135, "learning_rate": 4.577961257542077e-05, "loss": 0.0876, "step": 8542 }, { "epoch": 16.26343319067998, "grad_norm": 0.06307131052017212, "learning_rate": 4.577326135281042e-05, "loss": 0.0348, "step": 8543 }, { "epoch": 16.265335235378032, "grad_norm": 0.1158008873462677, "learning_rate": 4.576691013020006e-05, "loss": 0.0291, "step": 8544 }, { "epoch": 16.26723728007608, "grad_norm": 0.3139667212963104, "learning_rate": 4.576055890758971e-05, "loss": 0.0596, "step": 8545 }, { "epoch": 16.269139324774134, "grad_norm": 0.3778131306171417, "learning_rate": 4.575420768497936e-05, "loss": 0.1135, "step": 8546 }, { "epoch": 16.271041369472183, "grad_norm": 0.12188387662172318, "learning_rate": 4.574785646236901e-05, "loss": 0.0578, "step": 8547 }, { "epoch": 16.272943414170232, "grad_norm": 0.2640130817890167, "learning_rate": 4.5741505239758655e-05, "loss": 0.0858, "step": 8548 }, { "epoch": 16.274845458868285, "grad_norm": 0.23133710026741028, "learning_rate": 4.57351540171483e-05, "loss": 0.0729, "step": 8549 }, { "epoch": 16.276747503566334, "grad_norm": 0.11715468764305115, "learning_rate": 4.572880279453795e-05, "loss": 0.0505, "step": 8550 }, { "epoch": 16.278649548264383, "grad_norm": 0.23677362501621246, "learning_rate": 4.57224515719276e-05, "loss": 0.0601, "step": 8551 }, { "epoch": 16.280551592962436, "grad_norm": 0.18508802354335785, "learning_rate": 4.571610034931725e-05, "loss": 0.0696, "step": 8552 }, { "epoch": 16.282453637660485, "grad_norm": 0.0752667635679245, "learning_rate": 4.5709749126706894e-05, "loss": 0.0468, "step": 8553 }, { "epoch": 16.284355682358534, "grad_norm": 0.13637211918830872, "learning_rate": 4.570339790409654e-05, "loss": 0.0423, "step": 8554 }, { "epoch": 16.286257727056586, "grad_norm": 0.11643306910991669, "learning_rate": 4.569704668148619e-05, "loss": 0.0591, "step": 8555 }, { "epoch": 16.288159771754636, "grad_norm": 0.1637072116136551, "learning_rate": 4.5690695458875836e-05, "loss": 0.0689, "step": 8556 }, { "epoch": 16.29006181645269, "grad_norm": 0.14512047171592712, "learning_rate": 4.568434423626548e-05, "loss": 0.0569, "step": 8557 }, { "epoch": 16.291963861150737, "grad_norm": 0.18429428339004517, "learning_rate": 4.5677993013655126e-05, "loss": 0.0767, "step": 8558 }, { "epoch": 16.293865905848786, "grad_norm": 0.0927993431687355, "learning_rate": 4.567164179104478e-05, "loss": 0.0353, "step": 8559 }, { "epoch": 16.29576795054684, "grad_norm": 0.08778216689825058, "learning_rate": 4.566529056843443e-05, "loss": 0.0479, "step": 8560 }, { "epoch": 16.29766999524489, "grad_norm": 0.09512235969305038, "learning_rate": 4.5658939345824075e-05, "loss": 0.0576, "step": 8561 }, { "epoch": 16.299572039942937, "grad_norm": 0.20626294612884521, "learning_rate": 4.565258812321372e-05, "loss": 0.0534, "step": 8562 }, { "epoch": 16.30147408464099, "grad_norm": 0.21326245367527008, "learning_rate": 4.5646236900603365e-05, "loss": 0.0605, "step": 8563 }, { "epoch": 16.30337612933904, "grad_norm": 0.17919912934303284, "learning_rate": 4.563988567799302e-05, "loss": 0.0605, "step": 8564 }, { "epoch": 16.30527817403709, "grad_norm": 0.16517671942710876, "learning_rate": 4.563353445538266e-05, "loss": 0.0455, "step": 8565 }, { "epoch": 16.30718021873514, "grad_norm": 0.1184915080666542, "learning_rate": 4.562718323277231e-05, "loss": 0.0574, "step": 8566 }, { "epoch": 16.30908226343319, "grad_norm": 0.065870501101017, "learning_rate": 4.562083201016196e-05, "loss": 0.0599, "step": 8567 }, { "epoch": 16.310984308131243, "grad_norm": 0.17375320196151733, "learning_rate": 4.5614480787551604e-05, "loss": 0.0594, "step": 8568 }, { "epoch": 16.312886352829292, "grad_norm": 0.18376989662647247, "learning_rate": 4.5608129564941256e-05, "loss": 0.0688, "step": 8569 }, { "epoch": 16.31478839752734, "grad_norm": 0.12935777008533478, "learning_rate": 4.56017783423309e-05, "loss": 0.0479, "step": 8570 }, { "epoch": 16.316690442225394, "grad_norm": 0.19566315412521362, "learning_rate": 4.5595427119720546e-05, "loss": 0.0463, "step": 8571 }, { "epoch": 16.318592486923443, "grad_norm": 0.10081527382135391, "learning_rate": 4.55890758971102e-05, "loss": 0.0389, "step": 8572 }, { "epoch": 16.32049453162149, "grad_norm": 0.11026335507631302, "learning_rate": 4.558272467449984e-05, "loss": 0.0523, "step": 8573 }, { "epoch": 16.322396576319544, "grad_norm": 0.1704561710357666, "learning_rate": 4.5576373451889495e-05, "loss": 0.0661, "step": 8574 }, { "epoch": 16.324298621017594, "grad_norm": 0.0993368849158287, "learning_rate": 4.557002222927913e-05, "loss": 0.059, "step": 8575 }, { "epoch": 16.326200665715643, "grad_norm": 0.2205575853586197, "learning_rate": 4.5563671006668785e-05, "loss": 0.0799, "step": 8576 }, { "epoch": 16.328102710413695, "grad_norm": 0.12398331612348557, "learning_rate": 4.555731978405843e-05, "loss": 0.0684, "step": 8577 }, { "epoch": 16.330004755111744, "grad_norm": 0.15090776979923248, "learning_rate": 4.555096856144808e-05, "loss": 0.0588, "step": 8578 }, { "epoch": 16.331906799809797, "grad_norm": 0.2645479142665863, "learning_rate": 4.5544617338837734e-05, "loss": 0.0968, "step": 8579 }, { "epoch": 16.333808844507846, "grad_norm": 0.2185746282339096, "learning_rate": 4.553826611622737e-05, "loss": 0.0645, "step": 8580 }, { "epoch": 16.335710889205895, "grad_norm": 0.09869100898504257, "learning_rate": 4.5531914893617024e-05, "loss": 0.064, "step": 8581 }, { "epoch": 16.337612933903948, "grad_norm": 0.16115908324718475, "learning_rate": 4.552556367100667e-05, "loss": 0.0526, "step": 8582 }, { "epoch": 16.339514978601997, "grad_norm": 0.18371903896331787, "learning_rate": 4.551921244839632e-05, "loss": 0.0619, "step": 8583 }, { "epoch": 16.341417023300046, "grad_norm": 0.16014857590198517, "learning_rate": 4.5512861225785966e-05, "loss": 0.0611, "step": 8584 }, { "epoch": 16.3433190679981, "grad_norm": 0.19126653671264648, "learning_rate": 4.550651000317561e-05, "loss": 0.0573, "step": 8585 }, { "epoch": 16.345221112696148, "grad_norm": 0.11099161207675934, "learning_rate": 4.550015878056526e-05, "loss": 0.0698, "step": 8586 }, { "epoch": 16.347123157394197, "grad_norm": 0.13271382451057434, "learning_rate": 4.549380755795491e-05, "loss": 0.0433, "step": 8587 }, { "epoch": 16.34902520209225, "grad_norm": 0.157912015914917, "learning_rate": 4.548745633534456e-05, "loss": 0.0679, "step": 8588 }, { "epoch": 16.3509272467903, "grad_norm": 0.15022407472133636, "learning_rate": 4.5481105112734204e-05, "loss": 0.0509, "step": 8589 }, { "epoch": 16.35282929148835, "grad_norm": 0.1722324937582016, "learning_rate": 4.547475389012385e-05, "loss": 0.042, "step": 8590 }, { "epoch": 16.3547313361864, "grad_norm": 0.11643511801958084, "learning_rate": 4.54684026675135e-05, "loss": 0.0602, "step": 8591 }, { "epoch": 16.35663338088445, "grad_norm": 0.11521922796964645, "learning_rate": 4.5462051444903146e-05, "loss": 0.0466, "step": 8592 }, { "epoch": 16.358535425582502, "grad_norm": 0.17267794907093048, "learning_rate": 4.545570022229279e-05, "loss": 0.0437, "step": 8593 }, { "epoch": 16.36043747028055, "grad_norm": 0.20885154604911804, "learning_rate": 4.5449348999682437e-05, "loss": 0.0563, "step": 8594 }, { "epoch": 16.3623395149786, "grad_norm": 0.21735620498657227, "learning_rate": 4.544299777707209e-05, "loss": 0.0589, "step": 8595 }, { "epoch": 16.364241559676653, "grad_norm": 0.11824718117713928, "learning_rate": 4.543664655446174e-05, "loss": 0.0503, "step": 8596 }, { "epoch": 16.366143604374702, "grad_norm": 0.16579841077327728, "learning_rate": 4.5430295331851385e-05, "loss": 0.0692, "step": 8597 }, { "epoch": 16.36804564907275, "grad_norm": 0.11211599409580231, "learning_rate": 4.542394410924103e-05, "loss": 0.075, "step": 8598 }, { "epoch": 16.369947693770804, "grad_norm": 0.07448209822177887, "learning_rate": 4.5417592886630675e-05, "loss": 0.0675, "step": 8599 }, { "epoch": 16.371849738468853, "grad_norm": 0.10178573429584503, "learning_rate": 4.541124166402033e-05, "loss": 0.0556, "step": 8600 }, { "epoch": 16.373751783166906, "grad_norm": 0.2198735624551773, "learning_rate": 4.540489044140997e-05, "loss": 0.0669, "step": 8601 }, { "epoch": 16.375653827864955, "grad_norm": 0.20772859454154968, "learning_rate": 4.539853921879962e-05, "loss": 0.056, "step": 8602 }, { "epoch": 16.377555872563004, "grad_norm": 0.23242707550525665, "learning_rate": 4.539218799618927e-05, "loss": 0.0556, "step": 8603 }, { "epoch": 16.379457917261057, "grad_norm": 0.08447839319705963, "learning_rate": 4.5385836773578914e-05, "loss": 0.0538, "step": 8604 }, { "epoch": 16.381359961959106, "grad_norm": 0.15451155602931976, "learning_rate": 4.5379485550968566e-05, "loss": 0.0444, "step": 8605 }, { "epoch": 16.383262006657155, "grad_norm": 0.24637392163276672, "learning_rate": 4.537313432835821e-05, "loss": 0.0753, "step": 8606 }, { "epoch": 16.385164051355208, "grad_norm": 0.11653976887464523, "learning_rate": 4.5366783105747856e-05, "loss": 0.0495, "step": 8607 }, { "epoch": 16.387066096053257, "grad_norm": 0.35665005445480347, "learning_rate": 4.536043188313751e-05, "loss": 0.0734, "step": 8608 }, { "epoch": 16.388968140751306, "grad_norm": 0.09935169667005539, "learning_rate": 4.535408066052715e-05, "loss": 0.0521, "step": 8609 }, { "epoch": 16.39087018544936, "grad_norm": 0.14690108597278595, "learning_rate": 4.5347729437916805e-05, "loss": 0.0458, "step": 8610 }, { "epoch": 16.392772230147408, "grad_norm": 0.14574097096920013, "learning_rate": 4.534137821530644e-05, "loss": 0.0609, "step": 8611 }, { "epoch": 16.39467427484546, "grad_norm": 0.12584878504276276, "learning_rate": 4.5335026992696095e-05, "loss": 0.0818, "step": 8612 }, { "epoch": 16.39657631954351, "grad_norm": 0.17045927047729492, "learning_rate": 4.532867577008574e-05, "loss": 0.055, "step": 8613 }, { "epoch": 16.39847836424156, "grad_norm": 0.1100262850522995, "learning_rate": 4.532232454747539e-05, "loss": 0.0452, "step": 8614 }, { "epoch": 16.40038040893961, "grad_norm": 0.18738383054733276, "learning_rate": 4.5315973324865044e-05, "loss": 0.0556, "step": 8615 }, { "epoch": 16.40228245363766, "grad_norm": 0.15285401046276093, "learning_rate": 4.530962210225468e-05, "loss": 0.0576, "step": 8616 }, { "epoch": 16.40418449833571, "grad_norm": 0.268604576587677, "learning_rate": 4.5303270879644334e-05, "loss": 0.0649, "step": 8617 }, { "epoch": 16.406086543033762, "grad_norm": 0.11927369236946106, "learning_rate": 4.529691965703398e-05, "loss": 0.0777, "step": 8618 }, { "epoch": 16.40798858773181, "grad_norm": 0.2090739905834198, "learning_rate": 4.529056843442363e-05, "loss": 0.0776, "step": 8619 }, { "epoch": 16.409890632429864, "grad_norm": 0.1961270570755005, "learning_rate": 4.5284217211813276e-05, "loss": 0.0716, "step": 8620 }, { "epoch": 16.411792677127913, "grad_norm": 0.18359386920928955, "learning_rate": 4.527786598920292e-05, "loss": 0.0573, "step": 8621 }, { "epoch": 16.413694721825962, "grad_norm": 0.20374822616577148, "learning_rate": 4.527151476659257e-05, "loss": 0.067, "step": 8622 }, { "epoch": 16.415596766524015, "grad_norm": 0.054867297410964966, "learning_rate": 4.526516354398222e-05, "loss": 0.0575, "step": 8623 }, { "epoch": 16.417498811222064, "grad_norm": 0.14874067902565002, "learning_rate": 4.525881232137187e-05, "loss": 0.0466, "step": 8624 }, { "epoch": 16.419400855920113, "grad_norm": 0.15803728997707367, "learning_rate": 4.5252461098761515e-05, "loss": 0.0604, "step": 8625 }, { "epoch": 16.421302900618166, "grad_norm": 0.08036740124225616, "learning_rate": 4.524610987615116e-05, "loss": 0.031, "step": 8626 }, { "epoch": 16.423204945316215, "grad_norm": 0.2112816870212555, "learning_rate": 4.523975865354081e-05, "loss": 0.0491, "step": 8627 }, { "epoch": 16.425106990014264, "grad_norm": 0.13649682700634003, "learning_rate": 4.523340743093046e-05, "loss": 0.0646, "step": 8628 }, { "epoch": 16.427009034712317, "grad_norm": 0.1682991087436676, "learning_rate": 4.52270562083201e-05, "loss": 0.0595, "step": 8629 }, { "epoch": 16.428911079410366, "grad_norm": 0.15135952830314636, "learning_rate": 4.522070498570975e-05, "loss": 0.0567, "step": 8630 }, { "epoch": 16.43081312410842, "grad_norm": 0.25466933846473694, "learning_rate": 4.52143537630994e-05, "loss": 0.0773, "step": 8631 }, { "epoch": 16.432715168806467, "grad_norm": 0.18868578970432281, "learning_rate": 4.5208002540489044e-05, "loss": 0.0516, "step": 8632 }, { "epoch": 16.434617213504517, "grad_norm": 0.09199167042970657, "learning_rate": 4.5201651317878696e-05, "loss": 0.0461, "step": 8633 }, { "epoch": 16.43651925820257, "grad_norm": 0.09501266479492188, "learning_rate": 4.519530009526834e-05, "loss": 0.0695, "step": 8634 }, { "epoch": 16.43842130290062, "grad_norm": 0.08783094584941864, "learning_rate": 4.5188948872657986e-05, "loss": 0.0584, "step": 8635 }, { "epoch": 16.440323347598667, "grad_norm": 0.18721364438533783, "learning_rate": 4.518259765004764e-05, "loss": 0.0604, "step": 8636 }, { "epoch": 16.44222539229672, "grad_norm": 0.18143963813781738, "learning_rate": 4.517624642743728e-05, "loss": 0.0856, "step": 8637 }, { "epoch": 16.44412743699477, "grad_norm": 0.1065838411450386, "learning_rate": 4.516989520482693e-05, "loss": 0.0616, "step": 8638 }, { "epoch": 16.44602948169282, "grad_norm": 0.15803262591362, "learning_rate": 4.516354398221658e-05, "loss": 0.0434, "step": 8639 }, { "epoch": 16.44793152639087, "grad_norm": 0.13359306752681732, "learning_rate": 4.5157192759606225e-05, "loss": 0.0565, "step": 8640 }, { "epoch": 16.44983357108892, "grad_norm": 0.09912649542093277, "learning_rate": 4.515084153699588e-05, "loss": 0.0541, "step": 8641 }, { "epoch": 16.451735615786973, "grad_norm": 0.13819362223148346, "learning_rate": 4.514449031438552e-05, "loss": 0.0735, "step": 8642 }, { "epoch": 16.453637660485022, "grad_norm": 0.1289387047290802, "learning_rate": 4.513813909177517e-05, "loss": 0.0577, "step": 8643 }, { "epoch": 16.45553970518307, "grad_norm": 0.16566374897956848, "learning_rate": 4.513178786916482e-05, "loss": 0.0636, "step": 8644 }, { "epoch": 16.457441749881124, "grad_norm": 0.29767850041389465, "learning_rate": 4.5125436646554464e-05, "loss": 0.064, "step": 8645 }, { "epoch": 16.459343794579173, "grad_norm": 0.07450270652770996, "learning_rate": 4.5119085423944116e-05, "loss": 0.054, "step": 8646 }, { "epoch": 16.461245839277222, "grad_norm": 0.18993811309337616, "learning_rate": 4.5112734201333754e-05, "loss": 0.0651, "step": 8647 }, { "epoch": 16.463147883975275, "grad_norm": 0.2176159918308258, "learning_rate": 4.5106382978723406e-05, "loss": 0.0601, "step": 8648 }, { "epoch": 16.465049928673324, "grad_norm": 0.40817978978157043, "learning_rate": 4.510003175611305e-05, "loss": 0.0944, "step": 8649 }, { "epoch": 16.466951973371373, "grad_norm": 0.13390858471393585, "learning_rate": 4.50936805335027e-05, "loss": 0.0429, "step": 8650 }, { "epoch": 16.468854018069425, "grad_norm": 0.166989266872406, "learning_rate": 4.5087329310892354e-05, "loss": 0.0493, "step": 8651 }, { "epoch": 16.470756062767475, "grad_norm": 0.24092860519886017, "learning_rate": 4.508097808828199e-05, "loss": 0.0681, "step": 8652 }, { "epoch": 16.472658107465527, "grad_norm": 0.11901987344026566, "learning_rate": 4.5074626865671645e-05, "loss": 0.0632, "step": 8653 }, { "epoch": 16.474560152163576, "grad_norm": 0.14405383169651031, "learning_rate": 4.506827564306129e-05, "loss": 0.0666, "step": 8654 }, { "epoch": 16.476462196861625, "grad_norm": 0.07029570639133453, "learning_rate": 4.506192442045094e-05, "loss": 0.0563, "step": 8655 }, { "epoch": 16.478364241559678, "grad_norm": 0.14019885659217834, "learning_rate": 4.5055573197840586e-05, "loss": 0.0518, "step": 8656 }, { "epoch": 16.480266286257727, "grad_norm": 0.12269244343042374, "learning_rate": 4.504922197523023e-05, "loss": 0.0483, "step": 8657 }, { "epoch": 16.482168330955776, "grad_norm": 0.29546546936035156, "learning_rate": 4.5042870752619883e-05, "loss": 0.0733, "step": 8658 }, { "epoch": 16.48407037565383, "grad_norm": 0.12874148786067963, "learning_rate": 4.503651953000953e-05, "loss": 0.066, "step": 8659 }, { "epoch": 16.485972420351878, "grad_norm": 0.22310982644557953, "learning_rate": 4.503016830739918e-05, "loss": 0.065, "step": 8660 }, { "epoch": 16.487874465049927, "grad_norm": 0.12721015512943268, "learning_rate": 4.502381708478882e-05, "loss": 0.0753, "step": 8661 }, { "epoch": 16.48977650974798, "grad_norm": 0.15060150623321533, "learning_rate": 4.501746586217847e-05, "loss": 0.0525, "step": 8662 }, { "epoch": 16.49167855444603, "grad_norm": 0.13536332547664642, "learning_rate": 4.501111463956812e-05, "loss": 0.0601, "step": 8663 }, { "epoch": 16.49358059914408, "grad_norm": 0.1088651642203331, "learning_rate": 4.500476341695777e-05, "loss": 0.0357, "step": 8664 }, { "epoch": 16.49548264384213, "grad_norm": 0.17497827112674713, "learning_rate": 4.499841219434741e-05, "loss": 0.0709, "step": 8665 }, { "epoch": 16.49738468854018, "grad_norm": 0.18341687321662903, "learning_rate": 4.499206097173706e-05, "loss": 0.0563, "step": 8666 }, { "epoch": 16.499286733238232, "grad_norm": 0.11489497125148773, "learning_rate": 4.498570974912671e-05, "loss": 0.0652, "step": 8667 }, { "epoch": 16.50118877793628, "grad_norm": 0.06444443762302399, "learning_rate": 4.4979358526516354e-05, "loss": 0.0341, "step": 8668 }, { "epoch": 16.50309082263433, "grad_norm": 0.13678719103336334, "learning_rate": 4.4973007303906006e-05, "loss": 0.0571, "step": 8669 }, { "epoch": 16.504992867332383, "grad_norm": 0.11370810866355896, "learning_rate": 4.496665608129565e-05, "loss": 0.0603, "step": 8670 }, { "epoch": 16.506894912030432, "grad_norm": 0.1342094987630844, "learning_rate": 4.4960304858685296e-05, "loss": 0.0576, "step": 8671 }, { "epoch": 16.50879695672848, "grad_norm": 0.06139456853270531, "learning_rate": 4.495395363607495e-05, "loss": 0.0606, "step": 8672 }, { "epoch": 16.510699001426534, "grad_norm": 0.1667710840702057, "learning_rate": 4.494760241346459e-05, "loss": 0.0855, "step": 8673 }, { "epoch": 16.512601046124583, "grad_norm": 0.1714620143175125, "learning_rate": 4.494125119085424e-05, "loss": 0.0632, "step": 8674 }, { "epoch": 16.514503090822636, "grad_norm": 0.21141253411769867, "learning_rate": 4.493489996824389e-05, "loss": 0.0459, "step": 8675 }, { "epoch": 16.516405135520685, "grad_norm": 0.19178642332553864, "learning_rate": 4.4928548745633535e-05, "loss": 0.0448, "step": 8676 }, { "epoch": 16.518307180218734, "grad_norm": 0.1269862800836563, "learning_rate": 4.492219752302319e-05, "loss": 0.0574, "step": 8677 }, { "epoch": 16.520209224916787, "grad_norm": 0.31225162744522095, "learning_rate": 4.491584630041283e-05, "loss": 0.0822, "step": 8678 }, { "epoch": 16.522111269614836, "grad_norm": 0.250698983669281, "learning_rate": 4.490949507780248e-05, "loss": 0.0597, "step": 8679 }, { "epoch": 16.524013314312885, "grad_norm": 0.24116088449954987, "learning_rate": 4.490314385519213e-05, "loss": 0.0481, "step": 8680 }, { "epoch": 16.525915359010938, "grad_norm": 0.24615411460399628, "learning_rate": 4.4896792632581774e-05, "loss": 0.0613, "step": 8681 }, { "epoch": 16.527817403708987, "grad_norm": 0.1894298940896988, "learning_rate": 4.4890441409971426e-05, "loss": 0.0735, "step": 8682 }, { "epoch": 16.529719448407036, "grad_norm": 0.09118109941482544, "learning_rate": 4.4884090187361064e-05, "loss": 0.0493, "step": 8683 }, { "epoch": 16.53162149310509, "grad_norm": 0.2347007691860199, "learning_rate": 4.4877738964750716e-05, "loss": 0.0791, "step": 8684 }, { "epoch": 16.533523537803138, "grad_norm": 0.1817847639322281, "learning_rate": 4.487138774214036e-05, "loss": 0.0587, "step": 8685 }, { "epoch": 16.53542558250119, "grad_norm": 0.240115687251091, "learning_rate": 4.486503651953001e-05, "loss": 0.0673, "step": 8686 }, { "epoch": 16.53732762719924, "grad_norm": 0.17856906354427338, "learning_rate": 4.485868529691966e-05, "loss": 0.0505, "step": 8687 }, { "epoch": 16.53922967189729, "grad_norm": 0.15239039063453674, "learning_rate": 4.48523340743093e-05, "loss": 0.0472, "step": 8688 }, { "epoch": 16.54113171659534, "grad_norm": 0.11803076416254044, "learning_rate": 4.4845982851698955e-05, "loss": 0.0524, "step": 8689 }, { "epoch": 16.54303376129339, "grad_norm": 0.11645831167697906, "learning_rate": 4.48396316290886e-05, "loss": 0.0687, "step": 8690 }, { "epoch": 16.54493580599144, "grad_norm": 0.07123297452926636, "learning_rate": 4.483328040647825e-05, "loss": 0.0347, "step": 8691 }, { "epoch": 16.546837850689492, "grad_norm": 0.19448618590831757, "learning_rate": 4.48269291838679e-05, "loss": 0.071, "step": 8692 }, { "epoch": 16.54873989538754, "grad_norm": 0.13725486397743225, "learning_rate": 4.482057796125754e-05, "loss": 0.0619, "step": 8693 }, { "epoch": 16.55064194008559, "grad_norm": 0.17994223535060883, "learning_rate": 4.4814226738647194e-05, "loss": 0.0602, "step": 8694 }, { "epoch": 16.552543984783643, "grad_norm": 0.17840850353240967, "learning_rate": 4.480787551603684e-05, "loss": 0.0607, "step": 8695 }, { "epoch": 16.554446029481692, "grad_norm": 0.16476470232009888, "learning_rate": 4.480152429342649e-05, "loss": 0.0697, "step": 8696 }, { "epoch": 16.556348074179745, "grad_norm": 0.24432149529457092, "learning_rate": 4.479517307081613e-05, "loss": 0.0793, "step": 8697 }, { "epoch": 16.558250118877794, "grad_norm": 0.23164185881614685, "learning_rate": 4.478882184820578e-05, "loss": 0.066, "step": 8698 }, { "epoch": 16.560152163575843, "grad_norm": 0.11395987868309021, "learning_rate": 4.478247062559543e-05, "loss": 0.0299, "step": 8699 }, { "epoch": 16.562054208273896, "grad_norm": 0.11444274336099625, "learning_rate": 4.477611940298508e-05, "loss": 0.0535, "step": 8700 }, { "epoch": 16.563956252971945, "grad_norm": 0.2184406965970993, "learning_rate": 4.476976818037472e-05, "loss": 0.061, "step": 8701 }, { "epoch": 16.565858297669994, "grad_norm": 0.11942858248949051, "learning_rate": 4.476341695776437e-05, "loss": 0.0583, "step": 8702 }, { "epoch": 16.567760342368047, "grad_norm": 0.0957055389881134, "learning_rate": 4.475706573515402e-05, "loss": 0.0568, "step": 8703 }, { "epoch": 16.569662387066096, "grad_norm": 0.27850326895713806, "learning_rate": 4.4750714512543665e-05, "loss": 0.075, "step": 8704 }, { "epoch": 16.571564431764145, "grad_norm": 0.1830519586801529, "learning_rate": 4.474436328993332e-05, "loss": 0.0564, "step": 8705 }, { "epoch": 16.573466476462198, "grad_norm": 0.07941751182079315, "learning_rate": 4.473801206732296e-05, "loss": 0.0454, "step": 8706 }, { "epoch": 16.575368521160247, "grad_norm": 0.11852432787418365, "learning_rate": 4.473166084471261e-05, "loss": 0.0622, "step": 8707 }, { "epoch": 16.5772705658583, "grad_norm": 0.16959674656391144, "learning_rate": 4.472530962210226e-05, "loss": 0.0552, "step": 8708 }, { "epoch": 16.57917261055635, "grad_norm": 0.1568812131881714, "learning_rate": 4.4718958399491904e-05, "loss": 0.064, "step": 8709 }, { "epoch": 16.581074655254397, "grad_norm": 0.12478756904602051, "learning_rate": 4.471260717688155e-05, "loss": 0.0668, "step": 8710 }, { "epoch": 16.58297669995245, "grad_norm": 0.16158562898635864, "learning_rate": 4.47062559542712e-05, "loss": 0.0551, "step": 8711 }, { "epoch": 16.5848787446505, "grad_norm": 0.17838051915168762, "learning_rate": 4.4699904731660846e-05, "loss": 0.0702, "step": 8712 }, { "epoch": 16.58678078934855, "grad_norm": 0.1448523849248886, "learning_rate": 4.46935535090505e-05, "loss": 0.0665, "step": 8713 }, { "epoch": 16.5886828340466, "grad_norm": 0.21258023381233215, "learning_rate": 4.468720228644014e-05, "loss": 0.0672, "step": 8714 }, { "epoch": 16.59058487874465, "grad_norm": 0.13940654695034027, "learning_rate": 4.468085106382979e-05, "loss": 0.0519, "step": 8715 }, { "epoch": 16.5924869234427, "grad_norm": 0.09483486413955688, "learning_rate": 4.467449984121943e-05, "loss": 0.0645, "step": 8716 }, { "epoch": 16.594388968140752, "grad_norm": 0.16645945608615875, "learning_rate": 4.4668148618609085e-05, "loss": 0.0899, "step": 8717 }, { "epoch": 16.5962910128388, "grad_norm": 0.12020470947027206, "learning_rate": 4.4661797395998736e-05, "loss": 0.0552, "step": 8718 }, { "epoch": 16.598193057536854, "grad_norm": 0.11481377482414246, "learning_rate": 4.4655446173388375e-05, "loss": 0.0538, "step": 8719 }, { "epoch": 16.600095102234903, "grad_norm": 0.18040695786476135, "learning_rate": 4.4649094950778027e-05, "loss": 0.0635, "step": 8720 }, { "epoch": 16.601997146932952, "grad_norm": 0.08994857221841812, "learning_rate": 4.464274372816767e-05, "loss": 0.0567, "step": 8721 }, { "epoch": 16.603899191631005, "grad_norm": 0.15336550772190094, "learning_rate": 4.4636392505557323e-05, "loss": 0.0671, "step": 8722 }, { "epoch": 16.605801236329054, "grad_norm": 0.11266004294157028, "learning_rate": 4.463004128294697e-05, "loss": 0.062, "step": 8723 }, { "epoch": 16.607703281027103, "grad_norm": 0.10975999385118484, "learning_rate": 4.4623690060336614e-05, "loss": 0.0563, "step": 8724 }, { "epoch": 16.609605325725155, "grad_norm": 0.11690010130405426, "learning_rate": 4.4617338837726265e-05, "loss": 0.0757, "step": 8725 }, { "epoch": 16.611507370423205, "grad_norm": 0.19387713074684143, "learning_rate": 4.461098761511591e-05, "loss": 0.0476, "step": 8726 }, { "epoch": 16.613409415121254, "grad_norm": 0.11086385697126389, "learning_rate": 4.460463639250556e-05, "loss": 0.0356, "step": 8727 }, { "epoch": 16.615311459819306, "grad_norm": 0.2539948523044586, "learning_rate": 4.459828516989521e-05, "loss": 0.0421, "step": 8728 }, { "epoch": 16.617213504517355, "grad_norm": 0.17350983619689941, "learning_rate": 4.459193394728485e-05, "loss": 0.0575, "step": 8729 }, { "epoch": 16.619115549215408, "grad_norm": 0.11512711644172668, "learning_rate": 4.4585582724674504e-05, "loss": 0.0512, "step": 8730 }, { "epoch": 16.621017593913457, "grad_norm": 0.23560310900211334, "learning_rate": 4.457923150206415e-05, "loss": 0.0675, "step": 8731 }, { "epoch": 16.622919638611506, "grad_norm": 0.13212914764881134, "learning_rate": 4.45728802794538e-05, "loss": 0.0645, "step": 8732 }, { "epoch": 16.62482168330956, "grad_norm": 0.07807771861553192, "learning_rate": 4.456652905684344e-05, "loss": 0.0537, "step": 8733 }, { "epoch": 16.626723728007608, "grad_norm": 0.1315772980451584, "learning_rate": 4.456017783423309e-05, "loss": 0.0492, "step": 8734 }, { "epoch": 16.628625772705657, "grad_norm": 0.17557771503925323, "learning_rate": 4.455382661162274e-05, "loss": 0.0486, "step": 8735 }, { "epoch": 16.63052781740371, "grad_norm": 0.1737060397863388, "learning_rate": 4.454747538901239e-05, "loss": 0.0476, "step": 8736 }, { "epoch": 16.63242986210176, "grad_norm": 0.11324983835220337, "learning_rate": 4.454112416640203e-05, "loss": 0.0395, "step": 8737 }, { "epoch": 16.634331906799808, "grad_norm": 0.0777667984366417, "learning_rate": 4.453477294379168e-05, "loss": 0.0482, "step": 8738 }, { "epoch": 16.63623395149786, "grad_norm": 0.17832614481449127, "learning_rate": 4.452842172118133e-05, "loss": 0.0607, "step": 8739 }, { "epoch": 16.63813599619591, "grad_norm": 0.16999396681785583, "learning_rate": 4.4522070498570975e-05, "loss": 0.0543, "step": 8740 }, { "epoch": 16.640038040893963, "grad_norm": 0.06686799973249435, "learning_rate": 4.451571927596063e-05, "loss": 0.0596, "step": 8741 }, { "epoch": 16.64194008559201, "grad_norm": 0.15245421230793, "learning_rate": 4.450936805335027e-05, "loss": 0.0653, "step": 8742 }, { "epoch": 16.64384213029006, "grad_norm": 0.10946781933307648, "learning_rate": 4.450301683073992e-05, "loss": 0.0511, "step": 8743 }, { "epoch": 16.645744174988113, "grad_norm": 0.133508563041687, "learning_rate": 4.449666560812957e-05, "loss": 0.0538, "step": 8744 }, { "epoch": 16.647646219686163, "grad_norm": 0.1146819069981575, "learning_rate": 4.4490314385519214e-05, "loss": 0.1121, "step": 8745 }, { "epoch": 16.64954826438421, "grad_norm": 0.10533533990383148, "learning_rate": 4.448396316290886e-05, "loss": 0.0567, "step": 8746 }, { "epoch": 16.651450309082264, "grad_norm": 0.10547687858343124, "learning_rate": 4.447761194029851e-05, "loss": 0.066, "step": 8747 }, { "epoch": 16.653352353780313, "grad_norm": 0.18761403858661652, "learning_rate": 4.4471260717688156e-05, "loss": 0.0635, "step": 8748 }, { "epoch": 16.655254398478363, "grad_norm": 0.11144470423460007, "learning_rate": 4.446490949507781e-05, "loss": 0.0711, "step": 8749 }, { "epoch": 16.657156443176415, "grad_norm": 0.16650743782520294, "learning_rate": 4.445855827246745e-05, "loss": 0.0465, "step": 8750 }, { "epoch": 16.659058487874464, "grad_norm": 0.26453036069869995, "learning_rate": 4.44522070498571e-05, "loss": 0.0659, "step": 8751 }, { "epoch": 16.660960532572517, "grad_norm": 0.27274948358535767, "learning_rate": 4.444585582724674e-05, "loss": 0.0577, "step": 8752 }, { "epoch": 16.662862577270566, "grad_norm": 0.2193809300661087, "learning_rate": 4.4439504604636395e-05, "loss": 0.0724, "step": 8753 }, { "epoch": 16.664764621968615, "grad_norm": 0.15518517792224884, "learning_rate": 4.443315338202605e-05, "loss": 0.0615, "step": 8754 }, { "epoch": 16.666666666666668, "grad_norm": 0.09343301504850388, "learning_rate": 4.4426802159415685e-05, "loss": 0.0574, "step": 8755 }, { "epoch": 16.668568711364717, "grad_norm": 0.14349409937858582, "learning_rate": 4.442045093680534e-05, "loss": 0.0677, "step": 8756 }, { "epoch": 16.670470756062766, "grad_norm": 0.13636383414268494, "learning_rate": 4.441409971419498e-05, "loss": 0.0767, "step": 8757 }, { "epoch": 16.67237280076082, "grad_norm": 0.2968313992023468, "learning_rate": 4.4407748491584634e-05, "loss": 0.0605, "step": 8758 }, { "epoch": 16.674274845458868, "grad_norm": 0.18513229489326477, "learning_rate": 4.440139726897428e-05, "loss": 0.0675, "step": 8759 }, { "epoch": 16.676176890156917, "grad_norm": 0.14191585779190063, "learning_rate": 4.4395046046363924e-05, "loss": 0.0563, "step": 8760 }, { "epoch": 16.67807893485497, "grad_norm": 0.10100386291742325, "learning_rate": 4.4388694823753576e-05, "loss": 0.0682, "step": 8761 }, { "epoch": 16.67998097955302, "grad_norm": 0.16358493268489838, "learning_rate": 4.438234360114322e-05, "loss": 0.0518, "step": 8762 }, { "epoch": 16.68188302425107, "grad_norm": 0.19515903294086456, "learning_rate": 4.437599237853287e-05, "loss": 0.0757, "step": 8763 }, { "epoch": 16.68378506894912, "grad_norm": 0.09387758374214172, "learning_rate": 4.436964115592252e-05, "loss": 0.0704, "step": 8764 }, { "epoch": 16.68568711364717, "grad_norm": 0.17992985248565674, "learning_rate": 4.436328993331216e-05, "loss": 0.0634, "step": 8765 }, { "epoch": 16.687589158345222, "grad_norm": 0.0763043537735939, "learning_rate": 4.4356938710701815e-05, "loss": 0.0597, "step": 8766 }, { "epoch": 16.68949120304327, "grad_norm": 0.14316275715827942, "learning_rate": 4.435058748809146e-05, "loss": 0.0659, "step": 8767 }, { "epoch": 16.69139324774132, "grad_norm": 0.23489262163639069, "learning_rate": 4.434423626548111e-05, "loss": 0.0625, "step": 8768 }, { "epoch": 16.693295292439373, "grad_norm": 0.10423056036233902, "learning_rate": 4.433788504287075e-05, "loss": 0.0496, "step": 8769 }, { "epoch": 16.695197337137422, "grad_norm": 0.16861405968666077, "learning_rate": 4.43315338202604e-05, "loss": 0.065, "step": 8770 }, { "epoch": 16.69709938183547, "grad_norm": 0.1740831732749939, "learning_rate": 4.432518259765005e-05, "loss": 0.0471, "step": 8771 }, { "epoch": 16.699001426533524, "grad_norm": 0.13501209020614624, "learning_rate": 4.43188313750397e-05, "loss": 0.0473, "step": 8772 }, { "epoch": 16.700903471231573, "grad_norm": 0.2359786033630371, "learning_rate": 4.4312480152429344e-05, "loss": 0.071, "step": 8773 }, { "epoch": 16.702805515929626, "grad_norm": 0.11417681723833084, "learning_rate": 4.430612892981899e-05, "loss": 0.0724, "step": 8774 }, { "epoch": 16.704707560627675, "grad_norm": 0.07876874506473541, "learning_rate": 4.429977770720864e-05, "loss": 0.0417, "step": 8775 }, { "epoch": 16.706609605325724, "grad_norm": 0.17278744280338287, "learning_rate": 4.4293426484598286e-05, "loss": 0.0564, "step": 8776 }, { "epoch": 16.708511650023777, "grad_norm": 0.175479918718338, "learning_rate": 4.428707526198794e-05, "loss": 0.054, "step": 8777 }, { "epoch": 16.710413694721826, "grad_norm": 0.16353516280651093, "learning_rate": 4.428072403937758e-05, "loss": 0.0444, "step": 8778 }, { "epoch": 16.712315739419875, "grad_norm": 0.09007339924573898, "learning_rate": 4.427437281676723e-05, "loss": 0.0549, "step": 8779 }, { "epoch": 16.714217784117928, "grad_norm": 0.16510137915611267, "learning_rate": 4.426802159415688e-05, "loss": 0.0726, "step": 8780 }, { "epoch": 16.716119828815977, "grad_norm": 0.2640451490879059, "learning_rate": 4.4261670371546525e-05, "loss": 0.0745, "step": 8781 }, { "epoch": 16.718021873514026, "grad_norm": 0.15771816670894623, "learning_rate": 4.425531914893617e-05, "loss": 0.0525, "step": 8782 }, { "epoch": 16.71992391821208, "grad_norm": 0.11966612190008163, "learning_rate": 4.424896792632582e-05, "loss": 0.0511, "step": 8783 }, { "epoch": 16.721825962910128, "grad_norm": 0.2334906905889511, "learning_rate": 4.4242616703715467e-05, "loss": 0.0518, "step": 8784 }, { "epoch": 16.72372800760818, "grad_norm": 0.20914064347743988, "learning_rate": 4.423626548110512e-05, "loss": 0.0591, "step": 8785 }, { "epoch": 16.72563005230623, "grad_norm": 0.08859287202358246, "learning_rate": 4.4229914258494763e-05, "loss": 0.0704, "step": 8786 }, { "epoch": 16.72753209700428, "grad_norm": 0.10872913897037506, "learning_rate": 4.422356303588441e-05, "loss": 0.0541, "step": 8787 }, { "epoch": 16.72943414170233, "grad_norm": 0.2138252556324005, "learning_rate": 4.4217211813274054e-05, "loss": 0.0505, "step": 8788 }, { "epoch": 16.73133618640038, "grad_norm": 0.0785028338432312, "learning_rate": 4.4210860590663705e-05, "loss": 0.0542, "step": 8789 }, { "epoch": 16.73323823109843, "grad_norm": 0.20003677904605865, "learning_rate": 4.420450936805336e-05, "loss": 0.1044, "step": 8790 }, { "epoch": 16.735140275796482, "grad_norm": 0.0482601672410965, "learning_rate": 4.4198158145442996e-05, "loss": 0.0593, "step": 8791 }, { "epoch": 16.73704232049453, "grad_norm": 0.13578015565872192, "learning_rate": 4.419180692283265e-05, "loss": 0.0608, "step": 8792 }, { "epoch": 16.73894436519258, "grad_norm": 0.15712803602218628, "learning_rate": 4.418545570022229e-05, "loss": 0.0592, "step": 8793 }, { "epoch": 16.740846409890633, "grad_norm": 0.09721986204385757, "learning_rate": 4.4179104477611944e-05, "loss": 0.0435, "step": 8794 }, { "epoch": 16.742748454588682, "grad_norm": 0.13611485064029694, "learning_rate": 4.417275325500159e-05, "loss": 0.0517, "step": 8795 }, { "epoch": 16.744650499286735, "grad_norm": 0.11596285551786423, "learning_rate": 4.4166402032391234e-05, "loss": 0.0538, "step": 8796 }, { "epoch": 16.746552543984784, "grad_norm": 0.1389114111661911, "learning_rate": 4.4160050809780886e-05, "loss": 0.0677, "step": 8797 }, { "epoch": 16.748454588682833, "grad_norm": 0.13880036771297455, "learning_rate": 4.415369958717053e-05, "loss": 0.0557, "step": 8798 }, { "epoch": 16.750356633380886, "grad_norm": 0.17362141609191895, "learning_rate": 4.414734836456018e-05, "loss": 0.0782, "step": 8799 }, { "epoch": 16.752258678078935, "grad_norm": 0.21298301219940186, "learning_rate": 4.414099714194982e-05, "loss": 0.0537, "step": 8800 }, { "epoch": 16.754160722776984, "grad_norm": 0.21551482379436493, "learning_rate": 4.413464591933947e-05, "loss": 0.0584, "step": 8801 }, { "epoch": 16.756062767475036, "grad_norm": 0.06975243985652924, "learning_rate": 4.4128294696729125e-05, "loss": 0.0457, "step": 8802 }, { "epoch": 16.757964812173086, "grad_norm": 0.1738225221633911, "learning_rate": 4.412194347411877e-05, "loss": 0.0568, "step": 8803 }, { "epoch": 16.759866856871135, "grad_norm": 0.17236964404582977, "learning_rate": 4.411559225150842e-05, "loss": 0.0603, "step": 8804 }, { "epoch": 16.761768901569187, "grad_norm": 0.1748720407485962, "learning_rate": 4.410924102889806e-05, "loss": 0.0505, "step": 8805 }, { "epoch": 16.763670946267236, "grad_norm": 0.2570509612560272, "learning_rate": 4.410288980628771e-05, "loss": 0.0623, "step": 8806 }, { "epoch": 16.76557299096529, "grad_norm": 0.18713904917240143, "learning_rate": 4.409653858367736e-05, "loss": 0.0643, "step": 8807 }, { "epoch": 16.767475035663338, "grad_norm": 0.1882544904947281, "learning_rate": 4.409018736106701e-05, "loss": 0.0557, "step": 8808 }, { "epoch": 16.769377080361387, "grad_norm": 0.15929445624351501, "learning_rate": 4.4083836138456654e-05, "loss": 0.0512, "step": 8809 }, { "epoch": 16.77127912505944, "grad_norm": 0.17998144030570984, "learning_rate": 4.40774849158463e-05, "loss": 0.0656, "step": 8810 }, { "epoch": 16.77318116975749, "grad_norm": 0.1303442269563675, "learning_rate": 4.407113369323595e-05, "loss": 0.0475, "step": 8811 }, { "epoch": 16.775083214455538, "grad_norm": 0.4382564425468445, "learning_rate": 4.4064782470625596e-05, "loss": 0.0731, "step": 8812 }, { "epoch": 16.77698525915359, "grad_norm": 0.19651977717876434, "learning_rate": 4.405843124801525e-05, "loss": 0.0697, "step": 8813 }, { "epoch": 16.77888730385164, "grad_norm": 0.21434944868087769, "learning_rate": 4.405208002540489e-05, "loss": 0.0737, "step": 8814 }, { "epoch": 16.780789348549693, "grad_norm": 0.2197565883398056, "learning_rate": 4.404572880279454e-05, "loss": 0.0952, "step": 8815 }, { "epoch": 16.78269139324774, "grad_norm": 0.30654266476631165, "learning_rate": 4.403937758018419e-05, "loss": 0.0658, "step": 8816 }, { "epoch": 16.78459343794579, "grad_norm": 0.1521390974521637, "learning_rate": 4.4033026357573835e-05, "loss": 0.0463, "step": 8817 }, { "epoch": 16.786495482643844, "grad_norm": 0.16150012612342834, "learning_rate": 4.402667513496348e-05, "loss": 0.0599, "step": 8818 }, { "epoch": 16.788397527341893, "grad_norm": 0.17025931179523468, "learning_rate": 4.402032391235313e-05, "loss": 0.0591, "step": 8819 }, { "epoch": 16.79029957203994, "grad_norm": 0.17232930660247803, "learning_rate": 4.401397268974278e-05, "loss": 0.0683, "step": 8820 }, { "epoch": 16.792201616737994, "grad_norm": 0.15758217871189117, "learning_rate": 4.400762146713243e-05, "loss": 0.0466, "step": 8821 }, { "epoch": 16.794103661436043, "grad_norm": 0.08380364626646042, "learning_rate": 4.4001270244522074e-05, "loss": 0.0478, "step": 8822 }, { "epoch": 16.796005706134093, "grad_norm": 0.0926741287112236, "learning_rate": 4.399491902191172e-05, "loss": 0.08, "step": 8823 }, { "epoch": 16.797907750832145, "grad_norm": 0.16255873441696167, "learning_rate": 4.3988567799301364e-05, "loss": 0.0655, "step": 8824 }, { "epoch": 16.799809795530194, "grad_norm": 0.11387108266353607, "learning_rate": 4.3982216576691016e-05, "loss": 0.0658, "step": 8825 }, { "epoch": 16.801711840228247, "grad_norm": 0.2308814972639084, "learning_rate": 4.397586535408066e-05, "loss": 0.0931, "step": 8826 }, { "epoch": 16.803613884926296, "grad_norm": 0.12670093774795532, "learning_rate": 4.3969514131470306e-05, "loss": 0.0406, "step": 8827 }, { "epoch": 16.805515929624345, "grad_norm": 0.2202993482351303, "learning_rate": 4.396316290885996e-05, "loss": 0.0648, "step": 8828 }, { "epoch": 16.807417974322398, "grad_norm": 0.12769080698490143, "learning_rate": 4.39568116862496e-05, "loss": 0.0633, "step": 8829 }, { "epoch": 16.809320019020447, "grad_norm": 0.12309324741363525, "learning_rate": 4.3950460463639255e-05, "loss": 0.0589, "step": 8830 }, { "epoch": 16.811222063718496, "grad_norm": 0.1103903204202652, "learning_rate": 4.39441092410289e-05, "loss": 0.0483, "step": 8831 }, { "epoch": 16.81312410841655, "grad_norm": 0.16271445155143738, "learning_rate": 4.3937758018418545e-05, "loss": 0.0428, "step": 8832 }, { "epoch": 16.815026153114598, "grad_norm": 0.17252840101718903, "learning_rate": 4.39314067958082e-05, "loss": 0.0532, "step": 8833 }, { "epoch": 16.816928197812647, "grad_norm": 0.11162525415420532, "learning_rate": 4.392505557319784e-05, "loss": 0.0567, "step": 8834 }, { "epoch": 16.8188302425107, "grad_norm": 0.12144871801137924, "learning_rate": 4.3918704350587494e-05, "loss": 0.0568, "step": 8835 }, { "epoch": 16.82073228720875, "grad_norm": 0.2525745928287506, "learning_rate": 4.391235312797713e-05, "loss": 0.0436, "step": 8836 }, { "epoch": 16.8226343319068, "grad_norm": 0.08460042625665665, "learning_rate": 4.3906001905366784e-05, "loss": 0.0541, "step": 8837 }, { "epoch": 16.82453637660485, "grad_norm": 0.07870131731033325, "learning_rate": 4.3899650682756436e-05, "loss": 0.0647, "step": 8838 }, { "epoch": 16.8264384213029, "grad_norm": 0.17138586938381195, "learning_rate": 4.389329946014608e-05, "loss": 0.0707, "step": 8839 }, { "epoch": 16.828340466000952, "grad_norm": 0.19924390316009521, "learning_rate": 4.388694823753573e-05, "loss": 0.0723, "step": 8840 }, { "epoch": 16.830242510699, "grad_norm": 0.18340855836868286, "learning_rate": 4.388059701492537e-05, "loss": 0.0445, "step": 8841 }, { "epoch": 16.83214455539705, "grad_norm": 0.149722620844841, "learning_rate": 4.387424579231502e-05, "loss": 0.0507, "step": 8842 }, { "epoch": 16.834046600095103, "grad_norm": 0.14245644211769104, "learning_rate": 4.386789456970467e-05, "loss": 0.0633, "step": 8843 }, { "epoch": 16.835948644793152, "grad_norm": 0.06980764120817184, "learning_rate": 4.386154334709432e-05, "loss": 0.0588, "step": 8844 }, { "epoch": 16.8378506894912, "grad_norm": 0.10202120244503021, "learning_rate": 4.3855192124483965e-05, "loss": 0.0568, "step": 8845 }, { "epoch": 16.839752734189254, "grad_norm": 0.10382965207099915, "learning_rate": 4.384884090187361e-05, "loss": 0.0569, "step": 8846 }, { "epoch": 16.841654778887303, "grad_norm": 0.18479005992412567, "learning_rate": 4.384248967926326e-05, "loss": 0.0561, "step": 8847 }, { "epoch": 16.843556823585356, "grad_norm": 0.16195909678936005, "learning_rate": 4.3836138456652907e-05, "loss": 0.0675, "step": 8848 }, { "epoch": 16.845458868283405, "grad_norm": 0.2066333144903183, "learning_rate": 4.382978723404256e-05, "loss": 0.0605, "step": 8849 }, { "epoch": 16.847360912981454, "grad_norm": 0.13491970300674438, "learning_rate": 4.3823436011432204e-05, "loss": 0.0415, "step": 8850 }, { "epoch": 16.849262957679507, "grad_norm": 0.13810434937477112, "learning_rate": 4.381708478882185e-05, "loss": 0.0603, "step": 8851 }, { "epoch": 16.851165002377556, "grad_norm": 0.07920050621032715, "learning_rate": 4.38107335662115e-05, "loss": 0.0673, "step": 8852 }, { "epoch": 16.853067047075605, "grad_norm": 0.17346441745758057, "learning_rate": 4.3804382343601145e-05, "loss": 0.0397, "step": 8853 }, { "epoch": 16.854969091773658, "grad_norm": 0.08918207138776779, "learning_rate": 4.379803112099079e-05, "loss": 0.0487, "step": 8854 }, { "epoch": 16.856871136471707, "grad_norm": 0.11225546151399612, "learning_rate": 4.3791679898380436e-05, "loss": 0.0653, "step": 8855 }, { "epoch": 16.858773181169756, "grad_norm": 0.20793363451957703, "learning_rate": 4.378532867577009e-05, "loss": 0.0732, "step": 8856 }, { "epoch": 16.86067522586781, "grad_norm": 0.16826403141021729, "learning_rate": 4.377897745315974e-05, "loss": 0.0785, "step": 8857 }, { "epoch": 16.862577270565858, "grad_norm": 0.12270570546388626, "learning_rate": 4.3772626230549384e-05, "loss": 0.0544, "step": 8858 }, { "epoch": 16.86447931526391, "grad_norm": 0.11855387687683105, "learning_rate": 4.376627500793903e-05, "loss": 0.0541, "step": 8859 }, { "epoch": 16.86638135996196, "grad_norm": 0.1780882179737091, "learning_rate": 4.3759923785328674e-05, "loss": 0.0596, "step": 8860 }, { "epoch": 16.86828340466001, "grad_norm": 0.0805153101682663, "learning_rate": 4.3753572562718326e-05, "loss": 0.0437, "step": 8861 }, { "epoch": 16.87018544935806, "grad_norm": 0.2657288908958435, "learning_rate": 4.374722134010797e-05, "loss": 0.0819, "step": 8862 }, { "epoch": 16.87208749405611, "grad_norm": 0.14114326238632202, "learning_rate": 4.3740870117497616e-05, "loss": 0.0635, "step": 8863 }, { "epoch": 16.87398953875416, "grad_norm": 0.12601017951965332, "learning_rate": 4.373451889488727e-05, "loss": 0.0566, "step": 8864 }, { "epoch": 16.875891583452212, "grad_norm": 0.18008174002170563, "learning_rate": 4.372816767227691e-05, "loss": 0.0634, "step": 8865 }, { "epoch": 16.87779362815026, "grad_norm": 0.125730499625206, "learning_rate": 4.3721816449666565e-05, "loss": 0.0577, "step": 8866 }, { "epoch": 16.87969567284831, "grad_norm": 0.05357951298356056, "learning_rate": 4.371546522705621e-05, "loss": 0.0569, "step": 8867 }, { "epoch": 16.881597717546363, "grad_norm": 0.06639115512371063, "learning_rate": 4.3709114004445855e-05, "loss": 0.0472, "step": 8868 }, { "epoch": 16.883499762244412, "grad_norm": 0.10009011626243591, "learning_rate": 4.370276278183551e-05, "loss": 0.0421, "step": 8869 }, { "epoch": 16.885401806942465, "grad_norm": 0.19268018007278442, "learning_rate": 4.369641155922515e-05, "loss": 0.0648, "step": 8870 }, { "epoch": 16.887303851640514, "grad_norm": 0.24461723864078522, "learning_rate": 4.3690060336614804e-05, "loss": 0.0732, "step": 8871 }, { "epoch": 16.889205896338563, "grad_norm": 0.13241729140281677, "learning_rate": 4.368370911400444e-05, "loss": 0.0392, "step": 8872 }, { "epoch": 16.891107941036616, "grad_norm": 0.1267736554145813, "learning_rate": 4.3677357891394094e-05, "loss": 0.0747, "step": 8873 }, { "epoch": 16.893009985734665, "grad_norm": 0.2197117805480957, "learning_rate": 4.3671006668783746e-05, "loss": 0.0591, "step": 8874 }, { "epoch": 16.894912030432714, "grad_norm": 0.2949211001396179, "learning_rate": 4.366465544617339e-05, "loss": 0.0731, "step": 8875 }, { "epoch": 16.896814075130766, "grad_norm": 0.13164544105529785, "learning_rate": 4.365830422356304e-05, "loss": 0.0595, "step": 8876 }, { "epoch": 16.898716119828816, "grad_norm": 0.12970279157161713, "learning_rate": 4.365195300095268e-05, "loss": 0.0346, "step": 8877 }, { "epoch": 16.900618164526865, "grad_norm": 0.07170283794403076, "learning_rate": 4.364560177834233e-05, "loss": 0.0243, "step": 8878 }, { "epoch": 16.902520209224917, "grad_norm": 0.14044728875160217, "learning_rate": 4.363925055573198e-05, "loss": 0.06, "step": 8879 }, { "epoch": 16.904422253922966, "grad_norm": 0.1995735913515091, "learning_rate": 4.363289933312163e-05, "loss": 0.0554, "step": 8880 }, { "epoch": 16.90632429862102, "grad_norm": 0.08931490033864975, "learning_rate": 4.3626548110511275e-05, "loss": 0.0509, "step": 8881 }, { "epoch": 16.90822634331907, "grad_norm": 0.17955714464187622, "learning_rate": 4.362019688790092e-05, "loss": 0.0846, "step": 8882 }, { "epoch": 16.910128388017117, "grad_norm": 0.17314022779464722, "learning_rate": 4.361384566529057e-05, "loss": 0.0589, "step": 8883 }, { "epoch": 16.91203043271517, "grad_norm": 0.172648623585701, "learning_rate": 4.360749444268022e-05, "loss": 0.0612, "step": 8884 }, { "epoch": 16.91393247741322, "grad_norm": 0.3400585353374481, "learning_rate": 4.360114322006987e-05, "loss": 0.0662, "step": 8885 }, { "epoch": 16.91583452211127, "grad_norm": 0.23420454561710358, "learning_rate": 4.3594791997459514e-05, "loss": 0.0595, "step": 8886 }, { "epoch": 16.91773656680932, "grad_norm": 0.12573030591011047, "learning_rate": 4.358844077484916e-05, "loss": 0.05, "step": 8887 }, { "epoch": 16.91963861150737, "grad_norm": 0.1461486518383026, "learning_rate": 4.358208955223881e-05, "loss": 0.0424, "step": 8888 }, { "epoch": 16.92154065620542, "grad_norm": 0.16064852476119995, "learning_rate": 4.3575738329628456e-05, "loss": 0.0652, "step": 8889 }, { "epoch": 16.923442700903472, "grad_norm": 0.060332030057907104, "learning_rate": 4.35693871070181e-05, "loss": 0.0545, "step": 8890 }, { "epoch": 16.92534474560152, "grad_norm": 0.23492804169654846, "learning_rate": 4.3563035884407746e-05, "loss": 0.0675, "step": 8891 }, { "epoch": 16.927246790299574, "grad_norm": 0.17212089896202087, "learning_rate": 4.35566846617974e-05, "loss": 0.0675, "step": 8892 }, { "epoch": 16.929148834997623, "grad_norm": 0.15113942325115204, "learning_rate": 4.355033343918705e-05, "loss": 0.0411, "step": 8893 }, { "epoch": 16.931050879695672, "grad_norm": 0.1071106493473053, "learning_rate": 4.3543982216576695e-05, "loss": 0.0544, "step": 8894 }, { "epoch": 16.932952924393724, "grad_norm": 0.11964888870716095, "learning_rate": 4.353763099396634e-05, "loss": 0.0593, "step": 8895 }, { "epoch": 16.934854969091774, "grad_norm": 0.12841713428497314, "learning_rate": 4.3531279771355985e-05, "loss": 0.0521, "step": 8896 }, { "epoch": 16.936757013789823, "grad_norm": 0.10380429774522781, "learning_rate": 4.352492854874564e-05, "loss": 0.0459, "step": 8897 }, { "epoch": 16.938659058487875, "grad_norm": 0.1602270007133484, "learning_rate": 4.351857732613528e-05, "loss": 0.0603, "step": 8898 }, { "epoch": 16.940561103185924, "grad_norm": 0.11509580910205841, "learning_rate": 4.351222610352493e-05, "loss": 0.0603, "step": 8899 }, { "epoch": 16.942463147883974, "grad_norm": 0.26956668496131897, "learning_rate": 4.350587488091458e-05, "loss": 0.0732, "step": 8900 }, { "epoch": 16.944365192582026, "grad_norm": 0.0985705703496933, "learning_rate": 4.3499523658304224e-05, "loss": 0.0743, "step": 8901 }, { "epoch": 16.946267237280075, "grad_norm": 0.08205585181713104, "learning_rate": 4.3493172435693876e-05, "loss": 0.0305, "step": 8902 }, { "epoch": 16.948169281978128, "grad_norm": 0.1018296554684639, "learning_rate": 4.348682121308352e-05, "loss": 0.0466, "step": 8903 }, { "epoch": 16.950071326676177, "grad_norm": 0.13760146498680115, "learning_rate": 4.3480469990473166e-05, "loss": 0.0462, "step": 8904 }, { "epoch": 16.951973371374226, "grad_norm": 0.11483436077833176, "learning_rate": 4.347411876786282e-05, "loss": 0.0583, "step": 8905 }, { "epoch": 16.95387541607228, "grad_norm": 0.18903884291648865, "learning_rate": 4.346776754525246e-05, "loss": 0.0436, "step": 8906 }, { "epoch": 16.955777460770328, "grad_norm": 0.10095207393169403, "learning_rate": 4.3461416322642115e-05, "loss": 0.0559, "step": 8907 }, { "epoch": 16.957679505468377, "grad_norm": 0.0754164457321167, "learning_rate": 4.345506510003175e-05, "loss": 0.0699, "step": 8908 }, { "epoch": 16.95958155016643, "grad_norm": 0.18949873745441437, "learning_rate": 4.3448713877421405e-05, "loss": 0.0498, "step": 8909 }, { "epoch": 16.96148359486448, "grad_norm": 0.16381996870040894, "learning_rate": 4.344236265481105e-05, "loss": 0.0825, "step": 8910 }, { "epoch": 16.96338563956253, "grad_norm": 0.23997418582439423, "learning_rate": 4.34360114322007e-05, "loss": 0.0599, "step": 8911 }, { "epoch": 16.96528768426058, "grad_norm": 0.07024307548999786, "learning_rate": 4.3429660209590353e-05, "loss": 0.0377, "step": 8912 }, { "epoch": 16.96718972895863, "grad_norm": 0.20647969841957092, "learning_rate": 4.342330898697999e-05, "loss": 0.051, "step": 8913 }, { "epoch": 16.969091773656682, "grad_norm": 0.10372774302959442, "learning_rate": 4.3416957764369644e-05, "loss": 0.0407, "step": 8914 }, { "epoch": 16.97099381835473, "grad_norm": 0.0911819338798523, "learning_rate": 4.341060654175929e-05, "loss": 0.0469, "step": 8915 }, { "epoch": 16.97289586305278, "grad_norm": 0.1432979255914688, "learning_rate": 4.340425531914894e-05, "loss": 0.0522, "step": 8916 }, { "epoch": 16.974797907750833, "grad_norm": 0.131869375705719, "learning_rate": 4.3397904096538586e-05, "loss": 0.0785, "step": 8917 }, { "epoch": 16.976699952448882, "grad_norm": 0.2788298428058624, "learning_rate": 4.339155287392823e-05, "loss": 0.051, "step": 8918 }, { "epoch": 16.97860199714693, "grad_norm": 0.16402453184127808, "learning_rate": 4.338520165131788e-05, "loss": 0.0806, "step": 8919 }, { "epoch": 16.980504041844984, "grad_norm": 0.1125316321849823, "learning_rate": 4.337885042870753e-05, "loss": 0.0777, "step": 8920 }, { "epoch": 16.982406086543033, "grad_norm": 0.09106596559286118, "learning_rate": 4.337249920609718e-05, "loss": 0.0425, "step": 8921 }, { "epoch": 16.984308131241086, "grad_norm": 0.13478001952171326, "learning_rate": 4.3366147983486824e-05, "loss": 0.0396, "step": 8922 }, { "epoch": 16.986210175939135, "grad_norm": 0.19370102882385254, "learning_rate": 4.335979676087647e-05, "loss": 0.0477, "step": 8923 }, { "epoch": 16.988112220637184, "grad_norm": 0.1331225037574768, "learning_rate": 4.335344553826612e-05, "loss": 0.0764, "step": 8924 }, { "epoch": 16.990014265335237, "grad_norm": 0.16695334017276764, "learning_rate": 4.3347094315655766e-05, "loss": 0.0376, "step": 8925 }, { "epoch": 16.991916310033286, "grad_norm": 0.13133040070533752, "learning_rate": 4.334074309304541e-05, "loss": 0.0462, "step": 8926 }, { "epoch": 16.993818354731335, "grad_norm": 0.09299250692129135, "learning_rate": 4.3334391870435056e-05, "loss": 0.0557, "step": 8927 }, { "epoch": 16.995720399429388, "grad_norm": 0.11600998789072037, "learning_rate": 4.332804064782471e-05, "loss": 0.0669, "step": 8928 }, { "epoch": 16.997622444127437, "grad_norm": 0.1207716166973114, "learning_rate": 4.332168942521436e-05, "loss": 0.0536, "step": 8929 }, { "epoch": 16.999524488825486, "grad_norm": 0.16815738379955292, "learning_rate": 4.3315338202604005e-05, "loss": 0.069, "step": 8930 }, { "epoch": 17.00142653352354, "grad_norm": 0.1072743833065033, "learning_rate": 4.330898697999365e-05, "loss": 0.0607, "step": 8931 }, { "epoch": 17.003328578221588, "grad_norm": 0.08723615109920502, "learning_rate": 4.3302635757383295e-05, "loss": 0.0505, "step": 8932 }, { "epoch": 17.00523062291964, "grad_norm": 0.22191157937049866, "learning_rate": 4.329628453477295e-05, "loss": 0.054, "step": 8933 }, { "epoch": 17.00713266761769, "grad_norm": 0.10282241553068161, "learning_rate": 4.328993331216259e-05, "loss": 0.0503, "step": 8934 }, { "epoch": 17.00903471231574, "grad_norm": 0.11215320974588394, "learning_rate": 4.328358208955224e-05, "loss": 0.057, "step": 8935 }, { "epoch": 17.01093675701379, "grad_norm": 0.09615226835012436, "learning_rate": 4.327723086694189e-05, "loss": 0.058, "step": 8936 }, { "epoch": 17.01283880171184, "grad_norm": 0.08828164637088776, "learning_rate": 4.3270879644331534e-05, "loss": 0.0526, "step": 8937 }, { "epoch": 17.01474084640989, "grad_norm": 0.0911029502749443, "learning_rate": 4.3264528421721186e-05, "loss": 0.0547, "step": 8938 }, { "epoch": 17.016642891107942, "grad_norm": 0.04437772557139397, "learning_rate": 4.325817719911083e-05, "loss": 0.0471, "step": 8939 }, { "epoch": 17.01854493580599, "grad_norm": 0.11651269346475601, "learning_rate": 4.3251825976500476e-05, "loss": 0.052, "step": 8940 }, { "epoch": 17.02044698050404, "grad_norm": 0.16090331971645355, "learning_rate": 4.324547475389013e-05, "loss": 0.0876, "step": 8941 }, { "epoch": 17.022349025202093, "grad_norm": 0.0497426874935627, "learning_rate": 4.323912353127977e-05, "loss": 0.0602, "step": 8942 }, { "epoch": 17.024251069900142, "grad_norm": 0.15628616511821747, "learning_rate": 4.3232772308669425e-05, "loss": 0.0632, "step": 8943 }, { "epoch": 17.026153114598195, "grad_norm": 0.08810368925333023, "learning_rate": 4.322642108605906e-05, "loss": 0.0391, "step": 8944 }, { "epoch": 17.028055159296244, "grad_norm": 0.21196871995925903, "learning_rate": 4.3220069863448715e-05, "loss": 0.0584, "step": 8945 }, { "epoch": 17.029957203994293, "grad_norm": 0.05883145332336426, "learning_rate": 4.321371864083836e-05, "loss": 0.0464, "step": 8946 }, { "epoch": 17.031859248692346, "grad_norm": 0.18730250000953674, "learning_rate": 4.320736741822801e-05, "loss": 0.062, "step": 8947 }, { "epoch": 17.033761293390395, "grad_norm": 0.16714142262935638, "learning_rate": 4.3201016195617664e-05, "loss": 0.0442, "step": 8948 }, { "epoch": 17.035663338088444, "grad_norm": 0.114844910800457, "learning_rate": 4.31946649730073e-05, "loss": 0.0503, "step": 8949 }, { "epoch": 17.037565382786497, "grad_norm": 0.25291353464126587, "learning_rate": 4.3188313750396954e-05, "loss": 0.0736, "step": 8950 }, { "epoch": 17.039467427484546, "grad_norm": 0.1454509198665619, "learning_rate": 4.31819625277866e-05, "loss": 0.0613, "step": 8951 }, { "epoch": 17.041369472182595, "grad_norm": 0.10287206619977951, "learning_rate": 4.317561130517625e-05, "loss": 0.0581, "step": 8952 }, { "epoch": 17.043271516880647, "grad_norm": 0.12564142048358917, "learning_rate": 4.3169260082565896e-05, "loss": 0.05, "step": 8953 }, { "epoch": 17.045173561578697, "grad_norm": 0.070656418800354, "learning_rate": 4.316290885995554e-05, "loss": 0.0506, "step": 8954 }, { "epoch": 17.04707560627675, "grad_norm": 0.06617604941129684, "learning_rate": 4.315655763734519e-05, "loss": 0.0458, "step": 8955 }, { "epoch": 17.0489776509748, "grad_norm": 0.14419837296009064, "learning_rate": 4.315020641473484e-05, "loss": 0.0745, "step": 8956 }, { "epoch": 17.050879695672847, "grad_norm": 0.18425264954566956, "learning_rate": 4.314385519212449e-05, "loss": 0.0544, "step": 8957 }, { "epoch": 17.0527817403709, "grad_norm": 0.12209388613700867, "learning_rate": 4.313750396951413e-05, "loss": 0.065, "step": 8958 }, { "epoch": 17.05468378506895, "grad_norm": 0.2421053647994995, "learning_rate": 4.313115274690378e-05, "loss": 0.0572, "step": 8959 }, { "epoch": 17.056585829767, "grad_norm": 0.14239047467708588, "learning_rate": 4.312480152429343e-05, "loss": 0.059, "step": 8960 }, { "epoch": 17.05848787446505, "grad_norm": 0.20295925438404083, "learning_rate": 4.311845030168308e-05, "loss": 0.0644, "step": 8961 }, { "epoch": 17.0603899191631, "grad_norm": 0.08817723393440247, "learning_rate": 4.311209907907272e-05, "loss": 0.0383, "step": 8962 }, { "epoch": 17.06229196386115, "grad_norm": 0.14773733913898468, "learning_rate": 4.310574785646237e-05, "loss": 0.0546, "step": 8963 }, { "epoch": 17.064194008559202, "grad_norm": 0.11199629306793213, "learning_rate": 4.309939663385202e-05, "loss": 0.0372, "step": 8964 }, { "epoch": 17.06609605325725, "grad_norm": 0.10948487371206284, "learning_rate": 4.3093045411241664e-05, "loss": 0.067, "step": 8965 }, { "epoch": 17.067998097955304, "grad_norm": 0.1963731348514557, "learning_rate": 4.3086694188631316e-05, "loss": 0.05, "step": 8966 }, { "epoch": 17.069900142653353, "grad_norm": 0.1474691927433014, "learning_rate": 4.308034296602096e-05, "loss": 0.0403, "step": 8967 }, { "epoch": 17.071802187351402, "grad_norm": 0.20371627807617188, "learning_rate": 4.3073991743410606e-05, "loss": 0.0659, "step": 8968 }, { "epoch": 17.073704232049455, "grad_norm": 0.21123164892196655, "learning_rate": 4.306764052080026e-05, "loss": 0.0592, "step": 8969 }, { "epoch": 17.075606276747504, "grad_norm": 0.14474251866340637, "learning_rate": 4.30612892981899e-05, "loss": 0.0469, "step": 8970 }, { "epoch": 17.077508321445553, "grad_norm": 0.1162516176700592, "learning_rate": 4.305493807557955e-05, "loss": 0.052, "step": 8971 }, { "epoch": 17.079410366143605, "grad_norm": 0.09336848556995392, "learning_rate": 4.30485868529692e-05, "loss": 0.0456, "step": 8972 }, { "epoch": 17.081312410841655, "grad_norm": 0.09849970042705536, "learning_rate": 4.3042235630358845e-05, "loss": 0.0837, "step": 8973 }, { "epoch": 17.083214455539704, "grad_norm": 0.0521208755671978, "learning_rate": 4.3035884407748497e-05, "loss": 0.0376, "step": 8974 }, { "epoch": 17.085116500237756, "grad_norm": 0.14380308985710144, "learning_rate": 4.302953318513814e-05, "loss": 0.0517, "step": 8975 }, { "epoch": 17.087018544935805, "grad_norm": 0.06576641649007797, "learning_rate": 4.302318196252779e-05, "loss": 0.0569, "step": 8976 }, { "epoch": 17.088920589633858, "grad_norm": 0.05938639119267464, "learning_rate": 4.301683073991744e-05, "loss": 0.06, "step": 8977 }, { "epoch": 17.090822634331907, "grad_norm": 0.13843615353107452, "learning_rate": 4.3010479517307084e-05, "loss": 0.0549, "step": 8978 }, { "epoch": 17.092724679029956, "grad_norm": 0.2245621383190155, "learning_rate": 4.3004128294696735e-05, "loss": 0.0582, "step": 8979 }, { "epoch": 17.09462672372801, "grad_norm": 0.08693769574165344, "learning_rate": 4.2997777072086374e-05, "loss": 0.0704, "step": 8980 }, { "epoch": 17.096528768426058, "grad_norm": 0.13792353868484497, "learning_rate": 4.2991425849476026e-05, "loss": 0.0631, "step": 8981 }, { "epoch": 17.098430813124107, "grad_norm": 0.13329333066940308, "learning_rate": 4.298507462686567e-05, "loss": 0.0518, "step": 8982 }, { "epoch": 17.10033285782216, "grad_norm": 0.10913736373186111, "learning_rate": 4.297872340425532e-05, "loss": 0.0565, "step": 8983 }, { "epoch": 17.10223490252021, "grad_norm": 0.1331338733434677, "learning_rate": 4.2972372181644974e-05, "loss": 0.0566, "step": 8984 }, { "epoch": 17.104136947218258, "grad_norm": 0.10640410333871841, "learning_rate": 4.296602095903461e-05, "loss": 0.0434, "step": 8985 }, { "epoch": 17.10603899191631, "grad_norm": 0.11068563908338547, "learning_rate": 4.2959669736424264e-05, "loss": 0.0588, "step": 8986 }, { "epoch": 17.10794103661436, "grad_norm": 0.06091459468007088, "learning_rate": 4.295331851381391e-05, "loss": 0.0449, "step": 8987 }, { "epoch": 17.109843081312412, "grad_norm": 0.22359995543956757, "learning_rate": 4.294696729120356e-05, "loss": 0.0743, "step": 8988 }, { "epoch": 17.11174512601046, "grad_norm": 0.1244855746626854, "learning_rate": 4.2940616068593206e-05, "loss": 0.0628, "step": 8989 }, { "epoch": 17.11364717070851, "grad_norm": 0.27003106474876404, "learning_rate": 4.293426484598285e-05, "loss": 0.065, "step": 8990 }, { "epoch": 17.115549215406563, "grad_norm": 0.08874871581792831, "learning_rate": 4.29279136233725e-05, "loss": 0.0487, "step": 8991 }, { "epoch": 17.117451260104612, "grad_norm": 0.07236160337924957, "learning_rate": 4.292156240076215e-05, "loss": 0.0586, "step": 8992 }, { "epoch": 17.11935330480266, "grad_norm": 0.1268148273229599, "learning_rate": 4.29152111781518e-05, "loss": 0.0529, "step": 8993 }, { "epoch": 17.121255349500714, "grad_norm": 0.11032159626483917, "learning_rate": 4.290885995554144e-05, "loss": 0.0476, "step": 8994 }, { "epoch": 17.123157394198763, "grad_norm": 0.16266649961471558, "learning_rate": 4.290250873293109e-05, "loss": 0.049, "step": 8995 }, { "epoch": 17.125059438896812, "grad_norm": 0.06318007409572601, "learning_rate": 4.289615751032074e-05, "loss": 0.0723, "step": 8996 }, { "epoch": 17.126961483594865, "grad_norm": 0.06825682520866394, "learning_rate": 4.288980628771039e-05, "loss": 0.0415, "step": 8997 }, { "epoch": 17.128863528292914, "grad_norm": 0.22206497192382812, "learning_rate": 4.288345506510003e-05, "loss": 0.0565, "step": 8998 }, { "epoch": 17.130765572990967, "grad_norm": 0.1304975301027298, "learning_rate": 4.287710384248968e-05, "loss": 0.0485, "step": 8999 }, { "epoch": 17.132667617689016, "grad_norm": 0.13121330738067627, "learning_rate": 4.287075261987933e-05, "loss": 0.0577, "step": 9000 }, { "epoch": 17.134569662387065, "grad_norm": 0.1600920408964157, "learning_rate": 4.2864401397268974e-05, "loss": 0.0535, "step": 9001 }, { "epoch": 17.136471707085118, "grad_norm": 0.10079571604728699, "learning_rate": 4.2858050174658626e-05, "loss": 0.0472, "step": 9002 }, { "epoch": 17.138373751783167, "grad_norm": 0.18356472253799438, "learning_rate": 4.285169895204827e-05, "loss": 0.0735, "step": 9003 }, { "epoch": 17.140275796481216, "grad_norm": 0.0878254771232605, "learning_rate": 4.2845347729437916e-05, "loss": 0.0551, "step": 9004 }, { "epoch": 17.14217784117927, "grad_norm": 0.12589243054389954, "learning_rate": 4.283899650682757e-05, "loss": 0.0628, "step": 9005 }, { "epoch": 17.144079885877318, "grad_norm": 0.08670032769441605, "learning_rate": 4.283264528421721e-05, "loss": 0.0425, "step": 9006 }, { "epoch": 17.145981930575367, "grad_norm": 0.22173863649368286, "learning_rate": 4.282629406160686e-05, "loss": 0.0515, "step": 9007 }, { "epoch": 17.14788397527342, "grad_norm": 0.06839998066425323, "learning_rate": 4.281994283899651e-05, "loss": 0.0623, "step": 9008 }, { "epoch": 17.14978601997147, "grad_norm": 0.20462091267108917, "learning_rate": 4.2813591616386155e-05, "loss": 0.0565, "step": 9009 }, { "epoch": 17.15168806466952, "grad_norm": 0.1404951810836792, "learning_rate": 4.280724039377581e-05, "loss": 0.0629, "step": 9010 }, { "epoch": 17.15359010936757, "grad_norm": 0.0752057284116745, "learning_rate": 4.280088917116545e-05, "loss": 0.0615, "step": 9011 }, { "epoch": 17.15549215406562, "grad_norm": 0.09520772099494934, "learning_rate": 4.27945379485551e-05, "loss": 0.0415, "step": 9012 }, { "epoch": 17.157394198763672, "grad_norm": 0.06295536458492279, "learning_rate": 4.278818672594474e-05, "loss": 0.033, "step": 9013 }, { "epoch": 17.15929624346172, "grad_norm": 0.07264743000268936, "learning_rate": 4.2781835503334394e-05, "loss": 0.0495, "step": 9014 }, { "epoch": 17.16119828815977, "grad_norm": 0.1129545047879219, "learning_rate": 4.2775484280724046e-05, "loss": 0.0591, "step": 9015 }, { "epoch": 17.163100332857823, "grad_norm": 0.16535106301307678, "learning_rate": 4.2769133058113684e-05, "loss": 0.0649, "step": 9016 }, { "epoch": 17.165002377555872, "grad_norm": 0.05102641135454178, "learning_rate": 4.2762781835503336e-05, "loss": 0.0437, "step": 9017 }, { "epoch": 17.16690442225392, "grad_norm": 0.08517751842737198, "learning_rate": 4.275643061289298e-05, "loss": 0.0425, "step": 9018 }, { "epoch": 17.168806466951974, "grad_norm": 0.12171544134616852, "learning_rate": 4.275007939028263e-05, "loss": 0.0652, "step": 9019 }, { "epoch": 17.170708511650023, "grad_norm": 0.10095885396003723, "learning_rate": 4.274372816767228e-05, "loss": 0.0673, "step": 9020 }, { "epoch": 17.172610556348076, "grad_norm": 0.14293339848518372, "learning_rate": 4.273737694506192e-05, "loss": 0.0549, "step": 9021 }, { "epoch": 17.174512601046125, "grad_norm": 0.07936553657054901, "learning_rate": 4.2731025722451575e-05, "loss": 0.0509, "step": 9022 }, { "epoch": 17.176414645744174, "grad_norm": 0.06809721887111664, "learning_rate": 4.272467449984122e-05, "loss": 0.0609, "step": 9023 }, { "epoch": 17.178316690442227, "grad_norm": 0.09068653732538223, "learning_rate": 4.271832327723087e-05, "loss": 0.0588, "step": 9024 }, { "epoch": 17.180218735140276, "grad_norm": 0.13408097624778748, "learning_rate": 4.271197205462052e-05, "loss": 0.0421, "step": 9025 }, { "epoch": 17.182120779838325, "grad_norm": 0.13926245272159576, "learning_rate": 4.270562083201016e-05, "loss": 0.076, "step": 9026 }, { "epoch": 17.184022824536378, "grad_norm": 0.08979006856679916, "learning_rate": 4.2699269609399814e-05, "loss": 0.0573, "step": 9027 }, { "epoch": 17.185924869234427, "grad_norm": 0.09927933663129807, "learning_rate": 4.269291838678946e-05, "loss": 0.0415, "step": 9028 }, { "epoch": 17.187826913932476, "grad_norm": 0.05623723194003105, "learning_rate": 4.268656716417911e-05, "loss": 0.0501, "step": 9029 }, { "epoch": 17.18972895863053, "grad_norm": 0.10407004505395889, "learning_rate": 4.268021594156875e-05, "loss": 0.0669, "step": 9030 }, { "epoch": 17.191631003328578, "grad_norm": 0.11468394100666046, "learning_rate": 4.26738647189584e-05, "loss": 0.0453, "step": 9031 }, { "epoch": 17.19353304802663, "grad_norm": 0.052839286625385284, "learning_rate": 4.266751349634805e-05, "loss": 0.0579, "step": 9032 }, { "epoch": 17.19543509272468, "grad_norm": 0.16107793152332306, "learning_rate": 4.26611622737377e-05, "loss": 0.0561, "step": 9033 }, { "epoch": 17.19733713742273, "grad_norm": 0.0660393089056015, "learning_rate": 4.265481105112734e-05, "loss": 0.0532, "step": 9034 }, { "epoch": 17.19923918212078, "grad_norm": 0.11889426410198212, "learning_rate": 4.264845982851699e-05, "loss": 0.0897, "step": 9035 }, { "epoch": 17.20114122681883, "grad_norm": 0.04962438344955444, "learning_rate": 4.264210860590664e-05, "loss": 0.0757, "step": 9036 }, { "epoch": 17.20304327151688, "grad_norm": 0.07310406118631363, "learning_rate": 4.2635757383296285e-05, "loss": 0.0494, "step": 9037 }, { "epoch": 17.204945316214932, "grad_norm": 0.16161684691905975, "learning_rate": 4.2629406160685937e-05, "loss": 0.0645, "step": 9038 }, { "epoch": 17.20684736091298, "grad_norm": 0.0898459404706955, "learning_rate": 4.262305493807558e-05, "loss": 0.0579, "step": 9039 }, { "epoch": 17.20874940561103, "grad_norm": 0.060915347188711166, "learning_rate": 4.261670371546523e-05, "loss": 0.0446, "step": 9040 }, { "epoch": 17.210651450309083, "grad_norm": 0.1457240730524063, "learning_rate": 4.261035249285488e-05, "loss": 0.0509, "step": 9041 }, { "epoch": 17.212553495007132, "grad_norm": 0.05205492302775383, "learning_rate": 4.2604001270244524e-05, "loss": 0.0309, "step": 9042 }, { "epoch": 17.214455539705185, "grad_norm": 0.1869315356016159, "learning_rate": 4.259765004763417e-05, "loss": 0.0455, "step": 9043 }, { "epoch": 17.216357584403234, "grad_norm": 0.19125977158546448, "learning_rate": 4.259129882502382e-05, "loss": 0.0581, "step": 9044 }, { "epoch": 17.218259629101283, "grad_norm": 0.11834566295146942, "learning_rate": 4.2584947602413466e-05, "loss": 0.1067, "step": 9045 }, { "epoch": 17.220161673799335, "grad_norm": 0.07625547796487808, "learning_rate": 4.257859637980312e-05, "loss": 0.0427, "step": 9046 }, { "epoch": 17.222063718497385, "grad_norm": 0.08554868400096893, "learning_rate": 4.257224515719276e-05, "loss": 0.0529, "step": 9047 }, { "epoch": 17.223965763195434, "grad_norm": 0.19139155745506287, "learning_rate": 4.256589393458241e-05, "loss": 0.0744, "step": 9048 }, { "epoch": 17.225867807893486, "grad_norm": 0.13853976130485535, "learning_rate": 4.255954271197205e-05, "loss": 0.0729, "step": 9049 }, { "epoch": 17.227769852591535, "grad_norm": 0.1726534515619278, "learning_rate": 4.2553191489361704e-05, "loss": 0.0614, "step": 9050 }, { "epoch": 17.229671897289585, "grad_norm": 0.10510444641113281, "learning_rate": 4.2546840266751356e-05, "loss": 0.0501, "step": 9051 }, { "epoch": 17.231573941987637, "grad_norm": 0.05099013075232506, "learning_rate": 4.2540489044140995e-05, "loss": 0.0471, "step": 9052 }, { "epoch": 17.233475986685686, "grad_norm": 0.17027002573013306, "learning_rate": 4.2534137821530646e-05, "loss": 0.0463, "step": 9053 }, { "epoch": 17.23537803138374, "grad_norm": 0.08581602573394775, "learning_rate": 4.252778659892029e-05, "loss": 0.0543, "step": 9054 }, { "epoch": 17.237280076081788, "grad_norm": 0.1270342320203781, "learning_rate": 4.252143537630994e-05, "loss": 0.0445, "step": 9055 }, { "epoch": 17.239182120779837, "grad_norm": 0.1030355840921402, "learning_rate": 4.251508415369959e-05, "loss": 0.0526, "step": 9056 }, { "epoch": 17.24108416547789, "grad_norm": 0.18514138460159302, "learning_rate": 4.2508732931089233e-05, "loss": 0.0531, "step": 9057 }, { "epoch": 17.24298621017594, "grad_norm": 0.18377025425434113, "learning_rate": 4.2502381708478885e-05, "loss": 0.0819, "step": 9058 }, { "epoch": 17.244888254873988, "grad_norm": 0.18528705835342407, "learning_rate": 4.249603048586853e-05, "loss": 0.0575, "step": 9059 }, { "epoch": 17.24679029957204, "grad_norm": 0.3869665563106537, "learning_rate": 4.248967926325818e-05, "loss": 0.0807, "step": 9060 }, { "epoch": 17.24869234427009, "grad_norm": 0.2586633861064911, "learning_rate": 4.248332804064783e-05, "loss": 0.0763, "step": 9061 }, { "epoch": 17.25059438896814, "grad_norm": 0.2239067256450653, "learning_rate": 4.247697681803747e-05, "loss": 0.0719, "step": 9062 }, { "epoch": 17.25249643366619, "grad_norm": 0.1460609883069992, "learning_rate": 4.2470625595427124e-05, "loss": 0.0497, "step": 9063 }, { "epoch": 17.25439847836424, "grad_norm": 0.13906528055667877, "learning_rate": 4.246427437281677e-05, "loss": 0.0651, "step": 9064 }, { "epoch": 17.256300523062293, "grad_norm": 0.10617247968912125, "learning_rate": 4.245792315020642e-05, "loss": 0.0621, "step": 9065 }, { "epoch": 17.258202567760343, "grad_norm": 0.05661832168698311, "learning_rate": 4.245157192759606e-05, "loss": 0.061, "step": 9066 }, { "epoch": 17.26010461245839, "grad_norm": 0.2621058523654938, "learning_rate": 4.244522070498571e-05, "loss": 0.0571, "step": 9067 }, { "epoch": 17.262006657156444, "grad_norm": 0.07775229215621948, "learning_rate": 4.2438869482375356e-05, "loss": 0.039, "step": 9068 }, { "epoch": 17.263908701854493, "grad_norm": 0.07392751425504684, "learning_rate": 4.243251825976501e-05, "loss": 0.0761, "step": 9069 }, { "epoch": 17.265810746552543, "grad_norm": 0.1043642908334732, "learning_rate": 4.242616703715465e-05, "loss": 0.052, "step": 9070 }, { "epoch": 17.267712791250595, "grad_norm": 0.2676692306995392, "learning_rate": 4.24198158145443e-05, "loss": 0.0587, "step": 9071 }, { "epoch": 17.269614835948644, "grad_norm": 0.2079627513885498, "learning_rate": 4.241346459193395e-05, "loss": 0.0574, "step": 9072 }, { "epoch": 17.271516880646693, "grad_norm": 0.06418656557798386, "learning_rate": 4.2407113369323595e-05, "loss": 0.0541, "step": 9073 }, { "epoch": 17.273418925344746, "grad_norm": 0.09768824279308319, "learning_rate": 4.240076214671325e-05, "loss": 0.0499, "step": 9074 }, { "epoch": 17.275320970042795, "grad_norm": 0.04967249184846878, "learning_rate": 4.239441092410289e-05, "loss": 0.0345, "step": 9075 }, { "epoch": 17.277223014740848, "grad_norm": 0.2924100160598755, "learning_rate": 4.238805970149254e-05, "loss": 0.0736, "step": 9076 }, { "epoch": 17.279125059438897, "grad_norm": 0.10819932073354721, "learning_rate": 4.238170847888219e-05, "loss": 0.0488, "step": 9077 }, { "epoch": 17.281027104136946, "grad_norm": 0.3450181186199188, "learning_rate": 4.2375357256271834e-05, "loss": 0.0732, "step": 9078 }, { "epoch": 17.282929148835, "grad_norm": 0.14250418543815613, "learning_rate": 4.236900603366148e-05, "loss": 0.048, "step": 9079 }, { "epoch": 17.284831193533048, "grad_norm": 0.08190340548753738, "learning_rate": 4.236265481105113e-05, "loss": 0.0354, "step": 9080 }, { "epoch": 17.286733238231097, "grad_norm": 0.11507826298475266, "learning_rate": 4.2356303588440776e-05, "loss": 0.0638, "step": 9081 }, { "epoch": 17.28863528292915, "grad_norm": 0.11006578058004379, "learning_rate": 4.234995236583043e-05, "loss": 0.0569, "step": 9082 }, { "epoch": 17.2905373276272, "grad_norm": 0.09880606085062027, "learning_rate": 4.234360114322007e-05, "loss": 0.0535, "step": 9083 }, { "epoch": 17.292439372325248, "grad_norm": 0.13434161245822906, "learning_rate": 4.233724992060972e-05, "loss": 0.0501, "step": 9084 }, { "epoch": 17.2943414170233, "grad_norm": 0.08274086564779282, "learning_rate": 4.233089869799936e-05, "loss": 0.0626, "step": 9085 }, { "epoch": 17.29624346172135, "grad_norm": 0.11226063221693039, "learning_rate": 4.2324547475389015e-05, "loss": 0.0591, "step": 9086 }, { "epoch": 17.298145506419402, "grad_norm": 0.14939825236797333, "learning_rate": 4.231819625277867e-05, "loss": 0.0635, "step": 9087 }, { "epoch": 17.30004755111745, "grad_norm": 0.18860632181167603, "learning_rate": 4.2311845030168305e-05, "loss": 0.0735, "step": 9088 }, { "epoch": 17.3019495958155, "grad_norm": 0.05516693368554115, "learning_rate": 4.230549380755796e-05, "loss": 0.0582, "step": 9089 }, { "epoch": 17.303851640513553, "grad_norm": 0.16363638639450073, "learning_rate": 4.22991425849476e-05, "loss": 0.0537, "step": 9090 }, { "epoch": 17.305753685211602, "grad_norm": 0.11976895481348038, "learning_rate": 4.2292791362337254e-05, "loss": 0.0503, "step": 9091 }, { "epoch": 17.30765572990965, "grad_norm": 0.13421820104122162, "learning_rate": 4.22864401397269e-05, "loss": 0.0905, "step": 9092 }, { "epoch": 17.309557774607704, "grad_norm": 0.18360990285873413, "learning_rate": 4.2280088917116544e-05, "loss": 0.0594, "step": 9093 }, { "epoch": 17.311459819305753, "grad_norm": 0.09387153387069702, "learning_rate": 4.2273737694506196e-05, "loss": 0.0587, "step": 9094 }, { "epoch": 17.313361864003806, "grad_norm": 0.060100257396698, "learning_rate": 4.226738647189584e-05, "loss": 0.0604, "step": 9095 }, { "epoch": 17.315263908701855, "grad_norm": 0.16482652723789215, "learning_rate": 4.226103524928549e-05, "loss": 0.0586, "step": 9096 }, { "epoch": 17.317165953399904, "grad_norm": 0.16404573619365692, "learning_rate": 4.225468402667513e-05, "loss": 0.0553, "step": 9097 }, { "epoch": 17.319067998097957, "grad_norm": 0.1388007551431656, "learning_rate": 4.224833280406478e-05, "loss": 0.0633, "step": 9098 }, { "epoch": 17.320970042796006, "grad_norm": 0.04446406289935112, "learning_rate": 4.2241981581454435e-05, "loss": 0.0445, "step": 9099 }, { "epoch": 17.322872087494055, "grad_norm": 0.06030283495783806, "learning_rate": 4.223563035884408e-05, "loss": 0.0517, "step": 9100 }, { "epoch": 17.324774132192108, "grad_norm": 0.1730167716741562, "learning_rate": 4.222927913623373e-05, "loss": 0.0596, "step": 9101 }, { "epoch": 17.326676176890157, "grad_norm": 0.13487887382507324, "learning_rate": 4.222292791362337e-05, "loss": 0.0545, "step": 9102 }, { "epoch": 17.328578221588206, "grad_norm": 0.05873151496052742, "learning_rate": 4.221657669101302e-05, "loss": 0.0553, "step": 9103 }, { "epoch": 17.33048026628626, "grad_norm": 0.1045122891664505, "learning_rate": 4.221022546840267e-05, "loss": 0.0678, "step": 9104 }, { "epoch": 17.332382310984308, "grad_norm": 0.06493869423866272, "learning_rate": 4.220387424579232e-05, "loss": 0.0556, "step": 9105 }, { "epoch": 17.33428435568236, "grad_norm": 0.09537652879953384, "learning_rate": 4.2197523023181964e-05, "loss": 0.0442, "step": 9106 }, { "epoch": 17.33618640038041, "grad_norm": 0.15815818309783936, "learning_rate": 4.219117180057161e-05, "loss": 0.0654, "step": 9107 }, { "epoch": 17.33808844507846, "grad_norm": 0.11845643073320389, "learning_rate": 4.218482057796126e-05, "loss": 0.0905, "step": 9108 }, { "epoch": 17.33999048977651, "grad_norm": 0.06879471242427826, "learning_rate": 4.2178469355350906e-05, "loss": 0.0449, "step": 9109 }, { "epoch": 17.34189253447456, "grad_norm": 0.05066360533237457, "learning_rate": 4.217211813274056e-05, "loss": 0.0496, "step": 9110 }, { "epoch": 17.34379457917261, "grad_norm": 0.041844941675662994, "learning_rate": 4.21657669101302e-05, "loss": 0.0498, "step": 9111 }, { "epoch": 17.345696623870662, "grad_norm": 0.19003881514072418, "learning_rate": 4.215941568751985e-05, "loss": 0.0543, "step": 9112 }, { "epoch": 17.34759866856871, "grad_norm": 0.09213196486234665, "learning_rate": 4.21530644649095e-05, "loss": 0.0466, "step": 9113 }, { "epoch": 17.34950071326676, "grad_norm": 0.14282000064849854, "learning_rate": 4.2146713242299144e-05, "loss": 0.0569, "step": 9114 }, { "epoch": 17.351402757964813, "grad_norm": 0.05803923308849335, "learning_rate": 4.214036201968879e-05, "loss": 0.037, "step": 9115 }, { "epoch": 17.353304802662862, "grad_norm": 0.07661207020282745, "learning_rate": 4.213401079707844e-05, "loss": 0.0388, "step": 9116 }, { "epoch": 17.355206847360915, "grad_norm": 0.05535397306084633, "learning_rate": 4.2127659574468086e-05, "loss": 0.0336, "step": 9117 }, { "epoch": 17.357108892058964, "grad_norm": 0.13280822336673737, "learning_rate": 4.212130835185774e-05, "loss": 0.0639, "step": 9118 }, { "epoch": 17.359010936757013, "grad_norm": 0.12745091319084167, "learning_rate": 4.211495712924738e-05, "loss": 0.0723, "step": 9119 }, { "epoch": 17.360912981455066, "grad_norm": 0.1197303980588913, "learning_rate": 4.210860590663703e-05, "loss": 0.0559, "step": 9120 }, { "epoch": 17.362815026153115, "grad_norm": 0.24809758365154266, "learning_rate": 4.2102254684026673e-05, "loss": 0.0603, "step": 9121 }, { "epoch": 17.364717070851164, "grad_norm": 0.10350430756807327, "learning_rate": 4.2095903461416325e-05, "loss": 0.0623, "step": 9122 }, { "epoch": 17.366619115549216, "grad_norm": 0.1336364895105362, "learning_rate": 4.208955223880597e-05, "loss": 0.0749, "step": 9123 }, { "epoch": 17.368521160247266, "grad_norm": 0.07633499056100845, "learning_rate": 4.2083201016195615e-05, "loss": 0.0486, "step": 9124 }, { "epoch": 17.370423204945315, "grad_norm": 0.06507447361946106, "learning_rate": 4.207684979358527e-05, "loss": 0.0404, "step": 9125 }, { "epoch": 17.372325249643367, "grad_norm": 0.19423268735408783, "learning_rate": 4.207049857097491e-05, "loss": 0.0531, "step": 9126 }, { "epoch": 17.374227294341416, "grad_norm": 0.14882993698120117, "learning_rate": 4.2064147348364564e-05, "loss": 0.0511, "step": 9127 }, { "epoch": 17.37612933903947, "grad_norm": 0.10786750912666321, "learning_rate": 4.205779612575421e-05, "loss": 0.0769, "step": 9128 }, { "epoch": 17.378031383737518, "grad_norm": 0.12144813686609268, "learning_rate": 4.2051444903143854e-05, "loss": 0.0612, "step": 9129 }, { "epoch": 17.379933428435567, "grad_norm": 0.07385139167308807, "learning_rate": 4.2045093680533506e-05, "loss": 0.0645, "step": 9130 }, { "epoch": 17.38183547313362, "grad_norm": 0.12522324919700623, "learning_rate": 4.203874245792315e-05, "loss": 0.0597, "step": 9131 }, { "epoch": 17.38373751783167, "grad_norm": 0.1233920082449913, "learning_rate": 4.20323912353128e-05, "loss": 0.0779, "step": 9132 }, { "epoch": 17.385639562529718, "grad_norm": 0.12006913125514984, "learning_rate": 4.202604001270244e-05, "loss": 0.0399, "step": 9133 }, { "epoch": 17.38754160722777, "grad_norm": 0.14210516214370728, "learning_rate": 4.201968879009209e-05, "loss": 0.0545, "step": 9134 }, { "epoch": 17.38944365192582, "grad_norm": 0.08909688889980316, "learning_rate": 4.2013337567481745e-05, "loss": 0.0703, "step": 9135 }, { "epoch": 17.39134569662387, "grad_norm": 0.06299816071987152, "learning_rate": 4.200698634487139e-05, "loss": 0.043, "step": 9136 }, { "epoch": 17.39324774132192, "grad_norm": 0.16345307230949402, "learning_rate": 4.200063512226104e-05, "loss": 0.0625, "step": 9137 }, { "epoch": 17.39514978601997, "grad_norm": 0.14235182106494904, "learning_rate": 4.199428389965068e-05, "loss": 0.0742, "step": 9138 }, { "epoch": 17.397051830718024, "grad_norm": 0.14397858083248138, "learning_rate": 4.198793267704033e-05, "loss": 0.069, "step": 9139 }, { "epoch": 17.398953875416073, "grad_norm": 0.10805745422840118, "learning_rate": 4.198158145442998e-05, "loss": 0.0615, "step": 9140 }, { "epoch": 17.40085592011412, "grad_norm": 0.19101940095424652, "learning_rate": 4.197523023181963e-05, "loss": 0.0384, "step": 9141 }, { "epoch": 17.402757964812174, "grad_norm": 0.15251103043556213, "learning_rate": 4.1968879009209274e-05, "loss": 0.0377, "step": 9142 }, { "epoch": 17.404660009510224, "grad_norm": 0.11683591455221176, "learning_rate": 4.196252778659892e-05, "loss": 0.0575, "step": 9143 }, { "epoch": 17.406562054208273, "grad_norm": 0.11389310657978058, "learning_rate": 4.195617656398857e-05, "loss": 0.0598, "step": 9144 }, { "epoch": 17.408464098906325, "grad_norm": 0.12031054496765137, "learning_rate": 4.1949825341378216e-05, "loss": 0.0459, "step": 9145 }, { "epoch": 17.410366143604374, "grad_norm": 0.18291601538658142, "learning_rate": 4.194347411876787e-05, "loss": 0.0553, "step": 9146 }, { "epoch": 17.412268188302424, "grad_norm": 0.12735716998577118, "learning_rate": 4.193712289615751e-05, "loss": 0.0358, "step": 9147 }, { "epoch": 17.414170233000476, "grad_norm": 0.15188108384609222, "learning_rate": 4.193077167354716e-05, "loss": 0.0463, "step": 9148 }, { "epoch": 17.416072277698525, "grad_norm": 0.14176872372627258, "learning_rate": 4.192442045093681e-05, "loss": 0.0633, "step": 9149 }, { "epoch": 17.417974322396578, "grad_norm": 0.17167988419532776, "learning_rate": 4.1918069228326455e-05, "loss": 0.0494, "step": 9150 }, { "epoch": 17.419876367094627, "grad_norm": 0.07536623626947403, "learning_rate": 4.19117180057161e-05, "loss": 0.0484, "step": 9151 }, { "epoch": 17.421778411792676, "grad_norm": 0.1476680040359497, "learning_rate": 4.1905366783105745e-05, "loss": 0.0615, "step": 9152 }, { "epoch": 17.42368045649073, "grad_norm": 0.09555692970752716, "learning_rate": 4.18990155604954e-05, "loss": 0.0685, "step": 9153 }, { "epoch": 17.425582501188778, "grad_norm": 0.18903157114982605, "learning_rate": 4.189266433788505e-05, "loss": 0.0547, "step": 9154 }, { "epoch": 17.427484545886827, "grad_norm": 0.13380809128284454, "learning_rate": 4.1886313115274694e-05, "loss": 0.0475, "step": 9155 }, { "epoch": 17.42938659058488, "grad_norm": 0.20205047726631165, "learning_rate": 4.187996189266434e-05, "loss": 0.064, "step": 9156 }, { "epoch": 17.43128863528293, "grad_norm": 0.15946093201637268, "learning_rate": 4.1873610670053984e-05, "loss": 0.0455, "step": 9157 }, { "epoch": 17.433190679980978, "grad_norm": 0.10756520926952362, "learning_rate": 4.1867259447443636e-05, "loss": 0.0474, "step": 9158 }, { "epoch": 17.43509272467903, "grad_norm": 0.07083479315042496, "learning_rate": 4.186090822483328e-05, "loss": 0.0591, "step": 9159 }, { "epoch": 17.43699476937708, "grad_norm": 0.21123982965946198, "learning_rate": 4.1854557002222926e-05, "loss": 0.0682, "step": 9160 }, { "epoch": 17.438896814075132, "grad_norm": 0.15162432193756104, "learning_rate": 4.184820577961258e-05, "loss": 0.0763, "step": 9161 }, { "epoch": 17.44079885877318, "grad_norm": 0.11110099405050278, "learning_rate": 4.184185455700222e-05, "loss": 0.0495, "step": 9162 }, { "epoch": 17.44270090347123, "grad_norm": 0.12019350379705429, "learning_rate": 4.1835503334391875e-05, "loss": 0.0407, "step": 9163 }, { "epoch": 17.444602948169283, "grad_norm": 0.166811004281044, "learning_rate": 4.182915211178152e-05, "loss": 0.0632, "step": 9164 }, { "epoch": 17.446504992867332, "grad_norm": 0.12711088359355927, "learning_rate": 4.1822800889171165e-05, "loss": 0.0347, "step": 9165 }, { "epoch": 17.44840703756538, "grad_norm": 0.09420914947986603, "learning_rate": 4.181644966656082e-05, "loss": 0.0427, "step": 9166 }, { "epoch": 17.450309082263434, "grad_norm": 0.12314459681510925, "learning_rate": 4.181009844395046e-05, "loss": 0.0591, "step": 9167 }, { "epoch": 17.452211126961483, "grad_norm": 0.05265713855624199, "learning_rate": 4.1803747221340114e-05, "loss": 0.0378, "step": 9168 }, { "epoch": 17.454113171659532, "grad_norm": 0.08246375620365143, "learning_rate": 4.179739599872975e-05, "loss": 0.0409, "step": 9169 }, { "epoch": 17.456015216357585, "grad_norm": 0.08644389361143112, "learning_rate": 4.1791044776119404e-05, "loss": 0.0511, "step": 9170 }, { "epoch": 17.457917261055634, "grad_norm": 0.10643388330936432, "learning_rate": 4.1784693553509056e-05, "loss": 0.0592, "step": 9171 }, { "epoch": 17.459819305753687, "grad_norm": 0.1023353561758995, "learning_rate": 4.17783423308987e-05, "loss": 0.0657, "step": 9172 }, { "epoch": 17.461721350451736, "grad_norm": 0.2840350270271301, "learning_rate": 4.177199110828835e-05, "loss": 0.0902, "step": 9173 }, { "epoch": 17.463623395149785, "grad_norm": 0.23073290288448334, "learning_rate": 4.176563988567799e-05, "loss": 0.0592, "step": 9174 }, { "epoch": 17.465525439847838, "grad_norm": 0.15955200791358948, "learning_rate": 4.175928866306764e-05, "loss": 0.0418, "step": 9175 }, { "epoch": 17.467427484545887, "grad_norm": 0.17804458737373352, "learning_rate": 4.175293744045729e-05, "loss": 0.0592, "step": 9176 }, { "epoch": 17.469329529243936, "grad_norm": 0.055416397750377655, "learning_rate": 4.174658621784694e-05, "loss": 0.0351, "step": 9177 }, { "epoch": 17.47123157394199, "grad_norm": 0.10548324137926102, "learning_rate": 4.1740234995236585e-05, "loss": 0.0484, "step": 9178 }, { "epoch": 17.473133618640038, "grad_norm": 0.06586059927940369, "learning_rate": 4.173388377262623e-05, "loss": 0.0719, "step": 9179 }, { "epoch": 17.475035663338087, "grad_norm": 0.10012559592723846, "learning_rate": 4.172753255001588e-05, "loss": 0.0842, "step": 9180 }, { "epoch": 17.47693770803614, "grad_norm": 0.13187265396118164, "learning_rate": 4.1721181327405526e-05, "loss": 0.0786, "step": 9181 }, { "epoch": 17.47883975273419, "grad_norm": 0.08381135761737823, "learning_rate": 4.171483010479518e-05, "loss": 0.0743, "step": 9182 }, { "epoch": 17.48074179743224, "grad_norm": 0.1986529380083084, "learning_rate": 4.1708478882184823e-05, "loss": 0.0751, "step": 9183 }, { "epoch": 17.48264384213029, "grad_norm": 0.12799467146396637, "learning_rate": 4.170212765957447e-05, "loss": 0.0566, "step": 9184 }, { "epoch": 17.48454588682834, "grad_norm": 0.2495906800031662, "learning_rate": 4.169577643696412e-05, "loss": 0.0542, "step": 9185 }, { "epoch": 17.486447931526392, "grad_norm": 0.0946408286690712, "learning_rate": 4.1689425214353765e-05, "loss": 0.0524, "step": 9186 }, { "epoch": 17.48834997622444, "grad_norm": 0.16105763614177704, "learning_rate": 4.168307399174341e-05, "loss": 0.0692, "step": 9187 }, { "epoch": 17.49025202092249, "grad_norm": 0.12365740537643433, "learning_rate": 4.1676722769133056e-05, "loss": 0.0774, "step": 9188 }, { "epoch": 17.492154065620543, "grad_norm": 0.231750950217247, "learning_rate": 4.167037154652271e-05, "loss": 0.0684, "step": 9189 }, { "epoch": 17.494056110318592, "grad_norm": 0.19699423015117645, "learning_rate": 4.166402032391236e-05, "loss": 0.0652, "step": 9190 }, { "epoch": 17.49595815501664, "grad_norm": 0.11165280640125275, "learning_rate": 4.1657669101302004e-05, "loss": 0.0556, "step": 9191 }, { "epoch": 17.497860199714694, "grad_norm": 0.13366112112998962, "learning_rate": 4.165131787869165e-05, "loss": 0.0587, "step": 9192 }, { "epoch": 17.499762244412743, "grad_norm": 0.07594496011734009, "learning_rate": 4.1644966656081294e-05, "loss": 0.0555, "step": 9193 }, { "epoch": 17.501664289110796, "grad_norm": 0.14714041352272034, "learning_rate": 4.1638615433470946e-05, "loss": 0.073, "step": 9194 }, { "epoch": 17.503566333808845, "grad_norm": 0.08447927236557007, "learning_rate": 4.163226421086059e-05, "loss": 0.0683, "step": 9195 }, { "epoch": 17.505468378506894, "grad_norm": 0.09991919994354248, "learning_rate": 4.1625912988250236e-05, "loss": 0.0686, "step": 9196 }, { "epoch": 17.507370423204947, "grad_norm": 0.055409085005521774, "learning_rate": 4.161956176563989e-05, "loss": 0.0458, "step": 9197 }, { "epoch": 17.509272467902996, "grad_norm": 0.13919416069984436, "learning_rate": 4.161321054302953e-05, "loss": 0.0596, "step": 9198 }, { "epoch": 17.511174512601045, "grad_norm": 0.14709287881851196, "learning_rate": 4.1606859320419185e-05, "loss": 0.0463, "step": 9199 }, { "epoch": 17.513076557299097, "grad_norm": 0.06390659511089325, "learning_rate": 4.160050809780883e-05, "loss": 0.0631, "step": 9200 }, { "epoch": 17.514978601997147, "grad_norm": 0.08628946542739868, "learning_rate": 4.1594156875198475e-05, "loss": 0.0562, "step": 9201 }, { "epoch": 17.516880646695196, "grad_norm": 0.06527147442102432, "learning_rate": 4.158780565258813e-05, "loss": 0.052, "step": 9202 }, { "epoch": 17.51878269139325, "grad_norm": 0.1150786429643631, "learning_rate": 4.158145442997777e-05, "loss": 0.064, "step": 9203 }, { "epoch": 17.520684736091297, "grad_norm": 0.17034420371055603, "learning_rate": 4.1575103207367424e-05, "loss": 0.079, "step": 9204 }, { "epoch": 17.52258678078935, "grad_norm": 0.07963655143976212, "learning_rate": 4.156875198475706e-05, "loss": 0.0377, "step": 9205 }, { "epoch": 17.5244888254874, "grad_norm": 0.17314720153808594, "learning_rate": 4.1562400762146714e-05, "loss": 0.0445, "step": 9206 }, { "epoch": 17.52639087018545, "grad_norm": 0.09530575573444366, "learning_rate": 4.155604953953636e-05, "loss": 0.056, "step": 9207 }, { "epoch": 17.5282929148835, "grad_norm": 0.1464606672525406, "learning_rate": 4.154969831692601e-05, "loss": 0.0531, "step": 9208 }, { "epoch": 17.53019495958155, "grad_norm": 0.15889886021614075, "learning_rate": 4.154334709431566e-05, "loss": 0.05, "step": 9209 }, { "epoch": 17.5320970042796, "grad_norm": 0.11242558807134628, "learning_rate": 4.15369958717053e-05, "loss": 0.0485, "step": 9210 }, { "epoch": 17.533999048977652, "grad_norm": 0.05474596098065376, "learning_rate": 4.153064464909495e-05, "loss": 0.0551, "step": 9211 }, { "epoch": 17.5359010936757, "grad_norm": 0.06351004540920258, "learning_rate": 4.15242934264846e-05, "loss": 0.0738, "step": 9212 }, { "epoch": 17.537803138373754, "grad_norm": 0.17597231268882751, "learning_rate": 4.151794220387425e-05, "loss": 0.044, "step": 9213 }, { "epoch": 17.539705183071803, "grad_norm": 0.05791318044066429, "learning_rate": 4.1511590981263895e-05, "loss": 0.0631, "step": 9214 }, { "epoch": 17.541607227769852, "grad_norm": 0.1329626441001892, "learning_rate": 4.150523975865354e-05, "loss": 0.0715, "step": 9215 }, { "epoch": 17.543509272467904, "grad_norm": 0.07686080783605576, "learning_rate": 4.149888853604319e-05, "loss": 0.0579, "step": 9216 }, { "epoch": 17.545411317165954, "grad_norm": 0.14616870880126953, "learning_rate": 4.149253731343284e-05, "loss": 0.0482, "step": 9217 }, { "epoch": 17.547313361864003, "grad_norm": 0.14873604476451874, "learning_rate": 4.148618609082249e-05, "loss": 0.0594, "step": 9218 }, { "epoch": 17.549215406562055, "grad_norm": 0.0986090674996376, "learning_rate": 4.1479834868212134e-05, "loss": 0.0768, "step": 9219 }, { "epoch": 17.551117451260104, "grad_norm": 0.11944249272346497, "learning_rate": 4.147348364560178e-05, "loss": 0.0506, "step": 9220 }, { "epoch": 17.553019495958154, "grad_norm": 0.06361818313598633, "learning_rate": 4.146713242299143e-05, "loss": 0.0476, "step": 9221 }, { "epoch": 17.554921540656206, "grad_norm": 0.1397957056760788, "learning_rate": 4.1460781200381076e-05, "loss": 0.0636, "step": 9222 }, { "epoch": 17.556823585354255, "grad_norm": 0.08839410543441772, "learning_rate": 4.145442997777072e-05, "loss": 0.0517, "step": 9223 }, { "epoch": 17.558725630052308, "grad_norm": 0.24125997722148895, "learning_rate": 4.1448078755160366e-05, "loss": 0.0421, "step": 9224 }, { "epoch": 17.560627674750357, "grad_norm": 0.07652424275875092, "learning_rate": 4.144172753255002e-05, "loss": 0.0591, "step": 9225 }, { "epoch": 17.562529719448406, "grad_norm": 0.08479491621255875, "learning_rate": 4.143537630993967e-05, "loss": 0.0524, "step": 9226 }, { "epoch": 17.56443176414646, "grad_norm": 0.15499165654182434, "learning_rate": 4.1429025087329315e-05, "loss": 0.0569, "step": 9227 }, { "epoch": 17.566333808844508, "grad_norm": 0.1465625762939453, "learning_rate": 4.142267386471896e-05, "loss": 0.0578, "step": 9228 }, { "epoch": 17.568235853542557, "grad_norm": 0.19261913001537323, "learning_rate": 4.1416322642108605e-05, "loss": 0.049, "step": 9229 }, { "epoch": 17.57013789824061, "grad_norm": 0.1542554348707199, "learning_rate": 4.140997141949826e-05, "loss": 0.0541, "step": 9230 }, { "epoch": 17.57203994293866, "grad_norm": 0.12585055828094482, "learning_rate": 4.14036201968879e-05, "loss": 0.0514, "step": 9231 }, { "epoch": 17.573941987636708, "grad_norm": 0.06084693223237991, "learning_rate": 4.139726897427755e-05, "loss": 0.0485, "step": 9232 }, { "epoch": 17.57584403233476, "grad_norm": 0.06361477077007294, "learning_rate": 4.13909177516672e-05, "loss": 0.0597, "step": 9233 }, { "epoch": 17.57774607703281, "grad_norm": 0.14518912136554718, "learning_rate": 4.1384566529056844e-05, "loss": 0.0767, "step": 9234 }, { "epoch": 17.579648121730862, "grad_norm": 0.14766274392604828, "learning_rate": 4.1378215306446496e-05, "loss": 0.0475, "step": 9235 }, { "epoch": 17.58155016642891, "grad_norm": 0.15703773498535156, "learning_rate": 4.137186408383614e-05, "loss": 0.0614, "step": 9236 }, { "epoch": 17.58345221112696, "grad_norm": 0.17378023266792297, "learning_rate": 4.1365512861225786e-05, "loss": 0.0478, "step": 9237 }, { "epoch": 17.585354255825013, "grad_norm": 0.18739935755729675, "learning_rate": 4.135916163861544e-05, "loss": 0.0629, "step": 9238 }, { "epoch": 17.587256300523062, "grad_norm": 0.10931984335184097, "learning_rate": 4.135281041600508e-05, "loss": 0.0717, "step": 9239 }, { "epoch": 17.58915834522111, "grad_norm": 0.23572999238967896, "learning_rate": 4.1346459193394734e-05, "loss": 0.0562, "step": 9240 }, { "epoch": 17.591060389919164, "grad_norm": 0.2369711995124817, "learning_rate": 4.134010797078437e-05, "loss": 0.0428, "step": 9241 }, { "epoch": 17.592962434617213, "grad_norm": 0.14604787528514862, "learning_rate": 4.1333756748174025e-05, "loss": 0.0603, "step": 9242 }, { "epoch": 17.594864479315262, "grad_norm": 0.06450549513101578, "learning_rate": 4.132740552556367e-05, "loss": 0.0687, "step": 9243 }, { "epoch": 17.596766524013315, "grad_norm": 0.10120340436697006, "learning_rate": 4.132105430295332e-05, "loss": 0.0708, "step": 9244 }, { "epoch": 17.598668568711364, "grad_norm": 0.09536679089069366, "learning_rate": 4.131470308034297e-05, "loss": 0.0538, "step": 9245 }, { "epoch": 17.600570613409417, "grad_norm": 0.13788338005542755, "learning_rate": 4.130835185773261e-05, "loss": 0.0704, "step": 9246 }, { "epoch": 17.602472658107466, "grad_norm": 0.1518242061138153, "learning_rate": 4.1302000635122263e-05, "loss": 0.0709, "step": 9247 }, { "epoch": 17.604374702805515, "grad_norm": 0.18178845942020416, "learning_rate": 4.129564941251191e-05, "loss": 0.0732, "step": 9248 }, { "epoch": 17.606276747503568, "grad_norm": 0.06598396599292755, "learning_rate": 4.128929818990156e-05, "loss": 0.0535, "step": 9249 }, { "epoch": 17.608178792201617, "grad_norm": 0.07990466803312302, "learning_rate": 4.1282946967291205e-05, "loss": 0.0512, "step": 9250 }, { "epoch": 17.610080836899666, "grad_norm": 0.07749851047992706, "learning_rate": 4.127659574468085e-05, "loss": 0.0499, "step": 9251 }, { "epoch": 17.61198288159772, "grad_norm": 0.07298611849546432, "learning_rate": 4.12702445220705e-05, "loss": 0.0527, "step": 9252 }, { "epoch": 17.613884926295768, "grad_norm": 0.06433837860822678, "learning_rate": 4.126389329946015e-05, "loss": 0.0421, "step": 9253 }, { "epoch": 17.615786970993817, "grad_norm": 0.13365700840950012, "learning_rate": 4.12575420768498e-05, "loss": 0.057, "step": 9254 }, { "epoch": 17.61768901569187, "grad_norm": 0.16515879333019257, "learning_rate": 4.1251190854239444e-05, "loss": 0.0356, "step": 9255 }, { "epoch": 17.61959106038992, "grad_norm": 0.16000066697597504, "learning_rate": 4.124483963162909e-05, "loss": 0.0581, "step": 9256 }, { "epoch": 17.62149310508797, "grad_norm": 0.09683466702699661, "learning_rate": 4.123848840901874e-05, "loss": 0.0531, "step": 9257 }, { "epoch": 17.62339514978602, "grad_norm": 0.1364901065826416, "learning_rate": 4.1232137186408386e-05, "loss": 0.066, "step": 9258 }, { "epoch": 17.62529719448407, "grad_norm": 0.07379920780658722, "learning_rate": 4.122578596379803e-05, "loss": 0.0489, "step": 9259 }, { "epoch": 17.627199239182122, "grad_norm": 0.13212312757968903, "learning_rate": 4.1219434741187676e-05, "loss": 0.0741, "step": 9260 }, { "epoch": 17.62910128388017, "grad_norm": 0.14788813889026642, "learning_rate": 4.121308351857733e-05, "loss": 0.0576, "step": 9261 }, { "epoch": 17.63100332857822, "grad_norm": 0.0804387703537941, "learning_rate": 4.120673229596697e-05, "loss": 0.0426, "step": 9262 }, { "epoch": 17.632905373276273, "grad_norm": 0.14654266834259033, "learning_rate": 4.1200381073356625e-05, "loss": 0.0627, "step": 9263 }, { "epoch": 17.634807417974322, "grad_norm": 0.08965173363685608, "learning_rate": 4.119402985074627e-05, "loss": 0.0574, "step": 9264 }, { "epoch": 17.63670946267237, "grad_norm": 0.17583389580249786, "learning_rate": 4.1187678628135915e-05, "loss": 0.0757, "step": 9265 }, { "epoch": 17.638611507370424, "grad_norm": 0.06688621640205383, "learning_rate": 4.118132740552557e-05, "loss": 0.0662, "step": 9266 }, { "epoch": 17.640513552068473, "grad_norm": 0.10981274396181107, "learning_rate": 4.117497618291521e-05, "loss": 0.0439, "step": 9267 }, { "epoch": 17.642415596766526, "grad_norm": 0.07107902318239212, "learning_rate": 4.116862496030486e-05, "loss": 0.0575, "step": 9268 }, { "epoch": 17.644317641464575, "grad_norm": 0.11891138553619385, "learning_rate": 4.116227373769451e-05, "loss": 0.0433, "step": 9269 }, { "epoch": 17.646219686162624, "grad_norm": 0.15733720362186432, "learning_rate": 4.1155922515084154e-05, "loss": 0.0527, "step": 9270 }, { "epoch": 17.648121730860677, "grad_norm": 0.12876959145069122, "learning_rate": 4.1149571292473806e-05, "loss": 0.0607, "step": 9271 }, { "epoch": 17.650023775558726, "grad_norm": 0.08375310152769089, "learning_rate": 4.114322006986345e-05, "loss": 0.06, "step": 9272 }, { "epoch": 17.651925820256775, "grad_norm": 0.21883268654346466, "learning_rate": 4.1136868847253096e-05, "loss": 0.0459, "step": 9273 }, { "epoch": 17.653827864954827, "grad_norm": 0.06427763402462006, "learning_rate": 4.113051762464275e-05, "loss": 0.0428, "step": 9274 }, { "epoch": 17.655729909652877, "grad_norm": 0.12494489550590515, "learning_rate": 4.112416640203239e-05, "loss": 0.0632, "step": 9275 }, { "epoch": 17.657631954350926, "grad_norm": 0.125833198428154, "learning_rate": 4.1117815179422045e-05, "loss": 0.0849, "step": 9276 }, { "epoch": 17.65953399904898, "grad_norm": 0.09488727897405624, "learning_rate": 4.111146395681168e-05, "loss": 0.0638, "step": 9277 }, { "epoch": 17.661436043747027, "grad_norm": 0.29696619510650635, "learning_rate": 4.1105112734201335e-05, "loss": 0.06, "step": 9278 }, { "epoch": 17.66333808844508, "grad_norm": 0.14339213073253632, "learning_rate": 4.109876151159098e-05, "loss": 0.0465, "step": 9279 }, { "epoch": 17.66524013314313, "grad_norm": 0.1306292712688446, "learning_rate": 4.109241028898063e-05, "loss": 0.0431, "step": 9280 }, { "epoch": 17.66714217784118, "grad_norm": 0.09760496765375137, "learning_rate": 4.1086059066370284e-05, "loss": 0.0697, "step": 9281 }, { "epoch": 17.66904422253923, "grad_norm": 0.19088013470172882, "learning_rate": 4.107970784375992e-05, "loss": 0.0653, "step": 9282 }, { "epoch": 17.67094626723728, "grad_norm": 0.09736175835132599, "learning_rate": 4.1073356621149574e-05, "loss": 0.0622, "step": 9283 }, { "epoch": 17.67284831193533, "grad_norm": 0.11242244392633438, "learning_rate": 4.106700539853922e-05, "loss": 0.0884, "step": 9284 }, { "epoch": 17.674750356633382, "grad_norm": 0.10703061521053314, "learning_rate": 4.106065417592887e-05, "loss": 0.0626, "step": 9285 }, { "epoch": 17.67665240133143, "grad_norm": 0.16119727492332458, "learning_rate": 4.1054302953318516e-05, "loss": 0.0697, "step": 9286 }, { "epoch": 17.67855444602948, "grad_norm": 0.09668602794408798, "learning_rate": 4.104795173070816e-05, "loss": 0.0612, "step": 9287 }, { "epoch": 17.680456490727533, "grad_norm": 0.1401471346616745, "learning_rate": 4.104160050809781e-05, "loss": 0.0609, "step": 9288 }, { "epoch": 17.682358535425582, "grad_norm": 0.05594095587730408, "learning_rate": 4.103524928548746e-05, "loss": 0.0497, "step": 9289 }, { "epoch": 17.684260580123635, "grad_norm": 0.11976776272058487, "learning_rate": 4.102889806287711e-05, "loss": 0.0902, "step": 9290 }, { "epoch": 17.686162624821684, "grad_norm": 0.211091086268425, "learning_rate": 4.102254684026675e-05, "loss": 0.0502, "step": 9291 }, { "epoch": 17.688064669519733, "grad_norm": 0.14465105533599854, "learning_rate": 4.10161956176564e-05, "loss": 0.0681, "step": 9292 }, { "epoch": 17.689966714217785, "grad_norm": 0.10496453195810318, "learning_rate": 4.100984439504605e-05, "loss": 0.0595, "step": 9293 }, { "epoch": 17.691868758915835, "grad_norm": 0.09541040658950806, "learning_rate": 4.10034931724357e-05, "loss": 0.0452, "step": 9294 }, { "epoch": 17.693770803613884, "grad_norm": 0.43040457367897034, "learning_rate": 4.099714194982534e-05, "loss": 0.0689, "step": 9295 }, { "epoch": 17.695672848311936, "grad_norm": 0.13549309968948364, "learning_rate": 4.099079072721499e-05, "loss": 0.0734, "step": 9296 }, { "epoch": 17.697574893009985, "grad_norm": 0.10545626282691956, "learning_rate": 4.098443950460464e-05, "loss": 0.0585, "step": 9297 }, { "epoch": 17.699476937708035, "grad_norm": 0.1298322230577469, "learning_rate": 4.0978088281994284e-05, "loss": 0.0664, "step": 9298 }, { "epoch": 17.701378982406087, "grad_norm": 0.09501194208860397, "learning_rate": 4.0971737059383936e-05, "loss": 0.0539, "step": 9299 }, { "epoch": 17.703281027104136, "grad_norm": 0.1124708279967308, "learning_rate": 4.096538583677358e-05, "loss": 0.0644, "step": 9300 }, { "epoch": 17.70518307180219, "grad_norm": 0.14791899919509888, "learning_rate": 4.0959034614163226e-05, "loss": 0.0434, "step": 9301 }, { "epoch": 17.707085116500238, "grad_norm": 0.08081545680761337, "learning_rate": 4.095268339155288e-05, "loss": 0.0393, "step": 9302 }, { "epoch": 17.708987161198287, "grad_norm": 0.2517786920070648, "learning_rate": 4.094633216894252e-05, "loss": 0.0839, "step": 9303 }, { "epoch": 17.71088920589634, "grad_norm": 0.10763931274414062, "learning_rate": 4.093998094633217e-05, "loss": 0.0427, "step": 9304 }, { "epoch": 17.71279125059439, "grad_norm": 0.1142471581697464, "learning_rate": 4.093362972372182e-05, "loss": 0.0615, "step": 9305 }, { "epoch": 17.714693295292438, "grad_norm": 0.07118445634841919, "learning_rate": 4.0927278501111465e-05, "loss": 0.0606, "step": 9306 }, { "epoch": 17.71659533999049, "grad_norm": 0.0904897153377533, "learning_rate": 4.0920927278501116e-05, "loss": 0.0464, "step": 9307 }, { "epoch": 17.71849738468854, "grad_norm": 0.203889861702919, "learning_rate": 4.091457605589076e-05, "loss": 0.1143, "step": 9308 }, { "epoch": 17.72039942938659, "grad_norm": 0.17758524417877197, "learning_rate": 4.0908224833280407e-05, "loss": 0.0507, "step": 9309 }, { "epoch": 17.72230147408464, "grad_norm": 0.2005939483642578, "learning_rate": 4.090187361067006e-05, "loss": 0.0502, "step": 9310 }, { "epoch": 17.72420351878269, "grad_norm": 0.06343390792608261, "learning_rate": 4.0895522388059703e-05, "loss": 0.0507, "step": 9311 }, { "epoch": 17.726105563480743, "grad_norm": 0.1854616403579712, "learning_rate": 4.0889171165449355e-05, "loss": 0.0696, "step": 9312 }, { "epoch": 17.728007608178793, "grad_norm": 0.08541128039360046, "learning_rate": 4.0882819942838994e-05, "loss": 0.0475, "step": 9313 }, { "epoch": 17.72990965287684, "grad_norm": 0.12330986559391022, "learning_rate": 4.0876468720228645e-05, "loss": 0.0585, "step": 9314 }, { "epoch": 17.731811697574894, "grad_norm": 0.12485690414905548, "learning_rate": 4.087011749761829e-05, "loss": 0.0476, "step": 9315 }, { "epoch": 17.733713742272943, "grad_norm": 0.08095765858888626, "learning_rate": 4.086376627500794e-05, "loss": 0.057, "step": 9316 }, { "epoch": 17.735615786970993, "grad_norm": 0.1634143441915512, "learning_rate": 4.085741505239759e-05, "loss": 0.0732, "step": 9317 }, { "epoch": 17.737517831669045, "grad_norm": 0.1279916763305664, "learning_rate": 4.085106382978723e-05, "loss": 0.1143, "step": 9318 }, { "epoch": 17.739419876367094, "grad_norm": 0.11108207702636719, "learning_rate": 4.0844712607176884e-05, "loss": 0.0565, "step": 9319 }, { "epoch": 17.741321921065143, "grad_norm": 0.056614622473716736, "learning_rate": 4.083836138456653e-05, "loss": 0.0423, "step": 9320 }, { "epoch": 17.743223965763196, "grad_norm": 0.11688531935214996, "learning_rate": 4.083201016195618e-05, "loss": 0.0519, "step": 9321 }, { "epoch": 17.745126010461245, "grad_norm": 0.11915720999240875, "learning_rate": 4.0825658939345826e-05, "loss": 0.0385, "step": 9322 }, { "epoch": 17.747028055159298, "grad_norm": 0.12320053577423096, "learning_rate": 4.081930771673547e-05, "loss": 0.0548, "step": 9323 }, { "epoch": 17.748930099857347, "grad_norm": 0.11300956457853317, "learning_rate": 4.081295649412512e-05, "loss": 0.0758, "step": 9324 }, { "epoch": 17.750832144555396, "grad_norm": 0.1921488642692566, "learning_rate": 4.080660527151477e-05, "loss": 0.0541, "step": 9325 }, { "epoch": 17.75273418925345, "grad_norm": 0.128909632563591, "learning_rate": 4.080025404890442e-05, "loss": 0.0445, "step": 9326 }, { "epoch": 17.754636233951498, "grad_norm": 0.23205921053886414, "learning_rate": 4.079390282629406e-05, "loss": 0.0938, "step": 9327 }, { "epoch": 17.756538278649547, "grad_norm": 0.15705791115760803, "learning_rate": 4.078755160368371e-05, "loss": 0.0517, "step": 9328 }, { "epoch": 17.7584403233476, "grad_norm": 0.11689947545528412, "learning_rate": 4.078120038107336e-05, "loss": 0.0541, "step": 9329 }, { "epoch": 17.76034236804565, "grad_norm": 0.09988643229007721, "learning_rate": 4.077484915846301e-05, "loss": 0.0467, "step": 9330 }, { "epoch": 17.762244412743698, "grad_norm": 0.09019043296575546, "learning_rate": 4.076849793585265e-05, "loss": 0.0545, "step": 9331 }, { "epoch": 17.76414645744175, "grad_norm": 0.22703583538532257, "learning_rate": 4.07621467132423e-05, "loss": 0.0712, "step": 9332 }, { "epoch": 17.7660485021398, "grad_norm": 0.10877406597137451, "learning_rate": 4.075579549063195e-05, "loss": 0.0526, "step": 9333 }, { "epoch": 17.767950546837852, "grad_norm": 0.11539573222398758, "learning_rate": 4.0749444268021594e-05, "loss": 0.0718, "step": 9334 }, { "epoch": 17.7698525915359, "grad_norm": 0.061567336320877075, "learning_rate": 4.0743093045411246e-05, "loss": 0.0614, "step": 9335 }, { "epoch": 17.77175463623395, "grad_norm": 0.18987290561199188, "learning_rate": 4.073674182280089e-05, "loss": 0.0762, "step": 9336 }, { "epoch": 17.773656680932003, "grad_norm": 0.28458330035209656, "learning_rate": 4.0730390600190536e-05, "loss": 0.0795, "step": 9337 }, { "epoch": 17.775558725630052, "grad_norm": 0.11308696120977402, "learning_rate": 4.072403937758019e-05, "loss": 0.0466, "step": 9338 }, { "epoch": 17.7774607703281, "grad_norm": 0.2028082013130188, "learning_rate": 4.071768815496983e-05, "loss": 0.0587, "step": 9339 }, { "epoch": 17.779362815026154, "grad_norm": 0.17155446112155914, "learning_rate": 4.071133693235948e-05, "loss": 0.0851, "step": 9340 }, { "epoch": 17.781264859724203, "grad_norm": 0.16016116738319397, "learning_rate": 4.070498570974913e-05, "loss": 0.0902, "step": 9341 }, { "epoch": 17.783166904422252, "grad_norm": 0.09539316594600677, "learning_rate": 4.0698634487138775e-05, "loss": 0.0589, "step": 9342 }, { "epoch": 17.785068949120305, "grad_norm": 0.1688692420721054, "learning_rate": 4.069228326452843e-05, "loss": 0.0627, "step": 9343 }, { "epoch": 17.786970993818354, "grad_norm": 0.0660916119813919, "learning_rate": 4.068593204191807e-05, "loss": 0.048, "step": 9344 }, { "epoch": 17.788873038516407, "grad_norm": 0.2147185355424881, "learning_rate": 4.067958081930772e-05, "loss": 0.058, "step": 9345 }, { "epoch": 17.790775083214456, "grad_norm": 0.07245621830224991, "learning_rate": 4.067322959669736e-05, "loss": 0.0511, "step": 9346 }, { "epoch": 17.792677127912505, "grad_norm": 0.12138759344816208, "learning_rate": 4.0666878374087014e-05, "loss": 0.0457, "step": 9347 }, { "epoch": 17.794579172610558, "grad_norm": 0.1689005345106125, "learning_rate": 4.0660527151476666e-05, "loss": 0.0657, "step": 9348 }, { "epoch": 17.796481217308607, "grad_norm": 0.10606629401445389, "learning_rate": 4.0654175928866304e-05, "loss": 0.0565, "step": 9349 }, { "epoch": 17.798383262006656, "grad_norm": 0.34530267119407654, "learning_rate": 4.0647824706255956e-05, "loss": 0.0664, "step": 9350 }, { "epoch": 17.80028530670471, "grad_norm": 0.14835722744464874, "learning_rate": 4.06414734836456e-05, "loss": 0.062, "step": 9351 }, { "epoch": 17.802187351402758, "grad_norm": 0.12061794847249985, "learning_rate": 4.063512226103525e-05, "loss": 0.0522, "step": 9352 }, { "epoch": 17.804089396100807, "grad_norm": 0.15052704513072968, "learning_rate": 4.06287710384249e-05, "loss": 0.052, "step": 9353 }, { "epoch": 17.80599144079886, "grad_norm": 0.12164096534252167, "learning_rate": 4.062241981581454e-05, "loss": 0.0635, "step": 9354 }, { "epoch": 17.80789348549691, "grad_norm": 0.05688644200563431, "learning_rate": 4.0616068593204195e-05, "loss": 0.0416, "step": 9355 }, { "epoch": 17.80979553019496, "grad_norm": 0.10969038307666779, "learning_rate": 4.060971737059384e-05, "loss": 0.0704, "step": 9356 }, { "epoch": 17.81169757489301, "grad_norm": 0.06995966285467148, "learning_rate": 4.060336614798349e-05, "loss": 0.0733, "step": 9357 }, { "epoch": 17.81359961959106, "grad_norm": 0.13542746007442474, "learning_rate": 4.059701492537314e-05, "loss": 0.0735, "step": 9358 }, { "epoch": 17.815501664289112, "grad_norm": 0.1096753180027008, "learning_rate": 4.059066370276278e-05, "loss": 0.0606, "step": 9359 }, { "epoch": 17.81740370898716, "grad_norm": 0.2971826195716858, "learning_rate": 4.0584312480152434e-05, "loss": 0.0839, "step": 9360 }, { "epoch": 17.81930575368521, "grad_norm": 0.17957726120948792, "learning_rate": 4.057796125754208e-05, "loss": 0.0581, "step": 9361 }, { "epoch": 17.821207798383263, "grad_norm": 0.1047806441783905, "learning_rate": 4.057161003493173e-05, "loss": 0.0479, "step": 9362 }, { "epoch": 17.823109843081312, "grad_norm": 0.15887323021888733, "learning_rate": 4.056525881232137e-05, "loss": 0.0457, "step": 9363 }, { "epoch": 17.82501188777936, "grad_norm": 0.15369363129138947, "learning_rate": 4.055890758971102e-05, "loss": 0.0684, "step": 9364 }, { "epoch": 17.826913932477414, "grad_norm": 0.19774940609931946, "learning_rate": 4.055255636710067e-05, "loss": 0.067, "step": 9365 }, { "epoch": 17.828815977175463, "grad_norm": 0.1530078649520874, "learning_rate": 4.054620514449032e-05, "loss": 0.0652, "step": 9366 }, { "epoch": 17.830718021873516, "grad_norm": 0.3438361585140228, "learning_rate": 4.053985392187996e-05, "loss": 0.0593, "step": 9367 }, { "epoch": 17.832620066571565, "grad_norm": 0.16979555785655975, "learning_rate": 4.053350269926961e-05, "loss": 0.0505, "step": 9368 }, { "epoch": 17.834522111269614, "grad_norm": 0.07902833074331284, "learning_rate": 4.052715147665926e-05, "loss": 0.0478, "step": 9369 }, { "epoch": 17.836424155967666, "grad_norm": 0.18646280467510223, "learning_rate": 4.0520800254048905e-05, "loss": 0.0504, "step": 9370 }, { "epoch": 17.838326200665716, "grad_norm": 0.17114807665348053, "learning_rate": 4.0514449031438556e-05, "loss": 0.0792, "step": 9371 }, { "epoch": 17.840228245363765, "grad_norm": 0.18604673445224762, "learning_rate": 4.05080978088282e-05, "loss": 0.0628, "step": 9372 }, { "epoch": 17.842130290061817, "grad_norm": 0.13605882227420807, "learning_rate": 4.0501746586217847e-05, "loss": 0.0522, "step": 9373 }, { "epoch": 17.844032334759866, "grad_norm": 0.11220191419124603, "learning_rate": 4.04953953636075e-05, "loss": 0.0601, "step": 9374 }, { "epoch": 17.845934379457915, "grad_norm": 0.09776952862739563, "learning_rate": 4.0489044140997144e-05, "loss": 0.0745, "step": 9375 }, { "epoch": 17.847836424155968, "grad_norm": 0.15183986723423004, "learning_rate": 4.048269291838679e-05, "loss": 0.0586, "step": 9376 }, { "epoch": 17.849738468854017, "grad_norm": 0.19326747953891754, "learning_rate": 4.047634169577644e-05, "loss": 0.0676, "step": 9377 }, { "epoch": 17.85164051355207, "grad_norm": 0.058094944804906845, "learning_rate": 4.0469990473166085e-05, "loss": 0.0496, "step": 9378 }, { "epoch": 17.85354255825012, "grad_norm": 0.14105942845344543, "learning_rate": 4.046363925055574e-05, "loss": 0.0498, "step": 9379 }, { "epoch": 17.855444602948168, "grad_norm": 0.2387823611497879, "learning_rate": 4.045728802794538e-05, "loss": 0.0693, "step": 9380 }, { "epoch": 17.85734664764622, "grad_norm": 0.10509767383337021, "learning_rate": 4.045093680533503e-05, "loss": 0.0575, "step": 9381 }, { "epoch": 17.85924869234427, "grad_norm": 0.18093208968639374, "learning_rate": 4.044458558272467e-05, "loss": 0.0415, "step": 9382 }, { "epoch": 17.86115073704232, "grad_norm": 0.19939205050468445, "learning_rate": 4.0438234360114324e-05, "loss": 0.0646, "step": 9383 }, { "epoch": 17.86305278174037, "grad_norm": 0.14076243340969086, "learning_rate": 4.0431883137503976e-05, "loss": 0.0718, "step": 9384 }, { "epoch": 17.86495482643842, "grad_norm": 0.20016847550868988, "learning_rate": 4.0425531914893614e-05, "loss": 0.055, "step": 9385 }, { "epoch": 17.86685687113647, "grad_norm": 0.0891173854470253, "learning_rate": 4.0419180692283266e-05, "loss": 0.057, "step": 9386 }, { "epoch": 17.868758915834523, "grad_norm": 0.1260504126548767, "learning_rate": 4.041282946967291e-05, "loss": 0.059, "step": 9387 }, { "epoch": 17.87066096053257, "grad_norm": 0.05717617645859718, "learning_rate": 4.040647824706256e-05, "loss": 0.0497, "step": 9388 }, { "epoch": 17.872563005230624, "grad_norm": 0.06866225600242615, "learning_rate": 4.040012702445221e-05, "loss": 0.0599, "step": 9389 }, { "epoch": 17.874465049928673, "grad_norm": 0.09405122697353363, "learning_rate": 4.039377580184185e-05, "loss": 0.0536, "step": 9390 }, { "epoch": 17.876367094626723, "grad_norm": 0.09114693105220795, "learning_rate": 4.0387424579231505e-05, "loss": 0.045, "step": 9391 }, { "epoch": 17.878269139324775, "grad_norm": 0.12476793676614761, "learning_rate": 4.038107335662115e-05, "loss": 0.0473, "step": 9392 }, { "epoch": 17.880171184022824, "grad_norm": 0.14439664781093597, "learning_rate": 4.03747221340108e-05, "loss": 0.0608, "step": 9393 }, { "epoch": 17.882073228720873, "grad_norm": 0.09330099076032639, "learning_rate": 4.036837091140045e-05, "loss": 0.0676, "step": 9394 }, { "epoch": 17.883975273418926, "grad_norm": 0.10382474213838577, "learning_rate": 4.036201968879009e-05, "loss": 0.049, "step": 9395 }, { "epoch": 17.885877318116975, "grad_norm": 0.22255682945251465, "learning_rate": 4.0355668466179744e-05, "loss": 0.0505, "step": 9396 }, { "epoch": 17.887779362815024, "grad_norm": 0.059081196784973145, "learning_rate": 4.034931724356939e-05, "loss": 0.0489, "step": 9397 }, { "epoch": 17.889681407513077, "grad_norm": 0.11481790244579315, "learning_rate": 4.034296602095904e-05, "loss": 0.0633, "step": 9398 }, { "epoch": 17.891583452211126, "grad_norm": 0.08961722254753113, "learning_rate": 4.033661479834868e-05, "loss": 0.0376, "step": 9399 }, { "epoch": 17.89348549690918, "grad_norm": 0.10771533101797104, "learning_rate": 4.033026357573833e-05, "loss": 0.0568, "step": 9400 }, { "epoch": 17.895387541607228, "grad_norm": 0.1264873445034027, "learning_rate": 4.0323912353127976e-05, "loss": 0.0677, "step": 9401 }, { "epoch": 17.897289586305277, "grad_norm": 0.12841086089611053, "learning_rate": 4.031756113051763e-05, "loss": 0.0479, "step": 9402 }, { "epoch": 17.89919163100333, "grad_norm": 0.09496669471263885, "learning_rate": 4.031120990790727e-05, "loss": 0.0567, "step": 9403 }, { "epoch": 17.90109367570138, "grad_norm": 0.17920030653476715, "learning_rate": 4.030485868529692e-05, "loss": 0.0715, "step": 9404 }, { "epoch": 17.902995720399428, "grad_norm": 0.1478681117296219, "learning_rate": 4.029850746268657e-05, "loss": 0.0614, "step": 9405 }, { "epoch": 17.90489776509748, "grad_norm": 0.22791436314582825, "learning_rate": 4.0292156240076215e-05, "loss": 0.0608, "step": 9406 }, { "epoch": 17.90679980979553, "grad_norm": 0.11851348727941513, "learning_rate": 4.028580501746587e-05, "loss": 0.0701, "step": 9407 }, { "epoch": 17.908701854493582, "grad_norm": 0.06529906392097473, "learning_rate": 4.027945379485551e-05, "loss": 0.0572, "step": 9408 }, { "epoch": 17.91060389919163, "grad_norm": 0.095278300344944, "learning_rate": 4.027310257224516e-05, "loss": 0.0657, "step": 9409 }, { "epoch": 17.91250594388968, "grad_norm": 0.12427160888910294, "learning_rate": 4.026675134963481e-05, "loss": 0.0339, "step": 9410 }, { "epoch": 17.914407988587733, "grad_norm": 0.23034276068210602, "learning_rate": 4.0260400127024454e-05, "loss": 0.0669, "step": 9411 }, { "epoch": 17.916310033285782, "grad_norm": 0.2623104155063629, "learning_rate": 4.02540489044141e-05, "loss": 0.0885, "step": 9412 }, { "epoch": 17.91821207798383, "grad_norm": 0.13643351197242737, "learning_rate": 4.024769768180375e-05, "loss": 0.0639, "step": 9413 }, { "epoch": 17.920114122681884, "grad_norm": 0.20017459988594055, "learning_rate": 4.0241346459193396e-05, "loss": 0.0479, "step": 9414 }, { "epoch": 17.922016167379933, "grad_norm": 0.08508337289094925, "learning_rate": 4.023499523658305e-05, "loss": 0.0467, "step": 9415 }, { "epoch": 17.923918212077982, "grad_norm": 0.09643582999706268, "learning_rate": 4.022864401397269e-05, "loss": 0.0551, "step": 9416 }, { "epoch": 17.925820256776035, "grad_norm": 0.28682664036750793, "learning_rate": 4.022229279136234e-05, "loss": 0.0963, "step": 9417 }, { "epoch": 17.927722301474084, "grad_norm": 0.14447623491287231, "learning_rate": 4.021594156875198e-05, "loss": 0.0735, "step": 9418 }, { "epoch": 17.929624346172137, "grad_norm": 0.04629140719771385, "learning_rate": 4.0209590346141635e-05, "loss": 0.0339, "step": 9419 }, { "epoch": 17.931526390870186, "grad_norm": 0.18401731550693512, "learning_rate": 4.020323912353129e-05, "loss": 0.0519, "step": 9420 }, { "epoch": 17.933428435568235, "grad_norm": 0.14074082672595978, "learning_rate": 4.0196887900920925e-05, "loss": 0.079, "step": 9421 }, { "epoch": 17.935330480266288, "grad_norm": 0.071122907102108, "learning_rate": 4.019053667831058e-05, "loss": 0.0601, "step": 9422 }, { "epoch": 17.937232524964337, "grad_norm": 0.20074374973773956, "learning_rate": 4.018418545570022e-05, "loss": 0.0644, "step": 9423 }, { "epoch": 17.939134569662386, "grad_norm": 0.17692893743515015, "learning_rate": 4.0177834233089874e-05, "loss": 0.0552, "step": 9424 }, { "epoch": 17.94103661436044, "grad_norm": 0.2404235452413559, "learning_rate": 4.017148301047952e-05, "loss": 0.0498, "step": 9425 }, { "epoch": 17.942938659058488, "grad_norm": 0.19352063536643982, "learning_rate": 4.0165131787869164e-05, "loss": 0.056, "step": 9426 }, { "epoch": 17.944840703756537, "grad_norm": 0.17906826734542847, "learning_rate": 4.0158780565258816e-05, "loss": 0.0584, "step": 9427 }, { "epoch": 17.94674274845459, "grad_norm": 0.06080584228038788, "learning_rate": 4.015242934264846e-05, "loss": 0.0345, "step": 9428 }, { "epoch": 17.94864479315264, "grad_norm": 0.1453639268875122, "learning_rate": 4.014607812003811e-05, "loss": 0.0718, "step": 9429 }, { "epoch": 17.95054683785069, "grad_norm": 0.10231205821037292, "learning_rate": 4.013972689742775e-05, "loss": 0.0571, "step": 9430 }, { "epoch": 17.95244888254874, "grad_norm": 0.11176486313343048, "learning_rate": 4.01333756748174e-05, "loss": 0.0518, "step": 9431 }, { "epoch": 17.95435092724679, "grad_norm": 0.2039271742105484, "learning_rate": 4.0127024452207055e-05, "loss": 0.0782, "step": 9432 }, { "epoch": 17.956252971944842, "grad_norm": 0.08733384311199188, "learning_rate": 4.01206732295967e-05, "loss": 0.0624, "step": 9433 }, { "epoch": 17.95815501664289, "grad_norm": 0.13305670022964478, "learning_rate": 4.011432200698635e-05, "loss": 0.0678, "step": 9434 }, { "epoch": 17.96005706134094, "grad_norm": 0.2694123685359955, "learning_rate": 4.010797078437599e-05, "loss": 0.0662, "step": 9435 }, { "epoch": 17.961959106038993, "grad_norm": 0.16008444130420685, "learning_rate": 4.010161956176564e-05, "loss": 0.0668, "step": 9436 }, { "epoch": 17.963861150737042, "grad_norm": 0.09083766490221024, "learning_rate": 4.009526833915529e-05, "loss": 0.0699, "step": 9437 }, { "epoch": 17.96576319543509, "grad_norm": 0.07772954553365707, "learning_rate": 4.008891711654494e-05, "loss": 0.0796, "step": 9438 }, { "epoch": 17.967665240133144, "grad_norm": 0.14468494057655334, "learning_rate": 4.0082565893934584e-05, "loss": 0.0389, "step": 9439 }, { "epoch": 17.969567284831193, "grad_norm": 0.20007829368114471, "learning_rate": 4.007621467132423e-05, "loss": 0.0711, "step": 9440 }, { "epoch": 17.971469329529246, "grad_norm": 0.14036419987678528, "learning_rate": 4.006986344871388e-05, "loss": 0.0542, "step": 9441 }, { "epoch": 17.973371374227295, "grad_norm": 0.13243912160396576, "learning_rate": 4.0063512226103526e-05, "loss": 0.0605, "step": 9442 }, { "epoch": 17.975273418925344, "grad_norm": 0.17457394301891327, "learning_rate": 4.005716100349318e-05, "loss": 0.048, "step": 9443 }, { "epoch": 17.977175463623396, "grad_norm": 0.14571994543075562, "learning_rate": 4.005080978088282e-05, "loss": 0.0397, "step": 9444 }, { "epoch": 17.979077508321446, "grad_norm": 0.1325615793466568, "learning_rate": 4.004445855827247e-05, "loss": 0.0579, "step": 9445 }, { "epoch": 17.980979553019495, "grad_norm": 0.129814013838768, "learning_rate": 4.003810733566212e-05, "loss": 0.074, "step": 9446 }, { "epoch": 17.982881597717547, "grad_norm": 0.11676670610904694, "learning_rate": 4.0031756113051764e-05, "loss": 0.0544, "step": 9447 }, { "epoch": 17.984783642415596, "grad_norm": 0.09288892149925232, "learning_rate": 4.002540489044141e-05, "loss": 0.0406, "step": 9448 }, { "epoch": 17.986685687113646, "grad_norm": 0.15818986296653748, "learning_rate": 4.001905366783106e-05, "loss": 0.0514, "step": 9449 }, { "epoch": 17.9885877318117, "grad_norm": 0.17206180095672607, "learning_rate": 4.0012702445220706e-05, "loss": 0.1, "step": 9450 }, { "epoch": 17.990489776509747, "grad_norm": 0.16787725687026978, "learning_rate": 4.000635122261036e-05, "loss": 0.0557, "step": 9451 }, { "epoch": 17.9923918212078, "grad_norm": 0.18895794451236725, "learning_rate": 4e-05, "loss": 0.0568, "step": 9452 }, { "epoch": 17.99429386590585, "grad_norm": 0.21941068768501282, "learning_rate": 3.999364877738965e-05, "loss": 0.0464, "step": 9453 }, { "epoch": 17.9961959106039, "grad_norm": 0.16816534101963043, "learning_rate": 3.9987297554779293e-05, "loss": 0.046, "step": 9454 }, { "epoch": 17.99809795530195, "grad_norm": 0.16162028908729553, "learning_rate": 3.9980946332168945e-05, "loss": 0.0777, "step": 9455 }, { "epoch": 18.0, "grad_norm": 0.18661636114120483, "learning_rate": 3.997459510955859e-05, "loss": 0.0686, "step": 9456 }, { "epoch": 18.00190204469805, "grad_norm": 0.05548053979873657, "learning_rate": 3.9968243886948235e-05, "loss": 0.0855, "step": 9457 }, { "epoch": 18.0038040893961, "grad_norm": 0.08743029087781906, "learning_rate": 3.996189266433789e-05, "loss": 0.0389, "step": 9458 }, { "epoch": 18.00570613409415, "grad_norm": 0.08235698193311691, "learning_rate": 3.995554144172753e-05, "loss": 0.074, "step": 9459 }, { "epoch": 18.0076081787922, "grad_norm": 0.07210565358400345, "learning_rate": 3.9949190219117184e-05, "loss": 0.0665, "step": 9460 }, { "epoch": 18.009510223490253, "grad_norm": 0.08912031352519989, "learning_rate": 3.994283899650683e-05, "loss": 0.0453, "step": 9461 }, { "epoch": 18.0114122681883, "grad_norm": 0.05706711858510971, "learning_rate": 3.9936487773896474e-05, "loss": 0.044, "step": 9462 }, { "epoch": 18.013314312886354, "grad_norm": 0.129007026553154, "learning_rate": 3.9930136551286126e-05, "loss": 0.052, "step": 9463 }, { "epoch": 18.015216357584404, "grad_norm": 0.043063823133707047, "learning_rate": 3.992378532867577e-05, "loss": 0.0264, "step": 9464 }, { "epoch": 18.017118402282453, "grad_norm": 0.12655483186244965, "learning_rate": 3.991743410606542e-05, "loss": 0.0494, "step": 9465 }, { "epoch": 18.019020446980505, "grad_norm": 0.0626833587884903, "learning_rate": 3.991108288345506e-05, "loss": 0.0386, "step": 9466 }, { "epoch": 18.020922491678554, "grad_norm": 0.10463612526655197, "learning_rate": 3.990473166084471e-05, "loss": 0.0736, "step": 9467 }, { "epoch": 18.022824536376604, "grad_norm": 0.1362827867269516, "learning_rate": 3.9898380438234365e-05, "loss": 0.0405, "step": 9468 }, { "epoch": 18.024726581074656, "grad_norm": 0.07904388010501862, "learning_rate": 3.989202921562401e-05, "loss": 0.0388, "step": 9469 }, { "epoch": 18.026628625772705, "grad_norm": 0.09696925431489944, "learning_rate": 3.988567799301366e-05, "loss": 0.0374, "step": 9470 }, { "epoch": 18.028530670470754, "grad_norm": 0.08310946077108383, "learning_rate": 3.98793267704033e-05, "loss": 0.0488, "step": 9471 }, { "epoch": 18.030432715168807, "grad_norm": 0.06327807903289795, "learning_rate": 3.987297554779295e-05, "loss": 0.0656, "step": 9472 }, { "epoch": 18.032334759866856, "grad_norm": 0.07315284013748169, "learning_rate": 3.98666243251826e-05, "loss": 0.0439, "step": 9473 }, { "epoch": 18.03423680456491, "grad_norm": 0.07850883901119232, "learning_rate": 3.986027310257225e-05, "loss": 0.0356, "step": 9474 }, { "epoch": 18.036138849262958, "grad_norm": 0.03636496514081955, "learning_rate": 3.9853921879961894e-05, "loss": 0.0411, "step": 9475 }, { "epoch": 18.038040893961007, "grad_norm": 0.11127316951751709, "learning_rate": 3.984757065735154e-05, "loss": 0.0594, "step": 9476 }, { "epoch": 18.03994293865906, "grad_norm": 0.15661272406578064, "learning_rate": 3.984121943474119e-05, "loss": 0.0472, "step": 9477 }, { "epoch": 18.04184498335711, "grad_norm": 0.08048074692487717, "learning_rate": 3.9834868212130836e-05, "loss": 0.0471, "step": 9478 }, { "epoch": 18.043747028055158, "grad_norm": 0.0691787526011467, "learning_rate": 3.982851698952049e-05, "loss": 0.0572, "step": 9479 }, { "epoch": 18.04564907275321, "grad_norm": 0.10766416043043137, "learning_rate": 3.982216576691013e-05, "loss": 0.0494, "step": 9480 }, { "epoch": 18.04755111745126, "grad_norm": 0.11665042489767075, "learning_rate": 3.981581454429978e-05, "loss": 0.0449, "step": 9481 }, { "epoch": 18.04945316214931, "grad_norm": 0.1952570229768753, "learning_rate": 3.980946332168943e-05, "loss": 0.0646, "step": 9482 }, { "epoch": 18.05135520684736, "grad_norm": 0.15641002357006073, "learning_rate": 3.9803112099079075e-05, "loss": 0.0513, "step": 9483 }, { "epoch": 18.05325725154541, "grad_norm": 0.08670450747013092, "learning_rate": 3.979676087646872e-05, "loss": 0.0517, "step": 9484 }, { "epoch": 18.055159296243463, "grad_norm": 0.11871020495891571, "learning_rate": 3.9790409653858365e-05, "loss": 0.052, "step": 9485 }, { "epoch": 18.057061340941512, "grad_norm": 0.05478886887431145, "learning_rate": 3.978405843124802e-05, "loss": 0.0514, "step": 9486 }, { "epoch": 18.05896338563956, "grad_norm": 0.15602456033229828, "learning_rate": 3.977770720863767e-05, "loss": 0.0646, "step": 9487 }, { "epoch": 18.060865430337614, "grad_norm": 0.20108024775981903, "learning_rate": 3.9771355986027314e-05, "loss": 0.073, "step": 9488 }, { "epoch": 18.062767475035663, "grad_norm": 0.23799696564674377, "learning_rate": 3.976500476341696e-05, "loss": 0.0526, "step": 9489 }, { "epoch": 18.064669519733712, "grad_norm": 0.11661495268344879, "learning_rate": 3.9758653540806604e-05, "loss": 0.0729, "step": 9490 }, { "epoch": 18.066571564431765, "grad_norm": 0.06901799887418747, "learning_rate": 3.9752302318196256e-05, "loss": 0.0471, "step": 9491 }, { "epoch": 18.068473609129814, "grad_norm": 0.05189414322376251, "learning_rate": 3.97459510955859e-05, "loss": 0.0488, "step": 9492 }, { "epoch": 18.070375653827863, "grad_norm": 0.06394827365875244, "learning_rate": 3.9739599872975546e-05, "loss": 0.0549, "step": 9493 }, { "epoch": 18.072277698525916, "grad_norm": 0.057227928191423416, "learning_rate": 3.97332486503652e-05, "loss": 0.0508, "step": 9494 }, { "epoch": 18.074179743223965, "grad_norm": 0.09823498874902725, "learning_rate": 3.972689742775484e-05, "loss": 0.0394, "step": 9495 }, { "epoch": 18.076081787922018, "grad_norm": 0.1826847344636917, "learning_rate": 3.9720546205144495e-05, "loss": 0.0597, "step": 9496 }, { "epoch": 18.077983832620067, "grad_norm": 0.08450349420309067, "learning_rate": 3.971419498253414e-05, "loss": 0.0665, "step": 9497 }, { "epoch": 18.079885877318116, "grad_norm": 0.05135255306959152, "learning_rate": 3.9707843759923785e-05, "loss": 0.0418, "step": 9498 }, { "epoch": 18.08178792201617, "grad_norm": 0.13653814792633057, "learning_rate": 3.9701492537313437e-05, "loss": 0.0554, "step": 9499 }, { "epoch": 18.083689966714218, "grad_norm": 0.21696190536022186, "learning_rate": 3.969514131470308e-05, "loss": 0.053, "step": 9500 }, { "epoch": 18.085592011412267, "grad_norm": 0.2852039933204651, "learning_rate": 3.9688790092092733e-05, "loss": 0.0645, "step": 9501 }, { "epoch": 18.08749405611032, "grad_norm": 0.2527969479560852, "learning_rate": 3.968243886948237e-05, "loss": 0.0782, "step": 9502 }, { "epoch": 18.08939610080837, "grad_norm": 0.10853327065706253, "learning_rate": 3.9676087646872024e-05, "loss": 0.0895, "step": 9503 }, { "epoch": 18.091298145506418, "grad_norm": 0.08898596465587616, "learning_rate": 3.966973642426167e-05, "loss": 0.0604, "step": 9504 }, { "epoch": 18.09320019020447, "grad_norm": 0.03284698724746704, "learning_rate": 3.966338520165132e-05, "loss": 0.0402, "step": 9505 }, { "epoch": 18.09510223490252, "grad_norm": 0.0882166400551796, "learning_rate": 3.965703397904097e-05, "loss": 0.0717, "step": 9506 }, { "epoch": 18.097004279600572, "grad_norm": 0.06083635240793228, "learning_rate": 3.965068275643061e-05, "loss": 0.0522, "step": 9507 }, { "epoch": 18.09890632429862, "grad_norm": 0.07042375952005386, "learning_rate": 3.964433153382026e-05, "loss": 0.0435, "step": 9508 }, { "epoch": 18.10080836899667, "grad_norm": 0.10826339572668076, "learning_rate": 3.963798031120991e-05, "loss": 0.0635, "step": 9509 }, { "epoch": 18.102710413694723, "grad_norm": 0.09245724976062775, "learning_rate": 3.963162908859956e-05, "loss": 0.0637, "step": 9510 }, { "epoch": 18.104612458392772, "grad_norm": 0.21710948646068573, "learning_rate": 3.9625277865989204e-05, "loss": 0.0586, "step": 9511 }, { "epoch": 18.10651450309082, "grad_norm": 0.10763686895370483, "learning_rate": 3.961892664337885e-05, "loss": 0.0554, "step": 9512 }, { "epoch": 18.108416547788874, "grad_norm": 0.05405883491039276, "learning_rate": 3.96125754207685e-05, "loss": 0.0436, "step": 9513 }, { "epoch": 18.110318592486923, "grad_norm": 0.07853370159864426, "learning_rate": 3.9606224198158146e-05, "loss": 0.042, "step": 9514 }, { "epoch": 18.112220637184972, "grad_norm": 0.07172422111034393, "learning_rate": 3.95998729755478e-05, "loss": 0.0596, "step": 9515 }, { "epoch": 18.114122681883025, "grad_norm": 0.11832434684038162, "learning_rate": 3.959352175293744e-05, "loss": 0.0605, "step": 9516 }, { "epoch": 18.116024726581074, "grad_norm": 0.12276314944028854, "learning_rate": 3.958717053032709e-05, "loss": 0.0341, "step": 9517 }, { "epoch": 18.117926771279127, "grad_norm": 0.057973865419626236, "learning_rate": 3.958081930771674e-05, "loss": 0.0481, "step": 9518 }, { "epoch": 18.119828815977176, "grad_norm": 0.12534184753894806, "learning_rate": 3.9574468085106385e-05, "loss": 0.0603, "step": 9519 }, { "epoch": 18.121730860675225, "grad_norm": 0.12781070172786713, "learning_rate": 3.956811686249603e-05, "loss": 0.0556, "step": 9520 }, { "epoch": 18.123632905373277, "grad_norm": 0.2016114741563797, "learning_rate": 3.9561765639885675e-05, "loss": 0.0567, "step": 9521 }, { "epoch": 18.125534950071327, "grad_norm": 0.15887132287025452, "learning_rate": 3.955541441727533e-05, "loss": 0.0557, "step": 9522 }, { "epoch": 18.127436994769376, "grad_norm": 0.12661483883857727, "learning_rate": 3.954906319466498e-05, "loss": 0.0605, "step": 9523 }, { "epoch": 18.12933903946743, "grad_norm": 0.10252156108617783, "learning_rate": 3.9542711972054624e-05, "loss": 0.0406, "step": 9524 }, { "epoch": 18.131241084165477, "grad_norm": 0.10569868236780167, "learning_rate": 3.953636074944427e-05, "loss": 0.0643, "step": 9525 }, { "epoch": 18.13314312886353, "grad_norm": 0.08536829799413681, "learning_rate": 3.9530009526833914e-05, "loss": 0.0708, "step": 9526 }, { "epoch": 18.13504517356158, "grad_norm": 0.14921991527080536, "learning_rate": 3.9523658304223566e-05, "loss": 0.0765, "step": 9527 }, { "epoch": 18.13694721825963, "grad_norm": 0.19710759818553925, "learning_rate": 3.951730708161321e-05, "loss": 0.0667, "step": 9528 }, { "epoch": 18.13884926295768, "grad_norm": 0.08138823509216309, "learning_rate": 3.9510955859002856e-05, "loss": 0.0736, "step": 9529 }, { "epoch": 18.14075130765573, "grad_norm": 0.06059684976935387, "learning_rate": 3.950460463639251e-05, "loss": 0.0562, "step": 9530 }, { "epoch": 18.14265335235378, "grad_norm": 0.13167749345302582, "learning_rate": 3.949825341378215e-05, "loss": 0.035, "step": 9531 }, { "epoch": 18.144555397051832, "grad_norm": 0.07073647528886795, "learning_rate": 3.9491902191171805e-05, "loss": 0.0455, "step": 9532 }, { "epoch": 18.14645744174988, "grad_norm": 0.10322169214487076, "learning_rate": 3.948555096856145e-05, "loss": 0.0489, "step": 9533 }, { "epoch": 18.14835948644793, "grad_norm": 0.07083230465650558, "learning_rate": 3.9479199745951095e-05, "loss": 0.0558, "step": 9534 }, { "epoch": 18.150261531145983, "grad_norm": 0.08989362418651581, "learning_rate": 3.947284852334075e-05, "loss": 0.0578, "step": 9535 }, { "epoch": 18.152163575844032, "grad_norm": 0.1856735646724701, "learning_rate": 3.946649730073039e-05, "loss": 0.0562, "step": 9536 }, { "epoch": 18.154065620542085, "grad_norm": 0.1071576476097107, "learning_rate": 3.9460146078120044e-05, "loss": 0.0415, "step": 9537 }, { "epoch": 18.155967665240134, "grad_norm": 0.18466047942638397, "learning_rate": 3.945379485550968e-05, "loss": 0.0639, "step": 9538 }, { "epoch": 18.157869709938183, "grad_norm": 0.29862919449806213, "learning_rate": 3.9447443632899334e-05, "loss": 0.0697, "step": 9539 }, { "epoch": 18.159771754636235, "grad_norm": 0.10799165070056915, "learning_rate": 3.944109241028898e-05, "loss": 0.0372, "step": 9540 }, { "epoch": 18.161673799334284, "grad_norm": 0.12296050041913986, "learning_rate": 3.943474118767863e-05, "loss": 0.0696, "step": 9541 }, { "epoch": 18.163575844032334, "grad_norm": 0.19168108701705933, "learning_rate": 3.942838996506828e-05, "loss": 0.0673, "step": 9542 }, { "epoch": 18.165477888730386, "grad_norm": 0.06364916265010834, "learning_rate": 3.942203874245792e-05, "loss": 0.0381, "step": 9543 }, { "epoch": 18.167379933428435, "grad_norm": 0.1005852073431015, "learning_rate": 3.941568751984757e-05, "loss": 0.0535, "step": 9544 }, { "epoch": 18.169281978126484, "grad_norm": 0.10540492832660675, "learning_rate": 3.940933629723722e-05, "loss": 0.0536, "step": 9545 }, { "epoch": 18.171184022824537, "grad_norm": 0.13814863562583923, "learning_rate": 3.940298507462687e-05, "loss": 0.0619, "step": 9546 }, { "epoch": 18.173086067522586, "grad_norm": 0.1335911750793457, "learning_rate": 3.9396633852016515e-05, "loss": 0.0459, "step": 9547 }, { "epoch": 18.17498811222064, "grad_norm": 0.16157962381839752, "learning_rate": 3.939028262940616e-05, "loss": 0.0548, "step": 9548 }, { "epoch": 18.176890156918688, "grad_norm": 0.05575089901685715, "learning_rate": 3.938393140679581e-05, "loss": 0.0643, "step": 9549 }, { "epoch": 18.178792201616737, "grad_norm": 0.10665811598300934, "learning_rate": 3.937758018418546e-05, "loss": 0.0458, "step": 9550 }, { "epoch": 18.18069424631479, "grad_norm": 0.12008770555257797, "learning_rate": 3.937122896157511e-05, "loss": 0.032, "step": 9551 }, { "epoch": 18.18259629101284, "grad_norm": 0.09343229979276657, "learning_rate": 3.9364877738964754e-05, "loss": 0.0488, "step": 9552 }, { "epoch": 18.184498335710888, "grad_norm": 0.08769842237234116, "learning_rate": 3.93585265163544e-05, "loss": 0.066, "step": 9553 }, { "epoch": 18.18640038040894, "grad_norm": 0.07487979531288147, "learning_rate": 3.935217529374405e-05, "loss": 0.0614, "step": 9554 }, { "epoch": 18.18830242510699, "grad_norm": 0.15553294122219086, "learning_rate": 3.9345824071133696e-05, "loss": 0.0797, "step": 9555 }, { "epoch": 18.19020446980504, "grad_norm": 0.19011497497558594, "learning_rate": 3.933947284852334e-05, "loss": 0.0698, "step": 9556 }, { "epoch": 18.19210651450309, "grad_norm": 0.1535508632659912, "learning_rate": 3.9333121625912986e-05, "loss": 0.0686, "step": 9557 }, { "epoch": 18.19400855920114, "grad_norm": 0.18773606419563293, "learning_rate": 3.932677040330264e-05, "loss": 0.0541, "step": 9558 }, { "epoch": 18.195910603899193, "grad_norm": 0.06210647150874138, "learning_rate": 3.932041918069228e-05, "loss": 0.0367, "step": 9559 }, { "epoch": 18.197812648597242, "grad_norm": 0.09794329106807709, "learning_rate": 3.9314067958081935e-05, "loss": 0.06, "step": 9560 }, { "epoch": 18.19971469329529, "grad_norm": 0.0627339705824852, "learning_rate": 3.930771673547158e-05, "loss": 0.0619, "step": 9561 }, { "epoch": 18.201616737993344, "grad_norm": 0.1042981967329979, "learning_rate": 3.9301365512861225e-05, "loss": 0.0616, "step": 9562 }, { "epoch": 18.203518782691393, "grad_norm": 0.05875473469495773, "learning_rate": 3.9295014290250877e-05, "loss": 0.0597, "step": 9563 }, { "epoch": 18.205420827389442, "grad_norm": 0.08454211801290512, "learning_rate": 3.928866306764052e-05, "loss": 0.0553, "step": 9564 }, { "epoch": 18.207322872087495, "grad_norm": 0.052836548537015915, "learning_rate": 3.928231184503017e-05, "loss": 0.0726, "step": 9565 }, { "epoch": 18.209224916785544, "grad_norm": 0.15156498551368713, "learning_rate": 3.927596062241982e-05, "loss": 0.0887, "step": 9566 }, { "epoch": 18.211126961483593, "grad_norm": 0.11899285763502121, "learning_rate": 3.9269609399809464e-05, "loss": 0.0536, "step": 9567 }, { "epoch": 18.213029006181646, "grad_norm": 0.048953719437122345, "learning_rate": 3.9263258177199115e-05, "loss": 0.0678, "step": 9568 }, { "epoch": 18.214931050879695, "grad_norm": 0.1252700537443161, "learning_rate": 3.925690695458876e-05, "loss": 0.0574, "step": 9569 }, { "epoch": 18.216833095577748, "grad_norm": 0.06751298904418945, "learning_rate": 3.9250555731978406e-05, "loss": 0.0457, "step": 9570 }, { "epoch": 18.218735140275797, "grad_norm": 0.0774497240781784, "learning_rate": 3.924420450936806e-05, "loss": 0.0438, "step": 9571 }, { "epoch": 18.220637184973846, "grad_norm": 0.1059066578745842, "learning_rate": 3.92378532867577e-05, "loss": 0.0509, "step": 9572 }, { "epoch": 18.2225392296719, "grad_norm": 0.04546501487493515, "learning_rate": 3.9231502064147354e-05, "loss": 0.0409, "step": 9573 }, { "epoch": 18.224441274369948, "grad_norm": 0.07507932186126709, "learning_rate": 3.922515084153699e-05, "loss": 0.0461, "step": 9574 }, { "epoch": 18.226343319067997, "grad_norm": 0.06796334683895111, "learning_rate": 3.9218799618926644e-05, "loss": 0.0583, "step": 9575 }, { "epoch": 18.22824536376605, "grad_norm": 0.09650825709104538, "learning_rate": 3.921244839631629e-05, "loss": 0.0627, "step": 9576 }, { "epoch": 18.2301474084641, "grad_norm": 0.04558471590280533, "learning_rate": 3.920609717370594e-05, "loss": 0.047, "step": 9577 }, { "epoch": 18.232049453162148, "grad_norm": 0.09572546929121017, "learning_rate": 3.919974595109559e-05, "loss": 0.0425, "step": 9578 }, { "epoch": 18.2339514978602, "grad_norm": 0.14911305904388428, "learning_rate": 3.919339472848523e-05, "loss": 0.0578, "step": 9579 }, { "epoch": 18.23585354255825, "grad_norm": 0.08339757472276688, "learning_rate": 3.918704350587488e-05, "loss": 0.101, "step": 9580 }, { "epoch": 18.237755587256302, "grad_norm": 0.0699375569820404, "learning_rate": 3.918069228326453e-05, "loss": 0.0612, "step": 9581 }, { "epoch": 18.23965763195435, "grad_norm": 0.1783619374036789, "learning_rate": 3.917434106065418e-05, "loss": 0.0563, "step": 9582 }, { "epoch": 18.2415596766524, "grad_norm": 0.06447594612836838, "learning_rate": 3.9167989838043825e-05, "loss": 0.0614, "step": 9583 }, { "epoch": 18.243461721350453, "grad_norm": 0.06985105574131012, "learning_rate": 3.916163861543347e-05, "loss": 0.0364, "step": 9584 }, { "epoch": 18.245363766048502, "grad_norm": 0.1365368813276291, "learning_rate": 3.915528739282312e-05, "loss": 0.0699, "step": 9585 }, { "epoch": 18.24726581074655, "grad_norm": 0.07527942955493927, "learning_rate": 3.914893617021277e-05, "loss": 0.0385, "step": 9586 }, { "epoch": 18.249167855444604, "grad_norm": 0.06135424226522446, "learning_rate": 3.914258494760242e-05, "loss": 0.0781, "step": 9587 }, { "epoch": 18.251069900142653, "grad_norm": 0.04152238368988037, "learning_rate": 3.913623372499206e-05, "loss": 0.066, "step": 9588 }, { "epoch": 18.252971944840702, "grad_norm": 0.12861324846744537, "learning_rate": 3.912988250238171e-05, "loss": 0.0587, "step": 9589 }, { "epoch": 18.254873989538755, "grad_norm": 0.17604462802410126, "learning_rate": 3.912353127977136e-05, "loss": 0.0682, "step": 9590 }, { "epoch": 18.256776034236804, "grad_norm": 0.04712732136249542, "learning_rate": 3.9117180057161006e-05, "loss": 0.0481, "step": 9591 }, { "epoch": 18.258678078934857, "grad_norm": 0.12864311039447784, "learning_rate": 3.911082883455065e-05, "loss": 0.0647, "step": 9592 }, { "epoch": 18.260580123632906, "grad_norm": 0.08881017565727234, "learning_rate": 3.9104477611940296e-05, "loss": 0.0716, "step": 9593 }, { "epoch": 18.262482168330955, "grad_norm": 0.21135424077510834, "learning_rate": 3.909812638932995e-05, "loss": 0.0576, "step": 9594 }, { "epoch": 18.264384213029007, "grad_norm": 0.05159885808825493, "learning_rate": 3.909177516671959e-05, "loss": 0.0697, "step": 9595 }, { "epoch": 18.266286257727057, "grad_norm": 0.05154569074511528, "learning_rate": 3.9085423944109245e-05, "loss": 0.0645, "step": 9596 }, { "epoch": 18.268188302425106, "grad_norm": 0.1738889068365097, "learning_rate": 3.907907272149889e-05, "loss": 0.0484, "step": 9597 }, { "epoch": 18.27009034712316, "grad_norm": 0.11774470657110214, "learning_rate": 3.9072721498888535e-05, "loss": 0.0434, "step": 9598 }, { "epoch": 18.271992391821207, "grad_norm": 0.06924553215503693, "learning_rate": 3.906637027627819e-05, "loss": 0.0341, "step": 9599 }, { "epoch": 18.273894436519257, "grad_norm": 0.15278260409832, "learning_rate": 3.906001905366783e-05, "loss": 0.0477, "step": 9600 }, { "epoch": 18.27579648121731, "grad_norm": 0.036438386887311935, "learning_rate": 3.905366783105748e-05, "loss": 0.0452, "step": 9601 }, { "epoch": 18.27769852591536, "grad_norm": 0.04843940585851669, "learning_rate": 3.904731660844713e-05, "loss": 0.0548, "step": 9602 }, { "epoch": 18.27960057061341, "grad_norm": 0.0953247919678688, "learning_rate": 3.9040965385836774e-05, "loss": 0.0593, "step": 9603 }, { "epoch": 18.28150261531146, "grad_norm": 0.04957016557455063, "learning_rate": 3.9034614163226426e-05, "loss": 0.0571, "step": 9604 }, { "epoch": 18.28340466000951, "grad_norm": 0.07393553107976913, "learning_rate": 3.902826294061607e-05, "loss": 0.0408, "step": 9605 }, { "epoch": 18.285306704707562, "grad_norm": 0.12568874657154083, "learning_rate": 3.9021911718005716e-05, "loss": 0.0572, "step": 9606 }, { "epoch": 18.28720874940561, "grad_norm": 0.19227972626686096, "learning_rate": 3.901556049539537e-05, "loss": 0.0561, "step": 9607 }, { "epoch": 18.28911079410366, "grad_norm": 0.1546197235584259, "learning_rate": 3.900920927278501e-05, "loss": 0.0544, "step": 9608 }, { "epoch": 18.291012838801713, "grad_norm": 0.0979616641998291, "learning_rate": 3.9002858050174665e-05, "loss": 0.0687, "step": 9609 }, { "epoch": 18.292914883499762, "grad_norm": 0.1661892682313919, "learning_rate": 3.89965068275643e-05, "loss": 0.0571, "step": 9610 }, { "epoch": 18.29481692819781, "grad_norm": 0.051106564700603485, "learning_rate": 3.8990155604953955e-05, "loss": 0.0521, "step": 9611 }, { "epoch": 18.296718972895864, "grad_norm": 0.0543074905872345, "learning_rate": 3.89838043823436e-05, "loss": 0.0588, "step": 9612 }, { "epoch": 18.298621017593913, "grad_norm": 0.09150418639183044, "learning_rate": 3.897745315973325e-05, "loss": 0.0831, "step": 9613 }, { "epoch": 18.300523062291965, "grad_norm": 0.08435828238725662, "learning_rate": 3.89711019371229e-05, "loss": 0.06, "step": 9614 }, { "epoch": 18.302425106990015, "grad_norm": 0.07140857726335526, "learning_rate": 3.896475071451254e-05, "loss": 0.0492, "step": 9615 }, { "epoch": 18.304327151688064, "grad_norm": 0.1313878744840622, "learning_rate": 3.8958399491902194e-05, "loss": 0.0446, "step": 9616 }, { "epoch": 18.306229196386116, "grad_norm": 0.16966655850410461, "learning_rate": 3.895204826929184e-05, "loss": 0.0497, "step": 9617 }, { "epoch": 18.308131241084165, "grad_norm": 0.05971246212720871, "learning_rate": 3.894569704668149e-05, "loss": 0.0466, "step": 9618 }, { "epoch": 18.310033285782215, "grad_norm": 0.09588700532913208, "learning_rate": 3.8939345824071136e-05, "loss": 0.0595, "step": 9619 }, { "epoch": 18.311935330480267, "grad_norm": 0.15829181671142578, "learning_rate": 3.893299460146078e-05, "loss": 0.0684, "step": 9620 }, { "epoch": 18.313837375178316, "grad_norm": 0.061553433537483215, "learning_rate": 3.892664337885043e-05, "loss": 0.0504, "step": 9621 }, { "epoch": 18.315739419876365, "grad_norm": 0.1587333083152771, "learning_rate": 3.892029215624008e-05, "loss": 0.0598, "step": 9622 }, { "epoch": 18.317641464574418, "grad_norm": 0.11620922386646271, "learning_rate": 3.891394093362973e-05, "loss": 0.0547, "step": 9623 }, { "epoch": 18.319543509272467, "grad_norm": 0.0708557590842247, "learning_rate": 3.890758971101937e-05, "loss": 0.0519, "step": 9624 }, { "epoch": 18.32144555397052, "grad_norm": 0.10213367640972137, "learning_rate": 3.890123848840902e-05, "loss": 0.0543, "step": 9625 }, { "epoch": 18.32334759866857, "grad_norm": 0.06195381283760071, "learning_rate": 3.889488726579867e-05, "loss": 0.055, "step": 9626 }, { "epoch": 18.325249643366618, "grad_norm": 0.06340436637401581, "learning_rate": 3.8888536043188317e-05, "loss": 0.0644, "step": 9627 }, { "epoch": 18.32715168806467, "grad_norm": 0.5210753679275513, "learning_rate": 3.888218482057796e-05, "loss": 0.0675, "step": 9628 }, { "epoch": 18.32905373276272, "grad_norm": 0.12499348074197769, "learning_rate": 3.887583359796761e-05, "loss": 0.0557, "step": 9629 }, { "epoch": 18.33095577746077, "grad_norm": 0.05937238037586212, "learning_rate": 3.886948237535726e-05, "loss": 0.06, "step": 9630 }, { "epoch": 18.33285782215882, "grad_norm": 0.07244454324245453, "learning_rate": 3.8863131152746904e-05, "loss": 0.0454, "step": 9631 }, { "epoch": 18.33475986685687, "grad_norm": 0.16597986221313477, "learning_rate": 3.8856779930136555e-05, "loss": 0.0826, "step": 9632 }, { "epoch": 18.33666191155492, "grad_norm": 0.09148790687322617, "learning_rate": 3.88504287075262e-05, "loss": 0.0522, "step": 9633 }, { "epoch": 18.338563956252973, "grad_norm": 0.05849875882267952, "learning_rate": 3.8844077484915846e-05, "loss": 0.051, "step": 9634 }, { "epoch": 18.34046600095102, "grad_norm": 0.044688720256090164, "learning_rate": 3.88377262623055e-05, "loss": 0.0503, "step": 9635 }, { "epoch": 18.342368045649074, "grad_norm": 0.06226931884884834, "learning_rate": 3.883137503969514e-05, "loss": 0.0483, "step": 9636 }, { "epoch": 18.344270090347123, "grad_norm": 0.0717255249619484, "learning_rate": 3.882502381708479e-05, "loss": 0.0649, "step": 9637 }, { "epoch": 18.346172135045173, "grad_norm": 0.07575838267803192, "learning_rate": 3.881867259447444e-05, "loss": 0.0658, "step": 9638 }, { "epoch": 18.348074179743225, "grad_norm": 0.1126398965716362, "learning_rate": 3.8812321371864084e-05, "loss": 0.0519, "step": 9639 }, { "epoch": 18.349976224441274, "grad_norm": 0.11585336923599243, "learning_rate": 3.8805970149253736e-05, "loss": 0.0546, "step": 9640 }, { "epoch": 18.351878269139323, "grad_norm": 0.2785167396068573, "learning_rate": 3.879961892664338e-05, "loss": 0.0624, "step": 9641 }, { "epoch": 18.353780313837376, "grad_norm": 0.21943818032741547, "learning_rate": 3.8793267704033026e-05, "loss": 0.0512, "step": 9642 }, { "epoch": 18.355682358535425, "grad_norm": 0.09890948235988617, "learning_rate": 3.878691648142267e-05, "loss": 0.0527, "step": 9643 }, { "epoch": 18.357584403233474, "grad_norm": 0.08608918637037277, "learning_rate": 3.878056525881232e-05, "loss": 0.0714, "step": 9644 }, { "epoch": 18.359486447931527, "grad_norm": 0.21114416420459747, "learning_rate": 3.8774214036201975e-05, "loss": 0.0473, "step": 9645 }, { "epoch": 18.361388492629576, "grad_norm": 0.12862014770507812, "learning_rate": 3.8767862813591614e-05, "loss": 0.0524, "step": 9646 }, { "epoch": 18.36329053732763, "grad_norm": 0.1056724265217781, "learning_rate": 3.8761511590981265e-05, "loss": 0.0483, "step": 9647 }, { "epoch": 18.365192582025678, "grad_norm": 0.05439826101064682, "learning_rate": 3.875516036837091e-05, "loss": 0.0332, "step": 9648 }, { "epoch": 18.367094626723727, "grad_norm": 0.12853200733661652, "learning_rate": 3.874880914576056e-05, "loss": 0.0461, "step": 9649 }, { "epoch": 18.36899667142178, "grad_norm": 0.10463688522577286, "learning_rate": 3.874245792315021e-05, "loss": 0.0686, "step": 9650 }, { "epoch": 18.37089871611983, "grad_norm": 0.10133697092533112, "learning_rate": 3.873610670053985e-05, "loss": 0.0452, "step": 9651 }, { "epoch": 18.372800760817878, "grad_norm": 0.13786332309246063, "learning_rate": 3.8729755477929504e-05, "loss": 0.0674, "step": 9652 }, { "epoch": 18.37470280551593, "grad_norm": 0.1269512176513672, "learning_rate": 3.872340425531915e-05, "loss": 0.0563, "step": 9653 }, { "epoch": 18.37660485021398, "grad_norm": 0.08988190442323685, "learning_rate": 3.87170530327088e-05, "loss": 0.0461, "step": 9654 }, { "epoch": 18.37850689491203, "grad_norm": 0.29547205567359924, "learning_rate": 3.8710701810098446e-05, "loss": 0.0721, "step": 9655 }, { "epoch": 18.38040893961008, "grad_norm": 0.07171815633773804, "learning_rate": 3.870435058748809e-05, "loss": 0.0729, "step": 9656 }, { "epoch": 18.38231098430813, "grad_norm": 0.10650094598531723, "learning_rate": 3.869799936487774e-05, "loss": 0.0643, "step": 9657 }, { "epoch": 18.384213029006183, "grad_norm": 0.0583612322807312, "learning_rate": 3.869164814226739e-05, "loss": 0.0441, "step": 9658 }, { "epoch": 18.386115073704232, "grad_norm": 0.14970900118350983, "learning_rate": 3.868529691965704e-05, "loss": 0.0553, "step": 9659 }, { "epoch": 18.38801711840228, "grad_norm": 0.07470922917127609, "learning_rate": 3.867894569704668e-05, "loss": 0.055, "step": 9660 }, { "epoch": 18.389919163100334, "grad_norm": 0.06020907685160637, "learning_rate": 3.867259447443633e-05, "loss": 0.0705, "step": 9661 }, { "epoch": 18.391821207798383, "grad_norm": 0.12234506756067276, "learning_rate": 3.866624325182598e-05, "loss": 0.0523, "step": 9662 }, { "epoch": 18.393723252496432, "grad_norm": 0.1236337348818779, "learning_rate": 3.865989202921563e-05, "loss": 0.0653, "step": 9663 }, { "epoch": 18.395625297194485, "grad_norm": 0.12261811643838882, "learning_rate": 3.865354080660527e-05, "loss": 0.0414, "step": 9664 }, { "epoch": 18.397527341892534, "grad_norm": 0.07360794395208359, "learning_rate": 3.864718958399492e-05, "loss": 0.059, "step": 9665 }, { "epoch": 18.399429386590583, "grad_norm": 0.07546956092119217, "learning_rate": 3.864083836138457e-05, "loss": 0.0534, "step": 9666 }, { "epoch": 18.401331431288636, "grad_norm": 0.058266524225473404, "learning_rate": 3.8634487138774214e-05, "loss": 0.0653, "step": 9667 }, { "epoch": 18.403233475986685, "grad_norm": 0.21543626487255096, "learning_rate": 3.8628135916163866e-05, "loss": 0.0642, "step": 9668 }, { "epoch": 18.405135520684738, "grad_norm": 0.04944286495447159, "learning_rate": 3.862178469355351e-05, "loss": 0.0543, "step": 9669 }, { "epoch": 18.407037565382787, "grad_norm": 0.0344243198633194, "learning_rate": 3.8615433470943156e-05, "loss": 0.0378, "step": 9670 }, { "epoch": 18.408939610080836, "grad_norm": 0.04360641539096832, "learning_rate": 3.860908224833281e-05, "loss": 0.0411, "step": 9671 }, { "epoch": 18.41084165477889, "grad_norm": 0.0884445533156395, "learning_rate": 3.860273102572245e-05, "loss": 0.0583, "step": 9672 }, { "epoch": 18.412743699476938, "grad_norm": 0.42718735337257385, "learning_rate": 3.85963798031121e-05, "loss": 0.0634, "step": 9673 }, { "epoch": 18.414645744174987, "grad_norm": 0.18887248635292053, "learning_rate": 3.859002858050175e-05, "loss": 0.0466, "step": 9674 }, { "epoch": 18.41654778887304, "grad_norm": 0.11296916007995605, "learning_rate": 3.8583677357891395e-05, "loss": 0.0456, "step": 9675 }, { "epoch": 18.41844983357109, "grad_norm": 0.07237987220287323, "learning_rate": 3.857732613528105e-05, "loss": 0.0856, "step": 9676 }, { "epoch": 18.420351878269138, "grad_norm": 0.08038794994354248, "learning_rate": 3.857097491267069e-05, "loss": 0.0928, "step": 9677 }, { "epoch": 18.42225392296719, "grad_norm": 0.07001104205846786, "learning_rate": 3.856462369006034e-05, "loss": 0.0416, "step": 9678 }, { "epoch": 18.42415596766524, "grad_norm": 0.09842484444379807, "learning_rate": 3.855827246744998e-05, "loss": 0.056, "step": 9679 }, { "epoch": 18.426058012363292, "grad_norm": 0.11220046132802963, "learning_rate": 3.8551921244839634e-05, "loss": 0.0422, "step": 9680 }, { "epoch": 18.42796005706134, "grad_norm": 0.053936492651700974, "learning_rate": 3.8545570022229286e-05, "loss": 0.044, "step": 9681 }, { "epoch": 18.42986210175939, "grad_norm": 0.061404965817928314, "learning_rate": 3.8539218799618924e-05, "loss": 0.0497, "step": 9682 }, { "epoch": 18.431764146457443, "grad_norm": 0.15061938762664795, "learning_rate": 3.8532867577008576e-05, "loss": 0.0508, "step": 9683 }, { "epoch": 18.433666191155492, "grad_norm": 0.13211102783679962, "learning_rate": 3.852651635439822e-05, "loss": 0.057, "step": 9684 }, { "epoch": 18.43556823585354, "grad_norm": 0.0436306893825531, "learning_rate": 3.852016513178787e-05, "loss": 0.0682, "step": 9685 }, { "epoch": 18.437470280551594, "grad_norm": 0.19738776981830597, "learning_rate": 3.851381390917752e-05, "loss": 0.059, "step": 9686 }, { "epoch": 18.439372325249643, "grad_norm": 0.27122023701667786, "learning_rate": 3.850746268656716e-05, "loss": 0.055, "step": 9687 }, { "epoch": 18.441274369947692, "grad_norm": 0.11561478674411774, "learning_rate": 3.8501111463956815e-05, "loss": 0.0462, "step": 9688 }, { "epoch": 18.443176414645745, "grad_norm": 0.09915643185377121, "learning_rate": 3.849476024134646e-05, "loss": 0.0674, "step": 9689 }, { "epoch": 18.445078459343794, "grad_norm": 0.049296531826257706, "learning_rate": 3.848840901873611e-05, "loss": 0.0515, "step": 9690 }, { "epoch": 18.446980504041846, "grad_norm": 0.04804540425539017, "learning_rate": 3.848205779612576e-05, "loss": 0.0466, "step": 9691 }, { "epoch": 18.448882548739896, "grad_norm": 0.17450645565986633, "learning_rate": 3.84757065735154e-05, "loss": 0.0555, "step": 9692 }, { "epoch": 18.450784593437945, "grad_norm": 0.1308811604976654, "learning_rate": 3.8469355350905054e-05, "loss": 0.0601, "step": 9693 }, { "epoch": 18.452686638135997, "grad_norm": 0.09358209371566772, "learning_rate": 3.84630041282947e-05, "loss": 0.0626, "step": 9694 }, { "epoch": 18.454588682834046, "grad_norm": 0.13447386026382446, "learning_rate": 3.845665290568435e-05, "loss": 0.0655, "step": 9695 }, { "epoch": 18.456490727532096, "grad_norm": 0.08530683815479279, "learning_rate": 3.845030168307399e-05, "loss": 0.0517, "step": 9696 }, { "epoch": 18.458392772230148, "grad_norm": 0.038144953548908234, "learning_rate": 3.844395046046364e-05, "loss": 0.0435, "step": 9697 }, { "epoch": 18.460294816928197, "grad_norm": 0.11692409962415695, "learning_rate": 3.8437599237853286e-05, "loss": 0.0495, "step": 9698 }, { "epoch": 18.46219686162625, "grad_norm": 0.07053724676370621, "learning_rate": 3.843124801524294e-05, "loss": 0.0512, "step": 9699 }, { "epoch": 18.4640989063243, "grad_norm": 0.107081338763237, "learning_rate": 3.842489679263258e-05, "loss": 0.061, "step": 9700 }, { "epoch": 18.466000951022348, "grad_norm": 0.07253830879926682, "learning_rate": 3.841854557002223e-05, "loss": 0.0708, "step": 9701 }, { "epoch": 18.4679029957204, "grad_norm": 0.1957426518201828, "learning_rate": 3.841219434741188e-05, "loss": 0.0759, "step": 9702 }, { "epoch": 18.46980504041845, "grad_norm": 0.14733651280403137, "learning_rate": 3.8405843124801525e-05, "loss": 0.037, "step": 9703 }, { "epoch": 18.4717070851165, "grad_norm": 0.1280432641506195, "learning_rate": 3.8399491902191176e-05, "loss": 0.053, "step": 9704 }, { "epoch": 18.47360912981455, "grad_norm": 0.0851169005036354, "learning_rate": 3.839314067958082e-05, "loss": 0.073, "step": 9705 }, { "epoch": 18.4755111745126, "grad_norm": 0.15699921548366547, "learning_rate": 3.8386789456970467e-05, "loss": 0.0489, "step": 9706 }, { "epoch": 18.47741321921065, "grad_norm": 0.24640056490898132, "learning_rate": 3.838043823436012e-05, "loss": 0.067, "step": 9707 }, { "epoch": 18.479315263908703, "grad_norm": 0.0925830528140068, "learning_rate": 3.8374087011749763e-05, "loss": 0.0458, "step": 9708 }, { "epoch": 18.48121730860675, "grad_norm": 0.17469635605812073, "learning_rate": 3.836773578913941e-05, "loss": 0.0629, "step": 9709 }, { "epoch": 18.483119353304804, "grad_norm": 0.10162944346666336, "learning_rate": 3.836138456652906e-05, "loss": 0.054, "step": 9710 }, { "epoch": 18.485021398002853, "grad_norm": 0.1978461891412735, "learning_rate": 3.8355033343918705e-05, "loss": 0.047, "step": 9711 }, { "epoch": 18.486923442700903, "grad_norm": 0.06301341205835342, "learning_rate": 3.834868212130836e-05, "loss": 0.0588, "step": 9712 }, { "epoch": 18.488825487398955, "grad_norm": 0.08585040271282196, "learning_rate": 3.8342330898698e-05, "loss": 0.0502, "step": 9713 }, { "epoch": 18.490727532097004, "grad_norm": 0.05659103021025658, "learning_rate": 3.833597967608765e-05, "loss": 0.0508, "step": 9714 }, { "epoch": 18.492629576795053, "grad_norm": 0.07119178771972656, "learning_rate": 3.832962845347729e-05, "loss": 0.0379, "step": 9715 }, { "epoch": 18.494531621493106, "grad_norm": 0.12380468100309372, "learning_rate": 3.8323277230866944e-05, "loss": 0.0693, "step": 9716 }, { "epoch": 18.496433666191155, "grad_norm": 0.09325138479471207, "learning_rate": 3.8316926008256596e-05, "loss": 0.0478, "step": 9717 }, { "epoch": 18.498335710889204, "grad_norm": 0.04756435379385948, "learning_rate": 3.8310574785646234e-05, "loss": 0.0549, "step": 9718 }, { "epoch": 18.500237755587257, "grad_norm": 0.06491974741220474, "learning_rate": 3.8304223563035886e-05, "loss": 0.0498, "step": 9719 }, { "epoch": 18.502139800285306, "grad_norm": 0.07270771265029907, "learning_rate": 3.829787234042553e-05, "loss": 0.0391, "step": 9720 }, { "epoch": 18.50404184498336, "grad_norm": 0.07048745453357697, "learning_rate": 3.829152111781518e-05, "loss": 0.0373, "step": 9721 }, { "epoch": 18.505943889681408, "grad_norm": 0.1918988972902298, "learning_rate": 3.828516989520483e-05, "loss": 0.0563, "step": 9722 }, { "epoch": 18.507845934379457, "grad_norm": 0.18292328715324402, "learning_rate": 3.827881867259447e-05, "loss": 0.0504, "step": 9723 }, { "epoch": 18.50974797907751, "grad_norm": 0.06293179094791412, "learning_rate": 3.8272467449984125e-05, "loss": 0.0593, "step": 9724 }, { "epoch": 18.51165002377556, "grad_norm": 0.0628834217786789, "learning_rate": 3.826611622737377e-05, "loss": 0.0526, "step": 9725 }, { "epoch": 18.513552068473608, "grad_norm": 0.15109018981456757, "learning_rate": 3.825976500476342e-05, "loss": 0.0599, "step": 9726 }, { "epoch": 18.51545411317166, "grad_norm": 0.0666005089879036, "learning_rate": 3.825341378215306e-05, "loss": 0.069, "step": 9727 }, { "epoch": 18.51735615786971, "grad_norm": 0.1296105533838272, "learning_rate": 3.824706255954271e-05, "loss": 0.0578, "step": 9728 }, { "epoch": 18.51925820256776, "grad_norm": 0.07938151061534882, "learning_rate": 3.8240711336932364e-05, "loss": 0.0481, "step": 9729 }, { "epoch": 18.52116024726581, "grad_norm": 0.20475080609321594, "learning_rate": 3.823436011432201e-05, "loss": 0.0718, "step": 9730 }, { "epoch": 18.52306229196386, "grad_norm": 0.21218127012252808, "learning_rate": 3.822800889171166e-05, "loss": 0.0447, "step": 9731 }, { "epoch": 18.524964336661913, "grad_norm": 0.11543460190296173, "learning_rate": 3.82216576691013e-05, "loss": 0.0589, "step": 9732 }, { "epoch": 18.526866381359962, "grad_norm": 0.10953544080257416, "learning_rate": 3.821530644649095e-05, "loss": 0.0467, "step": 9733 }, { "epoch": 18.52876842605801, "grad_norm": 0.13765722513198853, "learning_rate": 3.8208955223880596e-05, "loss": 0.0424, "step": 9734 }, { "epoch": 18.530670470756064, "grad_norm": 0.0649055689573288, "learning_rate": 3.820260400127025e-05, "loss": 0.0432, "step": 9735 }, { "epoch": 18.532572515454113, "grad_norm": 0.041515689343214035, "learning_rate": 3.819625277865989e-05, "loss": 0.0477, "step": 9736 }, { "epoch": 18.534474560152162, "grad_norm": 0.046874310821294785, "learning_rate": 3.818990155604954e-05, "loss": 0.051, "step": 9737 }, { "epoch": 18.536376604850215, "grad_norm": 0.12375843524932861, "learning_rate": 3.818355033343919e-05, "loss": 0.0588, "step": 9738 }, { "epoch": 18.538278649548264, "grad_norm": 0.06181807816028595, "learning_rate": 3.8177199110828835e-05, "loss": 0.044, "step": 9739 }, { "epoch": 18.540180694246313, "grad_norm": 0.12714730203151703, "learning_rate": 3.817084788821849e-05, "loss": 0.0484, "step": 9740 }, { "epoch": 18.542082738944366, "grad_norm": 0.05887744575738907, "learning_rate": 3.816449666560813e-05, "loss": 0.0635, "step": 9741 }, { "epoch": 18.543984783642415, "grad_norm": 0.06551394611597061, "learning_rate": 3.815814544299778e-05, "loss": 0.0699, "step": 9742 }, { "epoch": 18.545886828340468, "grad_norm": 0.18102426826953888, "learning_rate": 3.815179422038743e-05, "loss": 0.0656, "step": 9743 }, { "epoch": 18.547788873038517, "grad_norm": 0.15179798007011414, "learning_rate": 3.8145442997777074e-05, "loss": 0.0622, "step": 9744 }, { "epoch": 18.549690917736566, "grad_norm": 0.17021843791007996, "learning_rate": 3.813909177516672e-05, "loss": 0.0572, "step": 9745 }, { "epoch": 18.55159296243462, "grad_norm": 0.05107571929693222, "learning_rate": 3.813274055255637e-05, "loss": 0.0587, "step": 9746 }, { "epoch": 18.553495007132668, "grad_norm": 0.09118980914354324, "learning_rate": 3.8126389329946016e-05, "loss": 0.0641, "step": 9747 }, { "epoch": 18.555397051830717, "grad_norm": 0.12549744546413422, "learning_rate": 3.812003810733567e-05, "loss": 0.0464, "step": 9748 }, { "epoch": 18.55729909652877, "grad_norm": 0.05056701973080635, "learning_rate": 3.811368688472531e-05, "loss": 0.0522, "step": 9749 }, { "epoch": 18.55920114122682, "grad_norm": 0.04617400839924812, "learning_rate": 3.810733566211496e-05, "loss": 0.0492, "step": 9750 }, { "epoch": 18.561103185924868, "grad_norm": 0.16483484208583832, "learning_rate": 3.81009844395046e-05, "loss": 0.0613, "step": 9751 }, { "epoch": 18.56300523062292, "grad_norm": 0.07073646783828735, "learning_rate": 3.8094633216894255e-05, "loss": 0.0484, "step": 9752 }, { "epoch": 18.56490727532097, "grad_norm": 0.07303962856531143, "learning_rate": 3.80882819942839e-05, "loss": 0.0639, "step": 9753 }, { "epoch": 18.566809320019022, "grad_norm": 0.07416113466024399, "learning_rate": 3.8081930771673545e-05, "loss": 0.0531, "step": 9754 }, { "epoch": 18.56871136471707, "grad_norm": 0.03861843794584274, "learning_rate": 3.80755795490632e-05, "loss": 0.0566, "step": 9755 }, { "epoch": 18.57061340941512, "grad_norm": 0.15391989052295685, "learning_rate": 3.806922832645284e-05, "loss": 0.0696, "step": 9756 }, { "epoch": 18.572515454113173, "grad_norm": 0.10909277945756912, "learning_rate": 3.8062877103842494e-05, "loss": 0.0531, "step": 9757 }, { "epoch": 18.574417498811222, "grad_norm": 0.24341370165348053, "learning_rate": 3.805652588123214e-05, "loss": 0.0571, "step": 9758 }, { "epoch": 18.57631954350927, "grad_norm": 0.0979224294424057, "learning_rate": 3.8050174658621784e-05, "loss": 0.0577, "step": 9759 }, { "epoch": 18.578221588207324, "grad_norm": 0.13480636477470398, "learning_rate": 3.8043823436011436e-05, "loss": 0.0605, "step": 9760 }, { "epoch": 18.580123632905373, "grad_norm": 0.18941956758499146, "learning_rate": 3.803747221340108e-05, "loss": 0.0463, "step": 9761 }, { "epoch": 18.582025677603422, "grad_norm": 0.049710191786289215, "learning_rate": 3.803112099079073e-05, "loss": 0.0525, "step": 9762 }, { "epoch": 18.583927722301475, "grad_norm": 0.17660582065582275, "learning_rate": 3.802476976818037e-05, "loss": 0.0711, "step": 9763 }, { "epoch": 18.585829766999524, "grad_norm": 0.22386837005615234, "learning_rate": 3.801841854557002e-05, "loss": 0.072, "step": 9764 }, { "epoch": 18.587731811697576, "grad_norm": 0.11074668169021606, "learning_rate": 3.8012067322959674e-05, "loss": 0.0549, "step": 9765 }, { "epoch": 18.589633856395626, "grad_norm": 0.055570632219314575, "learning_rate": 3.800571610034932e-05, "loss": 0.0565, "step": 9766 }, { "epoch": 18.591535901093675, "grad_norm": 0.14890772104263306, "learning_rate": 3.799936487773897e-05, "loss": 0.0642, "step": 9767 }, { "epoch": 18.593437945791727, "grad_norm": 0.15929993987083435, "learning_rate": 3.799301365512861e-05, "loss": 0.0684, "step": 9768 }, { "epoch": 18.595339990489776, "grad_norm": 0.12377023696899414, "learning_rate": 3.798666243251826e-05, "loss": 0.0611, "step": 9769 }, { "epoch": 18.597242035187826, "grad_norm": 0.15041153132915497, "learning_rate": 3.7980311209907907e-05, "loss": 0.0554, "step": 9770 }, { "epoch": 18.59914407988588, "grad_norm": 0.12970572710037231, "learning_rate": 3.797395998729756e-05, "loss": 0.0572, "step": 9771 }, { "epoch": 18.601046124583927, "grad_norm": 0.15677066147327423, "learning_rate": 3.7967608764687203e-05, "loss": 0.0614, "step": 9772 }, { "epoch": 18.602948169281976, "grad_norm": 0.06385092437267303, "learning_rate": 3.796125754207685e-05, "loss": 0.0552, "step": 9773 }, { "epoch": 18.60485021398003, "grad_norm": 0.08093471825122833, "learning_rate": 3.79549063194665e-05, "loss": 0.0529, "step": 9774 }, { "epoch": 18.60675225867808, "grad_norm": 0.09495603293180466, "learning_rate": 3.7948555096856145e-05, "loss": 0.0734, "step": 9775 }, { "epoch": 18.60865430337613, "grad_norm": 0.05810278654098511, "learning_rate": 3.79422038742458e-05, "loss": 0.0792, "step": 9776 }, { "epoch": 18.61055634807418, "grad_norm": 0.09176381677389145, "learning_rate": 3.793585265163544e-05, "loss": 0.0643, "step": 9777 }, { "epoch": 18.61245839277223, "grad_norm": 0.16379646956920624, "learning_rate": 3.792950142902509e-05, "loss": 0.0446, "step": 9778 }, { "epoch": 18.614360437470282, "grad_norm": 0.1531377136707306, "learning_rate": 3.792315020641474e-05, "loss": 0.0583, "step": 9779 }, { "epoch": 18.61626248216833, "grad_norm": 0.09140586107969284, "learning_rate": 3.7916798983804384e-05, "loss": 0.0493, "step": 9780 }, { "epoch": 18.61816452686638, "grad_norm": 0.10958456248044968, "learning_rate": 3.791044776119403e-05, "loss": 0.0586, "step": 9781 }, { "epoch": 18.620066571564433, "grad_norm": 0.19568602740764618, "learning_rate": 3.7904096538583674e-05, "loss": 0.0589, "step": 9782 }, { "epoch": 18.621968616262482, "grad_norm": 0.14151211082935333, "learning_rate": 3.7897745315973326e-05, "loss": 0.0544, "step": 9783 }, { "epoch": 18.62387066096053, "grad_norm": 0.05674430727958679, "learning_rate": 3.789139409336298e-05, "loss": 0.0442, "step": 9784 }, { "epoch": 18.625772705658584, "grad_norm": 0.16973741352558136, "learning_rate": 3.788504287075262e-05, "loss": 0.0504, "step": 9785 }, { "epoch": 18.627674750356633, "grad_norm": 0.24356374144554138, "learning_rate": 3.787869164814227e-05, "loss": 0.0613, "step": 9786 }, { "epoch": 18.629576795054685, "grad_norm": 0.10595981031656265, "learning_rate": 3.787234042553191e-05, "loss": 0.0528, "step": 9787 }, { "epoch": 18.631478839752734, "grad_norm": 0.19772733747959137, "learning_rate": 3.7865989202921565e-05, "loss": 0.0585, "step": 9788 }, { "epoch": 18.633380884450784, "grad_norm": 0.05060689151287079, "learning_rate": 3.785963798031121e-05, "loss": 0.0452, "step": 9789 }, { "epoch": 18.635282929148836, "grad_norm": 0.3703983724117279, "learning_rate": 3.7853286757700855e-05, "loss": 0.0902, "step": 9790 }, { "epoch": 18.637184973846885, "grad_norm": 0.17736543715000153, "learning_rate": 3.784693553509051e-05, "loss": 0.0503, "step": 9791 }, { "epoch": 18.639087018544934, "grad_norm": 0.13419793546199799, "learning_rate": 3.784058431248015e-05, "loss": 0.0479, "step": 9792 }, { "epoch": 18.640989063242987, "grad_norm": 0.08425474166870117, "learning_rate": 3.7834233089869804e-05, "loss": 0.0586, "step": 9793 }, { "epoch": 18.642891107941036, "grad_norm": 0.2423335611820221, "learning_rate": 3.782788186725945e-05, "loss": 0.0635, "step": 9794 }, { "epoch": 18.644793152639085, "grad_norm": 0.09958801418542862, "learning_rate": 3.7821530644649094e-05, "loss": 0.0569, "step": 9795 }, { "epoch": 18.646695197337138, "grad_norm": 0.0595712773501873, "learning_rate": 3.7815179422038746e-05, "loss": 0.0542, "step": 9796 }, { "epoch": 18.648597242035187, "grad_norm": 0.13254064321517944, "learning_rate": 3.780882819942839e-05, "loss": 0.0587, "step": 9797 }, { "epoch": 18.65049928673324, "grad_norm": 0.0418279692530632, "learning_rate": 3.780247697681804e-05, "loss": 0.0637, "step": 9798 }, { "epoch": 18.65240133143129, "grad_norm": 0.1887357085943222, "learning_rate": 3.779612575420768e-05, "loss": 0.061, "step": 9799 }, { "epoch": 18.654303376129338, "grad_norm": 0.07455997169017792, "learning_rate": 3.778977453159733e-05, "loss": 0.0563, "step": 9800 }, { "epoch": 18.65620542082739, "grad_norm": 0.1308373212814331, "learning_rate": 3.7783423308986985e-05, "loss": 0.0601, "step": 9801 }, { "epoch": 18.65810746552544, "grad_norm": 0.13394196331501007, "learning_rate": 3.777707208637663e-05, "loss": 0.0591, "step": 9802 }, { "epoch": 18.66000951022349, "grad_norm": 0.12642307579517365, "learning_rate": 3.777072086376628e-05, "loss": 0.0466, "step": 9803 }, { "epoch": 18.66191155492154, "grad_norm": 0.20655491948127747, "learning_rate": 3.776436964115592e-05, "loss": 0.0612, "step": 9804 }, { "epoch": 18.66381359961959, "grad_norm": 0.045045897364616394, "learning_rate": 3.775801841854557e-05, "loss": 0.06, "step": 9805 }, { "epoch": 18.665715644317643, "grad_norm": 0.16911624372005463, "learning_rate": 3.775166719593522e-05, "loss": 0.0519, "step": 9806 }, { "epoch": 18.667617689015692, "grad_norm": 0.10162369161844254, "learning_rate": 3.774531597332487e-05, "loss": 0.0657, "step": 9807 }, { "epoch": 18.66951973371374, "grad_norm": 0.06274081021547318, "learning_rate": 3.7738964750714514e-05, "loss": 0.0473, "step": 9808 }, { "epoch": 18.671421778411794, "grad_norm": 0.1434098482131958, "learning_rate": 3.773261352810416e-05, "loss": 0.0669, "step": 9809 }, { "epoch": 18.673323823109843, "grad_norm": 0.29461508989334106, "learning_rate": 3.772626230549381e-05, "loss": 0.053, "step": 9810 }, { "epoch": 18.675225867807892, "grad_norm": 0.12209858745336533, "learning_rate": 3.7719911082883456e-05, "loss": 0.0585, "step": 9811 }, { "epoch": 18.677127912505945, "grad_norm": 0.16458265483379364, "learning_rate": 3.771355986027311e-05, "loss": 0.0582, "step": 9812 }, { "epoch": 18.679029957203994, "grad_norm": 0.08619740605354309, "learning_rate": 3.770720863766275e-05, "loss": 0.0412, "step": 9813 }, { "epoch": 18.680932001902043, "grad_norm": 0.08477325737476349, "learning_rate": 3.77008574150524e-05, "loss": 0.0433, "step": 9814 }, { "epoch": 18.682834046600096, "grad_norm": 0.35625943541526794, "learning_rate": 3.769450619244205e-05, "loss": 0.0609, "step": 9815 }, { "epoch": 18.684736091298145, "grad_norm": 0.08370004594326019, "learning_rate": 3.7688154969831695e-05, "loss": 0.0459, "step": 9816 }, { "epoch": 18.686638135996198, "grad_norm": 0.3711557388305664, "learning_rate": 3.768180374722134e-05, "loss": 0.0722, "step": 9817 }, { "epoch": 18.688540180694247, "grad_norm": 0.17637227475643158, "learning_rate": 3.7675452524610985e-05, "loss": 0.0735, "step": 9818 }, { "epoch": 18.690442225392296, "grad_norm": 0.06635082513093948, "learning_rate": 3.766910130200064e-05, "loss": 0.0682, "step": 9819 }, { "epoch": 18.69234427009035, "grad_norm": 0.061188675463199615, "learning_rate": 3.766275007939029e-05, "loss": 0.0557, "step": 9820 }, { "epoch": 18.694246314788398, "grad_norm": 0.06702593713998795, "learning_rate": 3.7656398856779934e-05, "loss": 0.0534, "step": 9821 }, { "epoch": 18.696148359486447, "grad_norm": 0.18100468814373016, "learning_rate": 3.765004763416958e-05, "loss": 0.0593, "step": 9822 }, { "epoch": 18.6980504041845, "grad_norm": 0.09489830583333969, "learning_rate": 3.7643696411559224e-05, "loss": 0.07, "step": 9823 }, { "epoch": 18.69995244888255, "grad_norm": 0.10796690732240677, "learning_rate": 3.7637345188948876e-05, "loss": 0.0696, "step": 9824 }, { "epoch": 18.701854493580598, "grad_norm": 0.09641650319099426, "learning_rate": 3.763099396633852e-05, "loss": 0.0543, "step": 9825 }, { "epoch": 18.70375653827865, "grad_norm": 0.17483478784561157, "learning_rate": 3.7624642743728166e-05, "loss": 0.0752, "step": 9826 }, { "epoch": 18.7056585829767, "grad_norm": 0.07531461119651794, "learning_rate": 3.761829152111782e-05, "loss": 0.0588, "step": 9827 }, { "epoch": 18.707560627674752, "grad_norm": 0.11430740356445312, "learning_rate": 3.761194029850746e-05, "loss": 0.0515, "step": 9828 }, { "epoch": 18.7094626723728, "grad_norm": 0.10490404814481735, "learning_rate": 3.7605589075897114e-05, "loss": 0.0527, "step": 9829 }, { "epoch": 18.71136471707085, "grad_norm": 0.24117016792297363, "learning_rate": 3.759923785328676e-05, "loss": 0.0681, "step": 9830 }, { "epoch": 18.713266761768903, "grad_norm": 0.05267453193664551, "learning_rate": 3.7592886630676405e-05, "loss": 0.0641, "step": 9831 }, { "epoch": 18.715168806466952, "grad_norm": 0.13616354763507843, "learning_rate": 3.7586535408066056e-05, "loss": 0.0584, "step": 9832 }, { "epoch": 18.717070851165, "grad_norm": 0.045377179980278015, "learning_rate": 3.75801841854557e-05, "loss": 0.0579, "step": 9833 }, { "epoch": 18.718972895863054, "grad_norm": 0.30514270067214966, "learning_rate": 3.757383296284535e-05, "loss": 0.0697, "step": 9834 }, { "epoch": 18.720874940561103, "grad_norm": 0.19669856131076813, "learning_rate": 3.756748174023499e-05, "loss": 0.067, "step": 9835 }, { "epoch": 18.722776985259152, "grad_norm": 0.13380081951618195, "learning_rate": 3.7561130517624643e-05, "loss": 0.0563, "step": 9836 }, { "epoch": 18.724679029957205, "grad_norm": 0.0629597082734108, "learning_rate": 3.755477929501429e-05, "loss": 0.0643, "step": 9837 }, { "epoch": 18.726581074655254, "grad_norm": 0.11358380317687988, "learning_rate": 3.754842807240394e-05, "loss": 0.0594, "step": 9838 }, { "epoch": 18.728483119353307, "grad_norm": 0.28481239080429077, "learning_rate": 3.754207684979359e-05, "loss": 0.0534, "step": 9839 }, { "epoch": 18.730385164051356, "grad_norm": 0.4196884334087372, "learning_rate": 3.753572562718323e-05, "loss": 0.0625, "step": 9840 }, { "epoch": 18.732287208749405, "grad_norm": 0.24829143285751343, "learning_rate": 3.752937440457288e-05, "loss": 0.062, "step": 9841 }, { "epoch": 18.734189253447457, "grad_norm": 0.047377172857522964, "learning_rate": 3.752302318196253e-05, "loss": 0.0363, "step": 9842 }, { "epoch": 18.736091298145507, "grad_norm": 0.07733766734600067, "learning_rate": 3.751667195935218e-05, "loss": 0.0448, "step": 9843 }, { "epoch": 18.737993342843556, "grad_norm": 0.1072051152586937, "learning_rate": 3.7510320736741824e-05, "loss": 0.0521, "step": 9844 }, { "epoch": 18.73989538754161, "grad_norm": 0.08531222492456436, "learning_rate": 3.750396951413147e-05, "loss": 0.0828, "step": 9845 }, { "epoch": 18.741797432239657, "grad_norm": 0.091969795525074, "learning_rate": 3.749761829152112e-05, "loss": 0.0317, "step": 9846 }, { "epoch": 18.743699476937707, "grad_norm": 0.061905376613140106, "learning_rate": 3.7491267068910766e-05, "loss": 0.0319, "step": 9847 }, { "epoch": 18.74560152163576, "grad_norm": 0.17943806946277618, "learning_rate": 3.748491584630042e-05, "loss": 0.0665, "step": 9848 }, { "epoch": 18.74750356633381, "grad_norm": 0.06048533692955971, "learning_rate": 3.747856462369006e-05, "loss": 0.0466, "step": 9849 }, { "epoch": 18.74940561103186, "grad_norm": 0.07251735031604767, "learning_rate": 3.747221340107971e-05, "loss": 0.0579, "step": 9850 }, { "epoch": 18.75130765572991, "grad_norm": 0.07588538527488708, "learning_rate": 3.746586217846936e-05, "loss": 0.0602, "step": 9851 }, { "epoch": 18.75320970042796, "grad_norm": 0.2024068981409073, "learning_rate": 3.7459510955859005e-05, "loss": 0.0633, "step": 9852 }, { "epoch": 18.755111745126012, "grad_norm": 0.06142650172114372, "learning_rate": 3.745315973324865e-05, "loss": 0.0575, "step": 9853 }, { "epoch": 18.75701378982406, "grad_norm": 0.11343108862638474, "learning_rate": 3.7446808510638295e-05, "loss": 0.0657, "step": 9854 }, { "epoch": 18.75891583452211, "grad_norm": 0.2155323028564453, "learning_rate": 3.744045728802795e-05, "loss": 0.0611, "step": 9855 }, { "epoch": 18.760817879220163, "grad_norm": 0.11705797165632248, "learning_rate": 3.74341060654176e-05, "loss": 0.0512, "step": 9856 }, { "epoch": 18.762719923918212, "grad_norm": 0.09704883396625519, "learning_rate": 3.7427754842807244e-05, "loss": 0.0471, "step": 9857 }, { "epoch": 18.76462196861626, "grad_norm": 0.08368376642465591, "learning_rate": 3.742140362019689e-05, "loss": 0.0594, "step": 9858 }, { "epoch": 18.766524013314314, "grad_norm": 0.1207922101020813, "learning_rate": 3.7415052397586534e-05, "loss": 0.042, "step": 9859 }, { "epoch": 18.768426058012363, "grad_norm": 0.039832353591918945, "learning_rate": 3.7408701174976186e-05, "loss": 0.053, "step": 9860 }, { "epoch": 18.770328102710415, "grad_norm": 0.045956000685691833, "learning_rate": 3.740234995236583e-05, "loss": 0.0463, "step": 9861 }, { "epoch": 18.772230147408465, "grad_norm": 0.12723292410373688, "learning_rate": 3.7395998729755476e-05, "loss": 0.0632, "step": 9862 }, { "epoch": 18.774132192106514, "grad_norm": 0.04386424273252487, "learning_rate": 3.738964750714513e-05, "loss": 0.061, "step": 9863 }, { "epoch": 18.776034236804566, "grad_norm": 0.12253253161907196, "learning_rate": 3.738329628453477e-05, "loss": 0.0559, "step": 9864 }, { "epoch": 18.777936281502615, "grad_norm": 0.11969088017940521, "learning_rate": 3.7376945061924425e-05, "loss": 0.069, "step": 9865 }, { "epoch": 18.779838326200665, "grad_norm": 0.252768874168396, "learning_rate": 3.737059383931407e-05, "loss": 0.0745, "step": 9866 }, { "epoch": 18.781740370898717, "grad_norm": 0.1536416858434677, "learning_rate": 3.7364242616703715e-05, "loss": 0.0688, "step": 9867 }, { "epoch": 18.783642415596766, "grad_norm": 0.16543470323085785, "learning_rate": 3.735789139409337e-05, "loss": 0.0532, "step": 9868 }, { "epoch": 18.785544460294815, "grad_norm": 0.14163970947265625, "learning_rate": 3.735154017148301e-05, "loss": 0.0601, "step": 9869 }, { "epoch": 18.787446504992868, "grad_norm": 0.11656033247709274, "learning_rate": 3.7345188948872664e-05, "loss": 0.0668, "step": 9870 }, { "epoch": 18.789348549690917, "grad_norm": 0.0778069794178009, "learning_rate": 3.73388377262623e-05, "loss": 0.0359, "step": 9871 }, { "epoch": 18.79125059438897, "grad_norm": 0.2035890817642212, "learning_rate": 3.7332486503651954e-05, "loss": 0.06, "step": 9872 }, { "epoch": 18.79315263908702, "grad_norm": 0.040098268538713455, "learning_rate": 3.73261352810416e-05, "loss": 0.0392, "step": 9873 }, { "epoch": 18.795054683785068, "grad_norm": 0.12130275368690491, "learning_rate": 3.731978405843125e-05, "loss": 0.053, "step": 9874 }, { "epoch": 18.79695672848312, "grad_norm": 0.2116493582725525, "learning_rate": 3.73134328358209e-05, "loss": 0.0629, "step": 9875 }, { "epoch": 18.79885877318117, "grad_norm": 0.19741222262382507, "learning_rate": 3.730708161321054e-05, "loss": 0.0778, "step": 9876 }, { "epoch": 18.80076081787922, "grad_norm": 0.09970089048147202, "learning_rate": 3.730073039060019e-05, "loss": 0.0457, "step": 9877 }, { "epoch": 18.80266286257727, "grad_norm": 0.15264080464839935, "learning_rate": 3.729437916798984e-05, "loss": 0.0455, "step": 9878 }, { "epoch": 18.80456490727532, "grad_norm": 0.1529812514781952, "learning_rate": 3.728802794537949e-05, "loss": 0.0518, "step": 9879 }, { "epoch": 18.80646695197337, "grad_norm": 0.18237809836864471, "learning_rate": 3.7281676722769135e-05, "loss": 0.0594, "step": 9880 }, { "epoch": 18.808368996671422, "grad_norm": 0.23065491020679474, "learning_rate": 3.727532550015878e-05, "loss": 0.0785, "step": 9881 }, { "epoch": 18.81027104136947, "grad_norm": 0.05364197492599487, "learning_rate": 3.726897427754843e-05, "loss": 0.0384, "step": 9882 }, { "epoch": 18.812173086067524, "grad_norm": 0.05554148554801941, "learning_rate": 3.726262305493808e-05, "loss": 0.057, "step": 9883 }, { "epoch": 18.814075130765573, "grad_norm": 0.1097605749964714, "learning_rate": 3.725627183232773e-05, "loss": 0.0664, "step": 9884 }, { "epoch": 18.815977175463622, "grad_norm": 0.05670863762497902, "learning_rate": 3.7249920609717374e-05, "loss": 0.0581, "step": 9885 }, { "epoch": 18.817879220161675, "grad_norm": 0.09237883985042572, "learning_rate": 3.724356938710702e-05, "loss": 0.0391, "step": 9886 }, { "epoch": 18.819781264859724, "grad_norm": 0.12565287947654724, "learning_rate": 3.723721816449667e-05, "loss": 0.069, "step": 9887 }, { "epoch": 18.821683309557773, "grad_norm": 0.24651116132736206, "learning_rate": 3.7230866941886316e-05, "loss": 0.0576, "step": 9888 }, { "epoch": 18.823585354255826, "grad_norm": 0.21622343361377716, "learning_rate": 3.722451571927596e-05, "loss": 0.0798, "step": 9889 }, { "epoch": 18.825487398953875, "grad_norm": 0.12411265820264816, "learning_rate": 3.7218164496665606e-05, "loss": 0.064, "step": 9890 }, { "epoch": 18.827389443651924, "grad_norm": 0.1262037754058838, "learning_rate": 3.721181327405526e-05, "loss": 0.0737, "step": 9891 }, { "epoch": 18.829291488349977, "grad_norm": 0.15461592376232147, "learning_rate": 3.72054620514449e-05, "loss": 0.0531, "step": 9892 }, { "epoch": 18.831193533048026, "grad_norm": 0.598727822303772, "learning_rate": 3.7199110828834555e-05, "loss": 0.0751, "step": 9893 }, { "epoch": 18.83309557774608, "grad_norm": 0.1309041827917099, "learning_rate": 3.71927596062242e-05, "loss": 0.0494, "step": 9894 }, { "epoch": 18.834997622444128, "grad_norm": 0.1532328873872757, "learning_rate": 3.7186408383613845e-05, "loss": 0.0464, "step": 9895 }, { "epoch": 18.836899667142177, "grad_norm": 0.14370164275169373, "learning_rate": 3.7180057161003496e-05, "loss": 0.0651, "step": 9896 }, { "epoch": 18.83880171184023, "grad_norm": 0.14575162529945374, "learning_rate": 3.717370593839314e-05, "loss": 0.0625, "step": 9897 }, { "epoch": 18.84070375653828, "grad_norm": 0.06873523443937302, "learning_rate": 3.7167354715782787e-05, "loss": 0.0535, "step": 9898 }, { "epoch": 18.842605801236328, "grad_norm": 0.10545411705970764, "learning_rate": 3.716100349317244e-05, "loss": 0.0578, "step": 9899 }, { "epoch": 18.84450784593438, "grad_norm": 0.08575357496738434, "learning_rate": 3.7154652270562084e-05, "loss": 0.053, "step": 9900 }, { "epoch": 18.84640989063243, "grad_norm": 0.13706247508525848, "learning_rate": 3.7148301047951735e-05, "loss": 0.0733, "step": 9901 }, { "epoch": 18.84831193533048, "grad_norm": 0.14186565577983856, "learning_rate": 3.714194982534138e-05, "loss": 0.0816, "step": 9902 }, { "epoch": 18.85021398002853, "grad_norm": 0.08031070232391357, "learning_rate": 3.7135598602731025e-05, "loss": 0.0756, "step": 9903 }, { "epoch": 18.85211602472658, "grad_norm": 0.0668841004371643, "learning_rate": 3.712924738012068e-05, "loss": 0.0619, "step": 9904 }, { "epoch": 18.854018069424633, "grad_norm": 0.1793297678232193, "learning_rate": 3.712289615751032e-05, "loss": 0.0636, "step": 9905 }, { "epoch": 18.855920114122682, "grad_norm": 0.1464206874370575, "learning_rate": 3.7116544934899974e-05, "loss": 0.0615, "step": 9906 }, { "epoch": 18.85782215882073, "grad_norm": 0.07394376397132874, "learning_rate": 3.711019371228961e-05, "loss": 0.05, "step": 9907 }, { "epoch": 18.859724203518784, "grad_norm": 0.07852068543434143, "learning_rate": 3.7103842489679264e-05, "loss": 0.0643, "step": 9908 }, { "epoch": 18.861626248216833, "grad_norm": 0.2197633534669876, "learning_rate": 3.709749126706891e-05, "loss": 0.062, "step": 9909 }, { "epoch": 18.863528292914882, "grad_norm": 0.07696213573217392, "learning_rate": 3.709114004445856e-05, "loss": 0.0222, "step": 9910 }, { "epoch": 18.865430337612935, "grad_norm": 0.06208226457238197, "learning_rate": 3.708478882184821e-05, "loss": 0.0553, "step": 9911 }, { "epoch": 18.867332382310984, "grad_norm": 0.16470803320407867, "learning_rate": 3.707843759923785e-05, "loss": 0.0664, "step": 9912 }, { "epoch": 18.869234427009033, "grad_norm": 0.056323740631341934, "learning_rate": 3.70720863766275e-05, "loss": 0.061, "step": 9913 }, { "epoch": 18.871136471707086, "grad_norm": 0.038745343685150146, "learning_rate": 3.706573515401715e-05, "loss": 0.0502, "step": 9914 }, { "epoch": 18.873038516405135, "grad_norm": 0.13453522324562073, "learning_rate": 3.70593839314068e-05, "loss": 0.0754, "step": 9915 }, { "epoch": 18.874940561103188, "grad_norm": 0.1173306480050087, "learning_rate": 3.7053032708796445e-05, "loss": 0.0605, "step": 9916 }, { "epoch": 18.876842605801237, "grad_norm": 0.10980434715747833, "learning_rate": 3.704668148618609e-05, "loss": 0.0537, "step": 9917 }, { "epoch": 18.878744650499286, "grad_norm": 0.19415007531642914, "learning_rate": 3.704033026357574e-05, "loss": 0.0584, "step": 9918 }, { "epoch": 18.88064669519734, "grad_norm": 0.08781924843788147, "learning_rate": 3.703397904096539e-05, "loss": 0.0435, "step": 9919 }, { "epoch": 18.882548739895388, "grad_norm": 0.26694977283477783, "learning_rate": 3.702762781835504e-05, "loss": 0.0606, "step": 9920 }, { "epoch": 18.884450784593437, "grad_norm": 0.14742548763751984, "learning_rate": 3.702127659574468e-05, "loss": 0.047, "step": 9921 }, { "epoch": 18.88635282929149, "grad_norm": 0.04659523442387581, "learning_rate": 3.701492537313433e-05, "loss": 0.0386, "step": 9922 }, { "epoch": 18.88825487398954, "grad_norm": 0.3608655631542206, "learning_rate": 3.700857415052398e-05, "loss": 0.0619, "step": 9923 }, { "epoch": 18.890156918687588, "grad_norm": 0.05233491584658623, "learning_rate": 3.7002222927913626e-05, "loss": 0.0539, "step": 9924 }, { "epoch": 18.89205896338564, "grad_norm": 0.16963283717632294, "learning_rate": 3.699587170530327e-05, "loss": 0.0695, "step": 9925 }, { "epoch": 18.89396100808369, "grad_norm": 0.04924313351511955, "learning_rate": 3.6989520482692916e-05, "loss": 0.069, "step": 9926 }, { "epoch": 18.895863052781742, "grad_norm": 0.1375628262758255, "learning_rate": 3.698316926008257e-05, "loss": 0.0702, "step": 9927 }, { "epoch": 18.89776509747979, "grad_norm": 0.10310663282871246, "learning_rate": 3.697681803747221e-05, "loss": 0.0487, "step": 9928 }, { "epoch": 18.89966714217784, "grad_norm": 0.04487544670701027, "learning_rate": 3.6970466814861865e-05, "loss": 0.0444, "step": 9929 }, { "epoch": 18.901569186875893, "grad_norm": 0.15529364347457886, "learning_rate": 3.696411559225151e-05, "loss": 0.0508, "step": 9930 }, { "epoch": 18.903471231573942, "grad_norm": 0.08870761096477509, "learning_rate": 3.6957764369641155e-05, "loss": 0.0575, "step": 9931 }, { "epoch": 18.90537327627199, "grad_norm": 0.06250828504562378, "learning_rate": 3.695141314703081e-05, "loss": 0.0562, "step": 9932 }, { "epoch": 18.907275320970044, "grad_norm": 0.08538121730089188, "learning_rate": 3.694506192442045e-05, "loss": 0.0466, "step": 9933 }, { "epoch": 18.909177365668093, "grad_norm": 0.0566830113530159, "learning_rate": 3.69387107018101e-05, "loss": 0.0609, "step": 9934 }, { "epoch": 18.911079410366142, "grad_norm": 0.04256056994199753, "learning_rate": 3.693235947919975e-05, "loss": 0.0433, "step": 9935 }, { "epoch": 18.912981455064195, "grad_norm": 0.0648518055677414, "learning_rate": 3.6926008256589394e-05, "loss": 0.0739, "step": 9936 }, { "epoch": 18.914883499762244, "grad_norm": 0.14472368359565735, "learning_rate": 3.6919657033979046e-05, "loss": 0.0407, "step": 9937 }, { "epoch": 18.916785544460296, "grad_norm": 0.14395765960216522, "learning_rate": 3.691330581136869e-05, "loss": 0.0698, "step": 9938 }, { "epoch": 18.918687589158345, "grad_norm": 0.08163261413574219, "learning_rate": 3.6906954588758336e-05, "loss": 0.0626, "step": 9939 }, { "epoch": 18.920589633856395, "grad_norm": 0.12085986137390137, "learning_rate": 3.690060336614799e-05, "loss": 0.0368, "step": 9940 }, { "epoch": 18.922491678554447, "grad_norm": 0.11507006734609604, "learning_rate": 3.689425214353763e-05, "loss": 0.068, "step": 9941 }, { "epoch": 18.924393723252496, "grad_norm": 0.10598558932542801, "learning_rate": 3.6887900920927285e-05, "loss": 0.0858, "step": 9942 }, { "epoch": 18.926295767950545, "grad_norm": 0.13431620597839355, "learning_rate": 3.688154969831692e-05, "loss": 0.0557, "step": 9943 }, { "epoch": 18.928197812648598, "grad_norm": 0.1234978586435318, "learning_rate": 3.6875198475706575e-05, "loss": 0.0528, "step": 9944 }, { "epoch": 18.930099857346647, "grad_norm": 0.34976011514663696, "learning_rate": 3.686884725309622e-05, "loss": 0.1134, "step": 9945 }, { "epoch": 18.932001902044696, "grad_norm": 0.1423528492450714, "learning_rate": 3.686249603048587e-05, "loss": 0.0527, "step": 9946 }, { "epoch": 18.93390394674275, "grad_norm": 0.3800347149372101, "learning_rate": 3.685614480787552e-05, "loss": 0.0717, "step": 9947 }, { "epoch": 18.935805991440798, "grad_norm": 0.09121809899806976, "learning_rate": 3.684979358526516e-05, "loss": 0.0526, "step": 9948 }, { "epoch": 18.93770803613885, "grad_norm": 0.17573939263820648, "learning_rate": 3.6843442362654814e-05, "loss": 0.0665, "step": 9949 }, { "epoch": 18.9396100808369, "grad_norm": 0.09202369302511215, "learning_rate": 3.683709114004446e-05, "loss": 0.0464, "step": 9950 }, { "epoch": 18.94151212553495, "grad_norm": 0.1471950262784958, "learning_rate": 3.683073991743411e-05, "loss": 0.0745, "step": 9951 }, { "epoch": 18.943414170233, "grad_norm": 0.16292431950569153, "learning_rate": 3.6824388694823756e-05, "loss": 0.0447, "step": 9952 }, { "epoch": 18.94531621493105, "grad_norm": 0.1641230583190918, "learning_rate": 3.68180374722134e-05, "loss": 0.0566, "step": 9953 }, { "epoch": 18.9472182596291, "grad_norm": 0.1794688105583191, "learning_rate": 3.681168624960305e-05, "loss": 0.0677, "step": 9954 }, { "epoch": 18.949120304327153, "grad_norm": 0.10057643055915833, "learning_rate": 3.68053350269927e-05, "loss": 0.056, "step": 9955 }, { "epoch": 18.9510223490252, "grad_norm": 0.08977890014648438, "learning_rate": 3.679898380438235e-05, "loss": 0.0816, "step": 9956 }, { "epoch": 18.95292439372325, "grad_norm": 0.1806146204471588, "learning_rate": 3.679263258177199e-05, "loss": 0.0523, "step": 9957 }, { "epoch": 18.954826438421303, "grad_norm": 0.1459428369998932, "learning_rate": 3.678628135916164e-05, "loss": 0.0549, "step": 9958 }, { "epoch": 18.956728483119353, "grad_norm": 0.08471745997667313, "learning_rate": 3.677993013655129e-05, "loss": 0.064, "step": 9959 }, { "epoch": 18.958630527817405, "grad_norm": 0.06056053936481476, "learning_rate": 3.6773578913940937e-05, "loss": 0.0492, "step": 9960 }, { "epoch": 18.960532572515454, "grad_norm": 0.09086448699235916, "learning_rate": 3.676722769133058e-05, "loss": 0.0583, "step": 9961 }, { "epoch": 18.962434617213503, "grad_norm": 0.10881451517343521, "learning_rate": 3.676087646872023e-05, "loss": 0.0544, "step": 9962 }, { "epoch": 18.964336661911556, "grad_norm": 0.1478363275527954, "learning_rate": 3.675452524610988e-05, "loss": 0.0497, "step": 9963 }, { "epoch": 18.966238706609605, "grad_norm": 0.19032606482505798, "learning_rate": 3.6748174023499524e-05, "loss": 0.0643, "step": 9964 }, { "epoch": 18.968140751307654, "grad_norm": 0.13042497634887695, "learning_rate": 3.6741822800889175e-05, "loss": 0.0437, "step": 9965 }, { "epoch": 18.970042796005707, "grad_norm": 0.0954694151878357, "learning_rate": 3.673547157827882e-05, "loss": 0.0472, "step": 9966 }, { "epoch": 18.971944840703756, "grad_norm": 0.06691458821296692, "learning_rate": 3.6729120355668466e-05, "loss": 0.0587, "step": 9967 }, { "epoch": 18.973846885401805, "grad_norm": 0.13316847383975983, "learning_rate": 3.672276913305812e-05, "loss": 0.0685, "step": 9968 }, { "epoch": 18.975748930099858, "grad_norm": 0.21592672169208527, "learning_rate": 3.671641791044776e-05, "loss": 0.0573, "step": 9969 }, { "epoch": 18.977650974797907, "grad_norm": 0.10553871840238571, "learning_rate": 3.671006668783741e-05, "loss": 0.0456, "step": 9970 }, { "epoch": 18.97955301949596, "grad_norm": 0.27601495385169983, "learning_rate": 3.670371546522706e-05, "loss": 0.068, "step": 9971 }, { "epoch": 18.98145506419401, "grad_norm": 0.09027394652366638, "learning_rate": 3.6697364242616704e-05, "loss": 0.0466, "step": 9972 }, { "epoch": 18.983357108892058, "grad_norm": 0.05911664292216301, "learning_rate": 3.6691013020006356e-05, "loss": 0.0467, "step": 9973 }, { "epoch": 18.98525915359011, "grad_norm": 0.10544353723526001, "learning_rate": 3.6684661797396e-05, "loss": 0.0597, "step": 9974 }, { "epoch": 18.98716119828816, "grad_norm": 0.09973032772541046, "learning_rate": 3.6678310574785646e-05, "loss": 0.0438, "step": 9975 }, { "epoch": 18.98906324298621, "grad_norm": 0.24099227786064148, "learning_rate": 3.667195935217529e-05, "loss": 0.0803, "step": 9976 }, { "epoch": 18.99096528768426, "grad_norm": 0.11139773577451706, "learning_rate": 3.666560812956494e-05, "loss": 0.0556, "step": 9977 }, { "epoch": 18.99286733238231, "grad_norm": 0.08617474138736725, "learning_rate": 3.6659256906954595e-05, "loss": 0.0469, "step": 9978 }, { "epoch": 18.99476937708036, "grad_norm": 0.12138453125953674, "learning_rate": 3.6652905684344233e-05, "loss": 0.0576, "step": 9979 }, { "epoch": 18.996671421778412, "grad_norm": 0.10799018293619156, "learning_rate": 3.6646554461733885e-05, "loss": 0.0406, "step": 9980 }, { "epoch": 18.99857346647646, "grad_norm": 0.12339373677968979, "learning_rate": 3.664020323912353e-05, "loss": 0.0572, "step": 9981 }, { "epoch": 19.000475511174514, "grad_norm": 0.39112773537635803, "learning_rate": 3.663385201651318e-05, "loss": 0.0725, "step": 9982 }, { "epoch": 19.002377555872563, "grad_norm": 0.05707325041294098, "learning_rate": 3.662750079390283e-05, "loss": 0.0684, "step": 9983 }, { "epoch": 19.004279600570612, "grad_norm": 0.0484582856297493, "learning_rate": 3.662114957129247e-05, "loss": 0.0501, "step": 9984 }, { "epoch": 19.006181645268665, "grad_norm": 0.20799849927425385, "learning_rate": 3.6614798348682124e-05, "loss": 0.0806, "step": 9985 }, { "epoch": 19.008083689966714, "grad_norm": 0.11114415526390076, "learning_rate": 3.660844712607177e-05, "loss": 0.0699, "step": 9986 }, { "epoch": 19.009985734664763, "grad_norm": 0.06936005502939224, "learning_rate": 3.660209590346142e-05, "loss": 0.0785, "step": 9987 }, { "epoch": 19.011887779362816, "grad_norm": 0.17966769635677338, "learning_rate": 3.6595744680851066e-05, "loss": 0.0642, "step": 9988 }, { "epoch": 19.013789824060865, "grad_norm": 0.057487085461616516, "learning_rate": 3.658939345824071e-05, "loss": 0.0541, "step": 9989 }, { "epoch": 19.015691868758918, "grad_norm": 0.215240940451622, "learning_rate": 3.658304223563036e-05, "loss": 0.0479, "step": 9990 }, { "epoch": 19.017593913456967, "grad_norm": 0.11331439763307571, "learning_rate": 3.657669101302001e-05, "loss": 0.0383, "step": 9991 }, { "epoch": 19.019495958155016, "grad_norm": 0.10905072838068008, "learning_rate": 3.657033979040966e-05, "loss": 0.0314, "step": 9992 }, { "epoch": 19.02139800285307, "grad_norm": 0.08001358062028885, "learning_rate": 3.65639885677993e-05, "loss": 0.0359, "step": 9993 }, { "epoch": 19.023300047551118, "grad_norm": 0.0985516756772995, "learning_rate": 3.655763734518895e-05, "loss": 0.0513, "step": 9994 }, { "epoch": 19.025202092249167, "grad_norm": 0.1317702978849411, "learning_rate": 3.65512861225786e-05, "loss": 0.0576, "step": 9995 }, { "epoch": 19.02710413694722, "grad_norm": 0.33076581358909607, "learning_rate": 3.654493489996825e-05, "loss": 0.0773, "step": 9996 }, { "epoch": 19.02900618164527, "grad_norm": 0.06298033893108368, "learning_rate": 3.653858367735789e-05, "loss": 0.0953, "step": 9997 }, { "epoch": 19.030908226343318, "grad_norm": 0.049048468470573425, "learning_rate": 3.653223245474754e-05, "loss": 0.0387, "step": 9998 }, { "epoch": 19.03281027104137, "grad_norm": 0.14701910316944122, "learning_rate": 3.652588123213719e-05, "loss": 0.0793, "step": 9999 }, { "epoch": 19.03471231573942, "grad_norm": 0.16040882468223572, "learning_rate": 3.6519530009526834e-05, "loss": 0.0787, "step": 10000 }, { "epoch": 19.036614360437472, "grad_norm": 0.08740217983722687, "learning_rate": 3.6513178786916486e-05, "loss": 0.0637, "step": 10001 }, { "epoch": 19.03851640513552, "grad_norm": 0.20253409445285797, "learning_rate": 3.650682756430613e-05, "loss": 0.0554, "step": 10002 }, { "epoch": 19.04041844983357, "grad_norm": 0.06349057704210281, "learning_rate": 3.6500476341695776e-05, "loss": 0.0603, "step": 10003 }, { "epoch": 19.042320494531623, "grad_norm": 0.17448563873767853, "learning_rate": 3.649412511908543e-05, "loss": 0.0405, "step": 10004 }, { "epoch": 19.044222539229672, "grad_norm": 0.15310613811016083, "learning_rate": 3.648777389647507e-05, "loss": 0.0569, "step": 10005 }, { "epoch": 19.04612458392772, "grad_norm": 0.03419190272688866, "learning_rate": 3.648142267386472e-05, "loss": 0.0484, "step": 10006 }, { "epoch": 19.048026628625774, "grad_norm": 0.06719169020652771, "learning_rate": 3.647507145125437e-05, "loss": 0.0813, "step": 10007 }, { "epoch": 19.049928673323823, "grad_norm": 0.10958858579397202, "learning_rate": 3.6468720228644015e-05, "loss": 0.0634, "step": 10008 }, { "epoch": 19.051830718021872, "grad_norm": 0.12859925627708435, "learning_rate": 3.646236900603367e-05, "loss": 0.06, "step": 10009 }, { "epoch": 19.053732762719925, "grad_norm": 0.1362864226102829, "learning_rate": 3.645601778342331e-05, "loss": 0.0487, "step": 10010 }, { "epoch": 19.055634807417974, "grad_norm": 0.05309566110372543, "learning_rate": 3.644966656081296e-05, "loss": 0.0892, "step": 10011 }, { "epoch": 19.057536852116026, "grad_norm": 0.05769931152462959, "learning_rate": 3.64433153382026e-05, "loss": 0.041, "step": 10012 }, { "epoch": 19.059438896814076, "grad_norm": 0.0420185886323452, "learning_rate": 3.6436964115592254e-05, "loss": 0.0561, "step": 10013 }, { "epoch": 19.061340941512125, "grad_norm": 0.11808836460113525, "learning_rate": 3.6430612892981906e-05, "loss": 0.0507, "step": 10014 }, { "epoch": 19.063242986210177, "grad_norm": 0.10188145935535431, "learning_rate": 3.6424261670371544e-05, "loss": 0.0639, "step": 10015 }, { "epoch": 19.065145030908226, "grad_norm": 0.12096687406301498, "learning_rate": 3.6417910447761196e-05, "loss": 0.0582, "step": 10016 }, { "epoch": 19.067047075606276, "grad_norm": 0.07498153299093246, "learning_rate": 3.641155922515084e-05, "loss": 0.0539, "step": 10017 }, { "epoch": 19.068949120304328, "grad_norm": 0.15140143036842346, "learning_rate": 3.640520800254049e-05, "loss": 0.0543, "step": 10018 }, { "epoch": 19.070851165002377, "grad_norm": 0.11248353868722916, "learning_rate": 3.639885677993014e-05, "loss": 0.0522, "step": 10019 }, { "epoch": 19.072753209700426, "grad_norm": 0.13222387433052063, "learning_rate": 3.639250555731978e-05, "loss": 0.0698, "step": 10020 }, { "epoch": 19.07465525439848, "grad_norm": 0.09478526562452316, "learning_rate": 3.6386154334709435e-05, "loss": 0.041, "step": 10021 }, { "epoch": 19.076557299096528, "grad_norm": 0.049942344427108765, "learning_rate": 3.637980311209908e-05, "loss": 0.0501, "step": 10022 }, { "epoch": 19.07845934379458, "grad_norm": 0.08634361624717712, "learning_rate": 3.637345188948873e-05, "loss": 0.0555, "step": 10023 }, { "epoch": 19.08036138849263, "grad_norm": 0.14032310247421265, "learning_rate": 3.636710066687837e-05, "loss": 0.0462, "step": 10024 }, { "epoch": 19.08226343319068, "grad_norm": 0.117254339158535, "learning_rate": 3.636074944426802e-05, "loss": 0.0541, "step": 10025 }, { "epoch": 19.08416547788873, "grad_norm": 0.05373654142022133, "learning_rate": 3.6354398221657673e-05, "loss": 0.0446, "step": 10026 }, { "epoch": 19.08606752258678, "grad_norm": 0.11854445934295654, "learning_rate": 3.634804699904732e-05, "loss": 0.0596, "step": 10027 }, { "epoch": 19.08796956728483, "grad_norm": 0.18134529888629913, "learning_rate": 3.634169577643697e-05, "loss": 0.0587, "step": 10028 }, { "epoch": 19.089871611982883, "grad_norm": 0.10500608384609222, "learning_rate": 3.633534455382661e-05, "loss": 0.0683, "step": 10029 }, { "epoch": 19.09177365668093, "grad_norm": 0.10136354714632034, "learning_rate": 3.632899333121626e-05, "loss": 0.0458, "step": 10030 }, { "epoch": 19.09367570137898, "grad_norm": 0.13646340370178223, "learning_rate": 3.6322642108605906e-05, "loss": 0.0293, "step": 10031 }, { "epoch": 19.095577746077034, "grad_norm": 0.10555324703454971, "learning_rate": 3.631629088599556e-05, "loss": 0.0585, "step": 10032 }, { "epoch": 19.097479790775083, "grad_norm": 0.15480376780033112, "learning_rate": 3.63099396633852e-05, "loss": 0.0542, "step": 10033 }, { "epoch": 19.099381835473135, "grad_norm": 0.21401821076869965, "learning_rate": 3.630358844077485e-05, "loss": 0.0544, "step": 10034 }, { "epoch": 19.101283880171184, "grad_norm": 0.08610215783119202, "learning_rate": 3.62972372181645e-05, "loss": 0.0485, "step": 10035 }, { "epoch": 19.103185924869234, "grad_norm": 0.07398564368486404, "learning_rate": 3.6290885995554144e-05, "loss": 0.0466, "step": 10036 }, { "epoch": 19.105087969567286, "grad_norm": 0.05519363284111023, "learning_rate": 3.6284534772943796e-05, "loss": 0.0572, "step": 10037 }, { "epoch": 19.106990014265335, "grad_norm": 0.09300468862056732, "learning_rate": 3.627818355033344e-05, "loss": 0.0286, "step": 10038 }, { "epoch": 19.108892058963384, "grad_norm": 0.07158180326223373, "learning_rate": 3.6271832327723086e-05, "loss": 0.064, "step": 10039 }, { "epoch": 19.110794103661437, "grad_norm": 0.1243014857172966, "learning_rate": 3.626548110511274e-05, "loss": 0.0739, "step": 10040 }, { "epoch": 19.112696148359486, "grad_norm": 0.10833297669887543, "learning_rate": 3.625912988250238e-05, "loss": 0.0593, "step": 10041 }, { "epoch": 19.114598193057535, "grad_norm": 0.04479862004518509, "learning_rate": 3.625277865989203e-05, "loss": 0.0513, "step": 10042 }, { "epoch": 19.116500237755588, "grad_norm": 0.051182232797145844, "learning_rate": 3.624642743728168e-05, "loss": 0.0666, "step": 10043 }, { "epoch": 19.118402282453637, "grad_norm": 0.07713011652231216, "learning_rate": 3.6240076214671325e-05, "loss": 0.0575, "step": 10044 }, { "epoch": 19.12030432715169, "grad_norm": 0.11209198087453842, "learning_rate": 3.623372499206098e-05, "loss": 0.0653, "step": 10045 }, { "epoch": 19.12220637184974, "grad_norm": 0.1346711814403534, "learning_rate": 3.622737376945062e-05, "loss": 0.0348, "step": 10046 }, { "epoch": 19.124108416547788, "grad_norm": 0.09184443205595016, "learning_rate": 3.622102254684027e-05, "loss": 0.0624, "step": 10047 }, { "epoch": 19.12601046124584, "grad_norm": 0.08998651057481766, "learning_rate": 3.621467132422991e-05, "loss": 0.0458, "step": 10048 }, { "epoch": 19.12791250594389, "grad_norm": 0.16010458767414093, "learning_rate": 3.6208320101619564e-05, "loss": 0.0558, "step": 10049 }, { "epoch": 19.12981455064194, "grad_norm": 0.08595812320709229, "learning_rate": 3.6201968879009216e-05, "loss": 0.0514, "step": 10050 }, { "epoch": 19.13171659533999, "grad_norm": 0.09739263355731964, "learning_rate": 3.6195617656398854e-05, "loss": 0.0652, "step": 10051 }, { "epoch": 19.13361864003804, "grad_norm": 0.1122368648648262, "learning_rate": 3.6189266433788506e-05, "loss": 0.0547, "step": 10052 }, { "epoch": 19.13552068473609, "grad_norm": 0.16903916001319885, "learning_rate": 3.618291521117815e-05, "loss": 0.048, "step": 10053 }, { "epoch": 19.137422729434142, "grad_norm": 0.046694718301296234, "learning_rate": 3.61765639885678e-05, "loss": 0.0607, "step": 10054 }, { "epoch": 19.13932477413219, "grad_norm": 0.07032367587089539, "learning_rate": 3.617021276595745e-05, "loss": 0.0493, "step": 10055 }, { "epoch": 19.141226818830244, "grad_norm": 0.06717967242002487, "learning_rate": 3.616386154334709e-05, "loss": 0.0499, "step": 10056 }, { "epoch": 19.143128863528293, "grad_norm": 0.06115768849849701, "learning_rate": 3.6157510320736745e-05, "loss": 0.0614, "step": 10057 }, { "epoch": 19.145030908226342, "grad_norm": 0.0605919249355793, "learning_rate": 3.615115909812639e-05, "loss": 0.0357, "step": 10058 }, { "epoch": 19.146932952924395, "grad_norm": 0.1991409808397293, "learning_rate": 3.614480787551604e-05, "loss": 0.0538, "step": 10059 }, { "epoch": 19.148834997622444, "grad_norm": 0.19580891728401184, "learning_rate": 3.613845665290568e-05, "loss": 0.0652, "step": 10060 }, { "epoch": 19.150737042320493, "grad_norm": 0.13768815994262695, "learning_rate": 3.613210543029533e-05, "loss": 0.057, "step": 10061 }, { "epoch": 19.152639087018546, "grad_norm": 0.347734272480011, "learning_rate": 3.6125754207684984e-05, "loss": 0.0477, "step": 10062 }, { "epoch": 19.154541131716595, "grad_norm": 0.13527154922485352, "learning_rate": 3.611940298507463e-05, "loss": 0.052, "step": 10063 }, { "epoch": 19.156443176414644, "grad_norm": 0.2703625559806824, "learning_rate": 3.611305176246428e-05, "loss": 0.063, "step": 10064 }, { "epoch": 19.158345221112697, "grad_norm": 0.09101396054029465, "learning_rate": 3.610670053985392e-05, "loss": 0.0661, "step": 10065 }, { "epoch": 19.160247265810746, "grad_norm": 0.15997478365898132, "learning_rate": 3.610034931724357e-05, "loss": 0.0499, "step": 10066 }, { "epoch": 19.1621493105088, "grad_norm": 0.06663242727518082, "learning_rate": 3.6093998094633216e-05, "loss": 0.06, "step": 10067 }, { "epoch": 19.164051355206848, "grad_norm": 0.11330113559961319, "learning_rate": 3.608764687202287e-05, "loss": 0.0418, "step": 10068 }, { "epoch": 19.165953399904897, "grad_norm": 0.10328996926546097, "learning_rate": 3.608129564941251e-05, "loss": 0.0581, "step": 10069 }, { "epoch": 19.16785544460295, "grad_norm": 0.043847475200891495, "learning_rate": 3.607494442680216e-05, "loss": 0.0574, "step": 10070 }, { "epoch": 19.169757489301, "grad_norm": 0.049880996346473694, "learning_rate": 3.606859320419181e-05, "loss": 0.0479, "step": 10071 }, { "epoch": 19.171659533999048, "grad_norm": 0.2610563039779663, "learning_rate": 3.6062241981581455e-05, "loss": 0.063, "step": 10072 }, { "epoch": 19.1735615786971, "grad_norm": 0.08486849814653397, "learning_rate": 3.605589075897111e-05, "loss": 0.0641, "step": 10073 }, { "epoch": 19.17546362339515, "grad_norm": 0.0873064175248146, "learning_rate": 3.604953953636075e-05, "loss": 0.0556, "step": 10074 }, { "epoch": 19.1773656680932, "grad_norm": 0.0841386616230011, "learning_rate": 3.60431883137504e-05, "loss": 0.0572, "step": 10075 }, { "epoch": 19.17926771279125, "grad_norm": 0.1779516637325287, "learning_rate": 3.603683709114005e-05, "loss": 0.0511, "step": 10076 }, { "epoch": 19.1811697574893, "grad_norm": 0.09647682309150696, "learning_rate": 3.6030485868529694e-05, "loss": 0.0716, "step": 10077 }, { "epoch": 19.183071802187353, "grad_norm": 0.08160854876041412, "learning_rate": 3.602413464591934e-05, "loss": 0.0598, "step": 10078 }, { "epoch": 19.184973846885402, "grad_norm": 0.06882409751415253, "learning_rate": 3.6017783423308984e-05, "loss": 0.0517, "step": 10079 }, { "epoch": 19.18687589158345, "grad_norm": 0.08491438627243042, "learning_rate": 3.6011432200698636e-05, "loss": 0.0561, "step": 10080 }, { "epoch": 19.188777936281504, "grad_norm": 0.343395859003067, "learning_rate": 3.600508097808829e-05, "loss": 0.0768, "step": 10081 }, { "epoch": 19.190679980979553, "grad_norm": 0.1169079914689064, "learning_rate": 3.599872975547793e-05, "loss": 0.0423, "step": 10082 }, { "epoch": 19.192582025677602, "grad_norm": 0.24359755218029022, "learning_rate": 3.599237853286758e-05, "loss": 0.0738, "step": 10083 }, { "epoch": 19.194484070375655, "grad_norm": 0.06958276033401489, "learning_rate": 3.598602731025722e-05, "loss": 0.047, "step": 10084 }, { "epoch": 19.196386115073704, "grad_norm": 0.07158949971199036, "learning_rate": 3.5979676087646875e-05, "loss": 0.027, "step": 10085 }, { "epoch": 19.198288159771753, "grad_norm": 0.14709332585334778, "learning_rate": 3.597332486503652e-05, "loss": 0.0518, "step": 10086 }, { "epoch": 19.200190204469806, "grad_norm": 0.09249371290206909, "learning_rate": 3.5966973642426165e-05, "loss": 0.0563, "step": 10087 }, { "epoch": 19.202092249167855, "grad_norm": 0.059265051037073135, "learning_rate": 3.5960622419815817e-05, "loss": 0.0517, "step": 10088 }, { "epoch": 19.203994293865907, "grad_norm": 0.1328250616788864, "learning_rate": 3.595427119720546e-05, "loss": 0.067, "step": 10089 }, { "epoch": 19.205896338563957, "grad_norm": 0.04016127064824104, "learning_rate": 3.5947919974595113e-05, "loss": 0.0398, "step": 10090 }, { "epoch": 19.207798383262006, "grad_norm": 0.12123529613018036, "learning_rate": 3.594156875198476e-05, "loss": 0.0425, "step": 10091 }, { "epoch": 19.20970042796006, "grad_norm": 0.12374745309352875, "learning_rate": 3.5935217529374404e-05, "loss": 0.0597, "step": 10092 }, { "epoch": 19.211602472658107, "grad_norm": 0.13038672506809235, "learning_rate": 3.5928866306764055e-05, "loss": 0.06, "step": 10093 }, { "epoch": 19.213504517356156, "grad_norm": 0.10279489308595657, "learning_rate": 3.59225150841537e-05, "loss": 0.0563, "step": 10094 }, { "epoch": 19.21540656205421, "grad_norm": 0.12781091034412384, "learning_rate": 3.591616386154335e-05, "loss": 0.0603, "step": 10095 }, { "epoch": 19.21730860675226, "grad_norm": 0.09477554261684418, "learning_rate": 3.590981263893299e-05, "loss": 0.0782, "step": 10096 }, { "epoch": 19.219210651450307, "grad_norm": 0.07034879177808762, "learning_rate": 3.590346141632264e-05, "loss": 0.0505, "step": 10097 }, { "epoch": 19.22111269614836, "grad_norm": 0.06071623042225838, "learning_rate": 3.5897110193712294e-05, "loss": 0.045, "step": 10098 }, { "epoch": 19.22301474084641, "grad_norm": 0.039386387914419174, "learning_rate": 3.589075897110194e-05, "loss": 0.0487, "step": 10099 }, { "epoch": 19.224916785544462, "grad_norm": 0.1408051699399948, "learning_rate": 3.588440774849159e-05, "loss": 0.0458, "step": 10100 }, { "epoch": 19.22681883024251, "grad_norm": 0.10527674853801727, "learning_rate": 3.587805652588123e-05, "loss": 0.084, "step": 10101 }, { "epoch": 19.22872087494056, "grad_norm": 0.2394716739654541, "learning_rate": 3.587170530327088e-05, "loss": 0.0507, "step": 10102 }, { "epoch": 19.230622919638613, "grad_norm": 0.13282881677150726, "learning_rate": 3.5865354080660526e-05, "loss": 0.0416, "step": 10103 }, { "epoch": 19.232524964336662, "grad_norm": 0.13951736688613892, "learning_rate": 3.585900285805018e-05, "loss": 0.0668, "step": 10104 }, { "epoch": 19.23442700903471, "grad_norm": 0.09385409951210022, "learning_rate": 3.585265163543982e-05, "loss": 0.0596, "step": 10105 }, { "epoch": 19.236329053732764, "grad_norm": 0.11713048070669174, "learning_rate": 3.584630041282947e-05, "loss": 0.0675, "step": 10106 }, { "epoch": 19.238231098430813, "grad_norm": 0.14428676664829254, "learning_rate": 3.583994919021912e-05, "loss": 0.0602, "step": 10107 }, { "epoch": 19.240133143128862, "grad_norm": 0.04688987135887146, "learning_rate": 3.5833597967608765e-05, "loss": 0.0541, "step": 10108 }, { "epoch": 19.242035187826914, "grad_norm": 0.20087163150310516, "learning_rate": 3.582724674499842e-05, "loss": 0.0473, "step": 10109 }, { "epoch": 19.243937232524964, "grad_norm": 0.165956050157547, "learning_rate": 3.582089552238806e-05, "loss": 0.0624, "step": 10110 }, { "epoch": 19.245839277223016, "grad_norm": 0.16869060695171356, "learning_rate": 3.581454429977771e-05, "loss": 0.0597, "step": 10111 }, { "epoch": 19.247741321921065, "grad_norm": 0.10722652822732925, "learning_rate": 3.580819307716736e-05, "loss": 0.0722, "step": 10112 }, { "epoch": 19.249643366619114, "grad_norm": 0.07774709910154343, "learning_rate": 3.5801841854557004e-05, "loss": 0.0338, "step": 10113 }, { "epoch": 19.251545411317167, "grad_norm": 0.13211393356323242, "learning_rate": 3.579549063194665e-05, "loss": 0.0511, "step": 10114 }, { "epoch": 19.253447456015216, "grad_norm": 0.1211496964097023, "learning_rate": 3.5789139409336294e-05, "loss": 0.0486, "step": 10115 }, { "epoch": 19.255349500713265, "grad_norm": 0.10808859020471573, "learning_rate": 3.5782788186725946e-05, "loss": 0.0367, "step": 10116 }, { "epoch": 19.257251545411318, "grad_norm": 0.10460115969181061, "learning_rate": 3.57764369641156e-05, "loss": 0.0577, "step": 10117 }, { "epoch": 19.259153590109367, "grad_norm": 0.0897640734910965, "learning_rate": 3.577008574150524e-05, "loss": 0.0628, "step": 10118 }, { "epoch": 19.261055634807416, "grad_norm": 0.12556083500385284, "learning_rate": 3.576373451889489e-05, "loss": 0.0636, "step": 10119 }, { "epoch": 19.26295767950547, "grad_norm": 0.13245490193367004, "learning_rate": 3.575738329628453e-05, "loss": 0.0432, "step": 10120 }, { "epoch": 19.264859724203518, "grad_norm": 0.15191958844661713, "learning_rate": 3.5751032073674185e-05, "loss": 0.0657, "step": 10121 }, { "epoch": 19.26676176890157, "grad_norm": 0.04129410162568092, "learning_rate": 3.574468085106383e-05, "loss": 0.0443, "step": 10122 }, { "epoch": 19.26866381359962, "grad_norm": 0.3152913451194763, "learning_rate": 3.5738329628453475e-05, "loss": 0.0739, "step": 10123 }, { "epoch": 19.27056585829767, "grad_norm": 0.16405954957008362, "learning_rate": 3.573197840584313e-05, "loss": 0.0365, "step": 10124 }, { "epoch": 19.27246790299572, "grad_norm": 0.1917743682861328, "learning_rate": 3.572562718323277e-05, "loss": 0.0369, "step": 10125 }, { "epoch": 19.27436994769377, "grad_norm": 0.09143206477165222, "learning_rate": 3.5719275960622424e-05, "loss": 0.055, "step": 10126 }, { "epoch": 19.27627199239182, "grad_norm": 0.13609574735164642, "learning_rate": 3.571292473801207e-05, "loss": 0.0459, "step": 10127 }, { "epoch": 19.278174037089872, "grad_norm": 0.40472212433815, "learning_rate": 3.5706573515401714e-05, "loss": 0.0825, "step": 10128 }, { "epoch": 19.28007608178792, "grad_norm": 0.1614009439945221, "learning_rate": 3.5700222292791366e-05, "loss": 0.059, "step": 10129 }, { "epoch": 19.281978126485974, "grad_norm": 0.06693167984485626, "learning_rate": 3.569387107018101e-05, "loss": 0.0489, "step": 10130 }, { "epoch": 19.283880171184023, "grad_norm": 0.10688638687133789, "learning_rate": 3.568751984757066e-05, "loss": 0.0567, "step": 10131 }, { "epoch": 19.285782215882072, "grad_norm": 0.4077149033546448, "learning_rate": 3.56811686249603e-05, "loss": 0.0406, "step": 10132 }, { "epoch": 19.287684260580125, "grad_norm": 0.1552218645811081, "learning_rate": 3.567481740234995e-05, "loss": 0.0546, "step": 10133 }, { "epoch": 19.289586305278174, "grad_norm": 0.1504446119070053, "learning_rate": 3.56684661797396e-05, "loss": 0.0776, "step": 10134 }, { "epoch": 19.291488349976223, "grad_norm": 0.10397607088088989, "learning_rate": 3.566211495712925e-05, "loss": 0.0492, "step": 10135 }, { "epoch": 19.293390394674276, "grad_norm": 0.10803500562906265, "learning_rate": 3.56557637345189e-05, "loss": 0.0598, "step": 10136 }, { "epoch": 19.295292439372325, "grad_norm": 0.13279162347316742, "learning_rate": 3.564941251190854e-05, "loss": 0.0542, "step": 10137 }, { "epoch": 19.297194484070374, "grad_norm": 0.11373448371887207, "learning_rate": 3.564306128929819e-05, "loss": 0.0504, "step": 10138 }, { "epoch": 19.299096528768427, "grad_norm": 0.10581234842538834, "learning_rate": 3.563671006668784e-05, "loss": 0.0709, "step": 10139 }, { "epoch": 19.300998573466476, "grad_norm": 0.07865399122238159, "learning_rate": 3.563035884407749e-05, "loss": 0.0638, "step": 10140 }, { "epoch": 19.30290061816453, "grad_norm": 0.050489168614149094, "learning_rate": 3.5624007621467134e-05, "loss": 0.0779, "step": 10141 }, { "epoch": 19.304802662862578, "grad_norm": 0.10635179281234741, "learning_rate": 3.561765639885678e-05, "loss": 0.0646, "step": 10142 }, { "epoch": 19.306704707560627, "grad_norm": 0.12745051085948944, "learning_rate": 3.561130517624643e-05, "loss": 0.0537, "step": 10143 }, { "epoch": 19.30860675225868, "grad_norm": 0.07300863415002823, "learning_rate": 3.5604953953636076e-05, "loss": 0.0554, "step": 10144 }, { "epoch": 19.31050879695673, "grad_norm": 0.1162499487400055, "learning_rate": 3.559860273102573e-05, "loss": 0.0518, "step": 10145 }, { "epoch": 19.312410841654778, "grad_norm": 0.11278656125068665, "learning_rate": 3.559225150841537e-05, "loss": 0.0566, "step": 10146 }, { "epoch": 19.31431288635283, "grad_norm": 0.21737448871135712, "learning_rate": 3.558590028580502e-05, "loss": 0.0715, "step": 10147 }, { "epoch": 19.31621493105088, "grad_norm": 0.08031684905290604, "learning_rate": 3.557954906319467e-05, "loss": 0.0521, "step": 10148 }, { "epoch": 19.31811697574893, "grad_norm": 0.046094682067632675, "learning_rate": 3.5573197840584315e-05, "loss": 0.0427, "step": 10149 }, { "epoch": 19.32001902044698, "grad_norm": 0.10708703100681305, "learning_rate": 3.556684661797396e-05, "loss": 0.0519, "step": 10150 }, { "epoch": 19.32192106514503, "grad_norm": 0.34062817692756653, "learning_rate": 3.5560495395363605e-05, "loss": 0.07, "step": 10151 }, { "epoch": 19.323823109843083, "grad_norm": 0.05861439183354378, "learning_rate": 3.555414417275326e-05, "loss": 0.0484, "step": 10152 }, { "epoch": 19.325725154541132, "grad_norm": 0.08207020908594131, "learning_rate": 3.554779295014291e-05, "loss": 0.0504, "step": 10153 }, { "epoch": 19.32762719923918, "grad_norm": 0.10471838712692261, "learning_rate": 3.5541441727532554e-05, "loss": 0.0443, "step": 10154 }, { "epoch": 19.329529243937234, "grad_norm": 0.15948818624019623, "learning_rate": 3.55350905049222e-05, "loss": 0.0551, "step": 10155 }, { "epoch": 19.331431288635283, "grad_norm": 0.06961742043495178, "learning_rate": 3.5528739282311844e-05, "loss": 0.0376, "step": 10156 }, { "epoch": 19.333333333333332, "grad_norm": 0.04763415455818176, "learning_rate": 3.5522388059701495e-05, "loss": 0.0497, "step": 10157 }, { "epoch": 19.335235378031385, "grad_norm": 0.07544784247875214, "learning_rate": 3.551603683709114e-05, "loss": 0.0484, "step": 10158 }, { "epoch": 19.337137422729434, "grad_norm": 0.05961260944604874, "learning_rate": 3.5509685614480786e-05, "loss": 0.0528, "step": 10159 }, { "epoch": 19.339039467427483, "grad_norm": 0.03512318432331085, "learning_rate": 3.550333439187044e-05, "loss": 0.0452, "step": 10160 }, { "epoch": 19.340941512125536, "grad_norm": 0.12750087678432465, "learning_rate": 3.549698316926008e-05, "loss": 0.0486, "step": 10161 }, { "epoch": 19.342843556823585, "grad_norm": 0.1510218381881714, "learning_rate": 3.5490631946649734e-05, "loss": 0.0548, "step": 10162 }, { "epoch": 19.344745601521637, "grad_norm": 0.03664156794548035, "learning_rate": 3.548428072403938e-05, "loss": 0.0506, "step": 10163 }, { "epoch": 19.346647646219687, "grad_norm": 0.30709025263786316, "learning_rate": 3.5477929501429025e-05, "loss": 0.0758, "step": 10164 }, { "epoch": 19.348549690917736, "grad_norm": 0.1910550892353058, "learning_rate": 3.5471578278818676e-05, "loss": 0.0845, "step": 10165 }, { "epoch": 19.35045173561579, "grad_norm": 0.3427481949329376, "learning_rate": 3.546522705620832e-05, "loss": 0.0534, "step": 10166 }, { "epoch": 19.352353780313837, "grad_norm": 0.13792464137077332, "learning_rate": 3.545887583359797e-05, "loss": 0.0554, "step": 10167 }, { "epoch": 19.354255825011887, "grad_norm": 0.09045601636171341, "learning_rate": 3.545252461098761e-05, "loss": 0.0398, "step": 10168 }, { "epoch": 19.35615786970994, "grad_norm": 0.06533253937959671, "learning_rate": 3.544617338837726e-05, "loss": 0.0315, "step": 10169 }, { "epoch": 19.35805991440799, "grad_norm": 0.10264572501182556, "learning_rate": 3.543982216576691e-05, "loss": 0.0408, "step": 10170 }, { "epoch": 19.359961959106037, "grad_norm": 0.04178214818239212, "learning_rate": 3.543347094315656e-05, "loss": 0.0575, "step": 10171 }, { "epoch": 19.36186400380409, "grad_norm": 0.05626368522644043, "learning_rate": 3.542711972054621e-05, "loss": 0.0427, "step": 10172 }, { "epoch": 19.36376604850214, "grad_norm": 0.13891632854938507, "learning_rate": 3.542076849793585e-05, "loss": 0.0465, "step": 10173 }, { "epoch": 19.365668093200192, "grad_norm": 0.09107466787099838, "learning_rate": 3.54144172753255e-05, "loss": 0.0623, "step": 10174 }, { "epoch": 19.36757013789824, "grad_norm": 0.09597960114479065, "learning_rate": 3.540806605271515e-05, "loss": 0.0485, "step": 10175 }, { "epoch": 19.36947218259629, "grad_norm": 0.07946845144033432, "learning_rate": 3.54017148301048e-05, "loss": 0.0541, "step": 10176 }, { "epoch": 19.371374227294343, "grad_norm": 0.16330133378505707, "learning_rate": 3.5395363607494444e-05, "loss": 0.063, "step": 10177 }, { "epoch": 19.373276271992392, "grad_norm": 0.060695718973875046, "learning_rate": 3.538901238488409e-05, "loss": 0.0661, "step": 10178 }, { "epoch": 19.37517831669044, "grad_norm": 0.08531861007213593, "learning_rate": 3.538266116227374e-05, "loss": 0.0499, "step": 10179 }, { "epoch": 19.377080361388494, "grad_norm": 0.06099654361605644, "learning_rate": 3.5376309939663386e-05, "loss": 0.0394, "step": 10180 }, { "epoch": 19.378982406086543, "grad_norm": 0.05763452872633934, "learning_rate": 3.536995871705304e-05, "loss": 0.056, "step": 10181 }, { "epoch": 19.380884450784592, "grad_norm": 0.16771380603313446, "learning_rate": 3.536360749444268e-05, "loss": 0.0545, "step": 10182 }, { "epoch": 19.382786495482645, "grad_norm": 0.1479615420103073, "learning_rate": 3.535725627183233e-05, "loss": 0.0461, "step": 10183 }, { "epoch": 19.384688540180694, "grad_norm": 0.10853920876979828, "learning_rate": 3.535090504922198e-05, "loss": 0.0497, "step": 10184 }, { "epoch": 19.386590584878746, "grad_norm": 0.11250828206539154, "learning_rate": 3.5344553826611625e-05, "loss": 0.0582, "step": 10185 }, { "epoch": 19.388492629576795, "grad_norm": 0.08319451659917831, "learning_rate": 3.533820260400127e-05, "loss": 0.0612, "step": 10186 }, { "epoch": 19.390394674274845, "grad_norm": 0.09340374171733856, "learning_rate": 3.5331851381390915e-05, "loss": 0.046, "step": 10187 }, { "epoch": 19.392296718972897, "grad_norm": 0.12733030319213867, "learning_rate": 3.532550015878057e-05, "loss": 0.0457, "step": 10188 }, { "epoch": 19.394198763670946, "grad_norm": 0.10150488466024399, "learning_rate": 3.531914893617021e-05, "loss": 0.046, "step": 10189 }, { "epoch": 19.396100808368995, "grad_norm": 0.05435825511813164, "learning_rate": 3.5312797713559864e-05, "loss": 0.0344, "step": 10190 }, { "epoch": 19.398002853067048, "grad_norm": 0.06481046974658966, "learning_rate": 3.530644649094951e-05, "loss": 0.0543, "step": 10191 }, { "epoch": 19.399904897765097, "grad_norm": 0.1499750167131424, "learning_rate": 3.5300095268339154e-05, "loss": 0.0507, "step": 10192 }, { "epoch": 19.401806942463146, "grad_norm": 0.05150365084409714, "learning_rate": 3.5293744045728806e-05, "loss": 0.0386, "step": 10193 }, { "epoch": 19.4037089871612, "grad_norm": 0.03388383612036705, "learning_rate": 3.528739282311845e-05, "loss": 0.0366, "step": 10194 }, { "epoch": 19.405611031859248, "grad_norm": 0.19244486093521118, "learning_rate": 3.5281041600508096e-05, "loss": 0.0536, "step": 10195 }, { "epoch": 19.4075130765573, "grad_norm": 0.12983280420303345, "learning_rate": 3.527469037789775e-05, "loss": 0.037, "step": 10196 }, { "epoch": 19.40941512125535, "grad_norm": 0.08089195936918259, "learning_rate": 3.526833915528739e-05, "loss": 0.0746, "step": 10197 }, { "epoch": 19.4113171659534, "grad_norm": 0.08440740406513214, "learning_rate": 3.5261987932677045e-05, "loss": 0.0564, "step": 10198 }, { "epoch": 19.41321921065145, "grad_norm": 0.24003563821315765, "learning_rate": 3.525563671006669e-05, "loss": 0.0702, "step": 10199 }, { "epoch": 19.4151212553495, "grad_norm": 0.2483237385749817, "learning_rate": 3.5249285487456335e-05, "loss": 0.0716, "step": 10200 }, { "epoch": 19.41702330004755, "grad_norm": 0.1826210469007492, "learning_rate": 3.524293426484599e-05, "loss": 0.0598, "step": 10201 }, { "epoch": 19.418925344745603, "grad_norm": 0.09709428995847702, "learning_rate": 3.523658304223563e-05, "loss": 0.0531, "step": 10202 }, { "epoch": 19.42082738944365, "grad_norm": 0.18354997038841248, "learning_rate": 3.5230231819625284e-05, "loss": 0.0586, "step": 10203 }, { "epoch": 19.4227294341417, "grad_norm": 0.050882138311862946, "learning_rate": 3.522388059701492e-05, "loss": 0.0466, "step": 10204 }, { "epoch": 19.424631478839753, "grad_norm": 0.1463881880044937, "learning_rate": 3.5217529374404574e-05, "loss": 0.0597, "step": 10205 }, { "epoch": 19.426533523537802, "grad_norm": 0.055197589099407196, "learning_rate": 3.521117815179422e-05, "loss": 0.0411, "step": 10206 }, { "epoch": 19.428435568235855, "grad_norm": 0.13385817408561707, "learning_rate": 3.520482692918387e-05, "loss": 0.0552, "step": 10207 }, { "epoch": 19.430337612933904, "grad_norm": 0.1626228392124176, "learning_rate": 3.519847570657352e-05, "loss": 0.0351, "step": 10208 }, { "epoch": 19.432239657631953, "grad_norm": 0.11927636712789536, "learning_rate": 3.519212448396316e-05, "loss": 0.0591, "step": 10209 }, { "epoch": 19.434141702330006, "grad_norm": 0.07239444553852081, "learning_rate": 3.518577326135281e-05, "loss": 0.0627, "step": 10210 }, { "epoch": 19.436043747028055, "grad_norm": 0.07885508984327316, "learning_rate": 3.517942203874246e-05, "loss": 0.0502, "step": 10211 }, { "epoch": 19.437945791726104, "grad_norm": 0.04506470263004303, "learning_rate": 3.517307081613211e-05, "loss": 0.039, "step": 10212 }, { "epoch": 19.439847836424157, "grad_norm": 0.1872227042913437, "learning_rate": 3.5166719593521755e-05, "loss": 0.0638, "step": 10213 }, { "epoch": 19.441749881122206, "grad_norm": 0.12122814357280731, "learning_rate": 3.51603683709114e-05, "loss": 0.0661, "step": 10214 }, { "epoch": 19.443651925820255, "grad_norm": 0.06379745900630951, "learning_rate": 3.515401714830105e-05, "loss": 0.0401, "step": 10215 }, { "epoch": 19.445553970518308, "grad_norm": 0.0780811533331871, "learning_rate": 3.51476659256907e-05, "loss": 0.0553, "step": 10216 }, { "epoch": 19.447456015216357, "grad_norm": 0.07205761969089508, "learning_rate": 3.514131470308035e-05, "loss": 0.0662, "step": 10217 }, { "epoch": 19.44935805991441, "grad_norm": 0.14351381361484528, "learning_rate": 3.513496348046999e-05, "loss": 0.0624, "step": 10218 }, { "epoch": 19.45126010461246, "grad_norm": 0.042609211057424545, "learning_rate": 3.512861225785964e-05, "loss": 0.0351, "step": 10219 }, { "epoch": 19.453162149310508, "grad_norm": 0.19034677743911743, "learning_rate": 3.512226103524929e-05, "loss": 0.0662, "step": 10220 }, { "epoch": 19.45506419400856, "grad_norm": 0.16874302923679352, "learning_rate": 3.5115909812638936e-05, "loss": 0.0491, "step": 10221 }, { "epoch": 19.45696623870661, "grad_norm": 0.0731043592095375, "learning_rate": 3.510955859002858e-05, "loss": 0.0526, "step": 10222 }, { "epoch": 19.45886828340466, "grad_norm": 0.05459411069750786, "learning_rate": 3.5103207367418226e-05, "loss": 0.047, "step": 10223 }, { "epoch": 19.46077032810271, "grad_norm": 0.11637942492961884, "learning_rate": 3.509685614480788e-05, "loss": 0.0407, "step": 10224 }, { "epoch": 19.46267237280076, "grad_norm": 0.0811336413025856, "learning_rate": 3.509050492219752e-05, "loss": 0.0625, "step": 10225 }, { "epoch": 19.46457441749881, "grad_norm": 0.21679244935512543, "learning_rate": 3.5084153699587174e-05, "loss": 0.0703, "step": 10226 }, { "epoch": 19.466476462196862, "grad_norm": 0.14192962646484375, "learning_rate": 3.507780247697682e-05, "loss": 0.0709, "step": 10227 }, { "epoch": 19.46837850689491, "grad_norm": 0.10032680630683899, "learning_rate": 3.5071451254366465e-05, "loss": 0.0836, "step": 10228 }, { "epoch": 19.470280551592964, "grad_norm": 0.13514381647109985, "learning_rate": 3.5065100031756116e-05, "loss": 0.0607, "step": 10229 }, { "epoch": 19.472182596291013, "grad_norm": 0.06436719000339508, "learning_rate": 3.505874880914576e-05, "loss": 0.0532, "step": 10230 }, { "epoch": 19.474084640989062, "grad_norm": 0.09055536240339279, "learning_rate": 3.5052397586535407e-05, "loss": 0.0608, "step": 10231 }, { "epoch": 19.475986685687115, "grad_norm": 0.1204552948474884, "learning_rate": 3.504604636392506e-05, "loss": 0.0442, "step": 10232 }, { "epoch": 19.477888730385164, "grad_norm": 0.10365813970565796, "learning_rate": 3.5039695141314703e-05, "loss": 0.0474, "step": 10233 }, { "epoch": 19.479790775083213, "grad_norm": 0.07257441431283951, "learning_rate": 3.5033343918704355e-05, "loss": 0.0489, "step": 10234 }, { "epoch": 19.481692819781266, "grad_norm": 0.05598134547472, "learning_rate": 3.5026992696094e-05, "loss": 0.0486, "step": 10235 }, { "epoch": 19.483594864479315, "grad_norm": 0.07868688553571701, "learning_rate": 3.5020641473483645e-05, "loss": 0.0508, "step": 10236 }, { "epoch": 19.485496909177364, "grad_norm": 0.1124541312456131, "learning_rate": 3.50142902508733e-05, "loss": 0.0712, "step": 10237 }, { "epoch": 19.487398953875417, "grad_norm": 0.10084667801856995, "learning_rate": 3.500793902826294e-05, "loss": 0.0527, "step": 10238 }, { "epoch": 19.489300998573466, "grad_norm": 0.04100062698125839, "learning_rate": 3.5001587805652594e-05, "loss": 0.0527, "step": 10239 }, { "epoch": 19.49120304327152, "grad_norm": 0.04239039495587349, "learning_rate": 3.499523658304223e-05, "loss": 0.0576, "step": 10240 }, { "epoch": 19.493105087969568, "grad_norm": 0.119795061647892, "learning_rate": 3.4988885360431884e-05, "loss": 0.0576, "step": 10241 }, { "epoch": 19.495007132667617, "grad_norm": 0.12805697321891785, "learning_rate": 3.498253413782153e-05, "loss": 0.0574, "step": 10242 }, { "epoch": 19.49690917736567, "grad_norm": 0.1418042778968811, "learning_rate": 3.497618291521118e-05, "loss": 0.0578, "step": 10243 }, { "epoch": 19.49881122206372, "grad_norm": 0.07065989077091217, "learning_rate": 3.4969831692600826e-05, "loss": 0.0448, "step": 10244 }, { "epoch": 19.500713266761768, "grad_norm": 0.09072795510292053, "learning_rate": 3.496348046999047e-05, "loss": 0.0826, "step": 10245 }, { "epoch": 19.50261531145982, "grad_norm": 0.12471240758895874, "learning_rate": 3.495712924738012e-05, "loss": 0.0538, "step": 10246 }, { "epoch": 19.50451735615787, "grad_norm": 0.057024117559194565, "learning_rate": 3.495077802476977e-05, "loss": 0.0456, "step": 10247 }, { "epoch": 19.50641940085592, "grad_norm": 0.14013096690177917, "learning_rate": 3.494442680215942e-05, "loss": 0.061, "step": 10248 }, { "epoch": 19.50832144555397, "grad_norm": 0.18541331589221954, "learning_rate": 3.4938075579549065e-05, "loss": 0.0482, "step": 10249 }, { "epoch": 19.51022349025202, "grad_norm": 0.1770530641078949, "learning_rate": 3.493172435693871e-05, "loss": 0.083, "step": 10250 }, { "epoch": 19.512125534950073, "grad_norm": 0.15656158328056335, "learning_rate": 3.492537313432836e-05, "loss": 0.0582, "step": 10251 }, { "epoch": 19.514027579648122, "grad_norm": 0.12950193881988525, "learning_rate": 3.491902191171801e-05, "loss": 0.0575, "step": 10252 }, { "epoch": 19.51592962434617, "grad_norm": 0.07617585361003876, "learning_rate": 3.491267068910766e-05, "loss": 0.0542, "step": 10253 }, { "epoch": 19.517831669044224, "grad_norm": 0.08753058314323425, "learning_rate": 3.49063194664973e-05, "loss": 0.0687, "step": 10254 }, { "epoch": 19.519733713742273, "grad_norm": 0.1600852906703949, "learning_rate": 3.489996824388695e-05, "loss": 0.0532, "step": 10255 }, { "epoch": 19.521635758440322, "grad_norm": 0.13967394828796387, "learning_rate": 3.48936170212766e-05, "loss": 0.0824, "step": 10256 }, { "epoch": 19.523537803138375, "grad_norm": 0.10907625406980515, "learning_rate": 3.4887265798666246e-05, "loss": 0.048, "step": 10257 }, { "epoch": 19.525439847836424, "grad_norm": 0.06240085884928703, "learning_rate": 3.488091457605589e-05, "loss": 0.0527, "step": 10258 }, { "epoch": 19.527341892534473, "grad_norm": 0.044482551515102386, "learning_rate": 3.4874563353445536e-05, "loss": 0.0568, "step": 10259 }, { "epoch": 19.529243937232525, "grad_norm": 0.13326877355575562, "learning_rate": 3.486821213083519e-05, "loss": 0.0518, "step": 10260 }, { "epoch": 19.531145981930575, "grad_norm": 0.04680095613002777, "learning_rate": 3.486186090822483e-05, "loss": 0.0608, "step": 10261 }, { "epoch": 19.533048026628627, "grad_norm": 0.05832425877451897, "learning_rate": 3.4855509685614485e-05, "loss": 0.0513, "step": 10262 }, { "epoch": 19.534950071326676, "grad_norm": 0.13309139013290405, "learning_rate": 3.484915846300413e-05, "loss": 0.0508, "step": 10263 }, { "epoch": 19.536852116024725, "grad_norm": 0.22880618274211884, "learning_rate": 3.4842807240393775e-05, "loss": 0.0698, "step": 10264 }, { "epoch": 19.538754160722778, "grad_norm": 0.6374916434288025, "learning_rate": 3.483645601778343e-05, "loss": 0.0584, "step": 10265 }, { "epoch": 19.540656205420827, "grad_norm": 0.09376731514930725, "learning_rate": 3.483010479517307e-05, "loss": 0.0923, "step": 10266 }, { "epoch": 19.542558250118876, "grad_norm": 0.29286977648735046, "learning_rate": 3.482375357256272e-05, "loss": 0.0543, "step": 10267 }, { "epoch": 19.54446029481693, "grad_norm": 0.0732014924287796, "learning_rate": 3.481740234995237e-05, "loss": 0.0503, "step": 10268 }, { "epoch": 19.546362339514978, "grad_norm": 0.1219160258769989, "learning_rate": 3.4811051127342014e-05, "loss": 0.0471, "step": 10269 }, { "epoch": 19.548264384213027, "grad_norm": 0.20467039942741394, "learning_rate": 3.4804699904731666e-05, "loss": 0.0596, "step": 10270 }, { "epoch": 19.55016642891108, "grad_norm": 0.1130104586482048, "learning_rate": 3.479834868212131e-05, "loss": 0.0886, "step": 10271 }, { "epoch": 19.55206847360913, "grad_norm": 0.18084308505058289, "learning_rate": 3.4791997459510956e-05, "loss": 0.0766, "step": 10272 }, { "epoch": 19.55397051830718, "grad_norm": 0.14531627297401428, "learning_rate": 3.47856462369006e-05, "loss": 0.047, "step": 10273 }, { "epoch": 19.55587256300523, "grad_norm": 0.058127906173467636, "learning_rate": 3.477929501429025e-05, "loss": 0.0317, "step": 10274 }, { "epoch": 19.55777460770328, "grad_norm": 0.15582560002803802, "learning_rate": 3.4772943791679905e-05, "loss": 0.0742, "step": 10275 }, { "epoch": 19.559676652401333, "grad_norm": 0.3124534785747528, "learning_rate": 3.476659256906954e-05, "loss": 0.0913, "step": 10276 }, { "epoch": 19.56157869709938, "grad_norm": 0.09679864346981049, "learning_rate": 3.4760241346459195e-05, "loss": 0.0406, "step": 10277 }, { "epoch": 19.56348074179743, "grad_norm": 0.04385126382112503, "learning_rate": 3.475389012384884e-05, "loss": 0.052, "step": 10278 }, { "epoch": 19.565382786495483, "grad_norm": 0.09517911076545715, "learning_rate": 3.474753890123849e-05, "loss": 0.0772, "step": 10279 }, { "epoch": 19.567284831193533, "grad_norm": 0.11840872466564178, "learning_rate": 3.474118767862814e-05, "loss": 0.066, "step": 10280 }, { "epoch": 19.56918687589158, "grad_norm": 0.11991573125123978, "learning_rate": 3.473483645601778e-05, "loss": 0.0498, "step": 10281 }, { "epoch": 19.571088920589634, "grad_norm": 0.15331275761127472, "learning_rate": 3.4728485233407434e-05, "loss": 0.0668, "step": 10282 }, { "epoch": 19.572990965287683, "grad_norm": 0.2799055278301239, "learning_rate": 3.472213401079708e-05, "loss": 0.0768, "step": 10283 }, { "epoch": 19.574893009985736, "grad_norm": 0.1397206336259842, "learning_rate": 3.471578278818673e-05, "loss": 0.0539, "step": 10284 }, { "epoch": 19.576795054683785, "grad_norm": 0.18653438985347748, "learning_rate": 3.4709431565576376e-05, "loss": 0.0501, "step": 10285 }, { "epoch": 19.578697099381834, "grad_norm": 0.07166846096515656, "learning_rate": 3.470308034296602e-05, "loss": 0.0701, "step": 10286 }, { "epoch": 19.580599144079887, "grad_norm": 0.1638810932636261, "learning_rate": 3.469672912035567e-05, "loss": 0.0905, "step": 10287 }, { "epoch": 19.582501188777936, "grad_norm": 0.14252188801765442, "learning_rate": 3.469037789774532e-05, "loss": 0.0791, "step": 10288 }, { "epoch": 19.584403233475985, "grad_norm": 0.08229123800992966, "learning_rate": 3.468402667513497e-05, "loss": 0.0411, "step": 10289 }, { "epoch": 19.586305278174038, "grad_norm": 0.1315241903066635, "learning_rate": 3.467767545252461e-05, "loss": 0.0674, "step": 10290 }, { "epoch": 19.588207322872087, "grad_norm": 0.10541167110204697, "learning_rate": 3.467132422991426e-05, "loss": 0.0595, "step": 10291 }, { "epoch": 19.590109367570136, "grad_norm": 0.14340007305145264, "learning_rate": 3.466497300730391e-05, "loss": 0.0426, "step": 10292 }, { "epoch": 19.59201141226819, "grad_norm": 0.15541459619998932, "learning_rate": 3.4658621784693556e-05, "loss": 0.0588, "step": 10293 }, { "epoch": 19.593913456966238, "grad_norm": 0.3654743432998657, "learning_rate": 3.46522705620832e-05, "loss": 0.0756, "step": 10294 }, { "epoch": 19.59581550166429, "grad_norm": 0.10362816601991653, "learning_rate": 3.4645919339472847e-05, "loss": 0.0482, "step": 10295 }, { "epoch": 19.59771754636234, "grad_norm": 0.11532525718212128, "learning_rate": 3.46395681168625e-05, "loss": 0.053, "step": 10296 }, { "epoch": 19.59961959106039, "grad_norm": 0.20203377306461334, "learning_rate": 3.4633216894252143e-05, "loss": 0.0738, "step": 10297 }, { "epoch": 19.60152163575844, "grad_norm": 0.12903465330600739, "learning_rate": 3.4626865671641795e-05, "loss": 0.0512, "step": 10298 }, { "epoch": 19.60342368045649, "grad_norm": 0.08098254352807999, "learning_rate": 3.462051444903144e-05, "loss": 0.0439, "step": 10299 }, { "epoch": 19.60532572515454, "grad_norm": 0.0791269913315773, "learning_rate": 3.4614163226421085e-05, "loss": 0.0686, "step": 10300 }, { "epoch": 19.607227769852592, "grad_norm": 0.054660454392433167, "learning_rate": 3.460781200381074e-05, "loss": 0.0547, "step": 10301 }, { "epoch": 19.60912981455064, "grad_norm": 0.11273625493049622, "learning_rate": 3.460146078120038e-05, "loss": 0.0753, "step": 10302 }, { "epoch": 19.61103185924869, "grad_norm": 0.07658755034208298, "learning_rate": 3.459510955859003e-05, "loss": 0.0371, "step": 10303 }, { "epoch": 19.612933903946743, "grad_norm": 0.10010501742362976, "learning_rate": 3.458875833597968e-05, "loss": 0.0506, "step": 10304 }, { "epoch": 19.614835948644792, "grad_norm": 0.22586584091186523, "learning_rate": 3.4582407113369324e-05, "loss": 0.0585, "step": 10305 }, { "epoch": 19.616737993342845, "grad_norm": 0.06970422714948654, "learning_rate": 3.4576055890758976e-05, "loss": 0.0444, "step": 10306 }, { "epoch": 19.618640038040894, "grad_norm": 0.06534264981746674, "learning_rate": 3.456970466814862e-05, "loss": 0.031, "step": 10307 }, { "epoch": 19.620542082738943, "grad_norm": 0.29313337802886963, "learning_rate": 3.4563353445538266e-05, "loss": 0.0867, "step": 10308 }, { "epoch": 19.622444127436996, "grad_norm": 0.06673599034547806, "learning_rate": 3.455700222292791e-05, "loss": 0.0523, "step": 10309 }, { "epoch": 19.624346172135045, "grad_norm": 0.13317318260669708, "learning_rate": 3.455065100031756e-05, "loss": 0.0498, "step": 10310 }, { "epoch": 19.626248216833094, "grad_norm": 0.0629139170050621, "learning_rate": 3.4544299777707215e-05, "loss": 0.0643, "step": 10311 }, { "epoch": 19.628150261531147, "grad_norm": 0.1272716075181961, "learning_rate": 3.453794855509685e-05, "loss": 0.067, "step": 10312 }, { "epoch": 19.630052306229196, "grad_norm": 0.22649399936199188, "learning_rate": 3.4531597332486505e-05, "loss": 0.0505, "step": 10313 }, { "epoch": 19.63195435092725, "grad_norm": 0.09296654909849167, "learning_rate": 3.452524610987615e-05, "loss": 0.045, "step": 10314 }, { "epoch": 19.633856395625298, "grad_norm": 0.05611781030893326, "learning_rate": 3.45188948872658e-05, "loss": 0.0537, "step": 10315 }, { "epoch": 19.635758440323347, "grad_norm": 0.0745164155960083, "learning_rate": 3.451254366465545e-05, "loss": 0.0589, "step": 10316 }, { "epoch": 19.6376604850214, "grad_norm": 0.10677458345890045, "learning_rate": 3.450619244204509e-05, "loss": 0.0498, "step": 10317 }, { "epoch": 19.63956252971945, "grad_norm": 0.10812623053789139, "learning_rate": 3.4499841219434744e-05, "loss": 0.0477, "step": 10318 }, { "epoch": 19.641464574417498, "grad_norm": 0.13432307541370392, "learning_rate": 3.449348999682439e-05, "loss": 0.0711, "step": 10319 }, { "epoch": 19.64336661911555, "grad_norm": 0.143142431974411, "learning_rate": 3.448713877421404e-05, "loss": 0.0575, "step": 10320 }, { "epoch": 19.6452686638136, "grad_norm": 0.1534140706062317, "learning_rate": 3.4480787551603686e-05, "loss": 0.0714, "step": 10321 }, { "epoch": 19.64717070851165, "grad_norm": 0.08591202646493912, "learning_rate": 3.447443632899333e-05, "loss": 0.0554, "step": 10322 }, { "epoch": 19.6490727532097, "grad_norm": 0.19998444616794586, "learning_rate": 3.446808510638298e-05, "loss": 0.0403, "step": 10323 }, { "epoch": 19.65097479790775, "grad_norm": 0.14870277047157288, "learning_rate": 3.446173388377263e-05, "loss": 0.061, "step": 10324 }, { "epoch": 19.652876842605803, "grad_norm": 0.2212769240140915, "learning_rate": 3.445538266116228e-05, "loss": 0.0625, "step": 10325 }, { "epoch": 19.654778887303852, "grad_norm": 0.188511461019516, "learning_rate": 3.444903143855192e-05, "loss": 0.0668, "step": 10326 }, { "epoch": 19.6566809320019, "grad_norm": 0.13568802177906036, "learning_rate": 3.444268021594157e-05, "loss": 0.0565, "step": 10327 }, { "epoch": 19.658582976699954, "grad_norm": 0.141657292842865, "learning_rate": 3.4436328993331215e-05, "loss": 0.0692, "step": 10328 }, { "epoch": 19.660485021398003, "grad_norm": 0.0863770842552185, "learning_rate": 3.442997777072087e-05, "loss": 0.0594, "step": 10329 }, { "epoch": 19.662387066096052, "grad_norm": 0.13790996372699738, "learning_rate": 3.442362654811051e-05, "loss": 0.0615, "step": 10330 }, { "epoch": 19.664289110794105, "grad_norm": 0.14185914397239685, "learning_rate": 3.441727532550016e-05, "loss": 0.0661, "step": 10331 }, { "epoch": 19.666191155492154, "grad_norm": 0.18941640853881836, "learning_rate": 3.441092410288981e-05, "loss": 0.0657, "step": 10332 }, { "epoch": 19.668093200190203, "grad_norm": 0.13069617748260498, "learning_rate": 3.4404572880279454e-05, "loss": 0.0482, "step": 10333 }, { "epoch": 19.669995244888256, "grad_norm": 0.12583699822425842, "learning_rate": 3.4398221657669106e-05, "loss": 0.0487, "step": 10334 }, { "epoch": 19.671897289586305, "grad_norm": 0.10469093173742294, "learning_rate": 3.439187043505875e-05, "loss": 0.0415, "step": 10335 }, { "epoch": 19.673799334284357, "grad_norm": 0.05414561554789543, "learning_rate": 3.4385519212448396e-05, "loss": 0.0554, "step": 10336 }, { "epoch": 19.675701378982406, "grad_norm": 0.22049076855182648, "learning_rate": 3.437916798983805e-05, "loss": 0.0663, "step": 10337 }, { "epoch": 19.677603423680456, "grad_norm": 0.10063643753528595, "learning_rate": 3.437281676722769e-05, "loss": 0.0678, "step": 10338 }, { "epoch": 19.67950546837851, "grad_norm": 0.05266137793660164, "learning_rate": 3.436646554461734e-05, "loss": 0.0629, "step": 10339 }, { "epoch": 19.681407513076557, "grad_norm": 0.1943947821855545, "learning_rate": 3.436011432200699e-05, "loss": 0.0345, "step": 10340 }, { "epoch": 19.683309557774606, "grad_norm": 0.10251480340957642, "learning_rate": 3.4353763099396635e-05, "loss": 0.0664, "step": 10341 }, { "epoch": 19.68521160247266, "grad_norm": 0.058005135506391525, "learning_rate": 3.4347411876786287e-05, "loss": 0.0534, "step": 10342 }, { "epoch": 19.687113647170708, "grad_norm": 0.10651689022779465, "learning_rate": 3.434106065417593e-05, "loss": 0.06, "step": 10343 }, { "epoch": 19.689015691868757, "grad_norm": 0.17154528200626373, "learning_rate": 3.433470943156558e-05, "loss": 0.0594, "step": 10344 }, { "epoch": 19.69091773656681, "grad_norm": 0.055961765348911285, "learning_rate": 3.432835820895522e-05, "loss": 0.0647, "step": 10345 }, { "epoch": 19.69281978126486, "grad_norm": 0.3030036389827728, "learning_rate": 3.4322006986344874e-05, "loss": 0.0497, "step": 10346 }, { "epoch": 19.69472182596291, "grad_norm": 0.04463787376880646, "learning_rate": 3.4315655763734525e-05, "loss": 0.0558, "step": 10347 }, { "epoch": 19.69662387066096, "grad_norm": 0.13033027946949005, "learning_rate": 3.4309304541124164e-05, "loss": 0.0599, "step": 10348 }, { "epoch": 19.69852591535901, "grad_norm": 0.056211188435554504, "learning_rate": 3.4302953318513816e-05, "loss": 0.0574, "step": 10349 }, { "epoch": 19.700427960057063, "grad_norm": 0.14974373579025269, "learning_rate": 3.429660209590346e-05, "loss": 0.0634, "step": 10350 }, { "epoch": 19.70233000475511, "grad_norm": 0.20371678471565247, "learning_rate": 3.429025087329311e-05, "loss": 0.0621, "step": 10351 }, { "epoch": 19.70423204945316, "grad_norm": 0.1817709505558014, "learning_rate": 3.428389965068276e-05, "loss": 0.0649, "step": 10352 }, { "epoch": 19.706134094151214, "grad_norm": 0.04039102420210838, "learning_rate": 3.42775484280724e-05, "loss": 0.0461, "step": 10353 }, { "epoch": 19.708036138849263, "grad_norm": 0.24125699698925018, "learning_rate": 3.4271197205462054e-05, "loss": 0.0501, "step": 10354 }, { "epoch": 19.70993818354731, "grad_norm": 0.0726756826043129, "learning_rate": 3.42648459828517e-05, "loss": 0.0522, "step": 10355 }, { "epoch": 19.711840228245364, "grad_norm": 0.12377038598060608, "learning_rate": 3.425849476024135e-05, "loss": 0.0573, "step": 10356 }, { "epoch": 19.713742272943414, "grad_norm": 0.11465056985616684, "learning_rate": 3.425214353763099e-05, "loss": 0.0681, "step": 10357 }, { "epoch": 19.715644317641466, "grad_norm": 0.04917832091450691, "learning_rate": 3.424579231502064e-05, "loss": 0.045, "step": 10358 }, { "epoch": 19.717546362339515, "grad_norm": 0.059877946972846985, "learning_rate": 3.423944109241029e-05, "loss": 0.047, "step": 10359 }, { "epoch": 19.719448407037564, "grad_norm": 0.10934069007635117, "learning_rate": 3.423308986979994e-05, "loss": 0.0543, "step": 10360 }, { "epoch": 19.721350451735617, "grad_norm": 0.18297024071216583, "learning_rate": 3.422673864718959e-05, "loss": 0.0492, "step": 10361 }, { "epoch": 19.723252496433666, "grad_norm": 0.11962675303220749, "learning_rate": 3.422038742457923e-05, "loss": 0.0739, "step": 10362 }, { "epoch": 19.725154541131715, "grad_norm": 0.18719187378883362, "learning_rate": 3.421403620196888e-05, "loss": 0.0809, "step": 10363 }, { "epoch": 19.727056585829768, "grad_norm": 0.0692734643816948, "learning_rate": 3.4207684979358525e-05, "loss": 0.0345, "step": 10364 }, { "epoch": 19.728958630527817, "grad_norm": 0.056538041681051254, "learning_rate": 3.420133375674818e-05, "loss": 0.0407, "step": 10365 }, { "epoch": 19.730860675225866, "grad_norm": 0.04328737035393715, "learning_rate": 3.419498253413782e-05, "loss": 0.0443, "step": 10366 }, { "epoch": 19.73276271992392, "grad_norm": 0.0875651016831398, "learning_rate": 3.418863131152747e-05, "loss": 0.073, "step": 10367 }, { "epoch": 19.734664764621968, "grad_norm": 0.10402372479438782, "learning_rate": 3.418228008891712e-05, "loss": 0.0613, "step": 10368 }, { "epoch": 19.73656680932002, "grad_norm": 0.09834878146648407, "learning_rate": 3.4175928866306764e-05, "loss": 0.058, "step": 10369 }, { "epoch": 19.73846885401807, "grad_norm": 0.11881900578737259, "learning_rate": 3.4169577643696416e-05, "loss": 0.1091, "step": 10370 }, { "epoch": 19.74037089871612, "grad_norm": 0.21092288196086884, "learning_rate": 3.416322642108606e-05, "loss": 0.064, "step": 10371 }, { "epoch": 19.74227294341417, "grad_norm": 0.06970597803592682, "learning_rate": 3.4156875198475706e-05, "loss": 0.0847, "step": 10372 }, { "epoch": 19.74417498811222, "grad_norm": 0.219869002699852, "learning_rate": 3.415052397586536e-05, "loss": 0.0689, "step": 10373 }, { "epoch": 19.74607703281027, "grad_norm": 0.16127139329910278, "learning_rate": 3.4144172753255e-05, "loss": 0.0505, "step": 10374 }, { "epoch": 19.747979077508322, "grad_norm": 0.24652771651744843, "learning_rate": 3.413782153064465e-05, "loss": 0.0548, "step": 10375 }, { "epoch": 19.74988112220637, "grad_norm": 0.07329868525266647, "learning_rate": 3.41314703080343e-05, "loss": 0.0465, "step": 10376 }, { "epoch": 19.75178316690442, "grad_norm": 0.1390024572610855, "learning_rate": 3.4125119085423945e-05, "loss": 0.0475, "step": 10377 }, { "epoch": 19.753685211602473, "grad_norm": 0.2154022455215454, "learning_rate": 3.41187678628136e-05, "loss": 0.0493, "step": 10378 }, { "epoch": 19.755587256300522, "grad_norm": 0.13576474785804749, "learning_rate": 3.411241664020324e-05, "loss": 0.0483, "step": 10379 }, { "epoch": 19.757489300998575, "grad_norm": 0.05765054374933243, "learning_rate": 3.410606541759289e-05, "loss": 0.0463, "step": 10380 }, { "epoch": 19.759391345696624, "grad_norm": 0.04597964510321617, "learning_rate": 3.409971419498253e-05, "loss": 0.0471, "step": 10381 }, { "epoch": 19.761293390394673, "grad_norm": 0.10041475296020508, "learning_rate": 3.4093362972372184e-05, "loss": 0.0485, "step": 10382 }, { "epoch": 19.763195435092726, "grad_norm": 0.1566227674484253, "learning_rate": 3.408701174976183e-05, "loss": 0.0803, "step": 10383 }, { "epoch": 19.765097479790775, "grad_norm": 0.13510426878929138, "learning_rate": 3.4080660527151474e-05, "loss": 0.056, "step": 10384 }, { "epoch": 19.766999524488824, "grad_norm": 0.11006447672843933, "learning_rate": 3.4074309304541126e-05, "loss": 0.0502, "step": 10385 }, { "epoch": 19.768901569186877, "grad_norm": 0.0916387289762497, "learning_rate": 3.406795808193077e-05, "loss": 0.0463, "step": 10386 }, { "epoch": 19.770803613884926, "grad_norm": 0.04085199907422066, "learning_rate": 3.406160685932042e-05, "loss": 0.0612, "step": 10387 }, { "epoch": 19.772705658582975, "grad_norm": 0.05123887583613396, "learning_rate": 3.405525563671007e-05, "loss": 0.0495, "step": 10388 }, { "epoch": 19.774607703281028, "grad_norm": 0.12037782371044159, "learning_rate": 3.404890441409971e-05, "loss": 0.0837, "step": 10389 }, { "epoch": 19.776509747979077, "grad_norm": 0.11001364886760712, "learning_rate": 3.4042553191489365e-05, "loss": 0.0525, "step": 10390 }, { "epoch": 19.77841179267713, "grad_norm": 0.25168079137802124, "learning_rate": 3.403620196887901e-05, "loss": 0.0759, "step": 10391 }, { "epoch": 19.78031383737518, "grad_norm": 0.07635489851236343, "learning_rate": 3.402985074626866e-05, "loss": 0.051, "step": 10392 }, { "epoch": 19.782215882073228, "grad_norm": 0.06394420564174652, "learning_rate": 3.40234995236583e-05, "loss": 0.0479, "step": 10393 }, { "epoch": 19.78411792677128, "grad_norm": 0.15731367468833923, "learning_rate": 3.401714830104795e-05, "loss": 0.0593, "step": 10394 }, { "epoch": 19.78601997146933, "grad_norm": 0.05368688702583313, "learning_rate": 3.4010797078437604e-05, "loss": 0.0519, "step": 10395 }, { "epoch": 19.78792201616738, "grad_norm": 0.07790486514568329, "learning_rate": 3.400444585582725e-05, "loss": 0.0569, "step": 10396 }, { "epoch": 19.78982406086543, "grad_norm": 0.11774320900440216, "learning_rate": 3.39980946332169e-05, "loss": 0.0529, "step": 10397 }, { "epoch": 19.79172610556348, "grad_norm": 0.06621931493282318, "learning_rate": 3.399174341060654e-05, "loss": 0.0541, "step": 10398 }, { "epoch": 19.79362815026153, "grad_norm": 0.2989416718482971, "learning_rate": 3.398539218799619e-05, "loss": 0.0608, "step": 10399 }, { "epoch": 19.795530194959582, "grad_norm": 0.18191273510456085, "learning_rate": 3.3979040965385836e-05, "loss": 0.05, "step": 10400 }, { "epoch": 19.79743223965763, "grad_norm": 0.13151490688323975, "learning_rate": 3.397268974277549e-05, "loss": 0.0614, "step": 10401 }, { "epoch": 19.799334284355684, "grad_norm": 0.16322055459022522, "learning_rate": 3.396633852016513e-05, "loss": 0.0675, "step": 10402 }, { "epoch": 19.801236329053733, "grad_norm": 0.17609618604183197, "learning_rate": 3.395998729755478e-05, "loss": 0.048, "step": 10403 }, { "epoch": 19.803138373751782, "grad_norm": 0.17454537749290466, "learning_rate": 3.395363607494443e-05, "loss": 0.0587, "step": 10404 }, { "epoch": 19.805040418449835, "grad_norm": 0.06503864377737045, "learning_rate": 3.3947284852334075e-05, "loss": 0.0449, "step": 10405 }, { "epoch": 19.806942463147884, "grad_norm": 0.09223943948745728, "learning_rate": 3.394093362972373e-05, "loss": 0.0659, "step": 10406 }, { "epoch": 19.808844507845933, "grad_norm": 0.06822570413351059, "learning_rate": 3.393458240711337e-05, "loss": 0.0858, "step": 10407 }, { "epoch": 19.810746552543986, "grad_norm": 0.2909882068634033, "learning_rate": 3.392823118450302e-05, "loss": 0.0472, "step": 10408 }, { "epoch": 19.812648597242035, "grad_norm": 0.05018160492181778, "learning_rate": 3.392187996189267e-05, "loss": 0.0711, "step": 10409 }, { "epoch": 19.814550641940087, "grad_norm": 0.21213145554065704, "learning_rate": 3.3915528739282314e-05, "loss": 0.0512, "step": 10410 }, { "epoch": 19.816452686638137, "grad_norm": 0.10144142806529999, "learning_rate": 3.390917751667196e-05, "loss": 0.0535, "step": 10411 }, { "epoch": 19.818354731336186, "grad_norm": 0.14137786626815796, "learning_rate": 3.3902826294061604e-05, "loss": 0.0491, "step": 10412 }, { "epoch": 19.82025677603424, "grad_norm": 0.09976770728826523, "learning_rate": 3.3896475071451256e-05, "loss": 0.0621, "step": 10413 }, { "epoch": 19.822158820732287, "grad_norm": 0.07440201938152313, "learning_rate": 3.389012384884091e-05, "loss": 0.059, "step": 10414 }, { "epoch": 19.824060865430337, "grad_norm": 0.028187036514282227, "learning_rate": 3.388377262623055e-05, "loss": 0.0376, "step": 10415 }, { "epoch": 19.82596291012839, "grad_norm": 0.24573412537574768, "learning_rate": 3.38774214036202e-05, "loss": 0.1015, "step": 10416 }, { "epoch": 19.82786495482644, "grad_norm": 0.12045437842607498, "learning_rate": 3.387107018100984e-05, "loss": 0.0506, "step": 10417 }, { "epoch": 19.829766999524487, "grad_norm": 0.3481733500957489, "learning_rate": 3.3864718958399495e-05, "loss": 0.0791, "step": 10418 }, { "epoch": 19.83166904422254, "grad_norm": 0.1278395652770996, "learning_rate": 3.385836773578914e-05, "loss": 0.0351, "step": 10419 }, { "epoch": 19.83357108892059, "grad_norm": 0.04318676143884659, "learning_rate": 3.3852016513178785e-05, "loss": 0.0456, "step": 10420 }, { "epoch": 19.835473133618642, "grad_norm": 0.15962186455726624, "learning_rate": 3.3845665290568436e-05, "loss": 0.072, "step": 10421 }, { "epoch": 19.83737517831669, "grad_norm": 0.2105236053466797, "learning_rate": 3.383931406795808e-05, "loss": 0.0533, "step": 10422 }, { "epoch": 19.83927722301474, "grad_norm": 0.17966818809509277, "learning_rate": 3.3832962845347733e-05, "loss": 0.0576, "step": 10423 }, { "epoch": 19.841179267712793, "grad_norm": 0.10667653381824493, "learning_rate": 3.382661162273738e-05, "loss": 0.0466, "step": 10424 }, { "epoch": 19.843081312410842, "grad_norm": 0.13803623616695404, "learning_rate": 3.3820260400127024e-05, "loss": 0.0625, "step": 10425 }, { "epoch": 19.84498335710889, "grad_norm": 0.1208210214972496, "learning_rate": 3.3813909177516675e-05, "loss": 0.0544, "step": 10426 }, { "epoch": 19.846885401806944, "grad_norm": 0.09105329215526581, "learning_rate": 3.380755795490632e-05, "loss": 0.0758, "step": 10427 }, { "epoch": 19.848787446504993, "grad_norm": 0.07196736335754395, "learning_rate": 3.380120673229597e-05, "loss": 0.0597, "step": 10428 }, { "epoch": 19.850689491203042, "grad_norm": 0.21177725493907928, "learning_rate": 3.379485550968561e-05, "loss": 0.0538, "step": 10429 }, { "epoch": 19.852591535901094, "grad_norm": 0.15468494594097137, "learning_rate": 3.378850428707526e-05, "loss": 0.0612, "step": 10430 }, { "epoch": 19.854493580599144, "grad_norm": 0.15754562616348267, "learning_rate": 3.3782153064464914e-05, "loss": 0.0537, "step": 10431 }, { "epoch": 19.856395625297196, "grad_norm": 0.21864154934883118, "learning_rate": 3.377580184185456e-05, "loss": 0.057, "step": 10432 }, { "epoch": 19.858297669995245, "grad_norm": 0.19252237677574158, "learning_rate": 3.376945061924421e-05, "loss": 0.0537, "step": 10433 }, { "epoch": 19.860199714693294, "grad_norm": 0.1940947026014328, "learning_rate": 3.376309939663385e-05, "loss": 0.0606, "step": 10434 }, { "epoch": 19.862101759391347, "grad_norm": 0.14336170256137848, "learning_rate": 3.37567481740235e-05, "loss": 0.0673, "step": 10435 }, { "epoch": 19.864003804089396, "grad_norm": 0.09128237515687943, "learning_rate": 3.3750396951413146e-05, "loss": 0.0648, "step": 10436 }, { "epoch": 19.865905848787445, "grad_norm": 0.10571899265050888, "learning_rate": 3.37440457288028e-05, "loss": 0.0566, "step": 10437 }, { "epoch": 19.867807893485498, "grad_norm": 0.0520542673766613, "learning_rate": 3.373769450619244e-05, "loss": 0.083, "step": 10438 }, { "epoch": 19.869709938183547, "grad_norm": 0.0973014086484909, "learning_rate": 3.373134328358209e-05, "loss": 0.0561, "step": 10439 }, { "epoch": 19.871611982881596, "grad_norm": 0.1153886690735817, "learning_rate": 3.372499206097174e-05, "loss": 0.0594, "step": 10440 }, { "epoch": 19.87351402757965, "grad_norm": 0.0537719763815403, "learning_rate": 3.3718640838361385e-05, "loss": 0.0385, "step": 10441 }, { "epoch": 19.875416072277698, "grad_norm": 0.0892142504453659, "learning_rate": 3.371228961575104e-05, "loss": 0.0516, "step": 10442 }, { "epoch": 19.87731811697575, "grad_norm": 0.11739884316921234, "learning_rate": 3.370593839314068e-05, "loss": 0.0526, "step": 10443 }, { "epoch": 19.8792201616738, "grad_norm": 0.08801928907632828, "learning_rate": 3.369958717053033e-05, "loss": 0.0581, "step": 10444 }, { "epoch": 19.88112220637185, "grad_norm": 0.12499243021011353, "learning_rate": 3.369323594791998e-05, "loss": 0.0594, "step": 10445 }, { "epoch": 19.8830242510699, "grad_norm": 0.14099840819835663, "learning_rate": 3.3686884725309624e-05, "loss": 0.0634, "step": 10446 }, { "epoch": 19.88492629576795, "grad_norm": 0.12861575186252594, "learning_rate": 3.368053350269927e-05, "loss": 0.0371, "step": 10447 }, { "epoch": 19.886828340466, "grad_norm": 0.16485625505447388, "learning_rate": 3.3674182280088914e-05, "loss": 0.0504, "step": 10448 }, { "epoch": 19.888730385164052, "grad_norm": 0.07762710005044937, "learning_rate": 3.3667831057478566e-05, "loss": 0.0523, "step": 10449 }, { "epoch": 19.8906324298621, "grad_norm": 0.07982052117586136, "learning_rate": 3.366147983486822e-05, "loss": 0.0543, "step": 10450 }, { "epoch": 19.89253447456015, "grad_norm": 0.05542469024658203, "learning_rate": 3.365512861225786e-05, "loss": 0.0465, "step": 10451 }, { "epoch": 19.894436519258203, "grad_norm": 0.1404300183057785, "learning_rate": 3.364877738964751e-05, "loss": 0.0462, "step": 10452 }, { "epoch": 19.896338563956252, "grad_norm": 0.14150568842887878, "learning_rate": 3.364242616703715e-05, "loss": 0.0528, "step": 10453 }, { "epoch": 19.898240608654305, "grad_norm": 0.3389735817909241, "learning_rate": 3.3636074944426805e-05, "loss": 0.0537, "step": 10454 }, { "epoch": 19.900142653352354, "grad_norm": 0.0653960183262825, "learning_rate": 3.362972372181645e-05, "loss": 0.0503, "step": 10455 }, { "epoch": 19.902044698050403, "grad_norm": 0.128154456615448, "learning_rate": 3.3623372499206095e-05, "loss": 0.0627, "step": 10456 }, { "epoch": 19.903946742748456, "grad_norm": 0.17091745138168335, "learning_rate": 3.361702127659575e-05, "loss": 0.0774, "step": 10457 }, { "epoch": 19.905848787446505, "grad_norm": 0.28092002868652344, "learning_rate": 3.361067005398539e-05, "loss": 0.0446, "step": 10458 }, { "epoch": 19.907750832144554, "grad_norm": 0.10894166678190231, "learning_rate": 3.3604318831375044e-05, "loss": 0.0457, "step": 10459 }, { "epoch": 19.909652876842607, "grad_norm": 0.13073298335075378, "learning_rate": 3.359796760876469e-05, "loss": 0.0335, "step": 10460 }, { "epoch": 19.911554921540656, "grad_norm": 0.1777719408273697, "learning_rate": 3.3591616386154334e-05, "loss": 0.0654, "step": 10461 }, { "epoch": 19.913456966238705, "grad_norm": 0.24284358322620392, "learning_rate": 3.3585265163543986e-05, "loss": 0.0787, "step": 10462 }, { "epoch": 19.915359010936758, "grad_norm": 0.0868089497089386, "learning_rate": 3.357891394093363e-05, "loss": 0.0881, "step": 10463 }, { "epoch": 19.917261055634807, "grad_norm": 0.19045250117778778, "learning_rate": 3.357256271832328e-05, "loss": 0.0762, "step": 10464 }, { "epoch": 19.91916310033286, "grad_norm": 0.10188870877027512, "learning_rate": 3.356621149571292e-05, "loss": 0.0672, "step": 10465 }, { "epoch": 19.92106514503091, "grad_norm": 0.06768245249986649, "learning_rate": 3.355986027310257e-05, "loss": 0.0564, "step": 10466 }, { "epoch": 19.922967189728958, "grad_norm": 0.20635931193828583, "learning_rate": 3.355350905049222e-05, "loss": 0.0504, "step": 10467 }, { "epoch": 19.92486923442701, "grad_norm": 0.126325324177742, "learning_rate": 3.354715782788187e-05, "loss": 0.0491, "step": 10468 }, { "epoch": 19.92677127912506, "grad_norm": 0.118792824447155, "learning_rate": 3.354080660527152e-05, "loss": 0.0579, "step": 10469 }, { "epoch": 19.92867332382311, "grad_norm": 0.12369075417518616, "learning_rate": 3.353445538266116e-05, "loss": 0.0572, "step": 10470 }, { "epoch": 19.93057536852116, "grad_norm": 0.12031448632478714, "learning_rate": 3.352810416005081e-05, "loss": 0.0564, "step": 10471 }, { "epoch": 19.93247741321921, "grad_norm": 0.12101674824953079, "learning_rate": 3.352175293744046e-05, "loss": 0.0612, "step": 10472 }, { "epoch": 19.93437945791726, "grad_norm": 0.05656763166189194, "learning_rate": 3.351540171483011e-05, "loss": 0.0558, "step": 10473 }, { "epoch": 19.936281502615312, "grad_norm": 0.1553266942501068, "learning_rate": 3.3509050492219754e-05, "loss": 0.0457, "step": 10474 }, { "epoch": 19.93818354731336, "grad_norm": 0.21062347292900085, "learning_rate": 3.35026992696094e-05, "loss": 0.0558, "step": 10475 }, { "epoch": 19.940085592011414, "grad_norm": 0.1241660863161087, "learning_rate": 3.349634804699905e-05, "loss": 0.0851, "step": 10476 }, { "epoch": 19.941987636709463, "grad_norm": 0.10217336565256119, "learning_rate": 3.3489996824388696e-05, "loss": 0.0683, "step": 10477 }, { "epoch": 19.943889681407512, "grad_norm": 0.2679067850112915, "learning_rate": 3.348364560177835e-05, "loss": 0.0585, "step": 10478 }, { "epoch": 19.945791726105565, "grad_norm": 0.07839953899383545, "learning_rate": 3.347729437916799e-05, "loss": 0.0568, "step": 10479 }, { "epoch": 19.947693770803614, "grad_norm": 0.04855075106024742, "learning_rate": 3.347094315655764e-05, "loss": 0.063, "step": 10480 }, { "epoch": 19.949595815501663, "grad_norm": 0.10905822366476059, "learning_rate": 3.346459193394729e-05, "loss": 0.0612, "step": 10481 }, { "epoch": 19.951497860199716, "grad_norm": 0.055349789559841156, "learning_rate": 3.3458240711336935e-05, "loss": 0.0628, "step": 10482 }, { "epoch": 19.953399904897765, "grad_norm": 0.05278193950653076, "learning_rate": 3.345188948872658e-05, "loss": 0.0554, "step": 10483 }, { "epoch": 19.955301949595814, "grad_norm": 0.10295037180185318, "learning_rate": 3.3445538266116225e-05, "loss": 0.0515, "step": 10484 }, { "epoch": 19.957203994293867, "grad_norm": 0.123154416680336, "learning_rate": 3.3439187043505877e-05, "loss": 0.047, "step": 10485 }, { "epoch": 19.959106038991916, "grad_norm": 0.0768277496099472, "learning_rate": 3.343283582089553e-05, "loss": 0.0667, "step": 10486 }, { "epoch": 19.96100808368997, "grad_norm": 0.15484827756881714, "learning_rate": 3.3426484598285173e-05, "loss": 0.0769, "step": 10487 }, { "epoch": 19.962910128388017, "grad_norm": 0.06407146155834198, "learning_rate": 3.342013337567482e-05, "loss": 0.0435, "step": 10488 }, { "epoch": 19.964812173086067, "grad_norm": 0.04490051791071892, "learning_rate": 3.3413782153064464e-05, "loss": 0.0437, "step": 10489 }, { "epoch": 19.96671421778412, "grad_norm": 0.1047554686665535, "learning_rate": 3.3407430930454115e-05, "loss": 0.034, "step": 10490 }, { "epoch": 19.96861626248217, "grad_norm": 0.14887818694114685, "learning_rate": 3.340107970784376e-05, "loss": 0.0666, "step": 10491 }, { "epoch": 19.970518307180217, "grad_norm": 0.16302020847797394, "learning_rate": 3.3394728485233406e-05, "loss": 0.054, "step": 10492 }, { "epoch": 19.97242035187827, "grad_norm": 0.10449177771806717, "learning_rate": 3.338837726262306e-05, "loss": 0.0347, "step": 10493 }, { "epoch": 19.97432239657632, "grad_norm": 0.05189422518014908, "learning_rate": 3.33820260400127e-05, "loss": 0.0609, "step": 10494 }, { "epoch": 19.97622444127437, "grad_norm": 0.10788179188966751, "learning_rate": 3.3375674817402354e-05, "loss": 0.0685, "step": 10495 }, { "epoch": 19.97812648597242, "grad_norm": 0.20024745166301727, "learning_rate": 3.3369323594792e-05, "loss": 0.0687, "step": 10496 }, { "epoch": 19.98002853067047, "grad_norm": 0.14526651799678802, "learning_rate": 3.3362972372181644e-05, "loss": 0.0574, "step": 10497 }, { "epoch": 19.981930575368523, "grad_norm": 0.06175341457128525, "learning_rate": 3.3356621149571296e-05, "loss": 0.0566, "step": 10498 }, { "epoch": 19.983832620066572, "grad_norm": 0.08872699737548828, "learning_rate": 3.335026992696094e-05, "loss": 0.0389, "step": 10499 }, { "epoch": 19.98573466476462, "grad_norm": 0.10846307873725891, "learning_rate": 3.334391870435059e-05, "loss": 0.0648, "step": 10500 }, { "epoch": 19.987636709462674, "grad_norm": 0.13726340234279633, "learning_rate": 3.333756748174023e-05, "loss": 0.0515, "step": 10501 }, { "epoch": 19.989538754160723, "grad_norm": 0.08092505484819412, "learning_rate": 3.333121625912988e-05, "loss": 0.0444, "step": 10502 }, { "epoch": 19.991440798858772, "grad_norm": 0.1583898663520813, "learning_rate": 3.332486503651953e-05, "loss": 0.044, "step": 10503 }, { "epoch": 19.993342843556825, "grad_norm": 0.3377038538455963, "learning_rate": 3.331851381390918e-05, "loss": 0.0705, "step": 10504 }, { "epoch": 19.995244888254874, "grad_norm": 0.14866070449352264, "learning_rate": 3.331216259129883e-05, "loss": 0.0688, "step": 10505 }, { "epoch": 19.997146932952923, "grad_norm": 0.08868202567100525, "learning_rate": 3.330581136868847e-05, "loss": 0.0554, "step": 10506 }, { "epoch": 19.999048977650975, "grad_norm": 0.058857131749391556, "learning_rate": 3.329946014607812e-05, "loss": 0.0576, "step": 10507 }, { "epoch": 20.000951022349025, "grad_norm": 0.24435585737228394, "learning_rate": 3.329310892346777e-05, "loss": 0.1003, "step": 10508 }, { "epoch": 20.002853067047077, "grad_norm": 0.1501217633485794, "learning_rate": 3.328675770085742e-05, "loss": 0.0406, "step": 10509 }, { "epoch": 20.004755111745126, "grad_norm": 0.04581620916724205, "learning_rate": 3.3280406478247064e-05, "loss": 0.0585, "step": 10510 }, { "epoch": 20.006657156443175, "grad_norm": 0.034946098923683167, "learning_rate": 3.327405525563671e-05, "loss": 0.0249, "step": 10511 }, { "epoch": 20.008559201141228, "grad_norm": 0.057416193187236786, "learning_rate": 3.326770403302636e-05, "loss": 0.0498, "step": 10512 }, { "epoch": 20.010461245839277, "grad_norm": 0.04225662723183632, "learning_rate": 3.3261352810416006e-05, "loss": 0.0572, "step": 10513 }, { "epoch": 20.012363290537326, "grad_norm": 0.05202086642384529, "learning_rate": 3.325500158780566e-05, "loss": 0.0347, "step": 10514 }, { "epoch": 20.01426533523538, "grad_norm": 0.03269398584961891, "learning_rate": 3.32486503651953e-05, "loss": 0.0497, "step": 10515 }, { "epoch": 20.016167379933428, "grad_norm": 0.0800165981054306, "learning_rate": 3.324229914258495e-05, "loss": 0.0473, "step": 10516 }, { "epoch": 20.018069424631477, "grad_norm": 0.033391691744327545, "learning_rate": 3.32359479199746e-05, "loss": 0.0513, "step": 10517 }, { "epoch": 20.01997146932953, "grad_norm": 0.04623610898852348, "learning_rate": 3.3229596697364245e-05, "loss": 0.0658, "step": 10518 }, { "epoch": 20.02187351402758, "grad_norm": 0.10310270637273788, "learning_rate": 3.322324547475389e-05, "loss": 0.0581, "step": 10519 }, { "epoch": 20.02377555872563, "grad_norm": 0.0826338678598404, "learning_rate": 3.3216894252143535e-05, "loss": 0.0469, "step": 10520 }, { "epoch": 20.02567760342368, "grad_norm": 0.12246903777122498, "learning_rate": 3.321054302953319e-05, "loss": 0.0397, "step": 10521 }, { "epoch": 20.02757964812173, "grad_norm": 0.1892233043909073, "learning_rate": 3.320419180692283e-05, "loss": 0.0516, "step": 10522 }, { "epoch": 20.029481692819783, "grad_norm": 0.355392187833786, "learning_rate": 3.3197840584312484e-05, "loss": 0.0651, "step": 10523 }, { "epoch": 20.03138373751783, "grad_norm": 0.11444549262523651, "learning_rate": 3.319148936170213e-05, "loss": 0.0398, "step": 10524 }, { "epoch": 20.03328578221588, "grad_norm": 0.041265226900577545, "learning_rate": 3.3185138139091774e-05, "loss": 0.0417, "step": 10525 }, { "epoch": 20.035187826913933, "grad_norm": 0.16662882268428802, "learning_rate": 3.3178786916481426e-05, "loss": 0.0507, "step": 10526 }, { "epoch": 20.037089871611983, "grad_norm": 0.040374815464019775, "learning_rate": 3.317243569387107e-05, "loss": 0.0491, "step": 10527 }, { "epoch": 20.03899191631003, "grad_norm": 0.09320390224456787, "learning_rate": 3.3166084471260716e-05, "loss": 0.0601, "step": 10528 }, { "epoch": 20.040893961008084, "grad_norm": 0.09097753465175629, "learning_rate": 3.315973324865037e-05, "loss": 0.0487, "step": 10529 }, { "epoch": 20.042796005706133, "grad_norm": 0.06951908022165298, "learning_rate": 3.315338202604001e-05, "loss": 0.0507, "step": 10530 }, { "epoch": 20.044698050404186, "grad_norm": 0.11662323772907257, "learning_rate": 3.3147030803429665e-05, "loss": 0.0621, "step": 10531 }, { "epoch": 20.046600095102235, "grad_norm": 0.04502786695957184, "learning_rate": 3.314067958081931e-05, "loss": 0.0721, "step": 10532 }, { "epoch": 20.048502139800284, "grad_norm": 0.11359043419361115, "learning_rate": 3.3134328358208955e-05, "loss": 0.0604, "step": 10533 }, { "epoch": 20.050404184498337, "grad_norm": 0.06094936281442642, "learning_rate": 3.312797713559861e-05, "loss": 0.0504, "step": 10534 }, { "epoch": 20.052306229196386, "grad_norm": 0.07370217144489288, "learning_rate": 3.312162591298825e-05, "loss": 0.0448, "step": 10535 }, { "epoch": 20.054208273894435, "grad_norm": 0.0950629934668541, "learning_rate": 3.3115274690377904e-05, "loss": 0.0699, "step": 10536 }, { "epoch": 20.056110318592488, "grad_norm": 0.39011773467063904, "learning_rate": 3.310892346776754e-05, "loss": 0.0969, "step": 10537 }, { "epoch": 20.058012363290537, "grad_norm": 0.10561854392290115, "learning_rate": 3.3102572245157194e-05, "loss": 0.0477, "step": 10538 }, { "epoch": 20.059914407988586, "grad_norm": 0.041889406740665436, "learning_rate": 3.309622102254684e-05, "loss": 0.0513, "step": 10539 }, { "epoch": 20.06181645268664, "grad_norm": 0.07895664125680923, "learning_rate": 3.308986979993649e-05, "loss": 0.0527, "step": 10540 }, { "epoch": 20.063718497384688, "grad_norm": 0.10135366022586823, "learning_rate": 3.308351857732614e-05, "loss": 0.0405, "step": 10541 }, { "epoch": 20.06562054208274, "grad_norm": 0.05142286792397499, "learning_rate": 3.307716735471578e-05, "loss": 0.0487, "step": 10542 }, { "epoch": 20.06752258678079, "grad_norm": 0.08144795894622803, "learning_rate": 3.307081613210543e-05, "loss": 0.0546, "step": 10543 }, { "epoch": 20.06942463147884, "grad_norm": 0.04765758290886879, "learning_rate": 3.306446490949508e-05, "loss": 0.0493, "step": 10544 }, { "epoch": 20.07132667617689, "grad_norm": 0.13438722491264343, "learning_rate": 3.305811368688473e-05, "loss": 0.0552, "step": 10545 }, { "epoch": 20.07322872087494, "grad_norm": 0.07511651515960693, "learning_rate": 3.3051762464274375e-05, "loss": 0.05, "step": 10546 }, { "epoch": 20.07513076557299, "grad_norm": 0.18269193172454834, "learning_rate": 3.304541124166402e-05, "loss": 0.0574, "step": 10547 }, { "epoch": 20.077032810271042, "grad_norm": 0.1806280016899109, "learning_rate": 3.303906001905367e-05, "loss": 0.0662, "step": 10548 }, { "epoch": 20.07893485496909, "grad_norm": 0.030926980078220367, "learning_rate": 3.3032708796443317e-05, "loss": 0.0418, "step": 10549 }, { "epoch": 20.08083689966714, "grad_norm": 0.14629711210727692, "learning_rate": 3.302635757383297e-05, "loss": 0.0574, "step": 10550 }, { "epoch": 20.082738944365193, "grad_norm": 0.17619511485099792, "learning_rate": 3.302000635122261e-05, "loss": 0.0517, "step": 10551 }, { "epoch": 20.084640989063242, "grad_norm": 0.05990555137395859, "learning_rate": 3.301365512861226e-05, "loss": 0.0466, "step": 10552 }, { "epoch": 20.086543033761295, "grad_norm": 0.0348682776093483, "learning_rate": 3.300730390600191e-05, "loss": 0.0341, "step": 10553 }, { "epoch": 20.088445078459344, "grad_norm": 0.13899552822113037, "learning_rate": 3.3000952683391555e-05, "loss": 0.0481, "step": 10554 }, { "epoch": 20.090347123157393, "grad_norm": 0.07599950581789017, "learning_rate": 3.29946014607812e-05, "loss": 0.0579, "step": 10555 }, { "epoch": 20.092249167855446, "grad_norm": 0.117378830909729, "learning_rate": 3.2988250238170846e-05, "loss": 0.0689, "step": 10556 }, { "epoch": 20.094151212553495, "grad_norm": 0.30050474405288696, "learning_rate": 3.29818990155605e-05, "loss": 0.1331, "step": 10557 }, { "epoch": 20.096053257251544, "grad_norm": 0.040973104536533356, "learning_rate": 3.297554779295014e-05, "loss": 0.0588, "step": 10558 }, { "epoch": 20.097955301949597, "grad_norm": 0.05349220335483551, "learning_rate": 3.2969196570339794e-05, "loss": 0.0563, "step": 10559 }, { "epoch": 20.099857346647646, "grad_norm": 0.14277498424053192, "learning_rate": 3.296284534772944e-05, "loss": 0.0468, "step": 10560 }, { "epoch": 20.101759391345695, "grad_norm": 0.11612348258495331, "learning_rate": 3.2956494125119084e-05, "loss": 0.0646, "step": 10561 }, { "epoch": 20.103661436043748, "grad_norm": 0.08324389159679413, "learning_rate": 3.2950142902508736e-05, "loss": 0.0504, "step": 10562 }, { "epoch": 20.105563480741797, "grad_norm": 0.12529146671295166, "learning_rate": 3.294379167989838e-05, "loss": 0.0433, "step": 10563 }, { "epoch": 20.10746552543985, "grad_norm": 0.20667465031147003, "learning_rate": 3.2937440457288026e-05, "loss": 0.0534, "step": 10564 }, { "epoch": 20.1093675701379, "grad_norm": 0.06124941259622574, "learning_rate": 3.293108923467768e-05, "loss": 0.0474, "step": 10565 }, { "epoch": 20.111269614835948, "grad_norm": 0.05799207463860512, "learning_rate": 3.292473801206732e-05, "loss": 0.0506, "step": 10566 }, { "epoch": 20.113171659534, "grad_norm": 0.07804218679666519, "learning_rate": 3.2918386789456975e-05, "loss": 0.0523, "step": 10567 }, { "epoch": 20.11507370423205, "grad_norm": 0.044461559504270554, "learning_rate": 3.291203556684662e-05, "loss": 0.063, "step": 10568 }, { "epoch": 20.1169757489301, "grad_norm": 0.07837837934494019, "learning_rate": 3.2905684344236265e-05, "loss": 0.0563, "step": 10569 }, { "epoch": 20.11887779362815, "grad_norm": 0.04336393624544144, "learning_rate": 3.289933312162591e-05, "loss": 0.0471, "step": 10570 }, { "epoch": 20.1207798383262, "grad_norm": 0.10369131714105606, "learning_rate": 3.289298189901556e-05, "loss": 0.0577, "step": 10571 }, { "epoch": 20.12268188302425, "grad_norm": 0.16776293516159058, "learning_rate": 3.2886630676405214e-05, "loss": 0.0368, "step": 10572 }, { "epoch": 20.124583927722302, "grad_norm": 0.20040836930274963, "learning_rate": 3.288027945379485e-05, "loss": 0.0735, "step": 10573 }, { "epoch": 20.12648597242035, "grad_norm": 0.11139001697301865, "learning_rate": 3.2873928231184504e-05, "loss": 0.0689, "step": 10574 }, { "epoch": 20.128388017118404, "grad_norm": 0.08649170398712158, "learning_rate": 3.286757700857415e-05, "loss": 0.057, "step": 10575 }, { "epoch": 20.130290061816453, "grad_norm": 0.09043058007955551, "learning_rate": 3.28612257859638e-05, "loss": 0.0806, "step": 10576 }, { "epoch": 20.132192106514502, "grad_norm": 0.03538661077618599, "learning_rate": 3.2854874563353446e-05, "loss": 0.0419, "step": 10577 }, { "epoch": 20.134094151212555, "grad_norm": 0.2169920653104782, "learning_rate": 3.284852334074309e-05, "loss": 0.0542, "step": 10578 }, { "epoch": 20.135996195910604, "grad_norm": 0.1910613477230072, "learning_rate": 3.284217211813274e-05, "loss": 0.0606, "step": 10579 }, { "epoch": 20.137898240608653, "grad_norm": 0.0717383474111557, "learning_rate": 3.283582089552239e-05, "loss": 0.058, "step": 10580 }, { "epoch": 20.139800285306706, "grad_norm": 0.15653537213802338, "learning_rate": 3.282946967291204e-05, "loss": 0.0464, "step": 10581 }, { "epoch": 20.141702330004755, "grad_norm": 0.07739397138357162, "learning_rate": 3.2823118450301685e-05, "loss": 0.06, "step": 10582 }, { "epoch": 20.143604374702804, "grad_norm": 0.10936830937862396, "learning_rate": 3.281676722769133e-05, "loss": 0.0569, "step": 10583 }, { "epoch": 20.145506419400856, "grad_norm": 0.1430310606956482, "learning_rate": 3.281041600508098e-05, "loss": 0.0563, "step": 10584 }, { "epoch": 20.147408464098906, "grad_norm": 0.0776374563574791, "learning_rate": 3.280406478247063e-05, "loss": 0.0461, "step": 10585 }, { "epoch": 20.149310508796958, "grad_norm": 0.11177509278059006, "learning_rate": 3.279771355986028e-05, "loss": 0.047, "step": 10586 }, { "epoch": 20.151212553495007, "grad_norm": 0.06614487618207932, "learning_rate": 3.279136233724992e-05, "loss": 0.0497, "step": 10587 }, { "epoch": 20.153114598193056, "grad_norm": 0.1806342899799347, "learning_rate": 3.278501111463957e-05, "loss": 0.0645, "step": 10588 }, { "epoch": 20.15501664289111, "grad_norm": 0.11989549547433853, "learning_rate": 3.277865989202922e-05, "loss": 0.0521, "step": 10589 }, { "epoch": 20.156918687589158, "grad_norm": 0.04524906724691391, "learning_rate": 3.2772308669418866e-05, "loss": 0.0651, "step": 10590 }, { "epoch": 20.158820732287207, "grad_norm": 0.07851403206586838, "learning_rate": 3.276595744680851e-05, "loss": 0.0574, "step": 10591 }, { "epoch": 20.16072277698526, "grad_norm": 0.13270311057567596, "learning_rate": 3.2759606224198156e-05, "loss": 0.0432, "step": 10592 }, { "epoch": 20.16262482168331, "grad_norm": 0.0337836928665638, "learning_rate": 3.275325500158781e-05, "loss": 0.0444, "step": 10593 }, { "epoch": 20.16452686638136, "grad_norm": 0.12775632739067078, "learning_rate": 3.274690377897745e-05, "loss": 0.0475, "step": 10594 }, { "epoch": 20.16642891107941, "grad_norm": 0.05581698939204216, "learning_rate": 3.2740552556367105e-05, "loss": 0.0341, "step": 10595 }, { "epoch": 20.16833095577746, "grad_norm": 0.06510213762521744, "learning_rate": 3.273420133375675e-05, "loss": 0.0529, "step": 10596 }, { "epoch": 20.170233000475513, "grad_norm": 0.09131671488285065, "learning_rate": 3.2727850111146395e-05, "loss": 0.0457, "step": 10597 }, { "epoch": 20.17213504517356, "grad_norm": 0.21071843802928925, "learning_rate": 3.272149888853605e-05, "loss": 0.0515, "step": 10598 }, { "epoch": 20.17403708987161, "grad_norm": 0.06667834520339966, "learning_rate": 3.271514766592569e-05, "loss": 0.0865, "step": 10599 }, { "epoch": 20.175939134569663, "grad_norm": 0.11008747667074203, "learning_rate": 3.270879644331534e-05, "loss": 0.0742, "step": 10600 }, { "epoch": 20.177841179267713, "grad_norm": 0.2698976397514343, "learning_rate": 3.270244522070499e-05, "loss": 0.0775, "step": 10601 }, { "epoch": 20.17974322396576, "grad_norm": 0.10627546161413193, "learning_rate": 3.2696093998094634e-05, "loss": 0.0513, "step": 10602 }, { "epoch": 20.181645268663814, "grad_norm": 0.041233550757169724, "learning_rate": 3.2689742775484286e-05, "loss": 0.0397, "step": 10603 }, { "epoch": 20.183547313361863, "grad_norm": 0.07916425168514252, "learning_rate": 3.268339155287393e-05, "loss": 0.0485, "step": 10604 }, { "epoch": 20.185449358059916, "grad_norm": 0.060306694358587265, "learning_rate": 3.2677040330263576e-05, "loss": 0.0489, "step": 10605 }, { "epoch": 20.187351402757965, "grad_norm": 0.08756151050329208, "learning_rate": 3.267068910765322e-05, "loss": 0.0771, "step": 10606 }, { "epoch": 20.189253447456014, "grad_norm": 0.26062679290771484, "learning_rate": 3.266433788504287e-05, "loss": 0.0882, "step": 10607 }, { "epoch": 20.191155492154067, "grad_norm": 0.03976987302303314, "learning_rate": 3.2657986662432524e-05, "loss": 0.0528, "step": 10608 }, { "epoch": 20.193057536852116, "grad_norm": 0.056227684020996094, "learning_rate": 3.265163543982216e-05, "loss": 0.0552, "step": 10609 }, { "epoch": 20.194959581550165, "grad_norm": 0.13015170395374298, "learning_rate": 3.2645284217211815e-05, "loss": 0.0598, "step": 10610 }, { "epoch": 20.196861626248218, "grad_norm": 0.05390501394867897, "learning_rate": 3.263893299460146e-05, "loss": 0.0467, "step": 10611 }, { "epoch": 20.198763670946267, "grad_norm": 0.08726727217435837, "learning_rate": 3.263258177199111e-05, "loss": 0.0368, "step": 10612 }, { "epoch": 20.200665715644316, "grad_norm": 0.11441841721534729, "learning_rate": 3.2626230549380757e-05, "loss": 0.0651, "step": 10613 }, { "epoch": 20.20256776034237, "grad_norm": 0.1922193467617035, "learning_rate": 3.26198793267704e-05, "loss": 0.0534, "step": 10614 }, { "epoch": 20.204469805040418, "grad_norm": 0.03666595742106438, "learning_rate": 3.2613528104160053e-05, "loss": 0.0385, "step": 10615 }, { "epoch": 20.20637184973847, "grad_norm": 0.04062128812074661, "learning_rate": 3.26071768815497e-05, "loss": 0.0422, "step": 10616 }, { "epoch": 20.20827389443652, "grad_norm": 0.1665174514055252, "learning_rate": 3.260082565893935e-05, "loss": 0.0449, "step": 10617 }, { "epoch": 20.21017593913457, "grad_norm": 0.16695557534694672, "learning_rate": 3.2594474436328995e-05, "loss": 0.0513, "step": 10618 }, { "epoch": 20.21207798383262, "grad_norm": 0.14871883392333984, "learning_rate": 3.258812321371864e-05, "loss": 0.0531, "step": 10619 }, { "epoch": 20.21398002853067, "grad_norm": 0.04111943021416664, "learning_rate": 3.258177199110829e-05, "loss": 0.0468, "step": 10620 }, { "epoch": 20.21588207322872, "grad_norm": 0.07345671206712723, "learning_rate": 3.257542076849794e-05, "loss": 0.0476, "step": 10621 }, { "epoch": 20.217784117926772, "grad_norm": 0.09153008460998535, "learning_rate": 3.256906954588759e-05, "loss": 0.0476, "step": 10622 }, { "epoch": 20.21968616262482, "grad_norm": 0.07967954874038696, "learning_rate": 3.256271832327723e-05, "loss": 0.0705, "step": 10623 }, { "epoch": 20.22158820732287, "grad_norm": 0.08356297761201859, "learning_rate": 3.255636710066688e-05, "loss": 0.0411, "step": 10624 }, { "epoch": 20.223490252020923, "grad_norm": 0.0991276279091835, "learning_rate": 3.2550015878056524e-05, "loss": 0.0682, "step": 10625 }, { "epoch": 20.225392296718972, "grad_norm": 0.05717969685792923, "learning_rate": 3.2543664655446176e-05, "loss": 0.0605, "step": 10626 }, { "epoch": 20.227294341417025, "grad_norm": 0.14118197560310364, "learning_rate": 3.253731343283582e-05, "loss": 0.0532, "step": 10627 }, { "epoch": 20.229196386115074, "grad_norm": 0.09270215779542923, "learning_rate": 3.2530962210225466e-05, "loss": 0.0666, "step": 10628 }, { "epoch": 20.231098430813123, "grad_norm": 0.09237004071474075, "learning_rate": 3.252461098761512e-05, "loss": 0.0587, "step": 10629 }, { "epoch": 20.233000475511176, "grad_norm": 0.16020096838474274, "learning_rate": 3.251825976500476e-05, "loss": 0.0743, "step": 10630 }, { "epoch": 20.234902520209225, "grad_norm": 0.11568637937307358, "learning_rate": 3.2511908542394415e-05, "loss": 0.0558, "step": 10631 }, { "epoch": 20.236804564907274, "grad_norm": 0.049830514937639236, "learning_rate": 3.250555731978406e-05, "loss": 0.0593, "step": 10632 }, { "epoch": 20.238706609605327, "grad_norm": 0.1097976490855217, "learning_rate": 3.2499206097173705e-05, "loss": 0.0614, "step": 10633 }, { "epoch": 20.240608654303376, "grad_norm": 0.07119275629520416, "learning_rate": 3.249285487456336e-05, "loss": 0.064, "step": 10634 }, { "epoch": 20.242510699001425, "grad_norm": 0.11860954761505127, "learning_rate": 3.2486503651953e-05, "loss": 0.0656, "step": 10635 }, { "epoch": 20.244412743699478, "grad_norm": 0.0620465911924839, "learning_rate": 3.248015242934265e-05, "loss": 0.0463, "step": 10636 }, { "epoch": 20.246314788397527, "grad_norm": 0.09755115956068039, "learning_rate": 3.24738012067323e-05, "loss": 0.0398, "step": 10637 }, { "epoch": 20.24821683309558, "grad_norm": 0.21140997111797333, "learning_rate": 3.2467449984121944e-05, "loss": 0.0716, "step": 10638 }, { "epoch": 20.25011887779363, "grad_norm": 0.38212889432907104, "learning_rate": 3.2461098761511596e-05, "loss": 0.07, "step": 10639 }, { "epoch": 20.252020922491678, "grad_norm": 0.11554834991693497, "learning_rate": 3.245474753890124e-05, "loss": 0.0524, "step": 10640 }, { "epoch": 20.25392296718973, "grad_norm": 0.14220361411571503, "learning_rate": 3.2448396316290886e-05, "loss": 0.0562, "step": 10641 }, { "epoch": 20.25582501188778, "grad_norm": 0.09919548779726028, "learning_rate": 3.244204509368053e-05, "loss": 0.0471, "step": 10642 }, { "epoch": 20.25772705658583, "grad_norm": 0.04594764858484268, "learning_rate": 3.243569387107018e-05, "loss": 0.0479, "step": 10643 }, { "epoch": 20.25962910128388, "grad_norm": 0.1690056025981903, "learning_rate": 3.2429342648459835e-05, "loss": 0.0657, "step": 10644 }, { "epoch": 20.26153114598193, "grad_norm": 0.07409583777189255, "learning_rate": 3.242299142584947e-05, "loss": 0.0647, "step": 10645 }, { "epoch": 20.26343319067998, "grad_norm": 0.049228012561798096, "learning_rate": 3.2416640203239125e-05, "loss": 0.0732, "step": 10646 }, { "epoch": 20.265335235378032, "grad_norm": 0.15843163430690765, "learning_rate": 3.241028898062877e-05, "loss": 0.0509, "step": 10647 }, { "epoch": 20.26723728007608, "grad_norm": 0.15096253156661987, "learning_rate": 3.240393775801842e-05, "loss": 0.0542, "step": 10648 }, { "epoch": 20.269139324774134, "grad_norm": 0.18820707499980927, "learning_rate": 3.239758653540807e-05, "loss": 0.0638, "step": 10649 }, { "epoch": 20.271041369472183, "grad_norm": 0.09935000538825989, "learning_rate": 3.239123531279771e-05, "loss": 0.0653, "step": 10650 }, { "epoch": 20.272943414170232, "grad_norm": 0.05760449171066284, "learning_rate": 3.2384884090187364e-05, "loss": 0.0413, "step": 10651 }, { "epoch": 20.274845458868285, "grad_norm": 0.08053761720657349, "learning_rate": 3.237853286757701e-05, "loss": 0.033, "step": 10652 }, { "epoch": 20.276747503566334, "grad_norm": 0.04330631345510483, "learning_rate": 3.237218164496666e-05, "loss": 0.0395, "step": 10653 }, { "epoch": 20.278649548264383, "grad_norm": 0.11636463552713394, "learning_rate": 3.23658304223563e-05, "loss": 0.0588, "step": 10654 }, { "epoch": 20.280551592962436, "grad_norm": 0.16587413847446442, "learning_rate": 3.235947919974595e-05, "loss": 0.0788, "step": 10655 }, { "epoch": 20.282453637660485, "grad_norm": 0.10477234423160553, "learning_rate": 3.23531279771356e-05, "loss": 0.0715, "step": 10656 }, { "epoch": 20.284355682358534, "grad_norm": 0.05622519552707672, "learning_rate": 3.234677675452525e-05, "loss": 0.0691, "step": 10657 }, { "epoch": 20.286257727056586, "grad_norm": 0.0460372194647789, "learning_rate": 3.23404255319149e-05, "loss": 0.0574, "step": 10658 }, { "epoch": 20.288159771754636, "grad_norm": 0.07962246239185333, "learning_rate": 3.233407430930454e-05, "loss": 0.0682, "step": 10659 }, { "epoch": 20.29006181645269, "grad_norm": 0.10750008374452591, "learning_rate": 3.232772308669419e-05, "loss": 0.0617, "step": 10660 }, { "epoch": 20.291963861150737, "grad_norm": 0.2016543298959732, "learning_rate": 3.2321371864083835e-05, "loss": 0.0619, "step": 10661 }, { "epoch": 20.293865905848786, "grad_norm": 0.05132821947336197, "learning_rate": 3.231502064147349e-05, "loss": 0.0388, "step": 10662 }, { "epoch": 20.29576795054684, "grad_norm": 0.1965903788805008, "learning_rate": 3.230866941886313e-05, "loss": 0.0657, "step": 10663 }, { "epoch": 20.29766999524489, "grad_norm": 0.22762592136859894, "learning_rate": 3.230231819625278e-05, "loss": 0.0711, "step": 10664 }, { "epoch": 20.299572039942937, "grad_norm": 0.06518333405256271, "learning_rate": 3.229596697364243e-05, "loss": 0.0841, "step": 10665 }, { "epoch": 20.30147408464099, "grad_norm": 0.048307519406080246, "learning_rate": 3.2289615751032074e-05, "loss": 0.0357, "step": 10666 }, { "epoch": 20.30337612933904, "grad_norm": 0.04469428211450577, "learning_rate": 3.2283264528421726e-05, "loss": 0.062, "step": 10667 }, { "epoch": 20.30527817403709, "grad_norm": 0.1484098583459854, "learning_rate": 3.227691330581137e-05, "loss": 0.0607, "step": 10668 }, { "epoch": 20.30718021873514, "grad_norm": 0.0734637901186943, "learning_rate": 3.2270562083201016e-05, "loss": 0.0993, "step": 10669 }, { "epoch": 20.30908226343319, "grad_norm": 0.03457577899098396, "learning_rate": 3.226421086059067e-05, "loss": 0.0516, "step": 10670 }, { "epoch": 20.310984308131243, "grad_norm": 0.05358421057462692, "learning_rate": 3.225785963798031e-05, "loss": 0.066, "step": 10671 }, { "epoch": 20.312886352829292, "grad_norm": 0.033646367490291595, "learning_rate": 3.225150841536996e-05, "loss": 0.0477, "step": 10672 }, { "epoch": 20.31478839752734, "grad_norm": 0.10537423938512802, "learning_rate": 3.224515719275961e-05, "loss": 0.0797, "step": 10673 }, { "epoch": 20.316690442225394, "grad_norm": 0.16200460493564606, "learning_rate": 3.2238805970149255e-05, "loss": 0.0626, "step": 10674 }, { "epoch": 20.318592486923443, "grad_norm": 0.07985933870077133, "learning_rate": 3.2232454747538906e-05, "loss": 0.0667, "step": 10675 }, { "epoch": 20.32049453162149, "grad_norm": 0.047756507992744446, "learning_rate": 3.222610352492855e-05, "loss": 0.0641, "step": 10676 }, { "epoch": 20.322396576319544, "grad_norm": 0.04638848453760147, "learning_rate": 3.22197523023182e-05, "loss": 0.0489, "step": 10677 }, { "epoch": 20.324298621017594, "grad_norm": 0.07590161263942719, "learning_rate": 3.221340107970784e-05, "loss": 0.0556, "step": 10678 }, { "epoch": 20.326200665715643, "grad_norm": 0.04426351562142372, "learning_rate": 3.2207049857097494e-05, "loss": 0.0592, "step": 10679 }, { "epoch": 20.328102710413695, "grad_norm": 0.03708796575665474, "learning_rate": 3.220069863448714e-05, "loss": 0.0463, "step": 10680 }, { "epoch": 20.330004755111744, "grad_norm": 0.053088005632162094, "learning_rate": 3.2194347411876784e-05, "loss": 0.063, "step": 10681 }, { "epoch": 20.331906799809797, "grad_norm": 0.12782998383045197, "learning_rate": 3.2187996189266436e-05, "loss": 0.0604, "step": 10682 }, { "epoch": 20.333808844507846, "grad_norm": 0.06219214200973511, "learning_rate": 3.218164496665608e-05, "loss": 0.0762, "step": 10683 }, { "epoch": 20.335710889205895, "grad_norm": 0.15822537243366241, "learning_rate": 3.217529374404573e-05, "loss": 0.0525, "step": 10684 }, { "epoch": 20.337612933903948, "grad_norm": 0.0818241685628891, "learning_rate": 3.216894252143538e-05, "loss": 0.0487, "step": 10685 }, { "epoch": 20.339514978601997, "grad_norm": 0.1592274010181427, "learning_rate": 3.216259129882502e-05, "loss": 0.0469, "step": 10686 }, { "epoch": 20.341417023300046, "grad_norm": 0.10056457668542862, "learning_rate": 3.2156240076214674e-05, "loss": 0.0555, "step": 10687 }, { "epoch": 20.3433190679981, "grad_norm": 0.04942634701728821, "learning_rate": 3.214988885360432e-05, "loss": 0.0548, "step": 10688 }, { "epoch": 20.345221112696148, "grad_norm": 0.13030976057052612, "learning_rate": 3.214353763099397e-05, "loss": 0.0523, "step": 10689 }, { "epoch": 20.347123157394197, "grad_norm": 0.07076043635606766, "learning_rate": 3.213718640838361e-05, "loss": 0.0471, "step": 10690 }, { "epoch": 20.34902520209225, "grad_norm": 0.06825718283653259, "learning_rate": 3.213083518577326e-05, "loss": 0.0574, "step": 10691 }, { "epoch": 20.3509272467903, "grad_norm": 0.17072850465774536, "learning_rate": 3.212448396316291e-05, "loss": 0.0439, "step": 10692 }, { "epoch": 20.35282929148835, "grad_norm": 0.05790894106030464, "learning_rate": 3.211813274055256e-05, "loss": 0.037, "step": 10693 }, { "epoch": 20.3547313361864, "grad_norm": 0.1315700113773346, "learning_rate": 3.211178151794221e-05, "loss": 0.0632, "step": 10694 }, { "epoch": 20.35663338088445, "grad_norm": 0.16016748547554016, "learning_rate": 3.210543029533185e-05, "loss": 0.0655, "step": 10695 }, { "epoch": 20.358535425582502, "grad_norm": 0.07636621594429016, "learning_rate": 3.20990790727215e-05, "loss": 0.0591, "step": 10696 }, { "epoch": 20.36043747028055, "grad_norm": 0.18454785645008087, "learning_rate": 3.2092727850111145e-05, "loss": 0.0617, "step": 10697 }, { "epoch": 20.3623395149786, "grad_norm": 0.1629074215888977, "learning_rate": 3.20863766275008e-05, "loss": 0.0581, "step": 10698 }, { "epoch": 20.364241559676653, "grad_norm": 0.1100032702088356, "learning_rate": 3.208002540489044e-05, "loss": 0.0496, "step": 10699 }, { "epoch": 20.366143604374702, "grad_norm": 0.20545372366905212, "learning_rate": 3.207367418228009e-05, "loss": 0.0573, "step": 10700 }, { "epoch": 20.36804564907275, "grad_norm": 0.07017530500888824, "learning_rate": 3.206732295966974e-05, "loss": 0.0367, "step": 10701 }, { "epoch": 20.369947693770804, "grad_norm": 0.16110143065452576, "learning_rate": 3.2060971737059384e-05, "loss": 0.0636, "step": 10702 }, { "epoch": 20.371849738468853, "grad_norm": 0.20994989573955536, "learning_rate": 3.2054620514449036e-05, "loss": 0.056, "step": 10703 }, { "epoch": 20.373751783166906, "grad_norm": 0.10679108649492264, "learning_rate": 3.204826929183868e-05, "loss": 0.0571, "step": 10704 }, { "epoch": 20.375653827864955, "grad_norm": 0.0616210512816906, "learning_rate": 3.2041918069228326e-05, "loss": 0.0654, "step": 10705 }, { "epoch": 20.377555872563004, "grad_norm": 0.04933302849531174, "learning_rate": 3.203556684661798e-05, "loss": 0.0616, "step": 10706 }, { "epoch": 20.379457917261057, "grad_norm": 0.029315801337361336, "learning_rate": 3.202921562400762e-05, "loss": 0.0435, "step": 10707 }, { "epoch": 20.381359961959106, "grad_norm": 0.0827157124876976, "learning_rate": 3.202286440139727e-05, "loss": 0.0386, "step": 10708 }, { "epoch": 20.383262006657155, "grad_norm": 0.12883013486862183, "learning_rate": 3.201651317878691e-05, "loss": 0.0499, "step": 10709 }, { "epoch": 20.385164051355208, "grad_norm": 0.12958528101444244, "learning_rate": 3.2010161956176565e-05, "loss": 0.0619, "step": 10710 }, { "epoch": 20.387066096053257, "grad_norm": 0.057290948927402496, "learning_rate": 3.200381073356622e-05, "loss": 0.0534, "step": 10711 }, { "epoch": 20.388968140751306, "grad_norm": 0.06070676073431969, "learning_rate": 3.199745951095586e-05, "loss": 0.0652, "step": 10712 }, { "epoch": 20.39087018544936, "grad_norm": 0.10806555300951004, "learning_rate": 3.199110828834551e-05, "loss": 0.0411, "step": 10713 }, { "epoch": 20.392772230147408, "grad_norm": 0.07100493460893631, "learning_rate": 3.198475706573515e-05, "loss": 0.0441, "step": 10714 }, { "epoch": 20.39467427484546, "grad_norm": 0.1170794889330864, "learning_rate": 3.1978405843124804e-05, "loss": 0.0545, "step": 10715 }, { "epoch": 20.39657631954351, "grad_norm": 0.07554541528224945, "learning_rate": 3.197205462051445e-05, "loss": 0.0424, "step": 10716 }, { "epoch": 20.39847836424156, "grad_norm": 0.17473971843719482, "learning_rate": 3.1965703397904094e-05, "loss": 0.0444, "step": 10717 }, { "epoch": 20.40038040893961, "grad_norm": 0.11114981025457382, "learning_rate": 3.1959352175293746e-05, "loss": 0.0478, "step": 10718 }, { "epoch": 20.40228245363766, "grad_norm": 0.15614411234855652, "learning_rate": 3.195300095268339e-05, "loss": 0.0689, "step": 10719 }, { "epoch": 20.40418449833571, "grad_norm": 0.07727070897817612, "learning_rate": 3.194664973007304e-05, "loss": 0.0489, "step": 10720 }, { "epoch": 20.406086543033762, "grad_norm": 0.11544694751501083, "learning_rate": 3.194029850746269e-05, "loss": 0.0627, "step": 10721 }, { "epoch": 20.40798858773181, "grad_norm": 0.07351727038621902, "learning_rate": 3.193394728485233e-05, "loss": 0.07, "step": 10722 }, { "epoch": 20.409890632429864, "grad_norm": 0.04631667584180832, "learning_rate": 3.1927596062241985e-05, "loss": 0.0702, "step": 10723 }, { "epoch": 20.411792677127913, "grad_norm": 0.06886739283800125, "learning_rate": 3.192124483963163e-05, "loss": 0.0393, "step": 10724 }, { "epoch": 20.413694721825962, "grad_norm": 0.05337328091263771, "learning_rate": 3.191489361702128e-05, "loss": 0.0609, "step": 10725 }, { "epoch": 20.415596766524015, "grad_norm": 0.11911852657794952, "learning_rate": 3.190854239441092e-05, "loss": 0.0726, "step": 10726 }, { "epoch": 20.417498811222064, "grad_norm": 0.16817975044250488, "learning_rate": 3.190219117180057e-05, "loss": 0.0719, "step": 10727 }, { "epoch": 20.419400855920113, "grad_norm": 0.04718000814318657, "learning_rate": 3.1895839949190224e-05, "loss": 0.0559, "step": 10728 }, { "epoch": 20.421302900618166, "grad_norm": 0.09537462890148163, "learning_rate": 3.188948872657987e-05, "loss": 0.0762, "step": 10729 }, { "epoch": 20.423204945316215, "grad_norm": 0.18325896561145782, "learning_rate": 3.188313750396952e-05, "loss": 0.058, "step": 10730 }, { "epoch": 20.425106990014264, "grad_norm": 0.28283315896987915, "learning_rate": 3.187678628135916e-05, "loss": 0.0598, "step": 10731 }, { "epoch": 20.427009034712317, "grad_norm": 0.15887336432933807, "learning_rate": 3.187043505874881e-05, "loss": 0.0394, "step": 10732 }, { "epoch": 20.428911079410366, "grad_norm": 0.08057612180709839, "learning_rate": 3.1864083836138456e-05, "loss": 0.0644, "step": 10733 }, { "epoch": 20.43081312410842, "grad_norm": 0.05225507542490959, "learning_rate": 3.185773261352811e-05, "loss": 0.044, "step": 10734 }, { "epoch": 20.432715168806467, "grad_norm": 0.07600190490484238, "learning_rate": 3.185138139091775e-05, "loss": 0.0595, "step": 10735 }, { "epoch": 20.434617213504517, "grad_norm": 0.3140774965286255, "learning_rate": 3.18450301683074e-05, "loss": 0.0707, "step": 10736 }, { "epoch": 20.43651925820257, "grad_norm": 0.09039638191461563, "learning_rate": 3.183867894569705e-05, "loss": 0.0476, "step": 10737 }, { "epoch": 20.43842130290062, "grad_norm": 0.0431450791656971, "learning_rate": 3.1832327723086695e-05, "loss": 0.0644, "step": 10738 }, { "epoch": 20.440323347598667, "grad_norm": 0.11939884722232819, "learning_rate": 3.1825976500476347e-05, "loss": 0.0681, "step": 10739 }, { "epoch": 20.44222539229672, "grad_norm": 0.06911275535821915, "learning_rate": 3.181962527786599e-05, "loss": 0.0501, "step": 10740 }, { "epoch": 20.44412743699477, "grad_norm": 0.18807461857795715, "learning_rate": 3.181327405525564e-05, "loss": 0.0616, "step": 10741 }, { "epoch": 20.44602948169282, "grad_norm": 0.11017563194036484, "learning_rate": 3.180692283264529e-05, "loss": 0.0593, "step": 10742 }, { "epoch": 20.44793152639087, "grad_norm": 0.09336445480585098, "learning_rate": 3.1800571610034934e-05, "loss": 0.0429, "step": 10743 }, { "epoch": 20.44983357108892, "grad_norm": 0.14734186232089996, "learning_rate": 3.179422038742458e-05, "loss": 0.0414, "step": 10744 }, { "epoch": 20.451735615786973, "grad_norm": 0.06388304382562637, "learning_rate": 3.1787869164814224e-05, "loss": 0.0492, "step": 10745 }, { "epoch": 20.453637660485022, "grad_norm": 0.08828426897525787, "learning_rate": 3.1781517942203876e-05, "loss": 0.0416, "step": 10746 }, { "epoch": 20.45553970518307, "grad_norm": 0.10653724521398544, "learning_rate": 3.177516671959353e-05, "loss": 0.0627, "step": 10747 }, { "epoch": 20.457441749881124, "grad_norm": 0.058555085211992264, "learning_rate": 3.176881549698317e-05, "loss": 0.0529, "step": 10748 }, { "epoch": 20.459343794579173, "grad_norm": 0.08683603256940842, "learning_rate": 3.176246427437282e-05, "loss": 0.0595, "step": 10749 }, { "epoch": 20.461245839277222, "grad_norm": 0.1622881293296814, "learning_rate": 3.175611305176246e-05, "loss": 0.0635, "step": 10750 }, { "epoch": 20.463147883975275, "grad_norm": 0.09806717187166214, "learning_rate": 3.1749761829152114e-05, "loss": 0.0549, "step": 10751 }, { "epoch": 20.465049928673324, "grad_norm": 0.17263513803482056, "learning_rate": 3.174341060654176e-05, "loss": 0.0649, "step": 10752 }, { "epoch": 20.466951973371373, "grad_norm": 0.061647363007068634, "learning_rate": 3.1737059383931405e-05, "loss": 0.065, "step": 10753 }, { "epoch": 20.468854018069425, "grad_norm": 0.08864830434322357, "learning_rate": 3.1730708161321056e-05, "loss": 0.0564, "step": 10754 }, { "epoch": 20.470756062767475, "grad_norm": 0.06646085530519485, "learning_rate": 3.17243569387107e-05, "loss": 0.0717, "step": 10755 }, { "epoch": 20.472658107465527, "grad_norm": 0.04390653595328331, "learning_rate": 3.171800571610035e-05, "loss": 0.0627, "step": 10756 }, { "epoch": 20.474560152163576, "grad_norm": 0.0896117091178894, "learning_rate": 3.171165449349e-05, "loss": 0.0781, "step": 10757 }, { "epoch": 20.476462196861625, "grad_norm": 0.07033764570951462, "learning_rate": 3.1705303270879643e-05, "loss": 0.0548, "step": 10758 }, { "epoch": 20.478364241559678, "grad_norm": 0.056597303599119186, "learning_rate": 3.1698952048269295e-05, "loss": 0.0499, "step": 10759 }, { "epoch": 20.480266286257727, "grad_norm": 0.04619000479578972, "learning_rate": 3.169260082565894e-05, "loss": 0.0613, "step": 10760 }, { "epoch": 20.482168330955776, "grad_norm": 0.15724436938762665, "learning_rate": 3.168624960304859e-05, "loss": 0.0471, "step": 10761 }, { "epoch": 20.48407037565383, "grad_norm": 0.06588472425937653, "learning_rate": 3.167989838043823e-05, "loss": 0.0569, "step": 10762 }, { "epoch": 20.485972420351878, "grad_norm": 0.038958970457315445, "learning_rate": 3.167354715782788e-05, "loss": 0.052, "step": 10763 }, { "epoch": 20.487874465049927, "grad_norm": 0.13805849850177765, "learning_rate": 3.166719593521753e-05, "loss": 0.0502, "step": 10764 }, { "epoch": 20.48977650974798, "grad_norm": 0.1267121285200119, "learning_rate": 3.166084471260718e-05, "loss": 0.0529, "step": 10765 }, { "epoch": 20.49167855444603, "grad_norm": 0.04090994969010353, "learning_rate": 3.165449348999683e-05, "loss": 0.0403, "step": 10766 }, { "epoch": 20.49358059914408, "grad_norm": 0.0730077475309372, "learning_rate": 3.164814226738647e-05, "loss": 0.073, "step": 10767 }, { "epoch": 20.49548264384213, "grad_norm": 0.050069231539964676, "learning_rate": 3.164179104477612e-05, "loss": 0.0624, "step": 10768 }, { "epoch": 20.49738468854018, "grad_norm": 0.04610844701528549, "learning_rate": 3.1635439822165766e-05, "loss": 0.04, "step": 10769 }, { "epoch": 20.499286733238232, "grad_norm": 0.19118037819862366, "learning_rate": 3.162908859955542e-05, "loss": 0.0596, "step": 10770 }, { "epoch": 20.50118877793628, "grad_norm": 0.09393315762281418, "learning_rate": 3.162273737694506e-05, "loss": 0.0526, "step": 10771 }, { "epoch": 20.50309082263433, "grad_norm": 0.08483687788248062, "learning_rate": 3.161638615433471e-05, "loss": 0.0662, "step": 10772 }, { "epoch": 20.504992867332383, "grad_norm": 0.1932431012392044, "learning_rate": 3.161003493172436e-05, "loss": 0.0451, "step": 10773 }, { "epoch": 20.506894912030432, "grad_norm": 0.10873065888881683, "learning_rate": 3.1603683709114005e-05, "loss": 0.0485, "step": 10774 }, { "epoch": 20.50879695672848, "grad_norm": 0.08355213701725006, "learning_rate": 3.159733248650366e-05, "loss": 0.0419, "step": 10775 }, { "epoch": 20.510699001426534, "grad_norm": 0.0491744689643383, "learning_rate": 3.15909812638933e-05, "loss": 0.0466, "step": 10776 }, { "epoch": 20.512601046124583, "grad_norm": 0.09892838448286057, "learning_rate": 3.158463004128295e-05, "loss": 0.0563, "step": 10777 }, { "epoch": 20.514503090822636, "grad_norm": 0.09677373617887497, "learning_rate": 3.15782788186726e-05, "loss": 0.0774, "step": 10778 }, { "epoch": 20.516405135520685, "grad_norm": 0.11103257536888123, "learning_rate": 3.1571927596062244e-05, "loss": 0.0595, "step": 10779 }, { "epoch": 20.518307180218734, "grad_norm": 0.19784103333950043, "learning_rate": 3.156557637345189e-05, "loss": 0.0512, "step": 10780 }, { "epoch": 20.520209224916787, "grad_norm": 0.04738070070743561, "learning_rate": 3.1559225150841534e-05, "loss": 0.0466, "step": 10781 }, { "epoch": 20.522111269614836, "grad_norm": 0.11906538903713226, "learning_rate": 3.1552873928231186e-05, "loss": 0.0806, "step": 10782 }, { "epoch": 20.524013314312885, "grad_norm": 0.038742128759622574, "learning_rate": 3.154652270562084e-05, "loss": 0.0581, "step": 10783 }, { "epoch": 20.525915359010938, "grad_norm": 0.1178169995546341, "learning_rate": 3.154017148301048e-05, "loss": 0.0491, "step": 10784 }, { "epoch": 20.527817403708987, "grad_norm": 0.03254316374659538, "learning_rate": 3.153382026040013e-05, "loss": 0.0409, "step": 10785 }, { "epoch": 20.529719448407036, "grad_norm": 0.04235231876373291, "learning_rate": 3.152746903778977e-05, "loss": 0.0597, "step": 10786 }, { "epoch": 20.53162149310509, "grad_norm": 0.16329194605350494, "learning_rate": 3.1521117815179425e-05, "loss": 0.0644, "step": 10787 }, { "epoch": 20.533523537803138, "grad_norm": 0.04553667828440666, "learning_rate": 3.151476659256907e-05, "loss": 0.0485, "step": 10788 }, { "epoch": 20.53542558250119, "grad_norm": 0.13178446888923645, "learning_rate": 3.1508415369958715e-05, "loss": 0.0569, "step": 10789 }, { "epoch": 20.53732762719924, "grad_norm": 0.04360739141702652, "learning_rate": 3.150206414734837e-05, "loss": 0.0662, "step": 10790 }, { "epoch": 20.53922967189729, "grad_norm": 0.06077009066939354, "learning_rate": 3.149571292473801e-05, "loss": 0.0437, "step": 10791 }, { "epoch": 20.54113171659534, "grad_norm": 0.15785424411296844, "learning_rate": 3.1489361702127664e-05, "loss": 0.0656, "step": 10792 }, { "epoch": 20.54303376129339, "grad_norm": 0.05192839354276657, "learning_rate": 3.148301047951731e-05, "loss": 0.0518, "step": 10793 }, { "epoch": 20.54493580599144, "grad_norm": 0.05656667798757553, "learning_rate": 3.1476659256906954e-05, "loss": 0.0665, "step": 10794 }, { "epoch": 20.546837850689492, "grad_norm": 0.044597215950489044, "learning_rate": 3.1470308034296606e-05, "loss": 0.0619, "step": 10795 }, { "epoch": 20.54873989538754, "grad_norm": 0.22907453775405884, "learning_rate": 3.146395681168625e-05, "loss": 0.0696, "step": 10796 }, { "epoch": 20.55064194008559, "grad_norm": 0.05552733689546585, "learning_rate": 3.14576055890759e-05, "loss": 0.0772, "step": 10797 }, { "epoch": 20.552543984783643, "grad_norm": 0.026332750916481018, "learning_rate": 3.145125436646554e-05, "loss": 0.0393, "step": 10798 }, { "epoch": 20.554446029481692, "grad_norm": 0.13045305013656616, "learning_rate": 3.144490314385519e-05, "loss": 0.0458, "step": 10799 }, { "epoch": 20.556348074179745, "grad_norm": 0.051635902374982834, "learning_rate": 3.143855192124484e-05, "loss": 0.0564, "step": 10800 }, { "epoch": 20.558250118877794, "grad_norm": 0.056866101920604706, "learning_rate": 3.143220069863449e-05, "loss": 0.0663, "step": 10801 }, { "epoch": 20.560152163575843, "grad_norm": 0.05314189940690994, "learning_rate": 3.142584947602414e-05, "loss": 0.0668, "step": 10802 }, { "epoch": 20.562054208273896, "grad_norm": 0.1597507894039154, "learning_rate": 3.141949825341378e-05, "loss": 0.0569, "step": 10803 }, { "epoch": 20.563956252971945, "grad_norm": 0.06841409206390381, "learning_rate": 3.141314703080343e-05, "loss": 0.0699, "step": 10804 }, { "epoch": 20.565858297669994, "grad_norm": 0.06581047922372818, "learning_rate": 3.140679580819308e-05, "loss": 0.0535, "step": 10805 }, { "epoch": 20.567760342368047, "grad_norm": 0.11147249490022659, "learning_rate": 3.140044458558273e-05, "loss": 0.0545, "step": 10806 }, { "epoch": 20.569662387066096, "grad_norm": 0.1470136195421219, "learning_rate": 3.1394093362972374e-05, "loss": 0.0508, "step": 10807 }, { "epoch": 20.571564431764145, "grad_norm": 0.12944050133228302, "learning_rate": 3.138774214036202e-05, "loss": 0.0582, "step": 10808 }, { "epoch": 20.573466476462198, "grad_norm": 0.042375169694423676, "learning_rate": 3.138139091775167e-05, "loss": 0.0541, "step": 10809 }, { "epoch": 20.575368521160247, "grad_norm": 0.03796051815152168, "learning_rate": 3.1375039695141316e-05, "loss": 0.0475, "step": 10810 }, { "epoch": 20.5772705658583, "grad_norm": 0.03297824040055275, "learning_rate": 3.136868847253097e-05, "loss": 0.0434, "step": 10811 }, { "epoch": 20.57917261055635, "grad_norm": 0.0863594189286232, "learning_rate": 3.136233724992061e-05, "loss": 0.0382, "step": 10812 }, { "epoch": 20.581074655254397, "grad_norm": 0.13227379322052002, "learning_rate": 3.135598602731026e-05, "loss": 0.0576, "step": 10813 }, { "epoch": 20.58297669995245, "grad_norm": 0.07175970822572708, "learning_rate": 3.134963480469991e-05, "loss": 0.0525, "step": 10814 }, { "epoch": 20.5848787446505, "grad_norm": 0.0745711550116539, "learning_rate": 3.1343283582089554e-05, "loss": 0.0528, "step": 10815 }, { "epoch": 20.58678078934855, "grad_norm": 0.10481584072113037, "learning_rate": 3.13369323594792e-05, "loss": 0.0495, "step": 10816 }, { "epoch": 20.5886828340466, "grad_norm": 0.19122248888015747, "learning_rate": 3.1330581136868845e-05, "loss": 0.0593, "step": 10817 }, { "epoch": 20.59058487874465, "grad_norm": 0.12699630856513977, "learning_rate": 3.1324229914258496e-05, "loss": 0.0621, "step": 10818 }, { "epoch": 20.5924869234427, "grad_norm": 0.11133062094449997, "learning_rate": 3.131787869164814e-05, "loss": 0.0439, "step": 10819 }, { "epoch": 20.594388968140752, "grad_norm": 0.21265095472335815, "learning_rate": 3.131152746903779e-05, "loss": 0.032, "step": 10820 }, { "epoch": 20.5962910128388, "grad_norm": 0.11187831312417984, "learning_rate": 3.130517624642744e-05, "loss": 0.0674, "step": 10821 }, { "epoch": 20.598193057536854, "grad_norm": 0.09113247692584991, "learning_rate": 3.1298825023817083e-05, "loss": 0.0431, "step": 10822 }, { "epoch": 20.600095102234903, "grad_norm": 0.20912741124629974, "learning_rate": 3.1292473801206735e-05, "loss": 0.0459, "step": 10823 }, { "epoch": 20.601997146932952, "grad_norm": 0.3858474791049957, "learning_rate": 3.128612257859638e-05, "loss": 0.0699, "step": 10824 }, { "epoch": 20.603899191631005, "grad_norm": 0.13949212431907654, "learning_rate": 3.1279771355986025e-05, "loss": 0.0672, "step": 10825 }, { "epoch": 20.605801236329054, "grad_norm": 0.05572618916630745, "learning_rate": 3.127342013337568e-05, "loss": 0.0607, "step": 10826 }, { "epoch": 20.607703281027103, "grad_norm": 0.24470728635787964, "learning_rate": 3.126706891076532e-05, "loss": 0.0976, "step": 10827 }, { "epoch": 20.609605325725155, "grad_norm": 0.10430791974067688, "learning_rate": 3.1260717688154974e-05, "loss": 0.0522, "step": 10828 }, { "epoch": 20.611507370423205, "grad_norm": 0.05254757031798363, "learning_rate": 3.125436646554462e-05, "loss": 0.0684, "step": 10829 }, { "epoch": 20.613409415121254, "grad_norm": 0.10193728655576706, "learning_rate": 3.1248015242934264e-05, "loss": 0.0571, "step": 10830 }, { "epoch": 20.615311459819306, "grad_norm": 0.0633193850517273, "learning_rate": 3.1241664020323916e-05, "loss": 0.0625, "step": 10831 }, { "epoch": 20.617213504517355, "grad_norm": 0.15912435948848724, "learning_rate": 3.123531279771356e-05, "loss": 0.077, "step": 10832 }, { "epoch": 20.619115549215408, "grad_norm": 0.1428174376487732, "learning_rate": 3.122896157510321e-05, "loss": 0.057, "step": 10833 }, { "epoch": 20.621017593913457, "grad_norm": 0.18162624537944794, "learning_rate": 3.122261035249285e-05, "loss": 0.061, "step": 10834 }, { "epoch": 20.622919638611506, "grad_norm": 0.34952130913734436, "learning_rate": 3.12162591298825e-05, "loss": 0.1014, "step": 10835 }, { "epoch": 20.62482168330956, "grad_norm": 0.06106233596801758, "learning_rate": 3.120990790727215e-05, "loss": 0.0434, "step": 10836 }, { "epoch": 20.626723728007608, "grad_norm": 0.08583278954029083, "learning_rate": 3.12035566846618e-05, "loss": 0.0609, "step": 10837 }, { "epoch": 20.628625772705657, "grad_norm": 0.3161568343639374, "learning_rate": 3.119720546205145e-05, "loss": 0.0511, "step": 10838 }, { "epoch": 20.63052781740371, "grad_norm": 0.04114923253655434, "learning_rate": 3.119085423944109e-05, "loss": 0.0592, "step": 10839 }, { "epoch": 20.63242986210176, "grad_norm": 0.33630475401878357, "learning_rate": 3.118450301683074e-05, "loss": 0.0786, "step": 10840 }, { "epoch": 20.634331906799808, "grad_norm": 0.19183726608753204, "learning_rate": 3.117815179422039e-05, "loss": 0.0512, "step": 10841 }, { "epoch": 20.63623395149786, "grad_norm": 0.0717271938920021, "learning_rate": 3.117180057161004e-05, "loss": 0.0655, "step": 10842 }, { "epoch": 20.63813599619591, "grad_norm": 0.0996527448296547, "learning_rate": 3.1165449348999684e-05, "loss": 0.0629, "step": 10843 }, { "epoch": 20.640038040893963, "grad_norm": 0.18561841547489166, "learning_rate": 3.115909812638933e-05, "loss": 0.046, "step": 10844 }, { "epoch": 20.64194008559201, "grad_norm": 0.08247508108615875, "learning_rate": 3.115274690377898e-05, "loss": 0.0628, "step": 10845 }, { "epoch": 20.64384213029006, "grad_norm": 0.056294526904821396, "learning_rate": 3.1146395681168626e-05, "loss": 0.0415, "step": 10846 }, { "epoch": 20.645744174988113, "grad_norm": 0.14750158786773682, "learning_rate": 3.114004445855828e-05, "loss": 0.0636, "step": 10847 }, { "epoch": 20.647646219686163, "grad_norm": 0.12316614389419556, "learning_rate": 3.1133693235947916e-05, "loss": 0.0507, "step": 10848 }, { "epoch": 20.64954826438421, "grad_norm": 0.12978743016719818, "learning_rate": 3.112734201333757e-05, "loss": 0.0622, "step": 10849 }, { "epoch": 20.651450309082264, "grad_norm": 0.08613703399896622, "learning_rate": 3.112099079072722e-05, "loss": 0.0529, "step": 10850 }, { "epoch": 20.653352353780313, "grad_norm": 0.1138317883014679, "learning_rate": 3.1114639568116865e-05, "loss": 0.0408, "step": 10851 }, { "epoch": 20.655254398478363, "grad_norm": 0.14546433091163635, "learning_rate": 3.110828834550651e-05, "loss": 0.0593, "step": 10852 }, { "epoch": 20.657156443176415, "grad_norm": 0.11413826048374176, "learning_rate": 3.1101937122896155e-05, "loss": 0.0464, "step": 10853 }, { "epoch": 20.659058487874464, "grad_norm": 0.11910077184438705, "learning_rate": 3.109558590028581e-05, "loss": 0.0645, "step": 10854 }, { "epoch": 20.660960532572517, "grad_norm": 0.058692291378974915, "learning_rate": 3.108923467767545e-05, "loss": 0.0482, "step": 10855 }, { "epoch": 20.662862577270566, "grad_norm": 0.13482342660427094, "learning_rate": 3.1082883455065104e-05, "loss": 0.0626, "step": 10856 }, { "epoch": 20.664764621968615, "grad_norm": 0.07138006389141083, "learning_rate": 3.107653223245475e-05, "loss": 0.0412, "step": 10857 }, { "epoch": 20.666666666666668, "grad_norm": 0.06352374702692032, "learning_rate": 3.1070181009844394e-05, "loss": 0.0512, "step": 10858 }, { "epoch": 20.668568711364717, "grad_norm": 0.13332059979438782, "learning_rate": 3.1063829787234046e-05, "loss": 0.0546, "step": 10859 }, { "epoch": 20.670470756062766, "grad_norm": 0.14220471680164337, "learning_rate": 3.105747856462369e-05, "loss": 0.0702, "step": 10860 }, { "epoch": 20.67237280076082, "grad_norm": 0.2089843600988388, "learning_rate": 3.1051127342013336e-05, "loss": 0.0652, "step": 10861 }, { "epoch": 20.674274845458868, "grad_norm": 0.03339073434472084, "learning_rate": 3.104477611940299e-05, "loss": 0.0281, "step": 10862 }, { "epoch": 20.676176890156917, "grad_norm": 0.04318920150399208, "learning_rate": 3.103842489679263e-05, "loss": 0.0523, "step": 10863 }, { "epoch": 20.67807893485497, "grad_norm": 0.21177279949188232, "learning_rate": 3.1032073674182285e-05, "loss": 0.0564, "step": 10864 }, { "epoch": 20.67998097955302, "grad_norm": 0.08779855817556381, "learning_rate": 3.102572245157193e-05, "loss": 0.0443, "step": 10865 }, { "epoch": 20.68188302425107, "grad_norm": 0.18260325491428375, "learning_rate": 3.1019371228961575e-05, "loss": 0.0574, "step": 10866 }, { "epoch": 20.68378506894912, "grad_norm": 0.18878425657749176, "learning_rate": 3.1013020006351227e-05, "loss": 0.067, "step": 10867 }, { "epoch": 20.68568711364717, "grad_norm": 0.27964478731155396, "learning_rate": 3.100666878374087e-05, "loss": 0.0713, "step": 10868 }, { "epoch": 20.687589158345222, "grad_norm": 0.08817431330680847, "learning_rate": 3.1000317561130524e-05, "loss": 0.0418, "step": 10869 }, { "epoch": 20.68949120304327, "grad_norm": 0.40673351287841797, "learning_rate": 3.099396633852016e-05, "loss": 0.0635, "step": 10870 }, { "epoch": 20.69139324774132, "grad_norm": 0.09810397773981094, "learning_rate": 3.0987615115909814e-05, "loss": 0.055, "step": 10871 }, { "epoch": 20.693295292439373, "grad_norm": 0.1463918834924698, "learning_rate": 3.098126389329946e-05, "loss": 0.0435, "step": 10872 }, { "epoch": 20.695197337137422, "grad_norm": 0.18395887315273285, "learning_rate": 3.097491267068911e-05, "loss": 0.0578, "step": 10873 }, { "epoch": 20.69709938183547, "grad_norm": 0.10693424940109253, "learning_rate": 3.0968561448078756e-05, "loss": 0.0653, "step": 10874 }, { "epoch": 20.699001426533524, "grad_norm": 0.1919068992137909, "learning_rate": 3.09622102254684e-05, "loss": 0.068, "step": 10875 }, { "epoch": 20.700903471231573, "grad_norm": 0.06700928509235382, "learning_rate": 3.095585900285805e-05, "loss": 0.0469, "step": 10876 }, { "epoch": 20.702805515929626, "grad_norm": 0.10424041748046875, "learning_rate": 3.09495077802477e-05, "loss": 0.0499, "step": 10877 }, { "epoch": 20.704707560627675, "grad_norm": 0.09712348133325577, "learning_rate": 3.094315655763735e-05, "loss": 0.0631, "step": 10878 }, { "epoch": 20.706609605325724, "grad_norm": 0.06249113380908966, "learning_rate": 3.0936805335026994e-05, "loss": 0.0542, "step": 10879 }, { "epoch": 20.708511650023777, "grad_norm": 0.1309369057416916, "learning_rate": 3.093045411241664e-05, "loss": 0.0502, "step": 10880 }, { "epoch": 20.710413694721826, "grad_norm": 0.09550520032644272, "learning_rate": 3.092410288980629e-05, "loss": 0.0572, "step": 10881 }, { "epoch": 20.712315739419875, "grad_norm": 0.055158138275146484, "learning_rate": 3.0917751667195936e-05, "loss": 0.0622, "step": 10882 }, { "epoch": 20.714217784117928, "grad_norm": 0.20897230505943298, "learning_rate": 3.091140044458559e-05, "loss": 0.0584, "step": 10883 }, { "epoch": 20.716119828815977, "grad_norm": 0.06125312298536301, "learning_rate": 3.0905049221975227e-05, "loss": 0.0517, "step": 10884 }, { "epoch": 20.718021873514026, "grad_norm": 0.09136564284563065, "learning_rate": 3.089869799936488e-05, "loss": 0.0502, "step": 10885 }, { "epoch": 20.71992391821208, "grad_norm": 0.06094088777899742, "learning_rate": 3.089234677675453e-05, "loss": 0.0575, "step": 10886 }, { "epoch": 20.721825962910128, "grad_norm": 0.12295346707105637, "learning_rate": 3.0885995554144175e-05, "loss": 0.0672, "step": 10887 }, { "epoch": 20.72372800760818, "grad_norm": 0.061890989542007446, "learning_rate": 3.087964433153382e-05, "loss": 0.0631, "step": 10888 }, { "epoch": 20.72563005230623, "grad_norm": 0.08851854503154755, "learning_rate": 3.0873293108923465e-05, "loss": 0.0553, "step": 10889 }, { "epoch": 20.72753209700428, "grad_norm": 0.13970603048801422, "learning_rate": 3.086694188631312e-05, "loss": 0.0537, "step": 10890 }, { "epoch": 20.72943414170233, "grad_norm": 0.13283051550388336, "learning_rate": 3.086059066370276e-05, "loss": 0.0567, "step": 10891 }, { "epoch": 20.73133618640038, "grad_norm": 0.12537896633148193, "learning_rate": 3.0854239441092414e-05, "loss": 0.0542, "step": 10892 }, { "epoch": 20.73323823109843, "grad_norm": 0.1433955729007721, "learning_rate": 3.084788821848206e-05, "loss": 0.0602, "step": 10893 }, { "epoch": 20.735140275796482, "grad_norm": 0.04828595370054245, "learning_rate": 3.0841536995871704e-05, "loss": 0.0453, "step": 10894 }, { "epoch": 20.73704232049453, "grad_norm": 0.08964021503925323, "learning_rate": 3.0835185773261356e-05, "loss": 0.04, "step": 10895 }, { "epoch": 20.73894436519258, "grad_norm": 0.06883792579174042, "learning_rate": 3.0828834550651e-05, "loss": 0.0591, "step": 10896 }, { "epoch": 20.740846409890633, "grad_norm": 0.06266488879919052, "learning_rate": 3.0822483328040646e-05, "loss": 0.0572, "step": 10897 }, { "epoch": 20.742748454588682, "grad_norm": 0.12018706649541855, "learning_rate": 3.08161321054303e-05, "loss": 0.0529, "step": 10898 }, { "epoch": 20.744650499286735, "grad_norm": 0.11263476312160492, "learning_rate": 3.080978088281994e-05, "loss": 0.0564, "step": 10899 }, { "epoch": 20.746552543984784, "grad_norm": 0.05064086988568306, "learning_rate": 3.0803429660209595e-05, "loss": 0.051, "step": 10900 }, { "epoch": 20.748454588682833, "grad_norm": 0.05158840864896774, "learning_rate": 3.079707843759924e-05, "loss": 0.0469, "step": 10901 }, { "epoch": 20.750356633380886, "grad_norm": 0.1185307428240776, "learning_rate": 3.0790727214988885e-05, "loss": 0.059, "step": 10902 }, { "epoch": 20.752258678078935, "grad_norm": 0.09362038969993591, "learning_rate": 3.078437599237853e-05, "loss": 0.0561, "step": 10903 }, { "epoch": 20.754160722776984, "grad_norm": 0.11035136878490448, "learning_rate": 3.077802476976818e-05, "loss": 0.0342, "step": 10904 }, { "epoch": 20.756062767475036, "grad_norm": 0.1329907476902008, "learning_rate": 3.0771673547157834e-05, "loss": 0.0597, "step": 10905 }, { "epoch": 20.757964812173086, "grad_norm": 0.17069882154464722, "learning_rate": 3.076532232454747e-05, "loss": 0.038, "step": 10906 }, { "epoch": 20.759866856871135, "grad_norm": 0.0572471059858799, "learning_rate": 3.0758971101937124e-05, "loss": 0.0602, "step": 10907 }, { "epoch": 20.761768901569187, "grad_norm": 0.0768604725599289, "learning_rate": 3.075261987932677e-05, "loss": 0.0556, "step": 10908 }, { "epoch": 20.763670946267236, "grad_norm": 0.09952004253864288, "learning_rate": 3.074626865671642e-05, "loss": 0.0541, "step": 10909 }, { "epoch": 20.76557299096529, "grad_norm": 0.07463855296373367, "learning_rate": 3.0739917434106066e-05, "loss": 0.0424, "step": 10910 }, { "epoch": 20.767475035663338, "grad_norm": 0.04839564859867096, "learning_rate": 3.073356621149571e-05, "loss": 0.0537, "step": 10911 }, { "epoch": 20.769377080361387, "grad_norm": 0.13054612278938293, "learning_rate": 3.072721498888536e-05, "loss": 0.0589, "step": 10912 }, { "epoch": 20.77127912505944, "grad_norm": 0.05364089086651802, "learning_rate": 3.072086376627501e-05, "loss": 0.0448, "step": 10913 }, { "epoch": 20.77318116975749, "grad_norm": 0.05852840840816498, "learning_rate": 3.071451254366466e-05, "loss": 0.0526, "step": 10914 }, { "epoch": 20.775083214455538, "grad_norm": 0.08128932118415833, "learning_rate": 3.0708161321054305e-05, "loss": 0.0709, "step": 10915 }, { "epoch": 20.77698525915359, "grad_norm": 0.057719286531209946, "learning_rate": 3.070181009844395e-05, "loss": 0.0422, "step": 10916 }, { "epoch": 20.77888730385164, "grad_norm": 0.16810031235218048, "learning_rate": 3.06954588758336e-05, "loss": 0.0791, "step": 10917 }, { "epoch": 20.780789348549693, "grad_norm": 0.11165507137775421, "learning_rate": 3.068910765322325e-05, "loss": 0.0465, "step": 10918 }, { "epoch": 20.78269139324774, "grad_norm": 0.09537408500909805, "learning_rate": 3.06827564306129e-05, "loss": 0.0683, "step": 10919 }, { "epoch": 20.78459343794579, "grad_norm": 0.039117906242609024, "learning_rate": 3.067640520800254e-05, "loss": 0.0376, "step": 10920 }, { "epoch": 20.786495482643844, "grad_norm": 0.05061280354857445, "learning_rate": 3.067005398539219e-05, "loss": 0.0524, "step": 10921 }, { "epoch": 20.788397527341893, "grad_norm": 0.19125020503997803, "learning_rate": 3.066370276278184e-05, "loss": 0.0582, "step": 10922 }, { "epoch": 20.79029957203994, "grad_norm": 0.07217196375131607, "learning_rate": 3.0657351540171486e-05, "loss": 0.0569, "step": 10923 }, { "epoch": 20.792201616737994, "grad_norm": 0.04245905950665474, "learning_rate": 3.065100031756113e-05, "loss": 0.0615, "step": 10924 }, { "epoch": 20.794103661436043, "grad_norm": 0.12768147885799408, "learning_rate": 3.0644649094950776e-05, "loss": 0.0597, "step": 10925 }, { "epoch": 20.796005706134093, "grad_norm": 0.21361514925956726, "learning_rate": 3.063829787234043e-05, "loss": 0.0492, "step": 10926 }, { "epoch": 20.797907750832145, "grad_norm": 0.07605758309364319, "learning_rate": 3.063194664973007e-05, "loss": 0.0569, "step": 10927 }, { "epoch": 20.799809795530194, "grad_norm": 0.2108287811279297, "learning_rate": 3.0625595427119725e-05, "loss": 0.0536, "step": 10928 }, { "epoch": 20.801711840228247, "grad_norm": 0.08548277616500854, "learning_rate": 3.061924420450937e-05, "loss": 0.047, "step": 10929 }, { "epoch": 20.803613884926296, "grad_norm": 0.14243417978286743, "learning_rate": 3.0612892981899015e-05, "loss": 0.0505, "step": 10930 }, { "epoch": 20.805515929624345, "grad_norm": 0.12442498654127121, "learning_rate": 3.060654175928867e-05, "loss": 0.0621, "step": 10931 }, { "epoch": 20.807417974322398, "grad_norm": 0.11285270750522614, "learning_rate": 3.060019053667831e-05, "loss": 0.0724, "step": 10932 }, { "epoch": 20.809320019020447, "grad_norm": 0.1174018383026123, "learning_rate": 3.059383931406796e-05, "loss": 0.0671, "step": 10933 }, { "epoch": 20.811222063718496, "grad_norm": 0.048765238374471664, "learning_rate": 3.058748809145761e-05, "loss": 0.0535, "step": 10934 }, { "epoch": 20.81312410841655, "grad_norm": 0.11892572790384293, "learning_rate": 3.0581136868847254e-05, "loss": 0.0509, "step": 10935 }, { "epoch": 20.815026153114598, "grad_norm": 0.11556268483400345, "learning_rate": 3.0574785646236906e-05, "loss": 0.056, "step": 10936 }, { "epoch": 20.816928197812647, "grad_norm": 0.12910594046115875, "learning_rate": 3.056843442362655e-05, "loss": 0.0657, "step": 10937 }, { "epoch": 20.8188302425107, "grad_norm": 0.17326128482818604, "learning_rate": 3.0562083201016196e-05, "loss": 0.0569, "step": 10938 }, { "epoch": 20.82073228720875, "grad_norm": 0.16716662049293518, "learning_rate": 3.055573197840584e-05, "loss": 0.0531, "step": 10939 }, { "epoch": 20.8226343319068, "grad_norm": 0.06517747044563293, "learning_rate": 3.054938075579549e-05, "loss": 0.0406, "step": 10940 }, { "epoch": 20.82453637660485, "grad_norm": 0.07059972733259201, "learning_rate": 3.0543029533185144e-05, "loss": 0.0558, "step": 10941 }, { "epoch": 20.8264384213029, "grad_norm": 0.05612773075699806, "learning_rate": 3.053667831057478e-05, "loss": 0.0493, "step": 10942 }, { "epoch": 20.828340466000952, "grad_norm": 0.169120654463768, "learning_rate": 3.0530327087964435e-05, "loss": 0.056, "step": 10943 }, { "epoch": 20.830242510699, "grad_norm": 0.07485458999872208, "learning_rate": 3.052397586535408e-05, "loss": 0.0534, "step": 10944 }, { "epoch": 20.83214455539705, "grad_norm": 0.08926668763160706, "learning_rate": 3.051762464274373e-05, "loss": 0.049, "step": 10945 }, { "epoch": 20.834046600095103, "grad_norm": 0.12054421752691269, "learning_rate": 3.051127342013338e-05, "loss": 0.0455, "step": 10946 }, { "epoch": 20.835948644793152, "grad_norm": 0.04459091275930405, "learning_rate": 3.050492219752302e-05, "loss": 0.0416, "step": 10947 }, { "epoch": 20.8378506894912, "grad_norm": 0.057703930884599686, "learning_rate": 3.049857097491267e-05, "loss": 0.07, "step": 10948 }, { "epoch": 20.839752734189254, "grad_norm": 0.13715192675590515, "learning_rate": 3.049221975230232e-05, "loss": 0.0488, "step": 10949 }, { "epoch": 20.841654778887303, "grad_norm": 0.0581369549036026, "learning_rate": 3.048586852969197e-05, "loss": 0.0609, "step": 10950 }, { "epoch": 20.843556823585356, "grad_norm": 0.2517063319683075, "learning_rate": 3.0479517307081612e-05, "loss": 0.0628, "step": 10951 }, { "epoch": 20.845458868283405, "grad_norm": 0.05997535586357117, "learning_rate": 3.047316608447126e-05, "loss": 0.0546, "step": 10952 }, { "epoch": 20.847360912981454, "grad_norm": 0.09997560828924179, "learning_rate": 3.046681486186091e-05, "loss": 0.0532, "step": 10953 }, { "epoch": 20.849262957679507, "grad_norm": 0.15614329278469086, "learning_rate": 3.0460463639250557e-05, "loss": 0.0592, "step": 10954 }, { "epoch": 20.851165002377556, "grad_norm": 0.09338696300983429, "learning_rate": 3.0454112416640206e-05, "loss": 0.0468, "step": 10955 }, { "epoch": 20.853067047075605, "grad_norm": 0.04714600369334221, "learning_rate": 3.044776119402985e-05, "loss": 0.0514, "step": 10956 }, { "epoch": 20.854969091773658, "grad_norm": 0.12118776142597198, "learning_rate": 3.04414099714195e-05, "loss": 0.06, "step": 10957 }, { "epoch": 20.856871136471707, "grad_norm": 0.053253013640642166, "learning_rate": 3.0435058748809148e-05, "loss": 0.0443, "step": 10958 }, { "epoch": 20.858773181169756, "grad_norm": 0.08597280830144882, "learning_rate": 3.0428707526198796e-05, "loss": 0.049, "step": 10959 }, { "epoch": 20.86067522586781, "grad_norm": 0.045080091804265976, "learning_rate": 3.0422356303588438e-05, "loss": 0.0498, "step": 10960 }, { "epoch": 20.862577270565858, "grad_norm": 0.09501554816961288, "learning_rate": 3.041600508097809e-05, "loss": 0.0977, "step": 10961 }, { "epoch": 20.86447931526391, "grad_norm": 0.13913887739181519, "learning_rate": 3.0409653858367738e-05, "loss": 0.0466, "step": 10962 }, { "epoch": 20.86638135996196, "grad_norm": 0.1571308970451355, "learning_rate": 3.0403302635757387e-05, "loss": 0.0637, "step": 10963 }, { "epoch": 20.86828340466001, "grad_norm": 0.10953457653522491, "learning_rate": 3.0396951413147035e-05, "loss": 0.0535, "step": 10964 }, { "epoch": 20.87018544935806, "grad_norm": 0.08388474583625793, "learning_rate": 3.0390600190536677e-05, "loss": 0.0448, "step": 10965 }, { "epoch": 20.87208749405611, "grad_norm": 0.07518678158521652, "learning_rate": 3.0384248967926325e-05, "loss": 0.0478, "step": 10966 }, { "epoch": 20.87398953875416, "grad_norm": 0.1467273086309433, "learning_rate": 3.0377897745315974e-05, "loss": 0.0452, "step": 10967 }, { "epoch": 20.875891583452212, "grad_norm": 0.05250808969140053, "learning_rate": 3.0371546522705626e-05, "loss": 0.0577, "step": 10968 }, { "epoch": 20.87779362815026, "grad_norm": 0.07057197391986847, "learning_rate": 3.0365195300095267e-05, "loss": 0.049, "step": 10969 }, { "epoch": 20.87969567284831, "grad_norm": 0.0900682657957077, "learning_rate": 3.0358844077484916e-05, "loss": 0.0478, "step": 10970 }, { "epoch": 20.881597717546363, "grad_norm": 0.16264529526233673, "learning_rate": 3.0352492854874564e-05, "loss": 0.062, "step": 10971 }, { "epoch": 20.883499762244412, "grad_norm": 0.10332495719194412, "learning_rate": 3.0346141632264213e-05, "loss": 0.065, "step": 10972 }, { "epoch": 20.885401806942465, "grad_norm": 0.04506974667310715, "learning_rate": 3.033979040965386e-05, "loss": 0.046, "step": 10973 }, { "epoch": 20.887303851640514, "grad_norm": 0.1779114454984665, "learning_rate": 3.0333439187043506e-05, "loss": 0.0544, "step": 10974 }, { "epoch": 20.889205896338563, "grad_norm": 0.07354097068309784, "learning_rate": 3.0327087964433155e-05, "loss": 0.061, "step": 10975 }, { "epoch": 20.891107941036616, "grad_norm": 0.0837244987487793, "learning_rate": 3.0320736741822803e-05, "loss": 0.0682, "step": 10976 }, { "epoch": 20.893009985734665, "grad_norm": 0.03713522478938103, "learning_rate": 3.031438551921245e-05, "loss": 0.0564, "step": 10977 }, { "epoch": 20.894912030432714, "grad_norm": 0.09520062804222107, "learning_rate": 3.0308034296602093e-05, "loss": 0.0504, "step": 10978 }, { "epoch": 20.896814075130766, "grad_norm": 0.10911743342876434, "learning_rate": 3.0301683073991745e-05, "loss": 0.0537, "step": 10979 }, { "epoch": 20.898716119828816, "grad_norm": 0.07108257710933685, "learning_rate": 3.0295331851381393e-05, "loss": 0.0442, "step": 10980 }, { "epoch": 20.900618164526865, "grad_norm": 0.12098158895969391, "learning_rate": 3.0288980628771042e-05, "loss": 0.0633, "step": 10981 }, { "epoch": 20.902520209224917, "grad_norm": 0.06423678994178772, "learning_rate": 3.028262940616069e-05, "loss": 0.0583, "step": 10982 }, { "epoch": 20.904422253922966, "grad_norm": 0.20237389206886292, "learning_rate": 3.0276278183550332e-05, "loss": 0.0751, "step": 10983 }, { "epoch": 20.90632429862102, "grad_norm": 0.24858830869197845, "learning_rate": 3.026992696093998e-05, "loss": 0.0766, "step": 10984 }, { "epoch": 20.90822634331907, "grad_norm": 0.03466055914759636, "learning_rate": 3.026357573832963e-05, "loss": 0.0486, "step": 10985 }, { "epoch": 20.910128388017117, "grad_norm": 0.12383092939853668, "learning_rate": 3.025722451571928e-05, "loss": 0.046, "step": 10986 }, { "epoch": 20.91203043271517, "grad_norm": 0.06242097169160843, "learning_rate": 3.0250873293108922e-05, "loss": 0.0532, "step": 10987 }, { "epoch": 20.91393247741322, "grad_norm": 0.09692638367414474, "learning_rate": 3.024452207049857e-05, "loss": 0.0585, "step": 10988 }, { "epoch": 20.91583452211127, "grad_norm": 0.10633906722068787, "learning_rate": 3.023817084788822e-05, "loss": 0.0508, "step": 10989 }, { "epoch": 20.91773656680932, "grad_norm": 0.08840779960155487, "learning_rate": 3.0231819625277868e-05, "loss": 0.0547, "step": 10990 }, { "epoch": 20.91963861150737, "grad_norm": 0.08700352907180786, "learning_rate": 3.0225468402667516e-05, "loss": 0.0617, "step": 10991 }, { "epoch": 20.92154065620542, "grad_norm": 0.08046238124370575, "learning_rate": 3.021911718005716e-05, "loss": 0.0522, "step": 10992 }, { "epoch": 20.923442700903472, "grad_norm": 0.17197772860527039, "learning_rate": 3.021276595744681e-05, "loss": 0.0668, "step": 10993 }, { "epoch": 20.92534474560152, "grad_norm": 0.15205800533294678, "learning_rate": 3.0206414734836458e-05, "loss": 0.0748, "step": 10994 }, { "epoch": 20.927246790299574, "grad_norm": 0.11835450679063797, "learning_rate": 3.0200063512226107e-05, "loss": 0.0563, "step": 10995 }, { "epoch": 20.929148834997623, "grad_norm": 0.09660196304321289, "learning_rate": 3.019371228961575e-05, "loss": 0.0703, "step": 10996 }, { "epoch": 20.931050879695672, "grad_norm": 0.04566892236471176, "learning_rate": 3.01873610670054e-05, "loss": 0.0536, "step": 10997 }, { "epoch": 20.932952924393724, "grad_norm": 0.11894775927066803, "learning_rate": 3.018100984439505e-05, "loss": 0.0624, "step": 10998 }, { "epoch": 20.934854969091774, "grad_norm": 0.05920516699552536, "learning_rate": 3.0174658621784697e-05, "loss": 0.0709, "step": 10999 }, { "epoch": 20.936757013789823, "grad_norm": 0.037830401211977005, "learning_rate": 3.0168307399174346e-05, "loss": 0.0576, "step": 11000 }, { "epoch": 20.938659058487875, "grad_norm": 0.29501423239707947, "learning_rate": 3.0161956176563987e-05, "loss": 0.0718, "step": 11001 }, { "epoch": 20.940561103185924, "grad_norm": 0.17233391106128693, "learning_rate": 3.0155604953953636e-05, "loss": 0.0622, "step": 11002 }, { "epoch": 20.942463147883974, "grad_norm": 0.10794918239116669, "learning_rate": 3.0149253731343284e-05, "loss": 0.0566, "step": 11003 }, { "epoch": 20.944365192582026, "grad_norm": 0.08928713202476501, "learning_rate": 3.0142902508732933e-05, "loss": 0.0667, "step": 11004 }, { "epoch": 20.946267237280075, "grad_norm": 0.11571650952100754, "learning_rate": 3.0136551286122578e-05, "loss": 0.048, "step": 11005 }, { "epoch": 20.948169281978128, "grad_norm": 0.09128861874341965, "learning_rate": 3.0130200063512226e-05, "loss": 0.0675, "step": 11006 }, { "epoch": 20.950071326676177, "grad_norm": 0.06981503218412399, "learning_rate": 3.0123848840901875e-05, "loss": 0.0471, "step": 11007 }, { "epoch": 20.951973371374226, "grad_norm": 0.0702807605266571, "learning_rate": 3.0117497618291523e-05, "loss": 0.0706, "step": 11008 }, { "epoch": 20.95387541607228, "grad_norm": 0.07049190998077393, "learning_rate": 3.011114639568117e-05, "loss": 0.0763, "step": 11009 }, { "epoch": 20.955777460770328, "grad_norm": 0.07275652885437012, "learning_rate": 3.0104795173070817e-05, "loss": 0.0522, "step": 11010 }, { "epoch": 20.957679505468377, "grad_norm": 0.10747750848531723, "learning_rate": 3.0098443950460465e-05, "loss": 0.0555, "step": 11011 }, { "epoch": 20.95958155016643, "grad_norm": 0.20730341970920563, "learning_rate": 3.0092092727850113e-05, "loss": 0.0609, "step": 11012 }, { "epoch": 20.96148359486448, "grad_norm": 0.1200687512755394, "learning_rate": 3.0085741505239762e-05, "loss": 0.0621, "step": 11013 }, { "epoch": 20.96338563956253, "grad_norm": 0.04054923355579376, "learning_rate": 3.0079390282629404e-05, "loss": 0.0502, "step": 11014 }, { "epoch": 20.96528768426058, "grad_norm": 0.08221112191677094, "learning_rate": 3.0073039060019052e-05, "loss": 0.0436, "step": 11015 }, { "epoch": 20.96718972895863, "grad_norm": 0.134440079331398, "learning_rate": 3.0066687837408704e-05, "loss": 0.0751, "step": 11016 }, { "epoch": 20.969091773656682, "grad_norm": 0.07171544432640076, "learning_rate": 3.0060336614798352e-05, "loss": 0.0511, "step": 11017 }, { "epoch": 20.97099381835473, "grad_norm": 0.07600289583206177, "learning_rate": 3.0053985392188e-05, "loss": 0.0471, "step": 11018 }, { "epoch": 20.97289586305278, "grad_norm": 0.06850898265838623, "learning_rate": 3.0047634169577642e-05, "loss": 0.0529, "step": 11019 }, { "epoch": 20.974797907750833, "grad_norm": 0.19148507714271545, "learning_rate": 3.004128294696729e-05, "loss": 0.0604, "step": 11020 }, { "epoch": 20.976699952448882, "grad_norm": 0.06374479085206985, "learning_rate": 3.003493172435694e-05, "loss": 0.0543, "step": 11021 }, { "epoch": 20.97860199714693, "grad_norm": 0.16483956575393677, "learning_rate": 3.0028580501746588e-05, "loss": 0.0646, "step": 11022 }, { "epoch": 20.980504041844984, "grad_norm": 0.2773119807243347, "learning_rate": 3.0022229279136233e-05, "loss": 0.0552, "step": 11023 }, { "epoch": 20.982406086543033, "grad_norm": 0.03720984607934952, "learning_rate": 3.001587805652588e-05, "loss": 0.0346, "step": 11024 }, { "epoch": 20.984308131241086, "grad_norm": 0.05585749074816704, "learning_rate": 3.000952683391553e-05, "loss": 0.0461, "step": 11025 } ], "logging_steps": 1, "max_steps": 15750, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 525, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.8240761081897124e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }