{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 80200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0024937655860349127, "grad_norm": NaN, "learning_rate": 1.9998753117206985e-05, "loss": 7.3204, "step": 10 }, { "epoch": 0.004987531172069825, "grad_norm": 30.45345687866211, "learning_rate": 1.999625935162095e-05, "loss": 6.3759, "step": 20 }, { "epoch": 0.007481296758104738, "grad_norm": 52.34556579589844, "learning_rate": 1.9993765586034916e-05, "loss": 5.2081, "step": 30 }, { "epoch": 0.00997506234413965, "grad_norm": 33.23439025878906, "learning_rate": 1.999127182044888e-05, "loss": 3.7536, "step": 40 }, { "epoch": 0.012468827930174564, "grad_norm": 19.338409423828125, "learning_rate": 1.9988778054862846e-05, "loss": 2.4798, "step": 50 }, { "epoch": 0.014962593516209476, "grad_norm": 5.575465679168701, "learning_rate": 1.998628428927681e-05, "loss": 1.4935, "step": 60 }, { "epoch": 0.017456359102244388, "grad_norm": 1.394232153892517, "learning_rate": 1.9983790523690774e-05, "loss": 1.0447, "step": 70 }, { "epoch": 0.0199501246882793, "grad_norm": 2.6528663635253906, "learning_rate": 1.998129675810474e-05, "loss": 1.1943, "step": 80 }, { "epoch": 0.022443890274314215, "grad_norm": 2.9787819385528564, "learning_rate": 1.9978802992518704e-05, "loss": 1.2081, "step": 90 }, { "epoch": 0.02493765586034913, "grad_norm": 1.774791955947876, "learning_rate": 1.997630922693267e-05, "loss": 0.9248, "step": 100 }, { "epoch": 0.02743142144638404, "grad_norm": 1.5466320514678955, "learning_rate": 1.9973815461346635e-05, "loss": 1.0454, "step": 110 }, { "epoch": 0.029925187032418952, "grad_norm": 2.4883105754852295, "learning_rate": 1.99713216957606e-05, "loss": 1.1251, "step": 120 }, { "epoch": 0.032418952618453865, "grad_norm": 2.3588509559631348, "learning_rate": 1.9968827930174565e-05, "loss": 1.178, "step": 130 }, { "epoch": 0.034912718204488775, "grad_norm": 2.097666025161743, "learning_rate": 1.996633416458853e-05, "loss": 0.937, "step": 140 }, { "epoch": 0.03740648379052369, "grad_norm": 2.220046043395996, "learning_rate": 1.9963840399002496e-05, "loss": 0.9336, "step": 150 }, { "epoch": 0.0399002493765586, "grad_norm": 1.8168127536773682, "learning_rate": 1.996134663341646e-05, "loss": 0.8292, "step": 160 }, { "epoch": 0.04239401496259352, "grad_norm": 2.687063217163086, "learning_rate": 1.9958852867830423e-05, "loss": 0.9569, "step": 170 }, { "epoch": 0.04488778054862843, "grad_norm": 2.103942632675171, "learning_rate": 1.995635910224439e-05, "loss": 0.7538, "step": 180 }, { "epoch": 0.04738154613466334, "grad_norm": 2.371488571166992, "learning_rate": 1.9953865336658357e-05, "loss": 0.8276, "step": 190 }, { "epoch": 0.04987531172069826, "grad_norm": 3.574516534805298, "learning_rate": 1.995137157107232e-05, "loss": 1.021, "step": 200 }, { "epoch": 0.05236907730673317, "grad_norm": 3.9193525314331055, "learning_rate": 1.9948877805486288e-05, "loss": 0.8487, "step": 210 }, { "epoch": 0.05486284289276808, "grad_norm": 2.934713363647461, "learning_rate": 1.994638403990025e-05, "loss": 0.7093, "step": 220 }, { "epoch": 0.057356608478802994, "grad_norm": 2.5535857677459717, "learning_rate": 1.994389027431422e-05, "loss": 0.8764, "step": 230 }, { "epoch": 0.059850374064837904, "grad_norm": 2.949345827102661, "learning_rate": 1.9941396508728182e-05, "loss": 0.8242, "step": 240 }, { "epoch": 0.06234413965087282, "grad_norm": 2.376368522644043, "learning_rate": 1.9938902743142146e-05, "loss": 0.8442, "step": 250 }, { "epoch": 0.06483790523690773, "grad_norm": 5.437434673309326, "learning_rate": 1.9936408977556113e-05, "loss": 1.0203, "step": 260 }, { "epoch": 0.06733167082294264, "grad_norm": 2.630307674407959, "learning_rate": 1.9933915211970076e-05, "loss": 0.6734, "step": 270 }, { "epoch": 0.06982543640897755, "grad_norm": 4.224475860595703, "learning_rate": 1.993142144638404e-05, "loss": 0.9501, "step": 280 }, { "epoch": 0.07231920199501247, "grad_norm": 4.720646858215332, "learning_rate": 1.9928927680798007e-05, "loss": 0.6527, "step": 290 }, { "epoch": 0.07481296758104738, "grad_norm": 4.588720321655273, "learning_rate": 1.992643391521197e-05, "loss": 0.7645, "step": 300 }, { "epoch": 0.0773067331670823, "grad_norm": 2.956965923309326, "learning_rate": 1.9923940149625938e-05, "loss": 0.8471, "step": 310 }, { "epoch": 0.0798004987531172, "grad_norm": 2.415419816970825, "learning_rate": 1.99214463840399e-05, "loss": 0.8739, "step": 320 }, { "epoch": 0.08229426433915212, "grad_norm": 4.1891188621521, "learning_rate": 1.9918952618453865e-05, "loss": 0.8344, "step": 330 }, { "epoch": 0.08478802992518704, "grad_norm": 4.513364315032959, "learning_rate": 1.9916458852867832e-05, "loss": 0.7266, "step": 340 }, { "epoch": 0.08728179551122195, "grad_norm": 3.10957670211792, "learning_rate": 1.9913965087281796e-05, "loss": 0.8771, "step": 350 }, { "epoch": 0.08977556109725686, "grad_norm": 2.839132070541382, "learning_rate": 1.9911471321695763e-05, "loss": 0.5522, "step": 360 }, { "epoch": 0.09226932668329177, "grad_norm": 7.818932056427002, "learning_rate": 1.9908977556109726e-05, "loss": 0.7509, "step": 370 }, { "epoch": 0.09476309226932668, "grad_norm": 3.391204357147217, "learning_rate": 1.9906483790523693e-05, "loss": 0.8433, "step": 380 }, { "epoch": 0.09725685785536159, "grad_norm": 5.5061726570129395, "learning_rate": 1.9903990024937657e-05, "loss": 1.0054, "step": 390 }, { "epoch": 0.09975062344139651, "grad_norm": 3.288151979446411, "learning_rate": 1.9901496259351624e-05, "loss": 0.563, "step": 400 }, { "epoch": 0.10224438902743142, "grad_norm": 2.599865674972534, "learning_rate": 1.9899002493765587e-05, "loss": 0.5838, "step": 410 }, { "epoch": 0.10473815461346633, "grad_norm": 10.21823787689209, "learning_rate": 1.9896508728179554e-05, "loss": 0.8896, "step": 420 }, { "epoch": 0.10723192019950124, "grad_norm": 3.7210872173309326, "learning_rate": 1.9894014962593518e-05, "loss": 0.7539, "step": 430 }, { "epoch": 0.10972568578553615, "grad_norm": 3.3652985095977783, "learning_rate": 1.9891521197007485e-05, "loss": 0.6975, "step": 440 }, { "epoch": 0.11221945137157108, "grad_norm": 4.095602512359619, "learning_rate": 1.988902743142145e-05, "loss": 0.7403, "step": 450 }, { "epoch": 0.11471321695760599, "grad_norm": 4.6530561447143555, "learning_rate": 1.9886533665835412e-05, "loss": 0.6642, "step": 460 }, { "epoch": 0.1172069825436409, "grad_norm": 3.8766672611236572, "learning_rate": 1.988403990024938e-05, "loss": 0.6265, "step": 470 }, { "epoch": 0.11970074812967581, "grad_norm": 6.619718551635742, "learning_rate": 1.9881546134663343e-05, "loss": 0.6444, "step": 480 }, { "epoch": 0.12219451371571072, "grad_norm": 2.7176246643066406, "learning_rate": 1.9879052369077306e-05, "loss": 0.6325, "step": 490 }, { "epoch": 0.12468827930174564, "grad_norm": 4.887167930603027, "learning_rate": 1.9876558603491273e-05, "loss": 0.5515, "step": 500 }, { "epoch": 0.12718204488778054, "grad_norm": 3.827092170715332, "learning_rate": 1.9874064837905237e-05, "loss": 0.8736, "step": 510 }, { "epoch": 0.12967581047381546, "grad_norm": 3.9637207984924316, "learning_rate": 1.9871571072319204e-05, "loss": 0.6267, "step": 520 }, { "epoch": 0.13216957605985039, "grad_norm": 4.160803318023682, "learning_rate": 1.9869077306733168e-05, "loss": 0.6462, "step": 530 }, { "epoch": 0.13466334164588528, "grad_norm": 4.465733528137207, "learning_rate": 1.9866583541147135e-05, "loss": 0.488, "step": 540 }, { "epoch": 0.1371571072319202, "grad_norm": 3.197251081466675, "learning_rate": 1.98640897755611e-05, "loss": 0.7185, "step": 550 }, { "epoch": 0.1396508728179551, "grad_norm": 3.279279947280884, "learning_rate": 1.9861596009975065e-05, "loss": 0.599, "step": 560 }, { "epoch": 0.14214463840399003, "grad_norm": 7.450759410858154, "learning_rate": 1.985910224438903e-05, "loss": 0.7233, "step": 570 }, { "epoch": 0.14463840399002495, "grad_norm": 4.170207500457764, "learning_rate": 1.9856608478802996e-05, "loss": 0.5774, "step": 580 }, { "epoch": 0.14713216957605985, "grad_norm": 9.313362121582031, "learning_rate": 1.985411471321696e-05, "loss": 0.8636, "step": 590 }, { "epoch": 0.14962593516209477, "grad_norm": 4.21543550491333, "learning_rate": 1.9851620947630927e-05, "loss": 0.648, "step": 600 }, { "epoch": 0.15211970074812967, "grad_norm": 7.928882122039795, "learning_rate": 1.984912718204489e-05, "loss": 0.6985, "step": 610 }, { "epoch": 0.1546134663341646, "grad_norm": 3.2070388793945312, "learning_rate": 1.9846633416458854e-05, "loss": 0.5127, "step": 620 }, { "epoch": 0.1571072319201995, "grad_norm": 4.8116278648376465, "learning_rate": 1.984413965087282e-05, "loss": 0.6521, "step": 630 }, { "epoch": 0.1596009975062344, "grad_norm": 6.319545269012451, "learning_rate": 1.9841645885286784e-05, "loss": 0.5693, "step": 640 }, { "epoch": 0.16209476309226933, "grad_norm": 3.7397208213806152, "learning_rate": 1.983915211970075e-05, "loss": 0.6335, "step": 650 }, { "epoch": 0.16458852867830423, "grad_norm": 4.853140830993652, "learning_rate": 1.9836658354114715e-05, "loss": 0.7663, "step": 660 }, { "epoch": 0.16708229426433915, "grad_norm": 3.868441104888916, "learning_rate": 1.983416458852868e-05, "loss": 0.6261, "step": 670 }, { "epoch": 0.16957605985037408, "grad_norm": 5.102773189544678, "learning_rate": 1.9831670822942646e-05, "loss": 0.7153, "step": 680 }, { "epoch": 0.17206982543640897, "grad_norm": 4.242384433746338, "learning_rate": 1.982917705735661e-05, "loss": 0.6275, "step": 690 }, { "epoch": 0.1745635910224439, "grad_norm": 3.3436532020568848, "learning_rate": 1.9826683291770573e-05, "loss": 0.5709, "step": 700 }, { "epoch": 0.1770573566084788, "grad_norm": 5.803040027618408, "learning_rate": 1.982418952618454e-05, "loss": 0.5584, "step": 710 }, { "epoch": 0.17955112219451372, "grad_norm": 5.347147464752197, "learning_rate": 1.9821695760598503e-05, "loss": 0.5785, "step": 720 }, { "epoch": 0.18204488778054864, "grad_norm": 4.12513542175293, "learning_rate": 1.981920199501247e-05, "loss": 0.6152, "step": 730 }, { "epoch": 0.18453865336658354, "grad_norm": 5.8045220375061035, "learning_rate": 1.9816708229426434e-05, "loss": 0.6319, "step": 740 }, { "epoch": 0.18703241895261846, "grad_norm": 8.319589614868164, "learning_rate": 1.98142144638404e-05, "loss": 0.5286, "step": 750 }, { "epoch": 0.18952618453865336, "grad_norm": 3.427260637283325, "learning_rate": 1.9811720698254365e-05, "loss": 0.6325, "step": 760 }, { "epoch": 0.19201995012468828, "grad_norm": 12.969819068908691, "learning_rate": 1.9809226932668332e-05, "loss": 0.6051, "step": 770 }, { "epoch": 0.19451371571072318, "grad_norm": 4.117243766784668, "learning_rate": 1.9806733167082295e-05, "loss": 0.7746, "step": 780 }, { "epoch": 0.1970074812967581, "grad_norm": 8.8555326461792, "learning_rate": 1.9804239401496262e-05, "loss": 0.6756, "step": 790 }, { "epoch": 0.19950124688279303, "grad_norm": 6.0134596824646, "learning_rate": 1.9801745635910226e-05, "loss": 0.479, "step": 800 }, { "epoch": 0.20199501246882792, "grad_norm": 3.967231273651123, "learning_rate": 1.9799251870324193e-05, "loss": 0.5358, "step": 810 }, { "epoch": 0.20448877805486285, "grad_norm": 3.7455625534057617, "learning_rate": 1.9796758104738157e-05, "loss": 0.5306, "step": 820 }, { "epoch": 0.20698254364089774, "grad_norm": 4.493707656860352, "learning_rate": 1.979426433915212e-05, "loss": 0.6243, "step": 830 }, { "epoch": 0.20947630922693267, "grad_norm": 4.1963653564453125, "learning_rate": 1.9791770573566087e-05, "loss": 0.6031, "step": 840 }, { "epoch": 0.2119700748129676, "grad_norm": 3.4451446533203125, "learning_rate": 1.978927680798005e-05, "loss": 0.5625, "step": 850 }, { "epoch": 0.2144638403990025, "grad_norm": 10.554472923278809, "learning_rate": 1.9786783042394014e-05, "loss": 0.7107, "step": 860 }, { "epoch": 0.2169576059850374, "grad_norm": 6.821500778198242, "learning_rate": 1.978428927680798e-05, "loss": 0.5901, "step": 870 }, { "epoch": 0.2194513715710723, "grad_norm": 8.539198875427246, "learning_rate": 1.9781795511221945e-05, "loss": 0.6157, "step": 880 }, { "epoch": 0.22194513715710723, "grad_norm": 5.176815032958984, "learning_rate": 1.9779301745635912e-05, "loss": 0.5484, "step": 890 }, { "epoch": 0.22443890274314215, "grad_norm": 7.525144577026367, "learning_rate": 1.9776807980049876e-05, "loss": 0.6059, "step": 900 }, { "epoch": 0.22693266832917705, "grad_norm": 6.711145401000977, "learning_rate": 1.9774314214463843e-05, "loss": 0.6239, "step": 910 }, { "epoch": 0.22942643391521197, "grad_norm": 4.520840644836426, "learning_rate": 1.9771820448877806e-05, "loss": 0.5336, "step": 920 }, { "epoch": 0.23192019950124687, "grad_norm": 11.031719207763672, "learning_rate": 1.9769326683291773e-05, "loss": 0.5649, "step": 930 }, { "epoch": 0.2344139650872818, "grad_norm": 7.578179836273193, "learning_rate": 1.9766832917705737e-05, "loss": 0.6233, "step": 940 }, { "epoch": 0.23690773067331672, "grad_norm": 4.446279048919678, "learning_rate": 1.9764339152119704e-05, "loss": 0.6529, "step": 950 }, { "epoch": 0.23940149625935161, "grad_norm": 5.39076042175293, "learning_rate": 1.9761845386533668e-05, "loss": 0.6355, "step": 960 }, { "epoch": 0.24189526184538654, "grad_norm": 5.869710445404053, "learning_rate": 1.9759351620947635e-05, "loss": 0.6045, "step": 970 }, { "epoch": 0.24438902743142144, "grad_norm": 4.218006610870361, "learning_rate": 1.9756857855361598e-05, "loss": 0.5569, "step": 980 }, { "epoch": 0.24688279301745636, "grad_norm": 4.749696731567383, "learning_rate": 1.9754364089775562e-05, "loss": 0.5298, "step": 990 }, { "epoch": 0.24937655860349128, "grad_norm": 7.573767185211182, "learning_rate": 1.975187032418953e-05, "loss": 0.5561, "step": 1000 }, { "epoch": 0.2518703241895262, "grad_norm": 5.48563289642334, "learning_rate": 1.9749376558603492e-05, "loss": 0.5115, "step": 1010 }, { "epoch": 0.2543640897755611, "grad_norm": 6.635676383972168, "learning_rate": 1.974688279301746e-05, "loss": 0.6161, "step": 1020 }, { "epoch": 0.256857855361596, "grad_norm": 4.294478893280029, "learning_rate": 1.9744389027431423e-05, "loss": 0.5136, "step": 1030 }, { "epoch": 0.2593516209476309, "grad_norm": 2.8301312923431396, "learning_rate": 1.9741895261845387e-05, "loss": 0.451, "step": 1040 }, { "epoch": 0.26184538653366585, "grad_norm": 4.692950248718262, "learning_rate": 1.9739401496259354e-05, "loss": 0.5172, "step": 1050 }, { "epoch": 0.26433915211970077, "grad_norm": 9.1524076461792, "learning_rate": 1.9736907730673317e-05, "loss": 0.7668, "step": 1060 }, { "epoch": 0.26683291770573564, "grad_norm": 3.0480797290802, "learning_rate": 1.973441396508728e-05, "loss": 0.5275, "step": 1070 }, { "epoch": 0.26932668329177056, "grad_norm": 3.7727952003479004, "learning_rate": 1.9731920199501248e-05, "loss": 0.6045, "step": 1080 }, { "epoch": 0.2718204488778055, "grad_norm": 3.7919719219207764, "learning_rate": 1.972942643391521e-05, "loss": 0.568, "step": 1090 }, { "epoch": 0.2743142144638404, "grad_norm": 4.75301456451416, "learning_rate": 1.972693266832918e-05, "loss": 0.567, "step": 1100 }, { "epoch": 0.27680798004987534, "grad_norm": 3.6362855434417725, "learning_rate": 1.9724438902743142e-05, "loss": 0.5015, "step": 1110 }, { "epoch": 0.2793017456359102, "grad_norm": 13.187034606933594, "learning_rate": 1.972194513715711e-05, "loss": 0.5433, "step": 1120 }, { "epoch": 0.2817955112219451, "grad_norm": 4.809338092803955, "learning_rate": 1.9719451371571076e-05, "loss": 0.5669, "step": 1130 }, { "epoch": 0.28428927680798005, "grad_norm": 3.7646372318267822, "learning_rate": 1.971695760598504e-05, "loss": 0.5286, "step": 1140 }, { "epoch": 0.286783042394015, "grad_norm": 3.752352237701416, "learning_rate": 1.9714463840399007e-05, "loss": 0.5286, "step": 1150 }, { "epoch": 0.2892768079800499, "grad_norm": 4.207645416259766, "learning_rate": 1.971197007481297e-05, "loss": 0.5052, "step": 1160 }, { "epoch": 0.29177057356608477, "grad_norm": 3.126208782196045, "learning_rate": 1.9709476309226934e-05, "loss": 0.5188, "step": 1170 }, { "epoch": 0.2942643391521197, "grad_norm": 4.613558769226074, "learning_rate": 1.97069825436409e-05, "loss": 0.5405, "step": 1180 }, { "epoch": 0.2967581047381546, "grad_norm": 4.35943603515625, "learning_rate": 1.9704488778054865e-05, "loss": 0.4617, "step": 1190 }, { "epoch": 0.29925187032418954, "grad_norm": 3.4332399368286133, "learning_rate": 1.9701995012468828e-05, "loss": 0.5354, "step": 1200 }, { "epoch": 0.30174563591022446, "grad_norm": 3.738420009613037, "learning_rate": 1.9699501246882795e-05, "loss": 0.4887, "step": 1210 }, { "epoch": 0.30423940149625933, "grad_norm": 6.625039577484131, "learning_rate": 1.969700748129676e-05, "loss": 0.5395, "step": 1220 }, { "epoch": 0.30673316708229426, "grad_norm": 3.6015474796295166, "learning_rate": 1.9694513715710726e-05, "loss": 0.4949, "step": 1230 }, { "epoch": 0.3092269326683292, "grad_norm": 9.316793441772461, "learning_rate": 1.969201995012469e-05, "loss": 0.6079, "step": 1240 }, { "epoch": 0.3117206982543641, "grad_norm": 10.953588485717773, "learning_rate": 1.9689526184538653e-05, "loss": 0.524, "step": 1250 }, { "epoch": 0.314214463840399, "grad_norm": 5.9117512702941895, "learning_rate": 1.968703241895262e-05, "loss": 0.6395, "step": 1260 }, { "epoch": 0.3167082294264339, "grad_norm": 3.799406051635742, "learning_rate": 1.9684538653366584e-05, "loss": 0.5604, "step": 1270 }, { "epoch": 0.3192019950124688, "grad_norm": 5.208715915679932, "learning_rate": 1.968204488778055e-05, "loss": 0.5423, "step": 1280 }, { "epoch": 0.32169576059850374, "grad_norm": 8.043681144714355, "learning_rate": 1.9679551122194514e-05, "loss": 0.5482, "step": 1290 }, { "epoch": 0.32418952618453867, "grad_norm": 4.668685436248779, "learning_rate": 1.967705735660848e-05, "loss": 0.5868, "step": 1300 }, { "epoch": 0.3266832917705736, "grad_norm": 5.136122703552246, "learning_rate": 1.9674563591022445e-05, "loss": 0.5217, "step": 1310 }, { "epoch": 0.32917705735660846, "grad_norm": 3.8749747276306152, "learning_rate": 1.9672069825436412e-05, "loss": 0.48, "step": 1320 }, { "epoch": 0.3316708229426434, "grad_norm": 2.9240078926086426, "learning_rate": 1.9669576059850376e-05, "loss": 0.6384, "step": 1330 }, { "epoch": 0.3341645885286783, "grad_norm": 3.375347852706909, "learning_rate": 1.9667082294264343e-05, "loss": 0.5041, "step": 1340 }, { "epoch": 0.33665835411471323, "grad_norm": 3.96232008934021, "learning_rate": 1.9664588528678306e-05, "loss": 0.4538, "step": 1350 }, { "epoch": 0.33915211970074816, "grad_norm": 16.26675796508789, "learning_rate": 1.966209476309227e-05, "loss": 0.4686, "step": 1360 }, { "epoch": 0.341645885286783, "grad_norm": 6.03434944152832, "learning_rate": 1.9659600997506237e-05, "loss": 0.5372, "step": 1370 }, { "epoch": 0.34413965087281795, "grad_norm": 4.742402076721191, "learning_rate": 1.96571072319202e-05, "loss": 0.4793, "step": 1380 }, { "epoch": 0.34663341645885287, "grad_norm": 4.454680442810059, "learning_rate": 1.9654613466334167e-05, "loss": 0.4901, "step": 1390 }, { "epoch": 0.3491271820448878, "grad_norm": 4.6647796630859375, "learning_rate": 1.965211970074813e-05, "loss": 0.5023, "step": 1400 }, { "epoch": 0.3516209476309227, "grad_norm": 5.384413719177246, "learning_rate": 1.9649625935162095e-05, "loss": 0.5912, "step": 1410 }, { "epoch": 0.3541147132169576, "grad_norm": 6.326955795288086, "learning_rate": 1.964713216957606e-05, "loss": 0.5661, "step": 1420 }, { "epoch": 0.3566084788029925, "grad_norm": 3.840012550354004, "learning_rate": 1.9644638403990025e-05, "loss": 0.5107, "step": 1430 }, { "epoch": 0.35910224438902744, "grad_norm": 5.383571147918701, "learning_rate": 1.9642144638403992e-05, "loss": 0.4882, "step": 1440 }, { "epoch": 0.36159600997506236, "grad_norm": 2.377634048461914, "learning_rate": 1.9639650872817956e-05, "loss": 0.5656, "step": 1450 }, { "epoch": 0.3640897755610973, "grad_norm": 3.8716111183166504, "learning_rate": 1.963715710723192e-05, "loss": 0.5097, "step": 1460 }, { "epoch": 0.36658354114713215, "grad_norm": 5.310560703277588, "learning_rate": 1.9634663341645886e-05, "loss": 0.4505, "step": 1470 }, { "epoch": 0.3690773067331671, "grad_norm": 7.158385753631592, "learning_rate": 1.9632169576059853e-05, "loss": 0.4722, "step": 1480 }, { "epoch": 0.371571072319202, "grad_norm": 3.6055357456207275, "learning_rate": 1.9629675810473817e-05, "loss": 0.4542, "step": 1490 }, { "epoch": 0.3740648379052369, "grad_norm": 3.826148748397827, "learning_rate": 1.9627182044887784e-05, "loss": 0.5559, "step": 1500 }, { "epoch": 0.3765586034912718, "grad_norm": 5.25114631652832, "learning_rate": 1.9624688279301748e-05, "loss": 0.5079, "step": 1510 }, { "epoch": 0.3790523690773067, "grad_norm": 5.912806034088135, "learning_rate": 1.9622194513715715e-05, "loss": 0.5651, "step": 1520 }, { "epoch": 0.38154613466334164, "grad_norm": 5.626669883728027, "learning_rate": 1.961970074812968e-05, "loss": 0.483, "step": 1530 }, { "epoch": 0.38403990024937656, "grad_norm": 5.233978271484375, "learning_rate": 1.9617206982543642e-05, "loss": 0.472, "step": 1540 }, { "epoch": 0.3865336658354115, "grad_norm": 4.9565606117248535, "learning_rate": 1.961471321695761e-05, "loss": 0.5137, "step": 1550 }, { "epoch": 0.38902743142144636, "grad_norm": 5.3332414627075195, "learning_rate": 1.9612219451371573e-05, "loss": 0.5708, "step": 1560 }, { "epoch": 0.3915211970074813, "grad_norm": 4.999671459197998, "learning_rate": 1.9609725685785536e-05, "loss": 0.5666, "step": 1570 }, { "epoch": 0.3940149625935162, "grad_norm": 10.932353019714355, "learning_rate": 1.9607231920199503e-05, "loss": 0.5769, "step": 1580 }, { "epoch": 0.39650872817955113, "grad_norm": 7.1936211585998535, "learning_rate": 1.9604738154613467e-05, "loss": 0.5631, "step": 1590 }, { "epoch": 0.39900249376558605, "grad_norm": 6.666138172149658, "learning_rate": 1.9602244389027434e-05, "loss": 0.5239, "step": 1600 }, { "epoch": 0.4014962593516209, "grad_norm": 3.0620758533477783, "learning_rate": 1.9599750623441397e-05, "loss": 0.5028, "step": 1610 }, { "epoch": 0.40399002493765584, "grad_norm": 4.261651515960693, "learning_rate": 1.959725685785536e-05, "loss": 0.4711, "step": 1620 }, { "epoch": 0.40648379052369077, "grad_norm": 3.4371354579925537, "learning_rate": 1.9594763092269328e-05, "loss": 0.4729, "step": 1630 }, { "epoch": 0.4089775561097257, "grad_norm": 3.691063642501831, "learning_rate": 1.959226932668329e-05, "loss": 0.4844, "step": 1640 }, { "epoch": 0.4114713216957606, "grad_norm": 4.673091411590576, "learning_rate": 1.958977556109726e-05, "loss": 0.5131, "step": 1650 }, { "epoch": 0.4139650872817955, "grad_norm": 2.6619250774383545, "learning_rate": 1.9587281795511222e-05, "loss": 0.5196, "step": 1660 }, { "epoch": 0.4164588528678304, "grad_norm": 6.241903781890869, "learning_rate": 1.958478802992519e-05, "loss": 0.471, "step": 1670 }, { "epoch": 0.41895261845386533, "grad_norm": 3.603950023651123, "learning_rate": 1.9582294264339153e-05, "loss": 0.4476, "step": 1680 }, { "epoch": 0.42144638403990026, "grad_norm": 3.6078081130981445, "learning_rate": 1.957980049875312e-05, "loss": 0.5308, "step": 1690 }, { "epoch": 0.4239401496259352, "grad_norm": 4.073026657104492, "learning_rate": 1.9577306733167084e-05, "loss": 0.4973, "step": 1700 }, { "epoch": 0.42643391521197005, "grad_norm": 4.738262176513672, "learning_rate": 1.957481296758105e-05, "loss": 0.4709, "step": 1710 }, { "epoch": 0.428927680798005, "grad_norm": 16.924922943115234, "learning_rate": 1.9572319201995014e-05, "loss": 0.5383, "step": 1720 }, { "epoch": 0.4314214463840399, "grad_norm": 8.632024765014648, "learning_rate": 1.956982543640898e-05, "loss": 0.4519, "step": 1730 }, { "epoch": 0.4339152119700748, "grad_norm": 8.484312057495117, "learning_rate": 1.9567331670822945e-05, "loss": 0.5642, "step": 1740 }, { "epoch": 0.43640897755610975, "grad_norm": 4.789985179901123, "learning_rate": 1.956483790523691e-05, "loss": 0.4486, "step": 1750 }, { "epoch": 0.4389027431421446, "grad_norm": 4.4060587882995605, "learning_rate": 1.9562344139650875e-05, "loss": 0.5041, "step": 1760 }, { "epoch": 0.44139650872817954, "grad_norm": 8.840462684631348, "learning_rate": 1.955985037406484e-05, "loss": 0.5466, "step": 1770 }, { "epoch": 0.44389027431421446, "grad_norm": 3.662015438079834, "learning_rate": 1.9557356608478803e-05, "loss": 0.4922, "step": 1780 }, { "epoch": 0.4463840399002494, "grad_norm": 5.66322135925293, "learning_rate": 1.955486284289277e-05, "loss": 0.4851, "step": 1790 }, { "epoch": 0.4488778054862843, "grad_norm": 3.9291810989379883, "learning_rate": 1.9552369077306733e-05, "loss": 0.5185, "step": 1800 }, { "epoch": 0.4513715710723192, "grad_norm": 4.53109073638916, "learning_rate": 1.95498753117207e-05, "loss": 0.5114, "step": 1810 }, { "epoch": 0.4538653366583541, "grad_norm": 3.999675750732422, "learning_rate": 1.9547381546134664e-05, "loss": 0.4159, "step": 1820 }, { "epoch": 0.456359102244389, "grad_norm": 8.522893905639648, "learning_rate": 1.954488778054863e-05, "loss": 0.74, "step": 1830 }, { "epoch": 0.45885286783042395, "grad_norm": 3.3957653045654297, "learning_rate": 1.9542394014962594e-05, "loss": 0.5078, "step": 1840 }, { "epoch": 0.4613466334164589, "grad_norm": 2.780721664428711, "learning_rate": 1.953990024937656e-05, "loss": 0.5148, "step": 1850 }, { "epoch": 0.46384039900249374, "grad_norm": 3.9767260551452637, "learning_rate": 1.9537406483790525e-05, "loss": 0.5263, "step": 1860 }, { "epoch": 0.46633416458852867, "grad_norm": 15.764490127563477, "learning_rate": 1.9534912718204492e-05, "loss": 0.4691, "step": 1870 }, { "epoch": 0.4688279301745636, "grad_norm": 7.174173831939697, "learning_rate": 1.9532418952618456e-05, "loss": 0.5482, "step": 1880 }, { "epoch": 0.4713216957605985, "grad_norm": 3.9777376651763916, "learning_rate": 1.9529925187032423e-05, "loss": 0.4499, "step": 1890 }, { "epoch": 0.47381546134663344, "grad_norm": 4.089155197143555, "learning_rate": 1.9527431421446386e-05, "loss": 0.4241, "step": 1900 }, { "epoch": 0.4763092269326683, "grad_norm": 3.909698009490967, "learning_rate": 1.952493765586035e-05, "loss": 0.5389, "step": 1910 }, { "epoch": 0.47880299251870323, "grad_norm": 4.601511478424072, "learning_rate": 1.9522443890274317e-05, "loss": 0.5299, "step": 1920 }, { "epoch": 0.48129675810473815, "grad_norm": 3.4147753715515137, "learning_rate": 1.951995012468828e-05, "loss": 0.4071, "step": 1930 }, { "epoch": 0.4837905236907731, "grad_norm": 10.128273963928223, "learning_rate": 1.9517456359102248e-05, "loss": 0.5228, "step": 1940 }, { "epoch": 0.486284289276808, "grad_norm": 6.3098464012146, "learning_rate": 1.951496259351621e-05, "loss": 0.5648, "step": 1950 }, { "epoch": 0.48877805486284287, "grad_norm": 9.323540687561035, "learning_rate": 1.9512468827930175e-05, "loss": 0.5566, "step": 1960 }, { "epoch": 0.4912718204488778, "grad_norm": 2.8909482955932617, "learning_rate": 1.9509975062344142e-05, "loss": 0.4574, "step": 1970 }, { "epoch": 0.4937655860349127, "grad_norm": 8.425788879394531, "learning_rate": 1.9507481296758105e-05, "loss": 0.594, "step": 1980 }, { "epoch": 0.49625935162094764, "grad_norm": 5.776350498199463, "learning_rate": 1.950498753117207e-05, "loss": 0.4679, "step": 1990 }, { "epoch": 0.49875311720698257, "grad_norm": 5.987747669219971, "learning_rate": 1.9502493765586036e-05, "loss": 0.5371, "step": 2000 }, { "epoch": 0.5012468827930174, "grad_norm": 3.423344373703003, "learning_rate": 1.95e-05, "loss": 0.4901, "step": 2010 }, { "epoch": 0.5037406483790524, "grad_norm": 3.8344333171844482, "learning_rate": 1.9497506234413967e-05, "loss": 0.4937, "step": 2020 }, { "epoch": 0.5062344139650873, "grad_norm": 4.415685653686523, "learning_rate": 1.949501246882793e-05, "loss": 0.4658, "step": 2030 }, { "epoch": 0.5087281795511222, "grad_norm": 4.0992631912231445, "learning_rate": 1.9492518703241897e-05, "loss": 0.4264, "step": 2040 }, { "epoch": 0.5112219451371571, "grad_norm": 6.2230305671691895, "learning_rate": 1.949002493765586e-05, "loss": 0.5018, "step": 2050 }, { "epoch": 0.513715710723192, "grad_norm": 7.8946661949157715, "learning_rate": 1.9487531172069828e-05, "loss": 0.477, "step": 2060 }, { "epoch": 0.516209476309227, "grad_norm": 6.977099418640137, "learning_rate": 1.948503740648379e-05, "loss": 0.4574, "step": 2070 }, { "epoch": 0.5187032418952618, "grad_norm": 5.954398155212402, "learning_rate": 1.948254364089776e-05, "loss": 0.5273, "step": 2080 }, { "epoch": 0.5211970074812967, "grad_norm": 5.636336326599121, "learning_rate": 1.9480049875311722e-05, "loss": 0.4623, "step": 2090 }, { "epoch": 0.5236907730673317, "grad_norm": 7.925063610076904, "learning_rate": 1.947755610972569e-05, "loss": 0.4776, "step": 2100 }, { "epoch": 0.5261845386533666, "grad_norm": 6.0404276847839355, "learning_rate": 1.9475062344139653e-05, "loss": 0.5091, "step": 2110 }, { "epoch": 0.5286783042394015, "grad_norm": 5.379130840301514, "learning_rate": 1.9472568578553616e-05, "loss": 0.4522, "step": 2120 }, { "epoch": 0.5311720698254364, "grad_norm": 5.63524055480957, "learning_rate": 1.9470074812967583e-05, "loss": 0.5296, "step": 2130 }, { "epoch": 0.5336658354114713, "grad_norm": 3.867295026779175, "learning_rate": 1.9467581047381547e-05, "loss": 0.4351, "step": 2140 }, { "epoch": 0.5361596009975063, "grad_norm": 10.726486206054688, "learning_rate": 1.9465087281795514e-05, "loss": 0.4772, "step": 2150 }, { "epoch": 0.5386533665835411, "grad_norm": 5.092080593109131, "learning_rate": 1.9462593516209478e-05, "loss": 0.4877, "step": 2160 }, { "epoch": 0.5411471321695761, "grad_norm": 4.903257846832275, "learning_rate": 1.946009975062344e-05, "loss": 0.5226, "step": 2170 }, { "epoch": 0.543640897755611, "grad_norm": 12.123847007751465, "learning_rate": 1.9457605985037408e-05, "loss": 0.4826, "step": 2180 }, { "epoch": 0.5461346633416458, "grad_norm": 7.251633644104004, "learning_rate": 1.9455112219451372e-05, "loss": 0.5337, "step": 2190 }, { "epoch": 0.5486284289276808, "grad_norm": 5.906688213348389, "learning_rate": 1.945261845386534e-05, "loss": 0.4971, "step": 2200 }, { "epoch": 0.5511221945137157, "grad_norm": 16.242473602294922, "learning_rate": 1.9450124688279302e-05, "loss": 0.5198, "step": 2210 }, { "epoch": 0.5536159600997507, "grad_norm": 5.047711372375488, "learning_rate": 1.944763092269327e-05, "loss": 0.5753, "step": 2220 }, { "epoch": 0.5561097256857855, "grad_norm": 12.766270637512207, "learning_rate": 1.9445137157107233e-05, "loss": 0.4736, "step": 2230 }, { "epoch": 0.5586034912718204, "grad_norm": 9.127399444580078, "learning_rate": 1.94426433915212e-05, "loss": 0.4879, "step": 2240 }, { "epoch": 0.5610972568578554, "grad_norm": 5.662039756774902, "learning_rate": 1.9440149625935164e-05, "loss": 0.4147, "step": 2250 }, { "epoch": 0.5635910224438903, "grad_norm": 5.060816764831543, "learning_rate": 1.943765586034913e-05, "loss": 0.4406, "step": 2260 }, { "epoch": 0.5660847880299252, "grad_norm": 7.149324893951416, "learning_rate": 1.9435162094763094e-05, "loss": 0.5203, "step": 2270 }, { "epoch": 0.5685785536159601, "grad_norm": 3.712177038192749, "learning_rate": 1.9432668329177058e-05, "loss": 0.4658, "step": 2280 }, { "epoch": 0.571072319201995, "grad_norm": 3.989894151687622, "learning_rate": 1.9430174563591025e-05, "loss": 0.4428, "step": 2290 }, { "epoch": 0.57356608478803, "grad_norm": 4.171450614929199, "learning_rate": 1.942768079800499e-05, "loss": 0.5034, "step": 2300 }, { "epoch": 0.5760598503740648, "grad_norm": 5.396224021911621, "learning_rate": 1.9425187032418956e-05, "loss": 0.4929, "step": 2310 }, { "epoch": 0.5785536159600998, "grad_norm": 5.547377586364746, "learning_rate": 1.942269326683292e-05, "loss": 0.4344, "step": 2320 }, { "epoch": 0.5810473815461347, "grad_norm": 9.413858413696289, "learning_rate": 1.9420199501246883e-05, "loss": 0.4321, "step": 2330 }, { "epoch": 0.5835411471321695, "grad_norm": 6.898621082305908, "learning_rate": 1.941770573566085e-05, "loss": 0.5428, "step": 2340 }, { "epoch": 0.5860349127182045, "grad_norm": 4.387313365936279, "learning_rate": 1.9415211970074813e-05, "loss": 0.4786, "step": 2350 }, { "epoch": 0.5885286783042394, "grad_norm": 8.502609252929688, "learning_rate": 1.941271820448878e-05, "loss": 0.4587, "step": 2360 }, { "epoch": 0.5910224438902744, "grad_norm": 5.494476795196533, "learning_rate": 1.9410224438902744e-05, "loss": 0.5174, "step": 2370 }, { "epoch": 0.5935162094763092, "grad_norm": 4.911831378936768, "learning_rate": 1.9407730673316708e-05, "loss": 0.5318, "step": 2380 }, { "epoch": 0.5960099750623441, "grad_norm": 3.805800676345825, "learning_rate": 1.9405236907730675e-05, "loss": 0.5523, "step": 2390 }, { "epoch": 0.5985037406483791, "grad_norm": 7.420222759246826, "learning_rate": 1.9402743142144638e-05, "loss": 0.492, "step": 2400 }, { "epoch": 0.600997506234414, "grad_norm": 5.9345245361328125, "learning_rate": 1.9400249376558605e-05, "loss": 0.4199, "step": 2410 }, { "epoch": 0.6034912718204489, "grad_norm": 4.605949878692627, "learning_rate": 1.9397755610972572e-05, "loss": 0.5301, "step": 2420 }, { "epoch": 0.6059850374064838, "grad_norm": 11.447821617126465, "learning_rate": 1.9395261845386536e-05, "loss": 0.5429, "step": 2430 }, { "epoch": 0.6084788029925187, "grad_norm": 4.791404724121094, "learning_rate": 1.9392768079800503e-05, "loss": 0.5576, "step": 2440 }, { "epoch": 0.6109725685785536, "grad_norm": 4.776665210723877, "learning_rate": 1.9390274314214466e-05, "loss": 0.4874, "step": 2450 }, { "epoch": 0.6134663341645885, "grad_norm": 8.155165672302246, "learning_rate": 1.938778054862843e-05, "loss": 0.5492, "step": 2460 }, { "epoch": 0.6159600997506235, "grad_norm": 15.007184028625488, "learning_rate": 1.9385286783042397e-05, "loss": 0.4631, "step": 2470 }, { "epoch": 0.6184538653366584, "grad_norm": 3.208120107650757, "learning_rate": 1.938279301745636e-05, "loss": 0.4789, "step": 2480 }, { "epoch": 0.6209476309226932, "grad_norm": 5.233438014984131, "learning_rate": 1.9380299251870324e-05, "loss": 0.5402, "step": 2490 }, { "epoch": 0.6234413965087282, "grad_norm": 8.981165885925293, "learning_rate": 1.937780548628429e-05, "loss": 0.491, "step": 2500 }, { "epoch": 0.6259351620947631, "grad_norm": 8.05521011352539, "learning_rate": 1.9375311720698255e-05, "loss": 0.4621, "step": 2510 }, { "epoch": 0.628428927680798, "grad_norm": 3.094724178314209, "learning_rate": 1.9372817955112222e-05, "loss": 0.4244, "step": 2520 }, { "epoch": 0.6309226932668329, "grad_norm": 5.453985691070557, "learning_rate": 1.9370324189526186e-05, "loss": 0.5035, "step": 2530 }, { "epoch": 0.6334164588528678, "grad_norm": 4.8853559494018555, "learning_rate": 1.936783042394015e-05, "loss": 0.4758, "step": 2540 }, { "epoch": 0.6359102244389028, "grad_norm": 4.26847505569458, "learning_rate": 1.9365336658354116e-05, "loss": 0.493, "step": 2550 }, { "epoch": 0.6384039900249376, "grad_norm": 7.400485038757324, "learning_rate": 1.936284289276808e-05, "loss": 0.465, "step": 2560 }, { "epoch": 0.6408977556109726, "grad_norm": 4.4760260581970215, "learning_rate": 1.9360349127182047e-05, "loss": 0.4429, "step": 2570 }, { "epoch": 0.6433915211970075, "grad_norm": 4.798295974731445, "learning_rate": 1.935785536159601e-05, "loss": 0.4837, "step": 2580 }, { "epoch": 0.6458852867830424, "grad_norm": 11.160234451293945, "learning_rate": 1.9355361596009977e-05, "loss": 0.4985, "step": 2590 }, { "epoch": 0.6483790523690773, "grad_norm": 6.34352970123291, "learning_rate": 1.935286783042394e-05, "loss": 0.5137, "step": 2600 }, { "epoch": 0.6508728179551122, "grad_norm": 4.095518589019775, "learning_rate": 1.9350374064837908e-05, "loss": 0.4254, "step": 2610 }, { "epoch": 0.6533665835411472, "grad_norm": 8.039348602294922, "learning_rate": 1.934788029925187e-05, "loss": 0.4655, "step": 2620 }, { "epoch": 0.655860349127182, "grad_norm": 4.135050296783447, "learning_rate": 1.934538653366584e-05, "loss": 0.5008, "step": 2630 }, { "epoch": 0.6583541147132169, "grad_norm": 7.032421112060547, "learning_rate": 1.9342892768079802e-05, "loss": 0.4439, "step": 2640 }, { "epoch": 0.6608478802992519, "grad_norm": 4.22358512878418, "learning_rate": 1.934039900249377e-05, "loss": 0.5356, "step": 2650 }, { "epoch": 0.6633416458852868, "grad_norm": 6.675922393798828, "learning_rate": 1.9337905236907733e-05, "loss": 0.5038, "step": 2660 }, { "epoch": 0.6658354114713217, "grad_norm": 4.241270542144775, "learning_rate": 1.9335411471321697e-05, "loss": 0.4468, "step": 2670 }, { "epoch": 0.6683291770573566, "grad_norm": 4.54661226272583, "learning_rate": 1.9332917705735664e-05, "loss": 0.4758, "step": 2680 }, { "epoch": 0.6708229426433915, "grad_norm": 3.0999062061309814, "learning_rate": 1.9330423940149627e-05, "loss": 0.566, "step": 2690 }, { "epoch": 0.6733167082294265, "grad_norm": 5.668433666229248, "learning_rate": 1.932793017456359e-05, "loss": 0.4979, "step": 2700 }, { "epoch": 0.6758104738154613, "grad_norm": 7.616546154022217, "learning_rate": 1.9325436408977558e-05, "loss": 0.3973, "step": 2710 }, { "epoch": 0.6783042394014963, "grad_norm": 4.385749816894531, "learning_rate": 1.932294264339152e-05, "loss": 0.5626, "step": 2720 }, { "epoch": 0.6807980049875312, "grad_norm": 12.706242561340332, "learning_rate": 1.932044887780549e-05, "loss": 0.5379, "step": 2730 }, { "epoch": 0.683291770573566, "grad_norm": 4.375461101531982, "learning_rate": 1.9317955112219452e-05, "loss": 0.4326, "step": 2740 }, { "epoch": 0.685785536159601, "grad_norm": 4.31388521194458, "learning_rate": 1.931546134663342e-05, "loss": 0.5309, "step": 2750 }, { "epoch": 0.6882793017456359, "grad_norm": 4.78411865234375, "learning_rate": 1.9312967581047383e-05, "loss": 0.4068, "step": 2760 }, { "epoch": 0.6907730673316709, "grad_norm": 10.348709106445312, "learning_rate": 1.931047381546135e-05, "loss": 0.4246, "step": 2770 }, { "epoch": 0.6932668329177057, "grad_norm": 3.510700225830078, "learning_rate": 1.9307980049875313e-05, "loss": 0.4845, "step": 2780 }, { "epoch": 0.6957605985037406, "grad_norm": 4.097596168518066, "learning_rate": 1.930548628428928e-05, "loss": 0.5605, "step": 2790 }, { "epoch": 0.6982543640897756, "grad_norm": 4.968656539916992, "learning_rate": 1.9302992518703244e-05, "loss": 0.467, "step": 2800 }, { "epoch": 0.7007481296758105, "grad_norm": 7.420864105224609, "learning_rate": 1.930049875311721e-05, "loss": 0.6162, "step": 2810 }, { "epoch": 0.7032418952618454, "grad_norm": 7.818281173706055, "learning_rate": 1.9298004987531174e-05, "loss": 0.4224, "step": 2820 }, { "epoch": 0.7057356608478803, "grad_norm": 6.321879863739014, "learning_rate": 1.9295511221945138e-05, "loss": 0.5507, "step": 2830 }, { "epoch": 0.7082294264339152, "grad_norm": 4.185654640197754, "learning_rate": 1.9293017456359105e-05, "loss": 0.4075, "step": 2840 }, { "epoch": 0.7107231920199502, "grad_norm": 4.573608875274658, "learning_rate": 1.929052369077307e-05, "loss": 0.5097, "step": 2850 }, { "epoch": 0.713216957605985, "grad_norm": 6.115342617034912, "learning_rate": 1.9288029925187036e-05, "loss": 0.4963, "step": 2860 }, { "epoch": 0.71571072319202, "grad_norm": 4.295200824737549, "learning_rate": 1.9285536159601e-05, "loss": 0.4322, "step": 2870 }, { "epoch": 0.7182044887780549, "grad_norm": 2.7827277183532715, "learning_rate": 1.9283042394014963e-05, "loss": 0.4749, "step": 2880 }, { "epoch": 0.7206982543640897, "grad_norm": 4.560075283050537, "learning_rate": 1.928054862842893e-05, "loss": 0.4603, "step": 2890 }, { "epoch": 0.7231920199501247, "grad_norm": 6.625837326049805, "learning_rate": 1.9278054862842894e-05, "loss": 0.4779, "step": 2900 }, { "epoch": 0.7256857855361596, "grad_norm": 6.110123634338379, "learning_rate": 1.9275561097256857e-05, "loss": 0.5923, "step": 2910 }, { "epoch": 0.7281795511221946, "grad_norm": 5.3894429206848145, "learning_rate": 1.9273067331670824e-05, "loss": 0.4797, "step": 2920 }, { "epoch": 0.7306733167082294, "grad_norm": 6.485326290130615, "learning_rate": 1.9270573566084788e-05, "loss": 0.5242, "step": 2930 }, { "epoch": 0.7331670822942643, "grad_norm": 8.976933479309082, "learning_rate": 1.9268079800498755e-05, "loss": 0.44, "step": 2940 }, { "epoch": 0.7356608478802993, "grad_norm": 8.325933456420898, "learning_rate": 1.926558603491272e-05, "loss": 0.5028, "step": 2950 }, { "epoch": 0.7381546134663342, "grad_norm": 4.603588581085205, "learning_rate": 1.9263092269326685e-05, "loss": 0.4457, "step": 2960 }, { "epoch": 0.7406483790523691, "grad_norm": 4.730616569519043, "learning_rate": 1.926059850374065e-05, "loss": 0.4515, "step": 2970 }, { "epoch": 0.743142144638404, "grad_norm": 4.1613264083862305, "learning_rate": 1.9258104738154616e-05, "loss": 0.4357, "step": 2980 }, { "epoch": 0.7456359102244389, "grad_norm": 4.212600231170654, "learning_rate": 1.925561097256858e-05, "loss": 0.4426, "step": 2990 }, { "epoch": 0.7481296758104738, "grad_norm": 5.657212257385254, "learning_rate": 1.9253117206982547e-05, "loss": 0.4656, "step": 3000 }, { "epoch": 0.7506234413965087, "grad_norm": 6.499466419219971, "learning_rate": 1.925062344139651e-05, "loss": 0.421, "step": 3010 }, { "epoch": 0.7531172069825436, "grad_norm": 4.284633159637451, "learning_rate": 1.9248129675810477e-05, "loss": 0.5375, "step": 3020 }, { "epoch": 0.7556109725685786, "grad_norm": 4.252184867858887, "learning_rate": 1.924563591022444e-05, "loss": 0.4443, "step": 3030 }, { "epoch": 0.7581047381546134, "grad_norm": 4.567617416381836, "learning_rate": 1.9243142144638405e-05, "loss": 0.4453, "step": 3040 }, { "epoch": 0.7605985037406484, "grad_norm": 3.3405144214630127, "learning_rate": 1.924064837905237e-05, "loss": 0.432, "step": 3050 }, { "epoch": 0.7630922693266833, "grad_norm": 7.207590579986572, "learning_rate": 1.9238154613466335e-05, "loss": 0.446, "step": 3060 }, { "epoch": 0.7655860349127181, "grad_norm": 7.11346960067749, "learning_rate": 1.92356608478803e-05, "loss": 0.4085, "step": 3070 }, { "epoch": 0.7680798004987531, "grad_norm": 3.7571468353271484, "learning_rate": 1.9233167082294266e-05, "loss": 0.4873, "step": 3080 }, { "epoch": 0.770573566084788, "grad_norm": 6.159874439239502, "learning_rate": 1.923067331670823e-05, "loss": 0.5184, "step": 3090 }, { "epoch": 0.773067331670823, "grad_norm": 6.5550689697265625, "learning_rate": 1.9228179551122196e-05, "loss": 0.4748, "step": 3100 }, { "epoch": 0.7755610972568578, "grad_norm": 6.619629383087158, "learning_rate": 1.922568578553616e-05, "loss": 0.4853, "step": 3110 }, { "epoch": 0.7780548628428927, "grad_norm": 5.71950626373291, "learning_rate": 1.9223192019950127e-05, "loss": 0.4853, "step": 3120 }, { "epoch": 0.7805486284289277, "grad_norm": 4.437851905822754, "learning_rate": 1.922069825436409e-05, "loss": 0.4058, "step": 3130 }, { "epoch": 0.7830423940149626, "grad_norm": 13.215971946716309, "learning_rate": 1.9218204488778058e-05, "loss": 0.51, "step": 3140 }, { "epoch": 0.7855361596009975, "grad_norm": 4.856098175048828, "learning_rate": 1.921571072319202e-05, "loss": 0.4832, "step": 3150 }, { "epoch": 0.7880299251870324, "grad_norm": 6.450615882873535, "learning_rate": 1.9213216957605988e-05, "loss": 0.4538, "step": 3160 }, { "epoch": 0.7905236907730673, "grad_norm": 7.96874475479126, "learning_rate": 1.9210723192019952e-05, "loss": 0.4463, "step": 3170 }, { "epoch": 0.7930174563591023, "grad_norm": 5.789658069610596, "learning_rate": 1.920822942643392e-05, "loss": 0.3977, "step": 3180 }, { "epoch": 0.7955112219451371, "grad_norm": 4.556354999542236, "learning_rate": 1.9205735660847882e-05, "loss": 0.5665, "step": 3190 }, { "epoch": 0.7980049875311721, "grad_norm": 7.1619181632995605, "learning_rate": 1.9203241895261846e-05, "loss": 0.4884, "step": 3200 }, { "epoch": 0.800498753117207, "grad_norm": 5.486998081207275, "learning_rate": 1.9200748129675813e-05, "loss": 0.5249, "step": 3210 }, { "epoch": 0.8029925187032418, "grad_norm": 5.061128616333008, "learning_rate": 1.9198254364089777e-05, "loss": 0.5166, "step": 3220 }, { "epoch": 0.8054862842892768, "grad_norm": 5.740884780883789, "learning_rate": 1.9195760598503744e-05, "loss": 0.4312, "step": 3230 }, { "epoch": 0.8079800498753117, "grad_norm": 6.897968769073486, "learning_rate": 1.9193266832917707e-05, "loss": 0.4602, "step": 3240 }, { "epoch": 0.8104738154613467, "grad_norm": 11.832649230957031, "learning_rate": 1.919077306733167e-05, "loss": 0.4456, "step": 3250 }, { "epoch": 0.8129675810473815, "grad_norm": 6.845297336578369, "learning_rate": 1.9188279301745638e-05, "loss": 0.5935, "step": 3260 }, { "epoch": 0.8154613466334164, "grad_norm": 7.5892157554626465, "learning_rate": 1.91857855361596e-05, "loss": 0.5575, "step": 3270 }, { "epoch": 0.8179551122194514, "grad_norm": 5.861032009124756, "learning_rate": 1.9183291770573565e-05, "loss": 0.4618, "step": 3280 }, { "epoch": 0.8204488778054863, "grad_norm": 7.565516471862793, "learning_rate": 1.9180798004987532e-05, "loss": 0.5081, "step": 3290 }, { "epoch": 0.8229426433915212, "grad_norm": 3.7106189727783203, "learning_rate": 1.9178304239401496e-05, "loss": 0.4926, "step": 3300 }, { "epoch": 0.8254364089775561, "grad_norm": 4.886735916137695, "learning_rate": 1.9175810473815463e-05, "loss": 0.491, "step": 3310 }, { "epoch": 0.827930174563591, "grad_norm": 4.354279041290283, "learning_rate": 1.9173316708229426e-05, "loss": 0.3565, "step": 3320 }, { "epoch": 0.830423940149626, "grad_norm": 6.275864601135254, "learning_rate": 1.9170822942643393e-05, "loss": 0.4157, "step": 3330 }, { "epoch": 0.8329177057356608, "grad_norm": 5.567061901092529, "learning_rate": 1.916832917705736e-05, "loss": 0.5054, "step": 3340 }, { "epoch": 0.8354114713216958, "grad_norm": 5.522775650024414, "learning_rate": 1.9165835411471324e-05, "loss": 0.504, "step": 3350 }, { "epoch": 0.8379052369077307, "grad_norm": 4.974456310272217, "learning_rate": 1.916334164588529e-05, "loss": 0.4118, "step": 3360 }, { "epoch": 0.8403990024937655, "grad_norm": 3.5197205543518066, "learning_rate": 1.9160847880299255e-05, "loss": 0.4819, "step": 3370 }, { "epoch": 0.8428927680798005, "grad_norm": 4.410624027252197, "learning_rate": 1.9158354114713218e-05, "loss": 0.583, "step": 3380 }, { "epoch": 0.8453865336658354, "grad_norm": 6.812956809997559, "learning_rate": 1.9155860349127185e-05, "loss": 0.463, "step": 3390 }, { "epoch": 0.8478802992518704, "grad_norm": 17.613037109375, "learning_rate": 1.915336658354115e-05, "loss": 0.4853, "step": 3400 }, { "epoch": 0.8503740648379052, "grad_norm": 6.056275844573975, "learning_rate": 1.9150872817955113e-05, "loss": 0.5459, "step": 3410 }, { "epoch": 0.8528678304239401, "grad_norm": 4.816941261291504, "learning_rate": 1.914837905236908e-05, "loss": 0.4013, "step": 3420 }, { "epoch": 0.8553615960099751, "grad_norm": 5.128777027130127, "learning_rate": 1.9145885286783043e-05, "loss": 0.47, "step": 3430 }, { "epoch": 0.85785536159601, "grad_norm": 5.4511566162109375, "learning_rate": 1.914339152119701e-05, "loss": 0.4487, "step": 3440 }, { "epoch": 0.8603491271820449, "grad_norm": 7.571670055389404, "learning_rate": 1.9140897755610974e-05, "loss": 0.3869, "step": 3450 }, { "epoch": 0.8628428927680798, "grad_norm": 5.735408782958984, "learning_rate": 1.9138403990024937e-05, "loss": 0.4186, "step": 3460 }, { "epoch": 0.8653366583541147, "grad_norm": 5.8194475173950195, "learning_rate": 1.9135910224438904e-05, "loss": 0.5033, "step": 3470 }, { "epoch": 0.8678304239401496, "grad_norm": 6.551326274871826, "learning_rate": 1.9133416458852868e-05, "loss": 0.4501, "step": 3480 }, { "epoch": 0.8703241895261845, "grad_norm": 7.928218841552734, "learning_rate": 1.9130922693266835e-05, "loss": 0.492, "step": 3490 }, { "epoch": 0.8728179551122195, "grad_norm": 4.229366302490234, "learning_rate": 1.91284289276808e-05, "loss": 0.375, "step": 3500 }, { "epoch": 0.8753117206982544, "grad_norm": 7.293509006500244, "learning_rate": 1.9125935162094766e-05, "loss": 0.4393, "step": 3510 }, { "epoch": 0.8778054862842892, "grad_norm": 6.139492511749268, "learning_rate": 1.912344139650873e-05, "loss": 0.3927, "step": 3520 }, { "epoch": 0.8802992518703242, "grad_norm": 7.048768520355225, "learning_rate": 1.9120947630922696e-05, "loss": 0.4707, "step": 3530 }, { "epoch": 0.8827930174563591, "grad_norm": 4.429152488708496, "learning_rate": 1.911845386533666e-05, "loss": 0.4569, "step": 3540 }, { "epoch": 0.885286783042394, "grad_norm": 7.218087196350098, "learning_rate": 1.9115960099750627e-05, "loss": 0.4156, "step": 3550 }, { "epoch": 0.8877805486284289, "grad_norm": 5.999967098236084, "learning_rate": 1.911346633416459e-05, "loss": 0.4679, "step": 3560 }, { "epoch": 0.8902743142144638, "grad_norm": 5.682243824005127, "learning_rate": 1.9110972568578554e-05, "loss": 0.4587, "step": 3570 }, { "epoch": 0.8927680798004988, "grad_norm": 4.374421119689941, "learning_rate": 1.910847880299252e-05, "loss": 0.4885, "step": 3580 }, { "epoch": 0.8952618453865336, "grad_norm": 3.372025966644287, "learning_rate": 1.9105985037406485e-05, "loss": 0.4402, "step": 3590 }, { "epoch": 0.8977556109725686, "grad_norm": 8.967430114746094, "learning_rate": 1.9103491271820452e-05, "loss": 0.4719, "step": 3600 }, { "epoch": 0.9002493765586035, "grad_norm": 7.0953850746154785, "learning_rate": 1.9100997506234415e-05, "loss": 0.4882, "step": 3610 }, { "epoch": 0.9027431421446384, "grad_norm": 7.988110542297363, "learning_rate": 1.909850374064838e-05, "loss": 0.3987, "step": 3620 }, { "epoch": 0.9052369077306733, "grad_norm": 5.712813377380371, "learning_rate": 1.9096009975062346e-05, "loss": 0.4625, "step": 3630 }, { "epoch": 0.9077306733167082, "grad_norm": 5.640379905700684, "learning_rate": 1.909351620947631e-05, "loss": 0.4713, "step": 3640 }, { "epoch": 0.9102244389027432, "grad_norm": 6.33805513381958, "learning_rate": 1.9091022443890277e-05, "loss": 0.499, "step": 3650 }, { "epoch": 0.912718204488778, "grad_norm": 4.548992156982422, "learning_rate": 1.908852867830424e-05, "loss": 0.5038, "step": 3660 }, { "epoch": 0.9152119700748129, "grad_norm": 8.613363265991211, "learning_rate": 1.9086034912718204e-05, "loss": 0.484, "step": 3670 }, { "epoch": 0.9177057356608479, "grad_norm": 4.469968318939209, "learning_rate": 1.908354114713217e-05, "loss": 0.4473, "step": 3680 }, { "epoch": 0.9201995012468828, "grad_norm": 4.103412628173828, "learning_rate": 1.9081047381546138e-05, "loss": 0.4483, "step": 3690 }, { "epoch": 0.9226932668329177, "grad_norm": 4.446389675140381, "learning_rate": 1.90785536159601e-05, "loss": 0.4354, "step": 3700 }, { "epoch": 0.9251870324189526, "grad_norm": 3.4375393390655518, "learning_rate": 1.907605985037407e-05, "loss": 0.5089, "step": 3710 }, { "epoch": 0.9276807980049875, "grad_norm": 4.128108024597168, "learning_rate": 1.9073566084788032e-05, "loss": 0.4881, "step": 3720 }, { "epoch": 0.9301745635910225, "grad_norm": 6.5702104568481445, "learning_rate": 1.9071072319202e-05, "loss": 0.4994, "step": 3730 }, { "epoch": 0.9326683291770573, "grad_norm": 4.294257164001465, "learning_rate": 1.9068578553615963e-05, "loss": 0.5258, "step": 3740 }, { "epoch": 0.9351620947630923, "grad_norm": 10.037517547607422, "learning_rate": 1.9066084788029926e-05, "loss": 0.5119, "step": 3750 }, { "epoch": 0.9376558603491272, "grad_norm": 12.020997047424316, "learning_rate": 1.9063591022443893e-05, "loss": 0.5925, "step": 3760 }, { "epoch": 0.940149625935162, "grad_norm": 6.246609210968018, "learning_rate": 1.9061097256857857e-05, "loss": 0.4407, "step": 3770 }, { "epoch": 0.942643391521197, "grad_norm": 6.553652286529541, "learning_rate": 1.905860349127182e-05, "loss": 0.4833, "step": 3780 }, { "epoch": 0.9451371571072319, "grad_norm": 5.495224952697754, "learning_rate": 1.9056109725685788e-05, "loss": 0.4579, "step": 3790 }, { "epoch": 0.9476309226932669, "grad_norm": 5.287566184997559, "learning_rate": 1.905361596009975e-05, "loss": 0.5213, "step": 3800 }, { "epoch": 0.9501246882793017, "grad_norm": 5.307123184204102, "learning_rate": 1.9051122194513718e-05, "loss": 0.4347, "step": 3810 }, { "epoch": 0.9526184538653366, "grad_norm": 6.510472774505615, "learning_rate": 1.9048628428927682e-05, "loss": 0.5203, "step": 3820 }, { "epoch": 0.9551122194513716, "grad_norm": 4.413476467132568, "learning_rate": 1.9046134663341645e-05, "loss": 0.4359, "step": 3830 }, { "epoch": 0.9576059850374065, "grad_norm": 4.353250503540039, "learning_rate": 1.9043640897755612e-05, "loss": 0.4169, "step": 3840 }, { "epoch": 0.9600997506234414, "grad_norm": 6.984455108642578, "learning_rate": 1.9041147132169576e-05, "loss": 0.5156, "step": 3850 }, { "epoch": 0.9625935162094763, "grad_norm": 4.519622325897217, "learning_rate": 1.9038653366583543e-05, "loss": 0.4545, "step": 3860 }, { "epoch": 0.9650872817955112, "grad_norm": 4.484631061553955, "learning_rate": 1.9036159600997507e-05, "loss": 0.5115, "step": 3870 }, { "epoch": 0.9675810473815462, "grad_norm": 6.899198532104492, "learning_rate": 1.9033665835411474e-05, "loss": 0.4979, "step": 3880 }, { "epoch": 0.970074812967581, "grad_norm": 3.723076581954956, "learning_rate": 1.9031172069825437e-05, "loss": 0.464, "step": 3890 }, { "epoch": 0.972568578553616, "grad_norm": 6.418664932250977, "learning_rate": 1.9028678304239404e-05, "loss": 0.4556, "step": 3900 }, { "epoch": 0.9750623441396509, "grad_norm": 3.5927555561065674, "learning_rate": 1.9026184538653368e-05, "loss": 0.3993, "step": 3910 }, { "epoch": 0.9775561097256857, "grad_norm": 4.519811153411865, "learning_rate": 1.9023690773067335e-05, "loss": 0.5431, "step": 3920 }, { "epoch": 0.9800498753117207, "grad_norm": 11.50717830657959, "learning_rate": 1.90211970074813e-05, "loss": 0.4571, "step": 3930 }, { "epoch": 0.9825436408977556, "grad_norm": 4.3499531745910645, "learning_rate": 1.9018703241895265e-05, "loss": 0.4586, "step": 3940 }, { "epoch": 0.9850374064837906, "grad_norm": 6.324216365814209, "learning_rate": 1.901620947630923e-05, "loss": 0.4397, "step": 3950 }, { "epoch": 0.9875311720698254, "grad_norm": 6.713108062744141, "learning_rate": 1.9013715710723193e-05, "loss": 0.4178, "step": 3960 }, { "epoch": 0.9900249376558603, "grad_norm": 4.340822219848633, "learning_rate": 1.901122194513716e-05, "loss": 0.5256, "step": 3970 }, { "epoch": 0.9925187032418953, "grad_norm": 15.54155445098877, "learning_rate": 1.9008728179551123e-05, "loss": 0.4169, "step": 3980 }, { "epoch": 0.9950124688279302, "grad_norm": 3.4416344165802, "learning_rate": 1.9006234413965087e-05, "loss": 0.4568, "step": 3990 }, { "epoch": 0.9975062344139651, "grad_norm": 4.241981506347656, "learning_rate": 1.9003740648379054e-05, "loss": 0.4386, "step": 4000 }, { "epoch": 1.0, "grad_norm": 2.7881031036376953, "learning_rate": 1.9001246882793018e-05, "loss": 0.4039, "step": 4010 }, { "epoch": 1.0, "eval_loss": 0.46533510088920593, "eval_runtime": 60.0033, "eval_samples_per_second": 16.716, "eval_steps_per_second": 16.716, "step": 4010 }, { "epoch": 1.0024937655860349, "grad_norm": 5.6270012855529785, "learning_rate": 1.8998753117206985e-05, "loss": 0.4246, "step": 4020 }, { "epoch": 1.0049875311720697, "grad_norm": 9.492588996887207, "learning_rate": 1.8996259351620948e-05, "loss": 0.5515, "step": 4030 }, { "epoch": 1.0074812967581048, "grad_norm": 4.804108142852783, "learning_rate": 1.8993765586034915e-05, "loss": 0.4456, "step": 4040 }, { "epoch": 1.0099750623441397, "grad_norm": 4.964878082275391, "learning_rate": 1.899127182044888e-05, "loss": 0.418, "step": 4050 }, { "epoch": 1.0124688279301746, "grad_norm": 4.545513153076172, "learning_rate": 1.8988778054862846e-05, "loss": 0.4251, "step": 4060 }, { "epoch": 1.0149625935162094, "grad_norm": 4.988970756530762, "learning_rate": 1.898628428927681e-05, "loss": 0.4635, "step": 4070 }, { "epoch": 1.0174563591022443, "grad_norm": 3.7804431915283203, "learning_rate": 1.8983790523690776e-05, "loss": 0.4948, "step": 4080 }, { "epoch": 1.0199501246882794, "grad_norm": 6.862154960632324, "learning_rate": 1.898129675810474e-05, "loss": 0.471, "step": 4090 }, { "epoch": 1.0224438902743143, "grad_norm": 4.912306308746338, "learning_rate": 1.8978802992518707e-05, "loss": 0.4769, "step": 4100 }, { "epoch": 1.0249376558603491, "grad_norm": 6.113458633422852, "learning_rate": 1.897630922693267e-05, "loss": 0.4927, "step": 4110 }, { "epoch": 1.027431421446384, "grad_norm": 7.236551761627197, "learning_rate": 1.8973815461346634e-05, "loss": 0.4274, "step": 4120 }, { "epoch": 1.0299251870324189, "grad_norm": 5.630270481109619, "learning_rate": 1.89713216957606e-05, "loss": 0.4913, "step": 4130 }, { "epoch": 1.032418952618454, "grad_norm": 7.250363349914551, "learning_rate": 1.8968827930174565e-05, "loss": 0.3752, "step": 4140 }, { "epoch": 1.0349127182044888, "grad_norm": 4.857931137084961, "learning_rate": 1.8966334164588532e-05, "loss": 0.4775, "step": 4150 }, { "epoch": 1.0374064837905237, "grad_norm": 4.707160949707031, "learning_rate": 1.8963840399002495e-05, "loss": 0.5688, "step": 4160 }, { "epoch": 1.0399002493765586, "grad_norm": 6.663334846496582, "learning_rate": 1.896134663341646e-05, "loss": 0.4619, "step": 4170 }, { "epoch": 1.0423940149625934, "grad_norm": 8.353853225708008, "learning_rate": 1.8958852867830426e-05, "loss": 0.4525, "step": 4180 }, { "epoch": 1.0448877805486285, "grad_norm": 5.910696506500244, "learning_rate": 1.895635910224439e-05, "loss": 0.4344, "step": 4190 }, { "epoch": 1.0473815461346634, "grad_norm": 5.698975086212158, "learning_rate": 1.8953865336658353e-05, "loss": 0.4816, "step": 4200 }, { "epoch": 1.0498753117206983, "grad_norm": 9.77688217163086, "learning_rate": 1.895137157107232e-05, "loss": 0.417, "step": 4210 }, { "epoch": 1.0523690773067331, "grad_norm": 4.210202693939209, "learning_rate": 1.8948877805486284e-05, "loss": 0.5169, "step": 4220 }, { "epoch": 1.054862842892768, "grad_norm": 4.911707401275635, "learning_rate": 1.894638403990025e-05, "loss": 0.4293, "step": 4230 }, { "epoch": 1.057356608478803, "grad_norm": 5.228448867797852, "learning_rate": 1.8943890274314215e-05, "loss": 0.429, "step": 4240 }, { "epoch": 1.059850374064838, "grad_norm": 5.143721103668213, "learning_rate": 1.894139650872818e-05, "loss": 0.4066, "step": 4250 }, { "epoch": 1.0623441396508728, "grad_norm": 7.817266464233398, "learning_rate": 1.8938902743142145e-05, "loss": 0.6464, "step": 4260 }, { "epoch": 1.0648379052369077, "grad_norm": 5.188505172729492, "learning_rate": 1.8936408977556112e-05, "loss": 0.4417, "step": 4270 }, { "epoch": 1.0673316708229426, "grad_norm": 6.084823131561279, "learning_rate": 1.8933915211970076e-05, "loss": 0.47, "step": 4280 }, { "epoch": 1.0698254364089776, "grad_norm": 4.737970352172852, "learning_rate": 1.8931421446384043e-05, "loss": 0.5196, "step": 4290 }, { "epoch": 1.0723192019950125, "grad_norm": 6.13934326171875, "learning_rate": 1.8928927680798006e-05, "loss": 0.3904, "step": 4300 }, { "epoch": 1.0748129675810474, "grad_norm": 7.485795497894287, "learning_rate": 1.8926433915211973e-05, "loss": 0.4556, "step": 4310 }, { "epoch": 1.0773067331670823, "grad_norm": 6.431264877319336, "learning_rate": 1.8923940149625937e-05, "loss": 0.4713, "step": 4320 }, { "epoch": 1.0798004987531171, "grad_norm": 3.5695624351501465, "learning_rate": 1.89214463840399e-05, "loss": 0.4109, "step": 4330 }, { "epoch": 1.0822942643391522, "grad_norm": 4.416280746459961, "learning_rate": 1.8918952618453868e-05, "loss": 0.4734, "step": 4340 }, { "epoch": 1.084788029925187, "grad_norm": 4.557003021240234, "learning_rate": 1.891645885286783e-05, "loss": 0.4291, "step": 4350 }, { "epoch": 1.087281795511222, "grad_norm": 7.2662272453308105, "learning_rate": 1.8913965087281798e-05, "loss": 0.4176, "step": 4360 }, { "epoch": 1.0897755610972568, "grad_norm": 7.033773422241211, "learning_rate": 1.8911471321695762e-05, "loss": 0.4534, "step": 4370 }, { "epoch": 1.0922693266832917, "grad_norm": 4.475677490234375, "learning_rate": 1.8908977556109726e-05, "loss": 0.4394, "step": 4380 }, { "epoch": 1.0947630922693268, "grad_norm": 4.8636698722839355, "learning_rate": 1.8906483790523693e-05, "loss": 0.4371, "step": 4390 }, { "epoch": 1.0972568578553616, "grad_norm": 5.832028865814209, "learning_rate": 1.8903990024937656e-05, "loss": 0.444, "step": 4400 }, { "epoch": 1.0997506234413965, "grad_norm": 7.826235771179199, "learning_rate": 1.8901496259351623e-05, "loss": 0.5365, "step": 4410 }, { "epoch": 1.1022443890274314, "grad_norm": 6.200789928436279, "learning_rate": 1.8899002493765587e-05, "loss": 0.4081, "step": 4420 }, { "epoch": 1.1047381546134662, "grad_norm": 5.708020210266113, "learning_rate": 1.8896508728179554e-05, "loss": 0.5594, "step": 4430 }, { "epoch": 1.1072319201995013, "grad_norm": 5.138545513153076, "learning_rate": 1.8894014962593517e-05, "loss": 0.4128, "step": 4440 }, { "epoch": 1.1097256857855362, "grad_norm": 5.702857971191406, "learning_rate": 1.8891521197007484e-05, "loss": 0.4568, "step": 4450 }, { "epoch": 1.112219451371571, "grad_norm": 5.061423301696777, "learning_rate": 1.8889027431421448e-05, "loss": 0.4298, "step": 4460 }, { "epoch": 1.114713216957606, "grad_norm": 4.579878330230713, "learning_rate": 1.8886533665835415e-05, "loss": 0.3446, "step": 4470 }, { "epoch": 1.1172069825436408, "grad_norm": 4.493147373199463, "learning_rate": 1.888403990024938e-05, "loss": 0.4652, "step": 4480 }, { "epoch": 1.119700748129676, "grad_norm": 3.8753387928009033, "learning_rate": 1.8881546134663342e-05, "loss": 0.4559, "step": 4490 }, { "epoch": 1.1221945137157108, "grad_norm": 5.9976325035095215, "learning_rate": 1.887905236907731e-05, "loss": 0.4102, "step": 4500 }, { "epoch": 1.1246882793017456, "grad_norm": 7.784470081329346, "learning_rate": 1.8876558603491273e-05, "loss": 0.4989, "step": 4510 }, { "epoch": 1.1271820448877805, "grad_norm": 5.6073737144470215, "learning_rate": 1.887406483790524e-05, "loss": 0.3927, "step": 4520 }, { "epoch": 1.1296758104738154, "grad_norm": 11.797880172729492, "learning_rate": 1.8871571072319203e-05, "loss": 0.5698, "step": 4530 }, { "epoch": 1.1321695760598505, "grad_norm": 10.158886909484863, "learning_rate": 1.8869077306733167e-05, "loss": 0.3766, "step": 4540 }, { "epoch": 1.1346633416458853, "grad_norm": 6.6217875480651855, "learning_rate": 1.8866583541147134e-05, "loss": 0.46, "step": 4550 }, { "epoch": 1.1371571072319202, "grad_norm": 5.993943691253662, "learning_rate": 1.8864089775561098e-05, "loss": 0.512, "step": 4560 }, { "epoch": 1.139650872817955, "grad_norm": 5.103926181793213, "learning_rate": 1.886159600997506e-05, "loss": 0.4591, "step": 4570 }, { "epoch": 1.14214463840399, "grad_norm": 6.188269138336182, "learning_rate": 1.885910224438903e-05, "loss": 0.4811, "step": 4580 }, { "epoch": 1.144638403990025, "grad_norm": 3.5611369609832764, "learning_rate": 1.8856608478802992e-05, "loss": 0.4138, "step": 4590 }, { "epoch": 1.14713216957606, "grad_norm": 6.32553768157959, "learning_rate": 1.885411471321696e-05, "loss": 0.4718, "step": 4600 }, { "epoch": 1.1496259351620948, "grad_norm": 5.74522590637207, "learning_rate": 1.8851620947630923e-05, "loss": 0.4678, "step": 4610 }, { "epoch": 1.1521197007481296, "grad_norm": 3.8611743450164795, "learning_rate": 1.884912718204489e-05, "loss": 0.361, "step": 4620 }, { "epoch": 1.1546134663341645, "grad_norm": 5.439576625823975, "learning_rate": 1.8846633416458857e-05, "loss": 0.4243, "step": 4630 }, { "epoch": 1.1571072319201996, "grad_norm": 6.206503391265869, "learning_rate": 1.884413965087282e-05, "loss": 0.4304, "step": 4640 }, { "epoch": 1.1596009975062345, "grad_norm": 10.82448959350586, "learning_rate": 1.8841645885286787e-05, "loss": 0.5127, "step": 4650 }, { "epoch": 1.1620947630922693, "grad_norm": 5.439650535583496, "learning_rate": 1.883915211970075e-05, "loss": 0.4592, "step": 4660 }, { "epoch": 1.1645885286783042, "grad_norm": 4.041094779968262, "learning_rate": 1.8836658354114714e-05, "loss": 0.4481, "step": 4670 }, { "epoch": 1.167082294264339, "grad_norm": 4.277478218078613, "learning_rate": 1.883416458852868e-05, "loss": 0.4371, "step": 4680 }, { "epoch": 1.1695760598503742, "grad_norm": 9.352392196655273, "learning_rate": 1.8831670822942645e-05, "loss": 0.4469, "step": 4690 }, { "epoch": 1.172069825436409, "grad_norm": 6.215358257293701, "learning_rate": 1.882917705735661e-05, "loss": 0.4068, "step": 4700 }, { "epoch": 1.174563591022444, "grad_norm": 8.063887596130371, "learning_rate": 1.8826683291770576e-05, "loss": 0.4997, "step": 4710 }, { "epoch": 1.1770573566084788, "grad_norm": 9.79735279083252, "learning_rate": 1.882418952618454e-05, "loss": 0.4281, "step": 4720 }, { "epoch": 1.1795511221945136, "grad_norm": 8.45605182647705, "learning_rate": 1.8821695760598506e-05, "loss": 0.4207, "step": 4730 }, { "epoch": 1.1820448877805487, "grad_norm": 5.719277858734131, "learning_rate": 1.881920199501247e-05, "loss": 0.4669, "step": 4740 }, { "epoch": 1.1845386533665836, "grad_norm": 5.201423645019531, "learning_rate": 1.8816708229426434e-05, "loss": 0.4345, "step": 4750 }, { "epoch": 1.1870324189526185, "grad_norm": 4.76845121383667, "learning_rate": 1.88142144638404e-05, "loss": 0.4489, "step": 4760 }, { "epoch": 1.1895261845386533, "grad_norm": 7.2256622314453125, "learning_rate": 1.8811720698254364e-05, "loss": 0.4855, "step": 4770 }, { "epoch": 1.1920199501246882, "grad_norm": 6.201477527618408, "learning_rate": 1.880922693266833e-05, "loss": 0.4176, "step": 4780 }, { "epoch": 1.1945137157107233, "grad_norm": 7.298521995544434, "learning_rate": 1.8806733167082295e-05, "loss": 0.4582, "step": 4790 }, { "epoch": 1.1970074812967582, "grad_norm": 6.724678993225098, "learning_rate": 1.8804239401496262e-05, "loss": 0.502, "step": 4800 }, { "epoch": 1.199501246882793, "grad_norm": 4.230656623840332, "learning_rate": 1.8801745635910225e-05, "loss": 0.4097, "step": 4810 }, { "epoch": 1.201995012468828, "grad_norm": 4.771544456481934, "learning_rate": 1.8799251870324192e-05, "loss": 0.4729, "step": 4820 }, { "epoch": 1.2044887780548628, "grad_norm": 5.5048651695251465, "learning_rate": 1.8796758104738156e-05, "loss": 0.4848, "step": 4830 }, { "epoch": 1.2069825436408976, "grad_norm": 10.239465713500977, "learning_rate": 1.8794264339152123e-05, "loss": 0.3863, "step": 4840 }, { "epoch": 1.2094763092269327, "grad_norm": 6.2510986328125, "learning_rate": 1.8791770573566087e-05, "loss": 0.4414, "step": 4850 }, { "epoch": 1.2119700748129676, "grad_norm": 4.370057106018066, "learning_rate": 1.8789276807980054e-05, "loss": 0.4307, "step": 4860 }, { "epoch": 1.2144638403990025, "grad_norm": 6.856778621673584, "learning_rate": 1.8786783042394017e-05, "loss": 0.5257, "step": 4870 }, { "epoch": 1.2169576059850373, "grad_norm": 3.6856114864349365, "learning_rate": 1.878428927680798e-05, "loss": 0.433, "step": 4880 }, { "epoch": 1.2194513715710724, "grad_norm": 4.310407638549805, "learning_rate": 1.8781795511221948e-05, "loss": 0.4211, "step": 4890 }, { "epoch": 1.2219451371571073, "grad_norm": 4.885754585266113, "learning_rate": 1.877930174563591e-05, "loss": 0.3955, "step": 4900 }, { "epoch": 1.2244389027431422, "grad_norm": 5.621823787689209, "learning_rate": 1.8776807980049875e-05, "loss": 0.4374, "step": 4910 }, { "epoch": 1.226932668329177, "grad_norm": 8.302716255187988, "learning_rate": 1.8774314214463842e-05, "loss": 0.4245, "step": 4920 }, { "epoch": 1.229426433915212, "grad_norm": 5.974333763122559, "learning_rate": 1.8771820448877806e-05, "loss": 0.547, "step": 4930 }, { "epoch": 1.2319201995012468, "grad_norm": 6.3131303787231445, "learning_rate": 1.8769326683291773e-05, "loss": 0.4662, "step": 4940 }, { "epoch": 1.2344139650872819, "grad_norm": 5.7204389572143555, "learning_rate": 1.8766832917705736e-05, "loss": 0.5053, "step": 4950 }, { "epoch": 1.2369077306733167, "grad_norm": 4.9055047035217285, "learning_rate": 1.87643391521197e-05, "loss": 0.5201, "step": 4960 }, { "epoch": 1.2394014962593516, "grad_norm": 3.9648923873901367, "learning_rate": 1.8761845386533667e-05, "loss": 0.4802, "step": 4970 }, { "epoch": 1.2418952618453865, "grad_norm": 7.1016364097595215, "learning_rate": 1.8759351620947634e-05, "loss": 0.4384, "step": 4980 }, { "epoch": 1.2443890274314215, "grad_norm": 4.486512184143066, "learning_rate": 1.8756857855361598e-05, "loss": 0.4456, "step": 4990 }, { "epoch": 1.2468827930174564, "grad_norm": 5.411483287811279, "learning_rate": 1.8754364089775565e-05, "loss": 0.4317, "step": 5000 }, { "epoch": 1.2493765586034913, "grad_norm": 6.580456733703613, "learning_rate": 1.8751870324189528e-05, "loss": 0.4559, "step": 5010 }, { "epoch": 1.2518703241895262, "grad_norm": 3.885417938232422, "learning_rate": 1.8749376558603495e-05, "loss": 0.4265, "step": 5020 }, { "epoch": 1.254364089775561, "grad_norm": 5.22758150100708, "learning_rate": 1.874688279301746e-05, "loss": 0.4173, "step": 5030 }, { "epoch": 1.2568578553615959, "grad_norm": 6.1364827156066895, "learning_rate": 1.8744389027431422e-05, "loss": 0.4624, "step": 5040 }, { "epoch": 1.259351620947631, "grad_norm": 6.770162105560303, "learning_rate": 1.874189526184539e-05, "loss": 0.4087, "step": 5050 }, { "epoch": 1.2618453865336658, "grad_norm": 6.774170875549316, "learning_rate": 1.8739401496259353e-05, "loss": 0.4163, "step": 5060 }, { "epoch": 1.2643391521197007, "grad_norm": 5.055146217346191, "learning_rate": 1.8736907730673317e-05, "loss": 0.4277, "step": 5070 }, { "epoch": 1.2668329177057356, "grad_norm": 4.696500301361084, "learning_rate": 1.8734413965087284e-05, "loss": 0.3918, "step": 5080 }, { "epoch": 1.2693266832917707, "grad_norm": 5.685618877410889, "learning_rate": 1.8731920199501247e-05, "loss": 0.5113, "step": 5090 }, { "epoch": 1.2718204488778055, "grad_norm": 4.808736324310303, "learning_rate": 1.8729426433915214e-05, "loss": 0.4891, "step": 5100 }, { "epoch": 1.2743142144638404, "grad_norm": 8.052289009094238, "learning_rate": 1.8726932668329178e-05, "loss": 0.4433, "step": 5110 }, { "epoch": 1.2768079800498753, "grad_norm": 6.9519853591918945, "learning_rate": 1.872443890274314e-05, "loss": 0.4916, "step": 5120 }, { "epoch": 1.2793017456359101, "grad_norm": 4.836465358734131, "learning_rate": 1.872194513715711e-05, "loss": 0.4411, "step": 5130 }, { "epoch": 1.281795511221945, "grad_norm": 4.297245025634766, "learning_rate": 1.8719451371571072e-05, "loss": 0.4697, "step": 5140 }, { "epoch": 1.28428927680798, "grad_norm": 5.085397720336914, "learning_rate": 1.871695760598504e-05, "loss": 0.4445, "step": 5150 }, { "epoch": 1.286783042394015, "grad_norm": 4.748317241668701, "learning_rate": 1.8714463840399003e-05, "loss": 0.4488, "step": 5160 }, { "epoch": 1.2892768079800498, "grad_norm": 5.959413528442383, "learning_rate": 1.871197007481297e-05, "loss": 0.4648, "step": 5170 }, { "epoch": 1.2917705735660847, "grad_norm": 11.551535606384277, "learning_rate": 1.8709476309226933e-05, "loss": 0.445, "step": 5180 }, { "epoch": 1.2942643391521198, "grad_norm": 6.094724655151367, "learning_rate": 1.87069825436409e-05, "loss": 0.4175, "step": 5190 }, { "epoch": 1.2967581047381547, "grad_norm": 9.543522834777832, "learning_rate": 1.8704488778054864e-05, "loss": 0.5068, "step": 5200 }, { "epoch": 1.2992518703241895, "grad_norm": 6.417718410491943, "learning_rate": 1.870199501246883e-05, "loss": 0.4341, "step": 5210 }, { "epoch": 1.3017456359102244, "grad_norm": 4.876067638397217, "learning_rate": 1.8699501246882795e-05, "loss": 0.5283, "step": 5220 }, { "epoch": 1.3042394014962593, "grad_norm": 6.962366580963135, "learning_rate": 1.869700748129676e-05, "loss": 0.5235, "step": 5230 }, { "epoch": 1.3067331670822941, "grad_norm": 6.60381555557251, "learning_rate": 1.8694513715710725e-05, "loss": 0.5055, "step": 5240 }, { "epoch": 1.3092269326683292, "grad_norm": 4.449953079223633, "learning_rate": 1.869201995012469e-05, "loss": 0.5745, "step": 5250 }, { "epoch": 1.311720698254364, "grad_norm": 6.488658905029297, "learning_rate": 1.8689526184538656e-05, "loss": 0.4261, "step": 5260 }, { "epoch": 1.314214463840399, "grad_norm": 4.1007795333862305, "learning_rate": 1.868703241895262e-05, "loss": 0.3836, "step": 5270 }, { "epoch": 1.3167082294264338, "grad_norm": 7.238208293914795, "learning_rate": 1.8684538653366583e-05, "loss": 0.5217, "step": 5280 }, { "epoch": 1.319201995012469, "grad_norm": 5.4013590812683105, "learning_rate": 1.868204488778055e-05, "loss": 0.4387, "step": 5290 }, { "epoch": 1.3216957605985038, "grad_norm": 8.315073013305664, "learning_rate": 1.8679551122194514e-05, "loss": 0.4616, "step": 5300 }, { "epoch": 1.3241895261845387, "grad_norm": 3.4126434326171875, "learning_rate": 1.867705735660848e-05, "loss": 0.4431, "step": 5310 }, { "epoch": 1.3266832917705735, "grad_norm": 7.5681681632995605, "learning_rate": 1.8674563591022444e-05, "loss": 0.5109, "step": 5320 }, { "epoch": 1.3291770573566084, "grad_norm": 5.397674560546875, "learning_rate": 1.867206982543641e-05, "loss": 0.4732, "step": 5330 }, { "epoch": 1.3316708229426433, "grad_norm": 5.570555686950684, "learning_rate": 1.8669576059850375e-05, "loss": 0.4385, "step": 5340 }, { "epoch": 1.3341645885286784, "grad_norm": 6.722954273223877, "learning_rate": 1.8667082294264342e-05, "loss": 0.4784, "step": 5350 }, { "epoch": 1.3366583541147132, "grad_norm": 4.577895164489746, "learning_rate": 1.8664588528678306e-05, "loss": 0.4526, "step": 5360 }, { "epoch": 1.339152119700748, "grad_norm": 5.092601776123047, "learning_rate": 1.8662094763092273e-05, "loss": 0.37, "step": 5370 }, { "epoch": 1.341645885286783, "grad_norm": 6.236178398132324, "learning_rate": 1.8659600997506236e-05, "loss": 0.4303, "step": 5380 }, { "epoch": 1.344139650872818, "grad_norm": 4.475374698638916, "learning_rate": 1.8657107231920203e-05, "loss": 0.4483, "step": 5390 }, { "epoch": 1.346633416458853, "grad_norm": 4.651284217834473, "learning_rate": 1.8654613466334167e-05, "loss": 0.431, "step": 5400 }, { "epoch": 1.3491271820448878, "grad_norm": 4.801163673400879, "learning_rate": 1.865211970074813e-05, "loss": 0.4817, "step": 5410 }, { "epoch": 1.3516209476309227, "grad_norm": 6.051980972290039, "learning_rate": 1.8649625935162097e-05, "loss": 0.4447, "step": 5420 }, { "epoch": 1.3541147132169575, "grad_norm": 6.429530143737793, "learning_rate": 1.864713216957606e-05, "loss": 0.5124, "step": 5430 }, { "epoch": 1.3566084788029924, "grad_norm": 9.302543640136719, "learning_rate": 1.8644638403990028e-05, "loss": 0.4148, "step": 5440 }, { "epoch": 1.3591022443890275, "grad_norm": 6.575656414031982, "learning_rate": 1.864214463840399e-05, "loss": 0.4759, "step": 5450 }, { "epoch": 1.3615960099750624, "grad_norm": 6.7204084396362305, "learning_rate": 1.8639650872817955e-05, "loss": 0.4483, "step": 5460 }, { "epoch": 1.3640897755610972, "grad_norm": 9.02580451965332, "learning_rate": 1.8637157107231922e-05, "loss": 0.4601, "step": 5470 }, { "epoch": 1.366583541147132, "grad_norm": 4.408763885498047, "learning_rate": 1.8634663341645886e-05, "loss": 0.4205, "step": 5480 }, { "epoch": 1.3690773067331672, "grad_norm": 4.800736427307129, "learning_rate": 1.863216957605985e-05, "loss": 0.4998, "step": 5490 }, { "epoch": 1.371571072319202, "grad_norm": 7.011260509490967, "learning_rate": 1.8629675810473817e-05, "loss": 0.5127, "step": 5500 }, { "epoch": 1.374064837905237, "grad_norm": 4.573014259338379, "learning_rate": 1.862718204488778e-05, "loss": 0.4685, "step": 5510 }, { "epoch": 1.3765586034912718, "grad_norm": 6.397534370422363, "learning_rate": 1.8624688279301747e-05, "loss": 0.464, "step": 5520 }, { "epoch": 1.3790523690773067, "grad_norm": 4.248035907745361, "learning_rate": 1.862219451371571e-05, "loss": 0.4275, "step": 5530 }, { "epoch": 1.3815461346633415, "grad_norm": 4.859241008758545, "learning_rate": 1.8619700748129678e-05, "loss": 0.3563, "step": 5540 }, { "epoch": 1.3840399002493766, "grad_norm": 4.7854905128479, "learning_rate": 1.861720698254364e-05, "loss": 0.4193, "step": 5550 }, { "epoch": 1.3865336658354115, "grad_norm": 17.388259887695312, "learning_rate": 1.861471321695761e-05, "loss": 0.5405, "step": 5560 }, { "epoch": 1.3890274314214464, "grad_norm": 25.828964233398438, "learning_rate": 1.8612219451371572e-05, "loss": 0.4526, "step": 5570 }, { "epoch": 1.3915211970074812, "grad_norm": 12.78157901763916, "learning_rate": 1.860972568578554e-05, "loss": 0.45, "step": 5580 }, { "epoch": 1.3940149625935163, "grad_norm": 5.025924205780029, "learning_rate": 1.8607231920199503e-05, "loss": 0.4668, "step": 5590 }, { "epoch": 1.3965087281795512, "grad_norm": 3.784602403640747, "learning_rate": 1.860473815461347e-05, "loss": 0.4034, "step": 5600 }, { "epoch": 1.399002493765586, "grad_norm": 5.713232517242432, "learning_rate": 1.8602244389027433e-05, "loss": 0.4374, "step": 5610 }, { "epoch": 1.401496259351621, "grad_norm": 5.523266792297363, "learning_rate": 1.8599750623441397e-05, "loss": 0.4581, "step": 5620 }, { "epoch": 1.4039900249376558, "grad_norm": 12.597986221313477, "learning_rate": 1.8597256857855364e-05, "loss": 0.4511, "step": 5630 }, { "epoch": 1.4064837905236907, "grad_norm": 3.8616204261779785, "learning_rate": 1.8594763092269327e-05, "loss": 0.4322, "step": 5640 }, { "epoch": 1.4089775561097257, "grad_norm": 6.135002613067627, "learning_rate": 1.8592269326683294e-05, "loss": 0.4043, "step": 5650 }, { "epoch": 1.4114713216957606, "grad_norm": 4.191798210144043, "learning_rate": 1.8589775561097258e-05, "loss": 0.4508, "step": 5660 }, { "epoch": 1.4139650872817955, "grad_norm": 5.872029781341553, "learning_rate": 1.858728179551122e-05, "loss": 0.4515, "step": 5670 }, { "epoch": 1.4164588528678304, "grad_norm": 7.221251010894775, "learning_rate": 1.858478802992519e-05, "loss": 0.4471, "step": 5680 }, { "epoch": 1.4189526184538654, "grad_norm": 3.4195024967193604, "learning_rate": 1.8582294264339152e-05, "loss": 0.3757, "step": 5690 }, { "epoch": 1.4214463840399003, "grad_norm": 4.992902755737305, "learning_rate": 1.857980049875312e-05, "loss": 0.4098, "step": 5700 }, { "epoch": 1.4239401496259352, "grad_norm": 7.851417064666748, "learning_rate": 1.8577306733167083e-05, "loss": 0.54, "step": 5710 }, { "epoch": 1.42643391521197, "grad_norm": 5.539144992828369, "learning_rate": 1.857481296758105e-05, "loss": 0.4191, "step": 5720 }, { "epoch": 1.428927680798005, "grad_norm": 9.11573314666748, "learning_rate": 1.8572319201995014e-05, "loss": 0.4237, "step": 5730 }, { "epoch": 1.4314214463840398, "grad_norm": 5.815394401550293, "learning_rate": 1.856982543640898e-05, "loss": 0.4564, "step": 5740 }, { "epoch": 1.4339152119700749, "grad_norm": 4.174201011657715, "learning_rate": 1.8567331670822944e-05, "loss": 0.4493, "step": 5750 }, { "epoch": 1.4364089775561097, "grad_norm": 7.393326759338379, "learning_rate": 1.856483790523691e-05, "loss": 0.4579, "step": 5760 }, { "epoch": 1.4389027431421446, "grad_norm": 4.805573463439941, "learning_rate": 1.8562344139650875e-05, "loss": 0.4525, "step": 5770 }, { "epoch": 1.4413965087281795, "grad_norm": 4.3430495262146, "learning_rate": 1.855985037406484e-05, "loss": 0.4323, "step": 5780 }, { "epoch": 1.4438902743142146, "grad_norm": 4.935792922973633, "learning_rate": 1.8557356608478805e-05, "loss": 0.4649, "step": 5790 }, { "epoch": 1.4463840399002494, "grad_norm": 3.4280688762664795, "learning_rate": 1.855486284289277e-05, "loss": 0.4597, "step": 5800 }, { "epoch": 1.4488778054862843, "grad_norm": 5.937455654144287, "learning_rate": 1.8552369077306736e-05, "loss": 0.4628, "step": 5810 }, { "epoch": 1.4513715710723192, "grad_norm": 4.315957069396973, "learning_rate": 1.85498753117207e-05, "loss": 0.3815, "step": 5820 }, { "epoch": 1.453865336658354, "grad_norm": 6.386058330535889, "learning_rate": 1.8547381546134663e-05, "loss": 0.5149, "step": 5830 }, { "epoch": 1.456359102244389, "grad_norm": 7.211171627044678, "learning_rate": 1.854488778054863e-05, "loss": 0.399, "step": 5840 }, { "epoch": 1.458852867830424, "grad_norm": 6.84633207321167, "learning_rate": 1.8542394014962594e-05, "loss": 0.5866, "step": 5850 }, { "epoch": 1.4613466334164589, "grad_norm": 5.431811332702637, "learning_rate": 1.853990024937656e-05, "loss": 0.4732, "step": 5860 }, { "epoch": 1.4638403990024937, "grad_norm": 4.845829486846924, "learning_rate": 1.8537406483790524e-05, "loss": 0.4998, "step": 5870 }, { "epoch": 1.4663341645885286, "grad_norm": 5.635951042175293, "learning_rate": 1.8534912718204488e-05, "loss": 0.3866, "step": 5880 }, { "epoch": 1.4688279301745637, "grad_norm": 8.562129974365234, "learning_rate": 1.8532418952618455e-05, "loss": 0.452, "step": 5890 }, { "epoch": 1.4713216957605986, "grad_norm": 5.379266738891602, "learning_rate": 1.852992518703242e-05, "loss": 0.4434, "step": 5900 }, { "epoch": 1.4738154613466334, "grad_norm": 3.9054388999938965, "learning_rate": 1.8527431421446386e-05, "loss": 0.4882, "step": 5910 }, { "epoch": 1.4763092269326683, "grad_norm": 7.7161383628845215, "learning_rate": 1.8524937655860353e-05, "loss": 0.4105, "step": 5920 }, { "epoch": 1.4788029925187032, "grad_norm": 5.12331485748291, "learning_rate": 1.8522443890274316e-05, "loss": 0.4601, "step": 5930 }, { "epoch": 1.481296758104738, "grad_norm": 7.548189640045166, "learning_rate": 1.8519950124688283e-05, "loss": 0.4507, "step": 5940 }, { "epoch": 1.4837905236907731, "grad_norm": 3.955315589904785, "learning_rate": 1.8517456359102247e-05, "loss": 0.4234, "step": 5950 }, { "epoch": 1.486284289276808, "grad_norm": 6.11161470413208, "learning_rate": 1.851496259351621e-05, "loss": 0.4254, "step": 5960 }, { "epoch": 1.4887780548628429, "grad_norm": 8.937747955322266, "learning_rate": 1.8512468827930178e-05, "loss": 0.4604, "step": 5970 }, { "epoch": 1.4912718204488777, "grad_norm": 13.255558967590332, "learning_rate": 1.850997506234414e-05, "loss": 0.4284, "step": 5980 }, { "epoch": 1.4937655860349128, "grad_norm": 6.2703776359558105, "learning_rate": 1.8507481296758105e-05, "loss": 0.3886, "step": 5990 }, { "epoch": 1.4962593516209477, "grad_norm": 6.531227111816406, "learning_rate": 1.8504987531172072e-05, "loss": 0.4601, "step": 6000 }, { "epoch": 1.4987531172069826, "grad_norm": 5.363514423370361, "learning_rate": 1.8502493765586035e-05, "loss": 0.5012, "step": 6010 }, { "epoch": 1.5012468827930174, "grad_norm": 3.8353939056396484, "learning_rate": 1.8500000000000002e-05, "loss": 0.4911, "step": 6020 }, { "epoch": 1.5037406483790523, "grad_norm": 5.923316955566406, "learning_rate": 1.8497506234413966e-05, "loss": 0.3982, "step": 6030 }, { "epoch": 1.5062344139650872, "grad_norm": 4.840806484222412, "learning_rate": 1.849501246882793e-05, "loss": 0.4035, "step": 6040 }, { "epoch": 1.508728179551122, "grad_norm": 4.171060085296631, "learning_rate": 1.8492518703241897e-05, "loss": 0.515, "step": 6050 }, { "epoch": 1.5112219451371571, "grad_norm": 7.2029876708984375, "learning_rate": 1.849002493765586e-05, "loss": 0.4485, "step": 6060 }, { "epoch": 1.513715710723192, "grad_norm": 4.763010025024414, "learning_rate": 1.8487531172069827e-05, "loss": 0.3859, "step": 6070 }, { "epoch": 1.516209476309227, "grad_norm": 4.51248836517334, "learning_rate": 1.848503740648379e-05, "loss": 0.4476, "step": 6080 }, { "epoch": 1.518703241895262, "grad_norm": 5.466489791870117, "learning_rate": 1.8482543640897758e-05, "loss": 0.4742, "step": 6090 }, { "epoch": 1.5211970074812968, "grad_norm": 6.138864994049072, "learning_rate": 1.848004987531172e-05, "loss": 0.4634, "step": 6100 }, { "epoch": 1.5236907730673317, "grad_norm": 4.4593892097473145, "learning_rate": 1.847755610972569e-05, "loss": 0.4097, "step": 6110 }, { "epoch": 1.5261845386533666, "grad_norm": 5.734845161437988, "learning_rate": 1.8475062344139652e-05, "loss": 0.4536, "step": 6120 }, { "epoch": 1.5286783042394014, "grad_norm": 5.039837837219238, "learning_rate": 1.847256857855362e-05, "loss": 0.4227, "step": 6130 }, { "epoch": 1.5311720698254363, "grad_norm": 5.665532112121582, "learning_rate": 1.8470074812967583e-05, "loss": 0.3996, "step": 6140 }, { "epoch": 1.5336658354114712, "grad_norm": 8.553827285766602, "learning_rate": 1.846758104738155e-05, "loss": 0.5689, "step": 6150 }, { "epoch": 1.5361596009975063, "grad_norm": 10.087984085083008, "learning_rate": 1.8465087281795513e-05, "loss": 0.3943, "step": 6160 }, { "epoch": 1.5386533665835411, "grad_norm": 4.186636924743652, "learning_rate": 1.8462593516209477e-05, "loss": 0.3991, "step": 6170 }, { "epoch": 1.5411471321695762, "grad_norm": 6.2279839515686035, "learning_rate": 1.8460099750623444e-05, "loss": 0.4065, "step": 6180 }, { "epoch": 1.543640897755611, "grad_norm": 4.494476795196533, "learning_rate": 1.8457605985037408e-05, "loss": 0.3763, "step": 6190 }, { "epoch": 1.546134663341646, "grad_norm": 4.94999885559082, "learning_rate": 1.845511221945137e-05, "loss": 0.4988, "step": 6200 }, { "epoch": 1.5486284289276808, "grad_norm": 5.871788024902344, "learning_rate": 1.8452618453865338e-05, "loss": 0.4231, "step": 6210 }, { "epoch": 1.5511221945137157, "grad_norm": 5.616420269012451, "learning_rate": 1.8450124688279302e-05, "loss": 0.372, "step": 6220 }, { "epoch": 1.5536159600997506, "grad_norm": 4.9915971755981445, "learning_rate": 1.844763092269327e-05, "loss": 0.3971, "step": 6230 }, { "epoch": 1.5561097256857854, "grad_norm": 4.821728229522705, "learning_rate": 1.8445137157107232e-05, "loss": 0.4182, "step": 6240 }, { "epoch": 1.5586034912718203, "grad_norm": 8.948670387268066, "learning_rate": 1.8442643391521196e-05, "loss": 0.4545, "step": 6250 }, { "epoch": 1.5610972568578554, "grad_norm": 6.1959228515625, "learning_rate": 1.8440149625935163e-05, "loss": 0.397, "step": 6260 }, { "epoch": 1.5635910224438903, "grad_norm": 4.38476037979126, "learning_rate": 1.843765586034913e-05, "loss": 0.3934, "step": 6270 }, { "epoch": 1.5660847880299253, "grad_norm": 6.135573387145996, "learning_rate": 1.8435162094763094e-05, "loss": 0.4019, "step": 6280 }, { "epoch": 1.5685785536159602, "grad_norm": 3.752264976501465, "learning_rate": 1.843266832917706e-05, "loss": 0.4041, "step": 6290 }, { "epoch": 1.571072319201995, "grad_norm": 6.69175386428833, "learning_rate": 1.8430174563591024e-05, "loss": 0.3357, "step": 6300 }, { "epoch": 1.57356608478803, "grad_norm": 3.5837090015411377, "learning_rate": 1.842768079800499e-05, "loss": 0.4172, "step": 6310 }, { "epoch": 1.5760598503740648, "grad_norm": 4.377597332000732, "learning_rate": 1.8425187032418955e-05, "loss": 0.48, "step": 6320 }, { "epoch": 1.5785536159600997, "grad_norm": 6.266040325164795, "learning_rate": 1.842269326683292e-05, "loss": 0.5284, "step": 6330 }, { "epoch": 1.5810473815461346, "grad_norm": 6.752136707305908, "learning_rate": 1.8420199501246886e-05, "loss": 0.4682, "step": 6340 }, { "epoch": 1.5835411471321694, "grad_norm": 4.733704090118408, "learning_rate": 1.841770573566085e-05, "loss": 0.345, "step": 6350 }, { "epoch": 1.5860349127182045, "grad_norm": 6.0275397300720215, "learning_rate": 1.8415211970074816e-05, "loss": 0.5216, "step": 6360 }, { "epoch": 1.5885286783042394, "grad_norm": 4.750929832458496, "learning_rate": 1.841271820448878e-05, "loss": 0.4312, "step": 6370 }, { "epoch": 1.5910224438902745, "grad_norm": 3.941704511642456, "learning_rate": 1.8410224438902743e-05, "loss": 0.4652, "step": 6380 }, { "epoch": 1.5935162094763093, "grad_norm": 6.078151226043701, "learning_rate": 1.840773067331671e-05, "loss": 0.4478, "step": 6390 }, { "epoch": 1.5960099750623442, "grad_norm": 5.187417030334473, "learning_rate": 1.8405236907730674e-05, "loss": 0.4094, "step": 6400 }, { "epoch": 1.598503740648379, "grad_norm": 6.688471794128418, "learning_rate": 1.8402743142144638e-05, "loss": 0.4707, "step": 6410 }, { "epoch": 1.600997506234414, "grad_norm": 4.863025188446045, "learning_rate": 1.8400249376558605e-05, "loss": 0.4326, "step": 6420 }, { "epoch": 1.6034912718204488, "grad_norm": 6.477893352508545, "learning_rate": 1.8397755610972568e-05, "loss": 0.4071, "step": 6430 }, { "epoch": 1.6059850374064837, "grad_norm": 3.6920061111450195, "learning_rate": 1.8395261845386535e-05, "loss": 0.4258, "step": 6440 }, { "epoch": 1.6084788029925186, "grad_norm": 5.195054531097412, "learning_rate": 1.83927680798005e-05, "loss": 0.4388, "step": 6450 }, { "epoch": 1.6109725685785536, "grad_norm": 6.680331707000732, "learning_rate": 1.8390274314214466e-05, "loss": 0.4053, "step": 6460 }, { "epoch": 1.6134663341645885, "grad_norm": 4.663398742675781, "learning_rate": 1.838778054862843e-05, "loss": 0.457, "step": 6470 }, { "epoch": 1.6159600997506236, "grad_norm": 4.989374160766602, "learning_rate": 1.8385286783042397e-05, "loss": 0.451, "step": 6480 }, { "epoch": 1.6184538653366585, "grad_norm": 7.583066940307617, "learning_rate": 1.838279301745636e-05, "loss": 0.3729, "step": 6490 }, { "epoch": 1.6209476309226933, "grad_norm": 4.013853073120117, "learning_rate": 1.8380299251870327e-05, "loss": 0.4288, "step": 6500 }, { "epoch": 1.6234413965087282, "grad_norm": 4.080069065093994, "learning_rate": 1.837780548628429e-05, "loss": 0.3728, "step": 6510 }, { "epoch": 1.625935162094763, "grad_norm": 6.781628131866455, "learning_rate": 1.8375311720698258e-05, "loss": 0.4198, "step": 6520 }, { "epoch": 1.628428927680798, "grad_norm": 5.727022647857666, "learning_rate": 1.837281795511222e-05, "loss": 0.5096, "step": 6530 }, { "epoch": 1.6309226932668328, "grad_norm": 4.232419967651367, "learning_rate": 1.8370324189526185e-05, "loss": 0.4087, "step": 6540 }, { "epoch": 1.6334164588528677, "grad_norm": 6.57977294921875, "learning_rate": 1.8367830423940152e-05, "loss": 0.5512, "step": 6550 }, { "epoch": 1.6359102244389028, "grad_norm": 5.6456427574157715, "learning_rate": 1.8365336658354116e-05, "loss": 0.448, "step": 6560 }, { "epoch": 1.6384039900249376, "grad_norm": 5.072854518890381, "learning_rate": 1.8362842892768083e-05, "loss": 0.4374, "step": 6570 }, { "epoch": 1.6408977556109727, "grad_norm": 6.125659465789795, "learning_rate": 1.8360349127182046e-05, "loss": 0.4616, "step": 6580 }, { "epoch": 1.6433915211970076, "grad_norm": 5.4351301193237305, "learning_rate": 1.835785536159601e-05, "loss": 0.561, "step": 6590 }, { "epoch": 1.6458852867830425, "grad_norm": 5.115979194641113, "learning_rate": 1.8355361596009977e-05, "loss": 0.4454, "step": 6600 }, { "epoch": 1.6483790523690773, "grad_norm": 5.563187599182129, "learning_rate": 1.835286783042394e-05, "loss": 0.4437, "step": 6610 }, { "epoch": 1.6508728179551122, "grad_norm": 8.810982704162598, "learning_rate": 1.8350374064837907e-05, "loss": 0.472, "step": 6620 }, { "epoch": 1.653366583541147, "grad_norm": 6.700620174407959, "learning_rate": 1.834788029925187e-05, "loss": 0.4538, "step": 6630 }, { "epoch": 1.655860349127182, "grad_norm": 5.371676921844482, "learning_rate": 1.8345386533665838e-05, "loss": 0.4628, "step": 6640 }, { "epoch": 1.6583541147132168, "grad_norm": 6.457888126373291, "learning_rate": 1.8342892768079802e-05, "loss": 0.3982, "step": 6650 }, { "epoch": 1.660847880299252, "grad_norm": 3.3349645137786865, "learning_rate": 1.834039900249377e-05, "loss": 0.5806, "step": 6660 }, { "epoch": 1.6633416458852868, "grad_norm": 4.4449334144592285, "learning_rate": 1.8337905236907732e-05, "loss": 0.3955, "step": 6670 }, { "epoch": 1.6658354114713219, "grad_norm": 6.8672776222229, "learning_rate": 1.83354114713217e-05, "loss": 0.4466, "step": 6680 }, { "epoch": 1.6683291770573567, "grad_norm": 8.786575317382812, "learning_rate": 1.8332917705735663e-05, "loss": 0.4113, "step": 6690 }, { "epoch": 1.6708229426433916, "grad_norm": 5.177858352661133, "learning_rate": 1.8330423940149627e-05, "loss": 0.4509, "step": 6700 }, { "epoch": 1.6733167082294265, "grad_norm": 8.495447158813477, "learning_rate": 1.8327930174563594e-05, "loss": 0.4024, "step": 6710 }, { "epoch": 1.6758104738154613, "grad_norm": 3.8897151947021484, "learning_rate": 1.8325436408977557e-05, "loss": 0.4081, "step": 6720 }, { "epoch": 1.6783042394014962, "grad_norm": 7.075688362121582, "learning_rate": 1.8322942643391524e-05, "loss": 0.4058, "step": 6730 }, { "epoch": 1.680798004987531, "grad_norm": 5.916785717010498, "learning_rate": 1.8320448877805488e-05, "loss": 0.4553, "step": 6740 }, { "epoch": 1.683291770573566, "grad_norm": 6.220627307891846, "learning_rate": 1.831795511221945e-05, "loss": 0.3365, "step": 6750 }, { "epoch": 1.685785536159601, "grad_norm": 5.613668441772461, "learning_rate": 1.831546134663342e-05, "loss": 0.4703, "step": 6760 }, { "epoch": 1.688279301745636, "grad_norm": 5.4366841316223145, "learning_rate": 1.8312967581047382e-05, "loss": 0.4233, "step": 6770 }, { "epoch": 1.690773067331671, "grad_norm": 7.235218524932861, "learning_rate": 1.8310473815461346e-05, "loss": 0.4584, "step": 6780 }, { "epoch": 1.6932668329177059, "grad_norm": 5.918466567993164, "learning_rate": 1.8307980049875313e-05, "loss": 0.4717, "step": 6790 }, { "epoch": 1.6957605985037407, "grad_norm": 7.333720684051514, "learning_rate": 1.8305486284289276e-05, "loss": 0.4562, "step": 6800 }, { "epoch": 1.6982543640897756, "grad_norm": 4.598586082458496, "learning_rate": 1.8302992518703243e-05, "loss": 0.3797, "step": 6810 }, { "epoch": 1.7007481296758105, "grad_norm": 6.30976676940918, "learning_rate": 1.8300498753117207e-05, "loss": 0.5182, "step": 6820 }, { "epoch": 1.7032418952618453, "grad_norm": 5.384555339813232, "learning_rate": 1.8298004987531174e-05, "loss": 0.3699, "step": 6830 }, { "epoch": 1.7057356608478802, "grad_norm": 6.011791229248047, "learning_rate": 1.8295511221945138e-05, "loss": 0.4675, "step": 6840 }, { "epoch": 1.708229426433915, "grad_norm": 5.3825883865356445, "learning_rate": 1.8293017456359105e-05, "loss": 0.4217, "step": 6850 }, { "epoch": 1.7107231920199502, "grad_norm": 5.053082466125488, "learning_rate": 1.829052369077307e-05, "loss": 0.5013, "step": 6860 }, { "epoch": 1.713216957605985, "grad_norm": 3.9529480934143066, "learning_rate": 1.8288029925187035e-05, "loss": 0.5152, "step": 6870 }, { "epoch": 1.7157107231920201, "grad_norm": 7.375166416168213, "learning_rate": 1.8285536159601e-05, "loss": 0.3735, "step": 6880 }, { "epoch": 1.718204488778055, "grad_norm": 6.015711307525635, "learning_rate": 1.8283042394014966e-05, "loss": 0.3918, "step": 6890 }, { "epoch": 1.7206982543640899, "grad_norm": 4.770452499389648, "learning_rate": 1.828054862842893e-05, "loss": 0.4266, "step": 6900 }, { "epoch": 1.7231920199501247, "grad_norm": 4.618214130401611, "learning_rate": 1.8278054862842893e-05, "loss": 0.4717, "step": 6910 }, { "epoch": 1.7256857855361596, "grad_norm": 4.1790924072265625, "learning_rate": 1.827556109725686e-05, "loss": 0.4128, "step": 6920 }, { "epoch": 1.7281795511221945, "grad_norm": 6.608766078948975, "learning_rate": 1.8273067331670824e-05, "loss": 0.4591, "step": 6930 }, { "epoch": 1.7306733167082293, "grad_norm": 8.431743621826172, "learning_rate": 1.827057356608479e-05, "loss": 0.5575, "step": 6940 }, { "epoch": 1.7331670822942642, "grad_norm": 5.013129711151123, "learning_rate": 1.8268079800498754e-05, "loss": 0.3743, "step": 6950 }, { "epoch": 1.7356608478802993, "grad_norm": 4.251693248748779, "learning_rate": 1.8265586034912718e-05, "loss": 0.4133, "step": 6960 }, { "epoch": 1.7381546134663342, "grad_norm": 4.549345970153809, "learning_rate": 1.8263092269326685e-05, "loss": 0.5037, "step": 6970 }, { "epoch": 1.7406483790523692, "grad_norm": 6.431280612945557, "learning_rate": 1.826059850374065e-05, "loss": 0.3989, "step": 6980 }, { "epoch": 1.7431421446384041, "grad_norm": 5.792124271392822, "learning_rate": 1.8258104738154615e-05, "loss": 0.4512, "step": 6990 }, { "epoch": 1.745635910224439, "grad_norm": 8.254396438598633, "learning_rate": 1.825561097256858e-05, "loss": 0.4982, "step": 7000 }, { "epoch": 1.7481296758104738, "grad_norm": 4.77825927734375, "learning_rate": 1.8253117206982546e-05, "loss": 0.4053, "step": 7010 }, { "epoch": 1.7506234413965087, "grad_norm": 6.53203821182251, "learning_rate": 1.825062344139651e-05, "loss": 0.4921, "step": 7020 }, { "epoch": 1.7531172069825436, "grad_norm": 4.8709940910339355, "learning_rate": 1.8248129675810477e-05, "loss": 0.4883, "step": 7030 }, { "epoch": 1.7556109725685785, "grad_norm": 4.446110725402832, "learning_rate": 1.824563591022444e-05, "loss": 0.4667, "step": 7040 }, { "epoch": 1.7581047381546133, "grad_norm": 6.364370346069336, "learning_rate": 1.8243142144638407e-05, "loss": 0.5304, "step": 7050 }, { "epoch": 1.7605985037406484, "grad_norm": 5.594391822814941, "learning_rate": 1.824064837905237e-05, "loss": 0.4105, "step": 7060 }, { "epoch": 1.7630922693266833, "grad_norm": 4.667792320251465, "learning_rate": 1.8238154613466338e-05, "loss": 0.445, "step": 7070 }, { "epoch": 1.7655860349127181, "grad_norm": 30.49921417236328, "learning_rate": 1.82356608478803e-05, "loss": 0.5001, "step": 7080 }, { "epoch": 1.7680798004987532, "grad_norm": 4.282893657684326, "learning_rate": 1.8233167082294265e-05, "loss": 0.428, "step": 7090 }, { "epoch": 1.770573566084788, "grad_norm": 6.280821323394775, "learning_rate": 1.8230673316708232e-05, "loss": 0.4246, "step": 7100 }, { "epoch": 1.773067331670823, "grad_norm": 5.7564005851745605, "learning_rate": 1.8228179551122196e-05, "loss": 0.5076, "step": 7110 }, { "epoch": 1.7755610972568578, "grad_norm": 5.439352035522461, "learning_rate": 1.822568578553616e-05, "loss": 0.448, "step": 7120 }, { "epoch": 1.7780548628428927, "grad_norm": 4.840354919433594, "learning_rate": 1.8223192019950126e-05, "loss": 0.4518, "step": 7130 }, { "epoch": 1.7805486284289276, "grad_norm": 4.0967631340026855, "learning_rate": 1.822069825436409e-05, "loss": 0.441, "step": 7140 }, { "epoch": 1.7830423940149625, "grad_norm": 5.563769340515137, "learning_rate": 1.8218204488778057e-05, "loss": 0.5182, "step": 7150 }, { "epoch": 1.7855361596009975, "grad_norm": 6.383294582366943, "learning_rate": 1.821571072319202e-05, "loss": 0.4211, "step": 7160 }, { "epoch": 1.7880299251870324, "grad_norm": 4.494612216949463, "learning_rate": 1.8213216957605984e-05, "loss": 0.426, "step": 7170 }, { "epoch": 1.7905236907730673, "grad_norm": 4.303411483764648, "learning_rate": 1.821072319201995e-05, "loss": 0.3868, "step": 7180 }, { "epoch": 1.7930174563591024, "grad_norm": 7.266903400421143, "learning_rate": 1.8208229426433915e-05, "loss": 0.4369, "step": 7190 }, { "epoch": 1.7955112219451372, "grad_norm": 4.56803560256958, "learning_rate": 1.8205735660847882e-05, "loss": 0.4737, "step": 7200 }, { "epoch": 1.798004987531172, "grad_norm": 5.030318260192871, "learning_rate": 1.820324189526185e-05, "loss": 0.5418, "step": 7210 }, { "epoch": 1.800498753117207, "grad_norm": 5.952413558959961, "learning_rate": 1.8200748129675813e-05, "loss": 0.3784, "step": 7220 }, { "epoch": 1.8029925187032418, "grad_norm": 13.113987922668457, "learning_rate": 1.819825436408978e-05, "loss": 0.4306, "step": 7230 }, { "epoch": 1.8054862842892767, "grad_norm": 4.897136688232422, "learning_rate": 1.8195760598503743e-05, "loss": 0.4035, "step": 7240 }, { "epoch": 1.8079800498753116, "grad_norm": 4.879326343536377, "learning_rate": 1.8193266832917707e-05, "loss": 0.4381, "step": 7250 }, { "epoch": 1.8104738154613467, "grad_norm": 4.985284328460693, "learning_rate": 1.8190773067331674e-05, "loss": 0.4622, "step": 7260 }, { "epoch": 1.8129675810473815, "grad_norm": 6.7815775871276855, "learning_rate": 1.8188279301745637e-05, "loss": 0.5456, "step": 7270 }, { "epoch": 1.8154613466334164, "grad_norm": 7.435234069824219, "learning_rate": 1.81857855361596e-05, "loss": 0.4235, "step": 7280 }, { "epoch": 1.8179551122194515, "grad_norm": 4.0493483543396, "learning_rate": 1.8183291770573568e-05, "loss": 0.4922, "step": 7290 }, { "epoch": 1.8204488778054864, "grad_norm": 7.476971626281738, "learning_rate": 1.818079800498753e-05, "loss": 0.3972, "step": 7300 }, { "epoch": 1.8229426433915212, "grad_norm": 5.938323497772217, "learning_rate": 1.81783042394015e-05, "loss": 0.4129, "step": 7310 }, { "epoch": 1.825436408977556, "grad_norm": 6.04447078704834, "learning_rate": 1.8175810473815462e-05, "loss": 0.4718, "step": 7320 }, { "epoch": 1.827930174563591, "grad_norm": 11.774287223815918, "learning_rate": 1.8173316708229426e-05, "loss": 0.4558, "step": 7330 }, { "epoch": 1.8304239401496258, "grad_norm": 3.780583381652832, "learning_rate": 1.8170822942643393e-05, "loss": 0.4411, "step": 7340 }, { "epoch": 1.8329177057356607, "grad_norm": 3.6571693420410156, "learning_rate": 1.8168329177057356e-05, "loss": 0.4096, "step": 7350 }, { "epoch": 1.8354114713216958, "grad_norm": 7.564289093017578, "learning_rate": 1.8165835411471323e-05, "loss": 0.3931, "step": 7360 }, { "epoch": 1.8379052369077307, "grad_norm": 7.691397666931152, "learning_rate": 1.8163341645885287e-05, "loss": 0.4236, "step": 7370 }, { "epoch": 1.8403990024937655, "grad_norm": 5.345376968383789, "learning_rate": 1.8160847880299254e-05, "loss": 0.4377, "step": 7380 }, { "epoch": 1.8428927680798006, "grad_norm": 11.432061195373535, "learning_rate": 1.8158354114713218e-05, "loss": 0.4218, "step": 7390 }, { "epoch": 1.8453865336658355, "grad_norm": 4.74954080581665, "learning_rate": 1.8155860349127185e-05, "loss": 0.4077, "step": 7400 }, { "epoch": 1.8478802992518704, "grad_norm": 4.802658557891846, "learning_rate": 1.815336658354115e-05, "loss": 0.4862, "step": 7410 }, { "epoch": 1.8503740648379052, "grad_norm": 5.43306303024292, "learning_rate": 1.8150872817955115e-05, "loss": 0.4187, "step": 7420 }, { "epoch": 1.85286783042394, "grad_norm": 3.525193929672241, "learning_rate": 1.814837905236908e-05, "loss": 0.3819, "step": 7430 }, { "epoch": 1.855361596009975, "grad_norm": 6.635593891143799, "learning_rate": 1.8145885286783046e-05, "loss": 0.4183, "step": 7440 }, { "epoch": 1.8578553615960098, "grad_norm": 6.29930305480957, "learning_rate": 1.814339152119701e-05, "loss": 0.5308, "step": 7450 }, { "epoch": 1.860349127182045, "grad_norm": 5.674611568450928, "learning_rate": 1.8140897755610973e-05, "loss": 0.4003, "step": 7460 }, { "epoch": 1.8628428927680798, "grad_norm": 9.636392593383789, "learning_rate": 1.813840399002494e-05, "loss": 0.4318, "step": 7470 }, { "epoch": 1.8653366583541147, "grad_norm": 4.685451030731201, "learning_rate": 1.8135910224438904e-05, "loss": 0.4437, "step": 7480 }, { "epoch": 1.8678304239401498, "grad_norm": 8.09288501739502, "learning_rate": 1.8133416458852867e-05, "loss": 0.5241, "step": 7490 }, { "epoch": 1.8703241895261846, "grad_norm": 4.083009243011475, "learning_rate": 1.8130922693266834e-05, "loss": 0.3948, "step": 7500 }, { "epoch": 1.8728179551122195, "grad_norm": 6.581864833831787, "learning_rate": 1.8128428927680798e-05, "loss": 0.5274, "step": 7510 }, { "epoch": 1.8753117206982544, "grad_norm": 5.297054767608643, "learning_rate": 1.8125935162094765e-05, "loss": 0.4802, "step": 7520 }, { "epoch": 1.8778054862842892, "grad_norm": 5.629668235778809, "learning_rate": 1.812344139650873e-05, "loss": 0.4183, "step": 7530 }, { "epoch": 1.880299251870324, "grad_norm": 5.578089237213135, "learning_rate": 1.8120947630922692e-05, "loss": 0.4061, "step": 7540 }, { "epoch": 1.882793017456359, "grad_norm": 6.019644260406494, "learning_rate": 1.811845386533666e-05, "loss": 0.4339, "step": 7550 }, { "epoch": 1.885286783042394, "grad_norm": 6.0486016273498535, "learning_rate": 1.8115960099750626e-05, "loss": 0.3962, "step": 7560 }, { "epoch": 1.887780548628429, "grad_norm": 12.667963027954102, "learning_rate": 1.811346633416459e-05, "loss": 0.4148, "step": 7570 }, { "epoch": 1.8902743142144638, "grad_norm": 5.244442462921143, "learning_rate": 1.8110972568578557e-05, "loss": 0.4431, "step": 7580 }, { "epoch": 1.8927680798004989, "grad_norm": 5.28915548324585, "learning_rate": 1.810847880299252e-05, "loss": 0.4288, "step": 7590 }, { "epoch": 1.8952618453865338, "grad_norm": 5.8130717277526855, "learning_rate": 1.8105985037406487e-05, "loss": 0.4385, "step": 7600 }, { "epoch": 1.8977556109725686, "grad_norm": 9.284071922302246, "learning_rate": 1.810349127182045e-05, "loss": 0.4476, "step": 7610 }, { "epoch": 1.9002493765586035, "grad_norm": 4.337800979614258, "learning_rate": 1.8100997506234415e-05, "loss": 0.4499, "step": 7620 }, { "epoch": 1.9027431421446384, "grad_norm": 5.436014652252197, "learning_rate": 1.8098503740648382e-05, "loss": 0.4046, "step": 7630 }, { "epoch": 1.9052369077306732, "grad_norm": 5.804083347320557, "learning_rate": 1.8096009975062345e-05, "loss": 0.4042, "step": 7640 }, { "epoch": 1.907730673316708, "grad_norm": 5.294909477233887, "learning_rate": 1.8093516209476312e-05, "loss": 0.4618, "step": 7650 }, { "epoch": 1.9102244389027432, "grad_norm": 5.705154895782471, "learning_rate": 1.8091022443890276e-05, "loss": 0.3835, "step": 7660 }, { "epoch": 1.912718204488778, "grad_norm": 7.614785194396973, "learning_rate": 1.808852867830424e-05, "loss": 0.4388, "step": 7670 }, { "epoch": 1.915211970074813, "grad_norm": 9.722792625427246, "learning_rate": 1.8086034912718207e-05, "loss": 0.4568, "step": 7680 }, { "epoch": 1.917705735660848, "grad_norm": 5.578179836273193, "learning_rate": 1.808354114713217e-05, "loss": 0.3962, "step": 7690 }, { "epoch": 1.9201995012468829, "grad_norm": 5.2010178565979, "learning_rate": 1.8081047381546134e-05, "loss": 0.4123, "step": 7700 }, { "epoch": 1.9226932668329177, "grad_norm": 4.397924423217773, "learning_rate": 1.80785536159601e-05, "loss": 0.4321, "step": 7710 }, { "epoch": 1.9251870324189526, "grad_norm": 4.920018196105957, "learning_rate": 1.8076059850374064e-05, "loss": 0.4054, "step": 7720 }, { "epoch": 1.9276807980049875, "grad_norm": 3.409426212310791, "learning_rate": 1.807356608478803e-05, "loss": 0.4464, "step": 7730 }, { "epoch": 1.9301745635910224, "grad_norm": 7.432455062866211, "learning_rate": 1.8071072319201995e-05, "loss": 0.4521, "step": 7740 }, { "epoch": 1.9326683291770572, "grad_norm": 4.5080485343933105, "learning_rate": 1.8068578553615962e-05, "loss": 0.4328, "step": 7750 }, { "epoch": 1.9351620947630923, "grad_norm": 5.043903827667236, "learning_rate": 1.8066084788029926e-05, "loss": 0.4508, "step": 7760 }, { "epoch": 1.9376558603491272, "grad_norm": 6.6283369064331055, "learning_rate": 1.8063591022443893e-05, "loss": 0.464, "step": 7770 }, { "epoch": 1.940149625935162, "grad_norm": 4.977077007293701, "learning_rate": 1.8061097256857856e-05, "loss": 0.5318, "step": 7780 }, { "epoch": 1.9426433915211971, "grad_norm": 4.694863796234131, "learning_rate": 1.8058603491271823e-05, "loss": 0.3441, "step": 7790 }, { "epoch": 1.945137157107232, "grad_norm": 5.31052303314209, "learning_rate": 1.8056109725685787e-05, "loss": 0.4264, "step": 7800 }, { "epoch": 1.9476309226932669, "grad_norm": 5.178094863891602, "learning_rate": 1.8053615960099754e-05, "loss": 0.3537, "step": 7810 }, { "epoch": 1.9501246882793017, "grad_norm": 11.527572631835938, "learning_rate": 1.8051122194513718e-05, "loss": 0.5116, "step": 7820 }, { "epoch": 1.9526184538653366, "grad_norm": 5.841403961181641, "learning_rate": 1.804862842892768e-05, "loss": 0.4658, "step": 7830 }, { "epoch": 1.9551122194513715, "grad_norm": 8.75449275970459, "learning_rate": 1.8046134663341648e-05, "loss": 0.449, "step": 7840 }, { "epoch": 1.9576059850374063, "grad_norm": 7.627124309539795, "learning_rate": 1.8043640897755612e-05, "loss": 0.4512, "step": 7850 }, { "epoch": 1.9600997506234414, "grad_norm": 7.299831867218018, "learning_rate": 1.804114713216958e-05, "loss": 0.4311, "step": 7860 }, { "epoch": 1.9625935162094763, "grad_norm": 5.300114154815674, "learning_rate": 1.8038653366583542e-05, "loss": 0.3713, "step": 7870 }, { "epoch": 1.9650872817955112, "grad_norm": 5.638365745544434, "learning_rate": 1.8036159600997506e-05, "loss": 0.5088, "step": 7880 }, { "epoch": 1.9675810473815463, "grad_norm": 6.054098606109619, "learning_rate": 1.8033665835411473e-05, "loss": 0.4084, "step": 7890 }, { "epoch": 1.9700748129675811, "grad_norm": 7.920881271362305, "learning_rate": 1.8031172069825437e-05, "loss": 0.4036, "step": 7900 }, { "epoch": 1.972568578553616, "grad_norm": 5.523858070373535, "learning_rate": 1.8028678304239404e-05, "loss": 0.4152, "step": 7910 }, { "epoch": 1.9750623441396509, "grad_norm": 7.041183948516846, "learning_rate": 1.8026184538653367e-05, "loss": 0.314, "step": 7920 }, { "epoch": 1.9775561097256857, "grad_norm": 5.018404960632324, "learning_rate": 1.8023690773067334e-05, "loss": 0.4854, "step": 7930 }, { "epoch": 1.9800498753117206, "grad_norm": 6.31093168258667, "learning_rate": 1.8021446384039903e-05, "loss": 0.4654, "step": 7940 }, { "epoch": 1.9825436408977555, "grad_norm": 6.725535869598389, "learning_rate": 1.8018952618453866e-05, "loss": 0.3975, "step": 7950 }, { "epoch": 1.9850374064837906, "grad_norm": 7.007317066192627, "learning_rate": 1.801645885286783e-05, "loss": 0.4361, "step": 7960 }, { "epoch": 1.9875311720698254, "grad_norm": 4.911192893981934, "learning_rate": 1.8013965087281797e-05, "loss": 0.4742, "step": 7970 }, { "epoch": 1.9900249376558603, "grad_norm": 6.7747955322265625, "learning_rate": 1.801147132169576e-05, "loss": 0.4091, "step": 7980 }, { "epoch": 1.9925187032418954, "grad_norm": 5.228945732116699, "learning_rate": 1.8008977556109727e-05, "loss": 0.4404, "step": 7990 }, { "epoch": 1.9950124688279303, "grad_norm": 6.581811428070068, "learning_rate": 1.800648379052369e-05, "loss": 0.4312, "step": 8000 }, { "epoch": 1.9975062344139651, "grad_norm": 5.50934362411499, "learning_rate": 1.8003990024937658e-05, "loss": 0.3873, "step": 8010 }, { "epoch": 2.0, "grad_norm": 6.261854648590088, "learning_rate": 1.800149625935162e-05, "loss": 0.4136, "step": 8020 }, { "epoch": 2.0, "eval_loss": 0.4465249180793762, "eval_runtime": 59.866, "eval_samples_per_second": 16.754, "eval_steps_per_second": 16.754, "step": 8020 }, { "epoch": 2.002493765586035, "grad_norm": 3.6723527908325195, "learning_rate": 1.799900249376559e-05, "loss": 0.3571, "step": 8030 }, { "epoch": 2.0049875311720697, "grad_norm": 5.929795265197754, "learning_rate": 1.7996508728179552e-05, "loss": 0.5315, "step": 8040 }, { "epoch": 2.0074812967581046, "grad_norm": 6.144455909729004, "learning_rate": 1.799401496259352e-05, "loss": 0.4404, "step": 8050 }, { "epoch": 2.0099750623441395, "grad_norm": 6.373076438903809, "learning_rate": 1.7991521197007483e-05, "loss": 0.374, "step": 8060 }, { "epoch": 2.0124688279301743, "grad_norm": 3.5335922241210938, "learning_rate": 1.798902743142145e-05, "loss": 0.3972, "step": 8070 }, { "epoch": 2.0149625935162097, "grad_norm": 6.1185479164123535, "learning_rate": 1.7986533665835413e-05, "loss": 0.4131, "step": 8080 }, { "epoch": 2.0174563591022445, "grad_norm": 5.0511345863342285, "learning_rate": 1.7984039900249377e-05, "loss": 0.4324, "step": 8090 }, { "epoch": 2.0199501246882794, "grad_norm": 7.270425796508789, "learning_rate": 1.7981546134663344e-05, "loss": 0.3787, "step": 8100 }, { "epoch": 2.0224438902743143, "grad_norm": 4.606263637542725, "learning_rate": 1.7979052369077308e-05, "loss": 0.3524, "step": 8110 }, { "epoch": 2.024937655860349, "grad_norm": 5.3458452224731445, "learning_rate": 1.7976558603491275e-05, "loss": 0.4371, "step": 8120 }, { "epoch": 2.027431421446384, "grad_norm": 4.736631870269775, "learning_rate": 1.797406483790524e-05, "loss": 0.3898, "step": 8130 }, { "epoch": 2.029925187032419, "grad_norm": 5.234148025512695, "learning_rate": 1.7971571072319202e-05, "loss": 0.4307, "step": 8140 }, { "epoch": 2.0324189526184537, "grad_norm": 6.201371669769287, "learning_rate": 1.796907730673317e-05, "loss": 0.4022, "step": 8150 }, { "epoch": 2.0349127182044886, "grad_norm": 5.895164489746094, "learning_rate": 1.7966583541147133e-05, "loss": 0.4254, "step": 8160 }, { "epoch": 2.037406483790524, "grad_norm": 4.677857398986816, "learning_rate": 1.7964089775561096e-05, "loss": 0.5048, "step": 8170 }, { "epoch": 2.039900249376559, "grad_norm": 4.634955406188965, "learning_rate": 1.7961596009975063e-05, "loss": 0.4376, "step": 8180 }, { "epoch": 2.0423940149625937, "grad_norm": 5.4965667724609375, "learning_rate": 1.7959102244389027e-05, "loss": 0.3972, "step": 8190 }, { "epoch": 2.0448877805486285, "grad_norm": 5.892706394195557, "learning_rate": 1.7956608478802994e-05, "loss": 0.3795, "step": 8200 }, { "epoch": 2.0473815461346634, "grad_norm": 6.747429370880127, "learning_rate": 1.7954114713216957e-05, "loss": 0.394, "step": 8210 }, { "epoch": 2.0498753117206983, "grad_norm": 6.291106224060059, "learning_rate": 1.7951620947630924e-05, "loss": 0.4976, "step": 8220 }, { "epoch": 2.052369077306733, "grad_norm": 5.840002536773682, "learning_rate": 1.794912718204489e-05, "loss": 0.4118, "step": 8230 }, { "epoch": 2.054862842892768, "grad_norm": 9.487309455871582, "learning_rate": 1.7946633416458855e-05, "loss": 0.3606, "step": 8240 }, { "epoch": 2.057356608478803, "grad_norm": 5.123715877532959, "learning_rate": 1.7944139650872822e-05, "loss": 0.4149, "step": 8250 }, { "epoch": 2.0598503740648377, "grad_norm": 5.808225154876709, "learning_rate": 1.7941645885286786e-05, "loss": 0.4553, "step": 8260 }, { "epoch": 2.0623441396508726, "grad_norm": 9.865265846252441, "learning_rate": 1.793915211970075e-05, "loss": 0.4352, "step": 8270 }, { "epoch": 2.064837905236908, "grad_norm": 4.490272045135498, "learning_rate": 1.7936658354114716e-05, "loss": 0.4435, "step": 8280 }, { "epoch": 2.067331670822943, "grad_norm": 4.552779197692871, "learning_rate": 1.793416458852868e-05, "loss": 0.3933, "step": 8290 }, { "epoch": 2.0698254364089776, "grad_norm": 5.0049004554748535, "learning_rate": 1.7931670822942644e-05, "loss": 0.4585, "step": 8300 }, { "epoch": 2.0723192019950125, "grad_norm": 5.8257904052734375, "learning_rate": 1.792917705735661e-05, "loss": 0.4453, "step": 8310 }, { "epoch": 2.0748129675810474, "grad_norm": 5.575977802276611, "learning_rate": 1.7926683291770574e-05, "loss": 0.4536, "step": 8320 }, { "epoch": 2.0773067331670823, "grad_norm": 4.55002498626709, "learning_rate": 1.792418952618454e-05, "loss": 0.4842, "step": 8330 }, { "epoch": 2.079800498753117, "grad_norm": 6.223612308502197, "learning_rate": 1.7921695760598505e-05, "loss": 0.4209, "step": 8340 }, { "epoch": 2.082294264339152, "grad_norm": 5.589142322540283, "learning_rate": 1.791920199501247e-05, "loss": 0.3957, "step": 8350 }, { "epoch": 2.084788029925187, "grad_norm": 5.206849575042725, "learning_rate": 1.7916708229426435e-05, "loss": 0.4253, "step": 8360 }, { "epoch": 2.087281795511222, "grad_norm": 7.368830680847168, "learning_rate": 1.79142144638404e-05, "loss": 0.391, "step": 8370 }, { "epoch": 2.089775561097257, "grad_norm": 4.8193817138671875, "learning_rate": 1.7911720698254366e-05, "loss": 0.397, "step": 8380 }, { "epoch": 2.092269326683292, "grad_norm": 5.361727237701416, "learning_rate": 1.790922693266833e-05, "loss": 0.4468, "step": 8390 }, { "epoch": 2.0947630922693268, "grad_norm": 5.065185546875, "learning_rate": 1.7906733167082297e-05, "loss": 0.4142, "step": 8400 }, { "epoch": 2.0972568578553616, "grad_norm": 4.190534591674805, "learning_rate": 1.790423940149626e-05, "loss": 0.3448, "step": 8410 }, { "epoch": 2.0997506234413965, "grad_norm": 7.922725677490234, "learning_rate": 1.7901745635910227e-05, "loss": 0.3987, "step": 8420 }, { "epoch": 2.1022443890274314, "grad_norm": 4.495791912078857, "learning_rate": 1.789925187032419e-05, "loss": 0.4028, "step": 8430 }, { "epoch": 2.1047381546134662, "grad_norm": 5.718870639801025, "learning_rate": 1.7896758104738158e-05, "loss": 0.3954, "step": 8440 }, { "epoch": 2.107231920199501, "grad_norm": 5.126749038696289, "learning_rate": 1.789426433915212e-05, "loss": 0.387, "step": 8450 }, { "epoch": 2.109725685785536, "grad_norm": 6.903149604797363, "learning_rate": 1.7891770573566085e-05, "loss": 0.3494, "step": 8460 }, { "epoch": 2.112219451371571, "grad_norm": 6.055934429168701, "learning_rate": 1.7889276807980052e-05, "loss": 0.4843, "step": 8470 }, { "epoch": 2.114713216957606, "grad_norm": 5.180309295654297, "learning_rate": 1.7886783042394016e-05, "loss": 0.3912, "step": 8480 }, { "epoch": 2.117206982543641, "grad_norm": 5.292367458343506, "learning_rate": 1.7884289276807983e-05, "loss": 0.4334, "step": 8490 }, { "epoch": 2.119700748129676, "grad_norm": 6.745512008666992, "learning_rate": 1.7881795511221946e-05, "loss": 0.3954, "step": 8500 }, { "epoch": 2.1221945137157108, "grad_norm": 6.295596122741699, "learning_rate": 1.787930174563591e-05, "loss": 0.3938, "step": 8510 }, { "epoch": 2.1246882793017456, "grad_norm": 3.398513078689575, "learning_rate": 1.7876807980049877e-05, "loss": 0.3826, "step": 8520 }, { "epoch": 2.1271820448877805, "grad_norm": 7.377808570861816, "learning_rate": 1.787431421446384e-05, "loss": 0.4062, "step": 8530 }, { "epoch": 2.1296758104738154, "grad_norm": 5.2859272956848145, "learning_rate": 1.7871820448877808e-05, "loss": 0.3489, "step": 8540 }, { "epoch": 2.1321695760598502, "grad_norm": 5.597499370574951, "learning_rate": 1.786932668329177e-05, "loss": 0.3924, "step": 8550 }, { "epoch": 2.134663341645885, "grad_norm": 4.033814430236816, "learning_rate": 1.7866832917705735e-05, "loss": 0.401, "step": 8560 }, { "epoch": 2.1371571072319204, "grad_norm": 10.128519058227539, "learning_rate": 1.7864339152119702e-05, "loss": 0.4635, "step": 8570 }, { "epoch": 2.1396508728179553, "grad_norm": 3.9514145851135254, "learning_rate": 1.786184538653367e-05, "loss": 0.4217, "step": 8580 }, { "epoch": 2.14214463840399, "grad_norm": 4.673630714416504, "learning_rate": 1.7859351620947632e-05, "loss": 0.4318, "step": 8590 }, { "epoch": 2.144638403990025, "grad_norm": 5.571914196014404, "learning_rate": 1.78568578553616e-05, "loss": 0.414, "step": 8600 }, { "epoch": 2.14713216957606, "grad_norm": 4.7045063972473145, "learning_rate": 1.7854364089775563e-05, "loss": 0.4471, "step": 8610 }, { "epoch": 2.1496259351620948, "grad_norm": 4.17690372467041, "learning_rate": 1.785187032418953e-05, "loss": 0.4632, "step": 8620 }, { "epoch": 2.1521197007481296, "grad_norm": 5.189255237579346, "learning_rate": 1.7849376558603494e-05, "loss": 0.5894, "step": 8630 }, { "epoch": 2.1546134663341645, "grad_norm": 5.136035442352295, "learning_rate": 1.7846882793017457e-05, "loss": 0.4954, "step": 8640 }, { "epoch": 2.1571072319201994, "grad_norm": 6.0098443031311035, "learning_rate": 1.7844389027431424e-05, "loss": 0.3998, "step": 8650 }, { "epoch": 2.1596009975062342, "grad_norm": 6.5170979499816895, "learning_rate": 1.7841895261845388e-05, "loss": 0.4394, "step": 8660 }, { "epoch": 2.162094763092269, "grad_norm": 7.620209217071533, "learning_rate": 1.783940149625935e-05, "loss": 0.4352, "step": 8670 }, { "epoch": 2.1645885286783044, "grad_norm": 5.3294548988342285, "learning_rate": 1.783690773067332e-05, "loss": 0.4285, "step": 8680 }, { "epoch": 2.1670822942643393, "grad_norm": 4.24174165725708, "learning_rate": 1.7834413965087282e-05, "loss": 0.4855, "step": 8690 }, { "epoch": 2.169576059850374, "grad_norm": 4.900123119354248, "learning_rate": 1.783192019950125e-05, "loss": 0.5264, "step": 8700 }, { "epoch": 2.172069825436409, "grad_norm": 5.917686939239502, "learning_rate": 1.7829426433915213e-05, "loss": 0.4571, "step": 8710 }, { "epoch": 2.174563591022444, "grad_norm": 5.674629211425781, "learning_rate": 1.7826932668329176e-05, "loss": 0.44, "step": 8720 }, { "epoch": 2.1770573566084788, "grad_norm": 6.6443986892700195, "learning_rate": 1.7824438902743143e-05, "loss": 0.3972, "step": 8730 }, { "epoch": 2.1795511221945136, "grad_norm": 4.557736873626709, "learning_rate": 1.7821945137157107e-05, "loss": 0.4578, "step": 8740 }, { "epoch": 2.1820448877805485, "grad_norm": 5.653411865234375, "learning_rate": 1.7819451371571074e-05, "loss": 0.4691, "step": 8750 }, { "epoch": 2.1845386533665834, "grad_norm": 5.747959613800049, "learning_rate": 1.7816957605985038e-05, "loss": 0.4597, "step": 8760 }, { "epoch": 2.1870324189526187, "grad_norm": 4.90444803237915, "learning_rate": 1.7814463840399005e-05, "loss": 0.4057, "step": 8770 }, { "epoch": 2.1895261845386536, "grad_norm": 5.881532192230225, "learning_rate": 1.7811970074812968e-05, "loss": 0.4028, "step": 8780 }, { "epoch": 2.1920199501246884, "grad_norm": 5.233574390411377, "learning_rate": 1.7809476309226935e-05, "loss": 0.3608, "step": 8790 }, { "epoch": 2.1945137157107233, "grad_norm": 6.65619421005249, "learning_rate": 1.78069825436409e-05, "loss": 0.3862, "step": 8800 }, { "epoch": 2.197007481296758, "grad_norm": 4.836643695831299, "learning_rate": 1.7804488778054866e-05, "loss": 0.4005, "step": 8810 }, { "epoch": 2.199501246882793, "grad_norm": 5.234933376312256, "learning_rate": 1.780199501246883e-05, "loss": 0.4855, "step": 8820 }, { "epoch": 2.201995012468828, "grad_norm": 4.512629985809326, "learning_rate": 1.7799501246882796e-05, "loss": 0.4217, "step": 8830 }, { "epoch": 2.2044887780548628, "grad_norm": 7.3246541023254395, "learning_rate": 1.779700748129676e-05, "loss": 0.4968, "step": 8840 }, { "epoch": 2.2069825436408976, "grad_norm": 5.89198112487793, "learning_rate": 1.7794513715710724e-05, "loss": 0.3842, "step": 8850 }, { "epoch": 2.2094763092269325, "grad_norm": 5.893228530883789, "learning_rate": 1.779201995012469e-05, "loss": 0.3892, "step": 8860 }, { "epoch": 2.2119700748129674, "grad_norm": 11.834216117858887, "learning_rate": 1.7789526184538654e-05, "loss": 0.4357, "step": 8870 }, { "epoch": 2.2144638403990027, "grad_norm": 6.141737461090088, "learning_rate": 1.7787032418952618e-05, "loss": 0.3841, "step": 8880 }, { "epoch": 2.2169576059850375, "grad_norm": 10.668390274047852, "learning_rate": 1.7784538653366585e-05, "loss": 0.4482, "step": 8890 }, { "epoch": 2.2194513715710724, "grad_norm": 5.236949443817139, "learning_rate": 1.778204488778055e-05, "loss": 0.4517, "step": 8900 }, { "epoch": 2.2219451371571073, "grad_norm": 6.584339618682861, "learning_rate": 1.7779551122194516e-05, "loss": 0.4163, "step": 8910 }, { "epoch": 2.224438902743142, "grad_norm": 6.040124893188477, "learning_rate": 1.777705735660848e-05, "loss": 0.4095, "step": 8920 }, { "epoch": 2.226932668329177, "grad_norm": 7.1335062980651855, "learning_rate": 1.7774563591022446e-05, "loss": 0.5042, "step": 8930 }, { "epoch": 2.229426433915212, "grad_norm": 8.298208236694336, "learning_rate": 1.777206982543641e-05, "loss": 0.5001, "step": 8940 }, { "epoch": 2.2319201995012468, "grad_norm": 4.262420177459717, "learning_rate": 1.7769576059850377e-05, "loss": 0.4444, "step": 8950 }, { "epoch": 2.2344139650872816, "grad_norm": 3.8085227012634277, "learning_rate": 1.776708229426434e-05, "loss": 0.4736, "step": 8960 }, { "epoch": 2.236907730673317, "grad_norm": 8.276735305786133, "learning_rate": 1.7764588528678307e-05, "loss": 0.4522, "step": 8970 }, { "epoch": 2.239401496259352, "grad_norm": 6.127620220184326, "learning_rate": 1.776209476309227e-05, "loss": 0.3601, "step": 8980 }, { "epoch": 2.2418952618453867, "grad_norm": 5.782129287719727, "learning_rate": 1.7759600997506238e-05, "loss": 0.4117, "step": 8990 }, { "epoch": 2.2443890274314215, "grad_norm": 6.870619773864746, "learning_rate": 1.77571072319202e-05, "loss": 0.4816, "step": 9000 }, { "epoch": 2.2468827930174564, "grad_norm": 3.876192331314087, "learning_rate": 1.7754613466334165e-05, "loss": 0.41, "step": 9010 }, { "epoch": 2.2493765586034913, "grad_norm": 7.108855724334717, "learning_rate": 1.7752119700748132e-05, "loss": 0.4026, "step": 9020 }, { "epoch": 2.251870324189526, "grad_norm": 5.940926551818848, "learning_rate": 1.7749625935162096e-05, "loss": 0.4307, "step": 9030 }, { "epoch": 2.254364089775561, "grad_norm": 7.2069926261901855, "learning_rate": 1.7747132169576063e-05, "loss": 0.4925, "step": 9040 }, { "epoch": 2.256857855361596, "grad_norm": 5.103030681610107, "learning_rate": 1.7744638403990026e-05, "loss": 0.4119, "step": 9050 }, { "epoch": 2.2593516209476308, "grad_norm": 6.820674419403076, "learning_rate": 1.774214463840399e-05, "loss": 0.4423, "step": 9060 }, { "epoch": 2.2618453865336656, "grad_norm": 5.859368801116943, "learning_rate": 1.7739650872817957e-05, "loss": 0.416, "step": 9070 }, { "epoch": 2.264339152119701, "grad_norm": 3.5914974212646484, "learning_rate": 1.773715710723192e-05, "loss": 0.3593, "step": 9080 }, { "epoch": 2.266832917705736, "grad_norm": 5.854361057281494, "learning_rate": 1.7734663341645884e-05, "loss": 0.3665, "step": 9090 }, { "epoch": 2.2693266832917707, "grad_norm": 6.185330390930176, "learning_rate": 1.773216957605985e-05, "loss": 0.3968, "step": 9100 }, { "epoch": 2.2718204488778055, "grad_norm": 9.060643196105957, "learning_rate": 1.7729675810473815e-05, "loss": 0.444, "step": 9110 }, { "epoch": 2.2743142144638404, "grad_norm": 6.3430633544921875, "learning_rate": 1.7727182044887782e-05, "loss": 0.4807, "step": 9120 }, { "epoch": 2.2768079800498753, "grad_norm": 6.883560657501221, "learning_rate": 1.7724688279301746e-05, "loss": 0.4042, "step": 9130 }, { "epoch": 2.27930174563591, "grad_norm": 5.706015110015869, "learning_rate": 1.7722194513715713e-05, "loss": 0.4687, "step": 9140 }, { "epoch": 2.281795511221945, "grad_norm": 6.107770919799805, "learning_rate": 1.7719700748129676e-05, "loss": 0.6099, "step": 9150 }, { "epoch": 2.28428927680798, "grad_norm": 4.1198930740356445, "learning_rate": 1.7717206982543643e-05, "loss": 0.3808, "step": 9160 }, { "epoch": 2.286783042394015, "grad_norm": 5.448758602142334, "learning_rate": 1.7714713216957607e-05, "loss": 0.3805, "step": 9170 }, { "epoch": 2.28927680798005, "grad_norm": 4.109248638153076, "learning_rate": 1.7712219451371574e-05, "loss": 0.3963, "step": 9180 }, { "epoch": 2.291770573566085, "grad_norm": 4.992825508117676, "learning_rate": 1.7709725685785537e-05, "loss": 0.3987, "step": 9190 }, { "epoch": 2.29426433915212, "grad_norm": 4.800196647644043, "learning_rate": 1.7707231920199504e-05, "loss": 0.3984, "step": 9200 }, { "epoch": 2.2967581047381547, "grad_norm": 7.4912896156311035, "learning_rate": 1.7704738154613468e-05, "loss": 0.462, "step": 9210 }, { "epoch": 2.2992518703241895, "grad_norm": 7.3595991134643555, "learning_rate": 1.770224438902743e-05, "loss": 0.4066, "step": 9220 }, { "epoch": 2.3017456359102244, "grad_norm": 8.947257995605469, "learning_rate": 1.76997506234414e-05, "loss": 0.4297, "step": 9230 }, { "epoch": 2.3042394014962593, "grad_norm": 5.556568622589111, "learning_rate": 1.7697256857855362e-05, "loss": 0.4001, "step": 9240 }, { "epoch": 2.306733167082294, "grad_norm": 5.276797294616699, "learning_rate": 1.769476309226933e-05, "loss": 0.4318, "step": 9250 }, { "epoch": 2.309226932668329, "grad_norm": 7.36549711227417, "learning_rate": 1.7692269326683293e-05, "loss": 0.4791, "step": 9260 }, { "epoch": 2.311720698254364, "grad_norm": 6.3212761878967285, "learning_rate": 1.7689775561097257e-05, "loss": 0.4505, "step": 9270 }, { "epoch": 2.314214463840399, "grad_norm": 8.907316207885742, "learning_rate": 1.7687281795511224e-05, "loss": 0.5421, "step": 9280 }, { "epoch": 2.316708229426434, "grad_norm": 5.004027843475342, "learning_rate": 1.7684788029925187e-05, "loss": 0.4232, "step": 9290 }, { "epoch": 2.319201995012469, "grad_norm": 5.411210060119629, "learning_rate": 1.7682294264339154e-05, "loss": 0.374, "step": 9300 }, { "epoch": 2.321695760598504, "grad_norm": 5.966613292694092, "learning_rate": 1.7679800498753118e-05, "loss": 0.4109, "step": 9310 }, { "epoch": 2.3241895261845387, "grad_norm": 5.003303050994873, "learning_rate": 1.7677306733167085e-05, "loss": 0.4241, "step": 9320 }, { "epoch": 2.3266832917705735, "grad_norm": 5.208148002624512, "learning_rate": 1.767481296758105e-05, "loss": 0.4793, "step": 9330 }, { "epoch": 2.3291770573566084, "grad_norm": 8.17491626739502, "learning_rate": 1.7672319201995015e-05, "loss": 0.4423, "step": 9340 }, { "epoch": 2.3316708229426433, "grad_norm": 8.450447082519531, "learning_rate": 1.766982543640898e-05, "loss": 0.4155, "step": 9350 }, { "epoch": 2.334164588528678, "grad_norm": 6.473998546600342, "learning_rate": 1.7667331670822946e-05, "loss": 0.425, "step": 9360 }, { "epoch": 2.3366583541147135, "grad_norm": 8.660719871520996, "learning_rate": 1.766483790523691e-05, "loss": 0.4596, "step": 9370 }, { "epoch": 2.3391521197007483, "grad_norm": 5.478455543518066, "learning_rate": 1.7662344139650873e-05, "loss": 0.3841, "step": 9380 }, { "epoch": 2.341645885286783, "grad_norm": 5.475340843200684, "learning_rate": 1.765985037406484e-05, "loss": 0.4179, "step": 9390 }, { "epoch": 2.344139650872818, "grad_norm": 4.660368919372559, "learning_rate": 1.7657356608478804e-05, "loss": 0.4493, "step": 9400 }, { "epoch": 2.346633416458853, "grad_norm": 3.002950668334961, "learning_rate": 1.765486284289277e-05, "loss": 0.3333, "step": 9410 }, { "epoch": 2.349127182044888, "grad_norm": 7.417391777038574, "learning_rate": 1.7652369077306734e-05, "loss": 0.3882, "step": 9420 }, { "epoch": 2.3516209476309227, "grad_norm": 5.637429714202881, "learning_rate": 1.7649875311720698e-05, "loss": 0.3217, "step": 9430 }, { "epoch": 2.3541147132169575, "grad_norm": 9.37643814086914, "learning_rate": 1.7647381546134665e-05, "loss": 0.423, "step": 9440 }, { "epoch": 2.3566084788029924, "grad_norm": 4.772799968719482, "learning_rate": 1.764488778054863e-05, "loss": 0.4789, "step": 9450 }, { "epoch": 2.3591022443890273, "grad_norm": 6.806458950042725, "learning_rate": 1.7642394014962592e-05, "loss": 0.4245, "step": 9460 }, { "epoch": 2.361596009975062, "grad_norm": 6.0944647789001465, "learning_rate": 1.763990024937656e-05, "loss": 0.4306, "step": 9470 }, { "epoch": 2.3640897755610975, "grad_norm": 4.526205062866211, "learning_rate": 1.7637406483790523e-05, "loss": 0.3647, "step": 9480 }, { "epoch": 2.3665835411471323, "grad_norm": 10.576577186584473, "learning_rate": 1.763491271820449e-05, "loss": 0.4645, "step": 9490 }, { "epoch": 2.369077306733167, "grad_norm": 8.364253044128418, "learning_rate": 1.7632418952618457e-05, "loss": 0.4524, "step": 9500 }, { "epoch": 2.371571072319202, "grad_norm": 6.188162326812744, "learning_rate": 1.762992518703242e-05, "loss": 0.446, "step": 9510 }, { "epoch": 2.374064837905237, "grad_norm": 5.653889179229736, "learning_rate": 1.7627431421446388e-05, "loss": 0.3859, "step": 9520 }, { "epoch": 2.376558603491272, "grad_norm": 7.959500789642334, "learning_rate": 1.762493765586035e-05, "loss": 0.4085, "step": 9530 }, { "epoch": 2.3790523690773067, "grad_norm": 7.066885471343994, "learning_rate": 1.7622443890274318e-05, "loss": 0.4195, "step": 9540 }, { "epoch": 2.3815461346633415, "grad_norm": 4.418666839599609, "learning_rate": 1.7619950124688282e-05, "loss": 0.3748, "step": 9550 }, { "epoch": 2.3840399002493764, "grad_norm": 7.192332744598389, "learning_rate": 1.7617456359102245e-05, "loss": 0.4545, "step": 9560 }, { "epoch": 2.3865336658354117, "grad_norm": 6.895399570465088, "learning_rate": 1.7614962593516212e-05, "loss": 0.4023, "step": 9570 }, { "epoch": 2.3890274314214466, "grad_norm": 5.375655174255371, "learning_rate": 1.7612468827930176e-05, "loss": 0.4108, "step": 9580 }, { "epoch": 2.3915211970074814, "grad_norm": 5.821678161621094, "learning_rate": 1.760997506234414e-05, "loss": 0.4258, "step": 9590 }, { "epoch": 2.3940149625935163, "grad_norm": 7.174010276794434, "learning_rate": 1.7607481296758107e-05, "loss": 0.4199, "step": 9600 }, { "epoch": 2.396508728179551, "grad_norm": 4.9364519119262695, "learning_rate": 1.760498753117207e-05, "loss": 0.4044, "step": 9610 }, { "epoch": 2.399002493765586, "grad_norm": 6.391279220581055, "learning_rate": 1.7602493765586037e-05, "loss": 0.4368, "step": 9620 }, { "epoch": 2.401496259351621, "grad_norm": 4.990833282470703, "learning_rate": 1.76e-05, "loss": 0.3174, "step": 9630 }, { "epoch": 2.403990024937656, "grad_norm": 4.146638870239258, "learning_rate": 1.7597506234413965e-05, "loss": 0.3995, "step": 9640 }, { "epoch": 2.4064837905236907, "grad_norm": 4.520384311676025, "learning_rate": 1.759501246882793e-05, "loss": 0.3627, "step": 9650 }, { "epoch": 2.4089775561097255, "grad_norm": 5.92678165435791, "learning_rate": 1.7592518703241895e-05, "loss": 0.3737, "step": 9660 }, { "epoch": 2.4114713216957604, "grad_norm": 8.433207511901855, "learning_rate": 1.7590024937655862e-05, "loss": 0.5038, "step": 9670 }, { "epoch": 2.4139650872817953, "grad_norm": 6.0681986808776855, "learning_rate": 1.7587531172069826e-05, "loss": 0.4544, "step": 9680 }, { "epoch": 2.4164588528678306, "grad_norm": 6.954349040985107, "learning_rate": 1.7585037406483793e-05, "loss": 0.4765, "step": 9690 }, { "epoch": 2.4189526184538654, "grad_norm": 5.360884666442871, "learning_rate": 1.7582543640897756e-05, "loss": 0.4173, "step": 9700 }, { "epoch": 2.4214463840399003, "grad_norm": 5.455905437469482, "learning_rate": 1.7580049875311723e-05, "loss": 0.3981, "step": 9710 }, { "epoch": 2.423940149625935, "grad_norm": 7.694284439086914, "learning_rate": 1.7577556109725687e-05, "loss": 0.4159, "step": 9720 }, { "epoch": 2.42643391521197, "grad_norm": 2.6241650581359863, "learning_rate": 1.7575062344139654e-05, "loss": 0.4415, "step": 9730 }, { "epoch": 2.428927680798005, "grad_norm": 5.173274040222168, "learning_rate": 1.7572568578553618e-05, "loss": 0.3576, "step": 9740 }, { "epoch": 2.43142144638404, "grad_norm": 5.52868127822876, "learning_rate": 1.7570074812967585e-05, "loss": 0.4224, "step": 9750 }, { "epoch": 2.4339152119700747, "grad_norm": 6.820600509643555, "learning_rate": 1.7567581047381548e-05, "loss": 0.4329, "step": 9760 }, { "epoch": 2.43640897755611, "grad_norm": 5.07393741607666, "learning_rate": 1.7565087281795512e-05, "loss": 0.4344, "step": 9770 }, { "epoch": 2.438902743142145, "grad_norm": 6.3466572761535645, "learning_rate": 1.756259351620948e-05, "loss": 0.3851, "step": 9780 }, { "epoch": 2.4413965087281797, "grad_norm": 4.636044025421143, "learning_rate": 1.7560099750623442e-05, "loss": 0.4259, "step": 9790 }, { "epoch": 2.4438902743142146, "grad_norm": 4.414524078369141, "learning_rate": 1.7557605985037406e-05, "loss": 0.4189, "step": 9800 }, { "epoch": 2.4463840399002494, "grad_norm": 5.910994529724121, "learning_rate": 1.7555112219451373e-05, "loss": 0.3701, "step": 9810 }, { "epoch": 2.4488778054862843, "grad_norm": 5.326263427734375, "learning_rate": 1.7552618453865337e-05, "loss": 0.3784, "step": 9820 }, { "epoch": 2.451371571072319, "grad_norm": 4.930222511291504, "learning_rate": 1.7550124688279304e-05, "loss": 0.4245, "step": 9830 }, { "epoch": 2.453865336658354, "grad_norm": 14.357271194458008, "learning_rate": 1.7547630922693267e-05, "loss": 0.5032, "step": 9840 }, { "epoch": 2.456359102244389, "grad_norm": 6.140951633453369, "learning_rate": 1.7545137157107234e-05, "loss": 0.3132, "step": 9850 }, { "epoch": 2.458852867830424, "grad_norm": 6.1243743896484375, "learning_rate": 1.7542643391521198e-05, "loss": 0.4094, "step": 9860 }, { "epoch": 2.4613466334164587, "grad_norm": 5.133067607879639, "learning_rate": 1.7540149625935165e-05, "loss": 0.4155, "step": 9870 }, { "epoch": 2.4638403990024935, "grad_norm": 5.039419174194336, "learning_rate": 1.753765586034913e-05, "loss": 0.3812, "step": 9880 }, { "epoch": 2.466334164588529, "grad_norm": 4.443388938903809, "learning_rate": 1.7535162094763096e-05, "loss": 0.3958, "step": 9890 }, { "epoch": 2.4688279301745637, "grad_norm": 4.9401140213012695, "learning_rate": 1.753266832917706e-05, "loss": 0.3768, "step": 9900 }, { "epoch": 2.4713216957605986, "grad_norm": 4.5980916023254395, "learning_rate": 1.7530174563591026e-05, "loss": 0.3657, "step": 9910 }, { "epoch": 2.4738154613466334, "grad_norm": 4.5691423416137695, "learning_rate": 1.752768079800499e-05, "loss": 0.5407, "step": 9920 }, { "epoch": 2.4763092269326683, "grad_norm": 10.96796989440918, "learning_rate": 1.7525187032418953e-05, "loss": 0.3909, "step": 9930 }, { "epoch": 2.478802992518703, "grad_norm": 8.20064640045166, "learning_rate": 1.752269326683292e-05, "loss": 0.3977, "step": 9940 }, { "epoch": 2.481296758104738, "grad_norm": 6.133816242218018, "learning_rate": 1.7520199501246884e-05, "loss": 0.4741, "step": 9950 }, { "epoch": 2.483790523690773, "grad_norm": 3.481630325317383, "learning_rate": 1.751770573566085e-05, "loss": 0.359, "step": 9960 }, { "epoch": 2.4862842892768082, "grad_norm": 6.158251762390137, "learning_rate": 1.7515211970074815e-05, "loss": 0.43, "step": 9970 }, { "epoch": 2.488778054862843, "grad_norm": 6.90919828414917, "learning_rate": 1.7512718204488778e-05, "loss": 0.4154, "step": 9980 }, { "epoch": 2.491271820448878, "grad_norm": 6.205567836761475, "learning_rate": 1.7510224438902745e-05, "loss": 0.4585, "step": 9990 }, { "epoch": 2.493765586034913, "grad_norm": 8.581686973571777, "learning_rate": 1.750773067331671e-05, "loss": 0.4748, "step": 10000 }, { "epoch": 2.4962593516209477, "grad_norm": 6.189614295959473, "learning_rate": 1.7505236907730673e-05, "loss": 0.4728, "step": 10010 }, { "epoch": 2.4987531172069826, "grad_norm": 6.721256732940674, "learning_rate": 1.750274314214464e-05, "loss": 0.4508, "step": 10020 }, { "epoch": 2.5012468827930174, "grad_norm": 7.564716815948486, "learning_rate": 1.7500249376558603e-05, "loss": 0.4478, "step": 10030 }, { "epoch": 2.5037406483790523, "grad_norm": 6.12339973449707, "learning_rate": 1.749775561097257e-05, "loss": 0.3435, "step": 10040 }, { "epoch": 2.506234413965087, "grad_norm": 6.196720123291016, "learning_rate": 1.7495261845386534e-05, "loss": 0.4308, "step": 10050 }, { "epoch": 2.508728179551122, "grad_norm": 4.724903106689453, "learning_rate": 1.74927680798005e-05, "loss": 0.4072, "step": 10060 }, { "epoch": 2.511221945137157, "grad_norm": 5.753084659576416, "learning_rate": 1.7490274314214464e-05, "loss": 0.3926, "step": 10070 }, { "epoch": 2.5137157107231918, "grad_norm": 4.251994609832764, "learning_rate": 1.748778054862843e-05, "loss": 0.4337, "step": 10080 }, { "epoch": 2.516209476309227, "grad_norm": 5.593048095703125, "learning_rate": 1.7485286783042395e-05, "loss": 0.3766, "step": 10090 }, { "epoch": 2.518703241895262, "grad_norm": 3.456326723098755, "learning_rate": 1.7482793017456362e-05, "loss": 0.416, "step": 10100 }, { "epoch": 2.521197007481297, "grad_norm": 6.803287982940674, "learning_rate": 1.7480299251870326e-05, "loss": 0.4372, "step": 10110 }, { "epoch": 2.5236907730673317, "grad_norm": 6.90035343170166, "learning_rate": 1.7477805486284293e-05, "loss": 0.3842, "step": 10120 }, { "epoch": 2.5261845386533666, "grad_norm": 7.092517375946045, "learning_rate": 1.7475311720698256e-05, "loss": 0.4817, "step": 10130 }, { "epoch": 2.5286783042394014, "grad_norm": 7.831302642822266, "learning_rate": 1.747281795511222e-05, "loss": 0.3565, "step": 10140 }, { "epoch": 2.5311720698254363, "grad_norm": 5.484219551086426, "learning_rate": 1.7470324189526187e-05, "loss": 0.4546, "step": 10150 }, { "epoch": 2.533665835411471, "grad_norm": 9.50973129272461, "learning_rate": 1.746783042394015e-05, "loss": 0.3583, "step": 10160 }, { "epoch": 2.5361596009975065, "grad_norm": 6.531426429748535, "learning_rate": 1.7465336658354114e-05, "loss": 0.3899, "step": 10170 }, { "epoch": 2.5386533665835413, "grad_norm": 6.3029632568359375, "learning_rate": 1.746284289276808e-05, "loss": 0.4156, "step": 10180 }, { "epoch": 2.541147132169576, "grad_norm": 6.3402252197265625, "learning_rate": 1.7460349127182045e-05, "loss": 0.5982, "step": 10190 }, { "epoch": 2.543640897755611, "grad_norm": 6.966554164886475, "learning_rate": 1.745785536159601e-05, "loss": 0.4151, "step": 10200 }, { "epoch": 2.546134663341646, "grad_norm": 4.213450908660889, "learning_rate": 1.7455361596009975e-05, "loss": 0.4014, "step": 10210 }, { "epoch": 2.548628428927681, "grad_norm": 6.965415000915527, "learning_rate": 1.7452867830423942e-05, "loss": 0.5053, "step": 10220 }, { "epoch": 2.5511221945137157, "grad_norm": 7.399175643920898, "learning_rate": 1.7450374064837906e-05, "loss": 0.4794, "step": 10230 }, { "epoch": 2.5536159600997506, "grad_norm": 7.18726921081543, "learning_rate": 1.7447880299251873e-05, "loss": 0.4237, "step": 10240 }, { "epoch": 2.5561097256857854, "grad_norm": 5.636864185333252, "learning_rate": 1.7445386533665837e-05, "loss": 0.4104, "step": 10250 }, { "epoch": 2.5586034912718203, "grad_norm": 6.8820977210998535, "learning_rate": 1.7442892768079804e-05, "loss": 0.4435, "step": 10260 }, { "epoch": 2.561097256857855, "grad_norm": 9.02462100982666, "learning_rate": 1.7440399002493767e-05, "loss": 0.4672, "step": 10270 }, { "epoch": 2.56359102244389, "grad_norm": 5.171300411224365, "learning_rate": 1.7437905236907734e-05, "loss": 0.4258, "step": 10280 }, { "epoch": 2.5660847880299253, "grad_norm": 11.12934684753418, "learning_rate": 1.7435411471321698e-05, "loss": 0.3967, "step": 10290 }, { "epoch": 2.56857855361596, "grad_norm": 6.156098365783691, "learning_rate": 1.743291770573566e-05, "loss": 0.3973, "step": 10300 }, { "epoch": 2.571072319201995, "grad_norm": 4.736686706542969, "learning_rate": 1.743042394014963e-05, "loss": 0.462, "step": 10310 }, { "epoch": 2.57356608478803, "grad_norm": 8.570772171020508, "learning_rate": 1.7427930174563592e-05, "loss": 0.4494, "step": 10320 }, { "epoch": 2.576059850374065, "grad_norm": 7.194964408874512, "learning_rate": 1.742543640897756e-05, "loss": 0.4351, "step": 10330 }, { "epoch": 2.5785536159600997, "grad_norm": 4.6874918937683105, "learning_rate": 1.7422942643391523e-05, "loss": 0.4341, "step": 10340 }, { "epoch": 2.5810473815461346, "grad_norm": 6.765628337860107, "learning_rate": 1.7420448877805486e-05, "loss": 0.4364, "step": 10350 }, { "epoch": 2.5835411471321694, "grad_norm": 6.419005870819092, "learning_rate": 1.7417955112219453e-05, "loss": 0.4402, "step": 10360 }, { "epoch": 2.5860349127182047, "grad_norm": 6.456396102905273, "learning_rate": 1.7415461346633417e-05, "loss": 0.4289, "step": 10370 }, { "epoch": 2.5885286783042396, "grad_norm": 6.123867034912109, "learning_rate": 1.741296758104738e-05, "loss": 0.4403, "step": 10380 }, { "epoch": 2.5910224438902745, "grad_norm": 5.706097602844238, "learning_rate": 1.7410473815461347e-05, "loss": 0.4294, "step": 10390 }, { "epoch": 2.5935162094763093, "grad_norm": 6.1752543449401855, "learning_rate": 1.740798004987531e-05, "loss": 0.3477, "step": 10400 }, { "epoch": 2.596009975062344, "grad_norm": 5.515731334686279, "learning_rate": 1.7405486284289278e-05, "loss": 0.4414, "step": 10410 }, { "epoch": 2.598503740648379, "grad_norm": 6.281205654144287, "learning_rate": 1.7402992518703242e-05, "loss": 0.4038, "step": 10420 }, { "epoch": 2.600997506234414, "grad_norm": 5.598005294799805, "learning_rate": 1.740049875311721e-05, "loss": 0.3969, "step": 10430 }, { "epoch": 2.603491271820449, "grad_norm": 4.307186126708984, "learning_rate": 1.7398004987531176e-05, "loss": 0.4112, "step": 10440 }, { "epoch": 2.6059850374064837, "grad_norm": 5.855963230133057, "learning_rate": 1.739551122194514e-05, "loss": 0.4137, "step": 10450 }, { "epoch": 2.6084788029925186, "grad_norm": 6.094501972198486, "learning_rate": 1.7393017456359106e-05, "loss": 0.4384, "step": 10460 }, { "epoch": 2.6109725685785534, "grad_norm": 7.285081386566162, "learning_rate": 1.739052369077307e-05, "loss": 0.4306, "step": 10470 }, { "epoch": 2.6134663341645883, "grad_norm": 7.074899196624756, "learning_rate": 1.7388029925187034e-05, "loss": 0.4767, "step": 10480 }, { "epoch": 2.6159600997506236, "grad_norm": 6.743325710296631, "learning_rate": 1.7385536159601e-05, "loss": 0.4486, "step": 10490 }, { "epoch": 2.6184538653366585, "grad_norm": 5.235808372497559, "learning_rate": 1.7383042394014964e-05, "loss": 0.4621, "step": 10500 }, { "epoch": 2.6209476309226933, "grad_norm": 5.081497669219971, "learning_rate": 1.7380548628428928e-05, "loss": 0.4273, "step": 10510 }, { "epoch": 2.623441396508728, "grad_norm": 5.6099443435668945, "learning_rate": 1.7378054862842895e-05, "loss": 0.4259, "step": 10520 }, { "epoch": 2.625935162094763, "grad_norm": 6.103354454040527, "learning_rate": 1.737556109725686e-05, "loss": 0.4568, "step": 10530 }, { "epoch": 2.628428927680798, "grad_norm": 4.791126728057861, "learning_rate": 1.7373067331670825e-05, "loss": 0.4052, "step": 10540 }, { "epoch": 2.630922693266833, "grad_norm": 4.903756618499756, "learning_rate": 1.737057356608479e-05, "loss": 0.4314, "step": 10550 }, { "epoch": 2.6334164588528677, "grad_norm": 5.318016052246094, "learning_rate": 1.7368079800498753e-05, "loss": 0.4311, "step": 10560 }, { "epoch": 2.635910224438903, "grad_norm": 10.588895797729492, "learning_rate": 1.736558603491272e-05, "loss": 0.4499, "step": 10570 }, { "epoch": 2.638403990024938, "grad_norm": 4.9873833656311035, "learning_rate": 1.7363092269326683e-05, "loss": 0.4632, "step": 10580 }, { "epoch": 2.6408977556109727, "grad_norm": 7.042355537414551, "learning_rate": 1.736059850374065e-05, "loss": 0.3997, "step": 10590 }, { "epoch": 2.6433915211970076, "grad_norm": 6.466985702514648, "learning_rate": 1.7358104738154614e-05, "loss": 0.4844, "step": 10600 }, { "epoch": 2.6458852867830425, "grad_norm": 4.656626224517822, "learning_rate": 1.735561097256858e-05, "loss": 0.3745, "step": 10610 }, { "epoch": 2.6483790523690773, "grad_norm": 9.50407886505127, "learning_rate": 1.7353117206982545e-05, "loss": 0.5028, "step": 10620 }, { "epoch": 2.650872817955112, "grad_norm": 5.126711845397949, "learning_rate": 1.735062344139651e-05, "loss": 0.4899, "step": 10630 }, { "epoch": 2.653366583541147, "grad_norm": 10.905644416809082, "learning_rate": 1.7348129675810475e-05, "loss": 0.4768, "step": 10640 }, { "epoch": 2.655860349127182, "grad_norm": 5.238419532775879, "learning_rate": 1.7345635910224442e-05, "loss": 0.3905, "step": 10650 }, { "epoch": 2.658354114713217, "grad_norm": 5.836606979370117, "learning_rate": 1.7343142144638406e-05, "loss": 0.4586, "step": 10660 }, { "epoch": 2.6608478802992517, "grad_norm": 6.579746723175049, "learning_rate": 1.734064837905237e-05, "loss": 0.5069, "step": 10670 }, { "epoch": 2.6633416458852865, "grad_norm": 4.4686279296875, "learning_rate": 1.7338154613466336e-05, "loss": 0.3852, "step": 10680 }, { "epoch": 2.665835411471322, "grad_norm": 6.548900604248047, "learning_rate": 1.73356608478803e-05, "loss": 0.3776, "step": 10690 }, { "epoch": 2.6683291770573567, "grad_norm": 5.394494533538818, "learning_rate": 1.7333167082294267e-05, "loss": 0.3714, "step": 10700 }, { "epoch": 2.6708229426433916, "grad_norm": 4.6077141761779785, "learning_rate": 1.733067331670823e-05, "loss": 0.3991, "step": 10710 }, { "epoch": 2.6733167082294265, "grad_norm": 9.343939781188965, "learning_rate": 1.7328179551122194e-05, "loss": 0.3998, "step": 10720 }, { "epoch": 2.6758104738154613, "grad_norm": 5.4641032218933105, "learning_rate": 1.732568578553616e-05, "loss": 0.3978, "step": 10730 }, { "epoch": 2.678304239401496, "grad_norm": 6.154629230499268, "learning_rate": 1.7323192019950125e-05, "loss": 0.3814, "step": 10740 }, { "epoch": 2.680798004987531, "grad_norm": 8.188713073730469, "learning_rate": 1.7320698254364092e-05, "loss": 0.454, "step": 10750 }, { "epoch": 2.683291770573566, "grad_norm": 6.351066589355469, "learning_rate": 1.7318204488778055e-05, "loss": 0.4385, "step": 10760 }, { "epoch": 2.6857855361596013, "grad_norm": 5.4914069175720215, "learning_rate": 1.731571072319202e-05, "loss": 0.4687, "step": 10770 }, { "epoch": 2.688279301745636, "grad_norm": 10.653285026550293, "learning_rate": 1.7313216957605986e-05, "loss": 0.3617, "step": 10780 }, { "epoch": 2.690773067331671, "grad_norm": 5.647527694702148, "learning_rate": 1.7310723192019953e-05, "loss": 0.4058, "step": 10790 }, { "epoch": 2.693266832917706, "grad_norm": 8.179327964782715, "learning_rate": 1.7308229426433917e-05, "loss": 0.4148, "step": 10800 }, { "epoch": 2.6957605985037407, "grad_norm": 4.55966854095459, "learning_rate": 1.7305735660847884e-05, "loss": 0.3592, "step": 10810 }, { "epoch": 2.6982543640897756, "grad_norm": 7.562872409820557, "learning_rate": 1.7303241895261847e-05, "loss": 0.3956, "step": 10820 }, { "epoch": 2.7007481296758105, "grad_norm": 12.162755012512207, "learning_rate": 1.7300748129675814e-05, "loss": 0.3981, "step": 10830 }, { "epoch": 2.7032418952618453, "grad_norm": 9.855378150939941, "learning_rate": 1.7298254364089778e-05, "loss": 0.4338, "step": 10840 }, { "epoch": 2.70573566084788, "grad_norm": 4.713982582092285, "learning_rate": 1.729576059850374e-05, "loss": 0.3565, "step": 10850 }, { "epoch": 2.708229426433915, "grad_norm": 6.571569919586182, "learning_rate": 1.729326683291771e-05, "loss": 0.3863, "step": 10860 }, { "epoch": 2.71072319201995, "grad_norm": 4.679405212402344, "learning_rate": 1.7290773067331672e-05, "loss": 0.3811, "step": 10870 }, { "epoch": 2.713216957605985, "grad_norm": 6.7202982902526855, "learning_rate": 1.7288279301745636e-05, "loss": 0.3924, "step": 10880 }, { "epoch": 2.71571072319202, "grad_norm": 6.362400531768799, "learning_rate": 1.7285785536159603e-05, "loss": 0.3774, "step": 10890 }, { "epoch": 2.718204488778055, "grad_norm": 5.451074600219727, "learning_rate": 1.7283291770573566e-05, "loss": 0.391, "step": 10900 }, { "epoch": 2.72069825436409, "grad_norm": 4.991129398345947, "learning_rate": 1.7280798004987533e-05, "loss": 0.4007, "step": 10910 }, { "epoch": 2.7231920199501247, "grad_norm": 6.635371208190918, "learning_rate": 1.7278304239401497e-05, "loss": 0.4801, "step": 10920 }, { "epoch": 2.7256857855361596, "grad_norm": 7.7395501136779785, "learning_rate": 1.727581047381546e-05, "loss": 0.4322, "step": 10930 }, { "epoch": 2.7281795511221945, "grad_norm": 13.381205558776855, "learning_rate": 1.7273316708229428e-05, "loss": 0.468, "step": 10940 }, { "epoch": 2.7306733167082293, "grad_norm": 7.249978542327881, "learning_rate": 1.727082294264339e-05, "loss": 0.4083, "step": 10950 }, { "epoch": 2.733167082294264, "grad_norm": 7.9330573081970215, "learning_rate": 1.7268329177057358e-05, "loss": 0.4786, "step": 10960 }, { "epoch": 2.7356608478802995, "grad_norm": 4.235700607299805, "learning_rate": 1.7265835411471322e-05, "loss": 0.4408, "step": 10970 }, { "epoch": 2.7381546134663344, "grad_norm": 10.445588111877441, "learning_rate": 1.726334164588529e-05, "loss": 0.4088, "step": 10980 }, { "epoch": 2.7406483790523692, "grad_norm": 4.6287031173706055, "learning_rate": 1.7260847880299253e-05, "loss": 0.4159, "step": 10990 }, { "epoch": 2.743142144638404, "grad_norm": 6.074006080627441, "learning_rate": 1.725835411471322e-05, "loss": 0.4362, "step": 11000 }, { "epoch": 2.745635910224439, "grad_norm": 6.392845153808594, "learning_rate": 1.7255860349127183e-05, "loss": 0.4406, "step": 11010 }, { "epoch": 2.748129675810474, "grad_norm": 4.722134590148926, "learning_rate": 1.725336658354115e-05, "loss": 0.4392, "step": 11020 }, { "epoch": 2.7506234413965087, "grad_norm": 5.2035698890686035, "learning_rate": 1.7250872817955114e-05, "loss": 0.4408, "step": 11030 }, { "epoch": 2.7531172069825436, "grad_norm": 5.009521007537842, "learning_rate": 1.724837905236908e-05, "loss": 0.3727, "step": 11040 }, { "epoch": 2.7556109725685785, "grad_norm": 8.132833480834961, "learning_rate": 1.7245885286783044e-05, "loss": 0.4183, "step": 11050 }, { "epoch": 2.7581047381546133, "grad_norm": 4.388547897338867, "learning_rate": 1.7243391521197008e-05, "loss": 0.3871, "step": 11060 }, { "epoch": 2.760598503740648, "grad_norm": 5.839278221130371, "learning_rate": 1.7240897755610975e-05, "loss": 0.3665, "step": 11070 }, { "epoch": 2.763092269326683, "grad_norm": 6.53806734085083, "learning_rate": 1.723840399002494e-05, "loss": 0.4331, "step": 11080 }, { "epoch": 2.765586034912718, "grad_norm": 9.082895278930664, "learning_rate": 1.7235910224438902e-05, "loss": 0.379, "step": 11090 }, { "epoch": 2.7680798004987532, "grad_norm": 4.956233978271484, "learning_rate": 1.723341645885287e-05, "loss": 0.4789, "step": 11100 }, { "epoch": 2.770573566084788, "grad_norm": 4.803463935852051, "learning_rate": 1.7230922693266833e-05, "loss": 0.4266, "step": 11110 }, { "epoch": 2.773067331670823, "grad_norm": 5.308382511138916, "learning_rate": 1.72284289276808e-05, "loss": 0.4347, "step": 11120 }, { "epoch": 2.775561097256858, "grad_norm": 4.9697418212890625, "learning_rate": 1.7225935162094763e-05, "loss": 0.4041, "step": 11130 }, { "epoch": 2.7780548628428927, "grad_norm": 5.6570281982421875, "learning_rate": 1.722344139650873e-05, "loss": 0.4769, "step": 11140 }, { "epoch": 2.7805486284289276, "grad_norm": 5.522329330444336, "learning_rate": 1.7220947630922694e-05, "loss": 0.4414, "step": 11150 }, { "epoch": 2.7830423940149625, "grad_norm": 6.803591728210449, "learning_rate": 1.721845386533666e-05, "loss": 0.4085, "step": 11160 }, { "epoch": 2.7855361596009978, "grad_norm": 13.538893699645996, "learning_rate": 1.7215960099750625e-05, "loss": 0.4206, "step": 11170 }, { "epoch": 2.7880299251870326, "grad_norm": 5.024692535400391, "learning_rate": 1.7213466334164592e-05, "loss": 0.4413, "step": 11180 }, { "epoch": 2.7905236907730675, "grad_norm": 6.203422546386719, "learning_rate": 1.7210972568578555e-05, "loss": 0.3727, "step": 11190 }, { "epoch": 2.7930174563591024, "grad_norm": 4.57298469543457, "learning_rate": 1.7208478802992522e-05, "loss": 0.393, "step": 11200 }, { "epoch": 2.7955112219451372, "grad_norm": 5.918140888214111, "learning_rate": 1.7205985037406486e-05, "loss": 0.3846, "step": 11210 }, { "epoch": 2.798004987531172, "grad_norm": 3.546912670135498, "learning_rate": 1.720349127182045e-05, "loss": 0.4229, "step": 11220 }, { "epoch": 2.800498753117207, "grad_norm": 4.094278335571289, "learning_rate": 1.7200997506234417e-05, "loss": 0.3498, "step": 11230 }, { "epoch": 2.802992518703242, "grad_norm": 5.546658039093018, "learning_rate": 1.719850374064838e-05, "loss": 0.4516, "step": 11240 }, { "epoch": 2.8054862842892767, "grad_norm": 3.7501320838928223, "learning_rate": 1.7196009975062347e-05, "loss": 0.366, "step": 11250 }, { "epoch": 2.8079800498753116, "grad_norm": 7.085123062133789, "learning_rate": 1.719351620947631e-05, "loss": 0.3878, "step": 11260 }, { "epoch": 2.8104738154613464, "grad_norm": 5.683539390563965, "learning_rate": 1.7191022443890274e-05, "loss": 0.4439, "step": 11270 }, { "epoch": 2.8129675810473813, "grad_norm": 7.982929229736328, "learning_rate": 1.718852867830424e-05, "loss": 0.426, "step": 11280 }, { "epoch": 2.815461346633416, "grad_norm": 5.907116889953613, "learning_rate": 1.7186034912718205e-05, "loss": 0.4181, "step": 11290 }, { "epoch": 2.8179551122194515, "grad_norm": 6.01471471786499, "learning_rate": 1.718354114713217e-05, "loss": 0.4148, "step": 11300 }, { "epoch": 2.8204488778054864, "grad_norm": 4.76801872253418, "learning_rate": 1.7181047381546136e-05, "loss": 0.3851, "step": 11310 }, { "epoch": 2.8229426433915212, "grad_norm": 5.2211785316467285, "learning_rate": 1.71785536159601e-05, "loss": 0.4307, "step": 11320 }, { "epoch": 2.825436408977556, "grad_norm": 3.697915554046631, "learning_rate": 1.7176059850374066e-05, "loss": 0.4286, "step": 11330 }, { "epoch": 2.827930174563591, "grad_norm": 4.701713562011719, "learning_rate": 1.717356608478803e-05, "loss": 0.4104, "step": 11340 }, { "epoch": 2.830423940149626, "grad_norm": 7.1458964347839355, "learning_rate": 1.7171072319201997e-05, "loss": 0.4007, "step": 11350 }, { "epoch": 2.8329177057356607, "grad_norm": 4.9811272621154785, "learning_rate": 1.716857855361596e-05, "loss": 0.3228, "step": 11360 }, { "epoch": 2.835411471321696, "grad_norm": 7.584052085876465, "learning_rate": 1.7166084788029928e-05, "loss": 0.4144, "step": 11370 }, { "epoch": 2.837905236907731, "grad_norm": 6.256438732147217, "learning_rate": 1.716359102244389e-05, "loss": 0.459, "step": 11380 }, { "epoch": 2.8403990024937658, "grad_norm": 6.128487586975098, "learning_rate": 1.7161097256857858e-05, "loss": 0.4185, "step": 11390 }, { "epoch": 2.8428927680798006, "grad_norm": 5.535672664642334, "learning_rate": 1.7158603491271822e-05, "loss": 0.451, "step": 11400 }, { "epoch": 2.8453865336658355, "grad_norm": 5.653360843658447, "learning_rate": 1.715610972568579e-05, "loss": 0.5224, "step": 11410 }, { "epoch": 2.8478802992518704, "grad_norm": 8.358339309692383, "learning_rate": 1.7153615960099752e-05, "loss": 0.431, "step": 11420 }, { "epoch": 2.8503740648379052, "grad_norm": 5.042673587799072, "learning_rate": 1.7151122194513716e-05, "loss": 0.4588, "step": 11430 }, { "epoch": 2.85286783042394, "grad_norm": 5.333296298980713, "learning_rate": 1.7148628428927683e-05, "loss": 0.3947, "step": 11440 }, { "epoch": 2.855361596009975, "grad_norm": 4.792226791381836, "learning_rate": 1.7146134663341647e-05, "loss": 0.3985, "step": 11450 }, { "epoch": 2.85785536159601, "grad_norm": 6.703670501708984, "learning_rate": 1.7143640897755614e-05, "loss": 0.4351, "step": 11460 }, { "epoch": 2.8603491271820447, "grad_norm": 6.484237194061279, "learning_rate": 1.7141147132169577e-05, "loss": 0.4915, "step": 11470 }, { "epoch": 2.8628428927680796, "grad_norm": 7.9989447593688965, "learning_rate": 1.713865336658354e-05, "loss": 0.3769, "step": 11480 }, { "epoch": 2.8653366583541144, "grad_norm": 5.715572834014893, "learning_rate": 1.7136159600997508e-05, "loss": 0.5566, "step": 11490 }, { "epoch": 2.8678304239401498, "grad_norm": 5.8361945152282715, "learning_rate": 1.713366583541147e-05, "loss": 0.3843, "step": 11500 }, { "epoch": 2.8703241895261846, "grad_norm": 7.280886650085449, "learning_rate": 1.713117206982544e-05, "loss": 0.4158, "step": 11510 }, { "epoch": 2.8728179551122195, "grad_norm": 6.559328556060791, "learning_rate": 1.7128678304239402e-05, "loss": 0.4373, "step": 11520 }, { "epoch": 2.8753117206982544, "grad_norm": 8.497318267822266, "learning_rate": 1.712618453865337e-05, "loss": 0.4206, "step": 11530 }, { "epoch": 2.8778054862842892, "grad_norm": 4.384303092956543, "learning_rate": 1.7123690773067333e-05, "loss": 0.3942, "step": 11540 }, { "epoch": 2.880299251870324, "grad_norm": 6.503232002258301, "learning_rate": 1.71211970074813e-05, "loss": 0.5279, "step": 11550 }, { "epoch": 2.882793017456359, "grad_norm": 6.880457878112793, "learning_rate": 1.7118703241895263e-05, "loss": 0.4544, "step": 11560 }, { "epoch": 2.8852867830423943, "grad_norm": 5.289419174194336, "learning_rate": 1.711620947630923e-05, "loss": 0.414, "step": 11570 }, { "epoch": 2.887780548628429, "grad_norm": 7.233415126800537, "learning_rate": 1.7113715710723194e-05, "loss": 0.4409, "step": 11580 }, { "epoch": 2.890274314214464, "grad_norm": 5.534732818603516, "learning_rate": 1.7111221945137158e-05, "loss": 0.4652, "step": 11590 }, { "epoch": 2.892768079800499, "grad_norm": 6.039757251739502, "learning_rate": 1.7108728179551125e-05, "loss": 0.4492, "step": 11600 }, { "epoch": 2.8952618453865338, "grad_norm": 8.882451057434082, "learning_rate": 1.7106234413965088e-05, "loss": 0.3906, "step": 11610 }, { "epoch": 2.8977556109725686, "grad_norm": 6.894459247589111, "learning_rate": 1.7103740648379055e-05, "loss": 0.4368, "step": 11620 }, { "epoch": 2.9002493765586035, "grad_norm": 4.845560073852539, "learning_rate": 1.710124688279302e-05, "loss": 0.4496, "step": 11630 }, { "epoch": 2.9027431421446384, "grad_norm": 4.972646713256836, "learning_rate": 1.7098753117206982e-05, "loss": 0.4763, "step": 11640 }, { "epoch": 2.9052369077306732, "grad_norm": 5.399471759796143, "learning_rate": 1.709625935162095e-05, "loss": 0.4365, "step": 11650 }, { "epoch": 2.907730673316708, "grad_norm": 5.46916389465332, "learning_rate": 1.7093765586034913e-05, "loss": 0.4315, "step": 11660 }, { "epoch": 2.910224438902743, "grad_norm": 9.235302925109863, "learning_rate": 1.7091271820448877e-05, "loss": 0.4335, "step": 11670 }, { "epoch": 2.912718204488778, "grad_norm": 3.6637065410614014, "learning_rate": 1.7088778054862844e-05, "loss": 0.4421, "step": 11680 }, { "epoch": 2.9152119700748127, "grad_norm": 4.475642681121826, "learning_rate": 1.7086284289276807e-05, "loss": 0.3812, "step": 11690 }, { "epoch": 2.917705735660848, "grad_norm": 4.71533203125, "learning_rate": 1.7083790523690774e-05, "loss": 0.3723, "step": 11700 }, { "epoch": 2.920199501246883, "grad_norm": 9.815446853637695, "learning_rate": 1.7081296758104738e-05, "loss": 0.4587, "step": 11710 }, { "epoch": 2.9226932668329177, "grad_norm": 5.071043968200684, "learning_rate": 1.7078802992518705e-05, "loss": 0.4285, "step": 11720 }, { "epoch": 2.9251870324189526, "grad_norm": 4.660521507263184, "learning_rate": 1.7076309226932672e-05, "loss": 0.4849, "step": 11730 }, { "epoch": 2.9276807980049875, "grad_norm": 4.910328388214111, "learning_rate": 1.7073815461346636e-05, "loss": 0.4813, "step": 11740 }, { "epoch": 2.9301745635910224, "grad_norm": 5.175450801849365, "learning_rate": 1.7071321695760603e-05, "loss": 0.4067, "step": 11750 }, { "epoch": 2.932668329177057, "grad_norm": 5.369016170501709, "learning_rate": 1.7068827930174566e-05, "loss": 0.4048, "step": 11760 }, { "epoch": 2.9351620947630925, "grad_norm": 4.3544745445251465, "learning_rate": 1.706633416458853e-05, "loss": 0.4286, "step": 11770 }, { "epoch": 2.9376558603491274, "grad_norm": 7.268121242523193, "learning_rate": 1.7063840399002497e-05, "loss": 0.3988, "step": 11780 }, { "epoch": 2.9401496259351623, "grad_norm": 5.836695194244385, "learning_rate": 1.706134663341646e-05, "loss": 0.4187, "step": 11790 }, { "epoch": 2.942643391521197, "grad_norm": 4.910001754760742, "learning_rate": 1.7058852867830424e-05, "loss": 0.4232, "step": 11800 }, { "epoch": 2.945137157107232, "grad_norm": 3.5782833099365234, "learning_rate": 1.705635910224439e-05, "loss": 0.5034, "step": 11810 }, { "epoch": 2.947630922693267, "grad_norm": 4.067619323730469, "learning_rate": 1.7053865336658355e-05, "loss": 0.3851, "step": 11820 }, { "epoch": 2.9501246882793017, "grad_norm": 9.091952323913574, "learning_rate": 1.705137157107232e-05, "loss": 0.5536, "step": 11830 }, { "epoch": 2.9526184538653366, "grad_norm": 7.382187366485596, "learning_rate": 1.7048877805486285e-05, "loss": 0.3732, "step": 11840 }, { "epoch": 2.9551122194513715, "grad_norm": 5.961577415466309, "learning_rate": 1.704638403990025e-05, "loss": 0.3849, "step": 11850 }, { "epoch": 2.9576059850374063, "grad_norm": 7.97136116027832, "learning_rate": 1.7043890274314216e-05, "loss": 0.4156, "step": 11860 }, { "epoch": 2.960099750623441, "grad_norm": 14.47574234008789, "learning_rate": 1.704139650872818e-05, "loss": 0.4306, "step": 11870 }, { "epoch": 2.962593516209476, "grad_norm": 6.207291603088379, "learning_rate": 1.7038902743142146e-05, "loss": 0.3749, "step": 11880 }, { "epoch": 2.965087281795511, "grad_norm": 6.804027080535889, "learning_rate": 1.703640897755611e-05, "loss": 0.4808, "step": 11890 }, { "epoch": 2.9675810473815463, "grad_norm": 7.510690689086914, "learning_rate": 1.7033915211970077e-05, "loss": 0.5506, "step": 11900 }, { "epoch": 2.970074812967581, "grad_norm": 10.168417930603027, "learning_rate": 1.703142144638404e-05, "loss": 0.5405, "step": 11910 }, { "epoch": 2.972568578553616, "grad_norm": 5.433992385864258, "learning_rate": 1.7028927680798008e-05, "loss": 0.4205, "step": 11920 }, { "epoch": 2.975062344139651, "grad_norm": 5.19559907913208, "learning_rate": 1.702643391521197e-05, "loss": 0.3868, "step": 11930 }, { "epoch": 2.9775561097256857, "grad_norm": 6.645895004272461, "learning_rate": 1.702394014962594e-05, "loss": 0.4322, "step": 11940 }, { "epoch": 2.9800498753117206, "grad_norm": 5.916768550872803, "learning_rate": 1.7021446384039902e-05, "loss": 0.3847, "step": 11950 }, { "epoch": 2.9825436408977555, "grad_norm": 4.953275203704834, "learning_rate": 1.701895261845387e-05, "loss": 0.4585, "step": 11960 }, { "epoch": 2.985037406483791, "grad_norm": 3.9088947772979736, "learning_rate": 1.7016458852867833e-05, "loss": 0.3925, "step": 11970 }, { "epoch": 2.9875311720698257, "grad_norm": 12.187833786010742, "learning_rate": 1.7013965087281796e-05, "loss": 0.5033, "step": 11980 }, { "epoch": 2.9900249376558605, "grad_norm": 6.370995998382568, "learning_rate": 1.7011471321695763e-05, "loss": 0.3995, "step": 11990 }, { "epoch": 2.9925187032418954, "grad_norm": 5.4293365478515625, "learning_rate": 1.7008977556109727e-05, "loss": 0.4607, "step": 12000 }, { "epoch": 2.9950124688279303, "grad_norm": 10.32349967956543, "learning_rate": 1.700648379052369e-05, "loss": 0.4499, "step": 12010 }, { "epoch": 2.997506234413965, "grad_norm": 4.519608497619629, "learning_rate": 1.7003990024937657e-05, "loss": 0.4193, "step": 12020 }, { "epoch": 3.0, "grad_norm": 4.505052089691162, "learning_rate": 1.700149625935162e-05, "loss": 0.3652, "step": 12030 }, { "epoch": 3.0, "eval_loss": 0.43368199467658997, "eval_runtime": 67.1343, "eval_samples_per_second": 14.94, "eval_steps_per_second": 14.94, "step": 12030 }, { "epoch": 3.002493765586035, "grad_norm": 6.454890727996826, "learning_rate": 1.6999002493765588e-05, "loss": 0.4374, "step": 12040 }, { "epoch": 3.0049875311720697, "grad_norm": 7.071195125579834, "learning_rate": 1.699650872817955e-05, "loss": 0.4221, "step": 12050 }, { "epoch": 3.0074812967581046, "grad_norm": 5.097235202789307, "learning_rate": 1.6994014962593515e-05, "loss": 0.4258, "step": 12060 }, { "epoch": 3.0099750623441395, "grad_norm": 4.156216144561768, "learning_rate": 1.6991521197007482e-05, "loss": 0.3844, "step": 12070 }, { "epoch": 3.0124688279301743, "grad_norm": 4.2227325439453125, "learning_rate": 1.698902743142145e-05, "loss": 0.3943, "step": 12080 }, { "epoch": 3.0149625935162097, "grad_norm": 5.322434425354004, "learning_rate": 1.6986533665835413e-05, "loss": 0.4466, "step": 12090 }, { "epoch": 3.0174563591022445, "grad_norm": 7.377932548522949, "learning_rate": 1.698403990024938e-05, "loss": 0.4465, "step": 12100 }, { "epoch": 3.0199501246882794, "grad_norm": 4.9225263595581055, "learning_rate": 1.6981546134663343e-05, "loss": 0.389, "step": 12110 }, { "epoch": 3.0224438902743143, "grad_norm": 4.286352157592773, "learning_rate": 1.697905236907731e-05, "loss": 0.3605, "step": 12120 }, { "epoch": 3.024937655860349, "grad_norm": 7.271639347076416, "learning_rate": 1.6976558603491274e-05, "loss": 0.4093, "step": 12130 }, { "epoch": 3.027431421446384, "grad_norm": 9.17022705078125, "learning_rate": 1.6974064837905238e-05, "loss": 0.4649, "step": 12140 }, { "epoch": 3.029925187032419, "grad_norm": 11.9248046875, "learning_rate": 1.6971571072319205e-05, "loss": 0.4102, "step": 12150 }, { "epoch": 3.0324189526184537, "grad_norm": 5.974640369415283, "learning_rate": 1.696907730673317e-05, "loss": 0.4345, "step": 12160 }, { "epoch": 3.0349127182044886, "grad_norm": 7.954996109008789, "learning_rate": 1.6966583541147132e-05, "loss": 0.3358, "step": 12170 }, { "epoch": 3.037406483790524, "grad_norm": 7.6409220695495605, "learning_rate": 1.6964339152119704e-05, "loss": 0.4094, "step": 12180 }, { "epoch": 3.039900249376559, "grad_norm": 6.415146827697754, "learning_rate": 1.6961845386533667e-05, "loss": 0.408, "step": 12190 }, { "epoch": 3.0423940149625937, "grad_norm": 6.253342151641846, "learning_rate": 1.6959351620947634e-05, "loss": 0.385, "step": 12200 }, { "epoch": 3.0448877805486285, "grad_norm": 6.785040378570557, "learning_rate": 1.6956857855361598e-05, "loss": 0.3796, "step": 12210 }, { "epoch": 3.0473815461346634, "grad_norm": 5.031459331512451, "learning_rate": 1.6954364089775565e-05, "loss": 0.3762, "step": 12220 }, { "epoch": 3.0498753117206983, "grad_norm": 5.665307521820068, "learning_rate": 1.695187032418953e-05, "loss": 0.4444, "step": 12230 }, { "epoch": 3.052369077306733, "grad_norm": 9.208979606628418, "learning_rate": 1.6949376558603492e-05, "loss": 0.41, "step": 12240 }, { "epoch": 3.054862842892768, "grad_norm": 6.723043441772461, "learning_rate": 1.694688279301746e-05, "loss": 0.378, "step": 12250 }, { "epoch": 3.057356608478803, "grad_norm": 9.06165599822998, "learning_rate": 1.6944389027431423e-05, "loss": 0.3957, "step": 12260 }, { "epoch": 3.0598503740648377, "grad_norm": 6.359433650970459, "learning_rate": 1.6941895261845386e-05, "loss": 0.3632, "step": 12270 }, { "epoch": 3.0623441396508726, "grad_norm": 4.539393901824951, "learning_rate": 1.6939401496259353e-05, "loss": 0.4039, "step": 12280 }, { "epoch": 3.064837905236908, "grad_norm": 7.049586772918701, "learning_rate": 1.6936907730673317e-05, "loss": 0.4395, "step": 12290 }, { "epoch": 3.067331670822943, "grad_norm": 7.9074835777282715, "learning_rate": 1.6934413965087284e-05, "loss": 0.4092, "step": 12300 }, { "epoch": 3.0698254364089776, "grad_norm": 6.730893611907959, "learning_rate": 1.6931920199501248e-05, "loss": 0.4422, "step": 12310 }, { "epoch": 3.0723192019950125, "grad_norm": 4.9922194480896, "learning_rate": 1.692942643391521e-05, "loss": 0.3308, "step": 12320 }, { "epoch": 3.0748129675810474, "grad_norm": 7.608273506164551, "learning_rate": 1.6926932668329178e-05, "loss": 0.363, "step": 12330 }, { "epoch": 3.0773067331670823, "grad_norm": 7.005294322967529, "learning_rate": 1.6924438902743142e-05, "loss": 0.4175, "step": 12340 }, { "epoch": 3.079800498753117, "grad_norm": 7.616174221038818, "learning_rate": 1.692194513715711e-05, "loss": 0.3848, "step": 12350 }, { "epoch": 3.082294264339152, "grad_norm": 5.1623921394348145, "learning_rate": 1.6919451371571072e-05, "loss": 0.4422, "step": 12360 }, { "epoch": 3.084788029925187, "grad_norm": 7.74609899520874, "learning_rate": 1.691695760598504e-05, "loss": 0.3759, "step": 12370 }, { "epoch": 3.087281795511222, "grad_norm": 5.895921230316162, "learning_rate": 1.6914463840399003e-05, "loss": 0.4285, "step": 12380 }, { "epoch": 3.089775561097257, "grad_norm": 10.510361671447754, "learning_rate": 1.691197007481297e-05, "loss": 0.3517, "step": 12390 }, { "epoch": 3.092269326683292, "grad_norm": 6.013721466064453, "learning_rate": 1.6909476309226934e-05, "loss": 0.3976, "step": 12400 }, { "epoch": 3.0947630922693268, "grad_norm": 7.516636371612549, "learning_rate": 1.69069825436409e-05, "loss": 0.4454, "step": 12410 }, { "epoch": 3.0972568578553616, "grad_norm": 4.560348033905029, "learning_rate": 1.6904488778054864e-05, "loss": 0.4975, "step": 12420 }, { "epoch": 3.0997506234413965, "grad_norm": 5.868087291717529, "learning_rate": 1.690199501246883e-05, "loss": 0.4223, "step": 12430 }, { "epoch": 3.1022443890274314, "grad_norm": 5.38186502456665, "learning_rate": 1.6899501246882795e-05, "loss": 0.3995, "step": 12440 }, { "epoch": 3.1047381546134662, "grad_norm": 6.7590436935424805, "learning_rate": 1.689700748129676e-05, "loss": 0.3922, "step": 12450 }, { "epoch": 3.107231920199501, "grad_norm": 6.269115447998047, "learning_rate": 1.6894513715710726e-05, "loss": 0.4457, "step": 12460 }, { "epoch": 3.109725685785536, "grad_norm": 17.736539840698242, "learning_rate": 1.689201995012469e-05, "loss": 0.4161, "step": 12470 }, { "epoch": 3.112219451371571, "grad_norm": 6.468410491943359, "learning_rate": 1.6889526184538653e-05, "loss": 0.4411, "step": 12480 }, { "epoch": 3.114713216957606, "grad_norm": 11.51278305053711, "learning_rate": 1.688703241895262e-05, "loss": 0.4001, "step": 12490 }, { "epoch": 3.117206982543641, "grad_norm": 7.40097188949585, "learning_rate": 1.6884538653366583e-05, "loss": 0.3631, "step": 12500 }, { "epoch": 3.119700748129676, "grad_norm": 4.9506683349609375, "learning_rate": 1.688204488778055e-05, "loss": 0.4302, "step": 12510 }, { "epoch": 3.1221945137157108, "grad_norm": 4.265248775482178, "learning_rate": 1.6879551122194514e-05, "loss": 0.3701, "step": 12520 }, { "epoch": 3.1246882793017456, "grad_norm": 13.634464263916016, "learning_rate": 1.687705735660848e-05, "loss": 0.3986, "step": 12530 }, { "epoch": 3.1271820448877805, "grad_norm": 6.742231369018555, "learning_rate": 1.6874563591022445e-05, "loss": 0.4203, "step": 12540 }, { "epoch": 3.1296758104738154, "grad_norm": 5.28926944732666, "learning_rate": 1.687206982543641e-05, "loss": 0.4416, "step": 12550 }, { "epoch": 3.1321695760598502, "grad_norm": 5.512608051300049, "learning_rate": 1.6869576059850375e-05, "loss": 0.4417, "step": 12560 }, { "epoch": 3.134663341645885, "grad_norm": 6.725086212158203, "learning_rate": 1.6867082294264342e-05, "loss": 0.3786, "step": 12570 }, { "epoch": 3.1371571072319204, "grad_norm": 6.855806827545166, "learning_rate": 1.6864588528678306e-05, "loss": 0.4306, "step": 12580 }, { "epoch": 3.1396508728179553, "grad_norm": 8.445453643798828, "learning_rate": 1.6862094763092273e-05, "loss": 0.4489, "step": 12590 }, { "epoch": 3.14214463840399, "grad_norm": 4.596224784851074, "learning_rate": 1.6859600997506236e-05, "loss": 0.3834, "step": 12600 }, { "epoch": 3.144638403990025, "grad_norm": 6.923195838928223, "learning_rate": 1.68571072319202e-05, "loss": 0.4399, "step": 12610 }, { "epoch": 3.14713216957606, "grad_norm": 3.491319417953491, "learning_rate": 1.6854613466334167e-05, "loss": 0.3944, "step": 12620 }, { "epoch": 3.1496259351620948, "grad_norm": 6.328956127166748, "learning_rate": 1.685211970074813e-05, "loss": 0.3583, "step": 12630 }, { "epoch": 3.1521197007481296, "grad_norm": 6.093251705169678, "learning_rate": 1.6849625935162098e-05, "loss": 0.3517, "step": 12640 }, { "epoch": 3.1546134663341645, "grad_norm": 6.187097549438477, "learning_rate": 1.684713216957606e-05, "loss": 0.4233, "step": 12650 }, { "epoch": 3.1571072319201994, "grad_norm": 7.890842437744141, "learning_rate": 1.6844638403990025e-05, "loss": 0.3321, "step": 12660 }, { "epoch": 3.1596009975062342, "grad_norm": 6.837558746337891, "learning_rate": 1.6842144638403992e-05, "loss": 0.4426, "step": 12670 }, { "epoch": 3.162094763092269, "grad_norm": 5.910398960113525, "learning_rate": 1.6839650872817956e-05, "loss": 0.4121, "step": 12680 }, { "epoch": 3.1645885286783044, "grad_norm": 6.540918827056885, "learning_rate": 1.683715710723192e-05, "loss": 0.4617, "step": 12690 }, { "epoch": 3.1670822942643393, "grad_norm": 7.381392955780029, "learning_rate": 1.6834663341645886e-05, "loss": 0.4303, "step": 12700 }, { "epoch": 3.169576059850374, "grad_norm": 7.120436668395996, "learning_rate": 1.683216957605985e-05, "loss": 0.4569, "step": 12710 }, { "epoch": 3.172069825436409, "grad_norm": 4.337961196899414, "learning_rate": 1.6829675810473817e-05, "loss": 0.3563, "step": 12720 }, { "epoch": 3.174563591022444, "grad_norm": 5.855359077453613, "learning_rate": 1.682718204488778e-05, "loss": 0.5174, "step": 12730 }, { "epoch": 3.1770573566084788, "grad_norm": 5.234665393829346, "learning_rate": 1.6824688279301747e-05, "loss": 0.3649, "step": 12740 }, { "epoch": 3.1795511221945136, "grad_norm": 7.155577182769775, "learning_rate": 1.6822194513715714e-05, "loss": 0.4493, "step": 12750 }, { "epoch": 3.1820448877805485, "grad_norm": 6.0398406982421875, "learning_rate": 1.6819700748129678e-05, "loss": 0.337, "step": 12760 }, { "epoch": 3.1845386533665834, "grad_norm": 6.6076250076293945, "learning_rate": 1.681720698254364e-05, "loss": 0.3503, "step": 12770 }, { "epoch": 3.1870324189526187, "grad_norm": 6.856815814971924, "learning_rate": 1.681471321695761e-05, "loss": 0.4076, "step": 12780 }, { "epoch": 3.1895261845386536, "grad_norm": 9.046415328979492, "learning_rate": 1.6812219451371572e-05, "loss": 0.4621, "step": 12790 }, { "epoch": 3.1920199501246884, "grad_norm": 6.603618144989014, "learning_rate": 1.680972568578554e-05, "loss": 0.4491, "step": 12800 }, { "epoch": 3.1945137157107233, "grad_norm": 8.234539985656738, "learning_rate": 1.6807231920199503e-05, "loss": 0.355, "step": 12810 }, { "epoch": 3.197007481296758, "grad_norm": 6.2086005210876465, "learning_rate": 1.6804738154613467e-05, "loss": 0.3679, "step": 12820 }, { "epoch": 3.199501246882793, "grad_norm": 5.592929363250732, "learning_rate": 1.6802244389027434e-05, "loss": 0.3855, "step": 12830 }, { "epoch": 3.201995012468828, "grad_norm": 5.385773658752441, "learning_rate": 1.6799750623441397e-05, "loss": 0.4592, "step": 12840 }, { "epoch": 3.2044887780548628, "grad_norm": 4.771883010864258, "learning_rate": 1.679725685785536e-05, "loss": 0.3648, "step": 12850 }, { "epoch": 3.2069825436408976, "grad_norm": 7.125466823577881, "learning_rate": 1.6794763092269328e-05, "loss": 0.4146, "step": 12860 }, { "epoch": 3.2094763092269325, "grad_norm": 5.639003753662109, "learning_rate": 1.679226932668329e-05, "loss": 0.4068, "step": 12870 }, { "epoch": 3.2119700748129674, "grad_norm": 4.13544225692749, "learning_rate": 1.678977556109726e-05, "loss": 0.3519, "step": 12880 }, { "epoch": 3.2144638403990027, "grad_norm": 5.362157344818115, "learning_rate": 1.6787281795511222e-05, "loss": 0.4477, "step": 12890 }, { "epoch": 3.2169576059850375, "grad_norm": 5.619980335235596, "learning_rate": 1.678478802992519e-05, "loss": 0.416, "step": 12900 }, { "epoch": 3.2194513715710724, "grad_norm": 6.641584396362305, "learning_rate": 1.6782294264339153e-05, "loss": 0.3819, "step": 12910 }, { "epoch": 3.2219451371571073, "grad_norm": 6.697845935821533, "learning_rate": 1.677980049875312e-05, "loss": 0.3809, "step": 12920 }, { "epoch": 3.224438902743142, "grad_norm": 9.737817764282227, "learning_rate": 1.6777306733167083e-05, "loss": 0.4195, "step": 12930 }, { "epoch": 3.226932668329177, "grad_norm": 3.7950658798217773, "learning_rate": 1.677481296758105e-05, "loss": 0.4657, "step": 12940 }, { "epoch": 3.229426433915212, "grad_norm": 6.923771858215332, "learning_rate": 1.6772319201995014e-05, "loss": 0.3954, "step": 12950 }, { "epoch": 3.2319201995012468, "grad_norm": 8.065861701965332, "learning_rate": 1.676982543640898e-05, "loss": 0.4534, "step": 12960 }, { "epoch": 3.2344139650872816, "grad_norm": 7.0419769287109375, "learning_rate": 1.6767331670822944e-05, "loss": 0.3881, "step": 12970 }, { "epoch": 3.236907730673317, "grad_norm": 5.294747352600098, "learning_rate": 1.6764837905236908e-05, "loss": 0.4064, "step": 12980 }, { "epoch": 3.239401496259352, "grad_norm": 4.490688800811768, "learning_rate": 1.6762344139650875e-05, "loss": 0.4715, "step": 12990 }, { "epoch": 3.2418952618453867, "grad_norm": 6.766694068908691, "learning_rate": 1.675985037406484e-05, "loss": 0.3785, "step": 13000 }, { "epoch": 3.2443890274314215, "grad_norm": 4.411302089691162, "learning_rate": 1.6757356608478806e-05, "loss": 0.4342, "step": 13010 }, { "epoch": 3.2468827930174564, "grad_norm": 5.128880500793457, "learning_rate": 1.675486284289277e-05, "loss": 0.4306, "step": 13020 }, { "epoch": 3.2493765586034913, "grad_norm": 8.443227767944336, "learning_rate": 1.6752369077306733e-05, "loss": 0.3925, "step": 13030 }, { "epoch": 3.251870324189526, "grad_norm": 5.713132858276367, "learning_rate": 1.67498753117207e-05, "loss": 0.377, "step": 13040 }, { "epoch": 3.254364089775561, "grad_norm": 6.96317720413208, "learning_rate": 1.6747381546134664e-05, "loss": 0.3748, "step": 13050 }, { "epoch": 3.256857855361596, "grad_norm": 5.586208343505859, "learning_rate": 1.6744887780548627e-05, "loss": 0.3898, "step": 13060 }, { "epoch": 3.2593516209476308, "grad_norm": 8.379228591918945, "learning_rate": 1.6742394014962594e-05, "loss": 0.4784, "step": 13070 }, { "epoch": 3.2618453865336656, "grad_norm": 8.025406837463379, "learning_rate": 1.6739900249376558e-05, "loss": 0.3903, "step": 13080 }, { "epoch": 3.264339152119701, "grad_norm": 8.072983741760254, "learning_rate": 1.6737406483790525e-05, "loss": 0.4254, "step": 13090 }, { "epoch": 3.266832917705736, "grad_norm": 5.819389343261719, "learning_rate": 1.6734912718204492e-05, "loss": 0.4922, "step": 13100 }, { "epoch": 3.2693266832917707, "grad_norm": 6.674468040466309, "learning_rate": 1.6732418952618455e-05, "loss": 0.438, "step": 13110 }, { "epoch": 3.2718204488778055, "grad_norm": 5.349725246429443, "learning_rate": 1.6729925187032422e-05, "loss": 0.3625, "step": 13120 }, { "epoch": 3.2743142144638404, "grad_norm": 5.366141319274902, "learning_rate": 1.6727431421446386e-05, "loss": 0.3778, "step": 13130 }, { "epoch": 3.2768079800498753, "grad_norm": 4.81095552444458, "learning_rate": 1.6724937655860353e-05, "loss": 0.3809, "step": 13140 }, { "epoch": 3.27930174563591, "grad_norm": 5.11724328994751, "learning_rate": 1.6722443890274317e-05, "loss": 0.3866, "step": 13150 }, { "epoch": 3.281795511221945, "grad_norm": 4.886108875274658, "learning_rate": 1.671995012468828e-05, "loss": 0.3952, "step": 13160 }, { "epoch": 3.28428927680798, "grad_norm": 6.775887966156006, "learning_rate": 1.6717456359102247e-05, "loss": 0.4382, "step": 13170 }, { "epoch": 3.286783042394015, "grad_norm": 4.1350579261779785, "learning_rate": 1.671496259351621e-05, "loss": 0.3817, "step": 13180 }, { "epoch": 3.28927680798005, "grad_norm": 10.852806091308594, "learning_rate": 1.6712468827930174e-05, "loss": 0.4278, "step": 13190 }, { "epoch": 3.291770573566085, "grad_norm": 4.8792877197265625, "learning_rate": 1.670997506234414e-05, "loss": 0.3502, "step": 13200 }, { "epoch": 3.29426433915212, "grad_norm": 6.0365400314331055, "learning_rate": 1.6707481296758105e-05, "loss": 0.3906, "step": 13210 }, { "epoch": 3.2967581047381547, "grad_norm": 8.025360107421875, "learning_rate": 1.6704987531172072e-05, "loss": 0.4406, "step": 13220 }, { "epoch": 3.2992518703241895, "grad_norm": 7.672976970672607, "learning_rate": 1.6702493765586036e-05, "loss": 0.4107, "step": 13230 }, { "epoch": 3.3017456359102244, "grad_norm": 4.216901779174805, "learning_rate": 1.67e-05, "loss": 0.3429, "step": 13240 }, { "epoch": 3.3042394014962593, "grad_norm": 5.8930583000183105, "learning_rate": 1.6697506234413966e-05, "loss": 0.4131, "step": 13250 }, { "epoch": 3.306733167082294, "grad_norm": 6.364641189575195, "learning_rate": 1.669501246882793e-05, "loss": 0.4126, "step": 13260 }, { "epoch": 3.309226932668329, "grad_norm": 4.867658615112305, "learning_rate": 1.6692518703241897e-05, "loss": 0.3936, "step": 13270 }, { "epoch": 3.311720698254364, "grad_norm": 4.648925304412842, "learning_rate": 1.669002493765586e-05, "loss": 0.356, "step": 13280 }, { "epoch": 3.314214463840399, "grad_norm": 5.959649562835693, "learning_rate": 1.6687531172069828e-05, "loss": 0.4527, "step": 13290 }, { "epoch": 3.316708229426434, "grad_norm": 5.857553005218506, "learning_rate": 1.668503740648379e-05, "loss": 0.3464, "step": 13300 }, { "epoch": 3.319201995012469, "grad_norm": 7.903529167175293, "learning_rate": 1.6682543640897758e-05, "loss": 0.3983, "step": 13310 }, { "epoch": 3.321695760598504, "grad_norm": 7.403318881988525, "learning_rate": 1.6680049875311722e-05, "loss": 0.39, "step": 13320 }, { "epoch": 3.3241895261845387, "grad_norm": 4.094770431518555, "learning_rate": 1.667755610972569e-05, "loss": 0.3534, "step": 13330 }, { "epoch": 3.3266832917705735, "grad_norm": 6.073254585266113, "learning_rate": 1.6675062344139652e-05, "loss": 0.4348, "step": 13340 }, { "epoch": 3.3291770573566084, "grad_norm": 5.4333271980285645, "learning_rate": 1.667256857855362e-05, "loss": 0.4438, "step": 13350 }, { "epoch": 3.3316708229426433, "grad_norm": 8.86145305633545, "learning_rate": 1.6670074812967583e-05, "loss": 0.4223, "step": 13360 }, { "epoch": 3.334164588528678, "grad_norm": 4.68287467956543, "learning_rate": 1.6667581047381547e-05, "loss": 0.419, "step": 13370 }, { "epoch": 3.3366583541147135, "grad_norm": 5.4317827224731445, "learning_rate": 1.6665087281795514e-05, "loss": 0.4438, "step": 13380 }, { "epoch": 3.3391521197007483, "grad_norm": 6.64384651184082, "learning_rate": 1.6662842892768082e-05, "loss": 0.4448, "step": 13390 }, { "epoch": 3.341645885286783, "grad_norm": 6.038212776184082, "learning_rate": 1.6660349127182046e-05, "loss": 0.4357, "step": 13400 }, { "epoch": 3.344139650872818, "grad_norm": 7.981716156005859, "learning_rate": 1.6657855361596013e-05, "loss": 0.4141, "step": 13410 }, { "epoch": 3.346633416458853, "grad_norm": 6.937891006469727, "learning_rate": 1.6655361596009976e-05, "loss": 0.426, "step": 13420 }, { "epoch": 3.349127182044888, "grad_norm": 8.279930114746094, "learning_rate": 1.6652867830423943e-05, "loss": 0.3797, "step": 13430 }, { "epoch": 3.3516209476309227, "grad_norm": 6.905846118927002, "learning_rate": 1.6650374064837907e-05, "loss": 0.3517, "step": 13440 }, { "epoch": 3.3541147132169575, "grad_norm": 4.052714824676514, "learning_rate": 1.664788029925187e-05, "loss": 0.3177, "step": 13450 }, { "epoch": 3.3566084788029924, "grad_norm": 6.537426948547363, "learning_rate": 1.6645386533665837e-05, "loss": 0.4482, "step": 13460 }, { "epoch": 3.3591022443890273, "grad_norm": 5.7491455078125, "learning_rate": 1.66428927680798e-05, "loss": 0.385, "step": 13470 }, { "epoch": 3.361596009975062, "grad_norm": 7.766640663146973, "learning_rate": 1.6640399002493768e-05, "loss": 0.3945, "step": 13480 }, { "epoch": 3.3640897755610975, "grad_norm": 5.237179279327393, "learning_rate": 1.663790523690773e-05, "loss": 0.4368, "step": 13490 }, { "epoch": 3.3665835411471323, "grad_norm": 7.344296932220459, "learning_rate": 1.6635411471321695e-05, "loss": 0.3749, "step": 13500 }, { "epoch": 3.369077306733167, "grad_norm": 5.677723407745361, "learning_rate": 1.6632917705735662e-05, "loss": 0.41, "step": 13510 }, { "epoch": 3.371571072319202, "grad_norm": 6.3155412673950195, "learning_rate": 1.6630423940149626e-05, "loss": 0.4143, "step": 13520 }, { "epoch": 3.374064837905237, "grad_norm": 6.052978038787842, "learning_rate": 1.6627930174563593e-05, "loss": 0.4444, "step": 13530 }, { "epoch": 3.376558603491272, "grad_norm": 5.203304767608643, "learning_rate": 1.6625436408977557e-05, "loss": 0.3819, "step": 13540 }, { "epoch": 3.3790523690773067, "grad_norm": 7.154673099517822, "learning_rate": 1.6622942643391524e-05, "loss": 0.421, "step": 13550 }, { "epoch": 3.3815461346633415, "grad_norm": 6.324401378631592, "learning_rate": 1.6620448877805487e-05, "loss": 0.4142, "step": 13560 }, { "epoch": 3.3840399002493764, "grad_norm": 5.293837070465088, "learning_rate": 1.6617955112219454e-05, "loss": 0.341, "step": 13570 }, { "epoch": 3.3865336658354117, "grad_norm": 6.022460460662842, "learning_rate": 1.6615461346633418e-05, "loss": 0.4016, "step": 13580 }, { "epoch": 3.3890274314214466, "grad_norm": 3.799043655395508, "learning_rate": 1.6612967581047385e-05, "loss": 0.4037, "step": 13590 }, { "epoch": 3.3915211970074814, "grad_norm": 5.069158554077148, "learning_rate": 1.661047381546135e-05, "loss": 0.3429, "step": 13600 }, { "epoch": 3.3940149625935163, "grad_norm": 6.114314079284668, "learning_rate": 1.6607980049875315e-05, "loss": 0.4867, "step": 13610 }, { "epoch": 3.396508728179551, "grad_norm": 5.830687046051025, "learning_rate": 1.660548628428928e-05, "loss": 0.3991, "step": 13620 }, { "epoch": 3.399002493765586, "grad_norm": 10.9564847946167, "learning_rate": 1.6602992518703243e-05, "loss": 0.398, "step": 13630 }, { "epoch": 3.401496259351621, "grad_norm": 6.169339179992676, "learning_rate": 1.660049875311721e-05, "loss": 0.3501, "step": 13640 }, { "epoch": 3.403990024937656, "grad_norm": 4.284657001495361, "learning_rate": 1.6598004987531173e-05, "loss": 0.5303, "step": 13650 }, { "epoch": 3.4064837905236907, "grad_norm": 5.175209999084473, "learning_rate": 1.6595511221945137e-05, "loss": 0.4677, "step": 13660 }, { "epoch": 3.4089775561097255, "grad_norm": 4.939456462860107, "learning_rate": 1.6593017456359104e-05, "loss": 0.5759, "step": 13670 }, { "epoch": 3.4114713216957604, "grad_norm": 6.589729309082031, "learning_rate": 1.6590523690773067e-05, "loss": 0.374, "step": 13680 }, { "epoch": 3.4139650872817953, "grad_norm": 5.515322685241699, "learning_rate": 1.6588029925187034e-05, "loss": 0.3992, "step": 13690 }, { "epoch": 3.4164588528678306, "grad_norm": 7.204263687133789, "learning_rate": 1.6585536159600998e-05, "loss": 0.443, "step": 13700 }, { "epoch": 3.4189526184538654, "grad_norm": 5.2953877449035645, "learning_rate": 1.6583042394014962e-05, "loss": 0.3856, "step": 13710 }, { "epoch": 3.4214463840399003, "grad_norm": 5.203874111175537, "learning_rate": 1.658054862842893e-05, "loss": 0.45, "step": 13720 }, { "epoch": 3.423940149625935, "grad_norm": 5.980413436889648, "learning_rate": 1.6578054862842892e-05, "loss": 0.4115, "step": 13730 }, { "epoch": 3.42643391521197, "grad_norm": 4.752535343170166, "learning_rate": 1.657556109725686e-05, "loss": 0.4392, "step": 13740 }, { "epoch": 3.428927680798005, "grad_norm": 5.2310919761657715, "learning_rate": 1.6573067331670823e-05, "loss": 0.361, "step": 13750 }, { "epoch": 3.43142144638404, "grad_norm": 9.963449478149414, "learning_rate": 1.657057356608479e-05, "loss": 0.5536, "step": 13760 }, { "epoch": 3.4339152119700747, "grad_norm": 5.837104320526123, "learning_rate": 1.6568079800498757e-05, "loss": 0.3722, "step": 13770 }, { "epoch": 3.43640897755611, "grad_norm": 7.110990524291992, "learning_rate": 1.656558603491272e-05, "loss": 0.4229, "step": 13780 }, { "epoch": 3.438902743142145, "grad_norm": 5.253129005432129, "learning_rate": 1.6563092269326684e-05, "loss": 0.3619, "step": 13790 }, { "epoch": 3.4413965087281797, "grad_norm": 7.7098517417907715, "learning_rate": 1.656059850374065e-05, "loss": 0.452, "step": 13800 }, { "epoch": 3.4438902743142146, "grad_norm": 6.151096820831299, "learning_rate": 1.6558104738154615e-05, "loss": 0.4055, "step": 13810 }, { "epoch": 3.4463840399002494, "grad_norm": 6.894789218902588, "learning_rate": 1.6555610972568582e-05, "loss": 0.3736, "step": 13820 }, { "epoch": 3.4488778054862843, "grad_norm": 5.148143291473389, "learning_rate": 1.6553117206982545e-05, "loss": 0.462, "step": 13830 }, { "epoch": 3.451371571072319, "grad_norm": 5.808004856109619, "learning_rate": 1.655062344139651e-05, "loss": 0.3789, "step": 13840 }, { "epoch": 3.453865336658354, "grad_norm": 4.1240339279174805, "learning_rate": 1.6548129675810476e-05, "loss": 0.4021, "step": 13850 }, { "epoch": 3.456359102244389, "grad_norm": 4.346762180328369, "learning_rate": 1.654563591022444e-05, "loss": 0.3446, "step": 13860 }, { "epoch": 3.458852867830424, "grad_norm": 7.146320819854736, "learning_rate": 1.6543142144638403e-05, "loss": 0.395, "step": 13870 }, { "epoch": 3.4613466334164587, "grad_norm": 5.127925395965576, "learning_rate": 1.654064837905237e-05, "loss": 0.3873, "step": 13880 }, { "epoch": 3.4638403990024935, "grad_norm": 6.493956565856934, "learning_rate": 1.6538154613466334e-05, "loss": 0.4049, "step": 13890 }, { "epoch": 3.466334164588529, "grad_norm": 7.458160400390625, "learning_rate": 1.65356608478803e-05, "loss": 0.3721, "step": 13900 }, { "epoch": 3.4688279301745637, "grad_norm": 6.495090007781982, "learning_rate": 1.6533167082294265e-05, "loss": 0.4702, "step": 13910 }, { "epoch": 3.4713216957605986, "grad_norm": 6.47518253326416, "learning_rate": 1.653067331670823e-05, "loss": 0.4566, "step": 13920 }, { "epoch": 3.4738154613466334, "grad_norm": 5.553424835205078, "learning_rate": 1.6528179551122195e-05, "loss": 0.3799, "step": 13930 }, { "epoch": 3.4763092269326683, "grad_norm": 5.419945240020752, "learning_rate": 1.6525685785536162e-05, "loss": 0.4483, "step": 13940 }, { "epoch": 3.478802992518703, "grad_norm": 5.477248191833496, "learning_rate": 1.6523192019950126e-05, "loss": 0.326, "step": 13950 }, { "epoch": 3.481296758104738, "grad_norm": 7.121800899505615, "learning_rate": 1.6520698254364093e-05, "loss": 0.3673, "step": 13960 }, { "epoch": 3.483790523690773, "grad_norm": 6.306591510772705, "learning_rate": 1.6518204488778056e-05, "loss": 0.4262, "step": 13970 }, { "epoch": 3.4862842892768082, "grad_norm": 8.00887393951416, "learning_rate": 1.6515710723192023e-05, "loss": 0.4345, "step": 13980 }, { "epoch": 3.488778054862843, "grad_norm": 5.474499702453613, "learning_rate": 1.6513216957605987e-05, "loss": 0.4994, "step": 13990 }, { "epoch": 3.491271820448878, "grad_norm": 6.708861827850342, "learning_rate": 1.651072319201995e-05, "loss": 0.3884, "step": 14000 }, { "epoch": 3.493765586034913, "grad_norm": 4.315401077270508, "learning_rate": 1.6508229426433918e-05, "loss": 0.3686, "step": 14010 }, { "epoch": 3.4962593516209477, "grad_norm": 5.418401718139648, "learning_rate": 1.650573566084788e-05, "loss": 0.41, "step": 14020 }, { "epoch": 3.4987531172069826, "grad_norm": 5.228391647338867, "learning_rate": 1.6503241895261848e-05, "loss": 0.4117, "step": 14030 }, { "epoch": 3.5012468827930174, "grad_norm": 5.129116535186768, "learning_rate": 1.6500748129675812e-05, "loss": 0.3595, "step": 14040 }, { "epoch": 3.5037406483790523, "grad_norm": 3.8561089038848877, "learning_rate": 1.6498254364089775e-05, "loss": 0.3515, "step": 14050 }, { "epoch": 3.506234413965087, "grad_norm": 7.8990278244018555, "learning_rate": 1.6495760598503742e-05, "loss": 0.3853, "step": 14060 }, { "epoch": 3.508728179551122, "grad_norm": 8.074807167053223, "learning_rate": 1.6493266832917706e-05, "loss": 0.4222, "step": 14070 }, { "epoch": 3.511221945137157, "grad_norm": 7.334726333618164, "learning_rate": 1.649077306733167e-05, "loss": 0.4673, "step": 14080 }, { "epoch": 3.5137157107231918, "grad_norm": 5.140750885009766, "learning_rate": 1.6488279301745637e-05, "loss": 0.3758, "step": 14090 }, { "epoch": 3.516209476309227, "grad_norm": 5.326815605163574, "learning_rate": 1.64857855361596e-05, "loss": 0.3841, "step": 14100 }, { "epoch": 3.518703241895262, "grad_norm": 4.542361259460449, "learning_rate": 1.6483291770573567e-05, "loss": 0.4115, "step": 14110 }, { "epoch": 3.521197007481297, "grad_norm": 9.180794715881348, "learning_rate": 1.6480798004987534e-05, "loss": 0.3703, "step": 14120 }, { "epoch": 3.5236907730673317, "grad_norm": 6.60845422744751, "learning_rate": 1.6478304239401498e-05, "loss": 0.396, "step": 14130 }, { "epoch": 3.5261845386533666, "grad_norm": 13.184927940368652, "learning_rate": 1.6475810473815465e-05, "loss": 0.444, "step": 14140 }, { "epoch": 3.5286783042394014, "grad_norm": 8.303811073303223, "learning_rate": 1.647331670822943e-05, "loss": 0.3708, "step": 14150 }, { "epoch": 3.5311720698254363, "grad_norm": 7.675179958343506, "learning_rate": 1.6470822942643392e-05, "loss": 0.4134, "step": 14160 }, { "epoch": 3.533665835411471, "grad_norm": 7.668274879455566, "learning_rate": 1.646832917705736e-05, "loss": 0.3957, "step": 14170 }, { "epoch": 3.5361596009975065, "grad_norm": 19.888633728027344, "learning_rate": 1.6465835411471323e-05, "loss": 0.5228, "step": 14180 }, { "epoch": 3.5386533665835413, "grad_norm": 6.9068474769592285, "learning_rate": 1.646334164588529e-05, "loss": 0.381, "step": 14190 }, { "epoch": 3.541147132169576, "grad_norm": 9.011292457580566, "learning_rate": 1.6460847880299253e-05, "loss": 0.4341, "step": 14200 }, { "epoch": 3.543640897755611, "grad_norm": 7.827251434326172, "learning_rate": 1.6458354114713217e-05, "loss": 0.4266, "step": 14210 }, { "epoch": 3.546134663341646, "grad_norm": 7.010087966918945, "learning_rate": 1.6455860349127184e-05, "loss": 0.4038, "step": 14220 }, { "epoch": 3.548628428927681, "grad_norm": 7.390041828155518, "learning_rate": 1.6453366583541148e-05, "loss": 0.4247, "step": 14230 }, { "epoch": 3.5511221945137157, "grad_norm": 7.997394561767578, "learning_rate": 1.645087281795511e-05, "loss": 0.4505, "step": 14240 }, { "epoch": 3.5536159600997506, "grad_norm": 7.814496040344238, "learning_rate": 1.6448379052369078e-05, "loss": 0.4635, "step": 14250 }, { "epoch": 3.5561097256857854, "grad_norm": 4.237307548522949, "learning_rate": 1.6445885286783042e-05, "loss": 0.3517, "step": 14260 }, { "epoch": 3.5586034912718203, "grad_norm": 5.595398426055908, "learning_rate": 1.644339152119701e-05, "loss": 0.3995, "step": 14270 }, { "epoch": 3.561097256857855, "grad_norm": 6.063884258270264, "learning_rate": 1.6440897755610972e-05, "loss": 0.3919, "step": 14280 }, { "epoch": 3.56359102244389, "grad_norm": 16.324724197387695, "learning_rate": 1.643840399002494e-05, "loss": 0.4988, "step": 14290 }, { "epoch": 3.5660847880299253, "grad_norm": 5.327268600463867, "learning_rate": 1.6435910224438903e-05, "loss": 0.4688, "step": 14300 }, { "epoch": 3.56857855361596, "grad_norm": 9.353160858154297, "learning_rate": 1.643341645885287e-05, "loss": 0.56, "step": 14310 }, { "epoch": 3.571072319201995, "grad_norm": 4.720142364501953, "learning_rate": 1.6430922693266834e-05, "loss": 0.4159, "step": 14320 }, { "epoch": 3.57356608478803, "grad_norm": 4.005600452423096, "learning_rate": 1.64284289276808e-05, "loss": 0.3677, "step": 14330 }, { "epoch": 3.576059850374065, "grad_norm": 9.215933799743652, "learning_rate": 1.6425935162094764e-05, "loss": 0.432, "step": 14340 }, { "epoch": 3.5785536159600997, "grad_norm": 4.696425437927246, "learning_rate": 1.642344139650873e-05, "loss": 0.3727, "step": 14350 }, { "epoch": 3.5810473815461346, "grad_norm": 4.839138984680176, "learning_rate": 1.6420947630922695e-05, "loss": 0.4013, "step": 14360 }, { "epoch": 3.5835411471321694, "grad_norm": 7.069340705871582, "learning_rate": 1.641845386533666e-05, "loss": 0.3633, "step": 14370 }, { "epoch": 3.5860349127182047, "grad_norm": 5.56005334854126, "learning_rate": 1.6415960099750626e-05, "loss": 0.3376, "step": 14380 }, { "epoch": 3.5885286783042396, "grad_norm": 5.112822532653809, "learning_rate": 1.641346633416459e-05, "loss": 0.4179, "step": 14390 }, { "epoch": 3.5910224438902745, "grad_norm": 6.113282203674316, "learning_rate": 1.6410972568578556e-05, "loss": 0.4436, "step": 14400 }, { "epoch": 3.5935162094763093, "grad_norm": 4.90158748626709, "learning_rate": 1.640847880299252e-05, "loss": 0.4283, "step": 14410 }, { "epoch": 3.596009975062344, "grad_norm": 6.325465679168701, "learning_rate": 1.6405985037406483e-05, "loss": 0.4857, "step": 14420 }, { "epoch": 3.598503740648379, "grad_norm": 8.354521751403809, "learning_rate": 1.640349127182045e-05, "loss": 0.4173, "step": 14430 }, { "epoch": 3.600997506234414, "grad_norm": 5.772945880889893, "learning_rate": 1.6400997506234414e-05, "loss": 0.4367, "step": 14440 }, { "epoch": 3.603491271820449, "grad_norm": 5.8793864250183105, "learning_rate": 1.6398503740648378e-05, "loss": 0.351, "step": 14450 }, { "epoch": 3.6059850374064837, "grad_norm": 4.758648872375488, "learning_rate": 1.6396009975062345e-05, "loss": 0.4236, "step": 14460 }, { "epoch": 3.6084788029925186, "grad_norm": 9.100720405578613, "learning_rate": 1.639351620947631e-05, "loss": 0.3892, "step": 14470 }, { "epoch": 3.6109725685785534, "grad_norm": 5.36121129989624, "learning_rate": 1.6391022443890275e-05, "loss": 0.4256, "step": 14480 }, { "epoch": 3.6134663341645883, "grad_norm": 6.1626505851745605, "learning_rate": 1.6388528678304242e-05, "loss": 0.4083, "step": 14490 }, { "epoch": 3.6159600997506236, "grad_norm": 4.670628070831299, "learning_rate": 1.6386034912718206e-05, "loss": 0.4056, "step": 14500 }, { "epoch": 3.6184538653366585, "grad_norm": 5.522136211395264, "learning_rate": 1.6383541147132173e-05, "loss": 0.4228, "step": 14510 }, { "epoch": 3.6209476309226933, "grad_norm": 4.898807048797607, "learning_rate": 1.6381047381546137e-05, "loss": 0.3984, "step": 14520 }, { "epoch": 3.623441396508728, "grad_norm": 6.272655963897705, "learning_rate": 1.6378553615960104e-05, "loss": 0.4146, "step": 14530 }, { "epoch": 3.625935162094763, "grad_norm": 5.485193729400635, "learning_rate": 1.6376059850374067e-05, "loss": 0.3955, "step": 14540 }, { "epoch": 3.628428927680798, "grad_norm": 5.901163578033447, "learning_rate": 1.637356608478803e-05, "loss": 0.4093, "step": 14550 }, { "epoch": 3.630922693266833, "grad_norm": 7.169445991516113, "learning_rate": 1.6371072319201998e-05, "loss": 0.4289, "step": 14560 }, { "epoch": 3.6334164588528677, "grad_norm": 7.727000713348389, "learning_rate": 1.636857855361596e-05, "loss": 0.4101, "step": 14570 }, { "epoch": 3.635910224438903, "grad_norm": 5.395565509796143, "learning_rate": 1.6366084788029925e-05, "loss": 0.4015, "step": 14580 }, { "epoch": 3.638403990024938, "grad_norm": 8.575839042663574, "learning_rate": 1.6363591022443892e-05, "loss": 0.4276, "step": 14590 }, { "epoch": 3.6408977556109727, "grad_norm": 6.496227264404297, "learning_rate": 1.6361097256857856e-05, "loss": 0.3814, "step": 14600 }, { "epoch": 3.6433915211970076, "grad_norm": 5.557234764099121, "learning_rate": 1.6358603491271823e-05, "loss": 0.3926, "step": 14610 }, { "epoch": 3.6458852867830425, "grad_norm": 9.716835021972656, "learning_rate": 1.6356109725685786e-05, "loss": 0.3903, "step": 14620 }, { "epoch": 3.6483790523690773, "grad_norm": 4.228976726531982, "learning_rate": 1.635361596009975e-05, "loss": 0.4135, "step": 14630 }, { "epoch": 3.650872817955112, "grad_norm": 9.507262229919434, "learning_rate": 1.6351122194513717e-05, "loss": 0.4225, "step": 14640 }, { "epoch": 3.653366583541147, "grad_norm": 8.070616722106934, "learning_rate": 1.634862842892768e-05, "loss": 0.3617, "step": 14650 }, { "epoch": 3.655860349127182, "grad_norm": 7.822412490844727, "learning_rate": 1.6346134663341647e-05, "loss": 0.4306, "step": 14660 }, { "epoch": 3.658354114713217, "grad_norm": 6.324841022491455, "learning_rate": 1.634364089775561e-05, "loss": 0.3662, "step": 14670 }, { "epoch": 3.6608478802992517, "grad_norm": 4.228864669799805, "learning_rate": 1.6341147132169578e-05, "loss": 0.3822, "step": 14680 }, { "epoch": 3.6633416458852865, "grad_norm": 6.306748867034912, "learning_rate": 1.6338653366583542e-05, "loss": 0.4123, "step": 14690 }, { "epoch": 3.665835411471322, "grad_norm": 6.977389812469482, "learning_rate": 1.633615960099751e-05, "loss": 0.4305, "step": 14700 }, { "epoch": 3.6683291770573567, "grad_norm": 5.803625583648682, "learning_rate": 1.6333665835411472e-05, "loss": 0.4697, "step": 14710 }, { "epoch": 3.6708229426433916, "grad_norm": 5.12143087387085, "learning_rate": 1.633117206982544e-05, "loss": 0.3995, "step": 14720 }, { "epoch": 3.6733167082294265, "grad_norm": 4.334492206573486, "learning_rate": 1.6328678304239403e-05, "loss": 0.4086, "step": 14730 }, { "epoch": 3.6758104738154613, "grad_norm": 4.964009761810303, "learning_rate": 1.6326184538653367e-05, "loss": 0.3887, "step": 14740 }, { "epoch": 3.678304239401496, "grad_norm": 5.872185707092285, "learning_rate": 1.6323690773067334e-05, "loss": 0.4069, "step": 14750 }, { "epoch": 3.680798004987531, "grad_norm": 5.125484466552734, "learning_rate": 1.6321197007481297e-05, "loss": 0.4077, "step": 14760 }, { "epoch": 3.683291770573566, "grad_norm": 6.067224502563477, "learning_rate": 1.6318703241895264e-05, "loss": 0.4064, "step": 14770 }, { "epoch": 3.6857855361596013, "grad_norm": 5.0722455978393555, "learning_rate": 1.6316209476309228e-05, "loss": 0.3666, "step": 14780 }, { "epoch": 3.688279301745636, "grad_norm": 8.505644798278809, "learning_rate": 1.631371571072319e-05, "loss": 0.4493, "step": 14790 }, { "epoch": 3.690773067331671, "grad_norm": 4.582368850708008, "learning_rate": 1.631122194513716e-05, "loss": 0.4366, "step": 14800 }, { "epoch": 3.693266832917706, "grad_norm": 8.571370124816895, "learning_rate": 1.6308728179551122e-05, "loss": 0.3407, "step": 14810 }, { "epoch": 3.6957605985037407, "grad_norm": 4.467719554901123, "learning_rate": 1.630623441396509e-05, "loss": 0.4311, "step": 14820 }, { "epoch": 3.6982543640897756, "grad_norm": 5.480850696563721, "learning_rate": 1.6303740648379053e-05, "loss": 0.3438, "step": 14830 }, { "epoch": 3.7007481296758105, "grad_norm": 4.664899826049805, "learning_rate": 1.630124688279302e-05, "loss": 0.3888, "step": 14840 }, { "epoch": 3.7032418952618453, "grad_norm": 5.578117370605469, "learning_rate": 1.6298753117206983e-05, "loss": 0.3614, "step": 14850 }, { "epoch": 3.70573566084788, "grad_norm": 6.581024646759033, "learning_rate": 1.629625935162095e-05, "loss": 0.5117, "step": 14860 }, { "epoch": 3.708229426433915, "grad_norm": 7.885854721069336, "learning_rate": 1.6293765586034914e-05, "loss": 0.3758, "step": 14870 }, { "epoch": 3.71072319201995, "grad_norm": 6.261788368225098, "learning_rate": 1.629127182044888e-05, "loss": 0.4068, "step": 14880 }, { "epoch": 3.713216957605985, "grad_norm": 5.979361057281494, "learning_rate": 1.6288778054862845e-05, "loss": 0.3442, "step": 14890 }, { "epoch": 3.71571072319202, "grad_norm": 6.158860683441162, "learning_rate": 1.628628428927681e-05, "loss": 0.4162, "step": 14900 }, { "epoch": 3.718204488778055, "grad_norm": 6.998167991638184, "learning_rate": 1.6283790523690775e-05, "loss": 0.4444, "step": 14910 }, { "epoch": 3.72069825436409, "grad_norm": 4.483408451080322, "learning_rate": 1.628129675810474e-05, "loss": 0.4146, "step": 14920 }, { "epoch": 3.7231920199501247, "grad_norm": 7.948692321777344, "learning_rate": 1.6278802992518706e-05, "loss": 0.4934, "step": 14930 }, { "epoch": 3.7256857855361596, "grad_norm": 9.365506172180176, "learning_rate": 1.627630922693267e-05, "loss": 0.3799, "step": 14940 }, { "epoch": 3.7281795511221945, "grad_norm": 4.377537727355957, "learning_rate": 1.6273815461346633e-05, "loss": 0.3687, "step": 14950 }, { "epoch": 3.7306733167082293, "grad_norm": 14.354135513305664, "learning_rate": 1.62713216957606e-05, "loss": 0.4541, "step": 14960 }, { "epoch": 3.733167082294264, "grad_norm": 5.008823871612549, "learning_rate": 1.6268827930174564e-05, "loss": 0.46, "step": 14970 }, { "epoch": 3.7356608478802995, "grad_norm": 6.697189807891846, "learning_rate": 1.626633416458853e-05, "loss": 0.3777, "step": 14980 }, { "epoch": 3.7381546134663344, "grad_norm": 6.032017707824707, "learning_rate": 1.6263840399002494e-05, "loss": 0.3748, "step": 14990 }, { "epoch": 3.7406483790523692, "grad_norm": 5.817931652069092, "learning_rate": 1.6261346633416458e-05, "loss": 0.4145, "step": 15000 }, { "epoch": 3.743142144638404, "grad_norm": 5.026968479156494, "learning_rate": 1.6258852867830425e-05, "loss": 0.3745, "step": 15010 }, { "epoch": 3.745635910224439, "grad_norm": 4.957605361938477, "learning_rate": 1.625635910224439e-05, "loss": 0.427, "step": 15020 }, { "epoch": 3.748129675810474, "grad_norm": 6.634207248687744, "learning_rate": 1.6253865336658355e-05, "loss": 0.3864, "step": 15030 }, { "epoch": 3.7506234413965087, "grad_norm": 5.489914894104004, "learning_rate": 1.625137157107232e-05, "loss": 0.4067, "step": 15040 }, { "epoch": 3.7531172069825436, "grad_norm": 4.863682270050049, "learning_rate": 1.6248877805486286e-05, "loss": 0.3701, "step": 15050 }, { "epoch": 3.7556109725685785, "grad_norm": 5.958093643188477, "learning_rate": 1.6246384039900253e-05, "loss": 0.5015, "step": 15060 }, { "epoch": 3.7581047381546133, "grad_norm": 6.1467084884643555, "learning_rate": 1.6243890274314217e-05, "loss": 0.3989, "step": 15070 }, { "epoch": 3.760598503740648, "grad_norm": 5.685709476470947, "learning_rate": 1.624139650872818e-05, "loss": 0.4924, "step": 15080 }, { "epoch": 3.763092269326683, "grad_norm": 5.435773849487305, "learning_rate": 1.6238902743142147e-05, "loss": 0.3388, "step": 15090 }, { "epoch": 3.765586034912718, "grad_norm": 4.414902687072754, "learning_rate": 1.623640897755611e-05, "loss": 0.4236, "step": 15100 }, { "epoch": 3.7680798004987532, "grad_norm": 4.539722919464111, "learning_rate": 1.6233915211970078e-05, "loss": 0.457, "step": 15110 }, { "epoch": 3.770573566084788, "grad_norm": 7.175182342529297, "learning_rate": 1.623142144638404e-05, "loss": 0.3434, "step": 15120 }, { "epoch": 3.773067331670823, "grad_norm": 7.918752670288086, "learning_rate": 1.6228927680798005e-05, "loss": 0.4374, "step": 15130 }, { "epoch": 3.775561097256858, "grad_norm": 5.573683738708496, "learning_rate": 1.6226433915211972e-05, "loss": 0.4069, "step": 15140 }, { "epoch": 3.7780548628428927, "grad_norm": 6.013986110687256, "learning_rate": 1.6223940149625936e-05, "loss": 0.372, "step": 15150 }, { "epoch": 3.7805486284289276, "grad_norm": 7.387099742889404, "learning_rate": 1.62214463840399e-05, "loss": 0.4348, "step": 15160 }, { "epoch": 3.7830423940149625, "grad_norm": 8.931721687316895, "learning_rate": 1.6218952618453866e-05, "loss": 0.4304, "step": 15170 }, { "epoch": 3.7855361596009978, "grad_norm": 6.92478084564209, "learning_rate": 1.621645885286783e-05, "loss": 0.4486, "step": 15180 }, { "epoch": 3.7880299251870326, "grad_norm": 5.480514049530029, "learning_rate": 1.6213965087281797e-05, "loss": 0.4168, "step": 15190 }, { "epoch": 3.7905236907730675, "grad_norm": 6.616566181182861, "learning_rate": 1.621147132169576e-05, "loss": 0.3913, "step": 15200 }, { "epoch": 3.7930174563591024, "grad_norm": 6.839771270751953, "learning_rate": 1.6208977556109728e-05, "loss": 0.4484, "step": 15210 }, { "epoch": 3.7955112219451372, "grad_norm": 17.69823455810547, "learning_rate": 1.620648379052369e-05, "loss": 0.4469, "step": 15220 }, { "epoch": 3.798004987531172, "grad_norm": 5.237950325012207, "learning_rate": 1.6203990024937658e-05, "loss": 0.4347, "step": 15230 }, { "epoch": 3.800498753117207, "grad_norm": 11.648773193359375, "learning_rate": 1.6201496259351622e-05, "loss": 0.4735, "step": 15240 }, { "epoch": 3.802992518703242, "grad_norm": 4.579110622406006, "learning_rate": 1.619900249376559e-05, "loss": 0.3873, "step": 15250 }, { "epoch": 3.8054862842892767, "grad_norm": 5.944640636444092, "learning_rate": 1.6196508728179553e-05, "loss": 0.4153, "step": 15260 }, { "epoch": 3.8079800498753116, "grad_norm": 6.9358696937561035, "learning_rate": 1.619401496259352e-05, "loss": 0.3617, "step": 15270 }, { "epoch": 3.8104738154613464, "grad_norm": 6.954071521759033, "learning_rate": 1.6191521197007483e-05, "loss": 0.4271, "step": 15280 }, { "epoch": 3.8129675810473813, "grad_norm": 5.045011520385742, "learning_rate": 1.6189027431421447e-05, "loss": 0.4202, "step": 15290 }, { "epoch": 3.815461346633416, "grad_norm": 5.211480140686035, "learning_rate": 1.6186533665835414e-05, "loss": 0.4072, "step": 15300 }, { "epoch": 3.8179551122194515, "grad_norm": 5.901054382324219, "learning_rate": 1.6184039900249377e-05, "loss": 0.3808, "step": 15310 }, { "epoch": 3.8204488778054864, "grad_norm": 7.312894821166992, "learning_rate": 1.6181546134663344e-05, "loss": 0.4005, "step": 15320 }, { "epoch": 3.8229426433915212, "grad_norm": 4.741137981414795, "learning_rate": 1.6179052369077308e-05, "loss": 0.4235, "step": 15330 }, { "epoch": 3.825436408977556, "grad_norm": 14.889175415039062, "learning_rate": 1.617655860349127e-05, "loss": 0.5169, "step": 15340 }, { "epoch": 3.827930174563591, "grad_norm": 6.416481018066406, "learning_rate": 1.617406483790524e-05, "loss": 0.4472, "step": 15350 }, { "epoch": 3.830423940149626, "grad_norm": 4.6261701583862305, "learning_rate": 1.6171571072319202e-05, "loss": 0.4069, "step": 15360 }, { "epoch": 3.8329177057356607, "grad_norm": 4.661010265350342, "learning_rate": 1.6169077306733166e-05, "loss": 0.4312, "step": 15370 }, { "epoch": 3.835411471321696, "grad_norm": 5.187774658203125, "learning_rate": 1.6166583541147133e-05, "loss": 0.3947, "step": 15380 }, { "epoch": 3.837905236907731, "grad_norm": 4.887078762054443, "learning_rate": 1.6164089775561096e-05, "loss": 0.3661, "step": 15390 }, { "epoch": 3.8403990024937658, "grad_norm": 4.326541423797607, "learning_rate": 1.6161596009975063e-05, "loss": 0.4481, "step": 15400 }, { "epoch": 3.8428927680798006, "grad_norm": 6.179752826690674, "learning_rate": 1.615910224438903e-05, "loss": 0.424, "step": 15410 }, { "epoch": 3.8453865336658355, "grad_norm": 5.251521110534668, "learning_rate": 1.6156608478802994e-05, "loss": 0.4454, "step": 15420 }, { "epoch": 3.8478802992518704, "grad_norm": 9.553689956665039, "learning_rate": 1.615411471321696e-05, "loss": 0.4171, "step": 15430 }, { "epoch": 3.8503740648379052, "grad_norm": 5.729796409606934, "learning_rate": 1.6151620947630925e-05, "loss": 0.4229, "step": 15440 }, { "epoch": 3.85286783042394, "grad_norm": 4.4921345710754395, "learning_rate": 1.614912718204489e-05, "loss": 0.3616, "step": 15450 }, { "epoch": 3.855361596009975, "grad_norm": 5.6195502281188965, "learning_rate": 1.6146633416458855e-05, "loss": 0.3688, "step": 15460 }, { "epoch": 3.85785536159601, "grad_norm": 6.036675930023193, "learning_rate": 1.614413965087282e-05, "loss": 0.379, "step": 15470 }, { "epoch": 3.8603491271820447, "grad_norm": 6.581831455230713, "learning_rate": 1.6141645885286786e-05, "loss": 0.4892, "step": 15480 }, { "epoch": 3.8628428927680796, "grad_norm": 9.268636703491211, "learning_rate": 1.613915211970075e-05, "loss": 0.4021, "step": 15490 }, { "epoch": 3.8653366583541144, "grad_norm": 6.355315685272217, "learning_rate": 1.6136658354114713e-05, "loss": 0.4159, "step": 15500 }, { "epoch": 3.8678304239401498, "grad_norm": 6.00802755355835, "learning_rate": 1.613416458852868e-05, "loss": 0.4285, "step": 15510 }, { "epoch": 3.8703241895261846, "grad_norm": 5.517050743103027, "learning_rate": 1.6131670822942644e-05, "loss": 0.3094, "step": 15520 }, { "epoch": 3.8728179551122195, "grad_norm": 5.958836078643799, "learning_rate": 1.612917705735661e-05, "loss": 0.5116, "step": 15530 }, { "epoch": 3.8753117206982544, "grad_norm": 4.79234504699707, "learning_rate": 1.6126683291770574e-05, "loss": 0.3931, "step": 15540 }, { "epoch": 3.8778054862842892, "grad_norm": 7.781925201416016, "learning_rate": 1.6124189526184538e-05, "loss": 0.4291, "step": 15550 }, { "epoch": 3.880299251870324, "grad_norm": 6.349798679351807, "learning_rate": 1.6121695760598505e-05, "loss": 0.5231, "step": 15560 }, { "epoch": 3.882793017456359, "grad_norm": 6.636193752288818, "learning_rate": 1.611920199501247e-05, "loss": 0.369, "step": 15570 }, { "epoch": 3.8852867830423943, "grad_norm": 5.7847580909729, "learning_rate": 1.6116708229426436e-05, "loss": 0.4435, "step": 15580 }, { "epoch": 3.887780548628429, "grad_norm": 6.807628154754639, "learning_rate": 1.61142144638404e-05, "loss": 0.4008, "step": 15590 }, { "epoch": 3.890274314214464, "grad_norm": 8.10521125793457, "learning_rate": 1.6111720698254366e-05, "loss": 0.4364, "step": 15600 }, { "epoch": 3.892768079800499, "grad_norm": 5.665589332580566, "learning_rate": 1.610922693266833e-05, "loss": 0.4302, "step": 15610 }, { "epoch": 3.8952618453865338, "grad_norm": 6.3911943435668945, "learning_rate": 1.6106733167082297e-05, "loss": 0.4188, "step": 15620 }, { "epoch": 3.8977556109725686, "grad_norm": 7.833658695220947, "learning_rate": 1.610423940149626e-05, "loss": 0.3424, "step": 15630 }, { "epoch": 3.9002493765586035, "grad_norm": 5.939789772033691, "learning_rate": 1.6101745635910228e-05, "loss": 0.4221, "step": 15640 }, { "epoch": 3.9027431421446384, "grad_norm": 6.181157112121582, "learning_rate": 1.609925187032419e-05, "loss": 0.4927, "step": 15650 }, { "epoch": 3.9052369077306732, "grad_norm": 10.083945274353027, "learning_rate": 1.6096758104738155e-05, "loss": 0.4323, "step": 15660 }, { "epoch": 3.907730673316708, "grad_norm": 7.5239129066467285, "learning_rate": 1.6094264339152122e-05, "loss": 0.4576, "step": 15670 }, { "epoch": 3.910224438902743, "grad_norm": 7.4439311027526855, "learning_rate": 1.6091770573566085e-05, "loss": 0.373, "step": 15680 }, { "epoch": 3.912718204488778, "grad_norm": 8.566959381103516, "learning_rate": 1.6089276807980052e-05, "loss": 0.4426, "step": 15690 }, { "epoch": 3.9152119700748127, "grad_norm": 6.725310802459717, "learning_rate": 1.6086783042394016e-05, "loss": 0.4016, "step": 15700 }, { "epoch": 3.917705735660848, "grad_norm": 7.9485392570495605, "learning_rate": 1.608428927680798e-05, "loss": 0.416, "step": 15710 }, { "epoch": 3.920199501246883, "grad_norm": 7.6618170738220215, "learning_rate": 1.6081795511221947e-05, "loss": 0.3751, "step": 15720 }, { "epoch": 3.9226932668329177, "grad_norm": 5.6373291015625, "learning_rate": 1.607930174563591e-05, "loss": 0.3605, "step": 15730 }, { "epoch": 3.9251870324189526, "grad_norm": 4.732673168182373, "learning_rate": 1.6076807980049874e-05, "loss": 0.3484, "step": 15740 }, { "epoch": 3.9276807980049875, "grad_norm": 6.598507404327393, "learning_rate": 1.607431421446384e-05, "loss": 0.381, "step": 15750 }, { "epoch": 3.9301745635910224, "grad_norm": 4.610682010650635, "learning_rate": 1.6071820448877808e-05, "loss": 0.4001, "step": 15760 }, { "epoch": 3.932668329177057, "grad_norm": 4.403365135192871, "learning_rate": 1.606932668329177e-05, "loss": 0.4298, "step": 15770 }, { "epoch": 3.9351620947630925, "grad_norm": 5.305014133453369, "learning_rate": 1.606683291770574e-05, "loss": 0.361, "step": 15780 }, { "epoch": 3.9376558603491274, "grad_norm": 7.3379998207092285, "learning_rate": 1.6064339152119702e-05, "loss": 0.4318, "step": 15790 }, { "epoch": 3.9401496259351623, "grad_norm": 4.637630939483643, "learning_rate": 1.606184538653367e-05, "loss": 0.3809, "step": 15800 }, { "epoch": 3.942643391521197, "grad_norm": 7.531750202178955, "learning_rate": 1.6059351620947633e-05, "loss": 0.3713, "step": 15810 }, { "epoch": 3.945137157107232, "grad_norm": 5.172116756439209, "learning_rate": 1.60568578553616e-05, "loss": 0.5203, "step": 15820 }, { "epoch": 3.947630922693267, "grad_norm": 4.275954723358154, "learning_rate": 1.6054364089775563e-05, "loss": 0.3671, "step": 15830 }, { "epoch": 3.9501246882793017, "grad_norm": 9.31353759765625, "learning_rate": 1.6051870324189527e-05, "loss": 0.376, "step": 15840 }, { "epoch": 3.9526184538653366, "grad_norm": 7.531866550445557, "learning_rate": 1.6049376558603494e-05, "loss": 0.3427, "step": 15850 }, { "epoch": 3.9551122194513715, "grad_norm": 24.911481857299805, "learning_rate": 1.6046882793017458e-05, "loss": 0.3652, "step": 15860 }, { "epoch": 3.9576059850374063, "grad_norm": 8.664227485656738, "learning_rate": 1.604438902743142e-05, "loss": 0.4131, "step": 15870 }, { "epoch": 3.960099750623441, "grad_norm": 32.71113586425781, "learning_rate": 1.6041895261845388e-05, "loss": 0.3416, "step": 15880 }, { "epoch": 3.962593516209476, "grad_norm": 5.102753162384033, "learning_rate": 1.6039401496259352e-05, "loss": 0.359, "step": 15890 }, { "epoch": 3.965087281795511, "grad_norm": 4.086287021636963, "learning_rate": 1.603690773067332e-05, "loss": 0.3796, "step": 15900 }, { "epoch": 3.9675810473815463, "grad_norm": 6.271583080291748, "learning_rate": 1.6034413965087282e-05, "loss": 0.4116, "step": 15910 }, { "epoch": 3.970074812967581, "grad_norm": 6.0941877365112305, "learning_rate": 1.6031920199501246e-05, "loss": 0.4144, "step": 15920 }, { "epoch": 3.972568578553616, "grad_norm": 6.220065593719482, "learning_rate": 1.6029426433915213e-05, "loss": 0.3997, "step": 15930 }, { "epoch": 3.975062344139651, "grad_norm": 6.358421802520752, "learning_rate": 1.6026932668329177e-05, "loss": 0.415, "step": 15940 }, { "epoch": 3.9775561097256857, "grad_norm": 5.868401050567627, "learning_rate": 1.6024438902743144e-05, "loss": 0.4425, "step": 15950 }, { "epoch": 3.9800498753117206, "grad_norm": 4.294557571411133, "learning_rate": 1.6021945137157107e-05, "loss": 0.3949, "step": 15960 }, { "epoch": 3.9825436408977555, "grad_norm": 8.493629455566406, "learning_rate": 1.6019451371571074e-05, "loss": 0.4398, "step": 15970 }, { "epoch": 3.985037406483791, "grad_norm": 7.112602233886719, "learning_rate": 1.6016957605985038e-05, "loss": 0.4643, "step": 15980 }, { "epoch": 3.9875311720698257, "grad_norm": 11.970419883728027, "learning_rate": 1.6014463840399005e-05, "loss": 0.3528, "step": 15990 }, { "epoch": 3.9900249376558605, "grad_norm": 6.464308738708496, "learning_rate": 1.601197007481297e-05, "loss": 0.393, "step": 16000 }, { "epoch": 3.9925187032418954, "grad_norm": 11.118416786193848, "learning_rate": 1.6009476309226936e-05, "loss": 0.3788, "step": 16010 }, { "epoch": 3.9950124688279303, "grad_norm": 9.291110038757324, "learning_rate": 1.60069825436409e-05, "loss": 0.4067, "step": 16020 }, { "epoch": 3.997506234413965, "grad_norm": 2.8626253604888916, "learning_rate": 1.6004488778054866e-05, "loss": 0.3647, "step": 16030 }, { "epoch": 4.0, "grad_norm": 4.152261734008789, "learning_rate": 1.600199501246883e-05, "loss": 0.3658, "step": 16040 }, { "epoch": 4.0, "eval_loss": 0.4268016815185547, "eval_runtime": 59.9163, "eval_samples_per_second": 16.74, "eval_steps_per_second": 16.74, "step": 16040 }, { "epoch": 4.002493765586035, "grad_norm": 7.153280735015869, "learning_rate": 1.5999501246882793e-05, "loss": 0.417, "step": 16050 }, { "epoch": 4.00498753117207, "grad_norm": 6.5900163650512695, "learning_rate": 1.599700748129676e-05, "loss": 0.3892, "step": 16060 }, { "epoch": 4.007481296758105, "grad_norm": 4.667524337768555, "learning_rate": 1.5994513715710724e-05, "loss": 0.4389, "step": 16070 }, { "epoch": 4.0099750623441395, "grad_norm": 4.793924808502197, "learning_rate": 1.5992019950124688e-05, "loss": 0.4087, "step": 16080 }, { "epoch": 4.012468827930174, "grad_norm": 4.576432228088379, "learning_rate": 1.5989526184538655e-05, "loss": 0.4334, "step": 16090 }, { "epoch": 4.014962593516209, "grad_norm": 6.454722881317139, "learning_rate": 1.5987032418952618e-05, "loss": 0.3957, "step": 16100 }, { "epoch": 4.017456359102244, "grad_norm": 5.114348411560059, "learning_rate": 1.5984538653366585e-05, "loss": 0.3999, "step": 16110 }, { "epoch": 4.019950124688279, "grad_norm": 3.897921085357666, "learning_rate": 1.598204488778055e-05, "loss": 0.3734, "step": 16120 }, { "epoch": 4.022443890274314, "grad_norm": 6.09066915512085, "learning_rate": 1.5979551122194516e-05, "loss": 0.339, "step": 16130 }, { "epoch": 4.024937655860349, "grad_norm": 7.583807945251465, "learning_rate": 1.597705735660848e-05, "loss": 0.3507, "step": 16140 }, { "epoch": 4.027431421446384, "grad_norm": 10.415247917175293, "learning_rate": 1.5974563591022446e-05, "loss": 0.4775, "step": 16150 }, { "epoch": 4.029925187032419, "grad_norm": 8.367255210876465, "learning_rate": 1.597206982543641e-05, "loss": 0.4167, "step": 16160 }, { "epoch": 4.032418952618454, "grad_norm": 4.611159801483154, "learning_rate": 1.5969576059850377e-05, "loss": 0.4007, "step": 16170 }, { "epoch": 4.034912718204489, "grad_norm": 4.2518205642700195, "learning_rate": 1.596708229426434e-05, "loss": 0.3998, "step": 16180 }, { "epoch": 4.037406483790524, "grad_norm": 5.522350788116455, "learning_rate": 1.5964588528678308e-05, "loss": 0.4691, "step": 16190 }, { "epoch": 4.039900249376559, "grad_norm": 4.40548849105835, "learning_rate": 1.596209476309227e-05, "loss": 0.4, "step": 16200 }, { "epoch": 4.042394014962594, "grad_norm": 6.4272141456604, "learning_rate": 1.5959600997506235e-05, "loss": 0.4412, "step": 16210 }, { "epoch": 4.0448877805486285, "grad_norm": 5.885697364807129, "learning_rate": 1.5957107231920202e-05, "loss": 0.3978, "step": 16220 }, { "epoch": 4.047381546134663, "grad_norm": 6.327160835266113, "learning_rate": 1.5954613466334166e-05, "loss": 0.4464, "step": 16230 }, { "epoch": 4.049875311720698, "grad_norm": 6.883283615112305, "learning_rate": 1.595211970074813e-05, "loss": 0.4137, "step": 16240 }, { "epoch": 4.052369077306733, "grad_norm": 6.365976810455322, "learning_rate": 1.5949625935162096e-05, "loss": 0.4346, "step": 16250 }, { "epoch": 4.054862842892768, "grad_norm": 5.095077037811279, "learning_rate": 1.594713216957606e-05, "loss": 0.3615, "step": 16260 }, { "epoch": 4.057356608478803, "grad_norm": 5.573093414306641, "learning_rate": 1.5944638403990027e-05, "loss": 0.3894, "step": 16270 }, { "epoch": 4.059850374064838, "grad_norm": 7.349832057952881, "learning_rate": 1.594214463840399e-05, "loss": 0.405, "step": 16280 }, { "epoch": 4.062344139650873, "grad_norm": 7.18329381942749, "learning_rate": 1.5939650872817954e-05, "loss": 0.3667, "step": 16290 }, { "epoch": 4.0648379052369075, "grad_norm": 5.264151096343994, "learning_rate": 1.593715710723192e-05, "loss": 0.4239, "step": 16300 }, { "epoch": 4.067331670822942, "grad_norm": 5.923305511474609, "learning_rate": 1.5934663341645885e-05, "loss": 0.4125, "step": 16310 }, { "epoch": 4.069825436408977, "grad_norm": 7.834733486175537, "learning_rate": 1.593216957605985e-05, "loss": 0.4075, "step": 16320 }, { "epoch": 4.072319201995012, "grad_norm": 6.473409175872803, "learning_rate": 1.592967581047382e-05, "loss": 0.3263, "step": 16330 }, { "epoch": 4.074812967581048, "grad_norm": 4.875690460205078, "learning_rate": 1.5927182044887782e-05, "loss": 0.3769, "step": 16340 }, { "epoch": 4.077306733167083, "grad_norm": 6.263331890106201, "learning_rate": 1.592468827930175e-05, "loss": 0.3858, "step": 16350 }, { "epoch": 4.079800498753118, "grad_norm": 6.207515716552734, "learning_rate": 1.5922194513715713e-05, "loss": 0.5084, "step": 16360 }, { "epoch": 4.082294264339152, "grad_norm": 3.502013683319092, "learning_rate": 1.5919700748129676e-05, "loss": 0.3862, "step": 16370 }, { "epoch": 4.084788029925187, "grad_norm": 7.68639612197876, "learning_rate": 1.5917206982543643e-05, "loss": 0.4036, "step": 16380 }, { "epoch": 4.087281795511222, "grad_norm": 7.196399688720703, "learning_rate": 1.5914713216957607e-05, "loss": 0.3566, "step": 16390 }, { "epoch": 4.089775561097257, "grad_norm": 6.002182960510254, "learning_rate": 1.5912219451371574e-05, "loss": 0.3955, "step": 16400 }, { "epoch": 4.092269326683292, "grad_norm": 8.02131462097168, "learning_rate": 1.5909725685785538e-05, "loss": 0.3704, "step": 16410 }, { "epoch": 4.094763092269327, "grad_norm": 5.517106533050537, "learning_rate": 1.59072319201995e-05, "loss": 0.4051, "step": 16420 }, { "epoch": 4.097256857855362, "grad_norm": 9.180191040039062, "learning_rate": 1.590473815461347e-05, "loss": 0.3857, "step": 16430 }, { "epoch": 4.0997506234413965, "grad_norm": 6.0054030418396, "learning_rate": 1.5902244389027432e-05, "loss": 0.3396, "step": 16440 }, { "epoch": 4.102244389027431, "grad_norm": 5.941941261291504, "learning_rate": 1.5899750623441396e-05, "loss": 0.373, "step": 16450 }, { "epoch": 4.104738154613466, "grad_norm": 6.833188056945801, "learning_rate": 1.5897256857855363e-05, "loss": 0.3765, "step": 16460 }, { "epoch": 4.107231920199501, "grad_norm": 5.7840986251831055, "learning_rate": 1.5894763092269326e-05, "loss": 0.396, "step": 16470 }, { "epoch": 4.109725685785536, "grad_norm": 7.854373931884766, "learning_rate": 1.5892269326683293e-05, "loss": 0.3982, "step": 16480 }, { "epoch": 4.112219451371571, "grad_norm": 3.8397293090820312, "learning_rate": 1.5889775561097257e-05, "loss": 0.3874, "step": 16490 }, { "epoch": 4.114713216957606, "grad_norm": 5.715033054351807, "learning_rate": 1.5887281795511224e-05, "loss": 0.3209, "step": 16500 }, { "epoch": 4.117206982543641, "grad_norm": 7.231847286224365, "learning_rate": 1.5884788029925187e-05, "loss": 0.4834, "step": 16510 }, { "epoch": 4.1197007481296755, "grad_norm": 4.733031272888184, "learning_rate": 1.5882294264339154e-05, "loss": 0.2822, "step": 16520 }, { "epoch": 4.12219451371571, "grad_norm": 5.9622273445129395, "learning_rate": 1.5879800498753118e-05, "loss": 0.413, "step": 16530 }, { "epoch": 4.124688279301745, "grad_norm": 5.854028701782227, "learning_rate": 1.5877306733167085e-05, "loss": 0.3331, "step": 16540 }, { "epoch": 4.127182044887781, "grad_norm": 8.097307205200195, "learning_rate": 1.587481296758105e-05, "loss": 0.5102, "step": 16550 }, { "epoch": 4.129675810473816, "grad_norm": 4.774763584136963, "learning_rate": 1.5872319201995016e-05, "loss": 0.3981, "step": 16560 }, { "epoch": 4.132169576059851, "grad_norm": 6.483943939208984, "learning_rate": 1.586982543640898e-05, "loss": 0.4533, "step": 16570 }, { "epoch": 4.134663341645886, "grad_norm": 10.268522262573242, "learning_rate": 1.5867331670822943e-05, "loss": 0.3954, "step": 16580 }, { "epoch": 4.13715710723192, "grad_norm": 4.830404281616211, "learning_rate": 1.586483790523691e-05, "loss": 0.3578, "step": 16590 }, { "epoch": 4.139650872817955, "grad_norm": 3.815784215927124, "learning_rate": 1.5862344139650874e-05, "loss": 0.3744, "step": 16600 }, { "epoch": 4.14214463840399, "grad_norm": 5.319235324859619, "learning_rate": 1.585985037406484e-05, "loss": 0.3813, "step": 16610 }, { "epoch": 4.144638403990025, "grad_norm": 7.98597526550293, "learning_rate": 1.5857356608478804e-05, "loss": 0.3337, "step": 16620 }, { "epoch": 4.14713216957606, "grad_norm": 5.068573951721191, "learning_rate": 1.5854862842892768e-05, "loss": 0.4009, "step": 16630 }, { "epoch": 4.149625935162095, "grad_norm": 4.63468599319458, "learning_rate": 1.5852369077306735e-05, "loss": 0.3888, "step": 16640 }, { "epoch": 4.15211970074813, "grad_norm": 4.852334976196289, "learning_rate": 1.58498753117207e-05, "loss": 0.3634, "step": 16650 }, { "epoch": 4.1546134663341645, "grad_norm": 10.55074691772461, "learning_rate": 1.5847381546134662e-05, "loss": 0.4119, "step": 16660 }, { "epoch": 4.157107231920199, "grad_norm": 5.653322219848633, "learning_rate": 1.584488778054863e-05, "loss": 0.3409, "step": 16670 }, { "epoch": 4.159600997506234, "grad_norm": 7.006217002868652, "learning_rate": 1.5842394014962596e-05, "loss": 0.3994, "step": 16680 }, { "epoch": 4.162094763092269, "grad_norm": 6.214487552642822, "learning_rate": 1.583990024937656e-05, "loss": 0.4093, "step": 16690 }, { "epoch": 4.164588528678304, "grad_norm": 9.91429615020752, "learning_rate": 1.5837406483790527e-05, "loss": 0.4333, "step": 16700 }, { "epoch": 4.167082294264339, "grad_norm": 5.225426197052002, "learning_rate": 1.583491271820449e-05, "loss": 0.418, "step": 16710 }, { "epoch": 4.169576059850374, "grad_norm": 4.0191969871521, "learning_rate": 1.5832418952618457e-05, "loss": 0.3455, "step": 16720 }, { "epoch": 4.172069825436409, "grad_norm": 4.602033615112305, "learning_rate": 1.582992518703242e-05, "loss": 0.3552, "step": 16730 }, { "epoch": 4.174563591022444, "grad_norm": 4.991748332977295, "learning_rate": 1.5827431421446384e-05, "loss": 0.4175, "step": 16740 }, { "epoch": 4.177057356608479, "grad_norm": 6.951719284057617, "learning_rate": 1.582493765586035e-05, "loss": 0.4059, "step": 16750 }, { "epoch": 4.179551122194514, "grad_norm": 5.848910808563232, "learning_rate": 1.5822443890274315e-05, "loss": 0.3944, "step": 16760 }, { "epoch": 4.182044887780549, "grad_norm": 29.50406837463379, "learning_rate": 1.5819950124688282e-05, "loss": 0.4481, "step": 16770 }, { "epoch": 4.184538653366584, "grad_norm": 8.905052185058594, "learning_rate": 1.5817456359102246e-05, "loss": 0.3706, "step": 16780 }, { "epoch": 4.187032418952619, "grad_norm": 4.365262031555176, "learning_rate": 1.581496259351621e-05, "loss": 0.4487, "step": 16790 }, { "epoch": 4.1895261845386536, "grad_norm": 6.916237831115723, "learning_rate": 1.5812468827930176e-05, "loss": 0.4587, "step": 16800 }, { "epoch": 4.192019950124688, "grad_norm": 5.828021049499512, "learning_rate": 1.580997506234414e-05, "loss": 0.3274, "step": 16810 }, { "epoch": 4.194513715710723, "grad_norm": 5.9600138664245605, "learning_rate": 1.5807481296758107e-05, "loss": 0.4037, "step": 16820 }, { "epoch": 4.197007481296758, "grad_norm": 6.082637310028076, "learning_rate": 1.580498753117207e-05, "loss": 0.4237, "step": 16830 }, { "epoch": 4.199501246882793, "grad_norm": 4.901458740234375, "learning_rate": 1.5802493765586034e-05, "loss": 0.4596, "step": 16840 }, { "epoch": 4.201995012468828, "grad_norm": 6.101640701293945, "learning_rate": 1.58e-05, "loss": 0.3973, "step": 16850 }, { "epoch": 4.204488778054863, "grad_norm": 6.996503829956055, "learning_rate": 1.5797506234413965e-05, "loss": 0.3572, "step": 16860 }, { "epoch": 4.206982543640898, "grad_norm": 7.564293384552002, "learning_rate": 1.5795012468827932e-05, "loss": 0.3673, "step": 16870 }, { "epoch": 4.2094763092269325, "grad_norm": 5.015920162200928, "learning_rate": 1.5792518703241895e-05, "loss": 0.3798, "step": 16880 }, { "epoch": 4.211970074812967, "grad_norm": 9.611790657043457, "learning_rate": 1.5790024937655862e-05, "loss": 0.3374, "step": 16890 }, { "epoch": 4.214463840399002, "grad_norm": 8.28926944732666, "learning_rate": 1.5787531172069826e-05, "loss": 0.3995, "step": 16900 }, { "epoch": 4.216957605985037, "grad_norm": 7.903942108154297, "learning_rate": 1.5785037406483793e-05, "loss": 0.4166, "step": 16910 }, { "epoch": 4.219451371571072, "grad_norm": 6.265456199645996, "learning_rate": 1.5782543640897757e-05, "loss": 0.4312, "step": 16920 }, { "epoch": 4.221945137157107, "grad_norm": 7.296082019805908, "learning_rate": 1.5780049875311724e-05, "loss": 0.3976, "step": 16930 }, { "epoch": 4.224438902743142, "grad_norm": 5.318020820617676, "learning_rate": 1.5777556109725687e-05, "loss": 0.3702, "step": 16940 }, { "epoch": 4.2269326683291775, "grad_norm": 7.501054763793945, "learning_rate": 1.577506234413965e-05, "loss": 0.4675, "step": 16950 }, { "epoch": 4.229426433915212, "grad_norm": 6.426706790924072, "learning_rate": 1.5772568578553618e-05, "loss": 0.3556, "step": 16960 }, { "epoch": 4.231920199501247, "grad_norm": 8.110666275024414, "learning_rate": 1.577007481296758e-05, "loss": 0.4167, "step": 16970 }, { "epoch": 4.234413965087282, "grad_norm": 6.1675238609313965, "learning_rate": 1.576758104738155e-05, "loss": 0.3816, "step": 16980 }, { "epoch": 4.236907730673317, "grad_norm": 5.011690616607666, "learning_rate": 1.5765087281795512e-05, "loss": 0.3866, "step": 16990 }, { "epoch": 4.239401496259352, "grad_norm": 6.697452545166016, "learning_rate": 1.5762593516209476e-05, "loss": 0.5945, "step": 17000 }, { "epoch": 4.241895261845387, "grad_norm": 11.016718864440918, "learning_rate": 1.5760099750623443e-05, "loss": 0.3831, "step": 17010 }, { "epoch": 4.2443890274314215, "grad_norm": 7.073511123657227, "learning_rate": 1.5757605985037406e-05, "loss": 0.4155, "step": 17020 }, { "epoch": 4.246882793017456, "grad_norm": 6.227389812469482, "learning_rate": 1.5755112219451373e-05, "loss": 0.3701, "step": 17030 }, { "epoch": 4.249376558603491, "grad_norm": 5.904865264892578, "learning_rate": 1.5752618453865337e-05, "loss": 0.4464, "step": 17040 }, { "epoch": 4.251870324189526, "grad_norm": 6.812474250793457, "learning_rate": 1.5750124688279304e-05, "loss": 0.399, "step": 17050 }, { "epoch": 4.254364089775561, "grad_norm": 6.779056072235107, "learning_rate": 1.5747630922693268e-05, "loss": 0.3984, "step": 17060 }, { "epoch": 4.256857855361596, "grad_norm": 5.1520891189575195, "learning_rate": 1.5745137157107235e-05, "loss": 0.4212, "step": 17070 }, { "epoch": 4.259351620947631, "grad_norm": 10.970466613769531, "learning_rate": 1.5742643391521198e-05, "loss": 0.3745, "step": 17080 }, { "epoch": 4.261845386533666, "grad_norm": 5.555710792541504, "learning_rate": 1.5740149625935165e-05, "loss": 0.4095, "step": 17090 }, { "epoch": 4.2643391521197005, "grad_norm": 5.996181011199951, "learning_rate": 1.573765586034913e-05, "loss": 0.3816, "step": 17100 }, { "epoch": 4.266832917705735, "grad_norm": 5.2007646560668945, "learning_rate": 1.5735162094763096e-05, "loss": 0.4572, "step": 17110 }, { "epoch": 4.26932668329177, "grad_norm": 6.208393096923828, "learning_rate": 1.573266832917706e-05, "loss": 0.3948, "step": 17120 }, { "epoch": 4.271820448877805, "grad_norm": 6.411615371704102, "learning_rate": 1.5730174563591023e-05, "loss": 0.4109, "step": 17130 }, { "epoch": 4.274314214463841, "grad_norm": 7.031231880187988, "learning_rate": 1.572768079800499e-05, "loss": 0.3633, "step": 17140 }, { "epoch": 4.276807980049876, "grad_norm": 7.267471790313721, "learning_rate": 1.5725187032418954e-05, "loss": 0.3945, "step": 17150 }, { "epoch": 4.279301745635911, "grad_norm": 6.009880065917969, "learning_rate": 1.5722693266832917e-05, "loss": 0.3755, "step": 17160 }, { "epoch": 4.2817955112219455, "grad_norm": 6.955780029296875, "learning_rate": 1.5720199501246884e-05, "loss": 0.4382, "step": 17170 }, { "epoch": 4.28428927680798, "grad_norm": 4.450560569763184, "learning_rate": 1.5717705735660848e-05, "loss": 0.4039, "step": 17180 }, { "epoch": 4.286783042394015, "grad_norm": 7.145969390869141, "learning_rate": 1.5715211970074815e-05, "loss": 0.4875, "step": 17190 }, { "epoch": 4.28927680798005, "grad_norm": 5.937014102935791, "learning_rate": 1.571271820448878e-05, "loss": 0.3465, "step": 17200 }, { "epoch": 4.291770573566085, "grad_norm": 6.380857944488525, "learning_rate": 1.5710224438902742e-05, "loss": 0.4347, "step": 17210 }, { "epoch": 4.29426433915212, "grad_norm": 7.357115268707275, "learning_rate": 1.570773067331671e-05, "loss": 0.3523, "step": 17220 }, { "epoch": 4.296758104738155, "grad_norm": 6.485621929168701, "learning_rate": 1.5705236907730673e-05, "loss": 0.3737, "step": 17230 }, { "epoch": 4.2992518703241895, "grad_norm": 5.544325828552246, "learning_rate": 1.570274314214464e-05, "loss": 0.4713, "step": 17240 }, { "epoch": 4.301745635910224, "grad_norm": 4.941150188446045, "learning_rate": 1.5700249376558603e-05, "loss": 0.4139, "step": 17250 }, { "epoch": 4.304239401496259, "grad_norm": 5.890329837799072, "learning_rate": 1.569775561097257e-05, "loss": 0.4586, "step": 17260 }, { "epoch": 4.306733167082294, "grad_norm": 4.7308669090271, "learning_rate": 1.5695261845386537e-05, "loss": 0.3211, "step": 17270 }, { "epoch": 4.309226932668329, "grad_norm": 6.511252403259277, "learning_rate": 1.56927680798005e-05, "loss": 0.3643, "step": 17280 }, { "epoch": 4.311720698254364, "grad_norm": 6.328287124633789, "learning_rate": 1.5690274314214465e-05, "loss": 0.377, "step": 17290 }, { "epoch": 4.314214463840399, "grad_norm": 12.119094848632812, "learning_rate": 1.568778054862843e-05, "loss": 0.3672, "step": 17300 }, { "epoch": 4.316708229426434, "grad_norm": 5.892948627471924, "learning_rate": 1.5685286783042395e-05, "loss": 0.3174, "step": 17310 }, { "epoch": 4.3192019950124685, "grad_norm": 6.068002700805664, "learning_rate": 1.5682793017456362e-05, "loss": 0.4032, "step": 17320 }, { "epoch": 4.321695760598503, "grad_norm": 4.890083312988281, "learning_rate": 1.5680299251870326e-05, "loss": 0.422, "step": 17330 }, { "epoch": 4.324189526184538, "grad_norm": 4.682448863983154, "learning_rate": 1.567780548628429e-05, "loss": 0.3798, "step": 17340 }, { "epoch": 4.326683291770574, "grad_norm": 9.495527267456055, "learning_rate": 1.5675311720698257e-05, "loss": 0.4184, "step": 17350 }, { "epoch": 4.329177057356609, "grad_norm": 6.011453151702881, "learning_rate": 1.567281795511222e-05, "loss": 0.3809, "step": 17360 }, { "epoch": 4.331670822942644, "grad_norm": 8.991615295410156, "learning_rate": 1.5670324189526184e-05, "loss": 0.3961, "step": 17370 }, { "epoch": 4.334164588528679, "grad_norm": 7.84186315536499, "learning_rate": 1.566783042394015e-05, "loss": 0.4104, "step": 17380 }, { "epoch": 4.3366583541147135, "grad_norm": 4.632195949554443, "learning_rate": 1.5665336658354114e-05, "loss": 0.3265, "step": 17390 }, { "epoch": 4.339152119700748, "grad_norm": 7.049432754516602, "learning_rate": 1.566284289276808e-05, "loss": 0.3176, "step": 17400 }, { "epoch": 4.341645885286783, "grad_norm": 5.684892177581787, "learning_rate": 1.5660349127182045e-05, "loss": 0.3997, "step": 17410 }, { "epoch": 4.344139650872818, "grad_norm": 7.792070388793945, "learning_rate": 1.5657855361596012e-05, "loss": 0.4101, "step": 17420 }, { "epoch": 4.346633416458853, "grad_norm": 6.015437126159668, "learning_rate": 1.5655361596009976e-05, "loss": 0.4351, "step": 17430 }, { "epoch": 4.349127182044888, "grad_norm": 5.80635404586792, "learning_rate": 1.5652867830423943e-05, "loss": 0.3938, "step": 17440 }, { "epoch": 4.351620947630923, "grad_norm": 6.462253570556641, "learning_rate": 1.5650374064837906e-05, "loss": 0.4272, "step": 17450 }, { "epoch": 4.3541147132169575, "grad_norm": 5.280763149261475, "learning_rate": 1.5647880299251873e-05, "loss": 0.3625, "step": 17460 }, { "epoch": 4.356608478802992, "grad_norm": 6.560273170471191, "learning_rate": 1.5645386533665837e-05, "loss": 0.403, "step": 17470 }, { "epoch": 4.359102244389027, "grad_norm": 6.191250324249268, "learning_rate": 1.5642892768079804e-05, "loss": 0.3826, "step": 17480 }, { "epoch": 4.361596009975062, "grad_norm": 7.668184280395508, "learning_rate": 1.5640399002493767e-05, "loss": 0.3455, "step": 17490 }, { "epoch": 4.364089775561097, "grad_norm": 6.343785285949707, "learning_rate": 1.563790523690773e-05, "loss": 0.3462, "step": 17500 }, { "epoch": 4.366583541147132, "grad_norm": 7.69766092300415, "learning_rate": 1.5635411471321698e-05, "loss": 0.4203, "step": 17510 }, { "epoch": 4.369077306733167, "grad_norm": 5.314762115478516, "learning_rate": 1.5632917705735662e-05, "loss": 0.4044, "step": 17520 }, { "epoch": 4.371571072319202, "grad_norm": 4.914552688598633, "learning_rate": 1.563042394014963e-05, "loss": 0.3755, "step": 17530 }, { "epoch": 4.374064837905237, "grad_norm": 5.039554119110107, "learning_rate": 1.5627930174563592e-05, "loss": 0.3815, "step": 17540 }, { "epoch": 4.376558603491272, "grad_norm": 7.485365390777588, "learning_rate": 1.5625436408977556e-05, "loss": 0.418, "step": 17550 }, { "epoch": 4.379052369077307, "grad_norm": 5.805276393890381, "learning_rate": 1.5622942643391523e-05, "loss": 0.4342, "step": 17560 }, { "epoch": 4.381546134663342, "grad_norm": 5.376327037811279, "learning_rate": 1.562069825436409e-05, "loss": 0.4123, "step": 17570 }, { "epoch": 4.384039900249377, "grad_norm": 7.425755500793457, "learning_rate": 1.5618204488778058e-05, "loss": 0.3918, "step": 17580 }, { "epoch": 4.386533665835412, "grad_norm": 7.2214813232421875, "learning_rate": 1.5615710723192022e-05, "loss": 0.405, "step": 17590 }, { "epoch": 4.389027431421447, "grad_norm": 7.342413902282715, "learning_rate": 1.5613216957605985e-05, "loss": 0.4112, "step": 17600 }, { "epoch": 4.3915211970074814, "grad_norm": 6.210078716278076, "learning_rate": 1.5610723192019952e-05, "loss": 0.4194, "step": 17610 }, { "epoch": 4.394014962593516, "grad_norm": 8.023131370544434, "learning_rate": 1.5608229426433916e-05, "loss": 0.3781, "step": 17620 }, { "epoch": 4.396508728179551, "grad_norm": 6.007671356201172, "learning_rate": 1.560573566084788e-05, "loss": 0.4539, "step": 17630 }, { "epoch": 4.399002493765586, "grad_norm": 6.679064750671387, "learning_rate": 1.5603241895261847e-05, "loss": 0.4137, "step": 17640 }, { "epoch": 4.401496259351621, "grad_norm": 3.1536433696746826, "learning_rate": 1.560074812967581e-05, "loss": 0.4058, "step": 17650 }, { "epoch": 4.403990024937656, "grad_norm": 6.0214009284973145, "learning_rate": 1.5598254364089777e-05, "loss": 0.4396, "step": 17660 }, { "epoch": 4.406483790523691, "grad_norm": 4.901978015899658, "learning_rate": 1.559576059850374e-05, "loss": 0.3561, "step": 17670 }, { "epoch": 4.4089775561097255, "grad_norm": 7.766887664794922, "learning_rate": 1.5593266832917705e-05, "loss": 0.4944, "step": 17680 }, { "epoch": 4.41147132169576, "grad_norm": 5.148433685302734, "learning_rate": 1.559077306733167e-05, "loss": 0.4223, "step": 17690 }, { "epoch": 4.413965087281795, "grad_norm": 4.767563343048096, "learning_rate": 1.558827930174564e-05, "loss": 0.3936, "step": 17700 }, { "epoch": 4.41645885286783, "grad_norm": 7.088444232940674, "learning_rate": 1.5585785536159602e-05, "loss": 0.4696, "step": 17710 }, { "epoch": 4.418952618453865, "grad_norm": 7.782017707824707, "learning_rate": 1.558329177057357e-05, "loss": 0.3238, "step": 17720 }, { "epoch": 4.4214463840399, "grad_norm": 8.426640510559082, "learning_rate": 1.5580798004987533e-05, "loss": 0.422, "step": 17730 }, { "epoch": 4.423940149625935, "grad_norm": 6.744356155395508, "learning_rate": 1.55783042394015e-05, "loss": 0.3593, "step": 17740 }, { "epoch": 4.42643391521197, "grad_norm": 4.268016338348389, "learning_rate": 1.5575810473815463e-05, "loss": 0.4009, "step": 17750 }, { "epoch": 4.428927680798005, "grad_norm": 6.248506546020508, "learning_rate": 1.5573316708229427e-05, "loss": 0.3938, "step": 17760 }, { "epoch": 4.43142144638404, "grad_norm": 5.938436031341553, "learning_rate": 1.5570822942643394e-05, "loss": 0.4448, "step": 17770 }, { "epoch": 4.433915211970075, "grad_norm": 6.432234764099121, "learning_rate": 1.5568329177057358e-05, "loss": 0.3664, "step": 17780 }, { "epoch": 4.43640897755611, "grad_norm": 5.563558101654053, "learning_rate": 1.5565835411471325e-05, "loss": 0.5319, "step": 17790 }, { "epoch": 4.438902743142145, "grad_norm": 5.881470680236816, "learning_rate": 1.5563341645885288e-05, "loss": 0.435, "step": 17800 }, { "epoch": 4.44139650872818, "grad_norm": 6.403785705566406, "learning_rate": 1.5560847880299252e-05, "loss": 0.3419, "step": 17810 }, { "epoch": 4.443890274314215, "grad_norm": 6.167340278625488, "learning_rate": 1.555835411471322e-05, "loss": 0.4303, "step": 17820 }, { "epoch": 4.446384039900249, "grad_norm": 8.766891479492188, "learning_rate": 1.5555860349127182e-05, "loss": 0.3443, "step": 17830 }, { "epoch": 4.448877805486284, "grad_norm": 6.43876314163208, "learning_rate": 1.5553366583541146e-05, "loss": 0.3824, "step": 17840 }, { "epoch": 4.451371571072319, "grad_norm": 14.504803657531738, "learning_rate": 1.5550872817955113e-05, "loss": 0.3503, "step": 17850 }, { "epoch": 4.453865336658354, "grad_norm": 8.448104858398438, "learning_rate": 1.5548379052369077e-05, "loss": 0.4532, "step": 17860 }, { "epoch": 4.456359102244389, "grad_norm": 7.163640022277832, "learning_rate": 1.5545885286783044e-05, "loss": 0.4343, "step": 17870 }, { "epoch": 4.458852867830424, "grad_norm": 5.441543102264404, "learning_rate": 1.5543391521197007e-05, "loss": 0.3523, "step": 17880 }, { "epoch": 4.461346633416459, "grad_norm": 4.966891765594482, "learning_rate": 1.5540897755610974e-05, "loss": 0.3701, "step": 17890 }, { "epoch": 4.4638403990024935, "grad_norm": 5.674979209899902, "learning_rate": 1.5538403990024938e-05, "loss": 0.3415, "step": 17900 }, { "epoch": 4.466334164588528, "grad_norm": 6.1280646324157715, "learning_rate": 1.5535910224438905e-05, "loss": 0.4511, "step": 17910 }, { "epoch": 4.468827930174563, "grad_norm": 6.596588611602783, "learning_rate": 1.553341645885287e-05, "loss": 0.3627, "step": 17920 }, { "epoch": 4.471321695760598, "grad_norm": 7.014939785003662, "learning_rate": 1.5530922693266836e-05, "loss": 0.3572, "step": 17930 }, { "epoch": 4.473815461346634, "grad_norm": 5.796361923217773, "learning_rate": 1.55284289276808e-05, "loss": 0.448, "step": 17940 }, { "epoch": 4.476309226932669, "grad_norm": 7.400503635406494, "learning_rate": 1.5525935162094766e-05, "loss": 0.3636, "step": 17950 }, { "epoch": 4.478802992518704, "grad_norm": 6.007717132568359, "learning_rate": 1.552344139650873e-05, "loss": 0.3922, "step": 17960 }, { "epoch": 4.4812967581047385, "grad_norm": 11.898872375488281, "learning_rate": 1.5520947630922693e-05, "loss": 0.4342, "step": 17970 }, { "epoch": 4.483790523690773, "grad_norm": 6.466373443603516, "learning_rate": 1.551845386533666e-05, "loss": 0.4021, "step": 17980 }, { "epoch": 4.486284289276808, "grad_norm": 5.270114898681641, "learning_rate": 1.5515960099750624e-05, "loss": 0.384, "step": 17990 }, { "epoch": 4.488778054862843, "grad_norm": 5.372076034545898, "learning_rate": 1.551346633416459e-05, "loss": 0.4017, "step": 18000 }, { "epoch": 4.491271820448878, "grad_norm": 7.527553558349609, "learning_rate": 1.5510972568578555e-05, "loss": 0.3786, "step": 18010 }, { "epoch": 4.493765586034913, "grad_norm": 7.008097171783447, "learning_rate": 1.5508478802992518e-05, "loss": 0.4263, "step": 18020 }, { "epoch": 4.496259351620948, "grad_norm": 5.688605308532715, "learning_rate": 1.5505985037406485e-05, "loss": 0.3271, "step": 18030 }, { "epoch": 4.498753117206983, "grad_norm": 3.6691343784332275, "learning_rate": 1.550349127182045e-05, "loss": 0.3862, "step": 18040 }, { "epoch": 4.501246882793017, "grad_norm": 5.9994635581970215, "learning_rate": 1.5500997506234416e-05, "loss": 0.4175, "step": 18050 }, { "epoch": 4.503740648379052, "grad_norm": 5.447284698486328, "learning_rate": 1.549850374064838e-05, "loss": 0.4004, "step": 18060 }, { "epoch": 4.506234413965087, "grad_norm": 6.669367790222168, "learning_rate": 1.5496009975062347e-05, "loss": 0.4212, "step": 18070 }, { "epoch": 4.508728179551122, "grad_norm": 5.361328601837158, "learning_rate": 1.549351620947631e-05, "loss": 0.4352, "step": 18080 }, { "epoch": 4.511221945137157, "grad_norm": 7.188773155212402, "learning_rate": 1.5491022443890277e-05, "loss": 0.3761, "step": 18090 }, { "epoch": 4.513715710723192, "grad_norm": 4.965447902679443, "learning_rate": 1.548852867830424e-05, "loss": 0.3335, "step": 18100 }, { "epoch": 4.516209476309227, "grad_norm": 8.553069114685059, "learning_rate": 1.5486034912718208e-05, "loss": 0.4081, "step": 18110 }, { "epoch": 4.5187032418952615, "grad_norm": 6.796214580535889, "learning_rate": 1.548354114713217e-05, "loss": 0.3981, "step": 18120 }, { "epoch": 4.521197007481296, "grad_norm": 5.6893815994262695, "learning_rate": 1.5481047381546135e-05, "loss": 0.5065, "step": 18130 }, { "epoch": 4.523690773067331, "grad_norm": 6.233173370361328, "learning_rate": 1.5478553615960102e-05, "loss": 0.3679, "step": 18140 }, { "epoch": 4.526184538653366, "grad_norm": 7.2343597412109375, "learning_rate": 1.5476059850374066e-05, "loss": 0.4395, "step": 18150 }, { "epoch": 4.528678304239402, "grad_norm": 7.412199020385742, "learning_rate": 1.5473566084788033e-05, "loss": 0.4316, "step": 18160 }, { "epoch": 4.531172069825437, "grad_norm": 4.937939643859863, "learning_rate": 1.5471072319201996e-05, "loss": 0.3744, "step": 18170 }, { "epoch": 4.533665835411472, "grad_norm": 6.558340072631836, "learning_rate": 1.546857855361596e-05, "loss": 0.418, "step": 18180 }, { "epoch": 4.5361596009975065, "grad_norm": 6.0610551834106445, "learning_rate": 1.5466084788029927e-05, "loss": 0.4406, "step": 18190 }, { "epoch": 4.538653366583541, "grad_norm": 4.485851764678955, "learning_rate": 1.546359102244389e-05, "loss": 0.4511, "step": 18200 }, { "epoch": 4.541147132169576, "grad_norm": 5.121599197387695, "learning_rate": 1.5461097256857857e-05, "loss": 0.3711, "step": 18210 }, { "epoch": 4.543640897755611, "grad_norm": 4.987266540527344, "learning_rate": 1.545860349127182e-05, "loss": 0.3687, "step": 18220 }, { "epoch": 4.546134663341646, "grad_norm": 6.394310474395752, "learning_rate": 1.5456109725685785e-05, "loss": 0.3217, "step": 18230 }, { "epoch": 4.548628428927681, "grad_norm": 5.757366180419922, "learning_rate": 1.5453615960099752e-05, "loss": 0.3671, "step": 18240 }, { "epoch": 4.551122194513716, "grad_norm": 6.678833961486816, "learning_rate": 1.5451122194513715e-05, "loss": 0.3765, "step": 18250 }, { "epoch": 4.553615960099751, "grad_norm": 6.454516410827637, "learning_rate": 1.5448628428927682e-05, "loss": 0.4051, "step": 18260 }, { "epoch": 4.556109725685785, "grad_norm": 5.4813337326049805, "learning_rate": 1.5446134663341646e-05, "loss": 0.3641, "step": 18270 }, { "epoch": 4.55860349127182, "grad_norm": 5.699728488922119, "learning_rate": 1.5443640897755613e-05, "loss": 0.3532, "step": 18280 }, { "epoch": 4.561097256857855, "grad_norm": 24.547840118408203, "learning_rate": 1.544114713216958e-05, "loss": 0.5092, "step": 18290 }, { "epoch": 4.56359102244389, "grad_norm": 3.5861904621124268, "learning_rate": 1.5438653366583544e-05, "loss": 0.3255, "step": 18300 }, { "epoch": 4.566084788029925, "grad_norm": 5.126884937286377, "learning_rate": 1.5436159600997507e-05, "loss": 0.3716, "step": 18310 }, { "epoch": 4.56857855361596, "grad_norm": 6.409011363983154, "learning_rate": 1.5433665835411474e-05, "loss": 0.3852, "step": 18320 }, { "epoch": 4.571072319201995, "grad_norm": 5.01663875579834, "learning_rate": 1.5431172069825438e-05, "loss": 0.4487, "step": 18330 }, { "epoch": 4.57356608478803, "grad_norm": 5.1992506980896, "learning_rate": 1.54286783042394e-05, "loss": 0.384, "step": 18340 }, { "epoch": 4.576059850374065, "grad_norm": 9.998324394226074, "learning_rate": 1.542618453865337e-05, "loss": 0.4018, "step": 18350 }, { "epoch": 4.5785536159601, "grad_norm": 5.419407844543457, "learning_rate": 1.5423690773067332e-05, "loss": 0.3933, "step": 18360 }, { "epoch": 4.581047381546135, "grad_norm": 3.5099966526031494, "learning_rate": 1.54211970074813e-05, "loss": 0.3373, "step": 18370 }, { "epoch": 4.58354114713217, "grad_norm": 6.4149627685546875, "learning_rate": 1.5418703241895263e-05, "loss": 0.3542, "step": 18380 }, { "epoch": 4.586034912718205, "grad_norm": 7.924310684204102, "learning_rate": 1.5416209476309226e-05, "loss": 0.4282, "step": 18390 }, { "epoch": 4.58852867830424, "grad_norm": 19.269880294799805, "learning_rate": 1.5413715710723193e-05, "loss": 0.4397, "step": 18400 }, { "epoch": 4.5910224438902745, "grad_norm": 8.860027313232422, "learning_rate": 1.5411221945137157e-05, "loss": 0.3427, "step": 18410 }, { "epoch": 4.593516209476309, "grad_norm": 7.146231174468994, "learning_rate": 1.5408728179551124e-05, "loss": 0.4281, "step": 18420 }, { "epoch": 4.596009975062344, "grad_norm": 5.25446081161499, "learning_rate": 1.5406234413965088e-05, "loss": 0.4066, "step": 18430 }, { "epoch": 4.598503740648379, "grad_norm": 4.486170291900635, "learning_rate": 1.5403740648379055e-05, "loss": 0.3729, "step": 18440 }, { "epoch": 4.600997506234414, "grad_norm": 5.795819282531738, "learning_rate": 1.5401496259351623e-05, "loss": 0.3982, "step": 18450 }, { "epoch": 4.603491271820449, "grad_norm": 5.524128437042236, "learning_rate": 1.5399002493765586e-05, "loss": 0.3987, "step": 18460 }, { "epoch": 4.605985037406484, "grad_norm": 9.092752456665039, "learning_rate": 1.5396508728179553e-05, "loss": 0.3542, "step": 18470 }, { "epoch": 4.6084788029925186, "grad_norm": 4.337942123413086, "learning_rate": 1.5394014962593517e-05, "loss": 0.3433, "step": 18480 }, { "epoch": 4.610972568578553, "grad_norm": 7.767664909362793, "learning_rate": 1.539152119700748e-05, "loss": 0.412, "step": 18490 }, { "epoch": 4.613466334164588, "grad_norm": 7.6244964599609375, "learning_rate": 1.5389027431421448e-05, "loss": 0.4377, "step": 18500 }, { "epoch": 4.615960099750623, "grad_norm": 5.876872539520264, "learning_rate": 1.538653366583541e-05, "loss": 0.4679, "step": 18510 }, { "epoch": 4.618453865336658, "grad_norm": 7.280019283294678, "learning_rate": 1.5384039900249378e-05, "loss": 0.4518, "step": 18520 }, { "epoch": 4.620947630922693, "grad_norm": 6.68482780456543, "learning_rate": 1.5381546134663342e-05, "loss": 0.351, "step": 18530 }, { "epoch": 4.623441396508728, "grad_norm": 6.15846061706543, "learning_rate": 1.537905236907731e-05, "loss": 0.4252, "step": 18540 }, { "epoch": 4.625935162094763, "grad_norm": 13.20067024230957, "learning_rate": 1.5376558603491272e-05, "loss": 0.3845, "step": 18550 }, { "epoch": 4.628428927680798, "grad_norm": 5.230330467224121, "learning_rate": 1.537406483790524e-05, "loss": 0.4242, "step": 18560 }, { "epoch": 4.630922693266833, "grad_norm": 6.149280071258545, "learning_rate": 1.5371571072319203e-05, "loss": 0.4017, "step": 18570 }, { "epoch": 4.633416458852868, "grad_norm": 5.865719318389893, "learning_rate": 1.536907730673317e-05, "loss": 0.3697, "step": 18580 }, { "epoch": 4.635910224438903, "grad_norm": 7.069497585296631, "learning_rate": 1.5366583541147134e-05, "loss": 0.3835, "step": 18590 }, { "epoch": 4.638403990024938, "grad_norm": 10.992486000061035, "learning_rate": 1.53640897755611e-05, "loss": 0.4504, "step": 18600 }, { "epoch": 4.640897755610973, "grad_norm": 9.323265075683594, "learning_rate": 1.5361596009975064e-05, "loss": 0.5272, "step": 18610 }, { "epoch": 4.643391521197008, "grad_norm": 7.848715305328369, "learning_rate": 1.5359102244389028e-05, "loss": 0.4452, "step": 18620 }, { "epoch": 4.6458852867830425, "grad_norm": 7.649922847747803, "learning_rate": 1.5356608478802995e-05, "loss": 0.3857, "step": 18630 }, { "epoch": 4.648379052369077, "grad_norm": 4.88482141494751, "learning_rate": 1.535411471321696e-05, "loss": 0.3648, "step": 18640 }, { "epoch": 4.650872817955112, "grad_norm": 8.439806938171387, "learning_rate": 1.5351620947630922e-05, "loss": 0.3763, "step": 18650 }, { "epoch": 4.653366583541147, "grad_norm": 7.73594856262207, "learning_rate": 1.534912718204489e-05, "loss": 0.354, "step": 18660 }, { "epoch": 4.655860349127182, "grad_norm": 7.768985271453857, "learning_rate": 1.5346633416458853e-05, "loss": 0.3676, "step": 18670 }, { "epoch": 4.658354114713217, "grad_norm": 5.735325336456299, "learning_rate": 1.534413965087282e-05, "loss": 0.3333, "step": 18680 }, { "epoch": 4.660847880299252, "grad_norm": 4.903052806854248, "learning_rate": 1.5341645885286783e-05, "loss": 0.467, "step": 18690 }, { "epoch": 4.6633416458852865, "grad_norm": 6.769343376159668, "learning_rate": 1.5339152119700747e-05, "loss": 0.4424, "step": 18700 }, { "epoch": 4.665835411471321, "grad_norm": 22.33620262145996, "learning_rate": 1.5336658354114714e-05, "loss": 0.4389, "step": 18710 }, { "epoch": 4.668329177057356, "grad_norm": 5.3549909591674805, "learning_rate": 1.533416458852868e-05, "loss": 0.362, "step": 18720 }, { "epoch": 4.670822942643391, "grad_norm": 4.449096202850342, "learning_rate": 1.5331670822942645e-05, "loss": 0.3292, "step": 18730 }, { "epoch": 4.673316708229427, "grad_norm": 7.292250156402588, "learning_rate": 1.532917705735661e-05, "loss": 0.3926, "step": 18740 }, { "epoch": 4.675810473815462, "grad_norm": 6.387401103973389, "learning_rate": 1.5326683291770575e-05, "loss": 0.4135, "step": 18750 }, { "epoch": 4.678304239401497, "grad_norm": 5.179088115692139, "learning_rate": 1.5324189526184542e-05, "loss": 0.3263, "step": 18760 }, { "epoch": 4.6807980049875315, "grad_norm": 6.1036529541015625, "learning_rate": 1.5321695760598506e-05, "loss": 0.3773, "step": 18770 }, { "epoch": 4.683291770573566, "grad_norm": 6.317544937133789, "learning_rate": 1.531920199501247e-05, "loss": 0.3549, "step": 18780 }, { "epoch": 4.685785536159601, "grad_norm": 4.250217437744141, "learning_rate": 1.5316708229426437e-05, "loss": 0.3675, "step": 18790 }, { "epoch": 4.688279301745636, "grad_norm": 4.993629455566406, "learning_rate": 1.53142144638404e-05, "loss": 0.3732, "step": 18800 }, { "epoch": 4.690773067331671, "grad_norm": 6.042954444885254, "learning_rate": 1.5311720698254364e-05, "loss": 0.4064, "step": 18810 }, { "epoch": 4.693266832917706, "grad_norm": 7.933563709259033, "learning_rate": 1.530922693266833e-05, "loss": 0.4322, "step": 18820 }, { "epoch": 4.695760598503741, "grad_norm": 5.294432163238525, "learning_rate": 1.5306733167082294e-05, "loss": 0.3844, "step": 18830 }, { "epoch": 4.698254364089776, "grad_norm": 4.980920791625977, "learning_rate": 1.530423940149626e-05, "loss": 0.3597, "step": 18840 }, { "epoch": 4.7007481296758105, "grad_norm": 5.460011005401611, "learning_rate": 1.5301745635910225e-05, "loss": 0.3212, "step": 18850 }, { "epoch": 4.703241895261845, "grad_norm": 6.553736686706543, "learning_rate": 1.529925187032419e-05, "loss": 0.4003, "step": 18860 }, { "epoch": 4.70573566084788, "grad_norm": 6.58586311340332, "learning_rate": 1.5296758104738156e-05, "loss": 0.4062, "step": 18870 }, { "epoch": 4.708229426433915, "grad_norm": 6.710026741027832, "learning_rate": 1.529426433915212e-05, "loss": 0.4248, "step": 18880 }, { "epoch": 4.71072319201995, "grad_norm": 6.377664089202881, "learning_rate": 1.5291770573566086e-05, "loss": 0.467, "step": 18890 }, { "epoch": 4.713216957605985, "grad_norm": 8.970911979675293, "learning_rate": 1.528927680798005e-05, "loss": 0.4081, "step": 18900 }, { "epoch": 4.71571072319202, "grad_norm": 5.463969707489014, "learning_rate": 1.5286783042394017e-05, "loss": 0.3779, "step": 18910 }, { "epoch": 4.7182044887780545, "grad_norm": 5.64981746673584, "learning_rate": 1.528428927680798e-05, "loss": 0.4542, "step": 18920 }, { "epoch": 4.720698254364089, "grad_norm": 7.127771377563477, "learning_rate": 1.5281795511221947e-05, "loss": 0.3373, "step": 18930 }, { "epoch": 4.723192019950124, "grad_norm": 6.836137771606445, "learning_rate": 1.527930174563591e-05, "loss": 0.3651, "step": 18940 }, { "epoch": 4.725685785536159, "grad_norm": 7.067853927612305, "learning_rate": 1.5276807980049878e-05, "loss": 0.3785, "step": 18950 }, { "epoch": 4.728179551122195, "grad_norm": 8.48742389678955, "learning_rate": 1.5274314214463842e-05, "loss": 0.3731, "step": 18960 }, { "epoch": 4.73067331670823, "grad_norm": 13.028044700622559, "learning_rate": 1.527182044887781e-05, "loss": 0.3846, "step": 18970 }, { "epoch": 4.733167082294265, "grad_norm": 8.332368850708008, "learning_rate": 1.5269326683291772e-05, "loss": 0.4137, "step": 18980 }, { "epoch": 4.7356608478802995, "grad_norm": 7.124736785888672, "learning_rate": 1.5266832917705736e-05, "loss": 0.386, "step": 18990 }, { "epoch": 4.738154613466334, "grad_norm": 6.675353527069092, "learning_rate": 1.5264339152119703e-05, "loss": 0.4329, "step": 19000 }, { "epoch": 4.740648379052369, "grad_norm": 5.873729705810547, "learning_rate": 1.5261845386533667e-05, "loss": 0.4582, "step": 19010 }, { "epoch": 4.743142144638404, "grad_norm": 7.303366184234619, "learning_rate": 1.525935162094763e-05, "loss": 0.416, "step": 19020 }, { "epoch": 4.745635910224439, "grad_norm": 4.469297885894775, "learning_rate": 1.5256857855361597e-05, "loss": 0.3637, "step": 19030 }, { "epoch": 4.748129675810474, "grad_norm": 4.885001182556152, "learning_rate": 1.5254364089775563e-05, "loss": 0.3829, "step": 19040 }, { "epoch": 4.750623441396509, "grad_norm": 12.459643363952637, "learning_rate": 1.5251870324189528e-05, "loss": 0.3955, "step": 19050 }, { "epoch": 4.753117206982544, "grad_norm": 4.591975688934326, "learning_rate": 1.5249376558603493e-05, "loss": 0.3381, "step": 19060 }, { "epoch": 4.7556109725685785, "grad_norm": 5.559258937835693, "learning_rate": 1.5246882793017457e-05, "loss": 0.4173, "step": 19070 }, { "epoch": 4.758104738154613, "grad_norm": 8.134532928466797, "learning_rate": 1.5244389027431424e-05, "loss": 0.4132, "step": 19080 }, { "epoch": 4.760598503740648, "grad_norm": 8.197823524475098, "learning_rate": 1.5241895261845387e-05, "loss": 0.4422, "step": 19090 }, { "epoch": 4.763092269326683, "grad_norm": 6.592520713806152, "learning_rate": 1.5239401496259354e-05, "loss": 0.3964, "step": 19100 }, { "epoch": 4.765586034912718, "grad_norm": 9.510995864868164, "learning_rate": 1.5236907730673318e-05, "loss": 0.4011, "step": 19110 }, { "epoch": 4.768079800498753, "grad_norm": 8.02636432647705, "learning_rate": 1.5234413965087282e-05, "loss": 0.3814, "step": 19120 }, { "epoch": 4.770573566084788, "grad_norm": 8.324048042297363, "learning_rate": 1.5231920199501249e-05, "loss": 0.4601, "step": 19130 }, { "epoch": 4.773067331670823, "grad_norm": 4.599636554718018, "learning_rate": 1.5229426433915214e-05, "loss": 0.3584, "step": 19140 }, { "epoch": 4.775561097256858, "grad_norm": 7.513518333435059, "learning_rate": 1.5226932668329178e-05, "loss": 0.4006, "step": 19150 }, { "epoch": 4.778054862842893, "grad_norm": 6.876750946044922, "learning_rate": 1.5224438902743145e-05, "loss": 0.3725, "step": 19160 }, { "epoch": 4.780548628428928, "grad_norm": 5.191885471343994, "learning_rate": 1.5221945137157108e-05, "loss": 0.3534, "step": 19170 }, { "epoch": 4.783042394014963, "grad_norm": 7.336450099945068, "learning_rate": 1.5219451371571075e-05, "loss": 0.408, "step": 19180 }, { "epoch": 4.785536159600998, "grad_norm": 4.654347896575928, "learning_rate": 1.5216957605985039e-05, "loss": 0.3971, "step": 19190 }, { "epoch": 4.788029925187033, "grad_norm": 8.277083396911621, "learning_rate": 1.5214463840399002e-05, "loss": 0.4695, "step": 19200 }, { "epoch": 4.7905236907730675, "grad_norm": 5.439416885375977, "learning_rate": 1.521197007481297e-05, "loss": 0.4059, "step": 19210 }, { "epoch": 4.793017456359102, "grad_norm": 7.094707489013672, "learning_rate": 1.5209476309226933e-05, "loss": 0.4118, "step": 19220 }, { "epoch": 4.795511221945137, "grad_norm": 4.1312575340271, "learning_rate": 1.5206982543640898e-05, "loss": 0.3257, "step": 19230 }, { "epoch": 4.798004987531172, "grad_norm": 14.407641410827637, "learning_rate": 1.5204488778054864e-05, "loss": 0.3796, "step": 19240 }, { "epoch": 4.800498753117207, "grad_norm": 5.280395984649658, "learning_rate": 1.5201995012468829e-05, "loss": 0.3183, "step": 19250 }, { "epoch": 4.802992518703242, "grad_norm": 5.899741172790527, "learning_rate": 1.5199501246882796e-05, "loss": 0.3432, "step": 19260 }, { "epoch": 4.805486284289277, "grad_norm": 6.163057804107666, "learning_rate": 1.519700748129676e-05, "loss": 0.3459, "step": 19270 }, { "epoch": 4.807980049875312, "grad_norm": 6.30502986907959, "learning_rate": 1.5194513715710723e-05, "loss": 0.4026, "step": 19280 }, { "epoch": 4.8104738154613464, "grad_norm": 8.228434562683105, "learning_rate": 1.519201995012469e-05, "loss": 0.4527, "step": 19290 }, { "epoch": 4.812967581047381, "grad_norm": 7.36271333694458, "learning_rate": 1.5189526184538654e-05, "loss": 0.4226, "step": 19300 }, { "epoch": 4.815461346633416, "grad_norm": 6.21150016784668, "learning_rate": 1.5187032418952619e-05, "loss": 0.3855, "step": 19310 }, { "epoch": 4.817955112219451, "grad_norm": 5.929027557373047, "learning_rate": 1.5184538653366584e-05, "loss": 0.3232, "step": 19320 }, { "epoch": 4.820448877805486, "grad_norm": 12.694462776184082, "learning_rate": 1.518204488778055e-05, "loss": 0.3749, "step": 19330 }, { "epoch": 4.822942643391521, "grad_norm": 4.2973456382751465, "learning_rate": 1.5179551122194515e-05, "loss": 0.3592, "step": 19340 }, { "epoch": 4.825436408977556, "grad_norm": 6.927724838256836, "learning_rate": 1.517705735660848e-05, "loss": 0.3819, "step": 19350 }, { "epoch": 4.8279301745635905, "grad_norm": 8.140523910522461, "learning_rate": 1.5174563591022444e-05, "loss": 0.4302, "step": 19360 }, { "epoch": 4.830423940149626, "grad_norm": 9.106082916259766, "learning_rate": 1.5172069825436411e-05, "loss": 0.4181, "step": 19370 }, { "epoch": 4.832917705735661, "grad_norm": 5.513749599456787, "learning_rate": 1.5169576059850375e-05, "loss": 0.3663, "step": 19380 }, { "epoch": 4.835411471321696, "grad_norm": 4.832230567932129, "learning_rate": 1.5167082294264342e-05, "loss": 0.3402, "step": 19390 }, { "epoch": 4.837905236907731, "grad_norm": 5.5058183670043945, "learning_rate": 1.5164588528678305e-05, "loss": 0.4934, "step": 19400 }, { "epoch": 4.840399002493766, "grad_norm": 6.715292930603027, "learning_rate": 1.516209476309227e-05, "loss": 0.4119, "step": 19410 }, { "epoch": 4.842892768079801, "grad_norm": 7.778273105621338, "learning_rate": 1.5159600997506236e-05, "loss": 0.3779, "step": 19420 }, { "epoch": 4.8453865336658355, "grad_norm": 5.197988510131836, "learning_rate": 1.5157107231920201e-05, "loss": 0.3754, "step": 19430 }, { "epoch": 4.84788029925187, "grad_norm": 4.799652099609375, "learning_rate": 1.5154613466334165e-05, "loss": 0.3453, "step": 19440 }, { "epoch": 4.850374064837905, "grad_norm": 7.939327716827393, "learning_rate": 1.5152119700748132e-05, "loss": 0.3741, "step": 19450 }, { "epoch": 4.85286783042394, "grad_norm": 7.11137056350708, "learning_rate": 1.5149625935162095e-05, "loss": 0.3905, "step": 19460 }, { "epoch": 4.855361596009975, "grad_norm": 6.2819061279296875, "learning_rate": 1.5147132169576062e-05, "loss": 0.3733, "step": 19470 }, { "epoch": 4.85785536159601, "grad_norm": 5.845408916473389, "learning_rate": 1.5144638403990026e-05, "loss": 0.4008, "step": 19480 }, { "epoch": 4.860349127182045, "grad_norm": 7.455866813659668, "learning_rate": 1.5142144638403991e-05, "loss": 0.418, "step": 19490 }, { "epoch": 4.86284289276808, "grad_norm": 5.083822250366211, "learning_rate": 1.5139650872817957e-05, "loss": 0.3625, "step": 19500 }, { "epoch": 4.865336658354114, "grad_norm": 6.3122968673706055, "learning_rate": 1.5137157107231922e-05, "loss": 0.396, "step": 19510 }, { "epoch": 4.867830423940149, "grad_norm": 6.627301216125488, "learning_rate": 1.5134663341645886e-05, "loss": 0.3841, "step": 19520 }, { "epoch": 4.870324189526184, "grad_norm": 7.174892425537109, "learning_rate": 1.5132169576059853e-05, "loss": 0.3484, "step": 19530 }, { "epoch": 4.87281795511222, "grad_norm": 6.404763698577881, "learning_rate": 1.5129675810473816e-05, "loss": 0.4297, "step": 19540 }, { "epoch": 4.875311720698255, "grad_norm": 9.780095100402832, "learning_rate": 1.5127182044887783e-05, "loss": 0.4377, "step": 19550 }, { "epoch": 4.87780548628429, "grad_norm": 6.8640546798706055, "learning_rate": 1.5124688279301747e-05, "loss": 0.3958, "step": 19560 }, { "epoch": 4.8802992518703245, "grad_norm": 4.705758094787598, "learning_rate": 1.512219451371571e-05, "loss": 0.4039, "step": 19570 }, { "epoch": 4.882793017456359, "grad_norm": 4.98261833190918, "learning_rate": 1.5119700748129677e-05, "loss": 0.4121, "step": 19580 }, { "epoch": 4.885286783042394, "grad_norm": 9.222187042236328, "learning_rate": 1.5117206982543641e-05, "loss": 0.412, "step": 19590 }, { "epoch": 4.887780548628429, "grad_norm": 8.537120819091797, "learning_rate": 1.5114713216957608e-05, "loss": 0.4807, "step": 19600 }, { "epoch": 4.890274314214464, "grad_norm": 9.445647239685059, "learning_rate": 1.5112219451371573e-05, "loss": 0.4003, "step": 19610 }, { "epoch": 4.892768079800499, "grad_norm": 7.455517292022705, "learning_rate": 1.5109725685785537e-05, "loss": 0.3835, "step": 19620 }, { "epoch": 4.895261845386534, "grad_norm": 7.285569667816162, "learning_rate": 1.5107231920199504e-05, "loss": 0.3211, "step": 19630 }, { "epoch": 4.897755610972569, "grad_norm": 8.98646068572998, "learning_rate": 1.5104738154613468e-05, "loss": 0.4019, "step": 19640 }, { "epoch": 4.9002493765586035, "grad_norm": 8.273704528808594, "learning_rate": 1.5102244389027431e-05, "loss": 0.4262, "step": 19650 }, { "epoch": 4.902743142144638, "grad_norm": 3.785250663757324, "learning_rate": 1.5099750623441398e-05, "loss": 0.3724, "step": 19660 }, { "epoch": 4.905236907730673, "grad_norm": 6.882282257080078, "learning_rate": 1.5097256857855362e-05, "loss": 0.4621, "step": 19670 }, { "epoch": 4.907730673316708, "grad_norm": 9.958419799804688, "learning_rate": 1.5094763092269329e-05, "loss": 0.3655, "step": 19680 }, { "epoch": 4.910224438902743, "grad_norm": 4.580639839172363, "learning_rate": 1.5092269326683292e-05, "loss": 0.3733, "step": 19690 }, { "epoch": 4.912718204488778, "grad_norm": 6.3402276039123535, "learning_rate": 1.5089775561097258e-05, "loss": 0.3853, "step": 19700 }, { "epoch": 4.915211970074813, "grad_norm": 6.479229927062988, "learning_rate": 1.5087281795511223e-05, "loss": 0.4365, "step": 19710 }, { "epoch": 4.917705735660848, "grad_norm": 5.332212924957275, "learning_rate": 1.5084788029925188e-05, "loss": 0.4072, "step": 19720 }, { "epoch": 4.920199501246882, "grad_norm": 5.466404438018799, "learning_rate": 1.5082294264339152e-05, "loss": 0.4019, "step": 19730 }, { "epoch": 4.922693266832917, "grad_norm": 4.750015735626221, "learning_rate": 1.5079800498753119e-05, "loss": 0.3635, "step": 19740 }, { "epoch": 4.925187032418952, "grad_norm": 5.45477294921875, "learning_rate": 1.5077306733167083e-05, "loss": 0.3239, "step": 19750 }, { "epoch": 4.927680798004987, "grad_norm": 7.6153564453125, "learning_rate": 1.507481296758105e-05, "loss": 0.3935, "step": 19760 }, { "epoch": 4.930174563591023, "grad_norm": 5.9966816902160645, "learning_rate": 1.5072319201995013e-05, "loss": 0.3912, "step": 19770 }, { "epoch": 4.932668329177058, "grad_norm": 4.490943908691406, "learning_rate": 1.5069825436408978e-05, "loss": 0.3943, "step": 19780 }, { "epoch": 4.9351620947630925, "grad_norm": 4.777584075927734, "learning_rate": 1.5067331670822944e-05, "loss": 0.3863, "step": 19790 }, { "epoch": 4.937655860349127, "grad_norm": 6.837095737457275, "learning_rate": 1.5064837905236909e-05, "loss": 0.3762, "step": 19800 }, { "epoch": 4.940149625935162, "grad_norm": 6.350588321685791, "learning_rate": 1.5062344139650873e-05, "loss": 0.4286, "step": 19810 }, { "epoch": 4.942643391521197, "grad_norm": 4.62924337387085, "learning_rate": 1.505985037406484e-05, "loss": 0.3772, "step": 19820 }, { "epoch": 4.945137157107232, "grad_norm": 6.749570369720459, "learning_rate": 1.5057356608478803e-05, "loss": 0.4314, "step": 19830 }, { "epoch": 4.947630922693267, "grad_norm": 5.663025856018066, "learning_rate": 1.505486284289277e-05, "loss": 0.4405, "step": 19840 }, { "epoch": 4.950124688279302, "grad_norm": 4.510965347290039, "learning_rate": 1.5052369077306734e-05, "loss": 0.351, "step": 19850 }, { "epoch": 4.952618453865337, "grad_norm": 4.851863384246826, "learning_rate": 1.50498753117207e-05, "loss": 0.3867, "step": 19860 }, { "epoch": 4.9551122194513715, "grad_norm": 5.330405235290527, "learning_rate": 1.5047381546134665e-05, "loss": 0.4789, "step": 19870 }, { "epoch": 4.957605985037406, "grad_norm": 13.125707626342773, "learning_rate": 1.504488778054863e-05, "loss": 0.4699, "step": 19880 }, { "epoch": 4.960099750623441, "grad_norm": 6.033151149749756, "learning_rate": 1.5042394014962595e-05, "loss": 0.381, "step": 19890 }, { "epoch": 4.962593516209476, "grad_norm": 5.597021102905273, "learning_rate": 1.503990024937656e-05, "loss": 0.3887, "step": 19900 }, { "epoch": 4.965087281795511, "grad_norm": 5.585844039916992, "learning_rate": 1.5037406483790524e-05, "loss": 0.4441, "step": 19910 }, { "epoch": 4.967581047381546, "grad_norm": 6.1360602378845215, "learning_rate": 1.5034912718204491e-05, "loss": 0.4344, "step": 19920 }, { "epoch": 4.970074812967581, "grad_norm": 13.40194320678711, "learning_rate": 1.5032418952618455e-05, "loss": 0.4552, "step": 19930 }, { "epoch": 4.9725685785536164, "grad_norm": 7.084679126739502, "learning_rate": 1.502992518703242e-05, "loss": 0.3554, "step": 19940 }, { "epoch": 4.975062344139651, "grad_norm": 7.618978977203369, "learning_rate": 1.5027431421446385e-05, "loss": 0.3493, "step": 19950 }, { "epoch": 4.977556109725686, "grad_norm": 6.25748348236084, "learning_rate": 1.502493765586035e-05, "loss": 0.4517, "step": 19960 }, { "epoch": 4.980049875311721, "grad_norm": 5.518866062164307, "learning_rate": 1.5022443890274316e-05, "loss": 0.3737, "step": 19970 }, { "epoch": 4.982543640897756, "grad_norm": 6.079436779022217, "learning_rate": 1.5019950124688281e-05, "loss": 0.4093, "step": 19980 }, { "epoch": 4.985037406483791, "grad_norm": 6.771421432495117, "learning_rate": 1.5017456359102245e-05, "loss": 0.3948, "step": 19990 }, { "epoch": 4.987531172069826, "grad_norm": 5.409707069396973, "learning_rate": 1.5014962593516212e-05, "loss": 0.4326, "step": 20000 }, { "epoch": 4.9900249376558605, "grad_norm": 4.681028842926025, "learning_rate": 1.5012468827930176e-05, "loss": 0.3327, "step": 20010 }, { "epoch": 4.992518703241895, "grad_norm": 7.876126766204834, "learning_rate": 1.5009975062344139e-05, "loss": 0.3613, "step": 20020 }, { "epoch": 4.99501246882793, "grad_norm": 5.726997375488281, "learning_rate": 1.5007481296758106e-05, "loss": 0.4506, "step": 20030 }, { "epoch": 4.997506234413965, "grad_norm": 6.533457279205322, "learning_rate": 1.500498753117207e-05, "loss": 0.4232, "step": 20040 }, { "epoch": 5.0, "grad_norm": 21.448368072509766, "learning_rate": 1.5002493765586037e-05, "loss": 0.4523, "step": 20050 }, { "epoch": 5.0, "eval_loss": 0.42168691754341125, "eval_runtime": 60.2711, "eval_samples_per_second": 16.641, "eval_steps_per_second": 16.641, "step": 20050 }, { "epoch": 5.002493765586035, "grad_norm": 6.531921863555908, "learning_rate": 1.5000000000000002e-05, "loss": 0.406, "step": 20060 }, { "epoch": 5.00498753117207, "grad_norm": 5.814652442932129, "learning_rate": 1.4997506234413966e-05, "loss": 0.4352, "step": 20070 }, { "epoch": 5.007481296758105, "grad_norm": 3.371870279312134, "learning_rate": 1.4995012468827933e-05, "loss": 0.3459, "step": 20080 }, { "epoch": 5.0099750623441395, "grad_norm": 4.824948787689209, "learning_rate": 1.4992518703241896e-05, "loss": 0.3522, "step": 20090 }, { "epoch": 5.012468827930174, "grad_norm": 10.411859512329102, "learning_rate": 1.4990024937655863e-05, "loss": 0.4159, "step": 20100 }, { "epoch": 5.014962593516209, "grad_norm": 7.6682562828063965, "learning_rate": 1.4987531172069827e-05, "loss": 0.3963, "step": 20110 }, { "epoch": 5.017456359102244, "grad_norm": 7.620563983917236, "learning_rate": 1.498503740648379e-05, "loss": 0.3902, "step": 20120 }, { "epoch": 5.019950124688279, "grad_norm": 7.573136806488037, "learning_rate": 1.4982543640897758e-05, "loss": 0.5429, "step": 20130 }, { "epoch": 5.022443890274314, "grad_norm": 5.5496697425842285, "learning_rate": 1.4980049875311721e-05, "loss": 0.3934, "step": 20140 }, { "epoch": 5.024937655860349, "grad_norm": 4.304521560668945, "learning_rate": 1.4977556109725686e-05, "loss": 0.3509, "step": 20150 }, { "epoch": 5.027431421446384, "grad_norm": 9.141407012939453, "learning_rate": 1.4975062344139652e-05, "loss": 0.433, "step": 20160 }, { "epoch": 5.029925187032419, "grad_norm": 5.6662678718566895, "learning_rate": 1.4972568578553617e-05, "loss": 0.3718, "step": 20170 }, { "epoch": 5.032418952618454, "grad_norm": 7.040060997009277, "learning_rate": 1.4970074812967582e-05, "loss": 0.3879, "step": 20180 }, { "epoch": 5.034912718204489, "grad_norm": 5.835836410522461, "learning_rate": 1.4967581047381548e-05, "loss": 0.3697, "step": 20190 }, { "epoch": 5.037406483790524, "grad_norm": 3.57354998588562, "learning_rate": 1.4965087281795511e-05, "loss": 0.3932, "step": 20200 }, { "epoch": 5.039900249376559, "grad_norm": 7.62668514251709, "learning_rate": 1.4962593516209478e-05, "loss": 0.3542, "step": 20210 }, { "epoch": 5.042394014962594, "grad_norm": 5.238462924957275, "learning_rate": 1.4960099750623442e-05, "loss": 0.3125, "step": 20220 }, { "epoch": 5.0448877805486285, "grad_norm": 6.3393235206604, "learning_rate": 1.4957605985037407e-05, "loss": 0.4034, "step": 20230 }, { "epoch": 5.047381546134663, "grad_norm": 7.231442451477051, "learning_rate": 1.4955112219451373e-05, "loss": 0.3211, "step": 20240 }, { "epoch": 5.049875311720698, "grad_norm": 6.194590091705322, "learning_rate": 1.4952618453865338e-05, "loss": 0.4005, "step": 20250 }, { "epoch": 5.052369077306733, "grad_norm": 5.869102478027344, "learning_rate": 1.4950124688279303e-05, "loss": 0.4237, "step": 20260 }, { "epoch": 5.054862842892768, "grad_norm": 6.967468738555908, "learning_rate": 1.4947630922693268e-05, "loss": 0.4117, "step": 20270 }, { "epoch": 5.057356608478803, "grad_norm": 4.7718658447265625, "learning_rate": 1.4945137157107232e-05, "loss": 0.3834, "step": 20280 }, { "epoch": 5.059850374064838, "grad_norm": 6.20999813079834, "learning_rate": 1.4942643391521199e-05, "loss": 0.3369, "step": 20290 }, { "epoch": 5.062344139650873, "grad_norm": 5.620708465576172, "learning_rate": 1.4940149625935163e-05, "loss": 0.3699, "step": 20300 }, { "epoch": 5.0648379052369075, "grad_norm": 9.380097389221191, "learning_rate": 1.493765586034913e-05, "loss": 0.3487, "step": 20310 }, { "epoch": 5.067331670822942, "grad_norm": 5.500340938568115, "learning_rate": 1.4935162094763093e-05, "loss": 0.3864, "step": 20320 }, { "epoch": 5.069825436408977, "grad_norm": 6.254300117492676, "learning_rate": 1.4932668329177059e-05, "loss": 0.3982, "step": 20330 }, { "epoch": 5.072319201995012, "grad_norm": 7.905352592468262, "learning_rate": 1.4930174563591024e-05, "loss": 0.4213, "step": 20340 }, { "epoch": 5.074812967581048, "grad_norm": 5.519759654998779, "learning_rate": 1.492768079800499e-05, "loss": 0.3563, "step": 20350 }, { "epoch": 5.077306733167083, "grad_norm": 7.680830955505371, "learning_rate": 1.4925187032418953e-05, "loss": 0.3744, "step": 20360 }, { "epoch": 5.079800498753118, "grad_norm": 5.350339889526367, "learning_rate": 1.492269326683292e-05, "loss": 0.361, "step": 20370 }, { "epoch": 5.082294264339152, "grad_norm": 6.773752212524414, "learning_rate": 1.4920199501246884e-05, "loss": 0.4192, "step": 20380 }, { "epoch": 5.084788029925187, "grad_norm": 7.636809825897217, "learning_rate": 1.491770573566085e-05, "loss": 0.3706, "step": 20390 }, { "epoch": 5.087281795511222, "grad_norm": 8.180363655090332, "learning_rate": 1.4915211970074814e-05, "loss": 0.3809, "step": 20400 }, { "epoch": 5.089775561097257, "grad_norm": 6.33188533782959, "learning_rate": 1.491271820448878e-05, "loss": 0.3553, "step": 20410 }, { "epoch": 5.092269326683292, "grad_norm": 9.772364616394043, "learning_rate": 1.4910224438902745e-05, "loss": 0.4478, "step": 20420 }, { "epoch": 5.094763092269327, "grad_norm": 8.910597801208496, "learning_rate": 1.490773067331671e-05, "loss": 0.3909, "step": 20430 }, { "epoch": 5.097256857855362, "grad_norm": 5.329483509063721, "learning_rate": 1.4905236907730674e-05, "loss": 0.3552, "step": 20440 }, { "epoch": 5.0997506234413965, "grad_norm": 8.715051651000977, "learning_rate": 1.490274314214464e-05, "loss": 0.3806, "step": 20450 }, { "epoch": 5.102244389027431, "grad_norm": 4.4914422035217285, "learning_rate": 1.4900249376558604e-05, "loss": 0.3125, "step": 20460 }, { "epoch": 5.104738154613466, "grad_norm": 8.738079071044922, "learning_rate": 1.4897755610972571e-05, "loss": 0.3728, "step": 20470 }, { "epoch": 5.107231920199501, "grad_norm": 6.785305023193359, "learning_rate": 1.4895261845386535e-05, "loss": 0.387, "step": 20480 }, { "epoch": 5.109725685785536, "grad_norm": 6.179448127746582, "learning_rate": 1.4892768079800499e-05, "loss": 0.3828, "step": 20490 }, { "epoch": 5.112219451371571, "grad_norm": 8.303686141967773, "learning_rate": 1.4890274314214466e-05, "loss": 0.4654, "step": 20500 }, { "epoch": 5.114713216957606, "grad_norm": 5.6898956298828125, "learning_rate": 1.4887780548628429e-05, "loss": 0.3792, "step": 20510 }, { "epoch": 5.117206982543641, "grad_norm": 7.065920829772949, "learning_rate": 1.4885286783042394e-05, "loss": 0.4403, "step": 20520 }, { "epoch": 5.1197007481296755, "grad_norm": 7.726041316986084, "learning_rate": 1.4882793017456361e-05, "loss": 0.4045, "step": 20530 }, { "epoch": 5.12219451371571, "grad_norm": 7.321938991546631, "learning_rate": 1.4880299251870325e-05, "loss": 0.365, "step": 20540 }, { "epoch": 5.124688279301745, "grad_norm": 5.387010097503662, "learning_rate": 1.4877805486284292e-05, "loss": 0.4085, "step": 20550 }, { "epoch": 5.127182044887781, "grad_norm": 5.158405303955078, "learning_rate": 1.4875311720698256e-05, "loss": 0.3001, "step": 20560 }, { "epoch": 5.129675810473816, "grad_norm": 6.263799667358398, "learning_rate": 1.487281795511222e-05, "loss": 0.3872, "step": 20570 }, { "epoch": 5.132169576059851, "grad_norm": 6.450216293334961, "learning_rate": 1.4870324189526186e-05, "loss": 0.3789, "step": 20580 }, { "epoch": 5.134663341645886, "grad_norm": 7.4370198249816895, "learning_rate": 1.486783042394015e-05, "loss": 0.3508, "step": 20590 }, { "epoch": 5.13715710723192, "grad_norm": 7.416337013244629, "learning_rate": 1.4865336658354117e-05, "loss": 0.3011, "step": 20600 }, { "epoch": 5.139650872817955, "grad_norm": 7.354401588439941, "learning_rate": 1.486284289276808e-05, "loss": 0.3835, "step": 20610 }, { "epoch": 5.14214463840399, "grad_norm": 8.334373474121094, "learning_rate": 1.4860349127182046e-05, "loss": 0.3471, "step": 20620 }, { "epoch": 5.144638403990025, "grad_norm": 6.268391132354736, "learning_rate": 1.4857855361596011e-05, "loss": 0.4272, "step": 20630 }, { "epoch": 5.14713216957606, "grad_norm": 4.680897235870361, "learning_rate": 1.4855361596009976e-05, "loss": 0.4065, "step": 20640 }, { "epoch": 5.149625935162095, "grad_norm": 4.887267112731934, "learning_rate": 1.485286783042394e-05, "loss": 0.4085, "step": 20650 }, { "epoch": 5.15211970074813, "grad_norm": 6.771895408630371, "learning_rate": 1.4850374064837907e-05, "loss": 0.3727, "step": 20660 }, { "epoch": 5.1546134663341645, "grad_norm": 7.750133037567139, "learning_rate": 1.484788029925187e-05, "loss": 0.321, "step": 20670 }, { "epoch": 5.157107231920199, "grad_norm": 4.861640930175781, "learning_rate": 1.4845386533665838e-05, "loss": 0.4132, "step": 20680 }, { "epoch": 5.159600997506234, "grad_norm": 4.334446907043457, "learning_rate": 1.4842892768079801e-05, "loss": 0.3547, "step": 20690 }, { "epoch": 5.162094763092269, "grad_norm": 5.8802618980407715, "learning_rate": 1.4840399002493767e-05, "loss": 0.4131, "step": 20700 }, { "epoch": 5.164588528678304, "grad_norm": 7.876018524169922, "learning_rate": 1.4837905236907732e-05, "loss": 0.3721, "step": 20710 }, { "epoch": 5.167082294264339, "grad_norm": 6.277840614318848, "learning_rate": 1.4835411471321697e-05, "loss": 0.4001, "step": 20720 }, { "epoch": 5.169576059850374, "grad_norm": 6.03588342666626, "learning_rate": 1.4832917705735661e-05, "loss": 0.3861, "step": 20730 }, { "epoch": 5.172069825436409, "grad_norm": 17.586366653442383, "learning_rate": 1.4830423940149628e-05, "loss": 0.3589, "step": 20740 }, { "epoch": 5.174563591022444, "grad_norm": 5.083337783813477, "learning_rate": 1.4827930174563592e-05, "loss": 0.3565, "step": 20750 }, { "epoch": 5.177057356608479, "grad_norm": 7.520929336547852, "learning_rate": 1.4825436408977559e-05, "loss": 0.3924, "step": 20760 }, { "epoch": 5.179551122194514, "grad_norm": 10.510026931762695, "learning_rate": 1.4822942643391522e-05, "loss": 0.3726, "step": 20770 }, { "epoch": 5.182044887780549, "grad_norm": 7.425801753997803, "learning_rate": 1.4820448877805487e-05, "loss": 0.3746, "step": 20780 }, { "epoch": 5.184538653366584, "grad_norm": 5.459587574005127, "learning_rate": 1.4817955112219453e-05, "loss": 0.3441, "step": 20790 }, { "epoch": 5.187032418952619, "grad_norm": 5.989594459533691, "learning_rate": 1.4815461346633418e-05, "loss": 0.43, "step": 20800 }, { "epoch": 5.1895261845386536, "grad_norm": 4.831539154052734, "learning_rate": 1.4812967581047383e-05, "loss": 0.4618, "step": 20810 }, { "epoch": 5.192019950124688, "grad_norm": 5.510180473327637, "learning_rate": 1.4810473815461349e-05, "loss": 0.3506, "step": 20820 }, { "epoch": 5.194513715710723, "grad_norm": 6.887777805328369, "learning_rate": 1.4807980049875312e-05, "loss": 0.3266, "step": 20830 }, { "epoch": 5.197007481296758, "grad_norm": 6.549706935882568, "learning_rate": 1.480548628428928e-05, "loss": 0.4234, "step": 20840 }, { "epoch": 5.199501246882793, "grad_norm": 4.920103073120117, "learning_rate": 1.4802992518703243e-05, "loss": 0.3687, "step": 20850 }, { "epoch": 5.201995012468828, "grad_norm": 6.742281436920166, "learning_rate": 1.4800498753117207e-05, "loss": 0.4335, "step": 20860 }, { "epoch": 5.204488778054863, "grad_norm": 11.688139915466309, "learning_rate": 1.4798004987531174e-05, "loss": 0.3504, "step": 20870 }, { "epoch": 5.206982543640898, "grad_norm": 7.675383567810059, "learning_rate": 1.4795511221945139e-05, "loss": 0.3989, "step": 20880 }, { "epoch": 5.2094763092269325, "grad_norm": 5.377547740936279, "learning_rate": 1.4793017456359104e-05, "loss": 0.4201, "step": 20890 }, { "epoch": 5.211970074812967, "grad_norm": 6.458754539489746, "learning_rate": 1.479052369077307e-05, "loss": 0.4486, "step": 20900 }, { "epoch": 5.214463840399002, "grad_norm": 6.1860551834106445, "learning_rate": 1.4788029925187033e-05, "loss": 0.3515, "step": 20910 }, { "epoch": 5.216957605985037, "grad_norm": 5.372089862823486, "learning_rate": 1.4785536159601e-05, "loss": 0.3294, "step": 20920 }, { "epoch": 5.219451371571072, "grad_norm": 7.561584949493408, "learning_rate": 1.4783042394014964e-05, "loss": 0.3811, "step": 20930 }, { "epoch": 5.221945137157107, "grad_norm": 7.178483963012695, "learning_rate": 1.4780548628428927e-05, "loss": 0.4044, "step": 20940 }, { "epoch": 5.224438902743142, "grad_norm": 4.487502098083496, "learning_rate": 1.4778054862842894e-05, "loss": 0.334, "step": 20950 }, { "epoch": 5.2269326683291775, "grad_norm": 6.48040246963501, "learning_rate": 1.4775561097256858e-05, "loss": 0.3088, "step": 20960 }, { "epoch": 5.229426433915212, "grad_norm": 6.952301979064941, "learning_rate": 1.4773067331670825e-05, "loss": 0.4753, "step": 20970 }, { "epoch": 5.231920199501247, "grad_norm": 5.854089260101318, "learning_rate": 1.4770573566084789e-05, "loss": 0.3515, "step": 20980 }, { "epoch": 5.234413965087282, "grad_norm": 5.807803630828857, "learning_rate": 1.4768079800498754e-05, "loss": 0.4071, "step": 20990 }, { "epoch": 5.236907730673317, "grad_norm": 8.918316841125488, "learning_rate": 1.4765586034912721e-05, "loss": 0.3717, "step": 21000 }, { "epoch": 5.239401496259352, "grad_norm": 5.254445552825928, "learning_rate": 1.4763092269326684e-05, "loss": 0.3259, "step": 21010 }, { "epoch": 5.241895261845387, "grad_norm": 5.781581401824951, "learning_rate": 1.4760598503740648e-05, "loss": 0.3459, "step": 21020 }, { "epoch": 5.2443890274314215, "grad_norm": 5.134096622467041, "learning_rate": 1.4758104738154615e-05, "loss": 0.377, "step": 21030 }, { "epoch": 5.246882793017456, "grad_norm": 3.860678195953369, "learning_rate": 1.4755610972568579e-05, "loss": 0.3596, "step": 21040 }, { "epoch": 5.249376558603491, "grad_norm": 5.5030083656311035, "learning_rate": 1.4753117206982546e-05, "loss": 0.3723, "step": 21050 }, { "epoch": 5.251870324189526, "grad_norm": 8.171494483947754, "learning_rate": 1.475062344139651e-05, "loss": 0.3947, "step": 21060 }, { "epoch": 5.254364089775561, "grad_norm": 5.602912425994873, "learning_rate": 1.4748129675810475e-05, "loss": 0.4279, "step": 21070 }, { "epoch": 5.256857855361596, "grad_norm": 6.1152472496032715, "learning_rate": 1.474563591022444e-05, "loss": 0.3779, "step": 21080 }, { "epoch": 5.259351620947631, "grad_norm": 5.26813268661499, "learning_rate": 1.4743142144638405e-05, "loss": 0.3819, "step": 21090 }, { "epoch": 5.261845386533666, "grad_norm": 4.3624491691589355, "learning_rate": 1.474064837905237e-05, "loss": 0.3442, "step": 21100 }, { "epoch": 5.2643391521197005, "grad_norm": 5.86679220199585, "learning_rate": 1.4738154613466336e-05, "loss": 0.3749, "step": 21110 }, { "epoch": 5.266832917705735, "grad_norm": 7.071092128753662, "learning_rate": 1.47356608478803e-05, "loss": 0.3797, "step": 21120 }, { "epoch": 5.26932668329177, "grad_norm": 5.458619117736816, "learning_rate": 1.4733167082294266e-05, "loss": 0.4415, "step": 21130 }, { "epoch": 5.271820448877805, "grad_norm": 5.8463544845581055, "learning_rate": 1.473067331670823e-05, "loss": 0.3957, "step": 21140 }, { "epoch": 5.274314214463841, "grad_norm": 9.386983871459961, "learning_rate": 1.4728179551122195e-05, "loss": 0.3172, "step": 21150 }, { "epoch": 5.276807980049876, "grad_norm": 5.963527679443359, "learning_rate": 1.472568578553616e-05, "loss": 0.3782, "step": 21160 }, { "epoch": 5.279301745635911, "grad_norm": 8.052948951721191, "learning_rate": 1.4723192019950126e-05, "loss": 0.4264, "step": 21170 }, { "epoch": 5.2817955112219455, "grad_norm": 9.773006439208984, "learning_rate": 1.4720698254364091e-05, "loss": 0.3145, "step": 21180 }, { "epoch": 5.28428927680798, "grad_norm": 6.8744330406188965, "learning_rate": 1.4718204488778057e-05, "loss": 0.3495, "step": 21190 }, { "epoch": 5.286783042394015, "grad_norm": 4.006411075592041, "learning_rate": 1.471571072319202e-05, "loss": 0.3099, "step": 21200 }, { "epoch": 5.28927680798005, "grad_norm": 5.908966541290283, "learning_rate": 1.4713216957605987e-05, "loss": 0.4145, "step": 21210 }, { "epoch": 5.291770573566085, "grad_norm": 5.07480001449585, "learning_rate": 1.4710723192019951e-05, "loss": 0.3299, "step": 21220 }, { "epoch": 5.29426433915212, "grad_norm": 5.2527995109558105, "learning_rate": 1.4708229426433916e-05, "loss": 0.4364, "step": 21230 }, { "epoch": 5.296758104738155, "grad_norm": 4.9071526527404785, "learning_rate": 1.4705735660847882e-05, "loss": 0.4228, "step": 21240 }, { "epoch": 5.2992518703241895, "grad_norm": 7.707085609436035, "learning_rate": 1.4703241895261847e-05, "loss": 0.3715, "step": 21250 }, { "epoch": 5.301745635910224, "grad_norm": 10.137189865112305, "learning_rate": 1.4700748129675812e-05, "loss": 0.3773, "step": 21260 }, { "epoch": 5.304239401496259, "grad_norm": 6.27385950088501, "learning_rate": 1.4698254364089777e-05, "loss": 0.4286, "step": 21270 }, { "epoch": 5.306733167082294, "grad_norm": 6.91831636428833, "learning_rate": 1.4695760598503741e-05, "loss": 0.3541, "step": 21280 }, { "epoch": 5.309226932668329, "grad_norm": 4.474343299865723, "learning_rate": 1.4693266832917708e-05, "loss": 0.341, "step": 21290 }, { "epoch": 5.311720698254364, "grad_norm": 6.206080913543701, "learning_rate": 1.4690773067331672e-05, "loss": 0.386, "step": 21300 }, { "epoch": 5.314214463840399, "grad_norm": 5.673043251037598, "learning_rate": 1.4688279301745639e-05, "loss": 0.3751, "step": 21310 }, { "epoch": 5.316708229426434, "grad_norm": 6.9373955726623535, "learning_rate": 1.4685785536159602e-05, "loss": 0.4205, "step": 21320 }, { "epoch": 5.3192019950124685, "grad_norm": 5.714535713195801, "learning_rate": 1.4683291770573566e-05, "loss": 0.4316, "step": 21330 }, { "epoch": 5.321695760598503, "grad_norm": 5.814162254333496, "learning_rate": 1.4680798004987533e-05, "loss": 0.4013, "step": 21340 }, { "epoch": 5.324189526184538, "grad_norm": 5.710855484008789, "learning_rate": 1.4678304239401498e-05, "loss": 0.381, "step": 21350 }, { "epoch": 5.326683291770574, "grad_norm": 4.715007781982422, "learning_rate": 1.4675810473815462e-05, "loss": 0.3644, "step": 21360 }, { "epoch": 5.329177057356609, "grad_norm": 6.894988536834717, "learning_rate": 1.4673316708229429e-05, "loss": 0.3702, "step": 21370 }, { "epoch": 5.331670822942644, "grad_norm": 5.765067100524902, "learning_rate": 1.4670822942643392e-05, "loss": 0.3833, "step": 21380 }, { "epoch": 5.334164588528679, "grad_norm": 5.794302940368652, "learning_rate": 1.466832917705736e-05, "loss": 0.3816, "step": 21390 }, { "epoch": 5.3366583541147135, "grad_norm": 7.873318195343018, "learning_rate": 1.4665835411471323e-05, "loss": 0.4156, "step": 21400 }, { "epoch": 5.339152119700748, "grad_norm": 6.067409038543701, "learning_rate": 1.4663341645885287e-05, "loss": 0.3925, "step": 21410 }, { "epoch": 5.341645885286783, "grad_norm": 4.952661514282227, "learning_rate": 1.4660847880299254e-05, "loss": 0.3794, "step": 21420 }, { "epoch": 5.344139650872818, "grad_norm": 8.906375885009766, "learning_rate": 1.4658354114713217e-05, "loss": 0.4743, "step": 21430 }, { "epoch": 5.346633416458853, "grad_norm": 5.5464630126953125, "learning_rate": 1.4655860349127183e-05, "loss": 0.3503, "step": 21440 }, { "epoch": 5.349127182044888, "grad_norm": 4.894265651702881, "learning_rate": 1.4653366583541148e-05, "loss": 0.3889, "step": 21450 }, { "epoch": 5.351620947630923, "grad_norm": 5.675085544586182, "learning_rate": 1.4650872817955113e-05, "loss": 0.4552, "step": 21460 }, { "epoch": 5.3541147132169575, "grad_norm": 5.673407077789307, "learning_rate": 1.464837905236908e-05, "loss": 0.4368, "step": 21470 }, { "epoch": 5.356608478802992, "grad_norm": 6.846253395080566, "learning_rate": 1.4645885286783044e-05, "loss": 0.3714, "step": 21480 }, { "epoch": 5.359102244389027, "grad_norm": 7.071938991546631, "learning_rate": 1.4643391521197007e-05, "loss": 0.453, "step": 21490 }, { "epoch": 5.361596009975062, "grad_norm": 5.694882392883301, "learning_rate": 1.4640897755610974e-05, "loss": 0.3199, "step": 21500 }, { "epoch": 5.364089775561097, "grad_norm": 6.255917072296143, "learning_rate": 1.4638403990024938e-05, "loss": 0.3442, "step": 21510 }, { "epoch": 5.366583541147132, "grad_norm": 5.694520950317383, "learning_rate": 1.4635910224438903e-05, "loss": 0.366, "step": 21520 }, { "epoch": 5.369077306733167, "grad_norm": 4.848544597625732, "learning_rate": 1.4633416458852869e-05, "loss": 0.3746, "step": 21530 }, { "epoch": 5.371571072319202, "grad_norm": 7.683907508850098, "learning_rate": 1.4630922693266834e-05, "loss": 0.401, "step": 21540 }, { "epoch": 5.374064837905237, "grad_norm": 7.421435832977295, "learning_rate": 1.46284289276808e-05, "loss": 0.4114, "step": 21550 }, { "epoch": 5.376558603491272, "grad_norm": 5.23777437210083, "learning_rate": 1.4625935162094765e-05, "loss": 0.3773, "step": 21560 }, { "epoch": 5.379052369077307, "grad_norm": 10.150872230529785, "learning_rate": 1.4623441396508728e-05, "loss": 0.4342, "step": 21570 }, { "epoch": 5.381546134663342, "grad_norm": 6.791860103607178, "learning_rate": 1.4621197007481298e-05, "loss": 0.3954, "step": 21580 }, { "epoch": 5.384039900249377, "grad_norm": 7.470459461212158, "learning_rate": 1.4618703241895262e-05, "loss": 0.3082, "step": 21590 }, { "epoch": 5.386533665835412, "grad_norm": 7.525590419769287, "learning_rate": 1.4616209476309229e-05, "loss": 0.4149, "step": 21600 }, { "epoch": 5.389027431421447, "grad_norm": 8.42238998413086, "learning_rate": 1.4613715710723192e-05, "loss": 0.3672, "step": 21610 }, { "epoch": 5.3915211970074814, "grad_norm": 5.296651363372803, "learning_rate": 1.4611221945137158e-05, "loss": 0.4263, "step": 21620 }, { "epoch": 5.394014962593516, "grad_norm": 4.646917343139648, "learning_rate": 1.4608728179551123e-05, "loss": 0.357, "step": 21630 }, { "epoch": 5.396508728179551, "grad_norm": 7.128079414367676, "learning_rate": 1.4606234413965088e-05, "loss": 0.3383, "step": 21640 }, { "epoch": 5.399002493765586, "grad_norm": 6.30106782913208, "learning_rate": 1.4603740648379054e-05, "loss": 0.4503, "step": 21650 }, { "epoch": 5.401496259351621, "grad_norm": 3.676443576812744, "learning_rate": 1.4601246882793019e-05, "loss": 0.3028, "step": 21660 }, { "epoch": 5.403990024937656, "grad_norm": 6.699869155883789, "learning_rate": 1.4598753117206983e-05, "loss": 0.3205, "step": 21670 }, { "epoch": 5.406483790523691, "grad_norm": 7.279172897338867, "learning_rate": 1.459625935162095e-05, "loss": 0.3449, "step": 21680 }, { "epoch": 5.4089775561097255, "grad_norm": 5.297791004180908, "learning_rate": 1.4593765586034913e-05, "loss": 0.3866, "step": 21690 }, { "epoch": 5.41147132169576, "grad_norm": 4.082034587860107, "learning_rate": 1.4591271820448879e-05, "loss": 0.3785, "step": 21700 }, { "epoch": 5.413965087281795, "grad_norm": 4.643633842468262, "learning_rate": 1.4588778054862844e-05, "loss": 0.3734, "step": 21710 }, { "epoch": 5.41645885286783, "grad_norm": 5.9492597579956055, "learning_rate": 1.458628428927681e-05, "loss": 0.4107, "step": 21720 }, { "epoch": 5.418952618453865, "grad_norm": 6.517092704772949, "learning_rate": 1.4583790523690774e-05, "loss": 0.4124, "step": 21730 }, { "epoch": 5.4214463840399, "grad_norm": 5.166103363037109, "learning_rate": 1.458129675810474e-05, "loss": 0.3856, "step": 21740 }, { "epoch": 5.423940149625935, "grad_norm": 7.804215431213379, "learning_rate": 1.4578802992518703e-05, "loss": 0.3939, "step": 21750 }, { "epoch": 5.42643391521197, "grad_norm": 9.893004417419434, "learning_rate": 1.457630922693267e-05, "loss": 0.3276, "step": 21760 }, { "epoch": 5.428927680798005, "grad_norm": 6.463005065917969, "learning_rate": 1.4573815461346634e-05, "loss": 0.3526, "step": 21770 }, { "epoch": 5.43142144638404, "grad_norm": 6.165153980255127, "learning_rate": 1.4571321695760601e-05, "loss": 0.4449, "step": 21780 }, { "epoch": 5.433915211970075, "grad_norm": 4.93517541885376, "learning_rate": 1.4568827930174565e-05, "loss": 0.328, "step": 21790 }, { "epoch": 5.43640897755611, "grad_norm": 5.643067359924316, "learning_rate": 1.456633416458853e-05, "loss": 0.4141, "step": 21800 }, { "epoch": 5.438902743142145, "grad_norm": 7.786581039428711, "learning_rate": 1.4563840399002495e-05, "loss": 0.4098, "step": 21810 }, { "epoch": 5.44139650872818, "grad_norm": 4.224101543426514, "learning_rate": 1.456134663341646e-05, "loss": 0.4034, "step": 21820 }, { "epoch": 5.443890274314215, "grad_norm": 8.033632278442383, "learning_rate": 1.4558852867830424e-05, "loss": 0.3504, "step": 21830 }, { "epoch": 5.446384039900249, "grad_norm": 6.644708156585693, "learning_rate": 1.4556359102244391e-05, "loss": 0.335, "step": 21840 }, { "epoch": 5.448877805486284, "grad_norm": 8.7880859375, "learning_rate": 1.4553865336658355e-05, "loss": 0.3907, "step": 21850 }, { "epoch": 5.451371571072319, "grad_norm": 8.938055038452148, "learning_rate": 1.4551371571072322e-05, "loss": 0.3681, "step": 21860 }, { "epoch": 5.453865336658354, "grad_norm": 5.602493762969971, "learning_rate": 1.4548877805486285e-05, "loss": 0.4231, "step": 21870 }, { "epoch": 5.456359102244389, "grad_norm": 6.839218616485596, "learning_rate": 1.4546384039900249e-05, "loss": 0.3974, "step": 21880 }, { "epoch": 5.458852867830424, "grad_norm": 6.831507682800293, "learning_rate": 1.4543890274314216e-05, "loss": 0.4212, "step": 21890 }, { "epoch": 5.461346633416459, "grad_norm": 6.885910987854004, "learning_rate": 1.4541396508728181e-05, "loss": 0.3996, "step": 21900 }, { "epoch": 5.4638403990024935, "grad_norm": 9.154892921447754, "learning_rate": 1.4538902743142145e-05, "loss": 0.4248, "step": 21910 }, { "epoch": 5.466334164588528, "grad_norm": 5.8164591789245605, "learning_rate": 1.4536408977556112e-05, "loss": 0.3866, "step": 21920 }, { "epoch": 5.468827930174563, "grad_norm": 7.783123016357422, "learning_rate": 1.4533915211970076e-05, "loss": 0.3988, "step": 21930 }, { "epoch": 5.471321695760598, "grad_norm": 8.292793273925781, "learning_rate": 1.4531421446384043e-05, "loss": 0.5072, "step": 21940 }, { "epoch": 5.473815461346634, "grad_norm": 8.550527572631836, "learning_rate": 1.4528927680798006e-05, "loss": 0.4445, "step": 21950 }, { "epoch": 5.476309226932669, "grad_norm": 7.110447883605957, "learning_rate": 1.452643391521197e-05, "loss": 0.3796, "step": 21960 }, { "epoch": 5.478802992518704, "grad_norm": 5.753549575805664, "learning_rate": 1.4523940149625937e-05, "loss": 0.3745, "step": 21970 }, { "epoch": 5.4812967581047385, "grad_norm": 12.874857902526855, "learning_rate": 1.45214463840399e-05, "loss": 0.3587, "step": 21980 }, { "epoch": 5.483790523690773, "grad_norm": 7.264901161193848, "learning_rate": 1.4518952618453867e-05, "loss": 0.3727, "step": 21990 }, { "epoch": 5.486284289276808, "grad_norm": 6.658206939697266, "learning_rate": 1.4516458852867831e-05, "loss": 0.4147, "step": 22000 }, { "epoch": 5.488778054862843, "grad_norm": 8.003405570983887, "learning_rate": 1.4513965087281796e-05, "loss": 0.4492, "step": 22010 }, { "epoch": 5.491271820448878, "grad_norm": 8.046963691711426, "learning_rate": 1.4511471321695763e-05, "loss": 0.3884, "step": 22020 }, { "epoch": 5.493765586034913, "grad_norm": 4.5463385581970215, "learning_rate": 1.4508977556109727e-05, "loss": 0.3515, "step": 22030 }, { "epoch": 5.496259351620948, "grad_norm": 7.268316268920898, "learning_rate": 1.450648379052369e-05, "loss": 0.3548, "step": 22040 }, { "epoch": 5.498753117206983, "grad_norm": 6.258470058441162, "learning_rate": 1.4503990024937658e-05, "loss": 0.3813, "step": 22050 }, { "epoch": 5.501246882793017, "grad_norm": 5.818022727966309, "learning_rate": 1.4501496259351621e-05, "loss": 0.4218, "step": 22060 }, { "epoch": 5.503740648379052, "grad_norm": 5.728906631469727, "learning_rate": 1.4499002493765588e-05, "loss": 0.3664, "step": 22070 }, { "epoch": 5.506234413965087, "grad_norm": 6.165427207946777, "learning_rate": 1.4496508728179552e-05, "loss": 0.3949, "step": 22080 }, { "epoch": 5.508728179551122, "grad_norm": 6.8010783195495605, "learning_rate": 1.4494014962593517e-05, "loss": 0.3695, "step": 22090 }, { "epoch": 5.511221945137157, "grad_norm": 5.022993087768555, "learning_rate": 1.4491521197007482e-05, "loss": 0.2954, "step": 22100 }, { "epoch": 5.513715710723192, "grad_norm": 8.099823951721191, "learning_rate": 1.4489027431421448e-05, "loss": 0.3272, "step": 22110 }, { "epoch": 5.516209476309227, "grad_norm": 7.62611198425293, "learning_rate": 1.4486533665835411e-05, "loss": 0.4664, "step": 22120 }, { "epoch": 5.5187032418952615, "grad_norm": 9.035438537597656, "learning_rate": 1.4484039900249378e-05, "loss": 0.4082, "step": 22130 }, { "epoch": 5.521197007481296, "grad_norm": 5.160449028015137, "learning_rate": 1.4481546134663342e-05, "loss": 0.4821, "step": 22140 }, { "epoch": 5.523690773067331, "grad_norm": 5.530974388122559, "learning_rate": 1.4479052369077309e-05, "loss": 0.4418, "step": 22150 }, { "epoch": 5.526184538653366, "grad_norm": 7.125683784484863, "learning_rate": 1.4476558603491273e-05, "loss": 0.358, "step": 22160 }, { "epoch": 5.528678304239402, "grad_norm": 5.76719331741333, "learning_rate": 1.4474064837905238e-05, "loss": 0.3801, "step": 22170 }, { "epoch": 5.531172069825437, "grad_norm": 5.448780536651611, "learning_rate": 1.4471571072319203e-05, "loss": 0.3539, "step": 22180 }, { "epoch": 5.533665835411472, "grad_norm": 5.663511753082275, "learning_rate": 1.4469077306733169e-05, "loss": 0.451, "step": 22190 }, { "epoch": 5.5361596009975065, "grad_norm": 5.449460983276367, "learning_rate": 1.4466583541147132e-05, "loss": 0.3879, "step": 22200 }, { "epoch": 5.538653366583541, "grad_norm": 6.016726493835449, "learning_rate": 1.44640897755611e-05, "loss": 0.4061, "step": 22210 }, { "epoch": 5.541147132169576, "grad_norm": 5.542261123657227, "learning_rate": 1.4461596009975063e-05, "loss": 0.3877, "step": 22220 }, { "epoch": 5.543640897755611, "grad_norm": 8.105876922607422, "learning_rate": 1.445910224438903e-05, "loss": 0.3574, "step": 22230 }, { "epoch": 5.546134663341646, "grad_norm": 6.1067914962768555, "learning_rate": 1.4456608478802993e-05, "loss": 0.4374, "step": 22240 }, { "epoch": 5.548628428927681, "grad_norm": 7.3289265632629395, "learning_rate": 1.4454114713216959e-05, "loss": 0.3598, "step": 22250 }, { "epoch": 5.551122194513716, "grad_norm": 6.727115154266357, "learning_rate": 1.4451620947630924e-05, "loss": 0.4083, "step": 22260 }, { "epoch": 5.553615960099751, "grad_norm": 9.422988891601562, "learning_rate": 1.444912718204489e-05, "loss": 0.4603, "step": 22270 }, { "epoch": 5.556109725685785, "grad_norm": 8.968878746032715, "learning_rate": 1.4446633416458855e-05, "loss": 0.4231, "step": 22280 }, { "epoch": 5.55860349127182, "grad_norm": 5.541744232177734, "learning_rate": 1.444413965087282e-05, "loss": 0.3105, "step": 22290 }, { "epoch": 5.561097256857855, "grad_norm": 5.500674724578857, "learning_rate": 1.4441645885286784e-05, "loss": 0.3357, "step": 22300 }, { "epoch": 5.56359102244389, "grad_norm": 9.031492233276367, "learning_rate": 1.443915211970075e-05, "loss": 0.582, "step": 22310 }, { "epoch": 5.566084788029925, "grad_norm": 9.218156814575195, "learning_rate": 1.4436658354114714e-05, "loss": 0.3842, "step": 22320 }, { "epoch": 5.56857855361596, "grad_norm": 7.594570636749268, "learning_rate": 1.4434164588528678e-05, "loss": 0.4222, "step": 22330 }, { "epoch": 5.571072319201995, "grad_norm": 5.0406270027160645, "learning_rate": 1.4431670822942645e-05, "loss": 0.4213, "step": 22340 }, { "epoch": 5.57356608478803, "grad_norm": 5.456195831298828, "learning_rate": 1.4429177057356608e-05, "loss": 0.3552, "step": 22350 }, { "epoch": 5.576059850374065, "grad_norm": 6.551260948181152, "learning_rate": 1.4426683291770575e-05, "loss": 0.5219, "step": 22360 }, { "epoch": 5.5785536159601, "grad_norm": 6.80320405960083, "learning_rate": 1.442418952618454e-05, "loss": 0.3416, "step": 22370 }, { "epoch": 5.581047381546135, "grad_norm": 5.928180694580078, "learning_rate": 1.4421695760598504e-05, "loss": 0.3299, "step": 22380 }, { "epoch": 5.58354114713217, "grad_norm": 4.945070266723633, "learning_rate": 1.4419201995012471e-05, "loss": 0.3556, "step": 22390 }, { "epoch": 5.586034912718205, "grad_norm": 6.342123031616211, "learning_rate": 1.4416708229426435e-05, "loss": 0.427, "step": 22400 }, { "epoch": 5.58852867830424, "grad_norm": 6.5954060554504395, "learning_rate": 1.4414214463840399e-05, "loss": 0.4065, "step": 22410 }, { "epoch": 5.5910224438902745, "grad_norm": 5.893113136291504, "learning_rate": 1.4411720698254366e-05, "loss": 0.4155, "step": 22420 }, { "epoch": 5.593516209476309, "grad_norm": 6.298617839813232, "learning_rate": 1.440922693266833e-05, "loss": 0.3652, "step": 22430 }, { "epoch": 5.596009975062344, "grad_norm": 5.96094274520874, "learning_rate": 1.4406733167082296e-05, "loss": 0.4139, "step": 22440 }, { "epoch": 5.598503740648379, "grad_norm": 5.680459022521973, "learning_rate": 1.440423940149626e-05, "loss": 0.3219, "step": 22450 }, { "epoch": 5.600997506234414, "grad_norm": 6.752685546875, "learning_rate": 1.4401745635910225e-05, "loss": 0.3695, "step": 22460 }, { "epoch": 5.603491271820449, "grad_norm": 6.382385730743408, "learning_rate": 1.439925187032419e-05, "loss": 0.3797, "step": 22470 }, { "epoch": 5.605985037406484, "grad_norm": 7.606115341186523, "learning_rate": 1.4396758104738156e-05, "loss": 0.3709, "step": 22480 }, { "epoch": 5.6084788029925186, "grad_norm": 9.14214038848877, "learning_rate": 1.4394264339152123e-05, "loss": 0.3548, "step": 22490 }, { "epoch": 5.610972568578553, "grad_norm": 4.893841743469238, "learning_rate": 1.4391770573566086e-05, "loss": 0.389, "step": 22500 }, { "epoch": 5.613466334164588, "grad_norm": 7.275263786315918, "learning_rate": 1.438927680798005e-05, "loss": 0.3461, "step": 22510 }, { "epoch": 5.615960099750623, "grad_norm": 5.975040435791016, "learning_rate": 1.4386783042394017e-05, "loss": 0.4767, "step": 22520 }, { "epoch": 5.618453865336658, "grad_norm": 6.58006477355957, "learning_rate": 1.438428927680798e-05, "loss": 0.3466, "step": 22530 }, { "epoch": 5.620947630922693, "grad_norm": 5.844588756561279, "learning_rate": 1.4381795511221946e-05, "loss": 0.3944, "step": 22540 }, { "epoch": 5.623441396508728, "grad_norm": 12.84826374053955, "learning_rate": 1.4379301745635911e-05, "loss": 0.3679, "step": 22550 }, { "epoch": 5.625935162094763, "grad_norm": 5.966850757598877, "learning_rate": 1.4376807980049877e-05, "loss": 0.3632, "step": 22560 }, { "epoch": 5.628428927680798, "grad_norm": 5.235140323638916, "learning_rate": 1.4374314214463842e-05, "loss": 0.3889, "step": 22570 }, { "epoch": 5.630922693266833, "grad_norm": 5.83263635635376, "learning_rate": 1.4371820448877807e-05, "loss": 0.3315, "step": 22580 }, { "epoch": 5.633416458852868, "grad_norm": 4.937515735626221, "learning_rate": 1.436932668329177e-05, "loss": 0.3964, "step": 22590 }, { "epoch": 5.635910224438903, "grad_norm": 6.0770392417907715, "learning_rate": 1.4366832917705738e-05, "loss": 0.3959, "step": 22600 }, { "epoch": 5.638403990024938, "grad_norm": 7.637709140777588, "learning_rate": 1.4364339152119701e-05, "loss": 0.402, "step": 22610 }, { "epoch": 5.640897755610973, "grad_norm": 5.632735252380371, "learning_rate": 1.4361845386533667e-05, "loss": 0.3721, "step": 22620 }, { "epoch": 5.643391521197008, "grad_norm": 7.393962860107422, "learning_rate": 1.4359351620947632e-05, "loss": 0.3827, "step": 22630 }, { "epoch": 5.6458852867830425, "grad_norm": 7.217513561248779, "learning_rate": 1.4356857855361597e-05, "loss": 0.3942, "step": 22640 }, { "epoch": 5.648379052369077, "grad_norm": 8.804829597473145, "learning_rate": 1.4354364089775563e-05, "loss": 0.4314, "step": 22650 }, { "epoch": 5.650872817955112, "grad_norm": 9.382487297058105, "learning_rate": 1.4351870324189528e-05, "loss": 0.552, "step": 22660 }, { "epoch": 5.653366583541147, "grad_norm": 4.75145959854126, "learning_rate": 1.4349376558603492e-05, "loss": 0.3351, "step": 22670 }, { "epoch": 5.655860349127182, "grad_norm": 5.727991104125977, "learning_rate": 1.4346882793017459e-05, "loss": 0.3271, "step": 22680 }, { "epoch": 5.658354114713217, "grad_norm": 6.325864791870117, "learning_rate": 1.4344389027431422e-05, "loss": 0.3485, "step": 22690 }, { "epoch": 5.660847880299252, "grad_norm": 6.61782169342041, "learning_rate": 1.4341895261845386e-05, "loss": 0.3918, "step": 22700 }, { "epoch": 5.6633416458852865, "grad_norm": 8.966527938842773, "learning_rate": 1.4339401496259353e-05, "loss": 0.4352, "step": 22710 }, { "epoch": 5.665835411471321, "grad_norm": 3.9728758335113525, "learning_rate": 1.4336907730673318e-05, "loss": 0.3993, "step": 22720 }, { "epoch": 5.668329177057356, "grad_norm": 4.767123222351074, "learning_rate": 1.4334413965087283e-05, "loss": 0.3053, "step": 22730 }, { "epoch": 5.670822942643391, "grad_norm": 5.514127254486084, "learning_rate": 1.4331920199501249e-05, "loss": 0.3162, "step": 22740 }, { "epoch": 5.673316708229427, "grad_norm": 5.728281497955322, "learning_rate": 1.4329426433915212e-05, "loss": 0.3697, "step": 22750 }, { "epoch": 5.675810473815462, "grad_norm": 5.388779640197754, "learning_rate": 1.432693266832918e-05, "loss": 0.3806, "step": 22760 }, { "epoch": 5.678304239401497, "grad_norm": 5.20200252532959, "learning_rate": 1.4324438902743143e-05, "loss": 0.3255, "step": 22770 }, { "epoch": 5.6807980049875315, "grad_norm": 6.948160648345947, "learning_rate": 1.432194513715711e-05, "loss": 0.3598, "step": 22780 }, { "epoch": 5.683291770573566, "grad_norm": 5.765408515930176, "learning_rate": 1.4319451371571074e-05, "loss": 0.4121, "step": 22790 }, { "epoch": 5.685785536159601, "grad_norm": 11.699581146240234, "learning_rate": 1.4316957605985037e-05, "loss": 0.4094, "step": 22800 }, { "epoch": 5.688279301745636, "grad_norm": 5.797543048858643, "learning_rate": 1.4314463840399004e-05, "loss": 0.3325, "step": 22810 }, { "epoch": 5.690773067331671, "grad_norm": 5.791079044342041, "learning_rate": 1.4311970074812968e-05, "loss": 0.3589, "step": 22820 }, { "epoch": 5.693266832917706, "grad_norm": 5.654359817504883, "learning_rate": 1.4309476309226933e-05, "loss": 0.362, "step": 22830 }, { "epoch": 5.695760598503741, "grad_norm": 5.24976110458374, "learning_rate": 1.43069825436409e-05, "loss": 0.3756, "step": 22840 }, { "epoch": 5.698254364089776, "grad_norm": 6.083651542663574, "learning_rate": 1.4304488778054864e-05, "loss": 0.3565, "step": 22850 }, { "epoch": 5.7007481296758105, "grad_norm": 6.845718860626221, "learning_rate": 1.430199501246883e-05, "loss": 0.3706, "step": 22860 }, { "epoch": 5.703241895261845, "grad_norm": 4.934425354003906, "learning_rate": 1.4299501246882794e-05, "loss": 0.3646, "step": 22870 }, { "epoch": 5.70573566084788, "grad_norm": 9.184797286987305, "learning_rate": 1.4297007481296758e-05, "loss": 0.4083, "step": 22880 }, { "epoch": 5.708229426433915, "grad_norm": 9.234002113342285, "learning_rate": 1.4294513715710725e-05, "loss": 0.3188, "step": 22890 }, { "epoch": 5.71072319201995, "grad_norm": 7.473569393157959, "learning_rate": 1.4292019950124689e-05, "loss": 0.4069, "step": 22900 }, { "epoch": 5.713216957605985, "grad_norm": 5.984492301940918, "learning_rate": 1.4289526184538654e-05, "loss": 0.3863, "step": 22910 }, { "epoch": 5.71571072319202, "grad_norm": 8.733806610107422, "learning_rate": 1.428703241895262e-05, "loss": 0.4223, "step": 22920 }, { "epoch": 5.7182044887780545, "grad_norm": 7.522699356079102, "learning_rate": 1.4284538653366585e-05, "loss": 0.402, "step": 22930 }, { "epoch": 5.720698254364089, "grad_norm": 6.460349082946777, "learning_rate": 1.428204488778055e-05, "loss": 0.3046, "step": 22940 }, { "epoch": 5.723192019950124, "grad_norm": 6.2640204429626465, "learning_rate": 1.4279551122194515e-05, "loss": 0.3377, "step": 22950 }, { "epoch": 5.725685785536159, "grad_norm": 3.072329521179199, "learning_rate": 1.4277057356608479e-05, "loss": 0.3212, "step": 22960 }, { "epoch": 5.728179551122195, "grad_norm": 5.633548259735107, "learning_rate": 1.4274563591022446e-05, "loss": 0.3203, "step": 22970 }, { "epoch": 5.73067331670823, "grad_norm": 8.383296012878418, "learning_rate": 1.427206982543641e-05, "loss": 0.4422, "step": 22980 }, { "epoch": 5.733167082294265, "grad_norm": 7.479531764984131, "learning_rate": 1.4269576059850376e-05, "loss": 0.3443, "step": 22990 }, { "epoch": 5.7356608478802995, "grad_norm": 8.796760559082031, "learning_rate": 1.426708229426434e-05, "loss": 0.4097, "step": 23000 }, { "epoch": 5.738154613466334, "grad_norm": 8.372130393981934, "learning_rate": 1.4264588528678305e-05, "loss": 0.4402, "step": 23010 }, { "epoch": 5.740648379052369, "grad_norm": 9.008794784545898, "learning_rate": 1.426209476309227e-05, "loss": 0.4109, "step": 23020 }, { "epoch": 5.743142144638404, "grad_norm": 8.82776927947998, "learning_rate": 1.4259600997506236e-05, "loss": 0.3458, "step": 23030 }, { "epoch": 5.745635910224439, "grad_norm": 6.604305267333984, "learning_rate": 1.42571072319202e-05, "loss": 0.3969, "step": 23040 }, { "epoch": 5.748129675810474, "grad_norm": 6.345452308654785, "learning_rate": 1.4254613466334167e-05, "loss": 0.3854, "step": 23050 }, { "epoch": 5.750623441396509, "grad_norm": 8.280367851257324, "learning_rate": 1.425211970074813e-05, "loss": 0.395, "step": 23060 }, { "epoch": 5.753117206982544, "grad_norm": 4.906089782714844, "learning_rate": 1.4249625935162097e-05, "loss": 0.3929, "step": 23070 }, { "epoch": 5.7556109725685785, "grad_norm": 8.1076021194458, "learning_rate": 1.424713216957606e-05, "loss": 0.3963, "step": 23080 }, { "epoch": 5.758104738154613, "grad_norm": 5.047501087188721, "learning_rate": 1.4244638403990026e-05, "loss": 0.354, "step": 23090 }, { "epoch": 5.760598503740648, "grad_norm": 5.581216812133789, "learning_rate": 1.4242144638403991e-05, "loss": 0.3966, "step": 23100 }, { "epoch": 5.763092269326683, "grad_norm": 5.1039347648620605, "learning_rate": 1.4239650872817957e-05, "loss": 0.3249, "step": 23110 }, { "epoch": 5.765586034912718, "grad_norm": 7.597653388977051, "learning_rate": 1.423715710723192e-05, "loss": 0.369, "step": 23120 }, { "epoch": 5.768079800498753, "grad_norm": 4.918848037719727, "learning_rate": 1.4234663341645887e-05, "loss": 0.394, "step": 23130 }, { "epoch": 5.770573566084788, "grad_norm": 7.739204406738281, "learning_rate": 1.4232169576059851e-05, "loss": 0.3303, "step": 23140 }, { "epoch": 5.773067331670823, "grad_norm": 6.075505256652832, "learning_rate": 1.4229675810473818e-05, "loss": 0.3693, "step": 23150 }, { "epoch": 5.775561097256858, "grad_norm": 10.600066184997559, "learning_rate": 1.4227182044887782e-05, "loss": 0.3751, "step": 23160 }, { "epoch": 5.778054862842893, "grad_norm": 7.901242733001709, "learning_rate": 1.4224688279301745e-05, "loss": 0.3912, "step": 23170 }, { "epoch": 5.780548628428928, "grad_norm": 7.340981483459473, "learning_rate": 1.4222194513715712e-05, "loss": 0.437, "step": 23180 }, { "epoch": 5.783042394014963, "grad_norm": 6.0289626121521, "learning_rate": 1.4219700748129678e-05, "loss": 0.4226, "step": 23190 }, { "epoch": 5.785536159600998, "grad_norm": 6.679872989654541, "learning_rate": 1.4217206982543641e-05, "loss": 0.4471, "step": 23200 }, { "epoch": 5.788029925187033, "grad_norm": 7.326256275177002, "learning_rate": 1.4214713216957608e-05, "loss": 0.39, "step": 23210 }, { "epoch": 5.7905236907730675, "grad_norm": 5.042798042297363, "learning_rate": 1.4212219451371572e-05, "loss": 0.3997, "step": 23220 }, { "epoch": 5.793017456359102, "grad_norm": 6.334157943725586, "learning_rate": 1.4209725685785539e-05, "loss": 0.3387, "step": 23230 }, { "epoch": 5.795511221945137, "grad_norm": 4.981834888458252, "learning_rate": 1.4207231920199502e-05, "loss": 0.5182, "step": 23240 }, { "epoch": 5.798004987531172, "grad_norm": 4.364705562591553, "learning_rate": 1.4204738154613466e-05, "loss": 0.3756, "step": 23250 }, { "epoch": 5.800498753117207, "grad_norm": 5.2104268074035645, "learning_rate": 1.4202244389027433e-05, "loss": 0.4008, "step": 23260 }, { "epoch": 5.802992518703242, "grad_norm": 6.920286178588867, "learning_rate": 1.4199750623441397e-05, "loss": 0.4209, "step": 23270 }, { "epoch": 5.805486284289277, "grad_norm": 7.996755599975586, "learning_rate": 1.4197256857855364e-05, "loss": 0.4087, "step": 23280 }, { "epoch": 5.807980049875312, "grad_norm": 5.335063934326172, "learning_rate": 1.4194763092269327e-05, "loss": 0.3644, "step": 23290 }, { "epoch": 5.8104738154613464, "grad_norm": 7.114324569702148, "learning_rate": 1.4192269326683293e-05, "loss": 0.3954, "step": 23300 }, { "epoch": 5.812967581047381, "grad_norm": 4.345088005065918, "learning_rate": 1.418977556109726e-05, "loss": 0.4041, "step": 23310 }, { "epoch": 5.815461346633416, "grad_norm": 6.6690287590026855, "learning_rate": 1.4187281795511223e-05, "loss": 0.3639, "step": 23320 }, { "epoch": 5.817955112219451, "grad_norm": 7.393259048461914, "learning_rate": 1.4184788029925187e-05, "loss": 0.449, "step": 23330 }, { "epoch": 5.820448877805486, "grad_norm": 6.419971466064453, "learning_rate": 1.4182294264339154e-05, "loss": 0.4075, "step": 23340 }, { "epoch": 5.822942643391521, "grad_norm": 5.764027118682861, "learning_rate": 1.4179800498753117e-05, "loss": 0.3845, "step": 23350 }, { "epoch": 5.825436408977556, "grad_norm": 7.242391109466553, "learning_rate": 1.4177306733167084e-05, "loss": 0.3426, "step": 23360 }, { "epoch": 5.8279301745635905, "grad_norm": 7.269883155822754, "learning_rate": 1.4174812967581048e-05, "loss": 0.3911, "step": 23370 }, { "epoch": 5.830423940149626, "grad_norm": 4.136662006378174, "learning_rate": 1.4172319201995013e-05, "loss": 0.5394, "step": 23380 }, { "epoch": 5.832917705735661, "grad_norm": 7.696037769317627, "learning_rate": 1.4169825436408979e-05, "loss": 0.3823, "step": 23390 }, { "epoch": 5.835411471321696, "grad_norm": 8.662434577941895, "learning_rate": 1.4167331670822944e-05, "loss": 0.448, "step": 23400 }, { "epoch": 5.837905236907731, "grad_norm": 7.484658718109131, "learning_rate": 1.4164837905236908e-05, "loss": 0.4121, "step": 23410 }, { "epoch": 5.840399002493766, "grad_norm": 4.754652976989746, "learning_rate": 1.4162344139650875e-05, "loss": 0.3833, "step": 23420 }, { "epoch": 5.842892768079801, "grad_norm": 7.560851573944092, "learning_rate": 1.4159850374064838e-05, "loss": 0.4049, "step": 23430 }, { "epoch": 5.8453865336658355, "grad_norm": 7.429394721984863, "learning_rate": 1.4157356608478805e-05, "loss": 0.409, "step": 23440 }, { "epoch": 5.84788029925187, "grad_norm": 7.763380527496338, "learning_rate": 1.4154862842892769e-05, "loss": 0.3994, "step": 23450 }, { "epoch": 5.850374064837905, "grad_norm": 6.119085311889648, "learning_rate": 1.4152369077306734e-05, "loss": 0.4111, "step": 23460 }, { "epoch": 5.85286783042394, "grad_norm": 7.172895431518555, "learning_rate": 1.41498753117207e-05, "loss": 0.4037, "step": 23470 }, { "epoch": 5.855361596009975, "grad_norm": 4.939626693725586, "learning_rate": 1.4147381546134665e-05, "loss": 0.4928, "step": 23480 }, { "epoch": 5.85785536159601, "grad_norm": 5.627965450286865, "learning_rate": 1.414488778054863e-05, "loss": 0.4169, "step": 23490 }, { "epoch": 5.860349127182045, "grad_norm": 7.699008464813232, "learning_rate": 1.4142394014962595e-05, "loss": 0.3669, "step": 23500 }, { "epoch": 5.86284289276808, "grad_norm": 8.648443222045898, "learning_rate": 1.4139900249376559e-05, "loss": 0.3886, "step": 23510 }, { "epoch": 5.865336658354114, "grad_norm": 6.288234710693359, "learning_rate": 1.4137406483790526e-05, "loss": 0.4166, "step": 23520 }, { "epoch": 5.867830423940149, "grad_norm": 7.797109603881836, "learning_rate": 1.413491271820449e-05, "loss": 0.3971, "step": 23530 }, { "epoch": 5.870324189526184, "grad_norm": 5.520691394805908, "learning_rate": 1.4132418952618455e-05, "loss": 0.4536, "step": 23540 }, { "epoch": 5.87281795511222, "grad_norm": 4.677943229675293, "learning_rate": 1.412992518703242e-05, "loss": 0.3958, "step": 23550 }, { "epoch": 5.875311720698255, "grad_norm": 5.682249069213867, "learning_rate": 1.4127431421446386e-05, "loss": 0.3522, "step": 23560 }, { "epoch": 5.87780548628429, "grad_norm": 6.83863639831543, "learning_rate": 1.412493765586035e-05, "loss": 0.3656, "step": 23570 }, { "epoch": 5.8802992518703245, "grad_norm": 6.313279628753662, "learning_rate": 1.4122443890274316e-05, "loss": 0.3988, "step": 23580 }, { "epoch": 5.882793017456359, "grad_norm": 7.157898426055908, "learning_rate": 1.411995012468828e-05, "loss": 0.401, "step": 23590 }, { "epoch": 5.885286783042394, "grad_norm": 6.3576531410217285, "learning_rate": 1.4117456359102247e-05, "loss": 0.4291, "step": 23600 }, { "epoch": 5.887780548628429, "grad_norm": 6.434720993041992, "learning_rate": 1.411496259351621e-05, "loss": 0.3876, "step": 23610 }, { "epoch": 5.890274314214464, "grad_norm": 6.252552509307861, "learning_rate": 1.4112468827930174e-05, "loss": 0.3471, "step": 23620 }, { "epoch": 5.892768079800499, "grad_norm": 3.91813588142395, "learning_rate": 1.4109975062344141e-05, "loss": 0.3224, "step": 23630 }, { "epoch": 5.895261845386534, "grad_norm": 6.144435882568359, "learning_rate": 1.4107481296758105e-05, "loss": 0.4012, "step": 23640 }, { "epoch": 5.897755610972569, "grad_norm": 6.0497565269470215, "learning_rate": 1.4104987531172072e-05, "loss": 0.4003, "step": 23650 }, { "epoch": 5.9002493765586035, "grad_norm": 4.732110500335693, "learning_rate": 1.4102493765586037e-05, "loss": 0.419, "step": 23660 }, { "epoch": 5.902743142144638, "grad_norm": 13.11367416381836, "learning_rate": 1.41e-05, "loss": 0.3991, "step": 23670 }, { "epoch": 5.905236907730673, "grad_norm": 5.816923141479492, "learning_rate": 1.4097506234413968e-05, "loss": 0.34, "step": 23680 }, { "epoch": 5.907730673316708, "grad_norm": 7.775258541107178, "learning_rate": 1.4095012468827931e-05, "loss": 0.3784, "step": 23690 }, { "epoch": 5.910224438902743, "grad_norm": 5.4291486740112305, "learning_rate": 1.4092518703241898e-05, "loss": 0.404, "step": 23700 }, { "epoch": 5.912718204488778, "grad_norm": 7.315398216247559, "learning_rate": 1.4090024937655862e-05, "loss": 0.4278, "step": 23710 }, { "epoch": 5.915211970074813, "grad_norm": 5.724961757659912, "learning_rate": 1.4087531172069825e-05, "loss": 0.4336, "step": 23720 }, { "epoch": 5.917705735660848, "grad_norm": 8.703821182250977, "learning_rate": 1.4085037406483792e-05, "loss": 0.4091, "step": 23730 }, { "epoch": 5.920199501246882, "grad_norm": 6.634580612182617, "learning_rate": 1.4082543640897756e-05, "loss": 0.3738, "step": 23740 }, { "epoch": 5.922693266832917, "grad_norm": 7.790684223175049, "learning_rate": 1.4080049875311721e-05, "loss": 0.4328, "step": 23750 }, { "epoch": 5.925187032418952, "grad_norm": 6.3984808921813965, "learning_rate": 1.4077556109725687e-05, "loss": 0.3641, "step": 23760 }, { "epoch": 5.927680798004987, "grad_norm": 5.913004398345947, "learning_rate": 1.4075062344139652e-05, "loss": 0.3595, "step": 23770 }, { "epoch": 5.930174563591023, "grad_norm": 9.154977798461914, "learning_rate": 1.4072568578553619e-05, "loss": 0.3679, "step": 23780 }, { "epoch": 5.932668329177058, "grad_norm": 6.98581075668335, "learning_rate": 1.4070074812967583e-05, "loss": 0.3907, "step": 23790 }, { "epoch": 5.9351620947630925, "grad_norm": 6.550581932067871, "learning_rate": 1.4067581047381546e-05, "loss": 0.4143, "step": 23800 }, { "epoch": 5.937655860349127, "grad_norm": 7.637538909912109, "learning_rate": 1.4065087281795513e-05, "loss": 0.3854, "step": 23810 }, { "epoch": 5.940149625935162, "grad_norm": 8.336382865905762, "learning_rate": 1.4062593516209477e-05, "loss": 0.3903, "step": 23820 }, { "epoch": 5.942643391521197, "grad_norm": 5.107188701629639, "learning_rate": 1.4060099750623442e-05, "loss": 0.3879, "step": 23830 }, { "epoch": 5.945137157107232, "grad_norm": 5.9631571769714355, "learning_rate": 1.4057605985037407e-05, "loss": 0.4691, "step": 23840 }, { "epoch": 5.947630922693267, "grad_norm": 6.080106258392334, "learning_rate": 1.4055112219451373e-05, "loss": 0.4799, "step": 23850 }, { "epoch": 5.950124688279302, "grad_norm": 9.913834571838379, "learning_rate": 1.4052618453865338e-05, "loss": 0.3768, "step": 23860 }, { "epoch": 5.952618453865337, "grad_norm": 6.052908420562744, "learning_rate": 1.4050124688279303e-05, "loss": 0.3323, "step": 23870 }, { "epoch": 5.9551122194513715, "grad_norm": 8.074835777282715, "learning_rate": 1.4047630922693267e-05, "loss": 0.4003, "step": 23880 }, { "epoch": 5.957605985037406, "grad_norm": 4.824693202972412, "learning_rate": 1.4045137157107234e-05, "loss": 0.3868, "step": 23890 }, { "epoch": 5.960099750623441, "grad_norm": 8.275466918945312, "learning_rate": 1.4042643391521198e-05, "loss": 0.3944, "step": 23900 }, { "epoch": 5.962593516209476, "grad_norm": 7.059325695037842, "learning_rate": 1.4040149625935163e-05, "loss": 0.4608, "step": 23910 }, { "epoch": 5.965087281795511, "grad_norm": 6.65533447265625, "learning_rate": 1.4037655860349128e-05, "loss": 0.3445, "step": 23920 }, { "epoch": 5.967581047381546, "grad_norm": 6.2728590965271, "learning_rate": 1.4035162094763093e-05, "loss": 0.3764, "step": 23930 }, { "epoch": 5.970074812967581, "grad_norm": 8.21418571472168, "learning_rate": 1.4032668329177059e-05, "loss": 0.3982, "step": 23940 }, { "epoch": 5.9725685785536164, "grad_norm": 6.642632007598877, "learning_rate": 1.4030423940149627e-05, "loss": 0.3619, "step": 23950 }, { "epoch": 5.975062344139651, "grad_norm": 7.222472667694092, "learning_rate": 1.4027930174563592e-05, "loss": 0.3871, "step": 23960 }, { "epoch": 5.977556109725686, "grad_norm": 8.01220417022705, "learning_rate": 1.4025436408977558e-05, "loss": 0.3979, "step": 23970 }, { "epoch": 5.980049875311721, "grad_norm": 5.2847981452941895, "learning_rate": 1.4022942643391521e-05, "loss": 0.4304, "step": 23980 }, { "epoch": 5.982543640897756, "grad_norm": 10.303918838500977, "learning_rate": 1.4020448877805488e-05, "loss": 0.377, "step": 23990 }, { "epoch": 5.985037406483791, "grad_norm": 10.450936317443848, "learning_rate": 1.4017955112219452e-05, "loss": 0.4502, "step": 24000 }, { "epoch": 5.987531172069826, "grad_norm": 6.296121120452881, "learning_rate": 1.4015461346633417e-05, "loss": 0.4307, "step": 24010 }, { "epoch": 5.9900249376558605, "grad_norm": 5.6589202880859375, "learning_rate": 1.4012967581047383e-05, "loss": 0.4116, "step": 24020 }, { "epoch": 5.992518703241895, "grad_norm": 6.098679065704346, "learning_rate": 1.4010473815461348e-05, "loss": 0.336, "step": 24030 }, { "epoch": 5.99501246882793, "grad_norm": 6.028469562530518, "learning_rate": 1.4007980049875313e-05, "loss": 0.3988, "step": 24040 }, { "epoch": 5.997506234413965, "grad_norm": 7.266406536102295, "learning_rate": 1.4005486284289278e-05, "loss": 0.435, "step": 24050 }, { "epoch": 6.0, "grad_norm": 4.367632865905762, "learning_rate": 1.4002992518703242e-05, "loss": 0.3838, "step": 24060 }, { "epoch": 6.0, "eval_loss": 0.41945403814315796, "eval_runtime": 59.8425, "eval_samples_per_second": 16.761, "eval_steps_per_second": 16.761, "step": 24060 }, { "epoch": 6.002493765586035, "grad_norm": 5.5019001960754395, "learning_rate": 1.4000498753117209e-05, "loss": 0.3192, "step": 24070 }, { "epoch": 6.00498753117207, "grad_norm": 6.230374813079834, "learning_rate": 1.3998004987531173e-05, "loss": 0.3463, "step": 24080 }, { "epoch": 6.007481296758105, "grad_norm": 5.844031810760498, "learning_rate": 1.3995511221945138e-05, "loss": 0.4043, "step": 24090 }, { "epoch": 6.0099750623441395, "grad_norm": 6.5853166580200195, "learning_rate": 1.3993017456359103e-05, "loss": 0.4222, "step": 24100 }, { "epoch": 6.012468827930174, "grad_norm": 7.990192890167236, "learning_rate": 1.3990523690773069e-05, "loss": 0.3578, "step": 24110 }, { "epoch": 6.014962593516209, "grad_norm": 5.267573356628418, "learning_rate": 1.3988029925187034e-05, "loss": 0.3111, "step": 24120 }, { "epoch": 6.017456359102244, "grad_norm": 6.116258144378662, "learning_rate": 1.3985536159601e-05, "loss": 0.3556, "step": 24130 }, { "epoch": 6.019950124688279, "grad_norm": 7.899449825286865, "learning_rate": 1.3983042394014963e-05, "loss": 0.3691, "step": 24140 }, { "epoch": 6.022443890274314, "grad_norm": 8.339388847351074, "learning_rate": 1.398054862842893e-05, "loss": 0.3704, "step": 24150 }, { "epoch": 6.024937655860349, "grad_norm": 5.403593063354492, "learning_rate": 1.3978054862842893e-05, "loss": 0.3668, "step": 24160 }, { "epoch": 6.027431421446384, "grad_norm": 7.102303981781006, "learning_rate": 1.397556109725686e-05, "loss": 0.3266, "step": 24170 }, { "epoch": 6.029925187032419, "grad_norm": 5.142783164978027, "learning_rate": 1.3973067331670824e-05, "loss": 0.3544, "step": 24180 }, { "epoch": 6.032418952618454, "grad_norm": 7.026705265045166, "learning_rate": 1.3970573566084788e-05, "loss": 0.3973, "step": 24190 }, { "epoch": 6.034912718204489, "grad_norm": 5.733627796173096, "learning_rate": 1.3968079800498755e-05, "loss": 0.3553, "step": 24200 }, { "epoch": 6.037406483790524, "grad_norm": 6.362379550933838, "learning_rate": 1.396558603491272e-05, "loss": 0.3822, "step": 24210 }, { "epoch": 6.039900249376559, "grad_norm": 7.283205509185791, "learning_rate": 1.3963092269326684e-05, "loss": 0.4559, "step": 24220 }, { "epoch": 6.042394014962594, "grad_norm": 7.929378509521484, "learning_rate": 1.396059850374065e-05, "loss": 0.3449, "step": 24230 }, { "epoch": 6.0448877805486285, "grad_norm": 7.110724449157715, "learning_rate": 1.3958104738154614e-05, "loss": 0.3746, "step": 24240 }, { "epoch": 6.047381546134663, "grad_norm": 5.7137227058410645, "learning_rate": 1.3955610972568581e-05, "loss": 0.4771, "step": 24250 }, { "epoch": 6.049875311720698, "grad_norm": 6.553785800933838, "learning_rate": 1.3953117206982545e-05, "loss": 0.3555, "step": 24260 }, { "epoch": 6.052369077306733, "grad_norm": 5.964046955108643, "learning_rate": 1.3950623441396509e-05, "loss": 0.3619, "step": 24270 }, { "epoch": 6.054862842892768, "grad_norm": 7.368553638458252, "learning_rate": 1.3948129675810476e-05, "loss": 0.3385, "step": 24280 }, { "epoch": 6.057356608478803, "grad_norm": 9.049263000488281, "learning_rate": 1.3945635910224439e-05, "loss": 0.3605, "step": 24290 }, { "epoch": 6.059850374064838, "grad_norm": 7.191588401794434, "learning_rate": 1.3943142144638404e-05, "loss": 0.4145, "step": 24300 }, { "epoch": 6.062344139650873, "grad_norm": 10.2169828414917, "learning_rate": 1.394064837905237e-05, "loss": 0.3857, "step": 24310 }, { "epoch": 6.0648379052369075, "grad_norm": 7.191344738006592, "learning_rate": 1.3938154613466335e-05, "loss": 0.3727, "step": 24320 }, { "epoch": 6.067331670822942, "grad_norm": 6.484828472137451, "learning_rate": 1.3935660847880302e-05, "loss": 0.4169, "step": 24330 }, { "epoch": 6.069825436408977, "grad_norm": 5.972647190093994, "learning_rate": 1.3933167082294266e-05, "loss": 0.2752, "step": 24340 }, { "epoch": 6.072319201995012, "grad_norm": 7.331430912017822, "learning_rate": 1.393067331670823e-05, "loss": 0.3688, "step": 24350 }, { "epoch": 6.074812967581048, "grad_norm": 4.472461223602295, "learning_rate": 1.3928179551122196e-05, "loss": 0.3835, "step": 24360 }, { "epoch": 6.077306733167083, "grad_norm": 6.023663520812988, "learning_rate": 1.392568578553616e-05, "loss": 0.3425, "step": 24370 }, { "epoch": 6.079800498753118, "grad_norm": 6.141242980957031, "learning_rate": 1.3923192019950127e-05, "loss": 0.3483, "step": 24380 }, { "epoch": 6.082294264339152, "grad_norm": 6.454586029052734, "learning_rate": 1.392069825436409e-05, "loss": 0.3239, "step": 24390 }, { "epoch": 6.084788029925187, "grad_norm": 5.868727684020996, "learning_rate": 1.3918204488778056e-05, "loss": 0.2889, "step": 24400 }, { "epoch": 6.087281795511222, "grad_norm": 5.985288619995117, "learning_rate": 1.3915710723192021e-05, "loss": 0.3998, "step": 24410 }, { "epoch": 6.089775561097257, "grad_norm": 8.732137680053711, "learning_rate": 1.3913216957605986e-05, "loss": 0.4191, "step": 24420 }, { "epoch": 6.092269326683292, "grad_norm": 3.857243061065674, "learning_rate": 1.391072319201995e-05, "loss": 0.3174, "step": 24430 }, { "epoch": 6.094763092269327, "grad_norm": 6.74662446975708, "learning_rate": 1.3908229426433917e-05, "loss": 0.4585, "step": 24440 }, { "epoch": 6.097256857855362, "grad_norm": 8.382293701171875, "learning_rate": 1.390573566084788e-05, "loss": 0.4119, "step": 24450 }, { "epoch": 6.0997506234413965, "grad_norm": 5.25961446762085, "learning_rate": 1.3903241895261848e-05, "loss": 0.355, "step": 24460 }, { "epoch": 6.102244389027431, "grad_norm": 10.296069145202637, "learning_rate": 1.3900748129675811e-05, "loss": 0.4015, "step": 24470 }, { "epoch": 6.104738154613466, "grad_norm": 6.3385009765625, "learning_rate": 1.3898254364089777e-05, "loss": 0.4538, "step": 24480 }, { "epoch": 6.107231920199501, "grad_norm": 5.728212356567383, "learning_rate": 1.3895760598503742e-05, "loss": 0.3313, "step": 24490 }, { "epoch": 6.109725685785536, "grad_norm": 7.13115930557251, "learning_rate": 1.3893266832917707e-05, "loss": 0.3547, "step": 24500 }, { "epoch": 6.112219451371571, "grad_norm": 7.432462692260742, "learning_rate": 1.3890773067331671e-05, "loss": 0.3595, "step": 24510 }, { "epoch": 6.114713216957606, "grad_norm": 7.006857872009277, "learning_rate": 1.3888279301745638e-05, "loss": 0.3668, "step": 24520 }, { "epoch": 6.117206982543641, "grad_norm": 6.406911373138428, "learning_rate": 1.3885785536159601e-05, "loss": 0.3775, "step": 24530 }, { "epoch": 6.1197007481296755, "grad_norm": 4.907394886016846, "learning_rate": 1.3883291770573568e-05, "loss": 0.3529, "step": 24540 }, { "epoch": 6.12219451371571, "grad_norm": 5.443017482757568, "learning_rate": 1.3880798004987532e-05, "loss": 0.3293, "step": 24550 }, { "epoch": 6.124688279301745, "grad_norm": 6.361752510070801, "learning_rate": 1.3878304239401497e-05, "loss": 0.3988, "step": 24560 }, { "epoch": 6.127182044887781, "grad_norm": 7.4896464347839355, "learning_rate": 1.3875810473815463e-05, "loss": 0.3992, "step": 24570 }, { "epoch": 6.129675810473816, "grad_norm": 5.954341888427734, "learning_rate": 1.3873316708229428e-05, "loss": 0.3524, "step": 24580 }, { "epoch": 6.132169576059851, "grad_norm": 7.877580642700195, "learning_rate": 1.3870822942643392e-05, "loss": 0.3688, "step": 24590 }, { "epoch": 6.134663341645886, "grad_norm": 11.095531463623047, "learning_rate": 1.3868329177057359e-05, "loss": 0.3773, "step": 24600 }, { "epoch": 6.13715710723192, "grad_norm": 4.941054344177246, "learning_rate": 1.3865835411471322e-05, "loss": 0.337, "step": 24610 }, { "epoch": 6.139650872817955, "grad_norm": 7.861473083496094, "learning_rate": 1.386334164588529e-05, "loss": 0.3896, "step": 24620 }, { "epoch": 6.14214463840399, "grad_norm": 7.050281524658203, "learning_rate": 1.3860847880299253e-05, "loss": 0.3908, "step": 24630 }, { "epoch": 6.144638403990025, "grad_norm": 8.201066970825195, "learning_rate": 1.3858354114713217e-05, "loss": 0.4176, "step": 24640 }, { "epoch": 6.14713216957606, "grad_norm": 6.268393516540527, "learning_rate": 1.3855860349127184e-05, "loss": 0.3486, "step": 24650 }, { "epoch": 6.149625935162095, "grad_norm": 7.479492664337158, "learning_rate": 1.3853366583541147e-05, "loss": 0.3568, "step": 24660 }, { "epoch": 6.15211970074813, "grad_norm": 9.700624465942383, "learning_rate": 1.3850872817955114e-05, "loss": 0.5033, "step": 24670 }, { "epoch": 6.1546134663341645, "grad_norm": 7.117500305175781, "learning_rate": 1.384837905236908e-05, "loss": 0.3423, "step": 24680 }, { "epoch": 6.157107231920199, "grad_norm": 6.819540023803711, "learning_rate": 1.3845885286783043e-05, "loss": 0.4107, "step": 24690 }, { "epoch": 6.159600997506234, "grad_norm": 7.1624755859375, "learning_rate": 1.384339152119701e-05, "loss": 0.3808, "step": 24700 }, { "epoch": 6.162094763092269, "grad_norm": 5.549429416656494, "learning_rate": 1.3840897755610974e-05, "loss": 0.4034, "step": 24710 }, { "epoch": 6.164588528678304, "grad_norm": 6.838397026062012, "learning_rate": 1.3838403990024937e-05, "loss": 0.4178, "step": 24720 }, { "epoch": 6.167082294264339, "grad_norm": 6.023339748382568, "learning_rate": 1.3835910224438904e-05, "loss": 0.3682, "step": 24730 }, { "epoch": 6.169576059850374, "grad_norm": 6.62280797958374, "learning_rate": 1.3833416458852868e-05, "loss": 0.3843, "step": 24740 }, { "epoch": 6.172069825436409, "grad_norm": 6.568355083465576, "learning_rate": 1.3830922693266835e-05, "loss": 0.4233, "step": 24750 }, { "epoch": 6.174563591022444, "grad_norm": 9.093072891235352, "learning_rate": 1.3828428927680799e-05, "loss": 0.3631, "step": 24760 }, { "epoch": 6.177057356608479, "grad_norm": 8.72046184539795, "learning_rate": 1.3825935162094764e-05, "loss": 0.3435, "step": 24770 }, { "epoch": 6.179551122194514, "grad_norm": 9.039979934692383, "learning_rate": 1.3823441396508729e-05, "loss": 0.3879, "step": 24780 }, { "epoch": 6.182044887780549, "grad_norm": 8.933988571166992, "learning_rate": 1.3820947630922694e-05, "loss": 0.3646, "step": 24790 }, { "epoch": 6.184538653366584, "grad_norm": 7.515913963317871, "learning_rate": 1.3818453865336658e-05, "loss": 0.3503, "step": 24800 }, { "epoch": 6.187032418952619, "grad_norm": 6.617506504058838, "learning_rate": 1.3815960099750625e-05, "loss": 0.4107, "step": 24810 }, { "epoch": 6.1895261845386536, "grad_norm": 5.950352668762207, "learning_rate": 1.3813466334164589e-05, "loss": 0.3318, "step": 24820 }, { "epoch": 6.192019950124688, "grad_norm": 6.522132873535156, "learning_rate": 1.3810972568578556e-05, "loss": 0.3876, "step": 24830 }, { "epoch": 6.194513715710723, "grad_norm": 5.656475067138672, "learning_rate": 1.380847880299252e-05, "loss": 0.3361, "step": 24840 }, { "epoch": 6.197007481296758, "grad_norm": 9.664381980895996, "learning_rate": 1.3805985037406485e-05, "loss": 0.4696, "step": 24850 }, { "epoch": 6.199501246882793, "grad_norm": 5.316076755523682, "learning_rate": 1.380349127182045e-05, "loss": 0.3133, "step": 24860 }, { "epoch": 6.201995012468828, "grad_norm": 5.598194599151611, "learning_rate": 1.3800997506234415e-05, "loss": 0.4303, "step": 24870 }, { "epoch": 6.204488778054863, "grad_norm": 9.467336654663086, "learning_rate": 1.379850374064838e-05, "loss": 0.3493, "step": 24880 }, { "epoch": 6.206982543640898, "grad_norm": 5.858520030975342, "learning_rate": 1.3796009975062346e-05, "loss": 0.4155, "step": 24890 }, { "epoch": 6.2094763092269325, "grad_norm": 6.202467441558838, "learning_rate": 1.379351620947631e-05, "loss": 0.3583, "step": 24900 }, { "epoch": 6.211970074812967, "grad_norm": 10.094200134277344, "learning_rate": 1.3791022443890276e-05, "loss": 0.3416, "step": 24910 }, { "epoch": 6.214463840399002, "grad_norm": 6.892770290374756, "learning_rate": 1.378852867830424e-05, "loss": 0.3961, "step": 24920 }, { "epoch": 6.216957605985037, "grad_norm": 6.055887699127197, "learning_rate": 1.3786034912718205e-05, "loss": 0.3684, "step": 24930 }, { "epoch": 6.219451371571072, "grad_norm": 7.503059387207031, "learning_rate": 1.378354114713217e-05, "loss": 0.3396, "step": 24940 }, { "epoch": 6.221945137157107, "grad_norm": 6.7039642333984375, "learning_rate": 1.3781047381546136e-05, "loss": 0.3621, "step": 24950 }, { "epoch": 6.224438902743142, "grad_norm": 6.32158088684082, "learning_rate": 1.3778553615960101e-05, "loss": 0.3421, "step": 24960 }, { "epoch": 6.2269326683291775, "grad_norm": 7.217518329620361, "learning_rate": 1.3776059850374067e-05, "loss": 0.3611, "step": 24970 }, { "epoch": 6.229426433915212, "grad_norm": 6.535889148712158, "learning_rate": 1.377356608478803e-05, "loss": 0.3765, "step": 24980 }, { "epoch": 6.231920199501247, "grad_norm": 5.55719518661499, "learning_rate": 1.3771072319201997e-05, "loss": 0.3583, "step": 24990 }, { "epoch": 6.234413965087282, "grad_norm": 8.752567291259766, "learning_rate": 1.3768578553615961e-05, "loss": 0.3583, "step": 25000 }, { "epoch": 6.236907730673317, "grad_norm": 6.028888702392578, "learning_rate": 1.3766084788029924e-05, "loss": 0.3537, "step": 25010 }, { "epoch": 6.239401496259352, "grad_norm": 8.308853149414062, "learning_rate": 1.3763591022443891e-05, "loss": 0.4142, "step": 25020 }, { "epoch": 6.241895261845387, "grad_norm": 7.740601539611816, "learning_rate": 1.3761097256857857e-05, "loss": 0.3631, "step": 25030 }, { "epoch": 6.2443890274314215, "grad_norm": 6.1072611808776855, "learning_rate": 1.3758603491271822e-05, "loss": 0.3523, "step": 25040 }, { "epoch": 6.246882793017456, "grad_norm": 5.326240062713623, "learning_rate": 1.3756109725685787e-05, "loss": 0.3606, "step": 25050 }, { "epoch": 6.249376558603491, "grad_norm": 7.631753921508789, "learning_rate": 1.3753615960099751e-05, "loss": 0.3653, "step": 25060 }, { "epoch": 6.251870324189526, "grad_norm": 4.055186748504639, "learning_rate": 1.3751122194513718e-05, "loss": 0.3317, "step": 25070 }, { "epoch": 6.254364089775561, "grad_norm": 6.139715671539307, "learning_rate": 1.3748628428927682e-05, "loss": 0.3835, "step": 25080 }, { "epoch": 6.256857855361596, "grad_norm": 6.292752265930176, "learning_rate": 1.3746134663341645e-05, "loss": 0.2972, "step": 25090 }, { "epoch": 6.259351620947631, "grad_norm": 7.624993324279785, "learning_rate": 1.3743640897755612e-05, "loss": 0.4233, "step": 25100 }, { "epoch": 6.261845386533666, "grad_norm": 5.454896450042725, "learning_rate": 1.3741147132169576e-05, "loss": 0.3935, "step": 25110 }, { "epoch": 6.2643391521197005, "grad_norm": 7.689428329467773, "learning_rate": 1.3738653366583543e-05, "loss": 0.3478, "step": 25120 }, { "epoch": 6.266832917705735, "grad_norm": 5.5055365562438965, "learning_rate": 1.3736159600997507e-05, "loss": 0.3336, "step": 25130 }, { "epoch": 6.26932668329177, "grad_norm": 4.818638324737549, "learning_rate": 1.3733665835411472e-05, "loss": 0.3446, "step": 25140 }, { "epoch": 6.271820448877805, "grad_norm": 6.679276943206787, "learning_rate": 1.3731172069825439e-05, "loss": 0.3915, "step": 25150 }, { "epoch": 6.274314214463841, "grad_norm": 7.1155009269714355, "learning_rate": 1.3728678304239402e-05, "loss": 0.3504, "step": 25160 }, { "epoch": 6.276807980049876, "grad_norm": 7.2714009284973145, "learning_rate": 1.372618453865337e-05, "loss": 0.5088, "step": 25170 }, { "epoch": 6.279301745635911, "grad_norm": 8.970192909240723, "learning_rate": 1.3723690773067333e-05, "loss": 0.4345, "step": 25180 }, { "epoch": 6.2817955112219455, "grad_norm": 6.6837334632873535, "learning_rate": 1.3721197007481297e-05, "loss": 0.3889, "step": 25190 }, { "epoch": 6.28428927680798, "grad_norm": 5.488166809082031, "learning_rate": 1.3718703241895264e-05, "loss": 0.3952, "step": 25200 }, { "epoch": 6.286783042394015, "grad_norm": 7.671344757080078, "learning_rate": 1.3716209476309227e-05, "loss": 0.3589, "step": 25210 }, { "epoch": 6.28927680798005, "grad_norm": 6.706274032592773, "learning_rate": 1.3713715710723193e-05, "loss": 0.4448, "step": 25220 }, { "epoch": 6.291770573566085, "grad_norm": 8.258255004882812, "learning_rate": 1.3711221945137158e-05, "loss": 0.3784, "step": 25230 }, { "epoch": 6.29426433915212, "grad_norm": 8.191130638122559, "learning_rate": 1.3708728179551123e-05, "loss": 0.4016, "step": 25240 }, { "epoch": 6.296758104738155, "grad_norm": 6.618471622467041, "learning_rate": 1.3706234413965089e-05, "loss": 0.319, "step": 25250 }, { "epoch": 6.2992518703241895, "grad_norm": 4.492340564727783, "learning_rate": 1.3703740648379054e-05, "loss": 0.3433, "step": 25260 }, { "epoch": 6.301745635910224, "grad_norm": 7.213396072387695, "learning_rate": 1.3701246882793017e-05, "loss": 0.336, "step": 25270 }, { "epoch": 6.304239401496259, "grad_norm": 6.88414192199707, "learning_rate": 1.3698753117206984e-05, "loss": 0.3996, "step": 25280 }, { "epoch": 6.306733167082294, "grad_norm": 5.107346057891846, "learning_rate": 1.3696259351620948e-05, "loss": 0.3807, "step": 25290 }, { "epoch": 6.309226932668329, "grad_norm": 6.0163445472717285, "learning_rate": 1.3693765586034913e-05, "loss": 0.3609, "step": 25300 }, { "epoch": 6.311720698254364, "grad_norm": 6.904167175292969, "learning_rate": 1.3691271820448879e-05, "loss": 0.3868, "step": 25310 }, { "epoch": 6.314214463840399, "grad_norm": 7.417514801025391, "learning_rate": 1.3688778054862844e-05, "loss": 0.384, "step": 25320 }, { "epoch": 6.316708229426434, "grad_norm": 7.786649227142334, "learning_rate": 1.368628428927681e-05, "loss": 0.4802, "step": 25330 }, { "epoch": 6.3192019950124685, "grad_norm": 7.041213512420654, "learning_rate": 1.3683790523690775e-05, "loss": 0.4231, "step": 25340 }, { "epoch": 6.321695760598503, "grad_norm": 4.815100193023682, "learning_rate": 1.3681296758104738e-05, "loss": 0.3803, "step": 25350 }, { "epoch": 6.324189526184538, "grad_norm": 5.091302871704102, "learning_rate": 1.3678802992518705e-05, "loss": 0.3602, "step": 25360 }, { "epoch": 6.326683291770574, "grad_norm": 6.59751558303833, "learning_rate": 1.3676309226932669e-05, "loss": 0.3558, "step": 25370 }, { "epoch": 6.329177057356609, "grad_norm": 6.500771522521973, "learning_rate": 1.3673815461346636e-05, "loss": 0.3743, "step": 25380 }, { "epoch": 6.331670822942644, "grad_norm": 5.946366786956787, "learning_rate": 1.36713216957606e-05, "loss": 0.3757, "step": 25390 }, { "epoch": 6.334164588528679, "grad_norm": 6.4445672035217285, "learning_rate": 1.3668827930174565e-05, "loss": 0.3488, "step": 25400 }, { "epoch": 6.3366583541147135, "grad_norm": 8.107527732849121, "learning_rate": 1.366633416458853e-05, "loss": 0.3637, "step": 25410 }, { "epoch": 6.339152119700748, "grad_norm": 7.021248817443848, "learning_rate": 1.3663840399002495e-05, "loss": 0.4392, "step": 25420 }, { "epoch": 6.341645885286783, "grad_norm": 7.19385290145874, "learning_rate": 1.3661346633416459e-05, "loss": 0.3443, "step": 25430 }, { "epoch": 6.344139650872818, "grad_norm": 8.480057716369629, "learning_rate": 1.3658852867830426e-05, "loss": 0.3249, "step": 25440 }, { "epoch": 6.346633416458853, "grad_norm": 6.283059120178223, "learning_rate": 1.365635910224439e-05, "loss": 0.4301, "step": 25450 }, { "epoch": 6.349127182044888, "grad_norm": 9.004962921142578, "learning_rate": 1.3653865336658357e-05, "loss": 0.3856, "step": 25460 }, { "epoch": 6.351620947630923, "grad_norm": 6.921197891235352, "learning_rate": 1.365137157107232e-05, "loss": 0.3504, "step": 25470 }, { "epoch": 6.3541147132169575, "grad_norm": 7.9730634689331055, "learning_rate": 1.3648877805486284e-05, "loss": 0.4598, "step": 25480 }, { "epoch": 6.356608478802992, "grad_norm": 5.783198356628418, "learning_rate": 1.3646384039900251e-05, "loss": 0.388, "step": 25490 }, { "epoch": 6.359102244389027, "grad_norm": 5.059525489807129, "learning_rate": 1.3643890274314216e-05, "loss": 0.327, "step": 25500 }, { "epoch": 6.361596009975062, "grad_norm": 8.875046730041504, "learning_rate": 1.364139650872818e-05, "loss": 0.3387, "step": 25510 }, { "epoch": 6.364089775561097, "grad_norm": 8.62806224822998, "learning_rate": 1.3638902743142147e-05, "loss": 0.3109, "step": 25520 }, { "epoch": 6.366583541147132, "grad_norm": 6.7610344886779785, "learning_rate": 1.363640897755611e-05, "loss": 0.3855, "step": 25530 }, { "epoch": 6.369077306733167, "grad_norm": 10.090450286865234, "learning_rate": 1.3633915211970077e-05, "loss": 0.3404, "step": 25540 }, { "epoch": 6.371571072319202, "grad_norm": 7.940425872802734, "learning_rate": 1.3631421446384041e-05, "loss": 0.3752, "step": 25550 }, { "epoch": 6.374064837905237, "grad_norm": 6.398936748504639, "learning_rate": 1.3628927680798005e-05, "loss": 0.3769, "step": 25560 }, { "epoch": 6.376558603491272, "grad_norm": 4.926769733428955, "learning_rate": 1.3626433915211972e-05, "loss": 0.4024, "step": 25570 }, { "epoch": 6.379052369077307, "grad_norm": 7.145235538482666, "learning_rate": 1.3623940149625935e-05, "loss": 0.3512, "step": 25580 }, { "epoch": 6.381546134663342, "grad_norm": 8.193603515625, "learning_rate": 1.36214463840399e-05, "loss": 0.3556, "step": 25590 }, { "epoch": 6.384039900249377, "grad_norm": 6.601048469543457, "learning_rate": 1.3618952618453866e-05, "loss": 0.442, "step": 25600 }, { "epoch": 6.386533665835412, "grad_norm": 6.299713134765625, "learning_rate": 1.3616458852867831e-05, "loss": 0.3942, "step": 25610 }, { "epoch": 6.389027431421447, "grad_norm": 5.013650417327881, "learning_rate": 1.3613965087281798e-05, "loss": 0.3576, "step": 25620 }, { "epoch": 6.3915211970074814, "grad_norm": 6.1320905685424805, "learning_rate": 1.3611471321695762e-05, "loss": 0.426, "step": 25630 }, { "epoch": 6.394014962593516, "grad_norm": 7.493283748626709, "learning_rate": 1.3608977556109725e-05, "loss": 0.4207, "step": 25640 }, { "epoch": 6.396508728179551, "grad_norm": 7.163254261016846, "learning_rate": 1.3606483790523692e-05, "loss": 0.4041, "step": 25650 }, { "epoch": 6.399002493765586, "grad_norm": 4.770473957061768, "learning_rate": 1.3603990024937656e-05, "loss": 0.3493, "step": 25660 }, { "epoch": 6.401496259351621, "grad_norm": 6.536076545715332, "learning_rate": 1.3601496259351623e-05, "loss": 0.3204, "step": 25670 }, { "epoch": 6.403990024937656, "grad_norm": 8.567519187927246, "learning_rate": 1.3599002493765587e-05, "loss": 0.4084, "step": 25680 }, { "epoch": 6.406483790523691, "grad_norm": 10.568842887878418, "learning_rate": 1.3596508728179552e-05, "loss": 0.3572, "step": 25690 }, { "epoch": 6.4089775561097255, "grad_norm": 8.251084327697754, "learning_rate": 1.3594014962593517e-05, "loss": 0.4384, "step": 25700 }, { "epoch": 6.41147132169576, "grad_norm": 5.1175408363342285, "learning_rate": 1.3591521197007483e-05, "loss": 0.3512, "step": 25710 }, { "epoch": 6.413965087281795, "grad_norm": 5.352699279785156, "learning_rate": 1.3589027431421446e-05, "loss": 0.3152, "step": 25720 }, { "epoch": 6.41645885286783, "grad_norm": 11.443229675292969, "learning_rate": 1.3586533665835413e-05, "loss": 0.3842, "step": 25730 }, { "epoch": 6.418952618453865, "grad_norm": 4.957744598388672, "learning_rate": 1.3584039900249377e-05, "loss": 0.353, "step": 25740 }, { "epoch": 6.4214463840399, "grad_norm": 5.221795082092285, "learning_rate": 1.3581546134663344e-05, "loss": 0.3797, "step": 25750 }, { "epoch": 6.423940149625935, "grad_norm": 5.243051052093506, "learning_rate": 1.3579052369077307e-05, "loss": 0.3356, "step": 25760 }, { "epoch": 6.42643391521197, "grad_norm": 6.537060260772705, "learning_rate": 1.3576558603491273e-05, "loss": 0.4025, "step": 25770 }, { "epoch": 6.428927680798005, "grad_norm": 7.9792094230651855, "learning_rate": 1.3574064837905238e-05, "loss": 0.4992, "step": 25780 }, { "epoch": 6.43142144638404, "grad_norm": 6.797430992126465, "learning_rate": 1.3571571072319203e-05, "loss": 0.3187, "step": 25790 }, { "epoch": 6.433915211970075, "grad_norm": 5.758587837219238, "learning_rate": 1.3569077306733167e-05, "loss": 0.3202, "step": 25800 }, { "epoch": 6.43640897755611, "grad_norm": 6.541738986968994, "learning_rate": 1.3566583541147134e-05, "loss": 0.4289, "step": 25810 }, { "epoch": 6.438902743142145, "grad_norm": 7.5989227294921875, "learning_rate": 1.3564089775561098e-05, "loss": 0.4379, "step": 25820 }, { "epoch": 6.44139650872818, "grad_norm": 10.161792755126953, "learning_rate": 1.3561596009975065e-05, "loss": 0.344, "step": 25830 }, { "epoch": 6.443890274314215, "grad_norm": 6.123249053955078, "learning_rate": 1.3559102244389028e-05, "loss": 0.3364, "step": 25840 }, { "epoch": 6.446384039900249, "grad_norm": 4.593570709228516, "learning_rate": 1.3556608478802994e-05, "loss": 0.4108, "step": 25850 }, { "epoch": 6.448877805486284, "grad_norm": 14.478041648864746, "learning_rate": 1.3554114713216959e-05, "loss": 0.2916, "step": 25860 }, { "epoch": 6.451371571072319, "grad_norm": 9.636341094970703, "learning_rate": 1.3551620947630924e-05, "loss": 0.4033, "step": 25870 }, { "epoch": 6.453865336658354, "grad_norm": 6.884788513183594, "learning_rate": 1.354912718204489e-05, "loss": 0.3478, "step": 25880 }, { "epoch": 6.456359102244389, "grad_norm": 6.347228527069092, "learning_rate": 1.3546633416458855e-05, "loss": 0.3867, "step": 25890 }, { "epoch": 6.458852867830424, "grad_norm": 6.127768516540527, "learning_rate": 1.3544139650872818e-05, "loss": 0.3709, "step": 25900 }, { "epoch": 6.461346633416459, "grad_norm": 5.719086647033691, "learning_rate": 1.3541645885286785e-05, "loss": 0.3649, "step": 25910 }, { "epoch": 6.4638403990024935, "grad_norm": 7.747276306152344, "learning_rate": 1.3539152119700749e-05, "loss": 0.3425, "step": 25920 }, { "epoch": 6.466334164588528, "grad_norm": 7.665780544281006, "learning_rate": 1.3536658354114713e-05, "loss": 0.427, "step": 25930 }, { "epoch": 6.468827930174563, "grad_norm": 4.468764305114746, "learning_rate": 1.353416458852868e-05, "loss": 0.3758, "step": 25940 }, { "epoch": 6.471321695760598, "grad_norm": 8.336400985717773, "learning_rate": 1.3531670822942643e-05, "loss": 0.3451, "step": 25950 }, { "epoch": 6.473815461346634, "grad_norm": 9.712798118591309, "learning_rate": 1.352917705735661e-05, "loss": 0.3795, "step": 25960 }, { "epoch": 6.476309226932669, "grad_norm": 7.729135990142822, "learning_rate": 1.3526683291770576e-05, "loss": 0.3696, "step": 25970 }, { "epoch": 6.478802992518704, "grad_norm": 6.578273296356201, "learning_rate": 1.352418952618454e-05, "loss": 0.4044, "step": 25980 }, { "epoch": 6.4812967581047385, "grad_norm": 11.453405380249023, "learning_rate": 1.3521695760598506e-05, "loss": 0.3201, "step": 25990 }, { "epoch": 6.483790523690773, "grad_norm": 6.865024566650391, "learning_rate": 1.351920199501247e-05, "loss": 0.3353, "step": 26000 }, { "epoch": 6.486284289276808, "grad_norm": 6.444002151489258, "learning_rate": 1.3516708229426433e-05, "loss": 0.3806, "step": 26010 }, { "epoch": 6.488778054862843, "grad_norm": 7.583583831787109, "learning_rate": 1.35142144638404e-05, "loss": 0.3622, "step": 26020 }, { "epoch": 6.491271820448878, "grad_norm": 7.034835338592529, "learning_rate": 1.3511720698254364e-05, "loss": 0.4238, "step": 26030 }, { "epoch": 6.493765586034913, "grad_norm": 7.059061050415039, "learning_rate": 1.3509226932668331e-05, "loss": 0.3675, "step": 26040 }, { "epoch": 6.496259351620948, "grad_norm": 5.760257720947266, "learning_rate": 1.3506733167082295e-05, "loss": 0.3814, "step": 26050 }, { "epoch": 6.498753117206983, "grad_norm": 6.137911319732666, "learning_rate": 1.350423940149626e-05, "loss": 0.3576, "step": 26060 }, { "epoch": 6.501246882793017, "grad_norm": 7.457276821136475, "learning_rate": 1.3501745635910225e-05, "loss": 0.3941, "step": 26070 }, { "epoch": 6.503740648379052, "grad_norm": 7.268845081329346, "learning_rate": 1.349925187032419e-05, "loss": 0.3924, "step": 26080 }, { "epoch": 6.506234413965087, "grad_norm": 7.861654758453369, "learning_rate": 1.3496758104738154e-05, "loss": 0.3578, "step": 26090 }, { "epoch": 6.508728179551122, "grad_norm": 8.676166534423828, "learning_rate": 1.3494264339152121e-05, "loss": 0.4239, "step": 26100 }, { "epoch": 6.511221945137157, "grad_norm": 6.739658355712891, "learning_rate": 1.3491770573566085e-05, "loss": 0.4131, "step": 26110 }, { "epoch": 6.513715710723192, "grad_norm": 6.20055627822876, "learning_rate": 1.3489276807980052e-05, "loss": 0.4278, "step": 26120 }, { "epoch": 6.516209476309227, "grad_norm": 6.212075710296631, "learning_rate": 1.3486783042394015e-05, "loss": 0.3607, "step": 26130 }, { "epoch": 6.5187032418952615, "grad_norm": 10.847241401672363, "learning_rate": 1.348428927680798e-05, "loss": 0.3624, "step": 26140 }, { "epoch": 6.521197007481296, "grad_norm": 6.094544887542725, "learning_rate": 1.3481795511221946e-05, "loss": 0.4353, "step": 26150 }, { "epoch": 6.523690773067331, "grad_norm": 7.994797706604004, "learning_rate": 1.3479301745635911e-05, "loss": 0.3961, "step": 26160 }, { "epoch": 6.526184538653366, "grad_norm": 8.158559799194336, "learning_rate": 1.3476807980049877e-05, "loss": 0.3696, "step": 26170 }, { "epoch": 6.528678304239402, "grad_norm": 7.66077995300293, "learning_rate": 1.3474314214463842e-05, "loss": 0.3302, "step": 26180 }, { "epoch": 6.531172069825437, "grad_norm": 6.937994480133057, "learning_rate": 1.3471820448877806e-05, "loss": 0.4102, "step": 26190 }, { "epoch": 6.533665835411472, "grad_norm": 7.02347993850708, "learning_rate": 1.3469326683291773e-05, "loss": 0.4596, "step": 26200 }, { "epoch": 6.5361596009975065, "grad_norm": 7.245387077331543, "learning_rate": 1.3466832917705736e-05, "loss": 0.3513, "step": 26210 }, { "epoch": 6.538653366583541, "grad_norm": 7.9023237228393555, "learning_rate": 1.3464339152119702e-05, "loss": 0.3554, "step": 26220 }, { "epoch": 6.541147132169576, "grad_norm": 5.199001789093018, "learning_rate": 1.3461845386533667e-05, "loss": 0.4321, "step": 26230 }, { "epoch": 6.543640897755611, "grad_norm": 5.115858554840088, "learning_rate": 1.3459351620947632e-05, "loss": 0.3597, "step": 26240 }, { "epoch": 6.546134663341646, "grad_norm": 7.653583526611328, "learning_rate": 1.3456857855361597e-05, "loss": 0.4044, "step": 26250 }, { "epoch": 6.548628428927681, "grad_norm": 5.315465450286865, "learning_rate": 1.3454364089775563e-05, "loss": 0.3737, "step": 26260 }, { "epoch": 6.551122194513716, "grad_norm": 6.182226657867432, "learning_rate": 1.3451870324189526e-05, "loss": 0.3521, "step": 26270 }, { "epoch": 6.553615960099751, "grad_norm": 5.8310546875, "learning_rate": 1.3449376558603493e-05, "loss": 0.3277, "step": 26280 }, { "epoch": 6.556109725685785, "grad_norm": 6.5603437423706055, "learning_rate": 1.3446882793017457e-05, "loss": 0.3724, "step": 26290 }, { "epoch": 6.55860349127182, "grad_norm": 7.382344722747803, "learning_rate": 1.344438902743142e-05, "loss": 0.3898, "step": 26300 }, { "epoch": 6.561097256857855, "grad_norm": 5.281245708465576, "learning_rate": 1.3441895261845388e-05, "loss": 0.3697, "step": 26310 }, { "epoch": 6.56359102244389, "grad_norm": 6.038678169250488, "learning_rate": 1.3439401496259353e-05, "loss": 0.3634, "step": 26320 }, { "epoch": 6.566084788029925, "grad_norm": 8.70938491821289, "learning_rate": 1.3436907730673318e-05, "loss": 0.3867, "step": 26330 }, { "epoch": 6.56857855361596, "grad_norm": 7.3120598793029785, "learning_rate": 1.3434413965087284e-05, "loss": 0.3791, "step": 26340 }, { "epoch": 6.571072319201995, "grad_norm": 6.2208428382873535, "learning_rate": 1.3431920199501247e-05, "loss": 0.3998, "step": 26350 }, { "epoch": 6.57356608478803, "grad_norm": 6.549139499664307, "learning_rate": 1.3429426433915214e-05, "loss": 0.3935, "step": 26360 }, { "epoch": 6.576059850374065, "grad_norm": 4.998687267303467, "learning_rate": 1.3426932668329178e-05, "loss": 0.41, "step": 26370 }, { "epoch": 6.5785536159601, "grad_norm": 5.387908458709717, "learning_rate": 1.3424438902743145e-05, "loss": 0.3421, "step": 26380 }, { "epoch": 6.581047381546135, "grad_norm": 7.001126766204834, "learning_rate": 1.3421945137157108e-05, "loss": 0.3603, "step": 26390 }, { "epoch": 6.58354114713217, "grad_norm": 10.226015090942383, "learning_rate": 1.3419451371571072e-05, "loss": 0.3955, "step": 26400 }, { "epoch": 6.586034912718205, "grad_norm": 6.176059722900391, "learning_rate": 1.3416957605985039e-05, "loss": 0.302, "step": 26410 }, { "epoch": 6.58852867830424, "grad_norm": 5.293604373931885, "learning_rate": 1.3414463840399003e-05, "loss": 0.3701, "step": 26420 }, { "epoch": 6.5910224438902745, "grad_norm": 8.43692398071289, "learning_rate": 1.3411970074812968e-05, "loss": 0.3852, "step": 26430 }, { "epoch": 6.593516209476309, "grad_norm": 7.703579425811768, "learning_rate": 1.3409476309226935e-05, "loss": 0.4242, "step": 26440 }, { "epoch": 6.596009975062344, "grad_norm": 9.26103687286377, "learning_rate": 1.3406982543640899e-05, "loss": 0.3328, "step": 26450 }, { "epoch": 6.598503740648379, "grad_norm": 7.32110071182251, "learning_rate": 1.3404488778054866e-05, "loss": 0.3679, "step": 26460 }, { "epoch": 6.600997506234414, "grad_norm": 6.620180130004883, "learning_rate": 1.340199501246883e-05, "loss": 0.4228, "step": 26470 }, { "epoch": 6.603491271820449, "grad_norm": 7.809380531311035, "learning_rate": 1.3399501246882793e-05, "loss": 0.4333, "step": 26480 }, { "epoch": 6.605985037406484, "grad_norm": 5.4282331466674805, "learning_rate": 1.339700748129676e-05, "loss": 0.4154, "step": 26490 }, { "epoch": 6.6084788029925186, "grad_norm": 6.508066177368164, "learning_rate": 1.3394513715710723e-05, "loss": 0.3998, "step": 26500 }, { "epoch": 6.610972568578553, "grad_norm": 9.248714447021484, "learning_rate": 1.3392019950124689e-05, "loss": 0.3763, "step": 26510 }, { "epoch": 6.613466334164588, "grad_norm": 7.227468967437744, "learning_rate": 1.3389526184538654e-05, "loss": 0.3659, "step": 26520 }, { "epoch": 6.615960099750623, "grad_norm": 7.72911262512207, "learning_rate": 1.338703241895262e-05, "loss": 0.4427, "step": 26530 }, { "epoch": 6.618453865336658, "grad_norm": 5.077463626861572, "learning_rate": 1.3384538653366585e-05, "loss": 0.3655, "step": 26540 }, { "epoch": 6.620947630922693, "grad_norm": 5.688047885894775, "learning_rate": 1.338204488778055e-05, "loss": 0.3803, "step": 26550 }, { "epoch": 6.623441396508728, "grad_norm": 5.621146202087402, "learning_rate": 1.3379551122194514e-05, "loss": 0.3938, "step": 26560 }, { "epoch": 6.625935162094763, "grad_norm": 7.967491626739502, "learning_rate": 1.337705735660848e-05, "loss": 0.3353, "step": 26570 }, { "epoch": 6.628428927680798, "grad_norm": 10.237777709960938, "learning_rate": 1.3374563591022444e-05, "loss": 0.3249, "step": 26580 }, { "epoch": 6.630922693266833, "grad_norm": 13.385808944702148, "learning_rate": 1.337206982543641e-05, "loss": 0.4134, "step": 26590 }, { "epoch": 6.633416458852868, "grad_norm": 4.2393341064453125, "learning_rate": 1.3369576059850375e-05, "loss": 0.4, "step": 26600 }, { "epoch": 6.635910224438903, "grad_norm": 4.3933281898498535, "learning_rate": 1.336708229426434e-05, "loss": 0.3216, "step": 26610 }, { "epoch": 6.638403990024938, "grad_norm": 4.819437503814697, "learning_rate": 1.3364588528678305e-05, "loss": 0.3578, "step": 26620 }, { "epoch": 6.640897755610973, "grad_norm": 4.126856803894043, "learning_rate": 1.336209476309227e-05, "loss": 0.3625, "step": 26630 }, { "epoch": 6.643391521197008, "grad_norm": 5.08698034286499, "learning_rate": 1.3359600997506234e-05, "loss": 0.417, "step": 26640 }, { "epoch": 6.6458852867830425, "grad_norm": 5.38383674621582, "learning_rate": 1.3357107231920201e-05, "loss": 0.356, "step": 26650 }, { "epoch": 6.648379052369077, "grad_norm": 8.648747444152832, "learning_rate": 1.3354613466334165e-05, "loss": 0.3886, "step": 26660 }, { "epoch": 6.650872817955112, "grad_norm": 4.306643486022949, "learning_rate": 1.3352119700748132e-05, "loss": 0.3493, "step": 26670 }, { "epoch": 6.653366583541147, "grad_norm": 6.597197532653809, "learning_rate": 1.3349625935162096e-05, "loss": 0.372, "step": 26680 }, { "epoch": 6.655860349127182, "grad_norm": 6.69064474105835, "learning_rate": 1.3347132169576061e-05, "loss": 0.3965, "step": 26690 }, { "epoch": 6.658354114713217, "grad_norm": 7.140894412994385, "learning_rate": 1.3344638403990026e-05, "loss": 0.3822, "step": 26700 }, { "epoch": 6.660847880299252, "grad_norm": 5.859182357788086, "learning_rate": 1.3342144638403992e-05, "loss": 0.3458, "step": 26710 }, { "epoch": 6.6633416458852865, "grad_norm": 6.949796199798584, "learning_rate": 1.3339650872817955e-05, "loss": 0.4071, "step": 26720 }, { "epoch": 6.665835411471321, "grad_norm": 8.996414184570312, "learning_rate": 1.3337157107231922e-05, "loss": 0.3914, "step": 26730 }, { "epoch": 6.668329177057356, "grad_norm": 9.598322868347168, "learning_rate": 1.3334663341645886e-05, "loss": 0.3846, "step": 26740 }, { "epoch": 6.670822942643391, "grad_norm": 5.436387062072754, "learning_rate": 1.3332169576059853e-05, "loss": 0.3324, "step": 26750 }, { "epoch": 6.673316708229427, "grad_norm": 8.324487686157227, "learning_rate": 1.3329675810473816e-05, "loss": 0.3996, "step": 26760 }, { "epoch": 6.675810473815462, "grad_norm": 9.037124633789062, "learning_rate": 1.3327182044887782e-05, "loss": 0.4561, "step": 26770 }, { "epoch": 6.678304239401497, "grad_norm": 7.689178466796875, "learning_rate": 1.3324688279301747e-05, "loss": 0.4511, "step": 26780 }, { "epoch": 6.6807980049875315, "grad_norm": 6.272636890411377, "learning_rate": 1.3322194513715712e-05, "loss": 0.4475, "step": 26790 }, { "epoch": 6.683291770573566, "grad_norm": 4.632606029510498, "learning_rate": 1.3319700748129676e-05, "loss": 0.4122, "step": 26800 }, { "epoch": 6.685785536159601, "grad_norm": 7.478541851043701, "learning_rate": 1.3317206982543643e-05, "loss": 0.4107, "step": 26810 }, { "epoch": 6.688279301745636, "grad_norm": 10.429938316345215, "learning_rate": 1.3314713216957607e-05, "loss": 0.4623, "step": 26820 }, { "epoch": 6.690773067331671, "grad_norm": 7.3426995277404785, "learning_rate": 1.3312219451371574e-05, "loss": 0.4568, "step": 26830 }, { "epoch": 6.693266832917706, "grad_norm": 7.244011402130127, "learning_rate": 1.3309725685785537e-05, "loss": 0.3298, "step": 26840 }, { "epoch": 6.695760598503741, "grad_norm": 9.129273414611816, "learning_rate": 1.33072319201995e-05, "loss": 0.3867, "step": 26850 }, { "epoch": 6.698254364089776, "grad_norm": 7.908605575561523, "learning_rate": 1.3304738154613468e-05, "loss": 0.394, "step": 26860 }, { "epoch": 6.7007481296758105, "grad_norm": 5.406508445739746, "learning_rate": 1.3302244389027431e-05, "loss": 0.3463, "step": 26870 }, { "epoch": 6.703241895261845, "grad_norm": 7.902821063995361, "learning_rate": 1.3299750623441398e-05, "loss": 0.4256, "step": 26880 }, { "epoch": 6.70573566084788, "grad_norm": 5.451107978820801, "learning_rate": 1.3297256857855362e-05, "loss": 0.3999, "step": 26890 }, { "epoch": 6.708229426433915, "grad_norm": 4.476869106292725, "learning_rate": 1.3294763092269327e-05, "loss": 0.3836, "step": 26900 }, { "epoch": 6.71072319201995, "grad_norm": 6.899260520935059, "learning_rate": 1.3292269326683294e-05, "loss": 0.4067, "step": 26910 }, { "epoch": 6.713216957605985, "grad_norm": 8.896599769592285, "learning_rate": 1.3289775561097258e-05, "loss": 0.4425, "step": 26920 }, { "epoch": 6.71571072319202, "grad_norm": 7.202389717102051, "learning_rate": 1.3287281795511222e-05, "loss": 0.3334, "step": 26930 }, { "epoch": 6.7182044887780545, "grad_norm": 5.247492790222168, "learning_rate": 1.3284788029925189e-05, "loss": 0.4, "step": 26940 }, { "epoch": 6.720698254364089, "grad_norm": 10.521095275878906, "learning_rate": 1.3282294264339152e-05, "loss": 0.3652, "step": 26950 }, { "epoch": 6.723192019950124, "grad_norm": 5.5208611488342285, "learning_rate": 1.327980049875312e-05, "loss": 0.4477, "step": 26960 }, { "epoch": 6.725685785536159, "grad_norm": 8.8683500289917, "learning_rate": 1.3277306733167083e-05, "loss": 0.3449, "step": 26970 }, { "epoch": 6.728179551122195, "grad_norm": 5.689904689788818, "learning_rate": 1.3274812967581048e-05, "loss": 0.4239, "step": 26980 }, { "epoch": 6.73067331670823, "grad_norm": 7.4817328453063965, "learning_rate": 1.3272319201995013e-05, "loss": 0.4688, "step": 26990 }, { "epoch": 6.733167082294265, "grad_norm": 8.645492553710938, "learning_rate": 1.3269825436408979e-05, "loss": 0.415, "step": 27000 }, { "epoch": 6.7356608478802995, "grad_norm": 4.103111743927002, "learning_rate": 1.3267331670822942e-05, "loss": 0.3628, "step": 27010 }, { "epoch": 6.738154613466334, "grad_norm": 6.457451820373535, "learning_rate": 1.326483790523691e-05, "loss": 0.4813, "step": 27020 }, { "epoch": 6.740648379052369, "grad_norm": 11.222722053527832, "learning_rate": 1.3262344139650873e-05, "loss": 0.3365, "step": 27030 }, { "epoch": 6.743142144638404, "grad_norm": 5.118739128112793, "learning_rate": 1.325985037406484e-05, "loss": 0.35, "step": 27040 }, { "epoch": 6.745635910224439, "grad_norm": 8.133965492248535, "learning_rate": 1.3257356608478804e-05, "loss": 0.3907, "step": 27050 }, { "epoch": 6.748129675810474, "grad_norm": 7.104856491088867, "learning_rate": 1.3254862842892769e-05, "loss": 0.4105, "step": 27060 }, { "epoch": 6.750623441396509, "grad_norm": 5.757190227508545, "learning_rate": 1.3252369077306734e-05, "loss": 0.4462, "step": 27070 }, { "epoch": 6.753117206982544, "grad_norm": 8.72276782989502, "learning_rate": 1.32498753117207e-05, "loss": 0.344, "step": 27080 }, { "epoch": 6.7556109725685785, "grad_norm": 3.8614084720611572, "learning_rate": 1.3247381546134663e-05, "loss": 0.2905, "step": 27090 }, { "epoch": 6.758104738154613, "grad_norm": 6.985355377197266, "learning_rate": 1.324488778054863e-05, "loss": 0.3524, "step": 27100 }, { "epoch": 6.760598503740648, "grad_norm": 5.594140529632568, "learning_rate": 1.3242394014962594e-05, "loss": 0.4011, "step": 27110 }, { "epoch": 6.763092269326683, "grad_norm": 7.396782875061035, "learning_rate": 1.323990024937656e-05, "loss": 0.378, "step": 27120 }, { "epoch": 6.765586034912718, "grad_norm": 7.146440505981445, "learning_rate": 1.3237406483790524e-05, "loss": 0.3565, "step": 27130 }, { "epoch": 6.768079800498753, "grad_norm": 4.529737949371338, "learning_rate": 1.323491271820449e-05, "loss": 0.3357, "step": 27140 }, { "epoch": 6.770573566084788, "grad_norm": 5.635150909423828, "learning_rate": 1.3232418952618455e-05, "loss": 0.4166, "step": 27150 }, { "epoch": 6.773067331670823, "grad_norm": 7.09439754486084, "learning_rate": 1.322992518703242e-05, "loss": 0.434, "step": 27160 }, { "epoch": 6.775561097256858, "grad_norm": 7.409408092498779, "learning_rate": 1.3227431421446386e-05, "loss": 0.3876, "step": 27170 }, { "epoch": 6.778054862842893, "grad_norm": 7.326381683349609, "learning_rate": 1.3224937655860351e-05, "loss": 0.3933, "step": 27180 }, { "epoch": 6.780548628428928, "grad_norm": 6.994472980499268, "learning_rate": 1.3222443890274315e-05, "loss": 0.3826, "step": 27190 }, { "epoch": 6.783042394014963, "grad_norm": 6.760201454162598, "learning_rate": 1.3219950124688282e-05, "loss": 0.4412, "step": 27200 }, { "epoch": 6.785536159600998, "grad_norm": 5.957584381103516, "learning_rate": 1.3217456359102245e-05, "loss": 0.3916, "step": 27210 }, { "epoch": 6.788029925187033, "grad_norm": 8.132131576538086, "learning_rate": 1.3214962593516209e-05, "loss": 0.4124, "step": 27220 }, { "epoch": 6.7905236907730675, "grad_norm": 7.923279762268066, "learning_rate": 1.3212468827930176e-05, "loss": 0.3725, "step": 27230 }, { "epoch": 6.793017456359102, "grad_norm": 8.802374839782715, "learning_rate": 1.3209975062344141e-05, "loss": 0.332, "step": 27240 }, { "epoch": 6.795511221945137, "grad_norm": 8.764188766479492, "learning_rate": 1.3207481296758106e-05, "loss": 0.4053, "step": 27250 }, { "epoch": 6.798004987531172, "grad_norm": 7.860824108123779, "learning_rate": 1.3204987531172072e-05, "loss": 0.4145, "step": 27260 }, { "epoch": 6.800498753117207, "grad_norm": 5.447219371795654, "learning_rate": 1.3202493765586035e-05, "loss": 0.3885, "step": 27270 }, { "epoch": 6.802992518703242, "grad_norm": 4.570130348205566, "learning_rate": 1.3200000000000002e-05, "loss": 0.3414, "step": 27280 }, { "epoch": 6.805486284289277, "grad_norm": 5.973016262054443, "learning_rate": 1.3197506234413966e-05, "loss": 0.3436, "step": 27290 }, { "epoch": 6.807980049875312, "grad_norm": 6.451689720153809, "learning_rate": 1.319501246882793e-05, "loss": 0.3842, "step": 27300 }, { "epoch": 6.8104738154613464, "grad_norm": 6.162374496459961, "learning_rate": 1.3192518703241897e-05, "loss": 0.386, "step": 27310 }, { "epoch": 6.812967581047381, "grad_norm": 7.707452774047852, "learning_rate": 1.319002493765586e-05, "loss": 0.4001, "step": 27320 }, { "epoch": 6.815461346633416, "grad_norm": 5.568814754486084, "learning_rate": 1.3187531172069827e-05, "loss": 0.4183, "step": 27330 }, { "epoch": 6.817955112219451, "grad_norm": 6.596290588378906, "learning_rate": 1.318503740648379e-05, "loss": 0.4037, "step": 27340 }, { "epoch": 6.820448877805486, "grad_norm": 6.735906600952148, "learning_rate": 1.3182543640897756e-05, "loss": 0.3821, "step": 27350 }, { "epoch": 6.822942643391521, "grad_norm": 5.793524265289307, "learning_rate": 1.3180049875311723e-05, "loss": 0.3668, "step": 27360 }, { "epoch": 6.825436408977556, "grad_norm": 5.449307918548584, "learning_rate": 1.3177556109725687e-05, "loss": 0.3029, "step": 27370 }, { "epoch": 6.8279301745635905, "grad_norm": 7.30305814743042, "learning_rate": 1.3175062344139654e-05, "loss": 0.3419, "step": 27380 }, { "epoch": 6.830423940149626, "grad_norm": 6.304130554199219, "learning_rate": 1.3172568578553617e-05, "loss": 0.3157, "step": 27390 }, { "epoch": 6.832917705735661, "grad_norm": 4.693641185760498, "learning_rate": 1.3170074812967581e-05, "loss": 0.3667, "step": 27400 }, { "epoch": 6.835411471321696, "grad_norm": 13.741803169250488, "learning_rate": 1.3167581047381548e-05, "loss": 0.3824, "step": 27410 }, { "epoch": 6.837905236907731, "grad_norm": 5.390978813171387, "learning_rate": 1.3165087281795512e-05, "loss": 0.3571, "step": 27420 }, { "epoch": 6.840399002493766, "grad_norm": 8.878130912780762, "learning_rate": 1.3162593516209477e-05, "loss": 0.3623, "step": 27430 }, { "epoch": 6.842892768079801, "grad_norm": 9.977058410644531, "learning_rate": 1.3160099750623442e-05, "loss": 0.3624, "step": 27440 }, { "epoch": 6.8453865336658355, "grad_norm": 4.6882452964782715, "learning_rate": 1.3157605985037408e-05, "loss": 0.3141, "step": 27450 }, { "epoch": 6.84788029925187, "grad_norm": 14.156923294067383, "learning_rate": 1.3155112219451373e-05, "loss": 0.4383, "step": 27460 }, { "epoch": 6.850374064837905, "grad_norm": 5.467523097991943, "learning_rate": 1.3152618453865338e-05, "loss": 0.367, "step": 27470 }, { "epoch": 6.85286783042394, "grad_norm": 5.007584571838379, "learning_rate": 1.3150124688279302e-05, "loss": 0.3777, "step": 27480 }, { "epoch": 6.855361596009975, "grad_norm": 9.654122352600098, "learning_rate": 1.3147630922693269e-05, "loss": 0.3868, "step": 27490 }, { "epoch": 6.85785536159601, "grad_norm": 8.467918395996094, "learning_rate": 1.3145137157107232e-05, "loss": 0.3602, "step": 27500 }, { "epoch": 6.860349127182045, "grad_norm": 8.106925010681152, "learning_rate": 1.3142643391521198e-05, "loss": 0.3881, "step": 27510 }, { "epoch": 6.86284289276808, "grad_norm": 4.681951999664307, "learning_rate": 1.3140149625935163e-05, "loss": 0.4089, "step": 27520 }, { "epoch": 6.865336658354114, "grad_norm": 10.276632308959961, "learning_rate": 1.3137655860349128e-05, "loss": 0.4051, "step": 27530 }, { "epoch": 6.867830423940149, "grad_norm": 6.851044178009033, "learning_rate": 1.3135162094763094e-05, "loss": 0.3344, "step": 27540 }, { "epoch": 6.870324189526184, "grad_norm": 8.850982666015625, "learning_rate": 1.3132668329177059e-05, "loss": 0.4303, "step": 27550 }, { "epoch": 6.87281795511222, "grad_norm": 6.095160007476807, "learning_rate": 1.3130174563591023e-05, "loss": 0.4008, "step": 27560 }, { "epoch": 6.875311720698255, "grad_norm": 4.623822212219238, "learning_rate": 1.312768079800499e-05, "loss": 0.4192, "step": 27570 }, { "epoch": 6.87780548628429, "grad_norm": 4.292810440063477, "learning_rate": 1.3125187032418953e-05, "loss": 0.3421, "step": 27580 }, { "epoch": 6.8802992518703245, "grad_norm": 5.254152774810791, "learning_rate": 1.312269326683292e-05, "loss": 0.4478, "step": 27590 }, { "epoch": 6.882793017456359, "grad_norm": 10.666980743408203, "learning_rate": 1.3120199501246884e-05, "loss": 0.347, "step": 27600 }, { "epoch": 6.885286783042394, "grad_norm": 5.066367149353027, "learning_rate": 1.3117705735660849e-05, "loss": 0.3701, "step": 27610 }, { "epoch": 6.887780548628429, "grad_norm": 7.213781356811523, "learning_rate": 1.3115211970074814e-05, "loss": 0.4525, "step": 27620 }, { "epoch": 6.890274314214464, "grad_norm": 8.0278959274292, "learning_rate": 1.311271820448878e-05, "loss": 0.4072, "step": 27630 }, { "epoch": 6.892768079800499, "grad_norm": 4.485868453979492, "learning_rate": 1.3110224438902743e-05, "loss": 0.3232, "step": 27640 }, { "epoch": 6.895261845386534, "grad_norm": 7.174385070800781, "learning_rate": 1.310773067331671e-05, "loss": 0.3655, "step": 27650 }, { "epoch": 6.897755610972569, "grad_norm": 3.9336626529693604, "learning_rate": 1.3105236907730674e-05, "loss": 0.3454, "step": 27660 }, { "epoch": 6.9002493765586035, "grad_norm": 6.965961933135986, "learning_rate": 1.3102743142144641e-05, "loss": 0.37, "step": 27670 }, { "epoch": 6.902743142144638, "grad_norm": 6.3173675537109375, "learning_rate": 1.3100249376558605e-05, "loss": 0.3578, "step": 27680 }, { "epoch": 6.905236907730673, "grad_norm": 5.358870983123779, "learning_rate": 1.3097755610972568e-05, "loss": 0.399, "step": 27690 }, { "epoch": 6.907730673316708, "grad_norm": 7.68316125869751, "learning_rate": 1.3095261845386535e-05, "loss": 0.409, "step": 27700 }, { "epoch": 6.910224438902743, "grad_norm": 7.902895927429199, "learning_rate": 1.30927680798005e-05, "loss": 0.3588, "step": 27710 }, { "epoch": 6.912718204488778, "grad_norm": 5.715063571929932, "learning_rate": 1.3090274314214464e-05, "loss": 0.3029, "step": 27720 }, { "epoch": 6.915211970074813, "grad_norm": 6.375357627868652, "learning_rate": 1.3087780548628431e-05, "loss": 0.3668, "step": 27730 }, { "epoch": 6.917705735660848, "grad_norm": 8.482056617736816, "learning_rate": 1.3085286783042395e-05, "loss": 0.3974, "step": 27740 }, { "epoch": 6.920199501246882, "grad_norm": 7.30125617980957, "learning_rate": 1.3082793017456362e-05, "loss": 0.368, "step": 27750 }, { "epoch": 6.922693266832917, "grad_norm": 8.676459312438965, "learning_rate": 1.3080299251870325e-05, "loss": 0.4905, "step": 27760 }, { "epoch": 6.925187032418952, "grad_norm": 6.748859405517578, "learning_rate": 1.3077805486284289e-05, "loss": 0.4028, "step": 27770 }, { "epoch": 6.927680798004987, "grad_norm": 5.195030212402344, "learning_rate": 1.3075311720698256e-05, "loss": 0.3717, "step": 27780 }, { "epoch": 6.930174563591023, "grad_norm": 7.407238960266113, "learning_rate": 1.307281795511222e-05, "loss": 0.3701, "step": 27790 }, { "epoch": 6.932668329177058, "grad_norm": 9.330326080322266, "learning_rate": 1.3070324189526185e-05, "loss": 0.3282, "step": 27800 }, { "epoch": 6.9351620947630925, "grad_norm": 6.410877704620361, "learning_rate": 1.306783042394015e-05, "loss": 0.4224, "step": 27810 }, { "epoch": 6.937655860349127, "grad_norm": 8.255925178527832, "learning_rate": 1.3065336658354116e-05, "loss": 0.4419, "step": 27820 }, { "epoch": 6.940149625935162, "grad_norm": 5.41157341003418, "learning_rate": 1.3062842892768083e-05, "loss": 0.3181, "step": 27830 }, { "epoch": 6.942643391521197, "grad_norm": 6.762349605560303, "learning_rate": 1.3060349127182046e-05, "loss": 0.2942, "step": 27840 }, { "epoch": 6.945137157107232, "grad_norm": 6.1585588455200195, "learning_rate": 1.305785536159601e-05, "loss": 0.3983, "step": 27850 }, { "epoch": 6.947630922693267, "grad_norm": 6.742948532104492, "learning_rate": 1.3055361596009977e-05, "loss": 0.3911, "step": 27860 }, { "epoch": 6.950124688279302, "grad_norm": 9.091787338256836, "learning_rate": 1.305286783042394e-05, "loss": 0.3564, "step": 27870 }, { "epoch": 6.952618453865337, "grad_norm": 8.21529769897461, "learning_rate": 1.3050374064837907e-05, "loss": 0.3723, "step": 27880 }, { "epoch": 6.9551122194513715, "grad_norm": 5.098814487457275, "learning_rate": 1.3047880299251871e-05, "loss": 0.2938, "step": 27890 }, { "epoch": 6.957605985037406, "grad_norm": 7.9842000007629395, "learning_rate": 1.3045386533665836e-05, "loss": 0.3766, "step": 27900 }, { "epoch": 6.960099750623441, "grad_norm": 6.199515342712402, "learning_rate": 1.3042892768079802e-05, "loss": 0.4068, "step": 27910 }, { "epoch": 6.962593516209476, "grad_norm": 5.990458011627197, "learning_rate": 1.3040399002493767e-05, "loss": 0.3514, "step": 27920 }, { "epoch": 6.965087281795511, "grad_norm": 6.094311714172363, "learning_rate": 1.303790523690773e-05, "loss": 0.3625, "step": 27930 }, { "epoch": 6.967581047381546, "grad_norm": 6.9624481201171875, "learning_rate": 1.3035411471321698e-05, "loss": 0.4484, "step": 27940 }, { "epoch": 6.970074812967581, "grad_norm": 6.599002838134766, "learning_rate": 1.3032917705735661e-05, "loss": 0.4132, "step": 27950 }, { "epoch": 6.9725685785536164, "grad_norm": 5.155543327331543, "learning_rate": 1.3030423940149628e-05, "loss": 0.3469, "step": 27960 }, { "epoch": 6.975062344139651, "grad_norm": 6.06123685836792, "learning_rate": 1.3027930174563592e-05, "loss": 0.4548, "step": 27970 }, { "epoch": 6.977556109725686, "grad_norm": 8.714743614196777, "learning_rate": 1.3025436408977557e-05, "loss": 0.4578, "step": 27980 }, { "epoch": 6.980049875311721, "grad_norm": 5.861020565032959, "learning_rate": 1.3023192019950125e-05, "loss": 0.337, "step": 27990 }, { "epoch": 6.982543640897756, "grad_norm": 6.951167106628418, "learning_rate": 1.302069825436409e-05, "loss": 0.3824, "step": 28000 }, { "epoch": 6.985037406483791, "grad_norm": 3.6698153018951416, "learning_rate": 1.3018204488778056e-05, "loss": 0.3376, "step": 28010 }, { "epoch": 6.987531172069826, "grad_norm": 3.0680627822875977, "learning_rate": 1.3015710723192021e-05, "loss": 0.3163, "step": 28020 }, { "epoch": 6.9900249376558605, "grad_norm": 11.675896644592285, "learning_rate": 1.3013216957605985e-05, "loss": 0.3303, "step": 28030 }, { "epoch": 6.992518703241895, "grad_norm": 4.275930404663086, "learning_rate": 1.3010723192019952e-05, "loss": 0.3455, "step": 28040 }, { "epoch": 6.99501246882793, "grad_norm": 7.872333526611328, "learning_rate": 1.3008229426433916e-05, "loss": 0.3802, "step": 28050 }, { "epoch": 6.997506234413965, "grad_norm": 8.095891952514648, "learning_rate": 1.3005735660847883e-05, "loss": 0.4079, "step": 28060 }, { "epoch": 7.0, "grad_norm": 8.841901779174805, "learning_rate": 1.3003241895261846e-05, "loss": 0.4457, "step": 28070 }, { "epoch": 7.0, "eval_loss": 0.4166116714477539, "eval_runtime": 59.9481, "eval_samples_per_second": 16.731, "eval_steps_per_second": 16.731, "step": 28070 }, { "epoch": 7.002493765586035, "grad_norm": 10.384051322937012, "learning_rate": 1.3000748129675811e-05, "loss": 0.3682, "step": 28080 }, { "epoch": 7.00498753117207, "grad_norm": 5.374267578125, "learning_rate": 1.2998254364089777e-05, "loss": 0.4221, "step": 28090 }, { "epoch": 7.007481296758105, "grad_norm": 11.389906883239746, "learning_rate": 1.2995760598503742e-05, "loss": 0.3828, "step": 28100 }, { "epoch": 7.0099750623441395, "grad_norm": 7.218733787536621, "learning_rate": 1.2993266832917706e-05, "loss": 0.3267, "step": 28110 }, { "epoch": 7.012468827930174, "grad_norm": 5.1964263916015625, "learning_rate": 1.2990773067331673e-05, "loss": 0.3566, "step": 28120 }, { "epoch": 7.014962593516209, "grad_norm": 9.020042419433594, "learning_rate": 1.2988279301745636e-05, "loss": 0.346, "step": 28130 }, { "epoch": 7.017456359102244, "grad_norm": 13.949241638183594, "learning_rate": 1.2985785536159603e-05, "loss": 0.31, "step": 28140 }, { "epoch": 7.019950124688279, "grad_norm": 6.440070152282715, "learning_rate": 1.2983291770573567e-05, "loss": 0.3707, "step": 28150 }, { "epoch": 7.022443890274314, "grad_norm": 6.926358222961426, "learning_rate": 1.2980798004987532e-05, "loss": 0.3829, "step": 28160 }, { "epoch": 7.024937655860349, "grad_norm": 6.67525053024292, "learning_rate": 1.2978304239401498e-05, "loss": 0.3468, "step": 28170 }, { "epoch": 7.027431421446384, "grad_norm": 6.060240745544434, "learning_rate": 1.2976059850374066e-05, "loss": 0.3461, "step": 28180 }, { "epoch": 7.029925187032419, "grad_norm": 9.439708709716797, "learning_rate": 1.2973566084788031e-05, "loss": 0.4009, "step": 28190 }, { "epoch": 7.032418952618454, "grad_norm": 3.421870708465576, "learning_rate": 1.2971072319201996e-05, "loss": 0.3896, "step": 28200 }, { "epoch": 7.034912718204489, "grad_norm": 5.836117744445801, "learning_rate": 1.296857855361596e-05, "loss": 0.3461, "step": 28210 }, { "epoch": 7.037406483790524, "grad_norm": 7.276211738586426, "learning_rate": 1.2966084788029927e-05, "loss": 0.3559, "step": 28220 }, { "epoch": 7.039900249376559, "grad_norm": 5.882770538330078, "learning_rate": 1.296359102244389e-05, "loss": 0.294, "step": 28230 }, { "epoch": 7.042394014962594, "grad_norm": 4.949868679046631, "learning_rate": 1.2961097256857858e-05, "loss": 0.3668, "step": 28240 }, { "epoch": 7.0448877805486285, "grad_norm": 5.131392955780029, "learning_rate": 1.2958603491271821e-05, "loss": 0.3404, "step": 28250 }, { "epoch": 7.047381546134663, "grad_norm": 6.9679856300354, "learning_rate": 1.2956109725685787e-05, "loss": 0.4193, "step": 28260 }, { "epoch": 7.049875311720698, "grad_norm": 6.4561767578125, "learning_rate": 1.2953615960099752e-05, "loss": 0.3525, "step": 28270 }, { "epoch": 7.052369077306733, "grad_norm": 7.776483058929443, "learning_rate": 1.2951122194513717e-05, "loss": 0.3326, "step": 28280 }, { "epoch": 7.054862842892768, "grad_norm": 5.461517810821533, "learning_rate": 1.2948628428927681e-05, "loss": 0.3554, "step": 28290 }, { "epoch": 7.057356608478803, "grad_norm": 7.675981521606445, "learning_rate": 1.2946134663341648e-05, "loss": 0.4105, "step": 28300 }, { "epoch": 7.059850374064838, "grad_norm": 3.51208233833313, "learning_rate": 1.2943640897755611e-05, "loss": 0.3293, "step": 28310 }, { "epoch": 7.062344139650873, "grad_norm": 5.394303321838379, "learning_rate": 1.2941147132169578e-05, "loss": 0.4108, "step": 28320 }, { "epoch": 7.0648379052369075, "grad_norm": 5.80884313583374, "learning_rate": 1.2938653366583542e-05, "loss": 0.2895, "step": 28330 }, { "epoch": 7.067331670822942, "grad_norm": 11.093313217163086, "learning_rate": 1.2936159600997507e-05, "loss": 0.3812, "step": 28340 }, { "epoch": 7.069825436408977, "grad_norm": 8.297402381896973, "learning_rate": 1.2933665835411473e-05, "loss": 0.4159, "step": 28350 }, { "epoch": 7.072319201995012, "grad_norm": 8.410477638244629, "learning_rate": 1.2931172069825438e-05, "loss": 0.3703, "step": 28360 }, { "epoch": 7.074812967581048, "grad_norm": 8.238855361938477, "learning_rate": 1.2928678304239402e-05, "loss": 0.4046, "step": 28370 }, { "epoch": 7.077306733167083, "grad_norm": 9.850193977355957, "learning_rate": 1.2926184538653369e-05, "loss": 0.4449, "step": 28380 }, { "epoch": 7.079800498753118, "grad_norm": 4.337007999420166, "learning_rate": 1.2923690773067332e-05, "loss": 0.3567, "step": 28390 }, { "epoch": 7.082294264339152, "grad_norm": 7.723695278167725, "learning_rate": 1.29211970074813e-05, "loss": 0.3221, "step": 28400 }, { "epoch": 7.084788029925187, "grad_norm": 3.753911256790161, "learning_rate": 1.2918703241895263e-05, "loss": 0.3524, "step": 28410 }, { "epoch": 7.087281795511222, "grad_norm": 6.445272922515869, "learning_rate": 1.2916209476309226e-05, "loss": 0.3343, "step": 28420 }, { "epoch": 7.089775561097257, "grad_norm": 7.809503078460693, "learning_rate": 1.2913715710723193e-05, "loss": 0.3879, "step": 28430 }, { "epoch": 7.092269326683292, "grad_norm": 8.968701362609863, "learning_rate": 1.2911221945137157e-05, "loss": 0.4012, "step": 28440 }, { "epoch": 7.094763092269327, "grad_norm": 8.712312698364258, "learning_rate": 1.2908728179551124e-05, "loss": 0.514, "step": 28450 }, { "epoch": 7.097256857855362, "grad_norm": 7.140172481536865, "learning_rate": 1.290623441396509e-05, "loss": 0.3044, "step": 28460 }, { "epoch": 7.0997506234413965, "grad_norm": 11.479339599609375, "learning_rate": 1.2903740648379053e-05, "loss": 0.3965, "step": 28470 }, { "epoch": 7.102244389027431, "grad_norm": 7.283442974090576, "learning_rate": 1.290124688279302e-05, "loss": 0.3227, "step": 28480 }, { "epoch": 7.104738154613466, "grad_norm": 8.901700019836426, "learning_rate": 1.2898753117206984e-05, "loss": 0.3847, "step": 28490 }, { "epoch": 7.107231920199501, "grad_norm": 5.670577049255371, "learning_rate": 1.2896259351620947e-05, "loss": 0.4015, "step": 28500 }, { "epoch": 7.109725685785536, "grad_norm": 5.779824733734131, "learning_rate": 1.2893765586034914e-05, "loss": 0.2875, "step": 28510 }, { "epoch": 7.112219451371571, "grad_norm": 7.593159198760986, "learning_rate": 1.2891271820448878e-05, "loss": 0.3917, "step": 28520 }, { "epoch": 7.114713216957606, "grad_norm": 5.073284149169922, "learning_rate": 1.2888778054862845e-05, "loss": 0.2831, "step": 28530 }, { "epoch": 7.117206982543641, "grad_norm": 7.407405376434326, "learning_rate": 1.2886284289276809e-05, "loss": 0.3676, "step": 28540 }, { "epoch": 7.1197007481296755, "grad_norm": 6.4434638023376465, "learning_rate": 1.2883790523690774e-05, "loss": 0.363, "step": 28550 }, { "epoch": 7.12219451371571, "grad_norm": 4.871036052703857, "learning_rate": 1.2881296758104739e-05, "loss": 0.3373, "step": 28560 }, { "epoch": 7.124688279301745, "grad_norm": 5.883360862731934, "learning_rate": 1.2878802992518704e-05, "loss": 0.3403, "step": 28570 }, { "epoch": 7.127182044887781, "grad_norm": 7.858293533325195, "learning_rate": 1.2876309226932668e-05, "loss": 0.3905, "step": 28580 }, { "epoch": 7.129675810473816, "grad_norm": 6.678670883178711, "learning_rate": 1.2873815461346635e-05, "loss": 0.3882, "step": 28590 }, { "epoch": 7.132169576059851, "grad_norm": 8.354759216308594, "learning_rate": 1.2871321695760599e-05, "loss": 0.389, "step": 28600 }, { "epoch": 7.134663341645886, "grad_norm": 8.125460624694824, "learning_rate": 1.2868827930174566e-05, "loss": 0.3412, "step": 28610 }, { "epoch": 7.13715710723192, "grad_norm": 6.524233818054199, "learning_rate": 1.286633416458853e-05, "loss": 0.4105, "step": 28620 }, { "epoch": 7.139650872817955, "grad_norm": 9.085260391235352, "learning_rate": 1.2863840399002495e-05, "loss": 0.3852, "step": 28630 }, { "epoch": 7.14214463840399, "grad_norm": 6.906386852264404, "learning_rate": 1.286134663341646e-05, "loss": 0.3602, "step": 28640 }, { "epoch": 7.144638403990025, "grad_norm": 11.312614440917969, "learning_rate": 1.2858852867830425e-05, "loss": 0.4129, "step": 28650 }, { "epoch": 7.14713216957606, "grad_norm": 5.9242353439331055, "learning_rate": 1.2856359102244389e-05, "loss": 0.3578, "step": 28660 }, { "epoch": 7.149625935162095, "grad_norm": 8.224637031555176, "learning_rate": 1.2853865336658356e-05, "loss": 0.3501, "step": 28670 }, { "epoch": 7.15211970074813, "grad_norm": 5.648616313934326, "learning_rate": 1.285137157107232e-05, "loss": 0.385, "step": 28680 }, { "epoch": 7.1546134663341645, "grad_norm": 5.693350791931152, "learning_rate": 1.2848877805486286e-05, "loss": 0.4725, "step": 28690 }, { "epoch": 7.157107231920199, "grad_norm": 5.901285171508789, "learning_rate": 1.284638403990025e-05, "loss": 0.3856, "step": 28700 }, { "epoch": 7.159600997506234, "grad_norm": 8.282814979553223, "learning_rate": 1.2843890274314215e-05, "loss": 0.3343, "step": 28710 }, { "epoch": 7.162094763092269, "grad_norm": 6.745294094085693, "learning_rate": 1.284139650872818e-05, "loss": 0.3845, "step": 28720 }, { "epoch": 7.164588528678304, "grad_norm": 5.898577690124512, "learning_rate": 1.2838902743142146e-05, "loss": 0.3966, "step": 28730 }, { "epoch": 7.167082294264339, "grad_norm": 5.124606132507324, "learning_rate": 1.2836408977556111e-05, "loss": 0.4228, "step": 28740 }, { "epoch": 7.169576059850374, "grad_norm": 5.206420421600342, "learning_rate": 1.2833915211970077e-05, "loss": 0.3557, "step": 28750 }, { "epoch": 7.172069825436409, "grad_norm": 8.561698913574219, "learning_rate": 1.283142144638404e-05, "loss": 0.3579, "step": 28760 }, { "epoch": 7.174563591022444, "grad_norm": 6.212121486663818, "learning_rate": 1.2828927680798007e-05, "loss": 0.3384, "step": 28770 }, { "epoch": 7.177057356608479, "grad_norm": 7.147701263427734, "learning_rate": 1.2826433915211971e-05, "loss": 0.3883, "step": 28780 }, { "epoch": 7.179551122194514, "grad_norm": 6.2454023361206055, "learning_rate": 1.2823940149625934e-05, "loss": 0.4135, "step": 28790 }, { "epoch": 7.182044887780549, "grad_norm": 9.466367721557617, "learning_rate": 1.2821446384039901e-05, "loss": 0.4502, "step": 28800 }, { "epoch": 7.184538653366584, "grad_norm": 8.00047779083252, "learning_rate": 1.2818952618453867e-05, "loss": 0.4377, "step": 28810 }, { "epoch": 7.187032418952619, "grad_norm": 7.6417951583862305, "learning_rate": 1.2816458852867832e-05, "loss": 0.363, "step": 28820 }, { "epoch": 7.1895261845386536, "grad_norm": 4.634132385253906, "learning_rate": 1.2813965087281797e-05, "loss": 0.3622, "step": 28830 }, { "epoch": 7.192019950124688, "grad_norm": 7.929607391357422, "learning_rate": 1.2811471321695761e-05, "loss": 0.3553, "step": 28840 }, { "epoch": 7.194513715710723, "grad_norm": 6.655643463134766, "learning_rate": 1.2808977556109728e-05, "loss": 0.3342, "step": 28850 }, { "epoch": 7.197007481296758, "grad_norm": 7.349606037139893, "learning_rate": 1.2806483790523692e-05, "loss": 0.3685, "step": 28860 }, { "epoch": 7.199501246882793, "grad_norm": 8.308873176574707, "learning_rate": 1.2803990024937655e-05, "loss": 0.3837, "step": 28870 }, { "epoch": 7.201995012468828, "grad_norm": 9.069894790649414, "learning_rate": 1.2801496259351622e-05, "loss": 0.3609, "step": 28880 }, { "epoch": 7.204488778054863, "grad_norm": 10.108824729919434, "learning_rate": 1.2799002493765586e-05, "loss": 0.4051, "step": 28890 }, { "epoch": 7.206982543640898, "grad_norm": 9.224723815917969, "learning_rate": 1.2796508728179553e-05, "loss": 0.4147, "step": 28900 }, { "epoch": 7.2094763092269325, "grad_norm": 5.563706398010254, "learning_rate": 1.2794014962593517e-05, "loss": 0.3391, "step": 28910 }, { "epoch": 7.211970074812967, "grad_norm": 6.96822452545166, "learning_rate": 1.2791521197007482e-05, "loss": 0.3146, "step": 28920 }, { "epoch": 7.214463840399002, "grad_norm": 8.976058006286621, "learning_rate": 1.2789027431421449e-05, "loss": 0.3729, "step": 28930 }, { "epoch": 7.216957605985037, "grad_norm": 6.567900657653809, "learning_rate": 1.2786533665835412e-05, "loss": 0.3472, "step": 28940 }, { "epoch": 7.219451371571072, "grad_norm": 6.142775535583496, "learning_rate": 1.278403990024938e-05, "loss": 0.3296, "step": 28950 }, { "epoch": 7.221945137157107, "grad_norm": 6.999728202819824, "learning_rate": 1.2781546134663343e-05, "loss": 0.3493, "step": 28960 }, { "epoch": 7.224438902743142, "grad_norm": 6.844429969787598, "learning_rate": 1.2779052369077307e-05, "loss": 0.3389, "step": 28970 }, { "epoch": 7.2269326683291775, "grad_norm": 8.233739852905273, "learning_rate": 1.2776558603491274e-05, "loss": 0.3728, "step": 28980 }, { "epoch": 7.229426433915212, "grad_norm": 6.4842848777771, "learning_rate": 1.2774064837905237e-05, "loss": 0.4287, "step": 28990 }, { "epoch": 7.231920199501247, "grad_norm": 7.557778835296631, "learning_rate": 1.2771571072319203e-05, "loss": 0.3734, "step": 29000 }, { "epoch": 7.234413965087282, "grad_norm": 6.296581268310547, "learning_rate": 1.2769077306733168e-05, "loss": 0.3677, "step": 29010 }, { "epoch": 7.236907730673317, "grad_norm": 7.28203821182251, "learning_rate": 1.2766583541147133e-05, "loss": 0.3798, "step": 29020 }, { "epoch": 7.239401496259352, "grad_norm": 5.902282238006592, "learning_rate": 1.2764089775561099e-05, "loss": 0.3399, "step": 29030 }, { "epoch": 7.241895261845387, "grad_norm": 6.5836310386657715, "learning_rate": 1.2761596009975064e-05, "loss": 0.406, "step": 29040 }, { "epoch": 7.2443890274314215, "grad_norm": 8.319708824157715, "learning_rate": 1.2759102244389027e-05, "loss": 0.3888, "step": 29050 }, { "epoch": 7.246882793017456, "grad_norm": 8.096579551696777, "learning_rate": 1.2756608478802994e-05, "loss": 0.3393, "step": 29060 }, { "epoch": 7.249376558603491, "grad_norm": 6.496395587921143, "learning_rate": 1.2754114713216958e-05, "loss": 0.3642, "step": 29070 }, { "epoch": 7.251870324189526, "grad_norm": 7.759038925170898, "learning_rate": 1.2751620947630923e-05, "loss": 0.3334, "step": 29080 }, { "epoch": 7.254364089775561, "grad_norm": 10.256757736206055, "learning_rate": 1.2749127182044889e-05, "loss": 0.3924, "step": 29090 }, { "epoch": 7.256857855361596, "grad_norm": 10.726069450378418, "learning_rate": 1.2746633416458854e-05, "loss": 0.4015, "step": 29100 }, { "epoch": 7.259351620947631, "grad_norm": 8.847235679626465, "learning_rate": 1.274413965087282e-05, "loss": 0.4174, "step": 29110 }, { "epoch": 7.261845386533666, "grad_norm": 7.851325035095215, "learning_rate": 1.2741645885286785e-05, "loss": 0.3122, "step": 29120 }, { "epoch": 7.2643391521197005, "grad_norm": 6.145222187042236, "learning_rate": 1.2739152119700748e-05, "loss": 0.3972, "step": 29130 }, { "epoch": 7.266832917705735, "grad_norm": 7.113053798675537, "learning_rate": 1.2736658354114715e-05, "loss": 0.4018, "step": 29140 }, { "epoch": 7.26932668329177, "grad_norm": 10.120803833007812, "learning_rate": 1.2734164588528679e-05, "loss": 0.3977, "step": 29150 }, { "epoch": 7.271820448877805, "grad_norm": 5.162845134735107, "learning_rate": 1.2731670822942644e-05, "loss": 0.3468, "step": 29160 }, { "epoch": 7.274314214463841, "grad_norm": 9.55453109741211, "learning_rate": 1.272917705735661e-05, "loss": 0.3615, "step": 29170 }, { "epoch": 7.276807980049876, "grad_norm": 7.122071266174316, "learning_rate": 1.2726683291770575e-05, "loss": 0.3974, "step": 29180 }, { "epoch": 7.279301745635911, "grad_norm": 6.449893474578857, "learning_rate": 1.272418952618454e-05, "loss": 0.3763, "step": 29190 }, { "epoch": 7.2817955112219455, "grad_norm": 12.209590911865234, "learning_rate": 1.2721695760598505e-05, "loss": 0.32, "step": 29200 }, { "epoch": 7.28428927680798, "grad_norm": 7.169990062713623, "learning_rate": 1.2719201995012469e-05, "loss": 0.3413, "step": 29210 }, { "epoch": 7.286783042394015, "grad_norm": 8.662860870361328, "learning_rate": 1.2716708229426436e-05, "loss": 0.3932, "step": 29220 }, { "epoch": 7.28927680798005, "grad_norm": 6.312235355377197, "learning_rate": 1.27142144638404e-05, "loss": 0.3372, "step": 29230 }, { "epoch": 7.291770573566085, "grad_norm": 5.396263599395752, "learning_rate": 1.2711720698254367e-05, "loss": 0.2894, "step": 29240 }, { "epoch": 7.29426433915212, "grad_norm": 7.881542205810547, "learning_rate": 1.270922693266833e-05, "loss": 0.4439, "step": 29250 }, { "epoch": 7.296758104738155, "grad_norm": 6.539719104766846, "learning_rate": 1.2706733167082294e-05, "loss": 0.3688, "step": 29260 }, { "epoch": 7.2992518703241895, "grad_norm": 9.538723945617676, "learning_rate": 1.2704239401496261e-05, "loss": 0.3785, "step": 29270 }, { "epoch": 7.301745635910224, "grad_norm": 8.049639701843262, "learning_rate": 1.2701745635910226e-05, "loss": 0.3698, "step": 29280 }, { "epoch": 7.304239401496259, "grad_norm": 11.048667907714844, "learning_rate": 1.269925187032419e-05, "loss": 0.477, "step": 29290 }, { "epoch": 7.306733167082294, "grad_norm": 4.723964214324951, "learning_rate": 1.2696758104738157e-05, "loss": 0.278, "step": 29300 }, { "epoch": 7.309226932668329, "grad_norm": 8.363397598266602, "learning_rate": 1.269426433915212e-05, "loss": 0.3963, "step": 29310 }, { "epoch": 7.311720698254364, "grad_norm": 5.296679973602295, "learning_rate": 1.2691770573566087e-05, "loss": 0.3419, "step": 29320 }, { "epoch": 7.314214463840399, "grad_norm": 4.19307804107666, "learning_rate": 1.2689276807980051e-05, "loss": 0.3923, "step": 29330 }, { "epoch": 7.316708229426434, "grad_norm": 4.72712516784668, "learning_rate": 1.2686783042394015e-05, "loss": 0.3854, "step": 29340 }, { "epoch": 7.3192019950124685, "grad_norm": 7.943241119384766, "learning_rate": 1.2684289276807982e-05, "loss": 0.4142, "step": 29350 }, { "epoch": 7.321695760598503, "grad_norm": 7.628240585327148, "learning_rate": 1.2681795511221945e-05, "loss": 0.3044, "step": 29360 }, { "epoch": 7.324189526184538, "grad_norm": 6.917629718780518, "learning_rate": 1.267930174563591e-05, "loss": 0.3286, "step": 29370 }, { "epoch": 7.326683291770574, "grad_norm": 5.416843891143799, "learning_rate": 1.2676807980049876e-05, "loss": 0.442, "step": 29380 }, { "epoch": 7.329177057356609, "grad_norm": 4.867097854614258, "learning_rate": 1.2674314214463841e-05, "loss": 0.3525, "step": 29390 }, { "epoch": 7.331670822942644, "grad_norm": 8.583399772644043, "learning_rate": 1.2671820448877808e-05, "loss": 0.3817, "step": 29400 }, { "epoch": 7.334164588528679, "grad_norm": 7.867173194885254, "learning_rate": 1.2669326683291772e-05, "loss": 0.3231, "step": 29410 }, { "epoch": 7.3366583541147135, "grad_norm": 6.9553022384643555, "learning_rate": 1.2666832917705735e-05, "loss": 0.321, "step": 29420 }, { "epoch": 7.339152119700748, "grad_norm": 6.242718696594238, "learning_rate": 1.2664339152119702e-05, "loss": 0.365, "step": 29430 }, { "epoch": 7.341645885286783, "grad_norm": 8.64468765258789, "learning_rate": 1.2661845386533666e-05, "loss": 0.3114, "step": 29440 }, { "epoch": 7.344139650872818, "grad_norm": 6.854546546936035, "learning_rate": 1.2659351620947633e-05, "loss": 0.3563, "step": 29450 }, { "epoch": 7.346633416458853, "grad_norm": 10.145390510559082, "learning_rate": 1.2656857855361597e-05, "loss": 0.3541, "step": 29460 }, { "epoch": 7.349127182044888, "grad_norm": 5.386590480804443, "learning_rate": 1.2654364089775562e-05, "loss": 0.3673, "step": 29470 }, { "epoch": 7.351620947630923, "grad_norm": 5.273983955383301, "learning_rate": 1.2651870324189527e-05, "loss": 0.2825, "step": 29480 }, { "epoch": 7.3541147132169575, "grad_norm": 4.6638689041137695, "learning_rate": 1.2649376558603493e-05, "loss": 0.4178, "step": 29490 }, { "epoch": 7.356608478802992, "grad_norm": 5.493322372436523, "learning_rate": 1.2646882793017456e-05, "loss": 0.3265, "step": 29500 }, { "epoch": 7.359102244389027, "grad_norm": 6.716048240661621, "learning_rate": 1.2644389027431423e-05, "loss": 0.3692, "step": 29510 }, { "epoch": 7.361596009975062, "grad_norm": 6.258030891418457, "learning_rate": 1.2641895261845387e-05, "loss": 0.426, "step": 29520 }, { "epoch": 7.364089775561097, "grad_norm": 5.933165550231934, "learning_rate": 1.2639401496259354e-05, "loss": 0.3618, "step": 29530 }, { "epoch": 7.366583541147132, "grad_norm": 5.566700458526611, "learning_rate": 1.2636907730673317e-05, "loss": 0.4001, "step": 29540 }, { "epoch": 7.369077306733167, "grad_norm": 7.390594959259033, "learning_rate": 1.2634413965087283e-05, "loss": 0.3826, "step": 29550 }, { "epoch": 7.371571072319202, "grad_norm": 4.834929466247559, "learning_rate": 1.2631920199501248e-05, "loss": 0.3512, "step": 29560 }, { "epoch": 7.374064837905237, "grad_norm": 8.851217269897461, "learning_rate": 1.2629426433915213e-05, "loss": 0.387, "step": 29570 }, { "epoch": 7.376558603491272, "grad_norm": 14.366303443908691, "learning_rate": 1.2626932668329177e-05, "loss": 0.3875, "step": 29580 }, { "epoch": 7.379052369077307, "grad_norm": 7.262747287750244, "learning_rate": 1.2624438902743144e-05, "loss": 0.3718, "step": 29590 }, { "epoch": 7.381546134663342, "grad_norm": 5.579659938812256, "learning_rate": 1.2621945137157108e-05, "loss": 0.3919, "step": 29600 }, { "epoch": 7.384039900249377, "grad_norm": 5.182689189910889, "learning_rate": 1.2619451371571075e-05, "loss": 0.3414, "step": 29610 }, { "epoch": 7.386533665835412, "grad_norm": 4.612008094787598, "learning_rate": 1.2616957605985038e-05, "loss": 0.3101, "step": 29620 }, { "epoch": 7.389027431421447, "grad_norm": 11.192936897277832, "learning_rate": 1.2614463840399004e-05, "loss": 0.3038, "step": 29630 }, { "epoch": 7.3915211970074814, "grad_norm": 8.110973358154297, "learning_rate": 1.2611970074812969e-05, "loss": 0.451, "step": 29640 }, { "epoch": 7.394014962593516, "grad_norm": 9.395048141479492, "learning_rate": 1.2609476309226934e-05, "loss": 0.3936, "step": 29650 }, { "epoch": 7.396508728179551, "grad_norm": 6.499456882476807, "learning_rate": 1.2606982543640898e-05, "loss": 0.3217, "step": 29660 }, { "epoch": 7.399002493765586, "grad_norm": 6.436877250671387, "learning_rate": 1.2604488778054865e-05, "loss": 0.4203, "step": 29670 }, { "epoch": 7.401496259351621, "grad_norm": 4.4001688957214355, "learning_rate": 1.2601995012468828e-05, "loss": 0.4034, "step": 29680 }, { "epoch": 7.403990024937656, "grad_norm": 7.0502800941467285, "learning_rate": 1.2599501246882795e-05, "loss": 0.4124, "step": 29690 }, { "epoch": 7.406483790523691, "grad_norm": 10.673737525939941, "learning_rate": 1.2597007481296759e-05, "loss": 0.4948, "step": 29700 }, { "epoch": 7.4089775561097255, "grad_norm": 6.177337646484375, "learning_rate": 1.2594513715710723e-05, "loss": 0.4471, "step": 29710 }, { "epoch": 7.41147132169576, "grad_norm": 5.368765830993652, "learning_rate": 1.259201995012469e-05, "loss": 0.3887, "step": 29720 }, { "epoch": 7.413965087281795, "grad_norm": 6.879129409790039, "learning_rate": 1.2589526184538653e-05, "loss": 0.3773, "step": 29730 }, { "epoch": 7.41645885286783, "grad_norm": 8.17301082611084, "learning_rate": 1.258703241895262e-05, "loss": 0.3722, "step": 29740 }, { "epoch": 7.418952618453865, "grad_norm": 8.106537818908691, "learning_rate": 1.2584538653366586e-05, "loss": 0.41, "step": 29750 }, { "epoch": 7.4214463840399, "grad_norm": 6.589071273803711, "learning_rate": 1.258204488778055e-05, "loss": 0.4002, "step": 29760 }, { "epoch": 7.423940149625935, "grad_norm": 8.137639999389648, "learning_rate": 1.2579551122194516e-05, "loss": 0.3859, "step": 29770 }, { "epoch": 7.42643391521197, "grad_norm": 6.957583427429199, "learning_rate": 1.257705735660848e-05, "loss": 0.4258, "step": 29780 }, { "epoch": 7.428927680798005, "grad_norm": 8.225882530212402, "learning_rate": 1.2574563591022443e-05, "loss": 0.3842, "step": 29790 }, { "epoch": 7.43142144638404, "grad_norm": 6.6841888427734375, "learning_rate": 1.257206982543641e-05, "loss": 0.3717, "step": 29800 }, { "epoch": 7.433915211970075, "grad_norm": 6.028921127319336, "learning_rate": 1.2569576059850374e-05, "loss": 0.3616, "step": 29810 }, { "epoch": 7.43640897755611, "grad_norm": 5.823741436004639, "learning_rate": 1.2567082294264341e-05, "loss": 0.3755, "step": 29820 }, { "epoch": 7.438902743142145, "grad_norm": 11.203161239624023, "learning_rate": 1.2564588528678305e-05, "loss": 0.3806, "step": 29830 }, { "epoch": 7.44139650872818, "grad_norm": 5.957489967346191, "learning_rate": 1.256209476309227e-05, "loss": 0.3821, "step": 29840 }, { "epoch": 7.443890274314215, "grad_norm": 4.782578468322754, "learning_rate": 1.2559600997506235e-05, "loss": 0.3689, "step": 29850 }, { "epoch": 7.446384039900249, "grad_norm": 4.5286784172058105, "learning_rate": 1.25571072319202e-05, "loss": 0.3414, "step": 29860 }, { "epoch": 7.448877805486284, "grad_norm": 11.462789535522461, "learning_rate": 1.2554613466334164e-05, "loss": 0.373, "step": 29870 }, { "epoch": 7.451371571072319, "grad_norm": 8.337203979492188, "learning_rate": 1.2552119700748131e-05, "loss": 0.3389, "step": 29880 }, { "epoch": 7.453865336658354, "grad_norm": 13.295976638793945, "learning_rate": 1.2549625935162095e-05, "loss": 0.3165, "step": 29890 }, { "epoch": 7.456359102244389, "grad_norm": 6.52192497253418, "learning_rate": 1.2547132169576062e-05, "loss": 0.3477, "step": 29900 }, { "epoch": 7.458852867830424, "grad_norm": 7.2272491455078125, "learning_rate": 1.2544638403990025e-05, "loss": 0.3611, "step": 29910 }, { "epoch": 7.461346633416459, "grad_norm": 3.4752140045166016, "learning_rate": 1.254214463840399e-05, "loss": 0.3382, "step": 29920 }, { "epoch": 7.4638403990024935, "grad_norm": 9.237256050109863, "learning_rate": 1.2539650872817956e-05, "loss": 0.3683, "step": 29930 }, { "epoch": 7.466334164588528, "grad_norm": 9.527120590209961, "learning_rate": 1.2537157107231921e-05, "loss": 0.3784, "step": 29940 }, { "epoch": 7.468827930174563, "grad_norm": 5.257556915283203, "learning_rate": 1.2534663341645887e-05, "loss": 0.3919, "step": 29950 }, { "epoch": 7.471321695760598, "grad_norm": 5.737821102142334, "learning_rate": 1.2532169576059852e-05, "loss": 0.3766, "step": 29960 }, { "epoch": 7.473815461346634, "grad_norm": 8.01321792602539, "learning_rate": 1.2529675810473816e-05, "loss": 0.3299, "step": 29970 }, { "epoch": 7.476309226932669, "grad_norm": 5.301641464233398, "learning_rate": 1.2527182044887783e-05, "loss": 0.3851, "step": 29980 }, { "epoch": 7.478802992518704, "grad_norm": 7.687638759613037, "learning_rate": 1.2524688279301746e-05, "loss": 0.3364, "step": 29990 }, { "epoch": 7.4812967581047385, "grad_norm": 6.31342077255249, "learning_rate": 1.2522194513715712e-05, "loss": 0.3607, "step": 30000 }, { "epoch": 7.483790523690773, "grad_norm": 5.950140953063965, "learning_rate": 1.2519700748129677e-05, "loss": 0.3616, "step": 30010 }, { "epoch": 7.486284289276808, "grad_norm": 6.606958389282227, "learning_rate": 1.2517206982543642e-05, "loss": 0.3231, "step": 30020 }, { "epoch": 7.488778054862843, "grad_norm": 8.758807182312012, "learning_rate": 1.2514713216957607e-05, "loss": 0.3508, "step": 30030 }, { "epoch": 7.491271820448878, "grad_norm": 3.6969501972198486, "learning_rate": 1.2512219451371573e-05, "loss": 0.3668, "step": 30040 }, { "epoch": 7.493765586034913, "grad_norm": 7.893050670623779, "learning_rate": 1.2509725685785536e-05, "loss": 0.3001, "step": 30050 }, { "epoch": 7.496259351620948, "grad_norm": 8.5418119430542, "learning_rate": 1.2507231920199503e-05, "loss": 0.3428, "step": 30060 }, { "epoch": 7.498753117206983, "grad_norm": 7.37095308303833, "learning_rate": 1.2504738154613467e-05, "loss": 0.36, "step": 30070 }, { "epoch": 7.501246882793017, "grad_norm": 5.3542704582214355, "learning_rate": 1.250224438902743e-05, "loss": 0.354, "step": 30080 }, { "epoch": 7.503740648379052, "grad_norm": 7.024536609649658, "learning_rate": 1.2499750623441398e-05, "loss": 0.3936, "step": 30090 }, { "epoch": 7.506234413965087, "grad_norm": 8.495894432067871, "learning_rate": 1.2497256857855363e-05, "loss": 0.3381, "step": 30100 }, { "epoch": 7.508728179551122, "grad_norm": 6.884313106536865, "learning_rate": 1.2494763092269328e-05, "loss": 0.381, "step": 30110 }, { "epoch": 7.511221945137157, "grad_norm": 7.271829128265381, "learning_rate": 1.2492269326683294e-05, "loss": 0.4289, "step": 30120 }, { "epoch": 7.513715710723192, "grad_norm": 14.023894309997559, "learning_rate": 1.2489775561097257e-05, "loss": 0.4139, "step": 30130 }, { "epoch": 7.516209476309227, "grad_norm": 7.130033493041992, "learning_rate": 1.2487281795511224e-05, "loss": 0.3319, "step": 30140 }, { "epoch": 7.5187032418952615, "grad_norm": 6.766006946563721, "learning_rate": 1.2484788029925188e-05, "loss": 0.3546, "step": 30150 }, { "epoch": 7.521197007481296, "grad_norm": 10.241814613342285, "learning_rate": 1.2482294264339151e-05, "loss": 0.3268, "step": 30160 }, { "epoch": 7.523690773067331, "grad_norm": 8.794346809387207, "learning_rate": 1.2479800498753118e-05, "loss": 0.3394, "step": 30170 }, { "epoch": 7.526184538653366, "grad_norm": 8.435714721679688, "learning_rate": 1.2477306733167082e-05, "loss": 0.3174, "step": 30180 }, { "epoch": 7.528678304239402, "grad_norm": 9.198299407958984, "learning_rate": 1.2474812967581049e-05, "loss": 0.36, "step": 30190 }, { "epoch": 7.531172069825437, "grad_norm": 8.56997013092041, "learning_rate": 1.2472319201995013e-05, "loss": 0.3682, "step": 30200 }, { "epoch": 7.533665835411472, "grad_norm": 9.016803741455078, "learning_rate": 1.2469825436408978e-05, "loss": 0.3727, "step": 30210 }, { "epoch": 7.5361596009975065, "grad_norm": 7.877820014953613, "learning_rate": 1.2467331670822945e-05, "loss": 0.4129, "step": 30220 }, { "epoch": 7.538653366583541, "grad_norm": 5.137962818145752, "learning_rate": 1.2464837905236909e-05, "loss": 0.3079, "step": 30230 }, { "epoch": 7.541147132169576, "grad_norm": 6.477614879608154, "learning_rate": 1.2462344139650876e-05, "loss": 0.3845, "step": 30240 }, { "epoch": 7.543640897755611, "grad_norm": 7.037804126739502, "learning_rate": 1.245985037406484e-05, "loss": 0.4319, "step": 30250 }, { "epoch": 7.546134663341646, "grad_norm": 6.353123664855957, "learning_rate": 1.2457356608478803e-05, "loss": 0.3744, "step": 30260 }, { "epoch": 7.548628428927681, "grad_norm": 18.240354537963867, "learning_rate": 1.245486284289277e-05, "loss": 0.3774, "step": 30270 }, { "epoch": 7.551122194513716, "grad_norm": 9.531089782714844, "learning_rate": 1.2452369077306733e-05, "loss": 0.3673, "step": 30280 }, { "epoch": 7.553615960099751, "grad_norm": 6.397697925567627, "learning_rate": 1.2449875311720699e-05, "loss": 0.4171, "step": 30290 }, { "epoch": 7.556109725685785, "grad_norm": 5.065841197967529, "learning_rate": 1.2447381546134664e-05, "loss": 0.3296, "step": 30300 }, { "epoch": 7.55860349127182, "grad_norm": 6.741978645324707, "learning_rate": 1.244488778054863e-05, "loss": 0.3183, "step": 30310 }, { "epoch": 7.561097256857855, "grad_norm": 6.0341620445251465, "learning_rate": 1.2442394014962595e-05, "loss": 0.399, "step": 30320 }, { "epoch": 7.56359102244389, "grad_norm": 5.0857977867126465, "learning_rate": 1.243990024937656e-05, "loss": 0.3163, "step": 30330 }, { "epoch": 7.566084788029925, "grad_norm": 7.0515456199646, "learning_rate": 1.2437406483790524e-05, "loss": 0.3199, "step": 30340 }, { "epoch": 7.56857855361596, "grad_norm": 8.328603744506836, "learning_rate": 1.243491271820449e-05, "loss": 0.3836, "step": 30350 }, { "epoch": 7.571072319201995, "grad_norm": 7.779388427734375, "learning_rate": 1.2432418952618454e-05, "loss": 0.3254, "step": 30360 }, { "epoch": 7.57356608478803, "grad_norm": 10.46296215057373, "learning_rate": 1.242992518703242e-05, "loss": 0.4414, "step": 30370 }, { "epoch": 7.576059850374065, "grad_norm": 6.196642875671387, "learning_rate": 1.2427431421446385e-05, "loss": 0.3855, "step": 30380 }, { "epoch": 7.5785536159601, "grad_norm": 9.153206825256348, "learning_rate": 1.242493765586035e-05, "loss": 0.2903, "step": 30390 }, { "epoch": 7.581047381546135, "grad_norm": 6.691465377807617, "learning_rate": 1.2422443890274315e-05, "loss": 0.3715, "step": 30400 }, { "epoch": 7.58354114713217, "grad_norm": 10.62185287475586, "learning_rate": 1.241995012468828e-05, "loss": 0.4213, "step": 30410 }, { "epoch": 7.586034912718205, "grad_norm": 5.947132587432861, "learning_rate": 1.2417456359102244e-05, "loss": 0.3693, "step": 30420 }, { "epoch": 7.58852867830424, "grad_norm": 7.0988993644714355, "learning_rate": 1.2414962593516211e-05, "loss": 0.3849, "step": 30430 }, { "epoch": 7.5910224438902745, "grad_norm": 6.948726177215576, "learning_rate": 1.2412468827930175e-05, "loss": 0.3642, "step": 30440 }, { "epoch": 7.593516209476309, "grad_norm": 5.161392688751221, "learning_rate": 1.2409975062344142e-05, "loss": 0.4056, "step": 30450 }, { "epoch": 7.596009975062344, "grad_norm": 8.350500106811523, "learning_rate": 1.2407481296758106e-05, "loss": 0.4004, "step": 30460 }, { "epoch": 7.598503740648379, "grad_norm": 7.8617753982543945, "learning_rate": 1.2404987531172071e-05, "loss": 0.3857, "step": 30470 }, { "epoch": 7.600997506234414, "grad_norm": 6.594773292541504, "learning_rate": 1.2402493765586036e-05, "loss": 0.3984, "step": 30480 }, { "epoch": 7.603491271820449, "grad_norm": 5.078848361968994, "learning_rate": 1.2400000000000002e-05, "loss": 0.3225, "step": 30490 }, { "epoch": 7.605985037406484, "grad_norm": 6.860848903656006, "learning_rate": 1.2397506234413965e-05, "loss": 0.3953, "step": 30500 }, { "epoch": 7.6084788029925186, "grad_norm": 6.241046905517578, "learning_rate": 1.2395012468827932e-05, "loss": 0.3999, "step": 30510 }, { "epoch": 7.610972568578553, "grad_norm": 4.877734661102295, "learning_rate": 1.2392518703241896e-05, "loss": 0.3535, "step": 30520 }, { "epoch": 7.613466334164588, "grad_norm": 6.138383388519287, "learning_rate": 1.2390024937655863e-05, "loss": 0.4553, "step": 30530 }, { "epoch": 7.615960099750623, "grad_norm": 6.23154354095459, "learning_rate": 1.2387531172069826e-05, "loss": 0.3939, "step": 30540 }, { "epoch": 7.618453865336658, "grad_norm": 5.0998945236206055, "learning_rate": 1.238503740648379e-05, "loss": 0.3799, "step": 30550 }, { "epoch": 7.620947630922693, "grad_norm": 5.4622392654418945, "learning_rate": 1.2382543640897757e-05, "loss": 0.4081, "step": 30560 }, { "epoch": 7.623441396508728, "grad_norm": 4.260573863983154, "learning_rate": 1.2380049875311722e-05, "loss": 0.3986, "step": 30570 }, { "epoch": 7.625935162094763, "grad_norm": 8.23079776763916, "learning_rate": 1.2377556109725686e-05, "loss": 0.3604, "step": 30580 }, { "epoch": 7.628428927680798, "grad_norm": 8.109442710876465, "learning_rate": 1.2375062344139653e-05, "loss": 0.4082, "step": 30590 }, { "epoch": 7.630922693266833, "grad_norm": 4.392514705657959, "learning_rate": 1.2372568578553617e-05, "loss": 0.3406, "step": 30600 }, { "epoch": 7.633416458852868, "grad_norm": 6.927611827850342, "learning_rate": 1.2370074812967584e-05, "loss": 0.3685, "step": 30610 }, { "epoch": 7.635910224438903, "grad_norm": 8.26883602142334, "learning_rate": 1.2367581047381547e-05, "loss": 0.4477, "step": 30620 }, { "epoch": 7.638403990024938, "grad_norm": 6.3622517585754395, "learning_rate": 1.236508728179551e-05, "loss": 0.337, "step": 30630 }, { "epoch": 7.640897755610973, "grad_norm": 9.278800010681152, "learning_rate": 1.2362593516209478e-05, "loss": 0.3987, "step": 30640 }, { "epoch": 7.643391521197008, "grad_norm": 4.636536598205566, "learning_rate": 1.2360099750623441e-05, "loss": 0.3779, "step": 30650 }, { "epoch": 7.6458852867830425, "grad_norm": 7.597597122192383, "learning_rate": 1.2357605985037408e-05, "loss": 0.3261, "step": 30660 }, { "epoch": 7.648379052369077, "grad_norm": 6.5270771980285645, "learning_rate": 1.2355112219451372e-05, "loss": 0.3113, "step": 30670 }, { "epoch": 7.650872817955112, "grad_norm": 8.069908142089844, "learning_rate": 1.2352618453865337e-05, "loss": 0.3637, "step": 30680 }, { "epoch": 7.653366583541147, "grad_norm": 7.320400238037109, "learning_rate": 1.2350124688279304e-05, "loss": 0.3699, "step": 30690 }, { "epoch": 7.655860349127182, "grad_norm": 15.375004768371582, "learning_rate": 1.2347630922693268e-05, "loss": 0.367, "step": 30700 }, { "epoch": 7.658354114713217, "grad_norm": 7.086206436157227, "learning_rate": 1.2345137157107232e-05, "loss": 0.3479, "step": 30710 }, { "epoch": 7.660847880299252, "grad_norm": 7.063462257385254, "learning_rate": 1.2342643391521199e-05, "loss": 0.3503, "step": 30720 }, { "epoch": 7.6633416458852865, "grad_norm": 6.879456043243408, "learning_rate": 1.2340149625935162e-05, "loss": 0.3552, "step": 30730 }, { "epoch": 7.665835411471321, "grad_norm": 7.064803600311279, "learning_rate": 1.233765586034913e-05, "loss": 0.3794, "step": 30740 }, { "epoch": 7.668329177057356, "grad_norm": 6.3318023681640625, "learning_rate": 1.2335162094763093e-05, "loss": 0.3434, "step": 30750 }, { "epoch": 7.670822942643391, "grad_norm": 6.186346054077148, "learning_rate": 1.2332668329177058e-05, "loss": 0.3624, "step": 30760 }, { "epoch": 7.673316708229427, "grad_norm": 6.247686386108398, "learning_rate": 1.2330174563591023e-05, "loss": 0.3922, "step": 30770 }, { "epoch": 7.675810473815462, "grad_norm": 5.863278865814209, "learning_rate": 1.2327680798004989e-05, "loss": 0.3569, "step": 30780 }, { "epoch": 7.678304239401497, "grad_norm": 13.845823287963867, "learning_rate": 1.2325187032418952e-05, "loss": 0.3568, "step": 30790 }, { "epoch": 7.6807980049875315, "grad_norm": 7.736221790313721, "learning_rate": 1.232269326683292e-05, "loss": 0.3471, "step": 30800 }, { "epoch": 7.683291770573566, "grad_norm": 6.512248992919922, "learning_rate": 1.2320199501246883e-05, "loss": 0.3172, "step": 30810 }, { "epoch": 7.685785536159601, "grad_norm": 6.663067817687988, "learning_rate": 1.231770573566085e-05, "loss": 0.3857, "step": 30820 }, { "epoch": 7.688279301745636, "grad_norm": 8.590274810791016, "learning_rate": 1.2315211970074814e-05, "loss": 0.3427, "step": 30830 }, { "epoch": 7.690773067331671, "grad_norm": 7.1273722648620605, "learning_rate": 1.2312718204488779e-05, "loss": 0.4224, "step": 30840 }, { "epoch": 7.693266832917706, "grad_norm": 6.8802924156188965, "learning_rate": 1.2310224438902744e-05, "loss": 0.4097, "step": 30850 }, { "epoch": 7.695760598503741, "grad_norm": 9.271326065063477, "learning_rate": 1.230773067331671e-05, "loss": 0.3757, "step": 30860 }, { "epoch": 7.698254364089776, "grad_norm": 6.1901421546936035, "learning_rate": 1.2305236907730673e-05, "loss": 0.3571, "step": 30870 }, { "epoch": 7.7007481296758105, "grad_norm": 8.706985473632812, "learning_rate": 1.230274314214464e-05, "loss": 0.335, "step": 30880 }, { "epoch": 7.703241895261845, "grad_norm": 5.975154399871826, "learning_rate": 1.2300249376558604e-05, "loss": 0.3703, "step": 30890 }, { "epoch": 7.70573566084788, "grad_norm": 8.364744186401367, "learning_rate": 1.229775561097257e-05, "loss": 0.3532, "step": 30900 }, { "epoch": 7.708229426433915, "grad_norm": 6.633474826812744, "learning_rate": 1.2295261845386534e-05, "loss": 0.3447, "step": 30910 }, { "epoch": 7.71072319201995, "grad_norm": 7.192882061004639, "learning_rate": 1.22927680798005e-05, "loss": 0.365, "step": 30920 }, { "epoch": 7.713216957605985, "grad_norm": 4.311435222625732, "learning_rate": 1.2290274314214465e-05, "loss": 0.3817, "step": 30930 }, { "epoch": 7.71571072319202, "grad_norm": 9.537071228027344, "learning_rate": 1.228778054862843e-05, "loss": 0.4124, "step": 30940 }, { "epoch": 7.7182044887780545, "grad_norm": 6.788525581359863, "learning_rate": 1.2285286783042396e-05, "loss": 0.4284, "step": 30950 }, { "epoch": 7.720698254364089, "grad_norm": 4.7663798332214355, "learning_rate": 1.2282793017456361e-05, "loss": 0.3825, "step": 30960 }, { "epoch": 7.723192019950124, "grad_norm": 6.757378578186035, "learning_rate": 1.2280299251870325e-05, "loss": 0.3331, "step": 30970 }, { "epoch": 7.725685785536159, "grad_norm": 6.984048366546631, "learning_rate": 1.2277805486284292e-05, "loss": 0.354, "step": 30980 }, { "epoch": 7.728179551122195, "grad_norm": 6.14350700378418, "learning_rate": 1.2275311720698255e-05, "loss": 0.3543, "step": 30990 }, { "epoch": 7.73067331670823, "grad_norm": 5.969043254852295, "learning_rate": 1.2272817955112219e-05, "loss": 0.3788, "step": 31000 }, { "epoch": 7.733167082294265, "grad_norm": 6.313365936279297, "learning_rate": 1.2270324189526186e-05, "loss": 0.3608, "step": 31010 }, { "epoch": 7.7356608478802995, "grad_norm": 5.8176703453063965, "learning_rate": 1.226783042394015e-05, "loss": 0.3599, "step": 31020 }, { "epoch": 7.738154613466334, "grad_norm": 5.260715484619141, "learning_rate": 1.2265336658354116e-05, "loss": 0.3885, "step": 31030 }, { "epoch": 7.740648379052369, "grad_norm": 9.272298812866211, "learning_rate": 1.2262842892768082e-05, "loss": 0.3606, "step": 31040 }, { "epoch": 7.743142144638404, "grad_norm": 6.716904163360596, "learning_rate": 1.2260349127182045e-05, "loss": 0.3364, "step": 31050 }, { "epoch": 7.745635910224439, "grad_norm": 7.895336151123047, "learning_rate": 1.2257855361596012e-05, "loss": 0.3685, "step": 31060 }, { "epoch": 7.748129675810474, "grad_norm": 6.1272969245910645, "learning_rate": 1.2255361596009976e-05, "loss": 0.3745, "step": 31070 }, { "epoch": 7.750623441396509, "grad_norm": 6.436134338378906, "learning_rate": 1.225286783042394e-05, "loss": 0.363, "step": 31080 }, { "epoch": 7.753117206982544, "grad_norm": 7.552138328552246, "learning_rate": 1.2250374064837907e-05, "loss": 0.4172, "step": 31090 }, { "epoch": 7.7556109725685785, "grad_norm": 4.2307658195495605, "learning_rate": 1.224788029925187e-05, "loss": 0.4331, "step": 31100 }, { "epoch": 7.758104738154613, "grad_norm": 6.373022556304932, "learning_rate": 1.2245386533665837e-05, "loss": 0.3571, "step": 31110 }, { "epoch": 7.760598503740648, "grad_norm": 6.442211151123047, "learning_rate": 1.22428927680798e-05, "loss": 0.3335, "step": 31120 }, { "epoch": 7.763092269326683, "grad_norm": 5.978911399841309, "learning_rate": 1.2240399002493766e-05, "loss": 0.3363, "step": 31130 }, { "epoch": 7.765586034912718, "grad_norm": 7.628538131713867, "learning_rate": 1.2237905236907731e-05, "loss": 0.389, "step": 31140 }, { "epoch": 7.768079800498753, "grad_norm": 5.834782600402832, "learning_rate": 1.2235411471321697e-05, "loss": 0.3835, "step": 31150 }, { "epoch": 7.770573566084788, "grad_norm": 9.812804222106934, "learning_rate": 1.2232917705735664e-05, "loss": 0.3542, "step": 31160 }, { "epoch": 7.773067331670823, "grad_norm": 5.178422451019287, "learning_rate": 1.2230423940149627e-05, "loss": 0.3612, "step": 31170 }, { "epoch": 7.775561097256858, "grad_norm": 5.95904016494751, "learning_rate": 1.2227930174563591e-05, "loss": 0.3285, "step": 31180 }, { "epoch": 7.778054862842893, "grad_norm": 7.034958362579346, "learning_rate": 1.2225436408977558e-05, "loss": 0.3777, "step": 31190 }, { "epoch": 7.780548628428928, "grad_norm": 13.055363655090332, "learning_rate": 1.2222942643391522e-05, "loss": 0.4765, "step": 31200 }, { "epoch": 7.783042394014963, "grad_norm": 8.228713989257812, "learning_rate": 1.2220448877805487e-05, "loss": 0.393, "step": 31210 }, { "epoch": 7.785536159600998, "grad_norm": 8.480477333068848, "learning_rate": 1.2217955112219452e-05, "loss": 0.352, "step": 31220 }, { "epoch": 7.788029925187033, "grad_norm": 7.811897277832031, "learning_rate": 1.2215461346633418e-05, "loss": 0.4076, "step": 31230 }, { "epoch": 7.7905236907730675, "grad_norm": 6.502315521240234, "learning_rate": 1.2212967581047383e-05, "loss": 0.3238, "step": 31240 }, { "epoch": 7.793017456359102, "grad_norm": 6.6692657470703125, "learning_rate": 1.2210473815461348e-05, "loss": 0.3445, "step": 31250 }, { "epoch": 7.795511221945137, "grad_norm": 5.5514655113220215, "learning_rate": 1.2207980049875312e-05, "loss": 0.3603, "step": 31260 }, { "epoch": 7.798004987531172, "grad_norm": 5.353459358215332, "learning_rate": 1.2205486284289279e-05, "loss": 0.3117, "step": 31270 }, { "epoch": 7.800498753117207, "grad_norm": 6.266322135925293, "learning_rate": 1.2202992518703242e-05, "loss": 0.3608, "step": 31280 }, { "epoch": 7.802992518703242, "grad_norm": 5.57569694519043, "learning_rate": 1.2200498753117208e-05, "loss": 0.3953, "step": 31290 }, { "epoch": 7.805486284289277, "grad_norm": 5.770155429840088, "learning_rate": 1.2198004987531173e-05, "loss": 0.4057, "step": 31300 }, { "epoch": 7.807980049875312, "grad_norm": 8.527885437011719, "learning_rate": 1.2195511221945138e-05, "loss": 0.4245, "step": 31310 }, { "epoch": 7.8104738154613464, "grad_norm": 8.572240829467773, "learning_rate": 1.2193017456359104e-05, "loss": 0.3845, "step": 31320 }, { "epoch": 7.812967581047381, "grad_norm": 8.089426040649414, "learning_rate": 1.2190523690773069e-05, "loss": 0.3858, "step": 31330 }, { "epoch": 7.815461346633416, "grad_norm": 5.3781657218933105, "learning_rate": 1.2188029925187033e-05, "loss": 0.4435, "step": 31340 }, { "epoch": 7.817955112219451, "grad_norm": 7.371554374694824, "learning_rate": 1.2185536159601e-05, "loss": 0.3908, "step": 31350 }, { "epoch": 7.820448877805486, "grad_norm": 6.417791366577148, "learning_rate": 1.2183042394014963e-05, "loss": 0.3638, "step": 31360 }, { "epoch": 7.822942643391521, "grad_norm": 8.140090942382812, "learning_rate": 1.2180548628428927e-05, "loss": 0.4004, "step": 31370 }, { "epoch": 7.825436408977556, "grad_norm": 28.9359073638916, "learning_rate": 1.2178054862842894e-05, "loss": 0.3789, "step": 31380 }, { "epoch": 7.8279301745635905, "grad_norm": 5.224822044372559, "learning_rate": 1.2175561097256859e-05, "loss": 0.3685, "step": 31390 }, { "epoch": 7.830423940149626, "grad_norm": 5.957163333892822, "learning_rate": 1.2173067331670824e-05, "loss": 0.2989, "step": 31400 }, { "epoch": 7.832917705735661, "grad_norm": 7.902908802032471, "learning_rate": 1.217057356608479e-05, "loss": 0.3216, "step": 31410 }, { "epoch": 7.835411471321696, "grad_norm": 9.915661811828613, "learning_rate": 1.2168079800498753e-05, "loss": 0.4022, "step": 31420 }, { "epoch": 7.837905236907731, "grad_norm": 8.479707717895508, "learning_rate": 1.216558603491272e-05, "loss": 0.3614, "step": 31430 }, { "epoch": 7.840399002493766, "grad_norm": 5.247765064239502, "learning_rate": 1.2163092269326684e-05, "loss": 0.354, "step": 31440 }, { "epoch": 7.842892768079801, "grad_norm": 9.220551490783691, "learning_rate": 1.2160598503740651e-05, "loss": 0.4473, "step": 31450 }, { "epoch": 7.8453865336658355, "grad_norm": 5.292842864990234, "learning_rate": 1.2158104738154615e-05, "loss": 0.3996, "step": 31460 }, { "epoch": 7.84788029925187, "grad_norm": 9.123703956604004, "learning_rate": 1.2155610972568578e-05, "loss": 0.4442, "step": 31470 }, { "epoch": 7.850374064837905, "grad_norm": 5.475888252258301, "learning_rate": 1.2153117206982545e-05, "loss": 0.3169, "step": 31480 }, { "epoch": 7.85286783042394, "grad_norm": 7.926106929779053, "learning_rate": 1.2150623441396509e-05, "loss": 0.3636, "step": 31490 }, { "epoch": 7.855361596009975, "grad_norm": 6.426756858825684, "learning_rate": 1.2148129675810474e-05, "loss": 0.3884, "step": 31500 }, { "epoch": 7.85785536159601, "grad_norm": 7.212559700012207, "learning_rate": 1.2145635910224441e-05, "loss": 0.3312, "step": 31510 }, { "epoch": 7.860349127182045, "grad_norm": 5.426643371582031, "learning_rate": 1.2143142144638405e-05, "loss": 0.3075, "step": 31520 }, { "epoch": 7.86284289276808, "grad_norm": 6.562136650085449, "learning_rate": 1.2140648379052372e-05, "loss": 0.4217, "step": 31530 }, { "epoch": 7.865336658354114, "grad_norm": 7.82149076461792, "learning_rate": 1.2138154613466335e-05, "loss": 0.347, "step": 31540 }, { "epoch": 7.867830423940149, "grad_norm": 6.736440181732178, "learning_rate": 1.2135660847880299e-05, "loss": 0.3143, "step": 31550 }, { "epoch": 7.870324189526184, "grad_norm": 7.551044940948486, "learning_rate": 1.2133167082294266e-05, "loss": 0.4345, "step": 31560 }, { "epoch": 7.87281795511222, "grad_norm": 11.6002836227417, "learning_rate": 1.213067331670823e-05, "loss": 0.3961, "step": 31570 }, { "epoch": 7.875311720698255, "grad_norm": 4.926860809326172, "learning_rate": 1.2128179551122195e-05, "loss": 0.3755, "step": 31580 }, { "epoch": 7.87780548628429, "grad_norm": 13.205385208129883, "learning_rate": 1.212568578553616e-05, "loss": 0.3714, "step": 31590 }, { "epoch": 7.8802992518703245, "grad_norm": 4.073211193084717, "learning_rate": 1.2123192019950126e-05, "loss": 0.2892, "step": 31600 }, { "epoch": 7.882793017456359, "grad_norm": 11.72350788116455, "learning_rate": 1.212069825436409e-05, "loss": 0.3547, "step": 31610 }, { "epoch": 7.885286783042394, "grad_norm": 6.860603332519531, "learning_rate": 1.2118204488778056e-05, "loss": 0.3574, "step": 31620 }, { "epoch": 7.887780548628429, "grad_norm": 5.04127836227417, "learning_rate": 1.211571072319202e-05, "loss": 0.3101, "step": 31630 }, { "epoch": 7.890274314214464, "grad_norm": 5.518341064453125, "learning_rate": 1.2113216957605987e-05, "loss": 0.4506, "step": 31640 }, { "epoch": 7.892768079800499, "grad_norm": 6.156492710113525, "learning_rate": 1.211072319201995e-05, "loss": 0.3326, "step": 31650 }, { "epoch": 7.895261845386534, "grad_norm": 7.646767616271973, "learning_rate": 1.2108229426433917e-05, "loss": 0.3646, "step": 31660 }, { "epoch": 7.897755610972569, "grad_norm": 5.843283176422119, "learning_rate": 1.2105735660847881e-05, "loss": 0.418, "step": 31670 }, { "epoch": 7.9002493765586035, "grad_norm": 7.041114330291748, "learning_rate": 1.2103241895261846e-05, "loss": 0.3759, "step": 31680 }, { "epoch": 7.902743142144638, "grad_norm": 8.82856273651123, "learning_rate": 1.2100748129675812e-05, "loss": 0.478, "step": 31690 }, { "epoch": 7.905236907730673, "grad_norm": 6.0212273597717285, "learning_rate": 1.2098254364089777e-05, "loss": 0.3257, "step": 31700 }, { "epoch": 7.907730673316708, "grad_norm": 8.000700950622559, "learning_rate": 1.209576059850374e-05, "loss": 0.3212, "step": 31710 }, { "epoch": 7.910224438902743, "grad_norm": 9.041226387023926, "learning_rate": 1.2093266832917708e-05, "loss": 0.3234, "step": 31720 }, { "epoch": 7.912718204488778, "grad_norm": 10.657549858093262, "learning_rate": 1.2090773067331671e-05, "loss": 0.4932, "step": 31730 }, { "epoch": 7.915211970074813, "grad_norm": 8.23529052734375, "learning_rate": 1.2088279301745638e-05, "loss": 0.3676, "step": 31740 }, { "epoch": 7.917705735660848, "grad_norm": 5.820125102996826, "learning_rate": 1.2085785536159602e-05, "loss": 0.38, "step": 31750 }, { "epoch": 7.920199501246882, "grad_norm": 7.681821346282959, "learning_rate": 1.2083291770573567e-05, "loss": 0.3368, "step": 31760 }, { "epoch": 7.922693266832917, "grad_norm": 6.52007532119751, "learning_rate": 1.2080798004987532e-05, "loss": 0.3466, "step": 31770 }, { "epoch": 7.925187032418952, "grad_norm": 8.937047958374023, "learning_rate": 1.2078304239401498e-05, "loss": 0.3921, "step": 31780 }, { "epoch": 7.927680798004987, "grad_norm": 7.19578742980957, "learning_rate": 1.2075810473815461e-05, "loss": 0.3695, "step": 31790 }, { "epoch": 7.930174563591023, "grad_norm": 9.430617332458496, "learning_rate": 1.2073316708229428e-05, "loss": 0.3753, "step": 31800 }, { "epoch": 7.932668329177058, "grad_norm": 5.713561058044434, "learning_rate": 1.2070822942643392e-05, "loss": 0.4164, "step": 31810 }, { "epoch": 7.9351620947630925, "grad_norm": 6.72670316696167, "learning_rate": 1.2068329177057359e-05, "loss": 0.4523, "step": 31820 }, { "epoch": 7.937655860349127, "grad_norm": 6.967277526855469, "learning_rate": 1.2065835411471323e-05, "loss": 0.4086, "step": 31830 }, { "epoch": 7.940149625935162, "grad_norm": 6.393032073974609, "learning_rate": 1.2063341645885286e-05, "loss": 0.3532, "step": 31840 }, { "epoch": 7.942643391521197, "grad_norm": 8.138836860656738, "learning_rate": 1.2060847880299253e-05, "loss": 0.3699, "step": 31850 }, { "epoch": 7.945137157107232, "grad_norm": 7.282432556152344, "learning_rate": 1.2058354114713218e-05, "loss": 0.3925, "step": 31860 }, { "epoch": 7.947630922693267, "grad_norm": 5.286504745483398, "learning_rate": 1.2055860349127182e-05, "loss": 0.2947, "step": 31870 }, { "epoch": 7.950124688279302, "grad_norm": 7.868513584136963, "learning_rate": 1.2053366583541149e-05, "loss": 0.501, "step": 31880 }, { "epoch": 7.952618453865337, "grad_norm": 8.221051216125488, "learning_rate": 1.2050872817955113e-05, "loss": 0.4184, "step": 31890 }, { "epoch": 7.9551122194513715, "grad_norm": 8.704934120178223, "learning_rate": 1.204837905236908e-05, "loss": 0.3372, "step": 31900 }, { "epoch": 7.957605985037406, "grad_norm": 8.388035774230957, "learning_rate": 1.2045885286783043e-05, "loss": 0.2941, "step": 31910 }, { "epoch": 7.960099750623441, "grad_norm": 6.82948637008667, "learning_rate": 1.2043391521197007e-05, "loss": 0.3935, "step": 31920 }, { "epoch": 7.962593516209476, "grad_norm": 18.161638259887695, "learning_rate": 1.2040897755610974e-05, "loss": 0.4707, "step": 31930 }, { "epoch": 7.965087281795511, "grad_norm": 7.017074108123779, "learning_rate": 1.2038403990024938e-05, "loss": 0.3647, "step": 31940 }, { "epoch": 7.967581047381546, "grad_norm": 7.731515884399414, "learning_rate": 1.2035910224438905e-05, "loss": 0.3442, "step": 31950 }, { "epoch": 7.970074812967581, "grad_norm": 5.6125898361206055, "learning_rate": 1.2033416458852868e-05, "loss": 0.4005, "step": 31960 }, { "epoch": 7.9725685785536164, "grad_norm": 8.200444221496582, "learning_rate": 1.2030922693266834e-05, "loss": 0.4055, "step": 31970 }, { "epoch": 7.975062344139651, "grad_norm": 8.561641693115234, "learning_rate": 1.20284289276808e-05, "loss": 0.3479, "step": 31980 }, { "epoch": 7.977556109725686, "grad_norm": 6.790326118469238, "learning_rate": 1.2025935162094764e-05, "loss": 0.4134, "step": 31990 }, { "epoch": 7.980049875311721, "grad_norm": 6.383502006530762, "learning_rate": 1.2023441396508728e-05, "loss": 0.3949, "step": 32000 }, { "epoch": 7.982543640897756, "grad_norm": 5.95525598526001, "learning_rate": 1.2020947630922695e-05, "loss": 0.4402, "step": 32010 }, { "epoch": 7.985037406483791, "grad_norm": 7.909287929534912, "learning_rate": 1.2018453865336658e-05, "loss": 0.349, "step": 32020 }, { "epoch": 7.987531172069826, "grad_norm": 5.539024829864502, "learning_rate": 1.2015960099750625e-05, "loss": 0.3202, "step": 32030 }, { "epoch": 7.9900249376558605, "grad_norm": 7.293623447418213, "learning_rate": 1.2013466334164589e-05, "loss": 0.3595, "step": 32040 }, { "epoch": 7.992518703241895, "grad_norm": 6.55578088760376, "learning_rate": 1.2010972568578554e-05, "loss": 0.3922, "step": 32050 }, { "epoch": 7.99501246882793, "grad_norm": 5.113552093505859, "learning_rate": 1.200847880299252e-05, "loss": 0.4029, "step": 32060 }, { "epoch": 7.997506234413965, "grad_norm": 6.873347282409668, "learning_rate": 1.2005985037406485e-05, "loss": 0.3852, "step": 32070 }, { "epoch": 8.0, "grad_norm": 10.353362083435059, "learning_rate": 1.2003491271820449e-05, "loss": 0.3054, "step": 32080 }, { "epoch": 8.0, "eval_loss": 0.4143351912498474, "eval_runtime": 59.8965, "eval_samples_per_second": 16.746, "eval_steps_per_second": 16.746, "step": 32080 }, { "epoch": 8.002493765586035, "grad_norm": 6.222545623779297, "learning_rate": 1.2000997506234416e-05, "loss": 0.3814, "step": 32090 }, { "epoch": 8.00498753117207, "grad_norm": 6.579477310180664, "learning_rate": 1.1998503740648379e-05, "loss": 0.3755, "step": 32100 }, { "epoch": 8.007481296758105, "grad_norm": 4.621208667755127, "learning_rate": 1.1996009975062346e-05, "loss": 0.2821, "step": 32110 }, { "epoch": 8.00997506234414, "grad_norm": 5.9014081954956055, "learning_rate": 1.199351620947631e-05, "loss": 0.3424, "step": 32120 }, { "epoch": 8.012468827930174, "grad_norm": 8.920719146728516, "learning_rate": 1.1991022443890275e-05, "loss": 0.4108, "step": 32130 }, { "epoch": 8.01496259351621, "grad_norm": 7.701635360717773, "learning_rate": 1.198852867830424e-05, "loss": 0.3543, "step": 32140 }, { "epoch": 8.017456359102244, "grad_norm": 6.4233856201171875, "learning_rate": 1.1986034912718206e-05, "loss": 0.3799, "step": 32150 }, { "epoch": 8.019950124688279, "grad_norm": 9.75025463104248, "learning_rate": 1.1983541147132171e-05, "loss": 0.4066, "step": 32160 }, { "epoch": 8.022443890274314, "grad_norm": 7.719704627990723, "learning_rate": 1.1981047381546136e-05, "loss": 0.3938, "step": 32170 }, { "epoch": 8.024937655860349, "grad_norm": 4.815377712249756, "learning_rate": 1.19785536159601e-05, "loss": 0.3635, "step": 32180 }, { "epoch": 8.027431421446384, "grad_norm": 7.045759201049805, "learning_rate": 1.197630922693267e-05, "loss": 0.3618, "step": 32190 }, { "epoch": 8.029925187032418, "grad_norm": 5.257409572601318, "learning_rate": 1.1973815461346634e-05, "loss": 0.3325, "step": 32200 }, { "epoch": 8.032418952618453, "grad_norm": 12.353095054626465, "learning_rate": 1.19713216957606e-05, "loss": 0.356, "step": 32210 }, { "epoch": 8.034912718204488, "grad_norm": 5.638147830963135, "learning_rate": 1.1968827930174564e-05, "loss": 0.3877, "step": 32220 }, { "epoch": 8.037406483790523, "grad_norm": 4.99983024597168, "learning_rate": 1.196633416458853e-05, "loss": 0.357, "step": 32230 }, { "epoch": 8.039900249376558, "grad_norm": 5.99961519241333, "learning_rate": 1.1963840399002495e-05, "loss": 0.3408, "step": 32240 }, { "epoch": 8.042394014962593, "grad_norm": 8.61434555053711, "learning_rate": 1.196134663341646e-05, "loss": 0.3429, "step": 32250 }, { "epoch": 8.044887780548628, "grad_norm": 6.741703033447266, "learning_rate": 1.1958852867830424e-05, "loss": 0.3562, "step": 32260 }, { "epoch": 8.047381546134662, "grad_norm": 4.7175703048706055, "learning_rate": 1.195635910224439e-05, "loss": 0.3447, "step": 32270 }, { "epoch": 8.049875311720697, "grad_norm": 6.885346412658691, "learning_rate": 1.1953865336658354e-05, "loss": 0.3637, "step": 32280 }, { "epoch": 8.052369077306734, "grad_norm": 7.284684181213379, "learning_rate": 1.1951371571072321e-05, "loss": 0.3799, "step": 32290 }, { "epoch": 8.054862842892769, "grad_norm": 8.595766067504883, "learning_rate": 1.1948877805486285e-05, "loss": 0.3967, "step": 32300 }, { "epoch": 8.057356608478804, "grad_norm": 6.696188926696777, "learning_rate": 1.194638403990025e-05, "loss": 0.3993, "step": 32310 }, { "epoch": 8.059850374064839, "grad_norm": 5.798731327056885, "learning_rate": 1.1943890274314216e-05, "loss": 0.3159, "step": 32320 }, { "epoch": 8.062344139650873, "grad_norm": 6.5133891105651855, "learning_rate": 1.1941396508728181e-05, "loss": 0.3556, "step": 32330 }, { "epoch": 8.064837905236908, "grad_norm": 8.15516471862793, "learning_rate": 1.1938902743142146e-05, "loss": 0.3533, "step": 32340 }, { "epoch": 8.067331670822943, "grad_norm": 9.06305980682373, "learning_rate": 1.1936408977556111e-05, "loss": 0.419, "step": 32350 }, { "epoch": 8.069825436408978, "grad_norm": 5.8989644050598145, "learning_rate": 1.1933915211970075e-05, "loss": 0.3469, "step": 32360 }, { "epoch": 8.072319201995013, "grad_norm": 4.38079833984375, "learning_rate": 1.1931421446384042e-05, "loss": 0.3461, "step": 32370 }, { "epoch": 8.074812967581048, "grad_norm": 7.34318208694458, "learning_rate": 1.1928927680798006e-05, "loss": 0.3793, "step": 32380 }, { "epoch": 8.077306733167083, "grad_norm": 6.742260932922363, "learning_rate": 1.192643391521197e-05, "loss": 0.3649, "step": 32390 }, { "epoch": 8.079800498753118, "grad_norm": 6.300445079803467, "learning_rate": 1.1923940149625936e-05, "loss": 0.3527, "step": 32400 }, { "epoch": 8.082294264339152, "grad_norm": 12.198246002197266, "learning_rate": 1.1921446384039902e-05, "loss": 0.3832, "step": 32410 }, { "epoch": 8.084788029925187, "grad_norm": 6.432736873626709, "learning_rate": 1.1918952618453867e-05, "loss": 0.3424, "step": 32420 }, { "epoch": 8.087281795511222, "grad_norm": 7.7809271812438965, "learning_rate": 1.1916458852867832e-05, "loss": 0.3318, "step": 32430 }, { "epoch": 8.089775561097257, "grad_norm": 8.33100700378418, "learning_rate": 1.1913965087281796e-05, "loss": 0.3166, "step": 32440 }, { "epoch": 8.092269326683292, "grad_norm": 7.229568958282471, "learning_rate": 1.1911471321695763e-05, "loss": 0.4009, "step": 32450 }, { "epoch": 8.094763092269327, "grad_norm": 6.71392822265625, "learning_rate": 1.1908977556109726e-05, "loss": 0.3876, "step": 32460 }, { "epoch": 8.097256857855362, "grad_norm": 7.794308185577393, "learning_rate": 1.190648379052369e-05, "loss": 0.406, "step": 32470 }, { "epoch": 8.099750623441397, "grad_norm": 6.342446327209473, "learning_rate": 1.1903990024937657e-05, "loss": 0.3472, "step": 32480 }, { "epoch": 8.102244389027431, "grad_norm": 7.7976393699646, "learning_rate": 1.190149625935162e-05, "loss": 0.3918, "step": 32490 }, { "epoch": 8.104738154613466, "grad_norm": 5.704341411590576, "learning_rate": 1.1899002493765588e-05, "loss": 0.3502, "step": 32500 }, { "epoch": 8.107231920199501, "grad_norm": 6.822027206420898, "learning_rate": 1.1896508728179551e-05, "loss": 0.3749, "step": 32510 }, { "epoch": 8.109725685785536, "grad_norm": 6.865316867828369, "learning_rate": 1.1894014962593517e-05, "loss": 0.3452, "step": 32520 }, { "epoch": 8.11221945137157, "grad_norm": 9.309464454650879, "learning_rate": 1.1891521197007484e-05, "loss": 0.3609, "step": 32530 }, { "epoch": 8.114713216957606, "grad_norm": 6.361128807067871, "learning_rate": 1.1889027431421447e-05, "loss": 0.3868, "step": 32540 }, { "epoch": 8.11720698254364, "grad_norm": 6.661703586578369, "learning_rate": 1.1886533665835411e-05, "loss": 0.3508, "step": 32550 }, { "epoch": 8.119700748129675, "grad_norm": 5.073139667510986, "learning_rate": 1.1884039900249378e-05, "loss": 0.4003, "step": 32560 }, { "epoch": 8.12219451371571, "grad_norm": 5.183114051818848, "learning_rate": 1.1881546134663342e-05, "loss": 0.3226, "step": 32570 }, { "epoch": 8.124688279301745, "grad_norm": 6.288895130157471, "learning_rate": 1.1879052369077309e-05, "loss": 0.4334, "step": 32580 }, { "epoch": 8.12718204488778, "grad_norm": 6.090054988861084, "learning_rate": 1.1876558603491272e-05, "loss": 0.3291, "step": 32590 }, { "epoch": 8.129675810473815, "grad_norm": 7.359978675842285, "learning_rate": 1.1874064837905237e-05, "loss": 0.4043, "step": 32600 }, { "epoch": 8.13216957605985, "grad_norm": 9.150673866271973, "learning_rate": 1.1871571072319203e-05, "loss": 0.3534, "step": 32610 }, { "epoch": 8.134663341645885, "grad_norm": 10.117431640625, "learning_rate": 1.1869077306733168e-05, "loss": 0.3602, "step": 32620 }, { "epoch": 8.13715710723192, "grad_norm": 4.644679546356201, "learning_rate": 1.1866583541147133e-05, "loss": 0.3498, "step": 32630 }, { "epoch": 8.139650872817954, "grad_norm": 6.087995529174805, "learning_rate": 1.1864089775561099e-05, "loss": 0.3325, "step": 32640 }, { "epoch": 8.14214463840399, "grad_norm": 6.6635050773620605, "learning_rate": 1.1861596009975062e-05, "loss": 0.3532, "step": 32650 }, { "epoch": 8.144638403990024, "grad_norm": 7.93565559387207, "learning_rate": 1.185910224438903e-05, "loss": 0.3327, "step": 32660 }, { "epoch": 8.147132169576059, "grad_norm": 5.548285961151123, "learning_rate": 1.1856608478802993e-05, "loss": 0.409, "step": 32670 }, { "epoch": 8.149625935162096, "grad_norm": 6.113910675048828, "learning_rate": 1.1854114713216958e-05, "loss": 0.3934, "step": 32680 }, { "epoch": 8.15211970074813, "grad_norm": 8.297721862792969, "learning_rate": 1.1851620947630924e-05, "loss": 0.4034, "step": 32690 }, { "epoch": 8.154613466334165, "grad_norm": 5.073780536651611, "learning_rate": 1.1849127182044889e-05, "loss": 0.326, "step": 32700 }, { "epoch": 8.1571072319202, "grad_norm": 6.652329921722412, "learning_rate": 1.1846633416458854e-05, "loss": 0.3051, "step": 32710 }, { "epoch": 8.159600997506235, "grad_norm": 8.228754043579102, "learning_rate": 1.184413965087282e-05, "loss": 0.346, "step": 32720 }, { "epoch": 8.16209476309227, "grad_norm": 5.232443809509277, "learning_rate": 1.1841645885286783e-05, "loss": 0.3189, "step": 32730 }, { "epoch": 8.164588528678305, "grad_norm": 6.608932018280029, "learning_rate": 1.183915211970075e-05, "loss": 0.291, "step": 32740 }, { "epoch": 8.16708229426434, "grad_norm": 6.14049768447876, "learning_rate": 1.1836658354114714e-05, "loss": 0.3777, "step": 32750 }, { "epoch": 8.169576059850375, "grad_norm": 6.535306453704834, "learning_rate": 1.1834164588528679e-05, "loss": 0.3807, "step": 32760 }, { "epoch": 8.17206982543641, "grad_norm": 8.01197624206543, "learning_rate": 1.1831670822942644e-05, "loss": 0.3618, "step": 32770 }, { "epoch": 8.174563591022444, "grad_norm": 6.140148639678955, "learning_rate": 1.182917705735661e-05, "loss": 0.396, "step": 32780 }, { "epoch": 8.17705735660848, "grad_norm": 9.92581844329834, "learning_rate": 1.1826683291770575e-05, "loss": 0.4123, "step": 32790 }, { "epoch": 8.179551122194514, "grad_norm": 5.655374526977539, "learning_rate": 1.182418952618454e-05, "loss": 0.3478, "step": 32800 }, { "epoch": 8.182044887780549, "grad_norm": 6.091732501983643, "learning_rate": 1.1821695760598504e-05, "loss": 0.34, "step": 32810 }, { "epoch": 8.184538653366584, "grad_norm": 6.486967086791992, "learning_rate": 1.1819201995012471e-05, "loss": 0.3117, "step": 32820 }, { "epoch": 8.187032418952619, "grad_norm": 6.588410377502441, "learning_rate": 1.1816708229426434e-05, "loss": 0.3123, "step": 32830 }, { "epoch": 8.189526184538654, "grad_norm": 5.499619007110596, "learning_rate": 1.1814214463840401e-05, "loss": 0.3842, "step": 32840 }, { "epoch": 8.192019950124688, "grad_norm": 11.53024673461914, "learning_rate": 1.1811720698254365e-05, "loss": 0.437, "step": 32850 }, { "epoch": 8.194513715710723, "grad_norm": 9.12915325164795, "learning_rate": 1.1809226932668329e-05, "loss": 0.3859, "step": 32860 }, { "epoch": 8.197007481296758, "grad_norm": 6.353451728820801, "learning_rate": 1.1806733167082296e-05, "loss": 0.3672, "step": 32870 }, { "epoch": 8.199501246882793, "grad_norm": 8.243021965026855, "learning_rate": 1.1804239401496261e-05, "loss": 0.351, "step": 32880 }, { "epoch": 8.201995012468828, "grad_norm": 10.141286849975586, "learning_rate": 1.1801745635910225e-05, "loss": 0.3614, "step": 32890 }, { "epoch": 8.204488778054863, "grad_norm": 6.647899627685547, "learning_rate": 1.1799251870324192e-05, "loss": 0.3553, "step": 32900 }, { "epoch": 8.206982543640898, "grad_norm": 8.836320877075195, "learning_rate": 1.1796758104738155e-05, "loss": 0.3578, "step": 32910 }, { "epoch": 8.209476309226932, "grad_norm": 5.279130458831787, "learning_rate": 1.1794264339152122e-05, "loss": 0.3746, "step": 32920 }, { "epoch": 8.211970074812967, "grad_norm": 8.758357048034668, "learning_rate": 1.1791770573566086e-05, "loss": 0.3627, "step": 32930 }, { "epoch": 8.214463840399002, "grad_norm": 7.450492858886719, "learning_rate": 1.178927680798005e-05, "loss": 0.3138, "step": 32940 }, { "epoch": 8.216957605985037, "grad_norm": 7.956082820892334, "learning_rate": 1.1786783042394016e-05, "loss": 0.3486, "step": 32950 }, { "epoch": 8.219451371571072, "grad_norm": 6.743360996246338, "learning_rate": 1.178428927680798e-05, "loss": 0.3642, "step": 32960 }, { "epoch": 8.221945137157107, "grad_norm": 8.854829788208008, "learning_rate": 1.1781795511221945e-05, "loss": 0.3638, "step": 32970 }, { "epoch": 8.224438902743142, "grad_norm": 8.033832550048828, "learning_rate": 1.177930174563591e-05, "loss": 0.3792, "step": 32980 }, { "epoch": 8.226932668329177, "grad_norm": 3.877129554748535, "learning_rate": 1.1776807980049876e-05, "loss": 0.3361, "step": 32990 }, { "epoch": 8.229426433915211, "grad_norm": 7.193696975708008, "learning_rate": 1.1774314214463843e-05, "loss": 0.3768, "step": 33000 }, { "epoch": 8.231920199501246, "grad_norm": 6.511844635009766, "learning_rate": 1.1771820448877807e-05, "loss": 0.3266, "step": 33010 }, { "epoch": 8.234413965087281, "grad_norm": 7.392646789550781, "learning_rate": 1.176932668329177e-05, "loss": 0.3634, "step": 33020 }, { "epoch": 8.236907730673316, "grad_norm": 7.599452495574951, "learning_rate": 1.1766832917705737e-05, "loss": 0.4454, "step": 33030 }, { "epoch": 8.239401496259351, "grad_norm": 8.29662036895752, "learning_rate": 1.1764339152119701e-05, "loss": 0.3168, "step": 33040 }, { "epoch": 8.241895261845386, "grad_norm": 9.465046882629395, "learning_rate": 1.1761845386533666e-05, "loss": 0.4269, "step": 33050 }, { "epoch": 8.24438902743142, "grad_norm": 5.553979396820068, "learning_rate": 1.1759351620947632e-05, "loss": 0.3853, "step": 33060 }, { "epoch": 8.246882793017456, "grad_norm": 10.552316665649414, "learning_rate": 1.1756857855361597e-05, "loss": 0.3968, "step": 33070 }, { "epoch": 8.24937655860349, "grad_norm": 6.383450984954834, "learning_rate": 1.1754364089775562e-05, "loss": 0.3565, "step": 33080 }, { "epoch": 8.251870324189527, "grad_norm": 6.994113922119141, "learning_rate": 1.1751870324189527e-05, "loss": 0.3601, "step": 33090 }, { "epoch": 8.254364089775562, "grad_norm": 5.046513080596924, "learning_rate": 1.1749376558603491e-05, "loss": 0.3747, "step": 33100 }, { "epoch": 8.256857855361597, "grad_norm": 10.145176887512207, "learning_rate": 1.1746882793017458e-05, "loss": 0.4286, "step": 33110 }, { "epoch": 8.259351620947632, "grad_norm": 8.742334365844727, "learning_rate": 1.1744389027431422e-05, "loss": 0.3592, "step": 33120 }, { "epoch": 8.261845386533667, "grad_norm": 7.104617595672607, "learning_rate": 1.1741895261845389e-05, "loss": 0.3678, "step": 33130 }, { "epoch": 8.264339152119701, "grad_norm": 7.787363529205322, "learning_rate": 1.1739401496259352e-05, "loss": 0.3511, "step": 33140 }, { "epoch": 8.266832917705736, "grad_norm": 5.79642915725708, "learning_rate": 1.1736907730673318e-05, "loss": 0.3319, "step": 33150 }, { "epoch": 8.269326683291771, "grad_norm": 7.905724048614502, "learning_rate": 1.1734413965087283e-05, "loss": 0.3393, "step": 33160 }, { "epoch": 8.271820448877806, "grad_norm": 9.876928329467773, "learning_rate": 1.1731920199501248e-05, "loss": 0.3568, "step": 33170 }, { "epoch": 8.27431421446384, "grad_norm": 9.139424324035645, "learning_rate": 1.1729426433915212e-05, "loss": 0.4312, "step": 33180 }, { "epoch": 8.276807980049876, "grad_norm": 4.746652126312256, "learning_rate": 1.1726932668329179e-05, "loss": 0.3464, "step": 33190 }, { "epoch": 8.27930174563591, "grad_norm": 7.954774379730225, "learning_rate": 1.1724438902743142e-05, "loss": 0.328, "step": 33200 }, { "epoch": 8.281795511221945, "grad_norm": 8.069437980651855, "learning_rate": 1.172194513715711e-05, "loss": 0.4912, "step": 33210 }, { "epoch": 8.28428927680798, "grad_norm": 5.376629829406738, "learning_rate": 1.1719451371571073e-05, "loss": 0.339, "step": 33220 }, { "epoch": 8.286783042394015, "grad_norm": 5.5280632972717285, "learning_rate": 1.1716957605985038e-05, "loss": 0.3914, "step": 33230 }, { "epoch": 8.28927680798005, "grad_norm": 6.899628162384033, "learning_rate": 1.1714463840399004e-05, "loss": 0.362, "step": 33240 }, { "epoch": 8.291770573566085, "grad_norm": 5.153944969177246, "learning_rate": 1.1711970074812969e-05, "loss": 0.3784, "step": 33250 }, { "epoch": 8.29426433915212, "grad_norm": 5.902993679046631, "learning_rate": 1.1709476309226933e-05, "loss": 0.3422, "step": 33260 }, { "epoch": 8.296758104738155, "grad_norm": 5.776882648468018, "learning_rate": 1.17069825436409e-05, "loss": 0.3242, "step": 33270 }, { "epoch": 8.29925187032419, "grad_norm": 6.157176494598389, "learning_rate": 1.1704488778054863e-05, "loss": 0.3903, "step": 33280 }, { "epoch": 8.301745635910224, "grad_norm": 9.823055267333984, "learning_rate": 1.170199501246883e-05, "loss": 0.3959, "step": 33290 }, { "epoch": 8.30423940149626, "grad_norm": 8.448189735412598, "learning_rate": 1.1699501246882794e-05, "loss": 0.3757, "step": 33300 }, { "epoch": 8.306733167082294, "grad_norm": 8.766897201538086, "learning_rate": 1.1697007481296757e-05, "loss": 0.3782, "step": 33310 }, { "epoch": 8.309226932668329, "grad_norm": 5.299570560455322, "learning_rate": 1.1694513715710724e-05, "loss": 0.3758, "step": 33320 }, { "epoch": 8.311720698254364, "grad_norm": 5.184985637664795, "learning_rate": 1.1692019950124688e-05, "loss": 0.3417, "step": 33330 }, { "epoch": 8.314214463840399, "grad_norm": 6.362977981567383, "learning_rate": 1.1689526184538655e-05, "loss": 0.341, "step": 33340 }, { "epoch": 8.316708229426434, "grad_norm": 7.140589714050293, "learning_rate": 1.168703241895262e-05, "loss": 0.3829, "step": 33350 }, { "epoch": 8.319201995012468, "grad_norm": 7.842562675476074, "learning_rate": 1.1684538653366584e-05, "loss": 0.293, "step": 33360 }, { "epoch": 8.321695760598503, "grad_norm": 7.240875244140625, "learning_rate": 1.1682044887780551e-05, "loss": 0.4359, "step": 33370 }, { "epoch": 8.324189526184538, "grad_norm": 10.326569557189941, "learning_rate": 1.1679551122194515e-05, "loss": 0.342, "step": 33380 }, { "epoch": 8.326683291770573, "grad_norm": 8.610823631286621, "learning_rate": 1.1677057356608478e-05, "loss": 0.3688, "step": 33390 }, { "epoch": 8.329177057356608, "grad_norm": 7.092180252075195, "learning_rate": 1.1674563591022445e-05, "loss": 0.3506, "step": 33400 }, { "epoch": 8.331670822942643, "grad_norm": 6.74815559387207, "learning_rate": 1.1672069825436409e-05, "loss": 0.3515, "step": 33410 }, { "epoch": 8.334164588528678, "grad_norm": 6.766687393188477, "learning_rate": 1.1669576059850376e-05, "loss": 0.3856, "step": 33420 }, { "epoch": 8.336658354114713, "grad_norm": 7.77371883392334, "learning_rate": 1.166708229426434e-05, "loss": 0.3606, "step": 33430 }, { "epoch": 8.339152119700747, "grad_norm": 8.904953956604004, "learning_rate": 1.1664588528678305e-05, "loss": 0.4668, "step": 33440 }, { "epoch": 8.341645885286782, "grad_norm": 4.797825336456299, "learning_rate": 1.166209476309227e-05, "loss": 0.363, "step": 33450 }, { "epoch": 8.344139650872817, "grad_norm": 9.200943946838379, "learning_rate": 1.1659600997506235e-05, "loss": 0.3227, "step": 33460 }, { "epoch": 8.346633416458852, "grad_norm": 6.8330888748168945, "learning_rate": 1.1657107231920199e-05, "loss": 0.3053, "step": 33470 }, { "epoch": 8.349127182044889, "grad_norm": 8.612521171569824, "learning_rate": 1.1654613466334166e-05, "loss": 0.3805, "step": 33480 }, { "epoch": 8.351620947630924, "grad_norm": 6.210586071014404, "learning_rate": 1.165211970074813e-05, "loss": 0.396, "step": 33490 }, { "epoch": 8.354114713216958, "grad_norm": 6.151739597320557, "learning_rate": 1.1649625935162097e-05, "loss": 0.4046, "step": 33500 }, { "epoch": 8.356608478802993, "grad_norm": 7.85900354385376, "learning_rate": 1.164713216957606e-05, "loss": 0.3673, "step": 33510 }, { "epoch": 8.359102244389028, "grad_norm": 6.287712097167969, "learning_rate": 1.1644638403990026e-05, "loss": 0.3073, "step": 33520 }, { "epoch": 8.361596009975063, "grad_norm": 6.707120418548584, "learning_rate": 1.1642144638403991e-05, "loss": 0.3577, "step": 33530 }, { "epoch": 8.364089775561098, "grad_norm": 6.422928810119629, "learning_rate": 1.1639650872817956e-05, "loss": 0.3452, "step": 33540 }, { "epoch": 8.366583541147133, "grad_norm": 7.4128031730651855, "learning_rate": 1.163715710723192e-05, "loss": 0.4418, "step": 33550 }, { "epoch": 8.369077306733168, "grad_norm": 7.202417850494385, "learning_rate": 1.1634663341645887e-05, "loss": 0.4487, "step": 33560 }, { "epoch": 8.371571072319203, "grad_norm": 6.886559963226318, "learning_rate": 1.163216957605985e-05, "loss": 0.399, "step": 33570 }, { "epoch": 8.374064837905237, "grad_norm": 6.747541427612305, "learning_rate": 1.1629675810473817e-05, "loss": 0.345, "step": 33580 }, { "epoch": 8.376558603491272, "grad_norm": 5.691372871398926, "learning_rate": 1.1627182044887781e-05, "loss": 0.3636, "step": 33590 }, { "epoch": 8.379052369077307, "grad_norm": 7.294488906860352, "learning_rate": 1.1624688279301746e-05, "loss": 0.3643, "step": 33600 }, { "epoch": 8.381546134663342, "grad_norm": 6.717970371246338, "learning_rate": 1.1622194513715712e-05, "loss": 0.3814, "step": 33610 }, { "epoch": 8.384039900249377, "grad_norm": 4.82089376449585, "learning_rate": 1.1619700748129677e-05, "loss": 0.3283, "step": 33620 }, { "epoch": 8.386533665835412, "grad_norm": 10.288949966430664, "learning_rate": 1.1617206982543642e-05, "loss": 0.3937, "step": 33630 }, { "epoch": 8.389027431421447, "grad_norm": 8.169456481933594, "learning_rate": 1.1614713216957608e-05, "loss": 0.3911, "step": 33640 }, { "epoch": 8.391521197007481, "grad_norm": 4.798164367675781, "learning_rate": 1.1612219451371571e-05, "loss": 0.333, "step": 33650 }, { "epoch": 8.394014962593516, "grad_norm": 8.796186447143555, "learning_rate": 1.1609725685785538e-05, "loss": 0.3268, "step": 33660 }, { "epoch": 8.396508728179551, "grad_norm": 6.837301731109619, "learning_rate": 1.1607231920199502e-05, "loss": 0.3797, "step": 33670 }, { "epoch": 8.399002493765586, "grad_norm": 4.7810282707214355, "learning_rate": 1.1604738154613465e-05, "loss": 0.3541, "step": 33680 }, { "epoch": 8.401496259351621, "grad_norm": 7.039614200592041, "learning_rate": 1.1602244389027432e-05, "loss": 0.4047, "step": 33690 }, { "epoch": 8.403990024937656, "grad_norm": 7.48544454574585, "learning_rate": 1.1599750623441398e-05, "loss": 0.3616, "step": 33700 }, { "epoch": 8.40648379052369, "grad_norm": 12.1786470413208, "learning_rate": 1.1597256857855363e-05, "loss": 0.4326, "step": 33710 }, { "epoch": 8.408977556109726, "grad_norm": 6.31995964050293, "learning_rate": 1.1594763092269328e-05, "loss": 0.3818, "step": 33720 }, { "epoch": 8.41147132169576, "grad_norm": 9.140199661254883, "learning_rate": 1.1592269326683292e-05, "loss": 0.3978, "step": 33730 }, { "epoch": 8.413965087281795, "grad_norm": 5.953067779541016, "learning_rate": 1.1589775561097259e-05, "loss": 0.3253, "step": 33740 }, { "epoch": 8.41645885286783, "grad_norm": 6.9121246337890625, "learning_rate": 1.1587281795511223e-05, "loss": 0.3492, "step": 33750 }, { "epoch": 8.418952618453865, "grad_norm": 6.274780750274658, "learning_rate": 1.1584788029925186e-05, "loss": 0.4546, "step": 33760 }, { "epoch": 8.4214463840399, "grad_norm": 7.781808376312256, "learning_rate": 1.1582294264339153e-05, "loss": 0.3125, "step": 33770 }, { "epoch": 8.423940149625935, "grad_norm": 9.689964294433594, "learning_rate": 1.1579800498753117e-05, "loss": 0.3515, "step": 33780 }, { "epoch": 8.42643391521197, "grad_norm": 5.320454120635986, "learning_rate": 1.1577306733167084e-05, "loss": 0.2771, "step": 33790 }, { "epoch": 8.428927680798004, "grad_norm": 7.566685199737549, "learning_rate": 1.1574812967581047e-05, "loss": 0.3667, "step": 33800 }, { "epoch": 8.43142144638404, "grad_norm": 8.28944206237793, "learning_rate": 1.1572319201995013e-05, "loss": 0.3956, "step": 33810 }, { "epoch": 8.433915211970074, "grad_norm": 7.253947734832764, "learning_rate": 1.156982543640898e-05, "loss": 0.3837, "step": 33820 }, { "epoch": 8.436408977556109, "grad_norm": 9.780487060546875, "learning_rate": 1.1567331670822943e-05, "loss": 0.4626, "step": 33830 }, { "epoch": 8.438902743142144, "grad_norm": 6.923801898956299, "learning_rate": 1.156483790523691e-05, "loss": 0.3675, "step": 33840 }, { "epoch": 8.441396508728179, "grad_norm": 7.305504322052002, "learning_rate": 1.1562344139650874e-05, "loss": 0.3247, "step": 33850 }, { "epoch": 8.443890274314214, "grad_norm": 5.790426731109619, "learning_rate": 1.1559850374064838e-05, "loss": 0.3559, "step": 33860 }, { "epoch": 8.446384039900249, "grad_norm": 5.2143731117248535, "learning_rate": 1.1557356608478805e-05, "loss": 0.3547, "step": 33870 }, { "epoch": 8.448877805486283, "grad_norm": 7.238703727722168, "learning_rate": 1.1554862842892768e-05, "loss": 0.3329, "step": 33880 }, { "epoch": 8.451371571072318, "grad_norm": 5.136308193206787, "learning_rate": 1.1552369077306734e-05, "loss": 0.3662, "step": 33890 }, { "epoch": 8.453865336658355, "grad_norm": 7.3576836585998535, "learning_rate": 1.1549875311720699e-05, "loss": 0.3306, "step": 33900 }, { "epoch": 8.45635910224439, "grad_norm": 8.722187042236328, "learning_rate": 1.1547381546134664e-05, "loss": 0.3519, "step": 33910 }, { "epoch": 8.458852867830425, "grad_norm": 8.820263862609863, "learning_rate": 1.154488778054863e-05, "loss": 0.4284, "step": 33920 }, { "epoch": 8.46134663341646, "grad_norm": 8.29401683807373, "learning_rate": 1.1542394014962595e-05, "loss": 0.3871, "step": 33930 }, { "epoch": 8.463840399002494, "grad_norm": 6.295302867889404, "learning_rate": 1.1539900249376558e-05, "loss": 0.3196, "step": 33940 }, { "epoch": 8.46633416458853, "grad_norm": 7.778040885925293, "learning_rate": 1.1537406483790525e-05, "loss": 0.348, "step": 33950 }, { "epoch": 8.468827930174564, "grad_norm": 9.188948631286621, "learning_rate": 1.1534912718204489e-05, "loss": 0.3504, "step": 33960 }, { "epoch": 8.471321695760599, "grad_norm": 4.633731842041016, "learning_rate": 1.1532418952618454e-05, "loss": 0.3604, "step": 33970 }, { "epoch": 8.473815461346634, "grad_norm": 7.2224884033203125, "learning_rate": 1.152992518703242e-05, "loss": 0.3979, "step": 33980 }, { "epoch": 8.476309226932669, "grad_norm": 7.150679588317871, "learning_rate": 1.1527431421446385e-05, "loss": 0.3612, "step": 33990 }, { "epoch": 8.478802992518704, "grad_norm": 6.690948486328125, "learning_rate": 1.152493765586035e-05, "loss": 0.3037, "step": 34000 }, { "epoch": 8.481296758104738, "grad_norm": 5.986937046051025, "learning_rate": 1.1522443890274316e-05, "loss": 0.4867, "step": 34010 }, { "epoch": 8.483790523690773, "grad_norm": 6.088142395019531, "learning_rate": 1.151995012468828e-05, "loss": 0.3606, "step": 34020 }, { "epoch": 8.486284289276808, "grad_norm": 6.499013423919678, "learning_rate": 1.1517456359102246e-05, "loss": 0.3891, "step": 34030 }, { "epoch": 8.488778054862843, "grad_norm": 9.472525596618652, "learning_rate": 1.151496259351621e-05, "loss": 0.3548, "step": 34040 }, { "epoch": 8.491271820448878, "grad_norm": 7.18366813659668, "learning_rate": 1.1512468827930175e-05, "loss": 0.3939, "step": 34050 }, { "epoch": 8.493765586034913, "grad_norm": 6.115909099578857, "learning_rate": 1.150997506234414e-05, "loss": 0.4296, "step": 34060 }, { "epoch": 8.496259351620948, "grad_norm": 7.923805236816406, "learning_rate": 1.1507481296758106e-05, "loss": 0.389, "step": 34070 }, { "epoch": 8.498753117206983, "grad_norm": 6.619093418121338, "learning_rate": 1.1504987531172071e-05, "loss": 0.3364, "step": 34080 }, { "epoch": 8.501246882793017, "grad_norm": 4.969188213348389, "learning_rate": 1.1502493765586036e-05, "loss": 0.3985, "step": 34090 }, { "epoch": 8.503740648379052, "grad_norm": 5.744391441345215, "learning_rate": 1.15e-05, "loss": 0.4074, "step": 34100 }, { "epoch": 8.506234413965087, "grad_norm": 6.1294755935668945, "learning_rate": 1.1497506234413967e-05, "loss": 0.3872, "step": 34110 }, { "epoch": 8.508728179551122, "grad_norm": 7.260706901550293, "learning_rate": 1.149501246882793e-05, "loss": 0.3354, "step": 34120 }, { "epoch": 8.511221945137157, "grad_norm": 5.858883380889893, "learning_rate": 1.1492518703241898e-05, "loss": 0.3727, "step": 34130 }, { "epoch": 8.513715710723192, "grad_norm": 5.901925086975098, "learning_rate": 1.1490024937655861e-05, "loss": 0.3703, "step": 34140 }, { "epoch": 8.516209476309227, "grad_norm": 6.566878318786621, "learning_rate": 1.1487531172069825e-05, "loss": 0.3385, "step": 34150 }, { "epoch": 8.518703241895262, "grad_norm": 6.557463645935059, "learning_rate": 1.1485037406483792e-05, "loss": 0.2862, "step": 34160 }, { "epoch": 8.521197007481296, "grad_norm": 8.497570037841797, "learning_rate": 1.1482543640897757e-05, "loss": 0.3043, "step": 34170 }, { "epoch": 8.523690773067331, "grad_norm": 6.737511157989502, "learning_rate": 1.148004987531172e-05, "loss": 0.3957, "step": 34180 }, { "epoch": 8.526184538653366, "grad_norm": 7.414823055267334, "learning_rate": 1.1477556109725688e-05, "loss": 0.3243, "step": 34190 }, { "epoch": 8.528678304239401, "grad_norm": 6.70835542678833, "learning_rate": 1.1475311720698254e-05, "loss": 0.2703, "step": 34200 }, { "epoch": 8.531172069825436, "grad_norm": 6.923582077026367, "learning_rate": 1.1472817955112221e-05, "loss": 0.3621, "step": 34210 }, { "epoch": 8.53366583541147, "grad_norm": 3.6266000270843506, "learning_rate": 1.1470324189526185e-05, "loss": 0.2875, "step": 34220 }, { "epoch": 8.536159600997506, "grad_norm": 7.4206156730651855, "learning_rate": 1.1467830423940152e-05, "loss": 0.3646, "step": 34230 }, { "epoch": 8.53865336658354, "grad_norm": 5.679503440856934, "learning_rate": 1.1465336658354116e-05, "loss": 0.355, "step": 34240 }, { "epoch": 8.541147132169575, "grad_norm": 8.173449516296387, "learning_rate": 1.1462842892768081e-05, "loss": 0.3537, "step": 34250 }, { "epoch": 8.54364089775561, "grad_norm": 7.642554759979248, "learning_rate": 1.1460349127182046e-05, "loss": 0.3652, "step": 34260 }, { "epoch": 8.546134663341645, "grad_norm": 10.153780937194824, "learning_rate": 1.1457855361596012e-05, "loss": 0.4083, "step": 34270 }, { "epoch": 8.548628428927682, "grad_norm": 7.6003289222717285, "learning_rate": 1.1455361596009975e-05, "loss": 0.4111, "step": 34280 }, { "epoch": 8.551122194513717, "grad_norm": 8.038273811340332, "learning_rate": 1.1452867830423942e-05, "loss": 0.3452, "step": 34290 }, { "epoch": 8.553615960099751, "grad_norm": 7.191328048706055, "learning_rate": 1.1450374064837906e-05, "loss": 0.3575, "step": 34300 }, { "epoch": 8.556109725685786, "grad_norm": 8.192909240722656, "learning_rate": 1.1447880299251873e-05, "loss": 0.3853, "step": 34310 }, { "epoch": 8.558603491271821, "grad_norm": 7.0611677169799805, "learning_rate": 1.1445386533665836e-05, "loss": 0.3626, "step": 34320 }, { "epoch": 8.561097256857856, "grad_norm": 6.423704624176025, "learning_rate": 1.14428927680798e-05, "loss": 0.3413, "step": 34330 }, { "epoch": 8.563591022443891, "grad_norm": 7.521974086761475, "learning_rate": 1.1440399002493767e-05, "loss": 0.3469, "step": 34340 }, { "epoch": 8.566084788029926, "grad_norm": 4.919178485870361, "learning_rate": 1.143790523690773e-05, "loss": 0.3544, "step": 34350 }, { "epoch": 8.56857855361596, "grad_norm": 6.528305530548096, "learning_rate": 1.1435411471321696e-05, "loss": 0.3007, "step": 34360 }, { "epoch": 8.571072319201996, "grad_norm": 6.000070571899414, "learning_rate": 1.1432917705735663e-05, "loss": 0.3419, "step": 34370 }, { "epoch": 8.57356608478803, "grad_norm": 6.5107245445251465, "learning_rate": 1.1430423940149627e-05, "loss": 0.3416, "step": 34380 }, { "epoch": 8.576059850374065, "grad_norm": 8.171274185180664, "learning_rate": 1.1427930174563594e-05, "loss": 0.4224, "step": 34390 }, { "epoch": 8.5785536159601, "grad_norm": 7.173635959625244, "learning_rate": 1.1425436408977557e-05, "loss": 0.3584, "step": 34400 }, { "epoch": 8.581047381546135, "grad_norm": 6.059718608856201, "learning_rate": 1.142294264339152e-05, "loss": 0.3048, "step": 34410 }, { "epoch": 8.58354114713217, "grad_norm": 5.417482376098633, "learning_rate": 1.1420448877805488e-05, "loss": 0.3443, "step": 34420 }, { "epoch": 8.586034912718205, "grad_norm": 8.786724090576172, "learning_rate": 1.1417955112219451e-05, "loss": 0.4161, "step": 34430 }, { "epoch": 8.58852867830424, "grad_norm": 8.032851219177246, "learning_rate": 1.1415461346633417e-05, "loss": 0.4395, "step": 34440 }, { "epoch": 8.591022443890274, "grad_norm": 8.335599899291992, "learning_rate": 1.1412967581047382e-05, "loss": 0.3912, "step": 34450 }, { "epoch": 8.59351620947631, "grad_norm": 8.087762832641602, "learning_rate": 1.1410473815461347e-05, "loss": 0.3549, "step": 34460 }, { "epoch": 8.596009975062344, "grad_norm": 5.728715896606445, "learning_rate": 1.1407980049875313e-05, "loss": 0.3888, "step": 34470 }, { "epoch": 8.598503740648379, "grad_norm": 8.289750099182129, "learning_rate": 1.1405486284289278e-05, "loss": 0.3327, "step": 34480 }, { "epoch": 8.600997506234414, "grad_norm": 11.838362693786621, "learning_rate": 1.1402992518703242e-05, "loss": 0.3676, "step": 34490 }, { "epoch": 8.603491271820449, "grad_norm": 7.733099937438965, "learning_rate": 1.1400498753117209e-05, "loss": 0.4015, "step": 34500 }, { "epoch": 8.605985037406484, "grad_norm": 6.227921009063721, "learning_rate": 1.1398004987531172e-05, "loss": 0.3205, "step": 34510 }, { "epoch": 8.608478802992519, "grad_norm": 10.058944702148438, "learning_rate": 1.139551122194514e-05, "loss": 0.3644, "step": 34520 }, { "epoch": 8.610972568578553, "grad_norm": 5.577748775482178, "learning_rate": 1.1393017456359103e-05, "loss": 0.3545, "step": 34530 }, { "epoch": 8.613466334164588, "grad_norm": 6.9346923828125, "learning_rate": 1.1390523690773068e-05, "loss": 0.3203, "step": 34540 }, { "epoch": 8.615960099750623, "grad_norm": 7.139277935028076, "learning_rate": 1.1388029925187033e-05, "loss": 0.296, "step": 34550 }, { "epoch": 8.618453865336658, "grad_norm": 6.940938949584961, "learning_rate": 1.1385536159600999e-05, "loss": 0.339, "step": 34560 }, { "epoch": 8.620947630922693, "grad_norm": 7.233773708343506, "learning_rate": 1.1383042394014962e-05, "loss": 0.3677, "step": 34570 }, { "epoch": 8.623441396508728, "grad_norm": 11.383943557739258, "learning_rate": 1.138054862842893e-05, "loss": 0.3711, "step": 34580 }, { "epoch": 8.625935162094763, "grad_norm": 5.546501636505127, "learning_rate": 1.1378054862842893e-05, "loss": 0.4015, "step": 34590 }, { "epoch": 8.628428927680797, "grad_norm": 8.203598022460938, "learning_rate": 1.137556109725686e-05, "loss": 0.32, "step": 34600 }, { "epoch": 8.630922693266832, "grad_norm": 8.763114929199219, "learning_rate": 1.1373067331670824e-05, "loss": 0.2881, "step": 34610 }, { "epoch": 8.633416458852867, "grad_norm": 9.64941120147705, "learning_rate": 1.1370573566084789e-05, "loss": 0.4229, "step": 34620 }, { "epoch": 8.635910224438902, "grad_norm": 8.64614486694336, "learning_rate": 1.1368079800498754e-05, "loss": 0.3512, "step": 34630 }, { "epoch": 8.638403990024937, "grad_norm": 9.003388404846191, "learning_rate": 1.136558603491272e-05, "loss": 0.3894, "step": 34640 }, { "epoch": 8.640897755610972, "grad_norm": 6.2537336349487305, "learning_rate": 1.1363092269326683e-05, "loss": 0.3445, "step": 34650 }, { "epoch": 8.643391521197007, "grad_norm": 7.811208248138428, "learning_rate": 1.136059850374065e-05, "loss": 0.4709, "step": 34660 }, { "epoch": 8.645885286783042, "grad_norm": 7.517146587371826, "learning_rate": 1.1358104738154614e-05, "loss": 0.307, "step": 34670 }, { "epoch": 8.648379052369076, "grad_norm": 7.594816207885742, "learning_rate": 1.135561097256858e-05, "loss": 0.3967, "step": 34680 }, { "epoch": 8.650872817955111, "grad_norm": 6.5755510330200195, "learning_rate": 1.1353117206982544e-05, "loss": 0.3044, "step": 34690 }, { "epoch": 8.653366583541148, "grad_norm": 10.061249732971191, "learning_rate": 1.1350623441396508e-05, "loss": 0.4416, "step": 34700 }, { "epoch": 8.655860349127183, "grad_norm": 7.96301794052124, "learning_rate": 1.1348129675810475e-05, "loss": 0.4184, "step": 34710 }, { "epoch": 8.658354114713218, "grad_norm": 7.919782638549805, "learning_rate": 1.134563591022444e-05, "loss": 0.3322, "step": 34720 }, { "epoch": 8.660847880299253, "grad_norm": 8.549415588378906, "learning_rate": 1.1343142144638406e-05, "loss": 0.3588, "step": 34730 }, { "epoch": 8.663341645885287, "grad_norm": 5.7566070556640625, "learning_rate": 1.1340648379052371e-05, "loss": 0.3391, "step": 34740 }, { "epoch": 8.665835411471322, "grad_norm": 8.828804016113281, "learning_rate": 1.1338154613466335e-05, "loss": 0.3558, "step": 34750 }, { "epoch": 8.668329177057357, "grad_norm": 7.40009069442749, "learning_rate": 1.1335660847880302e-05, "loss": 0.3556, "step": 34760 }, { "epoch": 8.670822942643392, "grad_norm": 7.1716814041137695, "learning_rate": 1.1333167082294265e-05, "loss": 0.3363, "step": 34770 }, { "epoch": 8.673316708229427, "grad_norm": 8.354803085327148, "learning_rate": 1.1330673316708229e-05, "loss": 0.3615, "step": 34780 }, { "epoch": 8.675810473815462, "grad_norm": 5.095800399780273, "learning_rate": 1.1328179551122196e-05, "loss": 0.3083, "step": 34790 }, { "epoch": 8.678304239401497, "grad_norm": 8.354507446289062, "learning_rate": 1.132568578553616e-05, "loss": 0.3613, "step": 34800 }, { "epoch": 8.680798004987532, "grad_norm": 6.8850555419921875, "learning_rate": 1.1323192019950126e-05, "loss": 0.3627, "step": 34810 }, { "epoch": 8.683291770573566, "grad_norm": 7.482059001922607, "learning_rate": 1.132069825436409e-05, "loss": 0.3885, "step": 34820 }, { "epoch": 8.685785536159601, "grad_norm": 8.189881324768066, "learning_rate": 1.1318204488778055e-05, "loss": 0.3067, "step": 34830 }, { "epoch": 8.688279301745636, "grad_norm": 6.003298759460449, "learning_rate": 1.1315710723192022e-05, "loss": 0.3853, "step": 34840 }, { "epoch": 8.690773067331671, "grad_norm": 9.855851173400879, "learning_rate": 1.1313216957605986e-05, "loss": 0.426, "step": 34850 }, { "epoch": 8.693266832917706, "grad_norm": 7.669548988342285, "learning_rate": 1.131072319201995e-05, "loss": 0.3384, "step": 34860 }, { "epoch": 8.69576059850374, "grad_norm": 7.204586029052734, "learning_rate": 1.1308229426433917e-05, "loss": 0.3972, "step": 34870 }, { "epoch": 8.698254364089776, "grad_norm": 5.998164176940918, "learning_rate": 1.130573566084788e-05, "loss": 0.3572, "step": 34880 }, { "epoch": 8.70074812967581, "grad_norm": 7.780731201171875, "learning_rate": 1.1303241895261847e-05, "loss": 0.3469, "step": 34890 }, { "epoch": 8.703241895261845, "grad_norm": 8.073392868041992, "learning_rate": 1.130074812967581e-05, "loss": 0.3723, "step": 34900 }, { "epoch": 8.70573566084788, "grad_norm": 5.444243907928467, "learning_rate": 1.1298254364089776e-05, "loss": 0.3219, "step": 34910 }, { "epoch": 8.708229426433915, "grad_norm": 10.334959030151367, "learning_rate": 1.1295760598503741e-05, "loss": 0.3386, "step": 34920 }, { "epoch": 8.71072319201995, "grad_norm": 8.497328758239746, "learning_rate": 1.1293266832917707e-05, "loss": 0.3967, "step": 34930 }, { "epoch": 8.713216957605985, "grad_norm": 6.893999099731445, "learning_rate": 1.129077306733167e-05, "loss": 0.2833, "step": 34940 }, { "epoch": 8.71571072319202, "grad_norm": 6.12678337097168, "learning_rate": 1.1288279301745637e-05, "loss": 0.3801, "step": 34950 }, { "epoch": 8.718204488778055, "grad_norm": 4.06450080871582, "learning_rate": 1.1285785536159601e-05, "loss": 0.3047, "step": 34960 }, { "epoch": 8.72069825436409, "grad_norm": 6.966952323913574, "learning_rate": 1.1283291770573568e-05, "loss": 0.371, "step": 34970 }, { "epoch": 8.723192019950124, "grad_norm": 6.376713752746582, "learning_rate": 1.1280798004987532e-05, "loss": 0.3878, "step": 34980 }, { "epoch": 8.72568578553616, "grad_norm": 7.877644062042236, "learning_rate": 1.1278304239401497e-05, "loss": 0.3533, "step": 34990 }, { "epoch": 8.728179551122194, "grad_norm": 4.689276218414307, "learning_rate": 1.1275810473815462e-05, "loss": 0.3081, "step": 35000 }, { "epoch": 8.730673316708229, "grad_norm": 8.68629264831543, "learning_rate": 1.1273316708229428e-05, "loss": 0.3049, "step": 35010 }, { "epoch": 8.733167082294264, "grad_norm": 6.382472991943359, "learning_rate": 1.1270822942643393e-05, "loss": 0.3957, "step": 35020 }, { "epoch": 8.735660847880299, "grad_norm": 5.842182636260986, "learning_rate": 1.1268329177057358e-05, "loss": 0.3513, "step": 35030 }, { "epoch": 8.738154613466333, "grad_norm": 5.1576948165893555, "learning_rate": 1.1265835411471322e-05, "loss": 0.3957, "step": 35040 }, { "epoch": 8.740648379052368, "grad_norm": 12.842966079711914, "learning_rate": 1.1263341645885289e-05, "loss": 0.3318, "step": 35050 }, { "epoch": 8.743142144638403, "grad_norm": 5.631074905395508, "learning_rate": 1.1260847880299252e-05, "loss": 0.2967, "step": 35060 }, { "epoch": 8.745635910224438, "grad_norm": 6.541484355926514, "learning_rate": 1.1258354114713218e-05, "loss": 0.3186, "step": 35070 }, { "epoch": 8.748129675810475, "grad_norm": 5.197242259979248, "learning_rate": 1.1255860349127183e-05, "loss": 0.3606, "step": 35080 }, { "epoch": 8.75062344139651, "grad_norm": 7.91013240814209, "learning_rate": 1.1253366583541148e-05, "loss": 0.3609, "step": 35090 }, { "epoch": 8.753117206982544, "grad_norm": 6.427094459533691, "learning_rate": 1.1250872817955114e-05, "loss": 0.4667, "step": 35100 }, { "epoch": 8.75561097256858, "grad_norm": 5.502357482910156, "learning_rate": 1.1248379052369079e-05, "loss": 0.3279, "step": 35110 }, { "epoch": 8.758104738154614, "grad_norm": 6.215767860412598, "learning_rate": 1.1245885286783043e-05, "loss": 0.3371, "step": 35120 }, { "epoch": 8.760598503740649, "grad_norm": 6.710012435913086, "learning_rate": 1.124339152119701e-05, "loss": 0.3477, "step": 35130 }, { "epoch": 8.763092269326684, "grad_norm": 7.127229690551758, "learning_rate": 1.1240897755610973e-05, "loss": 0.3687, "step": 35140 }, { "epoch": 8.765586034912719, "grad_norm": 7.16987419128418, "learning_rate": 1.1238403990024937e-05, "loss": 0.3494, "step": 35150 }, { "epoch": 8.768079800498754, "grad_norm": 8.195234298706055, "learning_rate": 1.1235910224438904e-05, "loss": 0.331, "step": 35160 }, { "epoch": 8.770573566084789, "grad_norm": 5.69235372543335, "learning_rate": 1.1233416458852869e-05, "loss": 0.3815, "step": 35170 }, { "epoch": 8.773067331670823, "grad_norm": 7.860400199890137, "learning_rate": 1.1230922693266834e-05, "loss": 0.3599, "step": 35180 }, { "epoch": 8.775561097256858, "grad_norm": 16.83560562133789, "learning_rate": 1.12284289276808e-05, "loss": 0.3941, "step": 35190 }, { "epoch": 8.778054862842893, "grad_norm": 6.330166816711426, "learning_rate": 1.1225935162094763e-05, "loss": 0.3688, "step": 35200 }, { "epoch": 8.780548628428928, "grad_norm": 6.7685418128967285, "learning_rate": 1.122344139650873e-05, "loss": 0.3861, "step": 35210 }, { "epoch": 8.783042394014963, "grad_norm": 8.012506484985352, "learning_rate": 1.1220947630922694e-05, "loss": 0.3795, "step": 35220 }, { "epoch": 8.785536159600998, "grad_norm": 7.705891132354736, "learning_rate": 1.1218453865336661e-05, "loss": 0.439, "step": 35230 }, { "epoch": 8.788029925187033, "grad_norm": 6.158017158508301, "learning_rate": 1.1215960099750625e-05, "loss": 0.3485, "step": 35240 }, { "epoch": 8.790523690773068, "grad_norm": 10.359025001525879, "learning_rate": 1.1213466334164588e-05, "loss": 0.397, "step": 35250 }, { "epoch": 8.793017456359102, "grad_norm": 6.772762298583984, "learning_rate": 1.1210972568578555e-05, "loss": 0.351, "step": 35260 }, { "epoch": 8.795511221945137, "grad_norm": 8.312684059143066, "learning_rate": 1.1208478802992519e-05, "loss": 0.3252, "step": 35270 }, { "epoch": 8.798004987531172, "grad_norm": 7.869873523712158, "learning_rate": 1.1205985037406484e-05, "loss": 0.3912, "step": 35280 }, { "epoch": 8.800498753117207, "grad_norm": 7.570711612701416, "learning_rate": 1.1203491271820451e-05, "loss": 0.3725, "step": 35290 }, { "epoch": 8.802992518703242, "grad_norm": 9.398818016052246, "learning_rate": 1.1200997506234415e-05, "loss": 0.3509, "step": 35300 }, { "epoch": 8.805486284289277, "grad_norm": 6.553288459777832, "learning_rate": 1.1198503740648382e-05, "loss": 0.3134, "step": 35310 }, { "epoch": 8.807980049875312, "grad_norm": 6.000061511993408, "learning_rate": 1.1196009975062345e-05, "loss": 0.3332, "step": 35320 }, { "epoch": 8.810473815461346, "grad_norm": 10.576332092285156, "learning_rate": 1.1193516209476309e-05, "loss": 0.4149, "step": 35330 }, { "epoch": 8.812967581047381, "grad_norm": 6.1524505615234375, "learning_rate": 1.1191022443890276e-05, "loss": 0.3426, "step": 35340 }, { "epoch": 8.815461346633416, "grad_norm": 10.099903106689453, "learning_rate": 1.118852867830424e-05, "loss": 0.3464, "step": 35350 }, { "epoch": 8.817955112219451, "grad_norm": 5.1967854499816895, "learning_rate": 1.1186034912718205e-05, "loss": 0.3209, "step": 35360 }, { "epoch": 8.820448877805486, "grad_norm": 10.381093978881836, "learning_rate": 1.118354114713217e-05, "loss": 0.3733, "step": 35370 }, { "epoch": 8.82294264339152, "grad_norm": 17.32126808166504, "learning_rate": 1.1181047381546136e-05, "loss": 0.3812, "step": 35380 }, { "epoch": 8.825436408977556, "grad_norm": 11.370584487915039, "learning_rate": 1.11785536159601e-05, "loss": 0.3574, "step": 35390 }, { "epoch": 8.82793017456359, "grad_norm": 6.021444797515869, "learning_rate": 1.1176059850374066e-05, "loss": 0.3345, "step": 35400 }, { "epoch": 8.830423940149625, "grad_norm": 9.910523414611816, "learning_rate": 1.117356608478803e-05, "loss": 0.3779, "step": 35410 }, { "epoch": 8.83291770573566, "grad_norm": 10.403486251831055, "learning_rate": 1.1171072319201997e-05, "loss": 0.2948, "step": 35420 }, { "epoch": 8.835411471321695, "grad_norm": 5.244110107421875, "learning_rate": 1.116857855361596e-05, "loss": 0.3526, "step": 35430 }, { "epoch": 8.83790523690773, "grad_norm": 8.06598949432373, "learning_rate": 1.1166084788029926e-05, "loss": 0.3763, "step": 35440 }, { "epoch": 8.840399002493765, "grad_norm": 6.2660322189331055, "learning_rate": 1.1163591022443891e-05, "loss": 0.36, "step": 35450 }, { "epoch": 8.8428927680798, "grad_norm": 7.570070266723633, "learning_rate": 1.1161097256857856e-05, "loss": 0.423, "step": 35460 }, { "epoch": 8.845386533665835, "grad_norm": 5.027857780456543, "learning_rate": 1.1158603491271822e-05, "loss": 0.3392, "step": 35470 }, { "epoch": 8.84788029925187, "grad_norm": 7.999138832092285, "learning_rate": 1.1156109725685787e-05, "loss": 0.3899, "step": 35480 }, { "epoch": 8.850374064837904, "grad_norm": 9.944479942321777, "learning_rate": 1.115361596009975e-05, "loss": 0.3727, "step": 35490 }, { "epoch": 8.85286783042394, "grad_norm": 7.199899673461914, "learning_rate": 1.1151122194513718e-05, "loss": 0.3394, "step": 35500 }, { "epoch": 8.855361596009976, "grad_norm": 5.359086036682129, "learning_rate": 1.1148628428927681e-05, "loss": 0.4158, "step": 35510 }, { "epoch": 8.85785536159601, "grad_norm": 5.6271820068359375, "learning_rate": 1.1146134663341648e-05, "loss": 0.391, "step": 35520 }, { "epoch": 8.860349127182046, "grad_norm": 7.151126861572266, "learning_rate": 1.1143640897755612e-05, "loss": 0.3635, "step": 35530 }, { "epoch": 8.86284289276808, "grad_norm": 6.151798725128174, "learning_rate": 1.1141147132169577e-05, "loss": 0.3526, "step": 35540 }, { "epoch": 8.865336658354115, "grad_norm": 6.384893417358398, "learning_rate": 1.1138653366583542e-05, "loss": 0.3955, "step": 35550 }, { "epoch": 8.86783042394015, "grad_norm": 11.764251708984375, "learning_rate": 1.1136159600997508e-05, "loss": 0.3387, "step": 35560 }, { "epoch": 8.870324189526185, "grad_norm": 10.541848182678223, "learning_rate": 1.1133665835411471e-05, "loss": 0.3911, "step": 35570 }, { "epoch": 8.87281795511222, "grad_norm": 6.850974082946777, "learning_rate": 1.1131172069825438e-05, "loss": 0.48, "step": 35580 }, { "epoch": 8.875311720698255, "grad_norm": 7.8186211585998535, "learning_rate": 1.1128678304239402e-05, "loss": 0.4199, "step": 35590 }, { "epoch": 8.87780548628429, "grad_norm": 7.9405012130737305, "learning_rate": 1.1126184538653369e-05, "loss": 0.3845, "step": 35600 }, { "epoch": 8.880299251870325, "grad_norm": 7.980791091918945, "learning_rate": 1.1123690773067333e-05, "loss": 0.431, "step": 35610 }, { "epoch": 8.88279301745636, "grad_norm": 8.639333724975586, "learning_rate": 1.1121197007481296e-05, "loss": 0.3792, "step": 35620 }, { "epoch": 8.885286783042394, "grad_norm": 6.000261306762695, "learning_rate": 1.1118703241895263e-05, "loss": 0.3846, "step": 35630 }, { "epoch": 8.88778054862843, "grad_norm": 8.1222505569458, "learning_rate": 1.1116209476309228e-05, "loss": 0.3591, "step": 35640 }, { "epoch": 8.890274314214464, "grad_norm": 9.311159133911133, "learning_rate": 1.1113715710723192e-05, "loss": 0.3771, "step": 35650 }, { "epoch": 8.892768079800499, "grad_norm": 5.505731105804443, "learning_rate": 1.1111221945137159e-05, "loss": 0.3531, "step": 35660 }, { "epoch": 8.895261845386534, "grad_norm": 11.431441307067871, "learning_rate": 1.1108728179551123e-05, "loss": 0.3913, "step": 35670 }, { "epoch": 8.897755610972569, "grad_norm": 6.601688385009766, "learning_rate": 1.110623441396509e-05, "loss": 0.374, "step": 35680 }, { "epoch": 8.900249376558603, "grad_norm": 4.69558048248291, "learning_rate": 1.1103740648379053e-05, "loss": 0.3695, "step": 35690 }, { "epoch": 8.902743142144638, "grad_norm": 6.6589131355285645, "learning_rate": 1.1101246882793017e-05, "loss": 0.3566, "step": 35700 }, { "epoch": 8.905236907730673, "grad_norm": 7.764341831207275, "learning_rate": 1.1098753117206984e-05, "loss": 0.3723, "step": 35710 }, { "epoch": 8.907730673316708, "grad_norm": 5.244799613952637, "learning_rate": 1.1096259351620948e-05, "loss": 0.3354, "step": 35720 }, { "epoch": 8.910224438902743, "grad_norm": 7.864104270935059, "learning_rate": 1.1093765586034915e-05, "loss": 0.3841, "step": 35730 }, { "epoch": 8.912718204488778, "grad_norm": 7.269486427307129, "learning_rate": 1.1091271820448878e-05, "loss": 0.4008, "step": 35740 }, { "epoch": 8.915211970074813, "grad_norm": 7.298030376434326, "learning_rate": 1.1088778054862843e-05, "loss": 0.3713, "step": 35750 }, { "epoch": 8.917705735660848, "grad_norm": 3.9201197624206543, "learning_rate": 1.108628428927681e-05, "loss": 0.3732, "step": 35760 }, { "epoch": 8.920199501246882, "grad_norm": 5.9074249267578125, "learning_rate": 1.1083790523690774e-05, "loss": 0.3367, "step": 35770 }, { "epoch": 8.922693266832917, "grad_norm": 16.71881866455078, "learning_rate": 1.1081296758104738e-05, "loss": 0.4005, "step": 35780 }, { "epoch": 8.925187032418952, "grad_norm": 7.94209623336792, "learning_rate": 1.1078802992518705e-05, "loss": 0.3229, "step": 35790 }, { "epoch": 8.927680798004987, "grad_norm": 6.1742143630981445, "learning_rate": 1.1076309226932668e-05, "loss": 0.3859, "step": 35800 }, { "epoch": 8.930174563591022, "grad_norm": 14.812112808227539, "learning_rate": 1.1073815461346635e-05, "loss": 0.4756, "step": 35810 }, { "epoch": 8.932668329177057, "grad_norm": 7.1429338455200195, "learning_rate": 1.1071321695760599e-05, "loss": 0.2895, "step": 35820 }, { "epoch": 8.935162094763092, "grad_norm": 6.503314018249512, "learning_rate": 1.1068827930174564e-05, "loss": 0.3162, "step": 35830 }, { "epoch": 8.937655860349127, "grad_norm": 8.817343711853027, "learning_rate": 1.106633416458853e-05, "loss": 0.374, "step": 35840 }, { "epoch": 8.940149625935161, "grad_norm": 5.8551740646362305, "learning_rate": 1.1063840399002495e-05, "loss": 0.4058, "step": 35850 }, { "epoch": 8.942643391521196, "grad_norm": 7.214171409606934, "learning_rate": 1.1061346633416459e-05, "loss": 0.388, "step": 35860 }, { "epoch": 8.945137157107231, "grad_norm": 6.604674816131592, "learning_rate": 1.1058852867830426e-05, "loss": 0.3192, "step": 35870 }, { "epoch": 8.947630922693268, "grad_norm": 9.637167930603027, "learning_rate": 1.1056359102244389e-05, "loss": 0.3364, "step": 35880 }, { "epoch": 8.950124688279303, "grad_norm": 6.015578746795654, "learning_rate": 1.1053865336658356e-05, "loss": 0.3099, "step": 35890 }, { "epoch": 8.952618453865338, "grad_norm": 6.540172100067139, "learning_rate": 1.105137157107232e-05, "loss": 0.4536, "step": 35900 }, { "epoch": 8.955112219451372, "grad_norm": 7.110110759735107, "learning_rate": 1.1048877805486285e-05, "loss": 0.319, "step": 35910 }, { "epoch": 8.957605985037407, "grad_norm": 5.485593318939209, "learning_rate": 1.104638403990025e-05, "loss": 0.3557, "step": 35920 }, { "epoch": 8.960099750623442, "grad_norm": 6.532005786895752, "learning_rate": 1.1043890274314216e-05, "loss": 0.3922, "step": 35930 }, { "epoch": 8.962593516209477, "grad_norm": 6.834729194641113, "learning_rate": 1.104139650872818e-05, "loss": 0.3233, "step": 35940 }, { "epoch": 8.965087281795512, "grad_norm": 5.890754699707031, "learning_rate": 1.1038902743142146e-05, "loss": 0.3753, "step": 35950 }, { "epoch": 8.967581047381547, "grad_norm": 6.0453619956970215, "learning_rate": 1.103640897755611e-05, "loss": 0.4137, "step": 35960 }, { "epoch": 8.970074812967582, "grad_norm": 7.995392322540283, "learning_rate": 1.1033915211970077e-05, "loss": 0.3435, "step": 35970 }, { "epoch": 8.972568578553616, "grad_norm": 7.464896202087402, "learning_rate": 1.103142144638404e-05, "loss": 0.3894, "step": 35980 }, { "epoch": 8.975062344139651, "grad_norm": 9.544695854187012, "learning_rate": 1.1028927680798006e-05, "loss": 0.4271, "step": 35990 }, { "epoch": 8.977556109725686, "grad_norm": 5.784707069396973, "learning_rate": 1.1026433915211971e-05, "loss": 0.3547, "step": 36000 }, { "epoch": 8.980049875311721, "grad_norm": 4.10465669631958, "learning_rate": 1.1023940149625936e-05, "loss": 0.3881, "step": 36010 }, { "epoch": 8.982543640897756, "grad_norm": 3.3419559001922607, "learning_rate": 1.1021446384039902e-05, "loss": 0.3459, "step": 36020 }, { "epoch": 8.98503740648379, "grad_norm": 7.649290561676025, "learning_rate": 1.1018952618453867e-05, "loss": 0.2882, "step": 36030 }, { "epoch": 8.987531172069826, "grad_norm": 4.2098798751831055, "learning_rate": 1.101645885286783e-05, "loss": 0.3061, "step": 36040 }, { "epoch": 8.99002493765586, "grad_norm": 7.673419952392578, "learning_rate": 1.1013965087281798e-05, "loss": 0.3267, "step": 36050 }, { "epoch": 8.992518703241895, "grad_norm": 8.080073356628418, "learning_rate": 1.1011471321695761e-05, "loss": 0.3203, "step": 36060 }, { "epoch": 8.99501246882793, "grad_norm": 4.7040205001831055, "learning_rate": 1.1008977556109725e-05, "loss": 0.3322, "step": 36070 }, { "epoch": 8.997506234413965, "grad_norm": 6.1027069091796875, "learning_rate": 1.1006483790523692e-05, "loss": 0.384, "step": 36080 }, { "epoch": 9.0, "grad_norm": 9.147186279296875, "learning_rate": 1.1003990024937656e-05, "loss": 0.4646, "step": 36090 }, { "epoch": 9.0, "eval_loss": 0.41386765241622925, "eval_runtime": 59.9541, "eval_samples_per_second": 16.729, "eval_steps_per_second": 16.729, "step": 36090 }, { "epoch": 9.002493765586035, "grad_norm": 7.474572658538818, "learning_rate": 1.1001496259351623e-05, "loss": 0.3304, "step": 36100 }, { "epoch": 9.00498753117207, "grad_norm": 4.692042827606201, "learning_rate": 1.0999002493765588e-05, "loss": 0.3193, "step": 36110 }, { "epoch": 9.007481296758105, "grad_norm": 5.829125881195068, "learning_rate": 1.0996508728179551e-05, "loss": 0.3628, "step": 36120 }, { "epoch": 9.00997506234414, "grad_norm": 7.040647029876709, "learning_rate": 1.0994014962593518e-05, "loss": 0.3537, "step": 36130 }, { "epoch": 9.012468827930174, "grad_norm": 7.125100612640381, "learning_rate": 1.0991521197007482e-05, "loss": 0.3556, "step": 36140 }, { "epoch": 9.01496259351621, "grad_norm": 8.841943740844727, "learning_rate": 1.0989027431421446e-05, "loss": 0.3286, "step": 36150 }, { "epoch": 9.017456359102244, "grad_norm": 6.718105316162109, "learning_rate": 1.0986533665835413e-05, "loss": 0.3856, "step": 36160 }, { "epoch": 9.019950124688279, "grad_norm": 6.22119665145874, "learning_rate": 1.0984039900249376e-05, "loss": 0.3323, "step": 36170 }, { "epoch": 9.022443890274314, "grad_norm": 6.809352874755859, "learning_rate": 1.0981546134663343e-05, "loss": 0.3431, "step": 36180 }, { "epoch": 9.024937655860349, "grad_norm": 8.36000919342041, "learning_rate": 1.0979052369077307e-05, "loss": 0.3702, "step": 36190 }, { "epoch": 9.027431421446384, "grad_norm": 9.731013298034668, "learning_rate": 1.0976558603491272e-05, "loss": 0.3476, "step": 36200 }, { "epoch": 9.029925187032418, "grad_norm": 5.753609657287598, "learning_rate": 1.0974064837905238e-05, "loss": 0.3427, "step": 36210 }, { "epoch": 9.032418952618453, "grad_norm": 3.485929012298584, "learning_rate": 1.0971571072319203e-05, "loss": 0.3366, "step": 36220 }, { "epoch": 9.034912718204488, "grad_norm": 5.159622669219971, "learning_rate": 1.096907730673317e-05, "loss": 0.3141, "step": 36230 }, { "epoch": 9.037406483790523, "grad_norm": 6.331995010375977, "learning_rate": 1.0966583541147134e-05, "loss": 0.3536, "step": 36240 }, { "epoch": 9.039900249376558, "grad_norm": 5.746108055114746, "learning_rate": 1.0964089775561097e-05, "loss": 0.3514, "step": 36250 }, { "epoch": 9.042394014962593, "grad_norm": 6.630804061889648, "learning_rate": 1.0961596009975064e-05, "loss": 0.3299, "step": 36260 }, { "epoch": 9.044887780548628, "grad_norm": 7.066774845123291, "learning_rate": 1.0959102244389028e-05, "loss": 0.2654, "step": 36270 }, { "epoch": 9.047381546134662, "grad_norm": 9.622215270996094, "learning_rate": 1.0956608478802993e-05, "loss": 0.3501, "step": 36280 }, { "epoch": 9.049875311720697, "grad_norm": 8.551993370056152, "learning_rate": 1.0954114713216958e-05, "loss": 0.3529, "step": 36290 }, { "epoch": 9.052369077306734, "grad_norm": 6.227419376373291, "learning_rate": 1.0951620947630924e-05, "loss": 0.3803, "step": 36300 }, { "epoch": 9.054862842892769, "grad_norm": 8.591386795043945, "learning_rate": 1.0949127182044889e-05, "loss": 0.3134, "step": 36310 }, { "epoch": 9.057356608478804, "grad_norm": 5.2157673835754395, "learning_rate": 1.0946633416458854e-05, "loss": 0.3214, "step": 36320 }, { "epoch": 9.059850374064839, "grad_norm": 5.971094131469727, "learning_rate": 1.0944139650872818e-05, "loss": 0.3343, "step": 36330 }, { "epoch": 9.062344139650873, "grad_norm": 7.728214263916016, "learning_rate": 1.0941645885286785e-05, "loss": 0.4131, "step": 36340 }, { "epoch": 9.064837905236908, "grad_norm": 9.236366271972656, "learning_rate": 1.0939401496259351e-05, "loss": 0.3518, "step": 36350 }, { "epoch": 9.067331670822943, "grad_norm": 4.353968620300293, "learning_rate": 1.0936907730673318e-05, "loss": 0.352, "step": 36360 }, { "epoch": 9.069825436408978, "grad_norm": 9.550552368164062, "learning_rate": 1.0934413965087282e-05, "loss": 0.4209, "step": 36370 }, { "epoch": 9.072319201995013, "grad_norm": 8.467497825622559, "learning_rate": 1.0931920199501247e-05, "loss": 0.3342, "step": 36380 }, { "epoch": 9.074812967581048, "grad_norm": 11.831032752990723, "learning_rate": 1.0929675810473817e-05, "loss": 0.3481, "step": 36390 }, { "epoch": 9.077306733167083, "grad_norm": 5.57447624206543, "learning_rate": 1.0927182044887781e-05, "loss": 0.2991, "step": 36400 }, { "epoch": 9.079800498753118, "grad_norm": 10.63701057434082, "learning_rate": 1.0924688279301748e-05, "loss": 0.3668, "step": 36410 }, { "epoch": 9.082294264339152, "grad_norm": 9.0299711227417, "learning_rate": 1.0922194513715712e-05, "loss": 0.4303, "step": 36420 }, { "epoch": 9.084788029925187, "grad_norm": 6.327706813812256, "learning_rate": 1.0919700748129675e-05, "loss": 0.3057, "step": 36430 }, { "epoch": 9.087281795511222, "grad_norm": 10.897625923156738, "learning_rate": 1.0917206982543642e-05, "loss": 0.3726, "step": 36440 }, { "epoch": 9.089775561097257, "grad_norm": 8.112853050231934, "learning_rate": 1.0914713216957606e-05, "loss": 0.3477, "step": 36450 }, { "epoch": 9.092269326683292, "grad_norm": 7.813316345214844, "learning_rate": 1.0912219451371573e-05, "loss": 0.2972, "step": 36460 }, { "epoch": 9.094763092269327, "grad_norm": 8.015786170959473, "learning_rate": 1.0909725685785536e-05, "loss": 0.3787, "step": 36470 }, { "epoch": 9.097256857855362, "grad_norm": 6.349668502807617, "learning_rate": 1.0907231920199502e-05, "loss": 0.3598, "step": 36480 }, { "epoch": 9.099750623441397, "grad_norm": 5.772064208984375, "learning_rate": 1.0904738154613467e-05, "loss": 0.32, "step": 36490 }, { "epoch": 9.102244389027431, "grad_norm": 6.944436073303223, "learning_rate": 1.0902244389027432e-05, "loss": 0.3449, "step": 36500 }, { "epoch": 9.104738154613466, "grad_norm": 6.625913619995117, "learning_rate": 1.0899750623441396e-05, "loss": 0.3236, "step": 36510 }, { "epoch": 9.107231920199501, "grad_norm": 6.98844051361084, "learning_rate": 1.0897256857855363e-05, "loss": 0.3226, "step": 36520 }, { "epoch": 9.109725685785536, "grad_norm": 10.177889823913574, "learning_rate": 1.0894763092269327e-05, "loss": 0.3706, "step": 36530 }, { "epoch": 9.11221945137157, "grad_norm": 9.637530326843262, "learning_rate": 1.0892269326683294e-05, "loss": 0.3774, "step": 36540 }, { "epoch": 9.114713216957606, "grad_norm": 6.966104030609131, "learning_rate": 1.0889775561097257e-05, "loss": 0.3834, "step": 36550 }, { "epoch": 9.11720698254364, "grad_norm": 4.891960620880127, "learning_rate": 1.0887281795511223e-05, "loss": 0.3099, "step": 36560 }, { "epoch": 9.119700748129675, "grad_norm": 12.025975227355957, "learning_rate": 1.0884788029925188e-05, "loss": 0.3532, "step": 36570 }, { "epoch": 9.12219451371571, "grad_norm": 7.283741474151611, "learning_rate": 1.0882294264339153e-05, "loss": 0.3582, "step": 36580 }, { "epoch": 9.124688279301745, "grad_norm": 16.89406967163086, "learning_rate": 1.0879800498753118e-05, "loss": 0.42, "step": 36590 }, { "epoch": 9.12718204488778, "grad_norm": 9.610578536987305, "learning_rate": 1.0877306733167084e-05, "loss": 0.3352, "step": 36600 }, { "epoch": 9.129675810473815, "grad_norm": 6.792508602142334, "learning_rate": 1.0874812967581047e-05, "loss": 0.3906, "step": 36610 }, { "epoch": 9.13216957605985, "grad_norm": 6.052822113037109, "learning_rate": 1.0872319201995014e-05, "loss": 0.3852, "step": 36620 }, { "epoch": 9.134663341645885, "grad_norm": 8.799402236938477, "learning_rate": 1.0869825436408978e-05, "loss": 0.4047, "step": 36630 }, { "epoch": 9.13715710723192, "grad_norm": 8.114533424377441, "learning_rate": 1.0867331670822943e-05, "loss": 0.3871, "step": 36640 }, { "epoch": 9.139650872817954, "grad_norm": 9.247639656066895, "learning_rate": 1.0864837905236909e-05, "loss": 0.3993, "step": 36650 }, { "epoch": 9.14214463840399, "grad_norm": 7.904721736907959, "learning_rate": 1.0862344139650874e-05, "loss": 0.3441, "step": 36660 }, { "epoch": 9.144638403990024, "grad_norm": 12.610846519470215, "learning_rate": 1.085985037406484e-05, "loss": 0.3699, "step": 36670 }, { "epoch": 9.147132169576059, "grad_norm": 5.233782768249512, "learning_rate": 1.0857356608478805e-05, "loss": 0.3623, "step": 36680 }, { "epoch": 9.149625935162096, "grad_norm": 6.99005651473999, "learning_rate": 1.0854862842892768e-05, "loss": 0.3476, "step": 36690 }, { "epoch": 9.15211970074813, "grad_norm": 11.361775398254395, "learning_rate": 1.0852369077306735e-05, "loss": 0.412, "step": 36700 }, { "epoch": 9.154613466334165, "grad_norm": 6.979437828063965, "learning_rate": 1.0849875311720699e-05, "loss": 0.4043, "step": 36710 }, { "epoch": 9.1571072319202, "grad_norm": 6.4810943603515625, "learning_rate": 1.0847381546134662e-05, "loss": 0.3768, "step": 36720 }, { "epoch": 9.159600997506235, "grad_norm": 10.649642944335938, "learning_rate": 1.084488778054863e-05, "loss": 0.3687, "step": 36730 }, { "epoch": 9.16209476309227, "grad_norm": 10.065546035766602, "learning_rate": 1.0842394014962595e-05, "loss": 0.3891, "step": 36740 }, { "epoch": 9.164588528678305, "grad_norm": 7.575584411621094, "learning_rate": 1.083990024937656e-05, "loss": 0.3816, "step": 36750 }, { "epoch": 9.16708229426434, "grad_norm": 10.201583862304688, "learning_rate": 1.0837406483790525e-05, "loss": 0.3173, "step": 36760 }, { "epoch": 9.169576059850375, "grad_norm": 10.348787307739258, "learning_rate": 1.0834912718204489e-05, "loss": 0.4488, "step": 36770 }, { "epoch": 9.17206982543641, "grad_norm": 6.726578235626221, "learning_rate": 1.0832418952618456e-05, "loss": 0.3276, "step": 36780 }, { "epoch": 9.174563591022444, "grad_norm": 8.635772705078125, "learning_rate": 1.082992518703242e-05, "loss": 0.3571, "step": 36790 }, { "epoch": 9.17705735660848, "grad_norm": 9.382979393005371, "learning_rate": 1.0827431421446383e-05, "loss": 0.3251, "step": 36800 }, { "epoch": 9.179551122194514, "grad_norm": 5.400996208190918, "learning_rate": 1.082493765586035e-05, "loss": 0.3253, "step": 36810 }, { "epoch": 9.182044887780549, "grad_norm": 6.488340854644775, "learning_rate": 1.0822443890274314e-05, "loss": 0.3224, "step": 36820 }, { "epoch": 9.184538653366584, "grad_norm": 6.428625583648682, "learning_rate": 1.081995012468828e-05, "loss": 0.3234, "step": 36830 }, { "epoch": 9.187032418952619, "grad_norm": 7.193785667419434, "learning_rate": 1.0817456359102244e-05, "loss": 0.2956, "step": 36840 }, { "epoch": 9.189526184538654, "grad_norm": 9.758343696594238, "learning_rate": 1.081496259351621e-05, "loss": 0.3444, "step": 36850 }, { "epoch": 9.192019950124688, "grad_norm": 6.714698314666748, "learning_rate": 1.0812468827930177e-05, "loss": 0.4052, "step": 36860 }, { "epoch": 9.194513715710723, "grad_norm": 8.280781745910645, "learning_rate": 1.080997506234414e-05, "loss": 0.3476, "step": 36870 }, { "epoch": 9.197007481296758, "grad_norm": 9.618976593017578, "learning_rate": 1.0807481296758107e-05, "loss": 0.3929, "step": 36880 }, { "epoch": 9.199501246882793, "grad_norm": 8.107794761657715, "learning_rate": 1.0804987531172071e-05, "loss": 0.3866, "step": 36890 }, { "epoch": 9.201995012468828, "grad_norm": 6.555667400360107, "learning_rate": 1.0802493765586035e-05, "loss": 0.3293, "step": 36900 }, { "epoch": 9.204488778054863, "grad_norm": 5.286886215209961, "learning_rate": 1.0800000000000002e-05, "loss": 0.3245, "step": 36910 }, { "epoch": 9.206982543640898, "grad_norm": 16.370393753051758, "learning_rate": 1.0797506234413965e-05, "loss": 0.3701, "step": 36920 }, { "epoch": 9.209476309226932, "grad_norm": 8.964117050170898, "learning_rate": 1.079501246882793e-05, "loss": 0.4376, "step": 36930 }, { "epoch": 9.211970074812967, "grad_norm": 11.275566101074219, "learning_rate": 1.0792518703241896e-05, "loss": 0.3203, "step": 36940 }, { "epoch": 9.214463840399002, "grad_norm": 7.305138111114502, "learning_rate": 1.0790024937655861e-05, "loss": 0.3401, "step": 36950 }, { "epoch": 9.216957605985037, "grad_norm": 8.845014572143555, "learning_rate": 1.0787531172069826e-05, "loss": 0.3508, "step": 36960 }, { "epoch": 9.219451371571072, "grad_norm": 7.507462024688721, "learning_rate": 1.0785037406483792e-05, "loss": 0.3717, "step": 36970 }, { "epoch": 9.221945137157107, "grad_norm": 6.304407119750977, "learning_rate": 1.0782543640897755e-05, "loss": 0.4225, "step": 36980 }, { "epoch": 9.224438902743142, "grad_norm": 6.4769511222839355, "learning_rate": 1.0780049875311722e-05, "loss": 0.3015, "step": 36990 }, { "epoch": 9.226932668329177, "grad_norm": 5.64371395111084, "learning_rate": 1.0777556109725686e-05, "loss": 0.3666, "step": 37000 }, { "epoch": 9.229426433915211, "grad_norm": 7.976513385772705, "learning_rate": 1.0775062344139651e-05, "loss": 0.3437, "step": 37010 }, { "epoch": 9.231920199501246, "grad_norm": 8.061870574951172, "learning_rate": 1.0772568578553617e-05, "loss": 0.4176, "step": 37020 }, { "epoch": 9.234413965087281, "grad_norm": 6.633289813995361, "learning_rate": 1.0770074812967582e-05, "loss": 0.3978, "step": 37030 }, { "epoch": 9.236907730673316, "grad_norm": 6.083536624908447, "learning_rate": 1.0767581047381547e-05, "loss": 0.3067, "step": 37040 }, { "epoch": 9.239401496259351, "grad_norm": 9.767627716064453, "learning_rate": 1.0765087281795513e-05, "loss": 0.449, "step": 37050 }, { "epoch": 9.241895261845386, "grad_norm": 6.643617153167725, "learning_rate": 1.0762593516209476e-05, "loss": 0.4396, "step": 37060 }, { "epoch": 9.24438902743142, "grad_norm": 13.932653427124023, "learning_rate": 1.0760099750623443e-05, "loss": 0.2836, "step": 37070 }, { "epoch": 9.246882793017456, "grad_norm": 8.449307441711426, "learning_rate": 1.0757605985037407e-05, "loss": 0.3397, "step": 37080 }, { "epoch": 9.24937655860349, "grad_norm": 4.122343063354492, "learning_rate": 1.0755112219451374e-05, "loss": 0.3449, "step": 37090 }, { "epoch": 9.251870324189527, "grad_norm": 5.507016658782959, "learning_rate": 1.0752618453865337e-05, "loss": 0.3574, "step": 37100 }, { "epoch": 9.254364089775562, "grad_norm": 4.903787136077881, "learning_rate": 1.0750124688279303e-05, "loss": 0.3682, "step": 37110 }, { "epoch": 9.256857855361597, "grad_norm": 10.680002212524414, "learning_rate": 1.0747630922693268e-05, "loss": 0.2884, "step": 37120 }, { "epoch": 9.259351620947632, "grad_norm": 7.1772284507751465, "learning_rate": 1.0745137157107233e-05, "loss": 0.3803, "step": 37130 }, { "epoch": 9.261845386533667, "grad_norm": 5.805769920349121, "learning_rate": 1.0742643391521197e-05, "loss": 0.3874, "step": 37140 }, { "epoch": 9.264339152119701, "grad_norm": 6.096296787261963, "learning_rate": 1.0740149625935164e-05, "loss": 0.2803, "step": 37150 }, { "epoch": 9.266832917705736, "grad_norm": 9.108000755310059, "learning_rate": 1.0737655860349128e-05, "loss": 0.4142, "step": 37160 }, { "epoch": 9.269326683291771, "grad_norm": 8.92357063293457, "learning_rate": 1.0735162094763095e-05, "loss": 0.3979, "step": 37170 }, { "epoch": 9.271820448877806, "grad_norm": 7.647791862487793, "learning_rate": 1.0732668329177058e-05, "loss": 0.3712, "step": 37180 }, { "epoch": 9.27431421446384, "grad_norm": 8.077733993530273, "learning_rate": 1.0730174563591022e-05, "loss": 0.334, "step": 37190 }, { "epoch": 9.276807980049876, "grad_norm": 9.143230438232422, "learning_rate": 1.0727680798004989e-05, "loss": 0.3473, "step": 37200 }, { "epoch": 9.27930174563591, "grad_norm": 9.733719825744629, "learning_rate": 1.0725187032418954e-05, "loss": 0.329, "step": 37210 }, { "epoch": 9.281795511221945, "grad_norm": 6.865516185760498, "learning_rate": 1.0722693266832918e-05, "loss": 0.4986, "step": 37220 }, { "epoch": 9.28428927680798, "grad_norm": 6.8000664710998535, "learning_rate": 1.0720199501246885e-05, "loss": 0.3473, "step": 37230 }, { "epoch": 9.286783042394015, "grad_norm": 5.607472896575928, "learning_rate": 1.0717705735660848e-05, "loss": 0.3789, "step": 37240 }, { "epoch": 9.28927680798005, "grad_norm": 8.08205795288086, "learning_rate": 1.0715211970074815e-05, "loss": 0.4234, "step": 37250 }, { "epoch": 9.291770573566085, "grad_norm": 5.731466293334961, "learning_rate": 1.0712718204488779e-05, "loss": 0.3009, "step": 37260 }, { "epoch": 9.29426433915212, "grad_norm": 8.150042533874512, "learning_rate": 1.0710224438902743e-05, "loss": 0.382, "step": 37270 }, { "epoch": 9.296758104738155, "grad_norm": 4.996364116668701, "learning_rate": 1.070773067331671e-05, "loss": 0.319, "step": 37280 }, { "epoch": 9.29925187032419, "grad_norm": 5.883243083953857, "learning_rate": 1.0705236907730673e-05, "loss": 0.3691, "step": 37290 }, { "epoch": 9.301745635910224, "grad_norm": 8.430337905883789, "learning_rate": 1.070274314214464e-05, "loss": 0.3654, "step": 37300 }, { "epoch": 9.30423940149626, "grad_norm": 5.940845012664795, "learning_rate": 1.0700249376558604e-05, "loss": 0.4136, "step": 37310 }, { "epoch": 9.306733167082294, "grad_norm": 8.069353103637695, "learning_rate": 1.069775561097257e-05, "loss": 0.3543, "step": 37320 }, { "epoch": 9.309226932668329, "grad_norm": 7.512514591217041, "learning_rate": 1.0695261845386536e-05, "loss": 0.3448, "step": 37330 }, { "epoch": 9.311720698254364, "grad_norm": 8.438852310180664, "learning_rate": 1.06927680798005e-05, "loss": 0.3801, "step": 37340 }, { "epoch": 9.314214463840399, "grad_norm": 6.524256229400635, "learning_rate": 1.0690274314214463e-05, "loss": 0.43, "step": 37350 }, { "epoch": 9.316708229426434, "grad_norm": 7.598324298858643, "learning_rate": 1.068778054862843e-05, "loss": 0.3323, "step": 37360 }, { "epoch": 9.319201995012468, "grad_norm": 6.724202632904053, "learning_rate": 1.0685286783042394e-05, "loss": 0.3265, "step": 37370 }, { "epoch": 9.321695760598503, "grad_norm": 6.113863945007324, "learning_rate": 1.0682793017456361e-05, "loss": 0.3499, "step": 37380 }, { "epoch": 9.324189526184538, "grad_norm": 5.505059719085693, "learning_rate": 1.0680299251870325e-05, "loss": 0.4184, "step": 37390 }, { "epoch": 9.326683291770573, "grad_norm": 7.815418243408203, "learning_rate": 1.067780548628429e-05, "loss": 0.4034, "step": 37400 }, { "epoch": 9.329177057356608, "grad_norm": 7.3198957443237305, "learning_rate": 1.0675311720698255e-05, "loss": 0.3815, "step": 37410 }, { "epoch": 9.331670822942643, "grad_norm": 5.2639570236206055, "learning_rate": 1.067281795511222e-05, "loss": 0.4119, "step": 37420 }, { "epoch": 9.334164588528678, "grad_norm": 7.291960716247559, "learning_rate": 1.0670324189526184e-05, "loss": 0.3337, "step": 37430 }, { "epoch": 9.336658354114713, "grad_norm": 6.731927871704102, "learning_rate": 1.0667830423940151e-05, "loss": 0.3206, "step": 37440 }, { "epoch": 9.339152119700747, "grad_norm": 6.022372722625732, "learning_rate": 1.0665336658354115e-05, "loss": 0.3708, "step": 37450 }, { "epoch": 9.341645885286782, "grad_norm": 8.287287712097168, "learning_rate": 1.0662842892768082e-05, "loss": 0.3234, "step": 37460 }, { "epoch": 9.344139650872817, "grad_norm": 5.861464023590088, "learning_rate": 1.0660349127182045e-05, "loss": 0.3365, "step": 37470 }, { "epoch": 9.346633416458852, "grad_norm": 7.660949230194092, "learning_rate": 1.065785536159601e-05, "loss": 0.4347, "step": 37480 }, { "epoch": 9.349127182044889, "grad_norm": 7.541104793548584, "learning_rate": 1.0655361596009976e-05, "loss": 0.3541, "step": 37490 }, { "epoch": 9.351620947630924, "grad_norm": 6.571524620056152, "learning_rate": 1.0652867830423941e-05, "loss": 0.3529, "step": 37500 }, { "epoch": 9.354114713216958, "grad_norm": 7.663899898529053, "learning_rate": 1.0650374064837905e-05, "loss": 0.3262, "step": 37510 }, { "epoch": 9.356608478802993, "grad_norm": 5.076836109161377, "learning_rate": 1.0647880299251872e-05, "loss": 0.3693, "step": 37520 }, { "epoch": 9.359102244389028, "grad_norm": 8.110295295715332, "learning_rate": 1.0645386533665836e-05, "loss": 0.3447, "step": 37530 }, { "epoch": 9.361596009975063, "grad_norm": 9.976151466369629, "learning_rate": 1.0642892768079803e-05, "loss": 0.3858, "step": 37540 }, { "epoch": 9.364089775561098, "grad_norm": 5.691299915313721, "learning_rate": 1.0640399002493766e-05, "loss": 0.3673, "step": 37550 }, { "epoch": 9.366583541147133, "grad_norm": 6.755293369293213, "learning_rate": 1.0637905236907732e-05, "loss": 0.4159, "step": 37560 }, { "epoch": 9.369077306733168, "grad_norm": 6.009609699249268, "learning_rate": 1.0635411471321697e-05, "loss": 0.3256, "step": 37570 }, { "epoch": 9.371571072319203, "grad_norm": 8.494285583496094, "learning_rate": 1.0632917705735662e-05, "loss": 0.3694, "step": 37580 }, { "epoch": 9.374064837905237, "grad_norm": 5.737974643707275, "learning_rate": 1.0630423940149627e-05, "loss": 0.3142, "step": 37590 }, { "epoch": 9.376558603491272, "grad_norm": 6.128436088562012, "learning_rate": 1.0627930174563593e-05, "loss": 0.3198, "step": 37600 }, { "epoch": 9.379052369077307, "grad_norm": 7.005685806274414, "learning_rate": 1.0625436408977556e-05, "loss": 0.2682, "step": 37610 }, { "epoch": 9.381546134663342, "grad_norm": 9.262737274169922, "learning_rate": 1.0622942643391523e-05, "loss": 0.3833, "step": 37620 }, { "epoch": 9.384039900249377, "grad_norm": 7.676625728607178, "learning_rate": 1.0620448877805487e-05, "loss": 0.3851, "step": 37630 }, { "epoch": 9.386533665835412, "grad_norm": 7.627851486206055, "learning_rate": 1.061795511221945e-05, "loss": 0.3433, "step": 37640 }, { "epoch": 9.389027431421447, "grad_norm": 7.219352722167969, "learning_rate": 1.0615461346633418e-05, "loss": 0.3822, "step": 37650 }, { "epoch": 9.391521197007481, "grad_norm": 6.273348808288574, "learning_rate": 1.0612967581047381e-05, "loss": 0.3067, "step": 37660 }, { "epoch": 9.394014962593516, "grad_norm": 7.28784704208374, "learning_rate": 1.0610473815461348e-05, "loss": 0.3483, "step": 37670 }, { "epoch": 9.396508728179551, "grad_norm": 7.136936187744141, "learning_rate": 1.0607980049875314e-05, "loss": 0.3342, "step": 37680 }, { "epoch": 9.399002493765586, "grad_norm": 9.429666519165039, "learning_rate": 1.0605486284289277e-05, "loss": 0.387, "step": 37690 }, { "epoch": 9.401496259351621, "grad_norm": 10.183398246765137, "learning_rate": 1.0602992518703244e-05, "loss": 0.3281, "step": 37700 }, { "epoch": 9.403990024937656, "grad_norm": 9.305542945861816, "learning_rate": 1.0600498753117208e-05, "loss": 0.422, "step": 37710 }, { "epoch": 9.40648379052369, "grad_norm": 13.953811645507812, "learning_rate": 1.0598004987531171e-05, "loss": 0.4099, "step": 37720 }, { "epoch": 9.408977556109726, "grad_norm": 7.635592460632324, "learning_rate": 1.0595511221945138e-05, "loss": 0.3641, "step": 37730 }, { "epoch": 9.41147132169576, "grad_norm": 8.282495498657227, "learning_rate": 1.0593017456359102e-05, "loss": 0.3229, "step": 37740 }, { "epoch": 9.413965087281795, "grad_norm": 6.074685096740723, "learning_rate": 1.0590523690773069e-05, "loss": 0.3637, "step": 37750 }, { "epoch": 9.41645885286783, "grad_norm": 5.761679172515869, "learning_rate": 1.0588029925187033e-05, "loss": 0.3487, "step": 37760 }, { "epoch": 9.418952618453865, "grad_norm": 9.500771522521973, "learning_rate": 1.0585536159600998e-05, "loss": 0.3125, "step": 37770 }, { "epoch": 9.4214463840399, "grad_norm": 8.102692604064941, "learning_rate": 1.0583042394014963e-05, "loss": 0.3755, "step": 37780 }, { "epoch": 9.423940149625935, "grad_norm": 7.283730506896973, "learning_rate": 1.0580548628428929e-05, "loss": 0.3766, "step": 37790 }, { "epoch": 9.42643391521197, "grad_norm": 8.951366424560547, "learning_rate": 1.0578054862842896e-05, "loss": 0.4029, "step": 37800 }, { "epoch": 9.428927680798004, "grad_norm": 5.814979076385498, "learning_rate": 1.057556109725686e-05, "loss": 0.3822, "step": 37810 }, { "epoch": 9.43142144638404, "grad_norm": 5.397665977478027, "learning_rate": 1.0573067331670823e-05, "loss": 0.3395, "step": 37820 }, { "epoch": 9.433915211970074, "grad_norm": 8.175272941589355, "learning_rate": 1.057057356608479e-05, "loss": 0.3406, "step": 37830 }, { "epoch": 9.436408977556109, "grad_norm": 9.08521556854248, "learning_rate": 1.0568079800498753e-05, "loss": 0.3476, "step": 37840 }, { "epoch": 9.438902743142144, "grad_norm": 5.0024518966674805, "learning_rate": 1.0565586034912719e-05, "loss": 0.4372, "step": 37850 }, { "epoch": 9.441396508728179, "grad_norm": 9.28567886352539, "learning_rate": 1.0563092269326684e-05, "loss": 0.3301, "step": 37860 }, { "epoch": 9.443890274314214, "grad_norm": 10.780661582946777, "learning_rate": 1.056059850374065e-05, "loss": 0.3487, "step": 37870 }, { "epoch": 9.446384039900249, "grad_norm": 6.956487655639648, "learning_rate": 1.0558104738154615e-05, "loss": 0.3411, "step": 37880 }, { "epoch": 9.448877805486283, "grad_norm": 8.598062515258789, "learning_rate": 1.055561097256858e-05, "loss": 0.406, "step": 37890 }, { "epoch": 9.451371571072318, "grad_norm": 7.002991676330566, "learning_rate": 1.0553117206982544e-05, "loss": 0.3886, "step": 37900 }, { "epoch": 9.453865336658355, "grad_norm": 6.1540846824646, "learning_rate": 1.055062344139651e-05, "loss": 0.3386, "step": 37910 }, { "epoch": 9.45635910224439, "grad_norm": 4.934481620788574, "learning_rate": 1.0548129675810474e-05, "loss": 0.3195, "step": 37920 }, { "epoch": 9.458852867830425, "grad_norm": 7.763730525970459, "learning_rate": 1.054563591022444e-05, "loss": 0.3492, "step": 37930 }, { "epoch": 9.46134663341646, "grad_norm": 10.117830276489258, "learning_rate": 1.0543142144638405e-05, "loss": 0.3134, "step": 37940 }, { "epoch": 9.463840399002494, "grad_norm": 6.80835485458374, "learning_rate": 1.054064837905237e-05, "loss": 0.3832, "step": 37950 }, { "epoch": 9.46633416458853, "grad_norm": 6.438353538513184, "learning_rate": 1.0538154613466335e-05, "loss": 0.3753, "step": 37960 }, { "epoch": 9.468827930174564, "grad_norm": 7.7765278816223145, "learning_rate": 1.05356608478803e-05, "loss": 0.342, "step": 37970 }, { "epoch": 9.471321695760599, "grad_norm": 7.024895191192627, "learning_rate": 1.0533167082294264e-05, "loss": 0.3745, "step": 37980 }, { "epoch": 9.473815461346634, "grad_norm": 9.783491134643555, "learning_rate": 1.0530673316708231e-05, "loss": 0.3311, "step": 37990 }, { "epoch": 9.476309226932669, "grad_norm": 7.7284979820251465, "learning_rate": 1.0528179551122195e-05, "loss": 0.3344, "step": 38000 }, { "epoch": 9.478802992518704, "grad_norm": 5.616584777832031, "learning_rate": 1.0525685785536159e-05, "loss": 0.3145, "step": 38010 }, { "epoch": 9.481296758104738, "grad_norm": 7.88400936126709, "learning_rate": 1.0523192019950126e-05, "loss": 0.332, "step": 38020 }, { "epoch": 9.483790523690773, "grad_norm": 4.7109222412109375, "learning_rate": 1.0520698254364091e-05, "loss": 0.3345, "step": 38030 }, { "epoch": 9.486284289276808, "grad_norm": 7.7549543380737305, "learning_rate": 1.0518204488778056e-05, "loss": 0.3887, "step": 38040 }, { "epoch": 9.488778054862843, "grad_norm": 13.061963081359863, "learning_rate": 1.0515710723192022e-05, "loss": 0.3815, "step": 38050 }, { "epoch": 9.491271820448878, "grad_norm": 7.7246479988098145, "learning_rate": 1.0513216957605985e-05, "loss": 0.2914, "step": 38060 }, { "epoch": 9.493765586034913, "grad_norm": 9.497514724731445, "learning_rate": 1.0510723192019952e-05, "loss": 0.3772, "step": 38070 }, { "epoch": 9.496259351620948, "grad_norm": 10.470547676086426, "learning_rate": 1.0508229426433916e-05, "loss": 0.3788, "step": 38080 }, { "epoch": 9.498753117206983, "grad_norm": 5.6805009841918945, "learning_rate": 1.0505735660847883e-05, "loss": 0.3683, "step": 38090 }, { "epoch": 9.501246882793017, "grad_norm": 8.353499412536621, "learning_rate": 1.0503241895261846e-05, "loss": 0.3531, "step": 38100 }, { "epoch": 9.503740648379052, "grad_norm": 5.944037914276123, "learning_rate": 1.050074812967581e-05, "loss": 0.3669, "step": 38110 }, { "epoch": 9.506234413965087, "grad_norm": 10.917936325073242, "learning_rate": 1.0498254364089777e-05, "loss": 0.383, "step": 38120 }, { "epoch": 9.508728179551122, "grad_norm": 8.01206111907959, "learning_rate": 1.049576059850374e-05, "loss": 0.3929, "step": 38130 }, { "epoch": 9.511221945137157, "grad_norm": 8.861490249633789, "learning_rate": 1.0493266832917706e-05, "loss": 0.3805, "step": 38140 }, { "epoch": 9.513715710723192, "grad_norm": 5.089669227600098, "learning_rate": 1.0490773067331673e-05, "loss": 0.3602, "step": 38150 }, { "epoch": 9.516209476309227, "grad_norm": 7.066434383392334, "learning_rate": 1.0488279301745637e-05, "loss": 0.376, "step": 38160 }, { "epoch": 9.518703241895262, "grad_norm": 6.961791038513184, "learning_rate": 1.0485785536159604e-05, "loss": 0.3763, "step": 38170 }, { "epoch": 9.521197007481296, "grad_norm": 9.394706726074219, "learning_rate": 1.0483291770573567e-05, "loss": 0.3548, "step": 38180 }, { "epoch": 9.523690773067331, "grad_norm": 8.510271072387695, "learning_rate": 1.048079800498753e-05, "loss": 0.3591, "step": 38190 }, { "epoch": 9.526184538653366, "grad_norm": 6.186204433441162, "learning_rate": 1.0478304239401498e-05, "loss": 0.3024, "step": 38200 }, { "epoch": 9.528678304239401, "grad_norm": 8.06693172454834, "learning_rate": 1.0475810473815461e-05, "loss": 0.3328, "step": 38210 }, { "epoch": 9.531172069825436, "grad_norm": 5.462076663970947, "learning_rate": 1.0473316708229427e-05, "loss": 0.3593, "step": 38220 }, { "epoch": 9.53366583541147, "grad_norm": 9.915395736694336, "learning_rate": 1.0470822942643392e-05, "loss": 0.3356, "step": 38230 }, { "epoch": 9.536159600997506, "grad_norm": 7.561076641082764, "learning_rate": 1.0468329177057357e-05, "loss": 0.4131, "step": 38240 }, { "epoch": 9.53865336658354, "grad_norm": 11.387033462524414, "learning_rate": 1.0465835411471323e-05, "loss": 0.3629, "step": 38250 }, { "epoch": 9.541147132169575, "grad_norm": 6.235090255737305, "learning_rate": 1.0463341645885288e-05, "loss": 0.2799, "step": 38260 }, { "epoch": 9.54364089775561, "grad_norm": 6.4282989501953125, "learning_rate": 1.0460847880299252e-05, "loss": 0.3212, "step": 38270 }, { "epoch": 9.546134663341645, "grad_norm": 6.645716667175293, "learning_rate": 1.0458354114713219e-05, "loss": 0.3519, "step": 38280 }, { "epoch": 9.548628428927682, "grad_norm": 8.45456314086914, "learning_rate": 1.0455860349127182e-05, "loss": 0.3729, "step": 38290 }, { "epoch": 9.551122194513717, "grad_norm": 5.648975849151611, "learning_rate": 1.045336658354115e-05, "loss": 0.3289, "step": 38300 }, { "epoch": 9.553615960099751, "grad_norm": 9.984889030456543, "learning_rate": 1.0450872817955113e-05, "loss": 0.4206, "step": 38310 }, { "epoch": 9.556109725685786, "grad_norm": 4.996423721313477, "learning_rate": 1.0448379052369078e-05, "loss": 0.2972, "step": 38320 }, { "epoch": 9.558603491271821, "grad_norm": 5.108618259429932, "learning_rate": 1.0445885286783043e-05, "loss": 0.3033, "step": 38330 }, { "epoch": 9.561097256857856, "grad_norm": 6.292821884155273, "learning_rate": 1.0443391521197009e-05, "loss": 0.3105, "step": 38340 }, { "epoch": 9.563591022443891, "grad_norm": 7.772547245025635, "learning_rate": 1.0440897755610972e-05, "loss": 0.3646, "step": 38350 }, { "epoch": 9.566084788029926, "grad_norm": 6.700054168701172, "learning_rate": 1.043840399002494e-05, "loss": 0.3842, "step": 38360 }, { "epoch": 9.56857855361596, "grad_norm": 5.910054683685303, "learning_rate": 1.0435910224438903e-05, "loss": 0.3718, "step": 38370 }, { "epoch": 9.571072319201996, "grad_norm": 5.428488731384277, "learning_rate": 1.043341645885287e-05, "loss": 0.3454, "step": 38380 }, { "epoch": 9.57356608478803, "grad_norm": 7.547824382781982, "learning_rate": 1.0430922693266834e-05, "loss": 0.3771, "step": 38390 }, { "epoch": 9.576059850374065, "grad_norm": 5.2548909187316895, "learning_rate": 1.0428428927680799e-05, "loss": 0.3896, "step": 38400 }, { "epoch": 9.5785536159601, "grad_norm": 6.828177452087402, "learning_rate": 1.0425935162094764e-05, "loss": 0.3847, "step": 38410 }, { "epoch": 9.581047381546135, "grad_norm": 8.20153522491455, "learning_rate": 1.042344139650873e-05, "loss": 0.3524, "step": 38420 }, { "epoch": 9.58354114713217, "grad_norm": 4.548335075378418, "learning_rate": 1.0420947630922693e-05, "loss": 0.3613, "step": 38430 }, { "epoch": 9.586034912718205, "grad_norm": 6.978244304656982, "learning_rate": 1.041845386533666e-05, "loss": 0.3803, "step": 38440 }, { "epoch": 9.58852867830424, "grad_norm": 7.369738578796387, "learning_rate": 1.0415960099750624e-05, "loss": 0.3134, "step": 38450 }, { "epoch": 9.591022443890274, "grad_norm": 8.234667778015137, "learning_rate": 1.041346633416459e-05, "loss": 0.3537, "step": 38460 }, { "epoch": 9.59351620947631, "grad_norm": 6.086751461029053, "learning_rate": 1.0410972568578554e-05, "loss": 0.3783, "step": 38470 }, { "epoch": 9.596009975062344, "grad_norm": 7.9929046630859375, "learning_rate": 1.0408478802992518e-05, "loss": 0.3491, "step": 38480 }, { "epoch": 9.598503740648379, "grad_norm": 11.648649215698242, "learning_rate": 1.0405985037406485e-05, "loss": 0.3418, "step": 38490 }, { "epoch": 9.600997506234414, "grad_norm": 8.06251049041748, "learning_rate": 1.040349127182045e-05, "loss": 0.3483, "step": 38500 }, { "epoch": 9.603491271820449, "grad_norm": 9.651390075683594, "learning_rate": 1.0400997506234414e-05, "loss": 0.3574, "step": 38510 }, { "epoch": 9.605985037406484, "grad_norm": 4.3901777267456055, "learning_rate": 1.0398503740648381e-05, "loss": 0.3304, "step": 38520 }, { "epoch": 9.608478802992519, "grad_norm": 5.272586345672607, "learning_rate": 1.0396009975062345e-05, "loss": 0.2917, "step": 38530 }, { "epoch": 9.610972568578553, "grad_norm": 9.170848846435547, "learning_rate": 1.0393516209476312e-05, "loss": 0.2899, "step": 38540 }, { "epoch": 9.613466334164588, "grad_norm": 7.961601257324219, "learning_rate": 1.0391022443890275e-05, "loss": 0.3714, "step": 38550 }, { "epoch": 9.615960099750623, "grad_norm": 8.155771255493164, "learning_rate": 1.0388528678304239e-05, "loss": 0.3591, "step": 38560 }, { "epoch": 9.618453865336658, "grad_norm": 7.194378852844238, "learning_rate": 1.0386034912718206e-05, "loss": 0.355, "step": 38570 }, { "epoch": 9.620947630922693, "grad_norm": 8.259054183959961, "learning_rate": 1.038354114713217e-05, "loss": 0.2957, "step": 38580 }, { "epoch": 9.623441396508728, "grad_norm": 6.669407844543457, "learning_rate": 1.0381047381546136e-05, "loss": 0.3607, "step": 38590 }, { "epoch": 9.625935162094763, "grad_norm": 7.477216720581055, "learning_rate": 1.03785536159601e-05, "loss": 0.3427, "step": 38600 }, { "epoch": 9.628428927680797, "grad_norm": 5.8216986656188965, "learning_rate": 1.0376059850374065e-05, "loss": 0.3458, "step": 38610 }, { "epoch": 9.630922693266832, "grad_norm": 10.364824295043945, "learning_rate": 1.0373566084788032e-05, "loss": 0.3817, "step": 38620 }, { "epoch": 9.633416458852867, "grad_norm": 5.567168235778809, "learning_rate": 1.0371072319201996e-05, "loss": 0.2853, "step": 38630 }, { "epoch": 9.635910224438902, "grad_norm": 7.306951999664307, "learning_rate": 1.036857855361596e-05, "loss": 0.3538, "step": 38640 }, { "epoch": 9.638403990024937, "grad_norm": 8.60983943939209, "learning_rate": 1.0366084788029927e-05, "loss": 0.3719, "step": 38650 }, { "epoch": 9.640897755610972, "grad_norm": 8.791508674621582, "learning_rate": 1.036359102244389e-05, "loss": 0.4041, "step": 38660 }, { "epoch": 9.643391521197007, "grad_norm": 6.479540824890137, "learning_rate": 1.0361097256857857e-05, "loss": 0.3139, "step": 38670 }, { "epoch": 9.645885286783042, "grad_norm": 7.419809818267822, "learning_rate": 1.035860349127182e-05, "loss": 0.3653, "step": 38680 }, { "epoch": 9.648379052369076, "grad_norm": 11.746686935424805, "learning_rate": 1.0356109725685786e-05, "loss": 0.3622, "step": 38690 }, { "epoch": 9.650872817955111, "grad_norm": 9.593301773071289, "learning_rate": 1.0353615960099751e-05, "loss": 0.3942, "step": 38700 }, { "epoch": 9.653366583541148, "grad_norm": 5.149587154388428, "learning_rate": 1.0351122194513717e-05, "loss": 0.3576, "step": 38710 }, { "epoch": 9.655860349127183, "grad_norm": 5.903531551361084, "learning_rate": 1.034862842892768e-05, "loss": 0.3395, "step": 38720 }, { "epoch": 9.658354114713218, "grad_norm": 7.770636558532715, "learning_rate": 1.0346134663341647e-05, "loss": 0.2986, "step": 38730 }, { "epoch": 9.660847880299253, "grad_norm": 8.158554077148438, "learning_rate": 1.0343640897755611e-05, "loss": 0.3346, "step": 38740 }, { "epoch": 9.663341645885287, "grad_norm": 5.318978786468506, "learning_rate": 1.0341147132169578e-05, "loss": 0.312, "step": 38750 }, { "epoch": 9.665835411471322, "grad_norm": 7.235579490661621, "learning_rate": 1.0338653366583542e-05, "loss": 0.3416, "step": 38760 }, { "epoch": 9.668329177057357, "grad_norm": 6.485595226287842, "learning_rate": 1.0336159600997507e-05, "loss": 0.335, "step": 38770 }, { "epoch": 9.670822942643392, "grad_norm": 8.763544082641602, "learning_rate": 1.0333665835411472e-05, "loss": 0.3305, "step": 38780 }, { "epoch": 9.673316708229427, "grad_norm": 11.1206693649292, "learning_rate": 1.0331172069825438e-05, "loss": 0.3349, "step": 38790 }, { "epoch": 9.675810473815462, "grad_norm": 9.942346572875977, "learning_rate": 1.0328678304239403e-05, "loss": 0.3211, "step": 38800 }, { "epoch": 9.678304239401497, "grad_norm": 7.735063552856445, "learning_rate": 1.0326184538653368e-05, "loss": 0.3628, "step": 38810 }, { "epoch": 9.680798004987532, "grad_norm": 8.735753059387207, "learning_rate": 1.0323690773067332e-05, "loss": 0.3722, "step": 38820 }, { "epoch": 9.683291770573566, "grad_norm": 8.924358367919922, "learning_rate": 1.0321197007481299e-05, "loss": 0.3788, "step": 38830 }, { "epoch": 9.685785536159601, "grad_norm": 10.292500495910645, "learning_rate": 1.0318703241895262e-05, "loss": 0.4222, "step": 38840 }, { "epoch": 9.688279301745636, "grad_norm": 5.393009662628174, "learning_rate": 1.0316209476309228e-05, "loss": 0.4147, "step": 38850 }, { "epoch": 9.690773067331671, "grad_norm": 6.392873287200928, "learning_rate": 1.0313715710723193e-05, "loss": 0.2981, "step": 38860 }, { "epoch": 9.693266832917706, "grad_norm": 5.524891376495361, "learning_rate": 1.0311221945137158e-05, "loss": 0.3877, "step": 38870 }, { "epoch": 9.69576059850374, "grad_norm": 7.086117744445801, "learning_rate": 1.0308728179551124e-05, "loss": 0.3449, "step": 38880 }, { "epoch": 9.698254364089776, "grad_norm": 6.8754072189331055, "learning_rate": 1.0306234413965089e-05, "loss": 0.365, "step": 38890 }, { "epoch": 9.70074812967581, "grad_norm": 8.261271476745605, "learning_rate": 1.0303740648379053e-05, "loss": 0.2822, "step": 38900 }, { "epoch": 9.703241895261845, "grad_norm": 8.90969467163086, "learning_rate": 1.030124688279302e-05, "loss": 0.4126, "step": 38910 }, { "epoch": 9.70573566084788, "grad_norm": 6.020090579986572, "learning_rate": 1.0298753117206983e-05, "loss": 0.3471, "step": 38920 }, { "epoch": 9.708229426433915, "grad_norm": 7.7385993003845215, "learning_rate": 1.0296259351620947e-05, "loss": 0.3681, "step": 38930 }, { "epoch": 9.71072319201995, "grad_norm": 8.860572814941406, "learning_rate": 1.0293765586034914e-05, "loss": 0.435, "step": 38940 }, { "epoch": 9.713216957605985, "grad_norm": 8.749921798706055, "learning_rate": 1.0291271820448877e-05, "loss": 0.3425, "step": 38950 }, { "epoch": 9.71571072319202, "grad_norm": 8.579872131347656, "learning_rate": 1.0288778054862844e-05, "loss": 0.3983, "step": 38960 }, { "epoch": 9.718204488778055, "grad_norm": 7.9669389724731445, "learning_rate": 1.028628428927681e-05, "loss": 0.3475, "step": 38970 }, { "epoch": 9.72069825436409, "grad_norm": 7.299527645111084, "learning_rate": 1.0283790523690773e-05, "loss": 0.3911, "step": 38980 }, { "epoch": 9.723192019950124, "grad_norm": 5.163363933563232, "learning_rate": 1.028129675810474e-05, "loss": 0.361, "step": 38990 }, { "epoch": 9.72568578553616, "grad_norm": 8.54089641571045, "learning_rate": 1.0278802992518704e-05, "loss": 0.33, "step": 39000 }, { "epoch": 9.728179551122194, "grad_norm": 7.269198894500732, "learning_rate": 1.0276309226932668e-05, "loss": 0.3695, "step": 39010 }, { "epoch": 9.730673316708229, "grad_norm": 8.969745635986328, "learning_rate": 1.0273815461346635e-05, "loss": 0.365, "step": 39020 }, { "epoch": 9.733167082294264, "grad_norm": 5.630902290344238, "learning_rate": 1.0271321695760598e-05, "loss": 0.3615, "step": 39030 }, { "epoch": 9.735660847880299, "grad_norm": 8.235981941223145, "learning_rate": 1.0268827930174565e-05, "loss": 0.4206, "step": 39040 }, { "epoch": 9.738154613466333, "grad_norm": 7.914098262786865, "learning_rate": 1.0266334164588529e-05, "loss": 0.343, "step": 39050 }, { "epoch": 9.740648379052368, "grad_norm": 7.478903293609619, "learning_rate": 1.0263840399002494e-05, "loss": 0.3678, "step": 39060 }, { "epoch": 9.743142144638403, "grad_norm": 7.806370735168457, "learning_rate": 1.026134663341646e-05, "loss": 0.3855, "step": 39070 }, { "epoch": 9.745635910224438, "grad_norm": 6.9918341636657715, "learning_rate": 1.0258852867830425e-05, "loss": 0.3848, "step": 39080 }, { "epoch": 9.748129675810475, "grad_norm": 7.807930946350098, "learning_rate": 1.0256359102244392e-05, "loss": 0.3518, "step": 39090 }, { "epoch": 9.75062344139651, "grad_norm": 5.30518913269043, "learning_rate": 1.0253865336658355e-05, "loss": 0.3512, "step": 39100 }, { "epoch": 9.753117206982544, "grad_norm": 7.166024208068848, "learning_rate": 1.0251371571072319e-05, "loss": 0.4185, "step": 39110 }, { "epoch": 9.75561097256858, "grad_norm": 7.371334075927734, "learning_rate": 1.0248877805486286e-05, "loss": 0.385, "step": 39120 }, { "epoch": 9.758104738154614, "grad_norm": 5.414678573608398, "learning_rate": 1.024638403990025e-05, "loss": 0.3971, "step": 39130 }, { "epoch": 9.760598503740649, "grad_norm": 8.119851112365723, "learning_rate": 1.0243890274314215e-05, "loss": 0.3534, "step": 39140 }, { "epoch": 9.763092269326684, "grad_norm": 10.847015380859375, "learning_rate": 1.024139650872818e-05, "loss": 0.4538, "step": 39150 }, { "epoch": 9.765586034912719, "grad_norm": 7.289000511169434, "learning_rate": 1.0238902743142145e-05, "loss": 0.3739, "step": 39160 }, { "epoch": 9.768079800498754, "grad_norm": 9.922390937805176, "learning_rate": 1.023640897755611e-05, "loss": 0.3072, "step": 39170 }, { "epoch": 9.770573566084789, "grad_norm": 8.990649223327637, "learning_rate": 1.0233915211970076e-05, "loss": 0.3926, "step": 39180 }, { "epoch": 9.773067331670823, "grad_norm": 6.73281717300415, "learning_rate": 1.023142144638404e-05, "loss": 0.3652, "step": 39190 }, { "epoch": 9.775561097256858, "grad_norm": 6.121176242828369, "learning_rate": 1.0228927680798007e-05, "loss": 0.3424, "step": 39200 }, { "epoch": 9.778054862842893, "grad_norm": 10.394766807556152, "learning_rate": 1.022643391521197e-05, "loss": 0.3365, "step": 39210 }, { "epoch": 9.780548628428928, "grad_norm": 6.58123254776001, "learning_rate": 1.0223940149625936e-05, "loss": 0.3641, "step": 39220 }, { "epoch": 9.783042394014963, "grad_norm": 6.802944183349609, "learning_rate": 1.0221446384039901e-05, "loss": 0.2725, "step": 39230 }, { "epoch": 9.785536159600998, "grad_norm": 8.817499160766602, "learning_rate": 1.0218952618453866e-05, "loss": 0.3455, "step": 39240 }, { "epoch": 9.788029925187033, "grad_norm": 9.854268074035645, "learning_rate": 1.0216458852867832e-05, "loss": 0.3698, "step": 39250 }, { "epoch": 9.790523690773068, "grad_norm": 7.4926323890686035, "learning_rate": 1.0213965087281797e-05, "loss": 0.4717, "step": 39260 }, { "epoch": 9.793017456359102, "grad_norm": 5.085393905639648, "learning_rate": 1.021147132169576e-05, "loss": 0.2991, "step": 39270 }, { "epoch": 9.795511221945137, "grad_norm": 9.098230361938477, "learning_rate": 1.0208977556109728e-05, "loss": 0.2923, "step": 39280 }, { "epoch": 9.798004987531172, "grad_norm": 10.33260440826416, "learning_rate": 1.0206483790523691e-05, "loss": 0.4181, "step": 39290 }, { "epoch": 9.800498753117207, "grad_norm": 8.845526695251465, "learning_rate": 1.0203990024937658e-05, "loss": 0.4501, "step": 39300 }, { "epoch": 9.802992518703242, "grad_norm": 11.282034873962402, "learning_rate": 1.0201496259351622e-05, "loss": 0.3389, "step": 39310 }, { "epoch": 9.805486284289277, "grad_norm": 7.388114929199219, "learning_rate": 1.0199002493765587e-05, "loss": 0.3754, "step": 39320 }, { "epoch": 9.807980049875312, "grad_norm": 7.895745754241943, "learning_rate": 1.0196508728179552e-05, "loss": 0.3507, "step": 39330 }, { "epoch": 9.810473815461346, "grad_norm": 7.767431259155273, "learning_rate": 1.0194014962593518e-05, "loss": 0.4053, "step": 39340 }, { "epoch": 9.812967581047381, "grad_norm": 6.963127613067627, "learning_rate": 1.0191521197007481e-05, "loss": 0.3367, "step": 39350 }, { "epoch": 9.815461346633416, "grad_norm": 5.868590354919434, "learning_rate": 1.0189027431421448e-05, "loss": 0.3903, "step": 39360 }, { "epoch": 9.817955112219451, "grad_norm": 8.267974853515625, "learning_rate": 1.0186533665835412e-05, "loss": 0.3503, "step": 39370 }, { "epoch": 9.820448877805486, "grad_norm": 6.432572841644287, "learning_rate": 1.0184039900249379e-05, "loss": 0.3092, "step": 39380 }, { "epoch": 9.82294264339152, "grad_norm": 9.071556091308594, "learning_rate": 1.0181546134663343e-05, "loss": 0.4478, "step": 39390 }, { "epoch": 9.825436408977556, "grad_norm": 8.835619926452637, "learning_rate": 1.0179052369077306e-05, "loss": 0.3756, "step": 39400 }, { "epoch": 9.82793017456359, "grad_norm": 10.740196228027344, "learning_rate": 1.0176558603491273e-05, "loss": 0.3349, "step": 39410 }, { "epoch": 9.830423940149625, "grad_norm": 11.372931480407715, "learning_rate": 1.0174064837905237e-05, "loss": 0.403, "step": 39420 }, { "epoch": 9.83291770573566, "grad_norm": 9.308483123779297, "learning_rate": 1.0171571072319202e-05, "loss": 0.3523, "step": 39430 }, { "epoch": 9.835411471321695, "grad_norm": 6.578537464141846, "learning_rate": 1.0169077306733169e-05, "loss": 0.324, "step": 39440 }, { "epoch": 9.83790523690773, "grad_norm": 6.254716396331787, "learning_rate": 1.0166583541147133e-05, "loss": 0.3546, "step": 39450 }, { "epoch": 9.840399002493765, "grad_norm": 6.601047992706299, "learning_rate": 1.01640897755611e-05, "loss": 0.405, "step": 39460 }, { "epoch": 9.8428927680798, "grad_norm": 9.484192848205566, "learning_rate": 1.0161596009975063e-05, "loss": 0.3502, "step": 39470 }, { "epoch": 9.845386533665835, "grad_norm": 7.499188423156738, "learning_rate": 1.0159102244389027e-05, "loss": 0.3508, "step": 39480 }, { "epoch": 9.84788029925187, "grad_norm": 5.97933292388916, "learning_rate": 1.0156608478802994e-05, "loss": 0.3444, "step": 39490 }, { "epoch": 9.850374064837904, "grad_norm": 7.985541343688965, "learning_rate": 1.0154114713216958e-05, "loss": 0.3577, "step": 39500 }, { "epoch": 9.85286783042394, "grad_norm": 9.508009910583496, "learning_rate": 1.0151620947630923e-05, "loss": 0.2855, "step": 39510 }, { "epoch": 9.855361596009976, "grad_norm": 7.971591472625732, "learning_rate": 1.0149127182044888e-05, "loss": 0.3184, "step": 39520 }, { "epoch": 9.85785536159601, "grad_norm": 6.7001848220825195, "learning_rate": 1.0146633416458853e-05, "loss": 0.3265, "step": 39530 }, { "epoch": 9.860349127182046, "grad_norm": 5.2400689125061035, "learning_rate": 1.0144139650872819e-05, "loss": 0.3971, "step": 39540 }, { "epoch": 9.86284289276808, "grad_norm": 7.525379657745361, "learning_rate": 1.0141645885286784e-05, "loss": 0.384, "step": 39550 }, { "epoch": 9.865336658354115, "grad_norm": 6.4652628898620605, "learning_rate": 1.0139152119700748e-05, "loss": 0.4057, "step": 39560 }, { "epoch": 9.86783042394015, "grad_norm": 7.187132835388184, "learning_rate": 1.0136658354114715e-05, "loss": 0.366, "step": 39570 }, { "epoch": 9.870324189526185, "grad_norm": 7.20662784576416, "learning_rate": 1.0134164588528678e-05, "loss": 0.344, "step": 39580 }, { "epoch": 9.87281795511222, "grad_norm": 5.796775817871094, "learning_rate": 1.0131670822942645e-05, "loss": 0.3455, "step": 39590 }, { "epoch": 9.875311720698255, "grad_norm": 5.326415061950684, "learning_rate": 1.0129177057356609e-05, "loss": 0.3139, "step": 39600 }, { "epoch": 9.87780548628429, "grad_norm": 9.870059967041016, "learning_rate": 1.0126683291770574e-05, "loss": 0.4385, "step": 39610 }, { "epoch": 9.880299251870325, "grad_norm": 5.55562686920166, "learning_rate": 1.012418952618454e-05, "loss": 0.3718, "step": 39620 }, { "epoch": 9.88279301745636, "grad_norm": 6.446163177490234, "learning_rate": 1.0121695760598505e-05, "loss": 0.4105, "step": 39630 }, { "epoch": 9.885286783042394, "grad_norm": 7.71967887878418, "learning_rate": 1.0119201995012469e-05, "loss": 0.3544, "step": 39640 }, { "epoch": 9.88778054862843, "grad_norm": 6.094079494476318, "learning_rate": 1.0116708229426436e-05, "loss": 0.3683, "step": 39650 }, { "epoch": 9.890274314214464, "grad_norm": 6.991535186767578, "learning_rate": 1.0114214463840399e-05, "loss": 0.3176, "step": 39660 }, { "epoch": 9.892768079800499, "grad_norm": 6.277289867401123, "learning_rate": 1.0111720698254366e-05, "loss": 0.3603, "step": 39670 }, { "epoch": 9.895261845386534, "grad_norm": 5.89650821685791, "learning_rate": 1.010922693266833e-05, "loss": 0.3687, "step": 39680 }, { "epoch": 9.897755610972569, "grad_norm": 8.910008430480957, "learning_rate": 1.0106733167082295e-05, "loss": 0.3066, "step": 39690 }, { "epoch": 9.900249376558603, "grad_norm": 7.53981876373291, "learning_rate": 1.010423940149626e-05, "loss": 0.3586, "step": 39700 }, { "epoch": 9.902743142144638, "grad_norm": 6.939689636230469, "learning_rate": 1.0101745635910226e-05, "loss": 0.3845, "step": 39710 }, { "epoch": 9.905236907730673, "grad_norm": 7.977263927459717, "learning_rate": 1.009925187032419e-05, "loss": 0.4126, "step": 39720 }, { "epoch": 9.907730673316708, "grad_norm": 7.981022357940674, "learning_rate": 1.0096758104738156e-05, "loss": 0.2967, "step": 39730 }, { "epoch": 9.910224438902743, "grad_norm": 7.351691246032715, "learning_rate": 1.009426433915212e-05, "loss": 0.3309, "step": 39740 }, { "epoch": 9.912718204488778, "grad_norm": 6.551850318908691, "learning_rate": 1.0091770573566087e-05, "loss": 0.3027, "step": 39750 }, { "epoch": 9.915211970074813, "grad_norm": 10.421834945678711, "learning_rate": 1.008927680798005e-05, "loss": 0.3109, "step": 39760 }, { "epoch": 9.917705735660848, "grad_norm": 9.007574081420898, "learning_rate": 1.0086783042394014e-05, "loss": 0.3231, "step": 39770 }, { "epoch": 9.920199501246882, "grad_norm": 7.615650177001953, "learning_rate": 1.0084289276807981e-05, "loss": 0.3444, "step": 39780 }, { "epoch": 9.922693266832917, "grad_norm": 5.678508758544922, "learning_rate": 1.0081795511221946e-05, "loss": 0.3623, "step": 39790 }, { "epoch": 9.925187032418952, "grad_norm": 7.512736797332764, "learning_rate": 1.0079301745635912e-05, "loss": 0.2864, "step": 39800 }, { "epoch": 9.927680798004987, "grad_norm": 6.589831829071045, "learning_rate": 1.0076807980049877e-05, "loss": 0.3472, "step": 39810 }, { "epoch": 9.930174563591022, "grad_norm": 7.6342902183532715, "learning_rate": 1.007431421446384e-05, "loss": 0.3813, "step": 39820 }, { "epoch": 9.932668329177057, "grad_norm": 11.522653579711914, "learning_rate": 1.0071820448877808e-05, "loss": 0.3478, "step": 39830 }, { "epoch": 9.935162094763092, "grad_norm": 8.16284465789795, "learning_rate": 1.0069326683291771e-05, "loss": 0.3339, "step": 39840 }, { "epoch": 9.937655860349127, "grad_norm": 7.553341865539551, "learning_rate": 1.0066832917705735e-05, "loss": 0.3129, "step": 39850 }, { "epoch": 9.940149625935161, "grad_norm": 6.100545883178711, "learning_rate": 1.0064339152119702e-05, "loss": 0.3501, "step": 39860 }, { "epoch": 9.942643391521196, "grad_norm": 5.523427486419678, "learning_rate": 1.0061845386533666e-05, "loss": 0.392, "step": 39870 }, { "epoch": 9.945137157107231, "grad_norm": 6.449985980987549, "learning_rate": 1.0059351620947633e-05, "loss": 0.3228, "step": 39880 }, { "epoch": 9.947630922693268, "grad_norm": 8.599003791809082, "learning_rate": 1.0056857855361596e-05, "loss": 0.3922, "step": 39890 }, { "epoch": 9.950124688279303, "grad_norm": 7.818081855773926, "learning_rate": 1.0054364089775561e-05, "loss": 0.4501, "step": 39900 }, { "epoch": 9.952618453865338, "grad_norm": 6.245206356048584, "learning_rate": 1.0051870324189528e-05, "loss": 0.3167, "step": 39910 }, { "epoch": 9.955112219451372, "grad_norm": 9.523896217346191, "learning_rate": 1.0049376558603492e-05, "loss": 0.3944, "step": 39920 }, { "epoch": 9.957605985037407, "grad_norm": 12.862037658691406, "learning_rate": 1.0046882793017456e-05, "loss": 0.3859, "step": 39930 }, { "epoch": 9.960099750623442, "grad_norm": 11.528470993041992, "learning_rate": 1.0044389027431423e-05, "loss": 0.3911, "step": 39940 }, { "epoch": 9.962593516209477, "grad_norm": 9.270523071289062, "learning_rate": 1.0041895261845386e-05, "loss": 0.4362, "step": 39950 }, { "epoch": 9.965087281795512, "grad_norm": 9.051032066345215, "learning_rate": 1.0039401496259353e-05, "loss": 0.422, "step": 39960 }, { "epoch": 9.967581047381547, "grad_norm": 4.276822566986084, "learning_rate": 1.0036907730673317e-05, "loss": 0.397, "step": 39970 }, { "epoch": 9.970074812967582, "grad_norm": 4.984722137451172, "learning_rate": 1.0034413965087282e-05, "loss": 0.3636, "step": 39980 }, { "epoch": 9.972568578553616, "grad_norm": 5.631036281585693, "learning_rate": 1.0031920199501248e-05, "loss": 0.3556, "step": 39990 }, { "epoch": 9.975062344139651, "grad_norm": 8.992472648620605, "learning_rate": 1.0029426433915213e-05, "loss": 0.3561, "step": 40000 }, { "epoch": 9.977556109725686, "grad_norm": 9.377120971679688, "learning_rate": 1.0026932668329176e-05, "loss": 0.444, "step": 40010 }, { "epoch": 9.980049875311721, "grad_norm": NaN, "learning_rate": 1.0024688279301746e-05, "loss": 0.3719, "step": 40020 }, { "epoch": 9.982543640897756, "grad_norm": 6.351677417755127, "learning_rate": 1.002219451371571e-05, "loss": 0.2969, "step": 40030 }, { "epoch": 9.98503740648379, "grad_norm": 7.045421600341797, "learning_rate": 1.0019700748129677e-05, "loss": 0.3032, "step": 40040 }, { "epoch": 9.987531172069826, "grad_norm": 7.283246994018555, "learning_rate": 1.001720698254364e-05, "loss": 0.4718, "step": 40050 }, { "epoch": 9.99002493765586, "grad_norm": 6.027595520019531, "learning_rate": 1.0014713216957608e-05, "loss": 0.3524, "step": 40060 }, { "epoch": 9.992518703241895, "grad_norm": 5.910287380218506, "learning_rate": 1.0012219451371571e-05, "loss": 0.3885, "step": 40070 }, { "epoch": 9.99501246882793, "grad_norm": 6.286899089813232, "learning_rate": 1.0009725685785537e-05, "loss": 0.3322, "step": 40080 }, { "epoch": 9.997506234413965, "grad_norm": 6.434454441070557, "learning_rate": 1.0007231920199502e-05, "loss": 0.3139, "step": 40090 }, { "epoch": 10.0, "grad_norm": 6.4550700187683105, "learning_rate": 1.0004738154613467e-05, "loss": 0.3902, "step": 40100 }, { "epoch": 10.0, "eval_loss": 0.4127441346645355, "eval_runtime": 59.9961, "eval_samples_per_second": 16.718, "eval_steps_per_second": 16.718, "step": 40100 }, { "epoch": 10.002493765586035, "grad_norm": 7.405590057373047, "learning_rate": 1.0002244389027431e-05, "loss": 0.3798, "step": 40110 }, { "epoch": 10.00498753117207, "grad_norm": 6.414757251739502, "learning_rate": 9.999750623441398e-06, "loss": 0.3407, "step": 40120 }, { "epoch": 10.007481296758105, "grad_norm": 12.407922744750977, "learning_rate": 9.997256857855361e-06, "loss": 0.3276, "step": 40130 }, { "epoch": 10.00997506234414, "grad_norm": 7.273416996002197, "learning_rate": 9.994763092269327e-06, "loss": 0.3299, "step": 40140 }, { "epoch": 10.012468827930174, "grad_norm": 5.8642449378967285, "learning_rate": 9.992269326683292e-06, "loss": 0.3227, "step": 40150 }, { "epoch": 10.01496259351621, "grad_norm": 5.257193565368652, "learning_rate": 9.989775561097257e-06, "loss": 0.3175, "step": 40160 }, { "epoch": 10.017456359102244, "grad_norm": 10.810057640075684, "learning_rate": 9.987281795511223e-06, "loss": 0.4082, "step": 40170 }, { "epoch": 10.019950124688279, "grad_norm": 6.935123443603516, "learning_rate": 9.984788029925188e-06, "loss": 0.323, "step": 40180 }, { "epoch": 10.022443890274314, "grad_norm": 5.4332051277160645, "learning_rate": 9.982294264339153e-06, "loss": 0.231, "step": 40190 }, { "epoch": 10.024937655860349, "grad_norm": 8.775599479675293, "learning_rate": 9.979800498753119e-06, "loss": 0.3459, "step": 40200 }, { "epoch": 10.027431421446384, "grad_norm": 4.293675422668457, "learning_rate": 9.977306733167084e-06, "loss": 0.3608, "step": 40210 }, { "epoch": 10.029925187032418, "grad_norm": 7.175844669342041, "learning_rate": 9.974812967581048e-06, "loss": 0.3441, "step": 40220 }, { "epoch": 10.032418952618453, "grad_norm": 6.6354522705078125, "learning_rate": 9.972319201995013e-06, "loss": 0.3302, "step": 40230 }, { "epoch": 10.034912718204488, "grad_norm": 8.921165466308594, "learning_rate": 9.969825436408978e-06, "loss": 0.3587, "step": 40240 }, { "epoch": 10.037406483790523, "grad_norm": 5.317387580871582, "learning_rate": 9.967331670822943e-06, "loss": 0.3295, "step": 40250 }, { "epoch": 10.039900249376558, "grad_norm": 5.346241474151611, "learning_rate": 9.964837905236909e-06, "loss": 0.3452, "step": 40260 }, { "epoch": 10.042394014962593, "grad_norm": 7.5005106925964355, "learning_rate": 9.962344139650874e-06, "loss": 0.3677, "step": 40270 }, { "epoch": 10.044887780548628, "grad_norm": 6.900725841522217, "learning_rate": 9.95985037406484e-06, "loss": 0.3433, "step": 40280 }, { "epoch": 10.047381546134662, "grad_norm": 6.401949405670166, "learning_rate": 9.957356608478805e-06, "loss": 0.3746, "step": 40290 }, { "epoch": 10.049875311720697, "grad_norm": 8.203397750854492, "learning_rate": 9.954862842892768e-06, "loss": 0.2977, "step": 40300 }, { "epoch": 10.052369077306734, "grad_norm": 4.425477027893066, "learning_rate": 9.952369077306734e-06, "loss": 0.3271, "step": 40310 }, { "epoch": 10.054862842892769, "grad_norm": 9.94776439666748, "learning_rate": 9.949875311720699e-06, "loss": 0.3569, "step": 40320 }, { "epoch": 10.057356608478804, "grad_norm": 6.8607892990112305, "learning_rate": 9.947381546134664e-06, "loss": 0.3441, "step": 40330 }, { "epoch": 10.059850374064839, "grad_norm": 8.044713973999023, "learning_rate": 9.94488778054863e-06, "loss": 0.3992, "step": 40340 }, { "epoch": 10.062344139650873, "grad_norm": 7.818645477294922, "learning_rate": 9.942394014962595e-06, "loss": 0.3718, "step": 40350 }, { "epoch": 10.064837905236908, "grad_norm": 8.542047500610352, "learning_rate": 9.93990024937656e-06, "loss": 0.4638, "step": 40360 }, { "epoch": 10.067331670822943, "grad_norm": 9.035058975219727, "learning_rate": 9.937406483790526e-06, "loss": 0.3159, "step": 40370 }, { "epoch": 10.069825436408978, "grad_norm": 4.869987964630127, "learning_rate": 9.934912718204489e-06, "loss": 0.3242, "step": 40380 }, { "epoch": 10.072319201995013, "grad_norm": 10.995007514953613, "learning_rate": 9.932418952618454e-06, "loss": 0.3318, "step": 40390 }, { "epoch": 10.074812967581048, "grad_norm": 10.2716703414917, "learning_rate": 9.92992518703242e-06, "loss": 0.4521, "step": 40400 }, { "epoch": 10.077306733167083, "grad_norm": 5.772310256958008, "learning_rate": 9.927431421446385e-06, "loss": 0.3316, "step": 40410 }, { "epoch": 10.079800498753118, "grad_norm": 8.326214790344238, "learning_rate": 9.924937655860349e-06, "loss": 0.3413, "step": 40420 }, { "epoch": 10.082294264339152, "grad_norm": 6.764336109161377, "learning_rate": 9.922443890274314e-06, "loss": 0.3069, "step": 40430 }, { "epoch": 10.084788029925187, "grad_norm": 7.891993522644043, "learning_rate": 9.91995012468828e-06, "loss": 0.2665, "step": 40440 }, { "epoch": 10.087281795511222, "grad_norm": 6.041927814483643, "learning_rate": 9.917456359102246e-06, "loss": 0.2921, "step": 40450 }, { "epoch": 10.089775561097257, "grad_norm": 9.28152084350586, "learning_rate": 9.914962593516212e-06, "loss": 0.3558, "step": 40460 }, { "epoch": 10.092269326683292, "grad_norm": 6.925419330596924, "learning_rate": 9.912468827930175e-06, "loss": 0.3164, "step": 40470 }, { "epoch": 10.094763092269327, "grad_norm": 6.952348709106445, "learning_rate": 9.90997506234414e-06, "loss": 0.4215, "step": 40480 }, { "epoch": 10.097256857855362, "grad_norm": 8.346542358398438, "learning_rate": 9.907481296758106e-06, "loss": 0.4134, "step": 40490 }, { "epoch": 10.099750623441397, "grad_norm": 6.4831719398498535, "learning_rate": 9.904987531172071e-06, "loss": 0.3628, "step": 40500 }, { "epoch": 10.102244389027431, "grad_norm": 6.676276206970215, "learning_rate": 9.902493765586035e-06, "loss": 0.3413, "step": 40510 }, { "epoch": 10.104738154613466, "grad_norm": 9.890116691589355, "learning_rate": 9.9e-06, "loss": 0.3215, "step": 40520 }, { "epoch": 10.107231920199501, "grad_norm": 5.857176780700684, "learning_rate": 9.897506234413965e-06, "loss": 0.3393, "step": 40530 }, { "epoch": 10.109725685785536, "grad_norm": 6.540131092071533, "learning_rate": 9.89501246882793e-06, "loss": 0.3765, "step": 40540 }, { "epoch": 10.11221945137157, "grad_norm": 9.703693389892578, "learning_rate": 9.892518703241896e-06, "loss": 0.3769, "step": 40550 }, { "epoch": 10.114713216957606, "grad_norm": 9.611767768859863, "learning_rate": 9.890024937655861e-06, "loss": 0.3857, "step": 40560 }, { "epoch": 10.11720698254364, "grad_norm": 6.961813926696777, "learning_rate": 9.887531172069827e-06, "loss": 0.3274, "step": 40570 }, { "epoch": 10.119700748129675, "grad_norm": 7.749450206756592, "learning_rate": 9.885037406483792e-06, "loss": 0.3409, "step": 40580 }, { "epoch": 10.12219451371571, "grad_norm": 10.376683235168457, "learning_rate": 9.882543640897756e-06, "loss": 0.36, "step": 40590 }, { "epoch": 10.124688279301745, "grad_norm": 9.793608665466309, "learning_rate": 9.880049875311721e-06, "loss": 0.3467, "step": 40600 }, { "epoch": 10.12718204488778, "grad_norm": 6.481242656707764, "learning_rate": 9.877556109725686e-06, "loss": 0.3184, "step": 40610 }, { "epoch": 10.129675810473815, "grad_norm": 5.596631050109863, "learning_rate": 9.875062344139651e-06, "loss": 0.3533, "step": 40620 }, { "epoch": 10.13216957605985, "grad_norm": 12.358193397521973, "learning_rate": 9.872568578553617e-06, "loss": 0.3539, "step": 40630 }, { "epoch": 10.134663341645885, "grad_norm": 7.618841171264648, "learning_rate": 9.870074812967582e-06, "loss": 0.4053, "step": 40640 }, { "epoch": 10.13715710723192, "grad_norm": 6.739760875701904, "learning_rate": 9.867581047381547e-06, "loss": 0.3235, "step": 40650 }, { "epoch": 10.139650872817954, "grad_norm": 7.255684852600098, "learning_rate": 9.865087281795513e-06, "loss": 0.3134, "step": 40660 }, { "epoch": 10.14214463840399, "grad_norm": 10.463494300842285, "learning_rate": 9.862593516209476e-06, "loss": 0.3657, "step": 40670 }, { "epoch": 10.144638403990024, "grad_norm": 6.290800094604492, "learning_rate": 9.860099750623442e-06, "loss": 0.3472, "step": 40680 }, { "epoch": 10.147132169576059, "grad_norm": 7.880048751831055, "learning_rate": 9.857605985037407e-06, "loss": 0.349, "step": 40690 }, { "epoch": 10.149625935162096, "grad_norm": 8.012491226196289, "learning_rate": 9.855112219451372e-06, "loss": 0.3229, "step": 40700 }, { "epoch": 10.15211970074813, "grad_norm": 9.101916313171387, "learning_rate": 9.85286783042394e-06, "loss": 0.3899, "step": 40710 }, { "epoch": 10.154613466334165, "grad_norm": 10.565949440002441, "learning_rate": 9.850374064837906e-06, "loss": 0.342, "step": 40720 }, { "epoch": 10.1571072319202, "grad_norm": 6.891229629516602, "learning_rate": 9.847880299251871e-06, "loss": 0.3361, "step": 40730 }, { "epoch": 10.159600997506235, "grad_norm": 7.994174957275391, "learning_rate": 9.845386533665836e-06, "loss": 0.3542, "step": 40740 }, { "epoch": 10.16209476309227, "grad_norm": 13.515098571777344, "learning_rate": 9.842892768079802e-06, "loss": 0.371, "step": 40750 }, { "epoch": 10.164588528678305, "grad_norm": 6.52901554107666, "learning_rate": 9.840399002493767e-06, "loss": 0.3398, "step": 40760 }, { "epoch": 10.16708229426434, "grad_norm": 6.790650844573975, "learning_rate": 9.83790523690773e-06, "loss": 0.3099, "step": 40770 }, { "epoch": 10.169576059850375, "grad_norm": 4.857789516448975, "learning_rate": 9.835411471321696e-06, "loss": 0.3564, "step": 40780 }, { "epoch": 10.17206982543641, "grad_norm": 10.275793075561523, "learning_rate": 9.832917705735661e-06, "loss": 0.4056, "step": 40790 }, { "epoch": 10.174563591022444, "grad_norm": 5.9475812911987305, "learning_rate": 9.830423940149627e-06, "loss": 0.3462, "step": 40800 }, { "epoch": 10.17705735660848, "grad_norm": 7.682375431060791, "learning_rate": 9.827930174563592e-06, "loss": 0.3232, "step": 40810 }, { "epoch": 10.179551122194514, "grad_norm": 7.272604942321777, "learning_rate": 9.825436408977557e-06, "loss": 0.3526, "step": 40820 }, { "epoch": 10.182044887780549, "grad_norm": 9.69630241394043, "learning_rate": 9.822942643391523e-06, "loss": 0.3904, "step": 40830 }, { "epoch": 10.184538653366584, "grad_norm": 5.306740760803223, "learning_rate": 9.820448877805488e-06, "loss": 0.2868, "step": 40840 }, { "epoch": 10.187032418952619, "grad_norm": 6.456069469451904, "learning_rate": 9.817955112219451e-06, "loss": 0.3426, "step": 40850 }, { "epoch": 10.189526184538654, "grad_norm": 13.075380325317383, "learning_rate": 9.815461346633417e-06, "loss": 0.311, "step": 40860 }, { "epoch": 10.192019950124688, "grad_norm": 5.357050895690918, "learning_rate": 9.812967581047382e-06, "loss": 0.4198, "step": 40870 }, { "epoch": 10.194513715710723, "grad_norm": 7.080076694488525, "learning_rate": 9.810473815461347e-06, "loss": 0.3124, "step": 40880 }, { "epoch": 10.197007481296758, "grad_norm": 4.402791500091553, "learning_rate": 9.807980049875313e-06, "loss": 0.3291, "step": 40890 }, { "epoch": 10.199501246882793, "grad_norm": 6.491812705993652, "learning_rate": 9.805486284289278e-06, "loss": 0.3769, "step": 40900 }, { "epoch": 10.201995012468828, "grad_norm": 7.707225322723389, "learning_rate": 9.802992518703243e-06, "loss": 0.395, "step": 40910 }, { "epoch": 10.204488778054863, "grad_norm": 6.598991394042969, "learning_rate": 9.800498753117209e-06, "loss": 0.3356, "step": 40920 }, { "epoch": 10.206982543640898, "grad_norm": 6.032766342163086, "learning_rate": 9.798004987531174e-06, "loss": 0.3243, "step": 40930 }, { "epoch": 10.209476309226932, "grad_norm": 5.5279340744018555, "learning_rate": 9.795511221945138e-06, "loss": 0.3259, "step": 40940 }, { "epoch": 10.211970074812967, "grad_norm": 5.94141149520874, "learning_rate": 9.793017456359103e-06, "loss": 0.3158, "step": 40950 }, { "epoch": 10.214463840399002, "grad_norm": 8.342867851257324, "learning_rate": 9.790523690773068e-06, "loss": 0.3472, "step": 40960 }, { "epoch": 10.216957605985037, "grad_norm": 6.11809778213501, "learning_rate": 9.788029925187034e-06, "loss": 0.3245, "step": 40970 }, { "epoch": 10.219451371571072, "grad_norm": 3.928636074066162, "learning_rate": 9.785536159600997e-06, "loss": 0.2654, "step": 40980 }, { "epoch": 10.221945137157107, "grad_norm": 5.511411190032959, "learning_rate": 9.783042394014962e-06, "loss": 0.378, "step": 40990 }, { "epoch": 10.224438902743142, "grad_norm": 7.285804271697998, "learning_rate": 9.78054862842893e-06, "loss": 0.2728, "step": 41000 }, { "epoch": 10.226932668329177, "grad_norm": 10.05148696899414, "learning_rate": 9.778054862842895e-06, "loss": 0.2886, "step": 41010 }, { "epoch": 10.229426433915211, "grad_norm": 6.61144495010376, "learning_rate": 9.775561097256858e-06, "loss": 0.3763, "step": 41020 }, { "epoch": 10.231920199501246, "grad_norm": 11.291173934936523, "learning_rate": 9.773067331670824e-06, "loss": 0.3521, "step": 41030 }, { "epoch": 10.234413965087281, "grad_norm": 7.441661834716797, "learning_rate": 9.770573566084789e-06, "loss": 0.3958, "step": 41040 }, { "epoch": 10.236907730673316, "grad_norm": 4.365013599395752, "learning_rate": 9.768079800498754e-06, "loss": 0.3623, "step": 41050 }, { "epoch": 10.239401496259351, "grad_norm": 7.878891468048096, "learning_rate": 9.765586034912718e-06, "loss": 0.3634, "step": 41060 }, { "epoch": 10.241895261845386, "grad_norm": 8.221386909484863, "learning_rate": 9.763092269326683e-06, "loss": 0.394, "step": 41070 }, { "epoch": 10.24438902743142, "grad_norm": 6.871973991394043, "learning_rate": 9.760598503740649e-06, "loss": 0.3359, "step": 41080 }, { "epoch": 10.246882793017456, "grad_norm": 7.841533184051514, "learning_rate": 9.758104738154614e-06, "loss": 0.316, "step": 41090 }, { "epoch": 10.24937655860349, "grad_norm": 10.699889183044434, "learning_rate": 9.755610972568579e-06, "loss": 0.3793, "step": 41100 }, { "epoch": 10.251870324189527, "grad_norm": 7.462698459625244, "learning_rate": 9.753117206982544e-06, "loss": 0.3795, "step": 41110 }, { "epoch": 10.254364089775562, "grad_norm": 5.521877765655518, "learning_rate": 9.75062344139651e-06, "loss": 0.3306, "step": 41120 }, { "epoch": 10.256857855361597, "grad_norm": 6.224538803100586, "learning_rate": 9.748129675810475e-06, "loss": 0.4206, "step": 41130 }, { "epoch": 10.259351620947632, "grad_norm": 10.534749984741211, "learning_rate": 9.74563591022444e-06, "loss": 0.3515, "step": 41140 }, { "epoch": 10.261845386533667, "grad_norm": 7.875677585601807, "learning_rate": 9.743142144638404e-06, "loss": 0.3535, "step": 41150 }, { "epoch": 10.264339152119701, "grad_norm": 6.144274711608887, "learning_rate": 9.74064837905237e-06, "loss": 0.3605, "step": 41160 }, { "epoch": 10.266832917705736, "grad_norm": 9.638225555419922, "learning_rate": 9.738154613466335e-06, "loss": 0.3261, "step": 41170 }, { "epoch": 10.269326683291771, "grad_norm": 13.971467018127441, "learning_rate": 9.7356608478803e-06, "loss": 0.3333, "step": 41180 }, { "epoch": 10.271820448877806, "grad_norm": 7.335572242736816, "learning_rate": 9.733167082294265e-06, "loss": 0.3326, "step": 41190 }, { "epoch": 10.27431421446384, "grad_norm": 5.852632999420166, "learning_rate": 9.73067331670823e-06, "loss": 0.3612, "step": 41200 }, { "epoch": 10.276807980049876, "grad_norm": 6.09345006942749, "learning_rate": 9.728179551122196e-06, "loss": 0.3995, "step": 41210 }, { "epoch": 10.27930174563591, "grad_norm": 5.712091445922852, "learning_rate": 9.725685785536161e-06, "loss": 0.3078, "step": 41220 }, { "epoch": 10.281795511221945, "grad_norm": 8.864590644836426, "learning_rate": 9.723192019950125e-06, "loss": 0.3629, "step": 41230 }, { "epoch": 10.28428927680798, "grad_norm": 8.034436225891113, "learning_rate": 9.72069825436409e-06, "loss": 0.3913, "step": 41240 }, { "epoch": 10.286783042394015, "grad_norm": 7.192776203155518, "learning_rate": 9.718204488778055e-06, "loss": 0.3908, "step": 41250 }, { "epoch": 10.28927680798005, "grad_norm": 5.29744291305542, "learning_rate": 9.71571072319202e-06, "loss": 0.2776, "step": 41260 }, { "epoch": 10.291770573566085, "grad_norm": 8.176728248596191, "learning_rate": 9.713216957605986e-06, "loss": 0.3514, "step": 41270 }, { "epoch": 10.29426433915212, "grad_norm": 6.879700660705566, "learning_rate": 9.710723192019951e-06, "loss": 0.3185, "step": 41280 }, { "epoch": 10.296758104738155, "grad_norm": 5.136245250701904, "learning_rate": 9.708229426433917e-06, "loss": 0.2969, "step": 41290 }, { "epoch": 10.29925187032419, "grad_norm": 12.164958953857422, "learning_rate": 9.705735660847882e-06, "loss": 0.3606, "step": 41300 }, { "epoch": 10.301745635910224, "grad_norm": 5.413372993469238, "learning_rate": 9.703241895261846e-06, "loss": 0.3706, "step": 41310 }, { "epoch": 10.30423940149626, "grad_norm": 5.834495544433594, "learning_rate": 9.700748129675811e-06, "loss": 0.2906, "step": 41320 }, { "epoch": 10.306733167082294, "grad_norm": 6.524380207061768, "learning_rate": 9.698254364089776e-06, "loss": 0.3777, "step": 41330 }, { "epoch": 10.309226932668329, "grad_norm": 9.674921989440918, "learning_rate": 9.695760598503741e-06, "loss": 0.3734, "step": 41340 }, { "epoch": 10.311720698254364, "grad_norm": 7.306479454040527, "learning_rate": 9.693266832917707e-06, "loss": 0.4354, "step": 41350 }, { "epoch": 10.314214463840399, "grad_norm": 5.288915157318115, "learning_rate": 9.690773067331672e-06, "loss": 0.2979, "step": 41360 }, { "epoch": 10.316708229426434, "grad_norm": 7.090877056121826, "learning_rate": 9.688279301745637e-06, "loss": 0.3617, "step": 41370 }, { "epoch": 10.319201995012468, "grad_norm": 10.45274543762207, "learning_rate": 9.685785536159603e-06, "loss": 0.3738, "step": 41380 }, { "epoch": 10.321695760598503, "grad_norm": 6.137414455413818, "learning_rate": 9.683291770573568e-06, "loss": 0.3855, "step": 41390 }, { "epoch": 10.324189526184538, "grad_norm": 6.6708221435546875, "learning_rate": 9.680798004987532e-06, "loss": 0.3804, "step": 41400 }, { "epoch": 10.326683291770573, "grad_norm": 7.918160438537598, "learning_rate": 9.678304239401497e-06, "loss": 0.3436, "step": 41410 }, { "epoch": 10.329177057356608, "grad_norm": 5.83142614364624, "learning_rate": 9.675810473815462e-06, "loss": 0.3542, "step": 41420 }, { "epoch": 10.331670822942643, "grad_norm": 7.670297145843506, "learning_rate": 9.673316708229428e-06, "loss": 0.3867, "step": 41430 }, { "epoch": 10.334164588528678, "grad_norm": 5.303461074829102, "learning_rate": 9.670822942643391e-06, "loss": 0.3995, "step": 41440 }, { "epoch": 10.336658354114713, "grad_norm": 6.452824115753174, "learning_rate": 9.668329177057357e-06, "loss": 0.3595, "step": 41450 }, { "epoch": 10.339152119700747, "grad_norm": 14.923982620239258, "learning_rate": 9.665835411471322e-06, "loss": 0.3651, "step": 41460 }, { "epoch": 10.341645885286782, "grad_norm": 5.03542947769165, "learning_rate": 9.663341645885289e-06, "loss": 0.3361, "step": 41470 }, { "epoch": 10.344139650872817, "grad_norm": 7.917986869812012, "learning_rate": 9.660847880299252e-06, "loss": 0.3642, "step": 41480 }, { "epoch": 10.346633416458852, "grad_norm": 8.379772186279297, "learning_rate": 9.658354114713218e-06, "loss": 0.3277, "step": 41490 }, { "epoch": 10.349127182044889, "grad_norm": 7.017518997192383, "learning_rate": 9.655860349127183e-06, "loss": 0.3944, "step": 41500 }, { "epoch": 10.351620947630924, "grad_norm": 5.158971309661865, "learning_rate": 9.653366583541148e-06, "loss": 0.3771, "step": 41510 }, { "epoch": 10.354114713216958, "grad_norm": 5.925132751464844, "learning_rate": 9.650872817955112e-06, "loss": 0.3433, "step": 41520 }, { "epoch": 10.356608478802993, "grad_norm": 8.094498634338379, "learning_rate": 9.648379052369077e-06, "loss": 0.3903, "step": 41530 }, { "epoch": 10.359102244389028, "grad_norm": 5.886862277984619, "learning_rate": 9.645885286783043e-06, "loss": 0.3353, "step": 41540 }, { "epoch": 10.361596009975063, "grad_norm": 7.800416469573975, "learning_rate": 9.643391521197008e-06, "loss": 0.3465, "step": 41550 }, { "epoch": 10.364089775561098, "grad_norm": 5.397622108459473, "learning_rate": 9.640897755610973e-06, "loss": 0.4301, "step": 41560 }, { "epoch": 10.366583541147133, "grad_norm": 8.980853080749512, "learning_rate": 9.638403990024939e-06, "loss": 0.3571, "step": 41570 }, { "epoch": 10.369077306733168, "grad_norm": 9.865492820739746, "learning_rate": 9.635910224438904e-06, "loss": 0.322, "step": 41580 }, { "epoch": 10.371571072319203, "grad_norm": 7.520500183105469, "learning_rate": 9.63341645885287e-06, "loss": 0.4147, "step": 41590 }, { "epoch": 10.374064837905237, "grad_norm": 8.898337364196777, "learning_rate": 9.630922693266833e-06, "loss": 0.3443, "step": 41600 }, { "epoch": 10.376558603491272, "grad_norm": 7.803848743438721, "learning_rate": 9.628428927680798e-06, "loss": 0.3786, "step": 41610 }, { "epoch": 10.379052369077307, "grad_norm": 9.381965637207031, "learning_rate": 9.625935162094763e-06, "loss": 0.4683, "step": 41620 }, { "epoch": 10.381546134663342, "grad_norm": 6.264208793640137, "learning_rate": 9.623441396508729e-06, "loss": 0.3297, "step": 41630 }, { "epoch": 10.384039900249377, "grad_norm": 6.923156261444092, "learning_rate": 9.620947630922694e-06, "loss": 0.3056, "step": 41640 }, { "epoch": 10.386533665835412, "grad_norm": 7.695561408996582, "learning_rate": 9.61845386533666e-06, "loss": 0.424, "step": 41650 }, { "epoch": 10.389027431421447, "grad_norm": 5.747522354125977, "learning_rate": 9.615960099750625e-06, "loss": 0.3297, "step": 41660 }, { "epoch": 10.391521197007481, "grad_norm": 5.526257038116455, "learning_rate": 9.61346633416459e-06, "loss": 0.3702, "step": 41670 }, { "epoch": 10.394014962593516, "grad_norm": 7.8705244064331055, "learning_rate": 9.610972568578555e-06, "loss": 0.346, "step": 41680 }, { "epoch": 10.396508728179551, "grad_norm": 7.783495903015137, "learning_rate": 9.608478802992519e-06, "loss": 0.3441, "step": 41690 }, { "epoch": 10.399002493765586, "grad_norm": 8.003525733947754, "learning_rate": 9.605985037406484e-06, "loss": 0.3707, "step": 41700 }, { "epoch": 10.401496259351621, "grad_norm": 7.88194465637207, "learning_rate": 9.60349127182045e-06, "loss": 0.3363, "step": 41710 }, { "epoch": 10.403990024937656, "grad_norm": 7.859609127044678, "learning_rate": 9.600997506234415e-06, "loss": 0.3417, "step": 41720 }, { "epoch": 10.40648379052369, "grad_norm": 6.967172145843506, "learning_rate": 9.59850374064838e-06, "loss": 0.3449, "step": 41730 }, { "epoch": 10.408977556109726, "grad_norm": 6.901447772979736, "learning_rate": 9.596009975062345e-06, "loss": 0.4124, "step": 41740 }, { "epoch": 10.41147132169576, "grad_norm": 7.131737232208252, "learning_rate": 9.59351620947631e-06, "loss": 0.3701, "step": 41750 }, { "epoch": 10.413965087281795, "grad_norm": 8.898688316345215, "learning_rate": 9.591022443890276e-06, "loss": 0.323, "step": 41760 }, { "epoch": 10.41645885286783, "grad_norm": 7.909594535827637, "learning_rate": 9.58852867830424e-06, "loss": 0.3552, "step": 41770 }, { "epoch": 10.418952618453865, "grad_norm": 5.500792026519775, "learning_rate": 9.586034912718205e-06, "loss": 0.3423, "step": 41780 }, { "epoch": 10.4214463840399, "grad_norm": 9.347502708435059, "learning_rate": 9.58354114713217e-06, "loss": 0.3671, "step": 41790 }, { "epoch": 10.423940149625935, "grad_norm": 6.794683933258057, "learning_rate": 9.581047381546136e-06, "loss": 0.4234, "step": 41800 }, { "epoch": 10.42643391521197, "grad_norm": 13.040794372558594, "learning_rate": 9.5785536159601e-06, "loss": 0.3615, "step": 41810 }, { "epoch": 10.428927680798004, "grad_norm": 8.55950927734375, "learning_rate": 9.576059850374066e-06, "loss": 0.3848, "step": 41820 }, { "epoch": 10.43142144638404, "grad_norm": 8.469449043273926, "learning_rate": 9.573566084788032e-06, "loss": 0.315, "step": 41830 }, { "epoch": 10.433915211970074, "grad_norm": 6.907922744750977, "learning_rate": 9.571072319201997e-06, "loss": 0.4197, "step": 41840 }, { "epoch": 10.436408977556109, "grad_norm": 5.317709445953369, "learning_rate": 9.56857855361596e-06, "loss": 0.3535, "step": 41850 }, { "epoch": 10.438902743142144, "grad_norm": 6.60930061340332, "learning_rate": 9.566084788029926e-06, "loss": 0.3643, "step": 41860 }, { "epoch": 10.441396508728179, "grad_norm": 9.468186378479004, "learning_rate": 9.563591022443891e-06, "loss": 0.3384, "step": 41870 }, { "epoch": 10.443890274314214, "grad_norm": 5.716958522796631, "learning_rate": 9.561097256857856e-06, "loss": 0.3265, "step": 41880 }, { "epoch": 10.446384039900249, "grad_norm": 7.9432172775268555, "learning_rate": 9.558603491271822e-06, "loss": 0.3969, "step": 41890 }, { "epoch": 10.448877805486283, "grad_norm": 11.144307136535645, "learning_rate": 9.556109725685785e-06, "loss": 0.3584, "step": 41900 }, { "epoch": 10.451371571072318, "grad_norm": 9.144835472106934, "learning_rate": 9.55361596009975e-06, "loss": 0.3963, "step": 41910 }, { "epoch": 10.453865336658355, "grad_norm": 6.574597358703613, "learning_rate": 9.551122194513716e-06, "loss": 0.3258, "step": 41920 }, { "epoch": 10.45635910224439, "grad_norm": 5.504093647003174, "learning_rate": 9.548628428927681e-06, "loss": 0.377, "step": 41930 }, { "epoch": 10.458852867830425, "grad_norm": 5.041776180267334, "learning_rate": 9.546134663341647e-06, "loss": 0.3502, "step": 41940 }, { "epoch": 10.46134663341646, "grad_norm": 8.293055534362793, "learning_rate": 9.543640897755612e-06, "loss": 0.3173, "step": 41950 }, { "epoch": 10.463840399002494, "grad_norm": 6.408956527709961, "learning_rate": 9.541147132169577e-06, "loss": 0.3826, "step": 41960 }, { "epoch": 10.46633416458853, "grad_norm": 9.970688819885254, "learning_rate": 9.538653366583542e-06, "loss": 0.3575, "step": 41970 }, { "epoch": 10.468827930174564, "grad_norm": 6.766303062438965, "learning_rate": 9.536159600997506e-06, "loss": 0.3414, "step": 41980 }, { "epoch": 10.471321695760599, "grad_norm": 5.876713275909424, "learning_rate": 9.533665835411471e-06, "loss": 0.3462, "step": 41990 }, { "epoch": 10.473815461346634, "grad_norm": 8.713937759399414, "learning_rate": 9.531172069825437e-06, "loss": 0.3541, "step": 42000 }, { "epoch": 10.476309226932669, "grad_norm": 5.201702117919922, "learning_rate": 9.528678304239402e-06, "loss": 0.3199, "step": 42010 }, { "epoch": 10.478802992518704, "grad_norm": 12.989068031311035, "learning_rate": 9.526184538653367e-06, "loss": 0.3021, "step": 42020 }, { "epoch": 10.481296758104738, "grad_norm": 7.327224254608154, "learning_rate": 9.523690773067333e-06, "loss": 0.3544, "step": 42030 }, { "epoch": 10.483790523690773, "grad_norm": 10.310848236083984, "learning_rate": 9.521197007481298e-06, "loss": 0.3914, "step": 42040 }, { "epoch": 10.486284289276808, "grad_norm": 6.748259544372559, "learning_rate": 9.518703241895263e-06, "loss": 0.3456, "step": 42050 }, { "epoch": 10.488778054862843, "grad_norm": 12.408527374267578, "learning_rate": 9.516209476309227e-06, "loss": 0.3296, "step": 42060 }, { "epoch": 10.491271820448878, "grad_norm": 8.86363697052002, "learning_rate": 9.513715710723192e-06, "loss": 0.3759, "step": 42070 }, { "epoch": 10.493765586034913, "grad_norm": 9.177289962768555, "learning_rate": 9.511221945137157e-06, "loss": 0.375, "step": 42080 }, { "epoch": 10.496259351620948, "grad_norm": 8.17337703704834, "learning_rate": 9.508728179551123e-06, "loss": 0.3493, "step": 42090 }, { "epoch": 10.498753117206983, "grad_norm": 7.164012908935547, "learning_rate": 9.506234413965088e-06, "loss": 0.3652, "step": 42100 }, { "epoch": 10.501246882793017, "grad_norm": 6.665464401245117, "learning_rate": 9.503740648379053e-06, "loss": 0.3838, "step": 42110 }, { "epoch": 10.503740648379052, "grad_norm": 7.328868389129639, "learning_rate": 9.501246882793019e-06, "loss": 0.3464, "step": 42120 }, { "epoch": 10.506234413965087, "grad_norm": 6.057427406311035, "learning_rate": 9.498753117206984e-06, "loss": 0.321, "step": 42130 }, { "epoch": 10.508728179551122, "grad_norm": 5.329481601715088, "learning_rate": 9.49625935162095e-06, "loss": 0.2875, "step": 42140 }, { "epoch": 10.511221945137157, "grad_norm": 7.551417827606201, "learning_rate": 9.493765586034913e-06, "loss": 0.3713, "step": 42150 }, { "epoch": 10.513715710723192, "grad_norm": 5.047544956207275, "learning_rate": 9.491271820448878e-06, "loss": 0.4153, "step": 42160 }, { "epoch": 10.516209476309227, "grad_norm": 5.810826301574707, "learning_rate": 9.488778054862844e-06, "loss": 0.3527, "step": 42170 }, { "epoch": 10.518703241895262, "grad_norm": 5.0741753578186035, "learning_rate": 9.486284289276809e-06, "loss": 0.3569, "step": 42180 }, { "epoch": 10.521197007481296, "grad_norm": 5.9377641677856445, "learning_rate": 9.483790523690774e-06, "loss": 0.3968, "step": 42190 }, { "epoch": 10.523690773067331, "grad_norm": 8.227517127990723, "learning_rate": 9.48129675810474e-06, "loss": 0.3678, "step": 42200 }, { "epoch": 10.526184538653366, "grad_norm": 7.43702507019043, "learning_rate": 9.478802992518705e-06, "loss": 0.3083, "step": 42210 }, { "epoch": 10.528678304239401, "grad_norm": 6.5147199630737305, "learning_rate": 9.47630922693267e-06, "loss": 0.2658, "step": 42220 }, { "epoch": 10.531172069825436, "grad_norm": 6.781185150146484, "learning_rate": 9.473815461346634e-06, "loss": 0.3596, "step": 42230 }, { "epoch": 10.53366583541147, "grad_norm": 7.411608695983887, "learning_rate": 9.471321695760599e-06, "loss": 0.2931, "step": 42240 }, { "epoch": 10.536159600997506, "grad_norm": 12.974010467529297, "learning_rate": 9.468827930174564e-06, "loss": 0.2816, "step": 42250 }, { "epoch": 10.53865336658354, "grad_norm": 11.239740371704102, "learning_rate": 9.46633416458853e-06, "loss": 0.3805, "step": 42260 }, { "epoch": 10.541147132169575, "grad_norm": 7.265082836151123, "learning_rate": 9.463840399002493e-06, "loss": 0.3101, "step": 42270 }, { "epoch": 10.54364089775561, "grad_norm": 7.100124359130859, "learning_rate": 9.461346633416459e-06, "loss": 0.343, "step": 42280 }, { "epoch": 10.546134663341645, "grad_norm": 5.751792907714844, "learning_rate": 9.458852867830426e-06, "loss": 0.3176, "step": 42290 }, { "epoch": 10.548628428927682, "grad_norm": 6.716248989105225, "learning_rate": 9.456359102244391e-06, "loss": 0.3307, "step": 42300 }, { "epoch": 10.551122194513717, "grad_norm": 9.329224586486816, "learning_rate": 9.453865336658355e-06, "loss": 0.3778, "step": 42310 }, { "epoch": 10.553615960099751, "grad_norm": 8.02515697479248, "learning_rate": 9.45137157107232e-06, "loss": 0.4504, "step": 42320 }, { "epoch": 10.556109725685786, "grad_norm": 8.72829818725586, "learning_rate": 9.448877805486285e-06, "loss": 0.4172, "step": 42330 }, { "epoch": 10.558603491271821, "grad_norm": 6.4581780433654785, "learning_rate": 9.44638403990025e-06, "loss": 0.328, "step": 42340 }, { "epoch": 10.561097256857856, "grad_norm": 10.020159721374512, "learning_rate": 9.443890274314216e-06, "loss": 0.4516, "step": 42350 }, { "epoch": 10.563591022443891, "grad_norm": 8.494389533996582, "learning_rate": 9.44139650872818e-06, "loss": 0.3872, "step": 42360 }, { "epoch": 10.566084788029926, "grad_norm": 7.352220058441162, "learning_rate": 9.438902743142145e-06, "loss": 0.2962, "step": 42370 }, { "epoch": 10.56857855361596, "grad_norm": 7.202230930328369, "learning_rate": 9.43640897755611e-06, "loss": 0.3706, "step": 42380 }, { "epoch": 10.571072319201996, "grad_norm": 6.778585433959961, "learning_rate": 9.433915211970075e-06, "loss": 0.3202, "step": 42390 }, { "epoch": 10.57356608478803, "grad_norm": 10.143278121948242, "learning_rate": 9.43142144638404e-06, "loss": 0.3963, "step": 42400 }, { "epoch": 10.576059850374065, "grad_norm": 9.119635581970215, "learning_rate": 9.428927680798006e-06, "loss": 0.3016, "step": 42410 }, { "epoch": 10.5785536159601, "grad_norm": 6.856378555297852, "learning_rate": 9.426433915211971e-06, "loss": 0.3479, "step": 42420 }, { "epoch": 10.581047381546135, "grad_norm": 5.599526882171631, "learning_rate": 9.423940149625937e-06, "loss": 0.3469, "step": 42430 }, { "epoch": 10.58354114713217, "grad_norm": 6.967793941497803, "learning_rate": 9.4214463840399e-06, "loss": 0.3133, "step": 42440 }, { "epoch": 10.586034912718205, "grad_norm": 5.5582404136657715, "learning_rate": 9.418952618453865e-06, "loss": 0.3838, "step": 42450 }, { "epoch": 10.58852867830424, "grad_norm": 11.962347030639648, "learning_rate": 9.41645885286783e-06, "loss": 0.3827, "step": 42460 }, { "epoch": 10.591022443890274, "grad_norm": 5.947192668914795, "learning_rate": 9.413965087281796e-06, "loss": 0.3638, "step": 42470 }, { "epoch": 10.59351620947631, "grad_norm": 5.291638374328613, "learning_rate": 9.411471321695761e-06, "loss": 0.3622, "step": 42480 }, { "epoch": 10.596009975062344, "grad_norm": 6.401669979095459, "learning_rate": 9.408977556109727e-06, "loss": 0.3045, "step": 42490 }, { "epoch": 10.598503740648379, "grad_norm": 8.00327205657959, "learning_rate": 9.406483790523692e-06, "loss": 0.3862, "step": 42500 }, { "epoch": 10.600997506234414, "grad_norm": 8.175292015075684, "learning_rate": 9.403990024937657e-06, "loss": 0.3123, "step": 42510 }, { "epoch": 10.603491271820449, "grad_norm": 13.475317001342773, "learning_rate": 9.401496259351621e-06, "loss": 0.3399, "step": 42520 }, { "epoch": 10.605985037406484, "grad_norm": 8.634927749633789, "learning_rate": 9.399002493765586e-06, "loss": 0.369, "step": 42530 }, { "epoch": 10.608478802992519, "grad_norm": 8.839632034301758, "learning_rate": 9.396508728179552e-06, "loss": 0.3389, "step": 42540 }, { "epoch": 10.610972568578553, "grad_norm": 4.931496620178223, "learning_rate": 9.394014962593517e-06, "loss": 0.3422, "step": 42550 }, { "epoch": 10.613466334164588, "grad_norm": 5.307199001312256, "learning_rate": 9.391521197007482e-06, "loss": 0.2964, "step": 42560 }, { "epoch": 10.615960099750623, "grad_norm": 11.918758392333984, "learning_rate": 9.389027431421447e-06, "loss": 0.3132, "step": 42570 }, { "epoch": 10.618453865336658, "grad_norm": 10.152669906616211, "learning_rate": 9.386533665835413e-06, "loss": 0.3228, "step": 42580 }, { "epoch": 10.620947630922693, "grad_norm": 8.567078590393066, "learning_rate": 9.384039900249378e-06, "loss": 0.4226, "step": 42590 }, { "epoch": 10.623441396508728, "grad_norm": 9.373230934143066, "learning_rate": 9.381546134663343e-06, "loss": 0.3664, "step": 42600 }, { "epoch": 10.625935162094763, "grad_norm": 7.585546016693115, "learning_rate": 9.379052369077307e-06, "loss": 0.3636, "step": 42610 }, { "epoch": 10.628428927680797, "grad_norm": 6.881632328033447, "learning_rate": 9.376558603491272e-06, "loss": 0.304, "step": 42620 }, { "epoch": 10.630922693266832, "grad_norm": 5.626585483551025, "learning_rate": 9.374064837905238e-06, "loss": 0.3563, "step": 42630 }, { "epoch": 10.633416458852867, "grad_norm": 14.296299934387207, "learning_rate": 9.371571072319203e-06, "loss": 0.3678, "step": 42640 }, { "epoch": 10.635910224438902, "grad_norm": 9.416000366210938, "learning_rate": 9.369077306733168e-06, "loss": 0.3737, "step": 42650 }, { "epoch": 10.638403990024937, "grad_norm": 7.975756645202637, "learning_rate": 9.366583541147134e-06, "loss": 0.3162, "step": 42660 }, { "epoch": 10.640897755610972, "grad_norm": 10.369644165039062, "learning_rate": 9.364089775561099e-06, "loss": 0.3617, "step": 42670 }, { "epoch": 10.643391521197007, "grad_norm": 6.275656700134277, "learning_rate": 9.361596009975064e-06, "loss": 0.3393, "step": 42680 }, { "epoch": 10.645885286783042, "grad_norm": 8.759428977966309, "learning_rate": 9.359102244389028e-06, "loss": 0.3428, "step": 42690 }, { "epoch": 10.648379052369076, "grad_norm": 5.912599563598633, "learning_rate": 9.356608478802993e-06, "loss": 0.3031, "step": 42700 }, { "epoch": 10.650872817955111, "grad_norm": 6.889318943023682, "learning_rate": 9.354114713216958e-06, "loss": 0.3897, "step": 42710 }, { "epoch": 10.653366583541148, "grad_norm": 5.2232346534729, "learning_rate": 9.351620947630924e-06, "loss": 0.3764, "step": 42720 }, { "epoch": 10.655860349127183, "grad_norm": 9.666282653808594, "learning_rate": 9.349127182044887e-06, "loss": 0.355, "step": 42730 }, { "epoch": 10.658354114713218, "grad_norm": 6.856553077697754, "learning_rate": 9.346633416458853e-06, "loss": 0.3644, "step": 42740 }, { "epoch": 10.660847880299253, "grad_norm": 7.544265270233154, "learning_rate": 9.344139650872818e-06, "loss": 0.3547, "step": 42750 }, { "epoch": 10.663341645885287, "grad_norm": 9.202102661132812, "learning_rate": 9.341645885286785e-06, "loss": 0.3561, "step": 42760 }, { "epoch": 10.665835411471322, "grad_norm": 5.572080135345459, "learning_rate": 9.339152119700749e-06, "loss": 0.379, "step": 42770 }, { "epoch": 10.668329177057357, "grad_norm": 7.436700820922852, "learning_rate": 9.336658354114714e-06, "loss": 0.341, "step": 42780 }, { "epoch": 10.670822942643392, "grad_norm": 7.51735782623291, "learning_rate": 9.33416458852868e-06, "loss": 0.3007, "step": 42790 }, { "epoch": 10.673316708229427, "grad_norm": 7.93154239654541, "learning_rate": 9.331670822942645e-06, "loss": 0.3787, "step": 42800 }, { "epoch": 10.675810473815462, "grad_norm": 7.7301106452941895, "learning_rate": 9.329177057356608e-06, "loss": 0.3315, "step": 42810 }, { "epoch": 10.678304239401497, "grad_norm": 4.633336544036865, "learning_rate": 9.326683291770573e-06, "loss": 0.4015, "step": 42820 }, { "epoch": 10.680798004987532, "grad_norm": 8.565386772155762, "learning_rate": 9.324189526184539e-06, "loss": 0.3873, "step": 42830 }, { "epoch": 10.683291770573566, "grad_norm": 5.983166694641113, "learning_rate": 9.321695760598504e-06, "loss": 0.3298, "step": 42840 }, { "epoch": 10.685785536159601, "grad_norm": 8.084731101989746, "learning_rate": 9.31920199501247e-06, "loss": 0.2891, "step": 42850 }, { "epoch": 10.688279301745636, "grad_norm": 9.424635887145996, "learning_rate": 9.316708229426435e-06, "loss": 0.3609, "step": 42860 }, { "epoch": 10.690773067331671, "grad_norm": 4.796233654022217, "learning_rate": 9.3142144638404e-06, "loss": 0.3446, "step": 42870 }, { "epoch": 10.693266832917706, "grad_norm": 8.594659805297852, "learning_rate": 9.311720698254365e-06, "loss": 0.3575, "step": 42880 }, { "epoch": 10.69576059850374, "grad_norm": 5.583627223968506, "learning_rate": 9.30922693266833e-06, "loss": 0.3207, "step": 42890 }, { "epoch": 10.698254364089776, "grad_norm": 7.472324848175049, "learning_rate": 9.306733167082294e-06, "loss": 0.3412, "step": 42900 }, { "epoch": 10.70074812967581, "grad_norm": 5.4451584815979, "learning_rate": 9.30423940149626e-06, "loss": 0.3852, "step": 42910 }, { "epoch": 10.703241895261845, "grad_norm": 8.073676109313965, "learning_rate": 9.301745635910225e-06, "loss": 0.3938, "step": 42920 }, { "epoch": 10.70573566084788, "grad_norm": 5.401284694671631, "learning_rate": 9.29925187032419e-06, "loss": 0.2982, "step": 42930 }, { "epoch": 10.708229426433915, "grad_norm": 10.709132194519043, "learning_rate": 9.296758104738155e-06, "loss": 0.3338, "step": 42940 }, { "epoch": 10.71072319201995, "grad_norm": 6.343472957611084, "learning_rate": 9.29426433915212e-06, "loss": 0.3251, "step": 42950 }, { "epoch": 10.713216957605985, "grad_norm": 7.1593499183654785, "learning_rate": 9.291770573566086e-06, "loss": 0.332, "step": 42960 }, { "epoch": 10.71571072319202, "grad_norm": 7.830443382263184, "learning_rate": 9.289276807980051e-06, "loss": 0.4038, "step": 42970 }, { "epoch": 10.718204488778055, "grad_norm": 8.076725959777832, "learning_rate": 9.286783042394015e-06, "loss": 0.3343, "step": 42980 }, { "epoch": 10.72069825436409, "grad_norm": 7.131729602813721, "learning_rate": 9.28428927680798e-06, "loss": 0.316, "step": 42990 }, { "epoch": 10.723192019950124, "grad_norm": 6.504873752593994, "learning_rate": 9.281795511221946e-06, "loss": 0.3059, "step": 43000 }, { "epoch": 10.72568578553616, "grad_norm": 6.572418212890625, "learning_rate": 9.279301745635911e-06, "loss": 0.3717, "step": 43010 }, { "epoch": 10.728179551122194, "grad_norm": 7.904941558837891, "learning_rate": 9.276807980049876e-06, "loss": 0.405, "step": 43020 }, { "epoch": 10.730673316708229, "grad_norm": 7.299516201019287, "learning_rate": 9.274314214463842e-06, "loss": 0.3487, "step": 43030 }, { "epoch": 10.733167082294264, "grad_norm": 10.756522178649902, "learning_rate": 9.271820448877807e-06, "loss": 0.3513, "step": 43040 }, { "epoch": 10.735660847880299, "grad_norm": 5.3038010597229, "learning_rate": 9.269326683291772e-06, "loss": 0.2722, "step": 43050 }, { "epoch": 10.738154613466333, "grad_norm": 9.962454795837402, "learning_rate": 9.266832917705736e-06, "loss": 0.3812, "step": 43060 }, { "epoch": 10.740648379052368, "grad_norm": 6.948879718780518, "learning_rate": 9.264339152119701e-06, "loss": 0.3602, "step": 43070 }, { "epoch": 10.743142144638403, "grad_norm": 6.786881923675537, "learning_rate": 9.261845386533666e-06, "loss": 0.4163, "step": 43080 }, { "epoch": 10.745635910224438, "grad_norm": 9.627817153930664, "learning_rate": 9.259351620947632e-06, "loss": 0.3901, "step": 43090 }, { "epoch": 10.748129675810475, "grad_norm": 6.600256443023682, "learning_rate": 9.256857855361597e-06, "loss": 0.324, "step": 43100 }, { "epoch": 10.75062344139651, "grad_norm": 6.746035575866699, "learning_rate": 9.254364089775562e-06, "loss": 0.3914, "step": 43110 }, { "epoch": 10.753117206982544, "grad_norm": 8.608478546142578, "learning_rate": 9.251870324189528e-06, "loss": 0.3469, "step": 43120 }, { "epoch": 10.75561097256858, "grad_norm": 7.662023544311523, "learning_rate": 9.249376558603493e-06, "loss": 0.3648, "step": 43130 }, { "epoch": 10.758104738154614, "grad_norm": 7.992726802825928, "learning_rate": 9.246882793017458e-06, "loss": 0.3541, "step": 43140 }, { "epoch": 10.760598503740649, "grad_norm": 6.671484470367432, "learning_rate": 9.244389027431422e-06, "loss": 0.3287, "step": 43150 }, { "epoch": 10.763092269326684, "grad_norm": 8.085719108581543, "learning_rate": 9.241895261845387e-06, "loss": 0.3401, "step": 43160 }, { "epoch": 10.765586034912719, "grad_norm": 5.610658168792725, "learning_rate": 9.239401496259353e-06, "loss": 0.3296, "step": 43170 }, { "epoch": 10.768079800498754, "grad_norm": 8.575346946716309, "learning_rate": 9.236907730673318e-06, "loss": 0.3825, "step": 43180 }, { "epoch": 10.770573566084789, "grad_norm": 9.167414665222168, "learning_rate": 9.234413965087281e-06, "loss": 0.3747, "step": 43190 }, { "epoch": 10.773067331670823, "grad_norm": 9.802268028259277, "learning_rate": 9.231920199501247e-06, "loss": 0.323, "step": 43200 }, { "epoch": 10.775561097256858, "grad_norm": 5.6160101890563965, "learning_rate": 9.229426433915212e-06, "loss": 0.34, "step": 43210 }, { "epoch": 10.778054862842893, "grad_norm": 6.598016738891602, "learning_rate": 9.226932668329179e-06, "loss": 0.3541, "step": 43220 }, { "epoch": 10.780548628428928, "grad_norm": 7.513404369354248, "learning_rate": 9.224438902743143e-06, "loss": 0.3587, "step": 43230 }, { "epoch": 10.783042394014963, "grad_norm": 8.931615829467773, "learning_rate": 9.221945137157108e-06, "loss": 0.3301, "step": 43240 }, { "epoch": 10.785536159600998, "grad_norm": 8.84310245513916, "learning_rate": 9.219451371571073e-06, "loss": 0.3577, "step": 43250 }, { "epoch": 10.788029925187033, "grad_norm": 6.9853668212890625, "learning_rate": 9.216957605985039e-06, "loss": 0.3472, "step": 43260 }, { "epoch": 10.790523690773068, "grad_norm": 8.114693641662598, "learning_rate": 9.214463840399002e-06, "loss": 0.3509, "step": 43270 }, { "epoch": 10.793017456359102, "grad_norm": 6.722007751464844, "learning_rate": 9.211970074812968e-06, "loss": 0.3194, "step": 43280 }, { "epoch": 10.795511221945137, "grad_norm": 11.021949768066406, "learning_rate": 9.209476309226933e-06, "loss": 0.3875, "step": 43290 }, { "epoch": 10.798004987531172, "grad_norm": 9.12653636932373, "learning_rate": 9.206982543640898e-06, "loss": 0.3549, "step": 43300 }, { "epoch": 10.800498753117207, "grad_norm": 6.999925136566162, "learning_rate": 9.204488778054863e-06, "loss": 0.3565, "step": 43310 }, { "epoch": 10.802992518703242, "grad_norm": 8.389660835266113, "learning_rate": 9.201995012468829e-06, "loss": 0.2577, "step": 43320 }, { "epoch": 10.805486284289277, "grad_norm": 6.072205543518066, "learning_rate": 9.199501246882794e-06, "loss": 0.3147, "step": 43330 }, { "epoch": 10.807980049875312, "grad_norm": 7.625298500061035, "learning_rate": 9.19700748129676e-06, "loss": 0.2968, "step": 43340 }, { "epoch": 10.810473815461346, "grad_norm": 9.655031204223633, "learning_rate": 9.194513715710725e-06, "loss": 0.3788, "step": 43350 }, { "epoch": 10.812967581047381, "grad_norm": 5.24294900894165, "learning_rate": 9.192019950124688e-06, "loss": 0.3467, "step": 43360 }, { "epoch": 10.815461346633416, "grad_norm": 3.961106777191162, "learning_rate": 9.189526184538654e-06, "loss": 0.2739, "step": 43370 }, { "epoch": 10.817955112219451, "grad_norm": 9.00661563873291, "learning_rate": 9.187032418952619e-06, "loss": 0.3829, "step": 43380 }, { "epoch": 10.820448877805486, "grad_norm": 5.253776550292969, "learning_rate": 9.184538653366584e-06, "loss": 0.3532, "step": 43390 }, { "epoch": 10.82294264339152, "grad_norm": 6.8196587562561035, "learning_rate": 9.18204488778055e-06, "loss": 0.3576, "step": 43400 }, { "epoch": 10.825436408977556, "grad_norm": 6.497467517852783, "learning_rate": 9.179551122194515e-06, "loss": 0.3487, "step": 43410 }, { "epoch": 10.82793017456359, "grad_norm": 7.977984428405762, "learning_rate": 9.17705735660848e-06, "loss": 0.3008, "step": 43420 }, { "epoch": 10.830423940149625, "grad_norm": 5.911102771759033, "learning_rate": 9.174563591022445e-06, "loss": 0.4166, "step": 43430 }, { "epoch": 10.83291770573566, "grad_norm": 5.653079032897949, "learning_rate": 9.172069825436409e-06, "loss": 0.3284, "step": 43440 }, { "epoch": 10.835411471321695, "grad_norm": 5.5909199714660645, "learning_rate": 9.169576059850374e-06, "loss": 0.3524, "step": 43450 }, { "epoch": 10.83790523690773, "grad_norm": 5.932044982910156, "learning_rate": 9.16708229426434e-06, "loss": 0.371, "step": 43460 }, { "epoch": 10.840399002493765, "grad_norm": 8.470308303833008, "learning_rate": 9.164588528678305e-06, "loss": 0.3592, "step": 43470 }, { "epoch": 10.8428927680798, "grad_norm": 5.490266799926758, "learning_rate": 9.16209476309227e-06, "loss": 0.3547, "step": 43480 }, { "epoch": 10.845386533665835, "grad_norm": 7.31662654876709, "learning_rate": 9.159600997506236e-06, "loss": 0.3529, "step": 43490 }, { "epoch": 10.84788029925187, "grad_norm": 9.396953582763672, "learning_rate": 9.157107231920201e-06, "loss": 0.3525, "step": 43500 }, { "epoch": 10.850374064837904, "grad_norm": 8.587018013000488, "learning_rate": 9.154613466334166e-06, "loss": 0.3494, "step": 43510 }, { "epoch": 10.85286783042394, "grad_norm": 5.789772033691406, "learning_rate": 9.15211970074813e-06, "loss": 0.3412, "step": 43520 }, { "epoch": 10.855361596009976, "grad_norm": 8.550657272338867, "learning_rate": 9.149625935162095e-06, "loss": 0.3953, "step": 43530 }, { "epoch": 10.85785536159601, "grad_norm": 7.927114963531494, "learning_rate": 9.14713216957606e-06, "loss": 0.3944, "step": 43540 }, { "epoch": 10.860349127182046, "grad_norm": 5.831830978393555, "learning_rate": 9.144638403990026e-06, "loss": 0.3161, "step": 43550 }, { "epoch": 10.86284289276808, "grad_norm": 7.033876419067383, "learning_rate": 9.14214463840399e-06, "loss": 0.3668, "step": 43560 }, { "epoch": 10.865336658354115, "grad_norm": 5.114386081695557, "learning_rate": 9.139650872817956e-06, "loss": 0.3579, "step": 43570 }, { "epoch": 10.86783042394015, "grad_norm": 5.873979568481445, "learning_rate": 9.137157107231922e-06, "loss": 0.3652, "step": 43580 }, { "epoch": 10.870324189526185, "grad_norm": 7.800948619842529, "learning_rate": 9.134663341645887e-06, "loss": 0.3274, "step": 43590 }, { "epoch": 10.87281795511222, "grad_norm": 7.095271587371826, "learning_rate": 9.132169576059852e-06, "loss": 0.3438, "step": 43600 }, { "epoch": 10.875311720698255, "grad_norm": 19.4151668548584, "learning_rate": 9.129675810473816e-06, "loss": 0.3501, "step": 43610 }, { "epoch": 10.87780548628429, "grad_norm": 7.045140743255615, "learning_rate": 9.127182044887781e-06, "loss": 0.3194, "step": 43620 }, { "epoch": 10.880299251870325, "grad_norm": 7.247674465179443, "learning_rate": 9.124688279301747e-06, "loss": 0.3335, "step": 43630 }, { "epoch": 10.88279301745636, "grad_norm": 7.939310550689697, "learning_rate": 9.122194513715712e-06, "loss": 0.3741, "step": 43640 }, { "epoch": 10.885286783042394, "grad_norm": 7.598067283630371, "learning_rate": 9.119700748129676e-06, "loss": 0.3175, "step": 43650 }, { "epoch": 10.88778054862843, "grad_norm": 6.730564594268799, "learning_rate": 9.11720698254364e-06, "loss": 0.3614, "step": 43660 }, { "epoch": 10.890274314214464, "grad_norm": 8.91694450378418, "learning_rate": 9.114713216957606e-06, "loss": 0.4158, "step": 43670 }, { "epoch": 10.892768079800499, "grad_norm": 11.187231063842773, "learning_rate": 9.112219451371571e-06, "loss": 0.3443, "step": 43680 }, { "epoch": 10.895261845386534, "grad_norm": 4.5806450843811035, "learning_rate": 9.109725685785537e-06, "loss": 0.3801, "step": 43690 }, { "epoch": 10.897755610972569, "grad_norm": 8.682236671447754, "learning_rate": 9.107231920199502e-06, "loss": 0.3462, "step": 43700 }, { "epoch": 10.900249376558603, "grad_norm": 6.818325042724609, "learning_rate": 9.104738154613467e-06, "loss": 0.3968, "step": 43710 }, { "epoch": 10.902743142144638, "grad_norm": 6.7168426513671875, "learning_rate": 9.102244389027433e-06, "loss": 0.5048, "step": 43720 }, { "epoch": 10.905236907730673, "grad_norm": 7.661280155181885, "learning_rate": 9.099750623441396e-06, "loss": 0.3603, "step": 43730 }, { "epoch": 10.907730673316708, "grad_norm": 11.238551139831543, "learning_rate": 9.097256857855362e-06, "loss": 0.3616, "step": 43740 }, { "epoch": 10.910224438902743, "grad_norm": 8.43984317779541, "learning_rate": 9.094763092269327e-06, "loss": 0.3914, "step": 43750 }, { "epoch": 10.912718204488778, "grad_norm": 6.167079925537109, "learning_rate": 9.092269326683292e-06, "loss": 0.3538, "step": 43760 }, { "epoch": 10.915211970074813, "grad_norm": 5.880906581878662, "learning_rate": 9.089775561097258e-06, "loss": 0.3578, "step": 43770 }, { "epoch": 10.917705735660848, "grad_norm": 11.39787483215332, "learning_rate": 9.087281795511223e-06, "loss": 0.3781, "step": 43780 }, { "epoch": 10.920199501246882, "grad_norm": 11.926407814025879, "learning_rate": 9.084788029925188e-06, "loss": 0.3413, "step": 43790 }, { "epoch": 10.922693266832917, "grad_norm": 9.85400390625, "learning_rate": 9.082294264339153e-06, "loss": 0.4972, "step": 43800 }, { "epoch": 10.925187032418952, "grad_norm": 8.283854484558105, "learning_rate": 9.079800498753117e-06, "loss": 0.3979, "step": 43810 }, { "epoch": 10.927680798004987, "grad_norm": 9.599810600280762, "learning_rate": 9.077306733167082e-06, "loss": 0.3405, "step": 43820 }, { "epoch": 10.930174563591022, "grad_norm": 8.065226554870605, "learning_rate": 9.074812967581048e-06, "loss": 0.4098, "step": 43830 }, { "epoch": 10.932668329177057, "grad_norm": 6.116038799285889, "learning_rate": 9.072319201995013e-06, "loss": 0.3745, "step": 43840 }, { "epoch": 10.935162094763092, "grad_norm": 8.363578796386719, "learning_rate": 9.069825436408978e-06, "loss": 0.3899, "step": 43850 }, { "epoch": 10.937655860349127, "grad_norm": 5.87863302230835, "learning_rate": 9.067331670822944e-06, "loss": 0.3468, "step": 43860 }, { "epoch": 10.940149625935161, "grad_norm": 7.009591102600098, "learning_rate": 9.064837905236909e-06, "loss": 0.3486, "step": 43870 }, { "epoch": 10.942643391521196, "grad_norm": 8.262612342834473, "learning_rate": 9.062344139650874e-06, "loss": 0.3601, "step": 43880 }, { "epoch": 10.945137157107231, "grad_norm": 11.304754257202148, "learning_rate": 9.05985037406484e-06, "loss": 0.3933, "step": 43890 }, { "epoch": 10.947630922693268, "grad_norm": 7.655803680419922, "learning_rate": 9.057356608478803e-06, "loss": 0.3177, "step": 43900 }, { "epoch": 10.950124688279303, "grad_norm": 7.3225226402282715, "learning_rate": 9.054862842892768e-06, "loss": 0.3793, "step": 43910 }, { "epoch": 10.952618453865338, "grad_norm": 11.696869850158691, "learning_rate": 9.052369077306734e-06, "loss": 0.3915, "step": 43920 }, { "epoch": 10.955112219451372, "grad_norm": 7.450771331787109, "learning_rate": 9.049875311720699e-06, "loss": 0.3423, "step": 43930 }, { "epoch": 10.957605985037407, "grad_norm": 6.9573655128479, "learning_rate": 9.047381546134664e-06, "loss": 0.3169, "step": 43940 }, { "epoch": 10.960099750623442, "grad_norm": 6.270671844482422, "learning_rate": 9.04488778054863e-06, "loss": 0.3519, "step": 43950 }, { "epoch": 10.962593516209477, "grad_norm": 8.106279373168945, "learning_rate": 9.042394014962595e-06, "loss": 0.3458, "step": 43960 }, { "epoch": 10.965087281795512, "grad_norm": 2.7840754985809326, "learning_rate": 9.03990024937656e-06, "loss": 0.3244, "step": 43970 }, { "epoch": 10.967581047381547, "grad_norm": 6.383938789367676, "learning_rate": 9.037406483790524e-06, "loss": 0.3242, "step": 43980 }, { "epoch": 10.970074812967582, "grad_norm": 8.362710952758789, "learning_rate": 9.03491271820449e-06, "loss": 0.3966, "step": 43990 }, { "epoch": 10.972568578553616, "grad_norm": 8.788472175598145, "learning_rate": 9.032418952618455e-06, "loss": 0.3279, "step": 44000 }, { "epoch": 10.975062344139651, "grad_norm": 7.121370792388916, "learning_rate": 9.02992518703242e-06, "loss": 0.3171, "step": 44010 }, { "epoch": 10.977556109725686, "grad_norm": 8.656166076660156, "learning_rate": 9.027431421446384e-06, "loss": 0.3256, "step": 44020 }, { "epoch": 10.980049875311721, "grad_norm": 7.024209022521973, "learning_rate": 9.024937655860349e-06, "loss": 0.394, "step": 44030 }, { "epoch": 10.982543640897756, "grad_norm": 8.922717094421387, "learning_rate": 9.022443890274316e-06, "loss": 0.32, "step": 44040 }, { "epoch": 10.98503740648379, "grad_norm": 7.146017074584961, "learning_rate": 9.019950124688281e-06, "loss": 0.3368, "step": 44050 }, { "epoch": 10.987531172069826, "grad_norm": 6.3751749992370605, "learning_rate": 9.017456359102245e-06, "loss": 0.3405, "step": 44060 }, { "epoch": 10.99002493765586, "grad_norm": 4.537290096282959, "learning_rate": 9.01496259351621e-06, "loss": 0.2856, "step": 44070 }, { "epoch": 10.992518703241895, "grad_norm": 9.014129638671875, "learning_rate": 9.012468827930175e-06, "loss": 0.3719, "step": 44080 }, { "epoch": 10.99501246882793, "grad_norm": 7.991206645965576, "learning_rate": 9.00997506234414e-06, "loss": 0.3403, "step": 44090 }, { "epoch": 10.997506234413965, "grad_norm": 6.865902900695801, "learning_rate": 9.007481296758106e-06, "loss": 0.3167, "step": 44100 }, { "epoch": 11.0, "grad_norm": 8.729063034057617, "learning_rate": 9.00498753117207e-06, "loss": 0.3674, "step": 44110 }, { "epoch": 11.0, "eval_loss": 0.4132729470729828, "eval_runtime": 59.9748, "eval_samples_per_second": 16.724, "eval_steps_per_second": 16.724, "step": 44110 }, { "epoch": 11.002493765586035, "grad_norm": 8.256260871887207, "learning_rate": 9.002493765586035e-06, "loss": 0.3417, "step": 44120 }, { "epoch": 11.00498753117207, "grad_norm": 8.294591903686523, "learning_rate": 9e-06, "loss": 0.3305, "step": 44130 }, { "epoch": 11.007481296758105, "grad_norm": 7.374525547027588, "learning_rate": 8.997506234413966e-06, "loss": 0.3561, "step": 44140 }, { "epoch": 11.00997506234414, "grad_norm": 7.575953006744385, "learning_rate": 8.995012468827931e-06, "loss": 0.3326, "step": 44150 }, { "epoch": 11.012468827930174, "grad_norm": 8.2825345993042, "learning_rate": 8.992518703241896e-06, "loss": 0.3432, "step": 44160 }, { "epoch": 11.01496259351621, "grad_norm": 6.32546854019165, "learning_rate": 8.990024937655861e-06, "loss": 0.345, "step": 44170 }, { "epoch": 11.017456359102244, "grad_norm": 8.19118595123291, "learning_rate": 8.987531172069827e-06, "loss": 0.3045, "step": 44180 }, { "epoch": 11.019950124688279, "grad_norm": 6.752303600311279, "learning_rate": 8.98503740648379e-06, "loss": 0.3918, "step": 44190 }, { "epoch": 11.022443890274314, "grad_norm": 5.932526588439941, "learning_rate": 8.982543640897756e-06, "loss": 0.2983, "step": 44200 }, { "epoch": 11.024937655860349, "grad_norm": 10.784844398498535, "learning_rate": 8.980049875311721e-06, "loss": 0.328, "step": 44210 }, { "epoch": 11.027431421446384, "grad_norm": 6.154462814331055, "learning_rate": 8.977556109725686e-06, "loss": 0.3924, "step": 44220 }, { "epoch": 11.029925187032418, "grad_norm": 6.966713905334473, "learning_rate": 8.975062344139652e-06, "loss": 0.3309, "step": 44230 }, { "epoch": 11.032418952618453, "grad_norm": 4.729964256286621, "learning_rate": 8.972568578553617e-06, "loss": 0.3863, "step": 44240 }, { "epoch": 11.034912718204488, "grad_norm": 7.567785739898682, "learning_rate": 8.970074812967582e-06, "loss": 0.305, "step": 44250 }, { "epoch": 11.037406483790523, "grad_norm": 7.73862886428833, "learning_rate": 8.967581047381548e-06, "loss": 0.4145, "step": 44260 }, { "epoch": 11.039900249376558, "grad_norm": 6.4053168296813965, "learning_rate": 8.965087281795511e-06, "loss": 0.3216, "step": 44270 }, { "epoch": 11.042394014962593, "grad_norm": 4.572265148162842, "learning_rate": 8.962593516209476e-06, "loss": 0.3534, "step": 44280 }, { "epoch": 11.044887780548628, "grad_norm": 5.619957447052002, "learning_rate": 8.960099750623442e-06, "loss": 0.3327, "step": 44290 }, { "epoch": 11.047381546134662, "grad_norm": 13.95361042022705, "learning_rate": 8.957605985037407e-06, "loss": 0.3672, "step": 44300 }, { "epoch": 11.049875311720697, "grad_norm": 5.5569963455200195, "learning_rate": 8.955112219451372e-06, "loss": 0.4164, "step": 44310 }, { "epoch": 11.052369077306734, "grad_norm": 5.730642318725586, "learning_rate": 8.952618453865338e-06, "loss": 0.2852, "step": 44320 }, { "epoch": 11.054862842892769, "grad_norm": 10.969292640686035, "learning_rate": 8.950124688279303e-06, "loss": 0.3387, "step": 44330 }, { "epoch": 11.057356608478804, "grad_norm": 6.42288064956665, "learning_rate": 8.947630922693268e-06, "loss": 0.3338, "step": 44340 }, { "epoch": 11.059850374064839, "grad_norm": 7.737945556640625, "learning_rate": 8.945137157107234e-06, "loss": 0.3709, "step": 44350 }, { "epoch": 11.062344139650873, "grad_norm": 8.051152229309082, "learning_rate": 8.942643391521197e-06, "loss": 0.356, "step": 44360 }, { "epoch": 11.064837905236908, "grad_norm": 6.058350563049316, "learning_rate": 8.940149625935163e-06, "loss": 0.2939, "step": 44370 }, { "epoch": 11.067331670822943, "grad_norm": 7.329736232757568, "learning_rate": 8.937655860349128e-06, "loss": 0.3819, "step": 44380 }, { "epoch": 11.069825436408978, "grad_norm": 7.439964771270752, "learning_rate": 8.935162094763093e-06, "loss": 0.325, "step": 44390 }, { "epoch": 11.072319201995013, "grad_norm": 7.003705978393555, "learning_rate": 8.932668329177059e-06, "loss": 0.3168, "step": 44400 }, { "epoch": 11.074812967581048, "grad_norm": 5.3289570808410645, "learning_rate": 8.930423940149627e-06, "loss": 0.3069, "step": 44410 }, { "epoch": 11.077306733167083, "grad_norm": 6.427891731262207, "learning_rate": 8.927930174563592e-06, "loss": 0.3028, "step": 44420 }, { "epoch": 11.079800498753118, "grad_norm": 7.827418327331543, "learning_rate": 8.925436408977557e-06, "loss": 0.3543, "step": 44430 }, { "epoch": 11.082294264339152, "grad_norm": 9.805115699768066, "learning_rate": 8.922942643391523e-06, "loss": 0.3216, "step": 44440 }, { "epoch": 11.084788029925187, "grad_norm": 6.640481948852539, "learning_rate": 8.920448877805486e-06, "loss": 0.372, "step": 44450 }, { "epoch": 11.087281795511222, "grad_norm": 7.233997821807861, "learning_rate": 8.917955112219452e-06, "loss": 0.3521, "step": 44460 }, { "epoch": 11.089775561097257, "grad_norm": 7.673696517944336, "learning_rate": 8.915461346633417e-06, "loss": 0.3184, "step": 44470 }, { "epoch": 11.092269326683292, "grad_norm": 7.302619934082031, "learning_rate": 8.912967581047382e-06, "loss": 0.3365, "step": 44480 }, { "epoch": 11.094763092269327, "grad_norm": 9.098041534423828, "learning_rate": 8.910473815461348e-06, "loss": 0.3513, "step": 44490 }, { "epoch": 11.097256857855362, "grad_norm": 6.597679615020752, "learning_rate": 8.907980049875313e-06, "loss": 0.3133, "step": 44500 }, { "epoch": 11.099750623441397, "grad_norm": 6.860264301300049, "learning_rate": 8.905486284289278e-06, "loss": 0.4096, "step": 44510 }, { "epoch": 11.102244389027431, "grad_norm": 4.556776523590088, "learning_rate": 8.902992518703243e-06, "loss": 0.3389, "step": 44520 }, { "epoch": 11.104738154613466, "grad_norm": 9.30516529083252, "learning_rate": 8.900498753117209e-06, "loss": 0.3051, "step": 44530 }, { "epoch": 11.107231920199501, "grad_norm": 10.449723243713379, "learning_rate": 8.898004987531172e-06, "loss": 0.3412, "step": 44540 }, { "epoch": 11.109725685785536, "grad_norm": 7.47760009765625, "learning_rate": 8.895511221945138e-06, "loss": 0.3901, "step": 44550 }, { "epoch": 11.11221945137157, "grad_norm": 12.093515396118164, "learning_rate": 8.893017456359103e-06, "loss": 0.3252, "step": 44560 }, { "epoch": 11.114713216957606, "grad_norm": 9.427157402038574, "learning_rate": 8.890523690773068e-06, "loss": 0.3101, "step": 44570 }, { "epoch": 11.11720698254364, "grad_norm": 10.620285034179688, "learning_rate": 8.888029925187032e-06, "loss": 0.3178, "step": 44580 }, { "epoch": 11.119700748129675, "grad_norm": 7.5438079833984375, "learning_rate": 8.885536159600999e-06, "loss": 0.3176, "step": 44590 }, { "epoch": 11.12219451371571, "grad_norm": 6.367938041687012, "learning_rate": 8.883042394014964e-06, "loss": 0.3195, "step": 44600 }, { "epoch": 11.124688279301745, "grad_norm": 16.17655372619629, "learning_rate": 8.88054862842893e-06, "loss": 0.3953, "step": 44610 }, { "epoch": 11.12718204488778, "grad_norm": 10.65559196472168, "learning_rate": 8.878054862842893e-06, "loss": 0.3251, "step": 44620 }, { "epoch": 11.129675810473815, "grad_norm": 6.366180419921875, "learning_rate": 8.875561097256859e-06, "loss": 0.3519, "step": 44630 }, { "epoch": 11.13216957605985, "grad_norm": 8.204825401306152, "learning_rate": 8.873067331670824e-06, "loss": 0.3497, "step": 44640 }, { "epoch": 11.134663341645885, "grad_norm": 10.065892219543457, "learning_rate": 8.870573566084789e-06, "loss": 0.2901, "step": 44650 }, { "epoch": 11.13715710723192, "grad_norm": 7.356030464172363, "learning_rate": 8.868079800498753e-06, "loss": 0.3841, "step": 44660 }, { "epoch": 11.139650872817954, "grad_norm": 7.364969730377197, "learning_rate": 8.865586034912718e-06, "loss": 0.3247, "step": 44670 }, { "epoch": 11.14214463840399, "grad_norm": 7.613313674926758, "learning_rate": 8.863092269326683e-06, "loss": 0.3508, "step": 44680 }, { "epoch": 11.144638403990024, "grad_norm": 8.078596115112305, "learning_rate": 8.860598503740649e-06, "loss": 0.3305, "step": 44690 }, { "epoch": 11.147132169576059, "grad_norm": 10.208447456359863, "learning_rate": 8.858104738154614e-06, "loss": 0.3537, "step": 44700 }, { "epoch": 11.149625935162096, "grad_norm": 8.215570449829102, "learning_rate": 8.85561097256858e-06, "loss": 0.3275, "step": 44710 }, { "epoch": 11.15211970074813, "grad_norm": 5.312885284423828, "learning_rate": 8.853117206982545e-06, "loss": 0.3202, "step": 44720 }, { "epoch": 11.154613466334165, "grad_norm": 7.666956901550293, "learning_rate": 8.85062344139651e-06, "loss": 0.3238, "step": 44730 }, { "epoch": 11.1571072319202, "grad_norm": 7.116825103759766, "learning_rate": 8.848129675810474e-06, "loss": 0.3231, "step": 44740 }, { "epoch": 11.159600997506235, "grad_norm": 6.489451885223389, "learning_rate": 8.845635910224439e-06, "loss": 0.3411, "step": 44750 }, { "epoch": 11.16209476309227, "grad_norm": 6.022216320037842, "learning_rate": 8.843142144638404e-06, "loss": 0.4006, "step": 44760 }, { "epoch": 11.164588528678305, "grad_norm": 8.421282768249512, "learning_rate": 8.84064837905237e-06, "loss": 0.4011, "step": 44770 }, { "epoch": 11.16708229426434, "grad_norm": 7.724409580230713, "learning_rate": 8.838154613466335e-06, "loss": 0.325, "step": 44780 }, { "epoch": 11.169576059850375, "grad_norm": 8.80372142791748, "learning_rate": 8.8356608478803e-06, "loss": 0.3197, "step": 44790 }, { "epoch": 11.17206982543641, "grad_norm": 10.359016418457031, "learning_rate": 8.833167082294265e-06, "loss": 0.3586, "step": 44800 }, { "epoch": 11.174563591022444, "grad_norm": 8.572481155395508, "learning_rate": 8.83067331670823e-06, "loss": 0.3292, "step": 44810 }, { "epoch": 11.17705735660848, "grad_norm": 13.053606033325195, "learning_rate": 8.828179551122196e-06, "loss": 0.3645, "step": 44820 }, { "epoch": 11.179551122194514, "grad_norm": 10.115473747253418, "learning_rate": 8.82568578553616e-06, "loss": 0.2904, "step": 44830 }, { "epoch": 11.182044887780549, "grad_norm": 8.798450469970703, "learning_rate": 8.823192019950125e-06, "loss": 0.334, "step": 44840 }, { "epoch": 11.184538653366584, "grad_norm": 8.745047569274902, "learning_rate": 8.82069825436409e-06, "loss": 0.3467, "step": 44850 }, { "epoch": 11.187032418952619, "grad_norm": 6.6508941650390625, "learning_rate": 8.818204488778056e-06, "loss": 0.4056, "step": 44860 }, { "epoch": 11.189526184538654, "grad_norm": 7.657419204711914, "learning_rate": 8.815710723192021e-06, "loss": 0.3708, "step": 44870 }, { "epoch": 11.192019950124688, "grad_norm": 7.383201599121094, "learning_rate": 8.813216957605986e-06, "loss": 0.3759, "step": 44880 }, { "epoch": 11.194513715710723, "grad_norm": 5.854804992675781, "learning_rate": 8.810723192019951e-06, "loss": 0.3151, "step": 44890 }, { "epoch": 11.197007481296758, "grad_norm": 6.18996000289917, "learning_rate": 8.808229426433917e-06, "loss": 0.3223, "step": 44900 }, { "epoch": 11.199501246882793, "grad_norm": 7.372582912445068, "learning_rate": 8.80573566084788e-06, "loss": 0.3174, "step": 44910 }, { "epoch": 11.201995012468828, "grad_norm": 4.5158233642578125, "learning_rate": 8.803241895261846e-06, "loss": 0.2967, "step": 44920 }, { "epoch": 11.204488778054863, "grad_norm": 8.73290729522705, "learning_rate": 8.800748129675811e-06, "loss": 0.3015, "step": 44930 }, { "epoch": 11.206982543640898, "grad_norm": 12.908029556274414, "learning_rate": 8.798254364089776e-06, "loss": 0.351, "step": 44940 }, { "epoch": 11.209476309226932, "grad_norm": 6.993527889251709, "learning_rate": 8.795760598503742e-06, "loss": 0.3274, "step": 44950 }, { "epoch": 11.211970074812967, "grad_norm": 8.653640747070312, "learning_rate": 8.793266832917707e-06, "loss": 0.3817, "step": 44960 }, { "epoch": 11.214463840399002, "grad_norm": 4.629430770874023, "learning_rate": 8.790773067331672e-06, "loss": 0.3772, "step": 44970 }, { "epoch": 11.216957605985037, "grad_norm": 7.76257848739624, "learning_rate": 8.788279301745638e-06, "loss": 0.3459, "step": 44980 }, { "epoch": 11.219451371571072, "grad_norm": 8.239355087280273, "learning_rate": 8.785785536159601e-06, "loss": 0.3039, "step": 44990 }, { "epoch": 11.221945137157107, "grad_norm": 8.355988502502441, "learning_rate": 8.783291770573566e-06, "loss": 0.363, "step": 45000 }, { "epoch": 11.224438902743142, "grad_norm": 7.481101989746094, "learning_rate": 8.780798004987532e-06, "loss": 0.3703, "step": 45010 }, { "epoch": 11.226932668329177, "grad_norm": 4.93237829208374, "learning_rate": 8.778304239401497e-06, "loss": 0.3426, "step": 45020 }, { "epoch": 11.229426433915211, "grad_norm": 5.618884563446045, "learning_rate": 8.775810473815462e-06, "loss": 0.3202, "step": 45030 }, { "epoch": 11.231920199501246, "grad_norm": 12.628522872924805, "learning_rate": 8.773316708229426e-06, "loss": 0.36, "step": 45040 }, { "epoch": 11.234413965087281, "grad_norm": 7.585283279418945, "learning_rate": 8.770822942643391e-06, "loss": 0.3422, "step": 45050 }, { "epoch": 11.236907730673316, "grad_norm": 7.396781921386719, "learning_rate": 8.768329177057358e-06, "loss": 0.3276, "step": 45060 }, { "epoch": 11.239401496259351, "grad_norm": 13.765165328979492, "learning_rate": 8.765835411471324e-06, "loss": 0.3655, "step": 45070 }, { "epoch": 11.241895261845386, "grad_norm": 7.234029769897461, "learning_rate": 8.763341645885287e-06, "loss": 0.3718, "step": 45080 }, { "epoch": 11.24438902743142, "grad_norm": 4.461481094360352, "learning_rate": 8.760847880299253e-06, "loss": 0.3412, "step": 45090 }, { "epoch": 11.246882793017456, "grad_norm": 8.242216110229492, "learning_rate": 8.758354114713218e-06, "loss": 0.3455, "step": 45100 }, { "epoch": 11.24937655860349, "grad_norm": 10.74194622039795, "learning_rate": 8.755860349127183e-06, "loss": 0.3511, "step": 45110 }, { "epoch": 11.251870324189527, "grad_norm": 9.051909446716309, "learning_rate": 8.753366583541147e-06, "loss": 0.3333, "step": 45120 }, { "epoch": 11.254364089775562, "grad_norm": 9.297087669372559, "learning_rate": 8.750872817955112e-06, "loss": 0.3345, "step": 45130 }, { "epoch": 11.256857855361597, "grad_norm": 7.317951679229736, "learning_rate": 8.748379052369077e-06, "loss": 0.3696, "step": 45140 }, { "epoch": 11.259351620947632, "grad_norm": 6.456167221069336, "learning_rate": 8.745885286783043e-06, "loss": 0.2631, "step": 45150 }, { "epoch": 11.261845386533667, "grad_norm": 9.548169136047363, "learning_rate": 8.743391521197008e-06, "loss": 0.3358, "step": 45160 }, { "epoch": 11.264339152119701, "grad_norm": 4.220322608947754, "learning_rate": 8.740897755610973e-06, "loss": 0.309, "step": 45170 }, { "epoch": 11.266832917705736, "grad_norm": 7.340099334716797, "learning_rate": 8.738403990024939e-06, "loss": 0.2948, "step": 45180 }, { "epoch": 11.269326683291771, "grad_norm": 6.146076202392578, "learning_rate": 8.735910224438904e-06, "loss": 0.3575, "step": 45190 }, { "epoch": 11.271820448877806, "grad_norm": 8.388291358947754, "learning_rate": 8.733416458852868e-06, "loss": 0.3306, "step": 45200 }, { "epoch": 11.27431421446384, "grad_norm": 12.431069374084473, "learning_rate": 8.730922693266833e-06, "loss": 0.3423, "step": 45210 }, { "epoch": 11.276807980049876, "grad_norm": 7.34416389465332, "learning_rate": 8.728428927680798e-06, "loss": 0.3473, "step": 45220 }, { "epoch": 11.27930174563591, "grad_norm": 4.615060806274414, "learning_rate": 8.725935162094764e-06, "loss": 0.3393, "step": 45230 }, { "epoch": 11.281795511221945, "grad_norm": 7.668536186218262, "learning_rate": 8.723441396508729e-06, "loss": 0.3442, "step": 45240 }, { "epoch": 11.28428927680798, "grad_norm": 8.232738494873047, "learning_rate": 8.720947630922694e-06, "loss": 0.334, "step": 45250 }, { "epoch": 11.286783042394015, "grad_norm": 9.045811653137207, "learning_rate": 8.71845386533666e-06, "loss": 0.428, "step": 45260 }, { "epoch": 11.28927680798005, "grad_norm": 7.1297526359558105, "learning_rate": 8.715960099750625e-06, "loss": 0.3156, "step": 45270 }, { "epoch": 11.291770573566085, "grad_norm": 8.763090133666992, "learning_rate": 8.71346633416459e-06, "loss": 0.3447, "step": 45280 }, { "epoch": 11.29426433915212, "grad_norm": 8.615846633911133, "learning_rate": 8.710972568578554e-06, "loss": 0.3308, "step": 45290 }, { "epoch": 11.296758104738155, "grad_norm": 10.320062637329102, "learning_rate": 8.708478802992519e-06, "loss": 0.3569, "step": 45300 }, { "epoch": 11.29925187032419, "grad_norm": 7.310534954071045, "learning_rate": 8.705985037406484e-06, "loss": 0.362, "step": 45310 }, { "epoch": 11.301745635910224, "grad_norm": 6.569928169250488, "learning_rate": 8.70349127182045e-06, "loss": 0.2893, "step": 45320 }, { "epoch": 11.30423940149626, "grad_norm": 6.204464435577393, "learning_rate": 8.700997506234415e-06, "loss": 0.3239, "step": 45330 }, { "epoch": 11.306733167082294, "grad_norm": 6.959688663482666, "learning_rate": 8.69850374064838e-06, "loss": 0.3394, "step": 45340 }, { "epoch": 11.309226932668329, "grad_norm": 8.13005256652832, "learning_rate": 8.696009975062346e-06, "loss": 0.3523, "step": 45350 }, { "epoch": 11.311720698254364, "grad_norm": 5.307433128356934, "learning_rate": 8.693516209476311e-06, "loss": 0.3485, "step": 45360 }, { "epoch": 11.314214463840399, "grad_norm": 7.195748329162598, "learning_rate": 8.691022443890274e-06, "loss": 0.3992, "step": 45370 }, { "epoch": 11.316708229426434, "grad_norm": 8.693116188049316, "learning_rate": 8.68852867830424e-06, "loss": 0.3582, "step": 45380 }, { "epoch": 11.319201995012468, "grad_norm": 7.39258337020874, "learning_rate": 8.686034912718205e-06, "loss": 0.33, "step": 45390 }, { "epoch": 11.321695760598503, "grad_norm": 7.933567047119141, "learning_rate": 8.68354114713217e-06, "loss": 0.3464, "step": 45400 }, { "epoch": 11.324189526184538, "grad_norm": 8.168654441833496, "learning_rate": 8.681047381546136e-06, "loss": 0.3156, "step": 45410 }, { "epoch": 11.326683291770573, "grad_norm": 10.021574974060059, "learning_rate": 8.678553615960101e-06, "loss": 0.3644, "step": 45420 }, { "epoch": 11.329177057356608, "grad_norm": 7.775262832641602, "learning_rate": 8.676059850374066e-06, "loss": 0.3631, "step": 45430 }, { "epoch": 11.331670822942643, "grad_norm": 8.023462295532227, "learning_rate": 8.673566084788032e-06, "loss": 0.3567, "step": 45440 }, { "epoch": 11.334164588528678, "grad_norm": 10.410110473632812, "learning_rate": 8.671072319201995e-06, "loss": 0.3938, "step": 45450 }, { "epoch": 11.336658354114713, "grad_norm": 10.069509506225586, "learning_rate": 8.66857855361596e-06, "loss": 0.4094, "step": 45460 }, { "epoch": 11.339152119700747, "grad_norm": 5.57853364944458, "learning_rate": 8.666084788029926e-06, "loss": 0.3017, "step": 45470 }, { "epoch": 11.341645885286782, "grad_norm": 7.023243427276611, "learning_rate": 8.663591022443891e-06, "loss": 0.3085, "step": 45480 }, { "epoch": 11.344139650872817, "grad_norm": 6.86598014831543, "learning_rate": 8.661097256857855e-06, "loss": 0.3638, "step": 45490 }, { "epoch": 11.346633416458852, "grad_norm": 8.568735122680664, "learning_rate": 8.65860349127182e-06, "loss": 0.3982, "step": 45500 }, { "epoch": 11.349127182044889, "grad_norm": 8.5831880569458, "learning_rate": 8.656109725685785e-06, "loss": 0.3462, "step": 45510 }, { "epoch": 11.351620947630924, "grad_norm": 6.069805145263672, "learning_rate": 8.65361596009975e-06, "loss": 0.2987, "step": 45520 }, { "epoch": 11.354114713216958, "grad_norm": 7.621370315551758, "learning_rate": 8.651122194513718e-06, "loss": 0.3374, "step": 45530 }, { "epoch": 11.356608478802993, "grad_norm": 6.071798324584961, "learning_rate": 8.648628428927681e-06, "loss": 0.3222, "step": 45540 }, { "epoch": 11.359102244389028, "grad_norm": 5.18229341506958, "learning_rate": 8.646134663341647e-06, "loss": 0.3218, "step": 45550 }, { "epoch": 11.361596009975063, "grad_norm": 6.7007293701171875, "learning_rate": 8.643640897755612e-06, "loss": 0.3675, "step": 45560 }, { "epoch": 11.364089775561098, "grad_norm": 5.716126918792725, "learning_rate": 8.641147132169577e-06, "loss": 0.336, "step": 45570 }, { "epoch": 11.366583541147133, "grad_norm": 6.14850378036499, "learning_rate": 8.638653366583541e-06, "loss": 0.3959, "step": 45580 }, { "epoch": 11.369077306733168, "grad_norm": 4.6964898109436035, "learning_rate": 8.636159600997506e-06, "loss": 0.343, "step": 45590 }, { "epoch": 11.371571072319203, "grad_norm": 8.84803581237793, "learning_rate": 8.633665835411472e-06, "loss": 0.332, "step": 45600 }, { "epoch": 11.374064837905237, "grad_norm": 5.964016914367676, "learning_rate": 8.631172069825437e-06, "loss": 0.3045, "step": 45610 }, { "epoch": 11.376558603491272, "grad_norm": 4.455854415893555, "learning_rate": 8.628678304239402e-06, "loss": 0.3272, "step": 45620 }, { "epoch": 11.379052369077307, "grad_norm": 8.803267478942871, "learning_rate": 8.626184538653367e-06, "loss": 0.337, "step": 45630 }, { "epoch": 11.381546134663342, "grad_norm": 9.714362144470215, "learning_rate": 8.623690773067333e-06, "loss": 0.3589, "step": 45640 }, { "epoch": 11.384039900249377, "grad_norm": 4.950045108795166, "learning_rate": 8.621197007481298e-06, "loss": 0.3321, "step": 45650 }, { "epoch": 11.386533665835412, "grad_norm": 8.148637771606445, "learning_rate": 8.618703241895262e-06, "loss": 0.3178, "step": 45660 }, { "epoch": 11.389027431421447, "grad_norm": 21.125547409057617, "learning_rate": 8.616209476309227e-06, "loss": 0.3481, "step": 45670 }, { "epoch": 11.391521197007481, "grad_norm": 7.620448112487793, "learning_rate": 8.613715710723192e-06, "loss": 0.3811, "step": 45680 }, { "epoch": 11.394014962593516, "grad_norm": 7.3992743492126465, "learning_rate": 8.611221945137158e-06, "loss": 0.3509, "step": 45690 }, { "epoch": 11.396508728179551, "grad_norm": 7.22592830657959, "learning_rate": 8.608728179551123e-06, "loss": 0.3888, "step": 45700 }, { "epoch": 11.399002493765586, "grad_norm": 7.094658851623535, "learning_rate": 8.606234413965088e-06, "loss": 0.3421, "step": 45710 }, { "epoch": 11.401496259351621, "grad_norm": 7.960940837860107, "learning_rate": 8.603740648379054e-06, "loss": 0.4286, "step": 45720 }, { "epoch": 11.403990024937656, "grad_norm": 10.1074857711792, "learning_rate": 8.601246882793019e-06, "loss": 0.3917, "step": 45730 }, { "epoch": 11.40648379052369, "grad_norm": 10.943230628967285, "learning_rate": 8.598753117206984e-06, "loss": 0.3824, "step": 45740 }, { "epoch": 11.408977556109726, "grad_norm": 6.1284685134887695, "learning_rate": 8.596259351620948e-06, "loss": 0.2953, "step": 45750 }, { "epoch": 11.41147132169576, "grad_norm": 6.4964776039123535, "learning_rate": 8.593765586034913e-06, "loss": 0.3705, "step": 45760 }, { "epoch": 11.413965087281795, "grad_norm": 7.87790584564209, "learning_rate": 8.591271820448878e-06, "loss": 0.3237, "step": 45770 }, { "epoch": 11.41645885286783, "grad_norm": 7.569821357727051, "learning_rate": 8.588778054862844e-06, "loss": 0.303, "step": 45780 }, { "epoch": 11.418952618453865, "grad_norm": 8.668131828308105, "learning_rate": 8.586284289276809e-06, "loss": 0.2865, "step": 45790 }, { "epoch": 11.4214463840399, "grad_norm": 8.034530639648438, "learning_rate": 8.583790523690774e-06, "loss": 0.3503, "step": 45800 }, { "epoch": 11.423940149625935, "grad_norm": 8.05600357055664, "learning_rate": 8.58129675810474e-06, "loss": 0.3285, "step": 45810 }, { "epoch": 11.42643391521197, "grad_norm": 7.00352668762207, "learning_rate": 8.578802992518705e-06, "loss": 0.3335, "step": 45820 }, { "epoch": 11.428927680798004, "grad_norm": 12.199906349182129, "learning_rate": 8.576309226932669e-06, "loss": 0.4104, "step": 45830 }, { "epoch": 11.43142144638404, "grad_norm": 8.1293306350708, "learning_rate": 8.573815461346634e-06, "loss": 0.3462, "step": 45840 }, { "epoch": 11.433915211970074, "grad_norm": 9.158780097961426, "learning_rate": 8.5713216957606e-06, "loss": 0.3138, "step": 45850 }, { "epoch": 11.436408977556109, "grad_norm": 13.985179901123047, "learning_rate": 8.568827930174564e-06, "loss": 0.3182, "step": 45860 }, { "epoch": 11.438902743142144, "grad_norm": 7.04974889755249, "learning_rate": 8.566334164588528e-06, "loss": 0.3452, "step": 45870 }, { "epoch": 11.441396508728179, "grad_norm": 7.684841632843018, "learning_rate": 8.563840399002495e-06, "loss": 0.3312, "step": 45880 }, { "epoch": 11.443890274314214, "grad_norm": 7.361491680145264, "learning_rate": 8.56134663341646e-06, "loss": 0.3133, "step": 45890 }, { "epoch": 11.446384039900249, "grad_norm": 8.644780158996582, "learning_rate": 8.558852867830426e-06, "loss": 0.3371, "step": 45900 }, { "epoch": 11.448877805486283, "grad_norm": 8.315042495727539, "learning_rate": 8.55635910224439e-06, "loss": 0.3792, "step": 45910 }, { "epoch": 11.451371571072318, "grad_norm": 8.369626998901367, "learning_rate": 8.553865336658355e-06, "loss": 0.3525, "step": 45920 }, { "epoch": 11.453865336658355, "grad_norm": 12.835140228271484, "learning_rate": 8.55137157107232e-06, "loss": 0.3088, "step": 45930 }, { "epoch": 11.45635910224439, "grad_norm": 10.077214241027832, "learning_rate": 8.548877805486285e-06, "loss": 0.3456, "step": 45940 }, { "epoch": 11.458852867830425, "grad_norm": 8.558807373046875, "learning_rate": 8.546384039900249e-06, "loss": 0.2983, "step": 45950 }, { "epoch": 11.46134663341646, "grad_norm": 10.858403205871582, "learning_rate": 8.543890274314214e-06, "loss": 0.3396, "step": 45960 }, { "epoch": 11.463840399002494, "grad_norm": 6.8004469871521, "learning_rate": 8.54139650872818e-06, "loss": 0.3272, "step": 45970 }, { "epoch": 11.46633416458853, "grad_norm": 6.694581985473633, "learning_rate": 8.538902743142145e-06, "loss": 0.3775, "step": 45980 }, { "epoch": 11.468827930174564, "grad_norm": 8.675992012023926, "learning_rate": 8.53640897755611e-06, "loss": 0.3789, "step": 45990 }, { "epoch": 11.471321695760599, "grad_norm": 6.141960620880127, "learning_rate": 8.533915211970075e-06, "loss": 0.3364, "step": 46000 }, { "epoch": 11.473815461346634, "grad_norm": 14.198030471801758, "learning_rate": 8.53142144638404e-06, "loss": 0.3398, "step": 46010 }, { "epoch": 11.476309226932669, "grad_norm": 6.926119327545166, "learning_rate": 8.528927680798006e-06, "loss": 0.2966, "step": 46020 }, { "epoch": 11.478802992518704, "grad_norm": 5.021880626678467, "learning_rate": 8.526433915211971e-06, "loss": 0.3197, "step": 46030 }, { "epoch": 11.481296758104738, "grad_norm": 6.237401008605957, "learning_rate": 8.523940149625935e-06, "loss": 0.3225, "step": 46040 }, { "epoch": 11.483790523690773, "grad_norm": 11.097943305969238, "learning_rate": 8.5214463840399e-06, "loss": 0.4856, "step": 46050 }, { "epoch": 11.486284289276808, "grad_norm": 8.584773063659668, "learning_rate": 8.518952618453866e-06, "loss": 0.3588, "step": 46060 }, { "epoch": 11.488778054862843, "grad_norm": 5.824810981750488, "learning_rate": 8.516458852867831e-06, "loss": 0.2905, "step": 46070 }, { "epoch": 11.491271820448878, "grad_norm": 8.207144737243652, "learning_rate": 8.513965087281796e-06, "loss": 0.3239, "step": 46080 }, { "epoch": 11.493765586034913, "grad_norm": 8.661499977111816, "learning_rate": 8.511471321695762e-06, "loss": 0.3761, "step": 46090 }, { "epoch": 11.496259351620948, "grad_norm": 6.3622941970825195, "learning_rate": 8.508977556109727e-06, "loss": 0.346, "step": 46100 }, { "epoch": 11.498753117206983, "grad_norm": 5.697100639343262, "learning_rate": 8.506483790523692e-06, "loss": 0.3489, "step": 46110 }, { "epoch": 11.501246882793017, "grad_norm": 10.397525787353516, "learning_rate": 8.503990024937656e-06, "loss": 0.4333, "step": 46120 }, { "epoch": 11.503740648379052, "grad_norm": 11.356361389160156, "learning_rate": 8.501496259351621e-06, "loss": 0.3788, "step": 46130 }, { "epoch": 11.506234413965087, "grad_norm": 10.53273868560791, "learning_rate": 8.499002493765586e-06, "loss": 0.4482, "step": 46140 }, { "epoch": 11.508728179551122, "grad_norm": 7.545499324798584, "learning_rate": 8.496508728179552e-06, "loss": 0.3394, "step": 46150 }, { "epoch": 11.511221945137157, "grad_norm": 5.596482276916504, "learning_rate": 8.494014962593517e-06, "loss": 0.3306, "step": 46160 }, { "epoch": 11.513715710723192, "grad_norm": 7.381369590759277, "learning_rate": 8.491521197007482e-06, "loss": 0.3239, "step": 46170 }, { "epoch": 11.516209476309227, "grad_norm": 9.224650382995605, "learning_rate": 8.489027431421448e-06, "loss": 0.3463, "step": 46180 }, { "epoch": 11.518703241895262, "grad_norm": 6.10484504699707, "learning_rate": 8.486533665835413e-06, "loss": 0.457, "step": 46190 }, { "epoch": 11.521197007481296, "grad_norm": 8.393863677978516, "learning_rate": 8.484039900249377e-06, "loss": 0.3499, "step": 46200 }, { "epoch": 11.523690773067331, "grad_norm": 10.636434555053711, "learning_rate": 8.481546134663342e-06, "loss": 0.429, "step": 46210 }, { "epoch": 11.526184538653366, "grad_norm": 8.618541717529297, "learning_rate": 8.479052369077307e-06, "loss": 0.355, "step": 46220 }, { "epoch": 11.528678304239401, "grad_norm": 5.982786655426025, "learning_rate": 8.476558603491272e-06, "loss": 0.3373, "step": 46230 }, { "epoch": 11.531172069825436, "grad_norm": 10.3162202835083, "learning_rate": 8.474064837905238e-06, "loss": 0.2873, "step": 46240 }, { "epoch": 11.53366583541147, "grad_norm": 5.130282878875732, "learning_rate": 8.471571072319203e-06, "loss": 0.2964, "step": 46250 }, { "epoch": 11.536159600997506, "grad_norm": 5.8889360427856445, "learning_rate": 8.469077306733168e-06, "loss": 0.3102, "step": 46260 }, { "epoch": 11.53865336658354, "grad_norm": 4.879234313964844, "learning_rate": 8.466583541147134e-06, "loss": 0.3211, "step": 46270 }, { "epoch": 11.541147132169575, "grad_norm": 7.243957042694092, "learning_rate": 8.464089775561099e-06, "loss": 0.3416, "step": 46280 }, { "epoch": 11.54364089775561, "grad_norm": 8.841197967529297, "learning_rate": 8.461596009975063e-06, "loss": 0.3346, "step": 46290 }, { "epoch": 11.546134663341645, "grad_norm": 7.920102119445801, "learning_rate": 8.459102244389028e-06, "loss": 0.3587, "step": 46300 }, { "epoch": 11.548628428927682, "grad_norm": 8.432982444763184, "learning_rate": 8.456608478802993e-06, "loss": 0.3364, "step": 46310 }, { "epoch": 11.551122194513717, "grad_norm": 7.410879135131836, "learning_rate": 8.454114713216959e-06, "loss": 0.4021, "step": 46320 }, { "epoch": 11.553615960099751, "grad_norm": 9.919821739196777, "learning_rate": 8.451620947630922e-06, "loss": 0.4665, "step": 46330 }, { "epoch": 11.556109725685786, "grad_norm": 6.146061420440674, "learning_rate": 8.449127182044888e-06, "loss": 0.3562, "step": 46340 }, { "epoch": 11.558603491271821, "grad_norm": 6.502902507781982, "learning_rate": 8.446633416458855e-06, "loss": 0.3341, "step": 46350 }, { "epoch": 11.561097256857856, "grad_norm": 7.523041725158691, "learning_rate": 8.44413965087282e-06, "loss": 0.3158, "step": 46360 }, { "epoch": 11.563591022443891, "grad_norm": 5.467228412628174, "learning_rate": 8.441645885286783e-06, "loss": 0.3464, "step": 46370 }, { "epoch": 11.566084788029926, "grad_norm": 7.694350242614746, "learning_rate": 8.439152119700749e-06, "loss": 0.3111, "step": 46380 }, { "epoch": 11.56857855361596, "grad_norm": 13.799556732177734, "learning_rate": 8.436658354114714e-06, "loss": 0.3468, "step": 46390 }, { "epoch": 11.571072319201996, "grad_norm": 8.76827621459961, "learning_rate": 8.43416458852868e-06, "loss": 0.3029, "step": 46400 }, { "epoch": 11.57356608478803, "grad_norm": 11.704144477844238, "learning_rate": 8.431670822942643e-06, "loss": 0.3546, "step": 46410 }, { "epoch": 11.576059850374065, "grad_norm": 8.31985855102539, "learning_rate": 8.429177057356608e-06, "loss": 0.3563, "step": 46420 }, { "epoch": 11.5785536159601, "grad_norm": 6.618094444274902, "learning_rate": 8.426683291770574e-06, "loss": 0.2983, "step": 46430 }, { "epoch": 11.581047381546135, "grad_norm": 13.60894775390625, "learning_rate": 8.424189526184539e-06, "loss": 0.3255, "step": 46440 }, { "epoch": 11.58354114713217, "grad_norm": 5.429731369018555, "learning_rate": 8.421695760598504e-06, "loss": 0.3448, "step": 46450 }, { "epoch": 11.586034912718205, "grad_norm": 5.1549224853515625, "learning_rate": 8.41920199501247e-06, "loss": 0.3547, "step": 46460 }, { "epoch": 11.58852867830424, "grad_norm": 6.888156890869141, "learning_rate": 8.416708229426435e-06, "loss": 0.3126, "step": 46470 }, { "epoch": 11.591022443890274, "grad_norm": 8.632163047790527, "learning_rate": 8.4142144638404e-06, "loss": 0.3216, "step": 46480 }, { "epoch": 11.59351620947631, "grad_norm": 8.811448097229004, "learning_rate": 8.411720698254365e-06, "loss": 0.3131, "step": 46490 }, { "epoch": 11.596009975062344, "grad_norm": 7.669614791870117, "learning_rate": 8.409226932668329e-06, "loss": 0.3222, "step": 46500 }, { "epoch": 11.598503740648379, "grad_norm": 12.28158950805664, "learning_rate": 8.406733167082294e-06, "loss": 0.3609, "step": 46510 }, { "epoch": 11.600997506234414, "grad_norm": 12.088496208190918, "learning_rate": 8.40423940149626e-06, "loss": 0.3294, "step": 46520 }, { "epoch": 11.603491271820449, "grad_norm": 7.644925594329834, "learning_rate": 8.401745635910225e-06, "loss": 0.3499, "step": 46530 }, { "epoch": 11.605985037406484, "grad_norm": 8.952886581420898, "learning_rate": 8.39925187032419e-06, "loss": 0.4748, "step": 46540 }, { "epoch": 11.608478802992519, "grad_norm": 7.003511428833008, "learning_rate": 8.396758104738156e-06, "loss": 0.3068, "step": 46550 }, { "epoch": 11.610972568578553, "grad_norm": 9.938292503356934, "learning_rate": 8.394264339152121e-06, "loss": 0.3804, "step": 46560 }, { "epoch": 11.613466334164588, "grad_norm": 6.018657684326172, "learning_rate": 8.391770573566086e-06, "loss": 0.3036, "step": 46570 }, { "epoch": 11.615960099750623, "grad_norm": 4.126202583312988, "learning_rate": 8.38927680798005e-06, "loss": 0.2909, "step": 46580 }, { "epoch": 11.618453865336658, "grad_norm": 4.8967132568359375, "learning_rate": 8.386783042394015e-06, "loss": 0.2835, "step": 46590 }, { "epoch": 11.620947630922693, "grad_norm": 7.265740871429443, "learning_rate": 8.38428927680798e-06, "loss": 0.3781, "step": 46600 }, { "epoch": 11.623441396508728, "grad_norm": 8.042332649230957, "learning_rate": 8.381795511221946e-06, "loss": 0.3315, "step": 46610 }, { "epoch": 11.625935162094763, "grad_norm": 9.90203857421875, "learning_rate": 8.379301745635911e-06, "loss": 0.3179, "step": 46620 }, { "epoch": 11.628428927680797, "grad_norm": 5.170167446136475, "learning_rate": 8.376807980049876e-06, "loss": 0.3211, "step": 46630 }, { "epoch": 11.630922693266832, "grad_norm": 11.844198226928711, "learning_rate": 8.374314214463842e-06, "loss": 0.3923, "step": 46640 }, { "epoch": 11.633416458852867, "grad_norm": 6.6527862548828125, "learning_rate": 8.371820448877807e-06, "loss": 0.3308, "step": 46650 }, { "epoch": 11.635910224438902, "grad_norm": 10.204750061035156, "learning_rate": 8.36932668329177e-06, "loss": 0.4115, "step": 46660 }, { "epoch": 11.638403990024937, "grad_norm": 5.856804847717285, "learning_rate": 8.366832917705736e-06, "loss": 0.3647, "step": 46670 }, { "epoch": 11.640897755610972, "grad_norm": 5.034014701843262, "learning_rate": 8.364339152119701e-06, "loss": 0.344, "step": 46680 }, { "epoch": 11.643391521197007, "grad_norm": 6.951847076416016, "learning_rate": 8.361845386533667e-06, "loss": 0.3241, "step": 46690 }, { "epoch": 11.645885286783042, "grad_norm": 6.871116638183594, "learning_rate": 8.359351620947632e-06, "loss": 0.3133, "step": 46700 }, { "epoch": 11.648379052369076, "grad_norm": 9.318560600280762, "learning_rate": 8.356857855361597e-06, "loss": 0.3388, "step": 46710 }, { "epoch": 11.650872817955111, "grad_norm": 10.321300506591797, "learning_rate": 8.354364089775563e-06, "loss": 0.3425, "step": 46720 }, { "epoch": 11.653366583541148, "grad_norm": 6.536388874053955, "learning_rate": 8.351870324189528e-06, "loss": 0.4034, "step": 46730 }, { "epoch": 11.655860349127183, "grad_norm": 7.95648717880249, "learning_rate": 8.349376558603493e-06, "loss": 0.387, "step": 46740 }, { "epoch": 11.658354114713218, "grad_norm": 4.451998710632324, "learning_rate": 8.346882793017457e-06, "loss": 0.3272, "step": 46750 }, { "epoch": 11.660847880299253, "grad_norm": 5.903833389282227, "learning_rate": 8.344389027431422e-06, "loss": 0.3689, "step": 46760 }, { "epoch": 11.663341645885287, "grad_norm": 9.391934394836426, "learning_rate": 8.341895261845387e-06, "loss": 0.3681, "step": 46770 }, { "epoch": 11.665835411471322, "grad_norm": 7.286145210266113, "learning_rate": 8.339401496259353e-06, "loss": 0.3198, "step": 46780 }, { "epoch": 11.668329177057357, "grad_norm": 7.64100456237793, "learning_rate": 8.336907730673316e-06, "loss": 0.3116, "step": 46790 }, { "epoch": 11.670822942643392, "grad_norm": 6.605118751525879, "learning_rate": 8.334413965087282e-06, "loss": 0.2764, "step": 46800 }, { "epoch": 11.673316708229427, "grad_norm": 7.660946369171143, "learning_rate": 8.331920199501249e-06, "loss": 0.4052, "step": 46810 }, { "epoch": 11.675810473815462, "grad_norm": 5.863259315490723, "learning_rate": 8.329426433915214e-06, "loss": 0.3599, "step": 46820 }, { "epoch": 11.678304239401497, "grad_norm": 8.02147388458252, "learning_rate": 8.326932668329178e-06, "loss": 0.3401, "step": 46830 }, { "epoch": 11.680798004987532, "grad_norm": 7.035722255706787, "learning_rate": 8.324438902743143e-06, "loss": 0.3427, "step": 46840 }, { "epoch": 11.683291770573566, "grad_norm": 8.856318473815918, "learning_rate": 8.321945137157108e-06, "loss": 0.3217, "step": 46850 }, { "epoch": 11.685785536159601, "grad_norm": 12.737922668457031, "learning_rate": 8.319451371571073e-06, "loss": 0.3705, "step": 46860 }, { "epoch": 11.688279301745636, "grad_norm": 7.125262260437012, "learning_rate": 8.316957605985037e-06, "loss": 0.2677, "step": 46870 }, { "epoch": 11.690773067331671, "grad_norm": 9.105042457580566, "learning_rate": 8.314463840399002e-06, "loss": 0.3248, "step": 46880 }, { "epoch": 11.693266832917706, "grad_norm": 6.2453436851501465, "learning_rate": 8.311970074812968e-06, "loss": 0.3346, "step": 46890 }, { "epoch": 11.69576059850374, "grad_norm": 9.779679298400879, "learning_rate": 8.309476309226933e-06, "loss": 0.3915, "step": 46900 }, { "epoch": 11.698254364089776, "grad_norm": 8.361473083496094, "learning_rate": 8.306982543640898e-06, "loss": 0.3421, "step": 46910 }, { "epoch": 11.70074812967581, "grad_norm": 8.903398513793945, "learning_rate": 8.304488778054864e-06, "loss": 0.3676, "step": 46920 }, { "epoch": 11.703241895261845, "grad_norm": 7.4261016845703125, "learning_rate": 8.301995012468829e-06, "loss": 0.4291, "step": 46930 }, { "epoch": 11.70573566084788, "grad_norm": 10.814020156860352, "learning_rate": 8.299501246882794e-06, "loss": 0.3552, "step": 46940 }, { "epoch": 11.708229426433915, "grad_norm": 6.702065467834473, "learning_rate": 8.297007481296758e-06, "loss": 0.2939, "step": 46950 }, { "epoch": 11.71072319201995, "grad_norm": 8.938770294189453, "learning_rate": 8.294513715710723e-06, "loss": 0.3835, "step": 46960 }, { "epoch": 11.713216957605985, "grad_norm": 8.248211860656738, "learning_rate": 8.292019950124688e-06, "loss": 0.3454, "step": 46970 }, { "epoch": 11.71571072319202, "grad_norm": 5.559350967407227, "learning_rate": 8.289526184538654e-06, "loss": 0.4123, "step": 46980 }, { "epoch": 11.718204488778055, "grad_norm": 7.779847145080566, "learning_rate": 8.287032418952619e-06, "loss": 0.3581, "step": 46990 }, { "epoch": 11.72069825436409, "grad_norm": 6.771767616271973, "learning_rate": 8.284538653366584e-06, "loss": 0.3246, "step": 47000 }, { "epoch": 11.723192019950124, "grad_norm": 6.579812049865723, "learning_rate": 8.28204488778055e-06, "loss": 0.3924, "step": 47010 }, { "epoch": 11.72568578553616, "grad_norm": 10.849081039428711, "learning_rate": 8.279551122194515e-06, "loss": 0.4271, "step": 47020 }, { "epoch": 11.728179551122194, "grad_norm": 7.1727824211120605, "learning_rate": 8.27705735660848e-06, "loss": 0.3297, "step": 47030 }, { "epoch": 11.730673316708229, "grad_norm": 9.346254348754883, "learning_rate": 8.274563591022444e-06, "loss": 0.336, "step": 47040 }, { "epoch": 11.733167082294264, "grad_norm": 9.930221557617188, "learning_rate": 8.27206982543641e-06, "loss": 0.3422, "step": 47050 }, { "epoch": 11.735660847880299, "grad_norm": 8.86354923248291, "learning_rate": 8.269576059850375e-06, "loss": 0.3921, "step": 47060 }, { "epoch": 11.738154613466333, "grad_norm": 6.808560371398926, "learning_rate": 8.26708229426434e-06, "loss": 0.3291, "step": 47070 }, { "epoch": 11.740648379052368, "grad_norm": 8.037238121032715, "learning_rate": 8.264588528678305e-06, "loss": 0.3975, "step": 47080 }, { "epoch": 11.743142144638403, "grad_norm": 6.9091010093688965, "learning_rate": 8.26209476309227e-06, "loss": 0.2959, "step": 47090 }, { "epoch": 11.745635910224438, "grad_norm": 7.109037399291992, "learning_rate": 8.259600997506236e-06, "loss": 0.3498, "step": 47100 }, { "epoch": 11.748129675810475, "grad_norm": 13.113969802856445, "learning_rate": 8.257107231920201e-06, "loss": 0.3391, "step": 47110 }, { "epoch": 11.75062344139651, "grad_norm": 10.038171768188477, "learning_rate": 8.254613466334165e-06, "loss": 0.3651, "step": 47120 }, { "epoch": 11.753117206982544, "grad_norm": 9.342373847961426, "learning_rate": 8.25211970074813e-06, "loss": 0.3071, "step": 47130 }, { "epoch": 11.75561097256858, "grad_norm": 6.506314277648926, "learning_rate": 8.249625935162095e-06, "loss": 0.4171, "step": 47140 }, { "epoch": 11.758104738154614, "grad_norm": 9.478023529052734, "learning_rate": 8.24713216957606e-06, "loss": 0.3932, "step": 47150 }, { "epoch": 11.760598503740649, "grad_norm": 6.497440814971924, "learning_rate": 8.244638403990026e-06, "loss": 0.3667, "step": 47160 }, { "epoch": 11.763092269326684, "grad_norm": 7.87533712387085, "learning_rate": 8.242144638403991e-06, "loss": 0.3391, "step": 47170 }, { "epoch": 11.765586034912719, "grad_norm": 5.2254557609558105, "learning_rate": 8.239650872817957e-06, "loss": 0.3359, "step": 47180 }, { "epoch": 11.768079800498754, "grad_norm": 6.769067287445068, "learning_rate": 8.237157107231922e-06, "loss": 0.3649, "step": 47190 }, { "epoch": 11.770573566084789, "grad_norm": 9.426133155822754, "learning_rate": 8.234663341645886e-06, "loss": 0.3654, "step": 47200 }, { "epoch": 11.773067331670823, "grad_norm": 14.436060905456543, "learning_rate": 8.23216957605985e-06, "loss": 0.404, "step": 47210 }, { "epoch": 11.775561097256858, "grad_norm": 5.8233137130737305, "learning_rate": 8.229675810473816e-06, "loss": 0.3201, "step": 47220 }, { "epoch": 11.778054862842893, "grad_norm": 8.890005111694336, "learning_rate": 8.227182044887781e-06, "loss": 0.3244, "step": 47230 }, { "epoch": 11.780548628428928, "grad_norm": 9.727415084838867, "learning_rate": 8.224688279301747e-06, "loss": 0.3729, "step": 47240 }, { "epoch": 11.783042394014963, "grad_norm": 7.346099853515625, "learning_rate": 8.22219451371571e-06, "loss": 0.3708, "step": 47250 }, { "epoch": 11.785536159600998, "grad_norm": 7.652705192565918, "learning_rate": 8.219700748129676e-06, "loss": 0.3199, "step": 47260 }, { "epoch": 11.788029925187033, "grad_norm": 9.516809463500977, "learning_rate": 8.217206982543641e-06, "loss": 0.3202, "step": 47270 }, { "epoch": 11.790523690773068, "grad_norm": 6.376964569091797, "learning_rate": 8.214713216957608e-06, "loss": 0.3306, "step": 47280 }, { "epoch": 11.793017456359102, "grad_norm": 7.07741641998291, "learning_rate": 8.212219451371572e-06, "loss": 0.3668, "step": 47290 }, { "epoch": 11.795511221945137, "grad_norm": 8.181482315063477, "learning_rate": 8.209725685785537e-06, "loss": 0.3843, "step": 47300 }, { "epoch": 11.798004987531172, "grad_norm": 9.766548156738281, "learning_rate": 8.207231920199502e-06, "loss": 0.3964, "step": 47310 }, { "epoch": 11.800498753117207, "grad_norm": 7.150549411773682, "learning_rate": 8.204738154613468e-06, "loss": 0.3973, "step": 47320 }, { "epoch": 11.802992518703242, "grad_norm": 6.823990821838379, "learning_rate": 8.202244389027431e-06, "loss": 0.3975, "step": 47330 }, { "epoch": 11.805486284289277, "grad_norm": 7.102343559265137, "learning_rate": 8.199750623441396e-06, "loss": 0.3856, "step": 47340 }, { "epoch": 11.807980049875312, "grad_norm": 7.7780585289001465, "learning_rate": 8.197256857855362e-06, "loss": 0.3287, "step": 47350 }, { "epoch": 11.810473815461346, "grad_norm": 16.928571701049805, "learning_rate": 8.194763092269327e-06, "loss": 0.315, "step": 47360 }, { "epoch": 11.812967581047381, "grad_norm": 9.088364601135254, "learning_rate": 8.192269326683292e-06, "loss": 0.3788, "step": 47370 }, { "epoch": 11.815461346633416, "grad_norm": 6.3990044593811035, "learning_rate": 8.189775561097258e-06, "loss": 0.3317, "step": 47380 }, { "epoch": 11.817955112219451, "grad_norm": 8.615131378173828, "learning_rate": 8.187281795511223e-06, "loss": 0.449, "step": 47390 }, { "epoch": 11.820448877805486, "grad_norm": 5.688783168792725, "learning_rate": 8.184788029925188e-06, "loss": 0.3203, "step": 47400 }, { "epoch": 11.82294264339152, "grad_norm": 9.329352378845215, "learning_rate": 8.182294264339152e-06, "loss": 0.4166, "step": 47410 }, { "epoch": 11.825436408977556, "grad_norm": 5.748128414154053, "learning_rate": 8.179800498753117e-06, "loss": 0.3375, "step": 47420 }, { "epoch": 11.82793017456359, "grad_norm": 8.721724510192871, "learning_rate": 8.177306733167083e-06, "loss": 0.361, "step": 47430 }, { "epoch": 11.830423940149625, "grad_norm": 8.815082550048828, "learning_rate": 8.174812967581048e-06, "loss": 0.3311, "step": 47440 }, { "epoch": 11.83291770573566, "grad_norm": 9.213290214538574, "learning_rate": 8.172319201995013e-06, "loss": 0.3334, "step": 47450 }, { "epoch": 11.835411471321695, "grad_norm": 10.43193531036377, "learning_rate": 8.169825436408978e-06, "loss": 0.4101, "step": 47460 }, { "epoch": 11.83790523690773, "grad_norm": 7.8552350997924805, "learning_rate": 8.167331670822944e-06, "loss": 0.3305, "step": 47470 }, { "epoch": 11.840399002493765, "grad_norm": 8.425485610961914, "learning_rate": 8.164837905236909e-06, "loss": 0.4038, "step": 47480 }, { "epoch": 11.8428927680798, "grad_norm": 7.427910804748535, "learning_rate": 8.162344139650874e-06, "loss": 0.3453, "step": 47490 }, { "epoch": 11.845386533665835, "grad_norm": 4.184894561767578, "learning_rate": 8.159850374064838e-06, "loss": 0.3295, "step": 47500 }, { "epoch": 11.84788029925187, "grad_norm": 9.951261520385742, "learning_rate": 8.157356608478803e-06, "loss": 0.3587, "step": 47510 }, { "epoch": 11.850374064837904, "grad_norm": 9.598414421081543, "learning_rate": 8.154862842892769e-06, "loss": 0.4135, "step": 47520 }, { "epoch": 11.85286783042394, "grad_norm": 7.297958850860596, "learning_rate": 8.152369077306734e-06, "loss": 0.3501, "step": 47530 }, { "epoch": 11.855361596009976, "grad_norm": 6.240098476409912, "learning_rate": 8.1498753117207e-06, "loss": 0.344, "step": 47540 }, { "epoch": 11.85785536159601, "grad_norm": 7.912670135498047, "learning_rate": 8.147381546134665e-06, "loss": 0.2993, "step": 47550 }, { "epoch": 11.860349127182046, "grad_norm": 7.632608890533447, "learning_rate": 8.14488778054863e-06, "loss": 0.3528, "step": 47560 }, { "epoch": 11.86284289276808, "grad_norm": 12.331526756286621, "learning_rate": 8.142394014962595e-06, "loss": 0.4064, "step": 47570 }, { "epoch": 11.865336658354115, "grad_norm": 10.794772148132324, "learning_rate": 8.139900249376559e-06, "loss": 0.3422, "step": 47580 }, { "epoch": 11.86783042394015, "grad_norm": 9.130438804626465, "learning_rate": 8.137406483790524e-06, "loss": 0.3605, "step": 47590 }, { "epoch": 11.870324189526185, "grad_norm": 6.942079067230225, "learning_rate": 8.13491271820449e-06, "loss": 0.3104, "step": 47600 }, { "epoch": 11.87281795511222, "grad_norm": 12.40993595123291, "learning_rate": 8.132418952618455e-06, "loss": 0.2948, "step": 47610 }, { "epoch": 11.875311720698255, "grad_norm": 6.655416011810303, "learning_rate": 8.129925187032418e-06, "loss": 0.3096, "step": 47620 }, { "epoch": 11.87780548628429, "grad_norm": 6.1850175857543945, "learning_rate": 8.127431421446385e-06, "loss": 0.3626, "step": 47630 }, { "epoch": 11.880299251870325, "grad_norm": 9.720369338989258, "learning_rate": 8.12493765586035e-06, "loss": 0.3307, "step": 47640 }, { "epoch": 11.88279301745636, "grad_norm": 9.563785552978516, "learning_rate": 8.122443890274316e-06, "loss": 0.3062, "step": 47650 }, { "epoch": 11.885286783042394, "grad_norm": 10.218947410583496, "learning_rate": 8.11995012468828e-06, "loss": 0.4228, "step": 47660 }, { "epoch": 11.88778054862843, "grad_norm": 5.189842700958252, "learning_rate": 8.117456359102245e-06, "loss": 0.354, "step": 47670 }, { "epoch": 11.890274314214464, "grad_norm": 6.639830112457275, "learning_rate": 8.11496259351621e-06, "loss": 0.3331, "step": 47680 }, { "epoch": 11.892768079800499, "grad_norm": 8.736455917358398, "learning_rate": 8.112468827930176e-06, "loss": 0.3691, "step": 47690 }, { "epoch": 11.895261845386534, "grad_norm": 7.995376110076904, "learning_rate": 8.109975062344139e-06, "loss": 0.3516, "step": 47700 }, { "epoch": 11.897755610972569, "grad_norm": 7.362260341644287, "learning_rate": 8.107481296758104e-06, "loss": 0.4472, "step": 47710 }, { "epoch": 11.900249376558603, "grad_norm": 9.83117961883545, "learning_rate": 8.10498753117207e-06, "loss": 0.3402, "step": 47720 }, { "epoch": 11.902743142144638, "grad_norm": 7.758623123168945, "learning_rate": 8.102493765586035e-06, "loss": 0.3799, "step": 47730 }, { "epoch": 11.905236907730673, "grad_norm": 9.175792694091797, "learning_rate": 8.1e-06, "loss": 0.391, "step": 47740 }, { "epoch": 11.907730673316708, "grad_norm": 7.377160549163818, "learning_rate": 8.097506234413966e-06, "loss": 0.3799, "step": 47750 }, { "epoch": 11.910224438902743, "grad_norm": 5.763824462890625, "learning_rate": 8.095012468827931e-06, "loss": 0.348, "step": 47760 }, { "epoch": 11.912718204488778, "grad_norm": 8.172435760498047, "learning_rate": 8.092518703241896e-06, "loss": 0.4006, "step": 47770 }, { "epoch": 11.915211970074813, "grad_norm": 9.261870384216309, "learning_rate": 8.090024937655862e-06, "loss": 0.3811, "step": 47780 }, { "epoch": 11.917705735660848, "grad_norm": 11.933865547180176, "learning_rate": 8.087531172069825e-06, "loss": 0.4131, "step": 47790 }, { "epoch": 11.920199501246882, "grad_norm": 8.930726051330566, "learning_rate": 8.08503740648379e-06, "loss": 0.3954, "step": 47800 }, { "epoch": 11.922693266832917, "grad_norm": 7.120776653289795, "learning_rate": 8.082543640897756e-06, "loss": 0.3536, "step": 47810 }, { "epoch": 11.925187032418952, "grad_norm": 6.882978439331055, "learning_rate": 8.080049875311721e-06, "loss": 0.351, "step": 47820 }, { "epoch": 11.927680798004987, "grad_norm": 7.883481502532959, "learning_rate": 8.077556109725686e-06, "loss": 0.3385, "step": 47830 }, { "epoch": 11.930174563591022, "grad_norm": 8.083521842956543, "learning_rate": 8.075062344139652e-06, "loss": 0.31, "step": 47840 }, { "epoch": 11.932668329177057, "grad_norm": 7.6285080909729, "learning_rate": 8.072568578553617e-06, "loss": 0.3895, "step": 47850 }, { "epoch": 11.935162094763092, "grad_norm": 5.246705532073975, "learning_rate": 8.070074812967582e-06, "loss": 0.3273, "step": 47860 }, { "epoch": 11.937655860349127, "grad_norm": 7.005518913269043, "learning_rate": 8.067581047381546e-06, "loss": 0.3466, "step": 47870 }, { "epoch": 11.940149625935161, "grad_norm": 10.184428215026855, "learning_rate": 8.065087281795511e-06, "loss": 0.3226, "step": 47880 }, { "epoch": 11.942643391521196, "grad_norm": 6.404749870300293, "learning_rate": 8.062593516209477e-06, "loss": 0.3122, "step": 47890 }, { "epoch": 11.945137157107231, "grad_norm": 9.22337818145752, "learning_rate": 8.060099750623442e-06, "loss": 0.4094, "step": 47900 }, { "epoch": 11.947630922693268, "grad_norm": 6.901223182678223, "learning_rate": 8.057605985037407e-06, "loss": 0.3463, "step": 47910 }, { "epoch": 11.950124688279303, "grad_norm": 7.0179033279418945, "learning_rate": 8.055112219451373e-06, "loss": 0.335, "step": 47920 }, { "epoch": 11.952618453865338, "grad_norm": 8.986421585083008, "learning_rate": 8.052618453865338e-06, "loss": 0.3582, "step": 47930 }, { "epoch": 11.955112219451372, "grad_norm": 7.0583577156066895, "learning_rate": 8.050124688279303e-06, "loss": 0.3613, "step": 47940 }, { "epoch": 11.957605985037407, "grad_norm": 6.976990222930908, "learning_rate": 8.047630922693267e-06, "loss": 0.3075, "step": 47950 }, { "epoch": 11.960099750623442, "grad_norm": 7.128075122833252, "learning_rate": 8.045137157107232e-06, "loss": 0.4033, "step": 47960 }, { "epoch": 11.962593516209477, "grad_norm": 7.31989049911499, "learning_rate": 8.042643391521197e-06, "loss": 0.3093, "step": 47970 }, { "epoch": 11.965087281795512, "grad_norm": 4.972882270812988, "learning_rate": 8.040149625935163e-06, "loss": 0.299, "step": 47980 }, { "epoch": 11.967581047381547, "grad_norm": 11.064085006713867, "learning_rate": 8.037655860349128e-06, "loss": 0.3248, "step": 47990 }, { "epoch": 11.970074812967582, "grad_norm": 13.1663179397583, "learning_rate": 8.035162094763093e-06, "loss": 0.3242, "step": 48000 }, { "epoch": 11.972568578553616, "grad_norm": 5.935441970825195, "learning_rate": 8.032668329177059e-06, "loss": 0.3544, "step": 48010 }, { "epoch": 11.975062344139651, "grad_norm": 7.8656840324401855, "learning_rate": 8.030174563591024e-06, "loss": 0.2541, "step": 48020 }, { "epoch": 11.977556109725686, "grad_norm": 11.03922176361084, "learning_rate": 8.02768079800499e-06, "loss": 0.3548, "step": 48030 }, { "epoch": 11.980049875311721, "grad_norm": 8.151224136352539, "learning_rate": 8.025187032418953e-06, "loss": 0.3088, "step": 48040 }, { "epoch": 11.982543640897756, "grad_norm": 9.86962890625, "learning_rate": 8.022693266832918e-06, "loss": 0.3831, "step": 48050 }, { "epoch": 11.98503740648379, "grad_norm": 6.20054292678833, "learning_rate": 8.020199501246884e-06, "loss": 0.3202, "step": 48060 }, { "epoch": 11.987531172069826, "grad_norm": 7.702700614929199, "learning_rate": 8.017705735660849e-06, "loss": 0.3719, "step": 48070 }, { "epoch": 11.99002493765586, "grad_norm": 8.15425968170166, "learning_rate": 8.015211970074812e-06, "loss": 0.3887, "step": 48080 }, { "epoch": 11.992518703241895, "grad_norm": 4.551555633544922, "learning_rate": 8.012718204488778e-06, "loss": 0.3482, "step": 48090 }, { "epoch": 11.99501246882793, "grad_norm": 5.3351640701293945, "learning_rate": 8.010224438902745e-06, "loss": 0.3449, "step": 48100 }, { "epoch": 11.997506234413965, "grad_norm": 9.141931533813477, "learning_rate": 8.00773067331671e-06, "loss": 0.3044, "step": 48110 }, { "epoch": 12.0, "grad_norm": 4.466921806335449, "learning_rate": 8.005236907730674e-06, "loss": 0.3335, "step": 48120 }, { "epoch": 12.0, "eval_loss": 0.41446128487586975, "eval_runtime": 60.0916, "eval_samples_per_second": 16.691, "eval_steps_per_second": 16.691, "step": 48120 }, { "epoch": 12.002493765586035, "grad_norm": 7.059330463409424, "learning_rate": 8.002743142144639e-06, "loss": 0.3357, "step": 48130 }, { "epoch": 12.00498753117207, "grad_norm": 7.653445720672607, "learning_rate": 8.000249376558604e-06, "loss": 0.4164, "step": 48140 }, { "epoch": 12.007481296758105, "grad_norm": 6.720803737640381, "learning_rate": 7.99775561097257e-06, "loss": 0.3236, "step": 48150 }, { "epoch": 12.00997506234414, "grad_norm": 5.9595746994018555, "learning_rate": 7.995261845386533e-06, "loss": 0.3662, "step": 48160 }, { "epoch": 12.012468827930174, "grad_norm": 6.095608234405518, "learning_rate": 7.992768079800499e-06, "loss": 0.3405, "step": 48170 }, { "epoch": 12.01496259351621, "grad_norm": 8.565361022949219, "learning_rate": 7.990274314214464e-06, "loss": 0.2904, "step": 48180 }, { "epoch": 12.017456359102244, "grad_norm": 6.547665119171143, "learning_rate": 7.987780548628429e-06, "loss": 0.3475, "step": 48190 }, { "epoch": 12.019950124688279, "grad_norm": 10.47833251953125, "learning_rate": 7.985286783042394e-06, "loss": 0.3089, "step": 48200 }, { "epoch": 12.022443890274314, "grad_norm": 15.010177612304688, "learning_rate": 7.98279301745636e-06, "loss": 0.3161, "step": 48210 }, { "epoch": 12.024937655860349, "grad_norm": 9.223240852355957, "learning_rate": 7.980299251870325e-06, "loss": 0.3958, "step": 48220 }, { "epoch": 12.027431421446384, "grad_norm": 7.6100382804870605, "learning_rate": 7.97780548628429e-06, "loss": 0.3337, "step": 48230 }, { "epoch": 12.029925187032418, "grad_norm": 8.66632080078125, "learning_rate": 7.975311720698256e-06, "loss": 0.3571, "step": 48240 }, { "epoch": 12.032418952618453, "grad_norm": 8.049689292907715, "learning_rate": 7.97281795511222e-06, "loss": 0.398, "step": 48250 }, { "epoch": 12.034912718204488, "grad_norm": 9.214011192321777, "learning_rate": 7.970324189526185e-06, "loss": 0.3354, "step": 48260 }, { "epoch": 12.037406483790523, "grad_norm": 7.5215229988098145, "learning_rate": 7.96783042394015e-06, "loss": 0.3055, "step": 48270 }, { "epoch": 12.039900249376558, "grad_norm": 5.8552327156066895, "learning_rate": 7.965336658354115e-06, "loss": 0.3736, "step": 48280 }, { "epoch": 12.042394014962593, "grad_norm": 8.036945343017578, "learning_rate": 7.96284289276808e-06, "loss": 0.3435, "step": 48290 }, { "epoch": 12.044887780548628, "grad_norm": 9.113265037536621, "learning_rate": 7.960349127182046e-06, "loss": 0.3947, "step": 48300 }, { "epoch": 12.047381546134662, "grad_norm": 8.261163711547852, "learning_rate": 7.957855361596011e-06, "loss": 0.3772, "step": 48310 }, { "epoch": 12.049875311720697, "grad_norm": 8.655675888061523, "learning_rate": 7.955361596009976e-06, "loss": 0.4154, "step": 48320 }, { "epoch": 12.052369077306734, "grad_norm": 7.443731307983398, "learning_rate": 7.95286783042394e-06, "loss": 0.3444, "step": 48330 }, { "epoch": 12.054862842892769, "grad_norm": 8.582282066345215, "learning_rate": 7.950374064837905e-06, "loss": 0.3394, "step": 48340 }, { "epoch": 12.057356608478804, "grad_norm": 4.728538513183594, "learning_rate": 7.94788029925187e-06, "loss": 0.334, "step": 48350 }, { "epoch": 12.059850374064839, "grad_norm": 7.743102073669434, "learning_rate": 7.945386533665836e-06, "loss": 0.2972, "step": 48360 }, { "epoch": 12.062344139650873, "grad_norm": 5.460146427154541, "learning_rate": 7.942892768079801e-06, "loss": 0.3959, "step": 48370 }, { "epoch": 12.064837905236908, "grad_norm": 9.469954490661621, "learning_rate": 7.940399002493767e-06, "loss": 0.3682, "step": 48380 }, { "epoch": 12.067331670822943, "grad_norm": 8.519420623779297, "learning_rate": 7.937905236907732e-06, "loss": 0.3083, "step": 48390 }, { "epoch": 12.069825436408978, "grad_norm": 5.299350261688232, "learning_rate": 7.935411471321697e-06, "loss": 0.3441, "step": 48400 }, { "epoch": 12.072319201995013, "grad_norm": 7.708430290222168, "learning_rate": 7.932917705735661e-06, "loss": 0.3613, "step": 48410 }, { "epoch": 12.074812967581048, "grad_norm": 8.682879447937012, "learning_rate": 7.930423940149626e-06, "loss": 0.3806, "step": 48420 }, { "epoch": 12.077306733167083, "grad_norm": 7.677736282348633, "learning_rate": 7.927930174563591e-06, "loss": 0.3095, "step": 48430 }, { "epoch": 12.079800498753118, "grad_norm": 6.203550338745117, "learning_rate": 7.925436408977557e-06, "loss": 0.3583, "step": 48440 }, { "epoch": 12.082294264339152, "grad_norm": 9.091981887817383, "learning_rate": 7.922942643391522e-06, "loss": 0.3517, "step": 48450 }, { "epoch": 12.084788029925187, "grad_norm": 6.73786735534668, "learning_rate": 7.920448877805487e-06, "loss": 0.3773, "step": 48460 }, { "epoch": 12.087281795511222, "grad_norm": 10.410232543945312, "learning_rate": 7.917955112219453e-06, "loss": 0.3852, "step": 48470 }, { "epoch": 12.089775561097257, "grad_norm": 6.987138271331787, "learning_rate": 7.915461346633418e-06, "loss": 0.3285, "step": 48480 }, { "epoch": 12.092269326683292, "grad_norm": 6.348522663116455, "learning_rate": 7.912967581047383e-06, "loss": 0.3437, "step": 48490 }, { "epoch": 12.094763092269327, "grad_norm": 5.560189247131348, "learning_rate": 7.910473815461347e-06, "loss": 0.326, "step": 48500 }, { "epoch": 12.097256857855362, "grad_norm": 6.605633735656738, "learning_rate": 7.907980049875312e-06, "loss": 0.3387, "step": 48510 }, { "epoch": 12.099750623441397, "grad_norm": 11.42223072052002, "learning_rate": 7.905486284289278e-06, "loss": 0.4234, "step": 48520 }, { "epoch": 12.102244389027431, "grad_norm": 7.262038707733154, "learning_rate": 7.902992518703243e-06, "loss": 0.3345, "step": 48530 }, { "epoch": 12.104738154613466, "grad_norm": 10.022313117980957, "learning_rate": 7.900498753117207e-06, "loss": 0.3145, "step": 48540 }, { "epoch": 12.107231920199501, "grad_norm": 8.85555362701416, "learning_rate": 7.898004987531172e-06, "loss": 0.2839, "step": 48550 }, { "epoch": 12.109725685785536, "grad_norm": 7.570572376251221, "learning_rate": 7.895511221945137e-06, "loss": 0.3078, "step": 48560 }, { "epoch": 12.11221945137157, "grad_norm": 7.913335800170898, "learning_rate": 7.893017456359104e-06, "loss": 0.314, "step": 48570 }, { "epoch": 12.114713216957606, "grad_norm": 7.83607292175293, "learning_rate": 7.890523690773068e-06, "loss": 0.2858, "step": 48580 }, { "epoch": 12.11720698254364, "grad_norm": 9.3693208694458, "learning_rate": 7.888279301745636e-06, "loss": 0.2934, "step": 48590 }, { "epoch": 12.119700748129675, "grad_norm": 6.200734615325928, "learning_rate": 7.885785536159601e-06, "loss": 0.409, "step": 48600 }, { "epoch": 12.12219451371571, "grad_norm": 4.377243518829346, "learning_rate": 7.883291770573567e-06, "loss": 0.2854, "step": 48610 }, { "epoch": 12.124688279301745, "grad_norm": 9.177326202392578, "learning_rate": 7.880798004987532e-06, "loss": 0.3049, "step": 48620 }, { "epoch": 12.12718204488778, "grad_norm": 7.598722457885742, "learning_rate": 7.878304239401496e-06, "loss": 0.3693, "step": 48630 }, { "epoch": 12.129675810473815, "grad_norm": 8.089449882507324, "learning_rate": 7.875810473815461e-06, "loss": 0.3015, "step": 48640 }, { "epoch": 12.13216957605985, "grad_norm": 6.351145267486572, "learning_rate": 7.873316708229428e-06, "loss": 0.3215, "step": 48650 }, { "epoch": 12.134663341645885, "grad_norm": 4.465890884399414, "learning_rate": 7.870822942643393e-06, "loss": 0.3268, "step": 48660 }, { "epoch": 12.13715710723192, "grad_norm": 7.8744707107543945, "learning_rate": 7.868329177057359e-06, "loss": 0.3502, "step": 48670 }, { "epoch": 12.139650872817954, "grad_norm": 6.566856384277344, "learning_rate": 7.865835411471322e-06, "loss": 0.2754, "step": 48680 }, { "epoch": 12.14214463840399, "grad_norm": 8.633282661437988, "learning_rate": 7.863341645885287e-06, "loss": 0.3319, "step": 48690 }, { "epoch": 12.144638403990024, "grad_norm": 5.682807922363281, "learning_rate": 7.860847880299253e-06, "loss": 0.3287, "step": 48700 }, { "epoch": 12.147132169576059, "grad_norm": 6.86955451965332, "learning_rate": 7.858354114713218e-06, "loss": 0.3137, "step": 48710 }, { "epoch": 12.149625935162096, "grad_norm": 8.783101081848145, "learning_rate": 7.855860349127182e-06, "loss": 0.4136, "step": 48720 }, { "epoch": 12.15211970074813, "grad_norm": 3.7242095470428467, "learning_rate": 7.853366583541147e-06, "loss": 0.2878, "step": 48730 }, { "epoch": 12.154613466334165, "grad_norm": 7.640246391296387, "learning_rate": 7.850872817955112e-06, "loss": 0.3841, "step": 48740 }, { "epoch": 12.1571072319202, "grad_norm": 11.29294490814209, "learning_rate": 7.848379052369078e-06, "loss": 0.3812, "step": 48750 }, { "epoch": 12.159600997506235, "grad_norm": 7.655768871307373, "learning_rate": 7.845885286783043e-06, "loss": 0.3404, "step": 48760 }, { "epoch": 12.16209476309227, "grad_norm": 6.344391822814941, "learning_rate": 7.843391521197008e-06, "loss": 0.3118, "step": 48770 }, { "epoch": 12.164588528678305, "grad_norm": 9.420690536499023, "learning_rate": 7.840897755610974e-06, "loss": 0.3424, "step": 48780 }, { "epoch": 12.16708229426434, "grad_norm": 7.383553981781006, "learning_rate": 7.838403990024939e-06, "loss": 0.3298, "step": 48790 }, { "epoch": 12.169576059850375, "grad_norm": 6.917516708374023, "learning_rate": 7.835910224438902e-06, "loss": 0.3557, "step": 48800 }, { "epoch": 12.17206982543641, "grad_norm": 7.802567005157471, "learning_rate": 7.833416458852868e-06, "loss": 0.4077, "step": 48810 }, { "epoch": 12.174563591022444, "grad_norm": 6.911114692687988, "learning_rate": 7.830922693266833e-06, "loss": 0.2852, "step": 48820 }, { "epoch": 12.17705735660848, "grad_norm": 10.203409194946289, "learning_rate": 7.828428927680798e-06, "loss": 0.3605, "step": 48830 }, { "epoch": 12.179551122194514, "grad_norm": 9.16174030303955, "learning_rate": 7.825935162094764e-06, "loss": 0.3409, "step": 48840 }, { "epoch": 12.182044887780549, "grad_norm": 6.887585639953613, "learning_rate": 7.823441396508729e-06, "loss": 0.3659, "step": 48850 }, { "epoch": 12.184538653366584, "grad_norm": 7.380236625671387, "learning_rate": 7.820947630922694e-06, "loss": 0.3259, "step": 48860 }, { "epoch": 12.187032418952619, "grad_norm": 5.912197589874268, "learning_rate": 7.81845386533666e-06, "loss": 0.2935, "step": 48870 }, { "epoch": 12.189526184538654, "grad_norm": 10.71356201171875, "learning_rate": 7.815960099750623e-06, "loss": 0.3154, "step": 48880 }, { "epoch": 12.192019950124688, "grad_norm": 9.142561912536621, "learning_rate": 7.813466334164589e-06, "loss": 0.3491, "step": 48890 }, { "epoch": 12.194513715710723, "grad_norm": 5.923270225524902, "learning_rate": 7.810972568578554e-06, "loss": 0.3462, "step": 48900 }, { "epoch": 12.197007481296758, "grad_norm": 8.043718338012695, "learning_rate": 7.80847880299252e-06, "loss": 0.3948, "step": 48910 }, { "epoch": 12.199501246882793, "grad_norm": 10.630451202392578, "learning_rate": 7.805985037406484e-06, "loss": 0.2927, "step": 48920 }, { "epoch": 12.201995012468828, "grad_norm": 8.33590316772461, "learning_rate": 7.80349127182045e-06, "loss": 0.3302, "step": 48930 }, { "epoch": 12.204488778054863, "grad_norm": 4.248763084411621, "learning_rate": 7.800997506234415e-06, "loss": 0.2981, "step": 48940 }, { "epoch": 12.206982543640898, "grad_norm": 7.462469577789307, "learning_rate": 7.79850374064838e-06, "loss": 0.353, "step": 48950 }, { "epoch": 12.209476309226932, "grad_norm": 7.565711975097656, "learning_rate": 7.796009975062346e-06, "loss": 0.3257, "step": 48960 }, { "epoch": 12.211970074812967, "grad_norm": 5.535632133483887, "learning_rate": 7.79351620947631e-06, "loss": 0.318, "step": 48970 }, { "epoch": 12.214463840399002, "grad_norm": 8.999574661254883, "learning_rate": 7.791022443890275e-06, "loss": 0.3884, "step": 48980 }, { "epoch": 12.216957605985037, "grad_norm": 7.833513259887695, "learning_rate": 7.78852867830424e-06, "loss": 0.3175, "step": 48990 }, { "epoch": 12.219451371571072, "grad_norm": 6.90658712387085, "learning_rate": 7.786034912718205e-06, "loss": 0.2902, "step": 49000 }, { "epoch": 12.221945137157107, "grad_norm": 10.725184440612793, "learning_rate": 7.78354114713217e-06, "loss": 0.3352, "step": 49010 }, { "epoch": 12.224438902743142, "grad_norm": 5.924967288970947, "learning_rate": 7.781047381546136e-06, "loss": 0.3747, "step": 49020 }, { "epoch": 12.226932668329177, "grad_norm": 15.080745697021484, "learning_rate": 7.778553615960101e-06, "loss": 0.3795, "step": 49030 }, { "epoch": 12.229426433915211, "grad_norm": 9.706949234008789, "learning_rate": 7.776059850374066e-06, "loss": 0.3756, "step": 49040 }, { "epoch": 12.231920199501246, "grad_norm": 6.478403091430664, "learning_rate": 7.77356608478803e-06, "loss": 0.3344, "step": 49050 }, { "epoch": 12.234413965087281, "grad_norm": 20.941816329956055, "learning_rate": 7.771072319201995e-06, "loss": 0.3319, "step": 49060 }, { "epoch": 12.236907730673316, "grad_norm": 4.482845306396484, "learning_rate": 7.76857855361596e-06, "loss": 0.3203, "step": 49070 }, { "epoch": 12.239401496259351, "grad_norm": 5.751954078674316, "learning_rate": 7.766084788029926e-06, "loss": 0.3554, "step": 49080 }, { "epoch": 12.241895261845386, "grad_norm": 7.152318477630615, "learning_rate": 7.76359102244389e-06, "loss": 0.3193, "step": 49090 }, { "epoch": 12.24438902743142, "grad_norm": 7.085709571838379, "learning_rate": 7.761097256857855e-06, "loss": 0.3458, "step": 49100 }, { "epoch": 12.246882793017456, "grad_norm": 7.26195764541626, "learning_rate": 7.75860349127182e-06, "loss": 0.3644, "step": 49110 }, { "epoch": 12.24937655860349, "grad_norm": 5.610538959503174, "learning_rate": 7.756109725685787e-06, "loss": 0.3654, "step": 49120 }, { "epoch": 12.251870324189527, "grad_norm": 6.579119682312012, "learning_rate": 7.753615960099751e-06, "loss": 0.3116, "step": 49130 }, { "epoch": 12.254364089775562, "grad_norm": 9.59024429321289, "learning_rate": 7.751122194513716e-06, "loss": 0.3193, "step": 49140 }, { "epoch": 12.256857855361597, "grad_norm": 7.97966194152832, "learning_rate": 7.748628428927682e-06, "loss": 0.3293, "step": 49150 }, { "epoch": 12.259351620947632, "grad_norm": 9.137805938720703, "learning_rate": 7.746134663341647e-06, "loss": 0.409, "step": 49160 }, { "epoch": 12.261845386533667, "grad_norm": 9.003076553344727, "learning_rate": 7.743640897755612e-06, "loss": 0.3663, "step": 49170 }, { "epoch": 12.264339152119701, "grad_norm": 9.627756118774414, "learning_rate": 7.741147132169576e-06, "loss": 0.3485, "step": 49180 }, { "epoch": 12.266832917705736, "grad_norm": 5.549960613250732, "learning_rate": 7.738653366583541e-06, "loss": 0.3514, "step": 49190 }, { "epoch": 12.269326683291771, "grad_norm": 6.5951080322265625, "learning_rate": 7.736159600997506e-06, "loss": 0.3182, "step": 49200 }, { "epoch": 12.271820448877806, "grad_norm": 10.914071083068848, "learning_rate": 7.733665835411472e-06, "loss": 0.3302, "step": 49210 }, { "epoch": 12.27431421446384, "grad_norm": 8.802125930786133, "learning_rate": 7.731172069825437e-06, "loss": 0.2761, "step": 49220 }, { "epoch": 12.276807980049876, "grad_norm": 7.288644790649414, "learning_rate": 7.728678304239402e-06, "loss": 0.365, "step": 49230 }, { "epoch": 12.27930174563591, "grad_norm": 6.142082691192627, "learning_rate": 7.726184538653368e-06, "loss": 0.2988, "step": 49240 }, { "epoch": 12.281795511221945, "grad_norm": 8.087503433227539, "learning_rate": 7.723690773067333e-06, "loss": 0.3527, "step": 49250 }, { "epoch": 12.28428927680798, "grad_norm": 7.012362480163574, "learning_rate": 7.721197007481297e-06, "loss": 0.3552, "step": 49260 }, { "epoch": 12.286783042394015, "grad_norm": 5.985617160797119, "learning_rate": 7.718703241895262e-06, "loss": 0.2818, "step": 49270 }, { "epoch": 12.28927680798005, "grad_norm": 6.581480026245117, "learning_rate": 7.716209476309227e-06, "loss": 0.3897, "step": 49280 }, { "epoch": 12.291770573566085, "grad_norm": 6.56619930267334, "learning_rate": 7.713965087281795e-06, "loss": 0.4206, "step": 49290 }, { "epoch": 12.29426433915212, "grad_norm": 12.421707153320312, "learning_rate": 7.71147132169576e-06, "loss": 0.3616, "step": 49300 }, { "epoch": 12.296758104738155, "grad_norm": 5.2989726066589355, "learning_rate": 7.708977556109726e-06, "loss": 0.3153, "step": 49310 }, { "epoch": 12.29925187032419, "grad_norm": 8.248827934265137, "learning_rate": 7.706483790523691e-06, "loss": 0.4233, "step": 49320 }, { "epoch": 12.301745635910224, "grad_norm": 9.170528411865234, "learning_rate": 7.703990024937657e-06, "loss": 0.3728, "step": 49330 }, { "epoch": 12.30423940149626, "grad_norm": 8.818942070007324, "learning_rate": 7.701496259351622e-06, "loss": 0.3719, "step": 49340 }, { "epoch": 12.306733167082294, "grad_norm": 4.5717997550964355, "learning_rate": 7.699002493765587e-06, "loss": 0.3448, "step": 49350 }, { "epoch": 12.309226932668329, "grad_norm": 7.510899543762207, "learning_rate": 7.696508728179551e-06, "loss": 0.3549, "step": 49360 }, { "epoch": 12.311720698254364, "grad_norm": 9.351420402526855, "learning_rate": 7.694014962593516e-06, "loss": 0.2777, "step": 49370 }, { "epoch": 12.314214463840399, "grad_norm": 5.524614334106445, "learning_rate": 7.691521197007482e-06, "loss": 0.3373, "step": 49380 }, { "epoch": 12.316708229426434, "grad_norm": 6.964102268218994, "learning_rate": 7.689027431421447e-06, "loss": 0.3343, "step": 49390 }, { "epoch": 12.319201995012468, "grad_norm": 5.369339466094971, "learning_rate": 7.686533665835412e-06, "loss": 0.2794, "step": 49400 }, { "epoch": 12.321695760598503, "grad_norm": 8.168196678161621, "learning_rate": 7.684039900249377e-06, "loss": 0.3399, "step": 49410 }, { "epoch": 12.324189526184538, "grad_norm": 8.461885452270508, "learning_rate": 7.681546134663343e-06, "loss": 0.363, "step": 49420 }, { "epoch": 12.326683291770573, "grad_norm": 8.73070240020752, "learning_rate": 7.679052369077308e-06, "loss": 0.352, "step": 49430 }, { "epoch": 12.329177057356608, "grad_norm": 5.573559284210205, "learning_rate": 7.676558603491272e-06, "loss": 0.2903, "step": 49440 }, { "epoch": 12.331670822942643, "grad_norm": 8.736505508422852, "learning_rate": 7.674064837905237e-06, "loss": 0.3443, "step": 49450 }, { "epoch": 12.334164588528678, "grad_norm": 8.074347496032715, "learning_rate": 7.671571072319202e-06, "loss": 0.3055, "step": 49460 }, { "epoch": 12.336658354114713, "grad_norm": 10.68688678741455, "learning_rate": 7.669077306733168e-06, "loss": 0.3402, "step": 49470 }, { "epoch": 12.339152119700747, "grad_norm": 6.418087005615234, "learning_rate": 7.666583541147133e-06, "loss": 0.3485, "step": 49480 }, { "epoch": 12.341645885286782, "grad_norm": 6.3780903816223145, "learning_rate": 7.664089775561098e-06, "loss": 0.3058, "step": 49490 }, { "epoch": 12.344139650872817, "grad_norm": 6.528293132781982, "learning_rate": 7.661596009975064e-06, "loss": 0.3228, "step": 49500 }, { "epoch": 12.346633416458852, "grad_norm": 12.134942054748535, "learning_rate": 7.659102244389029e-06, "loss": 0.3832, "step": 49510 }, { "epoch": 12.349127182044889, "grad_norm": 9.033289909362793, "learning_rate": 7.656608478802992e-06, "loss": 0.3629, "step": 49520 }, { "epoch": 12.351620947630924, "grad_norm": 5.786545276641846, "learning_rate": 7.654114713216958e-06, "loss": 0.386, "step": 49530 }, { "epoch": 12.354114713216958, "grad_norm": 5.447564601898193, "learning_rate": 7.651620947630923e-06, "loss": 0.2737, "step": 49540 }, { "epoch": 12.356608478802993, "grad_norm": 6.9369916915893555, "learning_rate": 7.649127182044888e-06, "loss": 0.4029, "step": 49550 }, { "epoch": 12.359102244389028, "grad_norm": 5.646476745605469, "learning_rate": 7.646633416458854e-06, "loss": 0.3803, "step": 49560 }, { "epoch": 12.361596009975063, "grad_norm": 8.804864883422852, "learning_rate": 7.644139650872819e-06, "loss": 0.3256, "step": 49570 }, { "epoch": 12.364089775561098, "grad_norm": 4.971907615661621, "learning_rate": 7.641645885286784e-06, "loss": 0.3518, "step": 49580 }, { "epoch": 12.366583541147133, "grad_norm": 7.365206718444824, "learning_rate": 7.63915211970075e-06, "loss": 0.3382, "step": 49590 }, { "epoch": 12.369077306733168, "grad_norm": 4.726197719573975, "learning_rate": 7.636658354114715e-06, "loss": 0.325, "step": 49600 }, { "epoch": 12.371571072319203, "grad_norm": 8.982644081115723, "learning_rate": 7.634164588528679e-06, "loss": 0.3807, "step": 49610 }, { "epoch": 12.374064837905237, "grad_norm": 6.123873233795166, "learning_rate": 7.631670822942644e-06, "loss": 0.3543, "step": 49620 }, { "epoch": 12.376558603491272, "grad_norm": 7.114482879638672, "learning_rate": 7.629177057356609e-06, "loss": 0.3948, "step": 49630 }, { "epoch": 12.379052369077307, "grad_norm": 6.665993690490723, "learning_rate": 7.6266832917705745e-06, "loss": 0.2877, "step": 49640 }, { "epoch": 12.381546134663342, "grad_norm": 8.79172134399414, "learning_rate": 7.624189526184539e-06, "loss": 0.3899, "step": 49650 }, { "epoch": 12.384039900249377, "grad_norm": 6.153012752532959, "learning_rate": 7.621695760598504e-06, "loss": 0.318, "step": 49660 }, { "epoch": 12.386533665835412, "grad_norm": 8.169590950012207, "learning_rate": 7.6192019950124696e-06, "loss": 0.3068, "step": 49670 }, { "epoch": 12.389027431421447, "grad_norm": 8.219849586486816, "learning_rate": 7.616708229426435e-06, "loss": 0.3231, "step": 49680 }, { "epoch": 12.391521197007481, "grad_norm": 7.9375152587890625, "learning_rate": 7.614214463840399e-06, "loss": 0.2944, "step": 49690 }, { "epoch": 12.394014962593516, "grad_norm": 6.999446392059326, "learning_rate": 7.611720698254365e-06, "loss": 0.3445, "step": 49700 }, { "epoch": 12.396508728179551, "grad_norm": 5.5128278732299805, "learning_rate": 7.60922693266833e-06, "loss": 0.3404, "step": 49710 }, { "epoch": 12.399002493765586, "grad_norm": 17.4468994140625, "learning_rate": 7.606733167082295e-06, "loss": 0.3414, "step": 49720 }, { "epoch": 12.401496259351621, "grad_norm": 7.349917888641357, "learning_rate": 7.60423940149626e-06, "loss": 0.4121, "step": 49730 }, { "epoch": 12.403990024937656, "grad_norm": 7.664532661437988, "learning_rate": 7.601745635910225e-06, "loss": 0.3575, "step": 49740 }, { "epoch": 12.40648379052369, "grad_norm": 7.233582496643066, "learning_rate": 7.59925187032419e-06, "loss": 0.3506, "step": 49750 }, { "epoch": 12.408977556109726, "grad_norm": 5.654517650604248, "learning_rate": 7.596758104738156e-06, "loss": 0.3195, "step": 49760 }, { "epoch": 12.41147132169576, "grad_norm": 10.009135246276855, "learning_rate": 7.59426433915212e-06, "loss": 0.2798, "step": 49770 }, { "epoch": 12.413965087281795, "grad_norm": 6.770684719085693, "learning_rate": 7.5917705735660854e-06, "loss": 0.3402, "step": 49780 }, { "epoch": 12.41645885286783, "grad_norm": 7.9065093994140625, "learning_rate": 7.589276807980051e-06, "loss": 0.2968, "step": 49790 }, { "epoch": 12.418952618453865, "grad_norm": 6.457505226135254, "learning_rate": 7.586783042394016e-06, "loss": 0.2829, "step": 49800 }, { "epoch": 12.4214463840399, "grad_norm": 10.425678253173828, "learning_rate": 7.584289276807981e-06, "loss": 0.3081, "step": 49810 }, { "epoch": 12.423940149625935, "grad_norm": 9.779804229736328, "learning_rate": 7.581795511221946e-06, "loss": 0.3627, "step": 49820 }, { "epoch": 12.42643391521197, "grad_norm": 8.749654769897461, "learning_rate": 7.579301745635911e-06, "loss": 0.31, "step": 49830 }, { "epoch": 12.428927680798004, "grad_norm": 5.994139194488525, "learning_rate": 7.5768079800498764e-06, "loss": 0.3538, "step": 49840 }, { "epoch": 12.43142144638404, "grad_norm": 9.143582344055176, "learning_rate": 7.574314214463842e-06, "loss": 0.3849, "step": 49850 }, { "epoch": 12.433915211970074, "grad_norm": 5.614033222198486, "learning_rate": 7.571820448877805e-06, "loss": 0.3452, "step": 49860 }, { "epoch": 12.436408977556109, "grad_norm": 5.022126197814941, "learning_rate": 7.5693266832917715e-06, "loss": 0.3065, "step": 49870 }, { "epoch": 12.438902743142144, "grad_norm": 8.072562217712402, "learning_rate": 7.566832917705737e-06, "loss": 0.3097, "step": 49880 }, { "epoch": 12.441396508728179, "grad_norm": 11.745954513549805, "learning_rate": 7.564339152119702e-06, "loss": 0.3353, "step": 49890 }, { "epoch": 12.443890274314214, "grad_norm": 7.331192970275879, "learning_rate": 7.561845386533666e-06, "loss": 0.3727, "step": 49900 }, { "epoch": 12.446384039900249, "grad_norm": 8.64783000946045, "learning_rate": 7.559351620947631e-06, "loss": 0.3649, "step": 49910 }, { "epoch": 12.448877805486283, "grad_norm": 8.688132286071777, "learning_rate": 7.556857855361596e-06, "loss": 0.3288, "step": 49920 }, { "epoch": 12.451371571072318, "grad_norm": 9.612068176269531, "learning_rate": 7.5543640897755625e-06, "loss": 0.3738, "step": 49930 }, { "epoch": 12.453865336658355, "grad_norm": 10.88000202178955, "learning_rate": 7.551870324189526e-06, "loss": 0.3455, "step": 49940 }, { "epoch": 12.45635910224439, "grad_norm": 6.210239887237549, "learning_rate": 7.5493765586034915e-06, "loss": 0.3381, "step": 49950 }, { "epoch": 12.458852867830425, "grad_norm": 6.904968738555908, "learning_rate": 7.546882793017457e-06, "loss": 0.323, "step": 49960 }, { "epoch": 12.46134663341646, "grad_norm": 8.996318817138672, "learning_rate": 7.544389027431422e-06, "loss": 0.3161, "step": 49970 }, { "epoch": 12.463840399002494, "grad_norm": 12.361374855041504, "learning_rate": 7.5418952618453865e-06, "loss": 0.4072, "step": 49980 }, { "epoch": 12.46633416458853, "grad_norm": 6.888818740844727, "learning_rate": 7.539401496259352e-06, "loss": 0.4031, "step": 49990 }, { "epoch": 12.468827930174564, "grad_norm": 7.364432334899902, "learning_rate": 7.536907730673317e-06, "loss": 0.3472, "step": 50000 }, { "epoch": 12.471321695760599, "grad_norm": 7.185758590698242, "learning_rate": 7.5344139650872825e-06, "loss": 0.3492, "step": 50010 }, { "epoch": 12.473815461346634, "grad_norm": 4.996051788330078, "learning_rate": 7.531920199501247e-06, "loss": 0.3285, "step": 50020 }, { "epoch": 12.476309226932669, "grad_norm": 9.784656524658203, "learning_rate": 7.529426433915212e-06, "loss": 0.3681, "step": 50030 }, { "epoch": 12.478802992518704, "grad_norm": 9.993110656738281, "learning_rate": 7.5269326683291776e-06, "loss": 0.4345, "step": 50040 }, { "epoch": 12.481296758104738, "grad_norm": 7.204555511474609, "learning_rate": 7.524438902743143e-06, "loss": 0.336, "step": 50050 }, { "epoch": 12.483790523690773, "grad_norm": 5.9999165534973145, "learning_rate": 7.521945137157108e-06, "loss": 0.4092, "step": 50060 }, { "epoch": 12.486284289276808, "grad_norm": 4.7937445640563965, "learning_rate": 7.519451371571073e-06, "loss": 0.3532, "step": 50070 }, { "epoch": 12.488778054862843, "grad_norm": 7.650313854217529, "learning_rate": 7.516957605985038e-06, "loss": 0.277, "step": 50080 }, { "epoch": 12.491271820448878, "grad_norm": 9.389827728271484, "learning_rate": 7.514463840399003e-06, "loss": 0.3082, "step": 50090 }, { "epoch": 12.493765586034913, "grad_norm": 7.2986578941345215, "learning_rate": 7.5119700748129686e-06, "loss": 0.3464, "step": 50100 }, { "epoch": 12.496259351620948, "grad_norm": 9.90869140625, "learning_rate": 7.509476309226933e-06, "loss": 0.3514, "step": 50110 }, { "epoch": 12.498753117206983, "grad_norm": 10.251230239868164, "learning_rate": 7.506982543640898e-06, "loss": 0.3623, "step": 50120 }, { "epoch": 12.501246882793017, "grad_norm": 6.3379974365234375, "learning_rate": 7.504488778054864e-06, "loss": 0.3323, "step": 50130 }, { "epoch": 12.503740648379052, "grad_norm": 7.917457580566406, "learning_rate": 7.501995012468829e-06, "loss": 0.3967, "step": 50140 }, { "epoch": 12.506234413965087, "grad_norm": 10.125364303588867, "learning_rate": 7.499501246882793e-06, "loss": 0.3394, "step": 50150 }, { "epoch": 12.508728179551122, "grad_norm": 6.0608978271484375, "learning_rate": 7.497007481296759e-06, "loss": 0.3227, "step": 50160 }, { "epoch": 12.511221945137157, "grad_norm": 6.0084919929504395, "learning_rate": 7.494513715710724e-06, "loss": 0.3547, "step": 50170 }, { "epoch": 12.513715710723192, "grad_norm": 7.461294174194336, "learning_rate": 7.492019950124689e-06, "loss": 0.3247, "step": 50180 }, { "epoch": 12.516209476309227, "grad_norm": 8.043869018554688, "learning_rate": 7.489526184538654e-06, "loss": 0.3308, "step": 50190 }, { "epoch": 12.518703241895262, "grad_norm": 8.986414909362793, "learning_rate": 7.487032418952619e-06, "loss": 0.3246, "step": 50200 }, { "epoch": 12.521197007481296, "grad_norm": 9.545452117919922, "learning_rate": 7.4845386533665844e-06, "loss": 0.3351, "step": 50210 }, { "epoch": 12.523690773067331, "grad_norm": 7.211226940155029, "learning_rate": 7.48204488778055e-06, "loss": 0.3332, "step": 50220 }, { "epoch": 12.526184538653366, "grad_norm": 15.852411270141602, "learning_rate": 7.479551122194514e-06, "loss": 0.3832, "step": 50230 }, { "epoch": 12.528678304239401, "grad_norm": 11.03563117980957, "learning_rate": 7.4770573566084795e-06, "loss": 0.3479, "step": 50240 }, { "epoch": 12.531172069825436, "grad_norm": 7.793381214141846, "learning_rate": 7.474563591022445e-06, "loss": 0.3877, "step": 50250 }, { "epoch": 12.53366583541147, "grad_norm": 6.964505195617676, "learning_rate": 7.47206982543641e-06, "loss": 0.3602, "step": 50260 }, { "epoch": 12.536159600997506, "grad_norm": 9.570211410522461, "learning_rate": 7.469576059850374e-06, "loss": 0.4639, "step": 50270 }, { "epoch": 12.53865336658354, "grad_norm": 6.632506370544434, "learning_rate": 7.46708229426434e-06, "loss": 0.3231, "step": 50280 }, { "epoch": 12.541147132169575, "grad_norm": 12.00216293334961, "learning_rate": 7.464588528678305e-06, "loss": 0.3751, "step": 50290 }, { "epoch": 12.54364089775561, "grad_norm": 9.346277236938477, "learning_rate": 7.4620947630922705e-06, "loss": 0.384, "step": 50300 }, { "epoch": 12.546134663341645, "grad_norm": 11.572287559509277, "learning_rate": 7.459600997506236e-06, "loss": 0.3448, "step": 50310 }, { "epoch": 12.548628428927682, "grad_norm": 5.089780807495117, "learning_rate": 7.4571072319201995e-06, "loss": 0.3452, "step": 50320 }, { "epoch": 12.551122194513717, "grad_norm": 5.670629024505615, "learning_rate": 7.454613466334165e-06, "loss": 0.2836, "step": 50330 }, { "epoch": 12.553615960099751, "grad_norm": 8.412741661071777, "learning_rate": 7.452119700748131e-06, "loss": 0.3167, "step": 50340 }, { "epoch": 12.556109725685786, "grad_norm": 6.057131767272949, "learning_rate": 7.449625935162096e-06, "loss": 0.3257, "step": 50350 }, { "epoch": 12.558603491271821, "grad_norm": 13.145732879638672, "learning_rate": 7.44713216957606e-06, "loss": 0.3762, "step": 50360 }, { "epoch": 12.561097256857856, "grad_norm": 6.84805965423584, "learning_rate": 7.444638403990025e-06, "loss": 0.3416, "step": 50370 }, { "epoch": 12.563591022443891, "grad_norm": 9.685379028320312, "learning_rate": 7.4421446384039905e-06, "loss": 0.4172, "step": 50380 }, { "epoch": 12.566084788029926, "grad_norm": 10.725334167480469, "learning_rate": 7.439650872817956e-06, "loss": 0.3302, "step": 50390 }, { "epoch": 12.56857855361596, "grad_norm": 9.620137214660645, "learning_rate": 7.43715710723192e-06, "loss": 0.3222, "step": 50400 }, { "epoch": 12.571072319201996, "grad_norm": 6.089214324951172, "learning_rate": 7.4346633416458855e-06, "loss": 0.2796, "step": 50410 }, { "epoch": 12.57356608478803, "grad_norm": 9.723833084106445, "learning_rate": 7.432169576059851e-06, "loss": 0.3645, "step": 50420 }, { "epoch": 12.576059850374065, "grad_norm": 7.900272369384766, "learning_rate": 7.429675810473816e-06, "loss": 0.3341, "step": 50430 }, { "epoch": 12.5785536159601, "grad_norm": 5.3839521408081055, "learning_rate": 7.427182044887781e-06, "loss": 0.3581, "step": 50440 }, { "epoch": 12.581047381546135, "grad_norm": 7.9673848152160645, "learning_rate": 7.424688279301746e-06, "loss": 0.3934, "step": 50450 }, { "epoch": 12.58354114713217, "grad_norm": 7.145327091217041, "learning_rate": 7.422194513715711e-06, "loss": 0.3564, "step": 50460 }, { "epoch": 12.586034912718205, "grad_norm": 8.317575454711914, "learning_rate": 7.4197007481296766e-06, "loss": 0.3919, "step": 50470 }, { "epoch": 12.58852867830424, "grad_norm": 8.809738159179688, "learning_rate": 7.417206982543641e-06, "loss": 0.3832, "step": 50480 }, { "epoch": 12.591022443890274, "grad_norm": 7.227417945861816, "learning_rate": 7.414713216957606e-06, "loss": 0.3157, "step": 50490 }, { "epoch": 12.59351620947631, "grad_norm": 7.549991130828857, "learning_rate": 7.412219451371572e-06, "loss": 0.3329, "step": 50500 }, { "epoch": 12.596009975062344, "grad_norm": 15.35284423828125, "learning_rate": 7.409725685785537e-06, "loss": 0.3495, "step": 50510 }, { "epoch": 12.598503740648379, "grad_norm": 4.876797199249268, "learning_rate": 7.407231920199501e-06, "loss": 0.3429, "step": 50520 }, { "epoch": 12.600997506234414, "grad_norm": 6.604551315307617, "learning_rate": 7.404738154613467e-06, "loss": 0.3698, "step": 50530 }, { "epoch": 12.603491271820449, "grad_norm": 7.2110443115234375, "learning_rate": 7.402244389027432e-06, "loss": 0.331, "step": 50540 }, { "epoch": 12.605985037406484, "grad_norm": 7.359330654144287, "learning_rate": 7.399750623441397e-06, "loss": 0.3591, "step": 50550 }, { "epoch": 12.608478802992519, "grad_norm": 5.08826208114624, "learning_rate": 7.397256857855363e-06, "loss": 0.2935, "step": 50560 }, { "epoch": 12.610972568578553, "grad_norm": 4.144226551055908, "learning_rate": 7.394763092269327e-06, "loss": 0.3926, "step": 50570 }, { "epoch": 12.613466334164588, "grad_norm": 5.52124547958374, "learning_rate": 7.392269326683292e-06, "loss": 0.4041, "step": 50580 }, { "epoch": 12.615960099750623, "grad_norm": 13.029586791992188, "learning_rate": 7.389775561097258e-06, "loss": 0.3669, "step": 50590 }, { "epoch": 12.618453865336658, "grad_norm": 7.825071811676025, "learning_rate": 7.387281795511223e-06, "loss": 0.3541, "step": 50600 }, { "epoch": 12.620947630922693, "grad_norm": 7.498974323272705, "learning_rate": 7.3847880299251875e-06, "loss": 0.2758, "step": 50610 }, { "epoch": 12.623441396508728, "grad_norm": 10.242654800415039, "learning_rate": 7.382294264339153e-06, "loss": 0.302, "step": 50620 }, { "epoch": 12.625935162094763, "grad_norm": 6.927699089050293, "learning_rate": 7.379800498753118e-06, "loss": 0.3352, "step": 50630 }, { "epoch": 12.628428927680797, "grad_norm": 4.644331932067871, "learning_rate": 7.3773067331670834e-06, "loss": 0.3491, "step": 50640 }, { "epoch": 12.630922693266832, "grad_norm": 5.701728343963623, "learning_rate": 7.374812967581048e-06, "loss": 0.2982, "step": 50650 }, { "epoch": 12.633416458852867, "grad_norm": 10.834186553955078, "learning_rate": 7.372319201995013e-06, "loss": 0.3135, "step": 50660 }, { "epoch": 12.635910224438902, "grad_norm": 9.567243576049805, "learning_rate": 7.3698254364089785e-06, "loss": 0.3627, "step": 50670 }, { "epoch": 12.638403990024937, "grad_norm": 5.764795780181885, "learning_rate": 7.367331670822944e-06, "loss": 0.3256, "step": 50680 }, { "epoch": 12.640897755610972, "grad_norm": 6.912472724914551, "learning_rate": 7.364837905236908e-06, "loss": 0.3154, "step": 50690 }, { "epoch": 12.643391521197007, "grad_norm": 4.959378719329834, "learning_rate": 7.362344139650874e-06, "loss": 0.3111, "step": 50700 }, { "epoch": 12.645885286783042, "grad_norm": 8.026573181152344, "learning_rate": 7.359850374064839e-06, "loss": 0.3211, "step": 50710 }, { "epoch": 12.648379052369076, "grad_norm": 9.039690017700195, "learning_rate": 7.357356608478804e-06, "loss": 0.2915, "step": 50720 }, { "epoch": 12.650872817955111, "grad_norm": 6.101404666900635, "learning_rate": 7.354862842892768e-06, "loss": 0.3123, "step": 50730 }, { "epoch": 12.653366583541148, "grad_norm": 8.664443969726562, "learning_rate": 7.352369077306733e-06, "loss": 0.3825, "step": 50740 }, { "epoch": 12.655860349127183, "grad_norm": 9.475874900817871, "learning_rate": 7.349875311720699e-06, "loss": 0.3463, "step": 50750 }, { "epoch": 12.658354114713218, "grad_norm": 6.189550399780273, "learning_rate": 7.347381546134665e-06, "loss": 0.3556, "step": 50760 }, { "epoch": 12.660847880299253, "grad_norm": 6.720332622528076, "learning_rate": 7.344887780548628e-06, "loss": 0.2882, "step": 50770 }, { "epoch": 12.663341645885287, "grad_norm": 6.461765766143799, "learning_rate": 7.3423940149625935e-06, "loss": 0.3059, "step": 50780 }, { "epoch": 12.665835411471322, "grad_norm": 5.625673294067383, "learning_rate": 7.339900249376559e-06, "loss": 0.3903, "step": 50790 }, { "epoch": 12.668329177057357, "grad_norm": 4.355860233306885, "learning_rate": 7.337406483790524e-06, "loss": 0.332, "step": 50800 }, { "epoch": 12.670822942643392, "grad_norm": 8.476668357849121, "learning_rate": 7.33491271820449e-06, "loss": 0.354, "step": 50810 }, { "epoch": 12.673316708229427, "grad_norm": 8.867452621459961, "learning_rate": 7.332418952618454e-06, "loss": 0.2912, "step": 50820 }, { "epoch": 12.675810473815462, "grad_norm": 6.675667762756348, "learning_rate": 7.329925187032419e-06, "loss": 0.369, "step": 50830 }, { "epoch": 12.678304239401497, "grad_norm": 6.0077972412109375, "learning_rate": 7.327680798004988e-06, "loss": 0.3377, "step": 50840 }, { "epoch": 12.680798004987532, "grad_norm": 10.627485275268555, "learning_rate": 7.325187032418954e-06, "loss": 0.3032, "step": 50850 }, { "epoch": 12.683291770573566, "grad_norm": 6.776688098907471, "learning_rate": 7.322693266832919e-06, "loss": 0.3235, "step": 50860 }, { "epoch": 12.685785536159601, "grad_norm": 8.06655216217041, "learning_rate": 7.320199501246883e-06, "loss": 0.3717, "step": 50870 }, { "epoch": 12.688279301745636, "grad_norm": 5.898749351501465, "learning_rate": 7.317705735660848e-06, "loss": 0.3136, "step": 50880 }, { "epoch": 12.690773067331671, "grad_norm": 5.939793109893799, "learning_rate": 7.315211970074814e-06, "loss": 0.2735, "step": 50890 }, { "epoch": 12.693266832917706, "grad_norm": 9.688426971435547, "learning_rate": 7.312718204488779e-06, "loss": 0.3447, "step": 50900 }, { "epoch": 12.69576059850374, "grad_norm": 8.256994247436523, "learning_rate": 7.310224438902743e-06, "loss": 0.3271, "step": 50910 }, { "epoch": 12.698254364089776, "grad_norm": 8.417771339416504, "learning_rate": 7.307730673316708e-06, "loss": 0.4127, "step": 50920 }, { "epoch": 12.70074812967581, "grad_norm": 7.872901916503906, "learning_rate": 7.305236907730674e-06, "loss": 0.3853, "step": 50930 }, { "epoch": 12.703241895261845, "grad_norm": 7.196390151977539, "learning_rate": 7.302743142144639e-06, "loss": 0.3114, "step": 50940 }, { "epoch": 12.70573566084788, "grad_norm": 7.429532051086426, "learning_rate": 7.300249376558603e-06, "loss": 0.3283, "step": 50950 }, { "epoch": 12.708229426433915, "grad_norm": 11.154653549194336, "learning_rate": 7.297755610972569e-06, "loss": 0.3902, "step": 50960 }, { "epoch": 12.71072319201995, "grad_norm": 4.860759258270264, "learning_rate": 7.295261845386534e-06, "loss": 0.3529, "step": 50970 }, { "epoch": 12.713216957605985, "grad_norm": 4.447295665740967, "learning_rate": 7.292768079800499e-06, "loss": 0.3266, "step": 50980 }, { "epoch": 12.71571072319202, "grad_norm": 8.30553913116455, "learning_rate": 7.290274314214465e-06, "loss": 0.3435, "step": 50990 }, { "epoch": 12.718204488778055, "grad_norm": 8.856745719909668, "learning_rate": 7.287780548628429e-06, "loss": 0.3163, "step": 51000 }, { "epoch": 12.72069825436409, "grad_norm": 8.923396110534668, "learning_rate": 7.285286783042394e-06, "loss": 0.3417, "step": 51010 }, { "epoch": 12.723192019950124, "grad_norm": 9.019140243530273, "learning_rate": 7.28279301745636e-06, "loss": 0.3917, "step": 51020 }, { "epoch": 12.72568578553616, "grad_norm": 8.857991218566895, "learning_rate": 7.280299251870325e-06, "loss": 0.3368, "step": 51030 }, { "epoch": 12.728179551122194, "grad_norm": 7.459349632263184, "learning_rate": 7.2778054862842895e-06, "loss": 0.2827, "step": 51040 }, { "epoch": 12.730673316708229, "grad_norm": 6.536787033081055, "learning_rate": 7.275311720698255e-06, "loss": 0.3304, "step": 51050 }, { "epoch": 12.733167082294264, "grad_norm": 6.800075531005859, "learning_rate": 7.27281795511222e-06, "loss": 0.4169, "step": 51060 }, { "epoch": 12.735660847880299, "grad_norm": 9.216288566589355, "learning_rate": 7.270324189526185e-06, "loss": 0.3732, "step": 51070 }, { "epoch": 12.738154613466333, "grad_norm": 9.165617942810059, "learning_rate": 7.26783042394015e-06, "loss": 0.31, "step": 51080 }, { "epoch": 12.740648379052368, "grad_norm": 7.988641262054443, "learning_rate": 7.265336658354115e-06, "loss": 0.3234, "step": 51090 }, { "epoch": 12.743142144638403, "grad_norm": 6.585142135620117, "learning_rate": 7.2628428927680805e-06, "loss": 0.3289, "step": 51100 }, { "epoch": 12.745635910224438, "grad_norm": 10.255621910095215, "learning_rate": 7.260349127182046e-06, "loss": 0.3875, "step": 51110 }, { "epoch": 12.748129675810475, "grad_norm": 11.782647132873535, "learning_rate": 7.25785536159601e-06, "loss": 0.417, "step": 51120 }, { "epoch": 12.75062344139651, "grad_norm": 9.408047676086426, "learning_rate": 7.2553615960099756e-06, "loss": 0.2934, "step": 51130 }, { "epoch": 12.753117206982544, "grad_norm": 8.07828426361084, "learning_rate": 7.252867830423941e-06, "loss": 0.3938, "step": 51140 }, { "epoch": 12.75561097256858, "grad_norm": 4.914299011230469, "learning_rate": 7.250374064837906e-06, "loss": 0.3387, "step": 51150 }, { "epoch": 12.758104738154614, "grad_norm": 6.668241024017334, "learning_rate": 7.247880299251871e-06, "loss": 0.3265, "step": 51160 }, { "epoch": 12.760598503740649, "grad_norm": 9.32829761505127, "learning_rate": 7.245386533665836e-06, "loss": 0.3249, "step": 51170 }, { "epoch": 12.763092269326684, "grad_norm": 10.00203800201416, "learning_rate": 7.242892768079801e-06, "loss": 0.4209, "step": 51180 }, { "epoch": 12.765586034912719, "grad_norm": 8.861324310302734, "learning_rate": 7.2403990024937666e-06, "loss": 0.3101, "step": 51190 }, { "epoch": 12.768079800498754, "grad_norm": 8.35321044921875, "learning_rate": 7.237905236907731e-06, "loss": 0.3043, "step": 51200 }, { "epoch": 12.770573566084789, "grad_norm": 5.797079086303711, "learning_rate": 7.235411471321696e-06, "loss": 0.3619, "step": 51210 }, { "epoch": 12.773067331670823, "grad_norm": 8.274916648864746, "learning_rate": 7.232917705735662e-06, "loss": 0.2735, "step": 51220 }, { "epoch": 12.775561097256858, "grad_norm": 6.340693473815918, "learning_rate": 7.230423940149627e-06, "loss": 0.3211, "step": 51230 }, { "epoch": 12.778054862842893, "grad_norm": 6.9829792976379395, "learning_rate": 7.227930174563592e-06, "loss": 0.2852, "step": 51240 }, { "epoch": 12.780548628428928, "grad_norm": 11.263195037841797, "learning_rate": 7.225436408977557e-06, "loss": 0.3557, "step": 51250 }, { "epoch": 12.783042394014963, "grad_norm": 8.167370796203613, "learning_rate": 7.222942643391522e-06, "loss": 0.3264, "step": 51260 }, { "epoch": 12.785536159600998, "grad_norm": 6.718631744384766, "learning_rate": 7.220448877805487e-06, "loss": 0.2878, "step": 51270 }, { "epoch": 12.788029925187033, "grad_norm": 7.236240386962891, "learning_rate": 7.217955112219453e-06, "loss": 0.3249, "step": 51280 }, { "epoch": 12.790523690773068, "grad_norm": 10.440741539001465, "learning_rate": 7.215461346633416e-06, "loss": 0.3534, "step": 51290 }, { "epoch": 12.793017456359102, "grad_norm": 5.670083522796631, "learning_rate": 7.2129675810473824e-06, "loss": 0.3517, "step": 51300 }, { "epoch": 12.795511221945137, "grad_norm": 15.356858253479004, "learning_rate": 7.210473815461348e-06, "loss": 0.4574, "step": 51310 }, { "epoch": 12.798004987531172, "grad_norm": 9.100034713745117, "learning_rate": 7.207980049875313e-06, "loss": 0.3513, "step": 51320 }, { "epoch": 12.800498753117207, "grad_norm": 5.6677937507629395, "learning_rate": 7.205486284289277e-06, "loss": 0.3663, "step": 51330 }, { "epoch": 12.802992518703242, "grad_norm": 8.217962265014648, "learning_rate": 7.202992518703242e-06, "loss": 0.2736, "step": 51340 }, { "epoch": 12.805486284289277, "grad_norm": 5.241400241851807, "learning_rate": 7.200498753117207e-06, "loss": 0.3087, "step": 51350 }, { "epoch": 12.807980049875312, "grad_norm": 10.871718406677246, "learning_rate": 7.1980049875311734e-06, "loss": 0.3461, "step": 51360 }, { "epoch": 12.810473815461346, "grad_norm": 6.515952110290527, "learning_rate": 7.195511221945137e-06, "loss": 0.3521, "step": 51370 }, { "epoch": 12.812967581047381, "grad_norm": 9.03691577911377, "learning_rate": 7.193017456359102e-06, "loss": 0.3166, "step": 51380 }, { "epoch": 12.815461346633416, "grad_norm": 8.544856071472168, "learning_rate": 7.190523690773068e-06, "loss": 0.3556, "step": 51390 }, { "epoch": 12.817955112219451, "grad_norm": 6.503901481628418, "learning_rate": 7.188029925187033e-06, "loss": 0.3814, "step": 51400 }, { "epoch": 12.820448877805486, "grad_norm": 6.1890387535095215, "learning_rate": 7.1855361596009975e-06, "loss": 0.3247, "step": 51410 }, { "epoch": 12.82294264339152, "grad_norm": 5.103339672088623, "learning_rate": 7.183042394014963e-06, "loss": 0.4179, "step": 51420 }, { "epoch": 12.825436408977556, "grad_norm": 9.049005508422852, "learning_rate": 7.180548628428928e-06, "loss": 0.3371, "step": 51430 }, { "epoch": 12.82793017456359, "grad_norm": 8.748699188232422, "learning_rate": 7.178054862842893e-06, "loss": 0.3176, "step": 51440 }, { "epoch": 12.830423940149625, "grad_norm": 8.368945121765137, "learning_rate": 7.175561097256858e-06, "loss": 0.3286, "step": 51450 }, { "epoch": 12.83291770573566, "grad_norm": 7.695786952972412, "learning_rate": 7.173067331670823e-06, "loss": 0.3397, "step": 51460 }, { "epoch": 12.835411471321695, "grad_norm": 8.735140800476074, "learning_rate": 7.1705735660847885e-06, "loss": 0.3353, "step": 51470 }, { "epoch": 12.83790523690773, "grad_norm": 4.386609077453613, "learning_rate": 7.168079800498754e-06, "loss": 0.3446, "step": 51480 }, { "epoch": 12.840399002493765, "grad_norm": 6.733331680297852, "learning_rate": 7.165586034912719e-06, "loss": 0.3685, "step": 51490 }, { "epoch": 12.8428927680798, "grad_norm": 7.894585132598877, "learning_rate": 7.1630922693266835e-06, "loss": 0.332, "step": 51500 }, { "epoch": 12.845386533665835, "grad_norm": 9.471677780151367, "learning_rate": 7.160598503740649e-06, "loss": 0.2614, "step": 51510 }, { "epoch": 12.84788029925187, "grad_norm": 11.194404602050781, "learning_rate": 7.158104738154614e-06, "loss": 0.3671, "step": 51520 }, { "epoch": 12.850374064837904, "grad_norm": 9.611897468566895, "learning_rate": 7.1556109725685795e-06, "loss": 0.3233, "step": 51530 }, { "epoch": 12.85286783042394, "grad_norm": 8.516155242919922, "learning_rate": 7.153117206982544e-06, "loss": 0.3259, "step": 51540 }, { "epoch": 12.855361596009976, "grad_norm": 6.97627067565918, "learning_rate": 7.150623441396509e-06, "loss": 0.3679, "step": 51550 }, { "epoch": 12.85785536159601, "grad_norm": 9.25831127166748, "learning_rate": 7.1481296758104746e-06, "loss": 0.3705, "step": 51560 }, { "epoch": 12.860349127182046, "grad_norm": 4.845349311828613, "learning_rate": 7.14563591022444e-06, "loss": 0.2548, "step": 51570 }, { "epoch": 12.86284289276808, "grad_norm": 7.061251163482666, "learning_rate": 7.143142144638404e-06, "loss": 0.3352, "step": 51580 }, { "epoch": 12.865336658354115, "grad_norm": 7.0790557861328125, "learning_rate": 7.14064837905237e-06, "loss": 0.2947, "step": 51590 }, { "epoch": 12.86783042394015, "grad_norm": 9.186144828796387, "learning_rate": 7.138154613466335e-06, "loss": 0.3689, "step": 51600 }, { "epoch": 12.870324189526185, "grad_norm": 5.841263771057129, "learning_rate": 7.1356608478803e-06, "loss": 0.3377, "step": 51610 }, { "epoch": 12.87281795511222, "grad_norm": 15.965258598327637, "learning_rate": 7.133167082294265e-06, "loss": 0.3976, "step": 51620 }, { "epoch": 12.875311720698255, "grad_norm": 7.985716342926025, "learning_rate": 7.13067331670823e-06, "loss": 0.3237, "step": 51630 }, { "epoch": 12.87780548628429, "grad_norm": 7.872910499572754, "learning_rate": 7.128179551122195e-06, "loss": 0.3373, "step": 51640 }, { "epoch": 12.880299251870325, "grad_norm": 8.896018028259277, "learning_rate": 7.125685785536161e-06, "loss": 0.3554, "step": 51650 }, { "epoch": 12.88279301745636, "grad_norm": 8.754325866699219, "learning_rate": 7.123192019950125e-06, "loss": 0.3481, "step": 51660 }, { "epoch": 12.885286783042394, "grad_norm": 9.110607147216797, "learning_rate": 7.1206982543640904e-06, "loss": 0.3519, "step": 51670 }, { "epoch": 12.88778054862843, "grad_norm": 10.086724281311035, "learning_rate": 7.118204488778056e-06, "loss": 0.3255, "step": 51680 }, { "epoch": 12.890274314214464, "grad_norm": 9.028765678405762, "learning_rate": 7.115710723192021e-06, "loss": 0.3527, "step": 51690 }, { "epoch": 12.892768079800499, "grad_norm": 12.050107955932617, "learning_rate": 7.113216957605985e-06, "loss": 0.3747, "step": 51700 }, { "epoch": 12.895261845386534, "grad_norm": 5.377803325653076, "learning_rate": 7.110723192019951e-06, "loss": 0.3493, "step": 51710 }, { "epoch": 12.897755610972569, "grad_norm": 12.90459156036377, "learning_rate": 7.108229426433916e-06, "loss": 0.3973, "step": 51720 }, { "epoch": 12.900249376558603, "grad_norm": 8.819035530090332, "learning_rate": 7.1057356608478814e-06, "loss": 0.3746, "step": 51730 }, { "epoch": 12.902743142144638, "grad_norm": 7.119004726409912, "learning_rate": 7.103241895261847e-06, "loss": 0.3413, "step": 51740 }, { "epoch": 12.905236907730673, "grad_norm": 9.566146850585938, "learning_rate": 7.10074812967581e-06, "loss": 0.3475, "step": 51750 }, { "epoch": 12.907730673316708, "grad_norm": 6.812338829040527, "learning_rate": 7.098254364089776e-06, "loss": 0.3002, "step": 51760 }, { "epoch": 12.910224438902743, "grad_norm": 6.894874095916748, "learning_rate": 7.095760598503742e-06, "loss": 0.3632, "step": 51770 }, { "epoch": 12.912718204488778, "grad_norm": 10.196784019470215, "learning_rate": 7.093266832917707e-06, "loss": 0.3053, "step": 51780 }, { "epoch": 12.915211970074813, "grad_norm": 7.024259090423584, "learning_rate": 7.090773067331671e-06, "loss": 0.3858, "step": 51790 }, { "epoch": 12.917705735660848, "grad_norm": 8.830977439880371, "learning_rate": 7.088279301745636e-06, "loss": 0.319, "step": 51800 }, { "epoch": 12.920199501246882, "grad_norm": 7.131579875946045, "learning_rate": 7.085785536159601e-06, "loss": 0.3245, "step": 51810 }, { "epoch": 12.922693266832917, "grad_norm": 10.366734504699707, "learning_rate": 7.083291770573567e-06, "loss": 0.3072, "step": 51820 }, { "epoch": 12.925187032418952, "grad_norm": 14.305132865905762, "learning_rate": 7.080798004987531e-06, "loss": 0.3282, "step": 51830 }, { "epoch": 12.927680798004987, "grad_norm": 5.115872859954834, "learning_rate": 7.0783042394014965e-06, "loss": 0.3539, "step": 51840 }, { "epoch": 12.930174563591022, "grad_norm": 6.633999347686768, "learning_rate": 7.075810473815462e-06, "loss": 0.4114, "step": 51850 }, { "epoch": 12.932668329177057, "grad_norm": 9.143322944641113, "learning_rate": 7.073316708229427e-06, "loss": 0.3024, "step": 51860 }, { "epoch": 12.935162094763092, "grad_norm": 8.794203758239746, "learning_rate": 7.0708229426433915e-06, "loss": 0.3265, "step": 51870 }, { "epoch": 12.937655860349127, "grad_norm": 8.29458999633789, "learning_rate": 7.068329177057357e-06, "loss": 0.3302, "step": 51880 }, { "epoch": 12.940149625935161, "grad_norm": 8.905904769897461, "learning_rate": 7.065835411471322e-06, "loss": 0.4036, "step": 51890 }, { "epoch": 12.942643391521196, "grad_norm": 11.505474090576172, "learning_rate": 7.0633416458852875e-06, "loss": 0.3459, "step": 51900 }, { "epoch": 12.945137157107231, "grad_norm": 7.7994561195373535, "learning_rate": 7.060847880299252e-06, "loss": 0.3477, "step": 51910 }, { "epoch": 12.947630922693268, "grad_norm": 8.445225715637207, "learning_rate": 7.058354114713217e-06, "loss": 0.3153, "step": 51920 }, { "epoch": 12.950124688279303, "grad_norm": 7.54745626449585, "learning_rate": 7.0558603491271825e-06, "loss": 0.2894, "step": 51930 }, { "epoch": 12.952618453865338, "grad_norm": 8.576315879821777, "learning_rate": 7.053366583541148e-06, "loss": 0.3849, "step": 51940 }, { "epoch": 12.955112219451372, "grad_norm": 11.285699844360352, "learning_rate": 7.050872817955112e-06, "loss": 0.3399, "step": 51950 }, { "epoch": 12.957605985037407, "grad_norm": 5.247829914093018, "learning_rate": 7.048379052369078e-06, "loss": 0.299, "step": 51960 }, { "epoch": 12.960099750623442, "grad_norm": 8.704336166381836, "learning_rate": 7.045885286783043e-06, "loss": 0.3415, "step": 51970 }, { "epoch": 12.962593516209477, "grad_norm": 9.179566383361816, "learning_rate": 7.043391521197008e-06, "loss": 0.3734, "step": 51980 }, { "epoch": 12.965087281795512, "grad_norm": 6.699599266052246, "learning_rate": 7.0408977556109736e-06, "loss": 0.3288, "step": 51990 }, { "epoch": 12.967581047381547, "grad_norm": 6.417098045349121, "learning_rate": 7.038403990024938e-06, "loss": 0.2907, "step": 52000 }, { "epoch": 12.970074812967582, "grad_norm": 7.011273384094238, "learning_rate": 7.035910224438903e-06, "loss": 0.3603, "step": 52010 }, { "epoch": 12.972568578553616, "grad_norm": 8.827155113220215, "learning_rate": 7.033416458852869e-06, "loss": 0.3839, "step": 52020 }, { "epoch": 12.975062344139651, "grad_norm": 7.616529941558838, "learning_rate": 7.030922693266834e-06, "loss": 0.3665, "step": 52030 }, { "epoch": 12.977556109725686, "grad_norm": 8.937703132629395, "learning_rate": 7.028428927680798e-06, "loss": 0.3125, "step": 52040 }, { "epoch": 12.980049875311721, "grad_norm": 9.48192310333252, "learning_rate": 7.025935162094764e-06, "loss": 0.445, "step": 52050 }, { "epoch": 12.982543640897756, "grad_norm": 8.205970764160156, "learning_rate": 7.023441396508729e-06, "loss": 0.3578, "step": 52060 }, { "epoch": 12.98503740648379, "grad_norm": 9.55613899230957, "learning_rate": 7.020947630922694e-06, "loss": 0.301, "step": 52070 }, { "epoch": 12.987531172069826, "grad_norm": 7.20795202255249, "learning_rate": 7.018453865336659e-06, "loss": 0.3893, "step": 52080 }, { "epoch": 12.99002493765586, "grad_norm": 12.070457458496094, "learning_rate": 7.015960099750624e-06, "loss": 0.3387, "step": 52090 }, { "epoch": 12.992518703241895, "grad_norm": 4.415650844573975, "learning_rate": 7.0134663341645894e-06, "loss": 0.3183, "step": 52100 }, { "epoch": 12.99501246882793, "grad_norm": 6.928340911865234, "learning_rate": 7.010972568578555e-06, "loss": 0.3319, "step": 52110 }, { "epoch": 12.997506234413965, "grad_norm": 6.329419136047363, "learning_rate": 7.008478802992519e-06, "loss": 0.3946, "step": 52120 }, { "epoch": 13.0, "grad_norm": 4.517069339752197, "learning_rate": 7.0059850374064845e-06, "loss": 0.3482, "step": 52130 }, { "epoch": 13.0, "eval_loss": 0.41498884558677673, "eval_runtime": 60.065, "eval_samples_per_second": 16.699, "eval_steps_per_second": 16.699, "step": 52130 }, { "epoch": 13.002493765586035, "grad_norm": 6.689090728759766, "learning_rate": 7.00349127182045e-06, "loss": 0.3395, "step": 52140 }, { "epoch": 13.00498753117207, "grad_norm": 7.670643329620361, "learning_rate": 7.000997506234415e-06, "loss": 0.3143, "step": 52150 }, { "epoch": 13.007481296758105, "grad_norm": 10.944851875305176, "learning_rate": 6.998503740648379e-06, "loss": 0.3196, "step": 52160 }, { "epoch": 13.00997506234414, "grad_norm": 5.979905128479004, "learning_rate": 6.996009975062344e-06, "loss": 0.3221, "step": 52170 }, { "epoch": 13.012468827930174, "grad_norm": 6.5112762451171875, "learning_rate": 6.99351620947631e-06, "loss": 0.2684, "step": 52180 }, { "epoch": 13.01496259351621, "grad_norm": 5.4960761070251465, "learning_rate": 6.9910224438902755e-06, "loss": 0.2942, "step": 52190 }, { "epoch": 13.017456359102244, "grad_norm": 6.578577995300293, "learning_rate": 6.988528678304239e-06, "loss": 0.3318, "step": 52200 }, { "epoch": 13.019950124688279, "grad_norm": 7.574843406677246, "learning_rate": 6.9860349127182044e-06, "loss": 0.3365, "step": 52210 }, { "epoch": 13.022443890274314, "grad_norm": 6.713075160980225, "learning_rate": 6.98354114713217e-06, "loss": 0.3522, "step": 52220 }, { "epoch": 13.024937655860349, "grad_norm": 10.647099494934082, "learning_rate": 6.981047381546135e-06, "loss": 0.2822, "step": 52230 }, { "epoch": 13.027431421446384, "grad_norm": 6.798612594604492, "learning_rate": 6.978553615960101e-06, "loss": 0.2812, "step": 52240 }, { "epoch": 13.029925187032418, "grad_norm": 6.139517784118652, "learning_rate": 6.976059850374065e-06, "loss": 0.3224, "step": 52250 }, { "epoch": 13.032418952618453, "grad_norm": 5.845915794372559, "learning_rate": 6.97356608478803e-06, "loss": 0.2987, "step": 52260 }, { "epoch": 13.034912718204488, "grad_norm": 6.590796947479248, "learning_rate": 6.9710723192019955e-06, "loss": 0.3018, "step": 52270 }, { "epoch": 13.037406483790523, "grad_norm": 6.91572904586792, "learning_rate": 6.968578553615961e-06, "loss": 0.3517, "step": 52280 }, { "epoch": 13.039900249376558, "grad_norm": 5.153928756713867, "learning_rate": 6.966084788029925e-06, "loss": 0.2843, "step": 52290 }, { "epoch": 13.042394014962593, "grad_norm": 8.661881446838379, "learning_rate": 6.9635910224438905e-06, "loss": 0.278, "step": 52300 }, { "epoch": 13.044887780548628, "grad_norm": 6.513763904571533, "learning_rate": 6.961097256857856e-06, "loss": 0.3141, "step": 52310 }, { "epoch": 13.047381546134662, "grad_norm": 7.926911354064941, "learning_rate": 6.958603491271821e-06, "loss": 0.3725, "step": 52320 }, { "epoch": 13.049875311720697, "grad_norm": 10.43213176727295, "learning_rate": 6.956109725685786e-06, "loss": 0.3259, "step": 52330 }, { "epoch": 13.052369077306734, "grad_norm": 5.205043792724609, "learning_rate": 6.953615960099751e-06, "loss": 0.3007, "step": 52340 }, { "epoch": 13.054862842892769, "grad_norm": 6.522244930267334, "learning_rate": 6.951122194513716e-06, "loss": 0.2957, "step": 52350 }, { "epoch": 13.057356608478804, "grad_norm": 6.536242961883545, "learning_rate": 6.9486284289276815e-06, "loss": 0.3116, "step": 52360 }, { "epoch": 13.059850374064839, "grad_norm": 8.81574535369873, "learning_rate": 6.946134663341646e-06, "loss": 0.3632, "step": 52370 }, { "epoch": 13.062344139650873, "grad_norm": 6.305959701538086, "learning_rate": 6.943640897755611e-06, "loss": 0.3337, "step": 52380 }, { "epoch": 13.064837905236908, "grad_norm": 12.51152515411377, "learning_rate": 6.941147132169577e-06, "loss": 0.3609, "step": 52390 }, { "epoch": 13.067331670822943, "grad_norm": 8.995133399963379, "learning_rate": 6.938653366583542e-06, "loss": 0.3537, "step": 52400 }, { "epoch": 13.069825436408978, "grad_norm": 10.008258819580078, "learning_rate": 6.936159600997506e-06, "loss": 0.3284, "step": 52410 }, { "epoch": 13.072319201995013, "grad_norm": 7.722633361816406, "learning_rate": 6.933665835411472e-06, "loss": 0.3823, "step": 52420 }, { "epoch": 13.074812967581048, "grad_norm": 8.553972244262695, "learning_rate": 6.931172069825437e-06, "loss": 0.3226, "step": 52430 }, { "epoch": 13.077306733167083, "grad_norm": 6.591207027435303, "learning_rate": 6.928678304239402e-06, "loss": 0.3421, "step": 52440 }, { "epoch": 13.079800498753118, "grad_norm": 5.500464916229248, "learning_rate": 6.926184538653367e-06, "loss": 0.2842, "step": 52450 }, { "epoch": 13.082294264339152, "grad_norm": 8.214122772216797, "learning_rate": 6.923690773067332e-06, "loss": 0.2846, "step": 52460 }, { "epoch": 13.084788029925187, "grad_norm": 7.543227195739746, "learning_rate": 6.921197007481297e-06, "loss": 0.3326, "step": 52470 }, { "epoch": 13.087281795511222, "grad_norm": 8.072737693786621, "learning_rate": 6.918703241895263e-06, "loss": 0.3319, "step": 52480 }, { "epoch": 13.089775561097257, "grad_norm": 8.425067901611328, "learning_rate": 6.916209476309228e-06, "loss": 0.303, "step": 52490 }, { "epoch": 13.092269326683292, "grad_norm": 8.168347358703613, "learning_rate": 6.9137157107231925e-06, "loss": 0.3316, "step": 52500 }, { "epoch": 13.094763092269327, "grad_norm": 7.062584400177002, "learning_rate": 6.911221945137158e-06, "loss": 0.2915, "step": 52510 }, { "epoch": 13.097256857855362, "grad_norm": 8.098386764526367, "learning_rate": 6.908728179551123e-06, "loss": 0.3826, "step": 52520 }, { "epoch": 13.099750623441397, "grad_norm": 6.54594087600708, "learning_rate": 6.9062344139650884e-06, "loss": 0.3046, "step": 52530 }, { "epoch": 13.102244389027431, "grad_norm": 8.44714069366455, "learning_rate": 6.903740648379053e-06, "loss": 0.296, "step": 52540 }, { "epoch": 13.104738154613466, "grad_norm": 7.943593502044678, "learning_rate": 6.901246882793018e-06, "loss": 0.3459, "step": 52550 }, { "epoch": 13.107231920199501, "grad_norm": 5.09817361831665, "learning_rate": 6.8987531172069835e-06, "loss": 0.2701, "step": 52560 }, { "epoch": 13.109725685785536, "grad_norm": 10.518460273742676, "learning_rate": 6.896259351620949e-06, "loss": 0.3679, "step": 52570 }, { "epoch": 13.11221945137157, "grad_norm": 7.73917293548584, "learning_rate": 6.893765586034913e-06, "loss": 0.2963, "step": 52580 }, { "epoch": 13.114713216957606, "grad_norm": 8.970417976379395, "learning_rate": 6.891271820448879e-06, "loss": 0.316, "step": 52590 }, { "epoch": 13.11720698254364, "grad_norm": 9.928048133850098, "learning_rate": 6.888778054862844e-06, "loss": 0.3444, "step": 52600 }, { "epoch": 13.119700748129675, "grad_norm": 12.679057121276855, "learning_rate": 6.886284289276809e-06, "loss": 0.3914, "step": 52610 }, { "epoch": 13.12219451371571, "grad_norm": 8.21277904510498, "learning_rate": 6.883790523690773e-06, "loss": 0.293, "step": 52620 }, { "epoch": 13.124688279301745, "grad_norm": 8.76871109008789, "learning_rate": 6.881296758104738e-06, "loss": 0.3419, "step": 52630 }, { "epoch": 13.12718204488778, "grad_norm": 7.2397236824035645, "learning_rate": 6.878802992518704e-06, "loss": 0.3769, "step": 52640 }, { "epoch": 13.129675810473815, "grad_norm": 7.731866836547852, "learning_rate": 6.87630922693267e-06, "loss": 0.3589, "step": 52650 }, { "epoch": 13.13216957605985, "grad_norm": 8.515292167663574, "learning_rate": 6.873815461346633e-06, "loss": 0.3328, "step": 52660 }, { "epoch": 13.134663341645885, "grad_norm": 5.041269779205322, "learning_rate": 6.8713216957605985e-06, "loss": 0.3182, "step": 52670 }, { "epoch": 13.13715710723192, "grad_norm": 7.612249851226807, "learning_rate": 6.868827930174564e-06, "loss": 0.3596, "step": 52680 }, { "epoch": 13.139650872817954, "grad_norm": 8.448023796081543, "learning_rate": 6.866334164588529e-06, "loss": 0.3286, "step": 52690 }, { "epoch": 13.14214463840399, "grad_norm": 8.699821472167969, "learning_rate": 6.863840399002495e-06, "loss": 0.3803, "step": 52700 }, { "epoch": 13.144638403990024, "grad_norm": 9.197280883789062, "learning_rate": 6.861346633416459e-06, "loss": 0.3577, "step": 52710 }, { "epoch": 13.147132169576059, "grad_norm": 5.9945597648620605, "learning_rate": 6.858852867830424e-06, "loss": 0.3208, "step": 52720 }, { "epoch": 13.149625935162096, "grad_norm": 7.514776229858398, "learning_rate": 6.8563591022443895e-06, "loss": 0.3625, "step": 52730 }, { "epoch": 13.15211970074813, "grad_norm": 17.048492431640625, "learning_rate": 6.853865336658355e-06, "loss": 0.3416, "step": 52740 }, { "epoch": 13.154613466334165, "grad_norm": 9.108981132507324, "learning_rate": 6.851371571072319e-06, "loss": 0.3295, "step": 52750 }, { "epoch": 13.1571072319202, "grad_norm": 4.699746131896973, "learning_rate": 6.848877805486285e-06, "loss": 0.3258, "step": 52760 }, { "epoch": 13.159600997506235, "grad_norm": 9.46227741241455, "learning_rate": 6.84638403990025e-06, "loss": 0.3021, "step": 52770 }, { "epoch": 13.16209476309227, "grad_norm": 10.8130521774292, "learning_rate": 6.843890274314215e-06, "loss": 0.3349, "step": 52780 }, { "epoch": 13.164588528678305, "grad_norm": 6.772179126739502, "learning_rate": 6.84139650872818e-06, "loss": 0.3241, "step": 52790 }, { "epoch": 13.16708229426434, "grad_norm": 8.732086181640625, "learning_rate": 6.838902743142145e-06, "loss": 0.2967, "step": 52800 }, { "epoch": 13.169576059850375, "grad_norm": 8.218696594238281, "learning_rate": 6.83640897755611e-06, "loss": 0.3601, "step": 52810 }, { "epoch": 13.17206982543641, "grad_norm": 11.002167701721191, "learning_rate": 6.833915211970076e-06, "loss": 0.3686, "step": 52820 }, { "epoch": 13.174563591022444, "grad_norm": 7.543792724609375, "learning_rate": 6.83142144638404e-06, "loss": 0.3467, "step": 52830 }, { "epoch": 13.17705735660848, "grad_norm": 7.169468402862549, "learning_rate": 6.828927680798005e-06, "loss": 0.3385, "step": 52840 }, { "epoch": 13.179551122194514, "grad_norm": 11.047392845153809, "learning_rate": 6.826433915211971e-06, "loss": 0.3213, "step": 52850 }, { "epoch": 13.182044887780549, "grad_norm": 7.974063396453857, "learning_rate": 6.823940149625936e-06, "loss": 0.3075, "step": 52860 }, { "epoch": 13.184538653366584, "grad_norm": 9.911652565002441, "learning_rate": 6.8214463840399005e-06, "loss": 0.2636, "step": 52870 }, { "epoch": 13.187032418952619, "grad_norm": 5.923473834991455, "learning_rate": 6.818952618453866e-06, "loss": 0.3013, "step": 52880 }, { "epoch": 13.189526184538654, "grad_norm": 7.126490592956543, "learning_rate": 6.816458852867831e-06, "loss": 0.2964, "step": 52890 }, { "epoch": 13.192019950124688, "grad_norm": 6.427547931671143, "learning_rate": 6.813965087281796e-06, "loss": 0.3058, "step": 52900 }, { "epoch": 13.194513715710723, "grad_norm": 6.297787189483643, "learning_rate": 6.811471321695761e-06, "loss": 0.2955, "step": 52910 }, { "epoch": 13.197007481296758, "grad_norm": 7.9449005126953125, "learning_rate": 6.808977556109726e-06, "loss": 0.3024, "step": 52920 }, { "epoch": 13.199501246882793, "grad_norm": 10.155074119567871, "learning_rate": 6.8064837905236915e-06, "loss": 0.3011, "step": 52930 }, { "epoch": 13.201995012468828, "grad_norm": 8.037057876586914, "learning_rate": 6.803990024937657e-06, "loss": 0.3507, "step": 52940 }, { "epoch": 13.204488778054863, "grad_norm": 8.216534614562988, "learning_rate": 6.801496259351622e-06, "loss": 0.3815, "step": 52950 }, { "epoch": 13.206982543640898, "grad_norm": 8.674677848815918, "learning_rate": 6.799002493765587e-06, "loss": 0.3702, "step": 52960 }, { "epoch": 13.209476309226932, "grad_norm": 6.525129795074463, "learning_rate": 6.796508728179552e-06, "loss": 0.2832, "step": 52970 }, { "epoch": 13.211970074812967, "grad_norm": 20.87378692626953, "learning_rate": 6.794014962593517e-06, "loss": 0.4099, "step": 52980 }, { "epoch": 13.214463840399002, "grad_norm": 6.119201183319092, "learning_rate": 6.7915211970074825e-06, "loss": 0.3321, "step": 52990 }, { "epoch": 13.216957605985037, "grad_norm": 9.405641555786133, "learning_rate": 6.789027431421447e-06, "loss": 0.3069, "step": 53000 }, { "epoch": 13.219451371571072, "grad_norm": 9.708418846130371, "learning_rate": 6.786533665835412e-06, "loss": 0.3953, "step": 53010 }, { "epoch": 13.221945137157107, "grad_norm": 6.832964897155762, "learning_rate": 6.784039900249378e-06, "loss": 0.3541, "step": 53020 }, { "epoch": 13.224438902743142, "grad_norm": 15.42807674407959, "learning_rate": 6.781546134663343e-06, "loss": 0.4261, "step": 53030 }, { "epoch": 13.226932668329177, "grad_norm": 17.721216201782227, "learning_rate": 6.7790523690773065e-06, "loss": 0.3629, "step": 53040 }, { "epoch": 13.229426433915211, "grad_norm": 10.285332679748535, "learning_rate": 6.776558603491273e-06, "loss": 0.329, "step": 53050 }, { "epoch": 13.231920199501246, "grad_norm": 4.8609113693237305, "learning_rate": 6.774064837905238e-06, "loss": 0.3147, "step": 53060 }, { "epoch": 13.234413965087281, "grad_norm": 8.452110290527344, "learning_rate": 6.771571072319203e-06, "loss": 0.3458, "step": 53070 }, { "epoch": 13.236907730673316, "grad_norm": 6.897256851196289, "learning_rate": 6.769077306733167e-06, "loss": 0.3171, "step": 53080 }, { "epoch": 13.239401496259351, "grad_norm": 8.701833724975586, "learning_rate": 6.766583541147132e-06, "loss": 0.3182, "step": 53090 }, { "epoch": 13.241895261845386, "grad_norm": 6.701672554016113, "learning_rate": 6.7640897755610975e-06, "loss": 0.4088, "step": 53100 }, { "epoch": 13.24438902743142, "grad_norm": 9.619290351867676, "learning_rate": 6.761596009975064e-06, "loss": 0.354, "step": 53110 }, { "epoch": 13.246882793017456, "grad_norm": 8.733819007873535, "learning_rate": 6.759102244389027e-06, "loss": 0.3971, "step": 53120 }, { "epoch": 13.24937655860349, "grad_norm": 7.536902904510498, "learning_rate": 6.756608478802993e-06, "loss": 0.382, "step": 53130 }, { "epoch": 13.251870324189527, "grad_norm": 8.806652069091797, "learning_rate": 6.754114713216958e-06, "loss": 0.359, "step": 53140 }, { "epoch": 13.254364089775562, "grad_norm": 8.000337600708008, "learning_rate": 6.751620947630923e-06, "loss": 0.3648, "step": 53150 }, { "epoch": 13.256857855361597, "grad_norm": 10.321428298950195, "learning_rate": 6.749127182044888e-06, "loss": 0.3462, "step": 53160 }, { "epoch": 13.259351620947632, "grad_norm": 11.810160636901855, "learning_rate": 6.746633416458853e-06, "loss": 0.3129, "step": 53170 }, { "epoch": 13.261845386533667, "grad_norm": 8.895573616027832, "learning_rate": 6.744139650872818e-06, "loss": 0.2583, "step": 53180 }, { "epoch": 13.264339152119701, "grad_norm": 8.861416816711426, "learning_rate": 6.741645885286784e-06, "loss": 0.3389, "step": 53190 }, { "epoch": 13.266832917705736, "grad_norm": 7.294126510620117, "learning_rate": 6.739152119700749e-06, "loss": 0.3496, "step": 53200 }, { "epoch": 13.269326683291771, "grad_norm": 7.825159072875977, "learning_rate": 6.736658354114713e-06, "loss": 0.321, "step": 53210 }, { "epoch": 13.271820448877806, "grad_norm": 7.070465564727783, "learning_rate": 6.734164588528679e-06, "loss": 0.2964, "step": 53220 }, { "epoch": 13.27431421446384, "grad_norm": 11.65014934539795, "learning_rate": 6.731670822942644e-06, "loss": 0.2895, "step": 53230 }, { "epoch": 13.276807980049876, "grad_norm": 4.547674655914307, "learning_rate": 6.729177057356609e-06, "loss": 0.3454, "step": 53240 }, { "epoch": 13.27930174563591, "grad_norm": 6.833098411560059, "learning_rate": 6.726683291770574e-06, "loss": 0.3121, "step": 53250 }, { "epoch": 13.281795511221945, "grad_norm": 11.146747589111328, "learning_rate": 6.724189526184539e-06, "loss": 0.36, "step": 53260 }, { "epoch": 13.28428927680798, "grad_norm": 9.843538284301758, "learning_rate": 6.721695760598504e-06, "loss": 0.3221, "step": 53270 }, { "epoch": 13.286783042394015, "grad_norm": 8.288082122802734, "learning_rate": 6.71920199501247e-06, "loss": 0.3221, "step": 53280 }, { "epoch": 13.28927680798005, "grad_norm": 9.893383979797363, "learning_rate": 6.716708229426434e-06, "loss": 0.2934, "step": 53290 }, { "epoch": 13.291770573566085, "grad_norm": 5.806131362915039, "learning_rate": 6.7142144638403995e-06, "loss": 0.401, "step": 53300 }, { "epoch": 13.29426433915212, "grad_norm": 9.158594131469727, "learning_rate": 6.711720698254365e-06, "loss": 0.2943, "step": 53310 }, { "epoch": 13.296758104738155, "grad_norm": 6.4060139656066895, "learning_rate": 6.70922693266833e-06, "loss": 0.3538, "step": 53320 }, { "epoch": 13.29925187032419, "grad_norm": 10.887904167175293, "learning_rate": 6.7067331670822946e-06, "loss": 0.3591, "step": 53330 }, { "epoch": 13.301745635910224, "grad_norm": 6.867051124572754, "learning_rate": 6.70423940149626e-06, "loss": 0.2973, "step": 53340 }, { "epoch": 13.30423940149626, "grad_norm": 6.542517185211182, "learning_rate": 6.701745635910225e-06, "loss": 0.2912, "step": 53350 }, { "epoch": 13.306733167082294, "grad_norm": 7.9812517166137695, "learning_rate": 6.6992518703241905e-06, "loss": 0.29, "step": 53360 }, { "epoch": 13.309226932668329, "grad_norm": 7.906765460968018, "learning_rate": 6.696758104738155e-06, "loss": 0.2706, "step": 53370 }, { "epoch": 13.311720698254364, "grad_norm": 8.329736709594727, "learning_rate": 6.69426433915212e-06, "loss": 0.3764, "step": 53380 }, { "epoch": 13.314214463840399, "grad_norm": 9.357162475585938, "learning_rate": 6.691770573566086e-06, "loss": 0.3357, "step": 53390 }, { "epoch": 13.316708229426434, "grad_norm": 7.998769760131836, "learning_rate": 6.689276807980051e-06, "loss": 0.3845, "step": 53400 }, { "epoch": 13.319201995012468, "grad_norm": 9.348040580749512, "learning_rate": 6.686783042394015e-06, "loss": 0.389, "step": 53410 }, { "epoch": 13.321695760598503, "grad_norm": 6.765722274780273, "learning_rate": 6.684289276807981e-06, "loss": 0.3126, "step": 53420 }, { "epoch": 13.324189526184538, "grad_norm": 7.536268711090088, "learning_rate": 6.681795511221946e-06, "loss": 0.3189, "step": 53430 }, { "epoch": 13.326683291770573, "grad_norm": 5.146828651428223, "learning_rate": 6.679301745635911e-06, "loss": 0.3335, "step": 53440 }, { "epoch": 13.329177057356608, "grad_norm": 7.732426166534424, "learning_rate": 6.676807980049877e-06, "loss": 0.3094, "step": 53450 }, { "epoch": 13.331670822942643, "grad_norm": 7.80210018157959, "learning_rate": 6.674314214463841e-06, "loss": 0.4274, "step": 53460 }, { "epoch": 13.334164588528678, "grad_norm": 11.900932312011719, "learning_rate": 6.671820448877806e-06, "loss": 0.3221, "step": 53470 }, { "epoch": 13.336658354114713, "grad_norm": 7.599810600280762, "learning_rate": 6.669326683291772e-06, "loss": 0.3771, "step": 53480 }, { "epoch": 13.339152119700747, "grad_norm": 6.467700958251953, "learning_rate": 6.666832917705737e-06, "loss": 0.32, "step": 53490 }, { "epoch": 13.341645885286782, "grad_norm": 8.103590965270996, "learning_rate": 6.664339152119701e-06, "loss": 0.3803, "step": 53500 }, { "epoch": 13.344139650872817, "grad_norm": 8.192358016967773, "learning_rate": 6.661845386533666e-06, "loss": 0.3562, "step": 53510 }, { "epoch": 13.346633416458852, "grad_norm": 6.621999263763428, "learning_rate": 6.659351620947632e-06, "loss": 0.306, "step": 53520 }, { "epoch": 13.349127182044889, "grad_norm": 10.717852592468262, "learning_rate": 6.656857855361597e-06, "loss": 0.3724, "step": 53530 }, { "epoch": 13.351620947630924, "grad_norm": 11.658101081848145, "learning_rate": 6.654364089775561e-06, "loss": 0.3673, "step": 53540 }, { "epoch": 13.354114713216958, "grad_norm": 8.711099624633789, "learning_rate": 6.651870324189526e-06, "loss": 0.3506, "step": 53550 }, { "epoch": 13.356608478802993, "grad_norm": 8.620141983032227, "learning_rate": 6.649376558603492e-06, "loss": 0.341, "step": 53560 }, { "epoch": 13.359102244389028, "grad_norm": 6.421422958374023, "learning_rate": 6.646882793017457e-06, "loss": 0.3318, "step": 53570 }, { "epoch": 13.361596009975063, "grad_norm": 7.4736127853393555, "learning_rate": 6.644389027431421e-06, "loss": 0.386, "step": 53580 }, { "epoch": 13.364089775561098, "grad_norm": 8.752002716064453, "learning_rate": 6.641895261845387e-06, "loss": 0.3384, "step": 53590 }, { "epoch": 13.366583541147133, "grad_norm": 4.710286617279053, "learning_rate": 6.639401496259352e-06, "loss": 0.3548, "step": 53600 }, { "epoch": 13.369077306733168, "grad_norm": 6.772241115570068, "learning_rate": 6.636907730673317e-06, "loss": 0.3097, "step": 53610 }, { "epoch": 13.371571072319203, "grad_norm": 9.710107803344727, "learning_rate": 6.634413965087282e-06, "loss": 0.3294, "step": 53620 }, { "epoch": 13.374064837905237, "grad_norm": 7.724490165710449, "learning_rate": 6.631920199501247e-06, "loss": 0.3282, "step": 53630 }, { "epoch": 13.376558603491272, "grad_norm": 11.170283317565918, "learning_rate": 6.629426433915212e-06, "loss": 0.4139, "step": 53640 }, { "epoch": 13.379052369077307, "grad_norm": 11.032581329345703, "learning_rate": 6.626932668329178e-06, "loss": 0.3775, "step": 53650 }, { "epoch": 13.381546134663342, "grad_norm": 7.674757480621338, "learning_rate": 6.624438902743142e-06, "loss": 0.2728, "step": 53660 }, { "epoch": 13.384039900249377, "grad_norm": 8.27985954284668, "learning_rate": 6.6219451371571075e-06, "loss": 0.3075, "step": 53670 }, { "epoch": 13.386533665835412, "grad_norm": 9.89207935333252, "learning_rate": 6.619451371571073e-06, "loss": 0.3486, "step": 53680 }, { "epoch": 13.389027431421447, "grad_norm": 7.001489162445068, "learning_rate": 6.616957605985038e-06, "loss": 0.3218, "step": 53690 }, { "epoch": 13.391521197007481, "grad_norm": 8.43419361114502, "learning_rate": 6.614463840399003e-06, "loss": 0.3296, "step": 53700 }, { "epoch": 13.394014962593516, "grad_norm": 9.101119995117188, "learning_rate": 6.611970074812968e-06, "loss": 0.4188, "step": 53710 }, { "epoch": 13.396508728179551, "grad_norm": 7.946018218994141, "learning_rate": 6.609476309226933e-06, "loss": 0.3638, "step": 53720 }, { "epoch": 13.399002493765586, "grad_norm": 4.495571136474609, "learning_rate": 6.6069825436408985e-06, "loss": 0.2811, "step": 53730 }, { "epoch": 13.401496259351621, "grad_norm": 9.637377738952637, "learning_rate": 6.604488778054864e-06, "loss": 0.3226, "step": 53740 }, { "epoch": 13.403990024937656, "grad_norm": 12.018438339233398, "learning_rate": 6.601995012468828e-06, "loss": 0.3199, "step": 53750 }, { "epoch": 13.40648379052369, "grad_norm": 8.298521995544434, "learning_rate": 6.5995012468827936e-06, "loss": 0.3609, "step": 53760 }, { "epoch": 13.408977556109726, "grad_norm": 8.563523292541504, "learning_rate": 6.597007481296759e-06, "loss": 0.319, "step": 53770 }, { "epoch": 13.41147132169576, "grad_norm": 6.873093128204346, "learning_rate": 6.594513715710724e-06, "loss": 0.3697, "step": 53780 }, { "epoch": 13.413965087281795, "grad_norm": 6.433836936950684, "learning_rate": 6.592019950124689e-06, "loss": 0.3516, "step": 53790 }, { "epoch": 13.41645885286783, "grad_norm": 7.9647908210754395, "learning_rate": 6.589526184538654e-06, "loss": 0.3636, "step": 53800 }, { "epoch": 13.418952618453865, "grad_norm": 8.467369079589844, "learning_rate": 6.587032418952619e-06, "loss": 0.3561, "step": 53810 }, { "epoch": 13.4214463840399, "grad_norm": 5.356767654418945, "learning_rate": 6.584538653366585e-06, "loss": 0.3067, "step": 53820 }, { "epoch": 13.423940149625935, "grad_norm": 8.168974876403809, "learning_rate": 6.582044887780549e-06, "loss": 0.3501, "step": 53830 }, { "epoch": 13.42643391521197, "grad_norm": 7.033174514770508, "learning_rate": 6.579551122194514e-06, "loss": 0.3376, "step": 53840 }, { "epoch": 13.428927680798004, "grad_norm": 9.300447463989258, "learning_rate": 6.57705735660848e-06, "loss": 0.3486, "step": 53850 }, { "epoch": 13.43142144638404, "grad_norm": 7.828090667724609, "learning_rate": 6.574563591022445e-06, "loss": 0.3739, "step": 53860 }, { "epoch": 13.433915211970074, "grad_norm": 8.298700332641602, "learning_rate": 6.5720698254364094e-06, "loss": 0.3279, "step": 53870 }, { "epoch": 13.436408977556109, "grad_norm": 6.854353427886963, "learning_rate": 6.569576059850375e-06, "loss": 0.2454, "step": 53880 }, { "epoch": 13.438902743142144, "grad_norm": 10.509041786193848, "learning_rate": 6.56708229426434e-06, "loss": 0.4803, "step": 53890 }, { "epoch": 13.441396508728179, "grad_norm": 9.381756782531738, "learning_rate": 6.564588528678305e-06, "loss": 0.3879, "step": 53900 }, { "epoch": 13.443890274314214, "grad_norm": 7.465641498565674, "learning_rate": 6.562094763092269e-06, "loss": 0.3324, "step": 53910 }, { "epoch": 13.446384039900249, "grad_norm": 9.815617561340332, "learning_rate": 6.559600997506234e-06, "loss": 0.3524, "step": 53920 }, { "epoch": 13.448877805486283, "grad_norm": 7.948529243469238, "learning_rate": 6.5571072319202004e-06, "loss": 0.4092, "step": 53930 }, { "epoch": 13.451371571072318, "grad_norm": 6.234860897064209, "learning_rate": 6.554613466334166e-06, "loss": 0.2804, "step": 53940 }, { "epoch": 13.453865336658355, "grad_norm": 8.012157440185547, "learning_rate": 6.552119700748131e-06, "loss": 0.2756, "step": 53950 }, { "epoch": 13.45635910224439, "grad_norm": 9.687308311462402, "learning_rate": 6.549625935162095e-06, "loss": 0.3588, "step": 53960 }, { "epoch": 13.458852867830425, "grad_norm": 9.27532958984375, "learning_rate": 6.54713216957606e-06, "loss": 0.3675, "step": 53970 }, { "epoch": 13.46134663341646, "grad_norm": 6.1434712409973145, "learning_rate": 6.544638403990025e-06, "loss": 0.3445, "step": 53980 }, { "epoch": 13.463840399002494, "grad_norm": 8.947775840759277, "learning_rate": 6.5421446384039915e-06, "loss": 0.334, "step": 53990 }, { "epoch": 13.46633416458853, "grad_norm": 9.55975341796875, "learning_rate": 6.539650872817955e-06, "loss": 0.4319, "step": 54000 }, { "epoch": 13.468827930174564, "grad_norm": 7.890659332275391, "learning_rate": 6.53715710723192e-06, "loss": 0.3558, "step": 54010 }, { "epoch": 13.471321695760599, "grad_norm": 9.459158897399902, "learning_rate": 6.534663341645886e-06, "loss": 0.347, "step": 54020 }, { "epoch": 13.473815461346634, "grad_norm": 10.759072303771973, "learning_rate": 6.532169576059851e-06, "loss": 0.336, "step": 54030 }, { "epoch": 13.476309226932669, "grad_norm": 5.313018798828125, "learning_rate": 6.5296758104738155e-06, "loss": 0.2731, "step": 54040 }, { "epoch": 13.478802992518704, "grad_norm": 6.71405029296875, "learning_rate": 6.527182044887781e-06, "loss": 0.3615, "step": 54050 }, { "epoch": 13.481296758104738, "grad_norm": 10.912115097045898, "learning_rate": 6.524688279301746e-06, "loss": 0.3431, "step": 54060 }, { "epoch": 13.483790523690773, "grad_norm": 7.443094730377197, "learning_rate": 6.522194513715711e-06, "loss": 0.3877, "step": 54070 }, { "epoch": 13.486284289276808, "grad_norm": 6.929405689239502, "learning_rate": 6.519700748129676e-06, "loss": 0.2966, "step": 54080 }, { "epoch": 13.488778054862843, "grad_norm": 6.779537200927734, "learning_rate": 6.517206982543641e-06, "loss": 0.2756, "step": 54090 }, { "epoch": 13.491271820448878, "grad_norm": 13.575571060180664, "learning_rate": 6.5147132169576065e-06, "loss": 0.3851, "step": 54100 }, { "epoch": 13.493765586034913, "grad_norm": 6.581482410430908, "learning_rate": 6.512219451371572e-06, "loss": 0.3736, "step": 54110 }, { "epoch": 13.496259351620948, "grad_norm": 9.323427200317383, "learning_rate": 6.509725685785536e-06, "loss": 0.4017, "step": 54120 }, { "epoch": 13.498753117206983, "grad_norm": 6.501451015472412, "learning_rate": 6.5072319201995016e-06, "loss": 0.363, "step": 54130 }, { "epoch": 13.501246882793017, "grad_norm": 7.850208282470703, "learning_rate": 6.504738154613467e-06, "loss": 0.351, "step": 54140 }, { "epoch": 13.503740648379052, "grad_norm": 7.5729780197143555, "learning_rate": 6.502244389027432e-06, "loss": 0.2832, "step": 54150 }, { "epoch": 13.506234413965087, "grad_norm": 5.65486478805542, "learning_rate": 6.499750623441397e-06, "loss": 0.3237, "step": 54160 }, { "epoch": 13.508728179551122, "grad_norm": 10.24453067779541, "learning_rate": 6.497256857855362e-06, "loss": 0.3901, "step": 54170 }, { "epoch": 13.511221945137157, "grad_norm": 7.5691962242126465, "learning_rate": 6.494763092269327e-06, "loss": 0.3493, "step": 54180 }, { "epoch": 13.513715710723192, "grad_norm": 8.992122650146484, "learning_rate": 6.4922693266832926e-06, "loss": 0.3426, "step": 54190 }, { "epoch": 13.516209476309227, "grad_norm": 6.614286422729492, "learning_rate": 6.489775561097258e-06, "loss": 0.3462, "step": 54200 }, { "epoch": 13.518703241895262, "grad_norm": 8.444540977478027, "learning_rate": 6.487281795511222e-06, "loss": 0.2959, "step": 54210 }, { "epoch": 13.521197007481296, "grad_norm": 4.639540672302246, "learning_rate": 6.484788029925188e-06, "loss": 0.3116, "step": 54220 }, { "epoch": 13.523690773067331, "grad_norm": 5.426086902618408, "learning_rate": 6.482294264339153e-06, "loss": 0.3411, "step": 54230 }, { "epoch": 13.526184538653366, "grad_norm": 12.386012077331543, "learning_rate": 6.479800498753118e-06, "loss": 0.3102, "step": 54240 }, { "epoch": 13.528678304239401, "grad_norm": 6.084399700164795, "learning_rate": 6.477306733167083e-06, "loss": 0.3842, "step": 54250 }, { "epoch": 13.531172069825436, "grad_norm": 7.526169300079346, "learning_rate": 6.474812967581048e-06, "loss": 0.3568, "step": 54260 }, { "epoch": 13.53366583541147, "grad_norm": 7.161496639251709, "learning_rate": 6.472319201995013e-06, "loss": 0.3225, "step": 54270 }, { "epoch": 13.536159600997506, "grad_norm": 5.644833087921143, "learning_rate": 6.469825436408979e-06, "loss": 0.293, "step": 54280 }, { "epoch": 13.53865336658354, "grad_norm": 11.303448677062988, "learning_rate": 6.467331670822943e-06, "loss": 0.3523, "step": 54290 }, { "epoch": 13.541147132169575, "grad_norm": 9.144119262695312, "learning_rate": 6.4648379052369084e-06, "loss": 0.3661, "step": 54300 }, { "epoch": 13.54364089775561, "grad_norm": 8.02839469909668, "learning_rate": 6.462344139650874e-06, "loss": 0.3792, "step": 54310 }, { "epoch": 13.546134663341645, "grad_norm": 9.855666160583496, "learning_rate": 6.459850374064839e-06, "loss": 0.3324, "step": 54320 }, { "epoch": 13.548628428927682, "grad_norm": 8.013253211975098, "learning_rate": 6.457356608478803e-06, "loss": 0.3991, "step": 54330 }, { "epoch": 13.551122194513717, "grad_norm": 8.09378433227539, "learning_rate": 6.454862842892769e-06, "loss": 0.3573, "step": 54340 }, { "epoch": 13.553615960099751, "grad_norm": 8.810187339782715, "learning_rate": 6.452369077306734e-06, "loss": 0.335, "step": 54350 }, { "epoch": 13.556109725685786, "grad_norm": 5.002645492553711, "learning_rate": 6.4498753117206994e-06, "loss": 0.2775, "step": 54360 }, { "epoch": 13.558603491271821, "grad_norm": 5.791070938110352, "learning_rate": 6.447381546134663e-06, "loss": 0.2979, "step": 54370 }, { "epoch": 13.561097256857856, "grad_norm": 11.853717803955078, "learning_rate": 6.444887780548628e-06, "loss": 0.3168, "step": 54380 }, { "epoch": 13.563591022443891, "grad_norm": 7.837883949279785, "learning_rate": 6.442394014962594e-06, "loss": 0.4237, "step": 54390 }, { "epoch": 13.566084788029926, "grad_norm": 8.088776588439941, "learning_rate": 6.43990024937656e-06, "loss": 0.29, "step": 54400 }, { "epoch": 13.56857855361596, "grad_norm": 8.3961763381958, "learning_rate": 6.4374064837905235e-06, "loss": 0.3884, "step": 54410 }, { "epoch": 13.571072319201996, "grad_norm": 8.64345645904541, "learning_rate": 6.434912718204489e-06, "loss": 0.335, "step": 54420 }, { "epoch": 13.57356608478803, "grad_norm": 5.859470844268799, "learning_rate": 6.432418952618454e-06, "loss": 0.3534, "step": 54430 }, { "epoch": 13.576059850374065, "grad_norm": 3.3717424869537354, "learning_rate": 6.429925187032419e-06, "loss": 0.2729, "step": 54440 }, { "epoch": 13.5785536159601, "grad_norm": 6.788022518157959, "learning_rate": 6.427431421446385e-06, "loss": 0.311, "step": 54450 }, { "epoch": 13.581047381546135, "grad_norm": 5.033174514770508, "learning_rate": 6.424937655860349e-06, "loss": 0.3251, "step": 54460 }, { "epoch": 13.58354114713217, "grad_norm": 8.036091804504395, "learning_rate": 6.4224438902743145e-06, "loss": 0.4072, "step": 54470 }, { "epoch": 13.586034912718205, "grad_norm": 8.163284301757812, "learning_rate": 6.41995012468828e-06, "loss": 0.318, "step": 54480 }, { "epoch": 13.58852867830424, "grad_norm": 6.626342296600342, "learning_rate": 6.417456359102245e-06, "loss": 0.3064, "step": 54490 }, { "epoch": 13.591022443890274, "grad_norm": 7.7200469970703125, "learning_rate": 6.4149625935162095e-06, "loss": 0.3844, "step": 54500 }, { "epoch": 13.59351620947631, "grad_norm": 8.76436710357666, "learning_rate": 6.412468827930175e-06, "loss": 0.3757, "step": 54510 }, { "epoch": 13.596009975062344, "grad_norm": 9.245163917541504, "learning_rate": 6.40997506234414e-06, "loss": 0.3857, "step": 54520 }, { "epoch": 13.598503740648379, "grad_norm": 6.557892322540283, "learning_rate": 6.4074812967581055e-06, "loss": 0.3491, "step": 54530 }, { "epoch": 13.600997506234414, "grad_norm": 9.482400894165039, "learning_rate": 6.40498753117207e-06, "loss": 0.3553, "step": 54540 }, { "epoch": 13.603491271820449, "grad_norm": 8.862476348876953, "learning_rate": 6.402493765586035e-06, "loss": 0.334, "step": 54550 }, { "epoch": 13.605985037406484, "grad_norm": 6.397747039794922, "learning_rate": 6.4000000000000006e-06, "loss": 0.3245, "step": 54560 }, { "epoch": 13.608478802992519, "grad_norm": 6.339082717895508, "learning_rate": 6.397506234413966e-06, "loss": 0.3779, "step": 54570 }, { "epoch": 13.610972568578553, "grad_norm": 11.248029708862305, "learning_rate": 6.39501246882793e-06, "loss": 0.3455, "step": 54580 }, { "epoch": 13.613466334164588, "grad_norm": 6.5518341064453125, "learning_rate": 6.392518703241896e-06, "loss": 0.2906, "step": 54590 }, { "epoch": 13.615960099750623, "grad_norm": 6.682394981384277, "learning_rate": 6.390024937655861e-06, "loss": 0.314, "step": 54600 }, { "epoch": 13.618453865336658, "grad_norm": 8.466720581054688, "learning_rate": 6.387531172069826e-06, "loss": 0.4433, "step": 54610 }, { "epoch": 13.620947630922693, "grad_norm": 13.29272174835205, "learning_rate": 6.385037406483791e-06, "loss": 0.3942, "step": 54620 }, { "epoch": 13.623441396508728, "grad_norm": 6.229150295257568, "learning_rate": 6.382543640897756e-06, "loss": 0.3084, "step": 54630 }, { "epoch": 13.625935162094763, "grad_norm": 10.2169828414917, "learning_rate": 6.380049875311721e-06, "loss": 0.4638, "step": 54640 }, { "epoch": 13.628428927680797, "grad_norm": 8.496891021728516, "learning_rate": 6.377556109725687e-06, "loss": 0.3698, "step": 54650 }, { "epoch": 13.630922693266832, "grad_norm": 8.761347770690918, "learning_rate": 6.375062344139651e-06, "loss": 0.3581, "step": 54660 }, { "epoch": 13.633416458852867, "grad_norm": 7.339252948760986, "learning_rate": 6.372568578553616e-06, "loss": 0.3113, "step": 54670 }, { "epoch": 13.635910224438902, "grad_norm": 8.492396354675293, "learning_rate": 6.370074812967582e-06, "loss": 0.3409, "step": 54680 }, { "epoch": 13.638403990024937, "grad_norm": 11.395689964294434, "learning_rate": 6.367581047381547e-06, "loss": 0.3892, "step": 54690 }, { "epoch": 13.640897755610972, "grad_norm": 8.986297607421875, "learning_rate": 6.365087281795512e-06, "loss": 0.3154, "step": 54700 }, { "epoch": 13.643391521197007, "grad_norm": 9.709697723388672, "learning_rate": 6.362593516209477e-06, "loss": 0.3331, "step": 54710 }, { "epoch": 13.645885286783042, "grad_norm": 6.466365337371826, "learning_rate": 6.360099750623442e-06, "loss": 0.273, "step": 54720 }, { "epoch": 13.648379052369076, "grad_norm": 10.639359474182129, "learning_rate": 6.3576059850374074e-06, "loss": 0.3514, "step": 54730 }, { "epoch": 13.650872817955111, "grad_norm": 7.763825416564941, "learning_rate": 6.355112219451373e-06, "loss": 0.3726, "step": 54740 }, { "epoch": 13.653366583541148, "grad_norm": 6.4132585525512695, "learning_rate": 6.352618453865337e-06, "loss": 0.3819, "step": 54750 }, { "epoch": 13.655860349127183, "grad_norm": 9.225993156433105, "learning_rate": 6.3501246882793025e-06, "loss": 0.32, "step": 54760 }, { "epoch": 13.658354114713218, "grad_norm": 7.557957172393799, "learning_rate": 6.347630922693268e-06, "loss": 0.3545, "step": 54770 }, { "epoch": 13.660847880299253, "grad_norm": 6.421631813049316, "learning_rate": 6.345137157107233e-06, "loss": 0.2719, "step": 54780 }, { "epoch": 13.663341645885287, "grad_norm": 7.9836602210998535, "learning_rate": 6.342643391521197e-06, "loss": 0.3194, "step": 54790 }, { "epoch": 13.665835411471322, "grad_norm": 7.941399097442627, "learning_rate": 6.340149625935162e-06, "loss": 0.2969, "step": 54800 }, { "epoch": 13.668329177057357, "grad_norm": 6.198439121246338, "learning_rate": 6.337655860349128e-06, "loss": 0.3156, "step": 54810 }, { "epoch": 13.670822942643392, "grad_norm": 8.44516658782959, "learning_rate": 6.3351620947630935e-06, "loss": 0.3344, "step": 54820 }, { "epoch": 13.673316708229427, "grad_norm": 8.17326831817627, "learning_rate": 6.332668329177057e-06, "loss": 0.2907, "step": 54830 }, { "epoch": 13.675810473815462, "grad_norm": 8.938278198242188, "learning_rate": 6.3301745635910225e-06, "loss": 0.3873, "step": 54840 }, { "epoch": 13.678304239401497, "grad_norm": 7.996182918548584, "learning_rate": 6.327680798004988e-06, "loss": 0.3435, "step": 54850 }, { "epoch": 13.680798004987532, "grad_norm": 5.846700191497803, "learning_rate": 6.325187032418953e-06, "loss": 0.3513, "step": 54860 }, { "epoch": 13.683291770573566, "grad_norm": 5.51074743270874, "learning_rate": 6.3226932668329175e-06, "loss": 0.3079, "step": 54870 }, { "epoch": 13.685785536159601, "grad_norm": 6.8024492263793945, "learning_rate": 6.320199501246883e-06, "loss": 0.306, "step": 54880 }, { "epoch": 13.688279301745636, "grad_norm": 11.52742862701416, "learning_rate": 6.317705735660848e-06, "loss": 0.3317, "step": 54890 }, { "epoch": 13.690773067331671, "grad_norm": 14.795852661132812, "learning_rate": 6.3152119700748135e-06, "loss": 0.3375, "step": 54900 }, { "epoch": 13.693266832917706, "grad_norm": 12.156822204589844, "learning_rate": 6.312718204488778e-06, "loss": 0.3397, "step": 54910 }, { "epoch": 13.69576059850374, "grad_norm": 9.437907218933105, "learning_rate": 6.310224438902743e-06, "loss": 0.365, "step": 54920 }, { "epoch": 13.698254364089776, "grad_norm": 9.907096862792969, "learning_rate": 6.3077306733167085e-06, "loss": 0.3039, "step": 54930 }, { "epoch": 13.70074812967581, "grad_norm": 8.711502075195312, "learning_rate": 6.305236907730674e-06, "loss": 0.3617, "step": 54940 }, { "epoch": 13.703241895261845, "grad_norm": 7.6768622398376465, "learning_rate": 6.302743142144639e-06, "loss": 0.3392, "step": 54950 }, { "epoch": 13.70573566084788, "grad_norm": 5.637898921966553, "learning_rate": 6.300249376558604e-06, "loss": 0.3591, "step": 54960 }, { "epoch": 13.708229426433915, "grad_norm": 8.709186553955078, "learning_rate": 6.297755610972569e-06, "loss": 0.3944, "step": 54970 }, { "epoch": 13.71072319201995, "grad_norm": 9.957995414733887, "learning_rate": 6.295261845386534e-06, "loss": 0.2579, "step": 54980 }, { "epoch": 13.713216957605985, "grad_norm": 7.942922115325928, "learning_rate": 6.2927680798004996e-06, "loss": 0.3825, "step": 54990 }, { "epoch": 13.71571072319202, "grad_norm": 6.098124980926514, "learning_rate": 6.290274314214464e-06, "loss": 0.315, "step": 55000 }, { "epoch": 13.718204488778055, "grad_norm": 6.805457592010498, "learning_rate": 6.287780548628429e-06, "loss": 0.3479, "step": 55010 }, { "epoch": 13.72069825436409, "grad_norm": 7.8078765869140625, "learning_rate": 6.285286783042395e-06, "loss": 0.3516, "step": 55020 }, { "epoch": 13.723192019950124, "grad_norm": 8.491788864135742, "learning_rate": 6.28279301745636e-06, "loss": 0.3239, "step": 55030 }, { "epoch": 13.72568578553616, "grad_norm": 5.934401512145996, "learning_rate": 6.280299251870324e-06, "loss": 0.3531, "step": 55040 }, { "epoch": 13.728179551122194, "grad_norm": 6.76987886428833, "learning_rate": 6.27780548628429e-06, "loss": 0.37, "step": 55050 }, { "epoch": 13.730673316708229, "grad_norm": 9.285640716552734, "learning_rate": 6.275311720698255e-06, "loss": 0.3165, "step": 55060 }, { "epoch": 13.733167082294264, "grad_norm": 6.509362697601318, "learning_rate": 6.27281795511222e-06, "loss": 0.3212, "step": 55070 }, { "epoch": 13.735660847880299, "grad_norm": 9.682168006896973, "learning_rate": 6.270324189526185e-06, "loss": 0.3663, "step": 55080 }, { "epoch": 13.738154613466333, "grad_norm": 12.535362243652344, "learning_rate": 6.26783042394015e-06, "loss": 0.3563, "step": 55090 }, { "epoch": 13.740648379052368, "grad_norm": 7.953882217407227, "learning_rate": 6.2653366583541154e-06, "loss": 0.3366, "step": 55100 }, { "epoch": 13.743142144638403, "grad_norm": 9.631477355957031, "learning_rate": 6.262842892768081e-06, "loss": 0.322, "step": 55110 }, { "epoch": 13.745635910224438, "grad_norm": 7.036581516265869, "learning_rate": 6.260349127182045e-06, "loss": 0.3567, "step": 55120 }, { "epoch": 13.748129675810475, "grad_norm": 10.336029052734375, "learning_rate": 6.2578553615960105e-06, "loss": 0.3196, "step": 55130 }, { "epoch": 13.75062344139651, "grad_norm": 5.572134017944336, "learning_rate": 6.255361596009976e-06, "loss": 0.2873, "step": 55140 }, { "epoch": 13.753117206982544, "grad_norm": 7.690764427185059, "learning_rate": 6.252867830423941e-06, "loss": 0.3743, "step": 55150 }, { "epoch": 13.75561097256858, "grad_norm": 9.607911109924316, "learning_rate": 6.250374064837906e-06, "loss": 0.3554, "step": 55160 }, { "epoch": 13.758104738154614, "grad_norm": 7.156585216522217, "learning_rate": 6.247880299251871e-06, "loss": 0.3376, "step": 55170 }, { "epoch": 13.760598503740649, "grad_norm": 7.8262128829956055, "learning_rate": 6.245386533665836e-06, "loss": 0.3767, "step": 55180 }, { "epoch": 13.763092269326684, "grad_norm": 7.737610340118408, "learning_rate": 6.2428927680798015e-06, "loss": 0.3563, "step": 55190 }, { "epoch": 13.765586034912719, "grad_norm": 8.548540115356445, "learning_rate": 6.240399002493767e-06, "loss": 0.3358, "step": 55200 }, { "epoch": 13.768079800498754, "grad_norm": 5.896496295928955, "learning_rate": 6.2379052369077304e-06, "loss": 0.3586, "step": 55210 }, { "epoch": 13.770573566084789, "grad_norm": 6.512773036956787, "learning_rate": 6.235411471321697e-06, "loss": 0.3925, "step": 55220 }, { "epoch": 13.773067331670823, "grad_norm": 8.241216659545898, "learning_rate": 6.232917705735662e-06, "loss": 0.3697, "step": 55230 }, { "epoch": 13.775561097256858, "grad_norm": 7.041107654571533, "learning_rate": 6.230423940149627e-06, "loss": 0.3284, "step": 55240 }, { "epoch": 13.778054862842893, "grad_norm": 7.809773921966553, "learning_rate": 6.227930174563591e-06, "loss": 0.3691, "step": 55250 }, { "epoch": 13.780548628428928, "grad_norm": 6.446750640869141, "learning_rate": 6.225436408977556e-06, "loss": 0.3216, "step": 55260 }, { "epoch": 13.783042394014963, "grad_norm": 6.692153453826904, "learning_rate": 6.2229426433915215e-06, "loss": 0.3639, "step": 55270 }, { "epoch": 13.785536159600998, "grad_norm": 8.99974536895752, "learning_rate": 6.220448877805488e-06, "loss": 0.3577, "step": 55280 }, { "epoch": 13.788029925187033, "grad_norm": 8.162135124206543, "learning_rate": 6.217955112219451e-06, "loss": 0.3082, "step": 55290 }, { "epoch": 13.790523690773068, "grad_norm": 9.059871673583984, "learning_rate": 6.2154613466334165e-06, "loss": 0.3538, "step": 55300 }, { "epoch": 13.793017456359102, "grad_norm": 8.866132736206055, "learning_rate": 6.212967581047382e-06, "loss": 0.3224, "step": 55310 }, { "epoch": 13.795511221945137, "grad_norm": 6.690679550170898, "learning_rate": 6.210473815461347e-06, "loss": 0.3352, "step": 55320 }, { "epoch": 13.798004987531172, "grad_norm": 7.613649368286133, "learning_rate": 6.207980049875312e-06, "loss": 0.3311, "step": 55330 }, { "epoch": 13.800498753117207, "grad_norm": 5.005457878112793, "learning_rate": 6.205486284289277e-06, "loss": 0.3305, "step": 55340 }, { "epoch": 13.802992518703242, "grad_norm": 5.639405250549316, "learning_rate": 6.202992518703242e-06, "loss": 0.3504, "step": 55350 }, { "epoch": 13.805486284289277, "grad_norm": 4.40685510635376, "learning_rate": 6.2004987531172075e-06, "loss": 0.3078, "step": 55360 }, { "epoch": 13.807980049875312, "grad_norm": 6.308961868286133, "learning_rate": 6.198004987531172e-06, "loss": 0.3049, "step": 55370 }, { "epoch": 13.810473815461346, "grad_norm": 6.514129638671875, "learning_rate": 6.195511221945137e-06, "loss": 0.3479, "step": 55380 }, { "epoch": 13.812967581047381, "grad_norm": 6.240500450134277, "learning_rate": 6.193017456359103e-06, "loss": 0.3622, "step": 55390 }, { "epoch": 13.815461346633416, "grad_norm": 6.060596466064453, "learning_rate": 6.190523690773068e-06, "loss": 0.3123, "step": 55400 }, { "epoch": 13.817955112219451, "grad_norm": 8.058761596679688, "learning_rate": 6.188029925187032e-06, "loss": 0.3519, "step": 55410 }, { "epoch": 13.820448877805486, "grad_norm": 5.577200889587402, "learning_rate": 6.185536159600998e-06, "loss": 0.3323, "step": 55420 }, { "epoch": 13.82294264339152, "grad_norm": 8.922237396240234, "learning_rate": 6.183042394014963e-06, "loss": 0.3886, "step": 55430 }, { "epoch": 13.825436408977556, "grad_norm": 10.180780410766602, "learning_rate": 6.180548628428928e-06, "loss": 0.3128, "step": 55440 }, { "epoch": 13.82793017456359, "grad_norm": 6.981229305267334, "learning_rate": 6.178054862842894e-06, "loss": 0.2746, "step": 55450 }, { "epoch": 13.830423940149625, "grad_norm": 5.601588726043701, "learning_rate": 6.175561097256858e-06, "loss": 0.2593, "step": 55460 }, { "epoch": 13.83291770573566, "grad_norm": 9.501214027404785, "learning_rate": 6.173067331670823e-06, "loss": 0.3332, "step": 55470 }, { "epoch": 13.835411471321695, "grad_norm": 10.897354125976562, "learning_rate": 6.170573566084789e-06, "loss": 0.3535, "step": 55480 }, { "epoch": 13.83790523690773, "grad_norm": 9.307845115661621, "learning_rate": 6.168079800498754e-06, "loss": 0.3206, "step": 55490 }, { "epoch": 13.840399002493765, "grad_norm": 5.495110511779785, "learning_rate": 6.1655860349127185e-06, "loss": 0.3112, "step": 55500 }, { "epoch": 13.8428927680798, "grad_norm": 9.099871635437012, "learning_rate": 6.163092269326684e-06, "loss": 0.3681, "step": 55510 }, { "epoch": 13.845386533665835, "grad_norm": 7.402480602264404, "learning_rate": 6.160598503740649e-06, "loss": 0.3062, "step": 55520 }, { "epoch": 13.84788029925187, "grad_norm": 6.1998982429504395, "learning_rate": 6.1581047381546144e-06, "loss": 0.3698, "step": 55530 }, { "epoch": 13.850374064837904, "grad_norm": 5.2379021644592285, "learning_rate": 6.155610972568579e-06, "loss": 0.319, "step": 55540 }, { "epoch": 13.85286783042394, "grad_norm": 7.226074695587158, "learning_rate": 6.153117206982544e-06, "loss": 0.35, "step": 55550 }, { "epoch": 13.855361596009976, "grad_norm": 8.524081230163574, "learning_rate": 6.1506234413965095e-06, "loss": 0.3067, "step": 55560 }, { "epoch": 13.85785536159601, "grad_norm": 7.00763463973999, "learning_rate": 6.148129675810475e-06, "loss": 0.3146, "step": 55570 }, { "epoch": 13.860349127182046, "grad_norm": 10.624029159545898, "learning_rate": 6.145635910224439e-06, "loss": 0.3798, "step": 55580 }, { "epoch": 13.86284289276808, "grad_norm": 8.03925609588623, "learning_rate": 6.143142144638405e-06, "loss": 0.3862, "step": 55590 }, { "epoch": 13.865336658354115, "grad_norm": 12.083026885986328, "learning_rate": 6.14064837905237e-06, "loss": 0.3485, "step": 55600 }, { "epoch": 13.86783042394015, "grad_norm": 7.343206405639648, "learning_rate": 6.138154613466335e-06, "loss": 0.3835, "step": 55610 }, { "epoch": 13.870324189526185, "grad_norm": 11.571039199829102, "learning_rate": 6.1356608478803e-06, "loss": 0.3813, "step": 55620 }, { "epoch": 13.87281795511222, "grad_norm": 5.217142581939697, "learning_rate": 6.133167082294265e-06, "loss": 0.3064, "step": 55630 }, { "epoch": 13.875311720698255, "grad_norm": 7.893732070922852, "learning_rate": 6.13067331670823e-06, "loss": 0.3811, "step": 55640 }, { "epoch": 13.87780548628429, "grad_norm": 7.880045413970947, "learning_rate": 6.128179551122196e-06, "loss": 0.3338, "step": 55650 }, { "epoch": 13.880299251870325, "grad_norm": 10.907844543457031, "learning_rate": 6.125685785536159e-06, "loss": 0.4448, "step": 55660 }, { "epoch": 13.88279301745636, "grad_norm": 6.203114032745361, "learning_rate": 6.1231920199501245e-06, "loss": 0.3407, "step": 55670 }, { "epoch": 13.885286783042394, "grad_norm": 9.57861328125, "learning_rate": 6.120698254364091e-06, "loss": 0.3475, "step": 55680 }, { "epoch": 13.88778054862843, "grad_norm": 9.820565223693848, "learning_rate": 6.118204488778056e-06, "loss": 0.3151, "step": 55690 }, { "epoch": 13.890274314214464, "grad_norm": 7.9553046226501465, "learning_rate": 6.115710723192021e-06, "loss": 0.3343, "step": 55700 }, { "epoch": 13.892768079800499, "grad_norm": 7.718328952789307, "learning_rate": 6.113216957605985e-06, "loss": 0.3281, "step": 55710 }, { "epoch": 13.895261845386534, "grad_norm": 9.396270751953125, "learning_rate": 6.11072319201995e-06, "loss": 0.3393, "step": 55720 }, { "epoch": 13.897755610972569, "grad_norm": 6.914912223815918, "learning_rate": 6.1082294264339155e-06, "loss": 0.3104, "step": 55730 }, { "epoch": 13.900249376558603, "grad_norm": 7.916605472564697, "learning_rate": 6.105735660847882e-06, "loss": 0.3162, "step": 55740 }, { "epoch": 13.902743142144638, "grad_norm": 7.331538200378418, "learning_rate": 6.103241895261845e-06, "loss": 0.3076, "step": 55750 }, { "epoch": 13.905236907730673, "grad_norm": 6.395208835601807, "learning_rate": 6.100748129675811e-06, "loss": 0.2805, "step": 55760 }, { "epoch": 13.907730673316708, "grad_norm": 9.674216270446777, "learning_rate": 6.098254364089776e-06, "loss": 0.303, "step": 55770 }, { "epoch": 13.910224438902743, "grad_norm": 8.970970153808594, "learning_rate": 6.095760598503741e-06, "loss": 0.3443, "step": 55780 }, { "epoch": 13.912718204488778, "grad_norm": 7.25998592376709, "learning_rate": 6.093266832917706e-06, "loss": 0.2639, "step": 55790 }, { "epoch": 13.915211970074813, "grad_norm": 10.04069709777832, "learning_rate": 6.090773067331671e-06, "loss": 0.361, "step": 55800 }, { "epoch": 13.917705735660848, "grad_norm": 10.420782089233398, "learning_rate": 6.088279301745636e-06, "loss": 0.3985, "step": 55810 }, { "epoch": 13.920199501246882, "grad_norm": 7.295711040496826, "learning_rate": 6.085785536159602e-06, "loss": 0.2836, "step": 55820 }, { "epoch": 13.922693266832917, "grad_norm": 10.063495635986328, "learning_rate": 6.083291770573566e-06, "loss": 0.3421, "step": 55830 }, { "epoch": 13.925187032418952, "grad_norm": 11.42710018157959, "learning_rate": 6.080798004987531e-06, "loss": 0.3341, "step": 55840 }, { "epoch": 13.927680798004987, "grad_norm": 5.669824123382568, "learning_rate": 6.078304239401497e-06, "loss": 0.3636, "step": 55850 }, { "epoch": 13.930174563591022, "grad_norm": 9.360200881958008, "learning_rate": 6.075810473815462e-06, "loss": 0.3751, "step": 55860 }, { "epoch": 13.932668329177057, "grad_norm": 8.900922775268555, "learning_rate": 6.0733167082294265e-06, "loss": 0.3607, "step": 55870 }, { "epoch": 13.935162094763092, "grad_norm": 5.942974090576172, "learning_rate": 6.070822942643392e-06, "loss": 0.2925, "step": 55880 }, { "epoch": 13.937655860349127, "grad_norm": 8.712531089782715, "learning_rate": 6.068329177057357e-06, "loss": 0.3076, "step": 55890 }, { "epoch": 13.940149625935161, "grad_norm": 5.564552307128906, "learning_rate": 6.065835411471322e-06, "loss": 0.347, "step": 55900 }, { "epoch": 13.942643391521196, "grad_norm": 11.01707935333252, "learning_rate": 6.063341645885287e-06, "loss": 0.3483, "step": 55910 }, { "epoch": 13.945137157107231, "grad_norm": 6.945023536682129, "learning_rate": 6.060847880299252e-06, "loss": 0.3604, "step": 55920 }, { "epoch": 13.947630922693268, "grad_norm": 6.796305179595947, "learning_rate": 6.0583541147132175e-06, "loss": 0.3503, "step": 55930 }, { "epoch": 13.950124688279303, "grad_norm": 5.797430515289307, "learning_rate": 6.055860349127183e-06, "loss": 0.4618, "step": 55940 }, { "epoch": 13.952618453865338, "grad_norm": 8.793232917785645, "learning_rate": 6.053615960099751e-06, "loss": 0.3292, "step": 55950 }, { "epoch": 13.955112219451372, "grad_norm": 7.438122272491455, "learning_rate": 6.051122194513716e-06, "loss": 0.2957, "step": 55960 }, { "epoch": 13.957605985037407, "grad_norm": 9.06357192993164, "learning_rate": 6.048628428927681e-06, "loss": 0.392, "step": 55970 }, { "epoch": 13.960099750623442, "grad_norm": 11.190245628356934, "learning_rate": 6.046134663341646e-06, "loss": 0.3157, "step": 55980 }, { "epoch": 13.962593516209477, "grad_norm": 11.194109916687012, "learning_rate": 6.0436408977556115e-06, "loss": 0.333, "step": 55990 }, { "epoch": 13.965087281795512, "grad_norm": 10.485845565795898, "learning_rate": 6.041147132169577e-06, "loss": 0.3388, "step": 56000 }, { "epoch": 13.967581047381547, "grad_norm": 9.132210731506348, "learning_rate": 6.038653366583541e-06, "loss": 0.3132, "step": 56010 }, { "epoch": 13.970074812967582, "grad_norm": 9.017107963562012, "learning_rate": 6.0361596009975065e-06, "loss": 0.3127, "step": 56020 }, { "epoch": 13.972568578553616, "grad_norm": 8.930654525756836, "learning_rate": 6.033665835411472e-06, "loss": 0.3004, "step": 56030 }, { "epoch": 13.975062344139651, "grad_norm": 14.12149429321289, "learning_rate": 6.031172069825437e-06, "loss": 0.413, "step": 56040 }, { "epoch": 13.977556109725686, "grad_norm": 5.871008396148682, "learning_rate": 6.028678304239402e-06, "loss": 0.2982, "step": 56050 }, { "epoch": 13.980049875311721, "grad_norm": 6.392510890960693, "learning_rate": 6.026184538653367e-06, "loss": 0.3514, "step": 56060 }, { "epoch": 13.982543640897756, "grad_norm": 10.883293151855469, "learning_rate": 6.023690773067332e-06, "loss": 0.3422, "step": 56070 }, { "epoch": 13.98503740648379, "grad_norm": 5.680593013763428, "learning_rate": 6.0211970074812976e-06, "loss": 0.3912, "step": 56080 }, { "epoch": 13.987531172069826, "grad_norm": 11.184502601623535, "learning_rate": 6.018703241895262e-06, "loss": 0.3154, "step": 56090 }, { "epoch": 13.99002493765586, "grad_norm": 8.648889541625977, "learning_rate": 6.016209476309227e-06, "loss": 0.3405, "step": 56100 }, { "epoch": 13.992518703241895, "grad_norm": 9.194254875183105, "learning_rate": 6.013715710723193e-06, "loss": 0.3542, "step": 56110 }, { "epoch": 13.99501246882793, "grad_norm": 7.495959281921387, "learning_rate": 6.011221945137158e-06, "loss": 0.3882, "step": 56120 }, { "epoch": 13.997506234413965, "grad_norm": 10.184008598327637, "learning_rate": 6.008728179551123e-06, "loss": 0.332, "step": 56130 }, { "epoch": 14.0, "grad_norm": 7.123004913330078, "learning_rate": 6.006234413965088e-06, "loss": 0.3622, "step": 56140 }, { "epoch": 14.0, "eval_loss": 0.41418567299842834, "eval_runtime": 60.1065, "eval_samples_per_second": 16.687, "eval_steps_per_second": 16.687, "step": 56140 }, { "epoch": 14.002493765586035, "grad_norm": 7.25279426574707, "learning_rate": 6.003740648379053e-06, "loss": 0.3301, "step": 56150 }, { "epoch": 14.00498753117207, "grad_norm": 5.500768661499023, "learning_rate": 6.001246882793018e-06, "loss": 0.3115, "step": 56160 }, { "epoch": 14.007481296758105, "grad_norm": 6.643815994262695, "learning_rate": 5.998753117206984e-06, "loss": 0.2826, "step": 56170 }, { "epoch": 14.00997506234414, "grad_norm": 10.706582069396973, "learning_rate": 5.996259351620948e-06, "loss": 0.3527, "step": 56180 }, { "epoch": 14.012468827930174, "grad_norm": 15.78691291809082, "learning_rate": 5.9937655860349134e-06, "loss": 0.3879, "step": 56190 }, { "epoch": 14.01496259351621, "grad_norm": 7.744325637817383, "learning_rate": 5.991271820448879e-06, "loss": 0.2975, "step": 56200 }, { "epoch": 14.017456359102244, "grad_norm": 6.840442657470703, "learning_rate": 5.988778054862844e-06, "loss": 0.3212, "step": 56210 }, { "epoch": 14.019950124688279, "grad_norm": 6.66912841796875, "learning_rate": 5.986284289276808e-06, "loss": 0.3231, "step": 56220 }, { "epoch": 14.022443890274314, "grad_norm": 11.040656089782715, "learning_rate": 5.983790523690774e-06, "loss": 0.3638, "step": 56230 }, { "epoch": 14.024937655860349, "grad_norm": 12.657490730285645, "learning_rate": 5.981296758104739e-06, "loss": 0.3794, "step": 56240 }, { "epoch": 14.027431421446384, "grad_norm": 7.612829208374023, "learning_rate": 5.9788029925187044e-06, "loss": 0.379, "step": 56250 }, { "epoch": 14.029925187032418, "grad_norm": 6.018895149230957, "learning_rate": 5.976309226932668e-06, "loss": 0.3329, "step": 56260 }, { "epoch": 14.032418952618453, "grad_norm": 9.050536155700684, "learning_rate": 5.973815461346633e-06, "loss": 0.3569, "step": 56270 }, { "epoch": 14.034912718204488, "grad_norm": 5.334194660186768, "learning_rate": 5.971321695760599e-06, "loss": 0.3331, "step": 56280 }, { "epoch": 14.037406483790523, "grad_norm": 7.58427095413208, "learning_rate": 5.968827930174565e-06, "loss": 0.2786, "step": 56290 }, { "epoch": 14.039900249376558, "grad_norm": 10.09249210357666, "learning_rate": 5.9663341645885284e-06, "loss": 0.3011, "step": 56300 }, { "epoch": 14.042394014962593, "grad_norm": 12.218475341796875, "learning_rate": 5.963840399002494e-06, "loss": 0.3299, "step": 56310 }, { "epoch": 14.044887780548628, "grad_norm": 6.630500316619873, "learning_rate": 5.961346633416459e-06, "loss": 0.3609, "step": 56320 }, { "epoch": 14.047381546134662, "grad_norm": 5.265425205230713, "learning_rate": 5.958852867830424e-06, "loss": 0.2524, "step": 56330 }, { "epoch": 14.049875311720697, "grad_norm": 10.030232429504395, "learning_rate": 5.95635910224439e-06, "loss": 0.3, "step": 56340 }, { "epoch": 14.052369077306734, "grad_norm": 8.025049209594727, "learning_rate": 5.953865336658354e-06, "loss": 0.3647, "step": 56350 }, { "epoch": 14.054862842892769, "grad_norm": 8.812301635742188, "learning_rate": 5.9513715710723195e-06, "loss": 0.4328, "step": 56360 }, { "epoch": 14.057356608478804, "grad_norm": 8.302797317504883, "learning_rate": 5.948877805486285e-06, "loss": 0.3217, "step": 56370 }, { "epoch": 14.059850374064839, "grad_norm": 7.739553928375244, "learning_rate": 5.94638403990025e-06, "loss": 0.352, "step": 56380 }, { "epoch": 14.062344139650873, "grad_norm": 6.211906433105469, "learning_rate": 5.9438902743142145e-06, "loss": 0.3744, "step": 56390 }, { "epoch": 14.064837905236908, "grad_norm": 9.784815788269043, "learning_rate": 5.94139650872818e-06, "loss": 0.3688, "step": 56400 }, { "epoch": 14.067331670822943, "grad_norm": 7.506394386291504, "learning_rate": 5.938902743142145e-06, "loss": 0.3215, "step": 56410 }, { "epoch": 14.069825436408978, "grad_norm": 9.98291301727295, "learning_rate": 5.9364089775561105e-06, "loss": 0.3251, "step": 56420 }, { "epoch": 14.072319201995013, "grad_norm": NaN, "learning_rate": 5.934164588528679e-06, "loss": 0.35, "step": 56430 }, { "epoch": 14.074812967581048, "grad_norm": 7.1763434410095215, "learning_rate": 5.931670822942643e-06, "loss": 0.3382, "step": 56440 }, { "epoch": 14.077306733167083, "grad_norm": 11.202238082885742, "learning_rate": 5.9291770573566085e-06, "loss": 0.3186, "step": 56450 }, { "epoch": 14.079800498753118, "grad_norm": 7.892131805419922, "learning_rate": 5.926683291770574e-06, "loss": 0.3167, "step": 56460 }, { "epoch": 14.082294264339152, "grad_norm": 7.04252815246582, "learning_rate": 5.924189526184539e-06, "loss": 0.3132, "step": 56470 }, { "epoch": 14.084788029925187, "grad_norm": 10.996591567993164, "learning_rate": 5.921695760598504e-06, "loss": 0.3149, "step": 56480 }, { "epoch": 14.087281795511222, "grad_norm": 6.079100608825684, "learning_rate": 5.919201995012469e-06, "loss": 0.3049, "step": 56490 }, { "epoch": 14.089775561097257, "grad_norm": 7.392295837402344, "learning_rate": 5.916708229426434e-06, "loss": 0.3318, "step": 56500 }, { "epoch": 14.092269326683292, "grad_norm": 5.498250484466553, "learning_rate": 5.9142144638403995e-06, "loss": 0.2813, "step": 56510 }, { "epoch": 14.094763092269327, "grad_norm": 8.986740112304688, "learning_rate": 5.911720698254365e-06, "loss": 0.285, "step": 56520 }, { "epoch": 14.097256857855362, "grad_norm": 8.847763061523438, "learning_rate": 5.909226932668329e-06, "loss": 0.3253, "step": 56530 }, { "epoch": 14.099750623441397, "grad_norm": 10.011739730834961, "learning_rate": 5.906733167082295e-06, "loss": 0.3322, "step": 56540 }, { "epoch": 14.102244389027431, "grad_norm": 7.457201957702637, "learning_rate": 5.90423940149626e-06, "loss": 0.3628, "step": 56550 }, { "epoch": 14.104738154613466, "grad_norm": 5.719525337219238, "learning_rate": 5.901745635910225e-06, "loss": 0.3435, "step": 56560 }, { "epoch": 14.107231920199501, "grad_norm": 11.263422012329102, "learning_rate": 5.89925187032419e-06, "loss": 0.3875, "step": 56570 }, { "epoch": 14.109725685785536, "grad_norm": 9.578998565673828, "learning_rate": 5.896758104738155e-06, "loss": 0.3096, "step": 56580 }, { "epoch": 14.11221945137157, "grad_norm": 7.107484817504883, "learning_rate": 5.89426433915212e-06, "loss": 0.3913, "step": 56590 }, { "epoch": 14.114713216957606, "grad_norm": 10.798450469970703, "learning_rate": 5.891770573566086e-06, "loss": 0.2936, "step": 56600 }, { "epoch": 14.11720698254364, "grad_norm": 7.937857627868652, "learning_rate": 5.88927680798005e-06, "loss": 0.2655, "step": 56610 }, { "epoch": 14.119700748129675, "grad_norm": 5.9058613777160645, "learning_rate": 5.886783042394015e-06, "loss": 0.3056, "step": 56620 }, { "epoch": 14.12219451371571, "grad_norm": 5.481210231781006, "learning_rate": 5.884289276807981e-06, "loss": 0.378, "step": 56630 }, { "epoch": 14.124688279301745, "grad_norm": 8.08363151550293, "learning_rate": 5.881795511221946e-06, "loss": 0.336, "step": 56640 }, { "epoch": 14.12718204488778, "grad_norm": 10.191091537475586, "learning_rate": 5.8793017456359105e-06, "loss": 0.3545, "step": 56650 }, { "epoch": 14.129675810473815, "grad_norm": 8.802706718444824, "learning_rate": 5.876807980049876e-06, "loss": 0.3284, "step": 56660 }, { "epoch": 14.13216957605985, "grad_norm": 6.7587890625, "learning_rate": 5.874314214463841e-06, "loss": 0.3659, "step": 56670 }, { "epoch": 14.134663341645885, "grad_norm": 6.1083526611328125, "learning_rate": 5.871820448877806e-06, "loss": 0.3007, "step": 56680 }, { "epoch": 14.13715710723192, "grad_norm": 10.109125137329102, "learning_rate": 5.869326683291771e-06, "loss": 0.3703, "step": 56690 }, { "epoch": 14.139650872817954, "grad_norm": 9.299257278442383, "learning_rate": 5.866832917705736e-06, "loss": 0.3109, "step": 56700 }, { "epoch": 14.14214463840399, "grad_norm": 11.763932228088379, "learning_rate": 5.8643391521197015e-06, "loss": 0.4337, "step": 56710 }, { "epoch": 14.144638403990024, "grad_norm": 9.687064170837402, "learning_rate": 5.861845386533667e-06, "loss": 0.3345, "step": 56720 }, { "epoch": 14.147132169576059, "grad_norm": 7.107930660247803, "learning_rate": 5.859351620947631e-06, "loss": 0.3442, "step": 56730 }, { "epoch": 14.149625935162096, "grad_norm": 8.584643363952637, "learning_rate": 5.8568578553615966e-06, "loss": 0.2821, "step": 56740 }, { "epoch": 14.15211970074813, "grad_norm": 10.098069190979004, "learning_rate": 5.854364089775562e-06, "loss": 0.3944, "step": 56750 }, { "epoch": 14.154613466334165, "grad_norm": 9.100006103515625, "learning_rate": 5.851870324189527e-06, "loss": 0.2938, "step": 56760 }, { "epoch": 14.1571072319202, "grad_norm": 12.640045166015625, "learning_rate": 5.8493765586034925e-06, "loss": 0.3651, "step": 56770 }, { "epoch": 14.159600997506235, "grad_norm": 8.430350303649902, "learning_rate": 5.846882793017457e-06, "loss": 0.2942, "step": 56780 }, { "epoch": 14.16209476309227, "grad_norm": 7.6195855140686035, "learning_rate": 5.844389027431422e-06, "loss": 0.3382, "step": 56790 }, { "epoch": 14.164588528678305, "grad_norm": 10.6289701461792, "learning_rate": 5.8418952618453876e-06, "loss": 0.3124, "step": 56800 }, { "epoch": 14.16708229426434, "grad_norm": 10.420262336730957, "learning_rate": 5.839401496259353e-06, "loss": 0.3315, "step": 56810 }, { "epoch": 14.169576059850375, "grad_norm": 8.555750846862793, "learning_rate": 5.8369077306733165e-06, "loss": 0.3292, "step": 56820 }, { "epoch": 14.17206982543641, "grad_norm": 10.164884567260742, "learning_rate": 5.834413965087282e-06, "loss": 0.3674, "step": 56830 }, { "epoch": 14.174563591022444, "grad_norm": 6.1933746337890625, "learning_rate": 5.831920199501248e-06, "loss": 0.3447, "step": 56840 }, { "epoch": 14.17705735660848, "grad_norm": 6.741939544677734, "learning_rate": 5.829426433915213e-06, "loss": 0.3415, "step": 56850 }, { "epoch": 14.179551122194514, "grad_norm": 9.930939674377441, "learning_rate": 5.826932668329177e-06, "loss": 0.3466, "step": 56860 }, { "epoch": 14.182044887780549, "grad_norm": 6.0956854820251465, "learning_rate": 5.824438902743142e-06, "loss": 0.3609, "step": 56870 }, { "epoch": 14.184538653366584, "grad_norm": 9.120026588439941, "learning_rate": 5.8219451371571075e-06, "loss": 0.3623, "step": 56880 }, { "epoch": 14.187032418952619, "grad_norm": 5.484027862548828, "learning_rate": 5.819451371571073e-06, "loss": 0.334, "step": 56890 }, { "epoch": 14.189526184538654, "grad_norm": 10.343612670898438, "learning_rate": 5.816957605985037e-06, "loss": 0.3805, "step": 56900 }, { "epoch": 14.192019950124688, "grad_norm": 11.961114883422852, "learning_rate": 5.814463840399003e-06, "loss": 0.4391, "step": 56910 }, { "epoch": 14.194513715710723, "grad_norm": 8.149081230163574, "learning_rate": 5.811970074812968e-06, "loss": 0.3533, "step": 56920 }, { "epoch": 14.197007481296758, "grad_norm": 11.395288467407227, "learning_rate": 5.809476309226933e-06, "loss": 0.3897, "step": 56930 }, { "epoch": 14.199501246882793, "grad_norm": 8.990917205810547, "learning_rate": 5.806982543640898e-06, "loss": 0.2918, "step": 56940 }, { "epoch": 14.201995012468828, "grad_norm": 9.0519380569458, "learning_rate": 5.804488778054863e-06, "loss": 0.326, "step": 56950 }, { "epoch": 14.204488778054863, "grad_norm": 5.42004919052124, "learning_rate": 5.801995012468828e-06, "loss": 0.3336, "step": 56960 }, { "epoch": 14.206982543640898, "grad_norm": 24.50922966003418, "learning_rate": 5.799501246882794e-06, "loss": 0.3462, "step": 56970 }, { "epoch": 14.209476309226932, "grad_norm": 7.2156877517700195, "learning_rate": 5.797007481296758e-06, "loss": 0.299, "step": 56980 }, { "epoch": 14.211970074812967, "grad_norm": 8.618878364562988, "learning_rate": 5.794513715710723e-06, "loss": 0.3368, "step": 56990 }, { "epoch": 14.214463840399002, "grad_norm": 8.678366661071777, "learning_rate": 5.792019950124689e-06, "loss": 0.3555, "step": 57000 }, { "epoch": 14.216957605985037, "grad_norm": 10.28817081451416, "learning_rate": 5.789526184538654e-06, "loss": 0.3683, "step": 57010 }, { "epoch": 14.219451371571072, "grad_norm": 11.527560234069824, "learning_rate": 5.787032418952619e-06, "loss": 0.3426, "step": 57020 }, { "epoch": 14.221945137157107, "grad_norm": 5.7110724449157715, "learning_rate": 5.784538653366584e-06, "loss": 0.3402, "step": 57030 }, { "epoch": 14.224438902743142, "grad_norm": 8.909784317016602, "learning_rate": 5.782044887780549e-06, "loss": 0.415, "step": 57040 }, { "epoch": 14.226932668329177, "grad_norm": 9.159612655639648, "learning_rate": 5.779551122194514e-06, "loss": 0.3765, "step": 57050 }, { "epoch": 14.229426433915211, "grad_norm": 7.4799346923828125, "learning_rate": 5.77705735660848e-06, "loss": 0.3718, "step": 57060 }, { "epoch": 14.231920199501246, "grad_norm": 17.878170013427734, "learning_rate": 5.774563591022444e-06, "loss": 0.3199, "step": 57070 }, { "epoch": 14.234413965087281, "grad_norm": 7.576568603515625, "learning_rate": 5.7720698254364095e-06, "loss": 0.3624, "step": 57080 }, { "epoch": 14.236907730673316, "grad_norm": 8.394084930419922, "learning_rate": 5.769576059850375e-06, "loss": 0.3559, "step": 57090 }, { "epoch": 14.239401496259351, "grad_norm": 9.45663070678711, "learning_rate": 5.76708229426434e-06, "loss": 0.3238, "step": 57100 }, { "epoch": 14.241895261845386, "grad_norm": 7.425997734069824, "learning_rate": 5.7645885286783046e-06, "loss": 0.3416, "step": 57110 }, { "epoch": 14.24438902743142, "grad_norm": 7.545619964599609, "learning_rate": 5.76209476309227e-06, "loss": 0.4245, "step": 57120 }, { "epoch": 14.246882793017456, "grad_norm": 9.640861511230469, "learning_rate": 5.759600997506235e-06, "loss": 0.3163, "step": 57130 }, { "epoch": 14.24937655860349, "grad_norm": 9.203802108764648, "learning_rate": 5.7571072319202005e-06, "loss": 0.4469, "step": 57140 }, { "epoch": 14.251870324189527, "grad_norm": 7.76652193069458, "learning_rate": 5.754613466334165e-06, "loss": 0.3865, "step": 57150 }, { "epoch": 14.254364089775562, "grad_norm": 7.3147358894348145, "learning_rate": 5.75211970074813e-06, "loss": 0.3029, "step": 57160 }, { "epoch": 14.256857855361597, "grad_norm": 9.476174354553223, "learning_rate": 5.7496259351620956e-06, "loss": 0.3637, "step": 57170 }, { "epoch": 14.259351620947632, "grad_norm": 6.6377434730529785, "learning_rate": 5.747132169576061e-06, "loss": 0.2983, "step": 57180 }, { "epoch": 14.261845386533667, "grad_norm": 6.473023891448975, "learning_rate": 5.744638403990025e-06, "loss": 0.319, "step": 57190 }, { "epoch": 14.264339152119701, "grad_norm": 5.787720680236816, "learning_rate": 5.742144638403991e-06, "loss": 0.325, "step": 57200 }, { "epoch": 14.266832917705736, "grad_norm": 7.323090076446533, "learning_rate": 5.739650872817956e-06, "loss": 0.3233, "step": 57210 }, { "epoch": 14.269326683291771, "grad_norm": 7.3568549156188965, "learning_rate": 5.737157107231921e-06, "loss": 0.3679, "step": 57220 }, { "epoch": 14.271820448877806, "grad_norm": 8.636281967163086, "learning_rate": 5.734663341645885e-06, "loss": 0.2744, "step": 57230 }, { "epoch": 14.27431421446384, "grad_norm": 6.717350482940674, "learning_rate": 5.73216957605985e-06, "loss": 0.293, "step": 57240 }, { "epoch": 14.276807980049876, "grad_norm": 9.460376739501953, "learning_rate": 5.729675810473816e-06, "loss": 0.3502, "step": 57250 }, { "epoch": 14.27930174563591, "grad_norm": 8.800605773925781, "learning_rate": 5.727182044887782e-06, "loss": 0.3078, "step": 57260 }, { "epoch": 14.281795511221945, "grad_norm": 7.889537811279297, "learning_rate": 5.724688279301747e-06, "loss": 0.365, "step": 57270 }, { "epoch": 14.28428927680798, "grad_norm": 10.018247604370117, "learning_rate": 5.722194513715711e-06, "loss": 0.2993, "step": 57280 }, { "epoch": 14.286783042394015, "grad_norm": 5.913888931274414, "learning_rate": 5.719700748129676e-06, "loss": 0.2821, "step": 57290 }, { "epoch": 14.28927680798005, "grad_norm": 6.76155424118042, "learning_rate": 5.717206982543641e-06, "loss": 0.3191, "step": 57300 }, { "epoch": 14.291770573566085, "grad_norm": 6.1279215812683105, "learning_rate": 5.714713216957607e-06, "loss": 0.3588, "step": 57310 }, { "epoch": 14.29426433915212, "grad_norm": 8.195113182067871, "learning_rate": 5.712219451371571e-06, "loss": 0.3062, "step": 57320 }, { "epoch": 14.296758104738155, "grad_norm": 7.183683395385742, "learning_rate": 5.709725685785536e-06, "loss": 0.3615, "step": 57330 }, { "epoch": 14.29925187032419, "grad_norm": 10.08060359954834, "learning_rate": 5.707231920199502e-06, "loss": 0.3116, "step": 57340 }, { "epoch": 14.301745635910224, "grad_norm": 7.461446762084961, "learning_rate": 5.704738154613467e-06, "loss": 0.3036, "step": 57350 }, { "epoch": 14.30423940149626, "grad_norm": 8.784198760986328, "learning_rate": 5.702244389027431e-06, "loss": 0.2936, "step": 57360 }, { "epoch": 14.306733167082294, "grad_norm": 8.67398452758789, "learning_rate": 5.699750623441397e-06, "loss": 0.2893, "step": 57370 }, { "epoch": 14.309226932668329, "grad_norm": 7.162827014923096, "learning_rate": 5.697256857855362e-06, "loss": 0.4105, "step": 57380 }, { "epoch": 14.311720698254364, "grad_norm": 7.423861026763916, "learning_rate": 5.694763092269327e-06, "loss": 0.3044, "step": 57390 }, { "epoch": 14.314214463840399, "grad_norm": 11.698079109191895, "learning_rate": 5.692269326683292e-06, "loss": 0.3955, "step": 57400 }, { "epoch": 14.316708229426434, "grad_norm": 11.152626037597656, "learning_rate": 5.689775561097257e-06, "loss": 0.3488, "step": 57410 }, { "epoch": 14.319201995012468, "grad_norm": 9.103079795837402, "learning_rate": 5.687281795511222e-06, "loss": 0.3548, "step": 57420 }, { "epoch": 14.321695760598503, "grad_norm": 10.388227462768555, "learning_rate": 5.684788029925188e-06, "loss": 0.323, "step": 57430 }, { "epoch": 14.324189526184538, "grad_norm": 9.910160064697266, "learning_rate": 5.682294264339152e-06, "loss": 0.3425, "step": 57440 }, { "epoch": 14.326683291770573, "grad_norm": 8.148764610290527, "learning_rate": 5.6798004987531175e-06, "loss": 0.3473, "step": 57450 }, { "epoch": 14.329177057356608, "grad_norm": 9.755147933959961, "learning_rate": 5.677306733167083e-06, "loss": 0.316, "step": 57460 }, { "epoch": 14.331670822942643, "grad_norm": 5.160188674926758, "learning_rate": 5.674812967581048e-06, "loss": 0.3304, "step": 57470 }, { "epoch": 14.334164588528678, "grad_norm": 9.043822288513184, "learning_rate": 5.6723192019950125e-06, "loss": 0.2585, "step": 57480 }, { "epoch": 14.336658354114713, "grad_norm": 9.036372184753418, "learning_rate": 5.669825436408978e-06, "loss": 0.2661, "step": 57490 }, { "epoch": 14.339152119700747, "grad_norm": 8.77037525177002, "learning_rate": 5.667331670822943e-06, "loss": 0.288, "step": 57500 }, { "epoch": 14.341645885286782, "grad_norm": 5.4910712242126465, "learning_rate": 5.6648379052369085e-06, "loss": 0.3033, "step": 57510 }, { "epoch": 14.344139650872817, "grad_norm": 11.051765441894531, "learning_rate": 5.662344139650874e-06, "loss": 0.3897, "step": 57520 }, { "epoch": 14.346633416458852, "grad_norm": 6.824289798736572, "learning_rate": 5.659850374064838e-06, "loss": 0.3452, "step": 57530 }, { "epoch": 14.349127182044889, "grad_norm": 7.920559406280518, "learning_rate": 5.6573566084788036e-06, "loss": 0.3175, "step": 57540 }, { "epoch": 14.351620947630924, "grad_norm": 9.274563789367676, "learning_rate": 5.654862842892769e-06, "loss": 0.3346, "step": 57550 }, { "epoch": 14.354114713216958, "grad_norm": 9.150640487670898, "learning_rate": 5.652369077306734e-06, "loss": 0.2985, "step": 57560 }, { "epoch": 14.356608478802993, "grad_norm": 10.429580688476562, "learning_rate": 5.649875311720699e-06, "loss": 0.3712, "step": 57570 }, { "epoch": 14.359102244389028, "grad_norm": 6.863627910614014, "learning_rate": 5.647381546134664e-06, "loss": 0.3518, "step": 57580 }, { "epoch": 14.361596009975063, "grad_norm": 10.7223482131958, "learning_rate": 5.644887780548629e-06, "loss": 0.2925, "step": 57590 }, { "epoch": 14.364089775561098, "grad_norm": 9.246960639953613, "learning_rate": 5.6423940149625946e-06, "loss": 0.3043, "step": 57600 }, { "epoch": 14.366583541147133, "grad_norm": 8.545487403869629, "learning_rate": 5.639900249376559e-06, "loss": 0.3126, "step": 57610 }, { "epoch": 14.369077306733168, "grad_norm": 7.441158771514893, "learning_rate": 5.637406483790524e-06, "loss": 0.3215, "step": 57620 }, { "epoch": 14.371571072319203, "grad_norm": 8.528327941894531, "learning_rate": 5.63491271820449e-06, "loss": 0.352, "step": 57630 }, { "epoch": 14.374064837905237, "grad_norm": 10.5371732711792, "learning_rate": 5.632418952618455e-06, "loss": 0.3339, "step": 57640 }, { "epoch": 14.376558603491272, "grad_norm": 7.135348320007324, "learning_rate": 5.6299251870324186e-06, "loss": 0.4033, "step": 57650 }, { "epoch": 14.379052369077307, "grad_norm": 8.768062591552734, "learning_rate": 5.627431421446385e-06, "loss": 0.3769, "step": 57660 }, { "epoch": 14.381546134663342, "grad_norm": 9.260017395019531, "learning_rate": 5.62493765586035e-06, "loss": 0.3351, "step": 57670 }, { "epoch": 14.384039900249377, "grad_norm": 10.839845657348633, "learning_rate": 5.622443890274315e-06, "loss": 0.317, "step": 57680 }, { "epoch": 14.386533665835412, "grad_norm": 5.972354888916016, "learning_rate": 5.619950124688279e-06, "loss": 0.3857, "step": 57690 }, { "epoch": 14.389027431421447, "grad_norm": 9.633734703063965, "learning_rate": 5.617456359102244e-06, "loss": 0.3434, "step": 57700 }, { "epoch": 14.391521197007481, "grad_norm": 9.835312843322754, "learning_rate": 5.61496259351621e-06, "loss": 0.3031, "step": 57710 }, { "epoch": 14.394014962593516, "grad_norm": 8.749693870544434, "learning_rate": 5.612468827930176e-06, "loss": 0.3306, "step": 57720 }, { "epoch": 14.396508728179551, "grad_norm": 6.144033908843994, "learning_rate": 5.609975062344139e-06, "loss": 0.332, "step": 57730 }, { "epoch": 14.399002493765586, "grad_norm": 6.014637470245361, "learning_rate": 5.607481296758105e-06, "loss": 0.3146, "step": 57740 }, { "epoch": 14.401496259351621, "grad_norm": 7.245567321777344, "learning_rate": 5.60498753117207e-06, "loss": 0.3847, "step": 57750 }, { "epoch": 14.403990024937656, "grad_norm": 4.784339904785156, "learning_rate": 5.602493765586035e-06, "loss": 0.3821, "step": 57760 }, { "epoch": 14.40648379052369, "grad_norm": 8.494146347045898, "learning_rate": 5.600000000000001e-06, "loss": 0.3402, "step": 57770 }, { "epoch": 14.408977556109726, "grad_norm": 7.598515510559082, "learning_rate": 5.597506234413965e-06, "loss": 0.325, "step": 57780 }, { "epoch": 14.41147132169576, "grad_norm": 7.4690423011779785, "learning_rate": 5.59501246882793e-06, "loss": 0.3582, "step": 57790 }, { "epoch": 14.413965087281795, "grad_norm": 8.942574501037598, "learning_rate": 5.592518703241896e-06, "loss": 0.3168, "step": 57800 }, { "epoch": 14.41645885286783, "grad_norm": 8.781198501586914, "learning_rate": 5.590024937655861e-06, "loss": 0.2854, "step": 57810 }, { "epoch": 14.418952618453865, "grad_norm": 5.599854469299316, "learning_rate": 5.5875311720698254e-06, "loss": 0.3373, "step": 57820 }, { "epoch": 14.4214463840399, "grad_norm": 6.340407371520996, "learning_rate": 5.585037406483791e-06, "loss": 0.2954, "step": 57830 }, { "epoch": 14.423940149625935, "grad_norm": 5.647294998168945, "learning_rate": 5.582543640897756e-06, "loss": 0.2989, "step": 57840 }, { "epoch": 14.42643391521197, "grad_norm": 8.372790336608887, "learning_rate": 5.580049875311721e-06, "loss": 0.332, "step": 57850 }, { "epoch": 14.428927680798004, "grad_norm": 8.006759643554688, "learning_rate": 5.577556109725686e-06, "loss": 0.362, "step": 57860 }, { "epoch": 14.43142144638404, "grad_norm": 5.205109596252441, "learning_rate": 5.575062344139651e-06, "loss": 0.3863, "step": 57870 }, { "epoch": 14.433915211970074, "grad_norm": 6.987797260284424, "learning_rate": 5.5725685785536165e-06, "loss": 0.3135, "step": 57880 }, { "epoch": 14.436408977556109, "grad_norm": 9.735954284667969, "learning_rate": 5.570074812967582e-06, "loss": 0.2886, "step": 57890 }, { "epoch": 14.438902743142144, "grad_norm": 10.121098518371582, "learning_rate": 5.567581047381546e-06, "loss": 0.3274, "step": 57900 }, { "epoch": 14.441396508728179, "grad_norm": 8.132126808166504, "learning_rate": 5.5650872817955115e-06, "loss": 0.309, "step": 57910 }, { "epoch": 14.443890274314214, "grad_norm": 9.95006275177002, "learning_rate": 5.562593516209477e-06, "loss": 0.3218, "step": 57920 }, { "epoch": 14.446384039900249, "grad_norm": 9.619258880615234, "learning_rate": 5.560099750623442e-06, "loss": 0.3107, "step": 57930 }, { "epoch": 14.448877805486283, "grad_norm": 8.964179039001465, "learning_rate": 5.557605985037407e-06, "loss": 0.4061, "step": 57940 }, { "epoch": 14.451371571072318, "grad_norm": 15.57805347442627, "learning_rate": 5.555112219451372e-06, "loss": 0.3487, "step": 57950 }, { "epoch": 14.453865336658355, "grad_norm": 10.295059204101562, "learning_rate": 5.552618453865337e-06, "loss": 0.2948, "step": 57960 }, { "epoch": 14.45635910224439, "grad_norm": 6.323406219482422, "learning_rate": 5.5501246882793026e-06, "loss": 0.3311, "step": 57970 }, { "epoch": 14.458852867830425, "grad_norm": 6.79067325592041, "learning_rate": 5.547630922693267e-06, "loss": 0.368, "step": 57980 }, { "epoch": 14.46134663341646, "grad_norm": 7.148687839508057, "learning_rate": 5.545137157107232e-06, "loss": 0.312, "step": 57990 }, { "epoch": 14.463840399002494, "grad_norm": 11.232911109924316, "learning_rate": 5.542643391521198e-06, "loss": 0.3485, "step": 58000 }, { "epoch": 14.46633416458853, "grad_norm": 7.9350738525390625, "learning_rate": 5.540149625935163e-06, "loss": 0.2499, "step": 58010 }, { "epoch": 14.468827930174564, "grad_norm": 7.820169448852539, "learning_rate": 5.537655860349128e-06, "loss": 0.3012, "step": 58020 }, { "epoch": 14.471321695760599, "grad_norm": 6.683993816375732, "learning_rate": 5.535162094763093e-06, "loss": 0.3388, "step": 58030 }, { "epoch": 14.473815461346634, "grad_norm": 8.00978946685791, "learning_rate": 5.532668329177058e-06, "loss": 0.2825, "step": 58040 }, { "epoch": 14.476309226932669, "grad_norm": 7.9973015785217285, "learning_rate": 5.530174563591023e-06, "loss": 0.3373, "step": 58050 }, { "epoch": 14.478802992518704, "grad_norm": 9.217852592468262, "learning_rate": 5.527680798004989e-06, "loss": 0.2765, "step": 58060 }, { "epoch": 14.481296758104738, "grad_norm": 9.490992546081543, "learning_rate": 5.525187032418953e-06, "loss": 0.3424, "step": 58070 }, { "epoch": 14.483790523690773, "grad_norm": 6.221439361572266, "learning_rate": 5.522693266832918e-06, "loss": 0.3139, "step": 58080 }, { "epoch": 14.486284289276808, "grad_norm": 5.196888446807861, "learning_rate": 5.520199501246884e-06, "loss": 0.3762, "step": 58090 }, { "epoch": 14.488778054862843, "grad_norm": 9.887178421020508, "learning_rate": 5.517705735660849e-06, "loss": 0.3322, "step": 58100 }, { "epoch": 14.491271820448878, "grad_norm": 11.542625427246094, "learning_rate": 5.515211970074813e-06, "loss": 0.3423, "step": 58110 }, { "epoch": 14.493765586034913, "grad_norm": 7.159249782562256, "learning_rate": 5.512718204488778e-06, "loss": 0.3121, "step": 58120 }, { "epoch": 14.496259351620948, "grad_norm": 11.838788986206055, "learning_rate": 5.510224438902744e-06, "loss": 0.3516, "step": 58130 }, { "epoch": 14.498753117206983, "grad_norm": 8.8449068069458, "learning_rate": 5.5077306733167094e-06, "loss": 0.3357, "step": 58140 }, { "epoch": 14.501246882793017, "grad_norm": 10.590514183044434, "learning_rate": 5.505236907730673e-06, "loss": 0.2957, "step": 58150 }, { "epoch": 14.503740648379052, "grad_norm": 9.069424629211426, "learning_rate": 5.502743142144638e-06, "loss": 0.2783, "step": 58160 }, { "epoch": 14.506234413965087, "grad_norm": 7.590739727020264, "learning_rate": 5.500249376558604e-06, "loss": 0.2904, "step": 58170 }, { "epoch": 14.508728179551122, "grad_norm": 6.409052848815918, "learning_rate": 5.497755610972569e-06, "loss": 0.3753, "step": 58180 }, { "epoch": 14.511221945137157, "grad_norm": 7.4375481605529785, "learning_rate": 5.4952618453865334e-06, "loss": 0.2849, "step": 58190 }, { "epoch": 14.513715710723192, "grad_norm": 8.864033699035645, "learning_rate": 5.492768079800499e-06, "loss": 0.32, "step": 58200 }, { "epoch": 14.516209476309227, "grad_norm": 6.455445766448975, "learning_rate": 5.490274314214464e-06, "loss": 0.3475, "step": 58210 }, { "epoch": 14.518703241895262, "grad_norm": 6.384631633758545, "learning_rate": 5.487780548628429e-06, "loss": 0.2812, "step": 58220 }, { "epoch": 14.521197007481296, "grad_norm": 9.22549057006836, "learning_rate": 5.485286783042394e-06, "loss": 0.3304, "step": 58230 }, { "epoch": 14.523690773067331, "grad_norm": 7.639939308166504, "learning_rate": 5.482793017456359e-06, "loss": 0.3435, "step": 58240 }, { "epoch": 14.526184538653366, "grad_norm": 5.905279636383057, "learning_rate": 5.4802992518703244e-06, "loss": 0.2744, "step": 58250 }, { "epoch": 14.528678304239401, "grad_norm": 8.444292068481445, "learning_rate": 5.47780548628429e-06, "loss": 0.3367, "step": 58260 }, { "epoch": 14.531172069825436, "grad_norm": 9.172270774841309, "learning_rate": 5.475311720698255e-06, "loss": 0.3669, "step": 58270 }, { "epoch": 14.53366583541147, "grad_norm": 15.167724609375, "learning_rate": 5.4728179551122195e-06, "loss": 0.4021, "step": 58280 }, { "epoch": 14.536159600997506, "grad_norm": 8.261863708496094, "learning_rate": 5.470324189526185e-06, "loss": 0.2871, "step": 58290 }, { "epoch": 14.53865336658354, "grad_norm": 10.16102123260498, "learning_rate": 5.46783042394015e-06, "loss": 0.3401, "step": 58300 }, { "epoch": 14.541147132169575, "grad_norm": 8.043190956115723, "learning_rate": 5.4653366583541155e-06, "loss": 0.3537, "step": 58310 }, { "epoch": 14.54364089775561, "grad_norm": 21.67076873779297, "learning_rate": 5.46284289276808e-06, "loss": 0.3269, "step": 58320 }, { "epoch": 14.546134663341645, "grad_norm": 6.5939459800720215, "learning_rate": 5.460349127182045e-06, "loss": 0.3313, "step": 58330 }, { "epoch": 14.548628428927682, "grad_norm": 6.585400581359863, "learning_rate": 5.4578553615960105e-06, "loss": 0.3449, "step": 58340 }, { "epoch": 14.551122194513717, "grad_norm": 9.368444442749023, "learning_rate": 5.455361596009976e-06, "loss": 0.3536, "step": 58350 }, { "epoch": 14.553615960099751, "grad_norm": 8.152172088623047, "learning_rate": 5.45286783042394e-06, "loss": 0.3483, "step": 58360 }, { "epoch": 14.556109725685786, "grad_norm": 9.879807472229004, "learning_rate": 5.450374064837906e-06, "loss": 0.3036, "step": 58370 }, { "epoch": 14.558603491271821, "grad_norm": 16.792516708374023, "learning_rate": 5.447880299251871e-06, "loss": 0.3782, "step": 58380 }, { "epoch": 14.561097256857856, "grad_norm": 6.8559112548828125, "learning_rate": 5.445386533665836e-06, "loss": 0.3641, "step": 58390 }, { "epoch": 14.563591022443891, "grad_norm": 12.230280876159668, "learning_rate": 5.442892768079801e-06, "loss": 0.3335, "step": 58400 }, { "epoch": 14.566084788029926, "grad_norm": 5.788147926330566, "learning_rate": 5.440399002493766e-06, "loss": 0.3376, "step": 58410 }, { "epoch": 14.56857855361596, "grad_norm": 9.005399703979492, "learning_rate": 5.437905236907731e-06, "loss": 0.3828, "step": 58420 }, { "epoch": 14.571072319201996, "grad_norm": 6.153980255126953, "learning_rate": 5.435411471321697e-06, "loss": 0.3214, "step": 58430 }, { "epoch": 14.57356608478803, "grad_norm": 7.769611835479736, "learning_rate": 5.432917705735661e-06, "loss": 0.2673, "step": 58440 }, { "epoch": 14.576059850374065, "grad_norm": 7.903584957122803, "learning_rate": 5.430423940149626e-06, "loss": 0.3313, "step": 58450 }, { "epoch": 14.5785536159601, "grad_norm": 11.687101364135742, "learning_rate": 5.427930174563592e-06, "loss": 0.3828, "step": 58460 }, { "epoch": 14.581047381546135, "grad_norm": 9.845630645751953, "learning_rate": 5.425436408977557e-06, "loss": 0.3014, "step": 58470 }, { "epoch": 14.58354114713217, "grad_norm": 7.4955244064331055, "learning_rate": 5.4229426433915215e-06, "loss": 0.3243, "step": 58480 }, { "epoch": 14.586034912718205, "grad_norm": 9.153183937072754, "learning_rate": 5.420448877805487e-06, "loss": 0.3849, "step": 58490 }, { "epoch": 14.58852867830424, "grad_norm": 8.650388717651367, "learning_rate": 5.417955112219452e-06, "loss": 0.3057, "step": 58500 }, { "epoch": 14.591022443890274, "grad_norm": 5.724157333374023, "learning_rate": 5.415461346633417e-06, "loss": 0.2931, "step": 58510 }, { "epoch": 14.59351620947631, "grad_norm": 8.662379264831543, "learning_rate": 5.412967581047383e-06, "loss": 0.324, "step": 58520 }, { "epoch": 14.596009975062344, "grad_norm": 6.165246963500977, "learning_rate": 5.410473815461346e-06, "loss": 0.305, "step": 58530 }, { "epoch": 14.598503740648379, "grad_norm": 10.333334922790527, "learning_rate": 5.4079800498753125e-06, "loss": 0.412, "step": 58540 }, { "epoch": 14.600997506234414, "grad_norm": 12.94969654083252, "learning_rate": 5.405486284289278e-06, "loss": 0.3273, "step": 58550 }, { "epoch": 14.603491271820449, "grad_norm": 8.342123985290527, "learning_rate": 5.402992518703243e-06, "loss": 0.3934, "step": 58560 }, { "epoch": 14.605985037406484, "grad_norm": 6.839729309082031, "learning_rate": 5.400498753117207e-06, "loss": 0.303, "step": 58570 }, { "epoch": 14.608478802992519, "grad_norm": 9.169561386108398, "learning_rate": 5.398004987531172e-06, "loss": 0.2673, "step": 58580 }, { "epoch": 14.610972568578553, "grad_norm": 7.035985946655273, "learning_rate": 5.395511221945137e-06, "loss": 0.2992, "step": 58590 }, { "epoch": 14.613466334164588, "grad_norm": 7.293179988861084, "learning_rate": 5.3930174563591035e-06, "loss": 0.3203, "step": 58600 }, { "epoch": 14.615960099750623, "grad_norm": 8.904796600341797, "learning_rate": 5.390523690773067e-06, "loss": 0.3217, "step": 58610 }, { "epoch": 14.618453865336658, "grad_norm": 9.282125473022461, "learning_rate": 5.3880299251870324e-06, "loss": 0.3283, "step": 58620 }, { "epoch": 14.620947630922693, "grad_norm": 7.752005577087402, "learning_rate": 5.385536159600998e-06, "loss": 0.3145, "step": 58630 }, { "epoch": 14.623441396508728, "grad_norm": 5.774524688720703, "learning_rate": 5.383042394014963e-06, "loss": 0.2747, "step": 58640 }, { "epoch": 14.625935162094763, "grad_norm": 8.292061805725098, "learning_rate": 5.3805486284289275e-06, "loss": 0.2488, "step": 58650 }, { "epoch": 14.628428927680797, "grad_norm": 9.226009368896484, "learning_rate": 5.378054862842893e-06, "loss": 0.2889, "step": 58660 }, { "epoch": 14.630922693266832, "grad_norm": 8.59598159790039, "learning_rate": 5.375561097256858e-06, "loss": 0.3311, "step": 58670 }, { "epoch": 14.633416458852867, "grad_norm": 11.77875804901123, "learning_rate": 5.3730673316708234e-06, "loss": 0.289, "step": 58680 }, { "epoch": 14.635910224438902, "grad_norm": 8.944512367248535, "learning_rate": 5.370573566084788e-06, "loss": 0.3456, "step": 58690 }, { "epoch": 14.638403990024937, "grad_norm": 8.320801734924316, "learning_rate": 5.368079800498753e-06, "loss": 0.3284, "step": 58700 }, { "epoch": 14.640897755610972, "grad_norm": 20.068309783935547, "learning_rate": 5.3655860349127185e-06, "loss": 0.3567, "step": 58710 }, { "epoch": 14.643391521197007, "grad_norm": 13.9030122756958, "learning_rate": 5.363092269326684e-06, "loss": 0.3672, "step": 58720 }, { "epoch": 14.645885286783042, "grad_norm": 9.718421936035156, "learning_rate": 5.360598503740648e-06, "loss": 0.3213, "step": 58730 }, { "epoch": 14.648379052369076, "grad_norm": 7.2954301834106445, "learning_rate": 5.358104738154614e-06, "loss": 0.3902, "step": 58740 }, { "epoch": 14.650872817955111, "grad_norm": 7.279516696929932, "learning_rate": 5.355610972568579e-06, "loss": 0.2793, "step": 58750 }, { "epoch": 14.653366583541148, "grad_norm": 10.923696517944336, "learning_rate": 5.353117206982544e-06, "loss": 0.3365, "step": 58760 }, { "epoch": 14.655860349127183, "grad_norm": 9.616853713989258, "learning_rate": 5.3506234413965095e-06, "loss": 0.3174, "step": 58770 }, { "epoch": 14.658354114713218, "grad_norm": 9.328225135803223, "learning_rate": 5.348129675810474e-06, "loss": 0.3395, "step": 58780 }, { "epoch": 14.660847880299253, "grad_norm": 8.011582374572754, "learning_rate": 5.345635910224439e-06, "loss": 0.3506, "step": 58790 }, { "epoch": 14.663341645885287, "grad_norm": 9.812850952148438, "learning_rate": 5.343142144638405e-06, "loss": 0.2907, "step": 58800 }, { "epoch": 14.665835411471322, "grad_norm": 8.969762802124023, "learning_rate": 5.34064837905237e-06, "loss": 0.3821, "step": 58810 }, { "epoch": 14.668329177057357, "grad_norm": 8.796773910522461, "learning_rate": 5.338154613466334e-06, "loss": 0.3282, "step": 58820 }, { "epoch": 14.670822942643392, "grad_norm": 9.331019401550293, "learning_rate": 5.3356608478803e-06, "loss": 0.3352, "step": 58830 }, { "epoch": 14.673316708229427, "grad_norm": 14.473604202270508, "learning_rate": 5.333167082294265e-06, "loss": 0.2999, "step": 58840 }, { "epoch": 14.675810473815462, "grad_norm": 7.026119232177734, "learning_rate": 5.33067331670823e-06, "loss": 0.3559, "step": 58850 }, { "epoch": 14.678304239401497, "grad_norm": 7.760469436645508, "learning_rate": 5.328179551122195e-06, "loss": 0.3282, "step": 58860 }, { "epoch": 14.680798004987532, "grad_norm": 10.54293155670166, "learning_rate": 5.32568578553616e-06, "loss": 0.4874, "step": 58870 }, { "epoch": 14.683291770573566, "grad_norm": 10.247196197509766, "learning_rate": 5.323192019950125e-06, "loss": 0.3434, "step": 58880 }, { "epoch": 14.685785536159601, "grad_norm": 12.179352760314941, "learning_rate": 5.320698254364091e-06, "loss": 0.3833, "step": 58890 }, { "epoch": 14.688279301745636, "grad_norm": 8.831155776977539, "learning_rate": 5.318204488778055e-06, "loss": 0.3422, "step": 58900 }, { "epoch": 14.690773067331671, "grad_norm": 5.796462535858154, "learning_rate": 5.3157107231920205e-06, "loss": 0.3022, "step": 58910 }, { "epoch": 14.693266832917706, "grad_norm": 9.8500337600708, "learning_rate": 5.313216957605986e-06, "loss": 0.3686, "step": 58920 }, { "epoch": 14.69576059850374, "grad_norm": 9.79806137084961, "learning_rate": 5.310723192019951e-06, "loss": 0.3164, "step": 58930 }, { "epoch": 14.698254364089776, "grad_norm": 7.471320629119873, "learning_rate": 5.308229426433915e-06, "loss": 0.3621, "step": 58940 }, { "epoch": 14.70074812967581, "grad_norm": 15.296502113342285, "learning_rate": 5.305735660847881e-06, "loss": 0.3376, "step": 58950 }, { "epoch": 14.703241895261845, "grad_norm": 8.871153831481934, "learning_rate": 5.303241895261846e-06, "loss": 0.3574, "step": 58960 }, { "epoch": 14.70573566084788, "grad_norm": 9.778191566467285, "learning_rate": 5.3007481296758115e-06, "loss": 0.3363, "step": 58970 }, { "epoch": 14.708229426433915, "grad_norm": 8.317192077636719, "learning_rate": 5.298254364089775e-06, "loss": 0.2979, "step": 58980 }, { "epoch": 14.71072319201995, "grad_norm": 9.88344669342041, "learning_rate": 5.2957605985037404e-06, "loss": 0.3778, "step": 58990 }, { "epoch": 14.713216957605985, "grad_norm": 8.39840030670166, "learning_rate": 5.293266832917706e-06, "loss": 0.3757, "step": 59000 }, { "epoch": 14.71571072319202, "grad_norm": 11.54598331451416, "learning_rate": 5.290773067331672e-06, "loss": 0.3286, "step": 59010 }, { "epoch": 14.718204488778055, "grad_norm": 6.494427680969238, "learning_rate": 5.28852867830424e-06, "loss": 0.3799, "step": 59020 }, { "epoch": 14.72069825436409, "grad_norm": 9.357276916503906, "learning_rate": 5.2860349127182055e-06, "loss": 0.2867, "step": 59030 }, { "epoch": 14.723192019950124, "grad_norm": 8.344010353088379, "learning_rate": 5.28354114713217e-06, "loss": 0.3489, "step": 59040 }, { "epoch": 14.72568578553616, "grad_norm": 6.008798599243164, "learning_rate": 5.281047381546135e-06, "loss": 0.3094, "step": 59050 }, { "epoch": 14.728179551122194, "grad_norm": 10.23482894897461, "learning_rate": 5.2785536159601006e-06, "loss": 0.3203, "step": 59060 }, { "epoch": 14.730673316708229, "grad_norm": 6.847126007080078, "learning_rate": 5.276059850374066e-06, "loss": 0.25, "step": 59070 }, { "epoch": 14.733167082294264, "grad_norm": 9.797203063964844, "learning_rate": 5.2735660847880295e-06, "loss": 0.3885, "step": 59080 }, { "epoch": 14.735660847880299, "grad_norm": 8.682002067565918, "learning_rate": 5.271072319201996e-06, "loss": 0.3408, "step": 59090 }, { "epoch": 14.738154613466333, "grad_norm": 7.708632946014404, "learning_rate": 5.268578553615961e-06, "loss": 0.3084, "step": 59100 }, { "epoch": 14.740648379052368, "grad_norm": 9.106949806213379, "learning_rate": 5.266084788029926e-06, "loss": 0.2894, "step": 59110 }, { "epoch": 14.743142144638403, "grad_norm": 8.195284843444824, "learning_rate": 5.26359102244389e-06, "loss": 0.3228, "step": 59120 }, { "epoch": 14.745635910224438, "grad_norm": 10.221166610717773, "learning_rate": 5.261097256857855e-06, "loss": 0.3822, "step": 59130 }, { "epoch": 14.748129675810475, "grad_norm": 8.522336959838867, "learning_rate": 5.2586034912718205e-06, "loss": 0.3341, "step": 59140 }, { "epoch": 14.75062344139651, "grad_norm": 13.70825481414795, "learning_rate": 5.256109725685787e-06, "loss": 0.3543, "step": 59150 }, { "epoch": 14.753117206982544, "grad_norm": 7.4037184715271, "learning_rate": 5.25361596009975e-06, "loss": 0.3357, "step": 59160 }, { "epoch": 14.75561097256858, "grad_norm": 10.474981307983398, "learning_rate": 5.2511221945137156e-06, "loss": 0.3487, "step": 59170 }, { "epoch": 14.758104738154614, "grad_norm": 5.967161655426025, "learning_rate": 5.248628428927681e-06, "loss": 0.3564, "step": 59180 }, { "epoch": 14.760598503740649, "grad_norm": 7.465957164764404, "learning_rate": 5.246134663341646e-06, "loss": 0.3241, "step": 59190 }, { "epoch": 14.763092269326684, "grad_norm": 7.333925247192383, "learning_rate": 5.2436408977556115e-06, "loss": 0.2976, "step": 59200 }, { "epoch": 14.765586034912719, "grad_norm": 8.941734313964844, "learning_rate": 5.241147132169576e-06, "loss": 0.4209, "step": 59210 }, { "epoch": 14.768079800498754, "grad_norm": 7.812376976013184, "learning_rate": 5.238653366583541e-06, "loss": 0.2961, "step": 59220 }, { "epoch": 14.770573566084789, "grad_norm": 9.06418228149414, "learning_rate": 5.236159600997507e-06, "loss": 0.3249, "step": 59230 }, { "epoch": 14.773067331670823, "grad_norm": 8.520550727844238, "learning_rate": 5.233665835411472e-06, "loss": 0.3469, "step": 59240 }, { "epoch": 14.775561097256858, "grad_norm": 5.776762962341309, "learning_rate": 5.231172069825436e-06, "loss": 0.3048, "step": 59250 }, { "epoch": 14.778054862842893, "grad_norm": 13.731014251708984, "learning_rate": 5.228678304239402e-06, "loss": 0.3533, "step": 59260 }, { "epoch": 14.780548628428928, "grad_norm": 10.045135498046875, "learning_rate": 5.226184538653367e-06, "loss": 0.3062, "step": 59270 }, { "epoch": 14.783042394014963, "grad_norm": 9.857339859008789, "learning_rate": 5.223690773067332e-06, "loss": 0.2885, "step": 59280 }, { "epoch": 14.785536159600998, "grad_norm": 7.196331977844238, "learning_rate": 5.221197007481297e-06, "loss": 0.3024, "step": 59290 }, { "epoch": 14.788029925187033, "grad_norm": 8.33714485168457, "learning_rate": 5.218703241895262e-06, "loss": 0.3089, "step": 59300 }, { "epoch": 14.790523690773068, "grad_norm": 7.40169095993042, "learning_rate": 5.216209476309227e-06, "loss": 0.3327, "step": 59310 }, { "epoch": 14.793017456359102, "grad_norm": 6.180658340454102, "learning_rate": 5.213715710723193e-06, "loss": 0.3209, "step": 59320 }, { "epoch": 14.795511221945137, "grad_norm": 8.041952133178711, "learning_rate": 5.211221945137157e-06, "loss": 0.2623, "step": 59330 }, { "epoch": 14.798004987531172, "grad_norm": 7.726354598999023, "learning_rate": 5.2087281795511225e-06, "loss": 0.3279, "step": 59340 }, { "epoch": 14.800498753117207, "grad_norm": 7.853705406188965, "learning_rate": 5.206234413965088e-06, "loss": 0.3187, "step": 59350 }, { "epoch": 14.802992518703242, "grad_norm": 6.197792053222656, "learning_rate": 5.203740648379053e-06, "loss": 0.4057, "step": 59360 }, { "epoch": 14.805486284289277, "grad_norm": 8.7774658203125, "learning_rate": 5.2012468827930175e-06, "loss": 0.3922, "step": 59370 }, { "epoch": 14.807980049875312, "grad_norm": 7.630876541137695, "learning_rate": 5.198753117206983e-06, "loss": 0.3802, "step": 59380 }, { "epoch": 14.810473815461346, "grad_norm": 9.605398178100586, "learning_rate": 5.196259351620948e-06, "loss": 0.3564, "step": 59390 }, { "epoch": 14.812967581047381, "grad_norm": 11.865791320800781, "learning_rate": 5.1937655860349135e-06, "loss": 0.4148, "step": 59400 }, { "epoch": 14.815461346633416, "grad_norm": 12.097999572753906, "learning_rate": 5.191271820448879e-06, "loss": 0.3287, "step": 59410 }, { "epoch": 14.817955112219451, "grad_norm": 7.8101277351379395, "learning_rate": 5.188778054862843e-06, "loss": 0.3158, "step": 59420 }, { "epoch": 14.820448877805486, "grad_norm": 6.201448917388916, "learning_rate": 5.1862842892768085e-06, "loss": 0.3469, "step": 59430 }, { "epoch": 14.82294264339152, "grad_norm": 11.883674621582031, "learning_rate": 5.183790523690774e-06, "loss": 0.3443, "step": 59440 }, { "epoch": 14.825436408977556, "grad_norm": 10.622366905212402, "learning_rate": 5.181296758104739e-06, "loss": 0.3804, "step": 59450 }, { "epoch": 14.82793017456359, "grad_norm": 10.955135345458984, "learning_rate": 5.178802992518704e-06, "loss": 0.3149, "step": 59460 }, { "epoch": 14.830423940149625, "grad_norm": 6.3978495597839355, "learning_rate": 5.176309226932669e-06, "loss": 0.3854, "step": 59470 }, { "epoch": 14.83291770573566, "grad_norm": 5.390768527984619, "learning_rate": 5.173815461346634e-06, "loss": 0.3576, "step": 59480 }, { "epoch": 14.835411471321695, "grad_norm": 6.621118068695068, "learning_rate": 5.1713216957605996e-06, "loss": 0.328, "step": 59490 }, { "epoch": 14.83790523690773, "grad_norm": 7.241116523742676, "learning_rate": 5.168827930174564e-06, "loss": 0.3244, "step": 59500 }, { "epoch": 14.840399002493765, "grad_norm": 9.426288604736328, "learning_rate": 5.166334164588529e-06, "loss": 0.358, "step": 59510 }, { "epoch": 14.8428927680798, "grad_norm": 7.703991413116455, "learning_rate": 5.163840399002495e-06, "loss": 0.304, "step": 59520 }, { "epoch": 14.845386533665835, "grad_norm": 5.415951251983643, "learning_rate": 5.16134663341646e-06, "loss": 0.2659, "step": 59530 }, { "epoch": 14.84788029925187, "grad_norm": 10.298295021057129, "learning_rate": 5.1588528678304236e-06, "loss": 0.3267, "step": 59540 }, { "epoch": 14.850374064837904, "grad_norm": 8.1857328414917, "learning_rate": 5.156359102244389e-06, "loss": 0.4304, "step": 59550 }, { "epoch": 14.85286783042394, "grad_norm": 8.54920482635498, "learning_rate": 5.153865336658355e-06, "loss": 0.4247, "step": 59560 }, { "epoch": 14.855361596009976, "grad_norm": 6.647639751434326, "learning_rate": 5.15137157107232e-06, "loss": 0.3184, "step": 59570 }, { "epoch": 14.85785536159601, "grad_norm": 6.457409858703613, "learning_rate": 5.148877805486284e-06, "loss": 0.3595, "step": 59580 }, { "epoch": 14.860349127182046, "grad_norm": 9.045681953430176, "learning_rate": 5.146384039900249e-06, "loss": 0.3741, "step": 59590 }, { "epoch": 14.86284289276808, "grad_norm": 8.095196723937988, "learning_rate": 5.1438902743142146e-06, "loss": 0.3488, "step": 59600 }, { "epoch": 14.865336658354115, "grad_norm": 12.992335319519043, "learning_rate": 5.14139650872818e-06, "loss": 0.3655, "step": 59610 }, { "epoch": 14.86783042394015, "grad_norm": 8.386945724487305, "learning_rate": 5.138902743142144e-06, "loss": 0.3483, "step": 59620 }, { "epoch": 14.870324189526185, "grad_norm": 7.935437202453613, "learning_rate": 5.13640897755611e-06, "loss": 0.3158, "step": 59630 }, { "epoch": 14.87281795511222, "grad_norm": 6.484121322631836, "learning_rate": 5.133915211970075e-06, "loss": 0.3386, "step": 59640 }, { "epoch": 14.875311720698255, "grad_norm": 8.329977035522461, "learning_rate": 5.13142144638404e-06, "loss": 0.2833, "step": 59650 }, { "epoch": 14.87780548628429, "grad_norm": 6.874395370483398, "learning_rate": 5.128927680798006e-06, "loss": 0.3502, "step": 59660 }, { "epoch": 14.880299251870325, "grad_norm": 7.59706974029541, "learning_rate": 5.12643391521197e-06, "loss": 0.3216, "step": 59670 }, { "epoch": 14.88279301745636, "grad_norm": 7.426278591156006, "learning_rate": 5.123940149625935e-06, "loss": 0.297, "step": 59680 }, { "epoch": 14.885286783042394, "grad_norm": 10.72069263458252, "learning_rate": 5.121446384039901e-06, "loss": 0.3663, "step": 59690 }, { "epoch": 14.88778054862843, "grad_norm": 8.7599458694458, "learning_rate": 5.118952618453866e-06, "loss": 0.308, "step": 59700 }, { "epoch": 14.890274314214464, "grad_norm": 7.034759521484375, "learning_rate": 5.1164588528678304e-06, "loss": 0.2957, "step": 59710 }, { "epoch": 14.892768079800499, "grad_norm": 8.246294975280762, "learning_rate": 5.113965087281796e-06, "loss": 0.3802, "step": 59720 }, { "epoch": 14.895261845386534, "grad_norm": 7.399212837219238, "learning_rate": 5.111471321695761e-06, "loss": 0.3906, "step": 59730 }, { "epoch": 14.897755610972569, "grad_norm": 6.495842933654785, "learning_rate": 5.108977556109726e-06, "loss": 0.3517, "step": 59740 }, { "epoch": 14.900249376558603, "grad_norm": 8.81147289276123, "learning_rate": 5.106483790523691e-06, "loss": 0.307, "step": 59750 }, { "epoch": 14.902743142144638, "grad_norm": 8.447113990783691, "learning_rate": 5.103990024937656e-06, "loss": 0.3051, "step": 59760 }, { "epoch": 14.905236907730673, "grad_norm": 7.5276288986206055, "learning_rate": 5.1014962593516215e-06, "loss": 0.3735, "step": 59770 }, { "epoch": 14.907730673316708, "grad_norm": 5.61832857131958, "learning_rate": 5.099002493765587e-06, "loss": 0.2694, "step": 59780 }, { "epoch": 14.910224438902743, "grad_norm": 7.429715633392334, "learning_rate": 5.096508728179551e-06, "loss": 0.3523, "step": 59790 }, { "epoch": 14.912718204488778, "grad_norm": 6.293092727661133, "learning_rate": 5.0940149625935165e-06, "loss": 0.2826, "step": 59800 }, { "epoch": 14.915211970074813, "grad_norm": 8.379650115966797, "learning_rate": 5.091521197007482e-06, "loss": 0.2812, "step": 59810 }, { "epoch": 14.917705735660848, "grad_norm": 8.945039749145508, "learning_rate": 5.089027431421447e-06, "loss": 0.393, "step": 59820 }, { "epoch": 14.920199501246882, "grad_norm": 5.737639904022217, "learning_rate": 5.086533665835412e-06, "loss": 0.3738, "step": 59830 }, { "epoch": 14.922693266832917, "grad_norm": 10.540114402770996, "learning_rate": 5.084039900249377e-06, "loss": 0.3052, "step": 59840 }, { "epoch": 14.925187032418952, "grad_norm": 7.083829879760742, "learning_rate": 5.081546134663342e-06, "loss": 0.3163, "step": 59850 }, { "epoch": 14.927680798004987, "grad_norm": 9.188494682312012, "learning_rate": 5.0790523690773075e-06, "loss": 0.3344, "step": 59860 }, { "epoch": 14.930174563591022, "grad_norm": 5.866366386413574, "learning_rate": 5.076558603491272e-06, "loss": 0.3339, "step": 59870 }, { "epoch": 14.932668329177057, "grad_norm": 7.339391708374023, "learning_rate": 5.074064837905237e-06, "loss": 0.312, "step": 59880 }, { "epoch": 14.935162094763092, "grad_norm": 8.920500755310059, "learning_rate": 5.071571072319203e-06, "loss": 0.338, "step": 59890 }, { "epoch": 14.937655860349127, "grad_norm": 11.074267387390137, "learning_rate": 5.069077306733168e-06, "loss": 0.3554, "step": 59900 }, { "epoch": 14.940149625935161, "grad_norm": 7.53389310836792, "learning_rate": 5.066583541147133e-06, "loss": 0.3171, "step": 59910 }, { "epoch": 14.942643391521196, "grad_norm": 5.2787346839904785, "learning_rate": 5.064089775561098e-06, "loss": 0.3658, "step": 59920 }, { "epoch": 14.945137157107231, "grad_norm": 10.729007720947266, "learning_rate": 5.061596009975063e-06, "loss": 0.3204, "step": 59930 }, { "epoch": 14.947630922693268, "grad_norm": 4.510223865509033, "learning_rate": 5.059102244389028e-06, "loss": 0.3533, "step": 59940 }, { "epoch": 14.950124688279303, "grad_norm": 8.356785774230957, "learning_rate": 5.056608478802994e-06, "loss": 0.3236, "step": 59950 }, { "epoch": 14.952618453865338, "grad_norm": 8.854043006896973, "learning_rate": 5.054114713216958e-06, "loss": 0.3763, "step": 59960 }, { "epoch": 14.955112219451372, "grad_norm": 7.022368907928467, "learning_rate": 5.051620947630923e-06, "loss": 0.3308, "step": 59970 }, { "epoch": 14.957605985037407, "grad_norm": 7.40346622467041, "learning_rate": 5.049127182044889e-06, "loss": 0.2865, "step": 59980 }, { "epoch": 14.960099750623442, "grad_norm": 6.403690338134766, "learning_rate": 5.046633416458854e-06, "loss": 0.3002, "step": 59990 }, { "epoch": 14.962593516209477, "grad_norm": 4.785618782043457, "learning_rate": 5.044139650872818e-06, "loss": 0.3558, "step": 60000 }, { "epoch": 14.965087281795512, "grad_norm": 8.796896934509277, "learning_rate": 5.041645885286783e-06, "loss": 0.3605, "step": 60010 }, { "epoch": 14.967581047381547, "grad_norm": 8.200934410095215, "learning_rate": 5.039152119700749e-06, "loss": 0.3184, "step": 60020 }, { "epoch": 14.970074812967582, "grad_norm": 9.922554016113281, "learning_rate": 5.036658354114714e-06, "loss": 0.3724, "step": 60030 }, { "epoch": 14.972568578553616, "grad_norm": 8.850462913513184, "learning_rate": 5.034164588528678e-06, "loss": 0.3298, "step": 60040 }, { "epoch": 14.975062344139651, "grad_norm": 4.286106586456299, "learning_rate": 5.031670822942643e-06, "loss": 0.3485, "step": 60050 }, { "epoch": 14.977556109725686, "grad_norm": 8.860762596130371, "learning_rate": 5.029177057356609e-06, "loss": 0.3658, "step": 60060 }, { "epoch": 14.980049875311721, "grad_norm": 11.523205757141113, "learning_rate": 5.026683291770574e-06, "loss": 0.3549, "step": 60070 }, { "epoch": 14.982543640897756, "grad_norm": 7.7817277908325195, "learning_rate": 5.0241895261845384e-06, "loss": 0.287, "step": 60080 }, { "epoch": 14.98503740648379, "grad_norm": 9.745018005371094, "learning_rate": 5.021695760598504e-06, "loss": 0.3697, "step": 60090 }, { "epoch": 14.987531172069826, "grad_norm": 7.144688606262207, "learning_rate": 5.019201995012469e-06, "loss": 0.3164, "step": 60100 }, { "epoch": 14.99002493765586, "grad_norm": 7.69403600692749, "learning_rate": 5.016708229426434e-06, "loss": 0.3714, "step": 60110 }, { "epoch": 14.992518703241895, "grad_norm": 7.915139198303223, "learning_rate": 5.014214463840399e-06, "loss": 0.3015, "step": 60120 }, { "epoch": 14.99501246882793, "grad_norm": 7.1008381843566895, "learning_rate": 5.011720698254364e-06, "loss": 0.314, "step": 60130 }, { "epoch": 14.997506234413965, "grad_norm": 9.205920219421387, "learning_rate": 5.0092269326683294e-06, "loss": 0.3682, "step": 60140 }, { "epoch": 15.0, "grad_norm": 5.531941890716553, "learning_rate": 5.006733167082295e-06, "loss": 0.2957, "step": 60150 }, { "epoch": 15.0, "eval_loss": 0.4161190390586853, "eval_runtime": 60.2534, "eval_samples_per_second": 16.646, "eval_steps_per_second": 16.646, "step": 60150 }, { "epoch": 15.002493765586035, "grad_norm": 5.979081153869629, "learning_rate": 5.00423940149626e-06, "loss": 0.3142, "step": 60160 }, { "epoch": 15.00498753117207, "grad_norm": 14.604164123535156, "learning_rate": 5.0017456359102245e-06, "loss": 0.3561, "step": 60170 }, { "epoch": 15.007481296758105, "grad_norm": 8.22153377532959, "learning_rate": 4.99925187032419e-06, "loss": 0.379, "step": 60180 }, { "epoch": 15.00997506234414, "grad_norm": 8.119734764099121, "learning_rate": 4.996758104738155e-06, "loss": 0.2916, "step": 60190 }, { "epoch": 15.012468827930174, "grad_norm": 8.341341972351074, "learning_rate": 4.9942643391521205e-06, "loss": 0.345, "step": 60200 }, { "epoch": 15.01496259351621, "grad_norm": 9.086451530456543, "learning_rate": 4.991770573566086e-06, "loss": 0.3281, "step": 60210 }, { "epoch": 15.017456359102244, "grad_norm": 6.594851970672607, "learning_rate": 4.98927680798005e-06, "loss": 0.3928, "step": 60220 }, { "epoch": 15.019950124688279, "grad_norm": 4.85838508605957, "learning_rate": 4.9867830423940155e-06, "loss": 0.2773, "step": 60230 }, { "epoch": 15.022443890274314, "grad_norm": 6.930510520935059, "learning_rate": 4.98428927680798e-06, "loss": 0.3527, "step": 60240 }, { "epoch": 15.024937655860349, "grad_norm": 10.528151512145996, "learning_rate": 4.981795511221945e-06, "loss": 0.3468, "step": 60250 }, { "epoch": 15.027431421446384, "grad_norm": 13.538934707641602, "learning_rate": 4.979301745635911e-06, "loss": 0.3329, "step": 60260 }, { "epoch": 15.029925187032418, "grad_norm": 9.08583927154541, "learning_rate": 4.976807980049876e-06, "loss": 0.3067, "step": 60270 }, { "epoch": 15.032418952618453, "grad_norm": 6.1941423416137695, "learning_rate": 4.97431421446384e-06, "loss": 0.3442, "step": 60280 }, { "epoch": 15.034912718204488, "grad_norm": 8.902884483337402, "learning_rate": 4.971820448877806e-06, "loss": 0.2784, "step": 60290 }, { "epoch": 15.037406483790523, "grad_norm": 8.3753023147583, "learning_rate": 4.969326683291771e-06, "loss": 0.3317, "step": 60300 }, { "epoch": 15.039900249376558, "grad_norm": 9.228056907653809, "learning_rate": 4.966832917705736e-06, "loss": 0.3551, "step": 60310 }, { "epoch": 15.042394014962593, "grad_norm": 8.301530838012695, "learning_rate": 4.964339152119701e-06, "loss": 0.4634, "step": 60320 }, { "epoch": 15.044887780548628, "grad_norm": 7.827813625335693, "learning_rate": 4.961845386533666e-06, "loss": 0.336, "step": 60330 }, { "epoch": 15.047381546134662, "grad_norm": 8.87314510345459, "learning_rate": 4.959351620947631e-06, "loss": 0.3343, "step": 60340 }, { "epoch": 15.049875311720697, "grad_norm": 5.845686912536621, "learning_rate": 4.956857855361597e-06, "loss": 0.3175, "step": 60350 }, { "epoch": 15.052369077306734, "grad_norm": 7.405905246734619, "learning_rate": 4.954364089775561e-06, "loss": 0.354, "step": 60360 }, { "epoch": 15.054862842892769, "grad_norm": 6.437865734100342, "learning_rate": 4.9518703241895265e-06, "loss": 0.3171, "step": 60370 }, { "epoch": 15.057356608478804, "grad_norm": 10.79374885559082, "learning_rate": 4.949376558603492e-06, "loss": 0.317, "step": 60380 }, { "epoch": 15.059850374064839, "grad_norm": 8.71488094329834, "learning_rate": 4.946882793017457e-06, "loss": 0.3546, "step": 60390 }, { "epoch": 15.062344139650873, "grad_norm": 7.864600658416748, "learning_rate": 4.944389027431422e-06, "loss": 0.4108, "step": 60400 }, { "epoch": 15.064837905236908, "grad_norm": 6.649560928344727, "learning_rate": 4.941895261845387e-06, "loss": 0.3197, "step": 60410 }, { "epoch": 15.067331670822943, "grad_norm": 9.850674629211426, "learning_rate": 4.939401496259352e-06, "loss": 0.3384, "step": 60420 }, { "epoch": 15.069825436408978, "grad_norm": 16.706798553466797, "learning_rate": 4.9369077306733175e-06, "loss": 0.3184, "step": 60430 }, { "epoch": 15.072319201995013, "grad_norm": 8.220061302185059, "learning_rate": 4.934413965087283e-06, "loss": 0.308, "step": 60440 }, { "epoch": 15.074812967581048, "grad_norm": 6.942609786987305, "learning_rate": 4.931920199501247e-06, "loss": 0.3105, "step": 60450 }, { "epoch": 15.077306733167083, "grad_norm": 8.182219505310059, "learning_rate": 4.9294264339152126e-06, "loss": 0.2868, "step": 60460 }, { "epoch": 15.079800498753118, "grad_norm": 7.964657783508301, "learning_rate": 4.926932668329177e-06, "loss": 0.3713, "step": 60470 }, { "epoch": 15.082294264339152, "grad_norm": 6.614937782287598, "learning_rate": 4.924438902743142e-06, "loss": 0.3498, "step": 60480 }, { "epoch": 15.084788029925187, "grad_norm": 9.722759246826172, "learning_rate": 4.921945137157108e-06, "loss": 0.3951, "step": 60490 }, { "epoch": 15.087281795511222, "grad_norm": 7.471789836883545, "learning_rate": 4.919451371571073e-06, "loss": 0.289, "step": 60500 }, { "epoch": 15.089775561097257, "grad_norm": 8.353428840637207, "learning_rate": 4.9169576059850374e-06, "loss": 0.3236, "step": 60510 }, { "epoch": 15.092269326683292, "grad_norm": 6.462151527404785, "learning_rate": 4.914463840399003e-06, "loss": 0.3103, "step": 60520 }, { "epoch": 15.094763092269327, "grad_norm": 10.315178871154785, "learning_rate": 4.911970074812968e-06, "loss": 0.3587, "step": 60530 }, { "epoch": 15.097256857855362, "grad_norm": 7.671100616455078, "learning_rate": 4.909476309226933e-06, "loss": 0.3657, "step": 60540 }, { "epoch": 15.099750623441397, "grad_norm": 8.20209789276123, "learning_rate": 4.906982543640898e-06, "loss": 0.3923, "step": 60550 }, { "epoch": 15.102244389027431, "grad_norm": 5.816895008087158, "learning_rate": 4.904488778054863e-06, "loss": 0.3362, "step": 60560 }, { "epoch": 15.104738154613466, "grad_norm": 6.219974994659424, "learning_rate": 4.9019950124688284e-06, "loss": 0.4108, "step": 60570 }, { "epoch": 15.107231920199501, "grad_norm": 8.702142715454102, "learning_rate": 4.899501246882794e-06, "loss": 0.3341, "step": 60580 }, { "epoch": 15.109725685785536, "grad_norm": 7.942946434020996, "learning_rate": 4.897007481296758e-06, "loss": 0.298, "step": 60590 }, { "epoch": 15.11221945137157, "grad_norm": 14.992578506469727, "learning_rate": 4.8945137157107235e-06, "loss": 0.3463, "step": 60600 }, { "epoch": 15.114713216957606, "grad_norm": 8.40830135345459, "learning_rate": 4.892019950124689e-06, "loss": 0.3821, "step": 60610 }, { "epoch": 15.11720698254364, "grad_norm": 9.050780296325684, "learning_rate": 4.889526184538654e-06, "loss": 0.3323, "step": 60620 }, { "epoch": 15.119700748129675, "grad_norm": 3.6410436630249023, "learning_rate": 4.887032418952619e-06, "loss": 0.321, "step": 60630 }, { "epoch": 15.12219451371571, "grad_norm": 10.186809539794922, "learning_rate": 4.884538653366584e-06, "loss": 0.3119, "step": 60640 }, { "epoch": 15.124688279301745, "grad_norm": 9.280686378479004, "learning_rate": 4.882044887780549e-06, "loss": 0.2915, "step": 60650 }, { "epoch": 15.12718204488778, "grad_norm": 10.575470924377441, "learning_rate": 4.879551122194514e-06, "loss": 0.3587, "step": 60660 }, { "epoch": 15.129675810473815, "grad_norm": 10.077107429504395, "learning_rate": 4.87705735660848e-06, "loss": 0.4142, "step": 60670 }, { "epoch": 15.13216957605985, "grad_norm": 8.206668853759766, "learning_rate": 4.874563591022444e-06, "loss": 0.3306, "step": 60680 }, { "epoch": 15.134663341645885, "grad_norm": 6.796830654144287, "learning_rate": 4.87206982543641e-06, "loss": 0.3437, "step": 60690 }, { "epoch": 15.13715710723192, "grad_norm": 8.173311233520508, "learning_rate": 4.869576059850374e-06, "loss": 0.3, "step": 60700 }, { "epoch": 15.139650872817954, "grad_norm": 11.791805267333984, "learning_rate": 4.867082294264339e-06, "loss": 0.2989, "step": 60710 }, { "epoch": 15.14214463840399, "grad_norm": 8.304753303527832, "learning_rate": 4.864588528678305e-06, "loss": 0.3687, "step": 60720 }, { "epoch": 15.144638403990024, "grad_norm": 6.197001934051514, "learning_rate": 4.86209476309227e-06, "loss": 0.3366, "step": 60730 }, { "epoch": 15.147132169576059, "grad_norm": 11.391712188720703, "learning_rate": 4.8596009975062345e-06, "loss": 0.3422, "step": 60740 }, { "epoch": 15.149625935162096, "grad_norm": 5.738236904144287, "learning_rate": 4.8571072319202e-06, "loss": 0.3359, "step": 60750 }, { "epoch": 15.15211970074813, "grad_norm": 9.785381317138672, "learning_rate": 4.854613466334165e-06, "loss": 0.3786, "step": 60760 }, { "epoch": 15.154613466334165, "grad_norm": 6.396745204925537, "learning_rate": 4.85211970074813e-06, "loss": 0.2676, "step": 60770 }, { "epoch": 15.1571072319202, "grad_norm": 7.886439800262451, "learning_rate": 4.849625935162095e-06, "loss": 0.3038, "step": 60780 }, { "epoch": 15.159600997506235, "grad_norm": 11.808472633361816, "learning_rate": 4.84713216957606e-06, "loss": 0.3218, "step": 60790 }, { "epoch": 15.16209476309227, "grad_norm": 13.33260440826416, "learning_rate": 4.8446384039900255e-06, "loss": 0.3795, "step": 60800 }, { "epoch": 15.164588528678305, "grad_norm": 6.950128078460693, "learning_rate": 4.842144638403991e-06, "loss": 0.3344, "step": 60810 }, { "epoch": 15.16708229426434, "grad_norm": 10.497044563293457, "learning_rate": 4.839650872817955e-06, "loss": 0.3543, "step": 60820 }, { "epoch": 15.169576059850375, "grad_norm": 10.250978469848633, "learning_rate": 4.8371571072319206e-06, "loss": 0.3404, "step": 60830 }, { "epoch": 15.17206982543641, "grad_norm": 9.228592872619629, "learning_rate": 4.834663341645886e-06, "loss": 0.3418, "step": 60840 }, { "epoch": 15.174563591022444, "grad_norm": 9.230294227600098, "learning_rate": 4.832169576059851e-06, "loss": 0.3317, "step": 60850 }, { "epoch": 15.17705735660848, "grad_norm": 6.274941921234131, "learning_rate": 4.829675810473816e-06, "loss": 0.3361, "step": 60860 }, { "epoch": 15.179551122194514, "grad_norm": 7.867391586303711, "learning_rate": 4.827182044887781e-06, "loss": 0.3096, "step": 60870 }, { "epoch": 15.182044887780549, "grad_norm": 7.890642166137695, "learning_rate": 4.824688279301745e-06, "loss": 0.3399, "step": 60880 }, { "epoch": 15.184538653366584, "grad_norm": 9.05803394317627, "learning_rate": 4.822194513715711e-06, "loss": 0.3148, "step": 60890 }, { "epoch": 15.187032418952619, "grad_norm": 5.658694744110107, "learning_rate": 4.819700748129677e-06, "loss": 0.3686, "step": 60900 }, { "epoch": 15.189526184538654, "grad_norm": 7.717238426208496, "learning_rate": 4.817206982543641e-06, "loss": 0.3431, "step": 60910 }, { "epoch": 15.192019950124688, "grad_norm": 9.4716215133667, "learning_rate": 4.814713216957607e-06, "loss": 0.3377, "step": 60920 }, { "epoch": 15.194513715710723, "grad_norm": 6.269632816314697, "learning_rate": 4.812219451371571e-06, "loss": 0.3173, "step": 60930 }, { "epoch": 15.197007481296758, "grad_norm": 4.857752799987793, "learning_rate": 4.8097256857855364e-06, "loss": 0.2644, "step": 60940 }, { "epoch": 15.199501246882793, "grad_norm": 4.956846714019775, "learning_rate": 4.807231920199502e-06, "loss": 0.4057, "step": 60950 }, { "epoch": 15.201995012468828, "grad_norm": 11.810905456542969, "learning_rate": 4.804738154613467e-06, "loss": 0.3344, "step": 60960 }, { "epoch": 15.204488778054863, "grad_norm": 10.063115119934082, "learning_rate": 4.8022443890274315e-06, "loss": 0.356, "step": 60970 }, { "epoch": 15.206982543640898, "grad_norm": 8.853325843811035, "learning_rate": 4.799750623441397e-06, "loss": 0.2966, "step": 60980 }, { "epoch": 15.209476309226932, "grad_norm": 7.260303974151611, "learning_rate": 4.797256857855362e-06, "loss": 0.2996, "step": 60990 }, { "epoch": 15.211970074812967, "grad_norm": 9.30881118774414, "learning_rate": 4.7947630922693274e-06, "loss": 0.3598, "step": 61000 }, { "epoch": 15.214463840399002, "grad_norm": 8.756742477416992, "learning_rate": 4.792269326683292e-06, "loss": 0.2864, "step": 61010 }, { "epoch": 15.216957605985037, "grad_norm": 6.806258201599121, "learning_rate": 4.789775561097257e-06, "loss": 0.3255, "step": 61020 }, { "epoch": 15.219451371571072, "grad_norm": 7.81227445602417, "learning_rate": 4.7872817955112225e-06, "loss": 0.42, "step": 61030 }, { "epoch": 15.221945137157107, "grad_norm": 8.680373191833496, "learning_rate": 4.784788029925188e-06, "loss": 0.3207, "step": 61040 }, { "epoch": 15.224438902743142, "grad_norm": 7.925756454467773, "learning_rate": 4.782294264339152e-06, "loss": 0.3674, "step": 61050 }, { "epoch": 15.226932668329177, "grad_norm": 9.870626449584961, "learning_rate": 4.779800498753118e-06, "loss": 0.3326, "step": 61060 }, { "epoch": 15.229426433915211, "grad_norm": 6.636346340179443, "learning_rate": 4.777306733167082e-06, "loss": 0.2297, "step": 61070 }, { "epoch": 15.231920199501246, "grad_norm": 7.3385009765625, "learning_rate": 4.774812967581048e-06, "loss": 0.3006, "step": 61080 }, { "epoch": 15.234413965087281, "grad_norm": 7.8916215896606445, "learning_rate": 4.772319201995013e-06, "loss": 0.3171, "step": 61090 }, { "epoch": 15.236907730673316, "grad_norm": 7.509432792663574, "learning_rate": 4.769825436408978e-06, "loss": 0.3321, "step": 61100 }, { "epoch": 15.239401496259351, "grad_norm": 5.825787544250488, "learning_rate": 4.7673316708229425e-06, "loss": 0.3242, "step": 61110 }, { "epoch": 15.241895261845386, "grad_norm": 43.301937103271484, "learning_rate": 4.764837905236908e-06, "loss": 0.3342, "step": 61120 }, { "epoch": 15.24438902743142, "grad_norm": 5.396152019500732, "learning_rate": 4.762344139650873e-06, "loss": 0.3322, "step": 61130 }, { "epoch": 15.246882793017456, "grad_norm": 7.503886699676514, "learning_rate": 4.759850374064838e-06, "loss": 0.3069, "step": 61140 }, { "epoch": 15.24937655860349, "grad_norm": 9.121578216552734, "learning_rate": 4.757356608478804e-06, "loss": 0.3596, "step": 61150 }, { "epoch": 15.251870324189527, "grad_norm": 8.742789268493652, "learning_rate": 4.754862842892768e-06, "loss": 0.2804, "step": 61160 }, { "epoch": 15.254364089775562, "grad_norm": 7.584442138671875, "learning_rate": 4.7523690773067335e-06, "loss": 0.2849, "step": 61170 }, { "epoch": 15.256857855361597, "grad_norm": 6.909498691558838, "learning_rate": 4.749875311720699e-06, "loss": 0.2826, "step": 61180 }, { "epoch": 15.259351620947632, "grad_norm": 6.300384044647217, "learning_rate": 4.747381546134664e-06, "loss": 0.3083, "step": 61190 }, { "epoch": 15.261845386533667, "grad_norm": 7.162856101989746, "learning_rate": 4.7448877805486286e-06, "loss": 0.2551, "step": 61200 }, { "epoch": 15.264339152119701, "grad_norm": 8.662135124206543, "learning_rate": 4.742394014962594e-06, "loss": 0.3813, "step": 61210 }, { "epoch": 15.266832917705736, "grad_norm": 7.602468013763428, "learning_rate": 4.739900249376559e-06, "loss": 0.3187, "step": 61220 }, { "epoch": 15.269326683291771, "grad_norm": 8.263647079467773, "learning_rate": 4.7374064837905245e-06, "loss": 0.3223, "step": 61230 }, { "epoch": 15.271820448877806, "grad_norm": 8.883642196655273, "learning_rate": 4.734912718204489e-06, "loss": 0.4101, "step": 61240 }, { "epoch": 15.27431421446384, "grad_norm": 13.972535133361816, "learning_rate": 4.732418952618454e-06, "loss": 0.3395, "step": 61250 }, { "epoch": 15.276807980049876, "grad_norm": 12.901040077209473, "learning_rate": 4.7299251870324196e-06, "loss": 0.3364, "step": 61260 }, { "epoch": 15.27930174563591, "grad_norm": 9.556396484375, "learning_rate": 4.727431421446385e-06, "loss": 0.3784, "step": 61270 }, { "epoch": 15.281795511221945, "grad_norm": 9.603511810302734, "learning_rate": 4.724937655860349e-06, "loss": 0.321, "step": 61280 }, { "epoch": 15.28428927680798, "grad_norm": 12.59118366241455, "learning_rate": 4.722443890274315e-06, "loss": 0.3617, "step": 61290 }, { "epoch": 15.286783042394015, "grad_norm": 7.6207194328308105, "learning_rate": 4.719950124688279e-06, "loss": 0.2976, "step": 61300 }, { "epoch": 15.28927680798005, "grad_norm": 10.782099723815918, "learning_rate": 4.717456359102245e-06, "loss": 0.2937, "step": 61310 }, { "epoch": 15.291770573566085, "grad_norm": 7.862745761871338, "learning_rate": 4.71496259351621e-06, "loss": 0.2923, "step": 61320 }, { "epoch": 15.29426433915212, "grad_norm": 10.322379112243652, "learning_rate": 4.712468827930175e-06, "loss": 0.3436, "step": 61330 }, { "epoch": 15.296758104738155, "grad_norm": 6.147068500518799, "learning_rate": 4.7099750623441395e-06, "loss": 0.3373, "step": 61340 }, { "epoch": 15.29925187032419, "grad_norm": 7.12548303604126, "learning_rate": 4.707481296758105e-06, "loss": 0.3147, "step": 61350 }, { "epoch": 15.301745635910224, "grad_norm": 10.481398582458496, "learning_rate": 4.70498753117207e-06, "loss": 0.3268, "step": 61360 }, { "epoch": 15.30423940149626, "grad_norm": 8.344328880310059, "learning_rate": 4.7024937655860354e-06, "loss": 0.3231, "step": 61370 }, { "epoch": 15.306733167082294, "grad_norm": 5.839080333709717, "learning_rate": 4.7e-06, "loss": 0.2777, "step": 61380 }, { "epoch": 15.309226932668329, "grad_norm": 14.974725723266602, "learning_rate": 4.697506234413965e-06, "loss": 0.3513, "step": 61390 }, { "epoch": 15.311720698254364, "grad_norm": 6.981603145599365, "learning_rate": 4.6950124688279305e-06, "loss": 0.3106, "step": 61400 }, { "epoch": 15.314214463840399, "grad_norm": 10.228157043457031, "learning_rate": 4.692518703241896e-06, "loss": 0.3035, "step": 61410 }, { "epoch": 15.316708229426434, "grad_norm": 10.667278289794922, "learning_rate": 4.690024937655861e-06, "loss": 0.331, "step": 61420 }, { "epoch": 15.319201995012468, "grad_norm": 7.606900215148926, "learning_rate": 4.687531172069826e-06, "loss": 0.2967, "step": 61430 }, { "epoch": 15.321695760598503, "grad_norm": 8.036834716796875, "learning_rate": 4.685037406483791e-06, "loss": 0.3349, "step": 61440 }, { "epoch": 15.324189526184538, "grad_norm": 7.838779449462891, "learning_rate": 4.682543640897756e-06, "loss": 0.3439, "step": 61450 }, { "epoch": 15.326683291770573, "grad_norm": 13.692665100097656, "learning_rate": 4.6800498753117215e-06, "loss": 0.3764, "step": 61460 }, { "epoch": 15.329177057356608, "grad_norm": 9.359086036682129, "learning_rate": 4.677556109725686e-06, "loss": 0.3118, "step": 61470 }, { "epoch": 15.331670822942643, "grad_norm": 7.252995014190674, "learning_rate": 4.675062344139651e-06, "loss": 0.311, "step": 61480 }, { "epoch": 15.334164588528678, "grad_norm": 7.640556812286377, "learning_rate": 4.672568578553617e-06, "loss": 0.3451, "step": 61490 }, { "epoch": 15.336658354114713, "grad_norm": 8.669981956481934, "learning_rate": 4.670074812967582e-06, "loss": 0.3612, "step": 61500 }, { "epoch": 15.339152119700747, "grad_norm": 12.157310485839844, "learning_rate": 4.667581047381546e-06, "loss": 0.3596, "step": 61510 }, { "epoch": 15.341645885286782, "grad_norm": 7.998473644256592, "learning_rate": 4.665087281795512e-06, "loss": 0.3214, "step": 61520 }, { "epoch": 15.344139650872817, "grad_norm": 11.854910850524902, "learning_rate": 4.662593516209476e-06, "loss": 0.3224, "step": 61530 }, { "epoch": 15.346633416458852, "grad_norm": 8.815274238586426, "learning_rate": 4.660099750623442e-06, "loss": 0.317, "step": 61540 }, { "epoch": 15.349127182044889, "grad_norm": 6.209719181060791, "learning_rate": 4.657605985037407e-06, "loss": 0.3578, "step": 61550 }, { "epoch": 15.351620947630924, "grad_norm": 10.795744895935059, "learning_rate": 4.655112219451372e-06, "loss": 0.2813, "step": 61560 }, { "epoch": 15.354114713216958, "grad_norm": 8.15768814086914, "learning_rate": 4.6526184538653365e-06, "loss": 0.3336, "step": 61570 }, { "epoch": 15.356608478802993, "grad_norm": 8.229902267456055, "learning_rate": 4.650124688279302e-06, "loss": 0.291, "step": 61580 }, { "epoch": 15.359102244389028, "grad_norm": 8.55030632019043, "learning_rate": 4.647630922693267e-06, "loss": 0.2709, "step": 61590 }, { "epoch": 15.361596009975063, "grad_norm": 6.729324817657471, "learning_rate": 4.6451371571072325e-06, "loss": 0.3014, "step": 61600 }, { "epoch": 15.364089775561098, "grad_norm": 9.249410629272461, "learning_rate": 4.642643391521197e-06, "loss": 0.3409, "step": 61610 }, { "epoch": 15.366583541147133, "grad_norm": 8.171239852905273, "learning_rate": 4.640149625935162e-06, "loss": 0.2956, "step": 61620 }, { "epoch": 15.369077306733168, "grad_norm": 8.610503196716309, "learning_rate": 4.6376558603491276e-06, "loss": 0.2898, "step": 61630 }, { "epoch": 15.371571072319203, "grad_norm": 6.898585796356201, "learning_rate": 4.635162094763093e-06, "loss": 0.3064, "step": 61640 }, { "epoch": 15.374064837905237, "grad_norm": 10.55229377746582, "learning_rate": 4.632668329177058e-06, "loss": 0.3469, "step": 61650 }, { "epoch": 15.376558603491272, "grad_norm": 13.5460786819458, "learning_rate": 4.630174563591023e-06, "loss": 0.3784, "step": 61660 }, { "epoch": 15.379052369077307, "grad_norm": 6.476283073425293, "learning_rate": 4.627680798004988e-06, "loss": 0.2643, "step": 61670 }, { "epoch": 15.381546134663342, "grad_norm": 13.659258842468262, "learning_rate": 4.625187032418953e-06, "loss": 0.4079, "step": 61680 }, { "epoch": 15.384039900249377, "grad_norm": 10.668188095092773, "learning_rate": 4.6226932668329186e-06, "loss": 0.3074, "step": 61690 }, { "epoch": 15.386533665835412, "grad_norm": 8.238740921020508, "learning_rate": 4.620199501246883e-06, "loss": 0.304, "step": 61700 }, { "epoch": 15.389027431421447, "grad_norm": 7.179644584655762, "learning_rate": 4.617705735660848e-06, "loss": 0.3141, "step": 61710 }, { "epoch": 15.391521197007481, "grad_norm": 6.697994709014893, "learning_rate": 4.615211970074814e-06, "loss": 0.3026, "step": 61720 }, { "epoch": 15.394014962593516, "grad_norm": 8.70664119720459, "learning_rate": 4.612718204488779e-06, "loss": 0.3417, "step": 61730 }, { "epoch": 15.396508728179551, "grad_norm": 6.116442680358887, "learning_rate": 4.610224438902743e-06, "loss": 0.3057, "step": 61740 }, { "epoch": 15.399002493765586, "grad_norm": 7.209672927856445, "learning_rate": 4.607730673316709e-06, "loss": 0.2968, "step": 61750 }, { "epoch": 15.401496259351621, "grad_norm": 9.802582740783691, "learning_rate": 4.605236907730673e-06, "loss": 0.2957, "step": 61760 }, { "epoch": 15.403990024937656, "grad_norm": 15.796855926513672, "learning_rate": 4.6027431421446385e-06, "loss": 0.4296, "step": 61770 }, { "epoch": 15.40648379052369, "grad_norm": 6.593606948852539, "learning_rate": 4.600249376558604e-06, "loss": 0.3552, "step": 61780 }, { "epoch": 15.408977556109726, "grad_norm": 6.938211441040039, "learning_rate": 4.597755610972569e-06, "loss": 0.3235, "step": 61790 }, { "epoch": 15.41147132169576, "grad_norm": 9.084220886230469, "learning_rate": 4.595261845386534e-06, "loss": 0.3951, "step": 61800 }, { "epoch": 15.413965087281795, "grad_norm": 8.550397872924805, "learning_rate": 4.592768079800499e-06, "loss": 0.3256, "step": 61810 }, { "epoch": 15.41645885286783, "grad_norm": 8.361339569091797, "learning_rate": 4.590274314214464e-06, "loss": 0.3312, "step": 61820 }, { "epoch": 15.418952618453865, "grad_norm": 6.701113700866699, "learning_rate": 4.5877805486284295e-06, "loss": 0.2941, "step": 61830 }, { "epoch": 15.4214463840399, "grad_norm": 14.242850303649902, "learning_rate": 4.585286783042394e-06, "loss": 0.3327, "step": 61840 }, { "epoch": 15.423940149625935, "grad_norm": 8.893102645874023, "learning_rate": 4.582793017456359e-06, "loss": 0.3021, "step": 61850 }, { "epoch": 15.42643391521197, "grad_norm": 9.866777420043945, "learning_rate": 4.580299251870325e-06, "loss": 0.3488, "step": 61860 }, { "epoch": 15.428927680798004, "grad_norm": 9.676253318786621, "learning_rate": 4.57780548628429e-06, "loss": 0.3901, "step": 61870 }, { "epoch": 15.43142144638404, "grad_norm": 6.8704657554626465, "learning_rate": 4.575311720698254e-06, "loss": 0.3065, "step": 61880 }, { "epoch": 15.433915211970074, "grad_norm": 8.320914268493652, "learning_rate": 4.57281795511222e-06, "loss": 0.3204, "step": 61890 }, { "epoch": 15.436408977556109, "grad_norm": 9.73550033569336, "learning_rate": 4.570324189526185e-06, "loss": 0.3785, "step": 61900 }, { "epoch": 15.438902743142144, "grad_norm": 7.0275139808654785, "learning_rate": 4.56783042394015e-06, "loss": 0.3277, "step": 61910 }, { "epoch": 15.441396508728179, "grad_norm": 8.163436889648438, "learning_rate": 4.565336658354116e-06, "loss": 0.3182, "step": 61920 }, { "epoch": 15.443890274314214, "grad_norm": 7.171289920806885, "learning_rate": 4.56284289276808e-06, "loss": 0.3612, "step": 61930 }, { "epoch": 15.446384039900249, "grad_norm": 11.349479675292969, "learning_rate": 4.560349127182045e-06, "loss": 0.3601, "step": 61940 }, { "epoch": 15.448877805486283, "grad_norm": 5.093303680419922, "learning_rate": 4.557855361596011e-06, "loss": 0.3162, "step": 61950 }, { "epoch": 15.451371571072318, "grad_norm": 8.058874130249023, "learning_rate": 4.555361596009976e-06, "loss": 0.3508, "step": 61960 }, { "epoch": 15.453865336658355, "grad_norm": 7.41152286529541, "learning_rate": 4.5528678304239405e-06, "loss": 0.256, "step": 61970 }, { "epoch": 15.45635910224439, "grad_norm": 5.370427131652832, "learning_rate": 4.550374064837906e-06, "loss": 0.3207, "step": 61980 }, { "epoch": 15.458852867830425, "grad_norm": 9.166321754455566, "learning_rate": 4.54788029925187e-06, "loss": 0.3825, "step": 61990 }, { "epoch": 15.46134663341646, "grad_norm": 11.046178817749023, "learning_rate": 4.5453865336658355e-06, "loss": 0.3506, "step": 62000 }, { "epoch": 15.463840399002494, "grad_norm": 10.814038276672363, "learning_rate": 4.542892768079801e-06, "loss": 0.3217, "step": 62010 }, { "epoch": 15.46633416458853, "grad_norm": 6.165196895599365, "learning_rate": 4.540399002493766e-06, "loss": 0.321, "step": 62020 }, { "epoch": 15.468827930174564, "grad_norm": 6.679711818695068, "learning_rate": 4.537905236907731e-06, "loss": 0.3212, "step": 62030 }, { "epoch": 15.471321695760599, "grad_norm": 10.853338241577148, "learning_rate": 4.535411471321696e-06, "loss": 0.2962, "step": 62040 }, { "epoch": 15.473815461346634, "grad_norm": 7.180294990539551, "learning_rate": 4.532917705735661e-06, "loss": 0.3367, "step": 62050 }, { "epoch": 15.476309226932669, "grad_norm": 6.090845108032227, "learning_rate": 4.5304239401496266e-06, "loss": 0.3281, "step": 62060 }, { "epoch": 15.478802992518704, "grad_norm": 9.395833015441895, "learning_rate": 4.527930174563591e-06, "loss": 0.3029, "step": 62070 }, { "epoch": 15.481296758104738, "grad_norm": 8.523276329040527, "learning_rate": 4.525436408977556e-06, "loss": 0.3147, "step": 62080 }, { "epoch": 15.483790523690773, "grad_norm": 6.579370498657227, "learning_rate": 4.522942643391522e-06, "loss": 0.3089, "step": 62090 }, { "epoch": 15.486284289276808, "grad_norm": 9.35316276550293, "learning_rate": 4.520448877805487e-06, "loss": 0.3479, "step": 62100 }, { "epoch": 15.488778054862843, "grad_norm": 8.047480583190918, "learning_rate": 4.517955112219451e-06, "loss": 0.3327, "step": 62110 }, { "epoch": 15.491271820448878, "grad_norm": 9.682567596435547, "learning_rate": 4.515461346633417e-06, "loss": 0.2934, "step": 62120 }, { "epoch": 15.493765586034913, "grad_norm": 6.29941463470459, "learning_rate": 4.512967581047382e-06, "loss": 0.3575, "step": 62130 }, { "epoch": 15.496259351620948, "grad_norm": 6.268800735473633, "learning_rate": 4.510473815461347e-06, "loss": 0.301, "step": 62140 }, { "epoch": 15.498753117206983, "grad_norm": 9.379264831542969, "learning_rate": 4.507980049875313e-06, "loss": 0.3548, "step": 62150 }, { "epoch": 15.501246882793017, "grad_norm": 7.664111137390137, "learning_rate": 4.505486284289277e-06, "loss": 0.2977, "step": 62160 }, { "epoch": 15.503740648379052, "grad_norm": 5.527115821838379, "learning_rate": 4.502992518703242e-06, "loss": 0.3337, "step": 62170 }, { "epoch": 15.506234413965087, "grad_norm": 9.211461067199707, "learning_rate": 4.500498753117207e-06, "loss": 0.3509, "step": 62180 }, { "epoch": 15.508728179551122, "grad_norm": 8.124340057373047, "learning_rate": 4.498004987531173e-06, "loss": 0.37, "step": 62190 }, { "epoch": 15.511221945137157, "grad_norm": 8.361662864685059, "learning_rate": 4.4955112219451375e-06, "loss": 0.3075, "step": 62200 }, { "epoch": 15.513715710723192, "grad_norm": 9.21060848236084, "learning_rate": 4.493017456359103e-06, "loss": 0.277, "step": 62210 }, { "epoch": 15.516209476309227, "grad_norm": 9.97355842590332, "learning_rate": 4.490523690773067e-06, "loss": 0.3102, "step": 62220 }, { "epoch": 15.518703241895262, "grad_norm": 7.273804664611816, "learning_rate": 4.488029925187033e-06, "loss": 0.273, "step": 62230 }, { "epoch": 15.521197007481296, "grad_norm": 4.933453559875488, "learning_rate": 4.485536159600998e-06, "loss": 0.2666, "step": 62240 }, { "epoch": 15.523690773067331, "grad_norm": 6.50958251953125, "learning_rate": 4.483042394014963e-06, "loss": 0.2831, "step": 62250 }, { "epoch": 15.526184538653366, "grad_norm": 5.90592622756958, "learning_rate": 4.480548628428928e-06, "loss": 0.4132, "step": 62260 }, { "epoch": 15.528678304239401, "grad_norm": 7.461799144744873, "learning_rate": 4.478054862842893e-06, "loss": 0.3894, "step": 62270 }, { "epoch": 15.531172069825436, "grad_norm": 6.117193222045898, "learning_rate": 4.475561097256858e-06, "loss": 0.318, "step": 62280 }, { "epoch": 15.53366583541147, "grad_norm": 7.674281120300293, "learning_rate": 4.473067331670824e-06, "loss": 0.3456, "step": 62290 }, { "epoch": 15.536159600997506, "grad_norm": 7.840825080871582, "learning_rate": 4.470573566084788e-06, "loss": 0.3274, "step": 62300 }, { "epoch": 15.53865336658354, "grad_norm": 9.947293281555176, "learning_rate": 4.468079800498753e-06, "loss": 0.3462, "step": 62310 }, { "epoch": 15.541147132169575, "grad_norm": 6.473944664001465, "learning_rate": 4.465586034912719e-06, "loss": 0.2612, "step": 62320 }, { "epoch": 15.54364089775561, "grad_norm": 10.87900447845459, "learning_rate": 4.463092269326684e-06, "loss": 0.3052, "step": 62330 }, { "epoch": 15.546134663341645, "grad_norm": 7.721189022064209, "learning_rate": 4.4605985037406484e-06, "loss": 0.3771, "step": 62340 }, { "epoch": 15.548628428927682, "grad_norm": 7.034101963043213, "learning_rate": 4.458104738154614e-06, "loss": 0.3067, "step": 62350 }, { "epoch": 15.551122194513717, "grad_norm": 8.35641860961914, "learning_rate": 4.455610972568579e-06, "loss": 0.3294, "step": 62360 }, { "epoch": 15.553615960099751, "grad_norm": 7.652080535888672, "learning_rate": 4.453117206982544e-06, "loss": 0.3738, "step": 62370 }, { "epoch": 15.556109725685786, "grad_norm": 7.658141136169434, "learning_rate": 4.450623441396509e-06, "loss": 0.3161, "step": 62380 }, { "epoch": 15.558603491271821, "grad_norm": 7.808701992034912, "learning_rate": 4.448129675810474e-06, "loss": 0.3701, "step": 62390 }, { "epoch": 15.561097256857856, "grad_norm": 9.722882270812988, "learning_rate": 4.4456359102244395e-06, "loss": 0.3375, "step": 62400 }, { "epoch": 15.563591022443891, "grad_norm": 8.67414379119873, "learning_rate": 4.443142144638404e-06, "loss": 0.3462, "step": 62410 }, { "epoch": 15.566084788029926, "grad_norm": 5.231639385223389, "learning_rate": 4.44064837905237e-06, "loss": 0.3686, "step": 62420 }, { "epoch": 15.56857855361596, "grad_norm": 11.116351127624512, "learning_rate": 4.4381546134663345e-06, "loss": 0.3446, "step": 62430 }, { "epoch": 15.571072319201996, "grad_norm": 6.519227504730225, "learning_rate": 4.4356608478803e-06, "loss": 0.2998, "step": 62440 }, { "epoch": 15.57356608478803, "grad_norm": 12.292588233947754, "learning_rate": 4.433167082294264e-06, "loss": 0.3128, "step": 62450 }, { "epoch": 15.576059850374065, "grad_norm": 10.114727973937988, "learning_rate": 4.43067331670823e-06, "loss": 0.4101, "step": 62460 }, { "epoch": 15.5785536159601, "grad_norm": 6.243036270141602, "learning_rate": 4.428179551122195e-06, "loss": 0.3902, "step": 62470 }, { "epoch": 15.581047381546135, "grad_norm": 7.189294338226318, "learning_rate": 4.42568578553616e-06, "loss": 0.2728, "step": 62480 }, { "epoch": 15.58354114713217, "grad_norm": 6.678802967071533, "learning_rate": 4.423192019950125e-06, "loss": 0.3568, "step": 62490 }, { "epoch": 15.586034912718205, "grad_norm": 10.736851692199707, "learning_rate": 4.42069825436409e-06, "loss": 0.3335, "step": 62500 }, { "epoch": 15.58852867830424, "grad_norm": 7.438121318817139, "learning_rate": 4.418204488778055e-06, "loss": 0.2941, "step": 62510 }, { "epoch": 15.591022443890274, "grad_norm": 6.257914066314697, "learning_rate": 4.415710723192021e-06, "loss": 0.2712, "step": 62520 }, { "epoch": 15.59351620947631, "grad_norm": 9.817953109741211, "learning_rate": 4.413216957605985e-06, "loss": 0.327, "step": 62530 }, { "epoch": 15.596009975062344, "grad_norm": 6.332709312438965, "learning_rate": 4.41072319201995e-06, "loss": 0.3838, "step": 62540 }, { "epoch": 15.598503740648379, "grad_norm": 7.172346115112305, "learning_rate": 4.408229426433916e-06, "loss": 0.3826, "step": 62550 }, { "epoch": 15.600997506234414, "grad_norm": 8.06655502319336, "learning_rate": 4.405735660847881e-06, "loss": 0.2852, "step": 62560 }, { "epoch": 15.603491271820449, "grad_norm": 12.748686790466309, "learning_rate": 4.4032418952618455e-06, "loss": 0.3728, "step": 62570 }, { "epoch": 15.605985037406484, "grad_norm": 8.942939758300781, "learning_rate": 4.400748129675811e-06, "loss": 0.3794, "step": 62580 }, { "epoch": 15.608478802992519, "grad_norm": 7.845804691314697, "learning_rate": 4.398254364089775e-06, "loss": 0.3231, "step": 62590 }, { "epoch": 15.610972568578553, "grad_norm": 10.373620986938477, "learning_rate": 4.395760598503741e-06, "loss": 0.2595, "step": 62600 }, { "epoch": 15.613466334164588, "grad_norm": 7.292490005493164, "learning_rate": 4.393266832917706e-06, "loss": 0.316, "step": 62610 }, { "epoch": 15.615960099750623, "grad_norm": 8.094284057617188, "learning_rate": 4.390773067331671e-06, "loss": 0.3601, "step": 62620 }, { "epoch": 15.618453865336658, "grad_norm": 8.893630981445312, "learning_rate": 4.3882793017456365e-06, "loss": 0.3487, "step": 62630 }, { "epoch": 15.620947630922693, "grad_norm": 6.261669635772705, "learning_rate": 4.385785536159601e-06, "loss": 0.2983, "step": 62640 }, { "epoch": 15.623441396508728, "grad_norm": 6.368238925933838, "learning_rate": 4.383291770573566e-06, "loss": 0.3524, "step": 62650 }, { "epoch": 15.625935162094763, "grad_norm": 15.371926307678223, "learning_rate": 4.380798004987532e-06, "loss": 0.378, "step": 62660 }, { "epoch": 15.628428927680797, "grad_norm": 5.758829593658447, "learning_rate": 4.378304239401497e-06, "loss": 0.3458, "step": 62670 }, { "epoch": 15.630922693266832, "grad_norm": 7.6789326667785645, "learning_rate": 4.375810473815461e-06, "loss": 0.2688, "step": 62680 }, { "epoch": 15.633416458852867, "grad_norm": 7.106253147125244, "learning_rate": 4.373316708229427e-06, "loss": 0.2851, "step": 62690 }, { "epoch": 15.635910224438902, "grad_norm": 6.524157524108887, "learning_rate": 4.370822942643392e-06, "loss": 0.3196, "step": 62700 }, { "epoch": 15.638403990024937, "grad_norm": 12.354263305664062, "learning_rate": 4.368329177057357e-06, "loss": 0.3764, "step": 62710 }, { "epoch": 15.640897755610972, "grad_norm": 8.018882751464844, "learning_rate": 4.365835411471322e-06, "loss": 0.3431, "step": 62720 }, { "epoch": 15.643391521197007, "grad_norm": 4.7391228675842285, "learning_rate": 4.363341645885287e-06, "loss": 0.2748, "step": 62730 }, { "epoch": 15.645885286783042, "grad_norm": 7.503388404846191, "learning_rate": 4.360847880299252e-06, "loss": 0.3772, "step": 62740 }, { "epoch": 15.648379052369076, "grad_norm": 8.94284725189209, "learning_rate": 4.358354114713218e-06, "loss": 0.3285, "step": 62750 }, { "epoch": 15.650872817955111, "grad_norm": 7.088261127471924, "learning_rate": 4.355860349127182e-06, "loss": 0.2969, "step": 62760 }, { "epoch": 15.653366583541148, "grad_norm": 5.1243414878845215, "learning_rate": 4.3533665835411475e-06, "loss": 0.3318, "step": 62770 }, { "epoch": 15.655860349127183, "grad_norm": 9.318085670471191, "learning_rate": 4.350872817955113e-06, "loss": 0.3598, "step": 62780 }, { "epoch": 15.658354114713218, "grad_norm": 7.903304576873779, "learning_rate": 4.348379052369078e-06, "loss": 0.3319, "step": 62790 }, { "epoch": 15.660847880299253, "grad_norm": 7.943011283874512, "learning_rate": 4.3458852867830425e-06, "loss": 0.3095, "step": 62800 }, { "epoch": 15.663341645885287, "grad_norm": 8.990947723388672, "learning_rate": 4.343391521197008e-06, "loss": 0.2832, "step": 62810 }, { "epoch": 15.665835411471322, "grad_norm": 11.269744873046875, "learning_rate": 4.340897755610972e-06, "loss": 0.3504, "step": 62820 }, { "epoch": 15.668329177057357, "grad_norm": 8.170928955078125, "learning_rate": 4.3384039900249385e-06, "loss": 0.2957, "step": 62830 }, { "epoch": 15.670822942643392, "grad_norm": 10.878934860229492, "learning_rate": 4.335910224438903e-06, "loss": 0.3487, "step": 62840 }, { "epoch": 15.673316708229427, "grad_norm": 7.941391944885254, "learning_rate": 4.333416458852868e-06, "loss": 0.3278, "step": 62850 }, { "epoch": 15.675810473815462, "grad_norm": 8.739579200744629, "learning_rate": 4.330922693266833e-06, "loss": 0.3454, "step": 62860 }, { "epoch": 15.678304239401497, "grad_norm": 7.752486705780029, "learning_rate": 4.328428927680798e-06, "loss": 0.3343, "step": 62870 }, { "epoch": 15.680798004987532, "grad_norm": 10.62015151977539, "learning_rate": 4.325935162094763e-06, "loss": 0.3248, "step": 62880 }, { "epoch": 15.683291770573566, "grad_norm": 7.1810832023620605, "learning_rate": 4.323441396508729e-06, "loss": 0.2936, "step": 62890 }, { "epoch": 15.685785536159601, "grad_norm": 7.9837236404418945, "learning_rate": 4.320947630922694e-06, "loss": 0.2879, "step": 62900 }, { "epoch": 15.688279301745636, "grad_norm": 9.112959861755371, "learning_rate": 4.318453865336658e-06, "loss": 0.2799, "step": 62910 }, { "epoch": 15.690773067331671, "grad_norm": 8.914131164550781, "learning_rate": 4.315960099750624e-06, "loss": 0.3874, "step": 62920 }, { "epoch": 15.693266832917706, "grad_norm": 10.754671096801758, "learning_rate": 4.313466334164589e-06, "loss": 0.3674, "step": 62930 }, { "epoch": 15.69576059850374, "grad_norm": 8.270440101623535, "learning_rate": 4.310972568578554e-06, "loss": 0.3293, "step": 62940 }, { "epoch": 15.698254364089776, "grad_norm": 9.554506301879883, "learning_rate": 4.308478802992519e-06, "loss": 0.3053, "step": 62950 }, { "epoch": 15.70074812967581, "grad_norm": 8.808588981628418, "learning_rate": 4.305985037406484e-06, "loss": 0.3664, "step": 62960 }, { "epoch": 15.703241895261845, "grad_norm": 16.313865661621094, "learning_rate": 4.303491271820449e-06, "loss": 0.3232, "step": 62970 }, { "epoch": 15.70573566084788, "grad_norm": 18.93476104736328, "learning_rate": 4.300997506234415e-06, "loss": 0.3315, "step": 62980 }, { "epoch": 15.708229426433915, "grad_norm": 7.082018852233887, "learning_rate": 4.298503740648379e-06, "loss": 0.3039, "step": 62990 }, { "epoch": 15.71072319201995, "grad_norm": 5.019651889801025, "learning_rate": 4.2960099750623445e-06, "loss": 0.3773, "step": 63000 }, { "epoch": 15.713216957605985, "grad_norm": 7.572514057159424, "learning_rate": 4.29351620947631e-06, "loss": 0.3187, "step": 63010 }, { "epoch": 15.71571072319202, "grad_norm": 7.050946235656738, "learning_rate": 4.291022443890275e-06, "loss": 0.3341, "step": 63020 }, { "epoch": 15.718204488778055, "grad_norm": 11.754009246826172, "learning_rate": 4.2885286783042396e-06, "loss": 0.3174, "step": 63030 }, { "epoch": 15.72069825436409, "grad_norm": 6.041005611419678, "learning_rate": 4.286034912718205e-06, "loss": 0.2952, "step": 63040 }, { "epoch": 15.723192019950124, "grad_norm": 9.302490234375, "learning_rate": 4.283541147132169e-06, "loss": 0.3406, "step": 63050 }, { "epoch": 15.72568578553616, "grad_norm": 7.180544853210449, "learning_rate": 4.2810473815461355e-06, "loss": 0.3036, "step": 63060 }, { "epoch": 15.728179551122194, "grad_norm": 10.246197700500488, "learning_rate": 4.2785536159601e-06, "loss": 0.3262, "step": 63070 }, { "epoch": 15.730673316708229, "grad_norm": 10.846905708312988, "learning_rate": 4.276059850374065e-06, "loss": 0.3819, "step": 63080 }, { "epoch": 15.733167082294264, "grad_norm": 7.604201316833496, "learning_rate": 4.27356608478803e-06, "loss": 0.3568, "step": 63090 }, { "epoch": 15.735660847880299, "grad_norm": 9.36225414276123, "learning_rate": 4.271072319201995e-06, "loss": 0.3201, "step": 63100 }, { "epoch": 15.738154613466333, "grad_norm": 15.31506061553955, "learning_rate": 4.26857855361596e-06, "loss": 0.3591, "step": 63110 }, { "epoch": 15.740648379052368, "grad_norm": 10.249829292297363, "learning_rate": 4.266084788029926e-06, "loss": 0.3486, "step": 63120 }, { "epoch": 15.743142144638403, "grad_norm": 8.24986457824707, "learning_rate": 4.263591022443891e-06, "loss": 0.3455, "step": 63130 }, { "epoch": 15.745635910224438, "grad_norm": 6.340268135070801, "learning_rate": 4.2610972568578554e-06, "loss": 0.3159, "step": 63140 }, { "epoch": 15.748129675810475, "grad_norm": 8.134374618530273, "learning_rate": 4.258603491271821e-06, "loss": 0.3524, "step": 63150 }, { "epoch": 15.75062344139651, "grad_norm": 12.707883834838867, "learning_rate": 4.256109725685786e-06, "loss": 0.3194, "step": 63160 }, { "epoch": 15.753117206982544, "grad_norm": 10.832149505615234, "learning_rate": 4.253615960099751e-06, "loss": 0.3122, "step": 63170 }, { "epoch": 15.75561097256858, "grad_norm": 10.519184112548828, "learning_rate": 4.251122194513716e-06, "loss": 0.3642, "step": 63180 }, { "epoch": 15.758104738154614, "grad_norm": 14.12321949005127, "learning_rate": 4.248628428927681e-06, "loss": 0.3388, "step": 63190 }, { "epoch": 15.760598503740649, "grad_norm": 6.439551830291748, "learning_rate": 4.2461346633416465e-06, "loss": 0.385, "step": 63200 }, { "epoch": 15.763092269326684, "grad_norm": 25.51148223876953, "learning_rate": 4.243640897755612e-06, "loss": 0.4235, "step": 63210 }, { "epoch": 15.765586034912719, "grad_norm": 9.474204063415527, "learning_rate": 4.241147132169576e-06, "loss": 0.3963, "step": 63220 }, { "epoch": 15.768079800498754, "grad_norm": 10.349353790283203, "learning_rate": 4.2386533665835415e-06, "loss": 0.293, "step": 63230 }, { "epoch": 15.770573566084789, "grad_norm": 9.036044120788574, "learning_rate": 4.236159600997507e-06, "loss": 0.3493, "step": 63240 }, { "epoch": 15.773067331670823, "grad_norm": 6.4520392417907715, "learning_rate": 4.233665835411472e-06, "loss": 0.3464, "step": 63250 }, { "epoch": 15.775561097256858, "grad_norm": 6.277656078338623, "learning_rate": 4.231172069825437e-06, "loss": 0.4086, "step": 63260 }, { "epoch": 15.778054862842893, "grad_norm": 7.3123979568481445, "learning_rate": 4.228678304239402e-06, "loss": 0.3339, "step": 63270 }, { "epoch": 15.780548628428928, "grad_norm": 4.954773426055908, "learning_rate": 4.226184538653366e-06, "loss": 0.3325, "step": 63280 }, { "epoch": 15.783042394014963, "grad_norm": 8.477773666381836, "learning_rate": 4.223690773067332e-06, "loss": 0.3338, "step": 63290 }, { "epoch": 15.785536159600998, "grad_norm": 6.290766716003418, "learning_rate": 4.221197007481297e-06, "loss": 0.3108, "step": 63300 }, { "epoch": 15.788029925187033, "grad_norm": 6.442891597747803, "learning_rate": 4.218703241895262e-06, "loss": 0.304, "step": 63310 }, { "epoch": 15.790523690773068, "grad_norm": 7.096189022064209, "learning_rate": 4.216209476309227e-06, "loss": 0.2894, "step": 63320 }, { "epoch": 15.793017456359102, "grad_norm": 7.581128120422363, "learning_rate": 4.213715710723192e-06, "loss": 0.3329, "step": 63330 }, { "epoch": 15.795511221945137, "grad_norm": 7.5609130859375, "learning_rate": 4.211221945137157e-06, "loss": 0.233, "step": 63340 }, { "epoch": 15.798004987531172, "grad_norm": 6.8791584968566895, "learning_rate": 4.208728179551123e-06, "loss": 0.3167, "step": 63350 }, { "epoch": 15.800498753117207, "grad_norm": 8.420093536376953, "learning_rate": 4.206234413965087e-06, "loss": 0.3346, "step": 63360 }, { "epoch": 15.802992518703242, "grad_norm": 8.641119003295898, "learning_rate": 4.2037406483790525e-06, "loss": 0.3995, "step": 63370 }, { "epoch": 15.805486284289277, "grad_norm": 13.097655296325684, "learning_rate": 4.201246882793018e-06, "loss": 0.3696, "step": 63380 }, { "epoch": 15.807980049875312, "grad_norm": 7.517343521118164, "learning_rate": 4.198753117206983e-06, "loss": 0.2953, "step": 63390 }, { "epoch": 15.810473815461346, "grad_norm": 9.811420440673828, "learning_rate": 4.196259351620948e-06, "loss": 0.3546, "step": 63400 }, { "epoch": 15.812967581047381, "grad_norm": 8.417027473449707, "learning_rate": 4.193765586034913e-06, "loss": 0.3077, "step": 63410 }, { "epoch": 15.815461346633416, "grad_norm": 7.933226108551025, "learning_rate": 4.191271820448878e-06, "loss": 0.3254, "step": 63420 }, { "epoch": 15.817955112219451, "grad_norm": 6.797752380371094, "learning_rate": 4.1887780548628435e-06, "loss": 0.2962, "step": 63430 }, { "epoch": 15.820448877805486, "grad_norm": 6.070878505706787, "learning_rate": 4.186284289276809e-06, "loss": 0.2808, "step": 63440 }, { "epoch": 15.82294264339152, "grad_norm": 10.812505722045898, "learning_rate": 4.183790523690773e-06, "loss": 0.334, "step": 63450 }, { "epoch": 15.825436408977556, "grad_norm": 9.91736125946045, "learning_rate": 4.1812967581047386e-06, "loss": 0.2903, "step": 63460 }, { "epoch": 15.82793017456359, "grad_norm": 7.868504524230957, "learning_rate": 4.178802992518704e-06, "loss": 0.3427, "step": 63470 }, { "epoch": 15.830423940149625, "grad_norm": 10.431106567382812, "learning_rate": 4.176309226932669e-06, "loss": 0.3997, "step": 63480 }, { "epoch": 15.83291770573566, "grad_norm": 9.199599266052246, "learning_rate": 4.173815461346634e-06, "loss": 0.3854, "step": 63490 }, { "epoch": 15.835411471321695, "grad_norm": 9.323473930358887, "learning_rate": 4.171321695760599e-06, "loss": 0.3475, "step": 63500 }, { "epoch": 15.83790523690773, "grad_norm": 6.273562431335449, "learning_rate": 4.1688279301745634e-06, "loss": 0.3192, "step": 63510 }, { "epoch": 15.840399002493765, "grad_norm": 9.469862937927246, "learning_rate": 4.166334164588529e-06, "loss": 0.3289, "step": 63520 }, { "epoch": 15.8428927680798, "grad_norm": 10.834332466125488, "learning_rate": 4.163840399002494e-06, "loss": 0.3935, "step": 63530 }, { "epoch": 15.845386533665835, "grad_norm": 11.64710807800293, "learning_rate": 4.161346633416459e-06, "loss": 0.3366, "step": 63540 }, { "epoch": 15.84788029925187, "grad_norm": 8.563414573669434, "learning_rate": 4.158852867830424e-06, "loss": 0.3576, "step": 63550 }, { "epoch": 15.850374064837904, "grad_norm": 8.679342269897461, "learning_rate": 4.156359102244389e-06, "loss": 0.2798, "step": 63560 }, { "epoch": 15.85286783042394, "grad_norm": 4.7781548500061035, "learning_rate": 4.1538653366583544e-06, "loss": 0.3337, "step": 63570 }, { "epoch": 15.855361596009976, "grad_norm": 4.6593523025512695, "learning_rate": 4.15137157107232e-06, "loss": 0.3748, "step": 63580 }, { "epoch": 15.85785536159601, "grad_norm": 7.632694244384766, "learning_rate": 4.148877805486284e-06, "loss": 0.3599, "step": 63590 }, { "epoch": 15.860349127182046, "grad_norm": 6.817638397216797, "learning_rate": 4.1463840399002495e-06, "loss": 0.2949, "step": 63600 }, { "epoch": 15.86284289276808, "grad_norm": 10.572402954101562, "learning_rate": 4.143890274314215e-06, "loss": 0.3316, "step": 63610 }, { "epoch": 15.865336658354115, "grad_norm": 7.423572540283203, "learning_rate": 4.14139650872818e-06, "loss": 0.2964, "step": 63620 }, { "epoch": 15.86783042394015, "grad_norm": 8.025785446166992, "learning_rate": 4.1389027431421455e-06, "loss": 0.2967, "step": 63630 }, { "epoch": 15.870324189526185, "grad_norm": 5.359241485595703, "learning_rate": 4.13640897755611e-06, "loss": 0.3303, "step": 63640 }, { "epoch": 15.87281795511222, "grad_norm": 7.528040885925293, "learning_rate": 4.133915211970075e-06, "loss": 0.3465, "step": 63650 }, { "epoch": 15.875311720698255, "grad_norm": 8.721107482910156, "learning_rate": 4.1314214463840405e-06, "loss": 0.3032, "step": 63660 }, { "epoch": 15.87780548628429, "grad_norm": 8.390384674072266, "learning_rate": 4.128927680798006e-06, "loss": 0.2707, "step": 63670 }, { "epoch": 15.880299251870325, "grad_norm": 6.81118631362915, "learning_rate": 4.12643391521197e-06, "loss": 0.3659, "step": 63680 }, { "epoch": 15.88279301745636, "grad_norm": 11.449249267578125, "learning_rate": 4.123940149625936e-06, "loss": 0.3118, "step": 63690 }, { "epoch": 15.885286783042394, "grad_norm": 8.672457695007324, "learning_rate": 4.1214463840399e-06, "loss": 0.3703, "step": 63700 }, { "epoch": 15.88778054862843, "grad_norm": 6.935692310333252, "learning_rate": 4.118952618453866e-06, "loss": 0.2715, "step": 63710 }, { "epoch": 15.890274314214464, "grad_norm": 7.293999195098877, "learning_rate": 4.116458852867831e-06, "loss": 0.3367, "step": 63720 }, { "epoch": 15.892768079800499, "grad_norm": 8.051675796508789, "learning_rate": 4.113965087281796e-06, "loss": 0.3378, "step": 63730 }, { "epoch": 15.895261845386534, "grad_norm": 8.13101577758789, "learning_rate": 4.1114713216957605e-06, "loss": 0.3245, "step": 63740 }, { "epoch": 15.897755610972569, "grad_norm": 8.935870170593262, "learning_rate": 4.108977556109726e-06, "loss": 0.3168, "step": 63750 }, { "epoch": 15.900249376558603, "grad_norm": 8.20327091217041, "learning_rate": 4.106483790523691e-06, "loss": 0.3086, "step": 63760 }, { "epoch": 15.902743142144638, "grad_norm": 6.9828667640686035, "learning_rate": 4.103990024937656e-06, "loss": 0.3724, "step": 63770 }, { "epoch": 15.905236907730673, "grad_norm": 7.1580915451049805, "learning_rate": 4.101496259351621e-06, "loss": 0.3337, "step": 63780 }, { "epoch": 15.907730673316708, "grad_norm": 7.152059078216553, "learning_rate": 4.099002493765586e-06, "loss": 0.3067, "step": 63790 }, { "epoch": 15.910224438902743, "grad_norm": 8.175209999084473, "learning_rate": 4.0965087281795515e-06, "loss": 0.3518, "step": 63800 }, { "epoch": 15.912718204488778, "grad_norm": 8.411026000976562, "learning_rate": 4.094014962593517e-06, "loss": 0.2738, "step": 63810 }, { "epoch": 15.915211970074813, "grad_norm": 4.850775241851807, "learning_rate": 4.091521197007481e-06, "loss": 0.2802, "step": 63820 }, { "epoch": 15.917705735660848, "grad_norm": 11.431254386901855, "learning_rate": 4.0890274314214466e-06, "loss": 0.3478, "step": 63830 }, { "epoch": 15.920199501246882, "grad_norm": 7.01432991027832, "learning_rate": 4.086533665835412e-06, "loss": 0.3393, "step": 63840 }, { "epoch": 15.922693266832917, "grad_norm": 6.793428421020508, "learning_rate": 4.084039900249377e-06, "loss": 0.3174, "step": 63850 }, { "epoch": 15.925187032418952, "grad_norm": 7.512074947357178, "learning_rate": 4.081546134663342e-06, "loss": 0.2974, "step": 63860 }, { "epoch": 15.927680798004987, "grad_norm": 8.08775806427002, "learning_rate": 4.079052369077307e-06, "loss": 0.3465, "step": 63870 }, { "epoch": 15.930174563591022, "grad_norm": 8.87288761138916, "learning_rate": 4.076558603491272e-06, "loss": 0.329, "step": 63880 }, { "epoch": 15.932668329177057, "grad_norm": 8.738507270812988, "learning_rate": 4.0740648379052376e-06, "loss": 0.3361, "step": 63890 }, { "epoch": 15.935162094763092, "grad_norm": 6.354411602020264, "learning_rate": 4.071571072319203e-06, "loss": 0.3113, "step": 63900 }, { "epoch": 15.937655860349127, "grad_norm": 10.517175674438477, "learning_rate": 4.069077306733167e-06, "loss": 0.3381, "step": 63910 }, { "epoch": 15.940149625935161, "grad_norm": 8.709144592285156, "learning_rate": 4.066583541147133e-06, "loss": 0.3011, "step": 63920 }, { "epoch": 15.942643391521196, "grad_norm": 7.812455654144287, "learning_rate": 4.064089775561097e-06, "loss": 0.3099, "step": 63930 }, { "epoch": 15.945137157107231, "grad_norm": 7.7127685546875, "learning_rate": 4.061596009975063e-06, "loss": 0.3536, "step": 63940 }, { "epoch": 15.947630922693268, "grad_norm": 8.810644149780273, "learning_rate": 4.059102244389028e-06, "loss": 0.3048, "step": 63950 }, { "epoch": 15.950124688279303, "grad_norm": 9.955845832824707, "learning_rate": 4.056608478802993e-06, "loss": 0.3582, "step": 63960 }, { "epoch": 15.952618453865338, "grad_norm": 8.85975456237793, "learning_rate": 4.0541147132169575e-06, "loss": 0.3893, "step": 63970 }, { "epoch": 15.955112219451372, "grad_norm": 8.970422744750977, "learning_rate": 4.051620947630923e-06, "loss": 0.2967, "step": 63980 }, { "epoch": 15.957605985037407, "grad_norm": 8.689909934997559, "learning_rate": 4.049127182044888e-06, "loss": 0.3129, "step": 63990 }, { "epoch": 15.960099750623442, "grad_norm": 12.43597412109375, "learning_rate": 4.0466334164588534e-06, "loss": 0.4056, "step": 64000 }, { "epoch": 15.962593516209477, "grad_norm": 5.898103713989258, "learning_rate": 4.044139650872818e-06, "loss": 0.3884, "step": 64010 }, { "epoch": 15.965087281795512, "grad_norm": 9.561400413513184, "learning_rate": 4.041645885286783e-06, "loss": 0.2965, "step": 64020 }, { "epoch": 15.967581047381547, "grad_norm": 13.149139404296875, "learning_rate": 4.0391521197007485e-06, "loss": 0.2631, "step": 64030 }, { "epoch": 15.970074812967582, "grad_norm": 8.486005783081055, "learning_rate": 4.036658354114714e-06, "loss": 0.2731, "step": 64040 }, { "epoch": 15.972568578553616, "grad_norm": 10.793266296386719, "learning_rate": 4.034164588528678e-06, "loss": 0.3073, "step": 64050 }, { "epoch": 15.975062344139651, "grad_norm": 19.346826553344727, "learning_rate": 4.031670822942644e-06, "loss": 0.3753, "step": 64060 }, { "epoch": 15.977556109725686, "grad_norm": 9.948132514953613, "learning_rate": 4.029177057356609e-06, "loss": 0.314, "step": 64070 }, { "epoch": 15.980049875311721, "grad_norm": 8.224431991577148, "learning_rate": 4.026683291770574e-06, "loss": 0.3025, "step": 64080 }, { "epoch": 15.982543640897756, "grad_norm": 9.23838996887207, "learning_rate": 4.024189526184539e-06, "loss": 0.4478, "step": 64090 }, { "epoch": 15.98503740648379, "grad_norm": 11.388845443725586, "learning_rate": 4.021695760598504e-06, "loss": 0.3425, "step": 64100 }, { "epoch": 15.987531172069826, "grad_norm": 7.132105350494385, "learning_rate": 4.0192019950124685e-06, "loss": 0.3889, "step": 64110 }, { "epoch": 15.99002493765586, "grad_norm": 7.140593528747559, "learning_rate": 4.016708229426435e-06, "loss": 0.319, "step": 64120 }, { "epoch": 15.992518703241895, "grad_norm": 7.605659484863281, "learning_rate": 4.0142144638404e-06, "loss": 0.3565, "step": 64130 }, { "epoch": 15.99501246882793, "grad_norm": 9.850330352783203, "learning_rate": 4.011720698254364e-06, "loss": 0.363, "step": 64140 }, { "epoch": 15.997506234413965, "grad_norm": 5.737972259521484, "learning_rate": 4.00922693266833e-06, "loss": 0.3383, "step": 64150 }, { "epoch": 16.0, "grad_norm": 9.490584373474121, "learning_rate": 4.006733167082294e-06, "loss": 0.3047, "step": 64160 }, { "epoch": 16.0, "eval_loss": 0.41599616408348083, "eval_runtime": 60.0234, "eval_samples_per_second": 16.71, "eval_steps_per_second": 16.71, "step": 64160 }, { "epoch": 16.002493765586035, "grad_norm": 13.953182220458984, "learning_rate": 4.0042394014962595e-06, "loss": 0.3067, "step": 64170 }, { "epoch": 16.00498753117207, "grad_norm": 7.221786022186279, "learning_rate": 4.001745635910225e-06, "loss": 0.3639, "step": 64180 }, { "epoch": 16.007481296758105, "grad_norm": 8.095586776733398, "learning_rate": 3.99925187032419e-06, "loss": 0.3427, "step": 64190 }, { "epoch": 16.00997506234414, "grad_norm": 5.289329528808594, "learning_rate": 3.9967581047381546e-06, "loss": 0.3658, "step": 64200 }, { "epoch": 16.012468827930174, "grad_norm": 10.353299140930176, "learning_rate": 3.99426433915212e-06, "loss": 0.3201, "step": 64210 }, { "epoch": 16.01496259351621, "grad_norm": 6.504826545715332, "learning_rate": 3.991770573566085e-06, "loss": 0.338, "step": 64220 }, { "epoch": 16.017456359102244, "grad_norm": 10.51669692993164, "learning_rate": 3.9892768079800505e-06, "loss": 0.2674, "step": 64230 }, { "epoch": 16.01995012468828, "grad_norm": 10.650725364685059, "learning_rate": 3.986783042394015e-06, "loss": 0.2787, "step": 64240 }, { "epoch": 16.022443890274314, "grad_norm": 8.505513191223145, "learning_rate": 3.98428927680798e-06, "loss": 0.2972, "step": 64250 }, { "epoch": 16.02493765586035, "grad_norm": 10.878944396972656, "learning_rate": 3.9817955112219456e-06, "loss": 0.3168, "step": 64260 }, { "epoch": 16.027431421446384, "grad_norm": 7.452136993408203, "learning_rate": 3.979301745635911e-06, "loss": 0.319, "step": 64270 }, { "epoch": 16.02992518703242, "grad_norm": 7.434098720550537, "learning_rate": 3.976807980049875e-06, "loss": 0.3388, "step": 64280 }, { "epoch": 16.032418952618453, "grad_norm": 6.33979606628418, "learning_rate": 3.974314214463841e-06, "loss": 0.2748, "step": 64290 }, { "epoch": 16.034912718204488, "grad_norm": 13.049262046813965, "learning_rate": 3.971820448877806e-06, "loss": 0.3333, "step": 64300 }, { "epoch": 16.037406483790523, "grad_norm": 12.862238883972168, "learning_rate": 3.969326683291771e-06, "loss": 0.3516, "step": 64310 }, { "epoch": 16.039900249376558, "grad_norm": 7.679028034210205, "learning_rate": 3.966832917705736e-06, "loss": 0.3192, "step": 64320 }, { "epoch": 16.042394014962593, "grad_norm": 7.861089706420898, "learning_rate": 3.964339152119701e-06, "loss": 0.2909, "step": 64330 }, { "epoch": 16.044887780548628, "grad_norm": 7.005455493927002, "learning_rate": 3.9618453865336655e-06, "loss": 0.3583, "step": 64340 }, { "epoch": 16.047381546134662, "grad_norm": 11.43169116973877, "learning_rate": 3.959351620947632e-06, "loss": 0.3038, "step": 64350 }, { "epoch": 16.049875311720697, "grad_norm": 8.389317512512207, "learning_rate": 3.956857855361596e-06, "loss": 0.2716, "step": 64360 }, { "epoch": 16.052369077306732, "grad_norm": 8.883370399475098, "learning_rate": 3.9543640897755614e-06, "loss": 0.3067, "step": 64370 }, { "epoch": 16.054862842892767, "grad_norm": 10.897636413574219, "learning_rate": 3.951870324189527e-06, "loss": 0.297, "step": 64380 }, { "epoch": 16.057356608478802, "grad_norm": 9.456404685974121, "learning_rate": 3.949376558603491e-06, "loss": 0.3652, "step": 64390 }, { "epoch": 16.059850374064837, "grad_norm": 8.108953475952148, "learning_rate": 3.9468827930174565e-06, "loss": 0.323, "step": 64400 }, { "epoch": 16.06234413965087, "grad_norm": 5.497220039367676, "learning_rate": 3.944389027431422e-06, "loss": 0.2902, "step": 64410 }, { "epoch": 16.064837905236907, "grad_norm": 8.865570068359375, "learning_rate": 3.941895261845387e-06, "loss": 0.356, "step": 64420 }, { "epoch": 16.06733167082294, "grad_norm": 6.835111141204834, "learning_rate": 3.939401496259352e-06, "loss": 0.3206, "step": 64430 }, { "epoch": 16.069825436408976, "grad_norm": 10.127936363220215, "learning_rate": 3.936907730673317e-06, "loss": 0.2834, "step": 64440 }, { "epoch": 16.07231920199501, "grad_norm": 10.34428882598877, "learning_rate": 3.934413965087282e-06, "loss": 0.2795, "step": 64450 }, { "epoch": 16.074812967581046, "grad_norm": 7.148595809936523, "learning_rate": 3.9319201995012475e-06, "loss": 0.3463, "step": 64460 }, { "epoch": 16.07730673316708, "grad_norm": 7.980280876159668, "learning_rate": 3.929426433915212e-06, "loss": 0.3167, "step": 64470 }, { "epoch": 16.079800498753116, "grad_norm": 11.756796836853027, "learning_rate": 3.926932668329177e-06, "loss": 0.3765, "step": 64480 }, { "epoch": 16.08229426433915, "grad_norm": 11.97585391998291, "learning_rate": 3.924438902743143e-06, "loss": 0.3805, "step": 64490 }, { "epoch": 16.084788029925186, "grad_norm": 11.348858833312988, "learning_rate": 3.921945137157108e-06, "loss": 0.2978, "step": 64500 }, { "epoch": 16.08728179551122, "grad_norm": 6.33409309387207, "learning_rate": 3.919451371571072e-06, "loss": 0.3154, "step": 64510 }, { "epoch": 16.089775561097255, "grad_norm": 10.497039794921875, "learning_rate": 3.916957605985038e-06, "loss": 0.2952, "step": 64520 }, { "epoch": 16.09226932668329, "grad_norm": 6.439425945281982, "learning_rate": 3.914463840399003e-06, "loss": 0.3474, "step": 64530 }, { "epoch": 16.094763092269325, "grad_norm": 11.366449356079102, "learning_rate": 3.911970074812968e-06, "loss": 0.3775, "step": 64540 }, { "epoch": 16.09725685785536, "grad_norm": 7.353078365325928, "learning_rate": 3.909476309226933e-06, "loss": 0.3125, "step": 64550 }, { "epoch": 16.099750623441395, "grad_norm": 7.988730430603027, "learning_rate": 3.906982543640898e-06, "loss": 0.3449, "step": 64560 }, { "epoch": 16.102244389027433, "grad_norm": 7.281782627105713, "learning_rate": 3.9044887780548625e-06, "loss": 0.3189, "step": 64570 }, { "epoch": 16.104738154613468, "grad_norm": 7.062814712524414, "learning_rate": 3.901995012468829e-06, "loss": 0.3316, "step": 64580 }, { "epoch": 16.107231920199503, "grad_norm": 6.708259582519531, "learning_rate": 3.899501246882793e-06, "loss": 0.2937, "step": 64590 }, { "epoch": 16.109725685785538, "grad_norm": 7.511795520782471, "learning_rate": 3.8970074812967585e-06, "loss": 0.3319, "step": 64600 }, { "epoch": 16.112219451371573, "grad_norm": 7.629890441894531, "learning_rate": 3.894513715710724e-06, "loss": 0.3135, "step": 64610 }, { "epoch": 16.114713216957608, "grad_norm": 12.183755874633789, "learning_rate": 3.892019950124688e-06, "loss": 0.3092, "step": 64620 }, { "epoch": 16.117206982543642, "grad_norm": 12.015825271606445, "learning_rate": 3.8895261845386536e-06, "loss": 0.3199, "step": 64630 }, { "epoch": 16.119700748129677, "grad_norm": 8.182219505310059, "learning_rate": 3.887032418952619e-06, "loss": 0.3227, "step": 64640 }, { "epoch": 16.122194513715712, "grad_norm": 8.107385635375977, "learning_rate": 3.884538653366584e-06, "loss": 0.275, "step": 64650 }, { "epoch": 16.124688279301747, "grad_norm": 12.364629745483398, "learning_rate": 3.882044887780549e-06, "loss": 0.3205, "step": 64660 }, { "epoch": 16.127182044887782, "grad_norm": 9.689504623413086, "learning_rate": 3.879551122194514e-06, "loss": 0.3629, "step": 64670 }, { "epoch": 16.129675810473817, "grad_norm": 9.650782585144043, "learning_rate": 3.877057356608479e-06, "loss": 0.3607, "step": 64680 }, { "epoch": 16.13216957605985, "grad_norm": 8.96108341217041, "learning_rate": 3.8745635910224446e-06, "loss": 0.3243, "step": 64690 }, { "epoch": 16.134663341645886, "grad_norm": 9.31535816192627, "learning_rate": 3.872069825436409e-06, "loss": 0.353, "step": 64700 }, { "epoch": 16.13715710723192, "grad_norm": 9.662627220153809, "learning_rate": 3.869576059850374e-06, "loss": 0.2988, "step": 64710 }, { "epoch": 16.139650872817956, "grad_norm": 6.727764129638672, "learning_rate": 3.86708229426434e-06, "loss": 0.2802, "step": 64720 }, { "epoch": 16.14214463840399, "grad_norm": 11.656500816345215, "learning_rate": 3.864588528678305e-06, "loss": 0.319, "step": 64730 }, { "epoch": 16.144638403990026, "grad_norm": 9.718652725219727, "learning_rate": 3.862094763092269e-06, "loss": 0.3288, "step": 64740 }, { "epoch": 16.14713216957606, "grad_norm": 9.155292510986328, "learning_rate": 3.859600997506235e-06, "loss": 0.305, "step": 64750 }, { "epoch": 16.149625935162096, "grad_norm": 8.969863891601562, "learning_rate": 3.8571072319202e-06, "loss": 0.3435, "step": 64760 }, { "epoch": 16.15211970074813, "grad_norm": 9.428884506225586, "learning_rate": 3.854613466334165e-06, "loss": 0.3198, "step": 64770 }, { "epoch": 16.154613466334165, "grad_norm": 10.018813133239746, "learning_rate": 3.85211970074813e-06, "loss": 0.3452, "step": 64780 }, { "epoch": 16.1571072319202, "grad_norm": 11.803756713867188, "learning_rate": 3.849625935162095e-06, "loss": 0.3619, "step": 64790 }, { "epoch": 16.159600997506235, "grad_norm": 14.001852989196777, "learning_rate": 3.84713216957606e-06, "loss": 0.3259, "step": 64800 }, { "epoch": 16.16209476309227, "grad_norm": 7.694314956665039, "learning_rate": 3.844638403990025e-06, "loss": 0.3697, "step": 64810 }, { "epoch": 16.164588528678305, "grad_norm": 7.822086811065674, "learning_rate": 3.84214463840399e-06, "loss": 0.2878, "step": 64820 }, { "epoch": 16.16708229426434, "grad_norm": 8.987359046936035, "learning_rate": 3.8396508728179555e-06, "loss": 0.3268, "step": 64830 }, { "epoch": 16.169576059850375, "grad_norm": 6.423337459564209, "learning_rate": 3.83715710723192e-06, "loss": 0.3347, "step": 64840 }, { "epoch": 16.17206982543641, "grad_norm": 9.673188209533691, "learning_rate": 3.834663341645885e-06, "loss": 0.3471, "step": 64850 }, { "epoch": 16.174563591022444, "grad_norm": 4.7321319580078125, "learning_rate": 3.832169576059851e-06, "loss": 0.3425, "step": 64860 }, { "epoch": 16.17705735660848, "grad_norm": 10.942599296569824, "learning_rate": 3.829675810473816e-06, "loss": 0.2551, "step": 64870 }, { "epoch": 16.179551122194514, "grad_norm": 9.56152629852295, "learning_rate": 3.827182044887781e-06, "loss": 0.3806, "step": 64880 }, { "epoch": 16.18204488778055, "grad_norm": 10.860115051269531, "learning_rate": 3.824688279301746e-06, "loss": 0.3677, "step": 64890 }, { "epoch": 16.184538653366584, "grad_norm": 6.14926815032959, "learning_rate": 3.822194513715711e-06, "loss": 0.3018, "step": 64900 }, { "epoch": 16.18703241895262, "grad_norm": 9.40666675567627, "learning_rate": 3.819700748129676e-06, "loss": 0.307, "step": 64910 }, { "epoch": 16.189526184538654, "grad_norm": 10.993182182312012, "learning_rate": 3.817206982543642e-06, "loss": 0.287, "step": 64920 }, { "epoch": 16.19201995012469, "grad_norm": 8.050580978393555, "learning_rate": 3.814713216957606e-06, "loss": 0.3454, "step": 64930 }, { "epoch": 16.194513715710723, "grad_norm": 7.049538612365723, "learning_rate": 3.8122194513715714e-06, "loss": 0.3349, "step": 64940 }, { "epoch": 16.197007481296758, "grad_norm": 6.026520729064941, "learning_rate": 3.8097256857855363e-06, "loss": 0.2819, "step": 64950 }, { "epoch": 16.199501246882793, "grad_norm": 9.586450576782227, "learning_rate": 3.8072319201995016e-06, "loss": 0.369, "step": 64960 }, { "epoch": 16.201995012468828, "grad_norm": 12.329106330871582, "learning_rate": 3.8047381546134665e-06, "loss": 0.2958, "step": 64970 }, { "epoch": 16.204488778054863, "grad_norm": 9.353446960449219, "learning_rate": 3.8022443890274318e-06, "loss": 0.3131, "step": 64980 }, { "epoch": 16.206982543640898, "grad_norm": 7.740355014801025, "learning_rate": 3.7997506234413967e-06, "loss": 0.265, "step": 64990 }, { "epoch": 16.209476309226932, "grad_norm": 7.058204174041748, "learning_rate": 3.797256857855362e-06, "loss": 0.3649, "step": 65000 }, { "epoch": 16.211970074812967, "grad_norm": 9.42304801940918, "learning_rate": 3.794763092269327e-06, "loss": 0.3743, "step": 65010 }, { "epoch": 16.214463840399002, "grad_norm": 10.909148216247559, "learning_rate": 3.792269326683292e-06, "loss": 0.3381, "step": 65020 }, { "epoch": 16.216957605985037, "grad_norm": 6.674330711364746, "learning_rate": 3.789775561097257e-06, "loss": 0.3916, "step": 65030 }, { "epoch": 16.219451371571072, "grad_norm": 7.515557765960693, "learning_rate": 3.7872817955112224e-06, "loss": 0.2461, "step": 65040 }, { "epoch": 16.221945137157107, "grad_norm": 8.817490577697754, "learning_rate": 3.7847880299251872e-06, "loss": 0.282, "step": 65050 }, { "epoch": 16.22443890274314, "grad_norm": 9.24267578125, "learning_rate": 3.7822942643391526e-06, "loss": 0.3451, "step": 65060 }, { "epoch": 16.226932668329177, "grad_norm": 8.435280799865723, "learning_rate": 3.7798004987531174e-06, "loss": 0.3645, "step": 65070 }, { "epoch": 16.22942643391521, "grad_norm": 8.729866027832031, "learning_rate": 3.7773067331670827e-06, "loss": 0.346, "step": 65080 }, { "epoch": 16.231920199501246, "grad_norm": 9.877275466918945, "learning_rate": 3.7748129675810476e-06, "loss": 0.3218, "step": 65090 }, { "epoch": 16.23441396508728, "grad_norm": 7.41887903213501, "learning_rate": 3.772319201995013e-06, "loss": 0.2789, "step": 65100 }, { "epoch": 16.236907730673316, "grad_norm": 8.857915878295898, "learning_rate": 3.7698254364089783e-06, "loss": 0.347, "step": 65110 }, { "epoch": 16.23940149625935, "grad_norm": 9.079187393188477, "learning_rate": 3.767331670822943e-06, "loss": 0.2908, "step": 65120 }, { "epoch": 16.241895261845386, "grad_norm": 8.195780754089355, "learning_rate": 3.7648379052369085e-06, "loss": 0.3252, "step": 65130 }, { "epoch": 16.24438902743142, "grad_norm": 11.435444831848145, "learning_rate": 3.762344139650873e-06, "loss": 0.377, "step": 65140 }, { "epoch": 16.246882793017456, "grad_norm": 8.027785301208496, "learning_rate": 3.7598503740648386e-06, "loss": 0.2409, "step": 65150 }, { "epoch": 16.24937655860349, "grad_norm": 9.426610946655273, "learning_rate": 3.757356608478803e-06, "loss": 0.3731, "step": 65160 }, { "epoch": 16.251870324189525, "grad_norm": 11.091520309448242, "learning_rate": 3.7548628428927684e-06, "loss": 0.3764, "step": 65170 }, { "epoch": 16.25436408977556, "grad_norm": 5.973906517028809, "learning_rate": 3.7523690773067333e-06, "loss": 0.2766, "step": 65180 }, { "epoch": 16.256857855361595, "grad_norm": 8.147195816040039, "learning_rate": 3.7498753117206986e-06, "loss": 0.3152, "step": 65190 }, { "epoch": 16.25935162094763, "grad_norm": 8.979331016540527, "learning_rate": 3.7473815461346635e-06, "loss": 0.3257, "step": 65200 }, { "epoch": 16.261845386533665, "grad_norm": 11.531305313110352, "learning_rate": 3.744887780548629e-06, "loss": 0.3556, "step": 65210 }, { "epoch": 16.2643391521197, "grad_norm": 11.863848686218262, "learning_rate": 3.7423940149625937e-06, "loss": 0.3515, "step": 65220 }, { "epoch": 16.266832917705734, "grad_norm": 5.3083038330078125, "learning_rate": 3.739900249376559e-06, "loss": 0.2752, "step": 65230 }, { "epoch": 16.26932668329177, "grad_norm": 7.9635820388793945, "learning_rate": 3.737406483790524e-06, "loss": 0.3552, "step": 65240 }, { "epoch": 16.271820448877804, "grad_norm": 10.961655616760254, "learning_rate": 3.734912718204489e-06, "loss": 0.3359, "step": 65250 }, { "epoch": 16.27431421446384, "grad_norm": 8.826502799987793, "learning_rate": 3.732418952618454e-06, "loss": 0.2981, "step": 65260 }, { "epoch": 16.276807980049874, "grad_norm": 11.395182609558105, "learning_rate": 3.7299251870324194e-06, "loss": 0.3708, "step": 65270 }, { "epoch": 16.27930174563591, "grad_norm": 7.6695146560668945, "learning_rate": 3.7274314214463843e-06, "loss": 0.3029, "step": 65280 }, { "epoch": 16.281795511221944, "grad_norm": 10.41583251953125, "learning_rate": 3.7249376558603496e-06, "loss": 0.3226, "step": 65290 }, { "epoch": 16.28428927680798, "grad_norm": 7.618106842041016, "learning_rate": 3.7224438902743145e-06, "loss": 0.3199, "step": 65300 }, { "epoch": 16.286783042394013, "grad_norm": 8.326313018798828, "learning_rate": 3.71995012468828e-06, "loss": 0.3513, "step": 65310 }, { "epoch": 16.28927680798005, "grad_norm": 5.363463878631592, "learning_rate": 3.7174563591022443e-06, "loss": 0.3408, "step": 65320 }, { "epoch": 16.291770573566083, "grad_norm": 9.034497261047363, "learning_rate": 3.71496259351621e-06, "loss": 0.324, "step": 65330 }, { "epoch": 16.294264339152118, "grad_norm": 13.3429536819458, "learning_rate": 3.7124688279301744e-06, "loss": 0.3495, "step": 65340 }, { "epoch": 16.296758104738153, "grad_norm": 14.830777168273926, "learning_rate": 3.7099750623441398e-06, "loss": 0.3499, "step": 65350 }, { "epoch": 16.29925187032419, "grad_norm": 6.612533092498779, "learning_rate": 3.7074812967581055e-06, "loss": 0.3221, "step": 65360 }, { "epoch": 16.301745635910226, "grad_norm": 6.141602516174316, "learning_rate": 3.70498753117207e-06, "loss": 0.3303, "step": 65370 }, { "epoch": 16.30423940149626, "grad_norm": 8.41512393951416, "learning_rate": 3.7024937655860353e-06, "loss": 0.2878, "step": 65380 }, { "epoch": 16.306733167082296, "grad_norm": 5.508592128753662, "learning_rate": 3.7e-06, "loss": 0.3166, "step": 65390 }, { "epoch": 16.30922693266833, "grad_norm": 9.821744918823242, "learning_rate": 3.6975062344139655e-06, "loss": 0.3125, "step": 65400 }, { "epoch": 16.311720698254366, "grad_norm": 6.723392963409424, "learning_rate": 3.6950124688279303e-06, "loss": 0.3018, "step": 65410 }, { "epoch": 16.3142144638404, "grad_norm": 8.72385311126709, "learning_rate": 3.6925187032418957e-06, "loss": 0.3337, "step": 65420 }, { "epoch": 16.316708229426435, "grad_norm": 12.842667579650879, "learning_rate": 3.6900249376558605e-06, "loss": 0.3676, "step": 65430 }, { "epoch": 16.31920199501247, "grad_norm": 9.1225004196167, "learning_rate": 3.687531172069826e-06, "loss": 0.3989, "step": 65440 }, { "epoch": 16.321695760598505, "grad_norm": 11.330621719360352, "learning_rate": 3.6850374064837907e-06, "loss": 0.3592, "step": 65450 }, { "epoch": 16.32418952618454, "grad_norm": 10.708224296569824, "learning_rate": 3.682543640897756e-06, "loss": 0.3368, "step": 65460 }, { "epoch": 16.326683291770575, "grad_norm": 11.212067604064941, "learning_rate": 3.680049875311721e-06, "loss": 0.3081, "step": 65470 }, { "epoch": 16.32917705735661, "grad_norm": 6.18818473815918, "learning_rate": 3.6775561097256862e-06, "loss": 0.3428, "step": 65480 }, { "epoch": 16.331670822942645, "grad_norm": 7.7143635749816895, "learning_rate": 3.675062344139651e-06, "loss": 0.2853, "step": 65490 }, { "epoch": 16.33416458852868, "grad_norm": 9.196212768554688, "learning_rate": 3.6725685785536164e-06, "loss": 0.3176, "step": 65500 }, { "epoch": 16.336658354114714, "grad_norm": 9.68375301361084, "learning_rate": 3.6700748129675813e-06, "loss": 0.3151, "step": 65510 }, { "epoch": 16.33915211970075, "grad_norm": 7.000097274780273, "learning_rate": 3.6675810473815466e-06, "loss": 0.3043, "step": 65520 }, { "epoch": 16.341645885286784, "grad_norm": 6.249640464782715, "learning_rate": 3.6650872817955115e-06, "loss": 0.3183, "step": 65530 }, { "epoch": 16.34413965087282, "grad_norm": 9.410320281982422, "learning_rate": 3.662593516209477e-06, "loss": 0.355, "step": 65540 }, { "epoch": 16.346633416458854, "grad_norm": 8.34964656829834, "learning_rate": 3.6600997506234413e-06, "loss": 0.3565, "step": 65550 }, { "epoch": 16.34912718204489, "grad_norm": 7.488079071044922, "learning_rate": 3.657605985037407e-06, "loss": 0.3327, "step": 65560 }, { "epoch": 16.351620947630924, "grad_norm": 12.476749420166016, "learning_rate": 3.6551122194513715e-06, "loss": 0.2578, "step": 65570 }, { "epoch": 16.35411471321696, "grad_norm": 7.019128322601318, "learning_rate": 3.652618453865337e-06, "loss": 0.332, "step": 65580 }, { "epoch": 16.356608478802993, "grad_norm": 7.373568058013916, "learning_rate": 3.6501246882793017e-06, "loss": 0.3988, "step": 65590 }, { "epoch": 16.359102244389028, "grad_norm": 8.25925064086914, "learning_rate": 3.647630922693267e-06, "loss": 0.359, "step": 65600 }, { "epoch": 16.361596009975063, "grad_norm": 9.343754768371582, "learning_rate": 3.6451371571072323e-06, "loss": 0.3369, "step": 65610 }, { "epoch": 16.364089775561098, "grad_norm": 7.88173770904541, "learning_rate": 3.642892768079801e-06, "loss": 0.3372, "step": 65620 }, { "epoch": 16.366583541147133, "grad_norm": 12.04601001739502, "learning_rate": 3.640399002493766e-06, "loss": 0.3215, "step": 65630 }, { "epoch": 16.369077306733168, "grad_norm": 11.845905303955078, "learning_rate": 3.637905236907731e-06, "loss": 0.4036, "step": 65640 }, { "epoch": 16.371571072319203, "grad_norm": 11.93884563446045, "learning_rate": 3.635411471321696e-06, "loss": 0.3596, "step": 65650 }, { "epoch": 16.374064837905237, "grad_norm": 8.246026992797852, "learning_rate": 3.6329177057356614e-06, "loss": 0.2996, "step": 65660 }, { "epoch": 16.376558603491272, "grad_norm": 10.389656066894531, "learning_rate": 3.6304239401496263e-06, "loss": 0.3356, "step": 65670 }, { "epoch": 16.379052369077307, "grad_norm": 9.619356155395508, "learning_rate": 3.6279301745635916e-06, "loss": 0.3053, "step": 65680 }, { "epoch": 16.381546134663342, "grad_norm": 10.337359428405762, "learning_rate": 3.625436408977556e-06, "loss": 0.3775, "step": 65690 }, { "epoch": 16.384039900249377, "grad_norm": 9.273616790771484, "learning_rate": 3.6229426433915218e-06, "loss": 0.3492, "step": 65700 }, { "epoch": 16.38653366583541, "grad_norm": 7.120643138885498, "learning_rate": 3.6204488778054862e-06, "loss": 0.3044, "step": 65710 }, { "epoch": 16.389027431421447, "grad_norm": 7.253455638885498, "learning_rate": 3.6179551122194516e-06, "loss": 0.3432, "step": 65720 }, { "epoch": 16.39152119700748, "grad_norm": 9.472309112548828, "learning_rate": 3.6154613466334164e-06, "loss": 0.3314, "step": 65730 }, { "epoch": 16.394014962593516, "grad_norm": 12.149874687194824, "learning_rate": 3.6129675810473818e-06, "loss": 0.4217, "step": 65740 }, { "epoch": 16.39650872817955, "grad_norm": 9.527915954589844, "learning_rate": 3.6104738154613466e-06, "loss": 0.3714, "step": 65750 }, { "epoch": 16.399002493765586, "grad_norm": 11.615216255187988, "learning_rate": 3.607980049875312e-06, "loss": 0.3939, "step": 65760 }, { "epoch": 16.40149625935162, "grad_norm": 7.617849826812744, "learning_rate": 3.605486284289277e-06, "loss": 0.3533, "step": 65770 }, { "epoch": 16.403990024937656, "grad_norm": 6.392154693603516, "learning_rate": 3.602992518703242e-06, "loss": 0.3103, "step": 65780 }, { "epoch": 16.40648379052369, "grad_norm": 9.79951000213623, "learning_rate": 3.6004987531172075e-06, "loss": 0.3713, "step": 65790 }, { "epoch": 16.408977556109726, "grad_norm": 8.584783554077148, "learning_rate": 3.5980049875311723e-06, "loss": 0.3699, "step": 65800 }, { "epoch": 16.41147132169576, "grad_norm": 9.751562118530273, "learning_rate": 3.5955112219451376e-06, "loss": 0.3959, "step": 65810 }, { "epoch": 16.413965087281795, "grad_norm": 6.995100975036621, "learning_rate": 3.5930174563591025e-06, "loss": 0.2217, "step": 65820 }, { "epoch": 16.41645885286783, "grad_norm": 12.037008285522461, "learning_rate": 3.590523690773068e-06, "loss": 0.3115, "step": 65830 }, { "epoch": 16.418952618453865, "grad_norm": 6.427266597747803, "learning_rate": 3.5880299251870327e-06, "loss": 0.3391, "step": 65840 }, { "epoch": 16.4214463840399, "grad_norm": 6.5554890632629395, "learning_rate": 3.585536159600998e-06, "loss": 0.3549, "step": 65850 }, { "epoch": 16.423940149625935, "grad_norm": 7.430528163909912, "learning_rate": 3.583042394014963e-06, "loss": 0.322, "step": 65860 }, { "epoch": 16.42643391521197, "grad_norm": 6.291595935821533, "learning_rate": 3.5805486284289282e-06, "loss": 0.3099, "step": 65870 }, { "epoch": 16.428927680798004, "grad_norm": 8.320535659790039, "learning_rate": 3.578054862842893e-06, "loss": 0.304, "step": 65880 }, { "epoch": 16.43142144638404, "grad_norm": 8.501724243164062, "learning_rate": 3.5755610972568584e-06, "loss": 0.3474, "step": 65890 }, { "epoch": 16.433915211970074, "grad_norm": 10.219943046569824, "learning_rate": 3.573067331670823e-06, "loss": 0.2741, "step": 65900 }, { "epoch": 16.43640897755611, "grad_norm": 9.32536792755127, "learning_rate": 3.5705735660847886e-06, "loss": 0.3118, "step": 65910 }, { "epoch": 16.438902743142144, "grad_norm": 7.996541976928711, "learning_rate": 3.568079800498753e-06, "loss": 0.3923, "step": 65920 }, { "epoch": 16.44139650872818, "grad_norm": 7.270846366882324, "learning_rate": 3.5655860349127184e-06, "loss": 0.4512, "step": 65930 }, { "epoch": 16.443890274314214, "grad_norm": 8.105086326599121, "learning_rate": 3.5630922693266833e-06, "loss": 0.325, "step": 65940 }, { "epoch": 16.44638403990025, "grad_norm": 7.63092565536499, "learning_rate": 3.5605985037406486e-06, "loss": 0.3325, "step": 65950 }, { "epoch": 16.448877805486283, "grad_norm": 8.287890434265137, "learning_rate": 3.5581047381546135e-06, "loss": 0.3664, "step": 65960 }, { "epoch": 16.45137157107232, "grad_norm": 9.698472023010254, "learning_rate": 3.555610972568579e-06, "loss": 0.297, "step": 65970 }, { "epoch": 16.453865336658353, "grad_norm": 8.163863182067871, "learning_rate": 3.5531172069825437e-06, "loss": 0.3469, "step": 65980 }, { "epoch": 16.456359102244388, "grad_norm": 6.485888957977295, "learning_rate": 3.550623441396509e-06, "loss": 0.3444, "step": 65990 }, { "epoch": 16.458852867830423, "grad_norm": 9.558843612670898, "learning_rate": 3.548129675810474e-06, "loss": 0.3527, "step": 66000 }, { "epoch": 16.461346633416458, "grad_norm": 11.721412658691406, "learning_rate": 3.545635910224439e-06, "loss": 0.3955, "step": 66010 }, { "epoch": 16.463840399002493, "grad_norm": 7.238114356994629, "learning_rate": 3.5431421446384045e-06, "loss": 0.3075, "step": 66020 }, { "epoch": 16.466334164588527, "grad_norm": 8.826272964477539, "learning_rate": 3.5406483790523694e-06, "loss": 0.3622, "step": 66030 }, { "epoch": 16.468827930174562, "grad_norm": 8.230854034423828, "learning_rate": 3.5381546134663347e-06, "loss": 0.3097, "step": 66040 }, { "epoch": 16.471321695760597, "grad_norm": 7.81684684753418, "learning_rate": 3.5356608478802996e-06, "loss": 0.3132, "step": 66050 }, { "epoch": 16.473815461346632, "grad_norm": 12.46484661102295, "learning_rate": 3.533167082294265e-06, "loss": 0.3048, "step": 66060 }, { "epoch": 16.476309226932667, "grad_norm": 7.251391887664795, "learning_rate": 3.5306733167082298e-06, "loss": 0.313, "step": 66070 }, { "epoch": 16.478802992518702, "grad_norm": 4.723079681396484, "learning_rate": 3.528179551122195e-06, "loss": 0.2885, "step": 66080 }, { "epoch": 16.481296758104737, "grad_norm": 9.58585262298584, "learning_rate": 3.52568578553616e-06, "loss": 0.3646, "step": 66090 }, { "epoch": 16.48379052369077, "grad_norm": 8.872747421264648, "learning_rate": 3.5231920199501253e-06, "loss": 0.3397, "step": 66100 }, { "epoch": 16.486284289276806, "grad_norm": 7.260976314544678, "learning_rate": 3.52069825436409e-06, "loss": 0.2967, "step": 66110 }, { "epoch": 16.48877805486284, "grad_norm": 8.966513633728027, "learning_rate": 3.5182044887780555e-06, "loss": 0.3234, "step": 66120 }, { "epoch": 16.491271820448876, "grad_norm": 8.668418884277344, "learning_rate": 3.51571072319202e-06, "loss": 0.3109, "step": 66130 }, { "epoch": 16.49376558603491, "grad_norm": 9.92924690246582, "learning_rate": 3.5132169576059857e-06, "loss": 0.3407, "step": 66140 }, { "epoch": 16.496259351620946, "grad_norm": 8.693164825439453, "learning_rate": 3.51072319201995e-06, "loss": 0.3044, "step": 66150 }, { "epoch": 16.49875311720698, "grad_norm": 14.11042594909668, "learning_rate": 3.5082294264339154e-06, "loss": 0.3371, "step": 66160 }, { "epoch": 16.50124688279302, "grad_norm": 8.329866409301758, "learning_rate": 3.5057356608478803e-06, "loss": 0.3241, "step": 66170 }, { "epoch": 16.503740648379054, "grad_norm": 7.5446553230285645, "learning_rate": 3.5032418952618456e-06, "loss": 0.3096, "step": 66180 }, { "epoch": 16.50623441396509, "grad_norm": 5.738336563110352, "learning_rate": 3.5007481296758105e-06, "loss": 0.3996, "step": 66190 }, { "epoch": 16.508728179551124, "grad_norm": 6.674221515655518, "learning_rate": 3.498254364089776e-06, "loss": 0.327, "step": 66200 }, { "epoch": 16.51122194513716, "grad_norm": 6.403293132781982, "learning_rate": 3.4957605985037407e-06, "loss": 0.3152, "step": 66210 }, { "epoch": 16.513715710723194, "grad_norm": 9.35428237915039, "learning_rate": 3.493266832917706e-06, "loss": 0.3089, "step": 66220 }, { "epoch": 16.51620947630923, "grad_norm": 7.628061771392822, "learning_rate": 3.490773067331671e-06, "loss": 0.3204, "step": 66230 }, { "epoch": 16.518703241895263, "grad_norm": 12.166457176208496, "learning_rate": 3.4882793017456362e-06, "loss": 0.354, "step": 66240 }, { "epoch": 16.521197007481298, "grad_norm": 10.643746376037598, "learning_rate": 3.485785536159601e-06, "loss": 0.3516, "step": 66250 }, { "epoch": 16.523690773067333, "grad_norm": 6.07993745803833, "learning_rate": 3.4832917705735664e-06, "loss": 0.3867, "step": 66260 }, { "epoch": 16.526184538653368, "grad_norm": 7.434722423553467, "learning_rate": 3.4807980049875317e-06, "loss": 0.3612, "step": 66270 }, { "epoch": 16.528678304239403, "grad_norm": 7.824409484863281, "learning_rate": 3.4783042394014966e-06, "loss": 0.3013, "step": 66280 }, { "epoch": 16.531172069825438, "grad_norm": 10.760315895080566, "learning_rate": 3.475810473815462e-06, "loss": 0.3326, "step": 66290 }, { "epoch": 16.533665835411473, "grad_norm": 9.23198413848877, "learning_rate": 3.473316708229427e-06, "loss": 0.4361, "step": 66300 }, { "epoch": 16.536159600997507, "grad_norm": 7.750860214233398, "learning_rate": 3.470822942643392e-06, "loss": 0.3055, "step": 66310 }, { "epoch": 16.538653366583542, "grad_norm": 7.906196594238281, "learning_rate": 3.468329177057357e-06, "loss": 0.282, "step": 66320 }, { "epoch": 16.541147132169577, "grad_norm": 8.782937049865723, "learning_rate": 3.4658354114713223e-06, "loss": 0.3205, "step": 66330 }, { "epoch": 16.543640897755612, "grad_norm": 10.13814926147461, "learning_rate": 3.4633416458852868e-06, "loss": 0.3365, "step": 66340 }, { "epoch": 16.546134663341647, "grad_norm": 9.947074890136719, "learning_rate": 3.4608478802992525e-06, "loss": 0.3592, "step": 66350 }, { "epoch": 16.54862842892768, "grad_norm": 12.302644729614258, "learning_rate": 3.458354114713217e-06, "loss": 0.3738, "step": 66360 }, { "epoch": 16.551122194513717, "grad_norm": 8.22531509399414, "learning_rate": 3.4558603491271823e-06, "loss": 0.3024, "step": 66370 }, { "epoch": 16.55361596009975, "grad_norm": 8.880837440490723, "learning_rate": 3.453366583541147e-06, "loss": 0.2816, "step": 66380 }, { "epoch": 16.556109725685786, "grad_norm": 7.537718296051025, "learning_rate": 3.4508728179551125e-06, "loss": 0.3165, "step": 66390 }, { "epoch": 16.55860349127182, "grad_norm": 11.703903198242188, "learning_rate": 3.4483790523690774e-06, "loss": 0.4277, "step": 66400 }, { "epoch": 16.561097256857856, "grad_norm": 13.591885566711426, "learning_rate": 3.4458852867830427e-06, "loss": 0.2994, "step": 66410 }, { "epoch": 16.56359102244389, "grad_norm": 8.424422264099121, "learning_rate": 3.4433915211970076e-06, "loss": 0.3531, "step": 66420 }, { "epoch": 16.566084788029926, "grad_norm": 5.670681953430176, "learning_rate": 3.440897755610973e-06, "loss": 0.3566, "step": 66430 }, { "epoch": 16.56857855361596, "grad_norm": 13.465998649597168, "learning_rate": 3.4384039900249378e-06, "loss": 0.297, "step": 66440 }, { "epoch": 16.571072319201996, "grad_norm": 6.211828708648682, "learning_rate": 3.435910224438903e-06, "loss": 0.2967, "step": 66450 }, { "epoch": 16.57356608478803, "grad_norm": 8.583992958068848, "learning_rate": 3.433416458852868e-06, "loss": 0.2802, "step": 66460 }, { "epoch": 16.576059850374065, "grad_norm": 6.500486373901367, "learning_rate": 3.4309226932668333e-06, "loss": 0.3138, "step": 66470 }, { "epoch": 16.5785536159601, "grad_norm": 9.81982707977295, "learning_rate": 3.428428927680798e-06, "loss": 0.3349, "step": 66480 }, { "epoch": 16.581047381546135, "grad_norm": 9.09636402130127, "learning_rate": 3.4259351620947635e-06, "loss": 0.2904, "step": 66490 }, { "epoch": 16.58354114713217, "grad_norm": 7.976047039031982, "learning_rate": 3.4234413965087283e-06, "loss": 0.3574, "step": 66500 }, { "epoch": 16.586034912718205, "grad_norm": 6.352416038513184, "learning_rate": 3.4209476309226937e-06, "loss": 0.3534, "step": 66510 }, { "epoch": 16.58852867830424, "grad_norm": 7.957899570465088, "learning_rate": 3.418453865336659e-06, "loss": 0.417, "step": 66520 }, { "epoch": 16.591022443890274, "grad_norm": 7.401804447174072, "learning_rate": 3.415960099750624e-06, "loss": 0.3865, "step": 66530 }, { "epoch": 16.59351620947631, "grad_norm": 10.400482177734375, "learning_rate": 3.413466334164589e-06, "loss": 0.3071, "step": 66540 }, { "epoch": 16.596009975062344, "grad_norm": 8.039289474487305, "learning_rate": 3.410972568578554e-06, "loss": 0.3396, "step": 66550 }, { "epoch": 16.59850374064838, "grad_norm": 6.777886390686035, "learning_rate": 3.4084788029925194e-06, "loss": 0.2646, "step": 66560 }, { "epoch": 16.600997506234414, "grad_norm": 9.714869499206543, "learning_rate": 3.405985037406484e-06, "loss": 0.3047, "step": 66570 }, { "epoch": 16.60349127182045, "grad_norm": 8.811583518981934, "learning_rate": 3.4034912718204496e-06, "loss": 0.2972, "step": 66580 }, { "epoch": 16.605985037406484, "grad_norm": 8.421089172363281, "learning_rate": 3.400997506234414e-06, "loss": 0.3288, "step": 66590 }, { "epoch": 16.60847880299252, "grad_norm": 7.008131980895996, "learning_rate": 3.3985037406483793e-06, "loss": 0.3063, "step": 66600 }, { "epoch": 16.610972568578553, "grad_norm": 7.8703789710998535, "learning_rate": 3.3960099750623442e-06, "loss": 0.2479, "step": 66610 }, { "epoch": 16.61346633416459, "grad_norm": 8.42544937133789, "learning_rate": 3.3935162094763095e-06, "loss": 0.3384, "step": 66620 }, { "epoch": 16.615960099750623, "grad_norm": 6.888559341430664, "learning_rate": 3.3910224438902744e-06, "loss": 0.3679, "step": 66630 }, { "epoch": 16.618453865336658, "grad_norm": 8.545351028442383, "learning_rate": 3.3885286783042397e-06, "loss": 0.3226, "step": 66640 }, { "epoch": 16.620947630922693, "grad_norm": 7.288325309753418, "learning_rate": 3.3860349127182046e-06, "loss": 0.3422, "step": 66650 }, { "epoch": 16.623441396508728, "grad_norm": 10.019088745117188, "learning_rate": 3.38354114713217e-06, "loss": 0.3509, "step": 66660 }, { "epoch": 16.625935162094763, "grad_norm": 6.888891220092773, "learning_rate": 3.381047381546135e-06, "loss": 0.3493, "step": 66670 }, { "epoch": 16.628428927680797, "grad_norm": 11.555747032165527, "learning_rate": 3.3785536159601e-06, "loss": 0.3701, "step": 66680 }, { "epoch": 16.630922693266832, "grad_norm": 8.428287506103516, "learning_rate": 3.376059850374065e-06, "loss": 0.3779, "step": 66690 }, { "epoch": 16.633416458852867, "grad_norm": 10.036828994750977, "learning_rate": 3.3735660847880303e-06, "loss": 0.3879, "step": 66700 }, { "epoch": 16.635910224438902, "grad_norm": 7.905212879180908, "learning_rate": 3.371072319201995e-06, "loss": 0.2564, "step": 66710 }, { "epoch": 16.638403990024937, "grad_norm": 7.539322853088379, "learning_rate": 3.3685785536159605e-06, "loss": 0.3989, "step": 66720 }, { "epoch": 16.640897755610972, "grad_norm": 8.233596801757812, "learning_rate": 3.3660847880299254e-06, "loss": 0.3014, "step": 66730 }, { "epoch": 16.643391521197007, "grad_norm": 7.139582633972168, "learning_rate": 3.3635910224438907e-06, "loss": 0.2907, "step": 66740 }, { "epoch": 16.64588528678304, "grad_norm": 9.440067291259766, "learning_rate": 3.361097256857855e-06, "loss": 0.3126, "step": 66750 }, { "epoch": 16.648379052369076, "grad_norm": 10.730769157409668, "learning_rate": 3.358603491271821e-06, "loss": 0.3447, "step": 66760 }, { "epoch": 16.65087281795511, "grad_norm": 12.145304679870605, "learning_rate": 3.356109725685786e-06, "loss": 0.318, "step": 66770 }, { "epoch": 16.653366583541146, "grad_norm": 13.246317863464355, "learning_rate": 3.3536159600997507e-06, "loss": 0.3085, "step": 66780 }, { "epoch": 16.65586034912718, "grad_norm": 6.979218482971191, "learning_rate": 3.3511221945137164e-06, "loss": 0.328, "step": 66790 }, { "epoch": 16.658354114713216, "grad_norm": 7.944563388824463, "learning_rate": 3.348628428927681e-06, "loss": 0.2856, "step": 66800 }, { "epoch": 16.66084788029925, "grad_norm": 8.603992462158203, "learning_rate": 3.346134663341646e-06, "loss": 0.3337, "step": 66810 }, { "epoch": 16.663341645885286, "grad_norm": 9.603078842163086, "learning_rate": 3.343640897755611e-06, "loss": 0.3376, "step": 66820 }, { "epoch": 16.66583541147132, "grad_norm": 5.392066478729248, "learning_rate": 3.3411471321695764e-06, "loss": 0.3134, "step": 66830 }, { "epoch": 16.668329177057355, "grad_norm": 6.164698600769043, "learning_rate": 3.3386533665835413e-06, "loss": 0.323, "step": 66840 }, { "epoch": 16.67082294264339, "grad_norm": 8.777530670166016, "learning_rate": 3.3361596009975066e-06, "loss": 0.3715, "step": 66850 }, { "epoch": 16.673316708229425, "grad_norm": 9.735917091369629, "learning_rate": 3.3336658354114715e-06, "loss": 0.3017, "step": 66860 }, { "epoch": 16.67581047381546, "grad_norm": 5.572236061096191, "learning_rate": 3.3311720698254368e-06, "loss": 0.324, "step": 66870 }, { "epoch": 16.678304239401495, "grad_norm": 8.303897857666016, "learning_rate": 3.3286783042394016e-06, "loss": 0.3146, "step": 66880 }, { "epoch": 16.68079800498753, "grad_norm": 10.715437889099121, "learning_rate": 3.326184538653367e-06, "loss": 0.3026, "step": 66890 }, { "epoch": 16.683291770573565, "grad_norm": 8.109511375427246, "learning_rate": 3.323690773067332e-06, "loss": 0.3878, "step": 66900 }, { "epoch": 16.6857855361596, "grad_norm": 5.563307762145996, "learning_rate": 3.321197007481297e-06, "loss": 0.2698, "step": 66910 }, { "epoch": 16.688279301745634, "grad_norm": 13.141777038574219, "learning_rate": 3.318703241895262e-06, "loss": 0.3028, "step": 66920 }, { "epoch": 16.69077306733167, "grad_norm": 9.709403991699219, "learning_rate": 3.3162094763092273e-06, "loss": 0.3567, "step": 66930 }, { "epoch": 16.693266832917704, "grad_norm": 9.365985870361328, "learning_rate": 3.3137157107231922e-06, "loss": 0.2793, "step": 66940 }, { "epoch": 16.69576059850374, "grad_norm": 10.170108795166016, "learning_rate": 3.3112219451371575e-06, "loss": 0.2982, "step": 66950 }, { "epoch": 16.698254364089777, "grad_norm": 5.353804588317871, "learning_rate": 3.3087281795511224e-06, "loss": 0.3412, "step": 66960 }, { "epoch": 16.70074812967581, "grad_norm": 13.283953666687012, "learning_rate": 3.3062344139650877e-06, "loss": 0.2865, "step": 66970 }, { "epoch": 16.703241895261847, "grad_norm": 9.242694854736328, "learning_rate": 3.303740648379052e-06, "loss": 0.2743, "step": 66980 }, { "epoch": 16.705735660847882, "grad_norm": 9.898309707641602, "learning_rate": 3.301246882793018e-06, "loss": 0.3607, "step": 66990 }, { "epoch": 16.708229426433917, "grad_norm": 7.244779109954834, "learning_rate": 3.2987531172069824e-06, "loss": 0.2872, "step": 67000 }, { "epoch": 16.71072319201995, "grad_norm": 7.123824119567871, "learning_rate": 3.2962593516209477e-06, "loss": 0.2879, "step": 67010 }, { "epoch": 16.713216957605987, "grad_norm": 11.419690132141113, "learning_rate": 3.2937655860349134e-06, "loss": 0.3193, "step": 67020 }, { "epoch": 16.71571072319202, "grad_norm": 12.919515609741211, "learning_rate": 3.291271820448878e-06, "loss": 0.3836, "step": 67030 }, { "epoch": 16.718204488778056, "grad_norm": 8.977980613708496, "learning_rate": 3.2887780548628432e-06, "loss": 0.303, "step": 67040 }, { "epoch": 16.72069825436409, "grad_norm": 8.428853988647461, "learning_rate": 3.286284289276808e-06, "loss": 0.3334, "step": 67050 }, { "epoch": 16.723192019950126, "grad_norm": 9.528043746948242, "learning_rate": 3.2837905236907734e-06, "loss": 0.3773, "step": 67060 }, { "epoch": 16.72568578553616, "grad_norm": 8.615249633789062, "learning_rate": 3.2812967581047383e-06, "loss": 0.2869, "step": 67070 }, { "epoch": 16.728179551122196, "grad_norm": 11.474807739257812, "learning_rate": 3.2788029925187036e-06, "loss": 0.3592, "step": 67080 }, { "epoch": 16.73067331670823, "grad_norm": 7.433091640472412, "learning_rate": 3.2763092269326685e-06, "loss": 0.294, "step": 67090 }, { "epoch": 16.733167082294266, "grad_norm": 9.934019088745117, "learning_rate": 3.273815461346634e-06, "loss": 0.3228, "step": 67100 }, { "epoch": 16.7356608478803, "grad_norm": 8.578768730163574, "learning_rate": 3.2713216957605987e-06, "loss": 0.279, "step": 67110 }, { "epoch": 16.738154613466335, "grad_norm": 7.551397800445557, "learning_rate": 3.268827930174564e-06, "loss": 0.3451, "step": 67120 }, { "epoch": 16.74064837905237, "grad_norm": 7.463733196258545, "learning_rate": 3.266334164588529e-06, "loss": 0.3792, "step": 67130 }, { "epoch": 16.743142144638405, "grad_norm": 9.001909255981445, "learning_rate": 3.263840399002494e-06, "loss": 0.3244, "step": 67140 }, { "epoch": 16.74563591022444, "grad_norm": 10.928662300109863, "learning_rate": 3.261346633416459e-06, "loss": 0.3328, "step": 67150 }, { "epoch": 16.748129675810475, "grad_norm": 5.639915466308594, "learning_rate": 3.2588528678304244e-06, "loss": 0.3326, "step": 67160 }, { "epoch": 16.75062344139651, "grad_norm": 10.53235912322998, "learning_rate": 3.2563591022443893e-06, "loss": 0.361, "step": 67170 }, { "epoch": 16.753117206982544, "grad_norm": 5.4268951416015625, "learning_rate": 3.2538653366583546e-06, "loss": 0.3213, "step": 67180 }, { "epoch": 16.75561097256858, "grad_norm": 10.269506454467773, "learning_rate": 3.2513715710723195e-06, "loss": 0.3346, "step": 67190 }, { "epoch": 16.758104738154614, "grad_norm": 21.99469757080078, "learning_rate": 3.2488778054862848e-06, "loss": 0.3419, "step": 67200 }, { "epoch": 16.76059850374065, "grad_norm": 6.769282341003418, "learning_rate": 3.2463840399002492e-06, "loss": 0.321, "step": 67210 }, { "epoch": 16.763092269326684, "grad_norm": 5.740752220153809, "learning_rate": 3.243890274314215e-06, "loss": 0.3051, "step": 67220 }, { "epoch": 16.76558603491272, "grad_norm": 9.068950653076172, "learning_rate": 3.2413965087281794e-06, "loss": 0.4055, "step": 67230 }, { "epoch": 16.768079800498754, "grad_norm": 11.04249095916748, "learning_rate": 3.2389027431421448e-06, "loss": 0.336, "step": 67240 }, { "epoch": 16.77057356608479, "grad_norm": 9.966495513916016, "learning_rate": 3.2364089775561096e-06, "loss": 0.3491, "step": 67250 }, { "epoch": 16.773067331670823, "grad_norm": 11.092144012451172, "learning_rate": 3.233915211970075e-06, "loss": 0.2806, "step": 67260 }, { "epoch": 16.77556109725686, "grad_norm": 8.785165786743164, "learning_rate": 3.2314214463840403e-06, "loss": 0.3081, "step": 67270 }, { "epoch": 16.778054862842893, "grad_norm": 9.333272933959961, "learning_rate": 3.228927680798005e-06, "loss": 0.364, "step": 67280 }, { "epoch": 16.780548628428928, "grad_norm": 8.067471504211426, "learning_rate": 3.2264339152119705e-06, "loss": 0.3608, "step": 67290 }, { "epoch": 16.783042394014963, "grad_norm": 4.908388614654541, "learning_rate": 3.2239401496259353e-06, "loss": 0.3417, "step": 67300 }, { "epoch": 16.785536159600998, "grad_norm": 10.774919509887695, "learning_rate": 3.2214463840399006e-06, "loss": 0.3317, "step": 67310 }, { "epoch": 16.788029925187033, "grad_norm": 19.7656192779541, "learning_rate": 3.2189526184538655e-06, "loss": 0.3345, "step": 67320 }, { "epoch": 16.790523690773068, "grad_norm": 8.48336124420166, "learning_rate": 3.216458852867831e-06, "loss": 0.3249, "step": 67330 }, { "epoch": 16.793017456359102, "grad_norm": 7.753775119781494, "learning_rate": 3.2139650872817957e-06, "loss": 0.295, "step": 67340 }, { "epoch": 16.795511221945137, "grad_norm": 7.943778038024902, "learning_rate": 3.211471321695761e-06, "loss": 0.356, "step": 67350 }, { "epoch": 16.798004987531172, "grad_norm": 8.604585647583008, "learning_rate": 3.208977556109726e-06, "loss": 0.3197, "step": 67360 }, { "epoch": 16.800498753117207, "grad_norm": 7.526785373687744, "learning_rate": 3.2064837905236912e-06, "loss": 0.3185, "step": 67370 }, { "epoch": 16.802992518703242, "grad_norm": 9.398643493652344, "learning_rate": 3.203990024937656e-06, "loss": 0.3266, "step": 67380 }, { "epoch": 16.805486284289277, "grad_norm": 9.048074722290039, "learning_rate": 3.2014962593516214e-06, "loss": 0.3011, "step": 67390 }, { "epoch": 16.80798004987531, "grad_norm": 8.05639362335205, "learning_rate": 3.1990024937655863e-06, "loss": 0.3413, "step": 67400 }, { "epoch": 16.810473815461346, "grad_norm": 8.279003143310547, "learning_rate": 3.1965087281795516e-06, "loss": 0.3302, "step": 67410 }, { "epoch": 16.81296758104738, "grad_norm": 10.437573432922363, "learning_rate": 3.194014962593516e-06, "loss": 0.3326, "step": 67420 }, { "epoch": 16.815461346633416, "grad_norm": 10.088639259338379, "learning_rate": 3.191521197007482e-06, "loss": 0.2668, "step": 67430 }, { "epoch": 16.81795511221945, "grad_norm": 11.739412307739258, "learning_rate": 3.1890274314214463e-06, "loss": 0.2719, "step": 67440 }, { "epoch": 16.820448877805486, "grad_norm": 7.2401604652404785, "learning_rate": 3.1865336658354116e-06, "loss": 0.3168, "step": 67450 }, { "epoch": 16.82294264339152, "grad_norm": 9.88704776763916, "learning_rate": 3.1840399002493765e-06, "loss": 0.4028, "step": 67460 }, { "epoch": 16.825436408977556, "grad_norm": 10.055147171020508, "learning_rate": 3.181546134663342e-06, "loss": 0.3385, "step": 67470 }, { "epoch": 16.82793017456359, "grad_norm": 11.214886665344238, "learning_rate": 3.1790523690773067e-06, "loss": 0.3381, "step": 67480 }, { "epoch": 16.830423940149625, "grad_norm": 9.053804397583008, "learning_rate": 3.176558603491272e-06, "loss": 0.3107, "step": 67490 }, { "epoch": 16.83291770573566, "grad_norm": 9.388381004333496, "learning_rate": 3.174064837905237e-06, "loss": 0.3519, "step": 67500 }, { "epoch": 16.835411471321695, "grad_norm": 11.288336753845215, "learning_rate": 3.171571072319202e-06, "loss": 0.3049, "step": 67510 }, { "epoch": 16.83790523690773, "grad_norm": 11.12149429321289, "learning_rate": 3.1690773067331675e-06, "loss": 0.3766, "step": 67520 }, { "epoch": 16.840399002493765, "grad_norm": 6.969629287719727, "learning_rate": 3.1665835411471324e-06, "loss": 0.3209, "step": 67530 }, { "epoch": 16.8428927680798, "grad_norm": 9.759892463684082, "learning_rate": 3.1640897755610977e-06, "loss": 0.2822, "step": 67540 }, { "epoch": 16.845386533665835, "grad_norm": 9.30257797241211, "learning_rate": 3.1615960099750626e-06, "loss": 0.3563, "step": 67550 }, { "epoch": 16.84788029925187, "grad_norm": 7.607780933380127, "learning_rate": 3.159102244389028e-06, "loss": 0.352, "step": 67560 }, { "epoch": 16.850374064837904, "grad_norm": 6.663292407989502, "learning_rate": 3.1566084788029928e-06, "loss": 0.3008, "step": 67570 }, { "epoch": 16.85286783042394, "grad_norm": 9.025524139404297, "learning_rate": 3.154114713216958e-06, "loss": 0.3307, "step": 67580 }, { "epoch": 16.855361596009974, "grad_norm": 7.929129123687744, "learning_rate": 3.151620947630923e-06, "loss": 0.3064, "step": 67590 }, { "epoch": 16.85785536159601, "grad_norm": 6.258866786956787, "learning_rate": 3.1491271820448883e-06, "loss": 0.2586, "step": 67600 }, { "epoch": 16.860349127182044, "grad_norm": 5.975445747375488, "learning_rate": 3.146633416458853e-06, "loss": 0.2883, "step": 67610 }, { "epoch": 16.86284289276808, "grad_norm": 6.100348472595215, "learning_rate": 3.1441396508728185e-06, "loss": 0.3816, "step": 67620 }, { "epoch": 16.865336658354114, "grad_norm": 8.461503028869629, "learning_rate": 3.1416458852867834e-06, "loss": 0.3295, "step": 67630 }, { "epoch": 16.86783042394015, "grad_norm": 11.094927787780762, "learning_rate": 3.1391521197007487e-06, "loss": 0.3346, "step": 67640 }, { "epoch": 16.870324189526183, "grad_norm": 8.778679847717285, "learning_rate": 3.136658354114713e-06, "loss": 0.3169, "step": 67650 }, { "epoch": 16.872817955112218, "grad_norm": 8.165081024169922, "learning_rate": 3.134164588528679e-06, "loss": 0.3515, "step": 67660 }, { "epoch": 16.875311720698253, "grad_norm": 8.707517623901367, "learning_rate": 3.1316708229426433e-06, "loss": 0.3255, "step": 67670 }, { "epoch": 16.877805486284288, "grad_norm": 12.160292625427246, "learning_rate": 3.1291770573566086e-06, "loss": 0.3691, "step": 67680 }, { "epoch": 16.880299251870323, "grad_norm": 7.535975933074951, "learning_rate": 3.1266832917705735e-06, "loss": 0.3717, "step": 67690 }, { "epoch": 16.882793017456358, "grad_norm": 9.464982032775879, "learning_rate": 3.124189526184539e-06, "loss": 0.3114, "step": 67700 }, { "epoch": 16.885286783042392, "grad_norm": 6.380134582519531, "learning_rate": 3.1216957605985037e-06, "loss": 0.2951, "step": 67710 }, { "epoch": 16.887780548628427, "grad_norm": 11.643170356750488, "learning_rate": 3.119201995012469e-06, "loss": 0.3128, "step": 67720 }, { "epoch": 16.890274314214462, "grad_norm": 12.784978866577148, "learning_rate": 3.116708229426434e-06, "loss": 0.3273, "step": 67730 }, { "epoch": 16.892768079800497, "grad_norm": 9.829355239868164, "learning_rate": 3.1142144638403992e-06, "loss": 0.3061, "step": 67740 }, { "epoch": 16.895261845386532, "grad_norm": 10.183358192443848, "learning_rate": 3.111720698254364e-06, "loss": 0.3194, "step": 67750 }, { "epoch": 16.897755610972567, "grad_norm": 9.071417808532715, "learning_rate": 3.1092269326683294e-06, "loss": 0.2804, "step": 67760 }, { "epoch": 16.900249376558605, "grad_norm": 10.647289276123047, "learning_rate": 3.1067331670822947e-06, "loss": 0.3488, "step": 67770 }, { "epoch": 16.902743142144637, "grad_norm": 9.39749813079834, "learning_rate": 3.1042394014962596e-06, "loss": 0.335, "step": 67780 }, { "epoch": 16.905236907730675, "grad_norm": 9.534618377685547, "learning_rate": 3.101745635910225e-06, "loss": 0.2999, "step": 67790 }, { "epoch": 16.90773067331671, "grad_norm": 13.632329940795898, "learning_rate": 3.09925187032419e-06, "loss": 0.3342, "step": 67800 }, { "epoch": 16.910224438902745, "grad_norm": 7.592889308929443, "learning_rate": 3.096758104738155e-06, "loss": 0.2828, "step": 67810 }, { "epoch": 16.91271820448878, "grad_norm": 6.196469783782959, "learning_rate": 3.09426433915212e-06, "loss": 0.3377, "step": 67820 }, { "epoch": 16.915211970074814, "grad_norm": 9.526688575744629, "learning_rate": 3.0917705735660853e-06, "loss": 0.2684, "step": 67830 }, { "epoch": 16.91770573566085, "grad_norm": 7.148454189300537, "learning_rate": 3.08927680798005e-06, "loss": 0.3152, "step": 67840 }, { "epoch": 16.920199501246884, "grad_norm": 7.666889190673828, "learning_rate": 3.0867830423940155e-06, "loss": 0.3181, "step": 67850 }, { "epoch": 16.92269326683292, "grad_norm": 6.633835792541504, "learning_rate": 3.08428927680798e-06, "loss": 0.3662, "step": 67860 }, { "epoch": 16.925187032418954, "grad_norm": 9.805349349975586, "learning_rate": 3.0817955112219457e-06, "loss": 0.3224, "step": 67870 }, { "epoch": 16.92768079800499, "grad_norm": 25.773767471313477, "learning_rate": 3.07930174563591e-06, "loss": 0.3182, "step": 67880 }, { "epoch": 16.930174563591024, "grad_norm": 5.862217426300049, "learning_rate": 3.0768079800498755e-06, "loss": 0.362, "step": 67890 }, { "epoch": 16.93266832917706, "grad_norm": 6.981509208679199, "learning_rate": 3.0743142144638404e-06, "loss": 0.2904, "step": 67900 }, { "epoch": 16.935162094763093, "grad_norm": 7.372140407562256, "learning_rate": 3.0718204488778057e-06, "loss": 0.3158, "step": 67910 }, { "epoch": 16.93765586034913, "grad_norm": 6.7870612144470215, "learning_rate": 3.0693266832917706e-06, "loss": 0.323, "step": 67920 }, { "epoch": 16.940149625935163, "grad_norm": 11.441544532775879, "learning_rate": 3.066832917705736e-06, "loss": 0.3994, "step": 67930 }, { "epoch": 16.942643391521198, "grad_norm": 7.740766525268555, "learning_rate": 3.0643391521197008e-06, "loss": 0.4086, "step": 67940 }, { "epoch": 16.945137157107233, "grad_norm": 10.935362815856934, "learning_rate": 3.061845386533666e-06, "loss": 0.32, "step": 67950 }, { "epoch": 16.947630922693268, "grad_norm": 9.322646141052246, "learning_rate": 3.059351620947631e-06, "loss": 0.3604, "step": 67960 }, { "epoch": 16.950124688279303, "grad_norm": 8.889856338500977, "learning_rate": 3.0568578553615963e-06, "loss": 0.3302, "step": 67970 }, { "epoch": 16.952618453865338, "grad_norm": 12.106937408447266, "learning_rate": 3.054364089775561e-06, "loss": 0.2517, "step": 67980 }, { "epoch": 16.955112219451372, "grad_norm": 9.038848876953125, "learning_rate": 3.0518703241895265e-06, "loss": 0.3813, "step": 67990 }, { "epoch": 16.957605985037407, "grad_norm": 10.314130783081055, "learning_rate": 3.0493765586034918e-06, "loss": 0.3414, "step": 68000 }, { "epoch": 16.960099750623442, "grad_norm": 6.741665840148926, "learning_rate": 3.0468827930174567e-06, "loss": 0.3446, "step": 68010 }, { "epoch": 16.962593516209477, "grad_norm": 7.708677768707275, "learning_rate": 3.044389027431422e-06, "loss": 0.2833, "step": 68020 }, { "epoch": 16.965087281795512, "grad_norm": 7.930616855621338, "learning_rate": 3.041895261845387e-06, "loss": 0.2681, "step": 68030 }, { "epoch": 16.967581047381547, "grad_norm": 6.562017440795898, "learning_rate": 3.039401496259352e-06, "loss": 0.3424, "step": 68040 }, { "epoch": 16.97007481296758, "grad_norm": 9.841915130615234, "learning_rate": 3.036907730673317e-06, "loss": 0.3586, "step": 68050 }, { "epoch": 16.972568578553616, "grad_norm": 13.928132057189941, "learning_rate": 3.0344139650872824e-06, "loss": 0.3287, "step": 68060 }, { "epoch": 16.97506234413965, "grad_norm": 6.605895042419434, "learning_rate": 3.0319201995012472e-06, "loss": 0.3126, "step": 68070 }, { "epoch": 16.977556109725686, "grad_norm": 8.77118968963623, "learning_rate": 3.0294264339152126e-06, "loss": 0.345, "step": 68080 }, { "epoch": 16.98004987531172, "grad_norm": 10.565832138061523, "learning_rate": 3.026932668329177e-06, "loss": 0.3402, "step": 68090 }, { "epoch": 16.982543640897756, "grad_norm": 7.508286952972412, "learning_rate": 3.0244389027431428e-06, "loss": 0.3443, "step": 68100 }, { "epoch": 16.98503740648379, "grad_norm": 7.770997524261475, "learning_rate": 3.0219451371571072e-06, "loss": 0.3283, "step": 68110 }, { "epoch": 16.987531172069826, "grad_norm": 10.09821891784668, "learning_rate": 3.0194513715710725e-06, "loss": 0.3236, "step": 68120 }, { "epoch": 16.99002493765586, "grad_norm": 6.971126079559326, "learning_rate": 3.0169576059850374e-06, "loss": 0.3546, "step": 68130 }, { "epoch": 16.992518703241895, "grad_norm": 8.345663070678711, "learning_rate": 3.0144638403990027e-06, "loss": 0.3018, "step": 68140 }, { "epoch": 16.99501246882793, "grad_norm": 8.50364875793457, "learning_rate": 3.0119700748129676e-06, "loss": 0.342, "step": 68150 }, { "epoch": 16.997506234413965, "grad_norm": 7.714181423187256, "learning_rate": 3.009476309226933e-06, "loss": 0.2599, "step": 68160 }, { "epoch": 17.0, "grad_norm": 9.013660430908203, "learning_rate": 3.006982543640898e-06, "loss": 0.2809, "step": 68170 }, { "epoch": 17.0, "eval_loss": 0.4180484116077423, "eval_runtime": 59.928, "eval_samples_per_second": 16.737, "eval_steps_per_second": 16.737, "step": 68170 }, { "epoch": 17.002493765586035, "grad_norm": 9.489847183227539, "learning_rate": 3.004488778054863e-06, "loss": 0.3557, "step": 68180 }, { "epoch": 17.00498753117207, "grad_norm": 7.423935890197754, "learning_rate": 3.001995012468828e-06, "loss": 0.3209, "step": 68190 }, { "epoch": 17.007481296758105, "grad_norm": 5.265260219573975, "learning_rate": 2.9995012468827933e-06, "loss": 0.2767, "step": 68200 }, { "epoch": 17.00997506234414, "grad_norm": 6.882249355316162, "learning_rate": 2.997007481296758e-06, "loss": 0.2949, "step": 68210 }, { "epoch": 17.012468827930174, "grad_norm": 10.994675636291504, "learning_rate": 2.9945137157107235e-06, "loss": 0.2567, "step": 68220 }, { "epoch": 17.01496259351621, "grad_norm": 6.603150367736816, "learning_rate": 2.9920199501246884e-06, "loss": 0.2465, "step": 68230 }, { "epoch": 17.017456359102244, "grad_norm": 7.306191921234131, "learning_rate": 2.9895261845386537e-06, "loss": 0.3047, "step": 68240 }, { "epoch": 17.01995012468828, "grad_norm": 6.446793556213379, "learning_rate": 2.987032418952619e-06, "loss": 0.3648, "step": 68250 }, { "epoch": 17.022443890274314, "grad_norm": 9.686697006225586, "learning_rate": 2.984538653366584e-06, "loss": 0.4001, "step": 68260 }, { "epoch": 17.02493765586035, "grad_norm": 8.045364379882812, "learning_rate": 2.982044887780549e-06, "loss": 0.3045, "step": 68270 }, { "epoch": 17.027431421446384, "grad_norm": 11.373425483703613, "learning_rate": 2.979551122194514e-06, "loss": 0.3371, "step": 68280 }, { "epoch": 17.02992518703242, "grad_norm": 9.176929473876953, "learning_rate": 2.9770573566084794e-06, "loss": 0.3362, "step": 68290 }, { "epoch": 17.032418952618453, "grad_norm": 9.755634307861328, "learning_rate": 2.974563591022444e-06, "loss": 0.3246, "step": 68300 }, { "epoch": 17.034912718204488, "grad_norm": 10.913490295410156, "learning_rate": 2.9720698254364096e-06, "loss": 0.3034, "step": 68310 }, { "epoch": 17.037406483790523, "grad_norm": 7.916656494140625, "learning_rate": 2.969576059850374e-06, "loss": 0.2902, "step": 68320 }, { "epoch": 17.039900249376558, "grad_norm": 12.00803279876709, "learning_rate": 2.9670822942643394e-06, "loss": 0.3386, "step": 68330 }, { "epoch": 17.042394014962593, "grad_norm": 8.119451522827148, "learning_rate": 2.9645885286783043e-06, "loss": 0.3176, "step": 68340 }, { "epoch": 17.044887780548628, "grad_norm": 11.683120727539062, "learning_rate": 2.9620947630922696e-06, "loss": 0.3665, "step": 68350 }, { "epoch": 17.047381546134662, "grad_norm": 8.209905624389648, "learning_rate": 2.9596009975062345e-06, "loss": 0.3437, "step": 68360 }, { "epoch": 17.049875311720697, "grad_norm": 9.127650260925293, "learning_rate": 2.9571072319201998e-06, "loss": 0.4074, "step": 68370 }, { "epoch": 17.052369077306732, "grad_norm": 8.173192977905273, "learning_rate": 2.9546134663341646e-06, "loss": 0.2907, "step": 68380 }, { "epoch": 17.054862842892767, "grad_norm": 8.60781192779541, "learning_rate": 2.95211970074813e-06, "loss": 0.3102, "step": 68390 }, { "epoch": 17.057356608478802, "grad_norm": 12.1088228225708, "learning_rate": 2.949625935162095e-06, "loss": 0.3105, "step": 68400 }, { "epoch": 17.059850374064837, "grad_norm": 6.979897499084473, "learning_rate": 2.94713216957606e-06, "loss": 0.3078, "step": 68410 }, { "epoch": 17.06234413965087, "grad_norm": 9.642083168029785, "learning_rate": 2.944638403990025e-06, "loss": 0.3219, "step": 68420 }, { "epoch": 17.064837905236907, "grad_norm": 7.635913848876953, "learning_rate": 2.9421446384039903e-06, "loss": 0.3145, "step": 68430 }, { "epoch": 17.06733167082294, "grad_norm": 7.816982269287109, "learning_rate": 2.9396508728179552e-06, "loss": 0.3144, "step": 68440 }, { "epoch": 17.069825436408976, "grad_norm": 9.944385528564453, "learning_rate": 2.9371571072319205e-06, "loss": 0.3679, "step": 68450 }, { "epoch": 17.07231920199501, "grad_norm": 7.051234722137451, "learning_rate": 2.9346633416458854e-06, "loss": 0.3836, "step": 68460 }, { "epoch": 17.074812967581046, "grad_norm": 6.5841965675354, "learning_rate": 2.9321695760598507e-06, "loss": 0.3354, "step": 68470 }, { "epoch": 17.07730673316708, "grad_norm": 7.379906177520752, "learning_rate": 2.9296758104738156e-06, "loss": 0.3299, "step": 68480 }, { "epoch": 17.079800498753116, "grad_norm": 6.527453899383545, "learning_rate": 2.927182044887781e-06, "loss": 0.2663, "step": 68490 }, { "epoch": 17.08229426433915, "grad_norm": 9.604272842407227, "learning_rate": 2.9246882793017462e-06, "loss": 0.3929, "step": 68500 }, { "epoch": 17.084788029925186, "grad_norm": 13.529383659362793, "learning_rate": 2.922194513715711e-06, "loss": 0.2815, "step": 68510 }, { "epoch": 17.08728179551122, "grad_norm": 8.98625373840332, "learning_rate": 2.9197007481296764e-06, "loss": 0.303, "step": 68520 }, { "epoch": 17.089775561097255, "grad_norm": 8.426309585571289, "learning_rate": 2.917206982543641e-06, "loss": 0.3067, "step": 68530 }, { "epoch": 17.09226932668329, "grad_norm": 11.854998588562012, "learning_rate": 2.9147132169576066e-06, "loss": 0.3384, "step": 68540 }, { "epoch": 17.094763092269325, "grad_norm": 7.317206382751465, "learning_rate": 2.912219451371571e-06, "loss": 0.3036, "step": 68550 }, { "epoch": 17.09725685785536, "grad_norm": 7.078338146209717, "learning_rate": 2.9097256857855364e-06, "loss": 0.3772, "step": 68560 }, { "epoch": 17.099750623441395, "grad_norm": 3.3603427410125732, "learning_rate": 2.9072319201995013e-06, "loss": 0.3848, "step": 68570 }, { "epoch": 17.102244389027433, "grad_norm": 8.076667785644531, "learning_rate": 2.9047381546134666e-06, "loss": 0.3636, "step": 68580 }, { "epoch": 17.104738154613468, "grad_norm": 8.68441104888916, "learning_rate": 2.9022443890274315e-06, "loss": 0.3029, "step": 68590 }, { "epoch": 17.107231920199503, "grad_norm": 15.482074737548828, "learning_rate": 2.899750623441397e-06, "loss": 0.3457, "step": 68600 }, { "epoch": 17.109725685785538, "grad_norm": 8.531909942626953, "learning_rate": 2.8972568578553617e-06, "loss": 0.3074, "step": 68610 }, { "epoch": 17.112219451371573, "grad_norm": 6.706440448760986, "learning_rate": 2.894763092269327e-06, "loss": 0.3379, "step": 68620 }, { "epoch": 17.114713216957608, "grad_norm": 8.066004753112793, "learning_rate": 2.892269326683292e-06, "loss": 0.3175, "step": 68630 }, { "epoch": 17.117206982543642, "grad_norm": 7.285860061645508, "learning_rate": 2.889775561097257e-06, "loss": 0.2926, "step": 68640 }, { "epoch": 17.119700748129677, "grad_norm": 11.93729019165039, "learning_rate": 2.887531172069826e-06, "loss": 0.4289, "step": 68650 }, { "epoch": 17.122194513715712, "grad_norm": 9.996557235717773, "learning_rate": 2.8850374064837904e-06, "loss": 0.2979, "step": 68660 }, { "epoch": 17.124688279301747, "grad_norm": 10.025423049926758, "learning_rate": 2.8825436408977557e-06, "loss": 0.3345, "step": 68670 }, { "epoch": 17.127182044887782, "grad_norm": 7.561511993408203, "learning_rate": 2.8800498753117214e-06, "loss": 0.3385, "step": 68680 }, { "epoch": 17.129675810473817, "grad_norm": 6.57540225982666, "learning_rate": 2.877556109725686e-06, "loss": 0.2567, "step": 68690 }, { "epoch": 17.13216957605985, "grad_norm": 11.776047706604004, "learning_rate": 2.875062344139651e-06, "loss": 0.3308, "step": 68700 }, { "epoch": 17.134663341645886, "grad_norm": 6.203807353973389, "learning_rate": 2.872568578553616e-06, "loss": 0.2472, "step": 68710 }, { "epoch": 17.13715710723192, "grad_norm": 9.786406517028809, "learning_rate": 2.8700748129675814e-06, "loss": 0.3577, "step": 68720 }, { "epoch": 17.139650872817956, "grad_norm": 6.93914794921875, "learning_rate": 2.8675810473815462e-06, "loss": 0.2713, "step": 68730 }, { "epoch": 17.14214463840399, "grad_norm": 7.5632500648498535, "learning_rate": 2.8650872817955116e-06, "loss": 0.2773, "step": 68740 }, { "epoch": 17.144638403990026, "grad_norm": 6.6398234367370605, "learning_rate": 2.8625935162094764e-06, "loss": 0.3466, "step": 68750 }, { "epoch": 17.14713216957606, "grad_norm": 5.831292152404785, "learning_rate": 2.8600997506234418e-06, "loss": 0.348, "step": 68760 }, { "epoch": 17.149625935162096, "grad_norm": 11.87356185913086, "learning_rate": 2.8576059850374066e-06, "loss": 0.2968, "step": 68770 }, { "epoch": 17.15211970074813, "grad_norm": 10.019725799560547, "learning_rate": 2.855112219451372e-06, "loss": 0.3226, "step": 68780 }, { "epoch": 17.154613466334165, "grad_norm": 9.507071495056152, "learning_rate": 2.852618453865337e-06, "loss": 0.3549, "step": 68790 }, { "epoch": 17.1571072319202, "grad_norm": 10.056197166442871, "learning_rate": 2.850124688279302e-06, "loss": 0.2982, "step": 68800 }, { "epoch": 17.159600997506235, "grad_norm": 15.976298332214355, "learning_rate": 2.847630922693267e-06, "loss": 0.3499, "step": 68810 }, { "epoch": 17.16209476309227, "grad_norm": 11.574234962463379, "learning_rate": 2.8451371571072323e-06, "loss": 0.3486, "step": 68820 }, { "epoch": 17.164588528678305, "grad_norm": 7.35728645324707, "learning_rate": 2.8426433915211972e-06, "loss": 0.3749, "step": 68830 }, { "epoch": 17.16708229426434, "grad_norm": 9.311493873596191, "learning_rate": 2.8401496259351625e-06, "loss": 0.3784, "step": 68840 }, { "epoch": 17.169576059850375, "grad_norm": 7.325751304626465, "learning_rate": 2.837655860349127e-06, "loss": 0.2806, "step": 68850 }, { "epoch": 17.17206982543641, "grad_norm": 6.172769069671631, "learning_rate": 2.8351620947630927e-06, "loss": 0.2538, "step": 68860 }, { "epoch": 17.174563591022444, "grad_norm": 10.258570671081543, "learning_rate": 2.832668329177057e-06, "loss": 0.3006, "step": 68870 }, { "epoch": 17.17705735660848, "grad_norm": 6.926236152648926, "learning_rate": 2.8301745635910225e-06, "loss": 0.2841, "step": 68880 }, { "epoch": 17.179551122194514, "grad_norm": 9.146723747253418, "learning_rate": 2.8276807980049874e-06, "loss": 0.3016, "step": 68890 }, { "epoch": 17.18204488778055, "grad_norm": 6.017948150634766, "learning_rate": 2.8251870324189527e-06, "loss": 0.3461, "step": 68900 }, { "epoch": 17.184538653366584, "grad_norm": 10.936685562133789, "learning_rate": 2.8226932668329176e-06, "loss": 0.3771, "step": 68910 }, { "epoch": 17.18703241895262, "grad_norm": 8.554216384887695, "learning_rate": 2.820199501246883e-06, "loss": 0.2986, "step": 68920 }, { "epoch": 17.189526184538654, "grad_norm": 8.152572631835938, "learning_rate": 2.817705735660848e-06, "loss": 0.3383, "step": 68930 }, { "epoch": 17.19201995012469, "grad_norm": 8.737922668457031, "learning_rate": 2.815211970074813e-06, "loss": 0.316, "step": 68940 }, { "epoch": 17.194513715710723, "grad_norm": 8.8933744430542, "learning_rate": 2.8127182044887784e-06, "loss": 0.3481, "step": 68950 }, { "epoch": 17.197007481296758, "grad_norm": 9.593643188476562, "learning_rate": 2.8102244389027433e-06, "loss": 0.3012, "step": 68960 }, { "epoch": 17.199501246882793, "grad_norm": 6.942617416381836, "learning_rate": 2.8077306733167086e-06, "loss": 0.2645, "step": 68970 }, { "epoch": 17.201995012468828, "grad_norm": 9.486404418945312, "learning_rate": 2.8052369077306735e-06, "loss": 0.3266, "step": 68980 }, { "epoch": 17.204488778054863, "grad_norm": 5.64856481552124, "learning_rate": 2.802743142144639e-06, "loss": 0.2926, "step": 68990 }, { "epoch": 17.206982543640898, "grad_norm": 8.93766975402832, "learning_rate": 2.8002493765586037e-06, "loss": 0.2835, "step": 69000 }, { "epoch": 17.209476309226932, "grad_norm": 7.289651393890381, "learning_rate": 2.797755610972569e-06, "loss": 0.3044, "step": 69010 }, { "epoch": 17.211970074812967, "grad_norm": 9.757658004760742, "learning_rate": 2.795261845386534e-06, "loss": 0.304, "step": 69020 }, { "epoch": 17.214463840399002, "grad_norm": 9.677502632141113, "learning_rate": 2.792768079800499e-06, "loss": 0.354, "step": 69030 }, { "epoch": 17.216957605985037, "grad_norm": 7.527002811431885, "learning_rate": 2.790274314214464e-06, "loss": 0.3659, "step": 69040 }, { "epoch": 17.219451371571072, "grad_norm": 15.7623872756958, "learning_rate": 2.7877805486284294e-06, "loss": 0.2914, "step": 69050 }, { "epoch": 17.221945137157107, "grad_norm": 7.540277004241943, "learning_rate": 2.7852867830423943e-06, "loss": 0.3512, "step": 69060 }, { "epoch": 17.22443890274314, "grad_norm": 8.1314697265625, "learning_rate": 2.7827930174563596e-06, "loss": 0.317, "step": 69070 }, { "epoch": 17.226932668329177, "grad_norm": 8.093489646911621, "learning_rate": 2.780299251870324e-06, "loss": 0.3402, "step": 69080 }, { "epoch": 17.22942643391521, "grad_norm": 4.8851094245910645, "learning_rate": 2.7778054862842898e-06, "loss": 0.3102, "step": 69090 }, { "epoch": 17.231920199501246, "grad_norm": 6.918999671936035, "learning_rate": 2.7753117206982542e-06, "loss": 0.2954, "step": 69100 }, { "epoch": 17.23441396508728, "grad_norm": 11.607989311218262, "learning_rate": 2.7728179551122195e-06, "loss": 0.3554, "step": 69110 }, { "epoch": 17.236907730673316, "grad_norm": 8.533638000488281, "learning_rate": 2.7703241895261844e-06, "loss": 0.2778, "step": 69120 }, { "epoch": 17.23940149625935, "grad_norm": 6.778941631317139, "learning_rate": 2.7678304239401497e-06, "loss": 0.3103, "step": 69130 }, { "epoch": 17.241895261845386, "grad_norm": 8.85729694366455, "learning_rate": 2.7653366583541146e-06, "loss": 0.328, "step": 69140 }, { "epoch": 17.24438902743142, "grad_norm": 11.336535453796387, "learning_rate": 2.76284289276808e-06, "loss": 0.3471, "step": 69150 }, { "epoch": 17.246882793017456, "grad_norm": 8.672399520874023, "learning_rate": 2.760349127182045e-06, "loss": 0.4223, "step": 69160 }, { "epoch": 17.24937655860349, "grad_norm": 6.419161796569824, "learning_rate": 2.75785536159601e-06, "loss": 0.3823, "step": 69170 }, { "epoch": 17.251870324189525, "grad_norm": 6.270040035247803, "learning_rate": 2.7553615960099754e-06, "loss": 0.2993, "step": 69180 }, { "epoch": 17.25436408977556, "grad_norm": 6.386322975158691, "learning_rate": 2.7528678304239403e-06, "loss": 0.3222, "step": 69190 }, { "epoch": 17.256857855361595, "grad_norm": 10.8588285446167, "learning_rate": 2.7503740648379056e-06, "loss": 0.372, "step": 69200 }, { "epoch": 17.25935162094763, "grad_norm": 9.645064353942871, "learning_rate": 2.7478802992518705e-06, "loss": 0.2823, "step": 69210 }, { "epoch": 17.261845386533665, "grad_norm": 8.688511848449707, "learning_rate": 2.745386533665836e-06, "loss": 0.3246, "step": 69220 }, { "epoch": 17.2643391521197, "grad_norm": 7.549156665802002, "learning_rate": 2.7428927680798007e-06, "loss": 0.2917, "step": 69230 }, { "epoch": 17.266832917705734, "grad_norm": 7.3209357261657715, "learning_rate": 2.740399002493766e-06, "loss": 0.2893, "step": 69240 }, { "epoch": 17.26932668329177, "grad_norm": 9.329798698425293, "learning_rate": 2.737905236907731e-06, "loss": 0.3382, "step": 69250 }, { "epoch": 17.271820448877804, "grad_norm": 17.011377334594727, "learning_rate": 2.7354114713216962e-06, "loss": 0.2877, "step": 69260 }, { "epoch": 17.27431421446384, "grad_norm": 13.201550483703613, "learning_rate": 2.732917705735661e-06, "loss": 0.3846, "step": 69270 }, { "epoch": 17.276807980049874, "grad_norm": 7.851199150085449, "learning_rate": 2.7304239401496264e-06, "loss": 0.2899, "step": 69280 }, { "epoch": 17.27930174563591, "grad_norm": 7.01190185546875, "learning_rate": 2.7279301745635913e-06, "loss": 0.3709, "step": 69290 }, { "epoch": 17.281795511221944, "grad_norm": 8.432855606079102, "learning_rate": 2.7254364089775566e-06, "loss": 0.2868, "step": 69300 }, { "epoch": 17.28428927680798, "grad_norm": 9.088372230529785, "learning_rate": 2.722942643391521e-06, "loss": 0.3208, "step": 69310 }, { "epoch": 17.286783042394013, "grad_norm": 7.609044551849365, "learning_rate": 2.720448877805487e-06, "loss": 0.3291, "step": 69320 }, { "epoch": 17.28927680798005, "grad_norm": 18.67131996154785, "learning_rate": 2.7179551122194513e-06, "loss": 0.38, "step": 69330 }, { "epoch": 17.291770573566083, "grad_norm": 7.562769889831543, "learning_rate": 2.7154613466334166e-06, "loss": 0.3773, "step": 69340 }, { "epoch": 17.294264339152118, "grad_norm": 6.86823844909668, "learning_rate": 2.7129675810473815e-06, "loss": 0.3447, "step": 69350 }, { "epoch": 17.296758104738153, "grad_norm": 13.892622947692871, "learning_rate": 2.7104738154613468e-06, "loss": 0.3643, "step": 69360 }, { "epoch": 17.29925187032419, "grad_norm": 7.478859901428223, "learning_rate": 2.7079800498753117e-06, "loss": 0.3812, "step": 69370 }, { "epoch": 17.301745635910226, "grad_norm": 11.587136268615723, "learning_rate": 2.705486284289277e-06, "loss": 0.3247, "step": 69380 }, { "epoch": 17.30423940149626, "grad_norm": 11.155722618103027, "learning_rate": 2.702992518703242e-06, "loss": 0.2928, "step": 69390 }, { "epoch": 17.306733167082296, "grad_norm": 5.776709079742432, "learning_rate": 2.700498753117207e-06, "loss": 0.3003, "step": 69400 }, { "epoch": 17.30922693266833, "grad_norm": 11.081796646118164, "learning_rate": 2.698004987531172e-06, "loss": 0.3558, "step": 69410 }, { "epoch": 17.311720698254366, "grad_norm": 10.856132507324219, "learning_rate": 2.6955112219451374e-06, "loss": 0.3469, "step": 69420 }, { "epoch": 17.3142144638404, "grad_norm": 7.837653636932373, "learning_rate": 2.6930174563591027e-06, "loss": 0.2866, "step": 69430 }, { "epoch": 17.316708229426435, "grad_norm": 8.779632568359375, "learning_rate": 2.6905236907730676e-06, "loss": 0.3857, "step": 69440 }, { "epoch": 17.31920199501247, "grad_norm": 9.35014820098877, "learning_rate": 2.688029925187033e-06, "loss": 0.2853, "step": 69450 }, { "epoch": 17.321695760598505, "grad_norm": 4.780736923217773, "learning_rate": 2.6855361596009978e-06, "loss": 0.3128, "step": 69460 }, { "epoch": 17.32418952618454, "grad_norm": 9.295073509216309, "learning_rate": 2.683042394014963e-06, "loss": 0.3148, "step": 69470 }, { "epoch": 17.326683291770575, "grad_norm": 8.449419975280762, "learning_rate": 2.680548628428928e-06, "loss": 0.3382, "step": 69480 }, { "epoch": 17.32917705735661, "grad_norm": 7.874120712280273, "learning_rate": 2.6780548628428933e-06, "loss": 0.3143, "step": 69490 }, { "epoch": 17.331670822942645, "grad_norm": 6.140561103820801, "learning_rate": 2.675561097256858e-06, "loss": 0.3332, "step": 69500 }, { "epoch": 17.33416458852868, "grad_norm": 9.675243377685547, "learning_rate": 2.6730673316708235e-06, "loss": 0.2659, "step": 69510 }, { "epoch": 17.336658354114714, "grad_norm": 8.507969856262207, "learning_rate": 2.670573566084788e-06, "loss": 0.3464, "step": 69520 }, { "epoch": 17.33915211970075, "grad_norm": 9.791665077209473, "learning_rate": 2.6680798004987537e-06, "loss": 0.3107, "step": 69530 }, { "epoch": 17.341645885286784, "grad_norm": 6.693220138549805, "learning_rate": 2.665586034912718e-06, "loss": 0.3108, "step": 69540 }, { "epoch": 17.34413965087282, "grad_norm": 9.014039993286133, "learning_rate": 2.6630922693266834e-06, "loss": 0.3689, "step": 69550 }, { "epoch": 17.346633416458854, "grad_norm": 9.79101848602295, "learning_rate": 2.6605985037406483e-06, "loss": 0.3281, "step": 69560 }, { "epoch": 17.34912718204489, "grad_norm": 6.341735363006592, "learning_rate": 2.6581047381546136e-06, "loss": 0.2585, "step": 69570 }, { "epoch": 17.351620947630924, "grad_norm": 7.738804340362549, "learning_rate": 2.6556109725685785e-06, "loss": 0.3152, "step": 69580 }, { "epoch": 17.35411471321696, "grad_norm": 8.257248878479004, "learning_rate": 2.653117206982544e-06, "loss": 0.3581, "step": 69590 }, { "epoch": 17.356608478802993, "grad_norm": 9.338336944580078, "learning_rate": 2.6506234413965087e-06, "loss": 0.302, "step": 69600 }, { "epoch": 17.359102244389028, "grad_norm": 9.739322662353516, "learning_rate": 2.648129675810474e-06, "loss": 0.2901, "step": 69610 }, { "epoch": 17.361596009975063, "grad_norm": 9.487130165100098, "learning_rate": 2.645635910224439e-06, "loss": 0.3585, "step": 69620 }, { "epoch": 17.364089775561098, "grad_norm": 10.598410606384277, "learning_rate": 2.6431421446384042e-06, "loss": 0.3382, "step": 69630 }, { "epoch": 17.366583541147133, "grad_norm": 8.358076095581055, "learning_rate": 2.640648379052369e-06, "loss": 0.3341, "step": 69640 }, { "epoch": 17.369077306733168, "grad_norm": 11.573534965515137, "learning_rate": 2.6381546134663344e-06, "loss": 0.3532, "step": 69650 }, { "epoch": 17.371571072319203, "grad_norm": 10.417460441589355, "learning_rate": 2.6356608478802997e-06, "loss": 0.3544, "step": 69660 }, { "epoch": 17.374064837905237, "grad_norm": 12.179265975952148, "learning_rate": 2.6331670822942646e-06, "loss": 0.304, "step": 69670 }, { "epoch": 17.376558603491272, "grad_norm": 8.12159252166748, "learning_rate": 2.63067331670823e-06, "loss": 0.3341, "step": 69680 }, { "epoch": 17.379052369077307, "grad_norm": 9.424428939819336, "learning_rate": 2.628179551122195e-06, "loss": 0.3425, "step": 69690 }, { "epoch": 17.381546134663342, "grad_norm": 7.936733245849609, "learning_rate": 2.62568578553616e-06, "loss": 0.2796, "step": 69700 }, { "epoch": 17.384039900249377, "grad_norm": 9.32441234588623, "learning_rate": 2.623192019950125e-06, "loss": 0.3235, "step": 69710 }, { "epoch": 17.38653366583541, "grad_norm": 10.321310043334961, "learning_rate": 2.6206982543640903e-06, "loss": 0.3289, "step": 69720 }, { "epoch": 17.389027431421447, "grad_norm": 7.7665181159973145, "learning_rate": 2.618204488778055e-06, "loss": 0.3161, "step": 69730 }, { "epoch": 17.39152119700748, "grad_norm": 9.732200622558594, "learning_rate": 2.6157107231920205e-06, "loss": 0.2868, "step": 69740 }, { "epoch": 17.394014962593516, "grad_norm": 6.528557300567627, "learning_rate": 2.613216957605985e-06, "loss": 0.3435, "step": 69750 }, { "epoch": 17.39650872817955, "grad_norm": 8.758277893066406, "learning_rate": 2.6107231920199507e-06, "loss": 0.3674, "step": 69760 }, { "epoch": 17.399002493765586, "grad_norm": 7.994776248931885, "learning_rate": 2.608229426433915e-06, "loss": 0.372, "step": 69770 }, { "epoch": 17.40149625935162, "grad_norm": 9.376730918884277, "learning_rate": 2.6057356608478805e-06, "loss": 0.3216, "step": 69780 }, { "epoch": 17.403990024937656, "grad_norm": 6.297952175140381, "learning_rate": 2.6032418952618454e-06, "loss": 0.3051, "step": 69790 }, { "epoch": 17.40648379052369, "grad_norm": 8.456469535827637, "learning_rate": 2.6007481296758107e-06, "loss": 0.3467, "step": 69800 }, { "epoch": 17.408977556109726, "grad_norm": 7.9176859855651855, "learning_rate": 2.5982543640897756e-06, "loss": 0.3259, "step": 69810 }, { "epoch": 17.41147132169576, "grad_norm": 9.109759330749512, "learning_rate": 2.595760598503741e-06, "loss": 0.3349, "step": 69820 }, { "epoch": 17.413965087281795, "grad_norm": 8.499098777770996, "learning_rate": 2.5932668329177058e-06, "loss": 0.3412, "step": 69830 }, { "epoch": 17.41645885286783, "grad_norm": 8.873187065124512, "learning_rate": 2.590773067331671e-06, "loss": 0.2991, "step": 69840 }, { "epoch": 17.418952618453865, "grad_norm": 6.6649489402771, "learning_rate": 2.588279301745636e-06, "loss": 0.362, "step": 69850 }, { "epoch": 17.4214463840399, "grad_norm": 7.3186259269714355, "learning_rate": 2.5857855361596013e-06, "loss": 0.3563, "step": 69860 }, { "epoch": 17.423940149625935, "grad_norm": 6.746584892272949, "learning_rate": 2.583291770573566e-06, "loss": 0.3346, "step": 69870 }, { "epoch": 17.42643391521197, "grad_norm": 10.556224822998047, "learning_rate": 2.5807980049875315e-06, "loss": 0.367, "step": 69880 }, { "epoch": 17.428927680798004, "grad_norm": 8.307790756225586, "learning_rate": 2.5783042394014963e-06, "loss": 0.337, "step": 69890 }, { "epoch": 17.43142144638404, "grad_norm": 18.567258834838867, "learning_rate": 2.5758104738154617e-06, "loss": 0.3254, "step": 69900 }, { "epoch": 17.433915211970074, "grad_norm": 8.430441856384277, "learning_rate": 2.573316708229427e-06, "loss": 0.2723, "step": 69910 }, { "epoch": 17.43640897755611, "grad_norm": 6.945594310760498, "learning_rate": 2.570822942643392e-06, "loss": 0.3487, "step": 69920 }, { "epoch": 17.438902743142144, "grad_norm": 9.82526969909668, "learning_rate": 2.568329177057357e-06, "loss": 0.3007, "step": 69930 }, { "epoch": 17.44139650872818, "grad_norm": 9.881370544433594, "learning_rate": 2.565835411471322e-06, "loss": 0.3641, "step": 69940 }, { "epoch": 17.443890274314214, "grad_norm": 10.655155181884766, "learning_rate": 2.5633416458852874e-06, "loss": 0.316, "step": 69950 }, { "epoch": 17.44638403990025, "grad_norm": 7.941926002502441, "learning_rate": 2.560847880299252e-06, "loss": 0.2926, "step": 69960 }, { "epoch": 17.448877805486283, "grad_norm": 10.348051071166992, "learning_rate": 2.5583541147132175e-06, "loss": 0.3244, "step": 69970 }, { "epoch": 17.45137157107232, "grad_norm": 9.219032287597656, "learning_rate": 2.555860349127182e-06, "loss": 0.2996, "step": 69980 }, { "epoch": 17.453865336658353, "grad_norm": 12.931209564208984, "learning_rate": 2.5533665835411473e-06, "loss": 0.3215, "step": 69990 }, { "epoch": 17.456359102244388, "grad_norm": 12.621508598327637, "learning_rate": 2.550872817955112e-06, "loss": 0.3766, "step": 70000 }, { "epoch": 17.458852867830423, "grad_norm": 13.947381019592285, "learning_rate": 2.5483790523690775e-06, "loss": 0.3613, "step": 70010 }, { "epoch": 17.461346633416458, "grad_norm": 8.649821281433105, "learning_rate": 2.5458852867830424e-06, "loss": 0.3608, "step": 70020 }, { "epoch": 17.463840399002493, "grad_norm": 13.746424674987793, "learning_rate": 2.5433915211970077e-06, "loss": 0.3476, "step": 70030 }, { "epoch": 17.466334164588527, "grad_norm": 8.04629898071289, "learning_rate": 2.5408977556109726e-06, "loss": 0.2839, "step": 70040 }, { "epoch": 17.468827930174562, "grad_norm": 13.645923614501953, "learning_rate": 2.538403990024938e-06, "loss": 0.3092, "step": 70050 }, { "epoch": 17.471321695760597, "grad_norm": 6.63210391998291, "learning_rate": 2.535910224438903e-06, "loss": 0.3725, "step": 70060 }, { "epoch": 17.473815461346632, "grad_norm": 6.891839981079102, "learning_rate": 2.533416458852868e-06, "loss": 0.2888, "step": 70070 }, { "epoch": 17.476309226932667, "grad_norm": 9.221366882324219, "learning_rate": 2.530922693266833e-06, "loss": 0.3489, "step": 70080 }, { "epoch": 17.478802992518702, "grad_norm": 10.869357109069824, "learning_rate": 2.5284289276807983e-06, "loss": 0.4087, "step": 70090 }, { "epoch": 17.481296758104737, "grad_norm": 9.193957328796387, "learning_rate": 2.525935162094763e-06, "loss": 0.324, "step": 70100 }, { "epoch": 17.48379052369077, "grad_norm": 7.817142009735107, "learning_rate": 2.5234413965087285e-06, "loss": 0.3491, "step": 70110 }, { "epoch": 17.486284289276806, "grad_norm": 11.097153663635254, "learning_rate": 2.5209476309226934e-06, "loss": 0.2901, "step": 70120 }, { "epoch": 17.48877805486284, "grad_norm": 8.653058052062988, "learning_rate": 2.5184538653366587e-06, "loss": 0.3366, "step": 70130 }, { "epoch": 17.491271820448876, "grad_norm": 7.455108642578125, "learning_rate": 2.5159600997506236e-06, "loss": 0.342, "step": 70140 }, { "epoch": 17.49376558603491, "grad_norm": 6.872528553009033, "learning_rate": 2.513466334164589e-06, "loss": 0.3033, "step": 70150 }, { "epoch": 17.496259351620946, "grad_norm": 7.771061420440674, "learning_rate": 2.510972568578554e-06, "loss": 0.2978, "step": 70160 }, { "epoch": 17.49875311720698, "grad_norm": 7.269931316375732, "learning_rate": 2.508478802992519e-06, "loss": 0.3072, "step": 70170 }, { "epoch": 17.50124688279302, "grad_norm": 10.571673393249512, "learning_rate": 2.5059850374064844e-06, "loss": 0.4029, "step": 70180 }, { "epoch": 17.503740648379054, "grad_norm": 7.55682373046875, "learning_rate": 2.503491271820449e-06, "loss": 0.3645, "step": 70190 }, { "epoch": 17.50623441396509, "grad_norm": 8.619101524353027, "learning_rate": 2.5009975062344146e-06, "loss": 0.4596, "step": 70200 }, { "epoch": 17.508728179551124, "grad_norm": 8.528571128845215, "learning_rate": 2.498503740648379e-06, "loss": 0.3011, "step": 70210 }, { "epoch": 17.51122194513716, "grad_norm": 11.281834602355957, "learning_rate": 2.4960099750623444e-06, "loss": 0.3314, "step": 70220 }, { "epoch": 17.513715710723194, "grad_norm": 6.625566482543945, "learning_rate": 2.4935162094763092e-06, "loss": 0.2803, "step": 70230 }, { "epoch": 17.51620947630923, "grad_norm": 11.519903182983398, "learning_rate": 2.4910224438902746e-06, "loss": 0.3145, "step": 70240 }, { "epoch": 17.518703241895263, "grad_norm": 7.888875484466553, "learning_rate": 2.4885286783042394e-06, "loss": 0.3021, "step": 70250 }, { "epoch": 17.521197007481298, "grad_norm": 7.573606967926025, "learning_rate": 2.4860349127182048e-06, "loss": 0.2962, "step": 70260 }, { "epoch": 17.523690773067333, "grad_norm": 8.137581825256348, "learning_rate": 2.4835411471321696e-06, "loss": 0.3396, "step": 70270 }, { "epoch": 17.526184538653368, "grad_norm": 10.423111915588379, "learning_rate": 2.481047381546135e-06, "loss": 0.3194, "step": 70280 }, { "epoch": 17.528678304239403, "grad_norm": 11.856714248657227, "learning_rate": 2.4785536159601003e-06, "loss": 0.3732, "step": 70290 }, { "epoch": 17.531172069825438, "grad_norm": 8.993725776672363, "learning_rate": 2.476059850374065e-06, "loss": 0.288, "step": 70300 }, { "epoch": 17.533665835411473, "grad_norm": 9.244604110717773, "learning_rate": 2.47356608478803e-06, "loss": 0.3439, "step": 70310 }, { "epoch": 17.536159600997507, "grad_norm": 9.765801429748535, "learning_rate": 2.4710723192019953e-06, "loss": 0.3375, "step": 70320 }, { "epoch": 17.538653366583542, "grad_norm": 5.98468542098999, "learning_rate": 2.4685785536159602e-06, "loss": 0.302, "step": 70330 }, { "epoch": 17.541147132169577, "grad_norm": 8.877677917480469, "learning_rate": 2.4660847880299255e-06, "loss": 0.387, "step": 70340 }, { "epoch": 17.543640897755612, "grad_norm": 10.188618659973145, "learning_rate": 2.4635910224438904e-06, "loss": 0.3714, "step": 70350 }, { "epoch": 17.546134663341647, "grad_norm": 9.320550918579102, "learning_rate": 2.4610972568578557e-06, "loss": 0.3256, "step": 70360 }, { "epoch": 17.54862842892768, "grad_norm": 7.385508060455322, "learning_rate": 2.4586034912718206e-06, "loss": 0.2825, "step": 70370 }, { "epoch": 17.551122194513717, "grad_norm": 8.037924766540527, "learning_rate": 2.456109725685786e-06, "loss": 0.3273, "step": 70380 }, { "epoch": 17.55361596009975, "grad_norm": 10.301573753356934, "learning_rate": 2.453615960099751e-06, "loss": 0.2664, "step": 70390 }, { "epoch": 17.556109725685786, "grad_norm": 7.737729072570801, "learning_rate": 2.451122194513716e-06, "loss": 0.3422, "step": 70400 }, { "epoch": 17.55860349127182, "grad_norm": 7.797543525695801, "learning_rate": 2.448628428927681e-06, "loss": 0.3644, "step": 70410 }, { "epoch": 17.561097256857856, "grad_norm": 9.298747062683105, "learning_rate": 2.446134663341646e-06, "loss": 0.3983, "step": 70420 }, { "epoch": 17.56359102244389, "grad_norm": 8.475441932678223, "learning_rate": 2.443640897755611e-06, "loss": 0.3913, "step": 70430 }, { "epoch": 17.566084788029926, "grad_norm": 7.733828544616699, "learning_rate": 2.441147132169576e-06, "loss": 0.3371, "step": 70440 }, { "epoch": 17.56857855361596, "grad_norm": 7.250517845153809, "learning_rate": 2.4386533665835414e-06, "loss": 0.3646, "step": 70450 }, { "epoch": 17.571072319201996, "grad_norm": 13.300467491149902, "learning_rate": 2.4361596009975063e-06, "loss": 0.3736, "step": 70460 }, { "epoch": 17.57356608478803, "grad_norm": 7.141026973724365, "learning_rate": 2.4336658354114716e-06, "loss": 0.2415, "step": 70470 }, { "epoch": 17.576059850374065, "grad_norm": 6.321515083312988, "learning_rate": 2.4311720698254365e-06, "loss": 0.3182, "step": 70480 }, { "epoch": 17.5785536159601, "grad_norm": 10.123273849487305, "learning_rate": 2.428678304239402e-06, "loss": 0.3007, "step": 70490 }, { "epoch": 17.581047381546135, "grad_norm": 6.999142169952393, "learning_rate": 2.4261845386533667e-06, "loss": 0.3756, "step": 70500 }, { "epoch": 17.58354114713217, "grad_norm": 11.76720905303955, "learning_rate": 2.4236907730673316e-06, "loss": 0.3423, "step": 70510 }, { "epoch": 17.586034912718205, "grad_norm": 7.583934783935547, "learning_rate": 2.421197007481297e-06, "loss": 0.3457, "step": 70520 }, { "epoch": 17.58852867830424, "grad_norm": 7.330530166625977, "learning_rate": 2.418703241895262e-06, "loss": 0.2704, "step": 70530 }, { "epoch": 17.591022443890274, "grad_norm": 9.098861694335938, "learning_rate": 2.416209476309227e-06, "loss": 0.3421, "step": 70540 }, { "epoch": 17.59351620947631, "grad_norm": 7.645537853240967, "learning_rate": 2.4137157107231924e-06, "loss": 0.3421, "step": 70550 }, { "epoch": 17.596009975062344, "grad_norm": 7.081808090209961, "learning_rate": 2.4112219451371573e-06, "loss": 0.3081, "step": 70560 }, { "epoch": 17.59850374064838, "grad_norm": 9.392647743225098, "learning_rate": 2.4087281795511226e-06, "loss": 0.322, "step": 70570 }, { "epoch": 17.600997506234414, "grad_norm": 7.144293308258057, "learning_rate": 2.4062344139650875e-06, "loss": 0.3469, "step": 70580 }, { "epoch": 17.60349127182045, "grad_norm": 4.9244489669799805, "learning_rate": 2.4037406483790528e-06, "loss": 0.2498, "step": 70590 }, { "epoch": 17.605985037406484, "grad_norm": 7.220204830169678, "learning_rate": 2.4012468827930177e-06, "loss": 0.2706, "step": 70600 }, { "epoch": 17.60847880299252, "grad_norm": 13.143561363220215, "learning_rate": 2.398753117206983e-06, "loss": 0.2832, "step": 70610 }, { "epoch": 17.610972568578553, "grad_norm": 6.485062599182129, "learning_rate": 2.396259351620948e-06, "loss": 0.2975, "step": 70620 }, { "epoch": 17.61346633416459, "grad_norm": 9.139724731445312, "learning_rate": 2.3937655860349127e-06, "loss": 0.3112, "step": 70630 }, { "epoch": 17.615960099750623, "grad_norm": 9.77446460723877, "learning_rate": 2.391271820448878e-06, "loss": 0.3726, "step": 70640 }, { "epoch": 17.618453865336658, "grad_norm": 8.16642951965332, "learning_rate": 2.388778054862843e-06, "loss": 0.3558, "step": 70650 }, { "epoch": 17.620947630922693, "grad_norm": 8.044200897216797, "learning_rate": 2.3862842892768082e-06, "loss": 0.3816, "step": 70660 }, { "epoch": 17.623441396508728, "grad_norm": 6.677599906921387, "learning_rate": 2.383790523690773e-06, "loss": 0.3632, "step": 70670 }, { "epoch": 17.625935162094763, "grad_norm": 7.929919719696045, "learning_rate": 2.3812967581047384e-06, "loss": 0.3247, "step": 70680 }, { "epoch": 17.628428927680797, "grad_norm": 5.376601219177246, "learning_rate": 2.3788029925187033e-06, "loss": 0.3405, "step": 70690 }, { "epoch": 17.630922693266832, "grad_norm": 3.786297082901001, "learning_rate": 2.3763092269326686e-06, "loss": 0.2899, "step": 70700 }, { "epoch": 17.633416458852867, "grad_norm": 6.230623722076416, "learning_rate": 2.3738154613466335e-06, "loss": 0.3461, "step": 70710 }, { "epoch": 17.635910224438902, "grad_norm": 7.953461170196533, "learning_rate": 2.3713216957605984e-06, "loss": 0.2477, "step": 70720 }, { "epoch": 17.638403990024937, "grad_norm": 6.597314357757568, "learning_rate": 2.3688279301745637e-06, "loss": 0.3126, "step": 70730 }, { "epoch": 17.640897755610972, "grad_norm": 10.526558876037598, "learning_rate": 2.3663341645885286e-06, "loss": 0.3603, "step": 70740 }, { "epoch": 17.643391521197007, "grad_norm": 6.605552673339844, "learning_rate": 2.363840399002494e-06, "loss": 0.3097, "step": 70750 }, { "epoch": 17.64588528678304, "grad_norm": 7.1740031242370605, "learning_rate": 2.361346633416459e-06, "loss": 0.3014, "step": 70760 }, { "epoch": 17.648379052369076, "grad_norm": 9.406676292419434, "learning_rate": 2.358852867830424e-06, "loss": 0.3107, "step": 70770 }, { "epoch": 17.65087281795511, "grad_norm": 8.029032707214355, "learning_rate": 2.3563591022443894e-06, "loss": 0.3425, "step": 70780 }, { "epoch": 17.653366583541146, "grad_norm": 13.621152877807617, "learning_rate": 2.3538653366583543e-06, "loss": 0.4274, "step": 70790 }, { "epoch": 17.65586034912718, "grad_norm": 7.198687553405762, "learning_rate": 2.3513715710723196e-06, "loss": 0.3595, "step": 70800 }, { "epoch": 17.658354114713216, "grad_norm": 7.494908332824707, "learning_rate": 2.3488778054862845e-06, "loss": 0.2798, "step": 70810 }, { "epoch": 17.66084788029925, "grad_norm": 10.102776527404785, "learning_rate": 2.34638403990025e-06, "loss": 0.3385, "step": 70820 }, { "epoch": 17.663341645885286, "grad_norm": 7.839235782623291, "learning_rate": 2.3438902743142147e-06, "loss": 0.3029, "step": 70830 }, { "epoch": 17.66583541147132, "grad_norm": 7.807239532470703, "learning_rate": 2.34139650872818e-06, "loss": 0.2806, "step": 70840 }, { "epoch": 17.668329177057355, "grad_norm": 7.2098069190979, "learning_rate": 2.338902743142145e-06, "loss": 0.3324, "step": 70850 }, { "epoch": 17.67082294264339, "grad_norm": 12.253145217895508, "learning_rate": 2.3364089775561098e-06, "loss": 0.3326, "step": 70860 }, { "epoch": 17.673316708229425, "grad_norm": 6.79581880569458, "learning_rate": 2.333915211970075e-06, "loss": 0.3108, "step": 70870 }, { "epoch": 17.67581047381546, "grad_norm": 6.1217145919799805, "learning_rate": 2.33142144638404e-06, "loss": 0.2697, "step": 70880 }, { "epoch": 17.678304239401495, "grad_norm": 5.935173034667969, "learning_rate": 2.3289276807980053e-06, "loss": 0.3026, "step": 70890 }, { "epoch": 17.68079800498753, "grad_norm": 10.652816772460938, "learning_rate": 2.32643391521197e-06, "loss": 0.2878, "step": 70900 }, { "epoch": 17.683291770573565, "grad_norm": 9.911921501159668, "learning_rate": 2.3239401496259355e-06, "loss": 0.305, "step": 70910 }, { "epoch": 17.6857855361596, "grad_norm": 9.336223602294922, "learning_rate": 2.3214463840399004e-06, "loss": 0.3035, "step": 70920 }, { "epoch": 17.688279301745634, "grad_norm": 10.193656921386719, "learning_rate": 2.3189526184538657e-06, "loss": 0.2473, "step": 70930 }, { "epoch": 17.69077306733167, "grad_norm": 11.795038223266602, "learning_rate": 2.3164588528678306e-06, "loss": 0.3295, "step": 70940 }, { "epoch": 17.693266832917704, "grad_norm": 10.375980377197266, "learning_rate": 2.3139650872817955e-06, "loss": 0.3085, "step": 70950 }, { "epoch": 17.69576059850374, "grad_norm": 6.3128533363342285, "learning_rate": 2.3114713216957608e-06, "loss": 0.282, "step": 70960 }, { "epoch": 17.698254364089777, "grad_norm": 5.7575907707214355, "learning_rate": 2.3089775561097256e-06, "loss": 0.3209, "step": 70970 }, { "epoch": 17.70074812967581, "grad_norm": 8.107242584228516, "learning_rate": 2.306483790523691e-06, "loss": 0.3082, "step": 70980 }, { "epoch": 17.703241895261847, "grad_norm": 9.448348999023438, "learning_rate": 2.303990024937656e-06, "loss": 0.3068, "step": 70990 }, { "epoch": 17.705735660847882, "grad_norm": 15.375732421875, "learning_rate": 2.301496259351621e-06, "loss": 0.3056, "step": 71000 }, { "epoch": 17.708229426433917, "grad_norm": 10.136244773864746, "learning_rate": 2.299002493765586e-06, "loss": 0.297, "step": 71010 }, { "epoch": 17.71072319201995, "grad_norm": 7.518948078155518, "learning_rate": 2.2965087281795514e-06, "loss": 0.3659, "step": 71020 }, { "epoch": 17.713216957605987, "grad_norm": 8.733294486999512, "learning_rate": 2.2940149625935167e-06, "loss": 0.3045, "step": 71030 }, { "epoch": 17.71571072319202, "grad_norm": 11.162075996398926, "learning_rate": 2.2915211970074815e-06, "loss": 0.3342, "step": 71040 }, { "epoch": 17.718204488778056, "grad_norm": 7.541927814483643, "learning_rate": 2.289027431421447e-06, "loss": 0.2797, "step": 71050 }, { "epoch": 17.72069825436409, "grad_norm": 9.148726463317871, "learning_rate": 2.2865336658354117e-06, "loss": 0.2668, "step": 71060 }, { "epoch": 17.723192019950126, "grad_norm": 10.433960914611816, "learning_rate": 2.2840399002493766e-06, "loss": 0.3384, "step": 71070 }, { "epoch": 17.72568578553616, "grad_norm": 10.94124984741211, "learning_rate": 2.281546134663342e-06, "loss": 0.2648, "step": 71080 }, { "epoch": 17.728179551122196, "grad_norm": 10.347514152526855, "learning_rate": 2.279052369077307e-06, "loss": 0.3198, "step": 71090 }, { "epoch": 17.73067331670823, "grad_norm": 9.887333869934082, "learning_rate": 2.276558603491272e-06, "loss": 0.3412, "step": 71100 }, { "epoch": 17.733167082294266, "grad_norm": 11.739676475524902, "learning_rate": 2.274064837905237e-06, "loss": 0.3266, "step": 71110 }, { "epoch": 17.7356608478803, "grad_norm": 8.010821342468262, "learning_rate": 2.2715710723192023e-06, "loss": 0.2687, "step": 71120 }, { "epoch": 17.738154613466335, "grad_norm": 7.8744730949401855, "learning_rate": 2.2690773067331672e-06, "loss": 0.3463, "step": 71130 }, { "epoch": 17.74064837905237, "grad_norm": 6.5923237800598145, "learning_rate": 2.2665835411471325e-06, "loss": 0.3428, "step": 71140 }, { "epoch": 17.743142144638405, "grad_norm": 12.174713134765625, "learning_rate": 2.2640897755610974e-06, "loss": 0.3093, "step": 71150 }, { "epoch": 17.74563591022444, "grad_norm": 7.997066020965576, "learning_rate": 2.2615960099750627e-06, "loss": 0.3085, "step": 71160 }, { "epoch": 17.748129675810475, "grad_norm": 10.162776947021484, "learning_rate": 2.2591022443890276e-06, "loss": 0.3193, "step": 71170 }, { "epoch": 17.75062344139651, "grad_norm": 8.954973220825195, "learning_rate": 2.2566084788029925e-06, "loss": 0.3455, "step": 71180 }, { "epoch": 17.753117206982544, "grad_norm": 5.909659385681152, "learning_rate": 2.254114713216958e-06, "loss": 0.3149, "step": 71190 }, { "epoch": 17.75561097256858, "grad_norm": 7.491812229156494, "learning_rate": 2.2516209476309227e-06, "loss": 0.2545, "step": 71200 }, { "epoch": 17.758104738154614, "grad_norm": 7.293992519378662, "learning_rate": 2.249127182044888e-06, "loss": 0.2647, "step": 71210 }, { "epoch": 17.76059850374065, "grad_norm": 8.303019523620605, "learning_rate": 2.246633416458853e-06, "loss": 0.3833, "step": 71220 }, { "epoch": 17.763092269326684, "grad_norm": 7.720460414886475, "learning_rate": 2.244139650872818e-06, "loss": 0.2887, "step": 71230 }, { "epoch": 17.76558603491272, "grad_norm": 10.40861988067627, "learning_rate": 2.241645885286783e-06, "loss": 0.3591, "step": 71240 }, { "epoch": 17.768079800498754, "grad_norm": 12.28356647491455, "learning_rate": 2.2391521197007484e-06, "loss": 0.3628, "step": 71250 }, { "epoch": 17.77057356608479, "grad_norm": 10.298823356628418, "learning_rate": 2.2366583541147133e-06, "loss": 0.3126, "step": 71260 }, { "epoch": 17.773067331670823, "grad_norm": 12.331887245178223, "learning_rate": 2.2341645885286786e-06, "loss": 0.3214, "step": 71270 }, { "epoch": 17.77556109725686, "grad_norm": 5.967679977416992, "learning_rate": 2.231670822942644e-06, "loss": 0.3022, "step": 71280 }, { "epoch": 17.778054862842893, "grad_norm": 8.2744722366333, "learning_rate": 2.2291770573566088e-06, "loss": 0.3448, "step": 71290 }, { "epoch": 17.780548628428928, "grad_norm": 7.625812530517578, "learning_rate": 2.2266832917705737e-06, "loss": 0.2958, "step": 71300 }, { "epoch": 17.783042394014963, "grad_norm": 9.751514434814453, "learning_rate": 2.224189526184539e-06, "loss": 0.3786, "step": 71310 }, { "epoch": 17.785536159600998, "grad_norm": 8.716211318969727, "learning_rate": 2.221695760598504e-06, "loss": 0.2933, "step": 71320 }, { "epoch": 17.788029925187033, "grad_norm": 11.199061393737793, "learning_rate": 2.219201995012469e-06, "loss": 0.3668, "step": 71330 }, { "epoch": 17.790523690773068, "grad_norm": 14.230875015258789, "learning_rate": 2.216708229426434e-06, "loss": 0.3918, "step": 71340 }, { "epoch": 17.793017456359102, "grad_norm": 7.556332111358643, "learning_rate": 2.2142144638403994e-06, "loss": 0.2992, "step": 71350 }, { "epoch": 17.795511221945137, "grad_norm": 10.51162052154541, "learning_rate": 2.2117206982543643e-06, "loss": 0.2895, "step": 71360 }, { "epoch": 17.798004987531172, "grad_norm": 8.932967185974121, "learning_rate": 2.2092269326683296e-06, "loss": 0.3283, "step": 71370 }, { "epoch": 17.800498753117207, "grad_norm": 6.184909343719482, "learning_rate": 2.2067331670822945e-06, "loss": 0.3352, "step": 71380 }, { "epoch": 17.802992518703242, "grad_norm": 16.87145233154297, "learning_rate": 2.2042394014962593e-06, "loss": 0.3586, "step": 71390 }, { "epoch": 17.805486284289277, "grad_norm": 9.270705223083496, "learning_rate": 2.2017456359102246e-06, "loss": 0.2885, "step": 71400 }, { "epoch": 17.80798004987531, "grad_norm": 8.402549743652344, "learning_rate": 2.1992518703241895e-06, "loss": 0.2626, "step": 71410 }, { "epoch": 17.810473815461346, "grad_norm": 5.3837738037109375, "learning_rate": 2.196758104738155e-06, "loss": 0.3185, "step": 71420 }, { "epoch": 17.81296758104738, "grad_norm": 9.095513343811035, "learning_rate": 2.1942643391521197e-06, "loss": 0.3384, "step": 71430 }, { "epoch": 17.815461346633416, "grad_norm": 15.026154518127441, "learning_rate": 2.191770573566085e-06, "loss": 0.3384, "step": 71440 }, { "epoch": 17.81795511221945, "grad_norm": 9.241385459899902, "learning_rate": 2.18927680798005e-06, "loss": 0.3365, "step": 71450 }, { "epoch": 17.820448877805486, "grad_norm": 8.417593002319336, "learning_rate": 2.1867830423940152e-06, "loss": 0.3111, "step": 71460 }, { "epoch": 17.82294264339152, "grad_norm": 6.55775260925293, "learning_rate": 2.18428927680798e-06, "loss": 0.3183, "step": 71470 }, { "epoch": 17.825436408977556, "grad_norm": 5.751813888549805, "learning_rate": 2.181795511221945e-06, "loss": 0.248, "step": 71480 }, { "epoch": 17.82793017456359, "grad_norm": 8.865701675415039, "learning_rate": 2.1793017456359103e-06, "loss": 0.3105, "step": 71490 }, { "epoch": 17.830423940149625, "grad_norm": 8.64411449432373, "learning_rate": 2.176807980049875e-06, "loss": 0.3189, "step": 71500 }, { "epoch": 17.83291770573566, "grad_norm": 10.088348388671875, "learning_rate": 2.1743142144638405e-06, "loss": 0.3171, "step": 71510 }, { "epoch": 17.835411471321695, "grad_norm": 8.263297080993652, "learning_rate": 2.171820448877806e-06, "loss": 0.2926, "step": 71520 }, { "epoch": 17.83790523690773, "grad_norm": 14.566165924072266, "learning_rate": 2.1693266832917707e-06, "loss": 0.3174, "step": 71530 }, { "epoch": 17.840399002493765, "grad_norm": 7.461530685424805, "learning_rate": 2.166832917705736e-06, "loss": 0.3255, "step": 71540 }, { "epoch": 17.8428927680798, "grad_norm": 5.400354385375977, "learning_rate": 2.164339152119701e-06, "loss": 0.263, "step": 71550 }, { "epoch": 17.845386533665835, "grad_norm": 9.746865272521973, "learning_rate": 2.1618453865336662e-06, "loss": 0.3657, "step": 71560 }, { "epoch": 17.84788029925187, "grad_norm": 7.5796380043029785, "learning_rate": 2.159351620947631e-06, "loss": 0.3111, "step": 71570 }, { "epoch": 17.850374064837904, "grad_norm": 11.697222709655762, "learning_rate": 2.1568578553615964e-06, "loss": 0.2908, "step": 71580 }, { "epoch": 17.85286783042394, "grad_norm": 6.474806308746338, "learning_rate": 2.1543640897755613e-06, "loss": 0.285, "step": 71590 }, { "epoch": 17.855361596009974, "grad_norm": 9.604869842529297, "learning_rate": 2.1518703241895266e-06, "loss": 0.3314, "step": 71600 }, { "epoch": 17.85785536159601, "grad_norm": 7.376259803771973, "learning_rate": 2.1493765586034915e-06, "loss": 0.257, "step": 71610 }, { "epoch": 17.860349127182044, "grad_norm": 8.758426666259766, "learning_rate": 2.1468827930174564e-06, "loss": 0.3672, "step": 71620 }, { "epoch": 17.86284289276808, "grad_norm": 8.611882209777832, "learning_rate": 2.1443890274314217e-06, "loss": 0.3306, "step": 71630 }, { "epoch": 17.865336658354114, "grad_norm": 10.081382751464844, "learning_rate": 2.1418952618453866e-06, "loss": 0.3223, "step": 71640 }, { "epoch": 17.86783042394015, "grad_norm": 11.713033676147461, "learning_rate": 2.139401496259352e-06, "loss": 0.3424, "step": 71650 }, { "epoch": 17.870324189526183, "grad_norm": 8.003156661987305, "learning_rate": 2.1369077306733168e-06, "loss": 0.327, "step": 71660 }, { "epoch": 17.872817955112218, "grad_norm": 9.68770694732666, "learning_rate": 2.134413965087282e-06, "loss": 0.3158, "step": 71670 }, { "epoch": 17.875311720698253, "grad_norm": 4.524538516998291, "learning_rate": 2.131920199501247e-06, "loss": 0.3178, "step": 71680 }, { "epoch": 17.877805486284288, "grad_norm": 8.126871109008789, "learning_rate": 2.1294264339152123e-06, "loss": 0.3448, "step": 71690 }, { "epoch": 17.880299251870323, "grad_norm": 8.234221458435059, "learning_rate": 2.126932668329177e-06, "loss": 0.3894, "step": 71700 }, { "epoch": 17.882793017456358, "grad_norm": 8.27507209777832, "learning_rate": 2.124438902743142e-06, "loss": 0.3331, "step": 71710 }, { "epoch": 17.885286783042392, "grad_norm": 9.675071716308594, "learning_rate": 2.1219451371571074e-06, "loss": 0.3486, "step": 71720 }, { "epoch": 17.887780548628427, "grad_norm": 7.789792537689209, "learning_rate": 2.1194513715710722e-06, "loss": 0.3582, "step": 71730 }, { "epoch": 17.890274314214462, "grad_norm": 17.104684829711914, "learning_rate": 2.1169576059850376e-06, "loss": 0.3253, "step": 71740 }, { "epoch": 17.892768079800497, "grad_norm": 10.092325210571289, "learning_rate": 2.1144638403990024e-06, "loss": 0.3024, "step": 71750 }, { "epoch": 17.895261845386532, "grad_norm": 7.519723415374756, "learning_rate": 2.1119700748129678e-06, "loss": 0.3506, "step": 71760 }, { "epoch": 17.897755610972567, "grad_norm": 9.717748641967773, "learning_rate": 2.109476309226933e-06, "loss": 0.2639, "step": 71770 }, { "epoch": 17.900249376558605, "grad_norm": 7.854363441467285, "learning_rate": 2.106982543640898e-06, "loss": 0.3382, "step": 71780 }, { "epoch": 17.902743142144637, "grad_norm": 6.1837158203125, "learning_rate": 2.1044887780548633e-06, "loss": 0.328, "step": 71790 }, { "epoch": 17.905236907730675, "grad_norm": 9.821382522583008, "learning_rate": 2.101995012468828e-06, "loss": 0.3765, "step": 71800 }, { "epoch": 17.90773067331671, "grad_norm": 8.692037582397461, "learning_rate": 2.0995012468827935e-06, "loss": 0.3051, "step": 71810 }, { "epoch": 17.910224438902745, "grad_norm": 8.943056106567383, "learning_rate": 2.0970074812967583e-06, "loss": 0.339, "step": 71820 }, { "epoch": 17.91271820448878, "grad_norm": 8.475810050964355, "learning_rate": 2.0945137157107232e-06, "loss": 0.3219, "step": 71830 }, { "epoch": 17.915211970074814, "grad_norm": 13.694457054138184, "learning_rate": 2.0920199501246885e-06, "loss": 0.2674, "step": 71840 }, { "epoch": 17.91770573566085, "grad_norm": 15.57154655456543, "learning_rate": 2.0895261845386534e-06, "loss": 0.3422, "step": 71850 }, { "epoch": 17.920199501246884, "grad_norm": 7.596667289733887, "learning_rate": 2.0870324189526187e-06, "loss": 0.352, "step": 71860 }, { "epoch": 17.92269326683292, "grad_norm": 7.299078941345215, "learning_rate": 2.0845386533665836e-06, "loss": 0.3473, "step": 71870 }, { "epoch": 17.925187032418954, "grad_norm": 8.171175003051758, "learning_rate": 2.082044887780549e-06, "loss": 0.3868, "step": 71880 }, { "epoch": 17.92768079800499, "grad_norm": 27.5074405670166, "learning_rate": 2.079551122194514e-06, "loss": 0.3894, "step": 71890 }, { "epoch": 17.930174563591024, "grad_norm": 10.98017692565918, "learning_rate": 2.077057356608479e-06, "loss": 0.2976, "step": 71900 }, { "epoch": 17.93266832917706, "grad_norm": 10.127460479736328, "learning_rate": 2.074563591022444e-06, "loss": 0.3238, "step": 71910 }, { "epoch": 17.935162094763093, "grad_norm": 6.212870121002197, "learning_rate": 2.0720698254364093e-06, "loss": 0.3211, "step": 71920 }, { "epoch": 17.93765586034913, "grad_norm": 8.205334663391113, "learning_rate": 2.069576059850374e-06, "loss": 0.3003, "step": 71930 }, { "epoch": 17.940149625935163, "grad_norm": 8.863457679748535, "learning_rate": 2.067082294264339e-06, "loss": 0.3392, "step": 71940 }, { "epoch": 17.942643391521198, "grad_norm": 8.939859390258789, "learning_rate": 2.0645885286783044e-06, "loss": 0.2978, "step": 71950 }, { "epoch": 17.945137157107233, "grad_norm": 12.467900276184082, "learning_rate": 2.0620947630922693e-06, "loss": 0.3426, "step": 71960 }, { "epoch": 17.947630922693268, "grad_norm": 9.684351921081543, "learning_rate": 2.0596009975062346e-06, "loss": 0.3648, "step": 71970 }, { "epoch": 17.950124688279303, "grad_norm": 7.206076622009277, "learning_rate": 2.0571072319201995e-06, "loss": 0.3311, "step": 71980 }, { "epoch": 17.952618453865338, "grad_norm": 8.474637985229492, "learning_rate": 2.054613466334165e-06, "loss": 0.2923, "step": 71990 }, { "epoch": 17.955112219451372, "grad_norm": 5.08350944519043, "learning_rate": 2.0521197007481297e-06, "loss": 0.2908, "step": 72000 }, { "epoch": 17.957605985037407, "grad_norm": 8.969040870666504, "learning_rate": 2.049625935162095e-06, "loss": 0.3462, "step": 72010 }, { "epoch": 17.960099750623442, "grad_norm": 6.145038604736328, "learning_rate": 2.0471321695760603e-06, "loss": 0.3551, "step": 72020 }, { "epoch": 17.962593516209477, "grad_norm": 6.762228012084961, "learning_rate": 2.044638403990025e-06, "loss": 0.3054, "step": 72030 }, { "epoch": 17.965087281795512, "grad_norm": 8.712302207946777, "learning_rate": 2.0421446384039905e-06, "loss": 0.3249, "step": 72040 }, { "epoch": 17.967581047381547, "grad_norm": 8.052441596984863, "learning_rate": 2.0396508728179554e-06, "loss": 0.2844, "step": 72050 }, { "epoch": 17.97007481296758, "grad_norm": 11.43187427520752, "learning_rate": 2.0371571072319203e-06, "loss": 0.3358, "step": 72060 }, { "epoch": 17.972568578553616, "grad_norm": 11.102864265441895, "learning_rate": 2.0346633416458856e-06, "loss": 0.3596, "step": 72070 }, { "epoch": 17.97506234413965, "grad_norm": 9.357625961303711, "learning_rate": 2.0321695760598505e-06, "loss": 0.4051, "step": 72080 }, { "epoch": 17.977556109725686, "grad_norm": 10.954850196838379, "learning_rate": 2.0296758104738158e-06, "loss": 0.3903, "step": 72090 }, { "epoch": 17.98004987531172, "grad_norm": 7.4843645095825195, "learning_rate": 2.0271820448877807e-06, "loss": 0.3329, "step": 72100 }, { "epoch": 17.982543640897756, "grad_norm": 6.2788286209106445, "learning_rate": 2.024688279301746e-06, "loss": 0.3574, "step": 72110 }, { "epoch": 17.98503740648379, "grad_norm": 8.560807228088379, "learning_rate": 2.022194513715711e-06, "loss": 0.384, "step": 72120 }, { "epoch": 17.987531172069826, "grad_norm": 6.629812717437744, "learning_rate": 2.019700748129676e-06, "loss": 0.3336, "step": 72130 }, { "epoch": 17.99002493765586, "grad_norm": 7.1960859298706055, "learning_rate": 2.017206982543641e-06, "loss": 0.3042, "step": 72140 }, { "epoch": 17.992518703241895, "grad_norm": 12.617613792419434, "learning_rate": 2.014713216957606e-06, "loss": 0.303, "step": 72150 }, { "epoch": 17.99501246882793, "grad_norm": 8.579712867736816, "learning_rate": 2.0122194513715712e-06, "loss": 0.3387, "step": 72160 }, { "epoch": 17.997506234413965, "grad_norm": 7.062646865844727, "learning_rate": 2.009725685785536e-06, "loss": 0.3563, "step": 72170 }, { "epoch": 18.0, "grad_norm": 9.439546585083008, "learning_rate": 2.0072319201995014e-06, "loss": 0.3308, "step": 72180 }, { "epoch": 18.0, "eval_loss": 0.4180074632167816, "eval_runtime": 60.175, "eval_samples_per_second": 16.668, "eval_steps_per_second": 16.668, "step": 72180 }, { "epoch": 18.002493765586035, "grad_norm": 12.682416915893555, "learning_rate": 2.0047381546134663e-06, "loss": 0.315, "step": 72190 }, { "epoch": 18.00498753117207, "grad_norm": 6.629142761230469, "learning_rate": 2.0022443890274316e-06, "loss": 0.2881, "step": 72200 }, { "epoch": 18.007481296758105, "grad_norm": 11.43194580078125, "learning_rate": 1.9997506234413965e-06, "loss": 0.3674, "step": 72210 }, { "epoch": 18.00997506234414, "grad_norm": 10.104514122009277, "learning_rate": 1.997256857855362e-06, "loss": 0.3005, "step": 72220 }, { "epoch": 18.012468827930174, "grad_norm": 10.51015853881836, "learning_rate": 1.9947630922693267e-06, "loss": 0.3445, "step": 72230 }, { "epoch": 18.01496259351621, "grad_norm": 7.913299083709717, "learning_rate": 1.9922693266832916e-06, "loss": 0.3416, "step": 72240 }, { "epoch": 18.017456359102244, "grad_norm": 10.231574058532715, "learning_rate": 1.989775561097257e-06, "loss": 0.2745, "step": 72250 }, { "epoch": 18.01995012468828, "grad_norm": 7.487124919891357, "learning_rate": 1.9872817955112222e-06, "loss": 0.3448, "step": 72260 }, { "epoch": 18.022443890274314, "grad_norm": 8.64004898071289, "learning_rate": 1.984788029925187e-06, "loss": 0.3579, "step": 72270 }, { "epoch": 18.02493765586035, "grad_norm": 10.440835952758789, "learning_rate": 1.9822942643391524e-06, "loss": 0.2995, "step": 72280 }, { "epoch": 18.027431421446384, "grad_norm": 7.366189956665039, "learning_rate": 1.9798004987531173e-06, "loss": 0.276, "step": 72290 }, { "epoch": 18.02992518703242, "grad_norm": 3.2743232250213623, "learning_rate": 1.9773067331670826e-06, "loss": 0.2778, "step": 72300 }, { "epoch": 18.032418952618453, "grad_norm": 5.090819358825684, "learning_rate": 1.9748129675810475e-06, "loss": 0.296, "step": 72310 }, { "epoch": 18.034912718204488, "grad_norm": 9.35080337524414, "learning_rate": 1.972319201995013e-06, "loss": 0.2632, "step": 72320 }, { "epoch": 18.037406483790523, "grad_norm": 7.471287250518799, "learning_rate": 1.9698254364089777e-06, "loss": 0.3306, "step": 72330 }, { "epoch": 18.039900249376558, "grad_norm": 8.039196014404297, "learning_rate": 1.967331670822943e-06, "loss": 0.3021, "step": 72340 }, { "epoch": 18.042394014962593, "grad_norm": 8.003158569335938, "learning_rate": 1.964837905236908e-06, "loss": 0.3735, "step": 72350 }, { "epoch": 18.044887780548628, "grad_norm": 7.758488178253174, "learning_rate": 1.962344139650873e-06, "loss": 0.3305, "step": 72360 }, { "epoch": 18.047381546134662, "grad_norm": 9.10808277130127, "learning_rate": 1.959850374064838e-06, "loss": 0.3393, "step": 72370 }, { "epoch": 18.049875311720697, "grad_norm": 6.801268100738525, "learning_rate": 1.957356608478803e-06, "loss": 0.2978, "step": 72380 }, { "epoch": 18.052369077306732, "grad_norm": 7.056491851806641, "learning_rate": 1.9548628428927683e-06, "loss": 0.3622, "step": 72390 }, { "epoch": 18.054862842892767, "grad_norm": 8.197697639465332, "learning_rate": 1.952369077306733e-06, "loss": 0.3272, "step": 72400 }, { "epoch": 18.057356608478802, "grad_norm": 9.100240707397461, "learning_rate": 1.9498753117206985e-06, "loss": 0.2909, "step": 72410 }, { "epoch": 18.059850374064837, "grad_norm": 9.016037940979004, "learning_rate": 1.9473815461346634e-06, "loss": 0.3508, "step": 72420 }, { "epoch": 18.06234413965087, "grad_norm": 11.260120391845703, "learning_rate": 1.9448877805486287e-06, "loss": 0.3345, "step": 72430 }, { "epoch": 18.064837905236907, "grad_norm": 8.461393356323242, "learning_rate": 1.9423940149625936e-06, "loss": 0.2626, "step": 72440 }, { "epoch": 18.06733167082294, "grad_norm": 7.152003765106201, "learning_rate": 1.939900249376559e-06, "loss": 0.3907, "step": 72450 }, { "epoch": 18.069825436408976, "grad_norm": 7.978116512298584, "learning_rate": 1.9374064837905238e-06, "loss": 0.3898, "step": 72460 }, { "epoch": 18.07231920199501, "grad_norm": 6.364957332611084, "learning_rate": 1.9349127182044886e-06, "loss": 0.2275, "step": 72470 }, { "epoch": 18.074812967581046, "grad_norm": 8.800970077514648, "learning_rate": 1.932418952618454e-06, "loss": 0.253, "step": 72480 }, { "epoch": 18.07730673316708, "grad_norm": 5.478081226348877, "learning_rate": 1.929925187032419e-06, "loss": 0.3061, "step": 72490 }, { "epoch": 18.079800498753116, "grad_norm": 6.752138614654541, "learning_rate": 1.927431421446384e-06, "loss": 0.3113, "step": 72500 }, { "epoch": 18.08229426433915, "grad_norm": 10.513050079345703, "learning_rate": 1.9249376558603495e-06, "loss": 0.2699, "step": 72510 }, { "epoch": 18.084788029925186, "grad_norm": 8.9693021774292, "learning_rate": 1.9224438902743144e-06, "loss": 0.3346, "step": 72520 }, { "epoch": 18.08728179551122, "grad_norm": 7.213470458984375, "learning_rate": 1.9199501246882797e-06, "loss": 0.306, "step": 72530 }, { "epoch": 18.089775561097255, "grad_norm": 9.323833465576172, "learning_rate": 1.9174563591022445e-06, "loss": 0.3102, "step": 72540 }, { "epoch": 18.09226932668329, "grad_norm": 12.904609680175781, "learning_rate": 1.91496259351621e-06, "loss": 0.3915, "step": 72550 }, { "epoch": 18.094763092269325, "grad_norm": 7.509303092956543, "learning_rate": 1.9124688279301747e-06, "loss": 0.2904, "step": 72560 }, { "epoch": 18.09725685785536, "grad_norm": 7.649249076843262, "learning_rate": 1.90997506234414e-06, "loss": 0.2865, "step": 72570 }, { "epoch": 18.099750623441395, "grad_norm": 11.983275413513184, "learning_rate": 1.907481296758105e-06, "loss": 0.3197, "step": 72580 }, { "epoch": 18.102244389027433, "grad_norm": 10.603513717651367, "learning_rate": 1.90498753117207e-06, "loss": 0.3186, "step": 72590 }, { "epoch": 18.104738154613468, "grad_norm": 8.808806419372559, "learning_rate": 1.9024937655860351e-06, "loss": 0.3585, "step": 72600 }, { "epoch": 18.107231920199503, "grad_norm": 8.247468948364258, "learning_rate": 1.9000000000000002e-06, "loss": 0.3955, "step": 72610 }, { "epoch": 18.109725685785538, "grad_norm": 10.441139221191406, "learning_rate": 1.8975062344139653e-06, "loss": 0.355, "step": 72620 }, { "epoch": 18.112219451371573, "grad_norm": 6.936132431030273, "learning_rate": 1.8950124688279304e-06, "loss": 0.3121, "step": 72630 }, { "epoch": 18.114713216957608, "grad_norm": 7.062928676605225, "learning_rate": 1.8925187032418953e-06, "loss": 0.2723, "step": 72640 }, { "epoch": 18.117206982543642, "grad_norm": 6.312658786773682, "learning_rate": 1.8900249376558604e-06, "loss": 0.2906, "step": 72650 }, { "epoch": 18.119700748129677, "grad_norm": 8.172001838684082, "learning_rate": 1.8877805486284289e-06, "loss": 0.3375, "step": 72660 }, { "epoch": 18.122194513715712, "grad_norm": 9.601921081542969, "learning_rate": 1.885286783042394e-06, "loss": 0.365, "step": 72670 }, { "epoch": 18.124688279301747, "grad_norm": 7.265673637390137, "learning_rate": 1.882793017456359e-06, "loss": 0.2651, "step": 72680 }, { "epoch": 18.127182044887782, "grad_norm": 9.785295486450195, "learning_rate": 1.8802992518703244e-06, "loss": 0.3108, "step": 72690 }, { "epoch": 18.129675810473817, "grad_norm": 10.865586280822754, "learning_rate": 1.8778054862842895e-06, "loss": 0.2968, "step": 72700 }, { "epoch": 18.13216957605985, "grad_norm": 5.767469882965088, "learning_rate": 1.8753117206982546e-06, "loss": 0.2573, "step": 72710 }, { "epoch": 18.134663341645886, "grad_norm": 9.119926452636719, "learning_rate": 1.8728179551122197e-06, "loss": 0.336, "step": 72720 }, { "epoch": 18.13715710723192, "grad_norm": 11.801755905151367, "learning_rate": 1.8703241895261848e-06, "loss": 0.3547, "step": 72730 }, { "epoch": 18.139650872817956, "grad_norm": 8.99133014678955, "learning_rate": 1.8678304239401499e-06, "loss": 0.3888, "step": 72740 }, { "epoch": 18.14214463840399, "grad_norm": 9.552180290222168, "learning_rate": 1.865336658354115e-06, "loss": 0.3176, "step": 72750 }, { "epoch": 18.144638403990026, "grad_norm": 9.201723098754883, "learning_rate": 1.86284289276808e-06, "loss": 0.2666, "step": 72760 }, { "epoch": 18.14713216957606, "grad_norm": 9.10981273651123, "learning_rate": 1.8603491271820452e-06, "loss": 0.2701, "step": 72770 }, { "epoch": 18.149625935162096, "grad_norm": 5.472398281097412, "learning_rate": 1.85785536159601e-06, "loss": 0.3164, "step": 72780 }, { "epoch": 18.15211970074813, "grad_norm": 10.77421760559082, "learning_rate": 1.8553615960099752e-06, "loss": 0.3434, "step": 72790 }, { "epoch": 18.154613466334165, "grad_norm": 7.737188339233398, "learning_rate": 1.8528678304239403e-06, "loss": 0.2856, "step": 72800 }, { "epoch": 18.1571072319202, "grad_norm": 10.025762557983398, "learning_rate": 1.8503740648379054e-06, "loss": 0.3094, "step": 72810 }, { "epoch": 18.159600997506235, "grad_norm": 12.608803749084473, "learning_rate": 1.8478802992518705e-06, "loss": 0.3368, "step": 72820 }, { "epoch": 18.16209476309227, "grad_norm": 17.044055938720703, "learning_rate": 1.8453865336658356e-06, "loss": 0.3715, "step": 72830 }, { "epoch": 18.164588528678305, "grad_norm": 8.355241775512695, "learning_rate": 1.8428927680798007e-06, "loss": 0.329, "step": 72840 }, { "epoch": 18.16708229426434, "grad_norm": 8.667624473571777, "learning_rate": 1.8403990024937658e-06, "loss": 0.3585, "step": 72850 }, { "epoch": 18.169576059850375, "grad_norm": 9.941221237182617, "learning_rate": 1.8379052369077309e-06, "loss": 0.376, "step": 72860 }, { "epoch": 18.17206982543641, "grad_norm": 8.516250610351562, "learning_rate": 1.8354114713216957e-06, "loss": 0.2555, "step": 72870 }, { "epoch": 18.174563591022444, "grad_norm": 9.351677894592285, "learning_rate": 1.8329177057356608e-06, "loss": 0.312, "step": 72880 }, { "epoch": 18.17705735660848, "grad_norm": 10.373080253601074, "learning_rate": 1.830423940149626e-06, "loss": 0.3313, "step": 72890 }, { "epoch": 18.179551122194514, "grad_norm": 6.510061264038086, "learning_rate": 1.827930174563591e-06, "loss": 0.2775, "step": 72900 }, { "epoch": 18.18204488778055, "grad_norm": 7.710317611694336, "learning_rate": 1.8254364089775561e-06, "loss": 0.3265, "step": 72910 }, { "epoch": 18.184538653366584, "grad_norm": 9.246587753295898, "learning_rate": 1.8229426433915212e-06, "loss": 0.3061, "step": 72920 }, { "epoch": 18.18703241895262, "grad_norm": 8.690896987915039, "learning_rate": 1.8204488778054865e-06, "loss": 0.3098, "step": 72930 }, { "epoch": 18.189526184538654, "grad_norm": 4.682961463928223, "learning_rate": 1.8179551122194516e-06, "loss": 0.2886, "step": 72940 }, { "epoch": 18.19201995012469, "grad_norm": 5.759263038635254, "learning_rate": 1.8154613466334167e-06, "loss": 0.2854, "step": 72950 }, { "epoch": 18.194513715710723, "grad_norm": 7.35156774520874, "learning_rate": 1.8129675810473818e-06, "loss": 0.3615, "step": 72960 }, { "epoch": 18.197007481296758, "grad_norm": 7.8746819496154785, "learning_rate": 1.810473815461347e-06, "loss": 0.3827, "step": 72970 }, { "epoch": 18.199501246882793, "grad_norm": 10.189519882202148, "learning_rate": 1.807980049875312e-06, "loss": 0.3083, "step": 72980 }, { "epoch": 18.201995012468828, "grad_norm": 8.979336738586426, "learning_rate": 1.8054862842892771e-06, "loss": 0.366, "step": 72990 }, { "epoch": 18.204488778054863, "grad_norm": 6.781293869018555, "learning_rate": 1.802992518703242e-06, "loss": 0.348, "step": 73000 }, { "epoch": 18.206982543640898, "grad_norm": 9.620187759399414, "learning_rate": 1.8004987531172071e-06, "loss": 0.3721, "step": 73010 }, { "epoch": 18.209476309226932, "grad_norm": 8.865036964416504, "learning_rate": 1.7980049875311722e-06, "loss": 0.3411, "step": 73020 }, { "epoch": 18.211970074812967, "grad_norm": 9.20656681060791, "learning_rate": 1.7955112219451373e-06, "loss": 0.3595, "step": 73030 }, { "epoch": 18.214463840399002, "grad_norm": 11.974329948425293, "learning_rate": 1.7930174563591024e-06, "loss": 0.3608, "step": 73040 }, { "epoch": 18.216957605985037, "grad_norm": 13.708403587341309, "learning_rate": 1.7905236907730675e-06, "loss": 0.3302, "step": 73050 }, { "epoch": 18.219451371571072, "grad_norm": 8.671035766601562, "learning_rate": 1.7880299251870326e-06, "loss": 0.2768, "step": 73060 }, { "epoch": 18.221945137157107, "grad_norm": 9.130983352661133, "learning_rate": 1.7855361596009977e-06, "loss": 0.3167, "step": 73070 }, { "epoch": 18.22443890274314, "grad_norm": 9.449166297912598, "learning_rate": 1.7830423940149628e-06, "loss": 0.3409, "step": 73080 }, { "epoch": 18.226932668329177, "grad_norm": 8.308408737182617, "learning_rate": 1.7805486284289277e-06, "loss": 0.2954, "step": 73090 }, { "epoch": 18.22942643391521, "grad_norm": 11.468031883239746, "learning_rate": 1.7780548628428928e-06, "loss": 0.3183, "step": 73100 }, { "epoch": 18.231920199501246, "grad_norm": 10.764180183410645, "learning_rate": 1.7755610972568579e-06, "loss": 0.3812, "step": 73110 }, { "epoch": 18.23441396508728, "grad_norm": 8.669768333435059, "learning_rate": 1.773067331670823e-06, "loss": 0.3437, "step": 73120 }, { "epoch": 18.236907730673316, "grad_norm": 8.164244651794434, "learning_rate": 1.770573566084788e-06, "loss": 0.2734, "step": 73130 }, { "epoch": 18.23940149625935, "grad_norm": 6.5684943199157715, "learning_rate": 1.7680798004987532e-06, "loss": 0.3716, "step": 73140 }, { "epoch": 18.241895261845386, "grad_norm": 8.023477554321289, "learning_rate": 1.7655860349127183e-06, "loss": 0.3461, "step": 73150 }, { "epoch": 18.24438902743142, "grad_norm": 6.442568302154541, "learning_rate": 1.7630922693266834e-06, "loss": 0.3287, "step": 73160 }, { "epoch": 18.246882793017456, "grad_norm": 10.371484756469727, "learning_rate": 1.7605985037406485e-06, "loss": 0.2998, "step": 73170 }, { "epoch": 18.24937655860349, "grad_norm": 10.30907917022705, "learning_rate": 1.7581047381546138e-06, "loss": 0.3161, "step": 73180 }, { "epoch": 18.251870324189525, "grad_norm": 11.629962921142578, "learning_rate": 1.7556109725685789e-06, "loss": 0.2857, "step": 73190 }, { "epoch": 18.25436408977556, "grad_norm": 10.187129020690918, "learning_rate": 1.753117206982544e-06, "loss": 0.3485, "step": 73200 }, { "epoch": 18.256857855361595, "grad_norm": 13.606742858886719, "learning_rate": 1.750623441396509e-06, "loss": 0.3258, "step": 73210 }, { "epoch": 18.25935162094763, "grad_norm": 7.9029860496521, "learning_rate": 1.748129675810474e-06, "loss": 0.264, "step": 73220 }, { "epoch": 18.261845386533665, "grad_norm": 6.414330959320068, "learning_rate": 1.745635910224439e-06, "loss": 0.3278, "step": 73230 }, { "epoch": 18.2643391521197, "grad_norm": 4.603034019470215, "learning_rate": 1.7431421446384042e-06, "loss": 0.304, "step": 73240 }, { "epoch": 18.266832917705734, "grad_norm": 9.417990684509277, "learning_rate": 1.7406483790523692e-06, "loss": 0.3797, "step": 73250 }, { "epoch": 18.26932668329177, "grad_norm": 10.289613723754883, "learning_rate": 1.7381546134663343e-06, "loss": 0.4033, "step": 73260 }, { "epoch": 18.271820448877804, "grad_norm": 18.14396858215332, "learning_rate": 1.7356608478802994e-06, "loss": 0.2768, "step": 73270 }, { "epoch": 18.27431421446384, "grad_norm": 11.941176414489746, "learning_rate": 1.7331670822942645e-06, "loss": 0.431, "step": 73280 }, { "epoch": 18.276807980049874, "grad_norm": 9.569537162780762, "learning_rate": 1.7306733167082296e-06, "loss": 0.2749, "step": 73290 }, { "epoch": 18.27930174563591, "grad_norm": 7.719649791717529, "learning_rate": 1.7281795511221947e-06, "loss": 0.3432, "step": 73300 }, { "epoch": 18.281795511221944, "grad_norm": 6.728184223175049, "learning_rate": 1.7256857855361596e-06, "loss": 0.2864, "step": 73310 }, { "epoch": 18.28428927680798, "grad_norm": 7.6080708503723145, "learning_rate": 1.7231920199501247e-06, "loss": 0.3257, "step": 73320 }, { "epoch": 18.286783042394013, "grad_norm": 8.406044006347656, "learning_rate": 1.7206982543640898e-06, "loss": 0.3528, "step": 73330 }, { "epoch": 18.28927680798005, "grad_norm": 11.21180534362793, "learning_rate": 1.718204488778055e-06, "loss": 0.3166, "step": 73340 }, { "epoch": 18.291770573566083, "grad_norm": 12.678295135498047, "learning_rate": 1.71571072319202e-06, "loss": 0.3484, "step": 73350 }, { "epoch": 18.294264339152118, "grad_norm": 13.616421699523926, "learning_rate": 1.7132169576059851e-06, "loss": 0.3677, "step": 73360 }, { "epoch": 18.296758104738153, "grad_norm": 8.696226119995117, "learning_rate": 1.7107231920199502e-06, "loss": 0.3082, "step": 73370 }, { "epoch": 18.29925187032419, "grad_norm": 10.46649169921875, "learning_rate": 1.7082294264339153e-06, "loss": 0.3416, "step": 73380 }, { "epoch": 18.301745635910226, "grad_norm": 5.726923942565918, "learning_rate": 1.7057356608478804e-06, "loss": 0.3065, "step": 73390 }, { "epoch": 18.30423940149626, "grad_norm": 12.030797004699707, "learning_rate": 1.7032418952618455e-06, "loss": 0.328, "step": 73400 }, { "epoch": 18.306733167082296, "grad_norm": 8.201338768005371, "learning_rate": 1.7007481296758104e-06, "loss": 0.3321, "step": 73410 }, { "epoch": 18.30922693266833, "grad_norm": 9.528154373168945, "learning_rate": 1.6982543640897755e-06, "loss": 0.2487, "step": 73420 }, { "epoch": 18.311720698254366, "grad_norm": 8.872268676757812, "learning_rate": 1.695760598503741e-06, "loss": 0.3468, "step": 73430 }, { "epoch": 18.3142144638404, "grad_norm": 6.192471027374268, "learning_rate": 1.693266832917706e-06, "loss": 0.2942, "step": 73440 }, { "epoch": 18.316708229426435, "grad_norm": 8.524904251098633, "learning_rate": 1.690773067331671e-06, "loss": 0.3305, "step": 73450 }, { "epoch": 18.31920199501247, "grad_norm": 6.720682621002197, "learning_rate": 1.688279301745636e-06, "loss": 0.3114, "step": 73460 }, { "epoch": 18.321695760598505, "grad_norm": 8.790159225463867, "learning_rate": 1.6857855361596012e-06, "loss": 0.3181, "step": 73470 }, { "epoch": 18.32418952618454, "grad_norm": 6.875416278839111, "learning_rate": 1.6832917705735663e-06, "loss": 0.3115, "step": 73480 }, { "epoch": 18.326683291770575, "grad_norm": 8.09900188446045, "learning_rate": 1.6807980049875314e-06, "loss": 0.3607, "step": 73490 }, { "epoch": 18.32917705735661, "grad_norm": 8.227255821228027, "learning_rate": 1.6783042394014965e-06, "loss": 0.3341, "step": 73500 }, { "epoch": 18.331670822942645, "grad_norm": 11.627192497253418, "learning_rate": 1.6758104738154616e-06, "loss": 0.3915, "step": 73510 }, { "epoch": 18.33416458852868, "grad_norm": 11.05119800567627, "learning_rate": 1.6733167082294267e-06, "loss": 0.3357, "step": 73520 }, { "epoch": 18.336658354114714, "grad_norm": 9.547077178955078, "learning_rate": 1.6708229426433918e-06, "loss": 0.3351, "step": 73530 }, { "epoch": 18.33915211970075, "grad_norm": 10.876405715942383, "learning_rate": 1.6683291770573567e-06, "loss": 0.3085, "step": 73540 }, { "epoch": 18.341645885286784, "grad_norm": 5.926941394805908, "learning_rate": 1.6658354114713218e-06, "loss": 0.2994, "step": 73550 }, { "epoch": 18.34413965087282, "grad_norm": 9.938848495483398, "learning_rate": 1.6633416458852869e-06, "loss": 0.3295, "step": 73560 }, { "epoch": 18.346633416458854, "grad_norm": 9.622965812683105, "learning_rate": 1.660847880299252e-06, "loss": 0.2535, "step": 73570 }, { "epoch": 18.34912718204489, "grad_norm": 7.86170768737793, "learning_rate": 1.658354114713217e-06, "loss": 0.3655, "step": 73580 }, { "epoch": 18.351620947630924, "grad_norm": 11.039167404174805, "learning_rate": 1.6558603491271822e-06, "loss": 0.3227, "step": 73590 }, { "epoch": 18.35411471321696, "grad_norm": 9.131935119628906, "learning_rate": 1.6533665835411473e-06, "loss": 0.2782, "step": 73600 }, { "epoch": 18.356608478802993, "grad_norm": 7.852529048919678, "learning_rate": 1.6508728179551124e-06, "loss": 0.2881, "step": 73610 }, { "epoch": 18.359102244389028, "grad_norm": 6.67081356048584, "learning_rate": 1.6483790523690775e-06, "loss": 0.3281, "step": 73620 }, { "epoch": 18.361596009975063, "grad_norm": 9.186594009399414, "learning_rate": 1.6458852867830423e-06, "loss": 0.3104, "step": 73630 }, { "epoch": 18.364089775561098, "grad_norm": 10.543354988098145, "learning_rate": 1.6433915211970074e-06, "loss": 0.2973, "step": 73640 }, { "epoch": 18.366583541147133, "grad_norm": 8.135270118713379, "learning_rate": 1.6408977556109725e-06, "loss": 0.3834, "step": 73650 }, { "epoch": 18.369077306733168, "grad_norm": 8.696205139160156, "learning_rate": 1.6384039900249376e-06, "loss": 0.317, "step": 73660 }, { "epoch": 18.371571072319203, "grad_norm": 8.284186363220215, "learning_rate": 1.6359102244389027e-06, "loss": 0.3482, "step": 73670 }, { "epoch": 18.374064837905237, "grad_norm": 8.957206726074219, "learning_rate": 1.633416458852868e-06, "loss": 0.3858, "step": 73680 }, { "epoch": 18.376558603491272, "grad_norm": 11.914627075195312, "learning_rate": 1.6309226932668331e-06, "loss": 0.335, "step": 73690 }, { "epoch": 18.379052369077307, "grad_norm": 5.708254814147949, "learning_rate": 1.6284289276807982e-06, "loss": 0.365, "step": 73700 }, { "epoch": 18.381546134663342, "grad_norm": 9.29157829284668, "learning_rate": 1.6259351620947633e-06, "loss": 0.29, "step": 73710 }, { "epoch": 18.384039900249377, "grad_norm": 6.766468048095703, "learning_rate": 1.6234413965087284e-06, "loss": 0.2897, "step": 73720 }, { "epoch": 18.38653366583541, "grad_norm": 7.3969621658325195, "learning_rate": 1.6209476309226935e-06, "loss": 0.3482, "step": 73730 }, { "epoch": 18.389027431421447, "grad_norm": 6.613345623016357, "learning_rate": 1.6184538653366586e-06, "loss": 0.3101, "step": 73740 }, { "epoch": 18.39152119700748, "grad_norm": 7.1980881690979, "learning_rate": 1.6159600997506237e-06, "loss": 0.2709, "step": 73750 }, { "epoch": 18.394014962593516, "grad_norm": 12.448799133300781, "learning_rate": 1.6134663341645886e-06, "loss": 0.2926, "step": 73760 }, { "epoch": 18.39650872817955, "grad_norm": 9.95907974243164, "learning_rate": 1.6109725685785537e-06, "loss": 0.303, "step": 73770 }, { "epoch": 18.399002493765586, "grad_norm": 8.818389892578125, "learning_rate": 1.6084788029925188e-06, "loss": 0.2965, "step": 73780 }, { "epoch": 18.40149625935162, "grad_norm": 6.657719612121582, "learning_rate": 1.605985037406484e-06, "loss": 0.3098, "step": 73790 }, { "epoch": 18.403990024937656, "grad_norm": 8.187033653259277, "learning_rate": 1.603491271820449e-06, "loss": 0.2981, "step": 73800 }, { "epoch": 18.40648379052369, "grad_norm": 8.11359977722168, "learning_rate": 1.600997506234414e-06, "loss": 0.3991, "step": 73810 }, { "epoch": 18.408977556109726, "grad_norm": 5.73296594619751, "learning_rate": 1.5985037406483792e-06, "loss": 0.3214, "step": 73820 }, { "epoch": 18.41147132169576, "grad_norm": 7.935540199279785, "learning_rate": 1.5960099750623443e-06, "loss": 0.3434, "step": 73830 }, { "epoch": 18.413965087281795, "grad_norm": 6.838698863983154, "learning_rate": 1.5935162094763094e-06, "loss": 0.2924, "step": 73840 }, { "epoch": 18.41645885286783, "grad_norm": 7.937646389007568, "learning_rate": 1.5910224438902743e-06, "loss": 0.2674, "step": 73850 }, { "epoch": 18.418952618453865, "grad_norm": 8.589198112487793, "learning_rate": 1.5885286783042394e-06, "loss": 0.3232, "step": 73860 }, { "epoch": 18.4214463840399, "grad_norm": 6.257779598236084, "learning_rate": 1.5860349127182045e-06, "loss": 0.2764, "step": 73870 }, { "epoch": 18.423940149625935, "grad_norm": 9.784756660461426, "learning_rate": 1.5835411471321696e-06, "loss": 0.3288, "step": 73880 }, { "epoch": 18.42643391521197, "grad_norm": 7.663664817810059, "learning_rate": 1.5810473815461347e-06, "loss": 0.2703, "step": 73890 }, { "epoch": 18.428927680798004, "grad_norm": 7.855859756469727, "learning_rate": 1.5785536159600998e-06, "loss": 0.2777, "step": 73900 }, { "epoch": 18.43142144638404, "grad_norm": 8.598638534545898, "learning_rate": 1.5760598503740649e-06, "loss": 0.3726, "step": 73910 }, { "epoch": 18.433915211970074, "grad_norm": 12.342257499694824, "learning_rate": 1.5735660847880302e-06, "loss": 0.3803, "step": 73920 }, { "epoch": 18.43640897755611, "grad_norm": 10.327555656433105, "learning_rate": 1.5710723192019953e-06, "loss": 0.324, "step": 73930 }, { "epoch": 18.438902743142144, "grad_norm": 10.554641723632812, "learning_rate": 1.5685785536159604e-06, "loss": 0.2346, "step": 73940 }, { "epoch": 18.44139650872818, "grad_norm": 10.217893600463867, "learning_rate": 1.5660847880299255e-06, "loss": 0.3869, "step": 73950 }, { "epoch": 18.443890274314214, "grad_norm": 9.727376937866211, "learning_rate": 1.5635910224438906e-06, "loss": 0.3377, "step": 73960 }, { "epoch": 18.44638403990025, "grad_norm": 8.947395324707031, "learning_rate": 1.5610972568578557e-06, "loss": 0.3863, "step": 73970 }, { "epoch": 18.448877805486283, "grad_norm": 8.704399108886719, "learning_rate": 1.5586034912718206e-06, "loss": 0.3594, "step": 73980 }, { "epoch": 18.45137157107232, "grad_norm": 7.907713413238525, "learning_rate": 1.5561097256857857e-06, "loss": 0.3043, "step": 73990 }, { "epoch": 18.453865336658353, "grad_norm": 11.05506420135498, "learning_rate": 1.5536159600997507e-06, "loss": 0.307, "step": 74000 }, { "epoch": 18.456359102244388, "grad_norm": 10.79157829284668, "learning_rate": 1.5511221945137158e-06, "loss": 0.3657, "step": 74010 }, { "epoch": 18.458852867830423, "grad_norm": 9.126988410949707, "learning_rate": 1.548628428927681e-06, "loss": 0.2559, "step": 74020 }, { "epoch": 18.461346633416458, "grad_norm": 15.90583324432373, "learning_rate": 1.546134663341646e-06, "loss": 0.387, "step": 74030 }, { "epoch": 18.463840399002493, "grad_norm": 7.69087028503418, "learning_rate": 1.5436408977556111e-06, "loss": 0.3183, "step": 74040 }, { "epoch": 18.466334164588527, "grad_norm": 10.746506690979004, "learning_rate": 1.5411471321695762e-06, "loss": 0.3444, "step": 74050 }, { "epoch": 18.468827930174562, "grad_norm": 10.501643180847168, "learning_rate": 1.5386533665835413e-06, "loss": 0.3714, "step": 74060 }, { "epoch": 18.471321695760597, "grad_norm": 5.846063137054443, "learning_rate": 1.5361596009975062e-06, "loss": 0.3788, "step": 74070 }, { "epoch": 18.473815461346632, "grad_norm": 5.375149726867676, "learning_rate": 1.5336658354114713e-06, "loss": 0.2884, "step": 74080 }, { "epoch": 18.476309226932667, "grad_norm": 8.232563972473145, "learning_rate": 1.5311720698254364e-06, "loss": 0.3549, "step": 74090 }, { "epoch": 18.478802992518702, "grad_norm": 7.693140983581543, "learning_rate": 1.5286783042394015e-06, "loss": 0.3765, "step": 74100 }, { "epoch": 18.481296758104737, "grad_norm": 8.750802040100098, "learning_rate": 1.5261845386533666e-06, "loss": 0.2949, "step": 74110 }, { "epoch": 18.48379052369077, "grad_norm": 7.751341819763184, "learning_rate": 1.5236907730673317e-06, "loss": 0.3237, "step": 74120 }, { "epoch": 18.486284289276806, "grad_norm": 8.8473482131958, "learning_rate": 1.5211970074812968e-06, "loss": 0.2869, "step": 74130 }, { "epoch": 18.48877805486284, "grad_norm": 8.20888900756836, "learning_rate": 1.518703241895262e-06, "loss": 0.3399, "step": 74140 }, { "epoch": 18.491271820448876, "grad_norm": 8.531835556030273, "learning_rate": 1.516209476309227e-06, "loss": 0.2784, "step": 74150 }, { "epoch": 18.49376558603491, "grad_norm": 13.030848503112793, "learning_rate": 1.513715710723192e-06, "loss": 0.3464, "step": 74160 }, { "epoch": 18.496259351620946, "grad_norm": 9.676426887512207, "learning_rate": 1.5112219451371574e-06, "loss": 0.3156, "step": 74170 }, { "epoch": 18.49875311720698, "grad_norm": 5.307429313659668, "learning_rate": 1.5087281795511225e-06, "loss": 0.3121, "step": 74180 }, { "epoch": 18.50124688279302, "grad_norm": 8.61061954498291, "learning_rate": 1.5062344139650876e-06, "loss": 0.4055, "step": 74190 }, { "epoch": 18.503740648379054, "grad_norm": 7.381380558013916, "learning_rate": 1.5037406483790525e-06, "loss": 0.3907, "step": 74200 }, { "epoch": 18.50623441396509, "grad_norm": 10.964561462402344, "learning_rate": 1.5012468827930176e-06, "loss": 0.3313, "step": 74210 }, { "epoch": 18.508728179551124, "grad_norm": 8.3394136428833, "learning_rate": 1.4987531172069827e-06, "loss": 0.2691, "step": 74220 }, { "epoch": 18.51122194513716, "grad_norm": 7.458098411560059, "learning_rate": 1.4962593516209478e-06, "loss": 0.307, "step": 74230 }, { "epoch": 18.513715710723194, "grad_norm": 12.269042015075684, "learning_rate": 1.4937655860349129e-06, "loss": 0.299, "step": 74240 }, { "epoch": 18.51620947630923, "grad_norm": 5.956721305847168, "learning_rate": 1.491271820448878e-06, "loss": 0.3303, "step": 74250 }, { "epoch": 18.518703241895263, "grad_norm": 8.427618980407715, "learning_rate": 1.488778054862843e-06, "loss": 0.3256, "step": 74260 }, { "epoch": 18.521197007481298, "grad_norm": 7.890618801116943, "learning_rate": 1.4862842892768082e-06, "loss": 0.3507, "step": 74270 }, { "epoch": 18.523690773067333, "grad_norm": 10.247663497924805, "learning_rate": 1.4837905236907733e-06, "loss": 0.345, "step": 74280 }, { "epoch": 18.526184538653368, "grad_norm": 11.79736614227295, "learning_rate": 1.4812967581047384e-06, "loss": 0.3271, "step": 74290 }, { "epoch": 18.528678304239403, "grad_norm": 7.097879409790039, "learning_rate": 1.4788029925187033e-06, "loss": 0.2992, "step": 74300 }, { "epoch": 18.531172069825438, "grad_norm": 9.422330856323242, "learning_rate": 1.4763092269326684e-06, "loss": 0.3164, "step": 74310 }, { "epoch": 18.533665835411473, "grad_norm": 8.391748428344727, "learning_rate": 1.4738154613466335e-06, "loss": 0.3513, "step": 74320 }, { "epoch": 18.536159600997507, "grad_norm": 12.254179954528809, "learning_rate": 1.4713216957605986e-06, "loss": 0.3117, "step": 74330 }, { "epoch": 18.538653366583542, "grad_norm": 7.639408588409424, "learning_rate": 1.4688279301745637e-06, "loss": 0.2665, "step": 74340 }, { "epoch": 18.541147132169577, "grad_norm": 13.742928504943848, "learning_rate": 1.4663341645885288e-06, "loss": 0.2916, "step": 74350 }, { "epoch": 18.543640897755612, "grad_norm": 8.131094932556152, "learning_rate": 1.4638403990024939e-06, "loss": 0.3436, "step": 74360 }, { "epoch": 18.546134663341647, "grad_norm": 8.458292961120605, "learning_rate": 1.461346633416459e-06, "loss": 0.3361, "step": 74370 }, { "epoch": 18.54862842892768, "grad_norm": 7.697943210601807, "learning_rate": 1.458852867830424e-06, "loss": 0.2833, "step": 74380 }, { "epoch": 18.551122194513717, "grad_norm": 11.632762908935547, "learning_rate": 1.456359102244389e-06, "loss": 0.2625, "step": 74390 }, { "epoch": 18.55361596009975, "grad_norm": 8.714035034179688, "learning_rate": 1.453865336658354e-06, "loss": 0.3138, "step": 74400 }, { "epoch": 18.556109725685786, "grad_norm": 10.529500961303711, "learning_rate": 1.4513715710723191e-06, "loss": 0.4054, "step": 74410 }, { "epoch": 18.55860349127182, "grad_norm": 7.038276672363281, "learning_rate": 1.4488778054862844e-06, "loss": 0.318, "step": 74420 }, { "epoch": 18.561097256857856, "grad_norm": 8.944663047790527, "learning_rate": 1.4463840399002495e-06, "loss": 0.3261, "step": 74430 }, { "epoch": 18.56359102244389, "grad_norm": 6.774261474609375, "learning_rate": 1.4438902743142146e-06, "loss": 0.3315, "step": 74440 }, { "epoch": 18.566084788029926, "grad_norm": 8.720541954040527, "learning_rate": 1.4413965087281797e-06, "loss": 0.3715, "step": 74450 }, { "epoch": 18.56857855361596, "grad_norm": 9.304561614990234, "learning_rate": 1.4389027431421448e-06, "loss": 0.2913, "step": 74460 }, { "epoch": 18.571072319201996, "grad_norm": 6.90985631942749, "learning_rate": 1.43640897755611e-06, "loss": 0.3142, "step": 74470 }, { "epoch": 18.57356608478803, "grad_norm": 6.733709335327148, "learning_rate": 1.433915211970075e-06, "loss": 0.3388, "step": 74480 }, { "epoch": 18.576059850374065, "grad_norm": 10.039854049682617, "learning_rate": 1.4314214463840401e-06, "loss": 0.3232, "step": 74490 }, { "epoch": 18.5785536159601, "grad_norm": 9.462594032287598, "learning_rate": 1.4289276807980052e-06, "loss": 0.3591, "step": 74500 }, { "epoch": 18.581047381546135, "grad_norm": 9.804282188415527, "learning_rate": 1.4264339152119703e-06, "loss": 0.3105, "step": 74510 }, { "epoch": 18.58354114713217, "grad_norm": 11.309243202209473, "learning_rate": 1.4239401496259352e-06, "loss": 0.3292, "step": 74520 }, { "epoch": 18.586034912718205, "grad_norm": 13.264601707458496, "learning_rate": 1.4214463840399003e-06, "loss": 0.3184, "step": 74530 }, { "epoch": 18.58852867830424, "grad_norm": 8.009174346923828, "learning_rate": 1.4189526184538654e-06, "loss": 0.3198, "step": 74540 }, { "epoch": 18.591022443890274, "grad_norm": 9.68816089630127, "learning_rate": 1.4164588528678305e-06, "loss": 0.2911, "step": 74550 }, { "epoch": 18.59351620947631, "grad_norm": 15.687336921691895, "learning_rate": 1.4139650872817956e-06, "loss": 0.2892, "step": 74560 }, { "epoch": 18.596009975062344, "grad_norm": 11.459478378295898, "learning_rate": 1.4114713216957607e-06, "loss": 0.361, "step": 74570 }, { "epoch": 18.59850374064838, "grad_norm": 12.541219711303711, "learning_rate": 1.4089775561097258e-06, "loss": 0.3387, "step": 74580 }, { "epoch": 18.600997506234414, "grad_norm": 8.407682418823242, "learning_rate": 1.4064837905236909e-06, "loss": 0.366, "step": 74590 }, { "epoch": 18.60349127182045, "grad_norm": 9.860024452209473, "learning_rate": 1.403990024937656e-06, "loss": 0.3266, "step": 74600 }, { "epoch": 18.605985037406484, "grad_norm": 13.05792236328125, "learning_rate": 1.4014962593516209e-06, "loss": 0.3454, "step": 74610 }, { "epoch": 18.60847880299252, "grad_norm": 9.172198295593262, "learning_rate": 1.399002493765586e-06, "loss": 0.3367, "step": 74620 }, { "epoch": 18.610972568578553, "grad_norm": 9.569443702697754, "learning_rate": 1.396508728179551e-06, "loss": 0.2912, "step": 74630 }, { "epoch": 18.61346633416459, "grad_norm": 6.067528247833252, "learning_rate": 1.3940149625935162e-06, "loss": 0.2704, "step": 74640 }, { "epoch": 18.615960099750623, "grad_norm": 12.0790433883667, "learning_rate": 1.3915211970074813e-06, "loss": 0.2749, "step": 74650 }, { "epoch": 18.618453865336658, "grad_norm": 8.685276985168457, "learning_rate": 1.3890274314214464e-06, "loss": 0.2838, "step": 74660 }, { "epoch": 18.620947630922693, "grad_norm": 8.051512718200684, "learning_rate": 1.3865336658354117e-06, "loss": 0.2934, "step": 74670 }, { "epoch": 18.623441396508728, "grad_norm": 6.928664684295654, "learning_rate": 1.3840399002493768e-06, "loss": 0.2874, "step": 74680 }, { "epoch": 18.625935162094763, "grad_norm": 5.946801662445068, "learning_rate": 1.3815461346633419e-06, "loss": 0.2963, "step": 74690 }, { "epoch": 18.628428927680797, "grad_norm": 9.436132431030273, "learning_rate": 1.379052369077307e-06, "loss": 0.3327, "step": 74700 }, { "epoch": 18.630922693266832, "grad_norm": 6.325564384460449, "learning_rate": 1.376558603491272e-06, "loss": 0.313, "step": 74710 }, { "epoch": 18.633416458852867, "grad_norm": 5.90425443649292, "learning_rate": 1.3740648379052372e-06, "loss": 0.2912, "step": 74720 }, { "epoch": 18.635910224438902, "grad_norm": 6.267174243927002, "learning_rate": 1.3715710723192023e-06, "loss": 0.3487, "step": 74730 }, { "epoch": 18.638403990024937, "grad_norm": 6.616288185119629, "learning_rate": 1.3690773067331672e-06, "loss": 0.3146, "step": 74740 }, { "epoch": 18.640897755610972, "grad_norm": 8.680601119995117, "learning_rate": 1.3665835411471322e-06, "loss": 0.3009, "step": 74750 }, { "epoch": 18.643391521197007, "grad_norm": 12.029298782348633, "learning_rate": 1.3640897755610973e-06, "loss": 0.328, "step": 74760 }, { "epoch": 18.64588528678304, "grad_norm": 6.389432907104492, "learning_rate": 1.3615960099750624e-06, "loss": 0.3464, "step": 74770 }, { "epoch": 18.648379052369076, "grad_norm": 6.558785915374756, "learning_rate": 1.3591022443890275e-06, "loss": 0.2935, "step": 74780 }, { "epoch": 18.65087281795511, "grad_norm": 8.406317710876465, "learning_rate": 1.3566084788029926e-06, "loss": 0.3135, "step": 74790 }, { "epoch": 18.653366583541146, "grad_norm": 11.625472068786621, "learning_rate": 1.3541147132169577e-06, "loss": 0.3286, "step": 74800 }, { "epoch": 18.65586034912718, "grad_norm": 7.691009044647217, "learning_rate": 1.3516209476309228e-06, "loss": 0.2337, "step": 74810 }, { "epoch": 18.658354114713216, "grad_norm": 8.831992149353027, "learning_rate": 1.349127182044888e-06, "loss": 0.393, "step": 74820 }, { "epoch": 18.66084788029925, "grad_norm": 10.607884407043457, "learning_rate": 1.3466334164588528e-06, "loss": 0.2846, "step": 74830 }, { "epoch": 18.663341645885286, "grad_norm": 12.452134132385254, "learning_rate": 1.344139650872818e-06, "loss": 0.391, "step": 74840 }, { "epoch": 18.66583541147132, "grad_norm": 7.653696537017822, "learning_rate": 1.341645885286783e-06, "loss": 0.2668, "step": 74850 }, { "epoch": 18.668329177057355, "grad_norm": 4.616998195648193, "learning_rate": 1.3391521197007481e-06, "loss": 0.2979, "step": 74860 }, { "epoch": 18.67082294264339, "grad_norm": 8.645465850830078, "learning_rate": 1.3366583541147132e-06, "loss": 0.3351, "step": 74870 }, { "epoch": 18.673316708229425, "grad_norm": 11.444828987121582, "learning_rate": 1.3341645885286783e-06, "loss": 0.2851, "step": 74880 }, { "epoch": 18.67581047381546, "grad_norm": 8.822684288024902, "learning_rate": 1.3316708229426434e-06, "loss": 0.3381, "step": 74890 }, { "epoch": 18.678304239401495, "grad_norm": 6.625451564788818, "learning_rate": 1.3291770573566085e-06, "loss": 0.3038, "step": 74900 }, { "epoch": 18.68079800498753, "grad_norm": 9.063295364379883, "learning_rate": 1.3269326683291772e-06, "loss": 0.2841, "step": 74910 }, { "epoch": 18.683291770573565, "grad_norm": 10.658787727355957, "learning_rate": 1.3244389027431423e-06, "loss": 0.2965, "step": 74920 }, { "epoch": 18.6857855361596, "grad_norm": 10.322769165039062, "learning_rate": 1.3219451371571074e-06, "loss": 0.3753, "step": 74930 }, { "epoch": 18.688279301745634, "grad_norm": 5.894896030426025, "learning_rate": 1.3194513715710725e-06, "loss": 0.351, "step": 74940 }, { "epoch": 18.69077306733167, "grad_norm": 11.83723258972168, "learning_rate": 1.3169576059850376e-06, "loss": 0.3247, "step": 74950 }, { "epoch": 18.693266832917704, "grad_norm": 8.492645263671875, "learning_rate": 1.314713216957606e-06, "loss": 0.3656, "step": 74960 }, { "epoch": 18.69576059850374, "grad_norm": 11.175936698913574, "learning_rate": 1.3122194513715712e-06, "loss": 0.348, "step": 74970 }, { "epoch": 18.698254364089777, "grad_norm": 7.240017890930176, "learning_rate": 1.309725685785536e-06, "loss": 0.2931, "step": 74980 }, { "epoch": 18.70074812967581, "grad_norm": 8.305476188659668, "learning_rate": 1.3072319201995012e-06, "loss": 0.3615, "step": 74990 }, { "epoch": 18.703241895261847, "grad_norm": 12.025943756103516, "learning_rate": 1.3047381546134663e-06, "loss": 0.3484, "step": 75000 }, { "epoch": 18.705735660847882, "grad_norm": 9.331719398498535, "learning_rate": 1.3022443890274316e-06, "loss": 0.3532, "step": 75010 }, { "epoch": 18.708229426433917, "grad_norm": 11.926877975463867, "learning_rate": 1.2997506234413967e-06, "loss": 0.3444, "step": 75020 }, { "epoch": 18.71072319201995, "grad_norm": 7.448859214782715, "learning_rate": 1.2972568578553618e-06, "loss": 0.2879, "step": 75030 }, { "epoch": 18.713216957605987, "grad_norm": 11.024309158325195, "learning_rate": 1.2947630922693269e-06, "loss": 0.34, "step": 75040 }, { "epoch": 18.71571072319202, "grad_norm": 6.629947185516357, "learning_rate": 1.292269326683292e-06, "loss": 0.3714, "step": 75050 }, { "epoch": 18.718204488778056, "grad_norm": 7.899852275848389, "learning_rate": 1.289775561097257e-06, "loss": 0.3046, "step": 75060 }, { "epoch": 18.72069825436409, "grad_norm": 8.794214248657227, "learning_rate": 1.2872817955112222e-06, "loss": 0.3517, "step": 75070 }, { "epoch": 18.723192019950126, "grad_norm": 6.43766450881958, "learning_rate": 1.2847880299251873e-06, "loss": 0.3165, "step": 75080 }, { "epoch": 18.72568578553616, "grad_norm": 6.629555702209473, "learning_rate": 1.2822942643391523e-06, "loss": 0.3499, "step": 75090 }, { "epoch": 18.728179551122196, "grad_norm": 11.044578552246094, "learning_rate": 1.2798004987531174e-06, "loss": 0.3129, "step": 75100 }, { "epoch": 18.73067331670823, "grad_norm": 9.229756355285645, "learning_rate": 1.2773067331670823e-06, "loss": 0.3121, "step": 75110 }, { "epoch": 18.733167082294266, "grad_norm": 7.941111087799072, "learning_rate": 1.2748129675810474e-06, "loss": 0.4249, "step": 75120 }, { "epoch": 18.7356608478803, "grad_norm": 9.484469413757324, "learning_rate": 1.2723192019950125e-06, "loss": 0.2897, "step": 75130 }, { "epoch": 18.738154613466335, "grad_norm": 7.1095075607299805, "learning_rate": 1.2698254364089776e-06, "loss": 0.3664, "step": 75140 }, { "epoch": 18.74064837905237, "grad_norm": 6.883621692657471, "learning_rate": 1.2673316708229427e-06, "loss": 0.2501, "step": 75150 }, { "epoch": 18.743142144638405, "grad_norm": 8.864466667175293, "learning_rate": 1.2648379052369078e-06, "loss": 0.3146, "step": 75160 }, { "epoch": 18.74563591022444, "grad_norm": 7.686639308929443, "learning_rate": 1.262344139650873e-06, "loss": 0.2874, "step": 75170 }, { "epoch": 18.748129675810475, "grad_norm": 7.733112812042236, "learning_rate": 1.259850374064838e-06, "loss": 0.3358, "step": 75180 }, { "epoch": 18.75062344139651, "grad_norm": 8.906079292297363, "learning_rate": 1.2573566084788031e-06, "loss": 0.3112, "step": 75190 }, { "epoch": 18.753117206982544, "grad_norm": 11.127494812011719, "learning_rate": 1.254862842892768e-06, "loss": 0.3105, "step": 75200 }, { "epoch": 18.75561097256858, "grad_norm": 8.906811714172363, "learning_rate": 1.252369077306733e-06, "loss": 0.3047, "step": 75210 }, { "epoch": 18.758104738154614, "grad_norm": 6.8946027755737305, "learning_rate": 1.2498753117206984e-06, "loss": 0.3289, "step": 75220 }, { "epoch": 18.76059850374065, "grad_norm": 8.487861633300781, "learning_rate": 1.2473815461346635e-06, "loss": 0.4042, "step": 75230 }, { "epoch": 18.763092269326684, "grad_norm": 8.7031831741333, "learning_rate": 1.2448877805486286e-06, "loss": 0.3045, "step": 75240 }, { "epoch": 18.76558603491272, "grad_norm": 8.133673667907715, "learning_rate": 1.2423940149625937e-06, "loss": 0.324, "step": 75250 }, { "epoch": 18.768079800498754, "grad_norm": 8.087395668029785, "learning_rate": 1.2399002493765588e-06, "loss": 0.3004, "step": 75260 }, { "epoch": 18.77057356608479, "grad_norm": 8.729161262512207, "learning_rate": 1.2374064837905237e-06, "loss": 0.3408, "step": 75270 }, { "epoch": 18.773067331670823, "grad_norm": 9.859537124633789, "learning_rate": 1.2349127182044888e-06, "loss": 0.3635, "step": 75280 }, { "epoch": 18.77556109725686, "grad_norm": 8.983450889587402, "learning_rate": 1.2324189526184539e-06, "loss": 0.2727, "step": 75290 }, { "epoch": 18.778054862842893, "grad_norm": 7.337526798248291, "learning_rate": 1.229925187032419e-06, "loss": 0.2853, "step": 75300 }, { "epoch": 18.780548628428928, "grad_norm": 9.413741111755371, "learning_rate": 1.227431421446384e-06, "loss": 0.3259, "step": 75310 }, { "epoch": 18.783042394014963, "grad_norm": 8.917171478271484, "learning_rate": 1.2249376558603494e-06, "loss": 0.3087, "step": 75320 }, { "epoch": 18.785536159600998, "grad_norm": 9.118664741516113, "learning_rate": 1.2224438902743143e-06, "loss": 0.3468, "step": 75330 }, { "epoch": 18.788029925187033, "grad_norm": 11.285893440246582, "learning_rate": 1.2199501246882794e-06, "loss": 0.3248, "step": 75340 }, { "epoch": 18.790523690773068, "grad_norm": 8.557662963867188, "learning_rate": 1.2174563591022445e-06, "loss": 0.3836, "step": 75350 }, { "epoch": 18.793017456359102, "grad_norm": 9.565607070922852, "learning_rate": 1.2149625935162096e-06, "loss": 0.3616, "step": 75360 }, { "epoch": 18.795511221945137, "grad_norm": 7.839760780334473, "learning_rate": 1.2124688279301747e-06, "loss": 0.4556, "step": 75370 }, { "epoch": 18.798004987531172, "grad_norm": 8.867552757263184, "learning_rate": 1.2099750623441398e-06, "loss": 0.3541, "step": 75380 }, { "epoch": 18.800498753117207, "grad_norm": 8.26724910736084, "learning_rate": 1.2074812967581049e-06, "loss": 0.3567, "step": 75390 }, { "epoch": 18.802992518703242, "grad_norm": 11.374317169189453, "learning_rate": 1.20498753117207e-06, "loss": 0.2463, "step": 75400 }, { "epoch": 18.805486284289277, "grad_norm": 6.54574728012085, "learning_rate": 1.202493765586035e-06, "loss": 0.3166, "step": 75410 }, { "epoch": 18.80798004987531, "grad_norm": 6.992059230804443, "learning_rate": 1.2000000000000002e-06, "loss": 0.3017, "step": 75420 }, { "epoch": 18.810473815461346, "grad_norm": 6.233072757720947, "learning_rate": 1.197506234413965e-06, "loss": 0.3673, "step": 75430 }, { "epoch": 18.81296758104738, "grad_norm": 7.485711097717285, "learning_rate": 1.1950124688279301e-06, "loss": 0.2989, "step": 75440 }, { "epoch": 18.815461346633416, "grad_norm": 9.747830390930176, "learning_rate": 1.1925187032418955e-06, "loss": 0.325, "step": 75450 }, { "epoch": 18.81795511221945, "grad_norm": 6.727051734924316, "learning_rate": 1.1900249376558605e-06, "loss": 0.3339, "step": 75460 }, { "epoch": 18.820448877805486, "grad_norm": 7.6110968589782715, "learning_rate": 1.1875311720698256e-06, "loss": 0.2869, "step": 75470 }, { "epoch": 18.82294264339152, "grad_norm": 9.239736557006836, "learning_rate": 1.1850374064837907e-06, "loss": 0.2949, "step": 75480 }, { "epoch": 18.825436408977556, "grad_norm": 11.853121757507324, "learning_rate": 1.1825436408977556e-06, "loss": 0.3425, "step": 75490 }, { "epoch": 18.82793017456359, "grad_norm": 9.181052207946777, "learning_rate": 1.1800498753117207e-06, "loss": 0.3708, "step": 75500 }, { "epoch": 18.830423940149625, "grad_norm": 13.224994659423828, "learning_rate": 1.1775561097256858e-06, "loss": 0.3853, "step": 75510 }, { "epoch": 18.83291770573566, "grad_norm": 8.818222045898438, "learning_rate": 1.175062344139651e-06, "loss": 0.3206, "step": 75520 }, { "epoch": 18.835411471321695, "grad_norm": 10.314600944519043, "learning_rate": 1.172568578553616e-06, "loss": 0.3437, "step": 75530 }, { "epoch": 18.83790523690773, "grad_norm": 9.383597373962402, "learning_rate": 1.1700748129675811e-06, "loss": 0.4307, "step": 75540 }, { "epoch": 18.840399002493765, "grad_norm": 7.339836120605469, "learning_rate": 1.1675810473815462e-06, "loss": 0.3421, "step": 75550 }, { "epoch": 18.8428927680798, "grad_norm": 8.67222785949707, "learning_rate": 1.1650872817955113e-06, "loss": 0.2738, "step": 75560 }, { "epoch": 18.845386533665835, "grad_norm": 12.440013885498047, "learning_rate": 1.1625935162094764e-06, "loss": 0.3523, "step": 75570 }, { "epoch": 18.84788029925187, "grad_norm": 8.511222839355469, "learning_rate": 1.1600997506234415e-06, "loss": 0.3205, "step": 75580 }, { "epoch": 18.850374064837904, "grad_norm": 10.684443473815918, "learning_rate": 1.1576059850374066e-06, "loss": 0.3092, "step": 75590 }, { "epoch": 18.85286783042394, "grad_norm": 8.839110374450684, "learning_rate": 1.1551122194513717e-06, "loss": 0.3351, "step": 75600 }, { "epoch": 18.855361596009974, "grad_norm": 9.381013870239258, "learning_rate": 1.1526184538653368e-06, "loss": 0.2935, "step": 75610 }, { "epoch": 18.85785536159601, "grad_norm": 10.822402954101562, "learning_rate": 1.150124688279302e-06, "loss": 0.3281, "step": 75620 }, { "epoch": 18.860349127182044, "grad_norm": 8.065682411193848, "learning_rate": 1.147630922693267e-06, "loss": 0.336, "step": 75630 }, { "epoch": 18.86284289276808, "grad_norm": 8.760025978088379, "learning_rate": 1.145137157107232e-06, "loss": 0.285, "step": 75640 }, { "epoch": 18.865336658354114, "grad_norm": 10.8186616897583, "learning_rate": 1.142643391521197e-06, "loss": 0.4083, "step": 75650 }, { "epoch": 18.86783042394015, "grad_norm": 10.44265079498291, "learning_rate": 1.140149625935162e-06, "loss": 0.2865, "step": 75660 }, { "epoch": 18.870324189526183, "grad_norm": 12.26836109161377, "learning_rate": 1.1376558603491272e-06, "loss": 0.3483, "step": 75670 }, { "epoch": 18.872817955112218, "grad_norm": 7.17544412612915, "learning_rate": 1.1351620947630923e-06, "loss": 0.3349, "step": 75680 }, { "epoch": 18.875311720698253, "grad_norm": 10.033705711364746, "learning_rate": 1.1326683291770576e-06, "loss": 0.3098, "step": 75690 }, { "epoch": 18.877805486284288, "grad_norm": 11.135791778564453, "learning_rate": 1.1301745635910227e-06, "loss": 0.2847, "step": 75700 }, { "epoch": 18.880299251870323, "grad_norm": 6.80994176864624, "learning_rate": 1.1276807980049876e-06, "loss": 0.2888, "step": 75710 }, { "epoch": 18.882793017456358, "grad_norm": 7.766765117645264, "learning_rate": 1.1251870324189527e-06, "loss": 0.3815, "step": 75720 }, { "epoch": 18.885286783042392, "grad_norm": 6.553428649902344, "learning_rate": 1.1226932668329178e-06, "loss": 0.3442, "step": 75730 }, { "epoch": 18.887780548628427, "grad_norm": 5.193823337554932, "learning_rate": 1.1201995012468829e-06, "loss": 0.2764, "step": 75740 }, { "epoch": 18.890274314214462, "grad_norm": 12.545166015625, "learning_rate": 1.117705735660848e-06, "loss": 0.338, "step": 75750 }, { "epoch": 18.892768079800497, "grad_norm": 9.6239595413208, "learning_rate": 1.115211970074813e-06, "loss": 0.3336, "step": 75760 }, { "epoch": 18.895261845386532, "grad_norm": 8.860621452331543, "learning_rate": 1.1127182044887782e-06, "loss": 0.2912, "step": 75770 }, { "epoch": 18.897755610972567, "grad_norm": 10.455620765686035, "learning_rate": 1.1102244389027433e-06, "loss": 0.3473, "step": 75780 }, { "epoch": 18.900249376558605, "grad_norm": 7.781944751739502, "learning_rate": 1.1077306733167084e-06, "loss": 0.3008, "step": 75790 }, { "epoch": 18.902743142144637, "grad_norm": 6.780787944793701, "learning_rate": 1.1052369077306735e-06, "loss": 0.2852, "step": 75800 }, { "epoch": 18.905236907730675, "grad_norm": 11.432109832763672, "learning_rate": 1.1027431421446383e-06, "loss": 0.3317, "step": 75810 }, { "epoch": 18.90773067331671, "grad_norm": 9.996076583862305, "learning_rate": 1.1002493765586037e-06, "loss": 0.2939, "step": 75820 }, { "epoch": 18.910224438902745, "grad_norm": 9.775397300720215, "learning_rate": 1.0977556109725688e-06, "loss": 0.3415, "step": 75830 }, { "epoch": 18.91271820448878, "grad_norm": 10.835949897766113, "learning_rate": 1.0952618453865338e-06, "loss": 0.2963, "step": 75840 }, { "epoch": 18.915211970074814, "grad_norm": 8.529813766479492, "learning_rate": 1.092768079800499e-06, "loss": 0.2763, "step": 75850 }, { "epoch": 18.91770573566085, "grad_norm": 7.813754558563232, "learning_rate": 1.090274314214464e-06, "loss": 0.2713, "step": 75860 }, { "epoch": 18.920199501246884, "grad_norm": 10.183265686035156, "learning_rate": 1.087780548628429e-06, "loss": 0.3235, "step": 75870 }, { "epoch": 18.92269326683292, "grad_norm": 8.408893585205078, "learning_rate": 1.085286783042394e-06, "loss": 0.3334, "step": 75880 }, { "epoch": 18.925187032418954, "grad_norm": 11.72058391571045, "learning_rate": 1.0827930174563591e-06, "loss": 0.3118, "step": 75890 }, { "epoch": 18.92768079800499, "grad_norm": 6.197012424468994, "learning_rate": 1.0802992518703242e-06, "loss": 0.2938, "step": 75900 }, { "epoch": 18.930174563591024, "grad_norm": 7.970619201660156, "learning_rate": 1.0778054862842893e-06, "loss": 0.3396, "step": 75910 }, { "epoch": 18.93266832917706, "grad_norm": 10.110791206359863, "learning_rate": 1.0753117206982544e-06, "loss": 0.3286, "step": 75920 }, { "epoch": 18.935162094763093, "grad_norm": 10.215046882629395, "learning_rate": 1.0728179551122195e-06, "loss": 0.3226, "step": 75930 }, { "epoch": 18.93765586034913, "grad_norm": 10.97467041015625, "learning_rate": 1.0703241895261846e-06, "loss": 0.2734, "step": 75940 }, { "epoch": 18.940149625935163, "grad_norm": 8.248761177062988, "learning_rate": 1.0678304239401497e-06, "loss": 0.379, "step": 75950 }, { "epoch": 18.942643391521198, "grad_norm": 8.312829971313477, "learning_rate": 1.0653366583541148e-06, "loss": 0.3253, "step": 75960 }, { "epoch": 18.945137157107233, "grad_norm": 4.772835731506348, "learning_rate": 1.06284289276808e-06, "loss": 0.3596, "step": 75970 }, { "epoch": 18.947630922693268, "grad_norm": 10.931731224060059, "learning_rate": 1.060349127182045e-06, "loss": 0.3222, "step": 75980 }, { "epoch": 18.950124688279303, "grad_norm": 10.215840339660645, "learning_rate": 1.05785536159601e-06, "loss": 0.3021, "step": 75990 }, { "epoch": 18.952618453865338, "grad_norm": 9.405722618103027, "learning_rate": 1.0553615960099752e-06, "loss": 0.2664, "step": 76000 }, { "epoch": 18.955112219451372, "grad_norm": 7.368799209594727, "learning_rate": 1.0528678304239403e-06, "loss": 0.3743, "step": 76010 }, { "epoch": 18.957605985037407, "grad_norm": 6.817213535308838, "learning_rate": 1.0503740648379054e-06, "loss": 0.3206, "step": 76020 }, { "epoch": 18.960099750623442, "grad_norm": 12.117660522460938, "learning_rate": 1.0478802992518703e-06, "loss": 0.3007, "step": 76030 }, { "epoch": 18.962593516209477, "grad_norm": 12.789581298828125, "learning_rate": 1.0453865336658354e-06, "loss": 0.2822, "step": 76040 }, { "epoch": 18.965087281795512, "grad_norm": 11.194233894348145, "learning_rate": 1.0428927680798005e-06, "loss": 0.3346, "step": 76050 }, { "epoch": 18.967581047381547, "grad_norm": 7.340683937072754, "learning_rate": 1.0403990024937656e-06, "loss": 0.2921, "step": 76060 }, { "epoch": 18.97007481296758, "grad_norm": 8.430867195129395, "learning_rate": 1.0379052369077309e-06, "loss": 0.2891, "step": 76070 }, { "epoch": 18.972568578553616, "grad_norm": 8.543846130371094, "learning_rate": 1.035411471321696e-06, "loss": 0.2746, "step": 76080 }, { "epoch": 18.97506234413965, "grad_norm": 7.078313827514648, "learning_rate": 1.0329177057356609e-06, "loss": 0.2897, "step": 76090 }, { "epoch": 18.977556109725686, "grad_norm": 5.388221263885498, "learning_rate": 1.030423940149626e-06, "loss": 0.2994, "step": 76100 }, { "epoch": 18.98004987531172, "grad_norm": 7.963678359985352, "learning_rate": 1.027930174563591e-06, "loss": 0.3703, "step": 76110 }, { "epoch": 18.982543640897756, "grad_norm": 8.849908828735352, "learning_rate": 1.0254364089775562e-06, "loss": 0.3534, "step": 76120 }, { "epoch": 18.98503740648379, "grad_norm": 8.504463195800781, "learning_rate": 1.0229426433915213e-06, "loss": 0.3156, "step": 76130 }, { "epoch": 18.987531172069826, "grad_norm": 7.680803298950195, "learning_rate": 1.0204488778054864e-06, "loss": 0.3605, "step": 76140 }, { "epoch": 18.99002493765586, "grad_norm": 8.039253234863281, "learning_rate": 1.0179551122194515e-06, "loss": 0.3321, "step": 76150 }, { "epoch": 18.992518703241895, "grad_norm": 6.414565563201904, "learning_rate": 1.0154613466334166e-06, "loss": 0.3486, "step": 76160 }, { "epoch": 18.99501246882793, "grad_norm": 10.342339515686035, "learning_rate": 1.0129675810473817e-06, "loss": 0.3262, "step": 76170 }, { "epoch": 18.997506234413965, "grad_norm": 10.92263412475586, "learning_rate": 1.0104738154613468e-06, "loss": 0.3949, "step": 76180 }, { "epoch": 19.0, "grad_norm": 9.860681533813477, "learning_rate": 1.0079800498753119e-06, "loss": 0.3274, "step": 76190 }, { "epoch": 19.0, "eval_loss": 0.4184245765209198, "eval_runtime": 60.1429, "eval_samples_per_second": 16.677, "eval_steps_per_second": 16.677, "step": 76190 }, { "epoch": 19.002493765586035, "grad_norm": 6.654420852661133, "learning_rate": 1.005486284289277e-06, "loss": 0.3282, "step": 76200 }, { "epoch": 19.00498753117207, "grad_norm": 11.368739128112793, "learning_rate": 1.002992518703242e-06, "loss": 0.3369, "step": 76210 }, { "epoch": 19.007481296758105, "grad_norm": 6.13724422454834, "learning_rate": 1.0004987531172071e-06, "loss": 0.3149, "step": 76220 }, { "epoch": 19.00997506234414, "grad_norm": 9.071732521057129, "learning_rate": 9.980049875311722e-07, "loss": 0.3517, "step": 76230 }, { "epoch": 19.012468827930174, "grad_norm": 9.942281723022461, "learning_rate": 9.955112219451373e-07, "loss": 0.3144, "step": 76240 }, { "epoch": 19.01496259351621, "grad_norm": 7.623038291931152, "learning_rate": 9.930174563591022e-07, "loss": 0.3624, "step": 76250 }, { "epoch": 19.017456359102244, "grad_norm": 8.054025650024414, "learning_rate": 9.905236907730673e-07, "loss": 0.3465, "step": 76260 }, { "epoch": 19.01995012468828, "grad_norm": 8.097540855407715, "learning_rate": 9.880299251870324e-07, "loss": 0.2926, "step": 76270 }, { "epoch": 19.022443890274314, "grad_norm": 8.881487846374512, "learning_rate": 9.855361596009975e-07, "loss": 0.3272, "step": 76280 }, { "epoch": 19.02493765586035, "grad_norm": 9.261902809143066, "learning_rate": 9.830423940149626e-07, "loss": 0.3085, "step": 76290 }, { "epoch": 19.027431421446384, "grad_norm": 8.03958511352539, "learning_rate": 9.805486284289277e-07, "loss": 0.2904, "step": 76300 }, { "epoch": 19.02992518703242, "grad_norm": 7.877828121185303, "learning_rate": 9.780548628428928e-07, "loss": 0.3262, "step": 76310 }, { "epoch": 19.032418952618453, "grad_norm": 8.608955383300781, "learning_rate": 9.75561097256858e-07, "loss": 0.3105, "step": 76320 }, { "epoch": 19.034912718204488, "grad_norm": 5.959877014160156, "learning_rate": 9.73067331670823e-07, "loss": 0.2964, "step": 76330 }, { "epoch": 19.037406483790523, "grad_norm": 7.390963554382324, "learning_rate": 9.705735660847881e-07, "loss": 0.3047, "step": 76340 }, { "epoch": 19.039900249376558, "grad_norm": 7.07940149307251, "learning_rate": 9.680798004987532e-07, "loss": 0.2936, "step": 76350 }, { "epoch": 19.042394014962593, "grad_norm": 10.116901397705078, "learning_rate": 9.655860349127183e-07, "loss": 0.3208, "step": 76360 }, { "epoch": 19.044887780548628, "grad_norm": 6.5279717445373535, "learning_rate": 9.630922693266834e-07, "loss": 0.3736, "step": 76370 }, { "epoch": 19.047381546134662, "grad_norm": 7.244894981384277, "learning_rate": 9.605985037406485e-07, "loss": 0.3719, "step": 76380 }, { "epoch": 19.049875311720697, "grad_norm": 9.443936347961426, "learning_rate": 9.581047381546136e-07, "loss": 0.2698, "step": 76390 }, { "epoch": 19.052369077306732, "grad_norm": 7.093422889709473, "learning_rate": 9.556109725685787e-07, "loss": 0.3562, "step": 76400 }, { "epoch": 19.054862842892767, "grad_norm": 8.13650131225586, "learning_rate": 9.531172069825437e-07, "loss": 0.3588, "step": 76410 }, { "epoch": 19.057356608478802, "grad_norm": 8.89100170135498, "learning_rate": 9.506234413965088e-07, "loss": 0.2875, "step": 76420 }, { "epoch": 19.059850374064837, "grad_norm": 12.49089527130127, "learning_rate": 9.481296758104738e-07, "loss": 0.2841, "step": 76430 }, { "epoch": 19.06234413965087, "grad_norm": 8.83289623260498, "learning_rate": 9.456359102244391e-07, "loss": 0.3851, "step": 76440 }, { "epoch": 19.064837905236907, "grad_norm": 8.544670104980469, "learning_rate": 9.431421446384041e-07, "loss": 0.309, "step": 76450 }, { "epoch": 19.06733167082294, "grad_norm": 13.055142402648926, "learning_rate": 9.406483790523692e-07, "loss": 0.3448, "step": 76460 }, { "epoch": 19.069825436408976, "grad_norm": 5.936143398284912, "learning_rate": 9.381546134663343e-07, "loss": 0.2878, "step": 76470 }, { "epoch": 19.07231920199501, "grad_norm": 9.928505897521973, "learning_rate": 9.356608478802994e-07, "loss": 0.2742, "step": 76480 }, { "epoch": 19.074812967581046, "grad_norm": 10.485602378845215, "learning_rate": 9.331670822942644e-07, "loss": 0.2967, "step": 76490 }, { "epoch": 19.07730673316708, "grad_norm": 9.832148551940918, "learning_rate": 9.306733167082295e-07, "loss": 0.3564, "step": 76500 }, { "epoch": 19.079800498753116, "grad_norm": 5.720829963684082, "learning_rate": 9.281795511221946e-07, "loss": 0.3261, "step": 76510 }, { "epoch": 19.08229426433915, "grad_norm": 9.377291679382324, "learning_rate": 9.256857855361597e-07, "loss": 0.3256, "step": 76520 }, { "epoch": 19.084788029925186, "grad_norm": 8.832295417785645, "learning_rate": 9.231920199501248e-07, "loss": 0.3498, "step": 76530 }, { "epoch": 19.08728179551122, "grad_norm": 7.698011875152588, "learning_rate": 9.206982543640898e-07, "loss": 0.2729, "step": 76540 }, { "epoch": 19.089775561097255, "grad_norm": 7.579693794250488, "learning_rate": 9.182044887780549e-07, "loss": 0.322, "step": 76550 }, { "epoch": 19.09226932668329, "grad_norm": 8.33119010925293, "learning_rate": 9.157107231920201e-07, "loss": 0.2915, "step": 76560 }, { "epoch": 19.094763092269325, "grad_norm": 11.253091812133789, "learning_rate": 9.132169576059852e-07, "loss": 0.3446, "step": 76570 }, { "epoch": 19.09725685785536, "grad_norm": 9.968659400939941, "learning_rate": 9.107231920199502e-07, "loss": 0.2651, "step": 76580 }, { "epoch": 19.099750623441395, "grad_norm": 10.114205360412598, "learning_rate": 9.082294264339153e-07, "loss": 0.3819, "step": 76590 }, { "epoch": 19.102244389027433, "grad_norm": 10.0556058883667, "learning_rate": 9.057356608478804e-07, "loss": 0.3939, "step": 76600 }, { "epoch": 19.104738154613468, "grad_norm": 6.652792930603027, "learning_rate": 9.032418952618454e-07, "loss": 0.4161, "step": 76610 }, { "epoch": 19.107231920199503, "grad_norm": 8.197524070739746, "learning_rate": 9.007481296758105e-07, "loss": 0.3817, "step": 76620 }, { "epoch": 19.109725685785538, "grad_norm": 9.194473266601562, "learning_rate": 8.982543640897756e-07, "loss": 0.3205, "step": 76630 }, { "epoch": 19.112219451371573, "grad_norm": 9.719461441040039, "learning_rate": 8.957605985037407e-07, "loss": 0.3286, "step": 76640 }, { "epoch": 19.114713216957608, "grad_norm": 5.662110328674316, "learning_rate": 8.932668329177057e-07, "loss": 0.3241, "step": 76650 }, { "epoch": 19.117206982543642, "grad_norm": 7.443528652191162, "learning_rate": 8.907730673316708e-07, "loss": 0.3294, "step": 76660 }, { "epoch": 19.119700748129677, "grad_norm": 8.850680351257324, "learning_rate": 8.882793017456359e-07, "loss": 0.4117, "step": 76670 }, { "epoch": 19.122194513715712, "grad_norm": 9.90893840789795, "learning_rate": 8.857855361596011e-07, "loss": 0.3235, "step": 76680 }, { "epoch": 19.124688279301747, "grad_norm": 6.257623672485352, "learning_rate": 8.832917705735662e-07, "loss": 0.2876, "step": 76690 }, { "epoch": 19.127182044887782, "grad_norm": 10.78929615020752, "learning_rate": 8.807980049875313e-07, "loss": 0.2937, "step": 76700 }, { "epoch": 19.129675810473817, "grad_norm": 12.467625617980957, "learning_rate": 8.783042394014964e-07, "loss": 0.2836, "step": 76710 }, { "epoch": 19.13216957605985, "grad_norm": 9.467742919921875, "learning_rate": 8.758104738154614e-07, "loss": 0.2906, "step": 76720 }, { "epoch": 19.134663341645886, "grad_norm": 7.464754581451416, "learning_rate": 8.733167082294265e-07, "loss": 0.3037, "step": 76730 }, { "epoch": 19.13715710723192, "grad_norm": 9.207623481750488, "learning_rate": 8.708229426433916e-07, "loss": 0.3483, "step": 76740 }, { "epoch": 19.139650872817956, "grad_norm": 5.893961429595947, "learning_rate": 8.683291770573567e-07, "loss": 0.327, "step": 76750 }, { "epoch": 19.14214463840399, "grad_norm": 7.405910015106201, "learning_rate": 8.658354114713217e-07, "loss": 0.2723, "step": 76760 }, { "epoch": 19.144638403990026, "grad_norm": 6.716821670532227, "learning_rate": 8.633416458852868e-07, "loss": 0.3184, "step": 76770 }, { "epoch": 19.14713216957606, "grad_norm": 10.331329345703125, "learning_rate": 8.608478802992519e-07, "loss": 0.3315, "step": 76780 }, { "epoch": 19.149625935162096, "grad_norm": 11.650618553161621, "learning_rate": 8.58354114713217e-07, "loss": 0.3256, "step": 76790 }, { "epoch": 19.15211970074813, "grad_norm": 10.218818664550781, "learning_rate": 8.558603491271821e-07, "loss": 0.3823, "step": 76800 }, { "epoch": 19.154613466334165, "grad_norm": 11.632461547851562, "learning_rate": 8.533665835411473e-07, "loss": 0.2996, "step": 76810 }, { "epoch": 19.1571072319202, "grad_norm": 7.20107364654541, "learning_rate": 8.508728179551124e-07, "loss": 0.2705, "step": 76820 }, { "epoch": 19.159600997506235, "grad_norm": 7.080248832702637, "learning_rate": 8.483790523690774e-07, "loss": 0.3582, "step": 76830 }, { "epoch": 19.16209476309227, "grad_norm": 7.180111885070801, "learning_rate": 8.458852867830425e-07, "loss": 0.3788, "step": 76840 }, { "epoch": 19.164588528678305, "grad_norm": 7.654664516448975, "learning_rate": 8.433915211970076e-07, "loss": 0.3894, "step": 76850 }, { "epoch": 19.16708229426434, "grad_norm": 7.925827980041504, "learning_rate": 8.408977556109727e-07, "loss": 0.3759, "step": 76860 }, { "epoch": 19.169576059850375, "grad_norm": 9.838397979736328, "learning_rate": 8.384039900249377e-07, "loss": 0.2958, "step": 76870 }, { "epoch": 19.17206982543641, "grad_norm": 10.40937328338623, "learning_rate": 8.359102244389028e-07, "loss": 0.3657, "step": 76880 }, { "epoch": 19.174563591022444, "grad_norm": 9.061084747314453, "learning_rate": 8.334164588528679e-07, "loss": 0.3918, "step": 76890 }, { "epoch": 19.17705735660848, "grad_norm": 6.745436668395996, "learning_rate": 8.30922693266833e-07, "loss": 0.314, "step": 76900 }, { "epoch": 19.179551122194514, "grad_norm": 9.674360275268555, "learning_rate": 8.284289276807981e-07, "loss": 0.3089, "step": 76910 }, { "epoch": 19.18204488778055, "grad_norm": 7.983299732208252, "learning_rate": 8.25935162094763e-07, "loss": 0.3077, "step": 76920 }, { "epoch": 19.184538653366584, "grad_norm": 8.201288223266602, "learning_rate": 8.234413965087284e-07, "loss": 0.3265, "step": 76930 }, { "epoch": 19.18703241895262, "grad_norm": 6.199214935302734, "learning_rate": 8.209476309226934e-07, "loss": 0.2781, "step": 76940 }, { "epoch": 19.189526184538654, "grad_norm": 7.515051364898682, "learning_rate": 8.184538653366585e-07, "loss": 0.2741, "step": 76950 }, { "epoch": 19.19201995012469, "grad_norm": 7.1991658210754395, "learning_rate": 8.159600997506235e-07, "loss": 0.3119, "step": 76960 }, { "epoch": 19.194513715710723, "grad_norm": 13.45710277557373, "learning_rate": 8.134663341645886e-07, "loss": 0.3671, "step": 76970 }, { "epoch": 19.197007481296758, "grad_norm": 6.285548210144043, "learning_rate": 8.109725685785537e-07, "loss": 0.2633, "step": 76980 }, { "epoch": 19.199501246882793, "grad_norm": 13.47431755065918, "learning_rate": 8.087281795511223e-07, "loss": 0.3932, "step": 76990 }, { "epoch": 19.201995012468828, "grad_norm": 11.600540161132812, "learning_rate": 8.062344139650874e-07, "loss": 0.3936, "step": 77000 }, { "epoch": 19.204488778054863, "grad_norm": 8.96855640411377, "learning_rate": 8.037406483790524e-07, "loss": 0.361, "step": 77010 }, { "epoch": 19.206982543640898, "grad_norm": 10.361422538757324, "learning_rate": 8.012468827930175e-07, "loss": 0.3356, "step": 77020 }, { "epoch": 19.209476309226932, "grad_norm": 7.201541423797607, "learning_rate": 7.987531172069826e-07, "loss": 0.3742, "step": 77030 }, { "epoch": 19.211970074812967, "grad_norm": 8.441081047058105, "learning_rate": 7.962593516209477e-07, "loss": 0.3109, "step": 77040 }, { "epoch": 19.214463840399002, "grad_norm": 7.7734174728393555, "learning_rate": 7.937655860349128e-07, "loss": 0.2973, "step": 77050 }, { "epoch": 19.216957605985037, "grad_norm": 10.10339641571045, "learning_rate": 7.912718204488778e-07, "loss": 0.2633, "step": 77060 }, { "epoch": 19.219451371571072, "grad_norm": 11.610758781433105, "learning_rate": 7.887780548628429e-07, "loss": 0.3264, "step": 77070 }, { "epoch": 19.221945137157107, "grad_norm": 11.709192276000977, "learning_rate": 7.86284289276808e-07, "loss": 0.3189, "step": 77080 }, { "epoch": 19.22443890274314, "grad_norm": 6.54279899597168, "learning_rate": 7.837905236907731e-07, "loss": 0.2883, "step": 77090 }, { "epoch": 19.226932668329177, "grad_norm": 11.589969635009766, "learning_rate": 7.812967581047382e-07, "loss": 0.2831, "step": 77100 }, { "epoch": 19.22942643391521, "grad_norm": 6.521005153656006, "learning_rate": 7.788029925187034e-07, "loss": 0.2952, "step": 77110 }, { "epoch": 19.231920199501246, "grad_norm": 6.133444309234619, "learning_rate": 7.763092269326684e-07, "loss": 0.2859, "step": 77120 }, { "epoch": 19.23441396508728, "grad_norm": 7.011719703674316, "learning_rate": 7.738154613466335e-07, "loss": 0.3177, "step": 77130 }, { "epoch": 19.236907730673316, "grad_norm": 6.836624622344971, "learning_rate": 7.713216957605986e-07, "loss": 0.3955, "step": 77140 }, { "epoch": 19.23940149625935, "grad_norm": 11.907731056213379, "learning_rate": 7.688279301745637e-07, "loss": 0.2778, "step": 77150 }, { "epoch": 19.241895261845386, "grad_norm": 11.505409240722656, "learning_rate": 7.663341645885288e-07, "loss": 0.366, "step": 77160 }, { "epoch": 19.24438902743142, "grad_norm": 10.92941951751709, "learning_rate": 7.638403990024938e-07, "loss": 0.3527, "step": 77170 }, { "epoch": 19.246882793017456, "grad_norm": 8.509658813476562, "learning_rate": 7.613466334164589e-07, "loss": 0.269, "step": 77180 }, { "epoch": 19.24937655860349, "grad_norm": 6.147039413452148, "learning_rate": 7.58852867830424e-07, "loss": 0.3095, "step": 77190 }, { "epoch": 19.251870324189525, "grad_norm": 9.925485610961914, "learning_rate": 7.563591022443891e-07, "loss": 0.3432, "step": 77200 }, { "epoch": 19.25436408977556, "grad_norm": 7.7526116371154785, "learning_rate": 7.538653366583542e-07, "loss": 0.3582, "step": 77210 }, { "epoch": 19.256857855361595, "grad_norm": 9.207817077636719, "learning_rate": 7.513715710723192e-07, "loss": 0.2891, "step": 77220 }, { "epoch": 19.25935162094763, "grad_norm": 8.330227851867676, "learning_rate": 7.488778054862844e-07, "loss": 0.2408, "step": 77230 }, { "epoch": 19.261845386533665, "grad_norm": 7.16584587097168, "learning_rate": 7.463840399002495e-07, "loss": 0.2844, "step": 77240 }, { "epoch": 19.2643391521197, "grad_norm": 7.896230220794678, "learning_rate": 7.438902743142146e-07, "loss": 0.3163, "step": 77250 }, { "epoch": 19.266832917705734, "grad_norm": 6.485842704772949, "learning_rate": 7.413965087281797e-07, "loss": 0.3416, "step": 77260 }, { "epoch": 19.26932668329177, "grad_norm": 10.570438385009766, "learning_rate": 7.389027431421448e-07, "loss": 0.3151, "step": 77270 }, { "epoch": 19.271820448877804, "grad_norm": 10.489306449890137, "learning_rate": 7.364089775561097e-07, "loss": 0.3103, "step": 77280 }, { "epoch": 19.27431421446384, "grad_norm": 17.813852310180664, "learning_rate": 7.339152119700748e-07, "loss": 0.3384, "step": 77290 }, { "epoch": 19.276807980049874, "grad_norm": 6.28749418258667, "learning_rate": 7.314214463840399e-07, "loss": 0.3067, "step": 77300 }, { "epoch": 19.27930174563591, "grad_norm": 10.992226600646973, "learning_rate": 7.28927680798005e-07, "loss": 0.3487, "step": 77310 }, { "epoch": 19.281795511221944, "grad_norm": 8.586907386779785, "learning_rate": 7.264339152119701e-07, "loss": 0.2888, "step": 77320 }, { "epoch": 19.28428927680798, "grad_norm": 8.104104995727539, "learning_rate": 7.239401496259351e-07, "loss": 0.2809, "step": 77330 }, { "epoch": 19.286783042394013, "grad_norm": 9.069332122802734, "learning_rate": 7.214463840399002e-07, "loss": 0.2921, "step": 77340 }, { "epoch": 19.28927680798005, "grad_norm": 8.148301124572754, "learning_rate": 7.189526184538654e-07, "loss": 0.4029, "step": 77350 }, { "epoch": 19.291770573566083, "grad_norm": 7.358614921569824, "learning_rate": 7.164588528678305e-07, "loss": 0.3364, "step": 77360 }, { "epoch": 19.294264339152118, "grad_norm": 11.08927059173584, "learning_rate": 7.139650872817956e-07, "loss": 0.3398, "step": 77370 }, { "epoch": 19.296758104738153, "grad_norm": 10.33334732055664, "learning_rate": 7.114713216957607e-07, "loss": 0.326, "step": 77380 }, { "epoch": 19.29925187032419, "grad_norm": 10.560935974121094, "learning_rate": 7.089775561097257e-07, "loss": 0.3557, "step": 77390 }, { "epoch": 19.301745635910226, "grad_norm": 8.311334609985352, "learning_rate": 7.064837905236908e-07, "loss": 0.3314, "step": 77400 }, { "epoch": 19.30423940149626, "grad_norm": 11.71918773651123, "learning_rate": 7.039900249376559e-07, "loss": 0.3246, "step": 77410 }, { "epoch": 19.306733167082296, "grad_norm": 8.50728988647461, "learning_rate": 7.01496259351621e-07, "loss": 0.3343, "step": 77420 }, { "epoch": 19.30922693266833, "grad_norm": 11.546276092529297, "learning_rate": 6.990024937655861e-07, "loss": 0.2922, "step": 77430 }, { "epoch": 19.311720698254366, "grad_norm": 10.145132064819336, "learning_rate": 6.965087281795511e-07, "loss": 0.3306, "step": 77440 }, { "epoch": 19.3142144638404, "grad_norm": 12.020730018615723, "learning_rate": 6.940149625935162e-07, "loss": 0.2949, "step": 77450 }, { "epoch": 19.316708229426435, "grad_norm": 13.925124168395996, "learning_rate": 6.915211970074813e-07, "loss": 0.4027, "step": 77460 }, { "epoch": 19.31920199501247, "grad_norm": 9.396332740783691, "learning_rate": 6.890274314214464e-07, "loss": 0.283, "step": 77470 }, { "epoch": 19.321695760598505, "grad_norm": 5.274721622467041, "learning_rate": 6.865336658354116e-07, "loss": 0.3043, "step": 77480 }, { "epoch": 19.32418952618454, "grad_norm": 7.85326623916626, "learning_rate": 6.840399002493767e-07, "loss": 0.3407, "step": 77490 }, { "epoch": 19.326683291770575, "grad_norm": 12.338591575622559, "learning_rate": 6.815461346633417e-07, "loss": 0.2622, "step": 77500 }, { "epoch": 19.32917705735661, "grad_norm": 7.619169235229492, "learning_rate": 6.790523690773068e-07, "loss": 0.2779, "step": 77510 }, { "epoch": 19.331670822942645, "grad_norm": 8.566308975219727, "learning_rate": 6.768079800498753e-07, "loss": 0.3057, "step": 77520 }, { "epoch": 19.33416458852868, "grad_norm": 11.934048652648926, "learning_rate": 6.743142144638405e-07, "loss": 0.3626, "step": 77530 }, { "epoch": 19.336658354114714, "grad_norm": 12.807787895202637, "learning_rate": 6.718204488778056e-07, "loss": 0.297, "step": 77540 }, { "epoch": 19.33915211970075, "grad_norm": 8.593514442443848, "learning_rate": 6.693266832917707e-07, "loss": 0.3174, "step": 77550 }, { "epoch": 19.341645885286784, "grad_norm": 10.601576805114746, "learning_rate": 6.668329177057358e-07, "loss": 0.3191, "step": 77560 }, { "epoch": 19.34413965087282, "grad_norm": 10.128413200378418, "learning_rate": 6.643391521197009e-07, "loss": 0.302, "step": 77570 }, { "epoch": 19.346633416458854, "grad_norm": 8.060428619384766, "learning_rate": 6.618453865336659e-07, "loss": 0.2979, "step": 77580 }, { "epoch": 19.34912718204489, "grad_norm": 5.3289079666137695, "learning_rate": 6.59351620947631e-07, "loss": 0.2762, "step": 77590 }, { "epoch": 19.351620947630924, "grad_norm": 6.777313709259033, "learning_rate": 6.568578553615961e-07, "loss": 0.2989, "step": 77600 }, { "epoch": 19.35411471321696, "grad_norm": 6.17332124710083, "learning_rate": 6.543640897755612e-07, "loss": 0.2851, "step": 77610 }, { "epoch": 19.356608478802993, "grad_norm": 9.076419830322266, "learning_rate": 6.518703241895261e-07, "loss": 0.325, "step": 77620 }, { "epoch": 19.359102244389028, "grad_norm": 9.705153465270996, "learning_rate": 6.493765586034912e-07, "loss": 0.2434, "step": 77630 }, { "epoch": 19.361596009975063, "grad_norm": 11.839200973510742, "learning_rate": 6.468827930174563e-07, "loss": 0.3439, "step": 77640 }, { "epoch": 19.364089775561098, "grad_norm": 10.060364723205566, "learning_rate": 6.443890274314215e-07, "loss": 0.3398, "step": 77650 }, { "epoch": 19.366583541147133, "grad_norm": 4.9707560539245605, "learning_rate": 6.418952618453866e-07, "loss": 0.293, "step": 77660 }, { "epoch": 19.369077306733168, "grad_norm": 7.344512462615967, "learning_rate": 6.394014962593517e-07, "loss": 0.3303, "step": 77670 }, { "epoch": 19.371571072319203, "grad_norm": 8.75753402709961, "learning_rate": 6.369077306733168e-07, "loss": 0.3225, "step": 77680 }, { "epoch": 19.374064837905237, "grad_norm": 8.907544136047363, "learning_rate": 6.344139650872818e-07, "loss": 0.4169, "step": 77690 }, { "epoch": 19.376558603491272, "grad_norm": 8.724864959716797, "learning_rate": 6.319201995012469e-07, "loss": 0.4323, "step": 77700 }, { "epoch": 19.379052369077307, "grad_norm": 10.523849487304688, "learning_rate": 6.29426433915212e-07, "loss": 0.3098, "step": 77710 }, { "epoch": 19.381546134663342, "grad_norm": 6.705490589141846, "learning_rate": 6.269326683291771e-07, "loss": 0.2966, "step": 77720 }, { "epoch": 19.384039900249377, "grad_norm": 9.971707344055176, "learning_rate": 6.244389027431422e-07, "loss": 0.3189, "step": 77730 }, { "epoch": 19.38653366583541, "grad_norm": 10.387377738952637, "learning_rate": 6.219451371571072e-07, "loss": 0.3321, "step": 77740 }, { "epoch": 19.389027431421447, "grad_norm": 8.39964485168457, "learning_rate": 6.194513715710724e-07, "loss": 0.2786, "step": 77750 }, { "epoch": 19.39152119700748, "grad_norm": 7.76651668548584, "learning_rate": 6.169576059850375e-07, "loss": 0.3166, "step": 77760 }, { "epoch": 19.394014962593516, "grad_norm": 8.83347225189209, "learning_rate": 6.144638403990025e-07, "loss": 0.3197, "step": 77770 }, { "epoch": 19.39650872817955, "grad_norm": 11.635566711425781, "learning_rate": 6.119700748129676e-07, "loss": 0.3204, "step": 77780 }, { "epoch": 19.399002493765586, "grad_norm": 8.75010871887207, "learning_rate": 6.094763092269327e-07, "loss": 0.3247, "step": 77790 }, { "epoch": 19.40149625935162, "grad_norm": 12.465825080871582, "learning_rate": 6.069825436408978e-07, "loss": 0.3004, "step": 77800 }, { "epoch": 19.403990024937656, "grad_norm": 10.757222175598145, "learning_rate": 6.044887780548629e-07, "loss": 0.3444, "step": 77810 }, { "epoch": 19.40648379052369, "grad_norm": 4.095909118652344, "learning_rate": 6.01995012468828e-07, "loss": 0.3217, "step": 77820 }, { "epoch": 19.408977556109726, "grad_norm": 9.890026092529297, "learning_rate": 5.995012468827931e-07, "loss": 0.3058, "step": 77830 }, { "epoch": 19.41147132169576, "grad_norm": 9.338248252868652, "learning_rate": 5.970074812967582e-07, "loss": 0.2974, "step": 77840 }, { "epoch": 19.413965087281795, "grad_norm": 7.386726379394531, "learning_rate": 5.945137157107232e-07, "loss": 0.4195, "step": 77850 }, { "epoch": 19.41645885286783, "grad_norm": 10.347369194030762, "learning_rate": 5.920199501246883e-07, "loss": 0.3167, "step": 77860 }, { "epoch": 19.418952618453865, "grad_norm": 5.733013153076172, "learning_rate": 5.895261845386535e-07, "loss": 0.2585, "step": 77870 }, { "epoch": 19.4214463840399, "grad_norm": 8.385955810546875, "learning_rate": 5.870324189526185e-07, "loss": 0.2847, "step": 77880 }, { "epoch": 19.423940149625935, "grad_norm": 7.1023736000061035, "learning_rate": 5.845386533665836e-07, "loss": 0.336, "step": 77890 }, { "epoch": 19.42643391521197, "grad_norm": 9.88353157043457, "learning_rate": 5.820448877805487e-07, "loss": 0.3549, "step": 77900 }, { "epoch": 19.428927680798004, "grad_norm": 8.863442420959473, "learning_rate": 5.795511221945138e-07, "loss": 0.3858, "step": 77910 }, { "epoch": 19.43142144638404, "grad_norm": 8.624554634094238, "learning_rate": 5.770573566084789e-07, "loss": 0.3137, "step": 77920 }, { "epoch": 19.433915211970074, "grad_norm": 9.726763725280762, "learning_rate": 5.74563591022444e-07, "loss": 0.345, "step": 77930 }, { "epoch": 19.43640897755611, "grad_norm": 7.916418075561523, "learning_rate": 5.720698254364091e-07, "loss": 0.2833, "step": 77940 }, { "epoch": 19.438902743142144, "grad_norm": 10.546299934387207, "learning_rate": 5.695760598503742e-07, "loss": 0.2614, "step": 77950 }, { "epoch": 19.44139650872818, "grad_norm": 9.657865524291992, "learning_rate": 5.670822942643392e-07, "loss": 0.3221, "step": 77960 }, { "epoch": 19.443890274314214, "grad_norm": 7.228270530700684, "learning_rate": 5.645885286783043e-07, "loss": 0.285, "step": 77970 }, { "epoch": 19.44638403990025, "grad_norm": 12.523133277893066, "learning_rate": 5.620947630922694e-07, "loss": 0.2654, "step": 77980 }, { "epoch": 19.448877805486283, "grad_norm": 9.408562660217285, "learning_rate": 5.596009975062345e-07, "loss": 0.2996, "step": 77990 }, { "epoch": 19.45137157107232, "grad_norm": 8.884123802185059, "learning_rate": 5.571072319201996e-07, "loss": 0.3365, "step": 78000 }, { "epoch": 19.453865336658353, "grad_norm": 7.461065769195557, "learning_rate": 5.546134663341646e-07, "loss": 0.3295, "step": 78010 }, { "epoch": 19.456359102244388, "grad_norm": 7.785035610198975, "learning_rate": 5.521197007481297e-07, "loss": 0.235, "step": 78020 }, { "epoch": 19.458852867830423, "grad_norm": 8.66875171661377, "learning_rate": 5.496259351620948e-07, "loss": 0.2687, "step": 78030 }, { "epoch": 19.461346633416458, "grad_norm": 6.394353866577148, "learning_rate": 5.471321695760598e-07, "loss": 0.2604, "step": 78040 }, { "epoch": 19.463840399002493, "grad_norm": 9.329657554626465, "learning_rate": 5.44638403990025e-07, "loss": 0.3127, "step": 78050 }, { "epoch": 19.466334164588527, "grad_norm": 9.868875503540039, "learning_rate": 5.421446384039901e-07, "loss": 0.2944, "step": 78060 }, { "epoch": 19.468827930174562, "grad_norm": 8.829019546508789, "learning_rate": 5.396508728179551e-07, "loss": 0.4614, "step": 78070 }, { "epoch": 19.471321695760597, "grad_norm": 5.467095851898193, "learning_rate": 5.371571072319202e-07, "loss": 0.3467, "step": 78080 }, { "epoch": 19.473815461346632, "grad_norm": 11.852470397949219, "learning_rate": 5.346633416458853e-07, "loss": 0.3606, "step": 78090 }, { "epoch": 19.476309226932667, "grad_norm": 10.978060722351074, "learning_rate": 5.321695760598504e-07, "loss": 0.4001, "step": 78100 }, { "epoch": 19.478802992518702, "grad_norm": 4.6936163902282715, "learning_rate": 5.296758104738155e-07, "loss": 0.2956, "step": 78110 }, { "epoch": 19.481296758104737, "grad_norm": 8.054365158081055, "learning_rate": 5.271820448877806e-07, "loss": 0.3031, "step": 78120 }, { "epoch": 19.48379052369077, "grad_norm": 7.315567493438721, "learning_rate": 5.246882793017457e-07, "loss": 0.292, "step": 78130 }, { "epoch": 19.486284289276806, "grad_norm": 6.582238674163818, "learning_rate": 5.221945137157108e-07, "loss": 0.3452, "step": 78140 }, { "epoch": 19.48877805486284, "grad_norm": 9.56821346282959, "learning_rate": 5.197007481296758e-07, "loss": 0.371, "step": 78150 }, { "epoch": 19.491271820448876, "grad_norm": 10.5318603515625, "learning_rate": 5.172069825436409e-07, "loss": 0.3602, "step": 78160 }, { "epoch": 19.49376558603491, "grad_norm": 7.174575328826904, "learning_rate": 5.14713216957606e-07, "loss": 0.2854, "step": 78170 }, { "epoch": 19.496259351620946, "grad_norm": 9.72573184967041, "learning_rate": 5.122194513715711e-07, "loss": 0.2975, "step": 78180 }, { "epoch": 19.49875311720698, "grad_norm": 9.575770378112793, "learning_rate": 5.097256857855362e-07, "loss": 0.303, "step": 78190 }, { "epoch": 19.50124688279302, "grad_norm": 9.664167404174805, "learning_rate": 5.072319201995013e-07, "loss": 0.3959, "step": 78200 }, { "epoch": 19.503740648379054, "grad_norm": 9.702856063842773, "learning_rate": 5.047381546134664e-07, "loss": 0.3454, "step": 78210 }, { "epoch": 19.50623441396509, "grad_norm": 13.055885314941406, "learning_rate": 5.022443890274315e-07, "loss": 0.2945, "step": 78220 }, { "epoch": 19.508728179551124, "grad_norm": 7.412993907928467, "learning_rate": 4.997506234413965e-07, "loss": 0.3167, "step": 78230 }, { "epoch": 19.51122194513716, "grad_norm": 5.914247512817383, "learning_rate": 4.972568578553617e-07, "loss": 0.3067, "step": 78240 }, { "epoch": 19.513715710723194, "grad_norm": 6.335524559020996, "learning_rate": 4.947630922693268e-07, "loss": 0.3002, "step": 78250 }, { "epoch": 19.51620947630923, "grad_norm": 9.340645790100098, "learning_rate": 4.922693266832918e-07, "loss": 0.3535, "step": 78260 }, { "epoch": 19.518703241895263, "grad_norm": 8.127272605895996, "learning_rate": 4.897755610972569e-07, "loss": 0.3276, "step": 78270 }, { "epoch": 19.521197007481298, "grad_norm": 8.352450370788574, "learning_rate": 4.87281795511222e-07, "loss": 0.3708, "step": 78280 }, { "epoch": 19.523690773067333, "grad_norm": 5.065845489501953, "learning_rate": 4.847880299251871e-07, "loss": 0.266, "step": 78290 }, { "epoch": 19.526184538653368, "grad_norm": 9.564516067504883, "learning_rate": 4.822942643391522e-07, "loss": 0.2801, "step": 78300 }, { "epoch": 19.528678304239403, "grad_norm": 8.529273986816406, "learning_rate": 4.798004987531173e-07, "loss": 0.3554, "step": 78310 }, { "epoch": 19.531172069825438, "grad_norm": 9.940313339233398, "learning_rate": 4.773067331670824e-07, "loss": 0.2956, "step": 78320 }, { "epoch": 19.533665835411473, "grad_norm": 7.539468288421631, "learning_rate": 4.748129675810474e-07, "loss": 0.3047, "step": 78330 }, { "epoch": 19.536159600997507, "grad_norm": 9.497523307800293, "learning_rate": 4.723192019950125e-07, "loss": 0.2872, "step": 78340 }, { "epoch": 19.538653366583542, "grad_norm": 8.197120666503906, "learning_rate": 4.6982543640897756e-07, "loss": 0.3116, "step": 78350 }, { "epoch": 19.541147132169577, "grad_norm": 8.740259170532227, "learning_rate": 4.673316708229427e-07, "loss": 0.3306, "step": 78360 }, { "epoch": 19.543640897755612, "grad_norm": 6.9125494956970215, "learning_rate": 4.648379052369078e-07, "loss": 0.3007, "step": 78370 }, { "epoch": 19.546134663341647, "grad_norm": 7.752559185028076, "learning_rate": 4.6234413965087285e-07, "loss": 0.3, "step": 78380 }, { "epoch": 19.54862842892768, "grad_norm": 10.674647331237793, "learning_rate": 4.5985037406483795e-07, "loss": 0.3157, "step": 78390 }, { "epoch": 19.551122194513717, "grad_norm": 9.210230827331543, "learning_rate": 4.57356608478803e-07, "loss": 0.2928, "step": 78400 }, { "epoch": 19.55361596009975, "grad_norm": 8.934961318969727, "learning_rate": 4.548628428927681e-07, "loss": 0.3272, "step": 78410 }, { "epoch": 19.556109725685786, "grad_norm": 9.600099563598633, "learning_rate": 4.523690773067332e-07, "loss": 0.3249, "step": 78420 }, { "epoch": 19.55860349127182, "grad_norm": 9.030034065246582, "learning_rate": 4.4987531172069834e-07, "loss": 0.283, "step": 78430 }, { "epoch": 19.561097256857856, "grad_norm": 8.656168937683105, "learning_rate": 4.473815461346634e-07, "loss": 0.2666, "step": 78440 }, { "epoch": 19.56359102244389, "grad_norm": 8.763611793518066, "learning_rate": 4.448877805486285e-07, "loss": 0.3127, "step": 78450 }, { "epoch": 19.566084788029926, "grad_norm": 6.0083136558532715, "learning_rate": 4.4239401496259353e-07, "loss": 0.2952, "step": 78460 }, { "epoch": 19.56857855361596, "grad_norm": 8.106850624084473, "learning_rate": 4.399002493765586e-07, "loss": 0.2619, "step": 78470 }, { "epoch": 19.571072319201996, "grad_norm": 5.120926380157471, "learning_rate": 4.3740648379052367e-07, "loss": 0.3112, "step": 78480 }, { "epoch": 19.57356608478803, "grad_norm": 8.02582836151123, "learning_rate": 4.349127182044888e-07, "loss": 0.3119, "step": 78490 }, { "epoch": 19.576059850374065, "grad_norm": 14.72515869140625, "learning_rate": 4.324189526184539e-07, "loss": 0.3285, "step": 78500 }, { "epoch": 19.5785536159601, "grad_norm": 10.0016508102417, "learning_rate": 4.29925187032419e-07, "loss": 0.3348, "step": 78510 }, { "epoch": 19.581047381546135, "grad_norm": 8.692368507385254, "learning_rate": 4.2743142144638406e-07, "loss": 0.3112, "step": 78520 }, { "epoch": 19.58354114713217, "grad_norm": 10.636957168579102, "learning_rate": 4.2493765586034916e-07, "loss": 0.3008, "step": 78530 }, { "epoch": 19.586034912718205, "grad_norm": 14.530603408813477, "learning_rate": 4.224438902743142e-07, "loss": 0.3169, "step": 78540 }, { "epoch": 19.58852867830424, "grad_norm": 10.313661575317383, "learning_rate": 4.1995012468827936e-07, "loss": 0.4105, "step": 78550 }, { "epoch": 19.591022443890274, "grad_norm": 8.854100227355957, "learning_rate": 4.1745635910224446e-07, "loss": 0.3473, "step": 78560 }, { "epoch": 19.59351620947631, "grad_norm": 5.632763385772705, "learning_rate": 4.149625935162095e-07, "loss": 0.3534, "step": 78570 }, { "epoch": 19.596009975062344, "grad_norm": 10.139565467834473, "learning_rate": 4.124688279301746e-07, "loss": 0.2864, "step": 78580 }, { "epoch": 19.59850374064838, "grad_norm": 10.44018268585205, "learning_rate": 4.0997506234413964e-07, "loss": 0.3235, "step": 78590 }, { "epoch": 19.600997506234414, "grad_norm": 8.089249610900879, "learning_rate": 4.0748129675810474e-07, "loss": 0.3483, "step": 78600 }, { "epoch": 19.60349127182045, "grad_norm": 8.919448852539062, "learning_rate": 4.049875311720699e-07, "loss": 0.2935, "step": 78610 }, { "epoch": 19.605985037406484, "grad_norm": 12.397672653198242, "learning_rate": 4.02493765586035e-07, "loss": 0.2637, "step": 78620 }, { "epoch": 19.60847880299252, "grad_norm": 7.087367057800293, "learning_rate": 4.0000000000000003e-07, "loss": 0.3281, "step": 78630 }, { "epoch": 19.610972568578553, "grad_norm": 10.792654037475586, "learning_rate": 3.9750623441396513e-07, "loss": 0.2798, "step": 78640 }, { "epoch": 19.61346633416459, "grad_norm": 11.265054702758789, "learning_rate": 3.950124688279302e-07, "loss": 0.3599, "step": 78650 }, { "epoch": 19.615960099750623, "grad_norm": 7.813275337219238, "learning_rate": 3.925187032418953e-07, "loss": 0.3727, "step": 78660 }, { "epoch": 19.618453865336658, "grad_norm": 7.351382255554199, "learning_rate": 3.9002493765586043e-07, "loss": 0.337, "step": 78670 }, { "epoch": 19.620947630922693, "grad_norm": 8.567827224731445, "learning_rate": 3.8753117206982547e-07, "loss": 0.3548, "step": 78680 }, { "epoch": 19.623441396508728, "grad_norm": 8.134788513183594, "learning_rate": 3.8503740648379057e-07, "loss": 0.3234, "step": 78690 }, { "epoch": 19.625935162094763, "grad_norm": 8.092358589172363, "learning_rate": 3.8254364089775567e-07, "loss": 0.4081, "step": 78700 }, { "epoch": 19.628428927680797, "grad_norm": 10.603907585144043, "learning_rate": 3.800498753117207e-07, "loss": 0.3384, "step": 78710 }, { "epoch": 19.630922693266832, "grad_norm": 7.839836120605469, "learning_rate": 3.775561097256858e-07, "loss": 0.3905, "step": 78720 }, { "epoch": 19.633416458852867, "grad_norm": 10.831620216369629, "learning_rate": 3.7506234413965086e-07, "loss": 0.2838, "step": 78730 }, { "epoch": 19.635910224438902, "grad_norm": 7.171167850494385, "learning_rate": 3.72568578553616e-07, "loss": 0.361, "step": 78740 }, { "epoch": 19.638403990024937, "grad_norm": 8.736506462097168, "learning_rate": 3.700748129675811e-07, "loss": 0.3024, "step": 78750 }, { "epoch": 19.640897755610972, "grad_norm": 8.326260566711426, "learning_rate": 3.6758104738154615e-07, "loss": 0.3674, "step": 78760 }, { "epoch": 19.643391521197007, "grad_norm": 7.458507061004639, "learning_rate": 3.6508728179551125e-07, "loss": 0.3018, "step": 78770 }, { "epoch": 19.64588528678304, "grad_norm": 10.653674125671387, "learning_rate": 3.625935162094763e-07, "loss": 0.3053, "step": 78780 }, { "epoch": 19.648379052369076, "grad_norm": 7.049093723297119, "learning_rate": 3.600997506234414e-07, "loss": 0.3749, "step": 78790 }, { "epoch": 19.65087281795511, "grad_norm": 8.122016906738281, "learning_rate": 3.5760598503740654e-07, "loss": 0.3234, "step": 78800 }, { "epoch": 19.653366583541146, "grad_norm": 11.616135597229004, "learning_rate": 3.5511221945137164e-07, "loss": 0.2943, "step": 78810 }, { "epoch": 19.65586034912718, "grad_norm": 12.034366607666016, "learning_rate": 3.526184538653367e-07, "loss": 0.322, "step": 78820 }, { "epoch": 19.658354114713216, "grad_norm": 11.825723648071289, "learning_rate": 3.501246882793018e-07, "loss": 0.3468, "step": 78830 }, { "epoch": 19.66084788029925, "grad_norm": 8.839386940002441, "learning_rate": 3.4763092269326683e-07, "loss": 0.3245, "step": 78840 }, { "epoch": 19.663341645885286, "grad_norm": 5.619390487670898, "learning_rate": 3.451371571072319e-07, "loss": 0.2613, "step": 78850 }, { "epoch": 19.66583541147132, "grad_norm": 7.329450607299805, "learning_rate": 3.426433915211971e-07, "loss": 0.3291, "step": 78860 }, { "epoch": 19.668329177057355, "grad_norm": 8.157301902770996, "learning_rate": 3.401496259351621e-07, "loss": 0.3298, "step": 78870 }, { "epoch": 19.67082294264339, "grad_norm": 18.877344131469727, "learning_rate": 3.376558603491272e-07, "loss": 0.279, "step": 78880 }, { "epoch": 19.673316708229425, "grad_norm": 9.491035461425781, "learning_rate": 3.351620947630923e-07, "loss": 0.2876, "step": 78890 }, { "epoch": 19.67581047381546, "grad_norm": 9.576622009277344, "learning_rate": 3.3266832917705736e-07, "loss": 0.2932, "step": 78900 }, { "epoch": 19.678304239401495, "grad_norm": 8.893289566040039, "learning_rate": 3.3017456359102246e-07, "loss": 0.2582, "step": 78910 }, { "epoch": 19.68079800498753, "grad_norm": 12.161534309387207, "learning_rate": 3.276807980049876e-07, "loss": 0.3097, "step": 78920 }, { "epoch": 19.683291770573565, "grad_norm": 9.909988403320312, "learning_rate": 3.2518703241895266e-07, "loss": 0.326, "step": 78930 }, { "epoch": 19.6857855361596, "grad_norm": 7.692135810852051, "learning_rate": 3.2269326683291775e-07, "loss": 0.3324, "step": 78940 }, { "epoch": 19.688279301745634, "grad_norm": 9.485343933105469, "learning_rate": 3.201995012468828e-07, "loss": 0.2779, "step": 78950 }, { "epoch": 19.69077306733167, "grad_norm": 9.124640464782715, "learning_rate": 3.177057356608479e-07, "loss": 0.3118, "step": 78960 }, { "epoch": 19.693266832917704, "grad_norm": 8.47995376586914, "learning_rate": 3.1521197007481294e-07, "loss": 0.2873, "step": 78970 }, { "epoch": 19.69576059850374, "grad_norm": 9.394947052001953, "learning_rate": 3.127182044887781e-07, "loss": 0.317, "step": 78980 }, { "epoch": 19.698254364089777, "grad_norm": 9.908628463745117, "learning_rate": 3.1022443890274314e-07, "loss": 0.3296, "step": 78990 }, { "epoch": 19.70074812967581, "grad_norm": 8.319961547851562, "learning_rate": 3.077306733167083e-07, "loss": 0.3183, "step": 79000 }, { "epoch": 19.703241895261847, "grad_norm": 9.598522186279297, "learning_rate": 3.0523690773067333e-07, "loss": 0.3059, "step": 79010 }, { "epoch": 19.705735660847882, "grad_norm": 7.940982341766357, "learning_rate": 3.0274314214463843e-07, "loss": 0.385, "step": 79020 }, { "epoch": 19.708229426433917, "grad_norm": 9.714994430541992, "learning_rate": 3.0024937655860353e-07, "loss": 0.3216, "step": 79030 }, { "epoch": 19.71072319201995, "grad_norm": 11.18153190612793, "learning_rate": 2.9775561097256863e-07, "loss": 0.3392, "step": 79040 }, { "epoch": 19.713216957605987, "grad_norm": 8.196310043334961, "learning_rate": 2.9526184538653367e-07, "loss": 0.277, "step": 79050 }, { "epoch": 19.71571072319202, "grad_norm": 12.826026916503906, "learning_rate": 2.9276807980049877e-07, "loss": 0.3442, "step": 79060 }, { "epoch": 19.718204488778056, "grad_norm": 14.502406120300293, "learning_rate": 2.9027431421446387e-07, "loss": 0.3195, "step": 79070 }, { "epoch": 19.72069825436409, "grad_norm": 10.214339256286621, "learning_rate": 2.8778054862842897e-07, "loss": 0.2983, "step": 79080 }, { "epoch": 19.723192019950126, "grad_norm": 9.869833946228027, "learning_rate": 2.8528678304239406e-07, "loss": 0.3399, "step": 79090 }, { "epoch": 19.72568578553616, "grad_norm": 10.579812049865723, "learning_rate": 2.827930174563591e-07, "loss": 0.2615, "step": 79100 }, { "epoch": 19.728179551122196, "grad_norm": 9.650101661682129, "learning_rate": 2.802992518703242e-07, "loss": 0.2366, "step": 79110 }, { "epoch": 19.73067331670823, "grad_norm": 7.501360893249512, "learning_rate": 2.778054862842893e-07, "loss": 0.2624, "step": 79120 }, { "epoch": 19.733167082294266, "grad_norm": 7.877026081085205, "learning_rate": 2.753117206982544e-07, "loss": 0.302, "step": 79130 }, { "epoch": 19.7356608478803, "grad_norm": 9.600006103515625, "learning_rate": 2.7281795511221945e-07, "loss": 0.3465, "step": 79140 }, { "epoch": 19.738154613466335, "grad_norm": 8.802675247192383, "learning_rate": 2.703241895261846e-07, "loss": 0.2923, "step": 79150 }, { "epoch": 19.74064837905237, "grad_norm": 16.73734474182129, "learning_rate": 2.6783042394014964e-07, "loss": 0.3215, "step": 79160 }, { "epoch": 19.743142144638405, "grad_norm": 13.215644836425781, "learning_rate": 2.6533665835411474e-07, "loss": 0.3195, "step": 79170 }, { "epoch": 19.74563591022444, "grad_norm": 6.847998142242432, "learning_rate": 2.6284289276807984e-07, "loss": 0.3243, "step": 79180 }, { "epoch": 19.748129675810475, "grad_norm": 8.965968132019043, "learning_rate": 2.6034912718204494e-07, "loss": 0.3299, "step": 79190 }, { "epoch": 19.75062344139651, "grad_norm": 8.921945571899414, "learning_rate": 2.5785536159601e-07, "loss": 0.3414, "step": 79200 }, { "epoch": 19.753117206982544, "grad_norm": 8.51665210723877, "learning_rate": 2.553615960099751e-07, "loss": 0.3682, "step": 79210 }, { "epoch": 19.75561097256858, "grad_norm": 9.166069984436035, "learning_rate": 2.528678304239402e-07, "loss": 0.3229, "step": 79220 }, { "epoch": 19.758104738154614, "grad_norm": 16.574934005737305, "learning_rate": 2.503740648379053e-07, "loss": 0.3596, "step": 79230 }, { "epoch": 19.76059850374065, "grad_norm": 8.440783500671387, "learning_rate": 2.478802992518703e-07, "loss": 0.259, "step": 79240 }, { "epoch": 19.763092269326684, "grad_norm": 10.631123542785645, "learning_rate": 2.453865336658354e-07, "loss": 0.3265, "step": 79250 }, { "epoch": 19.76558603491272, "grad_norm": 10.455692291259766, "learning_rate": 2.428927680798005e-07, "loss": 0.3613, "step": 79260 }, { "epoch": 19.768079800498754, "grad_norm": 9.9364013671875, "learning_rate": 2.403990024937656e-07, "loss": 0.3156, "step": 79270 }, { "epoch": 19.77057356608479, "grad_norm": 8.415620803833008, "learning_rate": 2.3790523690773071e-07, "loss": 0.3035, "step": 79280 }, { "epoch": 19.773067331670823, "grad_norm": 9.595970153808594, "learning_rate": 2.3541147132169579e-07, "loss": 0.2897, "step": 79290 }, { "epoch": 19.77556109725686, "grad_norm": 8.665029525756836, "learning_rate": 2.3291770573566086e-07, "loss": 0.4021, "step": 79300 }, { "epoch": 19.778054862842893, "grad_norm": 8.567267417907715, "learning_rate": 2.3042394014962595e-07, "loss": 0.3056, "step": 79310 }, { "epoch": 19.780548628428928, "grad_norm": 12.760359764099121, "learning_rate": 2.2793017456359103e-07, "loss": 0.3164, "step": 79320 }, { "epoch": 19.783042394014963, "grad_norm": 11.60568904876709, "learning_rate": 2.2543640897755612e-07, "loss": 0.3371, "step": 79330 }, { "epoch": 19.785536159600998, "grad_norm": 8.413047790527344, "learning_rate": 2.2294264339152122e-07, "loss": 0.312, "step": 79340 }, { "epoch": 19.788029925187033, "grad_norm": 9.830360412597656, "learning_rate": 2.204488778054863e-07, "loss": 0.3139, "step": 79350 }, { "epoch": 19.790523690773068, "grad_norm": 9.22851848602295, "learning_rate": 2.1795511221945137e-07, "loss": 0.3348, "step": 79360 }, { "epoch": 19.793017456359102, "grad_norm": 9.71314811706543, "learning_rate": 2.154613466334165e-07, "loss": 0.4009, "step": 79370 }, { "epoch": 19.795511221945137, "grad_norm": 12.098420143127441, "learning_rate": 2.1296758104738156e-07, "loss": 0.2935, "step": 79380 }, { "epoch": 19.798004987531172, "grad_norm": 11.904885292053223, "learning_rate": 2.1047381546134663e-07, "loss": 0.3176, "step": 79390 }, { "epoch": 19.800498753117207, "grad_norm": 10.853218078613281, "learning_rate": 2.0798004987531176e-07, "loss": 0.3419, "step": 79400 }, { "epoch": 19.802992518703242, "grad_norm": 10.429359436035156, "learning_rate": 2.0548628428927683e-07, "loss": 0.28, "step": 79410 }, { "epoch": 19.805486284289277, "grad_norm": 6.893282413482666, "learning_rate": 2.029925187032419e-07, "loss": 0.2803, "step": 79420 }, { "epoch": 19.80798004987531, "grad_norm": 17.683412551879883, "learning_rate": 2.0049875311720702e-07, "loss": 0.3656, "step": 79430 }, { "epoch": 19.810473815461346, "grad_norm": 6.478290557861328, "learning_rate": 1.980049875311721e-07, "loss": 0.3119, "step": 79440 }, { "epoch": 19.81296758104738, "grad_norm": 11.704423904418945, "learning_rate": 1.9551122194513717e-07, "loss": 0.4239, "step": 79450 }, { "epoch": 19.815461346633416, "grad_norm": 10.740715980529785, "learning_rate": 1.9301745635910227e-07, "loss": 0.3632, "step": 79460 }, { "epoch": 19.81795511221945, "grad_norm": 7.998754501342773, "learning_rate": 1.9052369077306736e-07, "loss": 0.3205, "step": 79470 }, { "epoch": 19.820448877805486, "grad_norm": 12.84117317199707, "learning_rate": 1.8802992518703243e-07, "loss": 0.322, "step": 79480 }, { "epoch": 19.82294264339152, "grad_norm": 11.327634811401367, "learning_rate": 1.8553615960099753e-07, "loss": 0.3062, "step": 79490 }, { "epoch": 19.825436408977556, "grad_norm": 16.317493438720703, "learning_rate": 1.830423940149626e-07, "loss": 0.3968, "step": 79500 }, { "epoch": 19.82793017456359, "grad_norm": 9.288350105285645, "learning_rate": 1.8054862842892768e-07, "loss": 0.2814, "step": 79510 }, { "epoch": 19.830423940149625, "grad_norm": 6.440745830535889, "learning_rate": 1.780548628428928e-07, "loss": 0.2889, "step": 79520 }, { "epoch": 19.83291770573566, "grad_norm": 7.100279331207275, "learning_rate": 1.7556109725685787e-07, "loss": 0.2817, "step": 79530 }, { "epoch": 19.835411471321695, "grad_norm": 8.271157264709473, "learning_rate": 1.7306733167082294e-07, "loss": 0.2249, "step": 79540 }, { "epoch": 19.83790523690773, "grad_norm": 7.640126705169678, "learning_rate": 1.7057356608478801e-07, "loss": 0.3156, "step": 79550 }, { "epoch": 19.840399002493765, "grad_norm": 11.971923828125, "learning_rate": 1.6807980049875314e-07, "loss": 0.3263, "step": 79560 }, { "epoch": 19.8428927680798, "grad_norm": 6.482527256011963, "learning_rate": 1.655860349127182e-07, "loss": 0.2904, "step": 79570 }, { "epoch": 19.845386533665835, "grad_norm": 7.9649200439453125, "learning_rate": 1.6309226932668328e-07, "loss": 0.309, "step": 79580 }, { "epoch": 19.84788029925187, "grad_norm": 9.517223358154297, "learning_rate": 1.605985037406484e-07, "loss": 0.3661, "step": 79590 }, { "epoch": 19.850374064837904, "grad_norm": 16.75897216796875, "learning_rate": 1.5810473815461348e-07, "loss": 0.3179, "step": 79600 }, { "epoch": 19.85286783042394, "grad_norm": 7.259194850921631, "learning_rate": 1.5561097256857858e-07, "loss": 0.313, "step": 79610 }, { "epoch": 19.855361596009974, "grad_norm": 6.734643936157227, "learning_rate": 1.5311720698254367e-07, "loss": 0.3129, "step": 79620 }, { "epoch": 19.85785536159601, "grad_norm": 8.320770263671875, "learning_rate": 1.5062344139650875e-07, "loss": 0.2911, "step": 79630 }, { "epoch": 19.860349127182044, "grad_norm": 9.448941230773926, "learning_rate": 1.4812967581047384e-07, "loss": 0.3353, "step": 79640 }, { "epoch": 19.86284289276808, "grad_norm": 7.158919811248779, "learning_rate": 1.4563591022443891e-07, "loss": 0.3327, "step": 79650 }, { "epoch": 19.865336658354114, "grad_norm": 6.863051891326904, "learning_rate": 1.43142144638404e-07, "loss": 0.2756, "step": 79660 }, { "epoch": 19.86783042394015, "grad_norm": 9.206826210021973, "learning_rate": 1.4064837905236908e-07, "loss": 0.3184, "step": 79670 }, { "epoch": 19.870324189526183, "grad_norm": 9.977852821350098, "learning_rate": 1.3815461346633418e-07, "loss": 0.3783, "step": 79680 }, { "epoch": 19.872817955112218, "grad_norm": 8.145519256591797, "learning_rate": 1.3566084788029925e-07, "loss": 0.3138, "step": 79690 }, { "epoch": 19.875311720698253, "grad_norm": 10.240920066833496, "learning_rate": 1.3316708229426435e-07, "loss": 0.3352, "step": 79700 }, { "epoch": 19.877805486284288, "grad_norm": 4.372189998626709, "learning_rate": 1.3067331670822942e-07, "loss": 0.3623, "step": 79710 }, { "epoch": 19.880299251870323, "grad_norm": 8.158557891845703, "learning_rate": 1.2817955112219452e-07, "loss": 0.3992, "step": 79720 }, { "epoch": 19.882793017456358, "grad_norm": 9.305794715881348, "learning_rate": 1.2568578553615962e-07, "loss": 0.3528, "step": 79730 }, { "epoch": 19.885286783042392, "grad_norm": 8.110395431518555, "learning_rate": 1.231920199501247e-07, "loss": 0.3128, "step": 79740 }, { "epoch": 19.887780548628427, "grad_norm": 7.968203067779541, "learning_rate": 1.206982543640898e-07, "loss": 0.3007, "step": 79750 }, { "epoch": 19.890274314214462, "grad_norm": 7.759973526000977, "learning_rate": 1.1820448877805489e-07, "loss": 0.2825, "step": 79760 }, { "epoch": 19.892768079800497, "grad_norm": 10.173065185546875, "learning_rate": 1.1571072319201996e-07, "loss": 0.411, "step": 79770 }, { "epoch": 19.895261845386532, "grad_norm": 7.4826555252075195, "learning_rate": 1.1321695760598506e-07, "loss": 0.2562, "step": 79780 }, { "epoch": 19.897755610972567, "grad_norm": 9.044584274291992, "learning_rate": 1.1072319201995014e-07, "loss": 0.3396, "step": 79790 }, { "epoch": 19.900249376558605, "grad_norm": 9.79257583618164, "learning_rate": 1.0822942643391521e-07, "loss": 0.323, "step": 79800 }, { "epoch": 19.902743142144637, "grad_norm": 10.325493812561035, "learning_rate": 1.0573566084788031e-07, "loss": 0.2948, "step": 79810 }, { "epoch": 19.905236907730675, "grad_norm": 8.43407917022705, "learning_rate": 1.0324189526184538e-07, "loss": 0.2562, "step": 79820 }, { "epoch": 19.90773067331671, "grad_norm": 9.039451599121094, "learning_rate": 1.0074812967581048e-07, "loss": 0.319, "step": 79830 }, { "epoch": 19.910224438902745, "grad_norm": 5.012162208557129, "learning_rate": 9.825436408977558e-08, "loss": 0.2856, "step": 79840 }, { "epoch": 19.91271820448878, "grad_norm": 6.816104412078857, "learning_rate": 9.576059850374065e-08, "loss": 0.2679, "step": 79850 }, { "epoch": 19.915211970074814, "grad_norm": 12.247861862182617, "learning_rate": 9.326683291770575e-08, "loss": 0.3458, "step": 79860 }, { "epoch": 19.91770573566085, "grad_norm": 6.4964118003845215, "learning_rate": 9.077306733167083e-08, "loss": 0.3334, "step": 79870 }, { "epoch": 19.920199501246884, "grad_norm": 9.710594177246094, "learning_rate": 8.827930174563592e-08, "loss": 0.3469, "step": 79880 }, { "epoch": 19.92269326683292, "grad_norm": 12.090693473815918, "learning_rate": 8.5785536159601e-08, "loss": 0.3702, "step": 79890 }, { "epoch": 19.925187032418954, "grad_norm": 8.259521484375, "learning_rate": 8.32917705735661e-08, "loss": 0.3142, "step": 79900 }, { "epoch": 19.92768079800499, "grad_norm": 10.591052055358887, "learning_rate": 8.079800498753117e-08, "loss": 0.3415, "step": 79910 }, { "epoch": 19.930174563591024, "grad_norm": 8.798907279968262, "learning_rate": 7.830423940149627e-08, "loss": 0.2823, "step": 79920 }, { "epoch": 19.93266832917706, "grad_norm": 6.978452682495117, "learning_rate": 7.581047381546135e-08, "loss": 0.3098, "step": 79930 }, { "epoch": 19.935162094763093, "grad_norm": 8.613764762878418, "learning_rate": 7.331670822942644e-08, "loss": 0.3456, "step": 79940 }, { "epoch": 19.93765586034913, "grad_norm": 9.246316909790039, "learning_rate": 7.082294264339154e-08, "loss": 0.2926, "step": 79950 }, { "epoch": 19.940149625935163, "grad_norm": 6.40701961517334, "learning_rate": 6.832917705735662e-08, "loss": 0.4304, "step": 79960 }, { "epoch": 19.942643391521198, "grad_norm": 9.808752059936523, "learning_rate": 6.58354114713217e-08, "loss": 0.3531, "step": 79970 }, { "epoch": 19.945137157107233, "grad_norm": 10.570954322814941, "learning_rate": 6.334164588528679e-08, "loss": 0.304, "step": 79980 }, { "epoch": 19.947630922693268, "grad_norm": 6.992794036865234, "learning_rate": 6.084788029925187e-08, "loss": 0.333, "step": 79990 }, { "epoch": 19.950124688279303, "grad_norm": 6.577144145965576, "learning_rate": 5.835411471321696e-08, "loss": 0.2929, "step": 80000 }, { "epoch": 19.952618453865338, "grad_norm": 8.778485298156738, "learning_rate": 5.5860349127182044e-08, "loss": 0.3606, "step": 80010 }, { "epoch": 19.955112219451372, "grad_norm": 6.685991287231445, "learning_rate": 5.336658354114714e-08, "loss": 0.3066, "step": 80020 }, { "epoch": 19.957605985037407, "grad_norm": 11.452533721923828, "learning_rate": 5.087281795511222e-08, "loss": 0.284, "step": 80030 }, { "epoch": 19.960099750623442, "grad_norm": 9.021291732788086, "learning_rate": 4.8379052369077305e-08, "loss": 0.3149, "step": 80040 }, { "epoch": 19.962593516209477, "grad_norm": 8.216629981994629, "learning_rate": 4.58852867830424e-08, "loss": 0.3321, "step": 80050 }, { "epoch": 19.965087281795512, "grad_norm": 9.343437194824219, "learning_rate": 4.339152119700749e-08, "loss": 0.325, "step": 80060 }, { "epoch": 19.967581047381547, "grad_norm": 7.705197811126709, "learning_rate": 4.089775561097257e-08, "loss": 0.3533, "step": 80070 }, { "epoch": 19.97007481296758, "grad_norm": 8.739502906799316, "learning_rate": 3.840399002493766e-08, "loss": 0.3282, "step": 80080 }, { "epoch": 19.972568578553616, "grad_norm": 10.272692680358887, "learning_rate": 3.591022443890275e-08, "loss": 0.3355, "step": 80090 }, { "epoch": 19.97506234413965, "grad_norm": 7.91961145401001, "learning_rate": 3.341645885286783e-08, "loss": 0.342, "step": 80100 }, { "epoch": 19.977556109725686, "grad_norm": 7.7013936042785645, "learning_rate": 3.0922693266832924e-08, "loss": 0.3139, "step": 80110 }, { "epoch": 19.98004987531172, "grad_norm": 8.244073867797852, "learning_rate": 2.8428927680798006e-08, "loss": 0.3602, "step": 80120 }, { "epoch": 19.982543640897756, "grad_norm": 9.814148902893066, "learning_rate": 2.5935162094763094e-08, "loss": 0.3641, "step": 80130 }, { "epoch": 19.98503740648379, "grad_norm": 7.1216020584106445, "learning_rate": 2.344139650872818e-08, "loss": 0.326, "step": 80140 }, { "epoch": 19.987531172069826, "grad_norm": 9.763239860534668, "learning_rate": 2.094763092269327e-08, "loss": 0.3333, "step": 80150 }, { "epoch": 19.99002493765586, "grad_norm": 7.955997943878174, "learning_rate": 1.8453865336658355e-08, "loss": 0.3693, "step": 80160 }, { "epoch": 19.992518703241895, "grad_norm": 8.751893997192383, "learning_rate": 1.5960099750623443e-08, "loss": 0.3065, "step": 80170 }, { "epoch": 19.99501246882793, "grad_norm": 11.452372550964355, "learning_rate": 1.346633416458853e-08, "loss": 0.3088, "step": 80180 }, { "epoch": 19.997506234413965, "grad_norm": 7.998083591461182, "learning_rate": 1.0972568578553617e-08, "loss": 0.2875, "step": 80190 }, { "epoch": 20.0, "grad_norm": 9.69516372680664, "learning_rate": 8.478802992518703e-09, "loss": 0.3132, "step": 80200 } ], "logging_steps": 10, "max_steps": 80200, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.9992048140288e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }