{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.399936035819941, "eval_steps": 500, "global_step": 45024, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005330348338263906, "grad_norm": 22.384035169953112, "learning_rate": 1.0660980810234541e-08, "loss": 0.8117, "step": 10 }, { "epoch": 0.0010660696676527812, "grad_norm": 12.56156999016183, "learning_rate": 2.1321961620469082e-08, "loss": 0.8163, "step": 20 }, { "epoch": 0.0015991045014791716, "grad_norm": 33.76307089269726, "learning_rate": 3.1982942430703625e-08, "loss": 0.7923, "step": 30 }, { "epoch": 0.0021321393353055623, "grad_norm": 16.83726744730983, "learning_rate": 4.2643923240938164e-08, "loss": 0.8066, "step": 40 }, { "epoch": 0.002665174169131953, "grad_norm": 49.634523742971666, "learning_rate": 5.330490405117271e-08, "loss": 0.7986, "step": 50 }, { "epoch": 0.0031982090029583432, "grad_norm": 13.026076869540546, "learning_rate": 6.396588486140725e-08, "loss": 0.8245, "step": 60 }, { "epoch": 0.003731243836784734, "grad_norm": 38.37983400304547, "learning_rate": 7.46268656716418e-08, "loss": 0.8151, "step": 70 }, { "epoch": 0.004264278670611125, "grad_norm": 24.292803186984795, "learning_rate": 8.528784648187633e-08, "loss": 0.8024, "step": 80 }, { "epoch": 0.004797313504437515, "grad_norm": 20.63481080073996, "learning_rate": 9.594882729211088e-08, "loss": 0.8165, "step": 90 }, { "epoch": 0.005330348338263906, "grad_norm": 8.059503536005575, "learning_rate": 1.0660980810234542e-07, "loss": 0.795, "step": 100 }, { "epoch": 0.005863383172090296, "grad_norm": 5.470117214566169, "learning_rate": 1.1727078891257995e-07, "loss": 0.7837, "step": 110 }, { "epoch": 0.0063964180059166865, "grad_norm": 67.1429137488128, "learning_rate": 1.279317697228145e-07, "loss": 0.7636, "step": 120 }, { "epoch": 0.006929452839743077, "grad_norm": 27.57553604148236, "learning_rate": 1.3859275053304905e-07, "loss": 0.7615, "step": 130 }, { "epoch": 0.007462487673569468, "grad_norm": 96.95190578421065, "learning_rate": 1.492537313432836e-07, "loss": 0.7654, "step": 140 }, { "epoch": 0.007995522507395858, "grad_norm": 6.513880444574477, "learning_rate": 1.5991471215351813e-07, "loss": 0.7314, "step": 150 }, { "epoch": 0.00852855734122225, "grad_norm": 17.34658807824645, "learning_rate": 1.7057569296375266e-07, "loss": 0.7413, "step": 160 }, { "epoch": 0.009061592175048639, "grad_norm": 4.68951448214306, "learning_rate": 1.812366737739872e-07, "loss": 0.7237, "step": 170 }, { "epoch": 0.00959462700887503, "grad_norm": 31.904806594531866, "learning_rate": 1.9189765458422176e-07, "loss": 0.6961, "step": 180 }, { "epoch": 0.01012766184270142, "grad_norm": 8.610528268372763, "learning_rate": 2.0255863539445632e-07, "loss": 0.7138, "step": 190 }, { "epoch": 0.010660696676527812, "grad_norm": 27.407492655186093, "learning_rate": 2.1321961620469084e-07, "loss": 0.7031, "step": 200 }, { "epoch": 0.011193731510354202, "grad_norm": 15.23960798334783, "learning_rate": 2.2388059701492537e-07, "loss": 0.6725, "step": 210 }, { "epoch": 0.011726766344180592, "grad_norm": 14.339840399958527, "learning_rate": 2.345415778251599e-07, "loss": 0.7094, "step": 220 }, { "epoch": 0.012259801178006983, "grad_norm": 40.257564402060794, "learning_rate": 2.4520255863539447e-07, "loss": 0.6648, "step": 230 }, { "epoch": 0.012792836011833373, "grad_norm": 38.12287684104614, "learning_rate": 2.55863539445629e-07, "loss": 0.6624, "step": 240 }, { "epoch": 0.013325870845659765, "grad_norm": 16.667179938633755, "learning_rate": 2.665245202558635e-07, "loss": 0.6578, "step": 250 }, { "epoch": 0.013858905679486154, "grad_norm": 30.69626401265156, "learning_rate": 2.771855010660981e-07, "loss": 0.6452, "step": 260 }, { "epoch": 0.014391940513312544, "grad_norm": 53.91940567739014, "learning_rate": 2.8784648187633263e-07, "loss": 0.679, "step": 270 }, { "epoch": 0.014924975347138936, "grad_norm": 6.14720521465269, "learning_rate": 2.985074626865672e-07, "loss": 0.6497, "step": 280 }, { "epoch": 0.015458010180965326, "grad_norm": 37.24542287618424, "learning_rate": 3.0916844349680174e-07, "loss": 0.6759, "step": 290 }, { "epoch": 0.015991045014791715, "grad_norm": 18.31377801368936, "learning_rate": 3.1982942430703626e-07, "loss": 0.6243, "step": 300 }, { "epoch": 0.016524079848618107, "grad_norm": 8.048409639692276, "learning_rate": 3.304904051172708e-07, "loss": 0.6416, "step": 310 }, { "epoch": 0.0170571146824445, "grad_norm": 18.265003051468547, "learning_rate": 3.411513859275053e-07, "loss": 0.6474, "step": 320 }, { "epoch": 0.01759014951627089, "grad_norm": 22.044928666366484, "learning_rate": 3.518123667377399e-07, "loss": 0.6612, "step": 330 }, { "epoch": 0.018123184350097278, "grad_norm": 6.29550754706564, "learning_rate": 3.624733475479744e-07, "loss": 0.6526, "step": 340 }, { "epoch": 0.01865621918392367, "grad_norm": 23.655194210354356, "learning_rate": 3.73134328358209e-07, "loss": 0.6335, "step": 350 }, { "epoch": 0.01918925401775006, "grad_norm": 18.457694770192315, "learning_rate": 3.837953091684435e-07, "loss": 0.6215, "step": 360 }, { "epoch": 0.01972228885157645, "grad_norm": 3.1958736725407744, "learning_rate": 3.9445628997867805e-07, "loss": 0.6317, "step": 370 }, { "epoch": 0.02025532368540284, "grad_norm": 19.973374225574325, "learning_rate": 4.0511727078891263e-07, "loss": 0.618, "step": 380 }, { "epoch": 0.020788358519229232, "grad_norm": 6.883229999182656, "learning_rate": 4.157782515991471e-07, "loss": 0.6513, "step": 390 }, { "epoch": 0.021321393353055624, "grad_norm": 34.58236951166425, "learning_rate": 4.264392324093817e-07, "loss": 0.6385, "step": 400 }, { "epoch": 0.021854428186882012, "grad_norm": 4.896875627926621, "learning_rate": 4.371002132196162e-07, "loss": 0.6121, "step": 410 }, { "epoch": 0.022387463020708404, "grad_norm": 9.571937291164867, "learning_rate": 4.4776119402985074e-07, "loss": 0.6372, "step": 420 }, { "epoch": 0.022920497854534795, "grad_norm": 7.7820482020663215, "learning_rate": 4.584221748400853e-07, "loss": 0.6205, "step": 430 }, { "epoch": 0.023453532688361183, "grad_norm": 17.09258095859951, "learning_rate": 4.690831556503198e-07, "loss": 0.6497, "step": 440 }, { "epoch": 0.023986567522187575, "grad_norm": 15.810404773206715, "learning_rate": 4.797441364605544e-07, "loss": 0.6007, "step": 450 }, { "epoch": 0.024519602356013966, "grad_norm": 2.4769181005915932, "learning_rate": 4.904051172707889e-07, "loss": 0.6212, "step": 460 }, { "epoch": 0.025052637189840354, "grad_norm": 2.7466335080586206, "learning_rate": 5.010660980810235e-07, "loss": 0.6024, "step": 470 }, { "epoch": 0.025585672023666746, "grad_norm": 7.819298025658019, "learning_rate": 5.11727078891258e-07, "loss": 0.6024, "step": 480 }, { "epoch": 0.026118706857493137, "grad_norm": 11.993606063767926, "learning_rate": 5.223880597014926e-07, "loss": 0.618, "step": 490 }, { "epoch": 0.02665174169131953, "grad_norm": 6.092448929136046, "learning_rate": 5.33049040511727e-07, "loss": 0.6181, "step": 500 }, { "epoch": 0.027184776525145917, "grad_norm": 8.52801558890943, "learning_rate": 5.437100213219616e-07, "loss": 0.608, "step": 510 }, { "epoch": 0.02771781135897231, "grad_norm": 3.4101231810925174, "learning_rate": 5.543710021321962e-07, "loss": 0.6033, "step": 520 }, { "epoch": 0.0282508461927987, "grad_norm": 17.453953167558357, "learning_rate": 5.650319829424307e-07, "loss": 0.586, "step": 530 }, { "epoch": 0.02878388102662509, "grad_norm": 3.7853039467463434, "learning_rate": 5.756929637526653e-07, "loss": 0.5761, "step": 540 }, { "epoch": 0.02931691586045148, "grad_norm": 19.26469192065809, "learning_rate": 5.863539445628998e-07, "loss": 0.6047, "step": 550 }, { "epoch": 0.02984995069427787, "grad_norm": 4.660799518343717, "learning_rate": 5.970149253731344e-07, "loss": 0.5951, "step": 560 }, { "epoch": 0.030382985528104263, "grad_norm": 11.724266047364479, "learning_rate": 6.076759061833688e-07, "loss": 0.6186, "step": 570 }, { "epoch": 0.03091602036193065, "grad_norm": 12.045147025901208, "learning_rate": 6.183368869936035e-07, "loss": 0.5995, "step": 580 }, { "epoch": 0.031449055195757046, "grad_norm": 6.611619546789754, "learning_rate": 6.28997867803838e-07, "loss": 0.5916, "step": 590 }, { "epoch": 0.03198209002958343, "grad_norm": 2.6668492445044536, "learning_rate": 6.396588486140725e-07, "loss": 0.5986, "step": 600 }, { "epoch": 0.03251512486340982, "grad_norm": 2.409130712574357, "learning_rate": 6.50319829424307e-07, "loss": 0.5943, "step": 610 }, { "epoch": 0.033048159697236214, "grad_norm": 7.3172633211536615, "learning_rate": 6.609808102345416e-07, "loss": 0.5985, "step": 620 }, { "epoch": 0.033581194531062605, "grad_norm": 2.5021776516635104, "learning_rate": 6.716417910447762e-07, "loss": 0.5688, "step": 630 }, { "epoch": 0.034114229364889, "grad_norm": 3.092066217506789, "learning_rate": 6.823027718550106e-07, "loss": 0.5968, "step": 640 }, { "epoch": 0.03464726419871539, "grad_norm": 2.5459200872949483, "learning_rate": 6.929637526652453e-07, "loss": 0.5908, "step": 650 }, { "epoch": 0.03518029903254178, "grad_norm": 6.494246936455836, "learning_rate": 7.036247334754798e-07, "loss": 0.5744, "step": 660 }, { "epoch": 0.035713333866368165, "grad_norm": 4.162093112846186, "learning_rate": 7.142857142857143e-07, "loss": 0.5924, "step": 670 }, { "epoch": 0.036246368700194556, "grad_norm": 6.283815107667488, "learning_rate": 7.249466950959488e-07, "loss": 0.5918, "step": 680 }, { "epoch": 0.03677940353402095, "grad_norm": 13.747666320600384, "learning_rate": 7.356076759061834e-07, "loss": 0.5875, "step": 690 }, { "epoch": 0.03731243836784734, "grad_norm": 6.495457820303132, "learning_rate": 7.46268656716418e-07, "loss": 0.5655, "step": 700 }, { "epoch": 0.03784547320167373, "grad_norm": 2.6123117632996795, "learning_rate": 7.569296375266524e-07, "loss": 0.5656, "step": 710 }, { "epoch": 0.03837850803550012, "grad_norm": 2.169732322924637, "learning_rate": 7.67590618336887e-07, "loss": 0.5554, "step": 720 }, { "epoch": 0.038911542869326514, "grad_norm": 2.3438244121416956, "learning_rate": 7.782515991471216e-07, "loss": 0.5791, "step": 730 }, { "epoch": 0.0394445777031529, "grad_norm": 2.6726563129303824, "learning_rate": 7.889125799573561e-07, "loss": 0.575, "step": 740 }, { "epoch": 0.03997761253697929, "grad_norm": 2.8732249315321723, "learning_rate": 7.995735607675906e-07, "loss": 0.5628, "step": 750 }, { "epoch": 0.04051064737080568, "grad_norm": 1.90982824669672, "learning_rate": 8.102345415778253e-07, "loss": 0.5526, "step": 760 }, { "epoch": 0.04104368220463207, "grad_norm": 6.766200067447573, "learning_rate": 8.208955223880597e-07, "loss": 0.572, "step": 770 }, { "epoch": 0.041576717038458465, "grad_norm": 9.412682209911177, "learning_rate": 8.315565031982942e-07, "loss": 0.5702, "step": 780 }, { "epoch": 0.042109751872284856, "grad_norm": 6.025336896305503, "learning_rate": 8.422174840085288e-07, "loss": 0.5465, "step": 790 }, { "epoch": 0.04264278670611125, "grad_norm": 1.9706615562451428, "learning_rate": 8.528784648187634e-07, "loss": 0.5568, "step": 800 }, { "epoch": 0.04317582153993763, "grad_norm": 2.939411355491427, "learning_rate": 8.635394456289978e-07, "loss": 0.5502, "step": 810 }, { "epoch": 0.043708856373764024, "grad_norm": 5.142970989416449, "learning_rate": 8.742004264392324e-07, "loss": 0.5517, "step": 820 }, { "epoch": 0.044241891207590416, "grad_norm": 3.053201788230296, "learning_rate": 8.84861407249467e-07, "loss": 0.5663, "step": 830 }, { "epoch": 0.04477492604141681, "grad_norm": 1.8449978083615863, "learning_rate": 8.955223880597015e-07, "loss": 0.5371, "step": 840 }, { "epoch": 0.0453079608752432, "grad_norm": 3.952962229474722, "learning_rate": 9.06183368869936e-07, "loss": 0.5518, "step": 850 }, { "epoch": 0.04584099570906959, "grad_norm": 2.3114170550600517, "learning_rate": 9.168443496801706e-07, "loss": 0.5339, "step": 860 }, { "epoch": 0.046374030542895975, "grad_norm": 2.2912760175353872, "learning_rate": 9.275053304904052e-07, "loss": 0.5326, "step": 870 }, { "epoch": 0.046907065376722366, "grad_norm": 4.298637664764211, "learning_rate": 9.381663113006396e-07, "loss": 0.5602, "step": 880 }, { "epoch": 0.04744010021054876, "grad_norm": 2.208073245907923, "learning_rate": 9.488272921108742e-07, "loss": 0.5555, "step": 890 }, { "epoch": 0.04797313504437515, "grad_norm": 2.0989454369450793, "learning_rate": 9.594882729211088e-07, "loss": 0.5431, "step": 900 }, { "epoch": 0.04850616987820154, "grad_norm": 4.868043085397458, "learning_rate": 9.701492537313434e-07, "loss": 0.5554, "step": 910 }, { "epoch": 0.04903920471202793, "grad_norm": 2.163101936478925, "learning_rate": 9.808102345415779e-07, "loss": 0.5575, "step": 920 }, { "epoch": 0.049572239545854324, "grad_norm": 2.5780084223910076, "learning_rate": 9.914712153518124e-07, "loss": 0.537, "step": 930 }, { "epoch": 0.05010527437968071, "grad_norm": 7.951366322245886, "learning_rate": 1.002132196162047e-06, "loss": 0.5213, "step": 940 }, { "epoch": 0.0506383092135071, "grad_norm": 4.603313864051097, "learning_rate": 1.0127931769722815e-06, "loss": 0.5477, "step": 950 }, { "epoch": 0.05117134404733349, "grad_norm": 2.8636243672532316, "learning_rate": 1.023454157782516e-06, "loss": 0.5342, "step": 960 }, { "epoch": 0.05170437888115988, "grad_norm": 3.0321492345169703, "learning_rate": 1.0341151385927505e-06, "loss": 0.55, "step": 970 }, { "epoch": 0.052237413714986275, "grad_norm": 9.474189998020558, "learning_rate": 1.0447761194029853e-06, "loss": 0.5481, "step": 980 }, { "epoch": 0.052770448548812667, "grad_norm": 2.8402495149841402, "learning_rate": 1.0554371002132196e-06, "loss": 0.5358, "step": 990 }, { "epoch": 0.05330348338263906, "grad_norm": 2.83582514386478, "learning_rate": 1.066098081023454e-06, "loss": 0.522, "step": 1000 }, { "epoch": 0.05383651821646544, "grad_norm": 2.8322815447404937, "learning_rate": 1.0767590618336888e-06, "loss": 0.5339, "step": 1010 }, { "epoch": 0.054369553050291834, "grad_norm": 2.673931322281884, "learning_rate": 1.0874200426439232e-06, "loss": 0.5337, "step": 1020 }, { "epoch": 0.054902587884118226, "grad_norm": 1.984491606409298, "learning_rate": 1.0980810234541577e-06, "loss": 0.5238, "step": 1030 }, { "epoch": 0.05543562271794462, "grad_norm": 2.0699566932848374, "learning_rate": 1.1087420042643924e-06, "loss": 0.5281, "step": 1040 }, { "epoch": 0.05596865755177101, "grad_norm": 2.152694343376385, "learning_rate": 1.119402985074627e-06, "loss": 0.5291, "step": 1050 }, { "epoch": 0.0565016923855974, "grad_norm": 2.3181371881447985, "learning_rate": 1.1300639658848615e-06, "loss": 0.5341, "step": 1060 }, { "epoch": 0.05703472721942379, "grad_norm": 2.9679916174369407, "learning_rate": 1.140724946695096e-06, "loss": 0.5279, "step": 1070 }, { "epoch": 0.05756776205325018, "grad_norm": 1.6467045554688482, "learning_rate": 1.1513859275053305e-06, "loss": 0.5259, "step": 1080 }, { "epoch": 0.05810079688707657, "grad_norm": 2.9173913321251193, "learning_rate": 1.162046908315565e-06, "loss": 0.5326, "step": 1090 }, { "epoch": 0.05863383172090296, "grad_norm": 1.9327154653150131, "learning_rate": 1.1727078891257996e-06, "loss": 0.5444, "step": 1100 }, { "epoch": 0.05916686655472935, "grad_norm": 2.053579315100549, "learning_rate": 1.183368869936034e-06, "loss": 0.5328, "step": 1110 }, { "epoch": 0.05969990138855574, "grad_norm": 1.8406327414695598, "learning_rate": 1.1940298507462688e-06, "loss": 0.5179, "step": 1120 }, { "epoch": 0.060232936222382134, "grad_norm": 2.3056097145291674, "learning_rate": 1.2046908315565032e-06, "loss": 0.5204, "step": 1130 }, { "epoch": 0.060765971056208526, "grad_norm": 2.896462098442556, "learning_rate": 1.2153518123667377e-06, "loss": 0.5165, "step": 1140 }, { "epoch": 0.06129900589003491, "grad_norm": 2.789749420612799, "learning_rate": 1.2260127931769724e-06, "loss": 0.5238, "step": 1150 }, { "epoch": 0.0618320407238613, "grad_norm": 1.8478165324546423, "learning_rate": 1.236673773987207e-06, "loss": 0.5179, "step": 1160 }, { "epoch": 0.062365075557687694, "grad_norm": 1.8375243417739309, "learning_rate": 1.2473347547974413e-06, "loss": 0.5284, "step": 1170 }, { "epoch": 0.06289811039151409, "grad_norm": 1.9052439579336735, "learning_rate": 1.257995735607676e-06, "loss": 0.5258, "step": 1180 }, { "epoch": 0.06343114522534048, "grad_norm": 5.901778710118485, "learning_rate": 1.2686567164179105e-06, "loss": 0.5316, "step": 1190 }, { "epoch": 0.06396418005916686, "grad_norm": 7.426400086233918, "learning_rate": 1.279317697228145e-06, "loss": 0.5041, "step": 1200 }, { "epoch": 0.06449721489299326, "grad_norm": 1.7416422626181, "learning_rate": 1.2899786780383796e-06, "loss": 0.5388, "step": 1210 }, { "epoch": 0.06503024972681964, "grad_norm": 5.260557965260364, "learning_rate": 1.300639658848614e-06, "loss": 0.5127, "step": 1220 }, { "epoch": 0.06556328456064604, "grad_norm": 2.038127736521098, "learning_rate": 1.3113006396588486e-06, "loss": 0.5139, "step": 1230 }, { "epoch": 0.06609631939447243, "grad_norm": 1.8823373915151784, "learning_rate": 1.3219616204690832e-06, "loss": 0.5215, "step": 1240 }, { "epoch": 0.06662935422829883, "grad_norm": 2.15503695451607, "learning_rate": 1.3326226012793177e-06, "loss": 0.5199, "step": 1250 }, { "epoch": 0.06716238906212521, "grad_norm": 2.1383222903150276, "learning_rate": 1.3432835820895524e-06, "loss": 0.5015, "step": 1260 }, { "epoch": 0.0676954238959516, "grad_norm": 2.2511503739891987, "learning_rate": 1.3539445628997867e-06, "loss": 0.519, "step": 1270 }, { "epoch": 0.068228458729778, "grad_norm": 1.917158848873185, "learning_rate": 1.3646055437100213e-06, "loss": 0.5136, "step": 1280 }, { "epoch": 0.06876149356360438, "grad_norm": 2.1325053191679006, "learning_rate": 1.375266524520256e-06, "loss": 0.5372, "step": 1290 }, { "epoch": 0.06929452839743078, "grad_norm": 2.4772885935630407, "learning_rate": 1.3859275053304905e-06, "loss": 0.5194, "step": 1300 }, { "epoch": 0.06982756323125716, "grad_norm": 1.8973258351709514, "learning_rate": 1.3965884861407248e-06, "loss": 0.531, "step": 1310 }, { "epoch": 0.07036059806508356, "grad_norm": 1.9395058463946968, "learning_rate": 1.4072494669509596e-06, "loss": 0.5183, "step": 1320 }, { "epoch": 0.07089363289890994, "grad_norm": 2.011140162870199, "learning_rate": 1.417910447761194e-06, "loss": 0.4906, "step": 1330 }, { "epoch": 0.07142666773273633, "grad_norm": 2.365711885922838, "learning_rate": 1.4285714285714286e-06, "loss": 0.5066, "step": 1340 }, { "epoch": 0.07195970256656273, "grad_norm": 2.3177942824451674, "learning_rate": 1.4392324093816632e-06, "loss": 0.5173, "step": 1350 }, { "epoch": 0.07249273740038911, "grad_norm": 1.7555752174395958, "learning_rate": 1.4498933901918977e-06, "loss": 0.5141, "step": 1360 }, { "epoch": 0.07302577223421551, "grad_norm": 1.889980839900935, "learning_rate": 1.4605543710021322e-06, "loss": 0.5027, "step": 1370 }, { "epoch": 0.0735588070680419, "grad_norm": 1.7582289396362074, "learning_rate": 1.4712153518123667e-06, "loss": 0.5276, "step": 1380 }, { "epoch": 0.0740918419018683, "grad_norm": 1.7602655151027362, "learning_rate": 1.4818763326226013e-06, "loss": 0.5158, "step": 1390 }, { "epoch": 0.07462487673569468, "grad_norm": 1.8001183881260645, "learning_rate": 1.492537313432836e-06, "loss": 0.4986, "step": 1400 }, { "epoch": 0.07515791156952106, "grad_norm": 2.0725353366183694, "learning_rate": 1.5031982942430703e-06, "loss": 0.5217, "step": 1410 }, { "epoch": 0.07569094640334746, "grad_norm": 1.7278480431234928, "learning_rate": 1.5138592750533048e-06, "loss": 0.5215, "step": 1420 }, { "epoch": 0.07622398123717385, "grad_norm": 1.9474652366857994, "learning_rate": 1.5245202558635396e-06, "loss": 0.517, "step": 1430 }, { "epoch": 0.07675701607100024, "grad_norm": 2.1422897034566413, "learning_rate": 1.535181236673774e-06, "loss": 0.5026, "step": 1440 }, { "epoch": 0.07729005090482663, "grad_norm": 1.7905462109235504, "learning_rate": 1.5458422174840086e-06, "loss": 0.5119, "step": 1450 }, { "epoch": 0.07782308573865303, "grad_norm": 2.493897087442035, "learning_rate": 1.5565031982942432e-06, "loss": 0.5165, "step": 1460 }, { "epoch": 0.07835612057247941, "grad_norm": 1.8945720968238624, "learning_rate": 1.5671641791044775e-06, "loss": 0.5251, "step": 1470 }, { "epoch": 0.0788891554063058, "grad_norm": 1.9361905130988597, "learning_rate": 1.5778251599147122e-06, "loss": 0.5119, "step": 1480 }, { "epoch": 0.0794221902401322, "grad_norm": 1.6540771960472516, "learning_rate": 1.5884861407249467e-06, "loss": 0.5132, "step": 1490 }, { "epoch": 0.07995522507395858, "grad_norm": 2.0366997500257087, "learning_rate": 1.5991471215351813e-06, "loss": 0.4875, "step": 1500 }, { "epoch": 0.08048825990778498, "grad_norm": 2.481201500484451, "learning_rate": 1.6098081023454158e-06, "loss": 0.5168, "step": 1510 }, { "epoch": 0.08102129474161136, "grad_norm": 2.843400113923369, "learning_rate": 1.6204690831556505e-06, "loss": 0.4963, "step": 1520 }, { "epoch": 0.08155432957543776, "grad_norm": 1.7815166397160784, "learning_rate": 1.631130063965885e-06, "loss": 0.524, "step": 1530 }, { "epoch": 0.08208736440926415, "grad_norm": 2.1943964965228533, "learning_rate": 1.6417910447761194e-06, "loss": 0.5223, "step": 1540 }, { "epoch": 0.08262039924309053, "grad_norm": 1.7228556034029094, "learning_rate": 1.6524520255863539e-06, "loss": 0.5189, "step": 1550 }, { "epoch": 0.08315343407691693, "grad_norm": 3.2132263175111073, "learning_rate": 1.6631130063965884e-06, "loss": 0.4928, "step": 1560 }, { "epoch": 0.08368646891074331, "grad_norm": 3.2032275982706997, "learning_rate": 1.6737739872068232e-06, "loss": 0.5154, "step": 1570 }, { "epoch": 0.08421950374456971, "grad_norm": 1.783161362038307, "learning_rate": 1.6844349680170577e-06, "loss": 0.5008, "step": 1580 }, { "epoch": 0.0847525385783961, "grad_norm": 2.776910027467588, "learning_rate": 1.6950959488272922e-06, "loss": 0.5113, "step": 1590 }, { "epoch": 0.0852855734122225, "grad_norm": 4.011219424278999, "learning_rate": 1.7057569296375267e-06, "loss": 0.5123, "step": 1600 }, { "epoch": 0.08581860824604888, "grad_norm": 1.8879771864726966, "learning_rate": 1.716417910447761e-06, "loss": 0.5084, "step": 1610 }, { "epoch": 0.08635164307987526, "grad_norm": 1.604188895583308, "learning_rate": 1.7270788912579956e-06, "loss": 0.4971, "step": 1620 }, { "epoch": 0.08688467791370166, "grad_norm": 3.4755012148328883, "learning_rate": 1.7377398720682303e-06, "loss": 0.5014, "step": 1630 }, { "epoch": 0.08741771274752805, "grad_norm": 1.7670914630278758, "learning_rate": 1.7484008528784648e-06, "loss": 0.4961, "step": 1640 }, { "epoch": 0.08795074758135445, "grad_norm": 1.8279328976983698, "learning_rate": 1.7590618336886994e-06, "loss": 0.5114, "step": 1650 }, { "epoch": 0.08848378241518083, "grad_norm": 6.293161998539346, "learning_rate": 1.769722814498934e-06, "loss": 0.4984, "step": 1660 }, { "epoch": 0.08901681724900722, "grad_norm": 1.597910745986726, "learning_rate": 1.7803837953091686e-06, "loss": 0.5396, "step": 1670 }, { "epoch": 0.08954985208283361, "grad_norm": 1.6653635349148304, "learning_rate": 1.791044776119403e-06, "loss": 0.5303, "step": 1680 }, { "epoch": 0.09008288691666, "grad_norm": 1.4482133951906653, "learning_rate": 1.8017057569296375e-06, "loss": 0.5043, "step": 1690 }, { "epoch": 0.0906159217504864, "grad_norm": 1.7349298164399845, "learning_rate": 1.812366737739872e-06, "loss": 0.4874, "step": 1700 }, { "epoch": 0.09114895658431278, "grad_norm": 1.6070828987136645, "learning_rate": 1.8230277185501067e-06, "loss": 0.4832, "step": 1710 }, { "epoch": 0.09168199141813918, "grad_norm": 1.9743951285284194, "learning_rate": 1.8336886993603413e-06, "loss": 0.4964, "step": 1720 }, { "epoch": 0.09221502625196557, "grad_norm": 1.717799481918251, "learning_rate": 1.8443496801705758e-06, "loss": 0.5089, "step": 1730 }, { "epoch": 0.09274806108579195, "grad_norm": 1.9356727490694972, "learning_rate": 1.8550106609808103e-06, "loss": 0.5109, "step": 1740 }, { "epoch": 0.09328109591961835, "grad_norm": 1.9580272645326764, "learning_rate": 1.8656716417910446e-06, "loss": 0.4917, "step": 1750 }, { "epoch": 0.09381413075344473, "grad_norm": 2.727933294298909, "learning_rate": 1.8763326226012792e-06, "loss": 0.5025, "step": 1760 }, { "epoch": 0.09434716558727113, "grad_norm": 1.8659027091038973, "learning_rate": 1.8869936034115139e-06, "loss": 0.4967, "step": 1770 }, { "epoch": 0.09488020042109752, "grad_norm": 4.100730576838372, "learning_rate": 1.8976545842217484e-06, "loss": 0.4976, "step": 1780 }, { "epoch": 0.09541323525492391, "grad_norm": 1.7911168265329525, "learning_rate": 1.908315565031983e-06, "loss": 0.5013, "step": 1790 }, { "epoch": 0.0959462700887503, "grad_norm": 1.7463986183897582, "learning_rate": 1.9189765458422177e-06, "loss": 0.4761, "step": 1800 }, { "epoch": 0.09647930492257668, "grad_norm": 2.082040030487041, "learning_rate": 1.929637526652452e-06, "loss": 0.4984, "step": 1810 }, { "epoch": 0.09701233975640308, "grad_norm": 3.405347729207966, "learning_rate": 1.9402985074626867e-06, "loss": 0.4988, "step": 1820 }, { "epoch": 0.09754537459022947, "grad_norm": 2.0886731520776167, "learning_rate": 1.950959488272921e-06, "loss": 0.5064, "step": 1830 }, { "epoch": 0.09807840942405587, "grad_norm": 2.051895534884051, "learning_rate": 1.9616204690831558e-06, "loss": 0.4901, "step": 1840 }, { "epoch": 0.09861144425788225, "grad_norm": 1.709668302914341, "learning_rate": 1.9722814498933903e-06, "loss": 0.4918, "step": 1850 }, { "epoch": 0.09914447909170865, "grad_norm": 1.7128961914267717, "learning_rate": 1.982942430703625e-06, "loss": 0.5093, "step": 1860 }, { "epoch": 0.09967751392553503, "grad_norm": 1.6581351982370527, "learning_rate": 1.9936034115138594e-06, "loss": 0.513, "step": 1870 }, { "epoch": 0.10021054875936142, "grad_norm": 1.6911304413453174, "learning_rate": 2.004264392324094e-06, "loss": 0.4966, "step": 1880 }, { "epoch": 0.10074358359318782, "grad_norm": 1.9285274642420187, "learning_rate": 2.0149253731343284e-06, "loss": 0.4908, "step": 1890 }, { "epoch": 0.1012766184270142, "grad_norm": 1.6229726939701612, "learning_rate": 2.025586353944563e-06, "loss": 0.4919, "step": 1900 }, { "epoch": 0.1018096532608406, "grad_norm": 1.6722981844499845, "learning_rate": 2.0362473347547975e-06, "loss": 0.5038, "step": 1910 }, { "epoch": 0.10234268809466698, "grad_norm": 1.8525929467338784, "learning_rate": 2.046908315565032e-06, "loss": 0.4851, "step": 1920 }, { "epoch": 0.10287572292849338, "grad_norm": 1.7651203389199857, "learning_rate": 2.0575692963752665e-06, "loss": 0.4978, "step": 1930 }, { "epoch": 0.10340875776231977, "grad_norm": 3.0399113460745153, "learning_rate": 2.068230277185501e-06, "loss": 0.5093, "step": 1940 }, { "epoch": 0.10394179259614615, "grad_norm": 1.694188069109413, "learning_rate": 2.0788912579957356e-06, "loss": 0.4852, "step": 1950 }, { "epoch": 0.10447482742997255, "grad_norm": 1.4749248767326193, "learning_rate": 2.0895522388059705e-06, "loss": 0.4991, "step": 1960 }, { "epoch": 0.10500786226379893, "grad_norm": 1.6545205597942974, "learning_rate": 2.1002132196162046e-06, "loss": 0.5046, "step": 1970 }, { "epoch": 0.10554089709762533, "grad_norm": 1.789844021052239, "learning_rate": 2.110874200426439e-06, "loss": 0.4856, "step": 1980 }, { "epoch": 0.10607393193145172, "grad_norm": 2.389747097340645, "learning_rate": 2.1215351812366737e-06, "loss": 0.492, "step": 1990 }, { "epoch": 0.10660696676527812, "grad_norm": 1.9999702735537674, "learning_rate": 2.132196162046908e-06, "loss": 0.4776, "step": 2000 }, { "epoch": 0.1071400015991045, "grad_norm": 1.708572902721768, "learning_rate": 2.142857142857143e-06, "loss": 0.5082, "step": 2010 }, { "epoch": 0.10767303643293089, "grad_norm": 1.6616552169351602, "learning_rate": 2.1535181236673777e-06, "loss": 0.4991, "step": 2020 }, { "epoch": 0.10820607126675728, "grad_norm": 1.9961949316657603, "learning_rate": 2.164179104477612e-06, "loss": 0.4916, "step": 2030 }, { "epoch": 0.10873910610058367, "grad_norm": 1.947126393495355, "learning_rate": 2.1748400852878463e-06, "loss": 0.5075, "step": 2040 }, { "epoch": 0.10927214093441007, "grad_norm": 2.1974453403429144, "learning_rate": 2.185501066098081e-06, "loss": 0.477, "step": 2050 }, { "epoch": 0.10980517576823645, "grad_norm": 2.831922532150069, "learning_rate": 2.1961620469083154e-06, "loss": 0.4886, "step": 2060 }, { "epoch": 0.11033821060206285, "grad_norm": 1.684708827472965, "learning_rate": 2.2068230277185503e-06, "loss": 0.502, "step": 2070 }, { "epoch": 0.11087124543588923, "grad_norm": 2.0331477351606724, "learning_rate": 2.217484008528785e-06, "loss": 0.4804, "step": 2080 }, { "epoch": 0.11140428026971562, "grad_norm": 1.717450101243865, "learning_rate": 2.2281449893390194e-06, "loss": 0.4947, "step": 2090 }, { "epoch": 0.11193731510354202, "grad_norm": 1.8174981757295043, "learning_rate": 2.238805970149254e-06, "loss": 0.4929, "step": 2100 }, { "epoch": 0.1124703499373684, "grad_norm": 1.6889988447513715, "learning_rate": 2.249466950959488e-06, "loss": 0.5005, "step": 2110 }, { "epoch": 0.1130033847711948, "grad_norm": 1.6480565960509483, "learning_rate": 2.260127931769723e-06, "loss": 0.4948, "step": 2120 }, { "epoch": 0.11353641960502119, "grad_norm": 1.6952436618404707, "learning_rate": 2.2707889125799575e-06, "loss": 0.5001, "step": 2130 }, { "epoch": 0.11406945443884758, "grad_norm": 1.5994581672256405, "learning_rate": 2.281449893390192e-06, "loss": 0.4866, "step": 2140 }, { "epoch": 0.11460248927267397, "grad_norm": 1.714526187540524, "learning_rate": 2.2921108742004265e-06, "loss": 0.4797, "step": 2150 }, { "epoch": 0.11513552410650035, "grad_norm": 1.721179665627559, "learning_rate": 2.302771855010661e-06, "loss": 0.5066, "step": 2160 }, { "epoch": 0.11566855894032675, "grad_norm": 1.639833643560835, "learning_rate": 2.3134328358208956e-06, "loss": 0.4781, "step": 2170 }, { "epoch": 0.11620159377415314, "grad_norm": 1.690239965307617, "learning_rate": 2.32409381663113e-06, "loss": 0.4688, "step": 2180 }, { "epoch": 0.11673462860797953, "grad_norm": 1.8205864495338293, "learning_rate": 2.3347547974413646e-06, "loss": 0.4897, "step": 2190 }, { "epoch": 0.11726766344180592, "grad_norm": 2.0410830299330716, "learning_rate": 2.345415778251599e-06, "loss": 0.4926, "step": 2200 }, { "epoch": 0.11780069827563232, "grad_norm": 2.3481204247479006, "learning_rate": 2.3560767590618337e-06, "loss": 0.4907, "step": 2210 }, { "epoch": 0.1183337331094587, "grad_norm": 3.3565284556836095, "learning_rate": 2.366737739872068e-06, "loss": 0.4942, "step": 2220 }, { "epoch": 0.11886676794328509, "grad_norm": 1.765724500598554, "learning_rate": 2.3773987206823027e-06, "loss": 0.4559, "step": 2230 }, { "epoch": 0.11939980277711149, "grad_norm": 1.6685529628290774, "learning_rate": 2.3880597014925377e-06, "loss": 0.4796, "step": 2240 }, { "epoch": 0.11993283761093787, "grad_norm": 2.025184489439367, "learning_rate": 2.3987206823027718e-06, "loss": 0.4974, "step": 2250 }, { "epoch": 0.12046587244476427, "grad_norm": 1.5877609639194825, "learning_rate": 2.4093816631130063e-06, "loss": 0.4862, "step": 2260 }, { "epoch": 0.12099890727859065, "grad_norm": 1.75789703015658, "learning_rate": 2.420042643923241e-06, "loss": 0.4758, "step": 2270 }, { "epoch": 0.12153194211241705, "grad_norm": 1.6715829585184263, "learning_rate": 2.4307036247334754e-06, "loss": 0.4875, "step": 2280 }, { "epoch": 0.12206497694624344, "grad_norm": 1.7692896856499511, "learning_rate": 2.4413646055437103e-06, "loss": 0.4849, "step": 2290 }, { "epoch": 0.12259801178006982, "grad_norm": 1.5709069675016976, "learning_rate": 2.452025586353945e-06, "loss": 0.4749, "step": 2300 }, { "epoch": 0.12313104661389622, "grad_norm": 1.725433914578352, "learning_rate": 2.4626865671641794e-06, "loss": 0.4731, "step": 2310 }, { "epoch": 0.1236640814477226, "grad_norm": 1.6116971493392425, "learning_rate": 2.473347547974414e-06, "loss": 0.4803, "step": 2320 }, { "epoch": 0.124197116281549, "grad_norm": 1.6656362662222297, "learning_rate": 2.484008528784648e-06, "loss": 0.5, "step": 2330 }, { "epoch": 0.12473015111537539, "grad_norm": 1.7717514478220375, "learning_rate": 2.4946695095948825e-06, "loss": 0.4696, "step": 2340 }, { "epoch": 0.12526318594920177, "grad_norm": 1.7157129500673733, "learning_rate": 2.5053304904051175e-06, "loss": 0.4848, "step": 2350 }, { "epoch": 0.12579622078302818, "grad_norm": 1.6394167467054033, "learning_rate": 2.515991471215352e-06, "loss": 0.4882, "step": 2360 }, { "epoch": 0.12632925561685457, "grad_norm": 2.0113883742254712, "learning_rate": 2.5266524520255865e-06, "loss": 0.4791, "step": 2370 }, { "epoch": 0.12686229045068095, "grad_norm": 1.5991081210634723, "learning_rate": 2.537313432835821e-06, "loss": 0.4899, "step": 2380 }, { "epoch": 0.12739532528450734, "grad_norm": 1.7140376837663376, "learning_rate": 2.5479744136460556e-06, "loss": 0.4596, "step": 2390 }, { "epoch": 0.12792836011833372, "grad_norm": 1.7508722678866806, "learning_rate": 2.55863539445629e-06, "loss": 0.4698, "step": 2400 }, { "epoch": 0.12846139495216014, "grad_norm": 1.9199249496948412, "learning_rate": 2.5692963752665246e-06, "loss": 0.4598, "step": 2410 }, { "epoch": 0.12899442978598652, "grad_norm": 1.8211627501867966, "learning_rate": 2.579957356076759e-06, "loss": 0.4921, "step": 2420 }, { "epoch": 0.1295274646198129, "grad_norm": 1.7122266715459322, "learning_rate": 2.5906183368869937e-06, "loss": 0.4949, "step": 2430 }, { "epoch": 0.1300604994536393, "grad_norm": 1.843237819990565, "learning_rate": 2.601279317697228e-06, "loss": 0.4927, "step": 2440 }, { "epoch": 0.13059353428746567, "grad_norm": 1.5700771392593305, "learning_rate": 2.6119402985074627e-06, "loss": 0.4811, "step": 2450 }, { "epoch": 0.13112656912129209, "grad_norm": 1.7209228243885426, "learning_rate": 2.6226012793176973e-06, "loss": 0.4853, "step": 2460 }, { "epoch": 0.13165960395511847, "grad_norm": 1.7261663158334686, "learning_rate": 2.6332622601279318e-06, "loss": 0.4725, "step": 2470 }, { "epoch": 0.13219263878894486, "grad_norm": 1.5867794526316623, "learning_rate": 2.6439232409381663e-06, "loss": 0.4711, "step": 2480 }, { "epoch": 0.13272567362277124, "grad_norm": 1.651517768428253, "learning_rate": 2.654584221748401e-06, "loss": 0.4626, "step": 2490 }, { "epoch": 0.13325870845659765, "grad_norm": 1.7942624515241987, "learning_rate": 2.6652452025586354e-06, "loss": 0.4762, "step": 2500 }, { "epoch": 0.13379174329042404, "grad_norm": 1.838220915085115, "learning_rate": 2.67590618336887e-06, "loss": 0.4873, "step": 2510 }, { "epoch": 0.13432477812425042, "grad_norm": 1.6777720734017567, "learning_rate": 2.686567164179105e-06, "loss": 0.4752, "step": 2520 }, { "epoch": 0.1348578129580768, "grad_norm": 1.6952704077043272, "learning_rate": 2.6972281449893394e-06, "loss": 0.4733, "step": 2530 }, { "epoch": 0.1353908477919032, "grad_norm": 1.7370410349742849, "learning_rate": 2.7078891257995735e-06, "loss": 0.4893, "step": 2540 }, { "epoch": 0.1359238826257296, "grad_norm": 1.5855106890772668, "learning_rate": 2.718550106609808e-06, "loss": 0.4757, "step": 2550 }, { "epoch": 0.136456917459556, "grad_norm": 1.767876350225791, "learning_rate": 2.7292110874200425e-06, "loss": 0.4786, "step": 2560 }, { "epoch": 0.13698995229338237, "grad_norm": 1.921436725621705, "learning_rate": 2.739872068230277e-06, "loss": 0.4775, "step": 2570 }, { "epoch": 0.13752298712720876, "grad_norm": 1.9038990150261386, "learning_rate": 2.750533049040512e-06, "loss": 0.4832, "step": 2580 }, { "epoch": 0.13805602196103514, "grad_norm": 1.7501851035936127, "learning_rate": 2.7611940298507465e-06, "loss": 0.4762, "step": 2590 }, { "epoch": 0.13858905679486155, "grad_norm": 1.631228035358413, "learning_rate": 2.771855010660981e-06, "loss": 0.476, "step": 2600 }, { "epoch": 0.13912209162868794, "grad_norm": 1.5680229889762707, "learning_rate": 2.782515991471215e-06, "loss": 0.4724, "step": 2610 }, { "epoch": 0.13965512646251432, "grad_norm": 1.7053595348929826, "learning_rate": 2.7931769722814497e-06, "loss": 0.4652, "step": 2620 }, { "epoch": 0.1401881612963407, "grad_norm": 1.815725483080737, "learning_rate": 2.8038379530916846e-06, "loss": 0.499, "step": 2630 }, { "epoch": 0.14072119613016712, "grad_norm": 1.7622924765811276, "learning_rate": 2.814498933901919e-06, "loss": 0.4652, "step": 2640 }, { "epoch": 0.1412542309639935, "grad_norm": 1.8000297407262529, "learning_rate": 2.8251599147121537e-06, "loss": 0.4671, "step": 2650 }, { "epoch": 0.1417872657978199, "grad_norm": 1.7028419578488374, "learning_rate": 2.835820895522388e-06, "loss": 0.4724, "step": 2660 }, { "epoch": 0.14232030063164627, "grad_norm": 1.557502405149442, "learning_rate": 2.8464818763326227e-06, "loss": 0.4877, "step": 2670 }, { "epoch": 0.14285333546547266, "grad_norm": 1.682514725509012, "learning_rate": 2.8571428571428573e-06, "loss": 0.4676, "step": 2680 }, { "epoch": 0.14338637029929907, "grad_norm": 1.6917198647698848, "learning_rate": 2.8678038379530918e-06, "loss": 0.4972, "step": 2690 }, { "epoch": 0.14391940513312546, "grad_norm": 1.647667906805554, "learning_rate": 2.8784648187633263e-06, "loss": 0.4809, "step": 2700 }, { "epoch": 0.14445243996695184, "grad_norm": 1.6304257308791026, "learning_rate": 2.889125799573561e-06, "loss": 0.4832, "step": 2710 }, { "epoch": 0.14498547480077822, "grad_norm": 1.776749710670109, "learning_rate": 2.8997867803837954e-06, "loss": 0.4712, "step": 2720 }, { "epoch": 0.1455185096346046, "grad_norm": 1.4680748829527526, "learning_rate": 2.91044776119403e-06, "loss": 0.4715, "step": 2730 }, { "epoch": 0.14605154446843102, "grad_norm": 1.6920857612601896, "learning_rate": 2.9211087420042644e-06, "loss": 0.4931, "step": 2740 }, { "epoch": 0.1465845793022574, "grad_norm": 1.7897945480485358, "learning_rate": 2.931769722814499e-06, "loss": 0.4874, "step": 2750 }, { "epoch": 0.1471176141360838, "grad_norm": 1.5377484124553449, "learning_rate": 2.9424307036247335e-06, "loss": 0.4714, "step": 2760 }, { "epoch": 0.14765064896991018, "grad_norm": 1.5803752311002037, "learning_rate": 2.953091684434968e-06, "loss": 0.4738, "step": 2770 }, { "epoch": 0.1481836838037366, "grad_norm": 1.6654580252453022, "learning_rate": 2.9637526652452025e-06, "loss": 0.4576, "step": 2780 }, { "epoch": 0.14871671863756297, "grad_norm": 1.5853688122421294, "learning_rate": 2.974413646055437e-06, "loss": 0.4845, "step": 2790 }, { "epoch": 0.14924975347138936, "grad_norm": 1.928646439015856, "learning_rate": 2.985074626865672e-06, "loss": 0.4722, "step": 2800 }, { "epoch": 0.14978278830521574, "grad_norm": 1.6957165307693083, "learning_rate": 2.9957356076759065e-06, "loss": 0.4819, "step": 2810 }, { "epoch": 0.15031582313904213, "grad_norm": 1.8031655894710157, "learning_rate": 2.999999906780211e-06, "loss": 0.4747, "step": 2820 }, { "epoch": 0.15084885797286854, "grad_norm": 1.6750833780100653, "learning_rate": 2.999999337103763e-06, "loss": 0.4689, "step": 2830 }, { "epoch": 0.15138189280669492, "grad_norm": 1.612344140009288, "learning_rate": 2.9999982495398344e-06, "loss": 0.4739, "step": 2840 }, { "epoch": 0.1519149276405213, "grad_norm": 1.795313546791791, "learning_rate": 2.9999966440888023e-06, "loss": 0.4792, "step": 2850 }, { "epoch": 0.1524479624743477, "grad_norm": 1.831543063210286, "learning_rate": 2.9999945207512196e-06, "loss": 0.4564, "step": 2860 }, { "epoch": 0.15298099730817408, "grad_norm": 1.850784587056977, "learning_rate": 2.99999187952782e-06, "loss": 0.4723, "step": 2870 }, { "epoch": 0.1535140321420005, "grad_norm": 1.6376500469394626, "learning_rate": 2.9999887204195153e-06, "loss": 0.4752, "step": 2880 }, { "epoch": 0.15404706697582687, "grad_norm": 1.6880735125963033, "learning_rate": 2.9999850434273964e-06, "loss": 0.4822, "step": 2890 }, { "epoch": 0.15458010180965326, "grad_norm": 1.7296444039455219, "learning_rate": 2.9999808485527327e-06, "loss": 0.4674, "step": 2900 }, { "epoch": 0.15511313664347964, "grad_norm": 1.6623189317763998, "learning_rate": 2.9999761357969725e-06, "loss": 0.4731, "step": 2910 }, { "epoch": 0.15564617147730606, "grad_norm": 1.6810682813609874, "learning_rate": 2.9999709051617425e-06, "loss": 0.4841, "step": 2920 }, { "epoch": 0.15617920631113244, "grad_norm": 1.7816054365878717, "learning_rate": 2.999965156648849e-06, "loss": 0.4824, "step": 2930 }, { "epoch": 0.15671224114495882, "grad_norm": 1.7533582395129694, "learning_rate": 2.999958890260277e-06, "loss": 0.4707, "step": 2940 }, { "epoch": 0.1572452759787852, "grad_norm": 1.6974204301749585, "learning_rate": 2.9999521059981897e-06, "loss": 0.4604, "step": 2950 }, { "epoch": 0.1577783108126116, "grad_norm": 1.7570577121286866, "learning_rate": 2.9999448038649293e-06, "loss": 0.4561, "step": 2960 }, { "epoch": 0.158311345646438, "grad_norm": 1.5112858903325956, "learning_rate": 2.9999369838630167e-06, "loss": 0.4607, "step": 2970 }, { "epoch": 0.1588443804802644, "grad_norm": 1.5787465254453812, "learning_rate": 2.9999286459951527e-06, "loss": 0.4598, "step": 2980 }, { "epoch": 0.15937741531409078, "grad_norm": 1.864721779879429, "learning_rate": 2.9999197902642153e-06, "loss": 0.4632, "step": 2990 }, { "epoch": 0.15991045014791716, "grad_norm": 1.7829518435710483, "learning_rate": 2.999910416673263e-06, "loss": 0.4659, "step": 3000 }, { "epoch": 0.16044348498174354, "grad_norm": 1.6079863952299247, "learning_rate": 2.9999005252255305e-06, "loss": 0.4564, "step": 3010 }, { "epoch": 0.16097651981556996, "grad_norm": 1.756347760567654, "learning_rate": 2.999890115924434e-06, "loss": 0.4867, "step": 3020 }, { "epoch": 0.16150955464939634, "grad_norm": 1.6040713307058265, "learning_rate": 2.9998791887735674e-06, "loss": 0.4656, "step": 3030 }, { "epoch": 0.16204258948322273, "grad_norm": 1.548518872333851, "learning_rate": 2.999867743776703e-06, "loss": 0.463, "step": 3040 }, { "epoch": 0.1625756243170491, "grad_norm": 1.6072392237096114, "learning_rate": 2.999855780937792e-06, "loss": 0.4604, "step": 3050 }, { "epoch": 0.16310865915087552, "grad_norm": 1.7739583912751717, "learning_rate": 2.9998433002609654e-06, "loss": 0.4647, "step": 3060 }, { "epoch": 0.1636416939847019, "grad_norm": 1.6223359684588095, "learning_rate": 2.9998303017505324e-06, "loss": 0.4816, "step": 3070 }, { "epoch": 0.1641747288185283, "grad_norm": 1.5145051243584609, "learning_rate": 2.9998167854109794e-06, "loss": 0.4616, "step": 3080 }, { "epoch": 0.16470776365235468, "grad_norm": 1.7462618302450663, "learning_rate": 2.999802751246975e-06, "loss": 0.4547, "step": 3090 }, { "epoch": 0.16524079848618106, "grad_norm": 1.63459286289502, "learning_rate": 2.999788199263363e-06, "loss": 0.4702, "step": 3100 }, { "epoch": 0.16577383332000747, "grad_norm": 1.6066524785723333, "learning_rate": 2.9997731294651688e-06, "loss": 0.4636, "step": 3110 }, { "epoch": 0.16630686815383386, "grad_norm": 1.6988577352511753, "learning_rate": 2.9997575418575943e-06, "loss": 0.4759, "step": 3120 }, { "epoch": 0.16683990298766024, "grad_norm": 1.7921629641000747, "learning_rate": 2.9997414364460224e-06, "loss": 0.4684, "step": 3130 }, { "epoch": 0.16737293782148663, "grad_norm": 1.7007876391018237, "learning_rate": 2.999724813236012e-06, "loss": 0.4585, "step": 3140 }, { "epoch": 0.167905972655313, "grad_norm": 1.5536161177891106, "learning_rate": 2.9997076722333044e-06, "loss": 0.4672, "step": 3150 }, { "epoch": 0.16843900748913943, "grad_norm": 1.6697459933103784, "learning_rate": 2.9996900134438164e-06, "loss": 0.4662, "step": 3160 }, { "epoch": 0.1689720423229658, "grad_norm": 1.647010387805045, "learning_rate": 2.999671836873645e-06, "loss": 0.4544, "step": 3170 }, { "epoch": 0.1695050771567922, "grad_norm": 1.5098449280976487, "learning_rate": 2.999653142529066e-06, "loss": 0.4734, "step": 3180 }, { "epoch": 0.17003811199061858, "grad_norm": 1.645930940874626, "learning_rate": 2.999633930416534e-06, "loss": 0.4536, "step": 3190 }, { "epoch": 0.170571146824445, "grad_norm": 1.3842232473056548, "learning_rate": 2.999614200542681e-06, "loss": 0.4669, "step": 3200 }, { "epoch": 0.17110418165827138, "grad_norm": 1.5909963769780455, "learning_rate": 2.9995939529143205e-06, "loss": 0.4677, "step": 3210 }, { "epoch": 0.17163721649209776, "grad_norm": 1.6452226092287423, "learning_rate": 2.9995731875384426e-06, "loss": 0.4623, "step": 3220 }, { "epoch": 0.17217025132592415, "grad_norm": 1.6399656056241063, "learning_rate": 2.9995519044222163e-06, "loss": 0.4731, "step": 3230 }, { "epoch": 0.17270328615975053, "grad_norm": 1.6762569466713584, "learning_rate": 2.99953010357299e-06, "loss": 0.4506, "step": 3240 }, { "epoch": 0.17323632099357694, "grad_norm": 1.6167067522288208, "learning_rate": 2.9995077849982907e-06, "loss": 0.4613, "step": 3250 }, { "epoch": 0.17376935582740333, "grad_norm": 1.627533327437292, "learning_rate": 2.9994849487058244e-06, "loss": 0.468, "step": 3260 }, { "epoch": 0.1743023906612297, "grad_norm": 1.6686721709677732, "learning_rate": 2.9994615947034745e-06, "loss": 0.4756, "step": 3270 }, { "epoch": 0.1748354254950561, "grad_norm": 1.5324487409521619, "learning_rate": 2.9994377229993056e-06, "loss": 0.4647, "step": 3280 }, { "epoch": 0.17536846032888248, "grad_norm": 1.5869013859847092, "learning_rate": 2.999413333601558e-06, "loss": 0.4621, "step": 3290 }, { "epoch": 0.1759014951627089, "grad_norm": 1.5278219436297606, "learning_rate": 2.9993884265186537e-06, "loss": 0.4651, "step": 3300 }, { "epoch": 0.17643452999653528, "grad_norm": 1.3878421460176371, "learning_rate": 2.9993630017591918e-06, "loss": 0.4629, "step": 3310 }, { "epoch": 0.17696756483036166, "grad_norm": 1.6146096998380026, "learning_rate": 2.9993370593319498e-06, "loss": 0.4681, "step": 3320 }, { "epoch": 0.17750059966418805, "grad_norm": 1.5566443040707618, "learning_rate": 2.9993105992458846e-06, "loss": 0.4561, "step": 3330 }, { "epoch": 0.17803363449801443, "grad_norm": 1.4577621441660769, "learning_rate": 2.999283621510133e-06, "loss": 0.4662, "step": 3340 }, { "epoch": 0.17856666933184084, "grad_norm": 1.4085624064590525, "learning_rate": 2.999256126134008e-06, "loss": 0.4561, "step": 3350 }, { "epoch": 0.17909970416566723, "grad_norm": 1.620228539762752, "learning_rate": 2.9992281131270027e-06, "loss": 0.4673, "step": 3360 }, { "epoch": 0.1796327389994936, "grad_norm": 1.5831400842402863, "learning_rate": 2.9991995824987893e-06, "loss": 0.4865, "step": 3370 }, { "epoch": 0.18016577383332, "grad_norm": 1.8363371878280756, "learning_rate": 2.999170534259218e-06, "loss": 0.4554, "step": 3380 }, { "epoch": 0.1806988086671464, "grad_norm": 1.5466565191656683, "learning_rate": 2.999140968418318e-06, "loss": 0.4591, "step": 3390 }, { "epoch": 0.1812318435009728, "grad_norm": 1.5707390027381525, "learning_rate": 2.9991108849862973e-06, "loss": 0.4704, "step": 3400 }, { "epoch": 0.18176487833479918, "grad_norm": 1.6366916399312674, "learning_rate": 2.9990802839735428e-06, "loss": 0.4692, "step": 3410 }, { "epoch": 0.18229791316862556, "grad_norm": 1.7019086412840565, "learning_rate": 2.9990491653906185e-06, "loss": 0.4649, "step": 3420 }, { "epoch": 0.18283094800245195, "grad_norm": 1.6396315172684046, "learning_rate": 2.99901752924827e-06, "loss": 0.4681, "step": 3430 }, { "epoch": 0.18336398283627836, "grad_norm": 1.6613298906787837, "learning_rate": 2.9989853755574184e-06, "loss": 0.4418, "step": 3440 }, { "epoch": 0.18389701767010475, "grad_norm": 1.6646590609464886, "learning_rate": 2.998952704329166e-06, "loss": 0.4471, "step": 3450 }, { "epoch": 0.18443005250393113, "grad_norm": 1.5013075178238962, "learning_rate": 2.9989195155747923e-06, "loss": 0.4651, "step": 3460 }, { "epoch": 0.18496308733775751, "grad_norm": 1.662972481207715, "learning_rate": 2.9988858093057563e-06, "loss": 0.4508, "step": 3470 }, { "epoch": 0.1854961221715839, "grad_norm": 1.5961630317747317, "learning_rate": 2.9988515855336954e-06, "loss": 0.4681, "step": 3480 }, { "epoch": 0.1860291570054103, "grad_norm": 1.618814745656187, "learning_rate": 2.9988168442704255e-06, "loss": 0.4792, "step": 3490 }, { "epoch": 0.1865621918392367, "grad_norm": 1.5353643377113313, "learning_rate": 2.9987815855279417e-06, "loss": 0.4551, "step": 3500 }, { "epoch": 0.18709522667306308, "grad_norm": 1.6703325650389744, "learning_rate": 2.998745809318417e-06, "loss": 0.4568, "step": 3510 }, { "epoch": 0.18762826150688947, "grad_norm": 1.6319317545687613, "learning_rate": 2.9987095156542034e-06, "loss": 0.4587, "step": 3520 }, { "epoch": 0.18816129634071588, "grad_norm": 1.4890644677542084, "learning_rate": 2.9986727045478315e-06, "loss": 0.4582, "step": 3530 }, { "epoch": 0.18869433117454226, "grad_norm": 1.494555208176799, "learning_rate": 2.9986353760120113e-06, "loss": 0.4459, "step": 3540 }, { "epoch": 0.18922736600836865, "grad_norm": 2.0564914458540016, "learning_rate": 2.99859753005963e-06, "loss": 0.4638, "step": 3550 }, { "epoch": 0.18976040084219503, "grad_norm": 1.7241381527855608, "learning_rate": 2.9985591667037546e-06, "loss": 0.4743, "step": 3560 }, { "epoch": 0.19029343567602142, "grad_norm": 1.6711262755757803, "learning_rate": 2.9985202859576304e-06, "loss": 0.4585, "step": 3570 }, { "epoch": 0.19082647050984783, "grad_norm": 1.7132918739150431, "learning_rate": 2.998480887834681e-06, "loss": 0.4431, "step": 3580 }, { "epoch": 0.1913595053436742, "grad_norm": 1.5313033903680584, "learning_rate": 2.99844097234851e-06, "loss": 0.4606, "step": 3590 }, { "epoch": 0.1918925401775006, "grad_norm": 1.7254901092253947, "learning_rate": 2.9984005395128974e-06, "loss": 0.4559, "step": 3600 }, { "epoch": 0.19242557501132698, "grad_norm": 1.645097433635555, "learning_rate": 2.998359589341803e-06, "loss": 0.4374, "step": 3610 }, { "epoch": 0.19295860984515337, "grad_norm": 1.5395304911207355, "learning_rate": 2.9983181218493654e-06, "loss": 0.4498, "step": 3620 }, { "epoch": 0.19349164467897978, "grad_norm": 1.7567010962136618, "learning_rate": 2.9982761370499024e-06, "loss": 0.4658, "step": 3630 }, { "epoch": 0.19402467951280616, "grad_norm": 1.8571868955078175, "learning_rate": 2.9982336349579085e-06, "loss": 0.4649, "step": 3640 }, { "epoch": 0.19455771434663255, "grad_norm": 1.637593677791729, "learning_rate": 2.998190615588058e-06, "loss": 0.4484, "step": 3650 }, { "epoch": 0.19509074918045893, "grad_norm": 1.6434315395657313, "learning_rate": 2.9981470789552046e-06, "loss": 0.4642, "step": 3660 }, { "epoch": 0.19562378401428535, "grad_norm": 1.815340805041272, "learning_rate": 2.998103025074379e-06, "loss": 0.4484, "step": 3670 }, { "epoch": 0.19615681884811173, "grad_norm": 1.5389948675670018, "learning_rate": 2.9980584539607913e-06, "loss": 0.4634, "step": 3680 }, { "epoch": 0.19668985368193811, "grad_norm": 1.5489883254493602, "learning_rate": 2.99801336562983e-06, "loss": 0.4706, "step": 3690 }, { "epoch": 0.1972228885157645, "grad_norm": 1.6274345202406608, "learning_rate": 2.9979677600970622e-06, "loss": 0.47, "step": 3700 }, { "epoch": 0.19775592334959088, "grad_norm": 1.5760744328735794, "learning_rate": 2.9979216373782343e-06, "loss": 0.4628, "step": 3710 }, { "epoch": 0.1982889581834173, "grad_norm": 1.4956785159988102, "learning_rate": 2.997874997489269e-06, "loss": 0.4492, "step": 3720 }, { "epoch": 0.19882199301724368, "grad_norm": 1.7080924559166886, "learning_rate": 2.997827840446271e-06, "loss": 0.4529, "step": 3730 }, { "epoch": 0.19935502785107007, "grad_norm": 1.5915206618874034, "learning_rate": 2.99778016626552e-06, "loss": 0.4633, "step": 3740 }, { "epoch": 0.19988806268489645, "grad_norm": 1.5048489860129535, "learning_rate": 2.9977319749634774e-06, "loss": 0.4526, "step": 3750 }, { "epoch": 0.20042109751872283, "grad_norm": 1.5041280839206856, "learning_rate": 2.99768326655678e-06, "loss": 0.4641, "step": 3760 }, { "epoch": 0.20095413235254925, "grad_norm": 1.466087678237714, "learning_rate": 2.9976340410622464e-06, "loss": 0.4396, "step": 3770 }, { "epoch": 0.20148716718637563, "grad_norm": 1.7743885987789316, "learning_rate": 2.997584298496871e-06, "loss": 0.4665, "step": 3780 }, { "epoch": 0.20202020202020202, "grad_norm": 1.603992714080962, "learning_rate": 2.9975340388778286e-06, "loss": 0.457, "step": 3790 }, { "epoch": 0.2025532368540284, "grad_norm": 1.543570274830021, "learning_rate": 2.9974832622224712e-06, "loss": 0.4451, "step": 3800 }, { "epoch": 0.2030862716878548, "grad_norm": 1.5287734491563887, "learning_rate": 2.9974319685483304e-06, "loss": 0.4463, "step": 3810 }, { "epoch": 0.2036193065216812, "grad_norm": 1.5868593930462622, "learning_rate": 2.997380157873115e-06, "loss": 0.4253, "step": 3820 }, { "epoch": 0.20415234135550758, "grad_norm": 1.651829324425076, "learning_rate": 2.997327830214714e-06, "loss": 0.4306, "step": 3830 }, { "epoch": 0.20468537618933397, "grad_norm": 1.8722112674795424, "learning_rate": 2.997274985591193e-06, "loss": 0.4612, "step": 3840 }, { "epoch": 0.20521841102316035, "grad_norm": 1.586839450943598, "learning_rate": 2.9972216240207976e-06, "loss": 0.4613, "step": 3850 }, { "epoch": 0.20575144585698676, "grad_norm": 1.65561725732763, "learning_rate": 2.9971677455219515e-06, "loss": 0.4753, "step": 3860 }, { "epoch": 0.20628448069081315, "grad_norm": 1.6654730662916013, "learning_rate": 2.9971133501132568e-06, "loss": 0.4293, "step": 3870 }, { "epoch": 0.20681751552463953, "grad_norm": 1.7221959528288986, "learning_rate": 2.9970584378134934e-06, "loss": 0.4348, "step": 3880 }, { "epoch": 0.20735055035846592, "grad_norm": 1.5910407161008908, "learning_rate": 2.9970030086416204e-06, "loss": 0.4511, "step": 3890 }, { "epoch": 0.2078835851922923, "grad_norm": 1.6141970451791834, "learning_rate": 2.9969470626167754e-06, "loss": 0.4414, "step": 3900 }, { "epoch": 0.20841662002611872, "grad_norm": 1.7867460769140482, "learning_rate": 2.996890599758274e-06, "loss": 0.445, "step": 3910 }, { "epoch": 0.2089496548599451, "grad_norm": 1.595718696260754, "learning_rate": 2.9968336200856107e-06, "loss": 0.4353, "step": 3920 }, { "epoch": 0.20948268969377148, "grad_norm": 1.676254990466111, "learning_rate": 2.9967761236184587e-06, "loss": 0.452, "step": 3930 }, { "epoch": 0.21001572452759787, "grad_norm": 1.5269354127652917, "learning_rate": 2.9967181103766682e-06, "loss": 0.4371, "step": 3940 }, { "epoch": 0.21054875936142428, "grad_norm": 1.7744320905768576, "learning_rate": 2.9966595803802686e-06, "loss": 0.4502, "step": 3950 }, { "epoch": 0.21108179419525067, "grad_norm": 1.4757823489660078, "learning_rate": 2.9966005336494692e-06, "loss": 0.4679, "step": 3960 }, { "epoch": 0.21161482902907705, "grad_norm": 1.4989736270803655, "learning_rate": 2.9965409702046552e-06, "loss": 0.4606, "step": 3970 }, { "epoch": 0.21214786386290344, "grad_norm": 1.704780250787622, "learning_rate": 2.9964808900663917e-06, "loss": 0.4323, "step": 3980 }, { "epoch": 0.21268089869672982, "grad_norm": 1.5334460115950397, "learning_rate": 2.9964202932554224e-06, "loss": 0.4456, "step": 3990 }, { "epoch": 0.21321393353055623, "grad_norm": 1.7155779812422818, "learning_rate": 2.9963591797926683e-06, "loss": 0.4332, "step": 4000 }, { "epoch": 0.21374696836438262, "grad_norm": 1.5465873446480514, "learning_rate": 2.996297549699229e-06, "loss": 0.4542, "step": 4010 }, { "epoch": 0.214280003198209, "grad_norm": 1.5309415649407119, "learning_rate": 2.9962354029963836e-06, "loss": 0.4527, "step": 4020 }, { "epoch": 0.21481303803203539, "grad_norm": 1.7228835864420653, "learning_rate": 2.9961727397055887e-06, "loss": 0.4423, "step": 4030 }, { "epoch": 0.21534607286586177, "grad_norm": 1.4923330650311342, "learning_rate": 2.996109559848479e-06, "loss": 0.4575, "step": 4040 }, { "epoch": 0.21587910769968818, "grad_norm": 1.5728391343623689, "learning_rate": 2.9960458634468673e-06, "loss": 0.4594, "step": 4050 }, { "epoch": 0.21641214253351457, "grad_norm": 1.708177585976328, "learning_rate": 2.9959816505227467e-06, "loss": 0.4394, "step": 4060 }, { "epoch": 0.21694517736734095, "grad_norm": 1.6196290389455337, "learning_rate": 2.9959169210982866e-06, "loss": 0.45, "step": 4070 }, { "epoch": 0.21747821220116734, "grad_norm": 1.6243128885757592, "learning_rate": 2.9958516751958354e-06, "loss": 0.4427, "step": 4080 }, { "epoch": 0.21801124703499375, "grad_norm": 1.6786587625364946, "learning_rate": 2.995785912837919e-06, "loss": 0.4522, "step": 4090 }, { "epoch": 0.21854428186882013, "grad_norm": 1.5079021102365517, "learning_rate": 2.995719634047244e-06, "loss": 0.4559, "step": 4100 }, { "epoch": 0.21907731670264652, "grad_norm": 1.6724642613778926, "learning_rate": 2.9956528388466922e-06, "loss": 0.4484, "step": 4110 }, { "epoch": 0.2196103515364729, "grad_norm": 1.8030881030305077, "learning_rate": 2.995585527259326e-06, "loss": 0.4567, "step": 4120 }, { "epoch": 0.2201433863702993, "grad_norm": 1.6748168171675852, "learning_rate": 2.9955176993083855e-06, "loss": 0.4387, "step": 4130 }, { "epoch": 0.2206764212041257, "grad_norm": 1.648500665231352, "learning_rate": 2.9954493550172885e-06, "loss": 0.4384, "step": 4140 }, { "epoch": 0.22120945603795208, "grad_norm": 1.6012217561678899, "learning_rate": 2.995380494409631e-06, "loss": 0.4548, "step": 4150 }, { "epoch": 0.22174249087177847, "grad_norm": 1.6559612740139638, "learning_rate": 2.9953111175091887e-06, "loss": 0.4647, "step": 4160 }, { "epoch": 0.22227552570560485, "grad_norm": 1.6208015918349856, "learning_rate": 2.995241224339914e-06, "loss": 0.4442, "step": 4170 }, { "epoch": 0.22280856053943124, "grad_norm": 1.8070273610482273, "learning_rate": 2.995170814925938e-06, "loss": 0.4518, "step": 4180 }, { "epoch": 0.22334159537325765, "grad_norm": 2.08914606239165, "learning_rate": 2.99509988929157e-06, "loss": 0.4485, "step": 4190 }, { "epoch": 0.22387463020708404, "grad_norm": 1.579705453532804, "learning_rate": 2.9950284474612985e-06, "loss": 0.4445, "step": 4200 }, { "epoch": 0.22440766504091042, "grad_norm": 1.6207813632705006, "learning_rate": 2.9949564894597887e-06, "loss": 0.4431, "step": 4210 }, { "epoch": 0.2249406998747368, "grad_norm": 1.5802741216513316, "learning_rate": 2.994884015311885e-06, "loss": 0.4547, "step": 4220 }, { "epoch": 0.22547373470856322, "grad_norm": 1.6297823572798982, "learning_rate": 2.9948110250426093e-06, "loss": 0.4523, "step": 4230 }, { "epoch": 0.2260067695423896, "grad_norm": 1.8281204535912232, "learning_rate": 2.994737518677163e-06, "loss": 0.4485, "step": 4240 }, { "epoch": 0.226539804376216, "grad_norm": 1.5153505884941239, "learning_rate": 2.9946634962409237e-06, "loss": 0.4512, "step": 4250 }, { "epoch": 0.22707283921004237, "grad_norm": 1.9042971464522553, "learning_rate": 2.994588957759449e-06, "loss": 0.4583, "step": 4260 }, { "epoch": 0.22760587404386876, "grad_norm": 1.552844823810127, "learning_rate": 2.994513903258474e-06, "loss": 0.4529, "step": 4270 }, { "epoch": 0.22813890887769517, "grad_norm": 1.551437621496883, "learning_rate": 2.9944383327639113e-06, "loss": 0.4648, "step": 4280 }, { "epoch": 0.22867194371152155, "grad_norm": 1.5646925445220212, "learning_rate": 2.994362246301852e-06, "loss": 0.4472, "step": 4290 }, { "epoch": 0.22920497854534794, "grad_norm": 1.6304750433301312, "learning_rate": 2.9942856438985673e-06, "loss": 0.4499, "step": 4300 }, { "epoch": 0.22973801337917432, "grad_norm": 1.6472194227955004, "learning_rate": 2.994208525580503e-06, "loss": 0.4442, "step": 4310 }, { "epoch": 0.2302710482130007, "grad_norm": 1.4959734612979503, "learning_rate": 2.9941308913742854e-06, "loss": 0.4572, "step": 4320 }, { "epoch": 0.23080408304682712, "grad_norm": 1.810912152153463, "learning_rate": 2.994052741306719e-06, "loss": 0.4306, "step": 4330 }, { "epoch": 0.2313371178806535, "grad_norm": 1.5520143659917245, "learning_rate": 2.993974075404785e-06, "loss": 0.441, "step": 4340 }, { "epoch": 0.2318701527144799, "grad_norm": 1.6303210751414012, "learning_rate": 2.9938948936956443e-06, "loss": 0.4414, "step": 4350 }, { "epoch": 0.23240318754830627, "grad_norm": 1.5863261623737483, "learning_rate": 2.9938151962066338e-06, "loss": 0.4489, "step": 4360 }, { "epoch": 0.23293622238213268, "grad_norm": 1.5723471828450144, "learning_rate": 2.9937349829652706e-06, "loss": 0.455, "step": 4370 }, { "epoch": 0.23346925721595907, "grad_norm": 1.720463131126141, "learning_rate": 2.993654253999249e-06, "loss": 0.4405, "step": 4380 }, { "epoch": 0.23400229204978545, "grad_norm": 1.5772912374693178, "learning_rate": 2.9935730093364417e-06, "loss": 0.4402, "step": 4390 }, { "epoch": 0.23453532688361184, "grad_norm": 1.6987453469415965, "learning_rate": 2.993491249004898e-06, "loss": 0.4598, "step": 4400 }, { "epoch": 0.23506836171743822, "grad_norm": 1.5128089543682928, "learning_rate": 2.9934089730328474e-06, "loss": 0.441, "step": 4410 }, { "epoch": 0.23560139655126464, "grad_norm": 1.785023375618966, "learning_rate": 2.9933261814486956e-06, "loss": 0.4537, "step": 4420 }, { "epoch": 0.23613443138509102, "grad_norm": 1.567988135815809, "learning_rate": 2.9932428742810276e-06, "loss": 0.4439, "step": 4430 }, { "epoch": 0.2366674662189174, "grad_norm": 1.8560104630453909, "learning_rate": 2.9931590515586057e-06, "loss": 0.4392, "step": 4440 }, { "epoch": 0.2372005010527438, "grad_norm": 1.714821979884251, "learning_rate": 2.9930747133103705e-06, "loss": 0.4429, "step": 4450 }, { "epoch": 0.23773353588657017, "grad_norm": 1.5661715064623511, "learning_rate": 2.99298985956544e-06, "loss": 0.4616, "step": 4460 }, { "epoch": 0.2382665707203966, "grad_norm": 1.6532985857875688, "learning_rate": 2.9929044903531117e-06, "loss": 0.4494, "step": 4470 }, { "epoch": 0.23879960555422297, "grad_norm": 1.5534915918437062, "learning_rate": 2.9928186057028595e-06, "loss": 0.4468, "step": 4480 }, { "epoch": 0.23933264038804936, "grad_norm": 1.7257876945892578, "learning_rate": 2.9927322056443354e-06, "loss": 0.433, "step": 4490 }, { "epoch": 0.23986567522187574, "grad_norm": 1.7097647077243794, "learning_rate": 2.9926452902073707e-06, "loss": 0.4346, "step": 4500 }, { "epoch": 0.24039871005570215, "grad_norm": 1.7245345610777312, "learning_rate": 2.9925578594219724e-06, "loss": 0.4433, "step": 4510 }, { "epoch": 0.24093174488952854, "grad_norm": 1.94640057153672, "learning_rate": 2.9924699133183283e-06, "loss": 0.4314, "step": 4520 }, { "epoch": 0.24146477972335492, "grad_norm": 1.5639012383831647, "learning_rate": 2.992381451926801e-06, "loss": 0.4424, "step": 4530 }, { "epoch": 0.2419978145571813, "grad_norm": 1.5119201408008984, "learning_rate": 2.992292475277933e-06, "loss": 0.4499, "step": 4540 }, { "epoch": 0.2425308493910077, "grad_norm": 1.5661485849848098, "learning_rate": 2.992202983402445e-06, "loss": 0.4517, "step": 4550 }, { "epoch": 0.2430638842248341, "grad_norm": 1.7286594213343105, "learning_rate": 2.992112976331234e-06, "loss": 0.4335, "step": 4560 }, { "epoch": 0.2435969190586605, "grad_norm": 1.65235104393594, "learning_rate": 2.992022454095376e-06, "loss": 0.4532, "step": 4570 }, { "epoch": 0.24412995389248687, "grad_norm": 1.649496576108496, "learning_rate": 2.991931416726125e-06, "loss": 0.4502, "step": 4580 }, { "epoch": 0.24466298872631326, "grad_norm": 1.671076820394612, "learning_rate": 2.9918398642549112e-06, "loss": 0.455, "step": 4590 }, { "epoch": 0.24519602356013964, "grad_norm": 1.5682819727388604, "learning_rate": 2.9917477967133456e-06, "loss": 0.4507, "step": 4600 }, { "epoch": 0.24572905839396605, "grad_norm": 1.6792524816544012, "learning_rate": 2.9916552141332138e-06, "loss": 0.4318, "step": 4610 }, { "epoch": 0.24626209322779244, "grad_norm": 1.7151732533704052, "learning_rate": 2.9915621165464816e-06, "loss": 0.4334, "step": 4620 }, { "epoch": 0.24679512806161882, "grad_norm": 1.6050371790176439, "learning_rate": 2.9914685039852906e-06, "loss": 0.4441, "step": 4630 }, { "epoch": 0.2473281628954452, "grad_norm": 1.6422256225869465, "learning_rate": 2.991374376481963e-06, "loss": 0.4362, "step": 4640 }, { "epoch": 0.24786119772927162, "grad_norm": 1.5457940878568954, "learning_rate": 2.9912797340689955e-06, "loss": 0.4355, "step": 4650 }, { "epoch": 0.248394232563098, "grad_norm": 1.69589490670946, "learning_rate": 2.9911845767790655e-06, "loss": 0.4207, "step": 4660 }, { "epoch": 0.2489272673969244, "grad_norm": 1.6914609782763563, "learning_rate": 2.991088904645026e-06, "loss": 0.4559, "step": 4670 }, { "epoch": 0.24946030223075077, "grad_norm": 1.574786275327489, "learning_rate": 2.9909927176999097e-06, "loss": 0.4634, "step": 4680 }, { "epoch": 0.24999333706457716, "grad_norm": 1.6916817167859033, "learning_rate": 2.9908960159769243e-06, "loss": 0.4409, "step": 4690 }, { "epoch": 0.25052637189840354, "grad_norm": 1.6346762114793119, "learning_rate": 2.990798799509458e-06, "loss": 0.4315, "step": 4700 }, { "epoch": 0.25105940673222993, "grad_norm": 1.6446348908077835, "learning_rate": 2.990701068331075e-06, "loss": 0.438, "step": 4710 }, { "epoch": 0.25159244156605637, "grad_norm": 1.8249833266882203, "learning_rate": 2.9906028224755193e-06, "loss": 0.4295, "step": 4720 }, { "epoch": 0.25212547639988275, "grad_norm": 1.7045165285792323, "learning_rate": 2.990504061976709e-06, "loss": 0.4274, "step": 4730 }, { "epoch": 0.25265851123370914, "grad_norm": 1.5419183954887759, "learning_rate": 2.9904047868687432e-06, "loss": 0.4424, "step": 4740 }, { "epoch": 0.2531915460675355, "grad_norm": 1.5816311318418992, "learning_rate": 2.9903049971858977e-06, "loss": 0.4435, "step": 4750 }, { "epoch": 0.2537245809013619, "grad_norm": 1.4922048988802745, "learning_rate": 2.990204692962625e-06, "loss": 0.4411, "step": 4760 }, { "epoch": 0.2542576157351883, "grad_norm": 1.6124055351644564, "learning_rate": 2.9901038742335556e-06, "loss": 0.4254, "step": 4770 }, { "epoch": 0.2547906505690147, "grad_norm": 1.562286493859035, "learning_rate": 2.9900025410334997e-06, "loss": 0.4473, "step": 4780 }, { "epoch": 0.25532368540284106, "grad_norm": 1.6138582673418314, "learning_rate": 2.9899006933974425e-06, "loss": 0.4477, "step": 4790 }, { "epoch": 0.25585672023666745, "grad_norm": 1.6158155095198716, "learning_rate": 2.9897983313605473e-06, "loss": 0.4458, "step": 4800 }, { "epoch": 0.25638975507049383, "grad_norm": 1.6217362897850136, "learning_rate": 2.989695454958156e-06, "loss": 0.4454, "step": 4810 }, { "epoch": 0.25692278990432027, "grad_norm": 1.4371598050105188, "learning_rate": 2.989592064225787e-06, "loss": 0.4192, "step": 4820 }, { "epoch": 0.25745582473814665, "grad_norm": 1.7616371896755234, "learning_rate": 2.9894881591991377e-06, "loss": 0.4425, "step": 4830 }, { "epoch": 0.25798885957197304, "grad_norm": 1.6407524182793443, "learning_rate": 2.9893837399140816e-06, "loss": 0.4427, "step": 4840 }, { "epoch": 0.2585218944057994, "grad_norm": 1.557889807385073, "learning_rate": 2.989278806406671e-06, "loss": 0.4446, "step": 4850 }, { "epoch": 0.2590549292396258, "grad_norm": 1.8828968253643723, "learning_rate": 2.9891733587131342e-06, "loss": 0.4489, "step": 4860 }, { "epoch": 0.2595879640734522, "grad_norm": 1.4840557364856535, "learning_rate": 2.989067396869878e-06, "loss": 0.4386, "step": 4870 }, { "epoch": 0.2601209989072786, "grad_norm": 1.6513095004826863, "learning_rate": 2.988960920913487e-06, "loss": 0.4395, "step": 4880 }, { "epoch": 0.26065403374110496, "grad_norm": 1.594827034124819, "learning_rate": 2.988853930880723e-06, "loss": 0.4385, "step": 4890 }, { "epoch": 0.26118706857493135, "grad_norm": 1.522785754866806, "learning_rate": 2.988746426808525e-06, "loss": 0.4385, "step": 4900 }, { "epoch": 0.2617201034087578, "grad_norm": 1.6824409079130382, "learning_rate": 2.98863840873401e-06, "loss": 0.4338, "step": 4910 }, { "epoch": 0.26225313824258417, "grad_norm": 1.5677604985204645, "learning_rate": 2.9885298766944715e-06, "loss": 0.43, "step": 4920 }, { "epoch": 0.26278617307641056, "grad_norm": 1.7230004988468286, "learning_rate": 2.9884208307273815e-06, "loss": 0.4392, "step": 4930 }, { "epoch": 0.26331920791023694, "grad_norm": 1.6514760693400496, "learning_rate": 2.988311270870389e-06, "loss": 0.4458, "step": 4940 }, { "epoch": 0.2638522427440633, "grad_norm": 1.702580415570755, "learning_rate": 2.988201197161321e-06, "loss": 0.4327, "step": 4950 }, { "epoch": 0.2643852775778897, "grad_norm": 1.6407046943697028, "learning_rate": 2.9880906096381804e-06, "loss": 0.4417, "step": 4960 }, { "epoch": 0.2649183124117161, "grad_norm": 1.9061693114597238, "learning_rate": 2.9879795083391493e-06, "loss": 0.4429, "step": 4970 }, { "epoch": 0.2654513472455425, "grad_norm": 1.484711555285733, "learning_rate": 2.987867893302586e-06, "loss": 0.4344, "step": 4980 }, { "epoch": 0.26598438207936886, "grad_norm": 1.5815148651198652, "learning_rate": 2.9877557645670264e-06, "loss": 0.4335, "step": 4990 }, { "epoch": 0.2665174169131953, "grad_norm": 1.7081891699885108, "learning_rate": 2.987643122171184e-06, "loss": 0.4439, "step": 5000 }, { "epoch": 0.2670504517470217, "grad_norm": 1.5648904132118493, "learning_rate": 2.9875299661539498e-06, "loss": 0.4442, "step": 5010 }, { "epoch": 0.2675834865808481, "grad_norm": 1.7643629619545491, "learning_rate": 2.9874162965543913e-06, "loss": 0.4484, "step": 5020 }, { "epoch": 0.26811652141467446, "grad_norm": 1.6310592892945788, "learning_rate": 2.987302113411755e-06, "loss": 0.4577, "step": 5030 }, { "epoch": 0.26864955624850084, "grad_norm": 1.4743085963469467, "learning_rate": 2.9871874167654625e-06, "loss": 0.4377, "step": 5040 }, { "epoch": 0.2691825910823272, "grad_norm": 1.7405038010027776, "learning_rate": 2.987072206655114e-06, "loss": 0.445, "step": 5050 }, { "epoch": 0.2697156259161536, "grad_norm": 1.7380779059647404, "learning_rate": 2.9869564831204872e-06, "loss": 0.4582, "step": 5060 }, { "epoch": 0.27024866074998, "grad_norm": 1.5999970454550323, "learning_rate": 2.9868402462015363e-06, "loss": 0.4376, "step": 5070 }, { "epoch": 0.2707816955838064, "grad_norm": 1.4936113294612414, "learning_rate": 2.986723495938393e-06, "loss": 0.4299, "step": 5080 }, { "epoch": 0.27131473041763277, "grad_norm": 1.6148393782948467, "learning_rate": 2.9866062323713664e-06, "loss": 0.4367, "step": 5090 }, { "epoch": 0.2718477652514592, "grad_norm": 1.3821227021650941, "learning_rate": 2.986488455540943e-06, "loss": 0.4159, "step": 5100 }, { "epoch": 0.2723808000852856, "grad_norm": 1.787216851872511, "learning_rate": 2.9863701654877864e-06, "loss": 0.4411, "step": 5110 }, { "epoch": 0.272913834919112, "grad_norm": 1.4371472591681986, "learning_rate": 2.9862513622527365e-06, "loss": 0.4388, "step": 5120 }, { "epoch": 0.27344686975293836, "grad_norm": 1.8033827313505777, "learning_rate": 2.986132045876811e-06, "loss": 0.452, "step": 5130 }, { "epoch": 0.27397990458676474, "grad_norm": 1.7330518783082376, "learning_rate": 2.986012216401206e-06, "loss": 0.444, "step": 5140 }, { "epoch": 0.27451293942059113, "grad_norm": 1.6235747832288199, "learning_rate": 2.9858918738672933e-06, "loss": 0.45, "step": 5150 }, { "epoch": 0.2750459742544175, "grad_norm": 1.5854865507159814, "learning_rate": 2.9857710183166213e-06, "loss": 0.434, "step": 5160 }, { "epoch": 0.2755790090882439, "grad_norm": 1.594168237598214, "learning_rate": 2.9856496497909177e-06, "loss": 0.4301, "step": 5170 }, { "epoch": 0.2761120439220703, "grad_norm": 1.5168110000684847, "learning_rate": 2.9855277683320847e-06, "loss": 0.4383, "step": 5180 }, { "epoch": 0.2766450787558967, "grad_norm": 1.5571375377001808, "learning_rate": 2.985405373982204e-06, "loss": 0.4524, "step": 5190 }, { "epoch": 0.2771781135897231, "grad_norm": 1.687513312399059, "learning_rate": 2.9852824667835323e-06, "loss": 0.4493, "step": 5200 }, { "epoch": 0.2777111484235495, "grad_norm": 1.6539174041403841, "learning_rate": 2.9851590467785056e-06, "loss": 0.4417, "step": 5210 }, { "epoch": 0.2782441832573759, "grad_norm": 1.5525801128997774, "learning_rate": 2.9850351140097344e-06, "loss": 0.4476, "step": 5220 }, { "epoch": 0.27877721809120226, "grad_norm": 1.6108736999393107, "learning_rate": 2.9849106685200085e-06, "loss": 0.4337, "step": 5230 }, { "epoch": 0.27931025292502865, "grad_norm": 1.6967534154613837, "learning_rate": 2.984785710352293e-06, "loss": 0.455, "step": 5240 }, { "epoch": 0.27984328775885503, "grad_norm": 1.5422861418485796, "learning_rate": 2.9846602395497314e-06, "loss": 0.4369, "step": 5250 }, { "epoch": 0.2803763225926814, "grad_norm": 1.5532485024698688, "learning_rate": 2.9845342561556433e-06, "loss": 0.4166, "step": 5260 }, { "epoch": 0.2809093574265078, "grad_norm": 1.5971608274093063, "learning_rate": 2.9844077602135255e-06, "loss": 0.4531, "step": 5270 }, { "epoch": 0.28144239226033424, "grad_norm": 1.698567222653232, "learning_rate": 2.9842807517670524e-06, "loss": 0.4284, "step": 5280 }, { "epoch": 0.2819754270941606, "grad_norm": 1.5620879819533362, "learning_rate": 2.984153230860073e-06, "loss": 0.4339, "step": 5290 }, { "epoch": 0.282508461927987, "grad_norm": 1.9091119340297336, "learning_rate": 2.984025197536617e-06, "loss": 0.4467, "step": 5300 }, { "epoch": 0.2830414967618134, "grad_norm": 1.5726783217485447, "learning_rate": 2.983896651840888e-06, "loss": 0.4257, "step": 5310 }, { "epoch": 0.2835745315956398, "grad_norm": 1.5237104351967512, "learning_rate": 2.9837675938172676e-06, "loss": 0.4316, "step": 5320 }, { "epoch": 0.28410756642946616, "grad_norm": 1.6206240469613897, "learning_rate": 2.983638023510314e-06, "loss": 0.4405, "step": 5330 }, { "epoch": 0.28464060126329255, "grad_norm": 1.8390155270157214, "learning_rate": 2.983507940964763e-06, "loss": 0.4296, "step": 5340 }, { "epoch": 0.28517363609711893, "grad_norm": 1.5588583841602022, "learning_rate": 2.983377346225526e-06, "loss": 0.4359, "step": 5350 }, { "epoch": 0.2857066709309453, "grad_norm": 1.6656545852820501, "learning_rate": 2.9832462393376928e-06, "loss": 0.4506, "step": 5360 }, { "epoch": 0.2862397057647717, "grad_norm": 1.4371177693550143, "learning_rate": 2.9831146203465288e-06, "loss": 0.4294, "step": 5370 }, { "epoch": 0.28677274059859814, "grad_norm": 1.6977410718840877, "learning_rate": 2.982982489297476e-06, "loss": 0.4308, "step": 5380 }, { "epoch": 0.2873057754324245, "grad_norm": 1.6767286162428785, "learning_rate": 2.982849846236154e-06, "loss": 0.4405, "step": 5390 }, { "epoch": 0.2878388102662509, "grad_norm": 1.670718264926009, "learning_rate": 2.9827166912083594e-06, "loss": 0.44, "step": 5400 }, { "epoch": 0.2883718451000773, "grad_norm": 1.5229507938665452, "learning_rate": 2.982583024260065e-06, "loss": 0.444, "step": 5410 }, { "epoch": 0.2889048799339037, "grad_norm": 1.5235669298931718, "learning_rate": 2.9824488454374197e-06, "loss": 0.4299, "step": 5420 }, { "epoch": 0.28943791476773006, "grad_norm": 1.6380394830128373, "learning_rate": 2.9823141547867506e-06, "loss": 0.4298, "step": 5430 }, { "epoch": 0.28997094960155645, "grad_norm": 1.6540862277102244, "learning_rate": 2.982178952354561e-06, "loss": 0.4331, "step": 5440 }, { "epoch": 0.29050398443538283, "grad_norm": 1.677499163271086, "learning_rate": 2.9820432381875305e-06, "loss": 0.4176, "step": 5450 }, { "epoch": 0.2910370192692092, "grad_norm": 1.6015713385649804, "learning_rate": 2.981907012332515e-06, "loss": 0.4277, "step": 5460 }, { "epoch": 0.29157005410303566, "grad_norm": 1.4559848203782635, "learning_rate": 2.981770274836548e-06, "loss": 0.433, "step": 5470 }, { "epoch": 0.29210308893686204, "grad_norm": 1.664718914012717, "learning_rate": 2.9816330257468398e-06, "loss": 0.4385, "step": 5480 }, { "epoch": 0.2926361237706884, "grad_norm": 1.8270615353833766, "learning_rate": 2.981495265110776e-06, "loss": 0.4252, "step": 5490 }, { "epoch": 0.2931691586045148, "grad_norm": 1.530178399595827, "learning_rate": 2.9813569929759202e-06, "loss": 0.4336, "step": 5500 }, { "epoch": 0.2937021934383412, "grad_norm": 1.6294386231734062, "learning_rate": 2.9812182093900115e-06, "loss": 0.4283, "step": 5510 }, { "epoch": 0.2942352282721676, "grad_norm": 1.4422882137266106, "learning_rate": 2.981078914400966e-06, "loss": 0.4325, "step": 5520 }, { "epoch": 0.29476826310599397, "grad_norm": 1.677668765720838, "learning_rate": 2.9809391080568775e-06, "loss": 0.4362, "step": 5530 }, { "epoch": 0.29530129793982035, "grad_norm": 1.513264583171719, "learning_rate": 2.9807987904060146e-06, "loss": 0.4271, "step": 5540 }, { "epoch": 0.29583433277364674, "grad_norm": 1.6348742300180483, "learning_rate": 2.980657961496823e-06, "loss": 0.4361, "step": 5550 }, { "epoch": 0.2963673676074732, "grad_norm": 1.5907611755718645, "learning_rate": 2.9805166213779255e-06, "loss": 0.4386, "step": 5560 }, { "epoch": 0.29690040244129956, "grad_norm": 1.5621826881478291, "learning_rate": 2.9803747700981203e-06, "loss": 0.4259, "step": 5570 }, { "epoch": 0.29743343727512594, "grad_norm": 1.5964323571177013, "learning_rate": 2.980232407706384e-06, "loss": 0.441, "step": 5580 }, { "epoch": 0.29796647210895233, "grad_norm": 1.6482586306304368, "learning_rate": 2.9800895342518674e-06, "loss": 0.4339, "step": 5590 }, { "epoch": 0.2984995069427787, "grad_norm": 1.7961368679894048, "learning_rate": 2.9799461497838986e-06, "loss": 0.4321, "step": 5600 }, { "epoch": 0.2990325417766051, "grad_norm": 1.4773957185360056, "learning_rate": 2.9798022543519834e-06, "loss": 0.4178, "step": 5610 }, { "epoch": 0.2995655766104315, "grad_norm": 1.6191512300986841, "learning_rate": 2.9796578480058018e-06, "loss": 0.4298, "step": 5620 }, { "epoch": 0.30009861144425787, "grad_norm": 1.5817940874948935, "learning_rate": 2.9795129307952124e-06, "loss": 0.4337, "step": 5630 }, { "epoch": 0.30063164627808425, "grad_norm": 1.648708283015776, "learning_rate": 2.979367502770248e-06, "loss": 0.4401, "step": 5640 }, { "epoch": 0.30116468111191064, "grad_norm": 1.787315503059311, "learning_rate": 2.9792215639811193e-06, "loss": 0.4456, "step": 5650 }, { "epoch": 0.3016977159457371, "grad_norm": 1.6223314451023445, "learning_rate": 2.9790751144782125e-06, "loss": 0.4351, "step": 5660 }, { "epoch": 0.30223075077956346, "grad_norm": 1.431844924729601, "learning_rate": 2.9789281543120912e-06, "loss": 0.4079, "step": 5670 }, { "epoch": 0.30276378561338985, "grad_norm": 1.6867517408716806, "learning_rate": 2.978780683533495e-06, "loss": 0.4316, "step": 5680 }, { "epoch": 0.30329682044721623, "grad_norm": 1.6110745018766008, "learning_rate": 2.9786327021933382e-06, "loss": 0.4344, "step": 5690 }, { "epoch": 0.3038298552810426, "grad_norm": 1.542450302892925, "learning_rate": 2.978484210342714e-06, "loss": 0.4345, "step": 5700 }, { "epoch": 0.304362890114869, "grad_norm": 1.563208533037698, "learning_rate": 2.9783352080328888e-06, "loss": 0.4216, "step": 5710 }, { "epoch": 0.3048959249486954, "grad_norm": 1.6242438533234171, "learning_rate": 2.9781856953153085e-06, "loss": 0.4292, "step": 5720 }, { "epoch": 0.30542895978252177, "grad_norm": 1.493895541562595, "learning_rate": 2.9780356722415928e-06, "loss": 0.4229, "step": 5730 }, { "epoch": 0.30596199461634815, "grad_norm": 1.6046335270551686, "learning_rate": 2.9778851388635382e-06, "loss": 0.4376, "step": 5740 }, { "epoch": 0.3064950294501746, "grad_norm": 1.7383356432998933, "learning_rate": 2.9777340952331187e-06, "loss": 0.436, "step": 5750 }, { "epoch": 0.307028064284001, "grad_norm": 1.6825344903169306, "learning_rate": 2.9775825414024823e-06, "loss": 0.4342, "step": 5760 }, { "epoch": 0.30756109911782736, "grad_norm": 1.6270778665885282, "learning_rate": 2.977430477423955e-06, "loss": 0.4401, "step": 5770 }, { "epoch": 0.30809413395165375, "grad_norm": 1.5179321119960736, "learning_rate": 2.9772779033500375e-06, "loss": 0.4424, "step": 5780 }, { "epoch": 0.30862716878548013, "grad_norm": 1.7945348767052742, "learning_rate": 2.9771248192334077e-06, "loss": 0.4428, "step": 5790 }, { "epoch": 0.3091602036193065, "grad_norm": 1.800422032662795, "learning_rate": 2.976971225126919e-06, "loss": 0.4229, "step": 5800 }, { "epoch": 0.3096932384531329, "grad_norm": 1.681592093147011, "learning_rate": 2.9768171210836015e-06, "loss": 0.4365, "step": 5810 }, { "epoch": 0.3102262732869593, "grad_norm": 1.6179092632145686, "learning_rate": 2.97666250715666e-06, "loss": 0.4355, "step": 5820 }, { "epoch": 0.31075930812078567, "grad_norm": 1.4760324448495825, "learning_rate": 2.9765073833994777e-06, "loss": 0.4359, "step": 5830 }, { "epoch": 0.3112923429546121, "grad_norm": 1.5392970132003494, "learning_rate": 2.9763517498656107e-06, "loss": 0.4196, "step": 5840 }, { "epoch": 0.3118253777884385, "grad_norm": 1.7311081029836572, "learning_rate": 2.9761956066087947e-06, "loss": 0.4139, "step": 5850 }, { "epoch": 0.3123584126222649, "grad_norm": 1.661098479249934, "learning_rate": 2.9760389536829374e-06, "loss": 0.4304, "step": 5860 }, { "epoch": 0.31289144745609127, "grad_norm": 1.4948310451170144, "learning_rate": 2.975881791142126e-06, "loss": 0.4164, "step": 5870 }, { "epoch": 0.31342448228991765, "grad_norm": 1.5948477699862678, "learning_rate": 2.975724119040622e-06, "loss": 0.4244, "step": 5880 }, { "epoch": 0.31395751712374403, "grad_norm": 1.405655037969923, "learning_rate": 2.9755659374328626e-06, "loss": 0.423, "step": 5890 }, { "epoch": 0.3144905519575704, "grad_norm": 1.751875042948595, "learning_rate": 2.975407246373462e-06, "loss": 0.4199, "step": 5900 }, { "epoch": 0.3150235867913968, "grad_norm": 1.6307903578015341, "learning_rate": 2.975248045917209e-06, "loss": 0.4355, "step": 5910 }, { "epoch": 0.3155566216252232, "grad_norm": 1.6219989949867764, "learning_rate": 2.975088336119069e-06, "loss": 0.4223, "step": 5920 }, { "epoch": 0.3160896564590496, "grad_norm": 1.5536610478037198, "learning_rate": 2.974928117034183e-06, "loss": 0.4347, "step": 5930 }, { "epoch": 0.316622691292876, "grad_norm": 1.641102860114325, "learning_rate": 2.9747673887178686e-06, "loss": 0.4384, "step": 5940 }, { "epoch": 0.3171557261267024, "grad_norm": 1.7237900940901483, "learning_rate": 2.974606151225618e-06, "loss": 0.4302, "step": 5950 }, { "epoch": 0.3176887609605288, "grad_norm": 1.6168013083983608, "learning_rate": 2.9744444046131002e-06, "loss": 0.4472, "step": 5960 }, { "epoch": 0.31822179579435517, "grad_norm": 1.5470949319976988, "learning_rate": 2.97428214893616e-06, "loss": 0.4418, "step": 5970 }, { "epoch": 0.31875483062818155, "grad_norm": 1.4250231775624402, "learning_rate": 2.9741193842508162e-06, "loss": 0.4311, "step": 5980 }, { "epoch": 0.31928786546200794, "grad_norm": 1.6151878496603973, "learning_rate": 2.973956110613266e-06, "loss": 0.4318, "step": 5990 }, { "epoch": 0.3198209002958343, "grad_norm": 1.5052112012216967, "learning_rate": 2.9737923280798806e-06, "loss": 0.4339, "step": 6000 }, { "epoch": 0.3203539351296607, "grad_norm": 1.5141431267749252, "learning_rate": 2.9736280367072067e-06, "loss": 0.4384, "step": 6010 }, { "epoch": 0.3208869699634871, "grad_norm": 1.5849168683837396, "learning_rate": 2.9734632365519678e-06, "loss": 0.4345, "step": 6020 }, { "epoch": 0.32142000479731353, "grad_norm": 1.5358308949250419, "learning_rate": 2.9732979276710633e-06, "loss": 0.423, "step": 6030 }, { "epoch": 0.3219530396311399, "grad_norm": 1.5538071002008005, "learning_rate": 2.973132110121566e-06, "loss": 0.4109, "step": 6040 }, { "epoch": 0.3224860744649663, "grad_norm": 1.5521549886236965, "learning_rate": 2.972965783960727e-06, "loss": 0.4385, "step": 6050 }, { "epoch": 0.3230191092987927, "grad_norm": 1.5698741983940294, "learning_rate": 2.972798949245971e-06, "loss": 0.4332, "step": 6060 }, { "epoch": 0.32355214413261907, "grad_norm": 1.6140370750755964, "learning_rate": 2.9726316060349005e-06, "loss": 0.4193, "step": 6070 }, { "epoch": 0.32408517896644545, "grad_norm": 1.464931396391657, "learning_rate": 2.9724637543852902e-06, "loss": 0.4364, "step": 6080 }, { "epoch": 0.32461821380027184, "grad_norm": 1.8637631354477113, "learning_rate": 2.9722953943550935e-06, "loss": 0.43, "step": 6090 }, { "epoch": 0.3251512486340982, "grad_norm": 1.549463043157198, "learning_rate": 2.9721265260024375e-06, "loss": 0.4284, "step": 6100 }, { "epoch": 0.3256842834679246, "grad_norm": 1.5134739981809822, "learning_rate": 2.9719571493856265e-06, "loss": 0.4274, "step": 6110 }, { "epoch": 0.32621731830175105, "grad_norm": 1.6343886349274972, "learning_rate": 2.971787264563138e-06, "loss": 0.4396, "step": 6120 }, { "epoch": 0.32675035313557743, "grad_norm": 1.6616364072107324, "learning_rate": 2.971616871593627e-06, "loss": 0.4334, "step": 6130 }, { "epoch": 0.3272833879694038, "grad_norm": 1.678041669454987, "learning_rate": 2.9714459705359226e-06, "loss": 0.428, "step": 6140 }, { "epoch": 0.3278164228032302, "grad_norm": 1.6647908349147245, "learning_rate": 2.97127456144903e-06, "loss": 0.4276, "step": 6150 }, { "epoch": 0.3283494576370566, "grad_norm": 1.8462658095224402, "learning_rate": 2.9711026443921298e-06, "loss": 0.4302, "step": 6160 }, { "epoch": 0.32888249247088297, "grad_norm": 1.5878897605642863, "learning_rate": 2.9709302194245776e-06, "loss": 0.4227, "step": 6170 }, { "epoch": 0.32941552730470935, "grad_norm": 1.706024186802193, "learning_rate": 2.970757286605905e-06, "loss": 0.4259, "step": 6180 }, { "epoch": 0.32994856213853574, "grad_norm": 1.7177152431099663, "learning_rate": 2.970583845995818e-06, "loss": 0.4139, "step": 6190 }, { "epoch": 0.3304815969723621, "grad_norm": 1.527582656257654, "learning_rate": 2.970409897654198e-06, "loss": 0.4187, "step": 6200 }, { "epoch": 0.3310146318061885, "grad_norm": 1.6131152731146399, "learning_rate": 2.970235441641104e-06, "loss": 0.4278, "step": 6210 }, { "epoch": 0.33154766664001495, "grad_norm": 1.9354559182132087, "learning_rate": 2.9700604780167667e-06, "loss": 0.438, "step": 6220 }, { "epoch": 0.33208070147384133, "grad_norm": 1.749048160771119, "learning_rate": 2.969885006841594e-06, "loss": 0.4481, "step": 6230 }, { "epoch": 0.3326137363076677, "grad_norm": 1.645947297635966, "learning_rate": 2.9697090281761696e-06, "loss": 0.4265, "step": 6240 }, { "epoch": 0.3331467711414941, "grad_norm": 1.6999800045470679, "learning_rate": 2.9695325420812506e-06, "loss": 0.4182, "step": 6250 }, { "epoch": 0.3336798059753205, "grad_norm": 1.7444989233639856, "learning_rate": 2.969355548617771e-06, "loss": 0.4438, "step": 6260 }, { "epoch": 0.33421284080914687, "grad_norm": 1.5392374214355635, "learning_rate": 2.969178047846839e-06, "loss": 0.4362, "step": 6270 }, { "epoch": 0.33474587564297326, "grad_norm": 1.4600387222080204, "learning_rate": 2.9690000398297383e-06, "loss": 0.4274, "step": 6280 }, { "epoch": 0.33527891047679964, "grad_norm": 1.5442277557133366, "learning_rate": 2.9688215246279284e-06, "loss": 0.423, "step": 6290 }, { "epoch": 0.335811945310626, "grad_norm": 1.501160320948494, "learning_rate": 2.9686425023030418e-06, "loss": 0.4089, "step": 6300 }, { "epoch": 0.33634498014445247, "grad_norm": 1.5531044566240355, "learning_rate": 2.9684629729168886e-06, "loss": 0.4383, "step": 6310 }, { "epoch": 0.33687801497827885, "grad_norm": 1.720673900539268, "learning_rate": 2.968282936531452e-06, "loss": 0.4306, "step": 6320 }, { "epoch": 0.33741104981210523, "grad_norm": 1.7350909267725416, "learning_rate": 2.968102393208892e-06, "loss": 0.4485, "step": 6330 }, { "epoch": 0.3379440846459316, "grad_norm": 1.5377549780201976, "learning_rate": 2.9679213430115425e-06, "loss": 0.4363, "step": 6340 }, { "epoch": 0.338477119479758, "grad_norm": 1.5606358850945456, "learning_rate": 2.967739786001912e-06, "loss": 0.441, "step": 6350 }, { "epoch": 0.3390101543135844, "grad_norm": 1.616705804620765, "learning_rate": 2.9675577222426846e-06, "loss": 0.435, "step": 6360 }, { "epoch": 0.3395431891474108, "grad_norm": 1.7178853286696927, "learning_rate": 2.96737515179672e-06, "loss": 0.4267, "step": 6370 }, { "epoch": 0.34007622398123716, "grad_norm": 1.6173103553224393, "learning_rate": 2.967192074727052e-06, "loss": 0.4235, "step": 6380 }, { "epoch": 0.34060925881506354, "grad_norm": 1.6221072249479735, "learning_rate": 2.9670084910968893e-06, "loss": 0.4413, "step": 6390 }, { "epoch": 0.34114229364889, "grad_norm": 1.49390225963427, "learning_rate": 2.966824400969616e-06, "loss": 0.4346, "step": 6400 }, { "epoch": 0.34167532848271637, "grad_norm": 1.825090247515385, "learning_rate": 2.96663980440879e-06, "loss": 0.4218, "step": 6410 }, { "epoch": 0.34220836331654275, "grad_norm": 1.7278060711820127, "learning_rate": 2.966454701478146e-06, "loss": 0.4207, "step": 6420 }, { "epoch": 0.34274139815036914, "grad_norm": 1.5568723067877832, "learning_rate": 2.966269092241592e-06, "loss": 0.4337, "step": 6430 }, { "epoch": 0.3432744329841955, "grad_norm": 1.6662446944834262, "learning_rate": 2.96608297676321e-06, "loss": 0.4208, "step": 6440 }, { "epoch": 0.3438074678180219, "grad_norm": 1.614855733975428, "learning_rate": 2.96589635510726e-06, "loss": 0.4136, "step": 6450 }, { "epoch": 0.3443405026518483, "grad_norm": 1.5933718003086859, "learning_rate": 2.9657092273381728e-06, "loss": 0.4203, "step": 6460 }, { "epoch": 0.3448735374856747, "grad_norm": 1.7060118616852338, "learning_rate": 2.965521593520557e-06, "loss": 0.4253, "step": 6470 }, { "epoch": 0.34540657231950106, "grad_norm": 1.6310484172430664, "learning_rate": 2.9653334537191944e-06, "loss": 0.4334, "step": 6480 }, { "epoch": 0.34593960715332744, "grad_norm": 1.466491307785643, "learning_rate": 2.9651448079990417e-06, "loss": 0.418, "step": 6490 }, { "epoch": 0.3464726419871539, "grad_norm": 1.5697743652363598, "learning_rate": 2.964955656425231e-06, "loss": 0.4306, "step": 6500 }, { "epoch": 0.34700567682098027, "grad_norm": 1.5972880783969272, "learning_rate": 2.9647659990630675e-06, "loss": 0.4185, "step": 6510 }, { "epoch": 0.34753871165480665, "grad_norm": 1.507300869963076, "learning_rate": 2.9645758359780326e-06, "loss": 0.4264, "step": 6520 }, { "epoch": 0.34807174648863304, "grad_norm": 1.5407374887590906, "learning_rate": 2.9643851672357822e-06, "loss": 0.425, "step": 6530 }, { "epoch": 0.3486047813224594, "grad_norm": 1.638447449847881, "learning_rate": 2.9641939929021457e-06, "loss": 0.4246, "step": 6540 }, { "epoch": 0.3491378161562858, "grad_norm": 1.6997729045022967, "learning_rate": 2.9640023130431273e-06, "loss": 0.446, "step": 6550 }, { "epoch": 0.3496708509901122, "grad_norm": 1.6622712567874156, "learning_rate": 2.963810127724907e-06, "loss": 0.4286, "step": 6560 }, { "epoch": 0.3502038858239386, "grad_norm": 1.5191238753665401, "learning_rate": 2.963617437013838e-06, "loss": 0.4094, "step": 6570 }, { "epoch": 0.35073692065776496, "grad_norm": 1.6061869639350372, "learning_rate": 2.9634242409764483e-06, "loss": 0.4241, "step": 6580 }, { "epoch": 0.3512699554915914, "grad_norm": 1.6408748077375865, "learning_rate": 2.96323053967944e-06, "loss": 0.4515, "step": 6590 }, { "epoch": 0.3518029903254178, "grad_norm": 1.5519063034535103, "learning_rate": 2.963036333189691e-06, "loss": 0.4272, "step": 6600 }, { "epoch": 0.35233602515924417, "grad_norm": 1.7048124018824924, "learning_rate": 2.962841621574253e-06, "loss": 0.4301, "step": 6610 }, { "epoch": 0.35286905999307056, "grad_norm": 1.633526500844667, "learning_rate": 2.9626464049003507e-06, "loss": 0.4432, "step": 6620 }, { "epoch": 0.35340209482689694, "grad_norm": 1.4921691584219783, "learning_rate": 2.962450683235385e-06, "loss": 0.4149, "step": 6630 }, { "epoch": 0.3539351296607233, "grad_norm": 1.766329625149137, "learning_rate": 2.9622544566469303e-06, "loss": 0.4264, "step": 6640 }, { "epoch": 0.3544681644945497, "grad_norm": 1.5875140369840268, "learning_rate": 2.9620577252027346e-06, "loss": 0.4376, "step": 6650 }, { "epoch": 0.3550011993283761, "grad_norm": 1.4506568654201621, "learning_rate": 2.9618604889707233e-06, "loss": 0.4296, "step": 6660 }, { "epoch": 0.3555342341622025, "grad_norm": 1.6275768055961044, "learning_rate": 2.9616627480189917e-06, "loss": 0.4236, "step": 6670 }, { "epoch": 0.35606726899602886, "grad_norm": 1.5301223863021702, "learning_rate": 2.961464502415813e-06, "loss": 0.4236, "step": 6680 }, { "epoch": 0.3566003038298553, "grad_norm": 1.5631224569949131, "learning_rate": 2.961265752229632e-06, "loss": 0.4322, "step": 6690 }, { "epoch": 0.3571333386636817, "grad_norm": 1.67832387761322, "learning_rate": 2.9610664975290697e-06, "loss": 0.4318, "step": 6700 }, { "epoch": 0.35766637349750807, "grad_norm": 1.5596583006537503, "learning_rate": 2.9608667383829204e-06, "loss": 0.4156, "step": 6710 }, { "epoch": 0.35819940833133446, "grad_norm": 1.7075693422562126, "learning_rate": 2.9606664748601527e-06, "loss": 0.4269, "step": 6720 }, { "epoch": 0.35873244316516084, "grad_norm": 1.707483865521375, "learning_rate": 2.9604657070299084e-06, "loss": 0.4156, "step": 6730 }, { "epoch": 0.3592654779989872, "grad_norm": 1.6991749329911974, "learning_rate": 2.9602644349615054e-06, "loss": 0.4376, "step": 6740 }, { "epoch": 0.3597985128328136, "grad_norm": 1.617189763978349, "learning_rate": 2.9600626587244344e-06, "loss": 0.4201, "step": 6750 }, { "epoch": 0.36033154766664, "grad_norm": 1.6928129337056317, "learning_rate": 2.9598603783883594e-06, "loss": 0.4264, "step": 6760 }, { "epoch": 0.3608645825004664, "grad_norm": 1.6973673604437498, "learning_rate": 2.9596575940231205e-06, "loss": 0.426, "step": 6770 }, { "epoch": 0.3613976173342928, "grad_norm": 1.6584859170233988, "learning_rate": 2.95945430569873e-06, "loss": 0.4243, "step": 6780 }, { "epoch": 0.3619306521681192, "grad_norm": 1.69323846893125, "learning_rate": 2.959250513485376e-06, "loss": 0.4212, "step": 6790 }, { "epoch": 0.3624636870019456, "grad_norm": 1.6544006172694286, "learning_rate": 2.959046217453418e-06, "loss": 0.425, "step": 6800 }, { "epoch": 0.362996721835772, "grad_norm": 1.7845355812314003, "learning_rate": 2.9588414176733913e-06, "loss": 0.4082, "step": 6810 }, { "epoch": 0.36352975666959836, "grad_norm": 1.6598532140728652, "learning_rate": 2.9586361142160055e-06, "loss": 0.433, "step": 6820 }, { "epoch": 0.36406279150342474, "grad_norm": 1.5263144632372705, "learning_rate": 2.958430307152143e-06, "loss": 0.4144, "step": 6830 }, { "epoch": 0.3645958263372511, "grad_norm": 1.4939023195672378, "learning_rate": 2.9582239965528606e-06, "loss": 0.4332, "step": 6840 }, { "epoch": 0.3651288611710775, "grad_norm": 1.5667616611848862, "learning_rate": 2.9580171824893887e-06, "loss": 0.4229, "step": 6850 }, { "epoch": 0.3656618960049039, "grad_norm": 1.5669429935745147, "learning_rate": 2.9578098650331312e-06, "loss": 0.4065, "step": 6860 }, { "epoch": 0.36619493083873034, "grad_norm": 1.433683483555676, "learning_rate": 2.957602044255667e-06, "loss": 0.403, "step": 6870 }, { "epoch": 0.3667279656725567, "grad_norm": 1.4912206002853188, "learning_rate": 2.957393720228747e-06, "loss": 0.4035, "step": 6880 }, { "epoch": 0.3672610005063831, "grad_norm": 1.504232794105487, "learning_rate": 2.9571848930242975e-06, "loss": 0.4161, "step": 6890 }, { "epoch": 0.3677940353402095, "grad_norm": 1.650561754017296, "learning_rate": 2.9569755627144175e-06, "loss": 0.4337, "step": 6900 }, { "epoch": 0.3683270701740359, "grad_norm": 1.7591672315611653, "learning_rate": 2.95676572937138e-06, "loss": 0.4285, "step": 6910 }, { "epoch": 0.36886010500786226, "grad_norm": 1.60301610622303, "learning_rate": 2.9565553930676323e-06, "loss": 0.421, "step": 6920 }, { "epoch": 0.36939313984168864, "grad_norm": 1.5168789019932878, "learning_rate": 2.956344553875795e-06, "loss": 0.4228, "step": 6930 }, { "epoch": 0.36992617467551503, "grad_norm": 1.5940718104914382, "learning_rate": 2.956133211868661e-06, "loss": 0.4144, "step": 6940 }, { "epoch": 0.3704592095093414, "grad_norm": 1.6099433387984643, "learning_rate": 2.9559213671191983e-06, "loss": 0.4129, "step": 6950 }, { "epoch": 0.3709922443431678, "grad_norm": 1.5992118581782357, "learning_rate": 2.955709019700549e-06, "loss": 0.4266, "step": 6960 }, { "epoch": 0.37152527917699424, "grad_norm": 2.0938413496975716, "learning_rate": 2.9554961696860263e-06, "loss": 0.426, "step": 6970 }, { "epoch": 0.3720583140108206, "grad_norm": 1.579150885321858, "learning_rate": 2.95528281714912e-06, "loss": 0.4125, "step": 6980 }, { "epoch": 0.372591348844647, "grad_norm": 1.3469684322843098, "learning_rate": 2.9550689621634906e-06, "loss": 0.4124, "step": 6990 }, { "epoch": 0.3731243836784734, "grad_norm": 1.4689342197584156, "learning_rate": 2.9548546048029745e-06, "loss": 0.3875, "step": 7000 }, { "epoch": 0.3736574185122998, "grad_norm": 1.3959784585326207, "learning_rate": 2.9546397451415796e-06, "loss": 0.4246, "step": 7010 }, { "epoch": 0.37419045334612616, "grad_norm": 1.6699340776550209, "learning_rate": 2.9544243832534882e-06, "loss": 0.4168, "step": 7020 }, { "epoch": 0.37472348817995255, "grad_norm": 1.7791707413975577, "learning_rate": 2.954208519213056e-06, "loss": 0.4167, "step": 7030 }, { "epoch": 0.37525652301377893, "grad_norm": 1.5243558486319142, "learning_rate": 2.953992153094812e-06, "loss": 0.4231, "step": 7040 }, { "epoch": 0.3757895578476053, "grad_norm": 1.652255628611805, "learning_rate": 2.953775284973458e-06, "loss": 0.4427, "step": 7050 }, { "epoch": 0.37632259268143176, "grad_norm": 1.6337680171819196, "learning_rate": 2.9535579149238694e-06, "loss": 0.4164, "step": 7060 }, { "epoch": 0.37685562751525814, "grad_norm": 1.4421046818351917, "learning_rate": 2.9533400430210956e-06, "loss": 0.4221, "step": 7070 }, { "epoch": 0.3773886623490845, "grad_norm": 1.5573453388988545, "learning_rate": 2.9531216693403585e-06, "loss": 0.4279, "step": 7080 }, { "epoch": 0.3779216971829109, "grad_norm": 1.5800406967377374, "learning_rate": 2.9529027939570536e-06, "loss": 0.4197, "step": 7090 }, { "epoch": 0.3784547320167373, "grad_norm": 1.5852799254910612, "learning_rate": 2.9526834169467496e-06, "loss": 0.4353, "step": 7100 }, { "epoch": 0.3789877668505637, "grad_norm": 1.4653958075983655, "learning_rate": 2.952463538385188e-06, "loss": 0.4287, "step": 7110 }, { "epoch": 0.37952080168439006, "grad_norm": 1.5555537517929727, "learning_rate": 2.9522431583482834e-06, "loss": 0.437, "step": 7120 }, { "epoch": 0.38005383651821645, "grad_norm": 1.5173481914084632, "learning_rate": 2.9520222769121242e-06, "loss": 0.4103, "step": 7130 }, { "epoch": 0.38058687135204283, "grad_norm": 1.5005103614495325, "learning_rate": 2.9518008941529715e-06, "loss": 0.4204, "step": 7140 }, { "epoch": 0.3811199061858693, "grad_norm": 1.5431517066178606, "learning_rate": 2.95157901014726e-06, "loss": 0.4161, "step": 7150 }, { "epoch": 0.38165294101969566, "grad_norm": 1.5104382693845257, "learning_rate": 2.951356624971597e-06, "loss": 0.4111, "step": 7160 }, { "epoch": 0.38218597585352204, "grad_norm": 1.854950512809969, "learning_rate": 2.9511337387027623e-06, "loss": 0.4396, "step": 7170 }, { "epoch": 0.3827190106873484, "grad_norm": 1.5589332316311124, "learning_rate": 2.95091035141771e-06, "loss": 0.4146, "step": 7180 }, { "epoch": 0.3832520455211748, "grad_norm": 1.5159826140520984, "learning_rate": 2.9506864631935658e-06, "loss": 0.4174, "step": 7190 }, { "epoch": 0.3837850803550012, "grad_norm": 1.5324743603681843, "learning_rate": 2.950462074107629e-06, "loss": 0.4019, "step": 7200 }, { "epoch": 0.3843181151888276, "grad_norm": 1.6573051677286261, "learning_rate": 2.950237184237372e-06, "loss": 0.4148, "step": 7210 }, { "epoch": 0.38485115002265396, "grad_norm": 1.7077462727011432, "learning_rate": 2.9500117936604405e-06, "loss": 0.4136, "step": 7220 }, { "epoch": 0.38538418485648035, "grad_norm": 1.7161341232616143, "learning_rate": 2.9497859024546517e-06, "loss": 0.4194, "step": 7230 }, { "epoch": 0.38591721969030673, "grad_norm": 1.642902871832163, "learning_rate": 2.9495595106979966e-06, "loss": 0.4224, "step": 7240 }, { "epoch": 0.3864502545241332, "grad_norm": 2.0018461107067735, "learning_rate": 2.9493326184686394e-06, "loss": 0.4141, "step": 7250 }, { "epoch": 0.38698328935795956, "grad_norm": 1.5791876695803535, "learning_rate": 2.949105225844916e-06, "loss": 0.4292, "step": 7260 }, { "epoch": 0.38751632419178594, "grad_norm": 1.578545563864404, "learning_rate": 2.9488773329053357e-06, "loss": 0.4032, "step": 7270 }, { "epoch": 0.38804935902561233, "grad_norm": 1.6503153659541419, "learning_rate": 2.9486489397285807e-06, "loss": 0.4246, "step": 7280 }, { "epoch": 0.3885823938594387, "grad_norm": 1.6620855149679803, "learning_rate": 2.9484200463935058e-06, "loss": 0.4058, "step": 7290 }, { "epoch": 0.3891154286932651, "grad_norm": 1.4629456758669772, "learning_rate": 2.948190652979138e-06, "loss": 0.4098, "step": 7300 }, { "epoch": 0.3896484635270915, "grad_norm": 1.6222819828694544, "learning_rate": 2.9479607595646775e-06, "loss": 0.4194, "step": 7310 }, { "epoch": 0.39018149836091787, "grad_norm": 1.62215954812823, "learning_rate": 2.947730366229497e-06, "loss": 0.408, "step": 7320 }, { "epoch": 0.39071453319474425, "grad_norm": 1.5370123610447388, "learning_rate": 2.9474994730531414e-06, "loss": 0.4178, "step": 7330 }, { "epoch": 0.3912475680285707, "grad_norm": 1.5638120823993014, "learning_rate": 2.947268080115329e-06, "loss": 0.4094, "step": 7340 }, { "epoch": 0.3917806028623971, "grad_norm": 1.5482151621238354, "learning_rate": 2.9470361874959496e-06, "loss": 0.4308, "step": 7350 }, { "epoch": 0.39231363769622346, "grad_norm": 1.6338482214479055, "learning_rate": 2.946803795275067e-06, "loss": 0.4167, "step": 7360 }, { "epoch": 0.39284667253004985, "grad_norm": 1.7199532895827334, "learning_rate": 2.9465709035329157e-06, "loss": 0.4089, "step": 7370 }, { "epoch": 0.39337970736387623, "grad_norm": 1.4099147454250378, "learning_rate": 2.9463375123499036e-06, "loss": 0.416, "step": 7380 }, { "epoch": 0.3939127421977026, "grad_norm": 1.398138412374819, "learning_rate": 2.946103621806612e-06, "loss": 0.4078, "step": 7390 }, { "epoch": 0.394445777031529, "grad_norm": 1.5471801294339103, "learning_rate": 2.9458692319837927e-06, "loss": 0.4185, "step": 7400 }, { "epoch": 0.3949788118653554, "grad_norm": 1.4259788957885504, "learning_rate": 2.9456343429623705e-06, "loss": 0.4272, "step": 7410 }, { "epoch": 0.39551184669918177, "grad_norm": 1.5519588853612656, "learning_rate": 2.9453989548234435e-06, "loss": 0.4133, "step": 7420 }, { "epoch": 0.3960448815330082, "grad_norm": 1.5652354183527466, "learning_rate": 2.945163067648281e-06, "loss": 0.4345, "step": 7430 }, { "epoch": 0.3965779163668346, "grad_norm": 1.6600246160799446, "learning_rate": 2.9449266815183254e-06, "loss": 0.426, "step": 7440 }, { "epoch": 0.397110951200661, "grad_norm": 1.6229649686580363, "learning_rate": 2.9446897965151906e-06, "loss": 0.4022, "step": 7450 }, { "epoch": 0.39764398603448736, "grad_norm": 1.6743591863411296, "learning_rate": 2.9444524127206638e-06, "loss": 0.4252, "step": 7460 }, { "epoch": 0.39817702086831375, "grad_norm": 1.4605448278499182, "learning_rate": 2.9442145302167033e-06, "loss": 0.4316, "step": 7470 }, { "epoch": 0.39871005570214013, "grad_norm": 1.5880113421344213, "learning_rate": 2.94397614908544e-06, "loss": 0.4264, "step": 7480 }, { "epoch": 0.3992430905359665, "grad_norm": 1.6206534767222953, "learning_rate": 2.943737269409177e-06, "loss": 0.398, "step": 7490 }, { "epoch": 0.3997761253697929, "grad_norm": 1.5889574564873155, "learning_rate": 2.9434978912703902e-06, "loss": 0.4281, "step": 7500 }, { "epoch": 0.4003091602036193, "grad_norm": 1.8813772021245978, "learning_rate": 2.943258014751726e-06, "loss": 0.4253, "step": 7510 }, { "epoch": 0.40084219503744567, "grad_norm": 1.6030100273647854, "learning_rate": 2.943017639936004e-06, "loss": 0.4198, "step": 7520 }, { "epoch": 0.4013752298712721, "grad_norm": 1.5762066109514417, "learning_rate": 2.942776766906216e-06, "loss": 0.4103, "step": 7530 }, { "epoch": 0.4019082647050985, "grad_norm": 1.5983110610277405, "learning_rate": 2.9425353957455257e-06, "loss": 0.4218, "step": 7540 }, { "epoch": 0.4024412995389249, "grad_norm": 1.723442508143728, "learning_rate": 2.9422935265372677e-06, "loss": 0.4211, "step": 7550 }, { "epoch": 0.40297433437275126, "grad_norm": 1.844308496148667, "learning_rate": 2.9420511593649502e-06, "loss": 0.4297, "step": 7560 }, { "epoch": 0.40350736920657765, "grad_norm": 1.648711969216378, "learning_rate": 2.941808294312252e-06, "loss": 0.4226, "step": 7570 }, { "epoch": 0.40404040404040403, "grad_norm": 1.679764401534548, "learning_rate": 2.9415649314630245e-06, "loss": 0.4052, "step": 7580 }, { "epoch": 0.4045734388742304, "grad_norm": 1.7239791386058514, "learning_rate": 2.9413210709012906e-06, "loss": 0.4175, "step": 7590 }, { "epoch": 0.4051064737080568, "grad_norm": 1.5555799360749905, "learning_rate": 2.9410767127112454e-06, "loss": 0.413, "step": 7600 }, { "epoch": 0.4056395085418832, "grad_norm": 1.546525208250271, "learning_rate": 2.9408318569772556e-06, "loss": 0.4283, "step": 7610 }, { "epoch": 0.4061725433757096, "grad_norm": 1.5047584974914554, "learning_rate": 2.94058650378386e-06, "loss": 0.4174, "step": 7620 }, { "epoch": 0.406705578209536, "grad_norm": 1.80382041917988, "learning_rate": 2.9403406532157685e-06, "loss": 0.4041, "step": 7630 }, { "epoch": 0.4072386130433624, "grad_norm": 1.5916222482663263, "learning_rate": 2.940094305357863e-06, "loss": 0.4084, "step": 7640 }, { "epoch": 0.4077716478771888, "grad_norm": 1.699528886818383, "learning_rate": 2.9398474602951973e-06, "loss": 0.4205, "step": 7650 }, { "epoch": 0.40830468271101517, "grad_norm": 1.5201243647034341, "learning_rate": 2.939600118112997e-06, "loss": 0.412, "step": 7660 }, { "epoch": 0.40883771754484155, "grad_norm": 1.647707954049428, "learning_rate": 2.939352278896659e-06, "loss": 0.4258, "step": 7670 }, { "epoch": 0.40937075237866793, "grad_norm": 1.5593164657936662, "learning_rate": 2.9391039427317514e-06, "loss": 0.4119, "step": 7680 }, { "epoch": 0.4099037872124943, "grad_norm": 1.6064711590170297, "learning_rate": 2.938855109704015e-06, "loss": 0.4077, "step": 7690 }, { "epoch": 0.4104368220463207, "grad_norm": 1.5577253551905574, "learning_rate": 2.938605779899362e-06, "loss": 0.4137, "step": 7700 }, { "epoch": 0.41096985688014714, "grad_norm": 1.803277144438798, "learning_rate": 2.9383559534038737e-06, "loss": 0.4312, "step": 7710 }, { "epoch": 0.41150289171397353, "grad_norm": 1.4249147439088885, "learning_rate": 2.9381056303038068e-06, "loss": 0.4081, "step": 7720 }, { "epoch": 0.4120359265477999, "grad_norm": 1.485248645330218, "learning_rate": 2.9378548106855864e-06, "loss": 0.419, "step": 7730 }, { "epoch": 0.4125689613816263, "grad_norm": 1.6704245434888088, "learning_rate": 2.9376034946358103e-06, "loss": 0.4234, "step": 7740 }, { "epoch": 0.4131019962154527, "grad_norm": 1.4781501841453533, "learning_rate": 2.9373516822412476e-06, "loss": 0.4194, "step": 7750 }, { "epoch": 0.41363503104927907, "grad_norm": 1.5847656324884918, "learning_rate": 2.9370993735888386e-06, "loss": 0.4036, "step": 7760 }, { "epoch": 0.41416806588310545, "grad_norm": 1.4994033270444906, "learning_rate": 2.9368465687656953e-06, "loss": 0.4126, "step": 7770 }, { "epoch": 0.41470110071693184, "grad_norm": 1.6230128273969524, "learning_rate": 2.9365932678591004e-06, "loss": 0.4097, "step": 7780 }, { "epoch": 0.4152341355507582, "grad_norm": 1.4459593408420588, "learning_rate": 2.936339470956508e-06, "loss": 0.4182, "step": 7790 }, { "epoch": 0.4157671703845846, "grad_norm": 1.6286273125696396, "learning_rate": 2.936085178145544e-06, "loss": 0.4233, "step": 7800 }, { "epoch": 0.41630020521841105, "grad_norm": 1.4223471036220727, "learning_rate": 2.935830389514005e-06, "loss": 0.4178, "step": 7810 }, { "epoch": 0.41683324005223743, "grad_norm": 1.5905822093442257, "learning_rate": 2.9355751051498593e-06, "loss": 0.3976, "step": 7820 }, { "epoch": 0.4173662748860638, "grad_norm": 1.5301889702526843, "learning_rate": 2.935319325141245e-06, "loss": 0.4043, "step": 7830 }, { "epoch": 0.4178993097198902, "grad_norm": 1.5904661838595013, "learning_rate": 2.9350630495764734e-06, "loss": 0.4048, "step": 7840 }, { "epoch": 0.4184323445537166, "grad_norm": 1.4784304422367702, "learning_rate": 2.934806278544025e-06, "loss": 0.4138, "step": 7850 }, { "epoch": 0.41896537938754297, "grad_norm": 1.4167844735407589, "learning_rate": 2.934549012132553e-06, "loss": 0.4232, "step": 7860 }, { "epoch": 0.41949841422136935, "grad_norm": 1.6454819116212354, "learning_rate": 2.93429125043088e-06, "loss": 0.4163, "step": 7870 }, { "epoch": 0.42003144905519574, "grad_norm": 1.593421438965332, "learning_rate": 2.934032993528001e-06, "loss": 0.4199, "step": 7880 }, { "epoch": 0.4205644838890221, "grad_norm": 1.514826322961756, "learning_rate": 2.9337742415130807e-06, "loss": 0.4216, "step": 7890 }, { "epoch": 0.42109751872284856, "grad_norm": 1.546654329029193, "learning_rate": 2.933514994475456e-06, "loss": 0.4156, "step": 7900 }, { "epoch": 0.42163055355667495, "grad_norm": 1.682864628752226, "learning_rate": 2.9332552525046345e-06, "loss": 0.4154, "step": 7910 }, { "epoch": 0.42216358839050133, "grad_norm": 1.7701263773277038, "learning_rate": 2.9329950156902932e-06, "loss": 0.4105, "step": 7920 }, { "epoch": 0.4226966232243277, "grad_norm": 1.6851463631217232, "learning_rate": 2.932734284122282e-06, "loss": 0.4095, "step": 7930 }, { "epoch": 0.4232296580581541, "grad_norm": 1.5079217751467697, "learning_rate": 2.9324730578906203e-06, "loss": 0.4128, "step": 7940 }, { "epoch": 0.4237626928919805, "grad_norm": 1.7300414513140812, "learning_rate": 2.9322113370854987e-06, "loss": 0.4221, "step": 7950 }, { "epoch": 0.42429572772580687, "grad_norm": 1.53899885108711, "learning_rate": 2.9319491217972787e-06, "loss": 0.4109, "step": 7960 }, { "epoch": 0.42482876255963326, "grad_norm": 1.5558466269927365, "learning_rate": 2.931686412116492e-06, "loss": 0.4267, "step": 7970 }, { "epoch": 0.42536179739345964, "grad_norm": 1.5017860916884838, "learning_rate": 2.9314232081338417e-06, "loss": 0.4007, "step": 7980 }, { "epoch": 0.4258948322272861, "grad_norm": 1.6343789482636109, "learning_rate": 2.931159509940201e-06, "loss": 0.409, "step": 7990 }, { "epoch": 0.42642786706111246, "grad_norm": 1.6222545648960967, "learning_rate": 2.930895317626614e-06, "loss": 0.4188, "step": 8000 }, { "epoch": 0.42696090189493885, "grad_norm": 1.6155586118015453, "learning_rate": 2.9306306312842954e-06, "loss": 0.4109, "step": 8010 }, { "epoch": 0.42749393672876523, "grad_norm": 1.5765458288307308, "learning_rate": 2.9303654510046304e-06, "loss": 0.4053, "step": 8020 }, { "epoch": 0.4280269715625916, "grad_norm": 1.6413995649684712, "learning_rate": 2.9300997768791746e-06, "loss": 0.4127, "step": 8030 }, { "epoch": 0.428560006396418, "grad_norm": 1.5874700448655936, "learning_rate": 2.9298336089996537e-06, "loss": 0.41, "step": 8040 }, { "epoch": 0.4290930412302444, "grad_norm": 1.6844801959264637, "learning_rate": 2.9295669474579657e-06, "loss": 0.409, "step": 8050 }, { "epoch": 0.42962607606407077, "grad_norm": 1.460845751006175, "learning_rate": 2.929299792346177e-06, "loss": 0.4012, "step": 8060 }, { "epoch": 0.43015911089789716, "grad_norm": 1.6786113157387517, "learning_rate": 2.9290321437565254e-06, "loss": 0.4078, "step": 8070 }, { "epoch": 0.43069214573172354, "grad_norm": 1.5880080840732538, "learning_rate": 2.9287640017814184e-06, "loss": 0.3992, "step": 8080 }, { "epoch": 0.43122518056555, "grad_norm": 1.691837892650592, "learning_rate": 2.928495366513435e-06, "loss": 0.4232, "step": 8090 }, { "epoch": 0.43175821539937637, "grad_norm": 1.5769955779968576, "learning_rate": 2.928226238045323e-06, "loss": 0.403, "step": 8100 }, { "epoch": 0.43229125023320275, "grad_norm": 1.5119353223803156, "learning_rate": 2.9279566164700016e-06, "loss": 0.3914, "step": 8110 }, { "epoch": 0.43282428506702914, "grad_norm": 1.6975993903696593, "learning_rate": 2.9276865018805598e-06, "loss": 0.4236, "step": 8120 }, { "epoch": 0.4333573199008555, "grad_norm": 1.5882894145215583, "learning_rate": 2.9274158943702576e-06, "loss": 0.4296, "step": 8130 }, { "epoch": 0.4338903547346819, "grad_norm": 1.5448781175456678, "learning_rate": 2.927144794032524e-06, "loss": 0.4251, "step": 8140 }, { "epoch": 0.4344233895685083, "grad_norm": 1.729775236818066, "learning_rate": 2.926873200960959e-06, "loss": 0.4297, "step": 8150 }, { "epoch": 0.4349564244023347, "grad_norm": 1.5906976534767527, "learning_rate": 2.9266011152493318e-06, "loss": 0.4136, "step": 8160 }, { "epoch": 0.43548945923616106, "grad_norm": 1.7401715177125827, "learning_rate": 2.9263285369915825e-06, "loss": 0.4167, "step": 8170 }, { "epoch": 0.4360224940699875, "grad_norm": 1.6079276841774246, "learning_rate": 2.9260554662818217e-06, "loss": 0.4105, "step": 8180 }, { "epoch": 0.4365555289038139, "grad_norm": 1.6881425967310364, "learning_rate": 2.925781903214329e-06, "loss": 0.411, "step": 8190 }, { "epoch": 0.43708856373764027, "grad_norm": 1.8368120422345755, "learning_rate": 2.9255078478835536e-06, "loss": 0.4162, "step": 8200 }, { "epoch": 0.43762159857146665, "grad_norm": 1.4735301058229453, "learning_rate": 2.9252333003841164e-06, "loss": 0.4039, "step": 8210 }, { "epoch": 0.43815463340529304, "grad_norm": 1.4785167027135069, "learning_rate": 2.9249582608108076e-06, "loss": 0.4252, "step": 8220 }, { "epoch": 0.4386876682391194, "grad_norm": 1.4928343612572648, "learning_rate": 2.9246827292585852e-06, "loss": 0.4213, "step": 8230 }, { "epoch": 0.4392207030729458, "grad_norm": 1.4389330256335633, "learning_rate": 2.9244067058225808e-06, "loss": 0.4165, "step": 8240 }, { "epoch": 0.4397537379067722, "grad_norm": 1.6293801431128347, "learning_rate": 2.9241301905980926e-06, "loss": 0.4188, "step": 8250 }, { "epoch": 0.4402867727405986, "grad_norm": 1.6015578269306125, "learning_rate": 2.92385318368059e-06, "loss": 0.4157, "step": 8260 }, { "epoch": 0.440819807574425, "grad_norm": 1.435916236499711, "learning_rate": 2.9235756851657123e-06, "loss": 0.4132, "step": 8270 }, { "epoch": 0.4413528424082514, "grad_norm": 1.5676414055445547, "learning_rate": 2.9232976951492678e-06, "loss": 0.4117, "step": 8280 }, { "epoch": 0.4418858772420778, "grad_norm": 1.479414067746698, "learning_rate": 2.9230192137272357e-06, "loss": 0.408, "step": 8290 }, { "epoch": 0.44241891207590417, "grad_norm": 1.652095027984649, "learning_rate": 2.922740240995763e-06, "loss": 0.3991, "step": 8300 }, { "epoch": 0.44295194690973055, "grad_norm": 1.497777155913364, "learning_rate": 2.9224607770511683e-06, "loss": 0.4195, "step": 8310 }, { "epoch": 0.44348498174355694, "grad_norm": 1.7255631646054368, "learning_rate": 2.9221808219899384e-06, "loss": 0.4253, "step": 8320 }, { "epoch": 0.4440180165773833, "grad_norm": 1.5921587932125698, "learning_rate": 2.92190037590873e-06, "loss": 0.4072, "step": 8330 }, { "epoch": 0.4445510514112097, "grad_norm": 1.4915530417684664, "learning_rate": 2.9216194389043704e-06, "loss": 0.4116, "step": 8340 }, { "epoch": 0.4450840862450361, "grad_norm": 1.6958537172297319, "learning_rate": 2.921338011073854e-06, "loss": 0.4107, "step": 8350 }, { "epoch": 0.4456171210788625, "grad_norm": 1.5904646503087096, "learning_rate": 2.9210560925143475e-06, "loss": 0.4006, "step": 8360 }, { "epoch": 0.4461501559126889, "grad_norm": 1.573718123214918, "learning_rate": 2.920773683323185e-06, "loss": 0.4208, "step": 8370 }, { "epoch": 0.4466831907465153, "grad_norm": 1.5765420572072621, "learning_rate": 2.920490783597871e-06, "loss": 0.408, "step": 8380 }, { "epoch": 0.4472162255803417, "grad_norm": 1.388783808975152, "learning_rate": 2.920207393436079e-06, "loss": 0.4045, "step": 8390 }, { "epoch": 0.44774926041416807, "grad_norm": 1.5560064769009971, "learning_rate": 2.9199235129356516e-06, "loss": 0.4191, "step": 8400 }, { "epoch": 0.44828229524799446, "grad_norm": 1.572600763217562, "learning_rate": 2.9196391421946006e-06, "loss": 0.4056, "step": 8410 }, { "epoch": 0.44881533008182084, "grad_norm": 1.534456816067655, "learning_rate": 2.9193542813111084e-06, "loss": 0.403, "step": 8420 }, { "epoch": 0.4493483649156472, "grad_norm": 1.574436258731142, "learning_rate": 2.9190689303835245e-06, "loss": 0.4231, "step": 8430 }, { "epoch": 0.4498813997494736, "grad_norm": 1.687153536583124, "learning_rate": 2.91878308951037e-06, "loss": 0.4223, "step": 8440 }, { "epoch": 0.4504144345833, "grad_norm": 1.5588068876245815, "learning_rate": 2.918496758790333e-06, "loss": 0.4132, "step": 8450 }, { "epoch": 0.45094746941712643, "grad_norm": 1.733942782998014, "learning_rate": 2.9182099383222716e-06, "loss": 0.4073, "step": 8460 }, { "epoch": 0.4514805042509528, "grad_norm": 1.6026277202619796, "learning_rate": 2.917922628205213e-06, "loss": 0.4183, "step": 8470 }, { "epoch": 0.4520135390847792, "grad_norm": 1.471843191590328, "learning_rate": 2.9176348285383546e-06, "loss": 0.4171, "step": 8480 }, { "epoch": 0.4525465739186056, "grad_norm": 1.5120739698155388, "learning_rate": 2.91734653942106e-06, "loss": 0.4075, "step": 8490 }, { "epoch": 0.453079608752432, "grad_norm": 1.638814100821955, "learning_rate": 2.9170577609528643e-06, "loss": 0.3936, "step": 8500 }, { "epoch": 0.45361264358625836, "grad_norm": 1.578675272530091, "learning_rate": 2.9167684932334704e-06, "loss": 0.4023, "step": 8510 }, { "epoch": 0.45414567842008474, "grad_norm": 1.7097708663287463, "learning_rate": 2.916478736362751e-06, "loss": 0.4281, "step": 8520 }, { "epoch": 0.4546787132539111, "grad_norm": 1.4068358574230455, "learning_rate": 2.9161884904407465e-06, "loss": 0.4081, "step": 8530 }, { "epoch": 0.4552117480877375, "grad_norm": 1.6917007396890231, "learning_rate": 2.9158977555676674e-06, "loss": 0.4134, "step": 8540 }, { "epoch": 0.45574478292156395, "grad_norm": 1.4742286194606795, "learning_rate": 2.915606531843892e-06, "loss": 0.4128, "step": 8550 }, { "epoch": 0.45627781775539034, "grad_norm": 1.4630514633399585, "learning_rate": 2.9153148193699674e-06, "loss": 0.4103, "step": 8560 }, { "epoch": 0.4568108525892167, "grad_norm": 1.7006752475123899, "learning_rate": 2.91502261824661e-06, "loss": 0.4152, "step": 8570 }, { "epoch": 0.4573438874230431, "grad_norm": 1.7474348651195561, "learning_rate": 2.9147299285747055e-06, "loss": 0.4223, "step": 8580 }, { "epoch": 0.4578769222568695, "grad_norm": 1.6107835714965402, "learning_rate": 2.914436750455306e-06, "loss": 0.4119, "step": 8590 }, { "epoch": 0.4584099570906959, "grad_norm": 1.9125128017514217, "learning_rate": 2.914143083989635e-06, "loss": 0.4097, "step": 8600 }, { "epoch": 0.45894299192452226, "grad_norm": 1.4319198063050853, "learning_rate": 2.9138489292790833e-06, "loss": 0.4178, "step": 8610 }, { "epoch": 0.45947602675834864, "grad_norm": 1.7328201137135673, "learning_rate": 2.913554286425209e-06, "loss": 0.4287, "step": 8620 }, { "epoch": 0.46000906159217503, "grad_norm": 1.447257843385008, "learning_rate": 2.9132591555297423e-06, "loss": 0.4059, "step": 8630 }, { "epoch": 0.4605420964260014, "grad_norm": 1.505222329441332, "learning_rate": 2.9129635366945774e-06, "loss": 0.3954, "step": 8640 }, { "epoch": 0.46107513125982785, "grad_norm": 1.5656522742020575, "learning_rate": 2.91266743002178e-06, "loss": 0.3953, "step": 8650 }, { "epoch": 0.46160816609365424, "grad_norm": 1.416237604591824, "learning_rate": 2.9123708356135844e-06, "loss": 0.4066, "step": 8660 }, { "epoch": 0.4621412009274806, "grad_norm": 1.5846909544632926, "learning_rate": 2.912073753572391e-06, "loss": 0.4104, "step": 8670 }, { "epoch": 0.462674235761307, "grad_norm": 1.6736703787065854, "learning_rate": 2.91177618400077e-06, "loss": 0.4065, "step": 8680 }, { "epoch": 0.4632072705951334, "grad_norm": 1.5122104576155988, "learning_rate": 2.9114781270014607e-06, "loss": 0.4063, "step": 8690 }, { "epoch": 0.4637403054289598, "grad_norm": 1.4959558072520527, "learning_rate": 2.9111795826773693e-06, "loss": 0.4042, "step": 8700 }, { "epoch": 0.46427334026278616, "grad_norm": 1.5126427115522083, "learning_rate": 2.9108805511315703e-06, "loss": 0.4156, "step": 8710 }, { "epoch": 0.46480637509661255, "grad_norm": 1.6018787294791146, "learning_rate": 2.9105810324673076e-06, "loss": 0.4134, "step": 8720 }, { "epoch": 0.46533940993043893, "grad_norm": 1.759863015005145, "learning_rate": 2.9102810267879925e-06, "loss": 0.4138, "step": 8730 }, { "epoch": 0.46587244476426537, "grad_norm": 1.805514369757183, "learning_rate": 2.9099805341972045e-06, "loss": 0.4078, "step": 8740 }, { "epoch": 0.46640547959809175, "grad_norm": 1.5148198702600317, "learning_rate": 2.9096795547986903e-06, "loss": 0.4069, "step": 8750 }, { "epoch": 0.46693851443191814, "grad_norm": 1.8852231296025546, "learning_rate": 2.9093780886963667e-06, "loss": 0.4083, "step": 8760 }, { "epoch": 0.4674715492657445, "grad_norm": 1.4539023783041651, "learning_rate": 2.909076135994317e-06, "loss": 0.4233, "step": 8770 }, { "epoch": 0.4680045840995709, "grad_norm": 1.5313647620887976, "learning_rate": 2.9087736967967928e-06, "loss": 0.4082, "step": 8780 }, { "epoch": 0.4685376189333973, "grad_norm": 1.5542347545163724, "learning_rate": 2.908470771208214e-06, "loss": 0.4213, "step": 8790 }, { "epoch": 0.4690706537672237, "grad_norm": 1.5199089239696877, "learning_rate": 2.908167359333168e-06, "loss": 0.4162, "step": 8800 }, { "epoch": 0.46960368860105006, "grad_norm": 1.759628928213846, "learning_rate": 2.9078634612764106e-06, "loss": 0.408, "step": 8810 }, { "epoch": 0.47013672343487645, "grad_norm": 1.5557512397654005, "learning_rate": 2.9075590771428657e-06, "loss": 0.397, "step": 8820 }, { "epoch": 0.4706697582687029, "grad_norm": 1.5479658711487387, "learning_rate": 2.907254207037623e-06, "loss": 0.3984, "step": 8830 }, { "epoch": 0.47120279310252927, "grad_norm": 1.5140068535227431, "learning_rate": 2.9069488510659427e-06, "loss": 0.4017, "step": 8840 }, { "epoch": 0.47173582793635566, "grad_norm": 1.5897960704006835, "learning_rate": 2.906643009333251e-06, "loss": 0.4143, "step": 8850 }, { "epoch": 0.47226886277018204, "grad_norm": 1.7285592168271886, "learning_rate": 2.906336681945143e-06, "loss": 0.4142, "step": 8860 }, { "epoch": 0.4728018976040084, "grad_norm": 1.653535089139761, "learning_rate": 2.9060298690073802e-06, "loss": 0.3957, "step": 8870 }, { "epoch": 0.4733349324378348, "grad_norm": 1.472867310685367, "learning_rate": 2.905722570625893e-06, "loss": 0.4155, "step": 8880 }, { "epoch": 0.4738679672716612, "grad_norm": 1.7057156878110258, "learning_rate": 2.9054147869067774e-06, "loss": 0.4094, "step": 8890 }, { "epoch": 0.4744010021054876, "grad_norm": 1.6670466156944517, "learning_rate": 2.905106517956299e-06, "loss": 0.396, "step": 8900 }, { "epoch": 0.47493403693931396, "grad_norm": 1.5978373853824697, "learning_rate": 2.9047977638808916e-06, "loss": 0.4043, "step": 8910 }, { "epoch": 0.47546707177314035, "grad_norm": 1.5194113205110502, "learning_rate": 2.9044885247871534e-06, "loss": 0.4108, "step": 8920 }, { "epoch": 0.4760001066069668, "grad_norm": 1.543181279894468, "learning_rate": 2.904178800781853e-06, "loss": 0.4118, "step": 8930 }, { "epoch": 0.4765331414407932, "grad_norm": 1.5496527131823437, "learning_rate": 2.9038685919719245e-06, "loss": 0.4082, "step": 8940 }, { "epoch": 0.47706617627461956, "grad_norm": 1.5488129943677427, "learning_rate": 2.9035578984644697e-06, "loss": 0.3993, "step": 8950 }, { "epoch": 0.47759921110844594, "grad_norm": 1.4794880202048881, "learning_rate": 2.9032467203667594e-06, "loss": 0.4122, "step": 8960 }, { "epoch": 0.4781322459422723, "grad_norm": 1.5167802714318308, "learning_rate": 2.90293505778623e-06, "loss": 0.4067, "step": 8970 }, { "epoch": 0.4786652807760987, "grad_norm": 1.546092391364774, "learning_rate": 2.902622910830485e-06, "loss": 0.4071, "step": 8980 }, { "epoch": 0.4791983156099251, "grad_norm": 1.6234210410818577, "learning_rate": 2.902310279607297e-06, "loss": 0.407, "step": 8990 }, { "epoch": 0.4797313504437515, "grad_norm": 1.4703342878927876, "learning_rate": 2.9019971642246037e-06, "loss": 0.4117, "step": 9000 }, { "epoch": 0.48026438527757787, "grad_norm": 1.6655778748112098, "learning_rate": 2.901683564790511e-06, "loss": 0.4131, "step": 9010 }, { "epoch": 0.4807974201114043, "grad_norm": 1.4377216238803339, "learning_rate": 2.901369481413291e-06, "loss": 0.4143, "step": 9020 }, { "epoch": 0.4813304549452307, "grad_norm": 1.5870004932408421, "learning_rate": 2.9010549142013857e-06, "loss": 0.4098, "step": 9030 }, { "epoch": 0.4818634897790571, "grad_norm": 1.7810825428351413, "learning_rate": 2.9007398632634e-06, "loss": 0.4071, "step": 9040 }, { "epoch": 0.48239652461288346, "grad_norm": 1.7734200644758729, "learning_rate": 2.9004243287081093e-06, "loss": 0.4123, "step": 9050 }, { "epoch": 0.48292955944670984, "grad_norm": 1.4639650208073236, "learning_rate": 2.9001083106444534e-06, "loss": 0.3949, "step": 9060 }, { "epoch": 0.48346259428053623, "grad_norm": 1.5984334547779417, "learning_rate": 2.8997918091815408e-06, "loss": 0.3931, "step": 9070 }, { "epoch": 0.4839956291143626, "grad_norm": 1.5279463839184906, "learning_rate": 2.899474824428647e-06, "loss": 0.4003, "step": 9080 }, { "epoch": 0.484528663948189, "grad_norm": 1.3729773971165875, "learning_rate": 2.8991573564952123e-06, "loss": 0.4098, "step": 9090 }, { "epoch": 0.4850616987820154, "grad_norm": 1.5837278152853682, "learning_rate": 2.8988394054908465e-06, "loss": 0.4024, "step": 9100 }, { "epoch": 0.4855947336158418, "grad_norm": 1.621571263150768, "learning_rate": 2.898520971525324e-06, "loss": 0.4111, "step": 9110 }, { "epoch": 0.4861277684496682, "grad_norm": 1.4334226400605534, "learning_rate": 2.898202054708587e-06, "loss": 0.4004, "step": 9120 }, { "epoch": 0.4866608032834946, "grad_norm": 1.7329660280862744, "learning_rate": 2.8978826551507453e-06, "loss": 0.412, "step": 9130 }, { "epoch": 0.487193838117321, "grad_norm": 1.5738326821537885, "learning_rate": 2.8975627729620724e-06, "loss": 0.4082, "step": 9140 }, { "epoch": 0.48772687295114736, "grad_norm": 1.5010788169288483, "learning_rate": 2.8972424082530114e-06, "loss": 0.3957, "step": 9150 }, { "epoch": 0.48825990778497375, "grad_norm": 1.4915431525703808, "learning_rate": 2.8969215611341708e-06, "loss": 0.3961, "step": 9160 }, { "epoch": 0.48879294261880013, "grad_norm": 1.6784132066746704, "learning_rate": 2.8966002317163256e-06, "loss": 0.4007, "step": 9170 }, { "epoch": 0.4893259774526265, "grad_norm": 1.6421114541747448, "learning_rate": 2.896278420110418e-06, "loss": 0.4058, "step": 9180 }, { "epoch": 0.4898590122864529, "grad_norm": 1.5909288229672987, "learning_rate": 2.8959561264275557e-06, "loss": 0.4099, "step": 9190 }, { "epoch": 0.4903920471202793, "grad_norm": 1.3819604352287718, "learning_rate": 2.895633350779014e-06, "loss": 0.3903, "step": 9200 }, { "epoch": 0.4909250819541057, "grad_norm": 1.7378949142298985, "learning_rate": 2.8953100932762332e-06, "loss": 0.404, "step": 9210 }, { "epoch": 0.4914581167879321, "grad_norm": 1.7313372132857148, "learning_rate": 2.8949863540308206e-06, "loss": 0.3939, "step": 9220 }, { "epoch": 0.4919911516217585, "grad_norm": 1.657200380205512, "learning_rate": 2.8946621331545502e-06, "loss": 0.4156, "step": 9230 }, { "epoch": 0.4925241864555849, "grad_norm": 1.6936440183309365, "learning_rate": 2.894337430759363e-06, "loss": 0.4096, "step": 9240 }, { "epoch": 0.49305722128941126, "grad_norm": 1.4933169673193765, "learning_rate": 2.894012246957363e-06, "loss": 0.4097, "step": 9250 }, { "epoch": 0.49359025612323765, "grad_norm": 1.6031539399954224, "learning_rate": 2.893686581860825e-06, "loss": 0.3934, "step": 9260 }, { "epoch": 0.49412329095706403, "grad_norm": 1.57481672676039, "learning_rate": 2.8933604355821863e-06, "loss": 0.3975, "step": 9270 }, { "epoch": 0.4946563257908904, "grad_norm": 1.6604083625961876, "learning_rate": 2.893033808234052e-06, "loss": 0.3993, "step": 9280 }, { "epoch": 0.4951893606247168, "grad_norm": 1.336039210116603, "learning_rate": 2.8927066999291927e-06, "loss": 0.3986, "step": 9290 }, { "epoch": 0.49572239545854324, "grad_norm": 1.5058883724743022, "learning_rate": 2.892379110780546e-06, "loss": 0.3996, "step": 9300 }, { "epoch": 0.4962554302923696, "grad_norm": 1.5092913581208995, "learning_rate": 2.8920510409012135e-06, "loss": 0.4034, "step": 9310 }, { "epoch": 0.496788465126196, "grad_norm": 1.698502350366665, "learning_rate": 2.8917224904044653e-06, "loss": 0.3967, "step": 9320 }, { "epoch": 0.4973214999600224, "grad_norm": 1.6794594752979148, "learning_rate": 2.891393459403736e-06, "loss": 0.4005, "step": 9330 }, { "epoch": 0.4978545347938488, "grad_norm": 1.6575559516729883, "learning_rate": 2.8910639480126265e-06, "loss": 0.4082, "step": 9340 }, { "epoch": 0.49838756962767516, "grad_norm": 1.5836818547664802, "learning_rate": 2.8907339563449034e-06, "loss": 0.4145, "step": 9350 }, { "epoch": 0.49892060446150155, "grad_norm": 1.5492688677923814, "learning_rate": 2.8904034845144982e-06, "loss": 0.4081, "step": 9360 }, { "epoch": 0.49945363929532793, "grad_norm": 1.5151683645537453, "learning_rate": 2.89007253263551e-06, "loss": 0.3979, "step": 9370 }, { "epoch": 0.4999866741291543, "grad_norm": 1.6403046958185434, "learning_rate": 2.8897411008222027e-06, "loss": 0.4054, "step": 9380 }, { "epoch": 0.5005197089629807, "grad_norm": 1.7066554864694206, "learning_rate": 2.8894091891890057e-06, "loss": 0.4079, "step": 9390 }, { "epoch": 0.5010527437968071, "grad_norm": 1.415185528732737, "learning_rate": 2.8890767978505143e-06, "loss": 0.4147, "step": 9400 }, { "epoch": 0.5015857786306335, "grad_norm": 1.664150509088447, "learning_rate": 2.888743926921489e-06, "loss": 0.4099, "step": 9410 }, { "epoch": 0.5021188134644599, "grad_norm": 1.553847379787356, "learning_rate": 2.8884105765168582e-06, "loss": 0.4269, "step": 9420 }, { "epoch": 0.5026518482982862, "grad_norm": 1.526068972526621, "learning_rate": 2.8880767467517113e-06, "loss": 0.412, "step": 9430 }, { "epoch": 0.5031848831321127, "grad_norm": 1.6446946775590638, "learning_rate": 2.8877424377413076e-06, "loss": 0.4123, "step": 9440 }, { "epoch": 0.5037179179659391, "grad_norm": 1.7282185506409873, "learning_rate": 2.887407649601069e-06, "loss": 0.4156, "step": 9450 }, { "epoch": 0.5042509527997655, "grad_norm": 1.678648478014172, "learning_rate": 2.887072382446585e-06, "loss": 0.4092, "step": 9460 }, { "epoch": 0.5047839876335919, "grad_norm": 1.6388324219631059, "learning_rate": 2.8867366363936096e-06, "loss": 0.4221, "step": 9470 }, { "epoch": 0.5053170224674183, "grad_norm": 1.4352677320818634, "learning_rate": 2.8864004115580604e-06, "loss": 0.4078, "step": 9480 }, { "epoch": 0.5058500573012447, "grad_norm": 1.5946313421328584, "learning_rate": 2.8860637080560234e-06, "loss": 0.404, "step": 9490 }, { "epoch": 0.506383092135071, "grad_norm": 1.6394255732614298, "learning_rate": 2.8857265260037477e-06, "loss": 0.4115, "step": 9500 }, { "epoch": 0.5069161269688974, "grad_norm": 1.4922328394211986, "learning_rate": 2.8853888655176478e-06, "loss": 0.4136, "step": 9510 }, { "epoch": 0.5074491618027238, "grad_norm": 1.7176331599168186, "learning_rate": 2.885050726714305e-06, "loss": 0.389, "step": 9520 }, { "epoch": 0.5079821966365502, "grad_norm": 1.7143563046989718, "learning_rate": 2.8847121097104636e-06, "loss": 0.4076, "step": 9530 }, { "epoch": 0.5085152314703766, "grad_norm": 1.6454780687145094, "learning_rate": 2.8843730146230347e-06, "loss": 0.3969, "step": 9540 }, { "epoch": 0.509048266304203, "grad_norm": 1.4689363468454348, "learning_rate": 2.884033441569093e-06, "loss": 0.4064, "step": 9550 }, { "epoch": 0.5095813011380294, "grad_norm": 1.5044312829945745, "learning_rate": 2.8836933906658797e-06, "loss": 0.3976, "step": 9560 }, { "epoch": 0.5101143359718557, "grad_norm": 1.4836392218032295, "learning_rate": 2.8833528620307995e-06, "loss": 0.3901, "step": 9570 }, { "epoch": 0.5106473708056821, "grad_norm": 1.5766636297460137, "learning_rate": 2.8830118557814236e-06, "loss": 0.4079, "step": 9580 }, { "epoch": 0.5111804056395085, "grad_norm": 1.6254674879634141, "learning_rate": 2.8826703720354865e-06, "loss": 0.405, "step": 9590 }, { "epoch": 0.5117134404733349, "grad_norm": 1.4133846767074656, "learning_rate": 2.8823284109108893e-06, "loss": 0.4023, "step": 9600 }, { "epoch": 0.5122464753071613, "grad_norm": 1.7031582596260313, "learning_rate": 2.8819859725256962e-06, "loss": 0.3947, "step": 9610 }, { "epoch": 0.5127795101409877, "grad_norm": 1.5624163066184436, "learning_rate": 2.8816430569981373e-06, "loss": 0.4122, "step": 9620 }, { "epoch": 0.5133125449748142, "grad_norm": 1.7328336859833728, "learning_rate": 2.881299664446607e-06, "loss": 0.3944, "step": 9630 }, { "epoch": 0.5138455798086405, "grad_norm": 1.6153217714134185, "learning_rate": 2.8809557949896644e-06, "loss": 0.4089, "step": 9640 }, { "epoch": 0.5143786146424669, "grad_norm": 1.515318833380608, "learning_rate": 2.8806114487460336e-06, "loss": 0.4089, "step": 9650 }, { "epoch": 0.5149116494762933, "grad_norm": 1.4466205077618761, "learning_rate": 2.8802666258346025e-06, "loss": 0.4136, "step": 9660 }, { "epoch": 0.5154446843101197, "grad_norm": 1.6161321640816924, "learning_rate": 2.8799213263744252e-06, "loss": 0.4083, "step": 9670 }, { "epoch": 0.5159777191439461, "grad_norm": 1.4885332756276555, "learning_rate": 2.8795755504847175e-06, "loss": 0.4076, "step": 9680 }, { "epoch": 0.5165107539777725, "grad_norm": 1.6471482339865045, "learning_rate": 2.8792292982848636e-06, "loss": 0.3956, "step": 9690 }, { "epoch": 0.5170437888115988, "grad_norm": 1.568301485808753, "learning_rate": 2.8788825698944086e-06, "loss": 0.4132, "step": 9700 }, { "epoch": 0.5175768236454252, "grad_norm": 1.4389512590562439, "learning_rate": 2.8785353654330637e-06, "loss": 0.4058, "step": 9710 }, { "epoch": 0.5181098584792516, "grad_norm": 1.6866785287908799, "learning_rate": 2.8781876850207044e-06, "loss": 0.4005, "step": 9720 }, { "epoch": 0.518642893313078, "grad_norm": 1.6458968524252657, "learning_rate": 2.8778395287773696e-06, "loss": 0.4005, "step": 9730 }, { "epoch": 0.5191759281469044, "grad_norm": 1.7150434456610066, "learning_rate": 2.8774908968232643e-06, "loss": 0.395, "step": 9740 }, { "epoch": 0.5197089629807308, "grad_norm": 1.8132235876417566, "learning_rate": 2.877141789278756e-06, "loss": 0.4084, "step": 9750 }, { "epoch": 0.5202419978145572, "grad_norm": 1.4785716305633303, "learning_rate": 2.876792206264377e-06, "loss": 0.3914, "step": 9760 }, { "epoch": 0.5207750326483835, "grad_norm": 1.6596947462689444, "learning_rate": 2.876442147900824e-06, "loss": 0.4026, "step": 9770 }, { "epoch": 0.5213080674822099, "grad_norm": 1.406171622880347, "learning_rate": 2.8760916143089575e-06, "loss": 0.394, "step": 9780 }, { "epoch": 0.5218411023160363, "grad_norm": 1.571862309547308, "learning_rate": 2.8757406056098024e-06, "loss": 0.3953, "step": 9790 }, { "epoch": 0.5223741371498627, "grad_norm": 1.7362524083313324, "learning_rate": 2.875389121924547e-06, "loss": 0.4071, "step": 9800 }, { "epoch": 0.5229071719836891, "grad_norm": 1.6167194815084442, "learning_rate": 2.8750371633745442e-06, "loss": 0.4051, "step": 9810 }, { "epoch": 0.5234402068175156, "grad_norm": 1.5816653347720229, "learning_rate": 2.8746847300813106e-06, "loss": 0.4129, "step": 9820 }, { "epoch": 0.523973241651342, "grad_norm": 1.6458069252330907, "learning_rate": 2.8743318221665273e-06, "loss": 0.3885, "step": 9830 }, { "epoch": 0.5245062764851683, "grad_norm": 1.4772944455091512, "learning_rate": 2.8739784397520384e-06, "loss": 0.3793, "step": 9840 }, { "epoch": 0.5250393113189947, "grad_norm": 1.3845055958148909, "learning_rate": 2.8736245829598517e-06, "loss": 0.4003, "step": 9850 }, { "epoch": 0.5255723461528211, "grad_norm": 1.6157949763742652, "learning_rate": 2.8732702519121404e-06, "loss": 0.396, "step": 9860 }, { "epoch": 0.5261053809866475, "grad_norm": 1.6330823052705719, "learning_rate": 2.872915446731239e-06, "loss": 0.3987, "step": 9870 }, { "epoch": 0.5266384158204739, "grad_norm": 1.711796487941707, "learning_rate": 2.8725601675396475e-06, "loss": 0.4113, "step": 9880 }, { "epoch": 0.5271714506543003, "grad_norm": 1.5848820299463564, "learning_rate": 2.87220441446003e-06, "loss": 0.3991, "step": 9890 }, { "epoch": 0.5277044854881267, "grad_norm": 1.3866219205029024, "learning_rate": 2.8718481876152116e-06, "loss": 0.4253, "step": 9900 }, { "epoch": 0.528237520321953, "grad_norm": 1.4503470440045874, "learning_rate": 2.8714914871281833e-06, "loss": 0.4135, "step": 9910 }, { "epoch": 0.5287705551557794, "grad_norm": 1.433593960550652, "learning_rate": 2.8711343131220996e-06, "loss": 0.3949, "step": 9920 }, { "epoch": 0.5293035899896058, "grad_norm": 1.531094824111099, "learning_rate": 2.870776665720277e-06, "loss": 0.4014, "step": 9930 }, { "epoch": 0.5298366248234322, "grad_norm": 1.6274275512213063, "learning_rate": 2.870418545046197e-06, "loss": 0.4032, "step": 9940 }, { "epoch": 0.5303696596572586, "grad_norm": 1.5505587034864172, "learning_rate": 2.870059951223503e-06, "loss": 0.3941, "step": 9950 }, { "epoch": 0.530902694491085, "grad_norm": 1.618968848772058, "learning_rate": 2.8697008843760035e-06, "loss": 0.3993, "step": 9960 }, { "epoch": 0.5314357293249113, "grad_norm": 1.6120874627959603, "learning_rate": 2.869341344627669e-06, "loss": 0.4058, "step": 9970 }, { "epoch": 0.5319687641587377, "grad_norm": 1.6356430931524362, "learning_rate": 2.8689813321026333e-06, "loss": 0.4121, "step": 9980 }, { "epoch": 0.5325017989925641, "grad_norm": 1.5785457872866275, "learning_rate": 2.868620846925194e-06, "loss": 0.3986, "step": 9990 }, { "epoch": 0.5330348338263906, "grad_norm": 1.4965603722100826, "learning_rate": 2.868259889219811e-06, "loss": 0.3938, "step": 10000 }, { "epoch": 0.533567868660217, "grad_norm": 1.5867025101395884, "learning_rate": 2.8678984591111097e-06, "loss": 0.386, "step": 10010 }, { "epoch": 0.5341009034940434, "grad_norm": 1.5534019324161734, "learning_rate": 2.8675365567238753e-06, "loss": 0.3961, "step": 10020 }, { "epoch": 0.5346339383278698, "grad_norm": 1.8061862740226802, "learning_rate": 2.8671741821830582e-06, "loss": 0.4127, "step": 10030 }, { "epoch": 0.5351669731616961, "grad_norm": 1.4579109951997724, "learning_rate": 2.866811335613772e-06, "loss": 0.4177, "step": 10040 }, { "epoch": 0.5357000079955225, "grad_norm": 1.6642463591867394, "learning_rate": 2.866448017141291e-06, "loss": 0.3907, "step": 10050 }, { "epoch": 0.5362330428293489, "grad_norm": 1.4892110596304102, "learning_rate": 2.866084226891055e-06, "loss": 0.3935, "step": 10060 }, { "epoch": 0.5367660776631753, "grad_norm": 1.5359037672697213, "learning_rate": 2.8657199649886657e-06, "loss": 0.3951, "step": 10070 }, { "epoch": 0.5372991124970017, "grad_norm": 1.5144731486207554, "learning_rate": 2.8653552315598875e-06, "loss": 0.3954, "step": 10080 }, { "epoch": 0.5378321473308281, "grad_norm": 1.5215931325124186, "learning_rate": 2.8649900267306478e-06, "loss": 0.3957, "step": 10090 }, { "epoch": 0.5383651821646545, "grad_norm": 1.6155875390867562, "learning_rate": 2.864624350627036e-06, "loss": 0.3909, "step": 10100 }, { "epoch": 0.5388982169984808, "grad_norm": 1.6428343439318636, "learning_rate": 2.864258203375306e-06, "loss": 0.4025, "step": 10110 }, { "epoch": 0.5394312518323072, "grad_norm": 1.5654541597805702, "learning_rate": 2.863891585101872e-06, "loss": 0.3969, "step": 10120 }, { "epoch": 0.5399642866661336, "grad_norm": 1.5265120674901116, "learning_rate": 2.863524495933313e-06, "loss": 0.3938, "step": 10130 }, { "epoch": 0.54049732149996, "grad_norm": 1.4736365199067178, "learning_rate": 2.863156935996369e-06, "loss": 0.3912, "step": 10140 }, { "epoch": 0.5410303563337864, "grad_norm": 1.7192348969896323, "learning_rate": 2.8627889054179437e-06, "loss": 0.3988, "step": 10150 }, { "epoch": 0.5415633911676128, "grad_norm": 1.7429638348817058, "learning_rate": 2.8624204043251025e-06, "loss": 0.3906, "step": 10160 }, { "epoch": 0.5420964260014391, "grad_norm": 1.5919744294896851, "learning_rate": 2.8620514328450734e-06, "loss": 0.4018, "step": 10170 }, { "epoch": 0.5426294608352655, "grad_norm": 1.6527949272175446, "learning_rate": 2.8616819911052477e-06, "loss": 0.3846, "step": 10180 }, { "epoch": 0.543162495669092, "grad_norm": 1.3926071798106674, "learning_rate": 2.8613120792331775e-06, "loss": 0.3956, "step": 10190 }, { "epoch": 0.5436955305029184, "grad_norm": 1.6568790431469391, "learning_rate": 2.860941697356578e-06, "loss": 0.4014, "step": 10200 }, { "epoch": 0.5442285653367448, "grad_norm": 1.636416760466028, "learning_rate": 2.8605708456033274e-06, "loss": 0.3872, "step": 10210 }, { "epoch": 0.5447616001705712, "grad_norm": 1.4547880434668894, "learning_rate": 2.860199524101464e-06, "loss": 0.3884, "step": 10220 }, { "epoch": 0.5452946350043976, "grad_norm": 1.74912827875163, "learning_rate": 2.859827732979191e-06, "loss": 0.4126, "step": 10230 }, { "epoch": 0.545827669838224, "grad_norm": 1.5875232908515233, "learning_rate": 2.8594554723648723e-06, "loss": 0.3972, "step": 10240 }, { "epoch": 0.5463607046720503, "grad_norm": 1.5339760028458476, "learning_rate": 2.859082742387034e-06, "loss": 0.393, "step": 10250 }, { "epoch": 0.5468937395058767, "grad_norm": 1.4114830634948567, "learning_rate": 2.8587095431743637e-06, "loss": 0.3913, "step": 10260 }, { "epoch": 0.5474267743397031, "grad_norm": 1.3990399103587143, "learning_rate": 2.858335874855712e-06, "loss": 0.4046, "step": 10270 }, { "epoch": 0.5479598091735295, "grad_norm": 1.514447667085036, "learning_rate": 2.8579617375600912e-06, "loss": 0.3998, "step": 10280 }, { "epoch": 0.5484928440073559, "grad_norm": 1.3963669858759415, "learning_rate": 2.857587131416675e-06, "loss": 0.3994, "step": 10290 }, { "epoch": 0.5490258788411823, "grad_norm": 1.486639377515058, "learning_rate": 2.8572120565547994e-06, "loss": 0.4085, "step": 10300 }, { "epoch": 0.5495589136750086, "grad_norm": 1.5333184988023538, "learning_rate": 2.8568365131039624e-06, "loss": 0.4029, "step": 10310 }, { "epoch": 0.550091948508835, "grad_norm": 1.3969208013298322, "learning_rate": 2.8564605011938234e-06, "loss": 0.3912, "step": 10320 }, { "epoch": 0.5506249833426614, "grad_norm": 1.5779766323495066, "learning_rate": 2.856084020954204e-06, "loss": 0.3984, "step": 10330 }, { "epoch": 0.5511580181764878, "grad_norm": 1.5721736227160277, "learning_rate": 2.855707072515087e-06, "loss": 0.3938, "step": 10340 }, { "epoch": 0.5516910530103142, "grad_norm": 1.4232754576981241, "learning_rate": 2.8553296560066166e-06, "loss": 0.3952, "step": 10350 }, { "epoch": 0.5522240878441406, "grad_norm": 1.5407831072897882, "learning_rate": 2.854951771559099e-06, "loss": 0.3951, "step": 10360 }, { "epoch": 0.552757122677967, "grad_norm": 1.44353905855885, "learning_rate": 2.854573419303003e-06, "loss": 0.4079, "step": 10370 }, { "epoch": 0.5532901575117934, "grad_norm": 1.502073545758752, "learning_rate": 2.854194599368958e-06, "loss": 0.3972, "step": 10380 }, { "epoch": 0.5538231923456198, "grad_norm": 1.4819012616625045, "learning_rate": 2.853815311887753e-06, "loss": 0.3887, "step": 10390 }, { "epoch": 0.5543562271794462, "grad_norm": 1.5078684627676604, "learning_rate": 2.8534355569903423e-06, "loss": 0.3866, "step": 10400 }, { "epoch": 0.5548892620132726, "grad_norm": 1.5126708128236444, "learning_rate": 2.853055334807838e-06, "loss": 0.4035, "step": 10410 }, { "epoch": 0.555422296847099, "grad_norm": 1.6044363976388567, "learning_rate": 2.8526746454715158e-06, "loss": 0.4021, "step": 10420 }, { "epoch": 0.5559553316809254, "grad_norm": 1.6704315725220897, "learning_rate": 2.8522934891128114e-06, "loss": 0.3908, "step": 10430 }, { "epoch": 0.5564883665147518, "grad_norm": 1.662836301288045, "learning_rate": 2.851911865863323e-06, "loss": 0.4005, "step": 10440 }, { "epoch": 0.5570214013485781, "grad_norm": 1.601158339393024, "learning_rate": 2.8515297758548085e-06, "loss": 0.4062, "step": 10450 }, { "epoch": 0.5575544361824045, "grad_norm": 1.6237095561230108, "learning_rate": 2.8511472192191884e-06, "loss": 0.3974, "step": 10460 }, { "epoch": 0.5580874710162309, "grad_norm": 1.5793596654689936, "learning_rate": 2.850764196088543e-06, "loss": 0.3966, "step": 10470 }, { "epoch": 0.5586205058500573, "grad_norm": 1.5605363535407812, "learning_rate": 2.8503807065951147e-06, "loss": 0.3881, "step": 10480 }, { "epoch": 0.5591535406838837, "grad_norm": 1.6702675353241125, "learning_rate": 2.849996750871306e-06, "loss": 0.4118, "step": 10490 }, { "epoch": 0.5596865755177101, "grad_norm": 1.643604603389349, "learning_rate": 2.849612329049682e-06, "loss": 0.4049, "step": 10500 }, { "epoch": 0.5602196103515364, "grad_norm": 1.6561848706502336, "learning_rate": 2.849227441262966e-06, "loss": 0.3982, "step": 10510 }, { "epoch": 0.5607526451853628, "grad_norm": 1.590531903879771, "learning_rate": 2.848842087644045e-06, "loss": 0.391, "step": 10520 }, { "epoch": 0.5612856800191892, "grad_norm": 1.6889155758124705, "learning_rate": 2.8484562683259646e-06, "loss": 0.4042, "step": 10530 }, { "epoch": 0.5618187148530156, "grad_norm": 1.641126267683817, "learning_rate": 2.848069983441934e-06, "loss": 0.4022, "step": 10540 }, { "epoch": 0.562351749686842, "grad_norm": 1.5798488813596172, "learning_rate": 2.847683233125319e-06, "loss": 0.389, "step": 10550 }, { "epoch": 0.5628847845206685, "grad_norm": 1.7486106855174088, "learning_rate": 2.8472960175096492e-06, "loss": 0.4088, "step": 10560 }, { "epoch": 0.5634178193544949, "grad_norm": 1.6875542372746397, "learning_rate": 2.8469083367286156e-06, "loss": 0.3923, "step": 10570 }, { "epoch": 0.5639508541883212, "grad_norm": 1.602928739183086, "learning_rate": 2.846520190916066e-06, "loss": 0.3904, "step": 10580 }, { "epoch": 0.5644838890221476, "grad_norm": 1.5715388461988955, "learning_rate": 2.8461315802060128e-06, "loss": 0.4133, "step": 10590 }, { "epoch": 0.565016923855974, "grad_norm": 1.6412992031891698, "learning_rate": 2.845742504732626e-06, "loss": 0.4186, "step": 10600 }, { "epoch": 0.5655499586898004, "grad_norm": 3.6979589778440727, "learning_rate": 2.845352964630238e-06, "loss": 0.3924, "step": 10610 }, { "epoch": 0.5660829935236268, "grad_norm": 1.6569160402984553, "learning_rate": 2.8449629600333402e-06, "loss": 0.3913, "step": 10620 }, { "epoch": 0.5666160283574532, "grad_norm": 1.7075699277783256, "learning_rate": 2.8445724910765854e-06, "loss": 0.4074, "step": 10630 }, { "epoch": 0.5671490631912796, "grad_norm": 1.613437597893956, "learning_rate": 2.8441815578947863e-06, "loss": 0.4077, "step": 10640 }, { "epoch": 0.5676820980251059, "grad_norm": 1.560621673960656, "learning_rate": 2.8437901606229157e-06, "loss": 0.407, "step": 10650 }, { "epoch": 0.5682151328589323, "grad_norm": 1.6156946155944378, "learning_rate": 2.843398299396106e-06, "loss": 0.3958, "step": 10660 }, { "epoch": 0.5687481676927587, "grad_norm": 1.3741828683826942, "learning_rate": 2.8430059743496527e-06, "loss": 0.3976, "step": 10670 }, { "epoch": 0.5692812025265851, "grad_norm": 1.5938642039216717, "learning_rate": 2.8426131856190074e-06, "loss": 0.3998, "step": 10680 }, { "epoch": 0.5698142373604115, "grad_norm": 1.6185251189724847, "learning_rate": 2.842219933339785e-06, "loss": 0.387, "step": 10690 }, { "epoch": 0.5703472721942379, "grad_norm": 1.721631309827698, "learning_rate": 2.8418262176477585e-06, "loss": 0.4054, "step": 10700 }, { "epoch": 0.5708803070280642, "grad_norm": 1.5312620396476804, "learning_rate": 2.8414320386788613e-06, "loss": 0.3848, "step": 10710 }, { "epoch": 0.5714133418618906, "grad_norm": 1.6466118131579388, "learning_rate": 2.841037396569188e-06, "loss": 0.3935, "step": 10720 }, { "epoch": 0.571946376695717, "grad_norm": 1.6032642631175114, "learning_rate": 2.8406422914549915e-06, "loss": 0.3995, "step": 10730 }, { "epoch": 0.5724794115295434, "grad_norm": 1.677288561601342, "learning_rate": 2.8402467234726847e-06, "loss": 0.4024, "step": 10740 }, { "epoch": 0.5730124463633699, "grad_norm": 1.7205492469098584, "learning_rate": 2.839850692758842e-06, "loss": 0.3802, "step": 10750 }, { "epoch": 0.5735454811971963, "grad_norm": 1.60233188086089, "learning_rate": 2.8394541994501955e-06, "loss": 0.3848, "step": 10760 }, { "epoch": 0.5740785160310227, "grad_norm": 1.5647105400654844, "learning_rate": 2.8390572436836382e-06, "loss": 0.3886, "step": 10770 }, { "epoch": 0.574611550864849, "grad_norm": 1.4561842724504217, "learning_rate": 2.8386598255962226e-06, "loss": 0.3931, "step": 10780 }, { "epoch": 0.5751445856986754, "grad_norm": 1.6279145145893323, "learning_rate": 2.8382619453251598e-06, "loss": 0.3981, "step": 10790 }, { "epoch": 0.5756776205325018, "grad_norm": 1.4857469776656202, "learning_rate": 2.8378636030078225e-06, "loss": 0.3799, "step": 10800 }, { "epoch": 0.5762106553663282, "grad_norm": 1.4461093301470354, "learning_rate": 2.837464798781741e-06, "loss": 0.4011, "step": 10810 }, { "epoch": 0.5767436902001546, "grad_norm": 1.4643713873836488, "learning_rate": 2.8370655327846064e-06, "loss": 0.3965, "step": 10820 }, { "epoch": 0.577276725033981, "grad_norm": 1.6397069192723366, "learning_rate": 2.836665805154268e-06, "loss": 0.3904, "step": 10830 }, { "epoch": 0.5778097598678074, "grad_norm": 1.6153524399193817, "learning_rate": 2.8362656160287352e-06, "loss": 0.393, "step": 10840 }, { "epoch": 0.5783427947016337, "grad_norm": 1.533128077411933, "learning_rate": 2.835864965546177e-06, "loss": 0.3881, "step": 10850 }, { "epoch": 0.5788758295354601, "grad_norm": 1.506981038459625, "learning_rate": 2.835463853844922e-06, "loss": 0.3881, "step": 10860 }, { "epoch": 0.5794088643692865, "grad_norm": 1.3957981422127663, "learning_rate": 2.835062281063456e-06, "loss": 0.3939, "step": 10870 }, { "epoch": 0.5799418992031129, "grad_norm": 1.556890609180311, "learning_rate": 2.834660247340426e-06, "loss": 0.382, "step": 10880 }, { "epoch": 0.5804749340369393, "grad_norm": 1.5439773883675694, "learning_rate": 2.8342577528146384e-06, "loss": 0.388, "step": 10890 }, { "epoch": 0.5810079688707657, "grad_norm": 1.6327680129307451, "learning_rate": 2.8338547976250564e-06, "loss": 0.3955, "step": 10900 }, { "epoch": 0.581541003704592, "grad_norm": 1.600281933147193, "learning_rate": 2.8334513819108054e-06, "loss": 0.4014, "step": 10910 }, { "epoch": 0.5820740385384184, "grad_norm": 1.621057959094922, "learning_rate": 2.8330475058111667e-06, "loss": 0.3872, "step": 10920 }, { "epoch": 0.5826070733722448, "grad_norm": 1.601534627497534, "learning_rate": 2.8326431694655822e-06, "loss": 0.3856, "step": 10930 }, { "epoch": 0.5831401082060713, "grad_norm": 1.6904303075461933, "learning_rate": 2.8322383730136524e-06, "loss": 0.4069, "step": 10940 }, { "epoch": 0.5836731430398977, "grad_norm": 1.6290138862314838, "learning_rate": 2.8318331165951376e-06, "loss": 0.4096, "step": 10950 }, { "epoch": 0.5842061778737241, "grad_norm": 1.40145473356515, "learning_rate": 2.831427400349955e-06, "loss": 0.3917, "step": 10960 }, { "epoch": 0.5847392127075505, "grad_norm": 1.4930505550808872, "learning_rate": 2.831021224418182e-06, "loss": 0.3878, "step": 10970 }, { "epoch": 0.5852722475413769, "grad_norm": 1.730859699973955, "learning_rate": 2.8306145889400537e-06, "loss": 0.3844, "step": 10980 }, { "epoch": 0.5858052823752032, "grad_norm": 1.6731858008414877, "learning_rate": 2.830207494055965e-06, "loss": 0.396, "step": 10990 }, { "epoch": 0.5863383172090296, "grad_norm": 1.860870918490304, "learning_rate": 2.8297999399064696e-06, "loss": 0.3836, "step": 11000 }, { "epoch": 0.586871352042856, "grad_norm": 1.6065116215870288, "learning_rate": 2.8293919266322776e-06, "loss": 0.3946, "step": 11010 }, { "epoch": 0.5874043868766824, "grad_norm": 1.7196307359790566, "learning_rate": 2.8289834543742597e-06, "loss": 0.3921, "step": 11020 }, { "epoch": 0.5879374217105088, "grad_norm": 1.4683301802490478, "learning_rate": 2.8285745232734446e-06, "loss": 0.3972, "step": 11030 }, { "epoch": 0.5884704565443352, "grad_norm": 1.58314055523465, "learning_rate": 2.828165133471019e-06, "loss": 0.3904, "step": 11040 }, { "epoch": 0.5890034913781615, "grad_norm": 1.3428032769263376, "learning_rate": 2.8277552851083276e-06, "loss": 0.3957, "step": 11050 }, { "epoch": 0.5895365262119879, "grad_norm": 1.6980158256452536, "learning_rate": 2.827344978326875e-06, "loss": 0.3956, "step": 11060 }, { "epoch": 0.5900695610458143, "grad_norm": 1.4273036074140575, "learning_rate": 2.826934213268323e-06, "loss": 0.4123, "step": 11070 }, { "epoch": 0.5906025958796407, "grad_norm": 1.4760804808151409, "learning_rate": 2.826522990074491e-06, "loss": 0.3933, "step": 11080 }, { "epoch": 0.5911356307134671, "grad_norm": 1.5592700800967758, "learning_rate": 2.826111308887358e-06, "loss": 0.3901, "step": 11090 }, { "epoch": 0.5916686655472935, "grad_norm": 1.6755229269602052, "learning_rate": 2.82569916984906e-06, "loss": 0.3887, "step": 11100 }, { "epoch": 0.5922017003811199, "grad_norm": 1.5754410324386798, "learning_rate": 2.8252865731018915e-06, "loss": 0.3926, "step": 11110 }, { "epoch": 0.5927347352149464, "grad_norm": 1.689941680605763, "learning_rate": 2.8248735187883053e-06, "loss": 0.403, "step": 11120 }, { "epoch": 0.5932677700487727, "grad_norm": 1.4887919059673544, "learning_rate": 2.824460007050912e-06, "loss": 0.412, "step": 11130 }, { "epoch": 0.5938008048825991, "grad_norm": 1.4112522194521475, "learning_rate": 2.824046038032479e-06, "loss": 0.393, "step": 11140 }, { "epoch": 0.5943338397164255, "grad_norm": 1.5321257088523927, "learning_rate": 2.8236316118759337e-06, "loss": 0.3945, "step": 11150 }, { "epoch": 0.5948668745502519, "grad_norm": 1.5716652350993627, "learning_rate": 2.82321672872436e-06, "loss": 0.4042, "step": 11160 }, { "epoch": 0.5953999093840783, "grad_norm": 1.618259304671504, "learning_rate": 2.822801388721e-06, "loss": 0.3931, "step": 11170 }, { "epoch": 0.5959329442179047, "grad_norm": 1.589493000573229, "learning_rate": 2.8223855920092527e-06, "loss": 0.4032, "step": 11180 }, { "epoch": 0.596465979051731, "grad_norm": 1.637701216793911, "learning_rate": 2.821969338732676e-06, "loss": 0.3916, "step": 11190 }, { "epoch": 0.5969990138855574, "grad_norm": 1.6518253397411737, "learning_rate": 2.8215526290349852e-06, "loss": 0.3974, "step": 11200 }, { "epoch": 0.5975320487193838, "grad_norm": 1.708003531491653, "learning_rate": 2.8211354630600516e-06, "loss": 0.3787, "step": 11210 }, { "epoch": 0.5980650835532102, "grad_norm": 1.6079405003350675, "learning_rate": 2.8207178409519065e-06, "loss": 0.3952, "step": 11220 }, { "epoch": 0.5985981183870366, "grad_norm": 1.6239283535281774, "learning_rate": 2.8202997628547372e-06, "loss": 0.4077, "step": 11230 }, { "epoch": 0.599131153220863, "grad_norm": 1.6018865470980452, "learning_rate": 2.819881228912888e-06, "loss": 0.3956, "step": 11240 }, { "epoch": 0.5996641880546894, "grad_norm": 1.5010263983394176, "learning_rate": 2.8194622392708623e-06, "loss": 0.3975, "step": 11250 }, { "epoch": 0.6001972228885157, "grad_norm": 1.5002876031292254, "learning_rate": 2.819042794073319e-06, "loss": 0.3939, "step": 11260 }, { "epoch": 0.6007302577223421, "grad_norm": 1.4924650200651106, "learning_rate": 2.8186228934650757e-06, "loss": 0.3998, "step": 11270 }, { "epoch": 0.6012632925561685, "grad_norm": 1.8183182565396483, "learning_rate": 2.8182025375911064e-06, "loss": 0.4073, "step": 11280 }, { "epoch": 0.6017963273899949, "grad_norm": 1.542696349302032, "learning_rate": 2.8177817265965424e-06, "loss": 0.4024, "step": 11290 }, { "epoch": 0.6023293622238213, "grad_norm": 1.7645257426266767, "learning_rate": 2.8173604606266723e-06, "loss": 0.3937, "step": 11300 }, { "epoch": 0.6028623970576478, "grad_norm": 1.5206299790783435, "learning_rate": 2.816938739826942e-06, "loss": 0.4063, "step": 11310 }, { "epoch": 0.6033954318914742, "grad_norm": 1.5435971182398083, "learning_rate": 2.816516564342954e-06, "loss": 0.3863, "step": 11320 }, { "epoch": 0.6039284667253005, "grad_norm": 1.6734834601567221, "learning_rate": 2.8160939343204675e-06, "loss": 0.3961, "step": 11330 }, { "epoch": 0.6044615015591269, "grad_norm": 1.658222991081644, "learning_rate": 2.8156708499054003e-06, "loss": 0.3838, "step": 11340 }, { "epoch": 0.6049945363929533, "grad_norm": 1.5771183053159084, "learning_rate": 2.815247311243825e-06, "loss": 0.4024, "step": 11350 }, { "epoch": 0.6055275712267797, "grad_norm": 1.578314296310473, "learning_rate": 2.8148233184819717e-06, "loss": 0.3935, "step": 11360 }, { "epoch": 0.6060606060606061, "grad_norm": 1.6897340844857647, "learning_rate": 2.814398871766228e-06, "loss": 0.4054, "step": 11370 }, { "epoch": 0.6065936408944325, "grad_norm": 1.6566983053221538, "learning_rate": 2.813973971243138e-06, "loss": 0.3946, "step": 11380 }, { "epoch": 0.6071266757282588, "grad_norm": 1.5285278034309233, "learning_rate": 2.813548617059401e-06, "loss": 0.3991, "step": 11390 }, { "epoch": 0.6076597105620852, "grad_norm": 1.6301930066901391, "learning_rate": 2.8131228093618756e-06, "loss": 0.3985, "step": 11400 }, { "epoch": 0.6081927453959116, "grad_norm": 1.562617132118841, "learning_rate": 2.8126965482975745e-06, "loss": 0.3931, "step": 11410 }, { "epoch": 0.608725780229738, "grad_norm": 1.6545662683126874, "learning_rate": 2.8122698340136684e-06, "loss": 0.3931, "step": 11420 }, { "epoch": 0.6092588150635644, "grad_norm": 1.557998796185988, "learning_rate": 2.811842666657484e-06, "loss": 0.3955, "step": 11430 }, { "epoch": 0.6097918498973908, "grad_norm": 1.7368039494892533, "learning_rate": 2.811415046376504e-06, "loss": 0.39, "step": 11440 }, { "epoch": 0.6103248847312172, "grad_norm": 1.4119999043698248, "learning_rate": 2.8109869733183683e-06, "loss": 0.3812, "step": 11450 }, { "epoch": 0.6108579195650435, "grad_norm": 1.5442733322001594, "learning_rate": 2.8105584476308726e-06, "loss": 0.3971, "step": 11460 }, { "epoch": 0.6113909543988699, "grad_norm": 1.6011152090696201, "learning_rate": 2.8101294694619697e-06, "loss": 0.3979, "step": 11470 }, { "epoch": 0.6119239892326963, "grad_norm": 1.7118684012085856, "learning_rate": 2.809700038959767e-06, "loss": 0.4046, "step": 11480 }, { "epoch": 0.6124570240665227, "grad_norm": 1.5630829040472216, "learning_rate": 2.80927015627253e-06, "loss": 0.3885, "step": 11490 }, { "epoch": 0.6129900589003492, "grad_norm": 1.4463955260745038, "learning_rate": 2.808839821548678e-06, "loss": 0.387, "step": 11500 }, { "epoch": 0.6135230937341756, "grad_norm": 1.559307508910976, "learning_rate": 2.8084090349367884e-06, "loss": 0.3924, "step": 11510 }, { "epoch": 0.614056128568002, "grad_norm": 1.4961447241233707, "learning_rate": 2.807977796585595e-06, "loss": 0.3983, "step": 11520 }, { "epoch": 0.6145891634018283, "grad_norm": 1.5905112762805313, "learning_rate": 2.8075461066439847e-06, "loss": 0.3978, "step": 11530 }, { "epoch": 0.6151221982356547, "grad_norm": 1.594145707426966, "learning_rate": 2.807113965261003e-06, "loss": 0.3906, "step": 11540 }, { "epoch": 0.6156552330694811, "grad_norm": 1.5166910573016834, "learning_rate": 2.8066813725858503e-06, "loss": 0.3941, "step": 11550 }, { "epoch": 0.6161882679033075, "grad_norm": 1.550322330198248, "learning_rate": 2.8062483287678827e-06, "loss": 0.3825, "step": 11560 }, { "epoch": 0.6167213027371339, "grad_norm": 1.5409823337605422, "learning_rate": 2.8058148339566126e-06, "loss": 0.3885, "step": 11570 }, { "epoch": 0.6172543375709603, "grad_norm": 1.54912229475863, "learning_rate": 2.8053808883017076e-06, "loss": 0.3976, "step": 11580 }, { "epoch": 0.6177873724047866, "grad_norm": 1.6634766519483923, "learning_rate": 2.804946491952991e-06, "loss": 0.402, "step": 11590 }, { "epoch": 0.618320407238613, "grad_norm": 1.5752399891749032, "learning_rate": 2.8045116450604415e-06, "loss": 0.3974, "step": 11600 }, { "epoch": 0.6188534420724394, "grad_norm": 1.4769240667947436, "learning_rate": 2.8040763477741943e-06, "loss": 0.3881, "step": 11610 }, { "epoch": 0.6193864769062658, "grad_norm": 1.4996019797104767, "learning_rate": 2.8036406002445397e-06, "loss": 0.3867, "step": 11620 }, { "epoch": 0.6199195117400922, "grad_norm": 1.6892343067338462, "learning_rate": 2.8032044026219223e-06, "loss": 0.3916, "step": 11630 }, { "epoch": 0.6204525465739186, "grad_norm": 1.499296099145915, "learning_rate": 2.802767755056944e-06, "loss": 0.3843, "step": 11640 }, { "epoch": 0.620985581407745, "grad_norm": 1.6891078099206098, "learning_rate": 2.8023306577003603e-06, "loss": 0.3802, "step": 11650 }, { "epoch": 0.6215186162415713, "grad_norm": 1.5359532620906358, "learning_rate": 2.8018931107030827e-06, "loss": 0.3974, "step": 11660 }, { "epoch": 0.6220516510753977, "grad_norm": 1.551650860156057, "learning_rate": 2.8014551142161787e-06, "loss": 0.3895, "step": 11670 }, { "epoch": 0.6225846859092242, "grad_norm": 1.3962965306756638, "learning_rate": 2.8010166683908696e-06, "loss": 0.3881, "step": 11680 }, { "epoch": 0.6231177207430506, "grad_norm": 1.5671334600304856, "learning_rate": 2.800577773378533e-06, "loss": 0.4011, "step": 11690 }, { "epoch": 0.623650755576877, "grad_norm": 1.6220708730378142, "learning_rate": 2.800138429330701e-06, "loss": 0.3995, "step": 11700 }, { "epoch": 0.6241837904107034, "grad_norm": 1.580132090675789, "learning_rate": 2.799698636399061e-06, "loss": 0.3936, "step": 11710 }, { "epoch": 0.6247168252445298, "grad_norm": 1.4441344481236609, "learning_rate": 2.7992583947354555e-06, "loss": 0.3971, "step": 11720 }, { "epoch": 0.6252498600783561, "grad_norm": 1.8916747056041694, "learning_rate": 2.7988177044918806e-06, "loss": 0.4003, "step": 11730 }, { "epoch": 0.6257828949121825, "grad_norm": 1.6477323388946823, "learning_rate": 2.7983765658204892e-06, "loss": 0.3939, "step": 11740 }, { "epoch": 0.6263159297460089, "grad_norm": 1.5218455501090793, "learning_rate": 2.797934978873588e-06, "loss": 0.3939, "step": 11750 }, { "epoch": 0.6268489645798353, "grad_norm": 1.4701590918490661, "learning_rate": 2.7974929438036384e-06, "loss": 0.3896, "step": 11760 }, { "epoch": 0.6273819994136617, "grad_norm": 1.5835516193376344, "learning_rate": 2.797050460763257e-06, "loss": 0.3922, "step": 11770 }, { "epoch": 0.6279150342474881, "grad_norm": 1.5041646873772463, "learning_rate": 2.7966075299052145e-06, "loss": 0.3892, "step": 11780 }, { "epoch": 0.6284480690813145, "grad_norm": 1.6303887715028764, "learning_rate": 2.7961641513824376e-06, "loss": 0.3887, "step": 11790 }, { "epoch": 0.6289811039151408, "grad_norm": 1.5349812867480792, "learning_rate": 2.7957203253480044e-06, "loss": 0.3917, "step": 11800 }, { "epoch": 0.6295141387489672, "grad_norm": 1.6465981440360993, "learning_rate": 2.7952760519551523e-06, "loss": 0.3738, "step": 11810 }, { "epoch": 0.6300471735827936, "grad_norm": 1.6584516900753647, "learning_rate": 2.794831331357268e-06, "loss": 0.4002, "step": 11820 }, { "epoch": 0.63058020841662, "grad_norm": 1.5859618642595954, "learning_rate": 2.7943861637078965e-06, "loss": 0.4007, "step": 11830 }, { "epoch": 0.6311132432504464, "grad_norm": 1.7511956751160198, "learning_rate": 2.793940549160735e-06, "loss": 0.4001, "step": 11840 }, { "epoch": 0.6316462780842728, "grad_norm": 1.4246756095863924, "learning_rate": 2.7934944878696365e-06, "loss": 0.3909, "step": 11850 }, { "epoch": 0.6321793129180991, "grad_norm": 1.646032214208831, "learning_rate": 2.793047979988607e-06, "loss": 0.3961, "step": 11860 }, { "epoch": 0.6327123477519256, "grad_norm": 1.5620031004110237, "learning_rate": 2.7926010256718064e-06, "loss": 0.3898, "step": 11870 }, { "epoch": 0.633245382585752, "grad_norm": 1.7086478858528293, "learning_rate": 2.7921536250735508e-06, "loss": 0.3971, "step": 11880 }, { "epoch": 0.6337784174195784, "grad_norm": 1.6443157640472792, "learning_rate": 2.791705778348308e-06, "loss": 0.3984, "step": 11890 }, { "epoch": 0.6343114522534048, "grad_norm": 1.584048024256836, "learning_rate": 2.7912574856507014e-06, "loss": 0.3923, "step": 11900 }, { "epoch": 0.6348444870872312, "grad_norm": 1.510070722855611, "learning_rate": 2.7908087471355076e-06, "loss": 0.3888, "step": 11910 }, { "epoch": 0.6353775219210576, "grad_norm": 1.6268634317390713, "learning_rate": 2.790359562957657e-06, "loss": 0.3895, "step": 11920 }, { "epoch": 0.635910556754884, "grad_norm": 1.6236884243463365, "learning_rate": 2.7899099332722352e-06, "loss": 0.3973, "step": 11930 }, { "epoch": 0.6364435915887103, "grad_norm": 1.6338186141778763, "learning_rate": 2.7894598582344803e-06, "loss": 0.3866, "step": 11940 }, { "epoch": 0.6369766264225367, "grad_norm": 1.5622984679180052, "learning_rate": 2.789009337999784e-06, "loss": 0.3763, "step": 11950 }, { "epoch": 0.6375096612563631, "grad_norm": 1.673952296983321, "learning_rate": 2.788558372723693e-06, "loss": 0.3925, "step": 11960 }, { "epoch": 0.6380426960901895, "grad_norm": 1.5932902944192429, "learning_rate": 2.7881069625619065e-06, "loss": 0.3848, "step": 11970 }, { "epoch": 0.6385757309240159, "grad_norm": 1.4751362343477044, "learning_rate": 2.7876551076702774e-06, "loss": 0.3869, "step": 11980 }, { "epoch": 0.6391087657578423, "grad_norm": 1.4843454969109606, "learning_rate": 2.7872028082048128e-06, "loss": 0.3819, "step": 11990 }, { "epoch": 0.6396418005916686, "grad_norm": 1.5677105104818196, "learning_rate": 2.7867500643216733e-06, "loss": 0.3892, "step": 12000 }, { "epoch": 0.640174835425495, "grad_norm": 1.5718146747780057, "learning_rate": 2.7862968761771713e-06, "loss": 0.3888, "step": 12010 }, { "epoch": 0.6407078702593214, "grad_norm": 1.5279642054407692, "learning_rate": 2.7858432439277754e-06, "loss": 0.3968, "step": 12020 }, { "epoch": 0.6412409050931478, "grad_norm": 1.6503034433177566, "learning_rate": 2.7853891677301045e-06, "loss": 0.3834, "step": 12030 }, { "epoch": 0.6417739399269742, "grad_norm": 1.7350616613879568, "learning_rate": 2.784934647740934e-06, "loss": 0.4051, "step": 12040 }, { "epoch": 0.6423069747608006, "grad_norm": 1.7500245634075582, "learning_rate": 2.784479684117189e-06, "loss": 0.3959, "step": 12050 }, { "epoch": 0.6428400095946271, "grad_norm": 1.6174581393382492, "learning_rate": 2.7840242770159503e-06, "loss": 0.3949, "step": 12060 }, { "epoch": 0.6433730444284534, "grad_norm": 1.4739002685686147, "learning_rate": 2.783568426594451e-06, "loss": 0.3791, "step": 12070 }, { "epoch": 0.6439060792622798, "grad_norm": 1.6727367696671884, "learning_rate": 2.7831121330100775e-06, "loss": 0.3863, "step": 12080 }, { "epoch": 0.6444391140961062, "grad_norm": 1.5022474982429521, "learning_rate": 2.7826553964203694e-06, "loss": 0.3845, "step": 12090 }, { "epoch": 0.6449721489299326, "grad_norm": 1.5254855001374756, "learning_rate": 2.782198216983018e-06, "loss": 0.3992, "step": 12100 }, { "epoch": 0.645505183763759, "grad_norm": 1.6587868380084196, "learning_rate": 2.7817405948558684e-06, "loss": 0.3871, "step": 12110 }, { "epoch": 0.6460382185975854, "grad_norm": 1.5555910493220377, "learning_rate": 2.7812825301969194e-06, "loss": 0.3994, "step": 12120 }, { "epoch": 0.6465712534314118, "grad_norm": 1.5755611511995655, "learning_rate": 2.7808240231643207e-06, "loss": 0.3913, "step": 12130 }, { "epoch": 0.6471042882652381, "grad_norm": 1.6869162094152887, "learning_rate": 2.780365073916377e-06, "loss": 0.3932, "step": 12140 }, { "epoch": 0.6476373230990645, "grad_norm": 1.519299268086837, "learning_rate": 2.779905682611543e-06, "loss": 0.3978, "step": 12150 }, { "epoch": 0.6481703579328909, "grad_norm": 1.4803838347284977, "learning_rate": 2.779445849408428e-06, "loss": 0.3783, "step": 12160 }, { "epoch": 0.6487033927667173, "grad_norm": 1.7044629885259677, "learning_rate": 2.7789855744657935e-06, "loss": 0.3766, "step": 12170 }, { "epoch": 0.6492364276005437, "grad_norm": 1.5840843113701972, "learning_rate": 2.7785248579425537e-06, "loss": 0.3749, "step": 12180 }, { "epoch": 0.6497694624343701, "grad_norm": 1.5211073198637621, "learning_rate": 2.7780636999977737e-06, "loss": 0.3908, "step": 12190 }, { "epoch": 0.6503024972681964, "grad_norm": 1.5858033266004543, "learning_rate": 2.7776021007906734e-06, "loss": 0.3901, "step": 12200 }, { "epoch": 0.6508355321020228, "grad_norm": 1.6076389777867792, "learning_rate": 2.7771400604806227e-06, "loss": 0.3923, "step": 12210 }, { "epoch": 0.6513685669358492, "grad_norm": 1.7696672716687496, "learning_rate": 2.7766775792271464e-06, "loss": 0.3976, "step": 12220 }, { "epoch": 0.6519016017696756, "grad_norm": 1.7391643004557578, "learning_rate": 2.7762146571899187e-06, "loss": 0.3777, "step": 12230 }, { "epoch": 0.6524346366035021, "grad_norm": 1.6984356458111702, "learning_rate": 2.775751294528768e-06, "loss": 0.3855, "step": 12240 }, { "epoch": 0.6529676714373285, "grad_norm": 1.7935011590016339, "learning_rate": 2.775287491403674e-06, "loss": 0.3936, "step": 12250 }, { "epoch": 0.6535007062711549, "grad_norm": 1.6694000704578658, "learning_rate": 2.7748232479747683e-06, "loss": 0.3895, "step": 12260 }, { "epoch": 0.6540337411049812, "grad_norm": 1.499603418456414, "learning_rate": 2.774358564402336e-06, "loss": 0.3925, "step": 12270 }, { "epoch": 0.6545667759388076, "grad_norm": 1.6250028271238461, "learning_rate": 2.773893440846812e-06, "loss": 0.3975, "step": 12280 }, { "epoch": 0.655099810772634, "grad_norm": 1.481337339713028, "learning_rate": 2.7734278774687842e-06, "loss": 0.3887, "step": 12290 }, { "epoch": 0.6556328456064604, "grad_norm": 1.5277235272770724, "learning_rate": 2.7729618744289933e-06, "loss": 0.3902, "step": 12300 }, { "epoch": 0.6561658804402868, "grad_norm": 1.6356182023504948, "learning_rate": 2.7724954318883294e-06, "loss": 0.3938, "step": 12310 }, { "epoch": 0.6566989152741132, "grad_norm": 1.5844459219660234, "learning_rate": 2.772028550007837e-06, "loss": 0.3879, "step": 12320 }, { "epoch": 0.6572319501079396, "grad_norm": 1.74063027247512, "learning_rate": 2.77156122894871e-06, "loss": 0.3794, "step": 12330 }, { "epoch": 0.6577649849417659, "grad_norm": 1.6460156163706472, "learning_rate": 2.7710934688722958e-06, "loss": 0.3958, "step": 12340 }, { "epoch": 0.6582980197755923, "grad_norm": 1.4862972578445002, "learning_rate": 2.7706252699400917e-06, "loss": 0.3901, "step": 12350 }, { "epoch": 0.6588310546094187, "grad_norm": 1.6798943550451348, "learning_rate": 2.7701566323137478e-06, "loss": 0.3924, "step": 12360 }, { "epoch": 0.6593640894432451, "grad_norm": 1.7390232253098628, "learning_rate": 2.7696875561550656e-06, "loss": 0.397, "step": 12370 }, { "epoch": 0.6598971242770715, "grad_norm": 1.6570460319815885, "learning_rate": 2.769218041625997e-06, "loss": 0.3744, "step": 12380 }, { "epoch": 0.6604301591108979, "grad_norm": 1.7009473756868425, "learning_rate": 2.768748088888646e-06, "loss": 0.3823, "step": 12390 }, { "epoch": 0.6609631939447242, "grad_norm": 1.633786993834928, "learning_rate": 2.768277698105268e-06, "loss": 0.3915, "step": 12400 }, { "epoch": 0.6614962287785506, "grad_norm": 1.6082217898062294, "learning_rate": 2.767806869438269e-06, "loss": 0.3939, "step": 12410 }, { "epoch": 0.662029263612377, "grad_norm": 1.8336157331627372, "learning_rate": 2.767335603050207e-06, "loss": 0.3989, "step": 12420 }, { "epoch": 0.6625622984462035, "grad_norm": 1.5944938834193176, "learning_rate": 2.7668638991037905e-06, "loss": 0.3828, "step": 12430 }, { "epoch": 0.6630953332800299, "grad_norm": 1.6435895951256687, "learning_rate": 2.7663917577618794e-06, "loss": 0.3866, "step": 12440 }, { "epoch": 0.6636283681138563, "grad_norm": 1.6459012383545821, "learning_rate": 2.7659191791874847e-06, "loss": 0.3867, "step": 12450 }, { "epoch": 0.6641614029476827, "grad_norm": 1.6654858926813907, "learning_rate": 2.7654461635437677e-06, "loss": 0.386, "step": 12460 }, { "epoch": 0.664694437781509, "grad_norm": 1.6554821904539627, "learning_rate": 2.7649727109940407e-06, "loss": 0.3967, "step": 12470 }, { "epoch": 0.6652274726153354, "grad_norm": 1.4834347250957596, "learning_rate": 2.7644988217017683e-06, "loss": 0.3742, "step": 12480 }, { "epoch": 0.6657605074491618, "grad_norm": 1.3693380798575572, "learning_rate": 2.7640244958305642e-06, "loss": 0.3847, "step": 12490 }, { "epoch": 0.6662935422829882, "grad_norm": 1.5588271991203428, "learning_rate": 2.763549733544193e-06, "loss": 0.3939, "step": 12500 }, { "epoch": 0.6668265771168146, "grad_norm": 1.5094609414840297, "learning_rate": 2.7630745350065716e-06, "loss": 0.3948, "step": 12510 }, { "epoch": 0.667359611950641, "grad_norm": 1.6096562139624555, "learning_rate": 2.7625989003817652e-06, "loss": 0.3873, "step": 12520 }, { "epoch": 0.6678926467844674, "grad_norm": 1.3720150144708103, "learning_rate": 2.762122829833991e-06, "loss": 0.4006, "step": 12530 }, { "epoch": 0.6684256816182937, "grad_norm": 1.5973622483367065, "learning_rate": 2.761646323527616e-06, "loss": 0.3935, "step": 12540 }, { "epoch": 0.6689587164521201, "grad_norm": 1.670572258949521, "learning_rate": 2.7611693816271594e-06, "loss": 0.3831, "step": 12550 }, { "epoch": 0.6694917512859465, "grad_norm": 1.5466619369265269, "learning_rate": 2.760692004297288e-06, "loss": 0.3962, "step": 12560 }, { "epoch": 0.6700247861197729, "grad_norm": 1.5784082936759711, "learning_rate": 2.7602141917028215e-06, "loss": 0.3893, "step": 12570 }, { "epoch": 0.6705578209535993, "grad_norm": 1.3770342295151579, "learning_rate": 2.7597359440087274e-06, "loss": 0.3785, "step": 12580 }, { "epoch": 0.6710908557874257, "grad_norm": 1.5267884625368433, "learning_rate": 2.7592572613801262e-06, "loss": 0.3958, "step": 12590 }, { "epoch": 0.671623890621252, "grad_norm": 1.55731717516025, "learning_rate": 2.7587781439822857e-06, "loss": 0.3944, "step": 12600 }, { "epoch": 0.6721569254550784, "grad_norm": 1.5908625787217203, "learning_rate": 2.758298591980626e-06, "loss": 0.4036, "step": 12610 }, { "epoch": 0.6726899602889049, "grad_norm": 1.7357248306716566, "learning_rate": 2.7578186055407176e-06, "loss": 0.394, "step": 12620 }, { "epoch": 0.6732229951227313, "grad_norm": 1.595642554910627, "learning_rate": 2.757338184828278e-06, "loss": 0.3869, "step": 12630 }, { "epoch": 0.6737560299565577, "grad_norm": 1.5204323027139102, "learning_rate": 2.756857330009177e-06, "loss": 0.3994, "step": 12640 }, { "epoch": 0.6742890647903841, "grad_norm": 1.4689370333113319, "learning_rate": 2.7563760412494345e-06, "loss": 0.3902, "step": 12650 }, { "epoch": 0.6748220996242105, "grad_norm": 1.5979756040887692, "learning_rate": 2.755894318715219e-06, "loss": 0.4041, "step": 12660 }, { "epoch": 0.6753551344580369, "grad_norm": 1.5231933200814187, "learning_rate": 2.7554121625728494e-06, "loss": 0.376, "step": 12670 }, { "epoch": 0.6758881692918632, "grad_norm": 1.5699045488484404, "learning_rate": 2.7549295729887943e-06, "loss": 0.394, "step": 12680 }, { "epoch": 0.6764212041256896, "grad_norm": 1.4778739839337431, "learning_rate": 2.7544465501296718e-06, "loss": 0.3901, "step": 12690 }, { "epoch": 0.676954238959516, "grad_norm": 1.6906897466958548, "learning_rate": 2.753963094162249e-06, "loss": 0.3907, "step": 12700 }, { "epoch": 0.6774872737933424, "grad_norm": 1.6157173022924884, "learning_rate": 2.7534792052534444e-06, "loss": 0.3981, "step": 12710 }, { "epoch": 0.6780203086271688, "grad_norm": 1.4746277625120372, "learning_rate": 2.752994883570324e-06, "loss": 0.3818, "step": 12720 }, { "epoch": 0.6785533434609952, "grad_norm": 1.4980146475409215, "learning_rate": 2.7525101292801038e-06, "loss": 0.3947, "step": 12730 }, { "epoch": 0.6790863782948215, "grad_norm": 1.7288711020539997, "learning_rate": 2.7520249425501493e-06, "loss": 0.3874, "step": 12740 }, { "epoch": 0.6796194131286479, "grad_norm": 1.487037201541101, "learning_rate": 2.7515393235479764e-06, "loss": 0.3682, "step": 12750 }, { "epoch": 0.6801524479624743, "grad_norm": 1.5890556211452727, "learning_rate": 2.751053272441248e-06, "loss": 0.3829, "step": 12760 }, { "epoch": 0.6806854827963007, "grad_norm": 1.4327425499391107, "learning_rate": 2.7505667893977774e-06, "loss": 0.3935, "step": 12770 }, { "epoch": 0.6812185176301271, "grad_norm": 1.691058617531051, "learning_rate": 2.750079874585528e-06, "loss": 0.4008, "step": 12780 }, { "epoch": 0.6817515524639535, "grad_norm": 1.520556124456952, "learning_rate": 2.74959252817261e-06, "loss": 0.3705, "step": 12790 }, { "epoch": 0.68228458729778, "grad_norm": 1.7311682938052233, "learning_rate": 2.7491047503272848e-06, "loss": 0.3881, "step": 12800 }, { "epoch": 0.6828176221316063, "grad_norm": 1.6002491186645775, "learning_rate": 2.748616541217961e-06, "loss": 0.3981, "step": 12810 }, { "epoch": 0.6833506569654327, "grad_norm": 1.8889318875427128, "learning_rate": 2.7481279010131975e-06, "loss": 0.3976, "step": 12820 }, { "epoch": 0.6838836917992591, "grad_norm": 1.5127717189862142, "learning_rate": 2.747638829881702e-06, "loss": 0.3881, "step": 12830 }, { "epoch": 0.6844167266330855, "grad_norm": 1.4793281533153604, "learning_rate": 2.74714932799233e-06, "loss": 0.3851, "step": 12840 }, { "epoch": 0.6849497614669119, "grad_norm": 1.693718557764651, "learning_rate": 2.746659395514085e-06, "loss": 0.3949, "step": 12850 }, { "epoch": 0.6854827963007383, "grad_norm": 1.7812046830091415, "learning_rate": 2.746169032616122e-06, "loss": 0.3862, "step": 12860 }, { "epoch": 0.6860158311345647, "grad_norm": 1.559640382471567, "learning_rate": 2.7456782394677428e-06, "loss": 0.3866, "step": 12870 }, { "epoch": 0.686548865968391, "grad_norm": 1.574501810795579, "learning_rate": 2.745187016238397e-06, "loss": 0.3915, "step": 12880 }, { "epoch": 0.6870819008022174, "grad_norm": 1.5767675304948638, "learning_rate": 2.7446953630976836e-06, "loss": 0.3839, "step": 12890 }, { "epoch": 0.6876149356360438, "grad_norm": 1.5216465100054701, "learning_rate": 2.744203280215351e-06, "loss": 0.3852, "step": 12900 }, { "epoch": 0.6881479704698702, "grad_norm": 1.7421711843910284, "learning_rate": 2.743710767761294e-06, "loss": 0.3938, "step": 12910 }, { "epoch": 0.6886810053036966, "grad_norm": 1.5065946298557589, "learning_rate": 2.7432178259055574e-06, "loss": 0.3816, "step": 12920 }, { "epoch": 0.689214040137523, "grad_norm": 1.4825540407382936, "learning_rate": 2.742724454818333e-06, "loss": 0.3889, "step": 12930 }, { "epoch": 0.6897470749713493, "grad_norm": 1.640329470277106, "learning_rate": 2.7422306546699618e-06, "loss": 0.3915, "step": 12940 }, { "epoch": 0.6902801098051757, "grad_norm": 1.5228227884510819, "learning_rate": 2.741736425630932e-06, "loss": 0.3837, "step": 12950 }, { "epoch": 0.6908131446390021, "grad_norm": 1.5943598376766, "learning_rate": 2.7412417678718803e-06, "loss": 0.3936, "step": 12960 }, { "epoch": 0.6913461794728285, "grad_norm": 1.6640939943309663, "learning_rate": 2.7407466815635925e-06, "loss": 0.389, "step": 12970 }, { "epoch": 0.6918792143066549, "grad_norm": 1.5914265221935253, "learning_rate": 2.740251166877001e-06, "loss": 0.3937, "step": 12980 }, { "epoch": 0.6924122491404814, "grad_norm": 1.4778763138575604, "learning_rate": 2.739755223983185e-06, "loss": 0.3947, "step": 12990 }, { "epoch": 0.6929452839743078, "grad_norm": 1.5526855020948873, "learning_rate": 2.7392588530533744e-06, "loss": 0.373, "step": 13000 }, { "epoch": 0.6934783188081342, "grad_norm": 1.6275445729544191, "learning_rate": 2.7387620542589457e-06, "loss": 0.3891, "step": 13010 }, { "epoch": 0.6940113536419605, "grad_norm": 1.5013424776270292, "learning_rate": 2.738264827771422e-06, "loss": 0.387, "step": 13020 }, { "epoch": 0.6945443884757869, "grad_norm": 1.6394412410844779, "learning_rate": 2.7377671737624754e-06, "loss": 0.3848, "step": 13030 }, { "epoch": 0.6950774233096133, "grad_norm": 1.6087724219258532, "learning_rate": 2.737269092403925e-06, "loss": 0.3866, "step": 13040 }, { "epoch": 0.6956104581434397, "grad_norm": 1.499480313581256, "learning_rate": 2.7367705838677377e-06, "loss": 0.3872, "step": 13050 }, { "epoch": 0.6961434929772661, "grad_norm": 1.4249272150967984, "learning_rate": 2.736271648326028e-06, "loss": 0.3861, "step": 13060 }, { "epoch": 0.6966765278110925, "grad_norm": 1.5347293157426203, "learning_rate": 2.735772285951057e-06, "loss": 0.3866, "step": 13070 }, { "epoch": 0.6972095626449188, "grad_norm": 1.4839464584619286, "learning_rate": 2.7352724969152344e-06, "loss": 0.3792, "step": 13080 }, { "epoch": 0.6977425974787452, "grad_norm": 1.660798590531833, "learning_rate": 2.7347722813911164e-06, "loss": 0.3786, "step": 13090 }, { "epoch": 0.6982756323125716, "grad_norm": 1.615091682066585, "learning_rate": 2.734271639551407e-06, "loss": 0.3978, "step": 13100 }, { "epoch": 0.698808667146398, "grad_norm": 1.6289233735933937, "learning_rate": 2.733770571568956e-06, "loss": 0.3816, "step": 13110 }, { "epoch": 0.6993417019802244, "grad_norm": 1.5640971774206687, "learning_rate": 2.733269077616762e-06, "loss": 0.3855, "step": 13120 }, { "epoch": 0.6998747368140508, "grad_norm": 3.1169071364406746, "learning_rate": 2.732767157867971e-06, "loss": 0.3996, "step": 13130 }, { "epoch": 0.7004077716478772, "grad_norm": 1.637535585462466, "learning_rate": 2.7322648124958733e-06, "loss": 0.3877, "step": 13140 }, { "epoch": 0.7009408064817035, "grad_norm": 1.338365834276967, "learning_rate": 2.731762041673909e-06, "loss": 0.3782, "step": 13150 }, { "epoch": 0.7014738413155299, "grad_norm": 1.6075817656975602, "learning_rate": 2.731258845575664e-06, "loss": 0.388, "step": 13160 }, { "epoch": 0.7020068761493563, "grad_norm": 1.5489653356145214, "learning_rate": 2.73075522437487e-06, "loss": 0.4131, "step": 13170 }, { "epoch": 0.7025399109831828, "grad_norm": 1.5015209058029906, "learning_rate": 2.7302511782454075e-06, "loss": 0.3825, "step": 13180 }, { "epoch": 0.7030729458170092, "grad_norm": 1.4912536277176622, "learning_rate": 2.7297467073613023e-06, "loss": 0.3821, "step": 13190 }, { "epoch": 0.7036059806508356, "grad_norm": 1.5127147010007216, "learning_rate": 2.729241811896728e-06, "loss": 0.3934, "step": 13200 }, { "epoch": 0.704139015484662, "grad_norm": 1.522773318601683, "learning_rate": 2.728736492026003e-06, "loss": 0.3881, "step": 13210 }, { "epoch": 0.7046720503184883, "grad_norm": 1.622986666839565, "learning_rate": 2.7282307479235935e-06, "loss": 0.3878, "step": 13220 }, { "epoch": 0.7052050851523147, "grad_norm": 1.57857404434383, "learning_rate": 2.7277245797641123e-06, "loss": 0.3736, "step": 13230 }, { "epoch": 0.7057381199861411, "grad_norm": 1.6580360295681327, "learning_rate": 2.727217987722318e-06, "loss": 0.3828, "step": 13240 }, { "epoch": 0.7062711548199675, "grad_norm": 1.577739831378232, "learning_rate": 2.7267109719731157e-06, "loss": 0.4002, "step": 13250 }, { "epoch": 0.7068041896537939, "grad_norm": 1.6991613464206654, "learning_rate": 2.726203532691557e-06, "loss": 0.3832, "step": 13260 }, { "epoch": 0.7073372244876203, "grad_norm": 1.50920445086471, "learning_rate": 2.72569567005284e-06, "loss": 0.3562, "step": 13270 }, { "epoch": 0.7078702593214466, "grad_norm": 1.594758541333458, "learning_rate": 2.725187384232308e-06, "loss": 0.3794, "step": 13280 }, { "epoch": 0.708403294155273, "grad_norm": 1.4690326485687684, "learning_rate": 2.7246786754054507e-06, "loss": 0.3982, "step": 13290 }, { "epoch": 0.7089363289890994, "grad_norm": 1.5137623870548904, "learning_rate": 2.7241695437479054e-06, "loss": 0.3742, "step": 13300 }, { "epoch": 0.7094693638229258, "grad_norm": 1.5745854341801928, "learning_rate": 2.723659989435453e-06, "loss": 0.3829, "step": 13310 }, { "epoch": 0.7100023986567522, "grad_norm": 1.8141976136973568, "learning_rate": 2.7231500126440216e-06, "loss": 0.3775, "step": 13320 }, { "epoch": 0.7105354334905786, "grad_norm": 1.3874108647877745, "learning_rate": 2.7226396135496854e-06, "loss": 0.3751, "step": 13330 }, { "epoch": 0.711068468324405, "grad_norm": 1.525279426750466, "learning_rate": 2.722128792328664e-06, "loss": 0.3713, "step": 13340 }, { "epoch": 0.7116015031582313, "grad_norm": 1.4276440398999402, "learning_rate": 2.721617549157322e-06, "loss": 0.3883, "step": 13350 }, { "epoch": 0.7121345379920577, "grad_norm": 1.3819094931839373, "learning_rate": 2.721105884212172e-06, "loss": 0.3917, "step": 13360 }, { "epoch": 0.7126675728258842, "grad_norm": 1.6112203117077113, "learning_rate": 2.720593797669869e-06, "loss": 0.377, "step": 13370 }, { "epoch": 0.7132006076597106, "grad_norm": 1.5913240291688735, "learning_rate": 2.7200812897072156e-06, "loss": 0.3837, "step": 13380 }, { "epoch": 0.713733642493537, "grad_norm": 1.4725209599416555, "learning_rate": 2.71956836050116e-06, "loss": 0.3893, "step": 13390 }, { "epoch": 0.7142666773273634, "grad_norm": 1.648601587773875, "learning_rate": 2.719055010228795e-06, "loss": 0.3815, "step": 13400 }, { "epoch": 0.7147997121611898, "grad_norm": 1.6433557817933246, "learning_rate": 2.7185412390673595e-06, "loss": 0.3896, "step": 13410 }, { "epoch": 0.7153327469950161, "grad_norm": 1.4819913856175566, "learning_rate": 2.718027047194237e-06, "loss": 0.3716, "step": 13420 }, { "epoch": 0.7158657818288425, "grad_norm": 1.5042676159233608, "learning_rate": 2.717512434786957e-06, "loss": 0.3951, "step": 13430 }, { "epoch": 0.7163988166626689, "grad_norm": 1.7504478372677417, "learning_rate": 2.716997402023193e-06, "loss": 0.3901, "step": 13440 }, { "epoch": 0.7169318514964953, "grad_norm": 1.6553920045258095, "learning_rate": 2.7164819490807652e-06, "loss": 0.3883, "step": 13450 }, { "epoch": 0.7174648863303217, "grad_norm": 1.462738438758308, "learning_rate": 2.7159660761376383e-06, "loss": 0.3743, "step": 13460 }, { "epoch": 0.7179979211641481, "grad_norm": 1.6043869019545287, "learning_rate": 2.715449783371921e-06, "loss": 0.3945, "step": 13470 }, { "epoch": 0.7185309559979745, "grad_norm": 1.5124295693475063, "learning_rate": 2.7149330709618677e-06, "loss": 0.3887, "step": 13480 }, { "epoch": 0.7190639908318008, "grad_norm": 1.699222381556251, "learning_rate": 2.7144159390858787e-06, "loss": 0.4007, "step": 13490 }, { "epoch": 0.7195970256656272, "grad_norm": 1.5741560149187015, "learning_rate": 2.7138983879224974e-06, "loss": 0.3723, "step": 13500 }, { "epoch": 0.7201300604994536, "grad_norm": 1.6301904876684477, "learning_rate": 2.7133804176504126e-06, "loss": 0.3787, "step": 13510 }, { "epoch": 0.72066309533328, "grad_norm": 1.6197016339526287, "learning_rate": 2.712862028448459e-06, "loss": 0.3785, "step": 13520 }, { "epoch": 0.7211961301671064, "grad_norm": 1.5527484867333805, "learning_rate": 2.7123432204956133e-06, "loss": 0.4004, "step": 13530 }, { "epoch": 0.7217291650009328, "grad_norm": 1.7191320204858784, "learning_rate": 2.7118239939709993e-06, "loss": 0.3976, "step": 13540 }, { "epoch": 0.7222621998347593, "grad_norm": 1.7710265285463023, "learning_rate": 2.711304349053884e-06, "loss": 0.3714, "step": 13550 }, { "epoch": 0.7227952346685856, "grad_norm": 1.7964362812482046, "learning_rate": 2.7107842859236805e-06, "loss": 0.3914, "step": 13560 }, { "epoch": 0.723328269502412, "grad_norm": 1.5334407354457222, "learning_rate": 2.710263804759943e-06, "loss": 0.3757, "step": 13570 }, { "epoch": 0.7238613043362384, "grad_norm": 1.5419702064547496, "learning_rate": 2.709742905742373e-06, "loss": 0.383, "step": 13580 }, { "epoch": 0.7243943391700648, "grad_norm": 1.5701350411320052, "learning_rate": 2.7092215890508153e-06, "loss": 0.3864, "step": 13590 }, { "epoch": 0.7249273740038912, "grad_norm": 1.6177726224412516, "learning_rate": 2.708699854865259e-06, "loss": 0.3751, "step": 13600 }, { "epoch": 0.7254604088377176, "grad_norm": 1.696569539318935, "learning_rate": 2.708177703365837e-06, "loss": 0.3903, "step": 13610 }, { "epoch": 0.725993443671544, "grad_norm": 1.7301027838412937, "learning_rate": 2.7076551347328267e-06, "loss": 0.3931, "step": 13620 }, { "epoch": 0.7265264785053703, "grad_norm": 1.4018042335658578, "learning_rate": 2.7071321491466493e-06, "loss": 0.3822, "step": 13630 }, { "epoch": 0.7270595133391967, "grad_norm": 1.6852557666125316, "learning_rate": 2.7066087467878696e-06, "loss": 0.3854, "step": 13640 }, { "epoch": 0.7275925481730231, "grad_norm": 1.621367972057748, "learning_rate": 2.7060849278371975e-06, "loss": 0.4003, "step": 13650 }, { "epoch": 0.7281255830068495, "grad_norm": 1.50999693876737, "learning_rate": 2.705560692475485e-06, "loss": 0.3823, "step": 13660 }, { "epoch": 0.7286586178406759, "grad_norm": 1.5439011651767698, "learning_rate": 2.70503604088373e-06, "loss": 0.3845, "step": 13670 }, { "epoch": 0.7291916526745023, "grad_norm": 1.5693718207431124, "learning_rate": 2.7045109732430718e-06, "loss": 0.3928, "step": 13680 }, { "epoch": 0.7297246875083286, "grad_norm": 1.7260676201328702, "learning_rate": 2.703985489734795e-06, "loss": 0.3924, "step": 13690 }, { "epoch": 0.730257722342155, "grad_norm": 1.5912490301842381, "learning_rate": 2.703459590540327e-06, "loss": 0.3908, "step": 13700 }, { "epoch": 0.7307907571759814, "grad_norm": 1.4819689534606348, "learning_rate": 2.7029332758412393e-06, "loss": 0.3797, "step": 13710 }, { "epoch": 0.7313237920098078, "grad_norm": 1.573359263649508, "learning_rate": 2.702406545819246e-06, "loss": 0.3893, "step": 13720 }, { "epoch": 0.7318568268436342, "grad_norm": 1.515514883608717, "learning_rate": 2.7018794006562055e-06, "loss": 0.3803, "step": 13730 }, { "epoch": 0.7323898616774607, "grad_norm": 1.4455124863133146, "learning_rate": 2.701351840534119e-06, "loss": 0.3879, "step": 13740 }, { "epoch": 0.7329228965112871, "grad_norm": 1.553992008773466, "learning_rate": 2.7008238656351312e-06, "loss": 0.3958, "step": 13750 }, { "epoch": 0.7334559313451134, "grad_norm": 1.514945479265164, "learning_rate": 2.70029547614153e-06, "loss": 0.3887, "step": 13760 }, { "epoch": 0.7339889661789398, "grad_norm": 2.0475782789851737, "learning_rate": 2.6997666722357456e-06, "loss": 0.3834, "step": 13770 }, { "epoch": 0.7345220010127662, "grad_norm": 1.5685919373167614, "learning_rate": 2.6992374541003523e-06, "loss": 0.3757, "step": 13780 }, { "epoch": 0.7350550358465926, "grad_norm": 1.5066461477919797, "learning_rate": 2.698707821918068e-06, "loss": 0.367, "step": 13790 }, { "epoch": 0.735588070680419, "grad_norm": 1.439312999748882, "learning_rate": 2.698177775871752e-06, "loss": 0.3838, "step": 13800 }, { "epoch": 0.7361211055142454, "grad_norm": 1.507905304836201, "learning_rate": 2.6976473161444076e-06, "loss": 0.382, "step": 13810 }, { "epoch": 0.7366541403480718, "grad_norm": 1.8021993262354592, "learning_rate": 2.69711644291918e-06, "loss": 0.3858, "step": 13820 }, { "epoch": 0.7371871751818981, "grad_norm": 1.5815430459684612, "learning_rate": 2.6965851563793578e-06, "loss": 0.3865, "step": 13830 }, { "epoch": 0.7377202100157245, "grad_norm": 1.6980970729787377, "learning_rate": 2.696053456708373e-06, "loss": 0.3851, "step": 13840 }, { "epoch": 0.7382532448495509, "grad_norm": 1.5879026673067482, "learning_rate": 2.6955213440897976e-06, "loss": 0.3777, "step": 13850 }, { "epoch": 0.7387862796833773, "grad_norm": 1.706498150529188, "learning_rate": 2.6949888187073495e-06, "loss": 0.3874, "step": 13860 }, { "epoch": 0.7393193145172037, "grad_norm": 1.6244462033848353, "learning_rate": 2.6944558807448874e-06, "loss": 0.3899, "step": 13870 }, { "epoch": 0.7398523493510301, "grad_norm": 1.4863210698554998, "learning_rate": 2.693922530386412e-06, "loss": 0.3784, "step": 13880 }, { "epoch": 0.7403853841848564, "grad_norm": 1.5687367500223122, "learning_rate": 2.6933887678160683e-06, "loss": 0.3818, "step": 13890 }, { "epoch": 0.7409184190186828, "grad_norm": 1.4901809941082058, "learning_rate": 2.6928545932181406e-06, "loss": 0.3816, "step": 13900 }, { "epoch": 0.7414514538525092, "grad_norm": 1.579608611906349, "learning_rate": 2.692320006777059e-06, "loss": 0.3907, "step": 13910 }, { "epoch": 0.7419844886863356, "grad_norm": 1.4820958708779177, "learning_rate": 2.6917850086773926e-06, "loss": 0.3697, "step": 13920 }, { "epoch": 0.7425175235201621, "grad_norm": 1.4872939118579305, "learning_rate": 2.691249599103855e-06, "loss": 0.3759, "step": 13930 }, { "epoch": 0.7430505583539885, "grad_norm": 1.4542868805457698, "learning_rate": 2.6907137782413e-06, "loss": 0.3825, "step": 13940 }, { "epoch": 0.7435835931878149, "grad_norm": 1.4380016676050995, "learning_rate": 2.6901775462747243e-06, "loss": 0.3937, "step": 13950 }, { "epoch": 0.7441166280216412, "grad_norm": 1.524173149205176, "learning_rate": 2.689640903389268e-06, "loss": 0.3707, "step": 13960 }, { "epoch": 0.7446496628554676, "grad_norm": 1.6651272277498275, "learning_rate": 2.68910384977021e-06, "loss": 0.3986, "step": 13970 }, { "epoch": 0.745182697689294, "grad_norm": 1.8018108869900669, "learning_rate": 2.688566385602973e-06, "loss": 0.3993, "step": 13980 }, { "epoch": 0.7457157325231204, "grad_norm": 1.4878680397164445, "learning_rate": 2.6880285110731215e-06, "loss": 0.3804, "step": 13990 }, { "epoch": 0.7462487673569468, "grad_norm": 1.4537101709160996, "learning_rate": 2.6874902263663612e-06, "loss": 0.3781, "step": 14000 }, { "epoch": 0.7467818021907732, "grad_norm": 1.5481186542559633, "learning_rate": 2.686951531668539e-06, "loss": 0.3767, "step": 14010 }, { "epoch": 0.7473148370245996, "grad_norm": 1.545481683502921, "learning_rate": 2.6864124271656437e-06, "loss": 0.3747, "step": 14020 }, { "epoch": 0.7478478718584259, "grad_norm": 1.6328173089179736, "learning_rate": 2.6858729130438067e-06, "loss": 0.3864, "step": 14030 }, { "epoch": 0.7483809066922523, "grad_norm": 1.558402507260832, "learning_rate": 2.6853329894892992e-06, "loss": 0.392, "step": 14040 }, { "epoch": 0.7489139415260787, "grad_norm": 1.705139255518345, "learning_rate": 2.6847926566885338e-06, "loss": 0.3778, "step": 14050 }, { "epoch": 0.7494469763599051, "grad_norm": 1.6721644047160122, "learning_rate": 2.684251914828066e-06, "loss": 0.3693, "step": 14060 }, { "epoch": 0.7499800111937315, "grad_norm": 1.5516489095124752, "learning_rate": 2.6837107640945906e-06, "loss": 0.3712, "step": 14070 }, { "epoch": 0.7505130460275579, "grad_norm": 1.5983957786606795, "learning_rate": 2.683169204674945e-06, "loss": 0.378, "step": 14080 }, { "epoch": 0.7510460808613842, "grad_norm": 1.565366324946499, "learning_rate": 2.682627236756107e-06, "loss": 0.3864, "step": 14090 }, { "epoch": 0.7515791156952106, "grad_norm": 1.5239895960312135, "learning_rate": 2.682084860525196e-06, "loss": 0.3703, "step": 14100 }, { "epoch": 0.7521121505290371, "grad_norm": 1.4760910387103512, "learning_rate": 2.6815420761694716e-06, "loss": 0.3633, "step": 14110 }, { "epoch": 0.7526451853628635, "grad_norm": 1.6231749377585034, "learning_rate": 2.680998883876335e-06, "loss": 0.3853, "step": 14120 }, { "epoch": 0.7531782201966899, "grad_norm": 1.431045760213454, "learning_rate": 2.680455283833327e-06, "loss": 0.3826, "step": 14130 }, { "epoch": 0.7537112550305163, "grad_norm": 1.5416390952620564, "learning_rate": 2.679911276228131e-06, "loss": 0.3688, "step": 14140 }, { "epoch": 0.7542442898643427, "grad_norm": 1.5148282528028743, "learning_rate": 2.679366861248571e-06, "loss": 0.3649, "step": 14150 }, { "epoch": 0.754777324698169, "grad_norm": 1.5817808971069427, "learning_rate": 2.678822039082609e-06, "loss": 0.3799, "step": 14160 }, { "epoch": 0.7553103595319954, "grad_norm": 1.4906424703900187, "learning_rate": 2.678276809918351e-06, "loss": 0.38, "step": 14170 }, { "epoch": 0.7558433943658218, "grad_norm": 1.610919235890603, "learning_rate": 2.677731173944041e-06, "loss": 0.3864, "step": 14180 }, { "epoch": 0.7563764291996482, "grad_norm": 1.7412703916621453, "learning_rate": 2.6771851313480652e-06, "loss": 0.3825, "step": 14190 }, { "epoch": 0.7569094640334746, "grad_norm": 1.4859213488164094, "learning_rate": 2.676638682318949e-06, "loss": 0.3681, "step": 14200 }, { "epoch": 0.757442498867301, "grad_norm": 1.4486163249900998, "learning_rate": 2.676091827045359e-06, "loss": 0.3802, "step": 14210 }, { "epoch": 0.7579755337011274, "grad_norm": 1.9040289677671705, "learning_rate": 2.6755445657161005e-06, "loss": 0.3855, "step": 14220 }, { "epoch": 0.7585085685349537, "grad_norm": 1.610102578431111, "learning_rate": 2.674996898520121e-06, "loss": 0.3743, "step": 14230 }, { "epoch": 0.7590416033687801, "grad_norm": 1.532273974811401, "learning_rate": 2.674448825646508e-06, "loss": 0.3845, "step": 14240 }, { "epoch": 0.7595746382026065, "grad_norm": 1.622277955554152, "learning_rate": 2.6739003472844866e-06, "loss": 0.3898, "step": 14250 }, { "epoch": 0.7601076730364329, "grad_norm": 1.677938381989656, "learning_rate": 2.6733514636234246e-06, "loss": 0.3821, "step": 14260 }, { "epoch": 0.7606407078702593, "grad_norm": 1.5323608443982975, "learning_rate": 2.6728021748528285e-06, "loss": 0.377, "step": 14270 }, { "epoch": 0.7611737427040857, "grad_norm": 1.6028970491668473, "learning_rate": 2.672252481162345e-06, "loss": 0.3774, "step": 14280 }, { "epoch": 0.761706777537912, "grad_norm": 1.7027384185760273, "learning_rate": 2.671702382741761e-06, "loss": 0.3878, "step": 14290 }, { "epoch": 0.7622398123717385, "grad_norm": 1.7698752611373998, "learning_rate": 2.6711518797810016e-06, "loss": 0.3818, "step": 14300 }, { "epoch": 0.7627728472055649, "grad_norm": 1.651857115872705, "learning_rate": 2.670600972470133e-06, "loss": 0.3747, "step": 14310 }, { "epoch": 0.7633058820393913, "grad_norm": 1.5117780004384307, "learning_rate": 2.670049660999361e-06, "loss": 0.3803, "step": 14320 }, { "epoch": 0.7638389168732177, "grad_norm": 1.6762370842755865, "learning_rate": 2.66949794555903e-06, "loss": 0.372, "step": 14330 }, { "epoch": 0.7643719517070441, "grad_norm": 1.5275310309304424, "learning_rate": 2.668945826339625e-06, "loss": 0.3788, "step": 14340 }, { "epoch": 0.7649049865408705, "grad_norm": 1.5285612240160436, "learning_rate": 2.6683933035317695e-06, "loss": 0.3753, "step": 14350 }, { "epoch": 0.7654380213746969, "grad_norm": 1.5206932444236243, "learning_rate": 2.667840377326227e-06, "loss": 0.3801, "step": 14360 }, { "epoch": 0.7659710562085232, "grad_norm": 1.5537614704481293, "learning_rate": 2.667287047913899e-06, "loss": 0.3835, "step": 14370 }, { "epoch": 0.7665040910423496, "grad_norm": 1.5682640700687065, "learning_rate": 2.6667333154858285e-06, "loss": 0.3779, "step": 14380 }, { "epoch": 0.767037125876176, "grad_norm": 1.705467179337518, "learning_rate": 2.6661791802331956e-06, "loss": 0.3856, "step": 14390 }, { "epoch": 0.7675701607100024, "grad_norm": 1.636218762545239, "learning_rate": 2.66562464234732e-06, "loss": 0.3656, "step": 14400 }, { "epoch": 0.7681031955438288, "grad_norm": 1.3767479995706844, "learning_rate": 2.665069702019661e-06, "loss": 0.3914, "step": 14410 }, { "epoch": 0.7686362303776552, "grad_norm": 1.8589785835593269, "learning_rate": 2.664514359441817e-06, "loss": 0.3753, "step": 14420 }, { "epoch": 0.7691692652114815, "grad_norm": 1.7060659719896527, "learning_rate": 2.663958614805523e-06, "loss": 0.3935, "step": 14430 }, { "epoch": 0.7697023000453079, "grad_norm": 1.591105907921803, "learning_rate": 2.663402468302656e-06, "loss": 0.3724, "step": 14440 }, { "epoch": 0.7702353348791343, "grad_norm": 1.6738017723436225, "learning_rate": 2.66284592012523e-06, "loss": 0.3955, "step": 14450 }, { "epoch": 0.7707683697129607, "grad_norm": 1.7085572652224879, "learning_rate": 2.6622889704653975e-06, "loss": 0.396, "step": 14460 }, { "epoch": 0.7713014045467871, "grad_norm": 1.6441537457697075, "learning_rate": 2.6617316195154508e-06, "loss": 0.367, "step": 14470 }, { "epoch": 0.7718344393806135, "grad_norm": 1.5215714048435565, "learning_rate": 2.6611738674678195e-06, "loss": 0.3861, "step": 14480 }, { "epoch": 0.77236747421444, "grad_norm": 1.5405281400606958, "learning_rate": 2.6606157145150727e-06, "loss": 0.3836, "step": 14490 }, { "epoch": 0.7729005090482663, "grad_norm": 1.6955724201748459, "learning_rate": 2.660057160849917e-06, "loss": 0.3712, "step": 14500 }, { "epoch": 0.7734335438820927, "grad_norm": 1.623092111117168, "learning_rate": 2.6594982066651976e-06, "loss": 0.3766, "step": 14510 }, { "epoch": 0.7739665787159191, "grad_norm": 1.6379731630258436, "learning_rate": 2.658938852153899e-06, "loss": 0.3895, "step": 14520 }, { "epoch": 0.7744996135497455, "grad_norm": 1.5002087878104542, "learning_rate": 2.6583790975091423e-06, "loss": 0.387, "step": 14530 }, { "epoch": 0.7750326483835719, "grad_norm": 1.530748386011778, "learning_rate": 2.6578189429241876e-06, "loss": 0.3861, "step": 14540 }, { "epoch": 0.7755656832173983, "grad_norm": 1.7659549894503823, "learning_rate": 2.6572583885924337e-06, "loss": 0.3842, "step": 14550 }, { "epoch": 0.7760987180512247, "grad_norm": 1.5865874524892567, "learning_rate": 2.656697434707416e-06, "loss": 0.3748, "step": 14560 }, { "epoch": 0.776631752885051, "grad_norm": 1.5595482234022222, "learning_rate": 2.6561360814628093e-06, "loss": 0.3839, "step": 14570 }, { "epoch": 0.7771647877188774, "grad_norm": 1.6152141636044386, "learning_rate": 2.6555743290524245e-06, "loss": 0.3926, "step": 14580 }, { "epoch": 0.7776978225527038, "grad_norm": 1.658589427232685, "learning_rate": 2.6550121776702123e-06, "loss": 0.3777, "step": 14590 }, { "epoch": 0.7782308573865302, "grad_norm": 1.5946282310880198, "learning_rate": 2.65444962751026e-06, "loss": 0.3703, "step": 14600 }, { "epoch": 0.7787638922203566, "grad_norm": 1.583960561867612, "learning_rate": 2.6538866787667925e-06, "loss": 0.3824, "step": 14610 }, { "epoch": 0.779296927054183, "grad_norm": 1.5401933646754378, "learning_rate": 2.653323331634173e-06, "loss": 0.3722, "step": 14620 }, { "epoch": 0.7798299618880093, "grad_norm": 1.4717065312762139, "learning_rate": 2.652759586306901e-06, "loss": 0.3761, "step": 14630 }, { "epoch": 0.7803629967218357, "grad_norm": 1.542125287119514, "learning_rate": 2.652195442979616e-06, "loss": 0.3702, "step": 14640 }, { "epoch": 0.7808960315556621, "grad_norm": 1.709435641369509, "learning_rate": 2.651630901847092e-06, "loss": 0.3807, "step": 14650 }, { "epoch": 0.7814290663894885, "grad_norm": 1.6756778931666965, "learning_rate": 2.651065963104242e-06, "loss": 0.3727, "step": 14660 }, { "epoch": 0.781962101223315, "grad_norm": 1.6617455201194067, "learning_rate": 2.650500626946116e-06, "loss": 0.3821, "step": 14670 }, { "epoch": 0.7824951360571414, "grad_norm": 1.5889826510780378, "learning_rate": 2.649934893567901e-06, "loss": 0.3906, "step": 14680 }, { "epoch": 0.7830281708909678, "grad_norm": 1.543896943124956, "learning_rate": 2.649368763164921e-06, "loss": 0.3774, "step": 14690 }, { "epoch": 0.7835612057247942, "grad_norm": 1.458772394809297, "learning_rate": 2.648802235932637e-06, "loss": 0.3733, "step": 14700 }, { "epoch": 0.7840942405586205, "grad_norm": 1.4064454865750395, "learning_rate": 2.6482353120666485e-06, "loss": 0.3814, "step": 14710 }, { "epoch": 0.7846272753924469, "grad_norm": 1.6576605975658734, "learning_rate": 2.6476679917626898e-06, "loss": 0.3785, "step": 14720 }, { "epoch": 0.7851603102262733, "grad_norm": 1.5398003798072037, "learning_rate": 2.647100275216633e-06, "loss": 0.3826, "step": 14730 }, { "epoch": 0.7856933450600997, "grad_norm": 1.6009980451790458, "learning_rate": 2.646532162624488e-06, "loss": 0.3839, "step": 14740 }, { "epoch": 0.7862263798939261, "grad_norm": 1.5974359764117594, "learning_rate": 2.6459636541823995e-06, "loss": 0.3773, "step": 14750 }, { "epoch": 0.7867594147277525, "grad_norm": 1.526211852989671, "learning_rate": 2.6453947500866505e-06, "loss": 0.3907, "step": 14760 }, { "epoch": 0.7872924495615788, "grad_norm": 1.7564690904552895, "learning_rate": 2.644825450533659e-06, "loss": 0.3828, "step": 14770 }, { "epoch": 0.7878254843954052, "grad_norm": 1.4530410472836608, "learning_rate": 2.6442557557199818e-06, "loss": 0.3698, "step": 14780 }, { "epoch": 0.7883585192292316, "grad_norm": 1.5889706646970847, "learning_rate": 2.6436856658423097e-06, "loss": 0.3698, "step": 14790 }, { "epoch": 0.788891554063058, "grad_norm": 1.499026000053821, "learning_rate": 2.6431151810974724e-06, "loss": 0.3811, "step": 14800 }, { "epoch": 0.7894245888968844, "grad_norm": 1.5459216488342085, "learning_rate": 2.6425443016824335e-06, "loss": 0.3805, "step": 14810 }, { "epoch": 0.7899576237307108, "grad_norm": 1.5909499923729846, "learning_rate": 2.641973027794294e-06, "loss": 0.3611, "step": 14820 }, { "epoch": 0.7904906585645372, "grad_norm": 1.6968052394583941, "learning_rate": 2.6414013596302914e-06, "loss": 0.372, "step": 14830 }, { "epoch": 0.7910236933983635, "grad_norm": 1.691090314111364, "learning_rate": 2.6408292973877984e-06, "loss": 0.3875, "step": 14840 }, { "epoch": 0.7915567282321899, "grad_norm": 1.6026699152846986, "learning_rate": 2.6402568412643255e-06, "loss": 0.3766, "step": 14850 }, { "epoch": 0.7920897630660164, "grad_norm": 1.5919582698850068, "learning_rate": 2.6396839914575168e-06, "loss": 0.3686, "step": 14860 }, { "epoch": 0.7926227978998428, "grad_norm": 1.6596544716727593, "learning_rate": 2.6391107481651544e-06, "loss": 0.3782, "step": 14870 }, { "epoch": 0.7931558327336692, "grad_norm": 1.456029829493708, "learning_rate": 2.6385371115851548e-06, "loss": 0.3537, "step": 14880 }, { "epoch": 0.7936888675674956, "grad_norm": 1.4015542912795187, "learning_rate": 2.6379630819155713e-06, "loss": 0.3608, "step": 14890 }, { "epoch": 0.794221902401322, "grad_norm": 1.548085500573455, "learning_rate": 2.6373886593545925e-06, "loss": 0.3649, "step": 14900 }, { "epoch": 0.7947549372351483, "grad_norm": 1.4242272018327753, "learning_rate": 2.6368138441005425e-06, "loss": 0.3908, "step": 14910 }, { "epoch": 0.7952879720689747, "grad_norm": 1.9033306068739955, "learning_rate": 2.6362386363518814e-06, "loss": 0.3807, "step": 14920 }, { "epoch": 0.7958210069028011, "grad_norm": 1.801444482296597, "learning_rate": 2.635663036307204e-06, "loss": 0.381, "step": 14930 }, { "epoch": 0.7963540417366275, "grad_norm": 1.5576521323179306, "learning_rate": 2.6350870441652413e-06, "loss": 0.3798, "step": 14940 }, { "epoch": 0.7968870765704539, "grad_norm": 1.571417644855115, "learning_rate": 2.63451066012486e-06, "loss": 0.3842, "step": 14950 }, { "epoch": 0.7974201114042803, "grad_norm": 1.531332735013395, "learning_rate": 2.6339338843850607e-06, "loss": 0.3797, "step": 14960 }, { "epoch": 0.7979531462381066, "grad_norm": 1.5441614344992205, "learning_rate": 2.6333567171449803e-06, "loss": 0.3641, "step": 14970 }, { "epoch": 0.798486181071933, "grad_norm": 1.407836934841589, "learning_rate": 2.6327791586038917e-06, "loss": 0.3758, "step": 14980 }, { "epoch": 0.7990192159057594, "grad_norm": 1.4204714634793922, "learning_rate": 2.632201208961201e-06, "loss": 0.3717, "step": 14990 }, { "epoch": 0.7995522507395858, "grad_norm": 1.6047150043523342, "learning_rate": 2.63162286841645e-06, "loss": 0.375, "step": 15000 }, { "epoch": 0.8000852855734122, "grad_norm": 1.6087766601697615, "learning_rate": 2.631044137169316e-06, "loss": 0.3733, "step": 15010 }, { "epoch": 0.8006183204072386, "grad_norm": 1.605954450636087, "learning_rate": 2.630465015419611e-06, "loss": 0.3779, "step": 15020 }, { "epoch": 0.801151355241065, "grad_norm": 1.6558969809918074, "learning_rate": 2.6298855033672816e-06, "loss": 0.386, "step": 15030 }, { "epoch": 0.8016843900748913, "grad_norm": 1.6257344700118077, "learning_rate": 2.629305601212409e-06, "loss": 0.3801, "step": 15040 }, { "epoch": 0.8022174249087178, "grad_norm": 1.5377736833006272, "learning_rate": 2.62872530915521e-06, "loss": 0.3842, "step": 15050 }, { "epoch": 0.8027504597425442, "grad_norm": 1.5732148435230946, "learning_rate": 2.628144627396034e-06, "loss": 0.379, "step": 15060 }, { "epoch": 0.8032834945763706, "grad_norm": 1.4165809386610702, "learning_rate": 2.6275635561353686e-06, "loss": 0.3624, "step": 15070 }, { "epoch": 0.803816529410197, "grad_norm": 1.54934296562986, "learning_rate": 2.626982095573831e-06, "loss": 0.3806, "step": 15080 }, { "epoch": 0.8043495642440234, "grad_norm": 1.6292097271628858, "learning_rate": 2.6264002459121773e-06, "loss": 0.3828, "step": 15090 }, { "epoch": 0.8048825990778498, "grad_norm": 1.6270654164154734, "learning_rate": 2.6258180073512943e-06, "loss": 0.3893, "step": 15100 }, { "epoch": 0.8054156339116761, "grad_norm": 1.760893696539679, "learning_rate": 2.6252353800922066e-06, "loss": 0.3744, "step": 15110 }, { "epoch": 0.8059486687455025, "grad_norm": 1.542118720290548, "learning_rate": 2.62465236433607e-06, "loss": 0.3707, "step": 15120 }, { "epoch": 0.8064817035793289, "grad_norm": 1.587410532519548, "learning_rate": 2.6240689602841762e-06, "loss": 0.3708, "step": 15130 }, { "epoch": 0.8070147384131553, "grad_norm": 1.6615269663652716, "learning_rate": 2.62348516813795e-06, "loss": 0.3714, "step": 15140 }, { "epoch": 0.8075477732469817, "grad_norm": 1.6162044168857292, "learning_rate": 2.622900988098951e-06, "loss": 0.3724, "step": 15150 }, { "epoch": 0.8080808080808081, "grad_norm": 1.6059293626523352, "learning_rate": 2.6223164203688715e-06, "loss": 0.3754, "step": 15160 }, { "epoch": 0.8086138429146345, "grad_norm": 1.6827122531337826, "learning_rate": 2.6217314651495398e-06, "loss": 0.3842, "step": 15170 }, { "epoch": 0.8091468777484608, "grad_norm": 1.629828723261033, "learning_rate": 2.6211461226429154e-06, "loss": 0.3763, "step": 15180 }, { "epoch": 0.8096799125822872, "grad_norm": 1.4945387461282473, "learning_rate": 2.620560393051093e-06, "loss": 0.3736, "step": 15190 }, { "epoch": 0.8102129474161136, "grad_norm": 1.5745774243136732, "learning_rate": 2.6199742765763015e-06, "loss": 0.3695, "step": 15200 }, { "epoch": 0.81074598224994, "grad_norm": 1.6168392082579486, "learning_rate": 2.6193877734209024e-06, "loss": 0.3665, "step": 15210 }, { "epoch": 0.8112790170837664, "grad_norm": 1.668775283846866, "learning_rate": 2.6188008837873897e-06, "loss": 0.3849, "step": 15220 }, { "epoch": 0.8118120519175929, "grad_norm": 1.5811239058275377, "learning_rate": 2.6182136078783928e-06, "loss": 0.375, "step": 15230 }, { "epoch": 0.8123450867514193, "grad_norm": 1.616824865935622, "learning_rate": 2.6176259458966743e-06, "loss": 0.3732, "step": 15240 }, { "epoch": 0.8128781215852456, "grad_norm": 1.6154223509146513, "learning_rate": 2.6170378980451284e-06, "loss": 0.3734, "step": 15250 }, { "epoch": 0.813411156419072, "grad_norm": 1.6879889522393021, "learning_rate": 2.6164494645267845e-06, "loss": 0.3686, "step": 15260 }, { "epoch": 0.8139441912528984, "grad_norm": 1.610260987642283, "learning_rate": 2.6158606455448036e-06, "loss": 0.3784, "step": 15270 }, { "epoch": 0.8144772260867248, "grad_norm": 1.5035690608274144, "learning_rate": 2.615271441302481e-06, "loss": 0.3822, "step": 15280 }, { "epoch": 0.8150102609205512, "grad_norm": 1.6677448426405344, "learning_rate": 2.6146818520032437e-06, "loss": 0.3742, "step": 15290 }, { "epoch": 0.8155432957543776, "grad_norm": 1.4544448963978922, "learning_rate": 2.6140918778506533e-06, "loss": 0.375, "step": 15300 }, { "epoch": 0.816076330588204, "grad_norm": 1.5192376991238918, "learning_rate": 2.6135015190484026e-06, "loss": 0.3804, "step": 15310 }, { "epoch": 0.8166093654220303, "grad_norm": 1.6237236655698981, "learning_rate": 2.612910775800318e-06, "loss": 0.3831, "step": 15320 }, { "epoch": 0.8171424002558567, "grad_norm": 1.5335464491926976, "learning_rate": 2.6123196483103597e-06, "loss": 0.384, "step": 15330 }, { "epoch": 0.8176754350896831, "grad_norm": 1.5343674002288097, "learning_rate": 2.611728136782618e-06, "loss": 0.3872, "step": 15340 }, { "epoch": 0.8182084699235095, "grad_norm": 1.499365496692385, "learning_rate": 2.6111362414213184e-06, "loss": 0.3658, "step": 15350 }, { "epoch": 0.8187415047573359, "grad_norm": 1.5453789470920598, "learning_rate": 2.6105439624308173e-06, "loss": 0.3729, "step": 15360 }, { "epoch": 0.8192745395911623, "grad_norm": 1.5945461006809831, "learning_rate": 2.609951300015604e-06, "loss": 0.3682, "step": 15370 }, { "epoch": 0.8198075744249886, "grad_norm": 1.6214924921885283, "learning_rate": 2.6093582543803e-06, "loss": 0.3852, "step": 15380 }, { "epoch": 0.820340609258815, "grad_norm": 1.653306574781278, "learning_rate": 2.60876482572966e-06, "loss": 0.3747, "step": 15390 }, { "epoch": 0.8208736440926414, "grad_norm": 1.652697903243615, "learning_rate": 2.6081710142685704e-06, "loss": 0.3844, "step": 15400 }, { "epoch": 0.8214066789264678, "grad_norm": 1.5806590223261436, "learning_rate": 2.6075768202020483e-06, "loss": 0.3762, "step": 15410 }, { "epoch": 0.8219397137602943, "grad_norm": 1.6276971132412037, "learning_rate": 2.6069822437352456e-06, "loss": 0.3821, "step": 15420 }, { "epoch": 0.8224727485941207, "grad_norm": 1.4094612102213824, "learning_rate": 2.6063872850734438e-06, "loss": 0.3671, "step": 15430 }, { "epoch": 0.8230057834279471, "grad_norm": 1.6332986254206006, "learning_rate": 2.605791944422058e-06, "loss": 0.3877, "step": 15440 }, { "epoch": 0.8235388182617734, "grad_norm": 1.6790547002385467, "learning_rate": 2.605196221986635e-06, "loss": 0.362, "step": 15450 }, { "epoch": 0.8240718530955998, "grad_norm": 1.5907722567882416, "learning_rate": 2.6046001179728524e-06, "loss": 0.3737, "step": 15460 }, { "epoch": 0.8246048879294262, "grad_norm": 1.6107971218364705, "learning_rate": 2.6040036325865208e-06, "loss": 0.3809, "step": 15470 }, { "epoch": 0.8251379227632526, "grad_norm": 1.532492314013442, "learning_rate": 2.6034067660335803e-06, "loss": 0.3794, "step": 15480 }, { "epoch": 0.825670957597079, "grad_norm": 1.6443022286253832, "learning_rate": 2.602809518520106e-06, "loss": 0.3765, "step": 15490 }, { "epoch": 0.8262039924309054, "grad_norm": 1.5919629885529851, "learning_rate": 2.602211890252302e-06, "loss": 0.3986, "step": 15500 }, { "epoch": 0.8267370272647317, "grad_norm": 1.5154786873485722, "learning_rate": 2.601613881436504e-06, "loss": 0.3773, "step": 15510 }, { "epoch": 0.8272700620985581, "grad_norm": 1.4776845993666656, "learning_rate": 2.6010154922791804e-06, "loss": 0.3697, "step": 15520 }, { "epoch": 0.8278030969323845, "grad_norm": 1.6374945759086548, "learning_rate": 2.6004167229869293e-06, "loss": 0.3716, "step": 15530 }, { "epoch": 0.8283361317662109, "grad_norm": 1.5549057219632585, "learning_rate": 2.5998175737664816e-06, "loss": 0.3715, "step": 15540 }, { "epoch": 0.8288691666000373, "grad_norm": 1.5094224164605325, "learning_rate": 2.5992180448246982e-06, "loss": 0.3733, "step": 15550 }, { "epoch": 0.8294022014338637, "grad_norm": 1.7336169609341017, "learning_rate": 2.5986181363685713e-06, "loss": 0.3728, "step": 15560 }, { "epoch": 0.8299352362676901, "grad_norm": 1.8428380304146343, "learning_rate": 2.5980178486052257e-06, "loss": 0.3815, "step": 15570 }, { "epoch": 0.8304682711015164, "grad_norm": 1.6699268292173932, "learning_rate": 2.5974171817419143e-06, "loss": 0.3856, "step": 15580 }, { "epoch": 0.8310013059353428, "grad_norm": 1.7862875544807921, "learning_rate": 2.596816135986023e-06, "loss": 0.3725, "step": 15590 }, { "epoch": 0.8315343407691692, "grad_norm": 1.7560587533461487, "learning_rate": 2.596214711545068e-06, "loss": 0.394, "step": 15600 }, { "epoch": 0.8320673756029957, "grad_norm": 1.5871906646477703, "learning_rate": 2.5956129086266964e-06, "loss": 0.3744, "step": 15610 }, { "epoch": 0.8326004104368221, "grad_norm": 1.6228239940567555, "learning_rate": 2.5950107274386853e-06, "loss": 0.379, "step": 15620 }, { "epoch": 0.8331334452706485, "grad_norm": 1.4800779794282513, "learning_rate": 2.5944081681889435e-06, "loss": 0.3758, "step": 15630 }, { "epoch": 0.8336664801044749, "grad_norm": 1.529981787566232, "learning_rate": 2.5938052310855085e-06, "loss": 0.3964, "step": 15640 }, { "epoch": 0.8341995149383012, "grad_norm": 1.5146217835555718, "learning_rate": 2.5932019163365505e-06, "loss": 0.3652, "step": 15650 }, { "epoch": 0.8347325497721276, "grad_norm": 1.6121703106601808, "learning_rate": 2.592598224150369e-06, "loss": 0.3684, "step": 15660 }, { "epoch": 0.835265584605954, "grad_norm": 1.5833644705515921, "learning_rate": 2.5919941547353933e-06, "loss": 0.3815, "step": 15670 }, { "epoch": 0.8357986194397804, "grad_norm": 1.6844655081422828, "learning_rate": 2.591389708300184e-06, "loss": 0.3811, "step": 15680 }, { "epoch": 0.8363316542736068, "grad_norm": 1.5220854699172737, "learning_rate": 2.5907848850534308e-06, "loss": 0.3757, "step": 15690 }, { "epoch": 0.8368646891074332, "grad_norm": 1.386063400713167, "learning_rate": 2.5901796852039546e-06, "loss": 0.3778, "step": 15700 }, { "epoch": 0.8373977239412596, "grad_norm": 1.4196135201780693, "learning_rate": 2.5895741089607054e-06, "loss": 0.3667, "step": 15710 }, { "epoch": 0.8379307587750859, "grad_norm": 1.523409425044886, "learning_rate": 2.588968156532764e-06, "loss": 0.3682, "step": 15720 }, { "epoch": 0.8384637936089123, "grad_norm": 1.5576176343821933, "learning_rate": 2.58836182812934e-06, "loss": 0.3819, "step": 15730 }, { "epoch": 0.8389968284427387, "grad_norm": 1.5725986020128702, "learning_rate": 2.587755123959774e-06, "loss": 0.3791, "step": 15740 }, { "epoch": 0.8395298632765651, "grad_norm": 1.3689228427535598, "learning_rate": 2.587148044233535e-06, "loss": 0.3756, "step": 15750 }, { "epoch": 0.8400628981103915, "grad_norm": 1.778739762848662, "learning_rate": 2.5865405891602234e-06, "loss": 0.3838, "step": 15760 }, { "epoch": 0.8405959329442179, "grad_norm": 1.549534558837446, "learning_rate": 2.585932758949567e-06, "loss": 0.3647, "step": 15770 }, { "epoch": 0.8411289677780442, "grad_norm": 1.6465677234811233, "learning_rate": 2.585324553811425e-06, "loss": 0.3542, "step": 15780 }, { "epoch": 0.8416620026118707, "grad_norm": 1.5172515613551347, "learning_rate": 2.584715973955786e-06, "loss": 0.3785, "step": 15790 }, { "epoch": 0.8421950374456971, "grad_norm": 1.521408041097268, "learning_rate": 2.5841070195927665e-06, "loss": 0.3863, "step": 15800 }, { "epoch": 0.8427280722795235, "grad_norm": 1.6940708584064232, "learning_rate": 2.583497690932613e-06, "loss": 0.3751, "step": 15810 }, { "epoch": 0.8432611071133499, "grad_norm": 1.5649364796648757, "learning_rate": 2.582887988185702e-06, "loss": 0.3761, "step": 15820 }, { "epoch": 0.8437941419471763, "grad_norm": 1.5321600487979177, "learning_rate": 2.582277911562538e-06, "loss": 0.3641, "step": 15830 }, { "epoch": 0.8443271767810027, "grad_norm": 1.5282635965656433, "learning_rate": 2.5816674612737566e-06, "loss": 0.3695, "step": 15840 }, { "epoch": 0.844860211614829, "grad_norm": 1.6004399333232189, "learning_rate": 2.581056637530119e-06, "loss": 0.3692, "step": 15850 }, { "epoch": 0.8453932464486554, "grad_norm": 1.5888084113911927, "learning_rate": 2.580445440542518e-06, "loss": 0.3832, "step": 15860 }, { "epoch": 0.8459262812824818, "grad_norm": 1.5962224594629717, "learning_rate": 2.579833870521974e-06, "loss": 0.3787, "step": 15870 }, { "epoch": 0.8464593161163082, "grad_norm": 1.3847873658756509, "learning_rate": 2.5792219276796383e-06, "loss": 0.3735, "step": 15880 }, { "epoch": 0.8469923509501346, "grad_norm": 1.6443905605117313, "learning_rate": 2.578609612226788e-06, "loss": 0.3722, "step": 15890 }, { "epoch": 0.847525385783961, "grad_norm": 1.346841177561081, "learning_rate": 2.5779969243748307e-06, "loss": 0.3733, "step": 15900 }, { "epoch": 0.8480584206177874, "grad_norm": 1.6341427788319907, "learning_rate": 2.577383864335302e-06, "loss": 0.3771, "step": 15910 }, { "epoch": 0.8485914554516137, "grad_norm": 1.6817339865397696, "learning_rate": 2.576770432319865e-06, "loss": 0.3773, "step": 15920 }, { "epoch": 0.8491244902854401, "grad_norm": 1.5871104413822126, "learning_rate": 2.5761566285403145e-06, "loss": 0.3793, "step": 15930 }, { "epoch": 0.8496575251192665, "grad_norm": 1.6519170350433356, "learning_rate": 2.5755424532085695e-06, "loss": 0.3677, "step": 15940 }, { "epoch": 0.8501905599530929, "grad_norm": 1.7186682458139666, "learning_rate": 2.5749279065366804e-06, "loss": 0.3812, "step": 15950 }, { "epoch": 0.8507235947869193, "grad_norm": 1.5822487152956444, "learning_rate": 2.574312988736824e-06, "loss": 0.378, "step": 15960 }, { "epoch": 0.8512566296207457, "grad_norm": 1.6510525798610263, "learning_rate": 2.5736977000213064e-06, "loss": 0.3837, "step": 15970 }, { "epoch": 0.8517896644545722, "grad_norm": 1.5310094266109477, "learning_rate": 2.573082040602561e-06, "loss": 0.3698, "step": 15980 }, { "epoch": 0.8523226992883985, "grad_norm": 1.4529211881849582, "learning_rate": 2.572466010693149e-06, "loss": 0.3749, "step": 15990 }, { "epoch": 0.8528557341222249, "grad_norm": 1.5101692805802407, "learning_rate": 2.5718496105057607e-06, "loss": 0.364, "step": 16000 }, { "epoch": 0.8533887689560513, "grad_norm": 1.4688126022706927, "learning_rate": 2.5712328402532126e-06, "loss": 0.3667, "step": 16010 }, { "epoch": 0.8539218037898777, "grad_norm": 1.4942247042528989, "learning_rate": 2.570615700148451e-06, "loss": 0.3691, "step": 16020 }, { "epoch": 0.8544548386237041, "grad_norm": 1.6603712157923245, "learning_rate": 2.569998190404548e-06, "loss": 0.375, "step": 16030 }, { "epoch": 0.8549878734575305, "grad_norm": 1.5588096332047554, "learning_rate": 2.569380311234704e-06, "loss": 0.381, "step": 16040 }, { "epoch": 0.8555209082913569, "grad_norm": 1.4659923184327874, "learning_rate": 2.568762062852248e-06, "loss": 0.3778, "step": 16050 }, { "epoch": 0.8560539431251832, "grad_norm": 1.3979362089825116, "learning_rate": 2.5681434454706335e-06, "loss": 0.3755, "step": 16060 }, { "epoch": 0.8565869779590096, "grad_norm": 1.752895491513466, "learning_rate": 2.567524459303445e-06, "loss": 0.3615, "step": 16070 }, { "epoch": 0.857120012792836, "grad_norm": 1.7547747372537255, "learning_rate": 2.566905104564393e-06, "loss": 0.3838, "step": 16080 }, { "epoch": 0.8576530476266624, "grad_norm": 1.6129268055446493, "learning_rate": 2.5662853814673137e-06, "loss": 0.3667, "step": 16090 }, { "epoch": 0.8581860824604888, "grad_norm": 1.6319171638649352, "learning_rate": 2.565665290226172e-06, "loss": 0.376, "step": 16100 }, { "epoch": 0.8587191172943152, "grad_norm": 1.5177051176078427, "learning_rate": 2.5650448310550606e-06, "loss": 0.3705, "step": 16110 }, { "epoch": 0.8592521521281415, "grad_norm": 1.73445074077126, "learning_rate": 2.5644240041681977e-06, "loss": 0.3647, "step": 16120 }, { "epoch": 0.8597851869619679, "grad_norm": 1.6452702383339917, "learning_rate": 2.5638028097799287e-06, "loss": 0.3729, "step": 16130 }, { "epoch": 0.8603182217957943, "grad_norm": 1.7696969414639545, "learning_rate": 2.5631812481047268e-06, "loss": 0.3661, "step": 16140 }, { "epoch": 0.8608512566296207, "grad_norm": 1.546585335395, "learning_rate": 2.562559319357191e-06, "loss": 0.3634, "step": 16150 }, { "epoch": 0.8613842914634471, "grad_norm": 1.491649513938422, "learning_rate": 2.5619370237520477e-06, "loss": 0.3719, "step": 16160 }, { "epoch": 0.8619173262972736, "grad_norm": 1.5068967664791548, "learning_rate": 2.5613143615041496e-06, "loss": 0.3724, "step": 16170 }, { "epoch": 0.8624503611311, "grad_norm": 1.5853384603878022, "learning_rate": 2.5606913328284764e-06, "loss": 0.3708, "step": 16180 }, { "epoch": 0.8629833959649263, "grad_norm": 1.5099798844478494, "learning_rate": 2.5600679379401336e-06, "loss": 0.3783, "step": 16190 }, { "epoch": 0.8635164307987527, "grad_norm": 1.7102450189589395, "learning_rate": 2.559444177054354e-06, "loss": 0.3876, "step": 16200 }, { "epoch": 0.8640494656325791, "grad_norm": 1.6677562151293002, "learning_rate": 2.5588200503864957e-06, "loss": 0.3804, "step": 16210 }, { "epoch": 0.8645825004664055, "grad_norm": 1.626738274622516, "learning_rate": 2.5581955581520446e-06, "loss": 0.3792, "step": 16220 }, { "epoch": 0.8651155353002319, "grad_norm": 1.5608963153656066, "learning_rate": 2.5575707005666116e-06, "loss": 0.3788, "step": 16230 }, { "epoch": 0.8656485701340583, "grad_norm": 1.4639992337473382, "learning_rate": 2.5569454778459336e-06, "loss": 0.3665, "step": 16240 }, { "epoch": 0.8661816049678847, "grad_norm": 1.5077559027975775, "learning_rate": 2.5563198902058747e-06, "loss": 0.3734, "step": 16250 }, { "epoch": 0.866714639801711, "grad_norm": 1.6879167166085143, "learning_rate": 2.555693937862424e-06, "loss": 0.3801, "step": 16260 }, { "epoch": 0.8672476746355374, "grad_norm": 1.638295981773267, "learning_rate": 2.5550676210316965e-06, "loss": 0.3642, "step": 16270 }, { "epoch": 0.8677807094693638, "grad_norm": 1.7287336334949517, "learning_rate": 2.554440939929934e-06, "loss": 0.3895, "step": 16280 }, { "epoch": 0.8683137443031902, "grad_norm": 1.603657332634575, "learning_rate": 2.5538138947735035e-06, "loss": 0.3847, "step": 16290 }, { "epoch": 0.8688467791370166, "grad_norm": 1.6615849368066204, "learning_rate": 2.553186485778897e-06, "loss": 0.3745, "step": 16300 }, { "epoch": 0.869379813970843, "grad_norm": 1.598232462242146, "learning_rate": 2.5525587131627336e-06, "loss": 0.3749, "step": 16310 }, { "epoch": 0.8699128488046693, "grad_norm": 1.638500175883556, "learning_rate": 2.5519305771417564e-06, "loss": 0.3524, "step": 16320 }, { "epoch": 0.8704458836384957, "grad_norm": 1.5447728314436266, "learning_rate": 2.551302077932835e-06, "loss": 0.3778, "step": 16330 }, { "epoch": 0.8709789184723221, "grad_norm": 1.6940851981408045, "learning_rate": 2.550673215752964e-06, "loss": 0.3586, "step": 16340 }, { "epoch": 0.8715119533061485, "grad_norm": 1.6388450343210867, "learning_rate": 2.5500439908192637e-06, "loss": 0.374, "step": 16350 }, { "epoch": 0.872044988139975, "grad_norm": 1.6200539154563658, "learning_rate": 2.5494144033489784e-06, "loss": 0.3784, "step": 16360 }, { "epoch": 0.8725780229738014, "grad_norm": 1.3792852368666522, "learning_rate": 2.54878445355948e-06, "loss": 0.3721, "step": 16370 }, { "epoch": 0.8731110578076278, "grad_norm": 1.6614456476798383, "learning_rate": 2.5481541416682624e-06, "loss": 0.3716, "step": 16380 }, { "epoch": 0.8736440926414542, "grad_norm": 1.871795895368198, "learning_rate": 2.5475234678929475e-06, "loss": 0.3746, "step": 16390 }, { "epoch": 0.8741771274752805, "grad_norm": 1.5939911537858293, "learning_rate": 2.54689243245128e-06, "loss": 0.3719, "step": 16400 }, { "epoch": 0.8747101623091069, "grad_norm": 1.5813498650047086, "learning_rate": 2.54626103556113e-06, "loss": 0.4017, "step": 16410 }, { "epoch": 0.8752431971429333, "grad_norm": 1.6391277452202946, "learning_rate": 2.5456292774404936e-06, "loss": 0.3822, "step": 16420 }, { "epoch": 0.8757762319767597, "grad_norm": 1.585544556941181, "learning_rate": 2.5449971583074897e-06, "loss": 0.3682, "step": 16430 }, { "epoch": 0.8763092668105861, "grad_norm": 1.3774672461817439, "learning_rate": 2.5443646783803637e-06, "loss": 0.3701, "step": 16440 }, { "epoch": 0.8768423016444125, "grad_norm": 1.5370211442878157, "learning_rate": 2.5437318378774834e-06, "loss": 0.3633, "step": 16450 }, { "epoch": 0.8773753364782388, "grad_norm": 1.4835213341033064, "learning_rate": 2.5430986370173433e-06, "loss": 0.3704, "step": 16460 }, { "epoch": 0.8779083713120652, "grad_norm": 1.4503348420392632, "learning_rate": 2.542465076018561e-06, "loss": 0.362, "step": 16470 }, { "epoch": 0.8784414061458916, "grad_norm": 1.7153479897425834, "learning_rate": 2.541831155099879e-06, "loss": 0.3718, "step": 16480 }, { "epoch": 0.878974440979718, "grad_norm": 1.6442307406855792, "learning_rate": 2.5411968744801646e-06, "loss": 0.3726, "step": 16490 }, { "epoch": 0.8795074758135444, "grad_norm": 1.6064051120357314, "learning_rate": 2.5405622343784068e-06, "loss": 0.375, "step": 16500 }, { "epoch": 0.8800405106473708, "grad_norm": 1.8617132070149183, "learning_rate": 2.5399272350137213e-06, "loss": 0.3705, "step": 16510 }, { "epoch": 0.8805735454811972, "grad_norm": 1.6586012144764883, "learning_rate": 2.5392918766053475e-06, "loss": 0.3768, "step": 16520 }, { "epoch": 0.8811065803150235, "grad_norm": 1.7688549617684624, "learning_rate": 2.5386561593726477e-06, "loss": 0.3914, "step": 16530 }, { "epoch": 0.88163961514885, "grad_norm": 1.5140260813796467, "learning_rate": 2.5380200835351095e-06, "loss": 0.3727, "step": 16540 }, { "epoch": 0.8821726499826764, "grad_norm": 1.5847791420617885, "learning_rate": 2.537383649312342e-06, "loss": 0.3656, "step": 16550 }, { "epoch": 0.8827056848165028, "grad_norm": 1.5786263398800064, "learning_rate": 2.5367468569240807e-06, "loss": 0.3732, "step": 16560 }, { "epoch": 0.8832387196503292, "grad_norm": 1.5010672497747122, "learning_rate": 2.5361097065901833e-06, "loss": 0.3537, "step": 16570 }, { "epoch": 0.8837717544841556, "grad_norm": 1.6741605612831219, "learning_rate": 2.5354721985306313e-06, "loss": 0.3653, "step": 16580 }, { "epoch": 0.884304789317982, "grad_norm": 1.3879840429853558, "learning_rate": 2.5348343329655298e-06, "loss": 0.3742, "step": 16590 }, { "epoch": 0.8848378241518083, "grad_norm": 1.5094327415354354, "learning_rate": 2.534196110115107e-06, "loss": 0.378, "step": 16600 }, { "epoch": 0.8853708589856347, "grad_norm": 1.430564918210685, "learning_rate": 2.5335575301997148e-06, "loss": 0.3638, "step": 16610 }, { "epoch": 0.8859038938194611, "grad_norm": 1.7528605349800008, "learning_rate": 2.5329185934398286e-06, "loss": 0.3752, "step": 16620 }, { "epoch": 0.8864369286532875, "grad_norm": 1.7256482527304138, "learning_rate": 2.5322793000560467e-06, "loss": 0.3767, "step": 16630 }, { "epoch": 0.8869699634871139, "grad_norm": 1.6233691311033993, "learning_rate": 2.5316396502690906e-06, "loss": 0.3705, "step": 16640 }, { "epoch": 0.8875029983209403, "grad_norm": 1.5534173200233747, "learning_rate": 2.5309996442998043e-06, "loss": 0.3518, "step": 16650 }, { "epoch": 0.8880360331547666, "grad_norm": 1.474485031730477, "learning_rate": 2.530359282369156e-06, "loss": 0.3863, "step": 16660 }, { "epoch": 0.888569067988593, "grad_norm": 1.5660077731666442, "learning_rate": 2.529718564698235e-06, "loss": 0.3682, "step": 16670 }, { "epoch": 0.8891021028224194, "grad_norm": 1.5987562218011735, "learning_rate": 2.529077491508256e-06, "loss": 0.3708, "step": 16680 }, { "epoch": 0.8896351376562458, "grad_norm": 1.464437429640059, "learning_rate": 2.528436063020554e-06, "loss": 0.3753, "step": 16690 }, { "epoch": 0.8901681724900722, "grad_norm": 1.7208027187125727, "learning_rate": 2.527794279456588e-06, "loss": 0.3753, "step": 16700 }, { "epoch": 0.8907012073238986, "grad_norm": 1.7898384322917824, "learning_rate": 2.5271521410379387e-06, "loss": 0.3795, "step": 16710 }, { "epoch": 0.891234242157725, "grad_norm": 1.5138370911923953, "learning_rate": 2.5265096479863104e-06, "loss": 0.363, "step": 16720 }, { "epoch": 0.8917672769915514, "grad_norm": 1.6610679993188082, "learning_rate": 2.525866800523529e-06, "loss": 0.3714, "step": 16730 }, { "epoch": 0.8923003118253778, "grad_norm": 1.6235260763561166, "learning_rate": 2.5252235988715434e-06, "loss": 0.3621, "step": 16740 }, { "epoch": 0.8928333466592042, "grad_norm": 1.5866802026765918, "learning_rate": 2.5245800432524235e-06, "loss": 0.3564, "step": 16750 }, { "epoch": 0.8933663814930306, "grad_norm": 1.522207694721726, "learning_rate": 2.523936133888363e-06, "loss": 0.3688, "step": 16760 }, { "epoch": 0.893899416326857, "grad_norm": 1.505365445553488, "learning_rate": 2.5232918710016766e-06, "loss": 0.3772, "step": 16770 }, { "epoch": 0.8944324511606834, "grad_norm": 1.4937826924163504, "learning_rate": 2.5226472548148024e-06, "loss": 0.367, "step": 16780 }, { "epoch": 0.8949654859945098, "grad_norm": 1.6835416780129235, "learning_rate": 2.5220022855502994e-06, "loss": 0.3756, "step": 16790 }, { "epoch": 0.8954985208283361, "grad_norm": 1.5492489149071564, "learning_rate": 2.521356963430848e-06, "loss": 0.3838, "step": 16800 }, { "epoch": 0.8960315556621625, "grad_norm": 1.470886389362583, "learning_rate": 2.5207112886792515e-06, "loss": 0.3661, "step": 16810 }, { "epoch": 0.8965645904959889, "grad_norm": 1.7466458774817337, "learning_rate": 2.520065261518435e-06, "loss": 0.367, "step": 16820 }, { "epoch": 0.8970976253298153, "grad_norm": 1.5430961351416572, "learning_rate": 2.519418882171444e-06, "loss": 0.3775, "step": 16830 }, { "epoch": 0.8976306601636417, "grad_norm": 1.4481540710904404, "learning_rate": 2.5187721508614472e-06, "loss": 0.3692, "step": 16840 }, { "epoch": 0.8981636949974681, "grad_norm": 1.4749951731980901, "learning_rate": 2.5181250678117338e-06, "loss": 0.3602, "step": 16850 }, { "epoch": 0.8986967298312944, "grad_norm": 1.3894301359762353, "learning_rate": 2.517477633245714e-06, "loss": 0.3797, "step": 16860 }, { "epoch": 0.8992297646651208, "grad_norm": 1.5390883022574198, "learning_rate": 2.5168298473869214e-06, "loss": 0.3724, "step": 16870 }, { "epoch": 0.8997627994989472, "grad_norm": 1.5873898724450495, "learning_rate": 2.516181710459009e-06, "loss": 0.3888, "step": 16880 }, { "epoch": 0.9002958343327736, "grad_norm": 1.462613791560266, "learning_rate": 2.5155332226857514e-06, "loss": 0.3698, "step": 16890 }, { "epoch": 0.9008288691666, "grad_norm": 1.6735106794062748, "learning_rate": 2.5148843842910443e-06, "loss": 0.3777, "step": 16900 }, { "epoch": 0.9013619040004264, "grad_norm": 1.4762980101025525, "learning_rate": 2.514235195498905e-06, "loss": 0.3745, "step": 16910 }, { "epoch": 0.9018949388342529, "grad_norm": 1.698960198130694, "learning_rate": 2.513585656533471e-06, "loss": 0.3749, "step": 16920 }, { "epoch": 0.9024279736680793, "grad_norm": 1.7180561980762914, "learning_rate": 2.512935767619002e-06, "loss": 0.3664, "step": 16930 }, { "epoch": 0.9029610085019056, "grad_norm": 1.4304462538158722, "learning_rate": 2.512285528979877e-06, "loss": 0.3585, "step": 16940 }, { "epoch": 0.903494043335732, "grad_norm": 1.9039798130211056, "learning_rate": 2.5116349408405963e-06, "loss": 0.3665, "step": 16950 }, { "epoch": 0.9040270781695584, "grad_norm": 1.487438368131928, "learning_rate": 2.5109840034257813e-06, "loss": 0.3704, "step": 16960 }, { "epoch": 0.9045601130033848, "grad_norm": 1.610107122753202, "learning_rate": 2.5103327169601737e-06, "loss": 0.3659, "step": 16970 }, { "epoch": 0.9050931478372112, "grad_norm": 1.6894004250974037, "learning_rate": 2.5096810816686353e-06, "loss": 0.3778, "step": 16980 }, { "epoch": 0.9056261826710376, "grad_norm": 1.6339970168391242, "learning_rate": 2.509029097776149e-06, "loss": 0.3689, "step": 16990 }, { "epoch": 0.906159217504864, "grad_norm": 1.6852246573107863, "learning_rate": 2.508376765507818e-06, "loss": 0.3682, "step": 17000 }, { "epoch": 0.9066922523386903, "grad_norm": 1.4630275327257258, "learning_rate": 2.507724085088865e-06, "loss": 0.3787, "step": 17010 }, { "epoch": 0.9072252871725167, "grad_norm": 1.6517431522186092, "learning_rate": 2.507071056744633e-06, "loss": 0.3676, "step": 17020 }, { "epoch": 0.9077583220063431, "grad_norm": 1.701800878945533, "learning_rate": 2.5064176807005873e-06, "loss": 0.3632, "step": 17030 }, { "epoch": 0.9082913568401695, "grad_norm": 1.692234126968275, "learning_rate": 2.50576395718231e-06, "loss": 0.3512, "step": 17040 }, { "epoch": 0.9088243916739959, "grad_norm": 1.5124708655596437, "learning_rate": 2.5051098864155057e-06, "loss": 0.3612, "step": 17050 }, { "epoch": 0.9093574265078223, "grad_norm": 1.6165180533973313, "learning_rate": 2.504455468625997e-06, "loss": 0.3739, "step": 17060 }, { "epoch": 0.9098904613416486, "grad_norm": 1.6833039741171256, "learning_rate": 2.5038007040397268e-06, "loss": 0.3695, "step": 17070 }, { "epoch": 0.910423496175475, "grad_norm": 1.4967775873948406, "learning_rate": 2.5031455928827595e-06, "loss": 0.3786, "step": 17080 }, { "epoch": 0.9109565310093014, "grad_norm": 1.6169457528277469, "learning_rate": 2.502490135381277e-06, "loss": 0.3624, "step": 17090 }, { "epoch": 0.9114895658431279, "grad_norm": 1.4820685636734916, "learning_rate": 2.5018343317615818e-06, "loss": 0.3655, "step": 17100 }, { "epoch": 0.9120226006769543, "grad_norm": 1.6196579726566909, "learning_rate": 2.5011781822500955e-06, "loss": 0.375, "step": 17110 }, { "epoch": 0.9125556355107807, "grad_norm": 1.508273970983703, "learning_rate": 2.500521687073359e-06, "loss": 0.3654, "step": 17120 }, { "epoch": 0.9130886703446071, "grad_norm": 1.5030710300776673, "learning_rate": 2.4998648464580332e-06, "loss": 0.3646, "step": 17130 }, { "epoch": 0.9136217051784334, "grad_norm": 1.620283186805845, "learning_rate": 2.499207660630898e-06, "loss": 0.379, "step": 17140 }, { "epoch": 0.9141547400122598, "grad_norm": 1.5628084945231049, "learning_rate": 2.498550129818852e-06, "loss": 0.3687, "step": 17150 }, { "epoch": 0.9146877748460862, "grad_norm": 1.696540509817431, "learning_rate": 2.4978922542489137e-06, "loss": 0.3653, "step": 17160 }, { "epoch": 0.9152208096799126, "grad_norm": 1.7455171273145023, "learning_rate": 2.4972340341482198e-06, "loss": 0.3717, "step": 17170 }, { "epoch": 0.915753844513739, "grad_norm": 1.6338894695615422, "learning_rate": 2.4965754697440263e-06, "loss": 0.3721, "step": 17180 }, { "epoch": 0.9162868793475654, "grad_norm": 1.8131928426266846, "learning_rate": 2.495916561263709e-06, "loss": 0.3572, "step": 17190 }, { "epoch": 0.9168199141813917, "grad_norm": 1.5300294807613743, "learning_rate": 2.49525730893476e-06, "loss": 0.3655, "step": 17200 }, { "epoch": 0.9173529490152181, "grad_norm": 1.6216814078782733, "learning_rate": 2.494597712984794e-06, "loss": 0.367, "step": 17210 }, { "epoch": 0.9178859838490445, "grad_norm": 1.4364420541354284, "learning_rate": 2.49393777364154e-06, "loss": 0.3716, "step": 17220 }, { "epoch": 0.9184190186828709, "grad_norm": 1.7022997921564593, "learning_rate": 2.4932774911328488e-06, "loss": 0.3716, "step": 17230 }, { "epoch": 0.9189520535166973, "grad_norm": 1.7891312385508846, "learning_rate": 2.492616865686688e-06, "loss": 0.3752, "step": 17240 }, { "epoch": 0.9194850883505237, "grad_norm": 1.5373504256347597, "learning_rate": 2.4919558975311447e-06, "loss": 0.3602, "step": 17250 }, { "epoch": 0.9200181231843501, "grad_norm": 1.5604028028552743, "learning_rate": 2.491294586894424e-06, "loss": 0.3666, "step": 17260 }, { "epoch": 0.9205511580181764, "grad_norm": 1.4687632862878885, "learning_rate": 2.4906329340048473e-06, "loss": 0.3721, "step": 17270 }, { "epoch": 0.9210841928520028, "grad_norm": 1.5554092704522868, "learning_rate": 2.489970939090858e-06, "loss": 0.3715, "step": 17280 }, { "epoch": 0.9216172276858293, "grad_norm": 1.5603746558545535, "learning_rate": 2.4893086023810137e-06, "loss": 0.369, "step": 17290 }, { "epoch": 0.9221502625196557, "grad_norm": 1.9214917456509688, "learning_rate": 2.4886459241039927e-06, "loss": 0.3682, "step": 17300 }, { "epoch": 0.9226832973534821, "grad_norm": 1.5814369586103256, "learning_rate": 2.48798290448859e-06, "loss": 0.3619, "step": 17310 }, { "epoch": 0.9232163321873085, "grad_norm": 1.633097754000609, "learning_rate": 2.487319543763719e-06, "loss": 0.3706, "step": 17320 }, { "epoch": 0.9237493670211349, "grad_norm": 1.4030161367094047, "learning_rate": 2.4866558421584104e-06, "loss": 0.3677, "step": 17330 }, { "epoch": 0.9242824018549612, "grad_norm": 1.443605802267004, "learning_rate": 2.485991799901813e-06, "loss": 0.3627, "step": 17340 }, { "epoch": 0.9248154366887876, "grad_norm": 1.5282972689662562, "learning_rate": 2.4853274172231927e-06, "loss": 0.3573, "step": 17350 }, { "epoch": 0.925348471522614, "grad_norm": 1.7561577627016143, "learning_rate": 2.484662694351933e-06, "loss": 0.3641, "step": 17360 }, { "epoch": 0.9258815063564404, "grad_norm": 1.415277556429084, "learning_rate": 2.483997631517536e-06, "loss": 0.3649, "step": 17370 }, { "epoch": 0.9264145411902668, "grad_norm": 1.5985875375566883, "learning_rate": 2.48333222894962e-06, "loss": 0.372, "step": 17380 }, { "epoch": 0.9269475760240932, "grad_norm": 1.6241152316961869, "learning_rate": 2.48266648687792e-06, "loss": 0.3763, "step": 17390 }, { "epoch": 0.9274806108579196, "grad_norm": 1.6356146736491557, "learning_rate": 2.4820004055322905e-06, "loss": 0.378, "step": 17400 }, { "epoch": 0.9280136456917459, "grad_norm": 1.681918348554039, "learning_rate": 2.4813339851427007e-06, "loss": 0.3698, "step": 17410 }, { "epoch": 0.9285466805255723, "grad_norm": 1.6842573868607527, "learning_rate": 2.480667225939238e-06, "loss": 0.3762, "step": 17420 }, { "epoch": 0.9290797153593987, "grad_norm": 1.5264317352005894, "learning_rate": 2.4800001281521072e-06, "loss": 0.3761, "step": 17430 }, { "epoch": 0.9296127501932251, "grad_norm": 1.476755271296365, "learning_rate": 2.479332692011629e-06, "loss": 0.358, "step": 17440 }, { "epoch": 0.9301457850270515, "grad_norm": 1.4805263758967948, "learning_rate": 2.478664917748241e-06, "loss": 0.3603, "step": 17450 }, { "epoch": 0.9306788198608779, "grad_norm": 1.525436465381181, "learning_rate": 2.477996805592499e-06, "loss": 0.376, "step": 17460 }, { "epoch": 0.9312118546947042, "grad_norm": 1.648566810118355, "learning_rate": 2.4773283557750737e-06, "loss": 0.3695, "step": 17470 }, { "epoch": 0.9317448895285307, "grad_norm": 1.7113367370825123, "learning_rate": 2.476659568526753e-06, "loss": 0.374, "step": 17480 }, { "epoch": 0.9322779243623571, "grad_norm": 1.4904209524537468, "learning_rate": 2.4759904440784418e-06, "loss": 0.3687, "step": 17490 }, { "epoch": 0.9328109591961835, "grad_norm": 1.5561071215329212, "learning_rate": 2.4753209826611604e-06, "loss": 0.376, "step": 17500 }, { "epoch": 0.9333439940300099, "grad_norm": 1.6039022614531266, "learning_rate": 2.474651184506046e-06, "loss": 0.3726, "step": 17510 }, { "epoch": 0.9338770288638363, "grad_norm": 1.7460051562511008, "learning_rate": 2.473981049844353e-06, "loss": 0.3661, "step": 17520 }, { "epoch": 0.9344100636976627, "grad_norm": 1.4938989063829358, "learning_rate": 2.4733105789074503e-06, "loss": 0.3686, "step": 17530 }, { "epoch": 0.934943098531489, "grad_norm": 1.58904564233612, "learning_rate": 2.472639771926824e-06, "loss": 0.3656, "step": 17540 }, { "epoch": 0.9354761333653154, "grad_norm": 1.4791366539508526, "learning_rate": 2.4719686291340754e-06, "loss": 0.3663, "step": 17550 }, { "epoch": 0.9360091681991418, "grad_norm": 1.6234568159798726, "learning_rate": 2.471297150760923e-06, "loss": 0.3686, "step": 17560 }, { "epoch": 0.9365422030329682, "grad_norm": 1.5914629135678837, "learning_rate": 2.4706253370392e-06, "loss": 0.3644, "step": 17570 }, { "epoch": 0.9370752378667946, "grad_norm": 1.3192337859178085, "learning_rate": 2.4699531882008553e-06, "loss": 0.3672, "step": 17580 }, { "epoch": 0.937608272700621, "grad_norm": 1.4983221392346286, "learning_rate": 2.4692807044779545e-06, "loss": 0.3603, "step": 17590 }, { "epoch": 0.9381413075344474, "grad_norm": 1.5973126916992249, "learning_rate": 2.4686078861026786e-06, "loss": 0.3721, "step": 17600 }, { "epoch": 0.9386743423682737, "grad_norm": 1.5523063050352968, "learning_rate": 2.467934733307323e-06, "loss": 0.3633, "step": 17610 }, { "epoch": 0.9392073772021001, "grad_norm": 1.6736114168264564, "learning_rate": 2.4672612463243004e-06, "loss": 0.371, "step": 17620 }, { "epoch": 0.9397404120359265, "grad_norm": 1.9173319357627825, "learning_rate": 2.466587425386137e-06, "loss": 0.3596, "step": 17630 }, { "epoch": 0.9402734468697529, "grad_norm": 1.603323919224505, "learning_rate": 2.465913270725476e-06, "loss": 0.3617, "step": 17640 }, { "epoch": 0.9408064817035793, "grad_norm": 1.6153372751997013, "learning_rate": 2.4652387825750744e-06, "loss": 0.3666, "step": 17650 }, { "epoch": 0.9413395165374058, "grad_norm": 1.476201457804405, "learning_rate": 2.4645639611678054e-06, "loss": 0.3729, "step": 17660 }, { "epoch": 0.9418725513712322, "grad_norm": 1.8343743669833457, "learning_rate": 2.4638888067366557e-06, "loss": 0.3735, "step": 17670 }, { "epoch": 0.9424055862050585, "grad_norm": 1.8501413391469148, "learning_rate": 2.4632133195147298e-06, "loss": 0.3694, "step": 17680 }, { "epoch": 0.9429386210388849, "grad_norm": 1.6882610304258439, "learning_rate": 2.4625374997352436e-06, "loss": 0.3629, "step": 17690 }, { "epoch": 0.9434716558727113, "grad_norm": 1.6062698520592167, "learning_rate": 2.4618613476315314e-06, "loss": 0.367, "step": 17700 }, { "epoch": 0.9440046907065377, "grad_norm": 1.5738805619236607, "learning_rate": 2.461184863437039e-06, "loss": 0.3555, "step": 17710 }, { "epoch": 0.9445377255403641, "grad_norm": 1.6530339092462851, "learning_rate": 2.460508047385329e-06, "loss": 0.3658, "step": 17720 }, { "epoch": 0.9450707603741905, "grad_norm": 1.6161906807110695, "learning_rate": 2.4598308997100784e-06, "loss": 0.3847, "step": 17730 }, { "epoch": 0.9456037952080169, "grad_norm": 1.4604749876874386, "learning_rate": 2.4591534206450767e-06, "loss": 0.3692, "step": 17740 }, { "epoch": 0.9461368300418432, "grad_norm": 1.5185517295183157, "learning_rate": 2.45847561042423e-06, "loss": 0.3708, "step": 17750 }, { "epoch": 0.9466698648756696, "grad_norm": 1.7132156635550504, "learning_rate": 2.4577974692815584e-06, "loss": 0.3717, "step": 17760 }, { "epoch": 0.947202899709496, "grad_norm": 1.543104191911181, "learning_rate": 2.457118997451196e-06, "loss": 0.3731, "step": 17770 }, { "epoch": 0.9477359345433224, "grad_norm": 1.6161894911997858, "learning_rate": 2.45644019516739e-06, "loss": 0.3696, "step": 17780 }, { "epoch": 0.9482689693771488, "grad_norm": 1.4729742964038686, "learning_rate": 2.4557610626645035e-06, "loss": 0.3748, "step": 17790 }, { "epoch": 0.9488020042109752, "grad_norm": 1.5276426885909562, "learning_rate": 2.4550816001770123e-06, "loss": 0.3617, "step": 17800 }, { "epoch": 0.9493350390448015, "grad_norm": 1.7163154329909904, "learning_rate": 2.454401807939506e-06, "loss": 0.364, "step": 17810 }, { "epoch": 0.9498680738786279, "grad_norm": 1.5921794005720344, "learning_rate": 2.4537216861866907e-06, "loss": 0.3629, "step": 17820 }, { "epoch": 0.9504011087124543, "grad_norm": 1.3869546460769775, "learning_rate": 2.4530412351533822e-06, "loss": 0.3643, "step": 17830 }, { "epoch": 0.9509341435462807, "grad_norm": 1.5203927594497786, "learning_rate": 2.452360455074512e-06, "loss": 0.3719, "step": 17840 }, { "epoch": 0.9514671783801072, "grad_norm": 1.545618630544393, "learning_rate": 2.4516793461851267e-06, "loss": 0.3639, "step": 17850 }, { "epoch": 0.9520002132139336, "grad_norm": 1.579502199217928, "learning_rate": 2.4509979087203837e-06, "loss": 0.364, "step": 17860 }, { "epoch": 0.95253324804776, "grad_norm": 1.5573899724930402, "learning_rate": 2.4503161429155553e-06, "loss": 0.3709, "step": 17870 }, { "epoch": 0.9530662828815863, "grad_norm": 1.5166311273174973, "learning_rate": 2.4496340490060275e-06, "loss": 0.3529, "step": 17880 }, { "epoch": 0.9535993177154127, "grad_norm": 1.5785594326300647, "learning_rate": 2.4489516272272976e-06, "loss": 0.351, "step": 17890 }, { "epoch": 0.9541323525492391, "grad_norm": 1.6356705833215939, "learning_rate": 2.448268877814979e-06, "loss": 0.3673, "step": 17900 }, { "epoch": 0.9546653873830655, "grad_norm": 1.405334914648528, "learning_rate": 2.447585801004796e-06, "loss": 0.373, "step": 17910 }, { "epoch": 0.9551984222168919, "grad_norm": 1.5614116417005306, "learning_rate": 2.446902397032587e-06, "loss": 0.3502, "step": 17920 }, { "epoch": 0.9557314570507183, "grad_norm": 1.5052327333711553, "learning_rate": 2.4462186661343027e-06, "loss": 0.366, "step": 17930 }, { "epoch": 0.9562644918845447, "grad_norm": 1.728569145652302, "learning_rate": 2.4455346085460066e-06, "loss": 0.3642, "step": 17940 }, { "epoch": 0.956797526718371, "grad_norm": 1.542268593167441, "learning_rate": 2.4448502245038766e-06, "loss": 0.3695, "step": 17950 }, { "epoch": 0.9573305615521974, "grad_norm": 1.397372313417784, "learning_rate": 2.444165514244201e-06, "loss": 0.3734, "step": 17960 }, { "epoch": 0.9578635963860238, "grad_norm": 1.6251252046449296, "learning_rate": 2.4434804780033823e-06, "loss": 0.3674, "step": 17970 }, { "epoch": 0.9583966312198502, "grad_norm": 1.7163614107110303, "learning_rate": 2.442795116017935e-06, "loss": 0.3593, "step": 17980 }, { "epoch": 0.9589296660536766, "grad_norm": 1.566021206897545, "learning_rate": 2.4421094285244855e-06, "loss": 0.3722, "step": 17990 }, { "epoch": 0.959462700887503, "grad_norm": 1.6304647026237082, "learning_rate": 2.441423415759774e-06, "loss": 0.3587, "step": 18000 }, { "epoch": 0.9599957357213293, "grad_norm": 1.492626828595643, "learning_rate": 2.440737077960652e-06, "loss": 0.3597, "step": 18010 }, { "epoch": 0.9605287705551557, "grad_norm": 1.5001595509604813, "learning_rate": 2.440050415364084e-06, "loss": 0.3599, "step": 18020 }, { "epoch": 0.9610618053889821, "grad_norm": 1.6063213206597977, "learning_rate": 2.439363428207145e-06, "loss": 0.3631, "step": 18030 }, { "epoch": 0.9615948402228086, "grad_norm": 1.6093009715552609, "learning_rate": 2.4386761167270234e-06, "loss": 0.3614, "step": 18040 }, { "epoch": 0.962127875056635, "grad_norm": 1.6308074058789368, "learning_rate": 2.43798848116102e-06, "loss": 0.3638, "step": 18050 }, { "epoch": 0.9626609098904614, "grad_norm": 1.7420568186962917, "learning_rate": 2.4373005217465465e-06, "loss": 0.371, "step": 18060 }, { "epoch": 0.9631939447242878, "grad_norm": 1.4968721393315962, "learning_rate": 2.436612238721126e-06, "loss": 0.3669, "step": 18070 }, { "epoch": 0.9637269795581141, "grad_norm": 1.5665639159164495, "learning_rate": 2.435923632322395e-06, "loss": 0.378, "step": 18080 }, { "epoch": 0.9642600143919405, "grad_norm": 1.6717461177536967, "learning_rate": 2.4352347027881005e-06, "loss": 0.3632, "step": 18090 }, { "epoch": 0.9647930492257669, "grad_norm": 1.5647881759413753, "learning_rate": 2.434545450356101e-06, "loss": 0.3624, "step": 18100 }, { "epoch": 0.9653260840595933, "grad_norm": 1.600302406680916, "learning_rate": 2.4338558752643664e-06, "loss": 0.3686, "step": 18110 }, { "epoch": 0.9658591188934197, "grad_norm": 1.636536800418388, "learning_rate": 2.433165977750979e-06, "loss": 0.3641, "step": 18120 }, { "epoch": 0.9663921537272461, "grad_norm": 1.5731316349695552, "learning_rate": 2.4324757580541317e-06, "loss": 0.3584, "step": 18130 }, { "epoch": 0.9669251885610725, "grad_norm": 1.5097527142077196, "learning_rate": 2.431785216412129e-06, "loss": 0.3603, "step": 18140 }, { "epoch": 0.9674582233948988, "grad_norm": 1.4878256752404058, "learning_rate": 2.431094353063385e-06, "loss": 0.3724, "step": 18150 }, { "epoch": 0.9679912582287252, "grad_norm": 1.5942259210670973, "learning_rate": 2.430403168246427e-06, "loss": 0.3528, "step": 18160 }, { "epoch": 0.9685242930625516, "grad_norm": 1.5265735741518125, "learning_rate": 2.4297116621998925e-06, "loss": 0.3616, "step": 18170 }, { "epoch": 0.969057327896378, "grad_norm": 1.746135241116584, "learning_rate": 2.4290198351625295e-06, "loss": 0.3705, "step": 18180 }, { "epoch": 0.9695903627302044, "grad_norm": 1.5578279963726467, "learning_rate": 2.428327687373197e-06, "loss": 0.3646, "step": 18190 }, { "epoch": 0.9701233975640308, "grad_norm": 1.6358521318726396, "learning_rate": 2.427635219070866e-06, "loss": 0.3581, "step": 18200 }, { "epoch": 0.9706564323978571, "grad_norm": 1.4343881217679273, "learning_rate": 2.4269424304946155e-06, "loss": 0.3606, "step": 18210 }, { "epoch": 0.9711894672316836, "grad_norm": 1.510311478861491, "learning_rate": 2.4262493218836377e-06, "loss": 0.3648, "step": 18220 }, { "epoch": 0.97172250206551, "grad_norm": 1.660404789477891, "learning_rate": 2.425555893477234e-06, "loss": 0.371, "step": 18230 }, { "epoch": 0.9722555368993364, "grad_norm": 1.4212144544897773, "learning_rate": 2.4248621455148165e-06, "loss": 0.3744, "step": 18240 }, { "epoch": 0.9727885717331628, "grad_norm": 1.4489060964325087, "learning_rate": 2.4241680782359068e-06, "loss": 0.3609, "step": 18250 }, { "epoch": 0.9733216065669892, "grad_norm": 1.426970079723212, "learning_rate": 2.423473691880138e-06, "loss": 0.365, "step": 18260 }, { "epoch": 0.9738546414008156, "grad_norm": 1.5536965151741717, "learning_rate": 2.422778986687253e-06, "loss": 0.364, "step": 18270 }, { "epoch": 0.974387676234642, "grad_norm": 1.635580908258095, "learning_rate": 2.422083962897105e-06, "loss": 0.3566, "step": 18280 }, { "epoch": 0.9749207110684683, "grad_norm": 1.4618499087496961, "learning_rate": 2.4213886207496564e-06, "loss": 0.3771, "step": 18290 }, { "epoch": 0.9754537459022947, "grad_norm": 1.6157693302876939, "learning_rate": 2.4206929604849796e-06, "loss": 0.3627, "step": 18300 }, { "epoch": 0.9759867807361211, "grad_norm": 1.4240405488738535, "learning_rate": 2.419996982343258e-06, "loss": 0.3733, "step": 18310 }, { "epoch": 0.9765198155699475, "grad_norm": 1.500814605425278, "learning_rate": 2.419300686564784e-06, "loss": 0.3623, "step": 18320 }, { "epoch": 0.9770528504037739, "grad_norm": 1.5294416558247144, "learning_rate": 2.418604073389958e-06, "loss": 0.3589, "step": 18330 }, { "epoch": 0.9775858852376003, "grad_norm": 1.4592799169375035, "learning_rate": 2.4179071430592935e-06, "loss": 0.3693, "step": 18340 }, { "epoch": 0.9781189200714266, "grad_norm": 1.49931538348573, "learning_rate": 2.417209895813411e-06, "loss": 0.3542, "step": 18350 }, { "epoch": 0.978651954905253, "grad_norm": 1.379484310147667, "learning_rate": 2.416512331893041e-06, "loss": 0.3668, "step": 18360 }, { "epoch": 0.9791849897390794, "grad_norm": 1.7133936154226403, "learning_rate": 2.4158144515390228e-06, "loss": 0.3631, "step": 18370 }, { "epoch": 0.9797180245729058, "grad_norm": 1.5444301178680617, "learning_rate": 2.415116254992306e-06, "loss": 0.3712, "step": 18380 }, { "epoch": 0.9802510594067322, "grad_norm": 1.5739836196563841, "learning_rate": 2.4144177424939493e-06, "loss": 0.3529, "step": 18390 }, { "epoch": 0.9807840942405586, "grad_norm": 1.5059406995094413, "learning_rate": 2.413718914285119e-06, "loss": 0.351, "step": 18400 }, { "epoch": 0.9813171290743851, "grad_norm": 1.6895649086446567, "learning_rate": 2.4130197706070926e-06, "loss": 0.3612, "step": 18410 }, { "epoch": 0.9818501639082114, "grad_norm": 1.7063452143561457, "learning_rate": 2.4123203117012546e-06, "loss": 0.3817, "step": 18420 }, { "epoch": 0.9823831987420378, "grad_norm": 1.6181306893430538, "learning_rate": 2.411620537809099e-06, "loss": 0.3628, "step": 18430 }, { "epoch": 0.9829162335758642, "grad_norm": 1.552597442162855, "learning_rate": 2.4109204491722298e-06, "loss": 0.3659, "step": 18440 }, { "epoch": 0.9834492684096906, "grad_norm": 1.4596416357276856, "learning_rate": 2.410220046032357e-06, "loss": 0.3532, "step": 18450 }, { "epoch": 0.983982303243517, "grad_norm": 1.612795654690574, "learning_rate": 2.409519328631302e-06, "loss": 0.3676, "step": 18460 }, { "epoch": 0.9845153380773434, "grad_norm": 1.5919827568767952, "learning_rate": 2.4088182972109924e-06, "loss": 0.3529, "step": 18470 }, { "epoch": 0.9850483729111698, "grad_norm": 1.7529574427579884, "learning_rate": 2.408116952013466e-06, "loss": 0.3756, "step": 18480 }, { "epoch": 0.9855814077449961, "grad_norm": 1.926275698771631, "learning_rate": 2.407415293280868e-06, "loss": 0.3785, "step": 18490 }, { "epoch": 0.9861144425788225, "grad_norm": 1.6733985301065029, "learning_rate": 2.406713321255452e-06, "loss": 0.3773, "step": 18500 }, { "epoch": 0.9866474774126489, "grad_norm": 1.6789861335387402, "learning_rate": 2.4060110361795796e-06, "loss": 0.3667, "step": 18510 }, { "epoch": 0.9871805122464753, "grad_norm": 1.6123061536924932, "learning_rate": 2.4053084382957204e-06, "loss": 0.366, "step": 18520 }, { "epoch": 0.9877135470803017, "grad_norm": 1.587191956927505, "learning_rate": 2.404605527846453e-06, "loss": 0.3604, "step": 18530 }, { "epoch": 0.9882465819141281, "grad_norm": 1.530623928395734, "learning_rate": 2.4039023050744633e-06, "loss": 0.3633, "step": 18540 }, { "epoch": 0.9887796167479544, "grad_norm": 1.4767438794529086, "learning_rate": 2.403198770222543e-06, "loss": 0.3663, "step": 18550 }, { "epoch": 0.9893126515817808, "grad_norm": 1.6436863081529758, "learning_rate": 2.4024949235335962e-06, "loss": 0.3746, "step": 18560 }, { "epoch": 0.9898456864156072, "grad_norm": 1.6375251689344046, "learning_rate": 2.4017907652506306e-06, "loss": 0.367, "step": 18570 }, { "epoch": 0.9903787212494336, "grad_norm": 1.5685974020849787, "learning_rate": 2.4010862956167625e-06, "loss": 0.3658, "step": 18580 }, { "epoch": 0.99091175608326, "grad_norm": 1.69192421210803, "learning_rate": 2.4003815148752167e-06, "loss": 0.3783, "step": 18590 }, { "epoch": 0.9914447909170865, "grad_norm": 1.5988772782352858, "learning_rate": 2.3996764232693237e-06, "loss": 0.3706, "step": 18600 }, { "epoch": 0.9919778257509129, "grad_norm": 1.6918062581514597, "learning_rate": 2.398971021042524e-06, "loss": 0.3663, "step": 18610 }, { "epoch": 0.9925108605847393, "grad_norm": 1.5502574966642315, "learning_rate": 2.398265308438362e-06, "loss": 0.3567, "step": 18620 }, { "epoch": 0.9930438954185656, "grad_norm": 1.657632474364943, "learning_rate": 2.3975592857004926e-06, "loss": 0.357, "step": 18630 }, { "epoch": 0.993576930252392, "grad_norm": 1.6473630177735619, "learning_rate": 2.3968529530726748e-06, "loss": 0.3791, "step": 18640 }, { "epoch": 0.9941099650862184, "grad_norm": 1.5931069797319237, "learning_rate": 2.3961463107987757e-06, "loss": 0.3675, "step": 18650 }, { "epoch": 0.9946429999200448, "grad_norm": 1.7627140795985976, "learning_rate": 2.395439359122771e-06, "loss": 0.3612, "step": 18660 }, { "epoch": 0.9951760347538712, "grad_norm": 1.6035597605909455, "learning_rate": 2.3947320982887403e-06, "loss": 0.3692, "step": 18670 }, { "epoch": 0.9957090695876976, "grad_norm": 1.533608065769785, "learning_rate": 2.394024528540873e-06, "loss": 0.3549, "step": 18680 }, { "epoch": 0.9962421044215239, "grad_norm": 1.5374536888105628, "learning_rate": 2.3933166501234615e-06, "loss": 0.3696, "step": 18690 }, { "epoch": 0.9967751392553503, "grad_norm": 1.54924377405105, "learning_rate": 2.3926084632809084e-06, "loss": 0.3672, "step": 18700 }, { "epoch": 0.9973081740891767, "grad_norm": 1.565844385793466, "learning_rate": 2.391899968257721e-06, "loss": 0.3466, "step": 18710 }, { "epoch": 0.9978412089230031, "grad_norm": 1.5983914218659367, "learning_rate": 2.3911911652985127e-06, "loss": 0.3485, "step": 18720 }, { "epoch": 0.9983742437568295, "grad_norm": 1.5797941383572287, "learning_rate": 2.3904820546480036e-06, "loss": 0.3513, "step": 18730 }, { "epoch": 0.9989072785906559, "grad_norm": 1.5663481196163236, "learning_rate": 2.3897726365510214e-06, "loss": 0.3625, "step": 18740 }, { "epoch": 0.9994403134244823, "grad_norm": 1.5840773697615038, "learning_rate": 2.3890629112524967e-06, "loss": 0.3526, "step": 18750 }, { "epoch": 0.9999733482583086, "grad_norm": 1.6273337690743201, "learning_rate": 2.38835287899747e-06, "loss": 0.3572, "step": 18760 }, { "epoch": 1.000506383092135, "grad_norm": 1.7185577385814188, "learning_rate": 2.3876425400310852e-06, "loss": 0.3197, "step": 18770 }, { "epoch": 1.0010394179259614, "grad_norm": 1.4672795083447345, "learning_rate": 2.3869318945985934e-06, "loss": 0.3064, "step": 18780 }, { "epoch": 1.0015724527597878, "grad_norm": 1.6578994929501827, "learning_rate": 2.38622094294535e-06, "loss": 0.3204, "step": 18790 }, { "epoch": 1.0021054875936142, "grad_norm": 1.6202311086614796, "learning_rate": 2.385509685316817e-06, "loss": 0.3102, "step": 18800 }, { "epoch": 1.0026385224274406, "grad_norm": 1.7411381826142234, "learning_rate": 2.3847981219585633e-06, "loss": 0.3166, "step": 18810 }, { "epoch": 1.003171557261267, "grad_norm": 1.7111186110108108, "learning_rate": 2.3840862531162614e-06, "loss": 0.3263, "step": 18820 }, { "epoch": 1.0037045920950933, "grad_norm": 1.6358968238976415, "learning_rate": 2.3833740790356897e-06, "loss": 0.3092, "step": 18830 }, { "epoch": 1.0042376269289197, "grad_norm": 1.7713080472771519, "learning_rate": 2.3826615999627336e-06, "loss": 0.3116, "step": 18840 }, { "epoch": 1.004770661762746, "grad_norm": 1.54931472801709, "learning_rate": 2.3819488161433816e-06, "loss": 0.3107, "step": 18850 }, { "epoch": 1.0053036965965725, "grad_norm": 1.6212523398451233, "learning_rate": 2.381235727823728e-06, "loss": 0.3154, "step": 18860 }, { "epoch": 1.0058367314303989, "grad_norm": 1.6009910761940318, "learning_rate": 2.3805223352499735e-06, "loss": 0.3146, "step": 18870 }, { "epoch": 1.0063697662642255, "grad_norm": 1.693460271570902, "learning_rate": 2.3798086386684223e-06, "loss": 0.3236, "step": 18880 }, { "epoch": 1.0069028010980519, "grad_norm": 1.6340783931107634, "learning_rate": 2.3790946383254847e-06, "loss": 0.3059, "step": 18890 }, { "epoch": 1.0074358359318782, "grad_norm": 1.5458288811540937, "learning_rate": 2.3783803344676753e-06, "loss": 0.3157, "step": 18900 }, { "epoch": 1.0079688707657046, "grad_norm": 1.6652450212515932, "learning_rate": 2.3776657273416126e-06, "loss": 0.3249, "step": 18910 }, { "epoch": 1.008501905599531, "grad_norm": 1.593818177263969, "learning_rate": 2.3769508171940223e-06, "loss": 0.308, "step": 18920 }, { "epoch": 1.0090349404333574, "grad_norm": 1.5977494921414706, "learning_rate": 2.376235604271732e-06, "loss": 0.3096, "step": 18930 }, { "epoch": 1.0095679752671838, "grad_norm": 1.5088384012401328, "learning_rate": 2.3755200888216753e-06, "loss": 0.3099, "step": 18940 }, { "epoch": 1.0101010101010102, "grad_norm": 1.7601843230682628, "learning_rate": 2.3748042710908905e-06, "loss": 0.3118, "step": 18950 }, { "epoch": 1.0106340449348366, "grad_norm": 1.5501196543875728, "learning_rate": 2.374088151326519e-06, "loss": 0.3274, "step": 18960 }, { "epoch": 1.011167079768663, "grad_norm": 1.6502996693443244, "learning_rate": 2.3733717297758075e-06, "loss": 0.3033, "step": 18970 }, { "epoch": 1.0117001146024893, "grad_norm": 1.4951842464731067, "learning_rate": 2.372655006686107e-06, "loss": 0.3145, "step": 18980 }, { "epoch": 1.0122331494363157, "grad_norm": 1.5839191501728258, "learning_rate": 2.3719379823048718e-06, "loss": 0.3238, "step": 18990 }, { "epoch": 1.012766184270142, "grad_norm": 1.6237552148984358, "learning_rate": 2.37122065687966e-06, "loss": 0.306, "step": 19000 }, { "epoch": 1.0132992191039685, "grad_norm": 1.6308397511300092, "learning_rate": 2.370503030658136e-06, "loss": 0.307, "step": 19010 }, { "epoch": 1.0138322539377949, "grad_norm": 1.695769860890057, "learning_rate": 2.369785103888065e-06, "loss": 0.3249, "step": 19020 }, { "epoch": 1.0143652887716212, "grad_norm": 1.5964506225425257, "learning_rate": 2.369066876817317e-06, "loss": 0.304, "step": 19030 }, { "epoch": 1.0148983236054476, "grad_norm": 1.8266405020677607, "learning_rate": 2.368348349693867e-06, "loss": 0.3181, "step": 19040 }, { "epoch": 1.015431358439274, "grad_norm": 1.7223808238330438, "learning_rate": 2.3676295227657925e-06, "loss": 0.3091, "step": 19050 }, { "epoch": 1.0159643932731004, "grad_norm": 1.692758669499585, "learning_rate": 2.3669103962812744e-06, "loss": 0.3138, "step": 19060 }, { "epoch": 1.0164974281069268, "grad_norm": 1.5167307661108342, "learning_rate": 2.3661909704885965e-06, "loss": 0.3147, "step": 19070 }, { "epoch": 1.0170304629407532, "grad_norm": 1.5184974066061596, "learning_rate": 2.3654712456361474e-06, "loss": 0.3069, "step": 19080 }, { "epoch": 1.0175634977745796, "grad_norm": 1.751316879862646, "learning_rate": 2.364751221972418e-06, "loss": 0.3034, "step": 19090 }, { "epoch": 1.018096532608406, "grad_norm": 1.7597166150123467, "learning_rate": 2.3640308997460028e-06, "loss": 0.306, "step": 19100 }, { "epoch": 1.0186295674422323, "grad_norm": 1.724748724320993, "learning_rate": 2.363310279205599e-06, "loss": 0.319, "step": 19110 }, { "epoch": 1.0191626022760587, "grad_norm": 1.6481963472818624, "learning_rate": 2.362589360600007e-06, "loss": 0.3117, "step": 19120 }, { "epoch": 1.019695637109885, "grad_norm": 1.7116242445127332, "learning_rate": 2.361868144178129e-06, "loss": 0.3159, "step": 19130 }, { "epoch": 1.0202286719437115, "grad_norm": 1.5986354262012121, "learning_rate": 2.361146630188973e-06, "loss": 0.3041, "step": 19140 }, { "epoch": 1.0207617067775379, "grad_norm": 1.6965889643844392, "learning_rate": 2.3604248188816473e-06, "loss": 0.3108, "step": 19150 }, { "epoch": 1.0212947416113642, "grad_norm": 1.6781525876036727, "learning_rate": 2.3597027105053622e-06, "loss": 0.3141, "step": 19160 }, { "epoch": 1.0218277764451906, "grad_norm": 2.000216796698841, "learning_rate": 2.3589803053094324e-06, "loss": 0.3113, "step": 19170 }, { "epoch": 1.022360811279017, "grad_norm": 1.6150207837547592, "learning_rate": 2.3582576035432746e-06, "loss": 0.3009, "step": 19180 }, { "epoch": 1.0228938461128434, "grad_norm": 1.5035315637901694, "learning_rate": 2.3575346054564075e-06, "loss": 0.311, "step": 19190 }, { "epoch": 1.0234268809466698, "grad_norm": 1.617097767592438, "learning_rate": 2.356811311298453e-06, "loss": 0.3107, "step": 19200 }, { "epoch": 1.0239599157804962, "grad_norm": 1.6109057529637225, "learning_rate": 2.3560877213191332e-06, "loss": 0.3131, "step": 19210 }, { "epoch": 1.0244929506143226, "grad_norm": 1.6197081334960173, "learning_rate": 2.3553638357682747e-06, "loss": 0.3018, "step": 19220 }, { "epoch": 1.025025985448149, "grad_norm": 1.5921502789122441, "learning_rate": 2.354639654895805e-06, "loss": 0.3012, "step": 19230 }, { "epoch": 1.0255590202819753, "grad_norm": 1.6044360167651812, "learning_rate": 2.353915178951753e-06, "loss": 0.3173, "step": 19240 }, { "epoch": 1.026092055115802, "grad_norm": 1.5173977729856618, "learning_rate": 2.3531904081862512e-06, "loss": 0.3056, "step": 19250 }, { "epoch": 1.0266250899496283, "grad_norm": 1.5361598436139567, "learning_rate": 2.352465342849532e-06, "loss": 0.3089, "step": 19260 }, { "epoch": 1.0271581247834547, "grad_norm": 1.7125955590421769, "learning_rate": 2.351739983191931e-06, "loss": 0.3136, "step": 19270 }, { "epoch": 1.027691159617281, "grad_norm": 1.6376777345092481, "learning_rate": 2.351014329463884e-06, "loss": 0.3031, "step": 19280 }, { "epoch": 1.0282241944511075, "grad_norm": 1.6857913781260123, "learning_rate": 2.35028838191593e-06, "loss": 0.3021, "step": 19290 }, { "epoch": 1.0287572292849338, "grad_norm": 1.8233865996276521, "learning_rate": 2.349562140798708e-06, "loss": 0.3138, "step": 19300 }, { "epoch": 1.0292902641187602, "grad_norm": 1.7449481445071577, "learning_rate": 2.3488356063629588e-06, "loss": 0.3171, "step": 19310 }, { "epoch": 1.0298232989525866, "grad_norm": 1.6815181872052598, "learning_rate": 2.3481087788595244e-06, "loss": 0.3109, "step": 19320 }, { "epoch": 1.030356333786413, "grad_norm": 1.6453996080077002, "learning_rate": 2.347381658539349e-06, "loss": 0.3146, "step": 19330 }, { "epoch": 1.0308893686202394, "grad_norm": 1.6689190716130082, "learning_rate": 2.3466542456534764e-06, "loss": 0.3147, "step": 19340 }, { "epoch": 1.0314224034540658, "grad_norm": 1.5376012777142196, "learning_rate": 2.345926540453052e-06, "loss": 0.3056, "step": 19350 }, { "epoch": 1.0319554382878922, "grad_norm": 1.5967960069280163, "learning_rate": 2.3451985431893228e-06, "loss": 0.3167, "step": 19360 }, { "epoch": 1.0324884731217185, "grad_norm": 1.7716987549981884, "learning_rate": 2.3444702541136354e-06, "loss": 0.3082, "step": 19370 }, { "epoch": 1.033021507955545, "grad_norm": 1.74645380911529, "learning_rate": 2.343741673477438e-06, "loss": 0.3137, "step": 19380 }, { "epoch": 1.0335545427893713, "grad_norm": 1.5213147413735495, "learning_rate": 2.3430128015322793e-06, "loss": 0.3076, "step": 19390 }, { "epoch": 1.0340875776231977, "grad_norm": 1.621692540015206, "learning_rate": 2.3422836385298083e-06, "loss": 0.3136, "step": 19400 }, { "epoch": 1.034620612457024, "grad_norm": 1.5393512097869018, "learning_rate": 2.3415541847217745e-06, "loss": 0.3146, "step": 19410 }, { "epoch": 1.0351536472908505, "grad_norm": 1.5225064430863735, "learning_rate": 2.340824440360029e-06, "loss": 0.3056, "step": 19420 }, { "epoch": 1.0356866821246768, "grad_norm": 1.8123159853874484, "learning_rate": 2.3400944056965217e-06, "loss": 0.3167, "step": 19430 }, { "epoch": 1.0362197169585032, "grad_norm": 1.7187955329348583, "learning_rate": 2.3393640809833025e-06, "loss": 0.3141, "step": 19440 }, { "epoch": 1.0367527517923296, "grad_norm": 1.59679186854062, "learning_rate": 2.3386334664725237e-06, "loss": 0.2993, "step": 19450 }, { "epoch": 1.037285786626156, "grad_norm": 1.6695201115464604, "learning_rate": 2.337902562416434e-06, "loss": 0.3201, "step": 19460 }, { "epoch": 1.0378188214599824, "grad_norm": 1.5553969990181506, "learning_rate": 2.337171369067386e-06, "loss": 0.3011, "step": 19470 }, { "epoch": 1.0383518562938088, "grad_norm": 1.6465971085509254, "learning_rate": 2.3364398866778303e-06, "loss": 0.3101, "step": 19480 }, { "epoch": 1.0388848911276352, "grad_norm": 1.7314529715906022, "learning_rate": 2.335708115500317e-06, "loss": 0.3089, "step": 19490 }, { "epoch": 1.0394179259614615, "grad_norm": 1.5774253111142744, "learning_rate": 2.334976055787496e-06, "loss": 0.3052, "step": 19500 }, { "epoch": 1.039950960795288, "grad_norm": 1.6174879057925393, "learning_rate": 2.3342437077921175e-06, "loss": 0.3205, "step": 19510 }, { "epoch": 1.0404839956291143, "grad_norm": 1.809457281242797, "learning_rate": 2.333511071767031e-06, "loss": 0.3014, "step": 19520 }, { "epoch": 1.0410170304629407, "grad_norm": 1.4083336884286384, "learning_rate": 2.3327781479651847e-06, "loss": 0.2862, "step": 19530 }, { "epoch": 1.041550065296767, "grad_norm": 1.5764674355674486, "learning_rate": 2.332044936639627e-06, "loss": 0.32, "step": 19540 }, { "epoch": 1.0420831001305935, "grad_norm": 1.6602003856524954, "learning_rate": 2.3313114380435055e-06, "loss": 0.315, "step": 19550 }, { "epoch": 1.0426161349644198, "grad_norm": 1.6676535847675475, "learning_rate": 2.3305776524300666e-06, "loss": 0.3058, "step": 19560 }, { "epoch": 1.0431491697982462, "grad_norm": 1.8072315645482462, "learning_rate": 2.3298435800526565e-06, "loss": 0.3038, "step": 19570 }, { "epoch": 1.0436822046320726, "grad_norm": 1.7604445798192565, "learning_rate": 2.3291092211647193e-06, "loss": 0.3078, "step": 19580 }, { "epoch": 1.044215239465899, "grad_norm": 1.8702712758079347, "learning_rate": 2.328374576019799e-06, "loss": 0.3145, "step": 19590 }, { "epoch": 1.0447482742997254, "grad_norm": 1.4996388808956578, "learning_rate": 2.327639644871538e-06, "loss": 0.3096, "step": 19600 }, { "epoch": 1.0452813091335518, "grad_norm": 1.6064118336755877, "learning_rate": 2.3269044279736773e-06, "loss": 0.2993, "step": 19610 }, { "epoch": 1.0458143439673782, "grad_norm": 1.6946610163064237, "learning_rate": 2.3261689255800567e-06, "loss": 0.3162, "step": 19620 }, { "epoch": 1.0463473788012048, "grad_norm": 1.5171742988218002, "learning_rate": 2.325433137944615e-06, "loss": 0.3135, "step": 19630 }, { "epoch": 1.0468804136350311, "grad_norm": 1.681952371397534, "learning_rate": 2.324697065321389e-06, "loss": 0.311, "step": 19640 }, { "epoch": 1.0474134484688575, "grad_norm": 1.6643104263190116, "learning_rate": 2.323960707964514e-06, "loss": 0.3046, "step": 19650 }, { "epoch": 1.047946483302684, "grad_norm": 1.6210342981082129, "learning_rate": 2.3232240661282237e-06, "loss": 0.3028, "step": 19660 }, { "epoch": 1.0484795181365103, "grad_norm": 1.9197904912037602, "learning_rate": 2.3224871400668493e-06, "loss": 0.315, "step": 19670 }, { "epoch": 1.0490125529703367, "grad_norm": 1.7093300384936556, "learning_rate": 2.3217499300348213e-06, "loss": 0.3085, "step": 19680 }, { "epoch": 1.049545587804163, "grad_norm": 1.6590430355043468, "learning_rate": 2.3210124362866682e-06, "loss": 0.3079, "step": 19690 }, { "epoch": 1.0500786226379895, "grad_norm": 1.7439279113739472, "learning_rate": 2.320274659077015e-06, "loss": 0.316, "step": 19700 }, { "epoch": 1.0506116574718158, "grad_norm": 1.733935189703632, "learning_rate": 2.319536598660586e-06, "loss": 0.3076, "step": 19710 }, { "epoch": 1.0511446923056422, "grad_norm": 1.6608416566934736, "learning_rate": 2.3187982552922033e-06, "loss": 0.3063, "step": 19720 }, { "epoch": 1.0516777271394686, "grad_norm": 1.8158532269444727, "learning_rate": 2.3180596292267853e-06, "loss": 0.3189, "step": 19730 }, { "epoch": 1.052210761973295, "grad_norm": 1.764521685337587, "learning_rate": 2.317320720719349e-06, "loss": 0.3152, "step": 19740 }, { "epoch": 1.0527437968071214, "grad_norm": 1.756732379108052, "learning_rate": 2.31658153002501e-06, "loss": 0.3132, "step": 19750 }, { "epoch": 1.0532768316409478, "grad_norm": 1.537573893676739, "learning_rate": 2.3158420573989783e-06, "loss": 0.3108, "step": 19760 }, { "epoch": 1.0538098664747741, "grad_norm": 1.68342688875765, "learning_rate": 2.3151023030965644e-06, "loss": 0.3125, "step": 19770 }, { "epoch": 1.0543429013086005, "grad_norm": 1.7489867022687386, "learning_rate": 2.3143622673731746e-06, "loss": 0.3075, "step": 19780 }, { "epoch": 1.054875936142427, "grad_norm": 1.986931380085191, "learning_rate": 2.3136219504843122e-06, "loss": 0.3093, "step": 19790 }, { "epoch": 1.0554089709762533, "grad_norm": 1.5481125556801067, "learning_rate": 2.312881352685578e-06, "loss": 0.2974, "step": 19800 }, { "epoch": 1.0559420058100797, "grad_norm": 1.6705110735201283, "learning_rate": 2.3121404742326697e-06, "loss": 0.3091, "step": 19810 }, { "epoch": 1.056475040643906, "grad_norm": 1.5946087149307544, "learning_rate": 2.3113993153813814e-06, "loss": 0.3018, "step": 19820 }, { "epoch": 1.0570080754777325, "grad_norm": 1.4459937441736979, "learning_rate": 2.3106578763876055e-06, "loss": 0.3109, "step": 19830 }, { "epoch": 1.0575411103115588, "grad_norm": 1.7260057890441056, "learning_rate": 2.309916157507329e-06, "loss": 0.3067, "step": 19840 }, { "epoch": 1.0580741451453852, "grad_norm": 1.7412517385452415, "learning_rate": 2.309174158996637e-06, "loss": 0.3133, "step": 19850 }, { "epoch": 1.0586071799792116, "grad_norm": 1.7765175839440783, "learning_rate": 2.3084318811117115e-06, "loss": 0.3151, "step": 19860 }, { "epoch": 1.059140214813038, "grad_norm": 1.7349533089906835, "learning_rate": 2.307689324108829e-06, "loss": 0.2978, "step": 19870 }, { "epoch": 1.0596732496468644, "grad_norm": 1.6101318274152165, "learning_rate": 2.3069464882443643e-06, "loss": 0.31, "step": 19880 }, { "epoch": 1.0602062844806908, "grad_norm": 1.8430313961496845, "learning_rate": 2.306203373774788e-06, "loss": 0.3198, "step": 19890 }, { "epoch": 1.0607393193145171, "grad_norm": 1.5926851698217748, "learning_rate": 2.305459980956667e-06, "loss": 0.3249, "step": 19900 }, { "epoch": 1.0612723541483435, "grad_norm": 1.750694904810236, "learning_rate": 2.304716310046662e-06, "loss": 0.3136, "step": 19910 }, { "epoch": 1.06180538898217, "grad_norm": 1.6243073105838994, "learning_rate": 2.3039723613015335e-06, "loss": 0.3089, "step": 19920 }, { "epoch": 1.0623384238159963, "grad_norm": 1.5534089185294786, "learning_rate": 2.3032281349781355e-06, "loss": 0.3065, "step": 19930 }, { "epoch": 1.0628714586498227, "grad_norm": 1.824439581254945, "learning_rate": 2.302483631333419e-06, "loss": 0.3164, "step": 19940 }, { "epoch": 1.063404493483649, "grad_norm": 1.6199443334732788, "learning_rate": 2.3017388506244293e-06, "loss": 0.315, "step": 19950 }, { "epoch": 1.0639375283174755, "grad_norm": 1.6000992674164802, "learning_rate": 2.3009937931083083e-06, "loss": 0.323, "step": 19960 }, { "epoch": 1.0644705631513018, "grad_norm": 1.801001843022137, "learning_rate": 2.3002484590422946e-06, "loss": 0.3061, "step": 19970 }, { "epoch": 1.0650035979851282, "grad_norm": 1.5457269985803401, "learning_rate": 2.29950284868372e-06, "loss": 0.3172, "step": 19980 }, { "epoch": 1.0655366328189548, "grad_norm": 1.5899898759268671, "learning_rate": 2.2987569622900134e-06, "loss": 0.3076, "step": 19990 }, { "epoch": 1.0660696676527812, "grad_norm": 1.5605886239159608, "learning_rate": 2.2980108001186978e-06, "loss": 0.3123, "step": 20000 }, { "epoch": 1.0666027024866076, "grad_norm": 1.4545797018638003, "learning_rate": 2.297264362427392e-06, "loss": 0.3165, "step": 20010 }, { "epoch": 1.067135737320434, "grad_norm": 1.586730670029662, "learning_rate": 2.2965176494738107e-06, "loss": 0.316, "step": 20020 }, { "epoch": 1.0676687721542604, "grad_norm": 1.61818578088921, "learning_rate": 2.2957706615157625e-06, "loss": 0.3105, "step": 20030 }, { "epoch": 1.0682018069880868, "grad_norm": 1.6082334666108968, "learning_rate": 2.295023398811151e-06, "loss": 0.3069, "step": 20040 }, { "epoch": 1.0687348418219131, "grad_norm": 1.758159498075471, "learning_rate": 2.294275861617975e-06, "loss": 0.3024, "step": 20050 }, { "epoch": 1.0692678766557395, "grad_norm": 1.5892949055954335, "learning_rate": 2.2935280501943285e-06, "loss": 0.3193, "step": 20060 }, { "epoch": 1.069800911489566, "grad_norm": 1.6830657737207504, "learning_rate": 2.2927799647983998e-06, "loss": 0.3008, "step": 20070 }, { "epoch": 1.0703339463233923, "grad_norm": 1.6309903623130078, "learning_rate": 2.292031605688471e-06, "loss": 0.3082, "step": 20080 }, { "epoch": 1.0708669811572187, "grad_norm": 1.6944918118515484, "learning_rate": 2.29128297312292e-06, "loss": 0.3055, "step": 20090 }, { "epoch": 1.071400015991045, "grad_norm": 1.6289333482834578, "learning_rate": 2.2905340673602185e-06, "loss": 0.313, "step": 20100 }, { "epoch": 1.0719330508248714, "grad_norm": 1.6392344620667187, "learning_rate": 2.289784888658932e-06, "loss": 0.3005, "step": 20110 }, { "epoch": 1.0724660856586978, "grad_norm": 1.8524108200767249, "learning_rate": 2.289035437277722e-06, "loss": 0.3152, "step": 20120 }, { "epoch": 1.0729991204925242, "grad_norm": 1.5796125723740158, "learning_rate": 2.2882857134753417e-06, "loss": 0.3262, "step": 20130 }, { "epoch": 1.0735321553263506, "grad_norm": 1.753728939535228, "learning_rate": 2.2875357175106397e-06, "loss": 0.3121, "step": 20140 }, { "epoch": 1.074065190160177, "grad_norm": 1.5623208561509776, "learning_rate": 2.2867854496425595e-06, "loss": 0.322, "step": 20150 }, { "epoch": 1.0745982249940034, "grad_norm": 1.5325756892143907, "learning_rate": 2.286034910130136e-06, "loss": 0.3021, "step": 20160 }, { "epoch": 1.0751312598278298, "grad_norm": 1.7761690651214683, "learning_rate": 2.285284099232501e-06, "loss": 0.2988, "step": 20170 }, { "epoch": 1.0756642946616561, "grad_norm": 1.5483310944763116, "learning_rate": 2.284533017208877e-06, "loss": 0.3045, "step": 20180 }, { "epoch": 1.0761973294954825, "grad_norm": 1.5443949582015404, "learning_rate": 2.283781664318582e-06, "loss": 0.3164, "step": 20190 }, { "epoch": 1.076730364329309, "grad_norm": 1.6396497040544773, "learning_rate": 2.2830300408210266e-06, "loss": 0.3058, "step": 20200 }, { "epoch": 1.0772633991631353, "grad_norm": 1.554129101309212, "learning_rate": 2.2822781469757158e-06, "loss": 0.3134, "step": 20210 }, { "epoch": 1.0777964339969617, "grad_norm": 1.7526946278918778, "learning_rate": 2.281525983042246e-06, "loss": 0.3017, "step": 20220 }, { "epoch": 1.078329468830788, "grad_norm": 1.744192157689558, "learning_rate": 2.28077354928031e-06, "loss": 0.3051, "step": 20230 }, { "epoch": 1.0788625036646144, "grad_norm": 1.4976672640563824, "learning_rate": 2.2800208459496907e-06, "loss": 0.3107, "step": 20240 }, { "epoch": 1.0793955384984408, "grad_norm": 1.6659791336010934, "learning_rate": 2.279267873310266e-06, "loss": 0.3091, "step": 20250 }, { "epoch": 1.0799285733322672, "grad_norm": 1.5970576389357873, "learning_rate": 2.2785146316220048e-06, "loss": 0.3143, "step": 20260 }, { "epoch": 1.0804616081660936, "grad_norm": 1.6284122566142822, "learning_rate": 2.2777611211449713e-06, "loss": 0.3177, "step": 20270 }, { "epoch": 1.08099464299992, "grad_norm": 1.6316189260615348, "learning_rate": 2.2770073421393214e-06, "loss": 0.2989, "step": 20280 }, { "epoch": 1.0815276778337464, "grad_norm": 1.5986144293097817, "learning_rate": 2.2762532948653033e-06, "loss": 0.317, "step": 20290 }, { "epoch": 1.0820607126675728, "grad_norm": 1.5181692529740152, "learning_rate": 2.2754989795832587e-06, "loss": 0.3162, "step": 20300 }, { "epoch": 1.0825937475013991, "grad_norm": 1.627712541610012, "learning_rate": 2.274744396553621e-06, "loss": 0.3043, "step": 20310 }, { "epoch": 1.0831267823352255, "grad_norm": 1.8331087744547756, "learning_rate": 2.2739895460369155e-06, "loss": 0.3184, "step": 20320 }, { "epoch": 1.083659817169052, "grad_norm": 1.6356187656274184, "learning_rate": 2.273234428293762e-06, "loss": 0.3033, "step": 20330 }, { "epoch": 1.0841928520028783, "grad_norm": 1.489854250295549, "learning_rate": 2.2724790435848718e-06, "loss": 0.3063, "step": 20340 }, { "epoch": 1.0847258868367047, "grad_norm": 1.4351976874950956, "learning_rate": 2.271723392171046e-06, "loss": 0.3143, "step": 20350 }, { "epoch": 1.085258921670531, "grad_norm": 1.703422703327441, "learning_rate": 2.2709674743131806e-06, "loss": 0.3151, "step": 20360 }, { "epoch": 1.0857919565043574, "grad_norm": 1.7045448455070467, "learning_rate": 2.270211290272263e-06, "loss": 0.3087, "step": 20370 }, { "epoch": 1.086324991338184, "grad_norm": 1.7436578222576975, "learning_rate": 2.2694548403093717e-06, "loss": 0.3244, "step": 20380 }, { "epoch": 1.0868580261720104, "grad_norm": 1.6890954265416962, "learning_rate": 2.2686981246856778e-06, "loss": 0.3111, "step": 20390 }, { "epoch": 1.0873910610058368, "grad_norm": 1.7061659818947192, "learning_rate": 2.267941143662443e-06, "loss": 0.3133, "step": 20400 }, { "epoch": 1.0879240958396632, "grad_norm": 1.5839361067641207, "learning_rate": 2.2671838975010225e-06, "loss": 0.3042, "step": 20410 }, { "epoch": 1.0884571306734896, "grad_norm": 1.5942352715656116, "learning_rate": 2.266426386462862e-06, "loss": 0.3103, "step": 20420 }, { "epoch": 1.088990165507316, "grad_norm": 1.6642037967323897, "learning_rate": 2.2656686108094967e-06, "loss": 0.3032, "step": 20430 }, { "epoch": 1.0895232003411424, "grad_norm": 1.7016467213703885, "learning_rate": 2.264910570802557e-06, "loss": 0.305, "step": 20440 }, { "epoch": 1.0900562351749687, "grad_norm": 1.7196687873703265, "learning_rate": 2.264152266703762e-06, "loss": 0.3137, "step": 20450 }, { "epoch": 1.0905892700087951, "grad_norm": 1.6065413791228573, "learning_rate": 2.263393698774923e-06, "loss": 0.3119, "step": 20460 }, { "epoch": 1.0911223048426215, "grad_norm": 1.8610955070531345, "learning_rate": 2.2626348672779413e-06, "loss": 0.3163, "step": 20470 }, { "epoch": 1.091655339676448, "grad_norm": 1.4764983834948127, "learning_rate": 2.26187577247481e-06, "loss": 0.3091, "step": 20480 }, { "epoch": 1.0921883745102743, "grad_norm": 1.6021245669401387, "learning_rate": 2.261116414627613e-06, "loss": 0.3219, "step": 20490 }, { "epoch": 1.0927214093441007, "grad_norm": 1.6533767859742208, "learning_rate": 2.2603567939985263e-06, "loss": 0.2912, "step": 20500 }, { "epoch": 1.093254444177927, "grad_norm": 1.7035475743547959, "learning_rate": 2.259596910849814e-06, "loss": 0.3035, "step": 20510 }, { "epoch": 1.0937874790117534, "grad_norm": 1.6508501878264963, "learning_rate": 2.2588367654438323e-06, "loss": 0.2991, "step": 20520 }, { "epoch": 1.0943205138455798, "grad_norm": 1.5718945916959757, "learning_rate": 2.2580763580430285e-06, "loss": 0.3087, "step": 20530 }, { "epoch": 1.0948535486794062, "grad_norm": 1.6251940722322025, "learning_rate": 2.2573156889099384e-06, "loss": 0.3073, "step": 20540 }, { "epoch": 1.0953865835132326, "grad_norm": 1.653377268999388, "learning_rate": 2.2565547583071914e-06, "loss": 0.3101, "step": 20550 }, { "epoch": 1.095919618347059, "grad_norm": 1.75462182597595, "learning_rate": 2.2557935664975033e-06, "loss": 0.307, "step": 20560 }, { "epoch": 1.0964526531808854, "grad_norm": 1.5769222679718922, "learning_rate": 2.2550321137436834e-06, "loss": 0.2991, "step": 20570 }, { "epoch": 1.0969856880147117, "grad_norm": 1.5746337490387257, "learning_rate": 2.254270400308629e-06, "loss": 0.3212, "step": 20580 }, { "epoch": 1.0975187228485381, "grad_norm": 1.6480521242495094, "learning_rate": 2.253508426455329e-06, "loss": 0.3067, "step": 20590 }, { "epoch": 1.0980517576823645, "grad_norm": 1.7058446812952348, "learning_rate": 2.25274619244686e-06, "loss": 0.3131, "step": 20600 }, { "epoch": 1.098584792516191, "grad_norm": 1.7460479668951798, "learning_rate": 2.25198369854639e-06, "loss": 0.3067, "step": 20610 }, { "epoch": 1.0991178273500173, "grad_norm": 1.8632265859307715, "learning_rate": 2.251220945017178e-06, "loss": 0.3141, "step": 20620 }, { "epoch": 1.0996508621838437, "grad_norm": 1.8075011489080604, "learning_rate": 2.2504579321225696e-06, "loss": 0.3167, "step": 20630 }, { "epoch": 1.10018389701767, "grad_norm": 1.7099966301645766, "learning_rate": 2.2496946601260016e-06, "loss": 0.2998, "step": 20640 }, { "epoch": 1.1007169318514964, "grad_norm": 1.7027089587326318, "learning_rate": 2.2489311292910006e-06, "loss": 0.3033, "step": 20650 }, { "epoch": 1.1012499666853228, "grad_norm": 1.5919803688603003, "learning_rate": 2.2481673398811816e-06, "loss": 0.3038, "step": 20660 }, { "epoch": 1.1017830015191492, "grad_norm": 1.6024281162470557, "learning_rate": 2.24740329216025e-06, "loss": 0.3034, "step": 20670 }, { "epoch": 1.1023160363529756, "grad_norm": 1.6126467574543146, "learning_rate": 2.2466389863919996e-06, "loss": 0.3205, "step": 20680 }, { "epoch": 1.102849071186802, "grad_norm": 1.7827413541414465, "learning_rate": 2.2458744228403127e-06, "loss": 0.317, "step": 20690 }, { "epoch": 1.1033821060206284, "grad_norm": 1.7144444470739588, "learning_rate": 2.245109601769162e-06, "loss": 0.3227, "step": 20700 }, { "epoch": 1.1039151408544547, "grad_norm": 1.8014751200955856, "learning_rate": 2.2443445234426083e-06, "loss": 0.303, "step": 20710 }, { "epoch": 1.1044481756882811, "grad_norm": 1.5914755244129393, "learning_rate": 2.2435791881248017e-06, "loss": 0.3178, "step": 20720 }, { "epoch": 1.1049812105221077, "grad_norm": 1.5137246111318892, "learning_rate": 2.242813596079981e-06, "loss": 0.3102, "step": 20730 }, { "epoch": 1.1055142453559341, "grad_norm": 1.6801762479589506, "learning_rate": 2.2420477475724714e-06, "loss": 0.3113, "step": 20740 }, { "epoch": 1.1060472801897605, "grad_norm": 1.5590419708036913, "learning_rate": 2.241281642866691e-06, "loss": 0.3117, "step": 20750 }, { "epoch": 1.106580315023587, "grad_norm": 1.7898995494588794, "learning_rate": 2.240515282227142e-06, "loss": 0.3075, "step": 20760 }, { "epoch": 1.1071133498574133, "grad_norm": 1.6834613055252743, "learning_rate": 2.2397486659184185e-06, "loss": 0.303, "step": 20770 }, { "epoch": 1.1076463846912397, "grad_norm": 1.6889123149883707, "learning_rate": 2.2389817942052004e-06, "loss": 0.3089, "step": 20780 }, { "epoch": 1.108179419525066, "grad_norm": 1.7293717410097444, "learning_rate": 2.238214667352257e-06, "loss": 0.3155, "step": 20790 }, { "epoch": 1.1087124543588924, "grad_norm": 1.8093789039301587, "learning_rate": 2.237447285624445e-06, "loss": 0.2983, "step": 20800 }, { "epoch": 1.1092454891927188, "grad_norm": 1.5501234210844848, "learning_rate": 2.2366796492867095e-06, "loss": 0.3053, "step": 20810 }, { "epoch": 1.1097785240265452, "grad_norm": 1.5685323574540726, "learning_rate": 2.2359117586040837e-06, "loss": 0.3109, "step": 20820 }, { "epoch": 1.1103115588603716, "grad_norm": 1.7710444058403545, "learning_rate": 2.2351436138416883e-06, "loss": 0.3161, "step": 20830 }, { "epoch": 1.110844593694198, "grad_norm": 1.6265566997745051, "learning_rate": 2.2343752152647314e-06, "loss": 0.298, "step": 20840 }, { "epoch": 1.1113776285280244, "grad_norm": 1.7343040660344997, "learning_rate": 2.2336065631385097e-06, "loss": 0.3221, "step": 20850 }, { "epoch": 1.1119106633618507, "grad_norm": 1.470688582654047, "learning_rate": 2.2328376577284066e-06, "loss": 0.3083, "step": 20860 }, { "epoch": 1.1124436981956771, "grad_norm": 1.4296680219315556, "learning_rate": 2.232068499299893e-06, "loss": 0.3165, "step": 20870 }, { "epoch": 1.1129767330295035, "grad_norm": 1.676832282763617, "learning_rate": 2.2312990881185277e-06, "loss": 0.2973, "step": 20880 }, { "epoch": 1.11350976786333, "grad_norm": 1.774768679716971, "learning_rate": 2.2305294244499563e-06, "loss": 0.3058, "step": 20890 }, { "epoch": 1.1140428026971563, "grad_norm": 1.7799279652979498, "learning_rate": 2.229759508559912e-06, "loss": 0.3076, "step": 20900 }, { "epoch": 1.1145758375309827, "grad_norm": 1.5867822428300116, "learning_rate": 2.228989340714214e-06, "loss": 0.3138, "step": 20910 }, { "epoch": 1.115108872364809, "grad_norm": 1.5487436829033223, "learning_rate": 2.2282189211787697e-06, "loss": 0.2968, "step": 20920 }, { "epoch": 1.1156419071986354, "grad_norm": 1.6873803592953087, "learning_rate": 2.2274482502195733e-06, "loss": 0.3136, "step": 20930 }, { "epoch": 1.1161749420324618, "grad_norm": 1.559447683810378, "learning_rate": 2.2266773281027054e-06, "loss": 0.3095, "step": 20940 }, { "epoch": 1.1167079768662882, "grad_norm": 1.775421677185727, "learning_rate": 2.225906155094333e-06, "loss": 0.318, "step": 20950 }, { "epoch": 1.1172410117001146, "grad_norm": 1.7386104982302553, "learning_rate": 2.22513473146071e-06, "loss": 0.3059, "step": 20960 }, { "epoch": 1.117774046533941, "grad_norm": 1.7534474388667118, "learning_rate": 2.2243630574681775e-06, "loss": 0.2964, "step": 20970 }, { "epoch": 1.1183070813677674, "grad_norm": 1.7102947030492215, "learning_rate": 2.2235911333831627e-06, "loss": 0.3003, "step": 20980 }, { "epoch": 1.1188401162015937, "grad_norm": 1.609490185760629, "learning_rate": 2.222818959472178e-06, "loss": 0.3153, "step": 20990 }, { "epoch": 1.1193731510354201, "grad_norm": 1.5880032628420742, "learning_rate": 2.222046536001823e-06, "loss": 0.3091, "step": 21000 }, { "epoch": 1.1199061858692465, "grad_norm": 1.6401802227458995, "learning_rate": 2.2212738632387845e-06, "loss": 0.3094, "step": 21010 }, { "epoch": 1.120439220703073, "grad_norm": 1.8675617159676652, "learning_rate": 2.220500941449834e-06, "loss": 0.3127, "step": 21020 }, { "epoch": 1.1209722555368993, "grad_norm": 1.7019986096359507, "learning_rate": 2.2197277709018286e-06, "loss": 0.3084, "step": 21030 }, { "epoch": 1.1215052903707257, "grad_norm": 1.80160179780082, "learning_rate": 2.2189543518617124e-06, "loss": 0.3103, "step": 21040 }, { "epoch": 1.122038325204552, "grad_norm": 1.491361860680009, "learning_rate": 2.218180684596515e-06, "loss": 0.3039, "step": 21050 }, { "epoch": 1.1225713600383784, "grad_norm": 1.742841298640953, "learning_rate": 2.217406769373351e-06, "loss": 0.299, "step": 21060 }, { "epoch": 1.1231043948722048, "grad_norm": 1.7785023317108535, "learning_rate": 2.216632606459422e-06, "loss": 0.3137, "step": 21070 }, { "epoch": 1.1236374297060312, "grad_norm": 1.5783916705654453, "learning_rate": 2.215858196122014e-06, "loss": 0.3125, "step": 21080 }, { "epoch": 1.1241704645398576, "grad_norm": 1.577594442437047, "learning_rate": 2.215083538628499e-06, "loss": 0.3166, "step": 21090 }, { "epoch": 1.124703499373684, "grad_norm": 1.7045582673880306, "learning_rate": 2.214308634246333e-06, "loss": 0.3101, "step": 21100 }, { "epoch": 1.1252365342075104, "grad_norm": 1.6790255485034211, "learning_rate": 2.21353348324306e-06, "loss": 0.3064, "step": 21110 }, { "epoch": 1.1257695690413367, "grad_norm": 1.6277292643713521, "learning_rate": 2.2127580858863057e-06, "loss": 0.3139, "step": 21120 }, { "epoch": 1.1263026038751631, "grad_norm": 1.6873977006034813, "learning_rate": 2.2119824424437836e-06, "loss": 0.3153, "step": 21130 }, { "epoch": 1.1268356387089897, "grad_norm": 1.728680549544708, "learning_rate": 2.211206553183291e-06, "loss": 0.3086, "step": 21140 }, { "epoch": 1.1273686735428161, "grad_norm": 1.632470745754458, "learning_rate": 2.2104304183727102e-06, "loss": 0.3076, "step": 21150 }, { "epoch": 1.1279017083766425, "grad_norm": 1.7190508575469043, "learning_rate": 2.2096540382800085e-06, "loss": 0.3162, "step": 21160 }, { "epoch": 1.1284347432104689, "grad_norm": 1.8658650127720948, "learning_rate": 2.2088774131732376e-06, "loss": 0.3135, "step": 21170 }, { "epoch": 1.1289677780442953, "grad_norm": 1.5545140142065954, "learning_rate": 2.2081005433205334e-06, "loss": 0.3149, "step": 21180 }, { "epoch": 1.1295008128781217, "grad_norm": 1.6608493215939488, "learning_rate": 2.207323428990118e-06, "loss": 0.3155, "step": 21190 }, { "epoch": 1.130033847711948, "grad_norm": 1.70239514678641, "learning_rate": 2.2065460704502954e-06, "loss": 0.315, "step": 21200 }, { "epoch": 1.1305668825457744, "grad_norm": 1.6496785874268263, "learning_rate": 2.2057684679694557e-06, "loss": 0.3025, "step": 21210 }, { "epoch": 1.1310999173796008, "grad_norm": 1.7550833561227488, "learning_rate": 2.2049906218160724e-06, "loss": 0.3075, "step": 21220 }, { "epoch": 1.1316329522134272, "grad_norm": 1.64706352637559, "learning_rate": 2.2042125322587048e-06, "loss": 0.3103, "step": 21230 }, { "epoch": 1.1321659870472536, "grad_norm": 1.687132715132537, "learning_rate": 2.2034341995659938e-06, "loss": 0.3158, "step": 21240 }, { "epoch": 1.13269902188108, "grad_norm": 1.6543614390022436, "learning_rate": 2.202655624006665e-06, "loss": 0.2985, "step": 21250 }, { "epoch": 1.1332320567149063, "grad_norm": 1.69337061810349, "learning_rate": 2.2018768058495292e-06, "loss": 0.3066, "step": 21260 }, { "epoch": 1.1337650915487327, "grad_norm": 1.5222096149297102, "learning_rate": 2.2010977453634794e-06, "loss": 0.3193, "step": 21270 }, { "epoch": 1.1342981263825591, "grad_norm": 1.622855621916122, "learning_rate": 2.2003184428174932e-06, "loss": 0.3034, "step": 21280 }, { "epoch": 1.1348311612163855, "grad_norm": 1.6956987624602966, "learning_rate": 2.1995388984806308e-06, "loss": 0.296, "step": 21290 }, { "epoch": 1.1353641960502119, "grad_norm": 1.6649515239504777, "learning_rate": 2.1987591126220368e-06, "loss": 0.3148, "step": 21300 }, { "epoch": 1.1358972308840383, "grad_norm": 1.8845503856479293, "learning_rate": 2.1979790855109394e-06, "loss": 0.3154, "step": 21310 }, { "epoch": 1.1364302657178647, "grad_norm": 1.6307121195488599, "learning_rate": 2.1971988174166485e-06, "loss": 0.3063, "step": 21320 }, { "epoch": 1.136963300551691, "grad_norm": 1.6593939526643333, "learning_rate": 2.196418308608559e-06, "loss": 0.31, "step": 21330 }, { "epoch": 1.1374963353855174, "grad_norm": 1.497105604345187, "learning_rate": 2.195637559356149e-06, "loss": 0.3192, "step": 21340 }, { "epoch": 1.1380293702193438, "grad_norm": 1.558046444444482, "learning_rate": 2.194856569928977e-06, "loss": 0.2963, "step": 21350 }, { "epoch": 1.1385624050531702, "grad_norm": 1.7640136875409804, "learning_rate": 2.194075340596687e-06, "loss": 0.3052, "step": 21360 }, { "epoch": 1.1390954398869966, "grad_norm": 1.5952577900721743, "learning_rate": 2.1932938716290055e-06, "loss": 0.3113, "step": 21370 }, { "epoch": 1.139628474720823, "grad_norm": 1.695727652741668, "learning_rate": 2.1925121632957406e-06, "loss": 0.3027, "step": 21380 }, { "epoch": 1.1401615095546493, "grad_norm": 1.5878130950883684, "learning_rate": 2.1917302158667838e-06, "loss": 0.3124, "step": 21390 }, { "epoch": 1.1406945443884757, "grad_norm": 1.7535662725272476, "learning_rate": 2.1909480296121097e-06, "loss": 0.3134, "step": 21400 }, { "epoch": 1.1412275792223021, "grad_norm": 1.7125827345330777, "learning_rate": 2.1901656048017742e-06, "loss": 0.3116, "step": 21410 }, { "epoch": 1.1417606140561285, "grad_norm": 1.480422649066469, "learning_rate": 2.189382941705916e-06, "loss": 0.3097, "step": 21420 }, { "epoch": 1.1422936488899549, "grad_norm": 1.8264689307430122, "learning_rate": 2.1886000405947567e-06, "loss": 0.3134, "step": 21430 }, { "epoch": 1.1428266837237813, "grad_norm": 1.641378088819736, "learning_rate": 2.187816901738599e-06, "loss": 0.3097, "step": 21440 }, { "epoch": 1.1433597185576077, "grad_norm": 1.6014079320825023, "learning_rate": 2.1870335254078285e-06, "loss": 0.3103, "step": 21450 }, { "epoch": 1.143892753391434, "grad_norm": 1.8008018324362969, "learning_rate": 2.1862499118729126e-06, "loss": 0.3124, "step": 21460 }, { "epoch": 1.1444257882252606, "grad_norm": 1.71813725982525, "learning_rate": 2.1854660614044003e-06, "loss": 0.3024, "step": 21470 }, { "epoch": 1.144958823059087, "grad_norm": 1.7574964653927196, "learning_rate": 2.1846819742729226e-06, "loss": 0.3092, "step": 21480 }, { "epoch": 1.1454918578929134, "grad_norm": 1.7483329992033536, "learning_rate": 2.1838976507491923e-06, "loss": 0.3068, "step": 21490 }, { "epoch": 1.1460248927267398, "grad_norm": 1.5211470719893452, "learning_rate": 2.183113091104004e-06, "loss": 0.3101, "step": 21500 }, { "epoch": 1.1465579275605662, "grad_norm": 1.7043341464293227, "learning_rate": 2.182328295608233e-06, "loss": 0.3126, "step": 21510 }, { "epoch": 1.1470909623943926, "grad_norm": 1.60302367740354, "learning_rate": 2.181543264532837e-06, "loss": 0.3122, "step": 21520 }, { "epoch": 1.147623997228219, "grad_norm": 1.6727570095138387, "learning_rate": 2.1807579981488544e-06, "loss": 0.3089, "step": 21530 }, { "epoch": 1.1481570320620453, "grad_norm": 1.5566991037908373, "learning_rate": 2.1799724967274057e-06, "loss": 0.3086, "step": 21540 }, { "epoch": 1.1486900668958717, "grad_norm": 1.5842030034937153, "learning_rate": 2.179186760539691e-06, "loss": 0.3075, "step": 21550 }, { "epoch": 1.149223101729698, "grad_norm": 1.5494798850814615, "learning_rate": 2.178400789856993e-06, "loss": 0.3087, "step": 21560 }, { "epoch": 1.1497561365635245, "grad_norm": 1.5190822090674974, "learning_rate": 2.1776145849506747e-06, "loss": 0.3242, "step": 21570 }, { "epoch": 1.1502891713973509, "grad_norm": 1.7427207164534948, "learning_rate": 2.17682814609218e-06, "loss": 0.3087, "step": 21580 }, { "epoch": 1.1508222062311773, "grad_norm": 1.6870435612018546, "learning_rate": 2.176041473553033e-06, "loss": 0.3117, "step": 21590 }, { "epoch": 1.1513552410650036, "grad_norm": 1.6056465538593587, "learning_rate": 2.1752545676048393e-06, "loss": 0.3053, "step": 21600 }, { "epoch": 1.15188827589883, "grad_norm": 1.5915232972592372, "learning_rate": 2.174467428519285e-06, "loss": 0.2988, "step": 21610 }, { "epoch": 1.1524213107326564, "grad_norm": 1.8515770996618846, "learning_rate": 2.1736800565681373e-06, "loss": 0.3082, "step": 21620 }, { "epoch": 1.1529543455664828, "grad_norm": 1.604787640476992, "learning_rate": 2.172892452023242e-06, "loss": 0.316, "step": 21630 }, { "epoch": 1.1534873804003092, "grad_norm": 1.6983919866739992, "learning_rate": 2.1721046151565264e-06, "loss": 0.303, "step": 21640 }, { "epoch": 1.1540204152341356, "grad_norm": 1.4715984036168932, "learning_rate": 2.171316546239998e-06, "loss": 0.3044, "step": 21650 }, { "epoch": 1.154553450067962, "grad_norm": 1.553740217520958, "learning_rate": 2.1705282455457443e-06, "loss": 0.2974, "step": 21660 }, { "epoch": 1.1550864849017883, "grad_norm": 1.7916951176843285, "learning_rate": 2.169739713345933e-06, "loss": 0.3097, "step": 21670 }, { "epoch": 1.1556195197356147, "grad_norm": 1.9372575436666049, "learning_rate": 2.168950949912811e-06, "loss": 0.301, "step": 21680 }, { "epoch": 1.156152554569441, "grad_norm": 1.7182848690242787, "learning_rate": 2.1681619555187056e-06, "loss": 0.3183, "step": 21690 }, { "epoch": 1.1566855894032675, "grad_norm": 1.639511494870053, "learning_rate": 2.1673727304360248e-06, "loss": 0.3087, "step": 21700 }, { "epoch": 1.1572186242370939, "grad_norm": 1.8034293067921714, "learning_rate": 2.1665832749372545e-06, "loss": 0.2964, "step": 21710 }, { "epoch": 1.1577516590709203, "grad_norm": 1.5385832132752124, "learning_rate": 2.16579358929496e-06, "loss": 0.31, "step": 21720 }, { "epoch": 1.1582846939047466, "grad_norm": 1.7144540520139608, "learning_rate": 2.1650036737817887e-06, "loss": 0.2997, "step": 21730 }, { "epoch": 1.158817728738573, "grad_norm": 1.5596836257732085, "learning_rate": 2.1642135286704646e-06, "loss": 0.3057, "step": 21740 }, { "epoch": 1.1593507635723994, "grad_norm": 1.634680174118517, "learning_rate": 2.163423154233792e-06, "loss": 0.3068, "step": 21750 }, { "epoch": 1.1598837984062258, "grad_norm": 1.5657800662297066, "learning_rate": 2.1626325507446546e-06, "loss": 0.3061, "step": 21760 }, { "epoch": 1.1604168332400522, "grad_norm": 1.744269865003573, "learning_rate": 2.161841718476015e-06, "loss": 0.3111, "step": 21770 }, { "epoch": 1.1609498680738786, "grad_norm": 1.6058667266676994, "learning_rate": 2.1610506577009143e-06, "loss": 0.3144, "step": 21780 }, { "epoch": 1.161482902907705, "grad_norm": 1.6710651992799488, "learning_rate": 2.160259368692474e-06, "loss": 0.2988, "step": 21790 }, { "epoch": 1.1620159377415313, "grad_norm": 1.9227312302736368, "learning_rate": 2.159467851723892e-06, "loss": 0.3072, "step": 21800 }, { "epoch": 1.1625489725753577, "grad_norm": 1.7040728840656527, "learning_rate": 2.158676107068447e-06, "loss": 0.3066, "step": 21810 }, { "epoch": 1.163082007409184, "grad_norm": 1.6147110825363211, "learning_rate": 2.157884134999495e-06, "loss": 0.3152, "step": 21820 }, { "epoch": 1.1636150422430105, "grad_norm": 1.571570932644986, "learning_rate": 2.1570919357904716e-06, "loss": 0.3094, "step": 21830 }, { "epoch": 1.1641480770768369, "grad_norm": 1.6328097280554397, "learning_rate": 2.156299509714891e-06, "loss": 0.3095, "step": 21840 }, { "epoch": 1.1646811119106633, "grad_norm": 1.7166939795346328, "learning_rate": 2.1555068570463434e-06, "loss": 0.3124, "step": 21850 }, { "epoch": 1.1652141467444896, "grad_norm": 1.6741991831516585, "learning_rate": 2.1547139780585e-06, "loss": 0.2976, "step": 21860 }, { "epoch": 1.165747181578316, "grad_norm": 1.7286356505487748, "learning_rate": 2.1539208730251082e-06, "loss": 0.3059, "step": 21870 }, { "epoch": 1.1662802164121424, "grad_norm": 1.5976550217520216, "learning_rate": 2.153127542219995e-06, "loss": 0.3141, "step": 21880 }, { "epoch": 1.166813251245969, "grad_norm": 1.7611518622741738, "learning_rate": 2.152333985917064e-06, "loss": 0.3241, "step": 21890 }, { "epoch": 1.1673462860797954, "grad_norm": 1.8151169172675925, "learning_rate": 2.1515402043902973e-06, "loss": 0.3066, "step": 21900 }, { "epoch": 1.1678793209136218, "grad_norm": 1.6553179755714715, "learning_rate": 2.1507461979137556e-06, "loss": 0.3118, "step": 21910 }, { "epoch": 1.1684123557474482, "grad_norm": 1.5655280372293336, "learning_rate": 2.149951966761575e-06, "loss": 0.3062, "step": 21920 }, { "epoch": 1.1689453905812746, "grad_norm": 1.6644324250326497, "learning_rate": 2.149157511207971e-06, "loss": 0.2993, "step": 21930 }, { "epoch": 1.169478425415101, "grad_norm": 1.8397480389823289, "learning_rate": 2.1483628315272365e-06, "loss": 0.3029, "step": 21940 }, { "epoch": 1.1700114602489273, "grad_norm": 1.8807998797665644, "learning_rate": 2.1475679279937414e-06, "loss": 0.3122, "step": 21950 }, { "epoch": 1.1705444950827537, "grad_norm": 1.5626249997871782, "learning_rate": 2.1467728008819328e-06, "loss": 0.3077, "step": 21960 }, { "epoch": 1.17107752991658, "grad_norm": 1.8431944130382634, "learning_rate": 2.1459774504663344e-06, "loss": 0.305, "step": 21970 }, { "epoch": 1.1716105647504065, "grad_norm": 1.6110578833170766, "learning_rate": 2.145181877021548e-06, "loss": 0.3039, "step": 21980 }, { "epoch": 1.1721435995842329, "grad_norm": 1.7211334909055216, "learning_rate": 2.1443860808222526e-06, "loss": 0.3108, "step": 21990 }, { "epoch": 1.1726766344180592, "grad_norm": 1.5750700781222808, "learning_rate": 2.1435900621432035e-06, "loss": 0.3096, "step": 22000 }, { "epoch": 1.1732096692518856, "grad_norm": 1.8140573432231122, "learning_rate": 2.1427938212592327e-06, "loss": 0.3021, "step": 22010 }, { "epoch": 1.173742704085712, "grad_norm": 1.605119764072049, "learning_rate": 2.1419973584452487e-06, "loss": 0.3068, "step": 22020 }, { "epoch": 1.1742757389195384, "grad_norm": 1.5827382536387942, "learning_rate": 2.1412006739762374e-06, "loss": 0.3142, "step": 22030 }, { "epoch": 1.1748087737533648, "grad_norm": 1.7016552488188008, "learning_rate": 2.1404037681272608e-06, "loss": 0.3045, "step": 22040 }, { "epoch": 1.1753418085871912, "grad_norm": 1.5043105100648253, "learning_rate": 2.139606641173457e-06, "loss": 0.2995, "step": 22050 }, { "epoch": 1.1758748434210176, "grad_norm": 1.929533869811147, "learning_rate": 2.138809293390042e-06, "loss": 0.3108, "step": 22060 }, { "epoch": 1.176407878254844, "grad_norm": 1.7875585483390928, "learning_rate": 2.138011725052306e-06, "loss": 0.3027, "step": 22070 }, { "epoch": 1.1769409130886703, "grad_norm": 1.5814222403582903, "learning_rate": 2.137213936435616e-06, "loss": 0.2852, "step": 22080 }, { "epoch": 1.1774739479224967, "grad_norm": 1.7272130776912558, "learning_rate": 2.136415927815416e-06, "loss": 0.3078, "step": 22090 }, { "epoch": 1.178006982756323, "grad_norm": 1.7461628884736005, "learning_rate": 2.135617699467225e-06, "loss": 0.2961, "step": 22100 }, { "epoch": 1.1785400175901495, "grad_norm": 1.5605762093119409, "learning_rate": 2.1348192516666377e-06, "loss": 0.2948, "step": 22110 }, { "epoch": 1.1790730524239759, "grad_norm": 1.8333495712183872, "learning_rate": 2.134020584689325e-06, "loss": 0.2976, "step": 22120 }, { "epoch": 1.1796060872578022, "grad_norm": 1.705994538463318, "learning_rate": 2.1332216988110344e-06, "loss": 0.3076, "step": 22130 }, { "epoch": 1.1801391220916286, "grad_norm": 1.7756943606191598, "learning_rate": 2.132422594307587e-06, "loss": 0.2998, "step": 22140 }, { "epoch": 1.180672156925455, "grad_norm": 1.5844862841286445, "learning_rate": 2.1316232714548803e-06, "loss": 0.2941, "step": 22150 }, { "epoch": 1.1812051917592814, "grad_norm": 1.622051563424511, "learning_rate": 2.130823730528888e-06, "loss": 0.3082, "step": 22160 }, { "epoch": 1.1817382265931078, "grad_norm": 1.7068849154162093, "learning_rate": 2.130023971805658e-06, "loss": 0.307, "step": 22170 }, { "epoch": 1.1822712614269342, "grad_norm": 1.5949571040846469, "learning_rate": 2.129223995561314e-06, "loss": 0.3, "step": 22180 }, { "epoch": 1.1828042962607606, "grad_norm": 1.8926393243747694, "learning_rate": 2.128423802072055e-06, "loss": 0.3142, "step": 22190 }, { "epoch": 1.183337331094587, "grad_norm": 1.6853788729619184, "learning_rate": 2.1276233916141527e-06, "loss": 0.314, "step": 22200 }, { "epoch": 1.1838703659284133, "grad_norm": 1.698763361363233, "learning_rate": 2.1268227644639577e-06, "loss": 0.3031, "step": 22210 }, { "epoch": 1.18440340076224, "grad_norm": 1.8072365688515866, "learning_rate": 2.1260219208978926e-06, "loss": 0.306, "step": 22220 }, { "epoch": 1.1849364355960663, "grad_norm": 1.6223053361664546, "learning_rate": 2.1252208611924554e-06, "loss": 0.2873, "step": 22230 }, { "epoch": 1.1854694704298927, "grad_norm": 1.6422801506442333, "learning_rate": 2.1244195856242185e-06, "loss": 0.2927, "step": 22240 }, { "epoch": 1.186002505263719, "grad_norm": 1.7164118640906687, "learning_rate": 2.123618094469829e-06, "loss": 0.3104, "step": 22250 }, { "epoch": 1.1865355400975455, "grad_norm": 1.6489880561579486, "learning_rate": 2.122816388006009e-06, "loss": 0.3155, "step": 22260 }, { "epoch": 1.1870685749313719, "grad_norm": 1.7848319733890206, "learning_rate": 2.1220144665095546e-06, "loss": 0.3117, "step": 22270 }, { "epoch": 1.1876016097651982, "grad_norm": 1.7385490230216296, "learning_rate": 2.1212123302573347e-06, "loss": 0.3054, "step": 22280 }, { "epoch": 1.1881346445990246, "grad_norm": 1.5841782393072172, "learning_rate": 2.1204099795262954e-06, "loss": 0.3047, "step": 22290 }, { "epoch": 1.188667679432851, "grad_norm": 1.6262560566098443, "learning_rate": 2.1196074145934537e-06, "loss": 0.3048, "step": 22300 }, { "epoch": 1.1892007142666774, "grad_norm": 1.6558276281356448, "learning_rate": 2.118804635735903e-06, "loss": 0.3143, "step": 22310 }, { "epoch": 1.1897337491005038, "grad_norm": 1.780908636868358, "learning_rate": 2.118001643230809e-06, "loss": 0.3023, "step": 22320 }, { "epoch": 1.1902667839343302, "grad_norm": 1.5467631173805412, "learning_rate": 2.1171984373554108e-06, "loss": 0.3005, "step": 22330 }, { "epoch": 1.1907998187681565, "grad_norm": 1.5574493707268737, "learning_rate": 2.1163950183870234e-06, "loss": 0.3101, "step": 22340 }, { "epoch": 1.191332853601983, "grad_norm": 1.6038570190265888, "learning_rate": 2.1155913866030337e-06, "loss": 0.3058, "step": 22350 }, { "epoch": 1.1918658884358093, "grad_norm": 1.7535745529465345, "learning_rate": 2.1147875422809024e-06, "loss": 0.3156, "step": 22360 }, { "epoch": 1.1923989232696357, "grad_norm": 1.6832580511940625, "learning_rate": 2.113983485698163e-06, "loss": 0.3108, "step": 22370 }, { "epoch": 1.192931958103462, "grad_norm": 1.699811789318154, "learning_rate": 2.113179217132423e-06, "loss": 0.3027, "step": 22380 }, { "epoch": 1.1934649929372885, "grad_norm": 1.7739012963629168, "learning_rate": 2.112374736861363e-06, "loss": 0.3093, "step": 22390 }, { "epoch": 1.1939980277711149, "grad_norm": 1.73807525084544, "learning_rate": 2.1115700451627374e-06, "loss": 0.298, "step": 22400 }, { "epoch": 1.1945310626049412, "grad_norm": 1.6147640813956223, "learning_rate": 2.1107651423143718e-06, "loss": 0.3071, "step": 22410 }, { "epoch": 1.1950640974387676, "grad_norm": 1.6095317332785417, "learning_rate": 2.109960028594166e-06, "loss": 0.299, "step": 22420 }, { "epoch": 1.195597132272594, "grad_norm": 1.9104919232107886, "learning_rate": 2.1091547042800923e-06, "loss": 0.3037, "step": 22430 }, { "epoch": 1.1961301671064204, "grad_norm": 1.9327535929164992, "learning_rate": 2.108349169650196e-06, "loss": 0.3099, "step": 22440 }, { "epoch": 1.1966632019402468, "grad_norm": 1.872728388314714, "learning_rate": 2.107543424982595e-06, "loss": 0.3173, "step": 22450 }, { "epoch": 1.1971962367740732, "grad_norm": 1.5868814110069622, "learning_rate": 2.106737470555479e-06, "loss": 0.3119, "step": 22460 }, { "epoch": 1.1977292716078995, "grad_norm": 1.7977527909977369, "learning_rate": 2.1059313066471106e-06, "loss": 0.2959, "step": 22470 }, { "epoch": 1.198262306441726, "grad_norm": 1.6519040627369146, "learning_rate": 2.105124933535825e-06, "loss": 0.2951, "step": 22480 }, { "epoch": 1.1987953412755523, "grad_norm": 1.5675419032610383, "learning_rate": 2.104318351500029e-06, "loss": 0.3103, "step": 22490 }, { "epoch": 1.1993283761093787, "grad_norm": 1.5919344949554, "learning_rate": 2.103511560818202e-06, "loss": 0.3067, "step": 22500 }, { "epoch": 1.199861410943205, "grad_norm": 1.7545445742383547, "learning_rate": 2.1027045617688956e-06, "loss": 0.3095, "step": 22510 }, { "epoch": 1.2003944457770315, "grad_norm": 1.6591192004643252, "learning_rate": 2.1018973546307326e-06, "loss": 0.3085, "step": 22520 }, { "epoch": 1.2009274806108579, "grad_norm": 1.555190480116379, "learning_rate": 2.1010899396824084e-06, "loss": 0.3031, "step": 22530 }, { "epoch": 1.2014605154446842, "grad_norm": 1.6180993802345915, "learning_rate": 2.1002823172026896e-06, "loss": 0.2962, "step": 22540 }, { "epoch": 1.2019935502785106, "grad_norm": 1.7505641941296128, "learning_rate": 2.0994744874704153e-06, "loss": 0.3044, "step": 22550 }, { "epoch": 1.202526585112337, "grad_norm": 1.629273304302741, "learning_rate": 2.098666450764495e-06, "loss": 0.2981, "step": 22560 }, { "epoch": 1.2030596199461634, "grad_norm": 1.6435175769695696, "learning_rate": 2.0978582073639103e-06, "loss": 0.2974, "step": 22570 }, { "epoch": 1.2035926547799898, "grad_norm": 1.7556312935660925, "learning_rate": 2.0970497575477143e-06, "loss": 0.2896, "step": 22580 }, { "epoch": 1.2041256896138162, "grad_norm": 1.7036150566418415, "learning_rate": 2.0962411015950308e-06, "loss": 0.3033, "step": 22590 }, { "epoch": 1.2046587244476425, "grad_norm": 1.5520789600676863, "learning_rate": 2.0954322397850556e-06, "loss": 0.3159, "step": 22600 }, { "epoch": 1.205191759281469, "grad_norm": 1.6845509526261502, "learning_rate": 2.0946231723970543e-06, "loss": 0.3196, "step": 22610 }, { "epoch": 1.2057247941152953, "grad_norm": 1.8151009079214864, "learning_rate": 2.0938138997103653e-06, "loss": 0.3046, "step": 22620 }, { "epoch": 1.2062578289491217, "grad_norm": 1.7163462039429207, "learning_rate": 2.093004422004396e-06, "loss": 0.3019, "step": 22630 }, { "epoch": 1.2067908637829483, "grad_norm": 1.557164469511081, "learning_rate": 2.092194739558626e-06, "loss": 0.3047, "step": 22640 }, { "epoch": 1.2073238986167747, "grad_norm": 1.7043293480417827, "learning_rate": 2.0913848526526047e-06, "loss": 0.3068, "step": 22650 }, { "epoch": 1.207856933450601, "grad_norm": 1.6921223365466198, "learning_rate": 2.0905747615659524e-06, "loss": 0.307, "step": 22660 }, { "epoch": 1.2083899682844275, "grad_norm": 1.557180842419274, "learning_rate": 2.0897644665783604e-06, "loss": 0.3112, "step": 22670 }, { "epoch": 1.2089230031182538, "grad_norm": 1.850495550684652, "learning_rate": 2.088953967969589e-06, "loss": 0.3032, "step": 22680 }, { "epoch": 1.2094560379520802, "grad_norm": 1.6137623896619744, "learning_rate": 2.0881432660194707e-06, "loss": 0.2935, "step": 22690 }, { "epoch": 1.2099890727859066, "grad_norm": 1.7013619393664292, "learning_rate": 2.0873323610079065e-06, "loss": 0.2978, "step": 22700 }, { "epoch": 1.210522107619733, "grad_norm": 1.7337313325172732, "learning_rate": 2.0865212532148687e-06, "loss": 0.2957, "step": 22710 }, { "epoch": 1.2110551424535594, "grad_norm": 1.8440407641188579, "learning_rate": 2.0857099429203988e-06, "loss": 0.3004, "step": 22720 }, { "epoch": 1.2115881772873858, "grad_norm": 1.5981660767100057, "learning_rate": 2.0848984304046087e-06, "loss": 0.3085, "step": 22730 }, { "epoch": 1.2121212121212122, "grad_norm": 1.564937949736306, "learning_rate": 2.0840867159476802e-06, "loss": 0.3026, "step": 22740 }, { "epoch": 1.2126542469550385, "grad_norm": 1.6425259906821708, "learning_rate": 2.0832747998298638e-06, "loss": 0.3038, "step": 22750 }, { "epoch": 1.213187281788865, "grad_norm": 1.656477692170798, "learning_rate": 2.082462682331481e-06, "loss": 0.3008, "step": 22760 }, { "epoch": 1.2137203166226913, "grad_norm": 1.6495460353345768, "learning_rate": 2.0816503637329227e-06, "loss": 0.3081, "step": 22770 }, { "epoch": 1.2142533514565177, "grad_norm": 1.587330993510244, "learning_rate": 2.0808378443146483e-06, "loss": 0.3013, "step": 22780 }, { "epoch": 1.214786386290344, "grad_norm": 1.708540271652481, "learning_rate": 2.0800251243571867e-06, "loss": 0.3097, "step": 22790 }, { "epoch": 1.2153194211241705, "grad_norm": 1.7538441867916539, "learning_rate": 2.0792122041411366e-06, "loss": 0.296, "step": 22800 }, { "epoch": 1.2158524559579968, "grad_norm": 1.6445908707269592, "learning_rate": 2.0783990839471654e-06, "loss": 0.3063, "step": 22810 }, { "epoch": 1.2163854907918232, "grad_norm": 1.7426460312004, "learning_rate": 2.0775857640560103e-06, "loss": 0.3068, "step": 22820 }, { "epoch": 1.2169185256256496, "grad_norm": 1.8772432626326854, "learning_rate": 2.0767722447484767e-06, "loss": 0.3095, "step": 22830 }, { "epoch": 1.217451560459476, "grad_norm": 1.7378019374550453, "learning_rate": 2.075958526305439e-06, "loss": 0.2986, "step": 22840 }, { "epoch": 1.2179845952933024, "grad_norm": 1.7433443010702305, "learning_rate": 2.0751446090078395e-06, "loss": 0.3131, "step": 22850 }, { "epoch": 1.2185176301271288, "grad_norm": 2.0335376172963398, "learning_rate": 2.074330493136692e-06, "loss": 0.3109, "step": 22860 }, { "epoch": 1.2190506649609552, "grad_norm": 1.61318448002519, "learning_rate": 2.073516178973075e-06, "loss": 0.2995, "step": 22870 }, { "epoch": 1.2195836997947815, "grad_norm": 1.7159222475285165, "learning_rate": 2.072701666798138e-06, "loss": 0.3071, "step": 22880 }, { "epoch": 1.220116734628608, "grad_norm": 1.5555244480679535, "learning_rate": 2.0718869568930985e-06, "loss": 0.3101, "step": 22890 }, { "epoch": 1.2206497694624343, "grad_norm": 1.6272077642632332, "learning_rate": 2.071072049539242e-06, "loss": 0.3245, "step": 22900 }, { "epoch": 1.2211828042962607, "grad_norm": 1.6101601507559524, "learning_rate": 2.070256945017922e-06, "loss": 0.3008, "step": 22910 }, { "epoch": 1.221715839130087, "grad_norm": 1.9018779298422441, "learning_rate": 2.06944164361056e-06, "loss": 0.3041, "step": 22920 }, { "epoch": 1.2222488739639135, "grad_norm": 1.7500364475077854, "learning_rate": 2.068626145598646e-06, "loss": 0.3022, "step": 22930 }, { "epoch": 1.2227819087977398, "grad_norm": 1.7172411071749432, "learning_rate": 2.067810451263737e-06, "loss": 0.305, "step": 22940 }, { "epoch": 1.2233149436315662, "grad_norm": 1.5086814906774648, "learning_rate": 2.0669945608874597e-06, "loss": 0.3098, "step": 22950 }, { "epoch": 1.2238479784653926, "grad_norm": 1.7174971633111764, "learning_rate": 2.0661784747515065e-06, "loss": 0.3079, "step": 22960 }, { "epoch": 1.2243810132992192, "grad_norm": 1.7003702955675264, "learning_rate": 2.0653621931376375e-06, "loss": 0.2991, "step": 22970 }, { "epoch": 1.2249140481330456, "grad_norm": 1.639439207392191, "learning_rate": 2.064545716327681e-06, "loss": 0.2998, "step": 22980 }, { "epoch": 1.225447082966872, "grad_norm": 1.9022879696254273, "learning_rate": 2.0637290446035336e-06, "loss": 0.3063, "step": 22990 }, { "epoch": 1.2259801178006984, "grad_norm": 1.617982685002921, "learning_rate": 2.062912178247157e-06, "loss": 0.2968, "step": 23000 }, { "epoch": 1.2265131526345248, "grad_norm": 1.9969217312839123, "learning_rate": 2.062095117540581e-06, "loss": 0.317, "step": 23010 }, { "epoch": 1.2270461874683511, "grad_norm": 1.6920728336270645, "learning_rate": 2.061277862765904e-06, "loss": 0.2977, "step": 23020 }, { "epoch": 1.2275792223021775, "grad_norm": 1.6313947480776152, "learning_rate": 2.0604604142052892e-06, "loss": 0.3073, "step": 23030 }, { "epoch": 1.228112257136004, "grad_norm": 1.6564264788419336, "learning_rate": 2.0596427721409686e-06, "loss": 0.3039, "step": 23040 }, { "epoch": 1.2286452919698303, "grad_norm": 1.6976190444671193, "learning_rate": 2.058824936855239e-06, "loss": 0.3066, "step": 23050 }, { "epoch": 1.2291783268036567, "grad_norm": 1.6700520547031814, "learning_rate": 2.058006908630465e-06, "loss": 0.3104, "step": 23060 }, { "epoch": 1.229711361637483, "grad_norm": 1.7890008600368514, "learning_rate": 2.057188687749078e-06, "loss": 0.3142, "step": 23070 }, { "epoch": 1.2302443964713095, "grad_norm": 1.7733026675262766, "learning_rate": 2.056370274493576e-06, "loss": 0.3093, "step": 23080 }, { "epoch": 1.2307774313051358, "grad_norm": 1.8081693412874023, "learning_rate": 2.055551669146523e-06, "loss": 0.3066, "step": 23090 }, { "epoch": 1.2313104661389622, "grad_norm": 1.5979703342482794, "learning_rate": 2.0547328719905486e-06, "loss": 0.3009, "step": 23100 }, { "epoch": 1.2318435009727886, "grad_norm": 1.6355757331358172, "learning_rate": 2.053913883308351e-06, "loss": 0.3058, "step": 23110 }, { "epoch": 1.232376535806615, "grad_norm": 1.7272048493867864, "learning_rate": 2.053094703382692e-06, "loss": 0.3068, "step": 23120 }, { "epoch": 1.2329095706404414, "grad_norm": 1.5675457306442544, "learning_rate": 2.0522753324964004e-06, "loss": 0.3091, "step": 23130 }, { "epoch": 1.2334426054742678, "grad_norm": 1.6575492285086713, "learning_rate": 2.0514557709323707e-06, "loss": 0.2946, "step": 23140 }, { "epoch": 1.2339756403080941, "grad_norm": 1.655614314037684, "learning_rate": 2.0506360189735643e-06, "loss": 0.3103, "step": 23150 }, { "epoch": 1.2345086751419205, "grad_norm": 1.6184479052156073, "learning_rate": 2.049816076903007e-06, "loss": 0.3052, "step": 23160 }, { "epoch": 1.235041709975747, "grad_norm": 1.668374985332481, "learning_rate": 2.0489959450037915e-06, "loss": 0.3063, "step": 23170 }, { "epoch": 1.2355747448095733, "grad_norm": 1.759364050447776, "learning_rate": 2.048175623559074e-06, "loss": 0.3169, "step": 23180 }, { "epoch": 1.2361077796433997, "grad_norm": 1.7854334105978855, "learning_rate": 2.0473551128520778e-06, "loss": 0.3003, "step": 23190 }, { "epoch": 1.236640814477226, "grad_norm": 1.8656959072383144, "learning_rate": 2.0465344131660917e-06, "loss": 0.3144, "step": 23200 }, { "epoch": 1.2371738493110525, "grad_norm": 1.7862010636796275, "learning_rate": 2.045713524784469e-06, "loss": 0.3114, "step": 23210 }, { "epoch": 1.2377068841448788, "grad_norm": 1.7644424348514764, "learning_rate": 2.0448924479906282e-06, "loss": 0.303, "step": 23220 }, { "epoch": 1.2382399189787052, "grad_norm": 1.6219955535347945, "learning_rate": 2.044071183068053e-06, "loss": 0.3029, "step": 23230 }, { "epoch": 1.2387729538125316, "grad_norm": 1.6118746740378496, "learning_rate": 2.0432497303002926e-06, "loss": 0.3161, "step": 23240 }, { "epoch": 1.239305988646358, "grad_norm": 1.6387435018177512, "learning_rate": 2.04242808997096e-06, "loss": 0.2994, "step": 23250 }, { "epoch": 1.2398390234801844, "grad_norm": 1.8153250914283579, "learning_rate": 2.0416062623637334e-06, "loss": 0.3061, "step": 23260 }, { "epoch": 1.2403720583140108, "grad_norm": 1.7144466259165616, "learning_rate": 2.040784247762356e-06, "loss": 0.3087, "step": 23270 }, { "epoch": 1.2409050931478371, "grad_norm": 1.639913306693048, "learning_rate": 2.039962046450635e-06, "loss": 0.3058, "step": 23280 }, { "epoch": 1.2414381279816635, "grad_norm": 1.9478436724477137, "learning_rate": 2.039139658712444e-06, "loss": 0.3115, "step": 23290 }, { "epoch": 1.24197116281549, "grad_norm": 1.5123708939139904, "learning_rate": 2.0383170848317162e-06, "loss": 0.3092, "step": 23300 }, { "epoch": 1.2425041976493163, "grad_norm": 1.7801015955009891, "learning_rate": 2.037494325092455e-06, "loss": 0.299, "step": 23310 }, { "epoch": 1.2430372324831427, "grad_norm": 1.6946527927867592, "learning_rate": 2.0366713797787235e-06, "loss": 0.3081, "step": 23320 }, { "epoch": 1.243570267316969, "grad_norm": 1.5099028998630448, "learning_rate": 2.0358482491746515e-06, "loss": 0.3046, "step": 23330 }, { "epoch": 1.2441033021507955, "grad_norm": 2.1318087650046933, "learning_rate": 2.035024933564432e-06, "loss": 0.2998, "step": 23340 }, { "epoch": 1.2446363369846218, "grad_norm": 1.7193063490887475, "learning_rate": 2.0342014332323208e-06, "loss": 0.3026, "step": 23350 }, { "epoch": 1.2451693718184482, "grad_norm": 1.6864763705247356, "learning_rate": 2.0333777484626384e-06, "loss": 0.3168, "step": 23360 }, { "epoch": 1.2457024066522746, "grad_norm": 1.6974719414453956, "learning_rate": 2.0325538795397695e-06, "loss": 0.3108, "step": 23370 }, { "epoch": 1.246235441486101, "grad_norm": 1.7508322173549118, "learning_rate": 2.031729826748162e-06, "loss": 0.2997, "step": 23380 }, { "epoch": 1.2467684763199276, "grad_norm": 1.560396403119923, "learning_rate": 2.0309055903723265e-06, "loss": 0.3138, "step": 23390 }, { "epoch": 1.247301511153754, "grad_norm": 1.7609993321443171, "learning_rate": 2.030081170696838e-06, "loss": 0.3064, "step": 23400 }, { "epoch": 1.2478345459875804, "grad_norm": 1.7677053612413458, "learning_rate": 2.029256568006334e-06, "loss": 0.3049, "step": 23410 }, { "epoch": 1.2483675808214068, "grad_norm": 1.573857880434198, "learning_rate": 2.0284317825855156e-06, "loss": 0.3203, "step": 23420 }, { "epoch": 1.2489006156552331, "grad_norm": 1.6417171599729599, "learning_rate": 2.0276068147191473e-06, "loss": 0.3166, "step": 23430 }, { "epoch": 1.2494336504890595, "grad_norm": 1.9465722922208217, "learning_rate": 2.026781664692056e-06, "loss": 0.3089, "step": 23440 }, { "epoch": 1.249966685322886, "grad_norm": 1.9061177520198818, "learning_rate": 2.0259563327891316e-06, "loss": 0.2945, "step": 23450 }, { "epoch": 1.2504997201567123, "grad_norm": 1.6761650418704517, "learning_rate": 2.0251308192953273e-06, "loss": 0.3066, "step": 23460 }, { "epoch": 1.2510327549905387, "grad_norm": 1.6406278957445162, "learning_rate": 2.024305124495659e-06, "loss": 0.3, "step": 23470 }, { "epoch": 1.251565789824365, "grad_norm": 1.7390805170953534, "learning_rate": 2.0234792486752037e-06, "loss": 0.3201, "step": 23480 }, { "epoch": 1.2520988246581914, "grad_norm": 1.8175449778740163, "learning_rate": 2.0226531921191023e-06, "loss": 0.3137, "step": 23490 }, { "epoch": 1.2526318594920178, "grad_norm": 1.6807641917012166, "learning_rate": 2.0218269551125587e-06, "loss": 0.3011, "step": 23500 }, { "epoch": 1.2531648943258442, "grad_norm": 1.7763183860779088, "learning_rate": 2.021000537940838e-06, "loss": 0.3193, "step": 23510 }, { "epoch": 1.2536979291596706, "grad_norm": 1.8974129488929747, "learning_rate": 2.020173940889267e-06, "loss": 0.3019, "step": 23520 }, { "epoch": 1.254230963993497, "grad_norm": 1.7254528384907368, "learning_rate": 2.0193471642432355e-06, "loss": 0.3068, "step": 23530 }, { "epoch": 1.2547639988273234, "grad_norm": 1.9913061487305976, "learning_rate": 2.018520208288195e-06, "loss": 0.2923, "step": 23540 }, { "epoch": 1.2552970336611498, "grad_norm": 1.6330855881949171, "learning_rate": 2.0176930733096604e-06, "loss": 0.3076, "step": 23550 }, { "epoch": 1.2558300684949761, "grad_norm": 1.907718882882335, "learning_rate": 2.0168657595932057e-06, "loss": 0.3037, "step": 23560 }, { "epoch": 1.2563631033288025, "grad_norm": 1.8810064935227189, "learning_rate": 2.0160382674244684e-06, "loss": 0.301, "step": 23570 }, { "epoch": 1.256896138162629, "grad_norm": 1.7241431314280322, "learning_rate": 2.0152105970891473e-06, "loss": 0.3002, "step": 23580 }, { "epoch": 1.2574291729964553, "grad_norm": 1.7668103365406842, "learning_rate": 2.014382748873002e-06, "loss": 0.2965, "step": 23590 }, { "epoch": 1.2579622078302817, "grad_norm": 1.6528251100277758, "learning_rate": 2.013554723061855e-06, "loss": 0.2989, "step": 23600 }, { "epoch": 1.258495242664108, "grad_norm": 1.7693076532602492, "learning_rate": 2.0127265199415885e-06, "loss": 0.3077, "step": 23610 }, { "epoch": 1.2590282774979344, "grad_norm": 1.7991073891585239, "learning_rate": 2.0118981397981467e-06, "loss": 0.3158, "step": 23620 }, { "epoch": 1.2595613123317608, "grad_norm": 1.6531727663471893, "learning_rate": 2.0110695829175354e-06, "loss": 0.3084, "step": 23630 }, { "epoch": 1.2600943471655872, "grad_norm": 1.5372854325296668, "learning_rate": 2.0102408495858204e-06, "loss": 0.3178, "step": 23640 }, { "epoch": 1.2606273819994136, "grad_norm": 1.657453755504069, "learning_rate": 2.0094119400891293e-06, "loss": 0.3103, "step": 23650 }, { "epoch": 1.26116041683324, "grad_norm": 1.6390090310046577, "learning_rate": 2.00858285471365e-06, "loss": 0.3084, "step": 23660 }, { "epoch": 1.2616934516670664, "grad_norm": 1.9280536501717365, "learning_rate": 2.00775359374563e-06, "loss": 0.3019, "step": 23670 }, { "epoch": 1.2622264865008928, "grad_norm": 1.749625768797954, "learning_rate": 2.006924157471381e-06, "loss": 0.3049, "step": 23680 }, { "epoch": 1.2627595213347194, "grad_norm": 1.6734495570784924, "learning_rate": 2.0060945461772714e-06, "loss": 0.3214, "step": 23690 }, { "epoch": 1.2632925561685457, "grad_norm": 1.7941123780022543, "learning_rate": 2.005264760149732e-06, "loss": 0.2998, "step": 23700 }, { "epoch": 1.2638255910023721, "grad_norm": 1.6987616290417822, "learning_rate": 2.0044347996752533e-06, "loss": 0.3146, "step": 23710 }, { "epoch": 1.2643586258361985, "grad_norm": 1.811070881348571, "learning_rate": 2.0036046650403857e-06, "loss": 0.3075, "step": 23720 }, { "epoch": 1.264891660670025, "grad_norm": 1.6382451184178053, "learning_rate": 2.0027743565317413e-06, "loss": 0.2982, "step": 23730 }, { "epoch": 1.2654246955038513, "grad_norm": 1.7278127555851586, "learning_rate": 2.0019438744359903e-06, "loss": 0.3072, "step": 23740 }, { "epoch": 1.2659577303376777, "grad_norm": 1.6695385711788127, "learning_rate": 2.0011132190398635e-06, "loss": 0.3, "step": 23750 }, { "epoch": 1.266490765171504, "grad_norm": 1.6744063988070408, "learning_rate": 2.0002823906301527e-06, "loss": 0.3087, "step": 23760 }, { "epoch": 1.2670238000053304, "grad_norm": 1.649259089639654, "learning_rate": 1.9994513894937082e-06, "loss": 0.3111, "step": 23770 }, { "epoch": 1.2675568348391568, "grad_norm": 1.6006381710566826, "learning_rate": 1.998620215917439e-06, "loss": 0.3112, "step": 23780 }, { "epoch": 1.2680898696729832, "grad_norm": 1.6288001078473064, "learning_rate": 1.9977888701883164e-06, "loss": 0.3092, "step": 23790 }, { "epoch": 1.2686229045068096, "grad_norm": 1.6958816442340083, "learning_rate": 1.9969573525933686e-06, "loss": 0.3026, "step": 23800 }, { "epoch": 1.269155939340636, "grad_norm": 1.7264966338483279, "learning_rate": 1.996125663419684e-06, "loss": 0.3048, "step": 23810 }, { "epoch": 1.2696889741744624, "grad_norm": 1.650861921664192, "learning_rate": 1.9952938029544113e-06, "loss": 0.2967, "step": 23820 }, { "epoch": 1.2702220090082887, "grad_norm": 1.755834153464092, "learning_rate": 1.994461771484756e-06, "loss": 0.3017, "step": 23830 }, { "epoch": 1.2707550438421151, "grad_norm": 1.767874990410785, "learning_rate": 1.9936295692979856e-06, "loss": 0.2892, "step": 23840 }, { "epoch": 1.2712880786759415, "grad_norm": 1.5762801171439977, "learning_rate": 1.992797196681424e-06, "loss": 0.2948, "step": 23850 }, { "epoch": 1.271821113509768, "grad_norm": 1.7255827702454796, "learning_rate": 1.991964653922454e-06, "loss": 0.3083, "step": 23860 }, { "epoch": 1.2723541483435943, "grad_norm": 1.5257989472792648, "learning_rate": 1.99113194130852e-06, "loss": 0.2901, "step": 23870 }, { "epoch": 1.2728871831774207, "grad_norm": 1.7370431079395767, "learning_rate": 1.990299059127122e-06, "loss": 0.2999, "step": 23880 }, { "epoch": 1.273420218011247, "grad_norm": 1.5211423379019517, "learning_rate": 1.9894660076658198e-06, "loss": 0.2887, "step": 23890 }, { "epoch": 1.2739532528450734, "grad_norm": 1.7139661543978686, "learning_rate": 1.9886327872122317e-06, "loss": 0.3093, "step": 23900 }, { "epoch": 1.2744862876788998, "grad_norm": 1.6859724829742593, "learning_rate": 1.9877993980540335e-06, "loss": 0.3089, "step": 23910 }, { "epoch": 1.2750193225127262, "grad_norm": 1.7840117165235774, "learning_rate": 1.9869658404789602e-06, "loss": 0.3017, "step": 23920 }, { "epoch": 1.2755523573465526, "grad_norm": 1.8081796216229582, "learning_rate": 1.9861321147748045e-06, "loss": 0.3167, "step": 23930 }, { "epoch": 1.276085392180379, "grad_norm": 1.8944870103297562, "learning_rate": 1.985298221229418e-06, "loss": 0.303, "step": 23940 }, { "epoch": 1.2766184270142054, "grad_norm": 1.7834241070789296, "learning_rate": 1.9844641601307084e-06, "loss": 0.3085, "step": 23950 }, { "epoch": 1.2771514618480317, "grad_norm": 1.7272653383426313, "learning_rate": 1.9836299317666434e-06, "loss": 0.3108, "step": 23960 }, { "epoch": 1.2776844966818581, "grad_norm": 1.807055148793654, "learning_rate": 1.9827955364252463e-06, "loss": 0.2955, "step": 23970 }, { "epoch": 1.2782175315156845, "grad_norm": 1.581988851962794, "learning_rate": 1.9819609743946004e-06, "loss": 0.3046, "step": 23980 }, { "epoch": 1.278750566349511, "grad_norm": 1.7575255484880108, "learning_rate": 1.9811262459628446e-06, "loss": 0.3063, "step": 23990 }, { "epoch": 1.2792836011833373, "grad_norm": 1.8110477661759514, "learning_rate": 1.9802913514181755e-06, "loss": 0.312, "step": 24000 }, { "epoch": 1.2798166360171637, "grad_norm": 1.7025572078999478, "learning_rate": 1.979456291048848e-06, "loss": 0.3054, "step": 24010 }, { "epoch": 1.28034967085099, "grad_norm": 1.674077627318722, "learning_rate": 1.9786210651431745e-06, "loss": 0.3076, "step": 24020 }, { "epoch": 1.2808827056848164, "grad_norm": 1.6409421003288633, "learning_rate": 1.977785673989523e-06, "loss": 0.3029, "step": 24030 }, { "epoch": 1.2814157405186428, "grad_norm": 1.754176402845564, "learning_rate": 1.976950117876319e-06, "loss": 0.3041, "step": 24040 }, { "epoch": 1.2819487753524692, "grad_norm": 1.6992314594222562, "learning_rate": 1.9761143970920463e-06, "loss": 0.2985, "step": 24050 }, { "epoch": 1.2824818101862956, "grad_norm": 1.6114792478961888, "learning_rate": 1.975278511925244e-06, "loss": 0.3048, "step": 24060 }, { "epoch": 1.283014845020122, "grad_norm": 1.6771823817485154, "learning_rate": 1.974442462664509e-06, "loss": 0.3117, "step": 24070 }, { "epoch": 1.2835478798539484, "grad_norm": 1.5929546704872253, "learning_rate": 1.9736062495984937e-06, "loss": 0.3166, "step": 24080 }, { "epoch": 1.2840809146877747, "grad_norm": 1.6926011350693226, "learning_rate": 1.972769873015908e-06, "loss": 0.3058, "step": 24090 }, { "epoch": 1.2846139495216011, "grad_norm": 1.7662870946208142, "learning_rate": 1.971933333205518e-06, "loss": 0.3088, "step": 24100 }, { "epoch": 1.2851469843554275, "grad_norm": 1.544872415656281, "learning_rate": 1.9710966304561466e-06, "loss": 0.3002, "step": 24110 }, { "epoch": 1.285680019189254, "grad_norm": 1.6154117522996114, "learning_rate": 1.970259765056672e-06, "loss": 0.3007, "step": 24120 }, { "epoch": 1.2862130540230803, "grad_norm": 1.6703854411528758, "learning_rate": 1.9694227372960295e-06, "loss": 0.3129, "step": 24130 }, { "epoch": 1.2867460888569067, "grad_norm": 1.6759575566336262, "learning_rate": 1.968585547463209e-06, "loss": 0.3036, "step": 24140 }, { "epoch": 1.287279123690733, "grad_norm": 1.8525776571802695, "learning_rate": 1.967748195847259e-06, "loss": 0.3073, "step": 24150 }, { "epoch": 1.2878121585245597, "grad_norm": 1.6265519772383061, "learning_rate": 1.966910682737281e-06, "loss": 0.2998, "step": 24160 }, { "epoch": 1.288345193358386, "grad_norm": 1.723349511020782, "learning_rate": 1.966073008422434e-06, "loss": 0.3075, "step": 24170 }, { "epoch": 1.2888782281922124, "grad_norm": 1.6553144898350696, "learning_rate": 1.965235173191932e-06, "loss": 0.2908, "step": 24180 }, { "epoch": 1.2894112630260388, "grad_norm": 2.010016923609899, "learning_rate": 1.9643971773350443e-06, "loss": 0.2989, "step": 24190 }, { "epoch": 1.2899442978598652, "grad_norm": 1.7007418823381912, "learning_rate": 1.9635590211410975e-06, "loss": 0.2976, "step": 24200 }, { "epoch": 1.2904773326936916, "grad_norm": 1.7832007763299094, "learning_rate": 1.9627207048994702e-06, "loss": 0.3128, "step": 24210 }, { "epoch": 1.291010367527518, "grad_norm": 1.7460412252130852, "learning_rate": 1.9618822288995995e-06, "loss": 0.3026, "step": 24220 }, { "epoch": 1.2915434023613444, "grad_norm": 1.694423923360052, "learning_rate": 1.9610435934309763e-06, "loss": 0.3078, "step": 24230 }, { "epoch": 1.2920764371951707, "grad_norm": 1.632519395205416, "learning_rate": 1.960204798783146e-06, "loss": 0.3082, "step": 24240 }, { "epoch": 1.2926094720289971, "grad_norm": 1.6988954222354853, "learning_rate": 1.9593658452457098e-06, "loss": 0.2975, "step": 24250 }, { "epoch": 1.2931425068628235, "grad_norm": 1.6967655423997856, "learning_rate": 1.9585267331083235e-06, "loss": 0.3028, "step": 24260 }, { "epoch": 1.2936755416966499, "grad_norm": 1.6389139785690472, "learning_rate": 1.9576874626606975e-06, "loss": 0.3064, "step": 24270 }, { "epoch": 1.2942085765304763, "grad_norm": 1.6194908734913336, "learning_rate": 1.9568480341925975e-06, "loss": 0.2883, "step": 24280 }, { "epoch": 1.2947416113643027, "grad_norm": 1.5632776829525976, "learning_rate": 1.9560084479938435e-06, "loss": 0.3022, "step": 24290 }, { "epoch": 1.295274646198129, "grad_norm": 1.768719916978527, "learning_rate": 1.9551687043543087e-06, "loss": 0.3145, "step": 24300 }, { "epoch": 1.2958076810319554, "grad_norm": 1.801955872522533, "learning_rate": 1.954328803563923e-06, "loss": 0.3031, "step": 24310 }, { "epoch": 1.2963407158657818, "grad_norm": 1.698453690277252, "learning_rate": 1.953488745912667e-06, "loss": 0.2984, "step": 24320 }, { "epoch": 1.2968737506996082, "grad_norm": 1.698893672874514, "learning_rate": 1.952648531690581e-06, "loss": 0.306, "step": 24330 }, { "epoch": 1.2974067855334346, "grad_norm": 1.7322497236870733, "learning_rate": 1.9518081611877537e-06, "loss": 0.303, "step": 24340 }, { "epoch": 1.297939820367261, "grad_norm": 1.6072320884166629, "learning_rate": 1.950967634694331e-06, "loss": 0.2952, "step": 24350 }, { "epoch": 1.2984728552010874, "grad_norm": 1.7462103423578497, "learning_rate": 1.950126952500511e-06, "loss": 0.3014, "step": 24360 }, { "epoch": 1.2990058900349137, "grad_norm": 1.6838721781792219, "learning_rate": 1.9492861148965475e-06, "loss": 0.3005, "step": 24370 }, { "epoch": 1.2995389248687401, "grad_norm": 1.646669854976831, "learning_rate": 1.9484451221727463e-06, "loss": 0.3117, "step": 24380 }, { "epoch": 1.3000719597025665, "grad_norm": 1.7060404482923701, "learning_rate": 1.947603974619467e-06, "loss": 0.3041, "step": 24390 }, { "epoch": 1.3006049945363929, "grad_norm": 1.7827910818425539, "learning_rate": 1.9467626725271233e-06, "loss": 0.3129, "step": 24400 }, { "epoch": 1.3011380293702193, "grad_norm": 1.6018056908764948, "learning_rate": 1.9459212161861814e-06, "loss": 0.2969, "step": 24410 }, { "epoch": 1.3016710642040457, "grad_norm": 1.8344774186788915, "learning_rate": 1.945079605887162e-06, "loss": 0.3038, "step": 24420 }, { "epoch": 1.302204099037872, "grad_norm": 1.5518862170990173, "learning_rate": 1.944237841920637e-06, "loss": 0.3019, "step": 24430 }, { "epoch": 1.3027371338716986, "grad_norm": 1.5844239031975051, "learning_rate": 1.943395924577234e-06, "loss": 0.2997, "step": 24440 }, { "epoch": 1.303270168705525, "grad_norm": 1.5187460673777748, "learning_rate": 1.942553854147631e-06, "loss": 0.3157, "step": 24450 }, { "epoch": 1.3038032035393514, "grad_norm": 1.54301777331229, "learning_rate": 1.9417116309225604e-06, "loss": 0.2973, "step": 24460 }, { "epoch": 1.3043362383731778, "grad_norm": 1.6361812824608672, "learning_rate": 1.940869255192807e-06, "loss": 0.3049, "step": 24470 }, { "epoch": 1.3048692732070042, "grad_norm": 1.6280012072784735, "learning_rate": 1.940026727249208e-06, "loss": 0.3095, "step": 24480 }, { "epoch": 1.3054023080408306, "grad_norm": 1.5670169298927417, "learning_rate": 1.9391840473826525e-06, "loss": 0.3184, "step": 24490 }, { "epoch": 1.305935342874657, "grad_norm": 1.7785725883376424, "learning_rate": 1.9383412158840845e-06, "loss": 0.2975, "step": 24500 }, { "epoch": 1.3064683777084833, "grad_norm": 1.708854521225467, "learning_rate": 1.937498233044498e-06, "loss": 0.2997, "step": 24510 }, { "epoch": 1.3070014125423097, "grad_norm": 1.600293951707941, "learning_rate": 1.936655099154939e-06, "loss": 0.308, "step": 24520 }, { "epoch": 1.307534447376136, "grad_norm": 1.595263837936558, "learning_rate": 1.935811814506508e-06, "loss": 0.2872, "step": 24530 }, { "epoch": 1.3080674822099625, "grad_norm": 1.7202152799755208, "learning_rate": 1.934968379390356e-06, "loss": 0.2963, "step": 24540 }, { "epoch": 1.3086005170437889, "grad_norm": 1.6362949253673207, "learning_rate": 1.934124794097685e-06, "loss": 0.3042, "step": 24550 }, { "epoch": 1.3091335518776153, "grad_norm": 1.5888924483958626, "learning_rate": 1.933281058919751e-06, "loss": 0.2988, "step": 24560 }, { "epoch": 1.3096665867114416, "grad_norm": 1.5366126403555402, "learning_rate": 1.9324371741478605e-06, "loss": 0.3003, "step": 24570 }, { "epoch": 1.310199621545268, "grad_norm": 1.6488126300560526, "learning_rate": 1.9315931400733716e-06, "loss": 0.2939, "step": 24580 }, { "epoch": 1.3107326563790944, "grad_norm": 1.9741920031563378, "learning_rate": 1.9307489569876948e-06, "loss": 0.3078, "step": 24590 }, { "epoch": 1.3112656912129208, "grad_norm": 1.8484048168636145, "learning_rate": 1.929904625182291e-06, "loss": 0.2986, "step": 24600 }, { "epoch": 1.3117987260467472, "grad_norm": 1.852771724086025, "learning_rate": 1.9290601449486724e-06, "loss": 0.3016, "step": 24610 }, { "epoch": 1.3123317608805736, "grad_norm": 1.725630833258073, "learning_rate": 1.928215516578404e-06, "loss": 0.2998, "step": 24620 }, { "epoch": 1.3128647957144, "grad_norm": 1.7745457828630256, "learning_rate": 1.9273707403631007e-06, "loss": 0.3086, "step": 24630 }, { "epoch": 1.3133978305482263, "grad_norm": 1.8030306995194472, "learning_rate": 1.926525816594428e-06, "loss": 0.3077, "step": 24640 }, { "epoch": 1.3139308653820527, "grad_norm": 1.6079744676091676, "learning_rate": 1.9256807455641036e-06, "loss": 0.3028, "step": 24650 }, { "epoch": 1.314463900215879, "grad_norm": 1.6697650753363078, "learning_rate": 1.924835527563895e-06, "loss": 0.3054, "step": 24660 }, { "epoch": 1.3149969350497055, "grad_norm": 1.6758335021339845, "learning_rate": 1.923990162885621e-06, "loss": 0.3068, "step": 24670 }, { "epoch": 1.3155299698835319, "grad_norm": 2.053399679183637, "learning_rate": 1.923144651821151e-06, "loss": 0.3042, "step": 24680 }, { "epoch": 1.3160630047173583, "grad_norm": 1.712923362383969, "learning_rate": 1.9222989946624053e-06, "loss": 0.3068, "step": 24690 }, { "epoch": 1.3165960395511846, "grad_norm": 1.6096277019474694, "learning_rate": 1.9214531917013537e-06, "loss": 0.2952, "step": 24700 }, { "epoch": 1.317129074385011, "grad_norm": 1.7770554268339862, "learning_rate": 1.920607243230017e-06, "loss": 0.2926, "step": 24710 }, { "epoch": 1.3176621092188374, "grad_norm": 1.71902939999981, "learning_rate": 1.9197611495404657e-06, "loss": 0.2882, "step": 24720 }, { "epoch": 1.3181951440526638, "grad_norm": 1.9011281934636743, "learning_rate": 1.918914910924821e-06, "loss": 0.2987, "step": 24730 }, { "epoch": 1.3187281788864902, "grad_norm": 1.8191261316065641, "learning_rate": 1.9180685276752544e-06, "loss": 0.294, "step": 24740 }, { "epoch": 1.3192612137203166, "grad_norm": 1.672957086841675, "learning_rate": 1.917222000083986e-06, "loss": 0.3103, "step": 24750 }, { "epoch": 1.319794248554143, "grad_norm": 1.659211577025086, "learning_rate": 1.9163753284432878e-06, "loss": 0.3045, "step": 24760 }, { "epoch": 1.3203272833879693, "grad_norm": 1.6602295046304174, "learning_rate": 1.9155285130454794e-06, "loss": 0.2941, "step": 24770 }, { "epoch": 1.3208603182217957, "grad_norm": 1.6376469812095842, "learning_rate": 1.914681554182931e-06, "loss": 0.2882, "step": 24780 }, { "epoch": 1.321393353055622, "grad_norm": 1.6196913179364119, "learning_rate": 1.913834452148063e-06, "loss": 0.3026, "step": 24790 }, { "epoch": 1.3219263878894485, "grad_norm": 1.6322955649802322, "learning_rate": 1.912987207233344e-06, "loss": 0.2953, "step": 24800 }, { "epoch": 1.3224594227232749, "grad_norm": 1.6843421006635075, "learning_rate": 1.912139819731293e-06, "loss": 0.3038, "step": 24810 }, { "epoch": 1.3229924575571013, "grad_norm": 1.6885799190564523, "learning_rate": 1.9112922899344765e-06, "loss": 0.2802, "step": 24820 }, { "epoch": 1.3235254923909276, "grad_norm": 1.693764267732668, "learning_rate": 1.9104446181355132e-06, "loss": 0.3098, "step": 24830 }, { "epoch": 1.324058527224754, "grad_norm": 1.786059441085882, "learning_rate": 1.9095968046270675e-06, "loss": 0.3038, "step": 24840 }, { "epoch": 1.3245915620585804, "grad_norm": 1.4806942027984702, "learning_rate": 1.9087488497018554e-06, "loss": 0.3007, "step": 24850 }, { "epoch": 1.3251245968924068, "grad_norm": 1.8650841428094405, "learning_rate": 1.9079007536526385e-06, "loss": 0.3197, "step": 24860 }, { "epoch": 1.3256576317262332, "grad_norm": 1.698426286117286, "learning_rate": 1.9070525167722315e-06, "loss": 0.2976, "step": 24870 }, { "epoch": 1.3261906665600596, "grad_norm": 1.8221646509067384, "learning_rate": 1.9062041393534939e-06, "loss": 0.2949, "step": 24880 }, { "epoch": 1.326723701393886, "grad_norm": 1.6906448973418389, "learning_rate": 1.905355621689336e-06, "loss": 0.3022, "step": 24890 }, { "epoch": 1.3272567362277126, "grad_norm": 1.7774432607120814, "learning_rate": 1.9045069640727152e-06, "loss": 0.2932, "step": 24900 }, { "epoch": 1.327789771061539, "grad_norm": 1.6379111782146818, "learning_rate": 1.9036581667966376e-06, "loss": 0.2892, "step": 24910 }, { "epoch": 1.3283228058953653, "grad_norm": 1.9302657990680174, "learning_rate": 1.902809230154158e-06, "loss": 0.2985, "step": 24920 }, { "epoch": 1.3288558407291917, "grad_norm": 1.7767253763951694, "learning_rate": 1.9019601544383792e-06, "loss": 0.2938, "step": 24930 }, { "epoch": 1.329388875563018, "grad_norm": 1.7220449827622353, "learning_rate": 1.901110939942451e-06, "loss": 0.3018, "step": 24940 }, { "epoch": 1.3299219103968445, "grad_norm": 1.7537968927336551, "learning_rate": 1.9002615869595727e-06, "loss": 0.315, "step": 24950 }, { "epoch": 1.3304549452306709, "grad_norm": 1.6801148746324377, "learning_rate": 1.8994120957829902e-06, "loss": 0.3029, "step": 24960 }, { "epoch": 1.3309879800644973, "grad_norm": 1.5738669557039318, "learning_rate": 1.898562466705998e-06, "loss": 0.298, "step": 24970 }, { "epoch": 1.3315210148983236, "grad_norm": 1.527901098261024, "learning_rate": 1.897712700021937e-06, "loss": 0.3003, "step": 24980 }, { "epoch": 1.33205404973215, "grad_norm": 1.8627992857368125, "learning_rate": 1.8968627960241965e-06, "loss": 0.2937, "step": 24990 }, { "epoch": 1.3325870845659764, "grad_norm": 1.5427152804467288, "learning_rate": 1.8960127550062133e-06, "loss": 0.3114, "step": 25000 }, { "epoch": 1.3331201193998028, "grad_norm": 1.8199112210337107, "learning_rate": 1.8951625772614712e-06, "loss": 0.2893, "step": 25010 }, { "epoch": 1.3336531542336292, "grad_norm": 1.711954876820095, "learning_rate": 1.894312263083502e-06, "loss": 0.3002, "step": 25020 }, { "epoch": 1.3341861890674556, "grad_norm": 1.5829722363345358, "learning_rate": 1.8934618127658822e-06, "loss": 0.2878, "step": 25030 }, { "epoch": 1.334719223901282, "grad_norm": 1.6146549782155446, "learning_rate": 1.8926112266022383e-06, "loss": 0.2905, "step": 25040 }, { "epoch": 1.3352522587351083, "grad_norm": 1.7308783549678697, "learning_rate": 1.891760504886242e-06, "loss": 0.2931, "step": 25050 }, { "epoch": 1.3357852935689347, "grad_norm": 1.7538149980414772, "learning_rate": 1.8909096479116126e-06, "loss": 0.2987, "step": 25060 }, { "epoch": 1.336318328402761, "grad_norm": 1.7923175460136251, "learning_rate": 1.8900586559721145e-06, "loss": 0.3036, "step": 25070 }, { "epoch": 1.3368513632365875, "grad_norm": 1.6176174897012252, "learning_rate": 1.889207529361561e-06, "loss": 0.2941, "step": 25080 }, { "epoch": 1.3373843980704139, "grad_norm": 1.7268935325575119, "learning_rate": 1.8883562683738106e-06, "loss": 0.2987, "step": 25090 }, { "epoch": 1.3379174329042403, "grad_norm": 1.6814566738927406, "learning_rate": 1.8875048733027682e-06, "loss": 0.2935, "step": 25100 }, { "epoch": 1.3384504677380666, "grad_norm": 1.6084892383698028, "learning_rate": 1.8866533444423852e-06, "loss": 0.2952, "step": 25110 }, { "epoch": 1.338983502571893, "grad_norm": 1.5047901580876593, "learning_rate": 1.8858016820866589e-06, "loss": 0.2971, "step": 25120 }, { "epoch": 1.3395165374057194, "grad_norm": 1.6268242355973896, "learning_rate": 1.8849498865296328e-06, "loss": 0.2988, "step": 25130 }, { "epoch": 1.3400495722395458, "grad_norm": 1.7294076091299029, "learning_rate": 1.8840979580653976e-06, "loss": 0.2984, "step": 25140 }, { "epoch": 1.3405826070733722, "grad_norm": 1.7472393140295948, "learning_rate": 1.883245896988088e-06, "loss": 0.3122, "step": 25150 }, { "epoch": 1.3411156419071986, "grad_norm": 1.837804481892729, "learning_rate": 1.882393703591885e-06, "loss": 0.3048, "step": 25160 }, { "epoch": 1.341648676741025, "grad_norm": 1.7821030265278661, "learning_rate": 1.8815413781710162e-06, "loss": 0.2914, "step": 25170 }, { "epoch": 1.3421817115748516, "grad_norm": 1.6991749907385412, "learning_rate": 1.8806889210197539e-06, "loss": 0.3018, "step": 25180 }, { "epoch": 1.342714746408678, "grad_norm": 1.5966895156481757, "learning_rate": 1.8798363324324165e-06, "loss": 0.3024, "step": 25190 }, { "epoch": 1.3432477812425043, "grad_norm": 1.4917889160441473, "learning_rate": 1.878983612703367e-06, "loss": 0.2943, "step": 25200 }, { "epoch": 1.3437808160763307, "grad_norm": 1.6929870555282298, "learning_rate": 1.8781307621270141e-06, "loss": 0.2979, "step": 25210 }, { "epoch": 1.344313850910157, "grad_norm": 1.7982102288359176, "learning_rate": 1.877277780997812e-06, "loss": 0.2994, "step": 25220 }, { "epoch": 1.3448468857439835, "grad_norm": 1.6044560556604885, "learning_rate": 1.8764246696102599e-06, "loss": 0.2948, "step": 25230 }, { "epoch": 1.3453799205778099, "grad_norm": 1.7623280762646298, "learning_rate": 1.875571428258901e-06, "loss": 0.3108, "step": 25240 }, { "epoch": 1.3459129554116362, "grad_norm": 1.8527228921505159, "learning_rate": 1.874718057238324e-06, "loss": 0.3151, "step": 25250 }, { "epoch": 1.3464459902454626, "grad_norm": 1.6735918364759137, "learning_rate": 1.8738645568431634e-06, "loss": 0.3002, "step": 25260 }, { "epoch": 1.346979025079289, "grad_norm": 1.563025738268665, "learning_rate": 1.8730109273680973e-06, "loss": 0.3072, "step": 25270 }, { "epoch": 1.3475120599131154, "grad_norm": 1.6396888735271204, "learning_rate": 1.8721571691078476e-06, "loss": 0.2936, "step": 25280 }, { "epoch": 1.3480450947469418, "grad_norm": 1.6498885530892358, "learning_rate": 1.8713032823571826e-06, "loss": 0.2965, "step": 25290 }, { "epoch": 1.3485781295807682, "grad_norm": 1.7885364385884202, "learning_rate": 1.870449267410913e-06, "loss": 0.2996, "step": 25300 }, { "epoch": 1.3491111644145946, "grad_norm": 1.724548081568859, "learning_rate": 1.869595124563896e-06, "loss": 0.2999, "step": 25310 }, { "epoch": 1.349644199248421, "grad_norm": 1.653491280163915, "learning_rate": 1.8687408541110303e-06, "loss": 0.2952, "step": 25320 }, { "epoch": 1.3501772340822473, "grad_norm": 1.8524470987183679, "learning_rate": 1.8678864563472606e-06, "loss": 0.2931, "step": 25330 }, { "epoch": 1.3507102689160737, "grad_norm": 1.7025876542375056, "learning_rate": 1.867031931567575e-06, "loss": 0.2834, "step": 25340 }, { "epoch": 1.3512433037499, "grad_norm": 1.591168911517188, "learning_rate": 1.8661772800670054e-06, "loss": 0.2844, "step": 25350 }, { "epoch": 1.3517763385837265, "grad_norm": 1.5023369096934813, "learning_rate": 1.8653225021406272e-06, "loss": 0.3004, "step": 25360 }, { "epoch": 1.3523093734175529, "grad_norm": 1.7902580587599353, "learning_rate": 1.8644675980835603e-06, "loss": 0.2903, "step": 25370 }, { "epoch": 1.3528424082513792, "grad_norm": 1.7956207687859305, "learning_rate": 1.8636125681909669e-06, "loss": 0.2983, "step": 25380 }, { "epoch": 1.3533754430852056, "grad_norm": 1.8986260767342624, "learning_rate": 1.8627574127580535e-06, "loss": 0.2963, "step": 25390 }, { "epoch": 1.353908477919032, "grad_norm": 1.8732280093213718, "learning_rate": 1.8619021320800703e-06, "loss": 0.299, "step": 25400 }, { "epoch": 1.3544415127528584, "grad_norm": 1.6210959314138322, "learning_rate": 1.86104672645231e-06, "loss": 0.3052, "step": 25410 }, { "epoch": 1.3549745475866848, "grad_norm": 1.6340301583357382, "learning_rate": 1.860191196170108e-06, "loss": 0.3024, "step": 25420 }, { "epoch": 1.3555075824205112, "grad_norm": 1.6351253586406966, "learning_rate": 1.8593355415288437e-06, "loss": 0.2914, "step": 25430 }, { "epoch": 1.3560406172543376, "grad_norm": 1.6417334726401889, "learning_rate": 1.8584797628239394e-06, "loss": 0.3036, "step": 25440 }, { "epoch": 1.356573652088164, "grad_norm": 1.7706793869060462, "learning_rate": 1.8576238603508596e-06, "loss": 0.3044, "step": 25450 }, { "epoch": 1.3571066869219903, "grad_norm": 1.702811056499645, "learning_rate": 1.856767834405112e-06, "loss": 0.2985, "step": 25460 }, { "epoch": 1.3576397217558167, "grad_norm": 1.5455985609970666, "learning_rate": 1.855911685282247e-06, "loss": 0.2955, "step": 25470 }, { "epoch": 1.358172756589643, "grad_norm": 1.713875855767372, "learning_rate": 1.8550554132778573e-06, "loss": 0.2993, "step": 25480 }, { "epoch": 1.3587057914234695, "grad_norm": 1.8676661657624682, "learning_rate": 1.854199018687578e-06, "loss": 0.3039, "step": 25490 }, { "epoch": 1.3592388262572959, "grad_norm": 1.9242276623718506, "learning_rate": 1.8533425018070863e-06, "loss": 0.3137, "step": 25500 }, { "epoch": 1.3597718610911222, "grad_norm": 1.5109209459416384, "learning_rate": 1.8524858629321022e-06, "loss": 0.292, "step": 25510 }, { "epoch": 1.3603048959249486, "grad_norm": 1.7736688794176796, "learning_rate": 1.8516291023583874e-06, "loss": 0.3006, "step": 25520 }, { "epoch": 1.360837930758775, "grad_norm": 1.5974972156106002, "learning_rate": 1.8507722203817463e-06, "loss": 0.2926, "step": 25530 }, { "epoch": 1.3613709655926014, "grad_norm": 1.709460510599122, "learning_rate": 1.8499152172980238e-06, "loss": 0.3078, "step": 25540 }, { "epoch": 1.3619040004264278, "grad_norm": 1.6988453871414007, "learning_rate": 1.8490580934031076e-06, "loss": 0.2943, "step": 25550 }, { "epoch": 1.3624370352602542, "grad_norm": 1.6459710862486738, "learning_rate": 1.8482008489929276e-06, "loss": 0.2978, "step": 25560 }, { "epoch": 1.3629700700940806, "grad_norm": 1.9608193129331608, "learning_rate": 1.8473434843634545e-06, "loss": 0.3049, "step": 25570 }, { "epoch": 1.363503104927907, "grad_norm": 1.5874307082002002, "learning_rate": 1.8464859998107007e-06, "loss": 0.307, "step": 25580 }, { "epoch": 1.3640361397617333, "grad_norm": 1.6431515878646898, "learning_rate": 1.8456283956307193e-06, "loss": 0.3006, "step": 25590 }, { "epoch": 1.3645691745955597, "grad_norm": 1.7186107937097803, "learning_rate": 1.8447706721196062e-06, "loss": 0.3008, "step": 25600 }, { "epoch": 1.365102209429386, "grad_norm": 1.564724932901496, "learning_rate": 1.8439128295734973e-06, "loss": 0.2938, "step": 25610 }, { "epoch": 1.3656352442632125, "grad_norm": 1.7183545956361774, "learning_rate": 1.8430548682885707e-06, "loss": 0.2904, "step": 25620 }, { "epoch": 1.3661682790970389, "grad_norm": 1.7157679817068054, "learning_rate": 1.8421967885610438e-06, "loss": 0.2816, "step": 25630 }, { "epoch": 1.3667013139308652, "grad_norm": 1.7082403829283734, "learning_rate": 1.8413385906871765e-06, "loss": 0.2883, "step": 25640 }, { "epoch": 1.3672343487646919, "grad_norm": 1.828671899313489, "learning_rate": 1.840480274963269e-06, "loss": 0.3079, "step": 25650 }, { "epoch": 1.3677673835985182, "grad_norm": 1.6838995679223723, "learning_rate": 1.8396218416856618e-06, "loss": 0.2857, "step": 25660 }, { "epoch": 1.3683004184323446, "grad_norm": 1.5821568461220046, "learning_rate": 1.838763291150736e-06, "loss": 0.3033, "step": 25670 }, { "epoch": 1.368833453266171, "grad_norm": 1.6990131419897607, "learning_rate": 1.8379046236549142e-06, "loss": 0.2913, "step": 25680 }, { "epoch": 1.3693664880999974, "grad_norm": 1.4897284878011985, "learning_rate": 1.8370458394946576e-06, "loss": 0.296, "step": 25690 }, { "epoch": 1.3698995229338238, "grad_norm": 1.5887160957408233, "learning_rate": 1.83618693896647e-06, "loss": 0.3061, "step": 25700 }, { "epoch": 1.3704325577676502, "grad_norm": 1.7836494945522463, "learning_rate": 1.835327922366893e-06, "loss": 0.2931, "step": 25710 }, { "epoch": 1.3709655926014765, "grad_norm": 1.7793882903729614, "learning_rate": 1.8344687899925097e-06, "loss": 0.3023, "step": 25720 }, { "epoch": 1.371498627435303, "grad_norm": 1.7685174231779794, "learning_rate": 1.8336095421399428e-06, "loss": 0.2964, "step": 25730 }, { "epoch": 1.3720316622691293, "grad_norm": 1.5550968773752014, "learning_rate": 1.8327501791058555e-06, "loss": 0.3004, "step": 25740 }, { "epoch": 1.3725646971029557, "grad_norm": 1.9417001033961754, "learning_rate": 1.8318907011869493e-06, "loss": 0.2946, "step": 25750 }, { "epoch": 1.373097731936782, "grad_norm": 1.565186024570825, "learning_rate": 1.8310311086799662e-06, "loss": 0.3029, "step": 25760 }, { "epoch": 1.3736307667706085, "grad_norm": 1.4711919777433935, "learning_rate": 1.8301714018816884e-06, "loss": 0.2989, "step": 25770 }, { "epoch": 1.3741638016044349, "grad_norm": 1.716137265923244, "learning_rate": 1.8293115810889365e-06, "loss": 0.2977, "step": 25780 }, { "epoch": 1.3746968364382612, "grad_norm": 1.7048689188613573, "learning_rate": 1.828451646598572e-06, "loss": 0.3051, "step": 25790 }, { "epoch": 1.3752298712720876, "grad_norm": 1.7180382734650073, "learning_rate": 1.8275915987074928e-06, "loss": 0.297, "step": 25800 }, { "epoch": 1.375762906105914, "grad_norm": 1.8728526945565789, "learning_rate": 1.8267314377126391e-06, "loss": 0.3052, "step": 25810 }, { "epoch": 1.3762959409397404, "grad_norm": 1.6302356608129749, "learning_rate": 1.8258711639109882e-06, "loss": 0.2937, "step": 25820 }, { "epoch": 1.3768289757735668, "grad_norm": 1.5227128346833096, "learning_rate": 1.825010777599557e-06, "loss": 0.2911, "step": 25830 }, { "epoch": 1.3773620106073932, "grad_norm": 1.9115662639842952, "learning_rate": 1.8241502790754012e-06, "loss": 0.306, "step": 25840 }, { "epoch": 1.3778950454412195, "grad_norm": 1.6411381274129313, "learning_rate": 1.823289668635615e-06, "loss": 0.3059, "step": 25850 }, { "epoch": 1.378428080275046, "grad_norm": 1.8273017090723045, "learning_rate": 1.8224289465773316e-06, "loss": 0.3151, "step": 25860 }, { "epoch": 1.3789611151088723, "grad_norm": 1.5706508452865011, "learning_rate": 1.821568113197723e-06, "loss": 0.2968, "step": 25870 }, { "epoch": 1.3794941499426987, "grad_norm": 1.8457855322102203, "learning_rate": 1.8207071687939989e-06, "loss": 0.304, "step": 25880 }, { "epoch": 1.380027184776525, "grad_norm": 1.6757696964366564, "learning_rate": 1.819846113663407e-06, "loss": 0.2978, "step": 25890 }, { "epoch": 1.3805602196103515, "grad_norm": 1.615616358562519, "learning_rate": 1.8189849481032346e-06, "loss": 0.3018, "step": 25900 }, { "epoch": 1.3810932544441779, "grad_norm": 1.7307368199348938, "learning_rate": 1.8181236724108069e-06, "loss": 0.2963, "step": 25910 }, { "epoch": 1.3816262892780042, "grad_norm": 1.6240940529558976, "learning_rate": 1.8172622868834854e-06, "loss": 0.3034, "step": 25920 }, { "epoch": 1.3821593241118308, "grad_norm": 1.6558605846339667, "learning_rate": 1.8164007918186712e-06, "loss": 0.3024, "step": 25930 }, { "epoch": 1.3826923589456572, "grad_norm": 1.6375702676984762, "learning_rate": 1.8155391875138032e-06, "loss": 0.3091, "step": 25940 }, { "epoch": 1.3832253937794836, "grad_norm": 1.759002891873053, "learning_rate": 1.8146774742663573e-06, "loss": 0.2919, "step": 25950 }, { "epoch": 1.38375842861331, "grad_norm": 1.8305215960155221, "learning_rate": 1.8138156523738474e-06, "loss": 0.2926, "step": 25960 }, { "epoch": 1.3842914634471364, "grad_norm": 1.6636208365933962, "learning_rate": 1.8129537221338247e-06, "loss": 0.3015, "step": 25970 }, { "epoch": 1.3848244982809628, "grad_norm": 1.8181836112710485, "learning_rate": 1.8120916838438775e-06, "loss": 0.2938, "step": 25980 }, { "epoch": 1.3853575331147892, "grad_norm": 1.7678384597914825, "learning_rate": 1.8112295378016326e-06, "loss": 0.2881, "step": 25990 }, { "epoch": 1.3858905679486155, "grad_norm": 1.546534730954037, "learning_rate": 1.8103672843047533e-06, "loss": 0.2922, "step": 26000 }, { "epoch": 1.386423602782442, "grad_norm": 1.758140222906188, "learning_rate": 1.8095049236509389e-06, "loss": 0.293, "step": 26010 }, { "epoch": 1.3869566376162683, "grad_norm": 1.7588011087431463, "learning_rate": 1.8086424561379277e-06, "loss": 0.298, "step": 26020 }, { "epoch": 1.3874896724500947, "grad_norm": 1.683049047556465, "learning_rate": 1.8077798820634931e-06, "loss": 0.3055, "step": 26030 }, { "epoch": 1.388022707283921, "grad_norm": 1.673331396903691, "learning_rate": 1.806917201725447e-06, "loss": 0.3043, "step": 26040 }, { "epoch": 1.3885557421177475, "grad_norm": 1.8297760493562283, "learning_rate": 1.8060544154216366e-06, "loss": 0.2976, "step": 26050 }, { "epoch": 1.3890887769515738, "grad_norm": 1.8201694755914335, "learning_rate": 1.805191523449946e-06, "loss": 0.2968, "step": 26060 }, { "epoch": 1.3896218117854002, "grad_norm": 1.8655101143362887, "learning_rate": 1.804328526108296e-06, "loss": 0.3, "step": 26070 }, { "epoch": 1.3901548466192266, "grad_norm": 1.7353512284273505, "learning_rate": 1.8034654236946447e-06, "loss": 0.2967, "step": 26080 }, { "epoch": 1.390687881453053, "grad_norm": 1.55442410359212, "learning_rate": 1.8026022165069843e-06, "loss": 0.2971, "step": 26090 }, { "epoch": 1.3912209162868794, "grad_norm": 1.582479953305048, "learning_rate": 1.801738904843345e-06, "loss": 0.2967, "step": 26100 }, { "epoch": 1.3917539511207058, "grad_norm": 1.6166498323898875, "learning_rate": 1.800875489001792e-06, "loss": 0.2832, "step": 26110 }, { "epoch": 1.3922869859545322, "grad_norm": 1.7175353069476316, "learning_rate": 1.8000119692804274e-06, "loss": 0.3019, "step": 26120 }, { "epoch": 1.3928200207883585, "grad_norm": 1.6983601849676904, "learning_rate": 1.7991483459773888e-06, "loss": 0.2929, "step": 26130 }, { "epoch": 1.393353055622185, "grad_norm": 1.6477328045513475, "learning_rate": 1.798284619390849e-06, "loss": 0.2942, "step": 26140 }, { "epoch": 1.3938860904560113, "grad_norm": 1.8751332377723902, "learning_rate": 1.797420789819017e-06, "loss": 0.2973, "step": 26150 }, { "epoch": 1.3944191252898377, "grad_norm": 1.5831994320911136, "learning_rate": 1.7965568575601374e-06, "loss": 0.2845, "step": 26160 }, { "epoch": 1.394952160123664, "grad_norm": 1.6578406734879945, "learning_rate": 1.7956928229124902e-06, "loss": 0.3113, "step": 26170 }, { "epoch": 1.3954851949574905, "grad_norm": 1.5467336199552801, "learning_rate": 1.7948286861743908e-06, "loss": 0.3102, "step": 26180 }, { "epoch": 1.3960182297913168, "grad_norm": 2.019678522633885, "learning_rate": 1.7939644476441897e-06, "loss": 0.2953, "step": 26190 }, { "epoch": 1.3965512646251432, "grad_norm": 1.4828441186240586, "learning_rate": 1.7931001076202722e-06, "loss": 0.3006, "step": 26200 }, { "epoch": 1.3970842994589696, "grad_norm": 1.4708125917698387, "learning_rate": 1.792235666401059e-06, "loss": 0.2906, "step": 26210 }, { "epoch": 1.397617334292796, "grad_norm": 1.7290179357743, "learning_rate": 1.7913711242850065e-06, "loss": 0.3048, "step": 26220 }, { "epoch": 1.3981503691266224, "grad_norm": 1.6172558371873067, "learning_rate": 1.7905064815706043e-06, "loss": 0.2972, "step": 26230 }, { "epoch": 1.3986834039604488, "grad_norm": 1.558128305715451, "learning_rate": 1.7896417385563782e-06, "loss": 0.3, "step": 26240 }, { "epoch": 1.3992164387942752, "grad_norm": 1.6550457709388975, "learning_rate": 1.7887768955408876e-06, "loss": 0.2977, "step": 26250 }, { "epoch": 1.3997494736281015, "grad_norm": 1.7918889845440522, "learning_rate": 1.787911952822728e-06, "loss": 0.2961, "step": 26260 }, { "epoch": 1.400282508461928, "grad_norm": 1.4629794316944487, "learning_rate": 1.7870469107005264e-06, "loss": 0.3064, "step": 26270 }, { "epoch": 1.4008155432957543, "grad_norm": 1.8801017899915509, "learning_rate": 1.7861817694729472e-06, "loss": 0.3073, "step": 26280 }, { "epoch": 1.4013485781295807, "grad_norm": 1.6393017748426992, "learning_rate": 1.7853165294386877e-06, "loss": 0.3057, "step": 26290 }, { "epoch": 1.401881612963407, "grad_norm": 1.6372430845091208, "learning_rate": 1.7844511908964791e-06, "loss": 0.3062, "step": 26300 }, { "epoch": 1.4024146477972335, "grad_norm": 1.6935876510269459, "learning_rate": 1.7835857541450863e-06, "loss": 0.2954, "step": 26310 }, { "epoch": 1.4029476826310598, "grad_norm": 1.7678950660144939, "learning_rate": 1.7827202194833094e-06, "loss": 0.2951, "step": 26320 }, { "epoch": 1.4034807174648862, "grad_norm": 1.4629954158245355, "learning_rate": 1.7818545872099815e-06, "loss": 0.29, "step": 26330 }, { "epoch": 1.4040137522987126, "grad_norm": 2.078019597319422, "learning_rate": 1.7809888576239693e-06, "loss": 0.3012, "step": 26340 }, { "epoch": 1.404546787132539, "grad_norm": 1.7719334412054681, "learning_rate": 1.7801230310241734e-06, "loss": 0.2999, "step": 26350 }, { "epoch": 1.4050798219663654, "grad_norm": 1.7377464565240786, "learning_rate": 1.7792571077095275e-06, "loss": 0.2998, "step": 26360 }, { "epoch": 1.4056128568001918, "grad_norm": 1.6352342111484524, "learning_rate": 1.7783910879789991e-06, "loss": 0.2964, "step": 26370 }, { "epoch": 1.4061458916340182, "grad_norm": 1.697009670134385, "learning_rate": 1.777524972131589e-06, "loss": 0.2975, "step": 26380 }, { "epoch": 1.4066789264678445, "grad_norm": 1.6718912433255346, "learning_rate": 1.7766587604663312e-06, "loss": 0.293, "step": 26390 }, { "epoch": 1.4072119613016711, "grad_norm": 1.8140347491014321, "learning_rate": 1.7757924532822922e-06, "loss": 0.3056, "step": 26400 }, { "epoch": 1.4077449961354975, "grad_norm": 1.7031312617346268, "learning_rate": 1.774926050878572e-06, "loss": 0.2985, "step": 26410 }, { "epoch": 1.408278030969324, "grad_norm": 1.5502865665642145, "learning_rate": 1.7740595535543035e-06, "loss": 0.2983, "step": 26420 }, { "epoch": 1.4088110658031503, "grad_norm": 1.7930078600790027, "learning_rate": 1.7731929616086523e-06, "loss": 0.298, "step": 26430 }, { "epoch": 1.4093441006369767, "grad_norm": 1.7710400672071704, "learning_rate": 1.772326275340816e-06, "loss": 0.2904, "step": 26440 }, { "epoch": 1.409877135470803, "grad_norm": 1.623529556403256, "learning_rate": 1.7714594950500264e-06, "loss": 0.3017, "step": 26450 }, { "epoch": 1.4104101703046295, "grad_norm": 1.6345170122712465, "learning_rate": 1.7705926210355458e-06, "loss": 0.2994, "step": 26460 }, { "epoch": 1.4109432051384558, "grad_norm": 1.5084067854427958, "learning_rate": 1.7697256535966705e-06, "loss": 0.3034, "step": 26470 }, { "epoch": 1.4114762399722822, "grad_norm": 1.6569700893461776, "learning_rate": 1.7688585930327278e-06, "loss": 0.2941, "step": 26480 }, { "epoch": 1.4120092748061086, "grad_norm": 1.714763331108559, "learning_rate": 1.7679914396430778e-06, "loss": 0.3001, "step": 26490 }, { "epoch": 1.412542309639935, "grad_norm": 1.7836240709395328, "learning_rate": 1.767124193727113e-06, "loss": 0.2961, "step": 26500 }, { "epoch": 1.4130753444737614, "grad_norm": 1.7018376151859356, "learning_rate": 1.766256855584257e-06, "loss": 0.2918, "step": 26510 }, { "epoch": 1.4136083793075878, "grad_norm": 1.7181227659134677, "learning_rate": 1.765389425513966e-06, "loss": 0.3075, "step": 26520 }, { "epoch": 1.4141414141414141, "grad_norm": 1.7638829892478762, "learning_rate": 1.7645219038157272e-06, "loss": 0.2881, "step": 26530 }, { "epoch": 1.4146744489752405, "grad_norm": 1.7173535893014005, "learning_rate": 1.7636542907890596e-06, "loss": 0.2994, "step": 26540 }, { "epoch": 1.415207483809067, "grad_norm": 1.5617235448068514, "learning_rate": 1.7627865867335146e-06, "loss": 0.2937, "step": 26550 }, { "epoch": 1.4157405186428933, "grad_norm": 1.7695225608047267, "learning_rate": 1.7619187919486743e-06, "loss": 0.2961, "step": 26560 }, { "epoch": 1.4162735534767197, "grad_norm": 1.8172435753323006, "learning_rate": 1.7610509067341514e-06, "loss": 0.2989, "step": 26570 }, { "epoch": 1.416806588310546, "grad_norm": 1.6783220829773293, "learning_rate": 1.7601829313895917e-06, "loss": 0.2981, "step": 26580 }, { "epoch": 1.4173396231443725, "grad_norm": 1.6915333985115013, "learning_rate": 1.75931486621467e-06, "loss": 0.298, "step": 26590 }, { "epoch": 1.4178726579781988, "grad_norm": 1.865018561949842, "learning_rate": 1.7584467115090942e-06, "loss": 0.2988, "step": 26600 }, { "epoch": 1.4184056928120252, "grad_norm": 1.8309259886049176, "learning_rate": 1.7575784675726015e-06, "loss": 0.2965, "step": 26610 }, { "epoch": 1.4189387276458516, "grad_norm": 1.6991754119084301, "learning_rate": 1.7567101347049603e-06, "loss": 0.2918, "step": 26620 }, { "epoch": 1.419471762479678, "grad_norm": 1.678854945907068, "learning_rate": 1.7558417132059702e-06, "loss": 0.2785, "step": 26630 }, { "epoch": 1.4200047973135044, "grad_norm": 1.7536621029688282, "learning_rate": 1.7549732033754614e-06, "loss": 0.3021, "step": 26640 }, { "epoch": 1.4205378321473308, "grad_norm": 1.9258702576188684, "learning_rate": 1.7541046055132934e-06, "loss": 0.2978, "step": 26650 }, { "epoch": 1.4210708669811571, "grad_norm": 1.7228957156370122, "learning_rate": 1.7532359199193575e-06, "loss": 0.3033, "step": 26660 }, { "epoch": 1.4216039018149835, "grad_norm": 1.585862859633546, "learning_rate": 1.7523671468935746e-06, "loss": 0.2949, "step": 26670 }, { "epoch": 1.4221369366488101, "grad_norm": 1.5090748696304355, "learning_rate": 1.751498286735896e-06, "loss": 0.299, "step": 26680 }, { "epoch": 1.4226699714826365, "grad_norm": 1.7709645874493618, "learning_rate": 1.7506293397463036e-06, "loss": 0.2898, "step": 26690 }, { "epoch": 1.423203006316463, "grad_norm": 1.8129163334222032, "learning_rate": 1.7497603062248078e-06, "loss": 0.2896, "step": 26700 }, { "epoch": 1.4237360411502893, "grad_norm": 1.7912628299896205, "learning_rate": 1.7488911864714504e-06, "loss": 0.3034, "step": 26710 }, { "epoch": 1.4242690759841157, "grad_norm": 1.6228269461486757, "learning_rate": 1.7480219807863019e-06, "loss": 0.3063, "step": 26720 }, { "epoch": 1.424802110817942, "grad_norm": 1.7485948212265539, "learning_rate": 1.7471526894694635e-06, "loss": 0.304, "step": 26730 }, { "epoch": 1.4253351456517684, "grad_norm": 1.7824154268628587, "learning_rate": 1.7462833128210645e-06, "loss": 0.2987, "step": 26740 }, { "epoch": 1.4258681804855948, "grad_norm": 1.8384270223840056, "learning_rate": 1.7454138511412656e-06, "loss": 0.3085, "step": 26750 }, { "epoch": 1.4264012153194212, "grad_norm": 1.715369316429233, "learning_rate": 1.744544304730255e-06, "loss": 0.2974, "step": 26760 }, { "epoch": 1.4269342501532476, "grad_norm": 1.8797341325758168, "learning_rate": 1.7436746738882517e-06, "loss": 0.3044, "step": 26770 }, { "epoch": 1.427467284987074, "grad_norm": 1.8423086639407293, "learning_rate": 1.742804958915503e-06, "loss": 0.2932, "step": 26780 }, { "epoch": 1.4280003198209004, "grad_norm": 1.605966035729234, "learning_rate": 1.7419351601122844e-06, "loss": 0.3009, "step": 26790 }, { "epoch": 1.4285333546547268, "grad_norm": 1.6855910361001112, "learning_rate": 1.7410652777789017e-06, "loss": 0.3024, "step": 26800 }, { "epoch": 1.4290663894885531, "grad_norm": 1.6547056730254632, "learning_rate": 1.7401953122156902e-06, "loss": 0.3004, "step": 26810 }, { "epoch": 1.4295994243223795, "grad_norm": 1.5932137378981208, "learning_rate": 1.739325263723012e-06, "loss": 0.2963, "step": 26820 }, { "epoch": 1.430132459156206, "grad_norm": 1.8332058492007164, "learning_rate": 1.7384551326012583e-06, "loss": 0.2997, "step": 26830 }, { "epoch": 1.4306654939900323, "grad_norm": 1.6335589615625263, "learning_rate": 1.7375849191508501e-06, "loss": 0.2997, "step": 26840 }, { "epoch": 1.4311985288238587, "grad_norm": 1.9018798996551556, "learning_rate": 1.7367146236722354e-06, "loss": 0.2991, "step": 26850 }, { "epoch": 1.431731563657685, "grad_norm": 1.830783303735447, "learning_rate": 1.7358442464658916e-06, "loss": 0.308, "step": 26860 }, { "epoch": 1.4322645984915114, "grad_norm": 1.8585369463117338, "learning_rate": 1.7349737878323234e-06, "loss": 0.3045, "step": 26870 }, { "epoch": 1.4327976333253378, "grad_norm": 1.674828166380364, "learning_rate": 1.7341032480720638e-06, "loss": 0.3041, "step": 26880 }, { "epoch": 1.4333306681591642, "grad_norm": 1.7175904573254501, "learning_rate": 1.733232627485675e-06, "loss": 0.2971, "step": 26890 }, { "epoch": 1.4338637029929906, "grad_norm": 1.681304584144117, "learning_rate": 1.7323619263737447e-06, "loss": 0.2928, "step": 26900 }, { "epoch": 1.434396737826817, "grad_norm": 1.9145423118374085, "learning_rate": 1.7314911450368915e-06, "loss": 0.2955, "step": 26910 }, { "epoch": 1.4349297726606434, "grad_norm": 1.411654315384082, "learning_rate": 1.7306202837757587e-06, "loss": 0.3025, "step": 26920 }, { "epoch": 1.4354628074944698, "grad_norm": 1.9406175068060867, "learning_rate": 1.729749342891019e-06, "loss": 0.3054, "step": 26930 }, { "epoch": 1.4359958423282961, "grad_norm": 1.8086314299800497, "learning_rate": 1.728878322683372e-06, "loss": 0.2978, "step": 26940 }, { "epoch": 1.4365288771621225, "grad_norm": 1.5945339211070315, "learning_rate": 1.7280072234535455e-06, "loss": 0.2967, "step": 26950 }, { "epoch": 1.437061911995949, "grad_norm": 1.7239929839957984, "learning_rate": 1.7271360455022926e-06, "loss": 0.2917, "step": 26960 }, { "epoch": 1.4375949468297753, "grad_norm": 1.7233501885608034, "learning_rate": 1.7262647891303958e-06, "loss": 0.3094, "step": 26970 }, { "epoch": 1.4381279816636017, "grad_norm": 1.9588769534508925, "learning_rate": 1.7253934546386637e-06, "loss": 0.3015, "step": 26980 }, { "epoch": 1.438661016497428, "grad_norm": 1.6982989302274034, "learning_rate": 1.7245220423279319e-06, "loss": 0.2858, "step": 26990 }, { "epoch": 1.4391940513312544, "grad_norm": 1.6486048024521598, "learning_rate": 1.7236505524990622e-06, "loss": 0.29, "step": 27000 }, { "epoch": 1.4397270861650808, "grad_norm": 1.6755161868593285, "learning_rate": 1.7227789854529447e-06, "loss": 0.3044, "step": 27010 }, { "epoch": 1.4402601209989072, "grad_norm": 1.718201720778143, "learning_rate": 1.7219073414904948e-06, "loss": 0.313, "step": 27020 }, { "epoch": 1.4407931558327336, "grad_norm": 1.737733157286925, "learning_rate": 1.7210356209126555e-06, "loss": 0.2938, "step": 27030 }, { "epoch": 1.44132619066656, "grad_norm": 1.9469823233023167, "learning_rate": 1.7201638240203952e-06, "loss": 0.2948, "step": 27040 }, { "epoch": 1.4418592255003864, "grad_norm": 1.6523027607812257, "learning_rate": 1.719291951114709e-06, "loss": 0.293, "step": 27050 }, { "epoch": 1.4423922603342128, "grad_norm": 1.9657408010892838, "learning_rate": 1.718420002496619e-06, "loss": 0.2869, "step": 27060 }, { "epoch": 1.4429252951680391, "grad_norm": 1.7535294375076989, "learning_rate": 1.7175479784671726e-06, "loss": 0.2973, "step": 27070 }, { "epoch": 1.4434583300018655, "grad_norm": 1.567368036356119, "learning_rate": 1.7166758793274435e-06, "loss": 0.2899, "step": 27080 }, { "epoch": 1.443991364835692, "grad_norm": 1.79695931807065, "learning_rate": 1.7158037053785313e-06, "loss": 0.3046, "step": 27090 }, { "epoch": 1.4445243996695183, "grad_norm": 1.5860986291818169, "learning_rate": 1.7149314569215609e-06, "loss": 0.2907, "step": 27100 }, { "epoch": 1.4450574345033447, "grad_norm": 1.833465580118085, "learning_rate": 1.7140591342576844e-06, "loss": 0.2952, "step": 27110 }, { "epoch": 1.445590469337171, "grad_norm": 1.583922563925103, "learning_rate": 1.7131867376880786e-06, "loss": 0.2915, "step": 27120 }, { "epoch": 1.4461235041709974, "grad_norm": 1.6714845641154914, "learning_rate": 1.7123142675139445e-06, "loss": 0.2991, "step": 27130 }, { "epoch": 1.4466565390048238, "grad_norm": 1.7227014574038273, "learning_rate": 1.7114417240365111e-06, "loss": 0.2837, "step": 27140 }, { "epoch": 1.4471895738386504, "grad_norm": 1.662503588245676, "learning_rate": 1.7105691075570311e-06, "loss": 0.3012, "step": 27150 }, { "epoch": 1.4477226086724768, "grad_norm": 1.7030517097273221, "learning_rate": 1.709696418376783e-06, "loss": 0.3009, "step": 27160 }, { "epoch": 1.4482556435063032, "grad_norm": 1.5937351659627172, "learning_rate": 1.7088236567970695e-06, "loss": 0.2923, "step": 27170 }, { "epoch": 1.4487886783401296, "grad_norm": 1.7278318252163622, "learning_rate": 1.7079508231192188e-06, "loss": 0.3085, "step": 27180 }, { "epoch": 1.449321713173956, "grad_norm": 1.6368988557855801, "learning_rate": 1.707077917644585e-06, "loss": 0.2912, "step": 27190 }, { "epoch": 1.4498547480077824, "grad_norm": 1.6791057003365149, "learning_rate": 1.7062049406745461e-06, "loss": 0.2871, "step": 27200 }, { "epoch": 1.4503877828416087, "grad_norm": 1.7093953675074673, "learning_rate": 1.7053318925105039e-06, "loss": 0.3, "step": 27210 }, { "epoch": 1.4509208176754351, "grad_norm": 1.8096692528204446, "learning_rate": 1.704458773453886e-06, "loss": 0.2955, "step": 27220 }, { "epoch": 1.4514538525092615, "grad_norm": 1.6433602154068048, "learning_rate": 1.7035855838061443e-06, "loss": 0.2983, "step": 27230 }, { "epoch": 1.451986887343088, "grad_norm": 1.9044622947576912, "learning_rate": 1.7027123238687548e-06, "loss": 0.288, "step": 27240 }, { "epoch": 1.4525199221769143, "grad_norm": 1.847103434506775, "learning_rate": 1.7018389939432185e-06, "loss": 0.2966, "step": 27250 }, { "epoch": 1.4530529570107407, "grad_norm": 1.7134411365194073, "learning_rate": 1.700965594331059e-06, "loss": 0.2986, "step": 27260 }, { "epoch": 1.453585991844567, "grad_norm": 1.6134127006346162, "learning_rate": 1.7000921253338256e-06, "loss": 0.2946, "step": 27270 }, { "epoch": 1.4541190266783934, "grad_norm": 1.5666513665906094, "learning_rate": 1.6992185872530904e-06, "loss": 0.2997, "step": 27280 }, { "epoch": 1.4546520615122198, "grad_norm": 1.610372338951449, "learning_rate": 1.6983449803904504e-06, "loss": 0.2971, "step": 27290 }, { "epoch": 1.4551850963460462, "grad_norm": 1.7723047540000008, "learning_rate": 1.6974713050475254e-06, "loss": 0.2987, "step": 27300 }, { "epoch": 1.4557181311798726, "grad_norm": 1.778586198140521, "learning_rate": 1.696597561525959e-06, "loss": 0.2853, "step": 27310 }, { "epoch": 1.456251166013699, "grad_norm": 1.6191848208021444, "learning_rate": 1.6957237501274193e-06, "loss": 0.2973, "step": 27320 }, { "epoch": 1.4567842008475254, "grad_norm": 1.556652193442352, "learning_rate": 1.6948498711535963e-06, "loss": 0.296, "step": 27330 }, { "epoch": 1.4573172356813517, "grad_norm": 1.802907753062817, "learning_rate": 1.6939759249062045e-06, "loss": 0.3092, "step": 27340 }, { "epoch": 1.4578502705151781, "grad_norm": 1.6794447937674568, "learning_rate": 1.6931019116869808e-06, "loss": 0.3113, "step": 27350 }, { "epoch": 1.4583833053490045, "grad_norm": 1.7767525767084973, "learning_rate": 1.6922278317976864e-06, "loss": 0.306, "step": 27360 }, { "epoch": 1.458916340182831, "grad_norm": 1.8303255899968982, "learning_rate": 1.6913536855401045e-06, "loss": 0.2956, "step": 27370 }, { "epoch": 1.4594493750166573, "grad_norm": 1.8539109764589967, "learning_rate": 1.6904794732160412e-06, "loss": 0.2883, "step": 27380 }, { "epoch": 1.4599824098504837, "grad_norm": 1.575562634112439, "learning_rate": 1.6896051951273258e-06, "loss": 0.3006, "step": 27390 }, { "epoch": 1.46051544468431, "grad_norm": 1.6053287187694665, "learning_rate": 1.6887308515758107e-06, "loss": 0.3018, "step": 27400 }, { "epoch": 1.4610484795181364, "grad_norm": 1.676656958360302, "learning_rate": 1.6878564428633698e-06, "loss": 0.3038, "step": 27410 }, { "epoch": 1.4615815143519628, "grad_norm": 1.764302001674795, "learning_rate": 1.6869819692919005e-06, "loss": 0.2885, "step": 27420 }, { "epoch": 1.4621145491857894, "grad_norm": 1.6274000913965454, "learning_rate": 1.6861074311633221e-06, "loss": 0.2887, "step": 27430 }, { "epoch": 1.4626475840196158, "grad_norm": 1.8137987646484843, "learning_rate": 1.6852328287795761e-06, "loss": 0.3024, "step": 27440 }, { "epoch": 1.4631806188534422, "grad_norm": 1.5585894595314522, "learning_rate": 1.6843581624426272e-06, "loss": 0.3044, "step": 27450 }, { "epoch": 1.4637136536872686, "grad_norm": 1.792163610404331, "learning_rate": 1.68348343245446e-06, "loss": 0.3081, "step": 27460 }, { "epoch": 1.464246688521095, "grad_norm": 1.8482431515830744, "learning_rate": 1.682608639117084e-06, "loss": 0.2996, "step": 27470 }, { "epoch": 1.4647797233549213, "grad_norm": 1.801132634260648, "learning_rate": 1.6817337827325275e-06, "loss": 0.2961, "step": 27480 }, { "epoch": 1.4653127581887477, "grad_norm": 1.6830322923094274, "learning_rate": 1.6808588636028427e-06, "loss": 0.3069, "step": 27490 }, { "epoch": 1.4658457930225741, "grad_norm": 1.648516389852871, "learning_rate": 1.6799838820301031e-06, "loss": 0.2934, "step": 27500 }, { "epoch": 1.4663788278564005, "grad_norm": 1.6178595526168695, "learning_rate": 1.6791088383164037e-06, "loss": 0.294, "step": 27510 }, { "epoch": 1.4669118626902269, "grad_norm": 1.632930528278504, "learning_rate": 1.6782337327638599e-06, "loss": 0.3016, "step": 27520 }, { "epoch": 1.4674448975240533, "grad_norm": 1.7733791927326759, "learning_rate": 1.67735856567461e-06, "loss": 0.3028, "step": 27530 }, { "epoch": 1.4679779323578797, "grad_norm": 1.7264020767448875, "learning_rate": 1.6764833373508124e-06, "loss": 0.297, "step": 27540 }, { "epoch": 1.468510967191706, "grad_norm": 1.69479142766262, "learning_rate": 1.6756080480946477e-06, "loss": 0.3065, "step": 27550 }, { "epoch": 1.4690440020255324, "grad_norm": 1.750597489582251, "learning_rate": 1.6747326982083164e-06, "loss": 0.294, "step": 27560 }, { "epoch": 1.4695770368593588, "grad_norm": 1.8259750607950043, "learning_rate": 1.6738572879940405e-06, "loss": 0.2925, "step": 27570 }, { "epoch": 1.4701100716931852, "grad_norm": 1.6500194638415577, "learning_rate": 1.6729818177540627e-06, "loss": 0.2994, "step": 27580 }, { "epoch": 1.4706431065270116, "grad_norm": 1.8644354107399674, "learning_rate": 1.6721062877906473e-06, "loss": 0.2986, "step": 27590 }, { "epoch": 1.471176141360838, "grad_norm": 1.586703376417515, "learning_rate": 1.6712306984060777e-06, "loss": 0.2902, "step": 27600 }, { "epoch": 1.4717091761946643, "grad_norm": 1.8229037829596009, "learning_rate": 1.6703550499026583e-06, "loss": 0.2982, "step": 27610 }, { "epoch": 1.4722422110284907, "grad_norm": 1.6711875682475372, "learning_rate": 1.669479342582715e-06, "loss": 0.2914, "step": 27620 }, { "epoch": 1.4727752458623171, "grad_norm": 1.717026421882349, "learning_rate": 1.6686035767485925e-06, "loss": 0.2928, "step": 27630 }, { "epoch": 1.4733082806961435, "grad_norm": 1.8111832276628441, "learning_rate": 1.6677277527026568e-06, "loss": 0.3046, "step": 27640 }, { "epoch": 1.4738413155299699, "grad_norm": 1.7503214352699468, "learning_rate": 1.6668518707472933e-06, "loss": 0.2994, "step": 27650 }, { "epoch": 1.4743743503637963, "grad_norm": 1.586385859752052, "learning_rate": 1.6659759311849082e-06, "loss": 0.2986, "step": 27660 }, { "epoch": 1.4749073851976227, "grad_norm": 1.540224465059517, "learning_rate": 1.6650999343179261e-06, "loss": 0.2935, "step": 27670 }, { "epoch": 1.475440420031449, "grad_norm": 1.4959973826274766, "learning_rate": 1.664223880448793e-06, "loss": 0.2857, "step": 27680 }, { "epoch": 1.4759734548652754, "grad_norm": 1.6336195860243121, "learning_rate": 1.663347769879974e-06, "loss": 0.2956, "step": 27690 }, { "epoch": 1.4765064896991018, "grad_norm": 1.6383625263474346, "learning_rate": 1.662471602913953e-06, "loss": 0.2841, "step": 27700 }, { "epoch": 1.4770395245329282, "grad_norm": 1.663860284596052, "learning_rate": 1.661595379853235e-06, "loss": 0.3022, "step": 27710 }, { "epoch": 1.4775725593667546, "grad_norm": 1.7020235515232844, "learning_rate": 1.660719101000343e-06, "loss": 0.2992, "step": 27720 }, { "epoch": 1.478105594200581, "grad_norm": 1.824488715931212, "learning_rate": 1.6598427666578194e-06, "loss": 0.2964, "step": 27730 }, { "epoch": 1.4786386290344073, "grad_norm": 1.6517601663000931, "learning_rate": 1.6589663771282268e-06, "loss": 0.3024, "step": 27740 }, { "epoch": 1.4791716638682337, "grad_norm": 1.7121110341342616, "learning_rate": 1.658089932714145e-06, "loss": 0.2942, "step": 27750 }, { "epoch": 1.4797046987020601, "grad_norm": 1.6880610449541016, "learning_rate": 1.6572134337181752e-06, "loss": 0.3151, "step": 27760 }, { "epoch": 1.4802377335358865, "grad_norm": 1.6285883850939522, "learning_rate": 1.656336880442935e-06, "loss": 0.2935, "step": 27770 }, { "epoch": 1.4807707683697129, "grad_norm": 1.8933376190946465, "learning_rate": 1.6554602731910623e-06, "loss": 0.2953, "step": 27780 }, { "epoch": 1.4813038032035393, "grad_norm": 1.731183515260805, "learning_rate": 1.654583612265213e-06, "loss": 0.3004, "step": 27790 }, { "epoch": 1.4818368380373657, "grad_norm": 1.6965312285850074, "learning_rate": 1.6537068979680626e-06, "loss": 0.2892, "step": 27800 }, { "epoch": 1.482369872871192, "grad_norm": 1.9242253509759262, "learning_rate": 1.6528301306023024e-06, "loss": 0.2856, "step": 27810 }, { "epoch": 1.4829029077050184, "grad_norm": 1.7044853439965404, "learning_rate": 1.6519533104706452e-06, "loss": 0.3032, "step": 27820 }, { "epoch": 1.4834359425388448, "grad_norm": 1.5883266991698182, "learning_rate": 1.65107643787582e-06, "loss": 0.2932, "step": 27830 }, { "epoch": 1.4839689773726712, "grad_norm": 1.5867537930341205, "learning_rate": 1.6501995131205746e-06, "loss": 0.2955, "step": 27840 }, { "epoch": 1.4845020122064976, "grad_norm": 1.6901333763132695, "learning_rate": 1.6493225365076754e-06, "loss": 0.2983, "step": 27850 }, { "epoch": 1.485035047040324, "grad_norm": 1.7558129590322686, "learning_rate": 1.6484455083399049e-06, "loss": 0.2939, "step": 27860 }, { "epoch": 1.4855680818741503, "grad_norm": 1.8427958295314535, "learning_rate": 1.6475684289200645e-06, "loss": 0.3053, "step": 27870 }, { "epoch": 1.4861011167079767, "grad_norm": 1.690141242985493, "learning_rate": 1.6466912985509738e-06, "loss": 0.2931, "step": 27880 }, { "epoch": 1.4866341515418033, "grad_norm": 1.7201623676231372, "learning_rate": 1.6458141175354704e-06, "loss": 0.3032, "step": 27890 }, { "epoch": 1.4871671863756297, "grad_norm": 1.7328747967864193, "learning_rate": 1.6449368861764068e-06, "loss": 0.3024, "step": 27900 }, { "epoch": 1.487700221209456, "grad_norm": 1.8989262757327343, "learning_rate": 1.6440596047766552e-06, "loss": 0.2925, "step": 27910 }, { "epoch": 1.4882332560432825, "grad_norm": 1.676818091566833, "learning_rate": 1.6431822736391045e-06, "loss": 0.2898, "step": 27920 }, { "epoch": 1.4887662908771089, "grad_norm": 1.7035824751704662, "learning_rate": 1.6423048930666609e-06, "loss": 0.298, "step": 27930 }, { "epoch": 1.4892993257109353, "grad_norm": 1.5962473302819875, "learning_rate": 1.641427463362247e-06, "loss": 0.3001, "step": 27940 }, { "epoch": 1.4898323605447616, "grad_norm": 1.8504533027552956, "learning_rate": 1.6405499848288036e-06, "loss": 0.3003, "step": 27950 }, { "epoch": 1.490365395378588, "grad_norm": 1.6907730220777866, "learning_rate": 1.6396724577692868e-06, "loss": 0.2927, "step": 27960 }, { "epoch": 1.4908984302124144, "grad_norm": 1.8161335413322999, "learning_rate": 1.638794882486671e-06, "loss": 0.3034, "step": 27970 }, { "epoch": 1.4914314650462408, "grad_norm": 1.80666646244297, "learning_rate": 1.6379172592839456e-06, "loss": 0.294, "step": 27980 }, { "epoch": 1.4919644998800672, "grad_norm": 1.826714047419686, "learning_rate": 1.637039588464118e-06, "loss": 0.2972, "step": 27990 }, { "epoch": 1.4924975347138936, "grad_norm": 1.722561708487875, "learning_rate": 1.6361618703302114e-06, "loss": 0.2856, "step": 28000 }, { "epoch": 1.49303056954772, "grad_norm": 1.5969410968034268, "learning_rate": 1.6352841051852655e-06, "loss": 0.2905, "step": 28010 }, { "epoch": 1.4935636043815463, "grad_norm": 1.6096847253544357, "learning_rate": 1.6344062933323362e-06, "loss": 0.2946, "step": 28020 }, { "epoch": 1.4940966392153727, "grad_norm": 1.7731542890231147, "learning_rate": 1.6335284350744953e-06, "loss": 0.3001, "step": 28030 }, { "epoch": 1.494629674049199, "grad_norm": 1.7508154811463157, "learning_rate": 1.6326505307148308e-06, "loss": 0.3041, "step": 28040 }, { "epoch": 1.4951627088830255, "grad_norm": 1.8031010237971825, "learning_rate": 1.6317725805564465e-06, "loss": 0.2939, "step": 28050 }, { "epoch": 1.4956957437168519, "grad_norm": 1.6997027672173084, "learning_rate": 1.630894584902463e-06, "loss": 0.3024, "step": 28060 }, { "epoch": 1.4962287785506783, "grad_norm": 1.6270487738608646, "learning_rate": 1.6300165440560144e-06, "loss": 0.289, "step": 28070 }, { "epoch": 1.4967618133845046, "grad_norm": 1.845574235405915, "learning_rate": 1.6291384583202525e-06, "loss": 0.2887, "step": 28080 }, { "epoch": 1.497294848218331, "grad_norm": 1.73738188908622, "learning_rate": 1.6282603279983439e-06, "loss": 0.2868, "step": 28090 }, { "epoch": 1.4978278830521574, "grad_norm": 1.7358893721831805, "learning_rate": 1.6273821533934703e-06, "loss": 0.2935, "step": 28100 }, { "epoch": 1.4983609178859838, "grad_norm": 1.67699992309884, "learning_rate": 1.626503934808829e-06, "loss": 0.2999, "step": 28110 }, { "epoch": 1.4988939527198102, "grad_norm": 1.8370640778598912, "learning_rate": 1.6256256725476325e-06, "loss": 0.2843, "step": 28120 }, { "epoch": 1.4994269875536366, "grad_norm": 1.6861679442667195, "learning_rate": 1.6247473669131085e-06, "loss": 0.3015, "step": 28130 }, { "epoch": 1.499960022387463, "grad_norm": 1.642119874643243, "learning_rate": 1.6238690182084987e-06, "loss": 0.2943, "step": 28140 }, { "epoch": 1.5004930572212896, "grad_norm": 1.6569100336113909, "learning_rate": 1.622990626737061e-06, "loss": 0.2965, "step": 28150 }, { "epoch": 1.501026092055116, "grad_norm": 1.9649148361340172, "learning_rate": 1.6221121928020672e-06, "loss": 0.3024, "step": 28160 }, { "epoch": 1.5015591268889423, "grad_norm": 1.6958069942772516, "learning_rate": 1.6212337167068045e-06, "loss": 0.2929, "step": 28170 }, { "epoch": 1.5020921617227687, "grad_norm": 1.8097213385035291, "learning_rate": 1.6203551987545737e-06, "loss": 0.3053, "step": 28180 }, { "epoch": 1.502625196556595, "grad_norm": 1.6659347999034089, "learning_rate": 1.6194766392486912e-06, "loss": 0.2852, "step": 28190 }, { "epoch": 1.5031582313904215, "grad_norm": 1.8238563165414425, "learning_rate": 1.6185980384924865e-06, "loss": 0.2994, "step": 28200 }, { "epoch": 1.5036912662242479, "grad_norm": 1.658849686670842, "learning_rate": 1.617719396789304e-06, "loss": 0.294, "step": 28210 }, { "epoch": 1.5042243010580743, "grad_norm": 1.7539900164457038, "learning_rate": 1.6168407144425025e-06, "loss": 0.2919, "step": 28220 }, { "epoch": 1.5047573358919006, "grad_norm": 1.6412991696555816, "learning_rate": 1.6159619917554542e-06, "loss": 0.2899, "step": 28230 }, { "epoch": 1.505290370725727, "grad_norm": 1.518043561074825, "learning_rate": 1.6150832290315463e-06, "loss": 0.292, "step": 28240 }, { "epoch": 1.5058234055595534, "grad_norm": 1.8154556112911517, "learning_rate": 1.6142044265741783e-06, "loss": 0.2893, "step": 28250 }, { "epoch": 1.5063564403933798, "grad_norm": 1.705924089473256, "learning_rate": 1.6133255846867643e-06, "loss": 0.2889, "step": 28260 }, { "epoch": 1.5068894752272062, "grad_norm": 1.6869212430216896, "learning_rate": 1.6124467036727321e-06, "loss": 0.2905, "step": 28270 }, { "epoch": 1.5074225100610326, "grad_norm": 1.853956929348773, "learning_rate": 1.611567783835523e-06, "loss": 0.3092, "step": 28280 }, { "epoch": 1.507955544894859, "grad_norm": 1.720981687695877, "learning_rate": 1.6106888254785909e-06, "loss": 0.3004, "step": 28290 }, { "epoch": 1.5084885797286853, "grad_norm": 1.7978555077404486, "learning_rate": 1.6098098289054042e-06, "loss": 0.3025, "step": 28300 }, { "epoch": 1.5090216145625117, "grad_norm": 1.7210103903336027, "learning_rate": 1.6089307944194434e-06, "loss": 0.2989, "step": 28310 }, { "epoch": 1.509554649396338, "grad_norm": 1.6837185360746634, "learning_rate": 1.6080517223242035e-06, "loss": 0.3026, "step": 28320 }, { "epoch": 1.5100876842301645, "grad_norm": 1.6144801646589821, "learning_rate": 1.6071726129231905e-06, "loss": 0.3024, "step": 28330 }, { "epoch": 1.5106207190639909, "grad_norm": 1.9599970383905356, "learning_rate": 1.6062934665199251e-06, "loss": 0.3055, "step": 28340 }, { "epoch": 1.5111537538978173, "grad_norm": 1.6931084525565177, "learning_rate": 1.6054142834179396e-06, "loss": 0.2926, "step": 28350 }, { "epoch": 1.5116867887316436, "grad_norm": 1.7486058775920885, "learning_rate": 1.6045350639207802e-06, "loss": 0.2951, "step": 28360 }, { "epoch": 1.51221982356547, "grad_norm": 1.6882984582026879, "learning_rate": 1.6036558083320037e-06, "loss": 0.3052, "step": 28370 }, { "epoch": 1.5127528583992964, "grad_norm": 1.6172156545956373, "learning_rate": 1.602776516955181e-06, "loss": 0.2999, "step": 28380 }, { "epoch": 1.5132858932331228, "grad_norm": 1.700471594039228, "learning_rate": 1.6018971900938953e-06, "loss": 0.2992, "step": 28390 }, { "epoch": 1.5138189280669492, "grad_norm": 1.8247550784338393, "learning_rate": 1.6010178280517415e-06, "loss": 0.2938, "step": 28400 }, { "epoch": 1.5143519629007756, "grad_norm": 1.6243837729790183, "learning_rate": 1.6001384311323268e-06, "loss": 0.3021, "step": 28410 }, { "epoch": 1.514884997734602, "grad_norm": 1.6028482402628443, "learning_rate": 1.5992589996392703e-06, "loss": 0.2888, "step": 28420 }, { "epoch": 1.5154180325684283, "grad_norm": 1.747798202328879, "learning_rate": 1.5983795338762029e-06, "loss": 0.3014, "step": 28430 }, { "epoch": 1.5159510674022547, "grad_norm": 1.614565188496978, "learning_rate": 1.597500034146768e-06, "loss": 0.2971, "step": 28440 }, { "epoch": 1.516484102236081, "grad_norm": 1.7266266657771903, "learning_rate": 1.5966205007546207e-06, "loss": 0.2926, "step": 28450 }, { "epoch": 1.5170171370699075, "grad_norm": 1.634752127596423, "learning_rate": 1.5957409340034263e-06, "loss": 0.2882, "step": 28460 }, { "epoch": 1.5175501719037339, "grad_norm": 1.6965604001420744, "learning_rate": 1.5948613341968633e-06, "loss": 0.2951, "step": 28470 }, { "epoch": 1.5180832067375603, "grad_norm": 1.8262756714258117, "learning_rate": 1.5939817016386212e-06, "loss": 0.2943, "step": 28480 }, { "epoch": 1.5186162415713866, "grad_norm": 1.7446263180707948, "learning_rate": 1.5931020366324005e-06, "loss": 0.2941, "step": 28490 }, { "epoch": 1.519149276405213, "grad_norm": 1.7628376517561695, "learning_rate": 1.5922223394819126e-06, "loss": 0.2942, "step": 28500 }, { "epoch": 1.5196823112390394, "grad_norm": 1.7531577696458098, "learning_rate": 1.5913426104908807e-06, "loss": 0.2922, "step": 28510 }, { "epoch": 1.5202153460728658, "grad_norm": 1.5861094510233353, "learning_rate": 1.5904628499630384e-06, "loss": 0.2964, "step": 28520 }, { "epoch": 1.5207483809066922, "grad_norm": 1.7111374605667955, "learning_rate": 1.5895830582021312e-06, "loss": 0.293, "step": 28530 }, { "epoch": 1.5212814157405186, "grad_norm": 1.703220797847656, "learning_rate": 1.5887032355119136e-06, "loss": 0.3101, "step": 28540 }, { "epoch": 1.521814450574345, "grad_norm": 2.3977667351995158, "learning_rate": 1.5878233821961527e-06, "loss": 0.3036, "step": 28550 }, { "epoch": 1.5223474854081713, "grad_norm": 1.8617842680729169, "learning_rate": 1.5869434985586248e-06, "loss": 0.2969, "step": 28560 }, { "epoch": 1.5228805202419977, "grad_norm": 1.7435109249642375, "learning_rate": 1.5860635849031177e-06, "loss": 0.2927, "step": 28570 }, { "epoch": 1.523413555075824, "grad_norm": 1.6339853635474115, "learning_rate": 1.5851836415334289e-06, "loss": 0.294, "step": 28580 }, { "epoch": 1.5239465899096505, "grad_norm": 1.8421012470615414, "learning_rate": 1.584303668753366e-06, "loss": 0.2981, "step": 28590 }, { "epoch": 1.5244796247434769, "grad_norm": 1.8311849255284487, "learning_rate": 1.5834236668667468e-06, "loss": 0.3023, "step": 28600 }, { "epoch": 1.5250126595773033, "grad_norm": 1.5577061448746736, "learning_rate": 1.5825436361774e-06, "loss": 0.2933, "step": 28610 }, { "epoch": 1.5255456944111296, "grad_norm": 1.6624329481275517, "learning_rate": 1.5816635769891636e-06, "loss": 0.2964, "step": 28620 }, { "epoch": 1.526078729244956, "grad_norm": 1.636190525351784, "learning_rate": 1.5807834896058853e-06, "loss": 0.286, "step": 28630 }, { "epoch": 1.5266117640787824, "grad_norm": 1.783624078408808, "learning_rate": 1.5799033743314229e-06, "loss": 0.2941, "step": 28640 }, { "epoch": 1.5271447989126088, "grad_norm": 1.7325584803786656, "learning_rate": 1.5790232314696433e-06, "loss": 0.2953, "step": 28650 }, { "epoch": 1.5276778337464352, "grad_norm": 1.9083094539837078, "learning_rate": 1.5781430613244236e-06, "loss": 0.2933, "step": 28660 }, { "epoch": 1.5282108685802616, "grad_norm": 2.0451081571029452, "learning_rate": 1.57726286419965e-06, "loss": 0.2974, "step": 28670 }, { "epoch": 1.528743903414088, "grad_norm": 1.8343846593639332, "learning_rate": 1.5763826403992178e-06, "loss": 0.2961, "step": 28680 }, { "epoch": 1.5292769382479146, "grad_norm": 1.9549566614955023, "learning_rate": 1.5755023902270317e-06, "loss": 0.298, "step": 28690 }, { "epoch": 1.529809973081741, "grad_norm": 1.8536852563050172, "learning_rate": 1.574622113987006e-06, "loss": 0.2997, "step": 28700 }, { "epoch": 1.5303430079155673, "grad_norm": 1.6592000043536876, "learning_rate": 1.573741811983063e-06, "loss": 0.294, "step": 28710 }, { "epoch": 1.5308760427493937, "grad_norm": 1.8881292882742458, "learning_rate": 1.5728614845191347e-06, "loss": 0.2919, "step": 28720 }, { "epoch": 1.53140907758322, "grad_norm": 1.825312457054052, "learning_rate": 1.5719811318991617e-06, "loss": 0.2942, "step": 28730 }, { "epoch": 1.5319421124170465, "grad_norm": 1.7571945083263387, "learning_rate": 1.5711007544270927e-06, "loss": 0.3029, "step": 28740 }, { "epoch": 1.5324751472508729, "grad_norm": 1.8683441116480533, "learning_rate": 1.5702203524068864e-06, "loss": 0.2928, "step": 28750 }, { "epoch": 1.5330081820846992, "grad_norm": 1.5973701567812721, "learning_rate": 1.569339926142508e-06, "loss": 0.286, "step": 28760 }, { "epoch": 1.5335412169185256, "grad_norm": 1.7716311390117534, "learning_rate": 1.5684594759379325e-06, "loss": 0.303, "step": 28770 }, { "epoch": 1.534074251752352, "grad_norm": 1.5439971205746117, "learning_rate": 1.5675790020971431e-06, "loss": 0.2857, "step": 28780 }, { "epoch": 1.5346072865861784, "grad_norm": 2.201419785810831, "learning_rate": 1.5666985049241312e-06, "loss": 0.3007, "step": 28790 }, { "epoch": 1.5351403214200048, "grad_norm": 1.7183377729934377, "learning_rate": 1.5658179847228947e-06, "loss": 0.2898, "step": 28800 }, { "epoch": 1.5356733562538312, "grad_norm": 1.802440538999993, "learning_rate": 1.5649374417974417e-06, "loss": 0.2894, "step": 28810 }, { "epoch": 1.5362063910876576, "grad_norm": 1.703431075434067, "learning_rate": 1.5640568764517867e-06, "loss": 0.296, "step": 28820 }, { "epoch": 1.536739425921484, "grad_norm": 1.885849139426709, "learning_rate": 1.563176288989952e-06, "loss": 0.2799, "step": 28830 }, { "epoch": 1.5372724607553103, "grad_norm": 1.634048682347014, "learning_rate": 1.562295679715969e-06, "loss": 0.2935, "step": 28840 }, { "epoch": 1.5378054955891367, "grad_norm": 1.7473911943796552, "learning_rate": 1.5614150489338739e-06, "loss": 0.2933, "step": 28850 }, { "epoch": 1.538338530422963, "grad_norm": 1.7636035666036844, "learning_rate": 1.5605343969477132e-06, "loss": 0.2972, "step": 28860 }, { "epoch": 1.5388715652567897, "grad_norm": 1.8471786793401228, "learning_rate": 1.559653724061539e-06, "loss": 0.2924, "step": 28870 }, { "epoch": 1.539404600090616, "grad_norm": 1.6199480807082696, "learning_rate": 1.5587730305794114e-06, "loss": 0.2892, "step": 28880 }, { "epoch": 1.5399376349244425, "grad_norm": 1.5483058660371363, "learning_rate": 1.557892316805397e-06, "loss": 0.303, "step": 28890 }, { "epoch": 1.5404706697582689, "grad_norm": 1.653927269709408, "learning_rate": 1.5570115830435693e-06, "loss": 0.303, "step": 28900 }, { "epoch": 1.5410037045920952, "grad_norm": 1.8433909449753805, "learning_rate": 1.55613082959801e-06, "loss": 0.2882, "step": 28910 }, { "epoch": 1.5415367394259216, "grad_norm": 1.8081350242458483, "learning_rate": 1.5552500567728064e-06, "loss": 0.2954, "step": 28920 }, { "epoch": 1.542069774259748, "grad_norm": 1.7542713104999963, "learning_rate": 1.554369264872052e-06, "loss": 0.2934, "step": 28930 }, { "epoch": 1.5426028090935744, "grad_norm": 1.7446725348283751, "learning_rate": 1.5534884541998486e-06, "loss": 0.2768, "step": 28940 }, { "epoch": 1.5431358439274008, "grad_norm": 1.6791827727591018, "learning_rate": 1.5526076250603028e-06, "loss": 0.2927, "step": 28950 }, { "epoch": 1.5436688787612272, "grad_norm": 1.8438925899239147, "learning_rate": 1.551726777757529e-06, "loss": 0.2893, "step": 28960 }, { "epoch": 1.5442019135950535, "grad_norm": 1.8057102543856545, "learning_rate": 1.550845912595647e-06, "loss": 0.2971, "step": 28970 }, { "epoch": 1.54473494842888, "grad_norm": 1.7197293993214577, "learning_rate": 1.549965029878783e-06, "loss": 0.2866, "step": 28980 }, { "epoch": 1.5452679832627063, "grad_norm": 1.7656653900122072, "learning_rate": 1.549084129911069e-06, "loss": 0.2928, "step": 28990 }, { "epoch": 1.5458010180965327, "grad_norm": 1.713177521103493, "learning_rate": 1.5482032129966427e-06, "loss": 0.2833, "step": 29000 }, { "epoch": 1.546334052930359, "grad_norm": 1.7665636184546882, "learning_rate": 1.5473222794396497e-06, "loss": 0.2927, "step": 29010 }, { "epoch": 1.5468670877641855, "grad_norm": 1.8510141714175767, "learning_rate": 1.5464413295442382e-06, "loss": 0.2875, "step": 29020 }, { "epoch": 1.5474001225980119, "grad_norm": 2.120257671777005, "learning_rate": 1.5455603636145643e-06, "loss": 0.3013, "step": 29030 }, { "epoch": 1.5479331574318382, "grad_norm": 1.7090098778980756, "learning_rate": 1.544679381954789e-06, "loss": 0.2904, "step": 29040 }, { "epoch": 1.5484661922656646, "grad_norm": 1.7900042413336352, "learning_rate": 1.5437983848690785e-06, "loss": 0.2869, "step": 29050 }, { "epoch": 1.548999227099491, "grad_norm": 1.7323622840467536, "learning_rate": 1.5429173726616043e-06, "loss": 0.2949, "step": 29060 }, { "epoch": 1.5495322619333174, "grad_norm": 1.8085541732086141, "learning_rate": 1.5420363456365438e-06, "loss": 0.3003, "step": 29070 }, { "epoch": 1.5500652967671438, "grad_norm": 1.7063765707716472, "learning_rate": 1.5411553040980786e-06, "loss": 0.3023, "step": 29080 }, { "epoch": 1.5505983316009702, "grad_norm": 1.7301599684775033, "learning_rate": 1.5402742483503962e-06, "loss": 0.2932, "step": 29090 }, { "epoch": 1.5511313664347965, "grad_norm": 1.7365182226562361, "learning_rate": 1.5393931786976884e-06, "loss": 0.2758, "step": 29100 }, { "epoch": 1.551664401268623, "grad_norm": 1.8591936086396716, "learning_rate": 1.5385120954441518e-06, "loss": 0.2883, "step": 29110 }, { "epoch": 1.5521974361024493, "grad_norm": 1.5892261429339873, "learning_rate": 1.5376309988939877e-06, "loss": 0.3022, "step": 29120 }, { "epoch": 1.5527304709362757, "grad_norm": 1.8519464360031765, "learning_rate": 1.5367498893514023e-06, "loss": 0.294, "step": 29130 }, { "epoch": 1.553263505770102, "grad_norm": 1.8891603306431821, "learning_rate": 1.535868767120607e-06, "loss": 0.2923, "step": 29140 }, { "epoch": 1.5537965406039285, "grad_norm": 1.6263498224727384, "learning_rate": 1.5349876325058154e-06, "loss": 0.2942, "step": 29150 }, { "epoch": 1.5543295754377549, "grad_norm": 1.6935832837348501, "learning_rate": 1.534106485811247e-06, "loss": 0.2972, "step": 29160 }, { "epoch": 1.5548626102715812, "grad_norm": 1.6496853466921766, "learning_rate": 1.5332253273411256e-06, "loss": 0.3004, "step": 29170 }, { "epoch": 1.5553956451054076, "grad_norm": 1.7745318232485863, "learning_rate": 1.532344157399679e-06, "loss": 0.2968, "step": 29180 }, { "epoch": 1.555928679939234, "grad_norm": 1.7223290085284912, "learning_rate": 1.5314629762911375e-06, "loss": 0.2898, "step": 29190 }, { "epoch": 1.5564617147730604, "grad_norm": 1.79787418568104, "learning_rate": 1.530581784319737e-06, "loss": 0.3009, "step": 29200 }, { "epoch": 1.5569947496068868, "grad_norm": 1.738331588677691, "learning_rate": 1.5297005817897162e-06, "loss": 0.2969, "step": 29210 }, { "epoch": 1.5575277844407132, "grad_norm": 1.8017796862562803, "learning_rate": 1.5288193690053182e-06, "loss": 0.2853, "step": 29220 }, { "epoch": 1.5580608192745395, "grad_norm": 1.824425767854045, "learning_rate": 1.5279381462707887e-06, "loss": 0.3009, "step": 29230 }, { "epoch": 1.558593854108366, "grad_norm": 1.6177833474108034, "learning_rate": 1.5270569138903774e-06, "loss": 0.295, "step": 29240 }, { "epoch": 1.5591268889421923, "grad_norm": 1.7554613683017242, "learning_rate": 1.5261756721683374e-06, "loss": 0.3021, "step": 29250 }, { "epoch": 1.5596599237760187, "grad_norm": 1.8089123772227371, "learning_rate": 1.525294421408925e-06, "loss": 0.3006, "step": 29260 }, { "epoch": 1.560192958609845, "grad_norm": 1.7861263387117634, "learning_rate": 1.5244131619163992e-06, "loss": 0.2841, "step": 29270 }, { "epoch": 1.5607259934436715, "grad_norm": 1.820631455076131, "learning_rate": 1.5235318939950222e-06, "loss": 0.2973, "step": 29280 }, { "epoch": 1.5612590282774979, "grad_norm": 1.7481690762500246, "learning_rate": 1.5226506179490595e-06, "loss": 0.2942, "step": 29290 }, { "epoch": 1.5617920631113242, "grad_norm": 1.6575625506593998, "learning_rate": 1.5217693340827792e-06, "loss": 0.2967, "step": 29300 }, { "epoch": 1.5623250979451506, "grad_norm": 1.8455027689733514, "learning_rate": 1.520888042700452e-06, "loss": 0.2959, "step": 29310 }, { "epoch": 1.562858132778977, "grad_norm": 1.6597897673256523, "learning_rate": 1.5200067441063505e-06, "loss": 0.2847, "step": 29320 }, { "epoch": 1.5633911676128034, "grad_norm": 1.7121060363437959, "learning_rate": 1.5191254386047516e-06, "loss": 0.2838, "step": 29330 }, { "epoch": 1.5639242024466298, "grad_norm": 1.5863357753350653, "learning_rate": 1.5182441264999327e-06, "loss": 0.2968, "step": 29340 }, { "epoch": 1.5644572372804562, "grad_norm": 1.8229288180980128, "learning_rate": 1.5173628080961747e-06, "loss": 0.2928, "step": 29350 }, { "epoch": 1.5649902721142825, "grad_norm": 1.5331678552178638, "learning_rate": 1.5164814836977604e-06, "loss": 0.2962, "step": 29360 }, { "epoch": 1.565523306948109, "grad_norm": 1.6484295238174518, "learning_rate": 1.5156001536089735e-06, "loss": 0.2908, "step": 29370 }, { "epoch": 1.5660563417819353, "grad_norm": 1.6315555651845868, "learning_rate": 1.514718818134102e-06, "loss": 0.2921, "step": 29380 }, { "epoch": 1.5665893766157617, "grad_norm": 1.653521081680679, "learning_rate": 1.513837477577434e-06, "loss": 0.2989, "step": 29390 }, { "epoch": 1.567122411449588, "grad_norm": 1.6794389480599035, "learning_rate": 1.512956132243259e-06, "loss": 0.2931, "step": 29400 }, { "epoch": 1.5676554462834145, "grad_norm": 1.7506217509942266, "learning_rate": 1.51207478243587e-06, "loss": 0.2951, "step": 29410 }, { "epoch": 1.5681884811172409, "grad_norm": 1.7427523537299987, "learning_rate": 1.5111934284595598e-06, "loss": 0.2929, "step": 29420 }, { "epoch": 1.5687215159510672, "grad_norm": 1.8252874255787626, "learning_rate": 1.5103120706186239e-06, "loss": 0.2843, "step": 29430 }, { "epoch": 1.5692545507848938, "grad_norm": 1.8166711691739543, "learning_rate": 1.5094307092173578e-06, "loss": 0.2892, "step": 29440 }, { "epoch": 1.5697875856187202, "grad_norm": 1.7704441845382535, "learning_rate": 1.5085493445600593e-06, "loss": 0.2982, "step": 29450 }, { "epoch": 1.5703206204525466, "grad_norm": 1.7339631843224352, "learning_rate": 1.5076679769510277e-06, "loss": 0.2917, "step": 29460 }, { "epoch": 1.570853655286373, "grad_norm": 1.7169978904517043, "learning_rate": 1.5067866066945622e-06, "loss": 0.2877, "step": 29470 }, { "epoch": 1.5713866901201994, "grad_norm": 1.727066373523391, "learning_rate": 1.5059052340949632e-06, "loss": 0.285, "step": 29480 }, { "epoch": 1.5719197249540258, "grad_norm": 1.7622201639321324, "learning_rate": 1.5050238594565317e-06, "loss": 0.2824, "step": 29490 }, { "epoch": 1.5724527597878522, "grad_norm": 1.8964643669719574, "learning_rate": 1.5041424830835704e-06, "loss": 0.2923, "step": 29500 }, { "epoch": 1.5729857946216785, "grad_norm": 1.6733681966839968, "learning_rate": 1.5032611052803816e-06, "loss": 0.2998, "step": 29510 }, { "epoch": 1.573518829455505, "grad_norm": 2.0127908832695067, "learning_rate": 1.5023797263512683e-06, "loss": 0.3069, "step": 29520 }, { "epoch": 1.5740518642893313, "grad_norm": 1.8383242398927957, "learning_rate": 1.5014983466005348e-06, "loss": 0.2868, "step": 29530 }, { "epoch": 1.5745848991231577, "grad_norm": 1.7218919525028973, "learning_rate": 1.5006169663324842e-06, "loss": 0.2977, "step": 29540 }, { "epoch": 1.575117933956984, "grad_norm": 1.7953636226015546, "learning_rate": 1.4997355858514206e-06, "loss": 0.2883, "step": 29550 }, { "epoch": 1.5756509687908105, "grad_norm": 1.8905724320980837, "learning_rate": 1.4988542054616486e-06, "loss": 0.3007, "step": 29560 }, { "epoch": 1.5761840036246368, "grad_norm": 1.6717849790344281, "learning_rate": 1.4979728254674717e-06, "loss": 0.2913, "step": 29570 }, { "epoch": 1.5767170384584632, "grad_norm": 1.6829301510296975, "learning_rate": 1.4970914461731938e-06, "loss": 0.3011, "step": 29580 }, { "epoch": 1.5772500732922896, "grad_norm": 1.7106737360506645, "learning_rate": 1.496210067883119e-06, "loss": 0.2851, "step": 29590 }, { "epoch": 1.577783108126116, "grad_norm": 1.7771988372987726, "learning_rate": 1.4953286909015505e-06, "loss": 0.2849, "step": 29600 }, { "epoch": 1.5783161429599424, "grad_norm": 1.7076412774843983, "learning_rate": 1.4944473155327908e-06, "loss": 0.285, "step": 29610 }, { "epoch": 1.578849177793769, "grad_norm": 2.0100980705442923, "learning_rate": 1.4935659420811425e-06, "loss": 0.2976, "step": 29620 }, { "epoch": 1.5793822126275954, "grad_norm": 2.0364791644781692, "learning_rate": 1.4926845708509071e-06, "loss": 0.2921, "step": 29630 }, { "epoch": 1.5799152474614218, "grad_norm": 1.528965050552814, "learning_rate": 1.491803202146386e-06, "loss": 0.2806, "step": 29640 }, { "epoch": 1.5804482822952481, "grad_norm": 1.6917418998977327, "learning_rate": 1.4909218362718783e-06, "loss": 0.2887, "step": 29650 }, { "epoch": 1.5809813171290745, "grad_norm": 1.5949104978843631, "learning_rate": 1.4900404735316837e-06, "loss": 0.2887, "step": 29660 }, { "epoch": 1.581514351962901, "grad_norm": 1.6591268495298312, "learning_rate": 1.4891591142301e-06, "loss": 0.2979, "step": 29670 }, { "epoch": 1.5820473867967273, "grad_norm": 1.7864903686969482, "learning_rate": 1.488277758671424e-06, "loss": 0.2959, "step": 29680 }, { "epoch": 1.5825804216305537, "grad_norm": 1.6149836453851365, "learning_rate": 1.4873964071599515e-06, "loss": 0.2909, "step": 29690 }, { "epoch": 1.58311345646438, "grad_norm": 1.7203432392741604, "learning_rate": 1.4865150599999757e-06, "loss": 0.2926, "step": 29700 }, { "epoch": 1.5836464912982064, "grad_norm": 1.9222255085932465, "learning_rate": 1.4856337174957898e-06, "loss": 0.3035, "step": 29710 }, { "epoch": 1.5841795261320328, "grad_norm": 1.9505240563639825, "learning_rate": 1.4847523799516846e-06, "loss": 0.2912, "step": 29720 }, { "epoch": 1.5847125609658592, "grad_norm": 1.495431000600028, "learning_rate": 1.4838710476719498e-06, "loss": 0.2878, "step": 29730 }, { "epoch": 1.5852455957996856, "grad_norm": 1.6225851485779565, "learning_rate": 1.482989720960872e-06, "loss": 0.2875, "step": 29740 }, { "epoch": 1.585778630633512, "grad_norm": 1.8520363950078764, "learning_rate": 1.4821084001227373e-06, "loss": 0.2948, "step": 29750 }, { "epoch": 1.5863116654673384, "grad_norm": 1.9114972356068047, "learning_rate": 1.481227085461829e-06, "loss": 0.298, "step": 29760 }, { "epoch": 1.5868447003011648, "grad_norm": 1.681523278171821, "learning_rate": 1.4803457772824287e-06, "loss": 0.2878, "step": 29770 }, { "epoch": 1.5873777351349911, "grad_norm": 2.10014393489051, "learning_rate": 1.4794644758888154e-06, "loss": 0.2852, "step": 29780 }, { "epoch": 1.5879107699688175, "grad_norm": 1.6029038473812447, "learning_rate": 1.4785831815852652e-06, "loss": 0.2994, "step": 29790 }, { "epoch": 1.588443804802644, "grad_norm": 1.7279237101284677, "learning_rate": 1.4777018946760534e-06, "loss": 0.2922, "step": 29800 }, { "epoch": 1.5889768396364703, "grad_norm": 1.7693786663101572, "learning_rate": 1.4768206154654517e-06, "loss": 0.2856, "step": 29810 }, { "epoch": 1.5895098744702967, "grad_norm": 1.7807937146128132, "learning_rate": 1.4759393442577287e-06, "loss": 0.2823, "step": 29820 }, { "epoch": 1.590042909304123, "grad_norm": 1.6651240447297195, "learning_rate": 1.4750580813571512e-06, "loss": 0.2872, "step": 29830 }, { "epoch": 1.5905759441379494, "grad_norm": 1.5479437443507142, "learning_rate": 1.4741768270679817e-06, "loss": 0.2909, "step": 29840 }, { "epoch": 1.5911089789717758, "grad_norm": 1.9486996128425538, "learning_rate": 1.4732955816944818e-06, "loss": 0.3067, "step": 29850 }, { "epoch": 1.5916420138056022, "grad_norm": 1.776230465946072, "learning_rate": 1.4724143455409086e-06, "loss": 0.2936, "step": 29860 }, { "epoch": 1.5921750486394286, "grad_norm": 1.8519639812750501, "learning_rate": 1.4715331189115164e-06, "loss": 0.2908, "step": 29870 }, { "epoch": 1.592708083473255, "grad_norm": 1.7729475522446254, "learning_rate": 1.470651902110556e-06, "loss": 0.2894, "step": 29880 }, { "epoch": 1.5932411183070814, "grad_norm": 1.7436151462487275, "learning_rate": 1.4697706954422749e-06, "loss": 0.2984, "step": 29890 }, { "epoch": 1.5937741531409078, "grad_norm": 1.807041226681752, "learning_rate": 1.4688894992109173e-06, "loss": 0.2845, "step": 29900 }, { "epoch": 1.5943071879747341, "grad_norm": 1.7369816471603763, "learning_rate": 1.4680083137207238e-06, "loss": 0.295, "step": 29910 }, { "epoch": 1.5948402228085605, "grad_norm": 1.7320307158069488, "learning_rate": 1.4671271392759311e-06, "loss": 0.2839, "step": 29920 }, { "epoch": 1.595373257642387, "grad_norm": 1.7203403014891072, "learning_rate": 1.466245976180772e-06, "loss": 0.289, "step": 29930 }, { "epoch": 1.5959062924762133, "grad_norm": 2.131879893106814, "learning_rate": 1.4653648247394755e-06, "loss": 0.297, "step": 29940 }, { "epoch": 1.5964393273100397, "grad_norm": 1.6403213281206186, "learning_rate": 1.4644836852562668e-06, "loss": 0.2937, "step": 29950 }, { "epoch": 1.596972362143866, "grad_norm": 1.9166361004830939, "learning_rate": 1.4636025580353668e-06, "loss": 0.2994, "step": 29960 }, { "epoch": 1.5975053969776924, "grad_norm": 1.7065153110500177, "learning_rate": 1.462721443380992e-06, "loss": 0.294, "step": 29970 }, { "epoch": 1.5980384318115188, "grad_norm": 1.6068123360904143, "learning_rate": 1.4618403415973546e-06, "loss": 0.2989, "step": 29980 }, { "epoch": 1.5985714666453452, "grad_norm": 1.9757782848674599, "learning_rate": 1.4609592529886624e-06, "loss": 0.2797, "step": 29990 }, { "epoch": 1.5991045014791716, "grad_norm": 1.9114273826012875, "learning_rate": 1.4600781778591194e-06, "loss": 0.2906, "step": 30000 }, { "epoch": 1.599637536312998, "grad_norm": 1.6977416149440836, "learning_rate": 1.4591971165129234e-06, "loss": 0.2916, "step": 30010 }, { "epoch": 1.6001705711468244, "grad_norm": 1.8617417990393528, "learning_rate": 1.4583160692542685e-06, "loss": 0.286, "step": 30020 }, { "epoch": 1.6007036059806508, "grad_norm": 1.7390391425669267, "learning_rate": 1.457435036387344e-06, "loss": 0.2905, "step": 30030 }, { "epoch": 1.6012366408144771, "grad_norm": 1.7204244586294035, "learning_rate": 1.456554018216334e-06, "loss": 0.2915, "step": 30040 }, { "epoch": 1.6017696756483035, "grad_norm": 1.495972777362757, "learning_rate": 1.455673015045417e-06, "loss": 0.2886, "step": 30050 }, { "epoch": 1.60230271048213, "grad_norm": 1.6698287745784772, "learning_rate": 1.4547920271787674e-06, "loss": 0.2919, "step": 30060 }, { "epoch": 1.6028357453159563, "grad_norm": 1.6563096457504, "learning_rate": 1.4539110549205531e-06, "loss": 0.2862, "step": 30070 }, { "epoch": 1.6033687801497827, "grad_norm": 1.5696180412578158, "learning_rate": 1.4530300985749376e-06, "loss": 0.2903, "step": 30080 }, { "epoch": 1.603901814983609, "grad_norm": 1.9328779705136152, "learning_rate": 1.452149158446079e-06, "loss": 0.2886, "step": 30090 }, { "epoch": 1.6044348498174354, "grad_norm": 1.6968063245285405, "learning_rate": 1.4512682348381285e-06, "loss": 0.3034, "step": 30100 }, { "epoch": 1.6049678846512618, "grad_norm": 1.7284877634340405, "learning_rate": 1.4503873280552326e-06, "loss": 0.2878, "step": 30110 }, { "epoch": 1.6055009194850882, "grad_norm": 1.5766855279671077, "learning_rate": 1.4495064384015323e-06, "loss": 0.2866, "step": 30120 }, { "epoch": 1.6060339543189146, "grad_norm": 1.6629848182261564, "learning_rate": 1.4486255661811621e-06, "loss": 0.2846, "step": 30130 }, { "epoch": 1.606566989152741, "grad_norm": 1.752335694125959, "learning_rate": 1.4477447116982503e-06, "loss": 0.3058, "step": 30140 }, { "epoch": 1.6071000239865674, "grad_norm": 1.8416926587540483, "learning_rate": 1.4468638752569193e-06, "loss": 0.2791, "step": 30150 }, { "epoch": 1.6076330588203938, "grad_norm": 1.7743146963666445, "learning_rate": 1.4459830571612857e-06, "loss": 0.291, "step": 30160 }, { "epoch": 1.6081660936542201, "grad_norm": 1.7775585093653938, "learning_rate": 1.4451022577154597e-06, "loss": 0.2964, "step": 30170 }, { "epoch": 1.6086991284880467, "grad_norm": 1.8841816144061794, "learning_rate": 1.4442214772235438e-06, "loss": 0.2923, "step": 30180 }, { "epoch": 1.6092321633218731, "grad_norm": 1.7808132885140648, "learning_rate": 1.4433407159896356e-06, "loss": 0.2928, "step": 30190 }, { "epoch": 1.6097651981556995, "grad_norm": 1.5396400528966006, "learning_rate": 1.4424599743178252e-06, "loss": 0.29, "step": 30200 }, { "epoch": 1.610298232989526, "grad_norm": 1.6457019661135046, "learning_rate": 1.4415792525121966e-06, "loss": 0.2983, "step": 30210 }, { "epoch": 1.6108312678233523, "grad_norm": 1.6855099835813079, "learning_rate": 1.4406985508768257e-06, "loss": 0.2874, "step": 30220 }, { "epoch": 1.6113643026571787, "grad_norm": 1.6990597726711465, "learning_rate": 1.4398178697157825e-06, "loss": 0.2919, "step": 30230 }, { "epoch": 1.611897337491005, "grad_norm": 1.6592408992278302, "learning_rate": 1.4389372093331295e-06, "loss": 0.2803, "step": 30240 }, { "epoch": 1.6124303723248314, "grad_norm": 1.6141288403459748, "learning_rate": 1.4380565700329225e-06, "loss": 0.2932, "step": 30250 }, { "epoch": 1.6129634071586578, "grad_norm": 1.6825199751823052, "learning_rate": 1.4371759521192097e-06, "loss": 0.2842, "step": 30260 }, { "epoch": 1.6134964419924842, "grad_norm": 1.6805441093015927, "learning_rate": 1.4362953558960316e-06, "loss": 0.2855, "step": 30270 }, { "epoch": 1.6140294768263106, "grad_norm": 1.6479847989463403, "learning_rate": 1.4354147816674215e-06, "loss": 0.2842, "step": 30280 }, { "epoch": 1.614562511660137, "grad_norm": 1.8092148696833215, "learning_rate": 1.4345342297374052e-06, "loss": 0.2986, "step": 30290 }, { "epoch": 1.6150955464939634, "grad_norm": 1.7517838444573417, "learning_rate": 1.4336537004100011e-06, "loss": 0.2855, "step": 30300 }, { "epoch": 1.6156285813277897, "grad_norm": 1.7383655206374091, "learning_rate": 1.4327731939892188e-06, "loss": 0.2991, "step": 30310 }, { "epoch": 1.6161616161616161, "grad_norm": 1.785947456975646, "learning_rate": 1.431892710779061e-06, "loss": 0.2939, "step": 30320 }, { "epoch": 1.6166946509954425, "grad_norm": 1.6288688315096507, "learning_rate": 1.4310122510835215e-06, "loss": 0.2971, "step": 30330 }, { "epoch": 1.617227685829269, "grad_norm": 1.760875376836615, "learning_rate": 1.430131815206587e-06, "loss": 0.2947, "step": 30340 }, { "epoch": 1.6177607206630953, "grad_norm": 1.9411375850550032, "learning_rate": 1.4292514034522354e-06, "loss": 0.2834, "step": 30350 }, { "epoch": 1.6182937554969217, "grad_norm": 1.707061858586393, "learning_rate": 1.428371016124436e-06, "loss": 0.2898, "step": 30360 }, { "epoch": 1.6188267903307483, "grad_norm": 1.7593713579007588, "learning_rate": 1.4274906535271503e-06, "loss": 0.303, "step": 30370 }, { "epoch": 1.6193598251645747, "grad_norm": 1.7196003736923966, "learning_rate": 1.426610315964331e-06, "loss": 0.2907, "step": 30380 }, { "epoch": 1.619892859998401, "grad_norm": 1.7452496295663822, "learning_rate": 1.4257300037399212e-06, "loss": 0.282, "step": 30390 }, { "epoch": 1.6204258948322274, "grad_norm": 1.604164679326339, "learning_rate": 1.4248497171578574e-06, "loss": 0.2966, "step": 30400 }, { "epoch": 1.6209589296660538, "grad_norm": 1.7681589780914448, "learning_rate": 1.4239694565220652e-06, "loss": 0.2856, "step": 30410 }, { "epoch": 1.6214919644998802, "grad_norm": 1.6882324712829686, "learning_rate": 1.4230892221364628e-06, "loss": 0.2956, "step": 30420 }, { "epoch": 1.6220249993337066, "grad_norm": 2.1158217342634247, "learning_rate": 1.4222090143049577e-06, "loss": 0.2983, "step": 30430 }, { "epoch": 1.622558034167533, "grad_norm": 1.643460807442831, "learning_rate": 1.4213288333314494e-06, "loss": 0.2922, "step": 30440 }, { "epoch": 1.6230910690013594, "grad_norm": 1.6784694368518194, "learning_rate": 1.420448679519828e-06, "loss": 0.2833, "step": 30450 }, { "epoch": 1.6236241038351857, "grad_norm": 1.7100141632747143, "learning_rate": 1.419568553173974e-06, "loss": 0.3038, "step": 30460 }, { "epoch": 1.6241571386690121, "grad_norm": 1.7312092930819818, "learning_rate": 1.4186884545977587e-06, "loss": 0.3029, "step": 30470 }, { "epoch": 1.6246901735028385, "grad_norm": 1.6933592248217049, "learning_rate": 1.4178083840950432e-06, "loss": 0.2846, "step": 30480 }, { "epoch": 1.625223208336665, "grad_norm": 1.743081006999338, "learning_rate": 1.4169283419696794e-06, "loss": 0.2906, "step": 30490 }, { "epoch": 1.6257562431704913, "grad_norm": 1.585804316310054, "learning_rate": 1.4160483285255092e-06, "loss": 0.2947, "step": 30500 }, { "epoch": 1.6262892780043177, "grad_norm": 1.6939153616740914, "learning_rate": 1.4151683440663654e-06, "loss": 0.2928, "step": 30510 }, { "epoch": 1.626822312838144, "grad_norm": 1.666220650877759, "learning_rate": 1.4142883888960691e-06, "loss": 0.2931, "step": 30520 }, { "epoch": 1.6273553476719704, "grad_norm": 1.5856672062829056, "learning_rate": 1.413408463318433e-06, "loss": 0.2851, "step": 30530 }, { "epoch": 1.6278883825057968, "grad_norm": 1.718183107222889, "learning_rate": 1.4125285676372582e-06, "loss": 0.2993, "step": 30540 }, { "epoch": 1.6284214173396232, "grad_norm": 1.71088841435637, "learning_rate": 1.411648702156337e-06, "loss": 0.2933, "step": 30550 }, { "epoch": 1.6289544521734496, "grad_norm": 1.575896504819221, "learning_rate": 1.4107688671794496e-06, "loss": 0.2894, "step": 30560 }, { "epoch": 1.629487487007276, "grad_norm": 1.5914106948402416, "learning_rate": 1.409889063010367e-06, "loss": 0.289, "step": 30570 }, { "epoch": 1.6300205218411024, "grad_norm": 1.8033248791093652, "learning_rate": 1.4090092899528486e-06, "loss": 0.2989, "step": 30580 }, { "epoch": 1.6305535566749287, "grad_norm": 1.896503632842607, "learning_rate": 1.4081295483106442e-06, "loss": 0.2821, "step": 30590 }, { "epoch": 1.6310865915087551, "grad_norm": 1.869582302899287, "learning_rate": 1.4072498383874913e-06, "loss": 0.2977, "step": 30600 }, { "epoch": 1.6316196263425815, "grad_norm": 1.8903593892500128, "learning_rate": 1.4063701604871174e-06, "loss": 0.3051, "step": 30610 }, { "epoch": 1.632152661176408, "grad_norm": 1.6453743564447636, "learning_rate": 1.4054905149132392e-06, "loss": 0.2974, "step": 30620 }, { "epoch": 1.6326856960102343, "grad_norm": 1.6357825558152437, "learning_rate": 1.404610901969561e-06, "loss": 0.283, "step": 30630 }, { "epoch": 1.6332187308440607, "grad_norm": 1.7210731554212018, "learning_rate": 1.4037313219597777e-06, "loss": 0.3019, "step": 30640 }, { "epoch": 1.633751765677887, "grad_norm": 1.8132888963016724, "learning_rate": 1.4028517751875706e-06, "loss": 0.2814, "step": 30650 }, { "epoch": 1.6342848005117134, "grad_norm": 1.8213050459750213, "learning_rate": 1.4019722619566114e-06, "loss": 0.2867, "step": 30660 }, { "epoch": 1.6348178353455398, "grad_norm": 1.7681456431689744, "learning_rate": 1.4010927825705594e-06, "loss": 0.2812, "step": 30670 }, { "epoch": 1.6353508701793662, "grad_norm": 2.060029572899713, "learning_rate": 1.4002133373330624e-06, "loss": 0.2927, "step": 30680 }, { "epoch": 1.6358839050131926, "grad_norm": 1.824451681296403, "learning_rate": 1.3993339265477558e-06, "loss": 0.287, "step": 30690 }, { "epoch": 1.636416939847019, "grad_norm": 1.7516508437153122, "learning_rate": 1.3984545505182638e-06, "loss": 0.294, "step": 30700 }, { "epoch": 1.6369499746808454, "grad_norm": 1.5737967599368465, "learning_rate": 1.3975752095481988e-06, "loss": 0.2763, "step": 30710 }, { "epoch": 1.6374830095146717, "grad_norm": 1.7734480332693277, "learning_rate": 1.3966959039411607e-06, "loss": 0.2903, "step": 30720 }, { "epoch": 1.6380160443484981, "grad_norm": 1.6660175896003442, "learning_rate": 1.3958166340007367e-06, "loss": 0.2836, "step": 30730 }, { "epoch": 1.6385490791823245, "grad_norm": 1.606722986016552, "learning_rate": 1.3949374000305026e-06, "loss": 0.2832, "step": 30740 }, { "epoch": 1.639082114016151, "grad_norm": 1.70063760810336, "learning_rate": 1.394058202334021e-06, "loss": 0.2826, "step": 30750 }, { "epoch": 1.6396151488499773, "grad_norm": 1.9001934845585937, "learning_rate": 1.3931790412148433e-06, "loss": 0.3007, "step": 30760 }, { "epoch": 1.6401481836838037, "grad_norm": 1.8850635456783504, "learning_rate": 1.392299916976506e-06, "loss": 0.292, "step": 30770 }, { "epoch": 1.64068121851763, "grad_norm": 1.961167911384526, "learning_rate": 1.3914208299225353e-06, "loss": 0.2722, "step": 30780 }, { "epoch": 1.6412142533514564, "grad_norm": 1.814877337325891, "learning_rate": 1.3905417803564427e-06, "loss": 0.2861, "step": 30790 }, { "epoch": 1.6417472881852828, "grad_norm": 1.5430378872135873, "learning_rate": 1.3896627685817279e-06, "loss": 0.2818, "step": 30800 }, { "epoch": 1.6422803230191092, "grad_norm": 1.6713813683934147, "learning_rate": 1.3887837949018775e-06, "loss": 0.2814, "step": 30810 }, { "epoch": 1.6428133578529356, "grad_norm": 1.6676642066215461, "learning_rate": 1.3879048596203636e-06, "loss": 0.2826, "step": 30820 }, { "epoch": 1.643346392686762, "grad_norm": 1.7736379747019733, "learning_rate": 1.3870259630406469e-06, "loss": 0.3006, "step": 30830 }, { "epoch": 1.6438794275205884, "grad_norm": 1.7487955680603482, "learning_rate": 1.3861471054661735e-06, "loss": 0.2904, "step": 30840 }, { "epoch": 1.6444124623544147, "grad_norm": 1.7673180412405818, "learning_rate": 1.3852682872003768e-06, "loss": 0.2917, "step": 30850 }, { "epoch": 1.6449454971882411, "grad_norm": 1.743238318718656, "learning_rate": 1.3843895085466758e-06, "loss": 0.2942, "step": 30860 }, { "epoch": 1.6454785320220675, "grad_norm": 1.8441954661311348, "learning_rate": 1.3835107698084766e-06, "loss": 0.2906, "step": 30870 }, { "epoch": 1.646011566855894, "grad_norm": 1.7166551298838426, "learning_rate": 1.3826320712891708e-06, "loss": 0.282, "step": 30880 }, { "epoch": 1.6465446016897203, "grad_norm": 1.5554577935018792, "learning_rate": 1.3817534132921374e-06, "loss": 0.2919, "step": 30890 }, { "epoch": 1.6470776365235467, "grad_norm": 1.57727885412223, "learning_rate": 1.3808747961207393e-06, "loss": 0.2878, "step": 30900 }, { "epoch": 1.647610671357373, "grad_norm": 1.678339484696579, "learning_rate": 1.3799962200783274e-06, "loss": 0.2792, "step": 30910 }, { "epoch": 1.6481437061911994, "grad_norm": 1.851238629627464, "learning_rate": 1.379117685468237e-06, "loss": 0.287, "step": 30920 }, { "epoch": 1.648676741025026, "grad_norm": 1.960066308299213, "learning_rate": 1.3782391925937903e-06, "loss": 0.2893, "step": 30930 }, { "epoch": 1.6492097758588524, "grad_norm": 1.6934858548087313, "learning_rate": 1.3773607417582937e-06, "loss": 0.2922, "step": 30940 }, { "epoch": 1.6497428106926788, "grad_norm": 1.7847875685908474, "learning_rate": 1.37648233326504e-06, "loss": 0.2884, "step": 30950 }, { "epoch": 1.6502758455265052, "grad_norm": 1.9306341100900701, "learning_rate": 1.3756039674173074e-06, "loss": 0.2907, "step": 30960 }, { "epoch": 1.6508088803603316, "grad_norm": 1.7903053432401599, "learning_rate": 1.3747256445183592e-06, "loss": 0.2893, "step": 30970 }, { "epoch": 1.651341915194158, "grad_norm": 1.876027980060498, "learning_rate": 1.3738473648714433e-06, "loss": 0.2912, "step": 30980 }, { "epoch": 1.6518749500279843, "grad_norm": 1.8155863850131362, "learning_rate": 1.3729691287797936e-06, "loss": 0.2893, "step": 30990 }, { "epoch": 1.6524079848618107, "grad_norm": 1.6603977771056089, "learning_rate": 1.3720909365466284e-06, "loss": 0.2925, "step": 31000 }, { "epoch": 1.6529410196956371, "grad_norm": 1.8083439913306736, "learning_rate": 1.3712127884751509e-06, "loss": 0.2799, "step": 31010 }, { "epoch": 1.6534740545294635, "grad_norm": 1.7179088723258629, "learning_rate": 1.3703346848685498e-06, "loss": 0.2916, "step": 31020 }, { "epoch": 1.6540070893632899, "grad_norm": 1.6414321465099093, "learning_rate": 1.369456626029997e-06, "loss": 0.2867, "step": 31030 }, { "epoch": 1.6545401241971163, "grad_norm": 1.683825838768604, "learning_rate": 1.36857861226265e-06, "loss": 0.2902, "step": 31040 }, { "epoch": 1.6550731590309427, "grad_norm": 1.7446146880372986, "learning_rate": 1.3677006438696508e-06, "loss": 0.2986, "step": 31050 }, { "epoch": 1.655606193864769, "grad_norm": 1.856553643919025, "learning_rate": 1.3668227211541252e-06, "loss": 0.2802, "step": 31060 }, { "epoch": 1.6561392286985954, "grad_norm": 1.7194379533897897, "learning_rate": 1.3659448444191834e-06, "loss": 0.2915, "step": 31070 }, { "epoch": 1.6566722635324218, "grad_norm": 1.749573299054026, "learning_rate": 1.3650670139679197e-06, "loss": 0.2932, "step": 31080 }, { "epoch": 1.6572052983662482, "grad_norm": 1.659394213860439, "learning_rate": 1.3641892301034129e-06, "loss": 0.2978, "step": 31090 }, { "epoch": 1.6577383332000746, "grad_norm": 1.751725710891011, "learning_rate": 1.363311493128725e-06, "loss": 0.291, "step": 31100 }, { "epoch": 1.658271368033901, "grad_norm": 1.6966003718667395, "learning_rate": 1.3624338033469023e-06, "loss": 0.2884, "step": 31110 }, { "epoch": 1.6588044028677276, "grad_norm": 1.5954907987729654, "learning_rate": 1.3615561610609744e-06, "loss": 0.289, "step": 31120 }, { "epoch": 1.659337437701554, "grad_norm": 1.8955414236679382, "learning_rate": 1.3606785665739547e-06, "loss": 0.2888, "step": 31130 }, { "epoch": 1.6598704725353803, "grad_norm": 1.635158551506039, "learning_rate": 1.3598010201888405e-06, "loss": 0.277, "step": 31140 }, { "epoch": 1.6604035073692067, "grad_norm": 1.814875722318185, "learning_rate": 1.358923522208612e-06, "loss": 0.2952, "step": 31150 }, { "epoch": 1.660936542203033, "grad_norm": 1.8490175131236, "learning_rate": 1.3580460729362325e-06, "loss": 0.2791, "step": 31160 }, { "epoch": 1.6614695770368595, "grad_norm": 1.5805777785080926, "learning_rate": 1.357168672674649e-06, "loss": 0.283, "step": 31170 }, { "epoch": 1.6620026118706859, "grad_norm": 1.68892409205369, "learning_rate": 1.3562913217267922e-06, "loss": 0.2956, "step": 31180 }, { "epoch": 1.6625356467045123, "grad_norm": 1.9158083003418402, "learning_rate": 1.355414020395573e-06, "loss": 0.2874, "step": 31190 }, { "epoch": 1.6630686815383386, "grad_norm": 1.795163319654632, "learning_rate": 1.3545367689838892e-06, "loss": 0.297, "step": 31200 }, { "epoch": 1.663601716372165, "grad_norm": 1.7260731918489889, "learning_rate": 1.3536595677946177e-06, "loss": 0.2897, "step": 31210 }, { "epoch": 1.6641347512059914, "grad_norm": 1.9107183609417713, "learning_rate": 1.3527824171306204e-06, "loss": 0.2958, "step": 31220 }, { "epoch": 1.6646677860398178, "grad_norm": 1.6750206083512973, "learning_rate": 1.3519053172947408e-06, "loss": 0.2836, "step": 31230 }, { "epoch": 1.6652008208736442, "grad_norm": 1.7052345419021837, "learning_rate": 1.3510282685898053e-06, "loss": 0.2822, "step": 31240 }, { "epoch": 1.6657338557074706, "grad_norm": 1.6297672522784827, "learning_rate": 1.3501512713186224e-06, "loss": 0.2798, "step": 31250 }, { "epoch": 1.666266890541297, "grad_norm": 1.669543425779207, "learning_rate": 1.3492743257839824e-06, "loss": 0.295, "step": 31260 }, { "epoch": 1.6667999253751233, "grad_norm": 1.6369560608739928, "learning_rate": 1.3483974322886585e-06, "loss": 0.2911, "step": 31270 }, { "epoch": 1.6673329602089497, "grad_norm": 1.7891980025993794, "learning_rate": 1.3475205911354057e-06, "loss": 0.2854, "step": 31280 }, { "epoch": 1.667865995042776, "grad_norm": 1.6609783141643637, "learning_rate": 1.3466438026269607e-06, "loss": 0.286, "step": 31290 }, { "epoch": 1.6683990298766025, "grad_norm": 1.9536200326989912, "learning_rate": 1.345767067066042e-06, "loss": 0.2876, "step": 31300 }, { "epoch": 1.6689320647104289, "grad_norm": 1.5598354111095247, "learning_rate": 1.34489038475535e-06, "loss": 0.2796, "step": 31310 }, { "epoch": 1.6694650995442553, "grad_norm": 1.7762987162645032, "learning_rate": 1.344013755997567e-06, "loss": 0.2863, "step": 31320 }, { "epoch": 1.6699981343780816, "grad_norm": 3.349481124235958, "learning_rate": 1.3431371810953566e-06, "loss": 0.2949, "step": 31330 }, { "epoch": 1.670531169211908, "grad_norm": 1.58855393309782, "learning_rate": 1.3422606603513629e-06, "loss": 0.2883, "step": 31340 }, { "epoch": 1.6710642040457344, "grad_norm": 1.6516568219929992, "learning_rate": 1.3413841940682129e-06, "loss": 0.284, "step": 31350 }, { "epoch": 1.6715972388795608, "grad_norm": 1.5289002355984553, "learning_rate": 1.3405077825485137e-06, "loss": 0.2929, "step": 31360 }, { "epoch": 1.6721302737133872, "grad_norm": 1.5399826412917716, "learning_rate": 1.3396314260948543e-06, "loss": 0.2883, "step": 31370 }, { "epoch": 1.6726633085472136, "grad_norm": 1.7998503249067999, "learning_rate": 1.3387551250098033e-06, "loss": 0.2947, "step": 31380 }, { "epoch": 1.67319634338104, "grad_norm": 1.8359330897805242, "learning_rate": 1.3378788795959117e-06, "loss": 0.283, "step": 31390 }, { "epoch": 1.6737293782148663, "grad_norm": 1.6775115709540163, "learning_rate": 1.3370026901557104e-06, "loss": 0.2902, "step": 31400 }, { "epoch": 1.6742624130486927, "grad_norm": 1.8343363698692996, "learning_rate": 1.3361265569917112e-06, "loss": 0.2921, "step": 31410 }, { "epoch": 1.674795447882519, "grad_norm": 1.9977825013841777, "learning_rate": 1.3352504804064071e-06, "loss": 0.2852, "step": 31420 }, { "epoch": 1.6753284827163455, "grad_norm": 1.8634209358258391, "learning_rate": 1.3343744607022701e-06, "loss": 0.2849, "step": 31430 }, { "epoch": 1.6758615175501719, "grad_norm": 1.821551382582791, "learning_rate": 1.3334984981817539e-06, "loss": 0.2852, "step": 31440 }, { "epoch": 1.6763945523839983, "grad_norm": 1.6794818871386097, "learning_rate": 1.3326225931472917e-06, "loss": 0.277, "step": 31450 }, { "epoch": 1.6769275872178246, "grad_norm": 1.633606121551204, "learning_rate": 1.3317467459012978e-06, "loss": 0.2951, "step": 31460 }, { "epoch": 1.677460622051651, "grad_norm": 2.1307577204739307, "learning_rate": 1.330870956746165e-06, "loss": 0.2914, "step": 31470 }, { "epoch": 1.6779936568854774, "grad_norm": 1.7030936589122425, "learning_rate": 1.3299952259842674e-06, "loss": 0.2889, "step": 31480 }, { "epoch": 1.6785266917193038, "grad_norm": 1.7935922091801022, "learning_rate": 1.3291195539179584e-06, "loss": 0.2909, "step": 31490 }, { "epoch": 1.6790597265531302, "grad_norm": 1.7550348987162039, "learning_rate": 1.3282439408495715e-06, "loss": 0.3001, "step": 31500 }, { "epoch": 1.6795927613869566, "grad_norm": 1.824332956287139, "learning_rate": 1.327368387081419e-06, "loss": 0.301, "step": 31510 }, { "epoch": 1.680125796220783, "grad_norm": 1.9263867992383745, "learning_rate": 1.326492892915793e-06, "loss": 0.2858, "step": 31520 }, { "epoch": 1.6806588310546093, "grad_norm": 1.638530444452028, "learning_rate": 1.3256174586549662e-06, "loss": 0.2782, "step": 31530 }, { "epoch": 1.6811918658884357, "grad_norm": 1.858423045256905, "learning_rate": 1.3247420846011894e-06, "loss": 0.2813, "step": 31540 }, { "epoch": 1.681724900722262, "grad_norm": 1.7383105626396633, "learning_rate": 1.3238667710566928e-06, "loss": 0.2863, "step": 31550 }, { "epoch": 1.6822579355560885, "grad_norm": 1.8716211776981713, "learning_rate": 1.3229915183236852e-06, "loss": 0.2939, "step": 31560 }, { "epoch": 1.6827909703899149, "grad_norm": 1.6548848631003752, "learning_rate": 1.3221163267043561e-06, "loss": 0.2922, "step": 31570 }, { "epoch": 1.6833240052237413, "grad_norm": 1.8097577187118805, "learning_rate": 1.321241196500872e-06, "loss": 0.2768, "step": 31580 }, { "epoch": 1.6838570400575676, "grad_norm": 1.835159675615701, "learning_rate": 1.3203661280153798e-06, "loss": 0.3014, "step": 31590 }, { "epoch": 1.684390074891394, "grad_norm": 1.6512864314305946, "learning_rate": 1.3194911215500038e-06, "loss": 0.2828, "step": 31600 }, { "epoch": 1.6849231097252204, "grad_norm": 1.782270675904756, "learning_rate": 1.318616177406847e-06, "loss": 0.2856, "step": 31610 }, { "epoch": 1.6854561445590468, "grad_norm": 1.8577533867222902, "learning_rate": 1.317741295887992e-06, "loss": 0.2795, "step": 31620 }, { "epoch": 1.6859891793928732, "grad_norm": 1.8659766822133308, "learning_rate": 1.316866477295499e-06, "loss": 0.2879, "step": 31630 }, { "epoch": 1.6865222142266996, "grad_norm": 1.967231372853565, "learning_rate": 1.3159917219314058e-06, "loss": 0.2973, "step": 31640 }, { "epoch": 1.687055249060526, "grad_norm": 1.9068235135729854, "learning_rate": 1.3151170300977299e-06, "loss": 0.2868, "step": 31650 }, { "epoch": 1.6875882838943523, "grad_norm": 1.759374970615166, "learning_rate": 1.3142424020964657e-06, "loss": 0.2784, "step": 31660 }, { "epoch": 1.6881213187281787, "grad_norm": 1.8294860719491626, "learning_rate": 1.3133678382295864e-06, "loss": 0.2925, "step": 31670 }, { "epoch": 1.6886543535620053, "grad_norm": 1.7031323954040583, "learning_rate": 1.3124933387990417e-06, "loss": 0.283, "step": 31680 }, { "epoch": 1.6891873883958317, "grad_norm": 1.5014912640479998, "learning_rate": 1.3116189041067607e-06, "loss": 0.2911, "step": 31690 }, { "epoch": 1.689720423229658, "grad_norm": 1.5626481458831571, "learning_rate": 1.310744534454649e-06, "loss": 0.2956, "step": 31700 }, { "epoch": 1.6902534580634845, "grad_norm": 1.742988721027641, "learning_rate": 1.3098702301445904e-06, "loss": 0.2838, "step": 31710 }, { "epoch": 1.6907864928973109, "grad_norm": 1.7449672312982958, "learning_rate": 1.3089959914784453e-06, "loss": 0.2818, "step": 31720 }, { "epoch": 1.6913195277311373, "grad_norm": 1.8748681739051887, "learning_rate": 1.3081218187580527e-06, "loss": 0.2907, "step": 31730 }, { "epoch": 1.6918525625649636, "grad_norm": 1.9013414254794463, "learning_rate": 1.3072477122852278e-06, "loss": 0.2877, "step": 31740 }, { "epoch": 1.69238559739879, "grad_norm": 1.668232613451, "learning_rate": 1.3063736723617634e-06, "loss": 0.2908, "step": 31750 }, { "epoch": 1.6929186322326164, "grad_norm": 1.6469132163245612, "learning_rate": 1.3054996992894294e-06, "loss": 0.2788, "step": 31760 }, { "epoch": 1.6934516670664428, "grad_norm": 1.7962405199138152, "learning_rate": 1.3046257933699718e-06, "loss": 0.291, "step": 31770 }, { "epoch": 1.6939847019002692, "grad_norm": 1.8084771430798992, "learning_rate": 1.3037519549051141e-06, "loss": 0.2899, "step": 31780 }, { "epoch": 1.6945177367340956, "grad_norm": 1.7644452209017247, "learning_rate": 1.302878184196557e-06, "loss": 0.2907, "step": 31790 }, { "epoch": 1.695050771567922, "grad_norm": 1.9754336026117922, "learning_rate": 1.3020044815459771e-06, "loss": 0.291, "step": 31800 }, { "epoch": 1.6955838064017483, "grad_norm": 1.664595344877802, "learning_rate": 1.3011308472550273e-06, "loss": 0.2794, "step": 31810 }, { "epoch": 1.6961168412355747, "grad_norm": 1.757012749517835, "learning_rate": 1.3002572816253371e-06, "loss": 0.283, "step": 31820 }, { "epoch": 1.696649876069401, "grad_norm": 1.587702243368332, "learning_rate": 1.299383784958513e-06, "loss": 0.2921, "step": 31830 }, { "epoch": 1.6971829109032275, "grad_norm": 1.6658819359421824, "learning_rate": 1.298510357556137e-06, "loss": 0.2856, "step": 31840 }, { "epoch": 1.6977159457370539, "grad_norm": 1.8413089384085608, "learning_rate": 1.297636999719767e-06, "loss": 0.2955, "step": 31850 }, { "epoch": 1.6982489805708805, "grad_norm": 1.645899438988138, "learning_rate": 1.2967637117509375e-06, "loss": 0.2864, "step": 31860 }, { "epoch": 1.6987820154047069, "grad_norm": 1.639350930993552, "learning_rate": 1.2958904939511582e-06, "loss": 0.2814, "step": 31870 }, { "epoch": 1.6993150502385332, "grad_norm": 1.6914028177368043, "learning_rate": 1.2950173466219154e-06, "loss": 0.2814, "step": 31880 }, { "epoch": 1.6998480850723596, "grad_norm": 1.6859154122951039, "learning_rate": 1.2941442700646704e-06, "loss": 0.2984, "step": 31890 }, { "epoch": 1.700381119906186, "grad_norm": 1.69675818981569, "learning_rate": 1.29327126458086e-06, "loss": 0.2856, "step": 31900 }, { "epoch": 1.7009141547400124, "grad_norm": 1.837627928283781, "learning_rate": 1.292398330471897e-06, "loss": 0.2946, "step": 31910 }, { "epoch": 1.7014471895738388, "grad_norm": 1.6825116669360909, "learning_rate": 1.2915254680391699e-06, "loss": 0.2902, "step": 31920 }, { "epoch": 1.7019802244076652, "grad_norm": 1.6862398600509785, "learning_rate": 1.2906526775840408e-06, "loss": 0.2888, "step": 31930 }, { "epoch": 1.7025132592414916, "grad_norm": 1.605175864135079, "learning_rate": 1.2897799594078484e-06, "loss": 0.2875, "step": 31940 }, { "epoch": 1.703046294075318, "grad_norm": 1.800408623747545, "learning_rate": 1.2889073138119063e-06, "loss": 0.2882, "step": 31950 }, { "epoch": 1.7035793289091443, "grad_norm": 1.9003792829068324, "learning_rate": 1.2880347410975023e-06, "loss": 0.2909, "step": 31960 }, { "epoch": 1.7041123637429707, "grad_norm": 1.5733354035455709, "learning_rate": 1.2871622415659002e-06, "loss": 0.2962, "step": 31970 }, { "epoch": 1.704645398576797, "grad_norm": 1.8486221273412649, "learning_rate": 1.2862898155183372e-06, "loss": 0.2872, "step": 31980 }, { "epoch": 1.7051784334106235, "grad_norm": 1.8013937278620276, "learning_rate": 1.2854174632560262e-06, "loss": 0.2906, "step": 31990 }, { "epoch": 1.7057114682444499, "grad_norm": 1.7335084828565217, "learning_rate": 1.284545185080154e-06, "loss": 0.2903, "step": 32000 }, { "epoch": 1.7062445030782762, "grad_norm": 1.8179861791064678, "learning_rate": 1.2836729812918823e-06, "loss": 0.2841, "step": 32010 }, { "epoch": 1.7067775379121026, "grad_norm": 1.7012698782629172, "learning_rate": 1.2828008521923463e-06, "loss": 0.2807, "step": 32020 }, { "epoch": 1.707310572745929, "grad_norm": 1.8686016778614292, "learning_rate": 1.2819287980826565e-06, "loss": 0.2907, "step": 32030 }, { "epoch": 1.7078436075797554, "grad_norm": 1.814373854463964, "learning_rate": 1.2810568192638966e-06, "loss": 0.2855, "step": 32040 }, { "epoch": 1.7083766424135818, "grad_norm": 1.731405453747505, "learning_rate": 1.2801849160371252e-06, "loss": 0.2878, "step": 32050 }, { "epoch": 1.7089096772474082, "grad_norm": 1.918965569057865, "learning_rate": 1.2793130887033736e-06, "loss": 0.2881, "step": 32060 }, { "epoch": 1.7094427120812345, "grad_norm": 1.8066009656542463, "learning_rate": 1.278441337563648e-06, "loss": 0.2963, "step": 32070 }, { "epoch": 1.709975746915061, "grad_norm": 1.6391056949774767, "learning_rate": 1.2775696629189275e-06, "loss": 0.2933, "step": 32080 }, { "epoch": 1.7105087817488873, "grad_norm": 1.724589478012094, "learning_rate": 1.276698065070166e-06, "loss": 0.2929, "step": 32090 }, { "epoch": 1.7110418165827137, "grad_norm": 1.6124000033160877, "learning_rate": 1.275826544318289e-06, "loss": 0.295, "step": 32100 }, { "epoch": 1.71157485141654, "grad_norm": 1.9262350031671895, "learning_rate": 1.274955100964197e-06, "loss": 0.299, "step": 32110 }, { "epoch": 1.7121078862503665, "grad_norm": 1.7295431598476851, "learning_rate": 1.274083735308763e-06, "loss": 0.2824, "step": 32120 }, { "epoch": 1.7126409210841929, "grad_norm": 1.6797257611595653, "learning_rate": 1.2732124476528336e-06, "loss": 0.2848, "step": 32130 }, { "epoch": 1.7131739559180192, "grad_norm": 1.6373776177580501, "learning_rate": 1.272341238297228e-06, "loss": 0.3034, "step": 32140 }, { "epoch": 1.7137069907518456, "grad_norm": 1.6542781596549008, "learning_rate": 1.2714701075427388e-06, "loss": 0.2946, "step": 32150 }, { "epoch": 1.714240025585672, "grad_norm": 1.7592181423552475, "learning_rate": 1.270599055690131e-06, "loss": 0.296, "step": 32160 }, { "epoch": 1.7147730604194984, "grad_norm": 1.7796172949126436, "learning_rate": 1.269728083040143e-06, "loss": 0.2888, "step": 32170 }, { "epoch": 1.7153060952533248, "grad_norm": 1.6094262967291313, "learning_rate": 1.2688571898934856e-06, "loss": 0.2897, "step": 32180 }, { "epoch": 1.7158391300871512, "grad_norm": 1.7634994229426055, "learning_rate": 1.2679863765508413e-06, "loss": 0.2922, "step": 32190 }, { "epoch": 1.7163721649209775, "grad_norm": 1.675866725691634, "learning_rate": 1.2671156433128662e-06, "loss": 0.2947, "step": 32200 }, { "epoch": 1.716905199754804, "grad_norm": 1.8231333892473982, "learning_rate": 1.2662449904801885e-06, "loss": 0.2806, "step": 32210 }, { "epoch": 1.7174382345886303, "grad_norm": 1.7315291727014117, "learning_rate": 1.2653744183534088e-06, "loss": 0.2882, "step": 32220 }, { "epoch": 1.7179712694224567, "grad_norm": 1.8892073254805635, "learning_rate": 1.2645039272330985e-06, "loss": 0.2819, "step": 32230 }, { "epoch": 1.718504304256283, "grad_norm": 1.9271801104470458, "learning_rate": 1.2636335174198024e-06, "loss": 0.2792, "step": 32240 }, { "epoch": 1.7190373390901095, "grad_norm": 1.8985297799323286, "learning_rate": 1.2627631892140374e-06, "loss": 0.2899, "step": 32250 }, { "epoch": 1.7195703739239359, "grad_norm": 1.8352453750448383, "learning_rate": 1.2618929429162914e-06, "loss": 0.2833, "step": 32260 }, { "epoch": 1.7201034087577622, "grad_norm": 1.7603140151922858, "learning_rate": 1.2610227788270238e-06, "loss": 0.2893, "step": 32270 }, { "epoch": 1.7206364435915886, "grad_norm": 2.066031521938059, "learning_rate": 1.2601526972466669e-06, "loss": 0.2893, "step": 32280 }, { "epoch": 1.721169478425415, "grad_norm": 1.7923899227892706, "learning_rate": 1.259282698475623e-06, "loss": 0.2979, "step": 32290 }, { "epoch": 1.7217025132592414, "grad_norm": 1.8145893131581892, "learning_rate": 1.258412782814267e-06, "loss": 0.288, "step": 32300 }, { "epoch": 1.7222355480930678, "grad_norm": 1.7860490298144265, "learning_rate": 1.257542950562945e-06, "loss": 0.288, "step": 32310 }, { "epoch": 1.7227685829268942, "grad_norm": 1.7058416146793243, "learning_rate": 1.2566732020219733e-06, "loss": 0.2731, "step": 32320 }, { "epoch": 1.7233016177607205, "grad_norm": 1.6392637297748773, "learning_rate": 1.25580353749164e-06, "loss": 0.2794, "step": 32330 }, { "epoch": 1.723834652594547, "grad_norm": 1.7619685791656245, "learning_rate": 1.2549339572722045e-06, "loss": 0.298, "step": 32340 }, { "epoch": 1.7243676874283733, "grad_norm": 1.6928993014860996, "learning_rate": 1.254064461663897e-06, "loss": 0.282, "step": 32350 }, { "epoch": 1.7249007222621997, "grad_norm": 1.8634610303765131, "learning_rate": 1.2531950509669173e-06, "loss": 0.2871, "step": 32360 }, { "epoch": 1.725433757096026, "grad_norm": 1.8198287653540872, "learning_rate": 1.2523257254814373e-06, "loss": 0.2772, "step": 32370 }, { "epoch": 1.7259667919298525, "grad_norm": 1.7712246352972696, "learning_rate": 1.251456485507599e-06, "loss": 0.2972, "step": 32380 }, { "epoch": 1.7264998267636789, "grad_norm": 1.6928742444614127, "learning_rate": 1.2505873313455152e-06, "loss": 0.2861, "step": 32390 }, { "epoch": 1.7270328615975052, "grad_norm": 1.5743304135552794, "learning_rate": 1.249718263295268e-06, "loss": 0.2782, "step": 32400 }, { "epoch": 1.7275658964313316, "grad_norm": 1.8000940569408972, "learning_rate": 1.248849281656911e-06, "loss": 0.2937, "step": 32410 }, { "epoch": 1.7280989312651582, "grad_norm": 2.1364880791695002, "learning_rate": 1.2479803867304674e-06, "loss": 0.2927, "step": 32420 }, { "epoch": 1.7286319660989846, "grad_norm": 1.7289884596703256, "learning_rate": 1.2471115788159307e-06, "loss": 0.298, "step": 32430 }, { "epoch": 1.729165000932811, "grad_norm": 1.7885437705839957, "learning_rate": 1.246242858213264e-06, "loss": 0.2805, "step": 32440 }, { "epoch": 1.7296980357666374, "grad_norm": 1.7213749843065442, "learning_rate": 1.2453742252224001e-06, "loss": 0.2828, "step": 32450 }, { "epoch": 1.7302310706004638, "grad_norm": 1.734110349766069, "learning_rate": 1.2445056801432426e-06, "loss": 0.2817, "step": 32460 }, { "epoch": 1.7307641054342902, "grad_norm": 1.793158908262709, "learning_rate": 1.2436372232756633e-06, "loss": 0.2904, "step": 32470 }, { "epoch": 1.7312971402681165, "grad_norm": 1.9125650433337338, "learning_rate": 1.2427688549195054e-06, "loss": 0.2831, "step": 32480 }, { "epoch": 1.731830175101943, "grad_norm": 1.587736332768039, "learning_rate": 1.2419005753745791e-06, "loss": 0.29, "step": 32490 }, { "epoch": 1.7323632099357693, "grad_norm": 1.6885322881543168, "learning_rate": 1.241032384940666e-06, "loss": 0.2938, "step": 32500 }, { "epoch": 1.7328962447695957, "grad_norm": 1.684896672377855, "learning_rate": 1.2401642839175164e-06, "loss": 0.2824, "step": 32510 }, { "epoch": 1.733429279603422, "grad_norm": 1.826670321336252, "learning_rate": 1.2392962726048492e-06, "loss": 0.2906, "step": 32520 }, { "epoch": 1.7339623144372485, "grad_norm": 1.6999067401066261, "learning_rate": 1.2384283513023525e-06, "loss": 0.2933, "step": 32530 }, { "epoch": 1.7344953492710748, "grad_norm": 1.8878656399525928, "learning_rate": 1.2375605203096843e-06, "loss": 0.2812, "step": 32540 }, { "epoch": 1.7350283841049012, "grad_norm": 1.8051267375967035, "learning_rate": 1.2366927799264693e-06, "loss": 0.2806, "step": 32550 }, { "epoch": 1.7355614189387276, "grad_norm": 2.028139662727251, "learning_rate": 1.2358251304523029e-06, "loss": 0.2907, "step": 32560 }, { "epoch": 1.736094453772554, "grad_norm": 1.76113236862697, "learning_rate": 1.2349575721867486e-06, "loss": 0.278, "step": 32570 }, { "epoch": 1.7366274886063804, "grad_norm": 1.8044038208104018, "learning_rate": 1.234090105429338e-06, "loss": 0.2932, "step": 32580 }, { "epoch": 1.7371605234402068, "grad_norm": 1.901315843664375, "learning_rate": 1.2332227304795716e-06, "loss": 0.2946, "step": 32590 }, { "epoch": 1.7376935582740332, "grad_norm": 1.6246469206639678, "learning_rate": 1.2323554476369176e-06, "loss": 0.2848, "step": 32600 }, { "epoch": 1.7382265931078598, "grad_norm": 1.8612589241364976, "learning_rate": 1.231488257200813e-06, "loss": 0.2808, "step": 32610 }, { "epoch": 1.7387596279416861, "grad_norm": 1.9816092252933535, "learning_rate": 1.2306211594706627e-06, "loss": 0.2848, "step": 32620 }, { "epoch": 1.7392926627755125, "grad_norm": 1.6454266590077804, "learning_rate": 1.2297541547458392e-06, "loss": 0.2943, "step": 32630 }, { "epoch": 1.739825697609339, "grad_norm": 1.6934677428973497, "learning_rate": 1.2288872433256833e-06, "loss": 0.2865, "step": 32640 }, { "epoch": 1.7403587324431653, "grad_norm": 1.8349130663313056, "learning_rate": 1.2280204255095037e-06, "loss": 0.2968, "step": 32650 }, { "epoch": 1.7408917672769917, "grad_norm": 1.8082006055395339, "learning_rate": 1.2271537015965771e-06, "loss": 0.2922, "step": 32660 }, { "epoch": 1.741424802110818, "grad_norm": 1.6132799450246456, "learning_rate": 1.226287071886146e-06, "loss": 0.2926, "step": 32670 }, { "epoch": 1.7419578369446445, "grad_norm": 1.8330073109081042, "learning_rate": 1.2254205366774225e-06, "loss": 0.2963, "step": 32680 }, { "epoch": 1.7424908717784708, "grad_norm": 1.7490262019324814, "learning_rate": 1.2245540962695852e-06, "loss": 0.2817, "step": 32690 }, { "epoch": 1.7430239066122972, "grad_norm": 1.5694743555498218, "learning_rate": 1.2236877509617797e-06, "loss": 0.3006, "step": 32700 }, { "epoch": 1.7435569414461236, "grad_norm": 1.6365517300644163, "learning_rate": 1.22282150105312e-06, "loss": 0.2815, "step": 32710 }, { "epoch": 1.74408997627995, "grad_norm": 1.6707725670788345, "learning_rate": 1.2219553468426846e-06, "loss": 0.2902, "step": 32720 }, { "epoch": 1.7446230111137764, "grad_norm": 1.6665716145520126, "learning_rate": 1.2210892886295216e-06, "loss": 0.276, "step": 32730 }, { "epoch": 1.7451560459476028, "grad_norm": 1.8149634728887676, "learning_rate": 1.220223326712645e-06, "loss": 0.289, "step": 32740 }, { "epoch": 1.7456890807814291, "grad_norm": 1.7024464590510855, "learning_rate": 1.2193574613910354e-06, "loss": 0.2795, "step": 32750 }, { "epoch": 1.7462221156152555, "grad_norm": 1.6677739205639304, "learning_rate": 1.2184916929636395e-06, "loss": 0.2943, "step": 32760 }, { "epoch": 1.746755150449082, "grad_norm": 1.8810025155021723, "learning_rate": 1.2176260217293719e-06, "loss": 0.2756, "step": 32770 }, { "epoch": 1.7472881852829083, "grad_norm": 1.6084077896840048, "learning_rate": 1.2167604479871126e-06, "loss": 0.2778, "step": 32780 }, { "epoch": 1.7478212201167347, "grad_norm": 1.8202668096359373, "learning_rate": 1.2158949720357088e-06, "loss": 0.2844, "step": 32790 }, { "epoch": 1.748354254950561, "grad_norm": 1.727782645554656, "learning_rate": 1.2150295941739725e-06, "loss": 0.2904, "step": 32800 }, { "epoch": 1.7488872897843875, "grad_norm": 1.5626873948146105, "learning_rate": 1.2141643147006835e-06, "loss": 0.2816, "step": 32810 }, { "epoch": 1.7494203246182138, "grad_norm": 1.7828010861627483, "learning_rate": 1.2132991339145864e-06, "loss": 0.2951, "step": 32820 }, { "epoch": 1.7499533594520402, "grad_norm": 1.765954718394173, "learning_rate": 1.2124340521143927e-06, "loss": 0.2842, "step": 32830 }, { "epoch": 1.7504863942858666, "grad_norm": 2.133384100820481, "learning_rate": 1.2115690695987787e-06, "loss": 0.2981, "step": 32840 }, { "epoch": 1.751019429119693, "grad_norm": 1.7693334962578569, "learning_rate": 1.2107041866663871e-06, "loss": 0.286, "step": 32850 }, { "epoch": 1.7515524639535194, "grad_norm": 1.9185758906996053, "learning_rate": 1.2098394036158261e-06, "loss": 0.2856, "step": 32860 }, { "epoch": 1.7520854987873458, "grad_norm": 1.631677298504431, "learning_rate": 1.2089747207456698e-06, "loss": 0.2783, "step": 32870 }, { "epoch": 1.7526185336211721, "grad_norm": 2.002703240882398, "learning_rate": 1.2081101383544565e-06, "loss": 0.2818, "step": 32880 }, { "epoch": 1.7531515684549985, "grad_norm": 1.857754950565365, "learning_rate": 1.207245656740691e-06, "loss": 0.2934, "step": 32890 }, { "epoch": 1.753684603288825, "grad_norm": 1.7517615486818996, "learning_rate": 1.2063812762028426e-06, "loss": 0.2905, "step": 32900 }, { "epoch": 1.7542176381226513, "grad_norm": 1.6296068835059556, "learning_rate": 1.2055169970393467e-06, "loss": 0.2906, "step": 32910 }, { "epoch": 1.7547506729564777, "grad_norm": 1.7247746637727355, "learning_rate": 1.2046528195486024e-06, "loss": 0.282, "step": 32920 }, { "epoch": 1.755283707790304, "grad_norm": 1.8232212179343361, "learning_rate": 1.2037887440289747e-06, "loss": 0.2812, "step": 32930 }, { "epoch": 1.7558167426241305, "grad_norm": 1.8554989663379087, "learning_rate": 1.2029247707787924e-06, "loss": 0.2996, "step": 32940 }, { "epoch": 1.7563497774579568, "grad_norm": 1.822530092997045, "learning_rate": 1.2020609000963501e-06, "loss": 0.2725, "step": 32950 }, { "epoch": 1.7568828122917832, "grad_norm": 1.629573216162499, "learning_rate": 1.2011971322799067e-06, "loss": 0.2914, "step": 32960 }, { "epoch": 1.7574158471256096, "grad_norm": 1.809886682543272, "learning_rate": 1.2003334676276846e-06, "loss": 0.2972, "step": 32970 }, { "epoch": 1.757948881959436, "grad_norm": 1.6532586884485587, "learning_rate": 1.1994699064378718e-06, "loss": 0.2944, "step": 32980 }, { "epoch": 1.7584819167932624, "grad_norm": 1.7716347872072205, "learning_rate": 1.1986064490086199e-06, "loss": 0.2795, "step": 32990 }, { "epoch": 1.7590149516270888, "grad_norm": 1.8549978661483972, "learning_rate": 1.1977430956380455e-06, "loss": 0.289, "step": 33000 }, { "epoch": 1.7595479864609151, "grad_norm": 1.7326000225639766, "learning_rate": 1.1968798466242278e-06, "loss": 0.2854, "step": 33010 }, { "epoch": 1.7600810212947415, "grad_norm": 1.754159471927188, "learning_rate": 1.1960167022652112e-06, "loss": 0.2822, "step": 33020 }, { "epoch": 1.760614056128568, "grad_norm": 1.7283279761900754, "learning_rate": 1.1951536628590037e-06, "loss": 0.2852, "step": 33030 }, { "epoch": 1.7611470909623943, "grad_norm": 1.793610205395446, "learning_rate": 1.1942907287035773e-06, "loss": 0.2814, "step": 33040 }, { "epoch": 1.7616801257962207, "grad_norm": 1.7109075087930035, "learning_rate": 1.1934279000968665e-06, "loss": 0.2923, "step": 33050 }, { "epoch": 1.762213160630047, "grad_norm": 1.7921948066413613, "learning_rate": 1.1925651773367706e-06, "loss": 0.2844, "step": 33060 }, { "epoch": 1.7627461954638735, "grad_norm": 1.5363061497054005, "learning_rate": 1.1917025607211521e-06, "loss": 0.2674, "step": 33070 }, { "epoch": 1.7632792302976998, "grad_norm": 1.709148251541826, "learning_rate": 1.1908400505478365e-06, "loss": 0.2842, "step": 33080 }, { "epoch": 1.7638122651315262, "grad_norm": 1.6682557427616687, "learning_rate": 1.1899776471146133e-06, "loss": 0.2956, "step": 33090 }, { "epoch": 1.7643452999653526, "grad_norm": 1.686295591873106, "learning_rate": 1.1891153507192334e-06, "loss": 0.2814, "step": 33100 }, { "epoch": 1.764878334799179, "grad_norm": 1.621018205567231, "learning_rate": 1.1882531616594128e-06, "loss": 0.286, "step": 33110 }, { "epoch": 1.7654113696330054, "grad_norm": 1.6571672657074825, "learning_rate": 1.1873910802328296e-06, "loss": 0.2867, "step": 33120 }, { "epoch": 1.7659444044668318, "grad_norm": 1.7193043288993533, "learning_rate": 1.1865291067371246e-06, "loss": 0.2853, "step": 33130 }, { "epoch": 1.7664774393006581, "grad_norm": 2.0076084089130024, "learning_rate": 1.185667241469901e-06, "loss": 0.2958, "step": 33140 }, { "epoch": 1.7670104741344845, "grad_norm": 1.793770422364409, "learning_rate": 1.1848054847287258e-06, "loss": 0.2698, "step": 33150 }, { "epoch": 1.767543508968311, "grad_norm": 1.7129722386046085, "learning_rate": 1.1839438368111272e-06, "loss": 0.278, "step": 33160 }, { "epoch": 1.7680765438021375, "grad_norm": 1.803384510958122, "learning_rate": 1.1830822980145972e-06, "loss": 0.2928, "step": 33170 }, { "epoch": 1.768609578635964, "grad_norm": 1.7457379163257314, "learning_rate": 1.1822208686365884e-06, "loss": 0.2894, "step": 33180 }, { "epoch": 1.7691426134697903, "grad_norm": 1.7378909312847994, "learning_rate": 1.1813595489745174e-06, "loss": 0.2818, "step": 33190 }, { "epoch": 1.7696756483036167, "grad_norm": 1.742084780582119, "learning_rate": 1.1804983393257616e-06, "loss": 0.2819, "step": 33200 }, { "epoch": 1.770208683137443, "grad_norm": 1.8024718189393962, "learning_rate": 1.1796372399876616e-06, "loss": 0.2881, "step": 33210 }, { "epoch": 1.7707417179712694, "grad_norm": 1.5911692022405042, "learning_rate": 1.1787762512575181e-06, "loss": 0.2916, "step": 33220 }, { "epoch": 1.7712747528050958, "grad_norm": 1.8971458335615976, "learning_rate": 1.1779153734325958e-06, "loss": 0.2769, "step": 33230 }, { "epoch": 1.7718077876389222, "grad_norm": 1.928163282189597, "learning_rate": 1.1770546068101195e-06, "loss": 0.2938, "step": 33240 }, { "epoch": 1.7723408224727486, "grad_norm": 1.7823246537926254, "learning_rate": 1.1761939516872764e-06, "loss": 0.2814, "step": 33250 }, { "epoch": 1.772873857306575, "grad_norm": 1.8134904587564507, "learning_rate": 1.1753334083612154e-06, "loss": 0.2835, "step": 33260 }, { "epoch": 1.7734068921404014, "grad_norm": 1.6642545826318993, "learning_rate": 1.1744729771290451e-06, "loss": 0.2826, "step": 33270 }, { "epoch": 1.7739399269742278, "grad_norm": 1.656731292080873, "learning_rate": 1.173612658287838e-06, "loss": 0.284, "step": 33280 }, { "epoch": 1.7744729618080541, "grad_norm": 1.6587836047364595, "learning_rate": 1.1727524521346256e-06, "loss": 0.2855, "step": 33290 }, { "epoch": 1.7750059966418805, "grad_norm": 1.6321025009912309, "learning_rate": 1.171892358966402e-06, "loss": 0.2818, "step": 33300 }, { "epoch": 1.775539031475707, "grad_norm": 1.800968689436437, "learning_rate": 1.1710323790801214e-06, "loss": 0.2844, "step": 33310 }, { "epoch": 1.7760720663095333, "grad_norm": 1.6390910368016185, "learning_rate": 1.1701725127726987e-06, "loss": 0.2857, "step": 33320 }, { "epoch": 1.7766051011433597, "grad_norm": 1.6677627476701735, "learning_rate": 1.1693127603410107e-06, "loss": 0.2793, "step": 33330 }, { "epoch": 1.777138135977186, "grad_norm": 1.577193594826864, "learning_rate": 1.1684531220818942e-06, "loss": 0.2936, "step": 33340 }, { "epoch": 1.7776711708110124, "grad_norm": 1.6814733061446683, "learning_rate": 1.167593598292146e-06, "loss": 0.2781, "step": 33350 }, { "epoch": 1.778204205644839, "grad_norm": 1.8138118523789826, "learning_rate": 1.1667341892685243e-06, "loss": 0.266, "step": 33360 }, { "epoch": 1.7787372404786654, "grad_norm": 1.755745646007814, "learning_rate": 1.1658748953077474e-06, "loss": 0.2897, "step": 33370 }, { "epoch": 1.7792702753124918, "grad_norm": 1.7647426318655384, "learning_rate": 1.1650157167064938e-06, "loss": 0.2828, "step": 33380 }, { "epoch": 1.7798033101463182, "grad_norm": 1.7501861552037314, "learning_rate": 1.1641566537614021e-06, "loss": 0.2822, "step": 33390 }, { "epoch": 1.7803363449801446, "grad_norm": 1.6381073635539256, "learning_rate": 1.163297706769071e-06, "loss": 0.2924, "step": 33400 }, { "epoch": 1.780869379813971, "grad_norm": 1.6528912100178759, "learning_rate": 1.1624388760260597e-06, "loss": 0.2879, "step": 33410 }, { "epoch": 1.7814024146477974, "grad_norm": 1.9693691431225955, "learning_rate": 1.161580161828886e-06, "loss": 0.2907, "step": 33420 }, { "epoch": 1.7819354494816237, "grad_norm": 1.6775167126991481, "learning_rate": 1.1607215644740293e-06, "loss": 0.2912, "step": 33430 }, { "epoch": 1.7824684843154501, "grad_norm": 1.8501231068500794, "learning_rate": 1.1598630842579266e-06, "loss": 0.2862, "step": 33440 }, { "epoch": 1.7830015191492765, "grad_norm": 1.7929903654136852, "learning_rate": 1.1590047214769758e-06, "loss": 0.2715, "step": 33450 }, { "epoch": 1.783534553983103, "grad_norm": 1.9417642928228735, "learning_rate": 1.1581464764275342e-06, "loss": 0.2804, "step": 33460 }, { "epoch": 1.7840675888169293, "grad_norm": 1.887765723109769, "learning_rate": 1.157288349405918e-06, "loss": 0.2811, "step": 33470 }, { "epoch": 1.7846006236507557, "grad_norm": 1.8122568285710194, "learning_rate": 1.1564303407084027e-06, "loss": 0.2861, "step": 33480 }, { "epoch": 1.785133658484582, "grad_norm": 1.753965790461032, "learning_rate": 1.1555724506312232e-06, "loss": 0.289, "step": 33490 }, { "epoch": 1.7856666933184084, "grad_norm": 1.8806664581253916, "learning_rate": 1.1547146794705729e-06, "loss": 0.2937, "step": 33500 }, { "epoch": 1.7861997281522348, "grad_norm": 1.7289241688389556, "learning_rate": 1.1538570275226055e-06, "loss": 0.2911, "step": 33510 }, { "epoch": 1.7867327629860612, "grad_norm": 1.8719795433769644, "learning_rate": 1.1529994950834317e-06, "loss": 0.2948, "step": 33520 }, { "epoch": 1.7872657978198876, "grad_norm": 1.645858881282186, "learning_rate": 1.1521420824491222e-06, "loss": 0.289, "step": 33530 }, { "epoch": 1.787798832653714, "grad_norm": 1.805836445270633, "learning_rate": 1.151284789915706e-06, "loss": 0.2873, "step": 33540 }, { "epoch": 1.7883318674875404, "grad_norm": 1.498351679995341, "learning_rate": 1.150427617779171e-06, "loss": 0.281, "step": 33550 }, { "epoch": 1.7888649023213667, "grad_norm": 1.8921876971501468, "learning_rate": 1.1495705663354622e-06, "loss": 0.293, "step": 33560 }, { "epoch": 1.7893979371551931, "grad_norm": 1.7940753021292293, "learning_rate": 1.1487136358804845e-06, "loss": 0.2942, "step": 33570 }, { "epoch": 1.7899309719890195, "grad_norm": 1.6198606871866887, "learning_rate": 1.1478568267101003e-06, "loss": 0.2802, "step": 33580 }, { "epoch": 1.790464006822846, "grad_norm": 1.8640272317564233, "learning_rate": 1.1470001391201304e-06, "loss": 0.2839, "step": 33590 }, { "epoch": 1.7909970416566723, "grad_norm": 1.6737504891714863, "learning_rate": 1.146143573406353e-06, "loss": 0.2833, "step": 33600 }, { "epoch": 1.7915300764904987, "grad_norm": 1.8129577594218758, "learning_rate": 1.145287129864505e-06, "loss": 0.2781, "step": 33610 }, { "epoch": 1.792063111324325, "grad_norm": 1.7690860864153717, "learning_rate": 1.1444308087902804e-06, "loss": 0.2854, "step": 33620 }, { "epoch": 1.7925961461581514, "grad_norm": 1.7611740146371033, "learning_rate": 1.143574610479332e-06, "loss": 0.2856, "step": 33630 }, { "epoch": 1.7931291809919778, "grad_norm": 1.5250532183756842, "learning_rate": 1.1427185352272694e-06, "loss": 0.2821, "step": 33640 }, { "epoch": 1.7936622158258042, "grad_norm": 1.8295559105368613, "learning_rate": 1.1418625833296592e-06, "loss": 0.2834, "step": 33650 }, { "epoch": 1.7941952506596306, "grad_norm": 1.737860592041329, "learning_rate": 1.1410067550820265e-06, "loss": 0.2772, "step": 33660 }, { "epoch": 1.794728285493457, "grad_norm": 1.910099360063565, "learning_rate": 1.140151050779853e-06, "loss": 0.2859, "step": 33670 }, { "epoch": 1.7952613203272834, "grad_norm": 1.6248629726418629, "learning_rate": 1.1392954707185786e-06, "loss": 0.2844, "step": 33680 }, { "epoch": 1.7957943551611097, "grad_norm": 1.700431966603427, "learning_rate": 1.1384400151935985e-06, "loss": 0.2804, "step": 33690 }, { "epoch": 1.7963273899949361, "grad_norm": 1.688983542272381, "learning_rate": 1.1375846845002666e-06, "loss": 0.2926, "step": 33700 }, { "epoch": 1.7968604248287625, "grad_norm": 1.753162322925547, "learning_rate": 1.1367294789338928e-06, "loss": 0.283, "step": 33710 }, { "epoch": 1.797393459662589, "grad_norm": 1.7652441781406532, "learning_rate": 1.1358743987897448e-06, "loss": 0.2873, "step": 33720 }, { "epoch": 1.7979264944964153, "grad_norm": 1.8861411903139922, "learning_rate": 1.135019444363045e-06, "loss": 0.2776, "step": 33730 }, { "epoch": 1.7984595293302417, "grad_norm": 1.690489967507977, "learning_rate": 1.1341646159489744e-06, "loss": 0.2832, "step": 33740 }, { "epoch": 1.798992564164068, "grad_norm": 1.6167954041258796, "learning_rate": 1.13330991384267e-06, "loss": 0.2778, "step": 33750 }, { "epoch": 1.7995255989978944, "grad_norm": 1.756470122286585, "learning_rate": 1.1324553383392246e-06, "loss": 0.2917, "step": 33760 }, { "epoch": 1.8000586338317208, "grad_norm": 1.6593793448073022, "learning_rate": 1.1316008897336873e-06, "loss": 0.2832, "step": 33770 }, { "epoch": 1.8005916686655472, "grad_norm": 1.7523875609619814, "learning_rate": 1.1307465683210644e-06, "loss": 0.2753, "step": 33780 }, { "epoch": 1.8011247034993736, "grad_norm": 1.8807585832714437, "learning_rate": 1.1298923743963172e-06, "loss": 0.2763, "step": 33790 }, { "epoch": 1.8016577383332, "grad_norm": 2.133302933958573, "learning_rate": 1.1290383082543633e-06, "loss": 0.2954, "step": 33800 }, { "epoch": 1.8021907731670264, "grad_norm": 1.8325941685987879, "learning_rate": 1.1281843701900768e-06, "loss": 0.2897, "step": 33810 }, { "epoch": 1.8027238080008527, "grad_norm": 1.7055468079395337, "learning_rate": 1.1273305604982865e-06, "loss": 0.287, "step": 33820 }, { "epoch": 1.8032568428346791, "grad_norm": 1.982174723381969, "learning_rate": 1.1264768794737777e-06, "loss": 0.291, "step": 33830 }, { "epoch": 1.8037898776685055, "grad_norm": 1.6439751284410773, "learning_rate": 1.1256233274112907e-06, "loss": 0.2845, "step": 33840 }, { "epoch": 1.804322912502332, "grad_norm": 1.830414371209443, "learning_rate": 1.1247699046055222e-06, "loss": 0.285, "step": 33850 }, { "epoch": 1.8048559473361583, "grad_norm": 1.7439309798165994, "learning_rate": 1.123916611351123e-06, "loss": 0.2757, "step": 33860 }, { "epoch": 1.8053889821699847, "grad_norm": 1.712434726007058, "learning_rate": 1.1230634479427004e-06, "loss": 0.2798, "step": 33870 }, { "epoch": 1.805922017003811, "grad_norm": 1.7080465587597295, "learning_rate": 1.1222104146748156e-06, "loss": 0.2914, "step": 33880 }, { "epoch": 1.8064550518376374, "grad_norm": 1.7349800287185455, "learning_rate": 1.1213575118419866e-06, "loss": 0.2941, "step": 33890 }, { "epoch": 1.8069880866714638, "grad_norm": 1.788350132423697, "learning_rate": 1.1205047397386841e-06, "loss": 0.2821, "step": 33900 }, { "epoch": 1.8075211215052902, "grad_norm": 1.672050369300618, "learning_rate": 1.1196520986593359e-06, "loss": 0.2848, "step": 33910 }, { "epoch": 1.8080541563391168, "grad_norm": 1.8399765950580336, "learning_rate": 1.1187995888983227e-06, "loss": 0.2818, "step": 33920 }, { "epoch": 1.8085871911729432, "grad_norm": 1.8086788054055678, "learning_rate": 1.1179472107499813e-06, "loss": 0.2932, "step": 33930 }, { "epoch": 1.8091202260067696, "grad_norm": 1.8289005131436555, "learning_rate": 1.1170949645086022e-06, "loss": 0.2847, "step": 33940 }, { "epoch": 1.809653260840596, "grad_norm": 1.5490717063639103, "learning_rate": 1.1162428504684313e-06, "loss": 0.2889, "step": 33950 }, { "epoch": 1.8101862956744224, "grad_norm": 1.7283938741145624, "learning_rate": 1.115390868923667e-06, "loss": 0.2943, "step": 33960 }, { "epoch": 1.8107193305082487, "grad_norm": 1.8194921712267031, "learning_rate": 1.114539020168464e-06, "loss": 0.277, "step": 33970 }, { "epoch": 1.8112523653420751, "grad_norm": 1.639176107193273, "learning_rate": 1.1136873044969302e-06, "loss": 0.2804, "step": 33980 }, { "epoch": 1.8117854001759015, "grad_norm": 1.7458436628548724, "learning_rate": 1.1128357222031276e-06, "loss": 0.2746, "step": 33990 }, { "epoch": 1.812318435009728, "grad_norm": 1.8650303802442367, "learning_rate": 1.111984273581072e-06, "loss": 0.2818, "step": 34000 }, { "epoch": 1.8128514698435543, "grad_norm": 1.655443839449947, "learning_rate": 1.1111329589247336e-06, "loss": 0.2763, "step": 34010 }, { "epoch": 1.8133845046773807, "grad_norm": 1.8102412312389937, "learning_rate": 1.1102817785280354e-06, "loss": 0.2889, "step": 34020 }, { "epoch": 1.813917539511207, "grad_norm": 1.6134138131586813, "learning_rate": 1.1094307326848554e-06, "loss": 0.2886, "step": 34030 }, { "epoch": 1.8144505743450334, "grad_norm": 1.7450590474867633, "learning_rate": 1.108579821689024e-06, "loss": 0.2822, "step": 34040 }, { "epoch": 1.8149836091788598, "grad_norm": 1.5897483201065454, "learning_rate": 1.1077290458343256e-06, "loss": 0.2761, "step": 34050 }, { "epoch": 1.8155166440126862, "grad_norm": 1.883617289604175, "learning_rate": 1.1068784054144973e-06, "loss": 0.2829, "step": 34060 }, { "epoch": 1.8160496788465126, "grad_norm": 1.7311891204055823, "learning_rate": 1.1060279007232305e-06, "loss": 0.2821, "step": 34070 }, { "epoch": 1.816582713680339, "grad_norm": 1.828293791159772, "learning_rate": 1.105177532054169e-06, "loss": 0.2871, "step": 34080 }, { "epoch": 1.8171157485141654, "grad_norm": 1.7575674008176536, "learning_rate": 1.1043272997009096e-06, "loss": 0.2849, "step": 34090 }, { "epoch": 1.8176487833479917, "grad_norm": 1.9130918964340493, "learning_rate": 1.103477203957002e-06, "loss": 0.2975, "step": 34100 }, { "epoch": 1.8181818181818183, "grad_norm": 1.5610682780039224, "learning_rate": 1.1026272451159496e-06, "loss": 0.288, "step": 34110 }, { "epoch": 1.8187148530156447, "grad_norm": 1.8156298165923574, "learning_rate": 1.1017774234712077e-06, "loss": 0.2685, "step": 34120 }, { "epoch": 1.8192478878494711, "grad_norm": 1.8981220587794834, "learning_rate": 1.1009277393161841e-06, "loss": 0.276, "step": 34130 }, { "epoch": 1.8197809226832975, "grad_norm": 1.7014268583428251, "learning_rate": 1.1000781929442394e-06, "loss": 0.2872, "step": 34140 }, { "epoch": 1.8203139575171239, "grad_norm": 1.6707214951958549, "learning_rate": 1.099228784648687e-06, "loss": 0.2851, "step": 34150 }, { "epoch": 1.8208469923509503, "grad_norm": 1.8265949184513688, "learning_rate": 1.0983795147227926e-06, "loss": 0.2912, "step": 34160 }, { "epoch": 1.8213800271847767, "grad_norm": 1.8979884812981793, "learning_rate": 1.0975303834597734e-06, "loss": 0.2817, "step": 34170 }, { "epoch": 1.821913062018603, "grad_norm": 1.713077673795763, "learning_rate": 1.096681391152799e-06, "loss": 0.2878, "step": 34180 }, { "epoch": 1.8224460968524294, "grad_norm": 1.9329467397832927, "learning_rate": 1.0958325380949914e-06, "loss": 0.2773, "step": 34190 }, { "epoch": 1.8229791316862558, "grad_norm": 1.751671910749437, "learning_rate": 1.0949838245794244e-06, "loss": 0.2778, "step": 34200 }, { "epoch": 1.8235121665200822, "grad_norm": 1.8642896118551555, "learning_rate": 1.0941352508991238e-06, "loss": 0.2868, "step": 34210 }, { "epoch": 1.8240452013539086, "grad_norm": 1.784714945417271, "learning_rate": 1.0932868173470662e-06, "loss": 0.289, "step": 34220 }, { "epoch": 1.824578236187735, "grad_norm": 1.9068453575818782, "learning_rate": 1.0924385242161805e-06, "loss": 0.2803, "step": 34230 }, { "epoch": 1.8251112710215613, "grad_norm": 1.7116479484949956, "learning_rate": 1.0915903717993477e-06, "loss": 0.274, "step": 34240 }, { "epoch": 1.8256443058553877, "grad_norm": 1.748251781989164, "learning_rate": 1.0907423603893996e-06, "loss": 0.2838, "step": 34250 }, { "epoch": 1.8261773406892141, "grad_norm": 1.7088192878892445, "learning_rate": 1.0898944902791186e-06, "loss": 0.2911, "step": 34260 }, { "epoch": 1.8267103755230405, "grad_norm": 1.6764555278790207, "learning_rate": 1.0890467617612395e-06, "loss": 0.2838, "step": 34270 }, { "epoch": 1.8272434103568669, "grad_norm": 1.7424091503380401, "learning_rate": 1.0881991751284478e-06, "loss": 0.2844, "step": 34280 }, { "epoch": 1.8277764451906933, "grad_norm": 1.7797215963768982, "learning_rate": 1.0873517306733797e-06, "loss": 0.2878, "step": 34290 }, { "epoch": 1.8283094800245197, "grad_norm": 1.5453777935549813, "learning_rate": 1.086504428688623e-06, "loss": 0.28, "step": 34300 }, { "epoch": 1.828842514858346, "grad_norm": 1.9896540890870467, "learning_rate": 1.0856572694667153e-06, "loss": 0.2876, "step": 34310 }, { "epoch": 1.8293755496921724, "grad_norm": 1.7179419355397545, "learning_rate": 1.0848102533001461e-06, "loss": 0.2935, "step": 34320 }, { "epoch": 1.8299085845259988, "grad_norm": 2.067669891429877, "learning_rate": 1.0839633804813547e-06, "loss": 0.2888, "step": 34330 }, { "epoch": 1.8304416193598252, "grad_norm": 2.226691334150769, "learning_rate": 1.083116651302731e-06, "loss": 0.2868, "step": 34340 }, { "epoch": 1.8309746541936516, "grad_norm": 1.8149770027364398, "learning_rate": 1.0822700660566153e-06, "loss": 0.2796, "step": 34350 }, { "epoch": 1.831507689027478, "grad_norm": 1.704603511554391, "learning_rate": 1.0814236250352986e-06, "loss": 0.2797, "step": 34360 }, { "epoch": 1.8320407238613043, "grad_norm": 1.6429964179576118, "learning_rate": 1.0805773285310217e-06, "loss": 0.2861, "step": 34370 }, { "epoch": 1.8325737586951307, "grad_norm": 1.7717173469199823, "learning_rate": 1.0797311768359758e-06, "loss": 0.2814, "step": 34380 }, { "epoch": 1.8331067935289571, "grad_norm": 1.8919850655287325, "learning_rate": 1.078885170242302e-06, "loss": 0.2811, "step": 34390 }, { "epoch": 1.8336398283627835, "grad_norm": 1.9150816559010473, "learning_rate": 1.0780393090420906e-06, "loss": 0.2742, "step": 34400 }, { "epoch": 1.8341728631966099, "grad_norm": 1.753970404084367, "learning_rate": 1.077193593527383e-06, "loss": 0.2802, "step": 34410 }, { "epoch": 1.8347058980304363, "grad_norm": 1.6730326269014233, "learning_rate": 1.0763480239901699e-06, "loss": 0.2765, "step": 34420 }, { "epoch": 1.8352389328642627, "grad_norm": 1.7477151992531457, "learning_rate": 1.0755026007223904e-06, "loss": 0.2766, "step": 34430 }, { "epoch": 1.835771967698089, "grad_norm": 1.6148231936669728, "learning_rate": 1.0746573240159346e-06, "loss": 0.2807, "step": 34440 }, { "epoch": 1.8363050025319154, "grad_norm": 1.5447582855547874, "learning_rate": 1.0738121941626415e-06, "loss": 0.2898, "step": 34450 }, { "epoch": 1.8368380373657418, "grad_norm": 1.7959913831018646, "learning_rate": 1.0729672114542995e-06, "loss": 0.2838, "step": 34460 }, { "epoch": 1.8373710721995682, "grad_norm": 1.8160129417948139, "learning_rate": 1.0721223761826454e-06, "loss": 0.289, "step": 34470 }, { "epoch": 1.8379041070333946, "grad_norm": 1.710630021315159, "learning_rate": 1.0712776886393663e-06, "loss": 0.285, "step": 34480 }, { "epoch": 1.838437141867221, "grad_norm": 1.6643078191367287, "learning_rate": 1.0704331491160973e-06, "loss": 0.2842, "step": 34490 }, { "epoch": 1.8389701767010473, "grad_norm": 1.6908870434165215, "learning_rate": 1.0695887579044235e-06, "loss": 0.2787, "step": 34500 }, { "epoch": 1.8395032115348737, "grad_norm": 1.6267838092012359, "learning_rate": 1.0687445152958773e-06, "loss": 0.2777, "step": 34510 }, { "epoch": 1.8400362463687001, "grad_norm": 1.7506964768494824, "learning_rate": 1.0679004215819412e-06, "loss": 0.2964, "step": 34520 }, { "epoch": 1.8405692812025265, "grad_norm": 1.6474621816685022, "learning_rate": 1.0670564770540455e-06, "loss": 0.2707, "step": 34530 }, { "epoch": 1.8411023160363529, "grad_norm": 1.8086883973577987, "learning_rate": 1.0662126820035693e-06, "loss": 0.2758, "step": 34540 }, { "epoch": 1.8416353508701793, "grad_norm": 1.7431654890755537, "learning_rate": 1.06536903672184e-06, "loss": 0.2677, "step": 34550 }, { "epoch": 1.8421683857040057, "grad_norm": 1.6709738595213355, "learning_rate": 1.0645255415001327e-06, "loss": 0.2759, "step": 34560 }, { "epoch": 1.842701420537832, "grad_norm": 1.6165678326395383, "learning_rate": 1.0636821966296719e-06, "loss": 0.2807, "step": 34570 }, { "epoch": 1.8432344553716584, "grad_norm": 1.7028473422401444, "learning_rate": 1.0628390024016296e-06, "loss": 0.2833, "step": 34580 }, { "epoch": 1.8437674902054848, "grad_norm": 1.7405113975136388, "learning_rate": 1.0619959591071258e-06, "loss": 0.2833, "step": 34590 }, { "epoch": 1.8443005250393112, "grad_norm": 1.6600466254275668, "learning_rate": 1.0611530670372275e-06, "loss": 0.2871, "step": 34600 }, { "epoch": 1.8448335598731376, "grad_norm": 1.7079471415002405, "learning_rate": 1.0603103264829513e-06, "loss": 0.2863, "step": 34610 }, { "epoch": 1.845366594706964, "grad_norm": 1.6562824961931186, "learning_rate": 1.0594677377352601e-06, "loss": 0.2756, "step": 34620 }, { "epoch": 1.8458996295407903, "grad_norm": 1.7123599775145584, "learning_rate": 1.058625301085065e-06, "loss": 0.2892, "step": 34630 }, { "epoch": 1.8464326643746167, "grad_norm": 1.7734656830606317, "learning_rate": 1.057783016823224e-06, "loss": 0.2838, "step": 34640 }, { "epoch": 1.8469656992084431, "grad_norm": 1.695548261231936, "learning_rate": 1.0569408852405433e-06, "loss": 0.2957, "step": 34650 }, { "epoch": 1.8474987340422695, "grad_norm": 1.7494164440516087, "learning_rate": 1.0560989066277755e-06, "loss": 0.2774, "step": 34660 }, { "epoch": 1.848031768876096, "grad_norm": 1.7330502578030142, "learning_rate": 1.0552570812756215e-06, "loss": 0.2857, "step": 34670 }, { "epoch": 1.8485648037099225, "grad_norm": 1.6587549978959704, "learning_rate": 1.0544154094747284e-06, "loss": 0.2773, "step": 34680 }, { "epoch": 1.8490978385437489, "grad_norm": 1.849411392429873, "learning_rate": 1.05357389151569e-06, "loss": 0.2823, "step": 34690 }, { "epoch": 1.8496308733775753, "grad_norm": 1.8212631169038112, "learning_rate": 1.052732527689048e-06, "loss": 0.2868, "step": 34700 }, { "epoch": 1.8501639082114016, "grad_norm": 1.7483996194095333, "learning_rate": 1.0518913182852905e-06, "loss": 0.2808, "step": 34710 }, { "epoch": 1.850696943045228, "grad_norm": 1.7317317091779638, "learning_rate": 1.0510502635948518e-06, "loss": 0.2935, "step": 34720 }, { "epoch": 1.8512299778790544, "grad_norm": 1.8719800453928523, "learning_rate": 1.0502093639081132e-06, "loss": 0.2918, "step": 34730 }, { "epoch": 1.8517630127128808, "grad_norm": 1.7022234984337459, "learning_rate": 1.0493686195154024e-06, "loss": 0.2842, "step": 34740 }, { "epoch": 1.8522960475467072, "grad_norm": 1.6309342788612091, "learning_rate": 1.048528030706994e-06, "loss": 0.2837, "step": 34750 }, { "epoch": 1.8528290823805336, "grad_norm": 1.8445263935654417, "learning_rate": 1.0476875977731083e-06, "loss": 0.2789, "step": 34760 }, { "epoch": 1.85336211721436, "grad_norm": 1.8379393888632174, "learning_rate": 1.0468473210039113e-06, "loss": 0.2778, "step": 34770 }, { "epoch": 1.8538951520481863, "grad_norm": 1.8667607983656693, "learning_rate": 1.0460072006895163e-06, "loss": 0.2933, "step": 34780 }, { "epoch": 1.8544281868820127, "grad_norm": 1.8299120005426075, "learning_rate": 1.0451672371199816e-06, "loss": 0.2867, "step": 34790 }, { "epoch": 1.854961221715839, "grad_norm": 1.8132107372428896, "learning_rate": 1.0443274305853121e-06, "loss": 0.2787, "step": 34800 }, { "epoch": 1.8554942565496655, "grad_norm": 1.75534628956572, "learning_rate": 1.0434877813754576e-06, "loss": 0.2881, "step": 34810 }, { "epoch": 1.8560272913834919, "grad_norm": 1.6882702107320098, "learning_rate": 1.0426482897803142e-06, "loss": 0.278, "step": 34820 }, { "epoch": 1.8565603262173183, "grad_norm": 1.9191956479523045, "learning_rate": 1.0418089560897239e-06, "loss": 0.2926, "step": 34830 }, { "epoch": 1.8570933610511446, "grad_norm": 1.971987032346656, "learning_rate": 1.0409697805934738e-06, "loss": 0.2784, "step": 34840 }, { "epoch": 1.8576263958849712, "grad_norm": 1.7343757430387683, "learning_rate": 1.0401307635812957e-06, "loss": 0.2871, "step": 34850 }, { "epoch": 1.8581594307187976, "grad_norm": 1.9720783163694182, "learning_rate": 1.0392919053428676e-06, "loss": 0.2743, "step": 34860 }, { "epoch": 1.858692465552624, "grad_norm": 1.7974747096027834, "learning_rate": 1.0384532061678129e-06, "loss": 0.2763, "step": 34870 }, { "epoch": 1.8592255003864504, "grad_norm": 1.5831927083400277, "learning_rate": 1.0376146663456992e-06, "loss": 0.2832, "step": 34880 }, { "epoch": 1.8597585352202768, "grad_norm": 1.9479818234794424, "learning_rate": 1.0367762861660393e-06, "loss": 0.2881, "step": 34890 }, { "epoch": 1.8602915700541032, "grad_norm": 1.6627760289046543, "learning_rate": 1.0359380659182913e-06, "loss": 0.2744, "step": 34900 }, { "epoch": 1.8608246048879296, "grad_norm": 1.8344299218133981, "learning_rate": 1.0351000058918579e-06, "loss": 0.287, "step": 34910 }, { "epoch": 1.861357639721756, "grad_norm": 1.5524926230550007, "learning_rate": 1.0342621063760864e-06, "loss": 0.2938, "step": 34920 }, { "epoch": 1.8618906745555823, "grad_norm": 1.6587993644360186, "learning_rate": 1.0334243676602687e-06, "loss": 0.2831, "step": 34930 }, { "epoch": 1.8624237093894087, "grad_norm": 1.6894005460719788, "learning_rate": 1.0325867900336408e-06, "loss": 0.2974, "step": 34940 }, { "epoch": 1.862956744223235, "grad_norm": 1.8227726132270095, "learning_rate": 1.0317493737853838e-06, "loss": 0.2779, "step": 34950 }, { "epoch": 1.8634897790570615, "grad_norm": 1.7289818601710043, "learning_rate": 1.030912119204623e-06, "loss": 0.2804, "step": 34960 }, { "epoch": 1.8640228138908879, "grad_norm": 1.7836242972182799, "learning_rate": 1.0300750265804276e-06, "loss": 0.2772, "step": 34970 }, { "epoch": 1.8645558487247142, "grad_norm": 1.682765619651718, "learning_rate": 1.0292380962018102e-06, "loss": 0.2807, "step": 34980 }, { "epoch": 1.8650888835585406, "grad_norm": 2.0450127028175333, "learning_rate": 1.0284013283577288e-06, "loss": 0.2943, "step": 34990 }, { "epoch": 1.865621918392367, "grad_norm": 1.730411000942452, "learning_rate": 1.0275647233370843e-06, "loss": 0.2748, "step": 35000 }, { "epoch": 1.8661549532261934, "grad_norm": 1.6277389975419827, "learning_rate": 1.0267282814287222e-06, "loss": 0.2832, "step": 35010 }, { "epoch": 1.8666879880600198, "grad_norm": 1.5334358552233316, "learning_rate": 1.0258920029214304e-06, "loss": 0.2815, "step": 35020 }, { "epoch": 1.8672210228938462, "grad_norm": 1.6268896579282706, "learning_rate": 1.0250558881039415e-06, "loss": 0.2786, "step": 35030 }, { "epoch": 1.8677540577276726, "grad_norm": 1.6822030193127806, "learning_rate": 1.024219937264931e-06, "loss": 0.2838, "step": 35040 }, { "epoch": 1.868287092561499, "grad_norm": 1.72656067068977, "learning_rate": 1.0233841506930188e-06, "loss": 0.2873, "step": 35050 }, { "epoch": 1.8688201273953253, "grad_norm": 1.6681560326786644, "learning_rate": 1.0225485286767662e-06, "loss": 0.2802, "step": 35060 }, { "epoch": 1.8693531622291517, "grad_norm": 1.7187599855179903, "learning_rate": 1.0217130715046794e-06, "loss": 0.2855, "step": 35070 }, { "epoch": 1.869886197062978, "grad_norm": 1.8269575746769546, "learning_rate": 1.020877779465207e-06, "loss": 0.2882, "step": 35080 }, { "epoch": 1.8704192318968045, "grad_norm": 1.6038886577188158, "learning_rate": 1.0200426528467408e-06, "loss": 0.2841, "step": 35090 }, { "epoch": 1.8709522667306309, "grad_norm": 1.816057630472255, "learning_rate": 1.0192076919376146e-06, "loss": 0.2821, "step": 35100 }, { "epoch": 1.8714853015644572, "grad_norm": 1.7406501251151498, "learning_rate": 1.0183728970261065e-06, "loss": 0.2797, "step": 35110 }, { "epoch": 1.8720183363982836, "grad_norm": 1.8775122599692728, "learning_rate": 1.0175382684004364e-06, "loss": 0.2885, "step": 35120 }, { "epoch": 1.87255137123211, "grad_norm": 1.6496897557508083, "learning_rate": 1.0167038063487669e-06, "loss": 0.2841, "step": 35130 }, { "epoch": 1.8730844060659364, "grad_norm": 1.7212205169023977, "learning_rate": 1.015869511159203e-06, "loss": 0.2877, "step": 35140 }, { "epoch": 1.8736174408997628, "grad_norm": 1.7308538260272222, "learning_rate": 1.015035383119792e-06, "loss": 0.2882, "step": 35150 }, { "epoch": 1.8741504757335892, "grad_norm": 1.9290899388149554, "learning_rate": 1.0142014225185238e-06, "loss": 0.2866, "step": 35160 }, { "epoch": 1.8746835105674156, "grad_norm": 1.7682914896377402, "learning_rate": 1.01336762964333e-06, "loss": 0.2845, "step": 35170 }, { "epoch": 1.875216545401242, "grad_norm": 1.7473730247270731, "learning_rate": 1.012534004782086e-06, "loss": 0.2799, "step": 35180 }, { "epoch": 1.8757495802350683, "grad_norm": 1.9216826681234516, "learning_rate": 1.0117005482226065e-06, "loss": 0.2781, "step": 35190 }, { "epoch": 1.8762826150688947, "grad_norm": 1.9470854303714606, "learning_rate": 1.0108672602526496e-06, "loss": 0.2769, "step": 35200 }, { "epoch": 1.876815649902721, "grad_norm": 1.7026592356819465, "learning_rate": 1.0100341411599153e-06, "loss": 0.2781, "step": 35210 }, { "epoch": 1.8773486847365475, "grad_norm": 1.7986917689102309, "learning_rate": 1.0092011912320455e-06, "loss": 0.2895, "step": 35220 }, { "epoch": 1.8778817195703739, "grad_norm": 1.6321121618712109, "learning_rate": 1.0083684107566223e-06, "loss": 0.2842, "step": 35230 }, { "epoch": 1.8784147544042002, "grad_norm": 1.8371304734996443, "learning_rate": 1.0075358000211705e-06, "loss": 0.2858, "step": 35240 }, { "epoch": 1.8789477892380266, "grad_norm": 1.883124366511591, "learning_rate": 1.006703359313157e-06, "loss": 0.2828, "step": 35250 }, { "epoch": 1.879480824071853, "grad_norm": 1.769211253565573, "learning_rate": 1.0058710889199874e-06, "loss": 0.2846, "step": 35260 }, { "epoch": 1.8800138589056794, "grad_norm": 1.8463400326176658, "learning_rate": 1.005038989129011e-06, "loss": 0.2831, "step": 35270 }, { "epoch": 1.8805468937395058, "grad_norm": 1.761129299368819, "learning_rate": 1.0042070602275173e-06, "loss": 0.2766, "step": 35280 }, { "epoch": 1.8810799285733322, "grad_norm": 1.659425918482653, "learning_rate": 1.0033753025027365e-06, "loss": 0.2895, "step": 35290 }, { "epoch": 1.8816129634071586, "grad_norm": 1.6361794089062556, "learning_rate": 1.00254371624184e-06, "loss": 0.2785, "step": 35300 }, { "epoch": 1.882145998240985, "grad_norm": 1.7094848670515799, "learning_rate": 1.0017123017319401e-06, "loss": 0.2798, "step": 35310 }, { "epoch": 1.8826790330748113, "grad_norm": 1.8248155417864, "learning_rate": 1.0008810592600904e-06, "loss": 0.2794, "step": 35320 }, { "epoch": 1.8832120679086377, "grad_norm": 1.736341576073759, "learning_rate": 1.0000499891132832e-06, "loss": 0.2838, "step": 35330 }, { "epoch": 1.883745102742464, "grad_norm": 1.754804169792267, "learning_rate": 9.992190915784532e-07, "loss": 0.2874, "step": 35340 }, { "epoch": 1.8842781375762905, "grad_norm": 1.764348330078149, "learning_rate": 9.983883669424747e-07, "loss": 0.2816, "step": 35350 }, { "epoch": 1.8848111724101169, "grad_norm": 1.8070731732141163, "learning_rate": 9.975578154921623e-07, "loss": 0.2687, "step": 35360 }, { "epoch": 1.8853442072439432, "grad_norm": 1.7172715655997957, "learning_rate": 9.967274375142714e-07, "loss": 0.2767, "step": 35370 }, { "epoch": 1.8858772420777696, "grad_norm": 1.5972635910144992, "learning_rate": 9.958972332954966e-07, "loss": 0.2806, "step": 35380 }, { "epoch": 1.886410276911596, "grad_norm": 1.838781091571979, "learning_rate": 9.950672031224729e-07, "loss": 0.289, "step": 35390 }, { "epoch": 1.8869433117454224, "grad_norm": 1.7094539989863005, "learning_rate": 9.942373472817752e-07, "loss": 0.2739, "step": 35400 }, { "epoch": 1.887476346579249, "grad_norm": 1.6199132374367613, "learning_rate": 9.934076660599186e-07, "loss": 0.2754, "step": 35410 }, { "epoch": 1.8880093814130754, "grad_norm": 1.7708410627416784, "learning_rate": 9.925781597433575e-07, "loss": 0.2703, "step": 35420 }, { "epoch": 1.8885424162469018, "grad_norm": 1.8058806282938, "learning_rate": 9.917488286184857e-07, "loss": 0.2822, "step": 35430 }, { "epoch": 1.8890754510807282, "grad_norm": 1.7438978329239643, "learning_rate": 9.90919672971637e-07, "loss": 0.2729, "step": 35440 }, { "epoch": 1.8896084859145545, "grad_norm": 1.989782730664509, "learning_rate": 9.900906930890846e-07, "loss": 0.3002, "step": 35450 }, { "epoch": 1.890141520748381, "grad_norm": 1.837710642569694, "learning_rate": 9.892618892570405e-07, "loss": 0.2879, "step": 35460 }, { "epoch": 1.8906745555822073, "grad_norm": 1.7368568619897187, "learning_rate": 9.884332617616562e-07, "loss": 0.2701, "step": 35470 }, { "epoch": 1.8912075904160337, "grad_norm": 1.7628832750362922, "learning_rate": 9.876048108890227e-07, "loss": 0.2759, "step": 35480 }, { "epoch": 1.89174062524986, "grad_norm": 1.7675087536218128, "learning_rate": 9.867765369251697e-07, "loss": 0.2763, "step": 35490 }, { "epoch": 1.8922736600836865, "grad_norm": 1.6785749400781123, "learning_rate": 9.85948440156065e-07, "loss": 0.2763, "step": 35500 }, { "epoch": 1.8928066949175129, "grad_norm": 1.9082838220747536, "learning_rate": 9.851205208676171e-07, "loss": 0.2887, "step": 35510 }, { "epoch": 1.8933397297513392, "grad_norm": 1.7062207467478618, "learning_rate": 9.842927793456716e-07, "loss": 0.2838, "step": 35520 }, { "epoch": 1.8938727645851656, "grad_norm": 1.7874699287668812, "learning_rate": 9.83465215876013e-07, "loss": 0.28, "step": 35530 }, { "epoch": 1.894405799418992, "grad_norm": 1.7579368596241673, "learning_rate": 9.82637830744366e-07, "loss": 0.2881, "step": 35540 }, { "epoch": 1.8949388342528184, "grad_norm": 1.7013266187431024, "learning_rate": 9.818106242363904e-07, "loss": 0.2804, "step": 35550 }, { "epoch": 1.8954718690866448, "grad_norm": 1.5813589158444141, "learning_rate": 9.80983596637687e-07, "loss": 0.2704, "step": 35560 }, { "epoch": 1.8960049039204712, "grad_norm": 1.7628655745151582, "learning_rate": 9.801567482337948e-07, "loss": 0.2858, "step": 35570 }, { "epoch": 1.8965379387542975, "grad_norm": 1.7395075223067322, "learning_rate": 9.793300793101896e-07, "loss": 0.2836, "step": 35580 }, { "epoch": 1.897070973588124, "grad_norm": 1.9104675550605692, "learning_rate": 9.785035901522857e-07, "loss": 0.2833, "step": 35590 }, { "epoch": 1.8976040084219505, "grad_norm": 1.6850286592323753, "learning_rate": 9.776772810454357e-07, "loss": 0.2769, "step": 35600 }, { "epoch": 1.898137043255777, "grad_norm": 1.7539249995871227, "learning_rate": 9.768511522749295e-07, "loss": 0.2661, "step": 35610 }, { "epoch": 1.8986700780896033, "grad_norm": 1.7587347179509758, "learning_rate": 9.760252041259962e-07, "loss": 0.2868, "step": 35620 }, { "epoch": 1.8992031129234297, "grad_norm": 1.907491760471663, "learning_rate": 9.751994368837998e-07, "loss": 0.2707, "step": 35630 }, { "epoch": 1.899736147757256, "grad_norm": 1.6173770880120335, "learning_rate": 9.743738508334442e-07, "loss": 0.281, "step": 35640 }, { "epoch": 1.9002691825910825, "grad_norm": 1.6612008469887718, "learning_rate": 9.735484462599701e-07, "loss": 0.2817, "step": 35650 }, { "epoch": 1.9008022174249088, "grad_norm": 1.7500269182005797, "learning_rate": 9.727232234483556e-07, "loss": 0.284, "step": 35660 }, { "epoch": 1.9013352522587352, "grad_norm": 1.7985921034784609, "learning_rate": 9.718981826835153e-07, "loss": 0.2765, "step": 35670 }, { "epoch": 1.9018682870925616, "grad_norm": 1.736399626177598, "learning_rate": 9.710733242503017e-07, "loss": 0.276, "step": 35680 }, { "epoch": 1.902401321926388, "grad_norm": 1.836772750385422, "learning_rate": 9.702486484335042e-07, "loss": 0.2931, "step": 35690 }, { "epoch": 1.9029343567602144, "grad_norm": 2.0091555482897743, "learning_rate": 9.69424155517849e-07, "loss": 0.2757, "step": 35700 }, { "epoch": 1.9034673915940408, "grad_norm": 1.6643677560844068, "learning_rate": 9.685998457880001e-07, "loss": 0.2842, "step": 35710 }, { "epoch": 1.9040004264278672, "grad_norm": 1.9242956813229226, "learning_rate": 9.677757195285562e-07, "loss": 0.2769, "step": 35720 }, { "epoch": 1.9045334612616935, "grad_norm": 1.9400606344595024, "learning_rate": 9.669517770240544e-07, "loss": 0.2839, "step": 35730 }, { "epoch": 1.90506649609552, "grad_norm": 1.7269053299340145, "learning_rate": 9.661280185589678e-07, "loss": 0.2813, "step": 35740 }, { "epoch": 1.9055995309293463, "grad_norm": 1.6869901644100265, "learning_rate": 9.653044444177066e-07, "loss": 0.2841, "step": 35750 }, { "epoch": 1.9061325657631727, "grad_norm": 1.7378547146706524, "learning_rate": 9.644810548846155e-07, "loss": 0.2845, "step": 35760 }, { "epoch": 1.906665600596999, "grad_norm": 1.7514458267258566, "learning_rate": 9.636578502439776e-07, "loss": 0.2892, "step": 35770 }, { "epoch": 1.9071986354308255, "grad_norm": 1.8273835957891131, "learning_rate": 9.62834830780011e-07, "loss": 0.279, "step": 35780 }, { "epoch": 1.9077316702646518, "grad_norm": 1.9273408099327018, "learning_rate": 9.620119967768706e-07, "loss": 0.2745, "step": 35790 }, { "epoch": 1.9082647050984782, "grad_norm": 1.6892019370881484, "learning_rate": 9.61189348518646e-07, "loss": 0.2812, "step": 35800 }, { "epoch": 1.9087977399323046, "grad_norm": 1.6635835006417588, "learning_rate": 9.60366886289364e-07, "loss": 0.2831, "step": 35810 }, { "epoch": 1.909330774766131, "grad_norm": 1.8973614749300904, "learning_rate": 9.595446103729864e-07, "loss": 0.2833, "step": 35820 }, { "epoch": 1.9098638095999574, "grad_norm": 1.7818512917547769, "learning_rate": 9.587225210534114e-07, "loss": 0.2884, "step": 35830 }, { "epoch": 1.9103968444337838, "grad_norm": 1.7489147029989127, "learning_rate": 9.579006186144717e-07, "loss": 0.286, "step": 35840 }, { "epoch": 1.9109298792676102, "grad_norm": 1.8007346747307635, "learning_rate": 9.570789033399367e-07, "loss": 0.2929, "step": 35850 }, { "epoch": 1.9114629141014365, "grad_norm": 1.9401577857368855, "learning_rate": 9.562573755135098e-07, "loss": 0.2844, "step": 35860 }, { "epoch": 1.911995948935263, "grad_norm": 1.5981758023953478, "learning_rate": 9.55436035418831e-07, "loss": 0.2814, "step": 35870 }, { "epoch": 1.9125289837690893, "grad_norm": 2.017519936804557, "learning_rate": 9.54614883339475e-07, "loss": 0.279, "step": 35880 }, { "epoch": 1.9130620186029157, "grad_norm": 1.809624859786025, "learning_rate": 9.537939195589513e-07, "loss": 0.2904, "step": 35890 }, { "epoch": 1.913595053436742, "grad_norm": 1.7320534903736615, "learning_rate": 9.529731443607042e-07, "loss": 0.271, "step": 35900 }, { "epoch": 1.9141280882705685, "grad_norm": 1.7357395668854863, "learning_rate": 9.521525580281139e-07, "loss": 0.2805, "step": 35910 }, { "epoch": 1.9146611231043948, "grad_norm": 1.722736631229961, "learning_rate": 9.513321608444949e-07, "loss": 0.2724, "step": 35920 }, { "epoch": 1.9151941579382212, "grad_norm": 1.9241266918096638, "learning_rate": 9.505119530930956e-07, "loss": 0.2839, "step": 35930 }, { "epoch": 1.9157271927720476, "grad_norm": 1.6270102749515718, "learning_rate": 9.496919350571001e-07, "loss": 0.3006, "step": 35940 }, { "epoch": 1.916260227605874, "grad_norm": 1.7199252096332123, "learning_rate": 9.488721070196264e-07, "loss": 0.283, "step": 35950 }, { "epoch": 1.9167932624397004, "grad_norm": 1.807800038786866, "learning_rate": 9.480524692637277e-07, "loss": 0.2845, "step": 35960 }, { "epoch": 1.9173262972735268, "grad_norm": 1.6166340248636812, "learning_rate": 9.472330220723898e-07, "loss": 0.2762, "step": 35970 }, { "epoch": 1.9178593321073532, "grad_norm": 1.9497393724588654, "learning_rate": 9.464137657285344e-07, "loss": 0.2756, "step": 35980 }, { "epoch": 1.9183923669411795, "grad_norm": 1.6361964040944617, "learning_rate": 9.455947005150166e-07, "loss": 0.2904, "step": 35990 }, { "epoch": 1.918925401775006, "grad_norm": 2.1849960188785404, "learning_rate": 9.447758267146262e-07, "loss": 0.2831, "step": 36000 }, { "epoch": 1.9194584366088323, "grad_norm": 1.6556210488547158, "learning_rate": 9.439571446100852e-07, "loss": 0.2801, "step": 36010 }, { "epoch": 1.9199914714426587, "grad_norm": 1.6712818507703093, "learning_rate": 9.431386544840513e-07, "loss": 0.2802, "step": 36020 }, { "epoch": 1.920524506276485, "grad_norm": 1.804559281641379, "learning_rate": 9.423203566191149e-07, "loss": 0.2804, "step": 36030 }, { "epoch": 1.9210575411103115, "grad_norm": 1.8679026065027806, "learning_rate": 9.415022512978004e-07, "loss": 0.2689, "step": 36040 }, { "epoch": 1.9215905759441378, "grad_norm": 1.7565237113543857, "learning_rate": 9.406843388025654e-07, "loss": 0.2824, "step": 36050 }, { "epoch": 1.9221236107779642, "grad_norm": 1.7958007320276206, "learning_rate": 9.398666194158013e-07, "loss": 0.2978, "step": 36060 }, { "epoch": 1.9226566456117906, "grad_norm": 1.6720638039570859, "learning_rate": 9.390490934198325e-07, "loss": 0.2824, "step": 36070 }, { "epoch": 1.923189680445617, "grad_norm": 1.8667543386615508, "learning_rate": 9.382317610969168e-07, "loss": 0.2929, "step": 36080 }, { "epoch": 1.9237227152794434, "grad_norm": 1.6668783405022032, "learning_rate": 9.374146227292455e-07, "loss": 0.2741, "step": 36090 }, { "epoch": 1.9242557501132698, "grad_norm": 1.900786039580737, "learning_rate": 9.365976785989421e-07, "loss": 0.2899, "step": 36100 }, { "epoch": 1.9247887849470962, "grad_norm": 1.8803647811781352, "learning_rate": 9.357809289880633e-07, "loss": 0.279, "step": 36110 }, { "epoch": 1.9253218197809225, "grad_norm": 1.8016212559393892, "learning_rate": 9.349643741785993e-07, "loss": 0.2819, "step": 36120 }, { "epoch": 1.925854854614749, "grad_norm": 1.720277545602245, "learning_rate": 9.341480144524727e-07, "loss": 0.2791, "step": 36130 }, { "epoch": 1.9263878894485753, "grad_norm": 1.823832720672372, "learning_rate": 9.333318500915376e-07, "loss": 0.2793, "step": 36140 }, { "epoch": 1.9269209242824017, "grad_norm": 1.7062857173624597, "learning_rate": 9.325158813775829e-07, "loss": 0.2698, "step": 36150 }, { "epoch": 1.9274539591162283, "grad_norm": 1.8998411596559452, "learning_rate": 9.317001085923278e-07, "loss": 0.2883, "step": 36160 }, { "epoch": 1.9279869939500547, "grad_norm": 1.8745664970895064, "learning_rate": 9.308845320174256e-07, "loss": 0.2861, "step": 36170 }, { "epoch": 1.928520028783881, "grad_norm": 1.800642889415994, "learning_rate": 9.300691519344602e-07, "loss": 0.2718, "step": 36180 }, { "epoch": 1.9290530636177075, "grad_norm": 1.7658027943527481, "learning_rate": 9.292539686249488e-07, "loss": 0.2825, "step": 36190 }, { "epoch": 1.9295860984515338, "grad_norm": 1.7326904870360502, "learning_rate": 9.284389823703404e-07, "loss": 0.2799, "step": 36200 }, { "epoch": 1.9301191332853602, "grad_norm": 1.7183900991266234, "learning_rate": 9.276241934520162e-07, "loss": 0.2776, "step": 36210 }, { "epoch": 1.9306521681191866, "grad_norm": 1.541043032916627, "learning_rate": 9.268096021512883e-07, "loss": 0.2738, "step": 36220 }, { "epoch": 1.931185202953013, "grad_norm": 1.4504211308843837, "learning_rate": 9.259952087494017e-07, "loss": 0.2741, "step": 36230 }, { "epoch": 1.9317182377868394, "grad_norm": 1.678681819627505, "learning_rate": 9.251810135275324e-07, "loss": 0.2685, "step": 36240 }, { "epoch": 1.9322512726206658, "grad_norm": 1.886181101109873, "learning_rate": 9.243670167667886e-07, "loss": 0.2823, "step": 36250 }, { "epoch": 1.9327843074544921, "grad_norm": 1.725188478274979, "learning_rate": 9.235532187482096e-07, "loss": 0.2883, "step": 36260 }, { "epoch": 1.9333173422883185, "grad_norm": 1.6693605414564963, "learning_rate": 9.227396197527652e-07, "loss": 0.2916, "step": 36270 }, { "epoch": 1.933850377122145, "grad_norm": 1.6576565386454711, "learning_rate": 9.219262200613581e-07, "loss": 0.2781, "step": 36280 }, { "epoch": 1.9343834119559713, "grad_norm": 1.9826939873446265, "learning_rate": 9.211130199548211e-07, "loss": 0.2841, "step": 36290 }, { "epoch": 1.9349164467897977, "grad_norm": 1.77360846179048, "learning_rate": 9.20300019713919e-07, "loss": 0.2772, "step": 36300 }, { "epoch": 1.935449481623624, "grad_norm": 1.8131102941049029, "learning_rate": 9.194872196193462e-07, "loss": 0.2748, "step": 36310 }, { "epoch": 1.9359825164574505, "grad_norm": 1.7480401783546256, "learning_rate": 9.186746199517294e-07, "loss": 0.2697, "step": 36320 }, { "epoch": 1.9365155512912768, "grad_norm": 1.9302671000366172, "learning_rate": 9.178622209916252e-07, "loss": 0.2842, "step": 36330 }, { "epoch": 1.9370485861251032, "grad_norm": 1.7671406445149795, "learning_rate": 9.170500230195219e-07, "loss": 0.2787, "step": 36340 }, { "epoch": 1.9375816209589298, "grad_norm": 1.8658607378820755, "learning_rate": 9.162380263158366e-07, "loss": 0.2864, "step": 36350 }, { "epoch": 1.9381146557927562, "grad_norm": 1.7325176679568046, "learning_rate": 9.154262311609188e-07, "loss": 0.2871, "step": 36360 }, { "epoch": 1.9386476906265826, "grad_norm": 1.7106999116737476, "learning_rate": 9.146146378350475e-07, "loss": 0.2843, "step": 36370 }, { "epoch": 1.939180725460409, "grad_norm": 1.927384135470018, "learning_rate": 9.138032466184324e-07, "loss": 0.2744, "step": 36380 }, { "epoch": 1.9397137602942354, "grad_norm": 1.681289791941231, "learning_rate": 9.129920577912127e-07, "loss": 0.2758, "step": 36390 }, { "epoch": 1.9402467951280618, "grad_norm": 1.654442238896739, "learning_rate": 9.121810716334583e-07, "loss": 0.2783, "step": 36400 }, { "epoch": 1.9407798299618881, "grad_norm": 1.7866227727992725, "learning_rate": 9.113702884251694e-07, "loss": 0.2637, "step": 36410 }, { "epoch": 1.9413128647957145, "grad_norm": 1.8568541751206897, "learning_rate": 9.105597084462756e-07, "loss": 0.2913, "step": 36420 }, { "epoch": 1.941845899629541, "grad_norm": 1.768017774977535, "learning_rate": 9.097493319766368e-07, "loss": 0.2854, "step": 36430 }, { "epoch": 1.9423789344633673, "grad_norm": 1.6884887208650003, "learning_rate": 9.089391592960417e-07, "loss": 0.2745, "step": 36440 }, { "epoch": 1.9429119692971937, "grad_norm": 1.7164951614461499, "learning_rate": 9.081291906842094e-07, "loss": 0.2759, "step": 36450 }, { "epoch": 1.94344500413102, "grad_norm": 1.7916389355127469, "learning_rate": 9.073194264207891e-07, "loss": 0.2854, "step": 36460 }, { "epoch": 1.9439780389648464, "grad_norm": 1.6384992054908771, "learning_rate": 9.065098667853582e-07, "loss": 0.2843, "step": 36470 }, { "epoch": 1.9445110737986728, "grad_norm": 1.608897986049546, "learning_rate": 9.05700512057424e-07, "loss": 0.2783, "step": 36480 }, { "epoch": 1.9450441086324992, "grad_norm": 1.7253520937710027, "learning_rate": 9.048913625164232e-07, "loss": 0.2827, "step": 36490 }, { "epoch": 1.9455771434663256, "grad_norm": 1.7691357094672795, "learning_rate": 9.040824184417215e-07, "loss": 0.2724, "step": 36500 }, { "epoch": 1.946110178300152, "grad_norm": 1.7388039852268735, "learning_rate": 9.032736801126141e-07, "loss": 0.2601, "step": 36510 }, { "epoch": 1.9466432131339784, "grad_norm": 1.657700283062224, "learning_rate": 9.024651478083242e-07, "loss": 0.2749, "step": 36520 }, { "epoch": 1.9471762479678048, "grad_norm": 1.8108259534309625, "learning_rate": 9.016568218080045e-07, "loss": 0.2852, "step": 36530 }, { "epoch": 1.9477092828016311, "grad_norm": 1.6513703960768635, "learning_rate": 9.008487023907365e-07, "loss": 0.28, "step": 36540 }, { "epoch": 1.9482423176354575, "grad_norm": 1.7844110344044464, "learning_rate": 9.000407898355308e-07, "loss": 0.2741, "step": 36550 }, { "epoch": 1.948775352469284, "grad_norm": 1.7524276951123188, "learning_rate": 8.992330844213251e-07, "loss": 0.2784, "step": 36560 }, { "epoch": 1.9493083873031103, "grad_norm": 1.76462533452567, "learning_rate": 8.98425586426987e-07, "loss": 0.2823, "step": 36570 }, { "epoch": 1.9498414221369367, "grad_norm": 1.9774632514881714, "learning_rate": 8.976182961313121e-07, "loss": 0.2886, "step": 36580 }, { "epoch": 1.950374456970763, "grad_norm": 1.5485616779649323, "learning_rate": 8.96811213813024e-07, "loss": 0.2733, "step": 36590 }, { "epoch": 1.9509074918045894, "grad_norm": 1.5921995713690051, "learning_rate": 8.960043397507753e-07, "loss": 0.2919, "step": 36600 }, { "epoch": 1.9514405266384158, "grad_norm": 1.851217827265196, "learning_rate": 8.951976742231459e-07, "loss": 0.2842, "step": 36610 }, { "epoch": 1.9519735614722422, "grad_norm": 1.9444409314252824, "learning_rate": 8.943912175086433e-07, "loss": 0.2862, "step": 36620 }, { "epoch": 1.9525065963060686, "grad_norm": 2.004441433151391, "learning_rate": 8.935849698857035e-07, "loss": 0.2788, "step": 36630 }, { "epoch": 1.953039631139895, "grad_norm": 1.8091019234763108, "learning_rate": 8.927789316326909e-07, "loss": 0.2766, "step": 36640 }, { "epoch": 1.9535726659737214, "grad_norm": 1.9518692871103265, "learning_rate": 8.919731030278968e-07, "loss": 0.2882, "step": 36650 }, { "epoch": 1.9541057008075478, "grad_norm": 1.7699123288702627, "learning_rate": 8.911674843495407e-07, "loss": 0.2786, "step": 36660 }, { "epoch": 1.9546387356413741, "grad_norm": 1.8185358547971173, "learning_rate": 8.903620758757686e-07, "loss": 0.2776, "step": 36670 }, { "epoch": 1.9551717704752005, "grad_norm": 2.0750874455067447, "learning_rate": 8.89556877884655e-07, "loss": 0.283, "step": 36680 }, { "epoch": 1.955704805309027, "grad_norm": 1.823021754208274, "learning_rate": 8.887518906542013e-07, "loss": 0.2871, "step": 36690 }, { "epoch": 1.9562378401428533, "grad_norm": 1.6258087870478337, "learning_rate": 8.879471144623363e-07, "loss": 0.2835, "step": 36700 }, { "epoch": 1.9567708749766797, "grad_norm": 1.625523077347558, "learning_rate": 8.871425495869156e-07, "loss": 0.2947, "step": 36710 }, { "epoch": 1.957303909810506, "grad_norm": 1.6825054503091643, "learning_rate": 8.86338196305722e-07, "loss": 0.2818, "step": 36720 }, { "epoch": 1.9578369446443324, "grad_norm": 1.7363485089066115, "learning_rate": 8.855340548964654e-07, "loss": 0.2868, "step": 36730 }, { "epoch": 1.9583699794781588, "grad_norm": 1.5282812420374434, "learning_rate": 8.847301256367827e-07, "loss": 0.2876, "step": 36740 }, { "epoch": 1.9589030143119852, "grad_norm": 1.8513297522198706, "learning_rate": 8.839264088042367e-07, "loss": 0.2868, "step": 36750 }, { "epoch": 1.9594360491458116, "grad_norm": 1.671874064013073, "learning_rate": 8.831229046763176e-07, "loss": 0.2885, "step": 36760 }, { "epoch": 1.959969083979638, "grad_norm": 1.8192626633607425, "learning_rate": 8.823196135304422e-07, "loss": 0.2779, "step": 36770 }, { "epoch": 1.9605021188134644, "grad_norm": 1.8149186109508246, "learning_rate": 8.815165356439539e-07, "loss": 0.2795, "step": 36780 }, { "epoch": 1.9610351536472908, "grad_norm": 1.7354926258018355, "learning_rate": 8.807136712941215e-07, "loss": 0.2784, "step": 36790 }, { "epoch": 1.9615681884811171, "grad_norm": 2.0301835861118507, "learning_rate": 8.799110207581411e-07, "loss": 0.2801, "step": 36800 }, { "epoch": 1.9621012233149435, "grad_norm": 1.8347235547990526, "learning_rate": 8.791085843131343e-07, "loss": 0.2763, "step": 36810 }, { "epoch": 1.96263425814877, "grad_norm": 1.655869620574944, "learning_rate": 8.783063622361493e-07, "loss": 0.2848, "step": 36820 }, { "epoch": 1.9631672929825963, "grad_norm": 1.7364512551448577, "learning_rate": 8.775043548041602e-07, "loss": 0.2734, "step": 36830 }, { "epoch": 1.9637003278164227, "grad_norm": 1.673780555752466, "learning_rate": 8.767025622940666e-07, "loss": 0.2897, "step": 36840 }, { "epoch": 1.964233362650249, "grad_norm": 1.9027837111580532, "learning_rate": 8.759009849826942e-07, "loss": 0.2823, "step": 36850 }, { "epoch": 1.9647663974840754, "grad_norm": 1.89275733224278, "learning_rate": 8.750996231467949e-07, "loss": 0.2908, "step": 36860 }, { "epoch": 1.9652994323179018, "grad_norm": 1.833761362428127, "learning_rate": 8.742984770630446e-07, "loss": 0.2816, "step": 36870 }, { "epoch": 1.9658324671517282, "grad_norm": 1.7172146271872315, "learning_rate": 8.734975470080465e-07, "loss": 0.2774, "step": 36880 }, { "epoch": 1.9663655019855546, "grad_norm": 1.5862669354506262, "learning_rate": 8.726968332583283e-07, "loss": 0.2832, "step": 36890 }, { "epoch": 1.966898536819381, "grad_norm": 1.6243517938754737, "learning_rate": 8.718963360903433e-07, "loss": 0.2818, "step": 36900 }, { "epoch": 1.9674315716532076, "grad_norm": 1.9561150905738685, "learning_rate": 8.710960557804697e-07, "loss": 0.2735, "step": 36910 }, { "epoch": 1.967964606487034, "grad_norm": 1.6272471835469826, "learning_rate": 8.702959926050112e-07, "loss": 0.2849, "step": 36920 }, { "epoch": 1.9684976413208604, "grad_norm": 1.6198491840703404, "learning_rate": 8.694961468401973e-07, "loss": 0.2793, "step": 36930 }, { "epoch": 1.9690306761546867, "grad_norm": 1.6823854341099054, "learning_rate": 8.6869651876218e-07, "loss": 0.2738, "step": 36940 }, { "epoch": 1.9695637109885131, "grad_norm": 1.9735162096961552, "learning_rate": 8.678971086470382e-07, "loss": 0.2855, "step": 36950 }, { "epoch": 1.9700967458223395, "grad_norm": 1.7759434252078614, "learning_rate": 8.67097916770775e-07, "loss": 0.2725, "step": 36960 }, { "epoch": 1.970629780656166, "grad_norm": 2.036361773071089, "learning_rate": 8.662989434093186e-07, "loss": 0.282, "step": 36970 }, { "epoch": 1.9711628154899923, "grad_norm": 2.280083996627828, "learning_rate": 8.655001888385208e-07, "loss": 0.2796, "step": 36980 }, { "epoch": 1.9716958503238187, "grad_norm": 1.8414921076702122, "learning_rate": 8.647016533341586e-07, "loss": 0.2798, "step": 36990 }, { "epoch": 1.972228885157645, "grad_norm": 1.995236770540033, "learning_rate": 8.639033371719332e-07, "loss": 0.2878, "step": 37000 }, { "epoch": 1.9727619199914714, "grad_norm": 1.7373901738460922, "learning_rate": 8.631052406274703e-07, "loss": 0.2754, "step": 37010 }, { "epoch": 1.9732949548252978, "grad_norm": 1.7654714181223115, "learning_rate": 8.623073639763194e-07, "loss": 0.2846, "step": 37020 }, { "epoch": 1.9738279896591242, "grad_norm": 1.97590645281513, "learning_rate": 8.615097074939534e-07, "loss": 0.2772, "step": 37030 }, { "epoch": 1.9743610244929506, "grad_norm": 1.7405176687167827, "learning_rate": 8.607122714557703e-07, "loss": 0.2753, "step": 37040 }, { "epoch": 1.974894059326777, "grad_norm": 1.8325113812080918, "learning_rate": 8.599150561370922e-07, "loss": 0.2819, "step": 37050 }, { "epoch": 1.9754270941606034, "grad_norm": 1.8010149742739963, "learning_rate": 8.59118061813164e-07, "loss": 0.2865, "step": 37060 }, { "epoch": 1.9759601289944297, "grad_norm": 1.8316197266917615, "learning_rate": 8.583212887591546e-07, "loss": 0.2732, "step": 37070 }, { "epoch": 1.9764931638282561, "grad_norm": 1.7149023060261717, "learning_rate": 8.575247372501571e-07, "loss": 0.285, "step": 37080 }, { "epoch": 1.9770261986620825, "grad_norm": 1.692541911361734, "learning_rate": 8.567284075611875e-07, "loss": 0.2724, "step": 37090 }, { "epoch": 1.9775592334959091, "grad_norm": 1.776816830406351, "learning_rate": 8.559322999671857e-07, "loss": 0.2783, "step": 37100 }, { "epoch": 1.9780922683297355, "grad_norm": 1.783780368753211, "learning_rate": 8.551364147430137e-07, "loss": 0.2901, "step": 37110 }, { "epoch": 1.9786253031635619, "grad_norm": 1.8756166103429739, "learning_rate": 8.543407521634581e-07, "loss": 0.2716, "step": 37120 }, { "epoch": 1.9791583379973883, "grad_norm": 1.5514200373776226, "learning_rate": 8.535453125032282e-07, "loss": 0.2783, "step": 37130 }, { "epoch": 1.9796913728312147, "grad_norm": 1.7423006707033284, "learning_rate": 8.527500960369563e-07, "loss": 0.2791, "step": 37140 }, { "epoch": 1.980224407665041, "grad_norm": 1.602861294093616, "learning_rate": 8.519551030391974e-07, "loss": 0.2744, "step": 37150 }, { "epoch": 1.9807574424988674, "grad_norm": 1.8920195961306705, "learning_rate": 8.511603337844299e-07, "loss": 0.2756, "step": 37160 }, { "epoch": 1.9812904773326938, "grad_norm": 1.5996132291653768, "learning_rate": 8.503657885470544e-07, "loss": 0.287, "step": 37170 }, { "epoch": 1.9818235121665202, "grad_norm": 1.7369523858256208, "learning_rate": 8.495714676013944e-07, "loss": 0.2782, "step": 37180 }, { "epoch": 1.9823565470003466, "grad_norm": 1.6885591506893616, "learning_rate": 8.487773712216966e-07, "loss": 0.2846, "step": 37190 }, { "epoch": 1.982889581834173, "grad_norm": 2.0194738365073075, "learning_rate": 8.479834996821282e-07, "loss": 0.2756, "step": 37200 }, { "epoch": 1.9834226166679993, "grad_norm": 1.8315859366223484, "learning_rate": 8.471898532567808e-07, "loss": 0.2729, "step": 37210 }, { "epoch": 1.9839556515018257, "grad_norm": 1.807615548568915, "learning_rate": 8.463964322196675e-07, "loss": 0.2797, "step": 37220 }, { "epoch": 1.9844886863356521, "grad_norm": 1.7267358480592316, "learning_rate": 8.456032368447237e-07, "loss": 0.2865, "step": 37230 }, { "epoch": 1.9850217211694785, "grad_norm": 1.5601398187036628, "learning_rate": 8.448102674058066e-07, "loss": 0.2688, "step": 37240 }, { "epoch": 1.9855547560033049, "grad_norm": 1.7277658703288845, "learning_rate": 8.44017524176696e-07, "loss": 0.2801, "step": 37250 }, { "epoch": 1.9860877908371313, "grad_norm": 1.533304161271189, "learning_rate": 8.432250074310929e-07, "loss": 0.2774, "step": 37260 }, { "epoch": 1.9866208256709577, "grad_norm": 1.6985046797786474, "learning_rate": 8.42432717442621e-07, "loss": 0.2837, "step": 37270 }, { "epoch": 1.987153860504784, "grad_norm": 2.0512378175590915, "learning_rate": 8.41640654484824e-07, "loss": 0.2854, "step": 37280 }, { "epoch": 1.9876868953386104, "grad_norm": 1.8761857339871766, "learning_rate": 8.408488188311693e-07, "loss": 0.2808, "step": 37290 }, { "epoch": 1.9882199301724368, "grad_norm": 1.7663108450591758, "learning_rate": 8.400572107550441e-07, "loss": 0.286, "step": 37300 }, { "epoch": 1.9887529650062632, "grad_norm": 1.6449848420513955, "learning_rate": 8.392658305297585e-07, "loss": 0.2827, "step": 37310 }, { "epoch": 1.9892859998400896, "grad_norm": 1.9266757012563016, "learning_rate": 8.384746784285431e-07, "loss": 0.2798, "step": 37320 }, { "epoch": 1.989819034673916, "grad_norm": 1.833410485729512, "learning_rate": 8.376837547245496e-07, "loss": 0.2931, "step": 37330 }, { "epoch": 1.9903520695077423, "grad_norm": 1.8753429970104938, "learning_rate": 8.368930596908514e-07, "loss": 0.2822, "step": 37340 }, { "epoch": 1.9908851043415687, "grad_norm": 1.7995855127131901, "learning_rate": 8.361025936004429e-07, "loss": 0.2824, "step": 37350 }, { "epoch": 1.9914181391753951, "grad_norm": 1.773127869758638, "learning_rate": 8.353123567262385e-07, "loss": 0.2715, "step": 37360 }, { "epoch": 1.9919511740092215, "grad_norm": 1.7889893350347559, "learning_rate": 8.345223493410742e-07, "loss": 0.276, "step": 37370 }, { "epoch": 1.9924842088430479, "grad_norm": 1.9878695322428117, "learning_rate": 8.337325717177072e-07, "loss": 0.2824, "step": 37380 }, { "epoch": 1.9930172436768743, "grad_norm": 1.6672478185804749, "learning_rate": 8.329430241288149e-07, "loss": 0.2859, "step": 37390 }, { "epoch": 1.9935502785107007, "grad_norm": 1.7051316752772439, "learning_rate": 8.32153706846995e-07, "loss": 0.2723, "step": 37400 }, { "epoch": 1.994083313344527, "grad_norm": 1.703657986558443, "learning_rate": 8.313646201447661e-07, "loss": 0.2807, "step": 37410 }, { "epoch": 1.9946163481783534, "grad_norm": 1.756571639964288, "learning_rate": 8.305757642945673e-07, "loss": 0.2897, "step": 37420 }, { "epoch": 1.9951493830121798, "grad_norm": 1.597870923328568, "learning_rate": 8.297871395687573e-07, "loss": 0.2757, "step": 37430 }, { "epoch": 1.9956824178460062, "grad_norm": 1.6485891201137066, "learning_rate": 8.289987462396164e-07, "loss": 0.2712, "step": 37440 }, { "epoch": 1.9962154526798326, "grad_norm": 1.8649702377234794, "learning_rate": 8.28210584579343e-07, "loss": 0.277, "step": 37450 }, { "epoch": 1.996748487513659, "grad_norm": 1.7786809382199258, "learning_rate": 8.27422654860057e-07, "loss": 0.277, "step": 37460 }, { "epoch": 1.9972815223474853, "grad_norm": 1.5475122501493337, "learning_rate": 8.266349573537975e-07, "loss": 0.278, "step": 37470 }, { "epoch": 1.9978145571813117, "grad_norm": 1.7395069185334262, "learning_rate": 8.258474923325245e-07, "loss": 0.2761, "step": 37480 }, { "epoch": 1.9983475920151381, "grad_norm": 1.8495904680787085, "learning_rate": 8.250602600681162e-07, "loss": 0.2897, "step": 37490 }, { "epoch": 1.9988806268489645, "grad_norm": 1.8715093356523964, "learning_rate": 8.242732608323717e-07, "loss": 0.2706, "step": 37500 }, { "epoch": 1.9994136616827909, "grad_norm": 1.730719376962117, "learning_rate": 8.23486494897009e-07, "loss": 0.2706, "step": 37510 }, { "epoch": 1.9999466965166173, "grad_norm": 1.748461421570759, "learning_rate": 8.226999625336663e-07, "loss": 0.2828, "step": 37520 }, { "epoch": 2.0004797313504437, "grad_norm": 1.5736294248881135, "learning_rate": 8.219136640138994e-07, "loss": 0.2298, "step": 37530 }, { "epoch": 2.00101276618427, "grad_norm": 1.7738230858131003, "learning_rate": 8.21127599609185e-07, "loss": 0.2413, "step": 37540 }, { "epoch": 2.0015458010180964, "grad_norm": 1.6640783817606957, "learning_rate": 8.203417695909185e-07, "loss": 0.2321, "step": 37550 }, { "epoch": 2.002078835851923, "grad_norm": 1.8111774501017874, "learning_rate": 8.195561742304147e-07, "loss": 0.2433, "step": 37560 }, { "epoch": 2.002611870685749, "grad_norm": 1.7014912795958672, "learning_rate": 8.187708137989066e-07, "loss": 0.2274, "step": 37570 }, { "epoch": 2.0031449055195756, "grad_norm": 1.6795478384000704, "learning_rate": 8.179856885675467e-07, "loss": 0.2395, "step": 37580 }, { "epoch": 2.003677940353402, "grad_norm": 1.6398399366440055, "learning_rate": 8.172007988074063e-07, "loss": 0.2311, "step": 37590 }, { "epoch": 2.0042109751872283, "grad_norm": 1.9108655627967637, "learning_rate": 8.164161447894752e-07, "loss": 0.2383, "step": 37600 }, { "epoch": 2.0047440100210547, "grad_norm": 1.959416804192182, "learning_rate": 8.156317267846623e-07, "loss": 0.2423, "step": 37610 }, { "epoch": 2.005277044854881, "grad_norm": 1.8418762869642187, "learning_rate": 8.148475450637933e-07, "loss": 0.2323, "step": 37620 }, { "epoch": 2.0058100796887075, "grad_norm": 1.7920108726808361, "learning_rate": 8.140635998976147e-07, "loss": 0.2413, "step": 37630 }, { "epoch": 2.006343114522534, "grad_norm": 1.871491738048841, "learning_rate": 8.132798915567895e-07, "loss": 0.2464, "step": 37640 }, { "epoch": 2.0068761493563603, "grad_norm": 1.8370651097602901, "learning_rate": 8.124964203119001e-07, "loss": 0.2371, "step": 37650 }, { "epoch": 2.0074091841901867, "grad_norm": 1.8895932681415564, "learning_rate": 8.117131864334464e-07, "loss": 0.2308, "step": 37660 }, { "epoch": 2.007942219024013, "grad_norm": 1.7556453087755124, "learning_rate": 8.109301901918466e-07, "loss": 0.2408, "step": 37670 }, { "epoch": 2.0084752538578394, "grad_norm": 1.849030597001621, "learning_rate": 8.101474318574368e-07, "loss": 0.2296, "step": 37680 }, { "epoch": 2.009008288691666, "grad_norm": 1.7692942312307667, "learning_rate": 8.093649117004716e-07, "loss": 0.2293, "step": 37690 }, { "epoch": 2.009541323525492, "grad_norm": 1.6097243173013291, "learning_rate": 8.085826299911214e-07, "loss": 0.2321, "step": 37700 }, { "epoch": 2.0100743583593186, "grad_norm": 1.9413411037236705, "learning_rate": 8.07800586999476e-07, "loss": 0.2357, "step": 37710 }, { "epoch": 2.010607393193145, "grad_norm": 1.8425236300592505, "learning_rate": 8.07018782995543e-07, "loss": 0.2438, "step": 37720 }, { "epoch": 2.0111404280269713, "grad_norm": 1.8096509442824977, "learning_rate": 8.062372182492461e-07, "loss": 0.2369, "step": 37730 }, { "epoch": 2.0116734628607977, "grad_norm": 1.805073782553207, "learning_rate": 8.054558930304279e-07, "loss": 0.2401, "step": 37740 }, { "epoch": 2.0122064976946246, "grad_norm": 2.0141296583955945, "learning_rate": 8.046748076088468e-07, "loss": 0.2336, "step": 37750 }, { "epoch": 2.012739532528451, "grad_norm": 1.7393311856686322, "learning_rate": 8.038939622541796e-07, "loss": 0.2365, "step": 37760 }, { "epoch": 2.0132725673622773, "grad_norm": 1.9116227455668509, "learning_rate": 8.031133572360197e-07, "loss": 0.2323, "step": 37770 }, { "epoch": 2.0138056021961037, "grad_norm": 1.8061397289574974, "learning_rate": 8.023329928238782e-07, "loss": 0.2311, "step": 37780 }, { "epoch": 2.01433863702993, "grad_norm": 1.7869109509532108, "learning_rate": 8.015528692871815e-07, "loss": 0.2371, "step": 37790 }, { "epoch": 2.0148716718637565, "grad_norm": 1.804094302500777, "learning_rate": 8.00772986895274e-07, "loss": 0.2333, "step": 37800 }, { "epoch": 2.015404706697583, "grad_norm": 1.9678191665846236, "learning_rate": 7.999933459174171e-07, "loss": 0.243, "step": 37810 }, { "epoch": 2.0159377415314093, "grad_norm": 1.6924137848581187, "learning_rate": 7.992139466227883e-07, "loss": 0.2313, "step": 37820 }, { "epoch": 2.0164707763652356, "grad_norm": 1.7948700307484415, "learning_rate": 7.984347892804819e-07, "loss": 0.2372, "step": 37830 }, { "epoch": 2.017003811199062, "grad_norm": 1.833744535386794, "learning_rate": 7.976558741595084e-07, "loss": 0.2432, "step": 37840 }, { "epoch": 2.0175368460328884, "grad_norm": 1.690808793338307, "learning_rate": 7.96877201528795e-07, "loss": 0.2442, "step": 37850 }, { "epoch": 2.018069880866715, "grad_norm": 1.8516431262820199, "learning_rate": 7.960987716571856e-07, "loss": 0.2357, "step": 37860 }, { "epoch": 2.018602915700541, "grad_norm": 1.6257392258732206, "learning_rate": 7.953205848134385e-07, "loss": 0.2396, "step": 37870 }, { "epoch": 2.0191359505343676, "grad_norm": 1.8577470034892662, "learning_rate": 7.945426412662299e-07, "loss": 0.2312, "step": 37880 }, { "epoch": 2.019668985368194, "grad_norm": 1.6841858566426315, "learning_rate": 7.937649412841514e-07, "loss": 0.2297, "step": 37890 }, { "epoch": 2.0202020202020203, "grad_norm": 1.903325187428609, "learning_rate": 7.929874851357104e-07, "loss": 0.2291, "step": 37900 }, { "epoch": 2.0207350550358467, "grad_norm": 1.8887622305980312, "learning_rate": 7.922102730893305e-07, "loss": 0.2387, "step": 37910 }, { "epoch": 2.021268089869673, "grad_norm": 1.77595963306003, "learning_rate": 7.914333054133503e-07, "loss": 0.2374, "step": 37920 }, { "epoch": 2.0218011247034995, "grad_norm": 1.97168107403706, "learning_rate": 7.906565823760251e-07, "loss": 0.239, "step": 37930 }, { "epoch": 2.022334159537326, "grad_norm": 1.701640934208281, "learning_rate": 7.898801042455243e-07, "loss": 0.2309, "step": 37940 }, { "epoch": 2.0228671943711523, "grad_norm": 1.706134416451444, "learning_rate": 7.89103871289935e-07, "loss": 0.2336, "step": 37950 }, { "epoch": 2.0234002292049786, "grad_norm": 1.7698411122501323, "learning_rate": 7.883278837772565e-07, "loss": 0.2319, "step": 37960 }, { "epoch": 2.023933264038805, "grad_norm": 1.736574804524401, "learning_rate": 7.875521419754062e-07, "loss": 0.2309, "step": 37970 }, { "epoch": 2.0244662988726314, "grad_norm": 1.7170654920574158, "learning_rate": 7.867766461522147e-07, "loss": 0.2372, "step": 37980 }, { "epoch": 2.024999333706458, "grad_norm": 2.1667226980332845, "learning_rate": 7.860013965754288e-07, "loss": 0.2389, "step": 37990 }, { "epoch": 2.025532368540284, "grad_norm": 1.8467594469420534, "learning_rate": 7.852263935127101e-07, "loss": 0.2337, "step": 38000 }, { "epoch": 2.0260654033741106, "grad_norm": 1.8233912648698773, "learning_rate": 7.84451637231635e-07, "loss": 0.2376, "step": 38010 }, { "epoch": 2.026598438207937, "grad_norm": 1.7944039914015908, "learning_rate": 7.836771279996946e-07, "loss": 0.2251, "step": 38020 }, { "epoch": 2.0271314730417633, "grad_norm": 1.6819159320036245, "learning_rate": 7.829028660842948e-07, "loss": 0.2331, "step": 38030 }, { "epoch": 2.0276645078755897, "grad_norm": 1.845999510494668, "learning_rate": 7.821288517527561e-07, "loss": 0.2324, "step": 38040 }, { "epoch": 2.028197542709416, "grad_norm": 1.7144658488912408, "learning_rate": 7.813550852723142e-07, "loss": 0.2337, "step": 38050 }, { "epoch": 2.0287305775432425, "grad_norm": 1.9575816872204819, "learning_rate": 7.80581566910117e-07, "loss": 0.2325, "step": 38060 }, { "epoch": 2.029263612377069, "grad_norm": 1.8772335533313278, "learning_rate": 7.798082969332292e-07, "loss": 0.2305, "step": 38070 }, { "epoch": 2.0297966472108953, "grad_norm": 1.8970683408147426, "learning_rate": 7.790352756086287e-07, "loss": 0.2321, "step": 38080 }, { "epoch": 2.0303296820447216, "grad_norm": 1.7921100108658055, "learning_rate": 7.782625032032077e-07, "loss": 0.2437, "step": 38090 }, { "epoch": 2.030862716878548, "grad_norm": 1.8013459628885917, "learning_rate": 7.774899799837724e-07, "loss": 0.2423, "step": 38100 }, { "epoch": 2.0313957517123744, "grad_norm": 1.791031280264957, "learning_rate": 7.76717706217043e-07, "loss": 0.2315, "step": 38110 }, { "epoch": 2.031928786546201, "grad_norm": 1.7816306638218051, "learning_rate": 7.759456821696536e-07, "loss": 0.2337, "step": 38120 }, { "epoch": 2.032461821380027, "grad_norm": 1.7259380841922725, "learning_rate": 7.751739081081518e-07, "loss": 0.2492, "step": 38130 }, { "epoch": 2.0329948562138536, "grad_norm": 1.6647546932772428, "learning_rate": 7.744023842989999e-07, "loss": 0.2402, "step": 38140 }, { "epoch": 2.03352789104768, "grad_norm": 1.964455360587893, "learning_rate": 7.736311110085721e-07, "loss": 0.2276, "step": 38150 }, { "epoch": 2.0340609258815063, "grad_norm": 1.9405695317132412, "learning_rate": 7.728600885031571e-07, "loss": 0.2455, "step": 38160 }, { "epoch": 2.0345939607153327, "grad_norm": 1.7219815442406532, "learning_rate": 7.720893170489574e-07, "loss": 0.2346, "step": 38170 }, { "epoch": 2.035126995549159, "grad_norm": 1.9284142510366096, "learning_rate": 7.713187969120882e-07, "loss": 0.2369, "step": 38180 }, { "epoch": 2.0356600303829855, "grad_norm": 2.131341757885997, "learning_rate": 7.70548528358578e-07, "loss": 0.236, "step": 38190 }, { "epoch": 2.036193065216812, "grad_norm": 1.9120895052394005, "learning_rate": 7.69778511654369e-07, "loss": 0.2447, "step": 38200 }, { "epoch": 2.0367261000506383, "grad_norm": 1.7948223586326344, "learning_rate": 7.690087470653153e-07, "loss": 0.2447, "step": 38210 }, { "epoch": 2.0372591348844646, "grad_norm": 1.8312476841409095, "learning_rate": 7.682392348571856e-07, "loss": 0.2356, "step": 38220 }, { "epoch": 2.037792169718291, "grad_norm": 1.9070237984493101, "learning_rate": 7.674699752956595e-07, "loss": 0.2385, "step": 38230 }, { "epoch": 2.0383252045521174, "grad_norm": 1.7664194188598732, "learning_rate": 7.667009686463306e-07, "loss": 0.2325, "step": 38240 }, { "epoch": 2.038858239385944, "grad_norm": 1.8501820426134736, "learning_rate": 7.659322151747054e-07, "loss": 0.2242, "step": 38250 }, { "epoch": 2.03939127421977, "grad_norm": 1.795331927286408, "learning_rate": 7.651637151462021e-07, "loss": 0.2461, "step": 38260 }, { "epoch": 2.0399243090535966, "grad_norm": 1.7959332250295559, "learning_rate": 7.643954688261523e-07, "loss": 0.2314, "step": 38270 }, { "epoch": 2.040457343887423, "grad_norm": 1.86266814866272, "learning_rate": 7.636274764797991e-07, "loss": 0.2469, "step": 38280 }, { "epoch": 2.0409903787212493, "grad_norm": 1.6733629886556638, "learning_rate": 7.628597383722985e-07, "loss": 0.2312, "step": 38290 }, { "epoch": 2.0415234135550757, "grad_norm": 1.7225322918660038, "learning_rate": 7.620922547687194e-07, "loss": 0.2435, "step": 38300 }, { "epoch": 2.042056448388902, "grad_norm": 1.8322505245736525, "learning_rate": 7.613250259340405e-07, "loss": 0.2309, "step": 38310 }, { "epoch": 2.0425894832227285, "grad_norm": 1.8050068586235133, "learning_rate": 7.605580521331549e-07, "loss": 0.2404, "step": 38320 }, { "epoch": 2.043122518056555, "grad_norm": 1.6833845783763375, "learning_rate": 7.597913336308668e-07, "loss": 0.2381, "step": 38330 }, { "epoch": 2.0436555528903813, "grad_norm": 1.8728951511052994, "learning_rate": 7.590248706918922e-07, "loss": 0.2353, "step": 38340 }, { "epoch": 2.0441885877242076, "grad_norm": 1.9330922143102474, "learning_rate": 7.582586635808589e-07, "loss": 0.241, "step": 38350 }, { "epoch": 2.044721622558034, "grad_norm": 1.811551623639987, "learning_rate": 7.574927125623062e-07, "loss": 0.2428, "step": 38360 }, { "epoch": 2.0452546573918604, "grad_norm": 1.779744683025959, "learning_rate": 7.567270179006854e-07, "loss": 0.239, "step": 38370 }, { "epoch": 2.045787692225687, "grad_norm": 1.7592124202232444, "learning_rate": 7.559615798603593e-07, "loss": 0.2287, "step": 38380 }, { "epoch": 2.046320727059513, "grad_norm": 1.8592685928692703, "learning_rate": 7.551963987056021e-07, "loss": 0.238, "step": 38390 }, { "epoch": 2.0468537618933396, "grad_norm": 1.8665641630841088, "learning_rate": 7.544314747005981e-07, "loss": 0.235, "step": 38400 }, { "epoch": 2.047386796727166, "grad_norm": 1.7878447557567456, "learning_rate": 7.536668081094439e-07, "loss": 0.24, "step": 38410 }, { "epoch": 2.0479198315609923, "grad_norm": 2.003101277001904, "learning_rate": 7.529023991961478e-07, "loss": 0.2305, "step": 38420 }, { "epoch": 2.0484528663948187, "grad_norm": 1.6137277206347598, "learning_rate": 7.521382482246283e-07, "loss": 0.2416, "step": 38430 }, { "epoch": 2.048985901228645, "grad_norm": 1.78262850336962, "learning_rate": 7.513743554587147e-07, "loss": 0.2343, "step": 38440 }, { "epoch": 2.0495189360624715, "grad_norm": 1.9049626532469903, "learning_rate": 7.506107211621475e-07, "loss": 0.2339, "step": 38450 }, { "epoch": 2.050051970896298, "grad_norm": 1.9196749422535442, "learning_rate": 7.498473455985782e-07, "loss": 0.2348, "step": 38460 }, { "epoch": 2.0505850057301243, "grad_norm": 1.7903853520777744, "learning_rate": 7.490842290315689e-07, "loss": 0.2376, "step": 38470 }, { "epoch": 2.0511180405639506, "grad_norm": 1.965974895304583, "learning_rate": 7.483213717245913e-07, "loss": 0.2361, "step": 38480 }, { "epoch": 2.0516510753977775, "grad_norm": 1.867664765365325, "learning_rate": 7.475587739410287e-07, "loss": 0.2423, "step": 38490 }, { "epoch": 2.052184110231604, "grad_norm": 1.8511950794225667, "learning_rate": 7.467964359441743e-07, "loss": 0.2458, "step": 38500 }, { "epoch": 2.0527171450654302, "grad_norm": 1.6968488847088037, "learning_rate": 7.460343579972319e-07, "loss": 0.2438, "step": 38510 }, { "epoch": 2.0532501798992566, "grad_norm": 1.8525495613524092, "learning_rate": 7.452725403633155e-07, "loss": 0.2352, "step": 38520 }, { "epoch": 2.053783214733083, "grad_norm": 2.050268247072766, "learning_rate": 7.445109833054489e-07, "loss": 0.2387, "step": 38530 }, { "epoch": 2.0543162495669094, "grad_norm": 1.5822781906709664, "learning_rate": 7.437496870865662e-07, "loss": 0.2407, "step": 38540 }, { "epoch": 2.0548492844007358, "grad_norm": 1.9408502637350467, "learning_rate": 7.429886519695113e-07, "loss": 0.2339, "step": 38550 }, { "epoch": 2.055382319234562, "grad_norm": 1.7503408901008903, "learning_rate": 7.422278782170389e-07, "loss": 0.2379, "step": 38560 }, { "epoch": 2.0559153540683885, "grad_norm": 1.871415401413392, "learning_rate": 7.414673660918111e-07, "loss": 0.2338, "step": 38570 }, { "epoch": 2.056448388902215, "grad_norm": 1.9291874266424698, "learning_rate": 7.407071158564017e-07, "loss": 0.2366, "step": 38580 }, { "epoch": 2.0569814237360413, "grad_norm": 1.7114149170732393, "learning_rate": 7.399471277732936e-07, "loss": 0.2391, "step": 38590 }, { "epoch": 2.0575144585698677, "grad_norm": 1.9876124650258065, "learning_rate": 7.391874021048793e-07, "loss": 0.2285, "step": 38600 }, { "epoch": 2.058047493403694, "grad_norm": 1.990972677377257, "learning_rate": 7.384279391134604e-07, "loss": 0.2454, "step": 38610 }, { "epoch": 2.0585805282375205, "grad_norm": 1.7928884933095712, "learning_rate": 7.376687390612482e-07, "loss": 0.2303, "step": 38620 }, { "epoch": 2.059113563071347, "grad_norm": 1.8264531507931925, "learning_rate": 7.369098022103624e-07, "loss": 0.2283, "step": 38630 }, { "epoch": 2.0596465979051732, "grad_norm": 1.7198716199600403, "learning_rate": 7.361511288228332e-07, "loss": 0.2407, "step": 38640 }, { "epoch": 2.0601796327389996, "grad_norm": 1.696736359376205, "learning_rate": 7.353927191605979e-07, "loss": 0.2253, "step": 38650 }, { "epoch": 2.060712667572826, "grad_norm": 1.8064393807227752, "learning_rate": 7.346345734855048e-07, "loss": 0.2336, "step": 38660 }, { "epoch": 2.0612457024066524, "grad_norm": 1.8270088564409517, "learning_rate": 7.338766920593093e-07, "loss": 0.2327, "step": 38670 }, { "epoch": 2.0617787372404788, "grad_norm": 1.710927842359247, "learning_rate": 7.331190751436772e-07, "loss": 0.2321, "step": 38680 }, { "epoch": 2.062311772074305, "grad_norm": 2.0278901653609522, "learning_rate": 7.323617230001815e-07, "loss": 0.2318, "step": 38690 }, { "epoch": 2.0628448069081315, "grad_norm": 1.7364311814129412, "learning_rate": 7.31604635890305e-07, "loss": 0.2412, "step": 38700 }, { "epoch": 2.063377841741958, "grad_norm": 1.8373109926965532, "learning_rate": 7.308478140754381e-07, "loss": 0.2343, "step": 38710 }, { "epoch": 2.0639108765757843, "grad_norm": 1.8208564095524293, "learning_rate": 7.300912578168801e-07, "loss": 0.2329, "step": 38720 }, { "epoch": 2.0644439114096107, "grad_norm": 1.8890569820637582, "learning_rate": 7.293349673758388e-07, "loss": 0.2384, "step": 38730 }, { "epoch": 2.064976946243437, "grad_norm": 1.6832413770974157, "learning_rate": 7.285789430134291e-07, "loss": 0.2311, "step": 38740 }, { "epoch": 2.0655099810772635, "grad_norm": 1.8752553035569006, "learning_rate": 7.278231849906751e-07, "loss": 0.2371, "step": 38750 }, { "epoch": 2.06604301591109, "grad_norm": 1.9815479545309946, "learning_rate": 7.270676935685087e-07, "loss": 0.2347, "step": 38760 }, { "epoch": 2.0665760507449162, "grad_norm": 1.6786789066714622, "learning_rate": 7.263124690077698e-07, "loss": 0.2331, "step": 38770 }, { "epoch": 2.0671090855787426, "grad_norm": 1.982227124213622, "learning_rate": 7.25557511569206e-07, "loss": 0.2314, "step": 38780 }, { "epoch": 2.067642120412569, "grad_norm": 1.6374566453130897, "learning_rate": 7.24802821513473e-07, "loss": 0.2331, "step": 38790 }, { "epoch": 2.0681751552463954, "grad_norm": 1.7015005455337817, "learning_rate": 7.240483991011336e-07, "loss": 0.239, "step": 38800 }, { "epoch": 2.0687081900802218, "grad_norm": 1.976667169465564, "learning_rate": 7.232942445926591e-07, "loss": 0.2346, "step": 38810 }, { "epoch": 2.069241224914048, "grad_norm": 1.874765663110959, "learning_rate": 7.225403582484269e-07, "loss": 0.2387, "step": 38820 }, { "epoch": 2.0697742597478745, "grad_norm": 1.898323939078004, "learning_rate": 7.21786740328723e-07, "loss": 0.2407, "step": 38830 }, { "epoch": 2.070307294581701, "grad_norm": 1.9122054892991258, "learning_rate": 7.210333910937404e-07, "loss": 0.2412, "step": 38840 }, { "epoch": 2.0708403294155273, "grad_norm": 1.6813385214142111, "learning_rate": 7.202803108035792e-07, "loss": 0.2412, "step": 38850 }, { "epoch": 2.0713733642493537, "grad_norm": 1.7089315268905128, "learning_rate": 7.195274997182471e-07, "loss": 0.2339, "step": 38860 }, { "epoch": 2.07190639908318, "grad_norm": 1.8119356272440994, "learning_rate": 7.187749580976582e-07, "loss": 0.2397, "step": 38870 }, { "epoch": 2.0724394339170065, "grad_norm": 1.756445736679096, "learning_rate": 7.180226862016338e-07, "loss": 0.2329, "step": 38880 }, { "epoch": 2.072972468750833, "grad_norm": 1.7488611644124183, "learning_rate": 7.172706842899025e-07, "loss": 0.2323, "step": 38890 }, { "epoch": 2.0735055035846592, "grad_norm": 1.9013774811670907, "learning_rate": 7.165189526220996e-07, "loss": 0.2296, "step": 38900 }, { "epoch": 2.0740385384184856, "grad_norm": 1.7687502529146126, "learning_rate": 7.157674914577656e-07, "loss": 0.2364, "step": 38910 }, { "epoch": 2.074571573252312, "grad_norm": 1.914945552584631, "learning_rate": 7.1501630105635e-07, "loss": 0.2297, "step": 38920 }, { "epoch": 2.0751046080861384, "grad_norm": 1.7242640875557604, "learning_rate": 7.142653816772069e-07, "loss": 0.2312, "step": 38930 }, { "epoch": 2.0756376429199648, "grad_norm": 1.9920917647403975, "learning_rate": 7.135147335795981e-07, "loss": 0.2366, "step": 38940 }, { "epoch": 2.076170677753791, "grad_norm": 2.0064009548889574, "learning_rate": 7.127643570226908e-07, "loss": 0.2363, "step": 38950 }, { "epoch": 2.0767037125876175, "grad_norm": 2.010494348160478, "learning_rate": 7.120142522655593e-07, "loss": 0.2374, "step": 38960 }, { "epoch": 2.077236747421444, "grad_norm": 1.9675213853990199, "learning_rate": 7.112644195671834e-07, "loss": 0.238, "step": 38970 }, { "epoch": 2.0777697822552703, "grad_norm": 1.773585968076842, "learning_rate": 7.105148591864497e-07, "loss": 0.2344, "step": 38980 }, { "epoch": 2.0783028170890967, "grad_norm": 1.7435585385293508, "learning_rate": 7.097655713821492e-07, "loss": 0.2263, "step": 38990 }, { "epoch": 2.078835851922923, "grad_norm": 1.903864032389861, "learning_rate": 7.090165564129805e-07, "loss": 0.2351, "step": 39000 }, { "epoch": 2.0793688867567495, "grad_norm": 1.7218540313410287, "learning_rate": 7.082678145375472e-07, "loss": 0.2317, "step": 39010 }, { "epoch": 2.079901921590576, "grad_norm": 1.887312086373205, "learning_rate": 7.07519346014359e-07, "loss": 0.2364, "step": 39020 }, { "epoch": 2.0804349564244022, "grad_norm": 2.161946878157882, "learning_rate": 7.06771151101831e-07, "loss": 0.2406, "step": 39030 }, { "epoch": 2.0809679912582286, "grad_norm": 1.952869762054716, "learning_rate": 7.060232300582835e-07, "loss": 0.2406, "step": 39040 }, { "epoch": 2.081501026092055, "grad_norm": 1.7228380440241955, "learning_rate": 7.052755831419428e-07, "loss": 0.2173, "step": 39050 }, { "epoch": 2.0820340609258814, "grad_norm": 1.7325922473151485, "learning_rate": 7.045282106109404e-07, "loss": 0.2373, "step": 39060 }, { "epoch": 2.0825670957597078, "grad_norm": 1.7394020756605846, "learning_rate": 7.037811127233134e-07, "loss": 0.2373, "step": 39070 }, { "epoch": 2.083100130593534, "grad_norm": 1.718151430492054, "learning_rate": 7.030342897370028e-07, "loss": 0.2447, "step": 39080 }, { "epoch": 2.0836331654273605, "grad_norm": 2.020804137280149, "learning_rate": 7.022877419098558e-07, "loss": 0.2305, "step": 39090 }, { "epoch": 2.084166200261187, "grad_norm": 1.8341247962630196, "learning_rate": 7.015414694996247e-07, "loss": 0.2306, "step": 39100 }, { "epoch": 2.0846992350950133, "grad_norm": 1.770468997248963, "learning_rate": 7.00795472763966e-07, "loss": 0.2268, "step": 39110 }, { "epoch": 2.0852322699288397, "grad_norm": 1.775501627589431, "learning_rate": 7.000497519604418e-07, "loss": 0.237, "step": 39120 }, { "epoch": 2.085765304762666, "grad_norm": 1.7182050160947544, "learning_rate": 6.993043073465181e-07, "loss": 0.2338, "step": 39130 }, { "epoch": 2.0862983395964925, "grad_norm": 2.0157672548255756, "learning_rate": 6.985591391795663e-07, "loss": 0.2262, "step": 39140 }, { "epoch": 2.086831374430319, "grad_norm": 1.7719185933431802, "learning_rate": 6.978142477168624e-07, "loss": 0.2347, "step": 39150 }, { "epoch": 2.0873644092641452, "grad_norm": 1.8986993285238174, "learning_rate": 6.970696332155855e-07, "loss": 0.2311, "step": 39160 }, { "epoch": 2.0878974440979716, "grad_norm": 1.8527327230404613, "learning_rate": 6.963252959328205e-07, "loss": 0.2354, "step": 39170 }, { "epoch": 2.088430478931798, "grad_norm": 1.754908281459408, "learning_rate": 6.955812361255563e-07, "loss": 0.2317, "step": 39180 }, { "epoch": 2.0889635137656244, "grad_norm": 1.784064436989912, "learning_rate": 6.948374540506856e-07, "loss": 0.2313, "step": 39190 }, { "epoch": 2.0894965485994508, "grad_norm": 1.8567877732530396, "learning_rate": 6.940939499650058e-07, "loss": 0.2377, "step": 39200 }, { "epoch": 2.090029583433277, "grad_norm": 1.7152402859454083, "learning_rate": 6.933507241252179e-07, "loss": 0.2363, "step": 39210 }, { "epoch": 2.0905626182671035, "grad_norm": 1.9249967281226645, "learning_rate": 6.926077767879267e-07, "loss": 0.2361, "step": 39220 }, { "epoch": 2.09109565310093, "grad_norm": 1.80540934872832, "learning_rate": 6.918651082096411e-07, "loss": 0.2308, "step": 39230 }, { "epoch": 2.0916286879347563, "grad_norm": 1.787963323345176, "learning_rate": 6.911227186467745e-07, "loss": 0.2335, "step": 39240 }, { "epoch": 2.0921617227685827, "grad_norm": 1.8827757615057787, "learning_rate": 6.903806083556418e-07, "loss": 0.2293, "step": 39250 }, { "epoch": 2.0926947576024095, "grad_norm": 2.063808696381716, "learning_rate": 6.896387775924637e-07, "loss": 0.2326, "step": 39260 }, { "epoch": 2.093227792436236, "grad_norm": 1.6968406597366485, "learning_rate": 6.888972266133634e-07, "loss": 0.2269, "step": 39270 }, { "epoch": 2.0937608272700623, "grad_norm": 1.9151628842902149, "learning_rate": 6.881559556743676e-07, "loss": 0.2318, "step": 39280 }, { "epoch": 2.0942938621038887, "grad_norm": 2.0935356230176065, "learning_rate": 6.874149650314067e-07, "loss": 0.2414, "step": 39290 }, { "epoch": 2.094826896937715, "grad_norm": 1.7526412945450405, "learning_rate": 6.866742549403135e-07, "loss": 0.2294, "step": 39300 }, { "epoch": 2.0953599317715415, "grad_norm": 2.1888986022680723, "learning_rate": 6.859338256568248e-07, "loss": 0.2325, "step": 39310 }, { "epoch": 2.095892966605368, "grad_norm": 2.0482125792443093, "learning_rate": 6.851936774365805e-07, "loss": 0.2349, "step": 39320 }, { "epoch": 2.096426001439194, "grad_norm": 1.7835832046778732, "learning_rate": 6.84453810535122e-07, "loss": 0.2338, "step": 39330 }, { "epoch": 2.0969590362730206, "grad_norm": 1.7853669186881789, "learning_rate": 6.837142252078956e-07, "loss": 0.2413, "step": 39340 }, { "epoch": 2.097492071106847, "grad_norm": 1.6889132930047615, "learning_rate": 6.829749217102484e-07, "loss": 0.2245, "step": 39350 }, { "epoch": 2.0980251059406734, "grad_norm": 1.8077248761450484, "learning_rate": 6.822359002974317e-07, "loss": 0.2399, "step": 39360 }, { "epoch": 2.0985581407744998, "grad_norm": 1.826672107967957, "learning_rate": 6.81497161224599e-07, "loss": 0.2297, "step": 39370 }, { "epoch": 2.099091175608326, "grad_norm": 1.658928802150467, "learning_rate": 6.807587047468062e-07, "loss": 0.2322, "step": 39380 }, { "epoch": 2.0996242104421525, "grad_norm": 1.8493166044316842, "learning_rate": 6.800205311190115e-07, "loss": 0.2325, "step": 39390 }, { "epoch": 2.100157245275979, "grad_norm": 1.7411284344819427, "learning_rate": 6.792826405960756e-07, "loss": 0.2369, "step": 39400 }, { "epoch": 2.1006902801098053, "grad_norm": 1.785820068285124, "learning_rate": 6.785450334327618e-07, "loss": 0.2401, "step": 39410 }, { "epoch": 2.1012233149436317, "grad_norm": 1.7916893581829731, "learning_rate": 6.778077098837354e-07, "loss": 0.2282, "step": 39420 }, { "epoch": 2.101756349777458, "grad_norm": 1.7425392132653401, "learning_rate": 6.770706702035626e-07, "loss": 0.2387, "step": 39430 }, { "epoch": 2.1022893846112845, "grad_norm": 1.892042111603105, "learning_rate": 6.763339146467133e-07, "loss": 0.2286, "step": 39440 }, { "epoch": 2.102822419445111, "grad_norm": 1.930893107842331, "learning_rate": 6.755974434675585e-07, "loss": 0.2335, "step": 39450 }, { "epoch": 2.103355454278937, "grad_norm": 1.897360206669216, "learning_rate": 6.74861256920371e-07, "loss": 0.2344, "step": 39460 }, { "epoch": 2.1038884891127636, "grad_norm": 1.7337369083503635, "learning_rate": 6.741253552593255e-07, "loss": 0.224, "step": 39470 }, { "epoch": 2.10442152394659, "grad_norm": 1.6695623874313024, "learning_rate": 6.733897387384985e-07, "loss": 0.2239, "step": 39480 }, { "epoch": 2.1049545587804164, "grad_norm": 1.6197288698777523, "learning_rate": 6.726544076118675e-07, "loss": 0.2347, "step": 39490 }, { "epoch": 2.1054875936142428, "grad_norm": 1.8015298501101642, "learning_rate": 6.71919362133312e-07, "loss": 0.2323, "step": 39500 }, { "epoch": 2.106020628448069, "grad_norm": 1.8076937875318893, "learning_rate": 6.711846025566131e-07, "loss": 0.2412, "step": 39510 }, { "epoch": 2.1065536632818955, "grad_norm": 1.6994128341693702, "learning_rate": 6.704501291354519e-07, "loss": 0.2324, "step": 39520 }, { "epoch": 2.107086698115722, "grad_norm": 1.8953112550396436, "learning_rate": 6.697159421234119e-07, "loss": 0.2322, "step": 39530 }, { "epoch": 2.1076197329495483, "grad_norm": 1.7324111977798289, "learning_rate": 6.689820417739776e-07, "loss": 0.2363, "step": 39540 }, { "epoch": 2.1081527677833747, "grad_norm": 1.7673454143031564, "learning_rate": 6.68248428340534e-07, "loss": 0.2358, "step": 39550 }, { "epoch": 2.108685802617201, "grad_norm": 1.7579946248665175, "learning_rate": 6.675151020763675e-07, "loss": 0.232, "step": 39560 }, { "epoch": 2.1092188374510275, "grad_norm": 1.8733235410375826, "learning_rate": 6.667820632346654e-07, "loss": 0.2215, "step": 39570 }, { "epoch": 2.109751872284854, "grad_norm": 1.9351318740912227, "learning_rate": 6.660493120685153e-07, "loss": 0.23, "step": 39580 }, { "epoch": 2.11028490711868, "grad_norm": 1.7621694950452877, "learning_rate": 6.653168488309065e-07, "loss": 0.2415, "step": 39590 }, { "epoch": 2.1108179419525066, "grad_norm": 1.733313493110517, "learning_rate": 6.645846737747269e-07, "loss": 0.2387, "step": 39600 }, { "epoch": 2.111350976786333, "grad_norm": 1.9524768422621142, "learning_rate": 6.638527871527668e-07, "loss": 0.237, "step": 39610 }, { "epoch": 2.1118840116201594, "grad_norm": 1.6983086570374055, "learning_rate": 6.63121189217716e-07, "loss": 0.2421, "step": 39620 }, { "epoch": 2.1124170464539858, "grad_norm": 1.852234657445964, "learning_rate": 6.623898802221653e-07, "loss": 0.2321, "step": 39630 }, { "epoch": 2.112950081287812, "grad_norm": 1.8625536359595851, "learning_rate": 6.61658860418605e-07, "loss": 0.2387, "step": 39640 }, { "epoch": 2.1134831161216385, "grad_norm": 1.6573335182186442, "learning_rate": 6.60928130059426e-07, "loss": 0.2258, "step": 39650 }, { "epoch": 2.114016150955465, "grad_norm": 1.6754698562016315, "learning_rate": 6.60197689396919e-07, "loss": 0.2468, "step": 39660 }, { "epoch": 2.1145491857892913, "grad_norm": 1.7886504126309044, "learning_rate": 6.594675386832752e-07, "loss": 0.2375, "step": 39670 }, { "epoch": 2.1150822206231177, "grad_norm": 1.644425466862984, "learning_rate": 6.587376781705855e-07, "loss": 0.2383, "step": 39680 }, { "epoch": 2.115615255456944, "grad_norm": 1.7138040935780128, "learning_rate": 6.580081081108395e-07, "loss": 0.2316, "step": 39690 }, { "epoch": 2.1161482902907705, "grad_norm": 1.8848922940027197, "learning_rate": 6.572788287559279e-07, "loss": 0.2352, "step": 39700 }, { "epoch": 2.116681325124597, "grad_norm": 1.8558868944049343, "learning_rate": 6.565498403576406e-07, "loss": 0.2322, "step": 39710 }, { "epoch": 2.117214359958423, "grad_norm": 1.9993093461834224, "learning_rate": 6.558211431676672e-07, "loss": 0.2413, "step": 39720 }, { "epoch": 2.1177473947922496, "grad_norm": 1.9946649051848502, "learning_rate": 6.550927374375964e-07, "loss": 0.2315, "step": 39730 }, { "epoch": 2.118280429626076, "grad_norm": 2.109337538082504, "learning_rate": 6.543646234189164e-07, "loss": 0.2308, "step": 39740 }, { "epoch": 2.1188134644599024, "grad_norm": 1.8380403322089125, "learning_rate": 6.536368013630148e-07, "loss": 0.2349, "step": 39750 }, { "epoch": 2.1193464992937288, "grad_norm": 1.9644611345038752, "learning_rate": 6.529092715211788e-07, "loss": 0.2278, "step": 39760 }, { "epoch": 2.119879534127555, "grad_norm": 1.9569142981011818, "learning_rate": 6.521820341445934e-07, "loss": 0.2415, "step": 39770 }, { "epoch": 2.1204125689613815, "grad_norm": 1.8575414784827466, "learning_rate": 6.514550894843439e-07, "loss": 0.2295, "step": 39780 }, { "epoch": 2.120945603795208, "grad_norm": 1.8877040316927678, "learning_rate": 6.50728437791414e-07, "loss": 0.2403, "step": 39790 }, { "epoch": 2.1214786386290343, "grad_norm": 1.6609374969056916, "learning_rate": 6.500020793166864e-07, "loss": 0.2335, "step": 39800 }, { "epoch": 2.1220116734628607, "grad_norm": 1.8754099560377642, "learning_rate": 6.492760143109424e-07, "loss": 0.2267, "step": 39810 }, { "epoch": 2.122544708296687, "grad_norm": 1.8831123672323284, "learning_rate": 6.485502430248623e-07, "loss": 0.2437, "step": 39820 }, { "epoch": 2.1230777431305135, "grad_norm": 1.9246693516294675, "learning_rate": 6.478247657090246e-07, "loss": 0.2346, "step": 39830 }, { "epoch": 2.12361077796434, "grad_norm": 1.9532938944496068, "learning_rate": 6.470995826139068e-07, "loss": 0.2431, "step": 39840 }, { "epoch": 2.124143812798166, "grad_norm": 2.015474153171191, "learning_rate": 6.463746939898849e-07, "loss": 0.2342, "step": 39850 }, { "epoch": 2.1246768476319926, "grad_norm": 1.8705196777189506, "learning_rate": 6.456501000872315e-07, "loss": 0.2316, "step": 39860 }, { "epoch": 2.125209882465819, "grad_norm": 1.908588922888013, "learning_rate": 6.449258011561196e-07, "loss": 0.2326, "step": 39870 }, { "epoch": 2.1257429172996454, "grad_norm": 1.8966147010270864, "learning_rate": 6.442017974466194e-07, "loss": 0.2322, "step": 39880 }, { "epoch": 2.1262759521334718, "grad_norm": 1.772780422289616, "learning_rate": 6.434780892086995e-07, "loss": 0.2336, "step": 39890 }, { "epoch": 2.126808986967298, "grad_norm": 1.7161525186765545, "learning_rate": 6.42754676692226e-07, "loss": 0.2383, "step": 39900 }, { "epoch": 2.1273420218011245, "grad_norm": 2.0559582308105133, "learning_rate": 6.420315601469636e-07, "loss": 0.2454, "step": 39910 }, { "epoch": 2.127875056634951, "grad_norm": 1.700316637880466, "learning_rate": 6.41308739822574e-07, "loss": 0.2426, "step": 39920 }, { "epoch": 2.1284080914687773, "grad_norm": 1.8919667007719585, "learning_rate": 6.40586215968618e-07, "loss": 0.235, "step": 39930 }, { "epoch": 2.1289411263026037, "grad_norm": 1.9327042429255488, "learning_rate": 6.398639888345515e-07, "loss": 0.2327, "step": 39940 }, { "epoch": 2.12947416113643, "grad_norm": 1.9661138371919133, "learning_rate": 6.391420586697306e-07, "loss": 0.2341, "step": 39950 }, { "epoch": 2.1300071959702565, "grad_norm": 1.9013372332012386, "learning_rate": 6.384204257234075e-07, "loss": 0.2371, "step": 39960 }, { "epoch": 2.1305402308040833, "grad_norm": 1.9757025184523238, "learning_rate": 6.376990902447321e-07, "loss": 0.2279, "step": 39970 }, { "epoch": 2.1310732656379097, "grad_norm": 1.8255977715193223, "learning_rate": 6.369780524827517e-07, "loss": 0.2333, "step": 39980 }, { "epoch": 2.131606300471736, "grad_norm": 2.207393599915795, "learning_rate": 6.362573126864106e-07, "loss": 0.2375, "step": 39990 }, { "epoch": 2.1321393353055624, "grad_norm": 1.6367167486604572, "learning_rate": 6.355368711045504e-07, "loss": 0.2297, "step": 40000 }, { "epoch": 2.132672370139389, "grad_norm": 1.8348815244864012, "learning_rate": 6.348167279859096e-07, "loss": 0.2343, "step": 40010 }, { "epoch": 2.133205404973215, "grad_norm": 1.851807801989843, "learning_rate": 6.340968835791241e-07, "loss": 0.2442, "step": 40020 }, { "epoch": 2.1337384398070416, "grad_norm": 1.736533017645107, "learning_rate": 6.333773381327256e-07, "loss": 0.2408, "step": 40030 }, { "epoch": 2.134271474640868, "grad_norm": 1.680825940291369, "learning_rate": 6.326580918951434e-07, "loss": 0.2449, "step": 40040 }, { "epoch": 2.1348045094746944, "grad_norm": 1.7474711548824875, "learning_rate": 6.319391451147038e-07, "loss": 0.2363, "step": 40050 }, { "epoch": 2.1353375443085207, "grad_norm": 1.6615034788368437, "learning_rate": 6.312204980396287e-07, "loss": 0.2373, "step": 40060 }, { "epoch": 2.135870579142347, "grad_norm": 1.944319080986197, "learning_rate": 6.305021509180375e-07, "loss": 0.2341, "step": 40070 }, { "epoch": 2.1364036139761735, "grad_norm": 1.7130983414424126, "learning_rate": 6.297841039979454e-07, "loss": 0.2464, "step": 40080 }, { "epoch": 2.13693664881, "grad_norm": 1.827286043198682, "learning_rate": 6.290663575272645e-07, "loss": 0.2315, "step": 40090 }, { "epoch": 2.1374696836438263, "grad_norm": 1.908884073171793, "learning_rate": 6.283489117538031e-07, "loss": 0.2381, "step": 40100 }, { "epoch": 2.1380027184776527, "grad_norm": 1.8864710073623185, "learning_rate": 6.276317669252645e-07, "loss": 0.2413, "step": 40110 }, { "epoch": 2.138535753311479, "grad_norm": 1.7561293850111093, "learning_rate": 6.269149232892495e-07, "loss": 0.2461, "step": 40120 }, { "epoch": 2.1390687881453054, "grad_norm": 1.8141178218641116, "learning_rate": 6.261983810932543e-07, "loss": 0.2332, "step": 40130 }, { "epoch": 2.139601822979132, "grad_norm": 1.7544498510912303, "learning_rate": 6.254821405846715e-07, "loss": 0.234, "step": 40140 }, { "epoch": 2.140134857812958, "grad_norm": 1.947168833596968, "learning_rate": 6.24766202010789e-07, "loss": 0.2308, "step": 40150 }, { "epoch": 2.1406678926467846, "grad_norm": 2.233100781585162, "learning_rate": 6.240505656187905e-07, "loss": 0.2314, "step": 40160 }, { "epoch": 2.141200927480611, "grad_norm": 1.8733176243835672, "learning_rate": 6.233352316557558e-07, "loss": 0.2354, "step": 40170 }, { "epoch": 2.1417339623144374, "grad_norm": 1.8502519911588187, "learning_rate": 6.226202003686601e-07, "loss": 0.2355, "step": 40180 }, { "epoch": 2.1422669971482637, "grad_norm": 1.7848900256793836, "learning_rate": 6.21905472004374e-07, "loss": 0.237, "step": 40190 }, { "epoch": 2.14280003198209, "grad_norm": 1.8951635801878093, "learning_rate": 6.211910468096631e-07, "loss": 0.2362, "step": 40200 }, { "epoch": 2.1433330668159165, "grad_norm": 1.7006800035725336, "learning_rate": 6.204769250311889e-07, "loss": 0.2244, "step": 40210 }, { "epoch": 2.143866101649743, "grad_norm": 1.7212870778421754, "learning_rate": 6.197631069155081e-07, "loss": 0.2429, "step": 40220 }, { "epoch": 2.1443991364835693, "grad_norm": 1.7735006585455204, "learning_rate": 6.190495927090725e-07, "loss": 0.231, "step": 40230 }, { "epoch": 2.1449321713173957, "grad_norm": 1.8210038930716173, "learning_rate": 6.183363826582287e-07, "loss": 0.245, "step": 40240 }, { "epoch": 2.145465206151222, "grad_norm": 1.9251286014134974, "learning_rate": 6.176234770092187e-07, "loss": 0.2342, "step": 40250 }, { "epoch": 2.1459982409850484, "grad_norm": 1.9128290810427413, "learning_rate": 6.169108760081794e-07, "loss": 0.2363, "step": 40260 }, { "epoch": 2.146531275818875, "grad_norm": 1.7790798262436074, "learning_rate": 6.161985799011426e-07, "loss": 0.2394, "step": 40270 }, { "epoch": 2.147064310652701, "grad_norm": 1.8445577079913038, "learning_rate": 6.154865889340334e-07, "loss": 0.23, "step": 40280 }, { "epoch": 2.1475973454865276, "grad_norm": 1.9163465124993924, "learning_rate": 6.147749033526736e-07, "loss": 0.2349, "step": 40290 }, { "epoch": 2.148130380320354, "grad_norm": 2.0614654944564856, "learning_rate": 6.140635234027785e-07, "loss": 0.2337, "step": 40300 }, { "epoch": 2.1486634151541804, "grad_norm": 1.8389780451693138, "learning_rate": 6.133524493299579e-07, "loss": 0.2339, "step": 40310 }, { "epoch": 2.1491964499880067, "grad_norm": 2.008779633227989, "learning_rate": 6.126416813797164e-07, "loss": 0.2438, "step": 40320 }, { "epoch": 2.149729484821833, "grad_norm": 1.706666831851398, "learning_rate": 6.119312197974525e-07, "loss": 0.2298, "step": 40330 }, { "epoch": 2.1502625196556595, "grad_norm": 2.0249086731345045, "learning_rate": 6.112210648284591e-07, "loss": 0.2405, "step": 40340 }, { "epoch": 2.150795554489486, "grad_norm": 1.9574147972801035, "learning_rate": 6.105112167179234e-07, "loss": 0.2356, "step": 40350 }, { "epoch": 2.1513285893233123, "grad_norm": 1.8816226527137219, "learning_rate": 6.098016757109266e-07, "loss": 0.2346, "step": 40360 }, { "epoch": 2.1518616241571387, "grad_norm": 1.837837352088897, "learning_rate": 6.090924420524431e-07, "loss": 0.2416, "step": 40370 }, { "epoch": 2.152394658990965, "grad_norm": 1.7478395417047299, "learning_rate": 6.083835159873421e-07, "loss": 0.2419, "step": 40380 }, { "epoch": 2.1529276938247914, "grad_norm": 1.8591808472201339, "learning_rate": 6.076748977603866e-07, "loss": 0.2391, "step": 40390 }, { "epoch": 2.153460728658618, "grad_norm": 1.9222457471360213, "learning_rate": 6.069665876162327e-07, "loss": 0.2329, "step": 40400 }, { "epoch": 2.153993763492444, "grad_norm": 1.9202576321301945, "learning_rate": 6.062585857994308e-07, "loss": 0.2184, "step": 40410 }, { "epoch": 2.1545267983262706, "grad_norm": 1.830743858035785, "learning_rate": 6.055508925544243e-07, "loss": 0.241, "step": 40420 }, { "epoch": 2.155059833160097, "grad_norm": 1.9105412717135142, "learning_rate": 6.048435081255504e-07, "loss": 0.2347, "step": 40430 }, { "epoch": 2.1555928679939234, "grad_norm": 1.6642688016971123, "learning_rate": 6.0413643275704e-07, "loss": 0.2271, "step": 40440 }, { "epoch": 2.1561259028277497, "grad_norm": 1.623355846812178, "learning_rate": 6.034296666930158e-07, "loss": 0.2431, "step": 40450 }, { "epoch": 2.156658937661576, "grad_norm": 1.701916761226239, "learning_rate": 6.027232101774955e-07, "loss": 0.2395, "step": 40460 }, { "epoch": 2.1571919724954025, "grad_norm": 1.6361426929807774, "learning_rate": 6.020170634543891e-07, "loss": 0.2319, "step": 40470 }, { "epoch": 2.157725007329229, "grad_norm": 1.8521042090478255, "learning_rate": 6.013112267674995e-07, "loss": 0.232, "step": 40480 }, { "epoch": 2.1582580421630553, "grad_norm": 1.9075258483782722, "learning_rate": 6.006057003605232e-07, "loss": 0.2477, "step": 40490 }, { "epoch": 2.1587910769968817, "grad_norm": 1.7874511827360193, "learning_rate": 5.999004844770489e-07, "loss": 0.2352, "step": 40500 }, { "epoch": 2.159324111830708, "grad_norm": 1.7542222965378522, "learning_rate": 5.991955793605585e-07, "loss": 0.2334, "step": 40510 }, { "epoch": 2.1598571466645344, "grad_norm": 1.8406409662779866, "learning_rate": 5.984909852544269e-07, "loss": 0.2305, "step": 40520 }, { "epoch": 2.160390181498361, "grad_norm": 2.2688464626274043, "learning_rate": 5.9778670240192e-07, "loss": 0.2372, "step": 40530 }, { "epoch": 2.160923216332187, "grad_norm": 1.9001663884456879, "learning_rate": 5.970827310461982e-07, "loss": 0.2301, "step": 40540 }, { "epoch": 2.1614562511660136, "grad_norm": 1.832299857326013, "learning_rate": 5.963790714303135e-07, "loss": 0.2362, "step": 40550 }, { "epoch": 2.16198928599984, "grad_norm": 1.764636082158625, "learning_rate": 5.956757237972104e-07, "loss": 0.2283, "step": 40560 }, { "epoch": 2.1625223208336664, "grad_norm": 1.8422444657459665, "learning_rate": 5.949726883897254e-07, "loss": 0.2398, "step": 40570 }, { "epoch": 2.1630553556674927, "grad_norm": 2.0173068093101176, "learning_rate": 5.942699654505876e-07, "loss": 0.2484, "step": 40580 }, { "epoch": 2.163588390501319, "grad_norm": 1.8589623409673304, "learning_rate": 5.93567555222418e-07, "loss": 0.2323, "step": 40590 }, { "epoch": 2.1641214253351455, "grad_norm": 1.800587857582897, "learning_rate": 5.928654579477298e-07, "loss": 0.228, "step": 40600 }, { "epoch": 2.164654460168972, "grad_norm": 1.7929840766407423, "learning_rate": 5.921636738689284e-07, "loss": 0.2293, "step": 40610 }, { "epoch": 2.1651874950027983, "grad_norm": 1.8451060628561284, "learning_rate": 5.914622032283097e-07, "loss": 0.2358, "step": 40620 }, { "epoch": 2.1657205298366247, "grad_norm": 1.5921258544611028, "learning_rate": 5.90761046268063e-07, "loss": 0.228, "step": 40630 }, { "epoch": 2.166253564670451, "grad_norm": 1.7890295621623638, "learning_rate": 5.900602032302685e-07, "loss": 0.2353, "step": 40640 }, { "epoch": 2.1667865995042774, "grad_norm": 1.9393112504629715, "learning_rate": 5.893596743568985e-07, "loss": 0.2322, "step": 40650 }, { "epoch": 2.167319634338104, "grad_norm": 1.9181713578254165, "learning_rate": 5.886594598898161e-07, "loss": 0.2372, "step": 40660 }, { "epoch": 2.16785266917193, "grad_norm": 1.7836954206546396, "learning_rate": 5.879595600707768e-07, "loss": 0.2396, "step": 40670 }, { "epoch": 2.1683857040057566, "grad_norm": 1.9014215755651191, "learning_rate": 5.872599751414268e-07, "loss": 0.2392, "step": 40680 }, { "epoch": 2.168918738839583, "grad_norm": 1.794862747026264, "learning_rate": 5.865607053433032e-07, "loss": 0.2401, "step": 40690 }, { "epoch": 2.1694517736734094, "grad_norm": 1.7694071044096837, "learning_rate": 5.858617509178352e-07, "loss": 0.2308, "step": 40700 }, { "epoch": 2.1699848085072357, "grad_norm": 1.8194583706889857, "learning_rate": 5.851631121063433e-07, "loss": 0.236, "step": 40710 }, { "epoch": 2.170517843341062, "grad_norm": 1.8572292630540166, "learning_rate": 5.844647891500374e-07, "loss": 0.2385, "step": 40720 }, { "epoch": 2.1710508781748885, "grad_norm": 1.989948841682253, "learning_rate": 5.837667822900199e-07, "loss": 0.2378, "step": 40730 }, { "epoch": 2.171583913008715, "grad_norm": 1.967804161061831, "learning_rate": 5.830690917672835e-07, "loss": 0.2394, "step": 40740 }, { "epoch": 2.1721169478425413, "grad_norm": 1.963925464836842, "learning_rate": 5.82371717822712e-07, "loss": 0.2371, "step": 40750 }, { "epoch": 2.172649982676368, "grad_norm": 1.9148295542836957, "learning_rate": 5.816746606970795e-07, "loss": 0.235, "step": 40760 }, { "epoch": 2.1731830175101945, "grad_norm": 1.789270293579564, "learning_rate": 5.809779206310509e-07, "loss": 0.2279, "step": 40770 }, { "epoch": 2.173716052344021, "grad_norm": 1.9674705963494186, "learning_rate": 5.802814978651817e-07, "loss": 0.2296, "step": 40780 }, { "epoch": 2.1742490871778473, "grad_norm": 1.9664260384733157, "learning_rate": 5.795853926399175e-07, "loss": 0.2294, "step": 40790 }, { "epoch": 2.1747821220116736, "grad_norm": 1.7356793118636098, "learning_rate": 5.788896051955953e-07, "loss": 0.235, "step": 40800 }, { "epoch": 2.1753151568455, "grad_norm": 1.7205912341988363, "learning_rate": 5.781941357724404e-07, "loss": 0.2409, "step": 40810 }, { "epoch": 2.1758481916793264, "grad_norm": 1.787702738502243, "learning_rate": 5.774989846105704e-07, "loss": 0.238, "step": 40820 }, { "epoch": 2.176381226513153, "grad_norm": 1.8703477157717947, "learning_rate": 5.768041519499915e-07, "loss": 0.2286, "step": 40830 }, { "epoch": 2.176914261346979, "grad_norm": 1.9274239309754442, "learning_rate": 5.761096380306011e-07, "loss": 0.2289, "step": 40840 }, { "epoch": 2.1774472961808056, "grad_norm": 2.017726288014313, "learning_rate": 5.754154430921858e-07, "loss": 0.2323, "step": 40850 }, { "epoch": 2.177980331014632, "grad_norm": 1.8697963268100288, "learning_rate": 5.747215673744224e-07, "loss": 0.2186, "step": 40860 }, { "epoch": 2.1785133658484583, "grad_norm": 1.6623568098838089, "learning_rate": 5.740280111168772e-07, "loss": 0.2376, "step": 40870 }, { "epoch": 2.1790464006822847, "grad_norm": 2.0152712054603596, "learning_rate": 5.73334774559007e-07, "loss": 0.2377, "step": 40880 }, { "epoch": 2.179579435516111, "grad_norm": 1.8907127585324865, "learning_rate": 5.726418579401565e-07, "loss": 0.2304, "step": 40890 }, { "epoch": 2.1801124703499375, "grad_norm": 1.8945059120727403, "learning_rate": 5.719492614995617e-07, "loss": 0.2399, "step": 40900 }, { "epoch": 2.180645505183764, "grad_norm": 1.7565273368230947, "learning_rate": 5.712569854763473e-07, "loss": 0.2327, "step": 40910 }, { "epoch": 2.1811785400175903, "grad_norm": 2.1596659349803193, "learning_rate": 5.705650301095273e-07, "loss": 0.2441, "step": 40920 }, { "epoch": 2.1817115748514166, "grad_norm": 1.9228147554650465, "learning_rate": 5.698733956380053e-07, "loss": 0.2355, "step": 40930 }, { "epoch": 2.182244609685243, "grad_norm": 1.658515462375666, "learning_rate": 5.691820823005741e-07, "loss": 0.2331, "step": 40940 }, { "epoch": 2.1827776445190694, "grad_norm": 1.8195605936564088, "learning_rate": 5.684910903359152e-07, "loss": 0.2386, "step": 40950 }, { "epoch": 2.183310679352896, "grad_norm": 1.7378493426447967, "learning_rate": 5.678004199825996e-07, "loss": 0.2359, "step": 40960 }, { "epoch": 2.183843714186722, "grad_norm": 1.6851770971682727, "learning_rate": 5.671100714790876e-07, "loss": 0.2352, "step": 40970 }, { "epoch": 2.1843767490205486, "grad_norm": 1.729961895177647, "learning_rate": 5.664200450637268e-07, "loss": 0.2389, "step": 40980 }, { "epoch": 2.184909783854375, "grad_norm": 1.7377335399456209, "learning_rate": 5.657303409747553e-07, "loss": 0.2327, "step": 40990 }, { "epoch": 2.1854428186882013, "grad_norm": 1.8816422231335426, "learning_rate": 5.65040959450299e-07, "loss": 0.231, "step": 41000 }, { "epoch": 2.1859758535220277, "grad_norm": 1.7219088073980446, "learning_rate": 5.64351900728373e-07, "loss": 0.2362, "step": 41010 }, { "epoch": 2.186508888355854, "grad_norm": 1.774979949990636, "learning_rate": 5.636631650468805e-07, "loss": 0.2333, "step": 41020 }, { "epoch": 2.1870419231896805, "grad_norm": 1.8455125136844523, "learning_rate": 5.629747526436134e-07, "loss": 0.2432, "step": 41030 }, { "epoch": 2.187574958023507, "grad_norm": 1.8011976764329316, "learning_rate": 5.622866637562521e-07, "loss": 0.2459, "step": 41040 }, { "epoch": 2.1881079928573333, "grad_norm": 2.2607482696987216, "learning_rate": 5.615988986223652e-07, "loss": 0.2369, "step": 41050 }, { "epoch": 2.1886410276911596, "grad_norm": 1.8643421860625518, "learning_rate": 5.609114574794086e-07, "loss": 0.2355, "step": 41060 }, { "epoch": 2.189174062524986, "grad_norm": 1.8478859369761511, "learning_rate": 5.602243405647279e-07, "loss": 0.2301, "step": 41070 }, { "epoch": 2.1897070973588124, "grad_norm": 1.7790646393456686, "learning_rate": 5.595375481155558e-07, "loss": 0.2276, "step": 41080 }, { "epoch": 2.190240132192639, "grad_norm": 1.7567693055835896, "learning_rate": 5.588510803690131e-07, "loss": 0.2247, "step": 41090 }, { "epoch": 2.190773167026465, "grad_norm": 1.8364057387463932, "learning_rate": 5.58164937562109e-07, "loss": 0.2233, "step": 41100 }, { "epoch": 2.1913062018602916, "grad_norm": 1.8249598384629306, "learning_rate": 5.5747911993174e-07, "loss": 0.2394, "step": 41110 }, { "epoch": 2.191839236694118, "grad_norm": 1.8999814847080447, "learning_rate": 5.567936277146903e-07, "loss": 0.2326, "step": 41120 }, { "epoch": 2.1923722715279443, "grad_norm": 1.9969572751593117, "learning_rate": 5.561084611476318e-07, "loss": 0.2364, "step": 41130 }, { "epoch": 2.1929053063617707, "grad_norm": 1.6343258671086636, "learning_rate": 5.554236204671247e-07, "loss": 0.2379, "step": 41140 }, { "epoch": 2.193438341195597, "grad_norm": 2.048404009499756, "learning_rate": 5.547391059096153e-07, "loss": 0.231, "step": 41150 }, { "epoch": 2.1939713760294235, "grad_norm": 1.8255090738376567, "learning_rate": 5.54054917711438e-07, "loss": 0.2336, "step": 41160 }, { "epoch": 2.19450441086325, "grad_norm": 1.9027710365258481, "learning_rate": 5.53371056108815e-07, "loss": 0.2428, "step": 41170 }, { "epoch": 2.1950374456970763, "grad_norm": 1.9699946266229345, "learning_rate": 5.526875213378554e-07, "loss": 0.2377, "step": 41180 }, { "epoch": 2.1955704805309026, "grad_norm": 1.8604479866525307, "learning_rate": 5.520043136345549e-07, "loss": 0.2366, "step": 41190 }, { "epoch": 2.196103515364729, "grad_norm": 1.9590906199732714, "learning_rate": 5.513214332347971e-07, "loss": 0.2424, "step": 41200 }, { "epoch": 2.1966365501985554, "grad_norm": 1.815167864508418, "learning_rate": 5.50638880374352e-07, "loss": 0.234, "step": 41210 }, { "epoch": 2.197169585032382, "grad_norm": 2.045426959205902, "learning_rate": 5.499566552888778e-07, "loss": 0.2299, "step": 41220 }, { "epoch": 2.197702619866208, "grad_norm": 1.8629147751819057, "learning_rate": 5.492747582139168e-07, "loss": 0.2385, "step": 41230 }, { "epoch": 2.1982356547000346, "grad_norm": 1.953497543100977, "learning_rate": 5.485931893849007e-07, "loss": 0.241, "step": 41240 }, { "epoch": 2.198768689533861, "grad_norm": 1.855971223680553, "learning_rate": 5.479119490371465e-07, "loss": 0.2356, "step": 41250 }, { "epoch": 2.1993017243676873, "grad_norm": 1.821970187769495, "learning_rate": 5.472310374058588e-07, "loss": 0.2341, "step": 41260 }, { "epoch": 2.1998347592015137, "grad_norm": 1.8890834742967249, "learning_rate": 5.465504547261275e-07, "loss": 0.237, "step": 41270 }, { "epoch": 2.20036779403534, "grad_norm": 1.885150850743481, "learning_rate": 5.4587020123293e-07, "loss": 0.2365, "step": 41280 }, { "epoch": 2.2009008288691665, "grad_norm": 1.8907399258728894, "learning_rate": 5.451902771611293e-07, "loss": 0.2305, "step": 41290 }, { "epoch": 2.201433863702993, "grad_norm": 1.7072965913464644, "learning_rate": 5.44510682745475e-07, "loss": 0.2266, "step": 41300 }, { "epoch": 2.2019668985368193, "grad_norm": 1.7210767756656278, "learning_rate": 5.438314182206034e-07, "loss": 0.2355, "step": 41310 }, { "epoch": 2.2024999333706456, "grad_norm": 1.792967201798984, "learning_rate": 5.431524838210353e-07, "loss": 0.2332, "step": 41320 }, { "epoch": 2.203032968204472, "grad_norm": 1.9881998980737898, "learning_rate": 5.424738797811792e-07, "loss": 0.2313, "step": 41330 }, { "epoch": 2.2035660030382984, "grad_norm": 1.7870206056336637, "learning_rate": 5.417956063353286e-07, "loss": 0.2365, "step": 41340 }, { "epoch": 2.204099037872125, "grad_norm": 1.994831152860379, "learning_rate": 5.411176637176633e-07, "loss": 0.238, "step": 41350 }, { "epoch": 2.204632072705951, "grad_norm": 1.7070181368201565, "learning_rate": 5.404400521622487e-07, "loss": 0.2347, "step": 41360 }, { "epoch": 2.2051651075397776, "grad_norm": 1.9823732922440238, "learning_rate": 5.397627719030361e-07, "loss": 0.2294, "step": 41370 }, { "epoch": 2.205698142373604, "grad_norm": 1.9182536798723082, "learning_rate": 5.390858231738619e-07, "loss": 0.2452, "step": 41380 }, { "epoch": 2.2062311772074303, "grad_norm": 1.929649682275285, "learning_rate": 5.384092062084492e-07, "loss": 0.2524, "step": 41390 }, { "epoch": 2.2067642120412567, "grad_norm": 2.0523633893198703, "learning_rate": 5.377329212404047e-07, "loss": 0.2317, "step": 41400 }, { "epoch": 2.207297246875083, "grad_norm": 1.8742211349317188, "learning_rate": 5.370569685032219e-07, "loss": 0.2362, "step": 41410 }, { "epoch": 2.2078302817089095, "grad_norm": 1.848327079031136, "learning_rate": 5.363813482302793e-07, "loss": 0.2441, "step": 41420 }, { "epoch": 2.208363316542736, "grad_norm": 1.8637500405563077, "learning_rate": 5.357060606548405e-07, "loss": 0.2399, "step": 41430 }, { "epoch": 2.2088963513765623, "grad_norm": 1.8348135464263529, "learning_rate": 5.350311060100541e-07, "loss": 0.2369, "step": 41440 }, { "epoch": 2.2094293862103886, "grad_norm": 1.8620144851029274, "learning_rate": 5.343564845289542e-07, "loss": 0.2359, "step": 41450 }, { "epoch": 2.2099624210442155, "grad_norm": 2.205022243332574, "learning_rate": 5.336821964444593e-07, "loss": 0.2271, "step": 41460 }, { "epoch": 2.210495455878042, "grad_norm": 1.8624232532760654, "learning_rate": 5.330082419893739e-07, "loss": 0.2324, "step": 41470 }, { "epoch": 2.2110284907118682, "grad_norm": 2.189729306612113, "learning_rate": 5.323346213963851e-07, "loss": 0.2353, "step": 41480 }, { "epoch": 2.2115615255456946, "grad_norm": 1.876837513457624, "learning_rate": 5.316613348980668e-07, "loss": 0.2397, "step": 41490 }, { "epoch": 2.212094560379521, "grad_norm": 1.7472747201796073, "learning_rate": 5.30988382726877e-07, "loss": 0.2276, "step": 41500 }, { "epoch": 2.2126275952133474, "grad_norm": 1.942813019035239, "learning_rate": 5.303157651151581e-07, "loss": 0.2322, "step": 41510 }, { "epoch": 2.213160630047174, "grad_norm": 1.8815600048930214, "learning_rate": 5.296434822951368e-07, "loss": 0.2333, "step": 41520 }, { "epoch": 2.213693664881, "grad_norm": 2.084876747021146, "learning_rate": 5.289715344989247e-07, "loss": 0.2397, "step": 41530 }, { "epoch": 2.2142266997148266, "grad_norm": 1.8256909397652323, "learning_rate": 5.282999219585172e-07, "loss": 0.2327, "step": 41540 }, { "epoch": 2.214759734548653, "grad_norm": 1.773299026851792, "learning_rate": 5.276286449057943e-07, "loss": 0.23, "step": 41550 }, { "epoch": 2.2152927693824793, "grad_norm": 1.7732854459541896, "learning_rate": 5.269577035725208e-07, "loss": 0.2448, "step": 41560 }, { "epoch": 2.2158258042163057, "grad_norm": 1.7331647752262196, "learning_rate": 5.262870981903439e-07, "loss": 0.2311, "step": 41570 }, { "epoch": 2.216358839050132, "grad_norm": 2.0154771642934586, "learning_rate": 5.256168289907959e-07, "loss": 0.2275, "step": 41580 }, { "epoch": 2.2168918738839585, "grad_norm": 1.861152621679387, "learning_rate": 5.249468962052933e-07, "loss": 0.2276, "step": 41590 }, { "epoch": 2.217424908717785, "grad_norm": 1.8022987639062718, "learning_rate": 5.242773000651357e-07, "loss": 0.233, "step": 41600 }, { "epoch": 2.2179579435516112, "grad_norm": 1.7446226513462637, "learning_rate": 5.236080408015073e-07, "loss": 0.2246, "step": 41610 }, { "epoch": 2.2184909783854376, "grad_norm": 1.714695028136221, "learning_rate": 5.229391186454751e-07, "loss": 0.2243, "step": 41620 }, { "epoch": 2.219024013219264, "grad_norm": 1.6550759358329208, "learning_rate": 5.222705338279906e-07, "loss": 0.2326, "step": 41630 }, { "epoch": 2.2195570480530904, "grad_norm": 1.8502921022397325, "learning_rate": 5.216022865798884e-07, "loss": 0.2259, "step": 41640 }, { "epoch": 2.220090082886917, "grad_norm": 1.8028172807314409, "learning_rate": 5.209343771318857e-07, "loss": 0.2293, "step": 41650 }, { "epoch": 2.220623117720743, "grad_norm": 1.6723396627343226, "learning_rate": 5.202668057145846e-07, "loss": 0.2328, "step": 41660 }, { "epoch": 2.2211561525545696, "grad_norm": 1.797152027330531, "learning_rate": 5.195995725584696e-07, "loss": 0.2346, "step": 41670 }, { "epoch": 2.221689187388396, "grad_norm": 1.8658059936380529, "learning_rate": 5.189326778939085e-07, "loss": 0.2265, "step": 41680 }, { "epoch": 2.2222222222222223, "grad_norm": 2.097308845740392, "learning_rate": 5.182661219511525e-07, "loss": 0.2354, "step": 41690 }, { "epoch": 2.2227552570560487, "grad_norm": 1.688084102481659, "learning_rate": 5.175999049603357e-07, "loss": 0.2345, "step": 41700 }, { "epoch": 2.223288291889875, "grad_norm": 1.806918329911081, "learning_rate": 5.169340271514751e-07, "loss": 0.2334, "step": 41710 }, { "epoch": 2.2238213267237015, "grad_norm": 1.8065267488088768, "learning_rate": 5.162684887544705e-07, "loss": 0.2341, "step": 41720 }, { "epoch": 2.224354361557528, "grad_norm": 1.6287845961942662, "learning_rate": 5.156032899991055e-07, "loss": 0.2321, "step": 41730 }, { "epoch": 2.2248873963913542, "grad_norm": 1.7870541928190262, "learning_rate": 5.149384311150442e-07, "loss": 0.2361, "step": 41740 }, { "epoch": 2.2254204312251806, "grad_norm": 1.8632902957098911, "learning_rate": 5.142739123318358e-07, "loss": 0.2406, "step": 41750 }, { "epoch": 2.225953466059007, "grad_norm": 1.7904847228036171, "learning_rate": 5.136097338789105e-07, "loss": 0.2413, "step": 41760 }, { "epoch": 2.2264865008928334, "grad_norm": 1.8578067378892353, "learning_rate": 5.129458959855818e-07, "loss": 0.2315, "step": 41770 }, { "epoch": 2.22701953572666, "grad_norm": 1.7122818821507455, "learning_rate": 5.122823988810454e-07, "loss": 0.2333, "step": 41780 }, { "epoch": 2.227552570560486, "grad_norm": 1.8435971896283132, "learning_rate": 5.116192427943792e-07, "loss": 0.2323, "step": 41790 }, { "epoch": 2.2280856053943126, "grad_norm": 1.7828977196808466, "learning_rate": 5.109564279545435e-07, "loss": 0.2263, "step": 41800 }, { "epoch": 2.228618640228139, "grad_norm": 1.7134832623698832, "learning_rate": 5.102939545903812e-07, "loss": 0.229, "step": 41810 }, { "epoch": 2.2291516750619653, "grad_norm": 2.0030313148881174, "learning_rate": 5.09631822930616e-07, "loss": 0.2308, "step": 41820 }, { "epoch": 2.2296847098957917, "grad_norm": 1.838416068085716, "learning_rate": 5.089700332038545e-07, "loss": 0.2249, "step": 41830 }, { "epoch": 2.230217744729618, "grad_norm": 1.8378146046419626, "learning_rate": 5.083085856385858e-07, "loss": 0.232, "step": 41840 }, { "epoch": 2.2307507795634445, "grad_norm": 1.8746900208680886, "learning_rate": 5.0764748046318e-07, "loss": 0.2337, "step": 41850 }, { "epoch": 2.231283814397271, "grad_norm": 1.7921558846555603, "learning_rate": 5.069867179058892e-07, "loss": 0.2367, "step": 41860 }, { "epoch": 2.2318168492310972, "grad_norm": 1.8708803751035725, "learning_rate": 5.063262981948475e-07, "loss": 0.2407, "step": 41870 }, { "epoch": 2.2323498840649236, "grad_norm": 1.8768091311257802, "learning_rate": 5.0566622155807e-07, "loss": 0.2362, "step": 41880 }, { "epoch": 2.23288291889875, "grad_norm": 1.8554853006993999, "learning_rate": 5.050064882234542e-07, "loss": 0.2254, "step": 41890 }, { "epoch": 2.2334159537325764, "grad_norm": 1.8157190078437806, "learning_rate": 5.043470984187787e-07, "loss": 0.2281, "step": 41900 }, { "epoch": 2.233948988566403, "grad_norm": 1.8774213062895782, "learning_rate": 5.036880523717029e-07, "loss": 0.2308, "step": 41910 }, { "epoch": 2.234482023400229, "grad_norm": 1.870961308820473, "learning_rate": 5.030293503097682e-07, "loss": 0.2286, "step": 41920 }, { "epoch": 2.2350150582340556, "grad_norm": 1.7131023886177141, "learning_rate": 5.023709924603971e-07, "loss": 0.2245, "step": 41930 }, { "epoch": 2.235548093067882, "grad_norm": 1.9098054247283778, "learning_rate": 5.017129790508932e-07, "loss": 0.2339, "step": 41940 }, { "epoch": 2.2360811279017083, "grad_norm": 1.834768247406661, "learning_rate": 5.010553103084414e-07, "loss": 0.2392, "step": 41950 }, { "epoch": 2.2366141627355347, "grad_norm": 1.8753774032794372, "learning_rate": 5.003979864601072e-07, "loss": 0.2356, "step": 41960 }, { "epoch": 2.237147197569361, "grad_norm": 2.113872789190751, "learning_rate": 4.997410077328374e-07, "loss": 0.2397, "step": 41970 }, { "epoch": 2.2376802324031875, "grad_norm": 2.063779534936551, "learning_rate": 4.990843743534598e-07, "loss": 0.2193, "step": 41980 }, { "epoch": 2.238213267237014, "grad_norm": 1.8658433844855866, "learning_rate": 4.984280865486817e-07, "loss": 0.2281, "step": 41990 }, { "epoch": 2.2387463020708402, "grad_norm": 1.9347700022938346, "learning_rate": 4.977721445450926e-07, "loss": 0.2317, "step": 42000 }, { "epoch": 2.2392793369046666, "grad_norm": 1.6694069475944362, "learning_rate": 4.971165485691618e-07, "loss": 0.2211, "step": 42010 }, { "epoch": 2.239812371738493, "grad_norm": 1.7828251662860373, "learning_rate": 4.964612988472398e-07, "loss": 0.2316, "step": 42020 }, { "epoch": 2.2403454065723194, "grad_norm": 2.1127487130410025, "learning_rate": 4.958063956055567e-07, "loss": 0.2291, "step": 42030 }, { "epoch": 2.240878441406146, "grad_norm": 1.8674174795271359, "learning_rate": 4.95151839070224e-07, "loss": 0.2314, "step": 42040 }, { "epoch": 2.241411476239972, "grad_norm": 1.9101220646138868, "learning_rate": 4.944976294672321e-07, "loss": 0.229, "step": 42050 }, { "epoch": 2.2419445110737986, "grad_norm": 1.85493508855506, "learning_rate": 4.938437670224526e-07, "loss": 0.2311, "step": 42060 }, { "epoch": 2.242477545907625, "grad_norm": 1.8403463607809167, "learning_rate": 4.931902519616373e-07, "loss": 0.2346, "step": 42070 }, { "epoch": 2.2430105807414513, "grad_norm": 1.849465526135925, "learning_rate": 4.925370845104177e-07, "loss": 0.2363, "step": 42080 }, { "epoch": 2.2435436155752777, "grad_norm": 1.760393165622021, "learning_rate": 4.918842648943058e-07, "loss": 0.2406, "step": 42090 }, { "epoch": 2.244076650409104, "grad_norm": 1.8126003007161957, "learning_rate": 4.912317933386922e-07, "loss": 0.2375, "step": 42100 }, { "epoch": 2.2446096852429305, "grad_norm": 1.737873700508675, "learning_rate": 4.905796700688489e-07, "loss": 0.2351, "step": 42110 }, { "epoch": 2.245142720076757, "grad_norm": 2.091441772225788, "learning_rate": 4.899278953099266e-07, "loss": 0.2322, "step": 42120 }, { "epoch": 2.2456757549105832, "grad_norm": 1.8487757068908606, "learning_rate": 4.892764692869566e-07, "loss": 0.2388, "step": 42130 }, { "epoch": 2.2462087897444096, "grad_norm": 1.9634130849931193, "learning_rate": 4.886253922248488e-07, "loss": 0.2317, "step": 42140 }, { "epoch": 2.246741824578236, "grad_norm": 1.8722736397728355, "learning_rate": 4.87974664348393e-07, "loss": 0.2304, "step": 42150 }, { "epoch": 2.2472748594120624, "grad_norm": 1.9453801643001631, "learning_rate": 4.873242858822589e-07, "loss": 0.2334, "step": 42160 }, { "epoch": 2.247807894245889, "grad_norm": 1.938295100944413, "learning_rate": 4.866742570509956e-07, "loss": 0.2265, "step": 42170 }, { "epoch": 2.248340929079715, "grad_norm": 1.9087832182764721, "learning_rate": 4.860245780790298e-07, "loss": 0.2431, "step": 42180 }, { "epoch": 2.2488739639135416, "grad_norm": 1.947170735985685, "learning_rate": 4.853752491906693e-07, "loss": 0.2294, "step": 42190 }, { "epoch": 2.249406998747368, "grad_norm": 1.743819283349934, "learning_rate": 4.847262706101004e-07, "loss": 0.234, "step": 42200 }, { "epoch": 2.2499400335811943, "grad_norm": 1.672139376639322, "learning_rate": 4.840776425613887e-07, "loss": 0.2315, "step": 42210 }, { "epoch": 2.2504730684150207, "grad_norm": 1.8625421344128692, "learning_rate": 4.834293652684781e-07, "loss": 0.2371, "step": 42220 }, { "epoch": 2.251006103248847, "grad_norm": 1.9270640605908427, "learning_rate": 4.827814389551921e-07, "loss": 0.2346, "step": 42230 }, { "epoch": 2.2515391380826735, "grad_norm": 1.939093175886227, "learning_rate": 4.821338638452327e-07, "loss": 0.227, "step": 42240 }, { "epoch": 2.2520721729165, "grad_norm": 1.8811465193785428, "learning_rate": 4.814866401621808e-07, "loss": 0.2311, "step": 42250 }, { "epoch": 2.2526052077503262, "grad_norm": 1.6655322973270514, "learning_rate": 4.80839768129496e-07, "loss": 0.2331, "step": 42260 }, { "epoch": 2.2531382425841526, "grad_norm": 1.8888896409919291, "learning_rate": 4.801932479705157e-07, "loss": 0.2389, "step": 42270 }, { "epoch": 2.2536712774179795, "grad_norm": 1.7826633957754323, "learning_rate": 4.795470799084569e-07, "loss": 0.2458, "step": 42280 }, { "epoch": 2.254204312251806, "grad_norm": 2.0284305155667903, "learning_rate": 4.789012641664145e-07, "loss": 0.234, "step": 42290 }, { "epoch": 2.2547373470856322, "grad_norm": 2.120019229343177, "learning_rate": 4.782558009673619e-07, "loss": 0.2346, "step": 42300 }, { "epoch": 2.2552703819194586, "grad_norm": 1.7441441734890615, "learning_rate": 4.776106905341507e-07, "loss": 0.2224, "step": 42310 }, { "epoch": 2.255803416753285, "grad_norm": 1.8788600256722958, "learning_rate": 4.769659330895109e-07, "loss": 0.2312, "step": 42320 }, { "epoch": 2.2563364515871114, "grad_norm": 1.8752322772145442, "learning_rate": 4.7632152885605025e-07, "loss": 0.2385, "step": 42330 }, { "epoch": 2.2568694864209378, "grad_norm": 1.7736666883910046, "learning_rate": 4.756774780562554e-07, "loss": 0.2354, "step": 42340 }, { "epoch": 2.257402521254764, "grad_norm": 1.7587975426282534, "learning_rate": 4.7503378091248947e-07, "loss": 0.2338, "step": 42350 }, { "epoch": 2.2579355560885905, "grad_norm": 1.978167736350367, "learning_rate": 4.7439043764699447e-07, "loss": 0.2285, "step": 42360 }, { "epoch": 2.258468590922417, "grad_norm": 1.7672834261207926, "learning_rate": 4.737474484818905e-07, "loss": 0.2344, "step": 42370 }, { "epoch": 2.2590016257562433, "grad_norm": 1.8658786813493369, "learning_rate": 4.7310481363917494e-07, "loss": 0.2371, "step": 42380 }, { "epoch": 2.2595346605900697, "grad_norm": 1.8627629990173118, "learning_rate": 4.724625333407227e-07, "loss": 0.2286, "step": 42390 }, { "epoch": 2.260067695423896, "grad_norm": 1.6903194752508697, "learning_rate": 4.7182060780828667e-07, "loss": 0.2376, "step": 42400 }, { "epoch": 2.2606007302577225, "grad_norm": 2.107018056391333, "learning_rate": 4.711790372634971e-07, "loss": 0.2349, "step": 42410 }, { "epoch": 2.261133765091549, "grad_norm": 1.9341597934084553, "learning_rate": 4.7053782192786193e-07, "loss": 0.2288, "step": 42420 }, { "epoch": 2.2616667999253752, "grad_norm": 1.9583370673619234, "learning_rate": 4.698969620227656e-07, "loss": 0.228, "step": 42430 }, { "epoch": 2.2621998347592016, "grad_norm": 1.6745201940271226, "learning_rate": 4.692564577694705e-07, "loss": 0.2406, "step": 42440 }, { "epoch": 2.262732869593028, "grad_norm": 1.851860195602235, "learning_rate": 4.6861630938911643e-07, "loss": 0.2271, "step": 42450 }, { "epoch": 2.2632659044268544, "grad_norm": 1.710943831084753, "learning_rate": 4.679765171027199e-07, "loss": 0.2208, "step": 42460 }, { "epoch": 2.2637989392606808, "grad_norm": 1.7970447050825435, "learning_rate": 4.6733708113117465e-07, "loss": 0.2338, "step": 42470 }, { "epoch": 2.264331974094507, "grad_norm": 1.8789122968307477, "learning_rate": 4.666980016952514e-07, "loss": 0.2323, "step": 42480 }, { "epoch": 2.2648650089283335, "grad_norm": 1.7167903847047923, "learning_rate": 4.660592790155977e-07, "loss": 0.2308, "step": 42490 }, { "epoch": 2.26539804376216, "grad_norm": 1.8706389285247746, "learning_rate": 4.6542091331273814e-07, "loss": 0.2468, "step": 42500 }, { "epoch": 2.2659310785959863, "grad_norm": 1.6938167223822291, "learning_rate": 4.64782904807074e-07, "loss": 0.2306, "step": 42510 }, { "epoch": 2.2664641134298127, "grad_norm": 1.8478433154322607, "learning_rate": 4.641452537188824e-07, "loss": 0.2308, "step": 42520 }, { "epoch": 2.266997148263639, "grad_norm": 1.9754760861091705, "learning_rate": 4.635079602683183e-07, "loss": 0.243, "step": 42530 }, { "epoch": 2.2675301830974655, "grad_norm": 1.8397041044859581, "learning_rate": 4.6287102467541266e-07, "loss": 0.2408, "step": 42540 }, { "epoch": 2.268063217931292, "grad_norm": 1.8496143151326303, "learning_rate": 4.622344471600728e-07, "loss": 0.2282, "step": 42550 }, { "epoch": 2.2685962527651182, "grad_norm": 1.9766410879762266, "learning_rate": 4.615982279420824e-07, "loss": 0.2419, "step": 42560 }, { "epoch": 2.2691292875989446, "grad_norm": 1.8251922862618457, "learning_rate": 4.6096236724110176e-07, "loss": 0.2313, "step": 42570 }, { "epoch": 2.269662322432771, "grad_norm": 1.6797407330918674, "learning_rate": 4.6032686527666706e-07, "loss": 0.2332, "step": 42580 }, { "epoch": 2.2701953572665974, "grad_norm": 2.10280804168582, "learning_rate": 4.596917222681912e-07, "loss": 0.2361, "step": 42590 }, { "epoch": 2.2707283921004238, "grad_norm": 1.8013404493877903, "learning_rate": 4.5905693843496184e-07, "loss": 0.2321, "step": 42600 }, { "epoch": 2.27126142693425, "grad_norm": 1.7743338907602508, "learning_rate": 4.5842251399614374e-07, "loss": 0.2368, "step": 42610 }, { "epoch": 2.2717944617680765, "grad_norm": 1.8691707983203496, "learning_rate": 4.5778844917077763e-07, "loss": 0.2297, "step": 42620 }, { "epoch": 2.272327496601903, "grad_norm": 1.857685041241757, "learning_rate": 4.571547441777794e-07, "loss": 0.2322, "step": 42630 }, { "epoch": 2.2728605314357293, "grad_norm": 1.8790157892044763, "learning_rate": 4.5652139923594146e-07, "loss": 0.2352, "step": 42640 }, { "epoch": 2.2733935662695557, "grad_norm": 1.8117625637832968, "learning_rate": 4.558884145639311e-07, "loss": 0.2375, "step": 42650 }, { "epoch": 2.273926601103382, "grad_norm": 1.7990210539400835, "learning_rate": 4.552557903802919e-07, "loss": 0.2294, "step": 42660 }, { "epoch": 2.2744596359372085, "grad_norm": 1.7693751564385993, "learning_rate": 4.546235269034428e-07, "loss": 0.2317, "step": 42670 }, { "epoch": 2.274992670771035, "grad_norm": 1.8876326125492982, "learning_rate": 4.539916243516784e-07, "loss": 0.2371, "step": 42680 }, { "epoch": 2.2755257056048612, "grad_norm": 1.947364438316141, "learning_rate": 4.5336008294316744e-07, "loss": 0.232, "step": 42690 }, { "epoch": 2.2760587404386876, "grad_norm": 1.7971182646760888, "learning_rate": 4.5272890289595555e-07, "loss": 0.239, "step": 42700 }, { "epoch": 2.276591775272514, "grad_norm": 1.999241739908646, "learning_rate": 4.5209808442796283e-07, "loss": 0.2316, "step": 42710 }, { "epoch": 2.2771248101063404, "grad_norm": 1.7267725470441606, "learning_rate": 4.51467627756985e-07, "loss": 0.2315, "step": 42720 }, { "epoch": 2.2776578449401668, "grad_norm": 1.8409026628441352, "learning_rate": 4.5083753310069245e-07, "loss": 0.2344, "step": 42730 }, { "epoch": 2.278190879773993, "grad_norm": 1.7260263407615797, "learning_rate": 4.5020780067663037e-07, "loss": 0.2248, "step": 42740 }, { "epoch": 2.2787239146078195, "grad_norm": 1.795052678220571, "learning_rate": 4.495784307022196e-07, "loss": 0.2375, "step": 42750 }, { "epoch": 2.279256949441646, "grad_norm": 1.8149960275508512, "learning_rate": 4.489494233947558e-07, "loss": 0.2272, "step": 42760 }, { "epoch": 2.2797899842754723, "grad_norm": 1.9428445748313783, "learning_rate": 4.483207789714081e-07, "loss": 0.239, "step": 42770 }, { "epoch": 2.2803230191092987, "grad_norm": 1.8301121821728776, "learning_rate": 4.4769249764922186e-07, "loss": 0.2284, "step": 42780 }, { "epoch": 2.280856053943125, "grad_norm": 1.8378815741989343, "learning_rate": 4.470645796451164e-07, "loss": 0.2292, "step": 42790 }, { "epoch": 2.2813890887769515, "grad_norm": 2.1277228878300356, "learning_rate": 4.4643702517588584e-07, "loss": 0.2317, "step": 42800 }, { "epoch": 2.281922123610778, "grad_norm": 1.771124578339705, "learning_rate": 4.458098344581987e-07, "loss": 0.2264, "step": 42810 }, { "epoch": 2.2824551584446042, "grad_norm": 1.8387020335790323, "learning_rate": 4.451830077085978e-07, "loss": 0.2335, "step": 42820 }, { "epoch": 2.2829881932784306, "grad_norm": 1.8135657261329514, "learning_rate": 4.4455654514350053e-07, "loss": 0.2254, "step": 42830 }, { "epoch": 2.283521228112257, "grad_norm": 1.902499338620403, "learning_rate": 4.4393044697919826e-07, "loss": 0.2391, "step": 42840 }, { "epoch": 2.2840542629460834, "grad_norm": 1.9089766654981986, "learning_rate": 4.4330471343185716e-07, "loss": 0.2332, "step": 42850 }, { "epoch": 2.2845872977799098, "grad_norm": 1.8034748005744983, "learning_rate": 4.42679344717516e-07, "loss": 0.2302, "step": 42860 }, { "epoch": 2.285120332613736, "grad_norm": 1.8426650090008008, "learning_rate": 4.420543410520894e-07, "loss": 0.2359, "step": 42870 }, { "epoch": 2.2856533674475625, "grad_norm": 2.033473062759244, "learning_rate": 4.414297026513648e-07, "loss": 0.2368, "step": 42880 }, { "epoch": 2.286186402281389, "grad_norm": 1.5954590444033023, "learning_rate": 4.408054297310044e-07, "loss": 0.2303, "step": 42890 }, { "epoch": 2.2867194371152153, "grad_norm": 1.7093432020464265, "learning_rate": 4.401815225065432e-07, "loss": 0.2361, "step": 42900 }, { "epoch": 2.2872524719490417, "grad_norm": 1.8502023256520532, "learning_rate": 4.3955798119339073e-07, "loss": 0.2257, "step": 42910 }, { "epoch": 2.287785506782868, "grad_norm": 1.735471106677551, "learning_rate": 4.3893480600682983e-07, "loss": 0.2385, "step": 42920 }, { "epoch": 2.2883185416166945, "grad_norm": 1.662535673374314, "learning_rate": 4.3831199716201755e-07, "loss": 0.2286, "step": 42930 }, { "epoch": 2.2888515764505213, "grad_norm": 1.7316375105576984, "learning_rate": 4.376895548739828e-07, "loss": 0.2323, "step": 42940 }, { "epoch": 2.2893846112843477, "grad_norm": 1.7395390591431699, "learning_rate": 4.370674793576298e-07, "loss": 0.2293, "step": 42950 }, { "epoch": 2.289917646118174, "grad_norm": 1.9915582547965862, "learning_rate": 4.364457708277352e-07, "loss": 0.2246, "step": 42960 }, { "epoch": 2.2904506809520004, "grad_norm": 1.9231096806475911, "learning_rate": 4.358244294989492e-07, "loss": 0.2345, "step": 42970 }, { "epoch": 2.290983715785827, "grad_norm": 1.9004509022148988, "learning_rate": 4.3520345558579503e-07, "loss": 0.2312, "step": 42980 }, { "epoch": 2.291516750619653, "grad_norm": 2.0325938137160837, "learning_rate": 4.3458284930266947e-07, "loss": 0.2337, "step": 42990 }, { "epoch": 2.2920497854534796, "grad_norm": 1.7956676692823648, "learning_rate": 4.339626108638419e-07, "loss": 0.2424, "step": 43000 }, { "epoch": 2.292582820287306, "grad_norm": 1.9228644954546887, "learning_rate": 4.3334274048345483e-07, "loss": 0.2375, "step": 43010 }, { "epoch": 2.2931158551211324, "grad_norm": 2.0439539585184088, "learning_rate": 4.327232383755245e-07, "loss": 0.2268, "step": 43020 }, { "epoch": 2.2936488899549587, "grad_norm": 1.9244388483606711, "learning_rate": 4.3210410475393796e-07, "loss": 0.2305, "step": 43030 }, { "epoch": 2.294181924788785, "grad_norm": 1.7583810860383617, "learning_rate": 4.314853398324572e-07, "loss": 0.2348, "step": 43040 }, { "epoch": 2.2947149596226115, "grad_norm": 1.8156478766029809, "learning_rate": 4.3086694382471566e-07, "loss": 0.2268, "step": 43050 }, { "epoch": 2.295247994456438, "grad_norm": 1.885166228652532, "learning_rate": 4.302489169442202e-07, "loss": 0.2371, "step": 43060 }, { "epoch": 2.2957810292902643, "grad_norm": 1.8584555041910376, "learning_rate": 4.2963125940434947e-07, "loss": 0.2327, "step": 43070 }, { "epoch": 2.2963140641240907, "grad_norm": 1.8611022220316689, "learning_rate": 4.2901397141835514e-07, "loss": 0.2335, "step": 43080 }, { "epoch": 2.296847098957917, "grad_norm": 2.0853339141325935, "learning_rate": 4.283970531993613e-07, "loss": 0.2317, "step": 43090 }, { "epoch": 2.2973801337917434, "grad_norm": 1.6461176827686417, "learning_rate": 4.2778050496036427e-07, "loss": 0.2395, "step": 43100 }, { "epoch": 2.29791316862557, "grad_norm": 1.9070429360533467, "learning_rate": 4.271643269142319e-07, "loss": 0.2275, "step": 43110 }, { "epoch": 2.298446203459396, "grad_norm": 1.8666758070460823, "learning_rate": 4.2654851927370524e-07, "loss": 0.2391, "step": 43120 }, { "epoch": 2.2989792382932226, "grad_norm": 1.7049743747771595, "learning_rate": 4.2593308225139706e-07, "loss": 0.2309, "step": 43130 }, { "epoch": 2.299512273127049, "grad_norm": 1.715478967492305, "learning_rate": 4.253180160597923e-07, "loss": 0.2394, "step": 43140 }, { "epoch": 2.3000453079608754, "grad_norm": 1.7614232042048674, "learning_rate": 4.2470332091124785e-07, "loss": 0.2356, "step": 43150 }, { "epoch": 2.3005783427947017, "grad_norm": 2.1090699973170675, "learning_rate": 4.240889970179921e-07, "loss": 0.2372, "step": 43160 }, { "epoch": 2.301111377628528, "grad_norm": 1.9380812697639274, "learning_rate": 4.23475044592126e-07, "loss": 0.2288, "step": 43170 }, { "epoch": 2.3016444124623545, "grad_norm": 1.890436193855489, "learning_rate": 4.228614638456215e-07, "loss": 0.2282, "step": 43180 }, { "epoch": 2.302177447296181, "grad_norm": 1.9718528479527335, "learning_rate": 4.222482549903232e-07, "loss": 0.2348, "step": 43190 }, { "epoch": 2.3027104821300073, "grad_norm": 1.7844388520509364, "learning_rate": 4.2163541823794554e-07, "loss": 0.2405, "step": 43200 }, { "epoch": 2.3032435169638337, "grad_norm": 1.8660275891759122, "learning_rate": 4.2102295380007626e-07, "loss": 0.22, "step": 43210 }, { "epoch": 2.30377655179766, "grad_norm": 1.7614593371642695, "learning_rate": 4.2041086188817375e-07, "loss": 0.2287, "step": 43220 }, { "epoch": 2.3043095866314864, "grad_norm": 1.9126634574050185, "learning_rate": 4.197991427135681e-07, "loss": 0.2355, "step": 43230 }, { "epoch": 2.304842621465313, "grad_norm": 1.9077801620819665, "learning_rate": 4.191877964874603e-07, "loss": 0.2286, "step": 43240 }, { "epoch": 2.305375656299139, "grad_norm": 1.9181295595242365, "learning_rate": 4.185768234209231e-07, "loss": 0.2328, "step": 43250 }, { "epoch": 2.3059086911329656, "grad_norm": 2.079126291961258, "learning_rate": 4.179662237248998e-07, "loss": 0.2349, "step": 43260 }, { "epoch": 2.306441725966792, "grad_norm": 2.077282808355432, "learning_rate": 4.1735599761020583e-07, "loss": 0.235, "step": 43270 }, { "epoch": 2.3069747608006184, "grad_norm": 1.8201828178811226, "learning_rate": 4.1674614528752604e-07, "loss": 0.2375, "step": 43280 }, { "epoch": 2.3075077956344447, "grad_norm": 1.7940395279305905, "learning_rate": 4.161366669674173e-07, "loss": 0.2326, "step": 43290 }, { "epoch": 2.308040830468271, "grad_norm": 1.9452268109562, "learning_rate": 4.1552756286030734e-07, "loss": 0.2303, "step": 43300 }, { "epoch": 2.3085738653020975, "grad_norm": 1.8500163335966857, "learning_rate": 4.149188331764946e-07, "loss": 0.234, "step": 43310 }, { "epoch": 2.309106900135924, "grad_norm": 1.9389897917401564, "learning_rate": 4.143104781261479e-07, "loss": 0.2196, "step": 43320 }, { "epoch": 2.3096399349697503, "grad_norm": 1.836229719492679, "learning_rate": 4.137024979193073e-07, "loss": 0.2321, "step": 43330 }, { "epoch": 2.3101729698035767, "grad_norm": 2.003268492653125, "learning_rate": 4.1309489276588286e-07, "loss": 0.2272, "step": 43340 }, { "epoch": 2.310706004637403, "grad_norm": 1.9834491676416712, "learning_rate": 4.1248766287565557e-07, "loss": 0.242, "step": 43350 }, { "epoch": 2.3112390394712294, "grad_norm": 1.6075310032902455, "learning_rate": 4.11880808458277e-07, "loss": 0.2277, "step": 43360 }, { "epoch": 2.311772074305056, "grad_norm": 1.843460793192278, "learning_rate": 4.112743297232681e-07, "loss": 0.2271, "step": 43370 }, { "epoch": 2.312305109138882, "grad_norm": 1.764649918319197, "learning_rate": 4.106682268800209e-07, "loss": 0.2342, "step": 43380 }, { "epoch": 2.3128381439727086, "grad_norm": 1.882159740560088, "learning_rate": 4.1006250013779815e-07, "loss": 0.2379, "step": 43390 }, { "epoch": 2.313371178806535, "grad_norm": 1.7846466642214778, "learning_rate": 4.094571497057315e-07, "loss": 0.2436, "step": 43400 }, { "epoch": 2.3139042136403614, "grad_norm": 1.7503959927259862, "learning_rate": 4.088521757928234e-07, "loss": 0.2282, "step": 43410 }, { "epoch": 2.3144372484741877, "grad_norm": 1.9495438425812188, "learning_rate": 4.082475786079463e-07, "loss": 0.2181, "step": 43420 }, { "epoch": 2.314970283308014, "grad_norm": 2.067940658477297, "learning_rate": 4.0764335835984254e-07, "loss": 0.2316, "step": 43430 }, { "epoch": 2.3155033181418405, "grad_norm": 1.9238285432771345, "learning_rate": 4.070395152571244e-07, "loss": 0.2303, "step": 43440 }, { "epoch": 2.316036352975667, "grad_norm": 1.7596627263325204, "learning_rate": 4.064360495082737e-07, "loss": 0.2391, "step": 43450 }, { "epoch": 2.3165693878094933, "grad_norm": 1.8634308869698077, "learning_rate": 4.0583296132164273e-07, "loss": 0.2313, "step": 43460 }, { "epoch": 2.3171024226433197, "grad_norm": 1.9099722976756353, "learning_rate": 4.0523025090545163e-07, "loss": 0.2272, "step": 43470 }, { "epoch": 2.317635457477146, "grad_norm": 1.9128345573092822, "learning_rate": 4.0462791846779185e-07, "loss": 0.2336, "step": 43480 }, { "epoch": 2.3181684923109724, "grad_norm": 2.0177637064665435, "learning_rate": 4.0402596421662367e-07, "loss": 0.2292, "step": 43490 }, { "epoch": 2.318701527144799, "grad_norm": 1.980972372806886, "learning_rate": 4.0342438835977706e-07, "loss": 0.2358, "step": 43500 }, { "epoch": 2.319234561978625, "grad_norm": 1.9184681073209102, "learning_rate": 4.02823191104951e-07, "loss": 0.2349, "step": 43510 }, { "epoch": 2.3197675968124516, "grad_norm": 1.7722290851541211, "learning_rate": 4.022223726597142e-07, "loss": 0.2316, "step": 43520 }, { "epoch": 2.320300631646278, "grad_norm": 1.8864562719843854, "learning_rate": 4.016219332315041e-07, "loss": 0.2326, "step": 43530 }, { "epoch": 2.3208336664801044, "grad_norm": 1.8069693763879173, "learning_rate": 4.010218730276281e-07, "loss": 0.2275, "step": 43540 }, { "epoch": 2.3213667013139307, "grad_norm": 2.0232109121801005, "learning_rate": 4.0042219225526084e-07, "loss": 0.2376, "step": 43550 }, { "epoch": 2.321899736147757, "grad_norm": 2.003864463839099, "learning_rate": 3.9982289112144816e-07, "loss": 0.2452, "step": 43560 }, { "epoch": 2.3224327709815835, "grad_norm": 1.9419991368143972, "learning_rate": 3.992239698331034e-07, "loss": 0.2249, "step": 43570 }, { "epoch": 2.32296580581541, "grad_norm": 1.9471153096981155, "learning_rate": 3.9862542859700947e-07, "loss": 0.2285, "step": 43580 }, { "epoch": 2.3234988406492363, "grad_norm": 2.000337669290855, "learning_rate": 3.9802726761981753e-07, "loss": 0.2359, "step": 43590 }, { "epoch": 2.3240318754830627, "grad_norm": 1.8254717959218814, "learning_rate": 3.974294871080482e-07, "loss": 0.2361, "step": 43600 }, { "epoch": 2.324564910316889, "grad_norm": 1.8650594242645446, "learning_rate": 3.968320872680897e-07, "loss": 0.2375, "step": 43610 }, { "epoch": 2.3250979451507154, "grad_norm": 1.725944562856312, "learning_rate": 3.9623506830619977e-07, "loss": 0.2246, "step": 43620 }, { "epoch": 2.325630979984542, "grad_norm": 1.91347340765011, "learning_rate": 3.9563843042850474e-07, "loss": 0.2488, "step": 43630 }, { "epoch": 2.326164014818368, "grad_norm": 1.6147853364370393, "learning_rate": 3.950421738409978e-07, "loss": 0.2291, "step": 43640 }, { "epoch": 2.3266970496521946, "grad_norm": 1.9493235160935352, "learning_rate": 3.944462987495419e-07, "loss": 0.2422, "step": 43650 }, { "epoch": 2.327230084486021, "grad_norm": 1.8332262940892994, "learning_rate": 3.938508053598684e-07, "loss": 0.2316, "step": 43660 }, { "epoch": 2.3277631193198474, "grad_norm": 1.7974237308584196, "learning_rate": 3.932556938775761e-07, "loss": 0.2303, "step": 43670 }, { "epoch": 2.3282961541536737, "grad_norm": 1.7566820981179891, "learning_rate": 3.9266096450813234e-07, "loss": 0.227, "step": 43680 }, { "epoch": 2.3288291889875, "grad_norm": 1.8340465834411588, "learning_rate": 3.920666174568725e-07, "loss": 0.2266, "step": 43690 }, { "epoch": 2.3293622238213265, "grad_norm": 1.839618194147695, "learning_rate": 3.9147265292900015e-07, "loss": 0.239, "step": 43700 }, { "epoch": 2.329895258655153, "grad_norm": 1.703268206510014, "learning_rate": 3.9087907112958655e-07, "loss": 0.233, "step": 43710 }, { "epoch": 2.3304282934889793, "grad_norm": 1.741300518072399, "learning_rate": 3.9028587226357044e-07, "loss": 0.2335, "step": 43720 }, { "epoch": 2.3309613283228057, "grad_norm": 2.047601950855497, "learning_rate": 3.8969305653575883e-07, "loss": 0.2269, "step": 43730 }, { "epoch": 2.331494363156632, "grad_norm": 1.8594699461092736, "learning_rate": 3.8910062415082665e-07, "loss": 0.228, "step": 43740 }, { "epoch": 2.3320273979904584, "grad_norm": 1.9306105713331234, "learning_rate": 3.8850857531331595e-07, "loss": 0.2404, "step": 43750 }, { "epoch": 2.332560432824285, "grad_norm": 1.8770415640069713, "learning_rate": 3.8791691022763694e-07, "loss": 0.2403, "step": 43760 }, { "epoch": 2.333093467658111, "grad_norm": 1.9193973920535186, "learning_rate": 3.873256290980666e-07, "loss": 0.2374, "step": 43770 }, { "epoch": 2.333626502491938, "grad_norm": 1.853722157096408, "learning_rate": 3.867347321287501e-07, "loss": 0.234, "step": 43780 }, { "epoch": 2.3341595373257644, "grad_norm": 1.7944476421511701, "learning_rate": 3.8614421952369934e-07, "loss": 0.2338, "step": 43790 }, { "epoch": 2.334692572159591, "grad_norm": 1.7157673748499487, "learning_rate": 3.8555409148679436e-07, "loss": 0.2322, "step": 43800 }, { "epoch": 2.335225606993417, "grad_norm": 1.971296922817286, "learning_rate": 3.849643482217811e-07, "loss": 0.2252, "step": 43810 }, { "epoch": 2.3357586418272436, "grad_norm": 1.8794398375986827, "learning_rate": 3.8437498993227383e-07, "loss": 0.2235, "step": 43820 }, { "epoch": 2.33629167666107, "grad_norm": 1.7166018963175234, "learning_rate": 3.837860168217532e-07, "loss": 0.2302, "step": 43830 }, { "epoch": 2.3368247114948963, "grad_norm": 2.1268001172631745, "learning_rate": 3.8319742909356753e-07, "loss": 0.2319, "step": 43840 }, { "epoch": 2.3373577463287227, "grad_norm": 1.858222160507147, "learning_rate": 3.826092269509315e-07, "loss": 0.2369, "step": 43850 }, { "epoch": 2.337890781162549, "grad_norm": 1.786878946659758, "learning_rate": 3.8202141059692715e-07, "loss": 0.2359, "step": 43860 }, { "epoch": 2.3384238159963755, "grad_norm": 1.7700973961809217, "learning_rate": 3.8143398023450264e-07, "loss": 0.2392, "step": 43870 }, { "epoch": 2.338956850830202, "grad_norm": 1.8965529849909888, "learning_rate": 3.8084693606647396e-07, "loss": 0.2387, "step": 43880 }, { "epoch": 2.3394898856640283, "grad_norm": 2.0219125392641604, "learning_rate": 3.8026027829552227e-07, "loss": 0.229, "step": 43890 }, { "epoch": 2.3400229204978547, "grad_norm": 1.575383109924614, "learning_rate": 3.7967400712419656e-07, "loss": 0.2367, "step": 43900 }, { "epoch": 2.340555955331681, "grad_norm": 1.7119252406645349, "learning_rate": 3.7908812275491177e-07, "loss": 0.2298, "step": 43910 }, { "epoch": 2.3410889901655074, "grad_norm": 1.9026835919537153, "learning_rate": 3.7850262538994956e-07, "loss": 0.2394, "step": 43920 }, { "epoch": 2.341622024999334, "grad_norm": 1.60726293277227, "learning_rate": 3.7791751523145776e-07, "loss": 0.227, "step": 43930 }, { "epoch": 2.34215505983316, "grad_norm": 1.8636574122114802, "learning_rate": 3.773327924814507e-07, "loss": 0.2438, "step": 43940 }, { "epoch": 2.3426880946669866, "grad_norm": 2.0283459079073896, "learning_rate": 3.767484573418088e-07, "loss": 0.2335, "step": 43950 }, { "epoch": 2.343221129500813, "grad_norm": 1.7168210068373488, "learning_rate": 3.761645100142788e-07, "loss": 0.2338, "step": 43960 }, { "epoch": 2.3437541643346393, "grad_norm": 1.708005975434955, "learning_rate": 3.75580950700474e-07, "loss": 0.2248, "step": 43970 }, { "epoch": 2.3442871991684657, "grad_norm": 1.9192579123528652, "learning_rate": 3.7499777960187197e-07, "loss": 0.2323, "step": 43980 }, { "epoch": 2.344820234002292, "grad_norm": 2.0162790446700325, "learning_rate": 3.7441499691981806e-07, "loss": 0.234, "step": 43990 }, { "epoch": 2.3453532688361185, "grad_norm": 1.9992310121658998, "learning_rate": 3.738326028555231e-07, "loss": 0.2351, "step": 44000 }, { "epoch": 2.345886303669945, "grad_norm": 2.081128203600095, "learning_rate": 3.732505976100635e-07, "loss": 0.2346, "step": 44010 }, { "epoch": 2.3464193385037713, "grad_norm": 2.0993624655991776, "learning_rate": 3.726689813843814e-07, "loss": 0.2296, "step": 44020 }, { "epoch": 2.3469523733375977, "grad_norm": 1.9986459316718512, "learning_rate": 3.720877543792849e-07, "loss": 0.2269, "step": 44030 }, { "epoch": 2.347485408171424, "grad_norm": 1.6067567539726733, "learning_rate": 3.7150691679544727e-07, "loss": 0.2315, "step": 44040 }, { "epoch": 2.3480184430052504, "grad_norm": 1.9850228955740072, "learning_rate": 3.7092646883340837e-07, "loss": 0.2399, "step": 44050 }, { "epoch": 2.348551477839077, "grad_norm": 1.895731229034648, "learning_rate": 3.703464106935716e-07, "loss": 0.2373, "step": 44060 }, { "epoch": 2.349084512672903, "grad_norm": 2.138490873402173, "learning_rate": 3.697667425762075e-07, "loss": 0.2333, "step": 44070 }, { "epoch": 2.3496175475067296, "grad_norm": 1.8225858237580619, "learning_rate": 3.6918746468145166e-07, "loss": 0.236, "step": 44080 }, { "epoch": 2.350150582340556, "grad_norm": 1.8938366352265967, "learning_rate": 3.686085772093042e-07, "loss": 0.2483, "step": 44090 }, { "epoch": 2.3506836171743823, "grad_norm": 1.9139577165775592, "learning_rate": 3.6803008035963126e-07, "loss": 0.2321, "step": 44100 }, { "epoch": 2.3512166520082087, "grad_norm": 2.053253716019063, "learning_rate": 3.6745197433216374e-07, "loss": 0.2354, "step": 44110 }, { "epoch": 2.351749686842035, "grad_norm": 1.8316551235267415, "learning_rate": 3.668742593264975e-07, "loss": 0.2391, "step": 44120 }, { "epoch": 2.3522827216758615, "grad_norm": 1.904036068525322, "learning_rate": 3.662969355420939e-07, "loss": 0.2308, "step": 44130 }, { "epoch": 2.352815756509688, "grad_norm": 1.8338111282682406, "learning_rate": 3.657200031782788e-07, "loss": 0.2338, "step": 44140 }, { "epoch": 2.3533487913435143, "grad_norm": 1.8796984246894208, "learning_rate": 3.6514346243424236e-07, "loss": 0.2402, "step": 44150 }, { "epoch": 2.3538818261773407, "grad_norm": 1.729343016486441, "learning_rate": 3.645673135090406e-07, "loss": 0.2279, "step": 44160 }, { "epoch": 2.354414861011167, "grad_norm": 1.942982037980061, "learning_rate": 3.639915566015939e-07, "loss": 0.2378, "step": 44170 }, { "epoch": 2.3549478958449934, "grad_norm": 1.8061841086698494, "learning_rate": 3.6341619191068706e-07, "loss": 0.2354, "step": 44180 }, { "epoch": 2.35548093067882, "grad_norm": 1.7595150339179584, "learning_rate": 3.6284121963496963e-07, "loss": 0.2398, "step": 44190 }, { "epoch": 2.356013965512646, "grad_norm": 1.80208043235444, "learning_rate": 3.622666399729558e-07, "loss": 0.2408, "step": 44200 }, { "epoch": 2.3565470003464726, "grad_norm": 1.9068596203402068, "learning_rate": 3.616924531230238e-07, "loss": 0.236, "step": 44210 }, { "epoch": 2.357080035180299, "grad_norm": 1.827092880708015, "learning_rate": 3.611186592834173e-07, "loss": 0.2336, "step": 44220 }, { "epoch": 2.3576130700141253, "grad_norm": 1.586436840595486, "learning_rate": 3.6054525865224236e-07, "loss": 0.2293, "step": 44230 }, { "epoch": 2.3581461048479517, "grad_norm": 1.866734222890412, "learning_rate": 3.5997225142747077e-07, "loss": 0.231, "step": 44240 }, { "epoch": 2.358679139681778, "grad_norm": 1.9085351986533177, "learning_rate": 3.5939963780693833e-07, "loss": 0.2411, "step": 44250 }, { "epoch": 2.3592121745156045, "grad_norm": 1.941876936676392, "learning_rate": 3.588274179883448e-07, "loss": 0.2398, "step": 44260 }, { "epoch": 2.359745209349431, "grad_norm": 1.7552977939621326, "learning_rate": 3.582555921692537e-07, "loss": 0.2338, "step": 44270 }, { "epoch": 2.3602782441832573, "grad_norm": 1.87750006585796, "learning_rate": 3.576841605470928e-07, "loss": 0.2365, "step": 44280 }, { "epoch": 2.3608112790170837, "grad_norm": 1.8335900004663377, "learning_rate": 3.5711312331915386e-07, "loss": 0.2349, "step": 44290 }, { "epoch": 2.36134431385091, "grad_norm": 1.7586720884536853, "learning_rate": 3.5654248068259197e-07, "loss": 0.2331, "step": 44300 }, { "epoch": 2.3618773486847364, "grad_norm": 1.8497322933294977, "learning_rate": 3.5597223283442727e-07, "loss": 0.2325, "step": 44310 }, { "epoch": 2.362410383518563, "grad_norm": 1.948999700885997, "learning_rate": 3.5540237997154136e-07, "loss": 0.2347, "step": 44320 }, { "epoch": 2.362943418352389, "grad_norm": 2.089930263418612, "learning_rate": 3.5483292229068135e-07, "loss": 0.233, "step": 44330 }, { "epoch": 2.3634764531862156, "grad_norm": 1.8252607559134169, "learning_rate": 3.5426385998845756e-07, "loss": 0.2257, "step": 44340 }, { "epoch": 2.364009488020042, "grad_norm": 1.910239711368906, "learning_rate": 3.5369519326134326e-07, "loss": 0.2414, "step": 44350 }, { "epoch": 2.3645425228538683, "grad_norm": 1.9372822163075252, "learning_rate": 3.531269223056754e-07, "loss": 0.2369, "step": 44360 }, { "epoch": 2.3650755576876947, "grad_norm": 1.7658933635224532, "learning_rate": 3.525590473176547e-07, "loss": 0.2282, "step": 44370 }, { "epoch": 2.365608592521521, "grad_norm": 1.8203418278159127, "learning_rate": 3.5199156849334453e-07, "loss": 0.2285, "step": 44380 }, { "epoch": 2.3661416273553475, "grad_norm": 1.903342823333316, "learning_rate": 3.5142448602867225e-07, "loss": 0.2301, "step": 44390 }, { "epoch": 2.366674662189174, "grad_norm": 1.914778583067269, "learning_rate": 3.50857800119427e-07, "loss": 0.2337, "step": 44400 }, { "epoch": 2.3672076970230003, "grad_norm": 1.7224811445103003, "learning_rate": 3.5029151096126245e-07, "loss": 0.2384, "step": 44410 }, { "epoch": 2.3677407318568267, "grad_norm": 1.82220783002562, "learning_rate": 3.497256187496946e-07, "loss": 0.2307, "step": 44420 }, { "epoch": 2.3682737666906535, "grad_norm": 1.7308138081951363, "learning_rate": 3.4916012368010256e-07, "loss": 0.2266, "step": 44430 }, { "epoch": 2.36880680152448, "grad_norm": 1.8043586888479446, "learning_rate": 3.4859502594772853e-07, "loss": 0.23, "step": 44440 }, { "epoch": 2.3693398363583063, "grad_norm": 1.8561944883823565, "learning_rate": 3.4803032574767687e-07, "loss": 0.231, "step": 44450 }, { "epoch": 2.3698728711921326, "grad_norm": 1.8076154914225688, "learning_rate": 3.474660232749153e-07, "loss": 0.2213, "step": 44460 }, { "epoch": 2.370405906025959, "grad_norm": 1.927947948267964, "learning_rate": 3.469021187242741e-07, "loss": 0.227, "step": 44470 }, { "epoch": 2.3709389408597854, "grad_norm": 1.9109891051459649, "learning_rate": 3.4633861229044634e-07, "loss": 0.2304, "step": 44480 }, { "epoch": 2.371471975693612, "grad_norm": 1.933937067531113, "learning_rate": 3.4577550416798676e-07, "loss": 0.2315, "step": 44490 }, { "epoch": 2.372005010527438, "grad_norm": 1.8457800873991088, "learning_rate": 3.4521279455131355e-07, "loss": 0.235, "step": 44500 }, { "epoch": 2.3725380453612646, "grad_norm": 1.9410414547652006, "learning_rate": 3.4465048363470694e-07, "loss": 0.2375, "step": 44510 }, { "epoch": 2.373071080195091, "grad_norm": 1.727851345591523, "learning_rate": 3.4408857161230954e-07, "loss": 0.2261, "step": 44520 }, { "epoch": 2.3736041150289173, "grad_norm": 1.896459098773258, "learning_rate": 3.435270586781263e-07, "loss": 0.2408, "step": 44530 }, { "epoch": 2.3741371498627437, "grad_norm": 1.9590896210390245, "learning_rate": 3.4296594502602425e-07, "loss": 0.2342, "step": 44540 }, { "epoch": 2.37467018469657, "grad_norm": 1.8361239227182435, "learning_rate": 3.424052308497326e-07, "loss": 0.2373, "step": 44550 }, { "epoch": 2.3752032195303965, "grad_norm": 1.8046521580889512, "learning_rate": 3.4184491634284324e-07, "loss": 0.2288, "step": 44560 }, { "epoch": 2.375736254364223, "grad_norm": 1.868681825665443, "learning_rate": 3.4128500169880857e-07, "loss": 0.2333, "step": 44570 }, { "epoch": 2.3762692891980493, "grad_norm": 1.8731354530554936, "learning_rate": 3.407254871109442e-07, "loss": 0.2329, "step": 44580 }, { "epoch": 2.3768023240318756, "grad_norm": 1.8840990667162818, "learning_rate": 3.4016637277242737e-07, "loss": 0.2326, "step": 44590 }, { "epoch": 2.377335358865702, "grad_norm": 1.7767063171137862, "learning_rate": 3.396076588762969e-07, "loss": 0.2325, "step": 44600 }, { "epoch": 2.3778683936995284, "grad_norm": 1.9163964727088443, "learning_rate": 3.3904934561545373e-07, "loss": 0.2372, "step": 44610 }, { "epoch": 2.378401428533355, "grad_norm": 1.836263090050185, "learning_rate": 3.3849143318266034e-07, "loss": 0.2261, "step": 44620 }, { "epoch": 2.378934463367181, "grad_norm": 1.8265048798532915, "learning_rate": 3.3793392177054025e-07, "loss": 0.2406, "step": 44630 }, { "epoch": 2.3794674982010076, "grad_norm": 2.0797589363804834, "learning_rate": 3.373768115715799e-07, "loss": 0.2323, "step": 44640 }, { "epoch": 2.380000533034834, "grad_norm": 1.8147188310523423, "learning_rate": 3.3682010277812504e-07, "loss": 0.2422, "step": 44650 }, { "epoch": 2.3805335678686603, "grad_norm": 2.01982338281718, "learning_rate": 3.362637955823849e-07, "loss": 0.2361, "step": 44660 }, { "epoch": 2.3810666027024867, "grad_norm": 1.8751944560296212, "learning_rate": 3.357078901764291e-07, "loss": 0.2364, "step": 44670 }, { "epoch": 2.381599637536313, "grad_norm": 1.75777002273749, "learning_rate": 3.351523867521886e-07, "loss": 0.2325, "step": 44680 }, { "epoch": 2.3821326723701395, "grad_norm": 1.7277700396005058, "learning_rate": 3.3459728550145583e-07, "loss": 0.2367, "step": 44690 }, { "epoch": 2.382665707203966, "grad_norm": 2.297881216656287, "learning_rate": 3.340425866158841e-07, "loss": 0.2417, "step": 44700 }, { "epoch": 2.3831987420377923, "grad_norm": 1.761500407236202, "learning_rate": 3.3348829028698783e-07, "loss": 0.2333, "step": 44710 }, { "epoch": 2.3837317768716186, "grad_norm": 1.8977749758015268, "learning_rate": 3.329343967061427e-07, "loss": 0.2286, "step": 44720 }, { "epoch": 2.384264811705445, "grad_norm": 2.0101267240735914, "learning_rate": 3.3238090606458544e-07, "loss": 0.234, "step": 44730 }, { "epoch": 2.3847978465392714, "grad_norm": 1.8596815489904357, "learning_rate": 3.318278185534126e-07, "loss": 0.227, "step": 44740 }, { "epoch": 2.385330881373098, "grad_norm": 1.6824458830117548, "learning_rate": 3.3127513436358324e-07, "loss": 0.2389, "step": 44750 }, { "epoch": 2.385863916206924, "grad_norm": 1.6644788570158902, "learning_rate": 3.307228536859153e-07, "loss": 0.2419, "step": 44760 }, { "epoch": 2.3863969510407506, "grad_norm": 1.8699469264626025, "learning_rate": 3.3017097671108876e-07, "loss": 0.2446, "step": 44770 }, { "epoch": 2.386929985874577, "grad_norm": 2.0404994297497745, "learning_rate": 3.29619503629644e-07, "loss": 0.2275, "step": 44780 }, { "epoch": 2.3874630207084033, "grad_norm": 1.7679521272542127, "learning_rate": 3.290684346319815e-07, "loss": 0.235, "step": 44790 }, { "epoch": 2.3879960555422297, "grad_norm": 1.8270346535223767, "learning_rate": 3.285177699083629e-07, "loss": 0.236, "step": 44800 }, { "epoch": 2.388529090376056, "grad_norm": 1.7096500971707846, "learning_rate": 3.2796750964890946e-07, "loss": 0.2326, "step": 44810 }, { "epoch": 2.3890621252098825, "grad_norm": 2.0496169854011987, "learning_rate": 3.274176540436034e-07, "loss": 0.2379, "step": 44820 }, { "epoch": 2.389595160043709, "grad_norm": 1.6942251010521172, "learning_rate": 3.2686820328228734e-07, "loss": 0.2338, "step": 44830 }, { "epoch": 2.3901281948775353, "grad_norm": 2.0017609267722407, "learning_rate": 3.263191575546632e-07, "loss": 0.2375, "step": 44840 }, { "epoch": 2.3906612297113616, "grad_norm": 2.077435475975477, "learning_rate": 3.2577051705029365e-07, "loss": 0.229, "step": 44850 }, { "epoch": 2.391194264545188, "grad_norm": 1.7167448395510585, "learning_rate": 3.25222281958602e-07, "loss": 0.2325, "step": 44860 }, { "epoch": 2.3917272993790144, "grad_norm": 1.9193049587787723, "learning_rate": 3.246744524688708e-07, "loss": 0.227, "step": 44870 }, { "epoch": 2.392260334212841, "grad_norm": 1.9681925185217501, "learning_rate": 3.2412702877024274e-07, "loss": 0.2368, "step": 44880 }, { "epoch": 2.392793369046667, "grad_norm": 1.74948522690138, "learning_rate": 3.235800110517206e-07, "loss": 0.2387, "step": 44890 }, { "epoch": 2.3933264038804936, "grad_norm": 1.921881904525258, "learning_rate": 3.2303339950216687e-07, "loss": 0.2352, "step": 44900 }, { "epoch": 2.39385943871432, "grad_norm": 2.0156324457037624, "learning_rate": 3.224871943103038e-07, "loss": 0.2255, "step": 44910 }, { "epoch": 2.3943924735481463, "grad_norm": 2.0184942248269238, "learning_rate": 3.219413956647138e-07, "loss": 0.2395, "step": 44920 }, { "epoch": 2.3949255083819727, "grad_norm": 1.977277192837676, "learning_rate": 3.213960037538376e-07, "loss": 0.2382, "step": 44930 }, { "epoch": 2.395458543215799, "grad_norm": 1.9722131989752463, "learning_rate": 3.208510187659769e-07, "loss": 0.2255, "step": 44940 }, { "epoch": 2.3959915780496255, "grad_norm": 1.7815611620797984, "learning_rate": 3.2030644088929226e-07, "loss": 0.2379, "step": 44950 }, { "epoch": 2.396524612883452, "grad_norm": 1.7455627849084154, "learning_rate": 3.1976227031180387e-07, "loss": 0.2239, "step": 44960 }, { "epoch": 2.3970576477172783, "grad_norm": 1.6978419794211683, "learning_rate": 3.1921850722139123e-07, "loss": 0.2334, "step": 44970 }, { "epoch": 2.3975906825511046, "grad_norm": 2.4973798112075034, "learning_rate": 3.1867515180579317e-07, "loss": 0.2302, "step": 44980 }, { "epoch": 2.398123717384931, "grad_norm": 1.9424031813213634, "learning_rate": 3.1813220425260757e-07, "loss": 0.2343, "step": 44990 }, { "epoch": 2.3986567522187574, "grad_norm": 1.870039107482244, "learning_rate": 3.175896647492924e-07, "loss": 0.2325, "step": 45000 }, { "epoch": 2.399189787052584, "grad_norm": 1.7563257979622962, "learning_rate": 3.170475334831631e-07, "loss": 0.2307, "step": 45010 }, { "epoch": 2.39972282188641, "grad_norm": 1.9611417379053857, "learning_rate": 3.1650581064139533e-07, "loss": 0.231, "step": 45020 } ], "logging_steps": 10, "max_steps": 56280, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5628, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3453884985475072.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }