{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.0, "eval_steps": 500, "global_step": 325038, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.307188697936857e-05, "learning_rate": 5e-09, "loss": 15.3469, "step": 1 }, { "epoch": 0.0008614377395873713, "learning_rate": 1e-07, "loss": 13.5266, "step": 20 }, { "epoch": 0.0017228754791747427, "learning_rate": 2e-07, "loss": 12.4249, "step": 40 }, { "epoch": 0.002584313218762114, "learning_rate": 3e-07, "loss": 11.9215, "step": 60 }, { "epoch": 0.0034457509583494853, "learning_rate": 4e-07, "loss": 10.5898, "step": 80 }, { "epoch": 0.004307188697936857, "learning_rate": 5e-07, "loss": 9.9173, "step": 100 }, { "epoch": 0.005168626437524228, "learning_rate": 6e-07, "loss": 8.5743, "step": 120 }, { "epoch": 0.0060300641771116, "learning_rate": 7e-07, "loss": 7.9646, "step": 140 }, { "epoch": 0.006891501916698971, "learning_rate": 8e-07, "loss": 7.5993, "step": 160 }, { "epoch": 0.007752939656286342, "learning_rate": 9e-07, "loss": 7.2787, "step": 180 }, { "epoch": 0.008614377395873713, "learning_rate": 1e-06, "loss": 7.1685, "step": 200 }, { "epoch": 0.009475815135461085, "learning_rate": 9.99951518024521e-07, "loss": 6.7489, "step": 220 }, { "epoch": 0.010337252875048455, "learning_rate": 9.999030360490423e-07, "loss": 7.4429, "step": 240 }, { "epoch": 0.011198690614635827, "learning_rate": 9.998545540735632e-07, "loss": 7.4136, "step": 260 }, { "epoch": 0.0120601283542232, "learning_rate": 9.998060720980845e-07, "loss": 6.8118, "step": 280 }, { "epoch": 0.01292156609381057, "learning_rate": 9.997575901226053e-07, "loss": 7.0537, "step": 300 }, { "epoch": 0.013783003833397941, "learning_rate": 9.997091081471266e-07, "loss": 6.7556, "step": 320 }, { "epoch": 0.014644441572985313, "learning_rate": 9.996606261716477e-07, "loss": 7.0601, "step": 340 }, { "epoch": 0.015505879312572683, "learning_rate": 9.996121441961687e-07, "loss": 6.8547, "step": 360 }, { "epoch": 0.016367317052160057, "learning_rate": 9.9956366222069e-07, "loss": 6.4108, "step": 380 }, { "epoch": 0.017228754791747427, "learning_rate": 9.995151802452109e-07, "loss": 6.5316, "step": 400 }, { "epoch": 0.018090192531334797, "learning_rate": 9.994666982697322e-07, "loss": 6.4739, "step": 420 }, { "epoch": 0.01895163027092217, "learning_rate": 9.994182162942532e-07, "loss": 6.8201, "step": 440 }, { "epoch": 0.01981306801050954, "learning_rate": 9.993697343187743e-07, "loss": 6.4731, "step": 460 }, { "epoch": 0.02067450575009691, "learning_rate": 9.993212523432956e-07, "loss": 6.5669, "step": 480 }, { "epoch": 0.021535943489684285, "learning_rate": 9.992727703678165e-07, "loss": 6.6863, "step": 500 }, { "epoch": 0.022397381229271655, "learning_rate": 9.992242883923377e-07, "loss": 6.7291, "step": 520 }, { "epoch": 0.023258818968859025, "learning_rate": 9.991758064168586e-07, "loss": 6.2971, "step": 540 }, { "epoch": 0.0241202567084464, "learning_rate": 9.991273244413799e-07, "loss": 6.1858, "step": 560 }, { "epoch": 0.02498169444803377, "learning_rate": 9.99078842465901e-07, "loss": 6.5758, "step": 580 }, { "epoch": 0.02584313218762114, "learning_rate": 9.99030360490422e-07, "loss": 6.186, "step": 600 }, { "epoch": 0.026704569927208512, "learning_rate": 9.989818785149433e-07, "loss": 6.4182, "step": 620 }, { "epoch": 0.027566007666795882, "learning_rate": 9.989333965394644e-07, "loss": 6.263, "step": 640 }, { "epoch": 0.028427445406383253, "learning_rate": 9.988849145639854e-07, "loss": 6.0137, "step": 660 }, { "epoch": 0.029288883145970626, "learning_rate": 9.988364325885065e-07, "loss": 6.2632, "step": 680 }, { "epoch": 0.030150320885557996, "learning_rate": 9.987879506130276e-07, "loss": 6.3874, "step": 700 }, { "epoch": 0.031011758625145366, "learning_rate": 9.987394686375489e-07, "loss": 6.338, "step": 720 }, { "epoch": 0.03187319636473274, "learning_rate": 9.986909866620697e-07, "loss": 6.4615, "step": 740 }, { "epoch": 0.032734634104320114, "learning_rate": 9.98642504686591e-07, "loss": 6.3216, "step": 760 }, { "epoch": 0.033596071843907484, "learning_rate": 9.985940227111119e-07, "loss": 6.3571, "step": 780 }, { "epoch": 0.034457509583494854, "learning_rate": 9.985455407356332e-07, "loss": 6.2426, "step": 800 }, { "epoch": 0.035318947323082224, "learning_rate": 9.984970587601542e-07, "loss": 6.2687, "step": 820 }, { "epoch": 0.036180385062669594, "learning_rate": 9.984485767846753e-07, "loss": 6.2717, "step": 840 }, { "epoch": 0.037041822802256964, "learning_rate": 9.984000948091966e-07, "loss": 6.1245, "step": 860 }, { "epoch": 0.03790326054184434, "learning_rate": 9.983516128337174e-07, "loss": 6.281, "step": 880 }, { "epoch": 0.03876469828143171, "learning_rate": 9.983031308582387e-07, "loss": 6.3632, "step": 900 }, { "epoch": 0.03962613602101908, "learning_rate": 9.982546488827596e-07, "loss": 6.3535, "step": 920 }, { "epoch": 0.04048757376060645, "learning_rate": 9.982061669072809e-07, "loss": 6.0799, "step": 940 }, { "epoch": 0.04134901150019382, "learning_rate": 9.98157684931802e-07, "loss": 6.4344, "step": 960 }, { "epoch": 0.04221044923978119, "learning_rate": 9.98109202956323e-07, "loss": 6.2517, "step": 980 }, { "epoch": 0.04307188697936857, "learning_rate": 9.980607209808443e-07, "loss": 5.9678, "step": 1000 }, { "epoch": 0.04393332471895594, "learning_rate": 9.980122390053654e-07, "loss": 6.0589, "step": 1020 }, { "epoch": 0.04479476245854331, "learning_rate": 9.979637570298864e-07, "loss": 6.3632, "step": 1040 }, { "epoch": 0.04565620019813068, "learning_rate": 9.979152750544075e-07, "loss": 6.265, "step": 1060 }, { "epoch": 0.04651763793771805, "learning_rate": 9.978667930789286e-07, "loss": 6.0993, "step": 1080 }, { "epoch": 0.04737907567730542, "learning_rate": 9.978183111034499e-07, "loss": 6.1147, "step": 1100 }, { "epoch": 0.0482405134168928, "learning_rate": 9.977698291279707e-07, "loss": 6.1485, "step": 1120 }, { "epoch": 0.04910195115648017, "learning_rate": 9.97721347152492e-07, "loss": 5.9857, "step": 1140 }, { "epoch": 0.04996338889606754, "learning_rate": 9.976728651770129e-07, "loss": 6.0165, "step": 1160 }, { "epoch": 0.05082482663565491, "learning_rate": 9.976243832015342e-07, "loss": 6.176, "step": 1180 }, { "epoch": 0.05168626437524228, "learning_rate": 9.975759012260552e-07, "loss": 6.2797, "step": 1200 }, { "epoch": 0.05254770211482965, "learning_rate": 9.975274192505763e-07, "loss": 6.1033, "step": 1220 }, { "epoch": 0.053409139854417025, "learning_rate": 9.974789372750976e-07, "loss": 5.9836, "step": 1240 }, { "epoch": 0.054270577594004395, "learning_rate": 9.974304552996186e-07, "loss": 6.2921, "step": 1260 }, { "epoch": 0.055132015333591765, "learning_rate": 9.973819733241397e-07, "loss": 6.3446, "step": 1280 }, { "epoch": 0.055993453073179135, "learning_rate": 9.973334913486606e-07, "loss": 6.1988, "step": 1300 }, { "epoch": 0.056854890812766505, "learning_rate": 9.972850093731819e-07, "loss": 6.1577, "step": 1320 }, { "epoch": 0.057716328552353875, "learning_rate": 9.97236527397703e-07, "loss": 5.9894, "step": 1340 }, { "epoch": 0.05857776629194125, "learning_rate": 9.97188045422224e-07, "loss": 6.2019, "step": 1360 }, { "epoch": 0.05943920403152862, "learning_rate": 9.971395634467453e-07, "loss": 5.8516, "step": 1380 }, { "epoch": 0.06030064177111599, "learning_rate": 9.970910814712664e-07, "loss": 6.0954, "step": 1400 }, { "epoch": 0.06116207951070336, "learning_rate": 9.970425994957874e-07, "loss": 6.0892, "step": 1420 }, { "epoch": 0.06202351725029073, "learning_rate": 9.969941175203085e-07, "loss": 5.8398, "step": 1440 }, { "epoch": 0.0628849549898781, "learning_rate": 9.969456355448296e-07, "loss": 6.0239, "step": 1460 }, { "epoch": 0.06374639272946547, "learning_rate": 9.968971535693509e-07, "loss": 6.0563, "step": 1480 }, { "epoch": 0.06460783046905284, "learning_rate": 9.968486715938717e-07, "loss": 6.1168, "step": 1500 }, { "epoch": 0.06546926820864023, "learning_rate": 9.96800189618393e-07, "loss": 5.8443, "step": 1520 }, { "epoch": 0.0663307059482276, "learning_rate": 9.967517076429139e-07, "loss": 5.8785, "step": 1540 }, { "epoch": 0.06719214368781497, "learning_rate": 9.967032256674351e-07, "loss": 6.0316, "step": 1560 }, { "epoch": 0.06805358142740234, "learning_rate": 9.966547436919562e-07, "loss": 5.9318, "step": 1580 }, { "epoch": 0.06891501916698971, "learning_rate": 9.966062617164773e-07, "loss": 6.1837, "step": 1600 }, { "epoch": 0.06977645690657708, "learning_rate": 9.965577797409986e-07, "loss": 6.0558, "step": 1620 }, { "epoch": 0.07063789464616445, "learning_rate": 9.965092977655196e-07, "loss": 5.9914, "step": 1640 }, { "epoch": 0.07149933238575182, "learning_rate": 9.964608157900407e-07, "loss": 6.1015, "step": 1660 }, { "epoch": 0.07236077012533919, "learning_rate": 9.964123338145618e-07, "loss": 5.948, "step": 1680 }, { "epoch": 0.07322220786492656, "learning_rate": 9.963638518390829e-07, "loss": 5.8682, "step": 1700 }, { "epoch": 0.07408364560451393, "learning_rate": 9.963153698636041e-07, "loss": 6.0684, "step": 1720 }, { "epoch": 0.0749450833441013, "learning_rate": 9.96266887888125e-07, "loss": 5.7916, "step": 1740 }, { "epoch": 0.07580652108368868, "learning_rate": 9.962184059126463e-07, "loss": 6.0412, "step": 1760 }, { "epoch": 0.07666795882327605, "learning_rate": 9.961699239371674e-07, "loss": 6.0506, "step": 1780 }, { "epoch": 0.07752939656286342, "learning_rate": 9.961214419616884e-07, "loss": 5.8769, "step": 1800 }, { "epoch": 0.0783908343024508, "learning_rate": 9.960729599862095e-07, "loss": 5.7286, "step": 1820 }, { "epoch": 0.07925227204203816, "learning_rate": 9.960244780107306e-07, "loss": 5.8717, "step": 1840 }, { "epoch": 0.08011370978162553, "learning_rate": 9.959759960352519e-07, "loss": 5.7639, "step": 1860 }, { "epoch": 0.0809751475212129, "learning_rate": 9.959275140597727e-07, "loss": 6.1518, "step": 1880 }, { "epoch": 0.08183658526080027, "learning_rate": 9.95879032084294e-07, "loss": 5.8542, "step": 1900 }, { "epoch": 0.08269802300038764, "learning_rate": 9.95830550108815e-07, "loss": 5.8854, "step": 1920 }, { "epoch": 0.08355946073997501, "learning_rate": 9.957820681333361e-07, "loss": 5.6142, "step": 1940 }, { "epoch": 0.08442089847956238, "learning_rate": 9.957335861578572e-07, "loss": 5.9447, "step": 1960 }, { "epoch": 0.08528233621914975, "learning_rate": 9.956851041823783e-07, "loss": 5.7814, "step": 1980 }, { "epoch": 0.08614377395873714, "learning_rate": 9.956366222068996e-07, "loss": 5.7888, "step": 2000 }, { "epoch": 0.08700521169832451, "learning_rate": 9.955881402314206e-07, "loss": 5.6126, "step": 2020 }, { "epoch": 0.08786664943791188, "learning_rate": 9.955396582559417e-07, "loss": 5.6853, "step": 2040 }, { "epoch": 0.08872808717749925, "learning_rate": 9.954911762804628e-07, "loss": 5.7188, "step": 2060 }, { "epoch": 0.08958952491708662, "learning_rate": 9.954426943049839e-07, "loss": 5.8168, "step": 2080 }, { "epoch": 0.09045096265667399, "learning_rate": 9.953942123295051e-07, "loss": 5.5472, "step": 2100 }, { "epoch": 0.09131240039626136, "learning_rate": 9.95345730354026e-07, "loss": 5.5835, "step": 2120 }, { "epoch": 0.09217383813584873, "learning_rate": 9.952972483785473e-07, "loss": 5.9346, "step": 2140 }, { "epoch": 0.0930352758754361, "learning_rate": 9.952487664030683e-07, "loss": 5.9372, "step": 2160 }, { "epoch": 0.09389671361502347, "learning_rate": 9.952002844275894e-07, "loss": 6.0861, "step": 2180 }, { "epoch": 0.09475815135461084, "learning_rate": 9.951518024521105e-07, "loss": 5.7357, "step": 2200 }, { "epoch": 0.09561958909419822, "learning_rate": 9.951033204766316e-07, "loss": 5.9574, "step": 2220 }, { "epoch": 0.0964810268337856, "learning_rate": 9.950548385011528e-07, "loss": 5.6279, "step": 2240 }, { "epoch": 0.09734246457337296, "learning_rate": 9.95006356525674e-07, "loss": 5.8369, "step": 2260 }, { "epoch": 0.09820390231296033, "learning_rate": 9.94957874550195e-07, "loss": 5.7718, "step": 2280 }, { "epoch": 0.0990653400525477, "learning_rate": 9.94909392574716e-07, "loss": 5.8574, "step": 2300 }, { "epoch": 0.09992677779213507, "learning_rate": 9.948609105992371e-07, "loss": 5.5917, "step": 2320 }, { "epoch": 0.10078821553172244, "learning_rate": 9.948124286237584e-07, "loss": 5.573, "step": 2340 }, { "epoch": 0.10164965327130981, "learning_rate": 9.947639466482793e-07, "loss": 5.5742, "step": 2360 }, { "epoch": 0.10251109101089718, "learning_rate": 9.947154646728006e-07, "loss": 5.8785, "step": 2380 }, { "epoch": 0.10337252875048455, "learning_rate": 9.946669826973216e-07, "loss": 5.9297, "step": 2400 }, { "epoch": 0.10423396649007192, "learning_rate": 9.946185007218427e-07, "loss": 5.5759, "step": 2420 }, { "epoch": 0.1050954042296593, "learning_rate": 9.945700187463638e-07, "loss": 5.3543, "step": 2440 }, { "epoch": 0.10595684196924668, "learning_rate": 9.945215367708848e-07, "loss": 5.8447, "step": 2460 }, { "epoch": 0.10681827970883405, "learning_rate": 9.944730547954061e-07, "loss": 5.7428, "step": 2480 }, { "epoch": 0.10767971744842142, "learning_rate": 9.94424572819927e-07, "loss": 5.4967, "step": 2500 }, { "epoch": 0.10854115518800879, "learning_rate": 9.943760908444483e-07, "loss": 5.567, "step": 2520 }, { "epoch": 0.10940259292759616, "learning_rate": 9.943276088689693e-07, "loss": 5.8644, "step": 2540 }, { "epoch": 0.11026403066718353, "learning_rate": 9.942791268934904e-07, "loss": 5.4059, "step": 2560 }, { "epoch": 0.1111254684067709, "learning_rate": 9.942306449180115e-07, "loss": 5.6175, "step": 2580 }, { "epoch": 0.11198690614635827, "learning_rate": 9.941821629425326e-07, "loss": 5.5652, "step": 2600 }, { "epoch": 0.11284834388594564, "learning_rate": 9.941336809670538e-07, "loss": 5.5506, "step": 2620 }, { "epoch": 0.11370978162553301, "learning_rate": 9.94085198991575e-07, "loss": 5.5353, "step": 2640 }, { "epoch": 0.11457121936512038, "learning_rate": 9.94036717016096e-07, "loss": 5.678, "step": 2660 }, { "epoch": 0.11543265710470775, "learning_rate": 9.93988235040617e-07, "loss": 5.5166, "step": 2680 }, { "epoch": 0.11629409484429513, "learning_rate": 9.939397530651381e-07, "loss": 5.5444, "step": 2700 }, { "epoch": 0.1171555325838825, "learning_rate": 9.938912710896594e-07, "loss": 5.395, "step": 2720 }, { "epoch": 0.11801697032346987, "learning_rate": 9.938427891141803e-07, "loss": 5.7822, "step": 2740 }, { "epoch": 0.11887840806305724, "learning_rate": 9.937943071387016e-07, "loss": 5.5044, "step": 2760 }, { "epoch": 0.11973984580264461, "learning_rate": 9.937458251632226e-07, "loss": 5.309, "step": 2780 }, { "epoch": 0.12060128354223199, "learning_rate": 9.936973431877437e-07, "loss": 5.4511, "step": 2800 }, { "epoch": 0.12146272128181936, "learning_rate": 9.936488612122648e-07, "loss": 5.6822, "step": 2820 }, { "epoch": 0.12232415902140673, "learning_rate": 9.936003792367858e-07, "loss": 5.7788, "step": 2840 }, { "epoch": 0.1231855967609941, "learning_rate": 9.935518972613071e-07, "loss": 5.3378, "step": 2860 }, { "epoch": 0.12404703450058147, "learning_rate": 9.935034152858282e-07, "loss": 5.6238, "step": 2880 }, { "epoch": 0.12490847224016884, "learning_rate": 9.934549333103493e-07, "loss": 5.3651, "step": 2900 }, { "epoch": 0.1257699099797562, "learning_rate": 9.934064513348703e-07, "loss": 5.6061, "step": 2920 }, { "epoch": 0.1266313477193436, "learning_rate": 9.933579693593914e-07, "loss": 5.6688, "step": 2940 }, { "epoch": 0.12749278545893095, "learning_rate": 9.933094873839125e-07, "loss": 5.5402, "step": 2960 }, { "epoch": 0.12835422319851833, "learning_rate": 9.932610054084336e-07, "loss": 5.3776, "step": 2980 }, { "epoch": 0.1292156609381057, "learning_rate": 9.932125234329548e-07, "loss": 5.4292, "step": 3000 }, { "epoch": 0.13007709867769307, "learning_rate": 9.93164041457476e-07, "loss": 5.614, "step": 3020 }, { "epoch": 0.13093853641728045, "learning_rate": 9.93115559481997e-07, "loss": 5.5546, "step": 3040 }, { "epoch": 0.1317999741568678, "learning_rate": 9.930670775065183e-07, "loss": 5.5784, "step": 3060 }, { "epoch": 0.1326614118964552, "learning_rate": 9.930185955310391e-07, "loss": 4.989, "step": 3080 }, { "epoch": 0.13352284963604255, "learning_rate": 9.929701135555604e-07, "loss": 5.5975, "step": 3100 }, { "epoch": 0.13438428737562993, "learning_rate": 9.929216315800813e-07, "loss": 5.2873, "step": 3120 }, { "epoch": 0.1352457251152173, "learning_rate": 9.928731496046025e-07, "loss": 5.4276, "step": 3140 }, { "epoch": 0.13610716285480468, "learning_rate": 9.928246676291236e-07, "loss": 5.4971, "step": 3160 }, { "epoch": 0.13696860059439203, "learning_rate": 9.927761856536447e-07, "loss": 5.6698, "step": 3180 }, { "epoch": 0.13783003833397942, "learning_rate": 9.927277036781658e-07, "loss": 5.7259, "step": 3200 }, { "epoch": 0.13869147607356677, "learning_rate": 9.926792217026868e-07, "loss": 5.5365, "step": 3220 }, { "epoch": 0.13955291381315416, "learning_rate": 9.926307397272081e-07, "loss": 5.5269, "step": 3240 }, { "epoch": 0.1404143515527415, "learning_rate": 9.925822577517292e-07, "loss": 5.4957, "step": 3260 }, { "epoch": 0.1412757892923289, "learning_rate": 9.925337757762503e-07, "loss": 5.15, "step": 3280 }, { "epoch": 0.14213722703191628, "learning_rate": 9.924852938007715e-07, "loss": 5.3415, "step": 3300 }, { "epoch": 0.14299866477150364, "learning_rate": 9.924368118252924e-07, "loss": 5.446, "step": 3320 }, { "epoch": 0.14386010251109102, "learning_rate": 9.923883298498137e-07, "loss": 5.4782, "step": 3340 }, { "epoch": 0.14472154025067838, "learning_rate": 9.923398478743345e-07, "loss": 5.7166, "step": 3360 }, { "epoch": 0.14558297799026576, "learning_rate": 9.922913658988558e-07, "loss": 5.2477, "step": 3380 }, { "epoch": 0.14644441572985312, "learning_rate": 9.92242883923377e-07, "loss": 5.5064, "step": 3400 }, { "epoch": 0.1473058534694405, "learning_rate": 9.92194401947898e-07, "loss": 5.1118, "step": 3420 }, { "epoch": 0.14816729120902786, "learning_rate": 9.921459199724193e-07, "loss": 5.6227, "step": 3440 }, { "epoch": 0.14902872894861524, "learning_rate": 9.920974379969401e-07, "loss": 5.2942, "step": 3460 }, { "epoch": 0.1498901666882026, "learning_rate": 9.920489560214614e-07, "loss": 5.5577, "step": 3480 }, { "epoch": 0.15075160442778998, "learning_rate": 9.920004740459823e-07, "loss": 5.2978, "step": 3500 }, { "epoch": 0.15161304216737737, "learning_rate": 9.919519920705035e-07, "loss": 5.4462, "step": 3520 }, { "epoch": 0.15247447990696472, "learning_rate": 9.919035100950246e-07, "loss": 5.322, "step": 3540 }, { "epoch": 0.1533359176465521, "learning_rate": 9.918550281195457e-07, "loss": 5.1661, "step": 3560 }, { "epoch": 0.15419735538613946, "learning_rate": 9.918065461440668e-07, "loss": 5.4463, "step": 3580 }, { "epoch": 0.15505879312572685, "learning_rate": 9.917580641685878e-07, "loss": 5.2187, "step": 3600 }, { "epoch": 0.1559202308653142, "learning_rate": 9.917095821931091e-07, "loss": 5.4907, "step": 3620 }, { "epoch": 0.1567816686049016, "learning_rate": 9.916611002176302e-07, "loss": 5.5288, "step": 3640 }, { "epoch": 0.15764310634448894, "learning_rate": 9.916126182421513e-07, "loss": 5.4536, "step": 3660 }, { "epoch": 0.15850454408407633, "learning_rate": 9.915641362666725e-07, "loss": 5.481, "step": 3680 }, { "epoch": 0.15936598182366368, "learning_rate": 9.915156542911934e-07, "loss": 5.3777, "step": 3700 }, { "epoch": 0.16022741956325107, "learning_rate": 9.914671723157147e-07, "loss": 5.61, "step": 3720 }, { "epoch": 0.16108885730283845, "learning_rate": 9.914186903402355e-07, "loss": 5.4264, "step": 3740 }, { "epoch": 0.1619502950424258, "learning_rate": 9.913702083647568e-07, "loss": 5.4003, "step": 3760 }, { "epoch": 0.1628117327820132, "learning_rate": 9.913217263892779e-07, "loss": 5.2343, "step": 3780 }, { "epoch": 0.16367317052160055, "learning_rate": 9.91273244413799e-07, "loss": 5.3609, "step": 3800 }, { "epoch": 0.16453460826118793, "learning_rate": 9.912247624383202e-07, "loss": 5.4076, "step": 3820 }, { "epoch": 0.1653960460007753, "learning_rate": 9.911762804628411e-07, "loss": 5.2988, "step": 3840 }, { "epoch": 0.16625748374036267, "learning_rate": 9.911277984873624e-07, "loss": 5.334, "step": 3860 }, { "epoch": 0.16711892147995003, "learning_rate": 9.910793165118835e-07, "loss": 5.5399, "step": 3880 }, { "epoch": 0.1679803592195374, "learning_rate": 9.910308345364045e-07, "loss": 5.1626, "step": 3900 }, { "epoch": 0.16884179695912477, "learning_rate": 9.909823525609258e-07, "loss": 5.3731, "step": 3920 }, { "epoch": 0.16970323469871215, "learning_rate": 9.909338705854467e-07, "loss": 5.2582, "step": 3940 }, { "epoch": 0.1705646724382995, "learning_rate": 9.90885388609968e-07, "loss": 5.3624, "step": 3960 }, { "epoch": 0.1714261101778869, "learning_rate": 9.908369066344888e-07, "loss": 5.2776, "step": 3980 }, { "epoch": 0.17228754791747428, "learning_rate": 9.9078842465901e-07, "loss": 5.4069, "step": 4000 }, { "epoch": 0.17314898565706163, "learning_rate": 9.907399426835312e-07, "loss": 5.336, "step": 4020 }, { "epoch": 0.17401042339664902, "learning_rate": 9.906914607080522e-07, "loss": 5.1774, "step": 4040 }, { "epoch": 0.17487186113623637, "learning_rate": 9.906429787325735e-07, "loss": 5.2318, "step": 4060 }, { "epoch": 0.17573329887582376, "learning_rate": 9.905944967570944e-07, "loss": 5.2908, "step": 4080 }, { "epoch": 0.1765947366154111, "learning_rate": 9.905460147816157e-07, "loss": 5.3843, "step": 4100 }, { "epoch": 0.1774561743549985, "learning_rate": 9.904975328061365e-07, "loss": 5.3274, "step": 4120 }, { "epoch": 0.17831761209458585, "learning_rate": 9.904490508306578e-07, "loss": 5.2516, "step": 4140 }, { "epoch": 0.17917904983417324, "learning_rate": 9.904005688551789e-07, "loss": 5.4997, "step": 4160 }, { "epoch": 0.1800404875737606, "learning_rate": 9.903520868797e-07, "loss": 5.2678, "step": 4180 }, { "epoch": 0.18090192531334798, "learning_rate": 9.903036049042212e-07, "loss": 5.5379, "step": 4200 }, { "epoch": 0.18176336305293536, "learning_rate": 9.90255122928742e-07, "loss": 5.2914, "step": 4220 }, { "epoch": 0.18262480079252272, "learning_rate": 9.902066409532634e-07, "loss": 5.2059, "step": 4240 }, { "epoch": 0.1834862385321101, "learning_rate": 9.901581589777845e-07, "loss": 5.1896, "step": 4260 }, { "epoch": 0.18434767627169746, "learning_rate": 9.901096770023055e-07, "loss": 5.256, "step": 4280 }, { "epoch": 0.18520911401128484, "learning_rate": 9.900611950268268e-07, "loss": 5.3677, "step": 4300 }, { "epoch": 0.1860705517508722, "learning_rate": 9.900127130513477e-07, "loss": 5.1496, "step": 4320 }, { "epoch": 0.18693198949045958, "learning_rate": 9.89964231075869e-07, "loss": 5.2171, "step": 4340 }, { "epoch": 0.18779342723004694, "learning_rate": 9.899157491003898e-07, "loss": 5.1794, "step": 4360 }, { "epoch": 0.18865486496963432, "learning_rate": 9.89867267124911e-07, "loss": 5.149, "step": 4380 }, { "epoch": 0.18951630270922168, "learning_rate": 9.898187851494322e-07, "loss": 5.3623, "step": 4400 }, { "epoch": 0.19037774044880906, "learning_rate": 9.897703031739532e-07, "loss": 5.12, "step": 4420 }, { "epoch": 0.19123917818839645, "learning_rate": 9.897218211984745e-07, "loss": 5.1459, "step": 4440 }, { "epoch": 0.1921006159279838, "learning_rate": 9.896733392229954e-07, "loss": 5.1605, "step": 4460 }, { "epoch": 0.1929620536675712, "learning_rate": 9.896248572475167e-07, "loss": 5.3126, "step": 4480 }, { "epoch": 0.19382349140715854, "learning_rate": 9.895763752720377e-07, "loss": 5.3019, "step": 4500 }, { "epoch": 0.19468492914674593, "learning_rate": 9.895278932965588e-07, "loss": 5.165, "step": 4520 }, { "epoch": 0.19554636688633328, "learning_rate": 9.894794113210799e-07, "loss": 5.2627, "step": 4540 }, { "epoch": 0.19640780462592067, "learning_rate": 9.89430929345601e-07, "loss": 5.2599, "step": 4560 }, { "epoch": 0.19726924236550802, "learning_rate": 9.893824473701222e-07, "loss": 5.1008, "step": 4580 }, { "epoch": 0.1981306801050954, "learning_rate": 9.89333965394643e-07, "loss": 5.1498, "step": 4600 }, { "epoch": 0.19899211784468276, "learning_rate": 9.892854834191644e-07, "loss": 5.177, "step": 4620 }, { "epoch": 0.19985355558427015, "learning_rate": 9.892370014436854e-07, "loss": 5.4382, "step": 4640 }, { "epoch": 0.2007149933238575, "learning_rate": 9.891885194682065e-07, "loss": 5.2083, "step": 4660 }, { "epoch": 0.2015764310634449, "learning_rate": 9.891400374927278e-07, "loss": 5.1972, "step": 4680 }, { "epoch": 0.20243786880303227, "learning_rate": 9.890915555172487e-07, "loss": 5.2555, "step": 4700 }, { "epoch": 0.20329930654261963, "learning_rate": 9.8904307354177e-07, "loss": 5.3427, "step": 4720 }, { "epoch": 0.204160744282207, "learning_rate": 9.889945915662908e-07, "loss": 5.1514, "step": 4740 }, { "epoch": 0.20502218202179437, "learning_rate": 9.88946109590812e-07, "loss": 5.4223, "step": 4760 }, { "epoch": 0.20588361976138175, "learning_rate": 9.888976276153332e-07, "loss": 5.2675, "step": 4780 }, { "epoch": 0.2067450575009691, "learning_rate": 9.888491456398542e-07, "loss": 5.2755, "step": 4800 }, { "epoch": 0.2076064952405565, "learning_rate": 9.888006636643755e-07, "loss": 5.0806, "step": 4820 }, { "epoch": 0.20846793298014385, "learning_rate": 9.887521816888966e-07, "loss": 4.9737, "step": 4840 }, { "epoch": 0.20932937071973123, "learning_rate": 9.887036997134177e-07, "loss": 5.186, "step": 4860 }, { "epoch": 0.2101908084593186, "learning_rate": 9.886552177379387e-07, "loss": 5.2746, "step": 4880 }, { "epoch": 0.21105224619890597, "learning_rate": 9.886067357624598e-07, "loss": 5.359, "step": 4900 }, { "epoch": 0.21191368393849336, "learning_rate": 9.88558253786981e-07, "loss": 5.1738, "step": 4920 }, { "epoch": 0.21277512167808071, "learning_rate": 9.88509771811502e-07, "loss": 5.1438, "step": 4940 }, { "epoch": 0.2136365594176681, "learning_rate": 9.884612898360232e-07, "loss": 4.9935, "step": 4960 }, { "epoch": 0.21449799715725545, "learning_rate": 9.88412807860544e-07, "loss": 5.2627, "step": 4980 }, { "epoch": 0.21535943489684284, "learning_rate": 9.883643258850654e-07, "loss": 4.8987, "step": 5000 }, { "epoch": 0.2162208726364302, "learning_rate": 9.883158439095864e-07, "loss": 5.1943, "step": 5020 }, { "epoch": 0.21708231037601758, "learning_rate": 9.882673619341075e-07, "loss": 5.1257, "step": 5040 }, { "epoch": 0.21794374811560493, "learning_rate": 9.882188799586288e-07, "loss": 5.2447, "step": 5060 }, { "epoch": 0.21880518585519232, "learning_rate": 9.881703979831497e-07, "loss": 5.0602, "step": 5080 }, { "epoch": 0.21966662359477968, "learning_rate": 9.88121916007671e-07, "loss": 5.1273, "step": 5100 }, { "epoch": 0.22052806133436706, "learning_rate": 9.880734340321918e-07, "loss": 5.2593, "step": 5120 }, { "epoch": 0.22138949907395444, "learning_rate": 9.88024952056713e-07, "loss": 5.2149, "step": 5140 }, { "epoch": 0.2222509368135418, "learning_rate": 9.879764700812342e-07, "loss": 5.1093, "step": 5160 }, { "epoch": 0.22311237455312918, "learning_rate": 9.879279881057552e-07, "loss": 5.1914, "step": 5180 }, { "epoch": 0.22397381229271654, "learning_rate": 9.878795061302765e-07, "loss": 5.1855, "step": 5200 }, { "epoch": 0.22483525003230392, "learning_rate": 9.878310241547976e-07, "loss": 5.1667, "step": 5220 }, { "epoch": 0.22569668777189128, "learning_rate": 9.877825421793187e-07, "loss": 5.1006, "step": 5240 }, { "epoch": 0.22655812551147866, "learning_rate": 9.877340602038397e-07, "loss": 5.4394, "step": 5260 }, { "epoch": 0.22741956325106602, "learning_rate": 9.876855782283608e-07, "loss": 5.2247, "step": 5280 }, { "epoch": 0.2282810009906534, "learning_rate": 9.87637096252882e-07, "loss": 5.2253, "step": 5300 }, { "epoch": 0.22914243873024076, "learning_rate": 9.87588614277403e-07, "loss": 5.0591, "step": 5320 }, { "epoch": 0.23000387646982814, "learning_rate": 9.875401323019242e-07, "loss": 5.368, "step": 5340 }, { "epoch": 0.2308653142094155, "learning_rate": 9.87491650326445e-07, "loss": 5.1635, "step": 5360 }, { "epoch": 0.23172675194900288, "learning_rate": 9.874431683509664e-07, "loss": 5.0713, "step": 5380 }, { "epoch": 0.23258818968859027, "learning_rate": 9.873946863754874e-07, "loss": 5.1414, "step": 5400 }, { "epoch": 0.23344962742817763, "learning_rate": 9.873462044000085e-07, "loss": 5.0667, "step": 5420 }, { "epoch": 0.234311065167765, "learning_rate": 9.872977224245298e-07, "loss": 4.9844, "step": 5440 }, { "epoch": 0.23517250290735237, "learning_rate": 9.872492404490509e-07, "loss": 5.0536, "step": 5460 }, { "epoch": 0.23603394064693975, "learning_rate": 9.87200758473572e-07, "loss": 5.0188, "step": 5480 }, { "epoch": 0.2368953783865271, "learning_rate": 9.87152276498093e-07, "loss": 4.9918, "step": 5500 }, { "epoch": 0.2377568161261145, "learning_rate": 9.87103794522614e-07, "loss": 5.1782, "step": 5520 }, { "epoch": 0.23861825386570185, "learning_rate": 9.870553125471354e-07, "loss": 5.0001, "step": 5540 }, { "epoch": 0.23947969160528923, "learning_rate": 9.870068305716562e-07, "loss": 4.9272, "step": 5560 }, { "epoch": 0.2403411293448766, "learning_rate": 9.869583485961775e-07, "loss": 5.3893, "step": 5580 }, { "epoch": 0.24120256708446397, "learning_rate": 9.869098666206986e-07, "loss": 5.3314, "step": 5600 }, { "epoch": 0.24206400482405135, "learning_rate": 9.868613846452196e-07, "loss": 5.4649, "step": 5620 }, { "epoch": 0.2429254425636387, "learning_rate": 9.868129026697407e-07, "loss": 5.2219, "step": 5640 }, { "epoch": 0.2437868803032261, "learning_rate": 9.867644206942618e-07, "loss": 5.3536, "step": 5660 }, { "epoch": 0.24464831804281345, "learning_rate": 9.86715938718783e-07, "loss": 5.0812, "step": 5680 }, { "epoch": 0.24550975578240083, "learning_rate": 9.86667456743304e-07, "loss": 5.0065, "step": 5700 }, { "epoch": 0.2463711935219882, "learning_rate": 9.866189747678252e-07, "loss": 4.8052, "step": 5720 }, { "epoch": 0.24723263126157558, "learning_rate": 9.86570492792346e-07, "loss": 5.1683, "step": 5740 }, { "epoch": 0.24809406900116293, "learning_rate": 9.865220108168674e-07, "loss": 5.0243, "step": 5760 }, { "epoch": 0.24895550674075032, "learning_rate": 9.864735288413884e-07, "loss": 5.1228, "step": 5780 }, { "epoch": 0.24981694448033767, "learning_rate": 9.864250468659095e-07, "loss": 5.0573, "step": 5800 }, { "epoch": 0.25067838221992506, "learning_rate": 9.863765648904308e-07, "loss": 5.2251, "step": 5820 }, { "epoch": 0.2515398199595124, "learning_rate": 9.863280829149519e-07, "loss": 5.1655, "step": 5840 }, { "epoch": 0.2524012576990998, "learning_rate": 9.86279600939473e-07, "loss": 5.0808, "step": 5860 }, { "epoch": 0.2532626954386872, "learning_rate": 9.86231118963994e-07, "loss": 5.0284, "step": 5880 }, { "epoch": 0.25412413317827454, "learning_rate": 9.86182636988515e-07, "loss": 5.1345, "step": 5900 }, { "epoch": 0.2549855709178619, "learning_rate": 9.861341550130364e-07, "loss": 4.7051, "step": 5920 }, { "epoch": 0.2558470086574493, "learning_rate": 9.860856730375572e-07, "loss": 4.9449, "step": 5940 }, { "epoch": 0.25670844639703666, "learning_rate": 9.860371910620785e-07, "loss": 5.1681, "step": 5960 }, { "epoch": 0.257569884136624, "learning_rate": 9.859887090865996e-07, "loss": 4.9312, "step": 5980 }, { "epoch": 0.2584313218762114, "learning_rate": 9.859402271111206e-07, "loss": 5.3096, "step": 6000 }, { "epoch": 0.2592927596157988, "learning_rate": 9.858917451356417e-07, "loss": 4.9742, "step": 6020 }, { "epoch": 0.26015419735538614, "learning_rate": 9.858432631601628e-07, "loss": 5.1821, "step": 6040 }, { "epoch": 0.2610156350949735, "learning_rate": 9.85794781184684e-07, "loss": 5.1821, "step": 6060 }, { "epoch": 0.2618770728345609, "learning_rate": 9.857462992092051e-07, "loss": 5.083, "step": 6080 }, { "epoch": 0.26273851057414827, "learning_rate": 9.856978172337262e-07, "loss": 5.1987, "step": 6100 }, { "epoch": 0.2635999483137356, "learning_rate": 9.85649335258247e-07, "loss": 5.0137, "step": 6120 }, { "epoch": 0.264461386053323, "learning_rate": 9.856008532827684e-07, "loss": 4.8114, "step": 6140 }, { "epoch": 0.2653228237929104, "learning_rate": 9.855523713072894e-07, "loss": 5.1205, "step": 6160 }, { "epoch": 0.26618426153249775, "learning_rate": 9.855038893318105e-07, "loss": 4.9281, "step": 6180 }, { "epoch": 0.2670456992720851, "learning_rate": 9.854554073563318e-07, "loss": 5.0873, "step": 6200 }, { "epoch": 0.26790713701167246, "learning_rate": 9.854069253808529e-07, "loss": 5.0967, "step": 6220 }, { "epoch": 0.26876857475125987, "learning_rate": 9.85358443405374e-07, "loss": 5.2425, "step": 6240 }, { "epoch": 0.2696300124908472, "learning_rate": 9.85309961429895e-07, "loss": 5.1937, "step": 6260 }, { "epoch": 0.2704914502304346, "learning_rate": 9.85261479454416e-07, "loss": 4.8241, "step": 6280 }, { "epoch": 0.271352887970022, "learning_rate": 9.852129974789373e-07, "loss": 5.0539, "step": 6300 }, { "epoch": 0.27221432570960935, "learning_rate": 9.851645155034582e-07, "loss": 5.1938, "step": 6320 }, { "epoch": 0.2730757634491967, "learning_rate": 9.851160335279795e-07, "loss": 5.0296, "step": 6340 }, { "epoch": 0.27393720118878406, "learning_rate": 9.850675515525006e-07, "loss": 4.9956, "step": 6360 }, { "epoch": 0.2747986389283715, "learning_rate": 9.850190695770216e-07, "loss": 5.1587, "step": 6380 }, { "epoch": 0.27566007666795883, "learning_rate": 9.849705876015427e-07, "loss": 5.0578, "step": 6400 }, { "epoch": 0.2765215144075462, "learning_rate": 9.849221056260638e-07, "loss": 4.6687, "step": 6420 }, { "epoch": 0.27738295214713354, "learning_rate": 9.84873623650585e-07, "loss": 5.0046, "step": 6440 }, { "epoch": 0.27824438988672096, "learning_rate": 9.848251416751061e-07, "loss": 4.9388, "step": 6460 }, { "epoch": 0.2791058276263083, "learning_rate": 9.847766596996272e-07, "loss": 5.1524, "step": 6480 }, { "epoch": 0.27996726536589567, "learning_rate": 9.847281777241483e-07, "loss": 5.0729, "step": 6500 }, { "epoch": 0.280828703105483, "learning_rate": 9.846796957486693e-07, "loss": 5.1614, "step": 6520 }, { "epoch": 0.28169014084507044, "learning_rate": 9.846312137731906e-07, "loss": 5.0912, "step": 6540 }, { "epoch": 0.2825515785846578, "learning_rate": 9.845827317977115e-07, "loss": 4.8363, "step": 6560 }, { "epoch": 0.28341301632424515, "learning_rate": 9.845342498222328e-07, "loss": 5.1497, "step": 6580 }, { "epoch": 0.28427445406383256, "learning_rate": 9.844857678467538e-07, "loss": 5.1407, "step": 6600 }, { "epoch": 0.2851358918034199, "learning_rate": 9.84437285871275e-07, "loss": 5.1137, "step": 6620 }, { "epoch": 0.2859973295430073, "learning_rate": 9.84388803895796e-07, "loss": 4.9547, "step": 6640 }, { "epoch": 0.28685876728259463, "learning_rate": 9.84340321920317e-07, "loss": 5.0116, "step": 6660 }, { "epoch": 0.28772020502218204, "learning_rate": 9.842918399448383e-07, "loss": 5.0545, "step": 6680 }, { "epoch": 0.2885816427617694, "learning_rate": 9.842433579693592e-07, "loss": 4.8564, "step": 6700 }, { "epoch": 0.28944308050135675, "learning_rate": 9.841948759938805e-07, "loss": 4.7905, "step": 6720 }, { "epoch": 0.2903045182409441, "learning_rate": 9.841463940184016e-07, "loss": 5.0053, "step": 6740 }, { "epoch": 0.2911659559805315, "learning_rate": 9.840979120429226e-07, "loss": 4.9103, "step": 6760 }, { "epoch": 0.2920273937201189, "learning_rate": 9.840494300674437e-07, "loss": 4.9392, "step": 6780 }, { "epoch": 0.29288883145970623, "learning_rate": 9.840009480919648e-07, "loss": 4.9946, "step": 6800 }, { "epoch": 0.29375026919929365, "learning_rate": 9.83952466116486e-07, "loss": 5.123, "step": 6820 }, { "epoch": 0.294611706938881, "learning_rate": 9.839039841410071e-07, "loss": 5.0541, "step": 6840 }, { "epoch": 0.29547314467846836, "learning_rate": 9.838555021655282e-07, "loss": 5.2848, "step": 6860 }, { "epoch": 0.2963345824180557, "learning_rate": 9.838070201900493e-07, "loss": 5.1143, "step": 6880 }, { "epoch": 0.2971960201576431, "learning_rate": 9.837585382145703e-07, "loss": 5.0813, "step": 6900 }, { "epoch": 0.2980574578972305, "learning_rate": 9.837100562390916e-07, "loss": 4.902, "step": 6920 }, { "epoch": 0.29891889563681784, "learning_rate": 9.836615742636125e-07, "loss": 5.4337, "step": 6940 }, { "epoch": 0.2997803333764052, "learning_rate": 9.836130922881338e-07, "loss": 4.8747, "step": 6960 }, { "epoch": 0.3006417711159926, "learning_rate": 9.835646103126548e-07, "loss": 4.8659, "step": 6980 }, { "epoch": 0.30150320885557996, "learning_rate": 9.83516128337176e-07, "loss": 4.9519, "step": 7000 }, { "epoch": 0.3023646465951673, "learning_rate": 9.83467646361697e-07, "loss": 4.9942, "step": 7020 }, { "epoch": 0.30322608433475473, "learning_rate": 9.83419164386218e-07, "loss": 5.0863, "step": 7040 }, { "epoch": 0.3040875220743421, "learning_rate": 9.833706824107393e-07, "loss": 4.7905, "step": 7060 }, { "epoch": 0.30494895981392944, "learning_rate": 9.833222004352604e-07, "loss": 4.9519, "step": 7080 }, { "epoch": 0.3058103975535168, "learning_rate": 9.832737184597815e-07, "loss": 5.0442, "step": 7100 }, { "epoch": 0.3066718352931042, "learning_rate": 9.832252364843028e-07, "loss": 4.9433, "step": 7120 }, { "epoch": 0.30753327303269157, "learning_rate": 9.831767545088236e-07, "loss": 4.9803, "step": 7140 }, { "epoch": 0.3083947107722789, "learning_rate": 9.83128272533345e-07, "loss": 4.9704, "step": 7160 }, { "epoch": 0.3092561485118663, "learning_rate": 9.830797905578658e-07, "loss": 4.922, "step": 7180 }, { "epoch": 0.3101175862514537, "learning_rate": 9.83031308582387e-07, "loss": 5.1661, "step": 7200 }, { "epoch": 0.31097902399104105, "learning_rate": 9.829828266069081e-07, "loss": 4.9994, "step": 7220 }, { "epoch": 0.3118404617306284, "learning_rate": 9.829343446314292e-07, "loss": 5.1306, "step": 7240 }, { "epoch": 0.3127018994702158, "learning_rate": 9.828858626559505e-07, "loss": 5.1203, "step": 7260 }, { "epoch": 0.3135633372098032, "learning_rate": 9.828373806804713e-07, "loss": 4.9512, "step": 7280 }, { "epoch": 0.31442477494939053, "learning_rate": 9.827888987049926e-07, "loss": 4.9913, "step": 7300 }, { "epoch": 0.3152862126889779, "learning_rate": 9.827404167295135e-07, "loss": 4.8699, "step": 7320 }, { "epoch": 0.3161476504285653, "learning_rate": 9.826919347540348e-07, "loss": 5.0095, "step": 7340 }, { "epoch": 0.31700908816815265, "learning_rate": 9.826434527785558e-07, "loss": 4.8065, "step": 7360 }, { "epoch": 0.31787052590774, "learning_rate": 9.82594970803077e-07, "loss": 5.2293, "step": 7380 }, { "epoch": 0.31873196364732737, "learning_rate": 9.82546488827598e-07, "loss": 5.0196, "step": 7400 }, { "epoch": 0.3195934013869148, "learning_rate": 9.82498006852119e-07, "loss": 4.9105, "step": 7420 }, { "epoch": 0.32045483912650213, "learning_rate": 9.824495248766403e-07, "loss": 4.8088, "step": 7440 }, { "epoch": 0.3213162768660895, "learning_rate": 9.824010429011614e-07, "loss": 4.8067, "step": 7460 }, { "epoch": 0.3221777146056769, "learning_rate": 9.823525609256825e-07, "loss": 4.775, "step": 7480 }, { "epoch": 0.32303915234526426, "learning_rate": 9.823040789502038e-07, "loss": 4.8729, "step": 7500 }, { "epoch": 0.3239005900848516, "learning_rate": 9.822555969747246e-07, "loss": 4.8439, "step": 7520 }, { "epoch": 0.32476202782443897, "learning_rate": 9.82207114999246e-07, "loss": 4.936, "step": 7540 }, { "epoch": 0.3256234655640264, "learning_rate": 9.821586330237668e-07, "loss": 4.6856, "step": 7560 }, { "epoch": 0.32648490330361374, "learning_rate": 9.82110151048288e-07, "loss": 5.3348, "step": 7580 }, { "epoch": 0.3273463410432011, "learning_rate": 9.820616690728091e-07, "loss": 5.0314, "step": 7600 }, { "epoch": 0.32820777878278845, "learning_rate": 9.820131870973302e-07, "loss": 4.9376, "step": 7620 }, { "epoch": 0.32906921652237586, "learning_rate": 9.819647051218515e-07, "loss": 4.9632, "step": 7640 }, { "epoch": 0.3299306542619632, "learning_rate": 9.819162231463723e-07, "loss": 4.8058, "step": 7660 }, { "epoch": 0.3307920920015506, "learning_rate": 9.818677411708936e-07, "loss": 5.0257, "step": 7680 }, { "epoch": 0.331653529741138, "learning_rate": 9.818192591954147e-07, "loss": 4.8374, "step": 7700 }, { "epoch": 0.33251496748072534, "learning_rate": 9.817707772199358e-07, "loss": 5.0226, "step": 7720 }, { "epoch": 0.3333764052203127, "learning_rate": 9.817222952444568e-07, "loss": 5.1709, "step": 7740 }, { "epoch": 0.33423784295990006, "learning_rate": 9.81673813268978e-07, "loss": 5.0315, "step": 7760 }, { "epoch": 0.33509928069948747, "learning_rate": 9.81625331293499e-07, "loss": 4.8475, "step": 7780 }, { "epoch": 0.3359607184390748, "learning_rate": 9.8157684931802e-07, "loss": 4.7601, "step": 7800 }, { "epoch": 0.3368221561786622, "learning_rate": 9.815283673425413e-07, "loss": 4.9091, "step": 7820 }, { "epoch": 0.33768359391824954, "learning_rate": 9.814798853670624e-07, "loss": 4.7602, "step": 7840 }, { "epoch": 0.33854503165783695, "learning_rate": 9.814314033915835e-07, "loss": 4.9096, "step": 7860 }, { "epoch": 0.3394064693974243, "learning_rate": 9.813829214161047e-07, "loss": 5.0443, "step": 7880 }, { "epoch": 0.34026790713701166, "learning_rate": 9.813344394406256e-07, "loss": 4.9772, "step": 7900 }, { "epoch": 0.341129344876599, "learning_rate": 9.812859574651469e-07, "loss": 4.8295, "step": 7920 }, { "epoch": 0.34199078261618643, "learning_rate": 9.812374754896678e-07, "loss": 5.0334, "step": 7940 }, { "epoch": 0.3428522203557738, "learning_rate": 9.81188993514189e-07, "loss": 4.8409, "step": 7960 }, { "epoch": 0.34371365809536114, "learning_rate": 9.811405115387101e-07, "loss": 4.7826, "step": 7980 }, { "epoch": 0.34457509583494855, "learning_rate": 9.810920295632312e-07, "loss": 4.8348, "step": 8000 }, { "epoch": 0.3454365335745359, "learning_rate": 9.810435475877525e-07, "loss": 4.8643, "step": 8020 }, { "epoch": 0.34629797131412327, "learning_rate": 9.809950656122733e-07, "loss": 5.0973, "step": 8040 }, { "epoch": 0.3471594090537106, "learning_rate": 9.809465836367946e-07, "loss": 4.9742, "step": 8060 }, { "epoch": 0.34802084679329803, "learning_rate": 9.808981016613157e-07, "loss": 4.8906, "step": 8080 }, { "epoch": 0.3488822845328854, "learning_rate": 9.808496196858367e-07, "loss": 4.6369, "step": 8100 }, { "epoch": 0.34974372227247275, "learning_rate": 9.80801137710358e-07, "loss": 4.9755, "step": 8120 }, { "epoch": 0.3506051600120601, "learning_rate": 9.807526557348789e-07, "loss": 4.7116, "step": 8140 }, { "epoch": 0.3514665977516475, "learning_rate": 9.807041737594002e-07, "loss": 4.7687, "step": 8160 }, { "epoch": 0.35232803549123487, "learning_rate": 9.80655691783921e-07, "loss": 4.9314, "step": 8180 }, { "epoch": 0.3531894732308222, "learning_rate": 9.806072098084423e-07, "loss": 5.0838, "step": 8200 }, { "epoch": 0.35405091097040964, "learning_rate": 9.805587278329634e-07, "loss": 5.0102, "step": 8220 }, { "epoch": 0.354912348709997, "learning_rate": 9.805102458574845e-07, "loss": 5.0401, "step": 8240 }, { "epoch": 0.35577378644958435, "learning_rate": 9.804617638820057e-07, "loss": 4.9059, "step": 8260 }, { "epoch": 0.3566352241891717, "learning_rate": 9.804132819065266e-07, "loss": 4.6473, "step": 8280 }, { "epoch": 0.3574966619287591, "learning_rate": 9.803647999310479e-07, "loss": 4.7138, "step": 8300 }, { "epoch": 0.3583580996683465, "learning_rate": 9.803163179555687e-07, "loss": 4.8867, "step": 8320 }, { "epoch": 0.35921953740793383, "learning_rate": 9.8026783598009e-07, "loss": 4.8516, "step": 8340 }, { "epoch": 0.3600809751475212, "learning_rate": 9.80219354004611e-07, "loss": 4.5803, "step": 8360 }, { "epoch": 0.3609424128871086, "learning_rate": 9.801708720291322e-07, "loss": 4.9113, "step": 8380 }, { "epoch": 0.36180385062669596, "learning_rate": 9.801223900536535e-07, "loss": 4.7101, "step": 8400 }, { "epoch": 0.3626652883662833, "learning_rate": 9.800739080781743e-07, "loss": 4.673, "step": 8420 }, { "epoch": 0.3635267261058707, "learning_rate": 9.800254261026956e-07, "loss": 4.6649, "step": 8440 }, { "epoch": 0.3643881638454581, "learning_rate": 9.799769441272167e-07, "loss": 4.8451, "step": 8460 }, { "epoch": 0.36524960158504544, "learning_rate": 9.799284621517377e-07, "loss": 4.9897, "step": 8480 }, { "epoch": 0.3661110393246328, "learning_rate": 9.79879980176259e-07, "loss": 4.9975, "step": 8500 }, { "epoch": 0.3669724770642202, "learning_rate": 9.798314982007799e-07, "loss": 4.9695, "step": 8520 }, { "epoch": 0.36783391480380756, "learning_rate": 9.797830162253012e-07, "loss": 4.7713, "step": 8540 }, { "epoch": 0.3686953525433949, "learning_rate": 9.79734534249822e-07, "loss": 4.8941, "step": 8560 }, { "epoch": 0.3695567902829823, "learning_rate": 9.796860522743433e-07, "loss": 4.6747, "step": 8580 }, { "epoch": 0.3704182280225697, "learning_rate": 9.796375702988644e-07, "loss": 4.7685, "step": 8600 }, { "epoch": 0.37127966576215704, "learning_rate": 9.795890883233855e-07, "loss": 4.9399, "step": 8620 }, { "epoch": 0.3721411035017444, "learning_rate": 9.795406063479067e-07, "loss": 4.9424, "step": 8640 }, { "epoch": 0.3730025412413318, "learning_rate": 9.794921243724276e-07, "loss": 4.8493, "step": 8660 }, { "epoch": 0.37386397898091916, "learning_rate": 9.794436423969489e-07, "loss": 4.974, "step": 8680 }, { "epoch": 0.3747254167205065, "learning_rate": 9.7939516042147e-07, "loss": 4.6983, "step": 8700 }, { "epoch": 0.3755868544600939, "learning_rate": 9.79346678445991e-07, "loss": 4.7911, "step": 8720 }, { "epoch": 0.3764482921996813, "learning_rate": 9.792981964705123e-07, "loss": 4.7347, "step": 8740 }, { "epoch": 0.37730972993926865, "learning_rate": 9.792497144950332e-07, "loss": 4.8341, "step": 8760 }, { "epoch": 0.378171167678856, "learning_rate": 9.792012325195544e-07, "loss": 5.0537, "step": 8780 }, { "epoch": 0.37903260541844336, "learning_rate": 9.791527505440753e-07, "loss": 4.8714, "step": 8800 }, { "epoch": 0.37989404315803077, "learning_rate": 9.791042685685966e-07, "loss": 5.0039, "step": 8820 }, { "epoch": 0.3807554808976181, "learning_rate": 9.790557865931177e-07, "loss": 4.9384, "step": 8840 }, { "epoch": 0.3816169186372055, "learning_rate": 9.790073046176387e-07, "loss": 4.9291, "step": 8860 }, { "epoch": 0.3824783563767929, "learning_rate": 9.7895882264216e-07, "loss": 4.7098, "step": 8880 }, { "epoch": 0.38333979411638025, "learning_rate": 9.789103406666809e-07, "loss": 4.8243, "step": 8900 }, { "epoch": 0.3842012318559676, "learning_rate": 9.788618586912022e-07, "loss": 4.7814, "step": 8920 }, { "epoch": 0.38506266959555496, "learning_rate": 9.78813376715723e-07, "loss": 5.0305, "step": 8940 }, { "epoch": 0.3859241073351424, "learning_rate": 9.787648947402443e-07, "loss": 5.0014, "step": 8960 }, { "epoch": 0.38678554507472973, "learning_rate": 9.787164127647654e-07, "loss": 5.014, "step": 8980 }, { "epoch": 0.3876469828143171, "learning_rate": 9.786679307892864e-07, "loss": 4.8992, "step": 9000 }, { "epoch": 0.38850842055390444, "learning_rate": 9.786194488138077e-07, "loss": 4.4712, "step": 9020 }, { "epoch": 0.38936985829349186, "learning_rate": 9.785709668383288e-07, "loss": 4.9685, "step": 9040 }, { "epoch": 0.3902312960330792, "learning_rate": 9.785224848628499e-07, "loss": 4.4798, "step": 9060 }, { "epoch": 0.39109273377266657, "learning_rate": 9.78474002887371e-07, "loss": 5.0365, "step": 9080 }, { "epoch": 0.391954171512254, "learning_rate": 9.78425520911892e-07, "loss": 4.8627, "step": 9100 }, { "epoch": 0.39281560925184134, "learning_rate": 9.783770389364133e-07, "loss": 4.8344, "step": 9120 }, { "epoch": 0.3936770469914287, "learning_rate": 9.783285569609342e-07, "loss": 4.8208, "step": 9140 }, { "epoch": 0.39453848473101605, "learning_rate": 9.782800749854554e-07, "loss": 4.7272, "step": 9160 }, { "epoch": 0.39539992247060346, "learning_rate": 9.782315930099763e-07, "loss": 4.6802, "step": 9180 }, { "epoch": 0.3962613602101908, "learning_rate": 9.781831110344976e-07, "loss": 4.9891, "step": 9200 }, { "epoch": 0.3971227979497782, "learning_rate": 9.781346290590187e-07, "loss": 5.038, "step": 9220 }, { "epoch": 0.39798423568936553, "learning_rate": 9.780861470835397e-07, "loss": 4.8407, "step": 9240 }, { "epoch": 0.39884567342895294, "learning_rate": 9.78037665108061e-07, "loss": 4.9967, "step": 9260 }, { "epoch": 0.3997071111685403, "learning_rate": 9.77989183132582e-07, "loss": 4.871, "step": 9280 }, { "epoch": 0.40056854890812765, "learning_rate": 9.779407011571032e-07, "loss": 4.9019, "step": 9300 }, { "epoch": 0.401429986647715, "learning_rate": 9.778922191816242e-07, "loss": 4.4204, "step": 9320 }, { "epoch": 0.4022914243873024, "learning_rate": 9.778437372061453e-07, "loss": 4.6821, "step": 9340 }, { "epoch": 0.4031528621268898, "learning_rate": 9.777952552306664e-07, "loss": 4.8691, "step": 9360 }, { "epoch": 0.40401429986647713, "learning_rate": 9.777467732551874e-07, "loss": 5.0331, "step": 9380 }, { "epoch": 0.40487573760606455, "learning_rate": 9.776982912797087e-07, "loss": 4.992, "step": 9400 }, { "epoch": 0.4057371753456519, "learning_rate": 9.776498093042298e-07, "loss": 4.8098, "step": 9420 }, { "epoch": 0.40659861308523926, "learning_rate": 9.776013273287509e-07, "loss": 4.9163, "step": 9440 }, { "epoch": 0.4074600508248266, "learning_rate": 9.77552845353272e-07, "loss": 4.9387, "step": 9460 }, { "epoch": 0.408321488564414, "learning_rate": 9.77504363377793e-07, "loss": 4.879, "step": 9480 }, { "epoch": 0.4091829263040014, "learning_rate": 9.774558814023143e-07, "loss": 4.65, "step": 9500 }, { "epoch": 0.41004436404358874, "learning_rate": 9.774073994268352e-07, "loss": 4.5392, "step": 9520 }, { "epoch": 0.4109058017831761, "learning_rate": 9.773589174513564e-07, "loss": 4.9185, "step": 9540 }, { "epoch": 0.4117672395227635, "learning_rate": 9.773104354758773e-07, "loss": 4.8353, "step": 9560 }, { "epoch": 0.41262867726235086, "learning_rate": 9.772619535003986e-07, "loss": 4.9165, "step": 9580 }, { "epoch": 0.4134901150019382, "learning_rate": 9.772134715249196e-07, "loss": 4.9327, "step": 9600 }, { "epoch": 0.41435155274152563, "learning_rate": 9.771649895494407e-07, "loss": 4.4741, "step": 9620 }, { "epoch": 0.415212990481113, "learning_rate": 9.77116507573962e-07, "loss": 5.0317, "step": 9640 }, { "epoch": 0.41607442822070034, "learning_rate": 9.77068025598483e-07, "loss": 5.118, "step": 9660 }, { "epoch": 0.4169358659602877, "learning_rate": 9.770195436230041e-07, "loss": 4.7494, "step": 9680 }, { "epoch": 0.4177973036998751, "learning_rate": 9.769710616475252e-07, "loss": 4.9464, "step": 9700 }, { "epoch": 0.41865874143946247, "learning_rate": 9.769225796720463e-07, "loss": 4.9494, "step": 9720 }, { "epoch": 0.4195201791790498, "learning_rate": 9.768740976965676e-07, "loss": 4.841, "step": 9740 }, { "epoch": 0.4203816169186372, "learning_rate": 9.768256157210884e-07, "loss": 4.6014, "step": 9760 }, { "epoch": 0.4212430546582246, "learning_rate": 9.767771337456097e-07, "loss": 4.9624, "step": 9780 }, { "epoch": 0.42210449239781195, "learning_rate": 9.767286517701308e-07, "loss": 4.9065, "step": 9800 }, { "epoch": 0.4229659301373993, "learning_rate": 9.766801697946519e-07, "loss": 5.0393, "step": 9820 }, { "epoch": 0.4238273678769867, "learning_rate": 9.76631687819173e-07, "loss": 4.761, "step": 9840 }, { "epoch": 0.42468880561657407, "learning_rate": 9.76583205843694e-07, "loss": 4.7322, "step": 9860 }, { "epoch": 0.42555024335616143, "learning_rate": 9.765347238682153e-07, "loss": 4.7106, "step": 9880 }, { "epoch": 0.4264116810957488, "learning_rate": 9.764862418927361e-07, "loss": 4.8356, "step": 9900 }, { "epoch": 0.4272731188353362, "learning_rate": 9.764377599172574e-07, "loss": 4.6066, "step": 9920 }, { "epoch": 0.42813455657492355, "learning_rate": 9.763892779417783e-07, "loss": 4.7757, "step": 9940 }, { "epoch": 0.4289959943145109, "learning_rate": 9.763407959662996e-07, "loss": 4.8173, "step": 9960 }, { "epoch": 0.42985743205409827, "learning_rate": 9.762923139908206e-07, "loss": 4.6468, "step": 9980 }, { "epoch": 0.4307188697936857, "learning_rate": 9.762438320153417e-07, "loss": 4.8561, "step": 10000 }, { "epoch": 0.43158030753327303, "learning_rate": 9.76195350039863e-07, "loss": 4.769, "step": 10020 }, { "epoch": 0.4324417452728604, "learning_rate": 9.76146868064384e-07, "loss": 4.8013, "step": 10040 }, { "epoch": 0.4333031830124478, "learning_rate": 9.760983860889051e-07, "loss": 4.8112, "step": 10060 }, { "epoch": 0.43416462075203516, "learning_rate": 9.760499041134262e-07, "loss": 4.6127, "step": 10080 }, { "epoch": 0.4350260584916225, "learning_rate": 9.760014221379473e-07, "loss": 4.808, "step": 10100 }, { "epoch": 0.43588749623120987, "learning_rate": 9.759529401624686e-07, "loss": 4.7345, "step": 10120 }, { "epoch": 0.4367489339707973, "learning_rate": 9.759044581869894e-07, "loss": 4.6163, "step": 10140 }, { "epoch": 0.43761037171038464, "learning_rate": 9.758559762115107e-07, "loss": 4.785, "step": 10160 }, { "epoch": 0.438471809449972, "learning_rate": 9.758074942360318e-07, "loss": 5.0965, "step": 10180 }, { "epoch": 0.43933324718955935, "learning_rate": 9.757590122605529e-07, "loss": 4.7546, "step": 10200 }, { "epoch": 0.44019468492914676, "learning_rate": 9.75710530285074e-07, "loss": 4.7411, "step": 10220 }, { "epoch": 0.4410561226687341, "learning_rate": 9.75662048309595e-07, "loss": 4.7157, "step": 10240 }, { "epoch": 0.4419175604083215, "learning_rate": 9.756135663341163e-07, "loss": 4.7451, "step": 10260 }, { "epoch": 0.4427789981479089, "learning_rate": 9.755650843586374e-07, "loss": 4.9128, "step": 10280 }, { "epoch": 0.44364043588749624, "learning_rate": 9.755166023831584e-07, "loss": 4.9461, "step": 10300 }, { "epoch": 0.4445018736270836, "learning_rate": 9.754681204076795e-07, "loss": 4.902, "step": 10320 }, { "epoch": 0.44536331136667096, "learning_rate": 9.754196384322006e-07, "loss": 4.9179, "step": 10340 }, { "epoch": 0.44622474910625837, "learning_rate": 9.753711564567218e-07, "loss": 4.7129, "step": 10360 }, { "epoch": 0.4470861868458457, "learning_rate": 9.753226744812427e-07, "loss": 4.8344, "step": 10380 }, { "epoch": 0.4479476245854331, "learning_rate": 9.75274192505764e-07, "loss": 4.8156, "step": 10400 }, { "epoch": 0.44880906232502044, "learning_rate": 9.75225710530285e-07, "loss": 4.8929, "step": 10420 }, { "epoch": 0.44967050006460785, "learning_rate": 9.751772285548061e-07, "loss": 4.7655, "step": 10440 }, { "epoch": 0.4505319378041952, "learning_rate": 9.751287465793272e-07, "loss": 4.8392, "step": 10460 }, { "epoch": 0.45139337554378256, "learning_rate": 9.750802646038483e-07, "loss": 4.6094, "step": 10480 }, { "epoch": 0.45225481328336997, "learning_rate": 9.750317826283696e-07, "loss": 4.7745, "step": 10500 }, { "epoch": 0.45311625102295733, "learning_rate": 9.749833006528904e-07, "loss": 4.7868, "step": 10520 }, { "epoch": 0.4539776887625447, "learning_rate": 9.749348186774117e-07, "loss": 4.6695, "step": 10540 }, { "epoch": 0.45483912650213204, "learning_rate": 9.748863367019328e-07, "loss": 4.9459, "step": 10560 }, { "epoch": 0.45570056424171945, "learning_rate": 9.748378547264538e-07, "loss": 4.856, "step": 10580 }, { "epoch": 0.4565620019813068, "learning_rate": 9.74789372750975e-07, "loss": 4.6656, "step": 10600 }, { "epoch": 0.45742343972089416, "learning_rate": 9.74740890775496e-07, "loss": 4.7138, "step": 10620 }, { "epoch": 0.4582848774604815, "learning_rate": 9.746924088000173e-07, "loss": 4.7696, "step": 10640 }, { "epoch": 0.45914631520006893, "learning_rate": 9.746439268245383e-07, "loss": 4.7906, "step": 10660 }, { "epoch": 0.4600077529396563, "learning_rate": 9.745954448490594e-07, "loss": 4.7629, "step": 10680 }, { "epoch": 0.46086919067924365, "learning_rate": 9.745469628735805e-07, "loss": 4.9862, "step": 10700 }, { "epoch": 0.461730628418831, "learning_rate": 9.744984808981016e-07, "loss": 4.9942, "step": 10720 }, { "epoch": 0.4625920661584184, "learning_rate": 9.744499989226228e-07, "loss": 5.1121, "step": 10740 }, { "epoch": 0.46345350389800577, "learning_rate": 9.744015169471437e-07, "loss": 4.7407, "step": 10760 }, { "epoch": 0.4643149416375931, "learning_rate": 9.74353034971665e-07, "loss": 4.5919, "step": 10780 }, { "epoch": 0.46517637937718054, "learning_rate": 9.74304552996186e-07, "loss": 4.8487, "step": 10800 }, { "epoch": 0.4660378171167679, "learning_rate": 9.742560710207071e-07, "loss": 4.4517, "step": 10820 }, { "epoch": 0.46689925485635525, "learning_rate": 9.742075890452282e-07, "loss": 4.8589, "step": 10840 }, { "epoch": 0.4677606925959426, "learning_rate": 9.741591070697493e-07, "loss": 4.6649, "step": 10860 }, { "epoch": 0.46862213033553, "learning_rate": 9.741106250942706e-07, "loss": 4.7476, "step": 10880 }, { "epoch": 0.4694835680751174, "learning_rate": 9.740621431187916e-07, "loss": 5.019, "step": 10900 }, { "epoch": 0.47034500581470473, "learning_rate": 9.740136611433127e-07, "loss": 4.7025, "step": 10920 }, { "epoch": 0.4712064435542921, "learning_rate": 9.73965179167834e-07, "loss": 4.8888, "step": 10940 }, { "epoch": 0.4720678812938795, "learning_rate": 9.739166971923548e-07, "loss": 4.9776, "step": 10960 }, { "epoch": 0.47292931903346686, "learning_rate": 9.73868215216876e-07, "loss": 4.5894, "step": 10980 }, { "epoch": 0.4737907567730542, "learning_rate": 9.73819733241397e-07, "loss": 4.925, "step": 11000 }, { "epoch": 0.4746521945126416, "learning_rate": 9.737712512659183e-07, "loss": 4.7209, "step": 11020 }, { "epoch": 0.475513632252229, "learning_rate": 9.737227692904393e-07, "loss": 4.6565, "step": 11040 }, { "epoch": 0.47637506999181634, "learning_rate": 9.736742873149604e-07, "loss": 4.6889, "step": 11060 }, { "epoch": 0.4772365077314037, "learning_rate": 9.736258053394815e-07, "loss": 4.462, "step": 11080 }, { "epoch": 0.4780979454709911, "learning_rate": 9.735773233640026e-07, "loss": 4.6663, "step": 11100 }, { "epoch": 0.47895938321057846, "learning_rate": 9.735288413885238e-07, "loss": 4.8906, "step": 11120 }, { "epoch": 0.4798208209501658, "learning_rate": 9.734803594130447e-07, "loss": 5.0451, "step": 11140 }, { "epoch": 0.4806822586897532, "learning_rate": 9.73431877437566e-07, "loss": 4.7728, "step": 11160 }, { "epoch": 0.4815436964293406, "learning_rate": 9.73383395462087e-07, "loss": 4.6112, "step": 11180 }, { "epoch": 0.48240513416892794, "learning_rate": 9.733349134866081e-07, "loss": 4.7198, "step": 11200 }, { "epoch": 0.4832665719085153, "learning_rate": 9.732864315111292e-07, "loss": 4.5283, "step": 11220 }, { "epoch": 0.4841280096481027, "learning_rate": 9.732379495356503e-07, "loss": 4.6935, "step": 11240 }, { "epoch": 0.48498944738769006, "learning_rate": 9.731894675601715e-07, "loss": 4.8756, "step": 11260 }, { "epoch": 0.4858508851272774, "learning_rate": 9.731409855846926e-07, "loss": 4.7816, "step": 11280 }, { "epoch": 0.4867123228668648, "learning_rate": 9.730925036092137e-07, "loss": 4.6481, "step": 11300 }, { "epoch": 0.4875737606064522, "learning_rate": 9.73044021633735e-07, "loss": 4.7033, "step": 11320 }, { "epoch": 0.48843519834603955, "learning_rate": 9.729955396582558e-07, "loss": 4.6986, "step": 11340 }, { "epoch": 0.4892966360856269, "learning_rate": 9.729470576827771e-07, "loss": 4.7492, "step": 11360 }, { "epoch": 0.49015807382521426, "learning_rate": 9.72898575707298e-07, "loss": 4.7513, "step": 11380 }, { "epoch": 0.49101951156480167, "learning_rate": 9.728500937318193e-07, "loss": 4.7428, "step": 11400 }, { "epoch": 0.491880949304389, "learning_rate": 9.728016117563403e-07, "loss": 4.6812, "step": 11420 }, { "epoch": 0.4927423870439764, "learning_rate": 9.727531297808614e-07, "loss": 4.9698, "step": 11440 }, { "epoch": 0.4936038247835638, "learning_rate": 9.727046478053825e-07, "loss": 4.6955, "step": 11460 }, { "epoch": 0.49446526252315115, "learning_rate": 9.726561658299035e-07, "loss": 4.7071, "step": 11480 }, { "epoch": 0.4953267002627385, "learning_rate": 9.726076838544248e-07, "loss": 4.477, "step": 11500 }, { "epoch": 0.49618813800232586, "learning_rate": 9.725592018789457e-07, "loss": 4.5629, "step": 11520 }, { "epoch": 0.4970495757419133, "learning_rate": 9.72510719903467e-07, "loss": 4.2942, "step": 11540 }, { "epoch": 0.49791101348150063, "learning_rate": 9.72462237927988e-07, "loss": 4.763, "step": 11560 }, { "epoch": 0.498772451221088, "learning_rate": 9.724137559525091e-07, "loss": 4.7874, "step": 11580 }, { "epoch": 0.49963388896067534, "learning_rate": 9.723652739770302e-07, "loss": 4.8124, "step": 11600 }, { "epoch": 0.5004953267002628, "learning_rate": 9.723167920015513e-07, "loss": 4.9469, "step": 11620 }, { "epoch": 0.5013567644398501, "learning_rate": 9.722683100260725e-07, "loss": 4.8652, "step": 11640 }, { "epoch": 0.5022182021794375, "learning_rate": 9.722198280505936e-07, "loss": 4.7594, "step": 11660 }, { "epoch": 0.5030796399190248, "learning_rate": 9.721713460751147e-07, "loss": 4.4601, "step": 11680 }, { "epoch": 0.5039410776586122, "learning_rate": 9.72122864099636e-07, "loss": 4.7384, "step": 11700 }, { "epoch": 0.5048025153981996, "learning_rate": 9.720743821241568e-07, "loss": 4.7338, "step": 11720 }, { "epoch": 0.505663953137787, "learning_rate": 9.720259001486781e-07, "loss": 4.7798, "step": 11740 }, { "epoch": 0.5065253908773744, "learning_rate": 9.71977418173199e-07, "loss": 4.988, "step": 11760 }, { "epoch": 0.5073868286169617, "learning_rate": 9.719289361977203e-07, "loss": 4.7253, "step": 11780 }, { "epoch": 0.5082482663565491, "learning_rate": 9.718804542222413e-07, "loss": 4.8133, "step": 11800 }, { "epoch": 0.5091097040961364, "learning_rate": 9.718319722467624e-07, "loss": 4.8508, "step": 11820 }, { "epoch": 0.5099711418357238, "learning_rate": 9.717834902712837e-07, "loss": 4.6992, "step": 11840 }, { "epoch": 0.5108325795753111, "learning_rate": 9.717350082958045e-07, "loss": 4.7687, "step": 11860 }, { "epoch": 0.5116940173148986, "learning_rate": 9.716865263203258e-07, "loss": 4.5679, "step": 11880 }, { "epoch": 0.512555455054486, "learning_rate": 9.71638044344847e-07, "loss": 4.7933, "step": 11900 }, { "epoch": 0.5134168927940733, "learning_rate": 9.71589562369368e-07, "loss": 4.6264, "step": 11920 }, { "epoch": 0.5142783305336607, "learning_rate": 9.715410803938892e-07, "loss": 4.6911, "step": 11940 }, { "epoch": 0.515139768273248, "learning_rate": 9.714925984184101e-07, "loss": 4.6685, "step": 11960 }, { "epoch": 0.5160012060128354, "learning_rate": 9.714441164429314e-07, "loss": 4.938, "step": 11980 }, { "epoch": 0.5168626437524227, "learning_rate": 9.713956344674523e-07, "loss": 4.6868, "step": 12000 }, { "epoch": 0.5177240814920102, "learning_rate": 9.713471524919735e-07, "loss": 4.7499, "step": 12020 }, { "epoch": 0.5185855192315976, "learning_rate": 9.712986705164946e-07, "loss": 4.5444, "step": 12040 }, { "epoch": 0.5194469569711849, "learning_rate": 9.712501885410157e-07, "loss": 4.7128, "step": 12060 }, { "epoch": 0.5203083947107723, "learning_rate": 9.71201706565537e-07, "loss": 4.6333, "step": 12080 }, { "epoch": 0.5211698324503596, "learning_rate": 9.711532245900578e-07, "loss": 4.7689, "step": 12100 }, { "epoch": 0.522031270189947, "learning_rate": 9.71104742614579e-07, "loss": 4.417, "step": 12120 }, { "epoch": 0.5228927079295344, "learning_rate": 9.710562606391e-07, "loss": 4.6453, "step": 12140 }, { "epoch": 0.5237541456691218, "learning_rate": 9.710077786636212e-07, "loss": 4.7497, "step": 12160 }, { "epoch": 0.5246155834087092, "learning_rate": 9.709592966881423e-07, "loss": 4.6708, "step": 12180 }, { "epoch": 0.5254770211482965, "learning_rate": 9.709108147126634e-07, "loss": 4.6749, "step": 12200 }, { "epoch": 0.5263384588878839, "learning_rate": 9.708623327371847e-07, "loss": 4.5748, "step": 12220 }, { "epoch": 0.5271998966274712, "learning_rate": 9.708138507617055e-07, "loss": 4.4467, "step": 12240 }, { "epoch": 0.5280613343670586, "learning_rate": 9.707653687862268e-07, "loss": 4.5542, "step": 12260 }, { "epoch": 0.528922772106646, "learning_rate": 9.707168868107479e-07, "loss": 4.7008, "step": 12280 }, { "epoch": 0.5297842098462333, "learning_rate": 9.70668404835269e-07, "loss": 4.4736, "step": 12300 }, { "epoch": 0.5306456475858208, "learning_rate": 9.706199228597902e-07, "loss": 4.5476, "step": 12320 }, { "epoch": 0.5315070853254081, "learning_rate": 9.70571440884311e-07, "loss": 4.8016, "step": 12340 }, { "epoch": 0.5323685230649955, "learning_rate": 9.705229589088324e-07, "loss": 4.8178, "step": 12360 }, { "epoch": 0.5332299608045828, "learning_rate": 9.704744769333532e-07, "loss": 4.6261, "step": 12380 }, { "epoch": 0.5340913985441702, "learning_rate": 9.704259949578745e-07, "loss": 4.5225, "step": 12400 }, { "epoch": 0.5349528362837576, "learning_rate": 9.703775129823956e-07, "loss": 4.5948, "step": 12420 }, { "epoch": 0.5358142740233449, "learning_rate": 9.703290310069167e-07, "loss": 4.6267, "step": 12440 }, { "epoch": 0.5366757117629324, "learning_rate": 9.70280549031438e-07, "loss": 4.8665, "step": 12460 }, { "epoch": 0.5375371495025197, "learning_rate": 9.702320670559588e-07, "loss": 4.6497, "step": 12480 }, { "epoch": 0.5383985872421071, "learning_rate": 9.7018358508048e-07, "loss": 4.6235, "step": 12500 }, { "epoch": 0.5392600249816945, "learning_rate": 9.701351031050012e-07, "loss": 4.5948, "step": 12520 }, { "epoch": 0.5401214627212818, "learning_rate": 9.700866211295222e-07, "loss": 4.586, "step": 12540 }, { "epoch": 0.5409829004608692, "learning_rate": 9.700381391540435e-07, "loss": 4.5366, "step": 12560 }, { "epoch": 0.5418443382004565, "learning_rate": 9.699896571785644e-07, "loss": 4.4859, "step": 12580 }, { "epoch": 0.542705775940044, "learning_rate": 9.699411752030857e-07, "loss": 4.5417, "step": 12600 }, { "epoch": 0.5435672136796313, "learning_rate": 9.698926932276065e-07, "loss": 4.6418, "step": 12620 }, { "epoch": 0.5444286514192187, "learning_rate": 9.698442112521278e-07, "loss": 4.6774, "step": 12640 }, { "epoch": 0.5452900891588061, "learning_rate": 9.697957292766489e-07, "loss": 4.4528, "step": 12660 }, { "epoch": 0.5461515268983934, "learning_rate": 9.6974724730117e-07, "loss": 4.8846, "step": 12680 }, { "epoch": 0.5470129646379808, "learning_rate": 9.696987653256912e-07, "loss": 4.6795, "step": 12700 }, { "epoch": 0.5478744023775681, "learning_rate": 9.69650283350212e-07, "loss": 4.6705, "step": 12720 }, { "epoch": 0.5487358401171555, "learning_rate": 9.696018013747334e-07, "loss": 4.7057, "step": 12740 }, { "epoch": 0.549597277856743, "learning_rate": 9.695533193992542e-07, "loss": 4.8444, "step": 12760 }, { "epoch": 0.5504587155963303, "learning_rate": 9.695048374237755e-07, "loss": 4.598, "step": 12780 }, { "epoch": 0.5513201533359177, "learning_rate": 9.694563554482966e-07, "loss": 4.622, "step": 12800 }, { "epoch": 0.552181591075505, "learning_rate": 9.694078734728177e-07, "loss": 4.971, "step": 12820 }, { "epoch": 0.5530430288150924, "learning_rate": 9.69359391497339e-07, "loss": 4.5947, "step": 12840 }, { "epoch": 0.5539044665546797, "learning_rate": 9.693109095218598e-07, "loss": 4.2249, "step": 12860 }, { "epoch": 0.5547659042942671, "learning_rate": 9.69262427546381e-07, "loss": 4.5096, "step": 12880 }, { "epoch": 0.5556273420338546, "learning_rate": 9.692139455709022e-07, "loss": 4.6678, "step": 12900 }, { "epoch": 0.5564887797734419, "learning_rate": 9.691654635954232e-07, "loss": 4.6959, "step": 12920 }, { "epoch": 0.5573502175130293, "learning_rate": 9.691169816199445e-07, "loss": 4.5744, "step": 12940 }, { "epoch": 0.5582116552526166, "learning_rate": 9.690684996444654e-07, "loss": 4.8221, "step": 12960 }, { "epoch": 0.559073092992204, "learning_rate": 9.690200176689867e-07, "loss": 4.6241, "step": 12980 }, { "epoch": 0.5599345307317913, "learning_rate": 9.689715356935075e-07, "loss": 4.6407, "step": 13000 }, { "epoch": 0.5607959684713787, "learning_rate": 9.689230537180288e-07, "loss": 4.5464, "step": 13020 }, { "epoch": 0.561657406210966, "learning_rate": 9.688745717425499e-07, "loss": 4.6636, "step": 13040 }, { "epoch": 0.5625188439505535, "learning_rate": 9.68826089767071e-07, "loss": 4.5332, "step": 13060 }, { "epoch": 0.5633802816901409, "learning_rate": 9.687776077915922e-07, "loss": 4.5259, "step": 13080 }, { "epoch": 0.5642417194297282, "learning_rate": 9.687291258161133e-07, "loss": 4.7069, "step": 13100 }, { "epoch": 0.5651031571693156, "learning_rate": 9.686806438406344e-07, "loss": 4.5002, "step": 13120 }, { "epoch": 0.5659645949089029, "learning_rate": 9.686321618651552e-07, "loss": 4.802, "step": 13140 }, { "epoch": 0.5668260326484903, "learning_rate": 9.685836798896765e-07, "loss": 4.4615, "step": 13160 }, { "epoch": 0.5676874703880777, "learning_rate": 9.685351979141976e-07, "loss": 4.6674, "step": 13180 }, { "epoch": 0.5685489081276651, "learning_rate": 9.684867159387187e-07, "loss": 4.5308, "step": 13200 }, { "epoch": 0.5694103458672525, "learning_rate": 9.6843823396324e-07, "loss": 4.6764, "step": 13220 }, { "epoch": 0.5702717836068398, "learning_rate": 9.683897519877608e-07, "loss": 4.7747, "step": 13240 }, { "epoch": 0.5711332213464272, "learning_rate": 9.68341270012282e-07, "loss": 5.0853, "step": 13260 }, { "epoch": 0.5719946590860145, "learning_rate": 9.682927880368032e-07, "loss": 4.6953, "step": 13280 }, { "epoch": 0.5728560968256019, "learning_rate": 9.682443060613242e-07, "loss": 4.7413, "step": 13300 }, { "epoch": 0.5737175345651893, "learning_rate": 9.681958240858455e-07, "loss": 4.5876, "step": 13320 }, { "epoch": 0.5745789723047767, "learning_rate": 9.681473421103664e-07, "loss": 4.7571, "step": 13340 }, { "epoch": 0.5754404100443641, "learning_rate": 9.680988601348877e-07, "loss": 4.837, "step": 13360 }, { "epoch": 0.5763018477839514, "learning_rate": 9.680503781594085e-07, "loss": 4.5773, "step": 13380 }, { "epoch": 0.5771632855235388, "learning_rate": 9.680018961839298e-07, "loss": 4.7744, "step": 13400 }, { "epoch": 0.5780247232631262, "learning_rate": 9.679534142084509e-07, "loss": 4.7419, "step": 13420 }, { "epoch": 0.5788861610027135, "learning_rate": 9.67904932232972e-07, "loss": 4.8362, "step": 13440 }, { "epoch": 0.5797475987423009, "learning_rate": 9.678564502574932e-07, "loss": 4.7081, "step": 13460 }, { "epoch": 0.5806090364818882, "learning_rate": 9.678079682820143e-07, "loss": 4.7993, "step": 13480 }, { "epoch": 0.5814704742214757, "learning_rate": 9.677594863065354e-07, "loss": 4.495, "step": 13500 }, { "epoch": 0.582331911961063, "learning_rate": 9.677110043310564e-07, "loss": 4.5018, "step": 13520 }, { "epoch": 0.5831933497006504, "learning_rate": 9.676625223555775e-07, "loss": 4.7964, "step": 13540 }, { "epoch": 0.5840547874402378, "learning_rate": 9.676140403800988e-07, "loss": 4.538, "step": 13560 }, { "epoch": 0.5849162251798251, "learning_rate": 9.675655584046197e-07, "loss": 4.6586, "step": 13580 }, { "epoch": 0.5857776629194125, "learning_rate": 9.67517076429141e-07, "loss": 4.4211, "step": 13600 }, { "epoch": 0.5866391006589998, "learning_rate": 9.67468594453662e-07, "loss": 4.7492, "step": 13620 }, { "epoch": 0.5875005383985873, "learning_rate": 9.67420112478183e-07, "loss": 4.5692, "step": 13640 }, { "epoch": 0.5883619761381746, "learning_rate": 9.673716305027042e-07, "loss": 4.4928, "step": 13660 }, { "epoch": 0.589223413877762, "learning_rate": 9.673231485272252e-07, "loss": 4.5794, "step": 13680 }, { "epoch": 0.5900848516173494, "learning_rate": 9.672746665517465e-07, "loss": 4.658, "step": 13700 }, { "epoch": 0.5909462893569367, "learning_rate": 9.672261845762674e-07, "loss": 4.4166, "step": 13720 }, { "epoch": 0.5918077270965241, "learning_rate": 9.671777026007886e-07, "loss": 4.7738, "step": 13740 }, { "epoch": 0.5926691648361114, "learning_rate": 9.671292206253095e-07, "loss": 4.6694, "step": 13760 }, { "epoch": 0.5935306025756989, "learning_rate": 9.670807386498308e-07, "loss": 4.796, "step": 13780 }, { "epoch": 0.5943920403152863, "learning_rate": 9.670322566743519e-07, "loss": 4.506, "step": 13800 }, { "epoch": 0.5952534780548736, "learning_rate": 9.66983774698873e-07, "loss": 4.4972, "step": 13820 }, { "epoch": 0.596114915794461, "learning_rate": 9.669352927233942e-07, "loss": 4.7098, "step": 13840 }, { "epoch": 0.5969763535340483, "learning_rate": 9.668868107479153e-07, "loss": 4.3856, "step": 13860 }, { "epoch": 0.5978377912736357, "learning_rate": 9.668383287724364e-07, "loss": 4.6158, "step": 13880 }, { "epoch": 0.598699229013223, "learning_rate": 9.667898467969574e-07, "loss": 4.5539, "step": 13900 }, { "epoch": 0.5995606667528104, "learning_rate": 9.667413648214785e-07, "loss": 4.6079, "step": 13920 }, { "epoch": 0.6004221044923979, "learning_rate": 9.666928828459998e-07, "loss": 4.7394, "step": 13940 }, { "epoch": 0.6012835422319852, "learning_rate": 9.666444008705206e-07, "loss": 4.6476, "step": 13960 }, { "epoch": 0.6021449799715726, "learning_rate": 9.66595918895042e-07, "loss": 4.5925, "step": 13980 }, { "epoch": 0.6030064177111599, "learning_rate": 9.66547436919563e-07, "loss": 4.4916, "step": 14000 }, { "epoch": 0.6038678554507473, "learning_rate": 9.66498954944084e-07, "loss": 4.584, "step": 14020 }, { "epoch": 0.6047292931903346, "learning_rate": 9.664504729686051e-07, "loss": 4.926, "step": 14040 }, { "epoch": 0.605590730929922, "learning_rate": 9.664019909931262e-07, "loss": 4.7048, "step": 14060 }, { "epoch": 0.6064521686695095, "learning_rate": 9.663535090176475e-07, "loss": 4.8114, "step": 14080 }, { "epoch": 0.6073136064090968, "learning_rate": 9.663050270421686e-07, "loss": 4.6444, "step": 14100 }, { "epoch": 0.6081750441486842, "learning_rate": 9.662565450666896e-07, "loss": 4.7975, "step": 14120 }, { "epoch": 0.6090364818882715, "learning_rate": 9.662080630912107e-07, "loss": 4.7108, "step": 14140 }, { "epoch": 0.6098979196278589, "learning_rate": 9.661595811157318e-07, "loss": 4.5902, "step": 14160 }, { "epoch": 0.6107593573674462, "learning_rate": 9.66111099140253e-07, "loss": 4.4984, "step": 14180 }, { "epoch": 0.6116207951070336, "learning_rate": 9.66062617164774e-07, "loss": 4.8832, "step": 14200 }, { "epoch": 0.612482232846621, "learning_rate": 9.660141351892952e-07, "loss": 4.7217, "step": 14220 }, { "epoch": 0.6133436705862084, "learning_rate": 9.659656532138163e-07, "loss": 4.6702, "step": 14240 }, { "epoch": 0.6142051083257958, "learning_rate": 9.659171712383374e-07, "loss": 4.6737, "step": 14260 }, { "epoch": 0.6150665460653831, "learning_rate": 9.658686892628584e-07, "loss": 4.5198, "step": 14280 }, { "epoch": 0.6159279838049705, "learning_rate": 9.658202072873795e-07, "loss": 4.619, "step": 14300 }, { "epoch": 0.6167894215445578, "learning_rate": 9.657717253119008e-07, "loss": 4.4772, "step": 14320 }, { "epoch": 0.6176508592841452, "learning_rate": 9.657232433364216e-07, "loss": 4.6145, "step": 14340 }, { "epoch": 0.6185122970237326, "learning_rate": 9.65674761360943e-07, "loss": 4.9091, "step": 14360 }, { "epoch": 0.61937373476332, "learning_rate": 9.65626279385464e-07, "loss": 4.5172, "step": 14380 }, { "epoch": 0.6202351725029074, "learning_rate": 9.65577797409985e-07, "loss": 4.6175, "step": 14400 }, { "epoch": 0.6210966102424947, "learning_rate": 9.655293154345061e-07, "loss": 4.7035, "step": 14420 }, { "epoch": 0.6219580479820821, "learning_rate": 9.654808334590272e-07, "loss": 4.8991, "step": 14440 }, { "epoch": 0.6228194857216695, "learning_rate": 9.654323514835485e-07, "loss": 4.5864, "step": 14460 }, { "epoch": 0.6236809234612568, "learning_rate": 9.653838695080696e-07, "loss": 4.9811, "step": 14480 }, { "epoch": 0.6245423612008442, "learning_rate": 9.653353875325906e-07, "loss": 4.5354, "step": 14500 }, { "epoch": 0.6254037989404316, "learning_rate": 9.652869055571117e-07, "loss": 4.8074, "step": 14520 }, { "epoch": 0.626265236680019, "learning_rate": 9.652384235816328e-07, "loss": 4.7193, "step": 14540 }, { "epoch": 0.6271266744196063, "learning_rate": 9.65189941606154e-07, "loss": 4.7722, "step": 14560 }, { "epoch": 0.6279881121591937, "learning_rate": 9.65141459630675e-07, "loss": 4.6782, "step": 14580 }, { "epoch": 0.6288495498987811, "learning_rate": 9.650929776551962e-07, "loss": 4.5165, "step": 14600 }, { "epoch": 0.6297109876383684, "learning_rate": 9.650444956797173e-07, "loss": 4.682, "step": 14620 }, { "epoch": 0.6305724253779558, "learning_rate": 9.649960137042383e-07, "loss": 4.7696, "step": 14640 }, { "epoch": 0.6314338631175431, "learning_rate": 9.649475317287594e-07, "loss": 4.44, "step": 14660 }, { "epoch": 0.6322953008571306, "learning_rate": 9.648990497532805e-07, "loss": 4.9536, "step": 14680 }, { "epoch": 0.633156738596718, "learning_rate": 9.648505677778018e-07, "loss": 4.4343, "step": 14700 }, { "epoch": 0.6340181763363053, "learning_rate": 9.648020858023228e-07, "loss": 4.7066, "step": 14720 }, { "epoch": 0.6348796140758927, "learning_rate": 9.64753603826844e-07, "loss": 4.737, "step": 14740 }, { "epoch": 0.63574105181548, "learning_rate": 9.64705121851365e-07, "loss": 4.5779, "step": 14760 }, { "epoch": 0.6366024895550674, "learning_rate": 9.64656639875886e-07, "loss": 4.6775, "step": 14780 }, { "epoch": 0.6374639272946547, "learning_rate": 9.646081579004071e-07, "loss": 4.6419, "step": 14800 }, { "epoch": 0.6383253650342422, "learning_rate": 9.645596759249282e-07, "loss": 4.793, "step": 14820 }, { "epoch": 0.6391868027738296, "learning_rate": 9.645111939494495e-07, "loss": 4.6022, "step": 14840 }, { "epoch": 0.6400482405134169, "learning_rate": 9.644627119739706e-07, "loss": 4.7609, "step": 14860 }, { "epoch": 0.6409096782530043, "learning_rate": 9.644142299984916e-07, "loss": 4.664, "step": 14880 }, { "epoch": 0.6417711159925916, "learning_rate": 9.643657480230127e-07, "loss": 4.6633, "step": 14900 }, { "epoch": 0.642632553732179, "learning_rate": 9.643172660475338e-07, "loss": 4.7861, "step": 14920 }, { "epoch": 0.6434939914717663, "learning_rate": 9.64268784072055e-07, "loss": 4.6721, "step": 14940 }, { "epoch": 0.6443554292113538, "learning_rate": 9.64220302096576e-07, "loss": 4.6615, "step": 14960 }, { "epoch": 0.6452168669509412, "learning_rate": 9.641718201210972e-07, "loss": 4.668, "step": 14980 }, { "epoch": 0.6460783046905285, "learning_rate": 9.641233381456183e-07, "loss": 4.6814, "step": 15000 }, { "epoch": 0.6469397424301159, "learning_rate": 9.640748561701393e-07, "loss": 4.6951, "step": 15020 }, { "epoch": 0.6478011801697032, "learning_rate": 9.640263741946604e-07, "loss": 4.4504, "step": 15040 }, { "epoch": 0.6486626179092906, "learning_rate": 9.639778922191815e-07, "loss": 4.6223, "step": 15060 }, { "epoch": 0.6495240556488779, "learning_rate": 9.639294102437028e-07, "loss": 4.5445, "step": 15080 }, { "epoch": 0.6503854933884653, "learning_rate": 9.638809282682238e-07, "loss": 4.714, "step": 15100 }, { "epoch": 0.6512469311280528, "learning_rate": 9.63832446292745e-07, "loss": 4.5464, "step": 15120 }, { "epoch": 0.6521083688676401, "learning_rate": 9.637839643172662e-07, "loss": 4.266, "step": 15140 }, { "epoch": 0.6529698066072275, "learning_rate": 9.63735482341787e-07, "loss": 4.7638, "step": 15160 }, { "epoch": 0.6538312443468148, "learning_rate": 9.636870003663083e-07, "loss": 4.6303, "step": 15180 }, { "epoch": 0.6546926820864022, "learning_rate": 9.636385183908292e-07, "loss": 4.7192, "step": 15200 }, { "epoch": 0.6555541198259895, "learning_rate": 9.635900364153505e-07, "loss": 4.4276, "step": 15220 }, { "epoch": 0.6564155575655769, "learning_rate": 9.635415544398716e-07, "loss": 4.6025, "step": 15240 }, { "epoch": 0.6572769953051644, "learning_rate": 9.634930724643926e-07, "loss": 4.6005, "step": 15260 }, { "epoch": 0.6581384330447517, "learning_rate": 9.634445904889137e-07, "loss": 4.6788, "step": 15280 }, { "epoch": 0.6589998707843391, "learning_rate": 9.633961085134348e-07, "loss": 4.5772, "step": 15300 }, { "epoch": 0.6598613085239264, "learning_rate": 9.63347626537956e-07, "loss": 4.7873, "step": 15320 }, { "epoch": 0.6607227462635138, "learning_rate": 9.63299144562477e-07, "loss": 4.7228, "step": 15340 }, { "epoch": 0.6615841840031012, "learning_rate": 9.632506625869982e-07, "loss": 4.5213, "step": 15360 }, { "epoch": 0.6624456217426885, "learning_rate": 9.632021806115193e-07, "loss": 4.7668, "step": 15380 }, { "epoch": 0.663307059482276, "learning_rate": 9.631536986360403e-07, "loss": 4.6066, "step": 15400 }, { "epoch": 0.6641684972218633, "learning_rate": 9.631052166605614e-07, "loss": 4.642, "step": 15420 }, { "epoch": 0.6650299349614507, "learning_rate": 9.630567346850825e-07, "loss": 4.5487, "step": 15440 }, { "epoch": 0.665891372701038, "learning_rate": 9.630082527096038e-07, "loss": 4.5664, "step": 15460 }, { "epoch": 0.6667528104406254, "learning_rate": 9.629597707341248e-07, "loss": 4.5904, "step": 15480 }, { "epoch": 0.6676142481802128, "learning_rate": 9.62911288758646e-07, "loss": 4.5054, "step": 15500 }, { "epoch": 0.6684756859198001, "learning_rate": 9.628628067831672e-07, "loss": 4.6034, "step": 15520 }, { "epoch": 0.6693371236593875, "learning_rate": 9.62814324807688e-07, "loss": 4.6439, "step": 15540 }, { "epoch": 0.6701985613989749, "learning_rate": 9.627658428322093e-07, "loss": 4.3408, "step": 15560 }, { "epoch": 0.6710599991385623, "learning_rate": 9.627173608567302e-07, "loss": 4.8935, "step": 15580 }, { "epoch": 0.6719214368781496, "learning_rate": 9.626688788812515e-07, "loss": 4.569, "step": 15600 }, { "epoch": 0.672782874617737, "learning_rate": 9.626203969057725e-07, "loss": 4.2996, "step": 15620 }, { "epoch": 0.6736443123573244, "learning_rate": 9.625719149302936e-07, "loss": 4.6604, "step": 15640 }, { "epoch": 0.6745057500969117, "learning_rate": 9.625234329548147e-07, "loss": 4.5706, "step": 15660 }, { "epoch": 0.6753671878364991, "learning_rate": 9.624749509793358e-07, "loss": 4.5631, "step": 15680 }, { "epoch": 0.6762286255760865, "learning_rate": 9.62426469003857e-07, "loss": 4.7159, "step": 15700 }, { "epoch": 0.6770900633156739, "learning_rate": 9.623779870283781e-07, "loss": 4.7933, "step": 15720 }, { "epoch": 0.6779515010552613, "learning_rate": 9.623295050528992e-07, "loss": 4.654, "step": 15740 }, { "epoch": 0.6788129387948486, "learning_rate": 9.622810230774205e-07, "loss": 4.4683, "step": 15760 }, { "epoch": 0.679674376534436, "learning_rate": 9.622325411019413e-07, "loss": 4.7185, "step": 15780 }, { "epoch": 0.6805358142740233, "learning_rate": 9.621840591264626e-07, "loss": 4.643, "step": 15800 }, { "epoch": 0.6813972520136107, "learning_rate": 9.621355771509835e-07, "loss": 4.367, "step": 15820 }, { "epoch": 0.682258689753198, "learning_rate": 9.620870951755048e-07, "loss": 4.6144, "step": 15840 }, { "epoch": 0.6831201274927855, "learning_rate": 9.620386132000258e-07, "loss": 4.5154, "step": 15860 }, { "epoch": 0.6839815652323729, "learning_rate": 9.61990131224547e-07, "loss": 4.6272, "step": 15880 }, { "epoch": 0.6848430029719602, "learning_rate": 9.619416492490682e-07, "loss": 4.5658, "step": 15900 }, { "epoch": 0.6857044407115476, "learning_rate": 9.61893167273589e-07, "loss": 4.4771, "step": 15920 }, { "epoch": 0.6865658784511349, "learning_rate": 9.618446852981103e-07, "loss": 4.3724, "step": 15940 }, { "epoch": 0.6874273161907223, "learning_rate": 9.617962033226312e-07, "loss": 4.7635, "step": 15960 }, { "epoch": 0.6882887539303096, "learning_rate": 9.617477213471525e-07, "loss": 4.5284, "step": 15980 }, { "epoch": 0.6891501916698971, "learning_rate": 9.616992393716735e-07, "loss": 4.7441, "step": 16000 }, { "epoch": 0.6900116294094845, "learning_rate": 9.616507573961946e-07, "loss": 4.6897, "step": 16020 }, { "epoch": 0.6908730671490718, "learning_rate": 9.616022754207159e-07, "loss": 4.6016, "step": 16040 }, { "epoch": 0.6917345048886592, "learning_rate": 9.615537934452368e-07, "loss": 4.6313, "step": 16060 }, { "epoch": 0.6925959426282465, "learning_rate": 9.61505311469758e-07, "loss": 4.6305, "step": 16080 }, { "epoch": 0.6934573803678339, "learning_rate": 9.614568294942791e-07, "loss": 4.4422, "step": 16100 }, { "epoch": 0.6943188181074212, "learning_rate": 9.614083475188002e-07, "loss": 4.7132, "step": 16120 }, { "epoch": 0.6951802558470087, "learning_rate": 9.613598655433215e-07, "loss": 4.566, "step": 16140 }, { "epoch": 0.6960416935865961, "learning_rate": 9.613113835678423e-07, "loss": 4.7489, "step": 16160 }, { "epoch": 0.6969031313261834, "learning_rate": 9.612629015923636e-07, "loss": 4.6467, "step": 16180 }, { "epoch": 0.6977645690657708, "learning_rate": 9.612144196168845e-07, "loss": 4.573, "step": 16200 }, { "epoch": 0.6986260068053581, "learning_rate": 9.611659376414057e-07, "loss": 4.778, "step": 16220 }, { "epoch": 0.6994874445449455, "learning_rate": 9.611174556659268e-07, "loss": 4.2616, "step": 16240 }, { "epoch": 0.7003488822845328, "learning_rate": 9.610689736904479e-07, "loss": 4.5403, "step": 16260 }, { "epoch": 0.7012103200241202, "learning_rate": 9.610204917149692e-07, "loss": 4.436, "step": 16280 }, { "epoch": 0.7020717577637077, "learning_rate": 9.6097200973949e-07, "loss": 4.7559, "step": 16300 }, { "epoch": 0.702933195503295, "learning_rate": 9.609235277640113e-07, "loss": 4.5752, "step": 16320 }, { "epoch": 0.7037946332428824, "learning_rate": 9.608750457885324e-07, "loss": 4.7316, "step": 16340 }, { "epoch": 0.7046560709824697, "learning_rate": 9.608265638130535e-07, "loss": 4.6856, "step": 16360 }, { "epoch": 0.7055175087220571, "learning_rate": 9.607780818375745e-07, "loss": 4.4327, "step": 16380 }, { "epoch": 0.7063789464616445, "learning_rate": 9.607295998620956e-07, "loss": 4.5428, "step": 16400 }, { "epoch": 0.7072403842012318, "learning_rate": 9.606811178866169e-07, "loss": 4.4309, "step": 16420 }, { "epoch": 0.7081018219408193, "learning_rate": 9.606326359111377e-07, "loss": 4.8195, "step": 16440 }, { "epoch": 0.7089632596804066, "learning_rate": 9.60584153935659e-07, "loss": 4.7752, "step": 16460 }, { "epoch": 0.709824697419994, "learning_rate": 9.6053567196018e-07, "loss": 4.8154, "step": 16480 }, { "epoch": 0.7106861351595813, "learning_rate": 9.604871899847012e-07, "loss": 4.3601, "step": 16500 }, { "epoch": 0.7115475728991687, "learning_rate": 9.604387080092225e-07, "loss": 4.7314, "step": 16520 }, { "epoch": 0.7124090106387561, "learning_rate": 9.603902260337433e-07, "loss": 4.588, "step": 16540 }, { "epoch": 0.7132704483783434, "learning_rate": 9.603417440582646e-07, "loss": 4.7045, "step": 16560 }, { "epoch": 0.7141318861179309, "learning_rate": 9.602932620827855e-07, "loss": 4.5338, "step": 16580 }, { "epoch": 0.7149933238575182, "learning_rate": 9.602447801073067e-07, "loss": 4.9495, "step": 16600 }, { "epoch": 0.7158547615971056, "learning_rate": 9.601962981318278e-07, "loss": 4.5542, "step": 16620 }, { "epoch": 0.716716199336693, "learning_rate": 9.601478161563489e-07, "loss": 4.4112, "step": 16640 }, { "epoch": 0.7175776370762803, "learning_rate": 9.600993341808702e-07, "loss": 4.5342, "step": 16660 }, { "epoch": 0.7184390748158677, "learning_rate": 9.60050852205391e-07, "loss": 4.4651, "step": 16680 }, { "epoch": 0.719300512555455, "learning_rate": 9.600023702299123e-07, "loss": 4.7432, "step": 16700 }, { "epoch": 0.7201619502950424, "learning_rate": 9.599538882544334e-07, "loss": 4.6841, "step": 16720 }, { "epoch": 0.7210233880346298, "learning_rate": 9.599054062789545e-07, "loss": 4.4546, "step": 16740 }, { "epoch": 0.7218848257742172, "learning_rate": 9.598569243034757e-07, "loss": 4.7137, "step": 16760 }, { "epoch": 0.7227462635138046, "learning_rate": 9.598084423279966e-07, "loss": 4.5197, "step": 16780 }, { "epoch": 0.7236077012533919, "learning_rate": 9.597599603525179e-07, "loss": 4.6357, "step": 16800 }, { "epoch": 0.7244691389929793, "learning_rate": 9.597114783770387e-07, "loss": 4.4496, "step": 16820 }, { "epoch": 0.7253305767325666, "learning_rate": 9.5966299640156e-07, "loss": 4.5952, "step": 16840 }, { "epoch": 0.726192014472154, "learning_rate": 9.59614514426081e-07, "loss": 4.3745, "step": 16860 }, { "epoch": 0.7270534522117414, "learning_rate": 9.595660324506022e-07, "loss": 4.5424, "step": 16880 }, { "epoch": 0.7279148899513288, "learning_rate": 9.595175504751234e-07, "loss": 4.5643, "step": 16900 }, { "epoch": 0.7287763276909162, "learning_rate": 9.594690684996443e-07, "loss": 4.5414, "step": 16920 }, { "epoch": 0.7296377654305035, "learning_rate": 9.594205865241656e-07, "loss": 4.4815, "step": 16940 }, { "epoch": 0.7304992031700909, "learning_rate": 9.593721045486865e-07, "loss": 4.371, "step": 16960 }, { "epoch": 0.7313606409096782, "learning_rate": 9.593236225732077e-07, "loss": 4.6894, "step": 16980 }, { "epoch": 0.7322220786492656, "learning_rate": 9.592751405977288e-07, "loss": 4.603, "step": 17000 }, { "epoch": 0.7330835163888529, "learning_rate": 9.592266586222499e-07, "loss": 4.6009, "step": 17020 }, { "epoch": 0.7339449541284404, "learning_rate": 9.591781766467712e-07, "loss": 4.5632, "step": 17040 }, { "epoch": 0.7348063918680278, "learning_rate": 9.59129694671292e-07, "loss": 4.5738, "step": 17060 }, { "epoch": 0.7356678296076151, "learning_rate": 9.590812126958133e-07, "loss": 4.7349, "step": 17080 }, { "epoch": 0.7365292673472025, "learning_rate": 9.590327307203344e-07, "loss": 4.7286, "step": 17100 }, { "epoch": 0.7373907050867898, "learning_rate": 9.589842487448554e-07, "loss": 4.8498, "step": 17120 }, { "epoch": 0.7382521428263772, "learning_rate": 9.589357667693767e-07, "loss": 4.7121, "step": 17140 }, { "epoch": 0.7391135805659645, "learning_rate": 9.588872847938976e-07, "loss": 4.3988, "step": 17160 }, { "epoch": 0.739975018305552, "learning_rate": 9.588388028184189e-07, "loss": 4.6037, "step": 17180 }, { "epoch": 0.7408364560451394, "learning_rate": 9.587903208429397e-07, "loss": 4.6311, "step": 17200 }, { "epoch": 0.7416978937847267, "learning_rate": 9.58741838867461e-07, "loss": 4.5533, "step": 17220 }, { "epoch": 0.7425593315243141, "learning_rate": 9.58693356891982e-07, "loss": 4.6068, "step": 17240 }, { "epoch": 0.7434207692639014, "learning_rate": 9.586448749165032e-07, "loss": 4.6679, "step": 17260 }, { "epoch": 0.7442822070034888, "learning_rate": 9.585963929410244e-07, "loss": 4.7084, "step": 17280 }, { "epoch": 0.7451436447430762, "learning_rate": 9.585479109655455e-07, "loss": 4.5974, "step": 17300 }, { "epoch": 0.7460050824826636, "learning_rate": 9.584994289900666e-07, "loss": 4.8029, "step": 17320 }, { "epoch": 0.746866520222251, "learning_rate": 9.584509470145877e-07, "loss": 4.6615, "step": 17340 }, { "epoch": 0.7477279579618383, "learning_rate": 9.584024650391087e-07, "loss": 4.6187, "step": 17360 }, { "epoch": 0.7485893957014257, "learning_rate": 9.5835398306363e-07, "loss": 4.6186, "step": 17380 }, { "epoch": 0.749450833441013, "learning_rate": 9.583055010881509e-07, "loss": 4.5701, "step": 17400 }, { "epoch": 0.7503122711806004, "learning_rate": 9.582570191126722e-07, "loss": 4.8236, "step": 17420 }, { "epoch": 0.7511737089201878, "learning_rate": 9.58208537137193e-07, "loss": 4.7821, "step": 17440 }, { "epoch": 0.7520351466597751, "learning_rate": 9.581600551617143e-07, "loss": 4.3822, "step": 17460 }, { "epoch": 0.7528965843993626, "learning_rate": 9.581115731862354e-07, "loss": 4.8352, "step": 17480 }, { "epoch": 0.7537580221389499, "learning_rate": 9.580630912107564e-07, "loss": 4.5794, "step": 17500 }, { "epoch": 0.7546194598785373, "learning_rate": 9.580146092352777e-07, "loss": 4.3318, "step": 17520 }, { "epoch": 0.7554808976181246, "learning_rate": 9.579661272597986e-07, "loss": 4.4593, "step": 17540 }, { "epoch": 0.756342335357712, "learning_rate": 9.579176452843199e-07, "loss": 4.4453, "step": 17560 }, { "epoch": 0.7572037730972994, "learning_rate": 9.578691633088407e-07, "loss": 4.5113, "step": 17580 }, { "epoch": 0.7580652108368867, "learning_rate": 9.57820681333362e-07, "loss": 4.4507, "step": 17600 }, { "epoch": 0.7589266485764742, "learning_rate": 9.57772199357883e-07, "loss": 4.5328, "step": 17620 }, { "epoch": 0.7597880863160615, "learning_rate": 9.577237173824042e-07, "loss": 4.6588, "step": 17640 }, { "epoch": 0.7606495240556489, "learning_rate": 9.576752354069254e-07, "loss": 4.5698, "step": 17660 }, { "epoch": 0.7615109617952363, "learning_rate": 9.576267534314465e-07, "loss": 4.5499, "step": 17680 }, { "epoch": 0.7623723995348236, "learning_rate": 9.575782714559676e-07, "loss": 4.6859, "step": 17700 }, { "epoch": 0.763233837274411, "learning_rate": 9.575297894804887e-07, "loss": 4.3594, "step": 17720 }, { "epoch": 0.7640952750139983, "learning_rate": 9.574813075050097e-07, "loss": 4.3802, "step": 17740 }, { "epoch": 0.7649567127535858, "learning_rate": 9.57432825529531e-07, "loss": 4.5581, "step": 17760 }, { "epoch": 0.7658181504931731, "learning_rate": 9.573843435540519e-07, "loss": 4.6645, "step": 17780 }, { "epoch": 0.7666795882327605, "learning_rate": 9.573358615785731e-07, "loss": 4.4874, "step": 17800 }, { "epoch": 0.7675410259723479, "learning_rate": 9.572873796030942e-07, "loss": 4.401, "step": 17820 }, { "epoch": 0.7684024637119352, "learning_rate": 9.572388976276153e-07, "loss": 4.5584, "step": 17840 }, { "epoch": 0.7692639014515226, "learning_rate": 9.571904156521364e-07, "loss": 4.3602, "step": 17860 }, { "epoch": 0.7701253391911099, "learning_rate": 9.571419336766574e-07, "loss": 4.5251, "step": 17880 }, { "epoch": 0.7709867769306973, "learning_rate": 9.570934517011787e-07, "loss": 4.51, "step": 17900 }, { "epoch": 0.7718482146702847, "learning_rate": 9.570449697256998e-07, "loss": 4.3659, "step": 17920 }, { "epoch": 0.7727096524098721, "learning_rate": 9.569964877502209e-07, "loss": 4.6877, "step": 17940 }, { "epoch": 0.7735710901494595, "learning_rate": 9.56948005774742e-07, "loss": 4.4384, "step": 17960 }, { "epoch": 0.7744325278890468, "learning_rate": 9.56899523799263e-07, "loss": 4.763, "step": 17980 }, { "epoch": 0.7752939656286342, "learning_rate": 9.56851041823784e-07, "loss": 4.7351, "step": 18000 }, { "epoch": 0.7761554033682215, "learning_rate": 9.568025598483051e-07, "loss": 4.539, "step": 18020 }, { "epoch": 0.7770168411078089, "learning_rate": 9.567540778728264e-07, "loss": 4.359, "step": 18040 }, { "epoch": 0.7778782788473964, "learning_rate": 9.567055958973475e-07, "loss": 4.4866, "step": 18060 }, { "epoch": 0.7787397165869837, "learning_rate": 9.566571139218686e-07, "loss": 4.5832, "step": 18080 }, { "epoch": 0.7796011543265711, "learning_rate": 9.566086319463896e-07, "loss": 4.3959, "step": 18100 }, { "epoch": 0.7804625920661584, "learning_rate": 9.565601499709107e-07, "loss": 4.4185, "step": 18120 }, { "epoch": 0.7813240298057458, "learning_rate": 9.56511667995432e-07, "loss": 4.4384, "step": 18140 }, { "epoch": 0.7821854675453331, "learning_rate": 9.564631860199529e-07, "loss": 4.5399, "step": 18160 }, { "epoch": 0.7830469052849205, "learning_rate": 9.564147040444741e-07, "loss": 4.6471, "step": 18180 }, { "epoch": 0.783908343024508, "learning_rate": 9.563662220689952e-07, "loss": 4.72, "step": 18200 }, { "epoch": 0.7847697807640953, "learning_rate": 9.563177400935163e-07, "loss": 4.489, "step": 18220 }, { "epoch": 0.7856312185036827, "learning_rate": 9.562692581180374e-07, "loss": 4.5276, "step": 18240 }, { "epoch": 0.78649265624327, "learning_rate": 9.562207761425584e-07, "loss": 4.5964, "step": 18260 }, { "epoch": 0.7873540939828574, "learning_rate": 9.561722941670797e-07, "loss": 4.5272, "step": 18280 }, { "epoch": 0.7882155317224447, "learning_rate": 9.561238121916008e-07, "loss": 4.5325, "step": 18300 }, { "epoch": 0.7890769694620321, "learning_rate": 9.560753302161219e-07, "loss": 4.6481, "step": 18320 }, { "epoch": 0.7899384072016195, "learning_rate": 9.56026848240643e-07, "loss": 4.6294, "step": 18340 }, { "epoch": 0.7907998449412069, "learning_rate": 9.55978366265164e-07, "loss": 4.5148, "step": 18360 }, { "epoch": 0.7916612826807943, "learning_rate": 9.559298842896853e-07, "loss": 4.4899, "step": 18380 }, { "epoch": 0.7925227204203816, "learning_rate": 9.558814023142061e-07, "loss": 4.5426, "step": 18400 }, { "epoch": 0.793384158159969, "learning_rate": 9.558329203387274e-07, "loss": 4.4513, "step": 18420 }, { "epoch": 0.7942455958995563, "learning_rate": 9.557844383632485e-07, "loss": 4.4633, "step": 18440 }, { "epoch": 0.7951070336391437, "learning_rate": 9.557359563877696e-07, "loss": 4.44, "step": 18460 }, { "epoch": 0.7959684713787311, "learning_rate": 9.556874744122906e-07, "loss": 4.8242, "step": 18480 }, { "epoch": 0.7968299091183185, "learning_rate": 9.556389924368117e-07, "loss": 4.475, "step": 18500 }, { "epoch": 0.7976913468579059, "learning_rate": 9.55590510461333e-07, "loss": 4.5009, "step": 18520 }, { "epoch": 0.7985527845974932, "learning_rate": 9.555420284858539e-07, "loss": 4.4502, "step": 18540 }, { "epoch": 0.7994142223370806, "learning_rate": 9.554935465103751e-07, "loss": 4.7639, "step": 18560 }, { "epoch": 0.800275660076668, "learning_rate": 9.554450645348962e-07, "loss": 4.4771, "step": 18580 }, { "epoch": 0.8011370978162553, "learning_rate": 9.553965825594173e-07, "loss": 4.4977, "step": 18600 }, { "epoch": 0.8019985355558427, "learning_rate": 9.553481005839384e-07, "loss": 4.3852, "step": 18620 }, { "epoch": 0.80285997329543, "learning_rate": 9.552996186084594e-07, "loss": 4.5963, "step": 18640 }, { "epoch": 0.8037214110350175, "learning_rate": 9.552511366329807e-07, "loss": 4.7555, "step": 18660 }, { "epoch": 0.8045828487746048, "learning_rate": 9.552026546575018e-07, "loss": 4.7279, "step": 18680 }, { "epoch": 0.8054442865141922, "learning_rate": 9.551541726820228e-07, "loss": 4.6135, "step": 18700 }, { "epoch": 0.8063057242537796, "learning_rate": 9.55105690706544e-07, "loss": 4.474, "step": 18720 }, { "epoch": 0.8071671619933669, "learning_rate": 9.55057208731065e-07, "loss": 4.5132, "step": 18740 }, { "epoch": 0.8080285997329543, "learning_rate": 9.550087267555863e-07, "loss": 4.3501, "step": 18760 }, { "epoch": 0.8088900374725416, "learning_rate": 9.549602447801071e-07, "loss": 4.4286, "step": 18780 }, { "epoch": 0.8097514752121291, "learning_rate": 9.549117628046284e-07, "loss": 4.5616, "step": 18800 }, { "epoch": 0.8106129129517164, "learning_rate": 9.548632808291495e-07, "loss": 4.4247, "step": 18820 }, { "epoch": 0.8114743506913038, "learning_rate": 9.548147988536706e-07, "loss": 4.5656, "step": 18840 }, { "epoch": 0.8123357884308912, "learning_rate": 9.547663168781916e-07, "loss": 4.2647, "step": 18860 }, { "epoch": 0.8131972261704785, "learning_rate": 9.547178349027127e-07, "loss": 4.3358, "step": 18880 }, { "epoch": 0.8140586639100659, "learning_rate": 9.54669352927234e-07, "loss": 4.2501, "step": 18900 }, { "epoch": 0.8149201016496532, "learning_rate": 9.54620870951755e-07, "loss": 4.6137, "step": 18920 }, { "epoch": 0.8157815393892407, "learning_rate": 9.545723889762761e-07, "loss": 4.5941, "step": 18940 }, { "epoch": 0.816642977128828, "learning_rate": 9.545239070007974e-07, "loss": 4.464, "step": 18960 }, { "epoch": 0.8175044148684154, "learning_rate": 9.544754250253183e-07, "loss": 4.3827, "step": 18980 }, { "epoch": 0.8183658526080028, "learning_rate": 9.544269430498396e-07, "loss": 4.62, "step": 19000 }, { "epoch": 0.8192272903475901, "learning_rate": 9.543784610743604e-07, "loss": 4.6739, "step": 19020 }, { "epoch": 0.8200887280871775, "learning_rate": 9.543299790988817e-07, "loss": 4.5649, "step": 19040 }, { "epoch": 0.8209501658267648, "learning_rate": 9.542814971234028e-07, "loss": 4.4442, "step": 19060 }, { "epoch": 0.8218116035663522, "learning_rate": 9.542330151479238e-07, "loss": 4.5496, "step": 19080 }, { "epoch": 0.8226730413059397, "learning_rate": 9.54184533172445e-07, "loss": 4.458, "step": 19100 }, { "epoch": 0.823534479045527, "learning_rate": 9.54136051196966e-07, "loss": 4.536, "step": 19120 }, { "epoch": 0.8243959167851144, "learning_rate": 9.540875692214873e-07, "loss": 4.6454, "step": 19140 }, { "epoch": 0.8252573545247017, "learning_rate": 9.540390872460081e-07, "loss": 4.4801, "step": 19160 }, { "epoch": 0.8261187922642891, "learning_rate": 9.539906052705294e-07, "loss": 4.5506, "step": 19180 }, { "epoch": 0.8269802300038764, "learning_rate": 9.539421232950505e-07, "loss": 4.4797, "step": 19200 }, { "epoch": 0.8278416677434638, "learning_rate": 9.538936413195716e-07, "loss": 4.5092, "step": 19220 }, { "epoch": 0.8287031054830513, "learning_rate": 9.538451593440926e-07, "loss": 4.4169, "step": 19240 }, { "epoch": 0.8295645432226386, "learning_rate": 9.537966773686137e-07, "loss": 4.7532, "step": 19260 }, { "epoch": 0.830425980962226, "learning_rate": 9.53748195393135e-07, "loss": 4.6437, "step": 19280 }, { "epoch": 0.8312874187018133, "learning_rate": 9.53699713417656e-07, "loss": 4.4116, "step": 19300 }, { "epoch": 0.8321488564414007, "learning_rate": 9.536512314421771e-07, "loss": 4.6395, "step": 19320 }, { "epoch": 0.833010294180988, "learning_rate": 9.536027494666983e-07, "loss": 4.5763, "step": 19340 }, { "epoch": 0.8338717319205754, "learning_rate": 9.535542674912193e-07, "loss": 4.6412, "step": 19360 }, { "epoch": 0.8347331696601629, "learning_rate": 9.535057855157404e-07, "loss": 4.4227, "step": 19380 }, { "epoch": 0.8355946073997502, "learning_rate": 9.534573035402615e-07, "loss": 4.6791, "step": 19400 }, { "epoch": 0.8364560451393376, "learning_rate": 9.534088215647826e-07, "loss": 4.5265, "step": 19420 }, { "epoch": 0.8373174828789249, "learning_rate": 9.533603395893038e-07, "loss": 4.5809, "step": 19440 }, { "epoch": 0.8381789206185123, "learning_rate": 9.533118576138249e-07, "loss": 4.7335, "step": 19460 }, { "epoch": 0.8390403583580996, "learning_rate": 9.532633756383459e-07, "loss": 4.6606, "step": 19480 }, { "epoch": 0.839901796097687, "learning_rate": 9.532148936628671e-07, "loss": 4.5205, "step": 19500 }, { "epoch": 0.8407632338372744, "learning_rate": 9.531664116873882e-07, "loss": 4.6531, "step": 19520 }, { "epoch": 0.8416246715768618, "learning_rate": 9.531179297119093e-07, "loss": 4.7872, "step": 19540 }, { "epoch": 0.8424861093164492, "learning_rate": 9.530694477364304e-07, "loss": 4.4518, "step": 19560 }, { "epoch": 0.8433475470560365, "learning_rate": 9.530209657609516e-07, "loss": 4.4122, "step": 19580 }, { "epoch": 0.8442089847956239, "learning_rate": 9.529724837854727e-07, "loss": 4.6403, "step": 19600 }, { "epoch": 0.8450704225352113, "learning_rate": 9.529240018099936e-07, "loss": 4.6208, "step": 19620 }, { "epoch": 0.8459318602747986, "learning_rate": 9.528755198345148e-07, "loss": 4.6772, "step": 19640 }, { "epoch": 0.846793298014386, "learning_rate": 9.528270378590359e-07, "loss": 4.2239, "step": 19660 }, { "epoch": 0.8476547357539734, "learning_rate": 9.52778555883557e-07, "loss": 4.4615, "step": 19680 }, { "epoch": 0.8485161734935608, "learning_rate": 9.527300739080781e-07, "loss": 4.4219, "step": 19700 }, { "epoch": 0.8493776112331481, "learning_rate": 9.526815919325993e-07, "loss": 4.5251, "step": 19720 }, { "epoch": 0.8502390489727355, "learning_rate": 9.526331099571202e-07, "loss": 4.4332, "step": 19740 }, { "epoch": 0.8511004867123229, "learning_rate": 9.525846279816414e-07, "loss": 4.7348, "step": 19760 }, { "epoch": 0.8519619244519102, "learning_rate": 9.525361460061625e-07, "loss": 4.4744, "step": 19780 }, { "epoch": 0.8528233621914976, "learning_rate": 9.524876640306837e-07, "loss": 4.482, "step": 19800 }, { "epoch": 0.8536847999310849, "learning_rate": 9.524391820552048e-07, "loss": 4.6764, "step": 19820 }, { "epoch": 0.8545462376706724, "learning_rate": 9.523907000797259e-07, "loss": 4.2579, "step": 19840 }, { "epoch": 0.8554076754102597, "learning_rate": 9.523422181042469e-07, "loss": 4.5412, "step": 19860 }, { "epoch": 0.8562691131498471, "learning_rate": 9.522937361287681e-07, "loss": 4.4961, "step": 19880 }, { "epoch": 0.8571305508894345, "learning_rate": 9.522452541532892e-07, "loss": 4.7074, "step": 19900 }, { "epoch": 0.8579919886290218, "learning_rate": 9.521967721778103e-07, "loss": 4.7198, "step": 19920 }, { "epoch": 0.8588534263686092, "learning_rate": 9.521482902023314e-07, "loss": 4.5957, "step": 19940 }, { "epoch": 0.8597148641081965, "learning_rate": 9.520998082268526e-07, "loss": 4.5261, "step": 19960 }, { "epoch": 0.860576301847784, "learning_rate": 9.520513262513736e-07, "loss": 4.2652, "step": 19980 }, { "epoch": 0.8614377395873714, "learning_rate": 9.520028442758947e-07, "loss": 4.3014, "step": 20000 }, { "epoch": 0.8622991773269587, "learning_rate": 9.519543623004158e-07, "loss": 4.617, "step": 20020 }, { "epoch": 0.8631606150665461, "learning_rate": 9.51905880324937e-07, "loss": 4.5581, "step": 20040 }, { "epoch": 0.8640220528061334, "learning_rate": 9.51857398349458e-07, "loss": 4.3092, "step": 20060 }, { "epoch": 0.8648834905457208, "learning_rate": 9.518089163739792e-07, "loss": 4.4433, "step": 20080 }, { "epoch": 0.8657449282853081, "learning_rate": 9.517604343985003e-07, "loss": 4.3439, "step": 20100 }, { "epoch": 0.8666063660248956, "learning_rate": 9.517119524230214e-07, "loss": 4.4046, "step": 20120 }, { "epoch": 0.867467803764483, "learning_rate": 9.516634704475424e-07, "loss": 4.4703, "step": 20140 }, { "epoch": 0.8683292415040703, "learning_rate": 9.516149884720635e-07, "loss": 4.0817, "step": 20160 }, { "epoch": 0.8691906792436577, "learning_rate": 9.515665064965847e-07, "loss": 4.6784, "step": 20180 }, { "epoch": 0.870052116983245, "learning_rate": 9.515180245211058e-07, "loss": 4.3892, "step": 20200 }, { "epoch": 0.8709135547228324, "learning_rate": 9.514695425456269e-07, "loss": 4.5129, "step": 20220 }, { "epoch": 0.8717749924624197, "learning_rate": 9.51421060570148e-07, "loss": 4.4788, "step": 20240 }, { "epoch": 0.8726364302020071, "learning_rate": 9.51372578594669e-07, "loss": 4.3852, "step": 20260 }, { "epoch": 0.8734978679415946, "learning_rate": 9.513240966191901e-07, "loss": 4.3911, "step": 20280 }, { "epoch": 0.8743593056811819, "learning_rate": 9.512756146437113e-07, "loss": 4.4513, "step": 20300 }, { "epoch": 0.8752207434207693, "learning_rate": 9.512271326682324e-07, "loss": 4.5099, "step": 20320 }, { "epoch": 0.8760821811603566, "learning_rate": 9.511786506927536e-07, "loss": 4.4488, "step": 20340 }, { "epoch": 0.876943618899944, "learning_rate": 9.511301687172747e-07, "loss": 4.4457, "step": 20360 }, { "epoch": 0.8778050566395313, "learning_rate": 9.510816867417957e-07, "loss": 4.4634, "step": 20380 }, { "epoch": 0.8786664943791187, "learning_rate": 9.510332047663168e-07, "loss": 4.4281, "step": 20400 }, { "epoch": 0.8795279321187062, "learning_rate": 9.50984722790838e-07, "loss": 4.7259, "step": 20420 }, { "epoch": 0.8803893698582935, "learning_rate": 9.50936240815359e-07, "loss": 4.4135, "step": 20440 }, { "epoch": 0.8812508075978809, "learning_rate": 9.508877588398802e-07, "loss": 4.6404, "step": 20460 }, { "epoch": 0.8821122453374682, "learning_rate": 9.508392768644013e-07, "loss": 4.4357, "step": 20480 }, { "epoch": 0.8829736830770556, "learning_rate": 9.507907948889224e-07, "loss": 4.4953, "step": 20500 }, { "epoch": 0.883835120816643, "learning_rate": 9.507423129134434e-07, "loss": 4.3235, "step": 20520 }, { "epoch": 0.8846965585562303, "learning_rate": 9.506938309379646e-07, "loss": 4.5058, "step": 20540 }, { "epoch": 0.8855579962958178, "learning_rate": 9.506453489624857e-07, "loss": 4.6422, "step": 20560 }, { "epoch": 0.8864194340354051, "learning_rate": 9.505968669870069e-07, "loss": 4.5657, "step": 20580 }, { "epoch": 0.8872808717749925, "learning_rate": 9.505483850115279e-07, "loss": 4.1617, "step": 20600 }, { "epoch": 0.8881423095145798, "learning_rate": 9.50499903036049e-07, "loss": 4.4965, "step": 20620 }, { "epoch": 0.8890037472541672, "learning_rate": 9.504514210605701e-07, "loss": 4.512, "step": 20640 }, { "epoch": 0.8898651849937546, "learning_rate": 9.504029390850912e-07, "loss": 4.5665, "step": 20660 }, { "epoch": 0.8907266227333419, "learning_rate": 9.503544571096123e-07, "loss": 4.6343, "step": 20680 }, { "epoch": 0.8915880604729293, "learning_rate": 9.503059751341334e-07, "loss": 4.5786, "step": 20700 }, { "epoch": 0.8924494982125167, "learning_rate": 9.502574931586546e-07, "loss": 4.2518, "step": 20720 }, { "epoch": 0.8933109359521041, "learning_rate": 9.502090111831756e-07, "loss": 4.3461, "step": 20740 }, { "epoch": 0.8941723736916914, "learning_rate": 9.501605292076967e-07, "loss": 4.6709, "step": 20760 }, { "epoch": 0.8950338114312788, "learning_rate": 9.501120472322178e-07, "loss": 4.441, "step": 20780 }, { "epoch": 0.8958952491708662, "learning_rate": 9.50063565256739e-07, "loss": 4.4467, "step": 20800 }, { "epoch": 0.8967566869104535, "learning_rate": 9.5001508328126e-07, "loss": 4.5218, "step": 20820 }, { "epoch": 0.8976181246500409, "learning_rate": 9.499666013057812e-07, "loss": 4.2716, "step": 20840 }, { "epoch": 0.8984795623896283, "learning_rate": 9.499181193303023e-07, "loss": 4.531, "step": 20860 }, { "epoch": 0.8993410001292157, "learning_rate": 9.498696373548233e-07, "loss": 4.4809, "step": 20880 }, { "epoch": 0.900202437868803, "learning_rate": 9.498211553793444e-07, "loss": 4.5645, "step": 20900 }, { "epoch": 0.9010638756083904, "learning_rate": 9.497726734038656e-07, "loss": 4.3924, "step": 20920 }, { "epoch": 0.9019253133479778, "learning_rate": 9.497241914283867e-07, "loss": 4.5356, "step": 20940 }, { "epoch": 0.9027867510875651, "learning_rate": 9.496757094529078e-07, "loss": 4.1991, "step": 20960 }, { "epoch": 0.9036481888271525, "learning_rate": 9.496272274774289e-07, "loss": 4.4928, "step": 20980 }, { "epoch": 0.9045096265667399, "learning_rate": 9.4957874550195e-07, "loss": 4.6196, "step": 21000 }, { "epoch": 0.9053710643063273, "learning_rate": 9.495302635264711e-07, "loss": 4.2559, "step": 21020 }, { "epoch": 0.9062325020459147, "learning_rate": 9.494817815509922e-07, "loss": 4.6048, "step": 21040 }, { "epoch": 0.907093939785502, "learning_rate": 9.494332995755133e-07, "loss": 4.6164, "step": 21060 }, { "epoch": 0.9079553775250894, "learning_rate": 9.493848176000345e-07, "loss": 4.4236, "step": 21080 }, { "epoch": 0.9088168152646767, "learning_rate": 9.493363356245556e-07, "loss": 4.4766, "step": 21100 }, { "epoch": 0.9096782530042641, "learning_rate": 9.492878536490767e-07, "loss": 4.457, "step": 21120 }, { "epoch": 0.9105396907438514, "learning_rate": 9.492393716735977e-07, "loss": 4.4613, "step": 21140 }, { "epoch": 0.9114011284834389, "learning_rate": 9.491908896981189e-07, "loss": 4.3311, "step": 21160 }, { "epoch": 0.9122625662230263, "learning_rate": 9.4914240772264e-07, "loss": 4.4791, "step": 21180 }, { "epoch": 0.9131240039626136, "learning_rate": 9.49093925747161e-07, "loss": 4.5871, "step": 21200 }, { "epoch": 0.913985441702201, "learning_rate": 9.490454437716822e-07, "loss": 4.5684, "step": 21220 }, { "epoch": 0.9148468794417883, "learning_rate": 9.489969617962033e-07, "loss": 4.5018, "step": 21240 }, { "epoch": 0.9157083171813757, "learning_rate": 9.489484798207243e-07, "loss": 4.4402, "step": 21260 }, { "epoch": 0.916569754920963, "learning_rate": 9.488999978452454e-07, "loss": 4.5509, "step": 21280 }, { "epoch": 0.9174311926605505, "learning_rate": 9.488515158697666e-07, "loss": 4.4623, "step": 21300 }, { "epoch": 0.9182926304001379, "learning_rate": 9.488030338942877e-07, "loss": 4.6452, "step": 21320 }, { "epoch": 0.9191540681397252, "learning_rate": 9.487545519188088e-07, "loss": 4.3545, "step": 21340 }, { "epoch": 0.9200155058793126, "learning_rate": 9.487060699433299e-07, "loss": 4.6088, "step": 21360 }, { "epoch": 0.9208769436188999, "learning_rate": 9.486575879678511e-07, "loss": 4.4155, "step": 21380 }, { "epoch": 0.9217383813584873, "learning_rate": 9.486091059923721e-07, "loss": 4.4004, "step": 21400 }, { "epoch": 0.9225998190980746, "learning_rate": 9.485606240168932e-07, "loss": 4.5805, "step": 21420 }, { "epoch": 0.923461256837662, "learning_rate": 9.485121420414143e-07, "loss": 4.545, "step": 21440 }, { "epoch": 0.9243226945772495, "learning_rate": 9.484636600659355e-07, "loss": 4.4467, "step": 21460 }, { "epoch": 0.9251841323168368, "learning_rate": 9.484151780904566e-07, "loss": 4.47, "step": 21480 }, { "epoch": 0.9260455700564242, "learning_rate": 9.483666961149777e-07, "loss": 4.7662, "step": 21500 }, { "epoch": 0.9269070077960115, "learning_rate": 9.483182141394986e-07, "loss": 4.3187, "step": 21520 }, { "epoch": 0.9277684455355989, "learning_rate": 9.482697321640199e-07, "loss": 4.8051, "step": 21540 }, { "epoch": 0.9286298832751863, "learning_rate": 9.482212501885409e-07, "loss": 4.5825, "step": 21560 }, { "epoch": 0.9294913210147736, "learning_rate": 9.481727682130621e-07, "loss": 4.3509, "step": 21580 }, { "epoch": 0.9303527587543611, "learning_rate": 9.481242862375832e-07, "loss": 4.3702, "step": 21600 }, { "epoch": 0.9312141964939484, "learning_rate": 9.480758042621044e-07, "loss": 4.6154, "step": 21620 }, { "epoch": 0.9320756342335358, "learning_rate": 9.480273222866253e-07, "loss": 4.7423, "step": 21640 }, { "epoch": 0.9329370719731231, "learning_rate": 9.479788403111465e-07, "loss": 4.494, "step": 21660 }, { "epoch": 0.9337985097127105, "learning_rate": 9.479303583356676e-07, "loss": 4.8994, "step": 21680 }, { "epoch": 0.9346599474522979, "learning_rate": 9.478818763601888e-07, "loss": 4.5079, "step": 21700 }, { "epoch": 0.9355213851918852, "learning_rate": 9.478333943847098e-07, "loss": 4.4827, "step": 21720 }, { "epoch": 0.9363828229314727, "learning_rate": 9.47784912409231e-07, "loss": 4.5921, "step": 21740 }, { "epoch": 0.93724426067106, "learning_rate": 9.477364304337521e-07, "loss": 4.4418, "step": 21760 }, { "epoch": 0.9381056984106474, "learning_rate": 9.47687948458273e-07, "loss": 4.5204, "step": 21780 }, { "epoch": 0.9389671361502347, "learning_rate": 9.476394664827942e-07, "loss": 4.6105, "step": 21800 }, { "epoch": 0.9398285738898221, "learning_rate": 9.475909845073153e-07, "loss": 4.6572, "step": 21820 }, { "epoch": 0.9406900116294095, "learning_rate": 9.475425025318365e-07, "loss": 4.33, "step": 21840 }, { "epoch": 0.9415514493689968, "learning_rate": 9.474940205563575e-07, "loss": 4.5902, "step": 21860 }, { "epoch": 0.9424128871085842, "learning_rate": 9.474455385808787e-07, "loss": 4.3756, "step": 21880 }, { "epoch": 0.9432743248481716, "learning_rate": 9.473970566053997e-07, "loss": 4.5468, "step": 21900 }, { "epoch": 0.944135762587759, "learning_rate": 9.473485746299209e-07, "loss": 4.4465, "step": 21920 }, { "epoch": 0.9449972003273464, "learning_rate": 9.473000926544419e-07, "loss": 4.6516, "step": 21940 }, { "epoch": 0.9458586380669337, "learning_rate": 9.472516106789631e-07, "loss": 4.328, "step": 21960 }, { "epoch": 0.9467200758065211, "learning_rate": 9.472031287034842e-07, "loss": 4.6072, "step": 21980 }, { "epoch": 0.9475815135461084, "learning_rate": 9.471546467280054e-07, "loss": 4.2032, "step": 22000 }, { "epoch": 0.9484429512856958, "learning_rate": 9.471061647525264e-07, "loss": 4.3651, "step": 22020 }, { "epoch": 0.9493043890252832, "learning_rate": 9.470576827770474e-07, "loss": 4.263, "step": 22040 }, { "epoch": 0.9501658267648706, "learning_rate": 9.470092008015686e-07, "loss": 4.6553, "step": 22060 }, { "epoch": 0.951027264504458, "learning_rate": 9.469607188260898e-07, "loss": 4.4842, "step": 22080 }, { "epoch": 0.9518887022440453, "learning_rate": 9.469122368506108e-07, "loss": 4.344, "step": 22100 }, { "epoch": 0.9527501399836327, "learning_rate": 9.46863754875132e-07, "loss": 4.4443, "step": 22120 }, { "epoch": 0.95361157772322, "learning_rate": 9.468152728996532e-07, "loss": 4.4187, "step": 22140 }, { "epoch": 0.9544730154628074, "learning_rate": 9.467667909241741e-07, "loss": 4.3837, "step": 22160 }, { "epoch": 0.9553344532023949, "learning_rate": 9.467183089486952e-07, "loss": 4.6523, "step": 22180 }, { "epoch": 0.9561958909419822, "learning_rate": 9.466698269732164e-07, "loss": 4.3209, "step": 22200 }, { "epoch": 0.9570573286815696, "learning_rate": 9.466213449977375e-07, "loss": 4.6185, "step": 22220 }, { "epoch": 0.9579187664211569, "learning_rate": 9.465728630222586e-07, "loss": 4.5998, "step": 22240 }, { "epoch": 0.9587802041607443, "learning_rate": 9.465243810467797e-07, "loss": 4.5501, "step": 22260 }, { "epoch": 0.9596416419003316, "learning_rate": 9.464758990713008e-07, "loss": 4.3369, "step": 22280 }, { "epoch": 0.960503079639919, "learning_rate": 9.464274170958219e-07, "loss": 4.3487, "step": 22300 }, { "epoch": 0.9613645173795063, "learning_rate": 9.463789351203429e-07, "loss": 4.4979, "step": 22320 }, { "epoch": 0.9622259551190938, "learning_rate": 9.463304531448641e-07, "loss": 4.482, "step": 22340 }, { "epoch": 0.9630873928586812, "learning_rate": 9.462819711693852e-07, "loss": 4.5562, "step": 22360 }, { "epoch": 0.9639488305982685, "learning_rate": 9.462334891939064e-07, "loss": 4.6254, "step": 22380 }, { "epoch": 0.9648102683378559, "learning_rate": 9.461850072184274e-07, "loss": 4.442, "step": 22400 }, { "epoch": 0.9656717060774432, "learning_rate": 9.461365252429485e-07, "loss": 4.4808, "step": 22420 }, { "epoch": 0.9665331438170306, "learning_rate": 9.460880432674696e-07, "loss": 4.3537, "step": 22440 }, { "epoch": 0.967394581556618, "learning_rate": 9.460395612919907e-07, "loss": 4.4404, "step": 22460 }, { "epoch": 0.9682560192962054, "learning_rate": 9.459910793165118e-07, "loss": 4.2819, "step": 22480 }, { "epoch": 0.9691174570357928, "learning_rate": 9.45942597341033e-07, "loss": 4.2377, "step": 22500 }, { "epoch": 0.9699788947753801, "learning_rate": 9.458941153655541e-07, "loss": 4.4839, "step": 22520 }, { "epoch": 0.9708403325149675, "learning_rate": 9.458456333900751e-07, "loss": 4.4019, "step": 22540 }, { "epoch": 0.9717017702545548, "learning_rate": 9.457971514145962e-07, "loss": 4.5467, "step": 22560 }, { "epoch": 0.9725632079941422, "learning_rate": 9.457486694391174e-07, "loss": 4.3238, "step": 22580 }, { "epoch": 0.9734246457337296, "learning_rate": 9.457001874636385e-07, "loss": 4.4663, "step": 22600 }, { "epoch": 0.9742860834733169, "learning_rate": 9.456517054881596e-07, "loss": 4.4383, "step": 22620 }, { "epoch": 0.9751475212129044, "learning_rate": 9.456032235126807e-07, "loss": 4.5796, "step": 22640 }, { "epoch": 0.9760089589524917, "learning_rate": 9.455547415372018e-07, "loss": 4.4543, "step": 22660 }, { "epoch": 0.9768703966920791, "learning_rate": 9.455062595617229e-07, "loss": 4.3599, "step": 22680 }, { "epoch": 0.9777318344316664, "learning_rate": 9.45457777586244e-07, "loss": 4.5134, "step": 22700 }, { "epoch": 0.9785932721712538, "learning_rate": 9.454092956107651e-07, "loss": 4.5121, "step": 22720 }, { "epoch": 0.9794547099108412, "learning_rate": 9.453608136352863e-07, "loss": 4.6245, "step": 22740 }, { "epoch": 0.9803161476504285, "learning_rate": 9.453123316598073e-07, "loss": 4.4596, "step": 22760 }, { "epoch": 0.981177585390016, "learning_rate": 9.452638496843284e-07, "loss": 4.7281, "step": 22780 }, { "epoch": 0.9820390231296033, "learning_rate": 9.452153677088495e-07, "loss": 4.4649, "step": 22800 }, { "epoch": 0.9829004608691907, "learning_rate": 9.451668857333707e-07, "loss": 4.5645, "step": 22820 }, { "epoch": 0.983761898608778, "learning_rate": 9.451184037578917e-07, "loss": 4.5543, "step": 22840 }, { "epoch": 0.9846233363483654, "learning_rate": 9.450699217824128e-07, "loss": 4.7059, "step": 22860 }, { "epoch": 0.9854847740879528, "learning_rate": 9.45021439806934e-07, "loss": 4.5437, "step": 22880 }, { "epoch": 0.9863462118275401, "learning_rate": 9.449729578314551e-07, "loss": 4.3082, "step": 22900 }, { "epoch": 0.9872076495671276, "learning_rate": 9.449244758559761e-07, "loss": 4.5584, "step": 22920 }, { "epoch": 0.9880690873067149, "learning_rate": 9.448759938804972e-07, "loss": 4.5198, "step": 22940 }, { "epoch": 0.9889305250463023, "learning_rate": 9.448275119050184e-07, "loss": 4.632, "step": 22960 }, { "epoch": 0.9897919627858897, "learning_rate": 9.447790299295395e-07, "loss": 4.5563, "step": 22980 }, { "epoch": 0.990653400525477, "learning_rate": 9.447305479540606e-07, "loss": 4.8115, "step": 23000 }, { "epoch": 0.9915148382650644, "learning_rate": 9.446820659785817e-07, "loss": 4.5416, "step": 23020 }, { "epoch": 0.9923762760046517, "learning_rate": 9.446335840031028e-07, "loss": 4.1979, "step": 23040 }, { "epoch": 0.9932377137442391, "learning_rate": 9.445851020276238e-07, "loss": 4.4413, "step": 23060 }, { "epoch": 0.9940991514838265, "learning_rate": 9.44536620052145e-07, "loss": 4.4993, "step": 23080 }, { "epoch": 0.9949605892234139, "learning_rate": 9.444881380766661e-07, "loss": 4.4913, "step": 23100 }, { "epoch": 0.9958220269630013, "learning_rate": 9.444396561011873e-07, "loss": 4.3746, "step": 23120 }, { "epoch": 0.9966834647025886, "learning_rate": 9.443911741257083e-07, "loss": 4.6342, "step": 23140 }, { "epoch": 0.997544902442176, "learning_rate": 9.443426921502295e-07, "loss": 4.4272, "step": 23160 }, { "epoch": 0.9984063401817633, "learning_rate": 9.442942101747505e-07, "loss": 4.6873, "step": 23180 }, { "epoch": 0.9992677779213507, "learning_rate": 9.442457281992717e-07, "loss": 4.4283, "step": 23200 }, { "epoch": 1.000129215660938, "learning_rate": 9.441972462237927e-07, "loss": 4.5882, "step": 23220 }, { "epoch": 1.0009906534005255, "learning_rate": 9.441487642483139e-07, "loss": 4.5283, "step": 23240 }, { "epoch": 1.0018520911401128, "learning_rate": 9.44100282272835e-07, "loss": 4.2976, "step": 23260 }, { "epoch": 1.0027135288797002, "learning_rate": 9.440518002973562e-07, "loss": 4.3969, "step": 23280 }, { "epoch": 1.0035749666192877, "learning_rate": 9.44003318321877e-07, "loss": 4.3359, "step": 23300 }, { "epoch": 1.004436404358875, "learning_rate": 9.439548363463983e-07, "loss": 4.5208, "step": 23320 }, { "epoch": 1.0052978420984624, "learning_rate": 9.439063543709194e-07, "loss": 4.3077, "step": 23340 }, { "epoch": 1.0061592798380496, "learning_rate": 9.438578723954406e-07, "loss": 4.5879, "step": 23360 }, { "epoch": 1.0070207175776371, "learning_rate": 9.438093904199616e-07, "loss": 4.5573, "step": 23380 }, { "epoch": 1.0078821553172244, "learning_rate": 9.437609084444828e-07, "loss": 4.6482, "step": 23400 }, { "epoch": 1.0087435930568118, "learning_rate": 9.437124264690038e-07, "loss": 4.4084, "step": 23420 }, { "epoch": 1.0096050307963993, "learning_rate": 9.436639444935248e-07, "loss": 4.4637, "step": 23440 }, { "epoch": 1.0104664685359865, "learning_rate": 9.43615462518046e-07, "loss": 4.5866, "step": 23460 }, { "epoch": 1.011327906275574, "learning_rate": 9.435669805425671e-07, "loss": 4.5344, "step": 23480 }, { "epoch": 1.0121893440151613, "learning_rate": 9.435184985670883e-07, "loss": 4.4895, "step": 23500 }, { "epoch": 1.0130507817547487, "learning_rate": 9.434700165916093e-07, "loss": 4.4187, "step": 23520 }, { "epoch": 1.013912219494336, "learning_rate": 9.434215346161305e-07, "loss": 4.5773, "step": 23540 }, { "epoch": 1.0147736572339234, "learning_rate": 9.433730526406515e-07, "loss": 4.7074, "step": 23560 }, { "epoch": 1.015635094973511, "learning_rate": 9.433245706651727e-07, "loss": 4.5204, "step": 23580 }, { "epoch": 1.0164965327130981, "learning_rate": 9.432760886896937e-07, "loss": 4.3078, "step": 23600 }, { "epoch": 1.0173579704526856, "learning_rate": 9.432276067142149e-07, "loss": 4.3662, "step": 23620 }, { "epoch": 1.0182194081922729, "learning_rate": 9.43179124738736e-07, "loss": 4.4802, "step": 23640 }, { "epoch": 1.0190808459318603, "learning_rate": 9.431306427632572e-07, "loss": 4.5543, "step": 23660 }, { "epoch": 1.0199422836714476, "learning_rate": 9.430821607877781e-07, "loss": 4.5778, "step": 23680 }, { "epoch": 1.020803721411035, "learning_rate": 9.430336788122993e-07, "loss": 4.2533, "step": 23700 }, { "epoch": 1.0216651591506223, "learning_rate": 9.429851968368204e-07, "loss": 4.6842, "step": 23720 }, { "epoch": 1.0225265968902097, "learning_rate": 9.429367148613415e-07, "loss": 4.3704, "step": 23740 }, { "epoch": 1.0233880346297972, "learning_rate": 9.428882328858626e-07, "loss": 4.5711, "step": 23760 }, { "epoch": 1.0242494723693845, "learning_rate": 9.428397509103838e-07, "loss": 4.7231, "step": 23780 }, { "epoch": 1.025110910108972, "learning_rate": 9.427912689349049e-07, "loss": 4.5228, "step": 23800 }, { "epoch": 1.0259723478485592, "learning_rate": 9.427427869594258e-07, "loss": 4.4926, "step": 23820 }, { "epoch": 1.0268337855881466, "learning_rate": 9.42694304983947e-07, "loss": 4.3109, "step": 23840 }, { "epoch": 1.0276952233277339, "learning_rate": 9.426458230084681e-07, "loss": 4.5105, "step": 23860 }, { "epoch": 1.0285566610673214, "learning_rate": 9.425973410329893e-07, "loss": 4.3662, "step": 23880 }, { "epoch": 1.0294180988069088, "learning_rate": 9.425488590575104e-07, "loss": 4.3859, "step": 23900 }, { "epoch": 1.030279536546496, "learning_rate": 9.425003770820316e-07, "loss": 4.4111, "step": 23920 }, { "epoch": 1.0311409742860835, "learning_rate": 9.424518951065525e-07, "loss": 4.253, "step": 23940 }, { "epoch": 1.0320024120256708, "learning_rate": 9.424034131310737e-07, "loss": 4.5461, "step": 23960 }, { "epoch": 1.0328638497652582, "learning_rate": 9.423549311555947e-07, "loss": 4.4709, "step": 23980 }, { "epoch": 1.0337252875048455, "learning_rate": 9.423064491801159e-07, "loss": 4.376, "step": 24000 }, { "epoch": 1.034586725244433, "learning_rate": 9.42257967204637e-07, "loss": 4.4641, "step": 24020 }, { "epoch": 1.0354481629840204, "learning_rate": 9.422094852291581e-07, "loss": 4.2648, "step": 24040 }, { "epoch": 1.0363096007236077, "learning_rate": 9.421610032536791e-07, "loss": 4.428, "step": 24060 }, { "epoch": 1.0371710384631951, "learning_rate": 9.421125212782003e-07, "loss": 4.6883, "step": 24080 }, { "epoch": 1.0380324762027824, "learning_rate": 9.420640393027214e-07, "loss": 4.4379, "step": 24100 }, { "epoch": 1.0388939139423699, "learning_rate": 9.420155573272425e-07, "loss": 4.3833, "step": 24120 }, { "epoch": 1.039755351681957, "learning_rate": 9.419670753517636e-07, "loss": 4.1903, "step": 24140 }, { "epoch": 1.0406167894215446, "learning_rate": 9.419185933762848e-07, "loss": 4.5037, "step": 24160 }, { "epoch": 1.041478227161132, "learning_rate": 9.418701114008059e-07, "loss": 4.5972, "step": 24180 }, { "epoch": 1.0423396649007193, "learning_rate": 9.418216294253269e-07, "loss": 4.4044, "step": 24200 }, { "epoch": 1.0432011026403067, "learning_rate": 9.41773147449848e-07, "loss": 4.3715, "step": 24220 }, { "epoch": 1.044062540379894, "learning_rate": 9.417246654743692e-07, "loss": 4.5498, "step": 24240 }, { "epoch": 1.0449239781194815, "learning_rate": 9.416761834988903e-07, "loss": 4.237, "step": 24260 }, { "epoch": 1.0457854158590687, "learning_rate": 9.416277015234114e-07, "loss": 4.4666, "step": 24280 }, { "epoch": 1.0466468535986562, "learning_rate": 9.415792195479325e-07, "loss": 4.4463, "step": 24300 }, { "epoch": 1.0475082913382436, "learning_rate": 9.415307375724536e-07, "loss": 4.361, "step": 24320 }, { "epoch": 1.0483697290778309, "learning_rate": 9.414822555969746e-07, "loss": 4.6172, "step": 24340 }, { "epoch": 1.0492311668174183, "learning_rate": 9.414337736214958e-07, "loss": 4.5976, "step": 24360 }, { "epoch": 1.0500926045570056, "learning_rate": 9.413852916460169e-07, "loss": 4.3985, "step": 24380 }, { "epoch": 1.050954042296593, "learning_rate": 9.41336809670538e-07, "loss": 4.3975, "step": 24400 }, { "epoch": 1.0518154800361803, "learning_rate": 9.412883276950591e-07, "loss": 4.5018, "step": 24420 }, { "epoch": 1.0526769177757678, "learning_rate": 9.412398457195802e-07, "loss": 4.5551, "step": 24440 }, { "epoch": 1.053538355515355, "learning_rate": 9.411913637441013e-07, "loss": 4.5351, "step": 24460 }, { "epoch": 1.0543997932549425, "learning_rate": 9.411428817686224e-07, "loss": 4.4579, "step": 24480 }, { "epoch": 1.05526123099453, "learning_rate": 9.410943997931435e-07, "loss": 4.5496, "step": 24500 }, { "epoch": 1.0561226687341172, "learning_rate": 9.410459178176646e-07, "loss": 4.5997, "step": 24520 }, { "epoch": 1.0569841064737047, "learning_rate": 9.409974358421858e-07, "loss": 4.5158, "step": 24540 }, { "epoch": 1.057845544213292, "learning_rate": 9.409489538667069e-07, "loss": 4.3401, "step": 24560 }, { "epoch": 1.0587069819528794, "learning_rate": 9.409004718912279e-07, "loss": 4.6352, "step": 24580 }, { "epoch": 1.0595684196924666, "learning_rate": 9.40851989915749e-07, "loss": 4.3259, "step": 24600 }, { "epoch": 1.060429857432054, "learning_rate": 9.408035079402702e-07, "loss": 4.3454, "step": 24620 }, { "epoch": 1.0612912951716416, "learning_rate": 9.407550259647912e-07, "loss": 4.6752, "step": 24640 }, { "epoch": 1.0621527329112288, "learning_rate": 9.407065439893124e-07, "loss": 4.4029, "step": 24660 }, { "epoch": 1.0630141706508163, "learning_rate": 9.406580620138335e-07, "loss": 4.3, "step": 24680 }, { "epoch": 1.0638756083904035, "learning_rate": 9.406095800383546e-07, "loss": 4.6179, "step": 24700 }, { "epoch": 1.064737046129991, "learning_rate": 9.405610980628756e-07, "loss": 4.2334, "step": 24720 }, { "epoch": 1.0655984838695782, "learning_rate": 9.405126160873968e-07, "loss": 4.1275, "step": 24740 }, { "epoch": 1.0664599216091657, "learning_rate": 9.404641341119179e-07, "loss": 4.4202, "step": 24760 }, { "epoch": 1.0673213593487532, "learning_rate": 9.404156521364391e-07, "loss": 4.3064, "step": 24780 }, { "epoch": 1.0681827970883404, "learning_rate": 9.403671701609601e-07, "loss": 4.3091, "step": 24800 }, { "epoch": 1.0690442348279279, "learning_rate": 9.403186881854812e-07, "loss": 4.3217, "step": 24820 }, { "epoch": 1.0699056725675151, "learning_rate": 9.402702062100023e-07, "loss": 4.5031, "step": 24840 }, { "epoch": 1.0707671103071026, "learning_rate": 9.402217242345235e-07, "loss": 4.4397, "step": 24860 }, { "epoch": 1.0716285480466898, "learning_rate": 9.401732422590445e-07, "loss": 4.4409, "step": 24880 }, { "epoch": 1.0724899857862773, "learning_rate": 9.401247602835657e-07, "loss": 4.5023, "step": 24900 }, { "epoch": 1.0733514235258648, "learning_rate": 9.400762783080868e-07, "loss": 4.3031, "step": 24920 }, { "epoch": 1.074212861265452, "learning_rate": 9.400277963326078e-07, "loss": 4.4672, "step": 24940 }, { "epoch": 1.0750742990050395, "learning_rate": 9.399793143571289e-07, "loss": 4.6026, "step": 24960 }, { "epoch": 1.0759357367446267, "learning_rate": 9.399308323816501e-07, "loss": 4.4296, "step": 24980 }, { "epoch": 1.0767971744842142, "learning_rate": 9.398823504061712e-07, "loss": 4.4177, "step": 25000 }, { "epoch": 1.0776586122238014, "learning_rate": 9.398338684306922e-07, "loss": 4.5838, "step": 25020 }, { "epoch": 1.078520049963389, "learning_rate": 9.397853864552134e-07, "loss": 4.3385, "step": 25040 }, { "epoch": 1.0793814877029764, "learning_rate": 9.397369044797345e-07, "loss": 4.4873, "step": 25060 }, { "epoch": 1.0802429254425636, "learning_rate": 9.396884225042555e-07, "loss": 4.6927, "step": 25080 }, { "epoch": 1.081104363182151, "learning_rate": 9.396399405287766e-07, "loss": 4.1841, "step": 25100 }, { "epoch": 1.0819658009217383, "learning_rate": 9.395914585532978e-07, "loss": 4.4341, "step": 25120 }, { "epoch": 1.0828272386613258, "learning_rate": 9.395429765778189e-07, "loss": 4.3862, "step": 25140 }, { "epoch": 1.083688676400913, "learning_rate": 9.394944946023401e-07, "loss": 4.5278, "step": 25160 }, { "epoch": 1.0845501141405005, "learning_rate": 9.394460126268612e-07, "loss": 4.3656, "step": 25180 }, { "epoch": 1.085411551880088, "learning_rate": 9.393975306513822e-07, "loss": 4.3609, "step": 25200 }, { "epoch": 1.0862729896196752, "learning_rate": 9.393490486759033e-07, "loss": 4.432, "step": 25220 }, { "epoch": 1.0871344273592627, "learning_rate": 9.393005667004245e-07, "loss": 4.4633, "step": 25240 }, { "epoch": 1.08799586509885, "learning_rate": 9.392520847249455e-07, "loss": 4.3571, "step": 25260 }, { "epoch": 1.0888573028384374, "learning_rate": 9.392036027494667e-07, "loss": 4.3883, "step": 25280 }, { "epoch": 1.0897187405780246, "learning_rate": 9.391551207739878e-07, "loss": 4.5822, "step": 25300 }, { "epoch": 1.0905801783176121, "learning_rate": 9.39106638798509e-07, "loss": 4.416, "step": 25320 }, { "epoch": 1.0914416160571996, "learning_rate": 9.390581568230299e-07, "loss": 4.3431, "step": 25340 }, { "epoch": 1.0923030537967868, "learning_rate": 9.390096748475511e-07, "loss": 4.5583, "step": 25360 }, { "epoch": 1.0931644915363743, "learning_rate": 9.389611928720722e-07, "loss": 4.3422, "step": 25380 }, { "epoch": 1.0940259292759615, "learning_rate": 9.389127108965933e-07, "loss": 4.4077, "step": 25400 }, { "epoch": 1.094887367015549, "learning_rate": 9.388642289211144e-07, "loss": 4.4079, "step": 25420 }, { "epoch": 1.0957488047551363, "learning_rate": 9.388157469456356e-07, "loss": 4.581, "step": 25440 }, { "epoch": 1.0966102424947237, "learning_rate": 9.387672649701566e-07, "loss": 4.3695, "step": 25460 }, { "epoch": 1.097471680234311, "learning_rate": 9.387187829946776e-07, "loss": 4.2777, "step": 25480 }, { "epoch": 1.0983331179738984, "learning_rate": 9.386703010191988e-07, "loss": 4.5497, "step": 25500 }, { "epoch": 1.099194555713486, "learning_rate": 9.3862181904372e-07, "loss": 4.4497, "step": 25520 }, { "epoch": 1.1000559934530731, "learning_rate": 9.38573337068241e-07, "loss": 4.3623, "step": 25540 }, { "epoch": 1.1009174311926606, "learning_rate": 9.385248550927621e-07, "loss": 4.609, "step": 25560 }, { "epoch": 1.1017788689322479, "learning_rate": 9.384763731172833e-07, "loss": 4.2504, "step": 25580 }, { "epoch": 1.1026403066718353, "learning_rate": 9.384278911418043e-07, "loss": 4.5254, "step": 25600 }, { "epoch": 1.1035017444114226, "learning_rate": 9.383794091663254e-07, "loss": 4.4802, "step": 25620 }, { "epoch": 1.10436318215101, "learning_rate": 9.383309271908465e-07, "loss": 4.5648, "step": 25640 }, { "epoch": 1.1052246198905975, "learning_rate": 9.382824452153677e-07, "loss": 4.4582, "step": 25660 }, { "epoch": 1.1060860576301847, "learning_rate": 9.382339632398888e-07, "loss": 4.5596, "step": 25680 }, { "epoch": 1.1069474953697722, "learning_rate": 9.3818548126441e-07, "loss": 4.4884, "step": 25700 }, { "epoch": 1.1078089331093595, "learning_rate": 9.381369992889309e-07, "loss": 4.3298, "step": 25720 }, { "epoch": 1.108670370848947, "learning_rate": 9.380885173134521e-07, "loss": 4.3468, "step": 25740 }, { "epoch": 1.1095318085885342, "learning_rate": 9.380400353379732e-07, "loss": 4.3481, "step": 25760 }, { "epoch": 1.1103932463281216, "learning_rate": 9.379915533624943e-07, "loss": 4.4193, "step": 25780 }, { "epoch": 1.111254684067709, "learning_rate": 9.379430713870154e-07, "loss": 4.4418, "step": 25800 }, { "epoch": 1.1121161218072964, "learning_rate": 9.378945894115366e-07, "loss": 4.4261, "step": 25820 }, { "epoch": 1.1129775595468838, "learning_rate": 9.378461074360575e-07, "loss": 4.4694, "step": 25840 }, { "epoch": 1.113838997286471, "learning_rate": 9.377976254605787e-07, "loss": 4.3937, "step": 25860 }, { "epoch": 1.1147004350260585, "learning_rate": 9.377491434850998e-07, "loss": 4.2992, "step": 25880 }, { "epoch": 1.1155618727656458, "learning_rate": 9.37700661509621e-07, "loss": 4.5451, "step": 25900 }, { "epoch": 1.1164233105052332, "learning_rate": 9.37652179534142e-07, "loss": 4.4103, "step": 25920 }, { "epoch": 1.1172847482448205, "learning_rate": 9.376036975586632e-07, "loss": 4.3582, "step": 25940 }, { "epoch": 1.118146185984408, "learning_rate": 9.375552155831843e-07, "loss": 4.3502, "step": 25960 }, { "epoch": 1.1190076237239954, "learning_rate": 9.375067336077054e-07, "loss": 4.4556, "step": 25980 }, { "epoch": 1.1198690614635827, "learning_rate": 9.374582516322264e-07, "loss": 4.8482, "step": 26000 }, { "epoch": 1.1207304992031701, "learning_rate": 9.374097696567476e-07, "loss": 4.4736, "step": 26020 }, { "epoch": 1.1215919369427574, "learning_rate": 9.373612876812687e-07, "loss": 4.4255, "step": 26040 }, { "epoch": 1.1224533746823449, "learning_rate": 9.373128057057899e-07, "loss": 4.3318, "step": 26060 }, { "epoch": 1.123314812421932, "learning_rate": 9.372643237303109e-07, "loss": 4.3827, "step": 26080 }, { "epoch": 1.1241762501615196, "learning_rate": 9.372158417548319e-07, "loss": 4.4282, "step": 26100 }, { "epoch": 1.125037687901107, "learning_rate": 9.371673597793531e-07, "loss": 4.4113, "step": 26120 }, { "epoch": 1.1258991256406943, "learning_rate": 9.371188778038741e-07, "loss": 4.3691, "step": 26140 }, { "epoch": 1.1267605633802817, "learning_rate": 9.370703958283953e-07, "loss": 4.469, "step": 26160 }, { "epoch": 1.127622001119869, "learning_rate": 9.370219138529164e-07, "loss": 4.5255, "step": 26180 }, { "epoch": 1.1284834388594565, "learning_rate": 9.369734318774376e-07, "loss": 4.495, "step": 26200 }, { "epoch": 1.1293448765990437, "learning_rate": 9.369249499019586e-07, "loss": 4.4714, "step": 26220 }, { "epoch": 1.1302063143386312, "learning_rate": 9.368764679264797e-07, "loss": 4.4941, "step": 26240 }, { "epoch": 1.1310677520782186, "learning_rate": 9.368279859510008e-07, "loss": 4.457, "step": 26260 }, { "epoch": 1.1319291898178059, "learning_rate": 9.36779503975522e-07, "loss": 4.3587, "step": 26280 }, { "epoch": 1.1327906275573933, "learning_rate": 9.36731022000043e-07, "loss": 4.326, "step": 26300 }, { "epoch": 1.1336520652969806, "learning_rate": 9.366825400245642e-07, "loss": 4.4715, "step": 26320 }, { "epoch": 1.134513503036568, "learning_rate": 9.366340580490853e-07, "loss": 4.5996, "step": 26340 }, { "epoch": 1.1353749407761553, "learning_rate": 9.365855760736064e-07, "loss": 4.1874, "step": 26360 }, { "epoch": 1.1362363785157428, "learning_rate": 9.365370940981274e-07, "loss": 4.2523, "step": 26380 }, { "epoch": 1.1370978162553302, "learning_rate": 9.364886121226486e-07, "loss": 4.4615, "step": 26400 }, { "epoch": 1.1379592539949175, "learning_rate": 9.364401301471697e-07, "loss": 4.8248, "step": 26420 }, { "epoch": 1.138820691734505, "learning_rate": 9.363916481716909e-07, "loss": 4.2533, "step": 26440 }, { "epoch": 1.1396821294740922, "learning_rate": 9.363431661962119e-07, "loss": 4.5715, "step": 26460 }, { "epoch": 1.1405435672136797, "learning_rate": 9.36294684220733e-07, "loss": 4.4302, "step": 26480 }, { "epoch": 1.141405004953267, "learning_rate": 9.362462022452541e-07, "loss": 4.3582, "step": 26500 }, { "epoch": 1.1422664426928544, "learning_rate": 9.361977202697752e-07, "loss": 4.4676, "step": 26520 }, { "epoch": 1.1431278804324418, "learning_rate": 9.361492382942963e-07, "loss": 4.3547, "step": 26540 }, { "epoch": 1.143989318172029, "learning_rate": 9.361007563188174e-07, "loss": 4.2655, "step": 26560 }, { "epoch": 1.1448507559116166, "learning_rate": 9.360522743433386e-07, "loss": 4.364, "step": 26580 }, { "epoch": 1.1457121936512038, "learning_rate": 9.360037923678596e-07, "loss": 4.2633, "step": 26600 }, { "epoch": 1.1465736313907913, "learning_rate": 9.359553103923807e-07, "loss": 4.2595, "step": 26620 }, { "epoch": 1.1474350691303785, "learning_rate": 9.359068284169018e-07, "loss": 4.3531, "step": 26640 }, { "epoch": 1.148296506869966, "learning_rate": 9.35858346441423e-07, "loss": 4.6372, "step": 26660 }, { "epoch": 1.1491579446095535, "learning_rate": 9.35809864465944e-07, "loss": 4.4899, "step": 26680 }, { "epoch": 1.1500193823491407, "learning_rate": 9.357613824904652e-07, "loss": 4.2618, "step": 26700 }, { "epoch": 1.1508808200887282, "learning_rate": 9.357129005149863e-07, "loss": 4.3635, "step": 26720 }, { "epoch": 1.1517422578283154, "learning_rate": 9.356644185395074e-07, "loss": 4.2641, "step": 26740 }, { "epoch": 1.1526036955679029, "learning_rate": 9.356159365640284e-07, "loss": 4.3223, "step": 26760 }, { "epoch": 1.1534651333074901, "learning_rate": 9.355674545885496e-07, "loss": 4.666, "step": 26780 }, { "epoch": 1.1543265710470776, "learning_rate": 9.355189726130707e-07, "loss": 4.4854, "step": 26800 }, { "epoch": 1.155188008786665, "learning_rate": 9.354704906375919e-07, "loss": 4.3282, "step": 26820 }, { "epoch": 1.1560494465262523, "learning_rate": 9.354220086621129e-07, "loss": 4.4205, "step": 26840 }, { "epoch": 1.1569108842658398, "learning_rate": 9.353735266866339e-07, "loss": 4.5329, "step": 26860 }, { "epoch": 1.157772322005427, "learning_rate": 9.353250447111551e-07, "loss": 4.2963, "step": 26880 }, { "epoch": 1.1586337597450145, "learning_rate": 9.352765627356762e-07, "loss": 4.2485, "step": 26900 }, { "epoch": 1.1594951974846017, "learning_rate": 9.352280807601973e-07, "loss": 4.4059, "step": 26920 }, { "epoch": 1.1603566352241892, "learning_rate": 9.351795987847185e-07, "loss": 4.5401, "step": 26940 }, { "epoch": 1.1612180729637767, "learning_rate": 9.351311168092397e-07, "loss": 4.6506, "step": 26960 }, { "epoch": 1.162079510703364, "learning_rate": 9.350826348337606e-07, "loss": 4.3864, "step": 26980 }, { "epoch": 1.1629409484429514, "learning_rate": 9.350341528582817e-07, "loss": 4.5897, "step": 27000 }, { "epoch": 1.1638023861825386, "learning_rate": 9.349856708828029e-07, "loss": 4.4499, "step": 27020 }, { "epoch": 1.164663823922126, "learning_rate": 9.34937188907324e-07, "loss": 4.464, "step": 27040 }, { "epoch": 1.1655252616617133, "learning_rate": 9.348887069318451e-07, "loss": 4.2179, "step": 27060 }, { "epoch": 1.1663866994013008, "learning_rate": 9.348402249563662e-07, "loss": 4.2474, "step": 27080 }, { "epoch": 1.1672481371408883, "learning_rate": 9.347917429808873e-07, "loss": 4.2296, "step": 27100 }, { "epoch": 1.1681095748804755, "learning_rate": 9.347432610054083e-07, "loss": 4.5144, "step": 27120 }, { "epoch": 1.168971012620063, "learning_rate": 9.346947790299295e-07, "loss": 4.4157, "step": 27140 }, { "epoch": 1.1698324503596502, "learning_rate": 9.346462970544506e-07, "loss": 4.4907, "step": 27160 }, { "epoch": 1.1706938880992377, "learning_rate": 9.345978150789717e-07, "loss": 4.4599, "step": 27180 }, { "epoch": 1.171555325838825, "learning_rate": 9.345493331034928e-07, "loss": 4.3646, "step": 27200 }, { "epoch": 1.1724167635784124, "learning_rate": 9.345008511280139e-07, "loss": 4.4938, "step": 27220 }, { "epoch": 1.1732782013179996, "learning_rate": 9.34452369152535e-07, "loss": 4.2979, "step": 27240 }, { "epoch": 1.1741396390575871, "learning_rate": 9.344038871770561e-07, "loss": 4.0727, "step": 27260 }, { "epoch": 1.1750010767971744, "learning_rate": 9.343554052015772e-07, "loss": 4.3746, "step": 27280 }, { "epoch": 1.1758625145367618, "learning_rate": 9.343069232260983e-07, "loss": 4.4847, "step": 27300 }, { "epoch": 1.1767239522763493, "learning_rate": 9.342584412506195e-07, "loss": 4.3683, "step": 27320 }, { "epoch": 1.1775853900159365, "learning_rate": 9.342099592751406e-07, "loss": 4.4623, "step": 27340 }, { "epoch": 1.178446827755524, "learning_rate": 9.341614772996617e-07, "loss": 4.5681, "step": 27360 }, { "epoch": 1.1793082654951113, "learning_rate": 9.341129953241827e-07, "loss": 4.4459, "step": 27380 }, { "epoch": 1.1801697032346987, "learning_rate": 9.340645133487039e-07, "loss": 4.2048, "step": 27400 }, { "epoch": 1.181031140974286, "learning_rate": 9.34016031373225e-07, "loss": 4.2432, "step": 27420 }, { "epoch": 1.1818925787138734, "learning_rate": 9.339675493977461e-07, "loss": 4.5315, "step": 27440 }, { "epoch": 1.182754016453461, "learning_rate": 9.339190674222672e-07, "loss": 4.6568, "step": 27460 }, { "epoch": 1.1836154541930481, "learning_rate": 9.338705854467885e-07, "loss": 4.3739, "step": 27480 }, { "epoch": 1.1844768919326356, "learning_rate": 9.338221034713093e-07, "loss": 4.4015, "step": 27500 }, { "epoch": 1.1853383296722229, "learning_rate": 9.337736214958305e-07, "loss": 4.5714, "step": 27520 }, { "epoch": 1.1861997674118103, "learning_rate": 9.337251395203516e-07, "loss": 4.35, "step": 27540 }, { "epoch": 1.1870612051513976, "learning_rate": 9.336766575448728e-07, "loss": 4.4903, "step": 27560 }, { "epoch": 1.187922642890985, "learning_rate": 9.336281755693938e-07, "loss": 4.487, "step": 27580 }, { "epoch": 1.1887840806305725, "learning_rate": 9.33579693593915e-07, "loss": 4.446, "step": 27600 }, { "epoch": 1.1896455183701597, "learning_rate": 9.33531211618436e-07, "loss": 4.3769, "step": 27620 }, { "epoch": 1.1905069561097472, "learning_rate": 9.33482729642957e-07, "loss": 4.3718, "step": 27640 }, { "epoch": 1.1913683938493345, "learning_rate": 9.334342476674782e-07, "loss": 4.4117, "step": 27660 }, { "epoch": 1.192229831588922, "learning_rate": 9.333857656919994e-07, "loss": 4.3429, "step": 27680 }, { "epoch": 1.1930912693285092, "learning_rate": 9.333372837165205e-07, "loss": 4.334, "step": 27700 }, { "epoch": 1.1939527070680966, "learning_rate": 9.332888017410416e-07, "loss": 4.6125, "step": 27720 }, { "epoch": 1.194814144807684, "learning_rate": 9.332403197655627e-07, "loss": 4.4454, "step": 27740 }, { "epoch": 1.1956755825472714, "learning_rate": 9.331918377900837e-07, "loss": 4.3109, "step": 27760 }, { "epoch": 1.1965370202868588, "learning_rate": 9.331433558146049e-07, "loss": 4.2727, "step": 27780 }, { "epoch": 1.197398458026446, "learning_rate": 9.33094873839126e-07, "loss": 4.4393, "step": 27800 }, { "epoch": 1.1982598957660335, "learning_rate": 9.330463918636471e-07, "loss": 4.3083, "step": 27820 }, { "epoch": 1.1991213335056208, "learning_rate": 9.329979098881682e-07, "loss": 4.4089, "step": 27840 }, { "epoch": 1.1999827712452082, "learning_rate": 9.329494279126894e-07, "loss": 4.2863, "step": 27860 }, { "epoch": 1.2008442089847957, "learning_rate": 9.329009459372103e-07, "loss": 4.3972, "step": 27880 }, { "epoch": 1.201705646724383, "learning_rate": 9.328524639617315e-07, "loss": 4.4722, "step": 27900 }, { "epoch": 1.2025670844639704, "learning_rate": 9.328039819862526e-07, "loss": 4.4498, "step": 27920 }, { "epoch": 1.2034285222035577, "learning_rate": 9.327555000107738e-07, "loss": 4.6628, "step": 27940 }, { "epoch": 1.2042899599431451, "learning_rate": 9.327070180352948e-07, "loss": 4.6212, "step": 27960 }, { "epoch": 1.2051513976827324, "learning_rate": 9.32658536059816e-07, "loss": 4.5064, "step": 27980 }, { "epoch": 1.2060128354223199, "learning_rate": 9.326100540843371e-07, "loss": 4.4675, "step": 28000 }, { "epoch": 1.2068742731619073, "learning_rate": 9.325615721088582e-07, "loss": 4.4763, "step": 28020 }, { "epoch": 1.2077357109014946, "learning_rate": 9.325130901333792e-07, "loss": 4.4464, "step": 28040 }, { "epoch": 1.208597148641082, "learning_rate": 9.324646081579004e-07, "loss": 4.2176, "step": 28060 }, { "epoch": 1.2094585863806693, "learning_rate": 9.324161261824215e-07, "loss": 4.705, "step": 28080 }, { "epoch": 1.2103200241202567, "learning_rate": 9.323676442069426e-07, "loss": 4.3475, "step": 28100 }, { "epoch": 1.211181461859844, "learning_rate": 9.323191622314637e-07, "loss": 4.1616, "step": 28120 }, { "epoch": 1.2120428995994315, "learning_rate": 9.322706802559848e-07, "loss": 4.4239, "step": 28140 }, { "epoch": 1.212904337339019, "learning_rate": 9.322221982805059e-07, "loss": 4.3745, "step": 28160 }, { "epoch": 1.2137657750786062, "learning_rate": 9.321737163050269e-07, "loss": 4.394, "step": 28180 }, { "epoch": 1.2146272128181936, "learning_rate": 9.321252343295481e-07, "loss": 4.3618, "step": 28200 }, { "epoch": 1.2154886505577809, "learning_rate": 9.320767523540693e-07, "loss": 4.3503, "step": 28220 }, { "epoch": 1.2163500882973683, "learning_rate": 9.320282703785904e-07, "loss": 4.4078, "step": 28240 }, { "epoch": 1.2172115260369556, "learning_rate": 9.319797884031113e-07, "loss": 4.2004, "step": 28260 }, { "epoch": 1.218072963776543, "learning_rate": 9.319313064276325e-07, "loss": 4.4605, "step": 28280 }, { "epoch": 1.2189344015161305, "learning_rate": 9.318828244521536e-07, "loss": 4.592, "step": 28300 }, { "epoch": 1.2197958392557178, "learning_rate": 9.318343424766748e-07, "loss": 4.1173, "step": 28320 }, { "epoch": 1.2206572769953052, "learning_rate": 9.317858605011958e-07, "loss": 4.4178, "step": 28340 }, { "epoch": 1.2215187147348925, "learning_rate": 9.31737378525717e-07, "loss": 4.8156, "step": 28360 }, { "epoch": 1.22238015247448, "learning_rate": 9.316888965502381e-07, "loss": 4.591, "step": 28380 }, { "epoch": 1.2232415902140672, "learning_rate": 9.316404145747591e-07, "loss": 4.5912, "step": 28400 }, { "epoch": 1.2241030279536547, "learning_rate": 9.315919325992802e-07, "loss": 4.2252, "step": 28420 }, { "epoch": 1.2249644656932421, "learning_rate": 9.315434506238014e-07, "loss": 4.2596, "step": 28440 }, { "epoch": 1.2258259034328294, "learning_rate": 9.314949686483225e-07, "loss": 4.4703, "step": 28460 }, { "epoch": 1.2266873411724168, "learning_rate": 9.314464866728436e-07, "loss": 4.3343, "step": 28480 }, { "epoch": 1.227548778912004, "learning_rate": 9.313980046973647e-07, "loss": 4.4048, "step": 28500 }, { "epoch": 1.2284102166515916, "learning_rate": 9.313495227218858e-07, "loss": 4.6057, "step": 28520 }, { "epoch": 1.2292716543911788, "learning_rate": 9.313010407464069e-07, "loss": 4.5505, "step": 28540 }, { "epoch": 1.2301330921307663, "learning_rate": 9.31252558770928e-07, "loss": 4.5829, "step": 28560 }, { "epoch": 1.2309945298703537, "learning_rate": 9.312040767954491e-07, "loss": 4.2724, "step": 28580 }, { "epoch": 1.231855967609941, "learning_rate": 9.311555948199703e-07, "loss": 4.5347, "step": 28600 }, { "epoch": 1.2327174053495285, "learning_rate": 9.311071128444914e-07, "loss": 4.3547, "step": 28620 }, { "epoch": 1.2335788430891157, "learning_rate": 9.310586308690123e-07, "loss": 4.5261, "step": 28640 }, { "epoch": 1.2344402808287032, "learning_rate": 9.310101488935335e-07, "loss": 4.5067, "step": 28660 }, { "epoch": 1.2353017185682904, "learning_rate": 9.309616669180547e-07, "loss": 4.4154, "step": 28680 }, { "epoch": 1.2361631563078779, "learning_rate": 9.309131849425757e-07, "loss": 4.2921, "step": 28700 }, { "epoch": 1.2370245940474653, "learning_rate": 9.308647029670968e-07, "loss": 4.5793, "step": 28720 }, { "epoch": 1.2378860317870526, "learning_rate": 9.308162209916181e-07, "loss": 4.3936, "step": 28740 }, { "epoch": 1.23874746952664, "learning_rate": 9.307677390161391e-07, "loss": 4.2317, "step": 28760 }, { "epoch": 1.2396089072662273, "learning_rate": 9.307192570406601e-07, "loss": 4.1972, "step": 28780 }, { "epoch": 1.2404703450058148, "learning_rate": 9.306707750651812e-07, "loss": 4.4496, "step": 28800 }, { "epoch": 1.241331782745402, "learning_rate": 9.306222930897024e-07, "loss": 4.4471, "step": 28820 }, { "epoch": 1.2421932204849895, "learning_rate": 9.305738111142235e-07, "loss": 4.3669, "step": 28840 }, { "epoch": 1.2430546582245767, "learning_rate": 9.305253291387446e-07, "loss": 4.6186, "step": 28860 }, { "epoch": 1.2439160959641642, "learning_rate": 9.304768471632657e-07, "loss": 4.374, "step": 28880 }, { "epoch": 1.2447775337037514, "learning_rate": 9.304283651877868e-07, "loss": 4.423, "step": 28900 }, { "epoch": 1.245638971443339, "learning_rate": 9.303798832123079e-07, "loss": 4.5189, "step": 28920 }, { "epoch": 1.2465004091829264, "learning_rate": 9.30331401236829e-07, "loss": 4.215, "step": 28940 }, { "epoch": 1.2473618469225136, "learning_rate": 9.302829192613501e-07, "loss": 4.4685, "step": 28960 }, { "epoch": 1.248223284662101, "learning_rate": 9.302344372858713e-07, "loss": 4.2605, "step": 28980 }, { "epoch": 1.2490847224016883, "learning_rate": 9.301859553103923e-07, "loss": 4.5794, "step": 29000 }, { "epoch": 1.2499461601412758, "learning_rate": 9.301374733349134e-07, "loss": 4.3712, "step": 29020 }, { "epoch": 1.250807597880863, "learning_rate": 9.300889913594345e-07, "loss": 4.2, "step": 29040 }, { "epoch": 1.2516690356204505, "learning_rate": 9.300405093839557e-07, "loss": 4.3306, "step": 29060 }, { "epoch": 1.252530473360038, "learning_rate": 9.299920274084767e-07, "loss": 4.4159, "step": 29080 }, { "epoch": 1.2533919110996252, "learning_rate": 9.299435454329979e-07, "loss": 4.5091, "step": 29100 }, { "epoch": 1.2542533488392127, "learning_rate": 9.29895063457519e-07, "loss": 4.2013, "step": 29120 }, { "epoch": 1.2551147865788, "learning_rate": 9.298465814820402e-07, "loss": 4.3171, "step": 29140 }, { "epoch": 1.2559762243183874, "learning_rate": 9.297980995065611e-07, "loss": 4.5649, "step": 29160 }, { "epoch": 1.2568376620579746, "learning_rate": 9.297496175310823e-07, "loss": 5.0627, "step": 29180 }, { "epoch": 1.2576990997975621, "learning_rate": 9.297011355556034e-07, "loss": 7.4767, "step": 29200 }, { "epoch": 1.2585605375371496, "learning_rate": 9.296526535801246e-07, "loss": 9.1112, "step": 29220 }, { "epoch": 1.2594219752767368, "learning_rate": 9.296041716046456e-07, "loss": 9.0257, "step": 29240 }, { "epoch": 1.2602834130163243, "learning_rate": 9.295556896291667e-07, "loss": 8.9185, "step": 29260 }, { "epoch": 1.2611448507559115, "learning_rate": 9.295072076536878e-07, "loss": 9.0897, "step": 29280 }, { "epoch": 1.262006288495499, "learning_rate": 9.29458725678209e-07, "loss": 8.6666, "step": 29300 }, { "epoch": 1.2628677262350863, "learning_rate": 9.2941024370273e-07, "loss": 8.6319, "step": 29320 }, { "epoch": 1.2637291639746737, "learning_rate": 9.293617617272511e-07, "loss": 8.7006, "step": 29340 }, { "epoch": 1.2645906017142612, "learning_rate": 9.293132797517723e-07, "loss": 8.5091, "step": 29360 }, { "epoch": 1.2654520394538484, "learning_rate": 9.292647977762933e-07, "loss": 8.6262, "step": 29380 }, { "epoch": 1.266313477193436, "learning_rate": 9.292163158008144e-07, "loss": 8.5565, "step": 29400 }, { "epoch": 1.2671749149330231, "learning_rate": 9.291678338253355e-07, "loss": 8.1368, "step": 29420 }, { "epoch": 1.2680363526726106, "learning_rate": 9.291193518498567e-07, "loss": 8.4045, "step": 29440 }, { "epoch": 1.2688977904121979, "learning_rate": 9.290708698743777e-07, "loss": 8.4389, "step": 29460 }, { "epoch": 1.2697592281517853, "learning_rate": 9.290223878988989e-07, "loss": 8.1549, "step": 29480 }, { "epoch": 1.2706206658913728, "learning_rate": 9.2897390592342e-07, "loss": 8.3515, "step": 29500 }, { "epoch": 1.27148210363096, "learning_rate": 9.289254239479412e-07, "loss": 8.0843, "step": 29520 }, { "epoch": 1.2723435413705475, "learning_rate": 9.288769419724621e-07, "loss": 7.5458, "step": 29540 }, { "epoch": 1.2732049791101347, "learning_rate": 9.288284599969833e-07, "loss": 7.7321, "step": 29560 }, { "epoch": 1.2740664168497222, "learning_rate": 9.287799780215045e-07, "loss": 7.8405, "step": 29580 }, { "epoch": 1.2749278545893095, "learning_rate": 9.287314960460256e-07, "loss": 7.7172, "step": 29600 }, { "epoch": 1.275789292328897, "learning_rate": 9.286830140705466e-07, "loss": 8.0011, "step": 29620 }, { "epoch": 1.2766507300684844, "learning_rate": 9.286345320950678e-07, "loss": 7.8851, "step": 29640 }, { "epoch": 1.2775121678080716, "learning_rate": 9.285860501195888e-07, "loss": 7.7286, "step": 29660 }, { "epoch": 1.278373605547659, "learning_rate": 9.285375681441099e-07, "loss": 8.2685, "step": 29680 }, { "epoch": 1.2792350432872464, "learning_rate": 9.28489086168631e-07, "loss": 8.1651, "step": 29700 }, { "epoch": 1.2800964810268338, "learning_rate": 9.284406041931522e-07, "loss": 7.8719, "step": 29720 }, { "epoch": 1.280957918766421, "learning_rate": 9.283921222176733e-07, "loss": 8.0637, "step": 29740 }, { "epoch": 1.2818193565060085, "learning_rate": 9.283436402421944e-07, "loss": 7.9024, "step": 29760 }, { "epoch": 1.282680794245596, "learning_rate": 9.282951582667155e-07, "loss": 7.7219, "step": 29780 }, { "epoch": 1.2835422319851832, "learning_rate": 9.282466762912365e-07, "loss": 7.7878, "step": 29800 }, { "epoch": 1.2844036697247707, "learning_rate": 9.281981943157577e-07, "loss": 7.6931, "step": 29820 }, { "epoch": 1.285265107464358, "learning_rate": 9.281497123402788e-07, "loss": 7.3598, "step": 29840 }, { "epoch": 1.2861265452039454, "learning_rate": 9.281012303647999e-07, "loss": 7.7623, "step": 29860 }, { "epoch": 1.2869879829435327, "learning_rate": 9.28052748389321e-07, "loss": 7.6945, "step": 29880 }, { "epoch": 1.2878494206831201, "learning_rate": 9.280042664138422e-07, "loss": 7.594, "step": 29900 }, { "epoch": 1.2887108584227076, "learning_rate": 9.279557844383631e-07, "loss": 7.5393, "step": 29920 }, { "epoch": 1.2895722961622949, "learning_rate": 9.279073024628843e-07, "loss": 7.4495, "step": 29940 }, { "epoch": 1.2904337339018823, "learning_rate": 9.278588204874054e-07, "loss": 7.7063, "step": 29960 }, { "epoch": 1.2912951716414696, "learning_rate": 9.278103385119265e-07, "loss": 7.7734, "step": 29980 }, { "epoch": 1.292156609381057, "learning_rate": 9.277618565364475e-07, "loss": 7.7084, "step": 30000 }, { "epoch": 1.2930180471206443, "learning_rate": 9.277133745609688e-07, "loss": 7.6194, "step": 30020 }, { "epoch": 1.2938794848602317, "learning_rate": 9.276648925854898e-07, "loss": 7.8689, "step": 30040 }, { "epoch": 1.2947409225998192, "learning_rate": 9.276164106100109e-07, "loss": 7.2148, "step": 30060 }, { "epoch": 1.2956023603394065, "learning_rate": 9.27567928634532e-07, "loss": 7.7463, "step": 30080 }, { "epoch": 1.296463798078994, "learning_rate": 9.275194466590532e-07, "loss": 7.7451, "step": 30100 }, { "epoch": 1.2973252358185812, "learning_rate": 9.274709646835743e-07, "loss": 7.6534, "step": 30120 }, { "epoch": 1.2981866735581686, "learning_rate": 9.274224827080954e-07, "loss": 7.6972, "step": 30140 }, { "epoch": 1.2990481112977559, "learning_rate": 9.273740007326165e-07, "loss": 7.3694, "step": 30160 }, { "epoch": 1.2999095490373433, "learning_rate": 9.273255187571376e-07, "loss": 7.2392, "step": 30180 }, { "epoch": 1.3007709867769308, "learning_rate": 9.272770367816587e-07, "loss": 7.5428, "step": 30200 }, { "epoch": 1.301632424516518, "learning_rate": 9.272285548061798e-07, "loss": 7.6846, "step": 30220 }, { "epoch": 1.3024938622561053, "learning_rate": 9.271800728307009e-07, "loss": 7.8115, "step": 30240 }, { "epoch": 1.3033552999956928, "learning_rate": 9.271315908552221e-07, "loss": 7.3245, "step": 30260 }, { "epoch": 1.3042167377352802, "learning_rate": 9.270831088797431e-07, "loss": 6.7679, "step": 30280 }, { "epoch": 1.3050781754748675, "learning_rate": 9.270346269042642e-07, "loss": 7.2834, "step": 30300 }, { "epoch": 1.305939613214455, "learning_rate": 9.269861449287853e-07, "loss": 7.2348, "step": 30320 }, { "epoch": 1.3068010509540424, "learning_rate": 9.269376629533064e-07, "loss": 7.0163, "step": 30340 }, { "epoch": 1.3076624886936297, "learning_rate": 9.268891809778275e-07, "loss": 7.1488, "step": 30360 }, { "epoch": 1.308523926433217, "learning_rate": 9.268406990023487e-07, "loss": 7.7662, "step": 30380 }, { "epoch": 1.3093853641728044, "learning_rate": 9.267922170268698e-07, "loss": 7.1247, "step": 30400 }, { "epoch": 1.3102468019123918, "learning_rate": 9.267437350513908e-07, "loss": 7.4735, "step": 30420 }, { "epoch": 1.311108239651979, "learning_rate": 9.266952530759119e-07, "loss": 7.2543, "step": 30440 }, { "epoch": 1.3119696773915666, "learning_rate": 9.26646771100433e-07, "loss": 7.1354, "step": 30460 }, { "epoch": 1.312831115131154, "learning_rate": 9.265982891249542e-07, "loss": 7.3775, "step": 30480 }, { "epoch": 1.3136925528707413, "learning_rate": 9.265498071494753e-07, "loss": 6.9614, "step": 30500 }, { "epoch": 1.3145539906103285, "learning_rate": 9.265013251739965e-07, "loss": 7.279, "step": 30520 }, { "epoch": 1.315415428349916, "learning_rate": 9.264528431985175e-07, "loss": 7.148, "step": 30540 }, { "epoch": 1.3162768660895035, "learning_rate": 9.264043612230386e-07, "loss": 6.9021, "step": 30560 }, { "epoch": 1.3171383038290907, "learning_rate": 9.263558792475596e-07, "loss": 7.1499, "step": 30580 }, { "epoch": 1.3179997415686782, "learning_rate": 9.263073972720808e-07, "loss": 7.1387, "step": 30600 }, { "epoch": 1.3188611793082656, "learning_rate": 9.262589152966019e-07, "loss": 7.1563, "step": 30620 }, { "epoch": 1.3197226170478529, "learning_rate": 9.262104333211231e-07, "loss": 6.9685, "step": 30640 }, { "epoch": 1.3205840547874401, "learning_rate": 9.261619513456441e-07, "loss": 7.0773, "step": 30660 }, { "epoch": 1.3214454925270276, "learning_rate": 9.261134693701652e-07, "loss": 7.239, "step": 30680 }, { "epoch": 1.322306930266615, "learning_rate": 9.260649873946863e-07, "loss": 7.333, "step": 30700 }, { "epoch": 1.3231683680062023, "learning_rate": 9.260165054192075e-07, "loss": 7.1882, "step": 30720 }, { "epoch": 1.3240298057457898, "learning_rate": 9.259680234437285e-07, "loss": 7.0352, "step": 30740 }, { "epoch": 1.324891243485377, "learning_rate": 9.259195414682497e-07, "loss": 7.3077, "step": 30760 }, { "epoch": 1.3257526812249645, "learning_rate": 9.258710594927708e-07, "loss": 7.2401, "step": 30780 }, { "epoch": 1.3266141189645517, "learning_rate": 9.258225775172919e-07, "loss": 6.9523, "step": 30800 }, { "epoch": 1.3274755567041392, "learning_rate": 9.257740955418129e-07, "loss": 6.9704, "step": 30820 }, { "epoch": 1.3283369944437267, "learning_rate": 9.257256135663341e-07, "loss": 7.0696, "step": 30840 }, { "epoch": 1.329198432183314, "learning_rate": 9.256771315908552e-07, "loss": 6.9114, "step": 30860 }, { "epoch": 1.3300598699229014, "learning_rate": 9.256286496153762e-07, "loss": 7.0071, "step": 30880 }, { "epoch": 1.3309213076624886, "learning_rate": 9.255801676398974e-07, "loss": 7.0653, "step": 30900 }, { "epoch": 1.331782745402076, "learning_rate": 9.255316856644186e-07, "loss": 6.9926, "step": 30920 }, { "epoch": 1.3326441831416633, "learning_rate": 9.254832036889396e-07, "loss": 6.9443, "step": 30940 }, { "epoch": 1.3335056208812508, "learning_rate": 9.254347217134606e-07, "loss": 7.1948, "step": 30960 }, { "epoch": 1.3343670586208383, "learning_rate": 9.253862397379818e-07, "loss": 7.0766, "step": 30980 }, { "epoch": 1.3352284963604255, "learning_rate": 9.253377577625029e-07, "loss": 7.1149, "step": 31000 }, { "epoch": 1.336089934100013, "learning_rate": 9.252892757870241e-07, "loss": 7.3839, "step": 31020 }, { "epoch": 1.3369513718396002, "learning_rate": 9.252407938115451e-07, "loss": 7.0784, "step": 31040 }, { "epoch": 1.3378128095791877, "learning_rate": 9.251923118360662e-07, "loss": 6.9548, "step": 31060 }, { "epoch": 1.338674247318775, "learning_rate": 9.251438298605873e-07, "loss": 7.0966, "step": 31080 }, { "epoch": 1.3395356850583624, "learning_rate": 9.250953478851085e-07, "loss": 6.9126, "step": 31100 }, { "epoch": 1.3403971227979499, "learning_rate": 9.250468659096295e-07, "loss": 6.8928, "step": 31120 }, { "epoch": 1.3412585605375371, "learning_rate": 9.249983839341507e-07, "loss": 7.2436, "step": 31140 }, { "epoch": 1.3421199982771246, "learning_rate": 9.249499019586718e-07, "loss": 6.7896, "step": 31160 }, { "epoch": 1.3429814360167118, "learning_rate": 9.249014199831928e-07, "loss": 7.2349, "step": 31180 }, { "epoch": 1.3438428737562993, "learning_rate": 9.248529380077139e-07, "loss": 6.7734, "step": 31200 }, { "epoch": 1.3447043114958865, "learning_rate": 9.248044560322351e-07, "loss": 7.0566, "step": 31220 }, { "epoch": 1.345565749235474, "learning_rate": 9.247559740567562e-07, "loss": 7.0811, "step": 31240 }, { "epoch": 1.3464271869750615, "learning_rate": 9.247074920812773e-07, "loss": 7.2334, "step": 31260 }, { "epoch": 1.3472886247146487, "learning_rate": 9.246590101057984e-07, "loss": 7.1606, "step": 31280 }, { "epoch": 1.3481500624542362, "learning_rate": 9.246105281303196e-07, "loss": 7.0721, "step": 31300 }, { "epoch": 1.3490115001938234, "learning_rate": 9.245620461548406e-07, "loss": 6.8343, "step": 31320 }, { "epoch": 1.349872937933411, "learning_rate": 9.245135641793617e-07, "loss": 7.0205, "step": 31340 }, { "epoch": 1.3507343756729981, "learning_rate": 9.244650822038829e-07, "loss": 7.0218, "step": 31360 }, { "epoch": 1.3515958134125856, "learning_rate": 9.24416600228404e-07, "loss": 6.9216, "step": 31380 }, { "epoch": 1.352457251152173, "learning_rate": 9.243681182529251e-07, "loss": 7.1311, "step": 31400 }, { "epoch": 1.3533186888917603, "learning_rate": 9.243196362774461e-07, "loss": 6.7834, "step": 31420 }, { "epoch": 1.3541801266313478, "learning_rate": 9.242711543019672e-07, "loss": 6.7601, "step": 31440 }, { "epoch": 1.355041564370935, "learning_rate": 9.242226723264884e-07, "loss": 6.9749, "step": 31460 }, { "epoch": 1.3559030021105225, "learning_rate": 9.241741903510094e-07, "loss": 6.8912, "step": 31480 }, { "epoch": 1.3567644398501097, "learning_rate": 9.241257083755305e-07, "loss": 7.0472, "step": 31500 }, { "epoch": 1.3576258775896972, "learning_rate": 9.240772264000517e-07, "loss": 7.0801, "step": 31520 }, { "epoch": 1.3584873153292847, "learning_rate": 9.240287444245728e-07, "loss": 6.9824, "step": 31540 }, { "epoch": 1.359348753068872, "learning_rate": 9.23980262449094e-07, "loss": 6.8021, "step": 31560 }, { "epoch": 1.3602101908084594, "learning_rate": 9.239317804736149e-07, "loss": 6.6614, "step": 31580 }, { "epoch": 1.3610716285480466, "learning_rate": 9.238832984981361e-07, "loss": 7.0195, "step": 31600 }, { "epoch": 1.361933066287634, "learning_rate": 9.238348165226572e-07, "loss": 6.989, "step": 31620 }, { "epoch": 1.3627945040272214, "learning_rate": 9.237863345471783e-07, "loss": 6.6913, "step": 31640 }, { "epoch": 1.3636559417668088, "learning_rate": 9.237378525716994e-07, "loss": 6.7222, "step": 31660 }, { "epoch": 1.3645173795063963, "learning_rate": 9.236893705962206e-07, "loss": 6.9444, "step": 31680 }, { "epoch": 1.3653788172459835, "learning_rate": 9.236408886207416e-07, "loss": 6.6173, "step": 31700 }, { "epoch": 1.366240254985571, "learning_rate": 9.235924066452627e-07, "loss": 6.9346, "step": 31720 }, { "epoch": 1.3671016927251582, "learning_rate": 9.235439246697838e-07, "loss": 6.9446, "step": 31740 }, { "epoch": 1.3679631304647457, "learning_rate": 9.23495442694305e-07, "loss": 7.1068, "step": 31760 }, { "epoch": 1.368824568204333, "learning_rate": 9.234469607188259e-07, "loss": 6.7589, "step": 31780 }, { "epoch": 1.3696860059439204, "learning_rate": 9.233984787433472e-07, "loss": 6.7636, "step": 31800 }, { "epoch": 1.370547443683508, "learning_rate": 9.233499967678682e-07, "loss": 6.7678, "step": 31820 }, { "epoch": 1.3714088814230951, "learning_rate": 9.233015147923894e-07, "loss": 6.8524, "step": 31840 }, { "epoch": 1.3722703191626824, "learning_rate": 9.232530328169104e-07, "loss": 6.9558, "step": 31860 }, { "epoch": 1.3731317569022699, "learning_rate": 9.232045508414316e-07, "loss": 6.7935, "step": 31880 }, { "epoch": 1.3739931946418573, "learning_rate": 9.231560688659527e-07, "loss": 6.6628, "step": 31900 }, { "epoch": 1.3748546323814446, "learning_rate": 9.231075868904739e-07, "loss": 6.8103, "step": 31920 }, { "epoch": 1.375716070121032, "learning_rate": 9.230591049149949e-07, "loss": 6.5024, "step": 31940 }, { "epoch": 1.3765775078606195, "learning_rate": 9.230106229395159e-07, "loss": 6.9965, "step": 31960 }, { "epoch": 1.3774389456002067, "learning_rate": 9.229621409640371e-07, "loss": 6.8808, "step": 31980 }, { "epoch": 1.378300383339794, "learning_rate": 9.229136589885583e-07, "loss": 6.7527, "step": 32000 }, { "epoch": 1.3791618210793815, "learning_rate": 9.228651770130793e-07, "loss": 6.7427, "step": 32020 }, { "epoch": 1.380023258818969, "learning_rate": 9.228166950376004e-07, "loss": 6.4327, "step": 32040 }, { "epoch": 1.3808846965585562, "learning_rate": 9.227682130621216e-07, "loss": 6.7198, "step": 32060 }, { "epoch": 1.3817461342981436, "learning_rate": 9.227197310866425e-07, "loss": 6.6045, "step": 32080 }, { "epoch": 1.382607572037731, "learning_rate": 9.226712491111637e-07, "loss": 6.5851, "step": 32100 }, { "epoch": 1.3834690097773183, "learning_rate": 9.226227671356848e-07, "loss": 6.809, "step": 32120 }, { "epoch": 1.3843304475169056, "learning_rate": 9.22574285160206e-07, "loss": 6.5905, "step": 32140 }, { "epoch": 1.385191885256493, "learning_rate": 9.22525803184727e-07, "loss": 6.8906, "step": 32160 }, { "epoch": 1.3860533229960805, "learning_rate": 9.224773212092482e-07, "loss": 6.6777, "step": 32180 }, { "epoch": 1.3869147607356678, "learning_rate": 9.224288392337692e-07, "loss": 6.707, "step": 32200 }, { "epoch": 1.3877761984752552, "learning_rate": 9.223803572582904e-07, "loss": 6.6338, "step": 32220 }, { "epoch": 1.3886376362148427, "learning_rate": 9.223318752828114e-07, "loss": 6.8514, "step": 32240 }, { "epoch": 1.38949907395443, "learning_rate": 9.222833933073326e-07, "loss": 6.6803, "step": 32260 }, { "epoch": 1.3903605116940172, "learning_rate": 9.222349113318537e-07, "loss": 6.7469, "step": 32280 }, { "epoch": 1.3912219494336047, "learning_rate": 9.22186429356375e-07, "loss": 6.7612, "step": 32300 }, { "epoch": 1.3920833871731921, "learning_rate": 9.221379473808959e-07, "loss": 6.8637, "step": 32320 }, { "epoch": 1.3929448249127794, "learning_rate": 9.22089465405417e-07, "loss": 6.6512, "step": 32340 }, { "epoch": 1.3938062626523668, "learning_rate": 9.220409834299381e-07, "loss": 6.5928, "step": 32360 }, { "epoch": 1.394667700391954, "learning_rate": 9.219925014544593e-07, "loss": 6.5807, "step": 32380 }, { "epoch": 1.3955291381315416, "learning_rate": 9.219440194789803e-07, "loss": 6.5286, "step": 32400 }, { "epoch": 1.3963905758711288, "learning_rate": 9.218955375035015e-07, "loss": 6.4358, "step": 32420 }, { "epoch": 1.3972520136107163, "learning_rate": 9.218470555280226e-07, "loss": 6.7531, "step": 32440 }, { "epoch": 1.3981134513503037, "learning_rate": 9.217985735525436e-07, "loss": 6.7327, "step": 32460 }, { "epoch": 1.398974889089891, "learning_rate": 9.217500915770647e-07, "loss": 6.9049, "step": 32480 }, { "epoch": 1.3998363268294785, "learning_rate": 9.217016096015858e-07, "loss": 6.744, "step": 32500 }, { "epoch": 1.4006977645690657, "learning_rate": 9.21653127626107e-07, "loss": 6.7219, "step": 32520 }, { "epoch": 1.4015592023086532, "learning_rate": 9.216046456506281e-07, "loss": 6.7781, "step": 32540 }, { "epoch": 1.4024206400482404, "learning_rate": 9.215561636751492e-07, "loss": 6.7637, "step": 32560 }, { "epoch": 1.4032820777878279, "learning_rate": 9.215076816996703e-07, "loss": 6.7455, "step": 32580 }, { "epoch": 1.4041435155274153, "learning_rate": 9.214591997241914e-07, "loss": 6.5962, "step": 32600 }, { "epoch": 1.4050049532670026, "learning_rate": 9.214107177487125e-07, "loss": 6.4006, "step": 32620 }, { "epoch": 1.40586639100659, "learning_rate": 9.213622357732336e-07, "loss": 6.4812, "step": 32640 }, { "epoch": 1.4067278287461773, "learning_rate": 9.213137537977547e-07, "loss": 6.7662, "step": 32660 }, { "epoch": 1.4075892664857648, "learning_rate": 9.212652718222759e-07, "loss": 6.7259, "step": 32680 }, { "epoch": 1.408450704225352, "learning_rate": 9.212167898467969e-07, "loss": 6.7009, "step": 32700 }, { "epoch": 1.4093121419649395, "learning_rate": 9.21168307871318e-07, "loss": 6.4475, "step": 32720 }, { "epoch": 1.410173579704527, "learning_rate": 9.211198258958391e-07, "loss": 6.4441, "step": 32740 }, { "epoch": 1.4110350174441142, "learning_rate": 9.210713439203602e-07, "loss": 6.6392, "step": 32760 }, { "epoch": 1.4118964551837017, "learning_rate": 9.210228619448813e-07, "loss": 6.5847, "step": 32780 }, { "epoch": 1.412757892923289, "learning_rate": 9.209743799694025e-07, "loss": 6.5724, "step": 32800 }, { "epoch": 1.4136193306628764, "learning_rate": 9.209258979939236e-07, "loss": 6.7541, "step": 32820 }, { "epoch": 1.4144807684024636, "learning_rate": 9.208774160184446e-07, "loss": 6.5684, "step": 32840 }, { "epoch": 1.415342206142051, "learning_rate": 9.208289340429657e-07, "loss": 6.4602, "step": 32860 }, { "epoch": 1.4162036438816386, "learning_rate": 9.207804520674869e-07, "loss": 6.7454, "step": 32880 }, { "epoch": 1.4170650816212258, "learning_rate": 9.20731970092008e-07, "loss": 6.5201, "step": 32900 }, { "epoch": 1.4179265193608133, "learning_rate": 9.206834881165291e-07, "loss": 6.9773, "step": 32920 }, { "epoch": 1.4187879571004005, "learning_rate": 9.206350061410502e-07, "loss": 6.6975, "step": 32940 }, { "epoch": 1.419649394839988, "learning_rate": 9.205865241655713e-07, "loss": 6.451, "step": 32960 }, { "epoch": 1.4205108325795752, "learning_rate": 9.205380421900924e-07, "loss": 6.311, "step": 32980 }, { "epoch": 1.4213722703191627, "learning_rate": 9.204895602146135e-07, "loss": 6.6721, "step": 33000 }, { "epoch": 1.4222337080587502, "learning_rate": 9.204410782391346e-07, "loss": 6.5873, "step": 33020 }, { "epoch": 1.4230951457983374, "learning_rate": 9.203925962636556e-07, "loss": 6.7574, "step": 33040 }, { "epoch": 1.4239565835379249, "learning_rate": 9.203441142881769e-07, "loss": 6.7247, "step": 33060 }, { "epoch": 1.4248180212775121, "learning_rate": 9.20295632312698e-07, "loss": 6.7455, "step": 33080 }, { "epoch": 1.4256794590170996, "learning_rate": 9.20247150337219e-07, "loss": 6.559, "step": 33100 }, { "epoch": 1.4265408967566868, "learning_rate": 9.201986683617401e-07, "loss": 6.2385, "step": 33120 }, { "epoch": 1.4274023344962743, "learning_rate": 9.201501863862613e-07, "loss": 6.4429, "step": 33140 }, { "epoch": 1.4282637722358618, "learning_rate": 9.201017044107823e-07, "loss": 6.7354, "step": 33160 }, { "epoch": 1.429125209975449, "learning_rate": 9.200532224353035e-07, "loss": 6.3568, "step": 33180 }, { "epoch": 1.4299866477150365, "learning_rate": 9.200047404598246e-07, "loss": 6.4991, "step": 33200 }, { "epoch": 1.4308480854546237, "learning_rate": 9.199562584843456e-07, "loss": 6.6018, "step": 33220 }, { "epoch": 1.4317095231942112, "learning_rate": 9.199077765088667e-07, "loss": 6.5083, "step": 33240 }, { "epoch": 1.4325709609337984, "learning_rate": 9.198592945333879e-07, "loss": 6.408, "step": 33260 }, { "epoch": 1.433432398673386, "learning_rate": 9.19810812557909e-07, "loss": 6.7178, "step": 33280 }, { "epoch": 1.4342938364129734, "learning_rate": 9.197623305824301e-07, "loss": 6.3813, "step": 33300 }, { "epoch": 1.4351552741525606, "learning_rate": 9.197138486069512e-07, "loss": 6.601, "step": 33320 }, { "epoch": 1.4360167118921479, "learning_rate": 9.196653666314724e-07, "loss": 6.7897, "step": 33340 }, { "epoch": 1.4368781496317353, "learning_rate": 9.196168846559933e-07, "loss": 6.7652, "step": 33360 }, { "epoch": 1.4377395873713228, "learning_rate": 9.195684026805145e-07, "loss": 6.7107, "step": 33380 }, { "epoch": 1.43860102511091, "learning_rate": 9.195199207050356e-07, "loss": 5.7782, "step": 33400 }, { "epoch": 1.4394624628504975, "learning_rate": 9.194714387295568e-07, "loss": 5.4448, "step": 33420 }, { "epoch": 1.440323900590085, "learning_rate": 9.194229567540778e-07, "loss": 5.6638, "step": 33440 }, { "epoch": 1.4411853383296722, "learning_rate": 9.19374474778599e-07, "loss": 5.1264, "step": 33460 }, { "epoch": 1.4420467760692595, "learning_rate": 9.1932599280312e-07, "loss": 5.0293, "step": 33480 }, { "epoch": 1.442908213808847, "learning_rate": 9.192775108276412e-07, "loss": 4.9955, "step": 33500 }, { "epoch": 1.4437696515484344, "learning_rate": 9.192290288521622e-07, "loss": 4.7765, "step": 33520 }, { "epoch": 1.4446310892880216, "learning_rate": 9.191805468766834e-07, "loss": 4.6915, "step": 33540 }, { "epoch": 1.445492527027609, "learning_rate": 9.191320649012044e-07, "loss": 4.6286, "step": 33560 }, { "epoch": 1.4463539647671966, "learning_rate": 9.190835829257256e-07, "loss": 4.7875, "step": 33580 }, { "epoch": 1.4472154025067838, "learning_rate": 9.190351009502466e-07, "loss": 4.5608, "step": 33600 }, { "epoch": 1.448076840246371, "learning_rate": 9.189866189747678e-07, "loss": 4.3453, "step": 33620 }, { "epoch": 1.4489382779859585, "learning_rate": 9.189381369992889e-07, "loss": 4.8078, "step": 33640 }, { "epoch": 1.449799715725546, "learning_rate": 9.1888965502381e-07, "loss": 4.7774, "step": 33660 }, { "epoch": 1.4506611534651332, "learning_rate": 9.188411730483311e-07, "loss": 4.8069, "step": 33680 }, { "epoch": 1.4515225912047207, "learning_rate": 9.187926910728522e-07, "loss": 4.5375, "step": 33700 }, { "epoch": 1.4523840289443082, "learning_rate": 9.187442090973734e-07, "loss": 4.6969, "step": 33720 }, { "epoch": 1.4532454666838954, "learning_rate": 9.186957271218943e-07, "loss": 4.6461, "step": 33740 }, { "epoch": 1.4541069044234827, "learning_rate": 9.186472451464155e-07, "loss": 4.6505, "step": 33760 }, { "epoch": 1.4549683421630701, "learning_rate": 9.185987631709366e-07, "loss": 4.4535, "step": 33780 }, { "epoch": 1.4558297799026576, "learning_rate": 9.185502811954578e-07, "loss": 4.8117, "step": 33800 }, { "epoch": 1.4566912176422449, "learning_rate": 9.185017992199788e-07, "loss": 4.6628, "step": 33820 }, { "epoch": 1.4575526553818323, "learning_rate": 9.184533172445e-07, "loss": 4.6842, "step": 33840 }, { "epoch": 1.4584140931214198, "learning_rate": 9.18404835269021e-07, "loss": 4.4481, "step": 33860 }, { "epoch": 1.459275530861007, "learning_rate": 9.183563532935422e-07, "loss": 4.7234, "step": 33880 }, { "epoch": 1.4601369686005943, "learning_rate": 9.183078713180632e-07, "loss": 4.6209, "step": 33900 }, { "epoch": 1.4609984063401817, "learning_rate": 9.182593893425844e-07, "loss": 4.486, "step": 33920 }, { "epoch": 1.4618598440797692, "learning_rate": 9.182109073671055e-07, "loss": 4.6013, "step": 33940 }, { "epoch": 1.4627212818193565, "learning_rate": 9.181624253916267e-07, "loss": 4.4464, "step": 33960 }, { "epoch": 1.463582719558944, "learning_rate": 9.181139434161476e-07, "loss": 4.5521, "step": 33980 }, { "epoch": 1.4644441572985312, "learning_rate": 9.180654614406688e-07, "loss": 4.5197, "step": 34000 }, { "epoch": 1.4653055950381186, "learning_rate": 9.180169794651899e-07, "loss": 4.3679, "step": 34020 }, { "epoch": 1.4661670327777059, "learning_rate": 9.17968497489711e-07, "loss": 4.7741, "step": 34040 }, { "epoch": 1.4670284705172933, "learning_rate": 9.179200155142321e-07, "loss": 4.8057, "step": 34060 }, { "epoch": 1.4678899082568808, "learning_rate": 9.178715335387534e-07, "loss": 4.4689, "step": 34080 }, { "epoch": 1.468751345996468, "learning_rate": 9.178230515632744e-07, "loss": 4.5038, "step": 34100 }, { "epoch": 1.4696127837360555, "learning_rate": 9.177745695877953e-07, "loss": 4.6934, "step": 34120 }, { "epoch": 1.4704742214756428, "learning_rate": 9.177260876123165e-07, "loss": 4.5616, "step": 34140 }, { "epoch": 1.4713356592152302, "learning_rate": 9.176776056368377e-07, "loss": 4.5588, "step": 34160 }, { "epoch": 1.4721970969548175, "learning_rate": 9.176291236613588e-07, "loss": 4.5878, "step": 34180 }, { "epoch": 1.473058534694405, "learning_rate": 9.175806416858798e-07, "loss": 4.4511, "step": 34200 }, { "epoch": 1.4739199724339924, "learning_rate": 9.17532159710401e-07, "loss": 4.3849, "step": 34220 }, { "epoch": 1.4747814101735797, "learning_rate": 9.17483677734922e-07, "loss": 4.573, "step": 34240 }, { "epoch": 1.4756428479131671, "learning_rate": 9.174351957594432e-07, "loss": 4.6106, "step": 34260 }, { "epoch": 1.4765042856527544, "learning_rate": 9.173867137839642e-07, "loss": 4.5424, "step": 34280 }, { "epoch": 1.4773657233923418, "learning_rate": 9.173382318084854e-07, "loss": 4.3785, "step": 34300 }, { "epoch": 1.478227161131929, "learning_rate": 9.172897498330065e-07, "loss": 4.6613, "step": 34320 }, { "epoch": 1.4790885988715166, "learning_rate": 9.172412678575276e-07, "loss": 4.31, "step": 34340 }, { "epoch": 1.479950036611104, "learning_rate": 9.171927858820487e-07, "loss": 4.5539, "step": 34360 }, { "epoch": 1.4808114743506913, "learning_rate": 9.171443039065698e-07, "loss": 4.4384, "step": 34380 }, { "epoch": 1.4816729120902787, "learning_rate": 9.17095821931091e-07, "loss": 4.4427, "step": 34400 }, { "epoch": 1.482534349829866, "learning_rate": 9.17047339955612e-07, "loss": 4.7581, "step": 34420 }, { "epoch": 1.4833957875694535, "learning_rate": 9.169988579801331e-07, "loss": 4.5789, "step": 34440 }, { "epoch": 1.4842572253090407, "learning_rate": 9.169503760046543e-07, "loss": 4.4681, "step": 34460 }, { "epoch": 1.4851186630486282, "learning_rate": 9.169018940291754e-07, "loss": 4.5523, "step": 34480 }, { "epoch": 1.4859801007882156, "learning_rate": 9.168534120536964e-07, "loss": 4.3729, "step": 34500 }, { "epoch": 1.4868415385278029, "learning_rate": 9.168049300782175e-07, "loss": 4.5161, "step": 34520 }, { "epoch": 1.4877029762673903, "learning_rate": 9.167564481027387e-07, "loss": 4.4172, "step": 34540 }, { "epoch": 1.4885644140069776, "learning_rate": 9.167079661272598e-07, "loss": 4.261, "step": 34560 }, { "epoch": 1.489425851746565, "learning_rate": 9.166594841517809e-07, "loss": 4.23, "step": 34580 }, { "epoch": 1.4902872894861523, "learning_rate": 9.16611002176302e-07, "loss": 4.339, "step": 34600 }, { "epoch": 1.4911487272257398, "learning_rate": 9.165625202008231e-07, "loss": 4.1811, "step": 34620 }, { "epoch": 1.4920101649653272, "learning_rate": 9.165140382253441e-07, "loss": 4.3727, "step": 34640 }, { "epoch": 1.4928716027049145, "learning_rate": 9.164655562498652e-07, "loss": 4.5534, "step": 34660 }, { "epoch": 1.493733040444502, "learning_rate": 9.164170742743864e-07, "loss": 4.3347, "step": 34680 }, { "epoch": 1.4945944781840892, "learning_rate": 9.163685922989076e-07, "loss": 4.2064, "step": 34700 }, { "epoch": 1.4954559159236767, "learning_rate": 9.163201103234286e-07, "loss": 4.4352, "step": 34720 }, { "epoch": 1.496317353663264, "learning_rate": 9.162716283479497e-07, "loss": 4.4662, "step": 34740 }, { "epoch": 1.4971787914028514, "learning_rate": 9.162231463724708e-07, "loss": 4.4698, "step": 34760 }, { "epoch": 1.4980402291424388, "learning_rate": 9.161746643969919e-07, "loss": 4.2969, "step": 34780 }, { "epoch": 1.498901666882026, "learning_rate": 9.16126182421513e-07, "loss": 4.5373, "step": 34800 }, { "epoch": 1.4997631046216136, "learning_rate": 9.16077700446034e-07, "loss": 4.2752, "step": 34820 }, { "epoch": 1.5006245423612008, "learning_rate": 9.160292184705553e-07, "loss": 4.4435, "step": 34840 }, { "epoch": 1.5014859801007883, "learning_rate": 9.159807364950764e-07, "loss": 4.5597, "step": 34860 }, { "epoch": 1.5023474178403755, "learning_rate": 9.159322545195974e-07, "loss": 4.2376, "step": 34880 }, { "epoch": 1.503208855579963, "learning_rate": 9.158837725441185e-07, "loss": 4.6336, "step": 34900 }, { "epoch": 1.5040702933195504, "learning_rate": 9.158352905686398e-07, "loss": 4.265, "step": 34920 }, { "epoch": 1.5049317310591377, "learning_rate": 9.157868085931607e-07, "loss": 4.6778, "step": 34940 }, { "epoch": 1.505793168798725, "learning_rate": 9.157383266176819e-07, "loss": 4.5546, "step": 34960 }, { "epoch": 1.5066546065383124, "learning_rate": 9.15689844642203e-07, "loss": 4.4426, "step": 34980 }, { "epoch": 1.5075160442778999, "learning_rate": 9.156413626667241e-07, "loss": 4.6047, "step": 35000 }, { "epoch": 1.5083774820174871, "learning_rate": 9.155928806912451e-07, "loss": 4.3895, "step": 35020 }, { "epoch": 1.5092389197570746, "learning_rate": 9.155443987157663e-07, "loss": 4.2913, "step": 35040 }, { "epoch": 1.510100357496662, "learning_rate": 9.154959167402874e-07, "loss": 4.4838, "step": 35060 }, { "epoch": 1.5109617952362493, "learning_rate": 9.154474347648086e-07, "loss": 4.7426, "step": 35080 }, { "epoch": 1.5118232329758365, "learning_rate": 9.153989527893296e-07, "loss": 4.3318, "step": 35100 }, { "epoch": 1.512684670715424, "learning_rate": 9.153504708138508e-07, "loss": 4.2035, "step": 35120 }, { "epoch": 1.5135461084550115, "learning_rate": 9.153019888383718e-07, "loss": 4.4679, "step": 35140 }, { "epoch": 1.5144075461945987, "learning_rate": 9.15253506862893e-07, "loss": 4.2768, "step": 35160 }, { "epoch": 1.5152689839341862, "learning_rate": 9.15205024887414e-07, "loss": 4.447, "step": 35180 }, { "epoch": 1.5161304216737737, "learning_rate": 9.151565429119351e-07, "loss": 4.5441, "step": 35200 }, { "epoch": 1.516991859413361, "learning_rate": 9.151080609364563e-07, "loss": 4.239, "step": 35220 }, { "epoch": 1.5178532971529481, "learning_rate": 9.150595789609775e-07, "loss": 4.4943, "step": 35240 }, { "epoch": 1.5187147348925356, "learning_rate": 9.150110969854984e-07, "loss": 4.28, "step": 35260 }, { "epoch": 1.519576172632123, "learning_rate": 9.149626150100195e-07, "loss": 4.3123, "step": 35280 }, { "epoch": 1.5204376103717103, "learning_rate": 9.149141330345407e-07, "loss": 4.4965, "step": 35300 }, { "epoch": 1.5212990481112978, "learning_rate": 9.148656510590617e-07, "loss": 4.2643, "step": 35320 }, { "epoch": 1.5221604858508853, "learning_rate": 9.148171690835828e-07, "loss": 4.4908, "step": 35340 }, { "epoch": 1.5230219235904725, "learning_rate": 9.14768687108104e-07, "loss": 4.5154, "step": 35360 }, { "epoch": 1.5238833613300597, "learning_rate": 9.147202051326251e-07, "loss": 4.5667, "step": 35380 }, { "epoch": 1.5247447990696472, "learning_rate": 9.146717231571461e-07, "loss": 4.3141, "step": 35400 }, { "epoch": 1.5256062368092347, "learning_rate": 9.146232411816673e-07, "loss": 4.3248, "step": 35420 }, { "epoch": 1.526467674548822, "learning_rate": 9.145747592061884e-07, "loss": 4.4251, "step": 35440 }, { "epoch": 1.5273291122884094, "learning_rate": 9.145262772307096e-07, "loss": 4.612, "step": 35460 }, { "epoch": 1.5281905500279969, "learning_rate": 9.144777952552306e-07, "loss": 4.5537, "step": 35480 }, { "epoch": 1.529051987767584, "learning_rate": 9.144293132797518e-07, "loss": 4.414, "step": 35500 }, { "epoch": 1.5299134255071714, "learning_rate": 9.143808313042728e-07, "loss": 4.4837, "step": 35520 }, { "epoch": 1.5307748632467588, "learning_rate": 9.14332349328794e-07, "loss": 4.5767, "step": 35540 }, { "epoch": 1.5316363009863463, "learning_rate": 9.14283867353315e-07, "loss": 4.3294, "step": 35560 }, { "epoch": 1.5324977387259335, "learning_rate": 9.142353853778362e-07, "loss": 4.2305, "step": 35580 }, { "epoch": 1.533359176465521, "learning_rate": 9.141869034023573e-07, "loss": 4.522, "step": 35600 }, { "epoch": 1.5342206142051085, "learning_rate": 9.141384214268784e-07, "loss": 4.3252, "step": 35620 }, { "epoch": 1.5350820519446957, "learning_rate": 9.140899394513994e-07, "loss": 4.4498, "step": 35640 }, { "epoch": 1.535943489684283, "learning_rate": 9.140414574759206e-07, "loss": 4.3863, "step": 35660 }, { "epoch": 1.5368049274238704, "learning_rate": 9.139929755004417e-07, "loss": 4.279, "step": 35680 }, { "epoch": 1.537666365163458, "learning_rate": 9.139444935249628e-07, "loss": 4.3294, "step": 35700 }, { "epoch": 1.5385278029030451, "learning_rate": 9.138960115494839e-07, "loss": 4.5648, "step": 35720 }, { "epoch": 1.5393892406426326, "learning_rate": 9.13847529574005e-07, "loss": 4.6493, "step": 35740 }, { "epoch": 1.54025067838222, "learning_rate": 9.137990475985261e-07, "loss": 4.4849, "step": 35760 }, { "epoch": 1.5411121161218073, "learning_rate": 9.137505656230472e-07, "loss": 4.5569, "step": 35780 }, { "epoch": 1.5419735538613946, "learning_rate": 9.137020836475683e-07, "loss": 4.5644, "step": 35800 }, { "epoch": 1.542834991600982, "learning_rate": 9.136536016720894e-07, "loss": 4.4694, "step": 35820 }, { "epoch": 1.5436964293405695, "learning_rate": 9.136051196966106e-07, "loss": 4.5983, "step": 35840 }, { "epoch": 1.5445578670801567, "learning_rate": 9.135566377211316e-07, "loss": 4.4882, "step": 35860 }, { "epoch": 1.545419304819744, "learning_rate": 9.135081557456528e-07, "loss": 4.5535, "step": 35880 }, { "epoch": 1.5462807425593317, "learning_rate": 9.134596737701738e-07, "loss": 4.4972, "step": 35900 }, { "epoch": 1.547142180298919, "learning_rate": 9.134111917946949e-07, "loss": 4.6593, "step": 35920 }, { "epoch": 1.5480036180385062, "learning_rate": 9.13362709819216e-07, "loss": 4.1969, "step": 35940 }, { "epoch": 1.5488650557780936, "learning_rate": 9.133142278437372e-07, "loss": 4.2865, "step": 35960 }, { "epoch": 1.549726493517681, "learning_rate": 9.132657458682583e-07, "loss": 4.2481, "step": 35980 }, { "epoch": 1.5505879312572683, "learning_rate": 9.132172638927794e-07, "loss": 4.6457, "step": 36000 }, { "epoch": 1.5514493689968556, "learning_rate": 9.131687819173004e-07, "loss": 4.4537, "step": 36020 }, { "epoch": 1.5523108067364433, "learning_rate": 9.131202999418216e-07, "loss": 4.2257, "step": 36040 }, { "epoch": 1.5531722444760305, "learning_rate": 9.130718179663427e-07, "loss": 4.3691, "step": 36060 }, { "epoch": 1.5540336822156178, "learning_rate": 9.130233359908638e-07, "loss": 4.3142, "step": 36080 }, { "epoch": 1.5548951199552052, "learning_rate": 9.129748540153849e-07, "loss": 4.3368, "step": 36100 }, { "epoch": 1.5557565576947927, "learning_rate": 9.129263720399061e-07, "loss": 4.638, "step": 36120 }, { "epoch": 1.55661799543438, "learning_rate": 9.128778900644272e-07, "loss": 4.3089, "step": 36140 }, { "epoch": 1.5574794331739672, "learning_rate": 9.128294080889482e-07, "loss": 4.6912, "step": 36160 }, { "epoch": 1.5583408709135549, "learning_rate": 9.127809261134694e-07, "loss": 4.4828, "step": 36180 }, { "epoch": 1.5592023086531421, "learning_rate": 9.127324441379905e-07, "loss": 4.1292, "step": 36200 }, { "epoch": 1.5600637463927294, "learning_rate": 9.126839621625115e-07, "loss": 4.5201, "step": 36220 }, { "epoch": 1.5609251841323168, "learning_rate": 9.126354801870327e-07, "loss": 4.2531, "step": 36240 }, { "epoch": 1.5617866218719043, "learning_rate": 9.125869982115538e-07, "loss": 4.4037, "step": 36260 }, { "epoch": 1.5626480596114916, "learning_rate": 9.125385162360748e-07, "loss": 4.5505, "step": 36280 }, { "epoch": 1.5635094973510788, "learning_rate": 9.124900342605959e-07, "loss": 4.297, "step": 36300 }, { "epoch": 1.5643709350906663, "learning_rate": 9.124415522851171e-07, "loss": 4.5845, "step": 36320 }, { "epoch": 1.5652323728302537, "learning_rate": 9.123930703096382e-07, "loss": 4.5232, "step": 36340 }, { "epoch": 1.566093810569841, "learning_rate": 9.123445883341593e-07, "loss": 4.4532, "step": 36360 }, { "epoch": 1.5669552483094285, "learning_rate": 9.122961063586804e-07, "loss": 4.6118, "step": 36380 }, { "epoch": 1.567816686049016, "learning_rate": 9.122476243832014e-07, "loss": 4.3377, "step": 36400 }, { "epoch": 1.5686781237886032, "learning_rate": 9.121991424077226e-07, "loss": 4.4561, "step": 36420 }, { "epoch": 1.5695395615281904, "learning_rate": 9.121506604322436e-07, "loss": 4.4312, "step": 36440 }, { "epoch": 1.5704009992677779, "learning_rate": 9.121021784567648e-07, "loss": 4.5839, "step": 36460 }, { "epoch": 1.5712624370073653, "learning_rate": 9.120536964812859e-07, "loss": 4.5667, "step": 36480 }, { "epoch": 1.5721238747469526, "learning_rate": 9.120052145058071e-07, "loss": 4.4461, "step": 36500 }, { "epoch": 1.57298531248654, "learning_rate": 9.119567325303281e-07, "loss": 4.4775, "step": 36520 }, { "epoch": 1.5738467502261275, "learning_rate": 9.119082505548492e-07, "loss": 4.4642, "step": 36540 }, { "epoch": 1.5747081879657148, "learning_rate": 9.118597685793703e-07, "loss": 4.4668, "step": 36560 }, { "epoch": 1.575569625705302, "learning_rate": 9.118112866038915e-07, "loss": 4.4052, "step": 36580 }, { "epoch": 1.5764310634448895, "learning_rate": 9.117628046284124e-07, "loss": 4.6068, "step": 36600 }, { "epoch": 1.577292501184477, "learning_rate": 9.117143226529337e-07, "loss": 4.2858, "step": 36620 }, { "epoch": 1.5781539389240642, "learning_rate": 9.116658406774548e-07, "loss": 4.3676, "step": 36640 }, { "epoch": 1.5790153766636517, "learning_rate": 9.116173587019759e-07, "loss": 4.3611, "step": 36660 }, { "epoch": 1.5798768144032391, "learning_rate": 9.115688767264969e-07, "loss": 4.2911, "step": 36680 }, { "epoch": 1.5807382521428264, "learning_rate": 9.115203947510182e-07, "loss": 4.4236, "step": 36700 }, { "epoch": 1.5815996898824136, "learning_rate": 9.114719127755392e-07, "loss": 4.4679, "step": 36720 }, { "epoch": 1.582461127622001, "learning_rate": 9.114234308000604e-07, "loss": 4.4973, "step": 36740 }, { "epoch": 1.5833225653615886, "learning_rate": 9.113749488245814e-07, "loss": 4.5085, "step": 36760 }, { "epoch": 1.5841840031011758, "learning_rate": 9.113264668491025e-07, "loss": 4.2936, "step": 36780 }, { "epoch": 1.5850454408407633, "learning_rate": 9.112779848736236e-07, "loss": 4.5846, "step": 36800 }, { "epoch": 1.5859068785803507, "learning_rate": 9.112295028981446e-07, "loss": 4.4171, "step": 36820 }, { "epoch": 1.586768316319938, "learning_rate": 9.111810209226658e-07, "loss": 4.3638, "step": 36840 }, { "epoch": 1.5876297540595252, "learning_rate": 9.11132538947187e-07, "loss": 4.4992, "step": 36860 }, { "epoch": 1.5884911917991127, "learning_rate": 9.110840569717081e-07, "loss": 4.3224, "step": 36880 }, { "epoch": 1.5893526295387002, "learning_rate": 9.110355749962291e-07, "loss": 4.5416, "step": 36900 }, { "epoch": 1.5902140672782874, "learning_rate": 9.109870930207502e-07, "loss": 4.369, "step": 36920 }, { "epoch": 1.5910755050178749, "learning_rate": 9.109386110452713e-07, "loss": 4.6777, "step": 36940 }, { "epoch": 1.5919369427574623, "learning_rate": 9.108901290697925e-07, "loss": 4.4192, "step": 36960 }, { "epoch": 1.5927983804970496, "learning_rate": 9.108416470943135e-07, "loss": 4.4574, "step": 36980 }, { "epoch": 1.5936598182366368, "learning_rate": 9.107931651188347e-07, "loss": 4.4622, "step": 37000 }, { "epoch": 1.5945212559762243, "learning_rate": 9.107446831433558e-07, "loss": 4.2659, "step": 37020 }, { "epoch": 1.5953826937158118, "learning_rate": 9.106962011678769e-07, "loss": 4.3141, "step": 37040 }, { "epoch": 1.596244131455399, "learning_rate": 9.106477191923979e-07, "loss": 4.1779, "step": 37060 }, { "epoch": 1.5971055691949865, "learning_rate": 9.105992372169191e-07, "loss": 4.4769, "step": 37080 }, { "epoch": 1.597967006934574, "learning_rate": 9.105507552414402e-07, "loss": 4.306, "step": 37100 }, { "epoch": 1.5988284446741612, "learning_rate": 9.105022732659612e-07, "loss": 4.3981, "step": 37120 }, { "epoch": 1.5996898824137484, "learning_rate": 9.104537912904824e-07, "loss": 4.5685, "step": 37140 }, { "epoch": 1.600551320153336, "learning_rate": 9.104053093150035e-07, "loss": 4.2877, "step": 37160 }, { "epoch": 1.6014127578929234, "learning_rate": 9.103568273395246e-07, "loss": 4.4643, "step": 37180 }, { "epoch": 1.6022741956325106, "learning_rate": 9.103083453640457e-07, "loss": 4.4946, "step": 37200 }, { "epoch": 1.603135633372098, "learning_rate": 9.102598633885668e-07, "loss": 4.249, "step": 37220 }, { "epoch": 1.6039970711116855, "learning_rate": 9.10211381413088e-07, "loss": 4.3711, "step": 37240 }, { "epoch": 1.6048585088512728, "learning_rate": 9.101628994376091e-07, "loss": 4.4879, "step": 37260 }, { "epoch": 1.60571994659086, "learning_rate": 9.101144174621302e-07, "loss": 4.4627, "step": 37280 }, { "epoch": 1.6065813843304475, "learning_rate": 9.100659354866512e-07, "loss": 4.3324, "step": 37300 }, { "epoch": 1.607442822070035, "learning_rate": 9.100174535111724e-07, "loss": 4.345, "step": 37320 }, { "epoch": 1.6083042598096222, "learning_rate": 9.099689715356935e-07, "loss": 4.4219, "step": 37340 }, { "epoch": 1.6091656975492097, "learning_rate": 9.099204895602145e-07, "loss": 4.1766, "step": 37360 }, { "epoch": 1.6100271352887972, "learning_rate": 9.098720075847357e-07, "loss": 4.4342, "step": 37380 }, { "epoch": 1.6108885730283844, "learning_rate": 9.098235256092569e-07, "loss": 4.232, "step": 37400 }, { "epoch": 1.6117500107679716, "learning_rate": 9.097750436337778e-07, "loss": 4.3993, "step": 37420 }, { "epoch": 1.612611448507559, "learning_rate": 9.097265616582988e-07, "loss": 4.3122, "step": 37440 }, { "epoch": 1.6134728862471466, "learning_rate": 9.096780796828201e-07, "loss": 4.3472, "step": 37460 }, { "epoch": 1.6143343239867338, "learning_rate": 9.096295977073412e-07, "loss": 4.3092, "step": 37480 }, { "epoch": 1.615195761726321, "learning_rate": 9.095811157318623e-07, "loss": 4.1438, "step": 37500 }, { "epoch": 1.6160571994659088, "learning_rate": 9.095326337563834e-07, "loss": 4.4261, "step": 37520 }, { "epoch": 1.616918637205496, "learning_rate": 9.094841517809045e-07, "loss": 4.3154, "step": 37540 }, { "epoch": 1.6177800749450832, "learning_rate": 9.094356698054256e-07, "loss": 4.363, "step": 37560 }, { "epoch": 1.6186415126846707, "learning_rate": 9.093871878299467e-07, "loss": 4.2389, "step": 37580 }, { "epoch": 1.6195029504242582, "learning_rate": 9.093387058544678e-07, "loss": 4.3851, "step": 37600 }, { "epoch": 1.6203643881638454, "learning_rate": 9.09290223878989e-07, "loss": 4.4013, "step": 37620 }, { "epoch": 1.6212258259034327, "learning_rate": 9.092417419035101e-07, "loss": 4.3324, "step": 37640 }, { "epoch": 1.6220872636430204, "learning_rate": 9.091932599280312e-07, "loss": 4.373, "step": 37660 }, { "epoch": 1.6229487013826076, "learning_rate": 9.091447779525522e-07, "loss": 4.6259, "step": 37680 }, { "epoch": 1.6238101391221949, "learning_rate": 9.090962959770734e-07, "loss": 4.2269, "step": 37700 }, { "epoch": 1.6246715768617823, "learning_rate": 9.090478140015944e-07, "loss": 4.3219, "step": 37720 }, { "epoch": 1.6255330146013698, "learning_rate": 9.089993320261156e-07, "loss": 4.2986, "step": 37740 }, { "epoch": 1.626394452340957, "learning_rate": 9.089508500506367e-07, "loss": 4.4416, "step": 37760 }, { "epoch": 1.6272558900805443, "learning_rate": 9.089023680751579e-07, "loss": 4.3954, "step": 37780 }, { "epoch": 1.628117327820132, "learning_rate": 9.088538860996788e-07, "loss": 4.3065, "step": 37800 }, { "epoch": 1.6289787655597192, "learning_rate": 9.088054041242e-07, "loss": 4.2826, "step": 37820 }, { "epoch": 1.6298402032993065, "learning_rate": 9.087569221487211e-07, "loss": 4.4086, "step": 37840 }, { "epoch": 1.630701641038894, "learning_rate": 9.087084401732423e-07, "loss": 4.4004, "step": 37860 }, { "epoch": 1.6315630787784814, "learning_rate": 9.086599581977633e-07, "loss": 4.5516, "step": 37880 }, { "epoch": 1.6324245165180686, "learning_rate": 9.086114762222844e-07, "loss": 4.4274, "step": 37900 }, { "epoch": 1.6332859542576559, "learning_rate": 9.085629942468056e-07, "loss": 4.3502, "step": 37920 }, { "epoch": 1.6341473919972433, "learning_rate": 9.085145122713267e-07, "loss": 4.4738, "step": 37940 }, { "epoch": 1.6350088297368308, "learning_rate": 9.084660302958478e-07, "loss": 4.2082, "step": 37960 }, { "epoch": 1.635870267476418, "learning_rate": 9.084175483203688e-07, "loss": 4.4372, "step": 37980 }, { "epoch": 1.6367317052160055, "learning_rate": 9.0836906634489e-07, "loss": 4.3474, "step": 38000 }, { "epoch": 1.637593142955593, "learning_rate": 9.08320584369411e-07, "loss": 4.3913, "step": 38020 }, { "epoch": 1.6384545806951802, "learning_rate": 9.082721023939322e-07, "loss": 4.4225, "step": 38040 }, { "epoch": 1.6393160184347675, "learning_rate": 9.082236204184532e-07, "loss": 4.3337, "step": 38060 }, { "epoch": 1.640177456174355, "learning_rate": 9.081751384429744e-07, "loss": 4.3372, "step": 38080 }, { "epoch": 1.6410388939139424, "learning_rate": 9.081266564674954e-07, "loss": 4.2774, "step": 38100 }, { "epoch": 1.6419003316535297, "learning_rate": 9.080781744920166e-07, "loss": 4.3185, "step": 38120 }, { "epoch": 1.6427617693931171, "learning_rate": 9.080296925165377e-07, "loss": 4.3871, "step": 38140 }, { "epoch": 1.6436232071327046, "learning_rate": 9.079812105410589e-07, "loss": 4.3266, "step": 38160 }, { "epoch": 1.6444846448722918, "learning_rate": 9.079327285655798e-07, "loss": 4.4932, "step": 38180 }, { "epoch": 1.645346082611879, "learning_rate": 9.07884246590101e-07, "loss": 4.7061, "step": 38200 }, { "epoch": 1.6462075203514666, "learning_rate": 9.078357646146221e-07, "loss": 4.458, "step": 38220 }, { "epoch": 1.647068958091054, "learning_rate": 9.077872826391433e-07, "loss": 4.3307, "step": 38240 }, { "epoch": 1.6479303958306413, "learning_rate": 9.077388006636643e-07, "loss": 4.1894, "step": 38260 }, { "epoch": 1.6487918335702287, "learning_rate": 9.076903186881855e-07, "loss": 4.2687, "step": 38280 }, { "epoch": 1.6496532713098162, "learning_rate": 9.076418367127066e-07, "loss": 4.4671, "step": 38300 }, { "epoch": 1.6505147090494035, "learning_rate": 9.075933547372277e-07, "loss": 4.4285, "step": 38320 }, { "epoch": 1.6513761467889907, "learning_rate": 9.075448727617487e-07, "loss": 4.4103, "step": 38340 }, { "epoch": 1.6522375845285782, "learning_rate": 9.074963907862699e-07, "loss": 4.4297, "step": 38360 }, { "epoch": 1.6530990222681656, "learning_rate": 9.074479088107909e-07, "loss": 4.5008, "step": 38380 }, { "epoch": 1.6539604600077529, "learning_rate": 9.073994268353122e-07, "loss": 4.2244, "step": 38400 }, { "epoch": 1.6548218977473403, "learning_rate": 9.073509448598332e-07, "loss": 4.4952, "step": 38420 }, { "epoch": 1.6556833354869278, "learning_rate": 9.073024628843542e-07, "loss": 4.5604, "step": 38440 }, { "epoch": 1.656544773226515, "learning_rate": 9.072539809088754e-07, "loss": 4.4206, "step": 38460 }, { "epoch": 1.6574062109661023, "learning_rate": 9.072054989333966e-07, "loss": 4.3534, "step": 38480 }, { "epoch": 1.6582676487056898, "learning_rate": 9.071570169579176e-07, "loss": 4.4252, "step": 38500 }, { "epoch": 1.6591290864452772, "learning_rate": 9.071085349824387e-07, "loss": 4.6212, "step": 38520 }, { "epoch": 1.6599905241848645, "learning_rate": 9.070600530069599e-07, "loss": 4.3829, "step": 38540 }, { "epoch": 1.660851961924452, "learning_rate": 9.070115710314809e-07, "loss": 4.5421, "step": 38560 }, { "epoch": 1.6617133996640394, "learning_rate": 9.06963089056002e-07, "loss": 4.3745, "step": 38580 }, { "epoch": 1.6625748374036267, "learning_rate": 9.069146070805231e-07, "loss": 4.4464, "step": 38600 }, { "epoch": 1.663436275143214, "learning_rate": 9.068661251050443e-07, "loss": 4.4741, "step": 38620 }, { "epoch": 1.6642977128828014, "learning_rate": 9.068176431295653e-07, "loss": 4.2344, "step": 38640 }, { "epoch": 1.6651591506223888, "learning_rate": 9.067691611540865e-07, "loss": 4.3982, "step": 38660 }, { "epoch": 1.666020588361976, "learning_rate": 9.067206791786076e-07, "loss": 4.2757, "step": 38680 }, { "epoch": 1.6668820261015636, "learning_rate": 9.066721972031286e-07, "loss": 4.3779, "step": 38700 }, { "epoch": 1.667743463841151, "learning_rate": 9.066237152276497e-07, "loss": 4.2537, "step": 38720 }, { "epoch": 1.6686049015807383, "learning_rate": 9.065752332521709e-07, "loss": 4.459, "step": 38740 }, { "epoch": 1.6694663393203255, "learning_rate": 9.06526751276692e-07, "loss": 4.3083, "step": 38760 }, { "epoch": 1.670327777059913, "learning_rate": 9.064782693012131e-07, "loss": 4.5771, "step": 38780 }, { "epoch": 1.6711892147995004, "learning_rate": 9.064297873257342e-07, "loss": 4.6128, "step": 38800 }, { "epoch": 1.6720506525390877, "learning_rate": 9.063813053502553e-07, "loss": 4.3605, "step": 38820 }, { "epoch": 1.6729120902786752, "learning_rate": 9.063328233747764e-07, "loss": 4.3275, "step": 38840 }, { "epoch": 1.6737735280182626, "learning_rate": 9.062843413992975e-07, "loss": 4.4422, "step": 38860 }, { "epoch": 1.6746349657578499, "learning_rate": 9.062358594238186e-07, "loss": 4.4172, "step": 38880 }, { "epoch": 1.6754964034974371, "learning_rate": 9.061873774483397e-07, "loss": 4.5314, "step": 38900 }, { "epoch": 1.6763578412370246, "learning_rate": 9.061388954728609e-07, "loss": 4.4356, "step": 38920 }, { "epoch": 1.677219278976612, "learning_rate": 9.060904134973819e-07, "loss": 4.1629, "step": 38940 }, { "epoch": 1.6780807167161993, "learning_rate": 9.06041931521903e-07, "loss": 4.4948, "step": 38960 }, { "epoch": 1.6789421544557868, "learning_rate": 9.059934495464241e-07, "loss": 4.787, "step": 38980 }, { "epoch": 1.6798035921953742, "learning_rate": 9.059449675709452e-07, "loss": 4.1931, "step": 39000 }, { "epoch": 1.6806650299349615, "learning_rate": 9.058964855954664e-07, "loss": 4.1048, "step": 39020 }, { "epoch": 1.6815264676745487, "learning_rate": 9.058480036199875e-07, "loss": 4.522, "step": 39040 }, { "epoch": 1.6823879054141362, "learning_rate": 9.057995216445086e-07, "loss": 4.4355, "step": 39060 }, { "epoch": 1.6832493431537237, "learning_rate": 9.057510396690296e-07, "loss": 4.2634, "step": 39080 }, { "epoch": 1.684110780893311, "learning_rate": 9.057025576935507e-07, "loss": 4.3253, "step": 39100 }, { "epoch": 1.6849722186328981, "learning_rate": 9.056540757180719e-07, "loss": 4.2652, "step": 39120 }, { "epoch": 1.6858336563724858, "learning_rate": 9.05605593742593e-07, "loss": 4.3281, "step": 39140 }, { "epoch": 1.686695094112073, "learning_rate": 9.055571117671141e-07, "loss": 4.391, "step": 39160 }, { "epoch": 1.6875565318516603, "learning_rate": 9.055086297916352e-07, "loss": 4.3401, "step": 39180 }, { "epoch": 1.6884179695912478, "learning_rate": 9.054601478161563e-07, "loss": 4.3509, "step": 39200 }, { "epoch": 1.6892794073308353, "learning_rate": 9.054116658406772e-07, "loss": 4.4641, "step": 39220 }, { "epoch": 1.6901408450704225, "learning_rate": 9.053631838651985e-07, "loss": 4.3644, "step": 39240 }, { "epoch": 1.6910022828100097, "learning_rate": 9.053147018897196e-07, "loss": 4.4213, "step": 39260 }, { "epoch": 1.6918637205495974, "learning_rate": 9.052662199142408e-07, "loss": 4.4868, "step": 39280 }, { "epoch": 1.6927251582891847, "learning_rate": 9.052177379387618e-07, "loss": 4.3956, "step": 39300 }, { "epoch": 1.693586596028772, "learning_rate": 9.051692559632829e-07, "loss": 4.5938, "step": 39320 }, { "epoch": 1.6944480337683594, "learning_rate": 9.05120773987804e-07, "loss": 4.4835, "step": 39340 }, { "epoch": 1.6953094715079469, "learning_rate": 9.050722920123252e-07, "loss": 4.2902, "step": 39360 }, { "epoch": 1.696170909247534, "learning_rate": 9.050238100368462e-07, "loss": 4.3747, "step": 39380 }, { "epoch": 1.6970323469871214, "learning_rate": 9.049753280613674e-07, "loss": 4.482, "step": 39400 }, { "epoch": 1.697893784726709, "learning_rate": 9.049268460858885e-07, "loss": 4.2359, "step": 39420 }, { "epoch": 1.6987552224662963, "learning_rate": 9.048783641104097e-07, "loss": 4.3643, "step": 39440 }, { "epoch": 1.6996166602058835, "learning_rate": 9.048298821349306e-07, "loss": 4.1877, "step": 39460 }, { "epoch": 1.700478097945471, "learning_rate": 9.047814001594518e-07, "loss": 4.1596, "step": 39480 }, { "epoch": 1.7013395356850585, "learning_rate": 9.047329181839729e-07, "loss": 4.2136, "step": 39500 }, { "epoch": 1.7022009734246457, "learning_rate": 9.04684436208494e-07, "loss": 4.3671, "step": 39520 }, { "epoch": 1.703062411164233, "learning_rate": 9.046359542330151e-07, "loss": 4.3725, "step": 39540 }, { "epoch": 1.7039238489038204, "learning_rate": 9.045874722575363e-07, "loss": 4.4943, "step": 39560 }, { "epoch": 1.704785286643408, "learning_rate": 9.045389902820573e-07, "loss": 4.429, "step": 39580 }, { "epoch": 1.7056467243829951, "learning_rate": 9.044905083065783e-07, "loss": 4.4638, "step": 39600 }, { "epoch": 1.7065081621225826, "learning_rate": 9.044420263310995e-07, "loss": 4.3664, "step": 39620 }, { "epoch": 1.70736959986217, "learning_rate": 9.043935443556205e-07, "loss": 4.3404, "step": 39640 }, { "epoch": 1.7082310376017573, "learning_rate": 9.043450623801418e-07, "loss": 4.3263, "step": 39660 }, { "epoch": 1.7090924753413446, "learning_rate": 9.042965804046628e-07, "loss": 4.2613, "step": 39680 }, { "epoch": 1.709953913080932, "learning_rate": 9.04248098429184e-07, "loss": 4.1147, "step": 39700 }, { "epoch": 1.7108153508205195, "learning_rate": 9.04199616453705e-07, "loss": 4.2168, "step": 39720 }, { "epoch": 1.7116767885601067, "learning_rate": 9.041511344782263e-07, "loss": 4.0227, "step": 39740 }, { "epoch": 1.7125382262996942, "learning_rate": 9.041026525027472e-07, "loss": 4.563, "step": 39760 }, { "epoch": 1.7133996640392817, "learning_rate": 9.040541705272684e-07, "loss": 4.4562, "step": 39780 }, { "epoch": 1.714261101778869, "learning_rate": 9.040056885517895e-07, "loss": 4.3732, "step": 39800 }, { "epoch": 1.7151225395184562, "learning_rate": 9.039572065763107e-07, "loss": 4.5097, "step": 39820 }, { "epoch": 1.7159839772580436, "learning_rate": 9.039087246008316e-07, "loss": 4.4336, "step": 39840 }, { "epoch": 1.716845414997631, "learning_rate": 9.038602426253528e-07, "loss": 4.3732, "step": 39860 }, { "epoch": 1.7177068527372183, "learning_rate": 9.038117606498739e-07, "loss": 4.214, "step": 39880 }, { "epoch": 1.7185682904768058, "learning_rate": 9.037632786743951e-07, "loss": 4.3972, "step": 39900 }, { "epoch": 1.7194297282163933, "learning_rate": 9.037147966989161e-07, "loss": 4.3894, "step": 39920 }, { "epoch": 1.7202911659559805, "learning_rate": 9.036663147234373e-07, "loss": 4.2121, "step": 39940 }, { "epoch": 1.7211526036955678, "learning_rate": 9.036178327479583e-07, "loss": 4.3815, "step": 39960 }, { "epoch": 1.7220140414351552, "learning_rate": 9.035693507724794e-07, "loss": 4.4487, "step": 39980 }, { "epoch": 1.7228754791747427, "learning_rate": 9.035208687970005e-07, "loss": 4.2327, "step": 40000 }, { "epoch": 1.72373691691433, "learning_rate": 9.034723868215217e-07, "loss": 4.1977, "step": 40020 }, { "epoch": 1.7245983546539174, "learning_rate": 9.034239048460428e-07, "loss": 4.6189, "step": 40040 }, { "epoch": 1.7254597923935049, "learning_rate": 9.033754228705638e-07, "loss": 4.366, "step": 40060 }, { "epoch": 1.7263212301330921, "learning_rate": 9.03326940895085e-07, "loss": 4.3975, "step": 40080 }, { "epoch": 1.7271826678726794, "learning_rate": 9.032784589196061e-07, "loss": 4.2507, "step": 40100 }, { "epoch": 1.7280441056122668, "learning_rate": 9.032299769441272e-07, "loss": 4.5417, "step": 40120 }, { "epoch": 1.7289055433518543, "learning_rate": 9.031814949686482e-07, "loss": 4.4213, "step": 40140 }, { "epoch": 1.7297669810914416, "learning_rate": 9.031330129931693e-07, "loss": 4.5404, "step": 40160 }, { "epoch": 1.730628418831029, "learning_rate": 9.030845310176905e-07, "loss": 4.3475, "step": 40180 }, { "epoch": 1.7314898565706165, "learning_rate": 9.030360490422117e-07, "loss": 4.3838, "step": 40200 }, { "epoch": 1.7323512943102037, "learning_rate": 9.029875670667326e-07, "loss": 4.404, "step": 40220 }, { "epoch": 1.733212732049791, "learning_rate": 9.029390850912538e-07, "loss": 4.4143, "step": 40240 }, { "epoch": 1.7340741697893785, "learning_rate": 9.028906031157749e-07, "loss": 4.4484, "step": 40260 }, { "epoch": 1.734935607528966, "learning_rate": 9.02842121140296e-07, "loss": 4.3804, "step": 40280 }, { "epoch": 1.7357970452685532, "learning_rate": 9.027936391648171e-07, "loss": 4.3601, "step": 40300 }, { "epoch": 1.7366584830081406, "learning_rate": 9.027451571893383e-07, "loss": 4.3157, "step": 40320 }, { "epoch": 1.737519920747728, "learning_rate": 9.026966752138594e-07, "loss": 4.5075, "step": 40340 }, { "epoch": 1.7383813584873153, "learning_rate": 9.026481932383804e-07, "loss": 4.2924, "step": 40360 }, { "epoch": 1.7392427962269026, "learning_rate": 9.025997112629015e-07, "loss": 4.4275, "step": 40380 }, { "epoch": 1.74010423396649, "learning_rate": 9.025512292874227e-07, "loss": 4.198, "step": 40400 }, { "epoch": 1.7409656717060775, "learning_rate": 9.025027473119438e-07, "loss": 4.3831, "step": 40420 }, { "epoch": 1.7418271094456648, "learning_rate": 9.024542653364649e-07, "loss": 4.3877, "step": 40440 }, { "epoch": 1.7426885471852522, "learning_rate": 9.02405783360986e-07, "loss": 4.4235, "step": 40460 }, { "epoch": 1.7435499849248397, "learning_rate": 9.023573013855071e-07, "loss": 4.5444, "step": 40480 }, { "epoch": 1.744411422664427, "learning_rate": 9.023088194100282e-07, "loss": 4.2116, "step": 40500 }, { "epoch": 1.7452728604040142, "learning_rate": 9.022603374345493e-07, "loss": 4.4303, "step": 40520 }, { "epoch": 1.7461342981436017, "learning_rate": 9.022118554590704e-07, "loss": 4.2506, "step": 40540 }, { "epoch": 1.7469957358831891, "learning_rate": 9.021633734835916e-07, "loss": 4.3852, "step": 40560 }, { "epoch": 1.7478571736227764, "learning_rate": 9.021148915081126e-07, "loss": 4.3947, "step": 40580 }, { "epoch": 1.7487186113623638, "learning_rate": 9.020664095326336e-07, "loss": 4.3598, "step": 40600 }, { "epoch": 1.7495800491019513, "learning_rate": 9.020179275571548e-07, "loss": 4.2804, "step": 40620 }, { "epoch": 1.7504414868415386, "learning_rate": 9.01969445581676e-07, "loss": 4.4294, "step": 40640 }, { "epoch": 1.7513029245811258, "learning_rate": 9.01920963606197e-07, "loss": 4.5575, "step": 40660 }, { "epoch": 1.7521643623207133, "learning_rate": 9.018724816307181e-07, "loss": 4.1388, "step": 40680 }, { "epoch": 1.7530258000603007, "learning_rate": 9.018239996552393e-07, "loss": 4.2976, "step": 40700 }, { "epoch": 1.753887237799888, "learning_rate": 9.017755176797604e-07, "loss": 4.1478, "step": 40720 }, { "epoch": 1.7547486755394752, "learning_rate": 9.017270357042814e-07, "loss": 4.4194, "step": 40740 }, { "epoch": 1.755610113279063, "learning_rate": 9.016785537288025e-07, "loss": 4.3831, "step": 40760 }, { "epoch": 1.7564715510186502, "learning_rate": 9.016300717533237e-07, "loss": 4.2891, "step": 40780 }, { "epoch": 1.7573329887582374, "learning_rate": 9.015815897778448e-07, "loss": 4.3545, "step": 40800 }, { "epoch": 1.7581944264978249, "learning_rate": 9.015331078023659e-07, "loss": 4.3924, "step": 40820 }, { "epoch": 1.7590558642374123, "learning_rate": 9.01484625826887e-07, "loss": 4.4795, "step": 40840 }, { "epoch": 1.7599173019769996, "learning_rate": 9.014361438514081e-07, "loss": 4.1988, "step": 40860 }, { "epoch": 1.7607787397165868, "learning_rate": 9.013876618759291e-07, "loss": 4.5492, "step": 40880 }, { "epoch": 1.7616401774561745, "learning_rate": 9.013391799004503e-07, "loss": 4.3368, "step": 40900 }, { "epoch": 1.7625016151957618, "learning_rate": 9.012906979249714e-07, "loss": 4.388, "step": 40920 }, { "epoch": 1.763363052935349, "learning_rate": 9.012422159494926e-07, "loss": 4.2305, "step": 40940 }, { "epoch": 1.7642244906749365, "learning_rate": 9.011937339740136e-07, "loss": 4.5238, "step": 40960 }, { "epoch": 1.765085928414524, "learning_rate": 9.011452519985347e-07, "loss": 4.5662, "step": 40980 }, { "epoch": 1.7659473661541112, "learning_rate": 9.010967700230557e-07, "loss": 4.4144, "step": 41000 }, { "epoch": 1.7668088038936984, "learning_rate": 9.01048288047577e-07, "loss": 4.2149, "step": 41020 }, { "epoch": 1.767670241633286, "learning_rate": 9.00999806072098e-07, "loss": 4.4242, "step": 41040 }, { "epoch": 1.7685316793728734, "learning_rate": 9.009513240966192e-07, "loss": 4.4088, "step": 41060 }, { "epoch": 1.7693931171124606, "learning_rate": 9.009028421211403e-07, "loss": 4.323, "step": 41080 }, { "epoch": 1.770254554852048, "learning_rate": 9.008543601456614e-07, "loss": 4.4107, "step": 41100 }, { "epoch": 1.7711159925916355, "learning_rate": 9.008058781701824e-07, "loss": 4.5422, "step": 41120 }, { "epoch": 1.7719774303312228, "learning_rate": 9.007573961947035e-07, "loss": 4.4039, "step": 41140 }, { "epoch": 1.77283886807081, "learning_rate": 9.007089142192247e-07, "loss": 4.2002, "step": 41160 }, { "epoch": 1.7737003058103975, "learning_rate": 9.006604322437459e-07, "loss": 4.3473, "step": 41180 }, { "epoch": 1.774561743549985, "learning_rate": 9.006119502682669e-07, "loss": 4.2903, "step": 41200 }, { "epoch": 1.7754231812895722, "learning_rate": 9.00563468292788e-07, "loss": 4.4221, "step": 41220 }, { "epoch": 1.7762846190291597, "learning_rate": 9.005149863173091e-07, "loss": 4.3886, "step": 41240 }, { "epoch": 1.7771460567687472, "learning_rate": 9.004665043418301e-07, "loss": 4.3666, "step": 41260 }, { "epoch": 1.7780074945083344, "learning_rate": 9.004180223663513e-07, "loss": 4.3247, "step": 41280 }, { "epoch": 1.7788689322479216, "learning_rate": 9.003695403908724e-07, "loss": 4.4073, "step": 41300 }, { "epoch": 1.779730369987509, "learning_rate": 9.003210584153936e-07, "loss": 4.1473, "step": 41320 }, { "epoch": 1.7805918077270966, "learning_rate": 9.002725764399146e-07, "loss": 4.3599, "step": 41340 }, { "epoch": 1.7814532454666838, "learning_rate": 9.002240944644357e-07, "loss": 4.2765, "step": 41360 }, { "epoch": 1.7823146832062713, "learning_rate": 9.001756124889568e-07, "loss": 4.4386, "step": 41380 }, { "epoch": 1.7831761209458588, "learning_rate": 9.00127130513478e-07, "loss": 4.5666, "step": 41400 }, { "epoch": 1.784037558685446, "learning_rate": 9.000786485379989e-07, "loss": 4.3756, "step": 41420 }, { "epoch": 1.7848989964250332, "learning_rate": 9.000301665625202e-07, "loss": 4.2232, "step": 41440 }, { "epoch": 1.7857604341646207, "learning_rate": 8.999816845870413e-07, "loss": 4.2154, "step": 41460 }, { "epoch": 1.7866218719042082, "learning_rate": 8.999332026115625e-07, "loss": 4.4487, "step": 41480 }, { "epoch": 1.7874833096437954, "learning_rate": 8.998847206360834e-07, "loss": 4.1479, "step": 41500 }, { "epoch": 1.788344747383383, "learning_rate": 8.998362386606047e-07, "loss": 4.2259, "step": 41520 }, { "epoch": 1.7892061851229704, "learning_rate": 8.997877566851257e-07, "loss": 4.4609, "step": 41540 }, { "epoch": 1.7900676228625576, "learning_rate": 8.997392747096468e-07, "loss": 4.2485, "step": 41560 }, { "epoch": 1.7909290606021449, "learning_rate": 8.996907927341679e-07, "loss": 4.3681, "step": 41580 }, { "epoch": 1.7917904983417323, "learning_rate": 8.996423107586891e-07, "loss": 4.2164, "step": 41600 }, { "epoch": 1.7926519360813198, "learning_rate": 8.995938287832101e-07, "loss": 4.1929, "step": 41620 }, { "epoch": 1.793513373820907, "learning_rate": 8.995453468077312e-07, "loss": 4.226, "step": 41640 }, { "epoch": 1.7943748115604945, "learning_rate": 8.994968648322523e-07, "loss": 4.3083, "step": 41660 }, { "epoch": 1.795236249300082, "learning_rate": 8.994483828567734e-07, "loss": 4.2247, "step": 41680 }, { "epoch": 1.7960976870396692, "learning_rate": 8.993999008812946e-07, "loss": 4.1595, "step": 41700 }, { "epoch": 1.7969591247792565, "learning_rate": 8.993514189058157e-07, "loss": 4.2684, "step": 41720 }, { "epoch": 1.797820562518844, "learning_rate": 8.993029369303367e-07, "loss": 4.3604, "step": 41740 }, { "epoch": 1.7986820002584314, "learning_rate": 8.992544549548578e-07, "loss": 4.2129, "step": 41760 }, { "epoch": 1.7995434379980186, "learning_rate": 8.99205972979379e-07, "loss": 4.381, "step": 41780 }, { "epoch": 1.800404875737606, "learning_rate": 8.991574910039e-07, "loss": 4.2786, "step": 41800 }, { "epoch": 1.8012663134771936, "learning_rate": 8.991090090284212e-07, "loss": 4.4976, "step": 41820 }, { "epoch": 1.8021277512167808, "learning_rate": 8.990605270529423e-07, "loss": 4.2708, "step": 41840 }, { "epoch": 1.802989188956368, "learning_rate": 8.990120450774634e-07, "loss": 4.271, "step": 41860 }, { "epoch": 1.8038506266959555, "learning_rate": 8.989635631019844e-07, "loss": 4.5042, "step": 41880 }, { "epoch": 1.804712064435543, "learning_rate": 8.989150811265056e-07, "loss": 4.677, "step": 41900 }, { "epoch": 1.8055735021751302, "learning_rate": 8.988665991510267e-07, "loss": 4.2695, "step": 41920 }, { "epoch": 1.8064349399147177, "learning_rate": 8.988181171755477e-07, "loss": 4.3243, "step": 41940 }, { "epoch": 1.8072963776543052, "learning_rate": 8.987696352000689e-07, "loss": 4.3315, "step": 41960 }, { "epoch": 1.8081578153938924, "learning_rate": 8.987211532245901e-07, "loss": 4.4334, "step": 41980 }, { "epoch": 1.8090192531334797, "learning_rate": 8.986726712491111e-07, "loss": 4.456, "step": 42000 }, { "epoch": 1.8098806908730671, "learning_rate": 8.986241892736322e-07, "loss": 4.2427, "step": 42020 }, { "epoch": 1.8107421286126546, "learning_rate": 8.985757072981533e-07, "loss": 4.5055, "step": 42040 }, { "epoch": 1.8116035663522418, "learning_rate": 8.985272253226745e-07, "loss": 4.7073, "step": 42060 }, { "epoch": 1.8124650040918293, "learning_rate": 8.984787433471956e-07, "loss": 4.4209, "step": 42080 }, { "epoch": 1.8133264418314168, "learning_rate": 8.984302613717167e-07, "loss": 4.4284, "step": 42100 }, { "epoch": 1.814187879571004, "learning_rate": 8.983817793962378e-07, "loss": 4.2628, "step": 42120 }, { "epoch": 1.8150493173105913, "learning_rate": 8.983332974207589e-07, "loss": 4.3875, "step": 42140 }, { "epoch": 1.8159107550501787, "learning_rate": 8.982848154452799e-07, "loss": 4.2567, "step": 42160 }, { "epoch": 1.8167721927897662, "learning_rate": 8.982363334698011e-07, "loss": 4.2877, "step": 42180 }, { "epoch": 1.8176336305293535, "learning_rate": 8.981878514943222e-07, "loss": 4.4149, "step": 42200 }, { "epoch": 1.818495068268941, "learning_rate": 8.981393695188433e-07, "loss": 4.2483, "step": 42220 }, { "epoch": 1.8193565060085284, "learning_rate": 8.980908875433644e-07, "loss": 4.5543, "step": 42240 }, { "epoch": 1.8202179437481156, "learning_rate": 8.980424055678855e-07, "loss": 4.5195, "step": 42260 }, { "epoch": 1.8210793814877029, "learning_rate": 8.979939235924066e-07, "loss": 4.3553, "step": 42280 }, { "epoch": 1.8219408192272903, "learning_rate": 8.979454416169277e-07, "loss": 4.1781, "step": 42300 }, { "epoch": 1.8228022569668778, "learning_rate": 8.978969596414488e-07, "loss": 4.4369, "step": 42320 }, { "epoch": 1.823663694706465, "learning_rate": 8.978484776659699e-07, "loss": 4.2777, "step": 42340 }, { "epoch": 1.8245251324460523, "learning_rate": 8.977999956904911e-07, "loss": 4.3465, "step": 42360 }, { "epoch": 1.82538657018564, "learning_rate": 8.97751513715012e-07, "loss": 4.3886, "step": 42380 }, { "epoch": 1.8262480079252272, "learning_rate": 8.977030317395332e-07, "loss": 4.2146, "step": 42400 }, { "epoch": 1.8271094456648145, "learning_rate": 8.976545497640543e-07, "loss": 4.3834, "step": 42420 }, { "epoch": 1.827970883404402, "learning_rate": 8.976060677885755e-07, "loss": 4.4563, "step": 42440 }, { "epoch": 1.8288323211439894, "learning_rate": 8.975575858130965e-07, "loss": 4.2038, "step": 42460 }, { "epoch": 1.8296937588835767, "learning_rate": 8.975091038376177e-07, "loss": 4.4039, "step": 42480 }, { "epoch": 1.830555196623164, "learning_rate": 8.974606218621388e-07, "loss": 4.1675, "step": 42500 }, { "epoch": 1.8314166343627516, "learning_rate": 8.974121398866599e-07, "loss": 4.3842, "step": 42520 }, { "epoch": 1.8322780721023388, "learning_rate": 8.973636579111809e-07, "loss": 4.2416, "step": 42540 }, { "epoch": 1.833139509841926, "learning_rate": 8.973151759357021e-07, "loss": 4.5044, "step": 42560 }, { "epoch": 1.8340009475815136, "learning_rate": 8.972666939602232e-07, "loss": 4.3566, "step": 42580 }, { "epoch": 1.834862385321101, "learning_rate": 8.972182119847444e-07, "loss": 4.503, "step": 42600 }, { "epoch": 1.8357238230606883, "learning_rate": 8.971697300092654e-07, "loss": 4.339, "step": 42620 }, { "epoch": 1.8365852608002755, "learning_rate": 8.971212480337865e-07, "loss": 4.2754, "step": 42640 }, { "epoch": 1.837446698539863, "learning_rate": 8.970727660583076e-07, "loss": 4.1781, "step": 42660 }, { "epoch": 1.8383081362794504, "learning_rate": 8.970242840828288e-07, "loss": 4.3477, "step": 42680 }, { "epoch": 1.8391695740190377, "learning_rate": 8.969758021073498e-07, "loss": 4.4648, "step": 42700 }, { "epoch": 1.8400310117586252, "learning_rate": 8.96927320131871e-07, "loss": 4.4001, "step": 42720 }, { "epoch": 1.8408924494982126, "learning_rate": 8.968788381563921e-07, "loss": 4.3718, "step": 42740 }, { "epoch": 1.8417538872377999, "learning_rate": 8.968303561809131e-07, "loss": 4.2497, "step": 42760 }, { "epoch": 1.8426153249773871, "learning_rate": 8.967818742054341e-07, "loss": 4.426, "step": 42780 }, { "epoch": 1.8434767627169746, "learning_rate": 8.967333922299554e-07, "loss": 4.4254, "step": 42800 }, { "epoch": 1.844338200456562, "learning_rate": 8.966849102544765e-07, "loss": 4.2573, "step": 42820 }, { "epoch": 1.8451996381961493, "learning_rate": 8.966364282789975e-07, "loss": 4.205, "step": 42840 }, { "epoch": 1.8460610759357368, "learning_rate": 8.965879463035187e-07, "loss": 4.5753, "step": 42860 }, { "epoch": 1.8469225136753242, "learning_rate": 8.965394643280399e-07, "loss": 4.2849, "step": 42880 }, { "epoch": 1.8477839514149115, "learning_rate": 8.964909823525609e-07, "loss": 4.4319, "step": 42900 }, { "epoch": 1.8486453891544987, "learning_rate": 8.964425003770819e-07, "loss": 4.4898, "step": 42920 }, { "epoch": 1.8495068268940862, "learning_rate": 8.963940184016031e-07, "loss": 4.393, "step": 42940 }, { "epoch": 1.8503682646336737, "learning_rate": 8.963455364261242e-07, "loss": 4.3307, "step": 42960 }, { "epoch": 1.851229702373261, "learning_rate": 8.962970544506454e-07, "loss": 4.2222, "step": 42980 }, { "epoch": 1.8520911401128484, "learning_rate": 8.962485724751664e-07, "loss": 4.3846, "step": 43000 }, { "epoch": 1.8529525778524358, "learning_rate": 8.962000904996875e-07, "loss": 4.2039, "step": 43020 }, { "epoch": 1.853814015592023, "learning_rate": 8.961516085242086e-07, "loss": 4.3454, "step": 43040 }, { "epoch": 1.8546754533316103, "learning_rate": 8.961031265487297e-07, "loss": 4.3098, "step": 43060 }, { "epoch": 1.8555368910711978, "learning_rate": 8.960546445732508e-07, "loss": 4.3761, "step": 43080 }, { "epoch": 1.8563983288107853, "learning_rate": 8.96006162597772e-07, "loss": 4.3821, "step": 43100 }, { "epoch": 1.8572597665503725, "learning_rate": 8.959576806222931e-07, "loss": 4.3961, "step": 43120 }, { "epoch": 1.85812120428996, "learning_rate": 8.959091986468141e-07, "loss": 4.1976, "step": 43140 }, { "epoch": 1.8589826420295474, "learning_rate": 8.958607166713352e-07, "loss": 4.2587, "step": 43160 }, { "epoch": 1.8598440797691347, "learning_rate": 8.958122346958564e-07, "loss": 4.3849, "step": 43180 }, { "epoch": 1.860705517508722, "learning_rate": 8.957637527203775e-07, "loss": 4.2497, "step": 43200 }, { "epoch": 1.8615669552483094, "learning_rate": 8.957152707448986e-07, "loss": 4.2743, "step": 43220 }, { "epoch": 1.8624283929878969, "learning_rate": 8.956667887694197e-07, "loss": 4.384, "step": 43240 }, { "epoch": 1.863289830727484, "learning_rate": 8.956183067939409e-07, "loss": 4.1961, "step": 43260 }, { "epoch": 1.8641512684670716, "learning_rate": 8.955698248184619e-07, "loss": 4.1731, "step": 43280 }, { "epoch": 1.865012706206659, "learning_rate": 8.955213428429829e-07, "loss": 4.1847, "step": 43300 }, { "epoch": 1.8658741439462463, "learning_rate": 8.954728608675041e-07, "loss": 4.3402, "step": 43320 }, { "epoch": 1.8667355816858335, "learning_rate": 8.954243788920253e-07, "loss": 4.5423, "step": 43340 }, { "epoch": 1.867597019425421, "learning_rate": 8.953758969165464e-07, "loss": 4.3562, "step": 43360 }, { "epoch": 1.8684584571650085, "learning_rate": 8.953274149410674e-07, "loss": 4.3731, "step": 43380 }, { "epoch": 1.8693198949045957, "learning_rate": 8.952789329655885e-07, "loss": 4.286, "step": 43400 }, { "epoch": 1.8701813326441832, "learning_rate": 8.952304509901096e-07, "loss": 4.2101, "step": 43420 }, { "epoch": 1.8710427703837706, "learning_rate": 8.951819690146307e-07, "loss": 4.2868, "step": 43440 }, { "epoch": 1.871904208123358, "learning_rate": 8.951334870391518e-07, "loss": 4.2495, "step": 43460 }, { "epoch": 1.8727656458629451, "learning_rate": 8.95085005063673e-07, "loss": 4.3549, "step": 43480 }, { "epoch": 1.8736270836025326, "learning_rate": 8.950365230881941e-07, "loss": 4.1452, "step": 43500 }, { "epoch": 1.87448852134212, "learning_rate": 8.949880411127151e-07, "loss": 4.2519, "step": 43520 }, { "epoch": 1.8753499590817073, "learning_rate": 8.949395591372362e-07, "loss": 4.1753, "step": 43540 }, { "epoch": 1.8762113968212948, "learning_rate": 8.948910771617574e-07, "loss": 4.2263, "step": 43560 }, { "epoch": 1.8770728345608823, "learning_rate": 8.948425951862785e-07, "loss": 4.3419, "step": 43580 }, { "epoch": 1.8779342723004695, "learning_rate": 8.947941132107996e-07, "loss": 4.0941, "step": 43600 }, { "epoch": 1.8787957100400567, "learning_rate": 8.947456312353207e-07, "loss": 4.2375, "step": 43620 }, { "epoch": 1.8796571477796442, "learning_rate": 8.946971492598419e-07, "loss": 4.2234, "step": 43640 }, { "epoch": 1.8805185855192317, "learning_rate": 8.946486672843628e-07, "loss": 4.3174, "step": 43660 }, { "epoch": 1.881380023258819, "learning_rate": 8.94600185308884e-07, "loss": 4.3217, "step": 43680 }, { "epoch": 1.8822414609984064, "learning_rate": 8.945517033334051e-07, "loss": 4.4135, "step": 43700 }, { "epoch": 1.8831028987379939, "learning_rate": 8.945032213579262e-07, "loss": 4.2409, "step": 43720 }, { "epoch": 1.883964336477581, "learning_rate": 8.944547393824473e-07, "loss": 4.4634, "step": 43740 }, { "epoch": 1.8848257742171683, "learning_rate": 8.944062574069685e-07, "loss": 4.2902, "step": 43760 }, { "epoch": 1.8856872119567558, "learning_rate": 8.943577754314895e-07, "loss": 4.296, "step": 43780 }, { "epoch": 1.8865486496963433, "learning_rate": 8.943092934560107e-07, "loss": 4.2914, "step": 43800 }, { "epoch": 1.8874100874359305, "learning_rate": 8.942608114805317e-07, "loss": 4.2811, "step": 43820 }, { "epoch": 1.8882715251755178, "learning_rate": 8.942123295050528e-07, "loss": 4.4442, "step": 43840 }, { "epoch": 1.8891329629151055, "learning_rate": 8.94163847529574e-07, "loss": 4.2375, "step": 43860 }, { "epoch": 1.8899944006546927, "learning_rate": 8.941153655540952e-07, "loss": 4.202, "step": 43880 }, { "epoch": 1.89085583839428, "learning_rate": 8.940668835786162e-07, "loss": 4.2511, "step": 43900 }, { "epoch": 1.8917172761338674, "learning_rate": 8.940184016031372e-07, "loss": 4.2208, "step": 43920 }, { "epoch": 1.8925787138734549, "learning_rate": 8.939699196276584e-07, "loss": 4.4483, "step": 43940 }, { "epoch": 1.8934401516130421, "learning_rate": 8.939214376521794e-07, "loss": 4.2544, "step": 43960 }, { "epoch": 1.8943015893526294, "learning_rate": 8.938729556767006e-07, "loss": 4.3998, "step": 43980 }, { "epoch": 1.895163027092217, "learning_rate": 8.938244737012217e-07, "loss": 4.3337, "step": 44000 }, { "epoch": 1.8960244648318043, "learning_rate": 8.937759917257429e-07, "loss": 4.5991, "step": 44020 }, { "epoch": 1.8968859025713916, "learning_rate": 8.937275097502637e-07, "loss": 4.4394, "step": 44040 }, { "epoch": 1.897747340310979, "learning_rate": 8.93679027774785e-07, "loss": 4.1898, "step": 44060 }, { "epoch": 1.8986087780505665, "learning_rate": 8.936305457993061e-07, "loss": 4.2828, "step": 44080 }, { "epoch": 1.8994702157901537, "learning_rate": 8.935820638238273e-07, "loss": 4.5109, "step": 44100 }, { "epoch": 1.900331653529741, "learning_rate": 8.935335818483483e-07, "loss": 4.4199, "step": 44120 }, { "epoch": 1.9011930912693287, "learning_rate": 8.934850998728695e-07, "loss": 4.2196, "step": 44140 }, { "epoch": 1.902054529008916, "learning_rate": 8.934366178973905e-07, "loss": 4.4022, "step": 44160 }, { "epoch": 1.9029159667485032, "learning_rate": 8.933881359219117e-07, "loss": 4.2565, "step": 44180 }, { "epoch": 1.9037774044880906, "learning_rate": 8.933396539464327e-07, "loss": 4.1588, "step": 44200 }, { "epoch": 1.904638842227678, "learning_rate": 8.932911719709539e-07, "loss": 4.3023, "step": 44220 }, { "epoch": 1.9055002799672653, "learning_rate": 8.93242689995475e-07, "loss": 4.4887, "step": 44240 }, { "epoch": 1.9063617177068526, "learning_rate": 8.931942080199962e-07, "loss": 4.4413, "step": 44260 }, { "epoch": 1.90722315544644, "learning_rate": 8.931457260445172e-07, "loss": 4.549, "step": 44280 }, { "epoch": 1.9080845931860275, "learning_rate": 8.930972440690383e-07, "loss": 4.2645, "step": 44300 }, { "epoch": 1.9089460309256148, "learning_rate": 8.930487620935594e-07, "loss": 4.5101, "step": 44320 }, { "epoch": 1.9098074686652022, "learning_rate": 8.930002801180805e-07, "loss": 4.1307, "step": 44340 }, { "epoch": 1.9106689064047897, "learning_rate": 8.929517981426016e-07, "loss": 4.5143, "step": 44360 }, { "epoch": 1.911530344144377, "learning_rate": 8.929033161671227e-07, "loss": 4.3989, "step": 44380 }, { "epoch": 1.9123917818839642, "learning_rate": 8.928548341916439e-07, "loss": 4.3002, "step": 44400 }, { "epoch": 1.9132532196235517, "learning_rate": 8.928063522161649e-07, "loss": 4.4724, "step": 44420 }, { "epoch": 1.9141146573631391, "learning_rate": 8.92757870240686e-07, "loss": 4.423, "step": 44440 }, { "epoch": 1.9149760951027264, "learning_rate": 8.927093882652071e-07, "loss": 4.2459, "step": 44460 }, { "epoch": 1.9158375328423138, "learning_rate": 8.926609062897283e-07, "loss": 4.3396, "step": 44480 }, { "epoch": 1.9166989705819013, "learning_rate": 8.926124243142493e-07, "loss": 4.4372, "step": 44500 }, { "epoch": 1.9175604083214886, "learning_rate": 8.925639423387705e-07, "loss": 4.288, "step": 44520 }, { "epoch": 1.9184218460610758, "learning_rate": 8.925154603632916e-07, "loss": 4.4371, "step": 44540 }, { "epoch": 1.9192832838006633, "learning_rate": 8.924669783878125e-07, "loss": 4.1958, "step": 44560 }, { "epoch": 1.9201447215402507, "learning_rate": 8.924184964123337e-07, "loss": 4.1513, "step": 44580 }, { "epoch": 1.921006159279838, "learning_rate": 8.923700144368549e-07, "loss": 4.2007, "step": 44600 }, { "epoch": 1.9218675970194254, "learning_rate": 8.92321532461376e-07, "loss": 4.1913, "step": 44620 }, { "epoch": 1.922729034759013, "learning_rate": 8.922730504858971e-07, "loss": 4.3211, "step": 44640 }, { "epoch": 1.9235904724986002, "learning_rate": 8.922245685104183e-07, "loss": 4.3395, "step": 44660 }, { "epoch": 1.9244519102381874, "learning_rate": 8.921760865349393e-07, "loss": 4.2522, "step": 44680 }, { "epoch": 1.9253133479777749, "learning_rate": 8.921276045594604e-07, "loss": 4.279, "step": 44700 }, { "epoch": 1.9261747857173623, "learning_rate": 8.920791225839815e-07, "loss": 4.2267, "step": 44720 }, { "epoch": 1.9270362234569496, "learning_rate": 8.920306406085026e-07, "loss": 4.3767, "step": 44740 }, { "epoch": 1.927897661196537, "learning_rate": 8.919821586330238e-07, "loss": 4.0336, "step": 44760 }, { "epoch": 1.9287590989361245, "learning_rate": 8.919336766575449e-07, "loss": 4.3381, "step": 44780 }, { "epoch": 1.9296205366757118, "learning_rate": 8.918851946820659e-07, "loss": 4.218, "step": 44800 }, { "epoch": 1.930481974415299, "learning_rate": 8.91836712706587e-07, "loss": 4.251, "step": 44820 }, { "epoch": 1.9313434121548865, "learning_rate": 8.917882307311082e-07, "loss": 4.305, "step": 44840 }, { "epoch": 1.932204849894474, "learning_rate": 8.917397487556293e-07, "loss": 4.2406, "step": 44860 }, { "epoch": 1.9330662876340612, "learning_rate": 8.916912667801504e-07, "loss": 4.4796, "step": 44880 }, { "epoch": 1.9339277253736487, "learning_rate": 8.916427848046715e-07, "loss": 4.2853, "step": 44900 }, { "epoch": 1.9347891631132361, "learning_rate": 8.915943028291926e-07, "loss": 4.3997, "step": 44920 }, { "epoch": 1.9356506008528234, "learning_rate": 8.915458208537136e-07, "loss": 4.2774, "step": 44940 }, { "epoch": 1.9365120385924106, "learning_rate": 8.914973388782348e-07, "loss": 4.2732, "step": 44960 }, { "epoch": 1.937373476331998, "learning_rate": 8.914488569027559e-07, "loss": 4.4675, "step": 44980 }, { "epoch": 1.9382349140715855, "learning_rate": 8.91400374927277e-07, "loss": 4.3969, "step": 45000 }, { "epoch": 1.9390963518111728, "learning_rate": 8.913518929517981e-07, "loss": 4.3149, "step": 45020 }, { "epoch": 1.9399577895507603, "learning_rate": 8.913034109763192e-07, "loss": 4.4777, "step": 45040 }, { "epoch": 1.9408192272903477, "learning_rate": 8.912549290008403e-07, "loss": 4.426, "step": 45060 }, { "epoch": 1.941680665029935, "learning_rate": 8.912064470253614e-07, "loss": 4.2196, "step": 45080 }, { "epoch": 1.9425421027695222, "learning_rate": 8.911579650498825e-07, "loss": 4.2747, "step": 45100 }, { "epoch": 1.9434035405091097, "learning_rate": 8.911094830744036e-07, "loss": 4.2879, "step": 45120 }, { "epoch": 1.9442649782486972, "learning_rate": 8.910610010989248e-07, "loss": 4.3427, "step": 45140 }, { "epoch": 1.9451264159882844, "learning_rate": 8.910125191234459e-07, "loss": 4.3285, "step": 45160 }, { "epoch": 1.9459878537278719, "learning_rate": 8.909640371479669e-07, "loss": 4.4457, "step": 45180 }, { "epoch": 1.9468492914674593, "learning_rate": 8.90915555172488e-07, "loss": 4.5181, "step": 45200 }, { "epoch": 1.9477107292070466, "learning_rate": 8.908670731970092e-07, "loss": 4.4637, "step": 45220 }, { "epoch": 1.9485721669466338, "learning_rate": 8.908185912215302e-07, "loss": 4.2949, "step": 45240 }, { "epoch": 1.9494336046862213, "learning_rate": 8.907701092460514e-07, "loss": 4.2796, "step": 45260 }, { "epoch": 1.9502950424258088, "learning_rate": 8.907216272705725e-07, "loss": 4.3893, "step": 45280 }, { "epoch": 1.951156480165396, "learning_rate": 8.906731452950936e-07, "loss": 4.2103, "step": 45300 }, { "epoch": 1.9520179179049835, "learning_rate": 8.906246633196146e-07, "loss": 4.1402, "step": 45320 }, { "epoch": 1.952879355644571, "learning_rate": 8.905761813441358e-07, "loss": 4.3627, "step": 45340 }, { "epoch": 1.9537407933841582, "learning_rate": 8.905276993686569e-07, "loss": 4.3429, "step": 45360 }, { "epoch": 1.9546022311237454, "learning_rate": 8.904792173931781e-07, "loss": 4.4115, "step": 45380 }, { "epoch": 1.955463668863333, "learning_rate": 8.904307354176991e-07, "loss": 4.6042, "step": 45400 }, { "epoch": 1.9563251066029204, "learning_rate": 8.903822534422203e-07, "loss": 4.2902, "step": 45420 }, { "epoch": 1.9571865443425076, "learning_rate": 8.903337714667413e-07, "loss": 4.2655, "step": 45440 }, { "epoch": 1.9580479820820949, "learning_rate": 8.902852894912624e-07, "loss": 4.2639, "step": 45460 }, { "epoch": 1.9589094198216825, "learning_rate": 8.902368075157835e-07, "loss": 4.4137, "step": 45480 }, { "epoch": 1.9597708575612698, "learning_rate": 8.901883255403046e-07, "loss": 4.4555, "step": 45500 }, { "epoch": 1.960632295300857, "learning_rate": 8.901398435648258e-07, "loss": 4.2671, "step": 45520 }, { "epoch": 1.9614937330404445, "learning_rate": 8.900913615893468e-07, "loss": 4.3513, "step": 45540 }, { "epoch": 1.962355170780032, "learning_rate": 8.900428796138679e-07, "loss": 4.2389, "step": 45560 }, { "epoch": 1.9632166085196192, "learning_rate": 8.89994397638389e-07, "loss": 4.1699, "step": 45580 }, { "epoch": 1.9640780462592065, "learning_rate": 8.899459156629102e-07, "loss": 4.2811, "step": 45600 }, { "epoch": 1.9649394839987941, "learning_rate": 8.898974336874312e-07, "loss": 4.5388, "step": 45620 }, { "epoch": 1.9658009217383814, "learning_rate": 8.898489517119524e-07, "loss": 4.2654, "step": 45640 }, { "epoch": 1.9666623594779686, "learning_rate": 8.898004697364735e-07, "loss": 4.2945, "step": 45660 }, { "epoch": 1.967523797217556, "learning_rate": 8.897519877609947e-07, "loss": 4.3381, "step": 45680 }, { "epoch": 1.9683852349571436, "learning_rate": 8.897035057855156e-07, "loss": 4.3183, "step": 45700 }, { "epoch": 1.9692466726967308, "learning_rate": 8.896550238100368e-07, "loss": 4.0999, "step": 45720 }, { "epoch": 1.970108110436318, "learning_rate": 8.896065418345579e-07, "loss": 4.1506, "step": 45740 }, { "epoch": 1.9709695481759058, "learning_rate": 8.895580598590791e-07, "loss": 4.2768, "step": 45760 }, { "epoch": 1.971830985915493, "learning_rate": 8.895095778836001e-07, "loss": 4.2313, "step": 45780 }, { "epoch": 1.9726924236550802, "learning_rate": 8.894610959081213e-07, "loss": 4.1688, "step": 45800 }, { "epoch": 1.9735538613946677, "learning_rate": 8.894126139326422e-07, "loss": 4.5331, "step": 45820 }, { "epoch": 1.9744152991342552, "learning_rate": 8.893641319571635e-07, "loss": 4.1421, "step": 45840 }, { "epoch": 1.9752767368738424, "learning_rate": 8.893156499816845e-07, "loss": 4.4491, "step": 45860 }, { "epoch": 1.9761381746134297, "learning_rate": 8.892671680062057e-07, "loss": 4.1742, "step": 45880 }, { "epoch": 1.9769996123530171, "learning_rate": 8.892186860307268e-07, "loss": 4.2573, "step": 45900 }, { "epoch": 1.9778610500926046, "learning_rate": 8.89170204055248e-07, "loss": 4.3677, "step": 45920 }, { "epoch": 1.9787224878321918, "learning_rate": 8.891217220797689e-07, "loss": 4.3011, "step": 45940 }, { "epoch": 1.9795839255717793, "learning_rate": 8.890732401042901e-07, "loss": 4.4628, "step": 45960 }, { "epoch": 1.9804453633113668, "learning_rate": 8.890247581288112e-07, "loss": 4.2885, "step": 45980 }, { "epoch": 1.981306801050954, "learning_rate": 8.889762761533322e-07, "loss": 4.2826, "step": 46000 }, { "epoch": 1.9821682387905413, "learning_rate": 8.889277941778534e-07, "loss": 4.219, "step": 46020 }, { "epoch": 1.9830296765301287, "learning_rate": 8.888793122023746e-07, "loss": 4.5854, "step": 46040 }, { "epoch": 1.9838911142697162, "learning_rate": 8.888308302268957e-07, "loss": 4.2624, "step": 46060 }, { "epoch": 1.9847525520093035, "learning_rate": 8.887823482514166e-07, "loss": 4.1914, "step": 46080 }, { "epoch": 1.985613989748891, "learning_rate": 8.887338662759378e-07, "loss": 4.4736, "step": 46100 }, { "epoch": 1.9864754274884784, "learning_rate": 8.886853843004589e-07, "loss": 4.1645, "step": 46120 }, { "epoch": 1.9873368652280656, "learning_rate": 8.8863690232498e-07, "loss": 4.2593, "step": 46140 }, { "epoch": 1.9881983029676529, "learning_rate": 8.885884203495011e-07, "loss": 4.2933, "step": 46160 }, { "epoch": 1.9890597407072403, "learning_rate": 8.885399383740223e-07, "loss": 4.472, "step": 46180 }, { "epoch": 1.9899211784468278, "learning_rate": 8.884914563985433e-07, "loss": 4.5536, "step": 46200 }, { "epoch": 1.990782616186415, "learning_rate": 8.884429744230644e-07, "loss": 4.111, "step": 46220 }, { "epoch": 1.9916440539260025, "learning_rate": 8.883944924475855e-07, "loss": 4.3073, "step": 46240 }, { "epoch": 1.99250549166559, "learning_rate": 8.883460104721067e-07, "loss": 4.2787, "step": 46260 }, { "epoch": 1.9933669294051772, "learning_rate": 8.882975284966278e-07, "loss": 4.195, "step": 46280 }, { "epoch": 1.9942283671447645, "learning_rate": 8.882490465211489e-07, "loss": 4.3508, "step": 46300 }, { "epoch": 1.995089804884352, "learning_rate": 8.8820056454567e-07, "loss": 4.2283, "step": 46320 }, { "epoch": 1.9959512426239394, "learning_rate": 8.88152082570191e-07, "loss": 4.0979, "step": 46340 }, { "epoch": 1.9968126803635267, "learning_rate": 8.881036005947122e-07, "loss": 4.509, "step": 46360 }, { "epoch": 1.9976741181031141, "learning_rate": 8.880551186192333e-07, "loss": 4.3608, "step": 46380 }, { "epoch": 1.9985355558427016, "learning_rate": 8.880066366437544e-07, "loss": 4.1539, "step": 46400 }, { "epoch": 1.9993969935822888, "learning_rate": 8.879581546682756e-07, "loss": 4.287, "step": 46420 }, { "epoch": 2.000258431321876, "learning_rate": 8.879096726927968e-07, "loss": 4.2811, "step": 46440 }, { "epoch": 2.0011198690614638, "learning_rate": 8.878611907173177e-07, "loss": 4.3977, "step": 46460 }, { "epoch": 2.001981306801051, "learning_rate": 8.878127087418388e-07, "loss": 4.4591, "step": 46480 }, { "epoch": 2.0028427445406383, "learning_rate": 8.8776422676636e-07, "loss": 4.3031, "step": 46500 }, { "epoch": 2.0037041822802255, "learning_rate": 8.87715744790881e-07, "loss": 4.3416, "step": 46520 }, { "epoch": 2.004565620019813, "learning_rate": 8.876672628154021e-07, "loss": 4.2634, "step": 46540 }, { "epoch": 2.0054270577594004, "learning_rate": 8.876187808399233e-07, "loss": 4.2435, "step": 46560 }, { "epoch": 2.0062884954989877, "learning_rate": 8.875702988644444e-07, "loss": 4.444, "step": 46580 }, { "epoch": 2.0071499332385754, "learning_rate": 8.875218168889654e-07, "loss": 4.2953, "step": 46600 }, { "epoch": 2.0080113709781626, "learning_rate": 8.874733349134865e-07, "loss": 4.1916, "step": 46620 }, { "epoch": 2.00887280871775, "learning_rate": 8.874248529380077e-07, "loss": 4.208, "step": 46640 }, { "epoch": 2.009734246457337, "learning_rate": 8.873763709625289e-07, "loss": 4.3798, "step": 46660 }, { "epoch": 2.010595684196925, "learning_rate": 8.873278889870499e-07, "loss": 4.2406, "step": 46680 }, { "epoch": 2.011457121936512, "learning_rate": 8.87279407011571e-07, "loss": 4.2977, "step": 46700 }, { "epoch": 2.0123185596760993, "learning_rate": 8.872309250360921e-07, "loss": 4.1866, "step": 46720 }, { "epoch": 2.013179997415687, "learning_rate": 8.871824430606132e-07, "loss": 3.9985, "step": 46740 }, { "epoch": 2.0140414351552742, "learning_rate": 8.871339610851343e-07, "loss": 4.3315, "step": 46760 }, { "epoch": 2.0149028728948615, "learning_rate": 8.870854791096554e-07, "loss": 4.3568, "step": 46780 }, { "epoch": 2.0157643106344487, "learning_rate": 8.870369971341766e-07, "loss": 4.4983, "step": 46800 }, { "epoch": 2.0166257483740364, "learning_rate": 8.869885151586976e-07, "loss": 4.3196, "step": 46820 }, { "epoch": 2.0174871861136237, "learning_rate": 8.869400331832187e-07, "loss": 4.3064, "step": 46840 }, { "epoch": 2.018348623853211, "learning_rate": 8.868915512077398e-07, "loss": 4.2872, "step": 46860 }, { "epoch": 2.0192100615927986, "learning_rate": 8.86843069232261e-07, "loss": 4.3508, "step": 46880 }, { "epoch": 2.020071499332386, "learning_rate": 8.86794587256782e-07, "loss": 4.3117, "step": 46900 }, { "epoch": 2.020932937071973, "learning_rate": 8.867461052813032e-07, "loss": 4.3204, "step": 46920 }, { "epoch": 2.0217943748115603, "learning_rate": 8.866976233058243e-07, "loss": 4.2796, "step": 46940 }, { "epoch": 2.022655812551148, "learning_rate": 8.866491413303454e-07, "loss": 4.3953, "step": 46960 }, { "epoch": 2.0235172502907353, "learning_rate": 8.866006593548664e-07, "loss": 4.3401, "step": 46980 }, { "epoch": 2.0243786880303225, "learning_rate": 8.865521773793875e-07, "loss": 4.4807, "step": 47000 }, { "epoch": 2.02524012576991, "learning_rate": 8.865036954039087e-07, "loss": 4.2138, "step": 47020 }, { "epoch": 2.0261015635094974, "learning_rate": 8.864552134284299e-07, "loss": 4.2883, "step": 47040 }, { "epoch": 2.0269630012490847, "learning_rate": 8.864067314529509e-07, "loss": 4.353, "step": 47060 }, { "epoch": 2.027824438988672, "learning_rate": 8.86358249477472e-07, "loss": 4.291, "step": 47080 }, { "epoch": 2.0286858767282596, "learning_rate": 8.863097675019931e-07, "loss": 4.2781, "step": 47100 }, { "epoch": 2.029547314467847, "learning_rate": 8.862612855265142e-07, "loss": 4.2815, "step": 47120 }, { "epoch": 2.030408752207434, "learning_rate": 8.862128035510353e-07, "loss": 4.1737, "step": 47140 }, { "epoch": 2.031270189947022, "learning_rate": 8.861643215755564e-07, "loss": 4.1671, "step": 47160 }, { "epoch": 2.032131627686609, "learning_rate": 8.861158396000776e-07, "loss": 4.4056, "step": 47180 }, { "epoch": 2.0329930654261963, "learning_rate": 8.860673576245987e-07, "loss": 4.3964, "step": 47200 }, { "epoch": 2.0338545031657835, "learning_rate": 8.860188756491197e-07, "loss": 4.1294, "step": 47220 }, { "epoch": 2.0347159409053712, "learning_rate": 8.859703936736408e-07, "loss": 4.2129, "step": 47240 }, { "epoch": 2.0355773786449585, "learning_rate": 8.85921911698162e-07, "loss": 4.342, "step": 47260 }, { "epoch": 2.0364388163845457, "learning_rate": 8.85873429722683e-07, "loss": 4.3192, "step": 47280 }, { "epoch": 2.0373002541241334, "learning_rate": 8.858249477472042e-07, "loss": 4.4086, "step": 47300 }, { "epoch": 2.0381616918637206, "learning_rate": 8.857764657717253e-07, "loss": 4.2597, "step": 47320 }, { "epoch": 2.039023129603308, "learning_rate": 8.857279837962464e-07, "loss": 4.3538, "step": 47340 }, { "epoch": 2.039884567342895, "learning_rate": 8.856795018207674e-07, "loss": 4.3276, "step": 47360 }, { "epoch": 2.040746005082483, "learning_rate": 8.856310198452886e-07, "loss": 4.3319, "step": 47380 }, { "epoch": 2.04160744282207, "learning_rate": 8.855825378698097e-07, "loss": 4.3089, "step": 47400 }, { "epoch": 2.0424688805616573, "learning_rate": 8.855340558943309e-07, "loss": 4.4973, "step": 47420 }, { "epoch": 2.0433303183012446, "learning_rate": 8.854855739188519e-07, "loss": 4.4496, "step": 47440 }, { "epoch": 2.0441917560408323, "learning_rate": 8.854370919433731e-07, "loss": 4.395, "step": 47460 }, { "epoch": 2.0450531937804195, "learning_rate": 8.853886099678941e-07, "loss": 4.0993, "step": 47480 }, { "epoch": 2.0459146315200067, "learning_rate": 8.853401279924152e-07, "loss": 4.3092, "step": 47500 }, { "epoch": 2.0467760692595944, "learning_rate": 8.852916460169363e-07, "loss": 4.1731, "step": 47520 }, { "epoch": 2.0476375069991817, "learning_rate": 8.852431640414574e-07, "loss": 4.498, "step": 47540 }, { "epoch": 2.048498944738769, "learning_rate": 8.851946820659786e-07, "loss": 4.301, "step": 47560 }, { "epoch": 2.049360382478356, "learning_rate": 8.851462000904997e-07, "loss": 4.0986, "step": 47580 }, { "epoch": 2.050221820217944, "learning_rate": 8.850977181150206e-07, "loss": 4.3623, "step": 47600 }, { "epoch": 2.051083257957531, "learning_rate": 8.850492361395418e-07, "loss": 4.186, "step": 47620 }, { "epoch": 2.0519446956971183, "learning_rate": 8.85000754164063e-07, "loss": 4.2125, "step": 47640 }, { "epoch": 2.052806133436706, "learning_rate": 8.849522721885841e-07, "loss": 4.3799, "step": 47660 }, { "epoch": 2.0536675711762933, "learning_rate": 8.849037902131052e-07, "loss": 4.2715, "step": 47680 }, { "epoch": 2.0545290089158805, "learning_rate": 8.848553082376264e-07, "loss": 4.3085, "step": 47700 }, { "epoch": 2.0553904466554678, "learning_rate": 8.848068262621473e-07, "loss": 4.4178, "step": 47720 }, { "epoch": 2.0562518843950555, "learning_rate": 8.847583442866685e-07, "loss": 4.3978, "step": 47740 }, { "epoch": 2.0571133221346427, "learning_rate": 8.847098623111896e-07, "loss": 4.2653, "step": 47760 }, { "epoch": 2.05797475987423, "learning_rate": 8.846613803357107e-07, "loss": 4.3894, "step": 47780 }, { "epoch": 2.0588361976138176, "learning_rate": 8.846128983602318e-07, "loss": 4.1731, "step": 47800 }, { "epoch": 2.059697635353405, "learning_rate": 8.845644163847529e-07, "loss": 4.2996, "step": 47820 }, { "epoch": 2.060559073092992, "learning_rate": 8.845159344092741e-07, "loss": 4.2233, "step": 47840 }, { "epoch": 2.0614205108325794, "learning_rate": 8.844674524337951e-07, "loss": 4.3654, "step": 47860 }, { "epoch": 2.062281948572167, "learning_rate": 8.844189704583162e-07, "loss": 4.1437, "step": 47880 }, { "epoch": 2.0631433863117543, "learning_rate": 8.843704884828373e-07, "loss": 4.3644, "step": 47900 }, { "epoch": 2.0640048240513416, "learning_rate": 8.843220065073585e-07, "loss": 4.2227, "step": 47920 }, { "epoch": 2.0648662617909292, "learning_rate": 8.842735245318796e-07, "loss": 4.0454, "step": 47940 }, { "epoch": 2.0657276995305165, "learning_rate": 8.842250425564007e-07, "loss": 4.4391, "step": 47960 }, { "epoch": 2.0665891372701037, "learning_rate": 8.841765605809217e-07, "loss": 4.3234, "step": 47980 }, { "epoch": 2.067450575009691, "learning_rate": 8.841280786054429e-07, "loss": 4.2496, "step": 48000 }, { "epoch": 2.0683120127492787, "learning_rate": 8.84079596629964e-07, "loss": 4.166, "step": 48020 }, { "epoch": 2.069173450488866, "learning_rate": 8.840311146544851e-07, "loss": 4.2374, "step": 48040 }, { "epoch": 2.070034888228453, "learning_rate": 8.839826326790062e-07, "loss": 4.2214, "step": 48060 }, { "epoch": 2.070896325968041, "learning_rate": 8.839341507035273e-07, "loss": 4.177, "step": 48080 }, { "epoch": 2.071757763707628, "learning_rate": 8.838856687280484e-07, "loss": 4.1296, "step": 48100 }, { "epoch": 2.0726192014472153, "learning_rate": 8.838371867525694e-07, "loss": 4.1983, "step": 48120 }, { "epoch": 2.0734806391868026, "learning_rate": 8.837887047770906e-07, "loss": 4.2373, "step": 48140 }, { "epoch": 2.0743420769263903, "learning_rate": 8.837402228016117e-07, "loss": 4.1931, "step": 48160 }, { "epoch": 2.0752035146659775, "learning_rate": 8.836917408261328e-07, "loss": 4.0753, "step": 48180 }, { "epoch": 2.0760649524055648, "learning_rate": 8.83643258850654e-07, "loss": 4.4324, "step": 48200 }, { "epoch": 2.0769263901451525, "learning_rate": 8.835947768751752e-07, "loss": 4.2271, "step": 48220 }, { "epoch": 2.0777878278847397, "learning_rate": 8.835462948996961e-07, "loss": 4.1287, "step": 48240 }, { "epoch": 2.078649265624327, "learning_rate": 8.834978129242172e-07, "loss": 4.3173, "step": 48260 }, { "epoch": 2.079510703363914, "learning_rate": 8.834493309487383e-07, "loss": 4.1593, "step": 48280 }, { "epoch": 2.080372141103502, "learning_rate": 8.834008489732595e-07, "loss": 4.265, "step": 48300 }, { "epoch": 2.081233578843089, "learning_rate": 8.833523669977806e-07, "loss": 4.2934, "step": 48320 }, { "epoch": 2.0820950165826764, "learning_rate": 8.833038850223017e-07, "loss": 4.3757, "step": 48340 }, { "epoch": 2.082956454322264, "learning_rate": 8.832554030468227e-07, "loss": 4.3117, "step": 48360 }, { "epoch": 2.0838178920618513, "learning_rate": 8.832069210713439e-07, "loss": 4.3394, "step": 48380 }, { "epoch": 2.0846793298014386, "learning_rate": 8.831584390958649e-07, "loss": 4.2903, "step": 48400 }, { "epoch": 2.085540767541026, "learning_rate": 8.831099571203861e-07, "loss": 4.4071, "step": 48420 }, { "epoch": 2.0864022052806135, "learning_rate": 8.830614751449072e-07, "loss": 4.0438, "step": 48440 }, { "epoch": 2.0872636430202007, "learning_rate": 8.830129931694284e-07, "loss": 4.3352, "step": 48460 }, { "epoch": 2.088125080759788, "learning_rate": 8.829645111939494e-07, "loss": 4.2519, "step": 48480 }, { "epoch": 2.0889865184993757, "learning_rate": 8.829160292184705e-07, "loss": 4.3348, "step": 48500 }, { "epoch": 2.089847956238963, "learning_rate": 8.828675472429916e-07, "loss": 4.0967, "step": 48520 }, { "epoch": 2.09070939397855, "learning_rate": 8.828190652675128e-07, "loss": 4.4048, "step": 48540 }, { "epoch": 2.0915708317181374, "learning_rate": 8.827705832920338e-07, "loss": 4.379, "step": 48560 }, { "epoch": 2.092432269457725, "learning_rate": 8.82722101316555e-07, "loss": 4.3112, "step": 48580 }, { "epoch": 2.0932937071973123, "learning_rate": 8.826736193410761e-07, "loss": 4.2812, "step": 48600 }, { "epoch": 2.0941551449368996, "learning_rate": 8.82625137365597e-07, "loss": 4.2841, "step": 48620 }, { "epoch": 2.0950165826764873, "learning_rate": 8.825766553901182e-07, "loss": 4.4263, "step": 48640 }, { "epoch": 2.0958780204160745, "learning_rate": 8.825281734146394e-07, "loss": 4.4555, "step": 48660 }, { "epoch": 2.0967394581556618, "learning_rate": 8.824796914391605e-07, "loss": 4.2405, "step": 48680 }, { "epoch": 2.097600895895249, "learning_rate": 8.824312094636815e-07, "loss": 4.1133, "step": 48700 }, { "epoch": 2.0984623336348367, "learning_rate": 8.823827274882027e-07, "loss": 4.2703, "step": 48720 }, { "epoch": 2.099323771374424, "learning_rate": 8.823342455127238e-07, "loss": 4.5088, "step": 48740 }, { "epoch": 2.100185209114011, "learning_rate": 8.822857635372449e-07, "loss": 4.1103, "step": 48760 }, { "epoch": 2.1010466468535984, "learning_rate": 8.822372815617659e-07, "loss": 4.4763, "step": 48780 }, { "epoch": 2.101908084593186, "learning_rate": 8.821887995862871e-07, "loss": 4.2769, "step": 48800 }, { "epoch": 2.1027695223327734, "learning_rate": 8.821403176108082e-07, "loss": 4.4507, "step": 48820 }, { "epoch": 2.1036309600723606, "learning_rate": 8.820918356353294e-07, "loss": 4.1722, "step": 48840 }, { "epoch": 2.1044923978119483, "learning_rate": 8.820433536598504e-07, "loss": 4.1197, "step": 48860 }, { "epoch": 2.1053538355515355, "learning_rate": 8.819948716843715e-07, "loss": 4.0937, "step": 48880 }, { "epoch": 2.106215273291123, "learning_rate": 8.819463897088926e-07, "loss": 4.2682, "step": 48900 }, { "epoch": 2.10707671103071, "learning_rate": 8.818979077334138e-07, "loss": 4.2429, "step": 48920 }, { "epoch": 2.1079381487702977, "learning_rate": 8.818494257579348e-07, "loss": 4.2731, "step": 48940 }, { "epoch": 2.108799586509885, "learning_rate": 8.81800943782456e-07, "loss": 4.2936, "step": 48960 }, { "epoch": 2.109661024249472, "learning_rate": 8.817524618069771e-07, "loss": 4.3315, "step": 48980 }, { "epoch": 2.11052246198906, "learning_rate": 8.817039798314981e-07, "loss": 4.3196, "step": 49000 }, { "epoch": 2.111383899728647, "learning_rate": 8.816554978560192e-07, "loss": 4.192, "step": 49020 }, { "epoch": 2.1122453374682344, "learning_rate": 8.816070158805404e-07, "loss": 4.261, "step": 49040 }, { "epoch": 2.1131067752078216, "learning_rate": 8.815585339050615e-07, "loss": 4.3711, "step": 49060 }, { "epoch": 2.1139682129474093, "learning_rate": 8.815100519295826e-07, "loss": 4.4269, "step": 49080 }, { "epoch": 2.1148296506869966, "learning_rate": 8.814615699541037e-07, "loss": 4.4123, "step": 49100 }, { "epoch": 2.115691088426584, "learning_rate": 8.814130879786248e-07, "loss": 4.1912, "step": 49120 }, { "epoch": 2.1165525261661715, "learning_rate": 8.813646060031459e-07, "loss": 4.2693, "step": 49140 }, { "epoch": 2.1174139639057588, "learning_rate": 8.813161240276669e-07, "loss": 4.4526, "step": 49160 }, { "epoch": 2.118275401645346, "learning_rate": 8.812676420521881e-07, "loss": 4.4448, "step": 49180 }, { "epoch": 2.1191368393849332, "learning_rate": 8.812191600767093e-07, "loss": 4.1797, "step": 49200 }, { "epoch": 2.119998277124521, "learning_rate": 8.811706781012304e-07, "loss": 4.0822, "step": 49220 }, { "epoch": 2.120859714864108, "learning_rate": 8.811221961257514e-07, "loss": 4.3429, "step": 49240 }, { "epoch": 2.1217211526036954, "learning_rate": 8.810737141502725e-07, "loss": 4.2108, "step": 49260 }, { "epoch": 2.122582590343283, "learning_rate": 8.810252321747937e-07, "loss": 4.3856, "step": 49280 }, { "epoch": 2.1234440280828704, "learning_rate": 8.809767501993147e-07, "loss": 4.3599, "step": 49300 }, { "epoch": 2.1243054658224576, "learning_rate": 8.809282682238358e-07, "loss": 4.1637, "step": 49320 }, { "epoch": 2.125166903562045, "learning_rate": 8.80879786248357e-07, "loss": 4.3569, "step": 49340 }, { "epoch": 2.1260283413016325, "learning_rate": 8.808313042728781e-07, "loss": 4.312, "step": 49360 }, { "epoch": 2.12688977904122, "learning_rate": 8.80782822297399e-07, "loss": 4.2211, "step": 49380 }, { "epoch": 2.127751216780807, "learning_rate": 8.807343403219202e-07, "loss": 4.3182, "step": 49400 }, { "epoch": 2.1286126545203947, "learning_rate": 8.806858583464414e-07, "loss": 4.3647, "step": 49420 }, { "epoch": 2.129474092259982, "learning_rate": 8.806373763709625e-07, "loss": 4.1665, "step": 49440 }, { "epoch": 2.130335529999569, "learning_rate": 8.805888943954836e-07, "loss": 4.3388, "step": 49460 }, { "epoch": 2.1311969677391565, "learning_rate": 8.805404124200048e-07, "loss": 4.3251, "step": 49480 }, { "epoch": 2.132058405478744, "learning_rate": 8.804919304445258e-07, "loss": 4.3496, "step": 49500 }, { "epoch": 2.1329198432183314, "learning_rate": 8.804434484690469e-07, "loss": 4.0173, "step": 49520 }, { "epoch": 2.1337812809579186, "learning_rate": 8.80394966493568e-07, "loss": 4.291, "step": 49540 }, { "epoch": 2.1346427186975063, "learning_rate": 8.803464845180891e-07, "loss": 4.271, "step": 49560 }, { "epoch": 2.1355041564370936, "learning_rate": 8.802980025426103e-07, "loss": 4.1767, "step": 49580 }, { "epoch": 2.136365594176681, "learning_rate": 8.802495205671313e-07, "loss": 4.3604, "step": 49600 }, { "epoch": 2.137227031916268, "learning_rate": 8.802010385916525e-07, "loss": 4.2652, "step": 49620 }, { "epoch": 2.1380884696558558, "learning_rate": 8.801525566161735e-07, "loss": 4.296, "step": 49640 }, { "epoch": 2.138949907395443, "learning_rate": 8.801040746406947e-07, "loss": 4.3255, "step": 49660 }, { "epoch": 2.1398113451350302, "learning_rate": 8.800555926652157e-07, "loss": 4.2263, "step": 49680 }, { "epoch": 2.140672782874618, "learning_rate": 8.800071106897368e-07, "loss": 4.3562, "step": 49700 }, { "epoch": 2.141534220614205, "learning_rate": 8.79958628714258e-07, "loss": 4.1724, "step": 49720 }, { "epoch": 2.1423956583537924, "learning_rate": 8.799101467387792e-07, "loss": 4.3046, "step": 49740 }, { "epoch": 2.1432570960933797, "learning_rate": 8.798616647633001e-07, "loss": 4.3324, "step": 49760 }, { "epoch": 2.1441185338329674, "learning_rate": 8.798131827878212e-07, "loss": 4.106, "step": 49780 }, { "epoch": 2.1449799715725546, "learning_rate": 8.797647008123424e-07, "loss": 4.2116, "step": 49800 }, { "epoch": 2.145841409312142, "learning_rate": 8.797162188368636e-07, "loss": 4.3622, "step": 49820 }, { "epoch": 2.1467028470517295, "learning_rate": 8.796677368613846e-07, "loss": 4.2366, "step": 49840 }, { "epoch": 2.147564284791317, "learning_rate": 8.796192548859057e-07, "loss": 4.34, "step": 49860 }, { "epoch": 2.148425722530904, "learning_rate": 8.795707729104269e-07, "loss": 4.1654, "step": 49880 }, { "epoch": 2.1492871602704913, "learning_rate": 8.795222909349478e-07, "loss": 4.3001, "step": 49900 }, { "epoch": 2.150148598010079, "learning_rate": 8.79473808959469e-07, "loss": 4.2751, "step": 49920 }, { "epoch": 2.151010035749666, "learning_rate": 8.794253269839901e-07, "loss": 4.2452, "step": 49940 }, { "epoch": 2.1518714734892535, "learning_rate": 8.793768450085113e-07, "loss": 4.2826, "step": 49960 }, { "epoch": 2.152732911228841, "learning_rate": 8.793283630330323e-07, "loss": 4.0487, "step": 49980 }, { "epoch": 2.1535943489684284, "learning_rate": 8.792798810575536e-07, "loss": 4.0342, "step": 50000 }, { "epoch": 2.1544557867080156, "learning_rate": 8.792313990820745e-07, "loss": 4.3066, "step": 50020 }, { "epoch": 2.155317224447603, "learning_rate": 8.791829171065957e-07, "loss": 4.2279, "step": 50040 }, { "epoch": 2.1561786621871906, "learning_rate": 8.791344351311167e-07, "loss": 4.2122, "step": 50060 }, { "epoch": 2.157040099926778, "learning_rate": 8.790859531556379e-07, "loss": 4.2016, "step": 50080 }, { "epoch": 2.157901537666365, "learning_rate": 8.79037471180159e-07, "loss": 4.3734, "step": 50100 }, { "epoch": 2.1587629754059527, "learning_rate": 8.789889892046802e-07, "loss": 4.3605, "step": 50120 }, { "epoch": 2.15962441314554, "learning_rate": 8.789405072292011e-07, "loss": 4.125, "step": 50140 }, { "epoch": 2.1604858508851272, "learning_rate": 8.788920252537223e-07, "loss": 4.3302, "step": 50160 }, { "epoch": 2.1613472886247145, "learning_rate": 8.788435432782434e-07, "loss": 4.142, "step": 50180 }, { "epoch": 2.162208726364302, "learning_rate": 8.787950613027646e-07, "loss": 4.2163, "step": 50200 }, { "epoch": 2.1630701641038894, "learning_rate": 8.787465793272856e-07, "loss": 4.226, "step": 50220 }, { "epoch": 2.1639316018434767, "learning_rate": 8.786980973518067e-07, "loss": 4.2411, "step": 50240 }, { "epoch": 2.1647930395830643, "learning_rate": 8.786496153763279e-07, "loss": 4.4795, "step": 50260 }, { "epoch": 2.1656544773226516, "learning_rate": 8.786011334008489e-07, "loss": 4.0183, "step": 50280 }, { "epoch": 2.166515915062239, "learning_rate": 8.7855265142537e-07, "loss": 4.1343, "step": 50300 }, { "epoch": 2.167377352801826, "learning_rate": 8.785041694498912e-07, "loss": 4.1272, "step": 50320 }, { "epoch": 2.1682387905414138, "learning_rate": 8.784556874744123e-07, "loss": 4.4295, "step": 50340 }, { "epoch": 2.169100228281001, "learning_rate": 8.784072054989334e-07, "loss": 4.179, "step": 50360 }, { "epoch": 2.1699616660205883, "learning_rate": 8.783587235234545e-07, "loss": 4.4046, "step": 50380 }, { "epoch": 2.170823103760176, "learning_rate": 8.783102415479755e-07, "loss": 4.3233, "step": 50400 }, { "epoch": 2.171684541499763, "learning_rate": 8.782617595724967e-07, "loss": 4.0627, "step": 50420 }, { "epoch": 2.1725459792393504, "learning_rate": 8.782132775970177e-07, "loss": 4.2086, "step": 50440 }, { "epoch": 2.1734074169789377, "learning_rate": 8.781647956215389e-07, "loss": 4.2934, "step": 50460 }, { "epoch": 2.1742688547185254, "learning_rate": 8.7811631364606e-07, "loss": 4.3135, "step": 50480 }, { "epoch": 2.1751302924581126, "learning_rate": 8.780678316705812e-07, "loss": 4.0578, "step": 50500 }, { "epoch": 2.1759917301977, "learning_rate": 8.780193496951022e-07, "loss": 4.1289, "step": 50520 }, { "epoch": 2.1768531679372876, "learning_rate": 8.779708677196233e-07, "loss": 4.2016, "step": 50540 }, { "epoch": 2.177714605676875, "learning_rate": 8.779223857441444e-07, "loss": 4.0938, "step": 50560 }, { "epoch": 2.178576043416462, "learning_rate": 8.778739037686655e-07, "loss": 4.345, "step": 50580 }, { "epoch": 2.1794374811560493, "learning_rate": 8.778254217931866e-07, "loss": 4.3269, "step": 50600 }, { "epoch": 2.180298918895637, "learning_rate": 8.777769398177078e-07, "loss": 4.2461, "step": 50620 }, { "epoch": 2.1811603566352242, "learning_rate": 8.777284578422289e-07, "loss": 4.1303, "step": 50640 }, { "epoch": 2.1820217943748115, "learning_rate": 8.776799758667499e-07, "loss": 4.2371, "step": 50660 }, { "epoch": 2.182883232114399, "learning_rate": 8.77631493891271e-07, "loss": 4.2818, "step": 50680 }, { "epoch": 2.1837446698539864, "learning_rate": 8.775830119157922e-07, "loss": 4.5456, "step": 50700 }, { "epoch": 2.1846061075935737, "learning_rate": 8.775345299403133e-07, "loss": 4.3468, "step": 50720 }, { "epoch": 2.185467545333161, "learning_rate": 8.774860479648344e-07, "loss": 4.2847, "step": 50740 }, { "epoch": 2.1863289830727486, "learning_rate": 8.774375659893555e-07, "loss": 4.203, "step": 50760 }, { "epoch": 2.187190420812336, "learning_rate": 8.773890840138765e-07, "loss": 4.0493, "step": 50780 }, { "epoch": 2.188051858551923, "learning_rate": 8.773406020383977e-07, "loss": 4.2606, "step": 50800 }, { "epoch": 2.1889132962915103, "learning_rate": 8.772921200629188e-07, "loss": 4.1683, "step": 50820 }, { "epoch": 2.189774734031098, "learning_rate": 8.772436380874399e-07, "loss": 4.3027, "step": 50840 }, { "epoch": 2.1906361717706853, "learning_rate": 8.77195156111961e-07, "loss": 4.2621, "step": 50860 }, { "epoch": 2.1914976095102725, "learning_rate": 8.771466741364821e-07, "loss": 4.4363, "step": 50880 }, { "epoch": 2.19235904724986, "learning_rate": 8.770981921610032e-07, "loss": 4.4562, "step": 50900 }, { "epoch": 2.1932204849894474, "learning_rate": 8.770497101855243e-07, "loss": 4.2564, "step": 50920 }, { "epoch": 2.1940819227290347, "learning_rate": 8.770012282100454e-07, "loss": 4.2429, "step": 50940 }, { "epoch": 2.194943360468622, "learning_rate": 8.769527462345665e-07, "loss": 4.3255, "step": 50960 }, { "epoch": 2.1958047982082096, "learning_rate": 8.769042642590876e-07, "loss": 4.2201, "step": 50980 }, { "epoch": 2.196666235947797, "learning_rate": 8.768557822836088e-07, "loss": 4.3356, "step": 51000 }, { "epoch": 2.197527673687384, "learning_rate": 8.768073003081299e-07, "loss": 4.019, "step": 51020 }, { "epoch": 2.198389111426972, "learning_rate": 8.767588183326509e-07, "loss": 4.3707, "step": 51040 }, { "epoch": 2.199250549166559, "learning_rate": 8.76710336357172e-07, "loss": 4.2873, "step": 51060 }, { "epoch": 2.2001119869061463, "learning_rate": 8.766618543816932e-07, "loss": 4.4412, "step": 51080 }, { "epoch": 2.2009734246457335, "learning_rate": 8.766133724062143e-07, "loss": 4.2582, "step": 51100 }, { "epoch": 2.2018348623853212, "learning_rate": 8.765648904307354e-07, "loss": 4.1803, "step": 51120 }, { "epoch": 2.2026963001249085, "learning_rate": 8.765164084552565e-07, "loss": 4.3579, "step": 51140 }, { "epoch": 2.2035577378644957, "learning_rate": 8.764679264797775e-07, "loss": 4.3487, "step": 51160 }, { "epoch": 2.2044191756040834, "learning_rate": 8.764194445042986e-07, "loss": 4.2576, "step": 51180 }, { "epoch": 2.2052806133436706, "learning_rate": 8.763709625288198e-07, "loss": 4.1051, "step": 51200 }, { "epoch": 2.206142051083258, "learning_rate": 8.763224805533409e-07, "loss": 4.1606, "step": 51220 }, { "epoch": 2.207003488822845, "learning_rate": 8.762739985778621e-07, "loss": 4.594, "step": 51240 }, { "epoch": 2.207864926562433, "learning_rate": 8.762255166023832e-07, "loss": 4.1487, "step": 51260 }, { "epoch": 2.20872636430202, "learning_rate": 8.761770346269042e-07, "loss": 4.1725, "step": 51280 }, { "epoch": 2.2095878020416073, "learning_rate": 8.761285526514253e-07, "loss": 4.1257, "step": 51300 }, { "epoch": 2.210449239781195, "learning_rate": 8.760800706759464e-07, "loss": 4.2533, "step": 51320 }, { "epoch": 2.2113106775207823, "learning_rate": 8.760315887004675e-07, "loss": 4.1158, "step": 51340 }, { "epoch": 2.2121721152603695, "learning_rate": 8.759831067249887e-07, "loss": 4.4128, "step": 51360 }, { "epoch": 2.2130335529999567, "learning_rate": 8.759346247495098e-07, "loss": 4.2981, "step": 51380 }, { "epoch": 2.2138949907395444, "learning_rate": 8.758861427740309e-07, "loss": 4.2351, "step": 51400 }, { "epoch": 2.2147564284791317, "learning_rate": 8.758376607985519e-07, "loss": 4.1967, "step": 51420 }, { "epoch": 2.215617866218719, "learning_rate": 8.757891788230731e-07, "loss": 4.1482, "step": 51440 }, { "epoch": 2.2164793039583066, "learning_rate": 8.757406968475942e-07, "loss": 4.2317, "step": 51460 }, { "epoch": 2.217340741697894, "learning_rate": 8.756922148721152e-07, "loss": 4.2784, "step": 51480 }, { "epoch": 2.218202179437481, "learning_rate": 8.756437328966364e-07, "loss": 4.3519, "step": 51500 }, { "epoch": 2.2190636171770683, "learning_rate": 8.755952509211575e-07, "loss": 4.3672, "step": 51520 }, { "epoch": 2.219925054916656, "learning_rate": 8.755467689456786e-07, "loss": 4.1288, "step": 51540 }, { "epoch": 2.2207864926562433, "learning_rate": 8.754982869701996e-07, "loss": 4.4207, "step": 51560 }, { "epoch": 2.2216479303958305, "learning_rate": 8.754498049947208e-07, "loss": 4.5234, "step": 51580 }, { "epoch": 2.222509368135418, "learning_rate": 8.754013230192419e-07, "loss": 4.2745, "step": 51600 }, { "epoch": 2.2233708058750055, "learning_rate": 8.753528410437631e-07, "loss": 4.2628, "step": 51620 }, { "epoch": 2.2242322436145927, "learning_rate": 8.753043590682841e-07, "loss": 4.2123, "step": 51640 }, { "epoch": 2.22509368135418, "learning_rate": 8.752558770928053e-07, "loss": 4.3077, "step": 51660 }, { "epoch": 2.2259551190937676, "learning_rate": 8.752073951173263e-07, "loss": 4.245, "step": 51680 }, { "epoch": 2.226816556833355, "learning_rate": 8.751589131418475e-07, "loss": 4.2597, "step": 51700 }, { "epoch": 2.227677994572942, "learning_rate": 8.751104311663685e-07, "loss": 4.4867, "step": 51720 }, { "epoch": 2.2285394323125294, "learning_rate": 8.750619491908897e-07, "loss": 4.5026, "step": 51740 }, { "epoch": 2.229400870052117, "learning_rate": 8.750134672154108e-07, "loss": 4.2651, "step": 51760 }, { "epoch": 2.2302623077917043, "learning_rate": 8.749649852399321e-07, "loss": 4.3774, "step": 51780 }, { "epoch": 2.2311237455312916, "learning_rate": 8.749165032644529e-07, "loss": 4.2054, "step": 51800 }, { "epoch": 2.2319851832708792, "learning_rate": 8.748680212889741e-07, "loss": 4.3723, "step": 51820 }, { "epoch": 2.2328466210104665, "learning_rate": 8.748195393134952e-07, "loss": 4.2458, "step": 51840 }, { "epoch": 2.2337080587500537, "learning_rate": 8.747710573380162e-07, "loss": 4.1491, "step": 51860 }, { "epoch": 2.234569496489641, "learning_rate": 8.747225753625374e-07, "loss": 4.0953, "step": 51880 }, { "epoch": 2.2354309342292287, "learning_rate": 8.746740933870586e-07, "loss": 4.2957, "step": 51900 }, { "epoch": 2.236292371968816, "learning_rate": 8.746256114115796e-07, "loss": 4.0886, "step": 51920 }, { "epoch": 2.237153809708403, "learning_rate": 8.745771294361006e-07, "loss": 4.2769, "step": 51940 }, { "epoch": 2.238015247447991, "learning_rate": 8.745286474606218e-07, "loss": 4.2706, "step": 51960 }, { "epoch": 2.238876685187578, "learning_rate": 8.74480165485143e-07, "loss": 4.2817, "step": 51980 }, { "epoch": 2.2397381229271653, "learning_rate": 8.744316835096641e-07, "loss": 4.1763, "step": 52000 }, { "epoch": 2.2405995606667526, "learning_rate": 8.743832015341851e-07, "loss": 4.3284, "step": 52020 }, { "epoch": 2.2414609984063403, "learning_rate": 8.743347195587063e-07, "loss": 4.3196, "step": 52040 }, { "epoch": 2.2423224361459275, "learning_rate": 8.742862375832273e-07, "loss": 4.4508, "step": 52060 }, { "epoch": 2.2431838738855148, "learning_rate": 8.742377556077485e-07, "loss": 4.0581, "step": 52080 }, { "epoch": 2.2440453116251025, "learning_rate": 8.741892736322696e-07, "loss": 4.3669, "step": 52100 }, { "epoch": 2.2449067493646897, "learning_rate": 8.741407916567907e-07, "loss": 4.1813, "step": 52120 }, { "epoch": 2.245768187104277, "learning_rate": 8.740923096813118e-07, "loss": 4.2587, "step": 52140 }, { "epoch": 2.246629624843864, "learning_rate": 8.74043827705833e-07, "loss": 4.1264, "step": 52160 }, { "epoch": 2.247491062583452, "learning_rate": 8.739953457303539e-07, "loss": 4.2917, "step": 52180 }, { "epoch": 2.248352500323039, "learning_rate": 8.739468637548751e-07, "loss": 4.3568, "step": 52200 }, { "epoch": 2.2492139380626264, "learning_rate": 8.738983817793962e-07, "loss": 4.3348, "step": 52220 }, { "epoch": 2.250075375802214, "learning_rate": 8.738498998039173e-07, "loss": 4.3902, "step": 52240 }, { "epoch": 2.2509368135418013, "learning_rate": 8.738014178284384e-07, "loss": 4.1351, "step": 52260 }, { "epoch": 2.2517982512813886, "learning_rate": 8.737529358529596e-07, "loss": 4.3549, "step": 52280 }, { "epoch": 2.252659689020976, "learning_rate": 8.737044538774807e-07, "loss": 4.2185, "step": 52300 }, { "epoch": 2.2535211267605635, "learning_rate": 8.736559719020016e-07, "loss": 4.0571, "step": 52320 }, { "epoch": 2.2543825645001507, "learning_rate": 8.736074899265228e-07, "loss": 4.1322, "step": 52340 }, { "epoch": 2.255244002239738, "learning_rate": 8.73559007951044e-07, "loss": 4.1068, "step": 52360 }, { "epoch": 2.2561054399793257, "learning_rate": 8.73510525975565e-07, "loss": 4.4032, "step": 52380 }, { "epoch": 2.256966877718913, "learning_rate": 8.734620440000861e-07, "loss": 4.2477, "step": 52400 }, { "epoch": 2.2578283154585, "learning_rate": 8.734135620246073e-07, "loss": 4.3231, "step": 52420 }, { "epoch": 2.2586897531980874, "learning_rate": 8.733650800491284e-07, "loss": 4.1685, "step": 52440 }, { "epoch": 2.259551190937675, "learning_rate": 8.733165980736494e-07, "loss": 4.3216, "step": 52460 }, { "epoch": 2.2604126286772623, "learning_rate": 8.732681160981705e-07, "loss": 4.227, "step": 52480 }, { "epoch": 2.2612740664168496, "learning_rate": 8.732196341226917e-07, "loss": 4.244, "step": 52500 }, { "epoch": 2.2621355041564373, "learning_rate": 8.731711521472129e-07, "loss": 4.3651, "step": 52520 }, { "epoch": 2.2629969418960245, "learning_rate": 8.731226701717339e-07, "loss": 4.4109, "step": 52540 }, { "epoch": 2.2638583796356118, "learning_rate": 8.730741881962549e-07, "loss": 4.2073, "step": 52560 }, { "epoch": 2.264719817375199, "learning_rate": 8.730257062207761e-07, "loss": 4.3845, "step": 52580 }, { "epoch": 2.2655812551147867, "learning_rate": 8.729772242452972e-07, "loss": 4.3746, "step": 52600 }, { "epoch": 2.266442692854374, "learning_rate": 8.729287422698183e-07, "loss": 4.0557, "step": 52620 }, { "epoch": 2.267304130593961, "learning_rate": 8.728802602943394e-07, "loss": 4.3143, "step": 52640 }, { "epoch": 2.268165568333549, "learning_rate": 8.728317783188606e-07, "loss": 3.9698, "step": 52660 }, { "epoch": 2.269027006073136, "learning_rate": 8.727832963433817e-07, "loss": 4.1787, "step": 52680 }, { "epoch": 2.2698884438127234, "learning_rate": 8.727348143679027e-07, "loss": 4.294, "step": 52700 }, { "epoch": 2.2707498815523106, "learning_rate": 8.726863323924238e-07, "loss": 4.1294, "step": 52720 }, { "epoch": 2.2716113192918983, "learning_rate": 8.72637850416945e-07, "loss": 4.3712, "step": 52740 }, { "epoch": 2.2724727570314855, "learning_rate": 8.72589368441466e-07, "loss": 4.4626, "step": 52760 }, { "epoch": 2.273334194771073, "learning_rate": 8.725408864659872e-07, "loss": 4.1864, "step": 52780 }, { "epoch": 2.2741956325106605, "learning_rate": 8.724924044905083e-07, "loss": 4.0382, "step": 52800 }, { "epoch": 2.2750570702502477, "learning_rate": 8.724439225150294e-07, "loss": 4.1505, "step": 52820 }, { "epoch": 2.275918507989835, "learning_rate": 8.723954405395504e-07, "loss": 4.3047, "step": 52840 }, { "epoch": 2.276779945729422, "learning_rate": 8.723469585640715e-07, "loss": 4.4541, "step": 52860 }, { "epoch": 2.27764138346901, "learning_rate": 8.722984765885927e-07, "loss": 4.1365, "step": 52880 }, { "epoch": 2.278502821208597, "learning_rate": 8.722499946131139e-07, "loss": 4.2079, "step": 52900 }, { "epoch": 2.2793642589481844, "learning_rate": 8.722015126376349e-07, "loss": 4.2184, "step": 52920 }, { "epoch": 2.280225696687772, "learning_rate": 8.721530306621559e-07, "loss": 4.298, "step": 52940 }, { "epoch": 2.2810871344273593, "learning_rate": 8.721045486866771e-07, "loss": 4.2712, "step": 52960 }, { "epoch": 2.2819485721669466, "learning_rate": 8.720560667111983e-07, "loss": 4.2324, "step": 52980 }, { "epoch": 2.282810009906534, "learning_rate": 8.720075847357193e-07, "loss": 4.2026, "step": 53000 }, { "epoch": 2.2836714476461215, "learning_rate": 8.719591027602404e-07, "loss": 4.2496, "step": 53020 }, { "epoch": 2.2845328853857088, "learning_rate": 8.719106207847617e-07, "loss": 4.337, "step": 53040 }, { "epoch": 2.285394323125296, "learning_rate": 8.718621388092826e-07, "loss": 4.2557, "step": 53060 }, { "epoch": 2.2862557608648837, "learning_rate": 8.718136568338037e-07, "loss": 4.2969, "step": 53080 }, { "epoch": 2.287117198604471, "learning_rate": 8.717651748583248e-07, "loss": 4.4096, "step": 53100 }, { "epoch": 2.287978636344058, "learning_rate": 8.71716692882846e-07, "loss": 4.312, "step": 53120 }, { "epoch": 2.2888400740836454, "learning_rate": 8.71668210907367e-07, "loss": 4.2076, "step": 53140 }, { "epoch": 2.289701511823233, "learning_rate": 8.716197289318882e-07, "loss": 4.1274, "step": 53160 }, { "epoch": 2.2905629495628204, "learning_rate": 8.715712469564093e-07, "loss": 4.2475, "step": 53180 }, { "epoch": 2.2914243873024076, "learning_rate": 8.715227649809304e-07, "loss": 4.1663, "step": 53200 }, { "epoch": 2.2922858250419953, "learning_rate": 8.714742830054514e-07, "loss": 4.2687, "step": 53220 }, { "epoch": 2.2931472627815825, "learning_rate": 8.714258010299726e-07, "loss": 4.1586, "step": 53240 }, { "epoch": 2.29400870052117, "learning_rate": 8.713773190544937e-07, "loss": 4.2607, "step": 53260 }, { "epoch": 2.294870138260757, "learning_rate": 8.713288370790149e-07, "loss": 4.2607, "step": 53280 }, { "epoch": 2.2957315760003447, "learning_rate": 8.712803551035359e-07, "loss": 4.1282, "step": 53300 }, { "epoch": 2.296593013739932, "learning_rate": 8.71231873128057e-07, "loss": 4.0689, "step": 53320 }, { "epoch": 2.297454451479519, "learning_rate": 8.711833911525781e-07, "loss": 4.3091, "step": 53340 }, { "epoch": 2.298315889219107, "learning_rate": 8.711349091770992e-07, "loss": 4.1475, "step": 53360 }, { "epoch": 2.299177326958694, "learning_rate": 8.710864272016203e-07, "loss": 4.1784, "step": 53380 }, { "epoch": 2.3000387646982814, "learning_rate": 8.710379452261414e-07, "loss": 4.1835, "step": 53400 }, { "epoch": 2.3009002024378686, "learning_rate": 8.709894632506626e-07, "loss": 4.1533, "step": 53420 }, { "epoch": 2.3017616401774563, "learning_rate": 8.709409812751837e-07, "loss": 4.2843, "step": 53440 }, { "epoch": 2.3026230779170436, "learning_rate": 8.708924992997047e-07, "loss": 4.1374, "step": 53460 }, { "epoch": 2.303484515656631, "learning_rate": 8.708440173242258e-07, "loss": 4.1059, "step": 53480 }, { "epoch": 2.3043459533962185, "learning_rate": 8.70795535348747e-07, "loss": 4.1689, "step": 53500 }, { "epoch": 2.3052073911358058, "learning_rate": 8.707470533732681e-07, "loss": 4.3381, "step": 53520 }, { "epoch": 2.306068828875393, "learning_rate": 8.706985713977892e-07, "loss": 4.0695, "step": 53540 }, { "epoch": 2.3069302666149802, "learning_rate": 8.706500894223103e-07, "loss": 4.3566, "step": 53560 }, { "epoch": 2.307791704354568, "learning_rate": 8.706016074468314e-07, "loss": 4.1611, "step": 53580 }, { "epoch": 2.308653142094155, "learning_rate": 8.705531254713525e-07, "loss": 4.2708, "step": 53600 }, { "epoch": 2.3095145798337424, "learning_rate": 8.705046434958736e-07, "loss": 4.2761, "step": 53620 }, { "epoch": 2.31037601757333, "learning_rate": 8.704561615203947e-07, "loss": 4.2535, "step": 53640 }, { "epoch": 2.3112374553129174, "learning_rate": 8.704076795449159e-07, "loss": 4.331, "step": 53660 }, { "epoch": 2.3120988930525046, "learning_rate": 8.703591975694369e-07, "loss": 4.2145, "step": 53680 }, { "epoch": 2.312960330792092, "learning_rate": 8.70310715593958e-07, "loss": 4.162, "step": 53700 }, { "epoch": 2.3138217685316795, "learning_rate": 8.702622336184791e-07, "loss": 4.1911, "step": 53720 }, { "epoch": 2.314683206271267, "learning_rate": 8.702137516430002e-07, "loss": 4.4483, "step": 53740 }, { "epoch": 2.315544644010854, "learning_rate": 8.701652696675213e-07, "loss": 4.1958, "step": 53760 }, { "epoch": 2.3164060817504417, "learning_rate": 8.701167876920425e-07, "loss": 4.3123, "step": 53780 }, { "epoch": 2.317267519490029, "learning_rate": 8.700683057165636e-07, "loss": 4.251, "step": 53800 }, { "epoch": 2.318128957229616, "learning_rate": 8.700198237410847e-07, "loss": 4.3, "step": 53820 }, { "epoch": 2.3189903949692035, "learning_rate": 8.699713417656057e-07, "loss": 4.327, "step": 53840 }, { "epoch": 2.319851832708791, "learning_rate": 8.699228597901269e-07, "loss": 4.5191, "step": 53860 }, { "epoch": 2.3207132704483784, "learning_rate": 8.698743778146481e-07, "loss": 4.0079, "step": 53880 }, { "epoch": 2.3215747081879656, "learning_rate": 8.698258958391691e-07, "loss": 4.1226, "step": 53900 }, { "epoch": 2.3224361459275533, "learning_rate": 8.697774138636902e-07, "loss": 4.2492, "step": 53920 }, { "epoch": 2.3232975836671406, "learning_rate": 8.697289318882113e-07, "loss": 4.2787, "step": 53940 }, { "epoch": 2.324159021406728, "learning_rate": 8.696804499127323e-07, "loss": 4.1966, "step": 53960 }, { "epoch": 2.325020459146315, "learning_rate": 8.696319679372535e-07, "loss": 4.2201, "step": 53980 }, { "epoch": 2.3258818968859027, "learning_rate": 8.695834859617746e-07, "loss": 4.2533, "step": 54000 }, { "epoch": 2.32674333462549, "learning_rate": 8.695350039862957e-07, "loss": 4.2268, "step": 54020 }, { "epoch": 2.3276047723650772, "learning_rate": 8.694865220108168e-07, "loss": 4.2416, "step": 54040 }, { "epoch": 2.328466210104665, "learning_rate": 8.69438040035338e-07, "loss": 4.1719, "step": 54060 }, { "epoch": 2.329327647844252, "learning_rate": 8.693895580598591e-07, "loss": 4.4017, "step": 54080 }, { "epoch": 2.3301890855838394, "learning_rate": 8.693410760843801e-07, "loss": 4.3052, "step": 54100 }, { "epoch": 2.3310505233234267, "learning_rate": 8.692925941089012e-07, "loss": 4.1228, "step": 54120 }, { "epoch": 2.3319119610630143, "learning_rate": 8.692441121334224e-07, "loss": 4.2331, "step": 54140 }, { "epoch": 2.3327733988026016, "learning_rate": 8.691956301579435e-07, "loss": 4.055, "step": 54160 }, { "epoch": 2.333634836542189, "learning_rate": 8.691471481824646e-07, "loss": 4.3784, "step": 54180 }, { "epoch": 2.3344962742817765, "learning_rate": 8.690986662069857e-07, "loss": 4.362, "step": 54200 }, { "epoch": 2.3353577120213638, "learning_rate": 8.690501842315067e-07, "loss": 4.2607, "step": 54220 }, { "epoch": 2.336219149760951, "learning_rate": 8.690017022560279e-07, "loss": 4.4963, "step": 54240 }, { "epoch": 2.3370805875005383, "learning_rate": 8.68953220280549e-07, "loss": 4.2517, "step": 54260 }, { "epoch": 2.337942025240126, "learning_rate": 8.689047383050701e-07, "loss": 4.3148, "step": 54280 }, { "epoch": 2.338803462979713, "learning_rate": 8.688562563295913e-07, "loss": 4.2369, "step": 54300 }, { "epoch": 2.3396649007193004, "learning_rate": 8.688077743541124e-07, "loss": 4.2441, "step": 54320 }, { "epoch": 2.3405263384588877, "learning_rate": 8.687592923786333e-07, "loss": 4.0943, "step": 54340 }, { "epoch": 2.3413877761984754, "learning_rate": 8.687108104031545e-07, "loss": 4.3473, "step": 54360 }, { "epoch": 2.3422492139380626, "learning_rate": 8.686623284276756e-07, "loss": 4.2992, "step": 54380 }, { "epoch": 2.34311065167765, "learning_rate": 8.686138464521968e-07, "loss": 4.2246, "step": 54400 }, { "epoch": 2.3439720894172376, "learning_rate": 8.685653644767178e-07, "loss": 4.4738, "step": 54420 }, { "epoch": 2.344833527156825, "learning_rate": 8.68516882501239e-07, "loss": 4.1206, "step": 54440 }, { "epoch": 2.345694964896412, "learning_rate": 8.684684005257601e-07, "loss": 4.3221, "step": 54460 }, { "epoch": 2.3465564026359993, "learning_rate": 8.68419918550281e-07, "loss": 4.277, "step": 54480 }, { "epoch": 2.347417840375587, "learning_rate": 8.683714365748022e-07, "loss": 4.3785, "step": 54500 }, { "epoch": 2.3482792781151742, "learning_rate": 8.683229545993234e-07, "loss": 4.381, "step": 54520 }, { "epoch": 2.3491407158547615, "learning_rate": 8.682744726238445e-07, "loss": 4.4255, "step": 54540 }, { "epoch": 2.3500021535943487, "learning_rate": 8.682259906483656e-07, "loss": 4.1823, "step": 54560 }, { "epoch": 2.3508635913339364, "learning_rate": 8.681775086728867e-07, "loss": 4.3709, "step": 54580 }, { "epoch": 2.3517250290735237, "learning_rate": 8.681290266974078e-07, "loss": 4.1439, "step": 54600 }, { "epoch": 2.352586466813111, "learning_rate": 8.680805447219289e-07, "loss": 4.4158, "step": 54620 }, { "epoch": 2.3534479045526986, "learning_rate": 8.680320627464499e-07, "loss": 4.2438, "step": 54640 }, { "epoch": 2.354309342292286, "learning_rate": 8.679835807709711e-07, "loss": 4.1635, "step": 54660 }, { "epoch": 2.355170780031873, "learning_rate": 8.679350987954923e-07, "loss": 4.1807, "step": 54680 }, { "epoch": 2.3560322177714603, "learning_rate": 8.678866168200134e-07, "loss": 4.311, "step": 54700 }, { "epoch": 2.356893655511048, "learning_rate": 8.678381348445343e-07, "loss": 4.1099, "step": 54720 }, { "epoch": 2.3577550932506353, "learning_rate": 8.677896528690555e-07, "loss": 4.1695, "step": 54740 }, { "epoch": 2.3586165309902225, "learning_rate": 8.677411708935766e-07, "loss": 4.2379, "step": 54760 }, { "epoch": 2.35947796872981, "learning_rate": 8.676926889180978e-07, "loss": 4.2587, "step": 54780 }, { "epoch": 2.3603394064693974, "learning_rate": 8.676442069426188e-07, "loss": 4.1083, "step": 54800 }, { "epoch": 2.3612008442089847, "learning_rate": 8.675957249671401e-07, "loss": 4.0596, "step": 54820 }, { "epoch": 2.362062281948572, "learning_rate": 8.675472429916611e-07, "loss": 3.8836, "step": 54840 }, { "epoch": 2.3629237196881596, "learning_rate": 8.674987610161822e-07, "loss": 4.306, "step": 54860 }, { "epoch": 2.363785157427747, "learning_rate": 8.674502790407032e-07, "loss": 4.215, "step": 54880 }, { "epoch": 2.364646595167334, "learning_rate": 8.674017970652244e-07, "loss": 4.0287, "step": 54900 }, { "epoch": 2.365508032906922, "learning_rate": 8.673533150897455e-07, "loss": 4.4671, "step": 54920 }, { "epoch": 2.366369470646509, "learning_rate": 8.673048331142666e-07, "loss": 4.3428, "step": 54940 }, { "epoch": 2.3672309083860963, "learning_rate": 8.672563511387877e-07, "loss": 4.0316, "step": 54960 }, { "epoch": 2.3680923461256835, "learning_rate": 8.672078691633088e-07, "loss": 4.2033, "step": 54980 }, { "epoch": 2.3689537838652712, "learning_rate": 8.671593871878299e-07, "loss": 4.3097, "step": 55000 }, { "epoch": 2.3698152216048585, "learning_rate": 8.671109052123509e-07, "loss": 4.1758, "step": 55020 }, { "epoch": 2.3706766593444457, "learning_rate": 8.670624232368721e-07, "loss": 4.5582, "step": 55040 }, { "epoch": 2.3715380970840334, "learning_rate": 8.670139412613933e-07, "loss": 4.3608, "step": 55060 }, { "epoch": 2.3723995348236206, "learning_rate": 8.669654592859144e-07, "loss": 4.3383, "step": 55080 }, { "epoch": 2.373260972563208, "learning_rate": 8.669169773104354e-07, "loss": 4.1525, "step": 55100 }, { "epoch": 2.374122410302795, "learning_rate": 8.668684953349565e-07, "loss": 4.1372, "step": 55120 }, { "epoch": 2.374983848042383, "learning_rate": 8.668200133594777e-07, "loss": 4.3853, "step": 55140 }, { "epoch": 2.37584528578197, "learning_rate": 8.667715313839988e-07, "loss": 4.1832, "step": 55160 }, { "epoch": 2.3767067235215573, "learning_rate": 8.667230494085198e-07, "loss": 4.3249, "step": 55180 }, { "epoch": 2.377568161261145, "learning_rate": 8.66674567433041e-07, "loss": 4.231, "step": 55200 }, { "epoch": 2.3784295990007323, "learning_rate": 8.666260854575622e-07, "loss": 4.3996, "step": 55220 }, { "epoch": 2.3792910367403195, "learning_rate": 8.665776034820831e-07, "loss": 4.3011, "step": 55240 }, { "epoch": 2.3801524744799067, "learning_rate": 8.665291215066042e-07, "loss": 4.1059, "step": 55260 }, { "epoch": 2.3810139122194944, "learning_rate": 8.664806395311254e-07, "loss": 4.1404, "step": 55280 }, { "epoch": 2.3818753499590817, "learning_rate": 8.664321575556465e-07, "loss": 4.2836, "step": 55300 }, { "epoch": 2.382736787698669, "learning_rate": 8.663836755801676e-07, "loss": 4.2736, "step": 55320 }, { "epoch": 2.3835982254382566, "learning_rate": 8.663351936046887e-07, "loss": 4.0531, "step": 55340 }, { "epoch": 2.384459663177844, "learning_rate": 8.662867116292098e-07, "loss": 4.2524, "step": 55360 }, { "epoch": 2.385321100917431, "learning_rate": 8.662382296537309e-07, "loss": 4.307, "step": 55380 }, { "epoch": 2.3861825386570183, "learning_rate": 8.66189747678252e-07, "loss": 4.2786, "step": 55400 }, { "epoch": 2.387043976396606, "learning_rate": 8.661412657027731e-07, "loss": 3.9889, "step": 55420 }, { "epoch": 2.3879054141361933, "learning_rate": 8.660927837272943e-07, "loss": 4.381, "step": 55440 }, { "epoch": 2.3887668518757805, "learning_rate": 8.660443017518154e-07, "loss": 4.3136, "step": 55460 }, { "epoch": 2.389628289615368, "learning_rate": 8.659958197763364e-07, "loss": 4.1068, "step": 55480 }, { "epoch": 2.3904897273549555, "learning_rate": 8.659473378008575e-07, "loss": 4.2248, "step": 55500 }, { "epoch": 2.3913511650945427, "learning_rate": 8.658988558253787e-07, "loss": 3.9636, "step": 55520 }, { "epoch": 2.39221260283413, "learning_rate": 8.658503738498997e-07, "loss": 4.3415, "step": 55540 }, { "epoch": 2.3930740405737176, "learning_rate": 8.658018918744207e-07, "loss": 4.2074, "step": 55560 }, { "epoch": 2.393935478313305, "learning_rate": 8.65753409898942e-07, "loss": 4.472, "step": 55580 }, { "epoch": 2.394796916052892, "learning_rate": 8.657049279234632e-07, "loss": 4.221, "step": 55600 }, { "epoch": 2.39565835379248, "learning_rate": 8.656564459479841e-07, "loss": 4.027, "step": 55620 }, { "epoch": 2.396519791532067, "learning_rate": 8.656079639725052e-07, "loss": 4.1817, "step": 55640 }, { "epoch": 2.3973812292716543, "learning_rate": 8.655594819970265e-07, "loss": 3.85, "step": 55660 }, { "epoch": 2.3982426670112416, "learning_rate": 8.655110000215476e-07, "loss": 4.1537, "step": 55680 }, { "epoch": 2.3991041047508292, "learning_rate": 8.654625180460686e-07, "loss": 4.1673, "step": 55700 }, { "epoch": 2.3999655424904165, "learning_rate": 8.654140360705897e-07, "loss": 4.3035, "step": 55720 }, { "epoch": 2.4008269802300037, "learning_rate": 8.653655540951108e-07, "loss": 4.3662, "step": 55740 }, { "epoch": 2.4016884179695914, "learning_rate": 8.65317072119632e-07, "loss": 4.3216, "step": 55760 }, { "epoch": 2.4025498557091787, "learning_rate": 8.65268590144153e-07, "loss": 4.1805, "step": 55780 }, { "epoch": 2.403411293448766, "learning_rate": 8.652201081686741e-07, "loss": 4.1321, "step": 55800 }, { "epoch": 2.404272731188353, "learning_rate": 8.651716261931953e-07, "loss": 4.3409, "step": 55820 }, { "epoch": 2.405134168927941, "learning_rate": 8.651231442177163e-07, "loss": 4.2178, "step": 55840 }, { "epoch": 2.405995606667528, "learning_rate": 8.650746622422375e-07, "loss": 4.1659, "step": 55860 }, { "epoch": 2.4068570444071153, "learning_rate": 8.650261802667585e-07, "loss": 3.9914, "step": 55880 }, { "epoch": 2.407718482146703, "learning_rate": 8.649776982912797e-07, "loss": 4.1824, "step": 55900 }, { "epoch": 2.4085799198862903, "learning_rate": 8.649292163158007e-07, "loss": 4.2067, "step": 55920 }, { "epoch": 2.4094413576258775, "learning_rate": 8.648807343403219e-07, "loss": 4.3716, "step": 55940 }, { "epoch": 2.4103027953654648, "learning_rate": 8.64832252364843e-07, "loss": 4.183, "step": 55960 }, { "epoch": 2.4111642331050525, "learning_rate": 8.647837703893642e-07, "loss": 4.3275, "step": 55980 }, { "epoch": 2.4120256708446397, "learning_rate": 8.647352884138851e-07, "loss": 4.0832, "step": 56000 }, { "epoch": 2.412887108584227, "learning_rate": 8.646868064384063e-07, "loss": 4.0189, "step": 56020 }, { "epoch": 2.4137485463238146, "learning_rate": 8.646383244629274e-07, "loss": 4.3236, "step": 56040 }, { "epoch": 2.414609984063402, "learning_rate": 8.645898424874486e-07, "loss": 4.2682, "step": 56060 }, { "epoch": 2.415471421802989, "learning_rate": 8.645413605119695e-07, "loss": 4.2826, "step": 56080 }, { "epoch": 2.4163328595425764, "learning_rate": 8.644928785364907e-07, "loss": 4.0921, "step": 56100 }, { "epoch": 2.417194297282164, "learning_rate": 8.644443965610118e-07, "loss": 4.0919, "step": 56120 }, { "epoch": 2.4180557350217513, "learning_rate": 8.64395914585533e-07, "loss": 3.8716, "step": 56140 }, { "epoch": 2.4189171727613386, "learning_rate": 8.64347432610054e-07, "loss": 4.2843, "step": 56160 }, { "epoch": 2.4197786105009262, "learning_rate": 8.642989506345751e-07, "loss": 4.3531, "step": 56180 }, { "epoch": 2.4206400482405135, "learning_rate": 8.642504686590963e-07, "loss": 3.9195, "step": 56200 }, { "epoch": 2.4215014859801007, "learning_rate": 8.642019866836174e-07, "loss": 4.4277, "step": 56220 }, { "epoch": 2.422362923719688, "learning_rate": 8.641535047081385e-07, "loss": 4.2536, "step": 56240 }, { "epoch": 2.4232243614592757, "learning_rate": 8.641050227326595e-07, "loss": 4.2423, "step": 56260 }, { "epoch": 2.424085799198863, "learning_rate": 8.640565407571807e-07, "loss": 4.4274, "step": 56280 }, { "epoch": 2.42494723693845, "learning_rate": 8.640080587817018e-07, "loss": 4.3172, "step": 56300 }, { "epoch": 2.425808674678038, "learning_rate": 8.639595768062229e-07, "loss": 4.1643, "step": 56320 }, { "epoch": 2.426670112417625, "learning_rate": 8.63911094830744e-07, "loss": 4.1716, "step": 56340 }, { "epoch": 2.4275315501572123, "learning_rate": 8.638626128552652e-07, "loss": 4.2735, "step": 56360 }, { "epoch": 2.4283929878967996, "learning_rate": 8.638141308797861e-07, "loss": 4.2517, "step": 56380 }, { "epoch": 2.4292544256363873, "learning_rate": 8.637656489043073e-07, "loss": 4.2051, "step": 56400 }, { "epoch": 2.4301158633759745, "learning_rate": 8.637171669288284e-07, "loss": 4.1685, "step": 56420 }, { "epoch": 2.4309773011155618, "learning_rate": 8.636686849533496e-07, "loss": 4.0869, "step": 56440 }, { "epoch": 2.4318387388551495, "learning_rate": 8.636202029778706e-07, "loss": 4.0233, "step": 56460 }, { "epoch": 2.4327001765947367, "learning_rate": 8.635717210023918e-07, "loss": 4.2422, "step": 56480 }, { "epoch": 2.433561614334324, "learning_rate": 8.635232390269128e-07, "loss": 4.2122, "step": 56500 }, { "epoch": 2.434423052073911, "learning_rate": 8.634747570514339e-07, "loss": 4.0499, "step": 56520 }, { "epoch": 2.435284489813499, "learning_rate": 8.63426275075955e-07, "loss": 4.3853, "step": 56540 }, { "epoch": 2.436145927553086, "learning_rate": 8.633777931004762e-07, "loss": 4.4201, "step": 56560 }, { "epoch": 2.4370073652926734, "learning_rate": 8.633293111249973e-07, "loss": 4.1895, "step": 56580 }, { "epoch": 2.437868803032261, "learning_rate": 8.632808291495185e-07, "loss": 4.3688, "step": 56600 }, { "epoch": 2.4387302407718483, "learning_rate": 8.632323471740395e-07, "loss": 4.3445, "step": 56620 }, { "epoch": 2.4395916785114355, "learning_rate": 8.631838651985605e-07, "loss": 4.0405, "step": 56640 }, { "epoch": 2.440453116251023, "learning_rate": 8.631353832230817e-07, "loss": 4.1405, "step": 56660 }, { "epoch": 2.4413145539906105, "learning_rate": 8.630869012476028e-07, "loss": 4.1484, "step": 56680 }, { "epoch": 2.4421759917301977, "learning_rate": 8.630384192721239e-07, "loss": 4.2479, "step": 56700 }, { "epoch": 2.443037429469785, "learning_rate": 8.62989937296645e-07, "loss": 4.1576, "step": 56720 }, { "epoch": 2.4438988672093727, "learning_rate": 8.629414553211662e-07, "loss": 4.3753, "step": 56740 }, { "epoch": 2.44476030494896, "learning_rate": 8.628929733456872e-07, "loss": 4.4392, "step": 56760 }, { "epoch": 2.445621742688547, "learning_rate": 8.628444913702083e-07, "loss": 4.1372, "step": 56780 }, { "epoch": 2.4464831804281344, "learning_rate": 8.627960093947294e-07, "loss": 4.1564, "step": 56800 }, { "epoch": 2.447344618167722, "learning_rate": 8.627475274192505e-07, "loss": 4.1573, "step": 56820 }, { "epoch": 2.4482060559073093, "learning_rate": 8.626990454437717e-07, "loss": 4.0543, "step": 56840 }, { "epoch": 2.4490674936468966, "learning_rate": 8.626505634682928e-07, "loss": 4.077, "step": 56860 }, { "epoch": 2.4499289313864843, "learning_rate": 8.626020814928139e-07, "loss": 4.2958, "step": 56880 }, { "epoch": 2.4507903691260715, "learning_rate": 8.625535995173349e-07, "loss": 3.9361, "step": 56900 }, { "epoch": 2.4516518068656588, "learning_rate": 8.625051175418561e-07, "loss": 4.3521, "step": 56920 }, { "epoch": 2.452513244605246, "learning_rate": 8.624566355663772e-07, "loss": 4.432, "step": 56940 }, { "epoch": 2.4533746823448337, "learning_rate": 8.624081535908983e-07, "loss": 3.9227, "step": 56960 }, { "epoch": 2.454236120084421, "learning_rate": 8.623596716154194e-07, "loss": 4.1412, "step": 56980 }, { "epoch": 2.455097557824008, "learning_rate": 8.623111896399405e-07, "loss": 4.073, "step": 57000 }, { "epoch": 2.455958995563596, "learning_rate": 8.622627076644616e-07, "loss": 4.2698, "step": 57020 }, { "epoch": 2.456820433303183, "learning_rate": 8.622142256889827e-07, "loss": 4.2295, "step": 57040 }, { "epoch": 2.4576818710427704, "learning_rate": 8.621657437135038e-07, "loss": 4.3758, "step": 57060 }, { "epoch": 2.4585433087823576, "learning_rate": 8.621172617380249e-07, "loss": 4.4118, "step": 57080 }, { "epoch": 2.4594047465219453, "learning_rate": 8.620687797625461e-07, "loss": 4.3054, "step": 57100 }, { "epoch": 2.4602661842615325, "learning_rate": 8.620202977870671e-07, "loss": 4.1352, "step": 57120 }, { "epoch": 2.46112762200112, "learning_rate": 8.619718158115882e-07, "loss": 4.2934, "step": 57140 }, { "epoch": 2.4619890597407075, "learning_rate": 8.619233338361093e-07, "loss": 4.1653, "step": 57160 }, { "epoch": 2.4628504974802947, "learning_rate": 8.618748518606304e-07, "loss": 4.2276, "step": 57180 }, { "epoch": 2.463711935219882, "learning_rate": 8.618263698851515e-07, "loss": 4.1756, "step": 57200 }, { "epoch": 2.464573372959469, "learning_rate": 8.617778879096727e-07, "loss": 4.2888, "step": 57220 }, { "epoch": 2.465434810699057, "learning_rate": 8.617294059341938e-07, "loss": 4.1995, "step": 57240 }, { "epoch": 2.466296248438644, "learning_rate": 8.616809239587149e-07, "loss": 4.2819, "step": 57260 }, { "epoch": 2.4671576861782314, "learning_rate": 8.616324419832359e-07, "loss": 4.3963, "step": 57280 }, { "epoch": 2.468019123917819, "learning_rate": 8.615839600077571e-07, "loss": 4.3, "step": 57300 }, { "epoch": 2.4688805616574063, "learning_rate": 8.615354780322782e-07, "loss": 4.0509, "step": 57320 }, { "epoch": 2.4697419993969936, "learning_rate": 8.614869960567991e-07, "loss": 4.3817, "step": 57340 }, { "epoch": 2.470603437136581, "learning_rate": 8.614385140813204e-07, "loss": 4.2557, "step": 57360 }, { "epoch": 2.4714648748761685, "learning_rate": 8.613900321058416e-07, "loss": 4.3308, "step": 57380 }, { "epoch": 2.4723263126157558, "learning_rate": 8.613415501303626e-07, "loss": 4.3975, "step": 57400 }, { "epoch": 2.473187750355343, "learning_rate": 8.612930681548836e-07, "loss": 4.1042, "step": 57420 }, { "epoch": 2.4740491880949307, "learning_rate": 8.612445861794049e-07, "loss": 4.2089, "step": 57440 }, { "epoch": 2.474910625834518, "learning_rate": 8.611961042039259e-07, "loss": 4.1088, "step": 57460 }, { "epoch": 2.475772063574105, "learning_rate": 8.611476222284471e-07, "loss": 4.3419, "step": 57480 }, { "epoch": 2.4766335013136924, "learning_rate": 8.610991402529681e-07, "loss": 4.2666, "step": 57500 }, { "epoch": 2.47749493905328, "learning_rate": 8.610506582774892e-07, "loss": 4.252, "step": 57520 }, { "epoch": 2.4783563767928674, "learning_rate": 8.610021763020103e-07, "loss": 4.0077, "step": 57540 }, { "epoch": 2.4792178145324546, "learning_rate": 8.609536943265315e-07, "loss": 3.9771, "step": 57560 }, { "epoch": 2.480079252272042, "learning_rate": 8.609052123510525e-07, "loss": 4.2792, "step": 57580 }, { "epoch": 2.4809406900116295, "learning_rate": 8.608567303755737e-07, "loss": 4.3579, "step": 57600 }, { "epoch": 2.481802127751217, "learning_rate": 8.608082484000948e-07, "loss": 4.29, "step": 57620 }, { "epoch": 2.482663565490804, "learning_rate": 8.60759766424616e-07, "loss": 4.3093, "step": 57640 }, { "epoch": 2.4835250032303917, "learning_rate": 8.607112844491369e-07, "loss": 4.3589, "step": 57660 }, { "epoch": 2.484386440969979, "learning_rate": 8.606628024736581e-07, "loss": 4.4272, "step": 57680 }, { "epoch": 2.485247878709566, "learning_rate": 8.606143204981792e-07, "loss": 4.0898, "step": 57700 }, { "epoch": 2.4861093164491535, "learning_rate": 8.605658385227002e-07, "loss": 4.2245, "step": 57720 }, { "epoch": 2.486970754188741, "learning_rate": 8.605173565472214e-07, "loss": 4.4094, "step": 57740 }, { "epoch": 2.4878321919283284, "learning_rate": 8.604688745717426e-07, "loss": 4.1925, "step": 57760 }, { "epoch": 2.4886936296679156, "learning_rate": 8.604203925962636e-07, "loss": 4.2971, "step": 57780 }, { "epoch": 2.489555067407503, "learning_rate": 8.603719106207846e-07, "loss": 4.3134, "step": 57800 }, { "epoch": 2.4904165051470906, "learning_rate": 8.603234286453058e-07, "loss": 4.0966, "step": 57820 }, { "epoch": 2.491277942886678, "learning_rate": 8.60274946669827e-07, "loss": 4.1384, "step": 57840 }, { "epoch": 2.492139380626265, "learning_rate": 8.60226464694348e-07, "loss": 4.3345, "step": 57860 }, { "epoch": 2.4930008183658527, "learning_rate": 8.601779827188691e-07, "loss": 4.0805, "step": 57880 }, { "epoch": 2.49386225610544, "learning_rate": 8.601295007433902e-07, "loss": 4.3075, "step": 57900 }, { "epoch": 2.4947236938450272, "learning_rate": 8.600810187679114e-07, "loss": 3.9731, "step": 57920 }, { "epoch": 2.4955851315846145, "learning_rate": 8.600325367924325e-07, "loss": 4.2603, "step": 57940 }, { "epoch": 2.496446569324202, "learning_rate": 8.599840548169535e-07, "loss": 4.26, "step": 57960 }, { "epoch": 2.4973080070637894, "learning_rate": 8.599355728414747e-07, "loss": 3.9662, "step": 57980 }, { "epoch": 2.4981694448033767, "learning_rate": 8.598870908659958e-07, "loss": 4.4137, "step": 58000 }, { "epoch": 2.4990308825429643, "learning_rate": 8.59838608890517e-07, "loss": 4.3187, "step": 58020 }, { "epoch": 2.4998923202825516, "learning_rate": 8.597901269150379e-07, "loss": 4.3463, "step": 58040 }, { "epoch": 2.500753758022139, "learning_rate": 8.597416449395591e-07, "loss": 4.1218, "step": 58060 }, { "epoch": 2.501615195761726, "learning_rate": 8.596931629640802e-07, "loss": 4.2526, "step": 58080 }, { "epoch": 2.5024766335013138, "learning_rate": 8.596446809886013e-07, "loss": 4.4045, "step": 58100 }, { "epoch": 2.503338071240901, "learning_rate": 8.595961990131224e-07, "loss": 4.1303, "step": 58120 }, { "epoch": 2.5041995089804887, "learning_rate": 8.595477170376436e-07, "loss": 4.0786, "step": 58140 }, { "epoch": 2.505060946720076, "learning_rate": 8.594992350621646e-07, "loss": 4.2704, "step": 58160 }, { "epoch": 2.505922384459663, "learning_rate": 8.594507530866857e-07, "loss": 4.2644, "step": 58180 }, { "epoch": 2.5067838221992504, "learning_rate": 8.594022711112068e-07, "loss": 4.4116, "step": 58200 }, { "epoch": 2.5076452599388377, "learning_rate": 8.59353789135728e-07, "loss": 4.0617, "step": 58220 }, { "epoch": 2.5085066976784254, "learning_rate": 8.593053071602491e-07, "loss": 4.3488, "step": 58240 }, { "epoch": 2.5093681354180126, "learning_rate": 8.592568251847701e-07, "loss": 4.213, "step": 58260 }, { "epoch": 2.5102295731576, "learning_rate": 8.592083432092912e-07, "loss": 4.5169, "step": 58280 }, { "epoch": 2.5110910108971876, "learning_rate": 8.591598612338124e-07, "loss": 4.0503, "step": 58300 }, { "epoch": 2.511952448636775, "learning_rate": 8.591113792583334e-07, "loss": 3.8825, "step": 58320 }, { "epoch": 2.512813886376362, "learning_rate": 8.590628972828545e-07, "loss": 4.281, "step": 58340 }, { "epoch": 2.5136753241159493, "learning_rate": 8.590144153073757e-07, "loss": 4.2192, "step": 58360 }, { "epoch": 2.514536761855537, "learning_rate": 8.58965933331897e-07, "loss": 4.2605, "step": 58380 }, { "epoch": 2.5153981995951242, "learning_rate": 8.58917451356418e-07, "loss": 4.209, "step": 58400 }, { "epoch": 2.5162596373347115, "learning_rate": 8.588689693809389e-07, "loss": 4.2857, "step": 58420 }, { "epoch": 2.517121075074299, "learning_rate": 8.588204874054601e-07, "loss": 4.1435, "step": 58440 }, { "epoch": 2.5179825128138864, "learning_rate": 8.587720054299813e-07, "loss": 4.3966, "step": 58460 }, { "epoch": 2.5188439505534737, "learning_rate": 8.587235234545023e-07, "loss": 4.1411, "step": 58480 }, { "epoch": 2.519705388293061, "learning_rate": 8.586750414790234e-07, "loss": 4.2864, "step": 58500 }, { "epoch": 2.5205668260326486, "learning_rate": 8.586265595035446e-07, "loss": 4.035, "step": 58520 }, { "epoch": 2.521428263772236, "learning_rate": 8.585780775280656e-07, "loss": 4.0988, "step": 58540 }, { "epoch": 2.522289701511823, "learning_rate": 8.585295955525867e-07, "loss": 4.2534, "step": 58560 }, { "epoch": 2.5231511392514108, "learning_rate": 8.584811135771078e-07, "loss": 4.1708, "step": 58580 }, { "epoch": 2.524012576990998, "learning_rate": 8.58432631601629e-07, "loss": 4.3406, "step": 58600 }, { "epoch": 2.5248740147305853, "learning_rate": 8.5838414962615e-07, "loss": 4.4411, "step": 58620 }, { "epoch": 2.5257354524701725, "learning_rate": 8.583356676506712e-07, "loss": 4.1217, "step": 58640 }, { "epoch": 2.52659689020976, "learning_rate": 8.582871856751923e-07, "loss": 4.4278, "step": 58660 }, { "epoch": 2.5274583279493474, "learning_rate": 8.582387036997134e-07, "loss": 4.3738, "step": 58680 }, { "epoch": 2.5283197656889347, "learning_rate": 8.581902217242345e-07, "loss": 4.1657, "step": 58700 }, { "epoch": 2.5291812034285224, "learning_rate": 8.581417397487556e-07, "loss": 4.0269, "step": 58720 }, { "epoch": 2.5300426411681096, "learning_rate": 8.580932577732767e-07, "loss": 4.2034, "step": 58740 }, { "epoch": 2.530904078907697, "learning_rate": 8.580447757977979e-07, "loss": 4.26, "step": 58760 }, { "epoch": 2.531765516647284, "learning_rate": 8.579962938223189e-07, "loss": 4.0627, "step": 58780 }, { "epoch": 2.532626954386872, "learning_rate": 8.579478118468399e-07, "loss": 4.4826, "step": 58800 }, { "epoch": 2.533488392126459, "learning_rate": 8.578993298713611e-07, "loss": 4.2246, "step": 58820 }, { "epoch": 2.5343498298660463, "learning_rate": 8.578508478958823e-07, "loss": 4.0814, "step": 58840 }, { "epoch": 2.535211267605634, "learning_rate": 8.578023659204033e-07, "loss": 4.4911, "step": 58860 }, { "epoch": 2.5360727053452212, "learning_rate": 8.577538839449244e-07, "loss": 4.3088, "step": 58880 }, { "epoch": 2.5369341430848085, "learning_rate": 8.577054019694456e-07, "loss": 4.1783, "step": 58900 }, { "epoch": 2.5377955808243957, "learning_rate": 8.576569199939667e-07, "loss": 4.0773, "step": 58920 }, { "epoch": 2.5386570185639834, "learning_rate": 8.576084380184877e-07, "loss": 4.2445, "step": 58940 }, { "epoch": 2.5395184563035706, "learning_rate": 8.575599560430088e-07, "loss": 4.4122, "step": 58960 }, { "epoch": 2.540379894043158, "learning_rate": 8.5751147406753e-07, "loss": 4.0924, "step": 58980 }, { "epoch": 2.5412413317827456, "learning_rate": 8.574629920920512e-07, "loss": 4.169, "step": 59000 }, { "epoch": 2.542102769522333, "learning_rate": 8.574145101165722e-07, "loss": 4.268, "step": 59020 }, { "epoch": 2.54296420726192, "learning_rate": 8.573660281410933e-07, "loss": 4.3032, "step": 59040 }, { "epoch": 2.5438256450015073, "learning_rate": 8.573175461656144e-07, "loss": 4.2648, "step": 59060 }, { "epoch": 2.544687082741095, "learning_rate": 8.572690641901354e-07, "loss": 4.4249, "step": 59080 }, { "epoch": 2.5455485204806823, "learning_rate": 8.572205822146566e-07, "loss": 4.3171, "step": 59100 }, { "epoch": 2.5464099582202695, "learning_rate": 8.571721002391776e-07, "loss": 4.2584, "step": 59120 }, { "epoch": 2.547271395959857, "learning_rate": 8.571236182636989e-07, "loss": 4.3728, "step": 59140 }, { "epoch": 2.5481328336994444, "learning_rate": 8.570751362882199e-07, "loss": 4.3094, "step": 59160 }, { "epoch": 2.5489942714390317, "learning_rate": 8.57026654312741e-07, "loss": 3.9504, "step": 59180 }, { "epoch": 2.549855709178619, "learning_rate": 8.569781723372621e-07, "loss": 4.1956, "step": 59200 }, { "epoch": 2.5507171469182066, "learning_rate": 8.569296903617834e-07, "loss": 4.2037, "step": 59220 }, { "epoch": 2.551578584657794, "learning_rate": 8.568812083863043e-07, "loss": 4.1132, "step": 59240 }, { "epoch": 2.552440022397381, "learning_rate": 8.568327264108255e-07, "loss": 4.3493, "step": 59260 }, { "epoch": 2.553301460136969, "learning_rate": 8.567842444353466e-07, "loss": 4.361, "step": 59280 }, { "epoch": 2.554162897876556, "learning_rate": 8.567357624598676e-07, "loss": 4.1195, "step": 59300 }, { "epoch": 2.5550243356161433, "learning_rate": 8.566872804843887e-07, "loss": 4.1201, "step": 59320 }, { "epoch": 2.5558857733557305, "learning_rate": 8.566387985089098e-07, "loss": 4.3441, "step": 59340 }, { "epoch": 2.556747211095318, "learning_rate": 8.56590316533431e-07, "loss": 4.1668, "step": 59360 }, { "epoch": 2.5576086488349055, "learning_rate": 8.565418345579521e-07, "loss": 4.391, "step": 59380 }, { "epoch": 2.5584700865744927, "learning_rate": 8.564933525824732e-07, "loss": 4.1836, "step": 59400 }, { "epoch": 2.5593315243140804, "learning_rate": 8.564448706069943e-07, "loss": 4.0879, "step": 59420 }, { "epoch": 2.5601929620536676, "learning_rate": 8.563963886315154e-07, "loss": 4.187, "step": 59440 }, { "epoch": 2.561054399793255, "learning_rate": 8.563479066560365e-07, "loss": 4.1479, "step": 59460 }, { "epoch": 2.561915837532842, "learning_rate": 8.562994246805576e-07, "loss": 4.1809, "step": 59480 }, { "epoch": 2.56277727527243, "learning_rate": 8.562509427050787e-07, "loss": 4.191, "step": 59500 }, { "epoch": 2.563638713012017, "learning_rate": 8.562024607295999e-07, "loss": 4.4552, "step": 59520 }, { "epoch": 2.5645001507516043, "learning_rate": 8.56153978754121e-07, "loss": 4.3285, "step": 59540 }, { "epoch": 2.565361588491192, "learning_rate": 8.56105496778642e-07, "loss": 4.2826, "step": 59560 }, { "epoch": 2.5662230262307792, "learning_rate": 8.560570148031631e-07, "loss": 4.0214, "step": 59580 }, { "epoch": 2.5670844639703665, "learning_rate": 8.560085328276842e-07, "loss": 4.0768, "step": 59600 }, { "epoch": 2.5679459017099537, "learning_rate": 8.559600508522053e-07, "loss": 4.1235, "step": 59620 }, { "epoch": 2.5688073394495414, "learning_rate": 8.559115688767264e-07, "loss": 4.2986, "step": 59640 }, { "epoch": 2.5696687771891287, "learning_rate": 8.558630869012476e-07, "loss": 3.9553, "step": 59660 }, { "epoch": 2.570530214928716, "learning_rate": 8.558146049257686e-07, "loss": 4.144, "step": 59680 }, { "epoch": 2.5713916526683036, "learning_rate": 8.557661229502897e-07, "loss": 4.1962, "step": 59700 }, { "epoch": 2.572253090407891, "learning_rate": 8.557176409748109e-07, "loss": 4.3642, "step": 59720 }, { "epoch": 2.573114528147478, "learning_rate": 8.55669158999332e-07, "loss": 4.0517, "step": 59740 }, { "epoch": 2.5739759658870653, "learning_rate": 8.556206770238531e-07, "loss": 4.0824, "step": 59760 }, { "epoch": 2.574837403626653, "learning_rate": 8.555721950483742e-07, "loss": 4.178, "step": 59780 }, { "epoch": 2.5756988413662403, "learning_rate": 8.555237130728954e-07, "loss": 4.347, "step": 59800 }, { "epoch": 2.5765602791058275, "learning_rate": 8.554752310974164e-07, "loss": 4.3382, "step": 59820 }, { "epoch": 2.577421716845415, "learning_rate": 8.554267491219375e-07, "loss": 4.1127, "step": 59840 }, { "epoch": 2.5782831545850025, "learning_rate": 8.553782671464586e-07, "loss": 4.0289, "step": 59860 }, { "epoch": 2.5791445923245897, "learning_rate": 8.553297851709797e-07, "loss": 4.1871, "step": 59880 }, { "epoch": 2.580006030064177, "learning_rate": 8.552813031955009e-07, "loss": 4.2729, "step": 59900 }, { "epoch": 2.5808674678037646, "learning_rate": 8.55232821220022e-07, "loss": 4.1083, "step": 59920 }, { "epoch": 2.581728905543352, "learning_rate": 8.55184339244543e-07, "loss": 4.0924, "step": 59940 }, { "epoch": 2.582590343282939, "learning_rate": 8.55135857269064e-07, "loss": 4.134, "step": 59960 }, { "epoch": 2.583451781022527, "learning_rate": 8.550873752935852e-07, "loss": 4.1005, "step": 59980 }, { "epoch": 2.584313218762114, "learning_rate": 8.550388933181064e-07, "loss": 4.0589, "step": 60000 }, { "epoch": 2.5851746565017013, "learning_rate": 8.549904113426275e-07, "loss": 4.3462, "step": 60020 }, { "epoch": 2.5860360942412886, "learning_rate": 8.549419293671486e-07, "loss": 4.1581, "step": 60040 }, { "epoch": 2.5868975319808762, "learning_rate": 8.548934473916696e-07, "loss": 4.3692, "step": 60060 }, { "epoch": 2.5877589697204635, "learning_rate": 8.548449654161908e-07, "loss": 4.0618, "step": 60080 }, { "epoch": 2.5886204074600507, "learning_rate": 8.547964834407119e-07, "loss": 4.3576, "step": 60100 }, { "epoch": 2.5894818451996384, "learning_rate": 8.54748001465233e-07, "loss": 4.3794, "step": 60120 }, { "epoch": 2.5903432829392257, "learning_rate": 8.546995194897541e-07, "loss": 4.1958, "step": 60140 }, { "epoch": 2.591204720678813, "learning_rate": 8.546510375142752e-07, "loss": 4.2342, "step": 60160 }, { "epoch": 2.5920661584184, "learning_rate": 8.546025555387964e-07, "loss": 4.2702, "step": 60180 }, { "epoch": 2.592927596157988, "learning_rate": 8.545540735633173e-07, "loss": 4.0517, "step": 60200 }, { "epoch": 2.593789033897575, "learning_rate": 8.545055915878385e-07, "loss": 4.1766, "step": 60220 }, { "epoch": 2.5946504716371623, "learning_rate": 8.544571096123596e-07, "loss": 4.0639, "step": 60240 }, { "epoch": 2.59551190937675, "learning_rate": 8.544086276368808e-07, "loss": 4.3518, "step": 60260 }, { "epoch": 2.5963733471163373, "learning_rate": 8.543601456614018e-07, "loss": 4.4055, "step": 60280 }, { "epoch": 2.5972347848559245, "learning_rate": 8.54311663685923e-07, "loss": 4.3399, "step": 60300 }, { "epoch": 2.5980962225955118, "learning_rate": 8.54263181710444e-07, "loss": 4.1884, "step": 60320 }, { "epoch": 2.598957660335099, "learning_rate": 8.542146997349652e-07, "loss": 4.3731, "step": 60340 }, { "epoch": 2.5998190980746867, "learning_rate": 8.541662177594862e-07, "loss": 4.5374, "step": 60360 }, { "epoch": 2.600680535814274, "learning_rate": 8.541177357840074e-07, "loss": 4.1441, "step": 60380 }, { "epoch": 2.6015419735538616, "learning_rate": 8.540692538085285e-07, "loss": 4.2451, "step": 60400 }, { "epoch": 2.602403411293449, "learning_rate": 8.540207718330496e-07, "loss": 4.172, "step": 60420 }, { "epoch": 2.603264849033036, "learning_rate": 8.539722898575707e-07, "loss": 4.1974, "step": 60440 }, { "epoch": 2.6041262867726234, "learning_rate": 8.539238078820918e-07, "loss": 4.2203, "step": 60460 }, { "epoch": 2.6049877245122106, "learning_rate": 8.53875325906613e-07, "loss": 4.241, "step": 60480 }, { "epoch": 2.6058491622517983, "learning_rate": 8.53826843931134e-07, "loss": 4.2997, "step": 60500 }, { "epoch": 2.6067105999913855, "learning_rate": 8.537783619556551e-07, "loss": 4.0746, "step": 60520 }, { "epoch": 2.6075720377309732, "learning_rate": 8.537298799801763e-07, "loss": 4.1195, "step": 60540 }, { "epoch": 2.6084334754705605, "learning_rate": 8.536813980046974e-07, "loss": 4.2102, "step": 60560 }, { "epoch": 2.6092949132101477, "learning_rate": 8.536329160292183e-07, "loss": 4.1293, "step": 60580 }, { "epoch": 2.610156350949735, "learning_rate": 8.535844340537395e-07, "loss": 4.1909, "step": 60600 }, { "epoch": 2.611017788689322, "learning_rate": 8.535359520782607e-07, "loss": 4.0423, "step": 60620 }, { "epoch": 2.61187922642891, "learning_rate": 8.534874701027818e-07, "loss": 4.2171, "step": 60640 }, { "epoch": 2.612740664168497, "learning_rate": 8.534389881273028e-07, "loss": 3.9872, "step": 60660 }, { "epoch": 2.613602101908085, "learning_rate": 8.53390506151824e-07, "loss": 4.1679, "step": 60680 }, { "epoch": 2.614463539647672, "learning_rate": 8.53342024176345e-07, "loss": 4.2011, "step": 60700 }, { "epoch": 2.6153249773872593, "learning_rate": 8.532935422008662e-07, "loss": 4.1579, "step": 60720 }, { "epoch": 2.6161864151268466, "learning_rate": 8.532450602253872e-07, "loss": 4.3445, "step": 60740 }, { "epoch": 2.617047852866434, "learning_rate": 8.531965782499084e-07, "loss": 4.2757, "step": 60760 }, { "epoch": 2.6179092906060215, "learning_rate": 8.531480962744295e-07, "loss": 4.1861, "step": 60780 }, { "epoch": 2.6187707283456088, "learning_rate": 8.530996142989507e-07, "loss": 4.3216, "step": 60800 }, { "epoch": 2.6196321660851964, "learning_rate": 8.530511323234717e-07, "loss": 4.3104, "step": 60820 }, { "epoch": 2.6204936038247837, "learning_rate": 8.530026503479928e-07, "loss": 4.31, "step": 60840 }, { "epoch": 2.621355041564371, "learning_rate": 8.529541683725139e-07, "loss": 4.2184, "step": 60860 }, { "epoch": 2.622216479303958, "learning_rate": 8.52905686397035e-07, "loss": 4.0942, "step": 60880 }, { "epoch": 2.6230779170435454, "learning_rate": 8.52857204421556e-07, "loss": 4.3643, "step": 60900 }, { "epoch": 2.623939354783133, "learning_rate": 8.528087224460773e-07, "loss": 3.9566, "step": 60920 }, { "epoch": 2.6248007925227204, "learning_rate": 8.527602404705984e-07, "loss": 4.2772, "step": 60940 }, { "epoch": 2.625662230262308, "learning_rate": 8.527117584951193e-07, "loss": 4.1774, "step": 60960 }, { "epoch": 2.6265236680018953, "learning_rate": 8.526632765196405e-07, "loss": 4.1943, "step": 60980 }, { "epoch": 2.6273851057414825, "learning_rate": 8.526147945441618e-07, "loss": 4.1812, "step": 61000 }, { "epoch": 2.62824654348107, "learning_rate": 8.525663125686828e-07, "loss": 4.3691, "step": 61020 }, { "epoch": 2.629107981220657, "learning_rate": 8.525178305932038e-07, "loss": 4.3042, "step": 61040 }, { "epoch": 2.6299694189602447, "learning_rate": 8.52469348617725e-07, "loss": 4.2198, "step": 61060 }, { "epoch": 2.630830856699832, "learning_rate": 8.524208666422461e-07, "loss": 4.1068, "step": 61080 }, { "epoch": 2.6316922944394197, "learning_rate": 8.523723846667672e-07, "loss": 4.1193, "step": 61100 }, { "epoch": 2.632553732179007, "learning_rate": 8.523239026912882e-07, "loss": 4.4358, "step": 61120 }, { "epoch": 2.633415169918594, "learning_rate": 8.522754207158094e-07, "loss": 4.2313, "step": 61140 }, { "epoch": 2.6342766076581814, "learning_rate": 8.522269387403306e-07, "loss": 4.2971, "step": 61160 }, { "epoch": 2.6351380453977686, "learning_rate": 8.521784567648516e-07, "loss": 4.2572, "step": 61180 }, { "epoch": 2.6359994831373563, "learning_rate": 8.521299747893727e-07, "loss": 4.2868, "step": 61200 }, { "epoch": 2.6368609208769436, "learning_rate": 8.520814928138938e-07, "loss": 4.1524, "step": 61220 }, { "epoch": 2.6377223586165313, "learning_rate": 8.520330108384149e-07, "loss": 4.267, "step": 61240 }, { "epoch": 2.6385837963561185, "learning_rate": 8.51984528862936e-07, "loss": 4.1479, "step": 61260 }, { "epoch": 2.6394452340957058, "learning_rate": 8.519360468874571e-07, "loss": 4.1027, "step": 61280 }, { "epoch": 2.640306671835293, "learning_rate": 8.518875649119783e-07, "loss": 4.0376, "step": 61300 }, { "epoch": 2.6411681095748802, "learning_rate": 8.518390829364994e-07, "loss": 4.268, "step": 61320 }, { "epoch": 2.642029547314468, "learning_rate": 8.517906009610204e-07, "loss": 4.4516, "step": 61340 }, { "epoch": 2.642890985054055, "learning_rate": 8.517421189855415e-07, "loss": 4.144, "step": 61360 }, { "epoch": 2.643752422793643, "learning_rate": 8.516936370100627e-07, "loss": 4.213, "step": 61380 }, { "epoch": 2.64461386053323, "learning_rate": 8.516451550345838e-07, "loss": 4.2411, "step": 61400 }, { "epoch": 2.6454752982728174, "learning_rate": 8.515966730591048e-07, "loss": 4.125, "step": 61420 }, { "epoch": 2.6463367360124046, "learning_rate": 8.51548191083626e-07, "loss": 4.2793, "step": 61440 }, { "epoch": 2.647198173751992, "learning_rate": 8.514997091081471e-07, "loss": 4.1936, "step": 61460 }, { "epoch": 2.6480596114915795, "learning_rate": 8.514512271326681e-07, "loss": 4.3829, "step": 61480 }, { "epoch": 2.648921049231167, "learning_rate": 8.514027451571892e-07, "loss": 4.0414, "step": 61500 }, { "epoch": 2.649782486970754, "learning_rate": 8.513542631817104e-07, "loss": 4.3418, "step": 61520 }, { "epoch": 2.6506439247103417, "learning_rate": 8.513057812062316e-07, "loss": 4.2777, "step": 61540 }, { "epoch": 2.651505362449929, "learning_rate": 8.512572992307526e-07, "loss": 4.075, "step": 61560 }, { "epoch": 2.652366800189516, "learning_rate": 8.512088172552737e-07, "loss": 4.2111, "step": 61580 }, { "epoch": 2.6532282379291035, "learning_rate": 8.511603352797948e-07, "loss": 4.0313, "step": 61600 }, { "epoch": 2.654089675668691, "learning_rate": 8.51111853304316e-07, "loss": 4.1522, "step": 61620 }, { "epoch": 2.6549511134082784, "learning_rate": 8.51063371328837e-07, "loss": 4.1889, "step": 61640 }, { "epoch": 2.6558125511478656, "learning_rate": 8.510148893533581e-07, "loss": 4.0174, "step": 61660 }, { "epoch": 2.6566739888874533, "learning_rate": 8.509664073778793e-07, "loss": 4.1852, "step": 61680 }, { "epoch": 2.6575354266270406, "learning_rate": 8.509179254024005e-07, "loss": 4.2706, "step": 61700 }, { "epoch": 2.658396864366628, "learning_rate": 8.508694434269214e-07, "loss": 4.218, "step": 61720 }, { "epoch": 2.659258302106215, "learning_rate": 8.508209614514424e-07, "loss": 4.1876, "step": 61740 }, { "epoch": 2.6601197398458027, "learning_rate": 8.507724794759637e-07, "loss": 4.1555, "step": 61760 }, { "epoch": 2.66098117758539, "learning_rate": 8.507239975004847e-07, "loss": 3.943, "step": 61780 }, { "epoch": 2.6618426153249772, "learning_rate": 8.506755155250059e-07, "loss": 4.1855, "step": 61800 }, { "epoch": 2.662704053064565, "learning_rate": 8.50627033549527e-07, "loss": 4.0555, "step": 61820 }, { "epoch": 2.663565490804152, "learning_rate": 8.505785515740481e-07, "loss": 4.0709, "step": 61840 }, { "epoch": 2.6644269285437394, "learning_rate": 8.505300695985691e-07, "loss": 4.2282, "step": 61860 }, { "epoch": 2.6652883662833267, "learning_rate": 8.504815876230903e-07, "loss": 4.0509, "step": 61880 }, { "epoch": 2.6661498040229143, "learning_rate": 8.504331056476114e-07, "loss": 4.0596, "step": 61900 }, { "epoch": 2.6670112417625016, "learning_rate": 8.503846236721326e-07, "loss": 4.1527, "step": 61920 }, { "epoch": 2.667872679502089, "learning_rate": 8.503361416966536e-07, "loss": 4.3536, "step": 61940 }, { "epoch": 2.6687341172416765, "learning_rate": 8.502876597211748e-07, "loss": 4.0764, "step": 61960 }, { "epoch": 2.6695955549812638, "learning_rate": 8.502391777456958e-07, "loss": 4.1755, "step": 61980 }, { "epoch": 2.670456992720851, "learning_rate": 8.50190695770217e-07, "loss": 4.2831, "step": 62000 }, { "epoch": 2.6713184304604383, "learning_rate": 8.50142213794738e-07, "loss": 4.3237, "step": 62020 }, { "epoch": 2.672179868200026, "learning_rate": 8.500937318192591e-07, "loss": 4.186, "step": 62040 }, { "epoch": 2.673041305939613, "learning_rate": 8.500452498437803e-07, "loss": 4.3383, "step": 62060 }, { "epoch": 2.6739027436792004, "learning_rate": 8.499967678683015e-07, "loss": 3.7935, "step": 62080 }, { "epoch": 2.674764181418788, "learning_rate": 8.499482858928224e-07, "loss": 4.1606, "step": 62100 }, { "epoch": 2.6756256191583754, "learning_rate": 8.498998039173435e-07, "loss": 4.2875, "step": 62120 }, { "epoch": 2.6764870568979626, "learning_rate": 8.498513219418647e-07, "loss": 4.3184, "step": 62140 }, { "epoch": 2.67734849463755, "learning_rate": 8.498028399663858e-07, "loss": 4.2195, "step": 62160 }, { "epoch": 2.6782099323771376, "learning_rate": 8.497543579909069e-07, "loss": 4.3262, "step": 62180 }, { "epoch": 2.679071370116725, "learning_rate": 8.49705876015428e-07, "loss": 4.2036, "step": 62200 }, { "epoch": 2.679932807856312, "learning_rate": 8.496573940399492e-07, "loss": 4.1811, "step": 62220 }, { "epoch": 2.6807942455958997, "learning_rate": 8.496089120644702e-07, "loss": 4.2556, "step": 62240 }, { "epoch": 2.681655683335487, "learning_rate": 8.495604300889914e-07, "loss": 4.1667, "step": 62260 }, { "epoch": 2.6825171210750742, "learning_rate": 8.495119481135124e-07, "loss": 4.0655, "step": 62280 }, { "epoch": 2.6833785588146615, "learning_rate": 8.494634661380336e-07, "loss": 4.0451, "step": 62300 }, { "epoch": 2.684239996554249, "learning_rate": 8.494149841625546e-07, "loss": 4.3132, "step": 62320 }, { "epoch": 2.6851014342938364, "learning_rate": 8.493665021870758e-07, "loss": 4.3183, "step": 62340 }, { "epoch": 2.6859628720334237, "learning_rate": 8.493180202115968e-07, "loss": 4.0798, "step": 62360 }, { "epoch": 2.6868243097730113, "learning_rate": 8.49269538236118e-07, "loss": 4.362, "step": 62380 }, { "epoch": 2.6876857475125986, "learning_rate": 8.49221056260639e-07, "loss": 4.1945, "step": 62400 }, { "epoch": 2.688547185252186, "learning_rate": 8.491725742851602e-07, "loss": 4.2235, "step": 62420 }, { "epoch": 2.689408622991773, "learning_rate": 8.491240923096813e-07, "loss": 4.1094, "step": 62440 }, { "epoch": 2.6902700607313608, "learning_rate": 8.490756103342024e-07, "loss": 4.0557, "step": 62460 }, { "epoch": 2.691131498470948, "learning_rate": 8.490271283587234e-07, "loss": 4.2701, "step": 62480 }, { "epoch": 2.6919929362105353, "learning_rate": 8.489786463832446e-07, "loss": 4.1211, "step": 62500 }, { "epoch": 2.692854373950123, "learning_rate": 8.489301644077657e-07, "loss": 4.2872, "step": 62520 }, { "epoch": 2.69371581168971, "learning_rate": 8.488816824322868e-07, "loss": 4.1406, "step": 62540 }, { "epoch": 2.6945772494292974, "learning_rate": 8.488332004568079e-07, "loss": 4.2378, "step": 62560 }, { "epoch": 2.6954386871688847, "learning_rate": 8.48784718481329e-07, "loss": 4.2389, "step": 62580 }, { "epoch": 2.6963001249084724, "learning_rate": 8.487362365058502e-07, "loss": 4.1908, "step": 62600 }, { "epoch": 2.6971615626480596, "learning_rate": 8.486877545303712e-07, "loss": 4.269, "step": 62620 }, { "epoch": 2.698023000387647, "learning_rate": 8.486392725548923e-07, "loss": 4.0371, "step": 62640 }, { "epoch": 2.6988844381272346, "learning_rate": 8.485907905794134e-07, "loss": 4.2435, "step": 62660 }, { "epoch": 2.699745875866822, "learning_rate": 8.485423086039344e-07, "loss": 4.1919, "step": 62680 }, { "epoch": 2.700607313606409, "learning_rate": 8.484938266284557e-07, "loss": 4.0822, "step": 62700 }, { "epoch": 2.7014687513459963, "learning_rate": 8.484453446529768e-07, "loss": 4.2258, "step": 62720 }, { "epoch": 2.702330189085584, "learning_rate": 8.483968626774978e-07, "loss": 4.0884, "step": 62740 }, { "epoch": 2.7031916268251712, "learning_rate": 8.483483807020189e-07, "loss": 4.2268, "step": 62760 }, { "epoch": 2.7040530645647585, "learning_rate": 8.482998987265402e-07, "loss": 4.0974, "step": 62780 }, { "epoch": 2.704914502304346, "learning_rate": 8.482514167510612e-07, "loss": 4.2737, "step": 62800 }, { "epoch": 2.7057759400439334, "learning_rate": 8.482029347755823e-07, "loss": 4.3173, "step": 62820 }, { "epoch": 2.7066373777835206, "learning_rate": 8.481544528001034e-07, "loss": 4.0712, "step": 62840 }, { "epoch": 2.707498815523108, "learning_rate": 8.481059708246245e-07, "loss": 4.4412, "step": 62860 }, { "epoch": 2.7083602532626956, "learning_rate": 8.480574888491456e-07, "loss": 4.271, "step": 62880 }, { "epoch": 2.709221691002283, "learning_rate": 8.480090068736667e-07, "loss": 4.1806, "step": 62900 }, { "epoch": 2.71008312874187, "learning_rate": 8.479605248981878e-07, "loss": 4.0364, "step": 62920 }, { "epoch": 2.7109445664814578, "learning_rate": 8.479120429227089e-07, "loss": 4.1013, "step": 62940 }, { "epoch": 2.711806004221045, "learning_rate": 8.478635609472301e-07, "loss": 4.0226, "step": 62960 }, { "epoch": 2.7126674419606323, "learning_rate": 8.478150789717512e-07, "loss": 4.1229, "step": 62980 }, { "epoch": 2.7135288797002195, "learning_rate": 8.477665969962722e-07, "loss": 4.1475, "step": 63000 }, { "epoch": 2.714390317439807, "learning_rate": 8.477181150207933e-07, "loss": 4.1741, "step": 63020 }, { "epoch": 2.7152517551793944, "learning_rate": 8.476696330453145e-07, "loss": 4.3025, "step": 63040 }, { "epoch": 2.7161131929189817, "learning_rate": 8.476211510698355e-07, "loss": 4.2863, "step": 63060 }, { "epoch": 2.7169746306585694, "learning_rate": 8.475726690943567e-07, "loss": 4.1415, "step": 63080 }, { "epoch": 2.7178360683981566, "learning_rate": 8.475241871188778e-07, "loss": 4.1979, "step": 63100 }, { "epoch": 2.718697506137744, "learning_rate": 8.474757051433988e-07, "loss": 4.2951, "step": 63120 }, { "epoch": 2.719558943877331, "learning_rate": 8.474272231679199e-07, "loss": 4.0424, "step": 63140 }, { "epoch": 2.720420381616919, "learning_rate": 8.473787411924411e-07, "loss": 4.2505, "step": 63160 }, { "epoch": 2.721281819356506, "learning_rate": 8.473302592169622e-07, "loss": 4.4026, "step": 63180 }, { "epoch": 2.7221432570960933, "learning_rate": 8.472817772414833e-07, "loss": 4.0589, "step": 63200 }, { "epoch": 2.723004694835681, "learning_rate": 8.472332952660044e-07, "loss": 4.1424, "step": 63220 }, { "epoch": 2.723866132575268, "learning_rate": 8.471848132905255e-07, "loss": 4.256, "step": 63240 }, { "epoch": 2.7247275703148555, "learning_rate": 8.471363313150466e-07, "loss": 4.1204, "step": 63260 }, { "epoch": 2.7255890080544427, "learning_rate": 8.470878493395676e-07, "loss": 4.1596, "step": 63280 }, { "epoch": 2.7264504457940304, "learning_rate": 8.470393673640888e-07, "loss": 4.0859, "step": 63300 }, { "epoch": 2.7273118835336176, "learning_rate": 8.4699088538861e-07, "loss": 4.032, "step": 63320 }, { "epoch": 2.728173321273205, "learning_rate": 8.469424034131311e-07, "loss": 4.3392, "step": 63340 }, { "epoch": 2.7290347590127926, "learning_rate": 8.468939214376521e-07, "loss": 4.18, "step": 63360 }, { "epoch": 2.72989619675238, "learning_rate": 8.468454394621732e-07, "loss": 4.2426, "step": 63380 }, { "epoch": 2.730757634491967, "learning_rate": 8.467969574866943e-07, "loss": 4.1187, "step": 63400 }, { "epoch": 2.7316190722315543, "learning_rate": 8.467484755112155e-07, "loss": 4.1211, "step": 63420 }, { "epoch": 2.732480509971142, "learning_rate": 8.466999935357365e-07, "loss": 4.1815, "step": 63440 }, { "epoch": 2.7333419477107292, "learning_rate": 8.466515115602577e-07, "loss": 4.1454, "step": 63460 }, { "epoch": 2.7342033854503165, "learning_rate": 8.466030295847788e-07, "loss": 4.381, "step": 63480 }, { "epoch": 2.735064823189904, "learning_rate": 8.465545476092999e-07, "loss": 4.3725, "step": 63500 }, { "epoch": 2.7359262609294914, "learning_rate": 8.465060656338208e-07, "loss": 4.0545, "step": 63520 }, { "epoch": 2.7367876986690787, "learning_rate": 8.464575836583421e-07, "loss": 4.2509, "step": 63540 }, { "epoch": 2.737649136408666, "learning_rate": 8.464091016828632e-07, "loss": 4.0918, "step": 63560 }, { "epoch": 2.738510574148253, "learning_rate": 8.463606197073844e-07, "loss": 4.2106, "step": 63580 }, { "epoch": 2.739372011887841, "learning_rate": 8.463121377319054e-07, "loss": 4.145, "step": 63600 }, { "epoch": 2.740233449627428, "learning_rate": 8.462636557564265e-07, "loss": 4.2587, "step": 63620 }, { "epoch": 2.741094887367016, "learning_rate": 8.462151737809476e-07, "loss": 4.2361, "step": 63640 }, { "epoch": 2.741956325106603, "learning_rate": 8.461666918054686e-07, "loss": 4.3539, "step": 63660 }, { "epoch": 2.7428177628461903, "learning_rate": 8.461182098299898e-07, "loss": 4.2672, "step": 63680 }, { "epoch": 2.7436792005857775, "learning_rate": 8.46069727854511e-07, "loss": 4.306, "step": 63700 }, { "epoch": 2.7445406383253648, "learning_rate": 8.460212458790321e-07, "loss": 4.2876, "step": 63720 }, { "epoch": 2.7454020760649525, "learning_rate": 8.459727639035531e-07, "loss": 4.1529, "step": 63740 }, { "epoch": 2.7462635138045397, "learning_rate": 8.459242819280742e-07, "loss": 4.1421, "step": 63760 }, { "epoch": 2.7471249515441274, "learning_rate": 8.458757999525954e-07, "loss": 4.208, "step": 63780 }, { "epoch": 2.7479863892837146, "learning_rate": 8.458273179771165e-07, "loss": 4.2952, "step": 63800 }, { "epoch": 2.748847827023302, "learning_rate": 8.457788360016375e-07, "loss": 4.0587, "step": 63820 }, { "epoch": 2.749709264762889, "learning_rate": 8.457303540261587e-07, "loss": 4.1058, "step": 63840 }, { "epoch": 2.7505707025024764, "learning_rate": 8.456818720506799e-07, "loss": 4.1121, "step": 63860 }, { "epoch": 2.751432140242064, "learning_rate": 8.456333900752009e-07, "loss": 4.0427, "step": 63880 }, { "epoch": 2.7522935779816513, "learning_rate": 8.455849080997219e-07, "loss": 3.9421, "step": 63900 }, { "epoch": 2.753155015721239, "learning_rate": 8.455364261242431e-07, "loss": 4.2178, "step": 63920 }, { "epoch": 2.7540164534608262, "learning_rate": 8.454879441487641e-07, "loss": 4.3995, "step": 63940 }, { "epoch": 2.7548778912004135, "learning_rate": 8.454394621732854e-07, "loss": 3.9356, "step": 63960 }, { "epoch": 2.7557393289400007, "learning_rate": 8.453909801978064e-07, "loss": 4.1646, "step": 63980 }, { "epoch": 2.756600766679588, "learning_rate": 8.453424982223276e-07, "loss": 3.9561, "step": 64000 }, { "epoch": 2.7574622044191757, "learning_rate": 8.452940162468486e-07, "loss": 4.2556, "step": 64020 }, { "epoch": 2.758323642158763, "learning_rate": 8.452455342713698e-07, "loss": 4.1118, "step": 64040 }, { "epoch": 2.7591850798983506, "learning_rate": 8.451970522958908e-07, "loss": 4.0717, "step": 64060 }, { "epoch": 2.760046517637938, "learning_rate": 8.45148570320412e-07, "loss": 4.2172, "step": 64080 }, { "epoch": 2.760907955377525, "learning_rate": 8.451000883449331e-07, "loss": 4.1552, "step": 64100 }, { "epoch": 2.7617693931171123, "learning_rate": 8.450516063694542e-07, "loss": 4.2209, "step": 64120 }, { "epoch": 2.7626308308566996, "learning_rate": 8.450031243939752e-07, "loss": 4.1111, "step": 64140 }, { "epoch": 2.7634922685962873, "learning_rate": 8.449546424184964e-07, "loss": 4.0866, "step": 64160 }, { "epoch": 2.7643537063358745, "learning_rate": 8.449061604430175e-07, "loss": 4.1659, "step": 64180 }, { "epoch": 2.765215144075462, "learning_rate": 8.448576784675385e-07, "loss": 4.3124, "step": 64200 }, { "epoch": 2.7660765818150495, "learning_rate": 8.448091964920597e-07, "loss": 4.1531, "step": 64220 }, { "epoch": 2.7669380195546367, "learning_rate": 8.447607145165809e-07, "loss": 4.2296, "step": 64240 }, { "epoch": 2.767799457294224, "learning_rate": 8.447122325411018e-07, "loss": 4.1856, "step": 64260 }, { "epoch": 2.768660895033811, "learning_rate": 8.446637505656229e-07, "loss": 4.2284, "step": 64280 }, { "epoch": 2.769522332773399, "learning_rate": 8.446152685901441e-07, "loss": 4.2686, "step": 64300 }, { "epoch": 2.770383770512986, "learning_rate": 8.445667866146653e-07, "loss": 4.1992, "step": 64320 }, { "epoch": 2.771245208252574, "learning_rate": 8.445183046391863e-07, "loss": 4.2428, "step": 64340 }, { "epoch": 2.772106645992161, "learning_rate": 8.444698226637074e-07, "loss": 4.3385, "step": 64360 }, { "epoch": 2.7729680837317483, "learning_rate": 8.444213406882286e-07, "loss": 4.0539, "step": 64380 }, { "epoch": 2.7738295214713355, "learning_rate": 8.443728587127497e-07, "loss": 4.073, "step": 64400 }, { "epoch": 2.774690959210923, "learning_rate": 8.443243767372707e-07, "loss": 4.1707, "step": 64420 }, { "epoch": 2.7755523969505105, "learning_rate": 8.442758947617918e-07, "loss": 4.1407, "step": 64440 }, { "epoch": 2.7764138346900977, "learning_rate": 8.442274127863129e-07, "loss": 4.2477, "step": 64460 }, { "epoch": 2.7772752724296854, "learning_rate": 8.441789308108341e-07, "loss": 4.3696, "step": 64480 }, { "epoch": 2.7781367101692727, "learning_rate": 8.441304488353552e-07, "loss": 4.2808, "step": 64500 }, { "epoch": 2.77899814790886, "learning_rate": 8.440819668598762e-07, "loss": 4.248, "step": 64520 }, { "epoch": 2.779859585648447, "learning_rate": 8.440334848843974e-07, "loss": 4.3766, "step": 64540 }, { "epoch": 2.7807210233880344, "learning_rate": 8.439850029089184e-07, "loss": 4.157, "step": 64560 }, { "epoch": 2.781582461127622, "learning_rate": 8.439365209334396e-07, "loss": 4.2217, "step": 64580 }, { "epoch": 2.7824438988672093, "learning_rate": 8.438880389579607e-07, "loss": 4.1429, "step": 64600 }, { "epoch": 2.783305336606797, "learning_rate": 8.438395569824819e-07, "loss": 4.1362, "step": 64620 }, { "epoch": 2.7841667743463843, "learning_rate": 8.437910750070029e-07, "loss": 4.2166, "step": 64640 }, { "epoch": 2.7850282120859715, "learning_rate": 8.43742593031524e-07, "loss": 4.1578, "step": 64660 }, { "epoch": 2.7858896498255588, "learning_rate": 8.436941110560451e-07, "loss": 4.399, "step": 64680 }, { "epoch": 2.786751087565146, "learning_rate": 8.436456290805663e-07, "loss": 4.2696, "step": 64700 }, { "epoch": 2.7876125253047337, "learning_rate": 8.435971471050873e-07, "loss": 4.1585, "step": 64720 }, { "epoch": 2.788473963044321, "learning_rate": 8.435486651296084e-07, "loss": 4.026, "step": 64740 }, { "epoch": 2.789335400783908, "learning_rate": 8.435001831541296e-07, "loss": 4.2457, "step": 64760 }, { "epoch": 2.790196838523496, "learning_rate": 8.434517011786507e-07, "loss": 4.2927, "step": 64780 }, { "epoch": 2.791058276263083, "learning_rate": 8.434032192031717e-07, "loss": 4.2799, "step": 64800 }, { "epoch": 2.7919197140026704, "learning_rate": 8.433547372276928e-07, "loss": 4.1319, "step": 64820 }, { "epoch": 2.7927811517422576, "learning_rate": 8.43306255252214e-07, "loss": 4.1158, "step": 64840 }, { "epoch": 2.7936425894818453, "learning_rate": 8.432577732767352e-07, "loss": 4.2568, "step": 64860 }, { "epoch": 2.7945040272214325, "learning_rate": 8.432092913012562e-07, "loss": 4.2391, "step": 64880 }, { "epoch": 2.79536546496102, "learning_rate": 8.431608093257772e-07, "loss": 4.167, "step": 64900 }, { "epoch": 2.7962269027006075, "learning_rate": 8.431123273502984e-07, "loss": 4.2472, "step": 64920 }, { "epoch": 2.7970883404401947, "learning_rate": 8.430638453748195e-07, "loss": 4.1453, "step": 64940 }, { "epoch": 2.797949778179782, "learning_rate": 8.430153633993406e-07, "loss": 4.0949, "step": 64960 }, { "epoch": 2.798811215919369, "learning_rate": 8.429668814238617e-07, "loss": 4.1396, "step": 64980 }, { "epoch": 2.799672653658957, "learning_rate": 8.429183994483829e-07, "loss": 4.0352, "step": 65000 }, { "epoch": 2.800534091398544, "learning_rate": 8.428699174729039e-07, "loss": 4.0997, "step": 65020 }, { "epoch": 2.8013955291381314, "learning_rate": 8.42821435497425e-07, "loss": 4.2631, "step": 65040 }, { "epoch": 2.802256966877719, "learning_rate": 8.427729535219461e-07, "loss": 4.198, "step": 65060 }, { "epoch": 2.8031184046173063, "learning_rate": 8.427244715464673e-07, "loss": 4.3659, "step": 65080 }, { "epoch": 2.8039798423568936, "learning_rate": 8.426759895709883e-07, "loss": 3.993, "step": 65100 }, { "epoch": 2.804841280096481, "learning_rate": 8.426275075955095e-07, "loss": 4.1925, "step": 65120 }, { "epoch": 2.8057027178360685, "learning_rate": 8.425790256200306e-07, "loss": 4.4249, "step": 65140 }, { "epoch": 2.8065641555756558, "learning_rate": 8.425305436445517e-07, "loss": 4.1671, "step": 65160 }, { "epoch": 2.807425593315243, "learning_rate": 8.424820616690727e-07, "loss": 3.9986, "step": 65180 }, { "epoch": 2.8082870310548307, "learning_rate": 8.424335796935939e-07, "loss": 4.22, "step": 65200 }, { "epoch": 2.809148468794418, "learning_rate": 8.42385097718115e-07, "loss": 4.3503, "step": 65220 }, { "epoch": 2.810009906534005, "learning_rate": 8.423366157426362e-07, "loss": 4.1263, "step": 65240 }, { "epoch": 2.8108713442735924, "learning_rate": 8.422881337671572e-07, "loss": 4.4143, "step": 65260 }, { "epoch": 2.81173278201318, "learning_rate": 8.422396517916783e-07, "loss": 4.0707, "step": 65280 }, { "epoch": 2.8125942197527674, "learning_rate": 8.421911698161993e-07, "loss": 4.052, "step": 65300 }, { "epoch": 2.8134556574923546, "learning_rate": 8.421426878407205e-07, "loss": 4.1191, "step": 65320 }, { "epoch": 2.8143170952319423, "learning_rate": 8.420942058652416e-07, "loss": 4.0559, "step": 65340 }, { "epoch": 2.8151785329715295, "learning_rate": 8.420457238897627e-07, "loss": 4.0588, "step": 65360 }, { "epoch": 2.816039970711117, "learning_rate": 8.419972419142839e-07, "loss": 4.1563, "step": 65380 }, { "epoch": 2.816901408450704, "learning_rate": 8.419487599388049e-07, "loss": 4.1521, "step": 65400 }, { "epoch": 2.8177628461902917, "learning_rate": 8.41900277963326e-07, "loss": 4.202, "step": 65420 }, { "epoch": 2.818624283929879, "learning_rate": 8.418517959878471e-07, "loss": 4.1619, "step": 65440 }, { "epoch": 2.819485721669466, "learning_rate": 8.418033140123683e-07, "loss": 4.2922, "step": 65460 }, { "epoch": 2.820347159409054, "learning_rate": 8.417548320368894e-07, "loss": 4.2528, "step": 65480 }, { "epoch": 2.821208597148641, "learning_rate": 8.417063500614105e-07, "loss": 4.2084, "step": 65500 }, { "epoch": 2.8220700348882284, "learning_rate": 8.416578680859316e-07, "loss": 4.2309, "step": 65520 }, { "epoch": 2.8229314726278156, "learning_rate": 8.416093861104526e-07, "loss": 4.2342, "step": 65540 }, { "epoch": 2.8237929103674033, "learning_rate": 8.415609041349737e-07, "loss": 4.0922, "step": 65560 }, { "epoch": 2.8246543481069906, "learning_rate": 8.415124221594949e-07, "loss": 3.9988, "step": 65580 }, { "epoch": 2.825515785846578, "learning_rate": 8.41463940184016e-07, "loss": 4.2548, "step": 65600 }, { "epoch": 2.8263772235861655, "learning_rate": 8.414154582085371e-07, "loss": 4.024, "step": 65620 }, { "epoch": 2.8272386613257527, "learning_rate": 8.413669762330582e-07, "loss": 4.1746, "step": 65640 }, { "epoch": 2.82810009906534, "learning_rate": 8.413184942575793e-07, "loss": 4.1492, "step": 65660 }, { "epoch": 2.8289615368049272, "learning_rate": 8.412700122821004e-07, "loss": 4.3199, "step": 65680 }, { "epoch": 2.829822974544515, "learning_rate": 8.412215303066215e-07, "loss": 4.2033, "step": 65700 }, { "epoch": 2.830684412284102, "learning_rate": 8.411730483311425e-07, "loss": 4.2661, "step": 65720 }, { "epoch": 2.8315458500236894, "learning_rate": 8.411245663556638e-07, "loss": 4.1234, "step": 65740 }, { "epoch": 2.832407287763277, "learning_rate": 8.410760843801849e-07, "loss": 3.9953, "step": 65760 }, { "epoch": 2.8332687255028643, "learning_rate": 8.41027602404706e-07, "loss": 3.9306, "step": 65780 }, { "epoch": 2.8341301632424516, "learning_rate": 8.40979120429227e-07, "loss": 4.3457, "step": 65800 }, { "epoch": 2.834991600982039, "learning_rate": 8.409306384537481e-07, "loss": 4.3391, "step": 65820 }, { "epoch": 2.8358530387216265, "learning_rate": 8.408821564782692e-07, "loss": 4.1781, "step": 65840 }, { "epoch": 2.8367144764612138, "learning_rate": 8.408336745027904e-07, "loss": 4.1102, "step": 65860 }, { "epoch": 2.837575914200801, "learning_rate": 8.407851925273115e-07, "loss": 4.2409, "step": 65880 }, { "epoch": 2.8384373519403887, "learning_rate": 8.407367105518326e-07, "loss": 4.28, "step": 65900 }, { "epoch": 2.839298789679976, "learning_rate": 8.406882285763536e-07, "loss": 4.5097, "step": 65920 }, { "epoch": 2.840160227419563, "learning_rate": 8.406397466008748e-07, "loss": 4.4996, "step": 65940 }, { "epoch": 2.8410216651591504, "learning_rate": 8.405912646253959e-07, "loss": 4.5238, "step": 65960 }, { "epoch": 2.841883102898738, "learning_rate": 8.40542782649917e-07, "loss": 4.374, "step": 65980 }, { "epoch": 2.8427445406383254, "learning_rate": 8.404943006744381e-07, "loss": 4.3341, "step": 66000 }, { "epoch": 2.8436059783779126, "learning_rate": 8.404458186989593e-07, "loss": 4.2999, "step": 66020 }, { "epoch": 2.8444674161175003, "learning_rate": 8.403973367234803e-07, "loss": 4.0023, "step": 66040 }, { "epoch": 2.8453288538570876, "learning_rate": 8.403488547480014e-07, "loss": 4.3215, "step": 66060 }, { "epoch": 2.846190291596675, "learning_rate": 8.403003727725225e-07, "loss": 4.4189, "step": 66080 }, { "epoch": 2.847051729336262, "learning_rate": 8.402518907970436e-07, "loss": 4.0156, "step": 66100 }, { "epoch": 2.8479131670758497, "learning_rate": 8.402034088215648e-07, "loss": 3.8661, "step": 66120 }, { "epoch": 2.848774604815437, "learning_rate": 8.401549268460858e-07, "loss": 4.1853, "step": 66140 }, { "epoch": 2.8496360425550242, "learning_rate": 8.40106444870607e-07, "loss": 4.0573, "step": 66160 }, { "epoch": 2.850497480294612, "learning_rate": 8.40057962895128e-07, "loss": 4.1377, "step": 66180 }, { "epoch": 2.851358918034199, "learning_rate": 8.400094809196492e-07, "loss": 4.17, "step": 66200 }, { "epoch": 2.8522203557737864, "learning_rate": 8.399609989441702e-07, "loss": 4.1114, "step": 66220 }, { "epoch": 2.8530817935133737, "learning_rate": 8.399125169686913e-07, "loss": 4.0018, "step": 66240 }, { "epoch": 2.8539432312529613, "learning_rate": 8.398640349932125e-07, "loss": 4.0871, "step": 66260 }, { "epoch": 2.8548046689925486, "learning_rate": 8.398155530177337e-07, "loss": 4.1926, "step": 66280 }, { "epoch": 2.855666106732136, "learning_rate": 8.397670710422546e-07, "loss": 4.2544, "step": 66300 }, { "epoch": 2.8565275444717235, "learning_rate": 8.397185890667758e-07, "loss": 4.093, "step": 66320 }, { "epoch": 2.8573889822113108, "learning_rate": 8.396701070912969e-07, "loss": 4.1454, "step": 66340 }, { "epoch": 2.858250419950898, "learning_rate": 8.39621625115818e-07, "loss": 4.237, "step": 66360 }, { "epoch": 2.8591118576904853, "learning_rate": 8.395731431403391e-07, "loss": 4.1079, "step": 66380 }, { "epoch": 2.859973295430073, "learning_rate": 8.395246611648603e-07, "loss": 4.1858, "step": 66400 }, { "epoch": 2.86083473316966, "learning_rate": 8.394761791893814e-07, "loss": 4.2199, "step": 66420 }, { "epoch": 2.8616961709092474, "learning_rate": 8.394276972139023e-07, "loss": 4.1503, "step": 66440 }, { "epoch": 2.862557608648835, "learning_rate": 8.393792152384235e-07, "loss": 3.92, "step": 66460 }, { "epoch": 2.8634190463884224, "learning_rate": 8.393307332629447e-07, "loss": 4.0939, "step": 66480 }, { "epoch": 2.8642804841280096, "learning_rate": 8.392822512874658e-07, "loss": 4.3083, "step": 66500 }, { "epoch": 2.865141921867597, "learning_rate": 8.392337693119868e-07, "loss": 4.5096, "step": 66520 }, { "epoch": 2.8660033596071846, "learning_rate": 8.39185287336508e-07, "loss": 4.1628, "step": 66540 }, { "epoch": 2.866864797346772, "learning_rate": 8.391368053610291e-07, "loss": 4.1477, "step": 66560 }, { "epoch": 2.867726235086359, "learning_rate": 8.390883233855502e-07, "loss": 3.9537, "step": 66580 }, { "epoch": 2.8685876728259467, "learning_rate": 8.390398414100712e-07, "loss": 4.0073, "step": 66600 }, { "epoch": 2.869449110565534, "learning_rate": 8.389913594345924e-07, "loss": 4.1379, "step": 66620 }, { "epoch": 2.8703105483051212, "learning_rate": 8.389428774591135e-07, "loss": 4.5399, "step": 66640 }, { "epoch": 2.8711719860447085, "learning_rate": 8.388943954836347e-07, "loss": 4.172, "step": 66660 }, { "epoch": 2.8720334237842957, "learning_rate": 8.388459135081556e-07, "loss": 4.1137, "step": 66680 }, { "epoch": 2.8728948615238834, "learning_rate": 8.387974315326768e-07, "loss": 4.3093, "step": 66700 }, { "epoch": 2.8737562992634706, "learning_rate": 8.387489495571979e-07, "loss": 4.2004, "step": 66720 }, { "epoch": 2.8746177370030583, "learning_rate": 8.387004675817191e-07, "loss": 4.2117, "step": 66740 }, { "epoch": 2.8754791747426456, "learning_rate": 8.386519856062401e-07, "loss": 3.9785, "step": 66760 }, { "epoch": 2.876340612482233, "learning_rate": 8.386035036307613e-07, "loss": 4.2004, "step": 66780 }, { "epoch": 2.87720205022182, "learning_rate": 8.385550216552824e-07, "loss": 4.3149, "step": 66800 }, { "epoch": 2.8780634879614073, "learning_rate": 8.385065396798034e-07, "loss": 4.0467, "step": 66820 }, { "epoch": 2.878924925700995, "learning_rate": 8.384580577043245e-07, "loss": 4.2308, "step": 66840 }, { "epoch": 2.8797863634405823, "learning_rate": 8.384095757288457e-07, "loss": 4.2297, "step": 66860 }, { "epoch": 2.88064780118017, "learning_rate": 8.383610937533668e-07, "loss": 4.1531, "step": 66880 }, { "epoch": 2.881509238919757, "learning_rate": 8.383126117778878e-07, "loss": 4.127, "step": 66900 }, { "epoch": 2.8823706766593444, "learning_rate": 8.38264129802409e-07, "loss": 4.2216, "step": 66920 }, { "epoch": 2.8832321143989317, "learning_rate": 8.382156478269301e-07, "loss": 4.1503, "step": 66940 }, { "epoch": 2.884093552138519, "learning_rate": 8.381671658514512e-07, "loss": 4.1333, "step": 66960 }, { "epoch": 2.8849549898781066, "learning_rate": 8.381186838759722e-07, "loss": 4.2919, "step": 66980 }, { "epoch": 2.885816427617694, "learning_rate": 8.380702019004934e-07, "loss": 4.2744, "step": 67000 }, { "epoch": 2.8866778653572815, "learning_rate": 8.380217199250146e-07, "loss": 4.3332, "step": 67020 }, { "epoch": 2.887539303096869, "learning_rate": 8.379732379495357e-07, "loss": 4.237, "step": 67040 }, { "epoch": 2.888400740836456, "learning_rate": 8.379247559740567e-07, "loss": 4.0655, "step": 67060 }, { "epoch": 2.8892621785760433, "learning_rate": 8.378762739985777e-07, "loss": 4.3082, "step": 67080 }, { "epoch": 2.8901236163156305, "learning_rate": 8.37827792023099e-07, "loss": 4.2381, "step": 67100 }, { "epoch": 2.890985054055218, "learning_rate": 8.3777931004762e-07, "loss": 4.0887, "step": 67120 }, { "epoch": 2.8918464917948055, "learning_rate": 8.377308280721411e-07, "loss": 4.1576, "step": 67140 }, { "epoch": 2.892707929534393, "learning_rate": 8.376823460966623e-07, "loss": 4.1743, "step": 67160 }, { "epoch": 2.8935693672739804, "learning_rate": 8.376338641211834e-07, "loss": 4.2787, "step": 67180 }, { "epoch": 2.8944308050135676, "learning_rate": 8.375853821457044e-07, "loss": 4.2371, "step": 67200 }, { "epoch": 2.895292242753155, "learning_rate": 8.375369001702255e-07, "loss": 4.2381, "step": 67220 }, { "epoch": 2.896153680492742, "learning_rate": 8.374884181947467e-07, "loss": 4.3043, "step": 67240 }, { "epoch": 2.89701511823233, "learning_rate": 8.374399362192678e-07, "loss": 4.1704, "step": 67260 }, { "epoch": 2.897876555971917, "learning_rate": 8.373914542437889e-07, "loss": 4.0632, "step": 67280 }, { "epoch": 2.8987379937115048, "learning_rate": 8.3734297226831e-07, "loss": 4.035, "step": 67300 }, { "epoch": 2.899599431451092, "learning_rate": 8.372944902928311e-07, "loss": 4.2782, "step": 67320 }, { "epoch": 2.9004608691906792, "learning_rate": 8.372460083173522e-07, "loss": 4.1251, "step": 67340 }, { "epoch": 2.9013223069302665, "learning_rate": 8.371975263418733e-07, "loss": 4.3405, "step": 67360 }, { "epoch": 2.9021837446698537, "learning_rate": 8.371490443663944e-07, "loss": 4.1674, "step": 67380 }, { "epoch": 2.9030451824094414, "learning_rate": 8.371005623909156e-07, "loss": 4.2904, "step": 67400 }, { "epoch": 2.9039066201490287, "learning_rate": 8.370520804154366e-07, "loss": 4.2848, "step": 67420 }, { "epoch": 2.9047680578886164, "learning_rate": 8.370035984399577e-07, "loss": 4.369, "step": 67440 }, { "epoch": 2.9056294956282036, "learning_rate": 8.369551164644788e-07, "loss": 4.2133, "step": 67460 }, { "epoch": 2.906490933367791, "learning_rate": 8.36906634489e-07, "loss": 4.4579, "step": 67480 }, { "epoch": 2.907352371107378, "learning_rate": 8.368581525135209e-07, "loss": 4.0013, "step": 67500 }, { "epoch": 2.9082138088469653, "learning_rate": 8.368096705380421e-07, "loss": 4.0972, "step": 67520 }, { "epoch": 2.909075246586553, "learning_rate": 8.367611885625633e-07, "loss": 4.4095, "step": 67540 }, { "epoch": 2.9099366843261403, "learning_rate": 8.367127065870845e-07, "loss": 4.2958, "step": 67560 }, { "epoch": 2.910798122065728, "learning_rate": 8.366642246116054e-07, "loss": 4.249, "step": 67580 }, { "epoch": 2.911659559805315, "learning_rate": 8.366157426361265e-07, "loss": 4.1099, "step": 67600 }, { "epoch": 2.9125209975449025, "learning_rate": 8.365672606606477e-07, "loss": 4.3728, "step": 67620 }, { "epoch": 2.9133824352844897, "learning_rate": 8.365187786851689e-07, "loss": 4.1975, "step": 67640 }, { "epoch": 2.914243873024077, "learning_rate": 8.364702967096899e-07, "loss": 4.329, "step": 67660 }, { "epoch": 2.9151053107636646, "learning_rate": 8.36421814734211e-07, "loss": 4.2475, "step": 67680 }, { "epoch": 2.915966748503252, "learning_rate": 8.363733327587321e-07, "loss": 4.1461, "step": 67700 }, { "epoch": 2.9168281862428396, "learning_rate": 8.363248507832531e-07, "loss": 4.0494, "step": 67720 }, { "epoch": 2.917689623982427, "learning_rate": 8.362763688077743e-07, "loss": 4.1619, "step": 67740 }, { "epoch": 2.918551061722014, "learning_rate": 8.362278868322954e-07, "loss": 4.1775, "step": 67760 }, { "epoch": 2.9194124994616013, "learning_rate": 8.361794048568166e-07, "loss": 4.0533, "step": 67780 }, { "epoch": 2.9202739372011886, "learning_rate": 8.361309228813376e-07, "loss": 4.1892, "step": 67800 }, { "epoch": 2.9211353749407762, "learning_rate": 8.360824409058587e-07, "loss": 4.1913, "step": 67820 }, { "epoch": 2.9219968126803635, "learning_rate": 8.360339589303798e-07, "loss": 4.0711, "step": 67840 }, { "epoch": 2.9228582504199507, "learning_rate": 8.35985476954901e-07, "loss": 4.2778, "step": 67860 }, { "epoch": 2.9237196881595384, "learning_rate": 8.35936994979422e-07, "loss": 4.3031, "step": 67880 }, { "epoch": 2.9245811258991257, "learning_rate": 8.358885130039432e-07, "loss": 4.032, "step": 67900 }, { "epoch": 2.925442563638713, "learning_rate": 8.358400310284643e-07, "loss": 4.1971, "step": 67920 }, { "epoch": 2.9263040013783, "learning_rate": 8.357915490529855e-07, "loss": 4.133, "step": 67940 }, { "epoch": 2.927165439117888, "learning_rate": 8.357430670775064e-07, "loss": 4.1434, "step": 67960 }, { "epoch": 2.928026876857475, "learning_rate": 8.356945851020275e-07, "loss": 4.1361, "step": 67980 }, { "epoch": 2.9288883145970623, "learning_rate": 8.356461031265487e-07, "loss": 4.1048, "step": 68000 }, { "epoch": 2.92974975233665, "learning_rate": 8.355976211510697e-07, "loss": 4.108, "step": 68020 }, { "epoch": 2.9306111900762373, "learning_rate": 8.355491391755909e-07, "loss": 4.3268, "step": 68040 }, { "epoch": 2.9314726278158245, "learning_rate": 8.35500657200112e-07, "loss": 4.0361, "step": 68060 }, { "epoch": 2.9323340655554118, "learning_rate": 8.354521752246331e-07, "loss": 3.9429, "step": 68080 }, { "epoch": 2.9331955032949995, "learning_rate": 8.354036932491542e-07, "loss": 4.4815, "step": 68100 }, { "epoch": 2.9340569410345867, "learning_rate": 8.353552112736753e-07, "loss": 3.9123, "step": 68120 }, { "epoch": 2.934918378774174, "learning_rate": 8.353067292981964e-07, "loss": 4.309, "step": 68140 }, { "epoch": 2.9357798165137616, "learning_rate": 8.352582473227176e-07, "loss": 4.1525, "step": 68160 }, { "epoch": 2.936641254253349, "learning_rate": 8.352097653472387e-07, "loss": 4.2099, "step": 68180 }, { "epoch": 2.937502691992936, "learning_rate": 8.351612833717598e-07, "loss": 4.2891, "step": 68200 }, { "epoch": 2.9383641297325234, "learning_rate": 8.351128013962808e-07, "loss": 3.9832, "step": 68220 }, { "epoch": 2.939225567472111, "learning_rate": 8.35064319420802e-07, "loss": 4.2352, "step": 68240 }, { "epoch": 2.9400870052116983, "learning_rate": 8.35015837445323e-07, "loss": 4.1817, "step": 68260 }, { "epoch": 2.9409484429512855, "learning_rate": 8.349673554698442e-07, "loss": 4.0247, "step": 68280 }, { "epoch": 2.9418098806908732, "learning_rate": 8.349188734943653e-07, "loss": 4.0533, "step": 68300 }, { "epoch": 2.9426713184304605, "learning_rate": 8.348703915188865e-07, "loss": 4.1722, "step": 68320 }, { "epoch": 2.9435327561700477, "learning_rate": 8.348219095434073e-07, "loss": 4.1471, "step": 68340 }, { "epoch": 2.944394193909635, "learning_rate": 8.347734275679286e-07, "loss": 4.1911, "step": 68360 }, { "epoch": 2.9452556316492227, "learning_rate": 8.347249455924497e-07, "loss": 4.0995, "step": 68380 }, { "epoch": 2.94611706938881, "learning_rate": 8.346764636169708e-07, "loss": 4.1429, "step": 68400 }, { "epoch": 2.946978507128397, "learning_rate": 8.346279816414919e-07, "loss": 4.2546, "step": 68420 }, { "epoch": 2.947839944867985, "learning_rate": 8.345794996660131e-07, "loss": 4.003, "step": 68440 }, { "epoch": 2.948701382607572, "learning_rate": 8.345310176905341e-07, "loss": 4.0601, "step": 68460 }, { "epoch": 2.9495628203471593, "learning_rate": 8.344825357150552e-07, "loss": 4.2808, "step": 68480 }, { "epoch": 2.9504242580867466, "learning_rate": 8.344340537395763e-07, "loss": 4.3243, "step": 68500 }, { "epoch": 2.9512856958263343, "learning_rate": 8.343855717640974e-07, "loss": 4.1212, "step": 68520 }, { "epoch": 2.9521471335659215, "learning_rate": 8.343370897886186e-07, "loss": 4.0958, "step": 68540 }, { "epoch": 2.9530085713055088, "learning_rate": 8.342886078131397e-07, "loss": 4.3868, "step": 68560 }, { "epoch": 2.9538700090450964, "learning_rate": 8.342401258376608e-07, "loss": 4.2354, "step": 68580 }, { "epoch": 2.9547314467846837, "learning_rate": 8.341916438621818e-07, "loss": 4.3807, "step": 68600 }, { "epoch": 2.955592884524271, "learning_rate": 8.34143161886703e-07, "loss": 4.1667, "step": 68620 }, { "epoch": 2.956454322263858, "learning_rate": 8.340946799112241e-07, "loss": 4.0863, "step": 68640 }, { "epoch": 2.957315760003446, "learning_rate": 8.340461979357452e-07, "loss": 4.1569, "step": 68660 }, { "epoch": 2.958177197743033, "learning_rate": 8.339977159602663e-07, "loss": 4.1465, "step": 68680 }, { "epoch": 2.9590386354826204, "learning_rate": 8.339492339847874e-07, "loss": 4.0999, "step": 68700 }, { "epoch": 2.959900073222208, "learning_rate": 8.339007520093085e-07, "loss": 4.0144, "step": 68720 }, { "epoch": 2.9607615109617953, "learning_rate": 8.338522700338296e-07, "loss": 3.8672, "step": 68740 }, { "epoch": 2.9616229487013825, "learning_rate": 8.338037880583507e-07, "loss": 4.0771, "step": 68760 }, { "epoch": 2.96248438644097, "learning_rate": 8.337553060828718e-07, "loss": 4.144, "step": 68780 }, { "epoch": 2.9633458241805575, "learning_rate": 8.337068241073929e-07, "loss": 3.9042, "step": 68800 }, { "epoch": 2.9642072619201447, "learning_rate": 8.336583421319141e-07, "loss": 4.2654, "step": 68820 }, { "epoch": 2.965068699659732, "learning_rate": 8.336098601564352e-07, "loss": 4.2632, "step": 68840 }, { "epoch": 2.9659301373993197, "learning_rate": 8.335613781809561e-07, "loss": 4.1627, "step": 68860 }, { "epoch": 2.966791575138907, "learning_rate": 8.335128962054773e-07, "loss": 4.2268, "step": 68880 }, { "epoch": 2.967653012878494, "learning_rate": 8.334644142299985e-07, "loss": 4.0903, "step": 68900 }, { "epoch": 2.9685144506180814, "learning_rate": 8.334159322545196e-07, "loss": 4.4831, "step": 68920 }, { "epoch": 2.969375888357669, "learning_rate": 8.333674502790407e-07, "loss": 4.0449, "step": 68940 }, { "epoch": 2.9702373260972563, "learning_rate": 8.333189683035618e-07, "loss": 4.0883, "step": 68960 }, { "epoch": 2.9710987638368436, "learning_rate": 8.332704863280829e-07, "loss": 4.1052, "step": 68980 }, { "epoch": 2.9719602015764313, "learning_rate": 8.332220043526039e-07, "loss": 4.0703, "step": 69000 }, { "epoch": 2.9728216393160185, "learning_rate": 8.331735223771251e-07, "loss": 4.2334, "step": 69020 }, { "epoch": 2.9736830770556058, "learning_rate": 8.331250404016462e-07, "loss": 4.1308, "step": 69040 }, { "epoch": 2.974544514795193, "learning_rate": 8.330765584261673e-07, "loss": 4.23, "step": 69060 }, { "epoch": 2.9754059525347807, "learning_rate": 8.330280764506884e-07, "loss": 3.9961, "step": 69080 }, { "epoch": 2.976267390274368, "learning_rate": 8.329795944752095e-07, "loss": 4.2471, "step": 69100 }, { "epoch": 2.977128828013955, "learning_rate": 8.329311124997306e-07, "loss": 4.2375, "step": 69120 }, { "epoch": 2.977990265753543, "learning_rate": 8.328826305242517e-07, "loss": 4.2749, "step": 69140 }, { "epoch": 2.97885170349313, "learning_rate": 8.328341485487728e-07, "loss": 4.0304, "step": 69160 }, { "epoch": 2.9797131412327174, "learning_rate": 8.32785666573294e-07, "loss": 4.1121, "step": 69180 }, { "epoch": 2.9805745789723046, "learning_rate": 8.327371845978151e-07, "loss": 4.1717, "step": 69200 }, { "epoch": 2.9814360167118923, "learning_rate": 8.326887026223362e-07, "loss": 3.996, "step": 69220 }, { "epoch": 2.9822974544514795, "learning_rate": 8.326402206468572e-07, "loss": 4.3253, "step": 69240 }, { "epoch": 2.983158892191067, "learning_rate": 8.325917386713784e-07, "loss": 4.0697, "step": 69260 }, { "epoch": 2.9840203299306545, "learning_rate": 8.325432566958994e-07, "loss": 4.4012, "step": 69280 }, { "epoch": 2.9848817676702417, "learning_rate": 8.324947747204205e-07, "loss": 3.992, "step": 69300 }, { "epoch": 2.985743205409829, "learning_rate": 8.324462927449417e-07, "loss": 4.241, "step": 69320 }, { "epoch": 2.986604643149416, "learning_rate": 8.323978107694628e-07, "loss": 4.3471, "step": 69340 }, { "epoch": 2.987466080889004, "learning_rate": 8.323493287939839e-07, "loss": 4.1748, "step": 69360 }, { "epoch": 2.988327518628591, "learning_rate": 8.323008468185049e-07, "loss": 4.332, "step": 69380 }, { "epoch": 2.9891889563681784, "learning_rate": 8.322523648430261e-07, "loss": 3.9955, "step": 69400 }, { "epoch": 2.990050394107766, "learning_rate": 8.322038828675472e-07, "loss": 4.2962, "step": 69420 }, { "epoch": 2.9909118318473533, "learning_rate": 8.321554008920684e-07, "loss": 4.0723, "step": 69440 }, { "epoch": 2.9917732695869406, "learning_rate": 8.321069189165894e-07, "loss": 3.9664, "step": 69460 }, { "epoch": 2.992634707326528, "learning_rate": 8.320584369411105e-07, "loss": 4.2113, "step": 69480 }, { "epoch": 2.9934961450661155, "learning_rate": 8.320099549656316e-07, "loss": 3.9184, "step": 69500 }, { "epoch": 2.9943575828057027, "learning_rate": 8.319614729901528e-07, "loss": 4.489, "step": 69520 }, { "epoch": 2.99521902054529, "learning_rate": 8.319129910146738e-07, "loss": 4.3433, "step": 69540 }, { "epoch": 2.9960804582848777, "learning_rate": 8.31864509039195e-07, "loss": 4.1775, "step": 69560 }, { "epoch": 2.996941896024465, "learning_rate": 8.318160270637161e-07, "loss": 4.1645, "step": 69580 }, { "epoch": 2.997803333764052, "learning_rate": 8.317675450882371e-07, "loss": 4.0936, "step": 69600 }, { "epoch": 2.9986647715036394, "learning_rate": 8.317190631127582e-07, "loss": 4.243, "step": 69620 }, { "epoch": 2.999526209243227, "learning_rate": 8.316705811372794e-07, "loss": 4.3322, "step": 69640 }, { "epoch": 3.0003876469828143, "learning_rate": 8.316220991618005e-07, "loss": 4.1925, "step": 69660 }, { "epoch": 3.0012490847224016, "learning_rate": 8.315736171863215e-07, "loss": 4.1922, "step": 69680 }, { "epoch": 3.002110522461989, "learning_rate": 8.315251352108427e-07, "loss": 4.2554, "step": 69700 }, { "epoch": 3.0029719602015765, "learning_rate": 8.314766532353639e-07, "loss": 4.001, "step": 69720 }, { "epoch": 3.0038333979411638, "learning_rate": 8.314281712598849e-07, "loss": 4.1319, "step": 69740 }, { "epoch": 3.004694835680751, "learning_rate": 8.313796892844059e-07, "loss": 4.1174, "step": 69760 }, { "epoch": 3.0055562734203387, "learning_rate": 8.313312073089271e-07, "loss": 4.0008, "step": 69780 }, { "epoch": 3.006417711159926, "learning_rate": 8.312827253334482e-07, "loss": 4.1544, "step": 69800 }, { "epoch": 3.007279148899513, "learning_rate": 8.312342433579694e-07, "loss": 4.3267, "step": 69820 }, { "epoch": 3.0081405866391004, "learning_rate": 8.311857613824904e-07, "loss": 4.0394, "step": 69840 }, { "epoch": 3.009002024378688, "learning_rate": 8.311372794070115e-07, "loss": 4.3174, "step": 69860 }, { "epoch": 3.0098634621182754, "learning_rate": 8.310887974315326e-07, "loss": 4.0971, "step": 69880 }, { "epoch": 3.0107248998578626, "learning_rate": 8.310403154560537e-07, "loss": 4.0695, "step": 69900 }, { "epoch": 3.0115863375974503, "learning_rate": 8.309918334805748e-07, "loss": 4.3721, "step": 69920 }, { "epoch": 3.0124477753370376, "learning_rate": 8.30943351505096e-07, "loss": 4.2357, "step": 69940 }, { "epoch": 3.013309213076625, "learning_rate": 8.308948695296171e-07, "loss": 4.1923, "step": 69960 }, { "epoch": 3.014170650816212, "learning_rate": 8.308463875541382e-07, "loss": 4.2256, "step": 69980 }, { "epoch": 3.0150320885557997, "learning_rate": 8.307979055786592e-07, "loss": 4.2484, "step": 70000 }, { "epoch": 3.015893526295387, "learning_rate": 8.307494236031804e-07, "loss": 4.1018, "step": 70020 }, { "epoch": 3.0167549640349742, "learning_rate": 8.307009416277015e-07, "loss": 4.3796, "step": 70040 }, { "epoch": 3.017616401774562, "learning_rate": 8.306524596522226e-07, "loss": 4.2395, "step": 70060 }, { "epoch": 3.018477839514149, "learning_rate": 8.306039776767437e-07, "loss": 3.9913, "step": 70080 }, { "epoch": 3.0193392772537364, "learning_rate": 8.305554957012649e-07, "loss": 4.1971, "step": 70100 }, { "epoch": 3.0202007149933237, "learning_rate": 8.305070137257857e-07, "loss": 4.1074, "step": 70120 }, { "epoch": 3.0210621527329113, "learning_rate": 8.304585317503069e-07, "loss": 4.2932, "step": 70140 }, { "epoch": 3.0219235904724986, "learning_rate": 8.304100497748281e-07, "loss": 4.269, "step": 70160 }, { "epoch": 3.022785028212086, "learning_rate": 8.303615677993493e-07, "loss": 4.1816, "step": 70180 }, { "epoch": 3.0236464659516735, "learning_rate": 8.303130858238704e-07, "loss": 4.2207, "step": 70200 }, { "epoch": 3.0245079036912608, "learning_rate": 8.302646038483915e-07, "loss": 4.1439, "step": 70220 }, { "epoch": 3.025369341430848, "learning_rate": 8.302161218729125e-07, "loss": 3.9728, "step": 70240 }, { "epoch": 3.0262307791704353, "learning_rate": 8.301676398974337e-07, "loss": 3.9491, "step": 70260 }, { "epoch": 3.027092216910023, "learning_rate": 8.301191579219547e-07, "loss": 4.1278, "step": 70280 }, { "epoch": 3.02795365464961, "learning_rate": 8.300706759464758e-07, "loss": 4.1904, "step": 70300 }, { "epoch": 3.0288150923891974, "learning_rate": 8.30022193970997e-07, "loss": 3.9599, "step": 70320 }, { "epoch": 3.029676530128785, "learning_rate": 8.299737119955182e-07, "loss": 4.3606, "step": 70340 }, { "epoch": 3.0305379678683724, "learning_rate": 8.299252300200392e-07, "loss": 4.0411, "step": 70360 }, { "epoch": 3.0313994056079596, "learning_rate": 8.298767480445602e-07, "loss": 4.1926, "step": 70380 }, { "epoch": 3.032260843347547, "learning_rate": 8.298282660690814e-07, "loss": 4.1736, "step": 70400 }, { "epoch": 3.0331222810871346, "learning_rate": 8.297797840936025e-07, "loss": 4.1216, "step": 70420 }, { "epoch": 3.033983718826722, "learning_rate": 8.297313021181236e-07, "loss": 4.0971, "step": 70440 }, { "epoch": 3.034845156566309, "learning_rate": 8.296828201426447e-07, "loss": 4.0617, "step": 70460 }, { "epoch": 3.0357065943058967, "learning_rate": 8.296343381671659e-07, "loss": 3.9797, "step": 70480 }, { "epoch": 3.036568032045484, "learning_rate": 8.295858561916868e-07, "loss": 4.2322, "step": 70500 }, { "epoch": 3.0374294697850712, "learning_rate": 8.29537374216208e-07, "loss": 4.1371, "step": 70520 }, { "epoch": 3.0382909075246585, "learning_rate": 8.294888922407291e-07, "loss": 4.0835, "step": 70540 }, { "epoch": 3.039152345264246, "learning_rate": 8.294404102652503e-07, "loss": 4.0377, "step": 70560 }, { "epoch": 3.0400137830038334, "learning_rate": 8.293919282897713e-07, "loss": 4.1484, "step": 70580 }, { "epoch": 3.0408752207434206, "learning_rate": 8.293434463142925e-07, "loss": 4.0844, "step": 70600 }, { "epoch": 3.0417366584830083, "learning_rate": 8.292949643388136e-07, "loss": 4.1345, "step": 70620 }, { "epoch": 3.0425980962225956, "learning_rate": 8.292464823633346e-07, "loss": 4.1762, "step": 70640 }, { "epoch": 3.043459533962183, "learning_rate": 8.291980003878557e-07, "loss": 4.1371, "step": 70660 }, { "epoch": 3.04432097170177, "learning_rate": 8.291495184123768e-07, "loss": 4.0444, "step": 70680 }, { "epoch": 3.0451824094413578, "learning_rate": 8.29101036436898e-07, "loss": 4.0261, "step": 70700 }, { "epoch": 3.046043847180945, "learning_rate": 8.290525544614192e-07, "loss": 4.31, "step": 70720 }, { "epoch": 3.0469052849205323, "learning_rate": 8.290040724859403e-07, "loss": 4.0623, "step": 70740 }, { "epoch": 3.04776672266012, "learning_rate": 8.289555905104612e-07, "loss": 4.2966, "step": 70760 }, { "epoch": 3.048628160399707, "learning_rate": 8.289071085349824e-07, "loss": 4.2703, "step": 70780 }, { "epoch": 3.0494895981392944, "learning_rate": 8.288586265595036e-07, "loss": 4.2118, "step": 70800 }, { "epoch": 3.0503510358788817, "learning_rate": 8.288101445840246e-07, "loss": 4.1231, "step": 70820 }, { "epoch": 3.0512124736184694, "learning_rate": 8.287616626085457e-07, "loss": 4.0968, "step": 70840 }, { "epoch": 3.0520739113580566, "learning_rate": 8.287131806330669e-07, "loss": 4.1668, "step": 70860 }, { "epoch": 3.052935349097644, "learning_rate": 8.286646986575879e-07, "loss": 4.217, "step": 70880 }, { "epoch": 3.0537967868372315, "learning_rate": 8.28616216682109e-07, "loss": 4.3483, "step": 70900 }, { "epoch": 3.054658224576819, "learning_rate": 8.285677347066301e-07, "loss": 3.878, "step": 70920 }, { "epoch": 3.055519662316406, "learning_rate": 8.285192527311513e-07, "loss": 4.1455, "step": 70940 }, { "epoch": 3.0563811000559933, "learning_rate": 8.284707707556723e-07, "loss": 4.2554, "step": 70960 }, { "epoch": 3.057242537795581, "learning_rate": 8.284222887801935e-07, "loss": 3.9374, "step": 70980 }, { "epoch": 3.058103975535168, "learning_rate": 8.283738068047146e-07, "loss": 4.2194, "step": 71000 }, { "epoch": 3.0589654132747555, "learning_rate": 8.283253248292357e-07, "loss": 4.1952, "step": 71020 }, { "epoch": 3.059826851014343, "learning_rate": 8.282768428537567e-07, "loss": 4.0305, "step": 71040 }, { "epoch": 3.0606882887539304, "learning_rate": 8.282283608782779e-07, "loss": 4.0013, "step": 71060 }, { "epoch": 3.0615497264935176, "learning_rate": 8.28179878902799e-07, "loss": 4.3159, "step": 71080 }, { "epoch": 3.062411164233105, "learning_rate": 8.281313969273202e-07, "loss": 4.2653, "step": 71100 }, { "epoch": 3.0632726019726926, "learning_rate": 8.280829149518412e-07, "loss": 4.2117, "step": 71120 }, { "epoch": 3.06413403971228, "learning_rate": 8.280344329763623e-07, "loss": 3.9771, "step": 71140 }, { "epoch": 3.064995477451867, "learning_rate": 8.279859510008834e-07, "loss": 4.323, "step": 71160 }, { "epoch": 3.0658569151914543, "learning_rate": 8.279374690254045e-07, "loss": 4.0561, "step": 71180 }, { "epoch": 3.066718352931042, "learning_rate": 8.278889870499256e-07, "loss": 4.0266, "step": 71200 }, { "epoch": 3.0675797906706292, "learning_rate": 8.278405050744467e-07, "loss": 4.2683, "step": 71220 }, { "epoch": 3.0684412284102165, "learning_rate": 8.277920230989679e-07, "loss": 4.1258, "step": 71240 }, { "epoch": 3.069302666149804, "learning_rate": 8.277435411234889e-07, "loss": 4.164, "step": 71260 }, { "epoch": 3.0701641038893914, "learning_rate": 8.2769505914801e-07, "loss": 4.1158, "step": 71280 }, { "epoch": 3.0710255416289787, "learning_rate": 8.276465771725311e-07, "loss": 4.2193, "step": 71300 }, { "epoch": 3.071886979368566, "learning_rate": 8.275980951970523e-07, "loss": 3.9963, "step": 71320 }, { "epoch": 3.0727484171081536, "learning_rate": 8.275496132215734e-07, "loss": 4.2502, "step": 71340 }, { "epoch": 3.073609854847741, "learning_rate": 8.275011312460945e-07, "loss": 4.0821, "step": 71360 }, { "epoch": 3.074471292587328, "learning_rate": 8.274526492706156e-07, "loss": 4.2623, "step": 71380 }, { "epoch": 3.075332730326916, "learning_rate": 8.274041672951367e-07, "loss": 4.1051, "step": 71400 }, { "epoch": 3.076194168066503, "learning_rate": 8.273556853196578e-07, "loss": 4.1241, "step": 71420 }, { "epoch": 3.0770556058060903, "learning_rate": 8.273072033441789e-07, "loss": 4.2241, "step": 71440 }, { "epoch": 3.0779170435456775, "learning_rate": 8.272587213687e-07, "loss": 4.1199, "step": 71460 }, { "epoch": 3.078778481285265, "learning_rate": 8.272102393932211e-07, "loss": 4.0423, "step": 71480 }, { "epoch": 3.0796399190248525, "learning_rate": 8.271617574177422e-07, "loss": 4.0132, "step": 71500 }, { "epoch": 3.0805013567644397, "learning_rate": 8.271132754422633e-07, "loss": 4.1384, "step": 71520 }, { "epoch": 3.0813627945040274, "learning_rate": 8.270647934667844e-07, "loss": 4.0186, "step": 71540 }, { "epoch": 3.0822242322436146, "learning_rate": 8.270163114913055e-07, "loss": 4.2077, "step": 71560 }, { "epoch": 3.083085669983202, "learning_rate": 8.269678295158266e-07, "loss": 4.304, "step": 71580 }, { "epoch": 3.083947107722789, "learning_rate": 8.269193475403478e-07, "loss": 4.227, "step": 71600 }, { "epoch": 3.084808545462377, "learning_rate": 8.268708655648689e-07, "loss": 4.2034, "step": 71620 }, { "epoch": 3.085669983201964, "learning_rate": 8.268223835893899e-07, "loss": 4.1402, "step": 71640 }, { "epoch": 3.0865314209415513, "learning_rate": 8.26773901613911e-07, "loss": 4.2338, "step": 71660 }, { "epoch": 3.087392858681139, "learning_rate": 8.267254196384322e-07, "loss": 4.1142, "step": 71680 }, { "epoch": 3.0882542964207262, "learning_rate": 8.266769376629533e-07, "loss": 4.2268, "step": 71700 }, { "epoch": 3.0891157341603135, "learning_rate": 8.266284556874744e-07, "loss": 4.2228, "step": 71720 }, { "epoch": 3.0899771718999007, "learning_rate": 8.265799737119955e-07, "loss": 4.3599, "step": 71740 }, { "epoch": 3.0908386096394884, "learning_rate": 8.265314917365166e-07, "loss": 4.185, "step": 71760 }, { "epoch": 3.0917000473790757, "learning_rate": 8.264830097610376e-07, "loss": 4.0645, "step": 71780 }, { "epoch": 3.092561485118663, "learning_rate": 8.264345277855588e-07, "loss": 4.1716, "step": 71800 }, { "epoch": 3.0934229228582506, "learning_rate": 8.263860458100799e-07, "loss": 4.3095, "step": 71820 }, { "epoch": 3.094284360597838, "learning_rate": 8.26337563834601e-07, "loss": 4.1156, "step": 71840 }, { "epoch": 3.095145798337425, "learning_rate": 8.262890818591221e-07, "loss": 4.3397, "step": 71860 }, { "epoch": 3.0960072360770123, "learning_rate": 8.262405998836433e-07, "loss": 4.1411, "step": 71880 }, { "epoch": 3.0968686738166, "learning_rate": 8.261921179081642e-07, "loss": 4.4665, "step": 71900 }, { "epoch": 3.0977301115561873, "learning_rate": 8.261436359326854e-07, "loss": 4.3184, "step": 71920 }, { "epoch": 3.0985915492957745, "learning_rate": 8.260951539572065e-07, "loss": 4.1395, "step": 71940 }, { "epoch": 3.099452987035362, "learning_rate": 8.260466719817277e-07, "loss": 4.1781, "step": 71960 }, { "epoch": 3.1003144247749495, "learning_rate": 8.259981900062488e-07, "loss": 4.1504, "step": 71980 }, { "epoch": 3.1011758625145367, "learning_rate": 8.2594970803077e-07, "loss": 4.1366, "step": 72000 }, { "epoch": 3.102037300254124, "learning_rate": 8.259012260552909e-07, "loss": 3.9553, "step": 72020 }, { "epoch": 3.1028987379937116, "learning_rate": 8.25852744079812e-07, "loss": 4.1866, "step": 72040 }, { "epoch": 3.103760175733299, "learning_rate": 8.258042621043332e-07, "loss": 4.118, "step": 72060 }, { "epoch": 3.104621613472886, "learning_rate": 8.257557801288542e-07, "loss": 4.0653, "step": 72080 }, { "epoch": 3.105483051212474, "learning_rate": 8.257072981533754e-07, "loss": 4.1731, "step": 72100 }, { "epoch": 3.106344488952061, "learning_rate": 8.256588161778965e-07, "loss": 4.2102, "step": 72120 }, { "epoch": 3.1072059266916483, "learning_rate": 8.256103342024177e-07, "loss": 4.0365, "step": 72140 }, { "epoch": 3.1080673644312355, "learning_rate": 8.255618522269386e-07, "loss": 4.0914, "step": 72160 }, { "epoch": 3.1089288021708232, "learning_rate": 8.255133702514598e-07, "loss": 4.2207, "step": 72180 }, { "epoch": 3.1097902399104105, "learning_rate": 8.254648882759809e-07, "loss": 4.2689, "step": 72200 }, { "epoch": 3.1106516776499977, "learning_rate": 8.254164063005021e-07, "loss": 4.0893, "step": 72220 }, { "epoch": 3.1115131153895854, "learning_rate": 8.253679243250231e-07, "loss": 4.086, "step": 72240 }, { "epoch": 3.1123745531291727, "learning_rate": 8.253194423495443e-07, "loss": 4.2711, "step": 72260 }, { "epoch": 3.11323599086876, "learning_rate": 8.252709603740653e-07, "loss": 4.1362, "step": 72280 }, { "epoch": 3.114097428608347, "learning_rate": 8.252224783985864e-07, "loss": 4.294, "step": 72300 }, { "epoch": 3.114958866347935, "learning_rate": 8.251739964231075e-07, "loss": 3.9799, "step": 72320 }, { "epoch": 3.115820304087522, "learning_rate": 8.251255144476287e-07, "loss": 4.2485, "step": 72340 }, { "epoch": 3.1166817418271093, "learning_rate": 8.250770324721498e-07, "loss": 3.9589, "step": 72360 }, { "epoch": 3.117543179566697, "learning_rate": 8.250285504966708e-07, "loss": 4.2144, "step": 72380 }, { "epoch": 3.1184046173062843, "learning_rate": 8.24980068521192e-07, "loss": 4.0097, "step": 72400 }, { "epoch": 3.1192660550458715, "learning_rate": 8.24931586545713e-07, "loss": 3.9143, "step": 72420 }, { "epoch": 3.1201274927854588, "learning_rate": 8.248831045702342e-07, "loss": 4.2846, "step": 72440 }, { "epoch": 3.1209889305250464, "learning_rate": 8.248346225947552e-07, "loss": 4.1769, "step": 72460 }, { "epoch": 3.1218503682646337, "learning_rate": 8.247861406192764e-07, "loss": 4.1483, "step": 72480 }, { "epoch": 3.122711806004221, "learning_rate": 8.247376586437976e-07, "loss": 4.0892, "step": 72500 }, { "epoch": 3.1235732437438086, "learning_rate": 8.246891766683188e-07, "loss": 4.2878, "step": 72520 }, { "epoch": 3.124434681483396, "learning_rate": 8.246406946928396e-07, "loss": 4.2783, "step": 72540 }, { "epoch": 3.125296119222983, "learning_rate": 8.245922127173608e-07, "loss": 4.2123, "step": 72560 }, { "epoch": 3.1261575569625704, "learning_rate": 8.245437307418819e-07, "loss": 4.2162, "step": 72580 }, { "epoch": 3.127018994702158, "learning_rate": 8.244952487664031e-07, "loss": 4.0662, "step": 72600 }, { "epoch": 3.1278804324417453, "learning_rate": 8.244467667909241e-07, "loss": 4.1451, "step": 72620 }, { "epoch": 3.1287418701813325, "learning_rate": 8.243982848154453e-07, "loss": 3.9821, "step": 72640 }, { "epoch": 3.1296033079209202, "learning_rate": 8.243498028399663e-07, "loss": 4.0284, "step": 72660 }, { "epoch": 3.1304647456605075, "learning_rate": 8.243013208644875e-07, "loss": 4.1408, "step": 72680 }, { "epoch": 3.1313261834000947, "learning_rate": 8.242528388890085e-07, "loss": 4.0879, "step": 72700 }, { "epoch": 3.132187621139682, "learning_rate": 8.242043569135297e-07, "loss": 4.2258, "step": 72720 }, { "epoch": 3.1330490588792697, "learning_rate": 8.241558749380508e-07, "loss": 4.1793, "step": 72740 }, { "epoch": 3.133910496618857, "learning_rate": 8.24107392962572e-07, "loss": 4.1964, "step": 72760 }, { "epoch": 3.134771934358444, "learning_rate": 8.24058910987093e-07, "loss": 3.9955, "step": 72780 }, { "epoch": 3.135633372098032, "learning_rate": 8.240104290116141e-07, "loss": 4.1838, "step": 72800 }, { "epoch": 3.136494809837619, "learning_rate": 8.239619470361352e-07, "loss": 4.0517, "step": 72820 }, { "epoch": 3.1373562475772063, "learning_rate": 8.239134650606563e-07, "loss": 4.2192, "step": 72840 }, { "epoch": 3.1382176853167936, "learning_rate": 8.238649830851774e-07, "loss": 4.1532, "step": 72860 }, { "epoch": 3.1390791230563813, "learning_rate": 8.238165011096986e-07, "loss": 4.0666, "step": 72880 }, { "epoch": 3.1399405607959685, "learning_rate": 8.237680191342197e-07, "loss": 4.0273, "step": 72900 }, { "epoch": 3.1408019985355558, "learning_rate": 8.237195371587406e-07, "loss": 4.2761, "step": 72920 }, { "epoch": 3.1416634362751434, "learning_rate": 8.236710551832618e-07, "loss": 4.4122, "step": 72940 }, { "epoch": 3.1425248740147307, "learning_rate": 8.23622573207783e-07, "loss": 4.2766, "step": 72960 }, { "epoch": 3.143386311754318, "learning_rate": 8.23574091232304e-07, "loss": 3.9567, "step": 72980 }, { "epoch": 3.144247749493905, "learning_rate": 8.235256092568251e-07, "loss": 4.1298, "step": 73000 }, { "epoch": 3.145109187233493, "learning_rate": 8.234771272813463e-07, "loss": 4.2312, "step": 73020 }, { "epoch": 3.14597062497308, "learning_rate": 8.234286453058674e-07, "loss": 4.339, "step": 73040 }, { "epoch": 3.1468320627126674, "learning_rate": 8.233801633303884e-07, "loss": 4.1877, "step": 73060 }, { "epoch": 3.147693500452255, "learning_rate": 8.233316813549095e-07, "loss": 4.1944, "step": 73080 }, { "epoch": 3.1485549381918423, "learning_rate": 8.232831993794307e-07, "loss": 3.9856, "step": 73100 }, { "epoch": 3.1494163759314295, "learning_rate": 8.232347174039518e-07, "loss": 3.9332, "step": 73120 }, { "epoch": 3.150277813671017, "learning_rate": 8.231862354284729e-07, "loss": 3.9824, "step": 73140 }, { "epoch": 3.1511392514106045, "learning_rate": 8.23137753452994e-07, "loss": 4.0645, "step": 73160 }, { "epoch": 3.1520006891501917, "learning_rate": 8.230892714775151e-07, "loss": 4.3052, "step": 73180 }, { "epoch": 3.152862126889779, "learning_rate": 8.230407895020362e-07, "loss": 4.2081, "step": 73200 }, { "epoch": 3.153723564629366, "learning_rate": 8.229923075265573e-07, "loss": 4.3285, "step": 73220 }, { "epoch": 3.154585002368954, "learning_rate": 8.229438255510784e-07, "loss": 4.1318, "step": 73240 }, { "epoch": 3.155446440108541, "learning_rate": 8.228953435755996e-07, "loss": 4.1297, "step": 73260 }, { "epoch": 3.1563078778481284, "learning_rate": 8.228468616001207e-07, "loss": 4.1799, "step": 73280 }, { "epoch": 3.157169315587716, "learning_rate": 8.227983796246417e-07, "loss": 4.1386, "step": 73300 }, { "epoch": 3.1580307533273033, "learning_rate": 8.227498976491628e-07, "loss": 3.901, "step": 73320 }, { "epoch": 3.1588921910668906, "learning_rate": 8.22701415673684e-07, "loss": 4.0359, "step": 73340 }, { "epoch": 3.159753628806478, "learning_rate": 8.22652933698205e-07, "loss": 4.4535, "step": 73360 }, { "epoch": 3.1606150665460655, "learning_rate": 8.226044517227261e-07, "loss": 4.1214, "step": 73380 }, { "epoch": 3.1614765042856527, "learning_rate": 8.225559697472473e-07, "loss": 4.0811, "step": 73400 }, { "epoch": 3.16233794202524, "learning_rate": 8.225074877717684e-07, "loss": 4.3156, "step": 73420 }, { "epoch": 3.1631993797648277, "learning_rate": 8.224590057962894e-07, "loss": 4.1207, "step": 73440 }, { "epoch": 3.164060817504415, "learning_rate": 8.224105238208105e-07, "loss": 3.9015, "step": 73460 }, { "epoch": 3.164922255244002, "learning_rate": 8.223620418453317e-07, "loss": 4.2393, "step": 73480 }, { "epoch": 3.1657836929835894, "learning_rate": 8.223135598698529e-07, "loss": 4.0271, "step": 73500 }, { "epoch": 3.166645130723177, "learning_rate": 8.222650778943739e-07, "loss": 4.0875, "step": 73520 }, { "epoch": 3.1675065684627643, "learning_rate": 8.22216595918895e-07, "loss": 4.0711, "step": 73540 }, { "epoch": 3.1683680062023516, "learning_rate": 8.221681139434161e-07, "loss": 4.228, "step": 73560 }, { "epoch": 3.1692294439419393, "learning_rate": 8.221196319679373e-07, "loss": 4.3021, "step": 73580 }, { "epoch": 3.1700908816815265, "learning_rate": 8.220711499924583e-07, "loss": 4.0456, "step": 73600 }, { "epoch": 3.1709523194211138, "learning_rate": 8.220226680169794e-07, "loss": 4.1921, "step": 73620 }, { "epoch": 3.171813757160701, "learning_rate": 8.219741860415006e-07, "loss": 4.2232, "step": 73640 }, { "epoch": 3.1726751949002887, "learning_rate": 8.219257040660216e-07, "loss": 4.3585, "step": 73660 }, { "epoch": 3.173536632639876, "learning_rate": 8.218772220905426e-07, "loss": 4.1887, "step": 73680 }, { "epoch": 3.174398070379463, "learning_rate": 8.218287401150638e-07, "loss": 3.9149, "step": 73700 }, { "epoch": 3.175259508119051, "learning_rate": 8.21780258139585e-07, "loss": 4.0955, "step": 73720 }, { "epoch": 3.176120945858638, "learning_rate": 8.21731776164106e-07, "loss": 3.9769, "step": 73740 }, { "epoch": 3.1769823835982254, "learning_rate": 8.216832941886272e-07, "loss": 4.1118, "step": 73760 }, { "epoch": 3.1778438213378126, "learning_rate": 8.216348122131484e-07, "loss": 4.1992, "step": 73780 }, { "epoch": 3.1787052590774003, "learning_rate": 8.215863302376694e-07, "loss": 4.0834, "step": 73800 }, { "epoch": 3.1795666968169876, "learning_rate": 8.215378482621904e-07, "loss": 4.041, "step": 73820 }, { "epoch": 3.180428134556575, "learning_rate": 8.214893662867116e-07, "loss": 4.1296, "step": 73840 }, { "epoch": 3.1812895722961625, "learning_rate": 8.214408843112327e-07, "loss": 4.1386, "step": 73860 }, { "epoch": 3.1821510100357497, "learning_rate": 8.213924023357539e-07, "loss": 4.5436, "step": 73880 }, { "epoch": 3.183012447775337, "learning_rate": 8.213439203602749e-07, "loss": 4.0356, "step": 73900 }, { "epoch": 3.1838738855149242, "learning_rate": 8.21295438384796e-07, "loss": 4.2839, "step": 73920 }, { "epoch": 3.184735323254512, "learning_rate": 8.212469564093171e-07, "loss": 4.2784, "step": 73940 }, { "epoch": 3.185596760994099, "learning_rate": 8.211984744338382e-07, "loss": 4.1857, "step": 73960 }, { "epoch": 3.1864581987336864, "learning_rate": 8.211499924583593e-07, "loss": 4.0885, "step": 73980 }, { "epoch": 3.187319636473274, "learning_rate": 8.211015104828804e-07, "loss": 4.1883, "step": 74000 }, { "epoch": 3.1881810742128613, "learning_rate": 8.210530285074016e-07, "loss": 3.9912, "step": 74020 }, { "epoch": 3.1890425119524486, "learning_rate": 8.210045465319227e-07, "loss": 4.3197, "step": 74040 }, { "epoch": 3.189903949692036, "learning_rate": 8.209560645564437e-07, "loss": 4.183, "step": 74060 }, { "epoch": 3.1907653874316235, "learning_rate": 8.209075825809648e-07, "loss": 4.1738, "step": 74080 }, { "epoch": 3.1916268251712108, "learning_rate": 8.20859100605486e-07, "loss": 4.2608, "step": 74100 }, { "epoch": 3.192488262910798, "learning_rate": 8.208106186300071e-07, "loss": 4.0824, "step": 74120 }, { "epoch": 3.1933497006503853, "learning_rate": 8.207621366545282e-07, "loss": 4.1715, "step": 74140 }, { "epoch": 3.194211138389973, "learning_rate": 8.207136546790493e-07, "loss": 4.0035, "step": 74160 }, { "epoch": 3.19507257612956, "learning_rate": 8.206651727035705e-07, "loss": 4.2476, "step": 74180 }, { "epoch": 3.1959340138691474, "learning_rate": 8.206166907280914e-07, "loss": 4.189, "step": 74200 }, { "epoch": 3.196795451608735, "learning_rate": 8.205682087526126e-07, "loss": 4.1405, "step": 74220 }, { "epoch": 3.1976568893483224, "learning_rate": 8.205197267771337e-07, "loss": 4.1466, "step": 74240 }, { "epoch": 3.1985183270879096, "learning_rate": 8.204712448016549e-07, "loss": 4.0237, "step": 74260 }, { "epoch": 3.199379764827497, "learning_rate": 8.204227628261759e-07, "loss": 4.168, "step": 74280 }, { "epoch": 3.2002412025670846, "learning_rate": 8.203742808506972e-07, "loss": 4.1347, "step": 74300 }, { "epoch": 3.201102640306672, "learning_rate": 8.203257988752181e-07, "loss": 3.9789, "step": 74320 }, { "epoch": 3.201964078046259, "learning_rate": 8.202773168997392e-07, "loss": 3.9598, "step": 74340 }, { "epoch": 3.2028255157858467, "learning_rate": 8.202288349242603e-07, "loss": 3.911, "step": 74360 }, { "epoch": 3.203686953525434, "learning_rate": 8.201803529487815e-07, "loss": 4.0397, "step": 74380 }, { "epoch": 3.2045483912650212, "learning_rate": 8.201318709733026e-07, "loss": 4.0569, "step": 74400 }, { "epoch": 3.2054098290046085, "learning_rate": 8.200833889978237e-07, "loss": 4.0304, "step": 74420 }, { "epoch": 3.206271266744196, "learning_rate": 8.200349070223447e-07, "loss": 4.16, "step": 74440 }, { "epoch": 3.2071327044837834, "learning_rate": 8.199864250468658e-07, "loss": 4.3619, "step": 74460 }, { "epoch": 3.2079941422233706, "learning_rate": 8.19937943071387e-07, "loss": 4.2681, "step": 74480 }, { "epoch": 3.2088555799629583, "learning_rate": 8.198894610959081e-07, "loss": 4.1484, "step": 74500 }, { "epoch": 3.2097170177025456, "learning_rate": 8.198409791204292e-07, "loss": 4.0033, "step": 74520 }, { "epoch": 3.210578455442133, "learning_rate": 8.197924971449503e-07, "loss": 4.0437, "step": 74540 }, { "epoch": 3.21143989318172, "learning_rate": 8.197440151694715e-07, "loss": 4.1846, "step": 74560 }, { "epoch": 3.2123013309213078, "learning_rate": 8.196955331939925e-07, "loss": 4.3057, "step": 74580 }, { "epoch": 3.213162768660895, "learning_rate": 8.196470512185136e-07, "loss": 4.0408, "step": 74600 }, { "epoch": 3.2140242064004823, "learning_rate": 8.195985692430348e-07, "loss": 4.1349, "step": 74620 }, { "epoch": 3.21488564414007, "learning_rate": 8.195500872675558e-07, "loss": 4.0433, "step": 74640 }, { "epoch": 3.215747081879657, "learning_rate": 8.19501605292077e-07, "loss": 4.12, "step": 74660 }, { "epoch": 3.2166085196192444, "learning_rate": 8.194531233165981e-07, "loss": 4.1219, "step": 74680 }, { "epoch": 3.2174699573588317, "learning_rate": 8.194046413411191e-07, "loss": 4.2315, "step": 74700 }, { "epoch": 3.2183313950984194, "learning_rate": 8.193561593656402e-07, "loss": 4.139, "step": 74720 }, { "epoch": 3.2191928328380066, "learning_rate": 8.193076773901613e-07, "loss": 4.004, "step": 74740 }, { "epoch": 3.220054270577594, "learning_rate": 8.192591954146825e-07, "loss": 4.3765, "step": 74760 }, { "epoch": 3.2209157083171815, "learning_rate": 8.192107134392036e-07, "loss": 4.3537, "step": 74780 }, { "epoch": 3.221777146056769, "learning_rate": 8.191622314637247e-07, "loss": 4.1181, "step": 74800 }, { "epoch": 3.222638583796356, "learning_rate": 8.191137494882457e-07, "loss": 4.3242, "step": 74820 }, { "epoch": 3.2235000215359433, "learning_rate": 8.190652675127669e-07, "loss": 4.1551, "step": 74840 }, { "epoch": 3.224361459275531, "learning_rate": 8.19016785537288e-07, "loss": 4.2335, "step": 74860 }, { "epoch": 3.225222897015118, "learning_rate": 8.189683035618091e-07, "loss": 4.1713, "step": 74880 }, { "epoch": 3.2260843347547055, "learning_rate": 8.189198215863302e-07, "loss": 4.1811, "step": 74900 }, { "epoch": 3.226945772494293, "learning_rate": 8.188713396108514e-07, "loss": 4.0747, "step": 74920 }, { "epoch": 3.2278072102338804, "learning_rate": 8.188228576353724e-07, "loss": 4.1766, "step": 74940 }, { "epoch": 3.2286686479734676, "learning_rate": 8.187743756598935e-07, "loss": 4.0803, "step": 74960 }, { "epoch": 3.229530085713055, "learning_rate": 8.187258936844146e-07, "loss": 4.2386, "step": 74980 }, { "epoch": 3.2303915234526426, "learning_rate": 8.186774117089357e-07, "loss": 4.05, "step": 75000 }, { "epoch": 3.23125296119223, "learning_rate": 8.186289297334568e-07, "loss": 4.1587, "step": 75020 }, { "epoch": 3.232114398931817, "learning_rate": 8.18580447757978e-07, "loss": 4.2797, "step": 75040 }, { "epoch": 3.2329758366714048, "learning_rate": 8.185319657824991e-07, "loss": 4.1528, "step": 75060 }, { "epoch": 3.233837274410992, "learning_rate": 8.184834838070201e-07, "loss": 4.2551, "step": 75080 }, { "epoch": 3.2346987121505792, "learning_rate": 8.184350018315412e-07, "loss": 4.0345, "step": 75100 }, { "epoch": 3.2355601498901665, "learning_rate": 8.183865198560624e-07, "loss": 4.0762, "step": 75120 }, { "epoch": 3.236421587629754, "learning_rate": 8.183380378805835e-07, "loss": 4.0778, "step": 75140 }, { "epoch": 3.2372830253693414, "learning_rate": 8.182895559051046e-07, "loss": 4.2175, "step": 75160 }, { "epoch": 3.2381444631089287, "learning_rate": 8.182410739296257e-07, "loss": 4.0608, "step": 75180 }, { "epoch": 3.2390059008485164, "learning_rate": 8.181925919541468e-07, "loss": 3.9309, "step": 75200 }, { "epoch": 3.2398673385881036, "learning_rate": 8.181441099786679e-07, "loss": 4.2056, "step": 75220 }, { "epoch": 3.240728776327691, "learning_rate": 8.180956280031889e-07, "loss": 4.0909, "step": 75240 }, { "epoch": 3.241590214067278, "learning_rate": 8.180471460277101e-07, "loss": 4.293, "step": 75260 }, { "epoch": 3.242451651806866, "learning_rate": 8.179986640522312e-07, "loss": 3.9907, "step": 75280 }, { "epoch": 3.243313089546453, "learning_rate": 8.179501820767524e-07, "loss": 4.128, "step": 75300 }, { "epoch": 3.2441745272860403, "learning_rate": 8.179017001012734e-07, "loss": 4.2091, "step": 75320 }, { "epoch": 3.245035965025628, "learning_rate": 8.178532181257945e-07, "loss": 4.3179, "step": 75340 }, { "epoch": 3.245897402765215, "learning_rate": 8.178047361503156e-07, "loss": 3.9396, "step": 75360 }, { "epoch": 3.2467588405048025, "learning_rate": 8.177562541748368e-07, "loss": 4.4707, "step": 75380 }, { "epoch": 3.2476202782443897, "learning_rate": 8.177077721993578e-07, "loss": 4.0212, "step": 75400 }, { "epoch": 3.2484817159839774, "learning_rate": 8.17659290223879e-07, "loss": 4.2023, "step": 75420 }, { "epoch": 3.2493431537235646, "learning_rate": 8.176108082484001e-07, "loss": 4.2866, "step": 75440 }, { "epoch": 3.250204591463152, "learning_rate": 8.17562326272921e-07, "loss": 4.0377, "step": 75460 }, { "epoch": 3.2510660292027396, "learning_rate": 8.175138442974422e-07, "loss": 4.1171, "step": 75480 }, { "epoch": 3.251927466942327, "learning_rate": 8.174653623219634e-07, "loss": 4.1174, "step": 75500 }, { "epoch": 3.252788904681914, "learning_rate": 8.174168803464845e-07, "loss": 3.9708, "step": 75520 }, { "epoch": 3.2536503424215013, "learning_rate": 8.173683983710055e-07, "loss": 4.0468, "step": 75540 }, { "epoch": 3.254511780161089, "learning_rate": 8.173199163955268e-07, "loss": 4.3976, "step": 75560 }, { "epoch": 3.2553732179006762, "learning_rate": 8.172714344200478e-07, "loss": 4.2645, "step": 75580 }, { "epoch": 3.2562346556402635, "learning_rate": 8.172229524445689e-07, "loss": 4.3369, "step": 75600 }, { "epoch": 3.257096093379851, "learning_rate": 8.171744704690899e-07, "loss": 4.2552, "step": 75620 }, { "epoch": 3.2579575311194384, "learning_rate": 8.171259884936111e-07, "loss": 4.0754, "step": 75640 }, { "epoch": 3.2588189688590257, "learning_rate": 8.170775065181323e-07, "loss": 4.2354, "step": 75660 }, { "epoch": 3.259680406598613, "learning_rate": 8.170290245426534e-07, "loss": 3.9267, "step": 75680 }, { "epoch": 3.2605418443382006, "learning_rate": 8.169805425671744e-07, "loss": 4.1187, "step": 75700 }, { "epoch": 3.261403282077788, "learning_rate": 8.169320605916955e-07, "loss": 4.119, "step": 75720 }, { "epoch": 3.262264719817375, "learning_rate": 8.168835786162167e-07, "loss": 3.9992, "step": 75740 }, { "epoch": 3.263126157556963, "learning_rate": 8.168350966407378e-07, "loss": 4.1048, "step": 75760 }, { "epoch": 3.26398759529655, "learning_rate": 8.167866146652588e-07, "loss": 4.1138, "step": 75780 }, { "epoch": 3.2648490330361373, "learning_rate": 8.1673813268978e-07, "loss": 4.3028, "step": 75800 }, { "epoch": 3.2657104707757245, "learning_rate": 8.166896507143011e-07, "loss": 4.0169, "step": 75820 }, { "epoch": 3.266571908515312, "learning_rate": 8.166411687388221e-07, "loss": 4.1142, "step": 75840 }, { "epoch": 3.2674333462548995, "learning_rate": 8.165926867633432e-07, "loss": 4.0902, "step": 75860 }, { "epoch": 3.2682947839944867, "learning_rate": 8.165442047878644e-07, "loss": 4.2813, "step": 75880 }, { "epoch": 3.2691562217340744, "learning_rate": 8.164957228123855e-07, "loss": 4.2515, "step": 75900 }, { "epoch": 3.2700176594736616, "learning_rate": 8.164472408369066e-07, "loss": 4.1882, "step": 75920 }, { "epoch": 3.270879097213249, "learning_rate": 8.163987588614277e-07, "loss": 4.1375, "step": 75940 }, { "epoch": 3.271740534952836, "learning_rate": 8.163502768859489e-07, "loss": 4.1215, "step": 75960 }, { "epoch": 3.272601972692424, "learning_rate": 8.163017949104699e-07, "loss": 4.1708, "step": 75980 }, { "epoch": 3.273463410432011, "learning_rate": 8.16253312934991e-07, "loss": 4.1516, "step": 76000 }, { "epoch": 3.2743248481715983, "learning_rate": 8.162048309595121e-07, "loss": 3.9983, "step": 76020 }, { "epoch": 3.275186285911186, "learning_rate": 8.161563489840333e-07, "loss": 4.249, "step": 76040 }, { "epoch": 3.2760477236507732, "learning_rate": 8.161078670085544e-07, "loss": 3.9864, "step": 76060 }, { "epoch": 3.2769091613903605, "learning_rate": 8.160593850330754e-07, "loss": 4.0451, "step": 76080 }, { "epoch": 3.2777705991299477, "learning_rate": 8.160109030575965e-07, "loss": 4.2359, "step": 76100 }, { "epoch": 3.2786320368695354, "learning_rate": 8.159624210821177e-07, "loss": 4.2358, "step": 76120 }, { "epoch": 3.2794934746091227, "learning_rate": 8.159139391066387e-07, "loss": 4.3616, "step": 76140 }, { "epoch": 3.28035491234871, "learning_rate": 8.158654571311598e-07, "loss": 4.0616, "step": 76160 }, { "epoch": 3.2812163500882976, "learning_rate": 8.15816975155681e-07, "loss": 3.9489, "step": 76180 }, { "epoch": 3.282077787827885, "learning_rate": 8.157684931802022e-07, "loss": 4.3266, "step": 76200 }, { "epoch": 3.282939225567472, "learning_rate": 8.157200112047231e-07, "loss": 4.308, "step": 76220 }, { "epoch": 3.2838006633070593, "learning_rate": 8.156715292292442e-07, "loss": 3.9456, "step": 76240 }, { "epoch": 3.284662101046647, "learning_rate": 8.156230472537654e-07, "loss": 3.9706, "step": 76260 }, { "epoch": 3.2855235387862343, "learning_rate": 8.155745652782866e-07, "loss": 4.3075, "step": 76280 }, { "epoch": 3.2863849765258215, "learning_rate": 8.155260833028076e-07, "loss": 4.0391, "step": 76300 }, { "epoch": 3.287246414265409, "learning_rate": 8.154776013273287e-07, "loss": 4.1471, "step": 76320 }, { "epoch": 3.2881078520049964, "learning_rate": 8.154291193518499e-07, "loss": 4.1654, "step": 76340 }, { "epoch": 3.2889692897445837, "learning_rate": 8.153806373763709e-07, "loss": 4.0594, "step": 76360 }, { "epoch": 3.289830727484171, "learning_rate": 8.15332155400892e-07, "loss": 4.0196, "step": 76380 }, { "epoch": 3.2906921652237586, "learning_rate": 8.152836734254132e-07, "loss": 3.8641, "step": 76400 }, { "epoch": 3.291553602963346, "learning_rate": 8.152351914499343e-07, "loss": 4.3033, "step": 76420 }, { "epoch": 3.292415040702933, "learning_rate": 8.151867094744553e-07, "loss": 4.0645, "step": 76440 }, { "epoch": 3.293276478442521, "learning_rate": 8.151382274989765e-07, "loss": 3.9754, "step": 76460 }, { "epoch": 3.294137916182108, "learning_rate": 8.150897455234975e-07, "loss": 4.2018, "step": 76480 }, { "epoch": 3.2949993539216953, "learning_rate": 8.150412635480187e-07, "loss": 4.0713, "step": 76500 }, { "epoch": 3.2958607916612825, "learning_rate": 8.149927815725397e-07, "loss": 4.0749, "step": 76520 }, { "epoch": 3.2967222294008702, "learning_rate": 8.149442995970609e-07, "loss": 4.0327, "step": 76540 }, { "epoch": 3.2975836671404575, "learning_rate": 8.14895817621582e-07, "loss": 4.3854, "step": 76560 }, { "epoch": 3.2984451048800447, "learning_rate": 8.148473356461032e-07, "loss": 4.0602, "step": 76580 }, { "epoch": 3.2993065426196324, "learning_rate": 8.147988536706242e-07, "loss": 4.0853, "step": 76600 }, { "epoch": 3.3001679803592197, "learning_rate": 8.147503716951452e-07, "loss": 4.1677, "step": 76620 }, { "epoch": 3.301029418098807, "learning_rate": 8.147018897196664e-07, "loss": 4.0639, "step": 76640 }, { "epoch": 3.301890855838394, "learning_rate": 8.146534077441876e-07, "loss": 4.2143, "step": 76660 }, { "epoch": 3.302752293577982, "learning_rate": 8.146049257687086e-07, "loss": 4.1781, "step": 76680 }, { "epoch": 3.303613731317569, "learning_rate": 8.145564437932297e-07, "loss": 4.1863, "step": 76700 }, { "epoch": 3.3044751690571563, "learning_rate": 8.145079618177509e-07, "loss": 3.9404, "step": 76720 }, { "epoch": 3.305336606796744, "learning_rate": 8.14459479842272e-07, "loss": 4.2133, "step": 76740 }, { "epoch": 3.3061980445363313, "learning_rate": 8.14410997866793e-07, "loss": 4.0929, "step": 76760 }, { "epoch": 3.3070594822759185, "learning_rate": 8.143625158913141e-07, "loss": 4.0367, "step": 76780 }, { "epoch": 3.3079209200155058, "learning_rate": 8.143140339158353e-07, "loss": 4.2925, "step": 76800 }, { "epoch": 3.3087823577550934, "learning_rate": 8.142655519403564e-07, "loss": 3.9315, "step": 76820 }, { "epoch": 3.3096437954946807, "learning_rate": 8.142170699648775e-07, "loss": 4.1322, "step": 76840 }, { "epoch": 3.310505233234268, "learning_rate": 8.141685879893985e-07, "loss": 4.1594, "step": 76860 }, { "epoch": 3.311366670973855, "learning_rate": 8.141201060139197e-07, "loss": 4.1882, "step": 76880 }, { "epoch": 3.312228108713443, "learning_rate": 8.140716240384407e-07, "loss": 4.0579, "step": 76900 }, { "epoch": 3.31308954645303, "learning_rate": 8.140231420629619e-07, "loss": 4.0531, "step": 76920 }, { "epoch": 3.3139509841926174, "learning_rate": 8.13974660087483e-07, "loss": 4.188, "step": 76940 }, { "epoch": 3.3148124219322046, "learning_rate": 8.139261781120042e-07, "loss": 4.1626, "step": 76960 }, { "epoch": 3.3156738596717923, "learning_rate": 8.138776961365252e-07, "loss": 3.9643, "step": 76980 }, { "epoch": 3.3165352974113795, "learning_rate": 8.138292141610463e-07, "loss": 4.1296, "step": 77000 }, { "epoch": 3.317396735150967, "learning_rate": 8.137807321855674e-07, "loss": 4.1892, "step": 77020 }, { "epoch": 3.3182581728905545, "learning_rate": 8.137322502100886e-07, "loss": 4.1864, "step": 77040 }, { "epoch": 3.3191196106301417, "learning_rate": 8.136837682346096e-07, "loss": 4.311, "step": 77060 }, { "epoch": 3.319981048369729, "learning_rate": 8.136352862591308e-07, "loss": 4.1749, "step": 77080 }, { "epoch": 3.320842486109316, "learning_rate": 8.135868042836519e-07, "loss": 3.9428, "step": 77100 }, { "epoch": 3.321703923848904, "learning_rate": 8.135383223081729e-07, "loss": 4.302, "step": 77120 }, { "epoch": 3.322565361588491, "learning_rate": 8.13489840332694e-07, "loss": 4.0964, "step": 77140 }, { "epoch": 3.3234267993280784, "learning_rate": 8.134413583572151e-07, "loss": 4.1406, "step": 77160 }, { "epoch": 3.324288237067666, "learning_rate": 8.133928763817363e-07, "loss": 4.2126, "step": 77180 }, { "epoch": 3.3251496748072533, "learning_rate": 8.133443944062574e-07, "loss": 4.0669, "step": 77200 }, { "epoch": 3.3260111125468406, "learning_rate": 8.132959124307785e-07, "loss": 4.2019, "step": 77220 }, { "epoch": 3.326872550286428, "learning_rate": 8.132474304552995e-07, "loss": 4.1299, "step": 77240 }, { "epoch": 3.3277339880260155, "learning_rate": 8.131989484798207e-07, "loss": 4.1683, "step": 77260 }, { "epoch": 3.3285954257656027, "learning_rate": 8.131504665043418e-07, "loss": 3.9748, "step": 77280 }, { "epoch": 3.32945686350519, "learning_rate": 8.131019845288629e-07, "loss": 4.1484, "step": 77300 }, { "epoch": 3.3303183012447777, "learning_rate": 8.13053502553384e-07, "loss": 4.0326, "step": 77320 }, { "epoch": 3.331179738984365, "learning_rate": 8.130050205779053e-07, "loss": 3.9336, "step": 77340 }, { "epoch": 3.332041176723952, "learning_rate": 8.129565386024262e-07, "loss": 4.1124, "step": 77360 }, { "epoch": 3.3329026144635394, "learning_rate": 8.129080566269473e-07, "loss": 4.0678, "step": 77380 }, { "epoch": 3.333764052203127, "learning_rate": 8.128595746514684e-07, "loss": 4.011, "step": 77400 }, { "epoch": 3.3346254899427143, "learning_rate": 8.128110926759895e-07, "loss": 4.171, "step": 77420 }, { "epoch": 3.3354869276823016, "learning_rate": 8.127626107005106e-07, "loss": 4.1553, "step": 77440 }, { "epoch": 3.3363483654218893, "learning_rate": 8.127141287250318e-07, "loss": 4.1368, "step": 77460 }, { "epoch": 3.3372098031614765, "learning_rate": 8.126656467495529e-07, "loss": 3.9005, "step": 77480 }, { "epoch": 3.3380712409010638, "learning_rate": 8.126171647740739e-07, "loss": 4.0312, "step": 77500 }, { "epoch": 3.338932678640651, "learning_rate": 8.12568682798595e-07, "loss": 4.1016, "step": 77520 }, { "epoch": 3.3397941163802387, "learning_rate": 8.125202008231162e-07, "loss": 4.2076, "step": 77540 }, { "epoch": 3.340655554119826, "learning_rate": 8.124717188476373e-07, "loss": 4.1423, "step": 77560 }, { "epoch": 3.341516991859413, "learning_rate": 8.124232368721584e-07, "loss": 4.0852, "step": 77580 }, { "epoch": 3.342378429599001, "learning_rate": 8.123747548966795e-07, "loss": 4.0709, "step": 77600 }, { "epoch": 3.343239867338588, "learning_rate": 8.123262729212006e-07, "loss": 4.0597, "step": 77620 }, { "epoch": 3.3441013050781754, "learning_rate": 8.122777909457217e-07, "loss": 4.1672, "step": 77640 }, { "epoch": 3.3449627428177626, "learning_rate": 8.122293089702428e-07, "loss": 4.0025, "step": 77660 }, { "epoch": 3.3458241805573503, "learning_rate": 8.121808269947639e-07, "loss": 4.0862, "step": 77680 }, { "epoch": 3.3466856182969376, "learning_rate": 8.12132345019285e-07, "loss": 4.168, "step": 77700 }, { "epoch": 3.347547056036525, "learning_rate": 8.120838630438061e-07, "loss": 4.3096, "step": 77720 }, { "epoch": 3.3484084937761125, "learning_rate": 8.120353810683273e-07, "loss": 4.2381, "step": 77740 }, { "epoch": 3.3492699315156997, "learning_rate": 8.119868990928483e-07, "loss": 3.9373, "step": 77760 }, { "epoch": 3.350131369255287, "learning_rate": 8.119384171173694e-07, "loss": 4.136, "step": 77780 }, { "epoch": 3.3509928069948742, "learning_rate": 8.118899351418905e-07, "loss": 4.0034, "step": 77800 }, { "epoch": 3.351854244734462, "learning_rate": 8.118414531664117e-07, "loss": 4.0162, "step": 77820 }, { "epoch": 3.352715682474049, "learning_rate": 8.117929711909328e-07, "loss": 4.0684, "step": 77840 }, { "epoch": 3.3535771202136364, "learning_rate": 8.117444892154539e-07, "loss": 4.2784, "step": 77860 }, { "epoch": 3.354438557953224, "learning_rate": 8.116960072399749e-07, "loss": 4.1786, "step": 77880 }, { "epoch": 3.3552999956928113, "learning_rate": 8.116475252644961e-07, "loss": 4.1515, "step": 77900 }, { "epoch": 3.3561614334323986, "learning_rate": 8.115990432890172e-07, "loss": 4.2021, "step": 77920 }, { "epoch": 3.357022871171986, "learning_rate": 8.115505613135383e-07, "loss": 4.1623, "step": 77940 }, { "epoch": 3.3578843089115735, "learning_rate": 8.115020793380594e-07, "loss": 4.0848, "step": 77960 }, { "epoch": 3.3587457466511608, "learning_rate": 8.114535973625805e-07, "loss": 4.214, "step": 77980 }, { "epoch": 3.359607184390748, "learning_rate": 8.114051153871016e-07, "loss": 4.2203, "step": 78000 }, { "epoch": 3.3604686221303357, "learning_rate": 8.113566334116226e-07, "loss": 4.0898, "step": 78020 }, { "epoch": 3.361330059869923, "learning_rate": 8.113081514361438e-07, "loss": 4.2142, "step": 78040 }, { "epoch": 3.36219149760951, "learning_rate": 8.112596694606649e-07, "loss": 4.0743, "step": 78060 }, { "epoch": 3.3630529353490974, "learning_rate": 8.112111874851861e-07, "loss": 3.9457, "step": 78080 }, { "epoch": 3.363914373088685, "learning_rate": 8.111627055097071e-07, "loss": 4.1495, "step": 78100 }, { "epoch": 3.3647758108282724, "learning_rate": 8.111142235342283e-07, "loss": 4.2278, "step": 78120 }, { "epoch": 3.3656372485678596, "learning_rate": 8.110657415587493e-07, "loss": 4.1487, "step": 78140 }, { "epoch": 3.3664986863074473, "learning_rate": 8.110172595832705e-07, "loss": 3.9095, "step": 78160 }, { "epoch": 3.3673601240470346, "learning_rate": 8.109687776077916e-07, "loss": 4.0588, "step": 78180 }, { "epoch": 3.368221561786622, "learning_rate": 8.109202956323127e-07, "loss": 4.28, "step": 78200 }, { "epoch": 3.369082999526209, "learning_rate": 8.108718136568338e-07, "loss": 4.0036, "step": 78220 }, { "epoch": 3.3699444372657967, "learning_rate": 8.108233316813549e-07, "loss": 4.1697, "step": 78240 }, { "epoch": 3.370805875005384, "learning_rate": 8.107748497058759e-07, "loss": 4.0887, "step": 78260 }, { "epoch": 3.3716673127449712, "learning_rate": 8.107263677303971e-07, "loss": 4.2456, "step": 78280 }, { "epoch": 3.372528750484559, "learning_rate": 8.106778857549182e-07, "loss": 4.1127, "step": 78300 }, { "epoch": 3.373390188224146, "learning_rate": 8.106294037794392e-07, "loss": 4.0247, "step": 78320 }, { "epoch": 3.3742516259637334, "learning_rate": 8.105809218039604e-07, "loss": 4.2261, "step": 78340 }, { "epoch": 3.3751130637033206, "learning_rate": 8.105324398284816e-07, "loss": 4.2271, "step": 78360 }, { "epoch": 3.3759745014429083, "learning_rate": 8.104839578530027e-07, "loss": 4.0098, "step": 78380 }, { "epoch": 3.3768359391824956, "learning_rate": 8.104354758775236e-07, "loss": 4.2418, "step": 78400 }, { "epoch": 3.377697376922083, "learning_rate": 8.103869939020448e-07, "loss": 4.0991, "step": 78420 }, { "epoch": 3.3785588146616705, "learning_rate": 8.10338511926566e-07, "loss": 4.18, "step": 78440 }, { "epoch": 3.3794202524012578, "learning_rate": 8.102900299510871e-07, "loss": 4.0663, "step": 78460 }, { "epoch": 3.380281690140845, "learning_rate": 8.102415479756081e-07, "loss": 4.3074, "step": 78480 }, { "epoch": 3.3811431278804323, "learning_rate": 8.101930660001293e-07, "loss": 3.9928, "step": 78500 }, { "epoch": 3.38200456562002, "learning_rate": 8.101445840246503e-07, "loss": 4.0386, "step": 78520 }, { "epoch": 3.382866003359607, "learning_rate": 8.100961020491715e-07, "loss": 4.1422, "step": 78540 }, { "epoch": 3.3837274410991944, "learning_rate": 8.100476200736925e-07, "loss": 4.1758, "step": 78560 }, { "epoch": 3.384588878838782, "learning_rate": 8.099991380982137e-07, "loss": 4.0686, "step": 78580 }, { "epoch": 3.3854503165783694, "learning_rate": 8.099506561227349e-07, "loss": 4.2067, "step": 78600 }, { "epoch": 3.3863117543179566, "learning_rate": 8.09902174147256e-07, "loss": 4.0233, "step": 78620 }, { "epoch": 3.387173192057544, "learning_rate": 8.098536921717769e-07, "loss": 4.0246, "step": 78640 }, { "epoch": 3.3880346297971315, "learning_rate": 8.098052101962981e-07, "loss": 4.0137, "step": 78660 }, { "epoch": 3.388896067536719, "learning_rate": 8.097567282208192e-07, "loss": 4.1523, "step": 78680 }, { "epoch": 3.389757505276306, "learning_rate": 8.097082462453403e-07, "loss": 4.0293, "step": 78700 }, { "epoch": 3.3906189430158937, "learning_rate": 8.096597642698614e-07, "loss": 4.1168, "step": 78720 }, { "epoch": 3.391480380755481, "learning_rate": 8.096112822943826e-07, "loss": 4.0346, "step": 78740 }, { "epoch": 3.392341818495068, "learning_rate": 8.095628003189037e-07, "loss": 4.0009, "step": 78760 }, { "epoch": 3.3932032562346555, "learning_rate": 8.095143183434246e-07, "loss": 4.0161, "step": 78780 }, { "epoch": 3.394064693974243, "learning_rate": 8.094658363679458e-07, "loss": 4.1611, "step": 78800 }, { "epoch": 3.3949261317138304, "learning_rate": 8.09417354392467e-07, "loss": 4.1065, "step": 78820 }, { "epoch": 3.3957875694534176, "learning_rate": 8.093688724169881e-07, "loss": 4.0982, "step": 78840 }, { "epoch": 3.3966490071930053, "learning_rate": 8.093203904415091e-07, "loss": 4.2407, "step": 78860 }, { "epoch": 3.3975104449325926, "learning_rate": 8.092719084660303e-07, "loss": 4.1065, "step": 78880 }, { "epoch": 3.39837188267218, "learning_rate": 8.092234264905514e-07, "loss": 4.0542, "step": 78900 }, { "epoch": 3.399233320411767, "learning_rate": 8.091749445150725e-07, "loss": 3.9918, "step": 78920 }, { "epoch": 3.4000947581513548, "learning_rate": 8.091264625395935e-07, "loss": 4.2903, "step": 78940 }, { "epoch": 3.400956195890942, "learning_rate": 8.090779805641147e-07, "loss": 4.0658, "step": 78960 }, { "epoch": 3.4018176336305292, "learning_rate": 8.090294985886359e-07, "loss": 4.1685, "step": 78980 }, { "epoch": 3.402679071370117, "learning_rate": 8.08981016613157e-07, "loss": 4.1341, "step": 79000 }, { "epoch": 3.403540509109704, "learning_rate": 8.089325346376779e-07, "loss": 3.9424, "step": 79020 }, { "epoch": 3.4044019468492914, "learning_rate": 8.088840526621991e-07, "loss": 4.1204, "step": 79040 }, { "epoch": 3.4052633845888787, "learning_rate": 8.088355706867202e-07, "loss": 4.1246, "step": 79060 }, { "epoch": 3.4061248223284664, "learning_rate": 8.087870887112413e-07, "loss": 4.1989, "step": 79080 }, { "epoch": 3.4069862600680536, "learning_rate": 8.087386067357624e-07, "loss": 4.139, "step": 79100 }, { "epoch": 3.407847697807641, "learning_rate": 8.086901247602837e-07, "loss": 4.0823, "step": 79120 }, { "epoch": 3.4087091355472285, "learning_rate": 8.086416427848047e-07, "loss": 4.0169, "step": 79140 }, { "epoch": 3.409570573286816, "learning_rate": 8.085931608093257e-07, "loss": 4.0076, "step": 79160 }, { "epoch": 3.410432011026403, "learning_rate": 8.085446788338468e-07, "loss": 4.1152, "step": 79180 }, { "epoch": 3.4112934487659903, "learning_rate": 8.08496196858368e-07, "loss": 4.0021, "step": 79200 }, { "epoch": 3.412154886505578, "learning_rate": 8.08447714882889e-07, "loss": 4.0888, "step": 79220 }, { "epoch": 3.413016324245165, "learning_rate": 8.083992329074102e-07, "loss": 4.0184, "step": 79240 }, { "epoch": 3.4138777619847525, "learning_rate": 8.083507509319313e-07, "loss": 4.0808, "step": 79260 }, { "epoch": 3.41473919972434, "learning_rate": 8.083022689564524e-07, "loss": 4.2175, "step": 79280 }, { "epoch": 3.4156006374639274, "learning_rate": 8.082537869809734e-07, "loss": 4.1338, "step": 79300 }, { "epoch": 3.4164620752035146, "learning_rate": 8.082053050054945e-07, "loss": 4.032, "step": 79320 }, { "epoch": 3.417323512943102, "learning_rate": 8.081568230300157e-07, "loss": 4.4536, "step": 79340 }, { "epoch": 3.4181849506826896, "learning_rate": 8.081083410545369e-07, "loss": 4.1274, "step": 79360 }, { "epoch": 3.419046388422277, "learning_rate": 8.080598590790579e-07, "loss": 4.0179, "step": 79380 }, { "epoch": 3.419907826161864, "learning_rate": 8.08011377103579e-07, "loss": 4.0286, "step": 79400 }, { "epoch": 3.4207692639014518, "learning_rate": 8.079628951281001e-07, "loss": 4.1304, "step": 79420 }, { "epoch": 3.421630701641039, "learning_rate": 8.079144131526213e-07, "loss": 3.9421, "step": 79440 }, { "epoch": 3.4224921393806262, "learning_rate": 8.078659311771423e-07, "loss": 4.1418, "step": 79460 }, { "epoch": 3.4233535771202135, "learning_rate": 8.078174492016634e-07, "loss": 4.3102, "step": 79480 }, { "epoch": 3.424215014859801, "learning_rate": 8.077689672261846e-07, "loss": 3.9138, "step": 79500 }, { "epoch": 3.4250764525993884, "learning_rate": 8.077204852507058e-07, "loss": 3.9956, "step": 79520 }, { "epoch": 3.4259378903389757, "learning_rate": 8.076720032752267e-07, "loss": 4.123, "step": 79540 }, { "epoch": 3.4267993280785634, "learning_rate": 8.076235212997478e-07, "loss": 4.0827, "step": 79560 }, { "epoch": 3.4276607658181506, "learning_rate": 8.07575039324269e-07, "loss": 3.9543, "step": 79580 }, { "epoch": 3.428522203557738, "learning_rate": 8.0752655734879e-07, "loss": 4.219, "step": 79600 }, { "epoch": 3.429383641297325, "learning_rate": 8.074780753733112e-07, "loss": 4.2555, "step": 79620 }, { "epoch": 3.430245079036913, "learning_rate": 8.074295933978323e-07, "loss": 4.0652, "step": 79640 }, { "epoch": 3.4311065167765, "learning_rate": 8.073811114223534e-07, "loss": 4.0591, "step": 79660 }, { "epoch": 3.4319679545160873, "learning_rate": 8.073326294468744e-07, "loss": 4.1497, "step": 79680 }, { "epoch": 3.432829392255675, "learning_rate": 8.072841474713956e-07, "loss": 3.9374, "step": 79700 }, { "epoch": 3.433690829995262, "learning_rate": 8.072356654959167e-07, "loss": 4.2275, "step": 79720 }, { "epoch": 3.4345522677348495, "learning_rate": 8.071871835204379e-07, "loss": 4.0602, "step": 79740 }, { "epoch": 3.4354137054744367, "learning_rate": 8.071387015449589e-07, "loss": 4.0096, "step": 79760 }, { "epoch": 3.4362751432140244, "learning_rate": 8.0709021956948e-07, "loss": 4.1275, "step": 79780 }, { "epoch": 3.4371365809536116, "learning_rate": 8.070417375940011e-07, "loss": 4.0328, "step": 79800 }, { "epoch": 3.437998018693199, "learning_rate": 8.069932556185223e-07, "loss": 4.162, "step": 79820 }, { "epoch": 3.4388594564327866, "learning_rate": 8.069447736430433e-07, "loss": 4.3089, "step": 79840 }, { "epoch": 3.439720894172374, "learning_rate": 8.068962916675643e-07, "loss": 3.8873, "step": 79860 }, { "epoch": 3.440582331911961, "learning_rate": 8.068478096920856e-07, "loss": 4.2677, "step": 79880 }, { "epoch": 3.4414437696515483, "learning_rate": 8.067993277166068e-07, "loss": 4.2398, "step": 79900 }, { "epoch": 3.442305207391136, "learning_rate": 8.067508457411277e-07, "loss": 3.9996, "step": 79920 }, { "epoch": 3.4431666451307232, "learning_rate": 8.067023637656488e-07, "loss": 3.9097, "step": 79940 }, { "epoch": 3.4440280828703105, "learning_rate": 8.066538817901701e-07, "loss": 4.1524, "step": 79960 }, { "epoch": 3.4448895206098977, "learning_rate": 8.066053998146911e-07, "loss": 4.1301, "step": 79980 }, { "epoch": 3.4457509583494854, "learning_rate": 8.065569178392122e-07, "loss": 4.2439, "step": 80000 }, { "epoch": 3.4466123960890727, "learning_rate": 8.065084358637333e-07, "loss": 4.1968, "step": 80020 }, { "epoch": 3.44747383382866, "learning_rate": 8.064599538882544e-07, "loss": 3.9466, "step": 80040 }, { "epoch": 3.4483352715682476, "learning_rate": 8.064114719127755e-07, "loss": 4.2493, "step": 80060 }, { "epoch": 3.449196709307835, "learning_rate": 8.063629899372966e-07, "loss": 4.1672, "step": 80080 }, { "epoch": 3.450058147047422, "learning_rate": 8.063145079618177e-07, "loss": 3.9515, "step": 80100 }, { "epoch": 3.4509195847870093, "learning_rate": 8.062660259863389e-07, "loss": 3.8628, "step": 80120 }, { "epoch": 3.451781022526597, "learning_rate": 8.062175440108599e-07, "loss": 4.1605, "step": 80140 }, { "epoch": 3.4526424602661843, "learning_rate": 8.061690620353811e-07, "loss": 4.0109, "step": 80160 }, { "epoch": 3.4535038980057715, "learning_rate": 8.061205800599021e-07, "loss": 3.985, "step": 80180 }, { "epoch": 3.4543653357453588, "learning_rate": 8.060720980844232e-07, "loss": 4.147, "step": 80200 }, { "epoch": 3.4552267734849464, "learning_rate": 8.060236161089443e-07, "loss": 4.2112, "step": 80220 }, { "epoch": 3.4560882112245337, "learning_rate": 8.059751341334655e-07, "loss": 4.1059, "step": 80240 }, { "epoch": 3.456949648964121, "learning_rate": 8.059266521579866e-07, "loss": 4.1683, "step": 80260 }, { "epoch": 3.4578110867037086, "learning_rate": 8.058781701825077e-07, "loss": 4.3456, "step": 80280 }, { "epoch": 3.458672524443296, "learning_rate": 8.058296882070287e-07, "loss": 4.1346, "step": 80300 }, { "epoch": 3.459533962182883, "learning_rate": 8.057812062315499e-07, "loss": 4.1153, "step": 80320 }, { "epoch": 3.4603953999224704, "learning_rate": 8.05732724256071e-07, "loss": 3.8168, "step": 80340 }, { "epoch": 3.461256837662058, "learning_rate": 8.056842422805921e-07, "loss": 4.1554, "step": 80360 }, { "epoch": 3.4621182754016453, "learning_rate": 8.056357603051133e-07, "loss": 3.9205, "step": 80380 }, { "epoch": 3.4629797131412325, "learning_rate": 8.055872783296343e-07, "loss": 4.0034, "step": 80400 }, { "epoch": 3.4638411508808202, "learning_rate": 8.055387963541554e-07, "loss": 4.233, "step": 80420 }, { "epoch": 3.4647025886204075, "learning_rate": 8.054903143786765e-07, "loss": 4.2769, "step": 80440 }, { "epoch": 3.4655640263599947, "learning_rate": 8.054418324031976e-07, "loss": 3.8424, "step": 80460 }, { "epoch": 3.466425464099582, "learning_rate": 8.053933504277187e-07, "loss": 3.8662, "step": 80480 }, { "epoch": 3.4672869018391697, "learning_rate": 8.053448684522399e-07, "loss": 3.9894, "step": 80500 }, { "epoch": 3.468148339578757, "learning_rate": 8.05296386476761e-07, "loss": 4.0765, "step": 80520 }, { "epoch": 3.469009777318344, "learning_rate": 8.052479045012821e-07, "loss": 4.0221, "step": 80540 }, { "epoch": 3.469871215057932, "learning_rate": 8.051994225258031e-07, "loss": 4.044, "step": 80560 }, { "epoch": 3.470732652797519, "learning_rate": 8.051509405503242e-07, "loss": 4.0196, "step": 80580 }, { "epoch": 3.4715940905371063, "learning_rate": 8.051024585748454e-07, "loss": 4.1307, "step": 80600 }, { "epoch": 3.4724555282766936, "learning_rate": 8.050539765993665e-07, "loss": 3.9635, "step": 80620 }, { "epoch": 3.4733169660162813, "learning_rate": 8.050054946238876e-07, "loss": 4.1048, "step": 80640 }, { "epoch": 3.4741784037558685, "learning_rate": 8.049570126484087e-07, "loss": 3.9423, "step": 80660 }, { "epoch": 3.4750398414954558, "learning_rate": 8.049085306729297e-07, "loss": 3.9662, "step": 80680 }, { "epoch": 3.4759012792350434, "learning_rate": 8.048600486974509e-07, "loss": 4.0199, "step": 80700 }, { "epoch": 3.4767627169746307, "learning_rate": 8.04811566721972e-07, "loss": 4.1143, "step": 80720 }, { "epoch": 3.477624154714218, "learning_rate": 8.047630847464931e-07, "loss": 4.2284, "step": 80740 }, { "epoch": 3.478485592453805, "learning_rate": 8.047146027710142e-07, "loss": 4.204, "step": 80760 }, { "epoch": 3.479347030193393, "learning_rate": 8.046661207955354e-07, "loss": 3.8848, "step": 80780 }, { "epoch": 3.48020846793298, "learning_rate": 8.046176388200563e-07, "loss": 4.0762, "step": 80800 }, { "epoch": 3.4810699056725674, "learning_rate": 8.045691568445775e-07, "loss": 4.0738, "step": 80820 }, { "epoch": 3.481931343412155, "learning_rate": 8.045206748690986e-07, "loss": 4.0487, "step": 80840 }, { "epoch": 3.4827927811517423, "learning_rate": 8.044721928936198e-07, "loss": 4.0359, "step": 80860 }, { "epoch": 3.4836542188913295, "learning_rate": 8.044237109181408e-07, "loss": 3.9768, "step": 80880 }, { "epoch": 3.484515656630917, "learning_rate": 8.043752289426621e-07, "loss": 4.0637, "step": 80900 }, { "epoch": 3.4853770943705045, "learning_rate": 8.043267469671831e-07, "loss": 3.9452, "step": 80920 }, { "epoch": 3.4862385321100917, "learning_rate": 8.042782649917041e-07, "loss": 3.9939, "step": 80940 }, { "epoch": 3.487099969849679, "learning_rate": 8.042297830162252e-07, "loss": 4.086, "step": 80960 }, { "epoch": 3.4879614075892666, "learning_rate": 8.041813010407464e-07, "loss": 4.0662, "step": 80980 }, { "epoch": 3.488822845328854, "learning_rate": 8.041328190652675e-07, "loss": 4.1216, "step": 81000 }, { "epoch": 3.489684283068441, "learning_rate": 8.040843370897886e-07, "loss": 3.9984, "step": 81020 }, { "epoch": 3.4905457208080284, "learning_rate": 8.040358551143097e-07, "loss": 3.9435, "step": 81040 }, { "epoch": 3.491407158547616, "learning_rate": 8.039873731388308e-07, "loss": 4.1261, "step": 81060 }, { "epoch": 3.4922685962872033, "learning_rate": 8.039388911633519e-07, "loss": 4.124, "step": 81080 }, { "epoch": 3.4931300340267906, "learning_rate": 8.03890409187873e-07, "loss": 3.9723, "step": 81100 }, { "epoch": 3.4939914717663783, "learning_rate": 8.038419272123941e-07, "loss": 3.957, "step": 81120 }, { "epoch": 3.4948529095059655, "learning_rate": 8.037934452369152e-07, "loss": 3.9559, "step": 81140 }, { "epoch": 3.4957143472455527, "learning_rate": 8.037449632614364e-07, "loss": 4.1893, "step": 81160 }, { "epoch": 3.49657578498514, "learning_rate": 8.036964812859574e-07, "loss": 4.1786, "step": 81180 }, { "epoch": 3.4974372227247277, "learning_rate": 8.036479993104785e-07, "loss": 4.1742, "step": 81200 }, { "epoch": 3.498298660464315, "learning_rate": 8.035995173349997e-07, "loss": 4.2267, "step": 81220 }, { "epoch": 3.499160098203902, "learning_rate": 8.035510353595208e-07, "loss": 4.245, "step": 81240 }, { "epoch": 3.50002153594349, "learning_rate": 8.035025533840418e-07, "loss": 4.1625, "step": 81260 }, { "epoch": 3.500882973683077, "learning_rate": 8.03454071408563e-07, "loss": 3.9312, "step": 81280 }, { "epoch": 3.5017444114226643, "learning_rate": 8.034055894330841e-07, "loss": 4.0937, "step": 81300 }, { "epoch": 3.5026058491622516, "learning_rate": 8.033571074576052e-07, "loss": 4.0108, "step": 81320 }, { "epoch": 3.5034672869018393, "learning_rate": 8.033086254821262e-07, "loss": 4.133, "step": 81340 }, { "epoch": 3.5043287246414265, "learning_rate": 8.032601435066474e-07, "loss": 4.1074, "step": 81360 }, { "epoch": 3.5051901623810138, "learning_rate": 8.032116615311685e-07, "loss": 3.9485, "step": 81380 }, { "epoch": 3.5060516001206015, "learning_rate": 8.031631795556897e-07, "loss": 4.1104, "step": 81400 }, { "epoch": 3.5069130378601887, "learning_rate": 8.031146975802107e-07, "loss": 4.1912, "step": 81420 }, { "epoch": 3.507774475599776, "learning_rate": 8.030662156047318e-07, "loss": 4.0675, "step": 81440 }, { "epoch": 3.508635913339363, "learning_rate": 8.030177336292529e-07, "loss": 4.0326, "step": 81460 }, { "epoch": 3.509497351078951, "learning_rate": 8.029692516537739e-07, "loss": 3.8711, "step": 81480 }, { "epoch": 3.510358788818538, "learning_rate": 8.029207696782951e-07, "loss": 4.0017, "step": 81500 }, { "epoch": 3.5112202265581254, "learning_rate": 8.028722877028163e-07, "loss": 4.1761, "step": 81520 }, { "epoch": 3.512081664297713, "learning_rate": 8.028238057273374e-07, "loss": 4.0444, "step": 81540 }, { "epoch": 3.5129431020373003, "learning_rate": 8.027753237518584e-07, "loss": 4.0998, "step": 81560 }, { "epoch": 3.5138045397768876, "learning_rate": 8.027268417763795e-07, "loss": 4.1265, "step": 81580 }, { "epoch": 3.514665977516475, "learning_rate": 8.026783598009007e-07, "loss": 4.1821, "step": 81600 }, { "epoch": 3.5155274152560625, "learning_rate": 8.026298778254218e-07, "loss": 3.9004, "step": 81620 }, { "epoch": 3.5163888529956497, "learning_rate": 8.025813958499427e-07, "loss": 4.1587, "step": 81640 }, { "epoch": 3.517250290735237, "learning_rate": 8.02532913874464e-07, "loss": 4.0986, "step": 81660 }, { "epoch": 3.5181117284748247, "learning_rate": 8.024844318989851e-07, "loss": 4.1177, "step": 81680 }, { "epoch": 3.518973166214412, "learning_rate": 8.024359499235062e-07, "loss": 4.1484, "step": 81700 }, { "epoch": 3.519834603953999, "learning_rate": 8.023874679480272e-07, "loss": 3.9485, "step": 81720 }, { "epoch": 3.5206960416935864, "learning_rate": 8.023389859725485e-07, "loss": 4.1861, "step": 81740 }, { "epoch": 3.521557479433174, "learning_rate": 8.022905039970695e-07, "loss": 4.1655, "step": 81760 }, { "epoch": 3.5224189171727613, "learning_rate": 8.022420220215906e-07, "loss": 4.0772, "step": 81780 }, { "epoch": 3.5232803549123486, "learning_rate": 8.021935400461117e-07, "loss": 3.9857, "step": 81800 }, { "epoch": 3.5241417926519363, "learning_rate": 8.021450580706328e-07, "loss": 3.828, "step": 81820 }, { "epoch": 3.5250032303915235, "learning_rate": 8.020965760951539e-07, "loss": 4.0148, "step": 81840 }, { "epoch": 3.5258646681311108, "learning_rate": 8.02048094119675e-07, "loss": 4.0338, "step": 81860 }, { "epoch": 3.526726105870698, "learning_rate": 8.019996121441961e-07, "loss": 4.0018, "step": 81880 }, { "epoch": 3.5275875436102857, "learning_rate": 8.019511301687173e-07, "loss": 4.0669, "step": 81900 }, { "epoch": 3.528448981349873, "learning_rate": 8.019026481932384e-07, "loss": 4.2146, "step": 81920 }, { "epoch": 3.52931041908946, "learning_rate": 8.018541662177595e-07, "loss": 4.1122, "step": 81940 }, { "epoch": 3.530171856829048, "learning_rate": 8.018056842422805e-07, "loss": 4.1816, "step": 81960 }, { "epoch": 3.531033294568635, "learning_rate": 8.017572022668017e-07, "loss": 4.1509, "step": 81980 }, { "epoch": 3.5318947323082224, "learning_rate": 8.017087202913228e-07, "loss": 4.1564, "step": 82000 }, { "epoch": 3.5327561700478096, "learning_rate": 8.016602383158438e-07, "loss": 4.0108, "step": 82020 }, { "epoch": 3.5336176077873973, "learning_rate": 8.01611756340365e-07, "loss": 4.1146, "step": 82040 }, { "epoch": 3.5344790455269846, "learning_rate": 8.015632743648862e-07, "loss": 4.1704, "step": 82060 }, { "epoch": 3.535340483266572, "learning_rate": 8.015147923894071e-07, "loss": 3.7886, "step": 82080 }, { "epoch": 3.5362019210061595, "learning_rate": 8.014663104139282e-07, "loss": 4.2163, "step": 82100 }, { "epoch": 3.5370633587457467, "learning_rate": 8.014178284384494e-07, "loss": 4.0676, "step": 82120 }, { "epoch": 3.537924796485334, "learning_rate": 8.013693464629706e-07, "loss": 4.1845, "step": 82140 }, { "epoch": 3.5387862342249212, "learning_rate": 8.013208644874917e-07, "loss": 4.2671, "step": 82160 }, { "epoch": 3.539647671964509, "learning_rate": 8.012723825120127e-07, "loss": 3.924, "step": 82180 }, { "epoch": 3.540509109704096, "learning_rate": 8.012239005365338e-07, "loss": 3.8993, "step": 82200 }, { "epoch": 3.5413705474436834, "learning_rate": 8.011754185610549e-07, "loss": 3.9327, "step": 82220 }, { "epoch": 3.542231985183271, "learning_rate": 8.01126936585576e-07, "loss": 3.818, "step": 82240 }, { "epoch": 3.5430934229228583, "learning_rate": 8.010784546100971e-07, "loss": 3.9274, "step": 82260 }, { "epoch": 3.5439548606624456, "learning_rate": 8.010299726346183e-07, "loss": 4.0804, "step": 82280 }, { "epoch": 3.544816298402033, "learning_rate": 8.009814906591394e-07, "loss": 3.9787, "step": 82300 }, { "epoch": 3.5456777361416205, "learning_rate": 8.009330086836605e-07, "loss": 4.2806, "step": 82320 }, { "epoch": 3.5465391738812078, "learning_rate": 8.008845267081815e-07, "loss": 4.0621, "step": 82340 }, { "epoch": 3.547400611620795, "learning_rate": 8.008360447327027e-07, "loss": 4.116, "step": 82360 }, { "epoch": 3.5482620493603827, "learning_rate": 8.007875627572237e-07, "loss": 4.0408, "step": 82380 }, { "epoch": 3.54912348709997, "learning_rate": 8.007390807817449e-07, "loss": 4.0807, "step": 82400 }, { "epoch": 3.549984924839557, "learning_rate": 8.00690598806266e-07, "loss": 4.076, "step": 82420 }, { "epoch": 3.5508463625791444, "learning_rate": 8.006421168307872e-07, "loss": 3.9425, "step": 82440 }, { "epoch": 3.551707800318732, "learning_rate": 8.005936348553081e-07, "loss": 4.1282, "step": 82460 }, { "epoch": 3.5525692380583194, "learning_rate": 8.005451528798293e-07, "loss": 4.0561, "step": 82480 }, { "epoch": 3.5534306757979066, "learning_rate": 8.004966709043504e-07, "loss": 4.1453, "step": 82500 }, { "epoch": 3.5542921135374943, "learning_rate": 8.004481889288716e-07, "loss": 4.0067, "step": 82520 }, { "epoch": 3.5551535512770815, "learning_rate": 8.003997069533926e-07, "loss": 4.0551, "step": 82540 }, { "epoch": 3.556014989016669, "learning_rate": 8.003512249779137e-07, "loss": 4.0325, "step": 82560 }, { "epoch": 3.556876426756256, "learning_rate": 8.003027430024348e-07, "loss": 4.1309, "step": 82580 }, { "epoch": 3.5577378644958437, "learning_rate": 8.00254261026956e-07, "loss": 4.1027, "step": 82600 }, { "epoch": 3.558599302235431, "learning_rate": 8.00205779051477e-07, "loss": 4.0887, "step": 82620 }, { "epoch": 3.559460739975018, "learning_rate": 8.001572970759981e-07, "loss": 4.1003, "step": 82640 }, { "epoch": 3.560322177714606, "learning_rate": 8.001088151005193e-07, "loss": 3.9115, "step": 82660 }, { "epoch": 3.561183615454193, "learning_rate": 8.000603331250406e-07, "loss": 3.8889, "step": 82680 }, { "epoch": 3.5620450531937804, "learning_rate": 8.000118511495615e-07, "loss": 4.2752, "step": 82700 }, { "epoch": 3.5629064909333676, "learning_rate": 7.999633691740825e-07, "loss": 4.1105, "step": 82720 }, { "epoch": 3.563767928672955, "learning_rate": 7.999148871986037e-07, "loss": 3.9305, "step": 82740 }, { "epoch": 3.5646293664125426, "learning_rate": 7.998664052231247e-07, "loss": 4.221, "step": 82760 }, { "epoch": 3.56549080415213, "learning_rate": 7.998179232476459e-07, "loss": 3.8642, "step": 82780 }, { "epoch": 3.5663522418917175, "learning_rate": 7.99769441272167e-07, "loss": 4.0657, "step": 82800 }, { "epoch": 3.5672136796313048, "learning_rate": 7.997209592966882e-07, "loss": 4.0627, "step": 82820 }, { "epoch": 3.568075117370892, "learning_rate": 7.996724773212091e-07, "loss": 3.9574, "step": 82840 }, { "epoch": 3.5689365551104792, "learning_rate": 7.996239953457303e-07, "loss": 4.0379, "step": 82860 }, { "epoch": 3.5697979928500665, "learning_rate": 7.995755133702514e-07, "loss": 3.9904, "step": 82880 }, { "epoch": 3.570659430589654, "learning_rate": 7.995270313947726e-07, "loss": 3.9798, "step": 82900 }, { "epoch": 3.5715208683292414, "learning_rate": 7.994785494192936e-07, "loss": 3.9636, "step": 82920 }, { "epoch": 3.572382306068829, "learning_rate": 7.994300674438148e-07, "loss": 4.0279, "step": 82940 }, { "epoch": 3.5732437438084164, "learning_rate": 7.993815854683359e-07, "loss": 3.9741, "step": 82960 }, { "epoch": 3.5741051815480036, "learning_rate": 7.99333103492857e-07, "loss": 3.9285, "step": 82980 }, { "epoch": 3.574966619287591, "learning_rate": 7.992846215173781e-07, "loss": 4.0888, "step": 83000 }, { "epoch": 3.575828057027178, "learning_rate": 7.992361395418992e-07, "loss": 3.9734, "step": 83020 }, { "epoch": 3.576689494766766, "learning_rate": 7.991876575664203e-07, "loss": 4.2585, "step": 83040 }, { "epoch": 3.577550932506353, "learning_rate": 7.991391755909414e-07, "loss": 3.9455, "step": 83060 }, { "epoch": 3.5784123702459407, "learning_rate": 7.990906936154625e-07, "loss": 4.0289, "step": 83080 }, { "epoch": 3.579273807985528, "learning_rate": 7.990422116399835e-07, "loss": 4.2913, "step": 83100 }, { "epoch": 3.580135245725115, "learning_rate": 7.989937296645047e-07, "loss": 3.8413, "step": 83120 }, { "epoch": 3.5809966834647025, "learning_rate": 7.989452476890258e-07, "loss": 4.0535, "step": 83140 }, { "epoch": 3.5818581212042897, "learning_rate": 7.988967657135469e-07, "loss": 4.0097, "step": 83160 }, { "epoch": 3.5827195589438774, "learning_rate": 7.98848283738068e-07, "loss": 3.9713, "step": 83180 }, { "epoch": 3.5835809966834646, "learning_rate": 7.987998017625892e-07, "loss": 3.8081, "step": 83200 }, { "epoch": 3.5844424344230523, "learning_rate": 7.987513197871102e-07, "loss": 4.1517, "step": 83220 }, { "epoch": 3.5853038721626396, "learning_rate": 7.987028378116313e-07, "loss": 4.1589, "step": 83240 }, { "epoch": 3.586165309902227, "learning_rate": 7.986543558361524e-07, "loss": 4.0807, "step": 83260 }, { "epoch": 3.587026747641814, "learning_rate": 7.986058738606736e-07, "loss": 4.1116, "step": 83280 }, { "epoch": 3.5878881853814013, "learning_rate": 7.985573918851946e-07, "loss": 3.9938, "step": 83300 }, { "epoch": 3.588749623120989, "learning_rate": 7.985089099097158e-07, "loss": 4.0718, "step": 83320 }, { "epoch": 3.5896110608605762, "learning_rate": 7.984604279342369e-07, "loss": 3.8139, "step": 83340 }, { "epoch": 3.590472498600164, "learning_rate": 7.984119459587579e-07, "loss": 4.0234, "step": 83360 }, { "epoch": 3.591333936339751, "learning_rate": 7.98363463983279e-07, "loss": 4.1785, "step": 83380 }, { "epoch": 3.5921953740793384, "learning_rate": 7.983149820078002e-07, "loss": 3.8668, "step": 83400 }, { "epoch": 3.5930568118189257, "learning_rate": 7.982665000323212e-07, "loss": 3.9668, "step": 83420 }, { "epoch": 3.593918249558513, "learning_rate": 7.982180180568424e-07, "loss": 3.9363, "step": 83440 }, { "epoch": 3.5947796872981006, "learning_rate": 7.981695360813635e-07, "loss": 3.9967, "step": 83460 }, { "epoch": 3.595641125037688, "learning_rate": 7.981210541058846e-07, "loss": 3.844, "step": 83480 }, { "epoch": 3.5965025627772755, "learning_rate": 7.980725721304057e-07, "loss": 3.9268, "step": 83500 }, { "epoch": 3.597364000516863, "learning_rate": 7.980240901549269e-07, "loss": 4.1531, "step": 83520 }, { "epoch": 3.59822543825645, "learning_rate": 7.979756081794479e-07, "loss": 3.9779, "step": 83540 }, { "epoch": 3.5990868759960373, "learning_rate": 7.979271262039691e-07, "loss": 3.9095, "step": 83560 }, { "epoch": 3.5999483137356245, "learning_rate": 7.978786442284902e-07, "loss": 3.9255, "step": 83580 }, { "epoch": 3.600809751475212, "learning_rate": 7.978301622530112e-07, "loss": 3.8338, "step": 83600 }, { "epoch": 3.6016711892147995, "learning_rate": 7.977816802775323e-07, "loss": 4.0991, "step": 83620 }, { "epoch": 3.602532626954387, "learning_rate": 7.977331983020534e-07, "loss": 4.0504, "step": 83640 }, { "epoch": 3.6033940646939744, "learning_rate": 7.976847163265745e-07, "loss": 4.2887, "step": 83660 }, { "epoch": 3.6042555024335616, "learning_rate": 7.976362343510957e-07, "loss": 4.0872, "step": 83680 }, { "epoch": 3.605116940173149, "learning_rate": 7.975877523756168e-07, "loss": 4.2421, "step": 83700 }, { "epoch": 3.605978377912736, "learning_rate": 7.975392704001379e-07, "loss": 4.0565, "step": 83720 }, { "epoch": 3.606839815652324, "learning_rate": 7.974907884246589e-07, "loss": 4.077, "step": 83740 }, { "epoch": 3.607701253391911, "learning_rate": 7.974423064491801e-07, "loss": 4.0384, "step": 83760 }, { "epoch": 3.6085626911314987, "learning_rate": 7.973938244737012e-07, "loss": 3.9828, "step": 83780 }, { "epoch": 3.609424128871086, "learning_rate": 7.973453424982223e-07, "loss": 3.8051, "step": 83800 }, { "epoch": 3.6102855666106732, "learning_rate": 7.972968605227434e-07, "loss": 4.0368, "step": 83820 }, { "epoch": 3.6111470043502605, "learning_rate": 7.972483785472645e-07, "loss": 3.9145, "step": 83840 }, { "epoch": 3.6120084420898477, "learning_rate": 7.971998965717856e-07, "loss": 3.9743, "step": 83860 }, { "epoch": 3.6128698798294354, "learning_rate": 7.971514145963067e-07, "loss": 3.9877, "step": 83880 }, { "epoch": 3.6137313175690227, "learning_rate": 7.971029326208278e-07, "loss": 4.1971, "step": 83900 }, { "epoch": 3.61459275530861, "learning_rate": 7.970544506453489e-07, "loss": 4.0465, "step": 83920 }, { "epoch": 3.6154541930481976, "learning_rate": 7.9700596866987e-07, "loss": 4.0264, "step": 83940 }, { "epoch": 3.616315630787785, "learning_rate": 7.969574866943911e-07, "loss": 4.0337, "step": 83960 }, { "epoch": 3.617177068527372, "learning_rate": 7.969090047189122e-07, "loss": 3.9109, "step": 83980 }, { "epoch": 3.6180385062669593, "learning_rate": 7.968605227434333e-07, "loss": 3.8259, "step": 84000 }, { "epoch": 3.618899944006547, "learning_rate": 7.968120407679545e-07, "loss": 4.1498, "step": 84020 }, { "epoch": 3.6197613817461343, "learning_rate": 7.967635587924755e-07, "loss": 4.1478, "step": 84040 }, { "epoch": 3.6206228194857215, "learning_rate": 7.967150768169967e-07, "loss": 3.9209, "step": 84060 }, { "epoch": 3.621484257225309, "learning_rate": 7.966665948415178e-07, "loss": 3.9723, "step": 84080 }, { "epoch": 3.6223456949648964, "learning_rate": 7.96618112866039e-07, "loss": 3.8666, "step": 84100 }, { "epoch": 3.6232071327044837, "learning_rate": 7.965696308905599e-07, "loss": 3.7975, "step": 84120 }, { "epoch": 3.624068570444071, "learning_rate": 7.965211489150811e-07, "loss": 4.0348, "step": 84140 }, { "epoch": 3.6249300081836586, "learning_rate": 7.964726669396022e-07, "loss": 4.156, "step": 84160 }, { "epoch": 3.625791445923246, "learning_rate": 7.964241849641233e-07, "loss": 4.0054, "step": 84180 }, { "epoch": 3.626652883662833, "learning_rate": 7.963757029886444e-07, "loss": 4.2446, "step": 84200 }, { "epoch": 3.627514321402421, "learning_rate": 7.963272210131656e-07, "loss": 4.0446, "step": 84220 }, { "epoch": 3.628375759142008, "learning_rate": 7.962787390376866e-07, "loss": 4.0529, "step": 84240 }, { "epoch": 3.6292371968815953, "learning_rate": 7.962302570622075e-07, "loss": 3.8629, "step": 84260 }, { "epoch": 3.6300986346211825, "learning_rate": 7.961817750867288e-07, "loss": 3.9902, "step": 84280 }, { "epoch": 3.6309600723607702, "learning_rate": 7.9613329311125e-07, "loss": 3.8487, "step": 84300 }, { "epoch": 3.6318215101003575, "learning_rate": 7.960848111357711e-07, "loss": 3.9716, "step": 84320 }, { "epoch": 3.6326829478399447, "learning_rate": 7.960363291602921e-07, "loss": 3.9602, "step": 84340 }, { "epoch": 3.6335443855795324, "learning_rate": 7.959878471848132e-07, "loss": 3.9671, "step": 84360 }, { "epoch": 3.6344058233191197, "learning_rate": 7.959393652093343e-07, "loss": 3.8171, "step": 84380 }, { "epoch": 3.635267261058707, "learning_rate": 7.958908832338555e-07, "loss": 3.9961, "step": 84400 }, { "epoch": 3.636128698798294, "learning_rate": 7.958424012583765e-07, "loss": 4.049, "step": 84420 }, { "epoch": 3.636990136537882, "learning_rate": 7.957939192828977e-07, "loss": 4.0956, "step": 84440 }, { "epoch": 3.637851574277469, "learning_rate": 7.957454373074188e-07, "loss": 3.9901, "step": 84460 }, { "epoch": 3.6387130120170563, "learning_rate": 7.9569695533194e-07, "loss": 3.8085, "step": 84480 }, { "epoch": 3.639574449756644, "learning_rate": 7.956484733564609e-07, "loss": 4.0237, "step": 84500 }, { "epoch": 3.6404358874962313, "learning_rate": 7.955999913809821e-07, "loss": 3.8252, "step": 84520 }, { "epoch": 3.6412973252358185, "learning_rate": 7.955515094055032e-07, "loss": 3.9908, "step": 84540 }, { "epoch": 3.6421587629754058, "learning_rate": 7.955030274300244e-07, "loss": 3.9034, "step": 84560 }, { "epoch": 3.6430202007149934, "learning_rate": 7.954545454545454e-07, "loss": 3.8876, "step": 84580 }, { "epoch": 3.6438816384545807, "learning_rate": 7.954060634790666e-07, "loss": 4.1822, "step": 84600 }, { "epoch": 3.644743076194168, "learning_rate": 7.953575815035876e-07, "loss": 3.8962, "step": 84620 }, { "epoch": 3.6456045139337556, "learning_rate": 7.953090995281087e-07, "loss": 3.9814, "step": 84640 }, { "epoch": 3.646465951673343, "learning_rate": 7.952606175526298e-07, "loss": 3.759, "step": 84660 }, { "epoch": 3.64732738941293, "learning_rate": 7.95212135577151e-07, "loss": 3.8853, "step": 84680 }, { "epoch": 3.6481888271525174, "learning_rate": 7.951636536016721e-07, "loss": 3.8057, "step": 84700 }, { "epoch": 3.649050264892105, "learning_rate": 7.951151716261931e-07, "loss": 4.0741, "step": 84720 }, { "epoch": 3.6499117026316923, "learning_rate": 7.950666896507143e-07, "loss": 3.9503, "step": 84740 }, { "epoch": 3.6507731403712795, "learning_rate": 7.950182076752354e-07, "loss": 3.9285, "step": 84760 }, { "epoch": 3.6516345781108672, "learning_rate": 7.949697256997566e-07, "loss": 3.9181, "step": 84780 }, { "epoch": 3.6524960158504545, "learning_rate": 7.949212437242775e-07, "loss": 3.9765, "step": 84800 }, { "epoch": 3.6533574535900417, "learning_rate": 7.948727617487987e-07, "loss": 4.0099, "step": 84820 }, { "epoch": 3.654218891329629, "learning_rate": 7.948242797733199e-07, "loss": 3.8433, "step": 84840 }, { "epoch": 3.6550803290692166, "learning_rate": 7.94775797797841e-07, "loss": 4.1787, "step": 84860 }, { "epoch": 3.655941766808804, "learning_rate": 7.947273158223619e-07, "loss": 3.6115, "step": 84880 }, { "epoch": 3.656803204548391, "learning_rate": 7.946788338468831e-07, "loss": 4.1178, "step": 84900 }, { "epoch": 3.657664642287979, "learning_rate": 7.946303518714042e-07, "loss": 4.0226, "step": 84920 }, { "epoch": 3.658526080027566, "learning_rate": 7.945818698959253e-07, "loss": 4.1416, "step": 84940 }, { "epoch": 3.6593875177671533, "learning_rate": 7.945333879204464e-07, "loss": 3.9804, "step": 84960 }, { "epoch": 3.6602489555067406, "learning_rate": 7.944849059449676e-07, "loss": 4.0505, "step": 84980 }, { "epoch": 3.6611103932463283, "learning_rate": 7.944364239694886e-07, "loss": 3.7466, "step": 85000 }, { "epoch": 3.6619718309859155, "learning_rate": 7.943879419940097e-07, "loss": 3.9504, "step": 85020 }, { "epoch": 3.6628332687255027, "learning_rate": 7.943394600185308e-07, "loss": 4.1651, "step": 85040 }, { "epoch": 3.6636947064650904, "learning_rate": 7.94290978043052e-07, "loss": 4.0615, "step": 85060 }, { "epoch": 3.6645561442046777, "learning_rate": 7.942424960675731e-07, "loss": 3.938, "step": 85080 }, { "epoch": 3.665417581944265, "learning_rate": 7.941940140920942e-07, "loss": 3.7943, "step": 85100 }, { "epoch": 3.666279019683852, "learning_rate": 7.941455321166153e-07, "loss": 4.0416, "step": 85120 }, { "epoch": 3.66714045742344, "learning_rate": 7.940970501411364e-07, "loss": 3.9876, "step": 85140 }, { "epoch": 3.668001895163027, "learning_rate": 7.940485681656574e-07, "loss": 3.9365, "step": 85160 }, { "epoch": 3.6688633329026143, "learning_rate": 7.940000861901785e-07, "loss": 3.9608, "step": 85180 }, { "epoch": 3.669724770642202, "learning_rate": 7.939516042146996e-07, "loss": 3.8254, "step": 85200 }, { "epoch": 3.6705862083817893, "learning_rate": 7.939031222392209e-07, "loss": 3.8515, "step": 85220 }, { "epoch": 3.6714476461213765, "learning_rate": 7.93854640263742e-07, "loss": 4.2451, "step": 85240 }, { "epoch": 3.6723090838609638, "learning_rate": 7.938061582882629e-07, "loss": 3.9635, "step": 85260 }, { "epoch": 3.6731705216005515, "learning_rate": 7.937576763127841e-07, "loss": 4.0323, "step": 85280 }, { "epoch": 3.6740319593401387, "learning_rate": 7.937091943373054e-07, "loss": 3.9351, "step": 85300 }, { "epoch": 3.674893397079726, "learning_rate": 7.936607123618263e-07, "loss": 4.14, "step": 85320 }, { "epoch": 3.6757548348193136, "learning_rate": 7.936122303863474e-07, "loss": 3.8565, "step": 85340 }, { "epoch": 3.676616272558901, "learning_rate": 7.935637484108686e-07, "loss": 3.9187, "step": 85360 }, { "epoch": 3.677477710298488, "learning_rate": 7.935152664353897e-07, "loss": 4.1863, "step": 85380 }, { "epoch": 3.6783391480380754, "learning_rate": 7.934667844599107e-07, "loss": 4.061, "step": 85400 }, { "epoch": 3.679200585777663, "learning_rate": 7.934183024844318e-07, "loss": 3.716, "step": 85420 }, { "epoch": 3.6800620235172503, "learning_rate": 7.93369820508953e-07, "loss": 4.04, "step": 85440 }, { "epoch": 3.6809234612568376, "learning_rate": 7.93321338533474e-07, "loss": 3.9894, "step": 85460 }, { "epoch": 3.6817848989964252, "learning_rate": 7.932728565579952e-07, "loss": 3.9257, "step": 85480 }, { "epoch": 3.6826463367360125, "learning_rate": 7.932243745825163e-07, "loss": 4.038, "step": 85500 }, { "epoch": 3.6835077744755997, "learning_rate": 7.931758926070374e-07, "loss": 4.2045, "step": 85520 }, { "epoch": 3.684369212215187, "learning_rate": 7.931274106315584e-07, "loss": 3.9042, "step": 85540 }, { "epoch": 3.6852306499547747, "learning_rate": 7.930789286560796e-07, "loss": 4.0177, "step": 85560 }, { "epoch": 3.686092087694362, "learning_rate": 7.930304466806007e-07, "loss": 3.7278, "step": 85580 }, { "epoch": 3.686953525433949, "learning_rate": 7.929819647051219e-07, "loss": 3.944, "step": 85600 }, { "epoch": 3.687814963173537, "learning_rate": 7.929334827296429e-07, "loss": 3.8217, "step": 85620 }, { "epoch": 3.688676400913124, "learning_rate": 7.92885000754164e-07, "loss": 3.9619, "step": 85640 }, { "epoch": 3.6895378386527113, "learning_rate": 7.928365187786851e-07, "loss": 3.9583, "step": 85660 }, { "epoch": 3.6903992763922986, "learning_rate": 7.927880368032063e-07, "loss": 3.8675, "step": 85680 }, { "epoch": 3.6912607141318863, "learning_rate": 7.927395548277273e-07, "loss": 4.0699, "step": 85700 }, { "epoch": 3.6921221518714735, "learning_rate": 7.926910728522484e-07, "loss": 4.1263, "step": 85720 }, { "epoch": 3.6929835896110608, "learning_rate": 7.926425908767696e-07, "loss": 3.924, "step": 85740 }, { "epoch": 3.6938450273506485, "learning_rate": 7.925941089012907e-07, "loss": 4.0332, "step": 85760 }, { "epoch": 3.6947064650902357, "learning_rate": 7.925456269258117e-07, "loss": 3.9763, "step": 85780 }, { "epoch": 3.695567902829823, "learning_rate": 7.924971449503328e-07, "loss": 3.9511, "step": 85800 }, { "epoch": 3.69642934056941, "learning_rate": 7.92448662974854e-07, "loss": 3.9879, "step": 85820 }, { "epoch": 3.697290778308998, "learning_rate": 7.924001809993752e-07, "loss": 3.9871, "step": 85840 }, { "epoch": 3.698152216048585, "learning_rate": 7.923516990238962e-07, "loss": 4.0096, "step": 85860 }, { "epoch": 3.6990136537881724, "learning_rate": 7.923032170484173e-07, "loss": 3.8202, "step": 85880 }, { "epoch": 3.69987509152776, "learning_rate": 7.922547350729384e-07, "loss": 3.9721, "step": 85900 }, { "epoch": 3.7007365292673473, "learning_rate": 7.922062530974595e-07, "loss": 3.9915, "step": 85920 }, { "epoch": 3.7015979670069346, "learning_rate": 7.921577711219806e-07, "loss": 4.0234, "step": 85940 }, { "epoch": 3.702459404746522, "learning_rate": 7.921092891465017e-07, "loss": 3.8978, "step": 85960 }, { "epoch": 3.703320842486109, "learning_rate": 7.920608071710229e-07, "loss": 3.907, "step": 85980 }, { "epoch": 3.7041822802256967, "learning_rate": 7.920123251955439e-07, "loss": 4.0054, "step": 86000 }, { "epoch": 3.705043717965284, "learning_rate": 7.91963843220065e-07, "loss": 4.0066, "step": 86020 }, { "epoch": 3.7059051557048717, "learning_rate": 7.91915361244586e-07, "loss": 3.9255, "step": 86040 }, { "epoch": 3.706766593444459, "learning_rate": 7.918668792691073e-07, "loss": 3.7505, "step": 86060 }, { "epoch": 3.707628031184046, "learning_rate": 7.918183972936283e-07, "loss": 3.9397, "step": 86080 }, { "epoch": 3.7084894689236334, "learning_rate": 7.917699153181495e-07, "loss": 3.9661, "step": 86100 }, { "epoch": 3.7093509066632206, "learning_rate": 7.917214333426706e-07, "loss": 4.1445, "step": 86120 }, { "epoch": 3.7102123444028083, "learning_rate": 7.916729513671916e-07, "loss": 3.8973, "step": 86140 }, { "epoch": 3.7110737821423956, "learning_rate": 7.916244693917127e-07, "loss": 3.9122, "step": 86160 }, { "epoch": 3.7119352198819833, "learning_rate": 7.915759874162339e-07, "loss": 4.002, "step": 86180 }, { "epoch": 3.7127966576215705, "learning_rate": 7.91527505440755e-07, "loss": 3.9136, "step": 86200 }, { "epoch": 3.7136580953611578, "learning_rate": 7.914790234652761e-07, "loss": 3.8946, "step": 86220 }, { "epoch": 3.714519533100745, "learning_rate": 7.914305414897972e-07, "loss": 3.9535, "step": 86240 }, { "epoch": 3.7153809708403323, "learning_rate": 7.913820595143183e-07, "loss": 3.8481, "step": 86260 }, { "epoch": 3.71624240857992, "learning_rate": 7.913335775388394e-07, "loss": 4.0469, "step": 86280 }, { "epoch": 3.717103846319507, "learning_rate": 7.912850955633605e-07, "loss": 3.9189, "step": 86300 }, { "epoch": 3.717965284059095, "learning_rate": 7.912366135878816e-07, "loss": 3.9028, "step": 86320 }, { "epoch": 3.718826721798682, "learning_rate": 7.911881316124027e-07, "loss": 3.9573, "step": 86340 }, { "epoch": 3.7196881595382694, "learning_rate": 7.911396496369239e-07, "loss": 3.8091, "step": 86360 }, { "epoch": 3.7205495972778566, "learning_rate": 7.91091167661445e-07, "loss": 3.5977, "step": 86380 }, { "epoch": 3.721411035017444, "learning_rate": 7.91042685685966e-07, "loss": 3.75, "step": 86400 }, { "epoch": 3.7222724727570315, "learning_rate": 7.909942037104871e-07, "loss": 4.0414, "step": 86420 }, { "epoch": 3.723133910496619, "learning_rate": 7.909457217350082e-07, "loss": 3.8814, "step": 86440 }, { "epoch": 3.7239953482362065, "learning_rate": 7.908972397595294e-07, "loss": 3.775, "step": 86460 }, { "epoch": 3.7248567859757937, "learning_rate": 7.908487577840505e-07, "loss": 3.9186, "step": 86480 }, { "epoch": 3.725718223715381, "learning_rate": 7.908002758085716e-07, "loss": 4.0321, "step": 86500 }, { "epoch": 3.726579661454968, "learning_rate": 7.907517938330927e-07, "loss": 3.9766, "step": 86520 }, { "epoch": 3.7274410991945555, "learning_rate": 7.907033118576137e-07, "loss": 3.8738, "step": 86540 }, { "epoch": 3.728302536934143, "learning_rate": 7.90654829882135e-07, "loss": 3.8979, "step": 86560 }, { "epoch": 3.7291639746737304, "learning_rate": 7.90606347906656e-07, "loss": 3.818, "step": 86580 }, { "epoch": 3.730025412413318, "learning_rate": 7.905578659311771e-07, "loss": 4.043, "step": 86600 }, { "epoch": 3.7308868501529053, "learning_rate": 7.905093839556982e-07, "loss": 3.9054, "step": 86620 }, { "epoch": 3.7317482878924926, "learning_rate": 7.904609019802194e-07, "loss": 3.9729, "step": 86640 }, { "epoch": 3.73260972563208, "learning_rate": 7.904124200047404e-07, "loss": 3.903, "step": 86660 }, { "epoch": 3.733471163371667, "learning_rate": 7.903639380292615e-07, "loss": 3.9281, "step": 86680 }, { "epoch": 3.7343326011112548, "learning_rate": 7.903154560537826e-07, "loss": 3.9402, "step": 86700 }, { "epoch": 3.735194038850842, "learning_rate": 7.902669740783038e-07, "loss": 3.896, "step": 86720 }, { "epoch": 3.7360554765904297, "learning_rate": 7.902184921028249e-07, "loss": 4.0376, "step": 86740 }, { "epoch": 3.736916914330017, "learning_rate": 7.90170010127346e-07, "loss": 3.6785, "step": 86760 }, { "epoch": 3.737778352069604, "learning_rate": 7.90121528151867e-07, "loss": 3.961, "step": 86780 }, { "epoch": 3.7386397898091914, "learning_rate": 7.900730461763881e-07, "loss": 4.054, "step": 86800 }, { "epoch": 3.7395012275487787, "learning_rate": 7.900245642009092e-07, "loss": 3.9651, "step": 86820 }, { "epoch": 3.7403626652883664, "learning_rate": 7.899760822254304e-07, "loss": 4.0509, "step": 86840 }, { "epoch": 3.7412241030279536, "learning_rate": 7.899276002499515e-07, "loss": 3.9688, "step": 86860 }, { "epoch": 3.7420855407675413, "learning_rate": 7.898791182744726e-07, "loss": 4.0626, "step": 86880 }, { "epoch": 3.7429469785071285, "learning_rate": 7.898306362989937e-07, "loss": 3.946, "step": 86900 }, { "epoch": 3.743808416246716, "learning_rate": 7.897821543235148e-07, "loss": 3.9239, "step": 86920 }, { "epoch": 3.744669853986303, "learning_rate": 7.897336723480359e-07, "loss": 3.8131, "step": 86940 }, { "epoch": 3.7455312917258903, "learning_rate": 7.89685190372557e-07, "loss": 3.8613, "step": 86960 }, { "epoch": 3.746392729465478, "learning_rate": 7.89636708397078e-07, "loss": 3.9029, "step": 86980 }, { "epoch": 3.747254167205065, "learning_rate": 7.895882264215993e-07, "loss": 3.8923, "step": 87000 }, { "epoch": 3.748115604944653, "learning_rate": 7.895397444461204e-07, "loss": 3.7386, "step": 87020 }, { "epoch": 3.74897704268424, "learning_rate": 7.894912624706413e-07, "loss": 3.8649, "step": 87040 }, { "epoch": 3.7498384804238274, "learning_rate": 7.894427804951625e-07, "loss": 3.9059, "step": 87060 }, { "epoch": 3.7506999181634146, "learning_rate": 7.893942985196836e-07, "loss": 3.8373, "step": 87080 }, { "epoch": 3.751561355903002, "learning_rate": 7.893458165442048e-07, "loss": 4.1028, "step": 87100 }, { "epoch": 3.7524227936425896, "learning_rate": 7.892973345687258e-07, "loss": 3.7742, "step": 87120 }, { "epoch": 3.753284231382177, "learning_rate": 7.89248852593247e-07, "loss": 3.928, "step": 87140 }, { "epoch": 3.754145669121764, "learning_rate": 7.892003706177681e-07, "loss": 3.9705, "step": 87160 }, { "epoch": 3.7550071068613518, "learning_rate": 7.891518886422892e-07, "loss": 3.8655, "step": 87180 }, { "epoch": 3.755868544600939, "learning_rate": 7.891034066668102e-07, "loss": 3.8469, "step": 87200 }, { "epoch": 3.7567299823405262, "learning_rate": 7.890549246913314e-07, "loss": 4.0724, "step": 87220 }, { "epoch": 3.7575914200801135, "learning_rate": 7.890064427158525e-07, "loss": 4.0248, "step": 87240 }, { "epoch": 3.758452857819701, "learning_rate": 7.889579607403737e-07, "loss": 3.7624, "step": 87260 }, { "epoch": 3.7593142955592884, "learning_rate": 7.889094787648947e-07, "loss": 3.872, "step": 87280 }, { "epoch": 3.7601757332988757, "learning_rate": 7.888609967894158e-07, "loss": 3.9052, "step": 87300 }, { "epoch": 3.7610371710384634, "learning_rate": 7.888125148139369e-07, "loss": 4.0138, "step": 87320 }, { "epoch": 3.7618986087780506, "learning_rate": 7.88764032838458e-07, "loss": 3.5288, "step": 87340 }, { "epoch": 3.762760046517638, "learning_rate": 7.887155508629791e-07, "loss": 3.7521, "step": 87360 }, { "epoch": 3.763621484257225, "learning_rate": 7.886670688875003e-07, "loss": 4.0931, "step": 87380 }, { "epoch": 3.764482921996813, "learning_rate": 7.886185869120214e-07, "loss": 3.9274, "step": 87400 }, { "epoch": 3.7653443597364, "learning_rate": 7.885701049365423e-07, "loss": 3.9735, "step": 87420 }, { "epoch": 3.7662057974759873, "learning_rate": 7.885216229610635e-07, "loss": 4.0142, "step": 87440 }, { "epoch": 3.767067235215575, "learning_rate": 7.884731409855847e-07, "loss": 3.7194, "step": 87460 }, { "epoch": 3.767928672955162, "learning_rate": 7.884246590101058e-07, "loss": 3.8846, "step": 87480 }, { "epoch": 3.7687901106947495, "learning_rate": 7.883761770346268e-07, "loss": 3.9781, "step": 87500 }, { "epoch": 3.7696515484343367, "learning_rate": 7.88327695059148e-07, "loss": 3.6471, "step": 87520 }, { "epoch": 3.7705129861739244, "learning_rate": 7.882792130836691e-07, "loss": 4.0283, "step": 87540 }, { "epoch": 3.7713744239135116, "learning_rate": 7.882307311081902e-07, "loss": 3.8996, "step": 87560 }, { "epoch": 3.772235861653099, "learning_rate": 7.881822491327112e-07, "loss": 3.6923, "step": 87580 }, { "epoch": 3.7730972993926866, "learning_rate": 7.881337671572324e-07, "loss": 3.7923, "step": 87600 }, { "epoch": 3.773958737132274, "learning_rate": 7.880852851817535e-07, "loss": 4.0788, "step": 87620 }, { "epoch": 3.774820174871861, "learning_rate": 7.880368032062747e-07, "loss": 4.1749, "step": 87640 }, { "epoch": 3.7756816126114483, "learning_rate": 7.879883212307957e-07, "loss": 3.9226, "step": 87660 }, { "epoch": 3.776543050351036, "learning_rate": 7.879398392553168e-07, "loss": 3.8808, "step": 87680 }, { "epoch": 3.7774044880906232, "learning_rate": 7.878913572798379e-07, "loss": 3.9336, "step": 87700 }, { "epoch": 3.7782659258302105, "learning_rate": 7.87842875304359e-07, "loss": 3.9363, "step": 87720 }, { "epoch": 3.779127363569798, "learning_rate": 7.877943933288801e-07, "loss": 3.7732, "step": 87740 }, { "epoch": 3.7799888013093854, "learning_rate": 7.877459113534013e-07, "loss": 3.9161, "step": 87760 }, { "epoch": 3.7808502390489727, "learning_rate": 7.876974293779224e-07, "loss": 3.738, "step": 87780 }, { "epoch": 3.78171167678856, "learning_rate": 7.876489474024434e-07, "loss": 3.8053, "step": 87800 }, { "epoch": 3.7825731145281476, "learning_rate": 7.876004654269644e-07, "loss": 3.9258, "step": 87820 }, { "epoch": 3.783434552267735, "learning_rate": 7.875519834514857e-07, "loss": 3.8916, "step": 87840 }, { "epoch": 3.784295990007322, "learning_rate": 7.875035014760068e-07, "loss": 3.6987, "step": 87860 }, { "epoch": 3.7851574277469098, "learning_rate": 7.874550195005278e-07, "loss": 3.9392, "step": 87880 }, { "epoch": 3.786018865486497, "learning_rate": 7.87406537525049e-07, "loss": 3.8059, "step": 87900 }, { "epoch": 3.7868803032260843, "learning_rate": 7.873580555495701e-07, "loss": 3.7662, "step": 87920 }, { "epoch": 3.7877417409656715, "learning_rate": 7.873095735740912e-07, "loss": 3.9632, "step": 87940 }, { "epoch": 3.788603178705259, "learning_rate": 7.872610915986122e-07, "loss": 3.9151, "step": 87960 }, { "epoch": 3.7894646164448464, "learning_rate": 7.872126096231334e-07, "loss": 3.8365, "step": 87980 }, { "epoch": 3.7903260541844337, "learning_rate": 7.871641276476546e-07, "loss": 3.7737, "step": 88000 }, { "epoch": 3.7911874919240214, "learning_rate": 7.871156456721756e-07, "loss": 3.7817, "step": 88020 }, { "epoch": 3.7920489296636086, "learning_rate": 7.870671636966967e-07, "loss": 3.7157, "step": 88040 }, { "epoch": 3.792910367403196, "learning_rate": 7.870186817212178e-07, "loss": 3.7524, "step": 88060 }, { "epoch": 3.793771805142783, "learning_rate": 7.86970199745739e-07, "loss": 3.8023, "step": 88080 }, { "epoch": 3.794633242882371, "learning_rate": 7.8692171777026e-07, "loss": 3.7815, "step": 88100 }, { "epoch": 3.795494680621958, "learning_rate": 7.868732357947811e-07, "loss": 4.0132, "step": 88120 }, { "epoch": 3.7963561183615453, "learning_rate": 7.868247538193023e-07, "loss": 3.9462, "step": 88140 }, { "epoch": 3.797217556101133, "learning_rate": 7.867762718438234e-07, "loss": 3.7257, "step": 88160 }, { "epoch": 3.7980789938407202, "learning_rate": 7.867277898683444e-07, "loss": 3.9017, "step": 88180 }, { "epoch": 3.7989404315803075, "learning_rate": 7.866793078928655e-07, "loss": 3.7896, "step": 88200 }, { "epoch": 3.7998018693198947, "learning_rate": 7.866308259173867e-07, "loss": 3.5694, "step": 88220 }, { "epoch": 3.8006633070594824, "learning_rate": 7.865823439419076e-07, "loss": 3.8739, "step": 88240 }, { "epoch": 3.8015247447990697, "learning_rate": 7.865338619664289e-07, "loss": 3.9219, "step": 88260 }, { "epoch": 3.802386182538657, "learning_rate": 7.8648537999095e-07, "loss": 4.0126, "step": 88280 }, { "epoch": 3.8032476202782446, "learning_rate": 7.864368980154712e-07, "loss": 3.8891, "step": 88300 }, { "epoch": 3.804109058017832, "learning_rate": 7.863884160399921e-07, "loss": 3.9425, "step": 88320 }, { "epoch": 3.804970495757419, "learning_rate": 7.863399340645134e-07, "loss": 4.0016, "step": 88340 }, { "epoch": 3.8058319334970063, "learning_rate": 7.862914520890344e-07, "loss": 3.8384, "step": 88360 }, { "epoch": 3.806693371236594, "learning_rate": 7.862429701135556e-07, "loss": 4.0418, "step": 88380 }, { "epoch": 3.8075548089761813, "learning_rate": 7.861944881380766e-07, "loss": 3.6954, "step": 88400 }, { "epoch": 3.8084162467157685, "learning_rate": 7.861460061625977e-07, "loss": 3.8187, "step": 88420 }, { "epoch": 3.809277684455356, "learning_rate": 7.860975241871188e-07, "loss": 3.8867, "step": 88440 }, { "epoch": 3.8101391221949434, "learning_rate": 7.8604904221164e-07, "loss": 3.9452, "step": 88460 }, { "epoch": 3.8110005599345307, "learning_rate": 7.86000560236161e-07, "loss": 3.8708, "step": 88480 }, { "epoch": 3.811861997674118, "learning_rate": 7.859520782606821e-07, "loss": 3.834, "step": 88500 }, { "epoch": 3.8127234354137056, "learning_rate": 7.859035962852033e-07, "loss": 3.9532, "step": 88520 }, { "epoch": 3.813584873153293, "learning_rate": 7.858551143097245e-07, "loss": 3.7738, "step": 88540 }, { "epoch": 3.81444631089288, "learning_rate": 7.858066323342454e-07, "loss": 3.7843, "step": 88560 }, { "epoch": 3.815307748632468, "learning_rate": 7.857581503587665e-07, "loss": 3.7998, "step": 88580 }, { "epoch": 3.816169186372055, "learning_rate": 7.857096683832877e-07, "loss": 3.6708, "step": 88600 }, { "epoch": 3.8170306241116423, "learning_rate": 7.856611864078089e-07, "loss": 4.0309, "step": 88620 }, { "epoch": 3.8178920618512295, "learning_rate": 7.856127044323299e-07, "loss": 3.959, "step": 88640 }, { "epoch": 3.8187534995908172, "learning_rate": 7.85564222456851e-07, "loss": 3.9605, "step": 88660 }, { "epoch": 3.8196149373304045, "learning_rate": 7.855157404813722e-07, "loss": 3.8793, "step": 88680 }, { "epoch": 3.8204763750699917, "learning_rate": 7.854672585058931e-07, "loss": 3.8449, "step": 88700 }, { "epoch": 3.8213378128095794, "learning_rate": 7.854187765304143e-07, "loss": 3.7767, "step": 88720 }, { "epoch": 3.8221992505491666, "learning_rate": 7.853702945549354e-07, "loss": 3.8814, "step": 88740 }, { "epoch": 3.823060688288754, "learning_rate": 7.853218125794565e-07, "loss": 4.0609, "step": 88760 }, { "epoch": 3.823922126028341, "learning_rate": 7.852733306039776e-07, "loss": 3.9216, "step": 88780 }, { "epoch": 3.824783563767929, "learning_rate": 7.852248486284988e-07, "loss": 3.6541, "step": 88800 }, { "epoch": 3.825645001507516, "learning_rate": 7.851763666530198e-07, "loss": 3.8322, "step": 88820 }, { "epoch": 3.8265064392471033, "learning_rate": 7.85127884677541e-07, "loss": 3.7899, "step": 88840 }, { "epoch": 3.827367876986691, "learning_rate": 7.85079402702062e-07, "loss": 3.7304, "step": 88860 }, { "epoch": 3.8282293147262783, "learning_rate": 7.850309207265832e-07, "loss": 3.9979, "step": 88880 }, { "epoch": 3.8290907524658655, "learning_rate": 7.849824387511043e-07, "loss": 3.8461, "step": 88900 }, { "epoch": 3.8299521902054527, "learning_rate": 7.849339567756255e-07, "loss": 3.843, "step": 88920 }, { "epoch": 3.8308136279450404, "learning_rate": 7.848854748001465e-07, "loss": 3.9031, "step": 88940 }, { "epoch": 3.8316750656846277, "learning_rate": 7.848369928246675e-07, "loss": 3.9929, "step": 88960 }, { "epoch": 3.832536503424215, "learning_rate": 7.847885108491887e-07, "loss": 3.7308, "step": 88980 }, { "epoch": 3.8333979411638026, "learning_rate": 7.847400288737098e-07, "loss": 3.9517, "step": 89000 }, { "epoch": 3.83425937890339, "learning_rate": 7.846915468982309e-07, "loss": 3.6881, "step": 89020 }, { "epoch": 3.835120816642977, "learning_rate": 7.84643064922752e-07, "loss": 3.919, "step": 89040 }, { "epoch": 3.8359822543825643, "learning_rate": 7.845945829472732e-07, "loss": 4.0234, "step": 89060 }, { "epoch": 3.8368436921221516, "learning_rate": 7.845461009717942e-07, "loss": 3.8104, "step": 89080 }, { "epoch": 3.8377051298617393, "learning_rate": 7.844976189963153e-07, "loss": 3.9132, "step": 89100 }, { "epoch": 3.8385665676013265, "learning_rate": 7.844491370208364e-07, "loss": 3.9795, "step": 89120 }, { "epoch": 3.839428005340914, "learning_rate": 7.844006550453576e-07, "loss": 3.9139, "step": 89140 }, { "epoch": 3.8402894430805015, "learning_rate": 7.843521730698787e-07, "loss": 4.0388, "step": 89160 }, { "epoch": 3.8411508808200887, "learning_rate": 7.843036910943998e-07, "loss": 3.8791, "step": 89180 }, { "epoch": 3.842012318559676, "learning_rate": 7.842552091189208e-07, "loss": 3.9315, "step": 89200 }, { "epoch": 3.842873756299263, "learning_rate": 7.84206727143442e-07, "loss": 3.7865, "step": 89220 }, { "epoch": 3.843735194038851, "learning_rate": 7.84158245167963e-07, "loss": 3.8102, "step": 89240 }, { "epoch": 3.844596631778438, "learning_rate": 7.841097631924842e-07, "loss": 3.8697, "step": 89260 }, { "epoch": 3.845458069518026, "learning_rate": 7.840612812170053e-07, "loss": 3.7959, "step": 89280 }, { "epoch": 3.846319507257613, "learning_rate": 7.840127992415264e-07, "loss": 4.142, "step": 89300 }, { "epoch": 3.8471809449972003, "learning_rate": 7.839643172660475e-07, "loss": 3.6295, "step": 89320 }, { "epoch": 3.8480423827367876, "learning_rate": 7.839158352905686e-07, "loss": 3.7501, "step": 89340 }, { "epoch": 3.848903820476375, "learning_rate": 7.838673533150897e-07, "loss": 4.1259, "step": 89360 }, { "epoch": 3.8497652582159625, "learning_rate": 7.838188713396108e-07, "loss": 3.7878, "step": 89380 }, { "epoch": 3.8506266959555497, "learning_rate": 7.837703893641319e-07, "loss": 3.8972, "step": 89400 }, { "epoch": 3.8514881336951374, "learning_rate": 7.837219073886531e-07, "loss": 3.8955, "step": 89420 }, { "epoch": 3.8523495714347247, "learning_rate": 7.836734254131742e-07, "loss": 3.7758, "step": 89440 }, { "epoch": 3.853211009174312, "learning_rate": 7.836249434376952e-07, "loss": 3.7494, "step": 89460 }, { "epoch": 3.854072446913899, "learning_rate": 7.835764614622163e-07, "loss": 3.8453, "step": 89480 }, { "epoch": 3.8549338846534864, "learning_rate": 7.835279794867374e-07, "loss": 3.7828, "step": 89500 }, { "epoch": 3.855795322393074, "learning_rate": 7.834794975112586e-07, "loss": 3.6256, "step": 89520 }, { "epoch": 3.8566567601326613, "learning_rate": 7.834310155357797e-07, "loss": 3.9478, "step": 89540 }, { "epoch": 3.857518197872249, "learning_rate": 7.833825335603008e-07, "loss": 3.8812, "step": 89560 }, { "epoch": 3.8583796356118363, "learning_rate": 7.833340515848219e-07, "loss": 3.8641, "step": 89580 }, { "epoch": 3.8592410733514235, "learning_rate": 7.832855696093428e-07, "loss": 3.7482, "step": 89600 }, { "epoch": 3.8601025110910108, "learning_rate": 7.832370876338641e-07, "loss": 3.9785, "step": 89620 }, { "epoch": 3.860963948830598, "learning_rate": 7.831886056583852e-07, "loss": 3.7922, "step": 89640 }, { "epoch": 3.8618253865701857, "learning_rate": 7.831401236829063e-07, "loss": 3.7549, "step": 89660 }, { "epoch": 3.862686824309773, "learning_rate": 7.830916417074274e-07, "loss": 3.8124, "step": 89680 }, { "epoch": 3.8635482620493606, "learning_rate": 7.830431597319485e-07, "loss": 3.6733, "step": 89700 }, { "epoch": 3.864409699788948, "learning_rate": 7.829946777564696e-07, "loss": 3.85, "step": 89720 }, { "epoch": 3.865271137528535, "learning_rate": 7.829461957809907e-07, "loss": 3.8004, "step": 89740 }, { "epoch": 3.8661325752681224, "learning_rate": 7.828977138055118e-07, "loss": 3.9266, "step": 89760 }, { "epoch": 3.8669940130077096, "learning_rate": 7.828492318300329e-07, "loss": 3.7967, "step": 89780 }, { "epoch": 3.8678554507472973, "learning_rate": 7.828007498545541e-07, "loss": 3.8047, "step": 89800 }, { "epoch": 3.8687168884868846, "learning_rate": 7.827522678790752e-07, "loss": 3.9963, "step": 89820 }, { "epoch": 3.8695783262264722, "learning_rate": 7.827037859035962e-07, "loss": 4.0488, "step": 89840 }, { "epoch": 3.8704397639660595, "learning_rate": 7.826553039281173e-07, "loss": 3.8202, "step": 89860 }, { "epoch": 3.8713012017056467, "learning_rate": 7.826068219526385e-07, "loss": 3.8694, "step": 89880 }, { "epoch": 3.872162639445234, "learning_rate": 7.825583399771595e-07, "loss": 3.8098, "step": 89900 }, { "epoch": 3.8730240771848212, "learning_rate": 7.825098580016807e-07, "loss": 3.7308, "step": 89920 }, { "epoch": 3.873885514924409, "learning_rate": 7.824613760262018e-07, "loss": 3.7651, "step": 89940 }, { "epoch": 3.874746952663996, "learning_rate": 7.824128940507229e-07, "loss": 3.9445, "step": 89960 }, { "epoch": 3.875608390403584, "learning_rate": 7.823644120752439e-07, "loss": 3.6094, "step": 89980 }, { "epoch": 3.876469828143171, "learning_rate": 7.823159300997651e-07, "loss": 3.7239, "step": 90000 }, { "epoch": 3.8773312658827583, "learning_rate": 7.822674481242861e-07, "loss": 3.8739, "step": 90020 }, { "epoch": 3.8781927036223456, "learning_rate": 7.822189661488073e-07, "loss": 3.4923, "step": 90040 }, { "epoch": 3.879054141361933, "learning_rate": 7.821704841733284e-07, "loss": 3.7335, "step": 90060 }, { "epoch": 3.8799155791015205, "learning_rate": 7.821220021978496e-07, "loss": 3.8102, "step": 90080 }, { "epoch": 3.8807770168411078, "learning_rate": 7.820735202223706e-07, "loss": 3.7155, "step": 90100 }, { "epoch": 3.8816384545806955, "learning_rate": 7.820250382468916e-07, "loss": 3.7802, "step": 90120 }, { "epoch": 3.8824998923202827, "learning_rate": 7.819765562714128e-07, "loss": 3.7528, "step": 90140 }, { "epoch": 3.88336133005987, "learning_rate": 7.81928074295934e-07, "loss": 3.9415, "step": 90160 }, { "epoch": 3.884222767799457, "learning_rate": 7.818795923204551e-07, "loss": 3.947, "step": 90180 }, { "epoch": 3.8850842055390444, "learning_rate": 7.818311103449761e-07, "loss": 3.639, "step": 90200 }, { "epoch": 3.885945643278632, "learning_rate": 7.817826283694972e-07, "loss": 4.0083, "step": 90220 }, { "epoch": 3.8868070810182194, "learning_rate": 7.817341463940184e-07, "loss": 3.925, "step": 90240 }, { "epoch": 3.887668518757807, "learning_rate": 7.816856644185395e-07, "loss": 3.8864, "step": 90260 }, { "epoch": 3.8885299564973943, "learning_rate": 7.816371824430605e-07, "loss": 3.8101, "step": 90280 }, { "epoch": 3.8893913942369815, "learning_rate": 7.815887004675817e-07, "loss": 3.9873, "step": 90300 }, { "epoch": 3.890252831976569, "learning_rate": 7.815402184921028e-07, "loss": 3.7457, "step": 90320 }, { "epoch": 3.891114269716156, "learning_rate": 7.814917365166239e-07, "loss": 3.925, "step": 90340 }, { "epoch": 3.8919757074557437, "learning_rate": 7.814432545411449e-07, "loss": 3.6213, "step": 90360 }, { "epoch": 3.892837145195331, "learning_rate": 7.813947725656661e-07, "loss": 3.6758, "step": 90380 }, { "epoch": 3.893698582934918, "learning_rate": 7.813462905901872e-07, "loss": 3.7121, "step": 90400 }, { "epoch": 3.894560020674506, "learning_rate": 7.812978086147084e-07, "loss": 3.8724, "step": 90420 }, { "epoch": 3.895421458414093, "learning_rate": 7.812493266392294e-07, "loss": 3.8405, "step": 90440 }, { "epoch": 3.8962828961536804, "learning_rate": 7.812008446637506e-07, "loss": 3.8385, "step": 90460 }, { "epoch": 3.8971443338932676, "learning_rate": 7.811523626882716e-07, "loss": 3.7286, "step": 90480 }, { "epoch": 3.8980057716328553, "learning_rate": 7.811038807127927e-07, "loss": 3.7813, "step": 90500 }, { "epoch": 3.8988672093724426, "learning_rate": 7.810553987373138e-07, "loss": 3.8708, "step": 90520 }, { "epoch": 3.89972864711203, "learning_rate": 7.810069167618349e-07, "loss": 3.9136, "step": 90540 }, { "epoch": 3.9005900848516175, "learning_rate": 7.809584347863561e-07, "loss": 3.8608, "step": 90560 }, { "epoch": 3.9014515225912048, "learning_rate": 7.809099528108771e-07, "loss": 3.8121, "step": 90580 }, { "epoch": 3.902312960330792, "learning_rate": 7.808614708353982e-07, "loss": 3.8127, "step": 90600 }, { "epoch": 3.9031743980703792, "learning_rate": 7.808129888599194e-07, "loss": 3.7044, "step": 90620 }, { "epoch": 3.904035835809967, "learning_rate": 7.807645068844405e-07, "loss": 3.7509, "step": 90640 }, { "epoch": 3.904897273549554, "learning_rate": 7.807160249089615e-07, "loss": 3.7746, "step": 90660 }, { "epoch": 3.9057587112891414, "learning_rate": 7.806675429334827e-07, "loss": 3.7436, "step": 90680 }, { "epoch": 3.906620149028729, "learning_rate": 7.806190609580039e-07, "loss": 3.6755, "step": 90700 }, { "epoch": 3.9074815867683164, "learning_rate": 7.80570578982525e-07, "loss": 3.7125, "step": 90720 }, { "epoch": 3.9083430245079036, "learning_rate": 7.805220970070459e-07, "loss": 3.9075, "step": 90740 }, { "epoch": 3.909204462247491, "learning_rate": 7.804736150315671e-07, "loss": 3.9323, "step": 90760 }, { "epoch": 3.9100658999870785, "learning_rate": 7.804251330560883e-07, "loss": 3.6171, "step": 90780 }, { "epoch": 3.910927337726666, "learning_rate": 7.803766510806094e-07, "loss": 3.712, "step": 90800 }, { "epoch": 3.911788775466253, "learning_rate": 7.803281691051304e-07, "loss": 3.8687, "step": 90820 }, { "epoch": 3.9126502132058407, "learning_rate": 7.802796871296516e-07, "loss": 3.7504, "step": 90840 }, { "epoch": 3.913511650945428, "learning_rate": 7.802312051541725e-07, "loss": 3.818, "step": 90860 }, { "epoch": 3.914373088685015, "learning_rate": 7.801827231786937e-07, "loss": 3.8207, "step": 90880 }, { "epoch": 3.9152345264246025, "learning_rate": 7.801342412032148e-07, "loss": 3.8593, "step": 90900 }, { "epoch": 3.91609596416419, "learning_rate": 7.80085759227736e-07, "loss": 3.8238, "step": 90920 }, { "epoch": 3.9169574019037774, "learning_rate": 7.800372772522571e-07, "loss": 3.8924, "step": 90940 }, { "epoch": 3.9178188396433646, "learning_rate": 7.799887952767782e-07, "loss": 3.7231, "step": 90960 }, { "epoch": 3.9186802773829523, "learning_rate": 7.799403133012992e-07, "loss": 3.8398, "step": 90980 }, { "epoch": 3.9195417151225396, "learning_rate": 7.798918313258204e-07, "loss": 3.6378, "step": 91000 }, { "epoch": 3.920403152862127, "learning_rate": 7.798433493503415e-07, "loss": 4.0435, "step": 91020 }, { "epoch": 3.921264590601714, "learning_rate": 7.797948673748626e-07, "loss": 3.9161, "step": 91040 }, { "epoch": 3.9221260283413018, "learning_rate": 7.797463853993837e-07, "loss": 3.929, "step": 91060 }, { "epoch": 3.922987466080889, "learning_rate": 7.796979034239049e-07, "loss": 3.8256, "step": 91080 }, { "epoch": 3.9238489038204762, "learning_rate": 7.79649421448426e-07, "loss": 3.7158, "step": 91100 }, { "epoch": 3.924710341560064, "learning_rate": 7.796009394729469e-07, "loss": 3.684, "step": 91120 }, { "epoch": 3.925571779299651, "learning_rate": 7.795524574974681e-07, "loss": 3.6719, "step": 91140 }, { "epoch": 3.9264332170392384, "learning_rate": 7.795039755219893e-07, "loss": 3.9501, "step": 91160 }, { "epoch": 3.9272946547788257, "learning_rate": 7.794554935465103e-07, "loss": 3.8401, "step": 91180 }, { "epoch": 3.9281560925184134, "learning_rate": 7.794070115710314e-07, "loss": 3.7076, "step": 91200 }, { "epoch": 3.9290175302580006, "learning_rate": 7.793585295955526e-07, "loss": 3.7346, "step": 91220 }, { "epoch": 3.929878967997588, "learning_rate": 7.793100476200737e-07, "loss": 3.7959, "step": 91240 }, { "epoch": 3.9307404057371755, "learning_rate": 7.792615656445947e-07, "loss": 3.7263, "step": 91260 }, { "epoch": 3.931601843476763, "learning_rate": 7.792130836691158e-07, "loss": 3.7003, "step": 91280 }, { "epoch": 3.93246328121635, "learning_rate": 7.79164601693637e-07, "loss": 3.7147, "step": 91300 }, { "epoch": 3.9333247189559373, "learning_rate": 7.791161197181582e-07, "loss": 3.917, "step": 91320 }, { "epoch": 3.934186156695525, "learning_rate": 7.790676377426792e-07, "loss": 3.83, "step": 91340 }, { "epoch": 3.935047594435112, "learning_rate": 7.790191557672003e-07, "loss": 3.6783, "step": 91360 }, { "epoch": 3.9359090321746995, "learning_rate": 7.789706737917213e-07, "loss": 4.1309, "step": 91380 }, { "epoch": 3.936770469914287, "learning_rate": 7.789221918162424e-07, "loss": 3.6555, "step": 91400 }, { "epoch": 3.9376319076538744, "learning_rate": 7.788737098407636e-07, "loss": 3.735, "step": 91420 }, { "epoch": 3.9384933453934616, "learning_rate": 7.788252278652847e-07, "loss": 3.7647, "step": 91440 }, { "epoch": 3.939354783133049, "learning_rate": 7.787767458898059e-07, "loss": 3.8945, "step": 91460 }, { "epoch": 3.9402162208726366, "learning_rate": 7.787282639143269e-07, "loss": 3.9107, "step": 91480 }, { "epoch": 3.941077658612224, "learning_rate": 7.78679781938848e-07, "loss": 3.7594, "step": 91500 }, { "epoch": 3.941939096351811, "learning_rate": 7.786312999633691e-07, "loss": 3.9104, "step": 91520 }, { "epoch": 3.9428005340913987, "learning_rate": 7.785828179878903e-07, "loss": 3.777, "step": 91540 }, { "epoch": 3.943661971830986, "learning_rate": 7.785343360124113e-07, "loss": 3.76, "step": 91560 }, { "epoch": 3.9445234095705732, "learning_rate": 7.784858540369325e-07, "loss": 3.7582, "step": 91580 }, { "epoch": 3.9453848473101605, "learning_rate": 7.784373720614536e-07, "loss": 3.845, "step": 91600 }, { "epoch": 3.946246285049748, "learning_rate": 7.783888900859747e-07, "loss": 3.8376, "step": 91620 }, { "epoch": 3.9471077227893354, "learning_rate": 7.783404081104957e-07, "loss": 3.6206, "step": 91640 }, { "epoch": 3.9479691605289227, "learning_rate": 7.782919261350168e-07, "loss": 3.7836, "step": 91660 }, { "epoch": 3.9488305982685104, "learning_rate": 7.78243444159538e-07, "loss": 4.0539, "step": 91680 }, { "epoch": 3.9496920360080976, "learning_rate": 7.781949621840592e-07, "loss": 3.8973, "step": 91700 }, { "epoch": 3.950553473747685, "learning_rate": 7.781464802085802e-07, "loss": 3.7282, "step": 91720 }, { "epoch": 3.951414911487272, "learning_rate": 7.780979982331013e-07, "loss": 3.8039, "step": 91740 }, { "epoch": 3.9522763492268598, "learning_rate": 7.780495162576224e-07, "loss": 3.8443, "step": 91760 }, { "epoch": 3.953137786966447, "learning_rate": 7.780010342821435e-07, "loss": 3.8807, "step": 91780 }, { "epoch": 3.9539992247060343, "learning_rate": 7.779525523066645e-07, "loss": 3.641, "step": 91800 }, { "epoch": 3.954860662445622, "learning_rate": 7.779040703311857e-07, "loss": 3.9185, "step": 91820 }, { "epoch": 3.955722100185209, "learning_rate": 7.778555883557069e-07, "loss": 3.8025, "step": 91840 }, { "epoch": 3.9565835379247964, "learning_rate": 7.77807106380228e-07, "loss": 3.7172, "step": 91860 }, { "epoch": 3.9574449756643837, "learning_rate": 7.77758624404749e-07, "loss": 3.6519, "step": 91880 }, { "epoch": 3.9583064134039714, "learning_rate": 7.777101424292701e-07, "loss": 3.7237, "step": 91900 }, { "epoch": 3.9591678511435586, "learning_rate": 7.776616604537913e-07, "loss": 3.8187, "step": 91920 }, { "epoch": 3.960029288883146, "learning_rate": 7.776131784783123e-07, "loss": 3.6389, "step": 91940 }, { "epoch": 3.9608907266227336, "learning_rate": 7.775646965028335e-07, "loss": 3.8033, "step": 91960 }, { "epoch": 3.961752164362321, "learning_rate": 7.775162145273546e-07, "loss": 3.7367, "step": 91980 }, { "epoch": 3.962613602101908, "learning_rate": 7.774677325518757e-07, "loss": 3.5215, "step": 92000 }, { "epoch": 3.9634750398414953, "learning_rate": 7.774192505763967e-07, "loss": 3.7358, "step": 92020 }, { "epoch": 3.964336477581083, "learning_rate": 7.773707686009179e-07, "loss": 3.8196, "step": 92040 }, { "epoch": 3.9651979153206702, "learning_rate": 7.77322286625439e-07, "loss": 3.8156, "step": 92060 }, { "epoch": 3.9660593530602575, "learning_rate": 7.772738046499602e-07, "loss": 3.7631, "step": 92080 }, { "epoch": 3.966920790799845, "learning_rate": 7.772253226744812e-07, "loss": 3.7843, "step": 92100 }, { "epoch": 3.9677822285394324, "learning_rate": 7.771768406990023e-07, "loss": 3.8394, "step": 92120 }, { "epoch": 3.9686436662790197, "learning_rate": 7.771283587235234e-07, "loss": 3.8936, "step": 92140 }, { "epoch": 3.969505104018607, "learning_rate": 7.770798767480445e-07, "loss": 3.8979, "step": 92160 }, { "epoch": 3.9703665417581946, "learning_rate": 7.770313947725656e-07, "loss": 3.8724, "step": 92180 }, { "epoch": 3.971227979497782, "learning_rate": 7.769829127970867e-07, "loss": 3.8303, "step": 92200 }, { "epoch": 3.972089417237369, "learning_rate": 7.769344308216079e-07, "loss": 3.8259, "step": 92220 }, { "epoch": 3.9729508549769568, "learning_rate": 7.76885948846129e-07, "loss": 3.7621, "step": 92240 }, { "epoch": 3.973812292716544, "learning_rate": 7.7683746687065e-07, "loss": 3.6755, "step": 92260 }, { "epoch": 3.9746737304561313, "learning_rate": 7.767889848951711e-07, "loss": 3.6209, "step": 92280 }, { "epoch": 3.9755351681957185, "learning_rate": 7.767405029196923e-07, "loss": 3.6571, "step": 92300 }, { "epoch": 3.9763966059353058, "learning_rate": 7.766920209442133e-07, "loss": 3.761, "step": 92320 }, { "epoch": 3.9772580436748934, "learning_rate": 7.766435389687345e-07, "loss": 3.6781, "step": 92340 }, { "epoch": 3.9781194814144807, "learning_rate": 7.765950569932556e-07, "loss": 3.7519, "step": 92360 }, { "epoch": 3.9789809191540684, "learning_rate": 7.765465750177766e-07, "loss": 3.6434, "step": 92380 }, { "epoch": 3.9798423568936556, "learning_rate": 7.764980930422978e-07, "loss": 3.7371, "step": 92400 }, { "epoch": 3.980703794633243, "learning_rate": 7.764496110668189e-07, "loss": 3.6178, "step": 92420 }, { "epoch": 3.98156523237283, "learning_rate": 7.7640112909134e-07, "loss": 3.6757, "step": 92440 }, { "epoch": 3.9824266701124174, "learning_rate": 7.763526471158611e-07, "loss": 3.8007, "step": 92460 }, { "epoch": 3.983288107852005, "learning_rate": 7.763041651403822e-07, "loss": 3.7136, "step": 92480 }, { "epoch": 3.9841495455915923, "learning_rate": 7.762556831649034e-07, "loss": 3.8083, "step": 92500 }, { "epoch": 3.98501098333118, "learning_rate": 7.762072011894244e-07, "loss": 3.8293, "step": 92520 }, { "epoch": 3.9858724210707672, "learning_rate": 7.761587192139455e-07, "loss": 3.6425, "step": 92540 }, { "epoch": 3.9867338588103545, "learning_rate": 7.761102372384666e-07, "loss": 3.8174, "step": 92560 }, { "epoch": 3.9875952965499417, "learning_rate": 7.760617552629878e-07, "loss": 3.7386, "step": 92580 }, { "epoch": 3.988456734289529, "learning_rate": 7.760132732875089e-07, "loss": 3.6856, "step": 92600 }, { "epoch": 3.9893181720291166, "learning_rate": 7.7596479131203e-07, "loss": 3.7373, "step": 92620 }, { "epoch": 3.990179609768704, "learning_rate": 7.759163093365509e-07, "loss": 3.6376, "step": 92640 }, { "epoch": 3.9910410475082916, "learning_rate": 7.758678273610722e-07, "loss": 3.7604, "step": 92660 }, { "epoch": 3.991902485247879, "learning_rate": 7.758193453855932e-07, "loss": 3.7319, "step": 92680 }, { "epoch": 3.992763922987466, "learning_rate": 7.757708634101144e-07, "loss": 3.6841, "step": 92700 }, { "epoch": 3.9936253607270533, "learning_rate": 7.757223814346355e-07, "loss": 3.6348, "step": 92720 }, { "epoch": 3.9944867984666406, "learning_rate": 7.756738994591567e-07, "loss": 3.951, "step": 92740 }, { "epoch": 3.9953482362062283, "learning_rate": 7.756254174836776e-07, "loss": 3.8506, "step": 92760 }, { "epoch": 3.9962096739458155, "learning_rate": 7.755769355081988e-07, "loss": 3.7258, "step": 92780 }, { "epoch": 3.997071111685403, "learning_rate": 7.755284535327199e-07, "loss": 3.7346, "step": 92800 }, { "epoch": 3.9979325494249904, "learning_rate": 7.75479971557241e-07, "loss": 3.6512, "step": 92820 }, { "epoch": 3.9987939871645777, "learning_rate": 7.754314895817621e-07, "loss": 3.7337, "step": 92840 }, { "epoch": 3.999655424904165, "learning_rate": 7.753830076062833e-07, "loss": 3.9018, "step": 92860 }, { "epoch": 4.000516862643752, "learning_rate": 7.753345256308044e-07, "loss": 3.7847, "step": 92880 }, { "epoch": 4.001378300383339, "learning_rate": 7.752860436553254e-07, "loss": 3.6393, "step": 92900 }, { "epoch": 4.0022397381229275, "learning_rate": 7.752375616798465e-07, "loss": 3.7626, "step": 92920 }, { "epoch": 4.003101175862515, "learning_rate": 7.751890797043677e-07, "loss": 3.7545, "step": 92940 }, { "epoch": 4.003962613602102, "learning_rate": 7.751405977288888e-07, "loss": 3.8856, "step": 92960 }, { "epoch": 4.004824051341689, "learning_rate": 7.750921157534098e-07, "loss": 3.874, "step": 92980 }, { "epoch": 4.0056854890812765, "learning_rate": 7.75043633777931e-07, "loss": 3.7486, "step": 93000 }, { "epoch": 4.006546926820864, "learning_rate": 7.749951518024521e-07, "loss": 3.8536, "step": 93020 }, { "epoch": 4.007408364560451, "learning_rate": 7.749466698269732e-07, "loss": 3.7286, "step": 93040 }, { "epoch": 4.008269802300039, "learning_rate": 7.748981878514942e-07, "loss": 3.6872, "step": 93060 }, { "epoch": 4.009131240039626, "learning_rate": 7.748497058760154e-07, "loss": 3.7737, "step": 93080 }, { "epoch": 4.009992677779214, "learning_rate": 7.748012239005366e-07, "loss": 3.6935, "step": 93100 }, { "epoch": 4.010854115518801, "learning_rate": 7.747527419250577e-07, "loss": 3.7127, "step": 93120 }, { "epoch": 4.011715553258388, "learning_rate": 7.747042599495787e-07, "loss": 3.7855, "step": 93140 }, { "epoch": 4.012576990997975, "learning_rate": 7.746557779740997e-07, "loss": 3.6971, "step": 93160 }, { "epoch": 4.013438428737563, "learning_rate": 7.746072959986209e-07, "loss": 3.6356, "step": 93180 }, { "epoch": 4.014299866477151, "learning_rate": 7.74558814023142e-07, "loss": 3.6845, "step": 93200 }, { "epoch": 4.015161304216738, "learning_rate": 7.745103320476631e-07, "loss": 3.727, "step": 93220 }, { "epoch": 4.016022741956325, "learning_rate": 7.744618500721843e-07, "loss": 3.6132, "step": 93240 }, { "epoch": 4.0168841796959125, "learning_rate": 7.744133680967054e-07, "loss": 3.6475, "step": 93260 }, { "epoch": 4.0177456174355, "learning_rate": 7.743648861212263e-07, "loss": 3.7494, "step": 93280 }, { "epoch": 4.018607055175087, "learning_rate": 7.743164041457475e-07, "loss": 3.8054, "step": 93300 }, { "epoch": 4.019468492914674, "learning_rate": 7.742679221702687e-07, "loss": 3.6377, "step": 93320 }, { "epoch": 4.020329930654262, "learning_rate": 7.742194401947898e-07, "loss": 3.5, "step": 93340 }, { "epoch": 4.02119136839385, "learning_rate": 7.741709582193108e-07, "loss": 3.8877, "step": 93360 }, { "epoch": 4.022052806133437, "learning_rate": 7.74122476243832e-07, "loss": 3.8689, "step": 93380 }, { "epoch": 4.022914243873024, "learning_rate": 7.740739942683531e-07, "loss": 3.785, "step": 93400 }, { "epoch": 4.023775681612611, "learning_rate": 7.740255122928742e-07, "loss": 3.8665, "step": 93420 }, { "epoch": 4.024637119352199, "learning_rate": 7.739770303173952e-07, "loss": 3.7676, "step": 93440 }, { "epoch": 4.025498557091786, "learning_rate": 7.739285483419164e-07, "loss": 3.7239, "step": 93460 }, { "epoch": 4.026359994831374, "learning_rate": 7.738800663664376e-07, "loss": 3.6412, "step": 93480 }, { "epoch": 4.027221432570961, "learning_rate": 7.738315843909587e-07, "loss": 3.5538, "step": 93500 }, { "epoch": 4.0280828703105485, "learning_rate": 7.737831024154797e-07, "loss": 3.8761, "step": 93520 }, { "epoch": 4.028944308050136, "learning_rate": 7.737346204400008e-07, "loss": 3.7814, "step": 93540 }, { "epoch": 4.029805745789723, "learning_rate": 7.73686138464522e-07, "loss": 3.696, "step": 93560 }, { "epoch": 4.03066718352931, "learning_rate": 7.73637656489043e-07, "loss": 3.5002, "step": 93580 }, { "epoch": 4.031528621268897, "learning_rate": 7.735891745135641e-07, "loss": 3.7821, "step": 93600 }, { "epoch": 4.032390059008486, "learning_rate": 7.735406925380853e-07, "loss": 3.7736, "step": 93620 }, { "epoch": 4.033251496748073, "learning_rate": 7.734922105626065e-07, "loss": 3.6331, "step": 93640 }, { "epoch": 4.03411293448766, "learning_rate": 7.734437285871274e-07, "loss": 3.7201, "step": 93660 }, { "epoch": 4.034974372227247, "learning_rate": 7.733952466116485e-07, "loss": 3.5504, "step": 93680 }, { "epoch": 4.0358358099668346, "learning_rate": 7.733467646361697e-07, "loss": 3.773, "step": 93700 }, { "epoch": 4.036697247706422, "learning_rate": 7.732982826606908e-07, "loss": 3.5198, "step": 93720 }, { "epoch": 4.037558685446009, "learning_rate": 7.732498006852118e-07, "loss": 3.6763, "step": 93740 }, { "epoch": 4.038420123185597, "learning_rate": 7.73201318709733e-07, "loss": 3.7281, "step": 93760 }, { "epoch": 4.039281560925184, "learning_rate": 7.731528367342541e-07, "loss": 3.6656, "step": 93780 }, { "epoch": 4.040142998664772, "learning_rate": 7.731043547587752e-07, "loss": 3.961, "step": 93800 }, { "epoch": 4.041004436404359, "learning_rate": 7.730558727832962e-07, "loss": 3.6461, "step": 93820 }, { "epoch": 4.041865874143946, "learning_rate": 7.730073908078174e-07, "loss": 3.6627, "step": 93840 }, { "epoch": 4.042727311883533, "learning_rate": 7.729589088323386e-07, "loss": 3.723, "step": 93860 }, { "epoch": 4.043588749623121, "learning_rate": 7.729104268568597e-07, "loss": 3.7918, "step": 93880 }, { "epoch": 4.044450187362709, "learning_rate": 7.728619448813807e-07, "loss": 3.7381, "step": 93900 }, { "epoch": 4.045311625102296, "learning_rate": 7.728134629059018e-07, "loss": 3.6455, "step": 93920 }, { "epoch": 4.046173062841883, "learning_rate": 7.72764980930423e-07, "loss": 3.5353, "step": 93940 }, { "epoch": 4.0470345005814705, "learning_rate": 7.72716498954944e-07, "loss": 3.6592, "step": 93960 }, { "epoch": 4.047895938321058, "learning_rate": 7.726680169794651e-07, "loss": 3.8174, "step": 93980 }, { "epoch": 4.048757376060645, "learning_rate": 7.726195350039863e-07, "loss": 3.6744, "step": 94000 }, { "epoch": 4.049618813800232, "learning_rate": 7.725710530285075e-07, "loss": 3.6903, "step": 94020 }, { "epoch": 4.05048025153982, "learning_rate": 7.725225710530284e-07, "loss": 3.713, "step": 94040 }, { "epoch": 4.051341689279408, "learning_rate": 7.724740890775495e-07, "loss": 3.7358, "step": 94060 }, { "epoch": 4.052203127018995, "learning_rate": 7.724256071020707e-07, "loss": 3.7242, "step": 94080 }, { "epoch": 4.053064564758582, "learning_rate": 7.723771251265918e-07, "loss": 3.6433, "step": 94100 }, { "epoch": 4.053926002498169, "learning_rate": 7.723286431511129e-07, "loss": 3.7958, "step": 94120 }, { "epoch": 4.054787440237757, "learning_rate": 7.72280161175634e-07, "loss": 3.6197, "step": 94140 }, { "epoch": 4.055648877977344, "learning_rate": 7.722316792001551e-07, "loss": 3.4819, "step": 94160 }, { "epoch": 4.056510315716932, "learning_rate": 7.721831972246763e-07, "loss": 3.6515, "step": 94180 }, { "epoch": 4.057371753456519, "learning_rate": 7.721347152491973e-07, "loss": 3.6916, "step": 94200 }, { "epoch": 4.0582331911961065, "learning_rate": 7.720862332737184e-07, "loss": 3.6998, "step": 94220 }, { "epoch": 4.059094628935694, "learning_rate": 7.720377512982396e-07, "loss": 3.9044, "step": 94240 }, { "epoch": 4.059956066675281, "learning_rate": 7.719892693227606e-07, "loss": 3.5698, "step": 94260 }, { "epoch": 4.060817504414868, "learning_rate": 7.719407873472817e-07, "loss": 3.5303, "step": 94280 }, { "epoch": 4.0616789421544555, "learning_rate": 7.718923053718028e-07, "loss": 4.0102, "step": 94300 }, { "epoch": 4.062540379894044, "learning_rate": 7.71843823396324e-07, "loss": 3.4653, "step": 94320 }, { "epoch": 4.063401817633631, "learning_rate": 7.71795341420845e-07, "loss": 3.581, "step": 94340 }, { "epoch": 4.064263255373218, "learning_rate": 7.717468594453661e-07, "loss": 3.6084, "step": 94360 }, { "epoch": 4.065124693112805, "learning_rate": 7.716983774698873e-07, "loss": 3.5158, "step": 94380 }, { "epoch": 4.065986130852393, "learning_rate": 7.716498954944085e-07, "loss": 3.6894, "step": 94400 }, { "epoch": 4.06684756859198, "learning_rate": 7.716014135189293e-07, "loss": 3.6205, "step": 94420 }, { "epoch": 4.067709006331567, "learning_rate": 7.715529315434505e-07, "loss": 3.6581, "step": 94440 }, { "epoch": 4.068570444071154, "learning_rate": 7.715044495679717e-07, "loss": 3.593, "step": 94460 }, { "epoch": 4.0694318818107424, "learning_rate": 7.714559675924929e-07, "loss": 3.5971, "step": 94480 }, { "epoch": 4.07029331955033, "learning_rate": 7.714074856170139e-07, "loss": 3.6637, "step": 94500 }, { "epoch": 4.071154757289917, "learning_rate": 7.713590036415351e-07, "loss": 3.4868, "step": 94520 }, { "epoch": 4.072016195029504, "learning_rate": 7.713105216660561e-07, "loss": 3.6145, "step": 94540 }, { "epoch": 4.072877632769091, "learning_rate": 7.712620396905773e-07, "loss": 3.6797, "step": 94560 }, { "epoch": 4.073739070508679, "learning_rate": 7.712135577150983e-07, "loss": 3.6024, "step": 94580 }, { "epoch": 4.074600508248267, "learning_rate": 7.711650757396194e-07, "loss": 3.6174, "step": 94600 }, { "epoch": 4.075461945987854, "learning_rate": 7.711165937641406e-07, "loss": 3.6744, "step": 94620 }, { "epoch": 4.076323383727441, "learning_rate": 7.710681117886617e-07, "loss": 3.6869, "step": 94640 }, { "epoch": 4.0771848214670285, "learning_rate": 7.710196298131828e-07, "loss": 3.6593, "step": 94660 }, { "epoch": 4.078046259206616, "learning_rate": 7.709711478377038e-07, "loss": 3.5935, "step": 94680 }, { "epoch": 4.078907696946203, "learning_rate": 7.70922665862225e-07, "loss": 3.604, "step": 94700 }, { "epoch": 4.07976913468579, "learning_rate": 7.708741838867461e-07, "loss": 3.7207, "step": 94720 }, { "epoch": 4.0806305724253775, "learning_rate": 7.708257019112672e-07, "loss": 3.6882, "step": 94740 }, { "epoch": 4.081492010164966, "learning_rate": 7.707772199357883e-07, "loss": 3.7115, "step": 94760 }, { "epoch": 4.082353447904553, "learning_rate": 7.707287379603095e-07, "loss": 3.607, "step": 94780 }, { "epoch": 4.08321488564414, "learning_rate": 7.706802559848304e-07, "loss": 3.9763, "step": 94800 }, { "epoch": 4.084076323383727, "learning_rate": 7.706317740093515e-07, "loss": 3.7004, "step": 94820 }, { "epoch": 4.084937761123315, "learning_rate": 7.705832920338727e-07, "loss": 3.6568, "step": 94840 }, { "epoch": 4.085799198862902, "learning_rate": 7.705348100583939e-07, "loss": 3.7294, "step": 94860 }, { "epoch": 4.086660636602489, "learning_rate": 7.704863280829149e-07, "loss": 3.6133, "step": 94880 }, { "epoch": 4.087522074342077, "learning_rate": 7.70437846107436e-07, "loss": 3.5222, "step": 94900 }, { "epoch": 4.0883835120816645, "learning_rate": 7.703893641319572e-07, "loss": 3.8665, "step": 94920 }, { "epoch": 4.089244949821252, "learning_rate": 7.703408821564781e-07, "loss": 3.7331, "step": 94940 }, { "epoch": 4.090106387560839, "learning_rate": 7.702924001809993e-07, "loss": 3.6031, "step": 94960 }, { "epoch": 4.090967825300426, "learning_rate": 7.702439182055204e-07, "loss": 3.397, "step": 94980 }, { "epoch": 4.0918292630400135, "learning_rate": 7.701954362300416e-07, "loss": 3.9264, "step": 95000 }, { "epoch": 4.092690700779601, "learning_rate": 7.701469542545627e-07, "loss": 3.569, "step": 95020 }, { "epoch": 4.093552138519189, "learning_rate": 7.700984722790838e-07, "loss": 3.586, "step": 95040 }, { "epoch": 4.094413576258776, "learning_rate": 7.700499903036048e-07, "loss": 3.6438, "step": 95060 }, { "epoch": 4.095275013998363, "learning_rate": 7.70001508328126e-07, "loss": 3.7133, "step": 95080 }, { "epoch": 4.096136451737951, "learning_rate": 7.699530263526471e-07, "loss": 3.6876, "step": 95100 }, { "epoch": 4.096997889477538, "learning_rate": 7.699045443771682e-07, "loss": 3.5328, "step": 95120 }, { "epoch": 4.097859327217125, "learning_rate": 7.698560624016893e-07, "loss": 3.7075, "step": 95140 }, { "epoch": 4.098720764956712, "learning_rate": 7.698075804262105e-07, "loss": 3.7001, "step": 95160 }, { "epoch": 4.0995822026963005, "learning_rate": 7.697590984507315e-07, "loss": 3.6158, "step": 95180 }, { "epoch": 4.100443640435888, "learning_rate": 7.697106164752526e-07, "loss": 3.6288, "step": 95200 }, { "epoch": 4.101305078175475, "learning_rate": 7.696621344997737e-07, "loss": 3.6396, "step": 95220 }, { "epoch": 4.102166515915062, "learning_rate": 7.696136525242948e-07, "loss": 3.8677, "step": 95240 }, { "epoch": 4.1030279536546495, "learning_rate": 7.69565170548816e-07, "loss": 3.4926, "step": 95260 }, { "epoch": 4.103889391394237, "learning_rate": 7.695166885733371e-07, "loss": 3.6808, "step": 95280 }, { "epoch": 4.104750829133824, "learning_rate": 7.694682065978582e-07, "loss": 3.5624, "step": 95300 }, { "epoch": 4.105612266873412, "learning_rate": 7.694197246223792e-07, "loss": 3.4057, "step": 95320 }, { "epoch": 4.106473704612999, "learning_rate": 7.693712426469003e-07, "loss": 3.5641, "step": 95340 }, { "epoch": 4.107335142352587, "learning_rate": 7.693227606714214e-07, "loss": 3.6686, "step": 95360 }, { "epoch": 4.108196580092174, "learning_rate": 7.692742786959426e-07, "loss": 3.658, "step": 95380 }, { "epoch": 4.109058017831761, "learning_rate": 7.692257967204637e-07, "loss": 3.6373, "step": 95400 }, { "epoch": 4.109919455571348, "learning_rate": 7.691773147449848e-07, "loss": 3.6582, "step": 95420 }, { "epoch": 4.1107808933109355, "learning_rate": 7.691288327695058e-07, "loss": 3.6382, "step": 95440 }, { "epoch": 4.111642331050524, "learning_rate": 7.69080350794027e-07, "loss": 3.4615, "step": 95460 }, { "epoch": 4.112503768790111, "learning_rate": 7.690318688185481e-07, "loss": 3.7458, "step": 95480 }, { "epoch": 4.113365206529698, "learning_rate": 7.689833868430692e-07, "loss": 3.6824, "step": 95500 }, { "epoch": 4.114226644269285, "learning_rate": 7.689349048675903e-07, "loss": 3.6806, "step": 95520 }, { "epoch": 4.115088082008873, "learning_rate": 7.688864228921114e-07, "loss": 3.6325, "step": 95540 }, { "epoch": 4.11594951974846, "learning_rate": 7.688379409166325e-07, "loss": 3.7223, "step": 95560 }, { "epoch": 4.116810957488047, "learning_rate": 7.687894589411536e-07, "loss": 3.6405, "step": 95580 }, { "epoch": 4.117672395227635, "learning_rate": 7.687409769656747e-07, "loss": 3.4842, "step": 95600 }, { "epoch": 4.1185338329672225, "learning_rate": 7.686924949901958e-07, "loss": 3.6272, "step": 95620 }, { "epoch": 4.11939527070681, "learning_rate": 7.68644013014717e-07, "loss": 3.4975, "step": 95640 }, { "epoch": 4.120256708446397, "learning_rate": 7.685955310392381e-07, "loss": 3.5477, "step": 95660 }, { "epoch": 4.121118146185984, "learning_rate": 7.685470490637592e-07, "loss": 3.7781, "step": 95680 }, { "epoch": 4.1219795839255715, "learning_rate": 7.684985670882802e-07, "loss": 3.6428, "step": 95700 }, { "epoch": 4.122841021665159, "learning_rate": 7.684500851128014e-07, "loss": 3.6195, "step": 95720 }, { "epoch": 4.123702459404747, "learning_rate": 7.684016031373225e-07, "loss": 3.5773, "step": 95740 }, { "epoch": 4.124563897144334, "learning_rate": 7.683531211618436e-07, "loss": 3.7106, "step": 95760 }, { "epoch": 4.125425334883921, "learning_rate": 7.683046391863647e-07, "loss": 3.5037, "step": 95780 }, { "epoch": 4.126286772623509, "learning_rate": 7.682561572108859e-07, "loss": 3.8348, "step": 95800 }, { "epoch": 4.127148210363096, "learning_rate": 7.682076752354068e-07, "loss": 3.5621, "step": 95820 }, { "epoch": 4.128009648102683, "learning_rate": 7.681591932599279e-07, "loss": 3.6296, "step": 95840 }, { "epoch": 4.12887108584227, "learning_rate": 7.681107112844491e-07, "loss": 3.5148, "step": 95860 }, { "epoch": 4.1297325235818585, "learning_rate": 7.680622293089702e-07, "loss": 3.4836, "step": 95880 }, { "epoch": 4.130593961321446, "learning_rate": 7.680137473334913e-07, "loss": 3.6669, "step": 95900 }, { "epoch": 4.131455399061033, "learning_rate": 7.679652653580124e-07, "loss": 3.6571, "step": 95920 }, { "epoch": 4.13231683680062, "learning_rate": 7.679167833825335e-07, "loss": 3.7406, "step": 95940 }, { "epoch": 4.1331782745402075, "learning_rate": 7.678683014070546e-07, "loss": 3.5661, "step": 95960 }, { "epoch": 4.134039712279795, "learning_rate": 7.678198194315757e-07, "loss": 3.6479, "step": 95980 }, { "epoch": 4.134901150019382, "learning_rate": 7.677713374560968e-07, "loss": 3.7884, "step": 96000 }, { "epoch": 4.13576258775897, "learning_rate": 7.67722855480618e-07, "loss": 3.6712, "step": 96020 }, { "epoch": 4.136624025498557, "learning_rate": 7.676743735051391e-07, "loss": 3.551, "step": 96040 }, { "epoch": 4.137485463238145, "learning_rate": 7.676258915296602e-07, "loss": 3.6324, "step": 96060 }, { "epoch": 4.138346900977732, "learning_rate": 7.675774095541812e-07, "loss": 3.829, "step": 96080 }, { "epoch": 4.139208338717319, "learning_rate": 7.675289275787024e-07, "loss": 3.48, "step": 96100 }, { "epoch": 4.140069776456906, "learning_rate": 7.674804456032235e-07, "loss": 3.5614, "step": 96120 }, { "epoch": 4.140931214196494, "learning_rate": 7.674319636277445e-07, "loss": 3.7307, "step": 96140 }, { "epoch": 4.141792651936082, "learning_rate": 7.673834816522657e-07, "loss": 3.6099, "step": 96160 }, { "epoch": 4.142654089675669, "learning_rate": 7.673349996767869e-07, "loss": 3.7577, "step": 96180 }, { "epoch": 4.143515527415256, "learning_rate": 7.672865177013078e-07, "loss": 3.7372, "step": 96200 }, { "epoch": 4.144376965154843, "learning_rate": 7.672380357258289e-07, "loss": 3.5859, "step": 96220 }, { "epoch": 4.145238402894431, "learning_rate": 7.671895537503501e-07, "loss": 3.821, "step": 96240 }, { "epoch": 4.146099840634018, "learning_rate": 7.671410717748713e-07, "loss": 3.6523, "step": 96260 }, { "epoch": 4.146961278373605, "learning_rate": 7.670925897993924e-07, "loss": 3.7537, "step": 96280 }, { "epoch": 4.147822716113193, "learning_rate": 7.670441078239135e-07, "loss": 3.5594, "step": 96300 }, { "epoch": 4.1486841538527806, "learning_rate": 7.669956258484345e-07, "loss": 3.6303, "step": 96320 }, { "epoch": 4.149545591592368, "learning_rate": 7.669471438729557e-07, "loss": 3.6694, "step": 96340 }, { "epoch": 4.150407029331955, "learning_rate": 7.668986618974766e-07, "loss": 3.5924, "step": 96360 }, { "epoch": 4.151268467071542, "learning_rate": 7.668501799219978e-07, "loss": 3.6093, "step": 96380 }, { "epoch": 4.1521299048111295, "learning_rate": 7.66801697946519e-07, "loss": 3.7879, "step": 96400 }, { "epoch": 4.152991342550717, "learning_rate": 7.667532159710401e-07, "loss": 3.6788, "step": 96420 }, { "epoch": 4.153852780290305, "learning_rate": 7.667047339955611e-07, "loss": 3.6262, "step": 96440 }, { "epoch": 4.154714218029892, "learning_rate": 7.666562520200822e-07, "loss": 3.571, "step": 96460 }, { "epoch": 4.155575655769479, "learning_rate": 7.666077700446034e-07, "loss": 3.8031, "step": 96480 }, { "epoch": 4.156437093509067, "learning_rate": 7.665592880691245e-07, "loss": 3.553, "step": 96500 }, { "epoch": 4.157298531248654, "learning_rate": 7.665108060936455e-07, "loss": 3.6871, "step": 96520 }, { "epoch": 4.158159968988241, "learning_rate": 7.664623241181667e-07, "loss": 3.568, "step": 96540 }, { "epoch": 4.159021406727828, "learning_rate": 7.664138421426879e-07, "loss": 3.6274, "step": 96560 }, { "epoch": 4.1598828444674165, "learning_rate": 7.663653601672089e-07, "loss": 3.7104, "step": 96580 }, { "epoch": 4.160744282207004, "learning_rate": 7.663168781917299e-07, "loss": 3.7302, "step": 96600 }, { "epoch": 4.161605719946591, "learning_rate": 7.662683962162511e-07, "loss": 3.6491, "step": 96620 }, { "epoch": 4.162467157686178, "learning_rate": 7.662199142407723e-07, "loss": 3.753, "step": 96640 }, { "epoch": 4.1633285954257655, "learning_rate": 7.661714322652934e-07, "loss": 3.6668, "step": 96660 }, { "epoch": 4.164190033165353, "learning_rate": 7.661229502898144e-07, "loss": 3.6697, "step": 96680 }, { "epoch": 4.16505147090494, "learning_rate": 7.660744683143356e-07, "loss": 3.6219, "step": 96700 }, { "epoch": 4.165912908644528, "learning_rate": 7.660259863388566e-07, "loss": 3.6411, "step": 96720 }, { "epoch": 4.166774346384115, "learning_rate": 7.659775043633777e-07, "loss": 3.4607, "step": 96740 }, { "epoch": 4.167635784123703, "learning_rate": 7.659290223878988e-07, "loss": 3.7117, "step": 96760 }, { "epoch": 4.16849722186329, "learning_rate": 7.6588054041242e-07, "loss": 3.6294, "step": 96780 }, { "epoch": 4.169358659602877, "learning_rate": 7.658320584369412e-07, "loss": 3.4917, "step": 96800 }, { "epoch": 4.170220097342464, "learning_rate": 7.657835764614622e-07, "loss": 3.6277, "step": 96820 }, { "epoch": 4.171081535082052, "learning_rate": 7.657350944859832e-07, "loss": 3.3923, "step": 96840 }, { "epoch": 4.17194297282164, "learning_rate": 7.656866125105044e-07, "loss": 3.5579, "step": 96860 }, { "epoch": 4.172804410561227, "learning_rate": 7.656381305350256e-07, "loss": 3.5401, "step": 96880 }, { "epoch": 4.173665848300814, "learning_rate": 7.655896485595465e-07, "loss": 3.7217, "step": 96900 }, { "epoch": 4.1745272860404015, "learning_rate": 7.655411665840677e-07, "loss": 3.5982, "step": 96920 }, { "epoch": 4.175388723779989, "learning_rate": 7.654926846085889e-07, "loss": 3.6811, "step": 96940 }, { "epoch": 4.176250161519576, "learning_rate": 7.654442026331099e-07, "loss": 3.6916, "step": 96960 }, { "epoch": 4.177111599259163, "learning_rate": 7.653957206576309e-07, "loss": 3.6281, "step": 96980 }, { "epoch": 4.177973036998751, "learning_rate": 7.653472386821521e-07, "loss": 3.4743, "step": 97000 }, { "epoch": 4.178834474738339, "learning_rate": 7.652987567066733e-07, "loss": 3.5921, "step": 97020 }, { "epoch": 4.179695912477926, "learning_rate": 7.652502747311944e-07, "loss": 3.5673, "step": 97040 }, { "epoch": 4.180557350217513, "learning_rate": 7.652017927557154e-07, "loss": 3.6073, "step": 97060 }, { "epoch": 4.1814187879571, "learning_rate": 7.651533107802366e-07, "loss": 3.5392, "step": 97080 }, { "epoch": 4.182280225696688, "learning_rate": 7.651048288047577e-07, "loss": 3.5971, "step": 97100 }, { "epoch": 4.183141663436275, "learning_rate": 7.650563468292787e-07, "loss": 3.557, "step": 97120 }, { "epoch": 4.184003101175863, "learning_rate": 7.650078648537998e-07, "loss": 3.6023, "step": 97140 }, { "epoch": 4.18486453891545, "learning_rate": 7.64959382878321e-07, "loss": 3.6399, "step": 97160 }, { "epoch": 4.185725976655037, "learning_rate": 7.649109009028422e-07, "loss": 3.6157, "step": 97180 }, { "epoch": 4.186587414394625, "learning_rate": 7.648624189273632e-07, "loss": 3.5105, "step": 97200 }, { "epoch": 4.187448852134212, "learning_rate": 7.648139369518842e-07, "loss": 3.6493, "step": 97220 }, { "epoch": 4.188310289873799, "learning_rate": 7.647654549764054e-07, "loss": 3.734, "step": 97240 }, { "epoch": 4.189171727613386, "learning_rate": 7.647169730009266e-07, "loss": 3.6497, "step": 97260 }, { "epoch": 4.1900331653529745, "learning_rate": 7.646684910254476e-07, "loss": 3.5229, "step": 97280 }, { "epoch": 4.190894603092562, "learning_rate": 7.646200090499687e-07, "loss": 3.55, "step": 97300 }, { "epoch": 4.191756040832149, "learning_rate": 7.645715270744899e-07, "loss": 3.7054, "step": 97320 }, { "epoch": 4.192617478571736, "learning_rate": 7.64523045099011e-07, "loss": 3.6473, "step": 97340 }, { "epoch": 4.1934789163113235, "learning_rate": 7.64474563123532e-07, "loss": 3.7658, "step": 97360 }, { "epoch": 4.194340354050911, "learning_rate": 7.644260811480531e-07, "loss": 3.6228, "step": 97380 }, { "epoch": 4.195201791790498, "learning_rate": 7.643775991725743e-07, "loss": 3.6086, "step": 97400 }, { "epoch": 4.196063229530086, "learning_rate": 7.643291171970955e-07, "loss": 3.6835, "step": 97420 }, { "epoch": 4.196924667269673, "learning_rate": 7.642806352216164e-07, "loss": 3.5575, "step": 97440 }, { "epoch": 4.197786105009261, "learning_rate": 7.642321532461376e-07, "loss": 3.4224, "step": 97460 }, { "epoch": 4.198647542748848, "learning_rate": 7.641836712706587e-07, "loss": 3.6825, "step": 97480 }, { "epoch": 4.199508980488435, "learning_rate": 7.641351892951797e-07, "loss": 3.8836, "step": 97500 }, { "epoch": 4.200370418228022, "learning_rate": 7.640867073197008e-07, "loss": 3.5121, "step": 97520 }, { "epoch": 4.20123185596761, "learning_rate": 7.64038225344222e-07, "loss": 3.4411, "step": 97540 }, { "epoch": 4.202093293707197, "learning_rate": 7.639897433687432e-07, "loss": 3.6295, "step": 97560 }, { "epoch": 4.202954731446785, "learning_rate": 7.639412613932642e-07, "loss": 3.789, "step": 97580 }, { "epoch": 4.203816169186372, "learning_rate": 7.638927794177852e-07, "loss": 3.4085, "step": 97600 }, { "epoch": 4.2046776069259595, "learning_rate": 7.638442974423064e-07, "loss": 3.5776, "step": 97620 }, { "epoch": 4.205539044665547, "learning_rate": 7.637958154668276e-07, "loss": 3.6755, "step": 97640 }, { "epoch": 4.206400482405134, "learning_rate": 7.637473334913486e-07, "loss": 3.6116, "step": 97660 }, { "epoch": 4.207261920144721, "learning_rate": 7.636988515158697e-07, "loss": 3.608, "step": 97680 }, { "epoch": 4.208123357884309, "learning_rate": 7.636503695403909e-07, "loss": 3.5513, "step": 97700 }, { "epoch": 4.208984795623897, "learning_rate": 7.636018875649119e-07, "loss": 3.5045, "step": 97720 }, { "epoch": 4.209846233363484, "learning_rate": 7.63553405589433e-07, "loss": 3.487, "step": 97740 }, { "epoch": 4.210707671103071, "learning_rate": 7.635049236139541e-07, "loss": 3.6189, "step": 97760 }, { "epoch": 4.211569108842658, "learning_rate": 7.634564416384753e-07, "loss": 3.6587, "step": 97780 }, { "epoch": 4.212430546582246, "learning_rate": 7.634079596629964e-07, "loss": 3.5741, "step": 97800 }, { "epoch": 4.213291984321833, "learning_rate": 7.633594776875175e-07, "loss": 3.8757, "step": 97820 }, { "epoch": 4.21415342206142, "learning_rate": 7.633109957120386e-07, "loss": 3.4607, "step": 97840 }, { "epoch": 4.215014859801008, "learning_rate": 7.632625137365597e-07, "loss": 3.6519, "step": 97860 }, { "epoch": 4.2158762975405955, "learning_rate": 7.632140317610808e-07, "loss": 3.6615, "step": 97880 }, { "epoch": 4.216737735280183, "learning_rate": 7.631655497856019e-07, "loss": 3.6786, "step": 97900 }, { "epoch": 4.21759917301977, "learning_rate": 7.63117067810123e-07, "loss": 3.7063, "step": 97920 }, { "epoch": 4.218460610759357, "learning_rate": 7.630685858346442e-07, "loss": 3.4403, "step": 97940 }, { "epoch": 4.219322048498944, "learning_rate": 7.630201038591653e-07, "loss": 3.5957, "step": 97960 }, { "epoch": 4.220183486238533, "learning_rate": 7.629716218836862e-07, "loss": 3.5053, "step": 97980 }, { "epoch": 4.22104492397812, "learning_rate": 7.629231399082074e-07, "loss": 3.3752, "step": 98000 }, { "epoch": 4.221906361717707, "learning_rate": 7.628746579327285e-07, "loss": 3.8487, "step": 98020 }, { "epoch": 4.222767799457294, "learning_rate": 7.628261759572496e-07, "loss": 3.5937, "step": 98040 }, { "epoch": 4.2236292371968815, "learning_rate": 7.627776939817707e-07, "loss": 3.8835, "step": 98060 }, { "epoch": 4.224490674936469, "learning_rate": 7.62729212006292e-07, "loss": 3.4389, "step": 98080 }, { "epoch": 4.225352112676056, "learning_rate": 7.626807300308129e-07, "loss": 3.5515, "step": 98100 }, { "epoch": 4.226213550415643, "learning_rate": 7.62632248055334e-07, "loss": 3.6198, "step": 98120 }, { "epoch": 4.227074988155231, "learning_rate": 7.625837660798551e-07, "loss": 3.5712, "step": 98140 }, { "epoch": 4.227936425894819, "learning_rate": 7.625352841043763e-07, "loss": 3.7876, "step": 98160 }, { "epoch": 4.228797863634406, "learning_rate": 7.624868021288974e-07, "loss": 3.6569, "step": 98180 }, { "epoch": 4.229659301373993, "learning_rate": 7.624383201534185e-07, "loss": 3.6339, "step": 98200 }, { "epoch": 4.23052073911358, "learning_rate": 7.623898381779396e-07, "loss": 3.5889, "step": 98220 }, { "epoch": 4.231382176853168, "learning_rate": 7.623413562024607e-07, "loss": 3.6677, "step": 98240 }, { "epoch": 4.232243614592755, "learning_rate": 7.622928742269818e-07, "loss": 3.7587, "step": 98260 }, { "epoch": 4.233105052332343, "learning_rate": 7.622443922515029e-07, "loss": 3.3814, "step": 98280 }, { "epoch": 4.23396649007193, "learning_rate": 7.62195910276024e-07, "loss": 3.4579, "step": 98300 }, { "epoch": 4.2348279278115175, "learning_rate": 7.621474283005451e-07, "loss": 3.6606, "step": 98320 }, { "epoch": 4.235689365551105, "learning_rate": 7.620989463250663e-07, "loss": 3.5666, "step": 98340 }, { "epoch": 4.236550803290692, "learning_rate": 7.620504643495873e-07, "loss": 3.7073, "step": 98360 }, { "epoch": 4.237412241030279, "learning_rate": 7.620019823741084e-07, "loss": 3.5744, "step": 98380 }, { "epoch": 4.2382736787698665, "learning_rate": 7.619535003986295e-07, "loss": 3.7059, "step": 98400 }, { "epoch": 4.239135116509455, "learning_rate": 7.619050184231507e-07, "loss": 3.6127, "step": 98420 }, { "epoch": 4.239996554249042, "learning_rate": 7.618565364476718e-07, "loss": 3.564, "step": 98440 }, { "epoch": 4.240857991988629, "learning_rate": 7.618080544721929e-07, "loss": 3.3204, "step": 98460 }, { "epoch": 4.241719429728216, "learning_rate": 7.61759572496714e-07, "loss": 3.6988, "step": 98480 }, { "epoch": 4.242580867467804, "learning_rate": 7.61711090521235e-07, "loss": 3.7666, "step": 98500 }, { "epoch": 4.243442305207391, "learning_rate": 7.616626085457561e-07, "loss": 3.5265, "step": 98520 }, { "epoch": 4.244303742946978, "learning_rate": 7.616141265702773e-07, "loss": 3.494, "step": 98540 }, { "epoch": 4.245165180686566, "learning_rate": 7.615656445947984e-07, "loss": 3.636, "step": 98560 }, { "epoch": 4.2460266184261535, "learning_rate": 7.615171626193195e-07, "loss": 3.622, "step": 98580 }, { "epoch": 4.246888056165741, "learning_rate": 7.614686806438406e-07, "loss": 3.4535, "step": 98600 }, { "epoch": 4.247749493905328, "learning_rate": 7.614201986683616e-07, "loss": 3.3266, "step": 98620 }, { "epoch": 4.248610931644915, "learning_rate": 7.613717166928828e-07, "loss": 3.5386, "step": 98640 }, { "epoch": 4.2494723693845025, "learning_rate": 7.613232347174039e-07, "loss": 3.6697, "step": 98660 }, { "epoch": 4.25033380712409, "learning_rate": 7.61274752741925e-07, "loss": 3.6676, "step": 98680 }, { "epoch": 4.251195244863678, "learning_rate": 7.612262707664461e-07, "loss": 3.5347, "step": 98700 }, { "epoch": 4.252056682603265, "learning_rate": 7.611777887909673e-07, "loss": 3.5547, "step": 98720 }, { "epoch": 4.252918120342852, "learning_rate": 7.611293068154883e-07, "loss": 3.6636, "step": 98740 }, { "epoch": 4.25377955808244, "learning_rate": 7.610808248400094e-07, "loss": 3.5236, "step": 98760 }, { "epoch": 4.254640995822027, "learning_rate": 7.610323428645305e-07, "loss": 3.5066, "step": 98780 }, { "epoch": 4.255502433561614, "learning_rate": 7.609838608890517e-07, "loss": 3.7395, "step": 98800 }, { "epoch": 4.256363871301201, "learning_rate": 7.609353789135728e-07, "loss": 3.6356, "step": 98820 }, { "epoch": 4.257225309040789, "learning_rate": 7.608868969380939e-07, "loss": 3.539, "step": 98840 }, { "epoch": 4.258086746780377, "learning_rate": 7.60838414962615e-07, "loss": 3.5006, "step": 98860 }, { "epoch": 4.258948184519964, "learning_rate": 7.607899329871361e-07, "loss": 3.7159, "step": 98880 }, { "epoch": 4.259809622259551, "learning_rate": 7.607414510116572e-07, "loss": 3.672, "step": 98900 }, { "epoch": 4.260671059999138, "learning_rate": 7.606929690361784e-07, "loss": 3.5287, "step": 98920 }, { "epoch": 4.261532497738726, "learning_rate": 7.606444870606994e-07, "loss": 3.5974, "step": 98940 }, { "epoch": 4.262393935478313, "learning_rate": 7.605960050852206e-07, "loss": 3.565, "step": 98960 }, { "epoch": 4.263255373217901, "learning_rate": 7.605475231097417e-07, "loss": 3.5943, "step": 98980 }, { "epoch": 4.264116810957488, "learning_rate": 7.604990411342626e-07, "loss": 3.6006, "step": 99000 }, { "epoch": 4.2649782486970755, "learning_rate": 7.604505591587838e-07, "loss": 3.566, "step": 99020 }, { "epoch": 4.265839686436663, "learning_rate": 7.60402077183305e-07, "loss": 3.6672, "step": 99040 }, { "epoch": 4.26670112417625, "learning_rate": 7.60353595207826e-07, "loss": 3.4539, "step": 99060 }, { "epoch": 4.267562561915837, "learning_rate": 7.603051132323471e-07, "loss": 3.8522, "step": 99080 }, { "epoch": 4.2684239996554245, "learning_rate": 7.602566312568683e-07, "loss": 3.6473, "step": 99100 }, { "epoch": 4.269285437395013, "learning_rate": 7.602081492813893e-07, "loss": 3.5483, "step": 99120 }, { "epoch": 4.2701468751346, "learning_rate": 7.601596673059104e-07, "loss": 3.4412, "step": 99140 }, { "epoch": 4.271008312874187, "learning_rate": 7.601111853304315e-07, "loss": 3.6133, "step": 99160 }, { "epoch": 4.271869750613774, "learning_rate": 7.600627033549527e-07, "loss": 3.7525, "step": 99180 }, { "epoch": 4.272731188353362, "learning_rate": 7.600142213794738e-07, "loss": 3.7139, "step": 99200 }, { "epoch": 4.273592626092949, "learning_rate": 7.599657394039948e-07, "loss": 3.6204, "step": 99220 }, { "epoch": 4.274454063832536, "learning_rate": 7.59917257428516e-07, "loss": 3.5158, "step": 99240 }, { "epoch": 4.275315501572124, "learning_rate": 7.598687754530371e-07, "loss": 3.6156, "step": 99260 }, { "epoch": 4.2761769393117115, "learning_rate": 7.598202934775582e-07, "loss": 3.7032, "step": 99280 }, { "epoch": 4.277038377051299, "learning_rate": 7.597718115020792e-07, "loss": 3.575, "step": 99300 }, { "epoch": 4.277899814790886, "learning_rate": 7.597233295266004e-07, "loss": 3.4864, "step": 99320 }, { "epoch": 4.278761252530473, "learning_rate": 7.596748475511216e-07, "loss": 3.4801, "step": 99340 }, { "epoch": 4.2796226902700605, "learning_rate": 7.596263655756427e-07, "loss": 3.5849, "step": 99360 }, { "epoch": 4.280484128009648, "learning_rate": 7.595778836001636e-07, "loss": 3.6636, "step": 99380 }, { "epoch": 4.281345565749236, "learning_rate": 7.595294016246848e-07, "loss": 3.5957, "step": 99400 }, { "epoch": 4.282207003488823, "learning_rate": 7.59480919649206e-07, "loss": 3.7073, "step": 99420 }, { "epoch": 4.28306844122841, "learning_rate": 7.594324376737271e-07, "loss": 3.4552, "step": 99440 }, { "epoch": 4.283929878967998, "learning_rate": 7.593839556982481e-07, "loss": 3.6417, "step": 99460 }, { "epoch": 4.284791316707585, "learning_rate": 7.593354737227693e-07, "loss": 3.5259, "step": 99480 }, { "epoch": 4.285652754447172, "learning_rate": 7.592869917472904e-07, "loss": 3.7289, "step": 99500 }, { "epoch": 4.286514192186759, "learning_rate": 7.592385097718115e-07, "loss": 3.606, "step": 99520 }, { "epoch": 4.2873756299263475, "learning_rate": 7.591900277963325e-07, "loss": 3.6425, "step": 99540 }, { "epoch": 4.288237067665935, "learning_rate": 7.591415458208537e-07, "loss": 3.6355, "step": 99560 }, { "epoch": 4.289098505405522, "learning_rate": 7.590930638453749e-07, "loss": 3.527, "step": 99580 }, { "epoch": 4.289959943145109, "learning_rate": 7.590445818698958e-07, "loss": 3.3932, "step": 99600 }, { "epoch": 4.2908213808846964, "learning_rate": 7.58996099894417e-07, "loss": 3.5408, "step": 99620 }, { "epoch": 4.291682818624284, "learning_rate": 7.589476179189381e-07, "loss": 3.6079, "step": 99640 }, { "epoch": 4.292544256363871, "learning_rate": 7.588991359434592e-07, "loss": 3.4367, "step": 99660 }, { "epoch": 4.293405694103459, "learning_rate": 7.588506539679802e-07, "loss": 3.4824, "step": 99680 }, { "epoch": 4.294267131843046, "learning_rate": 7.588021719925014e-07, "loss": 3.6333, "step": 99700 }, { "epoch": 4.295128569582634, "learning_rate": 7.587536900170226e-07, "loss": 3.664, "step": 99720 }, { "epoch": 4.295990007322221, "learning_rate": 7.587052080415437e-07, "loss": 3.5497, "step": 99740 }, { "epoch": 4.296851445061808, "learning_rate": 7.586567260660646e-07, "loss": 3.5202, "step": 99760 }, { "epoch": 4.297712882801395, "learning_rate": 7.586082440905858e-07, "loss": 3.6558, "step": 99780 }, { "epoch": 4.2985743205409825, "learning_rate": 7.58559762115107e-07, "loss": 3.5368, "step": 99800 }, { "epoch": 4.299435758280571, "learning_rate": 7.58511280139628e-07, "loss": 3.5494, "step": 99820 }, { "epoch": 4.300297196020158, "learning_rate": 7.584627981641491e-07, "loss": 3.8079, "step": 99840 }, { "epoch": 4.301158633759745, "learning_rate": 7.584143161886704e-07, "loss": 3.6958, "step": 99860 }, { "epoch": 4.302020071499332, "learning_rate": 7.583658342131914e-07, "loss": 3.5206, "step": 99880 }, { "epoch": 4.30288150923892, "learning_rate": 7.583173522377124e-07, "loss": 3.5383, "step": 99900 }, { "epoch": 4.303742946978507, "learning_rate": 7.582688702622335e-07, "loss": 3.5775, "step": 99920 }, { "epoch": 4.304604384718094, "learning_rate": 7.582203882867547e-07, "loss": 3.6891, "step": 99940 }, { "epoch": 4.305465822457682, "learning_rate": 7.581719063112759e-07, "loss": 3.7513, "step": 99960 }, { "epoch": 4.3063272601972695, "learning_rate": 7.581234243357969e-07, "loss": 3.5897, "step": 99980 }, { "epoch": 4.307188697936857, "learning_rate": 7.58074942360318e-07, "loss": 3.5481, "step": 100000 }, { "epoch": 4.308050135676444, "learning_rate": 7.580264603848391e-07, "loss": 3.6821, "step": 100020 }, { "epoch": 4.308911573416031, "learning_rate": 7.579779784093603e-07, "loss": 3.6613, "step": 100040 }, { "epoch": 4.3097730111556185, "learning_rate": 7.579294964338813e-07, "loss": 3.4891, "step": 100060 }, { "epoch": 4.310634448895206, "learning_rate": 7.578810144584024e-07, "loss": 3.5326, "step": 100080 }, { "epoch": 4.311495886634794, "learning_rate": 7.578325324829236e-07, "loss": 3.7199, "step": 100100 }, { "epoch": 4.312357324374381, "learning_rate": 7.577840505074448e-07, "loss": 3.4382, "step": 100120 }, { "epoch": 4.313218762113968, "learning_rate": 7.577355685319657e-07, "loss": 3.6127, "step": 100140 }, { "epoch": 4.314080199853556, "learning_rate": 7.576870865564868e-07, "loss": 3.6243, "step": 100160 }, { "epoch": 4.314941637593143, "learning_rate": 7.57638604581008e-07, "loss": 3.7023, "step": 100180 }, { "epoch": 4.31580307533273, "learning_rate": 7.57590122605529e-07, "loss": 3.6696, "step": 100200 }, { "epoch": 4.316664513072317, "learning_rate": 7.575416406300501e-07, "loss": 3.6333, "step": 100220 }, { "epoch": 4.3175259508119055, "learning_rate": 7.574931586545713e-07, "loss": 3.6335, "step": 100240 }, { "epoch": 4.318387388551493, "learning_rate": 7.574446766790925e-07, "loss": 3.4217, "step": 100260 }, { "epoch": 4.31924882629108, "learning_rate": 7.573961947036134e-07, "loss": 3.4638, "step": 100280 }, { "epoch": 4.320110264030667, "learning_rate": 7.573477127281345e-07, "loss": 3.6377, "step": 100300 }, { "epoch": 4.3209717017702545, "learning_rate": 7.572992307526557e-07, "loss": 3.5025, "step": 100320 }, { "epoch": 4.321833139509842, "learning_rate": 7.572507487771769e-07, "loss": 3.4211, "step": 100340 }, { "epoch": 4.322694577249429, "learning_rate": 7.572022668016979e-07, "loss": 3.4665, "step": 100360 }, { "epoch": 4.323556014989016, "learning_rate": 7.57153784826219e-07, "loss": 3.635, "step": 100380 }, { "epoch": 4.324417452728604, "learning_rate": 7.571053028507401e-07, "loss": 3.7561, "step": 100400 }, { "epoch": 4.325278890468192, "learning_rate": 7.570568208752613e-07, "loss": 3.6254, "step": 100420 }, { "epoch": 4.326140328207779, "learning_rate": 7.570083388997823e-07, "loss": 3.3085, "step": 100440 }, { "epoch": 4.327001765947366, "learning_rate": 7.569598569243034e-07, "loss": 3.4187, "step": 100460 }, { "epoch": 4.327863203686953, "learning_rate": 7.569113749488246e-07, "loss": 3.4405, "step": 100480 }, { "epoch": 4.328724641426541, "learning_rate": 7.568628929733458e-07, "loss": 3.619, "step": 100500 }, { "epoch": 4.329586079166129, "learning_rate": 7.568144109978667e-07, "loss": 3.6213, "step": 100520 }, { "epoch": 4.330447516905716, "learning_rate": 7.567659290223878e-07, "loss": 3.5508, "step": 100540 }, { "epoch": 4.331308954645303, "learning_rate": 7.56717447046909e-07, "loss": 3.4848, "step": 100560 }, { "epoch": 4.33217039238489, "learning_rate": 7.566689650714301e-07, "loss": 3.4511, "step": 100580 }, { "epoch": 4.333031830124478, "learning_rate": 7.566204830959512e-07, "loss": 3.5972, "step": 100600 }, { "epoch": 4.333893267864065, "learning_rate": 7.565720011204723e-07, "loss": 3.5877, "step": 100620 }, { "epoch": 4.334754705603652, "learning_rate": 7.565235191449935e-07, "loss": 3.7472, "step": 100640 }, { "epoch": 4.335616143343239, "learning_rate": 7.564750371695145e-07, "loss": 3.5291, "step": 100660 }, { "epoch": 4.3364775810828275, "learning_rate": 7.564265551940355e-07, "loss": 3.5092, "step": 100680 }, { "epoch": 4.337339018822415, "learning_rate": 7.563780732185568e-07, "loss": 3.2994, "step": 100700 }, { "epoch": 4.338200456562002, "learning_rate": 7.563295912430779e-07, "loss": 3.4567, "step": 100720 }, { "epoch": 4.339061894301589, "learning_rate": 7.562811092675989e-07, "loss": 3.6029, "step": 100740 }, { "epoch": 4.3399233320411765, "learning_rate": 7.5623262729212e-07, "loss": 3.6637, "step": 100760 }, { "epoch": 4.340784769780764, "learning_rate": 7.561841453166411e-07, "loss": 3.6901, "step": 100780 }, { "epoch": 4.341646207520352, "learning_rate": 7.561356633411622e-07, "loss": 3.7772, "step": 100800 }, { "epoch": 4.342507645259939, "learning_rate": 7.560871813656833e-07, "loss": 3.4727, "step": 100820 }, { "epoch": 4.343369082999526, "learning_rate": 7.560386993902044e-07, "loss": 3.6293, "step": 100840 }, { "epoch": 4.344230520739114, "learning_rate": 7.559902174147256e-07, "loss": 3.566, "step": 100860 }, { "epoch": 4.345091958478701, "learning_rate": 7.559417354392467e-07, "loss": 3.612, "step": 100880 }, { "epoch": 4.345953396218288, "learning_rate": 7.558932534637677e-07, "loss": 3.5499, "step": 100900 }, { "epoch": 4.346814833957875, "learning_rate": 7.558447714882888e-07, "loss": 3.6266, "step": 100920 }, { "epoch": 4.347676271697463, "learning_rate": 7.5579628951281e-07, "loss": 3.5335, "step": 100940 }, { "epoch": 4.348537709437051, "learning_rate": 7.557478075373311e-07, "loss": 3.545, "step": 100960 }, { "epoch": 4.349399147176638, "learning_rate": 7.556993255618522e-07, "loss": 3.6761, "step": 100980 }, { "epoch": 4.350260584916225, "learning_rate": 7.556508435863733e-07, "loss": 3.7315, "step": 101000 }, { "epoch": 4.3511220226558125, "learning_rate": 7.556023616108945e-07, "loss": 3.4357, "step": 101020 }, { "epoch": 4.3519834603954, "learning_rate": 7.555538796354155e-07, "loss": 3.5213, "step": 101040 }, { "epoch": 4.352844898134987, "learning_rate": 7.555053976599366e-07, "loss": 3.5497, "step": 101060 }, { "epoch": 4.353706335874575, "learning_rate": 7.554569156844577e-07, "loss": 3.6045, "step": 101080 }, { "epoch": 4.354567773614162, "learning_rate": 7.554084337089789e-07, "loss": 3.7542, "step": 101100 }, { "epoch": 4.35542921135375, "learning_rate": 7.553599517335e-07, "loss": 3.5407, "step": 101120 }, { "epoch": 4.356290649093337, "learning_rate": 7.553114697580211e-07, "loss": 3.6474, "step": 101140 }, { "epoch": 4.357152086832924, "learning_rate": 7.552629877825421e-07, "loss": 3.6087, "step": 101160 }, { "epoch": 4.358013524572511, "learning_rate": 7.552145058070632e-07, "loss": 3.2966, "step": 101180 }, { "epoch": 4.358874962312099, "learning_rate": 7.551660238315844e-07, "loss": 3.5954, "step": 101200 }, { "epoch": 4.359736400051686, "learning_rate": 7.551175418561054e-07, "loss": 3.587, "step": 101220 }, { "epoch": 4.360597837791274, "learning_rate": 7.550690598806266e-07, "loss": 3.5307, "step": 101240 }, { "epoch": 4.361459275530861, "learning_rate": 7.550205779051477e-07, "loss": 3.5444, "step": 101260 }, { "epoch": 4.3623207132704485, "learning_rate": 7.549720959296688e-07, "loss": 3.4748, "step": 101280 }, { "epoch": 4.363182151010036, "learning_rate": 7.549236139541898e-07, "loss": 3.314, "step": 101300 }, { "epoch": 4.364043588749623, "learning_rate": 7.54875131978711e-07, "loss": 3.4928, "step": 101320 }, { "epoch": 4.36490502648921, "learning_rate": 7.548266500032321e-07, "loss": 3.5253, "step": 101340 }, { "epoch": 4.365766464228798, "learning_rate": 7.547781680277532e-07, "loss": 3.4639, "step": 101360 }, { "epoch": 4.366627901968386, "learning_rate": 7.547296860522743e-07, "loss": 3.4132, "step": 101380 }, { "epoch": 4.367489339707973, "learning_rate": 7.546812040767955e-07, "loss": 3.4828, "step": 101400 }, { "epoch": 4.36835077744756, "learning_rate": 7.546327221013165e-07, "loss": 3.5509, "step": 101420 }, { "epoch": 4.369212215187147, "learning_rate": 7.545842401258376e-07, "loss": 3.6032, "step": 101440 }, { "epoch": 4.3700736529267346, "learning_rate": 7.545357581503587e-07, "loss": 3.6337, "step": 101460 }, { "epoch": 4.370935090666322, "learning_rate": 7.544872761748798e-07, "loss": 3.51, "step": 101480 }, { "epoch": 4.371796528405909, "learning_rate": 7.54438794199401e-07, "loss": 3.5567, "step": 101500 }, { "epoch": 4.372657966145497, "learning_rate": 7.543903122239221e-07, "loss": 3.5941, "step": 101520 }, { "epoch": 4.373519403885084, "learning_rate": 7.543418302484431e-07, "loss": 3.6253, "step": 101540 }, { "epoch": 4.374380841624672, "learning_rate": 7.542933482729642e-07, "loss": 3.6321, "step": 101560 }, { "epoch": 4.375242279364259, "learning_rate": 7.542448662974854e-07, "loss": 3.685, "step": 101580 }, { "epoch": 4.376103717103846, "learning_rate": 7.541963843220065e-07, "loss": 3.5317, "step": 101600 }, { "epoch": 4.376965154843433, "learning_rate": 7.541479023465276e-07, "loss": 3.3984, "step": 101620 }, { "epoch": 4.377826592583021, "learning_rate": 7.540994203710488e-07, "loss": 3.6187, "step": 101640 }, { "epoch": 4.378688030322609, "learning_rate": 7.540509383955698e-07, "loss": 3.5354, "step": 101660 }, { "epoch": 4.379549468062196, "learning_rate": 7.540024564200909e-07, "loss": 3.5014, "step": 101680 }, { "epoch": 4.380410905801783, "learning_rate": 7.53953974444612e-07, "loss": 3.5726, "step": 101700 }, { "epoch": 4.3812723435413705, "learning_rate": 7.539054924691331e-07, "loss": 3.6799, "step": 101720 }, { "epoch": 4.382133781280958, "learning_rate": 7.538570104936543e-07, "loss": 3.6045, "step": 101740 }, { "epoch": 4.382995219020545, "learning_rate": 7.538085285181753e-07, "loss": 3.5273, "step": 101760 }, { "epoch": 4.383856656760132, "learning_rate": 7.537600465426964e-07, "loss": 3.5461, "step": 101780 }, { "epoch": 4.38471809449972, "learning_rate": 7.537115645672175e-07, "loss": 3.6679, "step": 101800 }, { "epoch": 4.385579532239308, "learning_rate": 7.536630825917386e-07, "loss": 3.5037, "step": 101820 }, { "epoch": 4.386440969978895, "learning_rate": 7.536146006162597e-07, "loss": 3.6358, "step": 101840 }, { "epoch": 4.387302407718482, "learning_rate": 7.535661186407808e-07, "loss": 3.568, "step": 101860 }, { "epoch": 4.388163845458069, "learning_rate": 7.53517636665302e-07, "loss": 3.5031, "step": 101880 }, { "epoch": 4.389025283197657, "learning_rate": 7.534691546898231e-07, "loss": 3.3478, "step": 101900 }, { "epoch": 4.389886720937244, "learning_rate": 7.534206727143442e-07, "loss": 3.6285, "step": 101920 }, { "epoch": 4.390748158676832, "learning_rate": 7.533721907388652e-07, "loss": 3.549, "step": 101940 }, { "epoch": 4.391609596416419, "learning_rate": 7.533237087633864e-07, "loss": 3.5341, "step": 101960 }, { "epoch": 4.3924710341560065, "learning_rate": 7.532752267879075e-07, "loss": 3.6116, "step": 101980 }, { "epoch": 4.393332471895594, "learning_rate": 7.532267448124286e-07, "loss": 3.4513, "step": 102000 }, { "epoch": 4.394193909635181, "learning_rate": 7.531782628369497e-07, "loss": 3.6678, "step": 102020 }, { "epoch": 4.395055347374768, "learning_rate": 7.531297808614709e-07, "loss": 3.5044, "step": 102040 }, { "epoch": 4.3959167851143555, "learning_rate": 7.530812988859919e-07, "loss": 3.4782, "step": 102060 }, { "epoch": 4.396778222853944, "learning_rate": 7.530328169105129e-07, "loss": 3.4979, "step": 102080 }, { "epoch": 4.397639660593531, "learning_rate": 7.529843349350341e-07, "loss": 3.4867, "step": 102100 }, { "epoch": 4.398501098333118, "learning_rate": 7.529358529595553e-07, "loss": 3.4821, "step": 102120 }, { "epoch": 4.399362536072705, "learning_rate": 7.528873709840764e-07, "loss": 3.6658, "step": 102140 }, { "epoch": 4.400223973812293, "learning_rate": 7.528388890085974e-07, "loss": 3.3317, "step": 102160 }, { "epoch": 4.40108541155188, "learning_rate": 7.527904070331185e-07, "loss": 3.6214, "step": 102180 }, { "epoch": 4.401946849291467, "learning_rate": 7.527419250576397e-07, "loss": 3.5743, "step": 102200 }, { "epoch": 4.402808287031055, "learning_rate": 7.526934430821608e-07, "loss": 3.5693, "step": 102220 }, { "epoch": 4.4036697247706424, "learning_rate": 7.526449611066818e-07, "loss": 3.7025, "step": 102240 }, { "epoch": 4.40453116251023, "learning_rate": 7.52596479131203e-07, "loss": 3.5446, "step": 102260 }, { "epoch": 4.405392600249817, "learning_rate": 7.525479971557242e-07, "loss": 3.689, "step": 102280 }, { "epoch": 4.406254037989404, "learning_rate": 7.524995151802452e-07, "loss": 3.4581, "step": 102300 }, { "epoch": 4.407115475728991, "learning_rate": 7.524510332047662e-07, "loss": 3.4008, "step": 102320 }, { "epoch": 4.407976913468579, "learning_rate": 7.524025512292874e-07, "loss": 3.4142, "step": 102340 }, { "epoch": 4.408838351208167, "learning_rate": 7.523540692538085e-07, "loss": 3.3853, "step": 102360 }, { "epoch": 4.409699788947754, "learning_rate": 7.523055872783294e-07, "loss": 3.4031, "step": 102380 }, { "epoch": 4.410561226687341, "learning_rate": 7.522571053028507e-07, "loss": 3.4219, "step": 102400 }, { "epoch": 4.4114226644269285, "learning_rate": 7.522086233273719e-07, "loss": 3.6066, "step": 102420 }, { "epoch": 4.412284102166516, "learning_rate": 7.521601413518929e-07, "loss": 3.4492, "step": 102440 }, { "epoch": 4.413145539906103, "learning_rate": 7.521116593764139e-07, "loss": 3.4515, "step": 102460 }, { "epoch": 4.41400697764569, "learning_rate": 7.520631774009352e-07, "loss": 3.7738, "step": 102480 }, { "epoch": 4.414868415385278, "learning_rate": 7.520146954254563e-07, "loss": 3.4874, "step": 102500 }, { "epoch": 4.415729853124866, "learning_rate": 7.519662134499774e-07, "loss": 3.3105, "step": 102520 }, { "epoch": 4.416591290864453, "learning_rate": 7.519177314744984e-07, "loss": 3.6158, "step": 102540 }, { "epoch": 4.41745272860404, "learning_rate": 7.518692494990195e-07, "loss": 3.5226, "step": 102560 }, { "epoch": 4.418314166343627, "learning_rate": 7.518207675235407e-07, "loss": 3.4994, "step": 102580 }, { "epoch": 4.419175604083215, "learning_rate": 7.517722855480618e-07, "loss": 3.4928, "step": 102600 }, { "epoch": 4.420037041822802, "learning_rate": 7.517238035725828e-07, "loss": 3.6281, "step": 102620 }, { "epoch": 4.42089847956239, "learning_rate": 7.51675321597104e-07, "loss": 3.4549, "step": 102640 }, { "epoch": 4.421759917301977, "learning_rate": 7.516268396216252e-07, "loss": 3.5594, "step": 102660 }, { "epoch": 4.4226213550415645, "learning_rate": 7.515783576461461e-07, "loss": 3.4348, "step": 102680 }, { "epoch": 4.423482792781152, "learning_rate": 7.515298756706672e-07, "loss": 3.4455, "step": 102700 }, { "epoch": 4.424344230520739, "learning_rate": 7.514813936951884e-07, "loss": 3.5326, "step": 102720 }, { "epoch": 4.425205668260326, "learning_rate": 7.514329117197096e-07, "loss": 3.8389, "step": 102740 }, { "epoch": 4.4260671059999135, "learning_rate": 7.513844297442306e-07, "loss": 3.6775, "step": 102760 }, { "epoch": 4.426928543739502, "learning_rate": 7.513359477687517e-07, "loss": 3.6713, "step": 102780 }, { "epoch": 4.427789981479089, "learning_rate": 7.512874657932729e-07, "loss": 3.5122, "step": 102800 }, { "epoch": 4.428651419218676, "learning_rate": 7.51238983817794e-07, "loss": 3.4725, "step": 102820 }, { "epoch": 4.429512856958263, "learning_rate": 7.511905018423149e-07, "loss": 3.4536, "step": 102840 }, { "epoch": 4.430374294697851, "learning_rate": 7.511420198668361e-07, "loss": 3.4621, "step": 102860 }, { "epoch": 4.431235732437438, "learning_rate": 7.510935378913573e-07, "loss": 3.6124, "step": 102880 }, { "epoch": 4.432097170177025, "learning_rate": 7.510450559158785e-07, "loss": 3.4475, "step": 102900 }, { "epoch": 4.432958607916613, "learning_rate": 7.509965739403994e-07, "loss": 3.5734, "step": 102920 }, { "epoch": 4.4338200456562005, "learning_rate": 7.509480919649205e-07, "loss": 3.3826, "step": 102940 }, { "epoch": 4.434681483395788, "learning_rate": 7.508996099894417e-07, "loss": 3.6288, "step": 102960 }, { "epoch": 4.435542921135375, "learning_rate": 7.508511280139627e-07, "loss": 3.5509, "step": 102980 }, { "epoch": 4.436404358874962, "learning_rate": 7.508026460384838e-07, "loss": 3.5255, "step": 103000 }, { "epoch": 4.4372657966145495, "learning_rate": 7.50754164063005e-07, "loss": 3.4593, "step": 103020 }, { "epoch": 4.438127234354137, "learning_rate": 7.507056820875262e-07, "loss": 3.2955, "step": 103040 }, { "epoch": 4.438988672093725, "learning_rate": 7.506572001120472e-07, "loss": 3.6332, "step": 103060 }, { "epoch": 4.439850109833312, "learning_rate": 7.506087181365682e-07, "loss": 3.3554, "step": 103080 }, { "epoch": 4.440711547572899, "learning_rate": 7.505602361610894e-07, "loss": 3.6188, "step": 103100 }, { "epoch": 4.441572985312487, "learning_rate": 7.505117541856106e-07, "loss": 3.4414, "step": 103120 }, { "epoch": 4.442434423052074, "learning_rate": 7.504632722101316e-07, "loss": 3.546, "step": 103140 }, { "epoch": 4.443295860791661, "learning_rate": 7.504147902346527e-07, "loss": 3.3908, "step": 103160 }, { "epoch": 4.444157298531248, "learning_rate": 7.503663082591739e-07, "loss": 3.5382, "step": 103180 }, { "epoch": 4.445018736270836, "learning_rate": 7.50317826283695e-07, "loss": 3.52, "step": 103200 }, { "epoch": 4.445880174010424, "learning_rate": 7.50269344308216e-07, "loss": 3.5026, "step": 103220 }, { "epoch": 4.446741611750011, "learning_rate": 7.502208623327371e-07, "loss": 3.4411, "step": 103240 }, { "epoch": 4.447603049489598, "learning_rate": 7.501723803572583e-07, "loss": 3.6578, "step": 103260 }, { "epoch": 4.448464487229185, "learning_rate": 7.501238983817795e-07, "loss": 3.5964, "step": 103280 }, { "epoch": 4.449325924968773, "learning_rate": 7.500754164063005e-07, "loss": 3.4745, "step": 103300 }, { "epoch": 4.45018736270836, "learning_rate": 7.500269344308215e-07, "loss": 3.4408, "step": 103320 }, { "epoch": 4.451048800447948, "learning_rate": 7.499784524553427e-07, "loss": 3.5472, "step": 103340 }, { "epoch": 4.451910238187535, "learning_rate": 7.499299704798638e-07, "loss": 3.3716, "step": 103360 }, { "epoch": 4.4527716759271225, "learning_rate": 7.498814885043848e-07, "loss": 3.6598, "step": 103380 }, { "epoch": 4.45363311366671, "learning_rate": 7.49833006528906e-07, "loss": 3.5674, "step": 103400 }, { "epoch": 4.454494551406297, "learning_rate": 7.497845245534273e-07, "loss": 3.402, "step": 103420 }, { "epoch": 4.455355989145884, "learning_rate": 7.497360425779482e-07, "loss": 3.4623, "step": 103440 }, { "epoch": 4.4562174268854715, "learning_rate": 7.496875606024692e-07, "loss": 3.4413, "step": 103460 }, { "epoch": 4.457078864625059, "learning_rate": 7.496390786269904e-07, "loss": 3.5012, "step": 103480 }, { "epoch": 4.457940302364647, "learning_rate": 7.495905966515116e-07, "loss": 3.5767, "step": 103500 }, { "epoch": 4.458801740104234, "learning_rate": 7.495421146760326e-07, "loss": 3.4629, "step": 103520 }, { "epoch": 4.459663177843821, "learning_rate": 7.494936327005537e-07, "loss": 3.4782, "step": 103540 }, { "epoch": 4.460524615583409, "learning_rate": 7.494451507250749e-07, "loss": 3.5955, "step": 103560 }, { "epoch": 4.461386053322996, "learning_rate": 7.49396668749596e-07, "loss": 3.3298, "step": 103580 }, { "epoch": 4.462247491062583, "learning_rate": 7.49348186774117e-07, "loss": 3.5234, "step": 103600 }, { "epoch": 4.463108928802171, "learning_rate": 7.492997047986381e-07, "loss": 3.4356, "step": 103620 }, { "epoch": 4.4639703665417585, "learning_rate": 7.492512228231593e-07, "loss": 3.3151, "step": 103640 }, { "epoch": 4.464831804281346, "learning_rate": 7.492027408476804e-07, "loss": 3.4003, "step": 103660 }, { "epoch": 4.465693242020933, "learning_rate": 7.491542588722015e-07, "loss": 3.4551, "step": 103680 }, { "epoch": 4.46655467976052, "learning_rate": 7.491057768967226e-07, "loss": 3.4606, "step": 103700 }, { "epoch": 4.4674161175001075, "learning_rate": 7.490572949212437e-07, "loss": 3.4888, "step": 103720 }, { "epoch": 4.468277555239695, "learning_rate": 7.490088129457648e-07, "loss": 3.5808, "step": 103740 }, { "epoch": 4.469138992979282, "learning_rate": 7.489603309702859e-07, "loss": 3.5023, "step": 103760 }, { "epoch": 4.47000043071887, "learning_rate": 7.48911848994807e-07, "loss": 3.4667, "step": 103780 }, { "epoch": 4.470861868458457, "learning_rate": 7.488633670193282e-07, "loss": 3.421, "step": 103800 }, { "epoch": 4.471723306198045, "learning_rate": 7.488148850438493e-07, "loss": 3.4601, "step": 103820 }, { "epoch": 4.472584743937632, "learning_rate": 7.487664030683703e-07, "loss": 3.4548, "step": 103840 }, { "epoch": 4.473446181677219, "learning_rate": 7.487179210928914e-07, "loss": 3.4377, "step": 103860 }, { "epoch": 4.474307619416806, "learning_rate": 7.486694391174126e-07, "loss": 3.2797, "step": 103880 }, { "epoch": 4.4751690571563945, "learning_rate": 7.486209571419336e-07, "loss": 3.6023, "step": 103900 }, { "epoch": 4.476030494895982, "learning_rate": 7.485724751664547e-07, "loss": 3.3117, "step": 103920 }, { "epoch": 4.476891932635569, "learning_rate": 7.485239931909759e-07, "loss": 3.352, "step": 103940 }, { "epoch": 4.477753370375156, "learning_rate": 7.484755112154969e-07, "loss": 3.6262, "step": 103960 }, { "epoch": 4.478614808114743, "learning_rate": 7.48427029240018e-07, "loss": 3.3985, "step": 103980 }, { "epoch": 4.479476245854331, "learning_rate": 7.483785472645391e-07, "loss": 3.5367, "step": 104000 }, { "epoch": 4.480337683593918, "learning_rate": 7.483300652890603e-07, "loss": 3.353, "step": 104020 }, { "epoch": 4.481199121333505, "learning_rate": 7.482815833135814e-07, "loss": 3.5371, "step": 104040 }, { "epoch": 4.482060559073093, "learning_rate": 7.482331013381025e-07, "loss": 3.4642, "step": 104060 }, { "epoch": 4.4829219968126806, "learning_rate": 7.481846193626236e-07, "loss": 3.4866, "step": 104080 }, { "epoch": 4.483783434552268, "learning_rate": 7.481361373871447e-07, "loss": 3.5181, "step": 104100 }, { "epoch": 4.484644872291855, "learning_rate": 7.480876554116658e-07, "loss": 3.6749, "step": 104120 }, { "epoch": 4.485506310031442, "learning_rate": 7.480391734361869e-07, "loss": 3.4921, "step": 104140 }, { "epoch": 4.4863677477710295, "learning_rate": 7.479906914607079e-07, "loss": 3.4176, "step": 104160 }, { "epoch": 4.487229185510618, "learning_rate": 7.479422094852292e-07, "loss": 3.5571, "step": 104180 }, { "epoch": 4.488090623250205, "learning_rate": 7.478937275097503e-07, "loss": 3.4492, "step": 104200 }, { "epoch": 4.488952060989792, "learning_rate": 7.478452455342713e-07, "loss": 3.4424, "step": 104220 }, { "epoch": 4.489813498729379, "learning_rate": 7.477967635587924e-07, "loss": 3.5695, "step": 104240 }, { "epoch": 4.490674936468967, "learning_rate": 7.477482815833137e-07, "loss": 3.4163, "step": 104260 }, { "epoch": 4.491536374208554, "learning_rate": 7.476997996078347e-07, "loss": 3.4495, "step": 104280 }, { "epoch": 4.492397811948141, "learning_rate": 7.476513176323558e-07, "loss": 3.5052, "step": 104300 }, { "epoch": 4.493259249687728, "learning_rate": 7.476028356568769e-07, "loss": 3.5367, "step": 104320 }, { "epoch": 4.4941206874273165, "learning_rate": 7.475543536813979e-07, "loss": 3.341, "step": 104340 }, { "epoch": 4.494982125166904, "learning_rate": 7.475058717059191e-07, "loss": 3.4593, "step": 104360 }, { "epoch": 4.495843562906491, "learning_rate": 7.474573897304402e-07, "loss": 3.4561, "step": 104380 }, { "epoch": 4.496705000646078, "learning_rate": 7.474089077549613e-07, "loss": 3.6461, "step": 104400 }, { "epoch": 4.4975664383856655, "learning_rate": 7.473604257794824e-07, "loss": 3.4264, "step": 104420 }, { "epoch": 4.498427876125253, "learning_rate": 7.473119438040035e-07, "loss": 3.4126, "step": 104440 }, { "epoch": 4.499289313864841, "learning_rate": 7.472634618285246e-07, "loss": 3.4079, "step": 104460 }, { "epoch": 4.500150751604428, "learning_rate": 7.472149798530457e-07, "loss": 3.5648, "step": 104480 }, { "epoch": 4.501012189344015, "learning_rate": 7.471664978775668e-07, "loss": 3.6219, "step": 104500 }, { "epoch": 4.501873627083603, "learning_rate": 7.471180159020879e-07, "loss": 3.5651, "step": 104520 }, { "epoch": 4.50273506482319, "learning_rate": 7.47069533926609e-07, "loss": 3.5358, "step": 104540 }, { "epoch": 4.503596502562777, "learning_rate": 7.470210519511301e-07, "loss": 3.3053, "step": 104560 }, { "epoch": 4.504457940302364, "learning_rate": 7.469725699756513e-07, "loss": 3.467, "step": 104580 }, { "epoch": 4.505319378041952, "learning_rate": 7.469240880001723e-07, "loss": 3.3649, "step": 104600 }, { "epoch": 4.50618081578154, "learning_rate": 7.468756060246934e-07, "loss": 3.5051, "step": 104620 }, { "epoch": 4.507042253521127, "learning_rate": 7.468271240492145e-07, "loss": 3.2979, "step": 104640 }, { "epoch": 4.507903691260714, "learning_rate": 7.467786420737357e-07, "loss": 3.5733, "step": 104660 }, { "epoch": 4.5087651290003015, "learning_rate": 7.467301600982569e-07, "loss": 3.5325, "step": 104680 }, { "epoch": 4.509626566739889, "learning_rate": 7.466816781227779e-07, "loss": 3.4373, "step": 104700 }, { "epoch": 4.510488004479476, "learning_rate": 7.466331961472989e-07, "loss": 3.3197, "step": 104720 }, { "epoch": 4.511349442219064, "learning_rate": 7.465847141718201e-07, "loss": 3.4607, "step": 104740 }, { "epoch": 4.512210879958651, "learning_rate": 7.465362321963412e-07, "loss": 3.6503, "step": 104760 }, { "epoch": 4.513072317698239, "learning_rate": 7.464877502208623e-07, "loss": 3.5161, "step": 104780 }, { "epoch": 4.513933755437826, "learning_rate": 7.464392682453834e-07, "loss": 3.4362, "step": 104800 }, { "epoch": 4.514795193177413, "learning_rate": 7.463907862699046e-07, "loss": 3.389, "step": 104820 }, { "epoch": 4.515656630917, "learning_rate": 7.463423042944257e-07, "loss": 3.6639, "step": 104840 }, { "epoch": 4.516518068656588, "learning_rate": 7.462938223189466e-07, "loss": 3.4774, "step": 104860 }, { "epoch": 4.517379506396175, "learning_rate": 7.462453403434678e-07, "loss": 3.2268, "step": 104880 }, { "epoch": 4.518240944135763, "learning_rate": 7.46196858367989e-07, "loss": 3.5172, "step": 104900 }, { "epoch": 4.51910238187535, "learning_rate": 7.461483763925101e-07, "loss": 3.5388, "step": 104920 }, { "epoch": 4.519963819614937, "learning_rate": 7.460998944170311e-07, "loss": 3.5466, "step": 104940 }, { "epoch": 4.520825257354525, "learning_rate": 7.460514124415523e-07, "loss": 3.3623, "step": 104960 }, { "epoch": 4.521686695094112, "learning_rate": 7.460029304660733e-07, "loss": 3.479, "step": 104980 }, { "epoch": 4.522548132833699, "learning_rate": 7.459544484905945e-07, "loss": 3.4183, "step": 105000 }, { "epoch": 4.523409570573287, "learning_rate": 7.459059665151155e-07, "loss": 3.3884, "step": 105020 }, { "epoch": 4.5242710083128745, "learning_rate": 7.458574845396367e-07, "loss": 3.6151, "step": 105040 }, { "epoch": 4.525132446052462, "learning_rate": 7.458090025641578e-07, "loss": 3.3566, "step": 105060 }, { "epoch": 4.525993883792049, "learning_rate": 7.457605205886789e-07, "loss": 3.4119, "step": 105080 }, { "epoch": 4.526855321531636, "learning_rate": 7.457120386131999e-07, "loss": 3.5635, "step": 105100 }, { "epoch": 4.5277167592712235, "learning_rate": 7.456635566377211e-07, "loss": 3.4142, "step": 105120 }, { "epoch": 4.528578197010811, "learning_rate": 7.456150746622422e-07, "loss": 3.5333, "step": 105140 }, { "epoch": 4.529439634750398, "learning_rate": 7.455665926867632e-07, "loss": 3.6108, "step": 105160 }, { "epoch": 4.530301072489986, "learning_rate": 7.455181107112844e-07, "loss": 3.5274, "step": 105180 }, { "epoch": 4.531162510229573, "learning_rate": 7.454696287358057e-07, "loss": 3.3089, "step": 105200 }, { "epoch": 4.532023947969161, "learning_rate": 7.454211467603267e-07, "loss": 3.4432, "step": 105220 }, { "epoch": 4.532885385708748, "learning_rate": 7.453726647848476e-07, "loss": 3.4167, "step": 105240 }, { "epoch": 4.533746823448335, "learning_rate": 7.453241828093688e-07, "loss": 3.4467, "step": 105260 }, { "epoch": 4.534608261187922, "learning_rate": 7.4527570083389e-07, "loss": 3.5601, "step": 105280 }, { "epoch": 4.5354696989275105, "learning_rate": 7.452272188584111e-07, "loss": 3.5308, "step": 105300 }, { "epoch": 4.536331136667098, "learning_rate": 7.451787368829321e-07, "loss": 3.3293, "step": 105320 }, { "epoch": 4.537192574406685, "learning_rate": 7.451302549074533e-07, "loss": 3.4035, "step": 105340 }, { "epoch": 4.538054012146272, "learning_rate": 7.450817729319744e-07, "loss": 3.4777, "step": 105360 }, { "epoch": 4.5389154498858595, "learning_rate": 7.450332909564955e-07, "loss": 3.5998, "step": 105380 }, { "epoch": 4.539776887625447, "learning_rate": 7.449848089810165e-07, "loss": 3.367, "step": 105400 }, { "epoch": 4.540638325365034, "learning_rate": 7.449363270055377e-07, "loss": 3.638, "step": 105420 }, { "epoch": 4.541499763104621, "learning_rate": 7.448878450300589e-07, "loss": 3.4904, "step": 105440 }, { "epoch": 4.542361200844209, "learning_rate": 7.4483936305458e-07, "loss": 3.4241, "step": 105460 }, { "epoch": 4.543222638583797, "learning_rate": 7.44790881079101e-07, "loss": 3.319, "step": 105480 }, { "epoch": 4.544084076323384, "learning_rate": 7.447423991036221e-07, "loss": 3.4746, "step": 105500 }, { "epoch": 4.544945514062971, "learning_rate": 7.446939171281433e-07, "loss": 3.5823, "step": 105520 }, { "epoch": 4.545806951802558, "learning_rate": 7.446454351526642e-07, "loss": 3.3827, "step": 105540 }, { "epoch": 4.546668389542146, "learning_rate": 7.445969531771854e-07, "loss": 3.3021, "step": 105560 }, { "epoch": 4.547529827281733, "learning_rate": 7.445484712017066e-07, "loss": 3.3844, "step": 105580 }, { "epoch": 4.548391265021321, "learning_rate": 7.444999892262277e-07, "loss": 3.4303, "step": 105600 }, { "epoch": 4.549252702760908, "learning_rate": 7.444515072507486e-07, "loss": 3.2746, "step": 105620 }, { "epoch": 4.5501141405004955, "learning_rate": 7.444030252752698e-07, "loss": 3.4267, "step": 105640 }, { "epoch": 4.550975578240083, "learning_rate": 7.44354543299791e-07, "loss": 3.4473, "step": 105660 }, { "epoch": 4.55183701597967, "learning_rate": 7.443060613243121e-07, "loss": 3.4104, "step": 105680 }, { "epoch": 4.552698453719257, "learning_rate": 7.442575793488331e-07, "loss": 3.5344, "step": 105700 }, { "epoch": 4.553559891458844, "learning_rate": 7.442090973733543e-07, "loss": 3.445, "step": 105720 }, { "epoch": 4.554421329198433, "learning_rate": 7.441606153978754e-07, "loss": 3.5019, "step": 105740 }, { "epoch": 4.55528276693802, "learning_rate": 7.441121334223965e-07, "loss": 3.3507, "step": 105760 }, { "epoch": 4.556144204677607, "learning_rate": 7.440636514469175e-07, "loss": 3.4429, "step": 105780 }, { "epoch": 4.557005642417194, "learning_rate": 7.440151694714387e-07, "loss": 3.4688, "step": 105800 }, { "epoch": 4.5578670801567815, "learning_rate": 7.439666874959599e-07, "loss": 3.2943, "step": 105820 }, { "epoch": 4.558728517896369, "learning_rate": 7.43918205520481e-07, "loss": 3.5317, "step": 105840 }, { "epoch": 4.559589955635956, "learning_rate": 7.43869723545002e-07, "loss": 3.4314, "step": 105860 }, { "epoch": 4.560451393375544, "learning_rate": 7.438212415695231e-07, "loss": 3.6768, "step": 105880 }, { "epoch": 4.561312831115131, "learning_rate": 7.437727595940443e-07, "loss": 3.3843, "step": 105900 }, { "epoch": 4.562174268854719, "learning_rate": 7.437242776185653e-07, "loss": 3.4105, "step": 105920 }, { "epoch": 4.563035706594306, "learning_rate": 7.436757956430863e-07, "loss": 3.4629, "step": 105940 }, { "epoch": 4.563897144333893, "learning_rate": 7.436273136676076e-07, "loss": 3.4537, "step": 105960 }, { "epoch": 4.56475858207348, "learning_rate": 7.435788316921288e-07, "loss": 3.4705, "step": 105980 }, { "epoch": 4.565620019813068, "learning_rate": 7.435303497166497e-07, "loss": 3.4859, "step": 106000 }, { "epoch": 4.566481457552655, "learning_rate": 7.434818677411708e-07, "loss": 3.5219, "step": 106020 }, { "epoch": 4.567342895292243, "learning_rate": 7.434333857656921e-07, "loss": 3.4374, "step": 106040 }, { "epoch": 4.56820433303183, "learning_rate": 7.43384903790213e-07, "loss": 3.3109, "step": 106060 }, { "epoch": 4.5690657707714175, "learning_rate": 7.433364218147341e-07, "loss": 3.5075, "step": 106080 }, { "epoch": 4.569927208511005, "learning_rate": 7.432879398392553e-07, "loss": 3.3777, "step": 106100 }, { "epoch": 4.570788646250592, "learning_rate": 7.432394578637764e-07, "loss": 3.4756, "step": 106120 }, { "epoch": 4.571650083990179, "learning_rate": 7.431909758882974e-07, "loss": 3.4662, "step": 106140 }, { "epoch": 4.572511521729767, "learning_rate": 7.431424939128185e-07, "loss": 3.4017, "step": 106160 }, { "epoch": 4.573372959469355, "learning_rate": 7.430940119373397e-07, "loss": 3.2848, "step": 106180 }, { "epoch": 4.574234397208942, "learning_rate": 7.430455299618609e-07, "loss": 3.2424, "step": 106200 }, { "epoch": 4.575095834948529, "learning_rate": 7.429970479863819e-07, "loss": 3.3857, "step": 106220 }, { "epoch": 4.575957272688116, "learning_rate": 7.42948566010903e-07, "loss": 3.5344, "step": 106240 }, { "epoch": 4.576818710427704, "learning_rate": 7.429000840354241e-07, "loss": 3.4852, "step": 106260 }, { "epoch": 4.577680148167291, "learning_rate": 7.428516020599453e-07, "loss": 3.2511, "step": 106280 }, { "epoch": 4.578541585906878, "learning_rate": 7.428031200844663e-07, "loss": 3.3752, "step": 106300 }, { "epoch": 4.579403023646466, "learning_rate": 7.427546381089874e-07, "loss": 3.5539, "step": 106320 }, { "epoch": 4.5802644613860535, "learning_rate": 7.427061561335086e-07, "loss": 3.4688, "step": 106340 }, { "epoch": 4.581125899125641, "learning_rate": 7.426576741580298e-07, "loss": 3.4532, "step": 106360 }, { "epoch": 4.581987336865228, "learning_rate": 7.426091921825507e-07, "loss": 3.3261, "step": 106380 }, { "epoch": 4.582848774604815, "learning_rate": 7.425607102070718e-07, "loss": 3.3305, "step": 106400 }, { "epoch": 4.5837102123444025, "learning_rate": 7.42512228231593e-07, "loss": 3.6248, "step": 106420 }, { "epoch": 4.584571650083991, "learning_rate": 7.424637462561142e-07, "loss": 3.5392, "step": 106440 }, { "epoch": 4.585433087823578, "learning_rate": 7.424152642806353e-07, "loss": 3.1391, "step": 106460 }, { "epoch": 4.586294525563165, "learning_rate": 7.423667823051563e-07, "loss": 3.2933, "step": 106480 }, { "epoch": 4.587155963302752, "learning_rate": 7.423183003296774e-07, "loss": 3.3931, "step": 106500 }, { "epoch": 4.58801740104234, "learning_rate": 7.422698183541985e-07, "loss": 3.3831, "step": 106520 }, { "epoch": 4.588878838781927, "learning_rate": 7.422213363787196e-07, "loss": 3.3134, "step": 106540 }, { "epoch": 4.589740276521514, "learning_rate": 7.421728544032407e-07, "loss": 3.3329, "step": 106560 }, { "epoch": 4.590601714261101, "learning_rate": 7.421243724277619e-07, "loss": 3.6775, "step": 106580 }, { "epoch": 4.591463152000689, "learning_rate": 7.420758904522829e-07, "loss": 3.4037, "step": 106600 }, { "epoch": 4.592324589740277, "learning_rate": 7.42027408476804e-07, "loss": 3.4664, "step": 106620 }, { "epoch": 4.593186027479864, "learning_rate": 7.419789265013251e-07, "loss": 3.4854, "step": 106640 }, { "epoch": 4.594047465219451, "learning_rate": 7.419304445258463e-07, "loss": 3.4455, "step": 106660 }, { "epoch": 4.594908902959038, "learning_rate": 7.418819625503673e-07, "loss": 3.2571, "step": 106680 }, { "epoch": 4.595770340698626, "learning_rate": 7.418334805748884e-07, "loss": 3.3615, "step": 106700 }, { "epoch": 4.596631778438214, "learning_rate": 7.417849985994096e-07, "loss": 3.5081, "step": 106720 }, { "epoch": 4.597493216177801, "learning_rate": 7.417365166239308e-07, "loss": 3.5212, "step": 106740 }, { "epoch": 4.598354653917388, "learning_rate": 7.416880346484517e-07, "loss": 3.4033, "step": 106760 }, { "epoch": 4.5992160916569755, "learning_rate": 7.416395526729729e-07, "loss": 3.528, "step": 106780 }, { "epoch": 4.600077529396563, "learning_rate": 7.41591070697494e-07, "loss": 3.4061, "step": 106800 }, { "epoch": 4.60093896713615, "learning_rate": 7.415425887220151e-07, "loss": 3.3811, "step": 106820 }, { "epoch": 4.601800404875737, "learning_rate": 7.414941067465362e-07, "loss": 3.4855, "step": 106840 }, { "epoch": 4.6026618426153245, "learning_rate": 7.414456247710573e-07, "loss": 3.3583, "step": 106860 }, { "epoch": 4.603523280354913, "learning_rate": 7.413971427955784e-07, "loss": 3.3726, "step": 106880 }, { "epoch": 4.6043847180945, "learning_rate": 7.413486608200995e-07, "loss": 3.5457, "step": 106900 }, { "epoch": 4.605246155834087, "learning_rate": 7.413001788446206e-07, "loss": 3.3274, "step": 106920 }, { "epoch": 4.606107593573674, "learning_rate": 7.412516968691417e-07, "loss": 3.5479, "step": 106940 }, { "epoch": 4.606969031313262, "learning_rate": 7.412032148936629e-07, "loss": 3.4976, "step": 106960 }, { "epoch": 4.607830469052849, "learning_rate": 7.411547329181841e-07, "loss": 3.3176, "step": 106980 }, { "epoch": 4.608691906792437, "learning_rate": 7.411062509427051e-07, "loss": 3.4915, "step": 107000 }, { "epoch": 4.609553344532024, "learning_rate": 7.410577689672261e-07, "loss": 3.5571, "step": 107020 }, { "epoch": 4.6104147822716115, "learning_rate": 7.410092869917472e-07, "loss": 3.5125, "step": 107040 }, { "epoch": 4.611276220011199, "learning_rate": 7.409608050162684e-07, "loss": 3.4239, "step": 107060 }, { "epoch": 4.612137657750786, "learning_rate": 7.409123230407895e-07, "loss": 3.4943, "step": 107080 }, { "epoch": 4.612999095490373, "learning_rate": 7.408638410653106e-07, "loss": 3.5271, "step": 107100 }, { "epoch": 4.6138605332299605, "learning_rate": 7.408153590898317e-07, "loss": 3.3703, "step": 107120 }, { "epoch": 4.614721970969548, "learning_rate": 7.407668771143527e-07, "loss": 3.4195, "step": 107140 }, { "epoch": 4.615583408709136, "learning_rate": 7.407183951388738e-07, "loss": 3.5447, "step": 107160 }, { "epoch": 4.616444846448723, "learning_rate": 7.40669913163395e-07, "loss": 3.468, "step": 107180 }, { "epoch": 4.61730628418831, "learning_rate": 7.406214311879161e-07, "loss": 3.6039, "step": 107200 }, { "epoch": 4.618167721927898, "learning_rate": 7.405729492124372e-07, "loss": 3.4525, "step": 107220 }, { "epoch": 4.619029159667485, "learning_rate": 7.405244672369583e-07, "loss": 3.4267, "step": 107240 }, { "epoch": 4.619890597407072, "learning_rate": 7.404759852614795e-07, "loss": 3.5692, "step": 107260 }, { "epoch": 4.62075203514666, "learning_rate": 7.404275032860005e-07, "loss": 3.4588, "step": 107280 }, { "epoch": 4.6216134728862475, "learning_rate": 7.403790213105217e-07, "loss": 3.4214, "step": 107300 }, { "epoch": 4.622474910625835, "learning_rate": 7.403305393350427e-07, "loss": 3.273, "step": 107320 }, { "epoch": 4.623336348365422, "learning_rate": 7.402820573595639e-07, "loss": 3.5492, "step": 107340 }, { "epoch": 4.624197786105009, "learning_rate": 7.40233575384085e-07, "loss": 3.3676, "step": 107360 }, { "epoch": 4.6250592238445964, "learning_rate": 7.401850934086061e-07, "loss": 3.4707, "step": 107380 }, { "epoch": 4.625920661584184, "learning_rate": 7.401366114331271e-07, "loss": 3.3834, "step": 107400 }, { "epoch": 4.626782099323771, "learning_rate": 7.400881294576482e-07, "loss": 3.5304, "step": 107420 }, { "epoch": 4.627643537063359, "learning_rate": 7.400396474821694e-07, "loss": 3.4311, "step": 107440 }, { "epoch": 4.628504974802946, "learning_rate": 7.399911655066905e-07, "loss": 3.5663, "step": 107460 }, { "epoch": 4.629366412542534, "learning_rate": 7.399426835312116e-07, "loss": 3.4356, "step": 107480 }, { "epoch": 4.630227850282121, "learning_rate": 7.398942015557327e-07, "loss": 3.5589, "step": 107500 }, { "epoch": 4.631089288021708, "learning_rate": 7.398457195802538e-07, "loss": 3.4957, "step": 107520 }, { "epoch": 4.631950725761295, "learning_rate": 7.397972376047749e-07, "loss": 3.3148, "step": 107540 }, { "epoch": 4.632812163500883, "learning_rate": 7.39748755629296e-07, "loss": 3.3393, "step": 107560 }, { "epoch": 4.633673601240471, "learning_rate": 7.397002736538171e-07, "loss": 3.3857, "step": 107580 }, { "epoch": 4.634535038980058, "learning_rate": 7.396517916783383e-07, "loss": 3.2937, "step": 107600 }, { "epoch": 4.635396476719645, "learning_rate": 7.396033097028594e-07, "loss": 3.1365, "step": 107620 }, { "epoch": 4.636257914459232, "learning_rate": 7.395548277273805e-07, "loss": 3.2864, "step": 107640 }, { "epoch": 4.63711935219882, "learning_rate": 7.395063457519015e-07, "loss": 3.4913, "step": 107660 }, { "epoch": 4.637980789938407, "learning_rate": 7.394578637764226e-07, "loss": 3.3579, "step": 107680 }, { "epoch": 4.638842227677994, "learning_rate": 7.394093818009437e-07, "loss": 3.3795, "step": 107700 }, { "epoch": 4.639703665417582, "learning_rate": 7.393608998254647e-07, "loss": 3.4284, "step": 107720 }, { "epoch": 4.6405651031571695, "learning_rate": 7.39312417849986e-07, "loss": 3.5511, "step": 107740 }, { "epoch": 4.641426540896757, "learning_rate": 7.392639358745071e-07, "loss": 3.5303, "step": 107760 }, { "epoch": 4.642287978636344, "learning_rate": 7.392154538990281e-07, "loss": 3.4329, "step": 107780 }, { "epoch": 4.643149416375931, "learning_rate": 7.391669719235492e-07, "loss": 3.2984, "step": 107800 }, { "epoch": 4.6440108541155185, "learning_rate": 7.391184899480705e-07, "loss": 3.2471, "step": 107820 }, { "epoch": 4.644872291855107, "learning_rate": 7.390700079725915e-07, "loss": 3.5918, "step": 107840 }, { "epoch": 4.645733729594694, "learning_rate": 7.390215259971126e-07, "loss": 3.3016, "step": 107860 }, { "epoch": 4.646595167334281, "learning_rate": 7.389730440216337e-07, "loss": 3.4703, "step": 107880 }, { "epoch": 4.647456605073868, "learning_rate": 7.389245620461548e-07, "loss": 3.5508, "step": 107900 }, { "epoch": 4.648318042813456, "learning_rate": 7.388760800706759e-07, "loss": 3.2261, "step": 107920 }, { "epoch": 4.649179480553043, "learning_rate": 7.38827598095197e-07, "loss": 3.5461, "step": 107940 }, { "epoch": 4.65004091829263, "learning_rate": 7.387791161197181e-07, "loss": 3.4937, "step": 107960 }, { "epoch": 4.650902356032217, "learning_rate": 7.387306341442393e-07, "loss": 3.2862, "step": 107980 }, { "epoch": 4.6517637937718055, "learning_rate": 7.386821521687604e-07, "loss": 3.2475, "step": 108000 }, { "epoch": 4.652625231511393, "learning_rate": 7.386336701932814e-07, "loss": 3.4575, "step": 108020 }, { "epoch": 4.65348666925098, "learning_rate": 7.385851882178025e-07, "loss": 3.234, "step": 108040 }, { "epoch": 4.654348106990567, "learning_rate": 7.385367062423237e-07, "loss": 3.3128, "step": 108060 }, { "epoch": 4.6552095447301545, "learning_rate": 7.384882242668448e-07, "loss": 3.3031, "step": 108080 }, { "epoch": 4.656070982469742, "learning_rate": 7.384397422913658e-07, "loss": 3.4557, "step": 108100 }, { "epoch": 4.65693242020933, "learning_rate": 7.38391260315887e-07, "loss": 3.4523, "step": 108120 }, { "epoch": 4.657793857948917, "learning_rate": 7.383427783404082e-07, "loss": 3.3763, "step": 108140 }, { "epoch": 4.658655295688504, "learning_rate": 7.382942963649292e-07, "loss": 3.3701, "step": 108160 }, { "epoch": 4.659516733428092, "learning_rate": 7.382458143894502e-07, "loss": 3.5039, "step": 108180 }, { "epoch": 4.660378171167679, "learning_rate": 7.381973324139714e-07, "loss": 3.4298, "step": 108200 }, { "epoch": 4.661239608907266, "learning_rate": 7.381488504384925e-07, "loss": 3.5846, "step": 108220 }, { "epoch": 4.662101046646853, "learning_rate": 7.381003684630136e-07, "loss": 3.3374, "step": 108240 }, { "epoch": 4.662962484386441, "learning_rate": 7.380518864875347e-07, "loss": 3.6548, "step": 108260 }, { "epoch": 4.663823922126029, "learning_rate": 7.380034045120558e-07, "loss": 3.428, "step": 108280 }, { "epoch": 4.664685359865616, "learning_rate": 7.379549225365769e-07, "loss": 3.5412, "step": 108300 }, { "epoch": 4.665546797605203, "learning_rate": 7.379064405610979e-07, "loss": 3.4767, "step": 108320 }, { "epoch": 4.66640823534479, "learning_rate": 7.378579585856191e-07, "loss": 3.317, "step": 108340 }, { "epoch": 4.667269673084378, "learning_rate": 7.378094766101403e-07, "loss": 3.4289, "step": 108360 }, { "epoch": 4.668131110823965, "learning_rate": 7.377609946346614e-07, "loss": 3.5949, "step": 108380 }, { "epoch": 4.668992548563553, "learning_rate": 7.377125126591824e-07, "loss": 3.3951, "step": 108400 }, { "epoch": 4.66985398630314, "learning_rate": 7.376640306837035e-07, "loss": 3.2444, "step": 108420 }, { "epoch": 4.6707154240427275, "learning_rate": 7.376155487082247e-07, "loss": 3.4278, "step": 108440 }, { "epoch": 4.671576861782315, "learning_rate": 7.375670667327458e-07, "loss": 3.3376, "step": 108460 }, { "epoch": 4.672438299521902, "learning_rate": 7.375185847572668e-07, "loss": 3.3744, "step": 108480 }, { "epoch": 4.673299737261489, "learning_rate": 7.37470102781788e-07, "loss": 3.5111, "step": 108500 }, { "epoch": 4.6741611750010765, "learning_rate": 7.374216208063092e-07, "loss": 3.6147, "step": 108520 }, { "epoch": 4.675022612740664, "learning_rate": 7.373731388308302e-07, "loss": 3.454, "step": 108540 }, { "epoch": 4.675884050480252, "learning_rate": 7.373246568553513e-07, "loss": 3.3569, "step": 108560 }, { "epoch": 4.676745488219839, "learning_rate": 7.372761748798724e-07, "loss": 3.5428, "step": 108580 }, { "epoch": 4.677606925959426, "learning_rate": 7.372276929043936e-07, "loss": 3.4574, "step": 108600 }, { "epoch": 4.678468363699014, "learning_rate": 7.371792109289146e-07, "loss": 3.4242, "step": 108620 }, { "epoch": 4.679329801438601, "learning_rate": 7.371307289534357e-07, "loss": 3.2827, "step": 108640 }, { "epoch": 4.680191239178188, "learning_rate": 7.370822469779568e-07, "loss": 3.209, "step": 108660 }, { "epoch": 4.681052676917775, "learning_rate": 7.37033765002478e-07, "loss": 3.3644, "step": 108680 }, { "epoch": 4.6819141146573635, "learning_rate": 7.36985283026999e-07, "loss": 3.4966, "step": 108700 }, { "epoch": 4.682775552396951, "learning_rate": 7.369368010515201e-07, "loss": 3.1675, "step": 108720 }, { "epoch": 4.683636990136538, "learning_rate": 7.368883190760413e-07, "loss": 3.3223, "step": 108740 }, { "epoch": 4.684498427876125, "learning_rate": 7.368398371005624e-07, "loss": 3.3081, "step": 108760 }, { "epoch": 4.6853598656157125, "learning_rate": 7.367913551250834e-07, "loss": 3.4824, "step": 108780 }, { "epoch": 4.6862213033553, "learning_rate": 7.367428731496045e-07, "loss": 3.4848, "step": 108800 }, { "epoch": 4.687082741094887, "learning_rate": 7.366943911741257e-07, "loss": 3.5141, "step": 108820 }, { "epoch": 4.687944178834475, "learning_rate": 7.366459091986468e-07, "loss": 3.2749, "step": 108840 }, { "epoch": 4.688805616574062, "learning_rate": 7.365974272231678e-07, "loss": 3.3197, "step": 108860 }, { "epoch": 4.68966705431365, "learning_rate": 7.36548945247689e-07, "loss": 3.41, "step": 108880 }, { "epoch": 4.690528492053237, "learning_rate": 7.365004632722102e-07, "loss": 3.5239, "step": 108900 }, { "epoch": 4.691389929792824, "learning_rate": 7.364519812967311e-07, "loss": 3.3409, "step": 108920 }, { "epoch": 4.692251367532411, "learning_rate": 7.364034993212522e-07, "loss": 3.4581, "step": 108940 }, { "epoch": 4.693112805271999, "learning_rate": 7.363550173457734e-07, "loss": 3.4348, "step": 108960 }, { "epoch": 4.693974243011587, "learning_rate": 7.363065353702946e-07, "loss": 3.3815, "step": 108980 }, { "epoch": 4.694835680751174, "learning_rate": 7.362580533948156e-07, "loss": 3.4902, "step": 109000 }, { "epoch": 4.695697118490761, "learning_rate": 7.362095714193367e-07, "loss": 3.4949, "step": 109020 }, { "epoch": 4.6965585562303485, "learning_rate": 7.361610894438579e-07, "loss": 3.4152, "step": 109040 }, { "epoch": 4.697419993969936, "learning_rate": 7.36112607468379e-07, "loss": 3.4191, "step": 109060 }, { "epoch": 4.698281431709523, "learning_rate": 7.360641254929001e-07, "loss": 3.4569, "step": 109080 }, { "epoch": 4.69914286944911, "learning_rate": 7.360156435174211e-07, "loss": 3.2765, "step": 109100 }, { "epoch": 4.700004307188697, "learning_rate": 7.359671615419423e-07, "loss": 3.3831, "step": 109120 }, { "epoch": 4.700865744928286, "learning_rate": 7.359186795664635e-07, "loss": 3.6725, "step": 109140 }, { "epoch": 4.701727182667873, "learning_rate": 7.358701975909845e-07, "loss": 3.3772, "step": 109160 }, { "epoch": 4.70258862040746, "learning_rate": 7.358217156155055e-07, "loss": 3.482, "step": 109180 }, { "epoch": 4.703450058147047, "learning_rate": 7.357732336400267e-07, "loss": 3.4736, "step": 109200 }, { "epoch": 4.7043114958866346, "learning_rate": 7.357247516645479e-07, "loss": 3.4527, "step": 109220 }, { "epoch": 4.705172933626222, "learning_rate": 7.356762696890689e-07, "loss": 3.362, "step": 109240 }, { "epoch": 4.70603437136581, "learning_rate": 7.3562778771359e-07, "loss": 3.3077, "step": 109260 }, { "epoch": 4.706895809105397, "learning_rate": 7.355793057381112e-07, "loss": 3.3716, "step": 109280 }, { "epoch": 4.707757246844984, "learning_rate": 7.355308237626321e-07, "loss": 3.4546, "step": 109300 }, { "epoch": 4.708618684584572, "learning_rate": 7.354823417871532e-07, "loss": 3.2998, "step": 109320 }, { "epoch": 4.709480122324159, "learning_rate": 7.354338598116744e-07, "loss": 3.4644, "step": 109340 }, { "epoch": 4.710341560063746, "learning_rate": 7.353853778361956e-07, "loss": 3.4126, "step": 109360 }, { "epoch": 4.711202997803333, "learning_rate": 7.353368958607166e-07, "loss": 3.3607, "step": 109380 }, { "epoch": 4.712064435542921, "learning_rate": 7.352884138852377e-07, "loss": 3.3738, "step": 109400 }, { "epoch": 4.712925873282509, "learning_rate": 7.352399319097589e-07, "loss": 3.4028, "step": 109420 }, { "epoch": 4.713787311022096, "learning_rate": 7.3519144993428e-07, "loss": 3.467, "step": 109440 }, { "epoch": 4.714648748761683, "learning_rate": 7.35142967958801e-07, "loss": 3.607, "step": 109460 }, { "epoch": 4.7155101865012705, "learning_rate": 7.350944859833221e-07, "loss": 3.4806, "step": 109480 }, { "epoch": 4.716371624240858, "learning_rate": 7.350460040078432e-07, "loss": 3.5371, "step": 109500 }, { "epoch": 4.717233061980445, "learning_rate": 7.349975220323645e-07, "loss": 3.4783, "step": 109520 }, { "epoch": 4.718094499720033, "learning_rate": 7.349490400568855e-07, "loss": 3.2858, "step": 109540 }, { "epoch": 4.71895593745962, "learning_rate": 7.349005580814065e-07, "loss": 3.6001, "step": 109560 }, { "epoch": 4.719817375199208, "learning_rate": 7.348520761059277e-07, "loss": 3.567, "step": 109580 }, { "epoch": 4.720678812938795, "learning_rate": 7.34803594130449e-07, "loss": 3.5085, "step": 109600 }, { "epoch": 4.721540250678382, "learning_rate": 7.347551121549699e-07, "loss": 3.4442, "step": 109620 }, { "epoch": 4.722401688417969, "learning_rate": 7.34706630179491e-07, "loss": 3.1194, "step": 109640 }, { "epoch": 4.723263126157557, "learning_rate": 7.346581482040122e-07, "loss": 3.2859, "step": 109660 }, { "epoch": 4.724124563897144, "learning_rate": 7.346096662285332e-07, "loss": 3.3333, "step": 109680 }, { "epoch": 4.724986001636732, "learning_rate": 7.345611842530543e-07, "loss": 3.3976, "step": 109700 }, { "epoch": 4.725847439376319, "learning_rate": 7.345127022775754e-07, "loss": 3.5718, "step": 109720 }, { "epoch": 4.7267088771159065, "learning_rate": 7.344642203020966e-07, "loss": 3.234, "step": 109740 }, { "epoch": 4.727570314855494, "learning_rate": 7.344157383266177e-07, "loss": 3.247, "step": 109760 }, { "epoch": 4.728431752595081, "learning_rate": 7.343672563511388e-07, "loss": 3.3834, "step": 109780 }, { "epoch": 4.729293190334668, "learning_rate": 7.343187743756599e-07, "loss": 3.4639, "step": 109800 }, { "epoch": 4.730154628074256, "learning_rate": 7.34270292400181e-07, "loss": 3.4121, "step": 109820 }, { "epoch": 4.731016065813844, "learning_rate": 7.34221810424702e-07, "loss": 3.4958, "step": 109840 }, { "epoch": 4.731877503553431, "learning_rate": 7.341733284492231e-07, "loss": 3.4193, "step": 109860 }, { "epoch": 4.732738941293018, "learning_rate": 7.341248464737443e-07, "loss": 3.4574, "step": 109880 }, { "epoch": 4.733600379032605, "learning_rate": 7.340763644982654e-07, "loss": 3.3919, "step": 109900 }, { "epoch": 4.734461816772193, "learning_rate": 7.340278825227865e-07, "loss": 3.4558, "step": 109920 }, { "epoch": 4.73532325451178, "learning_rate": 7.339794005473075e-07, "loss": 3.3465, "step": 109940 }, { "epoch": 4.736184692251367, "learning_rate": 7.339309185718287e-07, "loss": 3.4276, "step": 109960 }, { "epoch": 4.737046129990955, "learning_rate": 7.338824365963498e-07, "loss": 3.4679, "step": 109980 }, { "epoch": 4.7379075677305424, "learning_rate": 7.338339546208709e-07, "loss": 3.3457, "step": 110000 }, { "epoch": 4.73876900547013, "learning_rate": 7.33785472645392e-07, "loss": 3.1823, "step": 110020 }, { "epoch": 4.739630443209717, "learning_rate": 7.337369906699132e-07, "loss": 3.2189, "step": 110040 }, { "epoch": 4.740491880949304, "learning_rate": 7.336885086944342e-07, "loss": 3.4356, "step": 110060 }, { "epoch": 4.741353318688891, "learning_rate": 7.336400267189553e-07, "loss": 3.3379, "step": 110080 }, { "epoch": 4.74221475642848, "learning_rate": 7.335915447434764e-07, "loss": 3.3428, "step": 110100 }, { "epoch": 4.743076194168067, "learning_rate": 7.335430627679976e-07, "loss": 3.3437, "step": 110120 }, { "epoch": 4.743937631907654, "learning_rate": 7.334945807925187e-07, "loss": 3.4678, "step": 110140 }, { "epoch": 4.744799069647241, "learning_rate": 7.334460988170398e-07, "loss": 3.4782, "step": 110160 }, { "epoch": 4.7456605073868285, "learning_rate": 7.333976168415609e-07, "loss": 3.4444, "step": 110180 }, { "epoch": 4.746521945126416, "learning_rate": 7.333491348660819e-07, "loss": 3.4967, "step": 110200 }, { "epoch": 4.747383382866003, "learning_rate": 7.333006528906031e-07, "loss": 3.1003, "step": 110220 }, { "epoch": 4.74824482060559, "learning_rate": 7.332521709151242e-07, "loss": 3.2818, "step": 110240 }, { "epoch": 4.749106258345178, "learning_rate": 7.332036889396453e-07, "loss": 3.4107, "step": 110260 }, { "epoch": 4.749967696084766, "learning_rate": 7.331552069641664e-07, "loss": 3.4498, "step": 110280 }, { "epoch": 4.750829133824353, "learning_rate": 7.331067249886876e-07, "loss": 3.5525, "step": 110300 }, { "epoch": 4.75169057156394, "learning_rate": 7.330582430132086e-07, "loss": 3.2837, "step": 110320 }, { "epoch": 4.752552009303527, "learning_rate": 7.330097610377298e-07, "loss": 3.3246, "step": 110340 }, { "epoch": 4.753413447043115, "learning_rate": 7.329612790622508e-07, "loss": 3.3768, "step": 110360 }, { "epoch": 4.754274884782703, "learning_rate": 7.329127970867719e-07, "loss": 3.3292, "step": 110380 }, { "epoch": 4.75513632252229, "learning_rate": 7.32864315111293e-07, "loss": 3.496, "step": 110400 }, { "epoch": 4.755997760261877, "learning_rate": 7.328158331358142e-07, "loss": 3.3893, "step": 110420 }, { "epoch": 4.7568591980014645, "learning_rate": 7.327673511603352e-07, "loss": 3.3441, "step": 110440 }, { "epoch": 4.757720635741052, "learning_rate": 7.327188691848563e-07, "loss": 3.4125, "step": 110460 }, { "epoch": 4.758582073480639, "learning_rate": 7.326703872093774e-07, "loss": 3.4208, "step": 110480 }, { "epoch": 4.759443511220226, "learning_rate": 7.326219052338985e-07, "loss": 3.4095, "step": 110500 }, { "epoch": 4.7603049489598135, "learning_rate": 7.325734232584197e-07, "loss": 3.3821, "step": 110520 }, { "epoch": 4.761166386699402, "learning_rate": 7.325249412829408e-07, "loss": 3.5809, "step": 110540 }, { "epoch": 4.762027824438989, "learning_rate": 7.324764593074619e-07, "loss": 3.3793, "step": 110560 }, { "epoch": 4.762889262178576, "learning_rate": 7.324279773319829e-07, "loss": 3.4014, "step": 110580 }, { "epoch": 4.763750699918163, "learning_rate": 7.323794953565041e-07, "loss": 3.3375, "step": 110600 }, { "epoch": 4.764612137657751, "learning_rate": 7.323310133810252e-07, "loss": 3.4503, "step": 110620 }, { "epoch": 4.765473575397338, "learning_rate": 7.322825314055463e-07, "loss": 3.4172, "step": 110640 }, { "epoch": 4.766335013136926, "learning_rate": 7.322340494300674e-07, "loss": 3.4392, "step": 110660 }, { "epoch": 4.767196450876513, "learning_rate": 7.321855674545886e-07, "loss": 3.4987, "step": 110680 }, { "epoch": 4.7680578886161005, "learning_rate": 7.321370854791096e-07, "loss": 3.2126, "step": 110700 }, { "epoch": 4.768919326355688, "learning_rate": 7.320886035036307e-07, "loss": 3.4632, "step": 110720 }, { "epoch": 4.769780764095275, "learning_rate": 7.320401215281518e-07, "loss": 3.527, "step": 110740 }, { "epoch": 4.770642201834862, "learning_rate": 7.31991639552673e-07, "loss": 3.5748, "step": 110760 }, { "epoch": 4.7715036395744495, "learning_rate": 7.319431575771941e-07, "loss": 3.5437, "step": 110780 }, { "epoch": 4.772365077314037, "learning_rate": 7.318946756017151e-07, "loss": 3.3332, "step": 110800 }, { "epoch": 4.773226515053625, "learning_rate": 7.318461936262363e-07, "loss": 3.5089, "step": 110820 }, { "epoch": 4.774087952793212, "learning_rate": 7.317977116507574e-07, "loss": 3.2424, "step": 110840 }, { "epoch": 4.774949390532799, "learning_rate": 7.317492296752786e-07, "loss": 3.4568, "step": 110860 }, { "epoch": 4.775810828272387, "learning_rate": 7.317007476997995e-07, "loss": 3.2172, "step": 110880 }, { "epoch": 4.776672266011974, "learning_rate": 7.316522657243207e-07, "loss": 3.3651, "step": 110900 }, { "epoch": 4.777533703751561, "learning_rate": 7.316037837488418e-07, "loss": 3.3589, "step": 110920 }, { "epoch": 4.778395141491149, "learning_rate": 7.315553017733629e-07, "loss": 3.3583, "step": 110940 }, { "epoch": 4.779256579230736, "learning_rate": 7.315068197978839e-07, "loss": 3.1732, "step": 110960 }, { "epoch": 4.780118016970324, "learning_rate": 7.314583378224051e-07, "loss": 3.4154, "step": 110980 }, { "epoch": 4.780979454709911, "learning_rate": 7.314098558469262e-07, "loss": 3.5414, "step": 111000 }, { "epoch": 4.781840892449498, "learning_rate": 7.313613738714473e-07, "loss": 3.4901, "step": 111020 }, { "epoch": 4.782702330189085, "learning_rate": 7.313128918959684e-07, "loss": 3.3706, "step": 111040 }, { "epoch": 4.783563767928673, "learning_rate": 7.312644099204896e-07, "loss": 3.2446, "step": 111060 }, { "epoch": 4.78442520566826, "learning_rate": 7.312159279450106e-07, "loss": 3.5023, "step": 111080 }, { "epoch": 4.785286643407848, "learning_rate": 7.311674459695316e-07, "loss": 3.4189, "step": 111100 }, { "epoch": 4.786148081147435, "learning_rate": 7.311189639940528e-07, "loss": 3.2795, "step": 111120 }, { "epoch": 4.7870095188870225, "learning_rate": 7.31070482018574e-07, "loss": 3.2671, "step": 111140 }, { "epoch": 4.78787095662661, "learning_rate": 7.310220000430951e-07, "loss": 3.2637, "step": 111160 }, { "epoch": 4.788732394366197, "learning_rate": 7.309735180676161e-07, "loss": 3.2527, "step": 111180 }, { "epoch": 4.789593832105784, "learning_rate": 7.309250360921373e-07, "loss": 3.4921, "step": 111200 }, { "epoch": 4.790455269845372, "learning_rate": 7.308765541166584e-07, "loss": 3.5235, "step": 111220 }, { "epoch": 4.79131670758496, "learning_rate": 7.308280721411795e-07, "loss": 3.3067, "step": 111240 }, { "epoch": 4.792178145324547, "learning_rate": 7.307795901657005e-07, "loss": 3.3875, "step": 111260 }, { "epoch": 4.793039583064134, "learning_rate": 7.307311081902216e-07, "loss": 3.4976, "step": 111280 }, { "epoch": 4.793901020803721, "learning_rate": 7.306826262147429e-07, "loss": 3.3921, "step": 111300 }, { "epoch": 4.794762458543309, "learning_rate": 7.30634144239264e-07, "loss": 3.4683, "step": 111320 }, { "epoch": 4.795623896282896, "learning_rate": 7.305856622637849e-07, "loss": 3.4451, "step": 111340 }, { "epoch": 4.796485334022483, "learning_rate": 7.305371802883061e-07, "loss": 3.5001, "step": 111360 }, { "epoch": 4.797346771762071, "learning_rate": 7.304886983128274e-07, "loss": 3.4202, "step": 111380 }, { "epoch": 4.7982082095016585, "learning_rate": 7.304402163373484e-07, "loss": 3.4148, "step": 111400 }, { "epoch": 4.799069647241246, "learning_rate": 7.303917343618694e-07, "loss": 3.3397, "step": 111420 }, { "epoch": 4.799931084980833, "learning_rate": 7.303432523863906e-07, "loss": 3.4448, "step": 111440 }, { "epoch": 4.80079252272042, "learning_rate": 7.302947704109117e-07, "loss": 3.3335, "step": 111460 }, { "epoch": 4.8016539604600075, "learning_rate": 7.302462884354326e-07, "loss": 3.4823, "step": 111480 }, { "epoch": 4.802515398199596, "learning_rate": 7.301978064599538e-07, "loss": 3.3321, "step": 111500 }, { "epoch": 4.803376835939183, "learning_rate": 7.30149324484475e-07, "loss": 3.4012, "step": 111520 }, { "epoch": 4.80423827367877, "learning_rate": 7.301008425089961e-07, "loss": 3.4922, "step": 111540 }, { "epoch": 4.805099711418357, "learning_rate": 7.300523605335171e-07, "loss": 3.2925, "step": 111560 }, { "epoch": 4.805961149157945, "learning_rate": 7.300038785580383e-07, "loss": 3.3143, "step": 111580 }, { "epoch": 4.806822586897532, "learning_rate": 7.299553965825594e-07, "loss": 3.3931, "step": 111600 }, { "epoch": 4.807684024637119, "learning_rate": 7.299069146070805e-07, "loss": 3.3061, "step": 111620 }, { "epoch": 4.808545462376706, "learning_rate": 7.298584326316015e-07, "loss": 3.2464, "step": 111640 }, { "epoch": 4.8094069001162945, "learning_rate": 7.298099506561227e-07, "loss": 3.4188, "step": 111660 }, { "epoch": 4.810268337855882, "learning_rate": 7.297614686806439e-07, "loss": 3.2762, "step": 111680 }, { "epoch": 4.811129775595469, "learning_rate": 7.29712986705165e-07, "loss": 3.4127, "step": 111700 }, { "epoch": 4.811991213335056, "learning_rate": 7.296645047296859e-07, "loss": 3.3055, "step": 111720 }, { "epoch": 4.812852651074643, "learning_rate": 7.296160227542071e-07, "loss": 3.383, "step": 111740 }, { "epoch": 4.813714088814231, "learning_rate": 7.295675407787283e-07, "loss": 3.5977, "step": 111760 }, { "epoch": 4.814575526553819, "learning_rate": 7.295190588032493e-07, "loss": 3.2123, "step": 111780 }, { "epoch": 4.815436964293406, "learning_rate": 7.294705768277704e-07, "loss": 3.4144, "step": 111800 }, { "epoch": 4.816298402032993, "learning_rate": 7.294220948522916e-07, "loss": 3.3147, "step": 111820 }, { "epoch": 4.8171598397725806, "learning_rate": 7.293736128768127e-07, "loss": 3.3394, "step": 111840 }, { "epoch": 4.818021277512168, "learning_rate": 7.293251309013337e-07, "loss": 3.4783, "step": 111860 }, { "epoch": 4.818882715251755, "learning_rate": 7.292766489258548e-07, "loss": 3.1755, "step": 111880 }, { "epoch": 4.819744152991342, "learning_rate": 7.29228166950376e-07, "loss": 3.2937, "step": 111900 }, { "epoch": 4.8206055907309295, "learning_rate": 7.291796849748972e-07, "loss": 3.2458, "step": 111920 }, { "epoch": 4.821467028470518, "learning_rate": 7.291312029994182e-07, "loss": 3.3099, "step": 111940 }, { "epoch": 4.822328466210105, "learning_rate": 7.290827210239393e-07, "loss": 3.531, "step": 111960 }, { "epoch": 4.823189903949692, "learning_rate": 7.290342390484604e-07, "loss": 3.1037, "step": 111980 }, { "epoch": 4.824051341689279, "learning_rate": 7.289857570729814e-07, "loss": 3.4912, "step": 112000 }, { "epoch": 4.824912779428867, "learning_rate": 7.289372750975025e-07, "loss": 3.4789, "step": 112020 }, { "epoch": 4.825774217168454, "learning_rate": 7.288887931220237e-07, "loss": 3.5027, "step": 112040 }, { "epoch": 4.826635654908041, "learning_rate": 7.288403111465449e-07, "loss": 3.4467, "step": 112060 }, { "epoch": 4.827497092647629, "learning_rate": 7.28791829171066e-07, "loss": 3.3026, "step": 112080 }, { "epoch": 4.8283585303872165, "learning_rate": 7.28743347195587e-07, "loss": 3.316, "step": 112100 }, { "epoch": 4.829219968126804, "learning_rate": 7.28694865220108e-07, "loss": 3.2805, "step": 112120 }, { "epoch": 4.830081405866391, "learning_rate": 7.286463832446293e-07, "loss": 3.3764, "step": 112140 }, { "epoch": 4.830942843605978, "learning_rate": 7.285979012691503e-07, "loss": 3.384, "step": 112160 }, { "epoch": 4.8318042813455655, "learning_rate": 7.285494192936714e-07, "loss": 3.4574, "step": 112180 }, { "epoch": 4.832665719085153, "learning_rate": 7.285009373181926e-07, "loss": 3.3161, "step": 112200 }, { "epoch": 4.833527156824741, "learning_rate": 7.284524553427137e-07, "loss": 3.4587, "step": 112220 }, { "epoch": 4.834388594564328, "learning_rate": 7.284039733672347e-07, "loss": 3.3729, "step": 112240 }, { "epoch": 4.835250032303915, "learning_rate": 7.283554913917558e-07, "loss": 3.4642, "step": 112260 }, { "epoch": 4.836111470043503, "learning_rate": 7.28307009416277e-07, "loss": 3.3543, "step": 112280 }, { "epoch": 4.83697290778309, "learning_rate": 7.282585274407982e-07, "loss": 3.3695, "step": 112300 }, { "epoch": 4.837834345522677, "learning_rate": 7.282100454653192e-07, "loss": 3.4284, "step": 112320 }, { "epoch": 4.838695783262264, "learning_rate": 7.281615634898403e-07, "loss": 3.3576, "step": 112340 }, { "epoch": 4.8395572210018525, "learning_rate": 7.281130815143614e-07, "loss": 3.4828, "step": 112360 }, { "epoch": 4.84041865874144, "learning_rate": 7.280645995388825e-07, "loss": 3.4506, "step": 112380 }, { "epoch": 4.841280096481027, "learning_rate": 7.280161175634036e-07, "loss": 3.2465, "step": 112400 }, { "epoch": 4.842141534220614, "learning_rate": 7.279676355879247e-07, "loss": 3.4101, "step": 112420 }, { "epoch": 4.8430029719602015, "learning_rate": 7.279191536124459e-07, "loss": 3.4494, "step": 112440 }, { "epoch": 4.843864409699789, "learning_rate": 7.27870671636967e-07, "loss": 3.6476, "step": 112460 }, { "epoch": 4.844725847439376, "learning_rate": 7.27822189661488e-07, "loss": 3.1845, "step": 112480 }, { "epoch": 4.845587285178963, "learning_rate": 7.277737076860091e-07, "loss": 3.324, "step": 112500 }, { "epoch": 4.846448722918551, "learning_rate": 7.277252257105303e-07, "loss": 3.3586, "step": 112520 }, { "epoch": 4.847310160658139, "learning_rate": 7.276767437350512e-07, "loss": 3.4245, "step": 112540 }, { "epoch": 4.848171598397726, "learning_rate": 7.276282617595724e-07, "loss": 3.3284, "step": 112560 }, { "epoch": 4.849033036137313, "learning_rate": 7.275797797840936e-07, "loss": 3.291, "step": 112580 }, { "epoch": 4.8498944738769, "learning_rate": 7.275312978086148e-07, "loss": 3.4461, "step": 112600 }, { "epoch": 4.850755911616488, "learning_rate": 7.274828158331357e-07, "loss": 3.4262, "step": 112620 }, { "epoch": 4.851617349356076, "learning_rate": 7.274343338576568e-07, "loss": 3.3892, "step": 112640 }, { "epoch": 4.852478787095663, "learning_rate": 7.27385851882178e-07, "loss": 3.3796, "step": 112660 }, { "epoch": 4.85334022483525, "learning_rate": 7.273373699066992e-07, "loss": 3.3873, "step": 112680 }, { "epoch": 4.854201662574837, "learning_rate": 7.272888879312202e-07, "loss": 3.3284, "step": 112700 }, { "epoch": 4.855063100314425, "learning_rate": 7.272404059557413e-07, "loss": 3.3175, "step": 112720 }, { "epoch": 4.855924538054012, "learning_rate": 7.271919239802624e-07, "loss": 3.4371, "step": 112740 }, { "epoch": 4.856785975793599, "learning_rate": 7.271434420047835e-07, "loss": 3.2728, "step": 112760 }, { "epoch": 4.857647413533186, "learning_rate": 7.270949600293046e-07, "loss": 3.3408, "step": 112780 }, { "epoch": 4.8585088512727745, "learning_rate": 7.270464780538257e-07, "loss": 3.4124, "step": 112800 }, { "epoch": 4.859370289012362, "learning_rate": 7.269979960783469e-07, "loss": 3.3237, "step": 112820 }, { "epoch": 4.860231726751949, "learning_rate": 7.26949514102868e-07, "loss": 3.2809, "step": 112840 }, { "epoch": 4.861093164491536, "learning_rate": 7.26901032127389e-07, "loss": 3.1672, "step": 112860 }, { "epoch": 4.8619546022311235, "learning_rate": 7.268525501519101e-07, "loss": 3.1231, "step": 112880 }, { "epoch": 4.862816039970711, "learning_rate": 7.268040681764313e-07, "loss": 3.291, "step": 112900 }, { "epoch": 4.863677477710299, "learning_rate": 7.267555862009524e-07, "loss": 3.5074, "step": 112920 }, { "epoch": 4.864538915449886, "learning_rate": 7.267071042254735e-07, "loss": 3.4051, "step": 112940 }, { "epoch": 4.865400353189473, "learning_rate": 7.266586222499946e-07, "loss": 3.3751, "step": 112960 }, { "epoch": 4.866261790929061, "learning_rate": 7.266101402745158e-07, "loss": 3.481, "step": 112980 }, { "epoch": 4.867123228668648, "learning_rate": 7.265616582990368e-07, "loss": 3.502, "step": 113000 }, { "epoch": 4.867984666408235, "learning_rate": 7.265131763235579e-07, "loss": 3.4113, "step": 113020 }, { "epoch": 4.868846104147822, "learning_rate": 7.26464694348079e-07, "loss": 3.4011, "step": 113040 }, { "epoch": 4.86970754188741, "learning_rate": 7.264162123726e-07, "loss": 3.3976, "step": 113060 }, { "epoch": 4.870568979626998, "learning_rate": 7.263677303971212e-07, "loss": 3.4237, "step": 113080 }, { "epoch": 4.871430417366585, "learning_rate": 7.263192484216423e-07, "loss": 3.4112, "step": 113100 }, { "epoch": 4.872291855106172, "learning_rate": 7.262707664461634e-07, "loss": 3.3411, "step": 113120 }, { "epoch": 4.8731532928457595, "learning_rate": 7.262222844706845e-07, "loss": 3.5716, "step": 113140 }, { "epoch": 4.874014730585347, "learning_rate": 7.261738024952056e-07, "loss": 3.301, "step": 113160 }, { "epoch": 4.874876168324934, "learning_rate": 7.261253205197267e-07, "loss": 3.4115, "step": 113180 }, { "epoch": 4.875737606064522, "learning_rate": 7.260768385442479e-07, "loss": 3.2897, "step": 113200 }, { "epoch": 4.876599043804109, "learning_rate": 7.26028356568769e-07, "loss": 3.2583, "step": 113220 }, { "epoch": 4.877460481543697, "learning_rate": 7.259798745932901e-07, "loss": 3.3965, "step": 113240 }, { "epoch": 4.878321919283284, "learning_rate": 7.259313926178111e-07, "loss": 3.3279, "step": 113260 }, { "epoch": 4.879183357022871, "learning_rate": 7.258829106423322e-07, "loss": 3.5487, "step": 113280 }, { "epoch": 4.880044794762458, "learning_rate": 7.258344286668534e-07, "loss": 3.3615, "step": 113300 }, { "epoch": 4.880906232502046, "learning_rate": 7.257859466913745e-07, "loss": 3.2451, "step": 113320 }, { "epoch": 4.881767670241633, "learning_rate": 7.257374647158956e-07, "loss": 3.2804, "step": 113340 }, { "epoch": 4.882629107981221, "learning_rate": 7.256889827404167e-07, "loss": 3.2816, "step": 113360 }, { "epoch": 4.883490545720808, "learning_rate": 7.256405007649378e-07, "loss": 3.1812, "step": 113380 }, { "epoch": 4.8843519834603955, "learning_rate": 7.255920187894589e-07, "loss": 3.4344, "step": 113400 }, { "epoch": 4.885213421199983, "learning_rate": 7.2554353681398e-07, "loss": 3.5236, "step": 113420 }, { "epoch": 4.88607485893957, "learning_rate": 7.254950548385011e-07, "loss": 3.4669, "step": 113440 }, { "epoch": 4.886936296679157, "learning_rate": 7.254465728630223e-07, "loss": 3.7011, "step": 113460 }, { "epoch": 4.887797734418745, "learning_rate": 7.253980908875434e-07, "loss": 3.3601, "step": 113480 }, { "epoch": 4.888659172158333, "learning_rate": 7.253496089120644e-07, "loss": 3.3322, "step": 113500 }, { "epoch": 4.88952060989792, "learning_rate": 7.253011269365855e-07, "loss": 3.3331, "step": 113520 }, { "epoch": 4.890382047637507, "learning_rate": 7.252526449611067e-07, "loss": 3.4049, "step": 113540 }, { "epoch": 4.891243485377094, "learning_rate": 7.252041629856278e-07, "loss": 3.3272, "step": 113560 }, { "epoch": 4.8921049231166815, "learning_rate": 7.251556810101489e-07, "loss": 3.217, "step": 113580 }, { "epoch": 4.892966360856269, "learning_rate": 7.2510719903467e-07, "loss": 3.275, "step": 113600 }, { "epoch": 4.893827798595856, "learning_rate": 7.250587170591911e-07, "loss": 3.5163, "step": 113620 }, { "epoch": 4.894689236335444, "learning_rate": 7.250102350837121e-07, "loss": 3.372, "step": 113640 }, { "epoch": 4.895550674075031, "learning_rate": 7.249617531082332e-07, "loss": 3.4274, "step": 113660 }, { "epoch": 4.896412111814619, "learning_rate": 7.249132711327544e-07, "loss": 3.2712, "step": 113680 }, { "epoch": 4.897273549554206, "learning_rate": 7.248647891572755e-07, "loss": 3.2933, "step": 113700 }, { "epoch": 4.898134987293793, "learning_rate": 7.248163071817966e-07, "loss": 3.2578, "step": 113720 }, { "epoch": 4.89899642503338, "learning_rate": 7.247678252063177e-07, "loss": 3.2918, "step": 113740 }, { "epoch": 4.8998578627729685, "learning_rate": 7.247193432308388e-07, "loss": 3.1761, "step": 113760 }, { "epoch": 4.900719300512556, "learning_rate": 7.246708612553599e-07, "loss": 3.3231, "step": 113780 }, { "epoch": 4.901580738252143, "learning_rate": 7.24622379279881e-07, "loss": 3.2292, "step": 113800 }, { "epoch": 4.90244217599173, "learning_rate": 7.245738973044021e-07, "loss": 3.5422, "step": 113820 }, { "epoch": 4.9033036137313175, "learning_rate": 7.245254153289233e-07, "loss": 3.4631, "step": 113840 }, { "epoch": 4.904165051470905, "learning_rate": 7.244769333534444e-07, "loss": 3.4453, "step": 113860 }, { "epoch": 4.905026489210492, "learning_rate": 7.244284513779655e-07, "loss": 3.3112, "step": 113880 }, { "epoch": 4.905887926950079, "learning_rate": 7.243799694024864e-07, "loss": 3.4012, "step": 113900 }, { "epoch": 4.906749364689667, "learning_rate": 7.243314874270077e-07, "loss": 3.5263, "step": 113920 }, { "epoch": 4.907610802429255, "learning_rate": 7.242830054515288e-07, "loss": 3.3166, "step": 113940 }, { "epoch": 4.908472240168842, "learning_rate": 7.242345234760498e-07, "loss": 3.2745, "step": 113960 }, { "epoch": 4.909333677908429, "learning_rate": 7.24186041500571e-07, "loss": 3.2829, "step": 113980 }, { "epoch": 4.910195115648016, "learning_rate": 7.241375595250921e-07, "loss": 3.2522, "step": 114000 }, { "epoch": 4.911056553387604, "learning_rate": 7.240890775496132e-07, "loss": 3.0851, "step": 114020 }, { "epoch": 4.911917991127192, "learning_rate": 7.240405955741342e-07, "loss": 3.4579, "step": 114040 }, { "epoch": 4.912779428866779, "learning_rate": 7.239921135986554e-07, "loss": 3.5174, "step": 114060 }, { "epoch": 4.913640866606366, "learning_rate": 7.239436316231766e-07, "loss": 3.2439, "step": 114080 }, { "epoch": 4.9145023043459535, "learning_rate": 7.238951496476977e-07, "loss": 3.3721, "step": 114100 }, { "epoch": 4.915363742085541, "learning_rate": 7.238466676722187e-07, "loss": 3.34, "step": 114120 }, { "epoch": 4.916225179825128, "learning_rate": 7.237981856967398e-07, "loss": 3.4718, "step": 114140 }, { "epoch": 4.917086617564715, "learning_rate": 7.237497037212609e-07, "loss": 3.4232, "step": 114160 }, { "epoch": 4.9179480553043025, "learning_rate": 7.23701221745782e-07, "loss": 3.2935, "step": 114180 }, { "epoch": 4.918809493043891, "learning_rate": 7.236527397703031e-07, "loss": 3.3965, "step": 114200 }, { "epoch": 4.919670930783478, "learning_rate": 7.236042577948243e-07, "loss": 3.4413, "step": 114220 }, { "epoch": 4.920532368523065, "learning_rate": 7.235557758193454e-07, "loss": 3.1762, "step": 114240 }, { "epoch": 4.921393806262652, "learning_rate": 7.235072938438664e-07, "loss": 3.4838, "step": 114260 }, { "epoch": 4.92225524400224, "learning_rate": 7.234588118683875e-07, "loss": 3.3305, "step": 114280 }, { "epoch": 4.923116681741827, "learning_rate": 7.234103298929087e-07, "loss": 3.4817, "step": 114300 }, { "epoch": 4.923978119481415, "learning_rate": 7.233618479174297e-07, "loss": 3.4203, "step": 114320 }, { "epoch": 4.924839557221002, "learning_rate": 7.233133659419508e-07, "loss": 3.3875, "step": 114340 }, { "epoch": 4.925700994960589, "learning_rate": 7.23264883966472e-07, "loss": 3.5222, "step": 114360 }, { "epoch": 4.926562432700177, "learning_rate": 7.232164019909932e-07, "loss": 3.327, "step": 114380 }, { "epoch": 4.927423870439764, "learning_rate": 7.231679200155142e-07, "loss": 3.2285, "step": 114400 }, { "epoch": 4.928285308179351, "learning_rate": 7.231194380400352e-07, "loss": 3.2453, "step": 114420 }, { "epoch": 4.929146745918938, "learning_rate": 7.230709560645564e-07, "loss": 3.3993, "step": 114440 }, { "epoch": 4.930008183658526, "learning_rate": 7.230224740890776e-07, "loss": 3.4082, "step": 114460 }, { "epoch": 4.930869621398114, "learning_rate": 7.229739921135987e-07, "loss": 3.4327, "step": 114480 }, { "epoch": 4.931731059137701, "learning_rate": 7.229255101381197e-07, "loss": 3.3565, "step": 114500 }, { "epoch": 4.932592496877288, "learning_rate": 7.228770281626408e-07, "loss": 3.1696, "step": 114520 }, { "epoch": 4.9334539346168755, "learning_rate": 7.22828546187162e-07, "loss": 3.3463, "step": 114540 }, { "epoch": 4.934315372356463, "learning_rate": 7.22780064211683e-07, "loss": 3.3488, "step": 114560 }, { "epoch": 4.93517681009605, "learning_rate": 7.227315822362041e-07, "loss": 3.3541, "step": 114580 }, { "epoch": 4.936038247835638, "learning_rate": 7.226831002607253e-07, "loss": 3.255, "step": 114600 }, { "epoch": 4.936899685575225, "learning_rate": 7.226346182852465e-07, "loss": 3.399, "step": 114620 }, { "epoch": 4.937761123314813, "learning_rate": 7.225861363097674e-07, "loss": 3.5777, "step": 114640 }, { "epoch": 4.9386225610544, "learning_rate": 7.225376543342885e-07, "loss": 3.3915, "step": 114660 }, { "epoch": 4.939483998793987, "learning_rate": 7.224891723588097e-07, "loss": 3.3361, "step": 114680 }, { "epoch": 4.940345436533574, "learning_rate": 7.224406903833308e-07, "loss": 3.2038, "step": 114700 }, { "epoch": 4.941206874273162, "learning_rate": 7.223922084078518e-07, "loss": 3.4828, "step": 114720 }, { "epoch": 4.942068312012749, "learning_rate": 7.22343726432373e-07, "loss": 3.4013, "step": 114740 }, { "epoch": 4.942929749752337, "learning_rate": 7.222952444568942e-07, "loss": 3.4448, "step": 114760 }, { "epoch": 4.943791187491924, "learning_rate": 7.222467624814152e-07, "loss": 3.2879, "step": 114780 }, { "epoch": 4.9446526252315115, "learning_rate": 7.221982805059362e-07, "loss": 3.2429, "step": 114800 }, { "epoch": 4.945514062971099, "learning_rate": 7.221497985304574e-07, "loss": 3.3428, "step": 114820 }, { "epoch": 4.946375500710686, "learning_rate": 7.221013165549785e-07, "loss": 3.3784, "step": 114840 }, { "epoch": 4.947236938450273, "learning_rate": 7.220528345794996e-07, "loss": 3.3063, "step": 114860 }, { "epoch": 4.948098376189861, "learning_rate": 7.220043526040207e-07, "loss": 3.4032, "step": 114880 }, { "epoch": 4.948959813929449, "learning_rate": 7.219558706285418e-07, "loss": 3.3629, "step": 114900 }, { "epoch": 4.949821251669036, "learning_rate": 7.21907388653063e-07, "loss": 3.3282, "step": 114920 }, { "epoch": 4.950682689408623, "learning_rate": 7.21858906677584e-07, "loss": 3.2692, "step": 114940 }, { "epoch": 4.95154412714821, "learning_rate": 7.218104247021051e-07, "loss": 3.3157, "step": 114960 }, { "epoch": 4.952405564887798, "learning_rate": 7.217619427266263e-07, "loss": 3.3772, "step": 114980 }, { "epoch": 4.953267002627385, "learning_rate": 7.217134607511475e-07, "loss": 3.334, "step": 115000 }, { "epoch": 4.954128440366972, "learning_rate": 7.216649787756685e-07, "loss": 3.4466, "step": 115020 }, { "epoch": 4.95498987810656, "learning_rate": 7.216164968001895e-07, "loss": 3.3218, "step": 115040 }, { "epoch": 4.9558513158461475, "learning_rate": 7.215680148247107e-07, "loss": 3.2133, "step": 115060 }, { "epoch": 4.956712753585735, "learning_rate": 7.215195328492319e-07, "loss": 3.3602, "step": 115080 }, { "epoch": 4.957574191325322, "learning_rate": 7.214710508737529e-07, "loss": 3.3511, "step": 115100 }, { "epoch": 4.958435629064909, "learning_rate": 7.21422568898274e-07, "loss": 3.5421, "step": 115120 }, { "epoch": 4.9592970668044964, "learning_rate": 7.213740869227952e-07, "loss": 3.4279, "step": 115140 }, { "epoch": 4.960158504544084, "learning_rate": 7.213256049473163e-07, "loss": 3.2522, "step": 115160 }, { "epoch": 4.961019942283672, "learning_rate": 7.212771229718373e-07, "loss": 3.4242, "step": 115180 }, { "epoch": 4.961881380023259, "learning_rate": 7.212286409963584e-07, "loss": 3.3116, "step": 115200 }, { "epoch": 4.962742817762846, "learning_rate": 7.211801590208796e-07, "loss": 3.3607, "step": 115220 }, { "epoch": 4.963604255502434, "learning_rate": 7.211316770454006e-07, "loss": 3.3513, "step": 115240 }, { "epoch": 4.964465693242021, "learning_rate": 7.210831950699218e-07, "loss": 3.2433, "step": 115260 }, { "epoch": 4.965327130981608, "learning_rate": 7.210347130944428e-07, "loss": 3.2373, "step": 115280 }, { "epoch": 4.966188568721195, "learning_rate": 7.20986231118964e-07, "loss": 3.5493, "step": 115300 }, { "epoch": 4.967050006460783, "learning_rate": 7.20937749143485e-07, "loss": 3.2338, "step": 115320 }, { "epoch": 4.967911444200371, "learning_rate": 7.208892671680061e-07, "loss": 3.4842, "step": 115340 }, { "epoch": 4.968772881939958, "learning_rate": 7.208407851925273e-07, "loss": 3.2719, "step": 115360 }, { "epoch": 4.969634319679545, "learning_rate": 7.207923032170485e-07, "loss": 3.3858, "step": 115380 }, { "epoch": 4.970495757419132, "learning_rate": 7.207438212415695e-07, "loss": 3.2511, "step": 115400 }, { "epoch": 4.97135719515872, "learning_rate": 7.206953392660905e-07, "loss": 3.2809, "step": 115420 }, { "epoch": 4.972218632898307, "learning_rate": 7.206468572906117e-07, "loss": 3.453, "step": 115440 }, { "epoch": 4.973080070637895, "learning_rate": 7.205983753151329e-07, "loss": 3.3949, "step": 115460 }, { "epoch": 4.973941508377482, "learning_rate": 7.205498933396539e-07, "loss": 3.473, "step": 115480 }, { "epoch": 4.9748029461170695, "learning_rate": 7.20501411364175e-07, "loss": 3.3837, "step": 115500 }, { "epoch": 4.975664383856657, "learning_rate": 7.204529293886962e-07, "loss": 3.153, "step": 115520 }, { "epoch": 4.976525821596244, "learning_rate": 7.204044474132172e-07, "loss": 3.3058, "step": 115540 }, { "epoch": 4.977387259335831, "learning_rate": 7.203559654377383e-07, "loss": 3.1242, "step": 115560 }, { "epoch": 4.9782486970754185, "learning_rate": 7.203074834622594e-07, "loss": 3.2933, "step": 115580 }, { "epoch": 4.979110134815006, "learning_rate": 7.202590014867806e-07, "loss": 3.1943, "step": 115600 }, { "epoch": 4.979971572554594, "learning_rate": 7.202105195113017e-07, "loss": 3.2513, "step": 115620 }, { "epoch": 4.980833010294181, "learning_rate": 7.201620375358228e-07, "loss": 3.4284, "step": 115640 }, { "epoch": 4.981694448033768, "learning_rate": 7.201135555603439e-07, "loss": 3.3555, "step": 115660 }, { "epoch": 4.982555885773356, "learning_rate": 7.200650735848649e-07, "loss": 3.4221, "step": 115680 }, { "epoch": 4.983417323512943, "learning_rate": 7.200165916093861e-07, "loss": 3.3952, "step": 115700 }, { "epoch": 4.98427876125253, "learning_rate": 7.199681096339072e-07, "loss": 3.2709, "step": 115720 }, { "epoch": 4.985140198992118, "learning_rate": 7.199196276584283e-07, "loss": 3.309, "step": 115740 }, { "epoch": 4.9860016367317055, "learning_rate": 7.198711456829495e-07, "loss": 3.2481, "step": 115760 }, { "epoch": 4.986863074471293, "learning_rate": 7.198226637074705e-07, "loss": 3.462, "step": 115780 }, { "epoch": 4.98772451221088, "learning_rate": 7.197741817319915e-07, "loss": 3.2731, "step": 115800 }, { "epoch": 4.988585949950467, "learning_rate": 7.197256997565127e-07, "loss": 3.309, "step": 115820 }, { "epoch": 4.9894473876900545, "learning_rate": 7.196772177810338e-07, "loss": 3.2293, "step": 115840 }, { "epoch": 4.990308825429642, "learning_rate": 7.196287358055549e-07, "loss": 3.5308, "step": 115860 }, { "epoch": 4.991170263169229, "learning_rate": 7.19580253830076e-07, "loss": 3.4282, "step": 115880 }, { "epoch": 4.992031700908817, "learning_rate": 7.195317718545972e-07, "loss": 3.2874, "step": 115900 }, { "epoch": 4.992893138648404, "learning_rate": 7.194832898791182e-07, "loss": 3.2489, "step": 115920 }, { "epoch": 4.993754576387992, "learning_rate": 7.194348079036393e-07, "loss": 3.3017, "step": 115940 }, { "epoch": 4.994616014127579, "learning_rate": 7.193863259281604e-07, "loss": 3.54, "step": 115960 }, { "epoch": 4.995477451867166, "learning_rate": 7.193378439526816e-07, "loss": 3.5398, "step": 115980 }, { "epoch": 4.996338889606753, "learning_rate": 7.192893619772027e-07, "loss": 3.3978, "step": 116000 }, { "epoch": 4.9972003273463415, "learning_rate": 7.192408800017238e-07, "loss": 3.2752, "step": 116020 }, { "epoch": 4.998061765085929, "learning_rate": 7.191923980262449e-07, "loss": 3.2816, "step": 116040 }, { "epoch": 4.998923202825516, "learning_rate": 7.19143916050766e-07, "loss": 3.1059, "step": 116060 }, { "epoch": 4.999784640565103, "learning_rate": 7.190954340752871e-07, "loss": 3.4993, "step": 116080 }, { "epoch": 5.00064607830469, "learning_rate": 7.190469520998081e-07, "loss": 3.2164, "step": 116100 }, { "epoch": 5.001507516044278, "learning_rate": 7.189984701243293e-07, "loss": 3.1924, "step": 116120 }, { "epoch": 5.002368953783865, "learning_rate": 7.189499881488504e-07, "loss": 3.4916, "step": 116140 }, { "epoch": 5.003230391523453, "learning_rate": 7.189015061733716e-07, "loss": 3.3075, "step": 116160 }, { "epoch": 5.00409182926304, "learning_rate": 7.188530241978926e-07, "loss": 3.4159, "step": 116180 }, { "epoch": 5.0049532670026275, "learning_rate": 7.188045422224137e-07, "loss": 3.3504, "step": 116200 }, { "epoch": 5.005814704742215, "learning_rate": 7.187560602469348e-07, "loss": 3.1897, "step": 116220 }, { "epoch": 5.006676142481802, "learning_rate": 7.18707578271456e-07, "loss": 3.1461, "step": 116240 }, { "epoch": 5.007537580221389, "learning_rate": 7.186590962959771e-07, "loss": 3.3035, "step": 116260 }, { "epoch": 5.0083990179609765, "learning_rate": 7.186106143204982e-07, "loss": 3.0551, "step": 116280 }, { "epoch": 5.009260455700565, "learning_rate": 7.185621323450192e-07, "loss": 3.3047, "step": 116300 }, { "epoch": 5.010121893440152, "learning_rate": 7.185136503695403e-07, "loss": 3.2376, "step": 116320 }, { "epoch": 5.010983331179739, "learning_rate": 7.184651683940614e-07, "loss": 3.1463, "step": 116340 }, { "epoch": 5.011844768919326, "learning_rate": 7.184166864185826e-07, "loss": 3.3451, "step": 116360 }, { "epoch": 5.012706206658914, "learning_rate": 7.183682044431037e-07, "loss": 3.3481, "step": 116380 }, { "epoch": 5.013567644398501, "learning_rate": 7.183197224676248e-07, "loss": 3.3748, "step": 116400 }, { "epoch": 5.014429082138088, "learning_rate": 7.182712404921459e-07, "loss": 3.399, "step": 116420 }, { "epoch": 5.015290519877676, "learning_rate": 7.182227585166669e-07, "loss": 3.2716, "step": 116440 }, { "epoch": 5.0161519576172635, "learning_rate": 7.181742765411881e-07, "loss": 3.3551, "step": 116460 }, { "epoch": 5.017013395356851, "learning_rate": 7.181257945657092e-07, "loss": 3.2017, "step": 116480 }, { "epoch": 5.017874833096438, "learning_rate": 7.180773125902303e-07, "loss": 3.2237, "step": 116500 }, { "epoch": 5.018736270836025, "learning_rate": 7.180288306147514e-07, "loss": 3.333, "step": 116520 }, { "epoch": 5.0195977085756125, "learning_rate": 7.179803486392726e-07, "loss": 3.3354, "step": 116540 }, { "epoch": 5.0204591463152, "learning_rate": 7.179318666637936e-07, "loss": 3.2447, "step": 116560 }, { "epoch": 5.021320584054788, "learning_rate": 7.178833846883147e-07, "loss": 3.3293, "step": 116580 }, { "epoch": 5.022182021794375, "learning_rate": 7.178349027128358e-07, "loss": 3.2851, "step": 116600 }, { "epoch": 5.023043459533962, "learning_rate": 7.177864207373569e-07, "loss": 3.2433, "step": 116620 }, { "epoch": 5.02390489727355, "learning_rate": 7.177379387618781e-07, "loss": 3.4275, "step": 116640 }, { "epoch": 5.024766335013137, "learning_rate": 7.176894567863992e-07, "loss": 3.2474, "step": 116660 }, { "epoch": 5.025627772752724, "learning_rate": 7.176409748109202e-07, "loss": 3.1054, "step": 116680 }, { "epoch": 5.026489210492311, "learning_rate": 7.175924928354414e-07, "loss": 3.2145, "step": 116700 }, { "epoch": 5.0273506482318995, "learning_rate": 7.175440108599625e-07, "loss": 3.3423, "step": 116720 }, { "epoch": 5.028212085971487, "learning_rate": 7.174955288844835e-07, "loss": 3.2361, "step": 116740 }, { "epoch": 5.029073523711074, "learning_rate": 7.174470469090047e-07, "loss": 3.3735, "step": 116760 }, { "epoch": 5.029934961450661, "learning_rate": 7.173985649335259e-07, "loss": 3.3189, "step": 116780 }, { "epoch": 5.0307963991902485, "learning_rate": 7.17350082958047e-07, "loss": 3.1903, "step": 116800 }, { "epoch": 5.031657836929836, "learning_rate": 7.173016009825679e-07, "loss": 3.4081, "step": 116820 }, { "epoch": 5.032519274669423, "learning_rate": 7.172531190070891e-07, "loss": 3.3324, "step": 116840 }, { "epoch": 5.033380712409011, "learning_rate": 7.172046370316102e-07, "loss": 3.2918, "step": 116860 }, { "epoch": 5.034242150148598, "learning_rate": 7.171561550561313e-07, "loss": 3.4805, "step": 116880 }, { "epoch": 5.035103587888186, "learning_rate": 7.171076730806524e-07, "loss": 3.1995, "step": 116900 }, { "epoch": 5.035965025627773, "learning_rate": 7.170591911051736e-07, "loss": 3.4255, "step": 116920 }, { "epoch": 5.03682646336736, "learning_rate": 7.170107091296945e-07, "loss": 3.0694, "step": 116940 }, { "epoch": 5.037687901106947, "learning_rate": 7.169622271542157e-07, "loss": 3.4009, "step": 116960 }, { "epoch": 5.0385493388465346, "learning_rate": 7.169137451787368e-07, "loss": 3.3476, "step": 116980 }, { "epoch": 5.039410776586122, "learning_rate": 7.16865263203258e-07, "loss": 3.2149, "step": 117000 }, { "epoch": 5.04027221432571, "learning_rate": 7.168167812277791e-07, "loss": 3.2856, "step": 117020 }, { "epoch": 5.041133652065297, "learning_rate": 7.167682992523003e-07, "loss": 3.4699, "step": 117040 }, { "epoch": 5.041995089804884, "learning_rate": 7.167198172768212e-07, "loss": 3.3669, "step": 117060 }, { "epoch": 5.042856527544472, "learning_rate": 7.166713353013424e-07, "loss": 3.2544, "step": 117080 }, { "epoch": 5.043717965284059, "learning_rate": 7.166228533258635e-07, "loss": 3.4742, "step": 117100 }, { "epoch": 5.044579403023646, "learning_rate": 7.165743713503845e-07, "loss": 3.1644, "step": 117120 }, { "epoch": 5.045440840763233, "learning_rate": 7.165258893749057e-07, "loss": 3.2146, "step": 117140 }, { "epoch": 5.0463022785028215, "learning_rate": 7.164774073994269e-07, "loss": 3.2629, "step": 117160 }, { "epoch": 5.047163716242409, "learning_rate": 7.16428925423948e-07, "loss": 3.4091, "step": 117180 }, { "epoch": 5.048025153981996, "learning_rate": 7.163804434484689e-07, "loss": 3.3521, "step": 117200 }, { "epoch": 5.048886591721583, "learning_rate": 7.163319614729901e-07, "loss": 3.3163, "step": 117220 }, { "epoch": 5.0497480294611705, "learning_rate": 7.162834794975113e-07, "loss": 3.2432, "step": 117240 }, { "epoch": 5.050609467200758, "learning_rate": 7.162349975220324e-07, "loss": 3.3028, "step": 117260 }, { "epoch": 5.051470904940345, "learning_rate": 7.161865155465534e-07, "loss": 3.3965, "step": 117280 }, { "epoch": 5.052332342679933, "learning_rate": 7.161380335710746e-07, "loss": 3.3166, "step": 117300 }, { "epoch": 5.05319378041952, "learning_rate": 7.160895515955957e-07, "loss": 3.4106, "step": 117320 }, { "epoch": 5.054055218159108, "learning_rate": 7.160410696201167e-07, "loss": 3.4457, "step": 117340 }, { "epoch": 5.054916655898695, "learning_rate": 7.159925876446378e-07, "loss": 3.209, "step": 117360 }, { "epoch": 5.055778093638282, "learning_rate": 7.15944105669159e-07, "loss": 3.4142, "step": 117380 }, { "epoch": 5.056639531377869, "learning_rate": 7.158956236936801e-07, "loss": 3.3758, "step": 117400 }, { "epoch": 5.057500969117457, "learning_rate": 7.158471417182011e-07, "loss": 3.4906, "step": 117420 }, { "epoch": 5.058362406857045, "learning_rate": 7.157986597427223e-07, "loss": 3.3155, "step": 117440 }, { "epoch": 5.059223844596632, "learning_rate": 7.157501777672433e-07, "loss": 3.2834, "step": 117460 }, { "epoch": 5.060085282336219, "learning_rate": 7.157016957917645e-07, "loss": 3.2455, "step": 117480 }, { "epoch": 5.0609467200758065, "learning_rate": 7.156532138162855e-07, "loss": 3.2418, "step": 117500 }, { "epoch": 5.061808157815394, "learning_rate": 7.156047318408067e-07, "loss": 3.369, "step": 117520 }, { "epoch": 5.062669595554981, "learning_rate": 7.155562498653279e-07, "loss": 3.35, "step": 117540 }, { "epoch": 5.063531033294568, "learning_rate": 7.15507767889849e-07, "loss": 3.3948, "step": 117560 }, { "epoch": 5.064392471034156, "learning_rate": 7.154592859143699e-07, "loss": 3.1273, "step": 117580 }, { "epoch": 5.065253908773744, "learning_rate": 7.154108039388911e-07, "loss": 3.3413, "step": 117600 }, { "epoch": 5.066115346513331, "learning_rate": 7.153623219634123e-07, "loss": 3.2372, "step": 117620 }, { "epoch": 5.066976784252918, "learning_rate": 7.153138399879334e-07, "loss": 3.216, "step": 117640 }, { "epoch": 5.067838221992505, "learning_rate": 7.152653580124544e-07, "loss": 3.141, "step": 117660 }, { "epoch": 5.068699659732093, "learning_rate": 7.152168760369756e-07, "loss": 3.3666, "step": 117680 }, { "epoch": 5.06956109747168, "learning_rate": 7.151683940614967e-07, "loss": 3.2647, "step": 117700 }, { "epoch": 5.070422535211268, "learning_rate": 7.151199120860177e-07, "loss": 3.2725, "step": 117720 }, { "epoch": 5.071283972950855, "learning_rate": 7.150714301105388e-07, "loss": 3.2857, "step": 117740 }, { "epoch": 5.0721454106904424, "learning_rate": 7.1502294813506e-07, "loss": 3.3427, "step": 117760 }, { "epoch": 5.07300684843003, "learning_rate": 7.149744661595812e-07, "loss": 3.4027, "step": 117780 }, { "epoch": 5.073868286169617, "learning_rate": 7.149259841841022e-07, "loss": 3.2014, "step": 117800 }, { "epoch": 5.074729723909204, "learning_rate": 7.148775022086233e-07, "loss": 3.3583, "step": 117820 }, { "epoch": 5.075591161648791, "learning_rate": 7.148290202331444e-07, "loss": 3.1608, "step": 117840 }, { "epoch": 5.07645259938838, "learning_rate": 7.147805382576656e-07, "loss": 3.1793, "step": 117860 }, { "epoch": 5.077314037127967, "learning_rate": 7.147320562821865e-07, "loss": 3.2232, "step": 117880 }, { "epoch": 5.078175474867554, "learning_rate": 7.146835743067077e-07, "loss": 3.3778, "step": 117900 }, { "epoch": 5.079036912607141, "learning_rate": 7.146350923312289e-07, "loss": 3.4455, "step": 117920 }, { "epoch": 5.0798983503467285, "learning_rate": 7.1458661035575e-07, "loss": 3.2943, "step": 117940 }, { "epoch": 5.080759788086316, "learning_rate": 7.145381283802709e-07, "loss": 3.3868, "step": 117960 }, { "epoch": 5.081621225825903, "learning_rate": 7.144896464047921e-07, "loss": 3.2209, "step": 117980 }, { "epoch": 5.082482663565491, "learning_rate": 7.144411644293133e-07, "loss": 3.1395, "step": 118000 }, { "epoch": 5.083344101305078, "learning_rate": 7.143926824538343e-07, "loss": 3.4147, "step": 118020 }, { "epoch": 5.084205539044666, "learning_rate": 7.143442004783554e-07, "loss": 3.372, "step": 118040 }, { "epoch": 5.085066976784253, "learning_rate": 7.142957185028766e-07, "loss": 3.3439, "step": 118060 }, { "epoch": 5.08592841452384, "learning_rate": 7.142472365273977e-07, "loss": 3.4245, "step": 118080 }, { "epoch": 5.086789852263427, "learning_rate": 7.141987545519187e-07, "loss": 3.2875, "step": 118100 }, { "epoch": 5.087651290003015, "learning_rate": 7.141502725764398e-07, "loss": 3.3443, "step": 118120 }, { "epoch": 5.088512727742603, "learning_rate": 7.14101790600961e-07, "loss": 3.1295, "step": 118140 }, { "epoch": 5.08937416548219, "learning_rate": 7.140533086254822e-07, "loss": 3.3251, "step": 118160 }, { "epoch": 5.090235603221777, "learning_rate": 7.140048266500032e-07, "loss": 3.5161, "step": 118180 }, { "epoch": 5.0910970409613645, "learning_rate": 7.139563446745243e-07, "loss": 3.2926, "step": 118200 }, { "epoch": 5.091958478700952, "learning_rate": 7.139078626990454e-07, "loss": 3.4842, "step": 118220 }, { "epoch": 5.092819916440539, "learning_rate": 7.138593807235666e-07, "loss": 3.2159, "step": 118240 }, { "epoch": 5.093681354180126, "learning_rate": 7.138108987480876e-07, "loss": 3.2224, "step": 118260 }, { "epoch": 5.094542791919714, "learning_rate": 7.137624167726087e-07, "loss": 3.3001, "step": 118280 }, { "epoch": 5.095404229659302, "learning_rate": 7.137139347971299e-07, "loss": 3.1671, "step": 118300 }, { "epoch": 5.096265667398889, "learning_rate": 7.13665452821651e-07, "loss": 3.2863, "step": 118320 }, { "epoch": 5.097127105138476, "learning_rate": 7.13616970846172e-07, "loss": 3.4125, "step": 118340 }, { "epoch": 5.097988542878063, "learning_rate": 7.135684888706931e-07, "loss": 3.1658, "step": 118360 }, { "epoch": 5.098849980617651, "learning_rate": 7.135200068952143e-07, "loss": 3.4096, "step": 118380 }, { "epoch": 5.099711418357238, "learning_rate": 7.134715249197353e-07, "loss": 3.1757, "step": 118400 }, { "epoch": 5.100572856096826, "learning_rate": 7.134230429442565e-07, "loss": 3.4005, "step": 118420 }, { "epoch": 5.101434293836413, "learning_rate": 7.133745609687776e-07, "loss": 3.4536, "step": 118440 }, { "epoch": 5.1022957315760005, "learning_rate": 7.133260789932987e-07, "loss": 3.3017, "step": 118460 }, { "epoch": 5.103157169315588, "learning_rate": 7.132775970178197e-07, "loss": 3.2372, "step": 118480 }, { "epoch": 5.104018607055175, "learning_rate": 7.132291150423408e-07, "loss": 3.1772, "step": 118500 }, { "epoch": 5.104880044794762, "learning_rate": 7.13180633066862e-07, "loss": 3.141, "step": 118520 }, { "epoch": 5.1057414825343495, "learning_rate": 7.131321510913832e-07, "loss": 3.2798, "step": 118540 }, { "epoch": 5.106602920273938, "learning_rate": 7.130836691159042e-07, "loss": 3.2898, "step": 118560 }, { "epoch": 5.107464358013525, "learning_rate": 7.130351871404253e-07, "loss": 3.4141, "step": 118580 }, { "epoch": 5.108325795753112, "learning_rate": 7.129867051649464e-07, "loss": 3.2897, "step": 118600 }, { "epoch": 5.109187233492699, "learning_rate": 7.129382231894675e-07, "loss": 3.34, "step": 118620 }, { "epoch": 5.110048671232287, "learning_rate": 7.128897412139886e-07, "loss": 3.3275, "step": 118640 }, { "epoch": 5.110910108971874, "learning_rate": 7.128412592385097e-07, "loss": 3.3727, "step": 118660 }, { "epoch": 5.111771546711461, "learning_rate": 7.127927772630309e-07, "loss": 3.0476, "step": 118680 }, { "epoch": 5.112632984451049, "learning_rate": 7.12744295287552e-07, "loss": 3.1781, "step": 118700 }, { "epoch": 5.113494422190636, "learning_rate": 7.126958133120729e-07, "loss": 3.3102, "step": 118720 }, { "epoch": 5.114355859930224, "learning_rate": 7.126473313365941e-07, "loss": 3.4757, "step": 118740 }, { "epoch": 5.115217297669811, "learning_rate": 7.125988493611153e-07, "loss": 3.3084, "step": 118760 }, { "epoch": 5.116078735409398, "learning_rate": 7.125503673856364e-07, "loss": 3.3996, "step": 118780 }, { "epoch": 5.116940173148985, "learning_rate": 7.125018854101575e-07, "loss": 3.447, "step": 118800 }, { "epoch": 5.117801610888573, "learning_rate": 7.124534034346787e-07, "loss": 3.2432, "step": 118820 }, { "epoch": 5.118663048628161, "learning_rate": 7.124049214591997e-07, "loss": 3.2017, "step": 118840 }, { "epoch": 5.119524486367748, "learning_rate": 7.123564394837208e-07, "loss": 3.1725, "step": 118860 }, { "epoch": 5.120385924107335, "learning_rate": 7.123079575082419e-07, "loss": 3.2998, "step": 118880 }, { "epoch": 5.1212473618469225, "learning_rate": 7.12259475532763e-07, "loss": 3.179, "step": 118900 }, { "epoch": 5.12210879958651, "learning_rate": 7.122109935572842e-07, "loss": 3.1106, "step": 118920 }, { "epoch": 5.122970237326097, "learning_rate": 7.121625115818053e-07, "loss": 3.1492, "step": 118940 }, { "epoch": 5.123831675065684, "learning_rate": 7.121140296063264e-07, "loss": 3.2879, "step": 118960 }, { "epoch": 5.124693112805272, "learning_rate": 7.120655476308474e-07, "loss": 3.3552, "step": 118980 }, { "epoch": 5.12555455054486, "learning_rate": 7.120170656553685e-07, "loss": 3.1907, "step": 119000 }, { "epoch": 5.126415988284447, "learning_rate": 7.119685836798896e-07, "loss": 3.2938, "step": 119020 }, { "epoch": 5.127277426024034, "learning_rate": 7.119201017044107e-07, "loss": 3.3589, "step": 119040 }, { "epoch": 5.128138863763621, "learning_rate": 7.118716197289319e-07, "loss": 3.1688, "step": 119060 }, { "epoch": 5.129000301503209, "learning_rate": 7.11823137753453e-07, "loss": 3.2452, "step": 119080 }, { "epoch": 5.129861739242796, "learning_rate": 7.11774655777974e-07, "loss": 3.3892, "step": 119100 }, { "epoch": 5.130723176982384, "learning_rate": 7.117261738024951e-07, "loss": 3.425, "step": 119120 }, { "epoch": 5.131584614721971, "learning_rate": 7.116776918270163e-07, "loss": 3.2473, "step": 119140 }, { "epoch": 5.1324460524615585, "learning_rate": 7.116292098515374e-07, "loss": 3.3399, "step": 119160 }, { "epoch": 5.133307490201146, "learning_rate": 7.115807278760585e-07, "loss": 3.4017, "step": 119180 }, { "epoch": 5.134168927940733, "learning_rate": 7.115322459005796e-07, "loss": 3.1981, "step": 119200 }, { "epoch": 5.13503036568032, "learning_rate": 7.114837639251008e-07, "loss": 3.248, "step": 119220 }, { "epoch": 5.1358918034199075, "learning_rate": 7.114352819496217e-07, "loss": 3.2817, "step": 119240 }, { "epoch": 5.136753241159496, "learning_rate": 7.113867999741429e-07, "loss": 3.3869, "step": 119260 }, { "epoch": 5.137614678899083, "learning_rate": 7.11338317998664e-07, "loss": 3.408, "step": 119280 }, { "epoch": 5.13847611663867, "learning_rate": 7.112898360231851e-07, "loss": 3.162, "step": 119300 }, { "epoch": 5.139337554378257, "learning_rate": 7.112413540477063e-07, "loss": 3.2604, "step": 119320 }, { "epoch": 5.140198992117845, "learning_rate": 7.111928720722274e-07, "loss": 3.176, "step": 119340 }, { "epoch": 5.141060429857432, "learning_rate": 7.111443900967484e-07, "loss": 3.3473, "step": 119360 }, { "epoch": 5.141921867597019, "learning_rate": 7.110959081212695e-07, "loss": 3.199, "step": 119380 }, { "epoch": 5.142783305336607, "learning_rate": 7.110474261457907e-07, "loss": 3.3073, "step": 119400 }, { "epoch": 5.1436447430761945, "learning_rate": 7.109989441703118e-07, "loss": 3.3468, "step": 119420 }, { "epoch": 5.144506180815782, "learning_rate": 7.109504621948329e-07, "loss": 3.2473, "step": 119440 }, { "epoch": 5.145367618555369, "learning_rate": 7.10901980219354e-07, "loss": 3.2101, "step": 119460 }, { "epoch": 5.146229056294956, "learning_rate": 7.108534982438751e-07, "loss": 3.329, "step": 119480 }, { "epoch": 5.147090494034543, "learning_rate": 7.108050162683962e-07, "loss": 3.2842, "step": 119500 }, { "epoch": 5.147951931774131, "learning_rate": 7.107565342929172e-07, "loss": 3.225, "step": 119520 }, { "epoch": 5.148813369513719, "learning_rate": 7.107080523174384e-07, "loss": 3.3703, "step": 119540 }, { "epoch": 5.149674807253306, "learning_rate": 7.106595703419595e-07, "loss": 3.2941, "step": 119560 }, { "epoch": 5.150536244992893, "learning_rate": 7.106110883664806e-07, "loss": 3.0823, "step": 119580 }, { "epoch": 5.1513976827324806, "learning_rate": 7.105626063910017e-07, "loss": 3.1861, "step": 119600 }, { "epoch": 5.152259120472068, "learning_rate": 7.105141244155228e-07, "loss": 3.2413, "step": 119620 }, { "epoch": 5.153120558211655, "learning_rate": 7.104656424400439e-07, "loss": 3.289, "step": 119640 }, { "epoch": 5.153981995951242, "learning_rate": 7.10417160464565e-07, "loss": 3.1453, "step": 119660 }, { "epoch": 5.15484343369083, "learning_rate": 7.103686784890861e-07, "loss": 3.3127, "step": 119680 }, { "epoch": 5.155704871430418, "learning_rate": 7.103201965136073e-07, "loss": 3.253, "step": 119700 }, { "epoch": 5.156566309170005, "learning_rate": 7.102717145381284e-07, "loss": 3.4088, "step": 119720 }, { "epoch": 5.157427746909592, "learning_rate": 7.102232325626494e-07, "loss": 3.1496, "step": 119740 }, { "epoch": 5.158289184649179, "learning_rate": 7.101747505871705e-07, "loss": 3.1972, "step": 119760 }, { "epoch": 5.159150622388767, "learning_rate": 7.101262686116917e-07, "loss": 3.3144, "step": 119780 }, { "epoch": 5.160012060128354, "learning_rate": 7.100777866362128e-07, "loss": 3.2915, "step": 119800 }, { "epoch": 5.160873497867941, "learning_rate": 7.100293046607338e-07, "loss": 3.2987, "step": 119820 }, { "epoch": 5.161734935607529, "learning_rate": 7.09980822685255e-07, "loss": 3.3093, "step": 119840 }, { "epoch": 5.1625963733471165, "learning_rate": 7.099323407097761e-07, "loss": 3.0692, "step": 119860 }, { "epoch": 5.163457811086704, "learning_rate": 7.098838587342972e-07, "loss": 3.2909, "step": 119880 }, { "epoch": 5.164319248826291, "learning_rate": 7.098353767588182e-07, "loss": 3.1687, "step": 119900 }, { "epoch": 5.165180686565878, "learning_rate": 7.097868947833394e-07, "loss": 3.2103, "step": 119920 }, { "epoch": 5.1660421243054655, "learning_rate": 7.097384128078606e-07, "loss": 3.1913, "step": 119940 }, { "epoch": 5.166903562045054, "learning_rate": 7.096899308323817e-07, "loss": 3.2558, "step": 119960 }, { "epoch": 5.167764999784641, "learning_rate": 7.096414488569027e-07, "loss": 3.4224, "step": 119980 }, { "epoch": 5.168626437524228, "learning_rate": 7.095929668814238e-07, "loss": 3.4945, "step": 120000 }, { "epoch": 5.169487875263815, "learning_rate": 7.09544484905945e-07, "loss": 3.4162, "step": 120020 }, { "epoch": 5.170349313003403, "learning_rate": 7.094960029304661e-07, "loss": 3.1287, "step": 120040 }, { "epoch": 5.17121075074299, "learning_rate": 7.094475209549871e-07, "loss": 3.2619, "step": 120060 }, { "epoch": 5.172072188482577, "learning_rate": 7.093990389795083e-07, "loss": 3.351, "step": 120080 }, { "epoch": 5.172933626222164, "learning_rate": 7.093505570040294e-07, "loss": 3.2758, "step": 120100 }, { "epoch": 5.1737950639617525, "learning_rate": 7.093020750285503e-07, "loss": 3.3792, "step": 120120 }, { "epoch": 5.17465650170134, "learning_rate": 7.092535930530715e-07, "loss": 3.449, "step": 120140 }, { "epoch": 5.175517939440927, "learning_rate": 7.092051110775927e-07, "loss": 3.3339, "step": 120160 }, { "epoch": 5.176379377180514, "learning_rate": 7.091566291021138e-07, "loss": 3.4083, "step": 120180 }, { "epoch": 5.1772408149201015, "learning_rate": 7.091081471266348e-07, "loss": 3.2743, "step": 120200 }, { "epoch": 5.178102252659689, "learning_rate": 7.09059665151156e-07, "loss": 3.2275, "step": 120220 }, { "epoch": 5.178963690399277, "learning_rate": 7.090111831756771e-07, "loss": 3.2897, "step": 120240 }, { "epoch": 5.179825128138864, "learning_rate": 7.089627012001982e-07, "loss": 3.3508, "step": 120260 }, { "epoch": 5.180686565878451, "learning_rate": 7.089142192247192e-07, "loss": 3.2463, "step": 120280 }, { "epoch": 5.181548003618039, "learning_rate": 7.088657372492404e-07, "loss": 3.1163, "step": 120300 }, { "epoch": 5.182409441357626, "learning_rate": 7.088172552737616e-07, "loss": 3.2874, "step": 120320 }, { "epoch": 5.183270879097213, "learning_rate": 7.087687732982827e-07, "loss": 3.2875, "step": 120340 }, { "epoch": 5.1841323168368, "learning_rate": 7.087202913228037e-07, "loss": 3.3629, "step": 120360 }, { "epoch": 5.184993754576388, "learning_rate": 7.086718093473248e-07, "loss": 3.1691, "step": 120380 }, { "epoch": 5.185855192315976, "learning_rate": 7.08623327371846e-07, "loss": 3.2311, "step": 120400 }, { "epoch": 5.186716630055563, "learning_rate": 7.085748453963671e-07, "loss": 3.1645, "step": 120420 }, { "epoch": 5.18757806779515, "learning_rate": 7.085263634208881e-07, "loss": 3.1069, "step": 120440 }, { "epoch": 5.188439505534737, "learning_rate": 7.084778814454093e-07, "loss": 3.3347, "step": 120460 }, { "epoch": 5.189300943274325, "learning_rate": 7.084293994699305e-07, "loss": 3.2396, "step": 120480 }, { "epoch": 5.190162381013912, "learning_rate": 7.083809174944513e-07, "loss": 3.3069, "step": 120500 }, { "epoch": 5.191023818753499, "learning_rate": 7.083324355189725e-07, "loss": 3.3206, "step": 120520 }, { "epoch": 5.191885256493087, "learning_rate": 7.082839535434937e-07, "loss": 3.3677, "step": 120540 }, { "epoch": 5.1927466942326745, "learning_rate": 7.082354715680149e-07, "loss": 3.3377, "step": 120560 }, { "epoch": 5.193608131972262, "learning_rate": 7.081869895925359e-07, "loss": 3.3011, "step": 120580 }, { "epoch": 5.194469569711849, "learning_rate": 7.081385076170571e-07, "loss": 3.207, "step": 120600 }, { "epoch": 5.195331007451436, "learning_rate": 7.080900256415781e-07, "loss": 3.1702, "step": 120620 }, { "epoch": 5.1961924451910235, "learning_rate": 7.080415436660992e-07, "loss": 3.2638, "step": 120640 }, { "epoch": 5.197053882930611, "learning_rate": 7.079930616906202e-07, "loss": 3.2216, "step": 120660 }, { "epoch": 5.197915320670199, "learning_rate": 7.079445797151414e-07, "loss": 3.1858, "step": 120680 }, { "epoch": 5.198776758409786, "learning_rate": 7.078960977396626e-07, "loss": 3.3262, "step": 120700 }, { "epoch": 5.199638196149373, "learning_rate": 7.078476157641837e-07, "loss": 3.1264, "step": 120720 }, { "epoch": 5.200499633888961, "learning_rate": 7.077991337887047e-07, "loss": 3.15, "step": 120740 }, { "epoch": 5.201361071628548, "learning_rate": 7.077506518132258e-07, "loss": 3.2252, "step": 120760 }, { "epoch": 5.202222509368135, "learning_rate": 7.07702169837747e-07, "loss": 3.3022, "step": 120780 }, { "epoch": 5.203083947107722, "learning_rate": 7.07653687862268e-07, "loss": 3.1993, "step": 120800 }, { "epoch": 5.2039453848473105, "learning_rate": 7.076052058867891e-07, "loss": 3.17, "step": 120820 }, { "epoch": 5.204806822586898, "learning_rate": 7.075567239113103e-07, "loss": 3.1966, "step": 120840 }, { "epoch": 5.205668260326485, "learning_rate": 7.075082419358315e-07, "loss": 3.2244, "step": 120860 }, { "epoch": 5.206529698066072, "learning_rate": 7.074597599603524e-07, "loss": 3.2044, "step": 120880 }, { "epoch": 5.2073911358056595, "learning_rate": 7.074112779848735e-07, "loss": 3.4288, "step": 120900 }, { "epoch": 5.208252573545247, "learning_rate": 7.073627960093947e-07, "loss": 3.2562, "step": 120920 }, { "epoch": 5.209114011284834, "learning_rate": 7.073143140339159e-07, "loss": 3.3652, "step": 120940 }, { "epoch": 5.209975449024422, "learning_rate": 7.072658320584369e-07, "loss": 3.2845, "step": 120960 }, { "epoch": 5.210836886764009, "learning_rate": 7.07217350082958e-07, "loss": 3.1904, "step": 120980 }, { "epoch": 5.211698324503597, "learning_rate": 7.071688681074792e-07, "loss": 3.4221, "step": 121000 }, { "epoch": 5.212559762243184, "learning_rate": 7.071203861320002e-07, "loss": 3.1999, "step": 121020 }, { "epoch": 5.213421199982771, "learning_rate": 7.070719041565213e-07, "loss": 3.1014, "step": 121040 }, { "epoch": 5.214282637722358, "learning_rate": 7.070234221810424e-07, "loss": 3.2871, "step": 121060 }, { "epoch": 5.215144075461946, "learning_rate": 7.069749402055636e-07, "loss": 3.2395, "step": 121080 }, { "epoch": 5.216005513201534, "learning_rate": 7.069264582300848e-07, "loss": 3.1326, "step": 121100 }, { "epoch": 5.216866950941121, "learning_rate": 7.068779762546058e-07, "loss": 3.1206, "step": 121120 }, { "epoch": 5.217728388680708, "learning_rate": 7.068294942791268e-07, "loss": 3.368, "step": 121140 }, { "epoch": 5.2185898264202955, "learning_rate": 7.06781012303648e-07, "loss": 3.2948, "step": 121160 }, { "epoch": 5.219451264159883, "learning_rate": 7.06732530328169e-07, "loss": 3.284, "step": 121180 }, { "epoch": 5.22031270189947, "learning_rate": 7.066840483526901e-07, "loss": 3.4241, "step": 121200 }, { "epoch": 5.221174139639057, "learning_rate": 7.066355663772113e-07, "loss": 3.1637, "step": 121220 }, { "epoch": 5.222035577378645, "learning_rate": 7.065870844017325e-07, "loss": 3.2195, "step": 121240 }, { "epoch": 5.222897015118233, "learning_rate": 7.065386024262534e-07, "loss": 3.208, "step": 121260 }, { "epoch": 5.22375845285782, "learning_rate": 7.064901204507745e-07, "loss": 3.3566, "step": 121280 }, { "epoch": 5.224619890597407, "learning_rate": 7.064416384752957e-07, "loss": 3.3725, "step": 121300 }, { "epoch": 5.225481328336994, "learning_rate": 7.063931564998169e-07, "loss": 3.3575, "step": 121320 }, { "epoch": 5.2263427660765815, "learning_rate": 7.063446745243379e-07, "loss": 3.3201, "step": 121340 }, { "epoch": 5.227204203816169, "learning_rate": 7.06296192548859e-07, "loss": 3.2648, "step": 121360 }, { "epoch": 5.228065641555757, "learning_rate": 7.062477105733802e-07, "loss": 3.186, "step": 121380 }, { "epoch": 5.228927079295344, "learning_rate": 7.061992285979013e-07, "loss": 3.3726, "step": 121400 }, { "epoch": 5.229788517034931, "learning_rate": 7.061507466224223e-07, "loss": 3.2674, "step": 121420 }, { "epoch": 5.230649954774519, "learning_rate": 7.061022646469434e-07, "loss": 3.3961, "step": 121440 }, { "epoch": 5.231511392514106, "learning_rate": 7.060537826714646e-07, "loss": 3.0819, "step": 121460 }, { "epoch": 5.232372830253693, "learning_rate": 7.060053006959857e-07, "loss": 3.3016, "step": 121480 }, { "epoch": 5.23323426799328, "learning_rate": 7.059568187205068e-07, "loss": 3.2763, "step": 121500 }, { "epoch": 5.2340957057328685, "learning_rate": 7.059083367450278e-07, "loss": 3.2682, "step": 121520 }, { "epoch": 5.234957143472456, "learning_rate": 7.05859854769549e-07, "loss": 3.3309, "step": 121540 }, { "epoch": 5.235818581212043, "learning_rate": 7.058113727940701e-07, "loss": 3.2571, "step": 121560 }, { "epoch": 5.23668001895163, "learning_rate": 7.057628908185912e-07, "loss": 3.2318, "step": 121580 }, { "epoch": 5.2375414566912175, "learning_rate": 7.057144088431123e-07, "loss": 3.1048, "step": 121600 }, { "epoch": 5.238402894430805, "learning_rate": 7.056659268676335e-07, "loss": 3.2561, "step": 121620 }, { "epoch": 5.239264332170392, "learning_rate": 7.056174448921545e-07, "loss": 3.3285, "step": 121640 }, { "epoch": 5.24012576990998, "learning_rate": 7.055689629166756e-07, "loss": 3.1627, "step": 121660 }, { "epoch": 5.240987207649567, "learning_rate": 7.055204809411967e-07, "loss": 3.2619, "step": 121680 }, { "epoch": 5.241848645389155, "learning_rate": 7.054719989657179e-07, "loss": 3.1182, "step": 121700 }, { "epoch": 5.242710083128742, "learning_rate": 7.054235169902389e-07, "loss": 3.2971, "step": 121720 }, { "epoch": 5.243571520868329, "learning_rate": 7.0537503501476e-07, "loss": 3.2285, "step": 121740 }, { "epoch": 5.244432958607916, "learning_rate": 7.053265530392812e-07, "loss": 3.297, "step": 121760 }, { "epoch": 5.245294396347504, "learning_rate": 7.052780710638022e-07, "loss": 3.1897, "step": 121780 }, { "epoch": 5.246155834087092, "learning_rate": 7.052295890883233e-07, "loss": 3.4053, "step": 121800 }, { "epoch": 5.247017271826679, "learning_rate": 7.051811071128444e-07, "loss": 3.2536, "step": 121820 }, { "epoch": 5.247878709566266, "learning_rate": 7.051326251373656e-07, "loss": 3.2782, "step": 121840 }, { "epoch": 5.2487401473058535, "learning_rate": 7.050841431618867e-07, "loss": 3.3838, "step": 121860 }, { "epoch": 5.249601585045441, "learning_rate": 7.050356611864078e-07, "loss": 3.2346, "step": 121880 }, { "epoch": 5.250463022785028, "learning_rate": 7.049871792109288e-07, "loss": 3.37, "step": 121900 }, { "epoch": 5.251324460524615, "learning_rate": 7.0493869723545e-07, "loss": 3.2603, "step": 121920 }, { "epoch": 5.252185898264203, "learning_rate": 7.048902152599711e-07, "loss": 3.1746, "step": 121940 }, { "epoch": 5.253047336003791, "learning_rate": 7.048417332844922e-07, "loss": 3.1348, "step": 121960 }, { "epoch": 5.253908773743378, "learning_rate": 7.047932513090133e-07, "loss": 3.409, "step": 121980 }, { "epoch": 5.254770211482965, "learning_rate": 7.047447693335345e-07, "loss": 3.1956, "step": 122000 }, { "epoch": 5.255631649222552, "learning_rate": 7.046962873580555e-07, "loss": 3.2303, "step": 122020 }, { "epoch": 5.25649308696214, "learning_rate": 7.046478053825766e-07, "loss": 3.3265, "step": 122040 }, { "epoch": 5.257354524701727, "learning_rate": 7.045993234070977e-07, "loss": 3.3418, "step": 122060 }, { "epoch": 5.258215962441315, "learning_rate": 7.045508414316188e-07, "loss": 3.3365, "step": 122080 }, { "epoch": 5.259077400180902, "learning_rate": 7.0450235945614e-07, "loss": 3.4789, "step": 122100 }, { "epoch": 5.259938837920489, "learning_rate": 7.044538774806611e-07, "loss": 3.1382, "step": 122120 }, { "epoch": 5.260800275660077, "learning_rate": 7.044053955051822e-07, "loss": 3.4152, "step": 122140 }, { "epoch": 5.261661713399664, "learning_rate": 7.043569135297032e-07, "loss": 3.4156, "step": 122160 }, { "epoch": 5.262523151139251, "learning_rate": 7.043084315542244e-07, "loss": 3.3359, "step": 122180 }, { "epoch": 5.263384588878838, "learning_rate": 7.042599495787455e-07, "loss": 3.2836, "step": 122200 }, { "epoch": 5.2642460266184266, "learning_rate": 7.042114676032666e-07, "loss": 3.3352, "step": 122220 }, { "epoch": 5.265107464358014, "learning_rate": 7.041629856277877e-07, "loss": 3.1764, "step": 122240 }, { "epoch": 5.265968902097601, "learning_rate": 7.041145036523088e-07, "loss": 3.2883, "step": 122260 }, { "epoch": 5.266830339837188, "learning_rate": 7.040660216768298e-07, "loss": 3.399, "step": 122280 }, { "epoch": 5.2676917775767755, "learning_rate": 7.04017539701351e-07, "loss": 3.263, "step": 122300 }, { "epoch": 5.268553215316363, "learning_rate": 7.039690577258721e-07, "loss": 3.1207, "step": 122320 }, { "epoch": 5.26941465305595, "learning_rate": 7.039205757503932e-07, "loss": 3.1844, "step": 122340 }, { "epoch": 5.270276090795538, "learning_rate": 7.038720937749143e-07, "loss": 3.1571, "step": 122360 }, { "epoch": 5.271137528535125, "learning_rate": 7.038236117994356e-07, "loss": 3.1762, "step": 122380 }, { "epoch": 5.271998966274713, "learning_rate": 7.037751298239565e-07, "loss": 3.2898, "step": 122400 }, { "epoch": 5.2728604040143, "learning_rate": 7.037266478484776e-07, "loss": 3.4552, "step": 122420 }, { "epoch": 5.273721841753887, "learning_rate": 7.036781658729987e-07, "loss": 3.274, "step": 122440 }, { "epoch": 5.274583279493474, "learning_rate": 7.036296838975198e-07, "loss": 3.2178, "step": 122460 }, { "epoch": 5.275444717233062, "learning_rate": 7.03581201922041e-07, "loss": 3.0688, "step": 122480 }, { "epoch": 5.27630615497265, "learning_rate": 7.035327199465621e-07, "loss": 3.1846, "step": 122500 }, { "epoch": 5.277167592712237, "learning_rate": 7.034842379710832e-07, "loss": 3.2659, "step": 122520 }, { "epoch": 5.278029030451824, "learning_rate": 7.034357559956042e-07, "loss": 3.2669, "step": 122540 }, { "epoch": 5.2788904681914115, "learning_rate": 7.033872740201254e-07, "loss": 3.0736, "step": 122560 }, { "epoch": 5.279751905930999, "learning_rate": 7.033387920446465e-07, "loss": 3.2083, "step": 122580 }, { "epoch": 5.280613343670586, "learning_rate": 7.032903100691676e-07, "loss": 3.1391, "step": 122600 }, { "epoch": 5.281474781410173, "learning_rate": 7.032418280936887e-07, "loss": 3.4247, "step": 122620 }, { "epoch": 5.2823362191497605, "learning_rate": 7.031933461182099e-07, "loss": 3.1499, "step": 122640 }, { "epoch": 5.283197656889349, "learning_rate": 7.031448641427309e-07, "loss": 3.2581, "step": 122660 }, { "epoch": 5.284059094628936, "learning_rate": 7.030963821672519e-07, "loss": 3.0979, "step": 122680 }, { "epoch": 5.284920532368523, "learning_rate": 7.030479001917731e-07, "loss": 3.0472, "step": 122700 }, { "epoch": 5.28578197010811, "learning_rate": 7.029994182162943e-07, "loss": 3.2276, "step": 122720 }, { "epoch": 5.286643407847698, "learning_rate": 7.029509362408154e-07, "loss": 3.2683, "step": 122740 }, { "epoch": 5.287504845587285, "learning_rate": 7.029024542653364e-07, "loss": 3.1784, "step": 122760 }, { "epoch": 5.288366283326873, "learning_rate": 7.028539722898576e-07, "loss": 3.3002, "step": 122780 }, { "epoch": 5.28922772106646, "learning_rate": 7.028054903143786e-07, "loss": 3.1269, "step": 122800 }, { "epoch": 5.2900891588060475, "learning_rate": 7.027570083388997e-07, "loss": 3.1993, "step": 122820 }, { "epoch": 5.290950596545635, "learning_rate": 7.027085263634208e-07, "loss": 3.348, "step": 122840 }, { "epoch": 5.291812034285222, "learning_rate": 7.02660044387942e-07, "loss": 3.2302, "step": 122860 }, { "epoch": 5.292673472024809, "learning_rate": 7.026115624124631e-07, "loss": 3.2381, "step": 122880 }, { "epoch": 5.2935349097643964, "learning_rate": 7.025630804369842e-07, "loss": 3.2901, "step": 122900 }, { "epoch": 5.294396347503984, "learning_rate": 7.025145984615052e-07, "loss": 3.189, "step": 122920 }, { "epoch": 5.295257785243572, "learning_rate": 7.024661164860264e-07, "loss": 3.3819, "step": 122940 }, { "epoch": 5.296119222983159, "learning_rate": 7.024176345105475e-07, "loss": 3.2116, "step": 122960 }, { "epoch": 5.296980660722746, "learning_rate": 7.023691525350685e-07, "loss": 3.2211, "step": 122980 }, { "epoch": 5.297842098462334, "learning_rate": 7.023206705595897e-07, "loss": 3.3478, "step": 123000 }, { "epoch": 5.298703536201921, "learning_rate": 7.022721885841109e-07, "loss": 3.1674, "step": 123020 }, { "epoch": 5.299564973941508, "learning_rate": 7.022237066086319e-07, "loss": 3.3267, "step": 123040 }, { "epoch": 5.300426411681096, "learning_rate": 7.021752246331529e-07, "loss": 3.2083, "step": 123060 }, { "epoch": 5.301287849420683, "learning_rate": 7.021267426576741e-07, "loss": 3.1486, "step": 123080 }, { "epoch": 5.302149287160271, "learning_rate": 7.020782606821953e-07, "loss": 3.1825, "step": 123100 }, { "epoch": 5.303010724899858, "learning_rate": 7.020297787067164e-07, "loss": 3.4227, "step": 123120 }, { "epoch": 5.303872162639445, "learning_rate": 7.019812967312374e-07, "loss": 3.102, "step": 123140 }, { "epoch": 5.304733600379032, "learning_rate": 7.019328147557586e-07, "loss": 3.1699, "step": 123160 }, { "epoch": 5.30559503811862, "learning_rate": 7.018843327802797e-07, "loss": 3.2191, "step": 123180 }, { "epoch": 5.306456475858207, "learning_rate": 7.018358508048008e-07, "loss": 3.3449, "step": 123200 }, { "epoch": 5.307317913597795, "learning_rate": 7.017873688293218e-07, "loss": 3.3355, "step": 123220 }, { "epoch": 5.308179351337382, "learning_rate": 7.01738886853843e-07, "loss": 3.2185, "step": 123240 }, { "epoch": 5.3090407890769695, "learning_rate": 7.016904048783642e-07, "loss": 3.1446, "step": 123260 }, { "epoch": 5.309902226816557, "learning_rate": 7.016419229028853e-07, "loss": 3.237, "step": 123280 }, { "epoch": 5.310763664556144, "learning_rate": 7.015934409274062e-07, "loss": 3.2808, "step": 123300 }, { "epoch": 5.311625102295731, "learning_rate": 7.015449589519274e-07, "loss": 3.248, "step": 123320 }, { "epoch": 5.312486540035319, "learning_rate": 7.014964769764485e-07, "loss": 3.2058, "step": 123340 }, { "epoch": 5.313347977774907, "learning_rate": 7.014479950009695e-07, "loss": 3.4605, "step": 123360 }, { "epoch": 5.314209415514494, "learning_rate": 7.013995130254907e-07, "loss": 3.2334, "step": 123380 }, { "epoch": 5.315070853254081, "learning_rate": 7.013510310500119e-07, "loss": 3.2318, "step": 123400 }, { "epoch": 5.315932290993668, "learning_rate": 7.013025490745329e-07, "loss": 3.0309, "step": 123420 }, { "epoch": 5.316793728733256, "learning_rate": 7.012540670990539e-07, "loss": 3.3005, "step": 123440 }, { "epoch": 5.317655166472843, "learning_rate": 7.012055851235751e-07, "loss": 3.2107, "step": 123460 }, { "epoch": 5.31851660421243, "learning_rate": 7.011571031480963e-07, "loss": 3.2395, "step": 123480 }, { "epoch": 5.319378041952018, "learning_rate": 7.011086211726174e-07, "loss": 3.3364, "step": 123500 }, { "epoch": 5.3202394796916055, "learning_rate": 7.010601391971384e-07, "loss": 3.3557, "step": 123520 }, { "epoch": 5.321100917431193, "learning_rate": 7.010116572216596e-07, "loss": 3.2909, "step": 123540 }, { "epoch": 5.32196235517078, "learning_rate": 7.009631752461807e-07, "loss": 3.1557, "step": 123560 }, { "epoch": 5.322823792910367, "learning_rate": 7.009146932707017e-07, "loss": 3.1209, "step": 123580 }, { "epoch": 5.3236852306499545, "learning_rate": 7.008662112952228e-07, "loss": 3.2179, "step": 123600 }, { "epoch": 5.324546668389543, "learning_rate": 7.00817729319744e-07, "loss": 3.227, "step": 123620 }, { "epoch": 5.32540810612913, "learning_rate": 7.007692473442652e-07, "loss": 3.3198, "step": 123640 }, { "epoch": 5.326269543868717, "learning_rate": 7.007207653687862e-07, "loss": 3.2398, "step": 123660 }, { "epoch": 5.327130981608304, "learning_rate": 7.006722833933072e-07, "loss": 3.2511, "step": 123680 }, { "epoch": 5.327992419347892, "learning_rate": 7.006238014178284e-07, "loss": 3.2067, "step": 123700 }, { "epoch": 5.328853857087479, "learning_rate": 7.005753194423496e-07, "loss": 3.0998, "step": 123720 }, { "epoch": 5.329715294827066, "learning_rate": 7.005268374668706e-07, "loss": 3.2079, "step": 123740 }, { "epoch": 5.330576732566653, "learning_rate": 7.004783554913917e-07, "loss": 3.3591, "step": 123760 }, { "epoch": 5.3314381703062415, "learning_rate": 7.004298735159129e-07, "loss": 3.2563, "step": 123780 }, { "epoch": 5.332299608045829, "learning_rate": 7.00381391540434e-07, "loss": 3.3802, "step": 123800 }, { "epoch": 5.333161045785416, "learning_rate": 7.00332909564955e-07, "loss": 3.1963, "step": 123820 }, { "epoch": 5.334022483525003, "learning_rate": 7.002844275894761e-07, "loss": 3.2951, "step": 123840 }, { "epoch": 5.33488392126459, "learning_rate": 7.002359456139973e-07, "loss": 3.3228, "step": 123860 }, { "epoch": 5.335745359004178, "learning_rate": 7.001874636385184e-07, "loss": 3.2656, "step": 123880 }, { "epoch": 5.336606796743766, "learning_rate": 7.001389816630394e-07, "loss": 3.1612, "step": 123900 }, { "epoch": 5.337468234483353, "learning_rate": 7.000904996875606e-07, "loss": 3.1975, "step": 123920 }, { "epoch": 5.33832967222294, "learning_rate": 7.000420177120817e-07, "loss": 3.2904, "step": 123940 }, { "epoch": 5.3391911099625275, "learning_rate": 6.999935357366027e-07, "loss": 3.1987, "step": 123960 }, { "epoch": 5.340052547702115, "learning_rate": 6.999450537611238e-07, "loss": 3.1677, "step": 123980 }, { "epoch": 5.340913985441702, "learning_rate": 6.99896571785645e-07, "loss": 3.1516, "step": 124000 }, { "epoch": 5.341775423181289, "learning_rate": 6.998480898101662e-07, "loss": 3.0876, "step": 124020 }, { "epoch": 5.3426368609208765, "learning_rate": 6.997996078346872e-07, "loss": 3.3544, "step": 124040 }, { "epoch": 5.343498298660465, "learning_rate": 6.997511258592082e-07, "loss": 3.1773, "step": 124060 }, { "epoch": 5.344359736400052, "learning_rate": 6.997026438837294e-07, "loss": 3.1361, "step": 124080 }, { "epoch": 5.345221174139639, "learning_rate": 6.996541619082506e-07, "loss": 3.0797, "step": 124100 }, { "epoch": 5.346082611879226, "learning_rate": 6.996056799327716e-07, "loss": 3.2646, "step": 124120 }, { "epoch": 5.346944049618814, "learning_rate": 6.995571979572927e-07, "loss": 3.3926, "step": 124140 }, { "epoch": 5.347805487358401, "learning_rate": 6.99508715981814e-07, "loss": 3.1273, "step": 124160 }, { "epoch": 5.348666925097988, "learning_rate": 6.99460234006335e-07, "loss": 3.3624, "step": 124180 }, { "epoch": 5.349528362837576, "learning_rate": 6.99411752030856e-07, "loss": 3.2057, "step": 124200 }, { "epoch": 5.3503898005771635, "learning_rate": 6.993632700553771e-07, "loss": 3.3404, "step": 124220 }, { "epoch": 5.351251238316751, "learning_rate": 6.993147880798983e-07, "loss": 3.162, "step": 124240 }, { "epoch": 5.352112676056338, "learning_rate": 6.992663061044195e-07, "loss": 3.011, "step": 124260 }, { "epoch": 5.352974113795925, "learning_rate": 6.992178241289405e-07, "loss": 3.2391, "step": 124280 }, { "epoch": 5.3538355515355125, "learning_rate": 6.991693421534616e-07, "loss": 3.3316, "step": 124300 }, { "epoch": 5.3546969892751, "learning_rate": 6.991208601779827e-07, "loss": 3.2561, "step": 124320 }, { "epoch": 5.355558427014688, "learning_rate": 6.990723782025038e-07, "loss": 3.2998, "step": 124340 }, { "epoch": 5.356419864754275, "learning_rate": 6.990238962270249e-07, "loss": 3.1571, "step": 124360 }, { "epoch": 5.357281302493862, "learning_rate": 6.98975414251546e-07, "loss": 3.1613, "step": 124380 }, { "epoch": 5.35814274023345, "learning_rate": 6.989269322760672e-07, "loss": 3.2361, "step": 124400 }, { "epoch": 5.359004177973037, "learning_rate": 6.988784503005882e-07, "loss": 3.255, "step": 124420 }, { "epoch": 5.359865615712624, "learning_rate": 6.988299683251093e-07, "loss": 3.3, "step": 124440 }, { "epoch": 5.360727053452211, "learning_rate": 6.987814863496304e-07, "loss": 3.3961, "step": 124460 }, { "epoch": 5.3615884911917995, "learning_rate": 6.987330043741516e-07, "loss": 3.2765, "step": 124480 }, { "epoch": 5.362449928931387, "learning_rate": 6.986845223986726e-07, "loss": 3.2107, "step": 124500 }, { "epoch": 5.363311366670974, "learning_rate": 6.986360404231937e-07, "loss": 3.0093, "step": 124520 }, { "epoch": 5.364172804410561, "learning_rate": 6.985875584477149e-07, "loss": 3.1355, "step": 124540 }, { "epoch": 5.3650342421501485, "learning_rate": 6.98539076472236e-07, "loss": 2.961, "step": 124560 }, { "epoch": 5.365895679889736, "learning_rate": 6.98490594496757e-07, "loss": 3.3422, "step": 124580 }, { "epoch": 5.366757117629323, "learning_rate": 6.984421125212781e-07, "loss": 3.3043, "step": 124600 }, { "epoch": 5.367618555368911, "learning_rate": 6.983936305457993e-07, "loss": 3.0706, "step": 124620 }, { "epoch": 5.368479993108498, "learning_rate": 6.983451485703204e-07, "loss": 3.1049, "step": 124640 }, { "epoch": 5.369341430848086, "learning_rate": 6.982966665948415e-07, "loss": 3.2923, "step": 124660 }, { "epoch": 5.370202868587673, "learning_rate": 6.982481846193626e-07, "loss": 3.3227, "step": 124680 }, { "epoch": 5.37106430632726, "learning_rate": 6.981997026438837e-07, "loss": 3.4115, "step": 124700 }, { "epoch": 5.371925744066847, "learning_rate": 6.981512206684048e-07, "loss": 3.2173, "step": 124720 }, { "epoch": 5.3727871818064346, "learning_rate": 6.981027386929259e-07, "loss": 3.2662, "step": 124740 }, { "epoch": 5.373648619546023, "learning_rate": 6.98054256717447e-07, "loss": 3.2279, "step": 124760 }, { "epoch": 5.37451005728561, "learning_rate": 6.980057747419682e-07, "loss": 3.2805, "step": 124780 }, { "epoch": 5.375371495025197, "learning_rate": 6.979572927664893e-07, "loss": 3.3974, "step": 124800 }, { "epoch": 5.376232932764784, "learning_rate": 6.979088107910103e-07, "loss": 3.2605, "step": 124820 }, { "epoch": 5.377094370504372, "learning_rate": 6.978603288155314e-07, "loss": 3.2753, "step": 124840 }, { "epoch": 5.377955808243959, "learning_rate": 6.978118468400525e-07, "loss": 3.3599, "step": 124860 }, { "epoch": 5.378817245983546, "learning_rate": 6.977633648645737e-07, "loss": 3.2229, "step": 124880 }, { "epoch": 5.379678683723134, "learning_rate": 6.977148828890948e-07, "loss": 3.1242, "step": 124900 }, { "epoch": 5.3805401214627215, "learning_rate": 6.976664009136159e-07, "loss": 3.1544, "step": 124920 }, { "epoch": 5.381401559202309, "learning_rate": 6.97617918938137e-07, "loss": 3.1571, "step": 124940 }, { "epoch": 5.382262996941896, "learning_rate": 6.97569436962658e-07, "loss": 3.1765, "step": 124960 }, { "epoch": 5.383124434681483, "learning_rate": 6.975209549871791e-07, "loss": 3.2518, "step": 124980 }, { "epoch": 5.3839858724210705, "learning_rate": 6.974724730117004e-07, "loss": 3.2307, "step": 125000 }, { "epoch": 5.384847310160658, "learning_rate": 6.974239910362214e-07, "loss": 3.3902, "step": 125020 }, { "epoch": 5.385708747900246, "learning_rate": 6.973755090607425e-07, "loss": 3.1286, "step": 125040 }, { "epoch": 5.386570185639833, "learning_rate": 6.973270270852636e-07, "loss": 3.1218, "step": 125060 }, { "epoch": 5.38743162337942, "learning_rate": 6.972785451097847e-07, "loss": 3.0916, "step": 125080 }, { "epoch": 5.388293061119008, "learning_rate": 6.972300631343058e-07, "loss": 3.2104, "step": 125100 }, { "epoch": 5.389154498858595, "learning_rate": 6.971815811588269e-07, "loss": 3.2597, "step": 125120 }, { "epoch": 5.390015936598182, "learning_rate": 6.97133099183348e-07, "loss": 3.2439, "step": 125140 }, { "epoch": 5.390877374337769, "learning_rate": 6.970846172078691e-07, "loss": 3.4474, "step": 125160 }, { "epoch": 5.3917388120773575, "learning_rate": 6.970361352323903e-07, "loss": 3.2778, "step": 125180 }, { "epoch": 5.392600249816945, "learning_rate": 6.969876532569113e-07, "loss": 3.2983, "step": 125200 }, { "epoch": 5.393461687556532, "learning_rate": 6.969391712814324e-07, "loss": 3.2102, "step": 125220 }, { "epoch": 5.394323125296119, "learning_rate": 6.968906893059535e-07, "loss": 3.295, "step": 125240 }, { "epoch": 5.3951845630357065, "learning_rate": 6.968422073304747e-07, "loss": 3.2226, "step": 125260 }, { "epoch": 5.396046000775294, "learning_rate": 6.967937253549958e-07, "loss": 3.218, "step": 125280 }, { "epoch": 5.396907438514881, "learning_rate": 6.967452433795169e-07, "loss": 3.042, "step": 125300 }, { "epoch": 5.397768876254469, "learning_rate": 6.96696761404038e-07, "loss": 3.1354, "step": 125320 }, { "epoch": 5.398630313994056, "learning_rate": 6.966482794285591e-07, "loss": 3.3227, "step": 125340 }, { "epoch": 5.399491751733644, "learning_rate": 6.965997974530802e-07, "loss": 3.2836, "step": 125360 }, { "epoch": 5.400353189473231, "learning_rate": 6.965513154776013e-07, "loss": 3.0382, "step": 125380 }, { "epoch": 5.401214627212818, "learning_rate": 6.965028335021224e-07, "loss": 3.2907, "step": 125400 }, { "epoch": 5.402076064952405, "learning_rate": 6.964543515266436e-07, "loss": 3.4009, "step": 125420 }, { "epoch": 5.402937502691993, "learning_rate": 6.964058695511647e-07, "loss": 3.2983, "step": 125440 }, { "epoch": 5.403798940431581, "learning_rate": 6.963573875756856e-07, "loss": 3.2915, "step": 125460 }, { "epoch": 5.404660378171168, "learning_rate": 6.963089056002068e-07, "loss": 3.2158, "step": 125480 }, { "epoch": 5.405521815910755, "learning_rate": 6.962604236247279e-07, "loss": 3.1302, "step": 125500 }, { "epoch": 5.4063832536503424, "learning_rate": 6.96211941649249e-07, "loss": 3.1503, "step": 125520 }, { "epoch": 5.40724469138993, "learning_rate": 6.961634596737701e-07, "loss": 3.1419, "step": 125540 }, { "epoch": 5.408106129129517, "learning_rate": 6.961149776982913e-07, "loss": 3.381, "step": 125560 }, { "epoch": 5.408967566869104, "learning_rate": 6.960664957228124e-07, "loss": 3.159, "step": 125580 }, { "epoch": 5.409829004608692, "learning_rate": 6.960180137473334e-07, "loss": 3.2026, "step": 125600 }, { "epoch": 5.41069044234828, "learning_rate": 6.959695317718545e-07, "loss": 3.1769, "step": 125620 }, { "epoch": 5.411551880087867, "learning_rate": 6.959210497963757e-07, "loss": 3.291, "step": 125640 }, { "epoch": 5.412413317827454, "learning_rate": 6.958725678208968e-07, "loss": 3.1185, "step": 125660 }, { "epoch": 5.413274755567041, "learning_rate": 6.958240858454179e-07, "loss": 3.138, "step": 125680 }, { "epoch": 5.4141361933066285, "learning_rate": 6.95775603869939e-07, "loss": 3.2235, "step": 125700 }, { "epoch": 5.414997631046216, "learning_rate": 6.957271218944601e-07, "loss": 3.3726, "step": 125720 }, { "epoch": 5.415859068785804, "learning_rate": 6.956786399189812e-07, "loss": 3.2151, "step": 125740 }, { "epoch": 5.416720506525391, "learning_rate": 6.956301579435022e-07, "loss": 3.1706, "step": 125760 }, { "epoch": 5.417581944264978, "learning_rate": 6.955816759680234e-07, "loss": 3.3046, "step": 125780 }, { "epoch": 5.418443382004566, "learning_rate": 6.955331939925446e-07, "loss": 3.2669, "step": 125800 }, { "epoch": 5.419304819744153, "learning_rate": 6.954847120170657e-07, "loss": 3.2547, "step": 125820 }, { "epoch": 5.42016625748374, "learning_rate": 6.954362300415866e-07, "loss": 3.2823, "step": 125840 }, { "epoch": 5.421027695223327, "learning_rate": 6.953877480661078e-07, "loss": 3.2155, "step": 125860 }, { "epoch": 5.4218891329629155, "learning_rate": 6.95339266090629e-07, "loss": 3.4131, "step": 125880 }, { "epoch": 5.422750570702503, "learning_rate": 6.952907841151501e-07, "loss": 3.2091, "step": 125900 }, { "epoch": 5.42361200844209, "learning_rate": 6.952423021396711e-07, "loss": 3.246, "step": 125920 }, { "epoch": 5.424473446181677, "learning_rate": 6.951938201641924e-07, "loss": 3.0609, "step": 125940 }, { "epoch": 5.4253348839212645, "learning_rate": 6.951453381887134e-07, "loss": 3.3931, "step": 125960 }, { "epoch": 5.426196321660852, "learning_rate": 6.950968562132345e-07, "loss": 3.1498, "step": 125980 }, { "epoch": 5.427057759400439, "learning_rate": 6.950483742377555e-07, "loss": 3.2485, "step": 126000 }, { "epoch": 5.427919197140026, "learning_rate": 6.949998922622767e-07, "loss": 3.2079, "step": 126020 }, { "epoch": 5.428780634879614, "learning_rate": 6.949514102867978e-07, "loss": 3.3722, "step": 126040 }, { "epoch": 5.429642072619202, "learning_rate": 6.949029283113188e-07, "loss": 2.9532, "step": 126060 }, { "epoch": 5.430503510358789, "learning_rate": 6.9485444633584e-07, "loss": 3.3179, "step": 126080 }, { "epoch": 5.431364948098376, "learning_rate": 6.948059643603611e-07, "loss": 3.17, "step": 126100 }, { "epoch": 5.432226385837963, "learning_rate": 6.947574823848822e-07, "loss": 3.083, "step": 126120 }, { "epoch": 5.433087823577551, "learning_rate": 6.947090004094032e-07, "loss": 3.2787, "step": 126140 }, { "epoch": 5.433949261317139, "learning_rate": 6.946605184339244e-07, "loss": 3.1984, "step": 126160 }, { "epoch": 5.434810699056726, "learning_rate": 6.946120364584456e-07, "loss": 3.236, "step": 126180 }, { "epoch": 5.435672136796313, "learning_rate": 6.945635544829667e-07, "loss": 3.1788, "step": 126200 }, { "epoch": 5.4365335745359005, "learning_rate": 6.945150725074877e-07, "loss": 3.2153, "step": 126220 }, { "epoch": 5.437395012275488, "learning_rate": 6.944665905320088e-07, "loss": 3.2417, "step": 126240 }, { "epoch": 5.438256450015075, "learning_rate": 6.9441810855653e-07, "loss": 3.1596, "step": 126260 }, { "epoch": 5.439117887754662, "learning_rate": 6.943696265810511e-07, "loss": 3.2614, "step": 126280 }, { "epoch": 5.4399793254942495, "learning_rate": 6.943211446055721e-07, "loss": 3.1875, "step": 126300 }, { "epoch": 5.440840763233838, "learning_rate": 6.942726626300933e-07, "loss": 3.26, "step": 126320 }, { "epoch": 5.441702200973425, "learning_rate": 6.942241806546145e-07, "loss": 3.1076, "step": 126340 }, { "epoch": 5.442563638713012, "learning_rate": 6.941756986791355e-07, "loss": 3.0644, "step": 126360 }, { "epoch": 5.443425076452599, "learning_rate": 6.941272167036565e-07, "loss": 3.344, "step": 126380 }, { "epoch": 5.444286514192187, "learning_rate": 6.940787347281777e-07, "loss": 3.4049, "step": 126400 }, { "epoch": 5.445147951931774, "learning_rate": 6.940302527526989e-07, "loss": 3.2218, "step": 126420 }, { "epoch": 5.446009389671362, "learning_rate": 6.9398177077722e-07, "loss": 3.1928, "step": 126440 }, { "epoch": 5.446870827410949, "learning_rate": 6.93933288801741e-07, "loss": 3.2087, "step": 126460 }, { "epoch": 5.447732265150536, "learning_rate": 6.938848068262621e-07, "loss": 3.2636, "step": 126480 }, { "epoch": 5.448593702890124, "learning_rate": 6.938363248507833e-07, "loss": 3.1491, "step": 126500 }, { "epoch": 5.449455140629711, "learning_rate": 6.937878428753043e-07, "loss": 3.0597, "step": 126520 }, { "epoch": 5.450316578369298, "learning_rate": 6.937393608998254e-07, "loss": 3.3135, "step": 126540 }, { "epoch": 5.451178016108885, "learning_rate": 6.936908789243466e-07, "loss": 3.2245, "step": 126560 }, { "epoch": 5.452039453848473, "learning_rate": 6.936423969488677e-07, "loss": 3.3457, "step": 126580 }, { "epoch": 5.452900891588061, "learning_rate": 6.935939149733887e-07, "loss": 3.3086, "step": 126600 }, { "epoch": 5.453762329327648, "learning_rate": 6.935454329979098e-07, "loss": 3.1835, "step": 126620 }, { "epoch": 5.454623767067235, "learning_rate": 6.93496951022431e-07, "loss": 3.3209, "step": 126640 }, { "epoch": 5.4554852048068225, "learning_rate": 6.93448469046952e-07, "loss": 3.1743, "step": 126660 }, { "epoch": 5.45634664254641, "learning_rate": 6.93399987071473e-07, "loss": 3.4087, "step": 126680 }, { "epoch": 5.457208080285997, "learning_rate": 6.933515050959943e-07, "loss": 3.1188, "step": 126700 }, { "epoch": 5.458069518025585, "learning_rate": 6.933030231205155e-07, "loss": 3.2372, "step": 126720 }, { "epoch": 5.458930955765172, "learning_rate": 6.932545411450364e-07, "loss": 3.2105, "step": 126740 }, { "epoch": 5.45979239350476, "learning_rate": 6.932060591695575e-07, "loss": 3.1491, "step": 126760 }, { "epoch": 5.460653831244347, "learning_rate": 6.931575771940788e-07, "loss": 3.1326, "step": 126780 }, { "epoch": 5.461515268983934, "learning_rate": 6.931090952185999e-07, "loss": 3.0921, "step": 126800 }, { "epoch": 5.462376706723521, "learning_rate": 6.930606132431209e-07, "loss": 2.9035, "step": 126820 }, { "epoch": 5.463238144463109, "learning_rate": 6.93012131267642e-07, "loss": 3.2621, "step": 126840 }, { "epoch": 5.464099582202696, "learning_rate": 6.929636492921631e-07, "loss": 3.3112, "step": 126860 }, { "epoch": 5.464961019942284, "learning_rate": 6.929151673166843e-07, "loss": 3.2661, "step": 126880 }, { "epoch": 5.465822457681871, "learning_rate": 6.928666853412053e-07, "loss": 3.2265, "step": 126900 }, { "epoch": 5.4666838954214585, "learning_rate": 6.928182033657264e-07, "loss": 3.3469, "step": 126920 }, { "epoch": 5.467545333161046, "learning_rate": 6.927697213902476e-07, "loss": 3.2671, "step": 126940 }, { "epoch": 5.468406770900633, "learning_rate": 6.927212394147688e-07, "loss": 3.1797, "step": 126960 }, { "epoch": 5.46926820864022, "learning_rate": 6.926727574392897e-07, "loss": 3.3644, "step": 126980 }, { "epoch": 5.470129646379808, "learning_rate": 6.926242754638108e-07, "loss": 3.188, "step": 127000 }, { "epoch": 5.470991084119396, "learning_rate": 6.92575793488332e-07, "loss": 3.1395, "step": 127020 }, { "epoch": 5.471852521858983, "learning_rate": 6.925273115128532e-07, "loss": 3.2592, "step": 127040 }, { "epoch": 5.47271395959857, "learning_rate": 6.924788295373742e-07, "loss": 3.2996, "step": 127060 }, { "epoch": 5.473575397338157, "learning_rate": 6.924303475618953e-07, "loss": 3.1774, "step": 127080 }, { "epoch": 5.474436835077745, "learning_rate": 6.923818655864165e-07, "loss": 3.2721, "step": 127100 }, { "epoch": 5.475298272817332, "learning_rate": 6.923333836109374e-07, "loss": 3.2907, "step": 127120 }, { "epoch": 5.476159710556919, "learning_rate": 6.922849016354585e-07, "loss": 3.2786, "step": 127140 }, { "epoch": 5.477021148296507, "learning_rate": 6.922364196599797e-07, "loss": 3.3626, "step": 127160 }, { "epoch": 5.4778825860360945, "learning_rate": 6.921879376845009e-07, "loss": 3.1141, "step": 127180 }, { "epoch": 5.478744023775682, "learning_rate": 6.92139455709022e-07, "loss": 3.3689, "step": 127200 }, { "epoch": 5.479605461515269, "learning_rate": 6.92090973733543e-07, "loss": 3.1013, "step": 127220 }, { "epoch": 5.480466899254856, "learning_rate": 6.920424917580641e-07, "loss": 3.1667, "step": 127240 }, { "epoch": 5.481328336994443, "learning_rate": 6.919940097825853e-07, "loss": 3.2935, "step": 127260 }, { "epoch": 5.482189774734031, "learning_rate": 6.919455278071063e-07, "loss": 3.1647, "step": 127280 }, { "epoch": 5.483051212473619, "learning_rate": 6.918970458316274e-07, "loss": 3.3421, "step": 127300 }, { "epoch": 5.483912650213206, "learning_rate": 6.918485638561486e-07, "loss": 3.4923, "step": 127320 }, { "epoch": 5.484774087952793, "learning_rate": 6.918000818806698e-07, "loss": 3.2637, "step": 127340 }, { "epoch": 5.4856355256923806, "learning_rate": 6.917515999051908e-07, "loss": 3.0587, "step": 127360 }, { "epoch": 5.486496963431968, "learning_rate": 6.917031179297118e-07, "loss": 3.1862, "step": 127380 }, { "epoch": 5.487358401171555, "learning_rate": 6.91654635954233e-07, "loss": 3.1303, "step": 127400 }, { "epoch": 5.488219838911142, "learning_rate": 6.916061539787541e-07, "loss": 3.2572, "step": 127420 }, { "epoch": 5.48908127665073, "learning_rate": 6.915576720032752e-07, "loss": 3.1622, "step": 127440 }, { "epoch": 5.489942714390318, "learning_rate": 6.915091900277963e-07, "loss": 3.2954, "step": 127460 }, { "epoch": 5.490804152129905, "learning_rate": 6.914607080523175e-07, "loss": 3.207, "step": 127480 }, { "epoch": 5.491665589869492, "learning_rate": 6.914122260768385e-07, "loss": 3.2483, "step": 127500 }, { "epoch": 5.492527027609079, "learning_rate": 6.913637441013596e-07, "loss": 3.2078, "step": 127520 }, { "epoch": 5.493388465348667, "learning_rate": 6.913152621258807e-07, "loss": 3.1427, "step": 127540 }, { "epoch": 5.494249903088254, "learning_rate": 6.912667801504019e-07, "loss": 3.2661, "step": 127560 }, { "epoch": 5.495111340827842, "learning_rate": 6.91218298174923e-07, "loss": 3.1361, "step": 127580 }, { "epoch": 5.495972778567429, "learning_rate": 6.911698161994441e-07, "loss": 3.3142, "step": 127600 }, { "epoch": 5.4968342163070165, "learning_rate": 6.911213342239651e-07, "loss": 3.0769, "step": 127620 }, { "epoch": 5.497695654046604, "learning_rate": 6.910728522484862e-07, "loss": 3.3286, "step": 127640 }, { "epoch": 5.498557091786191, "learning_rate": 6.910243702730073e-07, "loss": 3.2604, "step": 127660 }, { "epoch": 5.499418529525778, "learning_rate": 6.909758882975284e-07, "loss": 3.188, "step": 127680 }, { "epoch": 5.5002799672653655, "learning_rate": 6.909274063220496e-07, "loss": 3.0901, "step": 127700 }, { "epoch": 5.501141405004954, "learning_rate": 6.908789243465709e-07, "loss": 3.1472, "step": 127720 }, { "epoch": 5.502002842744541, "learning_rate": 6.908304423710918e-07, "loss": 3.3285, "step": 127740 }, { "epoch": 5.502864280484128, "learning_rate": 6.907819603956128e-07, "loss": 3.3031, "step": 127760 }, { "epoch": 5.503725718223715, "learning_rate": 6.90733478420134e-07, "loss": 3.4402, "step": 127780 }, { "epoch": 5.504587155963303, "learning_rate": 6.906849964446551e-07, "loss": 3.2753, "step": 127800 }, { "epoch": 5.50544859370289, "learning_rate": 6.906365144691762e-07, "loss": 3.1753, "step": 127820 }, { "epoch": 5.506310031442478, "learning_rate": 6.905880324936973e-07, "loss": 3.083, "step": 127840 }, { "epoch": 5.507171469182065, "learning_rate": 6.905395505182185e-07, "loss": 3.2826, "step": 127860 }, { "epoch": 5.5080329069216525, "learning_rate": 6.904910685427395e-07, "loss": 3.3222, "step": 127880 }, { "epoch": 5.50889434466124, "learning_rate": 6.904425865672606e-07, "loss": 3.33, "step": 127900 }, { "epoch": 5.509755782400827, "learning_rate": 6.903941045917817e-07, "loss": 3.2663, "step": 127920 }, { "epoch": 5.510617220140414, "learning_rate": 6.903456226163029e-07, "loss": 3.1387, "step": 127940 }, { "epoch": 5.5114786578800015, "learning_rate": 6.90297140640824e-07, "loss": 3.1768, "step": 127960 }, { "epoch": 5.512340095619589, "learning_rate": 6.902486586653451e-07, "loss": 3.268, "step": 127980 }, { "epoch": 5.513201533359177, "learning_rate": 6.902001766898662e-07, "loss": 3.1133, "step": 128000 }, { "epoch": 5.514062971098764, "learning_rate": 6.901516947143872e-07, "loss": 3.0589, "step": 128020 }, { "epoch": 5.514924408838351, "learning_rate": 6.901032127389084e-07, "loss": 3.2662, "step": 128040 }, { "epoch": 5.515785846577939, "learning_rate": 6.900547307634295e-07, "loss": 3.1112, "step": 128060 }, { "epoch": 5.516647284317526, "learning_rate": 6.900062487879506e-07, "loss": 3.1509, "step": 128080 }, { "epoch": 5.517508722057113, "learning_rate": 6.899577668124717e-07, "loss": 3.2454, "step": 128100 }, { "epoch": 5.5183701597967, "learning_rate": 6.899092848369929e-07, "loss": 3.3012, "step": 128120 }, { "epoch": 5.5192315975362884, "learning_rate": 6.898608028615139e-07, "loss": 3.4595, "step": 128140 }, { "epoch": 5.520093035275876, "learning_rate": 6.89812320886035e-07, "loss": 3.2327, "step": 128160 }, { "epoch": 5.520954473015463, "learning_rate": 6.897638389105561e-07, "loss": 3.0591, "step": 128180 }, { "epoch": 5.52181591075505, "learning_rate": 6.897153569350772e-07, "loss": 3.5673, "step": 128200 }, { "epoch": 5.522677348494637, "learning_rate": 6.896668749595983e-07, "loss": 3.253, "step": 128220 }, { "epoch": 5.523538786234225, "learning_rate": 6.896183929841195e-07, "loss": 3.3035, "step": 128240 }, { "epoch": 5.524400223973812, "learning_rate": 6.895699110086405e-07, "loss": 3.1991, "step": 128260 }, { "epoch": 5.5252616617134, "learning_rate": 6.895214290331616e-07, "loss": 3.2189, "step": 128280 }, { "epoch": 5.526123099452987, "learning_rate": 6.894729470576827e-07, "loss": 3.3936, "step": 128300 }, { "epoch": 5.5269845371925745, "learning_rate": 6.894244650822038e-07, "loss": 3.0153, "step": 128320 }, { "epoch": 5.527845974932162, "learning_rate": 6.89375983106725e-07, "loss": 3.1051, "step": 128340 }, { "epoch": 5.528707412671749, "learning_rate": 6.893275011312461e-07, "loss": 3.0751, "step": 128360 }, { "epoch": 5.529568850411336, "learning_rate": 6.892790191557672e-07, "loss": 3.3597, "step": 128380 }, { "epoch": 5.5304302881509235, "learning_rate": 6.892305371802882e-07, "loss": 3.2377, "step": 128400 }, { "epoch": 5.531291725890512, "learning_rate": 6.891820552048094e-07, "loss": 3.1814, "step": 128420 }, { "epoch": 5.532153163630099, "learning_rate": 6.891335732293305e-07, "loss": 3.0797, "step": 128440 }, { "epoch": 5.533014601369686, "learning_rate": 6.890850912538515e-07, "loss": 3.1967, "step": 128460 }, { "epoch": 5.533876039109273, "learning_rate": 6.890366092783727e-07, "loss": 3.2526, "step": 128480 }, { "epoch": 5.534737476848861, "learning_rate": 6.889881273028939e-07, "loss": 3.139, "step": 128500 }, { "epoch": 5.535598914588448, "learning_rate": 6.889396453274149e-07, "loss": 3.1816, "step": 128520 }, { "epoch": 5.536460352328035, "learning_rate": 6.88891163351936e-07, "loss": 3.2645, "step": 128540 }, { "epoch": 5.537321790067622, "learning_rate": 6.888426813764572e-07, "loss": 3.0456, "step": 128560 }, { "epoch": 5.5381832278072105, "learning_rate": 6.887941994009783e-07, "loss": 3.12, "step": 128580 }, { "epoch": 5.539044665546798, "learning_rate": 6.887457174254994e-07, "loss": 3.0965, "step": 128600 }, { "epoch": 5.539906103286385, "learning_rate": 6.886972354500204e-07, "loss": 3.0685, "step": 128620 }, { "epoch": 5.540767541025972, "learning_rate": 6.886487534745415e-07, "loss": 3.1361, "step": 128640 }, { "epoch": 5.5416289787655595, "learning_rate": 6.886002714990627e-07, "loss": 3.2411, "step": 128660 }, { "epoch": 5.542490416505147, "learning_rate": 6.885517895235837e-07, "loss": 3.0284, "step": 128680 }, { "epoch": 5.543351854244735, "learning_rate": 6.885033075481048e-07, "loss": 3.3229, "step": 128700 }, { "epoch": 5.544213291984322, "learning_rate": 6.88454825572626e-07, "loss": 3.2227, "step": 128720 }, { "epoch": 5.545074729723909, "learning_rate": 6.884063435971471e-07, "loss": 3.2449, "step": 128740 }, { "epoch": 5.545936167463497, "learning_rate": 6.883578616216682e-07, "loss": 3.2154, "step": 128760 }, { "epoch": 5.546797605203084, "learning_rate": 6.883093796461892e-07, "loss": 3.2846, "step": 128780 }, { "epoch": 5.547659042942671, "learning_rate": 6.882608976707104e-07, "loss": 3.1468, "step": 128800 }, { "epoch": 5.548520480682258, "learning_rate": 6.882124156952315e-07, "loss": 3.1386, "step": 128820 }, { "epoch": 5.549381918421846, "learning_rate": 6.881639337197526e-07, "loss": 3.0341, "step": 128840 }, { "epoch": 5.550243356161434, "learning_rate": 6.881154517442737e-07, "loss": 3.1763, "step": 128860 }, { "epoch": 5.551104793901021, "learning_rate": 6.880669697687949e-07, "loss": 3.107, "step": 128880 }, { "epoch": 5.551966231640608, "learning_rate": 6.880184877933159e-07, "loss": 3.1413, "step": 128900 }, { "epoch": 5.5528276693801955, "learning_rate": 6.879700058178369e-07, "loss": 3.0938, "step": 128920 }, { "epoch": 5.553689107119783, "learning_rate": 6.879215238423581e-07, "loss": 3.1517, "step": 128940 }, { "epoch": 5.55455054485937, "learning_rate": 6.878730418668793e-07, "loss": 3.1688, "step": 128960 }, { "epoch": 5.555411982598958, "learning_rate": 6.878245598914005e-07, "loss": 3.2693, "step": 128980 }, { "epoch": 5.556273420338545, "learning_rate": 6.877760779159214e-07, "loss": 3.0736, "step": 129000 }, { "epoch": 5.557134858078133, "learning_rate": 6.877275959404425e-07, "loss": 3.1875, "step": 129020 }, { "epoch": 5.55799629581772, "learning_rate": 6.876791139649637e-07, "loss": 3.2301, "step": 129040 }, { "epoch": 5.558857733557307, "learning_rate": 6.876306319894848e-07, "loss": 3.2131, "step": 129060 }, { "epoch": 5.559719171296894, "learning_rate": 6.875821500140058e-07, "loss": 3.3486, "step": 129080 }, { "epoch": 5.5605806090364815, "learning_rate": 6.87533668038527e-07, "loss": 3.0836, "step": 129100 }, { "epoch": 5.561442046776069, "learning_rate": 6.874851860630482e-07, "loss": 3.0602, "step": 129120 }, { "epoch": 5.562303484515657, "learning_rate": 6.874367040875693e-07, "loss": 3.1346, "step": 129140 }, { "epoch": 5.563164922255244, "learning_rate": 6.873882221120902e-07, "loss": 3.3999, "step": 129160 }, { "epoch": 5.564026359994831, "learning_rate": 6.873397401366114e-07, "loss": 3.2813, "step": 129180 }, { "epoch": 5.564887797734419, "learning_rate": 6.872912581611326e-07, "loss": 3.1085, "step": 129200 }, { "epoch": 5.565749235474006, "learning_rate": 6.872427761856535e-07, "loss": 3.439, "step": 129220 }, { "epoch": 5.566610673213593, "learning_rate": 6.871942942101747e-07, "loss": 3.1544, "step": 129240 }, { "epoch": 5.567472110953181, "learning_rate": 6.871458122346959e-07, "loss": 3.1114, "step": 129260 }, { "epoch": 5.5683335486927685, "learning_rate": 6.870973302592169e-07, "loss": 3.121, "step": 129280 }, { "epoch": 5.569194986432356, "learning_rate": 6.87048848283738e-07, "loss": 3.23, "step": 129300 }, { "epoch": 5.570056424171943, "learning_rate": 6.870003663082591e-07, "loss": 3.2205, "step": 129320 }, { "epoch": 5.57091786191153, "learning_rate": 6.869518843327803e-07, "loss": 3.2211, "step": 129340 }, { "epoch": 5.5717792996511175, "learning_rate": 6.869034023573014e-07, "loss": 3.1302, "step": 129360 }, { "epoch": 5.572640737390705, "learning_rate": 6.868549203818224e-07, "loss": 3.0789, "step": 129380 }, { "epoch": 5.573502175130292, "learning_rate": 6.868064384063435e-07, "loss": 3.2549, "step": 129400 }, { "epoch": 5.57436361286988, "learning_rate": 6.867579564308647e-07, "loss": 3.1204, "step": 129420 }, { "epoch": 5.575225050609467, "learning_rate": 6.867094744553858e-07, "loss": 3.0862, "step": 129440 }, { "epoch": 5.576086488349055, "learning_rate": 6.866609924799068e-07, "loss": 3.3525, "step": 129460 }, { "epoch": 5.576947926088642, "learning_rate": 6.86612510504428e-07, "loss": 3.2225, "step": 129480 }, { "epoch": 5.577809363828229, "learning_rate": 6.865640285289493e-07, "loss": 3.1107, "step": 129500 }, { "epoch": 5.578670801567816, "learning_rate": 6.865155465534703e-07, "loss": 3.2329, "step": 129520 }, { "epoch": 5.5795322393074045, "learning_rate": 6.864670645779912e-07, "loss": 3.2811, "step": 129540 }, { "epoch": 5.580393677046992, "learning_rate": 6.864185826025124e-07, "loss": 3.1129, "step": 129560 }, { "epoch": 5.581255114786579, "learning_rate": 6.863701006270336e-07, "loss": 3.3175, "step": 129580 }, { "epoch": 5.582116552526166, "learning_rate": 6.863216186515546e-07, "loss": 3.3308, "step": 129600 }, { "epoch": 5.5829779902657535, "learning_rate": 6.862731366760757e-07, "loss": 3.269, "step": 129620 }, { "epoch": 5.583839428005341, "learning_rate": 6.862246547005969e-07, "loss": 3.404, "step": 129640 }, { "epoch": 5.584700865744928, "learning_rate": 6.86176172725118e-07, "loss": 3.3046, "step": 129660 }, { "epoch": 5.585562303484515, "learning_rate": 6.86127690749639e-07, "loss": 3.153, "step": 129680 }, { "epoch": 5.586423741224103, "learning_rate": 6.860792087741601e-07, "loss": 3.0212, "step": 129700 }, { "epoch": 5.587285178963691, "learning_rate": 6.860307267986813e-07, "loss": 3.0432, "step": 129720 }, { "epoch": 5.588146616703278, "learning_rate": 6.859822448232025e-07, "loss": 3.3084, "step": 129740 }, { "epoch": 5.589008054442865, "learning_rate": 6.859337628477234e-07, "loss": 3.1186, "step": 129760 }, { "epoch": 5.589869492182452, "learning_rate": 6.858852808722446e-07, "loss": 3.1652, "step": 129780 }, { "epoch": 5.59073092992204, "learning_rate": 6.858367988967657e-07, "loss": 3.161, "step": 129800 }, { "epoch": 5.591592367661628, "learning_rate": 6.857883169212869e-07, "loss": 3.2219, "step": 129820 }, { "epoch": 5.592453805401215, "learning_rate": 6.857398349458078e-07, "loss": 3.2173, "step": 129840 }, { "epoch": 5.593315243140802, "learning_rate": 6.85691352970329e-07, "loss": 3.1586, "step": 129860 }, { "epoch": 5.594176680880389, "learning_rate": 6.856428709948502e-07, "loss": 3.0959, "step": 129880 }, { "epoch": 5.595038118619977, "learning_rate": 6.855943890193712e-07, "loss": 3.1031, "step": 129900 }, { "epoch": 5.595899556359564, "learning_rate": 6.855459070438922e-07, "loss": 3.189, "step": 129920 }, { "epoch": 5.596760994099151, "learning_rate": 6.854974250684134e-07, "loss": 3.1366, "step": 129940 }, { "epoch": 5.597622431838738, "learning_rate": 6.854489430929346e-07, "loss": 3.3031, "step": 129960 }, { "epoch": 5.5984838695783266, "learning_rate": 6.854004611174556e-07, "loss": 3.3899, "step": 129980 }, { "epoch": 5.599345307317914, "learning_rate": 6.853519791419767e-07, "loss": 3.0167, "step": 130000 }, { "epoch": 5.600206745057501, "learning_rate": 6.853034971664979e-07, "loss": 3.1078, "step": 130020 }, { "epoch": 5.601068182797088, "learning_rate": 6.85255015191019e-07, "loss": 3.0785, "step": 130040 }, { "epoch": 5.6019296205366755, "learning_rate": 6.8520653321554e-07, "loss": 3.1983, "step": 130060 }, { "epoch": 5.602791058276263, "learning_rate": 6.851580512400611e-07, "loss": 3.2296, "step": 130080 }, { "epoch": 5.603652496015851, "learning_rate": 6.851095692645823e-07, "loss": 3.3313, "step": 130100 }, { "epoch": 5.604513933755438, "learning_rate": 6.850610872891035e-07, "loss": 3.2221, "step": 130120 }, { "epoch": 5.605375371495025, "learning_rate": 6.850126053136245e-07, "loss": 3.1508, "step": 130140 }, { "epoch": 5.606236809234613, "learning_rate": 6.849641233381456e-07, "loss": 3.0391, "step": 130160 }, { "epoch": 5.6070982469742, "learning_rate": 6.849156413626667e-07, "loss": 3.1049, "step": 130180 }, { "epoch": 5.607959684713787, "learning_rate": 6.848671593871878e-07, "loss": 3.0774, "step": 130200 }, { "epoch": 5.608821122453374, "learning_rate": 6.848186774117089e-07, "loss": 3.0843, "step": 130220 }, { "epoch": 5.609682560192962, "learning_rate": 6.847701954362299e-07, "loss": 3.109, "step": 130240 }, { "epoch": 5.61054399793255, "learning_rate": 6.847217134607512e-07, "loss": 3.2645, "step": 130260 }, { "epoch": 5.611405435672137, "learning_rate": 6.846732314852723e-07, "loss": 3.2293, "step": 130280 }, { "epoch": 5.612266873411724, "learning_rate": 6.846247495097932e-07, "loss": 3.2828, "step": 130300 }, { "epoch": 5.6131283111513115, "learning_rate": 6.845762675343144e-07, "loss": 3.0916, "step": 130320 }, { "epoch": 5.613989748890899, "learning_rate": 6.845277855588357e-07, "loss": 3.2169, "step": 130340 }, { "epoch": 5.614851186630486, "learning_rate": 6.844793035833566e-07, "loss": 3.2115, "step": 130360 }, { "epoch": 5.615712624370074, "learning_rate": 6.844308216078777e-07, "loss": 2.9956, "step": 130380 }, { "epoch": 5.616574062109661, "learning_rate": 6.843823396323989e-07, "loss": 3.2077, "step": 130400 }, { "epoch": 5.617435499849249, "learning_rate": 6.8433385765692e-07, "loss": 3.1998, "step": 130420 }, { "epoch": 5.618296937588836, "learning_rate": 6.84285375681441e-07, "loss": 3.2389, "step": 130440 }, { "epoch": 5.619158375328423, "learning_rate": 6.842368937059621e-07, "loss": 3.1101, "step": 130460 }, { "epoch": 5.62001981306801, "learning_rate": 6.841884117304833e-07, "loss": 3.2012, "step": 130480 }, { "epoch": 5.620881250807598, "learning_rate": 6.841399297550044e-07, "loss": 3.2541, "step": 130500 }, { "epoch": 5.621742688547185, "learning_rate": 6.840914477795255e-07, "loss": 3.1097, "step": 130520 }, { "epoch": 5.622604126286773, "learning_rate": 6.840429658040466e-07, "loss": 3.0298, "step": 130540 }, { "epoch": 5.62346556402636, "learning_rate": 6.839944838285677e-07, "loss": 3.2764, "step": 130560 }, { "epoch": 5.6243270017659475, "learning_rate": 6.839460018530888e-07, "loss": 3.1988, "step": 130580 }, { "epoch": 5.625188439505535, "learning_rate": 6.838975198776099e-07, "loss": 2.9662, "step": 130600 }, { "epoch": 5.626049877245122, "learning_rate": 6.83849037902131e-07, "loss": 3.1406, "step": 130620 }, { "epoch": 5.626911314984709, "learning_rate": 6.838005559266522e-07, "loss": 3.1734, "step": 130640 }, { "epoch": 5.627772752724297, "learning_rate": 6.837520739511733e-07, "loss": 3.1844, "step": 130660 }, { "epoch": 5.628634190463885, "learning_rate": 6.837035919756943e-07, "loss": 3.116, "step": 130680 }, { "epoch": 5.629495628203472, "learning_rate": 6.836551100002154e-07, "loss": 3.1303, "step": 130700 }, { "epoch": 5.630357065943059, "learning_rate": 6.836066280247366e-07, "loss": 3.1137, "step": 130720 }, { "epoch": 5.631218503682646, "learning_rate": 6.835581460492577e-07, "loss": 3.2246, "step": 130740 }, { "epoch": 5.632079941422234, "learning_rate": 6.835096640737789e-07, "loss": 3.2303, "step": 130760 }, { "epoch": 5.632941379161821, "learning_rate": 6.834611820982999e-07, "loss": 3.1437, "step": 130780 }, { "epoch": 5.633802816901408, "learning_rate": 6.834127001228209e-07, "loss": 3.2279, "step": 130800 }, { "epoch": 5.634664254640996, "learning_rate": 6.833642181473421e-07, "loss": 3.2933, "step": 130820 }, { "epoch": 5.635525692380583, "learning_rate": 6.833157361718631e-07, "loss": 3.0662, "step": 130840 }, { "epoch": 5.636387130120171, "learning_rate": 6.832672541963843e-07, "loss": 3.1152, "step": 130860 }, { "epoch": 5.637248567859758, "learning_rate": 6.832187722209054e-07, "loss": 3.1508, "step": 130880 }, { "epoch": 5.638110005599345, "learning_rate": 6.831702902454265e-07, "loss": 3.2658, "step": 130900 }, { "epoch": 5.638971443338932, "learning_rate": 6.831218082699476e-07, "loss": 3.1537, "step": 130920 }, { "epoch": 5.6398328810785205, "learning_rate": 6.830733262944687e-07, "loss": 3.2088, "step": 130940 }, { "epoch": 5.640694318818108, "learning_rate": 6.830248443189898e-07, "loss": 3.0688, "step": 130960 }, { "epoch": 5.641555756557695, "learning_rate": 6.829763623435109e-07, "loss": 3.2585, "step": 130980 }, { "epoch": 5.642417194297282, "learning_rate": 6.82927880368032e-07, "loss": 3.2439, "step": 131000 }, { "epoch": 5.6432786320368695, "learning_rate": 6.828793983925532e-07, "loss": 3.124, "step": 131020 }, { "epoch": 5.644140069776457, "learning_rate": 6.828309164170743e-07, "loss": 3.0572, "step": 131040 }, { "epoch": 5.645001507516044, "learning_rate": 6.827824344415953e-07, "loss": 3.1064, "step": 131060 }, { "epoch": 5.645862945255631, "learning_rate": 6.827339524661165e-07, "loss": 3.3465, "step": 131080 }, { "epoch": 5.646724382995219, "learning_rate": 6.826854704906375e-07, "loss": 3.1171, "step": 131100 }, { "epoch": 5.647585820734807, "learning_rate": 6.826369885151587e-07, "loss": 3.3204, "step": 131120 }, { "epoch": 5.648447258474394, "learning_rate": 6.825885065396798e-07, "loss": 3.0589, "step": 131140 }, { "epoch": 5.649308696213981, "learning_rate": 6.825400245642009e-07, "loss": 3.1474, "step": 131160 }, { "epoch": 5.650170133953568, "learning_rate": 6.824915425887219e-07, "loss": 3.2572, "step": 131180 }, { "epoch": 5.651031571693156, "learning_rate": 6.824430606132431e-07, "loss": 2.9921, "step": 131200 }, { "epoch": 5.651893009432743, "learning_rate": 6.823945786377642e-07, "loss": 3.2725, "step": 131220 }, { "epoch": 5.652754447172331, "learning_rate": 6.823460966622853e-07, "loss": 3.1645, "step": 131240 }, { "epoch": 5.653615884911918, "learning_rate": 6.822976146868064e-07, "loss": 3.2603, "step": 131260 }, { "epoch": 5.6544773226515055, "learning_rate": 6.822491327113277e-07, "loss": 3.3358, "step": 131280 }, { "epoch": 5.655338760391093, "learning_rate": 6.822006507358487e-07, "loss": 3.2891, "step": 131300 }, { "epoch": 5.65620019813068, "learning_rate": 6.821521687603697e-07, "loss": 3.1934, "step": 131320 }, { "epoch": 5.657061635870267, "learning_rate": 6.821036867848908e-07, "loss": 2.972, "step": 131340 }, { "epoch": 5.6579230736098545, "learning_rate": 6.82055204809412e-07, "loss": 3.233, "step": 131360 }, { "epoch": 5.658784511349443, "learning_rate": 6.82006722833933e-07, "loss": 3.3083, "step": 131380 }, { "epoch": 5.65964594908903, "learning_rate": 6.819582408584541e-07, "loss": 3.2913, "step": 131400 }, { "epoch": 5.660507386828617, "learning_rate": 6.819097588829753e-07, "loss": 3.2096, "step": 131420 }, { "epoch": 5.661368824568204, "learning_rate": 6.818612769074963e-07, "loss": 3.1739, "step": 131440 }, { "epoch": 5.662230262307792, "learning_rate": 6.818127949320174e-07, "loss": 3.2092, "step": 131460 }, { "epoch": 5.663091700047379, "learning_rate": 6.817643129565385e-07, "loss": 3.0328, "step": 131480 }, { "epoch": 5.663953137786966, "learning_rate": 6.817158309810597e-07, "loss": 3.2039, "step": 131500 }, { "epoch": 5.664814575526554, "learning_rate": 6.816673490055808e-07, "loss": 3.255, "step": 131520 }, { "epoch": 5.6656760132661415, "learning_rate": 6.816188670301019e-07, "loss": 3.2335, "step": 131540 }, { "epoch": 5.666537451005729, "learning_rate": 6.81570385054623e-07, "loss": 3.0177, "step": 131560 }, { "epoch": 5.667398888745316, "learning_rate": 6.815219030791441e-07, "loss": 3.1171, "step": 131580 }, { "epoch": 5.668260326484903, "learning_rate": 6.814734211036653e-07, "loss": 3.154, "step": 131600 }, { "epoch": 5.66912176422449, "learning_rate": 6.814249391281863e-07, "loss": 2.9814, "step": 131620 }, { "epoch": 5.669983201964078, "learning_rate": 6.813764571527074e-07, "loss": 3.0508, "step": 131640 }, { "epoch": 5.670844639703665, "learning_rate": 6.813279751772286e-07, "loss": 3.0222, "step": 131660 }, { "epoch": 5.671706077443253, "learning_rate": 6.812794932017497e-07, "loss": 3.3455, "step": 131680 }, { "epoch": 5.67256751518284, "learning_rate": 6.812310112262706e-07, "loss": 3.099, "step": 131700 }, { "epoch": 5.6734289529224275, "learning_rate": 6.811825292507918e-07, "loss": 3.1388, "step": 131720 }, { "epoch": 5.674290390662015, "learning_rate": 6.81134047275313e-07, "loss": 3.2224, "step": 131740 }, { "epoch": 5.675151828401602, "learning_rate": 6.810855652998341e-07, "loss": 3.2286, "step": 131760 }, { "epoch": 5.676013266141189, "learning_rate": 6.810370833243551e-07, "loss": 3.2585, "step": 131780 }, { "epoch": 5.676874703880777, "learning_rate": 6.809886013488763e-07, "loss": 3.4365, "step": 131800 }, { "epoch": 5.677736141620365, "learning_rate": 6.809401193733974e-07, "loss": 3.1841, "step": 131820 }, { "epoch": 5.678597579359952, "learning_rate": 6.808916373979185e-07, "loss": 3.233, "step": 131840 }, { "epoch": 5.679459017099539, "learning_rate": 6.808431554224395e-07, "loss": 3.2861, "step": 131860 }, { "epoch": 5.680320454839126, "learning_rate": 6.807946734469607e-07, "loss": 3.2726, "step": 131880 }, { "epoch": 5.681181892578714, "learning_rate": 6.807461914714819e-07, "loss": 3.0955, "step": 131900 }, { "epoch": 5.682043330318301, "learning_rate": 6.806977094960029e-07, "loss": 3.2073, "step": 131920 }, { "epoch": 5.682904768057888, "learning_rate": 6.80649227520524e-07, "loss": 3.3188, "step": 131940 }, { "epoch": 5.683766205797476, "learning_rate": 6.806007455450451e-07, "loss": 3.0318, "step": 131960 }, { "epoch": 5.6846276435370635, "learning_rate": 6.805522635695662e-07, "loss": 3.1203, "step": 131980 }, { "epoch": 5.685489081276651, "learning_rate": 6.805037815940872e-07, "loss": 3.1966, "step": 132000 }, { "epoch": 5.686350519016238, "learning_rate": 6.804552996186083e-07, "loss": 3.1344, "step": 132020 }, { "epoch": 5.687211956755825, "learning_rate": 6.804068176431296e-07, "loss": 3.2833, "step": 132040 }, { "epoch": 5.6880733944954125, "learning_rate": 6.803583356676507e-07, "loss": 3.1791, "step": 132060 }, { "epoch": 5.688934832235001, "learning_rate": 6.803098536921716e-07, "loss": 3.1291, "step": 132080 }, { "epoch": 5.689796269974588, "learning_rate": 6.802613717166928e-07, "loss": 3.2564, "step": 132100 }, { "epoch": 5.690657707714175, "learning_rate": 6.802128897412141e-07, "loss": 3.3028, "step": 132120 }, { "epoch": 5.691519145453762, "learning_rate": 6.801644077657351e-07, "loss": 3.0871, "step": 132140 }, { "epoch": 5.69238058319335, "learning_rate": 6.801159257902561e-07, "loss": 3.2126, "step": 132160 }, { "epoch": 5.693242020932937, "learning_rate": 6.800674438147773e-07, "loss": 3.1723, "step": 132180 }, { "epoch": 5.694103458672524, "learning_rate": 6.800189618392984e-07, "loss": 3.1666, "step": 132200 }, { "epoch": 5.694964896412111, "learning_rate": 6.799704798638195e-07, "loss": 3.0644, "step": 132220 }, { "epoch": 5.6958263341516995, "learning_rate": 6.799219978883405e-07, "loss": 3.1257, "step": 132240 }, { "epoch": 5.696687771891287, "learning_rate": 6.798735159128617e-07, "loss": 3.1779, "step": 132260 }, { "epoch": 5.697549209630874, "learning_rate": 6.798250339373829e-07, "loss": 3.295, "step": 132280 }, { "epoch": 5.698410647370461, "learning_rate": 6.79776551961904e-07, "loss": 3.2091, "step": 132300 }, { "epoch": 5.6992720851100485, "learning_rate": 6.79728069986425e-07, "loss": 3.1656, "step": 132320 }, { "epoch": 5.700133522849636, "learning_rate": 6.796795880109461e-07, "loss": 3.2217, "step": 132340 }, { "epoch": 5.700994960589224, "learning_rate": 6.796311060354673e-07, "loss": 3.0465, "step": 132360 }, { "epoch": 5.701856398328811, "learning_rate": 6.795826240599883e-07, "loss": 3.0063, "step": 132380 }, { "epoch": 5.702717836068398, "learning_rate": 6.795341420845094e-07, "loss": 3.0732, "step": 132400 }, { "epoch": 5.703579273807986, "learning_rate": 6.794856601090306e-07, "loss": 3.0586, "step": 132420 }, { "epoch": 5.704440711547573, "learning_rate": 6.794371781335518e-07, "loss": 3.337, "step": 132440 }, { "epoch": 5.70530214928716, "learning_rate": 6.793886961580726e-07, "loss": 3.1075, "step": 132460 }, { "epoch": 5.706163587026747, "learning_rate": 6.793402141825938e-07, "loss": 3.1479, "step": 132480 }, { "epoch": 5.7070250247663346, "learning_rate": 6.79291732207115e-07, "loss": 3.068, "step": 132500 }, { "epoch": 5.707886462505923, "learning_rate": 6.792432502316361e-07, "loss": 3.2656, "step": 132520 }, { "epoch": 5.70874790024551, "learning_rate": 6.791947682561571e-07, "loss": 3.077, "step": 132540 }, { "epoch": 5.709609337985097, "learning_rate": 6.791462862806783e-07, "loss": 3.1316, "step": 132560 }, { "epoch": 5.710470775724684, "learning_rate": 6.790978043051994e-07, "loss": 3.1422, "step": 132580 }, { "epoch": 5.711332213464272, "learning_rate": 6.790493223297205e-07, "loss": 3.1795, "step": 132600 }, { "epoch": 5.712193651203859, "learning_rate": 6.790008403542415e-07, "loss": 3.2757, "step": 132620 }, { "epoch": 5.713055088943447, "learning_rate": 6.789523583787627e-07, "loss": 3.2422, "step": 132640 }, { "epoch": 5.713916526683034, "learning_rate": 6.789038764032839e-07, "loss": 3.1589, "step": 132660 }, { "epoch": 5.7147779644226215, "learning_rate": 6.78855394427805e-07, "loss": 3.328, "step": 132680 }, { "epoch": 5.715639402162209, "learning_rate": 6.78806912452326e-07, "loss": 3.1003, "step": 132700 }, { "epoch": 5.716500839901796, "learning_rate": 6.787584304768471e-07, "loss": 2.895, "step": 132720 }, { "epoch": 5.717362277641383, "learning_rate": 6.787099485013683e-07, "loss": 3.2938, "step": 132740 }, { "epoch": 5.7182237153809705, "learning_rate": 6.786614665258893e-07, "loss": 3.1938, "step": 132760 }, { "epoch": 5.719085153120558, "learning_rate": 6.786129845504104e-07, "loss": 3.218, "step": 132780 }, { "epoch": 5.719946590860146, "learning_rate": 6.785645025749316e-07, "loss": 3.2835, "step": 132800 }, { "epoch": 5.720808028599733, "learning_rate": 6.785160205994528e-07, "loss": 3.1632, "step": 132820 }, { "epoch": 5.72166946633932, "learning_rate": 6.784675386239737e-07, "loss": 3.2434, "step": 132840 }, { "epoch": 5.722530904078908, "learning_rate": 6.784190566484949e-07, "loss": 3.2058, "step": 132860 }, { "epoch": 5.723392341818495, "learning_rate": 6.78370574673016e-07, "loss": 3.282, "step": 132880 }, { "epoch": 5.724253779558082, "learning_rate": 6.783220926975372e-07, "loss": 3.0866, "step": 132900 }, { "epoch": 5.72511521729767, "learning_rate": 6.782736107220582e-07, "loss": 3.1784, "step": 132920 }, { "epoch": 5.7259766550372575, "learning_rate": 6.782251287465793e-07, "loss": 3.1761, "step": 132940 }, { "epoch": 5.726838092776845, "learning_rate": 6.781766467711004e-07, "loss": 3.0853, "step": 132960 }, { "epoch": 5.727699530516432, "learning_rate": 6.781281647956215e-07, "loss": 3.0961, "step": 132980 }, { "epoch": 5.728560968256019, "learning_rate": 6.780796828201425e-07, "loss": 3.1643, "step": 133000 }, { "epoch": 5.7294224059956065, "learning_rate": 6.780312008446637e-07, "loss": 3.1659, "step": 133020 }, { "epoch": 5.730283843735194, "learning_rate": 6.779827188691849e-07, "loss": 3.1038, "step": 133040 }, { "epoch": 5.731145281474781, "learning_rate": 6.779342368937059e-07, "loss": 3.4505, "step": 133060 }, { "epoch": 5.732006719214369, "learning_rate": 6.77885754918227e-07, "loss": 3.3092, "step": 133080 }, { "epoch": 5.732868156953956, "learning_rate": 6.778372729427481e-07, "loss": 2.963, "step": 133100 }, { "epoch": 5.733729594693544, "learning_rate": 6.777887909672693e-07, "loss": 3.2165, "step": 133120 }, { "epoch": 5.734591032433131, "learning_rate": 6.777403089917903e-07, "loss": 3.0454, "step": 133140 }, { "epoch": 5.735452470172718, "learning_rate": 6.776918270163114e-07, "loss": 3.2645, "step": 133160 }, { "epoch": 5.736313907912305, "learning_rate": 6.776433450408326e-07, "loss": 3.1076, "step": 133180 }, { "epoch": 5.7371753456518935, "learning_rate": 6.775948630653538e-07, "loss": 3.1345, "step": 133200 }, { "epoch": 5.738036783391481, "learning_rate": 6.775463810898747e-07, "loss": 3.1146, "step": 133220 }, { "epoch": 5.738898221131068, "learning_rate": 6.774978991143958e-07, "loss": 3.4338, "step": 133240 }, { "epoch": 5.739759658870655, "learning_rate": 6.77449417138917e-07, "loss": 3.1648, "step": 133260 }, { "epoch": 5.7406210966102424, "learning_rate": 6.774009351634382e-07, "loss": 3.1633, "step": 133280 }, { "epoch": 5.74148253434983, "learning_rate": 6.773524531879592e-07, "loss": 2.9923, "step": 133300 }, { "epoch": 5.742343972089417, "learning_rate": 6.773039712124803e-07, "loss": 3.072, "step": 133320 }, { "epoch": 5.743205409829004, "learning_rate": 6.772554892370015e-07, "loss": 3.0225, "step": 133340 }, { "epoch": 5.744066847568592, "learning_rate": 6.772070072615225e-07, "loss": 3.1442, "step": 133360 }, { "epoch": 5.74492828530818, "learning_rate": 6.771585252860437e-07, "loss": 3.1921, "step": 133380 }, { "epoch": 5.745789723047767, "learning_rate": 6.771100433105647e-07, "loss": 3.2415, "step": 133400 }, { "epoch": 5.746651160787354, "learning_rate": 6.770615613350859e-07, "loss": 3.2781, "step": 133420 }, { "epoch": 5.747512598526941, "learning_rate": 6.77013079359607e-07, "loss": 3.1377, "step": 133440 }, { "epoch": 5.7483740362665285, "learning_rate": 6.769645973841281e-07, "loss": 3.009, "step": 133460 }, { "epoch": 5.749235474006117, "learning_rate": 6.769161154086491e-07, "loss": 2.9462, "step": 133480 }, { "epoch": 5.750096911745704, "learning_rate": 6.768676334331703e-07, "loss": 3.1098, "step": 133500 }, { "epoch": 5.750958349485291, "learning_rate": 6.768191514576914e-07, "loss": 3.2555, "step": 133520 }, { "epoch": 5.751819787224878, "learning_rate": 6.767706694822124e-07, "loss": 3.0656, "step": 133540 }, { "epoch": 5.752681224964466, "learning_rate": 6.767221875067336e-07, "loss": 3.0742, "step": 133560 }, { "epoch": 5.753542662704053, "learning_rate": 6.766737055312548e-07, "loss": 3.1825, "step": 133580 }, { "epoch": 5.75440410044364, "learning_rate": 6.766252235557757e-07, "loss": 3.201, "step": 133600 }, { "epoch": 5.755265538183227, "learning_rate": 6.765767415802968e-07, "loss": 3.3179, "step": 133620 }, { "epoch": 5.7561269759228155, "learning_rate": 6.76528259604818e-07, "loss": 3.1431, "step": 133640 }, { "epoch": 5.756988413662403, "learning_rate": 6.764797776293391e-07, "loss": 3.28, "step": 133660 }, { "epoch": 5.75784985140199, "learning_rate": 6.764312956538602e-07, "loss": 3.0673, "step": 133680 }, { "epoch": 5.758711289141577, "learning_rate": 6.763828136783813e-07, "loss": 2.9866, "step": 133700 }, { "epoch": 5.7595727268811645, "learning_rate": 6.763343317029025e-07, "loss": 3.2106, "step": 133720 }, { "epoch": 5.760434164620752, "learning_rate": 6.762858497274235e-07, "loss": 3.2006, "step": 133740 }, { "epoch": 5.76129560236034, "learning_rate": 6.762373677519446e-07, "loss": 3.3177, "step": 133760 }, { "epoch": 5.762157040099927, "learning_rate": 6.761888857764657e-07, "loss": 3.1612, "step": 133780 }, { "epoch": 5.763018477839514, "learning_rate": 6.761404038009868e-07, "loss": 3.2061, "step": 133800 }, { "epoch": 5.763879915579102, "learning_rate": 6.76091921825508e-07, "loss": 3.2408, "step": 133820 }, { "epoch": 5.764741353318689, "learning_rate": 6.760434398500291e-07, "loss": 3.0431, "step": 133840 }, { "epoch": 5.765602791058276, "learning_rate": 6.759949578745501e-07, "loss": 3.1857, "step": 133860 }, { "epoch": 5.766464228797863, "learning_rate": 6.759464758990712e-07, "loss": 3.0534, "step": 133880 }, { "epoch": 5.767325666537451, "learning_rate": 6.758979939235925e-07, "loss": 3.2721, "step": 133900 }, { "epoch": 5.768187104277039, "learning_rate": 6.758495119481135e-07, "loss": 3.2464, "step": 133920 }, { "epoch": 5.769048542016626, "learning_rate": 6.758010299726346e-07, "loss": 3.0732, "step": 133940 }, { "epoch": 5.769909979756213, "learning_rate": 6.757525479971557e-07, "loss": 3.1995, "step": 133960 }, { "epoch": 5.7707714174958005, "learning_rate": 6.757040660216768e-07, "loss": 3.104, "step": 133980 }, { "epoch": 5.771632855235388, "learning_rate": 6.756555840461979e-07, "loss": 3.1425, "step": 134000 }, { "epoch": 5.772494292974975, "learning_rate": 6.75607102070719e-07, "loss": 3.1767, "step": 134020 }, { "epoch": 5.773355730714563, "learning_rate": 6.755586200952401e-07, "loss": 3.1568, "step": 134040 }, { "epoch": 5.77421716845415, "learning_rate": 6.755101381197613e-07, "loss": 3.164, "step": 134060 }, { "epoch": 5.775078606193738, "learning_rate": 6.754616561442823e-07, "loss": 3.1444, "step": 134080 }, { "epoch": 5.775940043933325, "learning_rate": 6.754131741688035e-07, "loss": 3.2324, "step": 134100 }, { "epoch": 5.776801481672912, "learning_rate": 6.753646921933245e-07, "loss": 3.1375, "step": 134120 }, { "epoch": 5.777662919412499, "learning_rate": 6.753162102178456e-07, "loss": 2.9544, "step": 134140 }, { "epoch": 5.778524357152087, "learning_rate": 6.752677282423667e-07, "loss": 3.0062, "step": 134160 }, { "epoch": 5.779385794891674, "learning_rate": 6.752192462668879e-07, "loss": 3.0994, "step": 134180 }, { "epoch": 5.780247232631262, "learning_rate": 6.75170764291409e-07, "loss": 3.1016, "step": 134200 }, { "epoch": 5.781108670370849, "learning_rate": 6.751222823159301e-07, "loss": 3.2393, "step": 134220 }, { "epoch": 5.781970108110436, "learning_rate": 6.750738003404511e-07, "loss": 3.1419, "step": 134240 }, { "epoch": 5.782831545850024, "learning_rate": 6.750253183649722e-07, "loss": 2.9431, "step": 134260 }, { "epoch": 5.783692983589611, "learning_rate": 6.749768363894934e-07, "loss": 3.0403, "step": 134280 }, { "epoch": 5.784554421329198, "learning_rate": 6.749283544140145e-07, "loss": 3.2636, "step": 134300 }, { "epoch": 5.785415859068785, "learning_rate": 6.748798724385356e-07, "loss": 3.1755, "step": 134320 }, { "epoch": 5.7862772968083735, "learning_rate": 6.748313904630567e-07, "loss": 2.9281, "step": 134340 }, { "epoch": 5.787138734547961, "learning_rate": 6.747829084875778e-07, "loss": 3.0516, "step": 134360 }, { "epoch": 5.788000172287548, "learning_rate": 6.747344265120989e-07, "loss": 3.0372, "step": 134380 }, { "epoch": 5.788861610027135, "learning_rate": 6.7468594453662e-07, "loss": 3.1118, "step": 134400 }, { "epoch": 5.7897230477667225, "learning_rate": 6.746374625611411e-07, "loss": 3.2026, "step": 134420 }, { "epoch": 5.79058448550631, "learning_rate": 6.745889805856623e-07, "loss": 3.0647, "step": 134440 }, { "epoch": 5.791445923245897, "learning_rate": 6.745404986101834e-07, "loss": 3.3857, "step": 134460 }, { "epoch": 5.792307360985485, "learning_rate": 6.744920166347045e-07, "loss": 3.2017, "step": 134480 }, { "epoch": 5.793168798725072, "learning_rate": 6.744435346592255e-07, "loss": 3.0898, "step": 134500 }, { "epoch": 5.79403023646466, "learning_rate": 6.743950526837467e-07, "loss": 3.1422, "step": 134520 }, { "epoch": 5.794891674204247, "learning_rate": 6.743465707082678e-07, "loss": 3.183, "step": 134540 }, { "epoch": 5.795753111943834, "learning_rate": 6.742980887327888e-07, "loss": 3.2282, "step": 134560 }, { "epoch": 5.796614549683421, "learning_rate": 6.7424960675731e-07, "loss": 3.1018, "step": 134580 }, { "epoch": 5.797475987423009, "learning_rate": 6.742011247818312e-07, "loss": 2.9996, "step": 134600 }, { "epoch": 5.798337425162597, "learning_rate": 6.741526428063521e-07, "loss": 3.0524, "step": 134620 }, { "epoch": 5.799198862902184, "learning_rate": 6.741041608308733e-07, "loss": 3.2086, "step": 134640 }, { "epoch": 5.800060300641771, "learning_rate": 6.740556788553944e-07, "loss": 3.3612, "step": 134660 }, { "epoch": 5.8009217383813585, "learning_rate": 6.740071968799155e-07, "loss": 3.1381, "step": 134680 }, { "epoch": 5.801783176120946, "learning_rate": 6.739587149044366e-07, "loss": 3.2056, "step": 134700 }, { "epoch": 5.802644613860533, "learning_rate": 6.739102329289577e-07, "loss": 3.1089, "step": 134720 }, { "epoch": 5.80350605160012, "learning_rate": 6.738617509534788e-07, "loss": 3.3494, "step": 134740 }, { "epoch": 5.8043674893397075, "learning_rate": 6.738132689779999e-07, "loss": 3.1653, "step": 134760 }, { "epoch": 5.805228927079296, "learning_rate": 6.73764787002521e-07, "loss": 3.2408, "step": 134780 }, { "epoch": 5.806090364818883, "learning_rate": 6.737163050270421e-07, "loss": 3.1996, "step": 134800 }, { "epoch": 5.80695180255847, "learning_rate": 6.736678230515633e-07, "loss": 3.0941, "step": 134820 }, { "epoch": 5.807813240298057, "learning_rate": 6.736193410760844e-07, "loss": 3.1541, "step": 134840 }, { "epoch": 5.808674678037645, "learning_rate": 6.735708591006054e-07, "loss": 3.1663, "step": 134860 }, { "epoch": 5.809536115777232, "learning_rate": 6.735223771251265e-07, "loss": 3.2249, "step": 134880 }, { "epoch": 5.81039755351682, "learning_rate": 6.734738951496477e-07, "loss": 3.0091, "step": 134900 }, { "epoch": 5.811258991256407, "learning_rate": 6.734254131741688e-07, "loss": 3.1883, "step": 134920 }, { "epoch": 5.8121204289959945, "learning_rate": 6.733769311986898e-07, "loss": 3.0712, "step": 134940 }, { "epoch": 5.812981866735582, "learning_rate": 6.73328449223211e-07, "loss": 3.0874, "step": 134960 }, { "epoch": 5.813843304475169, "learning_rate": 6.732799672477322e-07, "loss": 3.1555, "step": 134980 }, { "epoch": 5.814704742214756, "learning_rate": 6.732314852722532e-07, "loss": 3.2034, "step": 135000 }, { "epoch": 5.815566179954343, "learning_rate": 6.731830032967742e-07, "loss": 3.3072, "step": 135020 }, { "epoch": 5.816427617693931, "learning_rate": 6.731345213212954e-07, "loss": 3.1931, "step": 135040 }, { "epoch": 5.817289055433519, "learning_rate": 6.730860393458166e-07, "loss": 3.1744, "step": 135060 }, { "epoch": 5.818150493173106, "learning_rate": 6.730375573703377e-07, "loss": 3.0712, "step": 135080 }, { "epoch": 5.819011930912693, "learning_rate": 6.729890753948587e-07, "loss": 3.1945, "step": 135100 }, { "epoch": 5.8198733686522806, "learning_rate": 6.729405934193799e-07, "loss": 2.9604, "step": 135120 }, { "epoch": 5.820734806391868, "learning_rate": 6.72892111443901e-07, "loss": 3.0203, "step": 135140 }, { "epoch": 5.821596244131455, "learning_rate": 6.728436294684219e-07, "loss": 3.042, "step": 135160 }, { "epoch": 5.822457681871043, "learning_rate": 6.727951474929431e-07, "loss": 3.0849, "step": 135180 }, { "epoch": 5.82331911961063, "learning_rate": 6.727466655174643e-07, "loss": 3.1667, "step": 135200 }, { "epoch": 5.824180557350218, "learning_rate": 6.726981835419854e-07, "loss": 3.103, "step": 135220 }, { "epoch": 5.825041995089805, "learning_rate": 6.726497015665064e-07, "loss": 3.0376, "step": 135240 }, { "epoch": 5.825903432829392, "learning_rate": 6.726012195910275e-07, "loss": 3.2213, "step": 135260 }, { "epoch": 5.826764870568979, "learning_rate": 6.725527376155487e-07, "loss": 3.1859, "step": 135280 }, { "epoch": 5.827626308308567, "learning_rate": 6.725042556400698e-07, "loss": 3.0732, "step": 135300 }, { "epoch": 5.828487746048154, "learning_rate": 6.724557736645908e-07, "loss": 3.0541, "step": 135320 }, { "epoch": 5.829349183787742, "learning_rate": 6.72407291689112e-07, "loss": 3.1965, "step": 135340 }, { "epoch": 5.830210621527329, "learning_rate": 6.723588097136332e-07, "loss": 3.1839, "step": 135360 }, { "epoch": 5.8310720592669165, "learning_rate": 6.723103277381542e-07, "loss": 3.1495, "step": 135380 }, { "epoch": 5.831933497006504, "learning_rate": 6.722618457626752e-07, "loss": 3.0962, "step": 135400 }, { "epoch": 5.832794934746091, "learning_rate": 6.722133637871964e-07, "loss": 3.239, "step": 135420 }, { "epoch": 5.833656372485678, "learning_rate": 6.721648818117176e-07, "loss": 3.0408, "step": 135440 }, { "epoch": 5.834517810225266, "learning_rate": 6.721163998362387e-07, "loss": 3.0296, "step": 135460 }, { "epoch": 5.835379247964854, "learning_rate": 6.720679178607597e-07, "loss": 3.2457, "step": 135480 }, { "epoch": 5.836240685704441, "learning_rate": 6.720194358852809e-07, "loss": 3.1288, "step": 135500 }, { "epoch": 5.837102123444028, "learning_rate": 6.71970953909802e-07, "loss": 3.1188, "step": 135520 }, { "epoch": 5.837963561183615, "learning_rate": 6.71922471934323e-07, "loss": 3.1655, "step": 135540 }, { "epoch": 5.838824998923203, "learning_rate": 6.718739899588441e-07, "loss": 3.1493, "step": 135560 }, { "epoch": 5.83968643666279, "learning_rate": 6.718255079833652e-07, "loss": 3.1491, "step": 135580 }, { "epoch": 5.840547874402377, "learning_rate": 6.717770260078865e-07, "loss": 2.9958, "step": 135600 }, { "epoch": 5.841409312141965, "learning_rate": 6.717285440324075e-07, "loss": 3.2048, "step": 135620 }, { "epoch": 5.8422707498815525, "learning_rate": 6.716800620569285e-07, "loss": 3.1827, "step": 135640 }, { "epoch": 5.84313218762114, "learning_rate": 6.716315800814497e-07, "loss": 3.0809, "step": 135660 }, { "epoch": 5.843993625360727, "learning_rate": 6.71583098105971e-07, "loss": 3.0137, "step": 135680 }, { "epoch": 5.844855063100314, "learning_rate": 6.715346161304918e-07, "loss": 3.0158, "step": 135700 }, { "epoch": 5.8457165008399015, "learning_rate": 6.71486134155013e-07, "loss": 3.1452, "step": 135720 }, { "epoch": 5.84657793857949, "learning_rate": 6.714376521795342e-07, "loss": 3.0691, "step": 135740 }, { "epoch": 5.847439376319077, "learning_rate": 6.713891702040553e-07, "loss": 3.2708, "step": 135760 }, { "epoch": 5.848300814058664, "learning_rate": 6.713406882285762e-07, "loss": 3.1081, "step": 135780 }, { "epoch": 5.849162251798251, "learning_rate": 6.712922062530974e-07, "loss": 3.166, "step": 135800 }, { "epoch": 5.850023689537839, "learning_rate": 6.712437242776186e-07, "loss": 3.1383, "step": 135820 }, { "epoch": 5.850885127277426, "learning_rate": 6.711952423021396e-07, "loss": 3.2279, "step": 135840 }, { "epoch": 5.851746565017013, "learning_rate": 6.711467603266607e-07, "loss": 3.009, "step": 135860 }, { "epoch": 5.8526080027566, "learning_rate": 6.710982783511819e-07, "loss": 3.1181, "step": 135880 }, { "epoch": 5.8534694404961884, "learning_rate": 6.71049796375703e-07, "loss": 3.1606, "step": 135900 }, { "epoch": 5.854330878235776, "learning_rate": 6.71001314400224e-07, "loss": 3.0974, "step": 135920 }, { "epoch": 5.855192315975363, "learning_rate": 6.709528324247451e-07, "loss": 3.0554, "step": 135940 }, { "epoch": 5.85605375371495, "learning_rate": 6.709043504492663e-07, "loss": 3.292, "step": 135960 }, { "epoch": 5.856915191454537, "learning_rate": 6.708558684737875e-07, "loss": 3.1406, "step": 135980 }, { "epoch": 5.857776629194125, "learning_rate": 6.708073864983085e-07, "loss": 3.2363, "step": 136000 }, { "epoch": 5.858638066933713, "learning_rate": 6.707589045228295e-07, "loss": 3.164, "step": 136020 }, { "epoch": 5.8594995046733, "learning_rate": 6.707104225473507e-07, "loss": 3.2286, "step": 136040 }, { "epoch": 5.860360942412887, "learning_rate": 6.706619405718719e-07, "loss": 3.2019, "step": 136060 }, { "epoch": 5.8612223801524745, "learning_rate": 6.706134585963929e-07, "loss": 2.9319, "step": 136080 }, { "epoch": 5.862083817892062, "learning_rate": 6.70564976620914e-07, "loss": 3.1988, "step": 136100 }, { "epoch": 5.862945255631649, "learning_rate": 6.705164946454352e-07, "loss": 3.0192, "step": 136120 }, { "epoch": 5.863806693371236, "learning_rate": 6.704680126699562e-07, "loss": 3.0822, "step": 136140 }, { "epoch": 5.8646681311108235, "learning_rate": 6.704195306944773e-07, "loss": 3.1441, "step": 136160 }, { "epoch": 5.865529568850412, "learning_rate": 6.703710487189984e-07, "loss": 3.1282, "step": 136180 }, { "epoch": 5.866391006589999, "learning_rate": 6.703225667435196e-07, "loss": 3.1296, "step": 136200 }, { "epoch": 5.867252444329586, "learning_rate": 6.702740847680407e-07, "loss": 3.2144, "step": 136220 }, { "epoch": 5.868113882069173, "learning_rate": 6.702256027925617e-07, "loss": 3.0968, "step": 136240 }, { "epoch": 5.868975319808761, "learning_rate": 6.701771208170829e-07, "loss": 3.3572, "step": 136260 }, { "epoch": 5.869836757548348, "learning_rate": 6.70128638841604e-07, "loss": 3.1393, "step": 136280 }, { "epoch": 5.870698195287936, "learning_rate": 6.70080156866125e-07, "loss": 3.1237, "step": 136300 }, { "epoch": 5.871559633027523, "learning_rate": 6.700316748906461e-07, "loss": 3.0902, "step": 136320 }, { "epoch": 5.8724210707671105, "learning_rate": 6.699831929151673e-07, "loss": 3.0515, "step": 136340 }, { "epoch": 5.873282508506698, "learning_rate": 6.699347109396885e-07, "loss": 3.0444, "step": 136360 }, { "epoch": 5.874143946246285, "learning_rate": 6.698862289642095e-07, "loss": 3.2028, "step": 136380 }, { "epoch": 5.875005383985872, "learning_rate": 6.698377469887306e-07, "loss": 3.1298, "step": 136400 }, { "epoch": 5.8758668217254595, "learning_rate": 6.697892650132518e-07, "loss": 3.0716, "step": 136420 }, { "epoch": 5.876728259465047, "learning_rate": 6.697407830377728e-07, "loss": 3.0892, "step": 136440 }, { "epoch": 5.877589697204635, "learning_rate": 6.696923010622939e-07, "loss": 3.0835, "step": 136460 }, { "epoch": 5.878451134944222, "learning_rate": 6.69643819086815e-07, "loss": 2.9605, "step": 136480 }, { "epoch": 5.879312572683809, "learning_rate": 6.695953371113362e-07, "loss": 3.0078, "step": 136500 }, { "epoch": 5.880174010423397, "learning_rate": 6.695468551358572e-07, "loss": 3.0952, "step": 136520 }, { "epoch": 5.881035448162984, "learning_rate": 6.694983731603783e-07, "loss": 3.1124, "step": 136540 }, { "epoch": 5.881896885902571, "learning_rate": 6.694498911848994e-07, "loss": 3.2002, "step": 136560 }, { "epoch": 5.882758323642159, "learning_rate": 6.694014092094206e-07, "loss": 3.154, "step": 136580 }, { "epoch": 5.8836197613817465, "learning_rate": 6.693529272339417e-07, "loss": 3.1006, "step": 136600 }, { "epoch": 5.884481199121334, "learning_rate": 6.693044452584628e-07, "loss": 3.1552, "step": 136620 }, { "epoch": 5.885342636860921, "learning_rate": 6.692559632829839e-07, "loss": 3.1886, "step": 136640 }, { "epoch": 5.886204074600508, "learning_rate": 6.69207481307505e-07, "loss": 2.9938, "step": 136660 }, { "epoch": 5.8870655123400955, "learning_rate": 6.691589993320261e-07, "loss": 3.1426, "step": 136680 }, { "epoch": 5.887926950079683, "learning_rate": 6.691105173565472e-07, "loss": 3.0142, "step": 136700 }, { "epoch": 5.88878838781927, "learning_rate": 6.690620353810683e-07, "loss": 3.0936, "step": 136720 }, { "epoch": 5.889649825558858, "learning_rate": 6.690135534055894e-07, "loss": 3.0059, "step": 136740 }, { "epoch": 5.890511263298445, "learning_rate": 6.689650714301106e-07, "loss": 3.2446, "step": 136760 }, { "epoch": 5.891372701038033, "learning_rate": 6.689165894546316e-07, "loss": 3.1538, "step": 136780 }, { "epoch": 5.89223413877762, "learning_rate": 6.688681074791527e-07, "loss": 2.9594, "step": 136800 }, { "epoch": 5.893095576517207, "learning_rate": 6.688196255036738e-07, "loss": 3.0342, "step": 136820 }, { "epoch": 5.893957014256794, "learning_rate": 6.687711435281948e-07, "loss": 3.1117, "step": 136840 }, { "epoch": 5.894818451996382, "learning_rate": 6.68722661552716e-07, "loss": 2.9883, "step": 136860 }, { "epoch": 5.89567988973597, "learning_rate": 6.686741795772372e-07, "loss": 3.039, "step": 136880 }, { "epoch": 5.896541327475557, "learning_rate": 6.686256976017583e-07, "loss": 3.1795, "step": 136900 }, { "epoch": 5.897402765215144, "learning_rate": 6.685772156262793e-07, "loss": 3.0312, "step": 136920 }, { "epoch": 5.898264202954731, "learning_rate": 6.685287336508004e-07, "loss": 3.0345, "step": 136940 }, { "epoch": 5.899125640694319, "learning_rate": 6.684802516753216e-07, "loss": 3.0789, "step": 136960 }, { "epoch": 5.899987078433906, "learning_rate": 6.684317696998427e-07, "loss": 3.1513, "step": 136980 }, { "epoch": 5.900848516173493, "learning_rate": 6.683832877243638e-07, "loss": 2.986, "step": 137000 }, { "epoch": 5.901709953913081, "learning_rate": 6.683348057488849e-07, "loss": 3.0019, "step": 137020 }, { "epoch": 5.9025713916526685, "learning_rate": 6.682863237734059e-07, "loss": 3.088, "step": 137040 }, { "epoch": 5.903432829392256, "learning_rate": 6.682378417979271e-07, "loss": 3.0026, "step": 137060 }, { "epoch": 5.904294267131843, "learning_rate": 6.681893598224482e-07, "loss": 3.2295, "step": 137080 }, { "epoch": 5.90515570487143, "learning_rate": 6.681408778469693e-07, "loss": 3.1641, "step": 137100 }, { "epoch": 5.9060171426110175, "learning_rate": 6.680923958714904e-07, "loss": 3.2013, "step": 137120 }, { "epoch": 5.906878580350606, "learning_rate": 6.680439138960116e-07, "loss": 3.0945, "step": 137140 }, { "epoch": 5.907740018090193, "learning_rate": 6.679954319205326e-07, "loss": 3.1501, "step": 137160 }, { "epoch": 5.90860145582978, "learning_rate": 6.679469499450537e-07, "loss": 3.1245, "step": 137180 }, { "epoch": 5.909462893569367, "learning_rate": 6.678984679695748e-07, "loss": 2.8896, "step": 137200 }, { "epoch": 5.910324331308955, "learning_rate": 6.67849985994096e-07, "loss": 2.9712, "step": 137220 }, { "epoch": 5.911185769048542, "learning_rate": 6.678015040186171e-07, "loss": 3.2945, "step": 137240 }, { "epoch": 5.912047206788129, "learning_rate": 6.677530220431382e-07, "loss": 3.28, "step": 137260 }, { "epoch": 5.912908644527716, "learning_rate": 6.677045400676593e-07, "loss": 3.2057, "step": 137280 }, { "epoch": 5.9137700822673045, "learning_rate": 6.676560580921803e-07, "loss": 3.1788, "step": 137300 }, { "epoch": 5.914631520006892, "learning_rate": 6.676075761167014e-07, "loss": 3.2869, "step": 137320 }, { "epoch": 5.915492957746479, "learning_rate": 6.675590941412225e-07, "loss": 2.9659, "step": 137340 }, { "epoch": 5.916354395486066, "learning_rate": 6.675106121657436e-07, "loss": 3.1462, "step": 137360 }, { "epoch": 5.9172158332256535, "learning_rate": 6.674621301902648e-07, "loss": 3.0528, "step": 137380 }, { "epoch": 5.918077270965241, "learning_rate": 6.674136482147859e-07, "loss": 3.0271, "step": 137400 }, { "epoch": 5.918938708704829, "learning_rate": 6.673651662393069e-07, "loss": 2.9471, "step": 137420 }, { "epoch": 5.919800146444416, "learning_rate": 6.673166842638281e-07, "loss": 3.2837, "step": 137440 }, { "epoch": 5.920661584184003, "learning_rate": 6.672682022883492e-07, "loss": 3.013, "step": 137460 }, { "epoch": 5.921523021923591, "learning_rate": 6.672197203128703e-07, "loss": 3.1329, "step": 137480 }, { "epoch": 5.922384459663178, "learning_rate": 6.671712383373914e-07, "loss": 3.2816, "step": 137500 }, { "epoch": 5.923245897402765, "learning_rate": 6.671227563619126e-07, "loss": 3.0798, "step": 137520 }, { "epoch": 5.924107335142352, "learning_rate": 6.670742743864337e-07, "loss": 3.1757, "step": 137540 }, { "epoch": 5.92496877288194, "learning_rate": 6.670257924109547e-07, "loss": 3.1536, "step": 137560 }, { "epoch": 5.925830210621528, "learning_rate": 6.669773104354758e-07, "loss": 3.1504, "step": 137580 }, { "epoch": 5.926691648361115, "learning_rate": 6.66928828459997e-07, "loss": 3.0666, "step": 137600 }, { "epoch": 5.927553086100702, "learning_rate": 6.668803464845181e-07, "loss": 3.0226, "step": 137620 }, { "epoch": 5.928414523840289, "learning_rate": 6.668318645090391e-07, "loss": 3.0879, "step": 137640 }, { "epoch": 5.929275961579877, "learning_rate": 6.667833825335603e-07, "loss": 3.1144, "step": 137660 }, { "epoch": 5.930137399319464, "learning_rate": 6.667349005580814e-07, "loss": 3.176, "step": 137680 }, { "epoch": 5.930998837059051, "learning_rate": 6.666864185826025e-07, "loss": 3.203, "step": 137700 }, { "epoch": 5.931860274798639, "learning_rate": 6.666379366071235e-07, "loss": 3.1169, "step": 137720 }, { "epoch": 5.9327217125382266, "learning_rate": 6.665894546316447e-07, "loss": 3.2139, "step": 137740 }, { "epoch": 5.933583150277814, "learning_rate": 6.665409726561659e-07, "loss": 3.2804, "step": 137760 }, { "epoch": 5.934444588017401, "learning_rate": 6.66492490680687e-07, "loss": 3.1852, "step": 137780 }, { "epoch": 5.935306025756988, "learning_rate": 6.664440087052079e-07, "loss": 3.035, "step": 137800 }, { "epoch": 5.9361674634965755, "learning_rate": 6.663955267297291e-07, "loss": 3.0937, "step": 137820 }, { "epoch": 5.937028901236163, "learning_rate": 6.663470447542502e-07, "loss": 3.0263, "step": 137840 }, { "epoch": 5.937890338975751, "learning_rate": 6.662985627787713e-07, "loss": 3.3591, "step": 137860 }, { "epoch": 5.938751776715338, "learning_rate": 6.662500808032924e-07, "loss": 3.0939, "step": 137880 }, { "epoch": 5.939613214454925, "learning_rate": 6.662015988278136e-07, "loss": 3.0609, "step": 137900 }, { "epoch": 5.940474652194513, "learning_rate": 6.661531168523347e-07, "loss": 3.2201, "step": 137920 }, { "epoch": 5.9413360899341, "learning_rate": 6.661046348768556e-07, "loss": 3.314, "step": 137940 }, { "epoch": 5.942197527673687, "learning_rate": 6.660561529013768e-07, "loss": 3.0459, "step": 137960 }, { "epoch": 5.943058965413274, "learning_rate": 6.66007670925898e-07, "loss": 3.191, "step": 137980 }, { "epoch": 5.9439204031528625, "learning_rate": 6.659591889504191e-07, "loss": 3.4345, "step": 138000 }, { "epoch": 5.94478184089245, "learning_rate": 6.659107069749401e-07, "loss": 2.9941, "step": 138020 }, { "epoch": 5.945643278632037, "learning_rate": 6.658622249994613e-07, "loss": 3.1053, "step": 138040 }, { "epoch": 5.946504716371624, "learning_rate": 6.658137430239824e-07, "loss": 3.0454, "step": 138060 }, { "epoch": 5.9473661541112115, "learning_rate": 6.657652610485035e-07, "loss": 3.05, "step": 138080 }, { "epoch": 5.948227591850799, "learning_rate": 6.657167790730245e-07, "loss": 3.1033, "step": 138100 }, { "epoch": 5.949089029590386, "learning_rate": 6.656682970975457e-07, "loss": 2.9416, "step": 138120 }, { "epoch": 5.949950467329973, "learning_rate": 6.656198151220669e-07, "loss": 3.1685, "step": 138140 }, { "epoch": 5.950811905069561, "learning_rate": 6.65571333146588e-07, "loss": 3.1556, "step": 138160 }, { "epoch": 5.951673342809149, "learning_rate": 6.65522851171109e-07, "loss": 3.2782, "step": 138180 }, { "epoch": 5.952534780548736, "learning_rate": 6.654743691956302e-07, "loss": 3.0903, "step": 138200 }, { "epoch": 5.953396218288323, "learning_rate": 6.654258872201513e-07, "loss": 3.2313, "step": 138220 }, { "epoch": 5.95425765602791, "learning_rate": 6.653774052446724e-07, "loss": 3.079, "step": 138240 }, { "epoch": 5.955119093767498, "learning_rate": 6.653289232691934e-07, "loss": 3.082, "step": 138260 }, { "epoch": 5.955980531507086, "learning_rate": 6.652804412937146e-07, "loss": 3.1584, "step": 138280 }, { "epoch": 5.956841969246673, "learning_rate": 6.652319593182357e-07, "loss": 3.1979, "step": 138300 }, { "epoch": 5.95770340698626, "learning_rate": 6.651834773427567e-07, "loss": 3.1629, "step": 138320 }, { "epoch": 5.9585648447258475, "learning_rate": 6.651349953672778e-07, "loss": 3.0764, "step": 138340 }, { "epoch": 5.959426282465435, "learning_rate": 6.65086513391799e-07, "loss": 3.0662, "step": 138360 }, { "epoch": 5.960287720205022, "learning_rate": 6.650380314163201e-07, "loss": 3.3012, "step": 138380 }, { "epoch": 5.961149157944609, "learning_rate": 6.649895494408411e-07, "loss": 3.1651, "step": 138400 }, { "epoch": 5.9620105956841964, "learning_rate": 6.649410674653623e-07, "loss": 3.2542, "step": 138420 }, { "epoch": 5.962872033423785, "learning_rate": 6.648925854898834e-07, "loss": 2.9693, "step": 138440 }, { "epoch": 5.963733471163372, "learning_rate": 6.648441035144045e-07, "loss": 3.0111, "step": 138460 }, { "epoch": 5.964594908902959, "learning_rate": 6.647956215389255e-07, "loss": 3.1406, "step": 138480 }, { "epoch": 5.965456346642546, "learning_rate": 6.647471395634467e-07, "loss": 3.0596, "step": 138500 }, { "epoch": 5.966317784382134, "learning_rate": 6.646986575879679e-07, "loss": 3.0972, "step": 138520 }, { "epoch": 5.967179222121721, "learning_rate": 6.64650175612489e-07, "loss": 3.3605, "step": 138540 }, { "epoch": 5.968040659861309, "learning_rate": 6.6460169363701e-07, "loss": 3.0885, "step": 138560 }, { "epoch": 5.968902097600896, "learning_rate": 6.645532116615311e-07, "loss": 3.2058, "step": 138580 }, { "epoch": 5.969763535340483, "learning_rate": 6.645047296860523e-07, "loss": 3.2432, "step": 138600 }, { "epoch": 5.970624973080071, "learning_rate": 6.644562477105732e-07, "loss": 3.0379, "step": 138620 }, { "epoch": 5.971486410819658, "learning_rate": 6.644077657350944e-07, "loss": 3.1696, "step": 138640 }, { "epoch": 5.972347848559245, "learning_rate": 6.643592837596156e-07, "loss": 3.1055, "step": 138660 }, { "epoch": 5.973209286298832, "learning_rate": 6.643108017841368e-07, "loss": 3.1759, "step": 138680 }, { "epoch": 5.97407072403842, "learning_rate": 6.642623198086577e-07, "loss": 3.0362, "step": 138700 }, { "epoch": 5.974932161778008, "learning_rate": 6.642138378331788e-07, "loss": 2.9943, "step": 138720 }, { "epoch": 5.975793599517595, "learning_rate": 6.641653558577e-07, "loss": 3.248, "step": 138740 }, { "epoch": 5.976655037257182, "learning_rate": 6.641168738822212e-07, "loss": 3.0353, "step": 138760 }, { "epoch": 5.9775164749967695, "learning_rate": 6.640683919067422e-07, "loss": 2.9982, "step": 138780 }, { "epoch": 5.978377912736357, "learning_rate": 6.640199099312633e-07, "loss": 3.18, "step": 138800 }, { "epoch": 5.979239350475944, "learning_rate": 6.639714279557844e-07, "loss": 3.1225, "step": 138820 }, { "epoch": 5.980100788215532, "learning_rate": 6.639229459803056e-07, "loss": 3.0631, "step": 138840 }, { "epoch": 5.980962225955119, "learning_rate": 6.638744640048266e-07, "loss": 3.1189, "step": 138860 }, { "epoch": 5.981823663694707, "learning_rate": 6.638259820293477e-07, "loss": 3.0192, "step": 138880 }, { "epoch": 5.982685101434294, "learning_rate": 6.637775000538689e-07, "loss": 3.21, "step": 138900 }, { "epoch": 5.983546539173881, "learning_rate": 6.6372901807839e-07, "loss": 3.3042, "step": 138920 }, { "epoch": 5.984407976913468, "learning_rate": 6.63680536102911e-07, "loss": 3.2292, "step": 138940 }, { "epoch": 5.985269414653056, "learning_rate": 6.636320541274321e-07, "loss": 3.1658, "step": 138960 }, { "epoch": 5.986130852392643, "learning_rate": 6.635835721519533e-07, "loss": 3.2374, "step": 138980 }, { "epoch": 5.986992290132231, "learning_rate": 6.635350901764743e-07, "loss": 3.0771, "step": 139000 }, { "epoch": 5.987853727871818, "learning_rate": 6.634866082009954e-07, "loss": 3.0023, "step": 139020 }, { "epoch": 5.9887151656114055, "learning_rate": 6.634381262255166e-07, "loss": 3.1219, "step": 139040 }, { "epoch": 5.989576603350993, "learning_rate": 6.633896442500378e-07, "loss": 3.0883, "step": 139060 }, { "epoch": 5.99043804109058, "learning_rate": 6.633411622745587e-07, "loss": 3.1024, "step": 139080 }, { "epoch": 5.991299478830167, "learning_rate": 6.632926802990798e-07, "loss": 3.1098, "step": 139100 }, { "epoch": 5.992160916569755, "learning_rate": 6.63244198323601e-07, "loss": 3.1583, "step": 139120 }, { "epoch": 5.993022354309343, "learning_rate": 6.63195716348122e-07, "loss": 3.2578, "step": 139140 }, { "epoch": 5.99388379204893, "learning_rate": 6.631472343726432e-07, "loss": 2.9869, "step": 139160 }, { "epoch": 5.994745229788517, "learning_rate": 6.630987523971643e-07, "loss": 3.0814, "step": 139180 }, { "epoch": 5.995606667528104, "learning_rate": 6.630502704216854e-07, "loss": 3.0635, "step": 139200 }, { "epoch": 5.996468105267692, "learning_rate": 6.630017884462065e-07, "loss": 3.1107, "step": 139220 }, { "epoch": 5.997329543007279, "learning_rate": 6.629533064707276e-07, "loss": 2.8631, "step": 139240 }, { "epoch": 5.998190980746866, "learning_rate": 6.629048244952487e-07, "loss": 3.101, "step": 139260 }, { "epoch": 5.999052418486454, "learning_rate": 6.628563425197699e-07, "loss": 3.1452, "step": 139280 }, { "epoch": 5.9999138562260415, "learning_rate": 6.62807860544291e-07, "loss": 3.2317, "step": 139300 }, { "epoch": 6.000775293965629, "learning_rate": 6.627593785688121e-07, "loss": 3.1723, "step": 139320 }, { "epoch": 6.001636731705216, "learning_rate": 6.627108965933331e-07, "loss": 3.0666, "step": 139340 }, { "epoch": 6.002498169444803, "learning_rate": 6.626624146178543e-07, "loss": 3.1272, "step": 139360 }, { "epoch": 6.00335960718439, "learning_rate": 6.626139326423754e-07, "loss": 3.2, "step": 139380 }, { "epoch": 6.004221044923978, "learning_rate": 6.625654506668965e-07, "loss": 3.1094, "step": 139400 }, { "epoch": 6.005082482663566, "learning_rate": 6.625169686914176e-07, "loss": 3.1583, "step": 139420 }, { "epoch": 6.005943920403153, "learning_rate": 6.624684867159388e-07, "loss": 3.0964, "step": 139440 }, { "epoch": 6.00680535814274, "learning_rate": 6.624200047404596e-07, "loss": 3.1498, "step": 139460 }, { "epoch": 6.0076667958823275, "learning_rate": 6.623715227649808e-07, "loss": 3.2131, "step": 139480 }, { "epoch": 6.008528233621915, "learning_rate": 6.62323040789502e-07, "loss": 3.0729, "step": 139500 }, { "epoch": 6.009389671361502, "learning_rate": 6.622745588140232e-07, "loss": 3.1449, "step": 139520 }, { "epoch": 6.010251109101089, "learning_rate": 6.622260768385442e-07, "loss": 3.2453, "step": 139540 }, { "epoch": 6.011112546840677, "learning_rate": 6.621775948630654e-07, "loss": 3.0347, "step": 139560 }, { "epoch": 6.011973984580265, "learning_rate": 6.621291128875864e-07, "loss": 3.2111, "step": 139580 }, { "epoch": 6.012835422319852, "learning_rate": 6.620806309121075e-07, "loss": 3.1421, "step": 139600 }, { "epoch": 6.013696860059439, "learning_rate": 6.620321489366286e-07, "loss": 3.0602, "step": 139620 }, { "epoch": 6.014558297799026, "learning_rate": 6.619836669611497e-07, "loss": 3.1771, "step": 139640 }, { "epoch": 6.015419735538614, "learning_rate": 6.619351849856709e-07, "loss": 3.0985, "step": 139660 }, { "epoch": 6.016281173278201, "learning_rate": 6.61886703010192e-07, "loss": 3.1825, "step": 139680 }, { "epoch": 6.017142611017789, "learning_rate": 6.618382210347131e-07, "loss": 3.0863, "step": 139700 }, { "epoch": 6.018004048757376, "learning_rate": 6.617897390592341e-07, "loss": 3.2641, "step": 139720 }, { "epoch": 6.0188654864969635, "learning_rate": 6.617412570837553e-07, "loss": 3.0288, "step": 139740 }, { "epoch": 6.019726924236551, "learning_rate": 6.616927751082764e-07, "loss": 3.2157, "step": 139760 }, { "epoch": 6.020588361976138, "learning_rate": 6.616442931327975e-07, "loss": 3.1409, "step": 139780 }, { "epoch": 6.021449799715725, "learning_rate": 6.615958111573186e-07, "loss": 3.2418, "step": 139800 }, { "epoch": 6.0223112374553125, "learning_rate": 6.615473291818398e-07, "loss": 3.2647, "step": 139820 }, { "epoch": 6.023172675194901, "learning_rate": 6.614988472063608e-07, "loss": 3.0625, "step": 139840 }, { "epoch": 6.024034112934488, "learning_rate": 6.614503652308819e-07, "loss": 3.004, "step": 139860 }, { "epoch": 6.024895550674075, "learning_rate": 6.61401883255403e-07, "loss": 3.1721, "step": 139880 }, { "epoch": 6.025756988413662, "learning_rate": 6.613534012799241e-07, "loss": 3.2279, "step": 139900 }, { "epoch": 6.02661842615325, "learning_rate": 6.613049193044453e-07, "loss": 3.2096, "step": 139920 }, { "epoch": 6.027479863892837, "learning_rate": 6.612564373289664e-07, "loss": 3.0461, "step": 139940 }, { "epoch": 6.028341301632424, "learning_rate": 6.612079553534875e-07, "loss": 3.0212, "step": 139960 }, { "epoch": 6.029202739372012, "learning_rate": 6.611594733780084e-07, "loss": 3.1434, "step": 139980 }, { "epoch": 6.0300641771115995, "learning_rate": 6.611109914025296e-07, "loss": 3.1837, "step": 140000 }, { "epoch": 6.030925614851187, "learning_rate": 6.610625094270507e-07, "loss": 3.1087, "step": 140020 }, { "epoch": 6.031787052590774, "learning_rate": 6.610140274515719e-07, "loss": 3.0756, "step": 140040 }, { "epoch": 6.032648490330361, "learning_rate": 6.60965545476093e-07, "loss": 3.2138, "step": 140060 }, { "epoch": 6.0335099280699485, "learning_rate": 6.609170635006141e-07, "loss": 3.1924, "step": 140080 }, { "epoch": 6.034371365809536, "learning_rate": 6.608685815251351e-07, "loss": 3.0428, "step": 140100 }, { "epoch": 6.035232803549124, "learning_rate": 6.608200995496562e-07, "loss": 3.163, "step": 140120 }, { "epoch": 6.036094241288711, "learning_rate": 6.607716175741774e-07, "loss": 3.1273, "step": 140140 }, { "epoch": 6.036955679028298, "learning_rate": 6.607231355986985e-07, "loss": 3.0538, "step": 140160 }, { "epoch": 6.037817116767886, "learning_rate": 6.606746536232196e-07, "loss": 3.0561, "step": 140180 }, { "epoch": 6.038678554507473, "learning_rate": 6.606261716477407e-07, "loss": 3.1734, "step": 140200 }, { "epoch": 6.03953999224706, "learning_rate": 6.605776896722618e-07, "loss": 2.9901, "step": 140220 }, { "epoch": 6.040401429986647, "learning_rate": 6.605292076967829e-07, "loss": 3.1236, "step": 140240 }, { "epoch": 6.041262867726235, "learning_rate": 6.60480725721304e-07, "loss": 3.0149, "step": 140260 }, { "epoch": 6.042124305465823, "learning_rate": 6.604322437458251e-07, "loss": 3.176, "step": 140280 }, { "epoch": 6.04298574320541, "learning_rate": 6.603837617703463e-07, "loss": 3.1077, "step": 140300 }, { "epoch": 6.043847180944997, "learning_rate": 6.603352797948674e-07, "loss": 3.1683, "step": 140320 }, { "epoch": 6.044708618684584, "learning_rate": 6.602867978193885e-07, "loss": 2.9746, "step": 140340 }, { "epoch": 6.045570056424172, "learning_rate": 6.602383158439095e-07, "loss": 3.1203, "step": 140360 }, { "epoch": 6.046431494163759, "learning_rate": 6.601898338684307e-07, "loss": 2.9771, "step": 140380 }, { "epoch": 6.047292931903347, "learning_rate": 6.601413518929517e-07, "loss": 3.1925, "step": 140400 }, { "epoch": 6.048154369642934, "learning_rate": 6.600928699174729e-07, "loss": 3.0159, "step": 140420 }, { "epoch": 6.0490158073825215, "learning_rate": 6.60044387941994e-07, "loss": 3.3095, "step": 140440 }, { "epoch": 6.049877245122109, "learning_rate": 6.599959059665152e-07, "loss": 3.0322, "step": 140460 }, { "epoch": 6.050738682861696, "learning_rate": 6.599474239910362e-07, "loss": 3.0358, "step": 140480 }, { "epoch": 6.051600120601283, "learning_rate": 6.598989420155572e-07, "loss": 3.2903, "step": 140500 }, { "epoch": 6.0524615583408705, "learning_rate": 6.598504600400784e-07, "loss": 3.1244, "step": 140520 }, { "epoch": 6.053322996080459, "learning_rate": 6.598019780645995e-07, "loss": 2.8873, "step": 140540 }, { "epoch": 6.054184433820046, "learning_rate": 6.597534960891206e-07, "loss": 3.2066, "step": 140560 }, { "epoch": 6.055045871559633, "learning_rate": 6.597050141136417e-07, "loss": 3.0456, "step": 140580 }, { "epoch": 6.05590730929922, "learning_rate": 6.596565321381628e-07, "loss": 3.1168, "step": 140600 }, { "epoch": 6.056768747038808, "learning_rate": 6.596080501626839e-07, "loss": 2.9427, "step": 140620 }, { "epoch": 6.057630184778395, "learning_rate": 6.59559568187205e-07, "loss": 3.2248, "step": 140640 }, { "epoch": 6.058491622517982, "learning_rate": 6.595110862117261e-07, "loss": 3.0325, "step": 140660 }, { "epoch": 6.05935306025757, "learning_rate": 6.594626042362473e-07, "loss": 3.111, "step": 140680 }, { "epoch": 6.0602144979971575, "learning_rate": 6.594141222607684e-07, "loss": 2.973, "step": 140700 }, { "epoch": 6.061075935736745, "learning_rate": 6.593656402852895e-07, "loss": 3.0337, "step": 140720 }, { "epoch": 6.061937373476332, "learning_rate": 6.593171583098105e-07, "loss": 3.1221, "step": 140740 }, { "epoch": 6.062798811215919, "learning_rate": 6.592686763343317e-07, "loss": 3.0322, "step": 140760 }, { "epoch": 6.0636602489555065, "learning_rate": 6.592201943588528e-07, "loss": 3.0169, "step": 140780 }, { "epoch": 6.064521686695094, "learning_rate": 6.591717123833738e-07, "loss": 3.0824, "step": 140800 }, { "epoch": 6.065383124434682, "learning_rate": 6.59123230407895e-07, "loss": 3.044, "step": 140820 }, { "epoch": 6.066244562174269, "learning_rate": 6.590747484324162e-07, "loss": 3.2366, "step": 140840 }, { "epoch": 6.067105999913856, "learning_rate": 6.590262664569372e-07, "loss": 3.0372, "step": 140860 }, { "epoch": 6.067967437653444, "learning_rate": 6.589777844814582e-07, "loss": 2.9702, "step": 140880 }, { "epoch": 6.068828875393031, "learning_rate": 6.589293025059794e-07, "loss": 3.0684, "step": 140900 }, { "epoch": 6.069690313132618, "learning_rate": 6.588808205305005e-07, "loss": 3.0935, "step": 140920 }, { "epoch": 6.070551750872205, "learning_rate": 6.588323385550217e-07, "loss": 3.1145, "step": 140940 }, { "epoch": 6.0714131886117935, "learning_rate": 6.587838565795427e-07, "loss": 3.124, "step": 140960 }, { "epoch": 6.072274626351381, "learning_rate": 6.587353746040638e-07, "loss": 3.1731, "step": 140980 }, { "epoch": 6.073136064090968, "learning_rate": 6.58686892628585e-07, "loss": 3.1348, "step": 141000 }, { "epoch": 6.073997501830555, "learning_rate": 6.586384106531061e-07, "loss": 3.1581, "step": 141020 }, { "epoch": 6.0748589395701424, "learning_rate": 6.585899286776271e-07, "loss": 3.1047, "step": 141040 }, { "epoch": 6.07572037730973, "learning_rate": 6.585414467021483e-07, "loss": 3.1277, "step": 141060 }, { "epoch": 6.076581815049317, "learning_rate": 6.584929647266694e-07, "loss": 3.2029, "step": 141080 }, { "epoch": 6.077443252788905, "learning_rate": 6.584444827511904e-07, "loss": 3.1204, "step": 141100 }, { "epoch": 6.078304690528492, "learning_rate": 6.583960007757115e-07, "loss": 3.0006, "step": 141120 }, { "epoch": 6.07916612826808, "learning_rate": 6.583475188002327e-07, "loss": 3.1516, "step": 141140 }, { "epoch": 6.080027566007667, "learning_rate": 6.582990368247538e-07, "loss": 3.2332, "step": 141160 }, { "epoch": 6.080889003747254, "learning_rate": 6.582505548492748e-07, "loss": 3.2021, "step": 141180 }, { "epoch": 6.081750441486841, "learning_rate": 6.58202072873796e-07, "loss": 2.9476, "step": 141200 }, { "epoch": 6.0826118792264285, "learning_rate": 6.581535908983172e-07, "loss": 2.869, "step": 141220 }, { "epoch": 6.083473316966017, "learning_rate": 6.58105108922838e-07, "loss": 3.0986, "step": 141240 }, { "epoch": 6.084334754705604, "learning_rate": 6.580566269473592e-07, "loss": 2.9666, "step": 141260 }, { "epoch": 6.085196192445191, "learning_rate": 6.580081449718804e-07, "loss": 3.0493, "step": 141280 }, { "epoch": 6.086057630184778, "learning_rate": 6.579596629964016e-07, "loss": 3.0523, "step": 141300 }, { "epoch": 6.086919067924366, "learning_rate": 6.579111810209227e-07, "loss": 3.1076, "step": 141320 }, { "epoch": 6.087780505663953, "learning_rate": 6.578626990454438e-07, "loss": 3.076, "step": 141340 }, { "epoch": 6.08864194340354, "learning_rate": 6.578142170699648e-07, "loss": 3.042, "step": 141360 }, { "epoch": 6.089503381143128, "learning_rate": 6.57765735094486e-07, "loss": 3.2608, "step": 141380 }, { "epoch": 6.0903648188827155, "learning_rate": 6.57717253119007e-07, "loss": 3.108, "step": 141400 }, { "epoch": 6.091226256622303, "learning_rate": 6.576687711435281e-07, "loss": 3.1995, "step": 141420 }, { "epoch": 6.09208769436189, "learning_rate": 6.576202891680493e-07, "loss": 3.091, "step": 141440 }, { "epoch": 6.092949132101477, "learning_rate": 6.575718071925705e-07, "loss": 3.0555, "step": 141460 }, { "epoch": 6.0938105698410645, "learning_rate": 6.575233252170915e-07, "loss": 3.1279, "step": 141480 }, { "epoch": 6.094672007580652, "learning_rate": 6.574748432416125e-07, "loss": 3.1466, "step": 141500 }, { "epoch": 6.09553344532024, "learning_rate": 6.574263612661337e-07, "loss": 3.1014, "step": 141520 }, { "epoch": 6.096394883059827, "learning_rate": 6.573778792906549e-07, "loss": 2.9234, "step": 141540 }, { "epoch": 6.097256320799414, "learning_rate": 6.573293973151759e-07, "loss": 3.1651, "step": 141560 }, { "epoch": 6.098117758539002, "learning_rate": 6.57280915339697e-07, "loss": 3.0746, "step": 141580 }, { "epoch": 6.098979196278589, "learning_rate": 6.572324333642182e-07, "loss": 3.0919, "step": 141600 }, { "epoch": 6.099840634018176, "learning_rate": 6.571839513887392e-07, "loss": 3.173, "step": 141620 }, { "epoch": 6.100702071757763, "learning_rate": 6.571354694132602e-07, "loss": 3.0691, "step": 141640 }, { "epoch": 6.1015635094973515, "learning_rate": 6.570869874377814e-07, "loss": 3.1636, "step": 141660 }, { "epoch": 6.102424947236939, "learning_rate": 6.570385054623026e-07, "loss": 3.1672, "step": 141680 }, { "epoch": 6.103286384976526, "learning_rate": 6.569900234868236e-07, "loss": 3.0015, "step": 141700 }, { "epoch": 6.104147822716113, "learning_rate": 6.569415415113447e-07, "loss": 2.9495, "step": 141720 }, { "epoch": 6.1050092604557005, "learning_rate": 6.568930595358659e-07, "loss": 3.0205, "step": 141740 }, { "epoch": 6.105870698195288, "learning_rate": 6.568445775603869e-07, "loss": 3.1471, "step": 141760 }, { "epoch": 6.106732135934875, "learning_rate": 6.56796095584908e-07, "loss": 2.9674, "step": 141780 }, { "epoch": 6.107593573674463, "learning_rate": 6.567476136094291e-07, "loss": 3.0847, "step": 141800 }, { "epoch": 6.10845501141405, "learning_rate": 6.566991316339503e-07, "loss": 3.0304, "step": 141820 }, { "epoch": 6.109316449153638, "learning_rate": 6.566506496584715e-07, "loss": 3.0879, "step": 141840 }, { "epoch": 6.110177886893225, "learning_rate": 6.566021676829925e-07, "loss": 3.0913, "step": 141860 }, { "epoch": 6.111039324632812, "learning_rate": 6.565536857075135e-07, "loss": 2.9659, "step": 141880 }, { "epoch": 6.111900762372399, "learning_rate": 6.565052037320347e-07, "loss": 3.1774, "step": 141900 }, { "epoch": 6.112762200111987, "learning_rate": 6.564567217565559e-07, "loss": 2.9349, "step": 141920 }, { "epoch": 6.113623637851575, "learning_rate": 6.564082397810769e-07, "loss": 3.1102, "step": 141940 }, { "epoch": 6.114485075591162, "learning_rate": 6.56359757805598e-07, "loss": 3.0541, "step": 141960 }, { "epoch": 6.115346513330749, "learning_rate": 6.563112758301192e-07, "loss": 3.1639, "step": 141980 }, { "epoch": 6.116207951070336, "learning_rate": 6.562627938546403e-07, "loss": 3.0711, "step": 142000 }, { "epoch": 6.117069388809924, "learning_rate": 6.562143118791613e-07, "loss": 3.3387, "step": 142020 }, { "epoch": 6.117930826549511, "learning_rate": 6.561658299036824e-07, "loss": 3.0099, "step": 142040 }, { "epoch": 6.118792264289098, "learning_rate": 6.561173479282036e-07, "loss": 3.1491, "step": 142060 }, { "epoch": 6.119653702028686, "learning_rate": 6.560688659527247e-07, "loss": 3.0026, "step": 142080 }, { "epoch": 6.1205151397682735, "learning_rate": 6.560203839772458e-07, "loss": 3.0704, "step": 142100 }, { "epoch": 6.121376577507861, "learning_rate": 6.559719020017669e-07, "loss": 3.1666, "step": 142120 }, { "epoch": 6.122238015247448, "learning_rate": 6.55923420026288e-07, "loss": 3.0388, "step": 142140 }, { "epoch": 6.123099452987035, "learning_rate": 6.55874938050809e-07, "loss": 3.1915, "step": 142160 }, { "epoch": 6.1239608907266225, "learning_rate": 6.558264560753301e-07, "loss": 3.1515, "step": 142180 }, { "epoch": 6.12482232846621, "learning_rate": 6.557779740998513e-07, "loss": 2.9918, "step": 142200 }, { "epoch": 6.125683766205798, "learning_rate": 6.557294921243725e-07, "loss": 2.9383, "step": 142220 }, { "epoch": 6.126545203945385, "learning_rate": 6.556810101488935e-07, "loss": 3.0703, "step": 142240 }, { "epoch": 6.127406641684972, "learning_rate": 6.556325281734145e-07, "loss": 3.0261, "step": 142260 }, { "epoch": 6.12826807942456, "learning_rate": 6.555840461979357e-07, "loss": 2.9771, "step": 142280 }, { "epoch": 6.129129517164147, "learning_rate": 6.555355642224569e-07, "loss": 3.1518, "step": 142300 }, { "epoch": 6.129990954903734, "learning_rate": 6.554870822469779e-07, "loss": 3.2171, "step": 142320 }, { "epoch": 6.130852392643321, "learning_rate": 6.55438600271499e-07, "loss": 3.105, "step": 142340 }, { "epoch": 6.131713830382909, "learning_rate": 6.553901182960202e-07, "loss": 3.0182, "step": 142360 }, { "epoch": 6.132575268122497, "learning_rate": 6.553416363205412e-07, "loss": 2.9401, "step": 142380 }, { "epoch": 6.133436705862084, "learning_rate": 6.552931543450623e-07, "loss": 2.9168, "step": 142400 }, { "epoch": 6.134298143601671, "learning_rate": 6.552446723695834e-07, "loss": 3.0665, "step": 142420 }, { "epoch": 6.1351595813412585, "learning_rate": 6.551961903941046e-07, "loss": 2.9681, "step": 142440 }, { "epoch": 6.136021019080846, "learning_rate": 6.551477084186257e-07, "loss": 3.1715, "step": 142460 }, { "epoch": 6.136882456820433, "learning_rate": 6.550992264431468e-07, "loss": 2.9768, "step": 142480 }, { "epoch": 6.137743894560021, "learning_rate": 6.550507444676679e-07, "loss": 3.0413, "step": 142500 }, { "epoch": 6.138605332299608, "learning_rate": 6.55002262492189e-07, "loss": 3.0386, "step": 142520 }, { "epoch": 6.139466770039196, "learning_rate": 6.549537805167101e-07, "loss": 3.2124, "step": 142540 }, { "epoch": 6.140328207778783, "learning_rate": 6.549052985412312e-07, "loss": 2.9274, "step": 142560 }, { "epoch": 6.14118964551837, "learning_rate": 6.548568165657523e-07, "loss": 3.0114, "step": 142580 }, { "epoch": 6.142051083257957, "learning_rate": 6.548083345902735e-07, "loss": 3.0412, "step": 142600 }, { "epoch": 6.142912520997545, "learning_rate": 6.547598526147946e-07, "loss": 3.1086, "step": 142620 }, { "epoch": 6.143773958737132, "learning_rate": 6.547113706393156e-07, "loss": 3.0649, "step": 142640 }, { "epoch": 6.14463539647672, "learning_rate": 6.546628886638367e-07, "loss": 3.1731, "step": 142660 }, { "epoch": 6.145496834216307, "learning_rate": 6.546144066883578e-07, "loss": 3.0079, "step": 142680 }, { "epoch": 6.1463582719558945, "learning_rate": 6.545659247128789e-07, "loss": 3.1038, "step": 142700 }, { "epoch": 6.147219709695482, "learning_rate": 6.545174427374e-07, "loss": 3.2172, "step": 142720 }, { "epoch": 6.148081147435069, "learning_rate": 6.544689607619212e-07, "loss": 3.005, "step": 142740 }, { "epoch": 6.148942585174656, "learning_rate": 6.544204787864422e-07, "loss": 2.9745, "step": 142760 }, { "epoch": 6.149804022914244, "learning_rate": 6.543719968109633e-07, "loss": 3.0308, "step": 142780 }, { "epoch": 6.150665460653832, "learning_rate": 6.543235148354844e-07, "loss": 3.214, "step": 142800 }, { "epoch": 6.151526898393419, "learning_rate": 6.542750328600056e-07, "loss": 3.0938, "step": 142820 }, { "epoch": 6.152388336133006, "learning_rate": 6.542265508845267e-07, "loss": 2.962, "step": 142840 }, { "epoch": 6.153249773872593, "learning_rate": 6.541780689090478e-07, "loss": 2.9912, "step": 142860 }, { "epoch": 6.1541112116121806, "learning_rate": 6.541295869335689e-07, "loss": 3.1085, "step": 142880 }, { "epoch": 6.154972649351768, "learning_rate": 6.5408110495809e-07, "loss": 2.9788, "step": 142900 }, { "epoch": 6.155834087091355, "learning_rate": 6.540326229826111e-07, "loss": 3.0205, "step": 142920 }, { "epoch": 6.156695524830943, "learning_rate": 6.539841410071322e-07, "loss": 2.9646, "step": 142940 }, { "epoch": 6.15755696257053, "learning_rate": 6.539356590316533e-07, "loss": 2.9958, "step": 142960 }, { "epoch": 6.158418400310118, "learning_rate": 6.538871770561744e-07, "loss": 3.1534, "step": 142980 }, { "epoch": 6.159279838049705, "learning_rate": 6.538386950806956e-07, "loss": 2.9997, "step": 143000 }, { "epoch": 6.160141275789292, "learning_rate": 6.537902131052165e-07, "loss": 3.0556, "step": 143020 }, { "epoch": 6.161002713528879, "learning_rate": 6.537417311297377e-07, "loss": 3.1091, "step": 143040 }, { "epoch": 6.161864151268467, "learning_rate": 6.536932491542588e-07, "loss": 2.9388, "step": 143060 }, { "epoch": 6.162725589008055, "learning_rate": 6.5364476717878e-07, "loss": 2.9674, "step": 143080 }, { "epoch": 6.163587026747642, "learning_rate": 6.535962852033011e-07, "loss": 2.9648, "step": 143100 }, { "epoch": 6.164448464487229, "learning_rate": 6.535478032278223e-07, "loss": 2.9091, "step": 143120 }, { "epoch": 6.1653099022268165, "learning_rate": 6.534993212523432e-07, "loss": 3.1276, "step": 143140 }, { "epoch": 6.166171339966404, "learning_rate": 6.534508392768644e-07, "loss": 2.9383, "step": 143160 }, { "epoch": 6.167032777705991, "learning_rate": 6.534023573013855e-07, "loss": 3.0959, "step": 143180 }, { "epoch": 6.167894215445578, "learning_rate": 6.533538753259066e-07, "loss": 3.158, "step": 143200 }, { "epoch": 6.168755653185166, "learning_rate": 6.533053933504277e-07, "loss": 3.1312, "step": 143220 }, { "epoch": 6.169617090924754, "learning_rate": 6.532569113749488e-07, "loss": 3.0724, "step": 143240 }, { "epoch": 6.170478528664341, "learning_rate": 6.532084293994699e-07, "loss": 3.0025, "step": 143260 }, { "epoch": 6.171339966403928, "learning_rate": 6.531599474239909e-07, "loss": 3.1661, "step": 143280 }, { "epoch": 6.172201404143515, "learning_rate": 6.531114654485121e-07, "loss": 3.0536, "step": 143300 }, { "epoch": 6.173062841883103, "learning_rate": 6.530629834730332e-07, "loss": 3.0641, "step": 143320 }, { "epoch": 6.17392427962269, "learning_rate": 6.530145014975543e-07, "loss": 3.0325, "step": 143340 }, { "epoch": 6.174785717362278, "learning_rate": 6.529660195220754e-07, "loss": 3.1385, "step": 143360 }, { "epoch": 6.175647155101865, "learning_rate": 6.529175375465966e-07, "loss": 3.095, "step": 143380 }, { "epoch": 6.1765085928414525, "learning_rate": 6.528690555711176e-07, "loss": 2.9911, "step": 143400 }, { "epoch": 6.17737003058104, "learning_rate": 6.528205735956387e-07, "loss": 2.9103, "step": 143420 }, { "epoch": 6.178231468320627, "learning_rate": 6.527720916201598e-07, "loss": 3.0496, "step": 143440 }, { "epoch": 6.179092906060214, "learning_rate": 6.52723609644681e-07, "loss": 3.0576, "step": 143460 }, { "epoch": 6.1799543437998015, "learning_rate": 6.526751276692021e-07, "loss": 3.0677, "step": 143480 }, { "epoch": 6.18081578153939, "learning_rate": 6.526266456937232e-07, "loss": 2.9543, "step": 143500 }, { "epoch": 6.181677219278977, "learning_rate": 6.525781637182443e-07, "loss": 3.073, "step": 143520 }, { "epoch": 6.182538657018564, "learning_rate": 6.525296817427653e-07, "loss": 3.0333, "step": 143540 }, { "epoch": 6.183400094758151, "learning_rate": 6.524811997672865e-07, "loss": 3.1605, "step": 143560 }, { "epoch": 6.184261532497739, "learning_rate": 6.524327177918075e-07, "loss": 3.1581, "step": 143580 }, { "epoch": 6.185122970237326, "learning_rate": 6.523842358163287e-07, "loss": 3.0362, "step": 143600 }, { "epoch": 6.185984407976913, "learning_rate": 6.523357538408499e-07, "loss": 2.978, "step": 143620 }, { "epoch": 6.186845845716501, "learning_rate": 6.52287271865371e-07, "loss": 2.9823, "step": 143640 }, { "epoch": 6.1877072834560884, "learning_rate": 6.522387898898919e-07, "loss": 3.1352, "step": 143660 }, { "epoch": 6.188568721195676, "learning_rate": 6.521903079144131e-07, "loss": 3.0386, "step": 143680 }, { "epoch": 6.189430158935263, "learning_rate": 6.521418259389343e-07, "loss": 3.034, "step": 143700 }, { "epoch": 6.19029159667485, "learning_rate": 6.520933439634554e-07, "loss": 2.9932, "step": 143720 }, { "epoch": 6.191153034414437, "learning_rate": 6.520448619879764e-07, "loss": 2.9454, "step": 143740 }, { "epoch": 6.192014472154025, "learning_rate": 6.519963800124976e-07, "loss": 3.0561, "step": 143760 }, { "epoch": 6.192875909893613, "learning_rate": 6.519478980370186e-07, "loss": 3.1347, "step": 143780 }, { "epoch": 6.1937373476332, "learning_rate": 6.518994160615397e-07, "loss": 3.073, "step": 143800 }, { "epoch": 6.194598785372787, "learning_rate": 6.518509340860608e-07, "loss": 2.997, "step": 143820 }, { "epoch": 6.1954602231123745, "learning_rate": 6.51802452110582e-07, "loss": 3.0257, "step": 143840 }, { "epoch": 6.196321660851962, "learning_rate": 6.517539701351031e-07, "loss": 2.9795, "step": 143860 }, { "epoch": 6.197183098591549, "learning_rate": 6.517054881596241e-07, "loss": 3.113, "step": 143880 }, { "epoch": 6.198044536331136, "learning_rate": 6.516570061841453e-07, "loss": 3.1734, "step": 143900 }, { "epoch": 6.198905974070724, "learning_rate": 6.516085242086664e-07, "loss": 2.9414, "step": 143920 }, { "epoch": 6.199767411810312, "learning_rate": 6.515600422331875e-07, "loss": 3.2025, "step": 143940 }, { "epoch": 6.200628849549899, "learning_rate": 6.515115602577085e-07, "loss": 3.0881, "step": 143960 }, { "epoch": 6.201490287289486, "learning_rate": 6.514630782822297e-07, "loss": 2.9911, "step": 143980 }, { "epoch": 6.202351725029073, "learning_rate": 6.514145963067509e-07, "loss": 3.0289, "step": 144000 }, { "epoch": 6.203213162768661, "learning_rate": 6.51366114331272e-07, "loss": 3.0679, "step": 144020 }, { "epoch": 6.204074600508248, "learning_rate": 6.513176323557929e-07, "loss": 3.0797, "step": 144040 }, { "epoch": 6.204936038247836, "learning_rate": 6.512691503803141e-07, "loss": 3.055, "step": 144060 }, { "epoch": 6.205797475987423, "learning_rate": 6.512206684048353e-07, "loss": 2.9941, "step": 144080 }, { "epoch": 6.2066589137270105, "learning_rate": 6.511721864293564e-07, "loss": 3.17, "step": 144100 }, { "epoch": 6.207520351466598, "learning_rate": 6.511237044538774e-07, "loss": 2.9849, "step": 144120 }, { "epoch": 6.208381789206185, "learning_rate": 6.510752224783986e-07, "loss": 3.0946, "step": 144140 }, { "epoch": 6.209243226945772, "learning_rate": 6.510267405029197e-07, "loss": 3.0971, "step": 144160 }, { "epoch": 6.2101046646853595, "learning_rate": 6.509782585274407e-07, "loss": 3.1411, "step": 144180 }, { "epoch": 6.210966102424948, "learning_rate": 6.509297765519618e-07, "loss": 3.0213, "step": 144200 }, { "epoch": 6.211827540164535, "learning_rate": 6.50881294576483e-07, "loss": 3.0573, "step": 144220 }, { "epoch": 6.212688977904122, "learning_rate": 6.508328126010042e-07, "loss": 3.2744, "step": 144240 }, { "epoch": 6.213550415643709, "learning_rate": 6.507843306255252e-07, "loss": 2.903, "step": 144260 }, { "epoch": 6.214411853383297, "learning_rate": 6.507358486500463e-07, "loss": 3.0774, "step": 144280 }, { "epoch": 6.215273291122884, "learning_rate": 6.506873666745674e-07, "loss": 3.0651, "step": 144300 }, { "epoch": 6.216134728862471, "learning_rate": 6.506388846990885e-07, "loss": 2.9615, "step": 144320 }, { "epoch": 6.216996166602059, "learning_rate": 6.505904027236095e-07, "loss": 2.9431, "step": 144340 }, { "epoch": 6.2178576043416465, "learning_rate": 6.505419207481307e-07, "loss": 3.1006, "step": 144360 }, { "epoch": 6.218719042081234, "learning_rate": 6.504934387726519e-07, "loss": 3.061, "step": 144380 }, { "epoch": 6.219580479820821, "learning_rate": 6.50444956797173e-07, "loss": 3.2328, "step": 144400 }, { "epoch": 6.220441917560408, "learning_rate": 6.503964748216939e-07, "loss": 3.1352, "step": 144420 }, { "epoch": 6.2213033552999955, "learning_rate": 6.503479928462151e-07, "loss": 2.96, "step": 144440 }, { "epoch": 6.222164793039583, "learning_rate": 6.502995108707363e-07, "loss": 2.9712, "step": 144460 }, { "epoch": 6.223026230779171, "learning_rate": 6.502510288952574e-07, "loss": 3.1767, "step": 144480 }, { "epoch": 6.223887668518758, "learning_rate": 6.502025469197784e-07, "loss": 3.0188, "step": 144500 }, { "epoch": 6.224749106258345, "learning_rate": 6.501540649442996e-07, "loss": 2.9544, "step": 144520 }, { "epoch": 6.225610543997933, "learning_rate": 6.501055829688207e-07, "loss": 2.9634, "step": 144540 }, { "epoch": 6.22647198173752, "learning_rate": 6.500571009933417e-07, "loss": 3.016, "step": 144560 }, { "epoch": 6.227333419477107, "learning_rate": 6.500086190178628e-07, "loss": 3.1407, "step": 144580 }, { "epoch": 6.228194857216694, "learning_rate": 6.49960137042384e-07, "loss": 3.0797, "step": 144600 }, { "epoch": 6.229056294956282, "learning_rate": 6.499116550669052e-07, "loss": 3.2018, "step": 144620 }, { "epoch": 6.22991773269587, "learning_rate": 6.498631730914262e-07, "loss": 3.1486, "step": 144640 }, { "epoch": 6.230779170435457, "learning_rate": 6.498146911159473e-07, "loss": 2.9957, "step": 144660 }, { "epoch": 6.231640608175044, "learning_rate": 6.497662091404684e-07, "loss": 3.1289, "step": 144680 }, { "epoch": 6.232502045914631, "learning_rate": 6.497177271649896e-07, "loss": 3.2294, "step": 144700 }, { "epoch": 6.233363483654219, "learning_rate": 6.496692451895106e-07, "loss": 3.0735, "step": 144720 }, { "epoch": 6.234224921393806, "learning_rate": 6.496207632140317e-07, "loss": 3.1478, "step": 144740 }, { "epoch": 6.235086359133394, "learning_rate": 6.495722812385529e-07, "loss": 3.0473, "step": 144760 }, { "epoch": 6.235947796872981, "learning_rate": 6.495237992630741e-07, "loss": 3.3, "step": 144780 }, { "epoch": 6.2368092346125685, "learning_rate": 6.494753172875949e-07, "loss": 3.0556, "step": 144800 }, { "epoch": 6.237670672352156, "learning_rate": 6.494268353121161e-07, "loss": 3.0405, "step": 144820 }, { "epoch": 6.238532110091743, "learning_rate": 6.493783533366373e-07, "loss": 3.0471, "step": 144840 }, { "epoch": 6.23939354783133, "learning_rate": 6.493298713611583e-07, "loss": 3.073, "step": 144860 }, { "epoch": 6.2402549855709175, "learning_rate": 6.492813893856794e-07, "loss": 3.1403, "step": 144880 }, { "epoch": 6.241116423310506, "learning_rate": 6.492329074102007e-07, "loss": 3.1593, "step": 144900 }, { "epoch": 6.241977861050093, "learning_rate": 6.491844254347217e-07, "loss": 3.0036, "step": 144920 }, { "epoch": 6.24283929878968, "learning_rate": 6.491359434592427e-07, "loss": 3.0493, "step": 144940 }, { "epoch": 6.243700736529267, "learning_rate": 6.490874614837638e-07, "loss": 2.9428, "step": 144960 }, { "epoch": 6.244562174268855, "learning_rate": 6.49038979508285e-07, "loss": 2.8886, "step": 144980 }, { "epoch": 6.245423612008442, "learning_rate": 6.489904975328062e-07, "loss": 2.9753, "step": 145000 }, { "epoch": 6.246285049748029, "learning_rate": 6.489420155573272e-07, "loss": 3.0476, "step": 145020 }, { "epoch": 6.247146487487617, "learning_rate": 6.488935335818483e-07, "loss": 3.0522, "step": 145040 }, { "epoch": 6.2480079252272045, "learning_rate": 6.488450516063694e-07, "loss": 2.9302, "step": 145060 }, { "epoch": 6.248869362966792, "learning_rate": 6.487965696308906e-07, "loss": 3.182, "step": 145080 }, { "epoch": 6.249730800706379, "learning_rate": 6.487480876554116e-07, "loss": 3.1149, "step": 145100 }, { "epoch": 6.250592238445966, "learning_rate": 6.486996056799327e-07, "loss": 3.1622, "step": 145120 }, { "epoch": 6.2514536761855535, "learning_rate": 6.486511237044539e-07, "loss": 3.0016, "step": 145140 }, { "epoch": 6.252315113925141, "learning_rate": 6.48602641728975e-07, "loss": 3.0787, "step": 145160 }, { "epoch": 6.253176551664728, "learning_rate": 6.48554159753496e-07, "loss": 3.0271, "step": 145180 }, { "epoch": 6.254037989404316, "learning_rate": 6.485056777780171e-07, "loss": 3.0857, "step": 145200 }, { "epoch": 6.254899427143903, "learning_rate": 6.484571958025383e-07, "loss": 2.9853, "step": 145220 }, { "epoch": 6.255760864883491, "learning_rate": 6.484087138270594e-07, "loss": 3.0568, "step": 145240 }, { "epoch": 6.256622302623078, "learning_rate": 6.483602318515805e-07, "loss": 2.8701, "step": 145260 }, { "epoch": 6.257483740362665, "learning_rate": 6.483117498761016e-07, "loss": 3.1234, "step": 145280 }, { "epoch": 6.258345178102252, "learning_rate": 6.482632679006228e-07, "loss": 3.0076, "step": 145300 }, { "epoch": 6.2592066158418405, "learning_rate": 6.482147859251437e-07, "loss": 3.0725, "step": 145320 }, { "epoch": 6.260068053581428, "learning_rate": 6.481663039496649e-07, "loss": 3.0528, "step": 145340 }, { "epoch": 6.260929491321015, "learning_rate": 6.48117821974186e-07, "loss": 3.081, "step": 145360 }, { "epoch": 6.261790929060602, "learning_rate": 6.480693399987072e-07, "loss": 3.1135, "step": 145380 }, { "epoch": 6.262652366800189, "learning_rate": 6.480208580232282e-07, "loss": 3.206, "step": 145400 }, { "epoch": 6.263513804539777, "learning_rate": 6.479723760477493e-07, "loss": 3.148, "step": 145420 }, { "epoch": 6.264375242279364, "learning_rate": 6.479238940722704e-07, "loss": 3.1843, "step": 145440 }, { "epoch": 6.265236680018951, "learning_rate": 6.478754120967915e-07, "loss": 3.1198, "step": 145460 }, { "epoch": 6.266098117758539, "learning_rate": 6.478269301213126e-07, "loss": 3.1223, "step": 145480 }, { "epoch": 6.2669595554981266, "learning_rate": 6.477784481458337e-07, "loss": 3.2059, "step": 145500 }, { "epoch": 6.267820993237714, "learning_rate": 6.477299661703549e-07, "loss": 3.0883, "step": 145520 }, { "epoch": 6.268682430977301, "learning_rate": 6.47681484194876e-07, "loss": 2.9758, "step": 145540 }, { "epoch": 6.269543868716888, "learning_rate": 6.47633002219397e-07, "loss": 3.0687, "step": 145560 }, { "epoch": 6.2704053064564755, "learning_rate": 6.475845202439181e-07, "loss": 2.9901, "step": 145580 }, { "epoch": 6.271266744196064, "learning_rate": 6.475360382684393e-07, "loss": 2.9865, "step": 145600 }, { "epoch": 6.272128181935651, "learning_rate": 6.474875562929604e-07, "loss": 3.0191, "step": 145620 }, { "epoch": 6.272989619675238, "learning_rate": 6.474390743174815e-07, "loss": 3.1671, "step": 145640 }, { "epoch": 6.273851057414825, "learning_rate": 6.473905923420026e-07, "loss": 2.9921, "step": 145660 }, { "epoch": 6.274712495154413, "learning_rate": 6.473421103665238e-07, "loss": 3.2051, "step": 145680 }, { "epoch": 6.275573932894, "learning_rate": 6.472936283910448e-07, "loss": 3.1568, "step": 145700 }, { "epoch": 6.276435370633587, "learning_rate": 6.472451464155659e-07, "loss": 3.1747, "step": 145720 }, { "epoch": 6.277296808373174, "learning_rate": 6.47196664440087e-07, "loss": 3.0205, "step": 145740 }, { "epoch": 6.2781582461127625, "learning_rate": 6.471481824646082e-07, "loss": 2.9759, "step": 145760 }, { "epoch": 6.27901968385235, "learning_rate": 6.470997004891293e-07, "loss": 3.0396, "step": 145780 }, { "epoch": 6.279881121591937, "learning_rate": 6.470512185136504e-07, "loss": 3.0091, "step": 145800 }, { "epoch": 6.280742559331524, "learning_rate": 6.470027365381714e-07, "loss": 3.0232, "step": 145820 }, { "epoch": 6.2816039970711115, "learning_rate": 6.469542545626925e-07, "loss": 3.0821, "step": 145840 }, { "epoch": 6.282465434810699, "learning_rate": 6.469057725872137e-07, "loss": 3.0388, "step": 145860 }, { "epoch": 6.283326872550287, "learning_rate": 6.468572906117348e-07, "loss": 3.0643, "step": 145880 }, { "epoch": 6.284188310289874, "learning_rate": 6.468088086362559e-07, "loss": 3.0133, "step": 145900 }, { "epoch": 6.285049748029461, "learning_rate": 6.46760326660777e-07, "loss": 3.0982, "step": 145920 }, { "epoch": 6.285911185769049, "learning_rate": 6.46711844685298e-07, "loss": 3.1341, "step": 145940 }, { "epoch": 6.286772623508636, "learning_rate": 6.466633627098191e-07, "loss": 3.0694, "step": 145960 }, { "epoch": 6.287634061248223, "learning_rate": 6.466148807343403e-07, "loss": 3.0871, "step": 145980 }, { "epoch": 6.28849549898781, "learning_rate": 6.465663987588614e-07, "loss": 3.0118, "step": 146000 }, { "epoch": 6.289356936727398, "learning_rate": 6.465179167833825e-07, "loss": 2.97, "step": 146020 }, { "epoch": 6.290218374466986, "learning_rate": 6.464694348079036e-07, "loss": 3.0341, "step": 146040 }, { "epoch": 6.291079812206573, "learning_rate": 6.464209528324248e-07, "loss": 3.1868, "step": 146060 }, { "epoch": 6.29194124994616, "learning_rate": 6.463724708569458e-07, "loss": 3.146, "step": 146080 }, { "epoch": 6.2928026876857475, "learning_rate": 6.463239888814669e-07, "loss": 3.1331, "step": 146100 }, { "epoch": 6.293664125425335, "learning_rate": 6.46275506905988e-07, "loss": 3.0846, "step": 146120 }, { "epoch": 6.294525563164922, "learning_rate": 6.462270249305091e-07, "loss": 3.0856, "step": 146140 }, { "epoch": 6.29538700090451, "learning_rate": 6.461785429550303e-07, "loss": 3.0674, "step": 146160 }, { "epoch": 6.296248438644097, "learning_rate": 6.461300609795514e-07, "loss": 2.8756, "step": 146180 }, { "epoch": 6.297109876383685, "learning_rate": 6.460815790040724e-07, "loss": 3.084, "step": 146200 }, { "epoch": 6.297971314123272, "learning_rate": 6.460330970285935e-07, "loss": 2.9546, "step": 146220 }, { "epoch": 6.298832751862859, "learning_rate": 6.459846150531147e-07, "loss": 3.0375, "step": 146240 }, { "epoch": 6.299694189602446, "learning_rate": 6.459361330776358e-07, "loss": 3.0693, "step": 146260 }, { "epoch": 6.300555627342034, "learning_rate": 6.458876511021569e-07, "loss": 3.0388, "step": 146280 }, { "epoch": 6.301417065081621, "learning_rate": 6.45839169126678e-07, "loss": 3.0492, "step": 146300 }, { "epoch": 6.302278502821209, "learning_rate": 6.457906871511991e-07, "loss": 3.0288, "step": 146320 }, { "epoch": 6.303139940560796, "learning_rate": 6.457422051757202e-07, "loss": 3.0578, "step": 146340 }, { "epoch": 6.304001378300383, "learning_rate": 6.456937232002412e-07, "loss": 3.1535, "step": 146360 }, { "epoch": 6.304862816039971, "learning_rate": 6.456452412247624e-07, "loss": 3.1023, "step": 146380 }, { "epoch": 6.305724253779558, "learning_rate": 6.455967592492836e-07, "loss": 3.0593, "step": 146400 }, { "epoch": 6.306585691519145, "learning_rate": 6.455482772738047e-07, "loss": 3.1925, "step": 146420 }, { "epoch": 6.307447129258732, "learning_rate": 6.454997952983257e-07, "loss": 3.0739, "step": 146440 }, { "epoch": 6.3083085669983205, "learning_rate": 6.454513133228468e-07, "loss": 2.9221, "step": 146460 }, { "epoch": 6.309170004737908, "learning_rate": 6.454028313473679e-07, "loss": 3.1608, "step": 146480 }, { "epoch": 6.310031442477495, "learning_rate": 6.45354349371889e-07, "loss": 3.1005, "step": 146500 }, { "epoch": 6.310892880217082, "learning_rate": 6.453058673964101e-07, "loss": 2.9418, "step": 146520 }, { "epoch": 6.3117543179566695, "learning_rate": 6.452573854209313e-07, "loss": 3.0864, "step": 146540 }, { "epoch": 6.312615755696257, "learning_rate": 6.452089034454524e-07, "loss": 3.0841, "step": 146560 }, { "epoch": 6.313477193435844, "learning_rate": 6.451604214699734e-07, "loss": 2.9752, "step": 146580 }, { "epoch": 6.314338631175432, "learning_rate": 6.451119394944945e-07, "loss": 2.9973, "step": 146600 }, { "epoch": 6.315200068915019, "learning_rate": 6.450634575190157e-07, "loss": 3.1664, "step": 146620 }, { "epoch": 6.316061506654607, "learning_rate": 6.450149755435368e-07, "loss": 3.3198, "step": 146640 }, { "epoch": 6.316922944394194, "learning_rate": 6.449664935680578e-07, "loss": 3.0288, "step": 146660 }, { "epoch": 6.317784382133781, "learning_rate": 6.449180115925791e-07, "loss": 2.9783, "step": 146680 }, { "epoch": 6.318645819873368, "learning_rate": 6.448695296171001e-07, "loss": 2.9619, "step": 146700 }, { "epoch": 6.319507257612956, "learning_rate": 6.448210476416212e-07, "loss": 3.1545, "step": 146720 }, { "epoch": 6.320368695352544, "learning_rate": 6.447725656661422e-07, "loss": 2.9653, "step": 146740 }, { "epoch": 6.321230133092131, "learning_rate": 6.447240836906634e-07, "loss": 3.2072, "step": 146760 }, { "epoch": 6.322091570831718, "learning_rate": 6.446756017151846e-07, "loss": 3.0717, "step": 146780 }, { "epoch": 6.3229530085713055, "learning_rate": 6.446271197397057e-07, "loss": 2.9369, "step": 146800 }, { "epoch": 6.323814446310893, "learning_rate": 6.445786377642267e-07, "loss": 3.1566, "step": 146820 }, { "epoch": 6.32467588405048, "learning_rate": 6.445301557887478e-07, "loss": 3.0599, "step": 146840 }, { "epoch": 6.325537321790067, "learning_rate": 6.44481673813269e-07, "loss": 3.1047, "step": 146860 }, { "epoch": 6.326398759529655, "learning_rate": 6.444331918377901e-07, "loss": 2.9497, "step": 146880 }, { "epoch": 6.327260197269243, "learning_rate": 6.443847098623111e-07, "loss": 2.9879, "step": 146900 }, { "epoch": 6.32812163500883, "learning_rate": 6.443362278868323e-07, "loss": 2.9598, "step": 146920 }, { "epoch": 6.328983072748417, "learning_rate": 6.442877459113535e-07, "loss": 2.9785, "step": 146940 }, { "epoch": 6.329844510488004, "learning_rate": 6.442392639358745e-07, "loss": 2.9926, "step": 146960 }, { "epoch": 6.330705948227592, "learning_rate": 6.441907819603955e-07, "loss": 2.9378, "step": 146980 }, { "epoch": 6.331567385967179, "learning_rate": 6.441422999849167e-07, "loss": 3.0532, "step": 147000 }, { "epoch": 6.332428823706767, "learning_rate": 6.440938180094378e-07, "loss": 2.9795, "step": 147020 }, { "epoch": 6.333290261446354, "learning_rate": 6.440453360339588e-07, "loss": 2.869, "step": 147040 }, { "epoch": 6.3341516991859415, "learning_rate": 6.4399685405848e-07, "loss": 3.2375, "step": 147060 }, { "epoch": 6.335013136925529, "learning_rate": 6.439483720830012e-07, "loss": 2.9151, "step": 147080 }, { "epoch": 6.335874574665116, "learning_rate": 6.438998901075222e-07, "loss": 3.0957, "step": 147100 }, { "epoch": 6.336736012404703, "learning_rate": 6.438514081320432e-07, "loss": 3.1848, "step": 147120 }, { "epoch": 6.33759745014429, "learning_rate": 6.438029261565644e-07, "loss": 2.9763, "step": 147140 }, { "epoch": 6.338458887883879, "learning_rate": 6.437544441810856e-07, "loss": 2.9703, "step": 147160 }, { "epoch": 6.339320325623466, "learning_rate": 6.437059622056067e-07, "loss": 3.0977, "step": 147180 }, { "epoch": 6.340181763363053, "learning_rate": 6.436574802301277e-07, "loss": 3.0322, "step": 147200 }, { "epoch": 6.34104320110264, "learning_rate": 6.436089982546488e-07, "loss": 3.1085, "step": 147220 }, { "epoch": 6.3419046388422275, "learning_rate": 6.4356051627917e-07, "loss": 3.0974, "step": 147240 }, { "epoch": 6.342766076581815, "learning_rate": 6.435120343036911e-07, "loss": 3.0323, "step": 147260 }, { "epoch": 6.343627514321402, "learning_rate": 6.434635523282121e-07, "loss": 3.0292, "step": 147280 }, { "epoch": 6.34448895206099, "learning_rate": 6.434150703527333e-07, "loss": 3.1899, "step": 147300 }, { "epoch": 6.345350389800577, "learning_rate": 6.433665883772545e-07, "loss": 3.01, "step": 147320 }, { "epoch": 6.346211827540165, "learning_rate": 6.433181064017754e-07, "loss": 3.0239, "step": 147340 }, { "epoch": 6.347073265279752, "learning_rate": 6.432696244262965e-07, "loss": 3.1099, "step": 147360 }, { "epoch": 6.347934703019339, "learning_rate": 6.432211424508177e-07, "loss": 3.0773, "step": 147380 }, { "epoch": 6.348796140758926, "learning_rate": 6.431726604753389e-07, "loss": 2.9724, "step": 147400 }, { "epoch": 6.349657578498514, "learning_rate": 6.431241784998599e-07, "loss": 3.0582, "step": 147420 }, { "epoch": 6.350519016238102, "learning_rate": 6.43075696524381e-07, "loss": 2.9966, "step": 147440 }, { "epoch": 6.351380453977689, "learning_rate": 6.430272145489022e-07, "loss": 3.2228, "step": 147460 }, { "epoch": 6.352241891717276, "learning_rate": 6.429787325734233e-07, "loss": 3.165, "step": 147480 }, { "epoch": 6.3531033294568635, "learning_rate": 6.429302505979443e-07, "loss": 2.9948, "step": 147500 }, { "epoch": 6.353964767196451, "learning_rate": 6.428817686224654e-07, "loss": 2.983, "step": 147520 }, { "epoch": 6.354826204936038, "learning_rate": 6.428332866469866e-07, "loss": 3.1103, "step": 147540 }, { "epoch": 6.355687642675625, "learning_rate": 6.427848046715077e-07, "loss": 3.075, "step": 147560 }, { "epoch": 6.356549080415213, "learning_rate": 6.427363226960287e-07, "loss": 3.1159, "step": 147580 }, { "epoch": 6.357410518154801, "learning_rate": 6.426878407205498e-07, "loss": 2.7799, "step": 147600 }, { "epoch": 6.358271955894388, "learning_rate": 6.42639358745071e-07, "loss": 3.0357, "step": 147620 }, { "epoch": 6.359133393633975, "learning_rate": 6.42590876769592e-07, "loss": 3.1084, "step": 147640 }, { "epoch": 6.359994831373562, "learning_rate": 6.425423947941131e-07, "loss": 2.9815, "step": 147660 }, { "epoch": 6.36085626911315, "learning_rate": 6.424939128186343e-07, "loss": 2.9376, "step": 147680 }, { "epoch": 6.361717706852737, "learning_rate": 6.424454308431555e-07, "loss": 3.103, "step": 147700 }, { "epoch": 6.362579144592325, "learning_rate": 6.423969488676764e-07, "loss": 3.0496, "step": 147720 }, { "epoch": 6.363440582331912, "learning_rate": 6.423484668921975e-07, "loss": 3.0304, "step": 147740 }, { "epoch": 6.3643020200714995, "learning_rate": 6.422999849167187e-07, "loss": 3.0093, "step": 147760 }, { "epoch": 6.365163457811087, "learning_rate": 6.422515029412399e-07, "loss": 3.0737, "step": 147780 }, { "epoch": 6.366024895550674, "learning_rate": 6.422030209657609e-07, "loss": 3.0453, "step": 147800 }, { "epoch": 6.366886333290261, "learning_rate": 6.42154538990282e-07, "loss": 2.9104, "step": 147820 }, { "epoch": 6.3677477710298485, "learning_rate": 6.421060570148032e-07, "loss": 3.1226, "step": 147840 }, { "epoch": 6.368609208769437, "learning_rate": 6.420575750393243e-07, "loss": 3.2256, "step": 147860 }, { "epoch": 6.369470646509024, "learning_rate": 6.420090930638453e-07, "loss": 3.0533, "step": 147880 }, { "epoch": 6.370332084248611, "learning_rate": 6.419606110883664e-07, "loss": 3.1103, "step": 147900 }, { "epoch": 6.371193521988198, "learning_rate": 6.419121291128876e-07, "loss": 2.9301, "step": 147920 }, { "epoch": 6.372054959727786, "learning_rate": 6.418636471374088e-07, "loss": 3.0135, "step": 147940 }, { "epoch": 6.372916397467373, "learning_rate": 6.418151651619298e-07, "loss": 3.0704, "step": 147960 }, { "epoch": 6.37377783520696, "learning_rate": 6.417666831864508e-07, "loss": 3.1493, "step": 147980 }, { "epoch": 6.374639272946548, "learning_rate": 6.41718201210972e-07, "loss": 3.149, "step": 148000 }, { "epoch": 6.375500710686135, "learning_rate": 6.416697192354931e-07, "loss": 3.0029, "step": 148020 }, { "epoch": 6.376362148425723, "learning_rate": 6.416212372600142e-07, "loss": 2.964, "step": 148040 }, { "epoch": 6.37722358616531, "learning_rate": 6.415727552845353e-07, "loss": 3.0097, "step": 148060 }, { "epoch": 6.378085023904897, "learning_rate": 6.415242733090565e-07, "loss": 3.0326, "step": 148080 }, { "epoch": 6.378946461644484, "learning_rate": 6.414757913335775e-07, "loss": 3.0253, "step": 148100 }, { "epoch": 6.379807899384072, "learning_rate": 6.414273093580985e-07, "loss": 3.1097, "step": 148120 }, { "epoch": 6.38066933712366, "learning_rate": 6.413788273826197e-07, "loss": 2.9429, "step": 148140 }, { "epoch": 6.381530774863247, "learning_rate": 6.413303454071409e-07, "loss": 3.1749, "step": 148160 }, { "epoch": 6.382392212602834, "learning_rate": 6.412818634316619e-07, "loss": 3.0793, "step": 148180 }, { "epoch": 6.3832536503424215, "learning_rate": 6.41233381456183e-07, "loss": 3.1345, "step": 148200 }, { "epoch": 6.384115088082009, "learning_rate": 6.411848994807042e-07, "loss": 3.0555, "step": 148220 }, { "epoch": 6.384976525821596, "learning_rate": 6.411364175052253e-07, "loss": 2.9021, "step": 148240 }, { "epoch": 6.385837963561183, "learning_rate": 6.410879355297463e-07, "loss": 3.0521, "step": 148260 }, { "epoch": 6.3866994013007705, "learning_rate": 6.410394535542674e-07, "loss": 3.0175, "step": 148280 }, { "epoch": 6.387560839040359, "learning_rate": 6.409909715787886e-07, "loss": 2.9556, "step": 148300 }, { "epoch": 6.388422276779946, "learning_rate": 6.409424896033097e-07, "loss": 2.9525, "step": 148320 }, { "epoch": 6.389283714519533, "learning_rate": 6.408940076278308e-07, "loss": 3.2338, "step": 148340 }, { "epoch": 6.39014515225912, "learning_rate": 6.408455256523518e-07, "loss": 2.9022, "step": 148360 }, { "epoch": 6.391006589998708, "learning_rate": 6.40797043676873e-07, "loss": 3.2906, "step": 148380 }, { "epoch": 6.391868027738295, "learning_rate": 6.407485617013941e-07, "loss": 2.9087, "step": 148400 }, { "epoch": 6.392729465477883, "learning_rate": 6.407000797259152e-07, "loss": 3.16, "step": 148420 }, { "epoch": 6.39359090321747, "learning_rate": 6.406515977504363e-07, "loss": 3.0006, "step": 148440 }, { "epoch": 6.3944523409570575, "learning_rate": 6.406031157749576e-07, "loss": 2.9738, "step": 148460 }, { "epoch": 6.395313778696645, "learning_rate": 6.405546337994785e-07, "loss": 3.1392, "step": 148480 }, { "epoch": 6.396175216436232, "learning_rate": 6.405061518239996e-07, "loss": 2.8689, "step": 148500 }, { "epoch": 6.397036654175819, "learning_rate": 6.404576698485207e-07, "loss": 3.0214, "step": 148520 }, { "epoch": 6.3978980919154065, "learning_rate": 6.404091878730419e-07, "loss": 3.0429, "step": 148540 }, { "epoch": 6.398759529654994, "learning_rate": 6.40360705897563e-07, "loss": 3.0169, "step": 148560 }, { "epoch": 6.399620967394582, "learning_rate": 6.403122239220841e-07, "loss": 3.0331, "step": 148580 }, { "epoch": 6.400482405134169, "learning_rate": 6.402637419466052e-07, "loss": 3.16, "step": 148600 }, { "epoch": 6.401343842873756, "learning_rate": 6.402152599711262e-07, "loss": 2.8436, "step": 148620 }, { "epoch": 6.402205280613344, "learning_rate": 6.401667779956473e-07, "loss": 2.8837, "step": 148640 }, { "epoch": 6.403066718352931, "learning_rate": 6.401182960201684e-07, "loss": 3.1698, "step": 148660 }, { "epoch": 6.403928156092518, "learning_rate": 6.400698140446896e-07, "loss": 2.9466, "step": 148680 }, { "epoch": 6.404789593832106, "learning_rate": 6.400213320692107e-07, "loss": 2.9721, "step": 148700 }, { "epoch": 6.4056510315716935, "learning_rate": 6.399728500937318e-07, "loss": 3.0964, "step": 148720 }, { "epoch": 6.406512469311281, "learning_rate": 6.399243681182529e-07, "loss": 3.102, "step": 148740 }, { "epoch": 6.407373907050868, "learning_rate": 6.39875886142774e-07, "loss": 3.0463, "step": 148760 }, { "epoch": 6.408235344790455, "learning_rate": 6.398274041672951e-07, "loss": 2.9633, "step": 148780 }, { "epoch": 6.4090967825300424, "learning_rate": 6.397789221918162e-07, "loss": 2.9336, "step": 148800 }, { "epoch": 6.40995822026963, "learning_rate": 6.397304402163373e-07, "loss": 2.8699, "step": 148820 }, { "epoch": 6.410819658009217, "learning_rate": 6.396819582408585e-07, "loss": 3.0416, "step": 148840 }, { "epoch": 6.411681095748805, "learning_rate": 6.396334762653796e-07, "loss": 2.9257, "step": 148860 }, { "epoch": 6.412542533488392, "learning_rate": 6.395849942899006e-07, "loss": 3.0721, "step": 148880 }, { "epoch": 6.41340397122798, "learning_rate": 6.395365123144217e-07, "loss": 2.9446, "step": 148900 }, { "epoch": 6.414265408967567, "learning_rate": 6.394880303389428e-07, "loss": 2.9593, "step": 148920 }, { "epoch": 6.415126846707154, "learning_rate": 6.39439548363464e-07, "loss": 3.0242, "step": 148940 }, { "epoch": 6.415988284446741, "learning_rate": 6.393910663879851e-07, "loss": 2.9517, "step": 148960 }, { "epoch": 6.416849722186329, "learning_rate": 6.393425844125062e-07, "loss": 3.0163, "step": 148980 }, { "epoch": 6.417711159925917, "learning_rate": 6.392941024370272e-07, "loss": 3.1562, "step": 149000 }, { "epoch": 6.418572597665504, "learning_rate": 6.392456204615484e-07, "loss": 3.1841, "step": 149020 }, { "epoch": 6.419434035405091, "learning_rate": 6.391971384860695e-07, "loss": 2.9744, "step": 149040 }, { "epoch": 6.420295473144678, "learning_rate": 6.391486565105906e-07, "loss": 2.9977, "step": 149060 }, { "epoch": 6.421156910884266, "learning_rate": 6.391001745351117e-07, "loss": 3.1158, "step": 149080 }, { "epoch": 6.422018348623853, "learning_rate": 6.390516925596329e-07, "loss": 3.08, "step": 149100 }, { "epoch": 6.42287978636344, "learning_rate": 6.390032105841539e-07, "loss": 3.0742, "step": 149120 }, { "epoch": 6.423741224103028, "learning_rate": 6.38954728608675e-07, "loss": 3.0499, "step": 149140 }, { "epoch": 6.4246026618426155, "learning_rate": 6.389062466331961e-07, "loss": 3.0616, "step": 149160 }, { "epoch": 6.425464099582203, "learning_rate": 6.388577646577172e-07, "loss": 3.1319, "step": 149180 }, { "epoch": 6.42632553732179, "learning_rate": 6.388092826822382e-07, "loss": 3.0148, "step": 149200 }, { "epoch": 6.427186975061377, "learning_rate": 6.387608007067594e-07, "loss": 3.0379, "step": 149220 }, { "epoch": 6.4280484128009645, "learning_rate": 6.387123187312806e-07, "loss": 3.0734, "step": 149240 }, { "epoch": 6.428909850540553, "learning_rate": 6.386638367558016e-07, "loss": 3.2378, "step": 149260 }, { "epoch": 6.42977128828014, "learning_rate": 6.386153547803227e-07, "loss": 3.063, "step": 149280 }, { "epoch": 6.430632726019727, "learning_rate": 6.385668728048438e-07, "loss": 2.9773, "step": 149300 }, { "epoch": 6.431494163759314, "learning_rate": 6.38518390829365e-07, "loss": 2.9428, "step": 149320 }, { "epoch": 6.432355601498902, "learning_rate": 6.384699088538861e-07, "loss": 3.1824, "step": 149340 }, { "epoch": 6.433217039238489, "learning_rate": 6.384214268784072e-07, "loss": 3.1833, "step": 149360 }, { "epoch": 6.434078476978076, "learning_rate": 6.383729449029282e-07, "loss": 3.0497, "step": 149380 }, { "epoch": 6.434939914717663, "learning_rate": 6.383244629274494e-07, "loss": 3.1283, "step": 149400 }, { "epoch": 6.4358013524572515, "learning_rate": 6.382759809519705e-07, "loss": 2.8892, "step": 149420 }, { "epoch": 6.436662790196839, "learning_rate": 6.382274989764916e-07, "loss": 3.1393, "step": 149440 }, { "epoch": 6.437524227936426, "learning_rate": 6.381790170010127e-07, "loss": 3.084, "step": 149460 }, { "epoch": 6.438385665676013, "learning_rate": 6.381305350255339e-07, "loss": 3.1023, "step": 149480 }, { "epoch": 6.4392471034156005, "learning_rate": 6.380820530500549e-07, "loss": 3.2493, "step": 149500 }, { "epoch": 6.440108541155188, "learning_rate": 6.380335710745759e-07, "loss": 3.0065, "step": 149520 }, { "epoch": 6.440969978894776, "learning_rate": 6.379850890990971e-07, "loss": 3.0019, "step": 149540 }, { "epoch": 6.441831416634363, "learning_rate": 6.379366071236183e-07, "loss": 3.1531, "step": 149560 }, { "epoch": 6.44269285437395, "learning_rate": 6.378881251481394e-07, "loss": 2.9318, "step": 149580 }, { "epoch": 6.443554292113538, "learning_rate": 6.378396431726604e-07, "loss": 3.1113, "step": 149600 }, { "epoch": 6.444415729853125, "learning_rate": 6.377911611971816e-07, "loss": 3.1662, "step": 149620 }, { "epoch": 6.445277167592712, "learning_rate": 6.377426792217027e-07, "loss": 2.8247, "step": 149640 }, { "epoch": 6.446138605332299, "learning_rate": 6.376941972462238e-07, "loss": 3.0338, "step": 149660 }, { "epoch": 6.447000043071887, "learning_rate": 6.376457152707448e-07, "loss": 3.1673, "step": 149680 }, { "epoch": 6.447861480811475, "learning_rate": 6.37597233295266e-07, "loss": 2.9663, "step": 149700 }, { "epoch": 6.448722918551062, "learning_rate": 6.375487513197872e-07, "loss": 3.0305, "step": 149720 }, { "epoch": 6.449584356290649, "learning_rate": 6.375002693443082e-07, "loss": 3.0724, "step": 149740 }, { "epoch": 6.450445794030236, "learning_rate": 6.374517873688292e-07, "loss": 3.1412, "step": 149760 }, { "epoch": 6.451307231769824, "learning_rate": 6.374033053933504e-07, "loss": 2.8918, "step": 149780 }, { "epoch": 6.452168669509411, "learning_rate": 6.373548234178715e-07, "loss": 2.9777, "step": 149800 }, { "epoch": 6.453030107248998, "learning_rate": 6.373063414423925e-07, "loss": 3.0213, "step": 149820 }, { "epoch": 6.453891544988586, "learning_rate": 6.372578594669137e-07, "loss": 3.0968, "step": 149840 }, { "epoch": 6.4547529827281735, "learning_rate": 6.372093774914349e-07, "loss": 2.8913, "step": 149860 }, { "epoch": 6.455614420467761, "learning_rate": 6.37160895515956e-07, "loss": 2.9613, "step": 149880 }, { "epoch": 6.456475858207348, "learning_rate": 6.371124135404769e-07, "loss": 2.9579, "step": 149900 }, { "epoch": 6.457337295946935, "learning_rate": 6.370639315649981e-07, "loss": 3.0013, "step": 149920 }, { "epoch": 6.4581987336865225, "learning_rate": 6.370154495895193e-07, "loss": 3.0597, "step": 149940 }, { "epoch": 6.45906017142611, "learning_rate": 6.369669676140404e-07, "loss": 3.1495, "step": 149960 }, { "epoch": 6.459921609165698, "learning_rate": 6.369184856385614e-07, "loss": 3.1095, "step": 149980 }, { "epoch": 6.460783046905285, "learning_rate": 6.368700036630826e-07, "loss": 3.1146, "step": 150000 }, { "epoch": 6.461644484644872, "learning_rate": 6.368215216876037e-07, "loss": 3.0585, "step": 150020 }, { "epoch": 6.46250592238446, "learning_rate": 6.367730397121248e-07, "loss": 2.9691, "step": 150040 }, { "epoch": 6.463367360124047, "learning_rate": 6.367245577366458e-07, "loss": 3.1086, "step": 150060 }, { "epoch": 6.464228797863634, "learning_rate": 6.36676075761167e-07, "loss": 2.8684, "step": 150080 }, { "epoch": 6.465090235603221, "learning_rate": 6.366275937856882e-07, "loss": 3.0294, "step": 150100 }, { "epoch": 6.4659516733428095, "learning_rate": 6.365791118102093e-07, "loss": 3.1275, "step": 150120 }, { "epoch": 6.466813111082397, "learning_rate": 6.365306298347302e-07, "loss": 3.093, "step": 150140 }, { "epoch": 6.467674548821984, "learning_rate": 6.364821478592514e-07, "loss": 3.0717, "step": 150160 }, { "epoch": 6.468535986561571, "learning_rate": 6.364336658837726e-07, "loss": 3.2582, "step": 150180 }, { "epoch": 6.4693974243011585, "learning_rate": 6.363851839082936e-07, "loss": 3.2377, "step": 150200 }, { "epoch": 6.470258862040746, "learning_rate": 6.363367019328147e-07, "loss": 2.9406, "step": 150220 }, { "epoch": 6.471120299780333, "learning_rate": 6.36288219957336e-07, "loss": 2.9965, "step": 150240 }, { "epoch": 6.471981737519921, "learning_rate": 6.36239737981857e-07, "loss": 3.0417, "step": 150260 }, { "epoch": 6.472843175259508, "learning_rate": 6.361912560063779e-07, "loss": 2.9891, "step": 150280 }, { "epoch": 6.473704612999096, "learning_rate": 6.361427740308991e-07, "loss": 2.9861, "step": 150300 }, { "epoch": 6.474566050738683, "learning_rate": 6.360942920554203e-07, "loss": 3.0218, "step": 150320 }, { "epoch": 6.47542748847827, "learning_rate": 6.360458100799414e-07, "loss": 3.1552, "step": 150340 }, { "epoch": 6.476288926217857, "learning_rate": 6.359973281044624e-07, "loss": 2.9628, "step": 150360 }, { "epoch": 6.477150363957445, "learning_rate": 6.359488461289836e-07, "loss": 3.18, "step": 150380 }, { "epoch": 6.478011801697033, "learning_rate": 6.359003641535047e-07, "loss": 3.0132, "step": 150400 }, { "epoch": 6.47887323943662, "learning_rate": 6.358518821780257e-07, "loss": 3.1383, "step": 150420 }, { "epoch": 6.479734677176207, "learning_rate": 6.358034002025468e-07, "loss": 2.9295, "step": 150440 }, { "epoch": 6.4805961149157945, "learning_rate": 6.35754918227068e-07, "loss": 3.0995, "step": 150460 }, { "epoch": 6.481457552655382, "learning_rate": 6.357064362515892e-07, "loss": 3.0508, "step": 150480 }, { "epoch": 6.482318990394969, "learning_rate": 6.356579542761102e-07, "loss": 2.9362, "step": 150500 }, { "epoch": 6.483180428134556, "learning_rate": 6.356094723006313e-07, "loss": 3.0329, "step": 150520 }, { "epoch": 6.484041865874144, "learning_rate": 6.355609903251524e-07, "loss": 2.8904, "step": 150540 }, { "epoch": 6.484903303613732, "learning_rate": 6.355125083496736e-07, "loss": 3.0757, "step": 150560 }, { "epoch": 6.485764741353319, "learning_rate": 6.354640263741946e-07, "loss": 2.9863, "step": 150580 }, { "epoch": 6.486626179092906, "learning_rate": 6.354155443987157e-07, "loss": 2.9458, "step": 150600 }, { "epoch": 6.487487616832493, "learning_rate": 6.353670624232369e-07, "loss": 2.9559, "step": 150620 }, { "epoch": 6.4883490545720806, "learning_rate": 6.353185804477581e-07, "loss": 2.8638, "step": 150640 }, { "epoch": 6.489210492311668, "learning_rate": 6.35270098472279e-07, "loss": 3.0731, "step": 150660 }, { "epoch": 6.490071930051256, "learning_rate": 6.352216164968001e-07, "loss": 2.9872, "step": 150680 }, { "epoch": 6.490933367790843, "learning_rate": 6.351731345213213e-07, "loss": 2.867, "step": 150700 }, { "epoch": 6.49179480553043, "learning_rate": 6.351246525458425e-07, "loss": 3.085, "step": 150720 }, { "epoch": 6.492656243270018, "learning_rate": 6.350761705703635e-07, "loss": 2.9261, "step": 150740 }, { "epoch": 6.493517681009605, "learning_rate": 6.350276885948846e-07, "loss": 3.0252, "step": 150760 }, { "epoch": 6.494379118749192, "learning_rate": 6.349792066194057e-07, "loss": 2.9674, "step": 150780 }, { "epoch": 6.495240556488779, "learning_rate": 6.349307246439267e-07, "loss": 3.0716, "step": 150800 }, { "epoch": 6.4961019942283675, "learning_rate": 6.348822426684478e-07, "loss": 3.0616, "step": 150820 }, { "epoch": 6.496963431967955, "learning_rate": 6.34833760692969e-07, "loss": 3.0068, "step": 150840 }, { "epoch": 6.497824869707542, "learning_rate": 6.347852787174902e-07, "loss": 3.0799, "step": 150860 }, { "epoch": 6.498686307447129, "learning_rate": 6.347367967420112e-07, "loss": 3.0078, "step": 150880 }, { "epoch": 6.4995477451867165, "learning_rate": 6.346883147665323e-07, "loss": 2.8516, "step": 150900 }, { "epoch": 6.500409182926304, "learning_rate": 6.346398327910534e-07, "loss": 2.8823, "step": 150920 }, { "epoch": 6.501270620665891, "learning_rate": 6.345913508155746e-07, "loss": 3.1984, "step": 150940 }, { "epoch": 6.502132058405479, "learning_rate": 6.345428688400956e-07, "loss": 2.9213, "step": 150960 }, { "epoch": 6.502993496145066, "learning_rate": 6.344943868646166e-07, "loss": 3.0559, "step": 150980 }, { "epoch": 6.503854933884654, "learning_rate": 6.344459048891379e-07, "loss": 3.1253, "step": 151000 }, { "epoch": 6.504716371624241, "learning_rate": 6.343974229136591e-07, "loss": 3.0372, "step": 151020 }, { "epoch": 6.505577809363828, "learning_rate": 6.3434894093818e-07, "loss": 3.0609, "step": 151040 }, { "epoch": 6.506439247103415, "learning_rate": 6.343004589627011e-07, "loss": 2.9774, "step": 151060 }, { "epoch": 6.507300684843003, "learning_rate": 6.342519769872223e-07, "loss": 2.9747, "step": 151080 }, { "epoch": 6.50816212258259, "learning_rate": 6.342034950117435e-07, "loss": 3.011, "step": 151100 }, { "epoch": 6.509023560322178, "learning_rate": 6.341550130362645e-07, "loss": 3.0645, "step": 151120 }, { "epoch": 6.509884998061765, "learning_rate": 6.341065310607856e-07, "loss": 3.0472, "step": 151140 }, { "epoch": 6.5107464358013525, "learning_rate": 6.340580490853067e-07, "loss": 3.0908, "step": 151160 }, { "epoch": 6.51160787354094, "learning_rate": 6.340095671098278e-07, "loss": 2.8934, "step": 151180 }, { "epoch": 6.512469311280527, "learning_rate": 6.339610851343489e-07, "loss": 2.9795, "step": 151200 }, { "epoch": 6.513330749020114, "learning_rate": 6.3391260315887e-07, "loss": 2.9716, "step": 151220 }, { "epoch": 6.514192186759702, "learning_rate": 6.338641211833912e-07, "loss": 2.8835, "step": 151240 }, { "epoch": 6.51505362449929, "learning_rate": 6.338156392079123e-07, "loss": 3.1284, "step": 151260 }, { "epoch": 6.515915062238877, "learning_rate": 6.337671572324333e-07, "loss": 2.9869, "step": 151280 }, { "epoch": 6.516776499978464, "learning_rate": 6.337186752569544e-07, "loss": 2.9428, "step": 151300 }, { "epoch": 6.517637937718051, "learning_rate": 6.336701932814756e-07, "loss": 2.9179, "step": 151320 }, { "epoch": 6.518499375457639, "learning_rate": 6.336217113059966e-07, "loss": 2.9666, "step": 151340 }, { "epoch": 6.519360813197226, "learning_rate": 6.335732293305177e-07, "loss": 3.0892, "step": 151360 }, { "epoch": 6.520222250936813, "learning_rate": 6.335247473550389e-07, "loss": 3.0733, "step": 151380 }, { "epoch": 6.521083688676401, "learning_rate": 6.3347626537956e-07, "loss": 3.0391, "step": 151400 }, { "epoch": 6.5219451264159884, "learning_rate": 6.33427783404081e-07, "loss": 3.1777, "step": 151420 }, { "epoch": 6.522806564155576, "learning_rate": 6.333793014286021e-07, "loss": 3.0745, "step": 151440 }, { "epoch": 6.523668001895163, "learning_rate": 6.333308194531233e-07, "loss": 3.1122, "step": 151460 }, { "epoch": 6.52452943963475, "learning_rate": 6.332823374776444e-07, "loss": 3.0054, "step": 151480 }, { "epoch": 6.525390877374337, "learning_rate": 6.332338555021656e-07, "loss": 3.1837, "step": 151500 }, { "epoch": 6.526252315113926, "learning_rate": 6.331853735266866e-07, "loss": 3.2382, "step": 151520 }, { "epoch": 6.527113752853513, "learning_rate": 6.331368915512077e-07, "loss": 2.9591, "step": 151540 }, { "epoch": 6.5279751905931, "learning_rate": 6.330884095757288e-07, "loss": 3.1018, "step": 151560 }, { "epoch": 6.528836628332687, "learning_rate": 6.330399276002499e-07, "loss": 3.1298, "step": 151580 }, { "epoch": 6.5296980660722745, "learning_rate": 6.32991445624771e-07, "loss": 2.9416, "step": 151600 }, { "epoch": 6.530559503811862, "learning_rate": 6.329429636492922e-07, "loss": 3.0476, "step": 151620 }, { "epoch": 6.531420941551449, "learning_rate": 6.328944816738133e-07, "loss": 3.0574, "step": 151640 }, { "epoch": 6.532282379291036, "learning_rate": 6.328459996983344e-07, "loss": 2.966, "step": 151660 }, { "epoch": 6.533143817030624, "learning_rate": 6.327975177228554e-07, "loss": 2.8506, "step": 151680 }, { "epoch": 6.534005254770212, "learning_rate": 6.327490357473765e-07, "loss": 3.0833, "step": 151700 }, { "epoch": 6.534866692509799, "learning_rate": 6.327005537718977e-07, "loss": 2.9755, "step": 151720 }, { "epoch": 6.535728130249386, "learning_rate": 6.326520717964188e-07, "loss": 2.852, "step": 151740 }, { "epoch": 6.536589567988973, "learning_rate": 6.326035898209399e-07, "loss": 3.0673, "step": 151760 }, { "epoch": 6.537451005728561, "learning_rate": 6.32555107845461e-07, "loss": 2.9666, "step": 151780 }, { "epoch": 6.538312443468149, "learning_rate": 6.325066258699821e-07, "loss": 3.0076, "step": 151800 }, { "epoch": 6.539173881207736, "learning_rate": 6.324581438945032e-07, "loss": 3.0325, "step": 151820 }, { "epoch": 6.540035318947323, "learning_rate": 6.324096619190243e-07, "loss": 3.0532, "step": 151840 }, { "epoch": 6.5408967566869105, "learning_rate": 6.323611799435454e-07, "loss": 3.0661, "step": 151860 }, { "epoch": 6.541758194426498, "learning_rate": 6.323126979680665e-07, "loss": 3.1099, "step": 151880 }, { "epoch": 6.542619632166085, "learning_rate": 6.322642159925876e-07, "loss": 3.0268, "step": 151900 }, { "epoch": 6.543481069905672, "learning_rate": 6.322157340171087e-07, "loss": 2.7766, "step": 151920 }, { "epoch": 6.5443425076452595, "learning_rate": 6.321672520416298e-07, "loss": 2.896, "step": 151940 }, { "epoch": 6.545203945384848, "learning_rate": 6.321187700661509e-07, "loss": 2.8296, "step": 151960 }, { "epoch": 6.546065383124435, "learning_rate": 6.32070288090672e-07, "loss": 3.0717, "step": 151980 }, { "epoch": 6.546926820864022, "learning_rate": 6.320218061151931e-07, "loss": 3.0814, "step": 152000 }, { "epoch": 6.547788258603609, "learning_rate": 6.319733241397144e-07, "loss": 2.9106, "step": 152020 }, { "epoch": 6.548649696343197, "learning_rate": 6.319248421642354e-07, "loss": 3.0612, "step": 152040 }, { "epoch": 6.549511134082784, "learning_rate": 6.318763601887564e-07, "loss": 3.28, "step": 152060 }, { "epoch": 6.550372571822372, "learning_rate": 6.318278782132775e-07, "loss": 2.9751, "step": 152080 }, { "epoch": 6.551234009561959, "learning_rate": 6.317793962377987e-07, "loss": 3.0044, "step": 152100 }, { "epoch": 6.5520954473015465, "learning_rate": 6.317309142623198e-07, "loss": 3.0349, "step": 152120 }, { "epoch": 6.552956885041134, "learning_rate": 6.316824322868409e-07, "loss": 3.017, "step": 152140 }, { "epoch": 6.553818322780721, "learning_rate": 6.31633950311362e-07, "loss": 3.1175, "step": 152160 }, { "epoch": 6.554679760520308, "learning_rate": 6.315854683358831e-07, "loss": 2.9024, "step": 152180 }, { "epoch": 6.5555411982598955, "learning_rate": 6.315369863604042e-07, "loss": 3.0506, "step": 152200 }, { "epoch": 6.556402635999483, "learning_rate": 6.314885043849253e-07, "loss": 3.1815, "step": 152220 }, { "epoch": 6.557264073739071, "learning_rate": 6.314400224094464e-07, "loss": 3.133, "step": 152240 }, { "epoch": 6.558125511478658, "learning_rate": 6.313915404339676e-07, "loss": 3.0963, "step": 152260 }, { "epoch": 6.558986949218245, "learning_rate": 6.313430584584887e-07, "loss": 3.0607, "step": 152280 }, { "epoch": 6.559848386957833, "learning_rate": 6.312945764830098e-07, "loss": 2.9569, "step": 152300 }, { "epoch": 6.56070982469742, "learning_rate": 6.312460945075308e-07, "loss": 2.9461, "step": 152320 }, { "epoch": 6.561571262437007, "learning_rate": 6.31197612532052e-07, "loss": 3.1044, "step": 152340 }, { "epoch": 6.562432700176595, "learning_rate": 6.311491305565731e-07, "loss": 2.9768, "step": 152360 }, { "epoch": 6.563294137916182, "learning_rate": 6.311006485810941e-07, "loss": 3.0912, "step": 152380 }, { "epoch": 6.56415557565577, "learning_rate": 6.310521666056153e-07, "loss": 3.0005, "step": 152400 }, { "epoch": 6.565017013395357, "learning_rate": 6.310036846301364e-07, "loss": 3.0401, "step": 152420 }, { "epoch": 6.565878451134944, "learning_rate": 6.309552026546574e-07, "loss": 2.9733, "step": 152440 }, { "epoch": 6.566739888874531, "learning_rate": 6.309067206791785e-07, "loss": 3.0368, "step": 152460 }, { "epoch": 6.567601326614119, "learning_rate": 6.308582387036997e-07, "loss": 3.1191, "step": 152480 }, { "epoch": 6.568462764353706, "learning_rate": 6.308097567282208e-07, "loss": 2.8328, "step": 152500 }, { "epoch": 6.569324202093294, "learning_rate": 6.307612747527419e-07, "loss": 3.0683, "step": 152520 }, { "epoch": 6.570185639832881, "learning_rate": 6.30712792777263e-07, "loss": 3.0126, "step": 152540 }, { "epoch": 6.5710470775724685, "learning_rate": 6.306643108017841e-07, "loss": 3.1029, "step": 152560 }, { "epoch": 6.571908515312056, "learning_rate": 6.306158288263052e-07, "loss": 3.1557, "step": 152580 }, { "epoch": 6.572769953051643, "learning_rate": 6.305673468508262e-07, "loss": 3.0914, "step": 152600 }, { "epoch": 6.57363139079123, "learning_rate": 6.305188648753474e-07, "loss": 3.066, "step": 152620 }, { "epoch": 6.574492828530818, "learning_rate": 6.304703828998686e-07, "loss": 3.0865, "step": 152640 }, { "epoch": 6.575354266270406, "learning_rate": 6.304219009243897e-07, "loss": 3.0166, "step": 152660 }, { "epoch": 6.576215704009993, "learning_rate": 6.303734189489107e-07, "loss": 3.0326, "step": 152680 }, { "epoch": 6.57707714174958, "learning_rate": 6.303249369734318e-07, "loss": 3.045, "step": 152700 }, { "epoch": 6.577938579489167, "learning_rate": 6.30276454997953e-07, "loss": 3.0789, "step": 152720 }, { "epoch": 6.578800017228755, "learning_rate": 6.302279730224741e-07, "loss": 3.1799, "step": 152740 }, { "epoch": 6.579661454968342, "learning_rate": 6.30179491046995e-07, "loss": 3.0191, "step": 152760 }, { "epoch": 6.580522892707929, "learning_rate": 6.301310090715163e-07, "loss": 3.1214, "step": 152780 }, { "epoch": 6.581384330447517, "learning_rate": 6.300825270960375e-07, "loss": 3.0332, "step": 152800 }, { "epoch": 6.5822457681871045, "learning_rate": 6.300340451205585e-07, "loss": 2.9769, "step": 152820 }, { "epoch": 6.583107205926692, "learning_rate": 6.299855631450795e-07, "loss": 3.0994, "step": 152840 }, { "epoch": 6.583968643666279, "learning_rate": 6.299370811696008e-07, "loss": 2.9288, "step": 152860 }, { "epoch": 6.584830081405866, "learning_rate": 6.298885991941219e-07, "loss": 2.7699, "step": 152880 }, { "epoch": 6.5856915191454535, "learning_rate": 6.29840117218643e-07, "loss": 2.9334, "step": 152900 }, { "epoch": 6.586552956885042, "learning_rate": 6.29791635243164e-07, "loss": 3.0299, "step": 152920 }, { "epoch": 6.587414394624629, "learning_rate": 6.297431532676851e-07, "loss": 3.0097, "step": 152940 }, { "epoch": 6.588275832364216, "learning_rate": 6.296946712922062e-07, "loss": 2.9578, "step": 152960 }, { "epoch": 6.589137270103803, "learning_rate": 6.296461893167272e-07, "loss": 3.0689, "step": 152980 }, { "epoch": 6.589998707843391, "learning_rate": 6.295977073412484e-07, "loss": 3.0299, "step": 153000 }, { "epoch": 6.590860145582978, "learning_rate": 6.295492253657696e-07, "loss": 2.9866, "step": 153020 }, { "epoch": 6.591721583322565, "learning_rate": 6.295007433902907e-07, "loss": 2.827, "step": 153040 }, { "epoch": 6.592583021062152, "learning_rate": 6.294522614148117e-07, "loss": 2.9763, "step": 153060 }, { "epoch": 6.5934444588017405, "learning_rate": 6.294037794393328e-07, "loss": 2.9473, "step": 153080 }, { "epoch": 6.594305896541328, "learning_rate": 6.29355297463854e-07, "loss": 2.9952, "step": 153100 }, { "epoch": 6.595167334280915, "learning_rate": 6.293068154883751e-07, "loss": 2.9059, "step": 153120 }, { "epoch": 6.596028772020502, "learning_rate": 6.292583335128961e-07, "loss": 3.0828, "step": 153140 }, { "epoch": 6.596890209760089, "learning_rate": 6.292098515374173e-07, "loss": 2.925, "step": 153160 }, { "epoch": 6.597751647499677, "learning_rate": 6.291613695619385e-07, "loss": 3.0201, "step": 153180 }, { "epoch": 6.598613085239265, "learning_rate": 6.291128875864595e-07, "loss": 3.0182, "step": 153200 }, { "epoch": 6.599474522978852, "learning_rate": 6.290644056109805e-07, "loss": 2.9782, "step": 153220 }, { "epoch": 6.600335960718439, "learning_rate": 6.290159236355017e-07, "loss": 2.8781, "step": 153240 }, { "epoch": 6.6011973984580266, "learning_rate": 6.289674416600229e-07, "loss": 3.0769, "step": 153260 }, { "epoch": 6.602058836197614, "learning_rate": 6.289189596845441e-07, "loss": 2.9958, "step": 153280 }, { "epoch": 6.602920273937201, "learning_rate": 6.28870477709065e-07, "loss": 3.1945, "step": 153300 }, { "epoch": 6.603781711676788, "learning_rate": 6.288219957335861e-07, "loss": 3.116, "step": 153320 }, { "epoch": 6.6046431494163755, "learning_rate": 6.287735137581073e-07, "loss": 2.8838, "step": 153340 }, { "epoch": 6.605504587155964, "learning_rate": 6.287250317826283e-07, "loss": 2.9389, "step": 153360 }, { "epoch": 6.606366024895551, "learning_rate": 6.286765498071494e-07, "loss": 3.1823, "step": 153380 }, { "epoch": 6.607227462635138, "learning_rate": 6.286280678316706e-07, "loss": 3.2413, "step": 153400 }, { "epoch": 6.608088900374725, "learning_rate": 6.285795858561918e-07, "loss": 3.0336, "step": 153420 }, { "epoch": 6.608950338114313, "learning_rate": 6.285311038807128e-07, "loss": 3.0477, "step": 153440 }, { "epoch": 6.6098117758539, "learning_rate": 6.284826219052338e-07, "loss": 3.0603, "step": 153460 }, { "epoch": 6.610673213593488, "learning_rate": 6.28434139929755e-07, "loss": 3.0448, "step": 153480 }, { "epoch": 6.611534651333075, "learning_rate": 6.28385657954276e-07, "loss": 3.118, "step": 153500 }, { "epoch": 6.6123960890726625, "learning_rate": 6.283371759787971e-07, "loss": 2.9742, "step": 153520 }, { "epoch": 6.61325752681225, "learning_rate": 6.282886940033183e-07, "loss": 3.0811, "step": 153540 }, { "epoch": 6.614118964551837, "learning_rate": 6.282402120278395e-07, "loss": 3.0658, "step": 153560 }, { "epoch": 6.614980402291424, "learning_rate": 6.281917300523604e-07, "loss": 3.1608, "step": 153580 }, { "epoch": 6.6158418400310115, "learning_rate": 6.281432480768816e-07, "loss": 3.1729, "step": 153600 }, { "epoch": 6.616703277770599, "learning_rate": 6.280947661014027e-07, "loss": 2.9438, "step": 153620 }, { "epoch": 6.617564715510187, "learning_rate": 6.280462841259239e-07, "loss": 2.9643, "step": 153640 }, { "epoch": 6.618426153249774, "learning_rate": 6.279978021504449e-07, "loss": 2.9705, "step": 153660 }, { "epoch": 6.619287590989361, "learning_rate": 6.27949320174966e-07, "loss": 2.9978, "step": 153680 }, { "epoch": 6.620149028728949, "learning_rate": 6.279008381994871e-07, "loss": 3.0236, "step": 153700 }, { "epoch": 6.621010466468536, "learning_rate": 6.278523562240083e-07, "loss": 3.0465, "step": 153720 }, { "epoch": 6.621871904208123, "learning_rate": 6.278038742485293e-07, "loss": 2.9625, "step": 153740 }, { "epoch": 6.62273334194771, "learning_rate": 6.277553922730504e-07, "loss": 3.0782, "step": 153760 }, { "epoch": 6.6235947796872985, "learning_rate": 6.277069102975716e-07, "loss": 2.9201, "step": 153780 }, { "epoch": 6.624456217426886, "learning_rate": 6.276584283220929e-07, "loss": 2.8962, "step": 153800 }, { "epoch": 6.625317655166473, "learning_rate": 6.276099463466138e-07, "loss": 3.1382, "step": 153820 }, { "epoch": 6.62617909290606, "learning_rate": 6.275614643711348e-07, "loss": 3.0758, "step": 153840 }, { "epoch": 6.6270405306456475, "learning_rate": 6.27512982395656e-07, "loss": 3.1004, "step": 153860 }, { "epoch": 6.627901968385235, "learning_rate": 6.274645004201772e-07, "loss": 3.0348, "step": 153880 }, { "epoch": 6.628763406124822, "learning_rate": 6.274160184446982e-07, "loss": 3.0491, "step": 153900 }, { "epoch": 6.629624843864409, "learning_rate": 6.273675364692193e-07, "loss": 3.0115, "step": 153920 }, { "epoch": 6.630486281603997, "learning_rate": 6.273190544937405e-07, "loss": 2.8551, "step": 153940 }, { "epoch": 6.631347719343585, "learning_rate": 6.272705725182615e-07, "loss": 3.1129, "step": 153960 }, { "epoch": 6.632209157083172, "learning_rate": 6.272220905427826e-07, "loss": 3.0415, "step": 153980 }, { "epoch": 6.633070594822759, "learning_rate": 6.271736085673037e-07, "loss": 2.9106, "step": 154000 }, { "epoch": 6.633932032562346, "learning_rate": 6.271251265918249e-07, "loss": 3.0103, "step": 154020 }, { "epoch": 6.634793470301934, "learning_rate": 6.270766446163459e-07, "loss": 3.2402, "step": 154040 }, { "epoch": 6.635654908041522, "learning_rate": 6.27028162640867e-07, "loss": 3.0972, "step": 154060 }, { "epoch": 6.636516345781109, "learning_rate": 6.269796806653882e-07, "loss": 3.2383, "step": 154080 }, { "epoch": 6.637377783520696, "learning_rate": 6.269311986899093e-07, "loss": 2.9132, "step": 154100 }, { "epoch": 6.638239221260283, "learning_rate": 6.268827167144304e-07, "loss": 2.944, "step": 154120 }, { "epoch": 6.639100658999871, "learning_rate": 6.268342347389514e-07, "loss": 3.004, "step": 154140 }, { "epoch": 6.639962096739458, "learning_rate": 6.267857527634726e-07, "loss": 2.8744, "step": 154160 }, { "epoch": 6.640823534479045, "learning_rate": 6.267372707879938e-07, "loss": 2.8872, "step": 154180 }, { "epoch": 6.641684972218632, "learning_rate": 6.266887888125148e-07, "loss": 2.9834, "step": 154200 }, { "epoch": 6.6425464099582205, "learning_rate": 6.266403068370358e-07, "loss": 2.8617, "step": 154220 }, { "epoch": 6.643407847697808, "learning_rate": 6.26591824861557e-07, "loss": 3.0015, "step": 154240 }, { "epoch": 6.644269285437395, "learning_rate": 6.265433428860781e-07, "loss": 3.0244, "step": 154260 }, { "epoch": 6.645130723176982, "learning_rate": 6.264948609105992e-07, "loss": 2.8984, "step": 154280 }, { "epoch": 6.6459921609165695, "learning_rate": 6.264463789351203e-07, "loss": 2.8875, "step": 154300 }, { "epoch": 6.646853598656157, "learning_rate": 6.263978969596415e-07, "loss": 3.1168, "step": 154320 }, { "epoch": 6.647715036395745, "learning_rate": 6.263494149841625e-07, "loss": 2.9842, "step": 154340 }, { "epoch": 6.648576474135332, "learning_rate": 6.263009330086836e-07, "loss": 2.9541, "step": 154360 }, { "epoch": 6.649437911874919, "learning_rate": 6.262524510332047e-07, "loss": 2.9555, "step": 154380 }, { "epoch": 6.650299349614507, "learning_rate": 6.262039690577259e-07, "loss": 3.0413, "step": 154400 }, { "epoch": 6.651160787354094, "learning_rate": 6.26155487082247e-07, "loss": 3.1225, "step": 154420 }, { "epoch": 6.652022225093681, "learning_rate": 6.261070051067681e-07, "loss": 2.9169, "step": 154440 }, { "epoch": 6.652883662833268, "learning_rate": 6.260585231312892e-07, "loss": 3.1405, "step": 154460 }, { "epoch": 6.653745100572856, "learning_rate": 6.260100411558102e-07, "loss": 3.0304, "step": 154480 }, { "epoch": 6.654606538312444, "learning_rate": 6.259615591803314e-07, "loss": 3.0029, "step": 154500 }, { "epoch": 6.655467976052031, "learning_rate": 6.259130772048525e-07, "loss": 3.202, "step": 154520 }, { "epoch": 6.656329413791618, "learning_rate": 6.258645952293735e-07, "loss": 3.0197, "step": 154540 }, { "epoch": 6.6571908515312055, "learning_rate": 6.258161132538947e-07, "loss": 2.9569, "step": 154560 }, { "epoch": 6.658052289270793, "learning_rate": 6.257676312784158e-07, "loss": 2.9694, "step": 154580 }, { "epoch": 6.65891372701038, "learning_rate": 6.257191493029368e-07, "loss": 3.0886, "step": 154600 }, { "epoch": 6.659775164749968, "learning_rate": 6.25670667327458e-07, "loss": 2.9773, "step": 154620 }, { "epoch": 6.660636602489555, "learning_rate": 6.256221853519792e-07, "loss": 2.8901, "step": 154640 }, { "epoch": 6.661498040229143, "learning_rate": 6.255737033765002e-07, "loss": 3.0304, "step": 154660 }, { "epoch": 6.66235947796873, "learning_rate": 6.255252214010213e-07, "loss": 2.9793, "step": 154680 }, { "epoch": 6.663220915708317, "learning_rate": 6.254767394255425e-07, "loss": 3.059, "step": 154700 }, { "epoch": 6.664082353447904, "learning_rate": 6.254282574500635e-07, "loss": 3.0417, "step": 154720 }, { "epoch": 6.664943791187492, "learning_rate": 6.253797754745846e-07, "loss": 2.9529, "step": 154740 }, { "epoch": 6.665805228927079, "learning_rate": 6.253312934991057e-07, "loss": 3.0776, "step": 154760 }, { "epoch": 6.666666666666667, "learning_rate": 6.252828115236269e-07, "loss": 2.9024, "step": 154780 }, { "epoch": 6.667528104406254, "learning_rate": 6.25234329548148e-07, "loss": 2.9622, "step": 154800 }, { "epoch": 6.6683895421458415, "learning_rate": 6.251858475726691e-07, "loss": 3.0013, "step": 154820 }, { "epoch": 6.669250979885429, "learning_rate": 6.251373655971902e-07, "loss": 3.0382, "step": 154840 }, { "epoch": 6.670112417625016, "learning_rate": 6.250888836217112e-07, "loss": 2.9014, "step": 154860 }, { "epoch": 6.670973855364603, "learning_rate": 6.250404016462324e-07, "loss": 3.0762, "step": 154880 }, { "epoch": 6.671835293104191, "learning_rate": 6.249919196707535e-07, "loss": 3.0177, "step": 154900 }, { "epoch": 6.672696730843779, "learning_rate": 6.249434376952746e-07, "loss": 2.9434, "step": 154920 }, { "epoch": 6.673558168583366, "learning_rate": 6.248949557197957e-07, "loss": 2.9934, "step": 154940 }, { "epoch": 6.674419606322953, "learning_rate": 6.248464737443169e-07, "loss": 3.0086, "step": 154960 }, { "epoch": 6.67528104406254, "learning_rate": 6.247979917688379e-07, "loss": 2.9837, "step": 154980 }, { "epoch": 6.6761424818021275, "learning_rate": 6.24749509793359e-07, "loss": 2.9572, "step": 155000 }, { "epoch": 6.677003919541715, "learning_rate": 6.247010278178801e-07, "loss": 3.105, "step": 155020 }, { "epoch": 6.677865357281302, "learning_rate": 6.246525458424013e-07, "loss": 2.8943, "step": 155040 }, { "epoch": 6.67872679502089, "learning_rate": 6.246040638669225e-07, "loss": 2.9442, "step": 155060 }, { "epoch": 6.679588232760477, "learning_rate": 6.245555818914435e-07, "loss": 3.0343, "step": 155080 }, { "epoch": 6.680449670500065, "learning_rate": 6.245070999159645e-07, "loss": 2.8267, "step": 155100 }, { "epoch": 6.681311108239652, "learning_rate": 6.244586179404856e-07, "loss": 2.9839, "step": 155120 }, { "epoch": 6.682172545979239, "learning_rate": 6.244101359650067e-07, "loss": 3.0226, "step": 155140 }, { "epoch": 6.683033983718826, "learning_rate": 6.243616539895278e-07, "loss": 2.9933, "step": 155160 }, { "epoch": 6.6838954214584145, "learning_rate": 6.24313172014049e-07, "loss": 3.04, "step": 155180 }, { "epoch": 6.684756859198002, "learning_rate": 6.242646900385701e-07, "loss": 2.9675, "step": 155200 }, { "epoch": 6.685618296937589, "learning_rate": 6.242162080630912e-07, "loss": 2.8901, "step": 155220 }, { "epoch": 6.686479734677176, "learning_rate": 6.241677260876122e-07, "loss": 2.9661, "step": 155240 }, { "epoch": 6.6873411724167635, "learning_rate": 6.241192441121334e-07, "loss": 3.0657, "step": 155260 }, { "epoch": 6.688202610156351, "learning_rate": 6.240707621366545e-07, "loss": 3.1289, "step": 155280 }, { "epoch": 6.689064047895938, "learning_rate": 6.240222801611756e-07, "loss": 2.9638, "step": 155300 }, { "epoch": 6.689925485635525, "learning_rate": 6.239737981856967e-07, "loss": 3.1156, "step": 155320 }, { "epoch": 6.690786923375113, "learning_rate": 6.239253162102179e-07, "loss": 3.0036, "step": 155340 }, { "epoch": 6.691648361114701, "learning_rate": 6.238768342347389e-07, "loss": 2.9745, "step": 155360 }, { "epoch": 6.692509798854288, "learning_rate": 6.238283522592601e-07, "loss": 2.9774, "step": 155380 }, { "epoch": 6.693371236593875, "learning_rate": 6.237798702837811e-07, "loss": 2.9541, "step": 155400 }, { "epoch": 6.694232674333462, "learning_rate": 6.237313883083023e-07, "loss": 2.8394, "step": 155420 }, { "epoch": 6.69509411207305, "learning_rate": 6.236829063328234e-07, "loss": 3.0245, "step": 155440 }, { "epoch": 6.695955549812638, "learning_rate": 6.236344243573444e-07, "loss": 3.0556, "step": 155460 }, { "epoch": 6.696816987552225, "learning_rate": 6.235859423818655e-07, "loss": 3.0199, "step": 155480 }, { "epoch": 6.697678425291812, "learning_rate": 6.235374604063867e-07, "loss": 2.975, "step": 155500 }, { "epoch": 6.6985398630313995, "learning_rate": 6.234889784309078e-07, "loss": 2.9335, "step": 155520 }, { "epoch": 6.699401300770987, "learning_rate": 6.234404964554288e-07, "loss": 3.1675, "step": 155540 }, { "epoch": 6.700262738510574, "learning_rate": 6.2339201447995e-07, "loss": 3.001, "step": 155560 }, { "epoch": 6.701124176250161, "learning_rate": 6.233435325044713e-07, "loss": 3.1264, "step": 155580 }, { "epoch": 6.7019856139897485, "learning_rate": 6.232950505289923e-07, "loss": 3.0924, "step": 155600 }, { "epoch": 6.702847051729337, "learning_rate": 6.232465685535132e-07, "loss": 3.107, "step": 155620 }, { "epoch": 6.703708489468924, "learning_rate": 6.231980865780344e-07, "loss": 2.839, "step": 155640 }, { "epoch": 6.704569927208511, "learning_rate": 6.231496046025555e-07, "loss": 3.022, "step": 155660 }, { "epoch": 6.705431364948098, "learning_rate": 6.231011226270766e-07, "loss": 2.8106, "step": 155680 }, { "epoch": 6.706292802687686, "learning_rate": 6.230526406515977e-07, "loss": 3.0252, "step": 155700 }, { "epoch": 6.707154240427273, "learning_rate": 6.230041586761189e-07, "loss": 3.1579, "step": 155720 }, { "epoch": 6.708015678166861, "learning_rate": 6.229556767006399e-07, "loss": 3.1057, "step": 155740 }, { "epoch": 6.708877115906448, "learning_rate": 6.229071947251609e-07, "loss": 2.9082, "step": 155760 }, { "epoch": 6.709738553646035, "learning_rate": 6.228587127496821e-07, "loss": 2.8599, "step": 155780 }, { "epoch": 6.710599991385623, "learning_rate": 6.228102307742033e-07, "loss": 2.9938, "step": 155800 }, { "epoch": 6.71146142912521, "learning_rate": 6.227617487987244e-07, "loss": 3.0094, "step": 155820 }, { "epoch": 6.712322866864797, "learning_rate": 6.227132668232454e-07, "loss": 3.062, "step": 155840 }, { "epoch": 6.713184304604384, "learning_rate": 6.226647848477666e-07, "loss": 2.9853, "step": 155860 }, { "epoch": 6.714045742343972, "learning_rate": 6.226163028722877e-07, "loss": 2.9691, "step": 155880 }, { "epoch": 6.71490718008356, "learning_rate": 6.225678208968089e-07, "loss": 2.9228, "step": 155900 }, { "epoch": 6.715768617823147, "learning_rate": 6.225193389213298e-07, "loss": 3.0041, "step": 155920 }, { "epoch": 6.716630055562734, "learning_rate": 6.22470856945851e-07, "loss": 2.9954, "step": 155940 }, { "epoch": 6.7174914933023215, "learning_rate": 6.224223749703722e-07, "loss": 2.8786, "step": 155960 }, { "epoch": 6.718352931041909, "learning_rate": 6.223738929948933e-07, "loss": 2.797, "step": 155980 }, { "epoch": 6.719214368781496, "learning_rate": 6.223254110194142e-07, "loss": 2.8859, "step": 156000 }, { "epoch": 6.720075806521084, "learning_rate": 6.222769290439354e-07, "loss": 3.1274, "step": 156020 }, { "epoch": 6.720937244260671, "learning_rate": 6.222284470684566e-07, "loss": 3.0293, "step": 156040 }, { "epoch": 6.721798682000259, "learning_rate": 6.221799650929777e-07, "loss": 2.9593, "step": 156060 }, { "epoch": 6.722660119739846, "learning_rate": 6.221314831174987e-07, "loss": 2.8998, "step": 156080 }, { "epoch": 6.723521557479433, "learning_rate": 6.220830011420199e-07, "loss": 2.9749, "step": 156100 }, { "epoch": 6.72438299521902, "learning_rate": 6.22034519166541e-07, "loss": 2.9735, "step": 156120 }, { "epoch": 6.725244432958608, "learning_rate": 6.21986037191062e-07, "loss": 3.0642, "step": 156140 }, { "epoch": 6.726105870698195, "learning_rate": 6.219375552155831e-07, "loss": 3.1098, "step": 156160 }, { "epoch": 6.726967308437783, "learning_rate": 6.218890732401043e-07, "loss": 3.0129, "step": 156180 }, { "epoch": 6.72782874617737, "learning_rate": 6.218405912646254e-07, "loss": 2.8954, "step": 156200 }, { "epoch": 6.7286901839169575, "learning_rate": 6.217921092891464e-07, "loss": 3.1258, "step": 156220 }, { "epoch": 6.729551621656545, "learning_rate": 6.217436273136676e-07, "loss": 3.1158, "step": 156240 }, { "epoch": 6.730413059396132, "learning_rate": 6.216951453381887e-07, "loss": 3.0389, "step": 156260 }, { "epoch": 6.731274497135719, "learning_rate": 6.216466633627098e-07, "loss": 3.0145, "step": 156280 }, { "epoch": 6.732135934875307, "learning_rate": 6.215981813872308e-07, "loss": 3.0301, "step": 156300 }, { "epoch": 6.732997372614895, "learning_rate": 6.215496994117519e-07, "loss": 2.9008, "step": 156320 }, { "epoch": 6.733858810354482, "learning_rate": 6.215012174362732e-07, "loss": 3.3197, "step": 156340 }, { "epoch": 6.734720248094069, "learning_rate": 6.214527354607943e-07, "loss": 2.9195, "step": 156360 }, { "epoch": 6.735581685833656, "learning_rate": 6.214042534853152e-07, "loss": 2.9933, "step": 156380 }, { "epoch": 6.736443123573244, "learning_rate": 6.213557715098364e-07, "loss": 3.2085, "step": 156400 }, { "epoch": 6.737304561312831, "learning_rate": 6.213072895343577e-07, "loss": 3.041, "step": 156420 }, { "epoch": 6.738165999052418, "learning_rate": 6.212588075588786e-07, "loss": 2.9605, "step": 156440 }, { "epoch": 6.739027436792006, "learning_rate": 6.212103255833997e-07, "loss": 3.0708, "step": 156460 }, { "epoch": 6.7398888745315935, "learning_rate": 6.211618436079209e-07, "loss": 3.0773, "step": 156480 }, { "epoch": 6.740750312271181, "learning_rate": 6.21113361632442e-07, "loss": 3.0126, "step": 156500 }, { "epoch": 6.741611750010768, "learning_rate": 6.21064879656963e-07, "loss": 2.8398, "step": 156520 }, { "epoch": 6.742473187750355, "learning_rate": 6.210163976814841e-07, "loss": 2.9419, "step": 156540 }, { "epoch": 6.7433346254899424, "learning_rate": 6.209679157060053e-07, "loss": 2.9745, "step": 156560 }, { "epoch": 6.744196063229531, "learning_rate": 6.209194337305265e-07, "loss": 3.065, "step": 156580 }, { "epoch": 6.745057500969118, "learning_rate": 6.208709517550475e-07, "loss": 2.9098, "step": 156600 }, { "epoch": 6.745918938708705, "learning_rate": 6.208224697795686e-07, "loss": 2.9912, "step": 156620 }, { "epoch": 6.746780376448292, "learning_rate": 6.207739878040897e-07, "loss": 2.9551, "step": 156640 }, { "epoch": 6.74764181418788, "learning_rate": 6.207255058286109e-07, "loss": 3.0716, "step": 156660 }, { "epoch": 6.748503251927467, "learning_rate": 6.206770238531319e-07, "loss": 2.8915, "step": 156680 }, { "epoch": 6.749364689667054, "learning_rate": 6.20628541877653e-07, "loss": 2.9835, "step": 156700 }, { "epoch": 6.750226127406641, "learning_rate": 6.205800599021742e-07, "loss": 2.9588, "step": 156720 }, { "epoch": 6.751087565146229, "learning_rate": 6.205315779266952e-07, "loss": 2.8194, "step": 156740 }, { "epoch": 6.751949002885817, "learning_rate": 6.204830959512162e-07, "loss": 2.9887, "step": 156760 }, { "epoch": 6.752810440625404, "learning_rate": 6.204346139757374e-07, "loss": 2.9617, "step": 156780 }, { "epoch": 6.753671878364991, "learning_rate": 6.203861320002586e-07, "loss": 2.8621, "step": 156800 }, { "epoch": 6.754533316104578, "learning_rate": 6.203376500247796e-07, "loss": 2.9655, "step": 156820 }, { "epoch": 6.755394753844166, "learning_rate": 6.202891680493007e-07, "loss": 3.0653, "step": 156840 }, { "epoch": 6.756256191583753, "learning_rate": 6.202406860738219e-07, "loss": 3.0867, "step": 156860 }, { "epoch": 6.757117629323341, "learning_rate": 6.20192204098343e-07, "loss": 3.1583, "step": 156880 }, { "epoch": 6.757979067062928, "learning_rate": 6.20143722122864e-07, "loss": 3.0401, "step": 156900 }, { "epoch": 6.7588405048025155, "learning_rate": 6.200952401473851e-07, "loss": 2.874, "step": 156920 }, { "epoch": 6.759701942542103, "learning_rate": 6.200467581719063e-07, "loss": 2.9552, "step": 156940 }, { "epoch": 6.76056338028169, "learning_rate": 6.199982761964275e-07, "loss": 3.1028, "step": 156960 }, { "epoch": 6.761424818021277, "learning_rate": 6.199497942209485e-07, "loss": 2.9856, "step": 156980 }, { "epoch": 6.7622862557608645, "learning_rate": 6.199013122454696e-07, "loss": 3.0649, "step": 157000 }, { "epoch": 6.763147693500453, "learning_rate": 6.198528302699907e-07, "loss": 3.0121, "step": 157020 }, { "epoch": 6.76400913124004, "learning_rate": 6.198043482945118e-07, "loss": 3.0559, "step": 157040 }, { "epoch": 6.764870568979627, "learning_rate": 6.197558663190329e-07, "loss": 2.9783, "step": 157060 }, { "epoch": 6.765732006719214, "learning_rate": 6.19707384343554e-07, "loss": 3.2066, "step": 157080 }, { "epoch": 6.766593444458802, "learning_rate": 6.196589023680752e-07, "loss": 2.8297, "step": 157100 }, { "epoch": 6.767454882198389, "learning_rate": 6.196104203925963e-07, "loss": 2.9256, "step": 157120 }, { "epoch": 6.768316319937976, "learning_rate": 6.195619384171173e-07, "loss": 3.1123, "step": 157140 }, { "epoch": 6.769177757677564, "learning_rate": 6.195134564416385e-07, "loss": 3.0236, "step": 157160 }, { "epoch": 6.7700391954171515, "learning_rate": 6.194649744661596e-07, "loss": 2.9107, "step": 157180 }, { "epoch": 6.770900633156739, "learning_rate": 6.194164924906807e-07, "loss": 2.8161, "step": 157200 }, { "epoch": 6.771762070896326, "learning_rate": 6.193680105152018e-07, "loss": 2.9896, "step": 157220 }, { "epoch": 6.772623508635913, "learning_rate": 6.193195285397229e-07, "loss": 2.8765, "step": 157240 }, { "epoch": 6.7734849463755005, "learning_rate": 6.19271046564244e-07, "loss": 3.1581, "step": 157260 }, { "epoch": 6.774346384115088, "learning_rate": 6.19222564588765e-07, "loss": 2.8857, "step": 157280 }, { "epoch": 6.775207821854675, "learning_rate": 6.191740826132861e-07, "loss": 2.8782, "step": 157300 }, { "epoch": 6.776069259594263, "learning_rate": 6.191256006378073e-07, "loss": 2.9014, "step": 157320 }, { "epoch": 6.77693069733385, "learning_rate": 6.190771186623284e-07, "loss": 2.9653, "step": 157340 }, { "epoch": 6.777792135073438, "learning_rate": 6.190286366868495e-07, "loss": 2.9282, "step": 157360 }, { "epoch": 6.778653572813025, "learning_rate": 6.189801547113706e-07, "loss": 2.7857, "step": 157380 }, { "epoch": 6.779515010552612, "learning_rate": 6.189316727358917e-07, "loss": 2.9665, "step": 157400 }, { "epoch": 6.780376448292199, "learning_rate": 6.188831907604128e-07, "loss": 2.8862, "step": 157420 }, { "epoch": 6.7812378860317875, "learning_rate": 6.188347087849339e-07, "loss": 2.9512, "step": 157440 }, { "epoch": 6.782099323771375, "learning_rate": 6.18786226809455e-07, "loss": 2.9818, "step": 157460 }, { "epoch": 6.782960761510962, "learning_rate": 6.187377448339762e-07, "loss": 2.8903, "step": 157480 }, { "epoch": 6.783822199250549, "learning_rate": 6.186892628584973e-07, "loss": 3.0525, "step": 157500 }, { "epoch": 6.784683636990136, "learning_rate": 6.186407808830183e-07, "loss": 3.1049, "step": 157520 }, { "epoch": 6.785545074729724, "learning_rate": 6.185922989075394e-07, "loss": 3.0582, "step": 157540 }, { "epoch": 6.786406512469311, "learning_rate": 6.185438169320606e-07, "loss": 3.0075, "step": 157560 }, { "epoch": 6.787267950208898, "learning_rate": 6.184953349565817e-07, "loss": 2.9351, "step": 157580 }, { "epoch": 6.788129387948486, "learning_rate": 6.184468529811028e-07, "loss": 2.9793, "step": 157600 }, { "epoch": 6.7889908256880735, "learning_rate": 6.183983710056239e-07, "loss": 2.9752, "step": 157620 }, { "epoch": 6.789852263427661, "learning_rate": 6.18349889030145e-07, "loss": 3.0591, "step": 157640 }, { "epoch": 6.790713701167248, "learning_rate": 6.183014070546661e-07, "loss": 3.0441, "step": 157660 }, { "epoch": 6.791575138906835, "learning_rate": 6.182529250791873e-07, "loss": 2.9888, "step": 157680 }, { "epoch": 6.7924365766464225, "learning_rate": 6.182044431037083e-07, "loss": 3.0303, "step": 157700 }, { "epoch": 6.793298014386011, "learning_rate": 6.181559611282294e-07, "loss": 3.0746, "step": 157720 }, { "epoch": 6.794159452125598, "learning_rate": 6.181074791527506e-07, "loss": 2.9545, "step": 157740 }, { "epoch": 6.795020889865185, "learning_rate": 6.180589971772717e-07, "loss": 3.023, "step": 157760 }, { "epoch": 6.795882327604772, "learning_rate": 6.180105152017927e-07, "loss": 2.8885, "step": 157780 }, { "epoch": 6.79674376534436, "learning_rate": 6.179620332263138e-07, "loss": 2.9991, "step": 157800 }, { "epoch": 6.797605203083947, "learning_rate": 6.179135512508349e-07, "loss": 2.9048, "step": 157820 }, { "epoch": 6.798466640823534, "learning_rate": 6.17865069275356e-07, "loss": 2.9513, "step": 157840 }, { "epoch": 6.799328078563121, "learning_rate": 6.178165872998772e-07, "loss": 2.8403, "step": 157860 }, { "epoch": 6.8001895163027095, "learning_rate": 6.177681053243983e-07, "loss": 2.9784, "step": 157880 }, { "epoch": 6.801050954042297, "learning_rate": 6.177196233489193e-07, "loss": 3.0029, "step": 157900 }, { "epoch": 6.801912391781884, "learning_rate": 6.176711413734404e-07, "loss": 3.2713, "step": 157920 }, { "epoch": 6.802773829521471, "learning_rate": 6.176226593979615e-07, "loss": 2.8866, "step": 157940 }, { "epoch": 6.8036352672610585, "learning_rate": 6.175741774224827e-07, "loss": 3.0227, "step": 157960 }, { "epoch": 6.804496705000646, "learning_rate": 6.175256954470038e-07, "loss": 3.0292, "step": 157980 }, { "epoch": 6.805358142740234, "learning_rate": 6.174772134715249e-07, "loss": 3.047, "step": 158000 }, { "epoch": 6.806219580479821, "learning_rate": 6.17428731496046e-07, "loss": 2.9948, "step": 158020 }, { "epoch": 6.807081018219408, "learning_rate": 6.173802495205671e-07, "loss": 2.9083, "step": 158040 }, { "epoch": 6.807942455958996, "learning_rate": 6.173317675450882e-07, "loss": 2.9981, "step": 158060 }, { "epoch": 6.808803893698583, "learning_rate": 6.172832855696093e-07, "loss": 3.0321, "step": 158080 }, { "epoch": 6.80966533143817, "learning_rate": 6.172348035941303e-07, "loss": 3.0917, "step": 158100 }, { "epoch": 6.810526769177757, "learning_rate": 6.171863216186516e-07, "loss": 2.8706, "step": 158120 }, { "epoch": 6.811388206917345, "learning_rate": 6.171378396431727e-07, "loss": 2.7852, "step": 158140 }, { "epoch": 6.812249644656933, "learning_rate": 6.170893576676937e-07, "loss": 3.275, "step": 158160 }, { "epoch": 6.81311108239652, "learning_rate": 6.170408756922148e-07, "loss": 2.9358, "step": 158180 }, { "epoch": 6.813972520136107, "learning_rate": 6.169923937167361e-07, "loss": 2.9583, "step": 158200 }, { "epoch": 6.8148339578756945, "learning_rate": 6.169439117412571e-07, "loss": 3.1115, "step": 158220 }, { "epoch": 6.815695395615282, "learning_rate": 6.168954297657781e-07, "loss": 2.9678, "step": 158240 }, { "epoch": 6.816556833354869, "learning_rate": 6.168469477902993e-07, "loss": 3.0422, "step": 158260 }, { "epoch": 6.817418271094457, "learning_rate": 6.167984658148204e-07, "loss": 2.9535, "step": 158280 }, { "epoch": 6.818279708834044, "learning_rate": 6.167499838393415e-07, "loss": 3.111, "step": 158300 }, { "epoch": 6.819141146573632, "learning_rate": 6.167015018638625e-07, "loss": 3.0791, "step": 158320 }, { "epoch": 6.820002584313219, "learning_rate": 6.166530198883837e-07, "loss": 3.0346, "step": 158340 }, { "epoch": 6.820864022052806, "learning_rate": 6.166045379129048e-07, "loss": 3.3128, "step": 158360 }, { "epoch": 6.821725459792393, "learning_rate": 6.165560559374259e-07, "loss": 2.9771, "step": 158380 }, { "epoch": 6.8225868975319806, "learning_rate": 6.16507573961947e-07, "loss": 2.9752, "step": 158400 }, { "epoch": 6.823448335271568, "learning_rate": 6.164590919864681e-07, "loss": 3.0256, "step": 158420 }, { "epoch": 6.824309773011156, "learning_rate": 6.164106100109892e-07, "loss": 3.1982, "step": 158440 }, { "epoch": 6.825171210750743, "learning_rate": 6.163621280355103e-07, "loss": 2.9138, "step": 158460 }, { "epoch": 6.82603264849033, "learning_rate": 6.163136460600314e-07, "loss": 3.099, "step": 158480 }, { "epoch": 6.826894086229918, "learning_rate": 6.162651640845526e-07, "loss": 3.1695, "step": 158500 }, { "epoch": 6.827755523969505, "learning_rate": 6.162166821090737e-07, "loss": 3.0407, "step": 158520 }, { "epoch": 6.828616961709092, "learning_rate": 6.161682001335946e-07, "loss": 2.897, "step": 158540 }, { "epoch": 6.82947839944868, "learning_rate": 6.161197181581158e-07, "loss": 2.843, "step": 158560 }, { "epoch": 6.8303398371882675, "learning_rate": 6.16071236182637e-07, "loss": 3.0255, "step": 158580 }, { "epoch": 6.831201274927855, "learning_rate": 6.160227542071581e-07, "loss": 3.0561, "step": 158600 }, { "epoch": 6.832062712667442, "learning_rate": 6.159742722316791e-07, "loss": 3.1013, "step": 158620 }, { "epoch": 6.832924150407029, "learning_rate": 6.159257902562003e-07, "loss": 2.9464, "step": 158640 }, { "epoch": 6.8337855881466165, "learning_rate": 6.158773082807214e-07, "loss": 3.0131, "step": 158660 }, { "epoch": 6.834647025886204, "learning_rate": 6.158288263052425e-07, "loss": 2.9572, "step": 158680 }, { "epoch": 6.835508463625791, "learning_rate": 6.157803443297635e-07, "loss": 3.137, "step": 158700 }, { "epoch": 6.836369901365379, "learning_rate": 6.157318623542847e-07, "loss": 2.8969, "step": 158720 }, { "epoch": 6.837231339104966, "learning_rate": 6.156833803788059e-07, "loss": 3.1929, "step": 158740 }, { "epoch": 6.838092776844554, "learning_rate": 6.15634898403327e-07, "loss": 2.906, "step": 158760 }, { "epoch": 6.838954214584141, "learning_rate": 6.15586416427848e-07, "loss": 2.8766, "step": 158780 }, { "epoch": 6.839815652323728, "learning_rate": 6.155379344523691e-07, "loss": 3.1092, "step": 158800 }, { "epoch": 6.840677090063315, "learning_rate": 6.154894524768903e-07, "loss": 2.9979, "step": 158820 }, { "epoch": 6.8415385278029035, "learning_rate": 6.154409705014114e-07, "loss": 3.1303, "step": 158840 }, { "epoch": 6.842399965542491, "learning_rate": 6.153924885259324e-07, "loss": 2.9108, "step": 158860 }, { "epoch": 6.843261403282078, "learning_rate": 6.153440065504536e-07, "loss": 2.8907, "step": 158880 }, { "epoch": 6.844122841021665, "learning_rate": 6.152955245749747e-07, "loss": 3.0587, "step": 158900 }, { "epoch": 6.8449842787612525, "learning_rate": 6.152470425994956e-07, "loss": 2.9565, "step": 158920 }, { "epoch": 6.84584571650084, "learning_rate": 6.151985606240169e-07, "loss": 2.8607, "step": 158940 }, { "epoch": 6.846707154240427, "learning_rate": 6.15150078648538e-07, "loss": 2.9411, "step": 158960 }, { "epoch": 6.847568591980014, "learning_rate": 6.151015966730591e-07, "loss": 2.9219, "step": 158980 }, { "epoch": 6.848430029719602, "learning_rate": 6.150531146975801e-07, "loss": 3.0025, "step": 159000 }, { "epoch": 6.84929146745919, "learning_rate": 6.150046327221013e-07, "loss": 2.937, "step": 159020 }, { "epoch": 6.850152905198777, "learning_rate": 6.149561507466224e-07, "loss": 3.1591, "step": 159040 }, { "epoch": 6.851014342938364, "learning_rate": 6.149076687711435e-07, "loss": 2.9512, "step": 159060 }, { "epoch": 6.851875780677951, "learning_rate": 6.148591867956645e-07, "loss": 3.0933, "step": 159080 }, { "epoch": 6.852737218417539, "learning_rate": 6.148107048201857e-07, "loss": 2.9341, "step": 159100 }, { "epoch": 6.853598656157127, "learning_rate": 6.147622228447069e-07, "loss": 2.9897, "step": 159120 }, { "epoch": 6.854460093896714, "learning_rate": 6.14713740869228e-07, "loss": 3.0072, "step": 159140 }, { "epoch": 6.855321531636301, "learning_rate": 6.14665258893749e-07, "loss": 3.0333, "step": 159160 }, { "epoch": 6.8561829693758884, "learning_rate": 6.146167769182701e-07, "loss": 2.9791, "step": 159180 }, { "epoch": 6.857044407115476, "learning_rate": 6.145682949427913e-07, "loss": 2.92, "step": 159200 }, { "epoch": 6.857905844855063, "learning_rate": 6.145198129673123e-07, "loss": 3.0197, "step": 159220 }, { "epoch": 6.85876728259465, "learning_rate": 6.144713309918334e-07, "loss": 2.9263, "step": 159240 }, { "epoch": 6.859628720334237, "learning_rate": 6.144228490163546e-07, "loss": 3.0077, "step": 159260 }, { "epoch": 6.860490158073826, "learning_rate": 6.143743670408758e-07, "loss": 3.0874, "step": 159280 }, { "epoch": 6.861351595813413, "learning_rate": 6.143258850653967e-07, "loss": 2.8634, "step": 159300 }, { "epoch": 6.862213033553, "learning_rate": 6.142774030899178e-07, "loss": 2.8933, "step": 159320 }, { "epoch": 6.863074471292587, "learning_rate": 6.14228921114439e-07, "loss": 2.8897, "step": 159340 }, { "epoch": 6.8639359090321745, "learning_rate": 6.141804391389602e-07, "loss": 2.8941, "step": 159360 }, { "epoch": 6.864797346771762, "learning_rate": 6.141319571634812e-07, "loss": 3.0647, "step": 159380 }, { "epoch": 6.86565878451135, "learning_rate": 6.140834751880023e-07, "loss": 3.0247, "step": 159400 }, { "epoch": 6.866520222250937, "learning_rate": 6.140349932125235e-07, "loss": 3.0619, "step": 159420 }, { "epoch": 6.867381659990524, "learning_rate": 6.139865112370445e-07, "loss": 3.1027, "step": 159440 }, { "epoch": 6.868243097730112, "learning_rate": 6.139380292615655e-07, "loss": 3.0916, "step": 159460 }, { "epoch": 6.869104535469699, "learning_rate": 6.138895472860867e-07, "loss": 2.8267, "step": 159480 }, { "epoch": 6.869965973209286, "learning_rate": 6.138410653106079e-07, "loss": 3.0008, "step": 159500 }, { "epoch": 6.870827410948873, "learning_rate": 6.13792583335129e-07, "loss": 2.7965, "step": 159520 }, { "epoch": 6.871688848688461, "learning_rate": 6.1374410135965e-07, "loss": 3.091, "step": 159540 }, { "epoch": 6.872550286428049, "learning_rate": 6.136956193841711e-07, "loss": 2.9722, "step": 159560 }, { "epoch": 6.873411724167636, "learning_rate": 6.136471374086923e-07, "loss": 3.0117, "step": 159580 }, { "epoch": 6.874273161907223, "learning_rate": 6.135986554332133e-07, "loss": 2.915, "step": 159600 }, { "epoch": 6.8751345996468105, "learning_rate": 6.135501734577344e-07, "loss": 2.9692, "step": 159620 }, { "epoch": 6.875996037386398, "learning_rate": 6.135016914822556e-07, "loss": 2.9778, "step": 159640 }, { "epoch": 6.876857475125985, "learning_rate": 6.134532095067768e-07, "loss": 2.9872, "step": 159660 }, { "epoch": 6.877718912865573, "learning_rate": 6.134047275312977e-07, "loss": 2.9809, "step": 159680 }, { "epoch": 6.87858035060516, "learning_rate": 6.133562455558188e-07, "loss": 3.0234, "step": 159700 }, { "epoch": 6.879441788344748, "learning_rate": 6.1330776358034e-07, "loss": 3.1114, "step": 159720 }, { "epoch": 6.880303226084335, "learning_rate": 6.132592816048612e-07, "loss": 2.8857, "step": 159740 }, { "epoch": 6.881164663823922, "learning_rate": 6.132107996293822e-07, "loss": 3.0025, "step": 159760 }, { "epoch": 6.882026101563509, "learning_rate": 6.131623176539033e-07, "loss": 2.8827, "step": 159780 }, { "epoch": 6.882887539303097, "learning_rate": 6.131138356784245e-07, "loss": 2.9432, "step": 159800 }, { "epoch": 6.883748977042684, "learning_rate": 6.130653537029455e-07, "loss": 2.9671, "step": 159820 }, { "epoch": 6.884610414782272, "learning_rate": 6.130168717274666e-07, "loss": 3.0198, "step": 159840 }, { "epoch": 6.885471852521859, "learning_rate": 6.129683897519877e-07, "loss": 3.1678, "step": 159860 }, { "epoch": 6.8863332902614465, "learning_rate": 6.129199077765088e-07, "loss": 3.2214, "step": 159880 }, { "epoch": 6.887194728001034, "learning_rate": 6.1287142580103e-07, "loss": 2.9037, "step": 159900 }, { "epoch": 6.888056165740621, "learning_rate": 6.128229438255511e-07, "loss": 3.0055, "step": 159920 }, { "epoch": 6.888917603480208, "learning_rate": 6.127744618500721e-07, "loss": 2.976, "step": 159940 }, { "epoch": 6.8897790412197955, "learning_rate": 6.127259798745933e-07, "loss": 2.9331, "step": 159960 }, { "epoch": 6.890640478959384, "learning_rate": 6.126774978991143e-07, "loss": 3.021, "step": 159980 }, { "epoch": 6.891501916698971, "learning_rate": 6.126290159236354e-07, "loss": 3.0942, "step": 160000 }, { "epoch": 6.892363354438558, "learning_rate": 6.125805339481566e-07, "loss": 3.0612, "step": 160020 }, { "epoch": 6.893224792178145, "learning_rate": 6.125320519726778e-07, "loss": 2.9488, "step": 160040 }, { "epoch": 6.894086229917733, "learning_rate": 6.124835699971988e-07, "loss": 3.1428, "step": 160060 }, { "epoch": 6.89494766765732, "learning_rate": 6.124350880217198e-07, "loss": 2.9347, "step": 160080 }, { "epoch": 6.895809105396907, "learning_rate": 6.12386606046241e-07, "loss": 3.0888, "step": 160100 }, { "epoch": 6.896670543136495, "learning_rate": 6.123381240707622e-07, "loss": 2.9773, "step": 160120 }, { "epoch": 6.897531980876082, "learning_rate": 6.122896420952832e-07, "loss": 2.9257, "step": 160140 }, { "epoch": 6.89839341861567, "learning_rate": 6.122411601198043e-07, "loss": 3.0157, "step": 160160 }, { "epoch": 6.899254856355257, "learning_rate": 6.121926781443255e-07, "loss": 2.982, "step": 160180 }, { "epoch": 6.900116294094844, "learning_rate": 6.121441961688465e-07, "loss": 2.9873, "step": 160200 }, { "epoch": 6.900977731834431, "learning_rate": 6.120957141933676e-07, "loss": 2.9533, "step": 160220 }, { "epoch": 6.901839169574019, "learning_rate": 6.120472322178887e-07, "loss": 2.9036, "step": 160240 }, { "epoch": 6.902700607313607, "learning_rate": 6.119987502424099e-07, "loss": 3.0392, "step": 160260 }, { "epoch": 6.903562045053194, "learning_rate": 6.11950268266931e-07, "loss": 2.9104, "step": 160280 }, { "epoch": 6.904423482792781, "learning_rate": 6.119017862914521e-07, "loss": 2.8994, "step": 160300 }, { "epoch": 6.9052849205323685, "learning_rate": 6.118533043159731e-07, "loss": 2.8456, "step": 160320 }, { "epoch": 6.906146358271956, "learning_rate": 6.118048223404943e-07, "loss": 2.9991, "step": 160340 }, { "epoch": 6.907007796011543, "learning_rate": 6.117563403650154e-07, "loss": 2.9742, "step": 160360 }, { "epoch": 6.90786923375113, "learning_rate": 6.117078583895365e-07, "loss": 2.9202, "step": 160380 }, { "epoch": 6.9087306714907175, "learning_rate": 6.116593764140576e-07, "loss": 3.0187, "step": 160400 }, { "epoch": 6.909592109230306, "learning_rate": 6.116108944385788e-07, "loss": 3.0533, "step": 160420 }, { "epoch": 6.910453546969893, "learning_rate": 6.115624124630998e-07, "loss": 3.1671, "step": 160440 }, { "epoch": 6.91131498470948, "learning_rate": 6.115139304876209e-07, "loss": 2.9761, "step": 160460 }, { "epoch": 6.912176422449067, "learning_rate": 6.11465448512142e-07, "loss": 3.0505, "step": 160480 }, { "epoch": 6.913037860188655, "learning_rate": 6.114169665366631e-07, "loss": 2.859, "step": 160500 }, { "epoch": 6.913899297928242, "learning_rate": 6.113684845611842e-07, "loss": 2.9739, "step": 160520 }, { "epoch": 6.91476073566783, "learning_rate": 6.113200025857053e-07, "loss": 3.0312, "step": 160540 }, { "epoch": 6.915622173407417, "learning_rate": 6.112715206102265e-07, "loss": 2.9245, "step": 160560 }, { "epoch": 6.9164836111470045, "learning_rate": 6.112230386347475e-07, "loss": 2.9019, "step": 160580 }, { "epoch": 6.917345048886592, "learning_rate": 6.111745566592686e-07, "loss": 2.9645, "step": 160600 }, { "epoch": 6.918206486626179, "learning_rate": 6.111260746837897e-07, "loss": 2.9283, "step": 160620 }, { "epoch": 6.919067924365766, "learning_rate": 6.110775927083109e-07, "loss": 3.1025, "step": 160640 }, { "epoch": 6.9199293621053535, "learning_rate": 6.11029110732832e-07, "loss": 2.9577, "step": 160660 }, { "epoch": 6.920790799844941, "learning_rate": 6.109806287573531e-07, "loss": 2.9135, "step": 160680 }, { "epoch": 6.921652237584529, "learning_rate": 6.109321467818741e-07, "loss": 2.8663, "step": 160700 }, { "epoch": 6.922513675324116, "learning_rate": 6.108836648063954e-07, "loss": 3.0216, "step": 160720 }, { "epoch": 6.923375113063703, "learning_rate": 6.108351828309164e-07, "loss": 3.0667, "step": 160740 }, { "epoch": 6.924236550803291, "learning_rate": 6.107867008554375e-07, "loss": 2.8985, "step": 160760 }, { "epoch": 6.925097988542878, "learning_rate": 6.107382188799586e-07, "loss": 2.9746, "step": 160780 }, { "epoch": 6.925959426282465, "learning_rate": 6.106897369044797e-07, "loss": 3.093, "step": 160800 }, { "epoch": 6.926820864022053, "learning_rate": 6.106412549290008e-07, "loss": 2.9555, "step": 160820 }, { "epoch": 6.9276823017616405, "learning_rate": 6.105927729535219e-07, "loss": 2.9293, "step": 160840 }, { "epoch": 6.928543739501228, "learning_rate": 6.10544290978043e-07, "loss": 3.1082, "step": 160860 }, { "epoch": 6.929405177240815, "learning_rate": 6.104958090025641e-07, "loss": 3.0971, "step": 160880 }, { "epoch": 6.930266614980402, "learning_rate": 6.104473270270853e-07, "loss": 2.8921, "step": 160900 }, { "epoch": 6.931128052719989, "learning_rate": 6.103988450516064e-07, "loss": 2.9966, "step": 160920 }, { "epoch": 6.931989490459577, "learning_rate": 6.103503630761275e-07, "loss": 3.0089, "step": 160940 }, { "epoch": 6.932850928199164, "learning_rate": 6.103018811006485e-07, "loss": 2.8651, "step": 160960 }, { "epoch": 6.933712365938752, "learning_rate": 6.102533991251697e-07, "loss": 3.0146, "step": 160980 }, { "epoch": 6.934573803678339, "learning_rate": 6.102049171496908e-07, "loss": 2.9815, "step": 161000 }, { "epoch": 6.9354352414179266, "learning_rate": 6.101564351742119e-07, "loss": 3.0273, "step": 161020 }, { "epoch": 6.936296679157514, "learning_rate": 6.10107953198733e-07, "loss": 2.9128, "step": 161040 }, { "epoch": 6.937158116897101, "learning_rate": 6.100594712232541e-07, "loss": 2.9265, "step": 161060 }, { "epoch": 6.938019554636688, "learning_rate": 6.100109892477752e-07, "loss": 3.0911, "step": 161080 }, { "epoch": 6.938880992376276, "learning_rate": 6.099625072722962e-07, "loss": 2.9841, "step": 161100 }, { "epoch": 6.939742430115864, "learning_rate": 6.099140252968174e-07, "loss": 3.0494, "step": 161120 }, { "epoch": 6.940603867855451, "learning_rate": 6.098655433213384e-07, "loss": 2.9868, "step": 161140 }, { "epoch": 6.941465305595038, "learning_rate": 6.098170613458596e-07, "loss": 2.8688, "step": 161160 }, { "epoch": 6.942326743334625, "learning_rate": 6.097685793703807e-07, "loss": 2.9067, "step": 161180 }, { "epoch": 6.943188181074213, "learning_rate": 6.097200973949019e-07, "loss": 3.0363, "step": 161200 }, { "epoch": 6.9440496188138, "learning_rate": 6.096716154194229e-07, "loss": 3.1299, "step": 161220 }, { "epoch": 6.944911056553387, "learning_rate": 6.09623133443944e-07, "loss": 3.1942, "step": 161240 }, { "epoch": 6.945772494292975, "learning_rate": 6.095746514684651e-07, "loss": 3.031, "step": 161260 }, { "epoch": 6.9466339320325625, "learning_rate": 6.095261694929863e-07, "loss": 2.9631, "step": 161280 }, { "epoch": 6.94749536977215, "learning_rate": 6.094776875175074e-07, "loss": 3.015, "step": 161300 }, { "epoch": 6.948356807511737, "learning_rate": 6.094292055420285e-07, "loss": 2.9451, "step": 161320 }, { "epoch": 6.949218245251324, "learning_rate": 6.093807235665495e-07, "loss": 2.9851, "step": 161340 }, { "epoch": 6.9500796829909115, "learning_rate": 6.093322415910707e-07, "loss": 2.9212, "step": 161360 }, { "epoch": 6.9509411207305, "learning_rate": 6.092837596155918e-07, "loss": 2.9451, "step": 161380 }, { "epoch": 6.951802558470087, "learning_rate": 6.092352776401128e-07, "loss": 2.8747, "step": 161400 }, { "epoch": 6.952663996209674, "learning_rate": 6.09186795664634e-07, "loss": 3.0574, "step": 161420 }, { "epoch": 6.953525433949261, "learning_rate": 6.091383136891552e-07, "loss": 2.9227, "step": 161440 }, { "epoch": 6.954386871688849, "learning_rate": 6.090898317136762e-07, "loss": 2.9752, "step": 161460 }, { "epoch": 6.955248309428436, "learning_rate": 6.090413497381972e-07, "loss": 3.2182, "step": 161480 }, { "epoch": 6.956109747168023, "learning_rate": 6.089928677627184e-07, "loss": 2.946, "step": 161500 }, { "epoch": 6.95697118490761, "learning_rate": 6.089443857872396e-07, "loss": 2.7582, "step": 161520 }, { "epoch": 6.9578326226471985, "learning_rate": 6.088959038117607e-07, "loss": 2.9416, "step": 161540 }, { "epoch": 6.958694060386786, "learning_rate": 6.088474218362817e-07, "loss": 2.7032, "step": 161560 }, { "epoch": 6.959555498126373, "learning_rate": 6.087989398608029e-07, "loss": 2.9332, "step": 161580 }, { "epoch": 6.96041693586596, "learning_rate": 6.087504578853239e-07, "loss": 2.8729, "step": 161600 }, { "epoch": 6.9612783736055475, "learning_rate": 6.08701975909845e-07, "loss": 3.0252, "step": 161620 }, { "epoch": 6.962139811345135, "learning_rate": 6.086534939343661e-07, "loss": 2.8712, "step": 161640 }, { "epoch": 6.963001249084723, "learning_rate": 6.086050119588872e-07, "loss": 2.9224, "step": 161660 }, { "epoch": 6.96386268682431, "learning_rate": 6.085565299834084e-07, "loss": 3.1855, "step": 161680 }, { "epoch": 6.964724124563897, "learning_rate": 6.085080480079294e-07, "loss": 3.0708, "step": 161700 }, { "epoch": 6.965585562303485, "learning_rate": 6.084595660324505e-07, "loss": 2.9805, "step": 161720 }, { "epoch": 6.966447000043072, "learning_rate": 6.084110840569717e-07, "loss": 3.1157, "step": 161740 }, { "epoch": 6.967308437782659, "learning_rate": 6.083626020814928e-07, "loss": 3.0599, "step": 161760 }, { "epoch": 6.968169875522246, "learning_rate": 6.083141201060138e-07, "loss": 2.8968, "step": 161780 }, { "epoch": 6.969031313261834, "learning_rate": 6.08265638130535e-07, "loss": 2.9741, "step": 161800 }, { "epoch": 6.969892751001422, "learning_rate": 6.082171561550562e-07, "loss": 2.874, "step": 161820 }, { "epoch": 6.970754188741009, "learning_rate": 6.081686741795773e-07, "loss": 3.0136, "step": 161840 }, { "epoch": 6.971615626480596, "learning_rate": 6.081201922040982e-07, "loss": 2.9273, "step": 161860 }, { "epoch": 6.972477064220183, "learning_rate": 6.080717102286194e-07, "loss": 3.0055, "step": 161880 }, { "epoch": 6.973338501959771, "learning_rate": 6.080232282531406e-07, "loss": 2.7037, "step": 161900 }, { "epoch": 6.974199939699358, "learning_rate": 6.079747462776617e-07, "loss": 2.8749, "step": 161920 }, { "epoch": 6.975061377438946, "learning_rate": 6.079262643021827e-07, "loss": 2.9466, "step": 161940 }, { "epoch": 6.975922815178533, "learning_rate": 6.078777823267039e-07, "loss": 2.9015, "step": 161960 }, { "epoch": 6.9767842529181205, "learning_rate": 6.07829300351225e-07, "loss": 2.9317, "step": 161980 }, { "epoch": 6.977645690657708, "learning_rate": 6.07780818375746e-07, "loss": 2.9359, "step": 162000 }, { "epoch": 6.978507128397295, "learning_rate": 6.077323364002671e-07, "loss": 3.0368, "step": 162020 }, { "epoch": 6.979368566136882, "learning_rate": 6.076838544247883e-07, "loss": 3.0601, "step": 162040 }, { "epoch": 6.9802300038764695, "learning_rate": 6.076353724493095e-07, "loss": 2.8448, "step": 162060 }, { "epoch": 6.981091441616057, "learning_rate": 6.075868904738305e-07, "loss": 3.0056, "step": 162080 }, { "epoch": 6.981952879355645, "learning_rate": 6.075384084983515e-07, "loss": 3.0617, "step": 162100 }, { "epoch": 6.982814317095232, "learning_rate": 6.074899265228727e-07, "loss": 3.0664, "step": 162120 }, { "epoch": 6.983675754834819, "learning_rate": 6.074414445473938e-07, "loss": 3.0686, "step": 162140 }, { "epoch": 6.984537192574407, "learning_rate": 6.073929625719148e-07, "loss": 2.9651, "step": 162160 }, { "epoch": 6.985398630313994, "learning_rate": 6.07344480596436e-07, "loss": 3.0607, "step": 162180 }, { "epoch": 6.986260068053581, "learning_rate": 6.072959986209572e-07, "loss": 2.9463, "step": 162200 }, { "epoch": 6.987121505793169, "learning_rate": 6.072475166454783e-07, "loss": 2.9659, "step": 162220 }, { "epoch": 6.9879829435327565, "learning_rate": 6.071990346699992e-07, "loss": 3.0896, "step": 162240 }, { "epoch": 6.988844381272344, "learning_rate": 6.071505526945204e-07, "loss": 3.0308, "step": 162260 }, { "epoch": 6.989705819011931, "learning_rate": 6.071020707190416e-07, "loss": 2.8781, "step": 162280 }, { "epoch": 6.990567256751518, "learning_rate": 6.070535887435627e-07, "loss": 2.8961, "step": 162300 }, { "epoch": 6.9914286944911055, "learning_rate": 6.070051067680837e-07, "loss": 2.8748, "step": 162320 }, { "epoch": 6.992290132230693, "learning_rate": 6.069566247926049e-07, "loss": 3.0448, "step": 162340 }, { "epoch": 6.99315156997028, "learning_rate": 6.06908142817126e-07, "loss": 3.0758, "step": 162360 }, { "epoch": 6.994013007709868, "learning_rate": 6.06859660841647e-07, "loss": 3.1342, "step": 162380 }, { "epoch": 6.994874445449455, "learning_rate": 6.068111788661681e-07, "loss": 2.9174, "step": 162400 }, { "epoch": 6.995735883189043, "learning_rate": 6.067626968906893e-07, "loss": 2.9313, "step": 162420 }, { "epoch": 6.99659732092863, "learning_rate": 6.067142149152105e-07, "loss": 2.8678, "step": 162440 }, { "epoch": 6.997458758668217, "learning_rate": 6.066657329397315e-07, "loss": 3.0337, "step": 162460 }, { "epoch": 6.998320196407804, "learning_rate": 6.066172509642525e-07, "loss": 2.9557, "step": 162480 }, { "epoch": 6.9991816341473925, "learning_rate": 6.065687689887738e-07, "loss": 2.8643, "step": 162500 }, { "epoch": 7.00004307188698, "learning_rate": 6.065202870132949e-07, "loss": 3.0248, "step": 162520 }, { "epoch": 7.000904509626567, "learning_rate": 6.064718050378159e-07, "loss": 3.0142, "step": 162540 }, { "epoch": 7.001765947366154, "learning_rate": 6.06423323062337e-07, "loss": 2.9398, "step": 162560 }, { "epoch": 7.0026273851057415, "learning_rate": 6.063748410868582e-07, "loss": 2.9548, "step": 162580 }, { "epoch": 7.003488822845329, "learning_rate": 6.063263591113793e-07, "loss": 2.8529, "step": 162600 }, { "epoch": 7.004350260584916, "learning_rate": 6.062778771359002e-07, "loss": 3.0512, "step": 162620 }, { "epoch": 7.005211698324503, "learning_rate": 6.062293951604214e-07, "loss": 2.9536, "step": 162640 }, { "epoch": 7.006073136064091, "learning_rate": 6.061809131849426e-07, "loss": 3.1112, "step": 162660 }, { "epoch": 7.006934573803679, "learning_rate": 6.061324312094636e-07, "loss": 3.034, "step": 162680 }, { "epoch": 7.007796011543266, "learning_rate": 6.060839492339847e-07, "loss": 2.8783, "step": 162700 }, { "epoch": 7.008657449282853, "learning_rate": 6.060354672585059e-07, "loss": 2.9961, "step": 162720 }, { "epoch": 7.00951888702244, "learning_rate": 6.05986985283027e-07, "loss": 2.9377, "step": 162740 }, { "epoch": 7.0103803247620275, "learning_rate": 6.05938503307548e-07, "loss": 2.9542, "step": 162760 }, { "epoch": 7.011241762501615, "learning_rate": 6.058900213320691e-07, "loss": 3.0192, "step": 162780 }, { "epoch": 7.012103200241203, "learning_rate": 6.058415393565903e-07, "loss": 2.9397, "step": 162800 }, { "epoch": 7.01296463798079, "learning_rate": 6.057930573811115e-07, "loss": 3.0397, "step": 162820 }, { "epoch": 7.013826075720377, "learning_rate": 6.057445754056325e-07, "loss": 2.8082, "step": 162840 }, { "epoch": 7.014687513459965, "learning_rate": 6.056960934301536e-07, "loss": 2.9747, "step": 162860 }, { "epoch": 7.015548951199552, "learning_rate": 6.056476114546747e-07, "loss": 2.8268, "step": 162880 }, { "epoch": 7.016410388939139, "learning_rate": 6.055991294791959e-07, "loss": 3.1277, "step": 162900 }, { "epoch": 7.017271826678726, "learning_rate": 6.055506475037168e-07, "loss": 3.0074, "step": 162920 }, { "epoch": 7.0181332644183145, "learning_rate": 6.05502165528238e-07, "loss": 2.8977, "step": 162940 }, { "epoch": 7.018994702157902, "learning_rate": 6.054536835527592e-07, "loss": 2.8764, "step": 162960 }, { "epoch": 7.019856139897489, "learning_rate": 6.054052015772804e-07, "loss": 2.9158, "step": 162980 }, { "epoch": 7.020717577637076, "learning_rate": 6.053567196018013e-07, "loss": 2.7917, "step": 163000 }, { "epoch": 7.0215790153766635, "learning_rate": 6.053082376263224e-07, "loss": 3.0121, "step": 163020 }, { "epoch": 7.022440453116251, "learning_rate": 6.052597556508436e-07, "loss": 2.9799, "step": 163040 }, { "epoch": 7.023301890855838, "learning_rate": 6.052112736753647e-07, "loss": 2.8608, "step": 163060 }, { "epoch": 7.024163328595426, "learning_rate": 6.051627916998858e-07, "loss": 3.0588, "step": 163080 }, { "epoch": 7.025024766335013, "learning_rate": 6.051143097244069e-07, "loss": 3.0159, "step": 163100 }, { "epoch": 7.025886204074601, "learning_rate": 6.05065827748928e-07, "loss": 2.9035, "step": 163120 }, { "epoch": 7.026747641814188, "learning_rate": 6.050173457734491e-07, "loss": 2.8273, "step": 163140 }, { "epoch": 7.027609079553775, "learning_rate": 6.049688637979701e-07, "loss": 2.9879, "step": 163160 }, { "epoch": 7.028470517293362, "learning_rate": 6.049203818224913e-07, "loss": 2.8966, "step": 163180 }, { "epoch": 7.02933195503295, "learning_rate": 6.048718998470125e-07, "loss": 3.0789, "step": 163200 }, { "epoch": 7.030193392772538, "learning_rate": 6.048234178715335e-07, "loss": 2.9414, "step": 163220 }, { "epoch": 7.031054830512125, "learning_rate": 6.047749358960546e-07, "loss": 2.9182, "step": 163240 }, { "epoch": 7.031916268251712, "learning_rate": 6.047264539205757e-07, "loss": 2.8845, "step": 163260 }, { "epoch": 7.0327777059912995, "learning_rate": 6.046779719450968e-07, "loss": 2.9894, "step": 163280 }, { "epoch": 7.033639143730887, "learning_rate": 6.046294899696179e-07, "loss": 2.981, "step": 163300 }, { "epoch": 7.034500581470474, "learning_rate": 6.04581007994139e-07, "loss": 3.0679, "step": 163320 }, { "epoch": 7.035362019210061, "learning_rate": 6.045325260186602e-07, "loss": 2.7753, "step": 163340 }, { "epoch": 7.036223456949649, "learning_rate": 6.044840440431813e-07, "loss": 2.8831, "step": 163360 }, { "epoch": 7.037084894689237, "learning_rate": 6.044355620677023e-07, "loss": 3.0505, "step": 163380 }, { "epoch": 7.037946332428824, "learning_rate": 6.043870800922234e-07, "loss": 3.0424, "step": 163400 }, { "epoch": 7.038807770168411, "learning_rate": 6.043385981167446e-07, "loss": 2.8949, "step": 163420 }, { "epoch": 7.039669207907998, "learning_rate": 6.042901161412656e-07, "loss": 2.8069, "step": 163440 }, { "epoch": 7.040530645647586, "learning_rate": 6.042416341657868e-07, "loss": 3.0025, "step": 163460 }, { "epoch": 7.041392083387173, "learning_rate": 6.041931521903079e-07, "loss": 2.962, "step": 163480 }, { "epoch": 7.042253521126761, "learning_rate": 6.04144670214829e-07, "loss": 3.0152, "step": 163500 }, { "epoch": 7.043114958866348, "learning_rate": 6.040961882393501e-07, "loss": 2.8635, "step": 163520 }, { "epoch": 7.043976396605935, "learning_rate": 6.040477062638712e-07, "loss": 2.9724, "step": 163540 }, { "epoch": 7.044837834345523, "learning_rate": 6.039992242883923e-07, "loss": 2.9637, "step": 163560 }, { "epoch": 7.04569927208511, "learning_rate": 6.039507423129134e-07, "loss": 3.1147, "step": 163580 }, { "epoch": 7.046560709824697, "learning_rate": 6.039022603374346e-07, "loss": 3.136, "step": 163600 }, { "epoch": 7.047422147564284, "learning_rate": 6.038537783619557e-07, "loss": 2.9028, "step": 163620 }, { "epoch": 7.048283585303873, "learning_rate": 6.038052963864767e-07, "loss": 3.1695, "step": 163640 }, { "epoch": 7.04914502304346, "learning_rate": 6.037568144109978e-07, "loss": 2.964, "step": 163660 }, { "epoch": 7.050006460783047, "learning_rate": 6.03708332435519e-07, "loss": 2.9293, "step": 163680 }, { "epoch": 7.050867898522634, "learning_rate": 6.0365985046004e-07, "loss": 2.844, "step": 163700 }, { "epoch": 7.0517293362622215, "learning_rate": 6.036113684845612e-07, "loss": 2.9406, "step": 163720 }, { "epoch": 7.052590774001809, "learning_rate": 6.035628865090823e-07, "loss": 2.8727, "step": 163740 }, { "epoch": 7.053452211741396, "learning_rate": 6.035144045336032e-07, "loss": 3.0454, "step": 163760 }, { "epoch": 7.054313649480984, "learning_rate": 6.034659225581244e-07, "loss": 2.9611, "step": 163780 }, { "epoch": 7.055175087220571, "learning_rate": 6.034174405826456e-07, "loss": 3.0298, "step": 163800 }, { "epoch": 7.056036524960159, "learning_rate": 6.033689586071667e-07, "loss": 2.9875, "step": 163820 }, { "epoch": 7.056897962699746, "learning_rate": 6.033204766316878e-07, "loss": 2.964, "step": 163840 }, { "epoch": 7.057759400439333, "learning_rate": 6.03271994656209e-07, "loss": 2.9821, "step": 163860 }, { "epoch": 7.05862083817892, "learning_rate": 6.032235126807299e-07, "loss": 2.9721, "step": 163880 }, { "epoch": 7.059482275918508, "learning_rate": 6.031750307052511e-07, "loss": 3.0335, "step": 163900 }, { "epoch": 7.060343713658096, "learning_rate": 6.031265487297722e-07, "loss": 3.0767, "step": 163920 }, { "epoch": 7.061205151397683, "learning_rate": 6.030780667542933e-07, "loss": 2.8802, "step": 163940 }, { "epoch": 7.06206658913727, "learning_rate": 6.030295847788144e-07, "loss": 2.9003, "step": 163960 }, { "epoch": 7.0629280268768575, "learning_rate": 6.029811028033356e-07, "loss": 2.973, "step": 163980 }, { "epoch": 7.063789464616445, "learning_rate": 6.029326208278567e-07, "loss": 2.9174, "step": 164000 }, { "epoch": 7.064650902356032, "learning_rate": 6.028841388523777e-07, "loss": 2.9363, "step": 164020 }, { "epoch": 7.065512340095619, "learning_rate": 6.028356568768988e-07, "loss": 2.9845, "step": 164040 }, { "epoch": 7.066373777835207, "learning_rate": 6.0278717490142e-07, "loss": 3.1542, "step": 164060 }, { "epoch": 7.067235215574795, "learning_rate": 6.027386929259411e-07, "loss": 3.1543, "step": 164080 }, { "epoch": 7.068096653314382, "learning_rate": 6.026902109504622e-07, "loss": 2.9746, "step": 164100 }, { "epoch": 7.068958091053969, "learning_rate": 6.026417289749833e-07, "loss": 2.9436, "step": 164120 }, { "epoch": 7.069819528793556, "learning_rate": 6.025932469995044e-07, "loss": 3.0228, "step": 164140 }, { "epoch": 7.070680966533144, "learning_rate": 6.025447650240255e-07, "loss": 3.046, "step": 164160 }, { "epoch": 7.071542404272731, "learning_rate": 6.024962830485465e-07, "loss": 2.9359, "step": 164180 }, { "epoch": 7.072403842012319, "learning_rate": 6.024478010730677e-07, "loss": 2.9893, "step": 164200 }, { "epoch": 7.073265279751906, "learning_rate": 6.023993190975889e-07, "loss": 3.0916, "step": 164220 }, { "epoch": 7.0741267174914935, "learning_rate": 6.023508371221099e-07, "loss": 2.8421, "step": 164240 }, { "epoch": 7.074988155231081, "learning_rate": 6.02302355146631e-07, "loss": 2.7733, "step": 164260 }, { "epoch": 7.075849592970668, "learning_rate": 6.022538731711522e-07, "loss": 2.9088, "step": 164280 }, { "epoch": 7.076711030710255, "learning_rate": 6.022053911956732e-07, "loss": 3.0312, "step": 164300 }, { "epoch": 7.0775724684498424, "learning_rate": 6.021569092201943e-07, "loss": 2.8864, "step": 164320 }, { "epoch": 7.078433906189431, "learning_rate": 6.021084272447154e-07, "loss": 3.0425, "step": 164340 }, { "epoch": 7.079295343929018, "learning_rate": 6.020599452692366e-07, "loss": 3.0696, "step": 164360 }, { "epoch": 7.080156781668605, "learning_rate": 6.020114632937577e-07, "loss": 3.0198, "step": 164380 }, { "epoch": 7.081018219408192, "learning_rate": 6.019629813182787e-07, "loss": 2.9285, "step": 164400 }, { "epoch": 7.08187965714778, "learning_rate": 6.019144993427998e-07, "loss": 3.0015, "step": 164420 }, { "epoch": 7.082741094887367, "learning_rate": 6.01866017367321e-07, "loss": 3.0201, "step": 164440 }, { "epoch": 7.083602532626954, "learning_rate": 6.018175353918421e-07, "loss": 3.0112, "step": 164460 }, { "epoch": 7.084463970366542, "learning_rate": 6.017690534163631e-07, "loss": 2.8393, "step": 164480 }, { "epoch": 7.085325408106129, "learning_rate": 6.017205714408843e-07, "loss": 2.9475, "step": 164500 }, { "epoch": 7.086186845845717, "learning_rate": 6.016720894654054e-07, "loss": 2.8907, "step": 164520 }, { "epoch": 7.087048283585304, "learning_rate": 6.016236074899265e-07, "loss": 2.8161, "step": 164540 }, { "epoch": 7.087909721324891, "learning_rate": 6.015751255144475e-07, "loss": 3.0204, "step": 164560 }, { "epoch": 7.088771159064478, "learning_rate": 6.015266435389687e-07, "loss": 2.9281, "step": 164580 }, { "epoch": 7.089632596804066, "learning_rate": 6.014781615634899e-07, "loss": 2.9017, "step": 164600 }, { "epoch": 7.090494034543654, "learning_rate": 6.01429679588011e-07, "loss": 2.8945, "step": 164620 }, { "epoch": 7.091355472283241, "learning_rate": 6.01381197612532e-07, "loss": 2.972, "step": 164640 }, { "epoch": 7.092216910022828, "learning_rate": 6.013327156370531e-07, "loss": 2.8434, "step": 164660 }, { "epoch": 7.0930783477624155, "learning_rate": 6.012842336615743e-07, "loss": 2.9673, "step": 164680 }, { "epoch": 7.093939785502003, "learning_rate": 6.012357516860953e-07, "loss": 2.9604, "step": 164700 }, { "epoch": 7.09480122324159, "learning_rate": 6.011872697106164e-07, "loss": 3.1189, "step": 164720 }, { "epoch": 7.095662660981177, "learning_rate": 6.011387877351376e-07, "loss": 3.0591, "step": 164740 }, { "epoch": 7.096524098720765, "learning_rate": 6.010903057596588e-07, "loss": 3.0443, "step": 164760 }, { "epoch": 7.097385536460353, "learning_rate": 6.010418237841796e-07, "loss": 3.0799, "step": 164780 }, { "epoch": 7.09824697419994, "learning_rate": 6.009933418087008e-07, "loss": 3.0805, "step": 164800 }, { "epoch": 7.099108411939527, "learning_rate": 6.00944859833222e-07, "loss": 3.0189, "step": 164820 }, { "epoch": 7.099969849679114, "learning_rate": 6.008963778577431e-07, "loss": 2.9508, "step": 164840 }, { "epoch": 7.100831287418702, "learning_rate": 6.008478958822641e-07, "loss": 2.7667, "step": 164860 }, { "epoch": 7.101692725158289, "learning_rate": 6.007994139067853e-07, "loss": 2.8482, "step": 164880 }, { "epoch": 7.102554162897876, "learning_rate": 6.007509319313064e-07, "loss": 2.8236, "step": 164900 }, { "epoch": 7.103415600637464, "learning_rate": 6.007024499558275e-07, "loss": 2.962, "step": 164920 }, { "epoch": 7.1042770383770515, "learning_rate": 6.006539679803485e-07, "loss": 3.0985, "step": 164940 }, { "epoch": 7.105138476116639, "learning_rate": 6.006054860048697e-07, "loss": 3.0049, "step": 164960 }, { "epoch": 7.105999913856226, "learning_rate": 6.005570040293909e-07, "loss": 2.7876, "step": 164980 }, { "epoch": 7.106861351595813, "learning_rate": 6.00508522053912e-07, "loss": 2.6748, "step": 165000 }, { "epoch": 7.1077227893354005, "learning_rate": 6.00460040078433e-07, "loss": 2.9568, "step": 165020 }, { "epoch": 7.108584227074989, "learning_rate": 6.004115581029541e-07, "loss": 2.9358, "step": 165040 }, { "epoch": 7.109445664814576, "learning_rate": 6.003630761274753e-07, "loss": 3.0014, "step": 165060 }, { "epoch": 7.110307102554163, "learning_rate": 6.003145941519964e-07, "loss": 3.124, "step": 165080 }, { "epoch": 7.11116854029375, "learning_rate": 6.002661121765174e-07, "loss": 3.0466, "step": 165100 }, { "epoch": 7.112029978033338, "learning_rate": 6.002176302010386e-07, "loss": 2.8176, "step": 165120 }, { "epoch": 7.112891415772925, "learning_rate": 6.001691482255598e-07, "loss": 2.9513, "step": 165140 }, { "epoch": 7.113752853512512, "learning_rate": 6.001206662500807e-07, "loss": 3.0868, "step": 165160 }, { "epoch": 7.114614291252099, "learning_rate": 6.000721842746018e-07, "loss": 2.8789, "step": 165180 }, { "epoch": 7.1154757289916875, "learning_rate": 6.00023702299123e-07, "loss": 2.9039, "step": 165200 }, { "epoch": 7.116337166731275, "learning_rate": 5.999752203236441e-07, "loss": 3.0421, "step": 165220 }, { "epoch": 7.117198604470862, "learning_rate": 5.999267383481652e-07, "loss": 2.7671, "step": 165240 }, { "epoch": 7.118060042210449, "learning_rate": 5.998782563726863e-07, "loss": 2.997, "step": 165260 }, { "epoch": 7.118921479950036, "learning_rate": 5.998297743972074e-07, "loss": 2.8864, "step": 165280 }, { "epoch": 7.119782917689624, "learning_rate": 5.997812924217286e-07, "loss": 3.011, "step": 165300 }, { "epoch": 7.120644355429211, "learning_rate": 5.997328104462495e-07, "loss": 2.8793, "step": 165320 }, { "epoch": 7.121505793168799, "learning_rate": 5.996843284707707e-07, "loss": 2.9767, "step": 165340 }, { "epoch": 7.122367230908386, "learning_rate": 5.996358464952919e-07, "loss": 2.9329, "step": 165360 }, { "epoch": 7.1232286686479735, "learning_rate": 5.99587364519813e-07, "loss": 2.949, "step": 165380 }, { "epoch": 7.124090106387561, "learning_rate": 5.99538882544334e-07, "loss": 2.8001, "step": 165400 }, { "epoch": 7.124951544127148, "learning_rate": 5.994904005688551e-07, "loss": 2.9253, "step": 165420 }, { "epoch": 7.125812981866735, "learning_rate": 5.994419185933763e-07, "loss": 2.9064, "step": 165440 }, { "epoch": 7.1266744196063225, "learning_rate": 5.993934366178973e-07, "loss": 2.9482, "step": 165460 }, { "epoch": 7.127535857345911, "learning_rate": 5.993449546424184e-07, "loss": 3.1369, "step": 165480 }, { "epoch": 7.128397295085498, "learning_rate": 5.992964726669396e-07, "loss": 2.9048, "step": 165500 }, { "epoch": 7.129258732825085, "learning_rate": 5.992479906914608e-07, "loss": 2.9847, "step": 165520 }, { "epoch": 7.130120170564672, "learning_rate": 5.991995087159816e-07, "loss": 2.8901, "step": 165540 }, { "epoch": 7.13098160830426, "learning_rate": 5.991510267405028e-07, "loss": 2.866, "step": 165560 }, { "epoch": 7.131843046043847, "learning_rate": 5.99102544765024e-07, "loss": 2.9895, "step": 165580 }, { "epoch": 7.132704483783434, "learning_rate": 5.990540627895452e-07, "loss": 2.8174, "step": 165600 }, { "epoch": 7.133565921523022, "learning_rate": 5.990055808140662e-07, "loss": 2.9952, "step": 165620 }, { "epoch": 7.1344273592626095, "learning_rate": 5.989570988385874e-07, "loss": 2.8857, "step": 165640 }, { "epoch": 7.135288797002197, "learning_rate": 5.989086168631084e-07, "loss": 2.9869, "step": 165660 }, { "epoch": 7.136150234741784, "learning_rate": 5.988601348876296e-07, "loss": 2.816, "step": 165680 }, { "epoch": 7.137011672481371, "learning_rate": 5.988116529121506e-07, "loss": 3.0321, "step": 165700 }, { "epoch": 7.1378731102209585, "learning_rate": 5.987631709366717e-07, "loss": 2.8562, "step": 165720 }, { "epoch": 7.138734547960546, "learning_rate": 5.987146889611929e-07, "loss": 3.1286, "step": 165740 }, { "epoch": 7.139595985700134, "learning_rate": 5.986662069857141e-07, "loss": 2.7627, "step": 165760 }, { "epoch": 7.140457423439721, "learning_rate": 5.986177250102351e-07, "loss": 3.0709, "step": 165780 }, { "epoch": 7.141318861179308, "learning_rate": 5.985692430347561e-07, "loss": 2.8484, "step": 165800 }, { "epoch": 7.142180298918896, "learning_rate": 5.985207610592773e-07, "loss": 2.9155, "step": 165820 }, { "epoch": 7.143041736658483, "learning_rate": 5.984722790837984e-07, "loss": 2.8336, "step": 165840 }, { "epoch": 7.14390317439807, "learning_rate": 5.984237971083194e-07, "loss": 2.9144, "step": 165860 }, { "epoch": 7.144764612137657, "learning_rate": 5.983753151328406e-07, "loss": 2.8718, "step": 165880 }, { "epoch": 7.1456260498772455, "learning_rate": 5.983268331573618e-07, "loss": 2.7754, "step": 165900 }, { "epoch": 7.146487487616833, "learning_rate": 5.982783511818827e-07, "loss": 2.9541, "step": 165920 }, { "epoch": 7.14734892535642, "learning_rate": 5.982298692064038e-07, "loss": 3.015, "step": 165940 }, { "epoch": 7.148210363096007, "learning_rate": 5.98181387230925e-07, "loss": 2.849, "step": 165960 }, { "epoch": 7.1490718008355945, "learning_rate": 5.981329052554462e-07, "loss": 2.9698, "step": 165980 }, { "epoch": 7.149933238575182, "learning_rate": 5.980844232799672e-07, "loss": 2.7705, "step": 166000 }, { "epoch": 7.150794676314769, "learning_rate": 5.980359413044883e-07, "loss": 2.901, "step": 166020 }, { "epoch": 7.151656114054357, "learning_rate": 5.979874593290095e-07, "loss": 2.876, "step": 166040 }, { "epoch": 7.152517551793944, "learning_rate": 5.979389773535307e-07, "loss": 2.8266, "step": 166060 }, { "epoch": 7.153378989533532, "learning_rate": 5.978904953780516e-07, "loss": 2.9199, "step": 166080 }, { "epoch": 7.154240427273119, "learning_rate": 5.978420134025727e-07, "loss": 3.0151, "step": 166100 }, { "epoch": 7.155101865012706, "learning_rate": 5.977935314270939e-07, "loss": 3.0024, "step": 166120 }, { "epoch": 7.155963302752293, "learning_rate": 5.97745049451615e-07, "loss": 2.8695, "step": 166140 }, { "epoch": 7.1568247404918806, "learning_rate": 5.976965674761361e-07, "loss": 3.1322, "step": 166160 }, { "epoch": 7.157686178231469, "learning_rate": 5.976480855006571e-07, "loss": 3.1076, "step": 166180 }, { "epoch": 7.158547615971056, "learning_rate": 5.975996035251783e-07, "loss": 3.0057, "step": 166200 }, { "epoch": 7.159409053710643, "learning_rate": 5.975511215496994e-07, "loss": 2.7869, "step": 166220 }, { "epoch": 7.16027049145023, "learning_rate": 5.975026395742205e-07, "loss": 2.9718, "step": 166240 }, { "epoch": 7.161131929189818, "learning_rate": 5.974541575987416e-07, "loss": 2.8379, "step": 166260 }, { "epoch": 7.161993366929405, "learning_rate": 5.974056756232628e-07, "loss": 3.0159, "step": 166280 }, { "epoch": 7.162854804668992, "learning_rate": 5.973571936477838e-07, "loss": 2.9229, "step": 166300 }, { "epoch": 7.16371624240858, "learning_rate": 5.973087116723049e-07, "loss": 2.9966, "step": 166320 }, { "epoch": 7.1645776801481675, "learning_rate": 5.97260229696826e-07, "loss": 3.0938, "step": 166340 }, { "epoch": 7.165439117887755, "learning_rate": 5.972117477213472e-07, "loss": 2.8535, "step": 166360 }, { "epoch": 7.166300555627342, "learning_rate": 5.971632657458683e-07, "loss": 2.8759, "step": 166380 }, { "epoch": 7.167161993366929, "learning_rate": 5.971147837703893e-07, "loss": 2.9628, "step": 166400 }, { "epoch": 7.1680234311065165, "learning_rate": 5.970663017949105e-07, "loss": 2.9814, "step": 166420 }, { "epoch": 7.168884868846104, "learning_rate": 5.970178198194315e-07, "loss": 2.8651, "step": 166440 }, { "epoch": 7.169746306585692, "learning_rate": 5.969693378439526e-07, "loss": 2.8922, "step": 166460 }, { "epoch": 7.170607744325279, "learning_rate": 5.969208558684737e-07, "loss": 2.9063, "step": 166480 }, { "epoch": 7.171469182064866, "learning_rate": 5.968723738929949e-07, "loss": 2.9269, "step": 166500 }, { "epoch": 7.172330619804454, "learning_rate": 5.96823891917516e-07, "loss": 2.7926, "step": 166520 }, { "epoch": 7.173192057544041, "learning_rate": 5.967754099420371e-07, "loss": 2.9786, "step": 166540 }, { "epoch": 7.174053495283628, "learning_rate": 5.967269279665581e-07, "loss": 2.8454, "step": 166560 }, { "epoch": 7.174914933023215, "learning_rate": 5.966784459910793e-07, "loss": 2.7862, "step": 166580 }, { "epoch": 7.1757763707628035, "learning_rate": 5.966299640156004e-07, "loss": 3.1183, "step": 166600 }, { "epoch": 7.176637808502391, "learning_rate": 5.965814820401215e-07, "loss": 2.8224, "step": 166620 }, { "epoch": 7.177499246241978, "learning_rate": 5.965330000646426e-07, "loss": 2.9662, "step": 166640 }, { "epoch": 7.178360683981565, "learning_rate": 5.964845180891638e-07, "loss": 2.7401, "step": 166660 }, { "epoch": 7.1792221217211525, "learning_rate": 5.964360361136848e-07, "loss": 2.9522, "step": 166680 }, { "epoch": 7.18008355946074, "learning_rate": 5.963875541382059e-07, "loss": 3.0052, "step": 166700 }, { "epoch": 7.180944997200327, "learning_rate": 5.96339072162727e-07, "loss": 2.8316, "step": 166720 }, { "epoch": 7.181806434939915, "learning_rate": 5.962905901872481e-07, "loss": 2.9772, "step": 166740 }, { "epoch": 7.182667872679502, "learning_rate": 5.962421082117693e-07, "loss": 3.0082, "step": 166760 }, { "epoch": 7.18352931041909, "learning_rate": 5.961936262362904e-07, "loss": 2.9295, "step": 166780 }, { "epoch": 7.184390748158677, "learning_rate": 5.961451442608115e-07, "loss": 2.9893, "step": 166800 }, { "epoch": 7.185252185898264, "learning_rate": 5.960966622853325e-07, "loss": 2.9769, "step": 166820 }, { "epoch": 7.186113623637851, "learning_rate": 5.960481803098537e-07, "loss": 2.9779, "step": 166840 }, { "epoch": 7.186975061377439, "learning_rate": 5.959996983343748e-07, "loss": 3.0604, "step": 166860 }, { "epoch": 7.187836499117027, "learning_rate": 5.959512163588959e-07, "loss": 2.8288, "step": 166880 }, { "epoch": 7.188697936856614, "learning_rate": 5.95902734383417e-07, "loss": 2.9687, "step": 166900 }, { "epoch": 7.189559374596201, "learning_rate": 5.958542524079382e-07, "loss": 3.0262, "step": 166920 }, { "epoch": 7.1904208123357884, "learning_rate": 5.958057704324591e-07, "loss": 2.9214, "step": 166940 }, { "epoch": 7.191282250075376, "learning_rate": 5.957572884569802e-07, "loss": 2.9494, "step": 166960 }, { "epoch": 7.192143687814963, "learning_rate": 5.957088064815014e-07, "loss": 2.7909, "step": 166980 }, { "epoch": 7.19300512555455, "learning_rate": 5.956603245060225e-07, "loss": 2.9268, "step": 167000 }, { "epoch": 7.193866563294138, "learning_rate": 5.956118425305436e-07, "loss": 2.8481, "step": 167020 }, { "epoch": 7.194728001033726, "learning_rate": 5.955633605550647e-07, "loss": 3.02, "step": 167040 }, { "epoch": 7.195589438773313, "learning_rate": 5.955148785795858e-07, "loss": 2.9193, "step": 167060 }, { "epoch": 7.1964508765129, "learning_rate": 5.954663966041069e-07, "loss": 2.93, "step": 167080 }, { "epoch": 7.197312314252487, "learning_rate": 5.95417914628628e-07, "loss": 2.9868, "step": 167100 }, { "epoch": 7.1981737519920745, "learning_rate": 5.953694326531491e-07, "loss": 2.8993, "step": 167120 }, { "epoch": 7.199035189731662, "learning_rate": 5.953209506776703e-07, "loss": 2.9262, "step": 167140 }, { "epoch": 7.19989662747125, "learning_rate": 5.952724687021914e-07, "loss": 3.0902, "step": 167160 }, { "epoch": 7.200758065210837, "learning_rate": 5.952239867267125e-07, "loss": 3.0595, "step": 167180 }, { "epoch": 7.201619502950424, "learning_rate": 5.951755047512335e-07, "loss": 2.8887, "step": 167200 }, { "epoch": 7.202480940690012, "learning_rate": 5.951270227757547e-07, "loss": 2.8772, "step": 167220 }, { "epoch": 7.203342378429599, "learning_rate": 5.950785408002758e-07, "loss": 3.1009, "step": 167240 }, { "epoch": 7.204203816169186, "learning_rate": 5.950300588247969e-07, "loss": 2.9769, "step": 167260 }, { "epoch": 7.205065253908773, "learning_rate": 5.94981576849318e-07, "loss": 2.7755, "step": 167280 }, { "epoch": 7.2059266916483615, "learning_rate": 5.949330948738392e-07, "loss": 2.9528, "step": 167300 }, { "epoch": 7.206788129387949, "learning_rate": 5.948846128983601e-07, "loss": 3.1157, "step": 167320 }, { "epoch": 7.207649567127536, "learning_rate": 5.948361309228812e-07, "loss": 2.7264, "step": 167340 }, { "epoch": 7.208511004867123, "learning_rate": 5.947876489474024e-07, "loss": 2.9063, "step": 167360 }, { "epoch": 7.2093724426067105, "learning_rate": 5.947391669719236e-07, "loss": 2.9303, "step": 167380 }, { "epoch": 7.210233880346298, "learning_rate": 5.946906849964447e-07, "loss": 2.9421, "step": 167400 }, { "epoch": 7.211095318085885, "learning_rate": 5.946422030209658e-07, "loss": 2.8847, "step": 167420 }, { "epoch": 7.211956755825473, "learning_rate": 5.945937210454868e-07, "loss": 2.8596, "step": 167440 }, { "epoch": 7.21281819356506, "learning_rate": 5.94545239070008e-07, "loss": 2.9099, "step": 167460 }, { "epoch": 7.213679631304648, "learning_rate": 5.94496757094529e-07, "loss": 2.8965, "step": 167480 }, { "epoch": 7.214541069044235, "learning_rate": 5.944482751190501e-07, "loss": 2.7551, "step": 167500 }, { "epoch": 7.215402506783822, "learning_rate": 5.943997931435713e-07, "loss": 2.9461, "step": 167520 }, { "epoch": 7.216263944523409, "learning_rate": 5.943513111680924e-07, "loss": 2.8199, "step": 167540 }, { "epoch": 7.217125382262997, "learning_rate": 5.943028291926135e-07, "loss": 3.1303, "step": 167560 }, { "epoch": 7.217986820002585, "learning_rate": 5.942543472171345e-07, "loss": 3.0348, "step": 167580 }, { "epoch": 7.218848257742172, "learning_rate": 5.942058652416557e-07, "loss": 2.9456, "step": 167600 }, { "epoch": 7.219709695481759, "learning_rate": 5.941573832661768e-07, "loss": 2.7533, "step": 167620 }, { "epoch": 7.2205711332213465, "learning_rate": 5.941089012906978e-07, "loss": 2.9833, "step": 167640 }, { "epoch": 7.221432570960934, "learning_rate": 5.94060419315219e-07, "loss": 2.7963, "step": 167660 }, { "epoch": 7.222294008700521, "learning_rate": 5.940119373397402e-07, "loss": 2.9361, "step": 167680 }, { "epoch": 7.223155446440108, "learning_rate": 5.939634553642612e-07, "loss": 3.0236, "step": 167700 }, { "epoch": 7.224016884179696, "learning_rate": 5.939149733887822e-07, "loss": 3.0724, "step": 167720 }, { "epoch": 7.224878321919284, "learning_rate": 5.938664914133034e-07, "loss": 2.9369, "step": 167740 }, { "epoch": 7.225739759658871, "learning_rate": 5.938180094378246e-07, "loss": 3.022, "step": 167760 }, { "epoch": 7.226601197398458, "learning_rate": 5.937695274623457e-07, "loss": 2.9464, "step": 167780 }, { "epoch": 7.227462635138045, "learning_rate": 5.937210454868667e-07, "loss": 2.8952, "step": 167800 }, { "epoch": 7.228324072877633, "learning_rate": 5.936725635113879e-07, "loss": 2.9995, "step": 167820 }, { "epoch": 7.22918551061722, "learning_rate": 5.936240815359089e-07, "loss": 3.0927, "step": 167840 }, { "epoch": 7.230046948356808, "learning_rate": 5.935755995604301e-07, "loss": 2.8791, "step": 167860 }, { "epoch": 7.230908386096395, "learning_rate": 5.935271175849511e-07, "loss": 3.0313, "step": 167880 }, { "epoch": 7.231769823835982, "learning_rate": 5.934786356094723e-07, "loss": 2.9859, "step": 167900 }, { "epoch": 7.23263126157557, "learning_rate": 5.934301536339935e-07, "loss": 3.0214, "step": 167920 }, { "epoch": 7.233492699315157, "learning_rate": 5.933816716585146e-07, "loss": 2.8827, "step": 167940 }, { "epoch": 7.234354137054744, "learning_rate": 5.933331896830355e-07, "loss": 3.0012, "step": 167960 }, { "epoch": 7.235215574794331, "learning_rate": 5.932847077075567e-07, "loss": 2.9237, "step": 167980 }, { "epoch": 7.236077012533919, "learning_rate": 5.932362257320779e-07, "loss": 2.9389, "step": 168000 }, { "epoch": 7.236938450273507, "learning_rate": 5.931877437565988e-07, "loss": 3.1977, "step": 168020 }, { "epoch": 7.237799888013094, "learning_rate": 5.9313926178112e-07, "loss": 3.0234, "step": 168040 }, { "epoch": 7.238661325752681, "learning_rate": 5.930907798056412e-07, "loss": 2.8896, "step": 168060 }, { "epoch": 7.2395227634922685, "learning_rate": 5.930422978301622e-07, "loss": 2.9428, "step": 168080 }, { "epoch": 7.240384201231856, "learning_rate": 5.929938158546832e-07, "loss": 2.9208, "step": 168100 }, { "epoch": 7.241245638971443, "learning_rate": 5.929453338792044e-07, "loss": 3.0968, "step": 168120 }, { "epoch": 7.242107076711031, "learning_rate": 5.928968519037256e-07, "loss": 2.8931, "step": 168140 }, { "epoch": 7.242968514450618, "learning_rate": 5.928483699282467e-07, "loss": 2.9136, "step": 168160 }, { "epoch": 7.243829952190206, "learning_rate": 5.927998879527677e-07, "loss": 3.1429, "step": 168180 }, { "epoch": 7.244691389929793, "learning_rate": 5.927514059772889e-07, "loss": 3.007, "step": 168200 }, { "epoch": 7.24555282766938, "learning_rate": 5.9270292400181e-07, "loss": 2.9629, "step": 168220 }, { "epoch": 7.246414265408967, "learning_rate": 5.92654442026331e-07, "loss": 2.9683, "step": 168240 }, { "epoch": 7.247275703148555, "learning_rate": 5.926059600508521e-07, "loss": 3.0316, "step": 168260 }, { "epoch": 7.248137140888142, "learning_rate": 5.925574780753733e-07, "loss": 2.954, "step": 168280 }, { "epoch": 7.24899857862773, "learning_rate": 5.925089960998945e-07, "loss": 2.9693, "step": 168300 }, { "epoch": 7.249860016367317, "learning_rate": 5.924605141244155e-07, "loss": 2.9614, "step": 168320 }, { "epoch": 7.2507214541069045, "learning_rate": 5.924120321489365e-07, "loss": 3.064, "step": 168340 }, { "epoch": 7.251582891846492, "learning_rate": 5.923635501734577e-07, "loss": 3.0012, "step": 168360 }, { "epoch": 7.252444329586079, "learning_rate": 5.923150681979789e-07, "loss": 2.952, "step": 168380 }, { "epoch": 7.253305767325666, "learning_rate": 5.922665862224999e-07, "loss": 2.9328, "step": 168400 }, { "epoch": 7.254167205065254, "learning_rate": 5.92218104247021e-07, "loss": 2.9244, "step": 168420 }, { "epoch": 7.255028642804842, "learning_rate": 5.921696222715422e-07, "loss": 2.996, "step": 168440 }, { "epoch": 7.255890080544429, "learning_rate": 5.921211402960633e-07, "loss": 3.0186, "step": 168460 }, { "epoch": 7.256751518284016, "learning_rate": 5.920726583205843e-07, "loss": 2.9952, "step": 168480 }, { "epoch": 7.257612956023603, "learning_rate": 5.920241763451054e-07, "loss": 2.8745, "step": 168500 }, { "epoch": 7.258474393763191, "learning_rate": 5.919756943696266e-07, "loss": 2.7985, "step": 168520 }, { "epoch": 7.259335831502778, "learning_rate": 5.919272123941478e-07, "loss": 2.8791, "step": 168540 }, { "epoch": 7.260197269242365, "learning_rate": 5.918787304186687e-07, "loss": 2.8099, "step": 168560 }, { "epoch": 7.261058706981953, "learning_rate": 5.918302484431899e-07, "loss": 2.939, "step": 168580 }, { "epoch": 7.2619201447215405, "learning_rate": 5.91781766467711e-07, "loss": 2.9422, "step": 168600 }, { "epoch": 7.262781582461128, "learning_rate": 5.91733284492232e-07, "loss": 2.9773, "step": 168620 }, { "epoch": 7.263643020200715, "learning_rate": 5.916848025167531e-07, "loss": 2.9952, "step": 168640 }, { "epoch": 7.264504457940302, "learning_rate": 5.916363205412743e-07, "loss": 2.95, "step": 168660 }, { "epoch": 7.265365895679889, "learning_rate": 5.915878385657955e-07, "loss": 2.9477, "step": 168680 }, { "epoch": 7.266227333419478, "learning_rate": 5.915393565903165e-07, "loss": 3.0112, "step": 168700 }, { "epoch": 7.267088771159065, "learning_rate": 5.914908746148375e-07, "loss": 2.9472, "step": 168720 }, { "epoch": 7.267950208898652, "learning_rate": 5.914423926393587e-07, "loss": 2.9012, "step": 168740 }, { "epoch": 7.268811646638239, "learning_rate": 5.913939106638799e-07, "loss": 2.878, "step": 168760 }, { "epoch": 7.2696730843778266, "learning_rate": 5.913454286884009e-07, "loss": 3.0647, "step": 168780 }, { "epoch": 7.270534522117414, "learning_rate": 5.91296946712922e-07, "loss": 2.8986, "step": 168800 }, { "epoch": 7.271395959857001, "learning_rate": 5.912484647374432e-07, "loss": 2.9061, "step": 168820 }, { "epoch": 7.272257397596588, "learning_rate": 5.911999827619643e-07, "loss": 2.9105, "step": 168840 }, { "epoch": 7.273118835336176, "learning_rate": 5.911515007864853e-07, "loss": 2.9233, "step": 168860 }, { "epoch": 7.273980273075764, "learning_rate": 5.911030188110064e-07, "loss": 2.8147, "step": 168880 }, { "epoch": 7.274841710815351, "learning_rate": 5.910545368355276e-07, "loss": 2.9303, "step": 168900 }, { "epoch": 7.275703148554938, "learning_rate": 5.910060548600487e-07, "loss": 2.9242, "step": 168920 }, { "epoch": 7.276564586294525, "learning_rate": 5.909575728845698e-07, "loss": 3.0085, "step": 168940 }, { "epoch": 7.277426024034113, "learning_rate": 5.909090909090909e-07, "loss": 2.872, "step": 168960 }, { "epoch": 7.2782874617737, "learning_rate": 5.90860608933612e-07, "loss": 2.9409, "step": 168980 }, { "epoch": 7.279148899513288, "learning_rate": 5.908121269581331e-07, "loss": 3.0711, "step": 169000 }, { "epoch": 7.280010337252875, "learning_rate": 5.907636449826542e-07, "loss": 3.0192, "step": 169020 }, { "epoch": 7.2808717749924625, "learning_rate": 5.907151630071753e-07, "loss": 2.9171, "step": 169040 }, { "epoch": 7.28173321273205, "learning_rate": 5.906666810316965e-07, "loss": 2.8123, "step": 169060 }, { "epoch": 7.282594650471637, "learning_rate": 5.906181990562176e-07, "loss": 3.0775, "step": 169080 }, { "epoch": 7.283456088211224, "learning_rate": 5.905697170807385e-07, "loss": 2.7945, "step": 169100 }, { "epoch": 7.2843175259508115, "learning_rate": 5.905212351052597e-07, "loss": 2.7648, "step": 169120 }, { "epoch": 7.2851789636904, "learning_rate": 5.904727531297809e-07, "loss": 2.9796, "step": 169140 }, { "epoch": 7.286040401429987, "learning_rate": 5.904242711543019e-07, "loss": 2.8709, "step": 169160 }, { "epoch": 7.286901839169574, "learning_rate": 5.90375789178823e-07, "loss": 2.9439, "step": 169180 }, { "epoch": 7.287763276909161, "learning_rate": 5.903273072033443e-07, "loss": 2.8146, "step": 169200 }, { "epoch": 7.288624714648749, "learning_rate": 5.902788252278652e-07, "loss": 2.9144, "step": 169220 }, { "epoch": 7.289486152388336, "learning_rate": 5.902303432523863e-07, "loss": 2.9881, "step": 169240 }, { "epoch": 7.290347590127923, "learning_rate": 5.901818612769074e-07, "loss": 2.9816, "step": 169260 }, { "epoch": 7.291209027867511, "learning_rate": 5.901333793014286e-07, "loss": 2.825, "step": 169280 }, { "epoch": 7.2920704656070985, "learning_rate": 5.900848973259497e-07, "loss": 3.0248, "step": 169300 }, { "epoch": 7.292931903346686, "learning_rate": 5.900364153504708e-07, "loss": 2.9905, "step": 169320 }, { "epoch": 7.293793341086273, "learning_rate": 5.899879333749919e-07, "loss": 2.9468, "step": 169340 }, { "epoch": 7.29465477882586, "learning_rate": 5.89939451399513e-07, "loss": 2.9361, "step": 169360 }, { "epoch": 7.2955162165654475, "learning_rate": 5.898909694240341e-07, "loss": 3.002, "step": 169380 }, { "epoch": 7.296377654305035, "learning_rate": 5.898424874485552e-07, "loss": 2.8868, "step": 169400 }, { "epoch": 7.297239092044623, "learning_rate": 5.897940054730763e-07, "loss": 2.8819, "step": 169420 }, { "epoch": 7.29810052978421, "learning_rate": 5.897455234975975e-07, "loss": 3.1066, "step": 169440 }, { "epoch": 7.298961967523797, "learning_rate": 5.896970415221186e-07, "loss": 2.8859, "step": 169460 }, { "epoch": 7.299823405263385, "learning_rate": 5.896485595466396e-07, "loss": 2.8926, "step": 169480 }, { "epoch": 7.300684843002972, "learning_rate": 5.896000775711607e-07, "loss": 2.8052, "step": 169500 }, { "epoch": 7.301546280742559, "learning_rate": 5.895515955956818e-07, "loss": 2.9299, "step": 169520 }, { "epoch": 7.302407718482146, "learning_rate": 5.89503113620203e-07, "loss": 2.8855, "step": 169540 }, { "epoch": 7.3032691562217344, "learning_rate": 5.894546316447241e-07, "loss": 3.0999, "step": 169560 }, { "epoch": 7.304130593961322, "learning_rate": 5.894061496692452e-07, "loss": 3.057, "step": 169580 }, { "epoch": 7.304992031700909, "learning_rate": 5.893576676937663e-07, "loss": 3.0468, "step": 169600 }, { "epoch": 7.305853469440496, "learning_rate": 5.893091857182873e-07, "loss": 3.1104, "step": 169620 }, { "epoch": 7.306714907180083, "learning_rate": 5.892607037428084e-07, "loss": 2.8131, "step": 169640 }, { "epoch": 7.307576344919671, "learning_rate": 5.892122217673296e-07, "loss": 3.0096, "step": 169660 }, { "epoch": 7.308437782659258, "learning_rate": 5.891637397918507e-07, "loss": 3.1009, "step": 169680 }, { "epoch": 7.309299220398846, "learning_rate": 5.891152578163718e-07, "loss": 2.9723, "step": 169700 }, { "epoch": 7.310160658138433, "learning_rate": 5.890667758408929e-07, "loss": 3.0135, "step": 169720 }, { "epoch": 7.3110220958780205, "learning_rate": 5.89018293865414e-07, "loss": 2.9477, "step": 169740 }, { "epoch": 7.311883533617608, "learning_rate": 5.889698118899351e-07, "loss": 2.895, "step": 169760 }, { "epoch": 7.312744971357195, "learning_rate": 5.889213299144562e-07, "loss": 2.8632, "step": 169780 }, { "epoch": 7.313606409096782, "learning_rate": 5.888728479389773e-07, "loss": 2.8107, "step": 169800 }, { "epoch": 7.3144678468363695, "learning_rate": 5.888243659634984e-07, "loss": 3.0592, "step": 169820 }, { "epoch": 7.315329284575958, "learning_rate": 5.887758839880196e-07, "loss": 2.7838, "step": 169840 }, { "epoch": 7.316190722315545, "learning_rate": 5.887274020125406e-07, "loss": 2.854, "step": 169860 }, { "epoch": 7.317052160055132, "learning_rate": 5.886789200370617e-07, "loss": 3.1152, "step": 169880 }, { "epoch": 7.317913597794719, "learning_rate": 5.886304380615828e-07, "loss": 2.9547, "step": 169900 }, { "epoch": 7.318775035534307, "learning_rate": 5.88581956086104e-07, "loss": 3.009, "step": 169920 }, { "epoch": 7.319636473273894, "learning_rate": 5.885334741106251e-07, "loss": 2.9955, "step": 169940 }, { "epoch": 7.320497911013481, "learning_rate": 5.884849921351462e-07, "loss": 2.8114, "step": 169960 }, { "epoch": 7.321359348753069, "learning_rate": 5.884365101596673e-07, "loss": 2.9754, "step": 169980 }, { "epoch": 7.3222207864926565, "learning_rate": 5.883880281841884e-07, "loss": 2.9266, "step": 170000 }, { "epoch": 7.323082224232244, "learning_rate": 5.883395462087095e-07, "loss": 2.8869, "step": 170020 }, { "epoch": 7.323943661971831, "learning_rate": 5.882910642332306e-07, "loss": 2.9221, "step": 170040 }, { "epoch": 7.324805099711418, "learning_rate": 5.882425822577517e-07, "loss": 2.8042, "step": 170060 }, { "epoch": 7.3256665374510055, "learning_rate": 5.881941002822729e-07, "loss": 2.8651, "step": 170080 }, { "epoch": 7.326527975190593, "learning_rate": 5.88145618306794e-07, "loss": 2.9741, "step": 170100 }, { "epoch": 7.327389412930181, "learning_rate": 5.880971363313149e-07, "loss": 2.6987, "step": 170120 }, { "epoch": 7.328250850669768, "learning_rate": 5.880486543558361e-07, "loss": 3.0572, "step": 170140 }, { "epoch": 7.329112288409355, "learning_rate": 5.880001723803572e-07, "loss": 2.7799, "step": 170160 }, { "epoch": 7.329973726148943, "learning_rate": 5.879516904048783e-07, "loss": 2.9505, "step": 170180 }, { "epoch": 7.33083516388853, "learning_rate": 5.879032084293994e-07, "loss": 3.0217, "step": 170200 }, { "epoch": 7.331696601628117, "learning_rate": 5.878547264539206e-07, "loss": 3.004, "step": 170220 }, { "epoch": 7.332558039367704, "learning_rate": 5.878062444784416e-07, "loss": 2.9208, "step": 170240 }, { "epoch": 7.3334194771072925, "learning_rate": 5.877577625029627e-07, "loss": 2.8257, "step": 170260 }, { "epoch": 7.33428091484688, "learning_rate": 5.877092805274838e-07, "loss": 3.045, "step": 170280 }, { "epoch": 7.335142352586467, "learning_rate": 5.87660798552005e-07, "loss": 2.9598, "step": 170300 }, { "epoch": 7.336003790326054, "learning_rate": 5.876123165765261e-07, "loss": 3.1423, "step": 170320 }, { "epoch": 7.3368652280656415, "learning_rate": 5.875638346010472e-07, "loss": 2.8198, "step": 170340 }, { "epoch": 7.337726665805229, "learning_rate": 5.875153526255683e-07, "loss": 2.9087, "step": 170360 }, { "epoch": 7.338588103544816, "learning_rate": 5.874668706500894e-07, "loss": 3.1305, "step": 170380 }, { "epoch": 7.339449541284404, "learning_rate": 5.874183886746105e-07, "loss": 2.8604, "step": 170400 }, { "epoch": 7.340310979023991, "learning_rate": 5.873699066991315e-07, "loss": 2.8556, "step": 170420 }, { "epoch": 7.341172416763579, "learning_rate": 5.873214247236527e-07, "loss": 2.7255, "step": 170440 }, { "epoch": 7.342033854503166, "learning_rate": 5.872729427481739e-07, "loss": 3.0197, "step": 170460 }, { "epoch": 7.342895292242753, "learning_rate": 5.87224460772695e-07, "loss": 2.7561, "step": 170480 }, { "epoch": 7.34375672998234, "learning_rate": 5.871759787972159e-07, "loss": 2.8936, "step": 170500 }, { "epoch": 7.3446181677219275, "learning_rate": 5.871274968217371e-07, "loss": 2.9464, "step": 170520 }, { "epoch": 7.345479605461516, "learning_rate": 5.870790148462583e-07, "loss": 2.8811, "step": 170540 }, { "epoch": 7.346341043201103, "learning_rate": 5.870305328707794e-07, "loss": 2.9148, "step": 170560 }, { "epoch": 7.34720248094069, "learning_rate": 5.869820508953004e-07, "loss": 3.0389, "step": 170580 }, { "epoch": 7.348063918680277, "learning_rate": 5.869335689198216e-07, "loss": 2.991, "step": 170600 }, { "epoch": 7.348925356419865, "learning_rate": 5.868850869443427e-07, "loss": 3.1231, "step": 170620 }, { "epoch": 7.349786794159452, "learning_rate": 5.868366049688638e-07, "loss": 2.7957, "step": 170640 }, { "epoch": 7.350648231899039, "learning_rate": 5.867881229933848e-07, "loss": 3.1204, "step": 170660 }, { "epoch": 7.351509669638627, "learning_rate": 5.86739641017906e-07, "loss": 2.8421, "step": 170680 }, { "epoch": 7.3523711073782145, "learning_rate": 5.866911590424271e-07, "loss": 2.7792, "step": 170700 }, { "epoch": 7.353232545117802, "learning_rate": 5.866426770669481e-07, "loss": 3.1122, "step": 170720 }, { "epoch": 7.354093982857389, "learning_rate": 5.865941950914693e-07, "loss": 2.8722, "step": 170740 }, { "epoch": 7.354955420596976, "learning_rate": 5.865457131159904e-07, "loss": 2.8448, "step": 170760 }, { "epoch": 7.3558168583365635, "learning_rate": 5.864972311405115e-07, "loss": 3.027, "step": 170780 }, { "epoch": 7.356678296076151, "learning_rate": 5.864487491650325e-07, "loss": 2.8187, "step": 170800 }, { "epoch": 7.357539733815738, "learning_rate": 5.864002671895537e-07, "loss": 2.9756, "step": 170820 }, { "epoch": 7.358401171555326, "learning_rate": 5.863517852140749e-07, "loss": 2.8773, "step": 170840 }, { "epoch": 7.359262609294913, "learning_rate": 5.86303303238596e-07, "loss": 2.7869, "step": 170860 }, { "epoch": 7.360124047034501, "learning_rate": 5.862548212631169e-07, "loss": 3.163, "step": 170880 }, { "epoch": 7.360985484774088, "learning_rate": 5.862063392876381e-07, "loss": 2.8545, "step": 170900 }, { "epoch": 7.361846922513675, "learning_rate": 5.861578573121593e-07, "loss": 2.8849, "step": 170920 }, { "epoch": 7.362708360253262, "learning_rate": 5.861093753366804e-07, "loss": 2.9166, "step": 170940 }, { "epoch": 7.3635697979928505, "learning_rate": 5.860608933612014e-07, "loss": 2.7729, "step": 170960 }, { "epoch": 7.364431235732438, "learning_rate": 5.860124113857227e-07, "loss": 2.8605, "step": 170980 }, { "epoch": 7.365292673472025, "learning_rate": 5.859639294102437e-07, "loss": 3.039, "step": 171000 }, { "epoch": 7.366154111211612, "learning_rate": 5.859154474347647e-07, "loss": 2.8706, "step": 171020 }, { "epoch": 7.3670155489511995, "learning_rate": 5.858669654592858e-07, "loss": 2.8606, "step": 171040 }, { "epoch": 7.367876986690787, "learning_rate": 5.85818483483807e-07, "loss": 2.9551, "step": 171060 }, { "epoch": 7.368738424430374, "learning_rate": 5.857700015083282e-07, "loss": 2.8875, "step": 171080 }, { "epoch": 7.369599862169961, "learning_rate": 5.857215195328492e-07, "loss": 2.9301, "step": 171100 }, { "epoch": 7.370461299909549, "learning_rate": 5.856730375573703e-07, "loss": 3.0549, "step": 171120 }, { "epoch": 7.371322737649137, "learning_rate": 5.856245555818914e-07, "loss": 2.9536, "step": 171140 }, { "epoch": 7.372184175388724, "learning_rate": 5.855760736064126e-07, "loss": 2.7503, "step": 171160 }, { "epoch": 7.373045613128311, "learning_rate": 5.855275916309336e-07, "loss": 2.9457, "step": 171180 }, { "epoch": 7.373907050867898, "learning_rate": 5.854791096554547e-07, "loss": 2.9001, "step": 171200 }, { "epoch": 7.374768488607486, "learning_rate": 5.854306276799759e-07, "loss": 2.7928, "step": 171220 }, { "epoch": 7.375629926347074, "learning_rate": 5.85382145704497e-07, "loss": 2.8237, "step": 171240 }, { "epoch": 7.376491364086661, "learning_rate": 5.85333663729018e-07, "loss": 3.0204, "step": 171260 }, { "epoch": 7.377352801826248, "learning_rate": 5.852851817535391e-07, "loss": 3.0339, "step": 171280 }, { "epoch": 7.378214239565835, "learning_rate": 5.852366997780603e-07, "loss": 2.8556, "step": 171300 }, { "epoch": 7.379075677305423, "learning_rate": 5.851882178025814e-07, "loss": 3.0505, "step": 171320 }, { "epoch": 7.37993711504501, "learning_rate": 5.851397358271024e-07, "loss": 2.7923, "step": 171340 }, { "epoch": 7.380798552784597, "learning_rate": 5.850912538516236e-07, "loss": 3.0507, "step": 171360 }, { "epoch": 7.381659990524184, "learning_rate": 5.850427718761448e-07, "loss": 3.1366, "step": 171380 }, { "epoch": 7.382521428263773, "learning_rate": 5.849942899006657e-07, "loss": 3.0351, "step": 171400 }, { "epoch": 7.38338286600336, "learning_rate": 5.849458079251868e-07, "loss": 3.0174, "step": 171420 }, { "epoch": 7.384244303742947, "learning_rate": 5.84897325949708e-07, "loss": 2.9193, "step": 171440 }, { "epoch": 7.385105741482534, "learning_rate": 5.848488439742292e-07, "loss": 2.993, "step": 171460 }, { "epoch": 7.3859671792221215, "learning_rate": 5.848003619987502e-07, "loss": 2.8399, "step": 171480 }, { "epoch": 7.386828616961709, "learning_rate": 5.847518800232713e-07, "loss": 2.8423, "step": 171500 }, { "epoch": 7.387690054701297, "learning_rate": 5.847033980477924e-07, "loss": 2.953, "step": 171520 }, { "epoch": 7.388551492440884, "learning_rate": 5.846549160723136e-07, "loss": 2.8429, "step": 171540 }, { "epoch": 7.389412930180471, "learning_rate": 5.846064340968346e-07, "loss": 2.9913, "step": 171560 }, { "epoch": 7.390274367920059, "learning_rate": 5.845579521213557e-07, "loss": 2.9053, "step": 171580 }, { "epoch": 7.391135805659646, "learning_rate": 5.845094701458769e-07, "loss": 2.9369, "step": 171600 }, { "epoch": 7.391997243399233, "learning_rate": 5.844609881703981e-07, "loss": 2.8386, "step": 171620 }, { "epoch": 7.39285868113882, "learning_rate": 5.84412506194919e-07, "loss": 2.8679, "step": 171640 }, { "epoch": 7.393720118878408, "learning_rate": 5.843640242194401e-07, "loss": 2.7829, "step": 171660 }, { "epoch": 7.394581556617996, "learning_rate": 5.843155422439613e-07, "loss": 2.8549, "step": 171680 }, { "epoch": 7.395442994357583, "learning_rate": 5.842670602684825e-07, "loss": 2.8714, "step": 171700 }, { "epoch": 7.39630443209717, "learning_rate": 5.842185782930035e-07, "loss": 2.7525, "step": 171720 }, { "epoch": 7.3971658698367575, "learning_rate": 5.841700963175246e-07, "loss": 2.8591, "step": 171740 }, { "epoch": 7.398027307576345, "learning_rate": 5.841216143420458e-07, "loss": 2.8466, "step": 171760 }, { "epoch": 7.398888745315932, "learning_rate": 5.840731323665667e-07, "loss": 2.8126, "step": 171780 }, { "epoch": 7.39975018305552, "learning_rate": 5.840246503910878e-07, "loss": 2.9123, "step": 171800 }, { "epoch": 7.400611620795107, "learning_rate": 5.83976168415609e-07, "loss": 2.8722, "step": 171820 }, { "epoch": 7.401473058534695, "learning_rate": 5.839276864401302e-07, "loss": 2.9351, "step": 171840 }, { "epoch": 7.402334496274282, "learning_rate": 5.838792044646512e-07, "loss": 2.9546, "step": 171860 }, { "epoch": 7.403195934013869, "learning_rate": 5.838307224891723e-07, "loss": 2.9289, "step": 171880 }, { "epoch": 7.404057371753456, "learning_rate": 5.837822405136934e-07, "loss": 2.9627, "step": 171900 }, { "epoch": 7.404918809493044, "learning_rate": 5.837337585382146e-07, "loss": 2.9691, "step": 171920 }, { "epoch": 7.405780247232631, "learning_rate": 5.836852765627356e-07, "loss": 2.8775, "step": 171940 }, { "epoch": 7.406641684972219, "learning_rate": 5.836367945872567e-07, "loss": 2.8242, "step": 171960 }, { "epoch": 7.407503122711806, "learning_rate": 5.835883126117779e-07, "loss": 2.8642, "step": 171980 }, { "epoch": 7.4083645604513935, "learning_rate": 5.83539830636299e-07, "loss": 2.927, "step": 172000 }, { "epoch": 7.409225998190981, "learning_rate": 5.8349134866082e-07, "loss": 2.9396, "step": 172020 }, { "epoch": 7.410087435930568, "learning_rate": 5.834428666853411e-07, "loss": 2.909, "step": 172040 }, { "epoch": 7.410948873670155, "learning_rate": 5.833943847098623e-07, "loss": 2.9112, "step": 172060 }, { "epoch": 7.4118103114097424, "learning_rate": 5.833459027343834e-07, "loss": 2.9629, "step": 172080 }, { "epoch": 7.412671749149331, "learning_rate": 5.832974207589045e-07, "loss": 2.9921, "step": 172100 }, { "epoch": 7.413533186888918, "learning_rate": 5.832489387834256e-07, "loss": 3.0032, "step": 172120 }, { "epoch": 7.414394624628505, "learning_rate": 5.832004568079468e-07, "loss": 3.0308, "step": 172140 }, { "epoch": 7.415256062368092, "learning_rate": 5.831519748324678e-07, "loss": 2.9708, "step": 172160 }, { "epoch": 7.41611750010768, "learning_rate": 5.831034928569889e-07, "loss": 3.0427, "step": 172180 }, { "epoch": 7.416978937847267, "learning_rate": 5.8305501088151e-07, "loss": 2.8585, "step": 172200 }, { "epoch": 7.417840375586854, "learning_rate": 5.830065289060312e-07, "loss": 3.0821, "step": 172220 }, { "epoch": 7.418701813326442, "learning_rate": 5.829580469305523e-07, "loss": 2.7136, "step": 172240 }, { "epoch": 7.419563251066029, "learning_rate": 5.829095649550734e-07, "loss": 2.9593, "step": 172260 }, { "epoch": 7.420424688805617, "learning_rate": 5.828610829795944e-07, "loss": 2.9629, "step": 172280 }, { "epoch": 7.421286126545204, "learning_rate": 5.828126010041155e-07, "loss": 2.9325, "step": 172300 }, { "epoch": 7.422147564284791, "learning_rate": 5.827641190286366e-07, "loss": 2.9699, "step": 172320 }, { "epoch": 7.423009002024378, "learning_rate": 5.827156370531577e-07, "loss": 3.0142, "step": 172340 }, { "epoch": 7.423870439763966, "learning_rate": 5.826671550776789e-07, "loss": 2.7816, "step": 172360 }, { "epoch": 7.424731877503554, "learning_rate": 5.826186731022e-07, "loss": 2.8549, "step": 172380 }, { "epoch": 7.425593315243141, "learning_rate": 5.825701911267211e-07, "loss": 2.9331, "step": 172400 }, { "epoch": 7.426454752982728, "learning_rate": 5.825217091512421e-07, "loss": 2.8974, "step": 172420 }, { "epoch": 7.4273161907223155, "learning_rate": 5.824732271757633e-07, "loss": 3.2114, "step": 172440 }, { "epoch": 7.428177628461903, "learning_rate": 5.824247452002844e-07, "loss": 2.9152, "step": 172460 }, { "epoch": 7.42903906620149, "learning_rate": 5.823762632248055e-07, "loss": 2.8957, "step": 172480 }, { "epoch": 7.429900503941077, "learning_rate": 5.823277812493266e-07, "loss": 2.8788, "step": 172500 }, { "epoch": 7.430761941680665, "learning_rate": 5.822792992738478e-07, "loss": 2.8834, "step": 172520 }, { "epoch": 7.431623379420253, "learning_rate": 5.822308172983688e-07, "loss": 3.0285, "step": 172540 }, { "epoch": 7.43248481715984, "learning_rate": 5.821823353228899e-07, "loss": 2.9356, "step": 172560 }, { "epoch": 7.433346254899427, "learning_rate": 5.82133853347411e-07, "loss": 2.9336, "step": 172580 }, { "epoch": 7.434207692639014, "learning_rate": 5.820853713719322e-07, "loss": 2.8127, "step": 172600 }, { "epoch": 7.435069130378602, "learning_rate": 5.820368893964533e-07, "loss": 3.0354, "step": 172620 }, { "epoch": 7.435930568118189, "learning_rate": 5.819884074209744e-07, "loss": 2.916, "step": 172640 }, { "epoch": 7.436792005857777, "learning_rate": 5.819399254454954e-07, "loss": 2.8452, "step": 172660 }, { "epoch": 7.437653443597364, "learning_rate": 5.818914434700165e-07, "loss": 2.8804, "step": 172680 }, { "epoch": 7.4385148813369515, "learning_rate": 5.818429614945377e-07, "loss": 2.958, "step": 172700 }, { "epoch": 7.439376319076539, "learning_rate": 5.817944795190588e-07, "loss": 2.897, "step": 172720 }, { "epoch": 7.440237756816126, "learning_rate": 5.817459975435799e-07, "loss": 2.895, "step": 172740 }, { "epoch": 7.441099194555713, "learning_rate": 5.816975155681011e-07, "loss": 2.9375, "step": 172760 }, { "epoch": 7.4419606322953005, "learning_rate": 5.816490335926221e-07, "loss": 2.9213, "step": 172780 }, { "epoch": 7.442822070034889, "learning_rate": 5.816005516171432e-07, "loss": 2.8961, "step": 172800 }, { "epoch": 7.443683507774476, "learning_rate": 5.815520696416643e-07, "loss": 2.8334, "step": 172820 }, { "epoch": 7.444544945514063, "learning_rate": 5.815035876661854e-07, "loss": 2.7548, "step": 172840 }, { "epoch": 7.44540638325365, "learning_rate": 5.814551056907065e-07, "loss": 2.9896, "step": 172860 }, { "epoch": 7.446267820993238, "learning_rate": 5.814066237152276e-07, "loss": 2.9465, "step": 172880 }, { "epoch": 7.447129258732825, "learning_rate": 5.813581417397488e-07, "loss": 2.9069, "step": 172900 }, { "epoch": 7.447990696472412, "learning_rate": 5.813096597642698e-07, "loss": 2.8928, "step": 172920 }, { "epoch": 7.448852134212, "learning_rate": 5.812611777887909e-07, "loss": 2.7724, "step": 172940 }, { "epoch": 7.4497135719515875, "learning_rate": 5.81212695813312e-07, "loss": 2.8192, "step": 172960 }, { "epoch": 7.450575009691175, "learning_rate": 5.811642138378331e-07, "loss": 3.0283, "step": 172980 }, { "epoch": 7.451436447430762, "learning_rate": 5.811157318623543e-07, "loss": 3.0158, "step": 173000 }, { "epoch": 7.452297885170349, "learning_rate": 5.810672498868754e-07, "loss": 2.9748, "step": 173020 }, { "epoch": 7.453159322909936, "learning_rate": 5.810187679113965e-07, "loss": 2.9266, "step": 173040 }, { "epoch": 7.454020760649524, "learning_rate": 5.809702859359175e-07, "loss": 2.8645, "step": 173060 }, { "epoch": 7.454882198389112, "learning_rate": 5.809218039604387e-07, "loss": 2.975, "step": 173080 }, { "epoch": 7.455743636128699, "learning_rate": 5.808733219849598e-07, "loss": 2.8677, "step": 173100 }, { "epoch": 7.456605073868286, "learning_rate": 5.808248400094809e-07, "loss": 3.0536, "step": 173120 }, { "epoch": 7.4574665116078735, "learning_rate": 5.80776358034002e-07, "loss": 2.7891, "step": 173140 }, { "epoch": 7.458327949347461, "learning_rate": 5.807278760585232e-07, "loss": 2.697, "step": 173160 }, { "epoch": 7.459189387087048, "learning_rate": 5.806793940830442e-07, "loss": 2.8482, "step": 173180 }, { "epoch": 7.460050824826635, "learning_rate": 5.806309121075652e-07, "loss": 2.9113, "step": 173200 }, { "epoch": 7.460912262566223, "learning_rate": 5.805824301320864e-07, "loss": 3.0873, "step": 173220 }, { "epoch": 7.461773700305811, "learning_rate": 5.805339481566076e-07, "loss": 3.0078, "step": 173240 }, { "epoch": 7.462635138045398, "learning_rate": 5.804854661811287e-07, "loss": 2.8836, "step": 173260 }, { "epoch": 7.463496575784985, "learning_rate": 5.804369842056497e-07, "loss": 2.9884, "step": 173280 }, { "epoch": 7.464358013524572, "learning_rate": 5.803885022301708e-07, "loss": 3.0345, "step": 173300 }, { "epoch": 7.46521945126416, "learning_rate": 5.80340020254692e-07, "loss": 2.9537, "step": 173320 }, { "epoch": 7.466080889003747, "learning_rate": 5.802915382792131e-07, "loss": 2.8841, "step": 173340 }, { "epoch": 7.466942326743335, "learning_rate": 5.802430563037341e-07, "loss": 2.6887, "step": 173360 }, { "epoch": 7.467803764482922, "learning_rate": 5.801945743282553e-07, "loss": 2.9607, "step": 173380 }, { "epoch": 7.4686652022225095, "learning_rate": 5.801460923527764e-07, "loss": 2.9414, "step": 173400 }, { "epoch": 7.469526639962097, "learning_rate": 5.800976103772975e-07, "loss": 3.0906, "step": 173420 }, { "epoch": 7.470388077701684, "learning_rate": 5.800491284018185e-07, "loss": 2.8137, "step": 173440 }, { "epoch": 7.471249515441271, "learning_rate": 5.800006464263397e-07, "loss": 2.9699, "step": 173460 }, { "epoch": 7.4721109531808585, "learning_rate": 5.799521644508608e-07, "loss": 3.0245, "step": 173480 }, { "epoch": 7.472972390920447, "learning_rate": 5.799036824753817e-07, "loss": 2.9055, "step": 173500 }, { "epoch": 7.473833828660034, "learning_rate": 5.79855200499903e-07, "loss": 2.8786, "step": 173520 }, { "epoch": 7.474695266399621, "learning_rate": 5.798067185244242e-07, "loss": 3.0392, "step": 173540 }, { "epoch": 7.475556704139208, "learning_rate": 5.797582365489452e-07, "loss": 2.9517, "step": 173560 }, { "epoch": 7.476418141878796, "learning_rate": 5.797097545734662e-07, "loss": 2.9106, "step": 173580 }, { "epoch": 7.477279579618383, "learning_rate": 5.796612725979874e-07, "loss": 3.0123, "step": 173600 }, { "epoch": 7.47814101735797, "learning_rate": 5.796127906225086e-07, "loss": 2.824, "step": 173620 }, { "epoch": 7.479002455097558, "learning_rate": 5.795643086470297e-07, "loss": 3.0775, "step": 173640 }, { "epoch": 7.4798638928371455, "learning_rate": 5.795158266715507e-07, "loss": 2.8225, "step": 173660 }, { "epoch": 7.480725330576733, "learning_rate": 5.794673446960718e-07, "loss": 2.9347, "step": 173680 }, { "epoch": 7.48158676831632, "learning_rate": 5.79418862720593e-07, "loss": 2.7681, "step": 173700 }, { "epoch": 7.482448206055907, "learning_rate": 5.793703807451141e-07, "loss": 2.8223, "step": 173720 }, { "epoch": 7.4833096437954945, "learning_rate": 5.793218987696351e-07, "loss": 2.8764, "step": 173740 }, { "epoch": 7.484171081535082, "learning_rate": 5.792734167941563e-07, "loss": 2.8406, "step": 173760 }, { "epoch": 7.48503251927467, "learning_rate": 5.792249348186775e-07, "loss": 2.7933, "step": 173780 }, { "epoch": 7.485893957014257, "learning_rate": 5.791764528431985e-07, "loss": 3.0147, "step": 173800 }, { "epoch": 7.486755394753844, "learning_rate": 5.791279708677195e-07, "loss": 2.806, "step": 173820 }, { "epoch": 7.487616832493432, "learning_rate": 5.790794888922407e-07, "loss": 2.8958, "step": 173840 }, { "epoch": 7.488478270233019, "learning_rate": 5.790310069167619e-07, "loss": 3.0115, "step": 173860 }, { "epoch": 7.489339707972606, "learning_rate": 5.78982524941283e-07, "loss": 3.0222, "step": 173880 }, { "epoch": 7.490201145712193, "learning_rate": 5.78934042965804e-07, "loss": 2.9076, "step": 173900 }, { "epoch": 7.4910625834517806, "learning_rate": 5.788855609903252e-07, "loss": 2.921, "step": 173920 }, { "epoch": 7.491924021191369, "learning_rate": 5.788370790148462e-07, "loss": 2.942, "step": 173940 }, { "epoch": 7.492785458930956, "learning_rate": 5.787885970393672e-07, "loss": 2.9758, "step": 173960 }, { "epoch": 7.493646896670543, "learning_rate": 5.787401150638884e-07, "loss": 3.07, "step": 173980 }, { "epoch": 7.49450833441013, "learning_rate": 5.786916330884096e-07, "loss": 2.9814, "step": 174000 }, { "epoch": 7.495369772149718, "learning_rate": 5.786431511129308e-07, "loss": 2.7575, "step": 174020 }, { "epoch": 7.496231209889305, "learning_rate": 5.785946691374517e-07, "loss": 3.0069, "step": 174040 }, { "epoch": 7.497092647628893, "learning_rate": 5.785461871619728e-07, "loss": 2.8659, "step": 174060 }, { "epoch": 7.49795408536848, "learning_rate": 5.78497705186494e-07, "loss": 3.0161, "step": 174080 }, { "epoch": 7.4988155231080675, "learning_rate": 5.784492232110151e-07, "loss": 2.936, "step": 174100 }, { "epoch": 7.499676960847655, "learning_rate": 5.784007412355361e-07, "loss": 2.9952, "step": 174120 }, { "epoch": 7.500538398587242, "learning_rate": 5.783522592600573e-07, "loss": 2.8454, "step": 174140 }, { "epoch": 7.501399836326829, "learning_rate": 5.783037772845785e-07, "loss": 2.9106, "step": 174160 }, { "epoch": 7.5022612740664165, "learning_rate": 5.782552953090996e-07, "loss": 2.8405, "step": 174180 }, { "epoch": 7.503122711806004, "learning_rate": 5.782068133336205e-07, "loss": 2.7486, "step": 174200 }, { "epoch": 7.503984149545592, "learning_rate": 5.781583313581417e-07, "loss": 2.8409, "step": 174220 }, { "epoch": 7.504845587285179, "learning_rate": 5.781098493826629e-07, "loss": 2.9866, "step": 174240 }, { "epoch": 7.505707025024766, "learning_rate": 5.780613674071839e-07, "loss": 2.8109, "step": 174260 }, { "epoch": 7.506568462764354, "learning_rate": 5.78012885431705e-07, "loss": 2.8109, "step": 174280 }, { "epoch": 7.507429900503941, "learning_rate": 5.779644034562262e-07, "loss": 2.9757, "step": 174300 }, { "epoch": 7.508291338243528, "learning_rate": 5.779159214807473e-07, "loss": 2.9165, "step": 174320 }, { "epoch": 7.509152775983116, "learning_rate": 5.778674395052683e-07, "loss": 3.0805, "step": 174340 }, { "epoch": 7.5100142137227035, "learning_rate": 5.778189575297894e-07, "loss": 3.0664, "step": 174360 }, { "epoch": 7.510875651462291, "learning_rate": 5.777704755543106e-07, "loss": 2.7602, "step": 174380 }, { "epoch": 7.511737089201878, "learning_rate": 5.777219935788318e-07, "loss": 2.8023, "step": 174400 }, { "epoch": 7.512598526941465, "learning_rate": 5.776735116033528e-07, "loss": 2.9864, "step": 174420 }, { "epoch": 7.5134599646810525, "learning_rate": 5.776250296278738e-07, "loss": 2.9893, "step": 174440 }, { "epoch": 7.51432140242064, "learning_rate": 5.77576547652395e-07, "loss": 2.889, "step": 174460 }, { "epoch": 7.515182840160227, "learning_rate": 5.77528065676916e-07, "loss": 3.0151, "step": 174480 }, { "epoch": 7.516044277899815, "learning_rate": 5.774795837014371e-07, "loss": 2.9809, "step": 174500 }, { "epoch": 7.516905715639402, "learning_rate": 5.774311017259583e-07, "loss": 2.9486, "step": 174520 }, { "epoch": 7.51776715337899, "learning_rate": 5.773826197504796e-07, "loss": 2.8693, "step": 174540 }, { "epoch": 7.518628591118577, "learning_rate": 5.773341377750005e-07, "loss": 2.9246, "step": 174560 }, { "epoch": 7.519490028858164, "learning_rate": 5.772856557995215e-07, "loss": 3.1529, "step": 174580 }, { "epoch": 7.520351466597751, "learning_rate": 5.772371738240427e-07, "loss": 2.8535, "step": 174600 }, { "epoch": 7.5212129043373395, "learning_rate": 5.771886918485639e-07, "loss": 2.93, "step": 174620 }, { "epoch": 7.522074342076927, "learning_rate": 5.771402098730849e-07, "loss": 2.8254, "step": 174640 }, { "epoch": 7.522935779816514, "learning_rate": 5.77091727897606e-07, "loss": 3.1018, "step": 174660 }, { "epoch": 7.523797217556101, "learning_rate": 5.770432459221272e-07, "loss": 3.12, "step": 174680 }, { "epoch": 7.5246586552956884, "learning_rate": 5.769947639466483e-07, "loss": 2.7816, "step": 174700 }, { "epoch": 7.525520093035276, "learning_rate": 5.769462819711693e-07, "loss": 3.0391, "step": 174720 }, { "epoch": 7.526381530774863, "learning_rate": 5.768977999956904e-07, "loss": 2.8462, "step": 174740 }, { "epoch": 7.52724296851445, "learning_rate": 5.768493180202116e-07, "loss": 2.9654, "step": 174760 }, { "epoch": 7.528104406254038, "learning_rate": 5.768008360447328e-07, "loss": 3.0049, "step": 174780 }, { "epoch": 7.528965843993626, "learning_rate": 5.767523540692537e-07, "loss": 2.9603, "step": 174800 }, { "epoch": 7.529827281733213, "learning_rate": 5.767038720937748e-07, "loss": 2.8927, "step": 174820 }, { "epoch": 7.5306887194728, "learning_rate": 5.766553901182961e-07, "loss": 2.9327, "step": 174840 }, { "epoch": 7.531550157212387, "learning_rate": 5.766069081428171e-07, "loss": 2.8319, "step": 174860 }, { "epoch": 7.5324115949519745, "learning_rate": 5.765584261673382e-07, "loss": 2.9311, "step": 174880 }, { "epoch": 7.533273032691563, "learning_rate": 5.765099441918593e-07, "loss": 2.8435, "step": 174900 }, { "epoch": 7.53413447043115, "learning_rate": 5.764614622163804e-07, "loss": 2.9389, "step": 174920 }, { "epoch": 7.534995908170737, "learning_rate": 5.764129802409016e-07, "loss": 2.7082, "step": 174940 }, { "epoch": 7.535857345910324, "learning_rate": 5.763644982654226e-07, "loss": 2.9937, "step": 174960 }, { "epoch": 7.536718783649912, "learning_rate": 5.763160162899437e-07, "loss": 3.1888, "step": 174980 }, { "epoch": 7.537580221389499, "learning_rate": 5.762675343144649e-07, "loss": 2.7724, "step": 175000 }, { "epoch": 7.538441659129086, "learning_rate": 5.762190523389859e-07, "loss": 3.0143, "step": 175020 }, { "epoch": 7.539303096868673, "learning_rate": 5.76170570363507e-07, "loss": 2.9051, "step": 175040 }, { "epoch": 7.5401645346082615, "learning_rate": 5.761220883880281e-07, "loss": 2.929, "step": 175060 }, { "epoch": 7.541025972347849, "learning_rate": 5.760736064125494e-07, "loss": 2.7748, "step": 175080 }, { "epoch": 7.541887410087436, "learning_rate": 5.760251244370704e-07, "loss": 2.927, "step": 175100 }, { "epoch": 7.542748847827023, "learning_rate": 5.759766424615914e-07, "loss": 2.8946, "step": 175120 }, { "epoch": 7.5436102855666105, "learning_rate": 5.759281604861126e-07, "loss": 2.9457, "step": 175140 }, { "epoch": 7.544471723306198, "learning_rate": 5.758796785106337e-07, "loss": 3.0025, "step": 175160 }, { "epoch": 7.545333161045786, "learning_rate": 5.758311965351547e-07, "loss": 2.9123, "step": 175180 }, { "epoch": 7.546194598785373, "learning_rate": 5.757827145596758e-07, "loss": 2.8583, "step": 175200 }, { "epoch": 7.54705603652496, "learning_rate": 5.757342325841971e-07, "loss": 2.9453, "step": 175220 }, { "epoch": 7.547917474264548, "learning_rate": 5.756857506087181e-07, "loss": 3.0417, "step": 175240 }, { "epoch": 7.548778912004135, "learning_rate": 5.756372686332392e-07, "loss": 2.7906, "step": 175260 }, { "epoch": 7.549640349743722, "learning_rate": 5.755887866577602e-07, "loss": 2.9301, "step": 175280 }, { "epoch": 7.550501787483309, "learning_rate": 5.755403046822814e-07, "loss": 2.8961, "step": 175300 }, { "epoch": 7.551363225222897, "learning_rate": 5.754918227068026e-07, "loss": 3.0848, "step": 175320 }, { "epoch": 7.552224662962485, "learning_rate": 5.754433407313237e-07, "loss": 2.8408, "step": 175340 }, { "epoch": 7.553086100702072, "learning_rate": 5.753948587558448e-07, "loss": 2.9714, "step": 175360 }, { "epoch": 7.553947538441659, "learning_rate": 5.753463767803659e-07, "loss": 3.0606, "step": 175380 }, { "epoch": 7.5548089761812465, "learning_rate": 5.75297894804887e-07, "loss": 2.8411, "step": 175400 }, { "epoch": 7.555670413920834, "learning_rate": 5.752494128294081e-07, "loss": 2.8904, "step": 175420 }, { "epoch": 7.556531851660421, "learning_rate": 5.752009308539291e-07, "loss": 2.9668, "step": 175440 }, { "epoch": 7.557393289400009, "learning_rate": 5.751524488784504e-07, "loss": 3.0131, "step": 175460 }, { "epoch": 7.558254727139596, "learning_rate": 5.751039669029714e-07, "loss": 2.9878, "step": 175480 }, { "epoch": 7.559116164879184, "learning_rate": 5.750554849274925e-07, "loss": 2.8644, "step": 175500 }, { "epoch": 7.559977602618771, "learning_rate": 5.750070029520136e-07, "loss": 2.7475, "step": 175520 }, { "epoch": 7.560839040358358, "learning_rate": 5.749585209765347e-07, "loss": 2.8831, "step": 175540 }, { "epoch": 7.561700478097945, "learning_rate": 5.749100390010557e-07, "loss": 2.8582, "step": 175560 }, { "epoch": 7.562561915837533, "learning_rate": 5.748615570255768e-07, "loss": 2.7638, "step": 175580 }, { "epoch": 7.56342335357712, "learning_rate": 5.748130750500981e-07, "loss": 2.9603, "step": 175600 }, { "epoch": 7.564284791316708, "learning_rate": 5.747645930746191e-07, "loss": 2.8423, "step": 175620 }, { "epoch": 7.565146229056295, "learning_rate": 5.747161110991402e-07, "loss": 2.85, "step": 175640 }, { "epoch": 7.566007666795882, "learning_rate": 5.746676291236613e-07, "loss": 2.834, "step": 175660 }, { "epoch": 7.56686910453547, "learning_rate": 5.746191471481823e-07, "loss": 2.7594, "step": 175680 }, { "epoch": 7.567730542275057, "learning_rate": 5.745706651727036e-07, "loss": 2.8926, "step": 175700 }, { "epoch": 7.568591980014644, "learning_rate": 5.745221831972247e-07, "loss": 3.051, "step": 175720 }, { "epoch": 7.569453417754232, "learning_rate": 5.744737012217458e-07, "loss": 2.8567, "step": 175740 }, { "epoch": 7.5703148554938196, "learning_rate": 5.744252192462668e-07, "loss": 2.794, "step": 175760 }, { "epoch": 7.571176293233407, "learning_rate": 5.74376737270788e-07, "loss": 2.9003, "step": 175780 }, { "epoch": 7.572037730972994, "learning_rate": 5.743282552953092e-07, "loss": 3.0987, "step": 175800 }, { "epoch": 7.572899168712581, "learning_rate": 5.742797733198301e-07, "loss": 2.7817, "step": 175820 }, { "epoch": 7.5737606064521685, "learning_rate": 5.742312913443513e-07, "loss": 2.8617, "step": 175840 }, { "epoch": 7.574622044191756, "learning_rate": 5.741828093688724e-07, "loss": 3.0271, "step": 175860 }, { "epoch": 7.575483481931343, "learning_rate": 5.741343273933935e-07, "loss": 2.9359, "step": 175880 }, { "epoch": 7.576344919670931, "learning_rate": 5.740858454179146e-07, "loss": 2.7748, "step": 175900 }, { "epoch": 7.577206357410518, "learning_rate": 5.740373634424357e-07, "loss": 2.7792, "step": 175920 }, { "epoch": 7.578067795150106, "learning_rate": 5.739888814669569e-07, "loss": 2.7557, "step": 175940 }, { "epoch": 7.578929232889693, "learning_rate": 5.73940399491478e-07, "loss": 2.8995, "step": 175960 }, { "epoch": 7.57979067062928, "learning_rate": 5.738919175159991e-07, "loss": 3.0177, "step": 175980 }, { "epoch": 7.580652108368867, "learning_rate": 5.738434355405201e-07, "loss": 2.9495, "step": 176000 }, { "epoch": 7.581513546108455, "learning_rate": 5.737949535650413e-07, "loss": 3.1058, "step": 176020 }, { "epoch": 7.582374983848043, "learning_rate": 5.737464715895624e-07, "loss": 2.9547, "step": 176040 }, { "epoch": 7.58323642158763, "learning_rate": 5.736979896140834e-07, "loss": 2.9434, "step": 176060 }, { "epoch": 7.584097859327217, "learning_rate": 5.736495076386046e-07, "loss": 2.9938, "step": 176080 }, { "epoch": 7.5849592970668045, "learning_rate": 5.736010256631257e-07, "loss": 2.8392, "step": 176100 }, { "epoch": 7.585820734806392, "learning_rate": 5.735525436876468e-07, "loss": 2.8956, "step": 176120 }, { "epoch": 7.586682172545979, "learning_rate": 5.735040617121678e-07, "loss": 2.7263, "step": 176140 }, { "epoch": 7.587543610285566, "learning_rate": 5.73455579736689e-07, "loss": 2.8636, "step": 176160 }, { "epoch": 7.588405048025154, "learning_rate": 5.734070977612101e-07, "loss": 2.9922, "step": 176180 }, { "epoch": 7.589266485764742, "learning_rate": 5.733586157857311e-07, "loss": 2.8353, "step": 176200 }, { "epoch": 7.590127923504329, "learning_rate": 5.733101338102522e-07, "loss": 2.9748, "step": 176220 }, { "epoch": 7.590989361243916, "learning_rate": 5.732616518347734e-07, "loss": 2.8611, "step": 176240 }, { "epoch": 7.591850798983503, "learning_rate": 5.732131698592945e-07, "loss": 3.0279, "step": 176260 }, { "epoch": 7.592712236723091, "learning_rate": 5.731646878838156e-07, "loss": 2.7119, "step": 176280 }, { "epoch": 7.593573674462678, "learning_rate": 5.731162059083367e-07, "loss": 2.784, "step": 176300 }, { "epoch": 7.594435112202266, "learning_rate": 5.73067723932858e-07, "loss": 2.8745, "step": 176320 }, { "epoch": 7.595296549941853, "learning_rate": 5.73019241957379e-07, "loss": 2.9571, "step": 176340 }, { "epoch": 7.5961579876814405, "learning_rate": 5.729707599819e-07, "loss": 2.873, "step": 176360 }, { "epoch": 7.597019425421028, "learning_rate": 5.729222780064211e-07, "loss": 2.7703, "step": 176380 }, { "epoch": 7.597880863160615, "learning_rate": 5.728737960309423e-07, "loss": 2.7918, "step": 176400 }, { "epoch": 7.598742300900202, "learning_rate": 5.728253140554634e-07, "loss": 2.8746, "step": 176420 }, { "epoch": 7.599603738639789, "learning_rate": 5.727768320799844e-07, "loss": 3.0421, "step": 176440 }, { "epoch": 7.600465176379377, "learning_rate": 5.727283501045056e-07, "loss": 2.8362, "step": 176460 }, { "epoch": 7.601326614118965, "learning_rate": 5.726798681290267e-07, "loss": 2.9434, "step": 176480 }, { "epoch": 7.602188051858552, "learning_rate": 5.726313861535478e-07, "loss": 2.7924, "step": 176500 }, { "epoch": 7.603049489598139, "learning_rate": 5.725829041780688e-07, "loss": 2.8914, "step": 176520 }, { "epoch": 7.6039109273377266, "learning_rate": 5.7253442220259e-07, "loss": 2.8746, "step": 176540 }, { "epoch": 7.604772365077314, "learning_rate": 5.724859402271112e-07, "loss": 2.9561, "step": 176560 }, { "epoch": 7.605633802816901, "learning_rate": 5.724374582516323e-07, "loss": 2.8022, "step": 176580 }, { "epoch": 7.606495240556489, "learning_rate": 5.723889762761533e-07, "loss": 2.9256, "step": 176600 }, { "epoch": 7.607356678296076, "learning_rate": 5.723404943006744e-07, "loss": 2.8818, "step": 176620 }, { "epoch": 7.608218116035664, "learning_rate": 5.722920123251956e-07, "loss": 2.8612, "step": 176640 }, { "epoch": 7.609079553775251, "learning_rate": 5.722435303497165e-07, "loss": 3.0052, "step": 176660 }, { "epoch": 7.609940991514838, "learning_rate": 5.721950483742377e-07, "loss": 2.7427, "step": 176680 }, { "epoch": 7.610802429254425, "learning_rate": 5.721465663987589e-07, "loss": 2.946, "step": 176700 }, { "epoch": 7.611663866994013, "learning_rate": 5.7209808442328e-07, "loss": 2.9249, "step": 176720 }, { "epoch": 7.6125253047336, "learning_rate": 5.72049602447801e-07, "loss": 2.7948, "step": 176740 }, { "epoch": 7.613386742473188, "learning_rate": 5.720011204723221e-07, "loss": 2.917, "step": 176760 }, { "epoch": 7.614248180212775, "learning_rate": 5.719526384968433e-07, "loss": 2.9814, "step": 176780 }, { "epoch": 7.6151096179523625, "learning_rate": 5.719041565213644e-07, "loss": 2.8168, "step": 176800 }, { "epoch": 7.61597105569195, "learning_rate": 5.718556745458854e-07, "loss": 2.9025, "step": 176820 }, { "epoch": 7.616832493431537, "learning_rate": 5.718071925704066e-07, "loss": 2.9491, "step": 176840 }, { "epoch": 7.617693931171124, "learning_rate": 5.717587105949277e-07, "loss": 3.0033, "step": 176860 }, { "epoch": 7.618555368910712, "learning_rate": 5.717102286194488e-07, "loss": 3.0039, "step": 176880 }, { "epoch": 7.6194168066503, "learning_rate": 5.716617466439698e-07, "loss": 3.0975, "step": 176900 }, { "epoch": 7.620278244389887, "learning_rate": 5.71613264668491e-07, "loss": 2.8998, "step": 176920 }, { "epoch": 7.621139682129474, "learning_rate": 5.715647826930122e-07, "loss": 2.8901, "step": 176940 }, { "epoch": 7.622001119869061, "learning_rate": 5.715163007175333e-07, "loss": 2.8183, "step": 176960 }, { "epoch": 7.622862557608649, "learning_rate": 5.714678187420543e-07, "loss": 2.7321, "step": 176980 }, { "epoch": 7.623723995348236, "learning_rate": 5.714193367665754e-07, "loss": 2.7715, "step": 177000 }, { "epoch": 7.624585433087823, "learning_rate": 5.713708547910966e-07, "loss": 2.9246, "step": 177020 }, { "epoch": 7.625446870827411, "learning_rate": 5.713223728156176e-07, "loss": 2.8889, "step": 177040 }, { "epoch": 7.6263083085669985, "learning_rate": 5.712738908401386e-07, "loss": 2.9614, "step": 177060 }, { "epoch": 7.627169746306586, "learning_rate": 5.712254088646599e-07, "loss": 2.9824, "step": 177080 }, { "epoch": 7.628031184046173, "learning_rate": 5.71176926889181e-07, "loss": 2.917, "step": 177100 }, { "epoch": 7.62889262178576, "learning_rate": 5.71128444913702e-07, "loss": 3.0267, "step": 177120 }, { "epoch": 7.6297540595253475, "learning_rate": 5.710799629382231e-07, "loss": 2.9144, "step": 177140 }, { "epoch": 7.630615497264936, "learning_rate": 5.710314809627443e-07, "loss": 2.7737, "step": 177160 }, { "epoch": 7.631476935004523, "learning_rate": 5.709829989872654e-07, "loss": 2.9193, "step": 177180 }, { "epoch": 7.63233837274411, "learning_rate": 5.709345170117864e-07, "loss": 2.9529, "step": 177200 }, { "epoch": 7.633199810483697, "learning_rate": 5.708860350363076e-07, "loss": 2.8482, "step": 177220 }, { "epoch": 7.634061248223285, "learning_rate": 5.708375530608287e-07, "loss": 2.8155, "step": 177240 }, { "epoch": 7.634922685962872, "learning_rate": 5.707890710853497e-07, "loss": 2.8975, "step": 177260 }, { "epoch": 7.635784123702459, "learning_rate": 5.707405891098708e-07, "loss": 2.7884, "step": 177280 }, { "epoch": 7.636645561442046, "learning_rate": 5.70692107134392e-07, "loss": 2.857, "step": 177300 }, { "epoch": 7.6375069991816344, "learning_rate": 5.706436251589132e-07, "loss": 2.8511, "step": 177320 }, { "epoch": 7.638368436921222, "learning_rate": 5.705951431834342e-07, "loss": 2.9179, "step": 177340 }, { "epoch": 7.639229874660809, "learning_rate": 5.705466612079553e-07, "loss": 2.796, "step": 177360 }, { "epoch": 7.640091312400396, "learning_rate": 5.704981792324764e-07, "loss": 3.001, "step": 177380 }, { "epoch": 7.640952750139983, "learning_rate": 5.704496972569976e-07, "loss": 2.7894, "step": 177400 }, { "epoch": 7.641814187879571, "learning_rate": 5.704012152815186e-07, "loss": 2.9977, "step": 177420 }, { "epoch": 7.642675625619159, "learning_rate": 5.703527333060397e-07, "loss": 2.8482, "step": 177440 }, { "epoch": 7.643537063358746, "learning_rate": 5.703042513305609e-07, "loss": 2.9358, "step": 177460 }, { "epoch": 7.644398501098333, "learning_rate": 5.702557693550819e-07, "loss": 2.9128, "step": 177480 }, { "epoch": 7.6452599388379205, "learning_rate": 5.70207287379603e-07, "loss": 2.8842, "step": 177500 }, { "epoch": 7.646121376577508, "learning_rate": 5.701588054041241e-07, "loss": 2.9111, "step": 177520 }, { "epoch": 7.646982814317095, "learning_rate": 5.701103234286453e-07, "loss": 3.0995, "step": 177540 }, { "epoch": 7.647844252056682, "learning_rate": 5.700618414531665e-07, "loss": 2.993, "step": 177560 }, { "epoch": 7.6487056897962695, "learning_rate": 5.700133594776876e-07, "loss": 2.9807, "step": 177580 }, { "epoch": 7.649567127535858, "learning_rate": 5.699648775022086e-07, "loss": 2.8282, "step": 177600 }, { "epoch": 7.650428565275445, "learning_rate": 5.699163955267297e-07, "loss": 2.8604, "step": 177620 }, { "epoch": 7.651290003015032, "learning_rate": 5.69867913551251e-07, "loss": 2.8404, "step": 177640 }, { "epoch": 7.652151440754619, "learning_rate": 5.698194315757719e-07, "loss": 2.8981, "step": 177660 }, { "epoch": 7.653012878494207, "learning_rate": 5.69770949600293e-07, "loss": 2.8425, "step": 177680 }, { "epoch": 7.653874316233794, "learning_rate": 5.697224676248142e-07, "loss": 2.9297, "step": 177700 }, { "epoch": 7.654735753973382, "learning_rate": 5.696739856493352e-07, "loss": 2.9438, "step": 177720 }, { "epoch": 7.655597191712969, "learning_rate": 5.696255036738563e-07, "loss": 2.8202, "step": 177740 }, { "epoch": 7.6564586294525565, "learning_rate": 5.695770216983774e-07, "loss": 2.9952, "step": 177760 }, { "epoch": 7.657320067192144, "learning_rate": 5.695285397228986e-07, "loss": 2.8149, "step": 177780 }, { "epoch": 7.658181504931731, "learning_rate": 5.694800577474196e-07, "loss": 2.9931, "step": 177800 }, { "epoch": 7.659042942671318, "learning_rate": 5.694315757719407e-07, "loss": 2.8799, "step": 177820 }, { "epoch": 7.6599043804109055, "learning_rate": 5.693830937964619e-07, "loss": 3.0778, "step": 177840 }, { "epoch": 7.660765818150493, "learning_rate": 5.69334611820983e-07, "loss": 2.8611, "step": 177860 }, { "epoch": 7.661627255890081, "learning_rate": 5.69286129845504e-07, "loss": 2.8026, "step": 177880 }, { "epoch": 7.662488693629668, "learning_rate": 5.692376478700252e-07, "loss": 2.9164, "step": 177900 }, { "epoch": 7.663350131369255, "learning_rate": 5.691891658945463e-07, "loss": 2.9282, "step": 177920 }, { "epoch": 7.664211569108843, "learning_rate": 5.691406839190675e-07, "loss": 2.8347, "step": 177940 }, { "epoch": 7.66507300684843, "learning_rate": 5.690922019435885e-07, "loss": 2.8105, "step": 177960 }, { "epoch": 7.665934444588017, "learning_rate": 5.690437199681096e-07, "loss": 2.7934, "step": 177980 }, { "epoch": 7.666795882327605, "learning_rate": 5.689952379926307e-07, "loss": 2.8627, "step": 178000 }, { "epoch": 7.6676573200671925, "learning_rate": 5.68946756017152e-07, "loss": 2.8653, "step": 178020 }, { "epoch": 7.66851875780678, "learning_rate": 5.688982740416729e-07, "loss": 2.7763, "step": 178040 }, { "epoch": 7.669380195546367, "learning_rate": 5.68849792066194e-07, "loss": 2.967, "step": 178060 }, { "epoch": 7.670241633285954, "learning_rate": 5.688013100907152e-07, "loss": 2.8955, "step": 178080 }, { "epoch": 7.6711030710255415, "learning_rate": 5.687528281152362e-07, "loss": 2.9241, "step": 178100 }, { "epoch": 7.671964508765129, "learning_rate": 5.687043461397573e-07, "loss": 2.9947, "step": 178120 }, { "epoch": 7.672825946504716, "learning_rate": 5.686558641642784e-07, "loss": 2.7975, "step": 178140 }, { "epoch": 7.673687384244304, "learning_rate": 5.686073821887996e-07, "loss": 2.9721, "step": 178160 }, { "epoch": 7.674548821983891, "learning_rate": 5.685589002133207e-07, "loss": 2.8841, "step": 178180 }, { "epoch": 7.675410259723479, "learning_rate": 5.685104182378418e-07, "loss": 2.7844, "step": 178200 }, { "epoch": 7.676271697463066, "learning_rate": 5.684619362623629e-07, "loss": 3.0574, "step": 178220 }, { "epoch": 7.677133135202653, "learning_rate": 5.684134542868839e-07, "loss": 2.8605, "step": 178240 }, { "epoch": 7.67799457294224, "learning_rate": 5.68364972311405e-07, "loss": 2.938, "step": 178260 }, { "epoch": 7.678856010681828, "learning_rate": 5.683164903359261e-07, "loss": 2.8949, "step": 178280 }, { "epoch": 7.679717448421416, "learning_rate": 5.682680083604473e-07, "loss": 2.8205, "step": 178300 }, { "epoch": 7.680578886161003, "learning_rate": 5.682195263849684e-07, "loss": 2.8276, "step": 178320 }, { "epoch": 7.68144032390059, "learning_rate": 5.681710444094895e-07, "loss": 2.9013, "step": 178340 }, { "epoch": 7.682301761640177, "learning_rate": 5.681225624340106e-07, "loss": 3.0176, "step": 178360 }, { "epoch": 7.683163199379765, "learning_rate": 5.680740804585317e-07, "loss": 2.8536, "step": 178380 }, { "epoch": 7.684024637119352, "learning_rate": 5.680255984830529e-07, "loss": 2.9184, "step": 178400 }, { "epoch": 7.684886074858939, "learning_rate": 5.67977116507574e-07, "loss": 2.7591, "step": 178420 }, { "epoch": 7.685747512598527, "learning_rate": 5.67928634532095e-07, "loss": 2.9337, "step": 178440 }, { "epoch": 7.6866089503381145, "learning_rate": 5.678801525566162e-07, "loss": 2.878, "step": 178460 }, { "epoch": 7.687470388077702, "learning_rate": 5.678316705811372e-07, "loss": 2.8357, "step": 178480 }, { "epoch": 7.688331825817289, "learning_rate": 5.677831886056583e-07, "loss": 2.8828, "step": 178500 }, { "epoch": 7.689193263556876, "learning_rate": 5.677347066301794e-07, "loss": 2.7581, "step": 178520 }, { "epoch": 7.6900547012964635, "learning_rate": 5.676862246547005e-07, "loss": 2.9381, "step": 178540 }, { "epoch": 7.690916139036052, "learning_rate": 5.676377426792217e-07, "loss": 2.9568, "step": 178560 }, { "epoch": 7.691777576775639, "learning_rate": 5.675892607037428e-07, "loss": 2.8587, "step": 178580 }, { "epoch": 7.692639014515226, "learning_rate": 5.675407787282639e-07, "loss": 3.0068, "step": 178600 }, { "epoch": 7.693500452254813, "learning_rate": 5.674922967527849e-07, "loss": 2.8665, "step": 178620 }, { "epoch": 7.694361889994401, "learning_rate": 5.674438147773062e-07, "loss": 2.9613, "step": 178640 }, { "epoch": 7.695223327733988, "learning_rate": 5.673953328018273e-07, "loss": 3.003, "step": 178660 }, { "epoch": 7.696084765473575, "learning_rate": 5.673468508263483e-07, "loss": 2.9981, "step": 178680 }, { "epoch": 7.696946203213162, "learning_rate": 5.672983688508694e-07, "loss": 2.911, "step": 178700 }, { "epoch": 7.6978076409527505, "learning_rate": 5.672498868753906e-07, "loss": 3.0579, "step": 178720 }, { "epoch": 7.698669078692338, "learning_rate": 5.672014048999116e-07, "loss": 2.8371, "step": 178740 }, { "epoch": 7.699530516431925, "learning_rate": 5.671529229244327e-07, "loss": 2.9275, "step": 178760 }, { "epoch": 7.700391954171512, "learning_rate": 5.671044409489539e-07, "loss": 2.8365, "step": 178780 }, { "epoch": 7.7012533919110995, "learning_rate": 5.670559589734749e-07, "loss": 2.7299, "step": 178800 }, { "epoch": 7.702114829650687, "learning_rate": 5.67007476997996e-07, "loss": 2.7999, "step": 178820 }, { "epoch": 7.702976267390275, "learning_rate": 5.66958995022517e-07, "loss": 3.0619, "step": 178840 }, { "epoch": 7.703837705129862, "learning_rate": 5.669105130470382e-07, "loss": 2.861, "step": 178860 }, { "epoch": 7.704699142869449, "learning_rate": 5.668620310715593e-07, "loss": 3.0203, "step": 178880 }, { "epoch": 7.705560580609037, "learning_rate": 5.668135490960804e-07, "loss": 2.8358, "step": 178900 }, { "epoch": 7.706422018348624, "learning_rate": 5.667650671206016e-07, "loss": 3.0071, "step": 178920 }, { "epoch": 7.707283456088211, "learning_rate": 5.667165851451227e-07, "loss": 3.071, "step": 178940 }, { "epoch": 7.708144893827798, "learning_rate": 5.666681031696438e-07, "loss": 2.7632, "step": 178960 }, { "epoch": 7.709006331567386, "learning_rate": 5.666196211941649e-07, "loss": 2.9656, "step": 178980 }, { "epoch": 7.709867769306974, "learning_rate": 5.665711392186859e-07, "loss": 2.8735, "step": 179000 }, { "epoch": 7.710729207046561, "learning_rate": 5.665226572432072e-07, "loss": 3.0153, "step": 179020 }, { "epoch": 7.711590644786148, "learning_rate": 5.664741752677283e-07, "loss": 2.8502, "step": 179040 }, { "epoch": 7.712452082525735, "learning_rate": 5.664256932922493e-07, "loss": 2.9253, "step": 179060 }, { "epoch": 7.713313520265323, "learning_rate": 5.663772113167704e-07, "loss": 2.7938, "step": 179080 }, { "epoch": 7.71417495800491, "learning_rate": 5.663287293412916e-07, "loss": 2.9461, "step": 179100 }, { "epoch": 7.715036395744498, "learning_rate": 5.662802473658126e-07, "loss": 2.8583, "step": 179120 }, { "epoch": 7.715897833484085, "learning_rate": 5.662317653903336e-07, "loss": 2.7534, "step": 179140 }, { "epoch": 7.716759271223673, "learning_rate": 5.661832834148549e-07, "loss": 2.9148, "step": 179160 }, { "epoch": 7.71762070896326, "learning_rate": 5.66134801439376e-07, "loss": 3.0079, "step": 179180 }, { "epoch": 7.718482146702847, "learning_rate": 5.660863194638971e-07, "loss": 2.8535, "step": 179200 }, { "epoch": 7.719343584442434, "learning_rate": 5.660378374884181e-07, "loss": 2.9551, "step": 179220 }, { "epoch": 7.7202050221820215, "learning_rate": 5.659893555129392e-07, "loss": 2.9464, "step": 179240 }, { "epoch": 7.721066459921609, "learning_rate": 5.659408735374605e-07, "loss": 2.989, "step": 179260 }, { "epoch": 7.721927897661197, "learning_rate": 5.658923915619816e-07, "loss": 3.0463, "step": 179280 }, { "epoch": 7.722789335400784, "learning_rate": 5.658439095865026e-07, "loss": 2.7292, "step": 179300 }, { "epoch": 7.723650773140371, "learning_rate": 5.657954276110237e-07, "loss": 3.105, "step": 179320 }, { "epoch": 7.724512210879959, "learning_rate": 5.657469456355448e-07, "loss": 3.0078, "step": 179340 }, { "epoch": 7.725373648619546, "learning_rate": 5.656984636600659e-07, "loss": 3.0882, "step": 179360 }, { "epoch": 7.726235086359133, "learning_rate": 5.656499816845869e-07, "loss": 2.7412, "step": 179380 }, { "epoch": 7.72709652409872, "learning_rate": 5.656014997091082e-07, "loss": 2.7994, "step": 179400 }, { "epoch": 7.7279579618383085, "learning_rate": 5.655530177336293e-07, "loss": 2.897, "step": 179420 }, { "epoch": 7.728819399577896, "learning_rate": 5.655045357581502e-07, "loss": 2.809, "step": 179440 }, { "epoch": 7.729680837317483, "learning_rate": 5.654560537826714e-07, "loss": 2.8083, "step": 179460 }, { "epoch": 7.73054227505707, "learning_rate": 5.654075718071926e-07, "loss": 2.9846, "step": 179480 }, { "epoch": 7.7314037127966575, "learning_rate": 5.653590898317136e-07, "loss": 2.8899, "step": 179500 }, { "epoch": 7.732265150536245, "learning_rate": 5.653106078562346e-07, "loss": 2.9043, "step": 179520 }, { "epoch": 7.733126588275832, "learning_rate": 5.652621258807559e-07, "loss": 2.7846, "step": 179540 }, { "epoch": 7.733988026015419, "learning_rate": 5.65213643905277e-07, "loss": 3.137, "step": 179560 }, { "epoch": 7.734849463755007, "learning_rate": 5.651651619297981e-07, "loss": 2.6918, "step": 179580 }, { "epoch": 7.735710901494595, "learning_rate": 5.651166799543191e-07, "loss": 3.0329, "step": 179600 }, { "epoch": 7.736572339234182, "learning_rate": 5.650681979788403e-07, "loss": 2.88, "step": 179620 }, { "epoch": 7.737433776973769, "learning_rate": 5.650197160033615e-07, "loss": 2.7716, "step": 179640 }, { "epoch": 7.738295214713356, "learning_rate": 5.649712340278826e-07, "loss": 2.9868, "step": 179660 }, { "epoch": 7.739156652452944, "learning_rate": 5.649227520524036e-07, "loss": 2.8133, "step": 179680 }, { "epoch": 7.740018090192532, "learning_rate": 5.648742700769247e-07, "loss": 3.0493, "step": 179700 }, { "epoch": 7.740879527932119, "learning_rate": 5.648257881014459e-07, "loss": 2.7209, "step": 179720 }, { "epoch": 7.741740965671706, "learning_rate": 5.64777306125967e-07, "loss": 2.7858, "step": 179740 }, { "epoch": 7.7426024034112935, "learning_rate": 5.647288241504879e-07, "loss": 2.9878, "step": 179760 }, { "epoch": 7.743463841150881, "learning_rate": 5.646803421750091e-07, "loss": 2.9763, "step": 179780 }, { "epoch": 7.744325278890468, "learning_rate": 5.646318601995303e-07, "loss": 2.9738, "step": 179800 }, { "epoch": 7.745186716630055, "learning_rate": 5.645833782240513e-07, "loss": 2.7971, "step": 179820 }, { "epoch": 7.7460481543696424, "learning_rate": 5.645348962485724e-07, "loss": 2.8912, "step": 179840 }, { "epoch": 7.746909592109231, "learning_rate": 5.644864142730936e-07, "loss": 2.9335, "step": 179860 }, { "epoch": 7.747771029848818, "learning_rate": 5.644379322976146e-07, "loss": 3.0141, "step": 179880 }, { "epoch": 7.748632467588405, "learning_rate": 5.643894503221356e-07, "loss": 2.8723, "step": 179900 }, { "epoch": 7.749493905327992, "learning_rate": 5.643409683466569e-07, "loss": 2.9959, "step": 179920 }, { "epoch": 7.75035534306758, "learning_rate": 5.64292486371178e-07, "loss": 2.9816, "step": 179940 }, { "epoch": 7.751216780807167, "learning_rate": 5.642440043956991e-07, "loss": 3.1578, "step": 179960 }, { "epoch": 7.752078218546755, "learning_rate": 5.641955224202201e-07, "loss": 2.982, "step": 179980 }, { "epoch": 7.752939656286342, "learning_rate": 5.641470404447413e-07, "loss": 2.7792, "step": 180000 }, { "epoch": 7.753801094025929, "learning_rate": 5.640985584692625e-07, "loss": 2.8534, "step": 180020 }, { "epoch": 7.754662531765517, "learning_rate": 5.640500764937836e-07, "loss": 2.9068, "step": 180040 }, { "epoch": 7.755523969505104, "learning_rate": 5.640015945183046e-07, "loss": 3.0067, "step": 180060 }, { "epoch": 7.756385407244691, "learning_rate": 5.639531125428257e-07, "loss": 2.9297, "step": 180080 }, { "epoch": 7.757246844984278, "learning_rate": 5.639046305673469e-07, "loss": 2.7985, "step": 180100 }, { "epoch": 7.758108282723866, "learning_rate": 5.63856148591868e-07, "loss": 2.8505, "step": 180120 }, { "epoch": 7.758969720463454, "learning_rate": 5.638076666163889e-07, "loss": 3.0474, "step": 180140 }, { "epoch": 7.759831158203041, "learning_rate": 5.637591846409102e-07, "loss": 2.7556, "step": 180160 }, { "epoch": 7.760692595942628, "learning_rate": 5.637107026654313e-07, "loss": 2.8877, "step": 180180 }, { "epoch": 7.7615540336822155, "learning_rate": 5.636622206899524e-07, "loss": 2.9347, "step": 180200 }, { "epoch": 7.762415471421803, "learning_rate": 5.636137387144734e-07, "loss": 2.8148, "step": 180220 }, { "epoch": 7.76327690916139, "learning_rate": 5.635652567389946e-07, "loss": 2.9797, "step": 180240 }, { "epoch": 7.764138346900978, "learning_rate": 5.635167747635158e-07, "loss": 2.9068, "step": 180260 }, { "epoch": 7.764999784640565, "learning_rate": 5.634682927880368e-07, "loss": 3.0076, "step": 180280 }, { "epoch": 7.765861222380153, "learning_rate": 5.634198108125579e-07, "loss": 2.8068, "step": 180300 }, { "epoch": 7.76672266011974, "learning_rate": 5.63371328837079e-07, "loss": 2.8989, "step": 180320 }, { "epoch": 7.767584097859327, "learning_rate": 5.633228468616002e-07, "loss": 2.8423, "step": 180340 }, { "epoch": 7.768445535598914, "learning_rate": 5.632743648861212e-07, "loss": 2.9327, "step": 180360 }, { "epoch": 7.769306973338502, "learning_rate": 5.632258829106423e-07, "loss": 2.829, "step": 180380 }, { "epoch": 7.770168411078089, "learning_rate": 5.631774009351635e-07, "loss": 2.7907, "step": 180400 }, { "epoch": 7.771029848817677, "learning_rate": 5.631289189596846e-07, "loss": 2.9406, "step": 180420 }, { "epoch": 7.771891286557264, "learning_rate": 5.630804369842056e-07, "loss": 3.106, "step": 180440 }, { "epoch": 7.7727527242968515, "learning_rate": 5.630319550087267e-07, "loss": 2.7992, "step": 180460 }, { "epoch": 7.773614162036439, "learning_rate": 5.629834730332479e-07, "loss": 2.9508, "step": 180480 }, { "epoch": 7.774475599776026, "learning_rate": 5.629349910577689e-07, "loss": 2.7001, "step": 180500 }, { "epoch": 7.775337037515613, "learning_rate": 5.6288650908229e-07, "loss": 2.8058, "step": 180520 }, { "epoch": 7.776198475255201, "learning_rate": 5.628380271068112e-07, "loss": 2.9045, "step": 180540 }, { "epoch": 7.777059912994789, "learning_rate": 5.627895451313323e-07, "loss": 2.8204, "step": 180560 }, { "epoch": 7.777921350734376, "learning_rate": 5.627410631558533e-07, "loss": 2.8089, "step": 180580 }, { "epoch": 7.778782788473963, "learning_rate": 5.626925811803744e-07, "loss": 2.8198, "step": 180600 }, { "epoch": 7.77964422621355, "learning_rate": 5.626440992048955e-07, "loss": 2.9812, "step": 180620 }, { "epoch": 7.780505663953138, "learning_rate": 5.625956172294168e-07, "loss": 2.797, "step": 180640 }, { "epoch": 7.781367101692725, "learning_rate": 5.625471352539378e-07, "loss": 2.7257, "step": 180660 }, { "epoch": 7.782228539432312, "learning_rate": 5.624986532784589e-07, "loss": 3.0094, "step": 180680 }, { "epoch": 7.7830899771719, "learning_rate": 5.6245017130298e-07, "loss": 2.7618, "step": 180700 }, { "epoch": 7.7839514149114875, "learning_rate": 5.624016893275013e-07, "loss": 2.8389, "step": 180720 }, { "epoch": 7.784812852651075, "learning_rate": 5.623532073520222e-07, "loss": 2.7675, "step": 180740 }, { "epoch": 7.785674290390662, "learning_rate": 5.623047253765433e-07, "loss": 2.8425, "step": 180760 }, { "epoch": 7.786535728130249, "learning_rate": 5.622562434010645e-07, "loss": 2.9683, "step": 180780 }, { "epoch": 7.787397165869836, "learning_rate": 5.622077614255855e-07, "loss": 2.9076, "step": 180800 }, { "epoch": 7.788258603609425, "learning_rate": 5.621592794501066e-07, "loss": 2.9029, "step": 180820 }, { "epoch": 7.789120041349012, "learning_rate": 5.621107974746277e-07, "loss": 2.9947, "step": 180840 }, { "epoch": 7.789981479088599, "learning_rate": 5.620623154991489e-07, "loss": 2.9691, "step": 180860 }, { "epoch": 7.790842916828186, "learning_rate": 5.6201383352367e-07, "loss": 3.0655, "step": 180880 }, { "epoch": 7.7917043545677735, "learning_rate": 5.619653515481911e-07, "loss": 2.7746, "step": 180900 }, { "epoch": 7.792565792307361, "learning_rate": 5.619168695727122e-07, "loss": 2.8569, "step": 180920 }, { "epoch": 7.793427230046948, "learning_rate": 5.618683875972333e-07, "loss": 2.8128, "step": 180940 }, { "epoch": 7.794288667786535, "learning_rate": 5.618199056217543e-07, "loss": 2.9037, "step": 180960 }, { "epoch": 7.795150105526123, "learning_rate": 5.617714236462754e-07, "loss": 2.7566, "step": 180980 }, { "epoch": 7.796011543265711, "learning_rate": 5.617229416707966e-07, "loss": 2.8804, "step": 181000 }, { "epoch": 7.796872981005298, "learning_rate": 5.616744596953178e-07, "loss": 2.7942, "step": 181020 }, { "epoch": 7.797734418744885, "learning_rate": 5.616259777198388e-07, "loss": 2.8969, "step": 181040 }, { "epoch": 7.798595856484472, "learning_rate": 5.615774957443599e-07, "loss": 2.9761, "step": 181060 }, { "epoch": 7.79945729422406, "learning_rate": 5.61529013768881e-07, "loss": 2.8106, "step": 181080 }, { "epoch": 7.800318731963648, "learning_rate": 5.614805317934021e-07, "loss": 3.0037, "step": 181100 }, { "epoch": 7.801180169703235, "learning_rate": 5.614320498179232e-07, "loss": 2.9452, "step": 181120 }, { "epoch": 7.802041607442822, "learning_rate": 5.613835678424443e-07, "loss": 2.9165, "step": 181140 }, { "epoch": 7.8029030451824095, "learning_rate": 5.613350858669655e-07, "loss": 2.8266, "step": 181160 }, { "epoch": 7.803764482921997, "learning_rate": 5.612866038914865e-07, "loss": 2.9734, "step": 181180 }, { "epoch": 7.804625920661584, "learning_rate": 5.612381219160076e-07, "loss": 2.9567, "step": 181200 }, { "epoch": 7.805487358401171, "learning_rate": 5.611896399405287e-07, "loss": 2.9947, "step": 181220 }, { "epoch": 7.8063487961407585, "learning_rate": 5.611411579650499e-07, "loss": 2.9535, "step": 181240 }, { "epoch": 7.807210233880347, "learning_rate": 5.61092675989571e-07, "loss": 2.9932, "step": 181260 }, { "epoch": 7.808071671619934, "learning_rate": 5.610441940140921e-07, "loss": 2.8796, "step": 181280 }, { "epoch": 7.808933109359521, "learning_rate": 5.609957120386132e-07, "loss": 3.0261, "step": 181300 }, { "epoch": 7.809794547099108, "learning_rate": 5.609472300631343e-07, "loss": 2.8174, "step": 181320 }, { "epoch": 7.810655984838696, "learning_rate": 5.608987480876554e-07, "loss": 2.9925, "step": 181340 }, { "epoch": 7.811517422578283, "learning_rate": 5.608502661121765e-07, "loss": 2.8922, "step": 181360 }, { "epoch": 7.812378860317871, "learning_rate": 5.608017841366976e-07, "loss": 2.7791, "step": 181380 }, { "epoch": 7.813240298057458, "learning_rate": 5.607533021612187e-07, "loss": 2.8835, "step": 181400 }, { "epoch": 7.8141017357970455, "learning_rate": 5.607048201857398e-07, "loss": 2.8447, "step": 181420 }, { "epoch": 7.814963173536633, "learning_rate": 5.606563382102609e-07, "loss": 2.8917, "step": 181440 }, { "epoch": 7.81582461127622, "learning_rate": 5.606078562347821e-07, "loss": 2.8386, "step": 181460 }, { "epoch": 7.816686049015807, "learning_rate": 5.605593742593031e-07, "loss": 2.8214, "step": 181480 }, { "epoch": 7.8175474867553945, "learning_rate": 5.605108922838242e-07, "loss": 2.9754, "step": 181500 }, { "epoch": 7.818408924494982, "learning_rate": 5.604624103083453e-07, "loss": 3.0259, "step": 181520 }, { "epoch": 7.81927036223457, "learning_rate": 5.604139283328665e-07, "loss": 3.0027, "step": 181540 }, { "epoch": 7.820131799974157, "learning_rate": 5.603654463573875e-07, "loss": 2.8351, "step": 181560 }, { "epoch": 7.820993237713744, "learning_rate": 5.603169643819086e-07, "loss": 2.8462, "step": 181580 }, { "epoch": 7.821854675453332, "learning_rate": 5.602684824064297e-07, "loss": 2.8548, "step": 181600 }, { "epoch": 7.822716113192919, "learning_rate": 5.602200004309509e-07, "loss": 3.0012, "step": 181620 }, { "epoch": 7.823577550932506, "learning_rate": 5.60171518455472e-07, "loss": 2.8327, "step": 181640 }, { "epoch": 7.824438988672094, "learning_rate": 5.601230364799931e-07, "loss": 2.7803, "step": 181660 }, { "epoch": 7.825300426411681, "learning_rate": 5.600745545045142e-07, "loss": 2.9596, "step": 181680 }, { "epoch": 7.826161864151269, "learning_rate": 5.600260725290352e-07, "loss": 2.872, "step": 181700 }, { "epoch": 7.827023301890856, "learning_rate": 5.599775905535564e-07, "loss": 2.9774, "step": 181720 }, { "epoch": 7.827884739630443, "learning_rate": 5.599291085780775e-07, "loss": 2.9218, "step": 181740 }, { "epoch": 7.82874617737003, "learning_rate": 5.598806266025986e-07, "loss": 3.0218, "step": 181760 }, { "epoch": 7.829607615109618, "learning_rate": 5.598321446271197e-07, "loss": 2.9055, "step": 181780 }, { "epoch": 7.830469052849205, "learning_rate": 5.597836626516408e-07, "loss": 2.7596, "step": 181800 }, { "epoch": 7.831330490588793, "learning_rate": 5.597351806761619e-07, "loss": 2.8521, "step": 181820 }, { "epoch": 7.83219192832838, "learning_rate": 5.59686698700683e-07, "loss": 2.8664, "step": 181840 }, { "epoch": 7.8330533660679675, "learning_rate": 5.596382167252041e-07, "loss": 2.763, "step": 181860 }, { "epoch": 7.833914803807555, "learning_rate": 5.595897347497253e-07, "loss": 2.7258, "step": 181880 }, { "epoch": 7.834776241547142, "learning_rate": 5.595412527742464e-07, "loss": 2.955, "step": 181900 }, { "epoch": 7.835637679286729, "learning_rate": 5.594927707987675e-07, "loss": 2.8266, "step": 181920 }, { "epoch": 7.836499117026317, "learning_rate": 5.594442888232885e-07, "loss": 3.0105, "step": 181940 }, { "epoch": 7.837360554765905, "learning_rate": 5.593958068478098e-07, "loss": 2.8356, "step": 181960 }, { "epoch": 7.838221992505492, "learning_rate": 5.593473248723309e-07, "loss": 3.0566, "step": 181980 }, { "epoch": 7.839083430245079, "learning_rate": 5.592988428968518e-07, "loss": 2.8246, "step": 182000 }, { "epoch": 7.839944867984666, "learning_rate": 5.59250360921373e-07, "loss": 2.8604, "step": 182020 }, { "epoch": 7.840806305724254, "learning_rate": 5.592018789458941e-07, "loss": 2.8263, "step": 182040 }, { "epoch": 7.841667743463841, "learning_rate": 5.591533969704152e-07, "loss": 2.9423, "step": 182060 }, { "epoch": 7.842529181203428, "learning_rate": 5.591049149949362e-07, "loss": 3.0576, "step": 182080 }, { "epoch": 7.843390618943016, "learning_rate": 5.590564330194574e-07, "loss": 2.9191, "step": 182100 }, { "epoch": 7.8442520566826035, "learning_rate": 5.590079510439785e-07, "loss": 2.9829, "step": 182120 }, { "epoch": 7.845113494422191, "learning_rate": 5.589594690684996e-07, "loss": 2.7893, "step": 182140 }, { "epoch": 7.845974932161778, "learning_rate": 5.589109870930207e-07, "loss": 3.0744, "step": 182160 }, { "epoch": 7.846836369901365, "learning_rate": 5.588625051175418e-07, "loss": 2.8198, "step": 182180 }, { "epoch": 7.8476978076409525, "learning_rate": 5.588140231420629e-07, "loss": 2.7462, "step": 182200 }, { "epoch": 7.848559245380541, "learning_rate": 5.58765541166584e-07, "loss": 2.9637, "step": 182220 }, { "epoch": 7.849420683120128, "learning_rate": 5.587170591911051e-07, "loss": 3.052, "step": 182240 }, { "epoch": 7.850282120859715, "learning_rate": 5.586685772156263e-07, "loss": 2.9327, "step": 182260 }, { "epoch": 7.851143558599302, "learning_rate": 5.586200952401474e-07, "loss": 2.8609, "step": 182280 }, { "epoch": 7.85200499633889, "learning_rate": 5.585716132646684e-07, "loss": 2.864, "step": 182300 }, { "epoch": 7.852866434078477, "learning_rate": 5.585231312891895e-07, "loss": 2.8302, "step": 182320 }, { "epoch": 7.853727871818064, "learning_rate": 5.584746493137108e-07, "loss": 2.9203, "step": 182340 }, { "epoch": 7.854589309557651, "learning_rate": 5.584261673382318e-07, "loss": 2.9416, "step": 182360 }, { "epoch": 7.8554507472972395, "learning_rate": 5.583776853627528e-07, "loss": 2.9021, "step": 182380 }, { "epoch": 7.856312185036827, "learning_rate": 5.583292033872739e-07, "loss": 2.8476, "step": 182400 }, { "epoch": 7.857173622776414, "learning_rate": 5.582807214117951e-07, "loss": 2.9182, "step": 182420 }, { "epoch": 7.858035060516001, "learning_rate": 5.582322394363162e-07, "loss": 2.9308, "step": 182440 }, { "epoch": 7.8588964982555884, "learning_rate": 5.581837574608372e-07, "loss": 2.8982, "step": 182460 }, { "epoch": 7.859757935995176, "learning_rate": 5.581352754853585e-07, "loss": 3.0389, "step": 182480 }, { "epoch": 7.860619373734763, "learning_rate": 5.580867935098797e-07, "loss": 2.69, "step": 182500 }, { "epoch": 7.861480811474351, "learning_rate": 5.580383115344007e-07, "loss": 2.9931, "step": 182520 }, { "epoch": 7.862342249213938, "learning_rate": 5.579898295589217e-07, "loss": 2.9962, "step": 182540 }, { "epoch": 7.863203686953526, "learning_rate": 5.579413475834428e-07, "loss": 2.9083, "step": 182560 }, { "epoch": 7.864065124693113, "learning_rate": 5.578928656079639e-07, "loss": 2.9395, "step": 182580 }, { "epoch": 7.8649265624327, "learning_rate": 5.578443836324849e-07, "loss": 2.8333, "step": 182600 }, { "epoch": 7.865788000172287, "learning_rate": 5.577959016570061e-07, "loss": 2.9175, "step": 182620 }, { "epoch": 7.8666494379118745, "learning_rate": 5.577474196815273e-07, "loss": 3.083, "step": 182640 }, { "epoch": 7.867510875651463, "learning_rate": 5.576989377060484e-07, "loss": 2.9667, "step": 182660 }, { "epoch": 7.86837231339105, "learning_rate": 5.576504557305694e-07, "loss": 2.9499, "step": 182680 }, { "epoch": 7.869233751130637, "learning_rate": 5.576019737550905e-07, "loss": 2.7947, "step": 182700 }, { "epoch": 7.870095188870224, "learning_rate": 5.575534917796118e-07, "loss": 3.0048, "step": 182720 }, { "epoch": 7.870956626609812, "learning_rate": 5.575050098041328e-07, "loss": 2.8448, "step": 182740 }, { "epoch": 7.871818064349399, "learning_rate": 5.574565278286538e-07, "loss": 2.9344, "step": 182760 }, { "epoch": 7.872679502088986, "learning_rate": 5.57408045853175e-07, "loss": 2.8774, "step": 182780 }, { "epoch": 7.873540939828574, "learning_rate": 5.573595638776961e-07, "loss": 2.7459, "step": 182800 }, { "epoch": 7.8744023775681615, "learning_rate": 5.573110819022173e-07, "loss": 2.8585, "step": 182820 }, { "epoch": 7.875263815307749, "learning_rate": 5.572625999267382e-07, "loss": 2.975, "step": 182840 }, { "epoch": 7.876125253047336, "learning_rate": 5.572141179512595e-07, "loss": 3.0125, "step": 182860 }, { "epoch": 7.876986690786923, "learning_rate": 5.571656359757806e-07, "loss": 2.9091, "step": 182880 }, { "epoch": 7.8778481285265105, "learning_rate": 5.571171540003017e-07, "loss": 2.894, "step": 182900 }, { "epoch": 7.878709566266098, "learning_rate": 5.570686720248227e-07, "loss": 2.9009, "step": 182920 }, { "epoch": 7.879571004005685, "learning_rate": 5.570201900493438e-07, "loss": 2.6669, "step": 182940 }, { "epoch": 7.880432441745273, "learning_rate": 5.569717080738651e-07, "loss": 2.8498, "step": 182960 }, { "epoch": 7.88129387948486, "learning_rate": 5.569232260983861e-07, "loss": 2.9496, "step": 182980 }, { "epoch": 7.882155317224448, "learning_rate": 5.568747441229071e-07, "loss": 3.0787, "step": 183000 }, { "epoch": 7.883016754964035, "learning_rate": 5.568262621474283e-07, "loss": 2.8212, "step": 183020 }, { "epoch": 7.883878192703622, "learning_rate": 5.567777801719495e-07, "loss": 2.8339, "step": 183040 }, { "epoch": 7.884739630443209, "learning_rate": 5.567292981964704e-07, "loss": 3.0194, "step": 183060 }, { "epoch": 7.8856010681827975, "learning_rate": 5.566808162209915e-07, "loss": 2.828, "step": 183080 }, { "epoch": 7.886462505922385, "learning_rate": 5.566323342455128e-07, "loss": 2.9469, "step": 183100 }, { "epoch": 7.887323943661972, "learning_rate": 5.565838522700339e-07, "loss": 2.9783, "step": 183120 }, { "epoch": 7.888185381401559, "learning_rate": 5.565353702945548e-07, "loss": 3.1139, "step": 183140 }, { "epoch": 7.8890468191411465, "learning_rate": 5.56486888319076e-07, "loss": 2.9093, "step": 183160 }, { "epoch": 7.889908256880734, "learning_rate": 5.564384063435972e-07, "loss": 2.8876, "step": 183180 }, { "epoch": 7.890769694620321, "learning_rate": 5.563899243681181e-07, "loss": 2.813, "step": 183200 }, { "epoch": 7.891631132359908, "learning_rate": 5.563414423926392e-07, "loss": 2.8334, "step": 183220 }, { "epoch": 7.892492570099496, "learning_rate": 5.562929604171605e-07, "loss": 2.9396, "step": 183240 }, { "epoch": 7.893354007839084, "learning_rate": 5.562444784416816e-07, "loss": 3.0149, "step": 183260 }, { "epoch": 7.894215445578671, "learning_rate": 5.561959964662026e-07, "loss": 2.848, "step": 183280 }, { "epoch": 7.895076883318258, "learning_rate": 5.561475144907237e-07, "loss": 2.8675, "step": 183300 }, { "epoch": 7.895938321057845, "learning_rate": 5.560990325152448e-07, "loss": 2.8603, "step": 183320 }, { "epoch": 7.896799758797433, "learning_rate": 5.56050550539766e-07, "loss": 2.8728, "step": 183340 }, { "epoch": 7.897661196537021, "learning_rate": 5.560020685642871e-07, "loss": 2.7816, "step": 183360 }, { "epoch": 7.898522634276608, "learning_rate": 5.559535865888081e-07, "loss": 2.8735, "step": 183380 }, { "epoch": 7.899384072016195, "learning_rate": 5.559051046133293e-07, "loss": 2.8137, "step": 183400 }, { "epoch": 7.900245509755782, "learning_rate": 5.558566226378505e-07, "loss": 2.9221, "step": 183420 }, { "epoch": 7.90110694749537, "learning_rate": 5.558081406623714e-07, "loss": 2.7331, "step": 183440 }, { "epoch": 7.901968385234957, "learning_rate": 5.557596586868925e-07, "loss": 2.9545, "step": 183460 }, { "epoch": 7.902829822974544, "learning_rate": 5.557111767114138e-07, "loss": 3.0222, "step": 183480 }, { "epoch": 7.903691260714131, "learning_rate": 5.556626947359349e-07, "loss": 2.8649, "step": 183500 }, { "epoch": 7.9045526984537196, "learning_rate": 5.556142127604559e-07, "loss": 2.9137, "step": 183520 }, { "epoch": 7.905414136193307, "learning_rate": 5.55565730784977e-07, "loss": 2.9703, "step": 183540 }, { "epoch": 7.906275573932894, "learning_rate": 5.555172488094982e-07, "loss": 2.9942, "step": 183560 }, { "epoch": 7.907137011672481, "learning_rate": 5.554687668340194e-07, "loss": 2.9615, "step": 183580 }, { "epoch": 7.9079984494120685, "learning_rate": 5.554202848585404e-07, "loss": 2.9507, "step": 183600 }, { "epoch": 7.908859887151656, "learning_rate": 5.553718028830615e-07, "loss": 2.7534, "step": 183620 }, { "epoch": 7.909721324891244, "learning_rate": 5.553233209075826e-07, "loss": 2.9526, "step": 183640 }, { "epoch": 7.910582762630831, "learning_rate": 5.552748389321035e-07, "loss": 2.932, "step": 183660 }, { "epoch": 7.911444200370418, "learning_rate": 5.552263569566247e-07, "loss": 2.7313, "step": 183680 }, { "epoch": 7.912305638110006, "learning_rate": 5.551778749811458e-07, "loss": 2.9417, "step": 183700 }, { "epoch": 7.913167075849593, "learning_rate": 5.551293930056671e-07, "loss": 2.6892, "step": 183720 }, { "epoch": 7.91402851358918, "learning_rate": 5.550809110301881e-07, "loss": 2.9362, "step": 183740 }, { "epoch": 7.914889951328767, "learning_rate": 5.550324290547093e-07, "loss": 2.8431, "step": 183760 }, { "epoch": 7.915751389068355, "learning_rate": 5.549839470792303e-07, "loss": 2.9966, "step": 183780 }, { "epoch": 7.916612826807943, "learning_rate": 5.549354651037515e-07, "loss": 2.7752, "step": 183800 }, { "epoch": 7.91747426454753, "learning_rate": 5.548869831282724e-07, "loss": 3.0113, "step": 183820 }, { "epoch": 7.918335702287117, "learning_rate": 5.548385011527935e-07, "loss": 2.8056, "step": 183840 }, { "epoch": 7.9191971400267045, "learning_rate": 5.547900191773148e-07, "loss": 2.8115, "step": 183860 }, { "epoch": 7.920058577766292, "learning_rate": 5.547415372018358e-07, "loss": 2.9027, "step": 183880 }, { "epoch": 7.920920015505879, "learning_rate": 5.546930552263569e-07, "loss": 2.8213, "step": 183900 }, { "epoch": 7.921781453245467, "learning_rate": 5.54644573250878e-07, "loss": 2.9637, "step": 183920 }, { "epoch": 7.922642890985054, "learning_rate": 5.545960912753992e-07, "loss": 2.922, "step": 183940 }, { "epoch": 7.923504328724642, "learning_rate": 5.545476092999203e-07, "loss": 2.9792, "step": 183960 }, { "epoch": 7.924365766464229, "learning_rate": 5.544991273244414e-07, "loss": 3.0949, "step": 183980 }, { "epoch": 7.925227204203816, "learning_rate": 5.544506453489625e-07, "loss": 2.807, "step": 184000 }, { "epoch": 7.926088641943403, "learning_rate": 5.544021633734836e-07, "loss": 2.9191, "step": 184020 }, { "epoch": 7.926950079682991, "learning_rate": 5.543536813980047e-07, "loss": 2.865, "step": 184040 }, { "epoch": 7.927811517422578, "learning_rate": 5.543051994225258e-07, "loss": 2.9347, "step": 184060 }, { "epoch": 7.928672955162166, "learning_rate": 5.542567174470469e-07, "loss": 2.9221, "step": 184080 }, { "epoch": 7.929534392901753, "learning_rate": 5.542082354715681e-07, "loss": 2.8588, "step": 184100 }, { "epoch": 7.9303958306413405, "learning_rate": 5.541597534960891e-07, "loss": 3.0246, "step": 184120 }, { "epoch": 7.931257268380928, "learning_rate": 5.541112715206102e-07, "loss": 2.8957, "step": 184140 }, { "epoch": 7.932118706120515, "learning_rate": 5.540627895451313e-07, "loss": 2.9124, "step": 184160 }, { "epoch": 7.932980143860102, "learning_rate": 5.540143075696523e-07, "loss": 2.7781, "step": 184180 }, { "epoch": 7.93384158159969, "learning_rate": 5.539658255941735e-07, "loss": 3.0612, "step": 184200 }, { "epoch": 7.934703019339278, "learning_rate": 5.539173436186945e-07, "loss": 3.0434, "step": 184220 }, { "epoch": 7.935564457078865, "learning_rate": 5.538688616432158e-07, "loss": 2.9978, "step": 184240 }, { "epoch": 7.936425894818452, "learning_rate": 5.538203796677368e-07, "loss": 3.0963, "step": 184260 }, { "epoch": 7.937287332558039, "learning_rate": 5.537718976922579e-07, "loss": 2.8242, "step": 184280 }, { "epoch": 7.9381487702976266, "learning_rate": 5.53723415716779e-07, "loss": 2.6657, "step": 184300 }, { "epoch": 7.939010208037214, "learning_rate": 5.536749337413002e-07, "loss": 2.8766, "step": 184320 }, { "epoch": 7.939871645776801, "learning_rate": 5.536264517658213e-07, "loss": 2.9144, "step": 184340 }, { "epoch": 7.940733083516389, "learning_rate": 5.535779697903424e-07, "loss": 2.8125, "step": 184360 }, { "epoch": 7.941594521255976, "learning_rate": 5.535294878148635e-07, "loss": 2.7815, "step": 184380 }, { "epoch": 7.942455958995564, "learning_rate": 5.534810058393846e-07, "loss": 2.8472, "step": 184400 }, { "epoch": 7.943317396735151, "learning_rate": 5.534325238639057e-07, "loss": 2.8719, "step": 184420 }, { "epoch": 7.944178834474738, "learning_rate": 5.533840418884268e-07, "loss": 2.909, "step": 184440 }, { "epoch": 7.945040272214325, "learning_rate": 5.533355599129478e-07, "loss": 2.9544, "step": 184460 }, { "epoch": 7.9459017099539135, "learning_rate": 5.53287077937469e-07, "loss": 2.8958, "step": 184480 }, { "epoch": 7.946763147693501, "learning_rate": 5.532385959619901e-07, "loss": 2.9271, "step": 184500 }, { "epoch": 7.947624585433088, "learning_rate": 5.531901139865112e-07, "loss": 2.8972, "step": 184520 }, { "epoch": 7.948486023172675, "learning_rate": 5.531416320110323e-07, "loss": 3.0782, "step": 184540 }, { "epoch": 7.9493474609122625, "learning_rate": 5.530931500355534e-07, "loss": 2.8258, "step": 184560 }, { "epoch": 7.95020889865185, "learning_rate": 5.530446680600746e-07, "loss": 2.815, "step": 184580 }, { "epoch": 7.951070336391437, "learning_rate": 5.529961860845957e-07, "loss": 2.9328, "step": 184600 }, { "epoch": 7.951931774131024, "learning_rate": 5.529477041091168e-07, "loss": 2.7971, "step": 184620 }, { "epoch": 7.952793211870612, "learning_rate": 5.528992221336378e-07, "loss": 2.8274, "step": 184640 }, { "epoch": 7.9536546496102, "learning_rate": 5.52850740158159e-07, "loss": 2.8176, "step": 184660 }, { "epoch": 7.954516087349787, "learning_rate": 5.528022581826801e-07, "loss": 3.1136, "step": 184680 }, { "epoch": 7.955377525089374, "learning_rate": 5.527537762072012e-07, "loss": 2.9203, "step": 184700 }, { "epoch": 7.956238962828961, "learning_rate": 5.527052942317223e-07, "loss": 2.7676, "step": 184720 }, { "epoch": 7.957100400568549, "learning_rate": 5.526568122562434e-07, "loss": 2.9574, "step": 184740 }, { "epoch": 7.957961838308137, "learning_rate": 5.526083302807645e-07, "loss": 2.8543, "step": 184760 }, { "epoch": 7.958823276047724, "learning_rate": 5.525598483052855e-07, "loss": 2.7526, "step": 184780 }, { "epoch": 7.959684713787311, "learning_rate": 5.525113663298067e-07, "loss": 2.7772, "step": 184800 }, { "epoch": 7.9605461515268985, "learning_rate": 5.524628843543278e-07, "loss": 2.8461, "step": 184820 }, { "epoch": 7.961407589266486, "learning_rate": 5.524144023788489e-07, "loss": 2.8869, "step": 184840 }, { "epoch": 7.962269027006073, "learning_rate": 5.5236592040337e-07, "loss": 2.9518, "step": 184860 }, { "epoch": 7.96313046474566, "learning_rate": 5.523174384278911e-07, "loss": 2.9137, "step": 184880 }, { "epoch": 7.9639919024852475, "learning_rate": 5.522689564524122e-07, "loss": 2.8664, "step": 184900 }, { "epoch": 7.964853340224836, "learning_rate": 5.522204744769333e-07, "loss": 3.0343, "step": 184920 }, { "epoch": 7.965714777964423, "learning_rate": 5.521719925014544e-07, "loss": 2.8583, "step": 184940 }, { "epoch": 7.96657621570401, "learning_rate": 5.521235105259756e-07, "loss": 2.7853, "step": 184960 }, { "epoch": 7.967437653443597, "learning_rate": 5.520750285504967e-07, "loss": 2.9354, "step": 184980 }, { "epoch": 7.968299091183185, "learning_rate": 5.520265465750178e-07, "loss": 2.8865, "step": 185000 }, { "epoch": 7.969160528922772, "learning_rate": 5.519780645995389e-07, "loss": 2.6633, "step": 185020 }, { "epoch": 7.97002196666236, "learning_rate": 5.5192958262406e-07, "loss": 2.8873, "step": 185040 }, { "epoch": 7.970883404401947, "learning_rate": 5.518811006485811e-07, "loss": 2.9029, "step": 185060 }, { "epoch": 7.9717448421415344, "learning_rate": 5.518326186731021e-07, "loss": 2.666, "step": 185080 }, { "epoch": 7.972606279881122, "learning_rate": 5.517841366976233e-07, "loss": 2.9847, "step": 185100 }, { "epoch": 7.973467717620709, "learning_rate": 5.517356547221444e-07, "loss": 2.8482, "step": 185120 }, { "epoch": 7.974329155360296, "learning_rate": 5.516871727466655e-07, "loss": 2.8324, "step": 185140 }, { "epoch": 7.975190593099883, "learning_rate": 5.516386907711865e-07, "loss": 3.0191, "step": 185160 }, { "epoch": 7.976052030839471, "learning_rate": 5.515902087957077e-07, "loss": 2.8666, "step": 185180 }, { "epoch": 7.976913468579059, "learning_rate": 5.515417268202289e-07, "loss": 3.0354, "step": 185200 }, { "epoch": 7.977774906318646, "learning_rate": 5.5149324484475e-07, "loss": 2.9946, "step": 185220 }, { "epoch": 7.978636344058233, "learning_rate": 5.51444762869271e-07, "loss": 2.8819, "step": 185240 }, { "epoch": 7.9794977817978205, "learning_rate": 5.513962808937921e-07, "loss": 2.9656, "step": 185260 }, { "epoch": 7.980359219537408, "learning_rate": 5.513477989183132e-07, "loss": 2.8122, "step": 185280 }, { "epoch": 7.981220657276995, "learning_rate": 5.512993169428343e-07, "loss": 2.786, "step": 185300 }, { "epoch": 7.982082095016583, "learning_rate": 5.512508349673554e-07, "loss": 2.9515, "step": 185320 }, { "epoch": 7.98294353275617, "learning_rate": 5.512023529918766e-07, "loss": 2.987, "step": 185340 }, { "epoch": 7.983804970495758, "learning_rate": 5.511538710163977e-07, "loss": 2.78, "step": 185360 }, { "epoch": 7.984666408235345, "learning_rate": 5.511053890409188e-07, "loss": 2.7717, "step": 185380 }, { "epoch": 7.985527845974932, "learning_rate": 5.510569070654398e-07, "loss": 2.9943, "step": 185400 }, { "epoch": 7.986389283714519, "learning_rate": 5.51008425089961e-07, "loss": 2.9615, "step": 185420 }, { "epoch": 7.987250721454107, "learning_rate": 5.50959943114482e-07, "loss": 2.8716, "step": 185440 }, { "epoch": 7.988112159193694, "learning_rate": 5.509114611390031e-07, "loss": 2.7545, "step": 185460 }, { "epoch": 7.988973596933282, "learning_rate": 5.508629791635243e-07, "loss": 2.8347, "step": 185480 }, { "epoch": 7.989835034672869, "learning_rate": 5.508144971880454e-07, "loss": 2.9001, "step": 185500 }, { "epoch": 7.9906964724124565, "learning_rate": 5.507660152125665e-07, "loss": 2.9832, "step": 185520 }, { "epoch": 7.991557910152044, "learning_rate": 5.507175332370875e-07, "loss": 2.7182, "step": 185540 }, { "epoch": 7.992419347891631, "learning_rate": 5.506690512616087e-07, "loss": 2.7733, "step": 185560 }, { "epoch": 7.993280785631218, "learning_rate": 5.506205692861299e-07, "loss": 2.8181, "step": 185580 }, { "epoch": 7.9941422233708055, "learning_rate": 5.50572087310651e-07, "loss": 3.0215, "step": 185600 }, { "epoch": 7.995003661110394, "learning_rate": 5.50523605335172e-07, "loss": 2.9515, "step": 185620 }, { "epoch": 7.995865098849981, "learning_rate": 5.504751233596931e-07, "loss": 2.9126, "step": 185640 }, { "epoch": 7.996726536589568, "learning_rate": 5.504266413842143e-07, "loss": 3.0746, "step": 185660 }, { "epoch": 7.997587974329155, "learning_rate": 5.503781594087354e-07, "loss": 2.9059, "step": 185680 }, { "epoch": 7.998449412068743, "learning_rate": 5.503296774332564e-07, "loss": 2.8672, "step": 185700 }, { "epoch": 7.99931084980833, "learning_rate": 5.502811954577776e-07, "loss": 2.9432, "step": 185720 }, { "epoch": 8.000172287547917, "learning_rate": 5.502327134822987e-07, "loss": 2.8989, "step": 185740 }, { "epoch": 8.001033725287504, "learning_rate": 5.501842315068197e-07, "loss": 2.7647, "step": 185760 }, { "epoch": 8.001895163027092, "learning_rate": 5.501357495313408e-07, "loss": 2.8602, "step": 185780 }, { "epoch": 8.002756600766679, "learning_rate": 5.50087267555862e-07, "loss": 2.8374, "step": 185800 }, { "epoch": 8.003618038506266, "learning_rate": 5.500387855803831e-07, "loss": 2.6321, "step": 185820 }, { "epoch": 8.004479476245855, "learning_rate": 5.499903036049042e-07, "loss": 2.6652, "step": 185840 }, { "epoch": 8.005340913985442, "learning_rate": 5.499418216294253e-07, "loss": 2.8973, "step": 185860 }, { "epoch": 8.00620235172503, "learning_rate": 5.498933396539464e-07, "loss": 2.791, "step": 185880 }, { "epoch": 8.007063789464617, "learning_rate": 5.498448576784675e-07, "loss": 2.9331, "step": 185900 }, { "epoch": 8.007925227204204, "learning_rate": 5.497963757029886e-07, "loss": 2.9022, "step": 185920 }, { "epoch": 8.008786664943791, "learning_rate": 5.497478937275097e-07, "loss": 2.6971, "step": 185940 }, { "epoch": 8.009648102683379, "learning_rate": 5.496994117520308e-07, "loss": 2.9388, "step": 185960 }, { "epoch": 8.010509540422966, "learning_rate": 5.49650929776552e-07, "loss": 2.9828, "step": 185980 }, { "epoch": 8.011370978162553, "learning_rate": 5.49602447801073e-07, "loss": 2.9233, "step": 186000 }, { "epoch": 8.01223241590214, "learning_rate": 5.495539658255941e-07, "loss": 2.7119, "step": 186020 }, { "epoch": 8.013093853641728, "learning_rate": 5.495054838501154e-07, "loss": 2.7768, "step": 186040 }, { "epoch": 8.013955291381315, "learning_rate": 5.494570018746363e-07, "loss": 2.944, "step": 186060 }, { "epoch": 8.014816729120902, "learning_rate": 5.494085198991574e-07, "loss": 2.8419, "step": 186080 }, { "epoch": 8.01567816686049, "learning_rate": 5.493600379236786e-07, "loss": 2.775, "step": 186100 }, { "epoch": 8.016539604600078, "learning_rate": 5.493115559481997e-07, "loss": 2.7614, "step": 186120 }, { "epoch": 8.017401042339666, "learning_rate": 5.492630739727207e-07, "loss": 2.7659, "step": 186140 }, { "epoch": 8.018262480079253, "learning_rate": 5.492145919972418e-07, "loss": 2.8567, "step": 186160 }, { "epoch": 8.01912391781884, "learning_rate": 5.49166110021763e-07, "loss": 2.7898, "step": 186180 }, { "epoch": 8.019985355558427, "learning_rate": 5.491176280462841e-07, "loss": 2.8394, "step": 186200 }, { "epoch": 8.020846793298015, "learning_rate": 5.490691460708052e-07, "loss": 2.9255, "step": 186220 }, { "epoch": 8.021708231037602, "learning_rate": 5.490206640953263e-07, "loss": 2.945, "step": 186240 }, { "epoch": 8.022569668777189, "learning_rate": 5.489721821198474e-07, "loss": 3.0707, "step": 186260 }, { "epoch": 8.023431106516776, "learning_rate": 5.489237001443683e-07, "loss": 2.8099, "step": 186280 }, { "epoch": 8.024292544256364, "learning_rate": 5.488752181688896e-07, "loss": 2.8102, "step": 186300 }, { "epoch": 8.02515398199595, "learning_rate": 5.488267361934107e-07, "loss": 2.9267, "step": 186320 }, { "epoch": 8.026015419735538, "learning_rate": 5.487782542179319e-07, "loss": 2.8434, "step": 186340 }, { "epoch": 8.026876857475125, "learning_rate": 5.48729772242453e-07, "loss": 2.8068, "step": 186360 }, { "epoch": 8.027738295214712, "learning_rate": 5.486812902669741e-07, "loss": 2.7225, "step": 186380 }, { "epoch": 8.028599732954302, "learning_rate": 5.486328082914951e-07, "loss": 2.924, "step": 186400 }, { "epoch": 8.029461170693889, "learning_rate": 5.485843263160164e-07, "loss": 2.7929, "step": 186420 }, { "epoch": 8.030322608433476, "learning_rate": 5.485358443405373e-07, "loss": 2.9104, "step": 186440 }, { "epoch": 8.031184046173063, "learning_rate": 5.484873623650585e-07, "loss": 2.742, "step": 186460 }, { "epoch": 8.03204548391265, "learning_rate": 5.484388803895796e-07, "loss": 3.1014, "step": 186480 }, { "epoch": 8.032906921652238, "learning_rate": 5.483903984141007e-07, "loss": 2.9382, "step": 186500 }, { "epoch": 8.033768359391825, "learning_rate": 5.483419164386217e-07, "loss": 2.8127, "step": 186520 }, { "epoch": 8.034629797131412, "learning_rate": 5.48293434463143e-07, "loss": 2.9188, "step": 186540 }, { "epoch": 8.035491234871, "learning_rate": 5.48244952487664e-07, "loss": 2.7634, "step": 186560 }, { "epoch": 8.036352672610587, "learning_rate": 5.481964705121852e-07, "loss": 2.7907, "step": 186580 }, { "epoch": 8.037214110350174, "learning_rate": 5.481479885367062e-07, "loss": 2.9569, "step": 186600 }, { "epoch": 8.038075548089761, "learning_rate": 5.480995065612273e-07, "loss": 2.9635, "step": 186620 }, { "epoch": 8.038936985829348, "learning_rate": 5.480510245857484e-07, "loss": 2.7462, "step": 186640 }, { "epoch": 8.039798423568936, "learning_rate": 5.480025426102694e-07, "loss": 2.9788, "step": 186660 }, { "epoch": 8.040659861308525, "learning_rate": 5.479540606347907e-07, "loss": 3.1408, "step": 186680 }, { "epoch": 8.041521299048112, "learning_rate": 5.479055786593117e-07, "loss": 2.9223, "step": 186700 }, { "epoch": 8.0423827367877, "learning_rate": 5.478570966838329e-07, "loss": 2.9281, "step": 186720 }, { "epoch": 8.043244174527286, "learning_rate": 5.478086147083539e-07, "loss": 2.8503, "step": 186740 }, { "epoch": 8.044105612266874, "learning_rate": 5.47760132732875e-07, "loss": 2.7624, "step": 186760 }, { "epoch": 8.044967050006461, "learning_rate": 5.477116507573961e-07, "loss": 2.6184, "step": 186780 }, { "epoch": 8.045828487746048, "learning_rate": 5.476631687819174e-07, "loss": 2.9575, "step": 186800 }, { "epoch": 8.046689925485635, "learning_rate": 5.476146868064383e-07, "loss": 2.7833, "step": 186820 }, { "epoch": 8.047551363225223, "learning_rate": 5.475662048309595e-07, "loss": 2.8162, "step": 186840 }, { "epoch": 8.04841280096481, "learning_rate": 5.475177228554806e-07, "loss": 2.8005, "step": 186860 }, { "epoch": 8.049274238704397, "learning_rate": 5.474692408800017e-07, "loss": 2.6589, "step": 186880 }, { "epoch": 8.050135676443984, "learning_rate": 5.474207589045227e-07, "loss": 2.7078, "step": 186900 }, { "epoch": 8.050997114183572, "learning_rate": 5.47372276929044e-07, "loss": 2.7777, "step": 186920 }, { "epoch": 8.051858551923159, "learning_rate": 5.47323794953565e-07, "loss": 2.7571, "step": 186940 }, { "epoch": 8.052719989662748, "learning_rate": 5.472753129780862e-07, "loss": 2.8615, "step": 186960 }, { "epoch": 8.053581427402335, "learning_rate": 5.472268310026072e-07, "loss": 2.9259, "step": 186980 }, { "epoch": 8.054442865141922, "learning_rate": 5.471783490271283e-07, "loss": 2.8687, "step": 187000 }, { "epoch": 8.05530430288151, "learning_rate": 5.471298670516494e-07, "loss": 2.7701, "step": 187020 }, { "epoch": 8.056165740621097, "learning_rate": 5.470813850761706e-07, "loss": 2.9363, "step": 187040 }, { "epoch": 8.057027178360684, "learning_rate": 5.470329031006917e-07, "loss": 2.8305, "step": 187060 }, { "epoch": 8.057888616100271, "learning_rate": 5.469844211252128e-07, "loss": 2.8337, "step": 187080 }, { "epoch": 8.058750053839859, "learning_rate": 5.469359391497339e-07, "loss": 2.9148, "step": 187100 }, { "epoch": 8.059611491579446, "learning_rate": 5.46887457174255e-07, "loss": 2.8639, "step": 187120 }, { "epoch": 8.060472929319033, "learning_rate": 5.46838975198776e-07, "loss": 2.8798, "step": 187140 }, { "epoch": 8.06133436705862, "learning_rate": 5.467904932232971e-07, "loss": 2.8389, "step": 187160 }, { "epoch": 8.062195804798208, "learning_rate": 5.467420112478184e-07, "loss": 2.9595, "step": 187180 }, { "epoch": 8.063057242537795, "learning_rate": 5.466935292723393e-07, "loss": 3.0177, "step": 187200 }, { "epoch": 8.063918680277382, "learning_rate": 5.466450472968604e-07, "loss": 3.0028, "step": 187220 }, { "epoch": 8.064780118016971, "learning_rate": 5.465965653213816e-07, "loss": 2.8665, "step": 187240 }, { "epoch": 8.065641555756558, "learning_rate": 5.465480833459026e-07, "loss": 2.8232, "step": 187260 }, { "epoch": 8.066502993496146, "learning_rate": 5.464996013704237e-07, "loss": 2.8723, "step": 187280 }, { "epoch": 8.067364431235733, "learning_rate": 5.46451119394945e-07, "loss": 2.9318, "step": 187300 }, { "epoch": 8.06822586897532, "learning_rate": 5.464026374194662e-07, "loss": 2.8936, "step": 187320 }, { "epoch": 8.069087306714907, "learning_rate": 5.463541554439871e-07, "loss": 2.8346, "step": 187340 }, { "epoch": 8.069948744454495, "learning_rate": 5.463056734685082e-07, "loss": 2.833, "step": 187360 }, { "epoch": 8.070810182194082, "learning_rate": 5.462571914930294e-07, "loss": 2.9263, "step": 187380 }, { "epoch": 8.071671619933669, "learning_rate": 5.462087095175504e-07, "loss": 2.9219, "step": 187400 }, { "epoch": 8.072533057673256, "learning_rate": 5.461602275420716e-07, "loss": 2.8683, "step": 187420 }, { "epoch": 8.073394495412844, "learning_rate": 5.461117455665927e-07, "loss": 2.8976, "step": 187440 }, { "epoch": 8.07425593315243, "learning_rate": 5.460632635911138e-07, "loss": 2.8717, "step": 187460 }, { "epoch": 8.075117370892018, "learning_rate": 5.460147816156349e-07, "loss": 2.8118, "step": 187480 }, { "epoch": 8.075978808631605, "learning_rate": 5.45966299640156e-07, "loss": 2.876, "step": 187500 }, { "epoch": 8.076840246371194, "learning_rate": 5.45917817664677e-07, "loss": 2.8992, "step": 187520 }, { "epoch": 8.077701684110782, "learning_rate": 5.458693356891983e-07, "loss": 2.8317, "step": 187540 }, { "epoch": 8.078563121850369, "learning_rate": 5.458208537137194e-07, "loss": 3.0307, "step": 187560 }, { "epoch": 8.079424559589956, "learning_rate": 5.457723717382404e-07, "loss": 2.9651, "step": 187580 }, { "epoch": 8.080285997329543, "learning_rate": 5.457238897627615e-07, "loss": 2.7887, "step": 187600 }, { "epoch": 8.08114743506913, "learning_rate": 5.456754077872827e-07, "loss": 2.9788, "step": 187620 }, { "epoch": 8.082008872808718, "learning_rate": 5.456269258118037e-07, "loss": 2.8938, "step": 187640 }, { "epoch": 8.082870310548305, "learning_rate": 5.455784438363249e-07, "loss": 2.9273, "step": 187660 }, { "epoch": 8.083731748287892, "learning_rate": 5.45529961860846e-07, "loss": 2.7979, "step": 187680 }, { "epoch": 8.08459318602748, "learning_rate": 5.454814798853671e-07, "loss": 2.8268, "step": 187700 }, { "epoch": 8.085454623767067, "learning_rate": 5.454329979098881e-07, "loss": 2.869, "step": 187720 }, { "epoch": 8.086316061506654, "learning_rate": 5.453845159344092e-07, "loss": 2.7095, "step": 187740 }, { "epoch": 8.087177499246241, "learning_rate": 5.453360339589304e-07, "loss": 3.0114, "step": 187760 }, { "epoch": 8.088038936985829, "learning_rate": 5.452875519834514e-07, "loss": 2.8283, "step": 187780 }, { "epoch": 8.088900374725418, "learning_rate": 5.452390700079726e-07, "loss": 2.8659, "step": 187800 }, { "epoch": 8.089761812465005, "learning_rate": 5.451905880324937e-07, "loss": 2.8199, "step": 187820 }, { "epoch": 8.090623250204592, "learning_rate": 5.451421060570148e-07, "loss": 2.8875, "step": 187840 }, { "epoch": 8.09148468794418, "learning_rate": 5.450936240815359e-07, "loss": 2.8334, "step": 187860 }, { "epoch": 8.092346125683767, "learning_rate": 5.45045142106057e-07, "loss": 2.8955, "step": 187880 }, { "epoch": 8.093207563423354, "learning_rate": 5.44996660130578e-07, "loss": 2.7915, "step": 187900 }, { "epoch": 8.094069001162941, "learning_rate": 5.449481781550993e-07, "loss": 2.7899, "step": 187920 }, { "epoch": 8.094930438902528, "learning_rate": 5.448996961796203e-07, "loss": 2.8525, "step": 187940 }, { "epoch": 8.095791876642116, "learning_rate": 5.448512142041414e-07, "loss": 2.601, "step": 187960 }, { "epoch": 8.096653314381703, "learning_rate": 5.448027322286625e-07, "loss": 2.9153, "step": 187980 }, { "epoch": 8.09751475212129, "learning_rate": 5.447542502531837e-07, "loss": 2.9657, "step": 188000 }, { "epoch": 8.098376189860877, "learning_rate": 5.447057682777046e-07, "loss": 3.0092, "step": 188020 }, { "epoch": 8.099237627600465, "learning_rate": 5.446572863022259e-07, "loss": 2.796, "step": 188040 }, { "epoch": 8.100099065340052, "learning_rate": 5.44608804326747e-07, "loss": 2.8068, "step": 188060 }, { "epoch": 8.10096050307964, "learning_rate": 5.445603223512681e-07, "loss": 2.8932, "step": 188080 }, { "epoch": 8.101821940819228, "learning_rate": 5.445118403757891e-07, "loss": 2.7652, "step": 188100 }, { "epoch": 8.102683378558815, "learning_rate": 5.444633584003103e-07, "loss": 2.9182, "step": 188120 }, { "epoch": 8.103544816298403, "learning_rate": 5.444148764248314e-07, "loss": 2.8235, "step": 188140 }, { "epoch": 8.10440625403799, "learning_rate": 5.443663944493526e-07, "loss": 2.8054, "step": 188160 }, { "epoch": 8.105267691777577, "learning_rate": 5.443179124738736e-07, "loss": 2.8168, "step": 188180 }, { "epoch": 8.106129129517164, "learning_rate": 5.442694304983947e-07, "loss": 3.1287, "step": 188200 }, { "epoch": 8.106990567256751, "learning_rate": 5.442209485229158e-07, "loss": 2.8272, "step": 188220 }, { "epoch": 8.107852004996339, "learning_rate": 5.441724665474368e-07, "loss": 2.8292, "step": 188240 }, { "epoch": 8.108713442735926, "learning_rate": 5.44123984571958e-07, "loss": 2.8369, "step": 188260 }, { "epoch": 8.109574880475513, "learning_rate": 5.44075502596479e-07, "loss": 2.8932, "step": 188280 }, { "epoch": 8.1104363182151, "learning_rate": 5.440270206210003e-07, "loss": 2.813, "step": 188300 }, { "epoch": 8.111297755954688, "learning_rate": 5.439785386455213e-07, "loss": 2.9218, "step": 188320 }, { "epoch": 8.112159193694275, "learning_rate": 5.439300566700424e-07, "loss": 2.9631, "step": 188340 }, { "epoch": 8.113020631433864, "learning_rate": 5.438815746945635e-07, "loss": 2.9247, "step": 188360 }, { "epoch": 8.113882069173451, "learning_rate": 5.438330927190847e-07, "loss": 2.877, "step": 188380 }, { "epoch": 8.114743506913038, "learning_rate": 5.437846107436057e-07, "loss": 2.7514, "step": 188400 }, { "epoch": 8.115604944652626, "learning_rate": 5.437361287681269e-07, "loss": 2.9351, "step": 188420 }, { "epoch": 8.116466382392213, "learning_rate": 5.43687646792648e-07, "loss": 2.9104, "step": 188440 }, { "epoch": 8.1173278201318, "learning_rate": 5.436391648171691e-07, "loss": 2.6926, "step": 188460 }, { "epoch": 8.118189257871387, "learning_rate": 5.435906828416901e-07, "loss": 2.8524, "step": 188480 }, { "epoch": 8.119050695610975, "learning_rate": 5.435422008662113e-07, "loss": 2.9805, "step": 188500 }, { "epoch": 8.119912133350562, "learning_rate": 5.434937188907324e-07, "loss": 2.8204, "step": 188520 }, { "epoch": 8.12077357109015, "learning_rate": 5.434452369152536e-07, "loss": 2.6614, "step": 188540 }, { "epoch": 8.121635008829736, "learning_rate": 5.433967549397746e-07, "loss": 2.8234, "step": 188560 }, { "epoch": 8.122496446569324, "learning_rate": 5.433482729642958e-07, "loss": 3.071, "step": 188580 }, { "epoch": 8.123357884308911, "learning_rate": 5.432997909888168e-07, "loss": 2.9603, "step": 188600 }, { "epoch": 8.124219322048498, "learning_rate": 5.432513090133379e-07, "loss": 2.6726, "step": 188620 }, { "epoch": 8.125080759788087, "learning_rate": 5.43202827037859e-07, "loss": 2.7021, "step": 188640 }, { "epoch": 8.125942197527674, "learning_rate": 5.431543450623802e-07, "loss": 2.8364, "step": 188660 }, { "epoch": 8.126803635267262, "learning_rate": 5.431058630869013e-07, "loss": 2.7237, "step": 188680 }, { "epoch": 8.127665073006849, "learning_rate": 5.430573811114223e-07, "loss": 2.9124, "step": 188700 }, { "epoch": 8.128526510746436, "learning_rate": 5.430088991359434e-07, "loss": 2.872, "step": 188720 }, { "epoch": 8.129387948486023, "learning_rate": 5.429604171604646e-07, "loss": 2.8185, "step": 188740 }, { "epoch": 8.13024938622561, "learning_rate": 5.429119351849857e-07, "loss": 2.8814, "step": 188760 }, { "epoch": 8.131110823965198, "learning_rate": 5.428634532095067e-07, "loss": 2.8428, "step": 188780 }, { "epoch": 8.131972261704785, "learning_rate": 5.428149712340279e-07, "loss": 2.6746, "step": 188800 }, { "epoch": 8.132833699444372, "learning_rate": 5.42766489258549e-07, "loss": 2.9055, "step": 188820 }, { "epoch": 8.13369513718396, "learning_rate": 5.4271800728307e-07, "loss": 2.7554, "step": 188840 }, { "epoch": 8.134556574923547, "learning_rate": 5.426695253075911e-07, "loss": 2.9018, "step": 188860 }, { "epoch": 8.135418012663134, "learning_rate": 5.426210433321123e-07, "loss": 2.7, "step": 188880 }, { "epoch": 8.136279450402721, "learning_rate": 5.425725613566334e-07, "loss": 2.815, "step": 188900 }, { "epoch": 8.137140888142309, "learning_rate": 5.425240793811545e-07, "loss": 2.9735, "step": 188920 }, { "epoch": 8.138002325881898, "learning_rate": 5.424755974056756e-07, "loss": 2.7785, "step": 188940 }, { "epoch": 8.138863763621485, "learning_rate": 5.424271154301967e-07, "loss": 2.9137, "step": 188960 }, { "epoch": 8.139725201361072, "learning_rate": 5.423786334547178e-07, "loss": 2.6938, "step": 188980 }, { "epoch": 8.14058663910066, "learning_rate": 5.423301514792388e-07, "loss": 2.585, "step": 189000 }, { "epoch": 8.141448076840247, "learning_rate": 5.4228166950376e-07, "loss": 2.743, "step": 189020 }, { "epoch": 8.142309514579834, "learning_rate": 5.422331875282812e-07, "loss": 2.8405, "step": 189040 }, { "epoch": 8.143170952319421, "learning_rate": 5.421847055528023e-07, "loss": 2.8791, "step": 189060 }, { "epoch": 8.144032390059008, "learning_rate": 5.421362235773233e-07, "loss": 2.757, "step": 189080 }, { "epoch": 8.144893827798596, "learning_rate": 5.420877416018444e-07, "loss": 2.8943, "step": 189100 }, { "epoch": 8.145755265538183, "learning_rate": 5.420392596263656e-07, "loss": 2.6807, "step": 189120 }, { "epoch": 8.14661670327777, "learning_rate": 5.419907776508867e-07, "loss": 2.7537, "step": 189140 }, { "epoch": 8.147478141017357, "learning_rate": 5.419422956754078e-07, "loss": 2.7596, "step": 189160 }, { "epoch": 8.148339578756945, "learning_rate": 5.418938136999289e-07, "loss": 2.7655, "step": 189180 }, { "epoch": 8.149201016496534, "learning_rate": 5.4184533172445e-07, "loss": 2.8042, "step": 189200 }, { "epoch": 8.15006245423612, "learning_rate": 5.41796849748971e-07, "loss": 2.7951, "step": 189220 }, { "epoch": 8.150923891975708, "learning_rate": 5.417483677734922e-07, "loss": 2.7646, "step": 189240 }, { "epoch": 8.151785329715295, "learning_rate": 5.416998857980133e-07, "loss": 2.6971, "step": 189260 }, { "epoch": 8.152646767454883, "learning_rate": 5.416514038225345e-07, "loss": 2.9457, "step": 189280 }, { "epoch": 8.15350820519447, "learning_rate": 5.416029218470555e-07, "loss": 2.8773, "step": 189300 }, { "epoch": 8.154369642934057, "learning_rate": 5.415544398715766e-07, "loss": 2.8289, "step": 189320 }, { "epoch": 8.155231080673644, "learning_rate": 5.415059578960977e-07, "loss": 2.8327, "step": 189340 }, { "epoch": 8.156092518413232, "learning_rate": 5.414574759206188e-07, "loss": 2.8517, "step": 189360 }, { "epoch": 8.156953956152819, "learning_rate": 5.414089939451399e-07, "loss": 2.8755, "step": 189380 }, { "epoch": 8.157815393892406, "learning_rate": 5.41360511969661e-07, "loss": 2.7876, "step": 189400 }, { "epoch": 8.158676831631993, "learning_rate": 5.413120299941822e-07, "loss": 2.8586, "step": 189420 }, { "epoch": 8.15953826937158, "learning_rate": 5.412635480187033e-07, "loss": 2.943, "step": 189440 }, { "epoch": 8.160399707111168, "learning_rate": 5.412150660432243e-07, "loss": 2.8401, "step": 189460 }, { "epoch": 8.161261144850755, "learning_rate": 5.411665840677454e-07, "loss": 2.8157, "step": 189480 }, { "epoch": 8.162122582590344, "learning_rate": 5.411181020922666e-07, "loss": 2.7659, "step": 189500 }, { "epoch": 8.162984020329931, "learning_rate": 5.410696201167876e-07, "loss": 2.9528, "step": 189520 }, { "epoch": 8.163845458069519, "learning_rate": 5.410211381413088e-07, "loss": 2.8621, "step": 189540 }, { "epoch": 8.164706895809106, "learning_rate": 5.409726561658299e-07, "loss": 2.9605, "step": 189560 }, { "epoch": 8.165568333548693, "learning_rate": 5.40924174190351e-07, "loss": 2.692, "step": 189580 }, { "epoch": 8.16642977128828, "learning_rate": 5.40875692214872e-07, "loss": 2.9341, "step": 189600 }, { "epoch": 8.167291209027868, "learning_rate": 5.408272102393932e-07, "loss": 2.9522, "step": 189620 }, { "epoch": 8.168152646767455, "learning_rate": 5.407787282639143e-07, "loss": 2.8424, "step": 189640 }, { "epoch": 8.169014084507042, "learning_rate": 5.407302462884355e-07, "loss": 2.8768, "step": 189660 }, { "epoch": 8.16987552224663, "learning_rate": 5.406817643129565e-07, "loss": 2.8687, "step": 189680 }, { "epoch": 8.170736959986217, "learning_rate": 5.406332823374776e-07, "loss": 2.8191, "step": 189700 }, { "epoch": 8.171598397725804, "learning_rate": 5.405848003619987e-07, "loss": 2.6789, "step": 189720 }, { "epoch": 8.172459835465391, "learning_rate": 5.405363183865199e-07, "loss": 2.7668, "step": 189740 }, { "epoch": 8.173321273204978, "learning_rate": 5.404878364110409e-07, "loss": 2.8633, "step": 189760 }, { "epoch": 8.174182710944567, "learning_rate": 5.404393544355621e-07, "loss": 2.8988, "step": 189780 }, { "epoch": 8.175044148684155, "learning_rate": 5.403908724600832e-07, "loss": 2.8187, "step": 189800 }, { "epoch": 8.175905586423742, "learning_rate": 5.403423904846042e-07, "loss": 2.82, "step": 189820 }, { "epoch": 8.176767024163329, "learning_rate": 5.402939085091252e-07, "loss": 2.9023, "step": 189840 }, { "epoch": 8.177628461902916, "learning_rate": 5.402454265336464e-07, "loss": 2.6845, "step": 189860 }, { "epoch": 8.178489899642504, "learning_rate": 5.401969445581676e-07, "loss": 2.7747, "step": 189880 }, { "epoch": 8.17935133738209, "learning_rate": 5.401484625826886e-07, "loss": 2.7124, "step": 189900 }, { "epoch": 8.180212775121678, "learning_rate": 5.400999806072098e-07, "loss": 2.8155, "step": 189920 }, { "epoch": 8.181074212861265, "learning_rate": 5.40051498631731e-07, "loss": 2.7547, "step": 189940 }, { "epoch": 8.181935650600852, "learning_rate": 5.40003016656252e-07, "loss": 2.8066, "step": 189960 }, { "epoch": 8.18279708834044, "learning_rate": 5.39954534680773e-07, "loss": 2.8346, "step": 189980 }, { "epoch": 8.183658526080027, "learning_rate": 5.399060527052942e-07, "loss": 2.8498, "step": 190000 }, { "epoch": 8.184519963819614, "learning_rate": 5.398575707298153e-07, "loss": 2.9417, "step": 190020 }, { "epoch": 8.185381401559201, "learning_rate": 5.398090887543365e-07, "loss": 2.6773, "step": 190040 }, { "epoch": 8.18624283929879, "learning_rate": 5.397606067788575e-07, "loss": 2.883, "step": 190060 }, { "epoch": 8.187104277038378, "learning_rate": 5.397121248033786e-07, "loss": 2.6511, "step": 190080 }, { "epoch": 8.187965714777965, "learning_rate": 5.396636428278997e-07, "loss": 2.743, "step": 190100 }, { "epoch": 8.188827152517552, "learning_rate": 5.396151608524208e-07, "loss": 2.8361, "step": 190120 }, { "epoch": 8.18968859025714, "learning_rate": 5.395666788769419e-07, "loss": 2.8676, "step": 190140 }, { "epoch": 8.190550027996727, "learning_rate": 5.395181969014631e-07, "loss": 2.8939, "step": 190160 }, { "epoch": 8.191411465736314, "learning_rate": 5.394697149259842e-07, "loss": 2.6872, "step": 190180 }, { "epoch": 8.192272903475901, "learning_rate": 5.394212329505052e-07, "loss": 3.0806, "step": 190200 }, { "epoch": 8.193134341215488, "learning_rate": 5.393727509750263e-07, "loss": 2.8276, "step": 190220 }, { "epoch": 8.193995778955076, "learning_rate": 5.393242689995476e-07, "loss": 2.8806, "step": 190240 }, { "epoch": 8.194857216694663, "learning_rate": 5.392757870240686e-07, "loss": 2.7002, "step": 190260 }, { "epoch": 8.19571865443425, "learning_rate": 5.392273050485897e-07, "loss": 2.9972, "step": 190280 }, { "epoch": 8.196580092173837, "learning_rate": 5.391788230731108e-07, "loss": 2.7218, "step": 190300 }, { "epoch": 8.197441529913425, "learning_rate": 5.391303410976319e-07, "loss": 2.9483, "step": 190320 }, { "epoch": 8.198302967653014, "learning_rate": 5.39081859122153e-07, "loss": 2.7806, "step": 190340 }, { "epoch": 8.199164405392601, "learning_rate": 5.390333771466742e-07, "loss": 2.9818, "step": 190360 }, { "epoch": 8.200025843132188, "learning_rate": 5.389848951711952e-07, "loss": 2.8512, "step": 190380 }, { "epoch": 8.200887280871775, "learning_rate": 5.389364131957163e-07, "loss": 2.8676, "step": 190400 }, { "epoch": 8.201748718611363, "learning_rate": 5.388879312202374e-07, "loss": 2.7186, "step": 190420 }, { "epoch": 8.20261015635095, "learning_rate": 5.388394492447585e-07, "loss": 2.9419, "step": 190440 }, { "epoch": 8.203471594090537, "learning_rate": 5.387909672692796e-07, "loss": 2.7684, "step": 190460 }, { "epoch": 8.204333031830124, "learning_rate": 5.387424852938007e-07, "loss": 2.6722, "step": 190480 }, { "epoch": 8.205194469569712, "learning_rate": 5.386940033183218e-07, "loss": 2.7817, "step": 190500 }, { "epoch": 8.206055907309299, "learning_rate": 5.386455213428429e-07, "loss": 2.848, "step": 190520 }, { "epoch": 8.206917345048886, "learning_rate": 5.385970393673641e-07, "loss": 2.7137, "step": 190540 }, { "epoch": 8.207778782788473, "learning_rate": 5.385485573918852e-07, "loss": 2.9375, "step": 190560 }, { "epoch": 8.20864022052806, "learning_rate": 5.385000754164062e-07, "loss": 2.8932, "step": 190580 }, { "epoch": 8.209501658267648, "learning_rate": 5.384515934409273e-07, "loss": 2.8291, "step": 190600 }, { "epoch": 8.210363096007237, "learning_rate": 5.384031114654486e-07, "loss": 2.7778, "step": 190620 }, { "epoch": 8.211224533746824, "learning_rate": 5.383546294899696e-07, "loss": 2.8801, "step": 190640 }, { "epoch": 8.212085971486411, "learning_rate": 5.383061475144907e-07, "loss": 2.789, "step": 190660 }, { "epoch": 8.212947409225999, "learning_rate": 5.382576655390118e-07, "loss": 2.7925, "step": 190680 }, { "epoch": 8.213808846965586, "learning_rate": 5.382091835635329e-07, "loss": 2.7455, "step": 190700 }, { "epoch": 8.214670284705173, "learning_rate": 5.381607015880539e-07, "loss": 2.869, "step": 190720 }, { "epoch": 8.21553172244476, "learning_rate": 5.381122196125752e-07, "loss": 2.9415, "step": 190740 }, { "epoch": 8.216393160184348, "learning_rate": 5.380637376370962e-07, "loss": 3.0069, "step": 190760 }, { "epoch": 8.217254597923935, "learning_rate": 5.380152556616173e-07, "loss": 2.9634, "step": 190780 }, { "epoch": 8.218116035663522, "learning_rate": 5.379667736861384e-07, "loss": 2.875, "step": 190800 }, { "epoch": 8.21897747340311, "learning_rate": 5.379182917106595e-07, "loss": 2.8717, "step": 190820 }, { "epoch": 8.219838911142697, "learning_rate": 5.378698097351806e-07, "loss": 2.8482, "step": 190840 }, { "epoch": 8.220700348882284, "learning_rate": 5.378213277597019e-07, "loss": 2.8019, "step": 190860 }, { "epoch": 8.221561786621871, "learning_rate": 5.37772845784223e-07, "loss": 2.7865, "step": 190880 }, { "epoch": 8.22242322436146, "learning_rate": 5.377243638087439e-07, "loss": 2.7649, "step": 190900 }, { "epoch": 8.223284662101047, "learning_rate": 5.376758818332651e-07, "loss": 2.7535, "step": 190920 }, { "epoch": 8.224146099840635, "learning_rate": 5.376273998577862e-07, "loss": 2.9174, "step": 190940 }, { "epoch": 8.225007537580222, "learning_rate": 5.375789178823072e-07, "loss": 3.0242, "step": 190960 }, { "epoch": 8.225868975319809, "learning_rate": 5.375304359068283e-07, "loss": 2.8741, "step": 190980 }, { "epoch": 8.226730413059396, "learning_rate": 5.374819539313496e-07, "loss": 2.9048, "step": 191000 }, { "epoch": 8.227591850798984, "learning_rate": 5.374334719558705e-07, "loss": 2.876, "step": 191020 }, { "epoch": 8.22845328853857, "learning_rate": 5.373849899803917e-07, "loss": 2.814, "step": 191040 }, { "epoch": 8.229314726278158, "learning_rate": 5.373365080049128e-07, "loss": 2.7669, "step": 191060 }, { "epoch": 8.230176164017745, "learning_rate": 5.372880260294339e-07, "loss": 2.9784, "step": 191080 }, { "epoch": 8.231037601757333, "learning_rate": 5.372395440539548e-07, "loss": 2.8121, "step": 191100 }, { "epoch": 8.23189903949692, "learning_rate": 5.371910620784762e-07, "loss": 2.8768, "step": 191120 }, { "epoch": 8.232760477236507, "learning_rate": 5.371425801029972e-07, "loss": 2.7928, "step": 191140 }, { "epoch": 8.233621914976094, "learning_rate": 5.370940981275184e-07, "loss": 2.7714, "step": 191160 }, { "epoch": 8.234483352715683, "learning_rate": 5.370456161520394e-07, "loss": 2.9588, "step": 191180 }, { "epoch": 8.23534479045527, "learning_rate": 5.369971341765606e-07, "loss": 2.8323, "step": 191200 }, { "epoch": 8.236206228194858, "learning_rate": 5.369486522010816e-07, "loss": 2.7282, "step": 191220 }, { "epoch": 8.237067665934445, "learning_rate": 5.369001702256029e-07, "loss": 2.8038, "step": 191240 }, { "epoch": 8.237929103674032, "learning_rate": 5.368516882501239e-07, "loss": 2.7668, "step": 191260 }, { "epoch": 8.23879054141362, "learning_rate": 5.36803206274645e-07, "loss": 2.6988, "step": 191280 }, { "epoch": 8.239651979153207, "learning_rate": 5.367547242991661e-07, "loss": 2.7061, "step": 191300 }, { "epoch": 8.240513416892794, "learning_rate": 5.367062423236873e-07, "loss": 2.7883, "step": 191320 }, { "epoch": 8.241374854632381, "learning_rate": 5.366577603482082e-07, "loss": 2.9844, "step": 191340 }, { "epoch": 8.242236292371969, "learning_rate": 5.366092783727295e-07, "loss": 3.0078, "step": 191360 }, { "epoch": 8.243097730111556, "learning_rate": 5.365607963972506e-07, "loss": 2.7691, "step": 191380 }, { "epoch": 8.243959167851143, "learning_rate": 5.365123144217716e-07, "loss": 2.9006, "step": 191400 }, { "epoch": 8.24482060559073, "learning_rate": 5.364638324462927e-07, "loss": 2.9844, "step": 191420 }, { "epoch": 8.245682043330318, "learning_rate": 5.364153504708138e-07, "loss": 2.8637, "step": 191440 }, { "epoch": 8.246543481069907, "learning_rate": 5.363668684953349e-07, "loss": 2.6688, "step": 191460 }, { "epoch": 8.247404918809494, "learning_rate": 5.363183865198559e-07, "loss": 2.852, "step": 191480 }, { "epoch": 8.248266356549081, "learning_rate": 5.362699045443772e-07, "loss": 2.9802, "step": 191500 }, { "epoch": 8.249127794288668, "learning_rate": 5.362214225688982e-07, "loss": 2.7958, "step": 191520 }, { "epoch": 8.249989232028256, "learning_rate": 5.361729405934194e-07, "loss": 3.034, "step": 191540 }, { "epoch": 8.250850669767843, "learning_rate": 5.361244586179404e-07, "loss": 2.7596, "step": 191560 }, { "epoch": 8.25171210750743, "learning_rate": 5.360759766424615e-07, "loss": 2.8898, "step": 191580 }, { "epoch": 8.252573545247017, "learning_rate": 5.360274946669826e-07, "loss": 2.8601, "step": 191600 }, { "epoch": 8.253434982986604, "learning_rate": 5.359790126915039e-07, "loss": 2.8803, "step": 191620 }, { "epoch": 8.254296420726192, "learning_rate": 5.359305307160249e-07, "loss": 2.9353, "step": 191640 }, { "epoch": 8.255157858465779, "learning_rate": 5.35882048740546e-07, "loss": 2.9471, "step": 191660 }, { "epoch": 8.256019296205366, "learning_rate": 5.358335667650671e-07, "loss": 2.8166, "step": 191680 }, { "epoch": 8.256880733944953, "learning_rate": 5.357850847895882e-07, "loss": 2.9236, "step": 191700 }, { "epoch": 8.25774217168454, "learning_rate": 5.357366028141092e-07, "loss": 2.9062, "step": 191720 }, { "epoch": 8.258603609424128, "learning_rate": 5.356881208386305e-07, "loss": 2.8066, "step": 191740 }, { "epoch": 8.259465047163717, "learning_rate": 5.356396388631516e-07, "loss": 2.7848, "step": 191760 }, { "epoch": 8.260326484903304, "learning_rate": 5.355911568876726e-07, "loss": 2.9218, "step": 191780 }, { "epoch": 8.261187922642891, "learning_rate": 5.355426749121937e-07, "loss": 2.9428, "step": 191800 }, { "epoch": 8.262049360382479, "learning_rate": 5.354941929367149e-07, "loss": 2.7686, "step": 191820 }, { "epoch": 8.262910798122066, "learning_rate": 5.354457109612359e-07, "loss": 2.9087, "step": 191840 }, { "epoch": 8.263772235861653, "learning_rate": 5.353972289857571e-07, "loss": 2.8755, "step": 191860 }, { "epoch": 8.26463367360124, "learning_rate": 5.353487470102782e-07, "loss": 2.9366, "step": 191880 }, { "epoch": 8.265495111340828, "learning_rate": 5.353002650347993e-07, "loss": 2.819, "step": 191900 }, { "epoch": 8.266356549080415, "learning_rate": 5.352517830593204e-07, "loss": 2.8892, "step": 191920 }, { "epoch": 8.267217986820002, "learning_rate": 5.352033010838415e-07, "loss": 2.8028, "step": 191940 }, { "epoch": 8.26807942455959, "learning_rate": 5.351548191083626e-07, "loss": 2.8039, "step": 191960 }, { "epoch": 8.268940862299177, "learning_rate": 5.351063371328836e-07, "loss": 2.8111, "step": 191980 }, { "epoch": 8.269802300038764, "learning_rate": 5.350578551574048e-07, "loss": 2.891, "step": 192000 }, { "epoch": 8.270663737778353, "learning_rate": 5.350093731819259e-07, "loss": 2.8603, "step": 192020 }, { "epoch": 8.27152517551794, "learning_rate": 5.34960891206447e-07, "loss": 2.8696, "step": 192040 }, { "epoch": 8.272386613257527, "learning_rate": 5.349124092309681e-07, "loss": 2.8228, "step": 192060 }, { "epoch": 8.273248050997115, "learning_rate": 5.348639272554892e-07, "loss": 2.8075, "step": 192080 }, { "epoch": 8.274109488736702, "learning_rate": 5.348154452800102e-07, "loss": 2.7268, "step": 192100 }, { "epoch": 8.27497092647629, "learning_rate": 5.347669633045315e-07, "loss": 3.0419, "step": 192120 }, { "epoch": 8.275832364215876, "learning_rate": 5.347184813290527e-07, "loss": 2.7408, "step": 192140 }, { "epoch": 8.276693801955464, "learning_rate": 5.346699993535736e-07, "loss": 2.7163, "step": 192160 }, { "epoch": 8.277555239695051, "learning_rate": 5.346215173780947e-07, "loss": 2.8054, "step": 192180 }, { "epoch": 8.278416677434638, "learning_rate": 5.345730354026159e-07, "loss": 2.7001, "step": 192200 }, { "epoch": 8.279278115174225, "learning_rate": 5.345245534271368e-07, "loss": 2.8004, "step": 192220 }, { "epoch": 8.280139552913813, "learning_rate": 5.344760714516581e-07, "loss": 2.7886, "step": 192240 }, { "epoch": 8.2810009906534, "learning_rate": 5.344275894761792e-07, "loss": 2.9304, "step": 192260 }, { "epoch": 8.281862428392987, "learning_rate": 5.343791075007003e-07, "loss": 2.924, "step": 192280 }, { "epoch": 8.282723866132574, "learning_rate": 5.343306255252213e-07, "loss": 2.789, "step": 192300 }, { "epoch": 8.283585303872163, "learning_rate": 5.342821435497425e-07, "loss": 2.8838, "step": 192320 }, { "epoch": 8.28444674161175, "learning_rate": 5.342336615742636e-07, "loss": 2.7758, "step": 192340 }, { "epoch": 8.285308179351338, "learning_rate": 5.341851795987848e-07, "loss": 2.7015, "step": 192360 }, { "epoch": 8.286169617090925, "learning_rate": 5.341366976233058e-07, "loss": 2.8744, "step": 192380 }, { "epoch": 8.287031054830512, "learning_rate": 5.340882156478269e-07, "loss": 2.919, "step": 192400 }, { "epoch": 8.2878924925701, "learning_rate": 5.34039733672348e-07, "loss": 2.9522, "step": 192420 }, { "epoch": 8.288753930309687, "learning_rate": 5.339912516968692e-07, "loss": 2.8904, "step": 192440 }, { "epoch": 8.289615368049274, "learning_rate": 5.339427697213902e-07, "loss": 2.8016, "step": 192460 }, { "epoch": 8.290476805788861, "learning_rate": 5.338942877459114e-07, "loss": 2.9182, "step": 192480 }, { "epoch": 8.291338243528449, "learning_rate": 5.338458057704325e-07, "loss": 2.7127, "step": 192500 }, { "epoch": 8.292199681268036, "learning_rate": 5.337973237949536e-07, "loss": 2.6971, "step": 192520 }, { "epoch": 8.293061119007623, "learning_rate": 5.337488418194746e-07, "loss": 2.7954, "step": 192540 }, { "epoch": 8.29392255674721, "learning_rate": 5.337003598439957e-07, "loss": 2.8545, "step": 192560 }, { "epoch": 8.2947839944868, "learning_rate": 5.336518778685169e-07, "loss": 2.7959, "step": 192580 }, { "epoch": 8.295645432226387, "learning_rate": 5.33603395893038e-07, "loss": 2.7757, "step": 192600 }, { "epoch": 8.296506869965974, "learning_rate": 5.335549139175591e-07, "loss": 2.946, "step": 192620 }, { "epoch": 8.297368307705561, "learning_rate": 5.335064319420802e-07, "loss": 2.7016, "step": 192640 }, { "epoch": 8.298229745445148, "learning_rate": 5.334579499666013e-07, "loss": 2.892, "step": 192660 }, { "epoch": 8.299091183184736, "learning_rate": 5.334094679911223e-07, "loss": 2.9614, "step": 192680 }, { "epoch": 8.299952620924323, "learning_rate": 5.333609860156435e-07, "loss": 2.7922, "step": 192700 }, { "epoch": 8.30081405866391, "learning_rate": 5.333125040401646e-07, "loss": 2.8522, "step": 192720 }, { "epoch": 8.301675496403497, "learning_rate": 5.332640220646858e-07, "loss": 2.9637, "step": 192740 }, { "epoch": 8.302536934143085, "learning_rate": 5.332155400892068e-07, "loss": 2.7615, "step": 192760 }, { "epoch": 8.303398371882672, "learning_rate": 5.331670581137279e-07, "loss": 3.0098, "step": 192780 }, { "epoch": 8.304259809622259, "learning_rate": 5.33118576138249e-07, "loss": 2.8328, "step": 192800 }, { "epoch": 8.305121247361846, "learning_rate": 5.330700941627702e-07, "loss": 2.7705, "step": 192820 }, { "epoch": 8.305982685101434, "learning_rate": 5.330216121872912e-07, "loss": 2.8824, "step": 192840 }, { "epoch": 8.30684412284102, "learning_rate": 5.329731302118124e-07, "loss": 2.8293, "step": 192860 }, { "epoch": 8.30770556058061, "learning_rate": 5.329246482363335e-07, "loss": 2.7079, "step": 192880 }, { "epoch": 8.308566998320197, "learning_rate": 5.328761662608545e-07, "loss": 2.8041, "step": 192900 }, { "epoch": 8.309428436059784, "learning_rate": 5.328276842853756e-07, "loss": 2.8667, "step": 192920 }, { "epoch": 8.310289873799372, "learning_rate": 5.327792023098968e-07, "loss": 2.7585, "step": 192940 }, { "epoch": 8.311151311538959, "learning_rate": 5.327307203344179e-07, "loss": 2.7214, "step": 192960 }, { "epoch": 8.312012749278546, "learning_rate": 5.32682238358939e-07, "loss": 2.8166, "step": 192980 }, { "epoch": 8.312874187018133, "learning_rate": 5.326337563834601e-07, "loss": 2.7908, "step": 193000 }, { "epoch": 8.31373562475772, "learning_rate": 5.325852744079812e-07, "loss": 2.8472, "step": 193020 }, { "epoch": 8.314597062497308, "learning_rate": 5.325367924325023e-07, "loss": 2.8318, "step": 193040 }, { "epoch": 8.315458500236895, "learning_rate": 5.324883104570233e-07, "loss": 2.9193, "step": 193060 }, { "epoch": 8.316319937976482, "learning_rate": 5.324398284815445e-07, "loss": 2.8778, "step": 193080 }, { "epoch": 8.31718137571607, "learning_rate": 5.323913465060656e-07, "loss": 2.9213, "step": 193100 }, { "epoch": 8.318042813455657, "learning_rate": 5.323428645305868e-07, "loss": 2.8886, "step": 193120 }, { "epoch": 8.318904251195244, "learning_rate": 5.322943825551078e-07, "loss": 2.7436, "step": 193140 }, { "epoch": 8.319765688934833, "learning_rate": 5.322459005796289e-07, "loss": 2.7981, "step": 193160 }, { "epoch": 8.32062712667442, "learning_rate": 5.3219741860415e-07, "loss": 2.8153, "step": 193180 }, { "epoch": 8.321488564414008, "learning_rate": 5.321489366286712e-07, "loss": 2.7594, "step": 193200 }, { "epoch": 8.322350002153595, "learning_rate": 5.321004546531922e-07, "loss": 2.8845, "step": 193220 }, { "epoch": 8.323211439893182, "learning_rate": 5.320519726777134e-07, "loss": 2.8591, "step": 193240 }, { "epoch": 8.32407287763277, "learning_rate": 5.320034907022345e-07, "loss": 2.7678, "step": 193260 }, { "epoch": 8.324934315372357, "learning_rate": 5.319550087267555e-07, "loss": 2.8613, "step": 193280 }, { "epoch": 8.325795753111944, "learning_rate": 5.319065267512766e-07, "loss": 2.8338, "step": 193300 }, { "epoch": 8.326657190851531, "learning_rate": 5.318580447757978e-07, "loss": 2.7499, "step": 193320 }, { "epoch": 8.327518628591118, "learning_rate": 5.318095628003189e-07, "loss": 2.7598, "step": 193340 }, { "epoch": 8.328380066330705, "learning_rate": 5.3176108082484e-07, "loss": 2.842, "step": 193360 }, { "epoch": 8.329241504070293, "learning_rate": 5.317125988493611e-07, "loss": 2.8035, "step": 193380 }, { "epoch": 8.33010294180988, "learning_rate": 5.316641168738821e-07, "loss": 2.7983, "step": 193400 }, { "epoch": 8.330964379549467, "learning_rate": 5.316156348984033e-07, "loss": 2.7298, "step": 193420 }, { "epoch": 8.331825817289056, "learning_rate": 5.315671529229244e-07, "loss": 2.8466, "step": 193440 }, { "epoch": 8.332687255028643, "learning_rate": 5.315186709474455e-07, "loss": 2.9406, "step": 193460 }, { "epoch": 8.33354869276823, "learning_rate": 5.314701889719667e-07, "loss": 2.7692, "step": 193480 }, { "epoch": 8.334410130507818, "learning_rate": 5.314217069964879e-07, "loss": 2.768, "step": 193500 }, { "epoch": 8.335271568247405, "learning_rate": 5.313732250210088e-07, "loss": 2.9539, "step": 193520 }, { "epoch": 8.336133005986992, "learning_rate": 5.313247430455299e-07, "loss": 2.8041, "step": 193540 }, { "epoch": 8.33699444372658, "learning_rate": 5.312762610700511e-07, "loss": 2.8878, "step": 193560 }, { "epoch": 8.337855881466167, "learning_rate": 5.312277790945721e-07, "loss": 3.0075, "step": 193580 }, { "epoch": 8.338717319205754, "learning_rate": 5.311792971190932e-07, "loss": 2.9022, "step": 193600 }, { "epoch": 8.339578756945341, "learning_rate": 5.311308151436144e-07, "loss": 2.7835, "step": 193620 }, { "epoch": 8.340440194684929, "learning_rate": 5.310823331681355e-07, "loss": 2.8469, "step": 193640 }, { "epoch": 8.341301632424516, "learning_rate": 5.310338511926565e-07, "loss": 2.8049, "step": 193660 }, { "epoch": 8.342163070164103, "learning_rate": 5.309853692171776e-07, "loss": 2.7877, "step": 193680 }, { "epoch": 8.34302450790369, "learning_rate": 5.309368872416988e-07, "loss": 2.8604, "step": 193700 }, { "epoch": 8.34388594564328, "learning_rate": 5.308884052662199e-07, "loss": 2.6735, "step": 193720 }, { "epoch": 8.344747383382867, "learning_rate": 5.30839923290741e-07, "loss": 2.7065, "step": 193740 }, { "epoch": 8.345608821122454, "learning_rate": 5.307914413152621e-07, "loss": 2.8099, "step": 193760 }, { "epoch": 8.346470258862041, "learning_rate": 5.307429593397832e-07, "loss": 2.8067, "step": 193780 }, { "epoch": 8.347331696601628, "learning_rate": 5.306944773643042e-07, "loss": 2.7398, "step": 193800 }, { "epoch": 8.348193134341216, "learning_rate": 5.306459953888254e-07, "loss": 2.7524, "step": 193820 }, { "epoch": 8.349054572080803, "learning_rate": 5.305975134133465e-07, "loss": 2.7024, "step": 193840 }, { "epoch": 8.34991600982039, "learning_rate": 5.305490314378677e-07, "loss": 2.9233, "step": 193860 }, { "epoch": 8.350777447559977, "learning_rate": 5.305005494623887e-07, "loss": 2.6723, "step": 193880 }, { "epoch": 8.351638885299565, "learning_rate": 5.304520674869098e-07, "loss": 2.7394, "step": 193900 }, { "epoch": 8.352500323039152, "learning_rate": 5.304035855114309e-07, "loss": 2.8478, "step": 193920 }, { "epoch": 8.35336176077874, "learning_rate": 5.303551035359521e-07, "loss": 3.0193, "step": 193940 }, { "epoch": 8.354223198518326, "learning_rate": 5.303066215604731e-07, "loss": 2.9807, "step": 193960 }, { "epoch": 8.355084636257914, "learning_rate": 5.302581395849943e-07, "loss": 2.9236, "step": 193980 }, { "epoch": 8.355946073997503, "learning_rate": 5.302096576095154e-07, "loss": 2.7848, "step": 194000 }, { "epoch": 8.35680751173709, "learning_rate": 5.301611756340365e-07, "loss": 2.8283, "step": 194020 }, { "epoch": 8.357668949476677, "learning_rate": 5.301126936585575e-07, "loss": 2.9253, "step": 194040 }, { "epoch": 8.358530387216264, "learning_rate": 5.300642116830787e-07, "loss": 2.7625, "step": 194060 }, { "epoch": 8.359391824955852, "learning_rate": 5.300157297075998e-07, "loss": 2.773, "step": 194080 }, { "epoch": 8.360253262695439, "learning_rate": 5.29967247732121e-07, "loss": 2.8944, "step": 194100 }, { "epoch": 8.361114700435026, "learning_rate": 5.29918765756642e-07, "loss": 2.9446, "step": 194120 }, { "epoch": 8.361976138174613, "learning_rate": 5.298702837811631e-07, "loss": 2.8256, "step": 194140 }, { "epoch": 8.3628375759142, "learning_rate": 5.298218018056842e-07, "loss": 2.7298, "step": 194160 }, { "epoch": 8.363699013653788, "learning_rate": 5.297733198302052e-07, "loss": 2.9087, "step": 194180 }, { "epoch": 8.364560451393375, "learning_rate": 5.297248378547264e-07, "loss": 2.9862, "step": 194200 }, { "epoch": 8.365421889132962, "learning_rate": 5.296763558792475e-07, "loss": 2.8551, "step": 194220 }, { "epoch": 8.36628332687255, "learning_rate": 5.296278739037687e-07, "loss": 2.7787, "step": 194240 }, { "epoch": 8.367144764612137, "learning_rate": 5.295793919282897e-07, "loss": 2.803, "step": 194260 }, { "epoch": 8.368006202351726, "learning_rate": 5.295309099528108e-07, "loss": 2.782, "step": 194280 }, { "epoch": 8.368867640091313, "learning_rate": 5.294824279773319e-07, "loss": 2.9948, "step": 194300 }, { "epoch": 8.3697290778309, "learning_rate": 5.294339460018531e-07, "loss": 2.7013, "step": 194320 }, { "epoch": 8.370590515570488, "learning_rate": 5.293854640263741e-07, "loss": 2.9207, "step": 194340 }, { "epoch": 8.371451953310075, "learning_rate": 5.293369820508953e-07, "loss": 2.7565, "step": 194360 }, { "epoch": 8.372313391049662, "learning_rate": 5.292885000754164e-07, "loss": 2.6697, "step": 194380 }, { "epoch": 8.37317482878925, "learning_rate": 5.292400180999375e-07, "loss": 2.8355, "step": 194400 }, { "epoch": 8.374036266528837, "learning_rate": 5.291915361244585e-07, "loss": 2.8441, "step": 194420 }, { "epoch": 8.374897704268424, "learning_rate": 5.291430541489799e-07, "loss": 2.8538, "step": 194440 }, { "epoch": 8.375759142008011, "learning_rate": 5.290945721735008e-07, "loss": 2.9368, "step": 194460 }, { "epoch": 8.376620579747598, "learning_rate": 5.29046090198022e-07, "loss": 2.7997, "step": 194480 }, { "epoch": 8.377482017487186, "learning_rate": 5.28997608222543e-07, "loss": 2.9849, "step": 194500 }, { "epoch": 8.378343455226773, "learning_rate": 5.289491262470641e-07, "loss": 2.8221, "step": 194520 }, { "epoch": 8.37920489296636, "learning_rate": 5.289006442715852e-07, "loss": 2.9149, "step": 194540 }, { "epoch": 8.380066330705949, "learning_rate": 5.288521622961064e-07, "loss": 2.8722, "step": 194560 }, { "epoch": 8.380927768445536, "learning_rate": 5.288036803206274e-07, "loss": 2.8734, "step": 194580 }, { "epoch": 8.381789206185124, "learning_rate": 5.287551983451486e-07, "loss": 2.9513, "step": 194600 }, { "epoch": 8.38265064392471, "learning_rate": 5.287067163696697e-07, "loss": 2.781, "step": 194620 }, { "epoch": 8.383512081664298, "learning_rate": 5.286582343941907e-07, "loss": 2.8479, "step": 194640 }, { "epoch": 8.384373519403885, "learning_rate": 5.286097524187117e-07, "loss": 2.7545, "step": 194660 }, { "epoch": 8.385234957143473, "learning_rate": 5.285612704432329e-07, "loss": 2.9227, "step": 194680 }, { "epoch": 8.38609639488306, "learning_rate": 5.285127884677541e-07, "loss": 2.9396, "step": 194700 }, { "epoch": 8.386957832622647, "learning_rate": 5.284643064922751e-07, "loss": 2.9198, "step": 194720 }, { "epoch": 8.387819270362234, "learning_rate": 5.284158245167963e-07, "loss": 2.8509, "step": 194740 }, { "epoch": 8.388680708101822, "learning_rate": 5.283673425413175e-07, "loss": 2.8296, "step": 194760 }, { "epoch": 8.389542145841409, "learning_rate": 5.283188605658384e-07, "loss": 2.6618, "step": 194780 }, { "epoch": 8.390403583580996, "learning_rate": 5.282703785903595e-07, "loss": 2.6981, "step": 194800 }, { "epoch": 8.391265021320583, "learning_rate": 5.282218966148808e-07, "loss": 2.9253, "step": 194820 }, { "epoch": 8.392126459060172, "learning_rate": 5.281734146394018e-07, "loss": 2.877, "step": 194840 }, { "epoch": 8.39298789679976, "learning_rate": 5.281249326639229e-07, "loss": 2.9102, "step": 194860 }, { "epoch": 8.393849334539347, "learning_rate": 5.28076450688444e-07, "loss": 2.9398, "step": 194880 }, { "epoch": 8.394710772278934, "learning_rate": 5.280279687129651e-07, "loss": 2.9861, "step": 194900 }, { "epoch": 8.395572210018521, "learning_rate": 5.279794867374862e-07, "loss": 2.749, "step": 194920 }, { "epoch": 8.396433647758109, "learning_rate": 5.279310047620074e-07, "loss": 2.909, "step": 194940 }, { "epoch": 8.397295085497696, "learning_rate": 5.278825227865284e-07, "loss": 2.7484, "step": 194960 }, { "epoch": 8.398156523237283, "learning_rate": 5.278340408110496e-07, "loss": 2.9034, "step": 194980 }, { "epoch": 8.39901796097687, "learning_rate": 5.277855588355707e-07, "loss": 2.9113, "step": 195000 }, { "epoch": 8.399879398716458, "learning_rate": 5.277370768600917e-07, "loss": 2.7193, "step": 195020 }, { "epoch": 8.400740836456045, "learning_rate": 5.276885948846128e-07, "loss": 2.8522, "step": 195040 }, { "epoch": 8.401602274195632, "learning_rate": 5.276401129091341e-07, "loss": 2.9194, "step": 195060 }, { "epoch": 8.40246371193522, "learning_rate": 5.27591630933655e-07, "loss": 2.7175, "step": 195080 }, { "epoch": 8.403325149674806, "learning_rate": 5.275431489581762e-07, "loss": 2.8063, "step": 195100 }, { "epoch": 8.404186587414394, "learning_rate": 5.274946669826973e-07, "loss": 2.7914, "step": 195120 }, { "epoch": 8.405048025153983, "learning_rate": 5.274461850072184e-07, "loss": 2.8494, "step": 195140 }, { "epoch": 8.40590946289357, "learning_rate": 5.273977030317394e-07, "loss": 2.8944, "step": 195160 }, { "epoch": 8.406770900633157, "learning_rate": 5.273492210562607e-07, "loss": 2.7095, "step": 195180 }, { "epoch": 8.407632338372744, "learning_rate": 5.273007390807818e-07, "loss": 2.6588, "step": 195200 }, { "epoch": 8.408493776112332, "learning_rate": 5.272522571053028e-07, "loss": 2.8418, "step": 195220 }, { "epoch": 8.409355213851919, "learning_rate": 5.272037751298239e-07, "loss": 2.7767, "step": 195240 }, { "epoch": 8.410216651591506, "learning_rate": 5.27155293154345e-07, "loss": 3.025, "step": 195260 }, { "epoch": 8.411078089331093, "learning_rate": 5.271068111788661e-07, "loss": 2.8623, "step": 195280 }, { "epoch": 8.41193952707068, "learning_rate": 5.270583292033872e-07, "loss": 2.9464, "step": 195300 }, { "epoch": 8.412800964810268, "learning_rate": 5.270098472279084e-07, "loss": 2.8541, "step": 195320 }, { "epoch": 8.413662402549855, "learning_rate": 5.269613652524294e-07, "loss": 2.85, "step": 195340 }, { "epoch": 8.414523840289442, "learning_rate": 5.269128832769506e-07, "loss": 2.7397, "step": 195360 }, { "epoch": 8.41538527802903, "learning_rate": 5.268644013014716e-07, "loss": 2.8529, "step": 195380 }, { "epoch": 8.416246715768619, "learning_rate": 5.268159193259927e-07, "loss": 2.8305, "step": 195400 }, { "epoch": 8.417108153508206, "learning_rate": 5.267674373505138e-07, "loss": 2.8165, "step": 195420 }, { "epoch": 8.417969591247793, "learning_rate": 5.267189553750351e-07, "loss": 2.8481, "step": 195440 }, { "epoch": 8.41883102898738, "learning_rate": 5.266704733995561e-07, "loss": 2.8084, "step": 195460 }, { "epoch": 8.419692466726968, "learning_rate": 5.266219914240772e-07, "loss": 2.856, "step": 195480 }, { "epoch": 8.420553904466555, "learning_rate": 5.265735094485983e-07, "loss": 2.731, "step": 195500 }, { "epoch": 8.421415342206142, "learning_rate": 5.265250274731195e-07, "loss": 2.7958, "step": 195520 }, { "epoch": 8.42227677994573, "learning_rate": 5.264765454976404e-07, "loss": 2.7509, "step": 195540 }, { "epoch": 8.423138217685317, "learning_rate": 5.264280635221617e-07, "loss": 2.9749, "step": 195560 }, { "epoch": 8.423999655424904, "learning_rate": 5.263795815466828e-07, "loss": 2.8651, "step": 195580 }, { "epoch": 8.424861093164491, "learning_rate": 5.263310995712039e-07, "loss": 2.8275, "step": 195600 }, { "epoch": 8.425722530904078, "learning_rate": 5.262826175957249e-07, "loss": 2.9179, "step": 195620 }, { "epoch": 8.426583968643666, "learning_rate": 5.262341356202461e-07, "loss": 2.9458, "step": 195640 }, { "epoch": 8.427445406383253, "learning_rate": 5.261856536447671e-07, "loss": 2.9064, "step": 195660 }, { "epoch": 8.42830684412284, "learning_rate": 5.261371716692884e-07, "loss": 2.9557, "step": 195680 }, { "epoch": 8.42916828186243, "learning_rate": 5.260886896938093e-07, "loss": 2.8975, "step": 195700 }, { "epoch": 8.430029719602016, "learning_rate": 5.260402077183305e-07, "loss": 2.7593, "step": 195720 }, { "epoch": 8.430891157341604, "learning_rate": 5.259917257428516e-07, "loss": 2.753, "step": 195740 }, { "epoch": 8.431752595081191, "learning_rate": 5.259432437673726e-07, "loss": 2.7863, "step": 195760 }, { "epoch": 8.432614032820778, "learning_rate": 5.258947617918937e-07, "loss": 2.7199, "step": 195780 }, { "epoch": 8.433475470560365, "learning_rate": 5.258462798164148e-07, "loss": 2.8413, "step": 195800 }, { "epoch": 8.434336908299953, "learning_rate": 5.257977978409361e-07, "loss": 2.7761, "step": 195820 }, { "epoch": 8.43519834603954, "learning_rate": 5.257493158654571e-07, "loss": 2.9352, "step": 195840 }, { "epoch": 8.436059783779127, "learning_rate": 5.257008338899782e-07, "loss": 2.78, "step": 195860 }, { "epoch": 8.436921221518714, "learning_rate": 5.256523519144993e-07, "loss": 2.7551, "step": 195880 }, { "epoch": 8.437782659258302, "learning_rate": 5.256038699390205e-07, "loss": 3.0006, "step": 195900 }, { "epoch": 8.438644096997889, "learning_rate": 5.255553879635414e-07, "loss": 2.7652, "step": 195920 }, { "epoch": 8.439505534737476, "learning_rate": 5.255069059880627e-07, "loss": 2.9959, "step": 195940 }, { "epoch": 8.440366972477065, "learning_rate": 5.254584240125838e-07, "loss": 2.7546, "step": 195960 }, { "epoch": 8.441228410216652, "learning_rate": 5.254099420371049e-07, "loss": 2.6923, "step": 195980 }, { "epoch": 8.44208984795624, "learning_rate": 5.253614600616259e-07, "loss": 2.9054, "step": 196000 }, { "epoch": 8.442951285695827, "learning_rate": 5.253129780861471e-07, "loss": 2.8743, "step": 196020 }, { "epoch": 8.443812723435414, "learning_rate": 5.252644961106681e-07, "loss": 2.8211, "step": 196040 }, { "epoch": 8.444674161175001, "learning_rate": 5.252160141351894e-07, "loss": 2.8662, "step": 196060 }, { "epoch": 8.445535598914589, "learning_rate": 5.251675321597104e-07, "loss": 2.7426, "step": 196080 }, { "epoch": 8.446397036654176, "learning_rate": 5.251190501842315e-07, "loss": 2.6689, "step": 196100 }, { "epoch": 8.447258474393763, "learning_rate": 5.250705682087526e-07, "loss": 2.6515, "step": 196120 }, { "epoch": 8.44811991213335, "learning_rate": 5.250220862332737e-07, "loss": 2.8461, "step": 196140 }, { "epoch": 8.448981349872938, "learning_rate": 5.249736042577948e-07, "loss": 2.8565, "step": 196160 }, { "epoch": 8.449842787612525, "learning_rate": 5.24925122282316e-07, "loss": 2.9057, "step": 196180 }, { "epoch": 8.450704225352112, "learning_rate": 5.248766403068371e-07, "loss": 2.9683, "step": 196200 }, { "epoch": 8.4515656630917, "learning_rate": 5.248281583313581e-07, "loss": 2.8998, "step": 196220 }, { "epoch": 8.452427100831287, "learning_rate": 5.247796763558792e-07, "loss": 2.8961, "step": 196240 }, { "epoch": 8.453288538570876, "learning_rate": 5.247311943804004e-07, "loss": 2.6497, "step": 196260 }, { "epoch": 8.454149976310463, "learning_rate": 5.246827124049215e-07, "loss": 2.8473, "step": 196280 }, { "epoch": 8.45501141405005, "learning_rate": 5.246342304294424e-07, "loss": 2.785, "step": 196300 }, { "epoch": 8.455872851789637, "learning_rate": 5.245857484539637e-07, "loss": 2.7366, "step": 196320 }, { "epoch": 8.456734289529225, "learning_rate": 5.245372664784848e-07, "loss": 2.977, "step": 196340 }, { "epoch": 8.457595727268812, "learning_rate": 5.244887845030058e-07, "loss": 2.8296, "step": 196360 }, { "epoch": 8.458457165008399, "learning_rate": 5.244403025275269e-07, "loss": 2.9757, "step": 196380 }, { "epoch": 8.459318602747986, "learning_rate": 5.243918205520481e-07, "loss": 2.8488, "step": 196400 }, { "epoch": 8.460180040487574, "learning_rate": 5.243433385765691e-07, "loss": 2.7902, "step": 196420 }, { "epoch": 8.46104147822716, "learning_rate": 5.242948566010903e-07, "loss": 2.8993, "step": 196440 }, { "epoch": 8.461902915966748, "learning_rate": 5.242463746256114e-07, "loss": 2.8119, "step": 196460 }, { "epoch": 8.462764353706335, "learning_rate": 5.241978926501325e-07, "loss": 2.8009, "step": 196480 }, { "epoch": 8.463625791445923, "learning_rate": 5.241494106746536e-07, "loss": 2.8596, "step": 196500 }, { "epoch": 8.46448722918551, "learning_rate": 5.241009286991747e-07, "loss": 2.8722, "step": 196520 }, { "epoch": 8.465348666925099, "learning_rate": 5.240524467236959e-07, "loss": 2.8623, "step": 196540 }, { "epoch": 8.466210104664686, "learning_rate": 5.24003964748217e-07, "loss": 2.8401, "step": 196560 }, { "epoch": 8.467071542404273, "learning_rate": 5.239554827727381e-07, "loss": 2.7958, "step": 196580 }, { "epoch": 8.46793298014386, "learning_rate": 5.239070007972591e-07, "loss": 2.9506, "step": 196600 }, { "epoch": 8.468794417883448, "learning_rate": 5.238585188217802e-07, "loss": 2.774, "step": 196620 }, { "epoch": 8.469655855623035, "learning_rate": 5.238100368463014e-07, "loss": 2.7399, "step": 196640 }, { "epoch": 8.470517293362622, "learning_rate": 5.237615548708224e-07, "loss": 2.683, "step": 196660 }, { "epoch": 8.47137873110221, "learning_rate": 5.237130728953436e-07, "loss": 2.8936, "step": 196680 }, { "epoch": 8.472240168841797, "learning_rate": 5.236645909198647e-07, "loss": 2.9256, "step": 196700 }, { "epoch": 8.473101606581384, "learning_rate": 5.236161089443858e-07, "loss": 2.7536, "step": 196720 }, { "epoch": 8.473963044320971, "learning_rate": 5.235676269689068e-07, "loss": 2.8408, "step": 196740 }, { "epoch": 8.474824482060558, "learning_rate": 5.23519144993428e-07, "loss": 2.9324, "step": 196760 }, { "epoch": 8.475685919800146, "learning_rate": 5.234706630179491e-07, "loss": 2.9602, "step": 196780 }, { "epoch": 8.476547357539733, "learning_rate": 5.234221810424703e-07, "loss": 2.7435, "step": 196800 }, { "epoch": 8.477408795279322, "learning_rate": 5.233736990669913e-07, "loss": 2.8988, "step": 196820 }, { "epoch": 8.47827023301891, "learning_rate": 5.233252170915124e-07, "loss": 2.8003, "step": 196840 }, { "epoch": 8.479131670758496, "learning_rate": 5.232767351160335e-07, "loss": 2.8911, "step": 196860 }, { "epoch": 8.479993108498084, "learning_rate": 5.232282531405546e-07, "loss": 2.9594, "step": 196880 }, { "epoch": 8.480854546237671, "learning_rate": 5.231797711650757e-07, "loss": 2.8645, "step": 196900 }, { "epoch": 8.481715983977258, "learning_rate": 5.231312891895968e-07, "loss": 2.6689, "step": 196920 }, { "epoch": 8.482577421716845, "learning_rate": 5.23082807214118e-07, "loss": 2.8583, "step": 196940 }, { "epoch": 8.483438859456433, "learning_rate": 5.230343252386389e-07, "loss": 2.9744, "step": 196960 }, { "epoch": 8.48430029719602, "learning_rate": 5.229858432631601e-07, "loss": 2.8115, "step": 196980 }, { "epoch": 8.485161734935607, "learning_rate": 5.229373612876812e-07, "loss": 2.7695, "step": 197000 }, { "epoch": 8.486023172675194, "learning_rate": 5.228888793122024e-07, "loss": 2.9598, "step": 197020 }, { "epoch": 8.486884610414782, "learning_rate": 5.228403973367234e-07, "loss": 2.7315, "step": 197040 }, { "epoch": 8.487746048154369, "learning_rate": 5.227919153612447e-07, "loss": 2.7569, "step": 197060 }, { "epoch": 8.488607485893956, "learning_rate": 5.227434333857657e-07, "loss": 3.068, "step": 197080 }, { "epoch": 8.489468923633545, "learning_rate": 5.226949514102868e-07, "loss": 2.8164, "step": 197100 }, { "epoch": 8.490330361373132, "learning_rate": 5.226464694348078e-07, "loss": 2.8301, "step": 197120 }, { "epoch": 8.49119179911272, "learning_rate": 5.22597987459329e-07, "loss": 2.8292, "step": 197140 }, { "epoch": 8.492053236852307, "learning_rate": 5.225495054838501e-07, "loss": 2.9988, "step": 197160 }, { "epoch": 8.492914674591894, "learning_rate": 5.225010235083713e-07, "loss": 2.9165, "step": 197180 }, { "epoch": 8.493776112331481, "learning_rate": 5.224525415328923e-07, "loss": 2.8576, "step": 197200 }, { "epoch": 8.494637550071069, "learning_rate": 5.224040595574134e-07, "loss": 2.9873, "step": 197220 }, { "epoch": 8.495498987810656, "learning_rate": 5.223555775819345e-07, "loss": 2.7417, "step": 197240 }, { "epoch": 8.496360425550243, "learning_rate": 5.223070956064557e-07, "loss": 2.9705, "step": 197260 }, { "epoch": 8.49722186328983, "learning_rate": 5.222586136309767e-07, "loss": 2.7863, "step": 197280 }, { "epoch": 8.498083301029418, "learning_rate": 5.222101316554979e-07, "loss": 2.8756, "step": 197300 }, { "epoch": 8.498944738769005, "learning_rate": 5.22161649680019e-07, "loss": 2.7553, "step": 197320 }, { "epoch": 8.499806176508592, "learning_rate": 5.2211316770454e-07, "loss": 2.7722, "step": 197340 }, { "epoch": 8.50066761424818, "learning_rate": 5.220646857290611e-07, "loss": 2.8705, "step": 197360 }, { "epoch": 8.501529051987767, "learning_rate": 5.220162037535823e-07, "loss": 2.729, "step": 197380 }, { "epoch": 8.502390489727356, "learning_rate": 5.219677217781034e-07, "loss": 3.0501, "step": 197400 }, { "epoch": 8.503251927466943, "learning_rate": 5.219192398026244e-07, "loss": 2.8888, "step": 197420 }, { "epoch": 8.50411336520653, "learning_rate": 5.218707578271456e-07, "loss": 2.9031, "step": 197440 }, { "epoch": 8.504974802946117, "learning_rate": 5.218222758516667e-07, "loss": 2.8254, "step": 197460 }, { "epoch": 8.505836240685705, "learning_rate": 5.217737938761878e-07, "loss": 2.8542, "step": 197480 }, { "epoch": 8.506697678425292, "learning_rate": 5.217253119007088e-07, "loss": 2.9229, "step": 197500 }, { "epoch": 8.50755911616488, "learning_rate": 5.2167682992523e-07, "loss": 2.8644, "step": 197520 }, { "epoch": 8.508420553904466, "learning_rate": 5.216283479497511e-07, "loss": 2.7827, "step": 197540 }, { "epoch": 8.509281991644054, "learning_rate": 5.215798659742723e-07, "loss": 2.8719, "step": 197560 }, { "epoch": 8.51014342938364, "learning_rate": 5.215313839987933e-07, "loss": 2.788, "step": 197580 }, { "epoch": 8.511004867123228, "learning_rate": 5.214829020233144e-07, "loss": 2.8262, "step": 197600 }, { "epoch": 8.511866304862815, "learning_rate": 5.214344200478355e-07, "loss": 2.8725, "step": 197620 }, { "epoch": 8.512727742602403, "learning_rate": 5.213859380723566e-07, "loss": 2.9363, "step": 197640 }, { "epoch": 8.513589180341992, "learning_rate": 5.213374560968777e-07, "loss": 2.8855, "step": 197660 }, { "epoch": 8.514450618081579, "learning_rate": 5.212889741213989e-07, "loss": 2.8554, "step": 197680 }, { "epoch": 8.515312055821166, "learning_rate": 5.2124049214592e-07, "loss": 2.9091, "step": 197700 }, { "epoch": 8.516173493560753, "learning_rate": 5.21192010170441e-07, "loss": 2.7772, "step": 197720 }, { "epoch": 8.51703493130034, "learning_rate": 5.211435281949621e-07, "loss": 2.8151, "step": 197740 }, { "epoch": 8.517896369039928, "learning_rate": 5.210950462194833e-07, "loss": 2.7355, "step": 197760 }, { "epoch": 8.518757806779515, "learning_rate": 5.210465642440044e-07, "loss": 2.7931, "step": 197780 }, { "epoch": 8.519619244519102, "learning_rate": 5.209980822685255e-07, "loss": 2.8071, "step": 197800 }, { "epoch": 8.52048068225869, "learning_rate": 5.209496002930466e-07, "loss": 2.8792, "step": 197820 }, { "epoch": 8.521342119998277, "learning_rate": 5.209011183175677e-07, "loss": 2.8135, "step": 197840 }, { "epoch": 8.522203557737864, "learning_rate": 5.208526363420887e-07, "loss": 2.8223, "step": 197860 }, { "epoch": 8.523064995477451, "learning_rate": 5.208041543666099e-07, "loss": 2.6526, "step": 197880 }, { "epoch": 8.523926433217039, "learning_rate": 5.20755672391131e-07, "loss": 2.8189, "step": 197900 }, { "epoch": 8.524787870956626, "learning_rate": 5.207071904156521e-07, "loss": 2.9899, "step": 197920 }, { "epoch": 8.525649308696213, "learning_rate": 5.206587084401732e-07, "loss": 2.8135, "step": 197940 }, { "epoch": 8.526510746435802, "learning_rate": 5.206102264646943e-07, "loss": 2.7957, "step": 197960 }, { "epoch": 8.52737218417539, "learning_rate": 5.205617444892154e-07, "loss": 2.7763, "step": 197980 }, { "epoch": 8.528233621914977, "learning_rate": 5.205132625137365e-07, "loss": 2.7666, "step": 198000 }, { "epoch": 8.529095059654564, "learning_rate": 5.204647805382576e-07, "loss": 2.6702, "step": 198020 }, { "epoch": 8.529956497394151, "learning_rate": 5.204162985627787e-07, "loss": 2.8805, "step": 198040 }, { "epoch": 8.530817935133738, "learning_rate": 5.203678165872999e-07, "loss": 2.7985, "step": 198060 }, { "epoch": 8.531679372873326, "learning_rate": 5.20319334611821e-07, "loss": 2.7036, "step": 198080 }, { "epoch": 8.532540810612913, "learning_rate": 5.20270852636342e-07, "loss": 2.805, "step": 198100 }, { "epoch": 8.5334022483525, "learning_rate": 5.202223706608631e-07, "loss": 2.6698, "step": 198120 }, { "epoch": 8.534263686092087, "learning_rate": 5.201738886853843e-07, "loss": 2.8566, "step": 198140 }, { "epoch": 8.535125123831675, "learning_rate": 5.201254067099054e-07, "loss": 2.6772, "step": 198160 }, { "epoch": 8.535986561571262, "learning_rate": 5.200769247344265e-07, "loss": 2.746, "step": 198180 }, { "epoch": 8.536847999310849, "learning_rate": 5.200284427589476e-07, "loss": 2.8138, "step": 198200 }, { "epoch": 8.537709437050438, "learning_rate": 5.199799607834686e-07, "loss": 2.818, "step": 198220 }, { "epoch": 8.538570874790025, "learning_rate": 5.199314788079897e-07, "loss": 2.8835, "step": 198240 }, { "epoch": 8.539432312529613, "learning_rate": 5.198829968325109e-07, "loss": 2.7283, "step": 198260 }, { "epoch": 8.5402937502692, "learning_rate": 5.19834514857032e-07, "loss": 2.6718, "step": 198280 }, { "epoch": 8.541155188008787, "learning_rate": 5.197860328815532e-07, "loss": 2.7283, "step": 198300 }, { "epoch": 8.542016625748374, "learning_rate": 5.197375509060743e-07, "loss": 2.8658, "step": 198320 }, { "epoch": 8.542878063487962, "learning_rate": 5.196890689305953e-07, "loss": 2.8539, "step": 198340 }, { "epoch": 8.543739501227549, "learning_rate": 5.196405869551164e-07, "loss": 2.8443, "step": 198360 }, { "epoch": 8.544600938967136, "learning_rate": 5.195921049796377e-07, "loss": 2.8931, "step": 198380 }, { "epoch": 8.545462376706723, "learning_rate": 5.195436230041586e-07, "loss": 2.8324, "step": 198400 }, { "epoch": 8.54632381444631, "learning_rate": 5.194951410286798e-07, "loss": 2.9121, "step": 198420 }, { "epoch": 8.547185252185898, "learning_rate": 5.194466590532009e-07, "loss": 2.9026, "step": 198440 }, { "epoch": 8.548046689925485, "learning_rate": 5.19398177077722e-07, "loss": 2.8467, "step": 198460 }, { "epoch": 8.548908127665072, "learning_rate": 5.19349695102243e-07, "loss": 2.9879, "step": 198480 }, { "epoch": 8.54976956540466, "learning_rate": 5.193012131267641e-07, "loss": 2.7975, "step": 198500 }, { "epoch": 8.550631003144249, "learning_rate": 5.192527311512853e-07, "loss": 2.9268, "step": 198520 }, { "epoch": 8.551492440883836, "learning_rate": 5.192042491758063e-07, "loss": 2.8762, "step": 198540 }, { "epoch": 8.552353878623423, "learning_rate": 5.191557672003275e-07, "loss": 2.7135, "step": 198560 }, { "epoch": 8.55321531636301, "learning_rate": 5.191072852248486e-07, "loss": 2.986, "step": 198580 }, { "epoch": 8.554076754102597, "learning_rate": 5.190588032493697e-07, "loss": 2.8043, "step": 198600 }, { "epoch": 8.554938191842185, "learning_rate": 5.190103212738907e-07, "loss": 2.7518, "step": 198620 }, { "epoch": 8.555799629581772, "learning_rate": 5.18961839298412e-07, "loss": 2.7733, "step": 198640 }, { "epoch": 8.55666106732136, "learning_rate": 5.18913357322933e-07, "loss": 2.8381, "step": 198660 }, { "epoch": 8.557522505060946, "learning_rate": 5.188648753474542e-07, "loss": 2.9077, "step": 198680 }, { "epoch": 8.558383942800534, "learning_rate": 5.188163933719752e-07, "loss": 3.0503, "step": 198700 }, { "epoch": 8.559245380540121, "learning_rate": 5.187679113964963e-07, "loss": 2.7644, "step": 198720 }, { "epoch": 8.560106818279708, "learning_rate": 5.187194294210174e-07, "loss": 2.8158, "step": 198740 }, { "epoch": 8.560968256019295, "learning_rate": 5.186709474455387e-07, "loss": 2.797, "step": 198760 }, { "epoch": 8.561829693758884, "learning_rate": 5.186224654700596e-07, "loss": 2.8478, "step": 198780 }, { "epoch": 8.562691131498472, "learning_rate": 5.185739834945808e-07, "loss": 2.8117, "step": 198800 }, { "epoch": 8.563552569238059, "learning_rate": 5.185255015191019e-07, "loss": 2.8671, "step": 198820 }, { "epoch": 8.564414006977646, "learning_rate": 5.184770195436229e-07, "loss": 3.0008, "step": 198840 }, { "epoch": 8.565275444717233, "learning_rate": 5.18428537568144e-07, "loss": 2.8989, "step": 198860 }, { "epoch": 8.56613688245682, "learning_rate": 5.183800555926653e-07, "loss": 2.8234, "step": 198880 }, { "epoch": 8.566998320196408, "learning_rate": 5.183315736171863e-07, "loss": 2.8194, "step": 198900 }, { "epoch": 8.567859757935995, "learning_rate": 5.182830916417074e-07, "loss": 2.7937, "step": 198920 }, { "epoch": 8.568721195675582, "learning_rate": 5.182346096662285e-07, "loss": 2.7389, "step": 198940 }, { "epoch": 8.56958263341517, "learning_rate": 5.181861276907496e-07, "loss": 2.7374, "step": 198960 }, { "epoch": 8.570444071154757, "learning_rate": 5.181376457152707e-07, "loss": 2.7319, "step": 198980 }, { "epoch": 8.571305508894344, "learning_rate": 5.180891637397917e-07, "loss": 2.9284, "step": 199000 }, { "epoch": 8.572166946633931, "learning_rate": 5.18040681764313e-07, "loss": 2.8322, "step": 199020 }, { "epoch": 8.573028384373519, "learning_rate": 5.17992199788834e-07, "loss": 2.6775, "step": 199040 }, { "epoch": 8.573889822113106, "learning_rate": 5.179437178133552e-07, "loss": 2.8017, "step": 199060 }, { "epoch": 8.574751259852695, "learning_rate": 5.178952358378762e-07, "loss": 2.8476, "step": 199080 }, { "epoch": 8.575612697592282, "learning_rate": 5.178467538623973e-07, "loss": 2.7949, "step": 199100 }, { "epoch": 8.57647413533187, "learning_rate": 5.177982718869184e-07, "loss": 2.6909, "step": 199120 }, { "epoch": 8.577335573071457, "learning_rate": 5.177497899114397e-07, "loss": 2.8837, "step": 199140 }, { "epoch": 8.578197010811044, "learning_rate": 5.177013079359607e-07, "loss": 2.9126, "step": 199160 }, { "epoch": 8.579058448550631, "learning_rate": 5.176528259604818e-07, "loss": 2.7491, "step": 199180 }, { "epoch": 8.579919886290218, "learning_rate": 5.176043439850029e-07, "loss": 2.8154, "step": 199200 }, { "epoch": 8.580781324029806, "learning_rate": 5.175558620095239e-07, "loss": 2.8235, "step": 199220 }, { "epoch": 8.581642761769393, "learning_rate": 5.17507380034045e-07, "loss": 2.9001, "step": 199240 }, { "epoch": 8.58250419950898, "learning_rate": 5.174588980585662e-07, "loss": 2.8626, "step": 199260 }, { "epoch": 8.583365637248567, "learning_rate": 5.174104160830873e-07, "loss": 2.9643, "step": 199280 }, { "epoch": 8.584227074988155, "learning_rate": 5.173619341076084e-07, "loss": 2.7842, "step": 199300 }, { "epoch": 8.585088512727742, "learning_rate": 5.173134521321295e-07, "loss": 2.7479, "step": 199320 }, { "epoch": 8.585949950467331, "learning_rate": 5.172649701566506e-07, "loss": 3.076, "step": 199340 }, { "epoch": 8.586811388206918, "learning_rate": 5.172164881811717e-07, "loss": 2.7899, "step": 199360 }, { "epoch": 8.587672825946505, "learning_rate": 5.171680062056929e-07, "loss": 2.7569, "step": 199380 }, { "epoch": 8.588534263686093, "learning_rate": 5.17119524230214e-07, "loss": 2.8048, "step": 199400 }, { "epoch": 8.58939570142568, "learning_rate": 5.170710422547351e-07, "loss": 2.7998, "step": 199420 }, { "epoch": 8.590257139165267, "learning_rate": 5.170225602792562e-07, "loss": 2.8093, "step": 199440 }, { "epoch": 8.591118576904854, "learning_rate": 5.169740783037773e-07, "loss": 2.8356, "step": 199460 }, { "epoch": 8.591980014644442, "learning_rate": 5.169255963282983e-07, "loss": 2.8164, "step": 199480 }, { "epoch": 8.592841452384029, "learning_rate": 5.168771143528196e-07, "loss": 2.7407, "step": 199500 }, { "epoch": 8.593702890123616, "learning_rate": 5.168286323773406e-07, "loss": 2.831, "step": 199520 }, { "epoch": 8.594564327863203, "learning_rate": 5.167801504018616e-07, "loss": 2.8033, "step": 199540 }, { "epoch": 8.59542576560279, "learning_rate": 5.167316684263828e-07, "loss": 2.9391, "step": 199560 }, { "epoch": 8.596287203342378, "learning_rate": 5.166831864509038e-07, "loss": 3.0003, "step": 199580 }, { "epoch": 8.597148641081965, "learning_rate": 5.166347044754249e-07, "loss": 2.8427, "step": 199600 }, { "epoch": 8.598010078821552, "learning_rate": 5.16586222499946e-07, "loss": 2.8831, "step": 199620 }, { "epoch": 8.598871516561141, "learning_rate": 5.165377405244673e-07, "loss": 2.8887, "step": 199640 }, { "epoch": 8.599732954300729, "learning_rate": 5.164892585489884e-07, "loss": 2.7851, "step": 199660 }, { "epoch": 8.600594392040316, "learning_rate": 5.164407765735094e-07, "loss": 2.8764, "step": 199680 }, { "epoch": 8.601455829779903, "learning_rate": 5.163922945980305e-07, "loss": 2.9, "step": 199700 }, { "epoch": 8.60231726751949, "learning_rate": 5.163438126225517e-07, "loss": 2.8338, "step": 199720 }, { "epoch": 8.603178705259078, "learning_rate": 5.162953306470726e-07, "loss": 2.6307, "step": 199740 }, { "epoch": 8.604040142998665, "learning_rate": 5.162468486715939e-07, "loss": 2.8472, "step": 199760 }, { "epoch": 8.604901580738252, "learning_rate": 5.16198366696115e-07, "loss": 2.9391, "step": 199780 }, { "epoch": 8.60576301847784, "learning_rate": 5.161498847206361e-07, "loss": 2.8241, "step": 199800 }, { "epoch": 8.606624456217427, "learning_rate": 5.161014027451571e-07, "loss": 2.7818, "step": 199820 }, { "epoch": 8.607485893957014, "learning_rate": 5.160529207696783e-07, "loss": 2.8876, "step": 199840 }, { "epoch": 8.608347331696601, "learning_rate": 5.160044387941993e-07, "loss": 2.9382, "step": 199860 }, { "epoch": 8.609208769436188, "learning_rate": 5.159559568187206e-07, "loss": 2.8564, "step": 199880 }, { "epoch": 8.610070207175777, "learning_rate": 5.159074748432416e-07, "loss": 2.7276, "step": 199900 }, { "epoch": 8.610931644915365, "learning_rate": 5.158589928677627e-07, "loss": 2.7382, "step": 199920 }, { "epoch": 8.611793082654952, "learning_rate": 5.158105108922838e-07, "loss": 2.8303, "step": 199940 }, { "epoch": 8.612654520394539, "learning_rate": 5.15762028916805e-07, "loss": 2.8814, "step": 199960 }, { "epoch": 8.613515958134126, "learning_rate": 5.157135469413259e-07, "loss": 2.7987, "step": 199980 }, { "epoch": 8.614377395873714, "learning_rate": 5.156650649658472e-07, "loss": 2.7933, "step": 200000 }, { "epoch": 8.6152388336133, "learning_rate": 5.156165829903683e-07, "loss": 2.8389, "step": 200020 }, { "epoch": 8.616100271352888, "learning_rate": 5.155681010148894e-07, "loss": 2.9539, "step": 200040 }, { "epoch": 8.616961709092475, "learning_rate": 5.155196190394104e-07, "loss": 2.7235, "step": 200060 }, { "epoch": 8.617823146832063, "learning_rate": 5.154711370639315e-07, "loss": 2.8923, "step": 200080 }, { "epoch": 8.61868458457165, "learning_rate": 5.154226550884528e-07, "loss": 2.8319, "step": 200100 }, { "epoch": 8.619546022311237, "learning_rate": 5.153741731129736e-07, "loss": 2.873, "step": 200120 }, { "epoch": 8.620407460050824, "learning_rate": 5.153256911374949e-07, "loss": 2.6924, "step": 200140 }, { "epoch": 8.621268897790412, "learning_rate": 5.15277209162016e-07, "loss": 2.8031, "step": 200160 }, { "epoch": 8.622130335529999, "learning_rate": 5.152287271865371e-07, "loss": 2.7695, "step": 200180 }, { "epoch": 8.622991773269588, "learning_rate": 5.151802452110581e-07, "loss": 2.8411, "step": 200200 }, { "epoch": 8.623853211009175, "learning_rate": 5.151317632355793e-07, "loss": 2.9199, "step": 200220 }, { "epoch": 8.624714648748762, "learning_rate": 5.150832812601003e-07, "loss": 2.8566, "step": 200240 }, { "epoch": 8.62557608648835, "learning_rate": 5.150347992846216e-07, "loss": 2.8795, "step": 200260 }, { "epoch": 8.626437524227937, "learning_rate": 5.149863173091426e-07, "loss": 2.7133, "step": 200280 }, { "epoch": 8.627298961967524, "learning_rate": 5.149378353336637e-07, "loss": 2.9274, "step": 200300 }, { "epoch": 8.628160399707111, "learning_rate": 5.148893533581848e-07, "loss": 2.8877, "step": 200320 }, { "epoch": 8.629021837446698, "learning_rate": 5.14840871382706e-07, "loss": 2.9399, "step": 200340 }, { "epoch": 8.629883275186286, "learning_rate": 5.14792389407227e-07, "loss": 2.7561, "step": 200360 }, { "epoch": 8.630744712925873, "learning_rate": 5.147439074317482e-07, "loss": 2.896, "step": 200380 }, { "epoch": 8.63160615066546, "learning_rate": 5.146954254562693e-07, "loss": 2.8474, "step": 200400 }, { "epoch": 8.632467588405047, "learning_rate": 5.146469434807903e-07, "loss": 2.7614, "step": 200420 }, { "epoch": 8.633329026144635, "learning_rate": 5.145984615053114e-07, "loss": 2.8439, "step": 200440 }, { "epoch": 8.634190463884222, "learning_rate": 5.145499795298326e-07, "loss": 2.8169, "step": 200460 }, { "epoch": 8.635051901623811, "learning_rate": 5.145014975543537e-07, "loss": 2.8103, "step": 200480 }, { "epoch": 8.635913339363398, "learning_rate": 5.144530155788748e-07, "loss": 2.8019, "step": 200500 }, { "epoch": 8.636774777102985, "learning_rate": 5.144045336033958e-07, "loss": 2.893, "step": 200520 }, { "epoch": 8.637636214842573, "learning_rate": 5.14356051627917e-07, "loss": 2.7962, "step": 200540 }, { "epoch": 8.63849765258216, "learning_rate": 5.143075696524381e-07, "loss": 2.8821, "step": 200560 }, { "epoch": 8.639359090321747, "learning_rate": 5.142590876769592e-07, "loss": 2.9318, "step": 200580 }, { "epoch": 8.640220528061334, "learning_rate": 5.142106057014803e-07, "loss": 2.8569, "step": 200600 }, { "epoch": 8.641081965800922, "learning_rate": 5.141621237260013e-07, "loss": 2.9505, "step": 200620 }, { "epoch": 8.641943403540509, "learning_rate": 5.141136417505226e-07, "loss": 2.7503, "step": 200640 }, { "epoch": 8.642804841280096, "learning_rate": 5.140651597750436e-07, "loss": 2.928, "step": 200660 }, { "epoch": 8.643666279019683, "learning_rate": 5.140166777995647e-07, "loss": 2.778, "step": 200680 }, { "epoch": 8.64452771675927, "learning_rate": 5.139681958240858e-07, "loss": 2.8373, "step": 200700 }, { "epoch": 8.645389154498858, "learning_rate": 5.13919713848607e-07, "loss": 2.7228, "step": 200720 }, { "epoch": 8.646250592238445, "learning_rate": 5.13871231873128e-07, "loss": 2.6668, "step": 200740 }, { "epoch": 8.647112029978032, "learning_rate": 5.138227498976492e-07, "loss": 2.7417, "step": 200760 }, { "epoch": 8.647973467717621, "learning_rate": 5.137742679221703e-07, "loss": 2.7532, "step": 200780 }, { "epoch": 8.648834905457209, "learning_rate": 5.137257859466913e-07, "loss": 2.8305, "step": 200800 }, { "epoch": 8.649696343196796, "learning_rate": 5.136773039712124e-07, "loss": 2.8124, "step": 200820 }, { "epoch": 8.650557780936383, "learning_rate": 5.136288219957336e-07, "loss": 2.6388, "step": 200840 }, { "epoch": 8.65141921867597, "learning_rate": 5.135803400202547e-07, "loss": 2.8524, "step": 200860 }, { "epoch": 8.652280656415558, "learning_rate": 5.135318580447758e-07, "loss": 2.7188, "step": 200880 }, { "epoch": 8.653142094155145, "learning_rate": 5.134833760692969e-07, "loss": 2.804, "step": 200900 }, { "epoch": 8.654003531894732, "learning_rate": 5.13434894093818e-07, "loss": 2.7377, "step": 200920 }, { "epoch": 8.65486496963432, "learning_rate": 5.133864121183392e-07, "loss": 2.8644, "step": 200940 }, { "epoch": 8.655726407373907, "learning_rate": 5.133379301428602e-07, "loss": 2.9126, "step": 200960 }, { "epoch": 8.656587845113494, "learning_rate": 5.132894481673813e-07, "loss": 2.8072, "step": 200980 }, { "epoch": 8.657449282853081, "learning_rate": 5.132409661919025e-07, "loss": 2.694, "step": 201000 }, { "epoch": 8.658310720592668, "learning_rate": 5.131924842164236e-07, "loss": 2.9598, "step": 201020 }, { "epoch": 8.659172158332257, "learning_rate": 5.131440022409446e-07, "loss": 2.8682, "step": 201040 }, { "epoch": 8.660033596071845, "learning_rate": 5.130955202654657e-07, "loss": 2.8072, "step": 201060 }, { "epoch": 8.660895033811432, "learning_rate": 5.130470382899869e-07, "loss": 2.9851, "step": 201080 }, { "epoch": 8.66175647155102, "learning_rate": 5.129985563145079e-07, "loss": 2.7291, "step": 201100 }, { "epoch": 8.662617909290606, "learning_rate": 5.129500743390291e-07, "loss": 2.8692, "step": 201120 }, { "epoch": 8.663479347030194, "learning_rate": 5.129015923635502e-07, "loss": 2.8013, "step": 201140 }, { "epoch": 8.66434078476978, "learning_rate": 5.128531103880713e-07, "loss": 2.8793, "step": 201160 }, { "epoch": 8.665202222509368, "learning_rate": 5.128046284125923e-07, "loss": 2.8865, "step": 201180 }, { "epoch": 8.666063660248955, "learning_rate": 5.127561464371134e-07, "loss": 2.8563, "step": 201200 }, { "epoch": 8.666925097988543, "learning_rate": 5.127076644616346e-07, "loss": 2.9266, "step": 201220 }, { "epoch": 8.66778653572813, "learning_rate": 5.126591824861557e-07, "loss": 2.8851, "step": 201240 }, { "epoch": 8.668647973467717, "learning_rate": 5.126107005106768e-07, "loss": 2.8589, "step": 201260 }, { "epoch": 8.669509411207304, "learning_rate": 5.125622185351979e-07, "loss": 2.8104, "step": 201280 }, { "epoch": 8.670370848946892, "learning_rate": 5.12513736559719e-07, "loss": 2.8402, "step": 201300 }, { "epoch": 8.671232286686479, "learning_rate": 5.1246525458424e-07, "loss": 2.7586, "step": 201320 }, { "epoch": 8.672093724426068, "learning_rate": 5.124167726087612e-07, "loss": 2.8566, "step": 201340 }, { "epoch": 8.672955162165655, "learning_rate": 5.123682906332822e-07, "loss": 2.7491, "step": 201360 }, { "epoch": 8.673816599905242, "learning_rate": 5.123198086578035e-07, "loss": 2.7602, "step": 201380 }, { "epoch": 8.67467803764483, "learning_rate": 5.122713266823245e-07, "loss": 2.8435, "step": 201400 }, { "epoch": 8.675539475384417, "learning_rate": 5.122228447068456e-07, "loss": 2.7676, "step": 201420 }, { "epoch": 8.676400913124004, "learning_rate": 5.121743627313667e-07, "loss": 2.7603, "step": 201440 }, { "epoch": 8.677262350863591, "learning_rate": 5.121258807558879e-07, "loss": 2.6328, "step": 201460 }, { "epoch": 8.678123788603179, "learning_rate": 5.120773987804089e-07, "loss": 2.8345, "step": 201480 }, { "epoch": 8.678985226342766, "learning_rate": 5.120289168049301e-07, "loss": 2.9789, "step": 201500 }, { "epoch": 8.679846664082353, "learning_rate": 5.119804348294512e-07, "loss": 2.7145, "step": 201520 }, { "epoch": 8.68070810182194, "learning_rate": 5.119319528539723e-07, "loss": 2.7369, "step": 201540 }, { "epoch": 8.681569539561528, "learning_rate": 5.118834708784933e-07, "loss": 2.8584, "step": 201560 }, { "epoch": 8.682430977301115, "learning_rate": 5.118349889030145e-07, "loss": 2.8352, "step": 201580 }, { "epoch": 8.683292415040704, "learning_rate": 5.117865069275356e-07, "loss": 2.9402, "step": 201600 }, { "epoch": 8.684153852780291, "learning_rate": 5.117380249520568e-07, "loss": 2.8614, "step": 201620 }, { "epoch": 8.685015290519878, "learning_rate": 5.116895429765778e-07, "loss": 2.7366, "step": 201640 }, { "epoch": 8.685876728259466, "learning_rate": 5.116410610010989e-07, "loss": 2.7901, "step": 201660 }, { "epoch": 8.686738165999053, "learning_rate": 5.1159257902562e-07, "loss": 2.7065, "step": 201680 }, { "epoch": 8.68759960373864, "learning_rate": 5.11544097050141e-07, "loss": 2.9466, "step": 201700 }, { "epoch": 8.688461041478227, "learning_rate": 5.114956150746622e-07, "loss": 2.737, "step": 201720 }, { "epoch": 8.689322479217815, "learning_rate": 5.114471330991833e-07, "loss": 2.8377, "step": 201740 }, { "epoch": 8.690183916957402, "learning_rate": 5.113986511237045e-07, "loss": 2.9032, "step": 201760 }, { "epoch": 8.691045354696989, "learning_rate": 5.113501691482254e-07, "loss": 2.8823, "step": 201780 }, { "epoch": 8.691906792436576, "learning_rate": 5.113016871727466e-07, "loss": 2.8486, "step": 201800 }, { "epoch": 8.692768230176164, "learning_rate": 5.112532051972677e-07, "loss": 2.849, "step": 201820 }, { "epoch": 8.69362966791575, "learning_rate": 5.112047232217889e-07, "loss": 2.7449, "step": 201840 }, { "epoch": 8.694491105655338, "learning_rate": 5.111562412463099e-07, "loss": 2.9557, "step": 201860 }, { "epoch": 8.695352543394925, "learning_rate": 5.111077592708312e-07, "loss": 2.8113, "step": 201880 }, { "epoch": 8.696213981134514, "learning_rate": 5.110592772953522e-07, "loss": 2.6733, "step": 201900 }, { "epoch": 8.697075418874102, "learning_rate": 5.110107953198733e-07, "loss": 2.7332, "step": 201920 }, { "epoch": 8.697936856613689, "learning_rate": 5.109623133443943e-07, "loss": 2.7663, "step": 201940 }, { "epoch": 8.698798294353276, "learning_rate": 5.109138313689155e-07, "loss": 2.7566, "step": 201960 }, { "epoch": 8.699659732092863, "learning_rate": 5.108653493934366e-07, "loss": 2.7085, "step": 201980 }, { "epoch": 8.70052116983245, "learning_rate": 5.108168674179577e-07, "loss": 2.7813, "step": 202000 }, { "epoch": 8.701382607572038, "learning_rate": 5.107683854424788e-07, "loss": 2.7146, "step": 202020 }, { "epoch": 8.702244045311625, "learning_rate": 5.107199034669999e-07, "loss": 2.7763, "step": 202040 }, { "epoch": 8.703105483051212, "learning_rate": 5.10671421491521e-07, "loss": 2.9722, "step": 202060 }, { "epoch": 8.7039669207908, "learning_rate": 5.106229395160421e-07, "loss": 2.8961, "step": 202080 }, { "epoch": 8.704828358530387, "learning_rate": 5.105744575405632e-07, "loss": 2.8863, "step": 202100 }, { "epoch": 8.705689796269974, "learning_rate": 5.105259755650844e-07, "loss": 2.788, "step": 202120 }, { "epoch": 8.706551234009561, "learning_rate": 5.104774935896055e-07, "loss": 2.7852, "step": 202140 }, { "epoch": 8.70741267174915, "learning_rate": 5.104290116141265e-07, "loss": 2.7038, "step": 202160 }, { "epoch": 8.708274109488737, "learning_rate": 5.103805296386476e-07, "loss": 2.8857, "step": 202180 }, { "epoch": 8.709135547228325, "learning_rate": 5.103320476631689e-07, "loss": 2.6962, "step": 202200 }, { "epoch": 8.709996984967912, "learning_rate": 5.102835656876899e-07, "loss": 2.8276, "step": 202220 }, { "epoch": 8.7108584227075, "learning_rate": 5.102350837122109e-07, "loss": 2.9324, "step": 202240 }, { "epoch": 8.711719860447086, "learning_rate": 5.101866017367321e-07, "loss": 3.118, "step": 202260 }, { "epoch": 8.712581298186674, "learning_rate": 5.101381197612532e-07, "loss": 2.7478, "step": 202280 }, { "epoch": 8.713442735926261, "learning_rate": 5.100896377857742e-07, "loss": 2.7874, "step": 202300 }, { "epoch": 8.714304173665848, "learning_rate": 5.100411558102953e-07, "loss": 2.6344, "step": 202320 }, { "epoch": 8.715165611405435, "learning_rate": 5.099926738348165e-07, "loss": 2.6481, "step": 202340 }, { "epoch": 8.716027049145023, "learning_rate": 5.099441918593376e-07, "loss": 2.8809, "step": 202360 }, { "epoch": 8.71688848688461, "learning_rate": 5.098957098838587e-07, "loss": 2.7905, "step": 202380 }, { "epoch": 8.717749924624197, "learning_rate": 5.098472279083798e-07, "loss": 2.8901, "step": 202400 }, { "epoch": 8.718611362363784, "learning_rate": 5.097987459329009e-07, "loss": 2.8081, "step": 202420 }, { "epoch": 8.719472800103372, "learning_rate": 5.09750263957422e-07, "loss": 2.837, "step": 202440 }, { "epoch": 8.72033423784296, "learning_rate": 5.097017819819431e-07, "loss": 2.77, "step": 202460 }, { "epoch": 8.721195675582548, "learning_rate": 5.096533000064642e-07, "loss": 2.6409, "step": 202480 }, { "epoch": 8.722057113322135, "learning_rate": 5.096048180309854e-07, "loss": 2.9066, "step": 202500 }, { "epoch": 8.722918551061722, "learning_rate": 5.095563360555065e-07, "loss": 2.6541, "step": 202520 }, { "epoch": 8.72377998880131, "learning_rate": 5.095078540800275e-07, "loss": 2.8906, "step": 202540 }, { "epoch": 8.724641426540897, "learning_rate": 5.094593721045486e-07, "loss": 3.0343, "step": 202560 }, { "epoch": 8.725502864280484, "learning_rate": 5.094108901290699e-07, "loss": 2.9966, "step": 202580 }, { "epoch": 8.726364302020071, "learning_rate": 5.093624081535908e-07, "loss": 2.6479, "step": 202600 }, { "epoch": 8.727225739759659, "learning_rate": 5.09313926178112e-07, "loss": 2.8328, "step": 202620 }, { "epoch": 8.728087177499246, "learning_rate": 5.092654442026331e-07, "loss": 2.7923, "step": 202640 }, { "epoch": 8.728948615238833, "learning_rate": 5.092169622271542e-07, "loss": 2.9302, "step": 202660 }, { "epoch": 8.72981005297842, "learning_rate": 5.091684802516752e-07, "loss": 2.8886, "step": 202680 }, { "epoch": 8.730671490718008, "learning_rate": 5.091199982761965e-07, "loss": 2.7802, "step": 202700 }, { "epoch": 8.731532928457597, "learning_rate": 5.090715163007176e-07, "loss": 2.7921, "step": 202720 }, { "epoch": 8.732394366197184, "learning_rate": 5.090230343252387e-07, "loss": 2.8147, "step": 202740 }, { "epoch": 8.733255803936771, "learning_rate": 5.089745523497597e-07, "loss": 2.8923, "step": 202760 }, { "epoch": 8.734117241676358, "learning_rate": 5.089260703742808e-07, "loss": 2.9598, "step": 202780 }, { "epoch": 8.734978679415946, "learning_rate": 5.088775883988019e-07, "loss": 2.7588, "step": 202800 }, { "epoch": 8.735840117155533, "learning_rate": 5.08829106423323e-07, "loss": 2.8756, "step": 202820 }, { "epoch": 8.73670155489512, "learning_rate": 5.087806244478441e-07, "loss": 2.7777, "step": 202840 }, { "epoch": 8.737562992634707, "learning_rate": 5.087321424723652e-07, "loss": 2.8854, "step": 202860 }, { "epoch": 8.738424430374295, "learning_rate": 5.086836604968864e-07, "loss": 2.7083, "step": 202880 }, { "epoch": 8.739285868113882, "learning_rate": 5.086351785214074e-07, "loss": 2.8746, "step": 202900 }, { "epoch": 8.740147305853469, "learning_rate": 5.085866965459285e-07, "loss": 2.8011, "step": 202920 }, { "epoch": 8.741008743593056, "learning_rate": 5.085382145704496e-07, "loss": 2.8455, "step": 202940 }, { "epoch": 8.741870181332644, "learning_rate": 5.084897325949709e-07, "loss": 2.7942, "step": 202960 }, { "epoch": 8.74273161907223, "learning_rate": 5.084412506194918e-07, "loss": 3.0209, "step": 202980 }, { "epoch": 8.743593056811818, "learning_rate": 5.08392768644013e-07, "loss": 2.8514, "step": 203000 }, { "epoch": 8.744454494551407, "learning_rate": 5.083442866685341e-07, "loss": 2.8833, "step": 203020 }, { "epoch": 8.745315932290994, "learning_rate": 5.082958046930552e-07, "loss": 2.8089, "step": 203040 }, { "epoch": 8.746177370030582, "learning_rate": 5.082473227175762e-07, "loss": 2.8259, "step": 203060 }, { "epoch": 8.747038807770169, "learning_rate": 5.081988407420975e-07, "loss": 2.9265, "step": 203080 }, { "epoch": 8.747900245509756, "learning_rate": 5.081503587666185e-07, "loss": 2.7484, "step": 203100 }, { "epoch": 8.748761683249343, "learning_rate": 5.081018767911397e-07, "loss": 2.7716, "step": 203120 }, { "epoch": 8.74962312098893, "learning_rate": 5.080533948156606e-07, "loss": 2.9734, "step": 203140 }, { "epoch": 8.750484558728518, "learning_rate": 5.080049128401818e-07, "loss": 2.8085, "step": 203160 }, { "epoch": 8.751345996468105, "learning_rate": 5.079564308647029e-07, "loss": 2.8456, "step": 203180 }, { "epoch": 8.752207434207692, "learning_rate": 5.079079488892242e-07, "loss": 2.8985, "step": 203200 }, { "epoch": 8.75306887194728, "learning_rate": 5.078594669137452e-07, "loss": 2.9077, "step": 203220 }, { "epoch": 8.753930309686867, "learning_rate": 5.078109849382663e-07, "loss": 2.75, "step": 203240 }, { "epoch": 8.754791747426454, "learning_rate": 5.077625029627874e-07, "loss": 2.8891, "step": 203260 }, { "epoch": 8.755653185166041, "learning_rate": 5.077140209873085e-07, "loss": 2.8053, "step": 203280 }, { "epoch": 8.75651462290563, "learning_rate": 5.076655390118295e-07, "loss": 2.9073, "step": 203300 }, { "epoch": 8.757376060645218, "learning_rate": 5.076170570363506e-07, "loss": 2.6256, "step": 203320 }, { "epoch": 8.758237498384805, "learning_rate": 5.075685750608719e-07, "loss": 2.757, "step": 203340 }, { "epoch": 8.759098936124392, "learning_rate": 5.075200930853928e-07, "loss": 2.9073, "step": 203360 }, { "epoch": 8.75996037386398, "learning_rate": 5.07471611109914e-07, "loss": 2.8246, "step": 203380 }, { "epoch": 8.760821811603567, "learning_rate": 5.074231291344351e-07, "loss": 2.8745, "step": 203400 }, { "epoch": 8.761683249343154, "learning_rate": 5.073746471589562e-07, "loss": 2.7888, "step": 203420 }, { "epoch": 8.762544687082741, "learning_rate": 5.073261651834772e-07, "loss": 2.595, "step": 203440 }, { "epoch": 8.763406124822328, "learning_rate": 5.072776832079985e-07, "loss": 2.8389, "step": 203460 }, { "epoch": 8.764267562561916, "learning_rate": 5.072292012325195e-07, "loss": 2.8873, "step": 203480 }, { "epoch": 8.765129000301503, "learning_rate": 5.071807192570407e-07, "loss": 2.9035, "step": 203500 }, { "epoch": 8.76599043804109, "learning_rate": 5.071322372815617e-07, "loss": 2.7774, "step": 203520 }, { "epoch": 8.766851875780677, "learning_rate": 5.070837553060828e-07, "loss": 2.7589, "step": 203540 }, { "epoch": 8.767713313520265, "learning_rate": 5.070352733306039e-07, "loss": 2.8061, "step": 203560 }, { "epoch": 8.768574751259852, "learning_rate": 5.069867913551251e-07, "loss": 2.7734, "step": 203580 }, { "epoch": 8.76943618899944, "learning_rate": 5.069383093796462e-07, "loss": 2.8127, "step": 203600 }, { "epoch": 8.770297626739028, "learning_rate": 5.068898274041673e-07, "loss": 2.8392, "step": 203620 }, { "epoch": 8.771159064478615, "learning_rate": 5.068413454286884e-07, "loss": 2.7593, "step": 203640 }, { "epoch": 8.772020502218203, "learning_rate": 5.067928634532096e-07, "loss": 2.8513, "step": 203660 }, { "epoch": 8.77288193995779, "learning_rate": 5.067443814777305e-07, "loss": 2.8826, "step": 203680 }, { "epoch": 8.773743377697377, "learning_rate": 5.066958995022518e-07, "loss": 2.7148, "step": 203700 }, { "epoch": 8.774604815436964, "learning_rate": 5.066474175267729e-07, "loss": 2.8295, "step": 203720 }, { "epoch": 8.775466253176551, "learning_rate": 5.065989355512939e-07, "loss": 2.9243, "step": 203740 }, { "epoch": 8.776327690916139, "learning_rate": 5.06550453575815e-07, "loss": 2.7898, "step": 203760 }, { "epoch": 8.777189128655726, "learning_rate": 5.065019716003362e-07, "loss": 2.8652, "step": 203780 }, { "epoch": 8.778050566395313, "learning_rate": 5.064534896248571e-07, "loss": 2.7303, "step": 203800 }, { "epoch": 8.7789120041349, "learning_rate": 5.064050076493784e-07, "loss": 2.8705, "step": 203820 }, { "epoch": 8.779773441874488, "learning_rate": 5.063565256738995e-07, "loss": 2.7126, "step": 203840 }, { "epoch": 8.780634879614077, "learning_rate": 5.063080436984206e-07, "loss": 2.7751, "step": 203860 }, { "epoch": 8.781496317353664, "learning_rate": 5.062595617229416e-07, "loss": 2.8544, "step": 203880 }, { "epoch": 8.782357755093251, "learning_rate": 5.062110797474627e-07, "loss": 2.713, "step": 203900 }, { "epoch": 8.783219192832838, "learning_rate": 5.061625977719839e-07, "loss": 2.7746, "step": 203920 }, { "epoch": 8.784080630572426, "learning_rate": 5.061141157965049e-07, "loss": 2.8726, "step": 203940 }, { "epoch": 8.784942068312013, "learning_rate": 5.060656338210261e-07, "loss": 2.8837, "step": 203960 }, { "epoch": 8.7858035060516, "learning_rate": 5.060171518455472e-07, "loss": 2.7561, "step": 203980 }, { "epoch": 8.786664943791187, "learning_rate": 5.059686698700683e-07, "loss": 2.7602, "step": 204000 }, { "epoch": 8.787526381530775, "learning_rate": 5.059201878945894e-07, "loss": 2.777, "step": 204020 }, { "epoch": 8.788387819270362, "learning_rate": 5.058717059191105e-07, "loss": 2.724, "step": 204040 }, { "epoch": 8.78924925700995, "learning_rate": 5.058232239436315e-07, "loss": 2.8531, "step": 204060 }, { "epoch": 8.790110694749536, "learning_rate": 5.057747419681527e-07, "loss": 2.9155, "step": 204080 }, { "epoch": 8.790972132489124, "learning_rate": 5.057262599926739e-07, "loss": 2.8578, "step": 204100 }, { "epoch": 8.791833570228711, "learning_rate": 5.056777780171949e-07, "loss": 2.8149, "step": 204120 }, { "epoch": 8.792695007968298, "learning_rate": 5.05629296041716e-07, "loss": 2.7478, "step": 204140 }, { "epoch": 8.793556445707887, "learning_rate": 5.055808140662372e-07, "loss": 2.8813, "step": 204160 }, { "epoch": 8.794417883447474, "learning_rate": 5.055323320907581e-07, "loss": 2.8816, "step": 204180 }, { "epoch": 8.795279321187062, "learning_rate": 5.054838501152794e-07, "loss": 2.7442, "step": 204200 }, { "epoch": 8.796140758926649, "learning_rate": 5.054353681398005e-07, "loss": 2.7913, "step": 204220 }, { "epoch": 8.797002196666236, "learning_rate": 5.053868861643216e-07, "loss": 2.817, "step": 204240 }, { "epoch": 8.797863634405823, "learning_rate": 5.053384041888426e-07, "loss": 2.979, "step": 204260 }, { "epoch": 8.79872507214541, "learning_rate": 5.052899222133638e-07, "loss": 2.9678, "step": 204280 }, { "epoch": 8.799586509884998, "learning_rate": 5.052414402378849e-07, "loss": 2.8938, "step": 204300 }, { "epoch": 8.800447947624585, "learning_rate": 5.051929582624061e-07, "loss": 2.9322, "step": 204320 }, { "epoch": 8.801309385364172, "learning_rate": 5.051444762869271e-07, "loss": 2.8114, "step": 204340 }, { "epoch": 8.80217082310376, "learning_rate": 5.050959943114482e-07, "loss": 2.6833, "step": 204360 }, { "epoch": 8.803032260843347, "learning_rate": 5.050475123359693e-07, "loss": 2.9219, "step": 204380 }, { "epoch": 8.803893698582934, "learning_rate": 5.049990303604902e-07, "loss": 2.9483, "step": 204400 }, { "epoch": 8.804755136322523, "learning_rate": 5.049505483850115e-07, "loss": 2.9087, "step": 204420 }, { "epoch": 8.80561657406211, "learning_rate": 5.049020664095325e-07, "loss": 2.8359, "step": 204440 }, { "epoch": 8.806478011801698, "learning_rate": 5.048535844340538e-07, "loss": 2.68, "step": 204460 }, { "epoch": 8.807339449541285, "learning_rate": 5.048051024585748e-07, "loss": 2.704, "step": 204480 }, { "epoch": 8.808200887280872, "learning_rate": 5.04756620483096e-07, "loss": 2.9191, "step": 204500 }, { "epoch": 8.80906232502046, "learning_rate": 5.04708138507617e-07, "loss": 2.7754, "step": 204520 }, { "epoch": 8.809923762760047, "learning_rate": 5.046596565321382e-07, "loss": 2.8191, "step": 204540 }, { "epoch": 8.810785200499634, "learning_rate": 5.046111745566591e-07, "loss": 2.7213, "step": 204560 }, { "epoch": 8.811646638239221, "learning_rate": 5.045626925811804e-07, "loss": 2.7574, "step": 204580 }, { "epoch": 8.812508075978808, "learning_rate": 5.045142106057015e-07, "loss": 2.7357, "step": 204600 }, { "epoch": 8.813369513718396, "learning_rate": 5.044657286302226e-07, "loss": 2.9433, "step": 204620 }, { "epoch": 8.814230951457983, "learning_rate": 5.044172466547436e-07, "loss": 2.6935, "step": 204640 }, { "epoch": 8.81509238919757, "learning_rate": 5.043687646792648e-07, "loss": 2.8591, "step": 204660 }, { "epoch": 8.815953826937157, "learning_rate": 5.043202827037859e-07, "loss": 2.8243, "step": 204680 }, { "epoch": 8.816815264676745, "learning_rate": 5.042718007283071e-07, "loss": 2.6521, "step": 204700 }, { "epoch": 8.817676702416334, "learning_rate": 5.042233187528281e-07, "loss": 2.7583, "step": 204720 }, { "epoch": 8.81853814015592, "learning_rate": 5.041748367773492e-07, "loss": 2.6812, "step": 204740 }, { "epoch": 8.819399577895508, "learning_rate": 5.041263548018703e-07, "loss": 2.8878, "step": 204760 }, { "epoch": 8.820261015635095, "learning_rate": 5.040778728263915e-07, "loss": 2.8116, "step": 204780 }, { "epoch": 8.821122453374683, "learning_rate": 5.040293908509125e-07, "loss": 2.8695, "step": 204800 }, { "epoch": 8.82198389111427, "learning_rate": 5.039809088754337e-07, "loss": 2.766, "step": 204820 }, { "epoch": 8.822845328853857, "learning_rate": 5.039324268999548e-07, "loss": 2.7355, "step": 204840 }, { "epoch": 8.823706766593444, "learning_rate": 5.038839449244758e-07, "loss": 2.7593, "step": 204860 }, { "epoch": 8.824568204333032, "learning_rate": 5.038354629489969e-07, "loss": 2.6035, "step": 204880 }, { "epoch": 8.825429642072619, "learning_rate": 5.037869809735181e-07, "loss": 2.7999, "step": 204900 }, { "epoch": 8.826291079812206, "learning_rate": 5.037384989980391e-07, "loss": 2.7785, "step": 204920 }, { "epoch": 8.827152517551793, "learning_rate": 5.036900170225602e-07, "loss": 2.9095, "step": 204940 }, { "epoch": 8.82801395529138, "learning_rate": 5.036415350470814e-07, "loss": 2.8641, "step": 204960 }, { "epoch": 8.82887539303097, "learning_rate": 5.035930530716025e-07, "loss": 2.9096, "step": 204980 }, { "epoch": 8.829736830770557, "learning_rate": 5.035445710961236e-07, "loss": 2.7437, "step": 205000 }, { "epoch": 8.830598268510144, "learning_rate": 5.034960891206446e-07, "loss": 2.844, "step": 205020 }, { "epoch": 8.831459706249731, "learning_rate": 5.034476071451658e-07, "loss": 2.7729, "step": 205040 }, { "epoch": 8.832321143989319, "learning_rate": 5.033991251696869e-07, "loss": 2.8474, "step": 205060 }, { "epoch": 8.833182581728906, "learning_rate": 5.03350643194208e-07, "loss": 2.8222, "step": 205080 }, { "epoch": 8.834044019468493, "learning_rate": 5.033021612187291e-07, "loss": 2.8051, "step": 205100 }, { "epoch": 8.83490545720808, "learning_rate": 5.032536792432502e-07, "loss": 2.6721, "step": 205120 }, { "epoch": 8.835766894947668, "learning_rate": 5.032051972677713e-07, "loss": 2.7249, "step": 205140 }, { "epoch": 8.836628332687255, "learning_rate": 5.031567152922924e-07, "loss": 2.7986, "step": 205160 }, { "epoch": 8.837489770426842, "learning_rate": 5.031082333168135e-07, "loss": 2.7748, "step": 205180 }, { "epoch": 8.83835120816643, "learning_rate": 5.030597513413347e-07, "loss": 2.8704, "step": 205200 }, { "epoch": 8.839212645906017, "learning_rate": 5.030112693658558e-07, "loss": 2.9582, "step": 205220 }, { "epoch": 8.840074083645604, "learning_rate": 5.029627873903768e-07, "loss": 3.0258, "step": 205240 }, { "epoch": 8.840935521385191, "learning_rate": 5.029143054148979e-07, "loss": 3.0356, "step": 205260 }, { "epoch": 8.84179695912478, "learning_rate": 5.028658234394191e-07, "loss": 2.8522, "step": 205280 }, { "epoch": 8.842658396864367, "learning_rate": 5.028173414639402e-07, "loss": 2.7104, "step": 205300 }, { "epoch": 8.843519834603955, "learning_rate": 5.027688594884613e-07, "loss": 2.912, "step": 205320 }, { "epoch": 8.844381272343542, "learning_rate": 5.027203775129823e-07, "loss": 2.7909, "step": 205340 }, { "epoch": 8.845242710083129, "learning_rate": 5.026718955375035e-07, "loss": 2.8626, "step": 205360 }, { "epoch": 8.846104147822716, "learning_rate": 5.026234135620245e-07, "loss": 2.8831, "step": 205380 }, { "epoch": 8.846965585562304, "learning_rate": 5.025749315865457e-07, "loss": 2.7329, "step": 205400 }, { "epoch": 8.84782702330189, "learning_rate": 5.025264496110668e-07, "loss": 2.9075, "step": 205420 }, { "epoch": 8.848688461041478, "learning_rate": 5.024779676355881e-07, "loss": 2.8838, "step": 205440 }, { "epoch": 8.849549898781065, "learning_rate": 5.02429485660109e-07, "loss": 2.9157, "step": 205460 }, { "epoch": 8.850411336520652, "learning_rate": 5.023810036846301e-07, "loss": 2.922, "step": 205480 }, { "epoch": 8.85127277426024, "learning_rate": 5.023325217091512e-07, "loss": 2.8131, "step": 205500 }, { "epoch": 8.852134211999827, "learning_rate": 5.022840397336723e-07, "loss": 2.7611, "step": 205520 }, { "epoch": 8.852995649739416, "learning_rate": 5.022355577581934e-07, "loss": 2.6802, "step": 205540 }, { "epoch": 8.853857087479003, "learning_rate": 5.021870757827145e-07, "loss": 2.8463, "step": 205560 }, { "epoch": 8.85471852521859, "learning_rate": 5.021385938072357e-07, "loss": 2.7438, "step": 205580 }, { "epoch": 8.855579962958178, "learning_rate": 5.020901118317568e-07, "loss": 2.6723, "step": 205600 }, { "epoch": 8.856441400697765, "learning_rate": 5.020416298562778e-07, "loss": 2.7466, "step": 205620 }, { "epoch": 8.857302838437352, "learning_rate": 5.019931478807989e-07, "loss": 2.7885, "step": 205640 }, { "epoch": 8.85816427617694, "learning_rate": 5.019446659053201e-07, "loss": 2.6155, "step": 205660 }, { "epoch": 8.859025713916527, "learning_rate": 5.018961839298411e-07, "loss": 2.9405, "step": 205680 }, { "epoch": 8.859887151656114, "learning_rate": 5.018477019543623e-07, "loss": 2.7916, "step": 205700 }, { "epoch": 8.860748589395701, "learning_rate": 5.017992199788834e-07, "loss": 2.7625, "step": 205720 }, { "epoch": 8.861610027135288, "learning_rate": 5.017507380034045e-07, "loss": 2.7498, "step": 205740 }, { "epoch": 8.862471464874876, "learning_rate": 5.017022560279256e-07, "loss": 2.8547, "step": 205760 }, { "epoch": 8.863332902614463, "learning_rate": 5.016537740524467e-07, "loss": 2.8807, "step": 205780 }, { "epoch": 8.86419434035405, "learning_rate": 5.016052920769678e-07, "loss": 2.946, "step": 205800 }, { "epoch": 8.865055778093637, "learning_rate": 5.01556810101489e-07, "loss": 2.8499, "step": 205820 }, { "epoch": 8.865917215833226, "learning_rate": 5.0150832812601e-07, "loss": 2.633, "step": 205840 }, { "epoch": 8.866778653572814, "learning_rate": 5.014598461505311e-07, "loss": 2.9719, "step": 205860 }, { "epoch": 8.867640091312401, "learning_rate": 5.014113641750522e-07, "loss": 2.8063, "step": 205880 }, { "epoch": 8.868501529051988, "learning_rate": 5.013628821995734e-07, "loss": 2.8222, "step": 205900 }, { "epoch": 8.869362966791575, "learning_rate": 5.013144002240944e-07, "loss": 2.7787, "step": 205920 }, { "epoch": 8.870224404531163, "learning_rate": 5.012659182486156e-07, "loss": 2.7994, "step": 205940 }, { "epoch": 8.87108584227075, "learning_rate": 5.012174362731367e-07, "loss": 2.8412, "step": 205960 }, { "epoch": 8.871947280010337, "learning_rate": 5.011689542976578e-07, "loss": 2.8371, "step": 205980 }, { "epoch": 8.872808717749924, "learning_rate": 5.011204723221788e-07, "loss": 2.8634, "step": 206000 }, { "epoch": 8.873670155489512, "learning_rate": 5.010719903466999e-07, "loss": 2.8774, "step": 206020 }, { "epoch": 8.874531593229099, "learning_rate": 5.010235083712211e-07, "loss": 2.6984, "step": 206040 }, { "epoch": 8.875393030968686, "learning_rate": 5.009750263957421e-07, "loss": 2.6922, "step": 206060 }, { "epoch": 8.876254468708273, "learning_rate": 5.009265444202633e-07, "loss": 2.7961, "step": 206080 }, { "epoch": 8.877115906447862, "learning_rate": 5.008780624447844e-07, "loss": 2.8549, "step": 206100 }, { "epoch": 8.87797734418745, "learning_rate": 5.008295804693055e-07, "loss": 2.7169, "step": 206120 }, { "epoch": 8.878838781927037, "learning_rate": 5.007810984938265e-07, "loss": 2.7768, "step": 206140 }, { "epoch": 8.879700219666624, "learning_rate": 5.007326165183477e-07, "loss": 2.9567, "step": 206160 }, { "epoch": 8.880561657406211, "learning_rate": 5.006841345428687e-07, "loss": 2.9466, "step": 206180 }, { "epoch": 8.881423095145799, "learning_rate": 5.0063565256739e-07, "loss": 2.7599, "step": 206200 }, { "epoch": 8.882284532885386, "learning_rate": 5.00587170591911e-07, "loss": 2.7669, "step": 206220 }, { "epoch": 8.883145970624973, "learning_rate": 5.005386886164321e-07, "loss": 2.7219, "step": 206240 }, { "epoch": 8.88400740836456, "learning_rate": 5.004902066409532e-07, "loss": 2.7417, "step": 206260 }, { "epoch": 8.884868846104148, "learning_rate": 5.004417246654745e-07, "loss": 2.6134, "step": 206280 }, { "epoch": 8.885730283843735, "learning_rate": 5.003932426899954e-07, "loss": 3.068, "step": 206300 }, { "epoch": 8.886591721583322, "learning_rate": 5.003447607145166e-07, "loss": 2.7246, "step": 206320 }, { "epoch": 8.88745315932291, "learning_rate": 5.002962787390377e-07, "loss": 2.8051, "step": 206340 }, { "epoch": 8.888314597062497, "learning_rate": 5.002477967635587e-07, "loss": 2.782, "step": 206360 }, { "epoch": 8.889176034802084, "learning_rate": 5.001993147880798e-07, "loss": 2.8031, "step": 206380 }, { "epoch": 8.890037472541673, "learning_rate": 5.00150832812601e-07, "loss": 2.7857, "step": 206400 }, { "epoch": 8.89089891028126, "learning_rate": 5.001023508371221e-07, "loss": 2.6652, "step": 206420 }, { "epoch": 8.891760348020847, "learning_rate": 5.000538688616432e-07, "loss": 3.002, "step": 206440 }, { "epoch": 8.892621785760435, "learning_rate": 5.000053868861643e-07, "loss": 2.6807, "step": 206460 }, { "epoch": 8.893483223500022, "learning_rate": 4.999569049106854e-07, "loss": 2.8004, "step": 206480 }, { "epoch": 8.894344661239609, "learning_rate": 4.999084229352065e-07, "loss": 2.6653, "step": 206500 }, { "epoch": 8.895206098979196, "learning_rate": 4.998599409597277e-07, "loss": 2.8275, "step": 206520 }, { "epoch": 8.896067536718784, "learning_rate": 4.998114589842487e-07, "loss": 2.7458, "step": 206540 }, { "epoch": 8.89692897445837, "learning_rate": 4.997629770087698e-07, "loss": 2.7374, "step": 206560 }, { "epoch": 8.897790412197958, "learning_rate": 4.99714495033291e-07, "loss": 2.7023, "step": 206580 }, { "epoch": 8.898651849937545, "learning_rate": 4.99666013057812e-07, "loss": 2.8753, "step": 206600 }, { "epoch": 8.899513287677133, "learning_rate": 4.996175310823331e-07, "loss": 2.6257, "step": 206620 }, { "epoch": 8.90037472541672, "learning_rate": 4.995690491068542e-07, "loss": 2.8837, "step": 206640 }, { "epoch": 8.901236163156307, "learning_rate": 4.995205671313753e-07, "loss": 2.9417, "step": 206660 }, { "epoch": 8.902097600895896, "learning_rate": 4.994720851558964e-07, "loss": 2.6013, "step": 206680 }, { "epoch": 8.902959038635483, "learning_rate": 4.994236031804176e-07, "loss": 2.9726, "step": 206700 }, { "epoch": 8.90382047637507, "learning_rate": 4.993751212049387e-07, "loss": 2.9955, "step": 206720 }, { "epoch": 8.904681914114658, "learning_rate": 4.993266392294597e-07, "loss": 2.9071, "step": 206740 }, { "epoch": 8.905543351854245, "learning_rate": 4.992781572539808e-07, "loss": 2.748, "step": 206760 }, { "epoch": 8.906404789593832, "learning_rate": 4.992296752785021e-07, "loss": 2.8128, "step": 206780 }, { "epoch": 8.90726622733342, "learning_rate": 4.991811933030231e-07, "loss": 2.7125, "step": 206800 }, { "epoch": 8.908127665073007, "learning_rate": 4.991327113275442e-07, "loss": 2.975, "step": 206820 }, { "epoch": 8.908989102812594, "learning_rate": 4.990842293520653e-07, "loss": 2.8637, "step": 206840 }, { "epoch": 8.909850540552181, "learning_rate": 4.990357473765864e-07, "loss": 2.7322, "step": 206860 }, { "epoch": 8.910711978291769, "learning_rate": 4.989872654011075e-07, "loss": 2.8601, "step": 206880 }, { "epoch": 8.911573416031356, "learning_rate": 4.989387834256287e-07, "loss": 2.8902, "step": 206900 }, { "epoch": 8.912434853770943, "learning_rate": 4.988903014501497e-07, "loss": 2.8591, "step": 206920 }, { "epoch": 8.91329629151053, "learning_rate": 4.988418194746709e-07, "loss": 2.8898, "step": 206940 }, { "epoch": 8.914157729250118, "learning_rate": 4.98793337499192e-07, "loss": 2.6676, "step": 206960 }, { "epoch": 8.915019166989707, "learning_rate": 4.98744855523713e-07, "loss": 2.7823, "step": 206980 }, { "epoch": 8.915880604729294, "learning_rate": 4.986963735482341e-07, "loss": 2.7087, "step": 207000 }, { "epoch": 8.916742042468881, "learning_rate": 4.986478915727554e-07, "loss": 2.7614, "step": 207020 }, { "epoch": 8.917603480208468, "learning_rate": 4.985994095972763e-07, "loss": 2.8083, "step": 207040 }, { "epoch": 8.918464917948056, "learning_rate": 4.985509276217975e-07, "loss": 2.7376, "step": 207060 }, { "epoch": 8.919326355687643, "learning_rate": 4.985024456463186e-07, "loss": 2.7756, "step": 207080 }, { "epoch": 8.92018779342723, "learning_rate": 4.984539636708397e-07, "loss": 2.6233, "step": 207100 }, { "epoch": 8.921049231166817, "learning_rate": 4.984054816953607e-07, "loss": 2.911, "step": 207120 }, { "epoch": 8.921910668906404, "learning_rate": 4.983569997198818e-07, "loss": 2.7265, "step": 207140 }, { "epoch": 8.922772106645992, "learning_rate": 4.983085177444031e-07, "loss": 2.9048, "step": 207160 }, { "epoch": 8.923633544385579, "learning_rate": 4.98260035768924e-07, "loss": 2.7434, "step": 207180 }, { "epoch": 8.924494982125166, "learning_rate": 4.982115537934452e-07, "loss": 2.8449, "step": 207200 }, { "epoch": 8.925356419864753, "learning_rate": 4.981630718179663e-07, "loss": 2.868, "step": 207220 }, { "epoch": 8.926217857604342, "learning_rate": 4.981145898424874e-07, "loss": 3.0536, "step": 207240 }, { "epoch": 8.92707929534393, "learning_rate": 4.980661078670084e-07, "loss": 2.683, "step": 207260 }, { "epoch": 8.927940733083517, "learning_rate": 4.980176258915297e-07, "loss": 2.7975, "step": 207280 }, { "epoch": 8.928802170823104, "learning_rate": 4.979691439160507e-07, "loss": 2.6879, "step": 207300 }, { "epoch": 8.929663608562691, "learning_rate": 4.979206619405719e-07, "loss": 2.8606, "step": 207320 }, { "epoch": 8.930525046302279, "learning_rate": 4.978721799650929e-07, "loss": 2.9253, "step": 207340 }, { "epoch": 8.931386484041866, "learning_rate": 4.97823697989614e-07, "loss": 2.8174, "step": 207360 }, { "epoch": 8.932247921781453, "learning_rate": 4.977752160141351e-07, "loss": 2.7824, "step": 207380 }, { "epoch": 8.93310935952104, "learning_rate": 4.977267340386564e-07, "loss": 2.7739, "step": 207400 }, { "epoch": 8.933970797260628, "learning_rate": 4.976782520631774e-07, "loss": 2.9688, "step": 207420 }, { "epoch": 8.934832235000215, "learning_rate": 4.976297700876985e-07, "loss": 2.7645, "step": 207440 }, { "epoch": 8.935693672739802, "learning_rate": 4.975812881122196e-07, "loss": 2.8543, "step": 207460 }, { "epoch": 8.93655511047939, "learning_rate": 4.975328061367408e-07, "loss": 2.6946, "step": 207480 }, { "epoch": 8.937416548218977, "learning_rate": 4.974843241612617e-07, "loss": 2.6236, "step": 207500 }, { "epoch": 8.938277985958564, "learning_rate": 4.97435842185783e-07, "loss": 2.762, "step": 207520 }, { "epoch": 8.939139423698153, "learning_rate": 4.973873602103041e-07, "loss": 2.8095, "step": 207540 }, { "epoch": 8.94000086143774, "learning_rate": 4.973388782348252e-07, "loss": 2.8577, "step": 207560 }, { "epoch": 8.940862299177327, "learning_rate": 4.972903962593462e-07, "loss": 2.7617, "step": 207580 }, { "epoch": 8.941723736916915, "learning_rate": 4.972419142838673e-07, "loss": 2.9164, "step": 207600 }, { "epoch": 8.942585174656502, "learning_rate": 4.971934323083884e-07, "loss": 2.7899, "step": 207620 }, { "epoch": 8.94344661239609, "learning_rate": 4.971449503329094e-07, "loss": 2.8184, "step": 207640 }, { "epoch": 8.944308050135676, "learning_rate": 4.970964683574307e-07, "loss": 2.9206, "step": 207660 }, { "epoch": 8.945169487875264, "learning_rate": 4.970479863819517e-07, "loss": 2.8246, "step": 207680 }, { "epoch": 8.946030925614851, "learning_rate": 4.969995044064729e-07, "loss": 2.8401, "step": 207700 }, { "epoch": 8.946892363354438, "learning_rate": 4.969510224309939e-07, "loss": 2.7927, "step": 207720 }, { "epoch": 8.947753801094025, "learning_rate": 4.96902540455515e-07, "loss": 2.9633, "step": 207740 }, { "epoch": 8.948615238833613, "learning_rate": 4.968540584800361e-07, "loss": 2.7613, "step": 207760 }, { "epoch": 8.9494766765732, "learning_rate": 4.968055765045574e-07, "loss": 2.6976, "step": 207780 }, { "epoch": 8.950338114312789, "learning_rate": 4.967570945290784e-07, "loss": 2.7946, "step": 207800 }, { "epoch": 8.951199552052376, "learning_rate": 4.967086125535995e-07, "loss": 2.8475, "step": 207820 }, { "epoch": 8.952060989791963, "learning_rate": 4.966601305781206e-07, "loss": 2.8836, "step": 207840 }, { "epoch": 8.95292242753155, "learning_rate": 4.966116486026418e-07, "loss": 2.8895, "step": 207860 }, { "epoch": 8.953783865271138, "learning_rate": 4.965631666271627e-07, "loss": 2.8067, "step": 207880 }, { "epoch": 8.954645303010725, "learning_rate": 4.96514684651684e-07, "loss": 2.7142, "step": 207900 }, { "epoch": 8.955506740750312, "learning_rate": 4.964662026762051e-07, "loss": 2.856, "step": 207920 }, { "epoch": 8.9563681784899, "learning_rate": 4.964177207007261e-07, "loss": 2.7506, "step": 207940 }, { "epoch": 8.957229616229487, "learning_rate": 4.963692387252472e-07, "loss": 2.7947, "step": 207960 }, { "epoch": 8.958091053969074, "learning_rate": 4.963207567497684e-07, "loss": 2.6989, "step": 207980 }, { "epoch": 8.958952491708661, "learning_rate": 4.962722747742894e-07, "loss": 2.7843, "step": 208000 }, { "epoch": 8.959813929448249, "learning_rate": 4.962237927988106e-07, "loss": 2.7649, "step": 208020 }, { "epoch": 8.960675367187836, "learning_rate": 4.961753108233317e-07, "loss": 2.9142, "step": 208040 }, { "epoch": 8.961536804927423, "learning_rate": 4.961268288478528e-07, "loss": 2.7992, "step": 208060 }, { "epoch": 8.96239824266701, "learning_rate": 4.960783468723739e-07, "loss": 2.6827, "step": 208080 }, { "epoch": 8.9632596804066, "learning_rate": 4.96029864896895e-07, "loss": 2.8244, "step": 208100 }, { "epoch": 8.964121118146187, "learning_rate": 4.95981382921416e-07, "loss": 2.6914, "step": 208120 }, { "epoch": 8.964982555885774, "learning_rate": 4.959329009459371e-07, "loss": 2.7354, "step": 208140 }, { "epoch": 8.965843993625361, "learning_rate": 4.958844189704584e-07, "loss": 3.0264, "step": 208160 }, { "epoch": 8.966705431364948, "learning_rate": 4.958359369949794e-07, "loss": 2.7513, "step": 208180 }, { "epoch": 8.967566869104536, "learning_rate": 4.957874550195005e-07, "loss": 2.8886, "step": 208200 }, { "epoch": 8.968428306844123, "learning_rate": 4.957389730440216e-07, "loss": 2.7714, "step": 208220 }, { "epoch": 8.96928974458371, "learning_rate": 4.956904910685427e-07, "loss": 2.8303, "step": 208240 }, { "epoch": 8.970151182323297, "learning_rate": 4.956420090930637e-07, "loss": 2.8362, "step": 208260 }, { "epoch": 8.971012620062885, "learning_rate": 4.95593527117585e-07, "loss": 2.8484, "step": 208280 }, { "epoch": 8.971874057802472, "learning_rate": 4.955450451421061e-07, "loss": 2.7592, "step": 208300 }, { "epoch": 8.972735495542059, "learning_rate": 4.954965631666271e-07, "loss": 2.7266, "step": 208320 }, { "epoch": 8.973596933281646, "learning_rate": 4.954480811911482e-07, "loss": 2.8843, "step": 208340 }, { "epoch": 8.974458371021235, "learning_rate": 4.953995992156694e-07, "loss": 2.9337, "step": 208360 }, { "epoch": 8.975319808760823, "learning_rate": 4.953511172401904e-07, "loss": 2.7508, "step": 208380 }, { "epoch": 8.97618124650041, "learning_rate": 4.953026352647116e-07, "loss": 2.9536, "step": 208400 }, { "epoch": 8.977042684239997, "learning_rate": 4.952541532892327e-07, "loss": 2.8992, "step": 208420 }, { "epoch": 8.977904121979584, "learning_rate": 4.952056713137538e-07, "loss": 2.7903, "step": 208440 }, { "epoch": 8.978765559719172, "learning_rate": 4.951571893382749e-07, "loss": 2.6928, "step": 208460 }, { "epoch": 8.979626997458759, "learning_rate": 4.95108707362796e-07, "loss": 2.7672, "step": 208480 }, { "epoch": 8.980488435198346, "learning_rate": 4.950602253873171e-07, "loss": 2.7821, "step": 208500 }, { "epoch": 8.981349872937933, "learning_rate": 4.950117434118383e-07, "loss": 2.7021, "step": 208520 }, { "epoch": 8.98221131067752, "learning_rate": 4.949632614363593e-07, "loss": 2.9278, "step": 208540 }, { "epoch": 8.983072748417108, "learning_rate": 4.949147794608804e-07, "loss": 2.9182, "step": 208560 }, { "epoch": 8.983934186156695, "learning_rate": 4.948662974854015e-07, "loss": 2.9085, "step": 208580 }, { "epoch": 8.984795623896282, "learning_rate": 4.948178155099227e-07, "loss": 2.6529, "step": 208600 }, { "epoch": 8.98565706163587, "learning_rate": 4.947693335344437e-07, "loss": 2.8609, "step": 208620 }, { "epoch": 8.986518499375457, "learning_rate": 4.947208515589649e-07, "loss": 2.8899, "step": 208640 }, { "epoch": 8.987379937115046, "learning_rate": 4.94672369583486e-07, "loss": 2.7179, "step": 208660 }, { "epoch": 8.988241374854633, "learning_rate": 4.946238876080071e-07, "loss": 3.04, "step": 208680 }, { "epoch": 8.98910281259422, "learning_rate": 4.945754056325281e-07, "loss": 2.8087, "step": 208700 }, { "epoch": 8.989964250333808, "learning_rate": 4.945269236570492e-07, "loss": 2.7465, "step": 208720 }, { "epoch": 8.990825688073395, "learning_rate": 4.944784416815704e-07, "loss": 2.8801, "step": 208740 }, { "epoch": 8.991687125812982, "learning_rate": 4.944299597060913e-07, "loss": 2.791, "step": 208760 }, { "epoch": 8.99254856355257, "learning_rate": 4.943814777306126e-07, "loss": 2.8373, "step": 208780 }, { "epoch": 8.993410001292157, "learning_rate": 4.943329957551337e-07, "loss": 2.6809, "step": 208800 }, { "epoch": 8.994271439031744, "learning_rate": 4.942845137796548e-07, "loss": 2.7167, "step": 208820 }, { "epoch": 8.995132876771331, "learning_rate": 4.942360318041758e-07, "loss": 2.772, "step": 208840 }, { "epoch": 8.995994314510918, "learning_rate": 4.94187549828697e-07, "loss": 2.8149, "step": 208860 }, { "epoch": 8.996855752250505, "learning_rate": 4.941390678532181e-07, "loss": 2.8174, "step": 208880 }, { "epoch": 8.997717189990093, "learning_rate": 4.940905858777393e-07, "loss": 2.907, "step": 208900 }, { "epoch": 8.998578627729682, "learning_rate": 4.940421039022603e-07, "loss": 2.9478, "step": 208920 }, { "epoch": 8.999440065469269, "learning_rate": 4.939936219267814e-07, "loss": 2.9048, "step": 208940 }, { "epoch": 9.000301503208856, "learning_rate": 4.939451399513025e-07, "loss": 2.8208, "step": 208960 }, { "epoch": 9.001162940948443, "learning_rate": 4.938966579758237e-07, "loss": 2.7121, "step": 208980 }, { "epoch": 9.00202437868803, "learning_rate": 4.938481760003447e-07, "loss": 2.8119, "step": 209000 }, { "epoch": 9.002885816427618, "learning_rate": 4.937996940248659e-07, "loss": 2.8549, "step": 209020 }, { "epoch": 9.003747254167205, "learning_rate": 4.93751212049387e-07, "loss": 2.8284, "step": 209040 }, { "epoch": 9.004608691906792, "learning_rate": 4.937027300739081e-07, "loss": 2.7087, "step": 209060 }, { "epoch": 9.00547012964638, "learning_rate": 4.936542480984291e-07, "loss": 2.8894, "step": 209080 }, { "epoch": 9.006331567385967, "learning_rate": 4.936057661229503e-07, "loss": 2.697, "step": 209100 }, { "epoch": 9.007193005125554, "learning_rate": 4.935572841474714e-07, "loss": 2.7267, "step": 209120 }, { "epoch": 9.008054442865141, "learning_rate": 4.935088021719926e-07, "loss": 2.8956, "step": 209140 }, { "epoch": 9.008915880604729, "learning_rate": 4.934603201965136e-07, "loss": 2.866, "step": 209160 }, { "epoch": 9.009777318344316, "learning_rate": 4.934118382210347e-07, "loss": 2.8339, "step": 209180 }, { "epoch": 9.010638756083903, "learning_rate": 4.933633562455558e-07, "loss": 2.6688, "step": 209200 }, { "epoch": 9.011500193823492, "learning_rate": 4.933148742700768e-07, "loss": 2.8458, "step": 209220 }, { "epoch": 9.01236163156308, "learning_rate": 4.93266392294598e-07, "loss": 2.7636, "step": 209240 }, { "epoch": 9.013223069302667, "learning_rate": 4.932179103191191e-07, "loss": 2.7693, "step": 209260 }, { "epoch": 9.014084507042254, "learning_rate": 4.931694283436403e-07, "loss": 2.8906, "step": 209280 }, { "epoch": 9.014945944781841, "learning_rate": 4.931209463681613e-07, "loss": 2.83, "step": 209300 }, { "epoch": 9.015807382521428, "learning_rate": 4.930724643926824e-07, "loss": 2.8182, "step": 209320 }, { "epoch": 9.016668820261016, "learning_rate": 4.930239824172035e-07, "loss": 2.9462, "step": 209340 }, { "epoch": 9.017530258000603, "learning_rate": 4.929755004417247e-07, "loss": 2.6256, "step": 209360 }, { "epoch": 9.01839169574019, "learning_rate": 4.929270184662457e-07, "loss": 2.813, "step": 209380 }, { "epoch": 9.019253133479777, "learning_rate": 4.928785364907669e-07, "loss": 2.6426, "step": 209400 }, { "epoch": 9.020114571219365, "learning_rate": 4.92830054515288e-07, "loss": 2.6395, "step": 209420 }, { "epoch": 9.020976008958952, "learning_rate": 4.92781572539809e-07, "loss": 2.8371, "step": 209440 }, { "epoch": 9.02183744669854, "learning_rate": 4.927330905643301e-07, "loss": 2.7289, "step": 209460 }, { "epoch": 9.022698884438126, "learning_rate": 4.926846085888513e-07, "loss": 2.6307, "step": 209480 }, { "epoch": 9.023560322177715, "learning_rate": 4.926361266133724e-07, "loss": 2.7825, "step": 209500 }, { "epoch": 9.024421759917303, "learning_rate": 4.925876446378935e-07, "loss": 2.8287, "step": 209520 }, { "epoch": 9.02528319765689, "learning_rate": 4.925391626624146e-07, "loss": 2.7915, "step": 209540 }, { "epoch": 9.026144635396477, "learning_rate": 4.924906806869357e-07, "loss": 2.7484, "step": 209560 }, { "epoch": 9.027006073136064, "learning_rate": 4.924421987114568e-07, "loss": 2.7852, "step": 209580 }, { "epoch": 9.027867510875652, "learning_rate": 4.923937167359779e-07, "loss": 2.7298, "step": 209600 }, { "epoch": 9.028728948615239, "learning_rate": 4.92345234760499e-07, "loss": 2.9529, "step": 209620 }, { "epoch": 9.029590386354826, "learning_rate": 4.922967527850202e-07, "loss": 2.7925, "step": 209640 }, { "epoch": 9.030451824094413, "learning_rate": 4.922482708095413e-07, "loss": 2.8614, "step": 209660 }, { "epoch": 9.031313261834, "learning_rate": 4.921997888340623e-07, "loss": 2.8169, "step": 209680 }, { "epoch": 9.032174699573588, "learning_rate": 4.921513068585834e-07, "loss": 2.7182, "step": 209700 }, { "epoch": 9.033036137313175, "learning_rate": 4.921028248831046e-07, "loss": 2.765, "step": 209720 }, { "epoch": 9.033897575052762, "learning_rate": 4.920543429076257e-07, "loss": 2.728, "step": 209740 }, { "epoch": 9.03475901279235, "learning_rate": 4.920058609321467e-07, "loss": 2.677, "step": 209760 }, { "epoch": 9.035620450531939, "learning_rate": 4.919573789566679e-07, "loss": 2.6246, "step": 209780 }, { "epoch": 9.036481888271526, "learning_rate": 4.91908896981189e-07, "loss": 2.7099, "step": 209800 }, { "epoch": 9.037343326011113, "learning_rate": 4.9186041500571e-07, "loss": 2.7465, "step": 209820 }, { "epoch": 9.0382047637507, "learning_rate": 4.918119330302311e-07, "loss": 2.8671, "step": 209840 }, { "epoch": 9.039066201490288, "learning_rate": 4.917634510547523e-07, "loss": 2.7427, "step": 209860 }, { "epoch": 9.039927639229875, "learning_rate": 4.917149690792734e-07, "loss": 2.8203, "step": 209880 }, { "epoch": 9.040789076969462, "learning_rate": 4.916664871037945e-07, "loss": 2.7269, "step": 209900 }, { "epoch": 9.04165051470905, "learning_rate": 4.916180051283156e-07, "loss": 2.9496, "step": 209920 }, { "epoch": 9.042511952448637, "learning_rate": 4.915695231528367e-07, "loss": 2.7703, "step": 209940 }, { "epoch": 9.043373390188224, "learning_rate": 4.915210411773578e-07, "loss": 2.7397, "step": 209960 }, { "epoch": 9.044234827927811, "learning_rate": 4.914725592018789e-07, "loss": 2.7152, "step": 209980 }, { "epoch": 9.045096265667398, "learning_rate": 4.914240772264e-07, "loss": 2.6606, "step": 210000 }, { "epoch": 9.045957703406986, "learning_rate": 4.913755952509212e-07, "loss": 2.8625, "step": 210020 }, { "epoch": 9.046819141146573, "learning_rate": 4.913271132754423e-07, "loss": 2.909, "step": 210040 }, { "epoch": 9.047680578886162, "learning_rate": 4.912786312999633e-07, "loss": 2.7786, "step": 210060 }, { "epoch": 9.048542016625749, "learning_rate": 4.912301493244844e-07, "loss": 2.829, "step": 210080 }, { "epoch": 9.049403454365336, "learning_rate": 4.911816673490056e-07, "loss": 2.8775, "step": 210100 }, { "epoch": 9.050264892104924, "learning_rate": 4.911331853735266e-07, "loss": 2.664, "step": 210120 }, { "epoch": 9.05112632984451, "learning_rate": 4.910847033980478e-07, "loss": 2.7338, "step": 210140 }, { "epoch": 9.051987767584098, "learning_rate": 4.910362214225689e-07, "loss": 2.6413, "step": 210160 }, { "epoch": 9.052849205323685, "learning_rate": 4.9098773944709e-07, "loss": 2.7625, "step": 210180 }, { "epoch": 9.053710643063273, "learning_rate": 4.90939257471611e-07, "loss": 2.7116, "step": 210200 }, { "epoch": 9.05457208080286, "learning_rate": 4.908907754961322e-07, "loss": 2.7952, "step": 210220 }, { "epoch": 9.055433518542447, "learning_rate": 4.908422935206533e-07, "loss": 2.7026, "step": 210240 }, { "epoch": 9.056294956282034, "learning_rate": 4.907938115451745e-07, "loss": 2.7441, "step": 210260 }, { "epoch": 9.057156394021622, "learning_rate": 4.907453295696955e-07, "loss": 2.8, "step": 210280 }, { "epoch": 9.058017831761209, "learning_rate": 4.906968475942166e-07, "loss": 2.6163, "step": 210300 }, { "epoch": 9.058879269500796, "learning_rate": 4.906483656187377e-07, "loss": 2.8324, "step": 210320 }, { "epoch": 9.059740707240385, "learning_rate": 4.905998836432587e-07, "loss": 2.8528, "step": 210340 }, { "epoch": 9.060602144979972, "learning_rate": 4.905514016677799e-07, "loss": 2.7707, "step": 210360 }, { "epoch": 9.06146358271956, "learning_rate": 4.90502919692301e-07, "loss": 2.7224, "step": 210380 }, { "epoch": 9.062325020459147, "learning_rate": 4.904544377168222e-07, "loss": 2.8389, "step": 210400 }, { "epoch": 9.063186458198734, "learning_rate": 4.904059557413432e-07, "loss": 2.7061, "step": 210420 }, { "epoch": 9.064047895938321, "learning_rate": 4.903574737658643e-07, "loss": 2.7977, "step": 210440 }, { "epoch": 9.064909333677909, "learning_rate": 4.903089917903854e-07, "loss": 2.992, "step": 210460 }, { "epoch": 9.065770771417496, "learning_rate": 4.902605098149066e-07, "loss": 3.0235, "step": 210480 }, { "epoch": 9.066632209157083, "learning_rate": 4.902120278394276e-07, "loss": 3.007, "step": 210500 }, { "epoch": 9.06749364689667, "learning_rate": 4.901635458639488e-07, "loss": 2.8629, "step": 210520 }, { "epoch": 9.068355084636258, "learning_rate": 4.901150638884699e-07, "loss": 2.8134, "step": 210540 }, { "epoch": 9.069216522375845, "learning_rate": 4.90066581912991e-07, "loss": 2.7196, "step": 210560 }, { "epoch": 9.070077960115432, "learning_rate": 4.90018099937512e-07, "loss": 2.7868, "step": 210580 }, { "epoch": 9.07093939785502, "learning_rate": 4.899696179620332e-07, "loss": 2.8761, "step": 210600 }, { "epoch": 9.071800835594608, "learning_rate": 4.899211359865543e-07, "loss": 2.74, "step": 210620 }, { "epoch": 9.072662273334196, "learning_rate": 4.898726540110755e-07, "loss": 2.7413, "step": 210640 }, { "epoch": 9.073523711073783, "learning_rate": 4.898241720355965e-07, "loss": 2.8491, "step": 210660 }, { "epoch": 9.07438514881337, "learning_rate": 4.897756900601176e-07, "loss": 2.8067, "step": 210680 }, { "epoch": 9.075246586552957, "learning_rate": 4.897272080846387e-07, "loss": 2.9919, "step": 210700 }, { "epoch": 9.076108024292544, "learning_rate": 4.8967872610916e-07, "loss": 2.7304, "step": 210720 }, { "epoch": 9.076969462032132, "learning_rate": 4.896302441336809e-07, "loss": 2.8144, "step": 210740 }, { "epoch": 9.077830899771719, "learning_rate": 4.895817621582021e-07, "loss": 2.6616, "step": 210760 }, { "epoch": 9.078692337511306, "learning_rate": 4.895332801827232e-07, "loss": 2.7471, "step": 210780 }, { "epoch": 9.079553775250893, "learning_rate": 4.894847982072442e-07, "loss": 2.6589, "step": 210800 }, { "epoch": 9.08041521299048, "learning_rate": 4.894363162317653e-07, "loss": 2.7406, "step": 210820 }, { "epoch": 9.081276650730068, "learning_rate": 4.893878342562864e-07, "loss": 2.7401, "step": 210840 }, { "epoch": 9.082138088469655, "learning_rate": 4.893393522808076e-07, "loss": 2.7378, "step": 210860 }, { "epoch": 9.082999526209242, "learning_rate": 4.892908703053286e-07, "loss": 2.8288, "step": 210880 }, { "epoch": 9.083860963948831, "learning_rate": 4.892423883298498e-07, "loss": 2.682, "step": 210900 }, { "epoch": 9.084722401688419, "learning_rate": 4.891939063543709e-07, "loss": 2.8812, "step": 210920 }, { "epoch": 9.085583839428006, "learning_rate": 4.89145424378892e-07, "loss": 2.7509, "step": 210940 }, { "epoch": 9.086445277167593, "learning_rate": 4.89096942403413e-07, "loss": 2.6837, "step": 210960 }, { "epoch": 9.08730671490718, "learning_rate": 4.890484604279343e-07, "loss": 2.7617, "step": 210980 }, { "epoch": 9.088168152646768, "learning_rate": 4.889999784524553e-07, "loss": 2.7113, "step": 211000 }, { "epoch": 9.089029590386355, "learning_rate": 4.889514964769764e-07, "loss": 2.8437, "step": 211020 }, { "epoch": 9.089891028125942, "learning_rate": 4.889030145014975e-07, "loss": 2.7288, "step": 211040 }, { "epoch": 9.09075246586553, "learning_rate": 4.888545325260186e-07, "loss": 2.9409, "step": 211060 }, { "epoch": 9.091613903605117, "learning_rate": 4.888060505505397e-07, "loss": 2.7346, "step": 211080 }, { "epoch": 9.092475341344704, "learning_rate": 4.88757568575061e-07, "loss": 2.7377, "step": 211100 }, { "epoch": 9.093336779084291, "learning_rate": 4.887090865995819e-07, "loss": 2.8065, "step": 211120 }, { "epoch": 9.094198216823878, "learning_rate": 4.886606046241031e-07, "loss": 2.7825, "step": 211140 }, { "epoch": 9.095059654563466, "learning_rate": 4.886121226486242e-07, "loss": 2.9028, "step": 211160 }, { "epoch": 9.095921092303055, "learning_rate": 4.885636406731452e-07, "loss": 2.9024, "step": 211180 }, { "epoch": 9.096782530042642, "learning_rate": 4.885151586976663e-07, "loss": 2.7885, "step": 211200 }, { "epoch": 9.09764396778223, "learning_rate": 4.884666767221876e-07, "loss": 2.8232, "step": 211220 }, { "epoch": 9.098505405521816, "learning_rate": 4.884181947467086e-07, "loss": 2.8803, "step": 211240 }, { "epoch": 9.099366843261404, "learning_rate": 4.883697127712297e-07, "loss": 2.75, "step": 211260 }, { "epoch": 9.100228281000991, "learning_rate": 4.883212307957508e-07, "loss": 2.6685, "step": 211280 }, { "epoch": 9.101089718740578, "learning_rate": 4.882727488202719e-07, "loss": 2.7195, "step": 211300 }, { "epoch": 9.101951156480165, "learning_rate": 4.882242668447929e-07, "loss": 2.6886, "step": 211320 }, { "epoch": 9.102812594219753, "learning_rate": 4.881757848693142e-07, "loss": 2.6708, "step": 211340 }, { "epoch": 9.10367403195934, "learning_rate": 4.881273028938353e-07, "loss": 2.77, "step": 211360 }, { "epoch": 9.104535469698927, "learning_rate": 4.880788209183563e-07, "loss": 2.8255, "step": 211380 }, { "epoch": 9.105396907438514, "learning_rate": 4.880303389428774e-07, "loss": 2.88, "step": 211400 }, { "epoch": 9.106258345178102, "learning_rate": 4.879818569673985e-07, "loss": 2.8699, "step": 211420 }, { "epoch": 9.107119782917689, "learning_rate": 4.879333749919196e-07, "loss": 2.8845, "step": 211440 }, { "epoch": 9.107981220657276, "learning_rate": 4.878848930164407e-07, "loss": 2.6452, "step": 211460 }, { "epoch": 9.108842658396865, "learning_rate": 4.878364110409619e-07, "loss": 2.7368, "step": 211480 }, { "epoch": 9.109704096136452, "learning_rate": 4.877879290654829e-07, "loss": 2.6705, "step": 211500 }, { "epoch": 9.11056553387604, "learning_rate": 4.877394470900041e-07, "loss": 2.9266, "step": 211520 }, { "epoch": 9.111426971615627, "learning_rate": 4.876909651145252e-07, "loss": 2.7933, "step": 211540 }, { "epoch": 9.112288409355214, "learning_rate": 4.876424831390462e-07, "loss": 2.8466, "step": 211560 }, { "epoch": 9.113149847094801, "learning_rate": 4.875940011635673e-07, "loss": 2.6953, "step": 211580 }, { "epoch": 9.114011284834389, "learning_rate": 4.875455191880886e-07, "loss": 2.7673, "step": 211600 }, { "epoch": 9.114872722573976, "learning_rate": 4.874970372126097e-07, "loss": 2.7531, "step": 211620 }, { "epoch": 9.115734160313563, "learning_rate": 4.874485552371307e-07, "loss": 2.7172, "step": 211640 }, { "epoch": 9.11659559805315, "learning_rate": 4.874000732616518e-07, "loss": 2.9402, "step": 211660 }, { "epoch": 9.117457035792738, "learning_rate": 4.873515912861729e-07, "loss": 2.8385, "step": 211680 }, { "epoch": 9.118318473532325, "learning_rate": 4.873031093106939e-07, "loss": 2.6852, "step": 211700 }, { "epoch": 9.119179911271912, "learning_rate": 4.872546273352152e-07, "loss": 2.8735, "step": 211720 }, { "epoch": 9.1200413490115, "learning_rate": 4.872061453597363e-07, "loss": 2.7933, "step": 211740 }, { "epoch": 9.120902786751088, "learning_rate": 4.871576633842574e-07, "loss": 2.9273, "step": 211760 }, { "epoch": 9.121764224490676, "learning_rate": 4.871091814087784e-07, "loss": 2.7936, "step": 211780 }, { "epoch": 9.122625662230263, "learning_rate": 4.870606994332996e-07, "loss": 2.6287, "step": 211800 }, { "epoch": 9.12348709996985, "learning_rate": 4.870122174578206e-07, "loss": 2.8238, "step": 211820 }, { "epoch": 9.124348537709437, "learning_rate": 4.869637354823419e-07, "loss": 2.7757, "step": 211840 }, { "epoch": 9.125209975449025, "learning_rate": 4.869152535068629e-07, "loss": 2.7307, "step": 211860 }, { "epoch": 9.126071413188612, "learning_rate": 4.86866771531384e-07, "loss": 2.7887, "step": 211880 }, { "epoch": 9.126932850928199, "learning_rate": 4.868182895559051e-07, "loss": 2.8678, "step": 211900 }, { "epoch": 9.127794288667786, "learning_rate": 4.867698075804261e-07, "loss": 2.7273, "step": 211920 }, { "epoch": 9.128655726407374, "learning_rate": 4.867213256049472e-07, "loss": 2.7432, "step": 211940 }, { "epoch": 9.12951716414696, "learning_rate": 4.866728436294683e-07, "loss": 2.8012, "step": 211960 }, { "epoch": 9.130378601886548, "learning_rate": 4.866243616539896e-07, "loss": 2.9162, "step": 211980 }, { "epoch": 9.131240039626135, "learning_rate": 4.865758796785106e-07, "loss": 2.98, "step": 212000 }, { "epoch": 9.132101477365723, "learning_rate": 4.865273977030317e-07, "loss": 2.7121, "step": 212020 }, { "epoch": 9.132962915105312, "learning_rate": 4.864789157275528e-07, "loss": 2.8153, "step": 212040 }, { "epoch": 9.133824352844899, "learning_rate": 4.86430433752074e-07, "loss": 2.7326, "step": 212060 }, { "epoch": 9.134685790584486, "learning_rate": 4.863819517765949e-07, "loss": 2.8942, "step": 212080 }, { "epoch": 9.135547228324073, "learning_rate": 4.863334698011162e-07, "loss": 2.8813, "step": 212100 }, { "epoch": 9.13640866606366, "learning_rate": 4.862849878256373e-07, "loss": 2.7346, "step": 212120 }, { "epoch": 9.137270103803248, "learning_rate": 4.862365058501584e-07, "loss": 2.8459, "step": 212140 }, { "epoch": 9.138131541542835, "learning_rate": 4.861880238746794e-07, "loss": 2.808, "step": 212160 }, { "epoch": 9.138992979282422, "learning_rate": 4.861395418992006e-07, "loss": 2.7127, "step": 212180 }, { "epoch": 9.13985441702201, "learning_rate": 4.860910599237216e-07, "loss": 2.8106, "step": 212200 }, { "epoch": 9.140715854761597, "learning_rate": 4.860425779482429e-07, "loss": 2.9485, "step": 212220 }, { "epoch": 9.141577292501184, "learning_rate": 4.859940959727639e-07, "loss": 2.7833, "step": 212240 }, { "epoch": 9.142438730240771, "learning_rate": 4.85945613997285e-07, "loss": 2.7181, "step": 212260 }, { "epoch": 9.143300167980358, "learning_rate": 4.858971320218061e-07, "loss": 2.8519, "step": 212280 }, { "epoch": 9.144161605719946, "learning_rate": 4.858486500463272e-07, "loss": 2.7646, "step": 212300 }, { "epoch": 9.145023043459535, "learning_rate": 4.858001680708482e-07, "loss": 2.6661, "step": 212320 }, { "epoch": 9.145884481199122, "learning_rate": 4.857516860953695e-07, "loss": 2.8405, "step": 212340 }, { "epoch": 9.14674591893871, "learning_rate": 4.857032041198906e-07, "loss": 2.7817, "step": 212360 }, { "epoch": 9.147607356678296, "learning_rate": 4.856547221444116e-07, "loss": 2.7087, "step": 212380 }, { "epoch": 9.148468794417884, "learning_rate": 4.856062401689327e-07, "loss": 2.7313, "step": 212400 }, { "epoch": 9.149330232157471, "learning_rate": 4.855577581934539e-07, "loss": 2.7778, "step": 212420 }, { "epoch": 9.150191669897058, "learning_rate": 4.85509276217975e-07, "loss": 2.686, "step": 212440 }, { "epoch": 9.151053107636645, "learning_rate": 4.854607942424959e-07, "loss": 2.7028, "step": 212460 }, { "epoch": 9.151914545376233, "learning_rate": 4.854123122670172e-07, "loss": 2.838, "step": 212480 }, { "epoch": 9.15277598311582, "learning_rate": 4.853638302915383e-07, "loss": 2.7864, "step": 212500 }, { "epoch": 9.153637420855407, "learning_rate": 4.853153483160594e-07, "loss": 2.7843, "step": 212520 }, { "epoch": 9.154498858594994, "learning_rate": 4.852668663405804e-07, "loss": 2.8295, "step": 212540 }, { "epoch": 9.155360296334582, "learning_rate": 4.852183843651016e-07, "loss": 2.6963, "step": 212560 }, { "epoch": 9.156221734074169, "learning_rate": 4.851699023896226e-07, "loss": 2.8218, "step": 212580 }, { "epoch": 9.157083171813758, "learning_rate": 4.851214204141439e-07, "loss": 2.8548, "step": 212600 }, { "epoch": 9.157944609553345, "learning_rate": 4.850729384386649e-07, "loss": 3.0033, "step": 212620 }, { "epoch": 9.158806047292932, "learning_rate": 4.85024456463186e-07, "loss": 2.7761, "step": 212640 }, { "epoch": 9.15966748503252, "learning_rate": 4.849759744877071e-07, "loss": 2.7804, "step": 212660 }, { "epoch": 9.160528922772107, "learning_rate": 4.849274925122282e-07, "loss": 2.7053, "step": 212680 }, { "epoch": 9.161390360511694, "learning_rate": 4.848790105367493e-07, "loss": 2.7786, "step": 212700 }, { "epoch": 9.162251798251281, "learning_rate": 4.848305285612705e-07, "loss": 2.8218, "step": 212720 }, { "epoch": 9.163113235990869, "learning_rate": 4.847820465857916e-07, "loss": 2.7728, "step": 212740 }, { "epoch": 9.163974673730456, "learning_rate": 4.847335646103126e-07, "loss": 2.8223, "step": 212760 }, { "epoch": 9.164836111470043, "learning_rate": 4.846850826348337e-07, "loss": 2.9852, "step": 212780 }, { "epoch": 9.16569754920963, "learning_rate": 4.846366006593549e-07, "loss": 2.7812, "step": 212800 }, { "epoch": 9.166558986949218, "learning_rate": 4.84588118683876e-07, "loss": 2.7516, "step": 212820 }, { "epoch": 9.167420424688805, "learning_rate": 4.845396367083971e-07, "loss": 2.7151, "step": 212840 }, { "epoch": 9.168281862428392, "learning_rate": 4.844911547329182e-07, "loss": 2.8112, "step": 212860 }, { "epoch": 9.169143300167981, "learning_rate": 4.844426727574393e-07, "loss": 2.6776, "step": 212880 }, { "epoch": 9.170004737907568, "learning_rate": 4.843941907819603e-07, "loss": 2.9367, "step": 212900 }, { "epoch": 9.170866175647156, "learning_rate": 4.843457088064815e-07, "loss": 2.6525, "step": 212920 }, { "epoch": 9.171727613386743, "learning_rate": 4.842972268310026e-07, "loss": 2.731, "step": 212940 }, { "epoch": 9.17258905112633, "learning_rate": 4.842487448555238e-07, "loss": 2.7802, "step": 212960 }, { "epoch": 9.173450488865917, "learning_rate": 4.842002628800448e-07, "loss": 2.763, "step": 212980 }, { "epoch": 9.174311926605505, "learning_rate": 4.841517809045659e-07, "loss": 2.652, "step": 213000 }, { "epoch": 9.175173364345092, "learning_rate": 4.84103298929087e-07, "loss": 2.8239, "step": 213020 }, { "epoch": 9.17603480208468, "learning_rate": 4.840548169536081e-07, "loss": 2.8207, "step": 213040 }, { "epoch": 9.176896239824266, "learning_rate": 4.840063349781292e-07, "loss": 2.7807, "step": 213060 }, { "epoch": 9.177757677563854, "learning_rate": 4.839578530026503e-07, "loss": 2.8629, "step": 213080 }, { "epoch": 9.17861911530344, "learning_rate": 4.839093710271715e-07, "loss": 2.7593, "step": 213100 }, { "epoch": 9.179480553043028, "learning_rate": 4.838608890516926e-07, "loss": 3.0015, "step": 213120 }, { "epoch": 9.180341990782615, "learning_rate": 4.838124070762136e-07, "loss": 2.839, "step": 213140 }, { "epoch": 9.181203428522204, "learning_rate": 4.837639251007347e-07, "loss": 2.7609, "step": 213160 }, { "epoch": 9.182064866261792, "learning_rate": 4.837154431252559e-07, "loss": 2.7606, "step": 213180 }, { "epoch": 9.182926304001379, "learning_rate": 4.83666961149777e-07, "loss": 2.8152, "step": 213200 }, { "epoch": 9.183787741740966, "learning_rate": 4.836184791742981e-07, "loss": 2.8119, "step": 213220 }, { "epoch": 9.184649179480553, "learning_rate": 4.835699971988192e-07, "loss": 2.7964, "step": 213240 }, { "epoch": 9.18551061722014, "learning_rate": 4.835215152233403e-07, "loss": 2.6985, "step": 213260 }, { "epoch": 9.186372054959728, "learning_rate": 4.834730332478613e-07, "loss": 2.7612, "step": 213280 }, { "epoch": 9.187233492699315, "learning_rate": 4.834245512723825e-07, "loss": 2.9069, "step": 213300 }, { "epoch": 9.188094930438902, "learning_rate": 4.833760692969036e-07, "loss": 2.9639, "step": 213320 }, { "epoch": 9.18895636817849, "learning_rate": 4.833275873214248e-07, "loss": 2.6816, "step": 213340 }, { "epoch": 9.189817805918077, "learning_rate": 4.832791053459458e-07, "loss": 2.9023, "step": 213360 }, { "epoch": 9.190679243657664, "learning_rate": 4.832306233704669e-07, "loss": 2.8334, "step": 213380 }, { "epoch": 9.191540681397251, "learning_rate": 4.83182141394988e-07, "loss": 2.7348, "step": 213400 }, { "epoch": 9.192402119136839, "learning_rate": 4.831336594195092e-07, "loss": 2.8176, "step": 213420 }, { "epoch": 9.193263556876428, "learning_rate": 4.830851774440302e-07, "loss": 2.7926, "step": 213440 }, { "epoch": 9.194124994616015, "learning_rate": 4.830366954685514e-07, "loss": 2.7795, "step": 213460 }, { "epoch": 9.194986432355602, "learning_rate": 4.829882134930725e-07, "loss": 2.9301, "step": 213480 }, { "epoch": 9.19584787009519, "learning_rate": 4.829397315175936e-07, "loss": 2.742, "step": 213500 }, { "epoch": 9.196709307834777, "learning_rate": 4.828912495421146e-07, "loss": 2.8339, "step": 213520 }, { "epoch": 9.197570745574364, "learning_rate": 4.828427675666357e-07, "loss": 2.756, "step": 213540 }, { "epoch": 9.198432183313951, "learning_rate": 4.827942855911569e-07, "loss": 2.6859, "step": 213560 }, { "epoch": 9.199293621053538, "learning_rate": 4.827458036156779e-07, "loss": 2.6961, "step": 213580 }, { "epoch": 9.200155058793126, "learning_rate": 4.826973216401991e-07, "loss": 2.655, "step": 213600 }, { "epoch": 9.201016496532713, "learning_rate": 4.826488396647202e-07, "loss": 2.773, "step": 213620 }, { "epoch": 9.2018779342723, "learning_rate": 4.826003576892413e-07, "loss": 2.7564, "step": 213640 }, { "epoch": 9.202739372011887, "learning_rate": 4.825518757137623e-07, "loss": 2.7459, "step": 213660 }, { "epoch": 9.203600809751475, "learning_rate": 4.825033937382835e-07, "loss": 2.6344, "step": 213680 }, { "epoch": 9.204462247491062, "learning_rate": 4.824549117628046e-07, "loss": 2.7902, "step": 213700 }, { "epoch": 9.20532368523065, "learning_rate": 4.824064297873258e-07, "loss": 2.8005, "step": 213720 }, { "epoch": 9.206185122970238, "learning_rate": 4.823579478118468e-07, "loss": 2.7273, "step": 213740 }, { "epoch": 9.207046560709825, "learning_rate": 4.823094658363679e-07, "loss": 2.9303, "step": 213760 }, { "epoch": 9.207907998449413, "learning_rate": 4.82260983860889e-07, "loss": 2.7787, "step": 213780 }, { "epoch": 9.208769436189, "learning_rate": 4.822125018854102e-07, "loss": 2.7766, "step": 213800 }, { "epoch": 9.209630873928587, "learning_rate": 4.821640199099312e-07, "loss": 2.6748, "step": 213820 }, { "epoch": 9.210492311668174, "learning_rate": 4.821155379344524e-07, "loss": 2.7941, "step": 213840 }, { "epoch": 9.211353749407762, "learning_rate": 4.820670559589735e-07, "loss": 2.7436, "step": 213860 }, { "epoch": 9.212215187147349, "learning_rate": 4.820185739834945e-07, "loss": 2.8231, "step": 213880 }, { "epoch": 9.213076624886936, "learning_rate": 4.819700920080156e-07, "loss": 2.9088, "step": 213900 }, { "epoch": 9.213938062626523, "learning_rate": 4.819216100325368e-07, "loss": 2.7446, "step": 213920 }, { "epoch": 9.21479950036611, "learning_rate": 4.818731280570579e-07, "loss": 2.7752, "step": 213940 }, { "epoch": 9.215660938105698, "learning_rate": 4.81824646081579e-07, "loss": 2.7244, "step": 213960 }, { "epoch": 9.216522375845285, "learning_rate": 4.817761641061001e-07, "loss": 2.7014, "step": 213980 }, { "epoch": 9.217383813584874, "learning_rate": 4.817276821306212e-07, "loss": 2.809, "step": 214000 }, { "epoch": 9.218245251324461, "learning_rate": 4.816792001551423e-07, "loss": 2.8281, "step": 214020 }, { "epoch": 9.219106689064049, "learning_rate": 4.816307181796634e-07, "loss": 2.7548, "step": 214040 }, { "epoch": 9.219968126803636, "learning_rate": 4.815822362041845e-07, "loss": 2.8694, "step": 214060 }, { "epoch": 9.220829564543223, "learning_rate": 4.815337542287056e-07, "loss": 2.6885, "step": 214080 }, { "epoch": 9.22169100228281, "learning_rate": 4.814852722532268e-07, "loss": 2.7604, "step": 214100 }, { "epoch": 9.222552440022397, "learning_rate": 4.814367902777478e-07, "loss": 2.7137, "step": 214120 }, { "epoch": 9.223413877761985, "learning_rate": 4.813883083022689e-07, "loss": 2.7509, "step": 214140 }, { "epoch": 9.224275315501572, "learning_rate": 4.8133982632679e-07, "loss": 2.834, "step": 214160 }, { "epoch": 9.22513675324116, "learning_rate": 4.812913443513111e-07, "loss": 2.6781, "step": 214180 }, { "epoch": 9.225998190980746, "learning_rate": 4.812428623758322e-07, "loss": 2.6976, "step": 214200 }, { "epoch": 9.226859628720334, "learning_rate": 4.811943804003534e-07, "loss": 2.6986, "step": 214220 }, { "epoch": 9.227721066459921, "learning_rate": 4.811458984248745e-07, "loss": 2.8735, "step": 214240 }, { "epoch": 9.228582504199508, "learning_rate": 4.810974164493955e-07, "loss": 2.8408, "step": 214260 }, { "epoch": 9.229443941939095, "learning_rate": 4.810489344739166e-07, "loss": 2.8558, "step": 214280 }, { "epoch": 9.230305379678684, "learning_rate": 4.810004524984378e-07, "loss": 2.8054, "step": 214300 }, { "epoch": 9.231166817418272, "learning_rate": 4.809519705229589e-07, "loss": 2.9913, "step": 214320 }, { "epoch": 9.232028255157859, "learning_rate": 4.8090348854748e-07, "loss": 2.6871, "step": 214340 }, { "epoch": 9.232889692897446, "learning_rate": 4.808550065720011e-07, "loss": 2.8645, "step": 214360 }, { "epoch": 9.233751130637033, "learning_rate": 4.808065245965222e-07, "loss": 2.9013, "step": 214380 }, { "epoch": 9.23461256837662, "learning_rate": 4.807580426210432e-07, "loss": 2.7107, "step": 214400 }, { "epoch": 9.235474006116208, "learning_rate": 4.807095606455644e-07, "loss": 2.8391, "step": 214420 }, { "epoch": 9.236335443855795, "learning_rate": 4.806610786700855e-07, "loss": 2.865, "step": 214440 }, { "epoch": 9.237196881595382, "learning_rate": 4.806125966946067e-07, "loss": 2.8037, "step": 214460 }, { "epoch": 9.23805831933497, "learning_rate": 4.805641147191277e-07, "loss": 2.8826, "step": 214480 }, { "epoch": 9.238919757074557, "learning_rate": 4.805156327436488e-07, "loss": 2.8241, "step": 214500 }, { "epoch": 9.239781194814144, "learning_rate": 4.804671507681699e-07, "loss": 2.7252, "step": 214520 }, { "epoch": 9.240642632553731, "learning_rate": 4.804186687926912e-07, "loss": 2.5828, "step": 214540 }, { "epoch": 9.24150407029332, "learning_rate": 4.803701868172121e-07, "loss": 2.7707, "step": 214560 }, { "epoch": 9.242365508032908, "learning_rate": 4.803217048417333e-07, "loss": 2.7082, "step": 214580 }, { "epoch": 9.243226945772495, "learning_rate": 4.802732228662544e-07, "loss": 2.9447, "step": 214600 }, { "epoch": 9.244088383512082, "learning_rate": 4.802247408907755e-07, "loss": 2.8645, "step": 214620 }, { "epoch": 9.24494982125167, "learning_rate": 4.801762589152965e-07, "loss": 2.7345, "step": 214640 }, { "epoch": 9.245811258991257, "learning_rate": 4.801277769398176e-07, "loss": 2.9838, "step": 214660 }, { "epoch": 9.246672696730844, "learning_rate": 4.800792949643388e-07, "loss": 2.7544, "step": 214680 }, { "epoch": 9.247534134470431, "learning_rate": 4.800308129888599e-07, "loss": 2.6974, "step": 214700 }, { "epoch": 9.248395572210018, "learning_rate": 4.79982331013381e-07, "loss": 2.7511, "step": 214720 }, { "epoch": 9.249257009949606, "learning_rate": 4.799338490379021e-07, "loss": 2.8622, "step": 214740 }, { "epoch": 9.250118447689193, "learning_rate": 4.798853670624232e-07, "loss": 2.8547, "step": 214760 }, { "epoch": 9.25097988542878, "learning_rate": 4.798368850869442e-07, "loss": 2.8388, "step": 214780 }, { "epoch": 9.251841323168367, "learning_rate": 4.797884031114654e-07, "loss": 2.7055, "step": 214800 }, { "epoch": 9.252702760907955, "learning_rate": 4.797399211359865e-07, "loss": 2.8674, "step": 214820 }, { "epoch": 9.253564198647542, "learning_rate": 4.796914391605077e-07, "loss": 2.7443, "step": 214840 }, { "epoch": 9.254425636387131, "learning_rate": 4.796429571850287e-07, "loss": 2.7057, "step": 214860 }, { "epoch": 9.255287074126718, "learning_rate": 4.795944752095498e-07, "loss": 2.8693, "step": 214880 }, { "epoch": 9.256148511866305, "learning_rate": 4.795459932340709e-07, "loss": 2.9006, "step": 214900 }, { "epoch": 9.257009949605893, "learning_rate": 4.794975112585922e-07, "loss": 2.7086, "step": 214920 }, { "epoch": 9.25787138734548, "learning_rate": 4.794490292831131e-07, "loss": 3.0234, "step": 214940 }, { "epoch": 9.258732825085067, "learning_rate": 4.794005473076343e-07, "loss": 2.8577, "step": 214960 }, { "epoch": 9.259594262824654, "learning_rate": 4.793520653321554e-07, "loss": 2.689, "step": 214980 }, { "epoch": 9.260455700564242, "learning_rate": 4.793035833566765e-07, "loss": 2.8257, "step": 215000 }, { "epoch": 9.261317138303829, "learning_rate": 4.792551013811975e-07, "loss": 2.8232, "step": 215020 }, { "epoch": 9.262178576043416, "learning_rate": 4.792066194057188e-07, "loss": 2.8038, "step": 215040 }, { "epoch": 9.263040013783003, "learning_rate": 4.791581374302398e-07, "loss": 2.8183, "step": 215060 }, { "epoch": 9.26390145152259, "learning_rate": 4.79109655454761e-07, "loss": 2.7844, "step": 215080 }, { "epoch": 9.264762889262178, "learning_rate": 4.79061173479282e-07, "loss": 2.8101, "step": 215100 }, { "epoch": 9.265624327001767, "learning_rate": 4.790126915038031e-07, "loss": 2.9274, "step": 215120 }, { "epoch": 9.266485764741354, "learning_rate": 4.789642095283242e-07, "loss": 2.7084, "step": 215140 }, { "epoch": 9.267347202480941, "learning_rate": 4.789157275528452e-07, "loss": 2.8471, "step": 215160 }, { "epoch": 9.268208640220529, "learning_rate": 4.788672455773665e-07, "loss": 2.6728, "step": 215180 }, { "epoch": 9.269070077960116, "learning_rate": 4.788187636018875e-07, "loss": 2.8401, "step": 215200 }, { "epoch": 9.269931515699703, "learning_rate": 4.787702816264087e-07, "loss": 2.8208, "step": 215220 }, { "epoch": 9.27079295343929, "learning_rate": 4.787217996509297e-07, "loss": 2.608, "step": 215240 }, { "epoch": 9.271654391178878, "learning_rate": 4.786733176754508e-07, "loss": 2.8091, "step": 215260 }, { "epoch": 9.272515828918465, "learning_rate": 4.786248356999719e-07, "loss": 2.9819, "step": 215280 }, { "epoch": 9.273377266658052, "learning_rate": 4.785763537244932e-07, "loss": 2.8235, "step": 215300 }, { "epoch": 9.27423870439764, "learning_rate": 4.785278717490141e-07, "loss": 2.8206, "step": 215320 }, { "epoch": 9.275100142137227, "learning_rate": 4.784793897735353e-07, "loss": 2.8924, "step": 215340 }, { "epoch": 9.275961579876814, "learning_rate": 4.784309077980564e-07, "loss": 2.6921, "step": 215360 }, { "epoch": 9.276823017616401, "learning_rate": 4.783824258225774e-07, "loss": 2.7133, "step": 215380 }, { "epoch": 9.277684455355988, "learning_rate": 4.783339438470985e-07, "loss": 2.6839, "step": 215400 }, { "epoch": 9.278545893095577, "learning_rate": 4.782854618716198e-07, "loss": 2.7344, "step": 215420 }, { "epoch": 9.279407330835165, "learning_rate": 4.782369798961408e-07, "loss": 2.7001, "step": 215440 }, { "epoch": 9.280268768574752, "learning_rate": 4.781884979206619e-07, "loss": 2.7773, "step": 215460 }, { "epoch": 9.281130206314339, "learning_rate": 4.78140015945183e-07, "loss": 2.8666, "step": 215480 }, { "epoch": 9.281991644053926, "learning_rate": 4.780915339697041e-07, "loss": 2.7814, "step": 215500 }, { "epoch": 9.282853081793514, "learning_rate": 4.780430519942252e-07, "loss": 2.6307, "step": 215520 }, { "epoch": 9.2837145195331, "learning_rate": 4.779945700187464e-07, "loss": 2.8385, "step": 215540 }, { "epoch": 9.284575957272688, "learning_rate": 4.779460880432675e-07, "loss": 2.9439, "step": 215560 }, { "epoch": 9.285437395012275, "learning_rate": 4.778976060677886e-07, "loss": 2.8195, "step": 215580 }, { "epoch": 9.286298832751863, "learning_rate": 4.778491240923097e-07, "loss": 2.8578, "step": 215600 }, { "epoch": 9.28716027049145, "learning_rate": 4.778006421168308e-07, "loss": 2.8657, "step": 215620 }, { "epoch": 9.288021708231037, "learning_rate": 4.777521601413518e-07, "loss": 2.8134, "step": 215640 }, { "epoch": 9.288883145970624, "learning_rate": 4.777036781658731e-07, "loss": 2.8343, "step": 215660 }, { "epoch": 9.289744583710212, "learning_rate": 4.776551961903942e-07, "loss": 2.656, "step": 215680 }, { "epoch": 9.2906060214498, "learning_rate": 4.776067142149151e-07, "loss": 2.6626, "step": 215700 }, { "epoch": 9.291467459189388, "learning_rate": 4.775582322394363e-07, "loss": 2.8446, "step": 215720 }, { "epoch": 9.292328896928975, "learning_rate": 4.775097502639574e-07, "loss": 2.7615, "step": 215740 }, { "epoch": 9.293190334668562, "learning_rate": 4.774612682884784e-07, "loss": 2.8748, "step": 215760 }, { "epoch": 9.29405177240815, "learning_rate": 4.774127863129995e-07, "loss": 2.9536, "step": 215780 }, { "epoch": 9.294913210147737, "learning_rate": 4.773643043375208e-07, "loss": 2.6563, "step": 215800 }, { "epoch": 9.295774647887324, "learning_rate": 4.773158223620418e-07, "loss": 2.7203, "step": 215820 }, { "epoch": 9.296636085626911, "learning_rate": 4.772673403865629e-07, "loss": 2.7041, "step": 215840 }, { "epoch": 9.297497523366498, "learning_rate": 4.77218858411084e-07, "loss": 2.6837, "step": 215860 }, { "epoch": 9.298358961106086, "learning_rate": 4.771703764356051e-07, "loss": 2.9107, "step": 215880 }, { "epoch": 9.299220398845673, "learning_rate": 4.771218944601262e-07, "loss": 2.6808, "step": 215900 }, { "epoch": 9.30008183658526, "learning_rate": 4.770734124846474e-07, "loss": 2.7624, "step": 215920 }, { "epoch": 9.300943274324847, "learning_rate": 4.770249305091685e-07, "loss": 2.7494, "step": 215940 }, { "epoch": 9.301804712064435, "learning_rate": 4.769764485336896e-07, "loss": 2.7703, "step": 215960 }, { "epoch": 9.302666149804024, "learning_rate": 4.769279665582107e-07, "loss": 2.8778, "step": 215980 }, { "epoch": 9.303527587543611, "learning_rate": 4.768794845827318e-07, "loss": 2.7031, "step": 216000 }, { "epoch": 9.304389025283198, "learning_rate": 4.7683100260725285e-07, "loss": 2.6789, "step": 216020 }, { "epoch": 9.305250463022785, "learning_rate": 4.76782520631774e-07, "loss": 2.6724, "step": 216040 }, { "epoch": 9.306111900762373, "learning_rate": 4.767340386562951e-07, "loss": 2.9111, "step": 216060 }, { "epoch": 9.30697333850196, "learning_rate": 4.766855566808162e-07, "loss": 2.8785, "step": 216080 }, { "epoch": 9.307834776241547, "learning_rate": 4.766370747053373e-07, "loss": 2.6516, "step": 216100 }, { "epoch": 9.308696213981134, "learning_rate": 4.7658859272985847e-07, "loss": 2.7743, "step": 216120 }, { "epoch": 9.309557651720722, "learning_rate": 4.765401107543795e-07, "loss": 2.7974, "step": 216140 }, { "epoch": 9.310419089460309, "learning_rate": 4.7649162877890066e-07, "loss": 2.8888, "step": 216160 }, { "epoch": 9.311280527199896, "learning_rate": 4.7644314680342174e-07, "loss": 2.6167, "step": 216180 }, { "epoch": 9.312141964939483, "learning_rate": 4.7639466482794286e-07, "loss": 2.66, "step": 216200 }, { "epoch": 9.31300340267907, "learning_rate": 4.7634618285246393e-07, "loss": 2.807, "step": 216220 }, { "epoch": 9.313864840418658, "learning_rate": 4.76297700876985e-07, "loss": 2.6777, "step": 216240 }, { "epoch": 9.314726278158247, "learning_rate": 4.762492189015062e-07, "loss": 2.7346, "step": 216260 }, { "epoch": 9.315587715897834, "learning_rate": 4.762007369260272e-07, "loss": 2.7119, "step": 216280 }, { "epoch": 9.316449153637421, "learning_rate": 4.761522549505484e-07, "loss": 2.6735, "step": 216300 }, { "epoch": 9.317310591377009, "learning_rate": 4.7610377297506945e-07, "loss": 2.9062, "step": 216320 }, { "epoch": 9.318172029116596, "learning_rate": 4.7605529099959057e-07, "loss": 2.7525, "step": 216340 }, { "epoch": 9.319033466856183, "learning_rate": 4.7600680902411164e-07, "loss": 2.6748, "step": 216360 }, { "epoch": 9.31989490459577, "learning_rate": 4.759583270486328e-07, "loss": 2.6888, "step": 216380 }, { "epoch": 9.320756342335358, "learning_rate": 4.7590984507315384e-07, "loss": 2.7734, "step": 216400 }, { "epoch": 9.321617780074945, "learning_rate": 4.75861363097675e-07, "loss": 2.9006, "step": 216420 }, { "epoch": 9.322479217814532, "learning_rate": 4.758128811221961e-07, "loss": 2.7669, "step": 216440 }, { "epoch": 9.32334065555412, "learning_rate": 4.757643991467172e-07, "loss": 2.8867, "step": 216460 }, { "epoch": 9.324202093293707, "learning_rate": 4.757159171712383e-07, "loss": 2.7095, "step": 216480 }, { "epoch": 9.325063531033294, "learning_rate": 4.7566743519575946e-07, "loss": 2.7579, "step": 216500 }, { "epoch": 9.325924968772881, "learning_rate": 4.756189532202805e-07, "loss": 2.9818, "step": 216520 }, { "epoch": 9.32678640651247, "learning_rate": 4.7557047124480166e-07, "loss": 2.6963, "step": 216540 }, { "epoch": 9.327647844252057, "learning_rate": 4.7552198926932273e-07, "loss": 2.8073, "step": 216560 }, { "epoch": 9.328509281991645, "learning_rate": 4.7547350729384385e-07, "loss": 2.7222, "step": 216580 }, { "epoch": 9.329370719731232, "learning_rate": 4.754250253183649e-07, "loss": 2.7137, "step": 216600 }, { "epoch": 9.33023215747082, "learning_rate": 4.753765433428861e-07, "loss": 2.8584, "step": 216620 }, { "epoch": 9.331093595210406, "learning_rate": 4.753280613674072e-07, "loss": 2.927, "step": 216640 }, { "epoch": 9.331955032949994, "learning_rate": 4.752795793919283e-07, "loss": 2.6638, "step": 216660 }, { "epoch": 9.33281647068958, "learning_rate": 4.7523109741644937e-07, "loss": 2.8273, "step": 216680 }, { "epoch": 9.333677908429168, "learning_rate": 4.7518261544097055e-07, "loss": 2.8107, "step": 216700 }, { "epoch": 9.334539346168755, "learning_rate": 4.7513413346549157e-07, "loss": 2.7368, "step": 216720 }, { "epoch": 9.335400783908343, "learning_rate": 4.7508565149001274e-07, "loss": 2.8687, "step": 216740 }, { "epoch": 9.33626222164793, "learning_rate": 4.750371695145338e-07, "loss": 2.8141, "step": 216760 }, { "epoch": 9.337123659387517, "learning_rate": 4.7498868753905483e-07, "loss": 2.7724, "step": 216780 }, { "epoch": 9.337985097127104, "learning_rate": 4.74940205563576e-07, "loss": 2.6339, "step": 216800 }, { "epoch": 9.338846534866693, "learning_rate": 4.748917235880971e-07, "loss": 2.7346, "step": 216820 }, { "epoch": 9.33970797260628, "learning_rate": 4.748432416126182e-07, "loss": 2.7309, "step": 216840 }, { "epoch": 9.340569410345868, "learning_rate": 4.747947596371393e-07, "loss": 2.8673, "step": 216860 }, { "epoch": 9.341430848085455, "learning_rate": 4.7474627766166046e-07, "loss": 2.6364, "step": 216880 }, { "epoch": 9.342292285825042, "learning_rate": 4.7469779568618153e-07, "loss": 2.8734, "step": 216900 }, { "epoch": 9.34315372356463, "learning_rate": 4.7464931371070265e-07, "loss": 2.6972, "step": 216920 }, { "epoch": 9.344015161304217, "learning_rate": 4.746008317352238e-07, "loss": 2.8335, "step": 216940 }, { "epoch": 9.344876599043804, "learning_rate": 4.745523497597449e-07, "loss": 2.702, "step": 216960 }, { "epoch": 9.345738036783391, "learning_rate": 4.745038677842659e-07, "loss": 2.9223, "step": 216980 }, { "epoch": 9.346599474522979, "learning_rate": 4.744553858087871e-07, "loss": 2.8573, "step": 217000 }, { "epoch": 9.347460912262566, "learning_rate": 4.7440690383330817e-07, "loss": 2.8069, "step": 217020 }, { "epoch": 9.348322350002153, "learning_rate": 4.743584218578293e-07, "loss": 2.7429, "step": 217040 }, { "epoch": 9.34918378774174, "learning_rate": 4.7430993988235036e-07, "loss": 2.7815, "step": 217060 }, { "epoch": 9.350045225481328, "learning_rate": 4.7426145790687154e-07, "loss": 2.6812, "step": 217080 }, { "epoch": 9.350906663220915, "learning_rate": 4.7421297593139256e-07, "loss": 2.6339, "step": 217100 }, { "epoch": 9.351768100960504, "learning_rate": 4.7416449395591374e-07, "loss": 2.7107, "step": 217120 }, { "epoch": 9.352629538700091, "learning_rate": 4.741160119804348e-07, "loss": 2.7021, "step": 217140 }, { "epoch": 9.353490976439678, "learning_rate": 4.7406753000495593e-07, "loss": 2.8311, "step": 217160 }, { "epoch": 9.354352414179266, "learning_rate": 4.74019048029477e-07, "loss": 2.6569, "step": 217180 }, { "epoch": 9.355213851918853, "learning_rate": 4.739705660539982e-07, "loss": 2.7586, "step": 217200 }, { "epoch": 9.35607528965844, "learning_rate": 4.739220840785192e-07, "loss": 2.8908, "step": 217220 }, { "epoch": 9.356936727398027, "learning_rate": 4.738736021030404e-07, "loss": 2.7593, "step": 217240 }, { "epoch": 9.357798165137615, "learning_rate": 4.7382512012756145e-07, "loss": 2.7375, "step": 217260 }, { "epoch": 9.358659602877202, "learning_rate": 4.737766381520826e-07, "loss": 2.891, "step": 217280 }, { "epoch": 9.359521040616789, "learning_rate": 4.7372815617660365e-07, "loss": 2.7291, "step": 217300 }, { "epoch": 9.360382478356376, "learning_rate": 4.736796742011247e-07, "loss": 2.7541, "step": 217320 }, { "epoch": 9.361243916095964, "learning_rate": 4.736311922256459e-07, "loss": 2.7715, "step": 217340 }, { "epoch": 9.36210535383555, "learning_rate": 4.7358271025016697e-07, "loss": 2.8804, "step": 217360 }, { "epoch": 9.36296679157514, "learning_rate": 4.735342282746881e-07, "loss": 2.6993, "step": 217380 }, { "epoch": 9.363828229314727, "learning_rate": 4.7348574629920916e-07, "loss": 2.702, "step": 217400 }, { "epoch": 9.364689667054314, "learning_rate": 4.734372643237303e-07, "loss": 2.5604, "step": 217420 }, { "epoch": 9.365551104793902, "learning_rate": 4.7338878234825136e-07, "loss": 2.7732, "step": 217440 }, { "epoch": 9.366412542533489, "learning_rate": 4.7334030037277253e-07, "loss": 2.7692, "step": 217460 }, { "epoch": 9.367273980273076, "learning_rate": 4.7329181839729355e-07, "loss": 2.7861, "step": 217480 }, { "epoch": 9.368135418012663, "learning_rate": 4.7324333642181473e-07, "loss": 2.7939, "step": 217500 }, { "epoch": 9.36899685575225, "learning_rate": 4.731948544463358e-07, "loss": 2.8307, "step": 217520 }, { "epoch": 9.369858293491838, "learning_rate": 4.7314637247085693e-07, "loss": 2.8812, "step": 217540 }, { "epoch": 9.370719731231425, "learning_rate": 4.73097890495378e-07, "loss": 2.7517, "step": 217560 }, { "epoch": 9.371581168971012, "learning_rate": 4.730494085198992e-07, "loss": 2.7045, "step": 217580 }, { "epoch": 9.3724426067106, "learning_rate": 4.730009265444202e-07, "loss": 2.8497, "step": 217600 }, { "epoch": 9.373304044450187, "learning_rate": 4.729524445689413e-07, "loss": 2.7783, "step": 217620 }, { "epoch": 9.374165482189774, "learning_rate": 4.7290396259346244e-07, "loss": 2.7794, "step": 217640 }, { "epoch": 9.375026919929361, "learning_rate": 4.7285548061798357e-07, "loss": 2.9148, "step": 217660 }, { "epoch": 9.37588835766895, "learning_rate": 4.7280699864250464e-07, "loss": 2.7302, "step": 217680 }, { "epoch": 9.376749795408537, "learning_rate": 4.727585166670258e-07, "loss": 2.8039, "step": 217700 }, { "epoch": 9.377611233148125, "learning_rate": 4.727100346915469e-07, "loss": 2.6189, "step": 217720 }, { "epoch": 9.378472670887712, "learning_rate": 4.72661552716068e-07, "loss": 2.7212, "step": 217740 }, { "epoch": 9.3793341086273, "learning_rate": 4.726130707405891e-07, "loss": 2.7878, "step": 217760 }, { "epoch": 9.380195546366886, "learning_rate": 4.7256458876511026e-07, "loss": 2.6635, "step": 217780 }, { "epoch": 9.381056984106474, "learning_rate": 4.725161067896313e-07, "loss": 2.8616, "step": 217800 }, { "epoch": 9.381918421846061, "learning_rate": 4.7246762481415246e-07, "loss": 2.7216, "step": 217820 }, { "epoch": 9.382779859585648, "learning_rate": 4.7241914283867353e-07, "loss": 2.8782, "step": 217840 }, { "epoch": 9.383641297325235, "learning_rate": 4.7237066086319455e-07, "loss": 2.7975, "step": 217860 }, { "epoch": 9.384502735064823, "learning_rate": 4.723221788877157e-07, "loss": 2.8146, "step": 217880 }, { "epoch": 9.38536417280441, "learning_rate": 4.722736969122368e-07, "loss": 2.6604, "step": 217900 }, { "epoch": 9.386225610543997, "learning_rate": 4.722252149367579e-07, "loss": 2.7254, "step": 217920 }, { "epoch": 9.387087048283586, "learning_rate": 4.72176732961279e-07, "loss": 2.858, "step": 217940 }, { "epoch": 9.387948486023173, "learning_rate": 4.7212825098580017e-07, "loss": 2.6999, "step": 217960 }, { "epoch": 9.38880992376276, "learning_rate": 4.7207976901032124e-07, "loss": 2.778, "step": 217980 }, { "epoch": 9.389671361502348, "learning_rate": 4.7203128703484236e-07, "loss": 2.7209, "step": 218000 }, { "epoch": 9.390532799241935, "learning_rate": 4.7198280505936344e-07, "loss": 2.7458, "step": 218020 }, { "epoch": 9.391394236981522, "learning_rate": 4.719343230838846e-07, "loss": 2.749, "step": 218040 }, { "epoch": 9.39225567472111, "learning_rate": 4.7188584110840563e-07, "loss": 2.7289, "step": 218060 }, { "epoch": 9.393117112460697, "learning_rate": 4.718373591329268e-07, "loss": 2.732, "step": 218080 }, { "epoch": 9.393978550200284, "learning_rate": 4.717888771574479e-07, "loss": 2.7003, "step": 218100 }, { "epoch": 9.394839987939871, "learning_rate": 4.71740395181969e-07, "loss": 2.7173, "step": 218120 }, { "epoch": 9.395701425679459, "learning_rate": 4.716919132064901e-07, "loss": 2.7349, "step": 218140 }, { "epoch": 9.396562863419046, "learning_rate": 4.7164343123101125e-07, "loss": 2.7273, "step": 218160 }, { "epoch": 9.397424301158633, "learning_rate": 4.7159494925553227e-07, "loss": 2.8421, "step": 218180 }, { "epoch": 9.39828573889822, "learning_rate": 4.7154646728005345e-07, "loss": 2.7992, "step": 218200 }, { "epoch": 9.399147176637808, "learning_rate": 4.714979853045745e-07, "loss": 2.7876, "step": 218220 }, { "epoch": 9.400008614377397, "learning_rate": 4.7144950332909565e-07, "loss": 2.8685, "step": 218240 }, { "epoch": 9.400870052116984, "learning_rate": 4.714010213536167e-07, "loss": 2.8043, "step": 218260 }, { "epoch": 9.401731489856571, "learning_rate": 4.713525393781379e-07, "loss": 2.7657, "step": 218280 }, { "epoch": 9.402592927596158, "learning_rate": 4.713040574026589e-07, "loss": 2.7798, "step": 218300 }, { "epoch": 9.403454365335746, "learning_rate": 4.712555754271801e-07, "loss": 2.7966, "step": 218320 }, { "epoch": 9.404315803075333, "learning_rate": 4.7120709345170116e-07, "loss": 2.6801, "step": 218340 }, { "epoch": 9.40517724081492, "learning_rate": 4.711586114762223e-07, "loss": 2.8211, "step": 218360 }, { "epoch": 9.406038678554507, "learning_rate": 4.7111012950074336e-07, "loss": 2.8401, "step": 218380 }, { "epoch": 9.406900116294095, "learning_rate": 4.7106164752526443e-07, "loss": 2.7618, "step": 218400 }, { "epoch": 9.407761554033682, "learning_rate": 4.710131655497856e-07, "loss": 2.7974, "step": 218420 }, { "epoch": 9.408622991773269, "learning_rate": 4.709646835743066e-07, "loss": 2.6125, "step": 218440 }, { "epoch": 9.409484429512856, "learning_rate": 4.709162015988278e-07, "loss": 2.8947, "step": 218460 }, { "epoch": 9.410345867252444, "learning_rate": 4.708677196233489e-07, "loss": 2.765, "step": 218480 }, { "epoch": 9.41120730499203, "learning_rate": 4.7081923764787e-07, "loss": 2.9609, "step": 218500 }, { "epoch": 9.41206874273162, "learning_rate": 4.7077075567239107e-07, "loss": 2.7321, "step": 218520 }, { "epoch": 9.412930180471207, "learning_rate": 4.7072227369691225e-07, "loss": 2.9344, "step": 218540 }, { "epoch": 9.413791618210794, "learning_rate": 4.7067379172143327e-07, "loss": 2.8201, "step": 218560 }, { "epoch": 9.414653055950382, "learning_rate": 4.7062530974595444e-07, "loss": 2.7655, "step": 218580 }, { "epoch": 9.415514493689969, "learning_rate": 4.705768277704755e-07, "loss": 2.8186, "step": 218600 }, { "epoch": 9.416375931429556, "learning_rate": 4.7052834579499664e-07, "loss": 2.6848, "step": 218620 }, { "epoch": 9.417237369169143, "learning_rate": 4.704798638195177e-07, "loss": 2.8518, "step": 218640 }, { "epoch": 9.41809880690873, "learning_rate": 4.704313818440389e-07, "loss": 2.8492, "step": 218660 }, { "epoch": 9.418960244648318, "learning_rate": 4.7038289986855996e-07, "loss": 2.8951, "step": 218680 }, { "epoch": 9.419821682387905, "learning_rate": 4.703344178930811e-07, "loss": 2.6785, "step": 218700 }, { "epoch": 9.420683120127492, "learning_rate": 4.702859359176022e-07, "loss": 2.7418, "step": 218720 }, { "epoch": 9.42154455786708, "learning_rate": 4.7023745394212333e-07, "loss": 2.7158, "step": 218740 }, { "epoch": 9.422405995606667, "learning_rate": 4.7018897196664435e-07, "loss": 2.7968, "step": 218760 }, { "epoch": 9.423267433346254, "learning_rate": 4.7014048999116553e-07, "loss": 2.7231, "step": 218780 }, { "epoch": 9.424128871085843, "learning_rate": 4.700920080156866e-07, "loss": 2.6582, "step": 218800 }, { "epoch": 9.42499030882543, "learning_rate": 4.700435260402077e-07, "loss": 2.9426, "step": 218820 }, { "epoch": 9.425851746565018, "learning_rate": 4.699950440647288e-07, "loss": 2.7211, "step": 218840 }, { "epoch": 9.426713184304605, "learning_rate": 4.6994656208925e-07, "loss": 2.6901, "step": 218860 }, { "epoch": 9.427574622044192, "learning_rate": 4.69898080113771e-07, "loss": 2.6718, "step": 218880 }, { "epoch": 9.42843605978378, "learning_rate": 4.6984959813829217e-07, "loss": 2.7882, "step": 218900 }, { "epoch": 9.429297497523367, "learning_rate": 4.6980111616281324e-07, "loss": 2.8221, "step": 218920 }, { "epoch": 9.430158935262954, "learning_rate": 4.6975263418733426e-07, "loss": 2.8577, "step": 218940 }, { "epoch": 9.431020373002541, "learning_rate": 4.6970415221185544e-07, "loss": 2.8326, "step": 218960 }, { "epoch": 9.431881810742128, "learning_rate": 4.696556702363765e-07, "loss": 2.7996, "step": 218980 }, { "epoch": 9.432743248481716, "learning_rate": 4.6960718826089763e-07, "loss": 2.8635, "step": 219000 }, { "epoch": 9.433604686221303, "learning_rate": 4.695587062854187e-07, "loss": 2.7414, "step": 219020 }, { "epoch": 9.43446612396089, "learning_rate": 4.695102243099399e-07, "loss": 2.6799, "step": 219040 }, { "epoch": 9.435327561700477, "learning_rate": 4.6946174233446095e-07, "loss": 2.7515, "step": 219060 }, { "epoch": 9.436188999440066, "learning_rate": 4.694132603589821e-07, "loss": 2.869, "step": 219080 }, { "epoch": 9.437050437179654, "learning_rate": 4.6936477838350315e-07, "loss": 2.6511, "step": 219100 }, { "epoch": 9.43791187491924, "learning_rate": 4.6931629640802433e-07, "loss": 2.8963, "step": 219120 }, { "epoch": 9.438773312658828, "learning_rate": 4.692678144325454e-07, "loss": 2.5918, "step": 219140 }, { "epoch": 9.439634750398415, "learning_rate": 4.692193324570665e-07, "loss": 2.6708, "step": 219160 }, { "epoch": 9.440496188138003, "learning_rate": 4.691708504815876e-07, "loss": 3.0272, "step": 219180 }, { "epoch": 9.44135762587759, "learning_rate": 4.691223685061087e-07, "loss": 2.6152, "step": 219200 }, { "epoch": 9.442219063617177, "learning_rate": 4.690738865306298e-07, "loss": 2.7087, "step": 219220 }, { "epoch": 9.443080501356764, "learning_rate": 4.6902540455515097e-07, "loss": 2.7778, "step": 219240 }, { "epoch": 9.443941939096351, "learning_rate": 4.68976922579672e-07, "loss": 2.7955, "step": 219260 }, { "epoch": 9.444803376835939, "learning_rate": 4.6892844060419316e-07, "loss": 2.7842, "step": 219280 }, { "epoch": 9.445664814575526, "learning_rate": 4.6887995862871424e-07, "loss": 2.6905, "step": 219300 }, { "epoch": 9.446526252315113, "learning_rate": 4.6883147665323536e-07, "loss": 2.6503, "step": 219320 }, { "epoch": 9.4473876900547, "learning_rate": 4.6878299467775643e-07, "loss": 2.709, "step": 219340 }, { "epoch": 9.44824912779429, "learning_rate": 4.687345127022776e-07, "loss": 2.742, "step": 219360 }, { "epoch": 9.449110565533877, "learning_rate": 4.6868603072679863e-07, "loss": 2.6826, "step": 219380 }, { "epoch": 9.449972003273464, "learning_rate": 4.6863754875131975e-07, "loss": 2.7637, "step": 219400 }, { "epoch": 9.450833441013051, "learning_rate": 4.685890667758409e-07, "loss": 2.7583, "step": 219420 }, { "epoch": 9.451694878752638, "learning_rate": 4.68540584800362e-07, "loss": 2.6713, "step": 219440 }, { "epoch": 9.452556316492226, "learning_rate": 4.6849210282488307e-07, "loss": 2.8431, "step": 219460 }, { "epoch": 9.453417754231813, "learning_rate": 4.6844362084940414e-07, "loss": 2.7665, "step": 219480 }, { "epoch": 9.4542791919714, "learning_rate": 4.683951388739253e-07, "loss": 2.7237, "step": 219500 }, { "epoch": 9.455140629710987, "learning_rate": 4.6834665689844634e-07, "loss": 2.739, "step": 219520 }, { "epoch": 9.456002067450575, "learning_rate": 4.682981749229675e-07, "loss": 2.7976, "step": 219540 }, { "epoch": 9.456863505190162, "learning_rate": 4.6824969294748854e-07, "loss": 2.8751, "step": 219560 }, { "epoch": 9.45772494292975, "learning_rate": 4.682012109720097e-07, "loss": 2.8261, "step": 219580 }, { "epoch": 9.458586380669336, "learning_rate": 4.681527289965308e-07, "loss": 2.682, "step": 219600 }, { "epoch": 9.459447818408924, "learning_rate": 4.6810424702105196e-07, "loss": 2.8218, "step": 219620 }, { "epoch": 9.460309256148513, "learning_rate": 4.68055765045573e-07, "loss": 2.807, "step": 219640 }, { "epoch": 9.4611706938881, "learning_rate": 4.6800728307009416e-07, "loss": 2.7572, "step": 219660 }, { "epoch": 9.462032131627687, "learning_rate": 4.6795880109461523e-07, "loss": 2.8307, "step": 219680 }, { "epoch": 9.462893569367274, "learning_rate": 4.6791031911913635e-07, "loss": 2.6896, "step": 219700 }, { "epoch": 9.463755007106862, "learning_rate": 4.678618371436574e-07, "loss": 2.8259, "step": 219720 }, { "epoch": 9.464616444846449, "learning_rate": 4.678133551681786e-07, "loss": 2.8468, "step": 219740 }, { "epoch": 9.465477882586036, "learning_rate": 4.677648731926997e-07, "loss": 2.8159, "step": 219760 }, { "epoch": 9.466339320325623, "learning_rate": 4.677163912172208e-07, "loss": 2.6888, "step": 219780 }, { "epoch": 9.46720075806521, "learning_rate": 4.6766790924174187e-07, "loss": 2.9189, "step": 219800 }, { "epoch": 9.468062195804798, "learning_rate": 4.6761942726626305e-07, "loss": 2.618, "step": 219820 }, { "epoch": 9.468923633544385, "learning_rate": 4.6757094529078407e-07, "loss": 2.9062, "step": 219840 }, { "epoch": 9.469785071283972, "learning_rate": 4.6752246331530524e-07, "loss": 2.8533, "step": 219860 }, { "epoch": 9.47064650902356, "learning_rate": 4.674739813398263e-07, "loss": 2.7796, "step": 219880 }, { "epoch": 9.471507946763147, "learning_rate": 4.6742549936434744e-07, "loss": 2.5566, "step": 219900 }, { "epoch": 9.472369384502736, "learning_rate": 4.673770173888685e-07, "loss": 2.6976, "step": 219920 }, { "epoch": 9.473230822242323, "learning_rate": 4.673285354133897e-07, "loss": 2.6365, "step": 219940 }, { "epoch": 9.47409225998191, "learning_rate": 4.672800534379107e-07, "loss": 2.7811, "step": 219960 }, { "epoch": 9.474953697721498, "learning_rate": 4.672315714624319e-07, "loss": 2.712, "step": 219980 }, { "epoch": 9.475815135461085, "learning_rate": 4.6718308948695296e-07, "loss": 2.8672, "step": 220000 }, { "epoch": 9.476676573200672, "learning_rate": 4.67134607511474e-07, "loss": 2.7057, "step": 220020 }, { "epoch": 9.47753801094026, "learning_rate": 4.6708612553599515e-07, "loss": 2.6057, "step": 220040 }, { "epoch": 9.478399448679847, "learning_rate": 4.670376435605162e-07, "loss": 2.7621, "step": 220060 }, { "epoch": 9.479260886419434, "learning_rate": 4.6698916158503735e-07, "loss": 2.812, "step": 220080 }, { "epoch": 9.480122324159021, "learning_rate": 4.669406796095584e-07, "loss": 2.6328, "step": 220100 }, { "epoch": 9.480983761898608, "learning_rate": 4.668921976340796e-07, "loss": 2.8228, "step": 220120 }, { "epoch": 9.481845199638196, "learning_rate": 4.6684371565860067e-07, "loss": 2.7412, "step": 220140 }, { "epoch": 9.482706637377783, "learning_rate": 4.667952336831218e-07, "loss": 2.8407, "step": 220160 }, { "epoch": 9.48356807511737, "learning_rate": 4.6674675170764286e-07, "loss": 2.8508, "step": 220180 }, { "epoch": 9.48442951285696, "learning_rate": 4.6669826973216404e-07, "loss": 2.8285, "step": 220200 }, { "epoch": 9.485290950596546, "learning_rate": 4.6664978775668506e-07, "loss": 2.7229, "step": 220220 }, { "epoch": 9.486152388336134, "learning_rate": 4.6660130578120624e-07, "loss": 2.773, "step": 220240 }, { "epoch": 9.48701382607572, "learning_rate": 4.665528238057273e-07, "loss": 2.7426, "step": 220260 }, { "epoch": 9.487875263815308, "learning_rate": 4.6650434183024843e-07, "loss": 2.8881, "step": 220280 }, { "epoch": 9.488736701554895, "learning_rate": 4.664558598547695e-07, "loss": 2.6345, "step": 220300 }, { "epoch": 9.489598139294483, "learning_rate": 4.664073778792907e-07, "loss": 2.7257, "step": 220320 }, { "epoch": 9.49045957703407, "learning_rate": 4.663588959038117e-07, "loss": 2.8465, "step": 220340 }, { "epoch": 9.491321014773657, "learning_rate": 4.663104139283329e-07, "loss": 2.9037, "step": 220360 }, { "epoch": 9.492182452513244, "learning_rate": 4.6626193195285395e-07, "loss": 2.7852, "step": 220380 }, { "epoch": 9.493043890252832, "learning_rate": 4.6621344997737507e-07, "loss": 2.7868, "step": 220400 }, { "epoch": 9.493905327992419, "learning_rate": 4.6616496800189614e-07, "loss": 2.692, "step": 220420 }, { "epoch": 9.494766765732006, "learning_rate": 4.661164860264173e-07, "loss": 2.6847, "step": 220440 }, { "epoch": 9.495628203471593, "learning_rate": 4.660680040509384e-07, "loss": 2.562, "step": 220460 }, { "epoch": 9.49648964121118, "learning_rate": 4.660195220754595e-07, "loss": 2.7846, "step": 220480 }, { "epoch": 9.49735107895077, "learning_rate": 4.6597104009998064e-07, "loss": 2.7741, "step": 220500 }, { "epoch": 9.498212516690357, "learning_rate": 4.6592255812450177e-07, "loss": 2.89, "step": 220520 }, { "epoch": 9.499073954429944, "learning_rate": 4.658740761490228e-07, "loss": 2.686, "step": 220540 }, { "epoch": 9.499935392169531, "learning_rate": 4.6582559417354386e-07, "loss": 2.7728, "step": 220560 }, { "epoch": 9.500796829909119, "learning_rate": 4.6577711219806503e-07, "loss": 2.7762, "step": 220580 }, { "epoch": 9.501658267648706, "learning_rate": 4.6572863022258605e-07, "loss": 2.7031, "step": 220600 }, { "epoch": 9.502519705388293, "learning_rate": 4.6568014824710723e-07, "loss": 2.867, "step": 220620 }, { "epoch": 9.50338114312788, "learning_rate": 4.656316662716283e-07, "loss": 2.7529, "step": 220640 }, { "epoch": 9.504242580867468, "learning_rate": 4.655831842961494e-07, "loss": 2.7569, "step": 220660 }, { "epoch": 9.505104018607055, "learning_rate": 4.655347023206705e-07, "loss": 2.7325, "step": 220680 }, { "epoch": 9.505965456346642, "learning_rate": 4.654862203451917e-07, "loss": 2.7647, "step": 220700 }, { "epoch": 9.50682689408623, "learning_rate": 4.654377383697127e-07, "loss": 2.8313, "step": 220720 }, { "epoch": 9.507688331825817, "learning_rate": 4.6538925639423387e-07, "loss": 2.6693, "step": 220740 }, { "epoch": 9.508549769565406, "learning_rate": 4.6534077441875494e-07, "loss": 2.7826, "step": 220760 }, { "epoch": 9.509411207304993, "learning_rate": 4.6529229244327607e-07, "loss": 2.8352, "step": 220780 }, { "epoch": 9.51027264504458, "learning_rate": 4.6524381046779714e-07, "loss": 2.8352, "step": 220800 }, { "epoch": 9.511134082784167, "learning_rate": 4.651953284923183e-07, "loss": 2.7102, "step": 220820 }, { "epoch": 9.511995520523755, "learning_rate": 4.651468465168394e-07, "loss": 2.6711, "step": 220840 }, { "epoch": 9.512856958263342, "learning_rate": 4.650983645413605e-07, "loss": 2.7535, "step": 220860 }, { "epoch": 9.513718396002929, "learning_rate": 4.650498825658816e-07, "loss": 2.801, "step": 220880 }, { "epoch": 9.514579833742516, "learning_rate": 4.6500140059040276e-07, "loss": 2.7716, "step": 220900 }, { "epoch": 9.515441271482104, "learning_rate": 4.6495291861492383e-07, "loss": 2.719, "step": 220920 }, { "epoch": 9.51630270922169, "learning_rate": 4.6490443663944496e-07, "loss": 2.7264, "step": 220940 }, { "epoch": 9.517164146961278, "learning_rate": 4.6485595466396603e-07, "loss": 2.8272, "step": 220960 }, { "epoch": 9.518025584700865, "learning_rate": 4.6480747268848715e-07, "loss": 2.7964, "step": 220980 }, { "epoch": 9.518887022440452, "learning_rate": 4.647589907130082e-07, "loss": 2.8806, "step": 221000 }, { "epoch": 9.51974846018004, "learning_rate": 4.647105087375294e-07, "loss": 2.5979, "step": 221020 }, { "epoch": 9.520609897919627, "learning_rate": 4.646620267620504e-07, "loss": 2.7241, "step": 221040 }, { "epoch": 9.521471335659216, "learning_rate": 4.646135447865716e-07, "loss": 2.9737, "step": 221060 }, { "epoch": 9.522332773398803, "learning_rate": 4.6456506281109267e-07, "loss": 2.662, "step": 221080 }, { "epoch": 9.52319421113839, "learning_rate": 4.6451658083561374e-07, "loss": 2.662, "step": 221100 }, { "epoch": 9.524055648877978, "learning_rate": 4.6446809886013486e-07, "loss": 2.8679, "step": 221120 }, { "epoch": 9.524917086617565, "learning_rate": 4.6441961688465594e-07, "loss": 2.7523, "step": 221140 }, { "epoch": 9.525778524357152, "learning_rate": 4.6437113490917706e-07, "loss": 2.6111, "step": 221160 }, { "epoch": 9.52663996209674, "learning_rate": 4.6432265293369813e-07, "loss": 2.6969, "step": 221180 }, { "epoch": 9.527501399836327, "learning_rate": 4.642741709582193e-07, "loss": 2.9035, "step": 221200 }, { "epoch": 9.528362837575914, "learning_rate": 4.642256889827404e-07, "loss": 2.6181, "step": 221220 }, { "epoch": 9.529224275315501, "learning_rate": 4.641772070072615e-07, "loss": 2.6503, "step": 221240 }, { "epoch": 9.530085713055088, "learning_rate": 4.641287250317826e-07, "loss": 2.698, "step": 221260 }, { "epoch": 9.530947150794676, "learning_rate": 4.6408024305630375e-07, "loss": 2.8984, "step": 221280 }, { "epoch": 9.531808588534263, "learning_rate": 4.6403176108082477e-07, "loss": 2.8184, "step": 221300 }, { "epoch": 9.532670026273852, "learning_rate": 4.6398327910534595e-07, "loss": 2.7037, "step": 221320 }, { "epoch": 9.53353146401344, "learning_rate": 4.6393479712986697e-07, "loss": 2.7479, "step": 221340 }, { "epoch": 9.534392901753026, "learning_rate": 4.6388631515438815e-07, "loss": 2.6851, "step": 221360 }, { "epoch": 9.535254339492614, "learning_rate": 4.638378331789092e-07, "loss": 2.9043, "step": 221380 }, { "epoch": 9.536115777232201, "learning_rate": 4.637893512034304e-07, "loss": 2.7045, "step": 221400 }, { "epoch": 9.536977214971788, "learning_rate": 4.637408692279514e-07, "loss": 2.7692, "step": 221420 }, { "epoch": 9.537838652711375, "learning_rate": 4.636923872524726e-07, "loss": 2.6278, "step": 221440 }, { "epoch": 9.538700090450963, "learning_rate": 4.6364390527699366e-07, "loss": 2.8553, "step": 221460 }, { "epoch": 9.53956152819055, "learning_rate": 4.635954233015148e-07, "loss": 2.7888, "step": 221480 }, { "epoch": 9.540422965930137, "learning_rate": 4.6354694132603586e-07, "loss": 2.6852, "step": 221500 }, { "epoch": 9.541284403669724, "learning_rate": 4.6349845935055704e-07, "loss": 2.6897, "step": 221520 }, { "epoch": 9.542145841409312, "learning_rate": 4.634499773750781e-07, "loss": 2.5976, "step": 221540 }, { "epoch": 9.543007279148899, "learning_rate": 4.6340149539959923e-07, "loss": 2.9348, "step": 221560 }, { "epoch": 9.543868716888486, "learning_rate": 4.633530134241203e-07, "loss": 2.7513, "step": 221580 }, { "epoch": 9.544730154628073, "learning_rate": 4.633045314486415e-07, "loss": 2.7227, "step": 221600 }, { "epoch": 9.545591592367662, "learning_rate": 4.632560494731625e-07, "loss": 2.81, "step": 221620 }, { "epoch": 9.54645303010725, "learning_rate": 4.6320756749768357e-07, "loss": 2.8312, "step": 221640 }, { "epoch": 9.547314467846837, "learning_rate": 4.6315908552220475e-07, "loss": 2.9392, "step": 221660 }, { "epoch": 9.548175905586424, "learning_rate": 4.6311060354672577e-07, "loss": 2.868, "step": 221680 }, { "epoch": 9.549037343326011, "learning_rate": 4.6306212157124694e-07, "loss": 2.7795, "step": 221700 }, { "epoch": 9.549898781065599, "learning_rate": 4.63013639595768e-07, "loss": 2.7455, "step": 221720 }, { "epoch": 9.550760218805186, "learning_rate": 4.6296515762028914e-07, "loss": 2.6211, "step": 221740 }, { "epoch": 9.551621656544773, "learning_rate": 4.6291667564481016e-07, "loss": 2.6358, "step": 221760 }, { "epoch": 9.55248309428436, "learning_rate": 4.628681936693314e-07, "loss": 2.6719, "step": 221780 }, { "epoch": 9.553344532023948, "learning_rate": 4.628197116938524e-07, "loss": 2.6254, "step": 221800 }, { "epoch": 9.554205969763535, "learning_rate": 4.627712297183736e-07, "loss": 2.7804, "step": 221820 }, { "epoch": 9.555067407503122, "learning_rate": 4.6272274774289466e-07, "loss": 2.6968, "step": 221840 }, { "epoch": 9.55592884524271, "learning_rate": 4.626742657674158e-07, "loss": 2.7047, "step": 221860 }, { "epoch": 9.556790282982298, "learning_rate": 4.6262578379193685e-07, "loss": 2.8242, "step": 221880 }, { "epoch": 9.557651720721886, "learning_rate": 4.6257730181645803e-07, "loss": 2.7317, "step": 221900 }, { "epoch": 9.558513158461473, "learning_rate": 4.625288198409791e-07, "loss": 2.7427, "step": 221920 }, { "epoch": 9.55937459620106, "learning_rate": 4.624803378655002e-07, "loss": 2.7459, "step": 221940 }, { "epoch": 9.560236033940647, "learning_rate": 4.624318558900213e-07, "loss": 2.6571, "step": 221960 }, { "epoch": 9.561097471680235, "learning_rate": 4.623833739145425e-07, "loss": 2.819, "step": 221980 }, { "epoch": 9.561958909419822, "learning_rate": 4.623348919390635e-07, "loss": 2.8205, "step": 222000 }, { "epoch": 9.562820347159409, "learning_rate": 4.6228640996358467e-07, "loss": 2.8551, "step": 222020 }, { "epoch": 9.563681784898996, "learning_rate": 4.6223792798810574e-07, "loss": 2.8075, "step": 222040 }, { "epoch": 9.564543222638584, "learning_rate": 4.6218944601262687e-07, "loss": 2.7886, "step": 222060 }, { "epoch": 9.56540466037817, "learning_rate": 4.6214096403714794e-07, "loss": 2.8636, "step": 222080 }, { "epoch": 9.566266098117758, "learning_rate": 4.620924820616691e-07, "loss": 2.7891, "step": 222100 }, { "epoch": 9.567127535857345, "learning_rate": 4.6204400008619013e-07, "loss": 2.7293, "step": 222120 }, { "epoch": 9.567988973596933, "learning_rate": 4.619955181107113e-07, "loss": 2.7045, "step": 222140 }, { "epoch": 9.56885041133652, "learning_rate": 4.619470361352324e-07, "loss": 2.6255, "step": 222160 }, { "epoch": 9.569711849076109, "learning_rate": 4.6189855415975345e-07, "loss": 2.6517, "step": 222180 }, { "epoch": 9.570573286815696, "learning_rate": 4.618500721842746e-07, "loss": 2.5779, "step": 222200 }, { "epoch": 9.571434724555283, "learning_rate": 4.6180159020879565e-07, "loss": 2.6227, "step": 222220 }, { "epoch": 9.57229616229487, "learning_rate": 4.6175310823331683e-07, "loss": 2.8602, "step": 222240 }, { "epoch": 9.573157600034458, "learning_rate": 4.6170462625783785e-07, "loss": 2.7164, "step": 222260 }, { "epoch": 9.574019037774045, "learning_rate": 4.616561442823591e-07, "loss": 2.6927, "step": 222280 }, { "epoch": 9.574880475513632, "learning_rate": 4.616076623068801e-07, "loss": 2.776, "step": 222300 }, { "epoch": 9.57574191325322, "learning_rate": 4.615591803314012e-07, "loss": 2.761, "step": 222320 }, { "epoch": 9.576603350992807, "learning_rate": 4.615106983559223e-07, "loss": 2.7828, "step": 222340 }, { "epoch": 9.577464788732394, "learning_rate": 4.6146221638044347e-07, "loss": 2.742, "step": 222360 }, { "epoch": 9.578326226471981, "learning_rate": 4.614137344049645e-07, "loss": 2.8463, "step": 222380 }, { "epoch": 9.579187664211569, "learning_rate": 4.6136525242948566e-07, "loss": 2.9185, "step": 222400 }, { "epoch": 9.580049101951156, "learning_rate": 4.6131677045400674e-07, "loss": 2.8181, "step": 222420 }, { "epoch": 9.580910539690743, "learning_rate": 4.6126828847852786e-07, "loss": 2.619, "step": 222440 }, { "epoch": 9.581771977430332, "learning_rate": 4.6121980650304893e-07, "loss": 2.8647, "step": 222460 }, { "epoch": 9.58263341516992, "learning_rate": 4.611713245275701e-07, "loss": 2.7904, "step": 222480 }, { "epoch": 9.583494852909507, "learning_rate": 4.6112284255209113e-07, "loss": 2.6804, "step": 222500 }, { "epoch": 9.584356290649094, "learning_rate": 4.610743605766123e-07, "loss": 2.7771, "step": 222520 }, { "epoch": 9.585217728388681, "learning_rate": 4.610258786011334e-07, "loss": 2.8796, "step": 222540 }, { "epoch": 9.586079166128268, "learning_rate": 4.609773966256545e-07, "loss": 2.6521, "step": 222560 }, { "epoch": 9.586940603867856, "learning_rate": 4.6092891465017557e-07, "loss": 2.858, "step": 222580 }, { "epoch": 9.587802041607443, "learning_rate": 4.6088043267469675e-07, "loss": 2.6884, "step": 222600 }, { "epoch": 9.58866347934703, "learning_rate": 4.608319506992178e-07, "loss": 2.8, "step": 222620 }, { "epoch": 9.589524917086617, "learning_rate": 4.6078346872373895e-07, "loss": 2.8533, "step": 222640 }, { "epoch": 9.590386354826204, "learning_rate": 4.6073498674826e-07, "loss": 2.6732, "step": 222660 }, { "epoch": 9.591247792565792, "learning_rate": 4.606865047727812e-07, "loss": 2.7219, "step": 222680 }, { "epoch": 9.592109230305379, "learning_rate": 4.6063802279730227e-07, "loss": 2.6791, "step": 222700 }, { "epoch": 9.592970668044966, "learning_rate": 4.605895408218233e-07, "loss": 2.8623, "step": 222720 }, { "epoch": 9.593832105784555, "learning_rate": 4.6054105884634446e-07, "loss": 2.7245, "step": 222740 }, { "epoch": 9.594693543524142, "learning_rate": 4.604925768708655e-07, "loss": 2.64, "step": 222760 }, { "epoch": 9.59555498126373, "learning_rate": 4.6044409489538666e-07, "loss": 2.7437, "step": 222780 }, { "epoch": 9.596416419003317, "learning_rate": 4.6039561291990773e-07, "loss": 2.7288, "step": 222800 }, { "epoch": 9.597277856742904, "learning_rate": 4.6034713094442885e-07, "loss": 2.8061, "step": 222820 }, { "epoch": 9.598139294482491, "learning_rate": 4.602986489689499e-07, "loss": 2.8817, "step": 222840 }, { "epoch": 9.599000732222079, "learning_rate": 4.602501669934711e-07, "loss": 2.6967, "step": 222860 }, { "epoch": 9.599862169961666, "learning_rate": 4.602016850179922e-07, "loss": 2.8687, "step": 222880 }, { "epoch": 9.600723607701253, "learning_rate": 4.601532030425133e-07, "loss": 2.956, "step": 222900 }, { "epoch": 9.60158504544084, "learning_rate": 4.6010472106703437e-07, "loss": 2.801, "step": 222920 }, { "epoch": 9.602446483180428, "learning_rate": 4.600562390915555e-07, "loss": 2.7654, "step": 222940 }, { "epoch": 9.603307920920015, "learning_rate": 4.6000775711607657e-07, "loss": 2.7122, "step": 222960 }, { "epoch": 9.604169358659602, "learning_rate": 4.5995927514059774e-07, "loss": 2.8072, "step": 222980 }, { "epoch": 9.60503079639919, "learning_rate": 4.599107931651188e-07, "loss": 2.8184, "step": 223000 }, { "epoch": 9.605892234138778, "learning_rate": 4.5986231118963994e-07, "loss": 2.8303, "step": 223020 }, { "epoch": 9.606753671878366, "learning_rate": 4.59813829214161e-07, "loss": 2.8135, "step": 223040 }, { "epoch": 9.607615109617953, "learning_rate": 4.597653472386822e-07, "loss": 2.8092, "step": 223060 }, { "epoch": 9.60847654735754, "learning_rate": 4.597168652632032e-07, "loss": 2.8142, "step": 223080 }, { "epoch": 9.609337985097127, "learning_rate": 4.596683832877244e-07, "loss": 2.7048, "step": 223100 }, { "epoch": 9.610199422836715, "learning_rate": 4.596199013122454e-07, "loss": 2.6679, "step": 223120 }, { "epoch": 9.611060860576302, "learning_rate": 4.595714193367666e-07, "loss": 2.7028, "step": 223140 }, { "epoch": 9.61192229831589, "learning_rate": 4.5952293736128765e-07, "loss": 2.5985, "step": 223160 }, { "epoch": 9.612783736055476, "learning_rate": 4.5947445538580883e-07, "loss": 2.6536, "step": 223180 }, { "epoch": 9.613645173795064, "learning_rate": 4.5942597341032985e-07, "loss": 2.7786, "step": 223200 }, { "epoch": 9.614506611534651, "learning_rate": 4.59377491434851e-07, "loss": 2.8383, "step": 223220 }, { "epoch": 9.615368049274238, "learning_rate": 4.593290094593721e-07, "loss": 2.732, "step": 223240 }, { "epoch": 9.616229487013825, "learning_rate": 4.5928052748389317e-07, "loss": 2.7582, "step": 223260 }, { "epoch": 9.617090924753413, "learning_rate": 4.592320455084143e-07, "loss": 2.771, "step": 223280 }, { "epoch": 9.617952362493, "learning_rate": 4.5918356353293536e-07, "loss": 2.803, "step": 223300 }, { "epoch": 9.618813800232589, "learning_rate": 4.5913508155745654e-07, "loss": 2.7081, "step": 223320 }, { "epoch": 9.619675237972176, "learning_rate": 4.5908659958197756e-07, "loss": 2.8873, "step": 223340 }, { "epoch": 9.620536675711763, "learning_rate": 4.5903811760649874e-07, "loss": 2.863, "step": 223360 }, { "epoch": 9.62139811345135, "learning_rate": 4.589896356310198e-07, "loss": 2.7993, "step": 223380 }, { "epoch": 9.622259551190938, "learning_rate": 4.5894115365554093e-07, "loss": 2.6764, "step": 223400 }, { "epoch": 9.623120988930525, "learning_rate": 4.58892671680062e-07, "loss": 2.8272, "step": 223420 }, { "epoch": 9.623982426670112, "learning_rate": 4.588441897045832e-07, "loss": 2.7148, "step": 223440 }, { "epoch": 9.6248438644097, "learning_rate": 4.587957077291042e-07, "loss": 2.7571, "step": 223460 }, { "epoch": 9.625705302149287, "learning_rate": 4.587472257536254e-07, "loss": 2.7363, "step": 223480 }, { "epoch": 9.626566739888874, "learning_rate": 4.5869874377814645e-07, "loss": 2.7913, "step": 223500 }, { "epoch": 9.627428177628461, "learning_rate": 4.5865026180266757e-07, "loss": 2.8569, "step": 223520 }, { "epoch": 9.628289615368049, "learning_rate": 4.586017798271886e-07, "loss": 2.9943, "step": 223540 }, { "epoch": 9.629151053107636, "learning_rate": 4.585532978517098e-07, "loss": 2.8718, "step": 223560 }, { "epoch": 9.630012490847225, "learning_rate": 4.5850481587623084e-07, "loss": 2.7683, "step": 223580 }, { "epoch": 9.630873928586812, "learning_rate": 4.58456333900752e-07, "loss": 2.8745, "step": 223600 }, { "epoch": 9.6317353663264, "learning_rate": 4.584078519252731e-07, "loss": 2.806, "step": 223620 }, { "epoch": 9.632596804065987, "learning_rate": 4.583593699497942e-07, "loss": 2.7738, "step": 223640 }, { "epoch": 9.633458241805574, "learning_rate": 4.583108879743153e-07, "loss": 2.8604, "step": 223660 }, { "epoch": 9.634319679545161, "learning_rate": 4.5826240599883646e-07, "loss": 2.8135, "step": 223680 }, { "epoch": 9.635181117284748, "learning_rate": 4.5821392402335753e-07, "loss": 2.8537, "step": 223700 }, { "epoch": 9.636042555024336, "learning_rate": 4.5816544204787866e-07, "loss": 2.7165, "step": 223720 }, { "epoch": 9.636903992763923, "learning_rate": 4.5811696007239973e-07, "loss": 2.6836, "step": 223740 }, { "epoch": 9.63776543050351, "learning_rate": 4.580684780969209e-07, "loss": 2.6913, "step": 223760 }, { "epoch": 9.638626868243097, "learning_rate": 4.580199961214419e-07, "loss": 2.6725, "step": 223780 }, { "epoch": 9.639488305982685, "learning_rate": 4.57971514145963e-07, "loss": 2.8223, "step": 223800 }, { "epoch": 9.640349743722272, "learning_rate": 4.579230321704842e-07, "loss": 2.7259, "step": 223820 }, { "epoch": 9.641211181461859, "learning_rate": 4.578745501950052e-07, "loss": 2.816, "step": 223840 }, { "epoch": 9.642072619201446, "learning_rate": 4.5782606821952637e-07, "loss": 2.7359, "step": 223860 }, { "epoch": 9.642934056941035, "learning_rate": 4.5777758624404744e-07, "loss": 2.7473, "step": 223880 }, { "epoch": 9.643795494680623, "learning_rate": 4.5772910426856857e-07, "loss": 2.8182, "step": 223900 }, { "epoch": 9.64465693242021, "learning_rate": 4.5768062229308964e-07, "loss": 2.7864, "step": 223920 }, { "epoch": 9.645518370159797, "learning_rate": 4.576321403176108e-07, "loss": 2.8997, "step": 223940 }, { "epoch": 9.646379807899384, "learning_rate": 4.575836583421319e-07, "loss": 2.7875, "step": 223960 }, { "epoch": 9.647241245638972, "learning_rate": 4.57535176366653e-07, "loss": 2.7154, "step": 223980 }, { "epoch": 9.648102683378559, "learning_rate": 4.574866943911741e-07, "loss": 2.6767, "step": 224000 }, { "epoch": 9.648964121118146, "learning_rate": 4.5743821241569526e-07, "loss": 2.8458, "step": 224020 }, { "epoch": 9.649825558857733, "learning_rate": 4.573897304402163e-07, "loss": 2.8304, "step": 224040 }, { "epoch": 9.65068699659732, "learning_rate": 4.573412484647375e-07, "loss": 2.8133, "step": 224060 }, { "epoch": 9.651548434336908, "learning_rate": 4.5729276648925853e-07, "loss": 2.7072, "step": 224080 }, { "epoch": 9.652409872076495, "learning_rate": 4.5724428451377965e-07, "loss": 2.8338, "step": 224100 }, { "epoch": 9.653271309816082, "learning_rate": 4.571958025383007e-07, "loss": 2.6711, "step": 224120 }, { "epoch": 9.654132747555671, "learning_rate": 4.571473205628219e-07, "loss": 2.7503, "step": 224140 }, { "epoch": 9.654994185295259, "learning_rate": 4.570988385873429e-07, "loss": 2.7118, "step": 224160 }, { "epoch": 9.655855623034846, "learning_rate": 4.570503566118641e-07, "loss": 2.6771, "step": 224180 }, { "epoch": 9.656717060774433, "learning_rate": 4.5700187463638517e-07, "loss": 2.7248, "step": 224200 }, { "epoch": 9.65757849851402, "learning_rate": 4.569533926609063e-07, "loss": 2.8017, "step": 224220 }, { "epoch": 9.658439936253608, "learning_rate": 4.5690491068542736e-07, "loss": 2.9738, "step": 224240 }, { "epoch": 9.659301373993195, "learning_rate": 4.5685642870994854e-07, "loss": 2.7204, "step": 224260 }, { "epoch": 9.660162811732782, "learning_rate": 4.5680794673446956e-07, "loss": 2.8758, "step": 224280 }, { "epoch": 9.66102424947237, "learning_rate": 4.5675946475899063e-07, "loss": 2.6574, "step": 224300 }, { "epoch": 9.661885687211957, "learning_rate": 4.567109827835118e-07, "loss": 2.8197, "step": 224320 }, { "epoch": 9.662747124951544, "learning_rate": 4.566625008080329e-07, "loss": 2.7282, "step": 224340 }, { "epoch": 9.663608562691131, "learning_rate": 4.56614018832554e-07, "loss": 2.6115, "step": 224360 }, { "epoch": 9.664470000430718, "learning_rate": 4.565655368570751e-07, "loss": 2.7395, "step": 224380 }, { "epoch": 9.665331438170305, "learning_rate": 4.5651705488159625e-07, "loss": 2.8541, "step": 224400 }, { "epoch": 9.666192875909893, "learning_rate": 4.5646857290611727e-07, "loss": 2.7594, "step": 224420 }, { "epoch": 9.667054313649482, "learning_rate": 4.5642009093063845e-07, "loss": 2.6516, "step": 224440 }, { "epoch": 9.667915751389069, "learning_rate": 4.563716089551595e-07, "loss": 2.8379, "step": 224460 }, { "epoch": 9.668777189128656, "learning_rate": 4.563231269796807e-07, "loss": 2.675, "step": 224480 }, { "epoch": 9.669638626868243, "learning_rate": 4.562746450042017e-07, "loss": 2.8521, "step": 224500 }, { "epoch": 9.67050006460783, "learning_rate": 4.562261630287229e-07, "loss": 2.7813, "step": 224520 }, { "epoch": 9.671361502347418, "learning_rate": 4.561776810532439e-07, "loss": 2.7344, "step": 224540 }, { "epoch": 9.672222940087005, "learning_rate": 4.561291990777651e-07, "loss": 2.8612, "step": 224560 }, { "epoch": 9.673084377826592, "learning_rate": 4.5608071710228616e-07, "loss": 2.7651, "step": 224580 }, { "epoch": 9.67394581556618, "learning_rate": 4.560322351268073e-07, "loss": 2.8943, "step": 224600 }, { "epoch": 9.674807253305767, "learning_rate": 4.5598375315132836e-07, "loss": 2.7203, "step": 224620 }, { "epoch": 9.675668691045354, "learning_rate": 4.5593527117584954e-07, "loss": 2.7841, "step": 224640 }, { "epoch": 9.676530128784941, "learning_rate": 4.558867892003706e-07, "loss": 2.5049, "step": 224660 }, { "epoch": 9.677391566524529, "learning_rate": 4.5583830722489173e-07, "loss": 2.7598, "step": 224680 }, { "epoch": 9.678253004264118, "learning_rate": 4.557898252494128e-07, "loss": 2.9121, "step": 224700 }, { "epoch": 9.679114442003705, "learning_rate": 4.5574134327393393e-07, "loss": 2.7836, "step": 224720 }, { "epoch": 9.679975879743292, "learning_rate": 4.55692861298455e-07, "loss": 2.7268, "step": 224740 }, { "epoch": 9.68083731748288, "learning_rate": 4.556443793229762e-07, "loss": 2.6832, "step": 224760 }, { "epoch": 9.681698755222467, "learning_rate": 4.5559589734749725e-07, "loss": 2.6138, "step": 224780 }, { "epoch": 9.682560192962054, "learning_rate": 4.5554741537201837e-07, "loss": 2.7521, "step": 224800 }, { "epoch": 9.683421630701641, "learning_rate": 4.5549893339653944e-07, "loss": 2.6972, "step": 224820 }, { "epoch": 9.684283068441228, "learning_rate": 4.554504514210605e-07, "loss": 2.7531, "step": 224840 }, { "epoch": 9.685144506180816, "learning_rate": 4.5540196944558164e-07, "loss": 2.6843, "step": 224860 }, { "epoch": 9.686005943920403, "learning_rate": 4.553534874701027e-07, "loss": 2.8923, "step": 224880 }, { "epoch": 9.68686738165999, "learning_rate": 4.5530500549462384e-07, "loss": 2.7335, "step": 224900 }, { "epoch": 9.687728819399577, "learning_rate": 4.552565235191449e-07, "loss": 2.6646, "step": 224920 }, { "epoch": 9.688590257139165, "learning_rate": 4.552080415436661e-07, "loss": 2.695, "step": 224940 }, { "epoch": 9.689451694878752, "learning_rate": 4.5515955956818716e-07, "loss": 2.7809, "step": 224960 }, { "epoch": 9.69031313261834, "learning_rate": 4.551110775927083e-07, "loss": 2.7469, "step": 224980 }, { "epoch": 9.691174570357928, "learning_rate": 4.5506259561722935e-07, "loss": 2.7765, "step": 225000 }, { "epoch": 9.692036008097515, "learning_rate": 4.5501411364175053e-07, "loss": 2.6461, "step": 225020 }, { "epoch": 9.692897445837103, "learning_rate": 4.549656316662716e-07, "loss": 2.6859, "step": 225040 }, { "epoch": 9.69375888357669, "learning_rate": 4.549171496907927e-07, "loss": 2.7593, "step": 225060 }, { "epoch": 9.694620321316277, "learning_rate": 4.548686677153138e-07, "loss": 2.729, "step": 225080 }, { "epoch": 9.695481759055864, "learning_rate": 4.54820185739835e-07, "loss": 2.8962, "step": 225100 }, { "epoch": 9.696343196795452, "learning_rate": 4.54771703764356e-07, "loss": 2.7119, "step": 225120 }, { "epoch": 9.697204634535039, "learning_rate": 4.5472322178887717e-07, "loss": 2.7855, "step": 225140 }, { "epoch": 9.698066072274626, "learning_rate": 4.5467473981339824e-07, "loss": 2.749, "step": 225160 }, { "epoch": 9.698927510014213, "learning_rate": 4.5462625783791937e-07, "loss": 2.5683, "step": 225180 }, { "epoch": 9.6997889477538, "learning_rate": 4.5457777586244044e-07, "loss": 2.6116, "step": 225200 }, { "epoch": 9.700650385493388, "learning_rate": 4.545292938869616e-07, "loss": 2.7632, "step": 225220 }, { "epoch": 9.701511823232975, "learning_rate": 4.5448081191148263e-07, "loss": 2.7693, "step": 225240 }, { "epoch": 9.702373260972564, "learning_rate": 4.544323299360038e-07, "loss": 2.7074, "step": 225260 }, { "epoch": 9.703234698712151, "learning_rate": 4.543838479605249e-07, "loss": 2.7151, "step": 225280 }, { "epoch": 9.704096136451739, "learning_rate": 4.54335365985046e-07, "loss": 2.6543, "step": 225300 }, { "epoch": 9.704957574191326, "learning_rate": 4.54286884009567e-07, "loss": 2.9024, "step": 225320 }, { "epoch": 9.705819011930913, "learning_rate": 4.5423840203408826e-07, "loss": 2.8344, "step": 225340 }, { "epoch": 9.7066804496705, "learning_rate": 4.541899200586093e-07, "loss": 2.8408, "step": 225360 }, { "epoch": 9.707541887410088, "learning_rate": 4.5414143808313035e-07, "loss": 2.6425, "step": 225380 }, { "epoch": 9.708403325149675, "learning_rate": 4.540929561076515e-07, "loss": 2.8614, "step": 225400 }, { "epoch": 9.709264762889262, "learning_rate": 4.540444741321726e-07, "loss": 2.6933, "step": 225420 }, { "epoch": 9.71012620062885, "learning_rate": 4.539959921566937e-07, "loss": 2.6296, "step": 225440 }, { "epoch": 9.710987638368437, "learning_rate": 4.539475101812148e-07, "loss": 2.6936, "step": 225460 }, { "epoch": 9.711849076108024, "learning_rate": 4.5389902820573597e-07, "loss": 2.6641, "step": 225480 }, { "epoch": 9.712710513847611, "learning_rate": 4.53850546230257e-07, "loss": 2.8102, "step": 225500 }, { "epoch": 9.713571951587198, "learning_rate": 4.5380206425477816e-07, "loss": 2.7152, "step": 225520 }, { "epoch": 9.714433389326786, "learning_rate": 4.5375358227929923e-07, "loss": 2.6994, "step": 225540 }, { "epoch": 9.715294827066375, "learning_rate": 4.5370510030382036e-07, "loss": 2.8739, "step": 225560 }, { "epoch": 9.716156264805962, "learning_rate": 4.5365661832834143e-07, "loss": 2.8303, "step": 225580 }, { "epoch": 9.717017702545549, "learning_rate": 4.536081363528626e-07, "loss": 2.7013, "step": 225600 }, { "epoch": 9.717879140285136, "learning_rate": 4.5355965437738363e-07, "loss": 2.8658, "step": 225620 }, { "epoch": 9.718740578024724, "learning_rate": 4.535111724019048e-07, "loss": 2.8617, "step": 225640 }, { "epoch": 9.71960201576431, "learning_rate": 4.534626904264259e-07, "loss": 2.7165, "step": 225660 }, { "epoch": 9.720463453503898, "learning_rate": 4.53414208450947e-07, "loss": 2.6301, "step": 225680 }, { "epoch": 9.721324891243485, "learning_rate": 4.5336572647546807e-07, "loss": 2.8198, "step": 225700 }, { "epoch": 9.722186328983073, "learning_rate": 4.5331724449998925e-07, "loss": 2.8984, "step": 225720 }, { "epoch": 9.72304776672266, "learning_rate": 4.532687625245103e-07, "loss": 2.6888, "step": 225740 }, { "epoch": 9.723909204462247, "learning_rate": 4.5322028054903144e-07, "loss": 2.8395, "step": 225760 }, { "epoch": 9.724770642201834, "learning_rate": 4.531717985735525e-07, "loss": 2.8543, "step": 225780 }, { "epoch": 9.725632079941422, "learning_rate": 4.531233165980737e-07, "loss": 2.9374, "step": 225800 }, { "epoch": 9.726493517681009, "learning_rate": 4.530748346225947e-07, "loss": 2.6287, "step": 225820 }, { "epoch": 9.727354955420598, "learning_rate": 4.5302635264711594e-07, "loss": 2.7455, "step": 225840 }, { "epoch": 9.728216393160185, "learning_rate": 4.5297787067163696e-07, "loss": 2.5992, "step": 225860 }, { "epoch": 9.729077830899772, "learning_rate": 4.529293886961581e-07, "loss": 2.8205, "step": 225880 }, { "epoch": 9.72993926863936, "learning_rate": 4.5288090672067916e-07, "loss": 2.8776, "step": 225900 }, { "epoch": 9.730800706378947, "learning_rate": 4.5283242474520023e-07, "loss": 2.4803, "step": 225920 }, { "epoch": 9.731662144118534, "learning_rate": 4.5278394276972135e-07, "loss": 2.8727, "step": 225940 }, { "epoch": 9.732523581858121, "learning_rate": 4.527354607942424e-07, "loss": 2.6881, "step": 225960 }, { "epoch": 9.733385019597709, "learning_rate": 4.526869788187636e-07, "loss": 2.6297, "step": 225980 }, { "epoch": 9.734246457337296, "learning_rate": 4.526384968432846e-07, "loss": 2.7619, "step": 226000 }, { "epoch": 9.735107895076883, "learning_rate": 4.525900148678058e-07, "loss": 2.8766, "step": 226020 }, { "epoch": 9.73596933281647, "learning_rate": 4.5254153289232687e-07, "loss": 2.7204, "step": 226040 }, { "epoch": 9.736830770556058, "learning_rate": 4.52493050916848e-07, "loss": 2.7272, "step": 226060 }, { "epoch": 9.737692208295645, "learning_rate": 4.5244456894136907e-07, "loss": 2.8508, "step": 226080 }, { "epoch": 9.738553646035232, "learning_rate": 4.5239608696589024e-07, "loss": 2.7654, "step": 226100 }, { "epoch": 9.73941508377482, "learning_rate": 4.523476049904113e-07, "loss": 2.7756, "step": 226120 }, { "epoch": 9.740276521514408, "learning_rate": 4.5229912301493244e-07, "loss": 2.6627, "step": 226140 }, { "epoch": 9.741137959253996, "learning_rate": 4.522506410394535e-07, "loss": 2.902, "step": 226160 }, { "epoch": 9.741999396993583, "learning_rate": 4.522021590639747e-07, "loss": 2.956, "step": 226180 }, { "epoch": 9.74286083473317, "learning_rate": 4.521536770884957e-07, "loss": 2.7228, "step": 226200 }, { "epoch": 9.743722272472757, "learning_rate": 4.521051951130169e-07, "loss": 2.6514, "step": 226220 }, { "epoch": 9.744583710212344, "learning_rate": 4.5205671313753795e-07, "loss": 2.7835, "step": 226240 }, { "epoch": 9.745445147951932, "learning_rate": 4.5200823116205913e-07, "loss": 2.768, "step": 226260 }, { "epoch": 9.746306585691519, "learning_rate": 4.5195974918658015e-07, "loss": 2.919, "step": 226280 }, { "epoch": 9.747168023431106, "learning_rate": 4.5191126721110133e-07, "loss": 2.9138, "step": 226300 }, { "epoch": 9.748029461170693, "learning_rate": 4.5186278523562235e-07, "loss": 2.7485, "step": 226320 }, { "epoch": 9.74889089891028, "learning_rate": 4.518143032601435e-07, "loss": 2.6993, "step": 226340 }, { "epoch": 9.749752336649868, "learning_rate": 4.517658212846646e-07, "loss": 2.944, "step": 226360 }, { "epoch": 9.750613774389455, "learning_rate": 4.517173393091857e-07, "loss": 2.892, "step": 226380 }, { "epoch": 9.751475212129044, "learning_rate": 4.516688573337068e-07, "loss": 2.6098, "step": 226400 }, { "epoch": 9.752336649868631, "learning_rate": 4.5162037535822797e-07, "loss": 2.7225, "step": 226420 }, { "epoch": 9.753198087608219, "learning_rate": 4.5157189338274904e-07, "loss": 2.8637, "step": 226440 }, { "epoch": 9.754059525347806, "learning_rate": 4.5152341140727006e-07, "loss": 2.9834, "step": 226460 }, { "epoch": 9.754920963087393, "learning_rate": 4.5147492943179124e-07, "loss": 2.8666, "step": 226480 }, { "epoch": 9.75578240082698, "learning_rate": 4.514264474563123e-07, "loss": 2.7582, "step": 226500 }, { "epoch": 9.756643838566568, "learning_rate": 4.5137796548083343e-07, "loss": 2.7784, "step": 226520 }, { "epoch": 9.757505276306155, "learning_rate": 4.513294835053545e-07, "loss": 2.8077, "step": 226540 }, { "epoch": 9.758366714045742, "learning_rate": 4.512810015298757e-07, "loss": 2.8165, "step": 226560 }, { "epoch": 9.75922815178533, "learning_rate": 4.512325195543967e-07, "loss": 2.779, "step": 226580 }, { "epoch": 9.760089589524917, "learning_rate": 4.511840375789179e-07, "loss": 2.6152, "step": 226600 }, { "epoch": 9.760951027264504, "learning_rate": 4.5113555560343895e-07, "loss": 2.727, "step": 226620 }, { "epoch": 9.761812465004091, "learning_rate": 4.5108707362796007e-07, "loss": 2.7786, "step": 226640 }, { "epoch": 9.762673902743678, "learning_rate": 4.5103859165248114e-07, "loss": 2.8263, "step": 226660 }, { "epoch": 9.763535340483266, "learning_rate": 4.5099010967700227e-07, "loss": 2.759, "step": 226680 }, { "epoch": 9.764396778222855, "learning_rate": 4.5094162770152334e-07, "loss": 2.9038, "step": 226700 }, { "epoch": 9.765258215962442, "learning_rate": 4.508931457260445e-07, "loss": 2.7489, "step": 226720 }, { "epoch": 9.76611965370203, "learning_rate": 4.508446637505656e-07, "loss": 2.7452, "step": 226740 }, { "epoch": 9.766981091441616, "learning_rate": 4.507961817750867e-07, "loss": 2.7395, "step": 226760 }, { "epoch": 9.767842529181204, "learning_rate": 4.507476997996078e-07, "loss": 2.7063, "step": 226780 }, { "epoch": 9.768703966920791, "learning_rate": 4.5069921782412896e-07, "loss": 2.7776, "step": 226800 }, { "epoch": 9.769565404660378, "learning_rate": 4.5065073584865003e-07, "loss": 2.8116, "step": 226820 }, { "epoch": 9.770426842399965, "learning_rate": 4.5060225387317116e-07, "loss": 2.9207, "step": 226840 }, { "epoch": 9.771288280139553, "learning_rate": 4.5055377189769223e-07, "loss": 2.7321, "step": 226860 }, { "epoch": 9.77214971787914, "learning_rate": 4.505052899222134e-07, "loss": 2.7426, "step": 226880 }, { "epoch": 9.773011155618727, "learning_rate": 4.504568079467344e-07, "loss": 2.8496, "step": 226900 }, { "epoch": 9.773872593358314, "learning_rate": 4.504083259712556e-07, "loss": 2.6321, "step": 226920 }, { "epoch": 9.774734031097902, "learning_rate": 4.503598439957767e-07, "loss": 2.8901, "step": 226940 }, { "epoch": 9.77559546883749, "learning_rate": 4.503113620202978e-07, "loss": 2.7151, "step": 226960 }, { "epoch": 9.776456906577078, "learning_rate": 4.5026288004481887e-07, "loss": 2.6635, "step": 226980 }, { "epoch": 9.777318344316665, "learning_rate": 4.5021439806933994e-07, "loss": 2.6493, "step": 227000 }, { "epoch": 9.778179782056252, "learning_rate": 4.5016591609386107e-07, "loss": 2.8145, "step": 227020 }, { "epoch": 9.77904121979584, "learning_rate": 4.5011743411838214e-07, "loss": 2.7313, "step": 227040 }, { "epoch": 9.779902657535427, "learning_rate": 4.500689521429033e-07, "loss": 2.6517, "step": 227060 }, { "epoch": 9.780764095275014, "learning_rate": 4.5002047016742433e-07, "loss": 2.7803, "step": 227080 }, { "epoch": 9.781625533014601, "learning_rate": 4.4997198819194546e-07, "loss": 2.795, "step": 227100 }, { "epoch": 9.782486970754189, "learning_rate": 4.499235062164666e-07, "loss": 2.7965, "step": 227120 }, { "epoch": 9.783348408493776, "learning_rate": 4.498750242409877e-07, "loss": 2.7508, "step": 227140 }, { "epoch": 9.784209846233363, "learning_rate": 4.498265422655088e-07, "loss": 2.6297, "step": 227160 }, { "epoch": 9.78507128397295, "learning_rate": 4.4977806029002996e-07, "loss": 2.8588, "step": 227180 }, { "epoch": 9.785932721712538, "learning_rate": 4.4972957831455103e-07, "loss": 2.747, "step": 227200 }, { "epoch": 9.786794159452125, "learning_rate": 4.4968109633907215e-07, "loss": 2.8575, "step": 227220 }, { "epoch": 9.787655597191712, "learning_rate": 4.496326143635932e-07, "loss": 2.863, "step": 227240 }, { "epoch": 9.788517034931301, "learning_rate": 4.495841323881144e-07, "loss": 2.7117, "step": 227260 }, { "epoch": 9.789378472670888, "learning_rate": 4.495356504126354e-07, "loss": 2.7848, "step": 227280 }, { "epoch": 9.790239910410476, "learning_rate": 4.494871684371566e-07, "loss": 2.6425, "step": 227300 }, { "epoch": 9.791101348150063, "learning_rate": 4.4943868646167767e-07, "loss": 2.7465, "step": 227320 }, { "epoch": 9.79196278588965, "learning_rate": 4.493902044861988e-07, "loss": 2.872, "step": 227340 }, { "epoch": 9.792824223629237, "learning_rate": 4.4934172251071986e-07, "loss": 2.7169, "step": 227360 }, { "epoch": 9.793685661368825, "learning_rate": 4.4929324053524104e-07, "loss": 2.8158, "step": 227380 }, { "epoch": 9.794547099108412, "learning_rate": 4.4924475855976206e-07, "loss": 2.7835, "step": 227400 }, { "epoch": 9.795408536847999, "learning_rate": 4.4919627658428324e-07, "loss": 2.6498, "step": 227420 }, { "epoch": 9.796269974587586, "learning_rate": 4.491477946088043e-07, "loss": 2.7942, "step": 227440 }, { "epoch": 9.797131412327174, "learning_rate": 4.4909931263332543e-07, "loss": 2.7132, "step": 227460 }, { "epoch": 9.79799285006676, "learning_rate": 4.490508306578465e-07, "loss": 2.773, "step": 227480 }, { "epoch": 9.798854287806348, "learning_rate": 4.490023486823677e-07, "loss": 2.5229, "step": 227500 }, { "epoch": 9.799715725545937, "learning_rate": 4.4895386670688875e-07, "loss": 2.7793, "step": 227520 }, { "epoch": 9.800577163285524, "learning_rate": 4.4890538473140977e-07, "loss": 2.9072, "step": 227540 }, { "epoch": 9.801438601025112, "learning_rate": 4.4885690275593095e-07, "loss": 2.8125, "step": 227560 }, { "epoch": 9.802300038764699, "learning_rate": 4.48808420780452e-07, "loss": 2.8258, "step": 227580 }, { "epoch": 9.803161476504286, "learning_rate": 4.4875993880497315e-07, "loss": 2.7891, "step": 227600 }, { "epoch": 9.804022914243873, "learning_rate": 4.487114568294942e-07, "loss": 2.979, "step": 227620 }, { "epoch": 9.80488435198346, "learning_rate": 4.486629748540154e-07, "loss": 2.9353, "step": 227640 }, { "epoch": 9.805745789723048, "learning_rate": 4.486144928785364e-07, "loss": 2.6931, "step": 227660 }, { "epoch": 9.806607227462635, "learning_rate": 4.485660109030576e-07, "loss": 2.7263, "step": 227680 }, { "epoch": 9.807468665202222, "learning_rate": 4.4851752892757866e-07, "loss": 2.6593, "step": 227700 }, { "epoch": 9.80833010294181, "learning_rate": 4.484690469520998e-07, "loss": 2.6925, "step": 227720 }, { "epoch": 9.809191540681397, "learning_rate": 4.4842056497662086e-07, "loss": 2.8602, "step": 227740 }, { "epoch": 9.810052978420984, "learning_rate": 4.4837208300114204e-07, "loss": 2.7879, "step": 227760 }, { "epoch": 9.810914416160571, "learning_rate": 4.4832360102566305e-07, "loss": 3.0059, "step": 227780 }, { "epoch": 9.811775853900158, "learning_rate": 4.4827511905018423e-07, "loss": 2.7912, "step": 227800 }, { "epoch": 9.812637291639748, "learning_rate": 4.482266370747053e-07, "loss": 2.9112, "step": 227820 }, { "epoch": 9.813498729379335, "learning_rate": 4.4817815509922643e-07, "loss": 2.8648, "step": 227840 }, { "epoch": 9.814360167118922, "learning_rate": 4.481296731237475e-07, "loss": 2.9697, "step": 227860 }, { "epoch": 9.81522160485851, "learning_rate": 4.480811911482687e-07, "loss": 2.8356, "step": 227880 }, { "epoch": 9.816083042598096, "learning_rate": 4.4803270917278975e-07, "loss": 2.8096, "step": 227900 }, { "epoch": 9.816944480337684, "learning_rate": 4.4798422719731087e-07, "loss": 2.8205, "step": 227920 }, { "epoch": 9.817805918077271, "learning_rate": 4.4793574522183194e-07, "loss": 2.9274, "step": 227940 }, { "epoch": 9.818667355816858, "learning_rate": 4.478872632463531e-07, "loss": 2.7424, "step": 227960 }, { "epoch": 9.819528793556445, "learning_rate": 4.4783878127087414e-07, "loss": 2.7874, "step": 227980 }, { "epoch": 9.820390231296033, "learning_rate": 4.477902992953953e-07, "loss": 2.9248, "step": 228000 }, { "epoch": 9.82125166903562, "learning_rate": 4.477418173199164e-07, "loss": 2.7727, "step": 228020 }, { "epoch": 9.822113106775207, "learning_rate": 4.4769333534443757e-07, "loss": 2.5964, "step": 228040 }, { "epoch": 9.822974544514794, "learning_rate": 4.476448533689586e-07, "loss": 2.824, "step": 228060 }, { "epoch": 9.823835982254383, "learning_rate": 4.4759637139347966e-07, "loss": 2.6053, "step": 228080 }, { "epoch": 9.82469741999397, "learning_rate": 4.475478894180008e-07, "loss": 2.831, "step": 228100 }, { "epoch": 9.825558857733558, "learning_rate": 4.4749940744252185e-07, "loss": 2.7021, "step": 228120 }, { "epoch": 9.826420295473145, "learning_rate": 4.4745092546704303e-07, "loss": 2.6783, "step": 228140 }, { "epoch": 9.827281733212732, "learning_rate": 4.474024434915641e-07, "loss": 2.8846, "step": 228160 }, { "epoch": 9.82814317095232, "learning_rate": 4.473539615160852e-07, "loss": 2.8144, "step": 228180 }, { "epoch": 9.829004608691907, "learning_rate": 4.4730547954060635e-07, "loss": 2.8399, "step": 228200 }, { "epoch": 9.829866046431494, "learning_rate": 4.4725699756512747e-07, "loss": 2.9469, "step": 228220 }, { "epoch": 9.830727484171081, "learning_rate": 4.472085155896485e-07, "loss": 2.7132, "step": 228240 }, { "epoch": 9.831588921910669, "learning_rate": 4.4716003361416967e-07, "loss": 2.6978, "step": 228260 }, { "epoch": 9.832450359650256, "learning_rate": 4.4711155163869074e-07, "loss": 2.8289, "step": 228280 }, { "epoch": 9.833311797389843, "learning_rate": 4.4706306966321187e-07, "loss": 2.6652, "step": 228300 }, { "epoch": 9.83417323512943, "learning_rate": 4.4701458768773294e-07, "loss": 2.8793, "step": 228320 }, { "epoch": 9.835034672869018, "learning_rate": 4.469661057122541e-07, "loss": 2.6363, "step": 228340 }, { "epoch": 9.835896110608605, "learning_rate": 4.4691762373677513e-07, "loss": 2.6278, "step": 228360 }, { "epoch": 9.836757548348194, "learning_rate": 4.468691417612963e-07, "loss": 2.6308, "step": 228380 }, { "epoch": 9.837618986087781, "learning_rate": 4.468206597858174e-07, "loss": 2.734, "step": 228400 }, { "epoch": 9.838480423827368, "learning_rate": 4.467721778103385e-07, "loss": 2.6363, "step": 228420 }, { "epoch": 9.839341861566956, "learning_rate": 4.467236958348596e-07, "loss": 2.7233, "step": 228440 }, { "epoch": 9.840203299306543, "learning_rate": 4.466752138593807e-07, "loss": 2.8116, "step": 228460 }, { "epoch": 9.84106473704613, "learning_rate": 4.4662673188390177e-07, "loss": 2.937, "step": 228480 }, { "epoch": 9.841926174785717, "learning_rate": 4.4657824990842295e-07, "loss": 2.6129, "step": 228500 }, { "epoch": 9.842787612525305, "learning_rate": 4.46529767932944e-07, "loss": 2.6577, "step": 228520 }, { "epoch": 9.843649050264892, "learning_rate": 4.4648128595746515e-07, "loss": 2.8493, "step": 228540 }, { "epoch": 9.84451048800448, "learning_rate": 4.464328039819862e-07, "loss": 2.8479, "step": 228560 }, { "epoch": 9.845371925744066, "learning_rate": 4.463843220065074e-07, "loss": 2.7164, "step": 228580 }, { "epoch": 9.846233363483654, "learning_rate": 4.4633584003102847e-07, "loss": 2.7004, "step": 228600 }, { "epoch": 9.84709480122324, "learning_rate": 4.462873580555495e-07, "loss": 2.6577, "step": 228620 }, { "epoch": 9.84795623896283, "learning_rate": 4.4623887608007066e-07, "loss": 2.7832, "step": 228640 }, { "epoch": 9.848817676702417, "learning_rate": 4.4619039410459173e-07, "loss": 2.7868, "step": 228660 }, { "epoch": 9.849679114442004, "learning_rate": 4.4614191212911286e-07, "loss": 2.7001, "step": 228680 }, { "epoch": 9.850540552181592, "learning_rate": 4.4609343015363393e-07, "loss": 2.7618, "step": 228700 }, { "epoch": 9.851401989921179, "learning_rate": 4.460449481781551e-07, "loss": 2.7038, "step": 228720 }, { "epoch": 9.852263427660766, "learning_rate": 4.459964662026761e-07, "loss": 2.9747, "step": 228740 }, { "epoch": 9.853124865400353, "learning_rate": 4.459479842271973e-07, "loss": 2.7802, "step": 228760 }, { "epoch": 9.85398630313994, "learning_rate": 4.458995022517184e-07, "loss": 2.7647, "step": 228780 }, { "epoch": 9.854847740879528, "learning_rate": 4.458510202762395e-07, "loss": 2.8517, "step": 228800 }, { "epoch": 9.855709178619115, "learning_rate": 4.4580253830076057e-07, "loss": 2.6253, "step": 228820 }, { "epoch": 9.856570616358702, "learning_rate": 4.4575405632528175e-07, "loss": 2.6648, "step": 228840 }, { "epoch": 9.85743205409829, "learning_rate": 4.4570557434980277e-07, "loss": 2.7749, "step": 228860 }, { "epoch": 9.858293491837877, "learning_rate": 4.456570923743239e-07, "loss": 2.6959, "step": 228880 }, { "epoch": 9.859154929577464, "learning_rate": 4.45608610398845e-07, "loss": 2.743, "step": 228900 }, { "epoch": 9.860016367317051, "learning_rate": 4.4556012842336614e-07, "loss": 2.747, "step": 228920 }, { "epoch": 9.86087780505664, "learning_rate": 4.455116464478872e-07, "loss": 2.6789, "step": 228940 }, { "epoch": 9.861739242796228, "learning_rate": 4.454631644724084e-07, "loss": 2.7587, "step": 228960 }, { "epoch": 9.862600680535815, "learning_rate": 4.4541468249692946e-07, "loss": 2.7325, "step": 228980 }, { "epoch": 9.863462118275402, "learning_rate": 4.453662005214506e-07, "loss": 2.6533, "step": 229000 }, { "epoch": 9.86432355601499, "learning_rate": 4.4531771854597166e-07, "loss": 2.7957, "step": 229020 }, { "epoch": 9.865184993754577, "learning_rate": 4.4526923657049283e-07, "loss": 2.7182, "step": 229040 }, { "epoch": 9.866046431494164, "learning_rate": 4.4522075459501385e-07, "loss": 2.5773, "step": 229060 }, { "epoch": 9.866907869233751, "learning_rate": 4.4517227261953503e-07, "loss": 2.9397, "step": 229080 }, { "epoch": 9.867769306973338, "learning_rate": 4.451237906440561e-07, "loss": 2.7843, "step": 229100 }, { "epoch": 9.868630744712926, "learning_rate": 4.450753086685772e-07, "loss": 2.7145, "step": 229120 }, { "epoch": 9.869492182452513, "learning_rate": 4.450268266930983e-07, "loss": 2.9599, "step": 229140 }, { "epoch": 9.8703536201921, "learning_rate": 4.4497834471761937e-07, "loss": 2.6404, "step": 229160 }, { "epoch": 9.871215057931687, "learning_rate": 4.449298627421405e-07, "loss": 2.5842, "step": 229180 }, { "epoch": 9.872076495671275, "learning_rate": 4.4488138076666156e-07, "loss": 2.7618, "step": 229200 }, { "epoch": 9.872937933410864, "learning_rate": 4.4483289879118274e-07, "loss": 2.8299, "step": 229220 }, { "epoch": 9.87379937115045, "learning_rate": 4.447844168157038e-07, "loss": 2.8898, "step": 229240 }, { "epoch": 9.874660808890038, "learning_rate": 4.4473593484022494e-07, "loss": 2.6774, "step": 229260 }, { "epoch": 9.875522246629625, "learning_rate": 4.44687452864746e-07, "loss": 2.8864, "step": 229280 }, { "epoch": 9.876383684369213, "learning_rate": 4.446389708892672e-07, "loss": 2.6506, "step": 229300 }, { "epoch": 9.8772451221088, "learning_rate": 4.445904889137882e-07, "loss": 2.6895, "step": 229320 }, { "epoch": 9.878106559848387, "learning_rate": 4.445420069383094e-07, "loss": 2.64, "step": 229340 }, { "epoch": 9.878967997587974, "learning_rate": 4.4449352496283045e-07, "loss": 2.7518, "step": 229360 }, { "epoch": 9.879829435327562, "learning_rate": 4.444450429873516e-07, "loss": 2.5919, "step": 229380 }, { "epoch": 9.880690873067149, "learning_rate": 4.4439656101187265e-07, "loss": 2.5162, "step": 229400 }, { "epoch": 9.881552310806736, "learning_rate": 4.4434807903639383e-07, "loss": 2.6462, "step": 229420 }, { "epoch": 9.882413748546323, "learning_rate": 4.4429959706091485e-07, "loss": 2.7029, "step": 229440 }, { "epoch": 9.88327518628591, "learning_rate": 4.44251115085436e-07, "loss": 2.8173, "step": 229460 }, { "epoch": 9.884136624025498, "learning_rate": 4.442026331099571e-07, "loss": 2.7525, "step": 229480 }, { "epoch": 9.884998061765085, "learning_rate": 4.441541511344782e-07, "loss": 2.6046, "step": 229500 }, { "epoch": 9.885859499504674, "learning_rate": 4.441056691589993e-07, "loss": 2.6568, "step": 229520 }, { "epoch": 9.886720937244261, "learning_rate": 4.4405718718352047e-07, "loss": 2.8816, "step": 229540 }, { "epoch": 9.887582374983849, "learning_rate": 4.440087052080415e-07, "loss": 2.6224, "step": 229560 }, { "epoch": 9.888443812723436, "learning_rate": 4.4396022323256266e-07, "loss": 2.8357, "step": 229580 }, { "epoch": 9.889305250463023, "learning_rate": 4.4391174125708374e-07, "loss": 2.8233, "step": 229600 }, { "epoch": 9.89016668820261, "learning_rate": 4.4386325928160486e-07, "loss": 2.7707, "step": 229620 }, { "epoch": 9.891028125942197, "learning_rate": 4.4381477730612593e-07, "loss": 2.5705, "step": 229640 }, { "epoch": 9.891889563681785, "learning_rate": 4.437662953306471e-07, "loss": 2.7595, "step": 229660 }, { "epoch": 9.892751001421372, "learning_rate": 4.437178133551682e-07, "loss": 2.756, "step": 229680 }, { "epoch": 9.89361243916096, "learning_rate": 4.436693313796892e-07, "loss": 2.6594, "step": 229700 }, { "epoch": 9.894473876900546, "learning_rate": 4.436208494042104e-07, "loss": 2.7264, "step": 229720 }, { "epoch": 9.895335314640134, "learning_rate": 4.4357236742873145e-07, "loss": 2.8597, "step": 229740 }, { "epoch": 9.896196752379721, "learning_rate": 4.4352388545325257e-07, "loss": 2.7373, "step": 229760 }, { "epoch": 9.89705819011931, "learning_rate": 4.4347540347777364e-07, "loss": 2.7717, "step": 229780 }, { "epoch": 9.897919627858897, "learning_rate": 4.434269215022948e-07, "loss": 2.8305, "step": 229800 }, { "epoch": 9.898781065598484, "learning_rate": 4.4337843952681584e-07, "loss": 2.6757, "step": 229820 }, { "epoch": 9.899642503338072, "learning_rate": 4.43329957551337e-07, "loss": 2.5936, "step": 229840 }, { "epoch": 9.900503941077659, "learning_rate": 4.432814755758581e-07, "loss": 2.8563, "step": 229860 }, { "epoch": 9.901365378817246, "learning_rate": 4.432329936003792e-07, "loss": 2.6284, "step": 229880 }, { "epoch": 9.902226816556833, "learning_rate": 4.431845116249003e-07, "loss": 2.6192, "step": 229900 }, { "epoch": 9.90308825429642, "learning_rate": 4.4313602964942146e-07, "loss": 2.6951, "step": 229920 }, { "epoch": 9.903949692036008, "learning_rate": 4.4308754767394253e-07, "loss": 2.7005, "step": 229940 }, { "epoch": 9.904811129775595, "learning_rate": 4.4303906569846366e-07, "loss": 2.846, "step": 229960 }, { "epoch": 9.905672567515182, "learning_rate": 4.429905837229848e-07, "loss": 2.8555, "step": 229980 }, { "epoch": 9.90653400525477, "learning_rate": 4.429421017475059e-07, "loss": 2.6858, "step": 230000 }, { "epoch": 9.907395442994357, "learning_rate": 4.428936197720269e-07, "loss": 2.6543, "step": 230020 }, { "epoch": 9.908256880733944, "learning_rate": 4.428451377965481e-07, "loss": 2.872, "step": 230040 }, { "epoch": 9.909118318473531, "learning_rate": 4.427966558210692e-07, "loss": 2.9425, "step": 230060 }, { "epoch": 9.90997975621312, "learning_rate": 4.427481738455903e-07, "loss": 2.7322, "step": 230080 }, { "epoch": 9.910841193952708, "learning_rate": 4.4269969187011137e-07, "loss": 2.5811, "step": 230100 }, { "epoch": 9.911702631692295, "learning_rate": 4.4265120989463255e-07, "loss": 2.7535, "step": 230120 }, { "epoch": 9.912564069431882, "learning_rate": 4.4260272791915357e-07, "loss": 2.7364, "step": 230140 }, { "epoch": 9.91342550717147, "learning_rate": 4.4255424594367474e-07, "loss": 2.8359, "step": 230160 }, { "epoch": 9.914286944911057, "learning_rate": 4.425057639681958e-07, "loss": 2.8603, "step": 230180 }, { "epoch": 9.915148382650644, "learning_rate": 4.4245728199271694e-07, "loss": 2.6994, "step": 230200 }, { "epoch": 9.916009820390231, "learning_rate": 4.42408800017238e-07, "loss": 2.71, "step": 230220 }, { "epoch": 9.916871258129818, "learning_rate": 4.423603180417591e-07, "loss": 2.9023, "step": 230240 }, { "epoch": 9.917732695869406, "learning_rate": 4.423118360662802e-07, "loss": 2.8213, "step": 230260 }, { "epoch": 9.918594133608993, "learning_rate": 4.422633540908013e-07, "loss": 2.7915, "step": 230280 }, { "epoch": 9.91945557134858, "learning_rate": 4.4221487211532246e-07, "loss": 2.7525, "step": 230300 }, { "epoch": 9.920317009088167, "learning_rate": 4.4216639013984353e-07, "loss": 2.7287, "step": 230320 }, { "epoch": 9.921178446827756, "learning_rate": 4.4211790816436465e-07, "loss": 2.7031, "step": 230340 }, { "epoch": 9.922039884567344, "learning_rate": 4.420694261888857e-07, "loss": 2.6911, "step": 230360 }, { "epoch": 9.922901322306931, "learning_rate": 4.420209442134069e-07, "loss": 2.6716, "step": 230380 }, { "epoch": 9.923762760046518, "learning_rate": 4.4197246223792797e-07, "loss": 2.8474, "step": 230400 }, { "epoch": 9.924624197786105, "learning_rate": 4.419239802624491e-07, "loss": 2.5551, "step": 230420 }, { "epoch": 9.925485635525693, "learning_rate": 4.4187549828697017e-07, "loss": 2.7081, "step": 230440 }, { "epoch": 9.92634707326528, "learning_rate": 4.418270163114913e-07, "loss": 2.7313, "step": 230460 }, { "epoch": 9.927208511004867, "learning_rate": 4.4177853433601236e-07, "loss": 2.8119, "step": 230480 }, { "epoch": 9.928069948744454, "learning_rate": 4.4173005236053354e-07, "loss": 2.7096, "step": 230500 }, { "epoch": 9.928931386484042, "learning_rate": 4.4168157038505456e-07, "loss": 2.6679, "step": 230520 }, { "epoch": 9.929792824223629, "learning_rate": 4.4163308840957574e-07, "loss": 2.758, "step": 230540 }, { "epoch": 9.930654261963216, "learning_rate": 4.415846064340968e-07, "loss": 2.6794, "step": 230560 }, { "epoch": 9.931515699702803, "learning_rate": 4.4153612445861793e-07, "loss": 2.6742, "step": 230580 }, { "epoch": 9.93237713744239, "learning_rate": 4.41487642483139e-07, "loss": 2.6609, "step": 230600 }, { "epoch": 9.933238575181978, "learning_rate": 4.414391605076602e-07, "loss": 2.8234, "step": 230620 }, { "epoch": 9.934100012921567, "learning_rate": 4.413906785321812e-07, "loss": 2.7093, "step": 230640 }, { "epoch": 9.934961450661154, "learning_rate": 4.413421965567023e-07, "loss": 2.6735, "step": 230660 }, { "epoch": 9.935822888400741, "learning_rate": 4.4129371458122345e-07, "loss": 2.7809, "step": 230680 }, { "epoch": 9.936684326140329, "learning_rate": 4.412452326057446e-07, "loss": 2.7143, "step": 230700 }, { "epoch": 9.937545763879916, "learning_rate": 4.4119675063026565e-07, "loss": 2.7614, "step": 230720 }, { "epoch": 9.938407201619503, "learning_rate": 4.411482686547868e-07, "loss": 2.6715, "step": 230740 }, { "epoch": 9.93926863935909, "learning_rate": 4.410997866793079e-07, "loss": 2.9459, "step": 230760 }, { "epoch": 9.940130077098678, "learning_rate": 4.410513047038289e-07, "loss": 2.4877, "step": 230780 }, { "epoch": 9.940991514838265, "learning_rate": 4.410028227283501e-07, "loss": 2.8954, "step": 230800 }, { "epoch": 9.941852952577852, "learning_rate": 4.409543407528711e-07, "loss": 2.7721, "step": 230820 }, { "epoch": 9.94271439031744, "learning_rate": 4.409058587773923e-07, "loss": 2.6864, "step": 230840 }, { "epoch": 9.943575828057027, "learning_rate": 4.4085737680191336e-07, "loss": 2.6257, "step": 230860 }, { "epoch": 9.944437265796614, "learning_rate": 4.4080889482643453e-07, "loss": 2.7408, "step": 230880 }, { "epoch": 9.945298703536203, "learning_rate": 4.4076041285095555e-07, "loss": 2.8327, "step": 230900 }, { "epoch": 9.94616014127579, "learning_rate": 4.4071193087547673e-07, "loss": 2.7708, "step": 230920 }, { "epoch": 9.947021579015377, "learning_rate": 4.406634488999978e-07, "loss": 2.6885, "step": 230940 }, { "epoch": 9.947883016754965, "learning_rate": 4.4061496692451893e-07, "loss": 2.8345, "step": 230960 }, { "epoch": 9.948744454494552, "learning_rate": 4.4056648494904e-07, "loss": 2.7501, "step": 230980 }, { "epoch": 9.949605892234139, "learning_rate": 4.405180029735612e-07, "loss": 2.7741, "step": 231000 }, { "epoch": 9.950467329973726, "learning_rate": 4.4046952099808225e-07, "loss": 2.6325, "step": 231020 }, { "epoch": 9.951328767713314, "learning_rate": 4.4042103902260337e-07, "loss": 2.8222, "step": 231040 }, { "epoch": 9.9521902054529, "learning_rate": 4.4037255704712444e-07, "loss": 2.7223, "step": 231060 }, { "epoch": 9.953051643192488, "learning_rate": 4.403240750716456e-07, "loss": 2.5351, "step": 231080 }, { "epoch": 9.953913080932075, "learning_rate": 4.4027559309616664e-07, "loss": 2.6932, "step": 231100 }, { "epoch": 9.954774518671663, "learning_rate": 4.402271111206878e-07, "loss": 2.8151, "step": 231120 }, { "epoch": 9.95563595641125, "learning_rate": 4.401786291452089e-07, "loss": 2.5107, "step": 231140 }, { "epoch": 9.956497394150837, "learning_rate": 4.4013014716973e-07, "loss": 2.6976, "step": 231160 }, { "epoch": 9.957358831890424, "learning_rate": 4.400816651942511e-07, "loss": 2.8091, "step": 231180 }, { "epoch": 9.958220269630013, "learning_rate": 4.4003318321877226e-07, "loss": 2.5837, "step": 231200 }, { "epoch": 9.9590817073696, "learning_rate": 4.399847012432933e-07, "loss": 2.809, "step": 231220 }, { "epoch": 9.959943145109188, "learning_rate": 4.3993621926781446e-07, "loss": 2.5785, "step": 231240 }, { "epoch": 9.960804582848775, "learning_rate": 4.3988773729233553e-07, "loss": 2.7323, "step": 231260 }, { "epoch": 9.961666020588362, "learning_rate": 4.3983925531685665e-07, "loss": 2.7423, "step": 231280 }, { "epoch": 9.96252745832795, "learning_rate": 4.397907733413777e-07, "loss": 2.8942, "step": 231300 }, { "epoch": 9.963388896067537, "learning_rate": 4.397422913658988e-07, "loss": 2.6899, "step": 231320 }, { "epoch": 9.964250333807124, "learning_rate": 4.396938093904199e-07, "loss": 2.6318, "step": 231340 }, { "epoch": 9.965111771546711, "learning_rate": 4.39645327414941e-07, "loss": 2.7993, "step": 231360 }, { "epoch": 9.965973209286298, "learning_rate": 4.3959684543946217e-07, "loss": 2.8087, "step": 231380 }, { "epoch": 9.966834647025886, "learning_rate": 4.3954836346398324e-07, "loss": 2.7006, "step": 231400 }, { "epoch": 9.967696084765473, "learning_rate": 4.3949988148850437e-07, "loss": 2.7229, "step": 231420 }, { "epoch": 9.96855752250506, "learning_rate": 4.3945139951302544e-07, "loss": 2.6935, "step": 231440 }, { "epoch": 9.96941896024465, "learning_rate": 4.394029175375466e-07, "loss": 2.8356, "step": 231460 }, { "epoch": 9.970280397984236, "learning_rate": 4.3935443556206763e-07, "loss": 2.8626, "step": 231480 }, { "epoch": 9.971141835723824, "learning_rate": 4.393059535865888e-07, "loss": 2.6645, "step": 231500 }, { "epoch": 9.972003273463411, "learning_rate": 4.392574716111099e-07, "loss": 2.7563, "step": 231520 }, { "epoch": 9.972864711202998, "learning_rate": 4.39208989635631e-07, "loss": 2.7228, "step": 231540 }, { "epoch": 9.973726148942585, "learning_rate": 4.391605076601521e-07, "loss": 2.7645, "step": 231560 }, { "epoch": 9.974587586682173, "learning_rate": 4.3911202568467325e-07, "loss": 2.7895, "step": 231580 }, { "epoch": 9.97544902442176, "learning_rate": 4.3906354370919427e-07, "loss": 2.75, "step": 231600 }, { "epoch": 9.976310462161347, "learning_rate": 4.3901506173371545e-07, "loss": 2.7876, "step": 231620 }, { "epoch": 9.977171899900934, "learning_rate": 4.389665797582365e-07, "loss": 2.5934, "step": 231640 }, { "epoch": 9.978033337640522, "learning_rate": 4.3891809778275765e-07, "loss": 2.8632, "step": 231660 }, { "epoch": 9.978894775380109, "learning_rate": 4.388696158072787e-07, "loss": 2.6887, "step": 231680 }, { "epoch": 9.979756213119696, "learning_rate": 4.388211338317999e-07, "loss": 2.7499, "step": 231700 }, { "epoch": 9.980617650859283, "learning_rate": 4.3877265185632097e-07, "loss": 2.7217, "step": 231720 }, { "epoch": 9.98147908859887, "learning_rate": 4.387241698808421e-07, "loss": 2.64, "step": 231740 }, { "epoch": 9.98234052633846, "learning_rate": 4.386756879053632e-07, "loss": 2.6802, "step": 231760 }, { "epoch": 9.983201964078047, "learning_rate": 4.3862720592988434e-07, "loss": 2.7785, "step": 231780 }, { "epoch": 9.984063401817634, "learning_rate": 4.3857872395440536e-07, "loss": 2.7331, "step": 231800 }, { "epoch": 9.984924839557221, "learning_rate": 4.3853024197892654e-07, "loss": 2.6514, "step": 231820 }, { "epoch": 9.985786277296809, "learning_rate": 4.384817600034476e-07, "loss": 2.8064, "step": 231840 }, { "epoch": 9.986647715036396, "learning_rate": 4.384332780279686e-07, "loss": 2.5903, "step": 231860 }, { "epoch": 9.987509152775983, "learning_rate": 4.383847960524898e-07, "loss": 2.7847, "step": 231880 }, { "epoch": 9.98837059051557, "learning_rate": 4.383363140770109e-07, "loss": 2.7971, "step": 231900 }, { "epoch": 9.989232028255158, "learning_rate": 4.38287832101532e-07, "loss": 2.9441, "step": 231920 }, { "epoch": 9.990093465994745, "learning_rate": 4.3823935012605307e-07, "loss": 2.6663, "step": 231940 }, { "epoch": 9.990954903734332, "learning_rate": 4.3819086815057425e-07, "loss": 2.7285, "step": 231960 }, { "epoch": 9.99181634147392, "learning_rate": 4.3814238617509527e-07, "loss": 2.6635, "step": 231980 }, { "epoch": 9.992677779213507, "learning_rate": 4.3809390419961644e-07, "loss": 2.7147, "step": 232000 }, { "epoch": 9.993539216953094, "learning_rate": 4.380454222241375e-07, "loss": 2.808, "step": 232020 }, { "epoch": 9.994400654692683, "learning_rate": 4.3799694024865864e-07, "loss": 2.8795, "step": 232040 }, { "epoch": 9.99526209243227, "learning_rate": 4.379484582731797e-07, "loss": 2.7803, "step": 232060 }, { "epoch": 9.996123530171857, "learning_rate": 4.378999762977009e-07, "loss": 2.7404, "step": 232080 }, { "epoch": 9.996984967911445, "learning_rate": 4.3785149432222196e-07, "loss": 2.8107, "step": 232100 }, { "epoch": 9.997846405651032, "learning_rate": 4.378030123467431e-07, "loss": 2.8, "step": 232120 }, { "epoch": 9.99870784339062, "learning_rate": 4.3775453037126416e-07, "loss": 2.71, "step": 232140 }, { "epoch": 9.999569281130206, "learning_rate": 4.3770604839578533e-07, "loss": 2.729, "step": 232160 }, { "epoch": 10.000430718869794, "learning_rate": 4.376575664203064e-07, "loss": 2.8009, "step": 232180 }, { "epoch": 10.00129215660938, "learning_rate": 4.3760908444482753e-07, "loss": 2.826, "step": 232200 }, { "epoch": 10.002153594348968, "learning_rate": 4.375606024693486e-07, "loss": 2.8589, "step": 232220 }, { "epoch": 10.003015032088555, "learning_rate": 4.375121204938697e-07, "loss": 2.6534, "step": 232240 }, { "epoch": 10.003876469828143, "learning_rate": 4.374636385183908e-07, "loss": 2.6214, "step": 232260 }, { "epoch": 10.00473790756773, "learning_rate": 4.37415156542912e-07, "loss": 2.706, "step": 232280 }, { "epoch": 10.005599345307317, "learning_rate": 4.37366674567433e-07, "loss": 2.7396, "step": 232300 }, { "epoch": 10.006460783046906, "learning_rate": 4.3731819259195417e-07, "loss": 2.7347, "step": 232320 }, { "epoch": 10.007322220786493, "learning_rate": 4.3726971061647524e-07, "loss": 2.9149, "step": 232340 }, { "epoch": 10.00818365852608, "learning_rate": 4.3722122864099637e-07, "loss": 2.7851, "step": 232360 }, { "epoch": 10.009045096265668, "learning_rate": 4.3717274666551744e-07, "loss": 2.7284, "step": 232380 }, { "epoch": 10.009906534005255, "learning_rate": 4.371242646900385e-07, "loss": 2.6129, "step": 232400 }, { "epoch": 10.010767971744842, "learning_rate": 4.3707578271455963e-07, "loss": 2.8324, "step": 232420 }, { "epoch": 10.01162940948443, "learning_rate": 4.370273007390807e-07, "loss": 2.7013, "step": 232440 }, { "epoch": 10.012490847224017, "learning_rate": 4.369788187636019e-07, "loss": 2.7624, "step": 232460 }, { "epoch": 10.013352284963604, "learning_rate": 4.3693033678812295e-07, "loss": 2.7084, "step": 232480 }, { "epoch": 10.014213722703191, "learning_rate": 4.368818548126441e-07, "loss": 2.6958, "step": 232500 }, { "epoch": 10.015075160442779, "learning_rate": 4.3683337283716515e-07, "loss": 2.7841, "step": 232520 }, { "epoch": 10.015936598182366, "learning_rate": 4.3678489086168633e-07, "loss": 2.7964, "step": 232540 }, { "epoch": 10.016798035921953, "learning_rate": 4.3673640888620735e-07, "loss": 2.7828, "step": 232560 }, { "epoch": 10.01765947366154, "learning_rate": 4.366879269107285e-07, "loss": 2.8251, "step": 232580 }, { "epoch": 10.01852091140113, "learning_rate": 4.3663944493524954e-07, "loss": 2.6227, "step": 232600 }, { "epoch": 10.019382349140717, "learning_rate": 4.365909629597707e-07, "loss": 2.6491, "step": 232620 }, { "epoch": 10.020243786880304, "learning_rate": 4.365424809842918e-07, "loss": 2.7686, "step": 232640 }, { "epoch": 10.021105224619891, "learning_rate": 4.3649399900881297e-07, "loss": 2.7209, "step": 232660 }, { "epoch": 10.021966662359478, "learning_rate": 4.36445517033334e-07, "loss": 2.9373, "step": 232680 }, { "epoch": 10.022828100099066, "learning_rate": 4.3639703505785516e-07, "loss": 2.7911, "step": 232700 }, { "epoch": 10.023689537838653, "learning_rate": 4.3634855308237624e-07, "loss": 2.8482, "step": 232720 }, { "epoch": 10.02455097557824, "learning_rate": 4.3630007110689736e-07, "loss": 2.675, "step": 232740 }, { "epoch": 10.025412413317827, "learning_rate": 4.3625158913141843e-07, "loss": 2.4819, "step": 232760 }, { "epoch": 10.026273851057415, "learning_rate": 4.362031071559396e-07, "loss": 2.8888, "step": 232780 }, { "epoch": 10.027135288797002, "learning_rate": 4.361546251804607e-07, "loss": 2.7406, "step": 232800 }, { "epoch": 10.027996726536589, "learning_rate": 4.361061432049818e-07, "loss": 2.7427, "step": 232820 }, { "epoch": 10.028858164276176, "learning_rate": 4.360576612295029e-07, "loss": 2.7009, "step": 232840 }, { "epoch": 10.029719602015764, "learning_rate": 4.3600917925402405e-07, "loss": 2.7962, "step": 232860 }, { "epoch": 10.030581039755353, "learning_rate": 4.3596069727854507e-07, "loss": 2.7476, "step": 232880 }, { "epoch": 10.03144247749494, "learning_rate": 4.3591221530306625e-07, "loss": 2.6929, "step": 232900 }, { "epoch": 10.032303915234527, "learning_rate": 4.358637333275873e-07, "loss": 2.781, "step": 232920 }, { "epoch": 10.033165352974114, "learning_rate": 4.3581525135210834e-07, "loss": 2.7936, "step": 232940 }, { "epoch": 10.034026790713702, "learning_rate": 4.357667693766295e-07, "loss": 2.755, "step": 232960 }, { "epoch": 10.034888228453289, "learning_rate": 4.357182874011506e-07, "loss": 2.5696, "step": 232980 }, { "epoch": 10.035749666192876, "learning_rate": 4.356698054256717e-07, "loss": 2.9032, "step": 233000 }, { "epoch": 10.036611103932463, "learning_rate": 4.3562132345019273e-07, "loss": 2.7736, "step": 233020 }, { "epoch": 10.03747254167205, "learning_rate": 4.3557284147471396e-07, "loss": 2.6972, "step": 233040 }, { "epoch": 10.038333979411638, "learning_rate": 4.35524359499235e-07, "loss": 2.738, "step": 233060 }, { "epoch": 10.039195417151225, "learning_rate": 4.3547587752375616e-07, "loss": 2.6802, "step": 233080 }, { "epoch": 10.040056854890812, "learning_rate": 4.3542739554827723e-07, "loss": 2.7077, "step": 233100 }, { "epoch": 10.0409182926304, "learning_rate": 4.3537891357279835e-07, "loss": 2.8842, "step": 233120 }, { "epoch": 10.041779730369987, "learning_rate": 4.353304315973194e-07, "loss": 2.5946, "step": 233140 }, { "epoch": 10.042641168109576, "learning_rate": 4.352819496218406e-07, "loss": 2.7576, "step": 233160 }, { "epoch": 10.043502605849163, "learning_rate": 4.352334676463617e-07, "loss": 2.7143, "step": 233180 }, { "epoch": 10.04436404358875, "learning_rate": 4.351849856708828e-07, "loss": 2.7301, "step": 233200 }, { "epoch": 10.045225481328337, "learning_rate": 4.3513650369540387e-07, "loss": 2.7959, "step": 233220 }, { "epoch": 10.046086919067925, "learning_rate": 4.3508802171992505e-07, "loss": 2.9086, "step": 233240 }, { "epoch": 10.046948356807512, "learning_rate": 4.3503953974444607e-07, "loss": 2.7724, "step": 233260 }, { "epoch": 10.0478097945471, "learning_rate": 4.3499105776896724e-07, "loss": 2.6747, "step": 233280 }, { "epoch": 10.048671232286686, "learning_rate": 4.349425757934883e-07, "loss": 2.7101, "step": 233300 }, { "epoch": 10.049532670026274, "learning_rate": 4.3489409381800944e-07, "loss": 2.7335, "step": 233320 }, { "epoch": 10.050394107765861, "learning_rate": 4.348456118425305e-07, "loss": 2.7687, "step": 233340 }, { "epoch": 10.051255545505448, "learning_rate": 4.347971298670517e-07, "loss": 2.8072, "step": 233360 }, { "epoch": 10.052116983245035, "learning_rate": 4.347486478915727e-07, "loss": 2.7751, "step": 233380 }, { "epoch": 10.052978420984623, "learning_rate": 4.347001659160939e-07, "loss": 2.6336, "step": 233400 }, { "epoch": 10.05383985872421, "learning_rate": 4.3465168394061496e-07, "loss": 2.8188, "step": 233420 }, { "epoch": 10.054701296463799, "learning_rate": 4.346032019651361e-07, "loss": 2.7701, "step": 233440 }, { "epoch": 10.055562734203386, "learning_rate": 4.3455471998965715e-07, "loss": 2.7414, "step": 233460 }, { "epoch": 10.056424171942973, "learning_rate": 4.345062380141782e-07, "loss": 2.8707, "step": 233480 }, { "epoch": 10.05728560968256, "learning_rate": 4.344577560386994e-07, "loss": 2.7362, "step": 233500 }, { "epoch": 10.058147047422148, "learning_rate": 4.344092740632204e-07, "loss": 2.6798, "step": 233520 }, { "epoch": 10.059008485161735, "learning_rate": 4.3436079208774165e-07, "loss": 2.8949, "step": 233540 }, { "epoch": 10.059869922901322, "learning_rate": 4.3431231011226267e-07, "loss": 2.878, "step": 233560 }, { "epoch": 10.06073136064091, "learning_rate": 4.342638281367838e-07, "loss": 2.5081, "step": 233580 }, { "epoch": 10.061592798380497, "learning_rate": 4.3421534616130486e-07, "loss": 2.7506, "step": 233600 }, { "epoch": 10.062454236120084, "learning_rate": 4.3416686418582604e-07, "loss": 2.6746, "step": 233620 }, { "epoch": 10.063315673859671, "learning_rate": 4.3411838221034706e-07, "loss": 2.7771, "step": 233640 }, { "epoch": 10.064177111599259, "learning_rate": 4.3406990023486824e-07, "loss": 2.6613, "step": 233660 }, { "epoch": 10.065038549338846, "learning_rate": 4.340214182593893e-07, "loss": 2.8941, "step": 233680 }, { "epoch": 10.065899987078433, "learning_rate": 4.3397293628391043e-07, "loss": 2.6194, "step": 233700 }, { "epoch": 10.066761424818022, "learning_rate": 4.339244543084315e-07, "loss": 2.7221, "step": 233720 }, { "epoch": 10.06762286255761, "learning_rate": 4.338759723329527e-07, "loss": 2.8645, "step": 233740 }, { "epoch": 10.068484300297197, "learning_rate": 4.338274903574737e-07, "loss": 2.6529, "step": 233760 }, { "epoch": 10.069345738036784, "learning_rate": 4.337790083819949e-07, "loss": 2.7663, "step": 233780 }, { "epoch": 10.070207175776371, "learning_rate": 4.3373052640651595e-07, "loss": 2.7477, "step": 233800 }, { "epoch": 10.071068613515958, "learning_rate": 4.3368204443103707e-07, "loss": 2.7058, "step": 233820 }, { "epoch": 10.071930051255546, "learning_rate": 4.3363356245555814e-07, "loss": 2.7488, "step": 233840 }, { "epoch": 10.072791488995133, "learning_rate": 4.335850804800793e-07, "loss": 2.6378, "step": 233860 }, { "epoch": 10.07365292673472, "learning_rate": 4.335365985046004e-07, "loss": 2.7101, "step": 233880 }, { "epoch": 10.074514364474307, "learning_rate": 4.334881165291215e-07, "loss": 2.7669, "step": 233900 }, { "epoch": 10.075375802213895, "learning_rate": 4.334396345536426e-07, "loss": 2.7634, "step": 233920 }, { "epoch": 10.076237239953482, "learning_rate": 4.3339115257816377e-07, "loss": 2.6902, "step": 233940 }, { "epoch": 10.077098677693069, "learning_rate": 4.3334267060268484e-07, "loss": 2.6588, "step": 233960 }, { "epoch": 10.077960115432656, "learning_rate": 4.3329418862720596e-07, "loss": 2.6609, "step": 233980 }, { "epoch": 10.078821553172244, "learning_rate": 4.3324570665172703e-07, "loss": 2.7384, "step": 234000 }, { "epoch": 10.079682990911833, "learning_rate": 4.3319722467624805e-07, "loss": 2.5661, "step": 234020 }, { "epoch": 10.08054442865142, "learning_rate": 4.3314874270076923e-07, "loss": 2.7888, "step": 234040 }, { "epoch": 10.081405866391007, "learning_rate": 4.331002607252903e-07, "loss": 2.6906, "step": 234060 }, { "epoch": 10.082267304130594, "learning_rate": 4.330517787498114e-07, "loss": 2.8107, "step": 234080 }, { "epoch": 10.083128741870182, "learning_rate": 4.330032967743325e-07, "loss": 2.6208, "step": 234100 }, { "epoch": 10.083990179609769, "learning_rate": 4.329548147988537e-07, "loss": 2.8787, "step": 234120 }, { "epoch": 10.084851617349356, "learning_rate": 4.3290633282337475e-07, "loss": 2.7092, "step": 234140 }, { "epoch": 10.085713055088943, "learning_rate": 4.3285785084789587e-07, "loss": 2.8086, "step": 234160 }, { "epoch": 10.08657449282853, "learning_rate": 4.3280936887241694e-07, "loss": 2.6078, "step": 234180 }, { "epoch": 10.087435930568118, "learning_rate": 4.3276088689693807e-07, "loss": 2.8745, "step": 234200 }, { "epoch": 10.088297368307705, "learning_rate": 4.3271240492145914e-07, "loss": 2.7078, "step": 234220 }, { "epoch": 10.089158806047292, "learning_rate": 4.326639229459803e-07, "loss": 2.7043, "step": 234240 }, { "epoch": 10.09002024378688, "learning_rate": 4.326154409705014e-07, "loss": 2.6147, "step": 234260 }, { "epoch": 10.090881681526467, "learning_rate": 4.325669589950225e-07, "loss": 2.6612, "step": 234280 }, { "epoch": 10.091743119266056, "learning_rate": 4.325184770195436e-07, "loss": 2.5956, "step": 234300 }, { "epoch": 10.092604557005643, "learning_rate": 4.3246999504406476e-07, "loss": 2.7266, "step": 234320 }, { "epoch": 10.09346599474523, "learning_rate": 4.324215130685858e-07, "loss": 2.7149, "step": 234340 }, { "epoch": 10.094327432484818, "learning_rate": 4.3237303109310696e-07, "loss": 2.7574, "step": 234360 }, { "epoch": 10.095188870224405, "learning_rate": 4.32324549117628e-07, "loss": 2.8443, "step": 234380 }, { "epoch": 10.096050307963992, "learning_rate": 4.3227606714214915e-07, "loss": 2.8475, "step": 234400 }, { "epoch": 10.09691174570358, "learning_rate": 4.322275851666702e-07, "loss": 2.5984, "step": 234420 }, { "epoch": 10.097773183443167, "learning_rate": 4.321791031911914e-07, "loss": 2.6424, "step": 234440 }, { "epoch": 10.098634621182754, "learning_rate": 4.321306212157124e-07, "loss": 2.6935, "step": 234460 }, { "epoch": 10.099496058922341, "learning_rate": 4.320821392402336e-07, "loss": 2.7103, "step": 234480 }, { "epoch": 10.100357496661928, "learning_rate": 4.3203365726475467e-07, "loss": 2.7521, "step": 234500 }, { "epoch": 10.101218934401516, "learning_rate": 4.319851752892758e-07, "loss": 2.912, "step": 234520 }, { "epoch": 10.102080372141103, "learning_rate": 4.3193669331379686e-07, "loss": 2.7193, "step": 234540 }, { "epoch": 10.10294180988069, "learning_rate": 4.3188821133831794e-07, "loss": 2.9009, "step": 234560 }, { "epoch": 10.103803247620279, "learning_rate": 4.318397293628391e-07, "loss": 2.6406, "step": 234580 }, { "epoch": 10.104664685359866, "learning_rate": 4.3179124738736013e-07, "loss": 2.7141, "step": 234600 }, { "epoch": 10.105526123099454, "learning_rate": 4.317427654118813e-07, "loss": 2.9049, "step": 234620 }, { "epoch": 10.10638756083904, "learning_rate": 4.316942834364024e-07, "loss": 2.7165, "step": 234640 }, { "epoch": 10.107248998578628, "learning_rate": 4.316458014609235e-07, "loss": 2.7212, "step": 234660 }, { "epoch": 10.108110436318215, "learning_rate": 4.315973194854446e-07, "loss": 3.0083, "step": 234680 }, { "epoch": 10.108971874057803, "learning_rate": 4.3154883750996575e-07, "loss": 2.8408, "step": 234700 }, { "epoch": 10.10983331179739, "learning_rate": 4.3150035553448677e-07, "loss": 2.7484, "step": 234720 }, { "epoch": 10.110694749536977, "learning_rate": 4.3145187355900795e-07, "loss": 2.6511, "step": 234740 }, { "epoch": 10.111556187276564, "learning_rate": 4.31403391583529e-07, "loss": 2.7963, "step": 234760 }, { "epoch": 10.112417625016151, "learning_rate": 4.3135490960805015e-07, "loss": 2.7539, "step": 234780 }, { "epoch": 10.113279062755739, "learning_rate": 4.3130642763257116e-07, "loss": 2.7425, "step": 234800 }, { "epoch": 10.114140500495326, "learning_rate": 4.312579456570924e-07, "loss": 2.6201, "step": 234820 }, { "epoch": 10.115001938234913, "learning_rate": 4.312094636816134e-07, "loss": 2.9524, "step": 234840 }, { "epoch": 10.115863375974502, "learning_rate": 4.311609817061346e-07, "loss": 2.7409, "step": 234860 }, { "epoch": 10.11672481371409, "learning_rate": 4.3111249973065566e-07, "loss": 2.7645, "step": 234880 }, { "epoch": 10.117586251453677, "learning_rate": 4.310640177551768e-07, "loss": 2.8219, "step": 234900 }, { "epoch": 10.118447689193264, "learning_rate": 4.3101553577969786e-07, "loss": 2.6798, "step": 234920 }, { "epoch": 10.119309126932851, "learning_rate": 4.3096705380421904e-07, "loss": 2.617, "step": 234940 }, { "epoch": 10.120170564672438, "learning_rate": 4.309185718287401e-07, "loss": 2.8987, "step": 234960 }, { "epoch": 10.121032002412026, "learning_rate": 4.3087008985326123e-07, "loss": 2.6836, "step": 234980 }, { "epoch": 10.121893440151613, "learning_rate": 4.308216078777823e-07, "loss": 2.8904, "step": 235000 }, { "epoch": 10.1227548778912, "learning_rate": 4.307731259023035e-07, "loss": 2.7397, "step": 235020 }, { "epoch": 10.123616315630787, "learning_rate": 4.307246439268245e-07, "loss": 2.7348, "step": 235040 }, { "epoch": 10.124477753370375, "learning_rate": 4.306761619513457e-07, "loss": 2.6797, "step": 235060 }, { "epoch": 10.125339191109962, "learning_rate": 4.3062767997586675e-07, "loss": 2.8619, "step": 235080 }, { "epoch": 10.12620062884955, "learning_rate": 4.3057919800038777e-07, "loss": 2.6747, "step": 235100 }, { "epoch": 10.127062066589136, "learning_rate": 4.3053071602490894e-07, "loss": 2.85, "step": 235120 }, { "epoch": 10.127923504328725, "learning_rate": 4.3048223404943e-07, "loss": 2.6745, "step": 235140 }, { "epoch": 10.128784942068313, "learning_rate": 4.3043375207395114e-07, "loss": 2.7043, "step": 235160 }, { "epoch": 10.1296463798079, "learning_rate": 4.303852700984722e-07, "loss": 2.7826, "step": 235180 }, { "epoch": 10.130507817547487, "learning_rate": 4.303367881229934e-07, "loss": 2.736, "step": 235200 }, { "epoch": 10.131369255287074, "learning_rate": 4.3028830614751446e-07, "loss": 2.6693, "step": 235220 }, { "epoch": 10.132230693026662, "learning_rate": 4.302398241720356e-07, "loss": 2.8071, "step": 235240 }, { "epoch": 10.133092130766249, "learning_rate": 4.3019134219655666e-07, "loss": 2.7331, "step": 235260 }, { "epoch": 10.133953568505836, "learning_rate": 4.3014286022107783e-07, "loss": 2.6406, "step": 235280 }, { "epoch": 10.134815006245423, "learning_rate": 4.3009437824559885e-07, "loss": 2.6701, "step": 235300 }, { "epoch": 10.13567644398501, "learning_rate": 4.300458962701201e-07, "loss": 2.6395, "step": 235320 }, { "epoch": 10.136537881724598, "learning_rate": 4.299974142946411e-07, "loss": 2.6618, "step": 235340 }, { "epoch": 10.137399319464185, "learning_rate": 4.299489323191622e-07, "loss": 2.7274, "step": 235360 }, { "epoch": 10.138260757203772, "learning_rate": 4.299004503436833e-07, "loss": 2.7476, "step": 235380 }, { "epoch": 10.13912219494336, "learning_rate": 4.298519683682045e-07, "loss": 2.6348, "step": 235400 }, { "epoch": 10.139983632682949, "learning_rate": 4.298034863927255e-07, "loss": 2.6986, "step": 235420 }, { "epoch": 10.140845070422536, "learning_rate": 4.2975500441724667e-07, "loss": 2.7733, "step": 235440 }, { "epoch": 10.141706508162123, "learning_rate": 4.2970652244176774e-07, "loss": 2.7101, "step": 235460 }, { "epoch": 10.14256794590171, "learning_rate": 4.2965804046628887e-07, "loss": 2.7993, "step": 235480 }, { "epoch": 10.143429383641298, "learning_rate": 4.2960955849080994e-07, "loss": 2.6913, "step": 235500 }, { "epoch": 10.144290821380885, "learning_rate": 4.295610765153311e-07, "loss": 2.7281, "step": 235520 }, { "epoch": 10.145152259120472, "learning_rate": 4.2951259453985213e-07, "loss": 2.717, "step": 235540 }, { "epoch": 10.14601369686006, "learning_rate": 4.294641125643733e-07, "loss": 2.6913, "step": 235560 }, { "epoch": 10.146875134599647, "learning_rate": 4.294156305888944e-07, "loss": 2.7638, "step": 235580 }, { "epoch": 10.147736572339234, "learning_rate": 4.293671486134155e-07, "loss": 2.7439, "step": 235600 }, { "epoch": 10.148598010078821, "learning_rate": 4.293186666379366e-07, "loss": 2.7637, "step": 235620 }, { "epoch": 10.149459447818408, "learning_rate": 4.2927018466245765e-07, "loss": 2.73, "step": 235640 }, { "epoch": 10.150320885557996, "learning_rate": 4.2922170268697883e-07, "loss": 2.6166, "step": 235660 }, { "epoch": 10.151182323297583, "learning_rate": 4.2917322071149985e-07, "loss": 2.8379, "step": 235680 }, { "epoch": 10.152043761037172, "learning_rate": 4.29124738736021e-07, "loss": 2.7107, "step": 235700 }, { "epoch": 10.15290519877676, "learning_rate": 4.290762567605421e-07, "loss": 2.5572, "step": 235720 }, { "epoch": 10.153766636516346, "learning_rate": 4.2902777478506327e-07, "loss": 2.694, "step": 235740 }, { "epoch": 10.154628074255934, "learning_rate": 4.289792928095843e-07, "loss": 2.7585, "step": 235760 }, { "epoch": 10.15548951199552, "learning_rate": 4.2893081083410547e-07, "loss": 2.8107, "step": 235780 }, { "epoch": 10.156350949735108, "learning_rate": 4.288823288586265e-07, "loss": 2.6571, "step": 235800 }, { "epoch": 10.157212387474695, "learning_rate": 4.2883384688314766e-07, "loss": 2.6744, "step": 235820 }, { "epoch": 10.158073825214283, "learning_rate": 4.2878536490766874e-07, "loss": 2.7439, "step": 235840 }, { "epoch": 10.15893526295387, "learning_rate": 4.2873688293218986e-07, "loss": 2.6749, "step": 235860 }, { "epoch": 10.159796700693457, "learning_rate": 4.2868840095671093e-07, "loss": 2.7292, "step": 235880 }, { "epoch": 10.160658138433044, "learning_rate": 4.286399189812321e-07, "loss": 2.7218, "step": 235900 }, { "epoch": 10.161519576172632, "learning_rate": 4.285914370057532e-07, "loss": 2.7372, "step": 235920 }, { "epoch": 10.162381013912219, "learning_rate": 4.285429550302743e-07, "loss": 2.7821, "step": 235940 }, { "epoch": 10.163242451651806, "learning_rate": 4.284944730547954e-07, "loss": 2.7652, "step": 235960 }, { "epoch": 10.164103889391395, "learning_rate": 4.284459910793165e-07, "loss": 2.8439, "step": 235980 }, { "epoch": 10.164965327130982, "learning_rate": 4.2839750910383757e-07, "loss": 2.6719, "step": 236000 }, { "epoch": 10.16582676487057, "learning_rate": 4.2834902712835875e-07, "loss": 2.6264, "step": 236020 }, { "epoch": 10.166688202610157, "learning_rate": 4.283005451528798e-07, "loss": 2.7847, "step": 236040 }, { "epoch": 10.167549640349744, "learning_rate": 4.2825206317740095e-07, "loss": 2.8013, "step": 236060 }, { "epoch": 10.168411078089331, "learning_rate": 4.28203581201922e-07, "loss": 2.6943, "step": 236080 }, { "epoch": 10.169272515828919, "learning_rate": 4.281550992264432e-07, "loss": 2.7968, "step": 236100 }, { "epoch": 10.170133953568506, "learning_rate": 4.281066172509642e-07, "loss": 2.6838, "step": 236120 }, { "epoch": 10.170995391308093, "learning_rate": 4.280581352754854e-07, "loss": 2.6997, "step": 236140 }, { "epoch": 10.17185682904768, "learning_rate": 4.280096533000064e-07, "loss": 2.5769, "step": 236160 }, { "epoch": 10.172718266787268, "learning_rate": 4.279611713245275e-07, "loss": 2.7788, "step": 236180 }, { "epoch": 10.173579704526855, "learning_rate": 4.2791268934904866e-07, "loss": 2.6494, "step": 236200 }, { "epoch": 10.174441142266442, "learning_rate": 4.2786420737356973e-07, "loss": 2.8399, "step": 236220 }, { "epoch": 10.17530258000603, "learning_rate": 4.2781572539809085e-07, "loss": 2.8589, "step": 236240 }, { "epoch": 10.176164017745618, "learning_rate": 4.277672434226119e-07, "loss": 2.8967, "step": 236260 }, { "epoch": 10.177025455485206, "learning_rate": 4.277187614471331e-07, "loss": 2.7168, "step": 236280 }, { "epoch": 10.177886893224793, "learning_rate": 4.276702794716542e-07, "loss": 2.7743, "step": 236300 }, { "epoch": 10.17874833096438, "learning_rate": 4.276217974961753e-07, "loss": 2.6319, "step": 236320 }, { "epoch": 10.179609768703967, "learning_rate": 4.2757331552069637e-07, "loss": 2.8287, "step": 236340 }, { "epoch": 10.180471206443555, "learning_rate": 4.2752483354521755e-07, "loss": 2.8522, "step": 236360 }, { "epoch": 10.181332644183142, "learning_rate": 4.2747635156973857e-07, "loss": 2.6662, "step": 236380 }, { "epoch": 10.182194081922729, "learning_rate": 4.2742786959425974e-07, "loss": 2.6742, "step": 236400 }, { "epoch": 10.183055519662316, "learning_rate": 4.273793876187808e-07, "loss": 2.8923, "step": 236420 }, { "epoch": 10.183916957401904, "learning_rate": 4.2733090564330194e-07, "loss": 2.8241, "step": 236440 }, { "epoch": 10.18477839514149, "learning_rate": 4.27282423667823e-07, "loss": 2.6736, "step": 236460 }, { "epoch": 10.185639832881078, "learning_rate": 4.272339416923442e-07, "loss": 2.6149, "step": 236480 }, { "epoch": 10.186501270620665, "learning_rate": 4.271854597168652e-07, "loss": 2.8253, "step": 236500 }, { "epoch": 10.187362708360252, "learning_rate": 4.271369777413864e-07, "loss": 2.7735, "step": 236520 }, { "epoch": 10.188224146099842, "learning_rate": 4.2708849576590746e-07, "loss": 2.7163, "step": 236540 }, { "epoch": 10.189085583839429, "learning_rate": 4.270400137904286e-07, "loss": 2.6441, "step": 236560 }, { "epoch": 10.189947021579016, "learning_rate": 4.269915318149496e-07, "loss": 2.6054, "step": 236580 }, { "epoch": 10.190808459318603, "learning_rate": 4.2694304983947083e-07, "loss": 2.9061, "step": 236600 }, { "epoch": 10.19166989705819, "learning_rate": 4.2689456786399185e-07, "loss": 2.7978, "step": 236620 }, { "epoch": 10.192531334797778, "learning_rate": 4.26846085888513e-07, "loss": 2.6875, "step": 236640 }, { "epoch": 10.193392772537365, "learning_rate": 4.267976039130341e-07, "loss": 2.739, "step": 236660 }, { "epoch": 10.194254210276952, "learning_rate": 4.267491219375552e-07, "loss": 2.7167, "step": 236680 }, { "epoch": 10.19511564801654, "learning_rate": 4.267006399620763e-07, "loss": 2.7461, "step": 236700 }, { "epoch": 10.195977085756127, "learning_rate": 4.2665215798659736e-07, "loss": 2.8194, "step": 236720 }, { "epoch": 10.196838523495714, "learning_rate": 4.2660367601111854e-07, "loss": 2.5267, "step": 236740 }, { "epoch": 10.197699961235301, "learning_rate": 4.2655519403563956e-07, "loss": 2.7588, "step": 236760 }, { "epoch": 10.198561398974888, "learning_rate": 4.2650671206016074e-07, "loss": 2.7631, "step": 236780 }, { "epoch": 10.199422836714476, "learning_rate": 4.264582300846818e-07, "loss": 2.6696, "step": 236800 }, { "epoch": 10.200284274454063, "learning_rate": 4.2640974810920293e-07, "loss": 2.7617, "step": 236820 }, { "epoch": 10.201145712193652, "learning_rate": 4.26361266133724e-07, "loss": 2.726, "step": 236840 }, { "epoch": 10.20200714993324, "learning_rate": 4.263127841582452e-07, "loss": 2.8623, "step": 236860 }, { "epoch": 10.202868587672826, "learning_rate": 4.262643021827662e-07, "loss": 2.7234, "step": 236880 }, { "epoch": 10.203730025412414, "learning_rate": 4.262158202072874e-07, "loss": 2.7692, "step": 236900 }, { "epoch": 10.204591463152001, "learning_rate": 4.2616733823180845e-07, "loss": 2.6683, "step": 236920 }, { "epoch": 10.205452900891588, "learning_rate": 4.2611885625632957e-07, "loss": 2.7187, "step": 236940 }, { "epoch": 10.206314338631175, "learning_rate": 4.2607037428085064e-07, "loss": 2.7769, "step": 236960 }, { "epoch": 10.207175776370763, "learning_rate": 4.260218923053718e-07, "loss": 2.6797, "step": 236980 }, { "epoch": 10.20803721411035, "learning_rate": 4.259734103298929e-07, "loss": 2.6933, "step": 237000 }, { "epoch": 10.208898651849937, "learning_rate": 4.25924928354414e-07, "loss": 2.6775, "step": 237020 }, { "epoch": 10.209760089589524, "learning_rate": 4.258764463789351e-07, "loss": 2.6683, "step": 237040 }, { "epoch": 10.210621527329112, "learning_rate": 4.2582796440345627e-07, "loss": 2.8644, "step": 237060 }, { "epoch": 10.211482965068699, "learning_rate": 4.257794824279773e-07, "loss": 2.6605, "step": 237080 }, { "epoch": 10.212344402808288, "learning_rate": 4.257310004524985e-07, "loss": 2.6446, "step": 237100 }, { "epoch": 10.213205840547875, "learning_rate": 4.2568251847701953e-07, "loss": 2.5867, "step": 237120 }, { "epoch": 10.214067278287462, "learning_rate": 4.2563403650154066e-07, "loss": 2.7821, "step": 237140 }, { "epoch": 10.21492871602705, "learning_rate": 4.2558555452606173e-07, "loss": 2.7165, "step": 237160 }, { "epoch": 10.215790153766637, "learning_rate": 4.255370725505829e-07, "loss": 2.7909, "step": 237180 }, { "epoch": 10.216651591506224, "learning_rate": 4.254885905751039e-07, "loss": 2.6505, "step": 237200 }, { "epoch": 10.217513029245811, "learning_rate": 4.254401085996251e-07, "loss": 2.7775, "step": 237220 }, { "epoch": 10.218374466985399, "learning_rate": 4.253916266241462e-07, "loss": 2.8393, "step": 237240 }, { "epoch": 10.219235904724986, "learning_rate": 4.253431446486672e-07, "loss": 2.6272, "step": 237260 }, { "epoch": 10.220097342464573, "learning_rate": 4.2529466267318837e-07, "loss": 2.6569, "step": 237280 }, { "epoch": 10.22095878020416, "learning_rate": 4.2524618069770944e-07, "loss": 2.6837, "step": 237300 }, { "epoch": 10.221820217943748, "learning_rate": 4.2519769872223057e-07, "loss": 2.7375, "step": 237320 }, { "epoch": 10.222681655683335, "learning_rate": 4.2514921674675164e-07, "loss": 2.653, "step": 237340 }, { "epoch": 10.223543093422922, "learning_rate": 4.251007347712728e-07, "loss": 2.5177, "step": 237360 }, { "epoch": 10.22440453116251, "learning_rate": 4.250522527957939e-07, "loss": 2.7613, "step": 237380 }, { "epoch": 10.225265968902098, "learning_rate": 4.25003770820315e-07, "loss": 2.7773, "step": 237400 }, { "epoch": 10.226127406641686, "learning_rate": 4.249552888448361e-07, "loss": 2.6988, "step": 237420 }, { "epoch": 10.226988844381273, "learning_rate": 4.2490680686935726e-07, "loss": 2.6198, "step": 237440 }, { "epoch": 10.22785028212086, "learning_rate": 4.248583248938783e-07, "loss": 2.5947, "step": 237460 }, { "epoch": 10.228711719860447, "learning_rate": 4.2480984291839946e-07, "loss": 2.796, "step": 237480 }, { "epoch": 10.229573157600035, "learning_rate": 4.2476136094292053e-07, "loss": 2.6244, "step": 237500 }, { "epoch": 10.230434595339622, "learning_rate": 4.247128789674417e-07, "loss": 2.665, "step": 237520 }, { "epoch": 10.231296033079209, "learning_rate": 4.246643969919627e-07, "loss": 2.648, "step": 237540 }, { "epoch": 10.232157470818796, "learning_rate": 4.246159150164839e-07, "loss": 2.8089, "step": 237560 }, { "epoch": 10.233018908558384, "learning_rate": 4.245674330410049e-07, "loss": 2.6438, "step": 237580 }, { "epoch": 10.23388034629797, "learning_rate": 4.245189510655261e-07, "loss": 2.6655, "step": 237600 }, { "epoch": 10.234741784037558, "learning_rate": 4.2447046909004717e-07, "loss": 2.7351, "step": 237620 }, { "epoch": 10.235603221777145, "learning_rate": 4.244219871145683e-07, "loss": 2.685, "step": 237640 }, { "epoch": 10.236464659516733, "learning_rate": 4.2437350513908936e-07, "loss": 2.7698, "step": 237660 }, { "epoch": 10.237326097256322, "learning_rate": 4.2432502316361054e-07, "loss": 2.6844, "step": 237680 }, { "epoch": 10.238187534995909, "learning_rate": 4.242765411881316e-07, "loss": 2.7511, "step": 237700 }, { "epoch": 10.239048972735496, "learning_rate": 4.2422805921265274e-07, "loss": 2.7576, "step": 237720 }, { "epoch": 10.239910410475083, "learning_rate": 4.241795772371738e-07, "loss": 2.6258, "step": 237740 }, { "epoch": 10.24077184821467, "learning_rate": 4.2413109526169493e-07, "loss": 2.784, "step": 237760 }, { "epoch": 10.241633285954258, "learning_rate": 4.24082613286216e-07, "loss": 2.6417, "step": 237780 }, { "epoch": 10.242494723693845, "learning_rate": 4.240341313107371e-07, "loss": 2.661, "step": 237800 }, { "epoch": 10.243356161433432, "learning_rate": 4.2398564933525825e-07, "loss": 2.8966, "step": 237820 }, { "epoch": 10.24421759917302, "learning_rate": 4.2393716735977927e-07, "loss": 2.8227, "step": 237840 }, { "epoch": 10.245079036912607, "learning_rate": 4.2388868538430045e-07, "loss": 2.6246, "step": 237860 }, { "epoch": 10.245940474652194, "learning_rate": 4.238402034088215e-07, "loss": 2.8172, "step": 237880 }, { "epoch": 10.246801912391781, "learning_rate": 4.2379172143334265e-07, "loss": 2.8408, "step": 237900 }, { "epoch": 10.247663350131369, "learning_rate": 4.237432394578637e-07, "loss": 2.7499, "step": 237920 }, { "epoch": 10.248524787870956, "learning_rate": 4.2369475748238484e-07, "loss": 2.7672, "step": 237940 }, { "epoch": 10.249386225610545, "learning_rate": 4.236462755069059e-07, "loss": 2.6401, "step": 237960 }, { "epoch": 10.250247663350132, "learning_rate": 4.235977935314271e-07, "loss": 2.9037, "step": 237980 }, { "epoch": 10.25110910108972, "learning_rate": 4.2354931155594816e-07, "loss": 2.5941, "step": 238000 }, { "epoch": 10.251970538829307, "learning_rate": 4.235008295804693e-07, "loss": 2.7517, "step": 238020 }, { "epoch": 10.252831976568894, "learning_rate": 4.2345234760499036e-07, "loss": 2.7605, "step": 238040 }, { "epoch": 10.253693414308481, "learning_rate": 4.2340386562951154e-07, "loss": 2.6672, "step": 238060 }, { "epoch": 10.254554852048068, "learning_rate": 4.233553836540326e-07, "loss": 2.7447, "step": 238080 }, { "epoch": 10.255416289787656, "learning_rate": 4.2330690167855373e-07, "loss": 2.8843, "step": 238100 }, { "epoch": 10.256277727527243, "learning_rate": 4.232584197030748e-07, "loss": 2.6501, "step": 238120 }, { "epoch": 10.25713916526683, "learning_rate": 4.23209937727596e-07, "loss": 2.7445, "step": 238140 }, { "epoch": 10.258000603006417, "learning_rate": 4.23161455752117e-07, "loss": 2.5849, "step": 238160 }, { "epoch": 10.258862040746004, "learning_rate": 4.231129737766382e-07, "loss": 2.7438, "step": 238180 }, { "epoch": 10.259723478485592, "learning_rate": 4.2306449180115925e-07, "loss": 2.647, "step": 238200 }, { "epoch": 10.260584916225179, "learning_rate": 4.2301600982568037e-07, "loss": 2.672, "step": 238220 }, { "epoch": 10.261446353964768, "learning_rate": 4.2296752785020144e-07, "loss": 2.6635, "step": 238240 }, { "epoch": 10.262307791704355, "learning_rate": 4.229190458747226e-07, "loss": 2.6759, "step": 238260 }, { "epoch": 10.263169229443942, "learning_rate": 4.2287056389924364e-07, "loss": 2.7626, "step": 238280 }, { "epoch": 10.26403066718353, "learning_rate": 4.228220819237648e-07, "loss": 2.5946, "step": 238300 }, { "epoch": 10.264892104923117, "learning_rate": 4.227735999482859e-07, "loss": 2.6959, "step": 238320 }, { "epoch": 10.265753542662704, "learning_rate": 4.227251179728069e-07, "loss": 2.8096, "step": 238340 }, { "epoch": 10.266614980402291, "learning_rate": 4.2267663599732803e-07, "loss": 2.8333, "step": 238360 }, { "epoch": 10.267476418141879, "learning_rate": 4.2262815402184916e-07, "loss": 2.7044, "step": 238380 }, { "epoch": 10.268337855881466, "learning_rate": 4.225796720463703e-07, "loss": 2.6821, "step": 238400 }, { "epoch": 10.269199293621053, "learning_rate": 4.2253119007089135e-07, "loss": 2.8375, "step": 238420 }, { "epoch": 10.27006073136064, "learning_rate": 4.2248270809541253e-07, "loss": 2.7435, "step": 238440 }, { "epoch": 10.270922169100228, "learning_rate": 4.224342261199336e-07, "loss": 2.7913, "step": 238460 }, { "epoch": 10.271783606839815, "learning_rate": 4.223857441444547e-07, "loss": 2.7772, "step": 238480 }, { "epoch": 10.272645044579402, "learning_rate": 4.223372621689758e-07, "loss": 2.6101, "step": 238500 }, { "epoch": 10.273506482318991, "learning_rate": 4.22288780193497e-07, "loss": 2.6701, "step": 238520 }, { "epoch": 10.274367920058578, "learning_rate": 4.22240298218018e-07, "loss": 2.9081, "step": 238540 }, { "epoch": 10.275229357798166, "learning_rate": 4.2219181624253917e-07, "loss": 2.7445, "step": 238560 }, { "epoch": 10.276090795537753, "learning_rate": 4.2214333426706024e-07, "loss": 2.774, "step": 238580 }, { "epoch": 10.27695223327734, "learning_rate": 4.2209485229158137e-07, "loss": 2.6653, "step": 238600 }, { "epoch": 10.277813671016927, "learning_rate": 4.2204637031610244e-07, "loss": 2.5808, "step": 238620 }, { "epoch": 10.278675108756515, "learning_rate": 4.219978883406236e-07, "loss": 2.7587, "step": 238640 }, { "epoch": 10.279536546496102, "learning_rate": 4.2194940636514463e-07, "loss": 2.6612, "step": 238660 }, { "epoch": 10.28039798423569, "learning_rate": 4.219009243896658e-07, "loss": 2.7094, "step": 238680 }, { "epoch": 10.281259421975276, "learning_rate": 4.218524424141869e-07, "loss": 2.835, "step": 238700 }, { "epoch": 10.282120859714864, "learning_rate": 4.21803960438708e-07, "loss": 2.8281, "step": 238720 }, { "epoch": 10.282982297454451, "learning_rate": 4.217554784632291e-07, "loss": 2.7648, "step": 238740 }, { "epoch": 10.283843735194038, "learning_rate": 4.2170699648775026e-07, "loss": 2.7684, "step": 238760 }, { "epoch": 10.284705172933625, "learning_rate": 4.2165851451227133e-07, "loss": 2.9308, "step": 238780 }, { "epoch": 10.285566610673214, "learning_rate": 4.2161003253679245e-07, "loss": 2.6397, "step": 238800 }, { "epoch": 10.286428048412802, "learning_rate": 4.215615505613135e-07, "loss": 2.6449, "step": 238820 }, { "epoch": 10.287289486152389, "learning_rate": 4.215130685858347e-07, "loss": 2.5978, "step": 238840 }, { "epoch": 10.288150923891976, "learning_rate": 4.214645866103557e-07, "loss": 2.7557, "step": 238860 }, { "epoch": 10.289012361631563, "learning_rate": 4.214161046348768e-07, "loss": 2.5845, "step": 238880 }, { "epoch": 10.28987379937115, "learning_rate": 4.2136762265939797e-07, "loss": 2.7624, "step": 238900 }, { "epoch": 10.290735237110738, "learning_rate": 4.21319140683919e-07, "loss": 2.7894, "step": 238920 }, { "epoch": 10.291596674850325, "learning_rate": 4.2127065870844016e-07, "loss": 2.7407, "step": 238940 }, { "epoch": 10.292458112589912, "learning_rate": 4.2122217673296123e-07, "loss": 2.6433, "step": 238960 }, { "epoch": 10.2933195503295, "learning_rate": 4.2117369475748236e-07, "loss": 2.6204, "step": 238980 }, { "epoch": 10.294180988069087, "learning_rate": 4.2112521278200343e-07, "loss": 2.8115, "step": 239000 }, { "epoch": 10.295042425808674, "learning_rate": 4.210767308065246e-07, "loss": 2.7236, "step": 239020 }, { "epoch": 10.295903863548261, "learning_rate": 4.2102824883104563e-07, "loss": 2.7157, "step": 239040 }, { "epoch": 10.296765301287849, "learning_rate": 4.209797668555668e-07, "loss": 2.645, "step": 239060 }, { "epoch": 10.297626739027438, "learning_rate": 4.209312848800879e-07, "loss": 2.7174, "step": 239080 }, { "epoch": 10.298488176767025, "learning_rate": 4.20882802904609e-07, "loss": 2.4616, "step": 239100 }, { "epoch": 10.299349614506612, "learning_rate": 4.2083432092913007e-07, "loss": 2.7281, "step": 239120 }, { "epoch": 10.3002110522462, "learning_rate": 4.2078583895365125e-07, "loss": 2.8163, "step": 239140 }, { "epoch": 10.301072489985787, "learning_rate": 4.207373569781723e-07, "loss": 2.6816, "step": 239160 }, { "epoch": 10.301933927725374, "learning_rate": 4.2068887500269344e-07, "loss": 2.7182, "step": 239180 }, { "epoch": 10.302795365464961, "learning_rate": 4.206403930272145e-07, "loss": 2.668, "step": 239200 }, { "epoch": 10.303656803204548, "learning_rate": 4.205919110517357e-07, "loss": 2.7394, "step": 239220 }, { "epoch": 10.304518240944136, "learning_rate": 4.205434290762567e-07, "loss": 2.7768, "step": 239240 }, { "epoch": 10.305379678683723, "learning_rate": 4.204949471007779e-07, "loss": 2.5834, "step": 239260 }, { "epoch": 10.30624111642331, "learning_rate": 4.2044646512529896e-07, "loss": 2.7463, "step": 239280 }, { "epoch": 10.307102554162897, "learning_rate": 4.2039798314982014e-07, "loss": 2.8267, "step": 239300 }, { "epoch": 10.307963991902485, "learning_rate": 4.2034950117434116e-07, "loss": 2.7263, "step": 239320 }, { "epoch": 10.308825429642072, "learning_rate": 4.2030101919886233e-07, "loss": 2.7779, "step": 239340 }, { "epoch": 10.30968686738166, "learning_rate": 4.2025253722338335e-07, "loss": 2.6972, "step": 239360 }, { "epoch": 10.310548305121248, "learning_rate": 4.2020405524790453e-07, "loss": 2.8002, "step": 239380 }, { "epoch": 10.311409742860835, "learning_rate": 4.201555732724256e-07, "loss": 2.7197, "step": 239400 }, { "epoch": 10.312271180600423, "learning_rate": 4.2010709129694667e-07, "loss": 2.7259, "step": 239420 }, { "epoch": 10.31313261834001, "learning_rate": 4.200586093214678e-07, "loss": 2.6876, "step": 239440 }, { "epoch": 10.313994056079597, "learning_rate": 4.200101273459889e-07, "loss": 2.5693, "step": 239460 }, { "epoch": 10.314855493819184, "learning_rate": 4.1996164537051005e-07, "loss": 2.6567, "step": 239480 }, { "epoch": 10.315716931558772, "learning_rate": 4.1991316339503107e-07, "loss": 2.8098, "step": 239500 }, { "epoch": 10.316578369298359, "learning_rate": 4.1986468141955224e-07, "loss": 2.6968, "step": 239520 }, { "epoch": 10.317439807037946, "learning_rate": 4.198161994440733e-07, "loss": 2.804, "step": 239540 }, { "epoch": 10.318301244777533, "learning_rate": 4.1976771746859444e-07, "loss": 2.7836, "step": 239560 }, { "epoch": 10.31916268251712, "learning_rate": 4.197192354931155e-07, "loss": 2.7852, "step": 239580 }, { "epoch": 10.320024120256708, "learning_rate": 4.196707535176367e-07, "loss": 2.7026, "step": 239600 }, { "epoch": 10.320885557996295, "learning_rate": 4.196222715421577e-07, "loss": 2.6644, "step": 239620 }, { "epoch": 10.321746995735882, "learning_rate": 4.195737895666789e-07, "loss": 2.5679, "step": 239640 }, { "epoch": 10.322608433475471, "learning_rate": 4.1952530759119995e-07, "loss": 2.712, "step": 239660 }, { "epoch": 10.323469871215059, "learning_rate": 4.194768256157211e-07, "loss": 2.7454, "step": 239680 }, { "epoch": 10.324331308954646, "learning_rate": 4.1942834364024215e-07, "loss": 2.8028, "step": 239700 }, { "epoch": 10.325192746694233, "learning_rate": 4.193798616647633e-07, "loss": 2.7481, "step": 239720 }, { "epoch": 10.32605418443382, "learning_rate": 4.1933137968928435e-07, "loss": 2.8263, "step": 239740 }, { "epoch": 10.326915622173408, "learning_rate": 4.192828977138055e-07, "loss": 2.8311, "step": 239760 }, { "epoch": 10.327777059912995, "learning_rate": 4.192344157383266e-07, "loss": 2.7306, "step": 239780 }, { "epoch": 10.328638497652582, "learning_rate": 4.191859337628477e-07, "loss": 2.7676, "step": 239800 }, { "epoch": 10.32949993539217, "learning_rate": 4.191374517873688e-07, "loss": 2.7976, "step": 239820 }, { "epoch": 10.330361373131757, "learning_rate": 4.1908896981188997e-07, "loss": 2.7643, "step": 239840 }, { "epoch": 10.331222810871344, "learning_rate": 4.1904048783641104e-07, "loss": 2.8516, "step": 239860 }, { "epoch": 10.332084248610931, "learning_rate": 4.1899200586093216e-07, "loss": 2.8845, "step": 239880 }, { "epoch": 10.332945686350518, "learning_rate": 4.1894352388545324e-07, "loss": 2.7571, "step": 239900 }, { "epoch": 10.333807124090107, "learning_rate": 4.188950419099744e-07, "loss": 2.8223, "step": 239920 }, { "epoch": 10.334668561829695, "learning_rate": 4.1884655993449543e-07, "loss": 2.8767, "step": 239940 }, { "epoch": 10.335529999569282, "learning_rate": 4.187980779590165e-07, "loss": 2.7964, "step": 239960 }, { "epoch": 10.336391437308869, "learning_rate": 4.187495959835377e-07, "loss": 2.5461, "step": 239980 }, { "epoch": 10.337252875048456, "learning_rate": 4.187011140080587e-07, "loss": 2.6187, "step": 240000 }, { "epoch": 10.338114312788043, "learning_rate": 4.186526320325799e-07, "loss": 2.6227, "step": 240020 }, { "epoch": 10.33897575052763, "learning_rate": 4.1860415005710095e-07, "loss": 2.59, "step": 240040 }, { "epoch": 10.339837188267218, "learning_rate": 4.1855566808162207e-07, "loss": 2.7605, "step": 240060 }, { "epoch": 10.340698626006805, "learning_rate": 4.1850718610614314e-07, "loss": 2.8958, "step": 240080 }, { "epoch": 10.341560063746392, "learning_rate": 4.184587041306643e-07, "loss": 2.8473, "step": 240100 }, { "epoch": 10.34242150148598, "learning_rate": 4.1841022215518534e-07, "loss": 2.6949, "step": 240120 }, { "epoch": 10.343282939225567, "learning_rate": 4.1836174017970646e-07, "loss": 2.8028, "step": 240140 }, { "epoch": 10.344144376965154, "learning_rate": 4.183132582042276e-07, "loss": 2.6857, "step": 240160 }, { "epoch": 10.345005814704741, "learning_rate": 4.182647762287487e-07, "loss": 2.8328, "step": 240180 }, { "epoch": 10.345867252444329, "learning_rate": 4.182162942532698e-07, "loss": 2.6733, "step": 240200 }, { "epoch": 10.346728690183918, "learning_rate": 4.1816781227779096e-07, "loss": 2.6818, "step": 240220 }, { "epoch": 10.347590127923505, "learning_rate": 4.1811933030231203e-07, "loss": 2.7516, "step": 240240 }, { "epoch": 10.348451565663092, "learning_rate": 4.1807084832683316e-07, "loss": 2.7548, "step": 240260 }, { "epoch": 10.34931300340268, "learning_rate": 4.1802236635135423e-07, "loss": 2.8873, "step": 240280 }, { "epoch": 10.350174441142267, "learning_rate": 4.179738843758754e-07, "loss": 2.7693, "step": 240300 }, { "epoch": 10.351035878881854, "learning_rate": 4.179254024003964e-07, "loss": 2.6307, "step": 240320 }, { "epoch": 10.351897316621441, "learning_rate": 4.178769204249176e-07, "loss": 2.7293, "step": 240340 }, { "epoch": 10.352758754361028, "learning_rate": 4.178284384494387e-07, "loss": 2.8753, "step": 240360 }, { "epoch": 10.353620192100616, "learning_rate": 4.177799564739598e-07, "loss": 2.6822, "step": 240380 }, { "epoch": 10.354481629840203, "learning_rate": 4.1773147449848087e-07, "loss": 2.8325, "step": 240400 }, { "epoch": 10.35534306757979, "learning_rate": 4.1768299252300205e-07, "loss": 2.6278, "step": 240420 }, { "epoch": 10.356204505319377, "learning_rate": 4.1763451054752307e-07, "loss": 2.7377, "step": 240440 }, { "epoch": 10.357065943058965, "learning_rate": 4.1758602857204424e-07, "loss": 3.0039, "step": 240460 }, { "epoch": 10.357927380798554, "learning_rate": 4.175375465965653e-07, "loss": 2.7882, "step": 240480 }, { "epoch": 10.358788818538141, "learning_rate": 4.174890646210864e-07, "loss": 2.7133, "step": 240500 }, { "epoch": 10.359650256277728, "learning_rate": 4.174405826456075e-07, "loss": 2.6893, "step": 240520 }, { "epoch": 10.360511694017315, "learning_rate": 4.173921006701286e-07, "loss": 2.7598, "step": 240540 }, { "epoch": 10.361373131756903, "learning_rate": 4.1734361869464976e-07, "loss": 2.8027, "step": 240560 }, { "epoch": 10.36223456949649, "learning_rate": 4.172951367191708e-07, "loss": 2.834, "step": 240580 }, { "epoch": 10.363096007236077, "learning_rate": 4.1724665474369196e-07, "loss": 2.6183, "step": 240600 }, { "epoch": 10.363957444975664, "learning_rate": 4.1719817276821303e-07, "loss": 2.695, "step": 240620 }, { "epoch": 10.364818882715252, "learning_rate": 4.1714969079273415e-07, "loss": 2.791, "step": 240640 }, { "epoch": 10.365680320454839, "learning_rate": 4.171012088172552e-07, "loss": 2.6879, "step": 240660 }, { "epoch": 10.366541758194426, "learning_rate": 4.170527268417764e-07, "loss": 2.9086, "step": 240680 }, { "epoch": 10.367403195934013, "learning_rate": 4.170042448662974e-07, "loss": 2.6636, "step": 240700 }, { "epoch": 10.3682646336736, "learning_rate": 4.169557628908186e-07, "loss": 2.6825, "step": 240720 }, { "epoch": 10.369126071413188, "learning_rate": 4.1690728091533967e-07, "loss": 2.8097, "step": 240740 }, { "epoch": 10.369987509152775, "learning_rate": 4.168587989398608e-07, "loss": 2.5656, "step": 240760 }, { "epoch": 10.370848946892364, "learning_rate": 4.1681031696438186e-07, "loss": 2.8078, "step": 240780 }, { "epoch": 10.371710384631951, "learning_rate": 4.1676183498890304e-07, "loss": 2.8529, "step": 240800 }, { "epoch": 10.372571822371539, "learning_rate": 4.1671335301342406e-07, "loss": 2.7072, "step": 240820 }, { "epoch": 10.373433260111126, "learning_rate": 4.1666487103794524e-07, "loss": 2.7549, "step": 240840 }, { "epoch": 10.374294697850713, "learning_rate": 4.166163890624663e-07, "loss": 2.6892, "step": 240860 }, { "epoch": 10.3751561355903, "learning_rate": 4.1656790708698743e-07, "loss": 2.824, "step": 240880 }, { "epoch": 10.376017573329888, "learning_rate": 4.165194251115085e-07, "loss": 2.7393, "step": 240900 }, { "epoch": 10.376879011069475, "learning_rate": 4.164709431360297e-07, "loss": 2.8184, "step": 240920 }, { "epoch": 10.377740448809062, "learning_rate": 4.1642246116055075e-07, "loss": 2.7304, "step": 240940 }, { "epoch": 10.37860188654865, "learning_rate": 4.163739791850719e-07, "loss": 2.7009, "step": 240960 }, { "epoch": 10.379463324288237, "learning_rate": 4.1632549720959295e-07, "loss": 2.7764, "step": 240980 }, { "epoch": 10.380324762027824, "learning_rate": 4.16277015234114e-07, "loss": 2.7895, "step": 241000 }, { "epoch": 10.381186199767411, "learning_rate": 4.1622853325863515e-07, "loss": 2.6754, "step": 241020 }, { "epoch": 10.382047637506998, "learning_rate": 4.161800512831562e-07, "loss": 2.7218, "step": 241040 }, { "epoch": 10.382909075246587, "learning_rate": 4.161315693076774e-07, "loss": 2.7934, "step": 241060 }, { "epoch": 10.383770512986175, "learning_rate": 4.160830873321984e-07, "loss": 2.559, "step": 241080 }, { "epoch": 10.384631950725762, "learning_rate": 4.160346053567196e-07, "loss": 2.6398, "step": 241100 }, { "epoch": 10.385493388465349, "learning_rate": 4.1598612338124066e-07, "loss": 2.7251, "step": 241120 }, { "epoch": 10.386354826204936, "learning_rate": 4.159376414057618e-07, "loss": 2.8396, "step": 241140 }, { "epoch": 10.387216263944524, "learning_rate": 4.1588915943028286e-07, "loss": 2.7702, "step": 241160 }, { "epoch": 10.38807770168411, "learning_rate": 4.1584067745480404e-07, "loss": 2.8014, "step": 241180 }, { "epoch": 10.388939139423698, "learning_rate": 4.157921954793251e-07, "loss": 2.6955, "step": 241200 }, { "epoch": 10.389800577163285, "learning_rate": 4.1574371350384623e-07, "loss": 2.6607, "step": 241220 }, { "epoch": 10.390662014902873, "learning_rate": 4.1569523152836736e-07, "loss": 2.6697, "step": 241240 }, { "epoch": 10.39152345264246, "learning_rate": 4.156467495528885e-07, "loss": 2.8915, "step": 241260 }, { "epoch": 10.392384890382047, "learning_rate": 4.155982675774095e-07, "loss": 2.7716, "step": 241280 }, { "epoch": 10.393246328121634, "learning_rate": 4.155497856019307e-07, "loss": 2.6913, "step": 241300 }, { "epoch": 10.394107765861222, "learning_rate": 4.1550130362645175e-07, "loss": 2.84, "step": 241320 }, { "epoch": 10.39496920360081, "learning_rate": 4.1545282165097287e-07, "loss": 2.7007, "step": 241340 }, { "epoch": 10.395830641340398, "learning_rate": 4.1540433967549394e-07, "loss": 2.8545, "step": 241360 }, { "epoch": 10.396692079079985, "learning_rate": 4.153558577000151e-07, "loss": 2.62, "step": 241380 }, { "epoch": 10.397553516819572, "learning_rate": 4.1530737572453614e-07, "loss": 2.6837, "step": 241400 }, { "epoch": 10.39841495455916, "learning_rate": 4.152588937490573e-07, "loss": 2.839, "step": 241420 }, { "epoch": 10.399276392298747, "learning_rate": 4.152104117735784e-07, "loss": 2.8489, "step": 241440 }, { "epoch": 10.400137830038334, "learning_rate": 4.151619297980995e-07, "loss": 2.7458, "step": 241460 }, { "epoch": 10.400999267777921, "learning_rate": 4.151134478226206e-07, "loss": 2.7536, "step": 241480 }, { "epoch": 10.401860705517509, "learning_rate": 4.150649658471417e-07, "loss": 2.6656, "step": 241500 }, { "epoch": 10.402722143257096, "learning_rate": 4.150164838716628e-07, "loss": 2.6403, "step": 241520 }, { "epoch": 10.403583580996683, "learning_rate": 4.1496800189618385e-07, "loss": 2.7839, "step": 241540 }, { "epoch": 10.40444501873627, "learning_rate": 4.1491951992070503e-07, "loss": 2.6942, "step": 241560 }, { "epoch": 10.405306456475858, "learning_rate": 4.148710379452261e-07, "loss": 2.7122, "step": 241580 }, { "epoch": 10.406167894215445, "learning_rate": 4.148225559697472e-07, "loss": 2.6757, "step": 241600 }, { "epoch": 10.407029331955034, "learning_rate": 4.147740739942683e-07, "loss": 2.738, "step": 241620 }, { "epoch": 10.407890769694621, "learning_rate": 4.1472559201878947e-07, "loss": 2.7018, "step": 241640 }, { "epoch": 10.408752207434208, "learning_rate": 4.146771100433105e-07, "loss": 2.7495, "step": 241660 }, { "epoch": 10.409613645173796, "learning_rate": 4.1462862806783167e-07, "loss": 2.4667, "step": 241680 }, { "epoch": 10.410475082913383, "learning_rate": 4.1458014609235274e-07, "loss": 2.7203, "step": 241700 }, { "epoch": 10.41133652065297, "learning_rate": 4.1453166411687387e-07, "loss": 2.839, "step": 241720 }, { "epoch": 10.412197958392557, "learning_rate": 4.1448318214139494e-07, "loss": 2.7146, "step": 241740 }, { "epoch": 10.413059396132144, "learning_rate": 4.144347001659161e-07, "loss": 2.7673, "step": 241760 }, { "epoch": 10.413920833871732, "learning_rate": 4.1438621819043713e-07, "loss": 2.8578, "step": 241780 }, { "epoch": 10.414782271611319, "learning_rate": 4.143377362149583e-07, "loss": 2.743, "step": 241800 }, { "epoch": 10.415643709350906, "learning_rate": 4.142892542394794e-07, "loss": 2.6676, "step": 241820 }, { "epoch": 10.416505147090493, "learning_rate": 4.142407722640005e-07, "loss": 2.8984, "step": 241840 }, { "epoch": 10.41736658483008, "learning_rate": 4.141922902885216e-07, "loss": 2.7202, "step": 241860 }, { "epoch": 10.418228022569668, "learning_rate": 4.1414380831304275e-07, "loss": 2.8233, "step": 241880 }, { "epoch": 10.419089460309257, "learning_rate": 4.1409532633756377e-07, "loss": 2.7018, "step": 241900 }, { "epoch": 10.419950898048844, "learning_rate": 4.140468443620849e-07, "loss": 2.8156, "step": 241920 }, { "epoch": 10.420812335788431, "learning_rate": 4.13998362386606e-07, "loss": 2.6408, "step": 241940 }, { "epoch": 10.421673773528019, "learning_rate": 4.1394988041112715e-07, "loss": 2.9254, "step": 241960 }, { "epoch": 10.422535211267606, "learning_rate": 4.139013984356482e-07, "loss": 2.5724, "step": 241980 }, { "epoch": 10.423396649007193, "learning_rate": 4.138529164601694e-07, "loss": 2.7481, "step": 242000 }, { "epoch": 10.42425808674678, "learning_rate": 4.1380443448469047e-07, "loss": 2.641, "step": 242020 }, { "epoch": 10.425119524486368, "learning_rate": 4.137559525092116e-07, "loss": 2.8069, "step": 242040 }, { "epoch": 10.425980962225955, "learning_rate": 4.1370747053373266e-07, "loss": 2.8911, "step": 242060 }, { "epoch": 10.426842399965542, "learning_rate": 4.136589885582537e-07, "loss": 2.6756, "step": 242080 }, { "epoch": 10.42770383770513, "learning_rate": 4.1361050658277486e-07, "loss": 2.7046, "step": 242100 }, { "epoch": 10.428565275444717, "learning_rate": 4.1356202460729593e-07, "loss": 2.976, "step": 242120 }, { "epoch": 10.429426713184304, "learning_rate": 4.135135426318171e-07, "loss": 2.7859, "step": 242140 }, { "epoch": 10.430288150923891, "learning_rate": 4.134650606563381e-07, "loss": 2.5922, "step": 242160 }, { "epoch": 10.43114958866348, "learning_rate": 4.134165786808593e-07, "loss": 2.6969, "step": 242180 }, { "epoch": 10.432011026403067, "learning_rate": 4.133680967053804e-07, "loss": 2.5188, "step": 242200 }, { "epoch": 10.432872464142655, "learning_rate": 4.133196147299015e-07, "loss": 2.8282, "step": 242220 }, { "epoch": 10.433733901882242, "learning_rate": 4.1327113275442257e-07, "loss": 2.6584, "step": 242240 }, { "epoch": 10.43459533962183, "learning_rate": 4.1322265077894375e-07, "loss": 2.7135, "step": 242260 }, { "epoch": 10.435456777361416, "learning_rate": 4.131741688034648e-07, "loss": 2.747, "step": 242280 }, { "epoch": 10.436318215101004, "learning_rate": 4.1312568682798594e-07, "loss": 2.6868, "step": 242300 }, { "epoch": 10.437179652840591, "learning_rate": 4.13077204852507e-07, "loss": 2.8851, "step": 242320 }, { "epoch": 10.438041090580178, "learning_rate": 4.130287228770282e-07, "loss": 2.8996, "step": 242340 }, { "epoch": 10.438902528319765, "learning_rate": 4.129802409015492e-07, "loss": 2.6106, "step": 242360 }, { "epoch": 10.439763966059353, "learning_rate": 4.129317589260704e-07, "loss": 2.7284, "step": 242380 }, { "epoch": 10.44062540379894, "learning_rate": 4.1288327695059146e-07, "loss": 2.6931, "step": 242400 }, { "epoch": 10.441486841538527, "learning_rate": 4.128347949751126e-07, "loss": 2.6258, "step": 242420 }, { "epoch": 10.442348279278114, "learning_rate": 4.1278631299963366e-07, "loss": 2.5783, "step": 242440 }, { "epoch": 10.443209717017703, "learning_rate": 4.1273783102415483e-07, "loss": 2.7085, "step": 242460 }, { "epoch": 10.44407115475729, "learning_rate": 4.1268934904867585e-07, "loss": 2.7755, "step": 242480 }, { "epoch": 10.444932592496878, "learning_rate": 4.1264086707319703e-07, "loss": 2.6098, "step": 242500 }, { "epoch": 10.445794030236465, "learning_rate": 4.125923850977181e-07, "loss": 2.5546, "step": 242520 }, { "epoch": 10.446655467976052, "learning_rate": 4.125439031222392e-07, "loss": 2.7509, "step": 242540 }, { "epoch": 10.44751690571564, "learning_rate": 4.124954211467603e-07, "loss": 2.7256, "step": 242560 }, { "epoch": 10.448378343455227, "learning_rate": 4.124469391712815e-07, "loss": 2.6558, "step": 242580 }, { "epoch": 10.449239781194814, "learning_rate": 4.123984571958025e-07, "loss": 2.6617, "step": 242600 }, { "epoch": 10.450101218934401, "learning_rate": 4.1234997522032356e-07, "loss": 2.6414, "step": 242620 }, { "epoch": 10.450962656673989, "learning_rate": 4.1230149324484474e-07, "loss": 2.6696, "step": 242640 }, { "epoch": 10.451824094413576, "learning_rate": 4.122530112693658e-07, "loss": 2.6282, "step": 242660 }, { "epoch": 10.452685532153163, "learning_rate": 4.1220452929388694e-07, "loss": 2.7701, "step": 242680 }, { "epoch": 10.45354696989275, "learning_rate": 4.12156047318408e-07, "loss": 2.6444, "step": 242700 }, { "epoch": 10.454408407632338, "learning_rate": 4.121075653429292e-07, "loss": 2.717, "step": 242720 }, { "epoch": 10.455269845371927, "learning_rate": 4.120590833674502e-07, "loss": 2.8215, "step": 242740 }, { "epoch": 10.456131283111514, "learning_rate": 4.120106013919714e-07, "loss": 2.8347, "step": 242760 }, { "epoch": 10.456992720851101, "learning_rate": 4.1196211941649245e-07, "loss": 2.8176, "step": 242780 }, { "epoch": 10.457854158590688, "learning_rate": 4.119136374410136e-07, "loss": 2.8437, "step": 242800 }, { "epoch": 10.458715596330276, "learning_rate": 4.1186515546553465e-07, "loss": 2.7224, "step": 242820 }, { "epoch": 10.459577034069863, "learning_rate": 4.1181667349005583e-07, "loss": 2.513, "step": 242840 }, { "epoch": 10.46043847180945, "learning_rate": 4.1176819151457685e-07, "loss": 2.6324, "step": 242860 }, { "epoch": 10.461299909549037, "learning_rate": 4.11719709539098e-07, "loss": 2.7459, "step": 242880 }, { "epoch": 10.462161347288625, "learning_rate": 4.116712275636191e-07, "loss": 2.7043, "step": 242900 }, { "epoch": 10.463022785028212, "learning_rate": 4.116227455881402e-07, "loss": 2.7917, "step": 242920 }, { "epoch": 10.463884222767799, "learning_rate": 4.115742636126613e-07, "loss": 2.611, "step": 242940 }, { "epoch": 10.464745660507386, "learning_rate": 4.1152578163718247e-07, "loss": 2.8265, "step": 242960 }, { "epoch": 10.465607098246974, "learning_rate": 4.1147729966170354e-07, "loss": 2.6467, "step": 242980 }, { "epoch": 10.46646853598656, "learning_rate": 4.1142881768622466e-07, "loss": 2.5529, "step": 243000 }, { "epoch": 10.467329973726148, "learning_rate": 4.113803357107458e-07, "loss": 2.711, "step": 243020 }, { "epoch": 10.468191411465737, "learning_rate": 4.113318537352669e-07, "loss": 2.7606, "step": 243040 }, { "epoch": 10.469052849205324, "learning_rate": 4.1128337175978793e-07, "loss": 2.8217, "step": 243060 }, { "epoch": 10.469914286944912, "learning_rate": 4.112348897843091e-07, "loss": 2.6495, "step": 243080 }, { "epoch": 10.470775724684499, "learning_rate": 4.111864078088302e-07, "loss": 2.6198, "step": 243100 }, { "epoch": 10.471637162424086, "learning_rate": 4.111379258333513e-07, "loss": 2.6805, "step": 243120 }, { "epoch": 10.472498600163673, "learning_rate": 4.110894438578724e-07, "loss": 2.5894, "step": 243140 }, { "epoch": 10.47336003790326, "learning_rate": 4.1104096188239345e-07, "loss": 2.6904, "step": 243160 }, { "epoch": 10.474221475642848, "learning_rate": 4.1099247990691457e-07, "loss": 2.7166, "step": 243180 }, { "epoch": 10.475082913382435, "learning_rate": 4.1094399793143564e-07, "loss": 2.7099, "step": 243200 }, { "epoch": 10.475944351122022, "learning_rate": 4.108955159559568e-07, "loss": 2.7125, "step": 243220 }, { "epoch": 10.47680578886161, "learning_rate": 4.1084703398047784e-07, "loss": 2.7862, "step": 243240 }, { "epoch": 10.477667226601197, "learning_rate": 4.10798552004999e-07, "loss": 2.7367, "step": 243260 }, { "epoch": 10.478528664340784, "learning_rate": 4.107500700295201e-07, "loss": 2.9008, "step": 243280 }, { "epoch": 10.479390102080373, "learning_rate": 4.107015880540412e-07, "loss": 2.5499, "step": 243300 }, { "epoch": 10.48025153981996, "learning_rate": 4.106531060785623e-07, "loss": 2.7328, "step": 243320 }, { "epoch": 10.481112977559548, "learning_rate": 4.1060462410308346e-07, "loss": 2.5978, "step": 243340 }, { "epoch": 10.481974415299135, "learning_rate": 4.1055614212760453e-07, "loss": 2.6652, "step": 243360 }, { "epoch": 10.482835853038722, "learning_rate": 4.1050766015212566e-07, "loss": 2.9084, "step": 243380 }, { "epoch": 10.48369729077831, "learning_rate": 4.1045917817664673e-07, "loss": 2.9277, "step": 243400 }, { "epoch": 10.484558728517896, "learning_rate": 4.104106962011679e-07, "loss": 2.7471, "step": 243420 }, { "epoch": 10.485420166257484, "learning_rate": 4.10362214225689e-07, "loss": 2.6108, "step": 243440 }, { "epoch": 10.486281603997071, "learning_rate": 4.103137322502101e-07, "loss": 2.6274, "step": 243460 }, { "epoch": 10.487143041736658, "learning_rate": 4.102652502747312e-07, "loss": 2.7421, "step": 243480 }, { "epoch": 10.488004479476245, "learning_rate": 4.102167682992523e-07, "loss": 2.7093, "step": 243500 }, { "epoch": 10.488865917215833, "learning_rate": 4.1016828632377337e-07, "loss": 2.65, "step": 243520 }, { "epoch": 10.48972735495542, "learning_rate": 4.1011980434829455e-07, "loss": 2.7218, "step": 243540 }, { "epoch": 10.490588792695007, "learning_rate": 4.1007132237281557e-07, "loss": 2.8047, "step": 243560 }, { "epoch": 10.491450230434594, "learning_rate": 4.1002284039733674e-07, "loss": 2.5298, "step": 243580 }, { "epoch": 10.492311668174183, "learning_rate": 4.099743584218578e-07, "loss": 2.6055, "step": 243600 }, { "epoch": 10.49317310591377, "learning_rate": 4.0992587644637894e-07, "loss": 2.6417, "step": 243620 }, { "epoch": 10.494034543653358, "learning_rate": 4.098773944709e-07, "loss": 2.6597, "step": 243640 }, { "epoch": 10.494895981392945, "learning_rate": 4.098289124954212e-07, "loss": 2.6881, "step": 243660 }, { "epoch": 10.495757419132532, "learning_rate": 4.097804305199422e-07, "loss": 2.7074, "step": 243680 }, { "epoch": 10.49661885687212, "learning_rate": 4.097319485444633e-07, "loss": 2.6582, "step": 243700 }, { "epoch": 10.497480294611707, "learning_rate": 4.0968346656898446e-07, "loss": 2.6949, "step": 243720 }, { "epoch": 10.498341732351294, "learning_rate": 4.0963498459350553e-07, "loss": 2.6477, "step": 243740 }, { "epoch": 10.499203170090881, "learning_rate": 4.0958650261802665e-07, "loss": 2.6339, "step": 243760 }, { "epoch": 10.500064607830469, "learning_rate": 4.095380206425477e-07, "loss": 2.9636, "step": 243780 }, { "epoch": 10.500926045570056, "learning_rate": 4.094895386670689e-07, "loss": 2.6991, "step": 243800 }, { "epoch": 10.501787483309643, "learning_rate": 4.094410566915899e-07, "loss": 2.7597, "step": 243820 }, { "epoch": 10.50264892104923, "learning_rate": 4.093925747161111e-07, "loss": 2.7381, "step": 243840 }, { "epoch": 10.50351035878882, "learning_rate": 4.093440927406321e-07, "loss": 2.7609, "step": 243860 }, { "epoch": 10.504371796528407, "learning_rate": 4.092956107651533e-07, "loss": 2.6154, "step": 243880 }, { "epoch": 10.505233234267994, "learning_rate": 4.0924712878967436e-07, "loss": 2.6193, "step": 243900 }, { "epoch": 10.506094672007581, "learning_rate": 4.0919864681419554e-07, "loss": 2.6014, "step": 243920 }, { "epoch": 10.506956109747168, "learning_rate": 4.0915016483871656e-07, "loss": 2.7556, "step": 243940 }, { "epoch": 10.507817547486756, "learning_rate": 4.0910168286323774e-07, "loss": 2.7253, "step": 243960 }, { "epoch": 10.508678985226343, "learning_rate": 4.090532008877588e-07, "loss": 2.783, "step": 243980 }, { "epoch": 10.50954042296593, "learning_rate": 4.0900471891227993e-07, "loss": 2.7339, "step": 244000 }, { "epoch": 10.510401860705517, "learning_rate": 4.08956236936801e-07, "loss": 2.7184, "step": 244020 }, { "epoch": 10.511263298445105, "learning_rate": 4.089077549613222e-07, "loss": 2.5618, "step": 244040 }, { "epoch": 10.512124736184692, "learning_rate": 4.0885927298584325e-07, "loss": 2.855, "step": 244060 }, { "epoch": 10.51298617392428, "learning_rate": 4.088107910103644e-07, "loss": 2.7471, "step": 244080 }, { "epoch": 10.513847611663866, "learning_rate": 4.0876230903488545e-07, "loss": 2.5773, "step": 244100 }, { "epoch": 10.514709049403454, "learning_rate": 4.0871382705940663e-07, "loss": 2.8157, "step": 244120 }, { "epoch": 10.51557048714304, "learning_rate": 4.0866534508392765e-07, "loss": 2.6612, "step": 244140 }, { "epoch": 10.51643192488263, "learning_rate": 4.086168631084488e-07, "loss": 2.6533, "step": 244160 }, { "epoch": 10.517293362622217, "learning_rate": 4.085683811329699e-07, "loss": 2.5078, "step": 244180 }, { "epoch": 10.518154800361804, "learning_rate": 4.08519899157491e-07, "loss": 2.6689, "step": 244200 }, { "epoch": 10.519016238101392, "learning_rate": 4.084714171820121e-07, "loss": 2.6541, "step": 244220 }, { "epoch": 10.519877675840979, "learning_rate": 4.0842293520653316e-07, "loss": 2.5645, "step": 244240 }, { "epoch": 10.520739113580566, "learning_rate": 4.083744532310543e-07, "loss": 2.7696, "step": 244260 }, { "epoch": 10.521600551320153, "learning_rate": 4.083259712555753e-07, "loss": 2.7563, "step": 244280 }, { "epoch": 10.52246198905974, "learning_rate": 4.0827748928009653e-07, "loss": 2.6911, "step": 244300 }, { "epoch": 10.523323426799328, "learning_rate": 4.0822900730461755e-07, "loss": 2.7087, "step": 244320 }, { "epoch": 10.524184864538915, "learning_rate": 4.0818052532913873e-07, "loss": 2.7004, "step": 244340 }, { "epoch": 10.525046302278502, "learning_rate": 4.081320433536598e-07, "loss": 2.616, "step": 244360 }, { "epoch": 10.52590774001809, "learning_rate": 4.0808356137818093e-07, "loss": 2.6794, "step": 244380 }, { "epoch": 10.526769177757677, "learning_rate": 4.08035079402702e-07, "loss": 2.7325, "step": 244400 }, { "epoch": 10.527630615497266, "learning_rate": 4.079865974272232e-07, "loss": 2.5289, "step": 244420 }, { "epoch": 10.528492053236853, "learning_rate": 4.0793811545174425e-07, "loss": 2.8028, "step": 244440 }, { "epoch": 10.52935349097644, "learning_rate": 4.0788963347626537e-07, "loss": 2.7712, "step": 244460 }, { "epoch": 10.530214928716028, "learning_rate": 4.0784115150078644e-07, "loss": 2.7467, "step": 244480 }, { "epoch": 10.531076366455615, "learning_rate": 4.077926695253076e-07, "loss": 2.7799, "step": 244500 }, { "epoch": 10.531937804195202, "learning_rate": 4.0774418754982864e-07, "loss": 2.727, "step": 244520 }, { "epoch": 10.53279924193479, "learning_rate": 4.076957055743498e-07, "loss": 2.8432, "step": 244540 }, { "epoch": 10.533660679674377, "learning_rate": 4.076472235988709e-07, "loss": 2.6869, "step": 244560 }, { "epoch": 10.534522117413964, "learning_rate": 4.07598741623392e-07, "loss": 2.7177, "step": 244580 }, { "epoch": 10.535383555153551, "learning_rate": 4.075502596479131e-07, "loss": 2.7105, "step": 244600 }, { "epoch": 10.536244992893138, "learning_rate": 4.0750177767243426e-07, "loss": 2.8797, "step": 244620 }, { "epoch": 10.537106430632726, "learning_rate": 4.074532956969553e-07, "loss": 2.6954, "step": 244640 }, { "epoch": 10.537967868372313, "learning_rate": 4.0740481372147646e-07, "loss": 2.7067, "step": 244660 }, { "epoch": 10.5388293061119, "learning_rate": 4.0735633174599753e-07, "loss": 2.767, "step": 244680 }, { "epoch": 10.539690743851487, "learning_rate": 4.0730784977051865e-07, "loss": 2.8059, "step": 244700 }, { "epoch": 10.540552181591076, "learning_rate": 4.072593677950397e-07, "loss": 2.6957, "step": 244720 }, { "epoch": 10.541413619330664, "learning_rate": 4.072108858195609e-07, "loss": 2.6242, "step": 244740 }, { "epoch": 10.54227505707025, "learning_rate": 4.0716240384408197e-07, "loss": 2.7862, "step": 244760 }, { "epoch": 10.543136494809838, "learning_rate": 4.07113921868603e-07, "loss": 2.6113, "step": 244780 }, { "epoch": 10.543997932549425, "learning_rate": 4.070654398931242e-07, "loss": 2.6656, "step": 244800 }, { "epoch": 10.544859370289013, "learning_rate": 4.0701695791764524e-07, "loss": 2.7469, "step": 244820 }, { "epoch": 10.5457208080286, "learning_rate": 4.0696847594216637e-07, "loss": 2.6042, "step": 244840 }, { "epoch": 10.546582245768187, "learning_rate": 4.0691999396668744e-07, "loss": 2.7045, "step": 244860 }, { "epoch": 10.547443683507774, "learning_rate": 4.068715119912086e-07, "loss": 2.6508, "step": 244880 }, { "epoch": 10.548305121247362, "learning_rate": 4.0682303001572963e-07, "loss": 2.6896, "step": 244900 }, { "epoch": 10.549166558986949, "learning_rate": 4.067745480402508e-07, "loss": 2.7048, "step": 244920 }, { "epoch": 10.550027996726536, "learning_rate": 4.067260660647719e-07, "loss": 2.7177, "step": 244940 }, { "epoch": 10.550889434466123, "learning_rate": 4.06677584089293e-07, "loss": 2.7456, "step": 244960 }, { "epoch": 10.55175087220571, "learning_rate": 4.066291021138141e-07, "loss": 2.6942, "step": 244980 }, { "epoch": 10.5526123099453, "learning_rate": 4.0658062013833525e-07, "loss": 2.6131, "step": 245000 }, { "epoch": 10.553473747684887, "learning_rate": 4.0653213816285627e-07, "loss": 2.7857, "step": 245020 }, { "epoch": 10.554335185424474, "learning_rate": 4.0648365618737745e-07, "loss": 2.716, "step": 245040 }, { "epoch": 10.555196623164061, "learning_rate": 4.064351742118985e-07, "loss": 2.7965, "step": 245060 }, { "epoch": 10.556058060903649, "learning_rate": 4.0638669223641965e-07, "loss": 2.7223, "step": 245080 }, { "epoch": 10.556919498643236, "learning_rate": 4.063382102609407e-07, "loss": 2.716, "step": 245100 }, { "epoch": 10.557780936382823, "learning_rate": 4.062897282854619e-07, "loss": 2.6, "step": 245120 }, { "epoch": 10.55864237412241, "learning_rate": 4.0624124630998297e-07, "loss": 2.5992, "step": 245140 }, { "epoch": 10.559503811861997, "learning_rate": 4.061927643345041e-07, "loss": 2.8501, "step": 245160 }, { "epoch": 10.560365249601585, "learning_rate": 4.0614428235902516e-07, "loss": 2.7108, "step": 245180 }, { "epoch": 10.561226687341172, "learning_rate": 4.0609580038354634e-07, "loss": 2.8172, "step": 245200 }, { "epoch": 10.56208812508076, "learning_rate": 4.060473184080674e-07, "loss": 2.6279, "step": 245220 }, { "epoch": 10.562949562820346, "learning_rate": 4.0599883643258854e-07, "loss": 2.6777, "step": 245240 }, { "epoch": 10.563811000559934, "learning_rate": 4.059503544571096e-07, "loss": 2.8771, "step": 245260 }, { "epoch": 10.564672438299521, "learning_rate": 4.0590187248163073e-07, "loss": 2.6919, "step": 245280 }, { "epoch": 10.56553387603911, "learning_rate": 4.058533905061518e-07, "loss": 2.78, "step": 245300 }, { "epoch": 10.566395313778697, "learning_rate": 4.058049085306729e-07, "loss": 2.6916, "step": 245320 }, { "epoch": 10.567256751518284, "learning_rate": 4.05756426555194e-07, "loss": 2.6583, "step": 245340 }, { "epoch": 10.568118189257872, "learning_rate": 4.0570794457971507e-07, "loss": 2.6407, "step": 245360 }, { "epoch": 10.568979626997459, "learning_rate": 4.0565946260423625e-07, "loss": 2.5549, "step": 245380 }, { "epoch": 10.569841064737046, "learning_rate": 4.0561098062875727e-07, "loss": 2.8551, "step": 245400 }, { "epoch": 10.570702502476633, "learning_rate": 4.0556249865327844e-07, "loss": 2.709, "step": 245420 }, { "epoch": 10.57156394021622, "learning_rate": 4.055140166777995e-07, "loss": 2.6679, "step": 245440 }, { "epoch": 10.572425377955808, "learning_rate": 4.0546553470232064e-07, "loss": 2.7534, "step": 245460 }, { "epoch": 10.573286815695395, "learning_rate": 4.054170527268417e-07, "loss": 2.6787, "step": 245480 }, { "epoch": 10.574148253434982, "learning_rate": 4.053685707513629e-07, "loss": 2.9504, "step": 245500 }, { "epoch": 10.57500969117457, "learning_rate": 4.0532008877588396e-07, "loss": 2.7671, "step": 245520 }, { "epoch": 10.575871128914157, "learning_rate": 4.052716068004051e-07, "loss": 2.7004, "step": 245540 }, { "epoch": 10.576732566653746, "learning_rate": 4.0522312482492616e-07, "loss": 2.7547, "step": 245560 }, { "epoch": 10.577594004393333, "learning_rate": 4.0517464284944733e-07, "loss": 2.7269, "step": 245580 }, { "epoch": 10.57845544213292, "learning_rate": 4.0512616087396835e-07, "loss": 2.7758, "step": 245600 }, { "epoch": 10.579316879872508, "learning_rate": 4.0507767889848953e-07, "loss": 2.8585, "step": 245620 }, { "epoch": 10.580178317612095, "learning_rate": 4.0502919692301055e-07, "loss": 2.7244, "step": 245640 }, { "epoch": 10.581039755351682, "learning_rate": 4.049807149475317e-07, "loss": 2.7507, "step": 245660 }, { "epoch": 10.58190119309127, "learning_rate": 4.049322329720528e-07, "loss": 2.8038, "step": 245680 }, { "epoch": 10.582762630830857, "learning_rate": 4.04883750996574e-07, "loss": 2.574, "step": 245700 }, { "epoch": 10.583624068570444, "learning_rate": 4.04835269021095e-07, "loss": 2.64, "step": 245720 }, { "epoch": 10.584485506310031, "learning_rate": 4.0478678704561617e-07, "loss": 2.7477, "step": 245740 }, { "epoch": 10.585346944049618, "learning_rate": 4.0473830507013724e-07, "loss": 2.6317, "step": 245760 }, { "epoch": 10.586208381789206, "learning_rate": 4.0468982309465837e-07, "loss": 2.6242, "step": 245780 }, { "epoch": 10.587069819528793, "learning_rate": 4.0464134111917944e-07, "loss": 2.778, "step": 245800 }, { "epoch": 10.58793125726838, "learning_rate": 4.045928591437006e-07, "loss": 2.5842, "step": 245820 }, { "epoch": 10.588792695007967, "learning_rate": 4.045443771682217e-07, "loss": 2.6206, "step": 245840 }, { "epoch": 10.589654132747556, "learning_rate": 4.044958951927427e-07, "loss": 2.6498, "step": 245860 }, { "epoch": 10.590515570487144, "learning_rate": 4.044474132172639e-07, "loss": 2.6468, "step": 245880 }, { "epoch": 10.591377008226731, "learning_rate": 4.0439893124178495e-07, "loss": 2.8539, "step": 245900 }, { "epoch": 10.592238445966318, "learning_rate": 4.043504492663061e-07, "loss": 2.6748, "step": 245920 }, { "epoch": 10.593099883705905, "learning_rate": 4.0430196729082715e-07, "loss": 2.8282, "step": 245940 }, { "epoch": 10.593961321445493, "learning_rate": 4.0425348531534833e-07, "loss": 2.6743, "step": 245960 }, { "epoch": 10.59482275918508, "learning_rate": 4.0420500333986935e-07, "loss": 2.675, "step": 245980 }, { "epoch": 10.595684196924667, "learning_rate": 4.041565213643905e-07, "loss": 2.6944, "step": 246000 }, { "epoch": 10.596545634664254, "learning_rate": 4.041080393889116e-07, "loss": 2.675, "step": 246020 }, { "epoch": 10.597407072403842, "learning_rate": 4.040595574134327e-07, "loss": 2.669, "step": 246040 }, { "epoch": 10.598268510143429, "learning_rate": 4.0401107543795374e-07, "loss": 2.586, "step": 246060 }, { "epoch": 10.599129947883016, "learning_rate": 4.0396259346247497e-07, "loss": 2.7529, "step": 246080 }, { "epoch": 10.599991385622603, "learning_rate": 4.03914111486996e-07, "loss": 2.7618, "step": 246100 }, { "epoch": 10.600852823362192, "learning_rate": 4.0386562951151716e-07, "loss": 2.6064, "step": 246120 }, { "epoch": 10.60171426110178, "learning_rate": 4.0381714753603824e-07, "loss": 2.8967, "step": 246140 }, { "epoch": 10.602575698841367, "learning_rate": 4.0376866556055936e-07, "loss": 2.663, "step": 246160 }, { "epoch": 10.603437136580954, "learning_rate": 4.0372018358508043e-07, "loss": 2.7052, "step": 246180 }, { "epoch": 10.604298574320541, "learning_rate": 4.036717016096016e-07, "loss": 2.7495, "step": 246200 }, { "epoch": 10.605160012060129, "learning_rate": 4.036232196341227e-07, "loss": 2.6622, "step": 246220 }, { "epoch": 10.606021449799716, "learning_rate": 4.035747376586438e-07, "loss": 2.6962, "step": 246240 }, { "epoch": 10.606882887539303, "learning_rate": 4.035262556831649e-07, "loss": 2.8449, "step": 246260 }, { "epoch": 10.60774432527889, "learning_rate": 4.0347777370768605e-07, "loss": 2.749, "step": 246280 }, { "epoch": 10.608605763018478, "learning_rate": 4.0342929173220707e-07, "loss": 2.8367, "step": 246300 }, { "epoch": 10.609467200758065, "learning_rate": 4.0338080975672825e-07, "loss": 2.6259, "step": 246320 }, { "epoch": 10.610328638497652, "learning_rate": 4.033323277812493e-07, "loss": 2.49, "step": 246340 }, { "epoch": 10.61119007623724, "learning_rate": 4.0328384580577045e-07, "loss": 2.8884, "step": 246360 }, { "epoch": 10.612051513976827, "learning_rate": 4.032353638302915e-07, "loss": 2.5794, "step": 246380 }, { "epoch": 10.612912951716414, "learning_rate": 4.031868818548126e-07, "loss": 2.7271, "step": 246400 }, { "epoch": 10.613774389456003, "learning_rate": 4.031383998793337e-07, "loss": 2.7578, "step": 246420 }, { "epoch": 10.61463582719559, "learning_rate": 4.030899179038548e-07, "loss": 2.6441, "step": 246440 }, { "epoch": 10.615497264935177, "learning_rate": 4.0304143592837596e-07, "loss": 2.6346, "step": 246460 }, { "epoch": 10.616358702674765, "learning_rate": 4.0299295395289703e-07, "loss": 2.6772, "step": 246480 }, { "epoch": 10.617220140414352, "learning_rate": 4.0294447197741816e-07, "loss": 2.7151, "step": 246500 }, { "epoch": 10.618081578153939, "learning_rate": 4.0289599000193923e-07, "loss": 2.613, "step": 246520 }, { "epoch": 10.618943015893526, "learning_rate": 4.028475080264604e-07, "loss": 2.8516, "step": 246540 }, { "epoch": 10.619804453633114, "learning_rate": 4.027990260509814e-07, "loss": 2.7367, "step": 246560 }, { "epoch": 10.6206658913727, "learning_rate": 4.0275054407550266e-07, "loss": 2.7396, "step": 246580 }, { "epoch": 10.621527329112288, "learning_rate": 4.027020621000237e-07, "loss": 2.7509, "step": 246600 }, { "epoch": 10.622388766851875, "learning_rate": 4.026535801245448e-07, "loss": 2.6947, "step": 246620 }, { "epoch": 10.623250204591463, "learning_rate": 4.0260509814906587e-07, "loss": 2.8514, "step": 246640 }, { "epoch": 10.62411164233105, "learning_rate": 4.0255661617358705e-07, "loss": 2.8385, "step": 246660 }, { "epoch": 10.624973080070639, "learning_rate": 4.0250813419810807e-07, "loss": 2.7319, "step": 246680 }, { "epoch": 10.625834517810226, "learning_rate": 4.0245965222262924e-07, "loss": 2.5677, "step": 246700 }, { "epoch": 10.626695955549813, "learning_rate": 4.024111702471503e-07, "loss": 2.5839, "step": 246720 }, { "epoch": 10.6275573932894, "learning_rate": 4.0236268827167144e-07, "loss": 2.6964, "step": 246740 }, { "epoch": 10.628418831028988, "learning_rate": 4.023142062961925e-07, "loss": 2.8188, "step": 246760 }, { "epoch": 10.629280268768575, "learning_rate": 4.022657243207137e-07, "loss": 2.8455, "step": 246780 }, { "epoch": 10.630141706508162, "learning_rate": 4.022172423452347e-07, "loss": 2.7445, "step": 246800 }, { "epoch": 10.63100314424775, "learning_rate": 4.021687603697559e-07, "loss": 2.6141, "step": 246820 }, { "epoch": 10.631864581987337, "learning_rate": 4.0212027839427696e-07, "loss": 2.6317, "step": 246840 }, { "epoch": 10.632726019726924, "learning_rate": 4.020717964187981e-07, "loss": 2.7151, "step": 246860 }, { "epoch": 10.633587457466511, "learning_rate": 4.0202331444331915e-07, "loss": 2.7095, "step": 246880 }, { "epoch": 10.634448895206098, "learning_rate": 4.0197483246784033e-07, "loss": 2.7266, "step": 246900 }, { "epoch": 10.635310332945686, "learning_rate": 4.019263504923614e-07, "loss": 2.7092, "step": 246920 }, { "epoch": 10.636171770685273, "learning_rate": 4.018778685168824e-07, "loss": 2.8431, "step": 246940 }, { "epoch": 10.63703320842486, "learning_rate": 4.018293865414036e-07, "loss": 2.6595, "step": 246960 }, { "epoch": 10.63789464616445, "learning_rate": 4.0178090456592467e-07, "loss": 2.7157, "step": 246980 }, { "epoch": 10.638756083904036, "learning_rate": 4.0173242259044584e-07, "loss": 2.5974, "step": 247000 }, { "epoch": 10.639617521643624, "learning_rate": 4.0168394061496686e-07, "loss": 2.7854, "step": 247020 }, { "epoch": 10.640478959383211, "learning_rate": 4.0163545863948804e-07, "loss": 2.5843, "step": 247040 }, { "epoch": 10.641340397122798, "learning_rate": 4.0158697666400906e-07, "loss": 2.8019, "step": 247060 }, { "epoch": 10.642201834862385, "learning_rate": 4.0153849468853024e-07, "loss": 2.7425, "step": 247080 }, { "epoch": 10.643063272601973, "learning_rate": 4.014900127130513e-07, "loss": 2.7409, "step": 247100 }, { "epoch": 10.64392471034156, "learning_rate": 4.0144153073757243e-07, "loss": 2.8053, "step": 247120 }, { "epoch": 10.644786148081147, "learning_rate": 4.013930487620935e-07, "loss": 2.9431, "step": 247140 }, { "epoch": 10.645647585820734, "learning_rate": 4.013445667866147e-07, "loss": 2.8047, "step": 247160 }, { "epoch": 10.646509023560322, "learning_rate": 4.0129608481113575e-07, "loss": 2.6465, "step": 247180 }, { "epoch": 10.647370461299909, "learning_rate": 4.012476028356569e-07, "loss": 2.6798, "step": 247200 }, { "epoch": 10.648231899039496, "learning_rate": 4.0119912086017795e-07, "loss": 2.6021, "step": 247220 }, { "epoch": 10.649093336779085, "learning_rate": 4.0115063888469907e-07, "loss": 2.795, "step": 247240 }, { "epoch": 10.649954774518672, "learning_rate": 4.0110215690922014e-07, "loss": 2.6861, "step": 247260 }, { "epoch": 10.65081621225826, "learning_rate": 4.010536749337413e-07, "loss": 2.6823, "step": 247280 }, { "epoch": 10.651677649997847, "learning_rate": 4.010051929582624e-07, "loss": 2.7419, "step": 247300 }, { "epoch": 10.652539087737434, "learning_rate": 4.009567109827835e-07, "loss": 2.5948, "step": 247320 }, { "epoch": 10.653400525477021, "learning_rate": 4.009082290073046e-07, "loss": 2.6084, "step": 247340 }, { "epoch": 10.654261963216609, "learning_rate": 4.0085974703182577e-07, "loss": 2.8237, "step": 247360 }, { "epoch": 10.655123400956196, "learning_rate": 4.008112650563468e-07, "loss": 2.7328, "step": 247380 }, { "epoch": 10.655984838695783, "learning_rate": 4.0076278308086796e-07, "loss": 2.7196, "step": 247400 }, { "epoch": 10.65684627643537, "learning_rate": 4.00714301105389e-07, "loss": 2.7776, "step": 247420 }, { "epoch": 10.657707714174958, "learning_rate": 4.0066581912991016e-07, "loss": 2.5936, "step": 247440 }, { "epoch": 10.658569151914545, "learning_rate": 4.0061733715443123e-07, "loss": 2.7986, "step": 247460 }, { "epoch": 10.659430589654132, "learning_rate": 4.005688551789523e-07, "loss": 2.5998, "step": 247480 }, { "epoch": 10.66029202739372, "learning_rate": 4.005203732034734e-07, "loss": 2.7327, "step": 247500 }, { "epoch": 10.661153465133307, "learning_rate": 4.004718912279945e-07, "loss": 2.7414, "step": 247520 }, { "epoch": 10.662014902872896, "learning_rate": 4.004234092525157e-07, "loss": 2.661, "step": 247540 }, { "epoch": 10.662876340612483, "learning_rate": 4.0037492727703675e-07, "loss": 2.7196, "step": 247560 }, { "epoch": 10.66373777835207, "learning_rate": 4.0032644530155787e-07, "loss": 2.8726, "step": 247580 }, { "epoch": 10.664599216091657, "learning_rate": 4.0027796332607894e-07, "loss": 2.7016, "step": 247600 }, { "epoch": 10.665460653831245, "learning_rate": 4.002294813506001e-07, "loss": 2.793, "step": 247620 }, { "epoch": 10.666322091570832, "learning_rate": 4.0018099937512114e-07, "loss": 2.6924, "step": 247640 }, { "epoch": 10.66718352931042, "learning_rate": 4.001325173996423e-07, "loss": 2.9092, "step": 247660 }, { "epoch": 10.668044967050006, "learning_rate": 4.000840354241634e-07, "loss": 2.668, "step": 247680 }, { "epoch": 10.668906404789594, "learning_rate": 4.000355534486845e-07, "loss": 2.7045, "step": 247700 }, { "epoch": 10.66976784252918, "learning_rate": 3.999870714732056e-07, "loss": 2.6134, "step": 247720 }, { "epoch": 10.670629280268768, "learning_rate": 3.9993858949772676e-07, "loss": 2.5507, "step": 247740 }, { "epoch": 10.671490718008355, "learning_rate": 3.998901075222478e-07, "loss": 2.8916, "step": 247760 }, { "epoch": 10.672352155747943, "learning_rate": 3.9984162554676896e-07, "loss": 2.878, "step": 247780 }, { "epoch": 10.673213593487532, "learning_rate": 3.9979314357129003e-07, "loss": 2.6711, "step": 247800 }, { "epoch": 10.674075031227119, "learning_rate": 3.9974466159581115e-07, "loss": 2.7284, "step": 247820 }, { "epoch": 10.674936468966706, "learning_rate": 3.9969617962033217e-07, "loss": 2.7175, "step": 247840 }, { "epoch": 10.675797906706293, "learning_rate": 3.996476976448534e-07, "loss": 2.908, "step": 247860 }, { "epoch": 10.67665934444588, "learning_rate": 3.995992156693744e-07, "loss": 2.6948, "step": 247880 }, { "epoch": 10.677520782185468, "learning_rate": 3.995507336938956e-07, "loss": 2.7499, "step": 247900 }, { "epoch": 10.678382219925055, "learning_rate": 3.9950225171841667e-07, "loss": 2.6621, "step": 247920 }, { "epoch": 10.679243657664642, "learning_rate": 3.994537697429378e-07, "loss": 2.6516, "step": 247940 }, { "epoch": 10.68010509540423, "learning_rate": 3.9940528776745886e-07, "loss": 2.7411, "step": 247960 }, { "epoch": 10.680966533143817, "learning_rate": 3.9935680579198004e-07, "loss": 2.8197, "step": 247980 }, { "epoch": 10.681827970883404, "learning_rate": 3.993083238165011e-07, "loss": 2.8438, "step": 248000 }, { "epoch": 10.682689408622991, "learning_rate": 3.9925984184102213e-07, "loss": 2.7967, "step": 248020 }, { "epoch": 10.683550846362579, "learning_rate": 3.992113598655433e-07, "loss": 2.8629, "step": 248040 }, { "epoch": 10.684412284102166, "learning_rate": 3.991628778900644e-07, "loss": 2.7564, "step": 248060 }, { "epoch": 10.685273721841753, "learning_rate": 3.991143959145855e-07, "loss": 2.9037, "step": 248080 }, { "epoch": 10.686135159581342, "learning_rate": 3.990659139391066e-07, "loss": 2.5739, "step": 248100 }, { "epoch": 10.68699659732093, "learning_rate": 3.9901743196362775e-07, "loss": 2.4965, "step": 248120 }, { "epoch": 10.687858035060517, "learning_rate": 3.9896894998814877e-07, "loss": 2.7925, "step": 248140 }, { "epoch": 10.688719472800104, "learning_rate": 3.9892046801266995e-07, "loss": 2.7307, "step": 248160 }, { "epoch": 10.689580910539691, "learning_rate": 3.98871986037191e-07, "loss": 2.6534, "step": 248180 }, { "epoch": 10.690442348279278, "learning_rate": 3.9882350406171215e-07, "loss": 2.664, "step": 248200 }, { "epoch": 10.691303786018866, "learning_rate": 3.987750220862332e-07, "loss": 2.8853, "step": 248220 }, { "epoch": 10.692165223758453, "learning_rate": 3.987265401107544e-07, "loss": 2.7974, "step": 248240 }, { "epoch": 10.69302666149804, "learning_rate": 3.9867805813527547e-07, "loss": 2.8183, "step": 248260 }, { "epoch": 10.693888099237627, "learning_rate": 3.986295761597966e-07, "loss": 2.7635, "step": 248280 }, { "epoch": 10.694749536977215, "learning_rate": 3.9858109418431766e-07, "loss": 2.7195, "step": 248300 }, { "epoch": 10.695610974716802, "learning_rate": 3.9853261220883884e-07, "loss": 2.7906, "step": 248320 }, { "epoch": 10.696472412456389, "learning_rate": 3.9848413023335986e-07, "loss": 2.5707, "step": 248340 }, { "epoch": 10.697333850195976, "learning_rate": 3.984356482578811e-07, "loss": 2.7345, "step": 248360 }, { "epoch": 10.698195287935565, "learning_rate": 3.983871662824021e-07, "loss": 2.7566, "step": 248380 }, { "epoch": 10.699056725675153, "learning_rate": 3.9833868430692323e-07, "loss": 2.7558, "step": 248400 }, { "epoch": 10.69991816341474, "learning_rate": 3.982902023314443e-07, "loss": 2.8115, "step": 248420 }, { "epoch": 10.700779601154327, "learning_rate": 3.982417203559655e-07, "loss": 2.7139, "step": 248440 }, { "epoch": 10.701641038893914, "learning_rate": 3.981932383804865e-07, "loss": 2.6825, "step": 248460 }, { "epoch": 10.702502476633502, "learning_rate": 3.981447564050077e-07, "loss": 2.7256, "step": 248480 }, { "epoch": 10.703363914373089, "learning_rate": 3.9809627442952875e-07, "loss": 2.8035, "step": 248500 }, { "epoch": 10.704225352112676, "learning_rate": 3.9804779245404987e-07, "loss": 2.7793, "step": 248520 }, { "epoch": 10.705086789852263, "learning_rate": 3.9799931047857094e-07, "loss": 2.7632, "step": 248540 }, { "epoch": 10.70594822759185, "learning_rate": 3.97950828503092e-07, "loss": 2.7417, "step": 248560 }, { "epoch": 10.706809665331438, "learning_rate": 3.9790234652761314e-07, "loss": 2.6433, "step": 248580 }, { "epoch": 10.707671103071025, "learning_rate": 3.978538645521342e-07, "loss": 2.7302, "step": 248600 }, { "epoch": 10.708532540810612, "learning_rate": 3.978053825766554e-07, "loss": 2.6308, "step": 248620 }, { "epoch": 10.7093939785502, "learning_rate": 3.9775690060117646e-07, "loss": 2.7982, "step": 248640 }, { "epoch": 10.710255416289787, "learning_rate": 3.977084186256976e-07, "loss": 2.8, "step": 248660 }, { "epoch": 10.711116854029376, "learning_rate": 3.9765993665021866e-07, "loss": 2.5638, "step": 248680 }, { "epoch": 10.711978291768963, "learning_rate": 3.9761145467473983e-07, "loss": 2.6755, "step": 248700 }, { "epoch": 10.71283972950855, "learning_rate": 3.9756297269926085e-07, "loss": 2.6702, "step": 248720 }, { "epoch": 10.713701167248137, "learning_rate": 3.9751449072378203e-07, "loss": 2.6553, "step": 248740 }, { "epoch": 10.714562604987725, "learning_rate": 3.974660087483031e-07, "loss": 2.8686, "step": 248760 }, { "epoch": 10.715424042727312, "learning_rate": 3.974175267728243e-07, "loss": 2.6178, "step": 248780 }, { "epoch": 10.7162854804669, "learning_rate": 3.973690447973453e-07, "loss": 2.7126, "step": 248800 }, { "epoch": 10.717146918206486, "learning_rate": 3.973205628218665e-07, "loss": 2.6747, "step": 248820 }, { "epoch": 10.718008355946074, "learning_rate": 3.972720808463875e-07, "loss": 2.6992, "step": 248840 }, { "epoch": 10.718869793685661, "learning_rate": 3.9722359887090867e-07, "loss": 2.5974, "step": 248860 }, { "epoch": 10.719731231425248, "learning_rate": 3.9717511689542974e-07, "loss": 2.5828, "step": 248880 }, { "epoch": 10.720592669164835, "learning_rate": 3.9712663491995087e-07, "loss": 2.6206, "step": 248900 }, { "epoch": 10.721454106904423, "learning_rate": 3.9707815294447194e-07, "loss": 2.5613, "step": 248920 }, { "epoch": 10.722315544644012, "learning_rate": 3.970296709689931e-07, "loss": 2.7067, "step": 248940 }, { "epoch": 10.723176982383599, "learning_rate": 3.969811889935142e-07, "loss": 2.6689, "step": 248960 }, { "epoch": 10.724038420123186, "learning_rate": 3.969327070180353e-07, "loss": 2.6738, "step": 248980 }, { "epoch": 10.724899857862773, "learning_rate": 3.968842250425564e-07, "loss": 2.6238, "step": 249000 }, { "epoch": 10.72576129560236, "learning_rate": 3.968357430670775e-07, "loss": 2.9424, "step": 249020 }, { "epoch": 10.726622733341948, "learning_rate": 3.967872610915986e-07, "loss": 2.8769, "step": 249040 }, { "epoch": 10.727484171081535, "learning_rate": 3.9673877911611976e-07, "loss": 2.7418, "step": 249060 }, { "epoch": 10.728345608821122, "learning_rate": 3.9669029714064083e-07, "loss": 2.6448, "step": 249080 }, { "epoch": 10.72920704656071, "learning_rate": 3.9664181516516185e-07, "loss": 2.7918, "step": 249100 }, { "epoch": 10.730068484300297, "learning_rate": 3.96593333189683e-07, "loss": 2.6833, "step": 249120 }, { "epoch": 10.730929922039884, "learning_rate": 3.965448512142041e-07, "loss": 2.7579, "step": 249140 }, { "epoch": 10.731791359779471, "learning_rate": 3.964963692387252e-07, "loss": 2.6477, "step": 249160 }, { "epoch": 10.732652797519059, "learning_rate": 3.964478872632463e-07, "loss": 2.8299, "step": 249180 }, { "epoch": 10.733514235258646, "learning_rate": 3.963994052877674e-07, "loss": 2.8103, "step": 249200 }, { "epoch": 10.734375672998233, "learning_rate": 3.963509233122885e-07, "loss": 2.8128, "step": 249220 }, { "epoch": 10.735237110737822, "learning_rate": 3.9630244133680966e-07, "loss": 2.5805, "step": 249240 }, { "epoch": 10.73609854847741, "learning_rate": 3.9625395936133074e-07, "loss": 2.7847, "step": 249260 }, { "epoch": 10.736959986216997, "learning_rate": 3.9620547738585186e-07, "loss": 2.6476, "step": 249280 }, { "epoch": 10.737821423956584, "learning_rate": 3.9615699541037293e-07, "loss": 2.559, "step": 249300 }, { "epoch": 10.738682861696171, "learning_rate": 3.961085134348941e-07, "loss": 2.6233, "step": 249320 }, { "epoch": 10.739544299435758, "learning_rate": 3.960600314594152e-07, "loss": 2.8206, "step": 249340 }, { "epoch": 10.740405737175346, "learning_rate": 3.960115494839363e-07, "loss": 2.7888, "step": 249360 }, { "epoch": 10.741267174914933, "learning_rate": 3.959630675084574e-07, "loss": 2.7177, "step": 249380 }, { "epoch": 10.74212861265452, "learning_rate": 3.9591458553297855e-07, "loss": 2.7815, "step": 249400 }, { "epoch": 10.742990050394107, "learning_rate": 3.9586610355749957e-07, "loss": 2.8109, "step": 249420 }, { "epoch": 10.743851488133695, "learning_rate": 3.9581762158202075e-07, "loss": 2.6889, "step": 249440 }, { "epoch": 10.744712925873282, "learning_rate": 3.957691396065418e-07, "loss": 2.7934, "step": 249460 }, { "epoch": 10.745574363612869, "learning_rate": 3.9572065763106295e-07, "loss": 2.6443, "step": 249480 }, { "epoch": 10.746435801352458, "learning_rate": 3.95672175655584e-07, "loss": 2.6251, "step": 249500 }, { "epoch": 10.747297239092045, "learning_rate": 3.956236936801052e-07, "loss": 2.783, "step": 249520 }, { "epoch": 10.748158676831633, "learning_rate": 3.955752117046262e-07, "loss": 2.6905, "step": 249540 }, { "epoch": 10.74902011457122, "learning_rate": 3.955267297291474e-07, "loss": 2.6168, "step": 249560 }, { "epoch": 10.749881552310807, "learning_rate": 3.9547824775366846e-07, "loss": 2.6612, "step": 249580 }, { "epoch": 10.750742990050394, "learning_rate": 3.954297657781896e-07, "loss": 2.7306, "step": 249600 }, { "epoch": 10.751604427789982, "learning_rate": 3.953812838027106e-07, "loss": 2.7064, "step": 249620 }, { "epoch": 10.752465865529569, "learning_rate": 3.9533280182723173e-07, "loss": 2.7647, "step": 249640 }, { "epoch": 10.753327303269156, "learning_rate": 3.9528431985175285e-07, "loss": 2.6554, "step": 249660 }, { "epoch": 10.754188741008743, "learning_rate": 3.952358378762739e-07, "loss": 2.7716, "step": 249680 }, { "epoch": 10.75505017874833, "learning_rate": 3.951873559007951e-07, "loss": 2.9324, "step": 249700 }, { "epoch": 10.755911616487918, "learning_rate": 3.951388739253162e-07, "loss": 2.5961, "step": 249720 }, { "epoch": 10.756773054227505, "learning_rate": 3.950903919498373e-07, "loss": 2.7217, "step": 249740 }, { "epoch": 10.757634491967092, "learning_rate": 3.9504190997435837e-07, "loss": 2.6498, "step": 249760 }, { "epoch": 10.75849592970668, "learning_rate": 3.9499342799887955e-07, "loss": 2.8038, "step": 249780 }, { "epoch": 10.759357367446269, "learning_rate": 3.9494494602340057e-07, "loss": 2.7005, "step": 249800 }, { "epoch": 10.760218805185856, "learning_rate": 3.9489646404792174e-07, "loss": 2.8195, "step": 249820 }, { "epoch": 10.761080242925443, "learning_rate": 3.948479820724428e-07, "loss": 2.6579, "step": 249840 }, { "epoch": 10.76194168066503, "learning_rate": 3.9479950009696394e-07, "loss": 2.7223, "step": 249860 }, { "epoch": 10.762803118404618, "learning_rate": 3.94751018121485e-07, "loss": 2.6784, "step": 249880 }, { "epoch": 10.763664556144205, "learning_rate": 3.947025361460062e-07, "loss": 2.6136, "step": 249900 }, { "epoch": 10.764525993883792, "learning_rate": 3.946540541705272e-07, "loss": 2.656, "step": 249920 }, { "epoch": 10.76538743162338, "learning_rate": 3.946055721950484e-07, "loss": 2.6751, "step": 249940 }, { "epoch": 10.766248869362967, "learning_rate": 3.9455709021956946e-07, "loss": 2.6143, "step": 249960 }, { "epoch": 10.767110307102554, "learning_rate": 3.945086082440906e-07, "loss": 2.6903, "step": 249980 }, { "epoch": 10.767971744842141, "learning_rate": 3.9446012626861165e-07, "loss": 2.6309, "step": 250000 }, { "epoch": 10.768833182581728, "learning_rate": 3.9441164429313283e-07, "loss": 2.7828, "step": 250020 }, { "epoch": 10.769694620321316, "learning_rate": 3.943631623176539e-07, "loss": 2.8303, "step": 250040 }, { "epoch": 10.770556058060905, "learning_rate": 3.94314680342175e-07, "loss": 2.6908, "step": 250060 }, { "epoch": 10.771417495800492, "learning_rate": 3.942661983666961e-07, "loss": 2.7573, "step": 250080 }, { "epoch": 10.772278933540079, "learning_rate": 3.9421771639121727e-07, "loss": 2.6386, "step": 250100 }, { "epoch": 10.773140371279666, "learning_rate": 3.941692344157383e-07, "loss": 2.6396, "step": 250120 }, { "epoch": 10.774001809019254, "learning_rate": 3.941207524402595e-07, "loss": 2.688, "step": 250140 }, { "epoch": 10.77486324675884, "learning_rate": 3.9407227046478054e-07, "loss": 2.7022, "step": 250160 }, { "epoch": 10.775724684498428, "learning_rate": 3.9402378848930156e-07, "loss": 2.5293, "step": 250180 }, { "epoch": 10.776586122238015, "learning_rate": 3.9397530651382274e-07, "loss": 2.5658, "step": 250200 }, { "epoch": 10.777447559977603, "learning_rate": 3.939268245383438e-07, "loss": 2.6185, "step": 250220 }, { "epoch": 10.77830899771719, "learning_rate": 3.9387834256286493e-07, "loss": 2.7524, "step": 250240 }, { "epoch": 10.779170435456777, "learning_rate": 3.93829860587386e-07, "loss": 2.7248, "step": 250260 }, { "epoch": 10.780031873196364, "learning_rate": 3.937813786119072e-07, "loss": 2.7396, "step": 250280 }, { "epoch": 10.780893310935951, "learning_rate": 3.937328966364282e-07, "loss": 2.7933, "step": 250300 }, { "epoch": 10.781754748675539, "learning_rate": 3.936844146609494e-07, "loss": 2.7059, "step": 250320 }, { "epoch": 10.782616186415126, "learning_rate": 3.9363593268547045e-07, "loss": 2.74, "step": 250340 }, { "epoch": 10.783477624154715, "learning_rate": 3.9358745070999157e-07, "loss": 2.7157, "step": 250360 }, { "epoch": 10.784339061894302, "learning_rate": 3.9353896873451264e-07, "loss": 2.8119, "step": 250380 }, { "epoch": 10.78520049963389, "learning_rate": 3.934904867590338e-07, "loss": 2.6462, "step": 250400 }, { "epoch": 10.786061937373477, "learning_rate": 3.934420047835549e-07, "loss": 2.6425, "step": 250420 }, { "epoch": 10.786923375113064, "learning_rate": 3.93393522808076e-07, "loss": 2.6152, "step": 250440 }, { "epoch": 10.787784812852651, "learning_rate": 3.933450408325971e-07, "loss": 2.6737, "step": 250460 }, { "epoch": 10.788646250592238, "learning_rate": 3.9329655885711827e-07, "loss": 2.6755, "step": 250480 }, { "epoch": 10.789507688331826, "learning_rate": 3.932480768816393e-07, "loss": 2.8558, "step": 250500 }, { "epoch": 10.790369126071413, "learning_rate": 3.9319959490616046e-07, "loss": 2.7448, "step": 250520 }, { "epoch": 10.791230563811, "learning_rate": 3.9315111293068153e-07, "loss": 2.6994, "step": 250540 }, { "epoch": 10.792092001550587, "learning_rate": 3.931026309552027e-07, "loss": 2.8555, "step": 250560 }, { "epoch": 10.792953439290175, "learning_rate": 3.9305414897972373e-07, "loss": 2.6691, "step": 250580 }, { "epoch": 10.793814877029762, "learning_rate": 3.930056670042449e-07, "loss": 2.7229, "step": 250600 }, { "epoch": 10.794676314769351, "learning_rate": 3.929571850287659e-07, "loss": 2.672, "step": 250620 }, { "epoch": 10.795537752508938, "learning_rate": 3.929087030532871e-07, "loss": 2.6419, "step": 250640 }, { "epoch": 10.796399190248525, "learning_rate": 3.928602210778082e-07, "loss": 2.6856, "step": 250660 }, { "epoch": 10.797260627988113, "learning_rate": 3.928117391023293e-07, "loss": 2.6191, "step": 250680 }, { "epoch": 10.7981220657277, "learning_rate": 3.9276325712685037e-07, "loss": 2.626, "step": 250700 }, { "epoch": 10.798983503467287, "learning_rate": 3.927147751513715e-07, "loss": 2.7637, "step": 250720 }, { "epoch": 10.799844941206874, "learning_rate": 3.926662931758926e-07, "loss": 2.7734, "step": 250740 }, { "epoch": 10.800706378946462, "learning_rate": 3.9261781120041364e-07, "loss": 2.8191, "step": 250760 }, { "epoch": 10.801567816686049, "learning_rate": 3.925693292249348e-07, "loss": 2.6136, "step": 250780 }, { "epoch": 10.802429254425636, "learning_rate": 3.925208472494559e-07, "loss": 2.6629, "step": 250800 }, { "epoch": 10.803290692165223, "learning_rate": 3.92472365273977e-07, "loss": 2.6255, "step": 250820 }, { "epoch": 10.80415212990481, "learning_rate": 3.924238832984981e-07, "loss": 2.5363, "step": 250840 }, { "epoch": 10.805013567644398, "learning_rate": 3.9237540132301926e-07, "loss": 2.7397, "step": 250860 }, { "epoch": 10.805875005383985, "learning_rate": 3.923269193475403e-07, "loss": 2.7255, "step": 250880 }, { "epoch": 10.806736443123572, "learning_rate": 3.9227843737206146e-07, "loss": 2.6742, "step": 250900 }, { "epoch": 10.807597880863161, "learning_rate": 3.9222995539658253e-07, "loss": 2.5995, "step": 250920 }, { "epoch": 10.808459318602749, "learning_rate": 3.9218147342110365e-07, "loss": 2.6999, "step": 250940 }, { "epoch": 10.809320756342336, "learning_rate": 3.921329914456247e-07, "loss": 2.5971, "step": 250960 }, { "epoch": 10.810182194081923, "learning_rate": 3.9208450947014585e-07, "loss": 2.8558, "step": 250980 }, { "epoch": 10.81104363182151, "learning_rate": 3.920360274946669e-07, "loss": 2.5188, "step": 251000 }, { "epoch": 10.811905069561098, "learning_rate": 3.919875455191881e-07, "loss": 2.6637, "step": 251020 }, { "epoch": 10.812766507300685, "learning_rate": 3.9193906354370917e-07, "loss": 2.6799, "step": 251040 }, { "epoch": 10.813627945040272, "learning_rate": 3.918905815682303e-07, "loss": 2.7201, "step": 251060 }, { "epoch": 10.81448938277986, "learning_rate": 3.9184209959275136e-07, "loss": 2.7163, "step": 251080 }, { "epoch": 10.815350820519447, "learning_rate": 3.9179361761727254e-07, "loss": 2.7782, "step": 251100 }, { "epoch": 10.816212258259034, "learning_rate": 3.917451356417936e-07, "loss": 2.6108, "step": 251120 }, { "epoch": 10.817073695998621, "learning_rate": 3.9169665366631474e-07, "loss": 2.7799, "step": 251140 }, { "epoch": 10.817935133738208, "learning_rate": 3.916481716908358e-07, "loss": 2.7628, "step": 251160 }, { "epoch": 10.818796571477797, "learning_rate": 3.91599689715357e-07, "loss": 2.6062, "step": 251180 }, { "epoch": 10.819658009217385, "learning_rate": 3.91551207739878e-07, "loss": 2.8156, "step": 251200 }, { "epoch": 10.820519446956972, "learning_rate": 3.915027257643992e-07, "loss": 2.7541, "step": 251220 }, { "epoch": 10.82138088469656, "learning_rate": 3.9145424378892025e-07, "loss": 2.7093, "step": 251240 }, { "epoch": 10.822242322436146, "learning_rate": 3.9140576181344127e-07, "loss": 2.7304, "step": 251260 }, { "epoch": 10.823103760175734, "learning_rate": 3.9135727983796245e-07, "loss": 2.7137, "step": 251280 }, { "epoch": 10.82396519791532, "learning_rate": 3.913087978624835e-07, "loss": 2.7261, "step": 251300 }, { "epoch": 10.824826635654908, "learning_rate": 3.9126031588700465e-07, "loss": 2.7054, "step": 251320 }, { "epoch": 10.825688073394495, "learning_rate": 3.912118339115257e-07, "loss": 2.783, "step": 251340 }, { "epoch": 10.826549511134083, "learning_rate": 3.911633519360469e-07, "loss": 2.7209, "step": 251360 }, { "epoch": 10.82741094887367, "learning_rate": 3.911148699605679e-07, "loss": 2.6814, "step": 251380 }, { "epoch": 10.828272386613257, "learning_rate": 3.9106638798508904e-07, "loss": 2.5566, "step": 251400 }, { "epoch": 10.829133824352844, "learning_rate": 3.9101790600961016e-07, "loss": 2.7102, "step": 251420 }, { "epoch": 10.829995262092432, "learning_rate": 3.909694240341313e-07, "loss": 2.625, "step": 251440 }, { "epoch": 10.830856699832019, "learning_rate": 3.9092094205865236e-07, "loss": 2.739, "step": 251460 }, { "epoch": 10.831718137571608, "learning_rate": 3.9087246008317354e-07, "loss": 2.7268, "step": 251480 }, { "epoch": 10.832579575311195, "learning_rate": 3.908239781076946e-07, "loss": 2.5722, "step": 251500 }, { "epoch": 10.833441013050782, "learning_rate": 3.9077549613221573e-07, "loss": 2.8868, "step": 251520 }, { "epoch": 10.83430245079037, "learning_rate": 3.907270141567368e-07, "loss": 2.6677, "step": 251540 }, { "epoch": 10.835163888529957, "learning_rate": 3.90678532181258e-07, "loss": 2.8517, "step": 251560 }, { "epoch": 10.836025326269544, "learning_rate": 3.90630050205779e-07, "loss": 2.587, "step": 251580 }, { "epoch": 10.836886764009131, "learning_rate": 3.905815682303002e-07, "loss": 2.752, "step": 251600 }, { "epoch": 10.837748201748719, "learning_rate": 3.9053308625482125e-07, "loss": 2.7607, "step": 251620 }, { "epoch": 10.838609639488306, "learning_rate": 3.9048460427934237e-07, "loss": 2.5829, "step": 251640 }, { "epoch": 10.839471077227893, "learning_rate": 3.9043612230386344e-07, "loss": 2.7927, "step": 251660 }, { "epoch": 10.84033251496748, "learning_rate": 3.903876403283846e-07, "loss": 2.798, "step": 251680 }, { "epoch": 10.841193952707068, "learning_rate": 3.9033915835290564e-07, "loss": 2.8178, "step": 251700 }, { "epoch": 10.842055390446655, "learning_rate": 3.902906763774268e-07, "loss": 2.7898, "step": 251720 }, { "epoch": 10.842916828186242, "learning_rate": 3.902421944019479e-07, "loss": 2.8232, "step": 251740 }, { "epoch": 10.843778265925831, "learning_rate": 3.90193712426469e-07, "loss": 2.8107, "step": 251760 }, { "epoch": 10.844639703665418, "learning_rate": 3.901452304509901e-07, "loss": 2.6956, "step": 251780 }, { "epoch": 10.845501141405006, "learning_rate": 3.9009674847551116e-07, "loss": 2.7122, "step": 251800 }, { "epoch": 10.846362579144593, "learning_rate": 3.9004826650003233e-07, "loss": 2.9492, "step": 251820 }, { "epoch": 10.84722401688418, "learning_rate": 3.8999978452455335e-07, "loss": 2.6983, "step": 251840 }, { "epoch": 10.848085454623767, "learning_rate": 3.8995130254907453e-07, "loss": 2.6794, "step": 251860 }, { "epoch": 10.848946892363355, "learning_rate": 3.899028205735956e-07, "loss": 2.6689, "step": 251880 }, { "epoch": 10.849808330102942, "learning_rate": 3.898543385981167e-07, "loss": 2.717, "step": 251900 }, { "epoch": 10.850669767842529, "learning_rate": 3.898058566226378e-07, "loss": 2.7677, "step": 251920 }, { "epoch": 10.851531205582116, "learning_rate": 3.89757374647159e-07, "loss": 2.7662, "step": 251940 }, { "epoch": 10.852392643321704, "learning_rate": 3.8970889267168e-07, "loss": 2.6663, "step": 251960 }, { "epoch": 10.85325408106129, "learning_rate": 3.8966041069620117e-07, "loss": 2.8586, "step": 251980 }, { "epoch": 10.854115518800878, "learning_rate": 3.8961192872072224e-07, "loss": 2.632, "step": 252000 }, { "epoch": 10.854976956540465, "learning_rate": 3.8956344674524337e-07, "loss": 2.6612, "step": 252020 }, { "epoch": 10.855838394280052, "learning_rate": 3.8951496476976444e-07, "loss": 2.8107, "step": 252040 }, { "epoch": 10.856699832019642, "learning_rate": 3.894664827942856e-07, "loss": 2.6739, "step": 252060 }, { "epoch": 10.857561269759229, "learning_rate": 3.8941800081880663e-07, "loss": 2.7777, "step": 252080 }, { "epoch": 10.858422707498816, "learning_rate": 3.893695188433278e-07, "loss": 2.7407, "step": 252100 }, { "epoch": 10.859284145238403, "learning_rate": 3.893210368678489e-07, "loss": 2.6793, "step": 252120 }, { "epoch": 10.86014558297799, "learning_rate": 3.8927255489237e-07, "loss": 2.5803, "step": 252140 }, { "epoch": 10.861007020717578, "learning_rate": 3.892240729168911e-07, "loss": 2.8207, "step": 252160 }, { "epoch": 10.861868458457165, "learning_rate": 3.8917559094141226e-07, "loss": 2.7134, "step": 252180 }, { "epoch": 10.862729896196752, "learning_rate": 3.8912710896593333e-07, "loss": 2.4867, "step": 252200 }, { "epoch": 10.86359133393634, "learning_rate": 3.8907862699045445e-07, "loss": 2.6984, "step": 252220 }, { "epoch": 10.864452771675927, "learning_rate": 3.890301450149755e-07, "loss": 2.7083, "step": 252240 }, { "epoch": 10.865314209415514, "learning_rate": 3.889816630394967e-07, "loss": 2.6565, "step": 252260 }, { "epoch": 10.866175647155101, "learning_rate": 3.889331810640177e-07, "loss": 2.6843, "step": 252280 }, { "epoch": 10.867037084894688, "learning_rate": 3.888846990885389e-07, "loss": 2.622, "step": 252300 }, { "epoch": 10.867898522634277, "learning_rate": 3.8883621711305997e-07, "loss": 2.6381, "step": 252320 }, { "epoch": 10.868759960373865, "learning_rate": 3.88787735137581e-07, "loss": 2.7306, "step": 252340 }, { "epoch": 10.869621398113452, "learning_rate": 3.8873925316210216e-07, "loss": 2.6979, "step": 252360 }, { "epoch": 10.87048283585304, "learning_rate": 3.8869077118662323e-07, "loss": 2.6787, "step": 252380 }, { "epoch": 10.871344273592626, "learning_rate": 3.8864228921114436e-07, "loss": 2.7172, "step": 252400 }, { "epoch": 10.872205711332214, "learning_rate": 3.8859380723566543e-07, "loss": 2.636, "step": 252420 }, { "epoch": 10.873067149071801, "learning_rate": 3.885453252601866e-07, "loss": 2.6099, "step": 252440 }, { "epoch": 10.873928586811388, "learning_rate": 3.884968432847077e-07, "loss": 2.6221, "step": 252460 }, { "epoch": 10.874790024550975, "learning_rate": 3.884483613092288e-07, "loss": 2.6044, "step": 252480 }, { "epoch": 10.875651462290563, "learning_rate": 3.8839987933374993e-07, "loss": 2.7876, "step": 252500 }, { "epoch": 10.87651290003015, "learning_rate": 3.8835139735827105e-07, "loss": 2.8552, "step": 252520 }, { "epoch": 10.877374337769737, "learning_rate": 3.8830291538279207e-07, "loss": 2.7441, "step": 252540 }, { "epoch": 10.878235775509324, "learning_rate": 3.8825443340731325e-07, "loss": 2.8744, "step": 252560 }, { "epoch": 10.879097213248912, "learning_rate": 3.882059514318343e-07, "loss": 2.5416, "step": 252580 }, { "epoch": 10.879958650988499, "learning_rate": 3.8815746945635544e-07, "loss": 2.6524, "step": 252600 }, { "epoch": 10.880820088728088, "learning_rate": 3.881089874808765e-07, "loss": 2.6658, "step": 252620 }, { "epoch": 10.881681526467675, "learning_rate": 3.880605055053977e-07, "loss": 2.6793, "step": 252640 }, { "epoch": 10.882542964207262, "learning_rate": 3.880120235299187e-07, "loss": 2.7381, "step": 252660 }, { "epoch": 10.88340440194685, "learning_rate": 3.879635415544399e-07, "loss": 2.6739, "step": 252680 }, { "epoch": 10.884265839686437, "learning_rate": 3.8791505957896096e-07, "loss": 2.7802, "step": 252700 }, { "epoch": 10.885127277426024, "learning_rate": 3.878665776034821e-07, "loss": 2.6218, "step": 252720 }, { "epoch": 10.885988715165611, "learning_rate": 3.8781809562800316e-07, "loss": 2.9297, "step": 252740 }, { "epoch": 10.886850152905199, "learning_rate": 3.877696136525243e-07, "loss": 2.6872, "step": 252760 }, { "epoch": 10.887711590644786, "learning_rate": 3.8772113167704535e-07, "loss": 2.768, "step": 252780 }, { "epoch": 10.888573028384373, "learning_rate": 3.8767264970156653e-07, "loss": 2.6201, "step": 252800 }, { "epoch": 10.88943446612396, "learning_rate": 3.876241677260876e-07, "loss": 2.7861, "step": 252820 }, { "epoch": 10.890295903863548, "learning_rate": 3.875756857506087e-07, "loss": 2.6204, "step": 252840 }, { "epoch": 10.891157341603135, "learning_rate": 3.875272037751298e-07, "loss": 2.6845, "step": 252860 }, { "epoch": 10.892018779342724, "learning_rate": 3.8747872179965087e-07, "loss": 2.8161, "step": 252880 }, { "epoch": 10.892880217082311, "learning_rate": 3.8743023982417205e-07, "loss": 2.5913, "step": 252900 }, { "epoch": 10.893741654821898, "learning_rate": 3.8738175784869307e-07, "loss": 2.7811, "step": 252920 }, { "epoch": 10.894603092561486, "learning_rate": 3.8733327587321424e-07, "loss": 2.5047, "step": 252940 }, { "epoch": 10.895464530301073, "learning_rate": 3.872847938977353e-07, "loss": 2.6822, "step": 252960 }, { "epoch": 10.89632596804066, "learning_rate": 3.8723631192225644e-07, "loss": 2.7796, "step": 252980 }, { "epoch": 10.897187405780247, "learning_rate": 3.871878299467775e-07, "loss": 2.7107, "step": 253000 }, { "epoch": 10.898048843519835, "learning_rate": 3.871393479712987e-07, "loss": 2.6998, "step": 253020 }, { "epoch": 10.898910281259422, "learning_rate": 3.870908659958197e-07, "loss": 2.5733, "step": 253040 }, { "epoch": 10.899771718999009, "learning_rate": 3.870423840203409e-07, "loss": 2.6319, "step": 253060 }, { "epoch": 10.900633156738596, "learning_rate": 3.8699390204486195e-07, "loss": 2.6217, "step": 253080 }, { "epoch": 10.901494594478184, "learning_rate": 3.869454200693831e-07, "loss": 2.7137, "step": 253100 }, { "epoch": 10.90235603221777, "learning_rate": 3.8689693809390415e-07, "loss": 2.6858, "step": 253120 }, { "epoch": 10.903217469957358, "learning_rate": 3.8684845611842533e-07, "loss": 2.5383, "step": 253140 }, { "epoch": 10.904078907696945, "learning_rate": 3.8679997414294635e-07, "loss": 2.6669, "step": 253160 }, { "epoch": 10.904940345436534, "learning_rate": 3.8675149216746747e-07, "loss": 2.7556, "step": 253180 }, { "epoch": 10.905801783176122, "learning_rate": 3.867030101919886e-07, "loss": 2.7454, "step": 253200 }, { "epoch": 10.906663220915709, "learning_rate": 3.866545282165097e-07, "loss": 2.6015, "step": 253220 }, { "epoch": 10.907524658655296, "learning_rate": 3.866060462410308e-07, "loss": 2.7307, "step": 253240 }, { "epoch": 10.908386096394883, "learning_rate": 3.8655756426555197e-07, "loss": 2.7609, "step": 253260 }, { "epoch": 10.90924753413447, "learning_rate": 3.8650908229007304e-07, "loss": 2.6122, "step": 253280 }, { "epoch": 10.910108971874058, "learning_rate": 3.8646060031459416e-07, "loss": 2.6757, "step": 253300 }, { "epoch": 10.910970409613645, "learning_rate": 3.8641211833911524e-07, "loss": 2.6429, "step": 253320 }, { "epoch": 10.911831847353232, "learning_rate": 3.863636363636364e-07, "loss": 2.7088, "step": 253340 }, { "epoch": 10.91269328509282, "learning_rate": 3.8631515438815743e-07, "loss": 2.628, "step": 253360 }, { "epoch": 10.913554722832407, "learning_rate": 3.862666724126786e-07, "loss": 2.842, "step": 253380 }, { "epoch": 10.914416160571994, "learning_rate": 3.862181904371997e-07, "loss": 2.764, "step": 253400 }, { "epoch": 10.915277598311581, "learning_rate": 3.861697084617207e-07, "loss": 2.6701, "step": 253420 }, { "epoch": 10.91613903605117, "learning_rate": 3.861212264862419e-07, "loss": 2.7695, "step": 253440 }, { "epoch": 10.917000473790758, "learning_rate": 3.8607274451076295e-07, "loss": 2.8627, "step": 253460 }, { "epoch": 10.917861911530345, "learning_rate": 3.8602426253528407e-07, "loss": 2.904, "step": 253480 }, { "epoch": 10.918723349269932, "learning_rate": 3.8597578055980514e-07, "loss": 2.7059, "step": 253500 }, { "epoch": 10.91958478700952, "learning_rate": 3.859272985843263e-07, "loss": 2.782, "step": 253520 }, { "epoch": 10.920446224749107, "learning_rate": 3.858788166088474e-07, "loss": 2.6654, "step": 253540 }, { "epoch": 10.921307662488694, "learning_rate": 3.858303346333685e-07, "loss": 2.8532, "step": 253560 }, { "epoch": 10.922169100228281, "learning_rate": 3.857818526578896e-07, "loss": 2.6286, "step": 253580 }, { "epoch": 10.923030537967868, "learning_rate": 3.8573337068241077e-07, "loss": 2.7574, "step": 253600 }, { "epoch": 10.923891975707456, "learning_rate": 3.856848887069318e-07, "loss": 2.4667, "step": 253620 }, { "epoch": 10.924753413447043, "learning_rate": 3.8563640673145296e-07, "loss": 2.7814, "step": 253640 }, { "epoch": 10.92561485118663, "learning_rate": 3.8558792475597403e-07, "loss": 2.8162, "step": 253660 }, { "epoch": 10.926476288926217, "learning_rate": 3.8553944278049516e-07, "loss": 2.5512, "step": 253680 }, { "epoch": 10.927337726665804, "learning_rate": 3.8549096080501623e-07, "loss": 2.7637, "step": 253700 }, { "epoch": 10.928199164405392, "learning_rate": 3.854424788295374e-07, "loss": 2.6994, "step": 253720 }, { "epoch": 10.92906060214498, "learning_rate": 3.853939968540584e-07, "loss": 2.7207, "step": 253740 }, { "epoch": 10.929922039884568, "learning_rate": 3.853455148785796e-07, "loss": 2.6725, "step": 253760 }, { "epoch": 10.930783477624155, "learning_rate": 3.852970329031007e-07, "loss": 2.6883, "step": 253780 }, { "epoch": 10.931644915363742, "learning_rate": 3.852485509276218e-07, "loss": 2.5921, "step": 253800 }, { "epoch": 10.93250635310333, "learning_rate": 3.8520006895214287e-07, "loss": 2.684, "step": 253820 }, { "epoch": 10.933367790842917, "learning_rate": 3.8515158697666405e-07, "loss": 2.6772, "step": 253840 }, { "epoch": 10.934229228582504, "learning_rate": 3.8510310500118507e-07, "loss": 2.8591, "step": 253860 }, { "epoch": 10.935090666322091, "learning_rate": 3.8505462302570624e-07, "loss": 2.7687, "step": 253880 }, { "epoch": 10.935952104061679, "learning_rate": 3.850061410502273e-07, "loss": 2.6965, "step": 253900 }, { "epoch": 10.936813541801266, "learning_rate": 3.8495765907474844e-07, "loss": 2.8039, "step": 253920 }, { "epoch": 10.937674979540853, "learning_rate": 3.849091770992695e-07, "loss": 2.7694, "step": 253940 }, { "epoch": 10.93853641728044, "learning_rate": 3.848606951237906e-07, "loss": 2.7452, "step": 253960 }, { "epoch": 10.939397855020028, "learning_rate": 3.8481221314831176e-07, "loss": 2.4987, "step": 253980 }, { "epoch": 10.940259292759617, "learning_rate": 3.847637311728328e-07, "loss": 2.5379, "step": 254000 }, { "epoch": 10.941120730499204, "learning_rate": 3.8471524919735396e-07, "loss": 2.8784, "step": 254020 }, { "epoch": 10.941982168238791, "learning_rate": 3.8466676722187503e-07, "loss": 2.7421, "step": 254040 }, { "epoch": 10.942843605978378, "learning_rate": 3.8461828524639615e-07, "loss": 2.7414, "step": 254060 }, { "epoch": 10.943705043717966, "learning_rate": 3.845698032709172e-07, "loss": 2.8405, "step": 254080 }, { "epoch": 10.944566481457553, "learning_rate": 3.845213212954384e-07, "loss": 2.7445, "step": 254100 }, { "epoch": 10.94542791919714, "learning_rate": 3.844728393199594e-07, "loss": 2.8707, "step": 254120 }, { "epoch": 10.946289356936727, "learning_rate": 3.844243573444806e-07, "loss": 2.7064, "step": 254140 }, { "epoch": 10.947150794676315, "learning_rate": 3.8437587536900167e-07, "loss": 2.6081, "step": 254160 }, { "epoch": 10.948012232415902, "learning_rate": 3.843273933935228e-07, "loss": 2.8442, "step": 254180 }, { "epoch": 10.94887367015549, "learning_rate": 3.8427891141804386e-07, "loss": 2.7482, "step": 254200 }, { "epoch": 10.949735107895076, "learning_rate": 3.8423042944256504e-07, "loss": 2.7234, "step": 254220 }, { "epoch": 10.950596545634664, "learning_rate": 3.841819474670861e-07, "loss": 2.7027, "step": 254240 }, { "epoch": 10.951457983374251, "learning_rate": 3.8413346549160724e-07, "loss": 2.8861, "step": 254260 }, { "epoch": 10.952319421113838, "learning_rate": 3.8408498351612836e-07, "loss": 2.6787, "step": 254280 }, { "epoch": 10.953180858853427, "learning_rate": 3.840365015406495e-07, "loss": 2.6898, "step": 254300 }, { "epoch": 10.954042296593014, "learning_rate": 3.839880195651705e-07, "loss": 2.6418, "step": 254320 }, { "epoch": 10.954903734332602, "learning_rate": 3.839395375896917e-07, "loss": 2.6845, "step": 254340 }, { "epoch": 10.955765172072189, "learning_rate": 3.8389105561421275e-07, "loss": 2.6792, "step": 254360 }, { "epoch": 10.956626609811776, "learning_rate": 3.838425736387339e-07, "loss": 2.7364, "step": 254380 }, { "epoch": 10.957488047551363, "learning_rate": 3.8379409166325495e-07, "loss": 2.6681, "step": 254400 }, { "epoch": 10.95834948529095, "learning_rate": 3.8374560968777613e-07, "loss": 2.7149, "step": 254420 }, { "epoch": 10.959210923030538, "learning_rate": 3.8369712771229715e-07, "loss": 2.7263, "step": 254440 }, { "epoch": 10.960072360770125, "learning_rate": 3.836486457368183e-07, "loss": 2.6764, "step": 254460 }, { "epoch": 10.960933798509712, "learning_rate": 3.836001637613394e-07, "loss": 2.803, "step": 254480 }, { "epoch": 10.9617952362493, "learning_rate": 3.835516817858604e-07, "loss": 2.7733, "step": 254500 }, { "epoch": 10.962656673988887, "learning_rate": 3.835031998103816e-07, "loss": 2.6544, "step": 254520 }, { "epoch": 10.963518111728474, "learning_rate": 3.8345471783490266e-07, "loss": 2.836, "step": 254540 }, { "epoch": 10.964379549468061, "learning_rate": 3.834062358594238e-07, "loss": 2.7806, "step": 254560 }, { "epoch": 10.96524098720765, "learning_rate": 3.8335775388394486e-07, "loss": 2.7177, "step": 254580 }, { "epoch": 10.966102424947238, "learning_rate": 3.8330927190846604e-07, "loss": 2.6857, "step": 254600 }, { "epoch": 10.966963862686825, "learning_rate": 3.832607899329871e-07, "loss": 2.9224, "step": 254620 }, { "epoch": 10.967825300426412, "learning_rate": 3.8321230795750823e-07, "loss": 2.6148, "step": 254640 }, { "epoch": 10.968686738166, "learning_rate": 3.831638259820293e-07, "loss": 2.7, "step": 254660 }, { "epoch": 10.969548175905587, "learning_rate": 3.831153440065505e-07, "loss": 2.7589, "step": 254680 }, { "epoch": 10.970409613645174, "learning_rate": 3.830668620310715e-07, "loss": 2.7418, "step": 254700 }, { "epoch": 10.971271051384761, "learning_rate": 3.830183800555927e-07, "loss": 2.5423, "step": 254720 }, { "epoch": 10.972132489124348, "learning_rate": 3.8296989808011375e-07, "loss": 2.7039, "step": 254740 }, { "epoch": 10.972993926863936, "learning_rate": 3.8292141610463487e-07, "loss": 2.8299, "step": 254760 }, { "epoch": 10.973855364603523, "learning_rate": 3.8287293412915594e-07, "loss": 2.6977, "step": 254780 }, { "epoch": 10.97471680234311, "learning_rate": 3.828244521536771e-07, "loss": 2.6704, "step": 254800 }, { "epoch": 10.975578240082697, "learning_rate": 3.8277597017819814e-07, "loss": 2.6605, "step": 254820 }, { "epoch": 10.976439677822285, "learning_rate": 3.827274882027193e-07, "loss": 2.5926, "step": 254840 }, { "epoch": 10.977301115561872, "learning_rate": 3.826790062272404e-07, "loss": 2.6429, "step": 254860 }, { "epoch": 10.97816255330146, "learning_rate": 3.826305242517615e-07, "loss": 2.55, "step": 254880 }, { "epoch": 10.979023991041048, "learning_rate": 3.825820422762826e-07, "loss": 2.7065, "step": 254900 }, { "epoch": 10.979885428780635, "learning_rate": 3.8253356030080376e-07, "loss": 2.6166, "step": 254920 }, { "epoch": 10.980746866520223, "learning_rate": 3.824850783253248e-07, "loss": 2.7052, "step": 254940 }, { "epoch": 10.98160830425981, "learning_rate": 3.824365963498459e-07, "loss": 2.9047, "step": 254960 }, { "epoch": 10.982469741999397, "learning_rate": 3.8238811437436703e-07, "loss": 2.6298, "step": 254980 }, { "epoch": 10.983331179738984, "learning_rate": 3.8233963239888815e-07, "loss": 2.7228, "step": 255000 }, { "epoch": 10.984192617478572, "learning_rate": 3.822911504234092e-07, "loss": 2.809, "step": 255020 }, { "epoch": 10.985054055218159, "learning_rate": 3.822426684479303e-07, "loss": 2.7017, "step": 255040 }, { "epoch": 10.985915492957746, "learning_rate": 3.821941864724515e-07, "loss": 2.7488, "step": 255060 }, { "epoch": 10.986776930697333, "learning_rate": 3.821457044969725e-07, "loss": 2.7029, "step": 255080 }, { "epoch": 10.98763836843692, "learning_rate": 3.8209722252149367e-07, "loss": 2.5678, "step": 255100 }, { "epoch": 10.988499806176508, "learning_rate": 3.820487405460147e-07, "loss": 2.6809, "step": 255120 }, { "epoch": 10.989361243916097, "learning_rate": 3.8200025857053587e-07, "loss": 2.7815, "step": 255140 }, { "epoch": 10.990222681655684, "learning_rate": 3.8195177659505694e-07, "loss": 2.5145, "step": 255160 }, { "epoch": 10.991084119395271, "learning_rate": 3.819032946195781e-07, "loss": 2.6788, "step": 255180 }, { "epoch": 10.991945557134859, "learning_rate": 3.8185481264409913e-07, "loss": 2.9221, "step": 255200 }, { "epoch": 10.992806994874446, "learning_rate": 3.818063306686203e-07, "loss": 2.7145, "step": 255220 }, { "epoch": 10.993668432614033, "learning_rate": 3.817578486931414e-07, "loss": 2.6509, "step": 255240 }, { "epoch": 10.99452987035362, "learning_rate": 3.817093667176625e-07, "loss": 2.7283, "step": 255260 }, { "epoch": 10.995391308093208, "learning_rate": 3.816608847421836e-07, "loss": 2.6823, "step": 255280 }, { "epoch": 10.996252745832795, "learning_rate": 3.8161240276670475e-07, "loss": 2.7784, "step": 255300 }, { "epoch": 10.997114183572382, "learning_rate": 3.815639207912258e-07, "loss": 2.8586, "step": 255320 }, { "epoch": 10.99797562131197, "learning_rate": 3.8151543881574695e-07, "loss": 2.7214, "step": 255340 }, { "epoch": 10.998837059051557, "learning_rate": 3.81466956840268e-07, "loss": 2.7323, "step": 255360 }, { "epoch": 10.999698496791144, "learning_rate": 3.814184748647892e-07, "loss": 2.657, "step": 255380 }, { "epoch": 11.000559934530731, "learning_rate": 3.813699928893102e-07, "loss": 2.6821, "step": 255400 }, { "epoch": 11.00142137227032, "learning_rate": 3.813215109138314e-07, "loss": 2.6677, "step": 255420 }, { "epoch": 11.002282810009907, "learning_rate": 3.8127302893835247e-07, "loss": 2.6476, "step": 255440 }, { "epoch": 11.003144247749495, "learning_rate": 3.812245469628736e-07, "loss": 2.6695, "step": 255460 }, { "epoch": 11.004005685489082, "learning_rate": 3.8117606498739466e-07, "loss": 2.5307, "step": 255480 }, { "epoch": 11.004867123228669, "learning_rate": 3.8112758301191584e-07, "loss": 2.7569, "step": 255500 }, { "epoch": 11.005728560968256, "learning_rate": 3.8107910103643686e-07, "loss": 2.5528, "step": 255520 }, { "epoch": 11.006589998707843, "learning_rate": 3.8103061906095804e-07, "loss": 2.5802, "step": 255540 }, { "epoch": 11.00745143644743, "learning_rate": 3.809821370854791e-07, "loss": 2.6102, "step": 255560 }, { "epoch": 11.008312874187018, "learning_rate": 3.809336551100001e-07, "loss": 2.7395, "step": 255580 }, { "epoch": 11.009174311926605, "learning_rate": 3.808851731345213e-07, "loss": 2.7554, "step": 255600 }, { "epoch": 11.010035749666192, "learning_rate": 3.808366911590424e-07, "loss": 2.8473, "step": 255620 }, { "epoch": 11.01089718740578, "learning_rate": 3.807882091835635e-07, "loss": 2.6387, "step": 255640 }, { "epoch": 11.011758625145367, "learning_rate": 3.8073972720808457e-07, "loss": 2.6696, "step": 255660 }, { "epoch": 11.012620062884954, "learning_rate": 3.8069124523260575e-07, "loss": 2.6337, "step": 255680 }, { "epoch": 11.013481500624543, "learning_rate": 3.806427632571268e-07, "loss": 2.7248, "step": 255700 }, { "epoch": 11.01434293836413, "learning_rate": 3.8059428128164794e-07, "loss": 2.6919, "step": 255720 }, { "epoch": 11.015204376103718, "learning_rate": 3.80545799306169e-07, "loss": 2.5793, "step": 255740 }, { "epoch": 11.016065813843305, "learning_rate": 3.804973173306902e-07, "loss": 2.618, "step": 255760 }, { "epoch": 11.016927251582892, "learning_rate": 3.804488353552112e-07, "loss": 2.5663, "step": 255780 }, { "epoch": 11.01778868932248, "learning_rate": 3.804003533797324e-07, "loss": 2.6556, "step": 255800 }, { "epoch": 11.018650127062067, "learning_rate": 3.8035187140425346e-07, "loss": 2.6463, "step": 255820 }, { "epoch": 11.019511564801654, "learning_rate": 3.803033894287746e-07, "loss": 2.678, "step": 255840 }, { "epoch": 11.020373002541241, "learning_rate": 3.8025490745329566e-07, "loss": 2.8039, "step": 255860 }, { "epoch": 11.021234440280828, "learning_rate": 3.8020642547781683e-07, "loss": 2.7517, "step": 255880 }, { "epoch": 11.022095878020416, "learning_rate": 3.8015794350233785e-07, "loss": 2.6259, "step": 255900 }, { "epoch": 11.022957315760003, "learning_rate": 3.8010946152685903e-07, "loss": 2.6548, "step": 255920 }, { "epoch": 11.02381875349959, "learning_rate": 3.800609795513801e-07, "loss": 2.6512, "step": 255940 }, { "epoch": 11.024680191239177, "learning_rate": 3.800124975759012e-07, "loss": 2.5655, "step": 255960 }, { "epoch": 11.025541628978766, "learning_rate": 3.799640156004223e-07, "loss": 2.7611, "step": 255980 }, { "epoch": 11.026403066718354, "learning_rate": 3.799155336249435e-07, "loss": 2.5456, "step": 256000 }, { "epoch": 11.027264504457941, "learning_rate": 3.7986705164946455e-07, "loss": 2.7182, "step": 256020 }, { "epoch": 11.028125942197528, "learning_rate": 3.7981856967398567e-07, "loss": 2.7277, "step": 256040 }, { "epoch": 11.028987379937115, "learning_rate": 3.797700876985068e-07, "loss": 2.8762, "step": 256060 }, { "epoch": 11.029848817676703, "learning_rate": 3.797216057230279e-07, "loss": 2.6487, "step": 256080 }, { "epoch": 11.03071025541629, "learning_rate": 3.7967312374754894e-07, "loss": 2.7623, "step": 256100 }, { "epoch": 11.031571693155877, "learning_rate": 3.7962464177207e-07, "loss": 2.7161, "step": 256120 }, { "epoch": 11.032433130895464, "learning_rate": 3.795761597965912e-07, "loss": 2.7695, "step": 256140 }, { "epoch": 11.033294568635052, "learning_rate": 3.795276778211122e-07, "loss": 2.6285, "step": 256160 }, { "epoch": 11.034156006374639, "learning_rate": 3.794791958456334e-07, "loss": 2.6789, "step": 256180 }, { "epoch": 11.035017444114226, "learning_rate": 3.7943071387015445e-07, "loss": 2.7008, "step": 256200 }, { "epoch": 11.035878881853813, "learning_rate": 3.793822318946756e-07, "loss": 2.9918, "step": 256220 }, { "epoch": 11.0367403195934, "learning_rate": 3.7933374991919665e-07, "loss": 2.6468, "step": 256240 }, { "epoch": 11.03760175733299, "learning_rate": 3.7928526794371783e-07, "loss": 2.7715, "step": 256260 }, { "epoch": 11.038463195072577, "learning_rate": 3.7923678596823885e-07, "loss": 2.6992, "step": 256280 }, { "epoch": 11.039324632812164, "learning_rate": 3.7918830399276e-07, "loss": 2.7435, "step": 256300 }, { "epoch": 11.040186070551751, "learning_rate": 3.791398220172811e-07, "loss": 2.7368, "step": 256320 }, { "epoch": 11.041047508291339, "learning_rate": 3.790913400418022e-07, "loss": 2.7158, "step": 256340 }, { "epoch": 11.041908946030926, "learning_rate": 3.790428580663233e-07, "loss": 2.8105, "step": 256360 }, { "epoch": 11.042770383770513, "learning_rate": 3.7899437609084447e-07, "loss": 2.7244, "step": 256380 }, { "epoch": 11.0436318215101, "learning_rate": 3.7894589411536554e-07, "loss": 2.6854, "step": 256400 }, { "epoch": 11.044493259249688, "learning_rate": 3.7889741213988666e-07, "loss": 2.865, "step": 256420 }, { "epoch": 11.045354696989275, "learning_rate": 3.7884893016440774e-07, "loss": 2.7896, "step": 256440 }, { "epoch": 11.046216134728862, "learning_rate": 3.788004481889289e-07, "loss": 2.6578, "step": 256460 }, { "epoch": 11.04707757246845, "learning_rate": 3.7875196621344993e-07, "loss": 2.6847, "step": 256480 }, { "epoch": 11.047939010208037, "learning_rate": 3.787034842379711e-07, "loss": 2.6792, "step": 256500 }, { "epoch": 11.048800447947624, "learning_rate": 3.786550022624922e-07, "loss": 2.8267, "step": 256520 }, { "epoch": 11.049661885687211, "learning_rate": 3.786065202870133e-07, "loss": 2.8887, "step": 256540 }, { "epoch": 11.0505233234268, "learning_rate": 3.785580383115344e-07, "loss": 2.7591, "step": 256560 }, { "epoch": 11.051384761166387, "learning_rate": 3.7850955633605555e-07, "loss": 2.724, "step": 256580 }, { "epoch": 11.052246198905975, "learning_rate": 3.7846107436057657e-07, "loss": 2.7337, "step": 256600 }, { "epoch": 11.053107636645562, "learning_rate": 3.7841259238509775e-07, "loss": 2.7163, "step": 256620 }, { "epoch": 11.053969074385149, "learning_rate": 3.783641104096188e-07, "loss": 2.606, "step": 256640 }, { "epoch": 11.054830512124736, "learning_rate": 3.7831562843413984e-07, "loss": 2.6659, "step": 256660 }, { "epoch": 11.055691949864324, "learning_rate": 3.78267146458661e-07, "loss": 2.6612, "step": 256680 }, { "epoch": 11.05655338760391, "learning_rate": 3.782186644831821e-07, "loss": 2.6571, "step": 256700 }, { "epoch": 11.057414825343498, "learning_rate": 3.781701825077032e-07, "loss": 2.7935, "step": 256720 }, { "epoch": 11.058276263083085, "learning_rate": 3.781217005322243e-07, "loss": 2.7118, "step": 256740 }, { "epoch": 11.059137700822673, "learning_rate": 3.7807321855674546e-07, "loss": 2.6495, "step": 256760 }, { "epoch": 11.05999913856226, "learning_rate": 3.7802473658126653e-07, "loss": 2.8208, "step": 256780 }, { "epoch": 11.060860576301847, "learning_rate": 3.7797625460578766e-07, "loss": 2.7967, "step": 256800 }, { "epoch": 11.061722014041434, "learning_rate": 3.7792777263030873e-07, "loss": 2.7592, "step": 256820 }, { "epoch": 11.062583451781023, "learning_rate": 3.778792906548299e-07, "loss": 2.7064, "step": 256840 }, { "epoch": 11.06344488952061, "learning_rate": 3.778308086793509e-07, "loss": 2.6757, "step": 256860 }, { "epoch": 11.064306327260198, "learning_rate": 3.777823267038721e-07, "loss": 2.7191, "step": 256880 }, { "epoch": 11.065167764999785, "learning_rate": 3.777338447283931e-07, "loss": 2.7623, "step": 256900 }, { "epoch": 11.066029202739372, "learning_rate": 3.776853627529143e-07, "loss": 2.6378, "step": 256920 }, { "epoch": 11.06689064047896, "learning_rate": 3.7763688077743537e-07, "loss": 2.7084, "step": 256940 }, { "epoch": 11.067752078218547, "learning_rate": 3.7758839880195655e-07, "loss": 2.6558, "step": 256960 }, { "epoch": 11.068613515958134, "learning_rate": 3.7753991682647757e-07, "loss": 2.716, "step": 256980 }, { "epoch": 11.069474953697721, "learning_rate": 3.7749143485099874e-07, "loss": 2.7302, "step": 257000 }, { "epoch": 11.070336391437309, "learning_rate": 3.774429528755198e-07, "loss": 2.7268, "step": 257020 }, { "epoch": 11.071197829176896, "learning_rate": 3.7739447090004094e-07, "loss": 2.6044, "step": 257040 }, { "epoch": 11.072059266916483, "learning_rate": 3.77345988924562e-07, "loss": 2.6666, "step": 257060 }, { "epoch": 11.07292070465607, "learning_rate": 3.772975069490832e-07, "loss": 2.6669, "step": 257080 }, { "epoch": 11.073782142395658, "learning_rate": 3.7724902497360426e-07, "loss": 2.616, "step": 257100 }, { "epoch": 11.074643580135247, "learning_rate": 3.772005429981254e-07, "loss": 2.65, "step": 257120 }, { "epoch": 11.075505017874834, "learning_rate": 3.7715206102264646e-07, "loss": 2.6499, "step": 257140 }, { "epoch": 11.076366455614421, "learning_rate": 3.7710357904716753e-07, "loss": 2.6326, "step": 257160 }, { "epoch": 11.077227893354008, "learning_rate": 3.7705509707168865e-07, "loss": 2.7269, "step": 257180 }, { "epoch": 11.078089331093596, "learning_rate": 3.770066150962097e-07, "loss": 2.6456, "step": 257200 }, { "epoch": 11.078950768833183, "learning_rate": 3.769581331207309e-07, "loss": 2.6365, "step": 257220 }, { "epoch": 11.07981220657277, "learning_rate": 3.769096511452519e-07, "loss": 2.647, "step": 257240 }, { "epoch": 11.080673644312357, "learning_rate": 3.768611691697731e-07, "loss": 2.6643, "step": 257260 }, { "epoch": 11.081535082051944, "learning_rate": 3.7681268719429417e-07, "loss": 2.6601, "step": 257280 }, { "epoch": 11.082396519791532, "learning_rate": 3.767642052188153e-07, "loss": 2.6847, "step": 257300 }, { "epoch": 11.083257957531119, "learning_rate": 3.767157232433363e-07, "loss": 2.6581, "step": 257320 }, { "epoch": 11.084119395270706, "learning_rate": 3.7666724126785754e-07, "loss": 2.7497, "step": 257340 }, { "epoch": 11.084980833010293, "learning_rate": 3.7661875929237856e-07, "loss": 2.7313, "step": 257360 }, { "epoch": 11.08584227074988, "learning_rate": 3.7657027731689974e-07, "loss": 2.6247, "step": 257380 }, { "epoch": 11.08670370848947, "learning_rate": 3.765217953414208e-07, "loss": 2.6595, "step": 257400 }, { "epoch": 11.087565146229057, "learning_rate": 3.7647331336594193e-07, "loss": 2.6793, "step": 257420 }, { "epoch": 11.088426583968644, "learning_rate": 3.76424831390463e-07, "loss": 2.8569, "step": 257440 }, { "epoch": 11.089288021708231, "learning_rate": 3.763763494149842e-07, "loss": 2.6576, "step": 257460 }, { "epoch": 11.090149459447819, "learning_rate": 3.7632786743950525e-07, "loss": 2.7709, "step": 257480 }, { "epoch": 11.091010897187406, "learning_rate": 3.762793854640264e-07, "loss": 2.7947, "step": 257500 }, { "epoch": 11.091872334926993, "learning_rate": 3.7623090348854745e-07, "loss": 2.626, "step": 257520 }, { "epoch": 11.09273377266658, "learning_rate": 3.7618242151306863e-07, "loss": 2.635, "step": 257540 }, { "epoch": 11.093595210406168, "learning_rate": 3.7613393953758965e-07, "loss": 2.7806, "step": 257560 }, { "epoch": 11.094456648145755, "learning_rate": 3.760854575621108e-07, "loss": 2.6777, "step": 257580 }, { "epoch": 11.095318085885342, "learning_rate": 3.760369755866319e-07, "loss": 2.8047, "step": 257600 }, { "epoch": 11.09617952362493, "learning_rate": 3.75988493611153e-07, "loss": 2.7178, "step": 257620 }, { "epoch": 11.097040961364517, "learning_rate": 3.759400116356741e-07, "loss": 2.6654, "step": 257640 }, { "epoch": 11.097902399104104, "learning_rate": 3.7589152966019527e-07, "loss": 2.7061, "step": 257660 }, { "epoch": 11.098763836843693, "learning_rate": 3.758430476847163e-07, "loss": 2.6194, "step": 257680 }, { "epoch": 11.09962527458328, "learning_rate": 3.7579456570923736e-07, "loss": 2.7663, "step": 257700 }, { "epoch": 11.100486712322867, "learning_rate": 3.7574608373375853e-07, "loss": 2.6972, "step": 257720 }, { "epoch": 11.101348150062455, "learning_rate": 3.756976017582796e-07, "loss": 2.7006, "step": 257740 }, { "epoch": 11.102209587802042, "learning_rate": 3.7564911978280073e-07, "loss": 2.8124, "step": 257760 }, { "epoch": 11.10307102554163, "learning_rate": 3.756006378073218e-07, "loss": 2.6189, "step": 257780 }, { "epoch": 11.103932463281216, "learning_rate": 3.75552155831843e-07, "loss": 2.553, "step": 257800 }, { "epoch": 11.104793901020804, "learning_rate": 3.75503673856364e-07, "loss": 2.7419, "step": 257820 }, { "epoch": 11.105655338760391, "learning_rate": 3.7545519188088523e-07, "loss": 2.5952, "step": 257840 }, { "epoch": 11.106516776499978, "learning_rate": 3.7540670990540625e-07, "loss": 2.656, "step": 257860 }, { "epoch": 11.107378214239565, "learning_rate": 3.7535822792992737e-07, "loss": 2.6258, "step": 257880 }, { "epoch": 11.108239651979153, "learning_rate": 3.7530974595444844e-07, "loss": 2.6638, "step": 257900 }, { "epoch": 11.10910108971874, "learning_rate": 3.752612639789696e-07, "loss": 2.5885, "step": 257920 }, { "epoch": 11.109962527458327, "learning_rate": 3.7521278200349064e-07, "loss": 2.5551, "step": 257940 }, { "epoch": 11.110823965197916, "learning_rate": 3.751643000280118e-07, "loss": 2.5582, "step": 257960 }, { "epoch": 11.111685402937503, "learning_rate": 3.751158180525329e-07, "loss": 2.717, "step": 257980 }, { "epoch": 11.11254684067709, "learning_rate": 3.75067336077054e-07, "loss": 2.8434, "step": 258000 }, { "epoch": 11.113408278416678, "learning_rate": 3.750188541015751e-07, "loss": 2.6309, "step": 258020 }, { "epoch": 11.114269716156265, "learning_rate": 3.7497037212609626e-07, "loss": 2.637, "step": 258040 }, { "epoch": 11.115131153895852, "learning_rate": 3.749218901506173e-07, "loss": 2.6483, "step": 258060 }, { "epoch": 11.11599259163544, "learning_rate": 3.7487340817513846e-07, "loss": 2.6086, "step": 258080 }, { "epoch": 11.116854029375027, "learning_rate": 3.7482492619965953e-07, "loss": 2.8124, "step": 258100 }, { "epoch": 11.117715467114614, "learning_rate": 3.7477644422418065e-07, "loss": 2.6673, "step": 258120 }, { "epoch": 11.118576904854201, "learning_rate": 3.747279622487017e-07, "loss": 2.8104, "step": 258140 }, { "epoch": 11.119438342593789, "learning_rate": 3.746794802732229e-07, "loss": 2.6798, "step": 258160 }, { "epoch": 11.120299780333376, "learning_rate": 3.7463099829774397e-07, "loss": 2.6891, "step": 258180 }, { "epoch": 11.121161218072963, "learning_rate": 3.745825163222651e-07, "loss": 2.7712, "step": 258200 }, { "epoch": 11.12202265581255, "learning_rate": 3.7453403434678617e-07, "loss": 2.6227, "step": 258220 }, { "epoch": 11.12288409355214, "learning_rate": 3.7448555237130724e-07, "loss": 2.6325, "step": 258240 }, { "epoch": 11.123745531291727, "learning_rate": 3.744370703958284e-07, "loss": 2.764, "step": 258260 }, { "epoch": 11.124606969031314, "learning_rate": 3.7438858842034944e-07, "loss": 2.648, "step": 258280 }, { "epoch": 11.125468406770901, "learning_rate": 3.743401064448706e-07, "loss": 2.568, "step": 258300 }, { "epoch": 11.126329844510488, "learning_rate": 3.7429162446939163e-07, "loss": 2.867, "step": 258320 }, { "epoch": 11.127191282250076, "learning_rate": 3.742431424939128e-07, "loss": 2.6859, "step": 258340 }, { "epoch": 11.128052719989663, "learning_rate": 3.741946605184339e-07, "loss": 2.6706, "step": 258360 }, { "epoch": 11.12891415772925, "learning_rate": 3.74146178542955e-07, "loss": 2.7601, "step": 258380 }, { "epoch": 11.129775595468837, "learning_rate": 3.740976965674761e-07, "loss": 2.6869, "step": 258400 }, { "epoch": 11.130637033208425, "learning_rate": 3.7404921459199725e-07, "loss": 2.7379, "step": 258420 }, { "epoch": 11.131498470948012, "learning_rate": 3.7400073261651827e-07, "loss": 2.839, "step": 258440 }, { "epoch": 11.132359908687599, "learning_rate": 3.7395225064103945e-07, "loss": 2.7361, "step": 258460 }, { "epoch": 11.133221346427186, "learning_rate": 3.739037686655605e-07, "loss": 2.7026, "step": 258480 }, { "epoch": 11.134082784166774, "learning_rate": 3.7385528669008165e-07, "loss": 2.6763, "step": 258500 }, { "epoch": 11.134944221906363, "learning_rate": 3.738068047146027e-07, "loss": 2.7107, "step": 258520 }, { "epoch": 11.13580565964595, "learning_rate": 3.737583227391239e-07, "loss": 2.6642, "step": 258540 }, { "epoch": 11.136667097385537, "learning_rate": 3.7370984076364497e-07, "loss": 2.8538, "step": 258560 }, { "epoch": 11.137528535125124, "learning_rate": 3.736613587881661e-07, "loss": 2.5917, "step": 258580 }, { "epoch": 11.138389972864712, "learning_rate": 3.7361287681268716e-07, "loss": 2.4737, "step": 258600 }, { "epoch": 11.139251410604299, "learning_rate": 3.7356439483720834e-07, "loss": 2.5963, "step": 258620 }, { "epoch": 11.140112848343886, "learning_rate": 3.7351591286172936e-07, "loss": 2.6081, "step": 258640 }, { "epoch": 11.140974286083473, "learning_rate": 3.7346743088625054e-07, "loss": 2.7198, "step": 258660 }, { "epoch": 11.14183572382306, "learning_rate": 3.7341894891077155e-07, "loss": 2.6683, "step": 258680 }, { "epoch": 11.142697161562648, "learning_rate": 3.7337046693529273e-07, "loss": 2.6184, "step": 258700 }, { "epoch": 11.143558599302235, "learning_rate": 3.733219849598138e-07, "loss": 2.6803, "step": 258720 }, { "epoch": 11.144420037041822, "learning_rate": 3.73273502984335e-07, "loss": 2.6644, "step": 258740 }, { "epoch": 11.14528147478141, "learning_rate": 3.73225021008856e-07, "loss": 2.652, "step": 258760 }, { "epoch": 11.146142912520997, "learning_rate": 3.7317653903337707e-07, "loss": 2.6475, "step": 258780 }, { "epoch": 11.147004350260586, "learning_rate": 3.7312805705789825e-07, "loss": 2.7513, "step": 258800 }, { "epoch": 11.147865788000173, "learning_rate": 3.730795750824193e-07, "loss": 2.7514, "step": 258820 }, { "epoch": 11.14872722573976, "learning_rate": 3.7303109310694044e-07, "loss": 2.6196, "step": 258840 }, { "epoch": 11.149588663479348, "learning_rate": 3.729826111314615e-07, "loss": 2.7612, "step": 258860 }, { "epoch": 11.150450101218935, "learning_rate": 3.729341291559827e-07, "loss": 2.7776, "step": 258880 }, { "epoch": 11.151311538958522, "learning_rate": 3.728856471805037e-07, "loss": 2.6566, "step": 258900 }, { "epoch": 11.15217297669811, "learning_rate": 3.728371652050249e-07, "loss": 2.6475, "step": 258920 }, { "epoch": 11.153034414437696, "learning_rate": 3.7278868322954596e-07, "loss": 2.6842, "step": 258940 }, { "epoch": 11.153895852177284, "learning_rate": 3.727402012540671e-07, "loss": 2.6369, "step": 258960 }, { "epoch": 11.154757289916871, "learning_rate": 3.7269171927858816e-07, "loss": 2.6916, "step": 258980 }, { "epoch": 11.155618727656458, "learning_rate": 3.7264323730310933e-07, "loss": 2.7137, "step": 259000 }, { "epoch": 11.156480165396045, "learning_rate": 3.7259475532763035e-07, "loss": 2.7634, "step": 259020 }, { "epoch": 11.157341603135633, "learning_rate": 3.7254627335215153e-07, "loss": 2.7118, "step": 259040 }, { "epoch": 11.15820304087522, "learning_rate": 3.724977913766726e-07, "loss": 2.8294, "step": 259060 }, { "epoch": 11.159064478614809, "learning_rate": 3.724493094011937e-07, "loss": 2.698, "step": 259080 }, { "epoch": 11.159925916354396, "learning_rate": 3.7240082742571474e-07, "loss": 2.7294, "step": 259100 }, { "epoch": 11.160787354093983, "learning_rate": 3.72352345450236e-07, "loss": 2.6485, "step": 259120 }, { "epoch": 11.16164879183357, "learning_rate": 3.72303863474757e-07, "loss": 2.6891, "step": 259140 }, { "epoch": 11.162510229573158, "learning_rate": 3.7225538149927817e-07, "loss": 2.6406, "step": 259160 }, { "epoch": 11.163371667312745, "learning_rate": 3.7220689952379924e-07, "loss": 2.6055, "step": 259180 }, { "epoch": 11.164233105052332, "learning_rate": 3.7215841754832037e-07, "loss": 2.7416, "step": 259200 }, { "epoch": 11.16509454279192, "learning_rate": 3.7210993557284144e-07, "loss": 2.6381, "step": 259220 }, { "epoch": 11.165955980531507, "learning_rate": 3.720614535973626e-07, "loss": 2.7161, "step": 259240 }, { "epoch": 11.166817418271094, "learning_rate": 3.720129716218837e-07, "loss": 2.6156, "step": 259260 }, { "epoch": 11.167678856010681, "learning_rate": 3.719644896464048e-07, "loss": 2.6264, "step": 259280 }, { "epoch": 11.168540293750269, "learning_rate": 3.719160076709259e-07, "loss": 2.7198, "step": 259300 }, { "epoch": 11.169401731489856, "learning_rate": 3.7186752569544695e-07, "loss": 2.7084, "step": 259320 }, { "epoch": 11.170263169229443, "learning_rate": 3.718190437199681e-07, "loss": 2.626, "step": 259340 }, { "epoch": 11.17112460696903, "learning_rate": 3.7177056174448915e-07, "loss": 2.7168, "step": 259360 }, { "epoch": 11.17198604470862, "learning_rate": 3.7172207976901033e-07, "loss": 2.6728, "step": 259380 }, { "epoch": 11.172847482448207, "learning_rate": 3.7167359779353135e-07, "loss": 2.7146, "step": 259400 }, { "epoch": 11.173708920187794, "learning_rate": 3.716251158180525e-07, "loss": 2.6482, "step": 259420 }, { "epoch": 11.174570357927381, "learning_rate": 3.715766338425736e-07, "loss": 2.9058, "step": 259440 }, { "epoch": 11.175431795666968, "learning_rate": 3.715281518670947e-07, "loss": 2.7505, "step": 259460 }, { "epoch": 11.176293233406556, "learning_rate": 3.714796698916158e-07, "loss": 2.5608, "step": 259480 }, { "epoch": 11.177154671146143, "learning_rate": 3.7143118791613697e-07, "loss": 2.7514, "step": 259500 }, { "epoch": 11.17801610888573, "learning_rate": 3.7138270594065804e-07, "loss": 2.608, "step": 259520 }, { "epoch": 11.178877546625317, "learning_rate": 3.7133422396517916e-07, "loss": 2.6366, "step": 259540 }, { "epoch": 11.179738984364905, "learning_rate": 3.7128574198970024e-07, "loss": 2.5746, "step": 259560 }, { "epoch": 11.180600422104492, "learning_rate": 3.712372600142214e-07, "loss": 2.5941, "step": 259580 }, { "epoch": 11.18146185984408, "learning_rate": 3.7118877803874243e-07, "loss": 2.5702, "step": 259600 }, { "epoch": 11.182323297583666, "learning_rate": 3.7114029606326366e-07, "loss": 2.6965, "step": 259620 }, { "epoch": 11.183184735323255, "learning_rate": 3.710918140877847e-07, "loss": 2.6455, "step": 259640 }, { "epoch": 11.184046173062843, "learning_rate": 3.710433321123058e-07, "loss": 2.6595, "step": 259660 }, { "epoch": 11.18490761080243, "learning_rate": 3.709948501368269e-07, "loss": 2.7583, "step": 259680 }, { "epoch": 11.185769048542017, "learning_rate": 3.7094636816134805e-07, "loss": 2.8513, "step": 259700 }, { "epoch": 11.186630486281604, "learning_rate": 3.7089788618586907e-07, "loss": 2.5674, "step": 259720 }, { "epoch": 11.187491924021192, "learning_rate": 3.7084940421039025e-07, "loss": 2.6932, "step": 259740 }, { "epoch": 11.188353361760779, "learning_rate": 3.708009222349113e-07, "loss": 2.5482, "step": 259760 }, { "epoch": 11.189214799500366, "learning_rate": 3.7075244025943245e-07, "loss": 2.7748, "step": 259780 }, { "epoch": 11.190076237239953, "learning_rate": 3.707039582839535e-07, "loss": 2.8663, "step": 259800 }, { "epoch": 11.19093767497954, "learning_rate": 3.706554763084747e-07, "loss": 2.6781, "step": 259820 }, { "epoch": 11.191799112719128, "learning_rate": 3.706069943329957e-07, "loss": 2.7033, "step": 259840 }, { "epoch": 11.192660550458715, "learning_rate": 3.705585123575168e-07, "loss": 2.6399, "step": 259860 }, { "epoch": 11.193521988198302, "learning_rate": 3.7051003038203796e-07, "loss": 2.5655, "step": 259880 }, { "epoch": 11.19438342593789, "learning_rate": 3.7046154840655903e-07, "loss": 2.5537, "step": 259900 }, { "epoch": 11.195244863677477, "learning_rate": 3.7041306643108016e-07, "loss": 2.6872, "step": 259920 }, { "epoch": 11.196106301417066, "learning_rate": 3.7036458445560123e-07, "loss": 2.7478, "step": 259940 }, { "epoch": 11.196967739156653, "learning_rate": 3.703161024801224e-07, "loss": 2.6607, "step": 259960 }, { "epoch": 11.19782917689624, "learning_rate": 3.702676205046434e-07, "loss": 2.7515, "step": 259980 }, { "epoch": 11.198690614635828, "learning_rate": 3.702191385291646e-07, "loss": 2.7073, "step": 260000 }, { "epoch": 11.199552052375415, "learning_rate": 3.701706565536857e-07, "loss": 2.7493, "step": 260020 }, { "epoch": 11.200413490115002, "learning_rate": 3.7012217457820685e-07, "loss": 2.7059, "step": 260040 }, { "epoch": 11.20127492785459, "learning_rate": 3.7007369260272787e-07, "loss": 2.6397, "step": 260060 }, { "epoch": 11.202136365594177, "learning_rate": 3.7002521062724905e-07, "loss": 2.6288, "step": 260080 }, { "epoch": 11.202997803333764, "learning_rate": 3.6997672865177007e-07, "loss": 2.5814, "step": 260100 }, { "epoch": 11.203859241073351, "learning_rate": 3.6992824667629124e-07, "loss": 2.5966, "step": 260120 }, { "epoch": 11.204720678812938, "learning_rate": 3.698797647008123e-07, "loss": 2.7036, "step": 260140 }, { "epoch": 11.205582116552526, "learning_rate": 3.6983128272533344e-07, "loss": 2.5929, "step": 260160 }, { "epoch": 11.206443554292113, "learning_rate": 3.697828007498545e-07, "loss": 2.9086, "step": 260180 }, { "epoch": 11.2073049920317, "learning_rate": 3.697343187743757e-07, "loss": 2.7528, "step": 260200 }, { "epoch": 11.208166429771289, "learning_rate": 3.6968583679889676e-07, "loss": 2.6824, "step": 260220 }, { "epoch": 11.209027867510876, "learning_rate": 3.696373548234179e-07, "loss": 2.7157, "step": 260240 }, { "epoch": 11.209889305250464, "learning_rate": 3.6958887284793896e-07, "loss": 2.8698, "step": 260260 }, { "epoch": 11.21075074299005, "learning_rate": 3.695403908724601e-07, "loss": 2.7149, "step": 260280 }, { "epoch": 11.211612180729638, "learning_rate": 3.6949190889698115e-07, "loss": 2.7457, "step": 260300 }, { "epoch": 11.212473618469225, "learning_rate": 3.6944342692150233e-07, "loss": 2.6581, "step": 260320 }, { "epoch": 11.213335056208813, "learning_rate": 3.693949449460234e-07, "loss": 2.572, "step": 260340 }, { "epoch": 11.2141964939484, "learning_rate": 3.693464629705445e-07, "loss": 2.7815, "step": 260360 }, { "epoch": 11.215057931687987, "learning_rate": 3.692979809950656e-07, "loss": 2.7149, "step": 260380 }, { "epoch": 11.215919369427574, "learning_rate": 3.6924949901958667e-07, "loss": 2.7086, "step": 260400 }, { "epoch": 11.216780807167162, "learning_rate": 3.692010170441078e-07, "loss": 2.5227, "step": 260420 }, { "epoch": 11.217642244906749, "learning_rate": 3.6915253506862886e-07, "loss": 2.7248, "step": 260440 }, { "epoch": 11.218503682646336, "learning_rate": 3.6910405309315e-07, "loss": 2.7464, "step": 260460 }, { "epoch": 11.219365120385923, "learning_rate": 3.6905557111767106e-07, "loss": 2.6007, "step": 260480 }, { "epoch": 11.220226558125512, "learning_rate": 3.6900708914219224e-07, "loss": 2.8847, "step": 260500 }, { "epoch": 11.2210879958651, "learning_rate": 3.689586071667133e-07, "loss": 2.6996, "step": 260520 }, { "epoch": 11.221949433604687, "learning_rate": 3.6891012519123443e-07, "loss": 2.7259, "step": 260540 }, { "epoch": 11.222810871344274, "learning_rate": 3.688616432157555e-07, "loss": 2.729, "step": 260560 }, { "epoch": 11.223672309083861, "learning_rate": 3.688131612402767e-07, "loss": 2.7597, "step": 260580 }, { "epoch": 11.224533746823449, "learning_rate": 3.6876467926479775e-07, "loss": 2.5726, "step": 260600 }, { "epoch": 11.225395184563036, "learning_rate": 3.687161972893189e-07, "loss": 2.7787, "step": 260620 }, { "epoch": 11.226256622302623, "learning_rate": 3.6866771531383995e-07, "loss": 2.6776, "step": 260640 }, { "epoch": 11.22711806004221, "learning_rate": 3.686192333383611e-07, "loss": 2.7559, "step": 260660 }, { "epoch": 11.227979497781797, "learning_rate": 3.6857075136288214e-07, "loss": 2.6263, "step": 260680 }, { "epoch": 11.228840935521385, "learning_rate": 3.685222693874033e-07, "loss": 2.6744, "step": 260700 }, { "epoch": 11.229702373260972, "learning_rate": 3.684737874119244e-07, "loss": 2.6301, "step": 260720 }, { "epoch": 11.23056381100056, "learning_rate": 3.684253054364455e-07, "loss": 2.6686, "step": 260740 }, { "epoch": 11.231425248740146, "learning_rate": 3.683768234609666e-07, "loss": 2.5924, "step": 260760 }, { "epoch": 11.232286686479735, "learning_rate": 3.6832834148548777e-07, "loss": 2.7315, "step": 260780 }, { "epoch": 11.233148124219323, "learning_rate": 3.682798595100088e-07, "loss": 2.6015, "step": 260800 }, { "epoch": 11.23400956195891, "learning_rate": 3.6823137753452996e-07, "loss": 2.5791, "step": 260820 }, { "epoch": 11.234870999698497, "learning_rate": 3.6818289555905103e-07, "loss": 2.7972, "step": 260840 }, { "epoch": 11.235732437438084, "learning_rate": 3.6813441358357216e-07, "loss": 2.6673, "step": 260860 }, { "epoch": 11.236593875177672, "learning_rate": 3.680859316080932e-07, "loss": 2.6326, "step": 260880 }, { "epoch": 11.237455312917259, "learning_rate": 3.680374496326144e-07, "loss": 2.8249, "step": 260900 }, { "epoch": 11.238316750656846, "learning_rate": 3.679889676571354e-07, "loss": 2.6791, "step": 260920 }, { "epoch": 11.239178188396433, "learning_rate": 3.679404856816565e-07, "loss": 2.7154, "step": 260940 }, { "epoch": 11.24003962613602, "learning_rate": 3.678920037061777e-07, "loss": 2.6196, "step": 260960 }, { "epoch": 11.240901063875608, "learning_rate": 3.6784352173069875e-07, "loss": 2.5993, "step": 260980 }, { "epoch": 11.241762501615195, "learning_rate": 3.6779503975521987e-07, "loss": 2.7121, "step": 261000 }, { "epoch": 11.242623939354782, "learning_rate": 3.6774655777974094e-07, "loss": 2.6487, "step": 261020 }, { "epoch": 11.24348537709437, "learning_rate": 3.676980758042621e-07, "loss": 2.7702, "step": 261040 }, { "epoch": 11.244346814833959, "learning_rate": 3.6764959382878314e-07, "loss": 2.6975, "step": 261060 }, { "epoch": 11.245208252573546, "learning_rate": 3.676011118533043e-07, "loss": 2.8171, "step": 261080 }, { "epoch": 11.246069690313133, "learning_rate": 3.675526298778254e-07, "loss": 2.7124, "step": 261100 }, { "epoch": 11.24693112805272, "learning_rate": 3.675041479023465e-07, "loss": 2.6925, "step": 261120 }, { "epoch": 11.247792565792308, "learning_rate": 3.674556659268676e-07, "loss": 2.7024, "step": 261140 }, { "epoch": 11.248654003531895, "learning_rate": 3.6740718395138876e-07, "loss": 2.6962, "step": 261160 }, { "epoch": 11.249515441271482, "learning_rate": 3.673587019759098e-07, "loss": 2.6577, "step": 261180 }, { "epoch": 11.25037687901107, "learning_rate": 3.6731022000043096e-07, "loss": 2.8084, "step": 261200 }, { "epoch": 11.251238316750657, "learning_rate": 3.6726173802495203e-07, "loss": 2.7224, "step": 261220 }, { "epoch": 11.252099754490244, "learning_rate": 3.6721325604947315e-07, "loss": 2.7054, "step": 261240 }, { "epoch": 11.252961192229831, "learning_rate": 3.671647740739942e-07, "loss": 2.7645, "step": 261260 }, { "epoch": 11.253822629969418, "learning_rate": 3.671162920985154e-07, "loss": 2.6893, "step": 261280 }, { "epoch": 11.254684067709006, "learning_rate": 3.6706781012303647e-07, "loss": 2.7444, "step": 261300 }, { "epoch": 11.255545505448593, "learning_rate": 3.670193281475576e-07, "loss": 2.6174, "step": 261320 }, { "epoch": 11.256406943188182, "learning_rate": 3.6697084617207867e-07, "loss": 2.6677, "step": 261340 }, { "epoch": 11.25726838092777, "learning_rate": 3.6692236419659985e-07, "loss": 2.5612, "step": 261360 }, { "epoch": 11.258129818667356, "learning_rate": 3.6687388222112086e-07, "loss": 2.6961, "step": 261380 }, { "epoch": 11.258991256406944, "learning_rate": 3.668254002456421e-07, "loss": 2.6096, "step": 261400 }, { "epoch": 11.259852694146531, "learning_rate": 3.667769182701631e-07, "loss": 2.6481, "step": 261420 }, { "epoch": 11.260714131886118, "learning_rate": 3.6672843629468424e-07, "loss": 2.7822, "step": 261440 }, { "epoch": 11.261575569625705, "learning_rate": 3.666799543192053e-07, "loss": 2.8938, "step": 261460 }, { "epoch": 11.262437007365293, "learning_rate": 3.666314723437264e-07, "loss": 2.6835, "step": 261480 }, { "epoch": 11.26329844510488, "learning_rate": 3.665829903682475e-07, "loss": 2.6235, "step": 261500 }, { "epoch": 11.264159882844467, "learning_rate": 3.665345083927686e-07, "loss": 2.7454, "step": 261520 }, { "epoch": 11.265021320584054, "learning_rate": 3.6648602641728975e-07, "loss": 2.6238, "step": 261540 }, { "epoch": 11.265882758323642, "learning_rate": 3.6643754444181077e-07, "loss": 2.7557, "step": 261560 }, { "epoch": 11.266744196063229, "learning_rate": 3.6638906246633195e-07, "loss": 2.6695, "step": 261580 }, { "epoch": 11.267605633802816, "learning_rate": 3.66340580490853e-07, "loss": 2.8005, "step": 261600 }, { "epoch": 11.268467071542405, "learning_rate": 3.6629209851537415e-07, "loss": 2.8192, "step": 261620 }, { "epoch": 11.269328509281992, "learning_rate": 3.662436165398952e-07, "loss": 2.7, "step": 261640 }, { "epoch": 11.27018994702158, "learning_rate": 3.661951345644164e-07, "loss": 2.5918, "step": 261660 }, { "epoch": 11.271051384761167, "learning_rate": 3.6614665258893747e-07, "loss": 2.7405, "step": 261680 }, { "epoch": 11.271912822500754, "learning_rate": 3.660981706134586e-07, "loss": 2.6354, "step": 261700 }, { "epoch": 11.272774260240341, "learning_rate": 3.6604968863797966e-07, "loss": 2.7084, "step": 261720 }, { "epoch": 11.273635697979929, "learning_rate": 3.6600120666250084e-07, "loss": 2.6866, "step": 261740 }, { "epoch": 11.274497135719516, "learning_rate": 3.6595272468702186e-07, "loss": 2.796, "step": 261760 }, { "epoch": 11.275358573459103, "learning_rate": 3.6590424271154304e-07, "loss": 2.6673, "step": 261780 }, { "epoch": 11.27622001119869, "learning_rate": 3.658557607360641e-07, "loss": 2.6104, "step": 261800 }, { "epoch": 11.277081448938278, "learning_rate": 3.658072787605853e-07, "loss": 2.8615, "step": 261820 }, { "epoch": 11.277942886677865, "learning_rate": 3.657587967851063e-07, "loss": 2.6338, "step": 261840 }, { "epoch": 11.278804324417452, "learning_rate": 3.657103148096275e-07, "loss": 2.6073, "step": 261860 }, { "epoch": 11.27966576215704, "learning_rate": 3.656618328341485e-07, "loss": 2.6492, "step": 261880 }, { "epoch": 11.280527199896628, "learning_rate": 3.656133508586697e-07, "loss": 2.8675, "step": 261900 }, { "epoch": 11.281388637636216, "learning_rate": 3.6556486888319075e-07, "loss": 2.6832, "step": 261920 }, { "epoch": 11.282250075375803, "learning_rate": 3.6551638690771187e-07, "loss": 2.7999, "step": 261940 }, { "epoch": 11.28311151311539, "learning_rate": 3.6546790493223294e-07, "loss": 2.8716, "step": 261960 }, { "epoch": 11.283972950854977, "learning_rate": 3.654194229567541e-07, "loss": 2.619, "step": 261980 }, { "epoch": 11.284834388594565, "learning_rate": 3.653709409812752e-07, "loss": 2.6946, "step": 262000 }, { "epoch": 11.285695826334152, "learning_rate": 3.653224590057962e-07, "loss": 2.8203, "step": 262020 }, { "epoch": 11.286557264073739, "learning_rate": 3.652739770303174e-07, "loss": 2.5111, "step": 262040 }, { "epoch": 11.287418701813326, "learning_rate": 3.6522549505483846e-07, "loss": 2.7455, "step": 262060 }, { "epoch": 11.288280139552914, "learning_rate": 3.651770130793596e-07, "loss": 2.677, "step": 262080 }, { "epoch": 11.2891415772925, "learning_rate": 3.6512853110388066e-07, "loss": 2.589, "step": 262100 }, { "epoch": 11.290003015032088, "learning_rate": 3.6508004912840183e-07, "loss": 2.6868, "step": 262120 }, { "epoch": 11.290864452771675, "learning_rate": 3.6503156715292285e-07, "loss": 2.6337, "step": 262140 }, { "epoch": 11.291725890511263, "learning_rate": 3.6498308517744403e-07, "loss": 2.7394, "step": 262160 }, { "epoch": 11.29258732825085, "learning_rate": 3.649346032019651e-07, "loss": 2.642, "step": 262180 }, { "epoch": 11.293448765990439, "learning_rate": 3.648861212264862e-07, "loss": 2.5684, "step": 262200 }, { "epoch": 11.294310203730026, "learning_rate": 3.648376392510073e-07, "loss": 2.8067, "step": 262220 }, { "epoch": 11.295171641469613, "learning_rate": 3.647891572755284e-07, "loss": 2.7239, "step": 262240 }, { "epoch": 11.2960330792092, "learning_rate": 3.647406753000495e-07, "loss": 2.579, "step": 262260 }, { "epoch": 11.296894516948788, "learning_rate": 3.6469219332457067e-07, "loss": 2.6761, "step": 262280 }, { "epoch": 11.297755954688375, "learning_rate": 3.6464371134909174e-07, "loss": 2.7015, "step": 262300 }, { "epoch": 11.298617392427962, "learning_rate": 3.6459522937361287e-07, "loss": 2.7074, "step": 262320 }, { "epoch": 11.29947883016755, "learning_rate": 3.6454674739813394e-07, "loss": 2.7385, "step": 262340 }, { "epoch": 11.300340267907137, "learning_rate": 3.644982654226551e-07, "loss": 2.5927, "step": 262360 }, { "epoch": 11.301201705646724, "learning_rate": 3.644497834471762e-07, "loss": 2.5618, "step": 262380 }, { "epoch": 11.302063143386311, "learning_rate": 3.644013014716973e-07, "loss": 2.5528, "step": 262400 }, { "epoch": 11.302924581125898, "learning_rate": 3.643528194962184e-07, "loss": 2.5595, "step": 262420 }, { "epoch": 11.303786018865486, "learning_rate": 3.6430433752073956e-07, "loss": 2.6969, "step": 262440 }, { "epoch": 11.304647456605075, "learning_rate": 3.642558555452606e-07, "loss": 2.6438, "step": 262460 }, { "epoch": 11.305508894344662, "learning_rate": 3.6420737356978176e-07, "loss": 2.9873, "step": 262480 }, { "epoch": 11.30637033208425, "learning_rate": 3.6415889159430283e-07, "loss": 2.8199, "step": 262500 }, { "epoch": 11.307231769823836, "learning_rate": 3.6411040961882395e-07, "loss": 2.5067, "step": 262520 }, { "epoch": 11.308093207563424, "learning_rate": 3.64061927643345e-07, "loss": 2.6805, "step": 262540 }, { "epoch": 11.308954645303011, "learning_rate": 3.640134456678661e-07, "loss": 2.689, "step": 262560 }, { "epoch": 11.309816083042598, "learning_rate": 3.639649636923872e-07, "loss": 2.6227, "step": 262580 }, { "epoch": 11.310677520782185, "learning_rate": 3.639164817169083e-07, "loss": 2.798, "step": 262600 }, { "epoch": 11.311538958521773, "learning_rate": 3.6386799974142947e-07, "loss": 2.7261, "step": 262620 }, { "epoch": 11.31240039626136, "learning_rate": 3.638195177659505e-07, "loss": 2.6597, "step": 262640 }, { "epoch": 11.313261834000947, "learning_rate": 3.637710357904716e-07, "loss": 2.6717, "step": 262660 }, { "epoch": 11.314123271740534, "learning_rate": 3.6372255381499274e-07, "loss": 2.7021, "step": 262680 }, { "epoch": 11.314984709480122, "learning_rate": 3.6367407183951386e-07, "loss": 2.5828, "step": 262700 }, { "epoch": 11.315846147219709, "learning_rate": 3.6362558986403493e-07, "loss": 2.573, "step": 262720 }, { "epoch": 11.316707584959296, "learning_rate": 3.635771078885561e-07, "loss": 2.5739, "step": 262740 }, { "epoch": 11.317569022698885, "learning_rate": 3.635286259130772e-07, "loss": 2.7258, "step": 262760 }, { "epoch": 11.318430460438472, "learning_rate": 3.634801439375983e-07, "loss": 2.6771, "step": 262780 }, { "epoch": 11.31929189817806, "learning_rate": 3.634316619621194e-07, "loss": 2.5985, "step": 262800 }, { "epoch": 11.320153335917647, "learning_rate": 3.6338317998664055e-07, "loss": 2.5751, "step": 262820 }, { "epoch": 11.321014773657234, "learning_rate": 3.6333469801116157e-07, "loss": 2.7373, "step": 262840 }, { "epoch": 11.321876211396821, "learning_rate": 3.6328621603568275e-07, "loss": 2.5901, "step": 262860 }, { "epoch": 11.322737649136409, "learning_rate": 3.632377340602038e-07, "loss": 2.686, "step": 262880 }, { "epoch": 11.323599086875996, "learning_rate": 3.6318925208472495e-07, "loss": 2.7234, "step": 262900 }, { "epoch": 11.324460524615583, "learning_rate": 3.63140770109246e-07, "loss": 2.6707, "step": 262920 }, { "epoch": 11.32532196235517, "learning_rate": 3.630922881337672e-07, "loss": 2.6246, "step": 262940 }, { "epoch": 11.326183400094758, "learning_rate": 3.630438061582882e-07, "loss": 2.8864, "step": 262960 }, { "epoch": 11.327044837834345, "learning_rate": 3.629953241828094e-07, "loss": 2.5684, "step": 262980 }, { "epoch": 11.327906275573932, "learning_rate": 3.6294684220733046e-07, "loss": 2.6732, "step": 263000 }, { "epoch": 11.328767713313521, "learning_rate": 3.628983602318516e-07, "loss": 2.7212, "step": 263020 }, { "epoch": 11.329629151053108, "learning_rate": 3.6284987825637266e-07, "loss": 2.6075, "step": 263040 }, { "epoch": 11.330490588792696, "learning_rate": 3.6280139628089383e-07, "loss": 2.5, "step": 263060 }, { "epoch": 11.331352026532283, "learning_rate": 3.627529143054149e-07, "loss": 2.7131, "step": 263080 }, { "epoch": 11.33221346427187, "learning_rate": 3.627044323299359e-07, "loss": 2.6999, "step": 263100 }, { "epoch": 11.333074902011457, "learning_rate": 3.626559503544571e-07, "loss": 2.6716, "step": 263120 }, { "epoch": 11.333936339751045, "learning_rate": 3.626074683789782e-07, "loss": 2.6048, "step": 263140 }, { "epoch": 11.334797777490632, "learning_rate": 3.625589864034993e-07, "loss": 2.6689, "step": 263160 }, { "epoch": 11.33565921523022, "learning_rate": 3.6251050442802037e-07, "loss": 2.7047, "step": 263180 }, { "epoch": 11.336520652969806, "learning_rate": 3.6246202245254155e-07, "loss": 2.8244, "step": 263200 }, { "epoch": 11.337382090709394, "learning_rate": 3.6241354047706257e-07, "loss": 2.7548, "step": 263220 }, { "epoch": 11.33824352844898, "learning_rate": 3.6236505850158374e-07, "loss": 2.6733, "step": 263240 }, { "epoch": 11.339104966188568, "learning_rate": 3.623165765261048e-07, "loss": 2.6915, "step": 263260 }, { "epoch": 11.339966403928155, "learning_rate": 3.6226809455062594e-07, "loss": 2.6419, "step": 263280 }, { "epoch": 11.340827841667743, "learning_rate": 3.62219612575147e-07, "loss": 2.6339, "step": 263300 }, { "epoch": 11.341689279407332, "learning_rate": 3.621711305996682e-07, "loss": 2.8433, "step": 263320 }, { "epoch": 11.342550717146919, "learning_rate": 3.621226486241892e-07, "loss": 2.5349, "step": 263340 }, { "epoch": 11.343412154886506, "learning_rate": 3.620741666487104e-07, "loss": 2.6242, "step": 263360 }, { "epoch": 11.344273592626093, "learning_rate": 3.6202568467323146e-07, "loss": 2.6204, "step": 263380 }, { "epoch": 11.34513503036568, "learning_rate": 3.619772026977526e-07, "loss": 2.8318, "step": 263400 }, { "epoch": 11.345996468105268, "learning_rate": 3.6192872072227365e-07, "loss": 2.6383, "step": 263420 }, { "epoch": 11.346857905844855, "learning_rate": 3.6188023874679483e-07, "loss": 2.755, "step": 263440 }, { "epoch": 11.347719343584442, "learning_rate": 3.618317567713159e-07, "loss": 2.7493, "step": 263460 }, { "epoch": 11.34858078132403, "learning_rate": 3.61783274795837e-07, "loss": 2.6125, "step": 263480 }, { "epoch": 11.349442219063617, "learning_rate": 3.617347928203581e-07, "loss": 2.6253, "step": 263500 }, { "epoch": 11.350303656803204, "learning_rate": 3.6168631084487927e-07, "loss": 2.7914, "step": 263520 }, { "epoch": 11.351165094542791, "learning_rate": 3.616378288694003e-07, "loss": 2.8067, "step": 263540 }, { "epoch": 11.352026532282379, "learning_rate": 3.6158934689392147e-07, "loss": 2.7362, "step": 263560 }, { "epoch": 11.352887970021966, "learning_rate": 3.6154086491844254e-07, "loss": 2.5429, "step": 263580 }, { "epoch": 11.353749407761555, "learning_rate": 3.614923829429637e-07, "loss": 2.6339, "step": 263600 }, { "epoch": 11.354610845501142, "learning_rate": 3.6144390096748474e-07, "loss": 2.6697, "step": 263620 }, { "epoch": 11.35547228324073, "learning_rate": 3.613954189920058e-07, "loss": 2.7363, "step": 263640 }, { "epoch": 11.356333720980317, "learning_rate": 3.6134693701652693e-07, "loss": 2.8156, "step": 263660 }, { "epoch": 11.357195158719904, "learning_rate": 3.61298455041048e-07, "loss": 2.6751, "step": 263680 }, { "epoch": 11.358056596459491, "learning_rate": 3.612499730655692e-07, "loss": 2.549, "step": 263700 }, { "epoch": 11.358918034199078, "learning_rate": 3.6120149109009025e-07, "loss": 2.6644, "step": 263720 }, { "epoch": 11.359779471938666, "learning_rate": 3.611530091146114e-07, "loss": 2.6996, "step": 263740 }, { "epoch": 11.360640909678253, "learning_rate": 3.611045271391325e-07, "loss": 2.6133, "step": 263760 }, { "epoch": 11.36150234741784, "learning_rate": 3.610560451636536e-07, "loss": 2.5933, "step": 263780 }, { "epoch": 11.362363785157427, "learning_rate": 3.6100756318817464e-07, "loss": 2.4859, "step": 263800 }, { "epoch": 11.363225222897015, "learning_rate": 3.609590812126958e-07, "loss": 2.713, "step": 263820 }, { "epoch": 11.364086660636602, "learning_rate": 3.609105992372169e-07, "loss": 2.7598, "step": 263840 }, { "epoch": 11.364948098376189, "learning_rate": 3.60862117261738e-07, "loss": 2.8019, "step": 263860 }, { "epoch": 11.365809536115778, "learning_rate": 3.608136352862591e-07, "loss": 2.6633, "step": 263880 }, { "epoch": 11.366670973855365, "learning_rate": 3.6076515331078027e-07, "loss": 2.8507, "step": 263900 }, { "epoch": 11.367532411594953, "learning_rate": 3.607166713353013e-07, "loss": 2.7872, "step": 263920 }, { "epoch": 11.36839384933454, "learning_rate": 3.6066818935982246e-07, "loss": 2.7264, "step": 263940 }, { "epoch": 11.369255287074127, "learning_rate": 3.6061970738434353e-07, "loss": 2.6419, "step": 263960 }, { "epoch": 11.370116724813714, "learning_rate": 3.6057122540886466e-07, "loss": 2.7795, "step": 263980 }, { "epoch": 11.370978162553302, "learning_rate": 3.6052274343338573e-07, "loss": 2.5282, "step": 264000 }, { "epoch": 11.371839600292889, "learning_rate": 3.6047426145790685e-07, "loss": 2.6325, "step": 264020 }, { "epoch": 11.372701038032476, "learning_rate": 3.604257794824279e-07, "loss": 2.6761, "step": 264040 }, { "epoch": 11.373562475772063, "learning_rate": 3.603772975069491e-07, "loss": 2.7391, "step": 264060 }, { "epoch": 11.37442391351165, "learning_rate": 3.603288155314702e-07, "loss": 2.6634, "step": 264080 }, { "epoch": 11.375285351251238, "learning_rate": 3.602803335559913e-07, "loss": 2.8457, "step": 264100 }, { "epoch": 11.376146788990825, "learning_rate": 3.6023185158051237e-07, "loss": 2.7611, "step": 264120 }, { "epoch": 11.377008226730412, "learning_rate": 3.6018336960503355e-07, "loss": 2.672, "step": 264140 }, { "epoch": 11.377869664470001, "learning_rate": 3.601348876295546e-07, "loss": 2.7513, "step": 264160 }, { "epoch": 11.378731102209588, "learning_rate": 3.6008640565407564e-07, "loss": 2.6982, "step": 264180 }, { "epoch": 11.379592539949176, "learning_rate": 3.600379236785968e-07, "loss": 2.7165, "step": 264200 }, { "epoch": 11.380453977688763, "learning_rate": 3.599894417031179e-07, "loss": 2.7722, "step": 264220 }, { "epoch": 11.38131541542835, "learning_rate": 3.59940959727639e-07, "loss": 2.6672, "step": 264240 }, { "epoch": 11.382176853167937, "learning_rate": 3.598924777521601e-07, "loss": 2.6427, "step": 264260 }, { "epoch": 11.383038290907525, "learning_rate": 3.5984399577668126e-07, "loss": 2.8462, "step": 264280 }, { "epoch": 11.383899728647112, "learning_rate": 3.597955138012023e-07, "loss": 2.7421, "step": 264300 }, { "epoch": 11.3847611663867, "learning_rate": 3.5974703182572346e-07, "loss": 2.5693, "step": 264320 }, { "epoch": 11.385622604126286, "learning_rate": 3.5969854985024453e-07, "loss": 2.7751, "step": 264340 }, { "epoch": 11.386484041865874, "learning_rate": 3.5965006787476565e-07, "loss": 2.7341, "step": 264360 }, { "epoch": 11.387345479605461, "learning_rate": 3.596015858992867e-07, "loss": 2.824, "step": 264380 }, { "epoch": 11.388206917345048, "learning_rate": 3.595531039238079e-07, "loss": 2.8201, "step": 264400 }, { "epoch": 11.389068355084635, "learning_rate": 3.595046219483289e-07, "loss": 2.7071, "step": 264420 }, { "epoch": 11.389929792824224, "learning_rate": 3.5945613997285004e-07, "loss": 2.6153, "step": 264440 }, { "epoch": 11.390791230563812, "learning_rate": 3.5940765799737117e-07, "loss": 2.7009, "step": 264460 }, { "epoch": 11.391652668303399, "learning_rate": 3.593591760218923e-07, "loss": 2.6572, "step": 264480 }, { "epoch": 11.392514106042986, "learning_rate": 3.5931069404641336e-07, "loss": 2.5899, "step": 264500 }, { "epoch": 11.393375543782573, "learning_rate": 3.5926221207093454e-07, "loss": 2.5121, "step": 264520 }, { "epoch": 11.39423698152216, "learning_rate": 3.592137300954556e-07, "loss": 2.5968, "step": 264540 }, { "epoch": 11.395098419261748, "learning_rate": 3.5916524811997674e-07, "loss": 2.735, "step": 264560 }, { "epoch": 11.395959857001335, "learning_rate": 3.591167661444978e-07, "loss": 2.7383, "step": 264580 }, { "epoch": 11.396821294740922, "learning_rate": 3.59068284169019e-07, "loss": 2.642, "step": 264600 }, { "epoch": 11.39768273248051, "learning_rate": 3.5901980219354e-07, "loss": 2.6297, "step": 264620 }, { "epoch": 11.398544170220097, "learning_rate": 3.589713202180612e-07, "loss": 2.6994, "step": 264640 }, { "epoch": 11.399405607959684, "learning_rate": 3.5892283824258225e-07, "loss": 2.5456, "step": 264660 }, { "epoch": 11.400267045699271, "learning_rate": 3.588743562671034e-07, "loss": 2.6547, "step": 264680 }, { "epoch": 11.401128483438859, "learning_rate": 3.5882587429162445e-07, "loss": 2.6829, "step": 264700 }, { "epoch": 11.401989921178448, "learning_rate": 3.587773923161455e-07, "loss": 2.7299, "step": 264720 }, { "epoch": 11.402851358918035, "learning_rate": 3.5872891034066665e-07, "loss": 2.6594, "step": 264740 }, { "epoch": 11.403712796657622, "learning_rate": 3.586804283651877e-07, "loss": 2.6305, "step": 264760 }, { "epoch": 11.40457423439721, "learning_rate": 3.586319463897089e-07, "loss": 2.6109, "step": 264780 }, { "epoch": 11.405435672136797, "learning_rate": 3.5858346441422997e-07, "loss": 2.7643, "step": 264800 }, { "epoch": 11.406297109876384, "learning_rate": 3.585349824387511e-07, "loss": 2.7715, "step": 264820 }, { "epoch": 11.407158547615971, "learning_rate": 3.5848650046327216e-07, "loss": 2.7675, "step": 264840 }, { "epoch": 11.408019985355558, "learning_rate": 3.5843801848779334e-07, "loss": 2.7795, "step": 264860 }, { "epoch": 11.408881423095146, "learning_rate": 3.5838953651231436e-07, "loss": 2.5671, "step": 264880 }, { "epoch": 11.409742860834733, "learning_rate": 3.5834105453683554e-07, "loss": 2.6557, "step": 264900 }, { "epoch": 11.41060429857432, "learning_rate": 3.582925725613566e-07, "loss": 2.8492, "step": 264920 }, { "epoch": 11.411465736313907, "learning_rate": 3.5824409058587773e-07, "loss": 2.7376, "step": 264940 }, { "epoch": 11.412327174053495, "learning_rate": 3.581956086103988e-07, "loss": 2.7667, "step": 264960 }, { "epoch": 11.413188611793082, "learning_rate": 3.5814712663492e-07, "loss": 2.6129, "step": 264980 }, { "epoch": 11.41405004953267, "learning_rate": 3.58098644659441e-07, "loss": 2.7216, "step": 265000 }, { "epoch": 11.414911487272258, "learning_rate": 3.580501626839622e-07, "loss": 2.6674, "step": 265020 }, { "epoch": 11.415772925011845, "learning_rate": 3.5800168070848325e-07, "loss": 2.5365, "step": 265040 }, { "epoch": 11.416634362751433, "learning_rate": 3.5795319873300437e-07, "loss": 2.6728, "step": 265060 }, { "epoch": 11.41749580049102, "learning_rate": 3.5790471675752544e-07, "loss": 2.6456, "step": 265080 }, { "epoch": 11.418357238230607, "learning_rate": 3.578562347820466e-07, "loss": 2.7634, "step": 265100 }, { "epoch": 11.419218675970194, "learning_rate": 3.5780775280656764e-07, "loss": 2.6315, "step": 265120 }, { "epoch": 11.420080113709782, "learning_rate": 3.577592708310888e-07, "loss": 2.6406, "step": 265140 }, { "epoch": 11.420941551449369, "learning_rate": 3.577107888556099e-07, "loss": 2.649, "step": 265160 }, { "epoch": 11.421802989188956, "learning_rate": 3.57662306880131e-07, "loss": 2.7111, "step": 265180 }, { "epoch": 11.422664426928543, "learning_rate": 3.576138249046521e-07, "loss": 2.8068, "step": 265200 }, { "epoch": 11.42352586466813, "learning_rate": 3.5756534292917326e-07, "loss": 2.6857, "step": 265220 }, { "epoch": 11.424387302407718, "learning_rate": 3.5751686095369433e-07, "loss": 2.5895, "step": 265240 }, { "epoch": 11.425248740147305, "learning_rate": 3.5746837897821535e-07, "loss": 2.6672, "step": 265260 }, { "epoch": 11.426110177886894, "learning_rate": 3.5741989700273653e-07, "loss": 2.8758, "step": 265280 }, { "epoch": 11.426971615626481, "learning_rate": 3.573714150272576e-07, "loss": 2.6769, "step": 265300 }, { "epoch": 11.427833053366069, "learning_rate": 3.573229330517787e-07, "loss": 2.7178, "step": 265320 }, { "epoch": 11.428694491105656, "learning_rate": 3.572744510762998e-07, "loss": 2.6932, "step": 265340 }, { "epoch": 11.429555928845243, "learning_rate": 3.57225969100821e-07, "loss": 2.6182, "step": 265360 }, { "epoch": 11.43041736658483, "learning_rate": 3.57177487125342e-07, "loss": 2.6274, "step": 265380 }, { "epoch": 11.431278804324418, "learning_rate": 3.5712900514986317e-07, "loss": 2.6637, "step": 265400 }, { "epoch": 11.432140242064005, "learning_rate": 3.5708052317438424e-07, "loss": 2.7774, "step": 265420 }, { "epoch": 11.433001679803592, "learning_rate": 3.5703204119890537e-07, "loss": 2.6659, "step": 265440 }, { "epoch": 11.43386311754318, "learning_rate": 3.5698355922342644e-07, "loss": 2.6816, "step": 265460 }, { "epoch": 11.434724555282767, "learning_rate": 3.569350772479476e-07, "loss": 2.7127, "step": 265480 }, { "epoch": 11.435585993022354, "learning_rate": 3.568865952724687e-07, "loss": 2.5942, "step": 265500 }, { "epoch": 11.436447430761941, "learning_rate": 3.568381132969898e-07, "loss": 2.5864, "step": 265520 }, { "epoch": 11.437308868501528, "learning_rate": 3.5678963132151093e-07, "loss": 2.5962, "step": 265540 }, { "epoch": 11.438170306241116, "learning_rate": 3.5674114934603206e-07, "loss": 2.8055, "step": 265560 }, { "epoch": 11.439031743980705, "learning_rate": 3.566926673705531e-07, "loss": 2.6882, "step": 265580 }, { "epoch": 11.439893181720292, "learning_rate": 3.5664418539507426e-07, "loss": 2.6954, "step": 265600 }, { "epoch": 11.440754619459879, "learning_rate": 3.5659570341959533e-07, "loss": 2.643, "step": 265620 }, { "epoch": 11.441616057199466, "learning_rate": 3.5654722144411645e-07, "loss": 2.8738, "step": 265640 }, { "epoch": 11.442477494939054, "learning_rate": 3.564987394686375e-07, "loss": 2.7065, "step": 265660 }, { "epoch": 11.44333893267864, "learning_rate": 3.564502574931587e-07, "loss": 2.6687, "step": 265680 }, { "epoch": 11.444200370418228, "learning_rate": 3.564017755176797e-07, "loss": 2.9295, "step": 265700 }, { "epoch": 11.445061808157815, "learning_rate": 3.563532935422009e-07, "loss": 2.6549, "step": 265720 }, { "epoch": 11.445923245897403, "learning_rate": 3.5630481156672197e-07, "loss": 2.7723, "step": 265740 }, { "epoch": 11.44678468363699, "learning_rate": 3.562563295912431e-07, "loss": 2.6675, "step": 265760 }, { "epoch": 11.447646121376577, "learning_rate": 3.5620784761576416e-07, "loss": 2.6307, "step": 265780 }, { "epoch": 11.448507559116164, "learning_rate": 3.5615936564028523e-07, "loss": 2.6387, "step": 265800 }, { "epoch": 11.449368996855751, "learning_rate": 3.5611088366480636e-07, "loss": 2.676, "step": 265820 }, { "epoch": 11.45023043459534, "learning_rate": 3.5606240168932743e-07, "loss": 2.7642, "step": 265840 }, { "epoch": 11.451091872334928, "learning_rate": 3.560139197138486e-07, "loss": 2.6546, "step": 265860 }, { "epoch": 11.451953310074515, "learning_rate": 3.559654377383697e-07, "loss": 2.7686, "step": 265880 }, { "epoch": 11.452814747814102, "learning_rate": 3.559169557628908e-07, "loss": 2.6982, "step": 265900 }, { "epoch": 11.45367618555369, "learning_rate": 3.558684737874119e-07, "loss": 2.6708, "step": 265920 }, { "epoch": 11.454537623293277, "learning_rate": 3.5581999181193305e-07, "loss": 2.6241, "step": 265940 }, { "epoch": 11.455399061032864, "learning_rate": 3.5577150983645407e-07, "loss": 2.7323, "step": 265960 }, { "epoch": 11.456260498772451, "learning_rate": 3.5572302786097525e-07, "loss": 2.6763, "step": 265980 }, { "epoch": 11.457121936512038, "learning_rate": 3.556745458854963e-07, "loss": 2.6664, "step": 266000 }, { "epoch": 11.457983374251626, "learning_rate": 3.5562606391001744e-07, "loss": 2.6869, "step": 266020 }, { "epoch": 11.458844811991213, "learning_rate": 3.555775819345385e-07, "loss": 2.7559, "step": 266040 }, { "epoch": 11.4597062497308, "learning_rate": 3.555290999590597e-07, "loss": 2.6276, "step": 266060 }, { "epoch": 11.460567687470387, "learning_rate": 3.554806179835807e-07, "loss": 2.5962, "step": 266080 }, { "epoch": 11.461429125209975, "learning_rate": 3.554321360081019e-07, "loss": 2.5784, "step": 266100 }, { "epoch": 11.462290562949562, "learning_rate": 3.5538365403262296e-07, "loss": 2.6144, "step": 266120 }, { "epoch": 11.463152000689151, "learning_rate": 3.553351720571441e-07, "loss": 2.536, "step": 266140 }, { "epoch": 11.464013438428738, "learning_rate": 3.5528669008166516e-07, "loss": 2.4818, "step": 266160 }, { "epoch": 11.464874876168325, "learning_rate": 3.5523820810618633e-07, "loss": 2.7523, "step": 266180 }, { "epoch": 11.465736313907913, "learning_rate": 3.5518972613070735e-07, "loss": 2.6485, "step": 266200 }, { "epoch": 11.4665977516475, "learning_rate": 3.551412441552285e-07, "loss": 2.5413, "step": 266220 }, { "epoch": 11.467459189387087, "learning_rate": 3.550927621797496e-07, "loss": 2.6649, "step": 266240 }, { "epoch": 11.468320627126674, "learning_rate": 3.550442802042707e-07, "loss": 2.7784, "step": 266260 }, { "epoch": 11.469182064866262, "learning_rate": 3.549957982287918e-07, "loss": 2.6745, "step": 266280 }, { "epoch": 11.470043502605849, "learning_rate": 3.54947316253313e-07, "loss": 2.6983, "step": 266300 }, { "epoch": 11.470904940345436, "learning_rate": 3.5489883427783405e-07, "loss": 2.5935, "step": 266320 }, { "epoch": 11.471766378085023, "learning_rate": 3.5485035230235507e-07, "loss": 2.6699, "step": 266340 }, { "epoch": 11.47262781582461, "learning_rate": 3.5480187032687624e-07, "loss": 2.6687, "step": 266360 }, { "epoch": 11.473489253564198, "learning_rate": 3.5475338835139726e-07, "loss": 2.7936, "step": 266380 }, { "epoch": 11.474350691303787, "learning_rate": 3.5470490637591844e-07, "loss": 2.5499, "step": 266400 }, { "epoch": 11.475212129043374, "learning_rate": 3.546564244004395e-07, "loss": 2.6454, "step": 266420 }, { "epoch": 11.476073566782961, "learning_rate": 3.546079424249607e-07, "loss": 2.5643, "step": 266440 }, { "epoch": 11.476935004522549, "learning_rate": 3.545594604494817e-07, "loss": 2.6276, "step": 266460 }, { "epoch": 11.477796442262136, "learning_rate": 3.545109784740029e-07, "loss": 2.5711, "step": 266480 }, { "epoch": 11.478657880001723, "learning_rate": 3.5446249649852395e-07, "loss": 2.5766, "step": 266500 }, { "epoch": 11.47951931774131, "learning_rate": 3.544140145230451e-07, "loss": 2.8699, "step": 266520 }, { "epoch": 11.480380755480898, "learning_rate": 3.5436553254756615e-07, "loss": 2.6325, "step": 266540 }, { "epoch": 11.481242193220485, "learning_rate": 3.5431705057208733e-07, "loss": 2.7475, "step": 266560 }, { "epoch": 11.482103630960072, "learning_rate": 3.542685685966084e-07, "loss": 2.651, "step": 266580 }, { "epoch": 11.48296506869966, "learning_rate": 3.542200866211295e-07, "loss": 2.7225, "step": 266600 }, { "epoch": 11.483826506439247, "learning_rate": 3.541716046456506e-07, "loss": 2.7533, "step": 266620 }, { "epoch": 11.484687944178834, "learning_rate": 3.5412312267017177e-07, "loss": 2.6442, "step": 266640 }, { "epoch": 11.485549381918421, "learning_rate": 3.540746406946928e-07, "loss": 2.5328, "step": 266660 }, { "epoch": 11.486410819658008, "learning_rate": 3.5402615871921397e-07, "loss": 2.7359, "step": 266680 }, { "epoch": 11.487272257397597, "learning_rate": 3.5397767674373504e-07, "loss": 2.6927, "step": 266700 }, { "epoch": 11.488133695137185, "learning_rate": 3.5392919476825616e-07, "loss": 2.889, "step": 266720 }, { "epoch": 11.488995132876772, "learning_rate": 3.5388071279277724e-07, "loss": 2.6259, "step": 266740 }, { "epoch": 11.48985657061636, "learning_rate": 3.538322308172984e-07, "loss": 2.5006, "step": 266760 }, { "epoch": 11.490718008355946, "learning_rate": 3.5378374884181943e-07, "loss": 2.6701, "step": 266780 }, { "epoch": 11.491579446095534, "learning_rate": 3.537352668663406e-07, "loss": 2.5574, "step": 266800 }, { "epoch": 11.49244088383512, "learning_rate": 3.536867848908617e-07, "loss": 2.7275, "step": 266820 }, { "epoch": 11.493302321574708, "learning_rate": 3.536383029153828e-07, "loss": 2.6698, "step": 266840 }, { "epoch": 11.494163759314295, "learning_rate": 3.535898209399039e-07, "loss": 2.7349, "step": 266860 }, { "epoch": 11.495025197053883, "learning_rate": 3.5354133896442495e-07, "loss": 2.5945, "step": 266880 }, { "epoch": 11.49588663479347, "learning_rate": 3.5349285698894607e-07, "loss": 2.6871, "step": 266900 }, { "epoch": 11.496748072533057, "learning_rate": 3.5344437501346714e-07, "loss": 2.738, "step": 266920 }, { "epoch": 11.497609510272644, "learning_rate": 3.533958930379883e-07, "loss": 2.6153, "step": 266940 }, { "epoch": 11.498470948012232, "learning_rate": 3.533474110625094e-07, "loss": 2.6952, "step": 266960 }, { "epoch": 11.49933238575182, "learning_rate": 3.532989290870305e-07, "loss": 2.6875, "step": 266980 }, { "epoch": 11.500193823491408, "learning_rate": 3.532504471115516e-07, "loss": 2.6754, "step": 267000 }, { "epoch": 11.501055261230995, "learning_rate": 3.5320196513607277e-07, "loss": 2.6195, "step": 267020 }, { "epoch": 11.501916698970582, "learning_rate": 3.531534831605938e-07, "loss": 2.7994, "step": 267040 }, { "epoch": 11.50277813671017, "learning_rate": 3.5310500118511496e-07, "loss": 2.5582, "step": 267060 }, { "epoch": 11.503639574449757, "learning_rate": 3.5305651920963603e-07, "loss": 2.7091, "step": 267080 }, { "epoch": 11.504501012189344, "learning_rate": 3.5300803723415716e-07, "loss": 2.6958, "step": 267100 }, { "epoch": 11.505362449928931, "learning_rate": 3.5295955525867823e-07, "loss": 2.493, "step": 267120 }, { "epoch": 11.506223887668519, "learning_rate": 3.529110732831994e-07, "loss": 2.6581, "step": 267140 }, { "epoch": 11.507085325408106, "learning_rate": 3.528625913077204e-07, "loss": 2.7439, "step": 267160 }, { "epoch": 11.507946763147693, "learning_rate": 3.528141093322416e-07, "loss": 2.6821, "step": 267180 }, { "epoch": 11.50880820088728, "learning_rate": 3.527656273567627e-07, "loss": 2.639, "step": 267200 }, { "epoch": 11.509669638626868, "learning_rate": 3.527171453812838e-07, "loss": 2.7124, "step": 267220 }, { "epoch": 11.510531076366455, "learning_rate": 3.5266866340580487e-07, "loss": 2.7389, "step": 267240 }, { "epoch": 11.511392514106044, "learning_rate": 3.5262018143032605e-07, "loss": 2.5981, "step": 267260 }, { "epoch": 11.512253951845631, "learning_rate": 3.525716994548471e-07, "loss": 2.6497, "step": 267280 }, { "epoch": 11.513115389585218, "learning_rate": 3.5252321747936824e-07, "loss": 2.7268, "step": 267300 }, { "epoch": 11.513976827324806, "learning_rate": 3.5247473550388937e-07, "loss": 2.7139, "step": 267320 }, { "epoch": 11.514838265064393, "learning_rate": 3.524262535284105e-07, "loss": 2.622, "step": 267340 }, { "epoch": 11.51569970280398, "learning_rate": 3.523777715529315e-07, "loss": 2.6994, "step": 267360 }, { "epoch": 11.516561140543567, "learning_rate": 3.523292895774527e-07, "loss": 2.7336, "step": 267380 }, { "epoch": 11.517422578283155, "learning_rate": 3.5228080760197376e-07, "loss": 2.6865, "step": 267400 }, { "epoch": 11.518284016022742, "learning_rate": 3.522323256264948e-07, "loss": 2.5589, "step": 267420 }, { "epoch": 11.519145453762329, "learning_rate": 3.5218384365101596e-07, "loss": 2.5846, "step": 267440 }, { "epoch": 11.520006891501916, "learning_rate": 3.5213536167553703e-07, "loss": 2.6534, "step": 267460 }, { "epoch": 11.520868329241504, "learning_rate": 3.5208687970005815e-07, "loss": 2.6561, "step": 267480 }, { "epoch": 11.52172976698109, "learning_rate": 3.520383977245792e-07, "loss": 2.6449, "step": 267500 }, { "epoch": 11.522591204720678, "learning_rate": 3.519899157491004e-07, "loss": 2.7117, "step": 267520 }, { "epoch": 11.523452642460267, "learning_rate": 3.519414337736214e-07, "loss": 2.7923, "step": 267540 }, { "epoch": 11.524314080199854, "learning_rate": 3.518929517981426e-07, "loss": 2.7483, "step": 267560 }, { "epoch": 11.525175517939442, "learning_rate": 3.5184446982266367e-07, "loss": 2.6203, "step": 267580 }, { "epoch": 11.526036955679029, "learning_rate": 3.517959878471848e-07, "loss": 2.6736, "step": 267600 }, { "epoch": 11.526898393418616, "learning_rate": 3.5174750587170586e-07, "loss": 2.805, "step": 267620 }, { "epoch": 11.527759831158203, "learning_rate": 3.5169902389622704e-07, "loss": 2.6418, "step": 267640 }, { "epoch": 11.52862126889779, "learning_rate": 3.516505419207481e-07, "loss": 2.5387, "step": 267660 }, { "epoch": 11.529482706637378, "learning_rate": 3.5160205994526924e-07, "loss": 2.7673, "step": 267680 }, { "epoch": 11.530344144376965, "learning_rate": 3.515535779697903e-07, "loss": 2.706, "step": 267700 }, { "epoch": 11.531205582116552, "learning_rate": 3.515050959943115e-07, "loss": 2.8014, "step": 267720 }, { "epoch": 11.53206701985614, "learning_rate": 3.514566140188325e-07, "loss": 2.6639, "step": 267740 }, { "epoch": 11.532928457595727, "learning_rate": 3.514081320433537e-07, "loss": 2.5589, "step": 267760 }, { "epoch": 11.533789895335314, "learning_rate": 3.5135965006787475e-07, "loss": 2.7341, "step": 267780 }, { "epoch": 11.534651333074901, "learning_rate": 3.513111680923959e-07, "loss": 2.8104, "step": 267800 }, { "epoch": 11.535512770814488, "learning_rate": 3.5126268611691695e-07, "loss": 2.6073, "step": 267820 }, { "epoch": 11.536374208554077, "learning_rate": 3.5121420414143813e-07, "loss": 2.7501, "step": 267840 }, { "epoch": 11.537235646293665, "learning_rate": 3.5116572216595915e-07, "loss": 2.7052, "step": 267860 }, { "epoch": 11.538097084033252, "learning_rate": 3.511172401904803e-07, "loss": 2.6185, "step": 267880 }, { "epoch": 11.53895852177284, "learning_rate": 3.510687582150014e-07, "loss": 2.5777, "step": 267900 }, { "epoch": 11.539819959512426, "learning_rate": 3.510202762395225e-07, "loss": 2.7751, "step": 267920 }, { "epoch": 11.540681397252014, "learning_rate": 3.509717942640436e-07, "loss": 2.6033, "step": 267940 }, { "epoch": 11.541542834991601, "learning_rate": 3.5092331228856466e-07, "loss": 2.7062, "step": 267960 }, { "epoch": 11.542404272731188, "learning_rate": 3.508748303130858e-07, "loss": 2.7754, "step": 267980 }, { "epoch": 11.543265710470775, "learning_rate": 3.5082634833760686e-07, "loss": 2.6907, "step": 268000 }, { "epoch": 11.544127148210363, "learning_rate": 3.5077786636212804e-07, "loss": 2.6425, "step": 268020 }, { "epoch": 11.54498858594995, "learning_rate": 3.507293843866491e-07, "loss": 2.6458, "step": 268040 }, { "epoch": 11.545850023689537, "learning_rate": 3.5068090241117023e-07, "loss": 2.6482, "step": 268060 }, { "epoch": 11.546711461429124, "learning_rate": 3.506324204356913e-07, "loss": 2.3549, "step": 268080 }, { "epoch": 11.547572899168713, "learning_rate": 3.505839384602125e-07, "loss": 2.6718, "step": 268100 }, { "epoch": 11.5484343369083, "learning_rate": 3.505354564847335e-07, "loss": 2.8284, "step": 268120 }, { "epoch": 11.549295774647888, "learning_rate": 3.504869745092547e-07, "loss": 2.5824, "step": 268140 }, { "epoch": 11.550157212387475, "learning_rate": 3.504384925337757e-07, "loss": 2.6244, "step": 268160 }, { "epoch": 11.551018650127062, "learning_rate": 3.5039001055829687e-07, "loss": 2.6953, "step": 268180 }, { "epoch": 11.55188008786665, "learning_rate": 3.5034152858281794e-07, "loss": 2.6662, "step": 268200 }, { "epoch": 11.552741525606237, "learning_rate": 3.502930466073391e-07, "loss": 2.7127, "step": 268220 }, { "epoch": 11.553602963345824, "learning_rate": 3.5024456463186014e-07, "loss": 2.5874, "step": 268240 }, { "epoch": 11.554464401085411, "learning_rate": 3.501960826563813e-07, "loss": 2.8154, "step": 268260 }, { "epoch": 11.555325838824999, "learning_rate": 3.501476006809024e-07, "loss": 2.7562, "step": 268280 }, { "epoch": 11.556187276564586, "learning_rate": 3.500991187054235e-07, "loss": 2.7339, "step": 268300 }, { "epoch": 11.557048714304173, "learning_rate": 3.500506367299446e-07, "loss": 2.6444, "step": 268320 }, { "epoch": 11.55791015204376, "learning_rate": 3.5000215475446576e-07, "loss": 2.6124, "step": 268340 }, { "epoch": 11.558771589783348, "learning_rate": 3.4995367277898683e-07, "loss": 2.744, "step": 268360 }, { "epoch": 11.559633027522935, "learning_rate": 3.4990519080350796e-07, "loss": 2.7434, "step": 268380 }, { "epoch": 11.560494465262524, "learning_rate": 3.4985670882802903e-07, "loss": 2.7406, "step": 268400 }, { "epoch": 11.561355903002111, "learning_rate": 3.498082268525502e-07, "loss": 2.4582, "step": 268420 }, { "epoch": 11.562217340741698, "learning_rate": 3.497597448770712e-07, "loss": 2.7682, "step": 268440 }, { "epoch": 11.563078778481286, "learning_rate": 3.497112629015924e-07, "loss": 2.6481, "step": 268460 }, { "epoch": 11.563940216220873, "learning_rate": 3.496627809261135e-07, "loss": 2.7401, "step": 268480 }, { "epoch": 11.56480165396046, "learning_rate": 3.496142989506345e-07, "loss": 2.618, "step": 268500 }, { "epoch": 11.565663091700047, "learning_rate": 3.4956581697515567e-07, "loss": 2.6567, "step": 268520 }, { "epoch": 11.566524529439635, "learning_rate": 3.4951733499967674e-07, "loss": 2.7129, "step": 268540 }, { "epoch": 11.567385967179222, "learning_rate": 3.4946885302419787e-07, "loss": 2.5538, "step": 268560 }, { "epoch": 11.568247404918809, "learning_rate": 3.494203710487189e-07, "loss": 2.6239, "step": 268580 }, { "epoch": 11.569108842658396, "learning_rate": 3.493718890732401e-07, "loss": 2.6656, "step": 268600 }, { "epoch": 11.569970280397984, "learning_rate": 3.4932340709776113e-07, "loss": 2.6732, "step": 268620 }, { "epoch": 11.57083171813757, "learning_rate": 3.492749251222823e-07, "loss": 2.6911, "step": 268640 }, { "epoch": 11.57169315587716, "learning_rate": 3.492264431468034e-07, "loss": 2.697, "step": 268660 }, { "epoch": 11.572554593616747, "learning_rate": 3.491779611713245e-07, "loss": 2.6415, "step": 268680 }, { "epoch": 11.573416031356334, "learning_rate": 3.491294791958456e-07, "loss": 2.7255, "step": 268700 }, { "epoch": 11.574277469095922, "learning_rate": 3.4908099722036676e-07, "loss": 2.6439, "step": 268720 }, { "epoch": 11.575138906835509, "learning_rate": 3.4903251524488783e-07, "loss": 2.7398, "step": 268740 }, { "epoch": 11.576000344575096, "learning_rate": 3.4898403326940895e-07, "loss": 2.6276, "step": 268760 }, { "epoch": 11.576861782314683, "learning_rate": 3.4893555129393e-07, "loss": 2.6732, "step": 268780 }, { "epoch": 11.57772322005427, "learning_rate": 3.488870693184512e-07, "loss": 2.6539, "step": 268800 }, { "epoch": 11.578584657793858, "learning_rate": 3.488385873429722e-07, "loss": 2.5968, "step": 268820 }, { "epoch": 11.579446095533445, "learning_rate": 3.487901053674934e-07, "loss": 2.7037, "step": 268840 }, { "epoch": 11.580307533273032, "learning_rate": 3.4874162339201447e-07, "loss": 2.7104, "step": 268860 }, { "epoch": 11.58116897101262, "learning_rate": 3.486931414165356e-07, "loss": 2.816, "step": 268880 }, { "epoch": 11.582030408752207, "learning_rate": 3.4864465944105666e-07, "loss": 2.6485, "step": 268900 }, { "epoch": 11.582891846491794, "learning_rate": 3.4859617746557784e-07, "loss": 2.7153, "step": 268920 }, { "epoch": 11.583753284231381, "learning_rate": 3.4854769549009886e-07, "loss": 2.5963, "step": 268940 }, { "epoch": 11.58461472197097, "learning_rate": 3.4849921351462004e-07, "loss": 2.7317, "step": 268960 }, { "epoch": 11.585476159710558, "learning_rate": 3.484507315391411e-07, "loss": 2.7779, "step": 268980 }, { "epoch": 11.586337597450145, "learning_rate": 3.4840224956366223e-07, "loss": 2.529, "step": 269000 }, { "epoch": 11.587199035189732, "learning_rate": 3.483537675881833e-07, "loss": 2.7229, "step": 269020 }, { "epoch": 11.58806047292932, "learning_rate": 3.483052856127044e-07, "loss": 2.8845, "step": 269040 }, { "epoch": 11.588921910668907, "learning_rate": 3.4825680363722555e-07, "loss": 2.8447, "step": 269060 }, { "epoch": 11.589783348408494, "learning_rate": 3.4820832166174657e-07, "loss": 2.7575, "step": 269080 }, { "epoch": 11.590644786148081, "learning_rate": 3.481598396862678e-07, "loss": 2.5798, "step": 269100 }, { "epoch": 11.591506223887668, "learning_rate": 3.481113577107888e-07, "loss": 2.8075, "step": 269120 }, { "epoch": 11.592367661627256, "learning_rate": 3.4806287573530994e-07, "loss": 2.6302, "step": 269140 }, { "epoch": 11.593229099366843, "learning_rate": 3.48014393759831e-07, "loss": 2.7477, "step": 269160 }, { "epoch": 11.59409053710643, "learning_rate": 3.479659117843522e-07, "loss": 2.7242, "step": 269180 }, { "epoch": 11.594951974846017, "learning_rate": 3.479174298088732e-07, "loss": 2.652, "step": 269200 }, { "epoch": 11.595813412585606, "learning_rate": 3.478689478333944e-07, "loss": 2.6842, "step": 269220 }, { "epoch": 11.596674850325194, "learning_rate": 3.4782046585791546e-07, "loss": 2.6931, "step": 269240 }, { "epoch": 11.59753628806478, "learning_rate": 3.477719838824366e-07, "loss": 2.7179, "step": 269260 }, { "epoch": 11.598397725804368, "learning_rate": 3.4772350190695766e-07, "loss": 2.5345, "step": 269280 }, { "epoch": 11.599259163543955, "learning_rate": 3.4767501993147883e-07, "loss": 2.6438, "step": 269300 }, { "epoch": 11.600120601283542, "learning_rate": 3.4762653795599985e-07, "loss": 2.7808, "step": 269320 }, { "epoch": 11.60098203902313, "learning_rate": 3.4757805598052103e-07, "loss": 2.7043, "step": 269340 }, { "epoch": 11.601843476762717, "learning_rate": 3.475295740050421e-07, "loss": 2.8218, "step": 269360 }, { "epoch": 11.602704914502304, "learning_rate": 3.474810920295632e-07, "loss": 2.6736, "step": 269380 }, { "epoch": 11.603566352241891, "learning_rate": 3.474326100540843e-07, "loss": 2.6597, "step": 269400 }, { "epoch": 11.604427789981479, "learning_rate": 3.473841280786055e-07, "loss": 2.7214, "step": 269420 }, { "epoch": 11.605289227721066, "learning_rate": 3.4733564610312655e-07, "loss": 2.5837, "step": 269440 }, { "epoch": 11.606150665460653, "learning_rate": 3.4728716412764767e-07, "loss": 2.6142, "step": 269460 }, { "epoch": 11.60701210320024, "learning_rate": 3.4723868215216874e-07, "loss": 2.7647, "step": 269480 }, { "epoch": 11.607873540939828, "learning_rate": 3.471902001766899e-07, "loss": 2.6906, "step": 269500 }, { "epoch": 11.608734978679417, "learning_rate": 3.4714171820121094e-07, "loss": 2.6173, "step": 269520 }, { "epoch": 11.609596416419004, "learning_rate": 3.470932362257321e-07, "loss": 2.6231, "step": 269540 }, { "epoch": 11.610457854158591, "learning_rate": 3.470447542502532e-07, "loss": 2.6159, "step": 269560 }, { "epoch": 11.611319291898178, "learning_rate": 3.469962722747742e-07, "loss": 2.5048, "step": 269580 }, { "epoch": 11.612180729637766, "learning_rate": 3.469477902992954e-07, "loss": 2.6746, "step": 269600 }, { "epoch": 11.613042167377353, "learning_rate": 3.4689930832381645e-07, "loss": 2.5379, "step": 269620 }, { "epoch": 11.61390360511694, "learning_rate": 3.468508263483376e-07, "loss": 2.7899, "step": 269640 }, { "epoch": 11.614765042856527, "learning_rate": 3.4680234437285865e-07, "loss": 2.6395, "step": 269660 }, { "epoch": 11.615626480596115, "learning_rate": 3.4675386239737983e-07, "loss": 2.6746, "step": 269680 }, { "epoch": 11.616487918335702, "learning_rate": 3.4670538042190085e-07, "loss": 2.7388, "step": 269700 }, { "epoch": 11.61734935607529, "learning_rate": 3.46656898446422e-07, "loss": 2.5798, "step": 269720 }, { "epoch": 11.618210793814876, "learning_rate": 3.466084164709431e-07, "loss": 2.6728, "step": 269740 }, { "epoch": 11.619072231554464, "learning_rate": 3.465599344954642e-07, "loss": 2.7451, "step": 269760 }, { "epoch": 11.619933669294053, "learning_rate": 3.465114525199853e-07, "loss": 2.5929, "step": 269780 }, { "epoch": 11.62079510703364, "learning_rate": 3.4646297054450647e-07, "loss": 2.6723, "step": 269800 }, { "epoch": 11.621656544773227, "learning_rate": 3.4641448856902754e-07, "loss": 2.7837, "step": 269820 }, { "epoch": 11.622517982512814, "learning_rate": 3.4636600659354866e-07, "loss": 2.6395, "step": 269840 }, { "epoch": 11.623379420252402, "learning_rate": 3.4631752461806974e-07, "loss": 2.8101, "step": 269860 }, { "epoch": 11.624240857991989, "learning_rate": 3.462690426425909e-07, "loss": 2.7942, "step": 269880 }, { "epoch": 11.625102295731576, "learning_rate": 3.4622056066711193e-07, "loss": 2.6571, "step": 269900 }, { "epoch": 11.625963733471163, "learning_rate": 3.461720786916331e-07, "loss": 2.8668, "step": 269920 }, { "epoch": 11.62682517121075, "learning_rate": 3.4612359671615413e-07, "loss": 2.7685, "step": 269940 }, { "epoch": 11.627686608950338, "learning_rate": 3.460751147406753e-07, "loss": 2.7434, "step": 269960 }, { "epoch": 11.628548046689925, "learning_rate": 3.460266327651964e-07, "loss": 2.7319, "step": 269980 }, { "epoch": 11.629409484429512, "learning_rate": 3.4597815078971755e-07, "loss": 2.7151, "step": 270000 }, { "epoch": 11.6302709221691, "learning_rate": 3.4592966881423857e-07, "loss": 2.701, "step": 270020 }, { "epoch": 11.631132359908687, "learning_rate": 3.4588118683875975e-07, "loss": 2.7061, "step": 270040 }, { "epoch": 11.631993797648274, "learning_rate": 3.458327048632808e-07, "loss": 2.4788, "step": 270060 }, { "epoch": 11.632855235387863, "learning_rate": 3.4578422288780195e-07, "loss": 2.6189, "step": 270080 }, { "epoch": 11.63371667312745, "learning_rate": 3.45735740912323e-07, "loss": 2.738, "step": 270100 }, { "epoch": 11.634578110867038, "learning_rate": 3.456872589368441e-07, "loss": 2.5935, "step": 270120 }, { "epoch": 11.635439548606625, "learning_rate": 3.4563877696136527e-07, "loss": 2.7896, "step": 270140 }, { "epoch": 11.636300986346212, "learning_rate": 3.455902949858863e-07, "loss": 2.7385, "step": 270160 }, { "epoch": 11.6371624240858, "learning_rate": 3.4554181301040746e-07, "loss": 2.7946, "step": 270180 }, { "epoch": 11.638023861825387, "learning_rate": 3.4549333103492853e-07, "loss": 2.6333, "step": 270200 }, { "epoch": 11.638885299564974, "learning_rate": 3.4544484905944966e-07, "loss": 2.668, "step": 270220 }, { "epoch": 11.639746737304561, "learning_rate": 3.4539636708397073e-07, "loss": 2.6694, "step": 270240 }, { "epoch": 11.640608175044148, "learning_rate": 3.453478851084919e-07, "loss": 2.4948, "step": 270260 }, { "epoch": 11.641469612783736, "learning_rate": 3.452994031330129e-07, "loss": 2.6425, "step": 270280 }, { "epoch": 11.642331050523323, "learning_rate": 3.452509211575341e-07, "loss": 2.6751, "step": 270300 }, { "epoch": 11.64319248826291, "learning_rate": 3.452024391820552e-07, "loss": 2.6562, "step": 270320 }, { "epoch": 11.644053926002499, "learning_rate": 3.451539572065763e-07, "loss": 2.8837, "step": 270340 }, { "epoch": 11.644915363742086, "learning_rate": 3.451054752310973e-07, "loss": 2.6046, "step": 270360 }, { "epoch": 11.645776801481674, "learning_rate": 3.4505699325561855e-07, "loss": 2.5532, "step": 270380 }, { "epoch": 11.64663823922126, "learning_rate": 3.4500851128013957e-07, "loss": 2.7026, "step": 270400 }, { "epoch": 11.647499676960848, "learning_rate": 3.4496002930466074e-07, "loss": 2.6357, "step": 270420 }, { "epoch": 11.648361114700435, "learning_rate": 3.449115473291818e-07, "loss": 2.7709, "step": 270440 }, { "epoch": 11.649222552440023, "learning_rate": 3.4486306535370294e-07, "loss": 2.6177, "step": 270460 }, { "epoch": 11.65008399017961, "learning_rate": 3.44814583378224e-07, "loss": 2.7173, "step": 270480 }, { "epoch": 11.650945427919197, "learning_rate": 3.447661014027452e-07, "loss": 2.7448, "step": 270500 }, { "epoch": 11.651806865658784, "learning_rate": 3.4471761942726626e-07, "loss": 2.6247, "step": 270520 }, { "epoch": 11.652668303398372, "learning_rate": 3.446691374517874e-07, "loss": 2.6592, "step": 270540 }, { "epoch": 11.653529741137959, "learning_rate": 3.4462065547630846e-07, "loss": 2.7426, "step": 270560 }, { "epoch": 11.654391178877546, "learning_rate": 3.4457217350082963e-07, "loss": 2.6317, "step": 270580 }, { "epoch": 11.655252616617133, "learning_rate": 3.4452369152535065e-07, "loss": 2.6891, "step": 270600 }, { "epoch": 11.65611405435672, "learning_rate": 3.4447520954987183e-07, "loss": 2.7556, "step": 270620 }, { "epoch": 11.65697549209631, "learning_rate": 3.444267275743929e-07, "loss": 2.8023, "step": 270640 }, { "epoch": 11.657836929835897, "learning_rate": 3.443782455989139e-07, "loss": 2.4638, "step": 270660 }, { "epoch": 11.658698367575484, "learning_rate": 3.443297636234351e-07, "loss": 2.7375, "step": 270680 }, { "epoch": 11.659559805315071, "learning_rate": 3.4428128164795617e-07, "loss": 2.4503, "step": 270700 }, { "epoch": 11.660421243054659, "learning_rate": 3.442327996724773e-07, "loss": 2.7243, "step": 270720 }, { "epoch": 11.661282680794246, "learning_rate": 3.4418431769699836e-07, "loss": 2.6981, "step": 270740 }, { "epoch": 11.662144118533833, "learning_rate": 3.4413583572151954e-07, "loss": 2.6666, "step": 270760 }, { "epoch": 11.66300555627342, "learning_rate": 3.440873537460406e-07, "loss": 2.6689, "step": 270780 }, { "epoch": 11.663866994013008, "learning_rate": 3.4403887177056174e-07, "loss": 2.7643, "step": 270800 }, { "epoch": 11.664728431752595, "learning_rate": 3.439903897950828e-07, "loss": 2.5501, "step": 270820 }, { "epoch": 11.665589869492182, "learning_rate": 3.43941907819604e-07, "loss": 2.6787, "step": 270840 }, { "epoch": 11.66645130723177, "learning_rate": 3.43893425844125e-07, "loss": 2.5102, "step": 270860 }, { "epoch": 11.667312744971357, "learning_rate": 3.4384494386864623e-07, "loss": 2.6931, "step": 270880 }, { "epoch": 11.668174182710944, "learning_rate": 3.4379646189316725e-07, "loss": 2.6536, "step": 270900 }, { "epoch": 11.669035620450533, "learning_rate": 3.437479799176884e-07, "loss": 2.4796, "step": 270920 }, { "epoch": 11.66989705819012, "learning_rate": 3.4369949794220945e-07, "loss": 2.5935, "step": 270940 }, { "epoch": 11.670758495929707, "learning_rate": 3.4365101596673063e-07, "loss": 2.744, "step": 270960 }, { "epoch": 11.671619933669295, "learning_rate": 3.4360253399125165e-07, "loss": 2.6618, "step": 270980 }, { "epoch": 11.672481371408882, "learning_rate": 3.435540520157728e-07, "loss": 2.7811, "step": 271000 }, { "epoch": 11.673342809148469, "learning_rate": 3.435055700402939e-07, "loss": 2.656, "step": 271020 }, { "epoch": 11.674204246888056, "learning_rate": 3.43457088064815e-07, "loss": 2.5973, "step": 271040 }, { "epoch": 11.675065684627643, "learning_rate": 3.434086060893361e-07, "loss": 2.6051, "step": 271060 }, { "epoch": 11.67592712236723, "learning_rate": 3.4336012411385727e-07, "loss": 2.8716, "step": 271080 }, { "epoch": 11.676788560106818, "learning_rate": 3.433116421383783e-07, "loss": 2.8101, "step": 271100 }, { "epoch": 11.677649997846405, "learning_rate": 3.4326316016289946e-07, "loss": 2.634, "step": 271120 }, { "epoch": 11.678511435585992, "learning_rate": 3.4321467818742053e-07, "loss": 2.6308, "step": 271140 }, { "epoch": 11.67937287332558, "learning_rate": 3.4316619621194166e-07, "loss": 2.7381, "step": 271160 }, { "epoch": 11.680234311065167, "learning_rate": 3.4311771423646273e-07, "loss": 2.8653, "step": 271180 }, { "epoch": 11.681095748804754, "learning_rate": 3.430692322609838e-07, "loss": 2.5815, "step": 271200 }, { "epoch": 11.681957186544343, "learning_rate": 3.43020750285505e-07, "loss": 2.6845, "step": 271220 }, { "epoch": 11.68281862428393, "learning_rate": 3.42972268310026e-07, "loss": 2.638, "step": 271240 }, { "epoch": 11.683680062023518, "learning_rate": 3.429237863345472e-07, "loss": 2.7026, "step": 271260 }, { "epoch": 11.684541499763105, "learning_rate": 3.4287530435906825e-07, "loss": 2.5727, "step": 271280 }, { "epoch": 11.685402937502692, "learning_rate": 3.428268223835894e-07, "loss": 2.6657, "step": 271300 }, { "epoch": 11.68626437524228, "learning_rate": 3.4277834040811044e-07, "loss": 2.6757, "step": 271320 }, { "epoch": 11.687125812981867, "learning_rate": 3.427298584326316e-07, "loss": 2.5676, "step": 271340 }, { "epoch": 11.687987250721454, "learning_rate": 3.4268137645715264e-07, "loss": 2.6191, "step": 271360 }, { "epoch": 11.688848688461041, "learning_rate": 3.426328944816738e-07, "loss": 2.6184, "step": 271380 }, { "epoch": 11.689710126200628, "learning_rate": 3.425844125061949e-07, "loss": 2.6617, "step": 271400 }, { "epoch": 11.690571563940216, "learning_rate": 3.42535930530716e-07, "loss": 2.5532, "step": 271420 }, { "epoch": 11.691433001679803, "learning_rate": 3.424874485552371e-07, "loss": 2.5707, "step": 271440 }, { "epoch": 11.69229443941939, "learning_rate": 3.4243896657975826e-07, "loss": 2.7049, "step": 271460 }, { "epoch": 11.69315587715898, "learning_rate": 3.423904846042793e-07, "loss": 2.671, "step": 271480 }, { "epoch": 11.694017314898566, "learning_rate": 3.4234200262880046e-07, "loss": 2.6403, "step": 271500 }, { "epoch": 11.694878752638154, "learning_rate": 3.4229352065332153e-07, "loss": 2.8709, "step": 271520 }, { "epoch": 11.695740190377741, "learning_rate": 3.4224503867784265e-07, "loss": 2.6761, "step": 271540 }, { "epoch": 11.696601628117328, "learning_rate": 3.421965567023637e-07, "loss": 2.6105, "step": 271560 }, { "epoch": 11.697463065856915, "learning_rate": 3.421480747268849e-07, "loss": 2.6849, "step": 271580 }, { "epoch": 11.698324503596503, "learning_rate": 3.4209959275140597e-07, "loss": 2.7617, "step": 271600 }, { "epoch": 11.69918594133609, "learning_rate": 3.420511107759271e-07, "loss": 2.7185, "step": 271620 }, { "epoch": 11.700047379075677, "learning_rate": 3.4200262880044817e-07, "loss": 2.7364, "step": 271640 }, { "epoch": 11.700908816815264, "learning_rate": 3.4195414682496935e-07, "loss": 2.6421, "step": 271660 }, { "epoch": 11.701770254554852, "learning_rate": 3.4190566484949037e-07, "loss": 2.6225, "step": 271680 }, { "epoch": 11.702631692294439, "learning_rate": 3.4185718287401154e-07, "loss": 2.812, "step": 271700 }, { "epoch": 11.703493130034026, "learning_rate": 3.4180870089853256e-07, "loss": 2.7415, "step": 271720 }, { "epoch": 11.704354567773613, "learning_rate": 3.4176021892305363e-07, "loss": 2.6548, "step": 271740 }, { "epoch": 11.7052160055132, "learning_rate": 3.417117369475748e-07, "loss": 2.7165, "step": 271760 }, { "epoch": 11.70607744325279, "learning_rate": 3.416632549720959e-07, "loss": 2.5979, "step": 271780 }, { "epoch": 11.706938880992377, "learning_rate": 3.41614772996617e-07, "loss": 2.7294, "step": 271800 }, { "epoch": 11.707800318731964, "learning_rate": 3.415662910211381e-07, "loss": 2.8422, "step": 271820 }, { "epoch": 11.708661756471551, "learning_rate": 3.4151780904565925e-07, "loss": 2.6919, "step": 271840 }, { "epoch": 11.709523194211139, "learning_rate": 3.414693270701803e-07, "loss": 2.7331, "step": 271860 }, { "epoch": 11.710384631950726, "learning_rate": 3.4142084509470145e-07, "loss": 2.8226, "step": 271880 }, { "epoch": 11.711246069690313, "learning_rate": 3.413723631192225e-07, "loss": 2.6908, "step": 271900 }, { "epoch": 11.7121075074299, "learning_rate": 3.413238811437437e-07, "loss": 2.595, "step": 271920 }, { "epoch": 11.712968945169488, "learning_rate": 3.412753991682647e-07, "loss": 2.7196, "step": 271940 }, { "epoch": 11.713830382909075, "learning_rate": 3.412269171927859e-07, "loss": 2.8116, "step": 271960 }, { "epoch": 11.714691820648662, "learning_rate": 3.4117843521730697e-07, "loss": 2.7604, "step": 271980 }, { "epoch": 11.71555325838825, "learning_rate": 3.411299532418281e-07, "loss": 2.5667, "step": 272000 }, { "epoch": 11.716414696127837, "learning_rate": 3.4108147126634916e-07, "loss": 2.5846, "step": 272020 }, { "epoch": 11.717276133867426, "learning_rate": 3.4103298929087034e-07, "loss": 2.6128, "step": 272040 }, { "epoch": 11.718137571607013, "learning_rate": 3.4098450731539136e-07, "loss": 2.686, "step": 272060 }, { "epoch": 11.7189990093466, "learning_rate": 3.4093602533991254e-07, "loss": 2.6042, "step": 272080 }, { "epoch": 11.719860447086187, "learning_rate": 3.408875433644336e-07, "loss": 2.6227, "step": 272100 }, { "epoch": 11.720721884825775, "learning_rate": 3.4083906138895473e-07, "loss": 2.7469, "step": 272120 }, { "epoch": 11.721583322565362, "learning_rate": 3.4079057941347575e-07, "loss": 2.5832, "step": 272140 }, { "epoch": 11.722444760304949, "learning_rate": 3.40742097437997e-07, "loss": 2.7955, "step": 272160 }, { "epoch": 11.723306198044536, "learning_rate": 3.40693615462518e-07, "loss": 2.7671, "step": 272180 }, { "epoch": 11.724167635784124, "learning_rate": 3.406451334870392e-07, "loss": 2.5575, "step": 272200 }, { "epoch": 11.72502907352371, "learning_rate": 3.4059665151156025e-07, "loss": 2.565, "step": 272220 }, { "epoch": 11.725890511263298, "learning_rate": 3.4054816953608137e-07, "loss": 2.6498, "step": 272240 }, { "epoch": 11.726751949002885, "learning_rate": 3.4049968756060244e-07, "loss": 2.6399, "step": 272260 }, { "epoch": 11.727613386742473, "learning_rate": 3.404512055851235e-07, "loss": 2.6688, "step": 272280 }, { "epoch": 11.72847482448206, "learning_rate": 3.404027236096447e-07, "loss": 2.6612, "step": 272300 }, { "epoch": 11.729336262221647, "learning_rate": 3.403542416341657e-07, "loss": 2.7149, "step": 272320 }, { "epoch": 11.730197699961236, "learning_rate": 3.403057596586869e-07, "loss": 2.6162, "step": 272340 }, { "epoch": 11.731059137700823, "learning_rate": 3.4025727768320796e-07, "loss": 2.7494, "step": 272360 }, { "epoch": 11.73192057544041, "learning_rate": 3.402087957077291e-07, "loss": 2.6323, "step": 272380 }, { "epoch": 11.732782013179998, "learning_rate": 3.4016031373225016e-07, "loss": 2.5218, "step": 272400 }, { "epoch": 11.733643450919585, "learning_rate": 3.4011183175677133e-07, "loss": 2.7708, "step": 272420 }, { "epoch": 11.734504888659172, "learning_rate": 3.4006334978129235e-07, "loss": 2.5395, "step": 272440 }, { "epoch": 11.73536632639876, "learning_rate": 3.4001486780581353e-07, "loss": 2.5683, "step": 272460 }, { "epoch": 11.736227764138347, "learning_rate": 3.399663858303346e-07, "loss": 2.6572, "step": 272480 }, { "epoch": 11.737089201877934, "learning_rate": 3.399179038548557e-07, "loss": 2.5511, "step": 272500 }, { "epoch": 11.737950639617521, "learning_rate": 3.398694218793768e-07, "loss": 2.6618, "step": 272520 }, { "epoch": 11.738812077357109, "learning_rate": 3.39820939903898e-07, "loss": 2.6413, "step": 272540 }, { "epoch": 11.739673515096696, "learning_rate": 3.3977245792841905e-07, "loss": 2.5724, "step": 272560 }, { "epoch": 11.740534952836283, "learning_rate": 3.3972397595294017e-07, "loss": 2.5351, "step": 272580 }, { "epoch": 11.741396390575872, "learning_rate": 3.3967549397746124e-07, "loss": 2.7165, "step": 272600 }, { "epoch": 11.74225782831546, "learning_rate": 3.396270120019824e-07, "loss": 2.5943, "step": 272620 }, { "epoch": 11.743119266055047, "learning_rate": 3.3957853002650344e-07, "loss": 2.6535, "step": 272640 }, { "epoch": 11.743980703794634, "learning_rate": 3.3953004805102467e-07, "loss": 2.8194, "step": 272660 }, { "epoch": 11.744842141534221, "learning_rate": 3.394815660755457e-07, "loss": 2.7655, "step": 272680 }, { "epoch": 11.745703579273808, "learning_rate": 3.394330841000668e-07, "loss": 2.6841, "step": 272700 }, { "epoch": 11.746565017013396, "learning_rate": 3.393846021245879e-07, "loss": 2.7402, "step": 272720 }, { "epoch": 11.747426454752983, "learning_rate": 3.3933612014910906e-07, "loss": 2.5801, "step": 272740 }, { "epoch": 11.74828789249257, "learning_rate": 3.392876381736301e-07, "loss": 2.7298, "step": 272760 }, { "epoch": 11.749149330232157, "learning_rate": 3.3923915619815126e-07, "loss": 2.5652, "step": 272780 }, { "epoch": 11.750010767971744, "learning_rate": 3.3919067422267233e-07, "loss": 2.7261, "step": 272800 }, { "epoch": 11.750872205711332, "learning_rate": 3.3914219224719335e-07, "loss": 2.4542, "step": 272820 }, { "epoch": 11.751733643450919, "learning_rate": 3.390937102717145e-07, "loss": 2.7875, "step": 272840 }, { "epoch": 11.752595081190506, "learning_rate": 3.390452282962356e-07, "loss": 2.6517, "step": 272860 }, { "epoch": 11.753456518930093, "learning_rate": 3.389967463207567e-07, "loss": 2.6066, "step": 272880 }, { "epoch": 11.754317956669682, "learning_rate": 3.389482643452778e-07, "loss": 2.6276, "step": 272900 }, { "epoch": 11.75517939440927, "learning_rate": 3.3889978236979897e-07, "loss": 2.6729, "step": 272920 }, { "epoch": 11.756040832148857, "learning_rate": 3.3885130039432004e-07, "loss": 2.7096, "step": 272940 }, { "epoch": 11.756902269888444, "learning_rate": 3.3880281841884116e-07, "loss": 2.686, "step": 272960 }, { "epoch": 11.757763707628031, "learning_rate": 3.3875433644336224e-07, "loss": 2.7698, "step": 272980 }, { "epoch": 11.758625145367619, "learning_rate": 3.387058544678834e-07, "loss": 2.6513, "step": 273000 }, { "epoch": 11.759486583107206, "learning_rate": 3.3865737249240443e-07, "loss": 2.8697, "step": 273020 }, { "epoch": 11.760348020846793, "learning_rate": 3.386088905169256e-07, "loss": 2.8073, "step": 273040 }, { "epoch": 11.76120945858638, "learning_rate": 3.385604085414467e-07, "loss": 2.6398, "step": 273060 }, { "epoch": 11.762070896325968, "learning_rate": 3.3851192656596786e-07, "loss": 2.6703, "step": 273080 }, { "epoch": 11.762932334065555, "learning_rate": 3.384634445904889e-07, "loss": 2.6378, "step": 273100 }, { "epoch": 11.763793771805142, "learning_rate": 3.3841496261501005e-07, "loss": 2.627, "step": 273120 }, { "epoch": 11.76465520954473, "learning_rate": 3.383664806395311e-07, "loss": 2.6154, "step": 273140 }, { "epoch": 11.765516647284318, "learning_rate": 3.3831799866405225e-07, "loss": 2.8121, "step": 273160 }, { "epoch": 11.766378085023906, "learning_rate": 3.382695166885733e-07, "loss": 2.6651, "step": 273180 }, { "epoch": 11.767239522763493, "learning_rate": 3.3822103471309445e-07, "loss": 2.716, "step": 273200 }, { "epoch": 11.76810096050308, "learning_rate": 3.381725527376155e-07, "loss": 2.5994, "step": 273220 }, { "epoch": 11.768962398242667, "learning_rate": 3.381240707621367e-07, "loss": 2.7422, "step": 273240 }, { "epoch": 11.769823835982255, "learning_rate": 3.3807558878665777e-07, "loss": 2.7483, "step": 273260 }, { "epoch": 11.770685273721842, "learning_rate": 3.380271068111789e-07, "loss": 2.6502, "step": 273280 }, { "epoch": 11.77154671146143, "learning_rate": 3.3797862483569996e-07, "loss": 2.771, "step": 273300 }, { "epoch": 11.772408149201016, "learning_rate": 3.379301428602211e-07, "loss": 2.7438, "step": 273320 }, { "epoch": 11.773269586940604, "learning_rate": 3.3788166088474216e-07, "loss": 2.7458, "step": 273340 }, { "epoch": 11.774131024680191, "learning_rate": 3.3783317890926323e-07, "loss": 2.6119, "step": 273360 }, { "epoch": 11.774992462419778, "learning_rate": 3.377846969337844e-07, "loss": 2.7765, "step": 273380 }, { "epoch": 11.775853900159365, "learning_rate": 3.377362149583055e-07, "loss": 2.5885, "step": 273400 }, { "epoch": 11.776715337898953, "learning_rate": 3.376877329828266e-07, "loss": 2.6772, "step": 273420 }, { "epoch": 11.77757677563854, "learning_rate": 3.376392510073476e-07, "loss": 2.8632, "step": 273440 }, { "epoch": 11.778438213378129, "learning_rate": 3.375907690318688e-07, "loss": 2.758, "step": 273460 }, { "epoch": 11.779299651117716, "learning_rate": 3.3754228705638987e-07, "loss": 2.4698, "step": 273480 }, { "epoch": 11.780161088857303, "learning_rate": 3.37493805080911e-07, "loss": 2.7331, "step": 273500 }, { "epoch": 11.78102252659689, "learning_rate": 3.374453231054321e-07, "loss": 2.7932, "step": 273520 }, { "epoch": 11.781883964336478, "learning_rate": 3.3739684112995324e-07, "loss": 2.6025, "step": 273540 }, { "epoch": 11.782745402076065, "learning_rate": 3.373483591544743e-07, "loss": 2.5503, "step": 273560 }, { "epoch": 11.783606839815652, "learning_rate": 3.3729987717899544e-07, "loss": 2.7673, "step": 273580 }, { "epoch": 11.78446827755524, "learning_rate": 3.372513952035165e-07, "loss": 2.5853, "step": 273600 }, { "epoch": 11.785329715294827, "learning_rate": 3.372029132280377e-07, "loss": 2.6442, "step": 273620 }, { "epoch": 11.786191153034414, "learning_rate": 3.3715443125255876e-07, "loss": 2.7395, "step": 273640 }, { "epoch": 11.787052590774001, "learning_rate": 3.371059492770799e-07, "loss": 2.5884, "step": 273660 }, { "epoch": 11.787914028513589, "learning_rate": 3.3705746730160096e-07, "loss": 2.6469, "step": 273680 }, { "epoch": 11.788775466253176, "learning_rate": 3.370089853261221e-07, "loss": 2.6048, "step": 273700 }, { "epoch": 11.789636903992763, "learning_rate": 3.3696050335064315e-07, "loss": 2.5812, "step": 273720 }, { "epoch": 11.790498341732352, "learning_rate": 3.3691202137516433e-07, "loss": 2.8409, "step": 273740 }, { "epoch": 11.79135977947194, "learning_rate": 3.368635393996854e-07, "loss": 2.5859, "step": 273760 }, { "epoch": 11.792221217211527, "learning_rate": 3.368150574242066e-07, "loss": 2.7016, "step": 273780 }, { "epoch": 11.793082654951114, "learning_rate": 3.367665754487276e-07, "loss": 2.8266, "step": 273800 }, { "epoch": 11.793944092690701, "learning_rate": 3.367180934732487e-07, "loss": 2.6032, "step": 273820 }, { "epoch": 11.794805530430288, "learning_rate": 3.366696114977698e-07, "loss": 2.6875, "step": 273840 }, { "epoch": 11.795666968169876, "learning_rate": 3.3662112952229086e-07, "loss": 2.6273, "step": 273860 }, { "epoch": 11.796528405909463, "learning_rate": 3.3657264754681204e-07, "loss": 2.6289, "step": 273880 }, { "epoch": 11.79738984364905, "learning_rate": 3.365241655713331e-07, "loss": 2.6991, "step": 273900 }, { "epoch": 11.798251281388637, "learning_rate": 3.364756835958542e-07, "loss": 2.6168, "step": 273920 }, { "epoch": 11.799112719128225, "learning_rate": 3.364272016203753e-07, "loss": 2.5049, "step": 273940 }, { "epoch": 11.799974156867812, "learning_rate": 3.3637871964489643e-07, "loss": 2.8692, "step": 273960 }, { "epoch": 11.800835594607399, "learning_rate": 3.363302376694175e-07, "loss": 2.6287, "step": 273980 }, { "epoch": 11.801697032346986, "learning_rate": 3.362817556939387e-07, "loss": 2.6123, "step": 274000 }, { "epoch": 11.802558470086574, "learning_rate": 3.3623327371845975e-07, "loss": 2.5744, "step": 274020 }, { "epoch": 11.803419907826163, "learning_rate": 3.361847917429809e-07, "loss": 2.5607, "step": 274040 }, { "epoch": 11.80428134556575, "learning_rate": 3.3613630976750195e-07, "loss": 2.7899, "step": 274060 }, { "epoch": 11.805142783305337, "learning_rate": 3.3608782779202307e-07, "loss": 2.8176, "step": 274080 }, { "epoch": 11.806004221044924, "learning_rate": 3.3603934581654414e-07, "loss": 2.6875, "step": 274100 }, { "epoch": 11.806865658784512, "learning_rate": 3.359908638410653e-07, "loss": 2.7196, "step": 274120 }, { "epoch": 11.807727096524099, "learning_rate": 3.359423818655864e-07, "loss": 2.8055, "step": 274140 }, { "epoch": 11.808588534263686, "learning_rate": 3.3589389989010757e-07, "loss": 2.5748, "step": 274160 }, { "epoch": 11.809449972003273, "learning_rate": 3.358454179146286e-07, "loss": 2.6205, "step": 274180 }, { "epoch": 11.81031140974286, "learning_rate": 3.357969359391497e-07, "loss": 2.774, "step": 274200 }, { "epoch": 11.811172847482448, "learning_rate": 3.357484539636708e-07, "loss": 2.6148, "step": 274220 }, { "epoch": 11.812034285222035, "learning_rate": 3.3569997198819196e-07, "loss": 2.6291, "step": 274240 }, { "epoch": 11.812895722961622, "learning_rate": 3.3565149001271303e-07, "loss": 2.5182, "step": 274260 }, { "epoch": 11.81375716070121, "learning_rate": 3.356030080372342e-07, "loss": 2.7443, "step": 274280 }, { "epoch": 11.814618598440799, "learning_rate": 3.3555452606175523e-07, "loss": 2.8341, "step": 274300 }, { "epoch": 11.815480036180386, "learning_rate": 3.355060440862764e-07, "loss": 2.6137, "step": 274320 }, { "epoch": 11.816341473919973, "learning_rate": 3.354575621107975e-07, "loss": 2.6959, "step": 274340 }, { "epoch": 11.81720291165956, "learning_rate": 3.354090801353186e-07, "loss": 2.8004, "step": 274360 }, { "epoch": 11.818064349399148, "learning_rate": 3.353605981598397e-07, "loss": 2.7277, "step": 274380 }, { "epoch": 11.818925787138735, "learning_rate": 3.3531211618436075e-07, "loss": 2.6774, "step": 274400 }, { "epoch": 11.819787224878322, "learning_rate": 3.352636342088819e-07, "loss": 2.8918, "step": 274420 }, { "epoch": 11.82064866261791, "learning_rate": 3.3521515223340294e-07, "loss": 2.6623, "step": 274440 }, { "epoch": 11.821510100357496, "learning_rate": 3.3516667025792407e-07, "loss": 2.6023, "step": 274460 }, { "epoch": 11.822371538097084, "learning_rate": 3.3511818828244514e-07, "loss": 2.4951, "step": 274480 }, { "epoch": 11.823232975836671, "learning_rate": 3.350697063069663e-07, "loss": 2.6299, "step": 274500 }, { "epoch": 11.824094413576258, "learning_rate": 3.350212243314874e-07, "loss": 2.6153, "step": 274520 }, { "epoch": 11.824955851315845, "learning_rate": 3.3497274235600856e-07, "loss": 2.6757, "step": 274540 }, { "epoch": 11.825817289055433, "learning_rate": 3.349242603805296e-07, "loss": 2.6486, "step": 274560 }, { "epoch": 11.82667872679502, "learning_rate": 3.348757784050507e-07, "loss": 2.7478, "step": 274580 }, { "epoch": 11.827540164534609, "learning_rate": 3.348272964295718e-07, "loss": 2.6985, "step": 274600 }, { "epoch": 11.828401602274196, "learning_rate": 3.3477881445409296e-07, "loss": 2.6069, "step": 274620 }, { "epoch": 11.829263040013783, "learning_rate": 3.3473033247861403e-07, "loss": 2.7162, "step": 274640 }, { "epoch": 11.83012447775337, "learning_rate": 3.346818505031352e-07, "loss": 2.5967, "step": 274660 }, { "epoch": 11.830985915492958, "learning_rate": 3.346333685276562e-07, "loss": 2.7461, "step": 274680 }, { "epoch": 11.831847353232545, "learning_rate": 3.345848865521774e-07, "loss": 2.6767, "step": 274700 }, { "epoch": 11.832708790972132, "learning_rate": 3.345364045766984e-07, "loss": 2.7876, "step": 274720 }, { "epoch": 11.83357022871172, "learning_rate": 3.344879226012196e-07, "loss": 2.9248, "step": 274740 }, { "epoch": 11.834431666451307, "learning_rate": 3.3443944062574067e-07, "loss": 2.6269, "step": 274760 }, { "epoch": 11.835293104190894, "learning_rate": 3.3439095865026185e-07, "loss": 2.7569, "step": 274780 }, { "epoch": 11.836154541930481, "learning_rate": 3.343424766747829e-07, "loss": 2.5816, "step": 274800 }, { "epoch": 11.837015979670069, "learning_rate": 3.3429399469930404e-07, "loss": 2.7404, "step": 274820 }, { "epoch": 11.837877417409656, "learning_rate": 3.3424551272382506e-07, "loss": 2.6581, "step": 274840 }, { "epoch": 11.838738855149245, "learning_rate": 3.341970307483463e-07, "loss": 2.6549, "step": 274860 }, { "epoch": 11.839600292888832, "learning_rate": 3.341485487728673e-07, "loss": 2.6798, "step": 274880 }, { "epoch": 11.84046173062842, "learning_rate": 3.341000667973885e-07, "loss": 2.7292, "step": 274900 }, { "epoch": 11.841323168368007, "learning_rate": 3.3405158482190956e-07, "loss": 2.7619, "step": 274920 }, { "epoch": 11.842184606107594, "learning_rate": 3.340031028464306e-07, "loss": 2.8162, "step": 274940 }, { "epoch": 11.843046043847181, "learning_rate": 3.3395462087095175e-07, "loss": 2.7504, "step": 274960 }, { "epoch": 11.843907481586768, "learning_rate": 3.3390613889547277e-07, "loss": 2.8307, "step": 274980 }, { "epoch": 11.844768919326356, "learning_rate": 3.3385765691999395e-07, "loss": 2.6568, "step": 275000 }, { "epoch": 11.845630357065943, "learning_rate": 3.338091749445151e-07, "loss": 2.675, "step": 275020 }, { "epoch": 11.84649179480553, "learning_rate": 3.337606929690362e-07, "loss": 2.7465, "step": 275040 }, { "epoch": 11.847353232545117, "learning_rate": 3.337122109935572e-07, "loss": 2.7152, "step": 275060 }, { "epoch": 11.848214670284705, "learning_rate": 3.336637290180784e-07, "loss": 2.6036, "step": 275080 }, { "epoch": 11.849076108024292, "learning_rate": 3.336152470425994e-07, "loss": 2.6355, "step": 275100 }, { "epoch": 11.84993754576388, "learning_rate": 3.335667650671206e-07, "loss": 2.6826, "step": 275120 }, { "epoch": 11.850798983503466, "learning_rate": 3.3351828309164166e-07, "loss": 2.689, "step": 275140 }, { "epoch": 11.851660421243055, "learning_rate": 3.3346980111616284e-07, "loss": 2.829, "step": 275160 }, { "epoch": 11.852521858982643, "learning_rate": 3.334213191406839e-07, "loss": 2.7426, "step": 275180 }, { "epoch": 11.85338329672223, "learning_rate": 3.3337283716520504e-07, "loss": 2.7357, "step": 275200 }, { "epoch": 11.854244734461817, "learning_rate": 3.3332435518972605e-07, "loss": 2.651, "step": 275220 }, { "epoch": 11.855106172201404, "learning_rate": 3.3327587321424723e-07, "loss": 2.6034, "step": 275240 }, { "epoch": 11.855967609940992, "learning_rate": 3.332273912387683e-07, "loss": 2.6322, "step": 275260 }, { "epoch": 11.856829047680579, "learning_rate": 3.3317890926328943e-07, "loss": 2.7469, "step": 275280 }, { "epoch": 11.857690485420166, "learning_rate": 3.3313042728781055e-07, "loss": 2.7669, "step": 275300 }, { "epoch": 11.858551923159753, "learning_rate": 3.330819453123317e-07, "loss": 2.7214, "step": 275320 }, { "epoch": 11.85941336089934, "learning_rate": 3.3303346333685275e-07, "loss": 2.7032, "step": 275340 }, { "epoch": 11.860274798638928, "learning_rate": 3.3298498136137387e-07, "loss": 2.63, "step": 275360 }, { "epoch": 11.861136236378515, "learning_rate": 3.3293649938589494e-07, "loss": 2.5894, "step": 275380 }, { "epoch": 11.861997674118102, "learning_rate": 3.328880174104161e-07, "loss": 2.6414, "step": 275400 }, { "epoch": 11.862859111857691, "learning_rate": 3.328395354349372e-07, "loss": 2.6788, "step": 275420 }, { "epoch": 11.863720549597279, "learning_rate": 3.327910534594583e-07, "loss": 2.5572, "step": 275440 }, { "epoch": 11.864581987336866, "learning_rate": 3.327425714839794e-07, "loss": 2.6496, "step": 275460 }, { "epoch": 11.865443425076453, "learning_rate": 3.326940895085004e-07, "loss": 2.5556, "step": 275480 }, { "epoch": 11.86630486281604, "learning_rate": 3.326456075330216e-07, "loss": 2.6158, "step": 275500 }, { "epoch": 11.867166300555628, "learning_rate": 3.3259712555754266e-07, "loss": 2.632, "step": 275520 }, { "epoch": 11.868027738295215, "learning_rate": 3.3254864358206383e-07, "loss": 2.7011, "step": 275540 }, { "epoch": 11.868889176034802, "learning_rate": 3.325001616065849e-07, "loss": 2.5745, "step": 275560 }, { "epoch": 11.86975061377439, "learning_rate": 3.3245167963110603e-07, "loss": 2.6675, "step": 275580 }, { "epoch": 11.870612051513977, "learning_rate": 3.324031976556271e-07, "loss": 2.5639, "step": 275600 }, { "epoch": 11.871473489253564, "learning_rate": 3.323547156801482e-07, "loss": 2.6428, "step": 275620 }, { "epoch": 11.872334926993151, "learning_rate": 3.323062337046693e-07, "loss": 2.6425, "step": 275640 }, { "epoch": 11.873196364732738, "learning_rate": 3.322577517291905e-07, "loss": 2.5434, "step": 275660 }, { "epoch": 11.874057802472326, "learning_rate": 3.3220926975371155e-07, "loss": 2.6906, "step": 275680 }, { "epoch": 11.874919240211913, "learning_rate": 3.321607877782326e-07, "loss": 2.6468, "step": 275700 }, { "epoch": 11.875780677951502, "learning_rate": 3.3211230580275374e-07, "loss": 2.6107, "step": 275720 }, { "epoch": 11.876642115691089, "learning_rate": 3.3206382382727487e-07, "loss": 2.6303, "step": 275740 }, { "epoch": 11.877503553430676, "learning_rate": 3.3201534185179594e-07, "loss": 2.6305, "step": 275760 }, { "epoch": 11.878364991170264, "learning_rate": 3.319668598763171e-07, "loss": 2.5388, "step": 275780 }, { "epoch": 11.87922642890985, "learning_rate": 3.319183779008382e-07, "loss": 2.6873, "step": 275800 }, { "epoch": 11.880087866649438, "learning_rate": 3.318698959253593e-07, "loss": 2.6399, "step": 275820 }, { "epoch": 11.880949304389025, "learning_rate": 3.318214139498804e-07, "loss": 2.687, "step": 275840 }, { "epoch": 11.881810742128613, "learning_rate": 3.317729319744015e-07, "loss": 2.5969, "step": 275860 }, { "epoch": 11.8826721798682, "learning_rate": 3.317244499989226e-07, "loss": 2.6786, "step": 275880 }, { "epoch": 11.883533617607787, "learning_rate": 3.3167596802344376e-07, "loss": 2.625, "step": 275900 }, { "epoch": 11.884395055347374, "learning_rate": 3.3162748604796483e-07, "loss": 2.6764, "step": 275920 }, { "epoch": 11.885256493086962, "learning_rate": 3.31579004072486e-07, "loss": 2.7203, "step": 275940 }, { "epoch": 11.886117930826549, "learning_rate": 3.31530522097007e-07, "loss": 2.7822, "step": 275960 }, { "epoch": 11.886979368566138, "learning_rate": 3.3148204012152815e-07, "loss": 2.5802, "step": 275980 }, { "epoch": 11.887840806305725, "learning_rate": 3.314335581460492e-07, "loss": 2.6592, "step": 276000 }, { "epoch": 11.888702244045312, "learning_rate": 3.313850761705703e-07, "loss": 2.7089, "step": 276020 }, { "epoch": 11.8895636817849, "learning_rate": 3.3133659419509147e-07, "loss": 2.6674, "step": 276040 }, { "epoch": 11.890425119524487, "learning_rate": 3.3128811221961254e-07, "loss": 2.784, "step": 276060 }, { "epoch": 11.891286557264074, "learning_rate": 3.3123963024413366e-07, "loss": 2.6551, "step": 276080 }, { "epoch": 11.892147995003661, "learning_rate": 3.3119114826865474e-07, "loss": 2.6224, "step": 276100 }, { "epoch": 11.893009432743249, "learning_rate": 3.311426662931759e-07, "loss": 2.7288, "step": 276120 }, { "epoch": 11.893870870482836, "learning_rate": 3.3109418431769693e-07, "loss": 2.7251, "step": 276140 }, { "epoch": 11.894732308222423, "learning_rate": 3.310457023422181e-07, "loss": 2.5628, "step": 276160 }, { "epoch": 11.89559374596201, "learning_rate": 3.309972203667392e-07, "loss": 2.6521, "step": 276180 }, { "epoch": 11.896455183701597, "learning_rate": 3.3094873839126036e-07, "loss": 2.6316, "step": 276200 }, { "epoch": 11.897316621441185, "learning_rate": 3.309002564157814e-07, "loss": 2.5206, "step": 276220 }, { "epoch": 11.898178059180772, "learning_rate": 3.308517744403025e-07, "loss": 2.6421, "step": 276240 }, { "epoch": 11.89903949692036, "learning_rate": 3.3080329246482357e-07, "loss": 2.6335, "step": 276260 }, { "epoch": 11.899900934659948, "learning_rate": 3.3075481048934475e-07, "loss": 2.6249, "step": 276280 }, { "epoch": 11.900762372399535, "learning_rate": 3.307063285138658e-07, "loss": 2.7035, "step": 276300 }, { "epoch": 11.901623810139123, "learning_rate": 3.30657846538387e-07, "loss": 2.6067, "step": 276320 }, { "epoch": 11.90248524787871, "learning_rate": 3.30609364562908e-07, "loss": 2.8189, "step": 276340 }, { "epoch": 11.903346685618297, "learning_rate": 3.3056088258742914e-07, "loss": 2.5865, "step": 276360 }, { "epoch": 11.904208123357884, "learning_rate": 3.305124006119502e-07, "loss": 2.6378, "step": 276380 }, { "epoch": 11.905069561097472, "learning_rate": 3.304639186364714e-07, "loss": 2.6276, "step": 276400 }, { "epoch": 11.905930998837059, "learning_rate": 3.3041543666099246e-07, "loss": 2.6133, "step": 276420 }, { "epoch": 11.906792436576646, "learning_rate": 3.3036695468551364e-07, "loss": 2.5795, "step": 276440 }, { "epoch": 11.907653874316233, "learning_rate": 3.3031847271003466e-07, "loss": 2.5603, "step": 276460 }, { "epoch": 11.90851531205582, "learning_rate": 3.3026999073455583e-07, "loss": 2.6023, "step": 276480 }, { "epoch": 11.909376749795408, "learning_rate": 3.3022150875907685e-07, "loss": 2.7661, "step": 276500 }, { "epoch": 11.910238187534995, "learning_rate": 3.3017302678359803e-07, "loss": 2.4933, "step": 276520 }, { "epoch": 11.911099625274584, "learning_rate": 3.301245448081191e-07, "loss": 2.7678, "step": 276540 }, { "epoch": 11.911961063014171, "learning_rate": 3.300760628326402e-07, "loss": 2.7137, "step": 276560 }, { "epoch": 11.912822500753759, "learning_rate": 3.3002758085716135e-07, "loss": 2.6119, "step": 276580 }, { "epoch": 11.913683938493346, "learning_rate": 3.2997909888168237e-07, "loss": 2.6618, "step": 276600 }, { "epoch": 11.914545376232933, "learning_rate": 3.299306169062035e-07, "loss": 2.5988, "step": 276620 }, { "epoch": 11.91540681397252, "learning_rate": 3.2988213493072457e-07, "loss": 2.8154, "step": 276640 }, { "epoch": 11.916268251712108, "learning_rate": 3.2983365295524574e-07, "loss": 2.7424, "step": 276660 }, { "epoch": 11.917129689451695, "learning_rate": 3.297851709797668e-07, "loss": 2.6548, "step": 276680 }, { "epoch": 11.917991127191282, "learning_rate": 3.29736689004288e-07, "loss": 2.5865, "step": 276700 }, { "epoch": 11.91885256493087, "learning_rate": 3.29688207028809e-07, "loss": 2.6357, "step": 276720 }, { "epoch": 11.919714002670457, "learning_rate": 3.296397250533302e-07, "loss": 2.5452, "step": 276740 }, { "epoch": 11.920575440410044, "learning_rate": 3.295912430778512e-07, "loss": 2.6517, "step": 276760 }, { "epoch": 11.921436878149631, "learning_rate": 3.295427611023724e-07, "loss": 2.7925, "step": 276780 }, { "epoch": 11.922298315889218, "learning_rate": 3.294942791268935e-07, "loss": 2.6985, "step": 276800 }, { "epoch": 11.923159753628806, "learning_rate": 3.2944579715141463e-07, "loss": 2.6887, "step": 276820 }, { "epoch": 11.924021191368395, "learning_rate": 3.2939731517593565e-07, "loss": 2.5938, "step": 276840 }, { "epoch": 11.924882629107982, "learning_rate": 3.2934883320045683e-07, "loss": 2.588, "step": 276860 }, { "epoch": 11.92574406684757, "learning_rate": 3.2930035122497785e-07, "loss": 2.8582, "step": 276880 }, { "epoch": 11.926605504587156, "learning_rate": 3.29251869249499e-07, "loss": 2.6669, "step": 276900 }, { "epoch": 11.927466942326744, "learning_rate": 3.292033872740201e-07, "loss": 2.6765, "step": 276920 }, { "epoch": 11.928328380066331, "learning_rate": 3.2915490529854127e-07, "loss": 2.5833, "step": 276940 }, { "epoch": 11.929189817805918, "learning_rate": 3.2910642332306234e-07, "loss": 2.6574, "step": 276960 }, { "epoch": 11.930051255545505, "learning_rate": 3.2905794134758347e-07, "loss": 2.6555, "step": 276980 }, { "epoch": 11.930912693285093, "learning_rate": 3.290094593721045e-07, "loss": 2.625, "step": 277000 }, { "epoch": 11.93177413102468, "learning_rate": 3.2896097739662567e-07, "loss": 2.7165, "step": 277020 }, { "epoch": 11.932635568764267, "learning_rate": 3.2891249542114674e-07, "loss": 2.6747, "step": 277040 }, { "epoch": 11.933497006503854, "learning_rate": 3.2886401344566786e-07, "loss": 2.6879, "step": 277060 }, { "epoch": 11.934358444243442, "learning_rate": 3.28815531470189e-07, "loss": 2.5516, "step": 277080 }, { "epoch": 11.935219881983029, "learning_rate": 3.2876704949471e-07, "loss": 2.632, "step": 277100 }, { "epoch": 11.936081319722618, "learning_rate": 3.287185675192312e-07, "loss": 2.8018, "step": 277120 }, { "epoch": 11.936942757462205, "learning_rate": 3.286700855437522e-07, "loss": 2.7521, "step": 277140 }, { "epoch": 11.937804195201792, "learning_rate": 3.286216035682734e-07, "loss": 2.585, "step": 277160 }, { "epoch": 11.93866563294138, "learning_rate": 3.2857312159279445e-07, "loss": 2.6209, "step": 277180 }, { "epoch": 11.939527070680967, "learning_rate": 3.285246396173156e-07, "loss": 2.7613, "step": 277200 }, { "epoch": 11.940388508420554, "learning_rate": 3.2847615764183664e-07, "loss": 2.6512, "step": 277220 }, { "epoch": 11.941249946160141, "learning_rate": 3.284276756663578e-07, "loss": 2.8816, "step": 277240 }, { "epoch": 11.942111383899729, "learning_rate": 3.2837919369087884e-07, "loss": 2.6116, "step": 277260 }, { "epoch": 11.942972821639316, "learning_rate": 3.283307117154e-07, "loss": 2.6445, "step": 277280 }, { "epoch": 11.943834259378903, "learning_rate": 3.282822297399211e-07, "loss": 2.7239, "step": 277300 }, { "epoch": 11.94469569711849, "learning_rate": 3.2823374776444227e-07, "loss": 2.7059, "step": 277320 }, { "epoch": 11.945557134858078, "learning_rate": 3.2818526578896334e-07, "loss": 2.5603, "step": 277340 }, { "epoch": 11.946418572597665, "learning_rate": 3.2813678381348446e-07, "loss": 2.6261, "step": 277360 }, { "epoch": 11.947280010337252, "learning_rate": 3.2808830183800553e-07, "loss": 2.7303, "step": 277380 }, { "epoch": 11.94814144807684, "learning_rate": 3.2803981986252666e-07, "loss": 2.7767, "step": 277400 }, { "epoch": 11.949002885816428, "learning_rate": 3.2799133788704773e-07, "loss": 2.649, "step": 277420 }, { "epoch": 11.949864323556016, "learning_rate": 3.279428559115689e-07, "loss": 2.736, "step": 277440 }, { "epoch": 11.950725761295603, "learning_rate": 3.2789437393609e-07, "loss": 2.6461, "step": 277460 }, { "epoch": 11.95158719903519, "learning_rate": 3.2784589196061105e-07, "loss": 2.6651, "step": 277480 }, { "epoch": 11.952448636774777, "learning_rate": 3.277974099851322e-07, "loss": 2.6215, "step": 277500 }, { "epoch": 11.953310074514365, "learning_rate": 3.277489280096533e-07, "loss": 2.7587, "step": 277520 }, { "epoch": 11.954171512253952, "learning_rate": 3.2770044603417437e-07, "loss": 2.5037, "step": 277540 }, { "epoch": 11.955032949993539, "learning_rate": 3.2765196405869555e-07, "loss": 2.6852, "step": 277560 }, { "epoch": 11.955894387733126, "learning_rate": 3.276034820832166e-07, "loss": 2.6569, "step": 277580 }, { "epoch": 11.956755825472714, "learning_rate": 3.2755500010773774e-07, "loss": 2.6077, "step": 277600 }, { "epoch": 11.9576172632123, "learning_rate": 3.275065181322588e-07, "loss": 2.7846, "step": 277620 }, { "epoch": 11.958478700951888, "learning_rate": 3.2745803615677983e-07, "loss": 2.7034, "step": 277640 }, { "epoch": 11.959340138691475, "learning_rate": 3.27409554181301e-07, "loss": 2.6402, "step": 277660 }, { "epoch": 11.960201576431064, "learning_rate": 3.273610722058221e-07, "loss": 2.6398, "step": 277680 }, { "epoch": 11.961063014170652, "learning_rate": 3.2731259023034326e-07, "loss": 2.6806, "step": 277700 }, { "epoch": 11.961924451910239, "learning_rate": 3.2726410825486433e-07, "loss": 2.6908, "step": 277720 }, { "epoch": 11.962785889649826, "learning_rate": 3.2721562627938546e-07, "loss": 2.5429, "step": 277740 }, { "epoch": 11.963647327389413, "learning_rate": 3.2716714430390653e-07, "loss": 2.6286, "step": 277760 }, { "epoch": 11.964508765129, "learning_rate": 3.2711866232842765e-07, "loss": 2.535, "step": 277780 }, { "epoch": 11.965370202868588, "learning_rate": 3.270701803529487e-07, "loss": 2.6253, "step": 277800 }, { "epoch": 11.966231640608175, "learning_rate": 3.270216983774699e-07, "loss": 2.7484, "step": 277820 }, { "epoch": 11.967093078347762, "learning_rate": 3.2697321640199097e-07, "loss": 2.9824, "step": 277840 }, { "epoch": 11.96795451608735, "learning_rate": 3.269247344265121e-07, "loss": 2.7533, "step": 277860 }, { "epoch": 11.968815953826937, "learning_rate": 3.2687625245103317e-07, "loss": 2.6578, "step": 277880 }, { "epoch": 11.969677391566524, "learning_rate": 3.2682777047555424e-07, "loss": 2.5848, "step": 277900 }, { "epoch": 11.970538829306111, "learning_rate": 3.2677928850007536e-07, "loss": 2.6526, "step": 277920 }, { "epoch": 11.971400267045698, "learning_rate": 3.2673080652459654e-07, "loss": 2.665, "step": 277940 }, { "epoch": 11.972261704785286, "learning_rate": 3.266823245491176e-07, "loss": 2.6287, "step": 277960 }, { "epoch": 11.973123142524875, "learning_rate": 3.266338425736388e-07, "loss": 2.7685, "step": 277980 }, { "epoch": 11.973984580264462, "learning_rate": 3.265853605981598e-07, "loss": 2.5883, "step": 278000 }, { "epoch": 11.97484601800405, "learning_rate": 3.2653687862268093e-07, "loss": 2.6511, "step": 278020 }, { "epoch": 11.975707455743636, "learning_rate": 3.26488396647202e-07, "loss": 2.6565, "step": 278040 }, { "epoch": 11.976568893483224, "learning_rate": 3.264399146717232e-07, "loss": 2.5511, "step": 278060 }, { "epoch": 11.977430331222811, "learning_rate": 3.2639143269624425e-07, "loss": 2.5764, "step": 278080 }, { "epoch": 11.978291768962398, "learning_rate": 3.2634295072076543e-07, "loss": 2.5598, "step": 278100 }, { "epoch": 11.979153206701985, "learning_rate": 3.2629446874528645e-07, "loss": 2.7291, "step": 278120 }, { "epoch": 11.980014644441573, "learning_rate": 3.262459867698076e-07, "loss": 2.7659, "step": 278140 }, { "epoch": 11.98087608218116, "learning_rate": 3.2619750479432865e-07, "loss": 2.7481, "step": 278160 }, { "epoch": 11.981737519920747, "learning_rate": 3.261490228188497e-07, "loss": 2.556, "step": 278180 }, { "epoch": 11.982598957660334, "learning_rate": 3.261005408433709e-07, "loss": 2.6587, "step": 278200 }, { "epoch": 11.983460395399922, "learning_rate": 3.2605205886789197e-07, "loss": 2.7716, "step": 278220 }, { "epoch": 11.98432183313951, "learning_rate": 3.260035768924131e-07, "loss": 2.7282, "step": 278240 }, { "epoch": 11.985183270879098, "learning_rate": 3.2595509491693416e-07, "loss": 2.6598, "step": 278260 }, { "epoch": 11.986044708618685, "learning_rate": 3.259066129414553e-07, "loss": 2.5216, "step": 278280 }, { "epoch": 11.986906146358272, "learning_rate": 3.2585813096597636e-07, "loss": 2.4636, "step": 278300 }, { "epoch": 11.98776758409786, "learning_rate": 3.2580964899049754e-07, "loss": 2.6891, "step": 278320 }, { "epoch": 11.988629021837447, "learning_rate": 3.257611670150186e-07, "loss": 2.7524, "step": 278340 }, { "epoch": 11.989490459577034, "learning_rate": 3.257126850395398e-07, "loss": 2.6747, "step": 278360 }, { "epoch": 11.990351897316621, "learning_rate": 3.256642030640608e-07, "loss": 2.6026, "step": 278380 }, { "epoch": 11.991213335056209, "learning_rate": 3.2561572108858193e-07, "loss": 2.6204, "step": 278400 }, { "epoch": 11.992074772795796, "learning_rate": 3.25567239113103e-07, "loss": 2.6967, "step": 278420 }, { "epoch": 11.992936210535383, "learning_rate": 3.255187571376242e-07, "loss": 2.7776, "step": 278440 }, { "epoch": 11.99379764827497, "learning_rate": 3.2547027516214525e-07, "loss": 2.6165, "step": 278460 }, { "epoch": 11.994659086014558, "learning_rate": 3.254217931866664e-07, "loss": 2.6045, "step": 278480 }, { "epoch": 11.995520523754145, "learning_rate": 3.2537331121118744e-07, "loss": 2.8232, "step": 278500 }, { "epoch": 11.996381961493732, "learning_rate": 3.253248292357086e-07, "loss": 2.6298, "step": 278520 }, { "epoch": 11.997243399233321, "learning_rate": 3.2527634726022964e-07, "loss": 2.4933, "step": 278540 }, { "epoch": 11.998104836972908, "learning_rate": 3.252278652847508e-07, "loss": 2.706, "step": 278560 }, { "epoch": 11.998966274712496, "learning_rate": 3.2517938330927194e-07, "loss": 2.5162, "step": 278580 }, { "epoch": 11.999827712452083, "learning_rate": 3.2513090133379307e-07, "loss": 2.5421, "step": 278600 }, { "epoch": 12.00068915019167, "learning_rate": 3.250824193583141e-07, "loss": 2.5815, "step": 278620 }, { "epoch": 12.001550587931257, "learning_rate": 3.2503393738283526e-07, "loss": 2.7044, "step": 278640 }, { "epoch": 12.002412025670845, "learning_rate": 3.249854554073563e-07, "loss": 2.6956, "step": 278660 }, { "epoch": 12.003273463410432, "learning_rate": 3.2493697343187746e-07, "loss": 2.8126, "step": 278680 }, { "epoch": 12.00413490115002, "learning_rate": 3.2488849145639853e-07, "loss": 2.6521, "step": 278700 }, { "epoch": 12.004996338889606, "learning_rate": 3.248400094809196e-07, "loss": 2.5069, "step": 278720 }, { "epoch": 12.005857776629194, "learning_rate": 3.247915275054408e-07, "loss": 2.5415, "step": 278740 }, { "epoch": 12.00671921436878, "learning_rate": 3.247430455299618e-07, "loss": 2.641, "step": 278760 }, { "epoch": 12.007580652108368, "learning_rate": 3.246945635544829e-07, "loss": 2.4992, "step": 278780 }, { "epoch": 12.008442089847955, "learning_rate": 3.24646081579004e-07, "loss": 2.6855, "step": 278800 }, { "epoch": 12.009303527587544, "learning_rate": 3.2459759960352517e-07, "loss": 2.722, "step": 278820 }, { "epoch": 12.010164965327132, "learning_rate": 3.2454911762804624e-07, "loss": 2.6642, "step": 278840 }, { "epoch": 12.011026403066719, "learning_rate": 3.245006356525674e-07, "loss": 2.5292, "step": 278860 }, { "epoch": 12.011887840806306, "learning_rate": 3.2445215367708844e-07, "loss": 2.7679, "step": 278880 }, { "epoch": 12.012749278545893, "learning_rate": 3.244036717016096e-07, "loss": 2.5001, "step": 278900 }, { "epoch": 12.01361071628548, "learning_rate": 3.2435518972613063e-07, "loss": 2.6657, "step": 278920 }, { "epoch": 12.014472154025068, "learning_rate": 3.243067077506518e-07, "loss": 2.6545, "step": 278940 }, { "epoch": 12.015333591764655, "learning_rate": 3.242582257751729e-07, "loss": 2.8526, "step": 278960 }, { "epoch": 12.016195029504242, "learning_rate": 3.2420974379969406e-07, "loss": 2.5763, "step": 278980 }, { "epoch": 12.01705646724383, "learning_rate": 3.241612618242151e-07, "loss": 2.667, "step": 279000 }, { "epoch": 12.017917904983417, "learning_rate": 3.2411277984873626e-07, "loss": 2.6374, "step": 279020 }, { "epoch": 12.018779342723004, "learning_rate": 3.240642978732573e-07, "loss": 2.7372, "step": 279040 }, { "epoch": 12.019640780462591, "learning_rate": 3.2401581589777845e-07, "loss": 2.5283, "step": 279060 }, { "epoch": 12.020502218202179, "learning_rate": 3.239673339222995e-07, "loss": 2.5684, "step": 279080 }, { "epoch": 12.021363655941768, "learning_rate": 3.239188519468207e-07, "loss": 2.5982, "step": 279100 }, { "epoch": 12.022225093681355, "learning_rate": 3.2387036997134177e-07, "loss": 2.8493, "step": 279120 }, { "epoch": 12.023086531420942, "learning_rate": 3.238218879958629e-07, "loss": 2.6987, "step": 279140 }, { "epoch": 12.02394796916053, "learning_rate": 3.2377340602038397e-07, "loss": 2.7163, "step": 279160 }, { "epoch": 12.024809406900117, "learning_rate": 3.237249240449051e-07, "loss": 2.6763, "step": 279180 }, { "epoch": 12.025670844639704, "learning_rate": 3.2367644206942616e-07, "loss": 2.5603, "step": 279200 }, { "epoch": 12.026532282379291, "learning_rate": 3.2362796009394734e-07, "loss": 2.6138, "step": 279220 }, { "epoch": 12.027393720118878, "learning_rate": 3.235794781184684e-07, "loss": 2.7566, "step": 279240 }, { "epoch": 12.028255157858466, "learning_rate": 3.2353099614298943e-07, "loss": 2.6024, "step": 279260 }, { "epoch": 12.029116595598053, "learning_rate": 3.234825141675106e-07, "loss": 2.7577, "step": 279280 }, { "epoch": 12.02997803333764, "learning_rate": 3.2343403219203163e-07, "loss": 2.8342, "step": 279300 }, { "epoch": 12.030839471077227, "learning_rate": 3.233855502165528e-07, "loss": 2.722, "step": 279320 }, { "epoch": 12.031700908816815, "learning_rate": 3.233370682410739e-07, "loss": 2.6587, "step": 279340 }, { "epoch": 12.032562346556402, "learning_rate": 3.2328858626559505e-07, "loss": 2.5825, "step": 279360 }, { "epoch": 12.03342378429599, "learning_rate": 3.232401042901161e-07, "loss": 2.5255, "step": 279380 }, { "epoch": 12.034285222035578, "learning_rate": 3.2319162231463725e-07, "loss": 2.761, "step": 279400 }, { "epoch": 12.035146659775165, "learning_rate": 3.2314314033915827e-07, "loss": 2.7216, "step": 279420 }, { "epoch": 12.036008097514753, "learning_rate": 3.2309465836367944e-07, "loss": 2.6263, "step": 279440 }, { "epoch": 12.03686953525434, "learning_rate": 3.230461763882005e-07, "loss": 2.7164, "step": 279460 }, { "epoch": 12.037730972993927, "learning_rate": 3.229976944127217e-07, "loss": 2.7095, "step": 279480 }, { "epoch": 12.038592410733514, "learning_rate": 3.2294921243724277e-07, "loss": 2.7882, "step": 279500 }, { "epoch": 12.039453848473102, "learning_rate": 3.229007304617639e-07, "loss": 2.6921, "step": 279520 }, { "epoch": 12.040315286212689, "learning_rate": 3.2285224848628496e-07, "loss": 2.6628, "step": 279540 }, { "epoch": 12.041176723952276, "learning_rate": 3.228037665108061e-07, "loss": 2.4556, "step": 279560 }, { "epoch": 12.042038161691863, "learning_rate": 3.2275528453532716e-07, "loss": 2.6819, "step": 279580 }, { "epoch": 12.04289959943145, "learning_rate": 3.2270680255984833e-07, "loss": 2.6355, "step": 279600 }, { "epoch": 12.043761037171038, "learning_rate": 3.226583205843694e-07, "loss": 2.6846, "step": 279620 }, { "epoch": 12.044622474910625, "learning_rate": 3.2260983860889053e-07, "loss": 2.7921, "step": 279640 }, { "epoch": 12.045483912650214, "learning_rate": 3.225613566334116e-07, "loss": 2.641, "step": 279660 }, { "epoch": 12.046345350389801, "learning_rate": 3.2251287465793267e-07, "loss": 2.5433, "step": 279680 }, { "epoch": 12.047206788129388, "learning_rate": 3.224643926824538e-07, "loss": 2.6853, "step": 279700 }, { "epoch": 12.048068225868976, "learning_rate": 3.22415910706975e-07, "loss": 2.6396, "step": 279720 }, { "epoch": 12.048929663608563, "learning_rate": 3.2236742873149605e-07, "loss": 2.6785, "step": 279740 }, { "epoch": 12.04979110134815, "learning_rate": 3.223189467560172e-07, "loss": 2.6297, "step": 279760 }, { "epoch": 12.050652539087737, "learning_rate": 3.2227046478053824e-07, "loss": 2.639, "step": 279780 }, { "epoch": 12.051513976827325, "learning_rate": 3.2222198280505926e-07, "loss": 2.6382, "step": 279800 }, { "epoch": 12.052375414566912, "learning_rate": 3.2217350082958044e-07, "loss": 2.4631, "step": 279820 }, { "epoch": 12.0532368523065, "learning_rate": 3.2212501885410146e-07, "loss": 2.7065, "step": 279840 }, { "epoch": 12.054098290046086, "learning_rate": 3.220765368786227e-07, "loss": 2.6827, "step": 279860 }, { "epoch": 12.054959727785674, "learning_rate": 3.2202805490314376e-07, "loss": 2.6372, "step": 279880 }, { "epoch": 12.055821165525261, "learning_rate": 3.219795729276649e-07, "loss": 2.7196, "step": 279900 }, { "epoch": 12.056682603264848, "learning_rate": 3.2193109095218595e-07, "loss": 2.6824, "step": 279920 }, { "epoch": 12.057544041004437, "learning_rate": 3.218826089767071e-07, "loss": 2.6147, "step": 279940 }, { "epoch": 12.058405478744024, "learning_rate": 3.2183412700122815e-07, "loss": 2.9152, "step": 279960 }, { "epoch": 12.059266916483612, "learning_rate": 3.2178564502574933e-07, "loss": 2.6484, "step": 279980 }, { "epoch": 12.060128354223199, "learning_rate": 3.217371630502704e-07, "loss": 2.678, "step": 280000 }, { "epoch": 12.060989791962786, "learning_rate": 3.216886810747915e-07, "loss": 2.6279, "step": 280020 }, { "epoch": 12.061851229702373, "learning_rate": 3.216401990993126e-07, "loss": 2.7064, "step": 280040 }, { "epoch": 12.06271266744196, "learning_rate": 3.215917171238337e-07, "loss": 2.7526, "step": 280060 }, { "epoch": 12.063574105181548, "learning_rate": 3.215432351483548e-07, "loss": 2.6908, "step": 280080 }, { "epoch": 12.064435542921135, "learning_rate": 3.2149475317287597e-07, "loss": 2.6723, "step": 280100 }, { "epoch": 12.065296980660722, "learning_rate": 3.2144627119739704e-07, "loss": 2.5986, "step": 280120 }, { "epoch": 12.06615841840031, "learning_rate": 3.213977892219182e-07, "loss": 2.5637, "step": 280140 }, { "epoch": 12.067019856139897, "learning_rate": 3.2134930724643924e-07, "loss": 2.6421, "step": 280160 }, { "epoch": 12.067881293879484, "learning_rate": 3.2130082527096036e-07, "loss": 2.8548, "step": 280180 }, { "epoch": 12.068742731619071, "learning_rate": 3.2125234329548143e-07, "loss": 2.718, "step": 280200 }, { "epoch": 12.06960416935866, "learning_rate": 3.212038613200026e-07, "loss": 2.4913, "step": 280220 }, { "epoch": 12.070465607098248, "learning_rate": 3.211553793445237e-07, "loss": 2.7582, "step": 280240 }, { "epoch": 12.071327044837835, "learning_rate": 3.2110689736904486e-07, "loss": 2.5788, "step": 280260 }, { "epoch": 12.072188482577422, "learning_rate": 3.210584153935659e-07, "loss": 2.4607, "step": 280280 }, { "epoch": 12.07304992031701, "learning_rate": 3.2100993341808705e-07, "loss": 2.6057, "step": 280300 }, { "epoch": 12.073911358056597, "learning_rate": 3.2096145144260807e-07, "loss": 2.7481, "step": 280320 }, { "epoch": 12.074772795796184, "learning_rate": 3.2091296946712914e-07, "loss": 2.5981, "step": 280340 }, { "epoch": 12.075634233535771, "learning_rate": 3.208644874916504e-07, "loss": 2.7732, "step": 280360 }, { "epoch": 12.076495671275358, "learning_rate": 3.208160055161714e-07, "loss": 2.5642, "step": 280380 }, { "epoch": 12.077357109014946, "learning_rate": 3.207675235406925e-07, "loss": 2.6959, "step": 280400 }, { "epoch": 12.078218546754533, "learning_rate": 3.207190415652136e-07, "loss": 2.709, "step": 280420 }, { "epoch": 12.07907998449412, "learning_rate": 3.206705595897347e-07, "loss": 2.6982, "step": 280440 }, { "epoch": 12.079941422233707, "learning_rate": 3.206220776142558e-07, "loss": 2.6734, "step": 280460 }, { "epoch": 12.080802859973295, "learning_rate": 3.2057359563877696e-07, "loss": 2.6566, "step": 280480 }, { "epoch": 12.081664297712884, "learning_rate": 3.2052511366329803e-07, "loss": 2.8173, "step": 280500 }, { "epoch": 12.08252573545247, "learning_rate": 3.204766316878192e-07, "loss": 2.6839, "step": 280520 }, { "epoch": 12.083387173192058, "learning_rate": 3.2042814971234023e-07, "loss": 2.6462, "step": 280540 }, { "epoch": 12.084248610931645, "learning_rate": 3.2037966773686135e-07, "loss": 2.5682, "step": 280560 }, { "epoch": 12.085110048671233, "learning_rate": 3.203311857613824e-07, "loss": 2.5912, "step": 280580 }, { "epoch": 12.08597148641082, "learning_rate": 3.202827037859036e-07, "loss": 2.7367, "step": 280600 }, { "epoch": 12.086832924150407, "learning_rate": 3.202342218104247e-07, "loss": 2.5062, "step": 280620 }, { "epoch": 12.087694361889994, "learning_rate": 3.2018573983494585e-07, "loss": 2.9282, "step": 280640 }, { "epoch": 12.088555799629582, "learning_rate": 3.2013725785946687e-07, "loss": 2.5171, "step": 280660 }, { "epoch": 12.089417237369169, "learning_rate": 3.2008877588398805e-07, "loss": 2.6362, "step": 280680 }, { "epoch": 12.090278675108756, "learning_rate": 3.2004029390850907e-07, "loss": 2.5138, "step": 280700 }, { "epoch": 12.091140112848343, "learning_rate": 3.1999181193303024e-07, "loss": 2.3689, "step": 280720 }, { "epoch": 12.09200155058793, "learning_rate": 3.199433299575513e-07, "loss": 2.673, "step": 280740 }, { "epoch": 12.092862988327518, "learning_rate": 3.198948479820725e-07, "loss": 2.7552, "step": 280760 }, { "epoch": 12.093724426067107, "learning_rate": 3.198463660065935e-07, "loss": 2.6517, "step": 280780 }, { "epoch": 12.094585863806694, "learning_rate": 3.197978840311147e-07, "loss": 2.6466, "step": 280800 }, { "epoch": 12.095447301546281, "learning_rate": 3.197494020556357e-07, "loss": 2.4338, "step": 280820 }, { "epoch": 12.096308739285869, "learning_rate": 3.197009200801569e-07, "loss": 2.7102, "step": 280840 }, { "epoch": 12.097170177025456, "learning_rate": 3.1965243810467796e-07, "loss": 2.5105, "step": 280860 }, { "epoch": 12.098031614765043, "learning_rate": 3.1960395612919903e-07, "loss": 2.6428, "step": 280880 }, { "epoch": 12.09889305250463, "learning_rate": 3.195554741537202e-07, "loss": 2.5714, "step": 280900 }, { "epoch": 12.099754490244218, "learning_rate": 3.195069921782412e-07, "loss": 2.7273, "step": 280920 }, { "epoch": 12.100615927983805, "learning_rate": 3.194585102027624e-07, "loss": 2.731, "step": 280940 }, { "epoch": 12.101477365723392, "learning_rate": 3.194100282272834e-07, "loss": 2.7161, "step": 280960 }, { "epoch": 12.10233880346298, "learning_rate": 3.193615462518046e-07, "loss": 2.7277, "step": 280980 }, { "epoch": 12.103200241202567, "learning_rate": 3.1931306427632567e-07, "loss": 2.7404, "step": 281000 }, { "epoch": 12.104061678942154, "learning_rate": 3.1926458230084685e-07, "loss": 2.7515, "step": 281020 }, { "epoch": 12.104923116681741, "learning_rate": 3.1921610032536786e-07, "loss": 2.4221, "step": 281040 }, { "epoch": 12.10578455442133, "learning_rate": 3.1916761834988904e-07, "loss": 2.7052, "step": 281060 }, { "epoch": 12.106645992160917, "learning_rate": 3.1911913637441006e-07, "loss": 2.7834, "step": 281080 }, { "epoch": 12.107507429900505, "learning_rate": 3.1907065439893124e-07, "loss": 2.8349, "step": 281100 }, { "epoch": 12.108368867640092, "learning_rate": 3.190221724234523e-07, "loss": 2.7697, "step": 281120 }, { "epoch": 12.109230305379679, "learning_rate": 3.189736904479735e-07, "loss": 2.6377, "step": 281140 }, { "epoch": 12.110091743119266, "learning_rate": 3.1892520847249456e-07, "loss": 2.5552, "step": 281160 }, { "epoch": 12.110953180858854, "learning_rate": 3.188767264970157e-07, "loss": 2.5621, "step": 281180 }, { "epoch": 12.11181461859844, "learning_rate": 3.188282445215367e-07, "loss": 2.744, "step": 281200 }, { "epoch": 12.112676056338028, "learning_rate": 3.187797625460579e-07, "loss": 2.766, "step": 281220 }, { "epoch": 12.113537494077615, "learning_rate": 3.1873128057057895e-07, "loss": 2.4181, "step": 281240 }, { "epoch": 12.114398931817203, "learning_rate": 3.1868279859510013e-07, "loss": 2.6702, "step": 281260 }, { "epoch": 12.11526036955679, "learning_rate": 3.186343166196212e-07, "loss": 2.4436, "step": 281280 }, { "epoch": 12.116121807296377, "learning_rate": 3.185858346441423e-07, "loss": 2.825, "step": 281300 }, { "epoch": 12.116983245035964, "learning_rate": 3.185373526686634e-07, "loss": 2.76, "step": 281320 }, { "epoch": 12.117844682775553, "learning_rate": 3.184888706931845e-07, "loss": 2.7774, "step": 281340 }, { "epoch": 12.11870612051514, "learning_rate": 3.184403887177056e-07, "loss": 2.8402, "step": 281360 }, { "epoch": 12.119567558254728, "learning_rate": 3.1839190674222677e-07, "loss": 2.5495, "step": 281380 }, { "epoch": 12.120428995994315, "learning_rate": 3.1834342476674784e-07, "loss": 2.5337, "step": 281400 }, { "epoch": 12.121290433733902, "learning_rate": 3.182949427912689e-07, "loss": 2.6677, "step": 281420 }, { "epoch": 12.12215187147349, "learning_rate": 3.1824646081579004e-07, "loss": 2.5728, "step": 281440 }, { "epoch": 12.123013309213077, "learning_rate": 3.1819797884031105e-07, "loss": 2.7419, "step": 281460 }, { "epoch": 12.123874746952664, "learning_rate": 3.1814949686483223e-07, "loss": 2.6847, "step": 281480 }, { "epoch": 12.124736184692251, "learning_rate": 3.181010148893533e-07, "loss": 2.5298, "step": 281500 }, { "epoch": 12.125597622431838, "learning_rate": 3.180525329138745e-07, "loss": 2.7803, "step": 281520 }, { "epoch": 12.126459060171426, "learning_rate": 3.1800405093839555e-07, "loss": 2.6021, "step": 281540 }, { "epoch": 12.127320497911013, "learning_rate": 3.179555689629167e-07, "loss": 2.6996, "step": 281560 }, { "epoch": 12.1281819356506, "learning_rate": 3.179070869874377e-07, "loss": 2.7017, "step": 281580 }, { "epoch": 12.129043373390187, "learning_rate": 3.1785860501195887e-07, "loss": 2.732, "step": 281600 }, { "epoch": 12.129904811129776, "learning_rate": 3.178101230364799e-07, "loss": 2.5736, "step": 281620 }, { "epoch": 12.130766248869364, "learning_rate": 3.177616410610011e-07, "loss": 2.6226, "step": 281640 }, { "epoch": 12.131627686608951, "learning_rate": 3.177131590855222e-07, "loss": 2.6062, "step": 281660 }, { "epoch": 12.132489124348538, "learning_rate": 3.176646771100433e-07, "loss": 2.6981, "step": 281680 }, { "epoch": 12.133350562088125, "learning_rate": 3.176161951345644e-07, "loss": 2.6678, "step": 281700 }, { "epoch": 12.134211999827713, "learning_rate": 3.175677131590855e-07, "loss": 2.5356, "step": 281720 }, { "epoch": 12.1350734375673, "learning_rate": 3.175192311836066e-07, "loss": 2.8069, "step": 281740 }, { "epoch": 12.135934875306887, "learning_rate": 3.1747074920812776e-07, "loss": 2.6572, "step": 281760 }, { "epoch": 12.136796313046474, "learning_rate": 3.1742226723264883e-07, "loss": 2.7969, "step": 281780 }, { "epoch": 12.137657750786062, "learning_rate": 3.1737378525716996e-07, "loss": 2.635, "step": 281800 }, { "epoch": 12.138519188525649, "learning_rate": 3.1732530328169103e-07, "loss": 2.7737, "step": 281820 }, { "epoch": 12.139380626265236, "learning_rate": 3.1727682130621215e-07, "loss": 2.6968, "step": 281840 }, { "epoch": 12.140242064004823, "learning_rate": 3.172283393307332e-07, "loss": 2.6456, "step": 281860 }, { "epoch": 12.14110350174441, "learning_rate": 3.171798573552544e-07, "loss": 2.7025, "step": 281880 }, { "epoch": 12.141964939483998, "learning_rate": 3.171313753797755e-07, "loss": 2.7486, "step": 281900 }, { "epoch": 12.142826377223587, "learning_rate": 3.1708289340429665e-07, "loss": 2.5102, "step": 281920 }, { "epoch": 12.143687814963174, "learning_rate": 3.1703441142881767e-07, "loss": 2.7809, "step": 281940 }, { "epoch": 12.144549252702761, "learning_rate": 3.1698592945333874e-07, "loss": 2.507, "step": 281960 }, { "epoch": 12.145410690442349, "learning_rate": 3.1693744747785987e-07, "loss": 2.7553, "step": 281980 }, { "epoch": 12.146272128181936, "learning_rate": 3.1688896550238094e-07, "loss": 2.6774, "step": 282000 }, { "epoch": 12.147133565921523, "learning_rate": 3.168404835269021e-07, "loss": 2.6614, "step": 282020 }, { "epoch": 12.14799500366111, "learning_rate": 3.167920015514232e-07, "loss": 2.6563, "step": 282040 }, { "epoch": 12.148856441400698, "learning_rate": 3.167435195759443e-07, "loss": 2.524, "step": 282060 }, { "epoch": 12.149717879140285, "learning_rate": 3.166950376004654e-07, "loss": 2.8813, "step": 282080 }, { "epoch": 12.150579316879872, "learning_rate": 3.166465556249865e-07, "loss": 2.5361, "step": 282100 }, { "epoch": 12.15144075461946, "learning_rate": 3.165980736495076e-07, "loss": 2.7264, "step": 282120 }, { "epoch": 12.152302192359047, "learning_rate": 3.165495916740288e-07, "loss": 2.86, "step": 282140 }, { "epoch": 12.153163630098634, "learning_rate": 3.1650110969854983e-07, "loss": 2.692, "step": 282160 }, { "epoch": 12.154025067838221, "learning_rate": 3.1645262772307095e-07, "loss": 2.556, "step": 282180 }, { "epoch": 12.15488650557781, "learning_rate": 3.16404145747592e-07, "loss": 2.7177, "step": 282200 }, { "epoch": 12.155747943317397, "learning_rate": 3.1635566377211315e-07, "loss": 2.6877, "step": 282220 }, { "epoch": 12.156609381056985, "learning_rate": 3.163071817966342e-07, "loss": 2.6802, "step": 282240 }, { "epoch": 12.157470818796572, "learning_rate": 3.162586998211554e-07, "loss": 2.7304, "step": 282260 }, { "epoch": 12.15833225653616, "learning_rate": 3.1621021784567647e-07, "loss": 2.6478, "step": 282280 }, { "epoch": 12.159193694275746, "learning_rate": 3.1616173587019764e-07, "loss": 2.7325, "step": 282300 }, { "epoch": 12.160055132015334, "learning_rate": 3.1611325389471866e-07, "loss": 2.6768, "step": 282320 }, { "epoch": 12.16091656975492, "learning_rate": 3.160647719192398e-07, "loss": 2.601, "step": 282340 }, { "epoch": 12.161778007494508, "learning_rate": 3.1601628994376086e-07, "loss": 2.634, "step": 282360 }, { "epoch": 12.162639445234095, "learning_rate": 3.1596780796828204e-07, "loss": 2.6286, "step": 282380 }, { "epoch": 12.163500882973683, "learning_rate": 3.159193259928031e-07, "loss": 2.6458, "step": 282400 }, { "epoch": 12.16436232071327, "learning_rate": 3.158708440173243e-07, "loss": 2.8183, "step": 282420 }, { "epoch": 12.165223758452857, "learning_rate": 3.158223620418453e-07, "loss": 2.7678, "step": 282440 }, { "epoch": 12.166085196192444, "learning_rate": 3.157738800663665e-07, "loss": 2.6007, "step": 282460 }, { "epoch": 12.166946633932033, "learning_rate": 3.157253980908875e-07, "loss": 2.6673, "step": 282480 }, { "epoch": 12.16780807167162, "learning_rate": 3.1567691611540857e-07, "loss": 2.827, "step": 282500 }, { "epoch": 12.168669509411208, "learning_rate": 3.1562843413992975e-07, "loss": 2.7046, "step": 282520 }, { "epoch": 12.169530947150795, "learning_rate": 3.155799521644508e-07, "loss": 2.788, "step": 282540 }, { "epoch": 12.170392384890382, "learning_rate": 3.1553147018897194e-07, "loss": 2.7242, "step": 282560 }, { "epoch": 12.17125382262997, "learning_rate": 3.15482988213493e-07, "loss": 2.7806, "step": 282580 }, { "epoch": 12.172115260369557, "learning_rate": 3.1543450623801414e-07, "loss": 2.6444, "step": 282600 }, { "epoch": 12.172976698109144, "learning_rate": 3.153860242625352e-07, "loss": 2.5881, "step": 282620 }, { "epoch": 12.173838135848731, "learning_rate": 3.153375422870564e-07, "loss": 2.7025, "step": 282640 }, { "epoch": 12.174699573588319, "learning_rate": 3.1528906031157746e-07, "loss": 2.4754, "step": 282660 }, { "epoch": 12.175561011327906, "learning_rate": 3.1524057833609864e-07, "loss": 2.8031, "step": 282680 }, { "epoch": 12.176422449067493, "learning_rate": 3.1519209636061966e-07, "loss": 2.6491, "step": 282700 }, { "epoch": 12.17728388680708, "learning_rate": 3.1514361438514083e-07, "loss": 2.6157, "step": 282720 }, { "epoch": 12.178145324546668, "learning_rate": 3.1509513240966185e-07, "loss": 2.5549, "step": 282740 }, { "epoch": 12.179006762286257, "learning_rate": 3.1504665043418303e-07, "loss": 2.7368, "step": 282760 }, { "epoch": 12.179868200025844, "learning_rate": 3.149981684587041e-07, "loss": 2.7308, "step": 282780 }, { "epoch": 12.180729637765431, "learning_rate": 3.149496864832253e-07, "loss": 2.6727, "step": 282800 }, { "epoch": 12.181591075505018, "learning_rate": 3.149012045077463e-07, "loss": 2.7379, "step": 282820 }, { "epoch": 12.182452513244606, "learning_rate": 3.148527225322675e-07, "loss": 2.562, "step": 282840 }, { "epoch": 12.183313950984193, "learning_rate": 3.148042405567885e-07, "loss": 2.5854, "step": 282860 }, { "epoch": 12.18417538872378, "learning_rate": 3.1475575858130967e-07, "loss": 2.5675, "step": 282880 }, { "epoch": 12.185036826463367, "learning_rate": 3.1470727660583074e-07, "loss": 2.5105, "step": 282900 }, { "epoch": 12.185898264202955, "learning_rate": 3.146587946303519e-07, "loss": 2.6114, "step": 282920 }, { "epoch": 12.186759701942542, "learning_rate": 3.14610312654873e-07, "loss": 2.6199, "step": 282940 }, { "epoch": 12.187621139682129, "learning_rate": 3.145618306793941e-07, "loss": 2.6921, "step": 282960 }, { "epoch": 12.188482577421716, "learning_rate": 3.1451334870391513e-07, "loss": 2.6386, "step": 282980 }, { "epoch": 12.189344015161304, "learning_rate": 3.144648667284363e-07, "loss": 2.5797, "step": 283000 }, { "epoch": 12.19020545290089, "learning_rate": 3.144163847529574e-07, "loss": 2.5456, "step": 283020 }, { "epoch": 12.19106689064048, "learning_rate": 3.1436790277747845e-07, "loss": 2.5795, "step": 283040 }, { "epoch": 12.191928328380067, "learning_rate": 3.1431942080199963e-07, "loss": 2.8023, "step": 283060 }, { "epoch": 12.192789766119654, "learning_rate": 3.1427093882652065e-07, "loss": 2.7558, "step": 283080 }, { "epoch": 12.193651203859242, "learning_rate": 3.1422245685104183e-07, "loss": 2.6029, "step": 283100 }, { "epoch": 12.194512641598829, "learning_rate": 3.1417397487556285e-07, "loss": 2.6282, "step": 283120 }, { "epoch": 12.195374079338416, "learning_rate": 3.14125492900084e-07, "loss": 2.777, "step": 283140 }, { "epoch": 12.196235517078003, "learning_rate": 3.140770109246051e-07, "loss": 2.7223, "step": 283160 }, { "epoch": 12.19709695481759, "learning_rate": 3.1402852894912627e-07, "loss": 2.5273, "step": 283180 }, { "epoch": 12.197958392557178, "learning_rate": 3.1398004697364734e-07, "loss": 2.7031, "step": 283200 }, { "epoch": 12.198819830296765, "learning_rate": 3.1393156499816847e-07, "loss": 2.517, "step": 283220 }, { "epoch": 12.199681268036352, "learning_rate": 3.138830830226895e-07, "loss": 2.5316, "step": 283240 }, { "epoch": 12.20054270577594, "learning_rate": 3.1383460104721066e-07, "loss": 2.69, "step": 283260 }, { "epoch": 12.201404143515527, "learning_rate": 3.1378611907173174e-07, "loss": 2.6164, "step": 283280 }, { "epoch": 12.202265581255114, "learning_rate": 3.137376370962529e-07, "loss": 2.6084, "step": 283300 }, { "epoch": 12.203127018994703, "learning_rate": 3.13689155120774e-07, "loss": 2.79, "step": 283320 }, { "epoch": 12.20398845673429, "learning_rate": 3.136406731452951e-07, "loss": 2.7838, "step": 283340 }, { "epoch": 12.204849894473877, "learning_rate": 3.1359219116981613e-07, "loss": 2.5918, "step": 283360 }, { "epoch": 12.205711332213465, "learning_rate": 3.135437091943373e-07, "loss": 2.6417, "step": 283380 }, { "epoch": 12.206572769953052, "learning_rate": 3.134952272188583e-07, "loss": 2.4747, "step": 283400 }, { "epoch": 12.20743420769264, "learning_rate": 3.1344674524337955e-07, "loss": 2.6172, "step": 283420 }, { "epoch": 12.208295645432226, "learning_rate": 3.133982632679006e-07, "loss": 2.4866, "step": 283440 }, { "epoch": 12.209157083171814, "learning_rate": 3.1334978129242175e-07, "loss": 2.9154, "step": 283460 }, { "epoch": 12.210018520911401, "learning_rate": 3.133012993169428e-07, "loss": 2.7597, "step": 283480 }, { "epoch": 12.210879958650988, "learning_rate": 3.1325281734146395e-07, "loss": 2.668, "step": 283500 }, { "epoch": 12.211741396390575, "learning_rate": 3.13204335365985e-07, "loss": 2.5891, "step": 283520 }, { "epoch": 12.212602834130163, "learning_rate": 3.131558533905062e-07, "loss": 2.7841, "step": 283540 }, { "epoch": 12.21346427186975, "learning_rate": 3.1310737141502727e-07, "loss": 2.7817, "step": 283560 }, { "epoch": 12.214325709609337, "learning_rate": 3.1305888943954834e-07, "loss": 2.8051, "step": 283580 }, { "epoch": 12.215187147348926, "learning_rate": 3.1301040746406946e-07, "loss": 2.7293, "step": 283600 }, { "epoch": 12.216048585088513, "learning_rate": 3.129619254885905e-07, "loss": 2.7359, "step": 283620 }, { "epoch": 12.2169100228281, "learning_rate": 3.1291344351311166e-07, "loss": 2.6884, "step": 283640 }, { "epoch": 12.217771460567688, "learning_rate": 3.1286496153763273e-07, "loss": 2.721, "step": 283660 }, { "epoch": 12.218632898307275, "learning_rate": 3.128164795621539e-07, "loss": 2.5913, "step": 283680 }, { "epoch": 12.219494336046862, "learning_rate": 3.12767997586675e-07, "loss": 2.6444, "step": 283700 }, { "epoch": 12.22035577378645, "learning_rate": 3.127195156111961e-07, "loss": 2.6475, "step": 283720 }, { "epoch": 12.221217211526037, "learning_rate": 3.126710336357172e-07, "loss": 2.693, "step": 283740 }, { "epoch": 12.222078649265624, "learning_rate": 3.126225516602383e-07, "loss": 2.7412, "step": 283760 }, { "epoch": 12.222940087005211, "learning_rate": 3.1257406968475937e-07, "loss": 2.7226, "step": 283780 }, { "epoch": 12.223801524744799, "learning_rate": 3.1252558770928055e-07, "loss": 2.6661, "step": 283800 }, { "epoch": 12.224662962484386, "learning_rate": 3.124771057338016e-07, "loss": 2.646, "step": 283820 }, { "epoch": 12.225524400223973, "learning_rate": 3.1242862375832274e-07, "loss": 2.6183, "step": 283840 }, { "epoch": 12.22638583796356, "learning_rate": 3.123801417828438e-07, "loss": 2.6285, "step": 283860 }, { "epoch": 12.22724727570315, "learning_rate": 3.1233165980736494e-07, "loss": 2.7797, "step": 283880 }, { "epoch": 12.228108713442737, "learning_rate": 3.12283177831886e-07, "loss": 2.5849, "step": 283900 }, { "epoch": 12.228970151182324, "learning_rate": 3.1223469585640724e-07, "loss": 2.651, "step": 283920 }, { "epoch": 12.229831588921911, "learning_rate": 3.1218621388092826e-07, "loss": 2.6335, "step": 283940 }, { "epoch": 12.230693026661498, "learning_rate": 3.121377319054494e-07, "loss": 2.4971, "step": 283960 }, { "epoch": 12.231554464401086, "learning_rate": 3.1208924992997046e-07, "loss": 2.5143, "step": 283980 }, { "epoch": 12.232415902140673, "learning_rate": 3.120407679544916e-07, "loss": 2.662, "step": 284000 }, { "epoch": 12.23327733988026, "learning_rate": 3.1199228597901265e-07, "loss": 2.7152, "step": 284020 }, { "epoch": 12.234138777619847, "learning_rate": 3.1194380400353383e-07, "loss": 2.5785, "step": 284040 }, { "epoch": 12.235000215359435, "learning_rate": 3.118953220280549e-07, "loss": 2.6094, "step": 284060 }, { "epoch": 12.235861653099022, "learning_rate": 3.118468400525761e-07, "loss": 2.7845, "step": 284080 }, { "epoch": 12.236723090838609, "learning_rate": 3.117983580770971e-07, "loss": 2.594, "step": 284100 }, { "epoch": 12.237584528578196, "learning_rate": 3.1174987610161817e-07, "loss": 2.7624, "step": 284120 }, { "epoch": 12.238445966317784, "learning_rate": 3.117013941261393e-07, "loss": 2.6621, "step": 284140 }, { "epoch": 12.239307404057373, "learning_rate": 3.1165291215066036e-07, "loss": 2.7222, "step": 284160 }, { "epoch": 12.24016884179696, "learning_rate": 3.1160443017518154e-07, "loss": 2.6901, "step": 284180 }, { "epoch": 12.241030279536547, "learning_rate": 3.115559481997026e-07, "loss": 2.7473, "step": 284200 }, { "epoch": 12.241891717276134, "learning_rate": 3.1150746622422374e-07, "loss": 2.6592, "step": 284220 }, { "epoch": 12.242753155015722, "learning_rate": 3.114589842487448e-07, "loss": 2.6421, "step": 284240 }, { "epoch": 12.243614592755309, "learning_rate": 3.1141050227326593e-07, "loss": 2.602, "step": 284260 }, { "epoch": 12.244476030494896, "learning_rate": 3.11362020297787e-07, "loss": 2.6711, "step": 284280 }, { "epoch": 12.245337468234483, "learning_rate": 3.113135383223082e-07, "loss": 2.7425, "step": 284300 }, { "epoch": 12.24619890597407, "learning_rate": 3.1126505634682925e-07, "loss": 2.6025, "step": 284320 }, { "epoch": 12.247060343713658, "learning_rate": 3.1121657437135043e-07, "loss": 2.7317, "step": 284340 }, { "epoch": 12.247921781453245, "learning_rate": 3.1116809239587145e-07, "loss": 2.6941, "step": 284360 }, { "epoch": 12.248783219192832, "learning_rate": 3.111196104203926e-07, "loss": 2.6338, "step": 284380 }, { "epoch": 12.24964465693242, "learning_rate": 3.1107112844491365e-07, "loss": 2.6764, "step": 284400 }, { "epoch": 12.250506094672007, "learning_rate": 3.110226464694348e-07, "loss": 2.5445, "step": 284420 }, { "epoch": 12.251367532411596, "learning_rate": 3.109741644939559e-07, "loss": 2.6504, "step": 284440 }, { "epoch": 12.252228970151183, "learning_rate": 3.1092568251847707e-07, "loss": 2.5548, "step": 284460 }, { "epoch": 12.25309040789077, "learning_rate": 3.108772005429981e-07, "loss": 2.7473, "step": 284480 }, { "epoch": 12.253951845630358, "learning_rate": 3.1082871856751927e-07, "loss": 2.716, "step": 284500 }, { "epoch": 12.254813283369945, "learning_rate": 3.107802365920403e-07, "loss": 2.787, "step": 284520 }, { "epoch": 12.255674721109532, "learning_rate": 3.1073175461656146e-07, "loss": 2.5552, "step": 284540 }, { "epoch": 12.25653615884912, "learning_rate": 3.1068327264108253e-07, "loss": 2.5611, "step": 284560 }, { "epoch": 12.257397596588707, "learning_rate": 3.106347906656037e-07, "loss": 2.6479, "step": 284580 }, { "epoch": 12.258259034328294, "learning_rate": 3.1058630869012473e-07, "loss": 2.7255, "step": 284600 }, { "epoch": 12.259120472067881, "learning_rate": 3.105378267146459e-07, "loss": 2.6212, "step": 284620 }, { "epoch": 12.259981909807468, "learning_rate": 3.1048934473916693e-07, "loss": 2.4974, "step": 284640 }, { "epoch": 12.260843347547056, "learning_rate": 3.10440862763688e-07, "loss": 2.6343, "step": 284660 }, { "epoch": 12.261704785286643, "learning_rate": 3.103923807882092e-07, "loss": 2.6822, "step": 284680 }, { "epoch": 12.26256622302623, "learning_rate": 3.1034389881273025e-07, "loss": 2.6699, "step": 284700 }, { "epoch": 12.263427660765817, "learning_rate": 3.102954168372514e-07, "loss": 2.4896, "step": 284720 }, { "epoch": 12.264289098505406, "learning_rate": 3.1024693486177244e-07, "loss": 2.6407, "step": 284740 }, { "epoch": 12.265150536244994, "learning_rate": 3.1019845288629357e-07, "loss": 2.7113, "step": 284760 }, { "epoch": 12.26601197398458, "learning_rate": 3.1014997091081464e-07, "loss": 2.6378, "step": 284780 }, { "epoch": 12.266873411724168, "learning_rate": 3.101014889353358e-07, "loss": 2.7009, "step": 284800 }, { "epoch": 12.267734849463755, "learning_rate": 3.100530069598569e-07, "loss": 2.7187, "step": 284820 }, { "epoch": 12.268596287203342, "learning_rate": 3.1000452498437807e-07, "loss": 2.7304, "step": 284840 }, { "epoch": 12.26945772494293, "learning_rate": 3.099560430088991e-07, "loss": 2.6793, "step": 284860 }, { "epoch": 12.270319162682517, "learning_rate": 3.0990756103342026e-07, "loss": 2.5475, "step": 284880 }, { "epoch": 12.271180600422104, "learning_rate": 3.098590790579413e-07, "loss": 2.5963, "step": 284900 }, { "epoch": 12.272042038161691, "learning_rate": 3.0981059708246246e-07, "loss": 2.8119, "step": 284920 }, { "epoch": 12.272903475901279, "learning_rate": 3.0976211510698353e-07, "loss": 2.6846, "step": 284940 }, { "epoch": 12.273764913640866, "learning_rate": 3.097136331315047e-07, "loss": 2.6876, "step": 284960 }, { "epoch": 12.274626351380453, "learning_rate": 3.096651511560258e-07, "loss": 2.5761, "step": 284980 }, { "epoch": 12.275487789120042, "learning_rate": 3.096166691805469e-07, "loss": 2.5684, "step": 285000 }, { "epoch": 12.27634922685963, "learning_rate": 3.095681872050679e-07, "loss": 2.6118, "step": 285020 }, { "epoch": 12.277210664599217, "learning_rate": 3.095197052295891e-07, "loss": 2.6284, "step": 285040 }, { "epoch": 12.278072102338804, "learning_rate": 3.0947122325411017e-07, "loss": 2.7455, "step": 285060 }, { "epoch": 12.278933540078391, "learning_rate": 3.0942274127863135e-07, "loss": 2.676, "step": 285080 }, { "epoch": 12.279794977817978, "learning_rate": 3.093742593031524e-07, "loss": 2.7163, "step": 285100 }, { "epoch": 12.280656415557566, "learning_rate": 3.0932577732767354e-07, "loss": 2.5859, "step": 285120 }, { "epoch": 12.281517853297153, "learning_rate": 3.0927729535219456e-07, "loss": 2.7292, "step": 285140 }, { "epoch": 12.28237929103674, "learning_rate": 3.0922881337671574e-07, "loss": 2.6502, "step": 285160 }, { "epoch": 12.283240728776327, "learning_rate": 3.0918033140123676e-07, "loss": 2.6488, "step": 285180 }, { "epoch": 12.284102166515915, "learning_rate": 3.091318494257579e-07, "loss": 2.6687, "step": 285200 }, { "epoch": 12.284963604255502, "learning_rate": 3.0908336745027906e-07, "loss": 2.8292, "step": 285220 }, { "epoch": 12.28582504199509, "learning_rate": 3.090348854748001e-07, "loss": 2.5875, "step": 285240 }, { "epoch": 12.286686479734676, "learning_rate": 3.0898640349932125e-07, "loss": 2.6252, "step": 285260 }, { "epoch": 12.287547917474264, "learning_rate": 3.0893792152384227e-07, "loss": 2.6112, "step": 285280 }, { "epoch": 12.288409355213853, "learning_rate": 3.0888943954836345e-07, "loss": 2.625, "step": 285300 }, { "epoch": 12.28927079295344, "learning_rate": 3.088409575728845e-07, "loss": 2.6753, "step": 285320 }, { "epoch": 12.290132230693027, "learning_rate": 3.087924755974057e-07, "loss": 2.6782, "step": 285340 }, { "epoch": 12.290993668432614, "learning_rate": 3.0874399362192677e-07, "loss": 2.6865, "step": 285360 }, { "epoch": 12.291855106172202, "learning_rate": 3.086955116464479e-07, "loss": 2.6781, "step": 285380 }, { "epoch": 12.292716543911789, "learning_rate": 3.086470296709689e-07, "loss": 2.5961, "step": 285400 }, { "epoch": 12.293577981651376, "learning_rate": 3.085985476954901e-07, "loss": 2.6303, "step": 285420 }, { "epoch": 12.294439419390963, "learning_rate": 3.0855006572001116e-07, "loss": 2.558, "step": 285440 }, { "epoch": 12.29530085713055, "learning_rate": 3.0850158374453234e-07, "loss": 2.5607, "step": 285460 }, { "epoch": 12.296162294870138, "learning_rate": 3.084531017690534e-07, "loss": 2.6177, "step": 285480 }, { "epoch": 12.297023732609725, "learning_rate": 3.0840461979357454e-07, "loss": 2.6011, "step": 285500 }, { "epoch": 12.297885170349312, "learning_rate": 3.083561378180956e-07, "loss": 2.7173, "step": 285520 }, { "epoch": 12.2987466080889, "learning_rate": 3.0830765584261673e-07, "loss": 2.5973, "step": 285540 }, { "epoch": 12.299608045828489, "learning_rate": 3.082591738671378e-07, "loss": 2.6453, "step": 285560 }, { "epoch": 12.300469483568076, "learning_rate": 3.08210691891659e-07, "loss": 2.6752, "step": 285580 }, { "epoch": 12.301330921307663, "learning_rate": 3.0816220991618005e-07, "loss": 2.6393, "step": 285600 }, { "epoch": 12.30219235904725, "learning_rate": 3.081137279407012e-07, "loss": 2.6705, "step": 285620 }, { "epoch": 12.303053796786838, "learning_rate": 3.0806524596522225e-07, "loss": 2.6462, "step": 285640 }, { "epoch": 12.303915234526425, "learning_rate": 3.0801676398974337e-07, "loss": 2.6627, "step": 285660 }, { "epoch": 12.304776672266012, "learning_rate": 3.0796828201426444e-07, "loss": 2.5752, "step": 285680 }, { "epoch": 12.3056381100056, "learning_rate": 3.079198000387857e-07, "loss": 2.6345, "step": 285700 }, { "epoch": 12.306499547745187, "learning_rate": 3.078713180633067e-07, "loss": 2.6414, "step": 285720 }, { "epoch": 12.307360985484774, "learning_rate": 3.0782283608782776e-07, "loss": 2.695, "step": 285740 }, { "epoch": 12.308222423224361, "learning_rate": 3.077743541123489e-07, "loss": 2.5002, "step": 285760 }, { "epoch": 12.309083860963948, "learning_rate": 3.077258721368699e-07, "loss": 2.6504, "step": 285780 }, { "epoch": 12.309945298703536, "learning_rate": 3.076773901613911e-07, "loss": 2.7466, "step": 285800 }, { "epoch": 12.310806736443123, "learning_rate": 3.0762890818591216e-07, "loss": 2.6289, "step": 285820 }, { "epoch": 12.31166817418271, "learning_rate": 3.0758042621043333e-07, "loss": 2.6796, "step": 285840 }, { "epoch": 12.312529611922299, "learning_rate": 3.0753194423495446e-07, "loss": 2.5462, "step": 285860 }, { "epoch": 12.313391049661886, "learning_rate": 3.0748346225947553e-07, "loss": 2.6527, "step": 285880 }, { "epoch": 12.314252487401474, "learning_rate": 3.074349802839966e-07, "loss": 2.499, "step": 285900 }, { "epoch": 12.31511392514106, "learning_rate": 3.073864983085177e-07, "loss": 2.6724, "step": 285920 }, { "epoch": 12.315975362880648, "learning_rate": 3.073380163330388e-07, "loss": 2.6488, "step": 285940 }, { "epoch": 12.316836800620235, "learning_rate": 3.0728953435756e-07, "loss": 2.6322, "step": 285960 }, { "epoch": 12.317698238359823, "learning_rate": 3.0724105238208105e-07, "loss": 2.6622, "step": 285980 }, { "epoch": 12.31855967609941, "learning_rate": 3.0719257040660217e-07, "loss": 2.5092, "step": 286000 }, { "epoch": 12.319421113838997, "learning_rate": 3.0714408843112324e-07, "loss": 2.8079, "step": 286020 }, { "epoch": 12.320282551578584, "learning_rate": 3.0709560645564437e-07, "loss": 2.687, "step": 286040 }, { "epoch": 12.321143989318172, "learning_rate": 3.0704712448016544e-07, "loss": 2.7085, "step": 286060 }, { "epoch": 12.322005427057759, "learning_rate": 3.069986425046866e-07, "loss": 2.6768, "step": 286080 }, { "epoch": 12.322866864797346, "learning_rate": 3.069501605292077e-07, "loss": 2.5259, "step": 286100 }, { "epoch": 12.323728302536933, "learning_rate": 3.0690167855372886e-07, "loss": 2.4748, "step": 286120 }, { "epoch": 12.324589740276522, "learning_rate": 3.068531965782499e-07, "loss": 2.5445, "step": 286140 }, { "epoch": 12.32545117801611, "learning_rate": 3.06804714602771e-07, "loss": 2.6947, "step": 286160 }, { "epoch": 12.326312615755697, "learning_rate": 3.067562326272921e-07, "loss": 2.7624, "step": 286180 }, { "epoch": 12.327174053495284, "learning_rate": 3.0670775065181326e-07, "loss": 2.6549, "step": 286200 }, { "epoch": 12.328035491234871, "learning_rate": 3.0665926867633433e-07, "loss": 2.6655, "step": 286220 }, { "epoch": 12.328896928974459, "learning_rate": 3.066107867008555e-07, "loss": 2.6721, "step": 286240 }, { "epoch": 12.329758366714046, "learning_rate": 3.065623047253765e-07, "loss": 2.4617, "step": 286260 }, { "epoch": 12.330619804453633, "learning_rate": 3.0651382274989765e-07, "loss": 2.6777, "step": 286280 }, { "epoch": 12.33148124219322, "learning_rate": 3.064653407744187e-07, "loss": 2.7041, "step": 286300 }, { "epoch": 12.332342679932808, "learning_rate": 3.064168587989398e-07, "loss": 2.6977, "step": 286320 }, { "epoch": 12.333204117672395, "learning_rate": 3.0636837682346097e-07, "loss": 2.6874, "step": 286340 }, { "epoch": 12.334065555411982, "learning_rate": 3.0631989484798204e-07, "loss": 2.5615, "step": 286360 }, { "epoch": 12.33492699315157, "learning_rate": 3.0627141287250316e-07, "loss": 2.6366, "step": 286380 }, { "epoch": 12.335788430891157, "learning_rate": 3.0622293089702424e-07, "loss": 2.7173, "step": 286400 }, { "epoch": 12.336649868630746, "learning_rate": 3.0617444892154536e-07, "loss": 2.6211, "step": 286420 }, { "epoch": 12.337511306370333, "learning_rate": 3.0612596694606643e-07, "loss": 2.5845, "step": 286440 }, { "epoch": 12.33837274410992, "learning_rate": 3.060774849705876e-07, "loss": 2.5355, "step": 286460 }, { "epoch": 12.339234181849507, "learning_rate": 3.060290029951087e-07, "loss": 2.4594, "step": 286480 }, { "epoch": 12.340095619589095, "learning_rate": 3.0598052101962986e-07, "loss": 2.6302, "step": 286500 }, { "epoch": 12.340957057328682, "learning_rate": 3.059320390441509e-07, "loss": 2.7743, "step": 286520 }, { "epoch": 12.341818495068269, "learning_rate": 3.05883557068672e-07, "loss": 2.6027, "step": 286540 }, { "epoch": 12.342679932807856, "learning_rate": 3.0583507509319307e-07, "loss": 2.6636, "step": 286560 }, { "epoch": 12.343541370547443, "learning_rate": 3.0578659311771425e-07, "loss": 2.6098, "step": 286580 }, { "epoch": 12.34440280828703, "learning_rate": 3.057381111422353e-07, "loss": 2.7154, "step": 286600 }, { "epoch": 12.345264246026618, "learning_rate": 3.056896291667565e-07, "loss": 2.5915, "step": 286620 }, { "epoch": 12.346125683766205, "learning_rate": 3.056411471912775e-07, "loss": 2.6958, "step": 286640 }, { "epoch": 12.346987121505792, "learning_rate": 3.055926652157987e-07, "loss": 2.6378, "step": 286660 }, { "epoch": 12.34784855924538, "learning_rate": 3.055441832403197e-07, "loss": 2.7855, "step": 286680 }, { "epoch": 12.348709996984969, "learning_rate": 3.054957012648409e-07, "loss": 2.7904, "step": 286700 }, { "epoch": 12.349571434724556, "learning_rate": 3.0544721928936196e-07, "loss": 2.7063, "step": 286720 }, { "epoch": 12.350432872464143, "learning_rate": 3.0539873731388314e-07, "loss": 2.6036, "step": 286740 }, { "epoch": 12.35129431020373, "learning_rate": 3.053502553384042e-07, "loss": 2.6396, "step": 286760 }, { "epoch": 12.352155747943318, "learning_rate": 3.0530177336292534e-07, "loss": 2.7558, "step": 286780 }, { "epoch": 12.353017185682905, "learning_rate": 3.0525329138744635e-07, "loss": 2.5817, "step": 286800 }, { "epoch": 12.353878623422492, "learning_rate": 3.052048094119674e-07, "loss": 2.7277, "step": 286820 }, { "epoch": 12.35474006116208, "learning_rate": 3.051563274364886e-07, "loss": 2.6564, "step": 286840 }, { "epoch": 12.355601498901667, "learning_rate": 3.051078454610097e-07, "loss": 2.5541, "step": 286860 }, { "epoch": 12.356462936641254, "learning_rate": 3.0505936348553085e-07, "loss": 2.4774, "step": 286880 }, { "epoch": 12.357324374380841, "learning_rate": 3.0501088151005187e-07, "loss": 2.8436, "step": 286900 }, { "epoch": 12.358185812120428, "learning_rate": 3.04962399534573e-07, "loss": 2.6681, "step": 286920 }, { "epoch": 12.359047249860016, "learning_rate": 3.0491391755909407e-07, "loss": 2.692, "step": 286940 }, { "epoch": 12.359908687599603, "learning_rate": 3.048654355836152e-07, "loss": 2.6182, "step": 286960 }, { "epoch": 12.360770125339192, "learning_rate": 3.048169536081363e-07, "loss": 2.7399, "step": 286980 }, { "epoch": 12.36163156307878, "learning_rate": 3.047684716326575e-07, "loss": 2.7216, "step": 287000 }, { "epoch": 12.362493000818366, "learning_rate": 3.047199896571785e-07, "loss": 2.5714, "step": 287020 }, { "epoch": 12.363354438557954, "learning_rate": 3.046715076816997e-07, "loss": 2.6329, "step": 287040 }, { "epoch": 12.364215876297541, "learning_rate": 3.046230257062207e-07, "loss": 2.7162, "step": 287060 }, { "epoch": 12.365077314037128, "learning_rate": 3.045745437307419e-07, "loss": 2.4998, "step": 287080 }, { "epoch": 12.365938751776715, "learning_rate": 3.0452606175526296e-07, "loss": 2.6335, "step": 287100 }, { "epoch": 12.366800189516303, "learning_rate": 3.0447757977978413e-07, "loss": 2.6795, "step": 287120 }, { "epoch": 12.36766162725589, "learning_rate": 3.044290978043052e-07, "loss": 2.7349, "step": 287140 }, { "epoch": 12.368523064995477, "learning_rate": 3.0438061582882633e-07, "loss": 2.5806, "step": 287160 }, { "epoch": 12.369384502735064, "learning_rate": 3.0433213385334735e-07, "loss": 2.6251, "step": 287180 }, { "epoch": 12.370245940474652, "learning_rate": 3.042836518778685e-07, "loss": 2.51, "step": 287200 }, { "epoch": 12.371107378214239, "learning_rate": 3.042351699023896e-07, "loss": 2.5275, "step": 287220 }, { "epoch": 12.371968815953826, "learning_rate": 3.041866879269108e-07, "loss": 2.6751, "step": 287240 }, { "epoch": 12.372830253693415, "learning_rate": 3.0413820595143185e-07, "loss": 2.5879, "step": 287260 }, { "epoch": 12.373691691433002, "learning_rate": 3.0408972397595297e-07, "loss": 2.5084, "step": 287280 }, { "epoch": 12.37455312917259, "learning_rate": 3.0404124200047404e-07, "loss": 2.605, "step": 287300 }, { "epoch": 12.375414566912177, "learning_rate": 3.0399276002499517e-07, "loss": 2.631, "step": 287320 }, { "epoch": 12.376276004651764, "learning_rate": 3.0394427804951624e-07, "loss": 2.6851, "step": 287340 }, { "epoch": 12.377137442391351, "learning_rate": 3.038957960740373e-07, "loss": 2.5198, "step": 287360 }, { "epoch": 12.377998880130939, "learning_rate": 3.038473140985585e-07, "loss": 2.6957, "step": 287380 }, { "epoch": 12.378860317870526, "learning_rate": 3.0379883212307956e-07, "loss": 2.6435, "step": 287400 }, { "epoch": 12.379721755610113, "learning_rate": 3.037503501476007e-07, "loss": 2.5182, "step": 287420 }, { "epoch": 12.3805831933497, "learning_rate": 3.037018681721217e-07, "loss": 2.7017, "step": 287440 }, { "epoch": 12.381444631089288, "learning_rate": 3.036533861966429e-07, "loss": 2.7335, "step": 287460 }, { "epoch": 12.382306068828875, "learning_rate": 3.0360490422116395e-07, "loss": 2.69, "step": 287480 }, { "epoch": 12.383167506568462, "learning_rate": 3.035564222456851e-07, "loss": 2.4446, "step": 287500 }, { "epoch": 12.38402894430805, "learning_rate": 3.035079402702062e-07, "loss": 2.6119, "step": 287520 }, { "epoch": 12.384890382047637, "learning_rate": 3.034594582947273e-07, "loss": 2.7492, "step": 287540 }, { "epoch": 12.385751819787226, "learning_rate": 3.0341097631924834e-07, "loss": 2.5787, "step": 287560 }, { "epoch": 12.386613257526813, "learning_rate": 3.033624943437695e-07, "loss": 2.7187, "step": 287580 }, { "epoch": 12.3874746952664, "learning_rate": 3.033140123682906e-07, "loss": 2.5064, "step": 287600 }, { "epoch": 12.388336133005987, "learning_rate": 3.0326553039281177e-07, "loss": 2.8349, "step": 287620 }, { "epoch": 12.389197570745575, "learning_rate": 3.032170484173329e-07, "loss": 2.5825, "step": 287640 }, { "epoch": 12.390059008485162, "learning_rate": 3.0316856644185396e-07, "loss": 2.6366, "step": 287660 }, { "epoch": 12.390920446224749, "learning_rate": 3.0312008446637503e-07, "loss": 2.5046, "step": 287680 }, { "epoch": 12.391781883964336, "learning_rate": 3.0307160249089616e-07, "loss": 2.6494, "step": 287700 }, { "epoch": 12.392643321703924, "learning_rate": 3.0302312051541723e-07, "loss": 2.7099, "step": 287720 }, { "epoch": 12.39350475944351, "learning_rate": 3.029746385399384e-07, "loss": 2.6851, "step": 287740 }, { "epoch": 12.394366197183098, "learning_rate": 3.029261565644595e-07, "loss": 2.7689, "step": 287760 }, { "epoch": 12.395227634922685, "learning_rate": 3.028776745889806e-07, "loss": 2.7105, "step": 287780 }, { "epoch": 12.396089072662273, "learning_rate": 3.028291926135017e-07, "loss": 2.6311, "step": 287800 }, { "epoch": 12.396950510401862, "learning_rate": 3.027807106380228e-07, "loss": 2.7113, "step": 287820 }, { "epoch": 12.397811948141449, "learning_rate": 3.0273222866254387e-07, "loss": 2.6197, "step": 287840 }, { "epoch": 12.398673385881036, "learning_rate": 3.0268374668706505e-07, "loss": 2.6443, "step": 287860 }, { "epoch": 12.399534823620623, "learning_rate": 3.026352647115861e-07, "loss": 2.6092, "step": 287880 }, { "epoch": 12.40039626136021, "learning_rate": 3.025867827361072e-07, "loss": 2.7041, "step": 287900 }, { "epoch": 12.401257699099798, "learning_rate": 3.025383007606283e-07, "loss": 2.6403, "step": 287920 }, { "epoch": 12.402119136839385, "learning_rate": 3.024898187851494e-07, "loss": 2.6281, "step": 287940 }, { "epoch": 12.402980574578972, "learning_rate": 3.024413368096705e-07, "loss": 2.5521, "step": 287960 }, { "epoch": 12.40384201231856, "learning_rate": 3.023928548341916e-07, "loss": 2.7908, "step": 287980 }, { "epoch": 12.404703450058147, "learning_rate": 3.0234437285871276e-07, "loss": 2.5934, "step": 288000 }, { "epoch": 12.405564887797734, "learning_rate": 3.0229589088323383e-07, "loss": 2.6662, "step": 288020 }, { "epoch": 12.406426325537321, "learning_rate": 3.0224740890775496e-07, "loss": 2.5668, "step": 288040 }, { "epoch": 12.407287763276909, "learning_rate": 3.021989269322761e-07, "loss": 2.4194, "step": 288060 }, { "epoch": 12.408149201016496, "learning_rate": 3.0215044495679715e-07, "loss": 2.5457, "step": 288080 }, { "epoch": 12.409010638756083, "learning_rate": 3.021019629813182e-07, "loss": 2.6923, "step": 288100 }, { "epoch": 12.409872076495672, "learning_rate": 3.020534810058394e-07, "loss": 2.697, "step": 288120 }, { "epoch": 12.41073351423526, "learning_rate": 3.0200499903036047e-07, "loss": 2.837, "step": 288140 }, { "epoch": 12.411594951974847, "learning_rate": 3.019565170548816e-07, "loss": 2.6677, "step": 288160 }, { "epoch": 12.412456389714434, "learning_rate": 3.0190803507940267e-07, "loss": 2.5443, "step": 288180 }, { "epoch": 12.413317827454021, "learning_rate": 3.018595531039238e-07, "loss": 2.5745, "step": 288200 }, { "epoch": 12.414179265193608, "learning_rate": 3.0181107112844486e-07, "loss": 2.714, "step": 288220 }, { "epoch": 12.415040702933196, "learning_rate": 3.0176258915296604e-07, "loss": 2.6238, "step": 288240 }, { "epoch": 12.415902140672783, "learning_rate": 3.017141071774871e-07, "loss": 2.7704, "step": 288260 }, { "epoch": 12.41676357841237, "learning_rate": 3.016656252020083e-07, "loss": 2.5917, "step": 288280 }, { "epoch": 12.417625016151957, "learning_rate": 3.016171432265293e-07, "loss": 2.7871, "step": 288300 }, { "epoch": 12.418486453891544, "learning_rate": 3.0156866125105043e-07, "loss": 2.6234, "step": 288320 }, { "epoch": 12.419347891631132, "learning_rate": 3.015201792755715e-07, "loss": 2.6935, "step": 288340 }, { "epoch": 12.420209329370719, "learning_rate": 3.014716973000927e-07, "loss": 2.7246, "step": 288360 }, { "epoch": 12.421070767110308, "learning_rate": 3.0142321532461375e-07, "loss": 2.6798, "step": 288380 }, { "epoch": 12.421932204849895, "learning_rate": 3.0137473334913493e-07, "loss": 2.5673, "step": 288400 }, { "epoch": 12.422793642589482, "learning_rate": 3.0132625137365595e-07, "loss": 2.6549, "step": 288420 }, { "epoch": 12.42365508032907, "learning_rate": 3.01277769398177e-07, "loss": 2.6266, "step": 288440 }, { "epoch": 12.424516518068657, "learning_rate": 3.0122928742269815e-07, "loss": 2.6031, "step": 288460 }, { "epoch": 12.425377955808244, "learning_rate": 3.011808054472192e-07, "loss": 2.6059, "step": 288480 }, { "epoch": 12.426239393547831, "learning_rate": 3.011323234717404e-07, "loss": 2.6938, "step": 288500 }, { "epoch": 12.427100831287419, "learning_rate": 3.0108384149626147e-07, "loss": 2.8276, "step": 288520 }, { "epoch": 12.427962269027006, "learning_rate": 3.0103535952078264e-07, "loss": 2.717, "step": 288540 }, { "epoch": 12.428823706766593, "learning_rate": 3.0098687754530366e-07, "loss": 2.6925, "step": 288560 }, { "epoch": 12.42968514450618, "learning_rate": 3.009383955698248e-07, "loss": 2.5705, "step": 288580 }, { "epoch": 12.430546582245768, "learning_rate": 3.0088991359434586e-07, "loss": 2.6878, "step": 288600 }, { "epoch": 12.431408019985355, "learning_rate": 3.0084143161886704e-07, "loss": 2.5579, "step": 288620 }, { "epoch": 12.432269457724942, "learning_rate": 3.007929496433881e-07, "loss": 2.6674, "step": 288640 }, { "epoch": 12.43313089546453, "learning_rate": 3.007444676679093e-07, "loss": 2.5969, "step": 288660 }, { "epoch": 12.433992333204118, "learning_rate": 3.006959856924303e-07, "loss": 2.6961, "step": 288680 }, { "epoch": 12.434853770943706, "learning_rate": 3.0064750371695143e-07, "loss": 2.7223, "step": 288700 }, { "epoch": 12.435715208683293, "learning_rate": 3.005990217414725e-07, "loss": 2.6462, "step": 288720 }, { "epoch": 12.43657664642288, "learning_rate": 3.005505397659936e-07, "loss": 2.6288, "step": 288740 }, { "epoch": 12.437438084162467, "learning_rate": 3.0050205779051475e-07, "loss": 2.662, "step": 288760 }, { "epoch": 12.438299521902055, "learning_rate": 3.004535758150359e-07, "loss": 2.7825, "step": 288780 }, { "epoch": 12.439160959641642, "learning_rate": 3.0040509383955694e-07, "loss": 2.6859, "step": 288800 }, { "epoch": 12.44002239738123, "learning_rate": 3.003566118640781e-07, "loss": 2.6473, "step": 288820 }, { "epoch": 12.440883835120816, "learning_rate": 3.0030812988859914e-07, "loss": 2.6463, "step": 288840 }, { "epoch": 12.441745272860404, "learning_rate": 3.002596479131203e-07, "loss": 2.6285, "step": 288860 }, { "epoch": 12.442606710599991, "learning_rate": 3.002111659376414e-07, "loss": 2.7403, "step": 288880 }, { "epoch": 12.443468148339578, "learning_rate": 3.0016268396216257e-07, "loss": 2.6494, "step": 288900 }, { "epoch": 12.444329586079165, "learning_rate": 3.0011420198668364e-07, "loss": 2.6691, "step": 288920 }, { "epoch": 12.445191023818753, "learning_rate": 3.0006572001120476e-07, "loss": 2.488, "step": 288940 }, { "epoch": 12.446052461558342, "learning_rate": 3.000172380357258e-07, "loss": 2.6129, "step": 288960 }, { "epoch": 12.446913899297929, "learning_rate": 2.9996875606024685e-07, "loss": 2.574, "step": 288980 }, { "epoch": 12.447775337037516, "learning_rate": 2.9992027408476803e-07, "loss": 2.7323, "step": 289000 }, { "epoch": 12.448636774777103, "learning_rate": 2.998717921092891e-07, "loss": 2.6815, "step": 289020 }, { "epoch": 12.44949821251669, "learning_rate": 2.998233101338103e-07, "loss": 2.6763, "step": 289040 }, { "epoch": 12.450359650256278, "learning_rate": 2.997748281583313e-07, "loss": 2.6663, "step": 289060 }, { "epoch": 12.451221087995865, "learning_rate": 2.997263461828525e-07, "loss": 2.6307, "step": 289080 }, { "epoch": 12.452082525735452, "learning_rate": 2.996778642073735e-07, "loss": 2.5369, "step": 289100 }, { "epoch": 12.45294396347504, "learning_rate": 2.9962938223189467e-07, "loss": 2.5533, "step": 289120 }, { "epoch": 12.453805401214627, "learning_rate": 2.9958090025641574e-07, "loss": 2.8373, "step": 289140 }, { "epoch": 12.454666838954214, "learning_rate": 2.995324182809369e-07, "loss": 2.5433, "step": 289160 }, { "epoch": 12.455528276693801, "learning_rate": 2.99483936305458e-07, "loss": 2.7266, "step": 289180 }, { "epoch": 12.456389714433389, "learning_rate": 2.994354543299791e-07, "loss": 2.7163, "step": 289200 }, { "epoch": 12.457251152172976, "learning_rate": 2.9938697235450013e-07, "loss": 2.5602, "step": 289220 }, { "epoch": 12.458112589912565, "learning_rate": 2.993384903790213e-07, "loss": 2.6478, "step": 289240 }, { "epoch": 12.458974027652152, "learning_rate": 2.992900084035424e-07, "loss": 2.5214, "step": 289260 }, { "epoch": 12.45983546539174, "learning_rate": 2.9924152642806356e-07, "loss": 2.5694, "step": 289280 }, { "epoch": 12.460696903131327, "learning_rate": 2.9919304445258463e-07, "loss": 2.5812, "step": 289300 }, { "epoch": 12.461558340870914, "learning_rate": 2.9914456247710576e-07, "loss": 2.6201, "step": 289320 }, { "epoch": 12.462419778610501, "learning_rate": 2.990960805016268e-07, "loss": 2.6816, "step": 289340 }, { "epoch": 12.463281216350088, "learning_rate": 2.9904759852614795e-07, "loss": 2.7573, "step": 289360 }, { "epoch": 12.464142654089676, "learning_rate": 2.98999116550669e-07, "loss": 2.6741, "step": 289380 }, { "epoch": 12.465004091829263, "learning_rate": 2.989506345751902e-07, "loss": 2.6681, "step": 289400 }, { "epoch": 12.46586552956885, "learning_rate": 2.989021525997113e-07, "loss": 2.6111, "step": 289420 }, { "epoch": 12.466726967308437, "learning_rate": 2.988536706242324e-07, "loss": 2.6715, "step": 289440 }, { "epoch": 12.467588405048025, "learning_rate": 2.9880518864875347e-07, "loss": 2.583, "step": 289460 }, { "epoch": 12.468449842787612, "learning_rate": 2.987567066732746e-07, "loss": 2.7964, "step": 289480 }, { "epoch": 12.469311280527199, "learning_rate": 2.9870822469779566e-07, "loss": 2.762, "step": 289500 }, { "epoch": 12.470172718266788, "learning_rate": 2.9865974272231674e-07, "loss": 2.66, "step": 289520 }, { "epoch": 12.471034156006375, "learning_rate": 2.986112607468379e-07, "loss": 2.5871, "step": 289540 }, { "epoch": 12.471895593745963, "learning_rate": 2.98562778771359e-07, "loss": 2.5938, "step": 289560 }, { "epoch": 12.47275703148555, "learning_rate": 2.985142967958801e-07, "loss": 2.6893, "step": 289580 }, { "epoch": 12.473618469225137, "learning_rate": 2.9846581482040113e-07, "loss": 2.7183, "step": 289600 }, { "epoch": 12.474479906964724, "learning_rate": 2.984173328449223e-07, "loss": 2.702, "step": 289620 }, { "epoch": 12.475341344704312, "learning_rate": 2.983688508694434e-07, "loss": 2.5105, "step": 289640 }, { "epoch": 12.476202782443899, "learning_rate": 2.9832036889396455e-07, "loss": 2.7753, "step": 289660 }, { "epoch": 12.477064220183486, "learning_rate": 2.982718869184856e-07, "loss": 2.6572, "step": 289680 }, { "epoch": 12.477925657923073, "learning_rate": 2.9822340494300675e-07, "loss": 2.5335, "step": 289700 }, { "epoch": 12.47878709566266, "learning_rate": 2.981749229675278e-07, "loss": 2.5776, "step": 289720 }, { "epoch": 12.479648533402248, "learning_rate": 2.9812644099204895e-07, "loss": 2.7473, "step": 289740 }, { "epoch": 12.480509971141835, "learning_rate": 2.9807795901657e-07, "loss": 2.6801, "step": 289760 }, { "epoch": 12.481371408881422, "learning_rate": 2.980294770410912e-07, "loss": 2.7145, "step": 289780 }, { "epoch": 12.482232846621011, "learning_rate": 2.9798099506561227e-07, "loss": 2.6253, "step": 289800 }, { "epoch": 12.483094284360599, "learning_rate": 2.979325130901334e-07, "loss": 2.5986, "step": 289820 }, { "epoch": 12.483955722100186, "learning_rate": 2.978840311146545e-07, "loss": 2.6511, "step": 289840 }, { "epoch": 12.484817159839773, "learning_rate": 2.978355491391756e-07, "loss": 2.7623, "step": 289860 }, { "epoch": 12.48567859757936, "learning_rate": 2.9778706716369666e-07, "loss": 2.654, "step": 289880 }, { "epoch": 12.486540035318948, "learning_rate": 2.9773858518821783e-07, "loss": 2.581, "step": 289900 }, { "epoch": 12.487401473058535, "learning_rate": 2.976901032127389e-07, "loss": 2.5641, "step": 289920 }, { "epoch": 12.488262910798122, "learning_rate": 2.9764162123726003e-07, "loss": 2.6614, "step": 289940 }, { "epoch": 12.48912434853771, "learning_rate": 2.975931392617811e-07, "loss": 2.7834, "step": 289960 }, { "epoch": 12.489985786277296, "learning_rate": 2.9754465728630223e-07, "loss": 2.6106, "step": 289980 }, { "epoch": 12.490847224016884, "learning_rate": 2.974961753108233e-07, "loss": 2.6796, "step": 290000 }, { "epoch": 12.491708661756471, "learning_rate": 2.974476933353445e-07, "loss": 2.5155, "step": 290020 }, { "epoch": 12.492570099496058, "learning_rate": 2.9739921135986555e-07, "loss": 2.6256, "step": 290040 }, { "epoch": 12.493431537235645, "learning_rate": 2.973507293843866e-07, "loss": 2.8283, "step": 290060 }, { "epoch": 12.494292974975235, "learning_rate": 2.9730224740890774e-07, "loss": 2.5472, "step": 290080 }, { "epoch": 12.495154412714822, "learning_rate": 2.972537654334288e-07, "loss": 2.64, "step": 290100 }, { "epoch": 12.496015850454409, "learning_rate": 2.9720528345794994e-07, "loss": 2.7291, "step": 290120 }, { "epoch": 12.496877288193996, "learning_rate": 2.97156801482471e-07, "loss": 2.5455, "step": 290140 }, { "epoch": 12.497738725933583, "learning_rate": 2.971083195069922e-07, "loss": 2.5293, "step": 290160 }, { "epoch": 12.49860016367317, "learning_rate": 2.9705983753151326e-07, "loss": 2.6717, "step": 290180 }, { "epoch": 12.499461601412758, "learning_rate": 2.970113555560344e-07, "loss": 2.7297, "step": 290200 }, { "epoch": 12.500323039152345, "learning_rate": 2.9696287358055546e-07, "loss": 2.6091, "step": 290220 }, { "epoch": 12.501184476891932, "learning_rate": 2.969143916050766e-07, "loss": 2.6265, "step": 290240 }, { "epoch": 12.50204591463152, "learning_rate": 2.9686590962959765e-07, "loss": 2.5136, "step": 290260 }, { "epoch": 12.502907352371107, "learning_rate": 2.9681742765411883e-07, "loss": 2.5363, "step": 290280 }, { "epoch": 12.503768790110694, "learning_rate": 2.967689456786399e-07, "loss": 2.5, "step": 290300 }, { "epoch": 12.504630227850281, "learning_rate": 2.967204637031611e-07, "loss": 2.5634, "step": 290320 }, { "epoch": 12.505491665589869, "learning_rate": 2.966719817276821e-07, "loss": 2.5178, "step": 290340 }, { "epoch": 12.506353103329456, "learning_rate": 2.966234997522032e-07, "loss": 2.7307, "step": 290360 }, { "epoch": 12.507214541069045, "learning_rate": 2.965750177767243e-07, "loss": 2.6083, "step": 290380 }, { "epoch": 12.508075978808632, "learning_rate": 2.9652653580124547e-07, "loss": 2.4803, "step": 290400 }, { "epoch": 12.50893741654822, "learning_rate": 2.9647805382576654e-07, "loss": 2.7073, "step": 290420 }, { "epoch": 12.509798854287807, "learning_rate": 2.964295718502877e-07, "loss": 2.7129, "step": 290440 }, { "epoch": 12.510660292027394, "learning_rate": 2.9638108987480874e-07, "loss": 2.6443, "step": 290460 }, { "epoch": 12.511521729766981, "learning_rate": 2.9633260789932986e-07, "loss": 2.5839, "step": 290480 }, { "epoch": 12.512383167506568, "learning_rate": 2.9628412592385093e-07, "loss": 2.8655, "step": 290500 }, { "epoch": 12.513244605246156, "learning_rate": 2.9623564394837206e-07, "loss": 2.6249, "step": 290520 }, { "epoch": 12.514106042985743, "learning_rate": 2.961871619728932e-07, "loss": 2.7372, "step": 290540 }, { "epoch": 12.51496748072533, "learning_rate": 2.9613867999741425e-07, "loss": 2.6209, "step": 290560 }, { "epoch": 12.515828918464917, "learning_rate": 2.960901980219354e-07, "loss": 2.7, "step": 290580 }, { "epoch": 12.516690356204505, "learning_rate": 2.9604171604645645e-07, "loss": 2.655, "step": 290600 }, { "epoch": 12.517551793944092, "learning_rate": 2.9599323407097757e-07, "loss": 2.6453, "step": 290620 }, { "epoch": 12.518413231683681, "learning_rate": 2.9594475209549864e-07, "loss": 2.8287, "step": 290640 }, { "epoch": 12.519274669423268, "learning_rate": 2.958962701200198e-07, "loss": 2.626, "step": 290660 }, { "epoch": 12.520136107162855, "learning_rate": 2.9584778814454084e-07, "loss": 2.5116, "step": 290680 }, { "epoch": 12.520997544902443, "learning_rate": 2.9579930616906207e-07, "loss": 2.7038, "step": 290700 }, { "epoch": 12.52185898264203, "learning_rate": 2.957508241935831e-07, "loss": 2.7353, "step": 290720 }, { "epoch": 12.522720420381617, "learning_rate": 2.957023422181042e-07, "loss": 2.5879, "step": 290740 }, { "epoch": 12.523581858121204, "learning_rate": 2.956538602426253e-07, "loss": 2.6836, "step": 290760 }, { "epoch": 12.524443295860792, "learning_rate": 2.9560537826714646e-07, "loss": 2.7696, "step": 290780 }, { "epoch": 12.525304733600379, "learning_rate": 2.9555689629166753e-07, "loss": 2.5073, "step": 290800 }, { "epoch": 12.526166171339966, "learning_rate": 2.955084143161887e-07, "loss": 2.7461, "step": 290820 }, { "epoch": 12.527027609079553, "learning_rate": 2.9545993234070973e-07, "loss": 2.7101, "step": 290840 }, { "epoch": 12.52788904681914, "learning_rate": 2.954114503652309e-07, "loss": 2.6116, "step": 290860 }, { "epoch": 12.528750484558728, "learning_rate": 2.953629683897519e-07, "loss": 2.7061, "step": 290880 }, { "epoch": 12.529611922298315, "learning_rate": 2.953144864142731e-07, "loss": 2.6589, "step": 290900 }, { "epoch": 12.530473360037902, "learning_rate": 2.952660044387942e-07, "loss": 2.5656, "step": 290920 }, { "epoch": 12.531334797777491, "learning_rate": 2.9521752246331535e-07, "loss": 2.6399, "step": 290940 }, { "epoch": 12.532196235517079, "learning_rate": 2.951690404878364e-07, "loss": 2.4765, "step": 290960 }, { "epoch": 12.533057673256666, "learning_rate": 2.9512055851235755e-07, "loss": 2.692, "step": 290980 }, { "epoch": 12.533919110996253, "learning_rate": 2.9507207653687857e-07, "loss": 2.5797, "step": 291000 }, { "epoch": 12.53478054873584, "learning_rate": 2.9502359456139974e-07, "loss": 2.5788, "step": 291020 }, { "epoch": 12.535641986475428, "learning_rate": 2.949751125859208e-07, "loss": 2.6033, "step": 291040 }, { "epoch": 12.536503424215015, "learning_rate": 2.94926630610442e-07, "loss": 2.6274, "step": 291060 }, { "epoch": 12.537364861954602, "learning_rate": 2.9487814863496306e-07, "loss": 2.5821, "step": 291080 }, { "epoch": 12.53822629969419, "learning_rate": 2.9482966665948414e-07, "loss": 2.6164, "step": 291100 }, { "epoch": 12.539087737433777, "learning_rate": 2.947811846840052e-07, "loss": 2.637, "step": 291120 }, { "epoch": 12.539949175173364, "learning_rate": 2.947327027085263e-07, "loss": 2.7307, "step": 291140 }, { "epoch": 12.540810612912951, "learning_rate": 2.9468422073304746e-07, "loss": 2.73, "step": 291160 }, { "epoch": 12.541672050652538, "learning_rate": 2.9463573875756853e-07, "loss": 2.6702, "step": 291180 }, { "epoch": 12.542533488392127, "learning_rate": 2.9458725678208976e-07, "loss": 2.7146, "step": 291200 }, { "epoch": 12.543394926131715, "learning_rate": 2.945387748066107e-07, "loss": 2.7799, "step": 291220 }, { "epoch": 12.544256363871302, "learning_rate": 2.944902928311319e-07, "loss": 2.729, "step": 291240 }, { "epoch": 12.545117801610889, "learning_rate": 2.944418108556529e-07, "loss": 2.7877, "step": 291260 }, { "epoch": 12.545979239350476, "learning_rate": 2.943933288801741e-07, "loss": 2.7767, "step": 291280 }, { "epoch": 12.546840677090064, "learning_rate": 2.9434484690469517e-07, "loss": 2.824, "step": 291300 }, { "epoch": 12.54770211482965, "learning_rate": 2.9429636492921635e-07, "loss": 2.7196, "step": 291320 }, { "epoch": 12.548563552569238, "learning_rate": 2.942478829537374e-07, "loss": 2.7268, "step": 291340 }, { "epoch": 12.549424990308825, "learning_rate": 2.9419940097825854e-07, "loss": 2.7069, "step": 291360 }, { "epoch": 12.550286428048413, "learning_rate": 2.9415091900277956e-07, "loss": 2.6513, "step": 291380 }, { "epoch": 12.551147865788, "learning_rate": 2.9410243702730074e-07, "loss": 2.6835, "step": 291400 }, { "epoch": 12.552009303527587, "learning_rate": 2.940539550518218e-07, "loss": 2.7296, "step": 291420 }, { "epoch": 12.552870741267174, "learning_rate": 2.94005473076343e-07, "loss": 2.7143, "step": 291440 }, { "epoch": 12.553732179006762, "learning_rate": 2.9395699110086406e-07, "loss": 2.4909, "step": 291460 }, { "epoch": 12.554593616746349, "learning_rate": 2.939085091253852e-07, "loss": 2.6139, "step": 291480 }, { "epoch": 12.555455054485938, "learning_rate": 2.9386002714990625e-07, "loss": 2.5784, "step": 291500 }, { "epoch": 12.556316492225525, "learning_rate": 2.938115451744274e-07, "loss": 2.6652, "step": 291520 }, { "epoch": 12.557177929965112, "learning_rate": 2.9376306319894845e-07, "loss": 2.714, "step": 291540 }, { "epoch": 12.5580393677047, "learning_rate": 2.9371458122346963e-07, "loss": 2.6572, "step": 291560 }, { "epoch": 12.558900805444287, "learning_rate": 2.936660992479907e-07, "loss": 2.6315, "step": 291580 }, { "epoch": 12.559762243183874, "learning_rate": 2.936176172725118e-07, "loss": 2.6029, "step": 291600 }, { "epoch": 12.560623680923461, "learning_rate": 2.9356913529703295e-07, "loss": 2.7119, "step": 291620 }, { "epoch": 12.561485118663049, "learning_rate": 2.935206533215539e-07, "loss": 2.6676, "step": 291640 }, { "epoch": 12.562346556402636, "learning_rate": 2.934721713460751e-07, "loss": 2.6439, "step": 291660 }, { "epoch": 12.563207994142223, "learning_rate": 2.9342368937059616e-07, "loss": 2.7212, "step": 291680 }, { "epoch": 12.56406943188181, "learning_rate": 2.9337520739511734e-07, "loss": 2.7353, "step": 291700 }, { "epoch": 12.564930869621397, "learning_rate": 2.933267254196384e-07, "loss": 2.6276, "step": 291720 }, { "epoch": 12.565792307360985, "learning_rate": 2.9327824344415954e-07, "loss": 2.6812, "step": 291740 }, { "epoch": 12.566653745100574, "learning_rate": 2.9322976146868055e-07, "loss": 2.5885, "step": 291760 }, { "epoch": 12.567515182840161, "learning_rate": 2.9318127949320173e-07, "loss": 2.667, "step": 291780 }, { "epoch": 12.568376620579748, "learning_rate": 2.931327975177228e-07, "loss": 2.6812, "step": 291800 }, { "epoch": 12.569238058319335, "learning_rate": 2.93084315542244e-07, "loss": 2.6638, "step": 291820 }, { "epoch": 12.570099496058923, "learning_rate": 2.9303583356676505e-07, "loss": 2.5995, "step": 291840 }, { "epoch": 12.57096093379851, "learning_rate": 2.929873515912862e-07, "loss": 2.6172, "step": 291860 }, { "epoch": 12.571822371538097, "learning_rate": 2.9293886961580725e-07, "loss": 2.7344, "step": 291880 }, { "epoch": 12.572683809277684, "learning_rate": 2.9289038764032837e-07, "loss": 2.5708, "step": 291900 }, { "epoch": 12.573545247017272, "learning_rate": 2.9284190566484944e-07, "loss": 2.7894, "step": 291920 }, { "epoch": 12.574406684756859, "learning_rate": 2.927934236893706e-07, "loss": 2.6047, "step": 291940 }, { "epoch": 12.575268122496446, "learning_rate": 2.927449417138917e-07, "loss": 2.6845, "step": 291960 }, { "epoch": 12.576129560236033, "learning_rate": 2.926964597384128e-07, "loss": 2.724, "step": 291980 }, { "epoch": 12.57699099797562, "learning_rate": 2.926479777629339e-07, "loss": 2.581, "step": 292000 }, { "epoch": 12.577852435715208, "learning_rate": 2.92599495787455e-07, "loss": 2.6285, "step": 292020 }, { "epoch": 12.578713873454795, "learning_rate": 2.925510138119761e-07, "loss": 2.6735, "step": 292040 }, { "epoch": 12.579575311194384, "learning_rate": 2.9250253183649726e-07, "loss": 2.7533, "step": 292060 }, { "epoch": 12.580436748933971, "learning_rate": 2.9245404986101833e-07, "loss": 2.8235, "step": 292080 }, { "epoch": 12.581298186673559, "learning_rate": 2.924055678855395e-07, "loss": 2.6697, "step": 292100 }, { "epoch": 12.582159624413146, "learning_rate": 2.9235708591006053e-07, "loss": 2.6171, "step": 292120 }, { "epoch": 12.583021062152733, "learning_rate": 2.9230860393458165e-07, "loss": 2.6433, "step": 292140 }, { "epoch": 12.58388249989232, "learning_rate": 2.922601219591027e-07, "loss": 2.5921, "step": 292160 }, { "epoch": 12.584743937631908, "learning_rate": 2.922116399836238e-07, "loss": 2.695, "step": 292180 }, { "epoch": 12.585605375371495, "learning_rate": 2.92163158008145e-07, "loss": 2.8238, "step": 292200 }, { "epoch": 12.586466813111082, "learning_rate": 2.9211467603266605e-07, "loss": 2.7444, "step": 292220 }, { "epoch": 12.58732825085067, "learning_rate": 2.9206619405718717e-07, "loss": 2.6187, "step": 292240 }, { "epoch": 12.588189688590257, "learning_rate": 2.9201771208170824e-07, "loss": 2.7259, "step": 292260 }, { "epoch": 12.589051126329844, "learning_rate": 2.9196923010622937e-07, "loss": 2.6344, "step": 292280 }, { "epoch": 12.589912564069431, "learning_rate": 2.9192074813075044e-07, "loss": 2.649, "step": 292300 }, { "epoch": 12.59077400180902, "learning_rate": 2.918722661552716e-07, "loss": 2.6747, "step": 292320 }, { "epoch": 12.591635439548607, "learning_rate": 2.918237841797927e-07, "loss": 2.7064, "step": 292340 }, { "epoch": 12.592496877288195, "learning_rate": 2.917753022043138e-07, "loss": 2.6012, "step": 292360 }, { "epoch": 12.593358315027782, "learning_rate": 2.917268202288349e-07, "loss": 2.5918, "step": 292380 }, { "epoch": 12.59421975276737, "learning_rate": 2.91678338253356e-07, "loss": 2.6065, "step": 292400 }, { "epoch": 12.595081190506956, "learning_rate": 2.916298562778771e-07, "loss": 2.8146, "step": 292420 }, { "epoch": 12.595942628246544, "learning_rate": 2.9158137430239826e-07, "loss": 2.6774, "step": 292440 }, { "epoch": 12.596804065986131, "learning_rate": 2.915328923269193e-07, "loss": 2.7255, "step": 292460 }, { "epoch": 12.597665503725718, "learning_rate": 2.914844103514405e-07, "loss": 2.697, "step": 292480 }, { "epoch": 12.598526941465305, "learning_rate": 2.914359283759615e-07, "loss": 2.6327, "step": 292500 }, { "epoch": 12.599388379204893, "learning_rate": 2.9138744640048265e-07, "loss": 2.8909, "step": 292520 }, { "epoch": 12.60024981694448, "learning_rate": 2.913389644250037e-07, "loss": 2.6471, "step": 292540 }, { "epoch": 12.601111254684067, "learning_rate": 2.912904824495249e-07, "loss": 2.7794, "step": 292560 }, { "epoch": 12.601972692423654, "learning_rate": 2.9124200047404597e-07, "loss": 2.5778, "step": 292580 }, { "epoch": 12.602834130163242, "learning_rate": 2.9119351849856714e-07, "loss": 2.5811, "step": 292600 }, { "epoch": 12.60369556790283, "learning_rate": 2.9114503652308816e-07, "loss": 2.6495, "step": 292620 }, { "epoch": 12.604557005642418, "learning_rate": 2.9109655454760934e-07, "loss": 2.5725, "step": 292640 }, { "epoch": 12.605418443382005, "learning_rate": 2.9104807257213036e-07, "loss": 2.7022, "step": 292660 }, { "epoch": 12.606279881121592, "learning_rate": 2.9099959059665154e-07, "loss": 2.6003, "step": 292680 }, { "epoch": 12.60714131886118, "learning_rate": 2.909511086211726e-07, "loss": 2.5838, "step": 292700 }, { "epoch": 12.608002756600767, "learning_rate": 2.909026266456937e-07, "loss": 2.7696, "step": 292720 }, { "epoch": 12.608864194340354, "learning_rate": 2.9085414467021486e-07, "loss": 2.6116, "step": 292740 }, { "epoch": 12.609725632079941, "learning_rate": 2.908056626947359e-07, "loss": 2.7159, "step": 292760 }, { "epoch": 12.610587069819529, "learning_rate": 2.90757180719257e-07, "loss": 2.6302, "step": 292780 }, { "epoch": 12.611448507559116, "learning_rate": 2.9070869874377807e-07, "loss": 2.6525, "step": 292800 }, { "epoch": 12.612309945298703, "learning_rate": 2.9066021676829925e-07, "loss": 2.6844, "step": 292820 }, { "epoch": 12.61317138303829, "learning_rate": 2.906117347928203e-07, "loss": 2.5995, "step": 292840 }, { "epoch": 12.614032820777878, "learning_rate": 2.905632528173415e-07, "loss": 2.6287, "step": 292860 }, { "epoch": 12.614894258517465, "learning_rate": 2.9051477084186246e-07, "loss": 2.6232, "step": 292880 }, { "epoch": 12.615755696257054, "learning_rate": 2.9046628886638364e-07, "loss": 2.7094, "step": 292900 }, { "epoch": 12.616617133996641, "learning_rate": 2.904178068909047e-07, "loss": 2.6561, "step": 292920 }, { "epoch": 12.617478571736228, "learning_rate": 2.903693249154259e-07, "loss": 2.5999, "step": 292940 }, { "epoch": 12.618340009475816, "learning_rate": 2.9032084293994696e-07, "loss": 2.6093, "step": 292960 }, { "epoch": 12.619201447215403, "learning_rate": 2.902723609644682e-07, "loss": 2.5226, "step": 292980 }, { "epoch": 12.62006288495499, "learning_rate": 2.9022387898898916e-07, "loss": 2.5581, "step": 293000 }, { "epoch": 12.620924322694577, "learning_rate": 2.9017539701351033e-07, "loss": 2.5949, "step": 293020 }, { "epoch": 12.621785760434165, "learning_rate": 2.9012691503803135e-07, "loss": 2.4308, "step": 293040 }, { "epoch": 12.622647198173752, "learning_rate": 2.9007843306255253e-07, "loss": 2.709, "step": 293060 }, { "epoch": 12.623508635913339, "learning_rate": 2.900299510870736e-07, "loss": 2.6412, "step": 293080 }, { "epoch": 12.624370073652926, "learning_rate": 2.899814691115948e-07, "loss": 2.6504, "step": 293100 }, { "epoch": 12.625231511392514, "learning_rate": 2.8993298713611585e-07, "loss": 2.6869, "step": 293120 }, { "epoch": 12.6260929491321, "learning_rate": 2.89884505160637e-07, "loss": 2.7405, "step": 293140 }, { "epoch": 12.626954386871688, "learning_rate": 2.89836023185158e-07, "loss": 2.7138, "step": 293160 }, { "epoch": 12.627815824611277, "learning_rate": 2.8978754120967917e-07, "loss": 2.7541, "step": 293180 }, { "epoch": 12.628677262350864, "learning_rate": 2.8973905923420024e-07, "loss": 2.508, "step": 293200 }, { "epoch": 12.629538700090452, "learning_rate": 2.896905772587214e-07, "loss": 2.6663, "step": 293220 }, { "epoch": 12.630400137830039, "learning_rate": 2.896420952832425e-07, "loss": 2.6477, "step": 293240 }, { "epoch": 12.631261575569626, "learning_rate": 2.895936133077635e-07, "loss": 2.7621, "step": 293260 }, { "epoch": 12.632123013309213, "learning_rate": 2.895451313322847e-07, "loss": 2.5569, "step": 293280 }, { "epoch": 12.6329844510488, "learning_rate": 2.8949664935680576e-07, "loss": 2.5897, "step": 293300 }, { "epoch": 12.633845888788388, "learning_rate": 2.894481673813269e-07, "loss": 2.5062, "step": 293320 }, { "epoch": 12.634707326527975, "learning_rate": 2.8939968540584795e-07, "loss": 2.5871, "step": 293340 }, { "epoch": 12.635568764267562, "learning_rate": 2.8935120343036913e-07, "loss": 2.5131, "step": 293360 }, { "epoch": 12.63643020200715, "learning_rate": 2.893027214548902e-07, "loss": 2.4774, "step": 293380 }, { "epoch": 12.637291639746737, "learning_rate": 2.892542394794114e-07, "loss": 2.7408, "step": 293400 }, { "epoch": 12.638153077486324, "learning_rate": 2.8920575750393235e-07, "loss": 2.5466, "step": 293420 }, { "epoch": 12.639014515225911, "learning_rate": 2.891572755284535e-07, "loss": 2.86, "step": 293440 }, { "epoch": 12.6398759529655, "learning_rate": 2.891087935529746e-07, "loss": 2.6317, "step": 293460 }, { "epoch": 12.640737390705088, "learning_rate": 2.8906031157749577e-07, "loss": 2.5677, "step": 293480 }, { "epoch": 12.641598828444675, "learning_rate": 2.8901182960201684e-07, "loss": 2.7134, "step": 293500 }, { "epoch": 12.642460266184262, "learning_rate": 2.8896334762653797e-07, "loss": 2.7171, "step": 293520 }, { "epoch": 12.64332170392385, "learning_rate": 2.88914865651059e-07, "loss": 2.6595, "step": 293540 }, { "epoch": 12.644183141663436, "learning_rate": 2.8886638367558016e-07, "loss": 2.4712, "step": 293560 }, { "epoch": 12.645044579403024, "learning_rate": 2.8881790170010124e-07, "loss": 2.6573, "step": 293580 }, { "epoch": 12.645906017142611, "learning_rate": 2.887694197246224e-07, "loss": 2.7136, "step": 293600 }, { "epoch": 12.646767454882198, "learning_rate": 2.887209377491435e-07, "loss": 2.5816, "step": 293620 }, { "epoch": 12.647628892621785, "learning_rate": 2.886724557736646e-07, "loss": 2.6257, "step": 293640 }, { "epoch": 12.648490330361373, "learning_rate": 2.886239737981857e-07, "loss": 2.5202, "step": 293660 }, { "epoch": 12.64935176810096, "learning_rate": 2.885754918227068e-07, "loss": 2.5755, "step": 293680 }, { "epoch": 12.650213205840547, "learning_rate": 2.885270098472279e-07, "loss": 2.5538, "step": 293700 }, { "epoch": 12.651074643580134, "learning_rate": 2.8847852787174905e-07, "loss": 2.6555, "step": 293720 }, { "epoch": 12.651936081319722, "learning_rate": 2.884300458962701e-07, "loss": 2.5887, "step": 293740 }, { "epoch": 12.65279751905931, "learning_rate": 2.8838156392079125e-07, "loss": 2.656, "step": 293760 }, { "epoch": 12.653658956798898, "learning_rate": 2.883330819453123e-07, "loss": 2.5881, "step": 293780 }, { "epoch": 12.654520394538485, "learning_rate": 2.8828459996983334e-07, "loss": 2.6702, "step": 293800 }, { "epoch": 12.655381832278072, "learning_rate": 2.882361179943545e-07, "loss": 2.6164, "step": 293820 }, { "epoch": 12.65624327001766, "learning_rate": 2.881876360188756e-07, "loss": 2.7417, "step": 293840 }, { "epoch": 12.657104707757247, "learning_rate": 2.8813915404339677e-07, "loss": 2.7723, "step": 293860 }, { "epoch": 12.657966145496834, "learning_rate": 2.8809067206791784e-07, "loss": 2.6107, "step": 293880 }, { "epoch": 12.658827583236421, "learning_rate": 2.8804219009243896e-07, "loss": 2.7768, "step": 293900 }, { "epoch": 12.659689020976009, "learning_rate": 2.8799370811696003e-07, "loss": 2.6019, "step": 293920 }, { "epoch": 12.660550458715596, "learning_rate": 2.8794522614148116e-07, "loss": 2.6657, "step": 293940 }, { "epoch": 12.661411896455183, "learning_rate": 2.8789674416600223e-07, "loss": 2.5644, "step": 293960 }, { "epoch": 12.66227333419477, "learning_rate": 2.878482621905234e-07, "loss": 2.6796, "step": 293980 }, { "epoch": 12.663134771934358, "learning_rate": 2.877997802150445e-07, "loss": 2.614, "step": 294000 }, { "epoch": 12.663996209673947, "learning_rate": 2.877512982395656e-07, "loss": 2.5702, "step": 294020 }, { "epoch": 12.664857647413534, "learning_rate": 2.877028162640867e-07, "loss": 2.5226, "step": 294040 }, { "epoch": 12.665719085153121, "learning_rate": 2.876543342886078e-07, "loss": 2.5579, "step": 294060 }, { "epoch": 12.666580522892708, "learning_rate": 2.8760585231312887e-07, "loss": 2.5395, "step": 294080 }, { "epoch": 12.667441960632296, "learning_rate": 2.8755737033765005e-07, "loss": 2.7392, "step": 294100 }, { "epoch": 12.668303398371883, "learning_rate": 2.875088883621711e-07, "loss": 2.6625, "step": 294120 }, { "epoch": 12.66916483611147, "learning_rate": 2.8746040638669224e-07, "loss": 2.6292, "step": 294140 }, { "epoch": 12.670026273851057, "learning_rate": 2.874119244112133e-07, "loss": 2.8294, "step": 294160 }, { "epoch": 12.670887711590645, "learning_rate": 2.8736344243573444e-07, "loss": 2.5904, "step": 294180 }, { "epoch": 12.671749149330232, "learning_rate": 2.873149604602555e-07, "loss": 2.5297, "step": 294200 }, { "epoch": 12.67261058706982, "learning_rate": 2.872664784847767e-07, "loss": 2.6002, "step": 294220 }, { "epoch": 12.673472024809406, "learning_rate": 2.872179965092977e-07, "loss": 2.6644, "step": 294240 }, { "epoch": 12.674333462548994, "learning_rate": 2.8716951453381894e-07, "loss": 2.5754, "step": 294260 }, { "epoch": 12.67519490028858, "learning_rate": 2.8712103255833996e-07, "loss": 2.6956, "step": 294280 }, { "epoch": 12.676056338028168, "learning_rate": 2.870725505828611e-07, "loss": 2.6064, "step": 294300 }, { "epoch": 12.676917775767757, "learning_rate": 2.8702406860738215e-07, "loss": 2.6565, "step": 294320 }, { "epoch": 12.677779213507344, "learning_rate": 2.869755866319032e-07, "loss": 2.8212, "step": 294340 }, { "epoch": 12.678640651246932, "learning_rate": 2.869271046564244e-07, "loss": 2.5368, "step": 294360 }, { "epoch": 12.679502088986519, "learning_rate": 2.8687862268094547e-07, "loss": 2.6298, "step": 294380 }, { "epoch": 12.680363526726106, "learning_rate": 2.868301407054666e-07, "loss": 2.6617, "step": 294400 }, { "epoch": 12.681224964465693, "learning_rate": 2.8678165872998767e-07, "loss": 2.7764, "step": 294420 }, { "epoch": 12.68208640220528, "learning_rate": 2.867331767545088e-07, "loss": 2.529, "step": 294440 }, { "epoch": 12.682947839944868, "learning_rate": 2.8668469477902986e-07, "loss": 2.7669, "step": 294460 }, { "epoch": 12.683809277684455, "learning_rate": 2.8663621280355104e-07, "loss": 2.6813, "step": 294480 }, { "epoch": 12.684670715424042, "learning_rate": 2.865877308280721e-07, "loss": 2.4994, "step": 294500 }, { "epoch": 12.68553215316363, "learning_rate": 2.865392488525933e-07, "loss": 2.6074, "step": 294520 }, { "epoch": 12.686393590903217, "learning_rate": 2.864907668771143e-07, "loss": 2.6493, "step": 294540 }, { "epoch": 12.687255028642804, "learning_rate": 2.8644228490163543e-07, "loss": 2.5249, "step": 294560 }, { "epoch": 12.688116466382393, "learning_rate": 2.863938029261565e-07, "loss": 2.5256, "step": 294580 }, { "epoch": 12.68897790412198, "learning_rate": 2.863453209506777e-07, "loss": 2.5343, "step": 294600 }, { "epoch": 12.689839341861568, "learning_rate": 2.8629683897519875e-07, "loss": 2.6197, "step": 294620 }, { "epoch": 12.690700779601155, "learning_rate": 2.8624835699971993e-07, "loss": 2.6576, "step": 294640 }, { "epoch": 12.691562217340742, "learning_rate": 2.861998750242409e-07, "loss": 2.7261, "step": 294660 }, { "epoch": 12.69242365508033, "learning_rate": 2.861513930487621e-07, "loss": 2.703, "step": 294680 }, { "epoch": 12.693285092819917, "learning_rate": 2.8610291107328315e-07, "loss": 2.5973, "step": 294700 }, { "epoch": 12.694146530559504, "learning_rate": 2.860544290978043e-07, "loss": 2.4556, "step": 294720 }, { "epoch": 12.695007968299091, "learning_rate": 2.860059471223254e-07, "loss": 2.5586, "step": 294740 }, { "epoch": 12.695869406038678, "learning_rate": 2.859574651468466e-07, "loss": 2.5931, "step": 294760 }, { "epoch": 12.696730843778266, "learning_rate": 2.859089831713676e-07, "loss": 2.6477, "step": 294780 }, { "epoch": 12.697592281517853, "learning_rate": 2.8586050119588877e-07, "loss": 2.569, "step": 294800 }, { "epoch": 12.69845371925744, "learning_rate": 2.858120192204098e-07, "loss": 2.6582, "step": 294820 }, { "epoch": 12.699315156997027, "learning_rate": 2.8576353724493096e-07, "loss": 2.5656, "step": 294840 }, { "epoch": 12.700176594736615, "learning_rate": 2.8571505526945204e-07, "loss": 2.3716, "step": 294860 }, { "epoch": 12.701038032476204, "learning_rate": 2.856665732939731e-07, "loss": 2.7706, "step": 294880 }, { "epoch": 12.70189947021579, "learning_rate": 2.856180913184943e-07, "loss": 2.5786, "step": 294900 }, { "epoch": 12.702760907955378, "learning_rate": 2.855696093430153e-07, "loss": 2.7228, "step": 294920 }, { "epoch": 12.703622345694965, "learning_rate": 2.8552112736753643e-07, "loss": 2.5741, "step": 294940 }, { "epoch": 12.704483783434553, "learning_rate": 2.854726453920575e-07, "loss": 2.6002, "step": 294960 }, { "epoch": 12.70534522117414, "learning_rate": 2.854241634165787e-07, "loss": 2.6618, "step": 294980 }, { "epoch": 12.706206658913727, "learning_rate": 2.8537568144109975e-07, "loss": 2.7533, "step": 295000 }, { "epoch": 12.707068096653314, "learning_rate": 2.853271994656209e-07, "loss": 2.5936, "step": 295020 }, { "epoch": 12.707929534392902, "learning_rate": 2.8527871749014194e-07, "loss": 2.7275, "step": 295040 }, { "epoch": 12.708790972132489, "learning_rate": 2.852302355146631e-07, "loss": 2.5928, "step": 295060 }, { "epoch": 12.709652409872076, "learning_rate": 2.851817535391842e-07, "loss": 2.5949, "step": 295080 }, { "epoch": 12.710513847611663, "learning_rate": 2.851332715637053e-07, "loss": 2.6181, "step": 295100 }, { "epoch": 12.71137528535125, "learning_rate": 2.850847895882264e-07, "loss": 2.6434, "step": 295120 }, { "epoch": 12.71223672309084, "learning_rate": 2.8503630761274757e-07, "loss": 2.6966, "step": 295140 }, { "epoch": 12.713098160830427, "learning_rate": 2.8498782563726864e-07, "loss": 2.4387, "step": 295160 }, { "epoch": 12.713959598570014, "learning_rate": 2.849393436617898e-07, "loss": 2.5376, "step": 295180 }, { "epoch": 12.714821036309601, "learning_rate": 2.848908616863108e-07, "loss": 2.7144, "step": 295200 }, { "epoch": 12.715682474049188, "learning_rate": 2.8484237971083196e-07, "loss": 2.6114, "step": 295220 }, { "epoch": 12.716543911788776, "learning_rate": 2.8479389773535303e-07, "loss": 2.7665, "step": 295240 }, { "epoch": 12.717405349528363, "learning_rate": 2.847454157598742e-07, "loss": 2.6, "step": 295260 }, { "epoch": 12.71826678726795, "learning_rate": 2.846969337843953e-07, "loss": 2.5887, "step": 295280 }, { "epoch": 12.719128225007537, "learning_rate": 2.846484518089164e-07, "loss": 2.5904, "step": 295300 }, { "epoch": 12.719989662747125, "learning_rate": 2.845999698334374e-07, "loss": 2.7059, "step": 295320 }, { "epoch": 12.720851100486712, "learning_rate": 2.845514878579586e-07, "loss": 2.6135, "step": 295340 }, { "epoch": 12.7217125382263, "learning_rate": 2.8450300588247967e-07, "loss": 2.5256, "step": 295360 }, { "epoch": 12.722573975965886, "learning_rate": 2.8445452390700085e-07, "loss": 2.5843, "step": 295380 }, { "epoch": 12.723435413705474, "learning_rate": 2.844060419315219e-07, "loss": 2.7224, "step": 295400 }, { "epoch": 12.724296851445061, "learning_rate": 2.8435755995604294e-07, "loss": 2.6084, "step": 295420 }, { "epoch": 12.72515828918465, "learning_rate": 2.843090779805641e-07, "loss": 2.5841, "step": 295440 }, { "epoch": 12.726019726924237, "learning_rate": 2.8426059600508513e-07, "loss": 2.6595, "step": 295460 }, { "epoch": 12.726881164663824, "learning_rate": 2.842121140296063e-07, "loss": 2.6791, "step": 295480 }, { "epoch": 12.727742602403412, "learning_rate": 2.841636320541274e-07, "loss": 2.57, "step": 295500 }, { "epoch": 12.728604040142999, "learning_rate": 2.8411515007864856e-07, "loss": 2.6894, "step": 295520 }, { "epoch": 12.729465477882586, "learning_rate": 2.8406666810316963e-07, "loss": 2.5993, "step": 295540 }, { "epoch": 12.730326915622173, "learning_rate": 2.8401818612769076e-07, "loss": 2.7104, "step": 295560 }, { "epoch": 12.73118835336176, "learning_rate": 2.839697041522118e-07, "loss": 2.6457, "step": 295580 }, { "epoch": 12.732049791101348, "learning_rate": 2.8392122217673295e-07, "loss": 2.5959, "step": 295600 }, { "epoch": 12.732911228840935, "learning_rate": 2.83872740201254e-07, "loss": 2.6245, "step": 295620 }, { "epoch": 12.733772666580522, "learning_rate": 2.838242582257752e-07, "loss": 2.6517, "step": 295640 }, { "epoch": 12.73463410432011, "learning_rate": 2.8377577625029627e-07, "loss": 2.6414, "step": 295660 }, { "epoch": 12.735495542059697, "learning_rate": 2.837272942748174e-07, "loss": 2.5139, "step": 295680 }, { "epoch": 12.736356979799286, "learning_rate": 2.8367881229933847e-07, "loss": 2.5954, "step": 295700 }, { "epoch": 12.737218417538873, "learning_rate": 2.836303303238596e-07, "loss": 2.6218, "step": 295720 }, { "epoch": 12.73807985527846, "learning_rate": 2.8358184834838066e-07, "loss": 2.7151, "step": 295740 }, { "epoch": 12.738941293018048, "learning_rate": 2.8353336637290184e-07, "loss": 2.6355, "step": 295760 }, { "epoch": 12.739802730757635, "learning_rate": 2.834848843974229e-07, "loss": 2.7098, "step": 295780 }, { "epoch": 12.740664168497222, "learning_rate": 2.8343640242194404e-07, "loss": 2.5331, "step": 295800 }, { "epoch": 12.74152560623681, "learning_rate": 2.833879204464651e-07, "loss": 2.7369, "step": 295820 }, { "epoch": 12.742387043976397, "learning_rate": 2.8333943847098623e-07, "loss": 2.6565, "step": 295840 }, { "epoch": 12.743248481715984, "learning_rate": 2.832909564955073e-07, "loss": 2.664, "step": 295860 }, { "epoch": 12.744109919455571, "learning_rate": 2.832424745200285e-07, "loss": 2.7261, "step": 295880 }, { "epoch": 12.744971357195158, "learning_rate": 2.8319399254454955e-07, "loss": 2.7797, "step": 295900 }, { "epoch": 12.745832794934746, "learning_rate": 2.831455105690707e-07, "loss": 2.5473, "step": 295920 }, { "epoch": 12.746694232674333, "learning_rate": 2.8309702859359175e-07, "loss": 2.5462, "step": 295940 }, { "epoch": 12.74755567041392, "learning_rate": 2.8304854661811277e-07, "loss": 2.6234, "step": 295960 }, { "epoch": 12.748417108153507, "learning_rate": 2.8300006464263394e-07, "loss": 2.5749, "step": 295980 }, { "epoch": 12.749278545893096, "learning_rate": 2.82951582667155e-07, "loss": 2.5439, "step": 296000 }, { "epoch": 12.750139983632684, "learning_rate": 2.8290310069167614e-07, "loss": 2.4526, "step": 296020 }, { "epoch": 12.75100142137227, "learning_rate": 2.8285461871619726e-07, "loss": 2.786, "step": 296040 }, { "epoch": 12.751862859111858, "learning_rate": 2.828061367407184e-07, "loss": 2.7407, "step": 296060 }, { "epoch": 12.752724296851445, "learning_rate": 2.8275765476523946e-07, "loss": 2.5435, "step": 296080 }, { "epoch": 12.753585734591033, "learning_rate": 2.827091727897606e-07, "loss": 2.6374, "step": 296100 }, { "epoch": 12.75444717233062, "learning_rate": 2.8266069081428166e-07, "loss": 2.5454, "step": 296120 }, { "epoch": 12.755308610070207, "learning_rate": 2.8261220883880283e-07, "loss": 2.5536, "step": 296140 }, { "epoch": 12.756170047809794, "learning_rate": 2.825637268633239e-07, "loss": 2.7044, "step": 296160 }, { "epoch": 12.757031485549382, "learning_rate": 2.8251524488784503e-07, "loss": 2.6375, "step": 296180 }, { "epoch": 12.757892923288969, "learning_rate": 2.824667629123661e-07, "loss": 2.5967, "step": 296200 }, { "epoch": 12.758754361028556, "learning_rate": 2.824182809368872e-07, "loss": 2.6012, "step": 296220 }, { "epoch": 12.759615798768143, "learning_rate": 2.823697989614083e-07, "loss": 2.5654, "step": 296240 }, { "epoch": 12.76047723650773, "learning_rate": 2.823213169859295e-07, "loss": 2.6531, "step": 296260 }, { "epoch": 12.76133867424732, "learning_rate": 2.8227283501045055e-07, "loss": 2.5965, "step": 296280 }, { "epoch": 12.762200111986907, "learning_rate": 2.822243530349717e-07, "loss": 2.5703, "step": 296300 }, { "epoch": 12.763061549726494, "learning_rate": 2.8217587105949274e-07, "loss": 2.661, "step": 296320 }, { "epoch": 12.763922987466081, "learning_rate": 2.8212738908401387e-07, "loss": 2.7292, "step": 296340 }, { "epoch": 12.764784425205669, "learning_rate": 2.8207890710853494e-07, "loss": 2.7687, "step": 296360 }, { "epoch": 12.765645862945256, "learning_rate": 2.820304251330561e-07, "loss": 2.5975, "step": 296380 }, { "epoch": 12.766507300684843, "learning_rate": 2.819819431575772e-07, "loss": 2.6269, "step": 296400 }, { "epoch": 12.76736873842443, "learning_rate": 2.8193346118209836e-07, "loss": 2.5782, "step": 296420 }, { "epoch": 12.768230176164018, "learning_rate": 2.8188497920661933e-07, "loss": 2.6674, "step": 296440 }, { "epoch": 12.769091613903605, "learning_rate": 2.818364972311405e-07, "loss": 2.5672, "step": 296460 }, { "epoch": 12.769953051643192, "learning_rate": 2.817880152556616e-07, "loss": 2.5856, "step": 296480 }, { "epoch": 12.77081448938278, "learning_rate": 2.8173953328018265e-07, "loss": 2.623, "step": 296500 }, { "epoch": 12.771675927122367, "learning_rate": 2.8169105130470383e-07, "loss": 2.6967, "step": 296520 }, { "epoch": 12.772537364861954, "learning_rate": 2.816425693292249e-07, "loss": 2.7119, "step": 296540 }, { "epoch": 12.773398802601541, "learning_rate": 2.81594087353746e-07, "loss": 2.5336, "step": 296560 }, { "epoch": 12.77426024034113, "learning_rate": 2.815456053782671e-07, "loss": 2.6224, "step": 296580 }, { "epoch": 12.775121678080717, "learning_rate": 2.814971234027882e-07, "loss": 2.7981, "step": 296600 }, { "epoch": 12.775983115820305, "learning_rate": 2.814486414273093e-07, "loss": 2.9213, "step": 296620 }, { "epoch": 12.776844553559892, "learning_rate": 2.8140015945183047e-07, "loss": 2.552, "step": 296640 }, { "epoch": 12.777705991299479, "learning_rate": 2.8135167747635154e-07, "loss": 2.5508, "step": 296660 }, { "epoch": 12.778567429039066, "learning_rate": 2.813031955008727e-07, "loss": 2.798, "step": 296680 }, { "epoch": 12.779428866778654, "learning_rate": 2.8125471352539374e-07, "loss": 2.6025, "step": 296700 }, { "epoch": 12.78029030451824, "learning_rate": 2.8120623154991486e-07, "loss": 2.538, "step": 296720 }, { "epoch": 12.781151742257828, "learning_rate": 2.8115774957443593e-07, "loss": 2.5814, "step": 296740 }, { "epoch": 12.782013179997415, "learning_rate": 2.811092675989571e-07, "loss": 2.7343, "step": 296760 }, { "epoch": 12.782874617737003, "learning_rate": 2.810607856234782e-07, "loss": 2.6081, "step": 296780 }, { "epoch": 12.78373605547659, "learning_rate": 2.8101230364799936e-07, "loss": 2.6453, "step": 296800 }, { "epoch": 12.784597493216177, "learning_rate": 2.809638216725204e-07, "loss": 2.6074, "step": 296820 }, { "epoch": 12.785458930955766, "learning_rate": 2.8091533969704155e-07, "loss": 2.5665, "step": 296840 }, { "epoch": 12.786320368695353, "learning_rate": 2.808668577215626e-07, "loss": 2.6258, "step": 296860 }, { "epoch": 12.78718180643494, "learning_rate": 2.8081837574608375e-07, "loss": 2.6744, "step": 296880 }, { "epoch": 12.788043244174528, "learning_rate": 2.807698937706048e-07, "loss": 2.5914, "step": 296900 }, { "epoch": 12.788904681914115, "learning_rate": 2.80721411795126e-07, "loss": 2.5855, "step": 296920 }, { "epoch": 12.789766119653702, "learning_rate": 2.8067292981964707e-07, "loss": 2.6165, "step": 296940 }, { "epoch": 12.79062755739329, "learning_rate": 2.8062444784416825e-07, "loss": 2.6862, "step": 296960 }, { "epoch": 12.791488995132877, "learning_rate": 2.805759658686892e-07, "loss": 2.6403, "step": 296980 }, { "epoch": 12.792350432872464, "learning_rate": 2.805274838932104e-07, "loss": 2.4418, "step": 297000 }, { "epoch": 12.793211870612051, "learning_rate": 2.8047900191773146e-07, "loss": 2.4945, "step": 297020 }, { "epoch": 12.794073308351638, "learning_rate": 2.8043051994225253e-07, "loss": 2.7498, "step": 297040 }, { "epoch": 12.794934746091226, "learning_rate": 2.803820379667737e-07, "loss": 2.6761, "step": 297060 }, { "epoch": 12.795796183830813, "learning_rate": 2.8033355599129473e-07, "loss": 2.7213, "step": 297080 }, { "epoch": 12.7966576215704, "learning_rate": 2.8028507401581585e-07, "loss": 2.6271, "step": 297100 }, { "epoch": 12.797519059309987, "learning_rate": 2.802365920403369e-07, "loss": 2.5896, "step": 297120 }, { "epoch": 12.798380497049576, "learning_rate": 2.801881100648581e-07, "loss": 2.7081, "step": 297140 }, { "epoch": 12.799241934789164, "learning_rate": 2.801396280893792e-07, "loss": 2.678, "step": 297160 }, { "epoch": 12.800103372528751, "learning_rate": 2.8009114611390035e-07, "loss": 2.7735, "step": 297180 }, { "epoch": 12.800964810268338, "learning_rate": 2.8004266413842137e-07, "loss": 2.7146, "step": 297200 }, { "epoch": 12.801826248007925, "learning_rate": 2.7999418216294255e-07, "loss": 2.4159, "step": 297220 }, { "epoch": 12.802687685747513, "learning_rate": 2.7994570018746357e-07, "loss": 2.52, "step": 297240 }, { "epoch": 12.8035491234871, "learning_rate": 2.7989721821198474e-07, "loss": 2.5229, "step": 297260 }, { "epoch": 12.804410561226687, "learning_rate": 2.798487362365058e-07, "loss": 2.5813, "step": 297280 }, { "epoch": 12.805271998966274, "learning_rate": 2.79800254261027e-07, "loss": 2.5456, "step": 297300 }, { "epoch": 12.806133436705862, "learning_rate": 2.7975177228554806e-07, "loss": 2.5308, "step": 297320 }, { "epoch": 12.806994874445449, "learning_rate": 2.797032903100692e-07, "loss": 2.4351, "step": 297340 }, { "epoch": 12.807856312185036, "learning_rate": 2.796548083345902e-07, "loss": 2.4958, "step": 297360 }, { "epoch": 12.808717749924623, "learning_rate": 2.796063263591114e-07, "loss": 2.6359, "step": 297380 }, { "epoch": 12.809579187664212, "learning_rate": 2.7955784438363246e-07, "loss": 2.5897, "step": 297400 }, { "epoch": 12.8104406254038, "learning_rate": 2.7950936240815363e-07, "loss": 2.727, "step": 297420 }, { "epoch": 12.811302063143387, "learning_rate": 2.794608804326747e-07, "loss": 2.5589, "step": 297440 }, { "epoch": 12.812163500882974, "learning_rate": 2.7941239845719583e-07, "loss": 2.7046, "step": 297460 }, { "epoch": 12.813024938622561, "learning_rate": 2.793639164817169e-07, "loss": 2.6463, "step": 297480 }, { "epoch": 12.813886376362149, "learning_rate": 2.79315434506238e-07, "loss": 2.716, "step": 297500 }, { "epoch": 12.814747814101736, "learning_rate": 2.792669525307591e-07, "loss": 2.5513, "step": 297520 }, { "epoch": 12.815609251841323, "learning_rate": 2.792184705552803e-07, "loss": 2.6723, "step": 297540 }, { "epoch": 12.81647068958091, "learning_rate": 2.7916998857980135e-07, "loss": 2.5926, "step": 297560 }, { "epoch": 12.817332127320498, "learning_rate": 2.7912150660432236e-07, "loss": 2.6568, "step": 297580 }, { "epoch": 12.818193565060085, "learning_rate": 2.7907302462884354e-07, "loss": 2.811, "step": 297600 }, { "epoch": 12.819055002799672, "learning_rate": 2.7902454265336456e-07, "loss": 2.4933, "step": 297620 }, { "epoch": 12.81991644053926, "learning_rate": 2.7897606067788574e-07, "loss": 2.6083, "step": 297640 }, { "epoch": 12.820777878278847, "learning_rate": 2.789275787024068e-07, "loss": 2.5614, "step": 297660 }, { "epoch": 12.821639316018434, "learning_rate": 2.78879096726928e-07, "loss": 2.5322, "step": 297680 }, { "epoch": 12.822500753758023, "learning_rate": 2.7883061475144906e-07, "loss": 2.5783, "step": 297700 }, { "epoch": 12.82336219149761, "learning_rate": 2.787821327759702e-07, "loss": 2.5832, "step": 297720 }, { "epoch": 12.824223629237197, "learning_rate": 2.787336508004912e-07, "loss": 2.6054, "step": 297740 }, { "epoch": 12.825085066976785, "learning_rate": 2.786851688250124e-07, "loss": 2.5139, "step": 297760 }, { "epoch": 12.825946504716372, "learning_rate": 2.7863668684953345e-07, "loss": 2.5547, "step": 297780 }, { "epoch": 12.82680794245596, "learning_rate": 2.785882048740546e-07, "loss": 2.5324, "step": 297800 }, { "epoch": 12.827669380195546, "learning_rate": 2.785397228985757e-07, "loss": 2.61, "step": 297820 }, { "epoch": 12.828530817935134, "learning_rate": 2.784912409230968e-07, "loss": 2.6532, "step": 297840 }, { "epoch": 12.82939225567472, "learning_rate": 2.784427589476179e-07, "loss": 2.6585, "step": 297860 }, { "epoch": 12.830253693414308, "learning_rate": 2.78394276972139e-07, "loss": 2.5425, "step": 297880 }, { "epoch": 12.831115131153895, "learning_rate": 2.783457949966601e-07, "loss": 2.6938, "step": 297900 }, { "epoch": 12.831976568893483, "learning_rate": 2.7829731302118127e-07, "loss": 2.7733, "step": 297920 }, { "epoch": 12.83283800663307, "learning_rate": 2.7824883104570234e-07, "loss": 2.5951, "step": 297940 }, { "epoch": 12.833699444372659, "learning_rate": 2.7820034907022346e-07, "loss": 2.7898, "step": 297960 }, { "epoch": 12.834560882112246, "learning_rate": 2.7815186709474453e-07, "loss": 2.7569, "step": 297980 }, { "epoch": 12.835422319851833, "learning_rate": 2.7810338511926566e-07, "loss": 2.6097, "step": 298000 }, { "epoch": 12.83628375759142, "learning_rate": 2.7805490314378673e-07, "loss": 2.6816, "step": 298020 }, { "epoch": 12.837145195331008, "learning_rate": 2.780064211683079e-07, "loss": 2.6504, "step": 298040 }, { "epoch": 12.838006633070595, "learning_rate": 2.77957939192829e-07, "loss": 2.6471, "step": 298060 }, { "epoch": 12.838868070810182, "learning_rate": 2.7790945721735016e-07, "loss": 2.5633, "step": 298080 }, { "epoch": 12.83972950854977, "learning_rate": 2.778609752418712e-07, "loss": 2.9068, "step": 298100 }, { "epoch": 12.840590946289357, "learning_rate": 2.7781249326639214e-07, "loss": 2.8216, "step": 298120 }, { "epoch": 12.841452384028944, "learning_rate": 2.7776401129091337e-07, "loss": 2.6191, "step": 298140 }, { "epoch": 12.842313821768531, "learning_rate": 2.7771552931543444e-07, "loss": 2.7182, "step": 298160 }, { "epoch": 12.843175259508119, "learning_rate": 2.776670473399556e-07, "loss": 2.7419, "step": 298180 }, { "epoch": 12.844036697247706, "learning_rate": 2.776185653644767e-07, "loss": 2.6258, "step": 298200 }, { "epoch": 12.844898134987293, "learning_rate": 2.7757008338899776e-07, "loss": 2.5853, "step": 298220 }, { "epoch": 12.84575957272688, "learning_rate": 2.775216014135189e-07, "loss": 2.5797, "step": 298240 }, { "epoch": 12.84662101046647, "learning_rate": 2.7747311943804e-07, "loss": 2.5357, "step": 298260 }, { "epoch": 12.847482448206057, "learning_rate": 2.774246374625611e-07, "loss": 2.5866, "step": 298280 }, { "epoch": 12.848343885945644, "learning_rate": 2.7737615548708226e-07, "loss": 2.5728, "step": 298300 }, { "epoch": 12.849205323685231, "learning_rate": 2.7732767351160333e-07, "loss": 2.5743, "step": 298320 }, { "epoch": 12.850066761424818, "learning_rate": 2.7727919153612446e-07, "loss": 2.5963, "step": 298340 }, { "epoch": 12.850928199164406, "learning_rate": 2.7723070956064553e-07, "loss": 2.5451, "step": 298360 }, { "epoch": 12.851789636903993, "learning_rate": 2.7718222758516665e-07, "loss": 2.6661, "step": 298380 }, { "epoch": 12.85265107464358, "learning_rate": 2.771337456096877e-07, "loss": 2.6282, "step": 298400 }, { "epoch": 12.853512512383167, "learning_rate": 2.770852636342089e-07, "loss": 2.5869, "step": 298420 }, { "epoch": 12.854373950122755, "learning_rate": 2.7703678165872997e-07, "loss": 2.6933, "step": 298440 }, { "epoch": 12.855235387862342, "learning_rate": 2.7698829968325115e-07, "loss": 2.6399, "step": 298460 }, { "epoch": 12.856096825601929, "learning_rate": 2.7693981770777217e-07, "loss": 2.5716, "step": 298480 }, { "epoch": 12.856958263341516, "learning_rate": 2.768913357322933e-07, "loss": 2.5783, "step": 298500 }, { "epoch": 12.857819701081105, "learning_rate": 2.7684285375681437e-07, "loss": 2.624, "step": 298520 }, { "epoch": 12.858681138820693, "learning_rate": 2.7679437178133554e-07, "loss": 2.5406, "step": 298540 }, { "epoch": 12.85954257656028, "learning_rate": 2.767458898058566e-07, "loss": 2.7869, "step": 298560 }, { "epoch": 12.860404014299867, "learning_rate": 2.766974078303778e-07, "loss": 2.5571, "step": 298580 }, { "epoch": 12.861265452039454, "learning_rate": 2.766489258548988e-07, "loss": 2.6057, "step": 298600 }, { "epoch": 12.862126889779042, "learning_rate": 2.7660044387942e-07, "loss": 2.5953, "step": 298620 }, { "epoch": 12.862988327518629, "learning_rate": 2.7655196190394106e-07, "loss": 2.7274, "step": 298640 }, { "epoch": 12.863849765258216, "learning_rate": 2.765034799284621e-07, "loss": 2.7594, "step": 298660 }, { "epoch": 12.864711202997803, "learning_rate": 2.7645499795298325e-07, "loss": 2.6732, "step": 298680 }, { "epoch": 12.86557264073739, "learning_rate": 2.764065159775043e-07, "loss": 2.7139, "step": 298700 }, { "epoch": 12.866434078476978, "learning_rate": 2.763580340020255e-07, "loss": 2.6373, "step": 298720 }, { "epoch": 12.867295516216565, "learning_rate": 2.763095520265465e-07, "loss": 2.5931, "step": 298740 }, { "epoch": 12.868156953956152, "learning_rate": 2.7626107005106765e-07, "loss": 2.7149, "step": 298760 }, { "epoch": 12.86901839169574, "learning_rate": 2.762125880755887e-07, "loss": 2.6343, "step": 298780 }, { "epoch": 12.869879829435327, "learning_rate": 2.761641061001099e-07, "loss": 2.6087, "step": 298800 }, { "epoch": 12.870741267174916, "learning_rate": 2.7611562412463097e-07, "loss": 2.6823, "step": 298820 }, { "epoch": 12.871602704914503, "learning_rate": 2.7606714214915214e-07, "loss": 2.6577, "step": 298840 }, { "epoch": 12.87246414265409, "learning_rate": 2.7601866017367316e-07, "loss": 2.7138, "step": 298860 }, { "epoch": 12.873325580393677, "learning_rate": 2.759701781981943e-07, "loss": 2.6444, "step": 298880 }, { "epoch": 12.874187018133265, "learning_rate": 2.7592169622271536e-07, "loss": 2.633, "step": 298900 }, { "epoch": 12.875048455872852, "learning_rate": 2.7587321424723654e-07, "loss": 2.5574, "step": 298920 }, { "epoch": 12.87590989361244, "learning_rate": 2.758247322717576e-07, "loss": 2.4667, "step": 298940 }, { "epoch": 12.876771331352026, "learning_rate": 2.757762502962788e-07, "loss": 2.6237, "step": 298960 }, { "epoch": 12.877632769091614, "learning_rate": 2.757277683207998e-07, "loss": 2.7041, "step": 298980 }, { "epoch": 12.878494206831201, "learning_rate": 2.75679286345321e-07, "loss": 2.6107, "step": 299000 }, { "epoch": 12.879355644570788, "learning_rate": 2.75630804369842e-07, "loss": 2.7385, "step": 299020 }, { "epoch": 12.880217082310375, "learning_rate": 2.755823223943632e-07, "loss": 2.6392, "step": 299040 }, { "epoch": 12.881078520049963, "learning_rate": 2.7553384041888425e-07, "loss": 2.6229, "step": 299060 }, { "epoch": 12.881939957789552, "learning_rate": 2.754853584434054e-07, "loss": 2.7297, "step": 299080 }, { "epoch": 12.882801395529139, "learning_rate": 2.754368764679265e-07, "loss": 2.6914, "step": 299100 }, { "epoch": 12.883662833268726, "learning_rate": 2.753883944924476e-07, "loss": 2.5897, "step": 299120 }, { "epoch": 12.884524271008313, "learning_rate": 2.7533991251696864e-07, "loss": 2.6271, "step": 299140 }, { "epoch": 12.8853857087479, "learning_rate": 2.7529143054148987e-07, "loss": 2.5833, "step": 299160 }, { "epoch": 12.886247146487488, "learning_rate": 2.752429485660109e-07, "loss": 2.6166, "step": 299180 }, { "epoch": 12.887108584227075, "learning_rate": 2.7519446659053196e-07, "loss": 2.5666, "step": 299200 }, { "epoch": 12.887970021966662, "learning_rate": 2.7514598461505314e-07, "loss": 2.6638, "step": 299220 }, { "epoch": 12.88883145970625, "learning_rate": 2.7509750263957416e-07, "loss": 2.6017, "step": 299240 }, { "epoch": 12.889692897445837, "learning_rate": 2.7504902066409533e-07, "loss": 2.6313, "step": 299260 }, { "epoch": 12.890554335185424, "learning_rate": 2.7500053868861635e-07, "loss": 2.6531, "step": 299280 }, { "epoch": 12.891415772925011, "learning_rate": 2.7495205671313753e-07, "loss": 2.7054, "step": 299300 }, { "epoch": 12.892277210664599, "learning_rate": 2.7490357473765865e-07, "loss": 2.5938, "step": 299320 }, { "epoch": 12.893138648404186, "learning_rate": 2.748550927621798e-07, "loss": 2.5624, "step": 299340 }, { "epoch": 12.894000086143773, "learning_rate": 2.748066107867008e-07, "loss": 2.7195, "step": 299360 }, { "epoch": 12.894861523883362, "learning_rate": 2.74758128811222e-07, "loss": 2.5893, "step": 299380 }, { "epoch": 12.89572296162295, "learning_rate": 2.74709646835743e-07, "loss": 2.6357, "step": 299400 }, { "epoch": 12.896584399362537, "learning_rate": 2.7466116486026417e-07, "loss": 2.6014, "step": 299420 }, { "epoch": 12.897445837102124, "learning_rate": 2.7461268288478524e-07, "loss": 2.6344, "step": 299440 }, { "epoch": 12.898307274841711, "learning_rate": 2.745642009093064e-07, "loss": 2.6459, "step": 299460 }, { "epoch": 12.899168712581298, "learning_rate": 2.745157189338275e-07, "loss": 2.6501, "step": 299480 }, { "epoch": 12.900030150320886, "learning_rate": 2.744672369583486e-07, "loss": 2.5032, "step": 299500 }, { "epoch": 12.900891588060473, "learning_rate": 2.7441875498286963e-07, "loss": 2.8034, "step": 299520 }, { "epoch": 12.90175302580006, "learning_rate": 2.743702730073908e-07, "loss": 2.5474, "step": 299540 }, { "epoch": 12.902614463539647, "learning_rate": 2.743217910319119e-07, "loss": 2.4991, "step": 299560 }, { "epoch": 12.903475901279235, "learning_rate": 2.74273309056433e-07, "loss": 2.6761, "step": 299580 }, { "epoch": 12.904337339018822, "learning_rate": 2.7422482708095413e-07, "loss": 2.5863, "step": 299600 }, { "epoch": 12.905198776758409, "learning_rate": 2.7417634510547526e-07, "loss": 2.6597, "step": 299620 }, { "epoch": 12.906060214497996, "learning_rate": 2.7412786312999633e-07, "loss": 2.6865, "step": 299640 }, { "epoch": 12.906921652237585, "learning_rate": 2.7407938115451745e-07, "loss": 2.6143, "step": 299660 }, { "epoch": 12.907783089977173, "learning_rate": 2.740308991790385e-07, "loss": 2.6175, "step": 299680 }, { "epoch": 12.90864452771676, "learning_rate": 2.739824172035597e-07, "loss": 2.6008, "step": 299700 }, { "epoch": 12.909505965456347, "learning_rate": 2.7393393522808077e-07, "loss": 2.6534, "step": 299720 }, { "epoch": 12.910367403195934, "learning_rate": 2.738854532526018e-07, "loss": 2.4686, "step": 299740 }, { "epoch": 12.911228840935522, "learning_rate": 2.7383697127712297e-07, "loss": 2.5069, "step": 299760 }, { "epoch": 12.912090278675109, "learning_rate": 2.73788489301644e-07, "loss": 2.5873, "step": 299780 }, { "epoch": 12.912951716414696, "learning_rate": 2.7374000732616516e-07, "loss": 2.6245, "step": 299800 }, { "epoch": 12.913813154154283, "learning_rate": 2.7369152535068624e-07, "loss": 2.7032, "step": 299820 }, { "epoch": 12.91467459189387, "learning_rate": 2.736430433752074e-07, "loss": 2.6818, "step": 299840 }, { "epoch": 12.915536029633458, "learning_rate": 2.735945613997285e-07, "loss": 2.5451, "step": 299860 }, { "epoch": 12.916397467373045, "learning_rate": 2.735460794242496e-07, "loss": 2.6756, "step": 299880 }, { "epoch": 12.917258905112632, "learning_rate": 2.734975974487706e-07, "loss": 2.6983, "step": 299900 }, { "epoch": 12.91812034285222, "learning_rate": 2.734491154732918e-07, "loss": 2.6017, "step": 299920 }, { "epoch": 12.918981780591807, "learning_rate": 2.734006334978129e-07, "loss": 2.5622, "step": 299940 }, { "epoch": 12.919843218331396, "learning_rate": 2.7335215152233405e-07, "loss": 2.542, "step": 299960 }, { "epoch": 12.920704656070983, "learning_rate": 2.733036695468551e-07, "loss": 2.695, "step": 299980 }, { "epoch": 12.92156609381057, "learning_rate": 2.732551875713762e-07, "loss": 2.7653, "step": 300000 }, { "epoch": 12.922427531550158, "learning_rate": 2.732067055958973e-07, "loss": 2.7004, "step": 300020 }, { "epoch": 12.923288969289745, "learning_rate": 2.7315822362041845e-07, "loss": 2.7434, "step": 300040 }, { "epoch": 12.924150407029332, "learning_rate": 2.731097416449395e-07, "loss": 2.5178, "step": 300060 }, { "epoch": 12.92501184476892, "learning_rate": 2.730612596694607e-07, "loss": 2.5278, "step": 300080 }, { "epoch": 12.925873282508507, "learning_rate": 2.7301277769398177e-07, "loss": 2.6071, "step": 300100 }, { "epoch": 12.926734720248094, "learning_rate": 2.729642957185029e-07, "loss": 2.6055, "step": 300120 }, { "epoch": 12.927596157987681, "learning_rate": 2.7291581374302396e-07, "loss": 2.7922, "step": 300140 }, { "epoch": 12.928457595727268, "learning_rate": 2.728673317675451e-07, "loss": 2.6208, "step": 300160 }, { "epoch": 12.929319033466856, "learning_rate": 2.7281884979206616e-07, "loss": 2.5856, "step": 300180 }, { "epoch": 12.930180471206443, "learning_rate": 2.7277036781658734e-07, "loss": 2.6802, "step": 300200 }, { "epoch": 12.931041908946032, "learning_rate": 2.727218858411084e-07, "loss": 2.6973, "step": 300220 }, { "epoch": 12.931903346685619, "learning_rate": 2.726734038656296e-07, "loss": 2.5916, "step": 300240 }, { "epoch": 12.932764784425206, "learning_rate": 2.726249218901506e-07, "loss": 2.6747, "step": 300260 }, { "epoch": 12.933626222164794, "learning_rate": 2.725764399146717e-07, "loss": 2.7069, "step": 300280 }, { "epoch": 12.93448765990438, "learning_rate": 2.725279579391928e-07, "loss": 2.6773, "step": 300300 }, { "epoch": 12.935349097643968, "learning_rate": 2.7247947596371387e-07, "loss": 2.6966, "step": 300320 }, { "epoch": 12.936210535383555, "learning_rate": 2.7243099398823505e-07, "loss": 2.6603, "step": 300340 }, { "epoch": 12.937071973123142, "learning_rate": 2.723825120127561e-07, "loss": 2.6822, "step": 300360 }, { "epoch": 12.93793341086273, "learning_rate": 2.7233403003727724e-07, "loss": 2.6672, "step": 300380 }, { "epoch": 12.938794848602317, "learning_rate": 2.722855480617983e-07, "loss": 2.6388, "step": 300400 }, { "epoch": 12.939656286341904, "learning_rate": 2.722370660863195e-07, "loss": 2.6384, "step": 300420 }, { "epoch": 12.940517724081491, "learning_rate": 2.721885841108405e-07, "loss": 2.6712, "step": 300440 }, { "epoch": 12.941379161821079, "learning_rate": 2.721401021353617e-07, "loss": 2.6658, "step": 300460 }, { "epoch": 12.942240599560666, "learning_rate": 2.7209162015988276e-07, "loss": 2.583, "step": 300480 }, { "epoch": 12.943102037300253, "learning_rate": 2.7204313818440394e-07, "loss": 2.6777, "step": 300500 }, { "epoch": 12.943963475039842, "learning_rate": 2.7199465620892496e-07, "loss": 2.7412, "step": 300520 }, { "epoch": 12.94482491277943, "learning_rate": 2.719461742334461e-07, "loss": 2.6379, "step": 300540 }, { "epoch": 12.945686350519017, "learning_rate": 2.7189769225796715e-07, "loss": 2.7214, "step": 300560 }, { "epoch": 12.946547788258604, "learning_rate": 2.7184921028248833e-07, "loss": 2.6133, "step": 300580 }, { "epoch": 12.947409225998191, "learning_rate": 2.718007283070094e-07, "loss": 2.5743, "step": 300600 }, { "epoch": 12.948270663737778, "learning_rate": 2.717522463315306e-07, "loss": 2.7952, "step": 300620 }, { "epoch": 12.949132101477366, "learning_rate": 2.717037643560516e-07, "loss": 2.4734, "step": 300640 }, { "epoch": 12.949993539216953, "learning_rate": 2.716552823805727e-07, "loss": 2.6176, "step": 300660 }, { "epoch": 12.95085497695654, "learning_rate": 2.716068004050938e-07, "loss": 2.5759, "step": 300680 }, { "epoch": 12.951716414696127, "learning_rate": 2.7155831842961497e-07, "loss": 2.6594, "step": 300700 }, { "epoch": 12.952577852435715, "learning_rate": 2.7150983645413604e-07, "loss": 2.737, "step": 300720 }, { "epoch": 12.953439290175302, "learning_rate": 2.714613544786572e-07, "loss": 2.5943, "step": 300740 }, { "epoch": 12.95430072791489, "learning_rate": 2.7141287250317824e-07, "loss": 2.7416, "step": 300760 }, { "epoch": 12.955162165654478, "learning_rate": 2.713643905276994e-07, "loss": 2.5609, "step": 300780 }, { "epoch": 12.956023603394065, "learning_rate": 2.7131590855222043e-07, "loss": 2.6876, "step": 300800 }, { "epoch": 12.956885041133653, "learning_rate": 2.712674265767415e-07, "loss": 2.5753, "step": 300820 }, { "epoch": 12.95774647887324, "learning_rate": 2.712189446012627e-07, "loss": 2.6136, "step": 300840 }, { "epoch": 12.958607916612827, "learning_rate": 2.7117046262578375e-07, "loss": 2.7641, "step": 300860 }, { "epoch": 12.959469354352414, "learning_rate": 2.7112198065030493e-07, "loss": 2.7037, "step": 300880 }, { "epoch": 12.960330792092002, "learning_rate": 2.7107349867482595e-07, "loss": 2.5892, "step": 300900 }, { "epoch": 12.961192229831589, "learning_rate": 2.710250166993471e-07, "loss": 2.7004, "step": 300920 }, { "epoch": 12.962053667571176, "learning_rate": 2.7097653472386815e-07, "loss": 2.5434, "step": 300940 }, { "epoch": 12.962915105310763, "learning_rate": 2.709280527483893e-07, "loss": 2.6784, "step": 300960 }, { "epoch": 12.96377654305035, "learning_rate": 2.708795707729104e-07, "loss": 2.4164, "step": 300980 }, { "epoch": 12.964637980789938, "learning_rate": 2.7083108879743157e-07, "loss": 2.5784, "step": 301000 }, { "epoch": 12.965499418529525, "learning_rate": 2.707826068219526e-07, "loss": 2.4189, "step": 301020 }, { "epoch": 12.966360856269112, "learning_rate": 2.7073412484647377e-07, "loss": 2.5985, "step": 301040 }, { "epoch": 12.9672222940087, "learning_rate": 2.706856428709948e-07, "loss": 2.6717, "step": 301060 }, { "epoch": 12.968083731748289, "learning_rate": 2.7063716089551596e-07, "loss": 2.6696, "step": 301080 }, { "epoch": 12.968945169487876, "learning_rate": 2.705886789200371e-07, "loss": 2.4904, "step": 301100 }, { "epoch": 12.969806607227463, "learning_rate": 2.705401969445582e-07, "loss": 2.6252, "step": 301120 }, { "epoch": 12.97066804496705, "learning_rate": 2.7049171496907923e-07, "loss": 2.5448, "step": 301140 }, { "epoch": 12.971529482706638, "learning_rate": 2.704432329936004e-07, "loss": 2.5831, "step": 301160 }, { "epoch": 12.972390920446225, "learning_rate": 2.7039475101812143e-07, "loss": 2.6472, "step": 301180 }, { "epoch": 12.973252358185812, "learning_rate": 2.703462690426426e-07, "loss": 2.5724, "step": 301200 }, { "epoch": 12.9741137959254, "learning_rate": 2.702977870671637e-07, "loss": 2.626, "step": 301220 }, { "epoch": 12.974975233664987, "learning_rate": 2.7024930509168485e-07, "loss": 2.5409, "step": 301240 }, { "epoch": 12.975836671404574, "learning_rate": 2.702008231162059e-07, "loss": 2.6202, "step": 301260 }, { "epoch": 12.976698109144161, "learning_rate": 2.7015234114072705e-07, "loss": 2.6305, "step": 301280 }, { "epoch": 12.977559546883748, "learning_rate": 2.7010385916524807e-07, "loss": 2.5864, "step": 301300 }, { "epoch": 12.978420984623336, "learning_rate": 2.7005537718976924e-07, "loss": 2.6463, "step": 301320 }, { "epoch": 12.979282422362925, "learning_rate": 2.700068952142903e-07, "loss": 2.5249, "step": 301340 }, { "epoch": 12.980143860102512, "learning_rate": 2.699584132388114e-07, "loss": 2.6206, "step": 301360 }, { "epoch": 12.981005297842099, "learning_rate": 2.6990993126333256e-07, "loss": 2.7297, "step": 301380 }, { "epoch": 12.981866735581686, "learning_rate": 2.698614492878536e-07, "loss": 2.6632, "step": 301400 }, { "epoch": 12.982728173321274, "learning_rate": 2.6981296731237476e-07, "loss": 2.7352, "step": 301420 }, { "epoch": 12.98358961106086, "learning_rate": 2.697644853368958e-07, "loss": 2.7319, "step": 301440 }, { "epoch": 12.984451048800448, "learning_rate": 2.6971600336141696e-07, "loss": 2.6456, "step": 301460 }, { "epoch": 12.985312486540035, "learning_rate": 2.6966752138593803e-07, "loss": 2.7005, "step": 301480 }, { "epoch": 12.986173924279623, "learning_rate": 2.696190394104592e-07, "loss": 2.6033, "step": 301500 }, { "epoch": 12.98703536201921, "learning_rate": 2.695705574349802e-07, "loss": 2.6314, "step": 301520 }, { "epoch": 12.987896799758797, "learning_rate": 2.695220754595014e-07, "loss": 2.5994, "step": 301540 }, { "epoch": 12.988758237498384, "learning_rate": 2.694735934840224e-07, "loss": 2.643, "step": 301560 }, { "epoch": 12.989619675237972, "learning_rate": 2.694251115085436e-07, "loss": 2.7575, "step": 301580 }, { "epoch": 12.990481112977559, "learning_rate": 2.6937662953306467e-07, "loss": 2.6093, "step": 301600 }, { "epoch": 12.991342550717146, "learning_rate": 2.6932814755758585e-07, "loss": 2.6586, "step": 301620 }, { "epoch": 12.992203988456735, "learning_rate": 2.692796655821069e-07, "loss": 2.5421, "step": 301640 }, { "epoch": 12.993065426196322, "learning_rate": 2.6923118360662804e-07, "loss": 2.6485, "step": 301660 }, { "epoch": 12.99392686393591, "learning_rate": 2.691827016311491e-07, "loss": 2.7305, "step": 301680 }, { "epoch": 12.994788301675497, "learning_rate": 2.6913421965567024e-07, "loss": 2.6815, "step": 301700 }, { "epoch": 12.995649739415084, "learning_rate": 2.690857376801913e-07, "loss": 2.6734, "step": 301720 }, { "epoch": 12.996511177154671, "learning_rate": 2.690372557047125e-07, "loss": 2.671, "step": 301740 }, { "epoch": 12.997372614894259, "learning_rate": 2.6898877372923356e-07, "loss": 2.5003, "step": 301760 }, { "epoch": 12.998234052633846, "learning_rate": 2.6894029175375463e-07, "loss": 2.7639, "step": 301780 }, { "epoch": 12.999095490373433, "learning_rate": 2.6889180977827575e-07, "loss": 2.5628, "step": 301800 }, { "epoch": 12.99995692811302, "learning_rate": 2.688433278027969e-07, "loss": 2.6752, "step": 301820 }, { "epoch": 13.000818365852608, "learning_rate": 2.6879484582731795e-07, "loss": 2.5615, "step": 301840 }, { "epoch": 13.001679803592195, "learning_rate": 2.6874636385183913e-07, "loss": 2.6487, "step": 301860 }, { "epoch": 13.002541241331782, "learning_rate": 2.686978818763602e-07, "loss": 2.6064, "step": 301880 }, { "epoch": 13.00340267907137, "learning_rate": 2.6864939990088127e-07, "loss": 2.5395, "step": 301900 }, { "epoch": 13.004264116810958, "learning_rate": 2.686009179254024e-07, "loss": 2.677, "step": 301920 }, { "epoch": 13.005125554550546, "learning_rate": 2.685524359499234e-07, "loss": 2.61, "step": 301940 }, { "epoch": 13.005986992290133, "learning_rate": 2.685039539744446e-07, "loss": 2.6091, "step": 301960 }, { "epoch": 13.00684843002972, "learning_rate": 2.6845547199896566e-07, "loss": 2.5934, "step": 301980 }, { "epoch": 13.007709867769307, "learning_rate": 2.6840699002348684e-07, "loss": 2.6411, "step": 302000 }, { "epoch": 13.008571305508895, "learning_rate": 2.683585080480079e-07, "loss": 2.5554, "step": 302020 }, { "epoch": 13.009432743248482, "learning_rate": 2.6831002607252904e-07, "loss": 2.4619, "step": 302040 }, { "epoch": 13.010294180988069, "learning_rate": 2.682615440970501e-07, "loss": 2.6895, "step": 302060 }, { "epoch": 13.011155618727656, "learning_rate": 2.6821306212157123e-07, "loss": 2.5049, "step": 302080 }, { "epoch": 13.012017056467243, "learning_rate": 2.681645801460923e-07, "loss": 2.6687, "step": 302100 }, { "epoch": 13.01287849420683, "learning_rate": 2.681160981706135e-07, "loss": 2.7094, "step": 302120 }, { "epoch": 13.013739931946418, "learning_rate": 2.6806761619513455e-07, "loss": 2.4551, "step": 302140 }, { "epoch": 13.014601369686005, "learning_rate": 2.680191342196557e-07, "loss": 2.5582, "step": 302160 }, { "epoch": 13.015462807425592, "learning_rate": 2.6797065224417675e-07, "loss": 2.692, "step": 302180 }, { "epoch": 13.016324245165181, "learning_rate": 2.679221702686979e-07, "loss": 2.606, "step": 302200 }, { "epoch": 13.017185682904769, "learning_rate": 2.6787368829321894e-07, "loss": 2.633, "step": 302220 }, { "epoch": 13.018047120644356, "learning_rate": 2.678252063177401e-07, "loss": 2.5473, "step": 302240 }, { "epoch": 13.018908558383943, "learning_rate": 2.677767243422612e-07, "loss": 2.4885, "step": 302260 }, { "epoch": 13.01976999612353, "learning_rate": 2.6772824236678237e-07, "loss": 2.5479, "step": 302280 }, { "epoch": 13.020631433863118, "learning_rate": 2.676797603913034e-07, "loss": 2.5428, "step": 302300 }, { "epoch": 13.021492871602705, "learning_rate": 2.676312784158245e-07, "loss": 2.5308, "step": 302320 }, { "epoch": 13.022354309342292, "learning_rate": 2.675827964403456e-07, "loss": 2.7441, "step": 302340 }, { "epoch": 13.02321574708188, "learning_rate": 2.6753431446486676e-07, "loss": 2.5769, "step": 302360 }, { "epoch": 13.024077184821467, "learning_rate": 2.6748583248938783e-07, "loss": 2.7615, "step": 302380 }, { "epoch": 13.024938622561054, "learning_rate": 2.67437350513909e-07, "loss": 2.6189, "step": 302400 }, { "epoch": 13.025800060300641, "learning_rate": 2.6738886853843003e-07, "loss": 2.6219, "step": 302420 }, { "epoch": 13.026661498040228, "learning_rate": 2.673403865629511e-07, "loss": 2.7262, "step": 302440 }, { "epoch": 13.027522935779816, "learning_rate": 2.672919045874722e-07, "loss": 2.4711, "step": 302460 }, { "epoch": 13.028384373519405, "learning_rate": 2.672434226119933e-07, "loss": 2.6528, "step": 302480 }, { "epoch": 13.029245811258992, "learning_rate": 2.671949406365145e-07, "loss": 2.7579, "step": 302500 }, { "epoch": 13.03010724899858, "learning_rate": 2.6714645866103555e-07, "loss": 2.5941, "step": 302520 }, { "epoch": 13.030968686738166, "learning_rate": 2.6709797668555667e-07, "loss": 2.6373, "step": 302540 }, { "epoch": 13.031830124477754, "learning_rate": 2.6704949471007774e-07, "loss": 2.7049, "step": 302560 }, { "epoch": 13.032691562217341, "learning_rate": 2.6700101273459887e-07, "loss": 2.6878, "step": 302580 }, { "epoch": 13.033552999956928, "learning_rate": 2.6695253075911994e-07, "loss": 2.6419, "step": 302600 }, { "epoch": 13.034414437696515, "learning_rate": 2.669040487836411e-07, "loss": 2.5144, "step": 302620 }, { "epoch": 13.035275875436103, "learning_rate": 2.668555668081622e-07, "loss": 2.6666, "step": 302640 }, { "epoch": 13.03613731317569, "learning_rate": 2.6680708483268336e-07, "loss": 2.7529, "step": 302660 }, { "epoch": 13.036998750915277, "learning_rate": 2.667586028572044e-07, "loss": 2.7241, "step": 302680 }, { "epoch": 13.037860188654864, "learning_rate": 2.667101208817255e-07, "loss": 2.6292, "step": 302700 }, { "epoch": 13.038721626394452, "learning_rate": 2.666616389062466e-07, "loss": 2.676, "step": 302720 }, { "epoch": 13.039583064134039, "learning_rate": 2.6661315693076776e-07, "loss": 2.6695, "step": 302740 }, { "epoch": 13.040444501873628, "learning_rate": 2.6656467495528883e-07, "loss": 2.7132, "step": 302760 }, { "epoch": 13.041305939613215, "learning_rate": 2.6651619297981e-07, "loss": 2.5231, "step": 302780 }, { "epoch": 13.042167377352802, "learning_rate": 2.66467711004331e-07, "loss": 2.6172, "step": 302800 }, { "epoch": 13.04302881509239, "learning_rate": 2.664192290288522e-07, "loss": 2.6135, "step": 302820 }, { "epoch": 13.043890252831977, "learning_rate": 2.663707470533732e-07, "loss": 2.5796, "step": 302840 }, { "epoch": 13.044751690571564, "learning_rate": 2.663222650778944e-07, "loss": 2.7081, "step": 302860 }, { "epoch": 13.045613128311151, "learning_rate": 2.662737831024155e-07, "loss": 2.621, "step": 302880 }, { "epoch": 13.046474566050739, "learning_rate": 2.6622530112693665e-07, "loss": 2.5306, "step": 302900 }, { "epoch": 13.047336003790326, "learning_rate": 2.6617681915145766e-07, "loss": 2.6119, "step": 302920 }, { "epoch": 13.048197441529913, "learning_rate": 2.6612833717597884e-07, "loss": 2.6158, "step": 302940 }, { "epoch": 13.0490588792695, "learning_rate": 2.6607985520049986e-07, "loss": 2.6629, "step": 302960 }, { "epoch": 13.049920317009088, "learning_rate": 2.6603137322502093e-07, "loss": 2.5772, "step": 302980 }, { "epoch": 13.050781754748675, "learning_rate": 2.659828912495421e-07, "loss": 2.6038, "step": 303000 }, { "epoch": 13.051643192488262, "learning_rate": 2.659344092740632e-07, "loss": 2.4844, "step": 303020 }, { "epoch": 13.052504630227851, "learning_rate": 2.6588592729858436e-07, "loss": 2.6601, "step": 303040 }, { "epoch": 13.053366067967438, "learning_rate": 2.658374453231054e-07, "loss": 2.4765, "step": 303060 }, { "epoch": 13.054227505707026, "learning_rate": 2.657889633476265e-07, "loss": 2.5308, "step": 303080 }, { "epoch": 13.055088943446613, "learning_rate": 2.6574048137214757e-07, "loss": 2.6465, "step": 303100 }, { "epoch": 13.0559503811862, "learning_rate": 2.6569199939666875e-07, "loss": 2.5301, "step": 303120 }, { "epoch": 13.056811818925787, "learning_rate": 2.656435174211898e-07, "loss": 2.6109, "step": 303140 }, { "epoch": 13.057673256665375, "learning_rate": 2.65595035445711e-07, "loss": 2.662, "step": 303160 }, { "epoch": 13.058534694404962, "learning_rate": 2.65546553470232e-07, "loss": 2.6763, "step": 303180 }, { "epoch": 13.059396132144549, "learning_rate": 2.654980714947532e-07, "loss": 2.701, "step": 303200 }, { "epoch": 13.060257569884136, "learning_rate": 2.654495895192742e-07, "loss": 2.6614, "step": 303220 }, { "epoch": 13.061119007623724, "learning_rate": 2.654011075437954e-07, "loss": 2.525, "step": 303240 }, { "epoch": 13.06198044536331, "learning_rate": 2.6535262556831646e-07, "loss": 2.7763, "step": 303260 }, { "epoch": 13.062841883102898, "learning_rate": 2.6530414359283764e-07, "loss": 2.5338, "step": 303280 }, { "epoch": 13.063703320842485, "learning_rate": 2.6525566161735866e-07, "loss": 2.5807, "step": 303300 }, { "epoch": 13.064564758582074, "learning_rate": 2.6520717964187983e-07, "loss": 2.6429, "step": 303320 }, { "epoch": 13.065426196321662, "learning_rate": 2.6515869766640085e-07, "loss": 2.5146, "step": 303340 }, { "epoch": 13.066287634061249, "learning_rate": 2.6511021569092203e-07, "loss": 2.4708, "step": 303360 }, { "epoch": 13.067149071800836, "learning_rate": 2.650617337154431e-07, "loss": 2.5624, "step": 303380 }, { "epoch": 13.068010509540423, "learning_rate": 2.650132517399643e-07, "loss": 2.6605, "step": 303400 }, { "epoch": 13.06887194728001, "learning_rate": 2.6496476976448535e-07, "loss": 2.6118, "step": 303420 }, { "epoch": 13.069733385019598, "learning_rate": 2.649162877890065e-07, "loss": 2.6912, "step": 303440 }, { "epoch": 13.070594822759185, "learning_rate": 2.6486780581352755e-07, "loss": 2.6579, "step": 303460 }, { "epoch": 13.071456260498772, "learning_rate": 2.6481932383804867e-07, "loss": 2.6719, "step": 303480 }, { "epoch": 13.07231769823836, "learning_rate": 2.6477084186256974e-07, "loss": 2.7401, "step": 303500 }, { "epoch": 13.073179135977947, "learning_rate": 2.647223598870908e-07, "loss": 2.6363, "step": 303520 }, { "epoch": 13.074040573717534, "learning_rate": 2.64673877911612e-07, "loss": 2.5892, "step": 303540 }, { "epoch": 13.074902011457121, "learning_rate": 2.64625395936133e-07, "loss": 2.5345, "step": 303560 }, { "epoch": 13.075763449196709, "learning_rate": 2.645769139606542e-07, "loss": 2.6091, "step": 303580 }, { "epoch": 13.076624886936298, "learning_rate": 2.645284319851752e-07, "loss": 2.5794, "step": 303600 }, { "epoch": 13.077486324675885, "learning_rate": 2.644799500096964e-07, "loss": 2.6407, "step": 303620 }, { "epoch": 13.078347762415472, "learning_rate": 2.6443146803421746e-07, "loss": 2.5916, "step": 303640 }, { "epoch": 13.07920920015506, "learning_rate": 2.6438298605873863e-07, "loss": 2.6332, "step": 303660 }, { "epoch": 13.080070637894647, "learning_rate": 2.643345040832597e-07, "loss": 2.652, "step": 303680 }, { "epoch": 13.080932075634234, "learning_rate": 2.6428602210778083e-07, "loss": 2.6237, "step": 303700 }, { "epoch": 13.081793513373821, "learning_rate": 2.6423754013230185e-07, "loss": 2.6612, "step": 303720 }, { "epoch": 13.082654951113408, "learning_rate": 2.64189058156823e-07, "loss": 2.6535, "step": 303740 }, { "epoch": 13.083516388852996, "learning_rate": 2.641405761813441e-07, "loss": 2.62, "step": 303760 }, { "epoch": 13.084377826592583, "learning_rate": 2.6409209420586527e-07, "loss": 2.7925, "step": 303780 }, { "epoch": 13.08523926433217, "learning_rate": 2.6404361223038634e-07, "loss": 2.7772, "step": 303800 }, { "epoch": 13.086100702071757, "learning_rate": 2.6399513025490747e-07, "loss": 2.6268, "step": 303820 }, { "epoch": 13.086962139811344, "learning_rate": 2.6394664827942854e-07, "loss": 2.5434, "step": 303840 }, { "epoch": 13.087823577550932, "learning_rate": 2.6389816630394967e-07, "loss": 2.594, "step": 303860 }, { "epoch": 13.08868501529052, "learning_rate": 2.6384968432847074e-07, "loss": 2.6446, "step": 303880 }, { "epoch": 13.089546453030108, "learning_rate": 2.638012023529919e-07, "loss": 2.4759, "step": 303900 }, { "epoch": 13.090407890769695, "learning_rate": 2.63752720377513e-07, "loss": 2.6055, "step": 303920 }, { "epoch": 13.091269328509282, "learning_rate": 2.637042384020341e-07, "loss": 2.597, "step": 303940 }, { "epoch": 13.09213076624887, "learning_rate": 2.636557564265552e-07, "loss": 2.6991, "step": 303960 }, { "epoch": 13.092992203988457, "learning_rate": 2.6360727445107636e-07, "loss": 2.6913, "step": 303980 }, { "epoch": 13.093853641728044, "learning_rate": 2.635587924755974e-07, "loss": 2.5528, "step": 304000 }, { "epoch": 13.094715079467631, "learning_rate": 2.6351031050011855e-07, "loss": 2.6152, "step": 304020 }, { "epoch": 13.095576517207219, "learning_rate": 2.634618285246396e-07, "loss": 2.6309, "step": 304040 }, { "epoch": 13.096437954946806, "learning_rate": 2.634133465491607e-07, "loss": 2.7013, "step": 304060 }, { "epoch": 13.097299392686393, "learning_rate": 2.633648645736818e-07, "loss": 2.71, "step": 304080 }, { "epoch": 13.09816083042598, "learning_rate": 2.6331638259820284e-07, "loss": 2.4768, "step": 304100 }, { "epoch": 13.099022268165568, "learning_rate": 2.63267900622724e-07, "loss": 2.4884, "step": 304120 }, { "epoch": 13.099883705905155, "learning_rate": 2.6321941864724514e-07, "loss": 2.7565, "step": 304140 }, { "epoch": 13.100745143644744, "learning_rate": 2.6317093667176627e-07, "loss": 2.7113, "step": 304160 }, { "epoch": 13.101606581384331, "learning_rate": 2.6312245469628734e-07, "loss": 2.5984, "step": 304180 }, { "epoch": 13.102468019123918, "learning_rate": 2.6307397272080846e-07, "loss": 2.71, "step": 304200 }, { "epoch": 13.103329456863506, "learning_rate": 2.6302549074532953e-07, "loss": 2.7681, "step": 304220 }, { "epoch": 13.104190894603093, "learning_rate": 2.6297700876985066e-07, "loss": 2.6624, "step": 304240 }, { "epoch": 13.10505233234268, "learning_rate": 2.6292852679437173e-07, "loss": 2.556, "step": 304260 }, { "epoch": 13.105913770082267, "learning_rate": 2.628800448188929e-07, "loss": 2.5369, "step": 304280 }, { "epoch": 13.106775207821855, "learning_rate": 2.62831562843414e-07, "loss": 2.7743, "step": 304300 }, { "epoch": 13.107636645561442, "learning_rate": 2.627830808679351e-07, "loss": 2.5478, "step": 304320 }, { "epoch": 13.10849808330103, "learning_rate": 2.627345988924562e-07, "loss": 2.6478, "step": 304340 }, { "epoch": 13.109359521040616, "learning_rate": 2.626861169169773e-07, "loss": 2.6533, "step": 304360 }, { "epoch": 13.110220958780204, "learning_rate": 2.6263763494149837e-07, "loss": 2.555, "step": 304380 }, { "epoch": 13.111082396519791, "learning_rate": 2.6258915296601955e-07, "loss": 2.5419, "step": 304400 }, { "epoch": 13.111943834259378, "learning_rate": 2.625406709905406e-07, "loss": 2.5613, "step": 304420 }, { "epoch": 13.112805271998965, "learning_rate": 2.624921890150618e-07, "loss": 2.6712, "step": 304440 }, { "epoch": 13.113666709738554, "learning_rate": 2.624437070395828e-07, "loss": 2.5835, "step": 304460 }, { "epoch": 13.114528147478142, "learning_rate": 2.6239522506410394e-07, "loss": 2.6367, "step": 304480 }, { "epoch": 13.115389585217729, "learning_rate": 2.62346743088625e-07, "loss": 2.7958, "step": 304500 }, { "epoch": 13.116251022957316, "learning_rate": 2.622982611131462e-07, "loss": 2.6331, "step": 304520 }, { "epoch": 13.117112460696903, "learning_rate": 2.6224977913766726e-07, "loss": 2.6486, "step": 304540 }, { "epoch": 13.11797389843649, "learning_rate": 2.6220129716218844e-07, "loss": 2.5002, "step": 304560 }, { "epoch": 13.118835336176078, "learning_rate": 2.6215281518670946e-07, "loss": 2.6136, "step": 304580 }, { "epoch": 13.119696773915665, "learning_rate": 2.6210433321123053e-07, "loss": 2.4932, "step": 304600 }, { "epoch": 13.120558211655252, "learning_rate": 2.6205585123575165e-07, "loss": 2.6518, "step": 304620 }, { "epoch": 13.12141964939484, "learning_rate": 2.620073692602727e-07, "loss": 2.5847, "step": 304640 }, { "epoch": 13.122281087134427, "learning_rate": 2.6195888728479395e-07, "loss": 2.5423, "step": 304660 }, { "epoch": 13.123142524874014, "learning_rate": 2.6191040530931497e-07, "loss": 2.5294, "step": 304680 }, { "epoch": 13.124003962613601, "learning_rate": 2.618619233338361e-07, "loss": 2.6553, "step": 304700 }, { "epoch": 13.124865400353189, "learning_rate": 2.6181344135835717e-07, "loss": 2.5032, "step": 304720 }, { "epoch": 13.125726838092778, "learning_rate": 2.617649593828783e-07, "loss": 2.7025, "step": 304740 }, { "epoch": 13.126588275832365, "learning_rate": 2.6171647740739936e-07, "loss": 2.6432, "step": 304760 }, { "epoch": 13.127449713571952, "learning_rate": 2.6166799543192054e-07, "loss": 2.6117, "step": 304780 }, { "epoch": 13.12831115131154, "learning_rate": 2.616195134564416e-07, "loss": 2.3488, "step": 304800 }, { "epoch": 13.129172589051127, "learning_rate": 2.615710314809628e-07, "loss": 2.6994, "step": 304820 }, { "epoch": 13.130034026790714, "learning_rate": 2.615225495054838e-07, "loss": 2.6947, "step": 304840 }, { "epoch": 13.130895464530301, "learning_rate": 2.6147406753000493e-07, "loss": 2.8372, "step": 304860 }, { "epoch": 13.131756902269888, "learning_rate": 2.61425585554526e-07, "loss": 2.5578, "step": 304880 }, { "epoch": 13.132618340009476, "learning_rate": 2.613771035790472e-07, "loss": 2.598, "step": 304900 }, { "epoch": 13.133479777749063, "learning_rate": 2.6132862160356825e-07, "loss": 2.6546, "step": 304920 }, { "epoch": 13.13434121548865, "learning_rate": 2.6128013962808943e-07, "loss": 2.5749, "step": 304940 }, { "epoch": 13.135202653228237, "learning_rate": 2.6123165765261045e-07, "loss": 2.7047, "step": 304960 }, { "epoch": 13.136064090967825, "learning_rate": 2.6118317567713163e-07, "loss": 2.8053, "step": 304980 }, { "epoch": 13.136925528707412, "learning_rate": 2.6113469370165265e-07, "loss": 2.765, "step": 305000 }, { "epoch": 13.137786966447, "learning_rate": 2.610862117261738e-07, "loss": 2.5409, "step": 305020 }, { "epoch": 13.138648404186588, "learning_rate": 2.610377297506949e-07, "loss": 2.5443, "step": 305040 }, { "epoch": 13.139509841926175, "learning_rate": 2.6098924777521607e-07, "loss": 2.5643, "step": 305060 }, { "epoch": 13.140371279665763, "learning_rate": 2.609407657997371e-07, "loss": 2.837, "step": 305080 }, { "epoch": 13.14123271740535, "learning_rate": 2.6089228382425827e-07, "loss": 2.5216, "step": 305100 }, { "epoch": 13.142094155144937, "learning_rate": 2.608438018487793e-07, "loss": 2.5768, "step": 305120 }, { "epoch": 13.142955592884524, "learning_rate": 2.6079531987330036e-07, "loss": 2.6189, "step": 305140 }, { "epoch": 13.143817030624112, "learning_rate": 2.6074683789782154e-07, "loss": 2.6443, "step": 305160 }, { "epoch": 13.144678468363699, "learning_rate": 2.606983559223426e-07, "loss": 2.817, "step": 305180 }, { "epoch": 13.145539906103286, "learning_rate": 2.606498739468638e-07, "loss": 2.6411, "step": 305200 }, { "epoch": 13.146401343842873, "learning_rate": 2.606013919713848e-07, "loss": 2.5729, "step": 305220 }, { "epoch": 13.14726278158246, "learning_rate": 2.60552909995906e-07, "loss": 2.7698, "step": 305240 }, { "epoch": 13.148124219322048, "learning_rate": 2.60504428020427e-07, "loss": 2.53, "step": 305260 }, { "epoch": 13.148985657061635, "learning_rate": 2.604559460449482e-07, "loss": 2.6859, "step": 305280 }, { "epoch": 13.149847094801224, "learning_rate": 2.6040746406946925e-07, "loss": 2.6277, "step": 305300 }, { "epoch": 13.150708532540811, "learning_rate": 2.603589820939904e-07, "loss": 2.6258, "step": 305320 }, { "epoch": 13.151569970280399, "learning_rate": 2.6031050011851144e-07, "loss": 2.5243, "step": 305340 }, { "epoch": 13.152431408019986, "learning_rate": 2.602620181430326e-07, "loss": 2.548, "step": 305360 }, { "epoch": 13.153292845759573, "learning_rate": 2.6021353616755364e-07, "loss": 2.5161, "step": 305380 }, { "epoch": 13.15415428349916, "learning_rate": 2.601650541920748e-07, "loss": 2.6547, "step": 305400 }, { "epoch": 13.155015721238748, "learning_rate": 2.601165722165959e-07, "loss": 2.6246, "step": 305420 }, { "epoch": 13.155877158978335, "learning_rate": 2.6006809024111707e-07, "loss": 2.6358, "step": 305440 }, { "epoch": 13.156738596717922, "learning_rate": 2.6001960826563814e-07, "loss": 2.6986, "step": 305460 }, { "epoch": 13.15760003445751, "learning_rate": 2.5997112629015926e-07, "loss": 2.4145, "step": 305480 }, { "epoch": 13.158461472197096, "learning_rate": 2.599226443146803e-07, "loss": 2.5752, "step": 305500 }, { "epoch": 13.159322909936684, "learning_rate": 2.5987416233920146e-07, "loss": 2.5435, "step": 305520 }, { "epoch": 13.160184347676271, "learning_rate": 2.5982568036372253e-07, "loss": 2.7423, "step": 305540 }, { "epoch": 13.161045785415858, "learning_rate": 2.597771983882437e-07, "loss": 2.702, "step": 305560 }, { "epoch": 13.161907223155447, "learning_rate": 2.597287164127648e-07, "loss": 2.6743, "step": 305580 }, { "epoch": 13.162768660895035, "learning_rate": 2.596802344372859e-07, "loss": 2.6351, "step": 305600 }, { "epoch": 13.163630098634622, "learning_rate": 2.59631752461807e-07, "loss": 2.6873, "step": 305620 }, { "epoch": 13.164491536374209, "learning_rate": 2.595832704863281e-07, "loss": 2.6427, "step": 305640 }, { "epoch": 13.165352974113796, "learning_rate": 2.5953478851084917e-07, "loss": 2.6083, "step": 305660 }, { "epoch": 13.166214411853383, "learning_rate": 2.5948630653537024e-07, "loss": 2.486, "step": 305680 }, { "epoch": 13.16707584959297, "learning_rate": 2.594378245598914e-07, "loss": 2.6573, "step": 305700 }, { "epoch": 13.167937287332558, "learning_rate": 2.593893425844125e-07, "loss": 2.6877, "step": 305720 }, { "epoch": 13.168798725072145, "learning_rate": 2.593408606089336e-07, "loss": 2.6386, "step": 305740 }, { "epoch": 13.169660162811732, "learning_rate": 2.5929237863345463e-07, "loss": 2.6084, "step": 305760 }, { "epoch": 13.17052160055132, "learning_rate": 2.592438966579758e-07, "loss": 2.713, "step": 305780 }, { "epoch": 13.171383038290907, "learning_rate": 2.591954146824969e-07, "loss": 2.5676, "step": 305800 }, { "epoch": 13.172244476030494, "learning_rate": 2.5914693270701806e-07, "loss": 2.7775, "step": 305820 }, { "epoch": 13.173105913770081, "learning_rate": 2.5909845073153913e-07, "loss": 2.5596, "step": 305840 }, { "epoch": 13.17396735150967, "learning_rate": 2.5904996875606026e-07, "loss": 2.6065, "step": 305860 }, { "epoch": 13.174828789249258, "learning_rate": 2.590014867805813e-07, "loss": 2.6292, "step": 305880 }, { "epoch": 13.175690226988845, "learning_rate": 2.5895300480510245e-07, "loss": 2.6403, "step": 305900 }, { "epoch": 13.176551664728432, "learning_rate": 2.5890452282962347e-07, "loss": 2.5729, "step": 305920 }, { "epoch": 13.17741310246802, "learning_rate": 2.588560408541447e-07, "loss": 2.6555, "step": 305940 }, { "epoch": 13.178274540207607, "learning_rate": 2.5880755887866577e-07, "loss": 2.6704, "step": 305960 }, { "epoch": 13.179135977947194, "learning_rate": 2.587590769031869e-07, "loss": 2.4828, "step": 305980 }, { "epoch": 13.179997415686781, "learning_rate": 2.5871059492770797e-07, "loss": 2.7224, "step": 306000 }, { "epoch": 13.180858853426368, "learning_rate": 2.586621129522291e-07, "loss": 2.5742, "step": 306020 }, { "epoch": 13.181720291165956, "learning_rate": 2.5861363097675016e-07, "loss": 2.4122, "step": 306040 }, { "epoch": 13.182581728905543, "learning_rate": 2.5856514900127134e-07, "loss": 2.6164, "step": 306060 }, { "epoch": 13.18344316664513, "learning_rate": 2.585166670257924e-07, "loss": 2.6388, "step": 306080 }, { "epoch": 13.184304604384717, "learning_rate": 2.5846818505031354e-07, "loss": 2.4858, "step": 306100 }, { "epoch": 13.185166042124305, "learning_rate": 2.584197030748346e-07, "loss": 2.6263, "step": 306120 }, { "epoch": 13.186027479863894, "learning_rate": 2.5837122109935573e-07, "loss": 2.6685, "step": 306140 }, { "epoch": 13.186888917603481, "learning_rate": 2.583227391238768e-07, "loss": 2.6296, "step": 306160 }, { "epoch": 13.187750355343068, "learning_rate": 2.58274257148398e-07, "loss": 2.4683, "step": 306180 }, { "epoch": 13.188611793082655, "learning_rate": 2.5822577517291905e-07, "loss": 2.5284, "step": 306200 }, { "epoch": 13.189473230822243, "learning_rate": 2.581772931974401e-07, "loss": 2.7702, "step": 306220 }, { "epoch": 13.19033466856183, "learning_rate": 2.5812881122196125e-07, "loss": 2.708, "step": 306240 }, { "epoch": 13.191196106301417, "learning_rate": 2.580803292464823e-07, "loss": 2.6489, "step": 306260 }, { "epoch": 13.192057544041004, "learning_rate": 2.5803184727100344e-07, "loss": 2.7037, "step": 306280 }, { "epoch": 13.192918981780592, "learning_rate": 2.579833652955245e-07, "loss": 2.488, "step": 306300 }, { "epoch": 13.193780419520179, "learning_rate": 2.579348833200457e-07, "loss": 2.7993, "step": 306320 }, { "epoch": 13.194641857259766, "learning_rate": 2.5788640134456677e-07, "loss": 2.6552, "step": 306340 }, { "epoch": 13.195503294999353, "learning_rate": 2.578379193690879e-07, "loss": 2.5938, "step": 306360 }, { "epoch": 13.19636473273894, "learning_rate": 2.5778943739360896e-07, "loss": 2.7372, "step": 306380 }, { "epoch": 13.197226170478528, "learning_rate": 2.577409554181301e-07, "loss": 2.6503, "step": 306400 }, { "epoch": 13.198087608218117, "learning_rate": 2.5769247344265116e-07, "loss": 2.5986, "step": 306420 }, { "epoch": 13.198949045957704, "learning_rate": 2.576439914671724e-07, "loss": 2.6749, "step": 306440 }, { "epoch": 13.199810483697291, "learning_rate": 2.575955094916934e-07, "loss": 2.5863, "step": 306460 }, { "epoch": 13.200671921436879, "learning_rate": 2.5754702751621453e-07, "loss": 2.53, "step": 306480 }, { "epoch": 13.201533359176466, "learning_rate": 2.574985455407356e-07, "loss": 2.642, "step": 306500 }, { "epoch": 13.202394796916053, "learning_rate": 2.574500635652567e-07, "loss": 2.6451, "step": 306520 }, { "epoch": 13.20325623465564, "learning_rate": 2.574015815897778e-07, "loss": 2.5487, "step": 306540 }, { "epoch": 13.204117672395228, "learning_rate": 2.57353099614299e-07, "loss": 2.6412, "step": 306560 }, { "epoch": 13.204979110134815, "learning_rate": 2.5730461763882005e-07, "loss": 2.6531, "step": 306580 }, { "epoch": 13.205840547874402, "learning_rate": 2.572561356633412e-07, "loss": 2.64, "step": 306600 }, { "epoch": 13.20670198561399, "learning_rate": 2.5720765368786224e-07, "loss": 2.6314, "step": 306620 }, { "epoch": 13.207563423353577, "learning_rate": 2.5715917171238337e-07, "loss": 2.6048, "step": 306640 }, { "epoch": 13.208424861093164, "learning_rate": 2.5711068973690444e-07, "loss": 2.5858, "step": 306660 }, { "epoch": 13.209286298832751, "learning_rate": 2.570622077614256e-07, "loss": 2.5769, "step": 306680 }, { "epoch": 13.21014773657234, "learning_rate": 2.570137257859467e-07, "loss": 2.7004, "step": 306700 }, { "epoch": 13.211009174311927, "learning_rate": 2.5696524381046786e-07, "loss": 2.6033, "step": 306720 }, { "epoch": 13.211870612051515, "learning_rate": 2.569167618349889e-07, "loss": 2.7423, "step": 306740 }, { "epoch": 13.212732049791102, "learning_rate": 2.5686827985950995e-07, "loss": 2.7109, "step": 306760 }, { "epoch": 13.213593487530689, "learning_rate": 2.568197978840311e-07, "loss": 2.5836, "step": 306780 }, { "epoch": 13.214454925270276, "learning_rate": 2.5677131590855215e-07, "loss": 2.6771, "step": 306800 }, { "epoch": 13.215316363009864, "learning_rate": 2.5672283393307333e-07, "loss": 2.6578, "step": 306820 }, { "epoch": 13.21617780074945, "learning_rate": 2.566743519575944e-07, "loss": 2.5415, "step": 306840 }, { "epoch": 13.217039238489038, "learning_rate": 2.566258699821155e-07, "loss": 2.5031, "step": 306860 }, { "epoch": 13.217900676228625, "learning_rate": 2.565773880066366e-07, "loss": 2.458, "step": 306880 }, { "epoch": 13.218762113968213, "learning_rate": 2.565289060311577e-07, "loss": 2.6473, "step": 306900 }, { "epoch": 13.2196235517078, "learning_rate": 2.564804240556788e-07, "loss": 2.7272, "step": 306920 }, { "epoch": 13.220484989447387, "learning_rate": 2.5643194208019997e-07, "loss": 2.5685, "step": 306940 }, { "epoch": 13.221346427186974, "learning_rate": 2.5638346010472104e-07, "loss": 2.6429, "step": 306960 }, { "epoch": 13.222207864926563, "learning_rate": 2.563349781292422e-07, "loss": 2.6118, "step": 306980 }, { "epoch": 13.22306930266615, "learning_rate": 2.5628649615376324e-07, "loss": 2.5783, "step": 307000 }, { "epoch": 13.223930740405738, "learning_rate": 2.562380141782844e-07, "loss": 2.5645, "step": 307020 }, { "epoch": 13.224792178145325, "learning_rate": 2.5618953220280543e-07, "loss": 2.56, "step": 307040 }, { "epoch": 13.225653615884912, "learning_rate": 2.561410502273266e-07, "loss": 2.7697, "step": 307060 }, { "epoch": 13.2265150536245, "learning_rate": 2.560925682518477e-07, "loss": 2.5968, "step": 307080 }, { "epoch": 13.227376491364087, "learning_rate": 2.5604408627636886e-07, "loss": 2.5923, "step": 307100 }, { "epoch": 13.228237929103674, "learning_rate": 2.559956043008899e-07, "loss": 2.7173, "step": 307120 }, { "epoch": 13.229099366843261, "learning_rate": 2.5594712232541105e-07, "loss": 2.6259, "step": 307140 }, { "epoch": 13.229960804582849, "learning_rate": 2.5589864034993207e-07, "loss": 2.7521, "step": 307160 }, { "epoch": 13.230822242322436, "learning_rate": 2.5585015837445325e-07, "loss": 2.8089, "step": 307180 }, { "epoch": 13.231683680062023, "learning_rate": 2.558016763989743e-07, "loss": 2.4875, "step": 307200 }, { "epoch": 13.23254511780161, "learning_rate": 2.557531944234955e-07, "loss": 2.5141, "step": 307220 }, { "epoch": 13.233406555541197, "learning_rate": 2.5570471244801657e-07, "loss": 2.6736, "step": 307240 }, { "epoch": 13.234267993280785, "learning_rate": 2.556562304725376e-07, "loss": 2.5106, "step": 307260 }, { "epoch": 13.235129431020374, "learning_rate": 2.556077484970587e-07, "loss": 2.5701, "step": 307280 }, { "epoch": 13.235990868759961, "learning_rate": 2.555592665215798e-07, "loss": 2.5772, "step": 307300 }, { "epoch": 13.236852306499548, "learning_rate": 2.5551078454610096e-07, "loss": 2.6105, "step": 307320 }, { "epoch": 13.237713744239135, "learning_rate": 2.5546230257062203e-07, "loss": 2.5223, "step": 307340 }, { "epoch": 13.238575181978723, "learning_rate": 2.554138205951432e-07, "loss": 2.5373, "step": 307360 }, { "epoch": 13.23943661971831, "learning_rate": 2.5536533861966423e-07, "loss": 2.5587, "step": 307380 }, { "epoch": 13.240298057457897, "learning_rate": 2.553168566441854e-07, "loss": 2.6929, "step": 307400 }, { "epoch": 13.241159495197484, "learning_rate": 2.552683746687064e-07, "loss": 2.5897, "step": 307420 }, { "epoch": 13.242020932937072, "learning_rate": 2.552198926932276e-07, "loss": 2.7799, "step": 307440 }, { "epoch": 13.242882370676659, "learning_rate": 2.551714107177487e-07, "loss": 2.7503, "step": 307460 }, { "epoch": 13.243743808416246, "learning_rate": 2.5512292874226985e-07, "loss": 2.6107, "step": 307480 }, { "epoch": 13.244605246155833, "learning_rate": 2.550744467667909e-07, "loss": 2.7337, "step": 307500 }, { "epoch": 13.24546668389542, "learning_rate": 2.5502596479131205e-07, "loss": 2.6188, "step": 307520 }, { "epoch": 13.24632812163501, "learning_rate": 2.5497748281583307e-07, "loss": 2.5774, "step": 307540 }, { "epoch": 13.247189559374597, "learning_rate": 2.5492900084035424e-07, "loss": 2.6083, "step": 307560 }, { "epoch": 13.248050997114184, "learning_rate": 2.548805188648753e-07, "loss": 2.7326, "step": 307580 }, { "epoch": 13.248912434853771, "learning_rate": 2.548320368893965e-07, "loss": 2.7897, "step": 307600 }, { "epoch": 13.249773872593359, "learning_rate": 2.5478355491391756e-07, "loss": 2.8053, "step": 307620 }, { "epoch": 13.250635310332946, "learning_rate": 2.547350729384387e-07, "loss": 2.6573, "step": 307640 }, { "epoch": 13.251496748072533, "learning_rate": 2.546865909629597e-07, "loss": 2.498, "step": 307660 }, { "epoch": 13.25235818581212, "learning_rate": 2.546381089874809e-07, "loss": 2.572, "step": 307680 }, { "epoch": 13.253219623551708, "learning_rate": 2.545896270120019e-07, "loss": 2.6217, "step": 307700 }, { "epoch": 13.254081061291295, "learning_rate": 2.5454114503652313e-07, "loss": 2.575, "step": 307720 }, { "epoch": 13.254942499030882, "learning_rate": 2.544926630610442e-07, "loss": 2.7143, "step": 307740 }, { "epoch": 13.25580393677047, "learning_rate": 2.5444418108556533e-07, "loss": 2.6312, "step": 307760 }, { "epoch": 13.256665374510057, "learning_rate": 2.543956991100864e-07, "loss": 2.5298, "step": 307780 }, { "epoch": 13.257526812249644, "learning_rate": 2.543472171346074e-07, "loss": 2.6041, "step": 307800 }, { "epoch": 13.258388249989231, "learning_rate": 2.542987351591286e-07, "loss": 2.5693, "step": 307820 }, { "epoch": 13.25924968772882, "learning_rate": 2.5425025318364967e-07, "loss": 2.6481, "step": 307840 }, { "epoch": 13.260111125468407, "learning_rate": 2.5420177120817085e-07, "loss": 2.5942, "step": 307860 }, { "epoch": 13.260972563207995, "learning_rate": 2.541532892326919e-07, "loss": 2.5565, "step": 307880 }, { "epoch": 13.261834000947582, "learning_rate": 2.5410480725721304e-07, "loss": 2.5537, "step": 307900 }, { "epoch": 13.26269543868717, "learning_rate": 2.5405632528173406e-07, "loss": 2.6801, "step": 307920 }, { "epoch": 13.263556876426756, "learning_rate": 2.5400784330625524e-07, "loss": 2.72, "step": 307940 }, { "epoch": 13.264418314166344, "learning_rate": 2.539593613307763e-07, "loss": 2.4685, "step": 307960 }, { "epoch": 13.265279751905931, "learning_rate": 2.539108793552975e-07, "loss": 2.7236, "step": 307980 }, { "epoch": 13.266141189645518, "learning_rate": 2.5386239737981856e-07, "loss": 2.6655, "step": 308000 }, { "epoch": 13.267002627385105, "learning_rate": 2.538139154043397e-07, "loss": 2.6397, "step": 308020 }, { "epoch": 13.267864065124693, "learning_rate": 2.5376543342886075e-07, "loss": 2.6042, "step": 308040 }, { "epoch": 13.26872550286428, "learning_rate": 2.537169514533819e-07, "loss": 2.7883, "step": 308060 }, { "epoch": 13.269586940603867, "learning_rate": 2.5366846947790295e-07, "loss": 2.6145, "step": 308080 }, { "epoch": 13.270448378343456, "learning_rate": 2.5361998750242413e-07, "loss": 2.5496, "step": 308100 }, { "epoch": 13.271309816083043, "learning_rate": 2.535715055269452e-07, "loss": 2.6875, "step": 308120 }, { "epoch": 13.27217125382263, "learning_rate": 2.535230235514663e-07, "loss": 2.7115, "step": 308140 }, { "epoch": 13.273032691562218, "learning_rate": 2.534745415759874e-07, "loss": 2.6078, "step": 308160 }, { "epoch": 13.273894129301805, "learning_rate": 2.534260596005085e-07, "loss": 2.5669, "step": 308180 }, { "epoch": 13.274755567041392, "learning_rate": 2.533775776250296e-07, "loss": 2.5691, "step": 308200 }, { "epoch": 13.27561700478098, "learning_rate": 2.533290956495508e-07, "loss": 2.5338, "step": 308220 }, { "epoch": 13.276478442520567, "learning_rate": 2.5328061367407184e-07, "loss": 2.7222, "step": 308240 }, { "epoch": 13.277339880260154, "learning_rate": 2.5323213169859296e-07, "loss": 2.6525, "step": 308260 }, { "epoch": 13.278201317999741, "learning_rate": 2.5318364972311404e-07, "loss": 2.5824, "step": 308280 }, { "epoch": 13.279062755739329, "learning_rate": 2.5313516774763516e-07, "loss": 2.5384, "step": 308300 }, { "epoch": 13.279924193478916, "learning_rate": 2.5308668577215623e-07, "loss": 2.6637, "step": 308320 }, { "epoch": 13.280785631218503, "learning_rate": 2.530382037966773e-07, "loss": 2.5735, "step": 308340 }, { "epoch": 13.28164706895809, "learning_rate": 2.529897218211985e-07, "loss": 2.5317, "step": 308360 }, { "epoch": 13.282508506697678, "learning_rate": 2.529412398457196e-07, "loss": 2.5293, "step": 308380 }, { "epoch": 13.283369944437267, "learning_rate": 2.528927578702407e-07, "loss": 2.4983, "step": 308400 }, { "epoch": 13.284231382176854, "learning_rate": 2.5284427589476175e-07, "loss": 2.439, "step": 308420 }, { "epoch": 13.285092819916441, "learning_rate": 2.5279579391928287e-07, "loss": 2.5472, "step": 308440 }, { "epoch": 13.285954257656028, "learning_rate": 2.5274731194380394e-07, "loss": 2.5931, "step": 308460 }, { "epoch": 13.286815695395616, "learning_rate": 2.526988299683251e-07, "loss": 2.6459, "step": 308480 }, { "epoch": 13.287677133135203, "learning_rate": 2.526503479928462e-07, "loss": 2.5263, "step": 308500 }, { "epoch": 13.28853857087479, "learning_rate": 2.526018660173673e-07, "loss": 2.6128, "step": 308520 }, { "epoch": 13.289400008614377, "learning_rate": 2.525533840418884e-07, "loss": 2.6853, "step": 308540 }, { "epoch": 13.290261446353965, "learning_rate": 2.525049020664095e-07, "loss": 2.7502, "step": 308560 }, { "epoch": 13.291122884093552, "learning_rate": 2.524564200909306e-07, "loss": 2.5823, "step": 308580 }, { "epoch": 13.291984321833139, "learning_rate": 2.5240793811545176e-07, "loss": 2.4266, "step": 308600 }, { "epoch": 13.292845759572726, "learning_rate": 2.5235945613997283e-07, "loss": 2.5653, "step": 308620 }, { "epoch": 13.293707197312314, "learning_rate": 2.5231097416449396e-07, "loss": 2.7127, "step": 308640 }, { "epoch": 13.2945686350519, "learning_rate": 2.5226249218901503e-07, "loss": 2.5315, "step": 308660 }, { "epoch": 13.29543007279149, "learning_rate": 2.5221401021353615e-07, "loss": 2.6033, "step": 308680 }, { "epoch": 13.296291510531077, "learning_rate": 2.521655282380572e-07, "loss": 2.5222, "step": 308700 }, { "epoch": 13.297152948270664, "learning_rate": 2.521170462625784e-07, "loss": 2.5947, "step": 308720 }, { "epoch": 13.298014386010252, "learning_rate": 2.520685642870995e-07, "loss": 2.5283, "step": 308740 }, { "epoch": 13.298875823749839, "learning_rate": 2.5202008231162065e-07, "loss": 2.7919, "step": 308760 }, { "epoch": 13.299737261489426, "learning_rate": 2.5197160033614167e-07, "loss": 2.5789, "step": 308780 }, { "epoch": 13.300598699229013, "learning_rate": 2.5192311836066285e-07, "loss": 2.55, "step": 308800 }, { "epoch": 13.3014601369686, "learning_rate": 2.5187463638518387e-07, "loss": 2.6373, "step": 308820 }, { "epoch": 13.302321574708188, "learning_rate": 2.5182615440970504e-07, "loss": 2.5767, "step": 308840 }, { "epoch": 13.303183012447775, "learning_rate": 2.517776724342261e-07, "loss": 2.602, "step": 308860 }, { "epoch": 13.304044450187362, "learning_rate": 2.517291904587472e-07, "loss": 2.6467, "step": 308880 }, { "epoch": 13.30490588792695, "learning_rate": 2.516807084832683e-07, "loss": 2.6796, "step": 308900 }, { "epoch": 13.305767325666537, "learning_rate": 2.516322265077894e-07, "loss": 2.4592, "step": 308920 }, { "epoch": 13.306628763406124, "learning_rate": 2.515837445323105e-07, "loss": 2.5789, "step": 308940 }, { "epoch": 13.307490201145713, "learning_rate": 2.515352625568315e-07, "loss": 2.5778, "step": 308960 }, { "epoch": 13.3083516388853, "learning_rate": 2.5148678058135276e-07, "loss": 2.6608, "step": 308980 }, { "epoch": 13.309213076624888, "learning_rate": 2.5143829860587383e-07, "loss": 2.6027, "step": 309000 }, { "epoch": 13.310074514364475, "learning_rate": 2.51389816630395e-07, "loss": 2.6352, "step": 309020 }, { "epoch": 13.310935952104062, "learning_rate": 2.51341334654916e-07, "loss": 2.6092, "step": 309040 }, { "epoch": 13.31179738984365, "learning_rate": 2.5129285267943715e-07, "loss": 2.6243, "step": 309060 }, { "epoch": 13.312658827583236, "learning_rate": 2.512443707039582e-07, "loss": 2.7477, "step": 309080 }, { "epoch": 13.313520265322824, "learning_rate": 2.511958887284794e-07, "loss": 2.6252, "step": 309100 }, { "epoch": 13.314381703062411, "learning_rate": 2.5114740675300047e-07, "loss": 2.6107, "step": 309120 }, { "epoch": 13.315243140801998, "learning_rate": 2.5109892477752164e-07, "loss": 2.6659, "step": 309140 }, { "epoch": 13.316104578541585, "learning_rate": 2.5105044280204266e-07, "loss": 2.5502, "step": 309160 }, { "epoch": 13.316966016281173, "learning_rate": 2.5100196082656384e-07, "loss": 2.6917, "step": 309180 }, { "epoch": 13.31782745402076, "learning_rate": 2.5095347885108486e-07, "loss": 2.6354, "step": 309200 }, { "epoch": 13.318688891760347, "learning_rate": 2.5090499687560604e-07, "loss": 2.5776, "step": 309220 }, { "epoch": 13.319550329499936, "learning_rate": 2.508565149001271e-07, "loss": 2.5769, "step": 309240 }, { "epoch": 13.320411767239523, "learning_rate": 2.508080329246483e-07, "loss": 2.6866, "step": 309260 }, { "epoch": 13.32127320497911, "learning_rate": 2.5075955094916936e-07, "loss": 2.6129, "step": 309280 }, { "epoch": 13.322134642718698, "learning_rate": 2.507110689736905e-07, "loss": 2.5801, "step": 309300 }, { "epoch": 13.322996080458285, "learning_rate": 2.506625869982115e-07, "loss": 2.6647, "step": 309320 }, { "epoch": 13.323857518197872, "learning_rate": 2.506141050227327e-07, "loss": 2.6922, "step": 309340 }, { "epoch": 13.32471895593746, "learning_rate": 2.5056562304725375e-07, "loss": 2.5258, "step": 309360 }, { "epoch": 13.325580393677047, "learning_rate": 2.505171410717749e-07, "loss": 2.5408, "step": 309380 }, { "epoch": 13.326441831416634, "learning_rate": 2.50468659096296e-07, "loss": 2.7358, "step": 309400 }, { "epoch": 13.327303269156221, "learning_rate": 2.50420177120817e-07, "loss": 2.8785, "step": 309420 }, { "epoch": 13.328164706895809, "learning_rate": 2.5037169514533814e-07, "loss": 2.5786, "step": 309440 }, { "epoch": 13.329026144635396, "learning_rate": 2.503232131698592e-07, "loss": 2.5491, "step": 309460 }, { "epoch": 13.329887582374983, "learning_rate": 2.5027473119438034e-07, "loss": 2.7053, "step": 309480 }, { "epoch": 13.33074902011457, "learning_rate": 2.5022624921890146e-07, "loss": 2.7644, "step": 309500 }, { "epoch": 13.33161045785416, "learning_rate": 2.5017776724342264e-07, "loss": 2.5879, "step": 309520 }, { "epoch": 13.332471895593747, "learning_rate": 2.5012928526794366e-07, "loss": 2.6265, "step": 309540 }, { "epoch": 13.333333333333334, "learning_rate": 2.5008080329246483e-07, "loss": 2.8365, "step": 309560 }, { "epoch": 13.334194771072921, "learning_rate": 2.5003232131698585e-07, "loss": 2.6719, "step": 309580 }, { "epoch": 13.335056208812508, "learning_rate": 2.4998383934150703e-07, "loss": 2.6803, "step": 309600 }, { "epoch": 13.335917646552096, "learning_rate": 2.499353573660281e-07, "loss": 2.6457, "step": 309620 }, { "epoch": 13.336779084291683, "learning_rate": 2.498868753905493e-07, "loss": 2.8114, "step": 309640 }, { "epoch": 13.33764052203127, "learning_rate": 2.4983839341507035e-07, "loss": 2.8209, "step": 309660 }, { "epoch": 13.338501959770857, "learning_rate": 2.497899114395915e-07, "loss": 2.7457, "step": 309680 }, { "epoch": 13.339363397510445, "learning_rate": 2.497414294641125e-07, "loss": 2.6467, "step": 309700 }, { "epoch": 13.340224835250032, "learning_rate": 2.4969294748863367e-07, "loss": 2.5921, "step": 309720 }, { "epoch": 13.34108627298962, "learning_rate": 2.4964446551315474e-07, "loss": 2.6578, "step": 309740 }, { "epoch": 13.341947710729206, "learning_rate": 2.495959835376759e-07, "loss": 2.5365, "step": 309760 }, { "epoch": 13.342809148468794, "learning_rate": 2.49547501562197e-07, "loss": 2.5924, "step": 309780 }, { "epoch": 13.343670586208383, "learning_rate": 2.494990195867181e-07, "loss": 2.6519, "step": 309800 }, { "epoch": 13.34453202394797, "learning_rate": 2.494505376112392e-07, "loss": 2.4883, "step": 309820 }, { "epoch": 13.345393461687557, "learning_rate": 2.494020556357603e-07, "loss": 2.5973, "step": 309840 }, { "epoch": 13.346254899427144, "learning_rate": 2.493535736602814e-07, "loss": 2.4793, "step": 309860 }, { "epoch": 13.347116337166732, "learning_rate": 2.4930509168480256e-07, "loss": 2.5769, "step": 309880 }, { "epoch": 13.347977774906319, "learning_rate": 2.4925660970932363e-07, "loss": 2.6924, "step": 309900 }, { "epoch": 13.348839212645906, "learning_rate": 2.4920812773384476e-07, "loss": 2.5114, "step": 309920 }, { "epoch": 13.349700650385493, "learning_rate": 2.4915964575836583e-07, "loss": 2.4798, "step": 309940 }, { "epoch": 13.35056208812508, "learning_rate": 2.4911116378288685e-07, "loss": 2.6611, "step": 309960 }, { "epoch": 13.351423525864668, "learning_rate": 2.49062681807408e-07, "loss": 2.7054, "step": 309980 }, { "epoch": 13.352284963604255, "learning_rate": 2.490141998319291e-07, "loss": 2.5479, "step": 310000 }, { "epoch": 13.353146401343842, "learning_rate": 2.4896571785645027e-07, "loss": 2.7308, "step": 310020 }, { "epoch": 13.35400783908343, "learning_rate": 2.4891723588097134e-07, "loss": 2.6109, "step": 310040 }, { "epoch": 13.354869276823017, "learning_rate": 2.4886875390549247e-07, "loss": 2.5903, "step": 310060 }, { "epoch": 13.355730714562604, "learning_rate": 2.488202719300135e-07, "loss": 2.6304, "step": 310080 }, { "epoch": 13.356592152302193, "learning_rate": 2.4877178995453466e-07, "loss": 2.6033, "step": 310100 }, { "epoch": 13.35745359004178, "learning_rate": 2.4872330797905574e-07, "loss": 2.6583, "step": 310120 }, { "epoch": 13.358315027781368, "learning_rate": 2.486748260035769e-07, "loss": 2.5792, "step": 310140 }, { "epoch": 13.359176465520955, "learning_rate": 2.48626344028098e-07, "loss": 2.3862, "step": 310160 }, { "epoch": 13.360037903260542, "learning_rate": 2.485778620526191e-07, "loss": 2.6694, "step": 310180 }, { "epoch": 13.36089934100013, "learning_rate": 2.485293800771402e-07, "loss": 2.6157, "step": 310200 }, { "epoch": 13.361760778739717, "learning_rate": 2.484808981016613e-07, "loss": 2.5402, "step": 310220 }, { "epoch": 13.362622216479304, "learning_rate": 2.484324161261824e-07, "loss": 2.755, "step": 310240 }, { "epoch": 13.363483654218891, "learning_rate": 2.4838393415070355e-07, "loss": 2.6288, "step": 310260 }, { "epoch": 13.364345091958478, "learning_rate": 2.483354521752246e-07, "loss": 2.7295, "step": 310280 }, { "epoch": 13.365206529698066, "learning_rate": 2.4828697019974575e-07, "loss": 2.5844, "step": 310300 }, { "epoch": 13.366067967437653, "learning_rate": 2.482384882242668e-07, "loss": 2.7444, "step": 310320 }, { "epoch": 13.36692940517724, "learning_rate": 2.4819000624878795e-07, "loss": 2.6125, "step": 310340 }, { "epoch": 13.367790842916829, "learning_rate": 2.48141524273309e-07, "loss": 2.6339, "step": 310360 }, { "epoch": 13.368652280656416, "learning_rate": 2.480930422978302e-07, "loss": 2.7418, "step": 310380 }, { "epoch": 13.369513718396004, "learning_rate": 2.4804456032235127e-07, "loss": 2.6589, "step": 310400 }, { "epoch": 13.37037515613559, "learning_rate": 2.4799607834687244e-07, "loss": 2.5761, "step": 310420 }, { "epoch": 13.371236593875178, "learning_rate": 2.4794759637139346e-07, "loss": 2.5038, "step": 310440 }, { "epoch": 13.372098031614765, "learning_rate": 2.478991143959146e-07, "loss": 2.5175, "step": 310460 }, { "epoch": 13.372959469354353, "learning_rate": 2.4785063242043566e-07, "loss": 2.6397, "step": 310480 }, { "epoch": 13.37382090709394, "learning_rate": 2.4780215044495673e-07, "loss": 2.5658, "step": 310500 }, { "epoch": 13.374682344833527, "learning_rate": 2.477536684694779e-07, "loss": 2.6733, "step": 310520 }, { "epoch": 13.375543782573114, "learning_rate": 2.47705186493999e-07, "loss": 2.5706, "step": 310540 }, { "epoch": 13.376405220312702, "learning_rate": 2.476567045185201e-07, "loss": 2.8008, "step": 310560 }, { "epoch": 13.377266658052289, "learning_rate": 2.476082225430412e-07, "loss": 2.4957, "step": 310580 }, { "epoch": 13.378128095791876, "learning_rate": 2.475597405675623e-07, "loss": 2.7782, "step": 310600 }, { "epoch": 13.378989533531463, "learning_rate": 2.4751125859208337e-07, "loss": 2.7666, "step": 310620 }, { "epoch": 13.37985097127105, "learning_rate": 2.4746277661660455e-07, "loss": 2.5153, "step": 310640 }, { "epoch": 13.38071240901064, "learning_rate": 2.474142946411256e-07, "loss": 2.6852, "step": 310660 }, { "epoch": 13.381573846750227, "learning_rate": 2.4736581266564674e-07, "loss": 2.4995, "step": 310680 }, { "epoch": 13.382435284489814, "learning_rate": 2.473173306901678e-07, "loss": 2.5206, "step": 310700 }, { "epoch": 13.383296722229401, "learning_rate": 2.4726884871468894e-07, "loss": 2.549, "step": 310720 }, { "epoch": 13.384158159968988, "learning_rate": 2.4722036673921e-07, "loss": 2.5346, "step": 310740 }, { "epoch": 13.385019597708576, "learning_rate": 2.471718847637312e-07, "loss": 2.6787, "step": 310760 }, { "epoch": 13.385881035448163, "learning_rate": 2.4712340278825226e-07, "loss": 2.5822, "step": 310780 }, { "epoch": 13.38674247318775, "learning_rate": 2.4707492081277344e-07, "loss": 2.6649, "step": 310800 }, { "epoch": 13.387603910927337, "learning_rate": 2.4702643883729446e-07, "loss": 2.688, "step": 310820 }, { "epoch": 13.388465348666925, "learning_rate": 2.469779568618156e-07, "loss": 2.582, "step": 310840 }, { "epoch": 13.389326786406512, "learning_rate": 2.4692947488633665e-07, "loss": 2.656, "step": 310860 }, { "epoch": 13.3901882241461, "learning_rate": 2.4688099291085783e-07, "loss": 2.5511, "step": 310880 }, { "epoch": 13.391049661885686, "learning_rate": 2.468325109353789e-07, "loss": 2.5403, "step": 310900 }, { "epoch": 13.391911099625275, "learning_rate": 2.467840289599001e-07, "loss": 2.7293, "step": 310920 }, { "epoch": 13.392772537364863, "learning_rate": 2.467355469844211e-07, "loss": 2.6078, "step": 310940 }, { "epoch": 13.39363397510445, "learning_rate": 2.466870650089423e-07, "loss": 2.5595, "step": 310960 }, { "epoch": 13.394495412844037, "learning_rate": 2.466385830334633e-07, "loss": 2.5329, "step": 310980 }, { "epoch": 13.395356850583624, "learning_rate": 2.4659010105798447e-07, "loss": 2.5606, "step": 311000 }, { "epoch": 13.396218288323212, "learning_rate": 2.4654161908250554e-07, "loss": 2.4948, "step": 311020 }, { "epoch": 13.397079726062799, "learning_rate": 2.464931371070266e-07, "loss": 2.6074, "step": 311040 }, { "epoch": 13.397941163802386, "learning_rate": 2.464446551315478e-07, "loss": 2.6243, "step": 311060 }, { "epoch": 13.398802601541973, "learning_rate": 2.463961731560688e-07, "loss": 2.7304, "step": 311080 }, { "epoch": 13.39966403928156, "learning_rate": 2.4634769118058993e-07, "loss": 2.6246, "step": 311100 }, { "epoch": 13.400525477021148, "learning_rate": 2.46299209205111e-07, "loss": 2.6427, "step": 311120 }, { "epoch": 13.401386914760735, "learning_rate": 2.462507272296322e-07, "loss": 2.7194, "step": 311140 }, { "epoch": 13.402248352500322, "learning_rate": 2.4620224525415325e-07, "loss": 2.6458, "step": 311160 }, { "epoch": 13.40310979023991, "learning_rate": 2.4615376327867443e-07, "loss": 2.6192, "step": 311180 }, { "epoch": 13.403971227979497, "learning_rate": 2.4610528130319545e-07, "loss": 2.743, "step": 311200 }, { "epoch": 13.404832665719086, "learning_rate": 2.460567993277166e-07, "loss": 2.5398, "step": 311220 }, { "epoch": 13.405694103458673, "learning_rate": 2.4600831735223765e-07, "loss": 2.7562, "step": 311240 }, { "epoch": 13.40655554119826, "learning_rate": 2.459598353767588e-07, "loss": 2.5712, "step": 311260 }, { "epoch": 13.407416978937848, "learning_rate": 2.459113534012799e-07, "loss": 2.6123, "step": 311280 }, { "epoch": 13.408278416677435, "learning_rate": 2.4586287142580107e-07, "loss": 2.6448, "step": 311300 }, { "epoch": 13.409139854417022, "learning_rate": 2.458143894503221e-07, "loss": 2.6371, "step": 311320 }, { "epoch": 13.41000129215661, "learning_rate": 2.4576590747484327e-07, "loss": 2.6925, "step": 311340 }, { "epoch": 13.410862729896197, "learning_rate": 2.457174254993643e-07, "loss": 2.5064, "step": 311360 }, { "epoch": 13.411724167635784, "learning_rate": 2.4566894352388546e-07, "loss": 2.4588, "step": 311380 }, { "epoch": 13.412585605375371, "learning_rate": 2.4562046154840653e-07, "loss": 2.522, "step": 311400 }, { "epoch": 13.413447043114958, "learning_rate": 2.455719795729277e-07, "loss": 2.7448, "step": 311420 }, { "epoch": 13.414308480854546, "learning_rate": 2.455234975974488e-07, "loss": 2.6814, "step": 311440 }, { "epoch": 13.415169918594133, "learning_rate": 2.454750156219699e-07, "loss": 2.5064, "step": 311460 }, { "epoch": 13.41603135633372, "learning_rate": 2.4542653364649093e-07, "loss": 2.764, "step": 311480 }, { "epoch": 13.41689279407331, "learning_rate": 2.453780516710121e-07, "loss": 2.6298, "step": 311500 }, { "epoch": 13.417754231812896, "learning_rate": 2.453295696955332e-07, "loss": 2.7895, "step": 311520 }, { "epoch": 13.418615669552484, "learning_rate": 2.4528108772005435e-07, "loss": 2.8188, "step": 311540 }, { "epoch": 13.41947710729207, "learning_rate": 2.452326057445754e-07, "loss": 2.5353, "step": 311560 }, { "epoch": 13.420338545031658, "learning_rate": 2.4518412376909644e-07, "loss": 2.7352, "step": 311580 }, { "epoch": 13.421199982771245, "learning_rate": 2.451356417936176e-07, "loss": 2.6017, "step": 311600 }, { "epoch": 13.422061420510833, "learning_rate": 2.4508715981813864e-07, "loss": 2.6726, "step": 311620 }, { "epoch": 13.42292285825042, "learning_rate": 2.450386778426598e-07, "loss": 2.5684, "step": 311640 }, { "epoch": 13.423784295990007, "learning_rate": 2.449901958671809e-07, "loss": 2.6366, "step": 311660 }, { "epoch": 13.424645733729594, "learning_rate": 2.4494171389170207e-07, "loss": 2.5527, "step": 311680 }, { "epoch": 13.425507171469182, "learning_rate": 2.4489323191622314e-07, "loss": 2.6648, "step": 311700 }, { "epoch": 13.426368609208769, "learning_rate": 2.4484474994074426e-07, "loss": 2.6971, "step": 311720 }, { "epoch": 13.427230046948356, "learning_rate": 2.447962679652653e-07, "loss": 2.5396, "step": 311740 }, { "epoch": 13.428091484687943, "learning_rate": 2.4474778598978646e-07, "loss": 2.7001, "step": 311760 }, { "epoch": 13.428952922427532, "learning_rate": 2.4469930401430753e-07, "loss": 2.5513, "step": 311780 }, { "epoch": 13.42981436016712, "learning_rate": 2.446508220388287e-07, "loss": 2.6997, "step": 311800 }, { "epoch": 13.430675797906707, "learning_rate": 2.446023400633498e-07, "loss": 2.6252, "step": 311820 }, { "epoch": 13.431537235646294, "learning_rate": 2.445538580878709e-07, "loss": 2.5889, "step": 311840 }, { "epoch": 13.432398673385881, "learning_rate": 2.445053761123919e-07, "loss": 2.48, "step": 311860 }, { "epoch": 13.433260111125469, "learning_rate": 2.444568941369131e-07, "loss": 2.5923, "step": 311880 }, { "epoch": 13.434121548865056, "learning_rate": 2.4440841216143417e-07, "loss": 2.6949, "step": 311900 }, { "epoch": 13.434982986604643, "learning_rate": 2.4435993018595535e-07, "loss": 2.5442, "step": 311920 }, { "epoch": 13.43584442434423, "learning_rate": 2.443114482104764e-07, "loss": 2.684, "step": 311940 }, { "epoch": 13.436705862083818, "learning_rate": 2.4426296623499754e-07, "loss": 2.6743, "step": 311960 }, { "epoch": 13.437567299823405, "learning_rate": 2.442144842595186e-07, "loss": 2.6552, "step": 311980 }, { "epoch": 13.438428737562992, "learning_rate": 2.4416600228403974e-07, "loss": 2.5452, "step": 312000 }, { "epoch": 13.43929017530258, "learning_rate": 2.441175203085608e-07, "loss": 2.6209, "step": 312020 }, { "epoch": 13.440151613042167, "learning_rate": 2.44069038333082e-07, "loss": 2.5455, "step": 312040 }, { "epoch": 13.441013050781756, "learning_rate": 2.4402055635760306e-07, "loss": 2.6212, "step": 312060 }, { "epoch": 13.441874488521343, "learning_rate": 2.439720743821242e-07, "loss": 2.7076, "step": 312080 }, { "epoch": 13.44273592626093, "learning_rate": 2.4392359240664525e-07, "loss": 2.5035, "step": 312100 }, { "epoch": 13.443597364000517, "learning_rate": 2.4387511043116627e-07, "loss": 2.7596, "step": 312120 }, { "epoch": 13.444458801740105, "learning_rate": 2.4382662845568745e-07, "loss": 2.6602, "step": 312140 }, { "epoch": 13.445320239479692, "learning_rate": 2.437781464802085e-07, "loss": 2.5853, "step": 312160 }, { "epoch": 13.446181677219279, "learning_rate": 2.437296645047297e-07, "loss": 2.627, "step": 312180 }, { "epoch": 13.447043114958866, "learning_rate": 2.4368118252925077e-07, "loss": 2.7397, "step": 312200 }, { "epoch": 13.447904552698454, "learning_rate": 2.436327005537719e-07, "loss": 2.6324, "step": 312220 }, { "epoch": 13.44876599043804, "learning_rate": 2.4358421857829297e-07, "loss": 2.5756, "step": 312240 }, { "epoch": 13.449627428177628, "learning_rate": 2.435357366028141e-07, "loss": 2.761, "step": 312260 }, { "epoch": 13.450488865917215, "learning_rate": 2.4348725462733516e-07, "loss": 2.5825, "step": 312280 }, { "epoch": 13.451350303656803, "learning_rate": 2.4343877265185634e-07, "loss": 2.5376, "step": 312300 }, { "epoch": 13.45221174139639, "learning_rate": 2.433902906763774e-07, "loss": 2.5674, "step": 312320 }, { "epoch": 13.453073179135979, "learning_rate": 2.4334180870089854e-07, "loss": 2.5509, "step": 312340 }, { "epoch": 13.453934616875566, "learning_rate": 2.432933267254196e-07, "loss": 2.5194, "step": 312360 }, { "epoch": 13.454796054615153, "learning_rate": 2.4324484474994073e-07, "loss": 2.46, "step": 312380 }, { "epoch": 13.45565749235474, "learning_rate": 2.431963627744618e-07, "loss": 2.5683, "step": 312400 }, { "epoch": 13.456518930094328, "learning_rate": 2.43147880798983e-07, "loss": 2.6261, "step": 312420 }, { "epoch": 13.457380367833915, "learning_rate": 2.4309939882350405e-07, "loss": 2.5832, "step": 312440 }, { "epoch": 13.458241805573502, "learning_rate": 2.430509168480252e-07, "loss": 2.798, "step": 312460 }, { "epoch": 13.45910324331309, "learning_rate": 2.4300243487254625e-07, "loss": 2.5235, "step": 312480 }, { "epoch": 13.459964681052677, "learning_rate": 2.4295395289706737e-07, "loss": 2.5215, "step": 312500 }, { "epoch": 13.460826118792264, "learning_rate": 2.4290547092158844e-07, "loss": 2.6091, "step": 312520 }, { "epoch": 13.461687556531851, "learning_rate": 2.428569889461096e-07, "loss": 2.6526, "step": 312540 }, { "epoch": 13.462548994271438, "learning_rate": 2.428085069706307e-07, "loss": 2.6182, "step": 312560 }, { "epoch": 13.463410432011026, "learning_rate": 2.4276002499515187e-07, "loss": 2.687, "step": 312580 }, { "epoch": 13.464271869750613, "learning_rate": 2.427115430196729e-07, "loss": 2.6361, "step": 312600 }, { "epoch": 13.465133307490202, "learning_rate": 2.42663061044194e-07, "loss": 2.6645, "step": 312620 }, { "epoch": 13.46599474522979, "learning_rate": 2.426145790687151e-07, "loss": 2.7155, "step": 312640 }, { "epoch": 13.466856182969376, "learning_rate": 2.4256609709323616e-07, "loss": 2.5914, "step": 312660 }, { "epoch": 13.467717620708964, "learning_rate": 2.4251761511775733e-07, "loss": 2.4742, "step": 312680 }, { "epoch": 13.468579058448551, "learning_rate": 2.424691331422784e-07, "loss": 2.6718, "step": 312700 }, { "epoch": 13.469440496188138, "learning_rate": 2.4242065116679953e-07, "loss": 2.6461, "step": 312720 }, { "epoch": 13.470301933927725, "learning_rate": 2.423721691913206e-07, "loss": 2.6569, "step": 312740 }, { "epoch": 13.471163371667313, "learning_rate": 2.423236872158417e-07, "loss": 2.609, "step": 312760 }, { "epoch": 13.4720248094069, "learning_rate": 2.422752052403628e-07, "loss": 2.5955, "step": 312780 }, { "epoch": 13.472886247146487, "learning_rate": 2.42226723264884e-07, "loss": 2.7272, "step": 312800 }, { "epoch": 13.473747684886074, "learning_rate": 2.4217824128940505e-07, "loss": 2.6852, "step": 312820 }, { "epoch": 13.474609122625662, "learning_rate": 2.421297593139262e-07, "loss": 2.5584, "step": 312840 }, { "epoch": 13.475470560365249, "learning_rate": 2.4208127733844724e-07, "loss": 2.6021, "step": 312860 }, { "epoch": 13.476331998104836, "learning_rate": 2.4203279536296837e-07, "loss": 2.5985, "step": 312880 }, { "epoch": 13.477193435844425, "learning_rate": 2.4198431338748944e-07, "loss": 2.697, "step": 312900 }, { "epoch": 13.478054873584012, "learning_rate": 2.419358314120106e-07, "loss": 2.8035, "step": 312920 }, { "epoch": 13.4789163113236, "learning_rate": 2.418873494365317e-07, "loss": 2.6354, "step": 312940 }, { "epoch": 13.479777749063187, "learning_rate": 2.4183886746105286e-07, "loss": 2.6125, "step": 312960 }, { "epoch": 13.480639186802774, "learning_rate": 2.417903854855739e-07, "loss": 2.5651, "step": 312980 }, { "epoch": 13.481500624542361, "learning_rate": 2.41741903510095e-07, "loss": 2.546, "step": 313000 }, { "epoch": 13.482362062281949, "learning_rate": 2.416934215346161e-07, "loss": 2.6106, "step": 313020 }, { "epoch": 13.483223500021536, "learning_rate": 2.4164493955913726e-07, "loss": 2.6122, "step": 313040 }, { "epoch": 13.484084937761123, "learning_rate": 2.4159645758365833e-07, "loss": 2.5376, "step": 313060 }, { "epoch": 13.48494637550071, "learning_rate": 2.415479756081795e-07, "loss": 2.708, "step": 313080 }, { "epoch": 13.485807813240298, "learning_rate": 2.414994936327005e-07, "loss": 2.5406, "step": 313100 }, { "epoch": 13.486669250979885, "learning_rate": 2.414510116572217e-07, "loss": 2.5614, "step": 313120 }, { "epoch": 13.487530688719472, "learning_rate": 2.414025296817427e-07, "loss": 2.4992, "step": 313140 }, { "epoch": 13.48839212645906, "learning_rate": 2.413540477062639e-07, "loss": 2.664, "step": 313160 }, { "epoch": 13.489253564198648, "learning_rate": 2.4130556573078497e-07, "loss": 2.6009, "step": 313180 }, { "epoch": 13.490115001938236, "learning_rate": 2.4125708375530604e-07, "loss": 2.6323, "step": 313200 }, { "epoch": 13.490976439677823, "learning_rate": 2.412086017798272e-07, "loss": 2.427, "step": 313220 }, { "epoch": 13.49183787741741, "learning_rate": 2.4116011980434824e-07, "loss": 2.6683, "step": 313240 }, { "epoch": 13.492699315156997, "learning_rate": 2.4111163782886936e-07, "loss": 2.666, "step": 313260 }, { "epoch": 13.493560752896585, "learning_rate": 2.4106315585339043e-07, "loss": 2.5848, "step": 313280 }, { "epoch": 13.494422190636172, "learning_rate": 2.410146738779116e-07, "loss": 2.585, "step": 313300 }, { "epoch": 13.49528362837576, "learning_rate": 2.409661919024327e-07, "loss": 2.6915, "step": 313320 }, { "epoch": 13.496145066115346, "learning_rate": 2.4091770992695386e-07, "loss": 2.6742, "step": 313340 }, { "epoch": 13.497006503854934, "learning_rate": 2.408692279514749e-07, "loss": 2.6662, "step": 313360 }, { "epoch": 13.49786794159452, "learning_rate": 2.4082074597599605e-07, "loss": 2.6417, "step": 313380 }, { "epoch": 13.498729379334108, "learning_rate": 2.4077226400051707e-07, "loss": 2.7282, "step": 313400 }, { "epoch": 13.499590817073695, "learning_rate": 2.4072378202503825e-07, "loss": 2.4319, "step": 313420 }, { "epoch": 13.500452254813283, "learning_rate": 2.406753000495593e-07, "loss": 2.7033, "step": 313440 }, { "epoch": 13.50131369255287, "learning_rate": 2.406268180740805e-07, "loss": 2.5156, "step": 313460 }, { "epoch": 13.502175130292459, "learning_rate": 2.4057833609860157e-07, "loss": 2.6789, "step": 313480 }, { "epoch": 13.503036568032046, "learning_rate": 2.405298541231227e-07, "loss": 2.6506, "step": 313500 }, { "epoch": 13.503898005771633, "learning_rate": 2.404813721476437e-07, "loss": 2.6087, "step": 313520 }, { "epoch": 13.50475944351122, "learning_rate": 2.404328901721649e-07, "loss": 2.491, "step": 313540 }, { "epoch": 13.505620881250808, "learning_rate": 2.4038440819668596e-07, "loss": 2.689, "step": 313560 }, { "epoch": 13.506482318990395, "learning_rate": 2.4033592622120714e-07, "loss": 2.6423, "step": 313580 }, { "epoch": 13.507343756729982, "learning_rate": 2.402874442457282e-07, "loss": 2.6292, "step": 313600 }, { "epoch": 13.50820519446957, "learning_rate": 2.4023896227024934e-07, "loss": 2.734, "step": 313620 }, { "epoch": 13.509066632209157, "learning_rate": 2.4019048029477035e-07, "loss": 2.6066, "step": 313640 }, { "epoch": 13.509928069948744, "learning_rate": 2.4014199831929153e-07, "loss": 2.5064, "step": 313660 }, { "epoch": 13.510789507688331, "learning_rate": 2.400935163438126e-07, "loss": 2.6212, "step": 313680 }, { "epoch": 13.511650945427919, "learning_rate": 2.400450343683338e-07, "loss": 2.4765, "step": 313700 }, { "epoch": 13.512512383167506, "learning_rate": 2.3999655239285485e-07, "loss": 2.6867, "step": 313720 }, { "epoch": 13.513373820907095, "learning_rate": 2.3994807041737587e-07, "loss": 2.5274, "step": 313740 }, { "epoch": 13.514235258646682, "learning_rate": 2.3989958844189705e-07, "loss": 2.5631, "step": 313760 }, { "epoch": 13.51509669638627, "learning_rate": 2.3985110646641807e-07, "loss": 2.7278, "step": 313780 }, { "epoch": 13.515958134125857, "learning_rate": 2.3980262449093924e-07, "loss": 2.5514, "step": 313800 }, { "epoch": 13.516819571865444, "learning_rate": 2.397541425154603e-07, "loss": 2.6512, "step": 313820 }, { "epoch": 13.517681009605031, "learning_rate": 2.397056605399815e-07, "loss": 2.7746, "step": 313840 }, { "epoch": 13.518542447344618, "learning_rate": 2.3965717856450256e-07, "loss": 2.4491, "step": 313860 }, { "epoch": 13.519403885084206, "learning_rate": 2.396086965890237e-07, "loss": 2.6522, "step": 313880 }, { "epoch": 13.520265322823793, "learning_rate": 2.395602146135447e-07, "loss": 2.6163, "step": 313900 }, { "epoch": 13.52112676056338, "learning_rate": 2.395117326380659e-07, "loss": 2.5492, "step": 313920 }, { "epoch": 13.521988198302967, "learning_rate": 2.3946325066258696e-07, "loss": 2.6224, "step": 313940 }, { "epoch": 13.522849636042555, "learning_rate": 2.3941476868710813e-07, "loss": 2.674, "step": 313960 }, { "epoch": 13.523711073782142, "learning_rate": 2.393662867116292e-07, "loss": 2.5871, "step": 313980 }, { "epoch": 13.524572511521729, "learning_rate": 2.3931780473615033e-07, "loss": 2.5395, "step": 314000 }, { "epoch": 13.525433949261316, "learning_rate": 2.392693227606714e-07, "loss": 2.5009, "step": 314020 }, { "epoch": 13.526295387000905, "learning_rate": 2.392208407851925e-07, "loss": 2.7574, "step": 314040 }, { "epoch": 13.527156824740493, "learning_rate": 2.391723588097136e-07, "loss": 2.7873, "step": 314060 }, { "epoch": 13.52801826248008, "learning_rate": 2.391238768342348e-07, "loss": 2.6678, "step": 314080 }, { "epoch": 13.528879700219667, "learning_rate": 2.3907539485875585e-07, "loss": 2.5549, "step": 314100 }, { "epoch": 13.529741137959254, "learning_rate": 2.3902691288327697e-07, "loss": 2.5577, "step": 314120 }, { "epoch": 13.530602575698842, "learning_rate": 2.3897843090779804e-07, "loss": 2.6593, "step": 314140 }, { "epoch": 13.531464013438429, "learning_rate": 2.3892994893231917e-07, "loss": 2.6085, "step": 314160 }, { "epoch": 13.532325451178016, "learning_rate": 2.3888146695684024e-07, "loss": 2.6282, "step": 314180 }, { "epoch": 13.533186888917603, "learning_rate": 2.388329849813614e-07, "loss": 2.6215, "step": 314200 }, { "epoch": 13.53404832665719, "learning_rate": 2.387845030058825e-07, "loss": 2.7215, "step": 314220 }, { "epoch": 13.534909764396778, "learning_rate": 2.387360210304036e-07, "loss": 2.6837, "step": 314240 }, { "epoch": 13.535771202136365, "learning_rate": 2.386875390549247e-07, "loss": 2.3899, "step": 314260 }, { "epoch": 13.536632639875952, "learning_rate": 2.386390570794457e-07, "loss": 2.6089, "step": 314280 }, { "epoch": 13.537494077615541, "learning_rate": 2.385905751039669e-07, "loss": 2.5468, "step": 314300 }, { "epoch": 13.538355515355128, "learning_rate": 2.3854209312848795e-07, "loss": 2.4438, "step": 314320 }, { "epoch": 13.539216953094716, "learning_rate": 2.3849361115300913e-07, "loss": 2.6862, "step": 314340 }, { "epoch": 13.540078390834303, "learning_rate": 2.384451291775302e-07, "loss": 2.5344, "step": 314360 }, { "epoch": 13.54093982857389, "learning_rate": 2.3839664720205132e-07, "loss": 2.5829, "step": 314380 }, { "epoch": 13.541801266313477, "learning_rate": 2.383481652265724e-07, "loss": 2.7122, "step": 314400 }, { "epoch": 13.542662704053065, "learning_rate": 2.3829968325109354e-07, "loss": 2.5506, "step": 314420 }, { "epoch": 13.543524141792652, "learning_rate": 2.382512012756146e-07, "loss": 2.5889, "step": 314440 }, { "epoch": 13.54438557953224, "learning_rate": 2.3820271930013577e-07, "loss": 2.5591, "step": 314460 }, { "epoch": 13.545247017271826, "learning_rate": 2.381542373246568e-07, "loss": 2.5539, "step": 314480 }, { "epoch": 13.546108455011414, "learning_rate": 2.3810575534917796e-07, "loss": 2.6354, "step": 314500 }, { "epoch": 13.546969892751001, "learning_rate": 2.3805727337369903e-07, "loss": 2.6145, "step": 314520 }, { "epoch": 13.547831330490588, "learning_rate": 2.3800879139822019e-07, "loss": 2.6656, "step": 314540 }, { "epoch": 13.548692768230175, "learning_rate": 2.3796030942274126e-07, "loss": 2.6923, "step": 314560 }, { "epoch": 13.549554205969763, "learning_rate": 2.379118274472624e-07, "loss": 2.5696, "step": 314580 }, { "epoch": 13.550415643709352, "learning_rate": 2.3786334547178348e-07, "loss": 2.5214, "step": 314600 }, { "epoch": 13.551277081448939, "learning_rate": 2.3781486349630463e-07, "loss": 2.6726, "step": 314620 }, { "epoch": 13.552138519188526, "learning_rate": 2.3776638152082568e-07, "loss": 2.7103, "step": 314640 }, { "epoch": 13.552999956928113, "learning_rate": 2.3771789954534683e-07, "loss": 2.5352, "step": 314660 }, { "epoch": 13.5538613946677, "learning_rate": 2.376694175698679e-07, "loss": 2.4609, "step": 314680 }, { "epoch": 13.554722832407288, "learning_rate": 2.3762093559438905e-07, "loss": 2.7057, "step": 314700 }, { "epoch": 13.555584270146875, "learning_rate": 2.3757245361891012e-07, "loss": 2.7537, "step": 314720 }, { "epoch": 13.556445707886462, "learning_rate": 2.3752397164343127e-07, "loss": 2.5772, "step": 314740 }, { "epoch": 13.55730714562605, "learning_rate": 2.3747548966795232e-07, "loss": 2.8235, "step": 314760 }, { "epoch": 13.558168583365637, "learning_rate": 2.3742700769247347e-07, "loss": 2.6092, "step": 314780 }, { "epoch": 13.559030021105224, "learning_rate": 2.3737852571699454e-07, "loss": 2.6274, "step": 314800 }, { "epoch": 13.559891458844811, "learning_rate": 2.3733004374151558e-07, "loss": 2.6005, "step": 314820 }, { "epoch": 13.560752896584399, "learning_rate": 2.3728156176603676e-07, "loss": 2.5692, "step": 314840 }, { "epoch": 13.561614334323988, "learning_rate": 2.372330797905578e-07, "loss": 2.558, "step": 314860 }, { "epoch": 13.562475772063575, "learning_rate": 2.3718459781507896e-07, "loss": 2.6597, "step": 314880 }, { "epoch": 13.563337209803162, "learning_rate": 2.3713611583960003e-07, "loss": 2.5809, "step": 314900 }, { "epoch": 13.56419864754275, "learning_rate": 2.3708763386412118e-07, "loss": 2.5523, "step": 314920 }, { "epoch": 13.565060085282337, "learning_rate": 2.3703915188864225e-07, "loss": 2.6379, "step": 314940 }, { "epoch": 13.565921523021924, "learning_rate": 2.369906699131634e-07, "loss": 2.708, "step": 314960 }, { "epoch": 13.566782960761511, "learning_rate": 2.3694218793768445e-07, "loss": 2.4451, "step": 314980 }, { "epoch": 13.567644398501098, "learning_rate": 2.3689370596220562e-07, "loss": 2.6896, "step": 315000 }, { "epoch": 13.568505836240686, "learning_rate": 2.3684522398672664e-07, "loss": 2.642, "step": 315020 }, { "epoch": 13.569367273980273, "learning_rate": 2.3679674201124782e-07, "loss": 2.6705, "step": 315040 }, { "epoch": 13.57022871171986, "learning_rate": 2.367482600357689e-07, "loss": 2.6188, "step": 315060 }, { "epoch": 13.571090149459447, "learning_rate": 2.3669977806029004e-07, "loss": 2.5622, "step": 315080 }, { "epoch": 13.571951587199035, "learning_rate": 2.3665129608481111e-07, "loss": 2.6521, "step": 315100 }, { "epoch": 13.572813024938622, "learning_rate": 2.3660281410933226e-07, "loss": 2.6237, "step": 315120 }, { "epoch": 13.573674462678209, "learning_rate": 2.365543321338533e-07, "loss": 2.7317, "step": 315140 }, { "epoch": 13.574535900417798, "learning_rate": 2.365058501583745e-07, "loss": 2.5398, "step": 315160 }, { "epoch": 13.575397338157385, "learning_rate": 2.3645736818289553e-07, "loss": 2.734, "step": 315180 }, { "epoch": 13.576258775896973, "learning_rate": 2.364088862074167e-07, "loss": 2.4298, "step": 315200 }, { "epoch": 13.57712021363656, "learning_rate": 2.3636040423193775e-07, "loss": 2.7553, "step": 315220 }, { "epoch": 13.577981651376147, "learning_rate": 2.363119222564589e-07, "loss": 2.5988, "step": 315240 }, { "epoch": 13.578843089115734, "learning_rate": 2.3626344028097998e-07, "loss": 2.7544, "step": 315260 }, { "epoch": 13.579704526855322, "learning_rate": 2.3621495830550113e-07, "loss": 2.7165, "step": 315280 }, { "epoch": 13.580565964594909, "learning_rate": 2.3616647633002217e-07, "loss": 2.789, "step": 315300 }, { "epoch": 13.581427402334496, "learning_rate": 2.3611799435454335e-07, "loss": 2.6278, "step": 315320 }, { "epoch": 13.582288840074083, "learning_rate": 2.360695123790644e-07, "loss": 2.739, "step": 315340 }, { "epoch": 13.58315027781367, "learning_rate": 2.3602103040358544e-07, "loss": 2.5887, "step": 315360 }, { "epoch": 13.584011715553258, "learning_rate": 2.3597254842810662e-07, "loss": 2.5674, "step": 315380 }, { "epoch": 13.584873153292845, "learning_rate": 2.3592406645262766e-07, "loss": 2.4476, "step": 315400 }, { "epoch": 13.585734591032432, "learning_rate": 2.3587558447714881e-07, "loss": 2.6411, "step": 315420 }, { "epoch": 13.586596028772021, "learning_rate": 2.3582710250166989e-07, "loss": 2.6268, "step": 315440 }, { "epoch": 13.587457466511609, "learning_rate": 2.3577862052619104e-07, "loss": 2.5505, "step": 315460 }, { "epoch": 13.588318904251196, "learning_rate": 2.357301385507121e-07, "loss": 2.5561, "step": 315480 }, { "epoch": 13.589180341990783, "learning_rate": 2.3568165657523326e-07, "loss": 2.5035, "step": 315500 }, { "epoch": 13.59004177973037, "learning_rate": 2.356331745997543e-07, "loss": 2.6064, "step": 315520 }, { "epoch": 13.590903217469958, "learning_rate": 2.355846926242755e-07, "loss": 2.5732, "step": 315540 }, { "epoch": 13.591764655209545, "learning_rate": 2.3553621064879653e-07, "loss": 2.5613, "step": 315560 }, { "epoch": 13.592626092949132, "learning_rate": 2.3548772867331768e-07, "loss": 2.5786, "step": 315580 }, { "epoch": 13.59348753068872, "learning_rate": 2.3543924669783875e-07, "loss": 2.4646, "step": 315600 }, { "epoch": 13.594348968428307, "learning_rate": 2.353907647223599e-07, "loss": 2.8069, "step": 315620 }, { "epoch": 13.595210406167894, "learning_rate": 2.3534228274688097e-07, "loss": 2.6569, "step": 315640 }, { "epoch": 13.596071843907481, "learning_rate": 2.3529380077140212e-07, "loss": 2.417, "step": 315660 }, { "epoch": 13.596933281647068, "learning_rate": 2.3524531879592317e-07, "loss": 2.6935, "step": 315680 }, { "epoch": 13.597794719386656, "learning_rate": 2.3519683682044434e-07, "loss": 2.5445, "step": 315700 }, { "epoch": 13.598656157126243, "learning_rate": 2.351483548449654e-07, "loss": 2.7263, "step": 315720 }, { "epoch": 13.599517594865832, "learning_rate": 2.3509987286948654e-07, "loss": 2.5559, "step": 315740 }, { "epoch": 13.600379032605419, "learning_rate": 2.350513908940076e-07, "loss": 2.5486, "step": 315760 }, { "epoch": 13.601240470345006, "learning_rate": 2.3500290891852876e-07, "loss": 2.5204, "step": 315780 }, { "epoch": 13.602101908084594, "learning_rate": 2.3495442694304983e-07, "loss": 2.7117, "step": 315800 }, { "epoch": 13.60296334582418, "learning_rate": 2.3490594496757098e-07, "loss": 2.6245, "step": 315820 }, { "epoch": 13.603824783563768, "learning_rate": 2.3485746299209203e-07, "loss": 2.6393, "step": 315840 }, { "epoch": 13.604686221303355, "learning_rate": 2.348089810166132e-07, "loss": 2.7073, "step": 315860 }, { "epoch": 13.605547659042942, "learning_rate": 2.3476049904113425e-07, "loss": 2.4753, "step": 315880 }, { "epoch": 13.60640909678253, "learning_rate": 2.347120170656553e-07, "loss": 2.5205, "step": 315900 }, { "epoch": 13.607270534522117, "learning_rate": 2.3466353509017647e-07, "loss": 2.519, "step": 315920 }, { "epoch": 13.608131972261704, "learning_rate": 2.3461505311469752e-07, "loss": 2.801, "step": 315940 }, { "epoch": 13.608993410001291, "learning_rate": 2.3456657113921867e-07, "loss": 2.6765, "step": 315960 }, { "epoch": 13.609854847740879, "learning_rate": 2.3451808916373974e-07, "loss": 2.711, "step": 315980 }, { "epoch": 13.610716285480468, "learning_rate": 2.344696071882609e-07, "loss": 2.5411, "step": 316000 }, { "epoch": 13.611577723220055, "learning_rate": 2.3442112521278196e-07, "loss": 2.5969, "step": 316020 }, { "epoch": 13.612439160959642, "learning_rate": 2.3437264323730312e-07, "loss": 2.6818, "step": 316040 }, { "epoch": 13.61330059869923, "learning_rate": 2.3432416126182416e-07, "loss": 2.6649, "step": 316060 }, { "epoch": 13.614162036438817, "learning_rate": 2.3427567928634534e-07, "loss": 2.4477, "step": 316080 }, { "epoch": 13.615023474178404, "learning_rate": 2.3422719731086638e-07, "loss": 2.6765, "step": 316100 }, { "epoch": 13.615884911917991, "learning_rate": 2.3417871533538753e-07, "loss": 2.5705, "step": 316120 }, { "epoch": 13.616746349657578, "learning_rate": 2.341302333599086e-07, "loss": 2.5967, "step": 316140 }, { "epoch": 13.617607787397166, "learning_rate": 2.3408175138442976e-07, "loss": 2.6771, "step": 316160 }, { "epoch": 13.618469225136753, "learning_rate": 2.3403326940895083e-07, "loss": 2.7426, "step": 316180 }, { "epoch": 13.61933066287634, "learning_rate": 2.3398478743347198e-07, "loss": 2.6629, "step": 316200 }, { "epoch": 13.620192100615927, "learning_rate": 2.3393630545799302e-07, "loss": 2.6219, "step": 316220 }, { "epoch": 13.621053538355515, "learning_rate": 2.338878234825142e-07, "loss": 2.6313, "step": 316240 }, { "epoch": 13.621914976095102, "learning_rate": 2.3383934150703525e-07, "loss": 2.6503, "step": 316260 }, { "epoch": 13.62277641383469, "learning_rate": 2.337908595315564e-07, "loss": 2.5146, "step": 316280 }, { "epoch": 13.623637851574278, "learning_rate": 2.3374237755607747e-07, "loss": 2.7663, "step": 316300 }, { "epoch": 13.624499289313865, "learning_rate": 2.3369389558059862e-07, "loss": 2.6365, "step": 316320 }, { "epoch": 13.625360727053453, "learning_rate": 2.336454136051197e-07, "loss": 2.4983, "step": 316340 }, { "epoch": 13.62622216479304, "learning_rate": 2.3359693162964084e-07, "loss": 2.6188, "step": 316360 }, { "epoch": 13.627083602532627, "learning_rate": 2.335484496541619e-07, "loss": 2.627, "step": 316380 }, { "epoch": 13.627945040272214, "learning_rate": 2.3349996767868306e-07, "loss": 2.6724, "step": 316400 }, { "epoch": 13.628806478011802, "learning_rate": 2.334514857032041e-07, "loss": 2.5563, "step": 316420 }, { "epoch": 13.629667915751389, "learning_rate": 2.3340300372772518e-07, "loss": 2.6905, "step": 316440 }, { "epoch": 13.630529353490976, "learning_rate": 2.3335452175224633e-07, "loss": 2.6448, "step": 316460 }, { "epoch": 13.631390791230563, "learning_rate": 2.3330603977676738e-07, "loss": 2.6148, "step": 316480 }, { "epoch": 13.63225222897015, "learning_rate": 2.3325755780128855e-07, "loss": 2.5684, "step": 316500 }, { "epoch": 13.633113666709738, "learning_rate": 2.332090758258096e-07, "loss": 2.8535, "step": 316520 }, { "epoch": 13.633975104449325, "learning_rate": 2.3316059385033075e-07, "loss": 2.6163, "step": 316540 }, { "epoch": 13.634836542188914, "learning_rate": 2.3311211187485182e-07, "loss": 2.5554, "step": 316560 }, { "epoch": 13.635697979928501, "learning_rate": 2.3306362989937297e-07, "loss": 2.6172, "step": 316580 }, { "epoch": 13.636559417668089, "learning_rate": 2.3301514792389402e-07, "loss": 2.4438, "step": 316600 }, { "epoch": 13.637420855407676, "learning_rate": 2.329666659484152e-07, "loss": 2.6679, "step": 316620 }, { "epoch": 13.638282293147263, "learning_rate": 2.3291818397293624e-07, "loss": 2.6042, "step": 316640 }, { "epoch": 13.63914373088685, "learning_rate": 2.328697019974574e-07, "loss": 2.5169, "step": 316660 }, { "epoch": 13.640005168626438, "learning_rate": 2.3282122002197846e-07, "loss": 2.5609, "step": 316680 }, { "epoch": 13.640866606366025, "learning_rate": 2.327727380464996e-07, "loss": 2.6411, "step": 316700 }, { "epoch": 13.641728044105612, "learning_rate": 2.3272425607102068e-07, "loss": 2.5535, "step": 316720 }, { "epoch": 13.6425894818452, "learning_rate": 2.3267577409554183e-07, "loss": 2.6541, "step": 316740 }, { "epoch": 13.643450919584787, "learning_rate": 2.3262729212006288e-07, "loss": 2.6037, "step": 316760 }, { "epoch": 13.644312357324374, "learning_rate": 2.3257881014458406e-07, "loss": 2.7162, "step": 316780 }, { "epoch": 13.645173795063961, "learning_rate": 2.3253032816910508e-07, "loss": 2.5152, "step": 316800 }, { "epoch": 13.646035232803548, "learning_rate": 2.3248184619362625e-07, "loss": 2.6035, "step": 316820 }, { "epoch": 13.646896670543136, "learning_rate": 2.3243336421814732e-07, "loss": 2.705, "step": 316840 }, { "epoch": 13.647758108282725, "learning_rate": 2.3238488224266848e-07, "loss": 2.6841, "step": 316860 }, { "epoch": 13.648619546022312, "learning_rate": 2.3233640026718955e-07, "loss": 2.6903, "step": 316880 }, { "epoch": 13.649480983761899, "learning_rate": 2.322879182917107e-07, "loss": 2.5961, "step": 316900 }, { "epoch": 13.650342421501486, "learning_rate": 2.3223943631623174e-07, "loss": 2.5962, "step": 316920 }, { "epoch": 13.651203859241074, "learning_rate": 2.3219095434075292e-07, "loss": 2.6654, "step": 316940 }, { "epoch": 13.65206529698066, "learning_rate": 2.3214247236527397e-07, "loss": 2.4737, "step": 316960 }, { "epoch": 13.652926734720248, "learning_rate": 2.3209399038979504e-07, "loss": 2.6016, "step": 316980 }, { "epoch": 13.653788172459835, "learning_rate": 2.320455084143162e-07, "loss": 2.534, "step": 317000 }, { "epoch": 13.654649610199423, "learning_rate": 2.3199702643883723e-07, "loss": 2.5667, "step": 317020 }, { "epoch": 13.65551104793901, "learning_rate": 2.319485444633584e-07, "loss": 2.6442, "step": 317040 }, { "epoch": 13.656372485678597, "learning_rate": 2.3190006248787946e-07, "loss": 2.4701, "step": 317060 }, { "epoch": 13.657233923418184, "learning_rate": 2.318515805124006e-07, "loss": 2.5644, "step": 317080 }, { "epoch": 13.658095361157772, "learning_rate": 2.3180309853692168e-07, "loss": 2.5641, "step": 317100 }, { "epoch": 13.65895679889736, "learning_rate": 2.3175461656144283e-07, "loss": 2.58, "step": 317120 }, { "epoch": 13.659818236636948, "learning_rate": 2.3170613458596387e-07, "loss": 2.3708, "step": 317140 }, { "epoch": 13.660679674376535, "learning_rate": 2.3165765261048505e-07, "loss": 2.5548, "step": 317160 }, { "epoch": 13.661541112116122, "learning_rate": 2.316091706350061e-07, "loss": 2.5728, "step": 317180 }, { "epoch": 13.66240254985571, "learning_rate": 2.3156068865952725e-07, "loss": 2.7305, "step": 317200 }, { "epoch": 13.663263987595297, "learning_rate": 2.3151220668404832e-07, "loss": 2.8196, "step": 317220 }, { "epoch": 13.664125425334884, "learning_rate": 2.3146372470856947e-07, "loss": 2.6515, "step": 317240 }, { "epoch": 13.664986863074471, "learning_rate": 2.3141524273309054e-07, "loss": 2.5495, "step": 317260 }, { "epoch": 13.665848300814059, "learning_rate": 2.313667607576117e-07, "loss": 2.6552, "step": 317280 }, { "epoch": 13.666709738553646, "learning_rate": 2.3131827878213274e-07, "loss": 2.6458, "step": 317300 }, { "epoch": 13.667571176293233, "learning_rate": 2.3126979680665394e-07, "loss": 2.74, "step": 317320 }, { "epoch": 13.66843261403282, "learning_rate": 2.3122131483117496e-07, "loss": 2.716, "step": 317340 }, { "epoch": 13.669294051772408, "learning_rate": 2.311728328556961e-07, "loss": 2.7629, "step": 317360 }, { "epoch": 13.670155489511995, "learning_rate": 2.3112435088021718e-07, "loss": 2.4614, "step": 317380 }, { "epoch": 13.671016927251582, "learning_rate": 2.3107586890473833e-07, "loss": 2.6294, "step": 317400 }, { "epoch": 13.671878364991171, "learning_rate": 2.310273869292594e-07, "loss": 2.6722, "step": 317420 }, { "epoch": 13.672739802730758, "learning_rate": 2.3097890495378055e-07, "loss": 2.6999, "step": 317440 }, { "epoch": 13.673601240470346, "learning_rate": 2.309304229783016e-07, "loss": 2.4168, "step": 317460 }, { "epoch": 13.674462678209933, "learning_rate": 2.3088194100282278e-07, "loss": 2.6829, "step": 317480 }, { "epoch": 13.67532411594952, "learning_rate": 2.3083345902734382e-07, "loss": 2.7317, "step": 317500 }, { "epoch": 13.676185553689107, "learning_rate": 2.307849770518649e-07, "loss": 2.3154, "step": 317520 }, { "epoch": 13.677046991428695, "learning_rate": 2.3073649507638604e-07, "loss": 2.6433, "step": 317540 }, { "epoch": 13.677908429168282, "learning_rate": 2.3068801310090712e-07, "loss": 2.4515, "step": 317560 }, { "epoch": 13.678769866907869, "learning_rate": 2.3063953112542827e-07, "loss": 2.6021, "step": 317580 }, { "epoch": 13.679631304647456, "learning_rate": 2.305910491499493e-07, "loss": 2.7584, "step": 317600 }, { "epoch": 13.680492742387043, "learning_rate": 2.3054256717447046e-07, "loss": 2.6365, "step": 317620 }, { "epoch": 13.68135418012663, "learning_rate": 2.3049408519899153e-07, "loss": 2.6376, "step": 317640 }, { "epoch": 13.682215617866218, "learning_rate": 2.3044560322351269e-07, "loss": 2.5223, "step": 317660 }, { "epoch": 13.683077055605807, "learning_rate": 2.3039712124803373e-07, "loss": 2.4847, "step": 317680 }, { "epoch": 13.683938493345394, "learning_rate": 2.303486392725549e-07, "loss": 2.5939, "step": 317700 }, { "epoch": 13.684799931084981, "learning_rate": 2.3030015729707595e-07, "loss": 2.6626, "step": 317720 }, { "epoch": 13.685661368824569, "learning_rate": 2.302516753215971e-07, "loss": 2.5535, "step": 317740 }, { "epoch": 13.686522806564156, "learning_rate": 2.3020319334611818e-07, "loss": 2.598, "step": 317760 }, { "epoch": 13.687384244303743, "learning_rate": 2.3015471137063933e-07, "loss": 2.5754, "step": 317780 }, { "epoch": 13.68824568204333, "learning_rate": 2.301062293951604e-07, "loss": 2.6186, "step": 317800 }, { "epoch": 13.689107119782918, "learning_rate": 2.3005774741968155e-07, "loss": 2.6983, "step": 317820 }, { "epoch": 13.689968557522505, "learning_rate": 2.300092654442026e-07, "loss": 2.6068, "step": 317840 }, { "epoch": 13.690829995262092, "learning_rate": 2.2996078346872377e-07, "loss": 2.5399, "step": 317860 }, { "epoch": 13.69169143300168, "learning_rate": 2.2991230149324482e-07, "loss": 2.649, "step": 317880 }, { "epoch": 13.692552870741267, "learning_rate": 2.2986381951776597e-07, "loss": 2.767, "step": 317900 }, { "epoch": 13.693414308480854, "learning_rate": 2.2981533754228704e-07, "loss": 2.8701, "step": 317920 }, { "epoch": 13.694275746220441, "learning_rate": 2.297668555668082e-07, "loss": 2.6435, "step": 317940 }, { "epoch": 13.695137183960028, "learning_rate": 2.2971837359132926e-07, "loss": 2.5623, "step": 317960 }, { "epoch": 13.695998621699617, "learning_rate": 2.296698916158504e-07, "loss": 2.6595, "step": 317980 }, { "epoch": 13.696860059439205, "learning_rate": 2.2962140964037146e-07, "loss": 2.5111, "step": 318000 }, { "epoch": 13.697721497178792, "learning_rate": 2.2957292766489263e-07, "loss": 2.5726, "step": 318020 }, { "epoch": 13.69858293491838, "learning_rate": 2.2952444568941368e-07, "loss": 2.653, "step": 318040 }, { "epoch": 13.699444372657966, "learning_rate": 2.2947596371393475e-07, "loss": 2.6211, "step": 318060 }, { "epoch": 13.700305810397554, "learning_rate": 2.294274817384559e-07, "loss": 2.74, "step": 318080 }, { "epoch": 13.701167248137141, "learning_rate": 2.2937899976297695e-07, "loss": 2.6846, "step": 318100 }, { "epoch": 13.702028685876728, "learning_rate": 2.2933051778749812e-07, "loss": 2.5525, "step": 318120 }, { "epoch": 13.702890123616315, "learning_rate": 2.2928203581201917e-07, "loss": 2.5077, "step": 318140 }, { "epoch": 13.703751561355903, "learning_rate": 2.2923355383654035e-07, "loss": 2.7061, "step": 318160 }, { "epoch": 13.70461299909549, "learning_rate": 2.291850718610614e-07, "loss": 2.5372, "step": 318180 }, { "epoch": 13.705474436835077, "learning_rate": 2.2913658988558254e-07, "loss": 2.5398, "step": 318200 }, { "epoch": 13.706335874574664, "learning_rate": 2.2908810791010361e-07, "loss": 2.7135, "step": 318220 }, { "epoch": 13.707197312314253, "learning_rate": 2.2903962593462476e-07, "loss": 2.76, "step": 318240 }, { "epoch": 13.70805875005384, "learning_rate": 2.289911439591458e-07, "loss": 2.6263, "step": 318260 }, { "epoch": 13.708920187793428, "learning_rate": 2.28942661983667e-07, "loss": 2.5812, "step": 318280 }, { "epoch": 13.709781625533015, "learning_rate": 2.2889418000818803e-07, "loss": 2.6673, "step": 318300 }, { "epoch": 13.710643063272602, "learning_rate": 2.2884569803270918e-07, "loss": 2.7558, "step": 318320 }, { "epoch": 13.71150450101219, "learning_rate": 2.2879721605723025e-07, "loss": 2.7093, "step": 318340 }, { "epoch": 13.712365938751777, "learning_rate": 2.287487340817514e-07, "loss": 2.6226, "step": 318360 }, { "epoch": 13.713227376491364, "learning_rate": 2.2870025210627245e-07, "loss": 2.691, "step": 318380 }, { "epoch": 13.714088814230951, "learning_rate": 2.2865177013079363e-07, "loss": 2.6256, "step": 318400 }, { "epoch": 13.714950251970539, "learning_rate": 2.2860328815531467e-07, "loss": 2.6553, "step": 318420 }, { "epoch": 13.715811689710126, "learning_rate": 2.2855480617983582e-07, "loss": 2.4524, "step": 318440 }, { "epoch": 13.716673127449713, "learning_rate": 2.285063242043569e-07, "loss": 2.7421, "step": 318460 }, { "epoch": 13.7175345651893, "learning_rate": 2.2845784222887805e-07, "loss": 2.7151, "step": 318480 }, { "epoch": 13.718396002928888, "learning_rate": 2.2840936025339912e-07, "loss": 2.6132, "step": 318500 }, { "epoch": 13.719257440668475, "learning_rate": 2.2836087827792027e-07, "loss": 2.6578, "step": 318520 }, { "epoch": 13.720118878408064, "learning_rate": 2.283123963024413e-07, "loss": 2.7232, "step": 318540 }, { "epoch": 13.720980316147651, "learning_rate": 2.282639143269625e-07, "loss": 2.5221, "step": 318560 }, { "epoch": 13.721841753887238, "learning_rate": 2.282154323514835e-07, "loss": 2.7649, "step": 318580 }, { "epoch": 13.722703191626826, "learning_rate": 2.281669503760046e-07, "loss": 2.5224, "step": 318600 }, { "epoch": 13.723564629366413, "learning_rate": 2.2811846840052576e-07, "loss": 2.5875, "step": 318620 }, { "epoch": 13.724426067106, "learning_rate": 2.280699864250468e-07, "loss": 2.6593, "step": 318640 }, { "epoch": 13.725287504845587, "learning_rate": 2.2802150444956798e-07, "loss": 2.5634, "step": 318660 }, { "epoch": 13.726148942585175, "learning_rate": 2.2797302247408903e-07, "loss": 2.5581, "step": 318680 }, { "epoch": 13.727010380324762, "learning_rate": 2.2792454049861018e-07, "loss": 2.5185, "step": 318700 }, { "epoch": 13.727871818064349, "learning_rate": 2.2787605852313125e-07, "loss": 2.5307, "step": 318720 }, { "epoch": 13.728733255803936, "learning_rate": 2.278275765476524e-07, "loss": 2.6528, "step": 318740 }, { "epoch": 13.729594693543524, "learning_rate": 2.2777909457217347e-07, "loss": 2.6903, "step": 318760 }, { "epoch": 13.73045613128311, "learning_rate": 2.2773061259669462e-07, "loss": 2.5633, "step": 318780 }, { "epoch": 13.731317569022698, "learning_rate": 2.2768213062121567e-07, "loss": 2.6005, "step": 318800 }, { "epoch": 13.732179006762287, "learning_rate": 2.2763364864573684e-07, "loss": 2.6099, "step": 318820 }, { "epoch": 13.733040444501874, "learning_rate": 2.275851666702579e-07, "loss": 2.6526, "step": 318840 }, { "epoch": 13.733901882241462, "learning_rate": 2.2753668469477904e-07, "loss": 2.675, "step": 318860 }, { "epoch": 13.734763319981049, "learning_rate": 2.274882027193001e-07, "loss": 2.5057, "step": 318880 }, { "epoch": 13.735624757720636, "learning_rate": 2.2743972074382126e-07, "loss": 2.5883, "step": 318900 }, { "epoch": 13.736486195460223, "learning_rate": 2.273912387683423e-07, "loss": 2.7093, "step": 318920 }, { "epoch": 13.73734763319981, "learning_rate": 2.2734275679286348e-07, "loss": 2.522, "step": 318940 }, { "epoch": 13.738209070939398, "learning_rate": 2.2729427481738453e-07, "loss": 2.6332, "step": 318960 }, { "epoch": 13.739070508678985, "learning_rate": 2.2724579284190568e-07, "loss": 2.6915, "step": 318980 }, { "epoch": 13.739931946418572, "learning_rate": 2.2719731086642675e-07, "loss": 2.7708, "step": 319000 }, { "epoch": 13.74079338415816, "learning_rate": 2.271488288909479e-07, "loss": 2.5651, "step": 319020 }, { "epoch": 13.741654821897747, "learning_rate": 2.2710034691546897e-07, "loss": 2.6959, "step": 319040 }, { "epoch": 13.742516259637334, "learning_rate": 2.2705186493999012e-07, "loss": 2.6764, "step": 319060 }, { "epoch": 13.743377697376921, "learning_rate": 2.2700338296451117e-07, "loss": 2.5679, "step": 319080 }, { "epoch": 13.744239135116509, "learning_rate": 2.2695490098903237e-07, "loss": 2.5923, "step": 319100 }, { "epoch": 13.745100572856098, "learning_rate": 2.269064190135534e-07, "loss": 2.6913, "step": 319120 }, { "epoch": 13.745962010595685, "learning_rate": 2.2685793703807446e-07, "loss": 2.4856, "step": 319140 }, { "epoch": 13.746823448335272, "learning_rate": 2.2680945506259561e-07, "loss": 2.6485, "step": 319160 }, { "epoch": 13.74768488607486, "learning_rate": 2.2676097308711666e-07, "loss": 2.6885, "step": 319180 }, { "epoch": 13.748546323814447, "learning_rate": 2.2671249111163784e-07, "loss": 2.6691, "step": 319200 }, { "epoch": 13.749407761554034, "learning_rate": 2.2666400913615888e-07, "loss": 2.6242, "step": 319220 }, { "epoch": 13.750269199293621, "learning_rate": 2.2661552716068003e-07, "loss": 2.6539, "step": 319240 }, { "epoch": 13.751130637033208, "learning_rate": 2.265670451852011e-07, "loss": 2.7113, "step": 319260 }, { "epoch": 13.751992074772796, "learning_rate": 2.2651856320972226e-07, "loss": 2.6786, "step": 319280 }, { "epoch": 13.752853512512383, "learning_rate": 2.2647008123424333e-07, "loss": 2.465, "step": 319300 }, { "epoch": 13.75371495025197, "learning_rate": 2.2642159925876448e-07, "loss": 2.626, "step": 319320 }, { "epoch": 13.754576387991557, "learning_rate": 2.2637311728328555e-07, "loss": 2.6505, "step": 319340 }, { "epoch": 13.755437825731144, "learning_rate": 2.263246353078067e-07, "loss": 2.6773, "step": 319360 }, { "epoch": 13.756299263470734, "learning_rate": 2.2627615333232775e-07, "loss": 2.7319, "step": 319380 }, { "epoch": 13.75716070121032, "learning_rate": 2.262276713568489e-07, "loss": 2.6238, "step": 319400 }, { "epoch": 13.758022138949908, "learning_rate": 2.2617918938136997e-07, "loss": 2.688, "step": 319420 }, { "epoch": 13.758883576689495, "learning_rate": 2.2613070740589112e-07, "loss": 2.5783, "step": 319440 }, { "epoch": 13.759745014429082, "learning_rate": 2.2608222543041216e-07, "loss": 2.7362, "step": 319460 }, { "epoch": 13.76060645216867, "learning_rate": 2.2603374345493334e-07, "loss": 2.5784, "step": 319480 }, { "epoch": 13.761467889908257, "learning_rate": 2.2598526147945439e-07, "loss": 2.6707, "step": 319500 }, { "epoch": 13.762329327647844, "learning_rate": 2.2593677950397554e-07, "loss": 2.5952, "step": 319520 }, { "epoch": 13.763190765387431, "learning_rate": 2.258882975284966e-07, "loss": 2.6801, "step": 319540 }, { "epoch": 13.764052203127019, "learning_rate": 2.2583981555301776e-07, "loss": 2.5862, "step": 319560 }, { "epoch": 13.764913640866606, "learning_rate": 2.2579133357753883e-07, "loss": 2.6318, "step": 319580 }, { "epoch": 13.765775078606193, "learning_rate": 2.2574285160205998e-07, "loss": 2.743, "step": 319600 }, { "epoch": 13.76663651634578, "learning_rate": 2.2569436962658103e-07, "loss": 2.5896, "step": 319620 }, { "epoch": 13.767497954085368, "learning_rate": 2.256458876511022e-07, "loss": 2.7296, "step": 319640 }, { "epoch": 13.768359391824955, "learning_rate": 2.2559740567562325e-07, "loss": 2.6488, "step": 319660 }, { "epoch": 13.769220829564544, "learning_rate": 2.2554892370014432e-07, "loss": 2.6161, "step": 319680 }, { "epoch": 13.770082267304131, "learning_rate": 2.2550044172466547e-07, "loss": 2.6331, "step": 319700 }, { "epoch": 13.770943705043718, "learning_rate": 2.2545195974918652e-07, "loss": 2.5998, "step": 319720 }, { "epoch": 13.771805142783306, "learning_rate": 2.254034777737077e-07, "loss": 2.6264, "step": 319740 }, { "epoch": 13.772666580522893, "learning_rate": 2.253549957982287e-07, "loss": 2.595, "step": 319760 }, { "epoch": 13.77352801826248, "learning_rate": 2.253065138227499e-07, "loss": 2.5949, "step": 319780 }, { "epoch": 13.774389456002067, "learning_rate": 2.2525803184727096e-07, "loss": 2.5239, "step": 319800 }, { "epoch": 13.775250893741655, "learning_rate": 2.252095498717921e-07, "loss": 2.5965, "step": 319820 }, { "epoch": 13.776112331481242, "learning_rate": 2.2516106789631318e-07, "loss": 2.6621, "step": 319840 }, { "epoch": 13.77697376922083, "learning_rate": 2.2511258592083433e-07, "loss": 2.6317, "step": 319860 }, { "epoch": 13.777835206960416, "learning_rate": 2.2506410394535538e-07, "loss": 2.5931, "step": 319880 }, { "epoch": 13.778696644700004, "learning_rate": 2.2501562196987656e-07, "loss": 2.5865, "step": 319900 }, { "epoch": 13.779558082439591, "learning_rate": 2.249671399943976e-07, "loss": 2.6264, "step": 319920 }, { "epoch": 13.78041952017918, "learning_rate": 2.2491865801891878e-07, "loss": 2.7847, "step": 319940 }, { "epoch": 13.781280957918767, "learning_rate": 2.2487017604343982e-07, "loss": 2.6362, "step": 319960 }, { "epoch": 13.782142395658354, "learning_rate": 2.2482169406796098e-07, "loss": 2.6725, "step": 319980 }, { "epoch": 13.783003833397942, "learning_rate": 2.2477321209248205e-07, "loss": 2.6421, "step": 320000 }, { "epoch": 13.783865271137529, "learning_rate": 2.247247301170032e-07, "loss": 2.5763, "step": 320020 }, { "epoch": 13.784726708877116, "learning_rate": 2.2467624814152424e-07, "loss": 2.6003, "step": 320040 }, { "epoch": 13.785588146616703, "learning_rate": 2.2462776616604542e-07, "loss": 2.5253, "step": 320060 }, { "epoch": 13.78644958435629, "learning_rate": 2.2457928419056647e-07, "loss": 2.5502, "step": 320080 }, { "epoch": 13.787311022095878, "learning_rate": 2.2453080221508762e-07, "loss": 2.5013, "step": 320100 }, { "epoch": 13.788172459835465, "learning_rate": 2.244823202396087e-07, "loss": 2.3231, "step": 320120 }, { "epoch": 13.789033897575052, "learning_rate": 2.2443383826412984e-07, "loss": 2.4768, "step": 320140 }, { "epoch": 13.78989533531464, "learning_rate": 2.2438535628865088e-07, "loss": 2.5472, "step": 320160 }, { "epoch": 13.790756773054227, "learning_rate": 2.2433687431317206e-07, "loss": 2.7108, "step": 320180 }, { "epoch": 13.791618210793814, "learning_rate": 2.242883923376931e-07, "loss": 2.6205, "step": 320200 }, { "epoch": 13.792479648533401, "learning_rate": 2.2423991036221418e-07, "loss": 2.6425, "step": 320220 }, { "epoch": 13.79334108627299, "learning_rate": 2.2419142838673533e-07, "loss": 2.5645, "step": 320240 }, { "epoch": 13.794202524012578, "learning_rate": 2.2414294641125637e-07, "loss": 2.4753, "step": 320260 }, { "epoch": 13.795063961752165, "learning_rate": 2.2409446443577755e-07, "loss": 2.5966, "step": 320280 }, { "epoch": 13.795925399491752, "learning_rate": 2.240459824602986e-07, "loss": 2.5655, "step": 320300 }, { "epoch": 13.79678683723134, "learning_rate": 2.2399750048481975e-07, "loss": 2.5414, "step": 320320 }, { "epoch": 13.797648274970927, "learning_rate": 2.2394901850934082e-07, "loss": 2.4954, "step": 320340 }, { "epoch": 13.798509712710514, "learning_rate": 2.2390053653386194e-07, "loss": 2.7249, "step": 320360 }, { "epoch": 13.799371150450101, "learning_rate": 2.2385205455838304e-07, "loss": 2.586, "step": 320380 }, { "epoch": 13.800232588189688, "learning_rate": 2.238035725829042e-07, "loss": 2.5719, "step": 320400 }, { "epoch": 13.801094025929276, "learning_rate": 2.2375509060742524e-07, "loss": 2.6848, "step": 320420 }, { "epoch": 13.801955463668863, "learning_rate": 2.2370660863194641e-07, "loss": 2.6738, "step": 320440 }, { "epoch": 13.80281690140845, "learning_rate": 2.2365812665646746e-07, "loss": 2.5619, "step": 320460 }, { "epoch": 13.803678339148037, "learning_rate": 2.236096446809886e-07, "loss": 2.5595, "step": 320480 }, { "epoch": 13.804539776887626, "learning_rate": 2.2356116270550968e-07, "loss": 2.6241, "step": 320500 }, { "epoch": 13.805401214627214, "learning_rate": 2.2351268073003083e-07, "loss": 2.6352, "step": 320520 }, { "epoch": 13.8062626523668, "learning_rate": 2.234641987545519e-07, "loss": 2.7458, "step": 320540 }, { "epoch": 13.807124090106388, "learning_rate": 2.2341571677907305e-07, "loss": 2.6644, "step": 320560 }, { "epoch": 13.807985527845975, "learning_rate": 2.233672348035941e-07, "loss": 2.591, "step": 320580 }, { "epoch": 13.808846965585563, "learning_rate": 2.2331875282811528e-07, "loss": 2.5078, "step": 320600 }, { "epoch": 13.80970840332515, "learning_rate": 2.2327027085263632e-07, "loss": 2.6281, "step": 320620 }, { "epoch": 13.810569841064737, "learning_rate": 2.2322178887715747e-07, "loss": 2.6036, "step": 320640 }, { "epoch": 13.811431278804324, "learning_rate": 2.2317330690167854e-07, "loss": 2.5645, "step": 320660 }, { "epoch": 13.812292716543912, "learning_rate": 2.231248249261997e-07, "loss": 2.5442, "step": 320680 }, { "epoch": 13.813154154283499, "learning_rate": 2.2307634295072074e-07, "loss": 2.636, "step": 320700 }, { "epoch": 13.814015592023086, "learning_rate": 2.2302786097524192e-07, "loss": 2.5238, "step": 320720 }, { "epoch": 13.814877029762673, "learning_rate": 2.2297937899976296e-07, "loss": 2.6757, "step": 320740 }, { "epoch": 13.81573846750226, "learning_rate": 2.2293089702428403e-07, "loss": 2.6606, "step": 320760 }, { "epoch": 13.816599905241848, "learning_rate": 2.2288241504880518e-07, "loss": 2.5454, "step": 320780 }, { "epoch": 13.817461342981437, "learning_rate": 2.2283393307332623e-07, "loss": 2.4819, "step": 320800 }, { "epoch": 13.818322780721024, "learning_rate": 2.227854510978474e-07, "loss": 2.624, "step": 320820 }, { "epoch": 13.819184218460611, "learning_rate": 2.2273696912236845e-07, "loss": 2.5421, "step": 320840 }, { "epoch": 13.820045656200199, "learning_rate": 2.226884871468896e-07, "loss": 2.5411, "step": 320860 }, { "epoch": 13.820907093939786, "learning_rate": 2.2264000517141067e-07, "loss": 2.518, "step": 320880 }, { "epoch": 13.821768531679373, "learning_rate": 2.2259152319593183e-07, "loss": 2.7473, "step": 320900 }, { "epoch": 13.82262996941896, "learning_rate": 2.225430412204529e-07, "loss": 2.7069, "step": 320920 }, { "epoch": 13.823491407158548, "learning_rate": 2.2249455924497405e-07, "loss": 2.6028, "step": 320940 }, { "epoch": 13.824352844898135, "learning_rate": 2.224460772694951e-07, "loss": 2.6889, "step": 320960 }, { "epoch": 13.825214282637722, "learning_rate": 2.2239759529401627e-07, "loss": 2.551, "step": 320980 }, { "epoch": 13.82607572037731, "learning_rate": 2.2234911331853732e-07, "loss": 2.6268, "step": 321000 }, { "epoch": 13.826937158116896, "learning_rate": 2.2230063134305847e-07, "loss": 2.6163, "step": 321020 }, { "epoch": 13.827798595856484, "learning_rate": 2.2225214936757954e-07, "loss": 2.5761, "step": 321040 }, { "epoch": 13.828660033596073, "learning_rate": 2.222036673921007e-07, "loss": 2.8215, "step": 321060 }, { "epoch": 13.82952147133566, "learning_rate": 2.2215518541662176e-07, "loss": 2.646, "step": 321080 }, { "epoch": 13.830382909075247, "learning_rate": 2.221067034411429e-07, "loss": 2.7141, "step": 321100 }, { "epoch": 13.831244346814835, "learning_rate": 2.2205822146566398e-07, "loss": 2.684, "step": 321120 }, { "epoch": 13.832105784554422, "learning_rate": 2.2200973949018513e-07, "loss": 2.7284, "step": 321140 }, { "epoch": 13.832967222294009, "learning_rate": 2.2196125751470618e-07, "loss": 2.5299, "step": 321160 }, { "epoch": 13.833828660033596, "learning_rate": 2.2191277553922733e-07, "loss": 2.5704, "step": 321180 }, { "epoch": 13.834690097773183, "learning_rate": 2.218642935637484e-07, "loss": 2.5667, "step": 321200 }, { "epoch": 13.83555153551277, "learning_rate": 2.2181581158826955e-07, "loss": 2.4441, "step": 321220 }, { "epoch": 13.836412973252358, "learning_rate": 2.2176732961279062e-07, "loss": 2.542, "step": 321240 }, { "epoch": 13.837274410991945, "learning_rate": 2.2171884763731177e-07, "loss": 2.6281, "step": 321260 }, { "epoch": 13.838135848731532, "learning_rate": 2.2167036566183282e-07, "loss": 2.5904, "step": 321280 }, { "epoch": 13.83899728647112, "learning_rate": 2.216218836863539e-07, "loss": 2.7963, "step": 321300 }, { "epoch": 13.839858724210707, "learning_rate": 2.2157340171087504e-07, "loss": 2.5814, "step": 321320 }, { "epoch": 13.840720161950294, "learning_rate": 2.2152491973539609e-07, "loss": 2.6498, "step": 321340 }, { "epoch": 13.841581599689883, "learning_rate": 2.2147643775991726e-07, "loss": 2.5499, "step": 321360 }, { "epoch": 13.84244303742947, "learning_rate": 2.214279557844383e-07, "loss": 2.5968, "step": 321380 }, { "epoch": 13.843304475169058, "learning_rate": 2.2137947380895946e-07, "loss": 2.6682, "step": 321400 }, { "epoch": 13.844165912908645, "learning_rate": 2.2133099183348053e-07, "loss": 2.6776, "step": 321420 }, { "epoch": 13.845027350648232, "learning_rate": 2.2128250985800168e-07, "loss": 2.5416, "step": 321440 }, { "epoch": 13.84588878838782, "learning_rate": 2.2123402788252275e-07, "loss": 2.5542, "step": 321460 }, { "epoch": 13.846750226127407, "learning_rate": 2.211855459070439e-07, "loss": 2.4092, "step": 321480 }, { "epoch": 13.847611663866994, "learning_rate": 2.2113706393156495e-07, "loss": 2.5509, "step": 321500 }, { "epoch": 13.848473101606581, "learning_rate": 2.2108858195608613e-07, "loss": 2.6794, "step": 321520 }, { "epoch": 13.849334539346168, "learning_rate": 2.2104009998060715e-07, "loss": 2.7076, "step": 321540 }, { "epoch": 13.850195977085756, "learning_rate": 2.2099161800512832e-07, "loss": 2.6216, "step": 321560 }, { "epoch": 13.851057414825343, "learning_rate": 2.209431360296494e-07, "loss": 2.5382, "step": 321580 }, { "epoch": 13.85191885256493, "learning_rate": 2.2089465405417055e-07, "loss": 2.6434, "step": 321600 }, { "epoch": 13.85278029030452, "learning_rate": 2.2084617207869162e-07, "loss": 2.6113, "step": 321620 }, { "epoch": 13.853641728044106, "learning_rate": 2.2079769010321277e-07, "loss": 2.6422, "step": 321640 }, { "epoch": 13.854503165783694, "learning_rate": 2.207492081277338e-07, "loss": 2.697, "step": 321660 }, { "epoch": 13.855364603523281, "learning_rate": 2.20700726152255e-07, "loss": 2.5194, "step": 321680 }, { "epoch": 13.856226041262868, "learning_rate": 2.2065224417677604e-07, "loss": 2.6975, "step": 321700 }, { "epoch": 13.857087479002455, "learning_rate": 2.206037622012972e-07, "loss": 2.681, "step": 321720 }, { "epoch": 13.857948916742043, "learning_rate": 2.2055528022581826e-07, "loss": 2.5351, "step": 321740 }, { "epoch": 13.85881035448163, "learning_rate": 2.205067982503394e-07, "loss": 2.6119, "step": 321760 }, { "epoch": 13.859671792221217, "learning_rate": 2.2045831627486048e-07, "loss": 2.7103, "step": 321780 }, { "epoch": 13.860533229960804, "learning_rate": 2.2040983429938163e-07, "loss": 2.4373, "step": 321800 }, { "epoch": 13.861394667700392, "learning_rate": 2.2036135232390268e-07, "loss": 2.6565, "step": 321820 }, { "epoch": 13.862256105439979, "learning_rate": 2.2031287034842375e-07, "loss": 2.5698, "step": 321840 }, { "epoch": 13.863117543179566, "learning_rate": 2.202643883729449e-07, "loss": 2.6076, "step": 321860 }, { "epoch": 13.863978980919153, "learning_rate": 2.2021590639746594e-07, "loss": 2.497, "step": 321880 }, { "epoch": 13.86484041865874, "learning_rate": 2.2016742442198712e-07, "loss": 2.578, "step": 321900 }, { "epoch": 13.86570185639833, "learning_rate": 2.2011894244650817e-07, "loss": 2.69, "step": 321920 }, { "epoch": 13.866563294137917, "learning_rate": 2.2007046047102932e-07, "loss": 2.683, "step": 321940 }, { "epoch": 13.867424731877504, "learning_rate": 2.200219784955504e-07, "loss": 2.5411, "step": 321960 }, { "epoch": 13.868286169617091, "learning_rate": 2.1997349652007154e-07, "loss": 2.6286, "step": 321980 }, { "epoch": 13.869147607356679, "learning_rate": 2.199250145445926e-07, "loss": 2.6917, "step": 322000 }, { "epoch": 13.870009045096266, "learning_rate": 2.1987653256911376e-07, "loss": 2.8502, "step": 322020 }, { "epoch": 13.870870482835853, "learning_rate": 2.198280505936348e-07, "loss": 2.7536, "step": 322040 }, { "epoch": 13.87173192057544, "learning_rate": 2.1977956861815598e-07, "loss": 2.568, "step": 322060 }, { "epoch": 13.872593358315028, "learning_rate": 2.1973108664267703e-07, "loss": 2.668, "step": 322080 }, { "epoch": 13.873454796054615, "learning_rate": 2.1968260466719818e-07, "loss": 2.4933, "step": 322100 }, { "epoch": 13.874316233794202, "learning_rate": 2.1963412269171925e-07, "loss": 2.6142, "step": 322120 }, { "epoch": 13.87517767153379, "learning_rate": 2.1958564071624038e-07, "loss": 2.5315, "step": 322140 }, { "epoch": 13.876039109273377, "learning_rate": 2.1953715874076147e-07, "loss": 2.6767, "step": 322160 }, { "epoch": 13.876900547012964, "learning_rate": 2.1948867676528262e-07, "loss": 2.6996, "step": 322180 }, { "epoch": 13.877761984752553, "learning_rate": 2.1944019478980367e-07, "loss": 2.6149, "step": 322200 }, { "epoch": 13.87862342249214, "learning_rate": 2.1939171281432485e-07, "loss": 2.4441, "step": 322220 }, { "epoch": 13.879484860231727, "learning_rate": 2.193432308388459e-07, "loss": 2.6143, "step": 322240 }, { "epoch": 13.880346297971315, "learning_rate": 2.1929474886336704e-07, "loss": 2.5947, "step": 322260 }, { "epoch": 13.881207735710902, "learning_rate": 2.1924626688788811e-07, "loss": 2.6258, "step": 322280 }, { "epoch": 13.882069173450489, "learning_rate": 2.1919778491240927e-07, "loss": 2.6899, "step": 322300 }, { "epoch": 13.882930611190076, "learning_rate": 2.191493029369303e-07, "loss": 2.5773, "step": 322320 }, { "epoch": 13.883792048929664, "learning_rate": 2.191008209614515e-07, "loss": 2.6042, "step": 322340 }, { "epoch": 13.88465348666925, "learning_rate": 2.1905233898597256e-07, "loss": 2.7045, "step": 322360 }, { "epoch": 13.885514924408838, "learning_rate": 2.190038570104936e-07, "loss": 2.8539, "step": 322380 }, { "epoch": 13.886376362148425, "learning_rate": 2.1895537503501476e-07, "loss": 2.4505, "step": 322400 }, { "epoch": 13.887237799888013, "learning_rate": 2.1890689305953583e-07, "loss": 2.8214, "step": 322420 }, { "epoch": 13.8880992376276, "learning_rate": 2.1885841108405695e-07, "loss": 2.6117, "step": 322440 }, { "epoch": 13.888960675367187, "learning_rate": 2.1880992910857802e-07, "loss": 2.5984, "step": 322460 }, { "epoch": 13.889822113106774, "learning_rate": 2.1876144713309917e-07, "loss": 2.6814, "step": 322480 }, { "epoch": 13.890683550846363, "learning_rate": 2.1871296515762024e-07, "loss": 2.6011, "step": 322500 }, { "epoch": 13.89154498858595, "learning_rate": 2.186644831821414e-07, "loss": 2.5855, "step": 322520 }, { "epoch": 13.892406426325538, "learning_rate": 2.1861600120666247e-07, "loss": 2.7869, "step": 322540 }, { "epoch": 13.893267864065125, "learning_rate": 2.1856751923118362e-07, "loss": 2.5851, "step": 322560 }, { "epoch": 13.894129301804712, "learning_rate": 2.1851903725570466e-07, "loss": 2.6016, "step": 322580 }, { "epoch": 13.8949907395443, "learning_rate": 2.1847055528022584e-07, "loss": 2.5534, "step": 322600 }, { "epoch": 13.895852177283887, "learning_rate": 2.1842207330474689e-07, "loss": 2.641, "step": 322620 }, { "epoch": 13.896713615023474, "learning_rate": 2.1837359132926804e-07, "loss": 2.6462, "step": 322640 }, { "epoch": 13.897575052763061, "learning_rate": 2.183251093537891e-07, "loss": 2.413, "step": 322660 }, { "epoch": 13.898436490502649, "learning_rate": 2.1827662737831026e-07, "loss": 2.4732, "step": 322680 }, { "epoch": 13.899297928242236, "learning_rate": 2.182281454028313e-07, "loss": 2.6885, "step": 322700 }, { "epoch": 13.900159365981823, "learning_rate": 2.1817966342735248e-07, "loss": 2.5188, "step": 322720 }, { "epoch": 13.90102080372141, "learning_rate": 2.1813118145187355e-07, "loss": 2.5134, "step": 322740 }, { "epoch": 13.901882241461, "learning_rate": 2.1808269947639468e-07, "loss": 2.5268, "step": 322760 }, { "epoch": 13.902743679200587, "learning_rate": 2.1803421750091575e-07, "loss": 2.5177, "step": 322780 }, { "epoch": 13.903605116940174, "learning_rate": 2.1798573552543693e-07, "loss": 2.7694, "step": 322800 }, { "epoch": 13.904466554679761, "learning_rate": 2.1793725354995797e-07, "loss": 2.463, "step": 322820 }, { "epoch": 13.905327992419348, "learning_rate": 2.1788877157447912e-07, "loss": 2.6157, "step": 322840 }, { "epoch": 13.906189430158935, "learning_rate": 2.178402895990002e-07, "loss": 2.657, "step": 322860 }, { "epoch": 13.907050867898523, "learning_rate": 2.1779180762352134e-07, "loss": 2.5717, "step": 322880 }, { "epoch": 13.90791230563811, "learning_rate": 2.1774332564804242e-07, "loss": 2.6992, "step": 322900 }, { "epoch": 13.908773743377697, "learning_rate": 2.1769484367256346e-07, "loss": 2.6036, "step": 322920 }, { "epoch": 13.909635181117284, "learning_rate": 2.176463616970846e-07, "loss": 2.5744, "step": 322940 }, { "epoch": 13.910496618856872, "learning_rate": 2.1759787972160566e-07, "loss": 2.5247, "step": 322960 }, { "epoch": 13.911358056596459, "learning_rate": 2.1754939774612683e-07, "loss": 2.6865, "step": 322980 }, { "epoch": 13.912219494336046, "learning_rate": 2.1750091577064788e-07, "loss": 2.5961, "step": 323000 }, { "epoch": 13.913080932075633, "learning_rate": 2.1745243379516903e-07, "loss": 2.5787, "step": 323020 }, { "epoch": 13.91394236981522, "learning_rate": 2.174039518196901e-07, "loss": 2.6522, "step": 323040 }, { "epoch": 13.91480380755481, "learning_rate": 2.1735546984421125e-07, "loss": 2.4898, "step": 323060 }, { "epoch": 13.915665245294397, "learning_rate": 2.173069878687323e-07, "loss": 2.6025, "step": 323080 }, { "epoch": 13.916526683033984, "learning_rate": 2.1725850589325347e-07, "loss": 2.5855, "step": 323100 }, { "epoch": 13.917388120773571, "learning_rate": 2.1721002391777455e-07, "loss": 2.7232, "step": 323120 }, { "epoch": 13.918249558513159, "learning_rate": 2.1716154194229567e-07, "loss": 2.6064, "step": 323140 }, { "epoch": 13.919110996252746, "learning_rate": 2.1711305996681674e-07, "loss": 2.5532, "step": 323160 }, { "epoch": 13.919972433992333, "learning_rate": 2.1706457799133792e-07, "loss": 2.5995, "step": 323180 }, { "epoch": 13.92083387173192, "learning_rate": 2.1701609601585896e-07, "loss": 2.7889, "step": 323200 }, { "epoch": 13.921695309471508, "learning_rate": 2.1696761404038012e-07, "loss": 2.5003, "step": 323220 }, { "epoch": 13.922556747211095, "learning_rate": 2.169191320649012e-07, "loss": 2.6985, "step": 323240 }, { "epoch": 13.923418184950682, "learning_rate": 2.1687065008942234e-07, "loss": 2.6339, "step": 323260 }, { "epoch": 13.92427962269027, "learning_rate": 2.1682216811394338e-07, "loss": 2.5724, "step": 323280 }, { "epoch": 13.925141060429857, "learning_rate": 2.1677368613846456e-07, "loss": 2.7949, "step": 323300 }, { "epoch": 13.926002498169446, "learning_rate": 2.1672520416298558e-07, "loss": 2.4169, "step": 323320 }, { "epoch": 13.926863935909033, "learning_rate": 2.1667672218750676e-07, "loss": 2.6352, "step": 323340 }, { "epoch": 13.92772537364862, "learning_rate": 2.1662824021202783e-07, "loss": 2.6807, "step": 323360 }, { "epoch": 13.928586811388207, "learning_rate": 2.1657975823654898e-07, "loss": 2.6023, "step": 323380 }, { "epoch": 13.929448249127795, "learning_rate": 2.1653127626107002e-07, "loss": 2.5101, "step": 323400 }, { "epoch": 13.930309686867382, "learning_rate": 2.164827942855911e-07, "loss": 2.5595, "step": 323420 }, { "epoch": 13.93117112460697, "learning_rate": 2.1643431231011227e-07, "loss": 2.6221, "step": 323440 }, { "epoch": 13.932032562346556, "learning_rate": 2.1638583033463332e-07, "loss": 2.5337, "step": 323460 }, { "epoch": 13.932894000086144, "learning_rate": 2.1633734835915447e-07, "loss": 2.5301, "step": 323480 }, { "epoch": 13.933755437825731, "learning_rate": 2.1628886638367554e-07, "loss": 2.6193, "step": 323500 }, { "epoch": 13.934616875565318, "learning_rate": 2.162403844081967e-07, "loss": 2.5914, "step": 323520 }, { "epoch": 13.935478313304905, "learning_rate": 2.1619190243271774e-07, "loss": 2.7129, "step": 323540 }, { "epoch": 13.936339751044493, "learning_rate": 2.1614342045723891e-07, "loss": 2.6284, "step": 323560 }, { "epoch": 13.93720118878408, "learning_rate": 2.1609493848175996e-07, "loss": 2.6072, "step": 323580 }, { "epoch": 13.938062626523667, "learning_rate": 2.160464565062811e-07, "loss": 2.5625, "step": 323600 }, { "epoch": 13.938924064263256, "learning_rate": 2.1599797453080218e-07, "loss": 2.595, "step": 323620 }, { "epoch": 13.939785502002843, "learning_rate": 2.1594949255532333e-07, "loss": 2.619, "step": 323640 }, { "epoch": 13.94064693974243, "learning_rate": 2.1590101057984438e-07, "loss": 2.7397, "step": 323660 }, { "epoch": 13.941508377482018, "learning_rate": 2.1585252860436555e-07, "loss": 2.6035, "step": 323680 }, { "epoch": 13.942369815221605, "learning_rate": 2.158040466288866e-07, "loss": 2.5558, "step": 323700 }, { "epoch": 13.943231252961192, "learning_rate": 2.1575556465340775e-07, "loss": 2.676, "step": 323720 }, { "epoch": 13.94409269070078, "learning_rate": 2.1570708267792882e-07, "loss": 2.6293, "step": 323740 }, { "epoch": 13.944954128440367, "learning_rate": 2.1565860070244997e-07, "loss": 2.5457, "step": 323760 }, { "epoch": 13.945815566179954, "learning_rate": 2.1561011872697102e-07, "loss": 2.387, "step": 323780 }, { "epoch": 13.946677003919541, "learning_rate": 2.155616367514922e-07, "loss": 2.7181, "step": 323800 }, { "epoch": 13.947538441659129, "learning_rate": 2.1551315477601327e-07, "loss": 2.5838, "step": 323820 }, { "epoch": 13.948399879398716, "learning_rate": 2.154646728005344e-07, "loss": 2.6781, "step": 323840 }, { "epoch": 13.949261317138303, "learning_rate": 2.1541619082505546e-07, "loss": 2.6391, "step": 323860 }, { "epoch": 13.950122754877892, "learning_rate": 2.1536770884957664e-07, "loss": 2.6496, "step": 323880 }, { "epoch": 13.95098419261748, "learning_rate": 2.1531922687409768e-07, "loss": 2.5362, "step": 323900 }, { "epoch": 13.951845630357067, "learning_rate": 2.1527074489861884e-07, "loss": 2.5785, "step": 323920 }, { "epoch": 13.952707068096654, "learning_rate": 2.152222629231399e-07, "loss": 2.7744, "step": 323940 }, { "epoch": 13.953568505836241, "learning_rate": 2.1517378094766095e-07, "loss": 2.6227, "step": 323960 }, { "epoch": 13.954429943575828, "learning_rate": 2.151252989721821e-07, "loss": 2.6485, "step": 323980 }, { "epoch": 13.955291381315416, "learning_rate": 2.1507681699670317e-07, "loss": 2.6624, "step": 324000 }, { "epoch": 13.956152819055003, "learning_rate": 2.1502833502122433e-07, "loss": 2.5742, "step": 324020 }, { "epoch": 13.95701425679459, "learning_rate": 2.1497985304574537e-07, "loss": 2.563, "step": 324040 }, { "epoch": 13.957875694534177, "learning_rate": 2.1493137107026655e-07, "loss": 2.5564, "step": 324060 }, { "epoch": 13.958737132273765, "learning_rate": 2.1488288909478762e-07, "loss": 2.7031, "step": 324080 }, { "epoch": 13.959598570013352, "learning_rate": 2.1483440711930874e-07, "loss": 2.5054, "step": 324100 }, { "epoch": 13.960460007752939, "learning_rate": 2.1478592514382982e-07, "loss": 2.5471, "step": 324120 }, { "epoch": 13.961321445492526, "learning_rate": 2.14737443168351e-07, "loss": 2.6381, "step": 324140 }, { "epoch": 13.962182883232114, "learning_rate": 2.1468896119287204e-07, "loss": 2.6618, "step": 324160 }, { "epoch": 13.963044320971703, "learning_rate": 2.146404792173932e-07, "loss": 2.4785, "step": 324180 }, { "epoch": 13.96390575871129, "learning_rate": 2.1459199724191426e-07, "loss": 2.7402, "step": 324200 }, { "epoch": 13.964767196450877, "learning_rate": 2.1454351526643538e-07, "loss": 2.666, "step": 324220 }, { "epoch": 13.965628634190464, "learning_rate": 2.1449503329095646e-07, "loss": 2.6239, "step": 324240 }, { "epoch": 13.966490071930052, "learning_rate": 2.144465513154776e-07, "loss": 2.6809, "step": 324260 }, { "epoch": 13.967351509669639, "learning_rate": 2.1439806933999868e-07, "loss": 2.6778, "step": 324280 }, { "epoch": 13.968212947409226, "learning_rate": 2.1434958736451983e-07, "loss": 2.7316, "step": 324300 }, { "epoch": 13.969074385148813, "learning_rate": 2.143011053890409e-07, "loss": 2.4663, "step": 324320 }, { "epoch": 13.9699358228884, "learning_rate": 2.1425262341356205e-07, "loss": 2.3857, "step": 324340 }, { "epoch": 13.970797260627988, "learning_rate": 2.142041414380831e-07, "loss": 2.5109, "step": 324360 }, { "epoch": 13.971658698367575, "learning_rate": 2.1415565946260427e-07, "loss": 2.519, "step": 324380 }, { "epoch": 13.972520136107162, "learning_rate": 2.1410717748712532e-07, "loss": 2.604, "step": 324400 }, { "epoch": 13.97338157384675, "learning_rate": 2.1405869551164647e-07, "loss": 2.7339, "step": 324420 }, { "epoch": 13.974243011586339, "learning_rate": 2.1401021353616754e-07, "loss": 2.6048, "step": 324440 }, { "epoch": 13.975104449325926, "learning_rate": 2.139617315606887e-07, "loss": 2.5451, "step": 324460 }, { "epoch": 13.975965887065513, "learning_rate": 2.1391324958520974e-07, "loss": 2.5748, "step": 324480 }, { "epoch": 13.9768273248051, "learning_rate": 2.1386476760973078e-07, "loss": 2.7825, "step": 324500 }, { "epoch": 13.977688762544688, "learning_rate": 2.1381628563425199e-07, "loss": 2.5972, "step": 324520 }, { "epoch": 13.978550200284275, "learning_rate": 2.1376780365877303e-07, "loss": 2.7476, "step": 324540 }, { "epoch": 13.979411638023862, "learning_rate": 2.1371932168329418e-07, "loss": 2.6723, "step": 324560 }, { "epoch": 13.98027307576345, "learning_rate": 2.1367083970781525e-07, "loss": 2.5598, "step": 324580 }, { "epoch": 13.981134513503036, "learning_rate": 2.136223577323364e-07, "loss": 2.5914, "step": 324600 }, { "epoch": 13.981995951242624, "learning_rate": 2.1357387575685745e-07, "loss": 2.694, "step": 324620 }, { "epoch": 13.982857388982211, "learning_rate": 2.1352539378137863e-07, "loss": 2.6578, "step": 324640 }, { "epoch": 13.983718826721798, "learning_rate": 2.1347691180589967e-07, "loss": 2.6574, "step": 324660 }, { "epoch": 13.984580264461385, "learning_rate": 2.1342842983042085e-07, "loss": 2.6636, "step": 324680 }, { "epoch": 13.985441702200973, "learning_rate": 2.133799478549419e-07, "loss": 2.5429, "step": 324700 }, { "epoch": 13.98630313994056, "learning_rate": 2.1333146587946305e-07, "loss": 2.6734, "step": 324720 }, { "epoch": 13.987164577680149, "learning_rate": 2.132829839039841e-07, "loss": 2.7199, "step": 324740 }, { "epoch": 13.988026015419736, "learning_rate": 2.1323450192850527e-07, "loss": 2.7304, "step": 324760 }, { "epoch": 13.988887453159323, "learning_rate": 2.1318601995302634e-07, "loss": 2.6436, "step": 324780 }, { "epoch": 13.98974889089891, "learning_rate": 2.1313753797754746e-07, "loss": 2.5286, "step": 324800 }, { "epoch": 13.990610328638498, "learning_rate": 2.1308905600206854e-07, "loss": 2.6289, "step": 324820 }, { "epoch": 13.991471766378085, "learning_rate": 2.1304057402658969e-07, "loss": 2.6956, "step": 324840 }, { "epoch": 13.992333204117672, "learning_rate": 2.1299209205111073e-07, "loss": 2.6838, "step": 324860 }, { "epoch": 13.99319464185726, "learning_rate": 2.129436100756319e-07, "loss": 2.5407, "step": 324880 }, { "epoch": 13.994056079596847, "learning_rate": 2.1289512810015298e-07, "loss": 2.6206, "step": 324900 }, { "epoch": 13.994917517336434, "learning_rate": 2.128466461246741e-07, "loss": 2.574, "step": 324920 }, { "epoch": 13.995778955076021, "learning_rate": 2.1279816414919518e-07, "loss": 2.553, "step": 324940 }, { "epoch": 13.996640392815609, "learning_rate": 2.1274968217371635e-07, "loss": 2.662, "step": 324960 }, { "epoch": 13.997501830555196, "learning_rate": 2.127012001982374e-07, "loss": 2.6917, "step": 324980 }, { "epoch": 13.998363268294783, "learning_rate": 2.1265271822275855e-07, "loss": 2.5112, "step": 325000 }, { "epoch": 13.999224706034372, "learning_rate": 2.1260423624727962e-07, "loss": 2.6875, "step": 325020 } ], "logging_steps": 20, "max_steps": 371472, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 10000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.5558906270972314e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }