{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 92868, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.307188697936857e-05, "learning_rate": 5e-09, "loss": 15.3469, "step": 1 }, { "epoch": 0.0008614377395873713, "learning_rate": 1e-07, "loss": 13.5266, "step": 20 }, { "epoch": 0.0017228754791747427, "learning_rate": 2e-07, "loss": 12.4249, "step": 40 }, { "epoch": 0.002584313218762114, "learning_rate": 3e-07, "loss": 11.9215, "step": 60 }, { "epoch": 0.0034457509583494853, "learning_rate": 4e-07, "loss": 10.5898, "step": 80 }, { "epoch": 0.004307188697936857, "learning_rate": 5e-07, "loss": 9.9173, "step": 100 }, { "epoch": 0.005168626437524228, "learning_rate": 6e-07, "loss": 8.5743, "step": 120 }, { "epoch": 0.0060300641771116, "learning_rate": 7e-07, "loss": 7.9646, "step": 140 }, { "epoch": 0.006891501916698971, "learning_rate": 8e-07, "loss": 7.5993, "step": 160 }, { "epoch": 0.007752939656286342, "learning_rate": 9e-07, "loss": 7.2787, "step": 180 }, { "epoch": 0.008614377395873713, "learning_rate": 1e-06, "loss": 7.1685, "step": 200 }, { "epoch": 0.009475815135461085, "learning_rate": 9.99951518024521e-07, "loss": 6.7489, "step": 220 }, { "epoch": 0.010337252875048455, "learning_rate": 9.999030360490423e-07, "loss": 7.4429, "step": 240 }, { "epoch": 0.011198690614635827, "learning_rate": 9.998545540735632e-07, "loss": 7.4136, "step": 260 }, { "epoch": 0.0120601283542232, "learning_rate": 9.998060720980845e-07, "loss": 6.8118, "step": 280 }, { "epoch": 0.01292156609381057, "learning_rate": 9.997575901226053e-07, "loss": 7.0537, "step": 300 }, { "epoch": 0.013783003833397941, "learning_rate": 9.997091081471266e-07, "loss": 6.7556, "step": 320 }, { "epoch": 0.014644441572985313, "learning_rate": 9.996606261716477e-07, "loss": 7.0601, "step": 340 }, { "epoch": 0.015505879312572683, "learning_rate": 9.996121441961687e-07, "loss": 6.8547, "step": 360 }, { "epoch": 0.016367317052160057, "learning_rate": 9.9956366222069e-07, "loss": 6.4108, "step": 380 }, { "epoch": 0.017228754791747427, "learning_rate": 9.995151802452109e-07, "loss": 6.5316, "step": 400 }, { "epoch": 0.018090192531334797, "learning_rate": 9.994666982697322e-07, "loss": 6.4739, "step": 420 }, { "epoch": 0.01895163027092217, "learning_rate": 9.994182162942532e-07, "loss": 6.8201, "step": 440 }, { "epoch": 0.01981306801050954, "learning_rate": 9.993697343187743e-07, "loss": 6.4731, "step": 460 }, { "epoch": 0.02067450575009691, "learning_rate": 9.993212523432956e-07, "loss": 6.5669, "step": 480 }, { "epoch": 0.021535943489684285, "learning_rate": 9.992727703678165e-07, "loss": 6.6863, "step": 500 }, { "epoch": 0.022397381229271655, "learning_rate": 9.992242883923377e-07, "loss": 6.7291, "step": 520 }, { "epoch": 0.023258818968859025, "learning_rate": 9.991758064168586e-07, "loss": 6.2971, "step": 540 }, { "epoch": 0.0241202567084464, "learning_rate": 9.991273244413799e-07, "loss": 6.1858, "step": 560 }, { "epoch": 0.02498169444803377, "learning_rate": 9.99078842465901e-07, "loss": 6.5758, "step": 580 }, { "epoch": 0.02584313218762114, "learning_rate": 9.99030360490422e-07, "loss": 6.186, "step": 600 }, { "epoch": 0.026704569927208512, "learning_rate": 9.989818785149433e-07, "loss": 6.4182, "step": 620 }, { "epoch": 0.027566007666795882, "learning_rate": 9.989333965394644e-07, "loss": 6.263, "step": 640 }, { "epoch": 0.028427445406383253, "learning_rate": 9.988849145639854e-07, "loss": 6.0137, "step": 660 }, { "epoch": 0.029288883145970626, "learning_rate": 9.988364325885065e-07, "loss": 6.2632, "step": 680 }, { "epoch": 0.030150320885557996, "learning_rate": 9.987879506130276e-07, "loss": 6.3874, "step": 700 }, { "epoch": 0.031011758625145366, "learning_rate": 9.987394686375489e-07, "loss": 6.338, "step": 720 }, { "epoch": 0.03187319636473274, "learning_rate": 9.986909866620697e-07, "loss": 6.4615, "step": 740 }, { "epoch": 0.032734634104320114, "learning_rate": 9.98642504686591e-07, "loss": 6.3216, "step": 760 }, { "epoch": 0.033596071843907484, "learning_rate": 9.985940227111119e-07, "loss": 6.3571, "step": 780 }, { "epoch": 0.034457509583494854, "learning_rate": 9.985455407356332e-07, "loss": 6.2426, "step": 800 }, { "epoch": 0.035318947323082224, "learning_rate": 9.984970587601542e-07, "loss": 6.2687, "step": 820 }, { "epoch": 0.036180385062669594, "learning_rate": 9.984485767846753e-07, "loss": 6.2717, "step": 840 }, { "epoch": 0.037041822802256964, "learning_rate": 9.984000948091966e-07, "loss": 6.1245, "step": 860 }, { "epoch": 0.03790326054184434, "learning_rate": 9.983516128337174e-07, "loss": 6.281, "step": 880 }, { "epoch": 0.03876469828143171, "learning_rate": 9.983031308582387e-07, "loss": 6.3632, "step": 900 }, { "epoch": 0.03962613602101908, "learning_rate": 9.982546488827596e-07, "loss": 6.3535, "step": 920 }, { "epoch": 0.04048757376060645, "learning_rate": 9.982061669072809e-07, "loss": 6.0799, "step": 940 }, { "epoch": 0.04134901150019382, "learning_rate": 9.98157684931802e-07, "loss": 6.4344, "step": 960 }, { "epoch": 0.04221044923978119, "learning_rate": 9.98109202956323e-07, "loss": 6.2517, "step": 980 }, { "epoch": 0.04307188697936857, "learning_rate": 9.980607209808443e-07, "loss": 5.9678, "step": 1000 }, { "epoch": 0.04393332471895594, "learning_rate": 9.980122390053654e-07, "loss": 6.0589, "step": 1020 }, { "epoch": 0.04479476245854331, "learning_rate": 9.979637570298864e-07, "loss": 6.3632, "step": 1040 }, { "epoch": 0.04565620019813068, "learning_rate": 9.979152750544075e-07, "loss": 6.265, "step": 1060 }, { "epoch": 0.04651763793771805, "learning_rate": 9.978667930789286e-07, "loss": 6.0993, "step": 1080 }, { "epoch": 0.04737907567730542, "learning_rate": 9.978183111034499e-07, "loss": 6.1147, "step": 1100 }, { "epoch": 0.0482405134168928, "learning_rate": 9.977698291279707e-07, "loss": 6.1485, "step": 1120 }, { "epoch": 0.04910195115648017, "learning_rate": 9.97721347152492e-07, "loss": 5.9857, "step": 1140 }, { "epoch": 0.04996338889606754, "learning_rate": 9.976728651770129e-07, "loss": 6.0165, "step": 1160 }, { "epoch": 0.05082482663565491, "learning_rate": 9.976243832015342e-07, "loss": 6.176, "step": 1180 }, { "epoch": 0.05168626437524228, "learning_rate": 9.975759012260552e-07, "loss": 6.2797, "step": 1200 }, { "epoch": 0.05254770211482965, "learning_rate": 9.975274192505763e-07, "loss": 6.1033, "step": 1220 }, { "epoch": 0.053409139854417025, "learning_rate": 9.974789372750976e-07, "loss": 5.9836, "step": 1240 }, { "epoch": 0.054270577594004395, "learning_rate": 9.974304552996186e-07, "loss": 6.2921, "step": 1260 }, { "epoch": 0.055132015333591765, "learning_rate": 9.973819733241397e-07, "loss": 6.3446, "step": 1280 }, { "epoch": 0.055993453073179135, "learning_rate": 9.973334913486606e-07, "loss": 6.1988, "step": 1300 }, { "epoch": 0.056854890812766505, "learning_rate": 9.972850093731819e-07, "loss": 6.1577, "step": 1320 }, { "epoch": 0.057716328552353875, "learning_rate": 9.97236527397703e-07, "loss": 5.9894, "step": 1340 }, { "epoch": 0.05857776629194125, "learning_rate": 9.97188045422224e-07, "loss": 6.2019, "step": 1360 }, { "epoch": 0.05943920403152862, "learning_rate": 9.971395634467453e-07, "loss": 5.8516, "step": 1380 }, { "epoch": 0.06030064177111599, "learning_rate": 9.970910814712664e-07, "loss": 6.0954, "step": 1400 }, { "epoch": 0.06116207951070336, "learning_rate": 9.970425994957874e-07, "loss": 6.0892, "step": 1420 }, { "epoch": 0.06202351725029073, "learning_rate": 9.969941175203085e-07, "loss": 5.8398, "step": 1440 }, { "epoch": 0.0628849549898781, "learning_rate": 9.969456355448296e-07, "loss": 6.0239, "step": 1460 }, { "epoch": 0.06374639272946547, "learning_rate": 9.968971535693509e-07, "loss": 6.0563, "step": 1480 }, { "epoch": 0.06460783046905284, "learning_rate": 9.968486715938717e-07, "loss": 6.1168, "step": 1500 }, { "epoch": 0.06546926820864023, "learning_rate": 9.96800189618393e-07, "loss": 5.8443, "step": 1520 }, { "epoch": 0.0663307059482276, "learning_rate": 9.967517076429139e-07, "loss": 5.8785, "step": 1540 }, { "epoch": 0.06719214368781497, "learning_rate": 9.967032256674351e-07, "loss": 6.0316, "step": 1560 }, { "epoch": 0.06805358142740234, "learning_rate": 9.966547436919562e-07, "loss": 5.9318, "step": 1580 }, { "epoch": 0.06891501916698971, "learning_rate": 9.966062617164773e-07, "loss": 6.1837, "step": 1600 }, { "epoch": 0.06977645690657708, "learning_rate": 9.965577797409986e-07, "loss": 6.0558, "step": 1620 }, { "epoch": 0.07063789464616445, "learning_rate": 9.965092977655196e-07, "loss": 5.9914, "step": 1640 }, { "epoch": 0.07149933238575182, "learning_rate": 9.964608157900407e-07, "loss": 6.1015, "step": 1660 }, { "epoch": 0.07236077012533919, "learning_rate": 9.964123338145618e-07, "loss": 5.948, "step": 1680 }, { "epoch": 0.07322220786492656, "learning_rate": 9.963638518390829e-07, "loss": 5.8682, "step": 1700 }, { "epoch": 0.07408364560451393, "learning_rate": 9.963153698636041e-07, "loss": 6.0684, "step": 1720 }, { "epoch": 0.0749450833441013, "learning_rate": 9.96266887888125e-07, "loss": 5.7916, "step": 1740 }, { "epoch": 0.07580652108368868, "learning_rate": 9.962184059126463e-07, "loss": 6.0412, "step": 1760 }, { "epoch": 0.07666795882327605, "learning_rate": 9.961699239371674e-07, "loss": 6.0506, "step": 1780 }, { "epoch": 0.07752939656286342, "learning_rate": 9.961214419616884e-07, "loss": 5.8769, "step": 1800 }, { "epoch": 0.0783908343024508, "learning_rate": 9.960729599862095e-07, "loss": 5.7286, "step": 1820 }, { "epoch": 0.07925227204203816, "learning_rate": 9.960244780107306e-07, "loss": 5.8717, "step": 1840 }, { "epoch": 0.08011370978162553, "learning_rate": 9.959759960352519e-07, "loss": 5.7639, "step": 1860 }, { "epoch": 0.0809751475212129, "learning_rate": 9.959275140597727e-07, "loss": 6.1518, "step": 1880 }, { "epoch": 0.08183658526080027, "learning_rate": 9.95879032084294e-07, "loss": 5.8542, "step": 1900 }, { "epoch": 0.08269802300038764, "learning_rate": 9.95830550108815e-07, "loss": 5.8854, "step": 1920 }, { "epoch": 0.08355946073997501, "learning_rate": 9.957820681333361e-07, "loss": 5.6142, "step": 1940 }, { "epoch": 0.08442089847956238, "learning_rate": 9.957335861578572e-07, "loss": 5.9447, "step": 1960 }, { "epoch": 0.08528233621914975, "learning_rate": 9.956851041823783e-07, "loss": 5.7814, "step": 1980 }, { "epoch": 0.08614377395873714, "learning_rate": 9.956366222068996e-07, "loss": 5.7888, "step": 2000 }, { "epoch": 0.08700521169832451, "learning_rate": 9.955881402314206e-07, "loss": 5.6126, "step": 2020 }, { "epoch": 0.08786664943791188, "learning_rate": 9.955396582559417e-07, "loss": 5.6853, "step": 2040 }, { "epoch": 0.08872808717749925, "learning_rate": 9.954911762804628e-07, "loss": 5.7188, "step": 2060 }, { "epoch": 0.08958952491708662, "learning_rate": 9.954426943049839e-07, "loss": 5.8168, "step": 2080 }, { "epoch": 0.09045096265667399, "learning_rate": 9.953942123295051e-07, "loss": 5.5472, "step": 2100 }, { "epoch": 0.09131240039626136, "learning_rate": 9.95345730354026e-07, "loss": 5.5835, "step": 2120 }, { "epoch": 0.09217383813584873, "learning_rate": 9.952972483785473e-07, "loss": 5.9346, "step": 2140 }, { "epoch": 0.0930352758754361, "learning_rate": 9.952487664030683e-07, "loss": 5.9372, "step": 2160 }, { "epoch": 0.09389671361502347, "learning_rate": 9.952002844275894e-07, "loss": 6.0861, "step": 2180 }, { "epoch": 0.09475815135461084, "learning_rate": 9.951518024521105e-07, "loss": 5.7357, "step": 2200 }, { "epoch": 0.09561958909419822, "learning_rate": 9.951033204766316e-07, "loss": 5.9574, "step": 2220 }, { "epoch": 0.0964810268337856, "learning_rate": 9.950548385011528e-07, "loss": 5.6279, "step": 2240 }, { "epoch": 0.09734246457337296, "learning_rate": 9.95006356525674e-07, "loss": 5.8369, "step": 2260 }, { "epoch": 0.09820390231296033, "learning_rate": 9.94957874550195e-07, "loss": 5.7718, "step": 2280 }, { "epoch": 0.0990653400525477, "learning_rate": 9.94909392574716e-07, "loss": 5.8574, "step": 2300 }, { "epoch": 0.09992677779213507, "learning_rate": 9.948609105992371e-07, "loss": 5.5917, "step": 2320 }, { "epoch": 0.10078821553172244, "learning_rate": 9.948124286237584e-07, "loss": 5.573, "step": 2340 }, { "epoch": 0.10164965327130981, "learning_rate": 9.947639466482793e-07, "loss": 5.5742, "step": 2360 }, { "epoch": 0.10251109101089718, "learning_rate": 9.947154646728006e-07, "loss": 5.8785, "step": 2380 }, { "epoch": 0.10337252875048455, "learning_rate": 9.946669826973216e-07, "loss": 5.9297, "step": 2400 }, { "epoch": 0.10423396649007192, "learning_rate": 9.946185007218427e-07, "loss": 5.5759, "step": 2420 }, { "epoch": 0.1050954042296593, "learning_rate": 9.945700187463638e-07, "loss": 5.3543, "step": 2440 }, { "epoch": 0.10595684196924668, "learning_rate": 9.945215367708848e-07, "loss": 5.8447, "step": 2460 }, { "epoch": 0.10681827970883405, "learning_rate": 9.944730547954061e-07, "loss": 5.7428, "step": 2480 }, { "epoch": 0.10767971744842142, "learning_rate": 9.94424572819927e-07, "loss": 5.4967, "step": 2500 }, { "epoch": 0.10854115518800879, "learning_rate": 9.943760908444483e-07, "loss": 5.567, "step": 2520 }, { "epoch": 0.10940259292759616, "learning_rate": 9.943276088689693e-07, "loss": 5.8644, "step": 2540 }, { "epoch": 0.11026403066718353, "learning_rate": 9.942791268934904e-07, "loss": 5.4059, "step": 2560 }, { "epoch": 0.1111254684067709, "learning_rate": 9.942306449180115e-07, "loss": 5.6175, "step": 2580 }, { "epoch": 0.11198690614635827, "learning_rate": 9.941821629425326e-07, "loss": 5.5652, "step": 2600 }, { "epoch": 0.11284834388594564, "learning_rate": 9.941336809670538e-07, "loss": 5.5506, "step": 2620 }, { "epoch": 0.11370978162553301, "learning_rate": 9.94085198991575e-07, "loss": 5.5353, "step": 2640 }, { "epoch": 0.11457121936512038, "learning_rate": 9.94036717016096e-07, "loss": 5.678, "step": 2660 }, { "epoch": 0.11543265710470775, "learning_rate": 9.93988235040617e-07, "loss": 5.5166, "step": 2680 }, { "epoch": 0.11629409484429513, "learning_rate": 9.939397530651381e-07, "loss": 5.5444, "step": 2700 }, { "epoch": 0.1171555325838825, "learning_rate": 9.938912710896594e-07, "loss": 5.395, "step": 2720 }, { "epoch": 0.11801697032346987, "learning_rate": 9.938427891141803e-07, "loss": 5.7822, "step": 2740 }, { "epoch": 0.11887840806305724, "learning_rate": 9.937943071387016e-07, "loss": 5.5044, "step": 2760 }, { "epoch": 0.11973984580264461, "learning_rate": 9.937458251632226e-07, "loss": 5.309, "step": 2780 }, { "epoch": 0.12060128354223199, "learning_rate": 9.936973431877437e-07, "loss": 5.4511, "step": 2800 }, { "epoch": 0.12146272128181936, "learning_rate": 9.936488612122648e-07, "loss": 5.6822, "step": 2820 }, { "epoch": 0.12232415902140673, "learning_rate": 9.936003792367858e-07, "loss": 5.7788, "step": 2840 }, { "epoch": 0.1231855967609941, "learning_rate": 9.935518972613071e-07, "loss": 5.3378, "step": 2860 }, { "epoch": 0.12404703450058147, "learning_rate": 9.935034152858282e-07, "loss": 5.6238, "step": 2880 }, { "epoch": 0.12490847224016884, "learning_rate": 9.934549333103493e-07, "loss": 5.3651, "step": 2900 }, { "epoch": 0.1257699099797562, "learning_rate": 9.934064513348703e-07, "loss": 5.6061, "step": 2920 }, { "epoch": 0.1266313477193436, "learning_rate": 9.933579693593914e-07, "loss": 5.6688, "step": 2940 }, { "epoch": 0.12749278545893095, "learning_rate": 9.933094873839125e-07, "loss": 5.5402, "step": 2960 }, { "epoch": 0.12835422319851833, "learning_rate": 9.932610054084336e-07, "loss": 5.3776, "step": 2980 }, { "epoch": 0.1292156609381057, "learning_rate": 9.932125234329548e-07, "loss": 5.4292, "step": 3000 }, { "epoch": 0.13007709867769307, "learning_rate": 9.93164041457476e-07, "loss": 5.614, "step": 3020 }, { "epoch": 0.13093853641728045, "learning_rate": 9.93115559481997e-07, "loss": 5.5546, "step": 3040 }, { "epoch": 0.1317999741568678, "learning_rate": 9.930670775065183e-07, "loss": 5.5784, "step": 3060 }, { "epoch": 0.1326614118964552, "learning_rate": 9.930185955310391e-07, "loss": 4.989, "step": 3080 }, { "epoch": 0.13352284963604255, "learning_rate": 9.929701135555604e-07, "loss": 5.5975, "step": 3100 }, { "epoch": 0.13438428737562993, "learning_rate": 9.929216315800813e-07, "loss": 5.2873, "step": 3120 }, { "epoch": 0.1352457251152173, "learning_rate": 9.928731496046025e-07, "loss": 5.4276, "step": 3140 }, { "epoch": 0.13610716285480468, "learning_rate": 9.928246676291236e-07, "loss": 5.4971, "step": 3160 }, { "epoch": 0.13696860059439203, "learning_rate": 9.927761856536447e-07, "loss": 5.6698, "step": 3180 }, { "epoch": 0.13783003833397942, "learning_rate": 9.927277036781658e-07, "loss": 5.7259, "step": 3200 }, { "epoch": 0.13869147607356677, "learning_rate": 9.926792217026868e-07, "loss": 5.5365, "step": 3220 }, { "epoch": 0.13955291381315416, "learning_rate": 9.926307397272081e-07, "loss": 5.5269, "step": 3240 }, { "epoch": 0.1404143515527415, "learning_rate": 9.925822577517292e-07, "loss": 5.4957, "step": 3260 }, { "epoch": 0.1412757892923289, "learning_rate": 9.925337757762503e-07, "loss": 5.15, "step": 3280 }, { "epoch": 0.14213722703191628, "learning_rate": 9.924852938007715e-07, "loss": 5.3415, "step": 3300 }, { "epoch": 0.14299866477150364, "learning_rate": 9.924368118252924e-07, "loss": 5.446, "step": 3320 }, { "epoch": 0.14386010251109102, "learning_rate": 9.923883298498137e-07, "loss": 5.4782, "step": 3340 }, { "epoch": 0.14472154025067838, "learning_rate": 9.923398478743345e-07, "loss": 5.7166, "step": 3360 }, { "epoch": 0.14558297799026576, "learning_rate": 9.922913658988558e-07, "loss": 5.2477, "step": 3380 }, { "epoch": 0.14644441572985312, "learning_rate": 9.92242883923377e-07, "loss": 5.5064, "step": 3400 }, { "epoch": 0.1473058534694405, "learning_rate": 9.92194401947898e-07, "loss": 5.1118, "step": 3420 }, { "epoch": 0.14816729120902786, "learning_rate": 9.921459199724193e-07, "loss": 5.6227, "step": 3440 }, { "epoch": 0.14902872894861524, "learning_rate": 9.920974379969401e-07, "loss": 5.2942, "step": 3460 }, { "epoch": 0.1498901666882026, "learning_rate": 9.920489560214614e-07, "loss": 5.5577, "step": 3480 }, { "epoch": 0.15075160442778998, "learning_rate": 9.920004740459823e-07, "loss": 5.2978, "step": 3500 }, { "epoch": 0.15161304216737737, "learning_rate": 9.919519920705035e-07, "loss": 5.4462, "step": 3520 }, { "epoch": 0.15247447990696472, "learning_rate": 9.919035100950246e-07, "loss": 5.322, "step": 3540 }, { "epoch": 0.1533359176465521, "learning_rate": 9.918550281195457e-07, "loss": 5.1661, "step": 3560 }, { "epoch": 0.15419735538613946, "learning_rate": 9.918065461440668e-07, "loss": 5.4463, "step": 3580 }, { "epoch": 0.15505879312572685, "learning_rate": 9.917580641685878e-07, "loss": 5.2187, "step": 3600 }, { "epoch": 0.1559202308653142, "learning_rate": 9.917095821931091e-07, "loss": 5.4907, "step": 3620 }, { "epoch": 0.1567816686049016, "learning_rate": 9.916611002176302e-07, "loss": 5.5288, "step": 3640 }, { "epoch": 0.15764310634448894, "learning_rate": 9.916126182421513e-07, "loss": 5.4536, "step": 3660 }, { "epoch": 0.15850454408407633, "learning_rate": 9.915641362666725e-07, "loss": 5.481, "step": 3680 }, { "epoch": 0.15936598182366368, "learning_rate": 9.915156542911934e-07, "loss": 5.3777, "step": 3700 }, { "epoch": 0.16022741956325107, "learning_rate": 9.914671723157147e-07, "loss": 5.61, "step": 3720 }, { "epoch": 0.16108885730283845, "learning_rate": 9.914186903402355e-07, "loss": 5.4264, "step": 3740 }, { "epoch": 0.1619502950424258, "learning_rate": 9.913702083647568e-07, "loss": 5.4003, "step": 3760 }, { "epoch": 0.1628117327820132, "learning_rate": 9.913217263892779e-07, "loss": 5.2343, "step": 3780 }, { "epoch": 0.16367317052160055, "learning_rate": 9.91273244413799e-07, "loss": 5.3609, "step": 3800 }, { "epoch": 0.16453460826118793, "learning_rate": 9.912247624383202e-07, "loss": 5.4076, "step": 3820 }, { "epoch": 0.1653960460007753, "learning_rate": 9.911762804628411e-07, "loss": 5.2988, "step": 3840 }, { "epoch": 0.16625748374036267, "learning_rate": 9.911277984873624e-07, "loss": 5.334, "step": 3860 }, { "epoch": 0.16711892147995003, "learning_rate": 9.910793165118835e-07, "loss": 5.5399, "step": 3880 }, { "epoch": 0.1679803592195374, "learning_rate": 9.910308345364045e-07, "loss": 5.1626, "step": 3900 }, { "epoch": 0.16884179695912477, "learning_rate": 9.909823525609258e-07, "loss": 5.3731, "step": 3920 }, { "epoch": 0.16970323469871215, "learning_rate": 9.909338705854467e-07, "loss": 5.2582, "step": 3940 }, { "epoch": 0.1705646724382995, "learning_rate": 9.90885388609968e-07, "loss": 5.3624, "step": 3960 }, { "epoch": 0.1714261101778869, "learning_rate": 9.908369066344888e-07, "loss": 5.2776, "step": 3980 }, { "epoch": 0.17228754791747428, "learning_rate": 9.9078842465901e-07, "loss": 5.4069, "step": 4000 }, { "epoch": 0.17314898565706163, "learning_rate": 9.907399426835312e-07, "loss": 5.336, "step": 4020 }, { "epoch": 0.17401042339664902, "learning_rate": 9.906914607080522e-07, "loss": 5.1774, "step": 4040 }, { "epoch": 0.17487186113623637, "learning_rate": 9.906429787325735e-07, "loss": 5.2318, "step": 4060 }, { "epoch": 0.17573329887582376, "learning_rate": 9.905944967570944e-07, "loss": 5.2908, "step": 4080 }, { "epoch": 0.1765947366154111, "learning_rate": 9.905460147816157e-07, "loss": 5.3843, "step": 4100 }, { "epoch": 0.1774561743549985, "learning_rate": 9.904975328061365e-07, "loss": 5.3274, "step": 4120 }, { "epoch": 0.17831761209458585, "learning_rate": 9.904490508306578e-07, "loss": 5.2516, "step": 4140 }, { "epoch": 0.17917904983417324, "learning_rate": 9.904005688551789e-07, "loss": 5.4997, "step": 4160 }, { "epoch": 0.1800404875737606, "learning_rate": 9.903520868797e-07, "loss": 5.2678, "step": 4180 }, { "epoch": 0.18090192531334798, "learning_rate": 9.903036049042212e-07, "loss": 5.5379, "step": 4200 }, { "epoch": 0.18176336305293536, "learning_rate": 9.90255122928742e-07, "loss": 5.2914, "step": 4220 }, { "epoch": 0.18262480079252272, "learning_rate": 9.902066409532634e-07, "loss": 5.2059, "step": 4240 }, { "epoch": 0.1834862385321101, "learning_rate": 9.901581589777845e-07, "loss": 5.1896, "step": 4260 }, { "epoch": 0.18434767627169746, "learning_rate": 9.901096770023055e-07, "loss": 5.256, "step": 4280 }, { "epoch": 0.18520911401128484, "learning_rate": 9.900611950268268e-07, "loss": 5.3677, "step": 4300 }, { "epoch": 0.1860705517508722, "learning_rate": 9.900127130513477e-07, "loss": 5.1496, "step": 4320 }, { "epoch": 0.18693198949045958, "learning_rate": 9.89964231075869e-07, "loss": 5.2171, "step": 4340 }, { "epoch": 0.18779342723004694, "learning_rate": 9.899157491003898e-07, "loss": 5.1794, "step": 4360 }, { "epoch": 0.18865486496963432, "learning_rate": 9.89867267124911e-07, "loss": 5.149, "step": 4380 }, { "epoch": 0.18951630270922168, "learning_rate": 9.898187851494322e-07, "loss": 5.3623, "step": 4400 }, { "epoch": 0.19037774044880906, "learning_rate": 9.897703031739532e-07, "loss": 5.12, "step": 4420 }, { "epoch": 0.19123917818839645, "learning_rate": 9.897218211984745e-07, "loss": 5.1459, "step": 4440 }, { "epoch": 0.1921006159279838, "learning_rate": 9.896733392229954e-07, "loss": 5.1605, "step": 4460 }, { "epoch": 0.1929620536675712, "learning_rate": 9.896248572475167e-07, "loss": 5.3126, "step": 4480 }, { "epoch": 0.19382349140715854, "learning_rate": 9.895763752720377e-07, "loss": 5.3019, "step": 4500 }, { "epoch": 0.19468492914674593, "learning_rate": 9.895278932965588e-07, "loss": 5.165, "step": 4520 }, { "epoch": 0.19554636688633328, "learning_rate": 9.894794113210799e-07, "loss": 5.2627, "step": 4540 }, { "epoch": 0.19640780462592067, "learning_rate": 9.89430929345601e-07, "loss": 5.2599, "step": 4560 }, { "epoch": 0.19726924236550802, "learning_rate": 9.893824473701222e-07, "loss": 5.1008, "step": 4580 }, { "epoch": 0.1981306801050954, "learning_rate": 9.89333965394643e-07, "loss": 5.1498, "step": 4600 }, { "epoch": 0.19899211784468276, "learning_rate": 9.892854834191644e-07, "loss": 5.177, "step": 4620 }, { "epoch": 0.19985355558427015, "learning_rate": 9.892370014436854e-07, "loss": 5.4382, "step": 4640 }, { "epoch": 0.2007149933238575, "learning_rate": 9.891885194682065e-07, "loss": 5.2083, "step": 4660 }, { "epoch": 0.2015764310634449, "learning_rate": 9.891400374927278e-07, "loss": 5.1972, "step": 4680 }, { "epoch": 0.20243786880303227, "learning_rate": 9.890915555172487e-07, "loss": 5.2555, "step": 4700 }, { "epoch": 0.20329930654261963, "learning_rate": 9.8904307354177e-07, "loss": 5.3427, "step": 4720 }, { "epoch": 0.204160744282207, "learning_rate": 9.889945915662908e-07, "loss": 5.1514, "step": 4740 }, { "epoch": 0.20502218202179437, "learning_rate": 9.88946109590812e-07, "loss": 5.4223, "step": 4760 }, { "epoch": 0.20588361976138175, "learning_rate": 9.888976276153332e-07, "loss": 5.2675, "step": 4780 }, { "epoch": 0.2067450575009691, "learning_rate": 9.888491456398542e-07, "loss": 5.2755, "step": 4800 }, { "epoch": 0.2076064952405565, "learning_rate": 9.888006636643755e-07, "loss": 5.0806, "step": 4820 }, { "epoch": 0.20846793298014385, "learning_rate": 9.887521816888966e-07, "loss": 4.9737, "step": 4840 }, { "epoch": 0.20932937071973123, "learning_rate": 9.887036997134177e-07, "loss": 5.186, "step": 4860 }, { "epoch": 0.2101908084593186, "learning_rate": 9.886552177379387e-07, "loss": 5.2746, "step": 4880 }, { "epoch": 0.21105224619890597, "learning_rate": 9.886067357624598e-07, "loss": 5.359, "step": 4900 }, { "epoch": 0.21191368393849336, "learning_rate": 9.88558253786981e-07, "loss": 5.1738, "step": 4920 }, { "epoch": 0.21277512167808071, "learning_rate": 9.88509771811502e-07, "loss": 5.1438, "step": 4940 }, { "epoch": 0.2136365594176681, "learning_rate": 9.884612898360232e-07, "loss": 4.9935, "step": 4960 }, { "epoch": 0.21449799715725545, "learning_rate": 9.88412807860544e-07, "loss": 5.2627, "step": 4980 }, { "epoch": 0.21535943489684284, "learning_rate": 9.883643258850654e-07, "loss": 4.8987, "step": 5000 }, { "epoch": 0.2162208726364302, "learning_rate": 9.883158439095864e-07, "loss": 5.1943, "step": 5020 }, { "epoch": 0.21708231037601758, "learning_rate": 9.882673619341075e-07, "loss": 5.1257, "step": 5040 }, { "epoch": 0.21794374811560493, "learning_rate": 9.882188799586288e-07, "loss": 5.2447, "step": 5060 }, { "epoch": 0.21880518585519232, "learning_rate": 9.881703979831497e-07, "loss": 5.0602, "step": 5080 }, { "epoch": 0.21966662359477968, "learning_rate": 9.88121916007671e-07, "loss": 5.1273, "step": 5100 }, { "epoch": 0.22052806133436706, "learning_rate": 9.880734340321918e-07, "loss": 5.2593, "step": 5120 }, { "epoch": 0.22138949907395444, "learning_rate": 9.88024952056713e-07, "loss": 5.2149, "step": 5140 }, { "epoch": 0.2222509368135418, "learning_rate": 9.879764700812342e-07, "loss": 5.1093, "step": 5160 }, { "epoch": 0.22311237455312918, "learning_rate": 9.879279881057552e-07, "loss": 5.1914, "step": 5180 }, { "epoch": 0.22397381229271654, "learning_rate": 9.878795061302765e-07, "loss": 5.1855, "step": 5200 }, { "epoch": 0.22483525003230392, "learning_rate": 9.878310241547976e-07, "loss": 5.1667, "step": 5220 }, { "epoch": 0.22569668777189128, "learning_rate": 9.877825421793187e-07, "loss": 5.1006, "step": 5240 }, { "epoch": 0.22655812551147866, "learning_rate": 9.877340602038397e-07, "loss": 5.4394, "step": 5260 }, { "epoch": 0.22741956325106602, "learning_rate": 9.876855782283608e-07, "loss": 5.2247, "step": 5280 }, { "epoch": 0.2282810009906534, "learning_rate": 9.87637096252882e-07, "loss": 5.2253, "step": 5300 }, { "epoch": 0.22914243873024076, "learning_rate": 9.87588614277403e-07, "loss": 5.0591, "step": 5320 }, { "epoch": 0.23000387646982814, "learning_rate": 9.875401323019242e-07, "loss": 5.368, "step": 5340 }, { "epoch": 0.2308653142094155, "learning_rate": 9.87491650326445e-07, "loss": 5.1635, "step": 5360 }, { "epoch": 0.23172675194900288, "learning_rate": 9.874431683509664e-07, "loss": 5.0713, "step": 5380 }, { "epoch": 0.23258818968859027, "learning_rate": 9.873946863754874e-07, "loss": 5.1414, "step": 5400 }, { "epoch": 0.23344962742817763, "learning_rate": 9.873462044000085e-07, "loss": 5.0667, "step": 5420 }, { "epoch": 0.234311065167765, "learning_rate": 9.872977224245298e-07, "loss": 4.9844, "step": 5440 }, { "epoch": 0.23517250290735237, "learning_rate": 9.872492404490509e-07, "loss": 5.0536, "step": 5460 }, { "epoch": 0.23603394064693975, "learning_rate": 9.87200758473572e-07, "loss": 5.0188, "step": 5480 }, { "epoch": 0.2368953783865271, "learning_rate": 9.87152276498093e-07, "loss": 4.9918, "step": 5500 }, { "epoch": 0.2377568161261145, "learning_rate": 9.87103794522614e-07, "loss": 5.1782, "step": 5520 }, { "epoch": 0.23861825386570185, "learning_rate": 9.870553125471354e-07, "loss": 5.0001, "step": 5540 }, { "epoch": 0.23947969160528923, "learning_rate": 9.870068305716562e-07, "loss": 4.9272, "step": 5560 }, { "epoch": 0.2403411293448766, "learning_rate": 9.869583485961775e-07, "loss": 5.3893, "step": 5580 }, { "epoch": 0.24120256708446397, "learning_rate": 9.869098666206986e-07, "loss": 5.3314, "step": 5600 }, { "epoch": 0.24206400482405135, "learning_rate": 9.868613846452196e-07, "loss": 5.4649, "step": 5620 }, { "epoch": 0.2429254425636387, "learning_rate": 9.868129026697407e-07, "loss": 5.2219, "step": 5640 }, { "epoch": 0.2437868803032261, "learning_rate": 9.867644206942618e-07, "loss": 5.3536, "step": 5660 }, { "epoch": 0.24464831804281345, "learning_rate": 9.86715938718783e-07, "loss": 5.0812, "step": 5680 }, { "epoch": 0.24550975578240083, "learning_rate": 9.86667456743304e-07, "loss": 5.0065, "step": 5700 }, { "epoch": 0.2463711935219882, "learning_rate": 9.866189747678252e-07, "loss": 4.8052, "step": 5720 }, { "epoch": 0.24723263126157558, "learning_rate": 9.86570492792346e-07, "loss": 5.1683, "step": 5740 }, { "epoch": 0.24809406900116293, "learning_rate": 9.865220108168674e-07, "loss": 5.0243, "step": 5760 }, { "epoch": 0.24895550674075032, "learning_rate": 9.864735288413884e-07, "loss": 5.1228, "step": 5780 }, { "epoch": 0.24981694448033767, "learning_rate": 9.864250468659095e-07, "loss": 5.0573, "step": 5800 }, { "epoch": 0.25067838221992506, "learning_rate": 9.863765648904308e-07, "loss": 5.2251, "step": 5820 }, { "epoch": 0.2515398199595124, "learning_rate": 9.863280829149519e-07, "loss": 5.1655, "step": 5840 }, { "epoch": 0.2524012576990998, "learning_rate": 9.86279600939473e-07, "loss": 5.0808, "step": 5860 }, { "epoch": 0.2532626954386872, "learning_rate": 9.86231118963994e-07, "loss": 5.0284, "step": 5880 }, { "epoch": 0.25412413317827454, "learning_rate": 9.86182636988515e-07, "loss": 5.1345, "step": 5900 }, { "epoch": 0.2549855709178619, "learning_rate": 9.861341550130364e-07, "loss": 4.7051, "step": 5920 }, { "epoch": 0.2558470086574493, "learning_rate": 9.860856730375572e-07, "loss": 4.9449, "step": 5940 }, { "epoch": 0.25670844639703666, "learning_rate": 9.860371910620785e-07, "loss": 5.1681, "step": 5960 }, { "epoch": 0.257569884136624, "learning_rate": 9.859887090865996e-07, "loss": 4.9312, "step": 5980 }, { "epoch": 0.2584313218762114, "learning_rate": 9.859402271111206e-07, "loss": 5.3096, "step": 6000 }, { "epoch": 0.2592927596157988, "learning_rate": 9.858917451356417e-07, "loss": 4.9742, "step": 6020 }, { "epoch": 0.26015419735538614, "learning_rate": 9.858432631601628e-07, "loss": 5.1821, "step": 6040 }, { "epoch": 0.2610156350949735, "learning_rate": 9.85794781184684e-07, "loss": 5.1821, "step": 6060 }, { "epoch": 0.2618770728345609, "learning_rate": 9.857462992092051e-07, "loss": 5.083, "step": 6080 }, { "epoch": 0.26273851057414827, "learning_rate": 9.856978172337262e-07, "loss": 5.1987, "step": 6100 }, { "epoch": 0.2635999483137356, "learning_rate": 9.85649335258247e-07, "loss": 5.0137, "step": 6120 }, { "epoch": 0.264461386053323, "learning_rate": 9.856008532827684e-07, "loss": 4.8114, "step": 6140 }, { "epoch": 0.2653228237929104, "learning_rate": 9.855523713072894e-07, "loss": 5.1205, "step": 6160 }, { "epoch": 0.26618426153249775, "learning_rate": 9.855038893318105e-07, "loss": 4.9281, "step": 6180 }, { "epoch": 0.2670456992720851, "learning_rate": 9.854554073563318e-07, "loss": 5.0873, "step": 6200 }, { "epoch": 0.26790713701167246, "learning_rate": 9.854069253808529e-07, "loss": 5.0967, "step": 6220 }, { "epoch": 0.26876857475125987, "learning_rate": 9.85358443405374e-07, "loss": 5.2425, "step": 6240 }, { "epoch": 0.2696300124908472, "learning_rate": 9.85309961429895e-07, "loss": 5.1937, "step": 6260 }, { "epoch": 0.2704914502304346, "learning_rate": 9.85261479454416e-07, "loss": 4.8241, "step": 6280 }, { "epoch": 0.271352887970022, "learning_rate": 9.852129974789373e-07, "loss": 5.0539, "step": 6300 }, { "epoch": 0.27221432570960935, "learning_rate": 9.851645155034582e-07, "loss": 5.1938, "step": 6320 }, { "epoch": 0.2730757634491967, "learning_rate": 9.851160335279795e-07, "loss": 5.0296, "step": 6340 }, { "epoch": 0.27393720118878406, "learning_rate": 9.850675515525006e-07, "loss": 4.9956, "step": 6360 }, { "epoch": 0.2747986389283715, "learning_rate": 9.850190695770216e-07, "loss": 5.1587, "step": 6380 }, { "epoch": 0.27566007666795883, "learning_rate": 9.849705876015427e-07, "loss": 5.0578, "step": 6400 }, { "epoch": 0.2765215144075462, "learning_rate": 9.849221056260638e-07, "loss": 4.6687, "step": 6420 }, { "epoch": 0.27738295214713354, "learning_rate": 9.84873623650585e-07, "loss": 5.0046, "step": 6440 }, { "epoch": 0.27824438988672096, "learning_rate": 9.848251416751061e-07, "loss": 4.9388, "step": 6460 }, { "epoch": 0.2791058276263083, "learning_rate": 9.847766596996272e-07, "loss": 5.1524, "step": 6480 }, { "epoch": 0.27996726536589567, "learning_rate": 9.847281777241483e-07, "loss": 5.0729, "step": 6500 }, { "epoch": 0.280828703105483, "learning_rate": 9.846796957486693e-07, "loss": 5.1614, "step": 6520 }, { "epoch": 0.28169014084507044, "learning_rate": 9.846312137731906e-07, "loss": 5.0912, "step": 6540 }, { "epoch": 0.2825515785846578, "learning_rate": 9.845827317977115e-07, "loss": 4.8363, "step": 6560 }, { "epoch": 0.28341301632424515, "learning_rate": 9.845342498222328e-07, "loss": 5.1497, "step": 6580 }, { "epoch": 0.28427445406383256, "learning_rate": 9.844857678467538e-07, "loss": 5.1407, "step": 6600 }, { "epoch": 0.2851358918034199, "learning_rate": 9.84437285871275e-07, "loss": 5.1137, "step": 6620 }, { "epoch": 0.2859973295430073, "learning_rate": 9.84388803895796e-07, "loss": 4.9547, "step": 6640 }, { "epoch": 0.28685876728259463, "learning_rate": 9.84340321920317e-07, "loss": 5.0116, "step": 6660 }, { "epoch": 0.28772020502218204, "learning_rate": 9.842918399448383e-07, "loss": 5.0545, "step": 6680 }, { "epoch": 0.2885816427617694, "learning_rate": 9.842433579693592e-07, "loss": 4.8564, "step": 6700 }, { "epoch": 0.28944308050135675, "learning_rate": 9.841948759938805e-07, "loss": 4.7905, "step": 6720 }, { "epoch": 0.2903045182409441, "learning_rate": 9.841463940184016e-07, "loss": 5.0053, "step": 6740 }, { "epoch": 0.2911659559805315, "learning_rate": 9.840979120429226e-07, "loss": 4.9103, "step": 6760 }, { "epoch": 0.2920273937201189, "learning_rate": 9.840494300674437e-07, "loss": 4.9392, "step": 6780 }, { "epoch": 0.29288883145970623, "learning_rate": 9.840009480919648e-07, "loss": 4.9946, "step": 6800 }, { "epoch": 0.29375026919929365, "learning_rate": 9.83952466116486e-07, "loss": 5.123, "step": 6820 }, { "epoch": 0.294611706938881, "learning_rate": 9.839039841410071e-07, "loss": 5.0541, "step": 6840 }, { "epoch": 0.29547314467846836, "learning_rate": 9.838555021655282e-07, "loss": 5.2848, "step": 6860 }, { "epoch": 0.2963345824180557, "learning_rate": 9.838070201900493e-07, "loss": 5.1143, "step": 6880 }, { "epoch": 0.2971960201576431, "learning_rate": 9.837585382145703e-07, "loss": 5.0813, "step": 6900 }, { "epoch": 0.2980574578972305, "learning_rate": 9.837100562390916e-07, "loss": 4.902, "step": 6920 }, { "epoch": 0.29891889563681784, "learning_rate": 9.836615742636125e-07, "loss": 5.4337, "step": 6940 }, { "epoch": 0.2997803333764052, "learning_rate": 9.836130922881338e-07, "loss": 4.8747, "step": 6960 }, { "epoch": 0.3006417711159926, "learning_rate": 9.835646103126548e-07, "loss": 4.8659, "step": 6980 }, { "epoch": 0.30150320885557996, "learning_rate": 9.83516128337176e-07, "loss": 4.9519, "step": 7000 }, { "epoch": 0.3023646465951673, "learning_rate": 9.83467646361697e-07, "loss": 4.9942, "step": 7020 }, { "epoch": 0.30322608433475473, "learning_rate": 9.83419164386218e-07, "loss": 5.0863, "step": 7040 }, { "epoch": 0.3040875220743421, "learning_rate": 9.833706824107393e-07, "loss": 4.7905, "step": 7060 }, { "epoch": 0.30494895981392944, "learning_rate": 9.833222004352604e-07, "loss": 4.9519, "step": 7080 }, { "epoch": 0.3058103975535168, "learning_rate": 9.832737184597815e-07, "loss": 5.0442, "step": 7100 }, { "epoch": 0.3066718352931042, "learning_rate": 9.832252364843028e-07, "loss": 4.9433, "step": 7120 }, { "epoch": 0.30753327303269157, "learning_rate": 9.831767545088236e-07, "loss": 4.9803, "step": 7140 }, { "epoch": 0.3083947107722789, "learning_rate": 9.83128272533345e-07, "loss": 4.9704, "step": 7160 }, { "epoch": 0.3092561485118663, "learning_rate": 9.830797905578658e-07, "loss": 4.922, "step": 7180 }, { "epoch": 0.3101175862514537, "learning_rate": 9.83031308582387e-07, "loss": 5.1661, "step": 7200 }, { "epoch": 0.31097902399104105, "learning_rate": 9.829828266069081e-07, "loss": 4.9994, "step": 7220 }, { "epoch": 0.3118404617306284, "learning_rate": 9.829343446314292e-07, "loss": 5.1306, "step": 7240 }, { "epoch": 0.3127018994702158, "learning_rate": 9.828858626559505e-07, "loss": 5.1203, "step": 7260 }, { "epoch": 0.3135633372098032, "learning_rate": 9.828373806804713e-07, "loss": 4.9512, "step": 7280 }, { "epoch": 0.31442477494939053, "learning_rate": 9.827888987049926e-07, "loss": 4.9913, "step": 7300 }, { "epoch": 0.3152862126889779, "learning_rate": 9.827404167295135e-07, "loss": 4.8699, "step": 7320 }, { "epoch": 0.3161476504285653, "learning_rate": 9.826919347540348e-07, "loss": 5.0095, "step": 7340 }, { "epoch": 0.31700908816815265, "learning_rate": 9.826434527785558e-07, "loss": 4.8065, "step": 7360 }, { "epoch": 0.31787052590774, "learning_rate": 9.82594970803077e-07, "loss": 5.2293, "step": 7380 }, { "epoch": 0.31873196364732737, "learning_rate": 9.82546488827598e-07, "loss": 5.0196, "step": 7400 }, { "epoch": 0.3195934013869148, "learning_rate": 9.82498006852119e-07, "loss": 4.9105, "step": 7420 }, { "epoch": 0.32045483912650213, "learning_rate": 9.824495248766403e-07, "loss": 4.8088, "step": 7440 }, { "epoch": 0.3213162768660895, "learning_rate": 9.824010429011614e-07, "loss": 4.8067, "step": 7460 }, { "epoch": 0.3221777146056769, "learning_rate": 9.823525609256825e-07, "loss": 4.775, "step": 7480 }, { "epoch": 0.32303915234526426, "learning_rate": 9.823040789502038e-07, "loss": 4.8729, "step": 7500 }, { "epoch": 0.3239005900848516, "learning_rate": 9.822555969747246e-07, "loss": 4.8439, "step": 7520 }, { "epoch": 0.32476202782443897, "learning_rate": 9.82207114999246e-07, "loss": 4.936, "step": 7540 }, { "epoch": 0.3256234655640264, "learning_rate": 9.821586330237668e-07, "loss": 4.6856, "step": 7560 }, { "epoch": 0.32648490330361374, "learning_rate": 9.82110151048288e-07, "loss": 5.3348, "step": 7580 }, { "epoch": 0.3273463410432011, "learning_rate": 9.820616690728091e-07, "loss": 5.0314, "step": 7600 }, { "epoch": 0.32820777878278845, "learning_rate": 9.820131870973302e-07, "loss": 4.9376, "step": 7620 }, { "epoch": 0.32906921652237586, "learning_rate": 9.819647051218515e-07, "loss": 4.9632, "step": 7640 }, { "epoch": 0.3299306542619632, "learning_rate": 9.819162231463723e-07, "loss": 4.8058, "step": 7660 }, { "epoch": 0.3307920920015506, "learning_rate": 9.818677411708936e-07, "loss": 5.0257, "step": 7680 }, { "epoch": 0.331653529741138, "learning_rate": 9.818192591954147e-07, "loss": 4.8374, "step": 7700 }, { "epoch": 0.33251496748072534, "learning_rate": 9.817707772199358e-07, "loss": 5.0226, "step": 7720 }, { "epoch": 0.3333764052203127, "learning_rate": 9.817222952444568e-07, "loss": 5.1709, "step": 7740 }, { "epoch": 0.33423784295990006, "learning_rate": 9.81673813268978e-07, "loss": 5.0315, "step": 7760 }, { "epoch": 0.33509928069948747, "learning_rate": 9.81625331293499e-07, "loss": 4.8475, "step": 7780 }, { "epoch": 0.3359607184390748, "learning_rate": 9.8157684931802e-07, "loss": 4.7601, "step": 7800 }, { "epoch": 0.3368221561786622, "learning_rate": 9.815283673425413e-07, "loss": 4.9091, "step": 7820 }, { "epoch": 0.33768359391824954, "learning_rate": 9.814798853670624e-07, "loss": 4.7602, "step": 7840 }, { "epoch": 0.33854503165783695, "learning_rate": 9.814314033915835e-07, "loss": 4.9096, "step": 7860 }, { "epoch": 0.3394064693974243, "learning_rate": 9.813829214161047e-07, "loss": 5.0443, "step": 7880 }, { "epoch": 0.34026790713701166, "learning_rate": 9.813344394406256e-07, "loss": 4.9772, "step": 7900 }, { "epoch": 0.341129344876599, "learning_rate": 9.812859574651469e-07, "loss": 4.8295, "step": 7920 }, { "epoch": 0.34199078261618643, "learning_rate": 9.812374754896678e-07, "loss": 5.0334, "step": 7940 }, { "epoch": 0.3428522203557738, "learning_rate": 9.81188993514189e-07, "loss": 4.8409, "step": 7960 }, { "epoch": 0.34371365809536114, "learning_rate": 9.811405115387101e-07, "loss": 4.7826, "step": 7980 }, { "epoch": 0.34457509583494855, "learning_rate": 9.810920295632312e-07, "loss": 4.8348, "step": 8000 }, { "epoch": 0.3454365335745359, "learning_rate": 9.810435475877525e-07, "loss": 4.8643, "step": 8020 }, { "epoch": 0.34629797131412327, "learning_rate": 9.809950656122733e-07, "loss": 5.0973, "step": 8040 }, { "epoch": 0.3471594090537106, "learning_rate": 9.809465836367946e-07, "loss": 4.9742, "step": 8060 }, { "epoch": 0.34802084679329803, "learning_rate": 9.808981016613157e-07, "loss": 4.8906, "step": 8080 }, { "epoch": 0.3488822845328854, "learning_rate": 9.808496196858367e-07, "loss": 4.6369, "step": 8100 }, { "epoch": 0.34974372227247275, "learning_rate": 9.80801137710358e-07, "loss": 4.9755, "step": 8120 }, { "epoch": 0.3506051600120601, "learning_rate": 9.807526557348789e-07, "loss": 4.7116, "step": 8140 }, { "epoch": 0.3514665977516475, "learning_rate": 9.807041737594002e-07, "loss": 4.7687, "step": 8160 }, { "epoch": 0.35232803549123487, "learning_rate": 9.80655691783921e-07, "loss": 4.9314, "step": 8180 }, { "epoch": 0.3531894732308222, "learning_rate": 9.806072098084423e-07, "loss": 5.0838, "step": 8200 }, { "epoch": 0.35405091097040964, "learning_rate": 9.805587278329634e-07, "loss": 5.0102, "step": 8220 }, { "epoch": 0.354912348709997, "learning_rate": 9.805102458574845e-07, "loss": 5.0401, "step": 8240 }, { "epoch": 0.35577378644958435, "learning_rate": 9.804617638820057e-07, "loss": 4.9059, "step": 8260 }, { "epoch": 0.3566352241891717, "learning_rate": 9.804132819065266e-07, "loss": 4.6473, "step": 8280 }, { "epoch": 0.3574966619287591, "learning_rate": 9.803647999310479e-07, "loss": 4.7138, "step": 8300 }, { "epoch": 0.3583580996683465, "learning_rate": 9.803163179555687e-07, "loss": 4.8867, "step": 8320 }, { "epoch": 0.35921953740793383, "learning_rate": 9.8026783598009e-07, "loss": 4.8516, "step": 8340 }, { "epoch": 0.3600809751475212, "learning_rate": 9.80219354004611e-07, "loss": 4.5803, "step": 8360 }, { "epoch": 0.3609424128871086, "learning_rate": 9.801708720291322e-07, "loss": 4.9113, "step": 8380 }, { "epoch": 0.36180385062669596, "learning_rate": 9.801223900536535e-07, "loss": 4.7101, "step": 8400 }, { "epoch": 0.3626652883662833, "learning_rate": 9.800739080781743e-07, "loss": 4.673, "step": 8420 }, { "epoch": 0.3635267261058707, "learning_rate": 9.800254261026956e-07, "loss": 4.6649, "step": 8440 }, { "epoch": 0.3643881638454581, "learning_rate": 9.799769441272167e-07, "loss": 4.8451, "step": 8460 }, { "epoch": 0.36524960158504544, "learning_rate": 9.799284621517377e-07, "loss": 4.9897, "step": 8480 }, { "epoch": 0.3661110393246328, "learning_rate": 9.79879980176259e-07, "loss": 4.9975, "step": 8500 }, { "epoch": 0.3669724770642202, "learning_rate": 9.798314982007799e-07, "loss": 4.9695, "step": 8520 }, { "epoch": 0.36783391480380756, "learning_rate": 9.797830162253012e-07, "loss": 4.7713, "step": 8540 }, { "epoch": 0.3686953525433949, "learning_rate": 9.79734534249822e-07, "loss": 4.8941, "step": 8560 }, { "epoch": 0.3695567902829823, "learning_rate": 9.796860522743433e-07, "loss": 4.6747, "step": 8580 }, { "epoch": 0.3704182280225697, "learning_rate": 9.796375702988644e-07, "loss": 4.7685, "step": 8600 }, { "epoch": 0.37127966576215704, "learning_rate": 9.795890883233855e-07, "loss": 4.9399, "step": 8620 }, { "epoch": 0.3721411035017444, "learning_rate": 9.795406063479067e-07, "loss": 4.9424, "step": 8640 }, { "epoch": 0.3730025412413318, "learning_rate": 9.794921243724276e-07, "loss": 4.8493, "step": 8660 }, { "epoch": 0.37386397898091916, "learning_rate": 9.794436423969489e-07, "loss": 4.974, "step": 8680 }, { "epoch": 0.3747254167205065, "learning_rate": 9.7939516042147e-07, "loss": 4.6983, "step": 8700 }, { "epoch": 0.3755868544600939, "learning_rate": 9.79346678445991e-07, "loss": 4.7911, "step": 8720 }, { "epoch": 0.3764482921996813, "learning_rate": 9.792981964705123e-07, "loss": 4.7347, "step": 8740 }, { "epoch": 0.37730972993926865, "learning_rate": 9.792497144950332e-07, "loss": 4.8341, "step": 8760 }, { "epoch": 0.378171167678856, "learning_rate": 9.792012325195544e-07, "loss": 5.0537, "step": 8780 }, { "epoch": 0.37903260541844336, "learning_rate": 9.791527505440753e-07, "loss": 4.8714, "step": 8800 }, { "epoch": 0.37989404315803077, "learning_rate": 9.791042685685966e-07, "loss": 5.0039, "step": 8820 }, { "epoch": 0.3807554808976181, "learning_rate": 9.790557865931177e-07, "loss": 4.9384, "step": 8840 }, { "epoch": 0.3816169186372055, "learning_rate": 9.790073046176387e-07, "loss": 4.9291, "step": 8860 }, { "epoch": 0.3824783563767929, "learning_rate": 9.7895882264216e-07, "loss": 4.7098, "step": 8880 }, { "epoch": 0.38333979411638025, "learning_rate": 9.789103406666809e-07, "loss": 4.8243, "step": 8900 }, { "epoch": 0.3842012318559676, "learning_rate": 9.788618586912022e-07, "loss": 4.7814, "step": 8920 }, { "epoch": 0.38506266959555496, "learning_rate": 9.78813376715723e-07, "loss": 5.0305, "step": 8940 }, { "epoch": 0.3859241073351424, "learning_rate": 9.787648947402443e-07, "loss": 5.0014, "step": 8960 }, { "epoch": 0.38678554507472973, "learning_rate": 9.787164127647654e-07, "loss": 5.014, "step": 8980 }, { "epoch": 0.3876469828143171, "learning_rate": 9.786679307892864e-07, "loss": 4.8992, "step": 9000 }, { "epoch": 0.38850842055390444, "learning_rate": 9.786194488138077e-07, "loss": 4.4712, "step": 9020 }, { "epoch": 0.38936985829349186, "learning_rate": 9.785709668383288e-07, "loss": 4.9685, "step": 9040 }, { "epoch": 0.3902312960330792, "learning_rate": 9.785224848628499e-07, "loss": 4.4798, "step": 9060 }, { "epoch": 0.39109273377266657, "learning_rate": 9.78474002887371e-07, "loss": 5.0365, "step": 9080 }, { "epoch": 0.391954171512254, "learning_rate": 9.78425520911892e-07, "loss": 4.8627, "step": 9100 }, { "epoch": 0.39281560925184134, "learning_rate": 9.783770389364133e-07, "loss": 4.8344, "step": 9120 }, { "epoch": 0.3936770469914287, "learning_rate": 9.783285569609342e-07, "loss": 4.8208, "step": 9140 }, { "epoch": 0.39453848473101605, "learning_rate": 9.782800749854554e-07, "loss": 4.7272, "step": 9160 }, { "epoch": 0.39539992247060346, "learning_rate": 9.782315930099763e-07, "loss": 4.6802, "step": 9180 }, { "epoch": 0.3962613602101908, "learning_rate": 9.781831110344976e-07, "loss": 4.9891, "step": 9200 }, { "epoch": 0.3971227979497782, "learning_rate": 9.781346290590187e-07, "loss": 5.038, "step": 9220 }, { "epoch": 0.39798423568936553, "learning_rate": 9.780861470835397e-07, "loss": 4.8407, "step": 9240 }, { "epoch": 0.39884567342895294, "learning_rate": 9.78037665108061e-07, "loss": 4.9967, "step": 9260 }, { "epoch": 0.3997071111685403, "learning_rate": 9.77989183132582e-07, "loss": 4.871, "step": 9280 }, { "epoch": 0.40056854890812765, "learning_rate": 9.779407011571032e-07, "loss": 4.9019, "step": 9300 }, { "epoch": 0.401429986647715, "learning_rate": 9.778922191816242e-07, "loss": 4.4204, "step": 9320 }, { "epoch": 0.4022914243873024, "learning_rate": 9.778437372061453e-07, "loss": 4.6821, "step": 9340 }, { "epoch": 0.4031528621268898, "learning_rate": 9.777952552306664e-07, "loss": 4.8691, "step": 9360 }, { "epoch": 0.40401429986647713, "learning_rate": 9.777467732551874e-07, "loss": 5.0331, "step": 9380 }, { "epoch": 0.40487573760606455, "learning_rate": 9.776982912797087e-07, "loss": 4.992, "step": 9400 }, { "epoch": 0.4057371753456519, "learning_rate": 9.776498093042298e-07, "loss": 4.8098, "step": 9420 }, { "epoch": 0.40659861308523926, "learning_rate": 9.776013273287509e-07, "loss": 4.9163, "step": 9440 }, { "epoch": 0.4074600508248266, "learning_rate": 9.77552845353272e-07, "loss": 4.9387, "step": 9460 }, { "epoch": 0.408321488564414, "learning_rate": 9.77504363377793e-07, "loss": 4.879, "step": 9480 }, { "epoch": 0.4091829263040014, "learning_rate": 9.774558814023143e-07, "loss": 4.65, "step": 9500 }, { "epoch": 0.41004436404358874, "learning_rate": 9.774073994268352e-07, "loss": 4.5392, "step": 9520 }, { "epoch": 0.4109058017831761, "learning_rate": 9.773589174513564e-07, "loss": 4.9185, "step": 9540 }, { "epoch": 0.4117672395227635, "learning_rate": 9.773104354758773e-07, "loss": 4.8353, "step": 9560 }, { "epoch": 0.41262867726235086, "learning_rate": 9.772619535003986e-07, "loss": 4.9165, "step": 9580 }, { "epoch": 0.4134901150019382, "learning_rate": 9.772134715249196e-07, "loss": 4.9327, "step": 9600 }, { "epoch": 0.41435155274152563, "learning_rate": 9.771649895494407e-07, "loss": 4.4741, "step": 9620 }, { "epoch": 0.415212990481113, "learning_rate": 9.77116507573962e-07, "loss": 5.0317, "step": 9640 }, { "epoch": 0.41607442822070034, "learning_rate": 9.77068025598483e-07, "loss": 5.118, "step": 9660 }, { "epoch": 0.4169358659602877, "learning_rate": 9.770195436230041e-07, "loss": 4.7494, "step": 9680 }, { "epoch": 0.4177973036998751, "learning_rate": 9.769710616475252e-07, "loss": 4.9464, "step": 9700 }, { "epoch": 0.41865874143946247, "learning_rate": 9.769225796720463e-07, "loss": 4.9494, "step": 9720 }, { "epoch": 0.4195201791790498, "learning_rate": 9.768740976965676e-07, "loss": 4.841, "step": 9740 }, { "epoch": 0.4203816169186372, "learning_rate": 9.768256157210884e-07, "loss": 4.6014, "step": 9760 }, { "epoch": 0.4212430546582246, "learning_rate": 9.767771337456097e-07, "loss": 4.9624, "step": 9780 }, { "epoch": 0.42210449239781195, "learning_rate": 9.767286517701308e-07, "loss": 4.9065, "step": 9800 }, { "epoch": 0.4229659301373993, "learning_rate": 9.766801697946519e-07, "loss": 5.0393, "step": 9820 }, { "epoch": 0.4238273678769867, "learning_rate": 9.76631687819173e-07, "loss": 4.761, "step": 9840 }, { "epoch": 0.42468880561657407, "learning_rate": 9.76583205843694e-07, "loss": 4.7322, "step": 9860 }, { "epoch": 0.42555024335616143, "learning_rate": 9.765347238682153e-07, "loss": 4.7106, "step": 9880 }, { "epoch": 0.4264116810957488, "learning_rate": 9.764862418927361e-07, "loss": 4.8356, "step": 9900 }, { "epoch": 0.4272731188353362, "learning_rate": 9.764377599172574e-07, "loss": 4.6066, "step": 9920 }, { "epoch": 0.42813455657492355, "learning_rate": 9.763892779417783e-07, "loss": 4.7757, "step": 9940 }, { "epoch": 0.4289959943145109, "learning_rate": 9.763407959662996e-07, "loss": 4.8173, "step": 9960 }, { "epoch": 0.42985743205409827, "learning_rate": 9.762923139908206e-07, "loss": 4.6468, "step": 9980 }, { "epoch": 0.4307188697936857, "learning_rate": 9.762438320153417e-07, "loss": 4.8561, "step": 10000 }, { "epoch": 0.43158030753327303, "learning_rate": 9.76195350039863e-07, "loss": 4.769, "step": 10020 }, { "epoch": 0.4324417452728604, "learning_rate": 9.76146868064384e-07, "loss": 4.8013, "step": 10040 }, { "epoch": 0.4333031830124478, "learning_rate": 9.760983860889051e-07, "loss": 4.8112, "step": 10060 }, { "epoch": 0.43416462075203516, "learning_rate": 9.760499041134262e-07, "loss": 4.6127, "step": 10080 }, { "epoch": 0.4350260584916225, "learning_rate": 9.760014221379473e-07, "loss": 4.808, "step": 10100 }, { "epoch": 0.43588749623120987, "learning_rate": 9.759529401624686e-07, "loss": 4.7345, "step": 10120 }, { "epoch": 0.4367489339707973, "learning_rate": 9.759044581869894e-07, "loss": 4.6163, "step": 10140 }, { "epoch": 0.43761037171038464, "learning_rate": 9.758559762115107e-07, "loss": 4.785, "step": 10160 }, { "epoch": 0.438471809449972, "learning_rate": 9.758074942360318e-07, "loss": 5.0965, "step": 10180 }, { "epoch": 0.43933324718955935, "learning_rate": 9.757590122605529e-07, "loss": 4.7546, "step": 10200 }, { "epoch": 0.44019468492914676, "learning_rate": 9.75710530285074e-07, "loss": 4.7411, "step": 10220 }, { "epoch": 0.4410561226687341, "learning_rate": 9.75662048309595e-07, "loss": 4.7157, "step": 10240 }, { "epoch": 0.4419175604083215, "learning_rate": 9.756135663341163e-07, "loss": 4.7451, "step": 10260 }, { "epoch": 0.4427789981479089, "learning_rate": 9.755650843586374e-07, "loss": 4.9128, "step": 10280 }, { "epoch": 0.44364043588749624, "learning_rate": 9.755166023831584e-07, "loss": 4.9461, "step": 10300 }, { "epoch": 0.4445018736270836, "learning_rate": 9.754681204076795e-07, "loss": 4.902, "step": 10320 }, { "epoch": 0.44536331136667096, "learning_rate": 9.754196384322006e-07, "loss": 4.9179, "step": 10340 }, { "epoch": 0.44622474910625837, "learning_rate": 9.753711564567218e-07, "loss": 4.7129, "step": 10360 }, { "epoch": 0.4470861868458457, "learning_rate": 9.753226744812427e-07, "loss": 4.8344, "step": 10380 }, { "epoch": 0.4479476245854331, "learning_rate": 9.75274192505764e-07, "loss": 4.8156, "step": 10400 }, { "epoch": 0.44880906232502044, "learning_rate": 9.75225710530285e-07, "loss": 4.8929, "step": 10420 }, { "epoch": 0.44967050006460785, "learning_rate": 9.751772285548061e-07, "loss": 4.7655, "step": 10440 }, { "epoch": 0.4505319378041952, "learning_rate": 9.751287465793272e-07, "loss": 4.8392, "step": 10460 }, { "epoch": 0.45139337554378256, "learning_rate": 9.750802646038483e-07, "loss": 4.6094, "step": 10480 }, { "epoch": 0.45225481328336997, "learning_rate": 9.750317826283696e-07, "loss": 4.7745, "step": 10500 }, { "epoch": 0.45311625102295733, "learning_rate": 9.749833006528904e-07, "loss": 4.7868, "step": 10520 }, { "epoch": 0.4539776887625447, "learning_rate": 9.749348186774117e-07, "loss": 4.6695, "step": 10540 }, { "epoch": 0.45483912650213204, "learning_rate": 9.748863367019328e-07, "loss": 4.9459, "step": 10560 }, { "epoch": 0.45570056424171945, "learning_rate": 9.748378547264538e-07, "loss": 4.856, "step": 10580 }, { "epoch": 0.4565620019813068, "learning_rate": 9.74789372750975e-07, "loss": 4.6656, "step": 10600 }, { "epoch": 0.45742343972089416, "learning_rate": 9.74740890775496e-07, "loss": 4.7138, "step": 10620 }, { "epoch": 0.4582848774604815, "learning_rate": 9.746924088000173e-07, "loss": 4.7696, "step": 10640 }, { "epoch": 0.45914631520006893, "learning_rate": 9.746439268245383e-07, "loss": 4.7906, "step": 10660 }, { "epoch": 0.4600077529396563, "learning_rate": 9.745954448490594e-07, "loss": 4.7629, "step": 10680 }, { "epoch": 0.46086919067924365, "learning_rate": 9.745469628735805e-07, "loss": 4.9862, "step": 10700 }, { "epoch": 0.461730628418831, "learning_rate": 9.744984808981016e-07, "loss": 4.9942, "step": 10720 }, { "epoch": 0.4625920661584184, "learning_rate": 9.744499989226228e-07, "loss": 5.1121, "step": 10740 }, { "epoch": 0.46345350389800577, "learning_rate": 9.744015169471437e-07, "loss": 4.7407, "step": 10760 }, { "epoch": 0.4643149416375931, "learning_rate": 9.74353034971665e-07, "loss": 4.5919, "step": 10780 }, { "epoch": 0.46517637937718054, "learning_rate": 9.74304552996186e-07, "loss": 4.8487, "step": 10800 }, { "epoch": 0.4660378171167679, "learning_rate": 9.742560710207071e-07, "loss": 4.4517, "step": 10820 }, { "epoch": 0.46689925485635525, "learning_rate": 9.742075890452282e-07, "loss": 4.8589, "step": 10840 }, { "epoch": 0.4677606925959426, "learning_rate": 9.741591070697493e-07, "loss": 4.6649, "step": 10860 }, { "epoch": 0.46862213033553, "learning_rate": 9.741106250942706e-07, "loss": 4.7476, "step": 10880 }, { "epoch": 0.4694835680751174, "learning_rate": 9.740621431187916e-07, "loss": 5.019, "step": 10900 }, { "epoch": 0.47034500581470473, "learning_rate": 9.740136611433127e-07, "loss": 4.7025, "step": 10920 }, { "epoch": 0.4712064435542921, "learning_rate": 9.73965179167834e-07, "loss": 4.8888, "step": 10940 }, { "epoch": 0.4720678812938795, "learning_rate": 9.739166971923548e-07, "loss": 4.9776, "step": 10960 }, { "epoch": 0.47292931903346686, "learning_rate": 9.73868215216876e-07, "loss": 4.5894, "step": 10980 }, { "epoch": 0.4737907567730542, "learning_rate": 9.73819733241397e-07, "loss": 4.925, "step": 11000 }, { "epoch": 0.4746521945126416, "learning_rate": 9.737712512659183e-07, "loss": 4.7209, "step": 11020 }, { "epoch": 0.475513632252229, "learning_rate": 9.737227692904393e-07, "loss": 4.6565, "step": 11040 }, { "epoch": 0.47637506999181634, "learning_rate": 9.736742873149604e-07, "loss": 4.6889, "step": 11060 }, { "epoch": 0.4772365077314037, "learning_rate": 9.736258053394815e-07, "loss": 4.462, "step": 11080 }, { "epoch": 0.4780979454709911, "learning_rate": 9.735773233640026e-07, "loss": 4.6663, "step": 11100 }, { "epoch": 0.47895938321057846, "learning_rate": 9.735288413885238e-07, "loss": 4.8906, "step": 11120 }, { "epoch": 0.4798208209501658, "learning_rate": 9.734803594130447e-07, "loss": 5.0451, "step": 11140 }, { "epoch": 0.4806822586897532, "learning_rate": 9.73431877437566e-07, "loss": 4.7728, "step": 11160 }, { "epoch": 0.4815436964293406, "learning_rate": 9.73383395462087e-07, "loss": 4.6112, "step": 11180 }, { "epoch": 0.48240513416892794, "learning_rate": 9.733349134866081e-07, "loss": 4.7198, "step": 11200 }, { "epoch": 0.4832665719085153, "learning_rate": 9.732864315111292e-07, "loss": 4.5283, "step": 11220 }, { "epoch": 0.4841280096481027, "learning_rate": 9.732379495356503e-07, "loss": 4.6935, "step": 11240 }, { "epoch": 0.48498944738769006, "learning_rate": 9.731894675601715e-07, "loss": 4.8756, "step": 11260 }, { "epoch": 0.4858508851272774, "learning_rate": 9.731409855846926e-07, "loss": 4.7816, "step": 11280 }, { "epoch": 0.4867123228668648, "learning_rate": 9.730925036092137e-07, "loss": 4.6481, "step": 11300 }, { "epoch": 0.4875737606064522, "learning_rate": 9.73044021633735e-07, "loss": 4.7033, "step": 11320 }, { "epoch": 0.48843519834603955, "learning_rate": 9.729955396582558e-07, "loss": 4.6986, "step": 11340 }, { "epoch": 0.4892966360856269, "learning_rate": 9.729470576827771e-07, "loss": 4.7492, "step": 11360 }, { "epoch": 0.49015807382521426, "learning_rate": 9.72898575707298e-07, "loss": 4.7513, "step": 11380 }, { "epoch": 0.49101951156480167, "learning_rate": 9.728500937318193e-07, "loss": 4.7428, "step": 11400 }, { "epoch": 0.491880949304389, "learning_rate": 9.728016117563403e-07, "loss": 4.6812, "step": 11420 }, { "epoch": 0.4927423870439764, "learning_rate": 9.727531297808614e-07, "loss": 4.9698, "step": 11440 }, { "epoch": 0.4936038247835638, "learning_rate": 9.727046478053825e-07, "loss": 4.6955, "step": 11460 }, { "epoch": 0.49446526252315115, "learning_rate": 9.726561658299035e-07, "loss": 4.7071, "step": 11480 }, { "epoch": 0.4953267002627385, "learning_rate": 9.726076838544248e-07, "loss": 4.477, "step": 11500 }, { "epoch": 0.49618813800232586, "learning_rate": 9.725592018789457e-07, "loss": 4.5629, "step": 11520 }, { "epoch": 0.4970495757419133, "learning_rate": 9.72510719903467e-07, "loss": 4.2942, "step": 11540 }, { "epoch": 0.49791101348150063, "learning_rate": 9.72462237927988e-07, "loss": 4.763, "step": 11560 }, { "epoch": 0.498772451221088, "learning_rate": 9.724137559525091e-07, "loss": 4.7874, "step": 11580 }, { "epoch": 0.49963388896067534, "learning_rate": 9.723652739770302e-07, "loss": 4.8124, "step": 11600 }, { "epoch": 0.5004953267002628, "learning_rate": 9.723167920015513e-07, "loss": 4.9469, "step": 11620 }, { "epoch": 0.5013567644398501, "learning_rate": 9.722683100260725e-07, "loss": 4.8652, "step": 11640 }, { "epoch": 0.5022182021794375, "learning_rate": 9.722198280505936e-07, "loss": 4.7594, "step": 11660 }, { "epoch": 0.5030796399190248, "learning_rate": 9.721713460751147e-07, "loss": 4.4601, "step": 11680 }, { "epoch": 0.5039410776586122, "learning_rate": 9.72122864099636e-07, "loss": 4.7384, "step": 11700 }, { "epoch": 0.5048025153981996, "learning_rate": 9.720743821241568e-07, "loss": 4.7338, "step": 11720 }, { "epoch": 0.505663953137787, "learning_rate": 9.720259001486781e-07, "loss": 4.7798, "step": 11740 }, { "epoch": 0.5065253908773744, "learning_rate": 9.71977418173199e-07, "loss": 4.988, "step": 11760 }, { "epoch": 0.5073868286169617, "learning_rate": 9.719289361977203e-07, "loss": 4.7253, "step": 11780 }, { "epoch": 0.5082482663565491, "learning_rate": 9.718804542222413e-07, "loss": 4.8133, "step": 11800 }, { "epoch": 0.5091097040961364, "learning_rate": 9.718319722467624e-07, "loss": 4.8508, "step": 11820 }, { "epoch": 0.5099711418357238, "learning_rate": 9.717834902712837e-07, "loss": 4.6992, "step": 11840 }, { "epoch": 0.5108325795753111, "learning_rate": 9.717350082958045e-07, "loss": 4.7687, "step": 11860 }, { "epoch": 0.5116940173148986, "learning_rate": 9.716865263203258e-07, "loss": 4.5679, "step": 11880 }, { "epoch": 0.512555455054486, "learning_rate": 9.71638044344847e-07, "loss": 4.7933, "step": 11900 }, { "epoch": 0.5134168927940733, "learning_rate": 9.71589562369368e-07, "loss": 4.6264, "step": 11920 }, { "epoch": 0.5142783305336607, "learning_rate": 9.715410803938892e-07, "loss": 4.6911, "step": 11940 }, { "epoch": 0.515139768273248, "learning_rate": 9.714925984184101e-07, "loss": 4.6685, "step": 11960 }, { "epoch": 0.5160012060128354, "learning_rate": 9.714441164429314e-07, "loss": 4.938, "step": 11980 }, { "epoch": 0.5168626437524227, "learning_rate": 9.713956344674523e-07, "loss": 4.6868, "step": 12000 }, { "epoch": 0.5177240814920102, "learning_rate": 9.713471524919735e-07, "loss": 4.7499, "step": 12020 }, { "epoch": 0.5185855192315976, "learning_rate": 9.712986705164946e-07, "loss": 4.5444, "step": 12040 }, { "epoch": 0.5194469569711849, "learning_rate": 9.712501885410157e-07, "loss": 4.7128, "step": 12060 }, { "epoch": 0.5203083947107723, "learning_rate": 9.71201706565537e-07, "loss": 4.6333, "step": 12080 }, { "epoch": 0.5211698324503596, "learning_rate": 9.711532245900578e-07, "loss": 4.7689, "step": 12100 }, { "epoch": 0.522031270189947, "learning_rate": 9.71104742614579e-07, "loss": 4.417, "step": 12120 }, { "epoch": 0.5228927079295344, "learning_rate": 9.710562606391e-07, "loss": 4.6453, "step": 12140 }, { "epoch": 0.5237541456691218, "learning_rate": 9.710077786636212e-07, "loss": 4.7497, "step": 12160 }, { "epoch": 0.5246155834087092, "learning_rate": 9.709592966881423e-07, "loss": 4.6708, "step": 12180 }, { "epoch": 0.5254770211482965, "learning_rate": 9.709108147126634e-07, "loss": 4.6749, "step": 12200 }, { "epoch": 0.5263384588878839, "learning_rate": 9.708623327371847e-07, "loss": 4.5748, "step": 12220 }, { "epoch": 0.5271998966274712, "learning_rate": 9.708138507617055e-07, "loss": 4.4467, "step": 12240 }, { "epoch": 0.5280613343670586, "learning_rate": 9.707653687862268e-07, "loss": 4.5542, "step": 12260 }, { "epoch": 0.528922772106646, "learning_rate": 9.707168868107479e-07, "loss": 4.7008, "step": 12280 }, { "epoch": 0.5297842098462333, "learning_rate": 9.70668404835269e-07, "loss": 4.4736, "step": 12300 }, { "epoch": 0.5306456475858208, "learning_rate": 9.706199228597902e-07, "loss": 4.5476, "step": 12320 }, { "epoch": 0.5315070853254081, "learning_rate": 9.70571440884311e-07, "loss": 4.8016, "step": 12340 }, { "epoch": 0.5323685230649955, "learning_rate": 9.705229589088324e-07, "loss": 4.8178, "step": 12360 }, { "epoch": 0.5332299608045828, "learning_rate": 9.704744769333532e-07, "loss": 4.6261, "step": 12380 }, { "epoch": 0.5340913985441702, "learning_rate": 9.704259949578745e-07, "loss": 4.5225, "step": 12400 }, { "epoch": 0.5349528362837576, "learning_rate": 9.703775129823956e-07, "loss": 4.5948, "step": 12420 }, { "epoch": 0.5358142740233449, "learning_rate": 9.703290310069167e-07, "loss": 4.6267, "step": 12440 }, { "epoch": 0.5366757117629324, "learning_rate": 9.70280549031438e-07, "loss": 4.8665, "step": 12460 }, { "epoch": 0.5375371495025197, "learning_rate": 9.702320670559588e-07, "loss": 4.6497, "step": 12480 }, { "epoch": 0.5383985872421071, "learning_rate": 9.7018358508048e-07, "loss": 4.6235, "step": 12500 }, { "epoch": 0.5392600249816945, "learning_rate": 9.701351031050012e-07, "loss": 4.5948, "step": 12520 }, { "epoch": 0.5401214627212818, "learning_rate": 9.700866211295222e-07, "loss": 4.586, "step": 12540 }, { "epoch": 0.5409829004608692, "learning_rate": 9.700381391540435e-07, "loss": 4.5366, "step": 12560 }, { "epoch": 0.5418443382004565, "learning_rate": 9.699896571785644e-07, "loss": 4.4859, "step": 12580 }, { "epoch": 0.542705775940044, "learning_rate": 9.699411752030857e-07, "loss": 4.5417, "step": 12600 }, { "epoch": 0.5435672136796313, "learning_rate": 9.698926932276065e-07, "loss": 4.6418, "step": 12620 }, { "epoch": 0.5444286514192187, "learning_rate": 9.698442112521278e-07, "loss": 4.6774, "step": 12640 }, { "epoch": 0.5452900891588061, "learning_rate": 9.697957292766489e-07, "loss": 4.4528, "step": 12660 }, { "epoch": 0.5461515268983934, "learning_rate": 9.6974724730117e-07, "loss": 4.8846, "step": 12680 }, { "epoch": 0.5470129646379808, "learning_rate": 9.696987653256912e-07, "loss": 4.6795, "step": 12700 }, { "epoch": 0.5478744023775681, "learning_rate": 9.69650283350212e-07, "loss": 4.6705, "step": 12720 }, { "epoch": 0.5487358401171555, "learning_rate": 9.696018013747334e-07, "loss": 4.7057, "step": 12740 }, { "epoch": 0.549597277856743, "learning_rate": 9.695533193992542e-07, "loss": 4.8444, "step": 12760 }, { "epoch": 0.5504587155963303, "learning_rate": 9.695048374237755e-07, "loss": 4.598, "step": 12780 }, { "epoch": 0.5513201533359177, "learning_rate": 9.694563554482966e-07, "loss": 4.622, "step": 12800 }, { "epoch": 0.552181591075505, "learning_rate": 9.694078734728177e-07, "loss": 4.971, "step": 12820 }, { "epoch": 0.5530430288150924, "learning_rate": 9.69359391497339e-07, "loss": 4.5947, "step": 12840 }, { "epoch": 0.5539044665546797, "learning_rate": 9.693109095218598e-07, "loss": 4.2249, "step": 12860 }, { "epoch": 0.5547659042942671, "learning_rate": 9.69262427546381e-07, "loss": 4.5096, "step": 12880 }, { "epoch": 0.5556273420338546, "learning_rate": 9.692139455709022e-07, "loss": 4.6678, "step": 12900 }, { "epoch": 0.5564887797734419, "learning_rate": 9.691654635954232e-07, "loss": 4.6959, "step": 12920 }, { "epoch": 0.5573502175130293, "learning_rate": 9.691169816199445e-07, "loss": 4.5744, "step": 12940 }, { "epoch": 0.5582116552526166, "learning_rate": 9.690684996444654e-07, "loss": 4.8221, "step": 12960 }, { "epoch": 0.559073092992204, "learning_rate": 9.690200176689867e-07, "loss": 4.6241, "step": 12980 }, { "epoch": 0.5599345307317913, "learning_rate": 9.689715356935075e-07, "loss": 4.6407, "step": 13000 }, { "epoch": 0.5607959684713787, "learning_rate": 9.689230537180288e-07, "loss": 4.5464, "step": 13020 }, { "epoch": 0.561657406210966, "learning_rate": 9.688745717425499e-07, "loss": 4.6636, "step": 13040 }, { "epoch": 0.5625188439505535, "learning_rate": 9.68826089767071e-07, "loss": 4.5332, "step": 13060 }, { "epoch": 0.5633802816901409, "learning_rate": 9.687776077915922e-07, "loss": 4.5259, "step": 13080 }, { "epoch": 0.5642417194297282, "learning_rate": 9.687291258161133e-07, "loss": 4.7069, "step": 13100 }, { "epoch": 0.5651031571693156, "learning_rate": 9.686806438406344e-07, "loss": 4.5002, "step": 13120 }, { "epoch": 0.5659645949089029, "learning_rate": 9.686321618651552e-07, "loss": 4.802, "step": 13140 }, { "epoch": 0.5668260326484903, "learning_rate": 9.685836798896765e-07, "loss": 4.4615, "step": 13160 }, { "epoch": 0.5676874703880777, "learning_rate": 9.685351979141976e-07, "loss": 4.6674, "step": 13180 }, { "epoch": 0.5685489081276651, "learning_rate": 9.684867159387187e-07, "loss": 4.5308, "step": 13200 }, { "epoch": 0.5694103458672525, "learning_rate": 9.6843823396324e-07, "loss": 4.6764, "step": 13220 }, { "epoch": 0.5702717836068398, "learning_rate": 9.683897519877608e-07, "loss": 4.7747, "step": 13240 }, { "epoch": 0.5711332213464272, "learning_rate": 9.68341270012282e-07, "loss": 5.0853, "step": 13260 }, { "epoch": 0.5719946590860145, "learning_rate": 9.682927880368032e-07, "loss": 4.6953, "step": 13280 }, { "epoch": 0.5728560968256019, "learning_rate": 9.682443060613242e-07, "loss": 4.7413, "step": 13300 }, { "epoch": 0.5737175345651893, "learning_rate": 9.681958240858455e-07, "loss": 4.5876, "step": 13320 }, { "epoch": 0.5745789723047767, "learning_rate": 9.681473421103664e-07, "loss": 4.7571, "step": 13340 }, { "epoch": 0.5754404100443641, "learning_rate": 9.680988601348877e-07, "loss": 4.837, "step": 13360 }, { "epoch": 0.5763018477839514, "learning_rate": 9.680503781594085e-07, "loss": 4.5773, "step": 13380 }, { "epoch": 0.5771632855235388, "learning_rate": 9.680018961839298e-07, "loss": 4.7744, "step": 13400 }, { "epoch": 0.5780247232631262, "learning_rate": 9.679534142084509e-07, "loss": 4.7419, "step": 13420 }, { "epoch": 0.5788861610027135, "learning_rate": 9.67904932232972e-07, "loss": 4.8362, "step": 13440 }, { "epoch": 0.5797475987423009, "learning_rate": 9.678564502574932e-07, "loss": 4.7081, "step": 13460 }, { "epoch": 0.5806090364818882, "learning_rate": 9.678079682820143e-07, "loss": 4.7993, "step": 13480 }, { "epoch": 0.5814704742214757, "learning_rate": 9.677594863065354e-07, "loss": 4.495, "step": 13500 }, { "epoch": 0.582331911961063, "learning_rate": 9.677110043310564e-07, "loss": 4.5018, "step": 13520 }, { "epoch": 0.5831933497006504, "learning_rate": 9.676625223555775e-07, "loss": 4.7964, "step": 13540 }, { "epoch": 0.5840547874402378, "learning_rate": 9.676140403800988e-07, "loss": 4.538, "step": 13560 }, { "epoch": 0.5849162251798251, "learning_rate": 9.675655584046197e-07, "loss": 4.6586, "step": 13580 }, { "epoch": 0.5857776629194125, "learning_rate": 9.67517076429141e-07, "loss": 4.4211, "step": 13600 }, { "epoch": 0.5866391006589998, "learning_rate": 9.67468594453662e-07, "loss": 4.7492, "step": 13620 }, { "epoch": 0.5875005383985873, "learning_rate": 9.67420112478183e-07, "loss": 4.5692, "step": 13640 }, { "epoch": 0.5883619761381746, "learning_rate": 9.673716305027042e-07, "loss": 4.4928, "step": 13660 }, { "epoch": 0.589223413877762, "learning_rate": 9.673231485272252e-07, "loss": 4.5794, "step": 13680 }, { "epoch": 0.5900848516173494, "learning_rate": 9.672746665517465e-07, "loss": 4.658, "step": 13700 }, { "epoch": 0.5909462893569367, "learning_rate": 9.672261845762674e-07, "loss": 4.4166, "step": 13720 }, { "epoch": 0.5918077270965241, "learning_rate": 9.671777026007886e-07, "loss": 4.7738, "step": 13740 }, { "epoch": 0.5926691648361114, "learning_rate": 9.671292206253095e-07, "loss": 4.6694, "step": 13760 }, { "epoch": 0.5935306025756989, "learning_rate": 9.670807386498308e-07, "loss": 4.796, "step": 13780 }, { "epoch": 0.5943920403152863, "learning_rate": 9.670322566743519e-07, "loss": 4.506, "step": 13800 }, { "epoch": 0.5952534780548736, "learning_rate": 9.66983774698873e-07, "loss": 4.4972, "step": 13820 }, { "epoch": 0.596114915794461, "learning_rate": 9.669352927233942e-07, "loss": 4.7098, "step": 13840 }, { "epoch": 0.5969763535340483, "learning_rate": 9.668868107479153e-07, "loss": 4.3856, "step": 13860 }, { "epoch": 0.5978377912736357, "learning_rate": 9.668383287724364e-07, "loss": 4.6158, "step": 13880 }, { "epoch": 0.598699229013223, "learning_rate": 9.667898467969574e-07, "loss": 4.5539, "step": 13900 }, { "epoch": 0.5995606667528104, "learning_rate": 9.667413648214785e-07, "loss": 4.6079, "step": 13920 }, { "epoch": 0.6004221044923979, "learning_rate": 9.666928828459998e-07, "loss": 4.7394, "step": 13940 }, { "epoch": 0.6012835422319852, "learning_rate": 9.666444008705206e-07, "loss": 4.6476, "step": 13960 }, { "epoch": 0.6021449799715726, "learning_rate": 9.66595918895042e-07, "loss": 4.5925, "step": 13980 }, { "epoch": 0.6030064177111599, "learning_rate": 9.66547436919563e-07, "loss": 4.4916, "step": 14000 }, { "epoch": 0.6038678554507473, "learning_rate": 9.66498954944084e-07, "loss": 4.584, "step": 14020 }, { "epoch": 0.6047292931903346, "learning_rate": 9.664504729686051e-07, "loss": 4.926, "step": 14040 }, { "epoch": 0.605590730929922, "learning_rate": 9.664019909931262e-07, "loss": 4.7048, "step": 14060 }, { "epoch": 0.6064521686695095, "learning_rate": 9.663535090176475e-07, "loss": 4.8114, "step": 14080 }, { "epoch": 0.6073136064090968, "learning_rate": 9.663050270421686e-07, "loss": 4.6444, "step": 14100 }, { "epoch": 0.6081750441486842, "learning_rate": 9.662565450666896e-07, "loss": 4.7975, "step": 14120 }, { "epoch": 0.6090364818882715, "learning_rate": 9.662080630912107e-07, "loss": 4.7108, "step": 14140 }, { "epoch": 0.6098979196278589, "learning_rate": 9.661595811157318e-07, "loss": 4.5902, "step": 14160 }, { "epoch": 0.6107593573674462, "learning_rate": 9.66111099140253e-07, "loss": 4.4984, "step": 14180 }, { "epoch": 0.6116207951070336, "learning_rate": 9.66062617164774e-07, "loss": 4.8832, "step": 14200 }, { "epoch": 0.612482232846621, "learning_rate": 9.660141351892952e-07, "loss": 4.7217, "step": 14220 }, { "epoch": 0.6133436705862084, "learning_rate": 9.659656532138163e-07, "loss": 4.6702, "step": 14240 }, { "epoch": 0.6142051083257958, "learning_rate": 9.659171712383374e-07, "loss": 4.6737, "step": 14260 }, { "epoch": 0.6150665460653831, "learning_rate": 9.658686892628584e-07, "loss": 4.5198, "step": 14280 }, { "epoch": 0.6159279838049705, "learning_rate": 9.658202072873795e-07, "loss": 4.619, "step": 14300 }, { "epoch": 0.6167894215445578, "learning_rate": 9.657717253119008e-07, "loss": 4.4772, "step": 14320 }, { "epoch": 0.6176508592841452, "learning_rate": 9.657232433364216e-07, "loss": 4.6145, "step": 14340 }, { "epoch": 0.6185122970237326, "learning_rate": 9.65674761360943e-07, "loss": 4.9091, "step": 14360 }, { "epoch": 0.61937373476332, "learning_rate": 9.65626279385464e-07, "loss": 4.5172, "step": 14380 }, { "epoch": 0.6202351725029074, "learning_rate": 9.65577797409985e-07, "loss": 4.6175, "step": 14400 }, { "epoch": 0.6210966102424947, "learning_rate": 9.655293154345061e-07, "loss": 4.7035, "step": 14420 }, { "epoch": 0.6219580479820821, "learning_rate": 9.654808334590272e-07, "loss": 4.8991, "step": 14440 }, { "epoch": 0.6228194857216695, "learning_rate": 9.654323514835485e-07, "loss": 4.5864, "step": 14460 }, { "epoch": 0.6236809234612568, "learning_rate": 9.653838695080696e-07, "loss": 4.9811, "step": 14480 }, { "epoch": 0.6245423612008442, "learning_rate": 9.653353875325906e-07, "loss": 4.5354, "step": 14500 }, { "epoch": 0.6254037989404316, "learning_rate": 9.652869055571117e-07, "loss": 4.8074, "step": 14520 }, { "epoch": 0.626265236680019, "learning_rate": 9.652384235816328e-07, "loss": 4.7193, "step": 14540 }, { "epoch": 0.6271266744196063, "learning_rate": 9.65189941606154e-07, "loss": 4.7722, "step": 14560 }, { "epoch": 0.6279881121591937, "learning_rate": 9.65141459630675e-07, "loss": 4.6782, "step": 14580 }, { "epoch": 0.6288495498987811, "learning_rate": 9.650929776551962e-07, "loss": 4.5165, "step": 14600 }, { "epoch": 0.6297109876383684, "learning_rate": 9.650444956797173e-07, "loss": 4.682, "step": 14620 }, { "epoch": 0.6305724253779558, "learning_rate": 9.649960137042383e-07, "loss": 4.7696, "step": 14640 }, { "epoch": 0.6314338631175431, "learning_rate": 9.649475317287594e-07, "loss": 4.44, "step": 14660 }, { "epoch": 0.6322953008571306, "learning_rate": 9.648990497532805e-07, "loss": 4.9536, "step": 14680 }, { "epoch": 0.633156738596718, "learning_rate": 9.648505677778018e-07, "loss": 4.4343, "step": 14700 }, { "epoch": 0.6340181763363053, "learning_rate": 9.648020858023228e-07, "loss": 4.7066, "step": 14720 }, { "epoch": 0.6348796140758927, "learning_rate": 9.64753603826844e-07, "loss": 4.737, "step": 14740 }, { "epoch": 0.63574105181548, "learning_rate": 9.64705121851365e-07, "loss": 4.5779, "step": 14760 }, { "epoch": 0.6366024895550674, "learning_rate": 9.64656639875886e-07, "loss": 4.6775, "step": 14780 }, { "epoch": 0.6374639272946547, "learning_rate": 9.646081579004071e-07, "loss": 4.6419, "step": 14800 }, { "epoch": 0.6383253650342422, "learning_rate": 9.645596759249282e-07, "loss": 4.793, "step": 14820 }, { "epoch": 0.6391868027738296, "learning_rate": 9.645111939494495e-07, "loss": 4.6022, "step": 14840 }, { "epoch": 0.6400482405134169, "learning_rate": 9.644627119739706e-07, "loss": 4.7609, "step": 14860 }, { "epoch": 0.6409096782530043, "learning_rate": 9.644142299984916e-07, "loss": 4.664, "step": 14880 }, { "epoch": 0.6417711159925916, "learning_rate": 9.643657480230127e-07, "loss": 4.6633, "step": 14900 }, { "epoch": 0.642632553732179, "learning_rate": 9.643172660475338e-07, "loss": 4.7861, "step": 14920 }, { "epoch": 0.6434939914717663, "learning_rate": 9.64268784072055e-07, "loss": 4.6721, "step": 14940 }, { "epoch": 0.6443554292113538, "learning_rate": 9.64220302096576e-07, "loss": 4.6615, "step": 14960 }, { "epoch": 0.6452168669509412, "learning_rate": 9.641718201210972e-07, "loss": 4.668, "step": 14980 }, { "epoch": 0.6460783046905285, "learning_rate": 9.641233381456183e-07, "loss": 4.6814, "step": 15000 }, { "epoch": 0.6469397424301159, "learning_rate": 9.640748561701393e-07, "loss": 4.6951, "step": 15020 }, { "epoch": 0.6478011801697032, "learning_rate": 9.640263741946604e-07, "loss": 4.4504, "step": 15040 }, { "epoch": 0.6486626179092906, "learning_rate": 9.639778922191815e-07, "loss": 4.6223, "step": 15060 }, { "epoch": 0.6495240556488779, "learning_rate": 9.639294102437028e-07, "loss": 4.5445, "step": 15080 }, { "epoch": 0.6503854933884653, "learning_rate": 9.638809282682238e-07, "loss": 4.714, "step": 15100 }, { "epoch": 0.6512469311280528, "learning_rate": 9.63832446292745e-07, "loss": 4.5464, "step": 15120 }, { "epoch": 0.6521083688676401, "learning_rate": 9.637839643172662e-07, "loss": 4.266, "step": 15140 }, { "epoch": 0.6529698066072275, "learning_rate": 9.63735482341787e-07, "loss": 4.7638, "step": 15160 }, { "epoch": 0.6538312443468148, "learning_rate": 9.636870003663083e-07, "loss": 4.6303, "step": 15180 }, { "epoch": 0.6546926820864022, "learning_rate": 9.636385183908292e-07, "loss": 4.7192, "step": 15200 }, { "epoch": 0.6555541198259895, "learning_rate": 9.635900364153505e-07, "loss": 4.4276, "step": 15220 }, { "epoch": 0.6564155575655769, "learning_rate": 9.635415544398716e-07, "loss": 4.6025, "step": 15240 }, { "epoch": 0.6572769953051644, "learning_rate": 9.634930724643926e-07, "loss": 4.6005, "step": 15260 }, { "epoch": 0.6581384330447517, "learning_rate": 9.634445904889137e-07, "loss": 4.6788, "step": 15280 }, { "epoch": 0.6589998707843391, "learning_rate": 9.633961085134348e-07, "loss": 4.5772, "step": 15300 }, { "epoch": 0.6598613085239264, "learning_rate": 9.63347626537956e-07, "loss": 4.7873, "step": 15320 }, { "epoch": 0.6607227462635138, "learning_rate": 9.63299144562477e-07, "loss": 4.7228, "step": 15340 }, { "epoch": 0.6615841840031012, "learning_rate": 9.632506625869982e-07, "loss": 4.5213, "step": 15360 }, { "epoch": 0.6624456217426885, "learning_rate": 9.632021806115193e-07, "loss": 4.7668, "step": 15380 }, { "epoch": 0.663307059482276, "learning_rate": 9.631536986360403e-07, "loss": 4.6066, "step": 15400 }, { "epoch": 0.6641684972218633, "learning_rate": 9.631052166605614e-07, "loss": 4.642, "step": 15420 }, { "epoch": 0.6650299349614507, "learning_rate": 9.630567346850825e-07, "loss": 4.5487, "step": 15440 }, { "epoch": 0.665891372701038, "learning_rate": 9.630082527096038e-07, "loss": 4.5664, "step": 15460 }, { "epoch": 0.6667528104406254, "learning_rate": 9.629597707341248e-07, "loss": 4.5904, "step": 15480 }, { "epoch": 0.6676142481802128, "learning_rate": 9.62911288758646e-07, "loss": 4.5054, "step": 15500 }, { "epoch": 0.6684756859198001, "learning_rate": 9.628628067831672e-07, "loss": 4.6034, "step": 15520 }, { "epoch": 0.6693371236593875, "learning_rate": 9.62814324807688e-07, "loss": 4.6439, "step": 15540 }, { "epoch": 0.6701985613989749, "learning_rate": 9.627658428322093e-07, "loss": 4.3408, "step": 15560 }, { "epoch": 0.6710599991385623, "learning_rate": 9.627173608567302e-07, "loss": 4.8935, "step": 15580 }, { "epoch": 0.6719214368781496, "learning_rate": 9.626688788812515e-07, "loss": 4.569, "step": 15600 }, { "epoch": 0.672782874617737, "learning_rate": 9.626203969057725e-07, "loss": 4.2996, "step": 15620 }, { "epoch": 0.6736443123573244, "learning_rate": 9.625719149302936e-07, "loss": 4.6604, "step": 15640 }, { "epoch": 0.6745057500969117, "learning_rate": 9.625234329548147e-07, "loss": 4.5706, "step": 15660 }, { "epoch": 0.6753671878364991, "learning_rate": 9.624749509793358e-07, "loss": 4.5631, "step": 15680 }, { "epoch": 0.6762286255760865, "learning_rate": 9.62426469003857e-07, "loss": 4.7159, "step": 15700 }, { "epoch": 0.6770900633156739, "learning_rate": 9.623779870283781e-07, "loss": 4.7933, "step": 15720 }, { "epoch": 0.6779515010552613, "learning_rate": 9.623295050528992e-07, "loss": 4.654, "step": 15740 }, { "epoch": 0.6788129387948486, "learning_rate": 9.622810230774205e-07, "loss": 4.4683, "step": 15760 }, { "epoch": 0.679674376534436, "learning_rate": 9.622325411019413e-07, "loss": 4.7185, "step": 15780 }, { "epoch": 0.6805358142740233, "learning_rate": 9.621840591264626e-07, "loss": 4.643, "step": 15800 }, { "epoch": 0.6813972520136107, "learning_rate": 9.621355771509835e-07, "loss": 4.367, "step": 15820 }, { "epoch": 0.682258689753198, "learning_rate": 9.620870951755048e-07, "loss": 4.6144, "step": 15840 }, { "epoch": 0.6831201274927855, "learning_rate": 9.620386132000258e-07, "loss": 4.5154, "step": 15860 }, { "epoch": 0.6839815652323729, "learning_rate": 9.61990131224547e-07, "loss": 4.6272, "step": 15880 }, { "epoch": 0.6848430029719602, "learning_rate": 9.619416492490682e-07, "loss": 4.5658, "step": 15900 }, { "epoch": 0.6857044407115476, "learning_rate": 9.61893167273589e-07, "loss": 4.4771, "step": 15920 }, { "epoch": 0.6865658784511349, "learning_rate": 9.618446852981103e-07, "loss": 4.3724, "step": 15940 }, { "epoch": 0.6874273161907223, "learning_rate": 9.617962033226312e-07, "loss": 4.7635, "step": 15960 }, { "epoch": 0.6882887539303096, "learning_rate": 9.617477213471525e-07, "loss": 4.5284, "step": 15980 }, { "epoch": 0.6891501916698971, "learning_rate": 9.616992393716735e-07, "loss": 4.7441, "step": 16000 }, { "epoch": 0.6900116294094845, "learning_rate": 9.616507573961946e-07, "loss": 4.6897, "step": 16020 }, { "epoch": 0.6908730671490718, "learning_rate": 9.616022754207159e-07, "loss": 4.6016, "step": 16040 }, { "epoch": 0.6917345048886592, "learning_rate": 9.615537934452368e-07, "loss": 4.6313, "step": 16060 }, { "epoch": 0.6925959426282465, "learning_rate": 9.61505311469758e-07, "loss": 4.6305, "step": 16080 }, { "epoch": 0.6934573803678339, "learning_rate": 9.614568294942791e-07, "loss": 4.4422, "step": 16100 }, { "epoch": 0.6943188181074212, "learning_rate": 9.614083475188002e-07, "loss": 4.7132, "step": 16120 }, { "epoch": 0.6951802558470087, "learning_rate": 9.613598655433215e-07, "loss": 4.566, "step": 16140 }, { "epoch": 0.6960416935865961, "learning_rate": 9.613113835678423e-07, "loss": 4.7489, "step": 16160 }, { "epoch": 0.6969031313261834, "learning_rate": 9.612629015923636e-07, "loss": 4.6467, "step": 16180 }, { "epoch": 0.6977645690657708, "learning_rate": 9.612144196168845e-07, "loss": 4.573, "step": 16200 }, { "epoch": 0.6986260068053581, "learning_rate": 9.611659376414057e-07, "loss": 4.778, "step": 16220 }, { "epoch": 0.6994874445449455, "learning_rate": 9.611174556659268e-07, "loss": 4.2616, "step": 16240 }, { "epoch": 0.7003488822845328, "learning_rate": 9.610689736904479e-07, "loss": 4.5403, "step": 16260 }, { "epoch": 0.7012103200241202, "learning_rate": 9.610204917149692e-07, "loss": 4.436, "step": 16280 }, { "epoch": 0.7020717577637077, "learning_rate": 9.6097200973949e-07, "loss": 4.7559, "step": 16300 }, { "epoch": 0.702933195503295, "learning_rate": 9.609235277640113e-07, "loss": 4.5752, "step": 16320 }, { "epoch": 0.7037946332428824, "learning_rate": 9.608750457885324e-07, "loss": 4.7316, "step": 16340 }, { "epoch": 0.7046560709824697, "learning_rate": 9.608265638130535e-07, "loss": 4.6856, "step": 16360 }, { "epoch": 0.7055175087220571, "learning_rate": 9.607780818375745e-07, "loss": 4.4327, "step": 16380 }, { "epoch": 0.7063789464616445, "learning_rate": 9.607295998620956e-07, "loss": 4.5428, "step": 16400 }, { "epoch": 0.7072403842012318, "learning_rate": 9.606811178866169e-07, "loss": 4.4309, "step": 16420 }, { "epoch": 0.7081018219408193, "learning_rate": 9.606326359111377e-07, "loss": 4.8195, "step": 16440 }, { "epoch": 0.7089632596804066, "learning_rate": 9.60584153935659e-07, "loss": 4.7752, "step": 16460 }, { "epoch": 0.709824697419994, "learning_rate": 9.6053567196018e-07, "loss": 4.8154, "step": 16480 }, { "epoch": 0.7106861351595813, "learning_rate": 9.604871899847012e-07, "loss": 4.3601, "step": 16500 }, { "epoch": 0.7115475728991687, "learning_rate": 9.604387080092225e-07, "loss": 4.7314, "step": 16520 }, { "epoch": 0.7124090106387561, "learning_rate": 9.603902260337433e-07, "loss": 4.588, "step": 16540 }, { "epoch": 0.7132704483783434, "learning_rate": 9.603417440582646e-07, "loss": 4.7045, "step": 16560 }, { "epoch": 0.7141318861179309, "learning_rate": 9.602932620827855e-07, "loss": 4.5338, "step": 16580 }, { "epoch": 0.7149933238575182, "learning_rate": 9.602447801073067e-07, "loss": 4.9495, "step": 16600 }, { "epoch": 0.7158547615971056, "learning_rate": 9.601962981318278e-07, "loss": 4.5542, "step": 16620 }, { "epoch": 0.716716199336693, "learning_rate": 9.601478161563489e-07, "loss": 4.4112, "step": 16640 }, { "epoch": 0.7175776370762803, "learning_rate": 9.600993341808702e-07, "loss": 4.5342, "step": 16660 }, { "epoch": 0.7184390748158677, "learning_rate": 9.60050852205391e-07, "loss": 4.4651, "step": 16680 }, { "epoch": 0.719300512555455, "learning_rate": 9.600023702299123e-07, "loss": 4.7432, "step": 16700 }, { "epoch": 0.7201619502950424, "learning_rate": 9.599538882544334e-07, "loss": 4.6841, "step": 16720 }, { "epoch": 0.7210233880346298, "learning_rate": 9.599054062789545e-07, "loss": 4.4546, "step": 16740 }, { "epoch": 0.7218848257742172, "learning_rate": 9.598569243034757e-07, "loss": 4.7137, "step": 16760 }, { "epoch": 0.7227462635138046, "learning_rate": 9.598084423279966e-07, "loss": 4.5197, "step": 16780 }, { "epoch": 0.7236077012533919, "learning_rate": 9.597599603525179e-07, "loss": 4.6357, "step": 16800 }, { "epoch": 0.7244691389929793, "learning_rate": 9.597114783770387e-07, "loss": 4.4496, "step": 16820 }, { "epoch": 0.7253305767325666, "learning_rate": 9.5966299640156e-07, "loss": 4.5952, "step": 16840 }, { "epoch": 0.726192014472154, "learning_rate": 9.59614514426081e-07, "loss": 4.3745, "step": 16860 }, { "epoch": 0.7270534522117414, "learning_rate": 9.595660324506022e-07, "loss": 4.5424, "step": 16880 }, { "epoch": 0.7279148899513288, "learning_rate": 9.595175504751234e-07, "loss": 4.5643, "step": 16900 }, { "epoch": 0.7287763276909162, "learning_rate": 9.594690684996443e-07, "loss": 4.5414, "step": 16920 }, { "epoch": 0.7296377654305035, "learning_rate": 9.594205865241656e-07, "loss": 4.4815, "step": 16940 }, { "epoch": 0.7304992031700909, "learning_rate": 9.593721045486865e-07, "loss": 4.371, "step": 16960 }, { "epoch": 0.7313606409096782, "learning_rate": 9.593236225732077e-07, "loss": 4.6894, "step": 16980 }, { "epoch": 0.7322220786492656, "learning_rate": 9.592751405977288e-07, "loss": 4.603, "step": 17000 }, { "epoch": 0.7330835163888529, "learning_rate": 9.592266586222499e-07, "loss": 4.6009, "step": 17020 }, { "epoch": 0.7339449541284404, "learning_rate": 9.591781766467712e-07, "loss": 4.5632, "step": 17040 }, { "epoch": 0.7348063918680278, "learning_rate": 9.59129694671292e-07, "loss": 4.5738, "step": 17060 }, { "epoch": 0.7356678296076151, "learning_rate": 9.590812126958133e-07, "loss": 4.7349, "step": 17080 }, { "epoch": 0.7365292673472025, "learning_rate": 9.590327307203344e-07, "loss": 4.7286, "step": 17100 }, { "epoch": 0.7373907050867898, "learning_rate": 9.589842487448554e-07, "loss": 4.8498, "step": 17120 }, { "epoch": 0.7382521428263772, "learning_rate": 9.589357667693767e-07, "loss": 4.7121, "step": 17140 }, { "epoch": 0.7391135805659645, "learning_rate": 9.588872847938976e-07, "loss": 4.3988, "step": 17160 }, { "epoch": 0.739975018305552, "learning_rate": 9.588388028184189e-07, "loss": 4.6037, "step": 17180 }, { "epoch": 0.7408364560451394, "learning_rate": 9.587903208429397e-07, "loss": 4.6311, "step": 17200 }, { "epoch": 0.7416978937847267, "learning_rate": 9.58741838867461e-07, "loss": 4.5533, "step": 17220 }, { "epoch": 0.7425593315243141, "learning_rate": 9.58693356891982e-07, "loss": 4.6068, "step": 17240 }, { "epoch": 0.7434207692639014, "learning_rate": 9.586448749165032e-07, "loss": 4.6679, "step": 17260 }, { "epoch": 0.7442822070034888, "learning_rate": 9.585963929410244e-07, "loss": 4.7084, "step": 17280 }, { "epoch": 0.7451436447430762, "learning_rate": 9.585479109655455e-07, "loss": 4.5974, "step": 17300 }, { "epoch": 0.7460050824826636, "learning_rate": 9.584994289900666e-07, "loss": 4.8029, "step": 17320 }, { "epoch": 0.746866520222251, "learning_rate": 9.584509470145877e-07, "loss": 4.6615, "step": 17340 }, { "epoch": 0.7477279579618383, "learning_rate": 9.584024650391087e-07, "loss": 4.6187, "step": 17360 }, { "epoch": 0.7485893957014257, "learning_rate": 9.5835398306363e-07, "loss": 4.6186, "step": 17380 }, { "epoch": 0.749450833441013, "learning_rate": 9.583055010881509e-07, "loss": 4.5701, "step": 17400 }, { "epoch": 0.7503122711806004, "learning_rate": 9.582570191126722e-07, "loss": 4.8236, "step": 17420 }, { "epoch": 0.7511737089201878, "learning_rate": 9.58208537137193e-07, "loss": 4.7821, "step": 17440 }, { "epoch": 0.7520351466597751, "learning_rate": 9.581600551617143e-07, "loss": 4.3822, "step": 17460 }, { "epoch": 0.7528965843993626, "learning_rate": 9.581115731862354e-07, "loss": 4.8352, "step": 17480 }, { "epoch": 0.7537580221389499, "learning_rate": 9.580630912107564e-07, "loss": 4.5794, "step": 17500 }, { "epoch": 0.7546194598785373, "learning_rate": 9.580146092352777e-07, "loss": 4.3318, "step": 17520 }, { "epoch": 0.7554808976181246, "learning_rate": 9.579661272597986e-07, "loss": 4.4593, "step": 17540 }, { "epoch": 0.756342335357712, "learning_rate": 9.579176452843199e-07, "loss": 4.4453, "step": 17560 }, { "epoch": 0.7572037730972994, "learning_rate": 9.578691633088407e-07, "loss": 4.5113, "step": 17580 }, { "epoch": 0.7580652108368867, "learning_rate": 9.57820681333362e-07, "loss": 4.4507, "step": 17600 }, { "epoch": 0.7589266485764742, "learning_rate": 9.57772199357883e-07, "loss": 4.5328, "step": 17620 }, { "epoch": 0.7597880863160615, "learning_rate": 9.577237173824042e-07, "loss": 4.6588, "step": 17640 }, { "epoch": 0.7606495240556489, "learning_rate": 9.576752354069254e-07, "loss": 4.5698, "step": 17660 }, { "epoch": 0.7615109617952363, "learning_rate": 9.576267534314465e-07, "loss": 4.5499, "step": 17680 }, { "epoch": 0.7623723995348236, "learning_rate": 9.575782714559676e-07, "loss": 4.6859, "step": 17700 }, { "epoch": 0.763233837274411, "learning_rate": 9.575297894804887e-07, "loss": 4.3594, "step": 17720 }, { "epoch": 0.7640952750139983, "learning_rate": 9.574813075050097e-07, "loss": 4.3802, "step": 17740 }, { "epoch": 0.7649567127535858, "learning_rate": 9.57432825529531e-07, "loss": 4.5581, "step": 17760 }, { "epoch": 0.7658181504931731, "learning_rate": 9.573843435540519e-07, "loss": 4.6645, "step": 17780 }, { "epoch": 0.7666795882327605, "learning_rate": 9.573358615785731e-07, "loss": 4.4874, "step": 17800 }, { "epoch": 0.7675410259723479, "learning_rate": 9.572873796030942e-07, "loss": 4.401, "step": 17820 }, { "epoch": 0.7684024637119352, "learning_rate": 9.572388976276153e-07, "loss": 4.5584, "step": 17840 }, { "epoch": 0.7692639014515226, "learning_rate": 9.571904156521364e-07, "loss": 4.3602, "step": 17860 }, { "epoch": 0.7701253391911099, "learning_rate": 9.571419336766574e-07, "loss": 4.5251, "step": 17880 }, { "epoch": 0.7709867769306973, "learning_rate": 9.570934517011787e-07, "loss": 4.51, "step": 17900 }, { "epoch": 0.7718482146702847, "learning_rate": 9.570449697256998e-07, "loss": 4.3659, "step": 17920 }, { "epoch": 0.7727096524098721, "learning_rate": 9.569964877502209e-07, "loss": 4.6877, "step": 17940 }, { "epoch": 0.7735710901494595, "learning_rate": 9.56948005774742e-07, "loss": 4.4384, "step": 17960 }, { "epoch": 0.7744325278890468, "learning_rate": 9.56899523799263e-07, "loss": 4.763, "step": 17980 }, { "epoch": 0.7752939656286342, "learning_rate": 9.56851041823784e-07, "loss": 4.7351, "step": 18000 }, { "epoch": 0.7761554033682215, "learning_rate": 9.568025598483051e-07, "loss": 4.539, "step": 18020 }, { "epoch": 0.7770168411078089, "learning_rate": 9.567540778728264e-07, "loss": 4.359, "step": 18040 }, { "epoch": 0.7778782788473964, "learning_rate": 9.567055958973475e-07, "loss": 4.4866, "step": 18060 }, { "epoch": 0.7787397165869837, "learning_rate": 9.566571139218686e-07, "loss": 4.5832, "step": 18080 }, { "epoch": 0.7796011543265711, "learning_rate": 9.566086319463896e-07, "loss": 4.3959, "step": 18100 }, { "epoch": 0.7804625920661584, "learning_rate": 9.565601499709107e-07, "loss": 4.4185, "step": 18120 }, { "epoch": 0.7813240298057458, "learning_rate": 9.56511667995432e-07, "loss": 4.4384, "step": 18140 }, { "epoch": 0.7821854675453331, "learning_rate": 9.564631860199529e-07, "loss": 4.5399, "step": 18160 }, { "epoch": 0.7830469052849205, "learning_rate": 9.564147040444741e-07, "loss": 4.6471, "step": 18180 }, { "epoch": 0.783908343024508, "learning_rate": 9.563662220689952e-07, "loss": 4.72, "step": 18200 }, { "epoch": 0.7847697807640953, "learning_rate": 9.563177400935163e-07, "loss": 4.489, "step": 18220 }, { "epoch": 0.7856312185036827, "learning_rate": 9.562692581180374e-07, "loss": 4.5276, "step": 18240 }, { "epoch": 0.78649265624327, "learning_rate": 9.562207761425584e-07, "loss": 4.5964, "step": 18260 }, { "epoch": 0.7873540939828574, "learning_rate": 9.561722941670797e-07, "loss": 4.5272, "step": 18280 }, { "epoch": 0.7882155317224447, "learning_rate": 9.561238121916008e-07, "loss": 4.5325, "step": 18300 }, { "epoch": 0.7890769694620321, "learning_rate": 9.560753302161219e-07, "loss": 4.6481, "step": 18320 }, { "epoch": 0.7899384072016195, "learning_rate": 9.56026848240643e-07, "loss": 4.6294, "step": 18340 }, { "epoch": 0.7907998449412069, "learning_rate": 9.55978366265164e-07, "loss": 4.5148, "step": 18360 }, { "epoch": 0.7916612826807943, "learning_rate": 9.559298842896853e-07, "loss": 4.4899, "step": 18380 }, { "epoch": 0.7925227204203816, "learning_rate": 9.558814023142061e-07, "loss": 4.5426, "step": 18400 }, { "epoch": 0.793384158159969, "learning_rate": 9.558329203387274e-07, "loss": 4.4513, "step": 18420 }, { "epoch": 0.7942455958995563, "learning_rate": 9.557844383632485e-07, "loss": 4.4633, "step": 18440 }, { "epoch": 0.7951070336391437, "learning_rate": 9.557359563877696e-07, "loss": 4.44, "step": 18460 }, { "epoch": 0.7959684713787311, "learning_rate": 9.556874744122906e-07, "loss": 4.8242, "step": 18480 }, { "epoch": 0.7968299091183185, "learning_rate": 9.556389924368117e-07, "loss": 4.475, "step": 18500 }, { "epoch": 0.7976913468579059, "learning_rate": 9.55590510461333e-07, "loss": 4.5009, "step": 18520 }, { "epoch": 0.7985527845974932, "learning_rate": 9.555420284858539e-07, "loss": 4.4502, "step": 18540 }, { "epoch": 0.7994142223370806, "learning_rate": 9.554935465103751e-07, "loss": 4.7639, "step": 18560 }, { "epoch": 0.800275660076668, "learning_rate": 9.554450645348962e-07, "loss": 4.4771, "step": 18580 }, { "epoch": 0.8011370978162553, "learning_rate": 9.553965825594173e-07, "loss": 4.4977, "step": 18600 }, { "epoch": 0.8019985355558427, "learning_rate": 9.553481005839384e-07, "loss": 4.3852, "step": 18620 }, { "epoch": 0.80285997329543, "learning_rate": 9.552996186084594e-07, "loss": 4.5963, "step": 18640 }, { "epoch": 0.8037214110350175, "learning_rate": 9.552511366329807e-07, "loss": 4.7555, "step": 18660 }, { "epoch": 0.8045828487746048, "learning_rate": 9.552026546575018e-07, "loss": 4.7279, "step": 18680 }, { "epoch": 0.8054442865141922, "learning_rate": 9.551541726820228e-07, "loss": 4.6135, "step": 18700 }, { "epoch": 0.8063057242537796, "learning_rate": 9.55105690706544e-07, "loss": 4.474, "step": 18720 }, { "epoch": 0.8071671619933669, "learning_rate": 9.55057208731065e-07, "loss": 4.5132, "step": 18740 }, { "epoch": 0.8080285997329543, "learning_rate": 9.550087267555863e-07, "loss": 4.3501, "step": 18760 }, { "epoch": 0.8088900374725416, "learning_rate": 9.549602447801071e-07, "loss": 4.4286, "step": 18780 }, { "epoch": 0.8097514752121291, "learning_rate": 9.549117628046284e-07, "loss": 4.5616, "step": 18800 }, { "epoch": 0.8106129129517164, "learning_rate": 9.548632808291495e-07, "loss": 4.4247, "step": 18820 }, { "epoch": 0.8114743506913038, "learning_rate": 9.548147988536706e-07, "loss": 4.5656, "step": 18840 }, { "epoch": 0.8123357884308912, "learning_rate": 9.547663168781916e-07, "loss": 4.2647, "step": 18860 }, { "epoch": 0.8131972261704785, "learning_rate": 9.547178349027127e-07, "loss": 4.3358, "step": 18880 }, { "epoch": 0.8140586639100659, "learning_rate": 9.54669352927234e-07, "loss": 4.2501, "step": 18900 }, { "epoch": 0.8149201016496532, "learning_rate": 9.54620870951755e-07, "loss": 4.6137, "step": 18920 }, { "epoch": 0.8157815393892407, "learning_rate": 9.545723889762761e-07, "loss": 4.5941, "step": 18940 }, { "epoch": 0.816642977128828, "learning_rate": 9.545239070007974e-07, "loss": 4.464, "step": 18960 }, { "epoch": 0.8175044148684154, "learning_rate": 9.544754250253183e-07, "loss": 4.3827, "step": 18980 }, { "epoch": 0.8183658526080028, "learning_rate": 9.544269430498396e-07, "loss": 4.62, "step": 19000 }, { "epoch": 0.8192272903475901, "learning_rate": 9.543784610743604e-07, "loss": 4.6739, "step": 19020 }, { "epoch": 0.8200887280871775, "learning_rate": 9.543299790988817e-07, "loss": 4.5649, "step": 19040 }, { "epoch": 0.8209501658267648, "learning_rate": 9.542814971234028e-07, "loss": 4.4442, "step": 19060 }, { "epoch": 0.8218116035663522, "learning_rate": 9.542330151479238e-07, "loss": 4.5496, "step": 19080 }, { "epoch": 0.8226730413059397, "learning_rate": 9.54184533172445e-07, "loss": 4.458, "step": 19100 }, { "epoch": 0.823534479045527, "learning_rate": 9.54136051196966e-07, "loss": 4.536, "step": 19120 }, { "epoch": 0.8243959167851144, "learning_rate": 9.540875692214873e-07, "loss": 4.6454, "step": 19140 }, { "epoch": 0.8252573545247017, "learning_rate": 9.540390872460081e-07, "loss": 4.4801, "step": 19160 }, { "epoch": 0.8261187922642891, "learning_rate": 9.539906052705294e-07, "loss": 4.5506, "step": 19180 }, { "epoch": 0.8269802300038764, "learning_rate": 9.539421232950505e-07, "loss": 4.4797, "step": 19200 }, { "epoch": 0.8278416677434638, "learning_rate": 9.538936413195716e-07, "loss": 4.5092, "step": 19220 }, { "epoch": 0.8287031054830513, "learning_rate": 9.538451593440926e-07, "loss": 4.4169, "step": 19240 }, { "epoch": 0.8295645432226386, "learning_rate": 9.537966773686137e-07, "loss": 4.7532, "step": 19260 }, { "epoch": 0.830425980962226, "learning_rate": 9.53748195393135e-07, "loss": 4.6437, "step": 19280 }, { "epoch": 0.8312874187018133, "learning_rate": 9.53699713417656e-07, "loss": 4.4116, "step": 19300 }, { "epoch": 0.8321488564414007, "learning_rate": 9.536512314421771e-07, "loss": 4.6395, "step": 19320 }, { "epoch": 0.833010294180988, "learning_rate": 9.536027494666983e-07, "loss": 4.5763, "step": 19340 }, { "epoch": 0.8338717319205754, "learning_rate": 9.535542674912193e-07, "loss": 4.6412, "step": 19360 }, { "epoch": 0.8347331696601629, "learning_rate": 9.535057855157404e-07, "loss": 4.4227, "step": 19380 }, { "epoch": 0.8355946073997502, "learning_rate": 9.534573035402615e-07, "loss": 4.6791, "step": 19400 }, { "epoch": 0.8364560451393376, "learning_rate": 9.534088215647826e-07, "loss": 4.5265, "step": 19420 }, { "epoch": 0.8373174828789249, "learning_rate": 9.533603395893038e-07, "loss": 4.5809, "step": 19440 }, { "epoch": 0.8381789206185123, "learning_rate": 9.533118576138249e-07, "loss": 4.7335, "step": 19460 }, { "epoch": 0.8390403583580996, "learning_rate": 9.532633756383459e-07, "loss": 4.6606, "step": 19480 }, { "epoch": 0.839901796097687, "learning_rate": 9.532148936628671e-07, "loss": 4.5205, "step": 19500 }, { "epoch": 0.8407632338372744, "learning_rate": 9.531664116873882e-07, "loss": 4.6531, "step": 19520 }, { "epoch": 0.8416246715768618, "learning_rate": 9.531179297119093e-07, "loss": 4.7872, "step": 19540 }, { "epoch": 0.8424861093164492, "learning_rate": 9.530694477364304e-07, "loss": 4.4518, "step": 19560 }, { "epoch": 0.8433475470560365, "learning_rate": 9.530209657609516e-07, "loss": 4.4122, "step": 19580 }, { "epoch": 0.8442089847956239, "learning_rate": 9.529724837854727e-07, "loss": 4.6403, "step": 19600 }, { "epoch": 0.8450704225352113, "learning_rate": 9.529240018099936e-07, "loss": 4.6208, "step": 19620 }, { "epoch": 0.8459318602747986, "learning_rate": 9.528755198345148e-07, "loss": 4.6772, "step": 19640 }, { "epoch": 0.846793298014386, "learning_rate": 9.528270378590359e-07, "loss": 4.2239, "step": 19660 }, { "epoch": 0.8476547357539734, "learning_rate": 9.52778555883557e-07, "loss": 4.4615, "step": 19680 }, { "epoch": 0.8485161734935608, "learning_rate": 9.527300739080781e-07, "loss": 4.4219, "step": 19700 }, { "epoch": 0.8493776112331481, "learning_rate": 9.526815919325993e-07, "loss": 4.5251, "step": 19720 }, { "epoch": 0.8502390489727355, "learning_rate": 9.526331099571202e-07, "loss": 4.4332, "step": 19740 }, { "epoch": 0.8511004867123229, "learning_rate": 9.525846279816414e-07, "loss": 4.7348, "step": 19760 }, { "epoch": 0.8519619244519102, "learning_rate": 9.525361460061625e-07, "loss": 4.4744, "step": 19780 }, { "epoch": 0.8528233621914976, "learning_rate": 9.524876640306837e-07, "loss": 4.482, "step": 19800 }, { "epoch": 0.8536847999310849, "learning_rate": 9.524391820552048e-07, "loss": 4.6764, "step": 19820 }, { "epoch": 0.8545462376706724, "learning_rate": 9.523907000797259e-07, "loss": 4.2579, "step": 19840 }, { "epoch": 0.8554076754102597, "learning_rate": 9.523422181042469e-07, "loss": 4.5412, "step": 19860 }, { "epoch": 0.8562691131498471, "learning_rate": 9.522937361287681e-07, "loss": 4.4961, "step": 19880 }, { "epoch": 0.8571305508894345, "learning_rate": 9.522452541532892e-07, "loss": 4.7074, "step": 19900 }, { "epoch": 0.8579919886290218, "learning_rate": 9.521967721778103e-07, "loss": 4.7198, "step": 19920 }, { "epoch": 0.8588534263686092, "learning_rate": 9.521482902023314e-07, "loss": 4.5957, "step": 19940 }, { "epoch": 0.8597148641081965, "learning_rate": 9.520998082268526e-07, "loss": 4.5261, "step": 19960 }, { "epoch": 0.860576301847784, "learning_rate": 9.520513262513736e-07, "loss": 4.2652, "step": 19980 }, { "epoch": 0.8614377395873714, "learning_rate": 9.520028442758947e-07, "loss": 4.3014, "step": 20000 }, { "epoch": 0.8622991773269587, "learning_rate": 9.519543623004158e-07, "loss": 4.617, "step": 20020 }, { "epoch": 0.8631606150665461, "learning_rate": 9.51905880324937e-07, "loss": 4.5581, "step": 20040 }, { "epoch": 0.8640220528061334, "learning_rate": 9.51857398349458e-07, "loss": 4.3092, "step": 20060 }, { "epoch": 0.8648834905457208, "learning_rate": 9.518089163739792e-07, "loss": 4.4433, "step": 20080 }, { "epoch": 0.8657449282853081, "learning_rate": 9.517604343985003e-07, "loss": 4.3439, "step": 20100 }, { "epoch": 0.8666063660248956, "learning_rate": 9.517119524230214e-07, "loss": 4.4046, "step": 20120 }, { "epoch": 0.867467803764483, "learning_rate": 9.516634704475424e-07, "loss": 4.4703, "step": 20140 }, { "epoch": 0.8683292415040703, "learning_rate": 9.516149884720635e-07, "loss": 4.0817, "step": 20160 }, { "epoch": 0.8691906792436577, "learning_rate": 9.515665064965847e-07, "loss": 4.6784, "step": 20180 }, { "epoch": 0.870052116983245, "learning_rate": 9.515180245211058e-07, "loss": 4.3892, "step": 20200 }, { "epoch": 0.8709135547228324, "learning_rate": 9.514695425456269e-07, "loss": 4.5129, "step": 20220 }, { "epoch": 0.8717749924624197, "learning_rate": 9.51421060570148e-07, "loss": 4.4788, "step": 20240 }, { "epoch": 0.8726364302020071, "learning_rate": 9.51372578594669e-07, "loss": 4.3852, "step": 20260 }, { "epoch": 0.8734978679415946, "learning_rate": 9.513240966191901e-07, "loss": 4.3911, "step": 20280 }, { "epoch": 0.8743593056811819, "learning_rate": 9.512756146437113e-07, "loss": 4.4513, "step": 20300 }, { "epoch": 0.8752207434207693, "learning_rate": 9.512271326682324e-07, "loss": 4.5099, "step": 20320 }, { "epoch": 0.8760821811603566, "learning_rate": 9.511786506927536e-07, "loss": 4.4488, "step": 20340 }, { "epoch": 0.876943618899944, "learning_rate": 9.511301687172747e-07, "loss": 4.4457, "step": 20360 }, { "epoch": 0.8778050566395313, "learning_rate": 9.510816867417957e-07, "loss": 4.4634, "step": 20380 }, { "epoch": 0.8786664943791187, "learning_rate": 9.510332047663168e-07, "loss": 4.4281, "step": 20400 }, { "epoch": 0.8795279321187062, "learning_rate": 9.50984722790838e-07, "loss": 4.7259, "step": 20420 }, { "epoch": 0.8803893698582935, "learning_rate": 9.50936240815359e-07, "loss": 4.4135, "step": 20440 }, { "epoch": 0.8812508075978809, "learning_rate": 9.508877588398802e-07, "loss": 4.6404, "step": 20460 }, { "epoch": 0.8821122453374682, "learning_rate": 9.508392768644013e-07, "loss": 4.4357, "step": 20480 }, { "epoch": 0.8829736830770556, "learning_rate": 9.507907948889224e-07, "loss": 4.4953, "step": 20500 }, { "epoch": 0.883835120816643, "learning_rate": 9.507423129134434e-07, "loss": 4.3235, "step": 20520 }, { "epoch": 0.8846965585562303, "learning_rate": 9.506938309379646e-07, "loss": 4.5058, "step": 20540 }, { "epoch": 0.8855579962958178, "learning_rate": 9.506453489624857e-07, "loss": 4.6422, "step": 20560 }, { "epoch": 0.8864194340354051, "learning_rate": 9.505968669870069e-07, "loss": 4.5657, "step": 20580 }, { "epoch": 0.8872808717749925, "learning_rate": 9.505483850115279e-07, "loss": 4.1617, "step": 20600 }, { "epoch": 0.8881423095145798, "learning_rate": 9.50499903036049e-07, "loss": 4.4965, "step": 20620 }, { "epoch": 0.8890037472541672, "learning_rate": 9.504514210605701e-07, "loss": 4.512, "step": 20640 }, { "epoch": 0.8898651849937546, "learning_rate": 9.504029390850912e-07, "loss": 4.5665, "step": 20660 }, { "epoch": 0.8907266227333419, "learning_rate": 9.503544571096123e-07, "loss": 4.6343, "step": 20680 }, { "epoch": 0.8915880604729293, "learning_rate": 9.503059751341334e-07, "loss": 4.5786, "step": 20700 }, { "epoch": 0.8924494982125167, "learning_rate": 9.502574931586546e-07, "loss": 4.2518, "step": 20720 }, { "epoch": 0.8933109359521041, "learning_rate": 9.502090111831756e-07, "loss": 4.3461, "step": 20740 }, { "epoch": 0.8941723736916914, "learning_rate": 9.501605292076967e-07, "loss": 4.6709, "step": 20760 }, { "epoch": 0.8950338114312788, "learning_rate": 9.501120472322178e-07, "loss": 4.441, "step": 20780 }, { "epoch": 0.8958952491708662, "learning_rate": 9.50063565256739e-07, "loss": 4.4467, "step": 20800 }, { "epoch": 0.8967566869104535, "learning_rate": 9.5001508328126e-07, "loss": 4.5218, "step": 20820 }, { "epoch": 0.8976181246500409, "learning_rate": 9.499666013057812e-07, "loss": 4.2716, "step": 20840 }, { "epoch": 0.8984795623896283, "learning_rate": 9.499181193303023e-07, "loss": 4.531, "step": 20860 }, { "epoch": 0.8993410001292157, "learning_rate": 9.498696373548233e-07, "loss": 4.4809, "step": 20880 }, { "epoch": 0.900202437868803, "learning_rate": 9.498211553793444e-07, "loss": 4.5645, "step": 20900 }, { "epoch": 0.9010638756083904, "learning_rate": 9.497726734038656e-07, "loss": 4.3924, "step": 20920 }, { "epoch": 0.9019253133479778, "learning_rate": 9.497241914283867e-07, "loss": 4.5356, "step": 20940 }, { "epoch": 0.9027867510875651, "learning_rate": 9.496757094529078e-07, "loss": 4.1991, "step": 20960 }, { "epoch": 0.9036481888271525, "learning_rate": 9.496272274774289e-07, "loss": 4.4928, "step": 20980 }, { "epoch": 0.9045096265667399, "learning_rate": 9.4957874550195e-07, "loss": 4.6196, "step": 21000 }, { "epoch": 0.9053710643063273, "learning_rate": 9.495302635264711e-07, "loss": 4.2559, "step": 21020 }, { "epoch": 0.9062325020459147, "learning_rate": 9.494817815509922e-07, "loss": 4.6048, "step": 21040 }, { "epoch": 0.907093939785502, "learning_rate": 9.494332995755133e-07, "loss": 4.6164, "step": 21060 }, { "epoch": 0.9079553775250894, "learning_rate": 9.493848176000345e-07, "loss": 4.4236, "step": 21080 }, { "epoch": 0.9088168152646767, "learning_rate": 9.493363356245556e-07, "loss": 4.4766, "step": 21100 }, { "epoch": 0.9096782530042641, "learning_rate": 9.492878536490767e-07, "loss": 4.457, "step": 21120 }, { "epoch": 0.9105396907438514, "learning_rate": 9.492393716735977e-07, "loss": 4.4613, "step": 21140 }, { "epoch": 0.9114011284834389, "learning_rate": 9.491908896981189e-07, "loss": 4.3311, "step": 21160 }, { "epoch": 0.9122625662230263, "learning_rate": 9.4914240772264e-07, "loss": 4.4791, "step": 21180 }, { "epoch": 0.9131240039626136, "learning_rate": 9.49093925747161e-07, "loss": 4.5871, "step": 21200 }, { "epoch": 0.913985441702201, "learning_rate": 9.490454437716822e-07, "loss": 4.5684, "step": 21220 }, { "epoch": 0.9148468794417883, "learning_rate": 9.489969617962033e-07, "loss": 4.5018, "step": 21240 }, { "epoch": 0.9157083171813757, "learning_rate": 9.489484798207243e-07, "loss": 4.4402, "step": 21260 }, { "epoch": 0.916569754920963, "learning_rate": 9.488999978452454e-07, "loss": 4.5509, "step": 21280 }, { "epoch": 0.9174311926605505, "learning_rate": 9.488515158697666e-07, "loss": 4.4623, "step": 21300 }, { "epoch": 0.9182926304001379, "learning_rate": 9.488030338942877e-07, "loss": 4.6452, "step": 21320 }, { "epoch": 0.9191540681397252, "learning_rate": 9.487545519188088e-07, "loss": 4.3545, "step": 21340 }, { "epoch": 0.9200155058793126, "learning_rate": 9.487060699433299e-07, "loss": 4.6088, "step": 21360 }, { "epoch": 0.9208769436188999, "learning_rate": 9.486575879678511e-07, "loss": 4.4155, "step": 21380 }, { "epoch": 0.9217383813584873, "learning_rate": 9.486091059923721e-07, "loss": 4.4004, "step": 21400 }, { "epoch": 0.9225998190980746, "learning_rate": 9.485606240168932e-07, "loss": 4.5805, "step": 21420 }, { "epoch": 0.923461256837662, "learning_rate": 9.485121420414143e-07, "loss": 4.545, "step": 21440 }, { "epoch": 0.9243226945772495, "learning_rate": 9.484636600659355e-07, "loss": 4.4467, "step": 21460 }, { "epoch": 0.9251841323168368, "learning_rate": 9.484151780904566e-07, "loss": 4.47, "step": 21480 }, { "epoch": 0.9260455700564242, "learning_rate": 9.483666961149777e-07, "loss": 4.7662, "step": 21500 }, { "epoch": 0.9269070077960115, "learning_rate": 9.483182141394986e-07, "loss": 4.3187, "step": 21520 }, { "epoch": 0.9277684455355989, "learning_rate": 9.482697321640199e-07, "loss": 4.8051, "step": 21540 }, { "epoch": 0.9286298832751863, "learning_rate": 9.482212501885409e-07, "loss": 4.5825, "step": 21560 }, { "epoch": 0.9294913210147736, "learning_rate": 9.481727682130621e-07, "loss": 4.3509, "step": 21580 }, { "epoch": 0.9303527587543611, "learning_rate": 9.481242862375832e-07, "loss": 4.3702, "step": 21600 }, { "epoch": 0.9312141964939484, "learning_rate": 9.480758042621044e-07, "loss": 4.6154, "step": 21620 }, { "epoch": 0.9320756342335358, "learning_rate": 9.480273222866253e-07, "loss": 4.7423, "step": 21640 }, { "epoch": 0.9329370719731231, "learning_rate": 9.479788403111465e-07, "loss": 4.494, "step": 21660 }, { "epoch": 0.9337985097127105, "learning_rate": 9.479303583356676e-07, "loss": 4.8994, "step": 21680 }, { "epoch": 0.9346599474522979, "learning_rate": 9.478818763601888e-07, "loss": 4.5079, "step": 21700 }, { "epoch": 0.9355213851918852, "learning_rate": 9.478333943847098e-07, "loss": 4.4827, "step": 21720 }, { "epoch": 0.9363828229314727, "learning_rate": 9.47784912409231e-07, "loss": 4.5921, "step": 21740 }, { "epoch": 0.93724426067106, "learning_rate": 9.477364304337521e-07, "loss": 4.4418, "step": 21760 }, { "epoch": 0.9381056984106474, "learning_rate": 9.47687948458273e-07, "loss": 4.5204, "step": 21780 }, { "epoch": 0.9389671361502347, "learning_rate": 9.476394664827942e-07, "loss": 4.6105, "step": 21800 }, { "epoch": 0.9398285738898221, "learning_rate": 9.475909845073153e-07, "loss": 4.6572, "step": 21820 }, { "epoch": 0.9406900116294095, "learning_rate": 9.475425025318365e-07, "loss": 4.33, "step": 21840 }, { "epoch": 0.9415514493689968, "learning_rate": 9.474940205563575e-07, "loss": 4.5902, "step": 21860 }, { "epoch": 0.9424128871085842, "learning_rate": 9.474455385808787e-07, "loss": 4.3756, "step": 21880 }, { "epoch": 0.9432743248481716, "learning_rate": 9.473970566053997e-07, "loss": 4.5468, "step": 21900 }, { "epoch": 0.944135762587759, "learning_rate": 9.473485746299209e-07, "loss": 4.4465, "step": 21920 }, { "epoch": 0.9449972003273464, "learning_rate": 9.473000926544419e-07, "loss": 4.6516, "step": 21940 }, { "epoch": 0.9458586380669337, "learning_rate": 9.472516106789631e-07, "loss": 4.328, "step": 21960 }, { "epoch": 0.9467200758065211, "learning_rate": 9.472031287034842e-07, "loss": 4.6072, "step": 21980 }, { "epoch": 0.9475815135461084, "learning_rate": 9.471546467280054e-07, "loss": 4.2032, "step": 22000 }, { "epoch": 0.9484429512856958, "learning_rate": 9.471061647525264e-07, "loss": 4.3651, "step": 22020 }, { "epoch": 0.9493043890252832, "learning_rate": 9.470576827770474e-07, "loss": 4.263, "step": 22040 }, { "epoch": 0.9501658267648706, "learning_rate": 9.470092008015686e-07, "loss": 4.6553, "step": 22060 }, { "epoch": 0.951027264504458, "learning_rate": 9.469607188260898e-07, "loss": 4.4842, "step": 22080 }, { "epoch": 0.9518887022440453, "learning_rate": 9.469122368506108e-07, "loss": 4.344, "step": 22100 }, { "epoch": 0.9527501399836327, "learning_rate": 9.46863754875132e-07, "loss": 4.4443, "step": 22120 }, { "epoch": 0.95361157772322, "learning_rate": 9.468152728996532e-07, "loss": 4.4187, "step": 22140 }, { "epoch": 0.9544730154628074, "learning_rate": 9.467667909241741e-07, "loss": 4.3837, "step": 22160 }, { "epoch": 0.9553344532023949, "learning_rate": 9.467183089486952e-07, "loss": 4.6523, "step": 22180 }, { "epoch": 0.9561958909419822, "learning_rate": 9.466698269732164e-07, "loss": 4.3209, "step": 22200 }, { "epoch": 0.9570573286815696, "learning_rate": 9.466213449977375e-07, "loss": 4.6185, "step": 22220 }, { "epoch": 0.9579187664211569, "learning_rate": 9.465728630222586e-07, "loss": 4.5998, "step": 22240 }, { "epoch": 0.9587802041607443, "learning_rate": 9.465243810467797e-07, "loss": 4.5501, "step": 22260 }, { "epoch": 0.9596416419003316, "learning_rate": 9.464758990713008e-07, "loss": 4.3369, "step": 22280 }, { "epoch": 0.960503079639919, "learning_rate": 9.464274170958219e-07, "loss": 4.3487, "step": 22300 }, { "epoch": 0.9613645173795063, "learning_rate": 9.463789351203429e-07, "loss": 4.4979, "step": 22320 }, { "epoch": 0.9622259551190938, "learning_rate": 9.463304531448641e-07, "loss": 4.482, "step": 22340 }, { "epoch": 0.9630873928586812, "learning_rate": 9.462819711693852e-07, "loss": 4.5562, "step": 22360 }, { "epoch": 0.9639488305982685, "learning_rate": 9.462334891939064e-07, "loss": 4.6254, "step": 22380 }, { "epoch": 0.9648102683378559, "learning_rate": 9.461850072184274e-07, "loss": 4.442, "step": 22400 }, { "epoch": 0.9656717060774432, "learning_rate": 9.461365252429485e-07, "loss": 4.4808, "step": 22420 }, { "epoch": 0.9665331438170306, "learning_rate": 9.460880432674696e-07, "loss": 4.3537, "step": 22440 }, { "epoch": 0.967394581556618, "learning_rate": 9.460395612919907e-07, "loss": 4.4404, "step": 22460 }, { "epoch": 0.9682560192962054, "learning_rate": 9.459910793165118e-07, "loss": 4.2819, "step": 22480 }, { "epoch": 0.9691174570357928, "learning_rate": 9.45942597341033e-07, "loss": 4.2377, "step": 22500 }, { "epoch": 0.9699788947753801, "learning_rate": 9.458941153655541e-07, "loss": 4.4839, "step": 22520 }, { "epoch": 0.9708403325149675, "learning_rate": 9.458456333900751e-07, "loss": 4.4019, "step": 22540 }, { "epoch": 0.9717017702545548, "learning_rate": 9.457971514145962e-07, "loss": 4.5467, "step": 22560 }, { "epoch": 0.9725632079941422, "learning_rate": 9.457486694391174e-07, "loss": 4.3238, "step": 22580 }, { "epoch": 0.9734246457337296, "learning_rate": 9.457001874636385e-07, "loss": 4.4663, "step": 22600 }, { "epoch": 0.9742860834733169, "learning_rate": 9.456517054881596e-07, "loss": 4.4383, "step": 22620 }, { "epoch": 0.9751475212129044, "learning_rate": 9.456032235126807e-07, "loss": 4.5796, "step": 22640 }, { "epoch": 0.9760089589524917, "learning_rate": 9.455547415372018e-07, "loss": 4.4543, "step": 22660 }, { "epoch": 0.9768703966920791, "learning_rate": 9.455062595617229e-07, "loss": 4.3599, "step": 22680 }, { "epoch": 0.9777318344316664, "learning_rate": 9.45457777586244e-07, "loss": 4.5134, "step": 22700 }, { "epoch": 0.9785932721712538, "learning_rate": 9.454092956107651e-07, "loss": 4.5121, "step": 22720 }, { "epoch": 0.9794547099108412, "learning_rate": 9.453608136352863e-07, "loss": 4.6245, "step": 22740 }, { "epoch": 0.9803161476504285, "learning_rate": 9.453123316598073e-07, "loss": 4.4596, "step": 22760 }, { "epoch": 0.981177585390016, "learning_rate": 9.452638496843284e-07, "loss": 4.7281, "step": 22780 }, { "epoch": 0.9820390231296033, "learning_rate": 9.452153677088495e-07, "loss": 4.4649, "step": 22800 }, { "epoch": 0.9829004608691907, "learning_rate": 9.451668857333707e-07, "loss": 4.5645, "step": 22820 }, { "epoch": 0.983761898608778, "learning_rate": 9.451184037578917e-07, "loss": 4.5543, "step": 22840 }, { "epoch": 0.9846233363483654, "learning_rate": 9.450699217824128e-07, "loss": 4.7059, "step": 22860 }, { "epoch": 0.9854847740879528, "learning_rate": 9.45021439806934e-07, "loss": 4.5437, "step": 22880 }, { "epoch": 0.9863462118275401, "learning_rate": 9.449729578314551e-07, "loss": 4.3082, "step": 22900 }, { "epoch": 0.9872076495671276, "learning_rate": 9.449244758559761e-07, "loss": 4.5584, "step": 22920 }, { "epoch": 0.9880690873067149, "learning_rate": 9.448759938804972e-07, "loss": 4.5198, "step": 22940 }, { "epoch": 0.9889305250463023, "learning_rate": 9.448275119050184e-07, "loss": 4.632, "step": 22960 }, { "epoch": 0.9897919627858897, "learning_rate": 9.447790299295395e-07, "loss": 4.5563, "step": 22980 }, { "epoch": 0.990653400525477, "learning_rate": 9.447305479540606e-07, "loss": 4.8115, "step": 23000 }, { "epoch": 0.9915148382650644, "learning_rate": 9.446820659785817e-07, "loss": 4.5416, "step": 23020 }, { "epoch": 0.9923762760046517, "learning_rate": 9.446335840031028e-07, "loss": 4.1979, "step": 23040 }, { "epoch": 0.9932377137442391, "learning_rate": 9.445851020276238e-07, "loss": 4.4413, "step": 23060 }, { "epoch": 0.9940991514838265, "learning_rate": 9.44536620052145e-07, "loss": 4.4993, "step": 23080 }, { "epoch": 0.9949605892234139, "learning_rate": 9.444881380766661e-07, "loss": 4.4913, "step": 23100 }, { "epoch": 0.9958220269630013, "learning_rate": 9.444396561011873e-07, "loss": 4.3746, "step": 23120 }, { "epoch": 0.9966834647025886, "learning_rate": 9.443911741257083e-07, "loss": 4.6342, "step": 23140 }, { "epoch": 0.997544902442176, "learning_rate": 9.443426921502295e-07, "loss": 4.4272, "step": 23160 }, { "epoch": 0.9984063401817633, "learning_rate": 9.442942101747505e-07, "loss": 4.6873, "step": 23180 }, { "epoch": 0.9992677779213507, "learning_rate": 9.442457281992717e-07, "loss": 4.4283, "step": 23200 }, { "epoch": 1.000129215660938, "learning_rate": 9.441972462237927e-07, "loss": 4.5882, "step": 23220 }, { "epoch": 1.0009906534005255, "learning_rate": 9.441487642483139e-07, "loss": 4.5283, "step": 23240 }, { "epoch": 1.0018520911401128, "learning_rate": 9.44100282272835e-07, "loss": 4.2976, "step": 23260 }, { "epoch": 1.0027135288797002, "learning_rate": 9.440518002973562e-07, "loss": 4.3969, "step": 23280 }, { "epoch": 1.0035749666192877, "learning_rate": 9.44003318321877e-07, "loss": 4.3359, "step": 23300 }, { "epoch": 1.004436404358875, "learning_rate": 9.439548363463983e-07, "loss": 4.5208, "step": 23320 }, { "epoch": 1.0052978420984624, "learning_rate": 9.439063543709194e-07, "loss": 4.3077, "step": 23340 }, { "epoch": 1.0061592798380496, "learning_rate": 9.438578723954406e-07, "loss": 4.5879, "step": 23360 }, { "epoch": 1.0070207175776371, "learning_rate": 9.438093904199616e-07, "loss": 4.5573, "step": 23380 }, { "epoch": 1.0078821553172244, "learning_rate": 9.437609084444828e-07, "loss": 4.6482, "step": 23400 }, { "epoch": 1.0087435930568118, "learning_rate": 9.437124264690038e-07, "loss": 4.4084, "step": 23420 }, { "epoch": 1.0096050307963993, "learning_rate": 9.436639444935248e-07, "loss": 4.4637, "step": 23440 }, { "epoch": 1.0104664685359865, "learning_rate": 9.43615462518046e-07, "loss": 4.5866, "step": 23460 }, { "epoch": 1.011327906275574, "learning_rate": 9.435669805425671e-07, "loss": 4.5344, "step": 23480 }, { "epoch": 1.0121893440151613, "learning_rate": 9.435184985670883e-07, "loss": 4.4895, "step": 23500 }, { "epoch": 1.0130507817547487, "learning_rate": 9.434700165916093e-07, "loss": 4.4187, "step": 23520 }, { "epoch": 1.013912219494336, "learning_rate": 9.434215346161305e-07, "loss": 4.5773, "step": 23540 }, { "epoch": 1.0147736572339234, "learning_rate": 9.433730526406515e-07, "loss": 4.7074, "step": 23560 }, { "epoch": 1.015635094973511, "learning_rate": 9.433245706651727e-07, "loss": 4.5204, "step": 23580 }, { "epoch": 1.0164965327130981, "learning_rate": 9.432760886896937e-07, "loss": 4.3078, "step": 23600 }, { "epoch": 1.0173579704526856, "learning_rate": 9.432276067142149e-07, "loss": 4.3662, "step": 23620 }, { "epoch": 1.0182194081922729, "learning_rate": 9.43179124738736e-07, "loss": 4.4802, "step": 23640 }, { "epoch": 1.0190808459318603, "learning_rate": 9.431306427632572e-07, "loss": 4.5543, "step": 23660 }, { "epoch": 1.0199422836714476, "learning_rate": 9.430821607877781e-07, "loss": 4.5778, "step": 23680 }, { "epoch": 1.020803721411035, "learning_rate": 9.430336788122993e-07, "loss": 4.2533, "step": 23700 }, { "epoch": 1.0216651591506223, "learning_rate": 9.429851968368204e-07, "loss": 4.6842, "step": 23720 }, { "epoch": 1.0225265968902097, "learning_rate": 9.429367148613415e-07, "loss": 4.3704, "step": 23740 }, { "epoch": 1.0233880346297972, "learning_rate": 9.428882328858626e-07, "loss": 4.5711, "step": 23760 }, { "epoch": 1.0242494723693845, "learning_rate": 9.428397509103838e-07, "loss": 4.7231, "step": 23780 }, { "epoch": 1.025110910108972, "learning_rate": 9.427912689349049e-07, "loss": 4.5228, "step": 23800 }, { "epoch": 1.0259723478485592, "learning_rate": 9.427427869594258e-07, "loss": 4.4926, "step": 23820 }, { "epoch": 1.0268337855881466, "learning_rate": 9.42694304983947e-07, "loss": 4.3109, "step": 23840 }, { "epoch": 1.0276952233277339, "learning_rate": 9.426458230084681e-07, "loss": 4.5105, "step": 23860 }, { "epoch": 1.0285566610673214, "learning_rate": 9.425973410329893e-07, "loss": 4.3662, "step": 23880 }, { "epoch": 1.0294180988069088, "learning_rate": 9.425488590575104e-07, "loss": 4.3859, "step": 23900 }, { "epoch": 1.030279536546496, "learning_rate": 9.425003770820316e-07, "loss": 4.4111, "step": 23920 }, { "epoch": 1.0311409742860835, "learning_rate": 9.424518951065525e-07, "loss": 4.253, "step": 23940 }, { "epoch": 1.0320024120256708, "learning_rate": 9.424034131310737e-07, "loss": 4.5461, "step": 23960 }, { "epoch": 1.0328638497652582, "learning_rate": 9.423549311555947e-07, "loss": 4.4709, "step": 23980 }, { "epoch": 1.0337252875048455, "learning_rate": 9.423064491801159e-07, "loss": 4.376, "step": 24000 }, { "epoch": 1.034586725244433, "learning_rate": 9.42257967204637e-07, "loss": 4.4641, "step": 24020 }, { "epoch": 1.0354481629840204, "learning_rate": 9.422094852291581e-07, "loss": 4.2648, "step": 24040 }, { "epoch": 1.0363096007236077, "learning_rate": 9.421610032536791e-07, "loss": 4.428, "step": 24060 }, { "epoch": 1.0371710384631951, "learning_rate": 9.421125212782003e-07, "loss": 4.6883, "step": 24080 }, { "epoch": 1.0380324762027824, "learning_rate": 9.420640393027214e-07, "loss": 4.4379, "step": 24100 }, { "epoch": 1.0388939139423699, "learning_rate": 9.420155573272425e-07, "loss": 4.3833, "step": 24120 }, { "epoch": 1.039755351681957, "learning_rate": 9.419670753517636e-07, "loss": 4.1903, "step": 24140 }, { "epoch": 1.0406167894215446, "learning_rate": 9.419185933762848e-07, "loss": 4.5037, "step": 24160 }, { "epoch": 1.041478227161132, "learning_rate": 9.418701114008059e-07, "loss": 4.5972, "step": 24180 }, { "epoch": 1.0423396649007193, "learning_rate": 9.418216294253269e-07, "loss": 4.4044, "step": 24200 }, { "epoch": 1.0432011026403067, "learning_rate": 9.41773147449848e-07, "loss": 4.3715, "step": 24220 }, { "epoch": 1.044062540379894, "learning_rate": 9.417246654743692e-07, "loss": 4.5498, "step": 24240 }, { "epoch": 1.0449239781194815, "learning_rate": 9.416761834988903e-07, "loss": 4.237, "step": 24260 }, { "epoch": 1.0457854158590687, "learning_rate": 9.416277015234114e-07, "loss": 4.4666, "step": 24280 }, { "epoch": 1.0466468535986562, "learning_rate": 9.415792195479325e-07, "loss": 4.4463, "step": 24300 }, { "epoch": 1.0475082913382436, "learning_rate": 9.415307375724536e-07, "loss": 4.361, "step": 24320 }, { "epoch": 1.0483697290778309, "learning_rate": 9.414822555969746e-07, "loss": 4.6172, "step": 24340 }, { "epoch": 1.0492311668174183, "learning_rate": 9.414337736214958e-07, "loss": 4.5976, "step": 24360 }, { "epoch": 1.0500926045570056, "learning_rate": 9.413852916460169e-07, "loss": 4.3985, "step": 24380 }, { "epoch": 1.050954042296593, "learning_rate": 9.41336809670538e-07, "loss": 4.3975, "step": 24400 }, { "epoch": 1.0518154800361803, "learning_rate": 9.412883276950591e-07, "loss": 4.5018, "step": 24420 }, { "epoch": 1.0526769177757678, "learning_rate": 9.412398457195802e-07, "loss": 4.5551, "step": 24440 }, { "epoch": 1.053538355515355, "learning_rate": 9.411913637441013e-07, "loss": 4.5351, "step": 24460 }, { "epoch": 1.0543997932549425, "learning_rate": 9.411428817686224e-07, "loss": 4.4579, "step": 24480 }, { "epoch": 1.05526123099453, "learning_rate": 9.410943997931435e-07, "loss": 4.5496, "step": 24500 }, { "epoch": 1.0561226687341172, "learning_rate": 9.410459178176646e-07, "loss": 4.5997, "step": 24520 }, { "epoch": 1.0569841064737047, "learning_rate": 9.409974358421858e-07, "loss": 4.5158, "step": 24540 }, { "epoch": 1.057845544213292, "learning_rate": 9.409489538667069e-07, "loss": 4.3401, "step": 24560 }, { "epoch": 1.0587069819528794, "learning_rate": 9.409004718912279e-07, "loss": 4.6352, "step": 24580 }, { "epoch": 1.0595684196924666, "learning_rate": 9.40851989915749e-07, "loss": 4.3259, "step": 24600 }, { "epoch": 1.060429857432054, "learning_rate": 9.408035079402702e-07, "loss": 4.3454, "step": 24620 }, { "epoch": 1.0612912951716416, "learning_rate": 9.407550259647912e-07, "loss": 4.6752, "step": 24640 }, { "epoch": 1.0621527329112288, "learning_rate": 9.407065439893124e-07, "loss": 4.4029, "step": 24660 }, { "epoch": 1.0630141706508163, "learning_rate": 9.406580620138335e-07, "loss": 4.3, "step": 24680 }, { "epoch": 1.0638756083904035, "learning_rate": 9.406095800383546e-07, "loss": 4.6179, "step": 24700 }, { "epoch": 1.064737046129991, "learning_rate": 9.405610980628756e-07, "loss": 4.2334, "step": 24720 }, { "epoch": 1.0655984838695782, "learning_rate": 9.405126160873968e-07, "loss": 4.1275, "step": 24740 }, { "epoch": 1.0664599216091657, "learning_rate": 9.404641341119179e-07, "loss": 4.4202, "step": 24760 }, { "epoch": 1.0673213593487532, "learning_rate": 9.404156521364391e-07, "loss": 4.3064, "step": 24780 }, { "epoch": 1.0681827970883404, "learning_rate": 9.403671701609601e-07, "loss": 4.3091, "step": 24800 }, { "epoch": 1.0690442348279279, "learning_rate": 9.403186881854812e-07, "loss": 4.3217, "step": 24820 }, { "epoch": 1.0699056725675151, "learning_rate": 9.402702062100023e-07, "loss": 4.5031, "step": 24840 }, { "epoch": 1.0707671103071026, "learning_rate": 9.402217242345235e-07, "loss": 4.4397, "step": 24860 }, { "epoch": 1.0716285480466898, "learning_rate": 9.401732422590445e-07, "loss": 4.4409, "step": 24880 }, { "epoch": 1.0724899857862773, "learning_rate": 9.401247602835657e-07, "loss": 4.5023, "step": 24900 }, { "epoch": 1.0733514235258648, "learning_rate": 9.400762783080868e-07, "loss": 4.3031, "step": 24920 }, { "epoch": 1.074212861265452, "learning_rate": 9.400277963326078e-07, "loss": 4.4672, "step": 24940 }, { "epoch": 1.0750742990050395, "learning_rate": 9.399793143571289e-07, "loss": 4.6026, "step": 24960 }, { "epoch": 1.0759357367446267, "learning_rate": 9.399308323816501e-07, "loss": 4.4296, "step": 24980 }, { "epoch": 1.0767971744842142, "learning_rate": 9.398823504061712e-07, "loss": 4.4177, "step": 25000 }, { "epoch": 1.0776586122238014, "learning_rate": 9.398338684306922e-07, "loss": 4.5838, "step": 25020 }, { "epoch": 1.078520049963389, "learning_rate": 9.397853864552134e-07, "loss": 4.3385, "step": 25040 }, { "epoch": 1.0793814877029764, "learning_rate": 9.397369044797345e-07, "loss": 4.4873, "step": 25060 }, { "epoch": 1.0802429254425636, "learning_rate": 9.396884225042555e-07, "loss": 4.6927, "step": 25080 }, { "epoch": 1.081104363182151, "learning_rate": 9.396399405287766e-07, "loss": 4.1841, "step": 25100 }, { "epoch": 1.0819658009217383, "learning_rate": 9.395914585532978e-07, "loss": 4.4341, "step": 25120 }, { "epoch": 1.0828272386613258, "learning_rate": 9.395429765778189e-07, "loss": 4.3862, "step": 25140 }, { "epoch": 1.083688676400913, "learning_rate": 9.394944946023401e-07, "loss": 4.5278, "step": 25160 }, { "epoch": 1.0845501141405005, "learning_rate": 9.394460126268612e-07, "loss": 4.3656, "step": 25180 }, { "epoch": 1.085411551880088, "learning_rate": 9.393975306513822e-07, "loss": 4.3609, "step": 25200 }, { "epoch": 1.0862729896196752, "learning_rate": 9.393490486759033e-07, "loss": 4.432, "step": 25220 }, { "epoch": 1.0871344273592627, "learning_rate": 9.393005667004245e-07, "loss": 4.4633, "step": 25240 }, { "epoch": 1.08799586509885, "learning_rate": 9.392520847249455e-07, "loss": 4.3571, "step": 25260 }, { "epoch": 1.0888573028384374, "learning_rate": 9.392036027494667e-07, "loss": 4.3883, "step": 25280 }, { "epoch": 1.0897187405780246, "learning_rate": 9.391551207739878e-07, "loss": 4.5822, "step": 25300 }, { "epoch": 1.0905801783176121, "learning_rate": 9.39106638798509e-07, "loss": 4.416, "step": 25320 }, { "epoch": 1.0914416160571996, "learning_rate": 9.390581568230299e-07, "loss": 4.3431, "step": 25340 }, { "epoch": 1.0923030537967868, "learning_rate": 9.390096748475511e-07, "loss": 4.5583, "step": 25360 }, { "epoch": 1.0931644915363743, "learning_rate": 9.389611928720722e-07, "loss": 4.3422, "step": 25380 }, { "epoch": 1.0940259292759615, "learning_rate": 9.389127108965933e-07, "loss": 4.4077, "step": 25400 }, { "epoch": 1.094887367015549, "learning_rate": 9.388642289211144e-07, "loss": 4.4079, "step": 25420 }, { "epoch": 1.0957488047551363, "learning_rate": 9.388157469456356e-07, "loss": 4.581, "step": 25440 }, { "epoch": 1.0966102424947237, "learning_rate": 9.387672649701566e-07, "loss": 4.3695, "step": 25460 }, { "epoch": 1.097471680234311, "learning_rate": 9.387187829946776e-07, "loss": 4.2777, "step": 25480 }, { "epoch": 1.0983331179738984, "learning_rate": 9.386703010191988e-07, "loss": 4.5497, "step": 25500 }, { "epoch": 1.099194555713486, "learning_rate": 9.3862181904372e-07, "loss": 4.4497, "step": 25520 }, { "epoch": 1.1000559934530731, "learning_rate": 9.38573337068241e-07, "loss": 4.3623, "step": 25540 }, { "epoch": 1.1009174311926606, "learning_rate": 9.385248550927621e-07, "loss": 4.609, "step": 25560 }, { "epoch": 1.1017788689322479, "learning_rate": 9.384763731172833e-07, "loss": 4.2504, "step": 25580 }, { "epoch": 1.1026403066718353, "learning_rate": 9.384278911418043e-07, "loss": 4.5254, "step": 25600 }, { "epoch": 1.1035017444114226, "learning_rate": 9.383794091663254e-07, "loss": 4.4802, "step": 25620 }, { "epoch": 1.10436318215101, "learning_rate": 9.383309271908465e-07, "loss": 4.5648, "step": 25640 }, { "epoch": 1.1052246198905975, "learning_rate": 9.382824452153677e-07, "loss": 4.4582, "step": 25660 }, { "epoch": 1.1060860576301847, "learning_rate": 9.382339632398888e-07, "loss": 4.5596, "step": 25680 }, { "epoch": 1.1069474953697722, "learning_rate": 9.3818548126441e-07, "loss": 4.4884, "step": 25700 }, { "epoch": 1.1078089331093595, "learning_rate": 9.381369992889309e-07, "loss": 4.3298, "step": 25720 }, { "epoch": 1.108670370848947, "learning_rate": 9.380885173134521e-07, "loss": 4.3468, "step": 25740 }, { "epoch": 1.1095318085885342, "learning_rate": 9.380400353379732e-07, "loss": 4.3481, "step": 25760 }, { "epoch": 1.1103932463281216, "learning_rate": 9.379915533624943e-07, "loss": 4.4193, "step": 25780 }, { "epoch": 1.111254684067709, "learning_rate": 9.379430713870154e-07, "loss": 4.4418, "step": 25800 }, { "epoch": 1.1121161218072964, "learning_rate": 9.378945894115366e-07, "loss": 4.4261, "step": 25820 }, { "epoch": 1.1129775595468838, "learning_rate": 9.378461074360575e-07, "loss": 4.4694, "step": 25840 }, { "epoch": 1.113838997286471, "learning_rate": 9.377976254605787e-07, "loss": 4.3937, "step": 25860 }, { "epoch": 1.1147004350260585, "learning_rate": 9.377491434850998e-07, "loss": 4.2992, "step": 25880 }, { "epoch": 1.1155618727656458, "learning_rate": 9.37700661509621e-07, "loss": 4.5451, "step": 25900 }, { "epoch": 1.1164233105052332, "learning_rate": 9.37652179534142e-07, "loss": 4.4103, "step": 25920 }, { "epoch": 1.1172847482448205, "learning_rate": 9.376036975586632e-07, "loss": 4.3582, "step": 25940 }, { "epoch": 1.118146185984408, "learning_rate": 9.375552155831843e-07, "loss": 4.3502, "step": 25960 }, { "epoch": 1.1190076237239954, "learning_rate": 9.375067336077054e-07, "loss": 4.4556, "step": 25980 }, { "epoch": 1.1198690614635827, "learning_rate": 9.374582516322264e-07, "loss": 4.8482, "step": 26000 }, { "epoch": 1.1207304992031701, "learning_rate": 9.374097696567476e-07, "loss": 4.4736, "step": 26020 }, { "epoch": 1.1215919369427574, "learning_rate": 9.373612876812687e-07, "loss": 4.4255, "step": 26040 }, { "epoch": 1.1224533746823449, "learning_rate": 9.373128057057899e-07, "loss": 4.3318, "step": 26060 }, { "epoch": 1.123314812421932, "learning_rate": 9.372643237303109e-07, "loss": 4.3827, "step": 26080 }, { "epoch": 1.1241762501615196, "learning_rate": 9.372158417548319e-07, "loss": 4.4282, "step": 26100 }, { "epoch": 1.125037687901107, "learning_rate": 9.371673597793531e-07, "loss": 4.4113, "step": 26120 }, { "epoch": 1.1258991256406943, "learning_rate": 9.371188778038741e-07, "loss": 4.3691, "step": 26140 }, { "epoch": 1.1267605633802817, "learning_rate": 9.370703958283953e-07, "loss": 4.469, "step": 26160 }, { "epoch": 1.127622001119869, "learning_rate": 9.370219138529164e-07, "loss": 4.5255, "step": 26180 }, { "epoch": 1.1284834388594565, "learning_rate": 9.369734318774376e-07, "loss": 4.495, "step": 26200 }, { "epoch": 1.1293448765990437, "learning_rate": 9.369249499019586e-07, "loss": 4.4714, "step": 26220 }, { "epoch": 1.1302063143386312, "learning_rate": 9.368764679264797e-07, "loss": 4.4941, "step": 26240 }, { "epoch": 1.1310677520782186, "learning_rate": 9.368279859510008e-07, "loss": 4.457, "step": 26260 }, { "epoch": 1.1319291898178059, "learning_rate": 9.36779503975522e-07, "loss": 4.3587, "step": 26280 }, { "epoch": 1.1327906275573933, "learning_rate": 9.36731022000043e-07, "loss": 4.326, "step": 26300 }, { "epoch": 1.1336520652969806, "learning_rate": 9.366825400245642e-07, "loss": 4.4715, "step": 26320 }, { "epoch": 1.134513503036568, "learning_rate": 9.366340580490853e-07, "loss": 4.5996, "step": 26340 }, { "epoch": 1.1353749407761553, "learning_rate": 9.365855760736064e-07, "loss": 4.1874, "step": 26360 }, { "epoch": 1.1362363785157428, "learning_rate": 9.365370940981274e-07, "loss": 4.2523, "step": 26380 }, { "epoch": 1.1370978162553302, "learning_rate": 9.364886121226486e-07, "loss": 4.4615, "step": 26400 }, { "epoch": 1.1379592539949175, "learning_rate": 9.364401301471697e-07, "loss": 4.8248, "step": 26420 }, { "epoch": 1.138820691734505, "learning_rate": 9.363916481716909e-07, "loss": 4.2533, "step": 26440 }, { "epoch": 1.1396821294740922, "learning_rate": 9.363431661962119e-07, "loss": 4.5715, "step": 26460 }, { "epoch": 1.1405435672136797, "learning_rate": 9.36294684220733e-07, "loss": 4.4302, "step": 26480 }, { "epoch": 1.141405004953267, "learning_rate": 9.362462022452541e-07, "loss": 4.3582, "step": 26500 }, { "epoch": 1.1422664426928544, "learning_rate": 9.361977202697752e-07, "loss": 4.4676, "step": 26520 }, { "epoch": 1.1431278804324418, "learning_rate": 9.361492382942963e-07, "loss": 4.3547, "step": 26540 }, { "epoch": 1.143989318172029, "learning_rate": 9.361007563188174e-07, "loss": 4.2655, "step": 26560 }, { "epoch": 1.1448507559116166, "learning_rate": 9.360522743433386e-07, "loss": 4.364, "step": 26580 }, { "epoch": 1.1457121936512038, "learning_rate": 9.360037923678596e-07, "loss": 4.2633, "step": 26600 }, { "epoch": 1.1465736313907913, "learning_rate": 9.359553103923807e-07, "loss": 4.2595, "step": 26620 }, { "epoch": 1.1474350691303785, "learning_rate": 9.359068284169018e-07, "loss": 4.3531, "step": 26640 }, { "epoch": 1.148296506869966, "learning_rate": 9.35858346441423e-07, "loss": 4.6372, "step": 26660 }, { "epoch": 1.1491579446095535, "learning_rate": 9.35809864465944e-07, "loss": 4.4899, "step": 26680 }, { "epoch": 1.1500193823491407, "learning_rate": 9.357613824904652e-07, "loss": 4.2618, "step": 26700 }, { "epoch": 1.1508808200887282, "learning_rate": 9.357129005149863e-07, "loss": 4.3635, "step": 26720 }, { "epoch": 1.1517422578283154, "learning_rate": 9.356644185395074e-07, "loss": 4.2641, "step": 26740 }, { "epoch": 1.1526036955679029, "learning_rate": 9.356159365640284e-07, "loss": 4.3223, "step": 26760 }, { "epoch": 1.1534651333074901, "learning_rate": 9.355674545885496e-07, "loss": 4.666, "step": 26780 }, { "epoch": 1.1543265710470776, "learning_rate": 9.355189726130707e-07, "loss": 4.4854, "step": 26800 }, { "epoch": 1.155188008786665, "learning_rate": 9.354704906375919e-07, "loss": 4.3282, "step": 26820 }, { "epoch": 1.1560494465262523, "learning_rate": 9.354220086621129e-07, "loss": 4.4205, "step": 26840 }, { "epoch": 1.1569108842658398, "learning_rate": 9.353735266866339e-07, "loss": 4.5329, "step": 26860 }, { "epoch": 1.157772322005427, "learning_rate": 9.353250447111551e-07, "loss": 4.2963, "step": 26880 }, { "epoch": 1.1586337597450145, "learning_rate": 9.352765627356762e-07, "loss": 4.2485, "step": 26900 }, { "epoch": 1.1594951974846017, "learning_rate": 9.352280807601973e-07, "loss": 4.4059, "step": 26920 }, { "epoch": 1.1603566352241892, "learning_rate": 9.351795987847185e-07, "loss": 4.5401, "step": 26940 }, { "epoch": 1.1612180729637767, "learning_rate": 9.351311168092397e-07, "loss": 4.6506, "step": 26960 }, { "epoch": 1.162079510703364, "learning_rate": 9.350826348337606e-07, "loss": 4.3864, "step": 26980 }, { "epoch": 1.1629409484429514, "learning_rate": 9.350341528582817e-07, "loss": 4.5897, "step": 27000 }, { "epoch": 1.1638023861825386, "learning_rate": 9.349856708828029e-07, "loss": 4.4499, "step": 27020 }, { "epoch": 1.164663823922126, "learning_rate": 9.34937188907324e-07, "loss": 4.464, "step": 27040 }, { "epoch": 1.1655252616617133, "learning_rate": 9.348887069318451e-07, "loss": 4.2179, "step": 27060 }, { "epoch": 1.1663866994013008, "learning_rate": 9.348402249563662e-07, "loss": 4.2474, "step": 27080 }, { "epoch": 1.1672481371408883, "learning_rate": 9.347917429808873e-07, "loss": 4.2296, "step": 27100 }, { "epoch": 1.1681095748804755, "learning_rate": 9.347432610054083e-07, "loss": 4.5144, "step": 27120 }, { "epoch": 1.168971012620063, "learning_rate": 9.346947790299295e-07, "loss": 4.4157, "step": 27140 }, { "epoch": 1.1698324503596502, "learning_rate": 9.346462970544506e-07, "loss": 4.4907, "step": 27160 }, { "epoch": 1.1706938880992377, "learning_rate": 9.345978150789717e-07, "loss": 4.4599, "step": 27180 }, { "epoch": 1.171555325838825, "learning_rate": 9.345493331034928e-07, "loss": 4.3646, "step": 27200 }, { "epoch": 1.1724167635784124, "learning_rate": 9.345008511280139e-07, "loss": 4.4938, "step": 27220 }, { "epoch": 1.1732782013179996, "learning_rate": 9.34452369152535e-07, "loss": 4.2979, "step": 27240 }, { "epoch": 1.1741396390575871, "learning_rate": 9.344038871770561e-07, "loss": 4.0727, "step": 27260 }, { "epoch": 1.1750010767971744, "learning_rate": 9.343554052015772e-07, "loss": 4.3746, "step": 27280 }, { "epoch": 1.1758625145367618, "learning_rate": 9.343069232260983e-07, "loss": 4.4847, "step": 27300 }, { "epoch": 1.1767239522763493, "learning_rate": 9.342584412506195e-07, "loss": 4.3683, "step": 27320 }, { "epoch": 1.1775853900159365, "learning_rate": 9.342099592751406e-07, "loss": 4.4623, "step": 27340 }, { "epoch": 1.178446827755524, "learning_rate": 9.341614772996617e-07, "loss": 4.5681, "step": 27360 }, { "epoch": 1.1793082654951113, "learning_rate": 9.341129953241827e-07, "loss": 4.4459, "step": 27380 }, { "epoch": 1.1801697032346987, "learning_rate": 9.340645133487039e-07, "loss": 4.2048, "step": 27400 }, { "epoch": 1.181031140974286, "learning_rate": 9.34016031373225e-07, "loss": 4.2432, "step": 27420 }, { "epoch": 1.1818925787138734, "learning_rate": 9.339675493977461e-07, "loss": 4.5315, "step": 27440 }, { "epoch": 1.182754016453461, "learning_rate": 9.339190674222672e-07, "loss": 4.6568, "step": 27460 }, { "epoch": 1.1836154541930481, "learning_rate": 9.338705854467885e-07, "loss": 4.3739, "step": 27480 }, { "epoch": 1.1844768919326356, "learning_rate": 9.338221034713093e-07, "loss": 4.4015, "step": 27500 }, { "epoch": 1.1853383296722229, "learning_rate": 9.337736214958305e-07, "loss": 4.5714, "step": 27520 }, { "epoch": 1.1861997674118103, "learning_rate": 9.337251395203516e-07, "loss": 4.35, "step": 27540 }, { "epoch": 1.1870612051513976, "learning_rate": 9.336766575448728e-07, "loss": 4.4903, "step": 27560 }, { "epoch": 1.187922642890985, "learning_rate": 9.336281755693938e-07, "loss": 4.487, "step": 27580 }, { "epoch": 1.1887840806305725, "learning_rate": 9.33579693593915e-07, "loss": 4.446, "step": 27600 }, { "epoch": 1.1896455183701597, "learning_rate": 9.33531211618436e-07, "loss": 4.3769, "step": 27620 }, { "epoch": 1.1905069561097472, "learning_rate": 9.33482729642957e-07, "loss": 4.3718, "step": 27640 }, { "epoch": 1.1913683938493345, "learning_rate": 9.334342476674782e-07, "loss": 4.4117, "step": 27660 }, { "epoch": 1.192229831588922, "learning_rate": 9.333857656919994e-07, "loss": 4.3429, "step": 27680 }, { "epoch": 1.1930912693285092, "learning_rate": 9.333372837165205e-07, "loss": 4.334, "step": 27700 }, { "epoch": 1.1939527070680966, "learning_rate": 9.332888017410416e-07, "loss": 4.6125, "step": 27720 }, { "epoch": 1.194814144807684, "learning_rate": 9.332403197655627e-07, "loss": 4.4454, "step": 27740 }, { "epoch": 1.1956755825472714, "learning_rate": 9.331918377900837e-07, "loss": 4.3109, "step": 27760 }, { "epoch": 1.1965370202868588, "learning_rate": 9.331433558146049e-07, "loss": 4.2727, "step": 27780 }, { "epoch": 1.197398458026446, "learning_rate": 9.33094873839126e-07, "loss": 4.4393, "step": 27800 }, { "epoch": 1.1982598957660335, "learning_rate": 9.330463918636471e-07, "loss": 4.3083, "step": 27820 }, { "epoch": 1.1991213335056208, "learning_rate": 9.329979098881682e-07, "loss": 4.4089, "step": 27840 }, { "epoch": 1.1999827712452082, "learning_rate": 9.329494279126894e-07, "loss": 4.2863, "step": 27860 }, { "epoch": 1.2008442089847957, "learning_rate": 9.329009459372103e-07, "loss": 4.3972, "step": 27880 }, { "epoch": 1.201705646724383, "learning_rate": 9.328524639617315e-07, "loss": 4.4722, "step": 27900 }, { "epoch": 1.2025670844639704, "learning_rate": 9.328039819862526e-07, "loss": 4.4498, "step": 27920 }, { "epoch": 1.2034285222035577, "learning_rate": 9.327555000107738e-07, "loss": 4.6628, "step": 27940 }, { "epoch": 1.2042899599431451, "learning_rate": 9.327070180352948e-07, "loss": 4.6212, "step": 27960 }, { "epoch": 1.2051513976827324, "learning_rate": 9.32658536059816e-07, "loss": 4.5064, "step": 27980 }, { "epoch": 1.2060128354223199, "learning_rate": 9.326100540843371e-07, "loss": 4.4675, "step": 28000 }, { "epoch": 1.2068742731619073, "learning_rate": 9.325615721088582e-07, "loss": 4.4763, "step": 28020 }, { "epoch": 1.2077357109014946, "learning_rate": 9.325130901333792e-07, "loss": 4.4464, "step": 28040 }, { "epoch": 1.208597148641082, "learning_rate": 9.324646081579004e-07, "loss": 4.2176, "step": 28060 }, { "epoch": 1.2094585863806693, "learning_rate": 9.324161261824215e-07, "loss": 4.705, "step": 28080 }, { "epoch": 1.2103200241202567, "learning_rate": 9.323676442069426e-07, "loss": 4.3475, "step": 28100 }, { "epoch": 1.211181461859844, "learning_rate": 9.323191622314637e-07, "loss": 4.1616, "step": 28120 }, { "epoch": 1.2120428995994315, "learning_rate": 9.322706802559848e-07, "loss": 4.4239, "step": 28140 }, { "epoch": 1.212904337339019, "learning_rate": 9.322221982805059e-07, "loss": 4.3745, "step": 28160 }, { "epoch": 1.2137657750786062, "learning_rate": 9.321737163050269e-07, "loss": 4.394, "step": 28180 }, { "epoch": 1.2146272128181936, "learning_rate": 9.321252343295481e-07, "loss": 4.3618, "step": 28200 }, { "epoch": 1.2154886505577809, "learning_rate": 9.320767523540693e-07, "loss": 4.3503, "step": 28220 }, { "epoch": 1.2163500882973683, "learning_rate": 9.320282703785904e-07, "loss": 4.4078, "step": 28240 }, { "epoch": 1.2172115260369556, "learning_rate": 9.319797884031113e-07, "loss": 4.2004, "step": 28260 }, { "epoch": 1.218072963776543, "learning_rate": 9.319313064276325e-07, "loss": 4.4605, "step": 28280 }, { "epoch": 1.2189344015161305, "learning_rate": 9.318828244521536e-07, "loss": 4.592, "step": 28300 }, { "epoch": 1.2197958392557178, "learning_rate": 9.318343424766748e-07, "loss": 4.1173, "step": 28320 }, { "epoch": 1.2206572769953052, "learning_rate": 9.317858605011958e-07, "loss": 4.4178, "step": 28340 }, { "epoch": 1.2215187147348925, "learning_rate": 9.31737378525717e-07, "loss": 4.8156, "step": 28360 }, { "epoch": 1.22238015247448, "learning_rate": 9.316888965502381e-07, "loss": 4.591, "step": 28380 }, { "epoch": 1.2232415902140672, "learning_rate": 9.316404145747591e-07, "loss": 4.5912, "step": 28400 }, { "epoch": 1.2241030279536547, "learning_rate": 9.315919325992802e-07, "loss": 4.2252, "step": 28420 }, { "epoch": 1.2249644656932421, "learning_rate": 9.315434506238014e-07, "loss": 4.2596, "step": 28440 }, { "epoch": 1.2258259034328294, "learning_rate": 9.314949686483225e-07, "loss": 4.4703, "step": 28460 }, { "epoch": 1.2266873411724168, "learning_rate": 9.314464866728436e-07, "loss": 4.3343, "step": 28480 }, { "epoch": 1.227548778912004, "learning_rate": 9.313980046973647e-07, "loss": 4.4048, "step": 28500 }, { "epoch": 1.2284102166515916, "learning_rate": 9.313495227218858e-07, "loss": 4.6057, "step": 28520 }, { "epoch": 1.2292716543911788, "learning_rate": 9.313010407464069e-07, "loss": 4.5505, "step": 28540 }, { "epoch": 1.2301330921307663, "learning_rate": 9.31252558770928e-07, "loss": 4.5829, "step": 28560 }, { "epoch": 1.2309945298703537, "learning_rate": 9.312040767954491e-07, "loss": 4.2724, "step": 28580 }, { "epoch": 1.231855967609941, "learning_rate": 9.311555948199703e-07, "loss": 4.5347, "step": 28600 }, { "epoch": 1.2327174053495285, "learning_rate": 9.311071128444914e-07, "loss": 4.3547, "step": 28620 }, { "epoch": 1.2335788430891157, "learning_rate": 9.310586308690123e-07, "loss": 4.5261, "step": 28640 }, { "epoch": 1.2344402808287032, "learning_rate": 9.310101488935335e-07, "loss": 4.5067, "step": 28660 }, { "epoch": 1.2353017185682904, "learning_rate": 9.309616669180547e-07, "loss": 4.4154, "step": 28680 }, { "epoch": 1.2361631563078779, "learning_rate": 9.309131849425757e-07, "loss": 4.2921, "step": 28700 }, { "epoch": 1.2370245940474653, "learning_rate": 9.308647029670968e-07, "loss": 4.5793, "step": 28720 }, { "epoch": 1.2378860317870526, "learning_rate": 9.308162209916181e-07, "loss": 4.3936, "step": 28740 }, { "epoch": 1.23874746952664, "learning_rate": 9.307677390161391e-07, "loss": 4.2317, "step": 28760 }, { "epoch": 1.2396089072662273, "learning_rate": 9.307192570406601e-07, "loss": 4.1972, "step": 28780 }, { "epoch": 1.2404703450058148, "learning_rate": 9.306707750651812e-07, "loss": 4.4496, "step": 28800 }, { "epoch": 1.241331782745402, "learning_rate": 9.306222930897024e-07, "loss": 4.4471, "step": 28820 }, { "epoch": 1.2421932204849895, "learning_rate": 9.305738111142235e-07, "loss": 4.3669, "step": 28840 }, { "epoch": 1.2430546582245767, "learning_rate": 9.305253291387446e-07, "loss": 4.6186, "step": 28860 }, { "epoch": 1.2439160959641642, "learning_rate": 9.304768471632657e-07, "loss": 4.374, "step": 28880 }, { "epoch": 1.2447775337037514, "learning_rate": 9.304283651877868e-07, "loss": 4.423, "step": 28900 }, { "epoch": 1.245638971443339, "learning_rate": 9.303798832123079e-07, "loss": 4.5189, "step": 28920 }, { "epoch": 1.2465004091829264, "learning_rate": 9.30331401236829e-07, "loss": 4.215, "step": 28940 }, { "epoch": 1.2473618469225136, "learning_rate": 9.302829192613501e-07, "loss": 4.4685, "step": 28960 }, { "epoch": 1.248223284662101, "learning_rate": 9.302344372858713e-07, "loss": 4.2605, "step": 28980 }, { "epoch": 1.2490847224016883, "learning_rate": 9.301859553103923e-07, "loss": 4.5794, "step": 29000 }, { "epoch": 1.2499461601412758, "learning_rate": 9.301374733349134e-07, "loss": 4.3712, "step": 29020 }, { "epoch": 1.250807597880863, "learning_rate": 9.300889913594345e-07, "loss": 4.2, "step": 29040 }, { "epoch": 1.2516690356204505, "learning_rate": 9.300405093839557e-07, "loss": 4.3306, "step": 29060 }, { "epoch": 1.252530473360038, "learning_rate": 9.299920274084767e-07, "loss": 4.4159, "step": 29080 }, { "epoch": 1.2533919110996252, "learning_rate": 9.299435454329979e-07, "loss": 4.5091, "step": 29100 }, { "epoch": 1.2542533488392127, "learning_rate": 9.29895063457519e-07, "loss": 4.2013, "step": 29120 }, { "epoch": 1.2551147865788, "learning_rate": 9.298465814820402e-07, "loss": 4.3171, "step": 29140 }, { "epoch": 1.2559762243183874, "learning_rate": 9.297980995065611e-07, "loss": 4.5649, "step": 29160 }, { "epoch": 1.2568376620579746, "learning_rate": 9.297496175310823e-07, "loss": 5.0627, "step": 29180 }, { "epoch": 1.2576990997975621, "learning_rate": 9.297011355556034e-07, "loss": 7.4767, "step": 29200 }, { "epoch": 1.2585605375371496, "learning_rate": 9.296526535801246e-07, "loss": 9.1112, "step": 29220 }, { "epoch": 1.2594219752767368, "learning_rate": 9.296041716046456e-07, "loss": 9.0257, "step": 29240 }, { "epoch": 1.2602834130163243, "learning_rate": 9.295556896291667e-07, "loss": 8.9185, "step": 29260 }, { "epoch": 1.2611448507559115, "learning_rate": 9.295072076536878e-07, "loss": 9.0897, "step": 29280 }, { "epoch": 1.262006288495499, "learning_rate": 9.29458725678209e-07, "loss": 8.6666, "step": 29300 }, { "epoch": 1.2628677262350863, "learning_rate": 9.2941024370273e-07, "loss": 8.6319, "step": 29320 }, { "epoch": 1.2637291639746737, "learning_rate": 9.293617617272511e-07, "loss": 8.7006, "step": 29340 }, { "epoch": 1.2645906017142612, "learning_rate": 9.293132797517723e-07, "loss": 8.5091, "step": 29360 }, { "epoch": 1.2654520394538484, "learning_rate": 9.292647977762933e-07, "loss": 8.6262, "step": 29380 }, { "epoch": 1.266313477193436, "learning_rate": 9.292163158008144e-07, "loss": 8.5565, "step": 29400 }, { "epoch": 1.2671749149330231, "learning_rate": 9.291678338253355e-07, "loss": 8.1368, "step": 29420 }, { "epoch": 1.2680363526726106, "learning_rate": 9.291193518498567e-07, "loss": 8.4045, "step": 29440 }, { "epoch": 1.2688977904121979, "learning_rate": 9.290708698743777e-07, "loss": 8.4389, "step": 29460 }, { "epoch": 1.2697592281517853, "learning_rate": 9.290223878988989e-07, "loss": 8.1549, "step": 29480 }, { "epoch": 1.2706206658913728, "learning_rate": 9.2897390592342e-07, "loss": 8.3515, "step": 29500 }, { "epoch": 1.27148210363096, "learning_rate": 9.289254239479412e-07, "loss": 8.0843, "step": 29520 }, { "epoch": 1.2723435413705475, "learning_rate": 9.288769419724621e-07, "loss": 7.5458, "step": 29540 }, { "epoch": 1.2732049791101347, "learning_rate": 9.288284599969833e-07, "loss": 7.7321, "step": 29560 }, { "epoch": 1.2740664168497222, "learning_rate": 9.287799780215045e-07, "loss": 7.8405, "step": 29580 }, { "epoch": 1.2749278545893095, "learning_rate": 9.287314960460256e-07, "loss": 7.7172, "step": 29600 }, { "epoch": 1.275789292328897, "learning_rate": 9.286830140705466e-07, "loss": 8.0011, "step": 29620 }, { "epoch": 1.2766507300684844, "learning_rate": 9.286345320950678e-07, "loss": 7.8851, "step": 29640 }, { "epoch": 1.2775121678080716, "learning_rate": 9.285860501195888e-07, "loss": 7.7286, "step": 29660 }, { "epoch": 1.278373605547659, "learning_rate": 9.285375681441099e-07, "loss": 8.2685, "step": 29680 }, { "epoch": 1.2792350432872464, "learning_rate": 9.28489086168631e-07, "loss": 8.1651, "step": 29700 }, { "epoch": 1.2800964810268338, "learning_rate": 9.284406041931522e-07, "loss": 7.8719, "step": 29720 }, { "epoch": 1.280957918766421, "learning_rate": 9.283921222176733e-07, "loss": 8.0637, "step": 29740 }, { "epoch": 1.2818193565060085, "learning_rate": 9.283436402421944e-07, "loss": 7.9024, "step": 29760 }, { "epoch": 1.282680794245596, "learning_rate": 9.282951582667155e-07, "loss": 7.7219, "step": 29780 }, { "epoch": 1.2835422319851832, "learning_rate": 9.282466762912365e-07, "loss": 7.7878, "step": 29800 }, { "epoch": 1.2844036697247707, "learning_rate": 9.281981943157577e-07, "loss": 7.6931, "step": 29820 }, { "epoch": 1.285265107464358, "learning_rate": 9.281497123402788e-07, "loss": 7.3598, "step": 29840 }, { "epoch": 1.2861265452039454, "learning_rate": 9.281012303647999e-07, "loss": 7.7623, "step": 29860 }, { "epoch": 1.2869879829435327, "learning_rate": 9.28052748389321e-07, "loss": 7.6945, "step": 29880 }, { "epoch": 1.2878494206831201, "learning_rate": 9.280042664138422e-07, "loss": 7.594, "step": 29900 }, { "epoch": 1.2887108584227076, "learning_rate": 9.279557844383631e-07, "loss": 7.5393, "step": 29920 }, { "epoch": 1.2895722961622949, "learning_rate": 9.279073024628843e-07, "loss": 7.4495, "step": 29940 }, { "epoch": 1.2904337339018823, "learning_rate": 9.278588204874054e-07, "loss": 7.7063, "step": 29960 }, { "epoch": 1.2912951716414696, "learning_rate": 9.278103385119265e-07, "loss": 7.7734, "step": 29980 }, { "epoch": 1.292156609381057, "learning_rate": 9.277618565364475e-07, "loss": 7.7084, "step": 30000 }, { "epoch": 1.2930180471206443, "learning_rate": 9.277133745609688e-07, "loss": 7.6194, "step": 30020 }, { "epoch": 1.2938794848602317, "learning_rate": 9.276648925854898e-07, "loss": 7.8689, "step": 30040 }, { "epoch": 1.2947409225998192, "learning_rate": 9.276164106100109e-07, "loss": 7.2148, "step": 30060 }, { "epoch": 1.2956023603394065, "learning_rate": 9.27567928634532e-07, "loss": 7.7463, "step": 30080 }, { "epoch": 1.296463798078994, "learning_rate": 9.275194466590532e-07, "loss": 7.7451, "step": 30100 }, { "epoch": 1.2973252358185812, "learning_rate": 9.274709646835743e-07, "loss": 7.6534, "step": 30120 }, { "epoch": 1.2981866735581686, "learning_rate": 9.274224827080954e-07, "loss": 7.6972, "step": 30140 }, { "epoch": 1.2990481112977559, "learning_rate": 9.273740007326165e-07, "loss": 7.3694, "step": 30160 }, { "epoch": 1.2999095490373433, "learning_rate": 9.273255187571376e-07, "loss": 7.2392, "step": 30180 }, { "epoch": 1.3007709867769308, "learning_rate": 9.272770367816587e-07, "loss": 7.5428, "step": 30200 }, { "epoch": 1.301632424516518, "learning_rate": 9.272285548061798e-07, "loss": 7.6846, "step": 30220 }, { "epoch": 1.3024938622561053, "learning_rate": 9.271800728307009e-07, "loss": 7.8115, "step": 30240 }, { "epoch": 1.3033552999956928, "learning_rate": 9.271315908552221e-07, "loss": 7.3245, "step": 30260 }, { "epoch": 1.3042167377352802, "learning_rate": 9.270831088797431e-07, "loss": 6.7679, "step": 30280 }, { "epoch": 1.3050781754748675, "learning_rate": 9.270346269042642e-07, "loss": 7.2834, "step": 30300 }, { "epoch": 1.305939613214455, "learning_rate": 9.269861449287853e-07, "loss": 7.2348, "step": 30320 }, { "epoch": 1.3068010509540424, "learning_rate": 9.269376629533064e-07, "loss": 7.0163, "step": 30340 }, { "epoch": 1.3076624886936297, "learning_rate": 9.268891809778275e-07, "loss": 7.1488, "step": 30360 }, { "epoch": 1.308523926433217, "learning_rate": 9.268406990023487e-07, "loss": 7.7662, "step": 30380 }, { "epoch": 1.3093853641728044, "learning_rate": 9.267922170268698e-07, "loss": 7.1247, "step": 30400 }, { "epoch": 1.3102468019123918, "learning_rate": 9.267437350513908e-07, "loss": 7.4735, "step": 30420 }, { "epoch": 1.311108239651979, "learning_rate": 9.266952530759119e-07, "loss": 7.2543, "step": 30440 }, { "epoch": 1.3119696773915666, "learning_rate": 9.26646771100433e-07, "loss": 7.1354, "step": 30460 }, { "epoch": 1.312831115131154, "learning_rate": 9.265982891249542e-07, "loss": 7.3775, "step": 30480 }, { "epoch": 1.3136925528707413, "learning_rate": 9.265498071494753e-07, "loss": 6.9614, "step": 30500 }, { "epoch": 1.3145539906103285, "learning_rate": 9.265013251739965e-07, "loss": 7.279, "step": 30520 }, { "epoch": 1.315415428349916, "learning_rate": 9.264528431985175e-07, "loss": 7.148, "step": 30540 }, { "epoch": 1.3162768660895035, "learning_rate": 9.264043612230386e-07, "loss": 6.9021, "step": 30560 }, { "epoch": 1.3171383038290907, "learning_rate": 9.263558792475596e-07, "loss": 7.1499, "step": 30580 }, { "epoch": 1.3179997415686782, "learning_rate": 9.263073972720808e-07, "loss": 7.1387, "step": 30600 }, { "epoch": 1.3188611793082656, "learning_rate": 9.262589152966019e-07, "loss": 7.1563, "step": 30620 }, { "epoch": 1.3197226170478529, "learning_rate": 9.262104333211231e-07, "loss": 6.9685, "step": 30640 }, { "epoch": 1.3205840547874401, "learning_rate": 9.261619513456441e-07, "loss": 7.0773, "step": 30660 }, { "epoch": 1.3214454925270276, "learning_rate": 9.261134693701652e-07, "loss": 7.239, "step": 30680 }, { "epoch": 1.322306930266615, "learning_rate": 9.260649873946863e-07, "loss": 7.333, "step": 30700 }, { "epoch": 1.3231683680062023, "learning_rate": 9.260165054192075e-07, "loss": 7.1882, "step": 30720 }, { "epoch": 1.3240298057457898, "learning_rate": 9.259680234437285e-07, "loss": 7.0352, "step": 30740 }, { "epoch": 1.324891243485377, "learning_rate": 9.259195414682497e-07, "loss": 7.3077, "step": 30760 }, { "epoch": 1.3257526812249645, "learning_rate": 9.258710594927708e-07, "loss": 7.2401, "step": 30780 }, { "epoch": 1.3266141189645517, "learning_rate": 9.258225775172919e-07, "loss": 6.9523, "step": 30800 }, { "epoch": 1.3274755567041392, "learning_rate": 9.257740955418129e-07, "loss": 6.9704, "step": 30820 }, { "epoch": 1.3283369944437267, "learning_rate": 9.257256135663341e-07, "loss": 7.0696, "step": 30840 }, { "epoch": 1.329198432183314, "learning_rate": 9.256771315908552e-07, "loss": 6.9114, "step": 30860 }, { "epoch": 1.3300598699229014, "learning_rate": 9.256286496153762e-07, "loss": 7.0071, "step": 30880 }, { "epoch": 1.3309213076624886, "learning_rate": 9.255801676398974e-07, "loss": 7.0653, "step": 30900 }, { "epoch": 1.331782745402076, "learning_rate": 9.255316856644186e-07, "loss": 6.9926, "step": 30920 }, { "epoch": 1.3326441831416633, "learning_rate": 9.254832036889396e-07, "loss": 6.9443, "step": 30940 }, { "epoch": 1.3335056208812508, "learning_rate": 9.254347217134606e-07, "loss": 7.1948, "step": 30960 }, { "epoch": 1.3343670586208383, "learning_rate": 9.253862397379818e-07, "loss": 7.0766, "step": 30980 }, { "epoch": 1.3352284963604255, "learning_rate": 9.253377577625029e-07, "loss": 7.1149, "step": 31000 }, { "epoch": 1.336089934100013, "learning_rate": 9.252892757870241e-07, "loss": 7.3839, "step": 31020 }, { "epoch": 1.3369513718396002, "learning_rate": 9.252407938115451e-07, "loss": 7.0784, "step": 31040 }, { "epoch": 1.3378128095791877, "learning_rate": 9.251923118360662e-07, "loss": 6.9548, "step": 31060 }, { "epoch": 1.338674247318775, "learning_rate": 9.251438298605873e-07, "loss": 7.0966, "step": 31080 }, { "epoch": 1.3395356850583624, "learning_rate": 9.250953478851085e-07, "loss": 6.9126, "step": 31100 }, { "epoch": 1.3403971227979499, "learning_rate": 9.250468659096295e-07, "loss": 6.8928, "step": 31120 }, { "epoch": 1.3412585605375371, "learning_rate": 9.249983839341507e-07, "loss": 7.2436, "step": 31140 }, { "epoch": 1.3421199982771246, "learning_rate": 9.249499019586718e-07, "loss": 6.7896, "step": 31160 }, { "epoch": 1.3429814360167118, "learning_rate": 9.249014199831928e-07, "loss": 7.2349, "step": 31180 }, { "epoch": 1.3438428737562993, "learning_rate": 9.248529380077139e-07, "loss": 6.7734, "step": 31200 }, { "epoch": 1.3447043114958865, "learning_rate": 9.248044560322351e-07, "loss": 7.0566, "step": 31220 }, { "epoch": 1.345565749235474, "learning_rate": 9.247559740567562e-07, "loss": 7.0811, "step": 31240 }, { "epoch": 1.3464271869750615, "learning_rate": 9.247074920812773e-07, "loss": 7.2334, "step": 31260 }, { "epoch": 1.3472886247146487, "learning_rate": 9.246590101057984e-07, "loss": 7.1606, "step": 31280 }, { "epoch": 1.3481500624542362, "learning_rate": 9.246105281303196e-07, "loss": 7.0721, "step": 31300 }, { "epoch": 1.3490115001938234, "learning_rate": 9.245620461548406e-07, "loss": 6.8343, "step": 31320 }, { "epoch": 1.349872937933411, "learning_rate": 9.245135641793617e-07, "loss": 7.0205, "step": 31340 }, { "epoch": 1.3507343756729981, "learning_rate": 9.244650822038829e-07, "loss": 7.0218, "step": 31360 }, { "epoch": 1.3515958134125856, "learning_rate": 9.24416600228404e-07, "loss": 6.9216, "step": 31380 }, { "epoch": 1.352457251152173, "learning_rate": 9.243681182529251e-07, "loss": 7.1311, "step": 31400 }, { "epoch": 1.3533186888917603, "learning_rate": 9.243196362774461e-07, "loss": 6.7834, "step": 31420 }, { "epoch": 1.3541801266313478, "learning_rate": 9.242711543019672e-07, "loss": 6.7601, "step": 31440 }, { "epoch": 1.355041564370935, "learning_rate": 9.242226723264884e-07, "loss": 6.9749, "step": 31460 }, { "epoch": 1.3559030021105225, "learning_rate": 9.241741903510094e-07, "loss": 6.8912, "step": 31480 }, { "epoch": 1.3567644398501097, "learning_rate": 9.241257083755305e-07, "loss": 7.0472, "step": 31500 }, { "epoch": 1.3576258775896972, "learning_rate": 9.240772264000517e-07, "loss": 7.0801, "step": 31520 }, { "epoch": 1.3584873153292847, "learning_rate": 9.240287444245728e-07, "loss": 6.9824, "step": 31540 }, { "epoch": 1.359348753068872, "learning_rate": 9.23980262449094e-07, "loss": 6.8021, "step": 31560 }, { "epoch": 1.3602101908084594, "learning_rate": 9.239317804736149e-07, "loss": 6.6614, "step": 31580 }, { "epoch": 1.3610716285480466, "learning_rate": 9.238832984981361e-07, "loss": 7.0195, "step": 31600 }, { "epoch": 1.361933066287634, "learning_rate": 9.238348165226572e-07, "loss": 6.989, "step": 31620 }, { "epoch": 1.3627945040272214, "learning_rate": 9.237863345471783e-07, "loss": 6.6913, "step": 31640 }, { "epoch": 1.3636559417668088, "learning_rate": 9.237378525716994e-07, "loss": 6.7222, "step": 31660 }, { "epoch": 1.3645173795063963, "learning_rate": 9.236893705962206e-07, "loss": 6.9444, "step": 31680 }, { "epoch": 1.3653788172459835, "learning_rate": 9.236408886207416e-07, "loss": 6.6173, "step": 31700 }, { "epoch": 1.366240254985571, "learning_rate": 9.235924066452627e-07, "loss": 6.9346, "step": 31720 }, { "epoch": 1.3671016927251582, "learning_rate": 9.235439246697838e-07, "loss": 6.9446, "step": 31740 }, { "epoch": 1.3679631304647457, "learning_rate": 9.23495442694305e-07, "loss": 7.1068, "step": 31760 }, { "epoch": 1.368824568204333, "learning_rate": 9.234469607188259e-07, "loss": 6.7589, "step": 31780 }, { "epoch": 1.3696860059439204, "learning_rate": 9.233984787433472e-07, "loss": 6.7636, "step": 31800 }, { "epoch": 1.370547443683508, "learning_rate": 9.233499967678682e-07, "loss": 6.7678, "step": 31820 }, { "epoch": 1.3714088814230951, "learning_rate": 9.233015147923894e-07, "loss": 6.8524, "step": 31840 }, { "epoch": 1.3722703191626824, "learning_rate": 9.232530328169104e-07, "loss": 6.9558, "step": 31860 }, { "epoch": 1.3731317569022699, "learning_rate": 9.232045508414316e-07, "loss": 6.7935, "step": 31880 }, { "epoch": 1.3739931946418573, "learning_rate": 9.231560688659527e-07, "loss": 6.6628, "step": 31900 }, { "epoch": 1.3748546323814446, "learning_rate": 9.231075868904739e-07, "loss": 6.8103, "step": 31920 }, { "epoch": 1.375716070121032, "learning_rate": 9.230591049149949e-07, "loss": 6.5024, "step": 31940 }, { "epoch": 1.3765775078606195, "learning_rate": 9.230106229395159e-07, "loss": 6.9965, "step": 31960 }, { "epoch": 1.3774389456002067, "learning_rate": 9.229621409640371e-07, "loss": 6.8808, "step": 31980 }, { "epoch": 1.378300383339794, "learning_rate": 9.229136589885583e-07, "loss": 6.7527, "step": 32000 }, { "epoch": 1.3791618210793815, "learning_rate": 9.228651770130793e-07, "loss": 6.7427, "step": 32020 }, { "epoch": 1.380023258818969, "learning_rate": 9.228166950376004e-07, "loss": 6.4327, "step": 32040 }, { "epoch": 1.3808846965585562, "learning_rate": 9.227682130621216e-07, "loss": 6.7198, "step": 32060 }, { "epoch": 1.3817461342981436, "learning_rate": 9.227197310866425e-07, "loss": 6.6045, "step": 32080 }, { "epoch": 1.382607572037731, "learning_rate": 9.226712491111637e-07, "loss": 6.5851, "step": 32100 }, { "epoch": 1.3834690097773183, "learning_rate": 9.226227671356848e-07, "loss": 6.809, "step": 32120 }, { "epoch": 1.3843304475169056, "learning_rate": 9.22574285160206e-07, "loss": 6.5905, "step": 32140 }, { "epoch": 1.385191885256493, "learning_rate": 9.22525803184727e-07, "loss": 6.8906, "step": 32160 }, { "epoch": 1.3860533229960805, "learning_rate": 9.224773212092482e-07, "loss": 6.6777, "step": 32180 }, { "epoch": 1.3869147607356678, "learning_rate": 9.224288392337692e-07, "loss": 6.707, "step": 32200 }, { "epoch": 1.3877761984752552, "learning_rate": 9.223803572582904e-07, "loss": 6.6338, "step": 32220 }, { "epoch": 1.3886376362148427, "learning_rate": 9.223318752828114e-07, "loss": 6.8514, "step": 32240 }, { "epoch": 1.38949907395443, "learning_rate": 9.222833933073326e-07, "loss": 6.6803, "step": 32260 }, { "epoch": 1.3903605116940172, "learning_rate": 9.222349113318537e-07, "loss": 6.7469, "step": 32280 }, { "epoch": 1.3912219494336047, "learning_rate": 9.22186429356375e-07, "loss": 6.7612, "step": 32300 }, { "epoch": 1.3920833871731921, "learning_rate": 9.221379473808959e-07, "loss": 6.8637, "step": 32320 }, { "epoch": 1.3929448249127794, "learning_rate": 9.22089465405417e-07, "loss": 6.6512, "step": 32340 }, { "epoch": 1.3938062626523668, "learning_rate": 9.220409834299381e-07, "loss": 6.5928, "step": 32360 }, { "epoch": 1.394667700391954, "learning_rate": 9.219925014544593e-07, "loss": 6.5807, "step": 32380 }, { "epoch": 1.3955291381315416, "learning_rate": 9.219440194789803e-07, "loss": 6.5286, "step": 32400 }, { "epoch": 1.3963905758711288, "learning_rate": 9.218955375035015e-07, "loss": 6.4358, "step": 32420 }, { "epoch": 1.3972520136107163, "learning_rate": 9.218470555280226e-07, "loss": 6.7531, "step": 32440 }, { "epoch": 1.3981134513503037, "learning_rate": 9.217985735525436e-07, "loss": 6.7327, "step": 32460 }, { "epoch": 1.398974889089891, "learning_rate": 9.217500915770647e-07, "loss": 6.9049, "step": 32480 }, { "epoch": 1.3998363268294785, "learning_rate": 9.217016096015858e-07, "loss": 6.744, "step": 32500 }, { "epoch": 1.4006977645690657, "learning_rate": 9.21653127626107e-07, "loss": 6.7219, "step": 32520 }, { "epoch": 1.4015592023086532, "learning_rate": 9.216046456506281e-07, "loss": 6.7781, "step": 32540 }, { "epoch": 1.4024206400482404, "learning_rate": 9.215561636751492e-07, "loss": 6.7637, "step": 32560 }, { "epoch": 1.4032820777878279, "learning_rate": 9.215076816996703e-07, "loss": 6.7455, "step": 32580 }, { "epoch": 1.4041435155274153, "learning_rate": 9.214591997241914e-07, "loss": 6.5962, "step": 32600 }, { "epoch": 1.4050049532670026, "learning_rate": 9.214107177487125e-07, "loss": 6.4006, "step": 32620 }, { "epoch": 1.40586639100659, "learning_rate": 9.213622357732336e-07, "loss": 6.4812, "step": 32640 }, { "epoch": 1.4067278287461773, "learning_rate": 9.213137537977547e-07, "loss": 6.7662, "step": 32660 }, { "epoch": 1.4075892664857648, "learning_rate": 9.212652718222759e-07, "loss": 6.7259, "step": 32680 }, { "epoch": 1.408450704225352, "learning_rate": 9.212167898467969e-07, "loss": 6.7009, "step": 32700 }, { "epoch": 1.4093121419649395, "learning_rate": 9.21168307871318e-07, "loss": 6.4475, "step": 32720 }, { "epoch": 1.410173579704527, "learning_rate": 9.211198258958391e-07, "loss": 6.4441, "step": 32740 }, { "epoch": 1.4110350174441142, "learning_rate": 9.210713439203602e-07, "loss": 6.6392, "step": 32760 }, { "epoch": 1.4118964551837017, "learning_rate": 9.210228619448813e-07, "loss": 6.5847, "step": 32780 }, { "epoch": 1.412757892923289, "learning_rate": 9.209743799694025e-07, "loss": 6.5724, "step": 32800 }, { "epoch": 1.4136193306628764, "learning_rate": 9.209258979939236e-07, "loss": 6.7541, "step": 32820 }, { "epoch": 1.4144807684024636, "learning_rate": 9.208774160184446e-07, "loss": 6.5684, "step": 32840 }, { "epoch": 1.415342206142051, "learning_rate": 9.208289340429657e-07, "loss": 6.4602, "step": 32860 }, { "epoch": 1.4162036438816386, "learning_rate": 9.207804520674869e-07, "loss": 6.7454, "step": 32880 }, { "epoch": 1.4170650816212258, "learning_rate": 9.20731970092008e-07, "loss": 6.5201, "step": 32900 }, { "epoch": 1.4179265193608133, "learning_rate": 9.206834881165291e-07, "loss": 6.9773, "step": 32920 }, { "epoch": 1.4187879571004005, "learning_rate": 9.206350061410502e-07, "loss": 6.6975, "step": 32940 }, { "epoch": 1.419649394839988, "learning_rate": 9.205865241655713e-07, "loss": 6.451, "step": 32960 }, { "epoch": 1.4205108325795752, "learning_rate": 9.205380421900924e-07, "loss": 6.311, "step": 32980 }, { "epoch": 1.4213722703191627, "learning_rate": 9.204895602146135e-07, "loss": 6.6721, "step": 33000 }, { "epoch": 1.4222337080587502, "learning_rate": 9.204410782391346e-07, "loss": 6.5873, "step": 33020 }, { "epoch": 1.4230951457983374, "learning_rate": 9.203925962636556e-07, "loss": 6.7574, "step": 33040 }, { "epoch": 1.4239565835379249, "learning_rate": 9.203441142881769e-07, "loss": 6.7247, "step": 33060 }, { "epoch": 1.4248180212775121, "learning_rate": 9.20295632312698e-07, "loss": 6.7455, "step": 33080 }, { "epoch": 1.4256794590170996, "learning_rate": 9.20247150337219e-07, "loss": 6.559, "step": 33100 }, { "epoch": 1.4265408967566868, "learning_rate": 9.201986683617401e-07, "loss": 6.2385, "step": 33120 }, { "epoch": 1.4274023344962743, "learning_rate": 9.201501863862613e-07, "loss": 6.4429, "step": 33140 }, { "epoch": 1.4282637722358618, "learning_rate": 9.201017044107823e-07, "loss": 6.7354, "step": 33160 }, { "epoch": 1.429125209975449, "learning_rate": 9.200532224353035e-07, "loss": 6.3568, "step": 33180 }, { "epoch": 1.4299866477150365, "learning_rate": 9.200047404598246e-07, "loss": 6.4991, "step": 33200 }, { "epoch": 1.4308480854546237, "learning_rate": 9.199562584843456e-07, "loss": 6.6018, "step": 33220 }, { "epoch": 1.4317095231942112, "learning_rate": 9.199077765088667e-07, "loss": 6.5083, "step": 33240 }, { "epoch": 1.4325709609337984, "learning_rate": 9.198592945333879e-07, "loss": 6.408, "step": 33260 }, { "epoch": 1.433432398673386, "learning_rate": 9.19810812557909e-07, "loss": 6.7178, "step": 33280 }, { "epoch": 1.4342938364129734, "learning_rate": 9.197623305824301e-07, "loss": 6.3813, "step": 33300 }, { "epoch": 1.4351552741525606, "learning_rate": 9.197138486069512e-07, "loss": 6.601, "step": 33320 }, { "epoch": 1.4360167118921479, "learning_rate": 9.196653666314724e-07, "loss": 6.7897, "step": 33340 }, { "epoch": 1.4368781496317353, "learning_rate": 9.196168846559933e-07, "loss": 6.7652, "step": 33360 }, { "epoch": 1.4377395873713228, "learning_rate": 9.195684026805145e-07, "loss": 6.7107, "step": 33380 }, { "epoch": 1.43860102511091, "learning_rate": 9.195199207050356e-07, "loss": 5.7782, "step": 33400 }, { "epoch": 1.4394624628504975, "learning_rate": 9.194714387295568e-07, "loss": 5.4448, "step": 33420 }, { "epoch": 1.440323900590085, "learning_rate": 9.194229567540778e-07, "loss": 5.6638, "step": 33440 }, { "epoch": 1.4411853383296722, "learning_rate": 9.19374474778599e-07, "loss": 5.1264, "step": 33460 }, { "epoch": 1.4420467760692595, "learning_rate": 9.1932599280312e-07, "loss": 5.0293, "step": 33480 }, { "epoch": 1.442908213808847, "learning_rate": 9.192775108276412e-07, "loss": 4.9955, "step": 33500 }, { "epoch": 1.4437696515484344, "learning_rate": 9.192290288521622e-07, "loss": 4.7765, "step": 33520 }, { "epoch": 1.4446310892880216, "learning_rate": 9.191805468766834e-07, "loss": 4.6915, "step": 33540 }, { "epoch": 1.445492527027609, "learning_rate": 9.191320649012044e-07, "loss": 4.6286, "step": 33560 }, { "epoch": 1.4463539647671966, "learning_rate": 9.190835829257256e-07, "loss": 4.7875, "step": 33580 }, { "epoch": 1.4472154025067838, "learning_rate": 9.190351009502466e-07, "loss": 4.5608, "step": 33600 }, { "epoch": 1.448076840246371, "learning_rate": 9.189866189747678e-07, "loss": 4.3453, "step": 33620 }, { "epoch": 1.4489382779859585, "learning_rate": 9.189381369992889e-07, "loss": 4.8078, "step": 33640 }, { "epoch": 1.449799715725546, "learning_rate": 9.1888965502381e-07, "loss": 4.7774, "step": 33660 }, { "epoch": 1.4506611534651332, "learning_rate": 9.188411730483311e-07, "loss": 4.8069, "step": 33680 }, { "epoch": 1.4515225912047207, "learning_rate": 9.187926910728522e-07, "loss": 4.5375, "step": 33700 }, { "epoch": 1.4523840289443082, "learning_rate": 9.187442090973734e-07, "loss": 4.6969, "step": 33720 }, { "epoch": 1.4532454666838954, "learning_rate": 9.186957271218943e-07, "loss": 4.6461, "step": 33740 }, { "epoch": 1.4541069044234827, "learning_rate": 9.186472451464155e-07, "loss": 4.6505, "step": 33760 }, { "epoch": 1.4549683421630701, "learning_rate": 9.185987631709366e-07, "loss": 4.4535, "step": 33780 }, { "epoch": 1.4558297799026576, "learning_rate": 9.185502811954578e-07, "loss": 4.8117, "step": 33800 }, { "epoch": 1.4566912176422449, "learning_rate": 9.185017992199788e-07, "loss": 4.6628, "step": 33820 }, { "epoch": 1.4575526553818323, "learning_rate": 9.184533172445e-07, "loss": 4.6842, "step": 33840 }, { "epoch": 1.4584140931214198, "learning_rate": 9.18404835269021e-07, "loss": 4.4481, "step": 33860 }, { "epoch": 1.459275530861007, "learning_rate": 9.183563532935422e-07, "loss": 4.7234, "step": 33880 }, { "epoch": 1.4601369686005943, "learning_rate": 9.183078713180632e-07, "loss": 4.6209, "step": 33900 }, { "epoch": 1.4609984063401817, "learning_rate": 9.182593893425844e-07, "loss": 4.486, "step": 33920 }, { "epoch": 1.4618598440797692, "learning_rate": 9.182109073671055e-07, "loss": 4.6013, "step": 33940 }, { "epoch": 1.4627212818193565, "learning_rate": 9.181624253916267e-07, "loss": 4.4464, "step": 33960 }, { "epoch": 1.463582719558944, "learning_rate": 9.181139434161476e-07, "loss": 4.5521, "step": 33980 }, { "epoch": 1.4644441572985312, "learning_rate": 9.180654614406688e-07, "loss": 4.5197, "step": 34000 }, { "epoch": 1.4653055950381186, "learning_rate": 9.180169794651899e-07, "loss": 4.3679, "step": 34020 }, { "epoch": 1.4661670327777059, "learning_rate": 9.17968497489711e-07, "loss": 4.7741, "step": 34040 }, { "epoch": 1.4670284705172933, "learning_rate": 9.179200155142321e-07, "loss": 4.8057, "step": 34060 }, { "epoch": 1.4678899082568808, "learning_rate": 9.178715335387534e-07, "loss": 4.4689, "step": 34080 }, { "epoch": 1.468751345996468, "learning_rate": 9.178230515632744e-07, "loss": 4.5038, "step": 34100 }, { "epoch": 1.4696127837360555, "learning_rate": 9.177745695877953e-07, "loss": 4.6934, "step": 34120 }, { "epoch": 1.4704742214756428, "learning_rate": 9.177260876123165e-07, "loss": 4.5616, "step": 34140 }, { "epoch": 1.4713356592152302, "learning_rate": 9.176776056368377e-07, "loss": 4.5588, "step": 34160 }, { "epoch": 1.4721970969548175, "learning_rate": 9.176291236613588e-07, "loss": 4.5878, "step": 34180 }, { "epoch": 1.473058534694405, "learning_rate": 9.175806416858798e-07, "loss": 4.4511, "step": 34200 }, { "epoch": 1.4739199724339924, "learning_rate": 9.17532159710401e-07, "loss": 4.3849, "step": 34220 }, { "epoch": 1.4747814101735797, "learning_rate": 9.17483677734922e-07, "loss": 4.573, "step": 34240 }, { "epoch": 1.4756428479131671, "learning_rate": 9.174351957594432e-07, "loss": 4.6106, "step": 34260 }, { "epoch": 1.4765042856527544, "learning_rate": 9.173867137839642e-07, "loss": 4.5424, "step": 34280 }, { "epoch": 1.4773657233923418, "learning_rate": 9.173382318084854e-07, "loss": 4.3785, "step": 34300 }, { "epoch": 1.478227161131929, "learning_rate": 9.172897498330065e-07, "loss": 4.6613, "step": 34320 }, { "epoch": 1.4790885988715166, "learning_rate": 9.172412678575276e-07, "loss": 4.31, "step": 34340 }, { "epoch": 1.479950036611104, "learning_rate": 9.171927858820487e-07, "loss": 4.5539, "step": 34360 }, { "epoch": 1.4808114743506913, "learning_rate": 9.171443039065698e-07, "loss": 4.4384, "step": 34380 }, { "epoch": 1.4816729120902787, "learning_rate": 9.17095821931091e-07, "loss": 4.4427, "step": 34400 }, { "epoch": 1.482534349829866, "learning_rate": 9.17047339955612e-07, "loss": 4.7581, "step": 34420 }, { "epoch": 1.4833957875694535, "learning_rate": 9.169988579801331e-07, "loss": 4.5789, "step": 34440 }, { "epoch": 1.4842572253090407, "learning_rate": 9.169503760046543e-07, "loss": 4.4681, "step": 34460 }, { "epoch": 1.4851186630486282, "learning_rate": 9.169018940291754e-07, "loss": 4.5523, "step": 34480 }, { "epoch": 1.4859801007882156, "learning_rate": 9.168534120536964e-07, "loss": 4.3729, "step": 34500 }, { "epoch": 1.4868415385278029, "learning_rate": 9.168049300782175e-07, "loss": 4.5161, "step": 34520 }, { "epoch": 1.4877029762673903, "learning_rate": 9.167564481027387e-07, "loss": 4.4172, "step": 34540 }, { "epoch": 1.4885644140069776, "learning_rate": 9.167079661272598e-07, "loss": 4.261, "step": 34560 }, { "epoch": 1.489425851746565, "learning_rate": 9.166594841517809e-07, "loss": 4.23, "step": 34580 }, { "epoch": 1.4902872894861523, "learning_rate": 9.16611002176302e-07, "loss": 4.339, "step": 34600 }, { "epoch": 1.4911487272257398, "learning_rate": 9.165625202008231e-07, "loss": 4.1811, "step": 34620 }, { "epoch": 1.4920101649653272, "learning_rate": 9.165140382253441e-07, "loss": 4.3727, "step": 34640 }, { "epoch": 1.4928716027049145, "learning_rate": 9.164655562498652e-07, "loss": 4.5534, "step": 34660 }, { "epoch": 1.493733040444502, "learning_rate": 9.164170742743864e-07, "loss": 4.3347, "step": 34680 }, { "epoch": 1.4945944781840892, "learning_rate": 9.163685922989076e-07, "loss": 4.2064, "step": 34700 }, { "epoch": 1.4954559159236767, "learning_rate": 9.163201103234286e-07, "loss": 4.4352, "step": 34720 }, { "epoch": 1.496317353663264, "learning_rate": 9.162716283479497e-07, "loss": 4.4662, "step": 34740 }, { "epoch": 1.4971787914028514, "learning_rate": 9.162231463724708e-07, "loss": 4.4698, "step": 34760 }, { "epoch": 1.4980402291424388, "learning_rate": 9.161746643969919e-07, "loss": 4.2969, "step": 34780 }, { "epoch": 1.498901666882026, "learning_rate": 9.16126182421513e-07, "loss": 4.5373, "step": 34800 }, { "epoch": 1.4997631046216136, "learning_rate": 9.16077700446034e-07, "loss": 4.2752, "step": 34820 }, { "epoch": 1.5006245423612008, "learning_rate": 9.160292184705553e-07, "loss": 4.4435, "step": 34840 }, { "epoch": 1.5014859801007883, "learning_rate": 9.159807364950764e-07, "loss": 4.5597, "step": 34860 }, { "epoch": 1.5023474178403755, "learning_rate": 9.159322545195974e-07, "loss": 4.2376, "step": 34880 }, { "epoch": 1.503208855579963, "learning_rate": 9.158837725441185e-07, "loss": 4.6336, "step": 34900 }, { "epoch": 1.5040702933195504, "learning_rate": 9.158352905686398e-07, "loss": 4.265, "step": 34920 }, { "epoch": 1.5049317310591377, "learning_rate": 9.157868085931607e-07, "loss": 4.6778, "step": 34940 }, { "epoch": 1.505793168798725, "learning_rate": 9.157383266176819e-07, "loss": 4.5546, "step": 34960 }, { "epoch": 1.5066546065383124, "learning_rate": 9.15689844642203e-07, "loss": 4.4426, "step": 34980 }, { "epoch": 1.5075160442778999, "learning_rate": 9.156413626667241e-07, "loss": 4.6047, "step": 35000 }, { "epoch": 1.5083774820174871, "learning_rate": 9.155928806912451e-07, "loss": 4.3895, "step": 35020 }, { "epoch": 1.5092389197570746, "learning_rate": 9.155443987157663e-07, "loss": 4.2913, "step": 35040 }, { "epoch": 1.510100357496662, "learning_rate": 9.154959167402874e-07, "loss": 4.4838, "step": 35060 }, { "epoch": 1.5109617952362493, "learning_rate": 9.154474347648086e-07, "loss": 4.7426, "step": 35080 }, { "epoch": 1.5118232329758365, "learning_rate": 9.153989527893296e-07, "loss": 4.3318, "step": 35100 }, { "epoch": 1.512684670715424, "learning_rate": 9.153504708138508e-07, "loss": 4.2035, "step": 35120 }, { "epoch": 1.5135461084550115, "learning_rate": 9.153019888383718e-07, "loss": 4.4679, "step": 35140 }, { "epoch": 1.5144075461945987, "learning_rate": 9.15253506862893e-07, "loss": 4.2768, "step": 35160 }, { "epoch": 1.5152689839341862, "learning_rate": 9.15205024887414e-07, "loss": 4.447, "step": 35180 }, { "epoch": 1.5161304216737737, "learning_rate": 9.151565429119351e-07, "loss": 4.5441, "step": 35200 }, { "epoch": 1.516991859413361, "learning_rate": 9.151080609364563e-07, "loss": 4.239, "step": 35220 }, { "epoch": 1.5178532971529481, "learning_rate": 9.150595789609775e-07, "loss": 4.4943, "step": 35240 }, { "epoch": 1.5187147348925356, "learning_rate": 9.150110969854984e-07, "loss": 4.28, "step": 35260 }, { "epoch": 1.519576172632123, "learning_rate": 9.149626150100195e-07, "loss": 4.3123, "step": 35280 }, { "epoch": 1.5204376103717103, "learning_rate": 9.149141330345407e-07, "loss": 4.4965, "step": 35300 }, { "epoch": 1.5212990481112978, "learning_rate": 9.148656510590617e-07, "loss": 4.2643, "step": 35320 }, { "epoch": 1.5221604858508853, "learning_rate": 9.148171690835828e-07, "loss": 4.4908, "step": 35340 }, { "epoch": 1.5230219235904725, "learning_rate": 9.14768687108104e-07, "loss": 4.5154, "step": 35360 }, { "epoch": 1.5238833613300597, "learning_rate": 9.147202051326251e-07, "loss": 4.5667, "step": 35380 }, { "epoch": 1.5247447990696472, "learning_rate": 9.146717231571461e-07, "loss": 4.3141, "step": 35400 }, { "epoch": 1.5256062368092347, "learning_rate": 9.146232411816673e-07, "loss": 4.3248, "step": 35420 }, { "epoch": 1.526467674548822, "learning_rate": 9.145747592061884e-07, "loss": 4.4251, "step": 35440 }, { "epoch": 1.5273291122884094, "learning_rate": 9.145262772307096e-07, "loss": 4.612, "step": 35460 }, { "epoch": 1.5281905500279969, "learning_rate": 9.144777952552306e-07, "loss": 4.5537, "step": 35480 }, { "epoch": 1.529051987767584, "learning_rate": 9.144293132797518e-07, "loss": 4.414, "step": 35500 }, { "epoch": 1.5299134255071714, "learning_rate": 9.143808313042728e-07, "loss": 4.4837, "step": 35520 }, { "epoch": 1.5307748632467588, "learning_rate": 9.14332349328794e-07, "loss": 4.5767, "step": 35540 }, { "epoch": 1.5316363009863463, "learning_rate": 9.14283867353315e-07, "loss": 4.3294, "step": 35560 }, { "epoch": 1.5324977387259335, "learning_rate": 9.142353853778362e-07, "loss": 4.2305, "step": 35580 }, { "epoch": 1.533359176465521, "learning_rate": 9.141869034023573e-07, "loss": 4.522, "step": 35600 }, { "epoch": 1.5342206142051085, "learning_rate": 9.141384214268784e-07, "loss": 4.3252, "step": 35620 }, { "epoch": 1.5350820519446957, "learning_rate": 9.140899394513994e-07, "loss": 4.4498, "step": 35640 }, { "epoch": 1.535943489684283, "learning_rate": 9.140414574759206e-07, "loss": 4.3863, "step": 35660 }, { "epoch": 1.5368049274238704, "learning_rate": 9.139929755004417e-07, "loss": 4.279, "step": 35680 }, { "epoch": 1.537666365163458, "learning_rate": 9.139444935249628e-07, "loss": 4.3294, "step": 35700 }, { "epoch": 1.5385278029030451, "learning_rate": 9.138960115494839e-07, "loss": 4.5648, "step": 35720 }, { "epoch": 1.5393892406426326, "learning_rate": 9.13847529574005e-07, "loss": 4.6493, "step": 35740 }, { "epoch": 1.54025067838222, "learning_rate": 9.137990475985261e-07, "loss": 4.4849, "step": 35760 }, { "epoch": 1.5411121161218073, "learning_rate": 9.137505656230472e-07, "loss": 4.5569, "step": 35780 }, { "epoch": 1.5419735538613946, "learning_rate": 9.137020836475683e-07, "loss": 4.5644, "step": 35800 }, { "epoch": 1.542834991600982, "learning_rate": 9.136536016720894e-07, "loss": 4.4694, "step": 35820 }, { "epoch": 1.5436964293405695, "learning_rate": 9.136051196966106e-07, "loss": 4.5983, "step": 35840 }, { "epoch": 1.5445578670801567, "learning_rate": 9.135566377211316e-07, "loss": 4.4882, "step": 35860 }, { "epoch": 1.545419304819744, "learning_rate": 9.135081557456528e-07, "loss": 4.5535, "step": 35880 }, { "epoch": 1.5462807425593317, "learning_rate": 9.134596737701738e-07, "loss": 4.4972, "step": 35900 }, { "epoch": 1.547142180298919, "learning_rate": 9.134111917946949e-07, "loss": 4.6593, "step": 35920 }, { "epoch": 1.5480036180385062, "learning_rate": 9.13362709819216e-07, "loss": 4.1969, "step": 35940 }, { "epoch": 1.5488650557780936, "learning_rate": 9.133142278437372e-07, "loss": 4.2865, "step": 35960 }, { "epoch": 1.549726493517681, "learning_rate": 9.132657458682583e-07, "loss": 4.2481, "step": 35980 }, { "epoch": 1.5505879312572683, "learning_rate": 9.132172638927794e-07, "loss": 4.6457, "step": 36000 }, { "epoch": 1.5514493689968556, "learning_rate": 9.131687819173004e-07, "loss": 4.4537, "step": 36020 }, { "epoch": 1.5523108067364433, "learning_rate": 9.131202999418216e-07, "loss": 4.2257, "step": 36040 }, { "epoch": 1.5531722444760305, "learning_rate": 9.130718179663427e-07, "loss": 4.3691, "step": 36060 }, { "epoch": 1.5540336822156178, "learning_rate": 9.130233359908638e-07, "loss": 4.3142, "step": 36080 }, { "epoch": 1.5548951199552052, "learning_rate": 9.129748540153849e-07, "loss": 4.3368, "step": 36100 }, { "epoch": 1.5557565576947927, "learning_rate": 9.129263720399061e-07, "loss": 4.638, "step": 36120 }, { "epoch": 1.55661799543438, "learning_rate": 9.128778900644272e-07, "loss": 4.3089, "step": 36140 }, { "epoch": 1.5574794331739672, "learning_rate": 9.128294080889482e-07, "loss": 4.6912, "step": 36160 }, { "epoch": 1.5583408709135549, "learning_rate": 9.127809261134694e-07, "loss": 4.4828, "step": 36180 }, { "epoch": 1.5592023086531421, "learning_rate": 9.127324441379905e-07, "loss": 4.1292, "step": 36200 }, { "epoch": 1.5600637463927294, "learning_rate": 9.126839621625115e-07, "loss": 4.5201, "step": 36220 }, { "epoch": 1.5609251841323168, "learning_rate": 9.126354801870327e-07, "loss": 4.2531, "step": 36240 }, { "epoch": 1.5617866218719043, "learning_rate": 9.125869982115538e-07, "loss": 4.4037, "step": 36260 }, { "epoch": 1.5626480596114916, "learning_rate": 9.125385162360748e-07, "loss": 4.5505, "step": 36280 }, { "epoch": 1.5635094973510788, "learning_rate": 9.124900342605959e-07, "loss": 4.297, "step": 36300 }, { "epoch": 1.5643709350906663, "learning_rate": 9.124415522851171e-07, "loss": 4.5845, "step": 36320 }, { "epoch": 1.5652323728302537, "learning_rate": 9.123930703096382e-07, "loss": 4.5232, "step": 36340 }, { "epoch": 1.566093810569841, "learning_rate": 9.123445883341593e-07, "loss": 4.4532, "step": 36360 }, { "epoch": 1.5669552483094285, "learning_rate": 9.122961063586804e-07, "loss": 4.6118, "step": 36380 }, { "epoch": 1.567816686049016, "learning_rate": 9.122476243832014e-07, "loss": 4.3377, "step": 36400 }, { "epoch": 1.5686781237886032, "learning_rate": 9.121991424077226e-07, "loss": 4.4561, "step": 36420 }, { "epoch": 1.5695395615281904, "learning_rate": 9.121506604322436e-07, "loss": 4.4312, "step": 36440 }, { "epoch": 1.5704009992677779, "learning_rate": 9.121021784567648e-07, "loss": 4.5839, "step": 36460 }, { "epoch": 1.5712624370073653, "learning_rate": 9.120536964812859e-07, "loss": 4.5667, "step": 36480 }, { "epoch": 1.5721238747469526, "learning_rate": 9.120052145058071e-07, "loss": 4.4461, "step": 36500 }, { "epoch": 1.57298531248654, "learning_rate": 9.119567325303281e-07, "loss": 4.4775, "step": 36520 }, { "epoch": 1.5738467502261275, "learning_rate": 9.119082505548492e-07, "loss": 4.4642, "step": 36540 }, { "epoch": 1.5747081879657148, "learning_rate": 9.118597685793703e-07, "loss": 4.4668, "step": 36560 }, { "epoch": 1.575569625705302, "learning_rate": 9.118112866038915e-07, "loss": 4.4052, "step": 36580 }, { "epoch": 1.5764310634448895, "learning_rate": 9.117628046284124e-07, "loss": 4.6068, "step": 36600 }, { "epoch": 1.577292501184477, "learning_rate": 9.117143226529337e-07, "loss": 4.2858, "step": 36620 }, { "epoch": 1.5781539389240642, "learning_rate": 9.116658406774548e-07, "loss": 4.3676, "step": 36640 }, { "epoch": 1.5790153766636517, "learning_rate": 9.116173587019759e-07, "loss": 4.3611, "step": 36660 }, { "epoch": 1.5798768144032391, "learning_rate": 9.115688767264969e-07, "loss": 4.2911, "step": 36680 }, { "epoch": 1.5807382521428264, "learning_rate": 9.115203947510182e-07, "loss": 4.4236, "step": 36700 }, { "epoch": 1.5815996898824136, "learning_rate": 9.114719127755392e-07, "loss": 4.4679, "step": 36720 }, { "epoch": 1.582461127622001, "learning_rate": 9.114234308000604e-07, "loss": 4.4973, "step": 36740 }, { "epoch": 1.5833225653615886, "learning_rate": 9.113749488245814e-07, "loss": 4.5085, "step": 36760 }, { "epoch": 1.5841840031011758, "learning_rate": 9.113264668491025e-07, "loss": 4.2936, "step": 36780 }, { "epoch": 1.5850454408407633, "learning_rate": 9.112779848736236e-07, "loss": 4.5846, "step": 36800 }, { "epoch": 1.5859068785803507, "learning_rate": 9.112295028981446e-07, "loss": 4.4171, "step": 36820 }, { "epoch": 1.586768316319938, "learning_rate": 9.111810209226658e-07, "loss": 4.3638, "step": 36840 }, { "epoch": 1.5876297540595252, "learning_rate": 9.11132538947187e-07, "loss": 4.4992, "step": 36860 }, { "epoch": 1.5884911917991127, "learning_rate": 9.110840569717081e-07, "loss": 4.3224, "step": 36880 }, { "epoch": 1.5893526295387002, "learning_rate": 9.110355749962291e-07, "loss": 4.5416, "step": 36900 }, { "epoch": 1.5902140672782874, "learning_rate": 9.109870930207502e-07, "loss": 4.369, "step": 36920 }, { "epoch": 1.5910755050178749, "learning_rate": 9.109386110452713e-07, "loss": 4.6777, "step": 36940 }, { "epoch": 1.5919369427574623, "learning_rate": 9.108901290697925e-07, "loss": 4.4192, "step": 36960 }, { "epoch": 1.5927983804970496, "learning_rate": 9.108416470943135e-07, "loss": 4.4574, "step": 36980 }, { "epoch": 1.5936598182366368, "learning_rate": 9.107931651188347e-07, "loss": 4.4622, "step": 37000 }, { "epoch": 1.5945212559762243, "learning_rate": 9.107446831433558e-07, "loss": 4.2659, "step": 37020 }, { "epoch": 1.5953826937158118, "learning_rate": 9.106962011678769e-07, "loss": 4.3141, "step": 37040 }, { "epoch": 1.596244131455399, "learning_rate": 9.106477191923979e-07, "loss": 4.1779, "step": 37060 }, { "epoch": 1.5971055691949865, "learning_rate": 9.105992372169191e-07, "loss": 4.4769, "step": 37080 }, { "epoch": 1.597967006934574, "learning_rate": 9.105507552414402e-07, "loss": 4.306, "step": 37100 }, { "epoch": 1.5988284446741612, "learning_rate": 9.105022732659612e-07, "loss": 4.3981, "step": 37120 }, { "epoch": 1.5996898824137484, "learning_rate": 9.104537912904824e-07, "loss": 4.5685, "step": 37140 }, { "epoch": 1.600551320153336, "learning_rate": 9.104053093150035e-07, "loss": 4.2877, "step": 37160 }, { "epoch": 1.6014127578929234, "learning_rate": 9.103568273395246e-07, "loss": 4.4643, "step": 37180 }, { "epoch": 1.6022741956325106, "learning_rate": 9.103083453640457e-07, "loss": 4.4946, "step": 37200 }, { "epoch": 1.603135633372098, "learning_rate": 9.102598633885668e-07, "loss": 4.249, "step": 37220 }, { "epoch": 1.6039970711116855, "learning_rate": 9.10211381413088e-07, "loss": 4.3711, "step": 37240 }, { "epoch": 1.6048585088512728, "learning_rate": 9.101628994376091e-07, "loss": 4.4879, "step": 37260 }, { "epoch": 1.60571994659086, "learning_rate": 9.101144174621302e-07, "loss": 4.4627, "step": 37280 }, { "epoch": 1.6065813843304475, "learning_rate": 9.100659354866512e-07, "loss": 4.3324, "step": 37300 }, { "epoch": 1.607442822070035, "learning_rate": 9.100174535111724e-07, "loss": 4.345, "step": 37320 }, { "epoch": 1.6083042598096222, "learning_rate": 9.099689715356935e-07, "loss": 4.4219, "step": 37340 }, { "epoch": 1.6091656975492097, "learning_rate": 9.099204895602145e-07, "loss": 4.1766, "step": 37360 }, { "epoch": 1.6100271352887972, "learning_rate": 9.098720075847357e-07, "loss": 4.4342, "step": 37380 }, { "epoch": 1.6108885730283844, "learning_rate": 9.098235256092569e-07, "loss": 4.232, "step": 37400 }, { "epoch": 1.6117500107679716, "learning_rate": 9.097750436337778e-07, "loss": 4.3993, "step": 37420 }, { "epoch": 1.612611448507559, "learning_rate": 9.097265616582988e-07, "loss": 4.3122, "step": 37440 }, { "epoch": 1.6134728862471466, "learning_rate": 9.096780796828201e-07, "loss": 4.3472, "step": 37460 }, { "epoch": 1.6143343239867338, "learning_rate": 9.096295977073412e-07, "loss": 4.3092, "step": 37480 }, { "epoch": 1.615195761726321, "learning_rate": 9.095811157318623e-07, "loss": 4.1438, "step": 37500 }, { "epoch": 1.6160571994659088, "learning_rate": 9.095326337563834e-07, "loss": 4.4261, "step": 37520 }, { "epoch": 1.616918637205496, "learning_rate": 9.094841517809045e-07, "loss": 4.3154, "step": 37540 }, { "epoch": 1.6177800749450832, "learning_rate": 9.094356698054256e-07, "loss": 4.363, "step": 37560 }, { "epoch": 1.6186415126846707, "learning_rate": 9.093871878299467e-07, "loss": 4.2389, "step": 37580 }, { "epoch": 1.6195029504242582, "learning_rate": 9.093387058544678e-07, "loss": 4.3851, "step": 37600 }, { "epoch": 1.6203643881638454, "learning_rate": 9.09290223878989e-07, "loss": 4.4013, "step": 37620 }, { "epoch": 1.6212258259034327, "learning_rate": 9.092417419035101e-07, "loss": 4.3324, "step": 37640 }, { "epoch": 1.6220872636430204, "learning_rate": 9.091932599280312e-07, "loss": 4.373, "step": 37660 }, { "epoch": 1.6229487013826076, "learning_rate": 9.091447779525522e-07, "loss": 4.6259, "step": 37680 }, { "epoch": 1.6238101391221949, "learning_rate": 9.090962959770734e-07, "loss": 4.2269, "step": 37700 }, { "epoch": 1.6246715768617823, "learning_rate": 9.090478140015944e-07, "loss": 4.3219, "step": 37720 }, { "epoch": 1.6255330146013698, "learning_rate": 9.089993320261156e-07, "loss": 4.2986, "step": 37740 }, { "epoch": 1.626394452340957, "learning_rate": 9.089508500506367e-07, "loss": 4.4416, "step": 37760 }, { "epoch": 1.6272558900805443, "learning_rate": 9.089023680751579e-07, "loss": 4.3954, "step": 37780 }, { "epoch": 1.628117327820132, "learning_rate": 9.088538860996788e-07, "loss": 4.3065, "step": 37800 }, { "epoch": 1.6289787655597192, "learning_rate": 9.088054041242e-07, "loss": 4.2826, "step": 37820 }, { "epoch": 1.6298402032993065, "learning_rate": 9.087569221487211e-07, "loss": 4.4086, "step": 37840 }, { "epoch": 1.630701641038894, "learning_rate": 9.087084401732423e-07, "loss": 4.4004, "step": 37860 }, { "epoch": 1.6315630787784814, "learning_rate": 9.086599581977633e-07, "loss": 4.5516, "step": 37880 }, { "epoch": 1.6324245165180686, "learning_rate": 9.086114762222844e-07, "loss": 4.4274, "step": 37900 }, { "epoch": 1.6332859542576559, "learning_rate": 9.085629942468056e-07, "loss": 4.3502, "step": 37920 }, { "epoch": 1.6341473919972433, "learning_rate": 9.085145122713267e-07, "loss": 4.4738, "step": 37940 }, { "epoch": 1.6350088297368308, "learning_rate": 9.084660302958478e-07, "loss": 4.2082, "step": 37960 }, { "epoch": 1.635870267476418, "learning_rate": 9.084175483203688e-07, "loss": 4.4372, "step": 37980 }, { "epoch": 1.6367317052160055, "learning_rate": 9.0836906634489e-07, "loss": 4.3474, "step": 38000 }, { "epoch": 1.637593142955593, "learning_rate": 9.08320584369411e-07, "loss": 4.3913, "step": 38020 }, { "epoch": 1.6384545806951802, "learning_rate": 9.082721023939322e-07, "loss": 4.4225, "step": 38040 }, { "epoch": 1.6393160184347675, "learning_rate": 9.082236204184532e-07, "loss": 4.3337, "step": 38060 }, { "epoch": 1.640177456174355, "learning_rate": 9.081751384429744e-07, "loss": 4.3372, "step": 38080 }, { "epoch": 1.6410388939139424, "learning_rate": 9.081266564674954e-07, "loss": 4.2774, "step": 38100 }, { "epoch": 1.6419003316535297, "learning_rate": 9.080781744920166e-07, "loss": 4.3185, "step": 38120 }, { "epoch": 1.6427617693931171, "learning_rate": 9.080296925165377e-07, "loss": 4.3871, "step": 38140 }, { "epoch": 1.6436232071327046, "learning_rate": 9.079812105410589e-07, "loss": 4.3266, "step": 38160 }, { "epoch": 1.6444846448722918, "learning_rate": 9.079327285655798e-07, "loss": 4.4932, "step": 38180 }, { "epoch": 1.645346082611879, "learning_rate": 9.07884246590101e-07, "loss": 4.7061, "step": 38200 }, { "epoch": 1.6462075203514666, "learning_rate": 9.078357646146221e-07, "loss": 4.458, "step": 38220 }, { "epoch": 1.647068958091054, "learning_rate": 9.077872826391433e-07, "loss": 4.3307, "step": 38240 }, { "epoch": 1.6479303958306413, "learning_rate": 9.077388006636643e-07, "loss": 4.1894, "step": 38260 }, { "epoch": 1.6487918335702287, "learning_rate": 9.076903186881855e-07, "loss": 4.2687, "step": 38280 }, { "epoch": 1.6496532713098162, "learning_rate": 9.076418367127066e-07, "loss": 4.4671, "step": 38300 }, { "epoch": 1.6505147090494035, "learning_rate": 9.075933547372277e-07, "loss": 4.4285, "step": 38320 }, { "epoch": 1.6513761467889907, "learning_rate": 9.075448727617487e-07, "loss": 4.4103, "step": 38340 }, { "epoch": 1.6522375845285782, "learning_rate": 9.074963907862699e-07, "loss": 4.4297, "step": 38360 }, { "epoch": 1.6530990222681656, "learning_rate": 9.074479088107909e-07, "loss": 4.5008, "step": 38380 }, { "epoch": 1.6539604600077529, "learning_rate": 9.073994268353122e-07, "loss": 4.2244, "step": 38400 }, { "epoch": 1.6548218977473403, "learning_rate": 9.073509448598332e-07, "loss": 4.4952, "step": 38420 }, { "epoch": 1.6556833354869278, "learning_rate": 9.073024628843542e-07, "loss": 4.5604, "step": 38440 }, { "epoch": 1.656544773226515, "learning_rate": 9.072539809088754e-07, "loss": 4.4206, "step": 38460 }, { "epoch": 1.6574062109661023, "learning_rate": 9.072054989333966e-07, "loss": 4.3534, "step": 38480 }, { "epoch": 1.6582676487056898, "learning_rate": 9.071570169579176e-07, "loss": 4.4252, "step": 38500 }, { "epoch": 1.6591290864452772, "learning_rate": 9.071085349824387e-07, "loss": 4.6212, "step": 38520 }, { "epoch": 1.6599905241848645, "learning_rate": 9.070600530069599e-07, "loss": 4.3829, "step": 38540 }, { "epoch": 1.660851961924452, "learning_rate": 9.070115710314809e-07, "loss": 4.5421, "step": 38560 }, { "epoch": 1.6617133996640394, "learning_rate": 9.06963089056002e-07, "loss": 4.3745, "step": 38580 }, { "epoch": 1.6625748374036267, "learning_rate": 9.069146070805231e-07, "loss": 4.4464, "step": 38600 }, { "epoch": 1.663436275143214, "learning_rate": 9.068661251050443e-07, "loss": 4.4741, "step": 38620 }, { "epoch": 1.6642977128828014, "learning_rate": 9.068176431295653e-07, "loss": 4.2344, "step": 38640 }, { "epoch": 1.6651591506223888, "learning_rate": 9.067691611540865e-07, "loss": 4.3982, "step": 38660 }, { "epoch": 1.666020588361976, "learning_rate": 9.067206791786076e-07, "loss": 4.2757, "step": 38680 }, { "epoch": 1.6668820261015636, "learning_rate": 9.066721972031286e-07, "loss": 4.3779, "step": 38700 }, { "epoch": 1.667743463841151, "learning_rate": 9.066237152276497e-07, "loss": 4.2537, "step": 38720 }, { "epoch": 1.6686049015807383, "learning_rate": 9.065752332521709e-07, "loss": 4.459, "step": 38740 }, { "epoch": 1.6694663393203255, "learning_rate": 9.06526751276692e-07, "loss": 4.3083, "step": 38760 }, { "epoch": 1.670327777059913, "learning_rate": 9.064782693012131e-07, "loss": 4.5771, "step": 38780 }, { "epoch": 1.6711892147995004, "learning_rate": 9.064297873257342e-07, "loss": 4.6128, "step": 38800 }, { "epoch": 1.6720506525390877, "learning_rate": 9.063813053502553e-07, "loss": 4.3605, "step": 38820 }, { "epoch": 1.6729120902786752, "learning_rate": 9.063328233747764e-07, "loss": 4.3275, "step": 38840 }, { "epoch": 1.6737735280182626, "learning_rate": 9.062843413992975e-07, "loss": 4.4422, "step": 38860 }, { "epoch": 1.6746349657578499, "learning_rate": 9.062358594238186e-07, "loss": 4.4172, "step": 38880 }, { "epoch": 1.6754964034974371, "learning_rate": 9.061873774483397e-07, "loss": 4.5314, "step": 38900 }, { "epoch": 1.6763578412370246, "learning_rate": 9.061388954728609e-07, "loss": 4.4356, "step": 38920 }, { "epoch": 1.677219278976612, "learning_rate": 9.060904134973819e-07, "loss": 4.1629, "step": 38940 }, { "epoch": 1.6780807167161993, "learning_rate": 9.06041931521903e-07, "loss": 4.4948, "step": 38960 }, { "epoch": 1.6789421544557868, "learning_rate": 9.059934495464241e-07, "loss": 4.787, "step": 38980 }, { "epoch": 1.6798035921953742, "learning_rate": 9.059449675709452e-07, "loss": 4.1931, "step": 39000 }, { "epoch": 1.6806650299349615, "learning_rate": 9.058964855954664e-07, "loss": 4.1048, "step": 39020 }, { "epoch": 1.6815264676745487, "learning_rate": 9.058480036199875e-07, "loss": 4.522, "step": 39040 }, { "epoch": 1.6823879054141362, "learning_rate": 9.057995216445086e-07, "loss": 4.4355, "step": 39060 }, { "epoch": 1.6832493431537237, "learning_rate": 9.057510396690296e-07, "loss": 4.2634, "step": 39080 }, { "epoch": 1.684110780893311, "learning_rate": 9.057025576935507e-07, "loss": 4.3253, "step": 39100 }, { "epoch": 1.6849722186328981, "learning_rate": 9.056540757180719e-07, "loss": 4.2652, "step": 39120 }, { "epoch": 1.6858336563724858, "learning_rate": 9.05605593742593e-07, "loss": 4.3281, "step": 39140 }, { "epoch": 1.686695094112073, "learning_rate": 9.055571117671141e-07, "loss": 4.391, "step": 39160 }, { "epoch": 1.6875565318516603, "learning_rate": 9.055086297916352e-07, "loss": 4.3401, "step": 39180 }, { "epoch": 1.6884179695912478, "learning_rate": 9.054601478161563e-07, "loss": 4.3509, "step": 39200 }, { "epoch": 1.6892794073308353, "learning_rate": 9.054116658406772e-07, "loss": 4.4641, "step": 39220 }, { "epoch": 1.6901408450704225, "learning_rate": 9.053631838651985e-07, "loss": 4.3644, "step": 39240 }, { "epoch": 1.6910022828100097, "learning_rate": 9.053147018897196e-07, "loss": 4.4213, "step": 39260 }, { "epoch": 1.6918637205495974, "learning_rate": 9.052662199142408e-07, "loss": 4.4868, "step": 39280 }, { "epoch": 1.6927251582891847, "learning_rate": 9.052177379387618e-07, "loss": 4.3956, "step": 39300 }, { "epoch": 1.693586596028772, "learning_rate": 9.051692559632829e-07, "loss": 4.5938, "step": 39320 }, { "epoch": 1.6944480337683594, "learning_rate": 9.05120773987804e-07, "loss": 4.4835, "step": 39340 }, { "epoch": 1.6953094715079469, "learning_rate": 9.050722920123252e-07, "loss": 4.2902, "step": 39360 }, { "epoch": 1.696170909247534, "learning_rate": 9.050238100368462e-07, "loss": 4.3747, "step": 39380 }, { "epoch": 1.6970323469871214, "learning_rate": 9.049753280613674e-07, "loss": 4.482, "step": 39400 }, { "epoch": 1.697893784726709, "learning_rate": 9.049268460858885e-07, "loss": 4.2359, "step": 39420 }, { "epoch": 1.6987552224662963, "learning_rate": 9.048783641104097e-07, "loss": 4.3643, "step": 39440 }, { "epoch": 1.6996166602058835, "learning_rate": 9.048298821349306e-07, "loss": 4.1877, "step": 39460 }, { "epoch": 1.700478097945471, "learning_rate": 9.047814001594518e-07, "loss": 4.1596, "step": 39480 }, { "epoch": 1.7013395356850585, "learning_rate": 9.047329181839729e-07, "loss": 4.2136, "step": 39500 }, { "epoch": 1.7022009734246457, "learning_rate": 9.04684436208494e-07, "loss": 4.3671, "step": 39520 }, { "epoch": 1.703062411164233, "learning_rate": 9.046359542330151e-07, "loss": 4.3725, "step": 39540 }, { "epoch": 1.7039238489038204, "learning_rate": 9.045874722575363e-07, "loss": 4.4943, "step": 39560 }, { "epoch": 1.704785286643408, "learning_rate": 9.045389902820573e-07, "loss": 4.429, "step": 39580 }, { "epoch": 1.7056467243829951, "learning_rate": 9.044905083065783e-07, "loss": 4.4638, "step": 39600 }, { "epoch": 1.7065081621225826, "learning_rate": 9.044420263310995e-07, "loss": 4.3664, "step": 39620 }, { "epoch": 1.70736959986217, "learning_rate": 9.043935443556205e-07, "loss": 4.3404, "step": 39640 }, { "epoch": 1.7082310376017573, "learning_rate": 9.043450623801418e-07, "loss": 4.3263, "step": 39660 }, { "epoch": 1.7090924753413446, "learning_rate": 9.042965804046628e-07, "loss": 4.2613, "step": 39680 }, { "epoch": 1.709953913080932, "learning_rate": 9.04248098429184e-07, "loss": 4.1147, "step": 39700 }, { "epoch": 1.7108153508205195, "learning_rate": 9.04199616453705e-07, "loss": 4.2168, "step": 39720 }, { "epoch": 1.7116767885601067, "learning_rate": 9.041511344782263e-07, "loss": 4.0227, "step": 39740 }, { "epoch": 1.7125382262996942, "learning_rate": 9.041026525027472e-07, "loss": 4.563, "step": 39760 }, { "epoch": 1.7133996640392817, "learning_rate": 9.040541705272684e-07, "loss": 4.4562, "step": 39780 }, { "epoch": 1.714261101778869, "learning_rate": 9.040056885517895e-07, "loss": 4.3732, "step": 39800 }, { "epoch": 1.7151225395184562, "learning_rate": 9.039572065763107e-07, "loss": 4.5097, "step": 39820 }, { "epoch": 1.7159839772580436, "learning_rate": 9.039087246008316e-07, "loss": 4.4336, "step": 39840 }, { "epoch": 1.716845414997631, "learning_rate": 9.038602426253528e-07, "loss": 4.3732, "step": 39860 }, { "epoch": 1.7177068527372183, "learning_rate": 9.038117606498739e-07, "loss": 4.214, "step": 39880 }, { "epoch": 1.7185682904768058, "learning_rate": 9.037632786743951e-07, "loss": 4.3972, "step": 39900 }, { "epoch": 1.7194297282163933, "learning_rate": 9.037147966989161e-07, "loss": 4.3894, "step": 39920 }, { "epoch": 1.7202911659559805, "learning_rate": 9.036663147234373e-07, "loss": 4.2121, "step": 39940 }, { "epoch": 1.7211526036955678, "learning_rate": 9.036178327479583e-07, "loss": 4.3815, "step": 39960 }, { "epoch": 1.7220140414351552, "learning_rate": 9.035693507724794e-07, "loss": 4.4487, "step": 39980 }, { "epoch": 1.7228754791747427, "learning_rate": 9.035208687970005e-07, "loss": 4.2327, "step": 40000 }, { "epoch": 1.72373691691433, "learning_rate": 9.034723868215217e-07, "loss": 4.1977, "step": 40020 }, { "epoch": 1.7245983546539174, "learning_rate": 9.034239048460428e-07, "loss": 4.6189, "step": 40040 }, { "epoch": 1.7254597923935049, "learning_rate": 9.033754228705638e-07, "loss": 4.366, "step": 40060 }, { "epoch": 1.7263212301330921, "learning_rate": 9.03326940895085e-07, "loss": 4.3975, "step": 40080 }, { "epoch": 1.7271826678726794, "learning_rate": 9.032784589196061e-07, "loss": 4.2507, "step": 40100 }, { "epoch": 1.7280441056122668, "learning_rate": 9.032299769441272e-07, "loss": 4.5417, "step": 40120 }, { "epoch": 1.7289055433518543, "learning_rate": 9.031814949686482e-07, "loss": 4.4213, "step": 40140 }, { "epoch": 1.7297669810914416, "learning_rate": 9.031330129931693e-07, "loss": 4.5404, "step": 40160 }, { "epoch": 1.730628418831029, "learning_rate": 9.030845310176905e-07, "loss": 4.3475, "step": 40180 }, { "epoch": 1.7314898565706165, "learning_rate": 9.030360490422117e-07, "loss": 4.3838, "step": 40200 }, { "epoch": 1.7323512943102037, "learning_rate": 9.029875670667326e-07, "loss": 4.404, "step": 40220 }, { "epoch": 1.733212732049791, "learning_rate": 9.029390850912538e-07, "loss": 4.4143, "step": 40240 }, { "epoch": 1.7340741697893785, "learning_rate": 9.028906031157749e-07, "loss": 4.4484, "step": 40260 }, { "epoch": 1.734935607528966, "learning_rate": 9.02842121140296e-07, "loss": 4.3804, "step": 40280 }, { "epoch": 1.7357970452685532, "learning_rate": 9.027936391648171e-07, "loss": 4.3601, "step": 40300 }, { "epoch": 1.7366584830081406, "learning_rate": 9.027451571893383e-07, "loss": 4.3157, "step": 40320 }, { "epoch": 1.737519920747728, "learning_rate": 9.026966752138594e-07, "loss": 4.5075, "step": 40340 }, { "epoch": 1.7383813584873153, "learning_rate": 9.026481932383804e-07, "loss": 4.2924, "step": 40360 }, { "epoch": 1.7392427962269026, "learning_rate": 9.025997112629015e-07, "loss": 4.4275, "step": 40380 }, { "epoch": 1.74010423396649, "learning_rate": 9.025512292874227e-07, "loss": 4.198, "step": 40400 }, { "epoch": 1.7409656717060775, "learning_rate": 9.025027473119438e-07, "loss": 4.3831, "step": 40420 }, { "epoch": 1.7418271094456648, "learning_rate": 9.024542653364649e-07, "loss": 4.3877, "step": 40440 }, { "epoch": 1.7426885471852522, "learning_rate": 9.02405783360986e-07, "loss": 4.4235, "step": 40460 }, { "epoch": 1.7435499849248397, "learning_rate": 9.023573013855071e-07, "loss": 4.5444, "step": 40480 }, { "epoch": 1.744411422664427, "learning_rate": 9.023088194100282e-07, "loss": 4.2116, "step": 40500 }, { "epoch": 1.7452728604040142, "learning_rate": 9.022603374345493e-07, "loss": 4.4303, "step": 40520 }, { "epoch": 1.7461342981436017, "learning_rate": 9.022118554590704e-07, "loss": 4.2506, "step": 40540 }, { "epoch": 1.7469957358831891, "learning_rate": 9.021633734835916e-07, "loss": 4.3852, "step": 40560 }, { "epoch": 1.7478571736227764, "learning_rate": 9.021148915081126e-07, "loss": 4.3947, "step": 40580 }, { "epoch": 1.7487186113623638, "learning_rate": 9.020664095326336e-07, "loss": 4.3598, "step": 40600 }, { "epoch": 1.7495800491019513, "learning_rate": 9.020179275571548e-07, "loss": 4.2804, "step": 40620 }, { "epoch": 1.7504414868415386, "learning_rate": 9.01969445581676e-07, "loss": 4.4294, "step": 40640 }, { "epoch": 1.7513029245811258, "learning_rate": 9.01920963606197e-07, "loss": 4.5575, "step": 40660 }, { "epoch": 1.7521643623207133, "learning_rate": 9.018724816307181e-07, "loss": 4.1388, "step": 40680 }, { "epoch": 1.7530258000603007, "learning_rate": 9.018239996552393e-07, "loss": 4.2976, "step": 40700 }, { "epoch": 1.753887237799888, "learning_rate": 9.017755176797604e-07, "loss": 4.1478, "step": 40720 }, { "epoch": 1.7547486755394752, "learning_rate": 9.017270357042814e-07, "loss": 4.4194, "step": 40740 }, { "epoch": 1.755610113279063, "learning_rate": 9.016785537288025e-07, "loss": 4.3831, "step": 40760 }, { "epoch": 1.7564715510186502, "learning_rate": 9.016300717533237e-07, "loss": 4.2891, "step": 40780 }, { "epoch": 1.7573329887582374, "learning_rate": 9.015815897778448e-07, "loss": 4.3545, "step": 40800 }, { "epoch": 1.7581944264978249, "learning_rate": 9.015331078023659e-07, "loss": 4.3924, "step": 40820 }, { "epoch": 1.7590558642374123, "learning_rate": 9.01484625826887e-07, "loss": 4.4795, "step": 40840 }, { "epoch": 1.7599173019769996, "learning_rate": 9.014361438514081e-07, "loss": 4.1988, "step": 40860 }, { "epoch": 1.7607787397165868, "learning_rate": 9.013876618759291e-07, "loss": 4.5492, "step": 40880 }, { "epoch": 1.7616401774561745, "learning_rate": 9.013391799004503e-07, "loss": 4.3368, "step": 40900 }, { "epoch": 1.7625016151957618, "learning_rate": 9.012906979249714e-07, "loss": 4.388, "step": 40920 }, { "epoch": 1.763363052935349, "learning_rate": 9.012422159494926e-07, "loss": 4.2305, "step": 40940 }, { "epoch": 1.7642244906749365, "learning_rate": 9.011937339740136e-07, "loss": 4.5238, "step": 40960 }, { "epoch": 1.765085928414524, "learning_rate": 9.011452519985347e-07, "loss": 4.5662, "step": 40980 }, { "epoch": 1.7659473661541112, "learning_rate": 9.010967700230557e-07, "loss": 4.4144, "step": 41000 }, { "epoch": 1.7668088038936984, "learning_rate": 9.01048288047577e-07, "loss": 4.2149, "step": 41020 }, { "epoch": 1.767670241633286, "learning_rate": 9.00999806072098e-07, "loss": 4.4242, "step": 41040 }, { "epoch": 1.7685316793728734, "learning_rate": 9.009513240966192e-07, "loss": 4.4088, "step": 41060 }, { "epoch": 1.7693931171124606, "learning_rate": 9.009028421211403e-07, "loss": 4.323, "step": 41080 }, { "epoch": 1.770254554852048, "learning_rate": 9.008543601456614e-07, "loss": 4.4107, "step": 41100 }, { "epoch": 1.7711159925916355, "learning_rate": 9.008058781701824e-07, "loss": 4.5422, "step": 41120 }, { "epoch": 1.7719774303312228, "learning_rate": 9.007573961947035e-07, "loss": 4.4039, "step": 41140 }, { "epoch": 1.77283886807081, "learning_rate": 9.007089142192247e-07, "loss": 4.2002, "step": 41160 }, { "epoch": 1.7737003058103975, "learning_rate": 9.006604322437459e-07, "loss": 4.3473, "step": 41180 }, { "epoch": 1.774561743549985, "learning_rate": 9.006119502682669e-07, "loss": 4.2903, "step": 41200 }, { "epoch": 1.7754231812895722, "learning_rate": 9.00563468292788e-07, "loss": 4.4221, "step": 41220 }, { "epoch": 1.7762846190291597, "learning_rate": 9.005149863173091e-07, "loss": 4.3886, "step": 41240 }, { "epoch": 1.7771460567687472, "learning_rate": 9.004665043418301e-07, "loss": 4.3666, "step": 41260 }, { "epoch": 1.7780074945083344, "learning_rate": 9.004180223663513e-07, "loss": 4.3247, "step": 41280 }, { "epoch": 1.7788689322479216, "learning_rate": 9.003695403908724e-07, "loss": 4.4073, "step": 41300 }, { "epoch": 1.779730369987509, "learning_rate": 9.003210584153936e-07, "loss": 4.1473, "step": 41320 }, { "epoch": 1.7805918077270966, "learning_rate": 9.002725764399146e-07, "loss": 4.3599, "step": 41340 }, { "epoch": 1.7814532454666838, "learning_rate": 9.002240944644357e-07, "loss": 4.2765, "step": 41360 }, { "epoch": 1.7823146832062713, "learning_rate": 9.001756124889568e-07, "loss": 4.4386, "step": 41380 }, { "epoch": 1.7831761209458588, "learning_rate": 9.00127130513478e-07, "loss": 4.5666, "step": 41400 }, { "epoch": 1.784037558685446, "learning_rate": 9.000786485379989e-07, "loss": 4.3756, "step": 41420 }, { "epoch": 1.7848989964250332, "learning_rate": 9.000301665625202e-07, "loss": 4.2232, "step": 41440 }, { "epoch": 1.7857604341646207, "learning_rate": 8.999816845870413e-07, "loss": 4.2154, "step": 41460 }, { "epoch": 1.7866218719042082, "learning_rate": 8.999332026115625e-07, "loss": 4.4487, "step": 41480 }, { "epoch": 1.7874833096437954, "learning_rate": 8.998847206360834e-07, "loss": 4.1479, "step": 41500 }, { "epoch": 1.788344747383383, "learning_rate": 8.998362386606047e-07, "loss": 4.2259, "step": 41520 }, { "epoch": 1.7892061851229704, "learning_rate": 8.997877566851257e-07, "loss": 4.4609, "step": 41540 }, { "epoch": 1.7900676228625576, "learning_rate": 8.997392747096468e-07, "loss": 4.2485, "step": 41560 }, { "epoch": 1.7909290606021449, "learning_rate": 8.996907927341679e-07, "loss": 4.3681, "step": 41580 }, { "epoch": 1.7917904983417323, "learning_rate": 8.996423107586891e-07, "loss": 4.2164, "step": 41600 }, { "epoch": 1.7926519360813198, "learning_rate": 8.995938287832101e-07, "loss": 4.1929, "step": 41620 }, { "epoch": 1.793513373820907, "learning_rate": 8.995453468077312e-07, "loss": 4.226, "step": 41640 }, { "epoch": 1.7943748115604945, "learning_rate": 8.994968648322523e-07, "loss": 4.3083, "step": 41660 }, { "epoch": 1.795236249300082, "learning_rate": 8.994483828567734e-07, "loss": 4.2247, "step": 41680 }, { "epoch": 1.7960976870396692, "learning_rate": 8.993999008812946e-07, "loss": 4.1595, "step": 41700 }, { "epoch": 1.7969591247792565, "learning_rate": 8.993514189058157e-07, "loss": 4.2684, "step": 41720 }, { "epoch": 1.797820562518844, "learning_rate": 8.993029369303367e-07, "loss": 4.3604, "step": 41740 }, { "epoch": 1.7986820002584314, "learning_rate": 8.992544549548578e-07, "loss": 4.2129, "step": 41760 }, { "epoch": 1.7995434379980186, "learning_rate": 8.99205972979379e-07, "loss": 4.381, "step": 41780 }, { "epoch": 1.800404875737606, "learning_rate": 8.991574910039e-07, "loss": 4.2786, "step": 41800 }, { "epoch": 1.8012663134771936, "learning_rate": 8.991090090284212e-07, "loss": 4.4976, "step": 41820 }, { "epoch": 1.8021277512167808, "learning_rate": 8.990605270529423e-07, "loss": 4.2708, "step": 41840 }, { "epoch": 1.802989188956368, "learning_rate": 8.990120450774634e-07, "loss": 4.271, "step": 41860 }, { "epoch": 1.8038506266959555, "learning_rate": 8.989635631019844e-07, "loss": 4.5042, "step": 41880 }, { "epoch": 1.804712064435543, "learning_rate": 8.989150811265056e-07, "loss": 4.677, "step": 41900 }, { "epoch": 1.8055735021751302, "learning_rate": 8.988665991510267e-07, "loss": 4.2695, "step": 41920 }, { "epoch": 1.8064349399147177, "learning_rate": 8.988181171755477e-07, "loss": 4.3243, "step": 41940 }, { "epoch": 1.8072963776543052, "learning_rate": 8.987696352000689e-07, "loss": 4.3315, "step": 41960 }, { "epoch": 1.8081578153938924, "learning_rate": 8.987211532245901e-07, "loss": 4.4334, "step": 41980 }, { "epoch": 1.8090192531334797, "learning_rate": 8.986726712491111e-07, "loss": 4.456, "step": 42000 }, { "epoch": 1.8098806908730671, "learning_rate": 8.986241892736322e-07, "loss": 4.2427, "step": 42020 }, { "epoch": 1.8107421286126546, "learning_rate": 8.985757072981533e-07, "loss": 4.5055, "step": 42040 }, { "epoch": 1.8116035663522418, "learning_rate": 8.985272253226745e-07, "loss": 4.7073, "step": 42060 }, { "epoch": 1.8124650040918293, "learning_rate": 8.984787433471956e-07, "loss": 4.4209, "step": 42080 }, { "epoch": 1.8133264418314168, "learning_rate": 8.984302613717167e-07, "loss": 4.4284, "step": 42100 }, { "epoch": 1.814187879571004, "learning_rate": 8.983817793962378e-07, "loss": 4.2628, "step": 42120 }, { "epoch": 1.8150493173105913, "learning_rate": 8.983332974207589e-07, "loss": 4.3875, "step": 42140 }, { "epoch": 1.8159107550501787, "learning_rate": 8.982848154452799e-07, "loss": 4.2567, "step": 42160 }, { "epoch": 1.8167721927897662, "learning_rate": 8.982363334698011e-07, "loss": 4.2877, "step": 42180 }, { "epoch": 1.8176336305293535, "learning_rate": 8.981878514943222e-07, "loss": 4.4149, "step": 42200 }, { "epoch": 1.818495068268941, "learning_rate": 8.981393695188433e-07, "loss": 4.2483, "step": 42220 }, { "epoch": 1.8193565060085284, "learning_rate": 8.980908875433644e-07, "loss": 4.5543, "step": 42240 }, { "epoch": 1.8202179437481156, "learning_rate": 8.980424055678855e-07, "loss": 4.5195, "step": 42260 }, { "epoch": 1.8210793814877029, "learning_rate": 8.979939235924066e-07, "loss": 4.3553, "step": 42280 }, { "epoch": 1.8219408192272903, "learning_rate": 8.979454416169277e-07, "loss": 4.1781, "step": 42300 }, { "epoch": 1.8228022569668778, "learning_rate": 8.978969596414488e-07, "loss": 4.4369, "step": 42320 }, { "epoch": 1.823663694706465, "learning_rate": 8.978484776659699e-07, "loss": 4.2777, "step": 42340 }, { "epoch": 1.8245251324460523, "learning_rate": 8.977999956904911e-07, "loss": 4.3465, "step": 42360 }, { "epoch": 1.82538657018564, "learning_rate": 8.97751513715012e-07, "loss": 4.3886, "step": 42380 }, { "epoch": 1.8262480079252272, "learning_rate": 8.977030317395332e-07, "loss": 4.2146, "step": 42400 }, { "epoch": 1.8271094456648145, "learning_rate": 8.976545497640543e-07, "loss": 4.3834, "step": 42420 }, { "epoch": 1.827970883404402, "learning_rate": 8.976060677885755e-07, "loss": 4.4563, "step": 42440 }, { "epoch": 1.8288323211439894, "learning_rate": 8.975575858130965e-07, "loss": 4.2038, "step": 42460 }, { "epoch": 1.8296937588835767, "learning_rate": 8.975091038376177e-07, "loss": 4.4039, "step": 42480 }, { "epoch": 1.830555196623164, "learning_rate": 8.974606218621388e-07, "loss": 4.1675, "step": 42500 }, { "epoch": 1.8314166343627516, "learning_rate": 8.974121398866599e-07, "loss": 4.3842, "step": 42520 }, { "epoch": 1.8322780721023388, "learning_rate": 8.973636579111809e-07, "loss": 4.2416, "step": 42540 }, { "epoch": 1.833139509841926, "learning_rate": 8.973151759357021e-07, "loss": 4.5044, "step": 42560 }, { "epoch": 1.8340009475815136, "learning_rate": 8.972666939602232e-07, "loss": 4.3566, "step": 42580 }, { "epoch": 1.834862385321101, "learning_rate": 8.972182119847444e-07, "loss": 4.503, "step": 42600 }, { "epoch": 1.8357238230606883, "learning_rate": 8.971697300092654e-07, "loss": 4.339, "step": 42620 }, { "epoch": 1.8365852608002755, "learning_rate": 8.971212480337865e-07, "loss": 4.2754, "step": 42640 }, { "epoch": 1.837446698539863, "learning_rate": 8.970727660583076e-07, "loss": 4.1781, "step": 42660 }, { "epoch": 1.8383081362794504, "learning_rate": 8.970242840828288e-07, "loss": 4.3477, "step": 42680 }, { "epoch": 1.8391695740190377, "learning_rate": 8.969758021073498e-07, "loss": 4.4648, "step": 42700 }, { "epoch": 1.8400310117586252, "learning_rate": 8.96927320131871e-07, "loss": 4.4001, "step": 42720 }, { "epoch": 1.8408924494982126, "learning_rate": 8.968788381563921e-07, "loss": 4.3718, "step": 42740 }, { "epoch": 1.8417538872377999, "learning_rate": 8.968303561809131e-07, "loss": 4.2497, "step": 42760 }, { "epoch": 1.8426153249773871, "learning_rate": 8.967818742054341e-07, "loss": 4.426, "step": 42780 }, { "epoch": 1.8434767627169746, "learning_rate": 8.967333922299554e-07, "loss": 4.4254, "step": 42800 }, { "epoch": 1.844338200456562, "learning_rate": 8.966849102544765e-07, "loss": 4.2573, "step": 42820 }, { "epoch": 1.8451996381961493, "learning_rate": 8.966364282789975e-07, "loss": 4.205, "step": 42840 }, { "epoch": 1.8460610759357368, "learning_rate": 8.965879463035187e-07, "loss": 4.5753, "step": 42860 }, { "epoch": 1.8469225136753242, "learning_rate": 8.965394643280399e-07, "loss": 4.2849, "step": 42880 }, { "epoch": 1.8477839514149115, "learning_rate": 8.964909823525609e-07, "loss": 4.4319, "step": 42900 }, { "epoch": 1.8486453891544987, "learning_rate": 8.964425003770819e-07, "loss": 4.4898, "step": 42920 }, { "epoch": 1.8495068268940862, "learning_rate": 8.963940184016031e-07, "loss": 4.393, "step": 42940 }, { "epoch": 1.8503682646336737, "learning_rate": 8.963455364261242e-07, "loss": 4.3307, "step": 42960 }, { "epoch": 1.851229702373261, "learning_rate": 8.962970544506454e-07, "loss": 4.2222, "step": 42980 }, { "epoch": 1.8520911401128484, "learning_rate": 8.962485724751664e-07, "loss": 4.3846, "step": 43000 }, { "epoch": 1.8529525778524358, "learning_rate": 8.962000904996875e-07, "loss": 4.2039, "step": 43020 }, { "epoch": 1.853814015592023, "learning_rate": 8.961516085242086e-07, "loss": 4.3454, "step": 43040 }, { "epoch": 1.8546754533316103, "learning_rate": 8.961031265487297e-07, "loss": 4.3098, "step": 43060 }, { "epoch": 1.8555368910711978, "learning_rate": 8.960546445732508e-07, "loss": 4.3761, "step": 43080 }, { "epoch": 1.8563983288107853, "learning_rate": 8.96006162597772e-07, "loss": 4.3821, "step": 43100 }, { "epoch": 1.8572597665503725, "learning_rate": 8.959576806222931e-07, "loss": 4.3961, "step": 43120 }, { "epoch": 1.85812120428996, "learning_rate": 8.959091986468141e-07, "loss": 4.1976, "step": 43140 }, { "epoch": 1.8589826420295474, "learning_rate": 8.958607166713352e-07, "loss": 4.2587, "step": 43160 }, { "epoch": 1.8598440797691347, "learning_rate": 8.958122346958564e-07, "loss": 4.3849, "step": 43180 }, { "epoch": 1.860705517508722, "learning_rate": 8.957637527203775e-07, "loss": 4.2497, "step": 43200 }, { "epoch": 1.8615669552483094, "learning_rate": 8.957152707448986e-07, "loss": 4.2743, "step": 43220 }, { "epoch": 1.8624283929878969, "learning_rate": 8.956667887694197e-07, "loss": 4.384, "step": 43240 }, { "epoch": 1.863289830727484, "learning_rate": 8.956183067939409e-07, "loss": 4.1961, "step": 43260 }, { "epoch": 1.8641512684670716, "learning_rate": 8.955698248184619e-07, "loss": 4.1731, "step": 43280 }, { "epoch": 1.865012706206659, "learning_rate": 8.955213428429829e-07, "loss": 4.1847, "step": 43300 }, { "epoch": 1.8658741439462463, "learning_rate": 8.954728608675041e-07, "loss": 4.3402, "step": 43320 }, { "epoch": 1.8667355816858335, "learning_rate": 8.954243788920253e-07, "loss": 4.5423, "step": 43340 }, { "epoch": 1.867597019425421, "learning_rate": 8.953758969165464e-07, "loss": 4.3562, "step": 43360 }, { "epoch": 1.8684584571650085, "learning_rate": 8.953274149410674e-07, "loss": 4.3731, "step": 43380 }, { "epoch": 1.8693198949045957, "learning_rate": 8.952789329655885e-07, "loss": 4.286, "step": 43400 }, { "epoch": 1.8701813326441832, "learning_rate": 8.952304509901096e-07, "loss": 4.2101, "step": 43420 }, { "epoch": 1.8710427703837706, "learning_rate": 8.951819690146307e-07, "loss": 4.2868, "step": 43440 }, { "epoch": 1.871904208123358, "learning_rate": 8.951334870391518e-07, "loss": 4.2495, "step": 43460 }, { "epoch": 1.8727656458629451, "learning_rate": 8.95085005063673e-07, "loss": 4.3549, "step": 43480 }, { "epoch": 1.8736270836025326, "learning_rate": 8.950365230881941e-07, "loss": 4.1452, "step": 43500 }, { "epoch": 1.87448852134212, "learning_rate": 8.949880411127151e-07, "loss": 4.2519, "step": 43520 }, { "epoch": 1.8753499590817073, "learning_rate": 8.949395591372362e-07, "loss": 4.1753, "step": 43540 }, { "epoch": 1.8762113968212948, "learning_rate": 8.948910771617574e-07, "loss": 4.2263, "step": 43560 }, { "epoch": 1.8770728345608823, "learning_rate": 8.948425951862785e-07, "loss": 4.3419, "step": 43580 }, { "epoch": 1.8779342723004695, "learning_rate": 8.947941132107996e-07, "loss": 4.0941, "step": 43600 }, { "epoch": 1.8787957100400567, "learning_rate": 8.947456312353207e-07, "loss": 4.2375, "step": 43620 }, { "epoch": 1.8796571477796442, "learning_rate": 8.946971492598419e-07, "loss": 4.2234, "step": 43640 }, { "epoch": 1.8805185855192317, "learning_rate": 8.946486672843628e-07, "loss": 4.3174, "step": 43660 }, { "epoch": 1.881380023258819, "learning_rate": 8.94600185308884e-07, "loss": 4.3217, "step": 43680 }, { "epoch": 1.8822414609984064, "learning_rate": 8.945517033334051e-07, "loss": 4.4135, "step": 43700 }, { "epoch": 1.8831028987379939, "learning_rate": 8.945032213579262e-07, "loss": 4.2409, "step": 43720 }, { "epoch": 1.883964336477581, "learning_rate": 8.944547393824473e-07, "loss": 4.4634, "step": 43740 }, { "epoch": 1.8848257742171683, "learning_rate": 8.944062574069685e-07, "loss": 4.2902, "step": 43760 }, { "epoch": 1.8856872119567558, "learning_rate": 8.943577754314895e-07, "loss": 4.296, "step": 43780 }, { "epoch": 1.8865486496963433, "learning_rate": 8.943092934560107e-07, "loss": 4.2914, "step": 43800 }, { "epoch": 1.8874100874359305, "learning_rate": 8.942608114805317e-07, "loss": 4.2811, "step": 43820 }, { "epoch": 1.8882715251755178, "learning_rate": 8.942123295050528e-07, "loss": 4.4442, "step": 43840 }, { "epoch": 1.8891329629151055, "learning_rate": 8.94163847529574e-07, "loss": 4.2375, "step": 43860 }, { "epoch": 1.8899944006546927, "learning_rate": 8.941153655540952e-07, "loss": 4.202, "step": 43880 }, { "epoch": 1.89085583839428, "learning_rate": 8.940668835786162e-07, "loss": 4.2511, "step": 43900 }, { "epoch": 1.8917172761338674, "learning_rate": 8.940184016031372e-07, "loss": 4.2208, "step": 43920 }, { "epoch": 1.8925787138734549, "learning_rate": 8.939699196276584e-07, "loss": 4.4483, "step": 43940 }, { "epoch": 1.8934401516130421, "learning_rate": 8.939214376521794e-07, "loss": 4.2544, "step": 43960 }, { "epoch": 1.8943015893526294, "learning_rate": 8.938729556767006e-07, "loss": 4.3998, "step": 43980 }, { "epoch": 1.895163027092217, "learning_rate": 8.938244737012217e-07, "loss": 4.3337, "step": 44000 }, { "epoch": 1.8960244648318043, "learning_rate": 8.937759917257429e-07, "loss": 4.5991, "step": 44020 }, { "epoch": 1.8968859025713916, "learning_rate": 8.937275097502637e-07, "loss": 4.4394, "step": 44040 }, { "epoch": 1.897747340310979, "learning_rate": 8.93679027774785e-07, "loss": 4.1898, "step": 44060 }, { "epoch": 1.8986087780505665, "learning_rate": 8.936305457993061e-07, "loss": 4.2828, "step": 44080 }, { "epoch": 1.8994702157901537, "learning_rate": 8.935820638238273e-07, "loss": 4.5109, "step": 44100 }, { "epoch": 1.900331653529741, "learning_rate": 8.935335818483483e-07, "loss": 4.4199, "step": 44120 }, { "epoch": 1.9011930912693287, "learning_rate": 8.934850998728695e-07, "loss": 4.2196, "step": 44140 }, { "epoch": 1.902054529008916, "learning_rate": 8.934366178973905e-07, "loss": 4.4022, "step": 44160 }, { "epoch": 1.9029159667485032, "learning_rate": 8.933881359219117e-07, "loss": 4.2565, "step": 44180 }, { "epoch": 1.9037774044880906, "learning_rate": 8.933396539464327e-07, "loss": 4.1588, "step": 44200 }, { "epoch": 1.904638842227678, "learning_rate": 8.932911719709539e-07, "loss": 4.3023, "step": 44220 }, { "epoch": 1.9055002799672653, "learning_rate": 8.93242689995475e-07, "loss": 4.4887, "step": 44240 }, { "epoch": 1.9063617177068526, "learning_rate": 8.931942080199962e-07, "loss": 4.4413, "step": 44260 }, { "epoch": 1.90722315544644, "learning_rate": 8.931457260445172e-07, "loss": 4.549, "step": 44280 }, { "epoch": 1.9080845931860275, "learning_rate": 8.930972440690383e-07, "loss": 4.2645, "step": 44300 }, { "epoch": 1.9089460309256148, "learning_rate": 8.930487620935594e-07, "loss": 4.5101, "step": 44320 }, { "epoch": 1.9098074686652022, "learning_rate": 8.930002801180805e-07, "loss": 4.1307, "step": 44340 }, { "epoch": 1.9106689064047897, "learning_rate": 8.929517981426016e-07, "loss": 4.5143, "step": 44360 }, { "epoch": 1.911530344144377, "learning_rate": 8.929033161671227e-07, "loss": 4.3989, "step": 44380 }, { "epoch": 1.9123917818839642, "learning_rate": 8.928548341916439e-07, "loss": 4.3002, "step": 44400 }, { "epoch": 1.9132532196235517, "learning_rate": 8.928063522161649e-07, "loss": 4.4724, "step": 44420 }, { "epoch": 1.9141146573631391, "learning_rate": 8.92757870240686e-07, "loss": 4.423, "step": 44440 }, { "epoch": 1.9149760951027264, "learning_rate": 8.927093882652071e-07, "loss": 4.2459, "step": 44460 }, { "epoch": 1.9158375328423138, "learning_rate": 8.926609062897283e-07, "loss": 4.3396, "step": 44480 }, { "epoch": 1.9166989705819013, "learning_rate": 8.926124243142493e-07, "loss": 4.4372, "step": 44500 }, { "epoch": 1.9175604083214886, "learning_rate": 8.925639423387705e-07, "loss": 4.288, "step": 44520 }, { "epoch": 1.9184218460610758, "learning_rate": 8.925154603632916e-07, "loss": 4.4371, "step": 44540 }, { "epoch": 1.9192832838006633, "learning_rate": 8.924669783878125e-07, "loss": 4.1958, "step": 44560 }, { "epoch": 1.9201447215402507, "learning_rate": 8.924184964123337e-07, "loss": 4.1513, "step": 44580 }, { "epoch": 1.921006159279838, "learning_rate": 8.923700144368549e-07, "loss": 4.2007, "step": 44600 }, { "epoch": 1.9218675970194254, "learning_rate": 8.92321532461376e-07, "loss": 4.1913, "step": 44620 }, { "epoch": 1.922729034759013, "learning_rate": 8.922730504858971e-07, "loss": 4.3211, "step": 44640 }, { "epoch": 1.9235904724986002, "learning_rate": 8.922245685104183e-07, "loss": 4.3395, "step": 44660 }, { "epoch": 1.9244519102381874, "learning_rate": 8.921760865349393e-07, "loss": 4.2522, "step": 44680 }, { "epoch": 1.9253133479777749, "learning_rate": 8.921276045594604e-07, "loss": 4.279, "step": 44700 }, { "epoch": 1.9261747857173623, "learning_rate": 8.920791225839815e-07, "loss": 4.2267, "step": 44720 }, { "epoch": 1.9270362234569496, "learning_rate": 8.920306406085026e-07, "loss": 4.3767, "step": 44740 }, { "epoch": 1.927897661196537, "learning_rate": 8.919821586330238e-07, "loss": 4.0336, "step": 44760 }, { "epoch": 1.9287590989361245, "learning_rate": 8.919336766575449e-07, "loss": 4.3381, "step": 44780 }, { "epoch": 1.9296205366757118, "learning_rate": 8.918851946820659e-07, "loss": 4.218, "step": 44800 }, { "epoch": 1.930481974415299, "learning_rate": 8.91836712706587e-07, "loss": 4.251, "step": 44820 }, { "epoch": 1.9313434121548865, "learning_rate": 8.917882307311082e-07, "loss": 4.305, "step": 44840 }, { "epoch": 1.932204849894474, "learning_rate": 8.917397487556293e-07, "loss": 4.2406, "step": 44860 }, { "epoch": 1.9330662876340612, "learning_rate": 8.916912667801504e-07, "loss": 4.4796, "step": 44880 }, { "epoch": 1.9339277253736487, "learning_rate": 8.916427848046715e-07, "loss": 4.2853, "step": 44900 }, { "epoch": 1.9347891631132361, "learning_rate": 8.915943028291926e-07, "loss": 4.3997, "step": 44920 }, { "epoch": 1.9356506008528234, "learning_rate": 8.915458208537136e-07, "loss": 4.2774, "step": 44940 }, { "epoch": 1.9365120385924106, "learning_rate": 8.914973388782348e-07, "loss": 4.2732, "step": 44960 }, { "epoch": 1.937373476331998, "learning_rate": 8.914488569027559e-07, "loss": 4.4675, "step": 44980 }, { "epoch": 1.9382349140715855, "learning_rate": 8.91400374927277e-07, "loss": 4.3969, "step": 45000 }, { "epoch": 1.9390963518111728, "learning_rate": 8.913518929517981e-07, "loss": 4.3149, "step": 45020 }, { "epoch": 1.9399577895507603, "learning_rate": 8.913034109763192e-07, "loss": 4.4777, "step": 45040 }, { "epoch": 1.9408192272903477, "learning_rate": 8.912549290008403e-07, "loss": 4.426, "step": 45060 }, { "epoch": 1.941680665029935, "learning_rate": 8.912064470253614e-07, "loss": 4.2196, "step": 45080 }, { "epoch": 1.9425421027695222, "learning_rate": 8.911579650498825e-07, "loss": 4.2747, "step": 45100 }, { "epoch": 1.9434035405091097, "learning_rate": 8.911094830744036e-07, "loss": 4.2879, "step": 45120 }, { "epoch": 1.9442649782486972, "learning_rate": 8.910610010989248e-07, "loss": 4.3427, "step": 45140 }, { "epoch": 1.9451264159882844, "learning_rate": 8.910125191234459e-07, "loss": 4.3285, "step": 45160 }, { "epoch": 1.9459878537278719, "learning_rate": 8.909640371479669e-07, "loss": 4.4457, "step": 45180 }, { "epoch": 1.9468492914674593, "learning_rate": 8.90915555172488e-07, "loss": 4.5181, "step": 45200 }, { "epoch": 1.9477107292070466, "learning_rate": 8.908670731970092e-07, "loss": 4.4637, "step": 45220 }, { "epoch": 1.9485721669466338, "learning_rate": 8.908185912215302e-07, "loss": 4.2949, "step": 45240 }, { "epoch": 1.9494336046862213, "learning_rate": 8.907701092460514e-07, "loss": 4.2796, "step": 45260 }, { "epoch": 1.9502950424258088, "learning_rate": 8.907216272705725e-07, "loss": 4.3893, "step": 45280 }, { "epoch": 1.951156480165396, "learning_rate": 8.906731452950936e-07, "loss": 4.2103, "step": 45300 }, { "epoch": 1.9520179179049835, "learning_rate": 8.906246633196146e-07, "loss": 4.1402, "step": 45320 }, { "epoch": 1.952879355644571, "learning_rate": 8.905761813441358e-07, "loss": 4.3627, "step": 45340 }, { "epoch": 1.9537407933841582, "learning_rate": 8.905276993686569e-07, "loss": 4.3429, "step": 45360 }, { "epoch": 1.9546022311237454, "learning_rate": 8.904792173931781e-07, "loss": 4.4115, "step": 45380 }, { "epoch": 1.955463668863333, "learning_rate": 8.904307354176991e-07, "loss": 4.6042, "step": 45400 }, { "epoch": 1.9563251066029204, "learning_rate": 8.903822534422203e-07, "loss": 4.2902, "step": 45420 }, { "epoch": 1.9571865443425076, "learning_rate": 8.903337714667413e-07, "loss": 4.2655, "step": 45440 }, { "epoch": 1.9580479820820949, "learning_rate": 8.902852894912624e-07, "loss": 4.2639, "step": 45460 }, { "epoch": 1.9589094198216825, "learning_rate": 8.902368075157835e-07, "loss": 4.4137, "step": 45480 }, { "epoch": 1.9597708575612698, "learning_rate": 8.901883255403046e-07, "loss": 4.4555, "step": 45500 }, { "epoch": 1.960632295300857, "learning_rate": 8.901398435648258e-07, "loss": 4.2671, "step": 45520 }, { "epoch": 1.9614937330404445, "learning_rate": 8.900913615893468e-07, "loss": 4.3513, "step": 45540 }, { "epoch": 1.962355170780032, "learning_rate": 8.900428796138679e-07, "loss": 4.2389, "step": 45560 }, { "epoch": 1.9632166085196192, "learning_rate": 8.89994397638389e-07, "loss": 4.1699, "step": 45580 }, { "epoch": 1.9640780462592065, "learning_rate": 8.899459156629102e-07, "loss": 4.2811, "step": 45600 }, { "epoch": 1.9649394839987941, "learning_rate": 8.898974336874312e-07, "loss": 4.5388, "step": 45620 }, { "epoch": 1.9658009217383814, "learning_rate": 8.898489517119524e-07, "loss": 4.2654, "step": 45640 }, { "epoch": 1.9666623594779686, "learning_rate": 8.898004697364735e-07, "loss": 4.2945, "step": 45660 }, { "epoch": 1.967523797217556, "learning_rate": 8.897519877609947e-07, "loss": 4.3381, "step": 45680 }, { "epoch": 1.9683852349571436, "learning_rate": 8.897035057855156e-07, "loss": 4.3183, "step": 45700 }, { "epoch": 1.9692466726967308, "learning_rate": 8.896550238100368e-07, "loss": 4.0999, "step": 45720 }, { "epoch": 1.970108110436318, "learning_rate": 8.896065418345579e-07, "loss": 4.1506, "step": 45740 }, { "epoch": 1.9709695481759058, "learning_rate": 8.895580598590791e-07, "loss": 4.2768, "step": 45760 }, { "epoch": 1.971830985915493, "learning_rate": 8.895095778836001e-07, "loss": 4.2313, "step": 45780 }, { "epoch": 1.9726924236550802, "learning_rate": 8.894610959081213e-07, "loss": 4.1688, "step": 45800 }, { "epoch": 1.9735538613946677, "learning_rate": 8.894126139326422e-07, "loss": 4.5331, "step": 45820 }, { "epoch": 1.9744152991342552, "learning_rate": 8.893641319571635e-07, "loss": 4.1421, "step": 45840 }, { "epoch": 1.9752767368738424, "learning_rate": 8.893156499816845e-07, "loss": 4.4491, "step": 45860 }, { "epoch": 1.9761381746134297, "learning_rate": 8.892671680062057e-07, "loss": 4.1742, "step": 45880 }, { "epoch": 1.9769996123530171, "learning_rate": 8.892186860307268e-07, "loss": 4.2573, "step": 45900 }, { "epoch": 1.9778610500926046, "learning_rate": 8.89170204055248e-07, "loss": 4.3677, "step": 45920 }, { "epoch": 1.9787224878321918, "learning_rate": 8.891217220797689e-07, "loss": 4.3011, "step": 45940 }, { "epoch": 1.9795839255717793, "learning_rate": 8.890732401042901e-07, "loss": 4.4628, "step": 45960 }, { "epoch": 1.9804453633113668, "learning_rate": 8.890247581288112e-07, "loss": 4.2885, "step": 45980 }, { "epoch": 1.981306801050954, "learning_rate": 8.889762761533322e-07, "loss": 4.2826, "step": 46000 }, { "epoch": 1.9821682387905413, "learning_rate": 8.889277941778534e-07, "loss": 4.219, "step": 46020 }, { "epoch": 1.9830296765301287, "learning_rate": 8.888793122023746e-07, "loss": 4.5854, "step": 46040 }, { "epoch": 1.9838911142697162, "learning_rate": 8.888308302268957e-07, "loss": 4.2624, "step": 46060 }, { "epoch": 1.9847525520093035, "learning_rate": 8.887823482514166e-07, "loss": 4.1914, "step": 46080 }, { "epoch": 1.985613989748891, "learning_rate": 8.887338662759378e-07, "loss": 4.4736, "step": 46100 }, { "epoch": 1.9864754274884784, "learning_rate": 8.886853843004589e-07, "loss": 4.1645, "step": 46120 }, { "epoch": 1.9873368652280656, "learning_rate": 8.8863690232498e-07, "loss": 4.2593, "step": 46140 }, { "epoch": 1.9881983029676529, "learning_rate": 8.885884203495011e-07, "loss": 4.2933, "step": 46160 }, { "epoch": 1.9890597407072403, "learning_rate": 8.885399383740223e-07, "loss": 4.472, "step": 46180 }, { "epoch": 1.9899211784468278, "learning_rate": 8.884914563985433e-07, "loss": 4.5536, "step": 46200 }, { "epoch": 1.990782616186415, "learning_rate": 8.884429744230644e-07, "loss": 4.111, "step": 46220 }, { "epoch": 1.9916440539260025, "learning_rate": 8.883944924475855e-07, "loss": 4.3073, "step": 46240 }, { "epoch": 1.99250549166559, "learning_rate": 8.883460104721067e-07, "loss": 4.2787, "step": 46260 }, { "epoch": 1.9933669294051772, "learning_rate": 8.882975284966278e-07, "loss": 4.195, "step": 46280 }, { "epoch": 1.9942283671447645, "learning_rate": 8.882490465211489e-07, "loss": 4.3508, "step": 46300 }, { "epoch": 1.995089804884352, "learning_rate": 8.8820056454567e-07, "loss": 4.2283, "step": 46320 }, { "epoch": 1.9959512426239394, "learning_rate": 8.88152082570191e-07, "loss": 4.0979, "step": 46340 }, { "epoch": 1.9968126803635267, "learning_rate": 8.881036005947122e-07, "loss": 4.509, "step": 46360 }, { "epoch": 1.9976741181031141, "learning_rate": 8.880551186192333e-07, "loss": 4.3608, "step": 46380 }, { "epoch": 1.9985355558427016, "learning_rate": 8.880066366437544e-07, "loss": 4.1539, "step": 46400 }, { "epoch": 1.9993969935822888, "learning_rate": 8.879581546682756e-07, "loss": 4.287, "step": 46420 }, { "epoch": 2.000258431321876, "learning_rate": 8.879096726927968e-07, "loss": 4.2811, "step": 46440 }, { "epoch": 2.0011198690614638, "learning_rate": 8.878611907173177e-07, "loss": 4.3977, "step": 46460 }, { "epoch": 2.001981306801051, "learning_rate": 8.878127087418388e-07, "loss": 4.4591, "step": 46480 }, { "epoch": 2.0028427445406383, "learning_rate": 8.8776422676636e-07, "loss": 4.3031, "step": 46500 }, { "epoch": 2.0037041822802255, "learning_rate": 8.87715744790881e-07, "loss": 4.3416, "step": 46520 }, { "epoch": 2.004565620019813, "learning_rate": 8.876672628154021e-07, "loss": 4.2634, "step": 46540 }, { "epoch": 2.0054270577594004, "learning_rate": 8.876187808399233e-07, "loss": 4.2435, "step": 46560 }, { "epoch": 2.0062884954989877, "learning_rate": 8.875702988644444e-07, "loss": 4.444, "step": 46580 }, { "epoch": 2.0071499332385754, "learning_rate": 8.875218168889654e-07, "loss": 4.2953, "step": 46600 }, { "epoch": 2.0080113709781626, "learning_rate": 8.874733349134865e-07, "loss": 4.1916, "step": 46620 }, { "epoch": 2.00887280871775, "learning_rate": 8.874248529380077e-07, "loss": 4.208, "step": 46640 }, { "epoch": 2.009734246457337, "learning_rate": 8.873763709625289e-07, "loss": 4.3798, "step": 46660 }, { "epoch": 2.010595684196925, "learning_rate": 8.873278889870499e-07, "loss": 4.2406, "step": 46680 }, { "epoch": 2.011457121936512, "learning_rate": 8.87279407011571e-07, "loss": 4.2977, "step": 46700 }, { "epoch": 2.0123185596760993, "learning_rate": 8.872309250360921e-07, "loss": 4.1866, "step": 46720 }, { "epoch": 2.013179997415687, "learning_rate": 8.871824430606132e-07, "loss": 3.9985, "step": 46740 }, { "epoch": 2.0140414351552742, "learning_rate": 8.871339610851343e-07, "loss": 4.3315, "step": 46760 }, { "epoch": 2.0149028728948615, "learning_rate": 8.870854791096554e-07, "loss": 4.3568, "step": 46780 }, { "epoch": 2.0157643106344487, "learning_rate": 8.870369971341766e-07, "loss": 4.4983, "step": 46800 }, { "epoch": 2.0166257483740364, "learning_rate": 8.869885151586976e-07, "loss": 4.3196, "step": 46820 }, { "epoch": 2.0174871861136237, "learning_rate": 8.869400331832187e-07, "loss": 4.3064, "step": 46840 }, { "epoch": 2.018348623853211, "learning_rate": 8.868915512077398e-07, "loss": 4.2872, "step": 46860 }, { "epoch": 2.0192100615927986, "learning_rate": 8.86843069232261e-07, "loss": 4.3508, "step": 46880 }, { "epoch": 2.020071499332386, "learning_rate": 8.86794587256782e-07, "loss": 4.3117, "step": 46900 }, { "epoch": 2.020932937071973, "learning_rate": 8.867461052813032e-07, "loss": 4.3204, "step": 46920 }, { "epoch": 2.0217943748115603, "learning_rate": 8.866976233058243e-07, "loss": 4.2796, "step": 46940 }, { "epoch": 2.022655812551148, "learning_rate": 8.866491413303454e-07, "loss": 4.3953, "step": 46960 }, { "epoch": 2.0235172502907353, "learning_rate": 8.866006593548664e-07, "loss": 4.3401, "step": 46980 }, { "epoch": 2.0243786880303225, "learning_rate": 8.865521773793875e-07, "loss": 4.4807, "step": 47000 }, { "epoch": 2.02524012576991, "learning_rate": 8.865036954039087e-07, "loss": 4.2138, "step": 47020 }, { "epoch": 2.0261015635094974, "learning_rate": 8.864552134284299e-07, "loss": 4.2883, "step": 47040 }, { "epoch": 2.0269630012490847, "learning_rate": 8.864067314529509e-07, "loss": 4.353, "step": 47060 }, { "epoch": 2.027824438988672, "learning_rate": 8.86358249477472e-07, "loss": 4.291, "step": 47080 }, { "epoch": 2.0286858767282596, "learning_rate": 8.863097675019931e-07, "loss": 4.2781, "step": 47100 }, { "epoch": 2.029547314467847, "learning_rate": 8.862612855265142e-07, "loss": 4.2815, "step": 47120 }, { "epoch": 2.030408752207434, "learning_rate": 8.862128035510353e-07, "loss": 4.1737, "step": 47140 }, { "epoch": 2.031270189947022, "learning_rate": 8.861643215755564e-07, "loss": 4.1671, "step": 47160 }, { "epoch": 2.032131627686609, "learning_rate": 8.861158396000776e-07, "loss": 4.4056, "step": 47180 }, { "epoch": 2.0329930654261963, "learning_rate": 8.860673576245987e-07, "loss": 4.3964, "step": 47200 }, { "epoch": 2.0338545031657835, "learning_rate": 8.860188756491197e-07, "loss": 4.1294, "step": 47220 }, { "epoch": 2.0347159409053712, "learning_rate": 8.859703936736408e-07, "loss": 4.2129, "step": 47240 }, { "epoch": 2.0355773786449585, "learning_rate": 8.85921911698162e-07, "loss": 4.342, "step": 47260 }, { "epoch": 2.0364388163845457, "learning_rate": 8.85873429722683e-07, "loss": 4.3192, "step": 47280 }, { "epoch": 2.0373002541241334, "learning_rate": 8.858249477472042e-07, "loss": 4.4086, "step": 47300 }, { "epoch": 2.0381616918637206, "learning_rate": 8.857764657717253e-07, "loss": 4.2597, "step": 47320 }, { "epoch": 2.039023129603308, "learning_rate": 8.857279837962464e-07, "loss": 4.3538, "step": 47340 }, { "epoch": 2.039884567342895, "learning_rate": 8.856795018207674e-07, "loss": 4.3276, "step": 47360 }, { "epoch": 2.040746005082483, "learning_rate": 8.856310198452886e-07, "loss": 4.3319, "step": 47380 }, { "epoch": 2.04160744282207, "learning_rate": 8.855825378698097e-07, "loss": 4.3089, "step": 47400 }, { "epoch": 2.0424688805616573, "learning_rate": 8.855340558943309e-07, "loss": 4.4973, "step": 47420 }, { "epoch": 2.0433303183012446, "learning_rate": 8.854855739188519e-07, "loss": 4.4496, "step": 47440 }, { "epoch": 2.0441917560408323, "learning_rate": 8.854370919433731e-07, "loss": 4.395, "step": 47460 }, { "epoch": 2.0450531937804195, "learning_rate": 8.853886099678941e-07, "loss": 4.0993, "step": 47480 }, { "epoch": 2.0459146315200067, "learning_rate": 8.853401279924152e-07, "loss": 4.3092, "step": 47500 }, { "epoch": 2.0467760692595944, "learning_rate": 8.852916460169363e-07, "loss": 4.1731, "step": 47520 }, { "epoch": 2.0476375069991817, "learning_rate": 8.852431640414574e-07, "loss": 4.498, "step": 47540 }, { "epoch": 2.048498944738769, "learning_rate": 8.851946820659786e-07, "loss": 4.301, "step": 47560 }, { "epoch": 2.049360382478356, "learning_rate": 8.851462000904997e-07, "loss": 4.0986, "step": 47580 }, { "epoch": 2.050221820217944, "learning_rate": 8.850977181150206e-07, "loss": 4.3623, "step": 47600 }, { "epoch": 2.051083257957531, "learning_rate": 8.850492361395418e-07, "loss": 4.186, "step": 47620 }, { "epoch": 2.0519446956971183, "learning_rate": 8.85000754164063e-07, "loss": 4.2125, "step": 47640 }, { "epoch": 2.052806133436706, "learning_rate": 8.849522721885841e-07, "loss": 4.3799, "step": 47660 }, { "epoch": 2.0536675711762933, "learning_rate": 8.849037902131052e-07, "loss": 4.2715, "step": 47680 }, { "epoch": 2.0545290089158805, "learning_rate": 8.848553082376264e-07, "loss": 4.3085, "step": 47700 }, { "epoch": 2.0553904466554678, "learning_rate": 8.848068262621473e-07, "loss": 4.4178, "step": 47720 }, { "epoch": 2.0562518843950555, "learning_rate": 8.847583442866685e-07, "loss": 4.3978, "step": 47740 }, { "epoch": 2.0571133221346427, "learning_rate": 8.847098623111896e-07, "loss": 4.2653, "step": 47760 }, { "epoch": 2.05797475987423, "learning_rate": 8.846613803357107e-07, "loss": 4.3894, "step": 47780 }, { "epoch": 2.0588361976138176, "learning_rate": 8.846128983602318e-07, "loss": 4.1731, "step": 47800 }, { "epoch": 2.059697635353405, "learning_rate": 8.845644163847529e-07, "loss": 4.2996, "step": 47820 }, { "epoch": 2.060559073092992, "learning_rate": 8.845159344092741e-07, "loss": 4.2233, "step": 47840 }, { "epoch": 2.0614205108325794, "learning_rate": 8.844674524337951e-07, "loss": 4.3654, "step": 47860 }, { "epoch": 2.062281948572167, "learning_rate": 8.844189704583162e-07, "loss": 4.1437, "step": 47880 }, { "epoch": 2.0631433863117543, "learning_rate": 8.843704884828373e-07, "loss": 4.3644, "step": 47900 }, { "epoch": 2.0640048240513416, "learning_rate": 8.843220065073585e-07, "loss": 4.2227, "step": 47920 }, { "epoch": 2.0648662617909292, "learning_rate": 8.842735245318796e-07, "loss": 4.0454, "step": 47940 }, { "epoch": 2.0657276995305165, "learning_rate": 8.842250425564007e-07, "loss": 4.4391, "step": 47960 }, { "epoch": 2.0665891372701037, "learning_rate": 8.841765605809217e-07, "loss": 4.3234, "step": 47980 }, { "epoch": 2.067450575009691, "learning_rate": 8.841280786054429e-07, "loss": 4.2496, "step": 48000 }, { "epoch": 2.0683120127492787, "learning_rate": 8.84079596629964e-07, "loss": 4.166, "step": 48020 }, { "epoch": 2.069173450488866, "learning_rate": 8.840311146544851e-07, "loss": 4.2374, "step": 48040 }, { "epoch": 2.070034888228453, "learning_rate": 8.839826326790062e-07, "loss": 4.2214, "step": 48060 }, { "epoch": 2.070896325968041, "learning_rate": 8.839341507035273e-07, "loss": 4.177, "step": 48080 }, { "epoch": 2.071757763707628, "learning_rate": 8.838856687280484e-07, "loss": 4.1296, "step": 48100 }, { "epoch": 2.0726192014472153, "learning_rate": 8.838371867525694e-07, "loss": 4.1983, "step": 48120 }, { "epoch": 2.0734806391868026, "learning_rate": 8.837887047770906e-07, "loss": 4.2373, "step": 48140 }, { "epoch": 2.0743420769263903, "learning_rate": 8.837402228016117e-07, "loss": 4.1931, "step": 48160 }, { "epoch": 2.0752035146659775, "learning_rate": 8.836917408261328e-07, "loss": 4.0753, "step": 48180 }, { "epoch": 2.0760649524055648, "learning_rate": 8.83643258850654e-07, "loss": 4.4324, "step": 48200 }, { "epoch": 2.0769263901451525, "learning_rate": 8.835947768751752e-07, "loss": 4.2271, "step": 48220 }, { "epoch": 2.0777878278847397, "learning_rate": 8.835462948996961e-07, "loss": 4.1287, "step": 48240 }, { "epoch": 2.078649265624327, "learning_rate": 8.834978129242172e-07, "loss": 4.3173, "step": 48260 }, { "epoch": 2.079510703363914, "learning_rate": 8.834493309487383e-07, "loss": 4.1593, "step": 48280 }, { "epoch": 2.080372141103502, "learning_rate": 8.834008489732595e-07, "loss": 4.265, "step": 48300 }, { "epoch": 2.081233578843089, "learning_rate": 8.833523669977806e-07, "loss": 4.2934, "step": 48320 }, { "epoch": 2.0820950165826764, "learning_rate": 8.833038850223017e-07, "loss": 4.3757, "step": 48340 }, { "epoch": 2.082956454322264, "learning_rate": 8.832554030468227e-07, "loss": 4.3117, "step": 48360 }, { "epoch": 2.0838178920618513, "learning_rate": 8.832069210713439e-07, "loss": 4.3394, "step": 48380 }, { "epoch": 2.0846793298014386, "learning_rate": 8.831584390958649e-07, "loss": 4.2903, "step": 48400 }, { "epoch": 2.085540767541026, "learning_rate": 8.831099571203861e-07, "loss": 4.4071, "step": 48420 }, { "epoch": 2.0864022052806135, "learning_rate": 8.830614751449072e-07, "loss": 4.0438, "step": 48440 }, { "epoch": 2.0872636430202007, "learning_rate": 8.830129931694284e-07, "loss": 4.3352, "step": 48460 }, { "epoch": 2.088125080759788, "learning_rate": 8.829645111939494e-07, "loss": 4.2519, "step": 48480 }, { "epoch": 2.0889865184993757, "learning_rate": 8.829160292184705e-07, "loss": 4.3348, "step": 48500 }, { "epoch": 2.089847956238963, "learning_rate": 8.828675472429916e-07, "loss": 4.0967, "step": 48520 }, { "epoch": 2.09070939397855, "learning_rate": 8.828190652675128e-07, "loss": 4.4048, "step": 48540 }, { "epoch": 2.0915708317181374, "learning_rate": 8.827705832920338e-07, "loss": 4.379, "step": 48560 }, { "epoch": 2.092432269457725, "learning_rate": 8.82722101316555e-07, "loss": 4.3112, "step": 48580 }, { "epoch": 2.0932937071973123, "learning_rate": 8.826736193410761e-07, "loss": 4.2812, "step": 48600 }, { "epoch": 2.0941551449368996, "learning_rate": 8.82625137365597e-07, "loss": 4.2841, "step": 48620 }, { "epoch": 2.0950165826764873, "learning_rate": 8.825766553901182e-07, "loss": 4.4263, "step": 48640 }, { "epoch": 2.0958780204160745, "learning_rate": 8.825281734146394e-07, "loss": 4.4555, "step": 48660 }, { "epoch": 2.0967394581556618, "learning_rate": 8.824796914391605e-07, "loss": 4.2405, "step": 48680 }, { "epoch": 2.097600895895249, "learning_rate": 8.824312094636815e-07, "loss": 4.1133, "step": 48700 }, { "epoch": 2.0984623336348367, "learning_rate": 8.823827274882027e-07, "loss": 4.2703, "step": 48720 }, { "epoch": 2.099323771374424, "learning_rate": 8.823342455127238e-07, "loss": 4.5088, "step": 48740 }, { "epoch": 2.100185209114011, "learning_rate": 8.822857635372449e-07, "loss": 4.1103, "step": 48760 }, { "epoch": 2.1010466468535984, "learning_rate": 8.822372815617659e-07, "loss": 4.4763, "step": 48780 }, { "epoch": 2.101908084593186, "learning_rate": 8.821887995862871e-07, "loss": 4.2769, "step": 48800 }, { "epoch": 2.1027695223327734, "learning_rate": 8.821403176108082e-07, "loss": 4.4507, "step": 48820 }, { "epoch": 2.1036309600723606, "learning_rate": 8.820918356353294e-07, "loss": 4.1722, "step": 48840 }, { "epoch": 2.1044923978119483, "learning_rate": 8.820433536598504e-07, "loss": 4.1197, "step": 48860 }, { "epoch": 2.1053538355515355, "learning_rate": 8.819948716843715e-07, "loss": 4.0937, "step": 48880 }, { "epoch": 2.106215273291123, "learning_rate": 8.819463897088926e-07, "loss": 4.2682, "step": 48900 }, { "epoch": 2.10707671103071, "learning_rate": 8.818979077334138e-07, "loss": 4.2429, "step": 48920 }, { "epoch": 2.1079381487702977, "learning_rate": 8.818494257579348e-07, "loss": 4.2731, "step": 48940 }, { "epoch": 2.108799586509885, "learning_rate": 8.81800943782456e-07, "loss": 4.2936, "step": 48960 }, { "epoch": 2.109661024249472, "learning_rate": 8.817524618069771e-07, "loss": 4.3315, "step": 48980 }, { "epoch": 2.11052246198906, "learning_rate": 8.817039798314981e-07, "loss": 4.3196, "step": 49000 }, { "epoch": 2.111383899728647, "learning_rate": 8.816554978560192e-07, "loss": 4.192, "step": 49020 }, { "epoch": 2.1122453374682344, "learning_rate": 8.816070158805404e-07, "loss": 4.261, "step": 49040 }, { "epoch": 2.1131067752078216, "learning_rate": 8.815585339050615e-07, "loss": 4.3711, "step": 49060 }, { "epoch": 2.1139682129474093, "learning_rate": 8.815100519295826e-07, "loss": 4.4269, "step": 49080 }, { "epoch": 2.1148296506869966, "learning_rate": 8.814615699541037e-07, "loss": 4.4123, "step": 49100 }, { "epoch": 2.115691088426584, "learning_rate": 8.814130879786248e-07, "loss": 4.1912, "step": 49120 }, { "epoch": 2.1165525261661715, "learning_rate": 8.813646060031459e-07, "loss": 4.2693, "step": 49140 }, { "epoch": 2.1174139639057588, "learning_rate": 8.813161240276669e-07, "loss": 4.4526, "step": 49160 }, { "epoch": 2.118275401645346, "learning_rate": 8.812676420521881e-07, "loss": 4.4448, "step": 49180 }, { "epoch": 2.1191368393849332, "learning_rate": 8.812191600767093e-07, "loss": 4.1797, "step": 49200 }, { "epoch": 2.119998277124521, "learning_rate": 8.811706781012304e-07, "loss": 4.0822, "step": 49220 }, { "epoch": 2.120859714864108, "learning_rate": 8.811221961257514e-07, "loss": 4.3429, "step": 49240 }, { "epoch": 2.1217211526036954, "learning_rate": 8.810737141502725e-07, "loss": 4.2108, "step": 49260 }, { "epoch": 2.122582590343283, "learning_rate": 8.810252321747937e-07, "loss": 4.3856, "step": 49280 }, { "epoch": 2.1234440280828704, "learning_rate": 8.809767501993147e-07, "loss": 4.3599, "step": 49300 }, { "epoch": 2.1243054658224576, "learning_rate": 8.809282682238358e-07, "loss": 4.1637, "step": 49320 }, { "epoch": 2.125166903562045, "learning_rate": 8.80879786248357e-07, "loss": 4.3569, "step": 49340 }, { "epoch": 2.1260283413016325, "learning_rate": 8.808313042728781e-07, "loss": 4.312, "step": 49360 }, { "epoch": 2.12688977904122, "learning_rate": 8.80782822297399e-07, "loss": 4.2211, "step": 49380 }, { "epoch": 2.127751216780807, "learning_rate": 8.807343403219202e-07, "loss": 4.3182, "step": 49400 }, { "epoch": 2.1286126545203947, "learning_rate": 8.806858583464414e-07, "loss": 4.3647, "step": 49420 }, { "epoch": 2.129474092259982, "learning_rate": 8.806373763709625e-07, "loss": 4.1665, "step": 49440 }, { "epoch": 2.130335529999569, "learning_rate": 8.805888943954836e-07, "loss": 4.3388, "step": 49460 }, { "epoch": 2.1311969677391565, "learning_rate": 8.805404124200048e-07, "loss": 4.3251, "step": 49480 }, { "epoch": 2.132058405478744, "learning_rate": 8.804919304445258e-07, "loss": 4.3496, "step": 49500 }, { "epoch": 2.1329198432183314, "learning_rate": 8.804434484690469e-07, "loss": 4.0173, "step": 49520 }, { "epoch": 2.1337812809579186, "learning_rate": 8.80394966493568e-07, "loss": 4.291, "step": 49540 }, { "epoch": 2.1346427186975063, "learning_rate": 8.803464845180891e-07, "loss": 4.271, "step": 49560 }, { "epoch": 2.1355041564370936, "learning_rate": 8.802980025426103e-07, "loss": 4.1767, "step": 49580 }, { "epoch": 2.136365594176681, "learning_rate": 8.802495205671313e-07, "loss": 4.3604, "step": 49600 }, { "epoch": 2.137227031916268, "learning_rate": 8.802010385916525e-07, "loss": 4.2652, "step": 49620 }, { "epoch": 2.1380884696558558, "learning_rate": 8.801525566161735e-07, "loss": 4.296, "step": 49640 }, { "epoch": 2.138949907395443, "learning_rate": 8.801040746406947e-07, "loss": 4.3255, "step": 49660 }, { "epoch": 2.1398113451350302, "learning_rate": 8.800555926652157e-07, "loss": 4.2263, "step": 49680 }, { "epoch": 2.140672782874618, "learning_rate": 8.800071106897368e-07, "loss": 4.3562, "step": 49700 }, { "epoch": 2.141534220614205, "learning_rate": 8.79958628714258e-07, "loss": 4.1724, "step": 49720 }, { "epoch": 2.1423956583537924, "learning_rate": 8.799101467387792e-07, "loss": 4.3046, "step": 49740 }, { "epoch": 2.1432570960933797, "learning_rate": 8.798616647633001e-07, "loss": 4.3324, "step": 49760 }, { "epoch": 2.1441185338329674, "learning_rate": 8.798131827878212e-07, "loss": 4.106, "step": 49780 }, { "epoch": 2.1449799715725546, "learning_rate": 8.797647008123424e-07, "loss": 4.2116, "step": 49800 }, { "epoch": 2.145841409312142, "learning_rate": 8.797162188368636e-07, "loss": 4.3622, "step": 49820 }, { "epoch": 2.1467028470517295, "learning_rate": 8.796677368613846e-07, "loss": 4.2366, "step": 49840 }, { "epoch": 2.147564284791317, "learning_rate": 8.796192548859057e-07, "loss": 4.34, "step": 49860 }, { "epoch": 2.148425722530904, "learning_rate": 8.795707729104269e-07, "loss": 4.1654, "step": 49880 }, { "epoch": 2.1492871602704913, "learning_rate": 8.795222909349478e-07, "loss": 4.3001, "step": 49900 }, { "epoch": 2.150148598010079, "learning_rate": 8.79473808959469e-07, "loss": 4.2751, "step": 49920 }, { "epoch": 2.151010035749666, "learning_rate": 8.794253269839901e-07, "loss": 4.2452, "step": 49940 }, { "epoch": 2.1518714734892535, "learning_rate": 8.793768450085113e-07, "loss": 4.2826, "step": 49960 }, { "epoch": 2.152732911228841, "learning_rate": 8.793283630330323e-07, "loss": 4.0487, "step": 49980 }, { "epoch": 2.1535943489684284, "learning_rate": 8.792798810575536e-07, "loss": 4.0342, "step": 50000 }, { "epoch": 2.1544557867080156, "learning_rate": 8.792313990820745e-07, "loss": 4.3066, "step": 50020 }, { "epoch": 2.155317224447603, "learning_rate": 8.791829171065957e-07, "loss": 4.2279, "step": 50040 }, { "epoch": 2.1561786621871906, "learning_rate": 8.791344351311167e-07, "loss": 4.2122, "step": 50060 }, { "epoch": 2.157040099926778, "learning_rate": 8.790859531556379e-07, "loss": 4.2016, "step": 50080 }, { "epoch": 2.157901537666365, "learning_rate": 8.79037471180159e-07, "loss": 4.3734, "step": 50100 }, { "epoch": 2.1587629754059527, "learning_rate": 8.789889892046802e-07, "loss": 4.3605, "step": 50120 }, { "epoch": 2.15962441314554, "learning_rate": 8.789405072292011e-07, "loss": 4.125, "step": 50140 }, { "epoch": 2.1604858508851272, "learning_rate": 8.788920252537223e-07, "loss": 4.3302, "step": 50160 }, { "epoch": 2.1613472886247145, "learning_rate": 8.788435432782434e-07, "loss": 4.142, "step": 50180 }, { "epoch": 2.162208726364302, "learning_rate": 8.787950613027646e-07, "loss": 4.2163, "step": 50200 }, { "epoch": 2.1630701641038894, "learning_rate": 8.787465793272856e-07, "loss": 4.226, "step": 50220 }, { "epoch": 2.1639316018434767, "learning_rate": 8.786980973518067e-07, "loss": 4.2411, "step": 50240 }, { "epoch": 2.1647930395830643, "learning_rate": 8.786496153763279e-07, "loss": 4.4795, "step": 50260 }, { "epoch": 2.1656544773226516, "learning_rate": 8.786011334008489e-07, "loss": 4.0183, "step": 50280 }, { "epoch": 2.166515915062239, "learning_rate": 8.7855265142537e-07, "loss": 4.1343, "step": 50300 }, { "epoch": 2.167377352801826, "learning_rate": 8.785041694498912e-07, "loss": 4.1272, "step": 50320 }, { "epoch": 2.1682387905414138, "learning_rate": 8.784556874744123e-07, "loss": 4.4295, "step": 50340 }, { "epoch": 2.169100228281001, "learning_rate": 8.784072054989334e-07, "loss": 4.179, "step": 50360 }, { "epoch": 2.1699616660205883, "learning_rate": 8.783587235234545e-07, "loss": 4.4046, "step": 50380 }, { "epoch": 2.170823103760176, "learning_rate": 8.783102415479755e-07, "loss": 4.3233, "step": 50400 }, { "epoch": 2.171684541499763, "learning_rate": 8.782617595724967e-07, "loss": 4.0627, "step": 50420 }, { "epoch": 2.1725459792393504, "learning_rate": 8.782132775970177e-07, "loss": 4.2086, "step": 50440 }, { "epoch": 2.1734074169789377, "learning_rate": 8.781647956215389e-07, "loss": 4.2934, "step": 50460 }, { "epoch": 2.1742688547185254, "learning_rate": 8.7811631364606e-07, "loss": 4.3135, "step": 50480 }, { "epoch": 2.1751302924581126, "learning_rate": 8.780678316705812e-07, "loss": 4.0578, "step": 50500 }, { "epoch": 2.1759917301977, "learning_rate": 8.780193496951022e-07, "loss": 4.1289, "step": 50520 }, { "epoch": 2.1768531679372876, "learning_rate": 8.779708677196233e-07, "loss": 4.2016, "step": 50540 }, { "epoch": 2.177714605676875, "learning_rate": 8.779223857441444e-07, "loss": 4.0938, "step": 50560 }, { "epoch": 2.178576043416462, "learning_rate": 8.778739037686655e-07, "loss": 4.345, "step": 50580 }, { "epoch": 2.1794374811560493, "learning_rate": 8.778254217931866e-07, "loss": 4.3269, "step": 50600 }, { "epoch": 2.180298918895637, "learning_rate": 8.777769398177078e-07, "loss": 4.2461, "step": 50620 }, { "epoch": 2.1811603566352242, "learning_rate": 8.777284578422289e-07, "loss": 4.1303, "step": 50640 }, { "epoch": 2.1820217943748115, "learning_rate": 8.776799758667499e-07, "loss": 4.2371, "step": 50660 }, { "epoch": 2.182883232114399, "learning_rate": 8.77631493891271e-07, "loss": 4.2818, "step": 50680 }, { "epoch": 2.1837446698539864, "learning_rate": 8.775830119157922e-07, "loss": 4.5456, "step": 50700 }, { "epoch": 2.1846061075935737, "learning_rate": 8.775345299403133e-07, "loss": 4.3468, "step": 50720 }, { "epoch": 2.185467545333161, "learning_rate": 8.774860479648344e-07, "loss": 4.2847, "step": 50740 }, { "epoch": 2.1863289830727486, "learning_rate": 8.774375659893555e-07, "loss": 4.203, "step": 50760 }, { "epoch": 2.187190420812336, "learning_rate": 8.773890840138765e-07, "loss": 4.0493, "step": 50780 }, { "epoch": 2.188051858551923, "learning_rate": 8.773406020383977e-07, "loss": 4.2606, "step": 50800 }, { "epoch": 2.1889132962915103, "learning_rate": 8.772921200629188e-07, "loss": 4.1683, "step": 50820 }, { "epoch": 2.189774734031098, "learning_rate": 8.772436380874399e-07, "loss": 4.3027, "step": 50840 }, { "epoch": 2.1906361717706853, "learning_rate": 8.77195156111961e-07, "loss": 4.2621, "step": 50860 }, { "epoch": 2.1914976095102725, "learning_rate": 8.771466741364821e-07, "loss": 4.4363, "step": 50880 }, { "epoch": 2.19235904724986, "learning_rate": 8.770981921610032e-07, "loss": 4.4562, "step": 50900 }, { "epoch": 2.1932204849894474, "learning_rate": 8.770497101855243e-07, "loss": 4.2564, "step": 50920 }, { "epoch": 2.1940819227290347, "learning_rate": 8.770012282100454e-07, "loss": 4.2429, "step": 50940 }, { "epoch": 2.194943360468622, "learning_rate": 8.769527462345665e-07, "loss": 4.3255, "step": 50960 }, { "epoch": 2.1958047982082096, "learning_rate": 8.769042642590876e-07, "loss": 4.2201, "step": 50980 }, { "epoch": 2.196666235947797, "learning_rate": 8.768557822836088e-07, "loss": 4.3356, "step": 51000 }, { "epoch": 2.197527673687384, "learning_rate": 8.768073003081299e-07, "loss": 4.019, "step": 51020 }, { "epoch": 2.198389111426972, "learning_rate": 8.767588183326509e-07, "loss": 4.3707, "step": 51040 }, { "epoch": 2.199250549166559, "learning_rate": 8.76710336357172e-07, "loss": 4.2873, "step": 51060 }, { "epoch": 2.2001119869061463, "learning_rate": 8.766618543816932e-07, "loss": 4.4412, "step": 51080 }, { "epoch": 2.2009734246457335, "learning_rate": 8.766133724062143e-07, "loss": 4.2582, "step": 51100 }, { "epoch": 2.2018348623853212, "learning_rate": 8.765648904307354e-07, "loss": 4.1803, "step": 51120 }, { "epoch": 2.2026963001249085, "learning_rate": 8.765164084552565e-07, "loss": 4.3579, "step": 51140 }, { "epoch": 2.2035577378644957, "learning_rate": 8.764679264797775e-07, "loss": 4.3487, "step": 51160 }, { "epoch": 2.2044191756040834, "learning_rate": 8.764194445042986e-07, "loss": 4.2576, "step": 51180 }, { "epoch": 2.2052806133436706, "learning_rate": 8.763709625288198e-07, "loss": 4.1051, "step": 51200 }, { "epoch": 2.206142051083258, "learning_rate": 8.763224805533409e-07, "loss": 4.1606, "step": 51220 }, { "epoch": 2.207003488822845, "learning_rate": 8.762739985778621e-07, "loss": 4.594, "step": 51240 }, { "epoch": 2.207864926562433, "learning_rate": 8.762255166023832e-07, "loss": 4.1487, "step": 51260 }, { "epoch": 2.20872636430202, "learning_rate": 8.761770346269042e-07, "loss": 4.1725, "step": 51280 }, { "epoch": 2.2095878020416073, "learning_rate": 8.761285526514253e-07, "loss": 4.1257, "step": 51300 }, { "epoch": 2.210449239781195, "learning_rate": 8.760800706759464e-07, "loss": 4.2533, "step": 51320 }, { "epoch": 2.2113106775207823, "learning_rate": 8.760315887004675e-07, "loss": 4.1158, "step": 51340 }, { "epoch": 2.2121721152603695, "learning_rate": 8.759831067249887e-07, "loss": 4.4128, "step": 51360 }, { "epoch": 2.2130335529999567, "learning_rate": 8.759346247495098e-07, "loss": 4.2981, "step": 51380 }, { "epoch": 2.2138949907395444, "learning_rate": 8.758861427740309e-07, "loss": 4.2351, "step": 51400 }, { "epoch": 2.2147564284791317, "learning_rate": 8.758376607985519e-07, "loss": 4.1967, "step": 51420 }, { "epoch": 2.215617866218719, "learning_rate": 8.757891788230731e-07, "loss": 4.1482, "step": 51440 }, { "epoch": 2.2164793039583066, "learning_rate": 8.757406968475942e-07, "loss": 4.2317, "step": 51460 }, { "epoch": 2.217340741697894, "learning_rate": 8.756922148721152e-07, "loss": 4.2784, "step": 51480 }, { "epoch": 2.218202179437481, "learning_rate": 8.756437328966364e-07, "loss": 4.3519, "step": 51500 }, { "epoch": 2.2190636171770683, "learning_rate": 8.755952509211575e-07, "loss": 4.3672, "step": 51520 }, { "epoch": 2.219925054916656, "learning_rate": 8.755467689456786e-07, "loss": 4.1288, "step": 51540 }, { "epoch": 2.2207864926562433, "learning_rate": 8.754982869701996e-07, "loss": 4.4207, "step": 51560 }, { "epoch": 2.2216479303958305, "learning_rate": 8.754498049947208e-07, "loss": 4.5234, "step": 51580 }, { "epoch": 2.222509368135418, "learning_rate": 8.754013230192419e-07, "loss": 4.2745, "step": 51600 }, { "epoch": 2.2233708058750055, "learning_rate": 8.753528410437631e-07, "loss": 4.2628, "step": 51620 }, { "epoch": 2.2242322436145927, "learning_rate": 8.753043590682841e-07, "loss": 4.2123, "step": 51640 }, { "epoch": 2.22509368135418, "learning_rate": 8.752558770928053e-07, "loss": 4.3077, "step": 51660 }, { "epoch": 2.2259551190937676, "learning_rate": 8.752073951173263e-07, "loss": 4.245, "step": 51680 }, { "epoch": 2.226816556833355, "learning_rate": 8.751589131418475e-07, "loss": 4.2597, "step": 51700 }, { "epoch": 2.227677994572942, "learning_rate": 8.751104311663685e-07, "loss": 4.4867, "step": 51720 }, { "epoch": 2.2285394323125294, "learning_rate": 8.750619491908897e-07, "loss": 4.5026, "step": 51740 }, { "epoch": 2.229400870052117, "learning_rate": 8.750134672154108e-07, "loss": 4.2651, "step": 51760 }, { "epoch": 2.2302623077917043, "learning_rate": 8.749649852399321e-07, "loss": 4.3774, "step": 51780 }, { "epoch": 2.2311237455312916, "learning_rate": 8.749165032644529e-07, "loss": 4.2054, "step": 51800 }, { "epoch": 2.2319851832708792, "learning_rate": 8.748680212889741e-07, "loss": 4.3723, "step": 51820 }, { "epoch": 2.2328466210104665, "learning_rate": 8.748195393134952e-07, "loss": 4.2458, "step": 51840 }, { "epoch": 2.2337080587500537, "learning_rate": 8.747710573380162e-07, "loss": 4.1491, "step": 51860 }, { "epoch": 2.234569496489641, "learning_rate": 8.747225753625374e-07, "loss": 4.0953, "step": 51880 }, { "epoch": 2.2354309342292287, "learning_rate": 8.746740933870586e-07, "loss": 4.2957, "step": 51900 }, { "epoch": 2.236292371968816, "learning_rate": 8.746256114115796e-07, "loss": 4.0886, "step": 51920 }, { "epoch": 2.237153809708403, "learning_rate": 8.745771294361006e-07, "loss": 4.2769, "step": 51940 }, { "epoch": 2.238015247447991, "learning_rate": 8.745286474606218e-07, "loss": 4.2706, "step": 51960 }, { "epoch": 2.238876685187578, "learning_rate": 8.74480165485143e-07, "loss": 4.2817, "step": 51980 }, { "epoch": 2.2397381229271653, "learning_rate": 8.744316835096641e-07, "loss": 4.1763, "step": 52000 }, { "epoch": 2.2405995606667526, "learning_rate": 8.743832015341851e-07, "loss": 4.3284, "step": 52020 }, { "epoch": 2.2414609984063403, "learning_rate": 8.743347195587063e-07, "loss": 4.3196, "step": 52040 }, { "epoch": 2.2423224361459275, "learning_rate": 8.742862375832273e-07, "loss": 4.4508, "step": 52060 }, { "epoch": 2.2431838738855148, "learning_rate": 8.742377556077485e-07, "loss": 4.0581, "step": 52080 }, { "epoch": 2.2440453116251025, "learning_rate": 8.741892736322696e-07, "loss": 4.3669, "step": 52100 }, { "epoch": 2.2449067493646897, "learning_rate": 8.741407916567907e-07, "loss": 4.1813, "step": 52120 }, { "epoch": 2.245768187104277, "learning_rate": 8.740923096813118e-07, "loss": 4.2587, "step": 52140 }, { "epoch": 2.246629624843864, "learning_rate": 8.74043827705833e-07, "loss": 4.1264, "step": 52160 }, { "epoch": 2.247491062583452, "learning_rate": 8.739953457303539e-07, "loss": 4.2917, "step": 52180 }, { "epoch": 2.248352500323039, "learning_rate": 8.739468637548751e-07, "loss": 4.3568, "step": 52200 }, { "epoch": 2.2492139380626264, "learning_rate": 8.738983817793962e-07, "loss": 4.3348, "step": 52220 }, { "epoch": 2.250075375802214, "learning_rate": 8.738498998039173e-07, "loss": 4.3902, "step": 52240 }, { "epoch": 2.2509368135418013, "learning_rate": 8.738014178284384e-07, "loss": 4.1351, "step": 52260 }, { "epoch": 2.2517982512813886, "learning_rate": 8.737529358529596e-07, "loss": 4.3549, "step": 52280 }, { "epoch": 2.252659689020976, "learning_rate": 8.737044538774807e-07, "loss": 4.2185, "step": 52300 }, { "epoch": 2.2535211267605635, "learning_rate": 8.736559719020016e-07, "loss": 4.0571, "step": 52320 }, { "epoch": 2.2543825645001507, "learning_rate": 8.736074899265228e-07, "loss": 4.1322, "step": 52340 }, { "epoch": 2.255244002239738, "learning_rate": 8.73559007951044e-07, "loss": 4.1068, "step": 52360 }, { "epoch": 2.2561054399793257, "learning_rate": 8.73510525975565e-07, "loss": 4.4032, "step": 52380 }, { "epoch": 2.256966877718913, "learning_rate": 8.734620440000861e-07, "loss": 4.2477, "step": 52400 }, { "epoch": 2.2578283154585, "learning_rate": 8.734135620246073e-07, "loss": 4.3231, "step": 52420 }, { "epoch": 2.2586897531980874, "learning_rate": 8.733650800491284e-07, "loss": 4.1685, "step": 52440 }, { "epoch": 2.259551190937675, "learning_rate": 8.733165980736494e-07, "loss": 4.3216, "step": 52460 }, { "epoch": 2.2604126286772623, "learning_rate": 8.732681160981705e-07, "loss": 4.227, "step": 52480 }, { "epoch": 2.2612740664168496, "learning_rate": 8.732196341226917e-07, "loss": 4.244, "step": 52500 }, { "epoch": 2.2621355041564373, "learning_rate": 8.731711521472129e-07, "loss": 4.3651, "step": 52520 }, { "epoch": 2.2629969418960245, "learning_rate": 8.731226701717339e-07, "loss": 4.4109, "step": 52540 }, { "epoch": 2.2638583796356118, "learning_rate": 8.730741881962549e-07, "loss": 4.2073, "step": 52560 }, { "epoch": 2.264719817375199, "learning_rate": 8.730257062207761e-07, "loss": 4.3845, "step": 52580 }, { "epoch": 2.2655812551147867, "learning_rate": 8.729772242452972e-07, "loss": 4.3746, "step": 52600 }, { "epoch": 2.266442692854374, "learning_rate": 8.729287422698183e-07, "loss": 4.0557, "step": 52620 }, { "epoch": 2.267304130593961, "learning_rate": 8.728802602943394e-07, "loss": 4.3143, "step": 52640 }, { "epoch": 2.268165568333549, "learning_rate": 8.728317783188606e-07, "loss": 3.9698, "step": 52660 }, { "epoch": 2.269027006073136, "learning_rate": 8.727832963433817e-07, "loss": 4.1787, "step": 52680 }, { "epoch": 2.2698884438127234, "learning_rate": 8.727348143679027e-07, "loss": 4.294, "step": 52700 }, { "epoch": 2.2707498815523106, "learning_rate": 8.726863323924238e-07, "loss": 4.1294, "step": 52720 }, { "epoch": 2.2716113192918983, "learning_rate": 8.72637850416945e-07, "loss": 4.3712, "step": 52740 }, { "epoch": 2.2724727570314855, "learning_rate": 8.72589368441466e-07, "loss": 4.4626, "step": 52760 }, { "epoch": 2.273334194771073, "learning_rate": 8.725408864659872e-07, "loss": 4.1864, "step": 52780 }, { "epoch": 2.2741956325106605, "learning_rate": 8.724924044905083e-07, "loss": 4.0382, "step": 52800 }, { "epoch": 2.2750570702502477, "learning_rate": 8.724439225150294e-07, "loss": 4.1505, "step": 52820 }, { "epoch": 2.275918507989835, "learning_rate": 8.723954405395504e-07, "loss": 4.3047, "step": 52840 }, { "epoch": 2.276779945729422, "learning_rate": 8.723469585640715e-07, "loss": 4.4541, "step": 52860 }, { "epoch": 2.27764138346901, "learning_rate": 8.722984765885927e-07, "loss": 4.1365, "step": 52880 }, { "epoch": 2.278502821208597, "learning_rate": 8.722499946131139e-07, "loss": 4.2079, "step": 52900 }, { "epoch": 2.2793642589481844, "learning_rate": 8.722015126376349e-07, "loss": 4.2184, "step": 52920 }, { "epoch": 2.280225696687772, "learning_rate": 8.721530306621559e-07, "loss": 4.298, "step": 52940 }, { "epoch": 2.2810871344273593, "learning_rate": 8.721045486866771e-07, "loss": 4.2712, "step": 52960 }, { "epoch": 2.2819485721669466, "learning_rate": 8.720560667111983e-07, "loss": 4.2324, "step": 52980 }, { "epoch": 2.282810009906534, "learning_rate": 8.720075847357193e-07, "loss": 4.2026, "step": 53000 }, { "epoch": 2.2836714476461215, "learning_rate": 8.719591027602404e-07, "loss": 4.2496, "step": 53020 }, { "epoch": 2.2845328853857088, "learning_rate": 8.719106207847617e-07, "loss": 4.337, "step": 53040 }, { "epoch": 2.285394323125296, "learning_rate": 8.718621388092826e-07, "loss": 4.2557, "step": 53060 }, { "epoch": 2.2862557608648837, "learning_rate": 8.718136568338037e-07, "loss": 4.2969, "step": 53080 }, { "epoch": 2.287117198604471, "learning_rate": 8.717651748583248e-07, "loss": 4.4096, "step": 53100 }, { "epoch": 2.287978636344058, "learning_rate": 8.71716692882846e-07, "loss": 4.312, "step": 53120 }, { "epoch": 2.2888400740836454, "learning_rate": 8.71668210907367e-07, "loss": 4.2076, "step": 53140 }, { "epoch": 2.289701511823233, "learning_rate": 8.716197289318882e-07, "loss": 4.1274, "step": 53160 }, { "epoch": 2.2905629495628204, "learning_rate": 8.715712469564093e-07, "loss": 4.2475, "step": 53180 }, { "epoch": 2.2914243873024076, "learning_rate": 8.715227649809304e-07, "loss": 4.1663, "step": 53200 }, { "epoch": 2.2922858250419953, "learning_rate": 8.714742830054514e-07, "loss": 4.2687, "step": 53220 }, { "epoch": 2.2931472627815825, "learning_rate": 8.714258010299726e-07, "loss": 4.1586, "step": 53240 }, { "epoch": 2.29400870052117, "learning_rate": 8.713773190544937e-07, "loss": 4.2607, "step": 53260 }, { "epoch": 2.294870138260757, "learning_rate": 8.713288370790149e-07, "loss": 4.2607, "step": 53280 }, { "epoch": 2.2957315760003447, "learning_rate": 8.712803551035359e-07, "loss": 4.1282, "step": 53300 }, { "epoch": 2.296593013739932, "learning_rate": 8.71231873128057e-07, "loss": 4.0689, "step": 53320 }, { "epoch": 2.297454451479519, "learning_rate": 8.711833911525781e-07, "loss": 4.3091, "step": 53340 }, { "epoch": 2.298315889219107, "learning_rate": 8.711349091770992e-07, "loss": 4.1475, "step": 53360 }, { "epoch": 2.299177326958694, "learning_rate": 8.710864272016203e-07, "loss": 4.1784, "step": 53380 }, { "epoch": 2.3000387646982814, "learning_rate": 8.710379452261414e-07, "loss": 4.1835, "step": 53400 }, { "epoch": 2.3009002024378686, "learning_rate": 8.709894632506626e-07, "loss": 4.1533, "step": 53420 }, { "epoch": 2.3017616401774563, "learning_rate": 8.709409812751837e-07, "loss": 4.2843, "step": 53440 }, { "epoch": 2.3026230779170436, "learning_rate": 8.708924992997047e-07, "loss": 4.1374, "step": 53460 }, { "epoch": 2.303484515656631, "learning_rate": 8.708440173242258e-07, "loss": 4.1059, "step": 53480 }, { "epoch": 2.3043459533962185, "learning_rate": 8.70795535348747e-07, "loss": 4.1689, "step": 53500 }, { "epoch": 2.3052073911358058, "learning_rate": 8.707470533732681e-07, "loss": 4.3381, "step": 53520 }, { "epoch": 2.306068828875393, "learning_rate": 8.706985713977892e-07, "loss": 4.0695, "step": 53540 }, { "epoch": 2.3069302666149802, "learning_rate": 8.706500894223103e-07, "loss": 4.3566, "step": 53560 }, { "epoch": 2.307791704354568, "learning_rate": 8.706016074468314e-07, "loss": 4.1611, "step": 53580 }, { "epoch": 2.308653142094155, "learning_rate": 8.705531254713525e-07, "loss": 4.2708, "step": 53600 }, { "epoch": 2.3095145798337424, "learning_rate": 8.705046434958736e-07, "loss": 4.2761, "step": 53620 }, { "epoch": 2.31037601757333, "learning_rate": 8.704561615203947e-07, "loss": 4.2535, "step": 53640 }, { "epoch": 2.3112374553129174, "learning_rate": 8.704076795449159e-07, "loss": 4.331, "step": 53660 }, { "epoch": 2.3120988930525046, "learning_rate": 8.703591975694369e-07, "loss": 4.2145, "step": 53680 }, { "epoch": 2.312960330792092, "learning_rate": 8.70310715593958e-07, "loss": 4.162, "step": 53700 }, { "epoch": 2.3138217685316795, "learning_rate": 8.702622336184791e-07, "loss": 4.1911, "step": 53720 }, { "epoch": 2.314683206271267, "learning_rate": 8.702137516430002e-07, "loss": 4.4483, "step": 53740 }, { "epoch": 2.315544644010854, "learning_rate": 8.701652696675213e-07, "loss": 4.1958, "step": 53760 }, { "epoch": 2.3164060817504417, "learning_rate": 8.701167876920425e-07, "loss": 4.3123, "step": 53780 }, { "epoch": 2.317267519490029, "learning_rate": 8.700683057165636e-07, "loss": 4.251, "step": 53800 }, { "epoch": 2.318128957229616, "learning_rate": 8.700198237410847e-07, "loss": 4.3, "step": 53820 }, { "epoch": 2.3189903949692035, "learning_rate": 8.699713417656057e-07, "loss": 4.327, "step": 53840 }, { "epoch": 2.319851832708791, "learning_rate": 8.699228597901269e-07, "loss": 4.5191, "step": 53860 }, { "epoch": 2.3207132704483784, "learning_rate": 8.698743778146481e-07, "loss": 4.0079, "step": 53880 }, { "epoch": 2.3215747081879656, "learning_rate": 8.698258958391691e-07, "loss": 4.1226, "step": 53900 }, { "epoch": 2.3224361459275533, "learning_rate": 8.697774138636902e-07, "loss": 4.2492, "step": 53920 }, { "epoch": 2.3232975836671406, "learning_rate": 8.697289318882113e-07, "loss": 4.2787, "step": 53940 }, { "epoch": 2.324159021406728, "learning_rate": 8.696804499127323e-07, "loss": 4.1966, "step": 53960 }, { "epoch": 2.325020459146315, "learning_rate": 8.696319679372535e-07, "loss": 4.2201, "step": 53980 }, { "epoch": 2.3258818968859027, "learning_rate": 8.695834859617746e-07, "loss": 4.2533, "step": 54000 }, { "epoch": 2.32674333462549, "learning_rate": 8.695350039862957e-07, "loss": 4.2268, "step": 54020 }, { "epoch": 2.3276047723650772, "learning_rate": 8.694865220108168e-07, "loss": 4.2416, "step": 54040 }, { "epoch": 2.328466210104665, "learning_rate": 8.69438040035338e-07, "loss": 4.1719, "step": 54060 }, { "epoch": 2.329327647844252, "learning_rate": 8.693895580598591e-07, "loss": 4.4017, "step": 54080 }, { "epoch": 2.3301890855838394, "learning_rate": 8.693410760843801e-07, "loss": 4.3052, "step": 54100 }, { "epoch": 2.3310505233234267, "learning_rate": 8.692925941089012e-07, "loss": 4.1228, "step": 54120 }, { "epoch": 2.3319119610630143, "learning_rate": 8.692441121334224e-07, "loss": 4.2331, "step": 54140 }, { "epoch": 2.3327733988026016, "learning_rate": 8.691956301579435e-07, "loss": 4.055, "step": 54160 }, { "epoch": 2.333634836542189, "learning_rate": 8.691471481824646e-07, "loss": 4.3784, "step": 54180 }, { "epoch": 2.3344962742817765, "learning_rate": 8.690986662069857e-07, "loss": 4.362, "step": 54200 }, { "epoch": 2.3353577120213638, "learning_rate": 8.690501842315067e-07, "loss": 4.2607, "step": 54220 }, { "epoch": 2.336219149760951, "learning_rate": 8.690017022560279e-07, "loss": 4.4963, "step": 54240 }, { "epoch": 2.3370805875005383, "learning_rate": 8.68953220280549e-07, "loss": 4.2517, "step": 54260 }, { "epoch": 2.337942025240126, "learning_rate": 8.689047383050701e-07, "loss": 4.3148, "step": 54280 }, { "epoch": 2.338803462979713, "learning_rate": 8.688562563295913e-07, "loss": 4.2369, "step": 54300 }, { "epoch": 2.3396649007193004, "learning_rate": 8.688077743541124e-07, "loss": 4.2441, "step": 54320 }, { "epoch": 2.3405263384588877, "learning_rate": 8.687592923786333e-07, "loss": 4.0943, "step": 54340 }, { "epoch": 2.3413877761984754, "learning_rate": 8.687108104031545e-07, "loss": 4.3473, "step": 54360 }, { "epoch": 2.3422492139380626, "learning_rate": 8.686623284276756e-07, "loss": 4.2992, "step": 54380 }, { "epoch": 2.34311065167765, "learning_rate": 8.686138464521968e-07, "loss": 4.2246, "step": 54400 }, { "epoch": 2.3439720894172376, "learning_rate": 8.685653644767178e-07, "loss": 4.4738, "step": 54420 }, { "epoch": 2.344833527156825, "learning_rate": 8.68516882501239e-07, "loss": 4.1206, "step": 54440 }, { "epoch": 2.345694964896412, "learning_rate": 8.684684005257601e-07, "loss": 4.3221, "step": 54460 }, { "epoch": 2.3465564026359993, "learning_rate": 8.68419918550281e-07, "loss": 4.277, "step": 54480 }, { "epoch": 2.347417840375587, "learning_rate": 8.683714365748022e-07, "loss": 4.3785, "step": 54500 }, { "epoch": 2.3482792781151742, "learning_rate": 8.683229545993234e-07, "loss": 4.381, "step": 54520 }, { "epoch": 2.3491407158547615, "learning_rate": 8.682744726238445e-07, "loss": 4.4255, "step": 54540 }, { "epoch": 2.3500021535943487, "learning_rate": 8.682259906483656e-07, "loss": 4.1823, "step": 54560 }, { "epoch": 2.3508635913339364, "learning_rate": 8.681775086728867e-07, "loss": 4.3709, "step": 54580 }, { "epoch": 2.3517250290735237, "learning_rate": 8.681290266974078e-07, "loss": 4.1439, "step": 54600 }, { "epoch": 2.352586466813111, "learning_rate": 8.680805447219289e-07, "loss": 4.4158, "step": 54620 }, { "epoch": 2.3534479045526986, "learning_rate": 8.680320627464499e-07, "loss": 4.2438, "step": 54640 }, { "epoch": 2.354309342292286, "learning_rate": 8.679835807709711e-07, "loss": 4.1635, "step": 54660 }, { "epoch": 2.355170780031873, "learning_rate": 8.679350987954923e-07, "loss": 4.1807, "step": 54680 }, { "epoch": 2.3560322177714603, "learning_rate": 8.678866168200134e-07, "loss": 4.311, "step": 54700 }, { "epoch": 2.356893655511048, "learning_rate": 8.678381348445343e-07, "loss": 4.1099, "step": 54720 }, { "epoch": 2.3577550932506353, "learning_rate": 8.677896528690555e-07, "loss": 4.1695, "step": 54740 }, { "epoch": 2.3586165309902225, "learning_rate": 8.677411708935766e-07, "loss": 4.2379, "step": 54760 }, { "epoch": 2.35947796872981, "learning_rate": 8.676926889180978e-07, "loss": 4.2587, "step": 54780 }, { "epoch": 2.3603394064693974, "learning_rate": 8.676442069426188e-07, "loss": 4.1083, "step": 54800 }, { "epoch": 2.3612008442089847, "learning_rate": 8.675957249671401e-07, "loss": 4.0596, "step": 54820 }, { "epoch": 2.362062281948572, "learning_rate": 8.675472429916611e-07, "loss": 3.8836, "step": 54840 }, { "epoch": 2.3629237196881596, "learning_rate": 8.674987610161822e-07, "loss": 4.306, "step": 54860 }, { "epoch": 2.363785157427747, "learning_rate": 8.674502790407032e-07, "loss": 4.215, "step": 54880 }, { "epoch": 2.364646595167334, "learning_rate": 8.674017970652244e-07, "loss": 4.0287, "step": 54900 }, { "epoch": 2.365508032906922, "learning_rate": 8.673533150897455e-07, "loss": 4.4671, "step": 54920 }, { "epoch": 2.366369470646509, "learning_rate": 8.673048331142666e-07, "loss": 4.3428, "step": 54940 }, { "epoch": 2.3672309083860963, "learning_rate": 8.672563511387877e-07, "loss": 4.0316, "step": 54960 }, { "epoch": 2.3680923461256835, "learning_rate": 8.672078691633088e-07, "loss": 4.2033, "step": 54980 }, { "epoch": 2.3689537838652712, "learning_rate": 8.671593871878299e-07, "loss": 4.3097, "step": 55000 }, { "epoch": 2.3698152216048585, "learning_rate": 8.671109052123509e-07, "loss": 4.1758, "step": 55020 }, { "epoch": 2.3706766593444457, "learning_rate": 8.670624232368721e-07, "loss": 4.5582, "step": 55040 }, { "epoch": 2.3715380970840334, "learning_rate": 8.670139412613933e-07, "loss": 4.3608, "step": 55060 }, { "epoch": 2.3723995348236206, "learning_rate": 8.669654592859144e-07, "loss": 4.3383, "step": 55080 }, { "epoch": 2.373260972563208, "learning_rate": 8.669169773104354e-07, "loss": 4.1525, "step": 55100 }, { "epoch": 2.374122410302795, "learning_rate": 8.668684953349565e-07, "loss": 4.1372, "step": 55120 }, { "epoch": 2.374983848042383, "learning_rate": 8.668200133594777e-07, "loss": 4.3853, "step": 55140 }, { "epoch": 2.37584528578197, "learning_rate": 8.667715313839988e-07, "loss": 4.1832, "step": 55160 }, { "epoch": 2.3767067235215573, "learning_rate": 8.667230494085198e-07, "loss": 4.3249, "step": 55180 }, { "epoch": 2.377568161261145, "learning_rate": 8.66674567433041e-07, "loss": 4.231, "step": 55200 }, { "epoch": 2.3784295990007323, "learning_rate": 8.666260854575622e-07, "loss": 4.3996, "step": 55220 }, { "epoch": 2.3792910367403195, "learning_rate": 8.665776034820831e-07, "loss": 4.3011, "step": 55240 }, { "epoch": 2.3801524744799067, "learning_rate": 8.665291215066042e-07, "loss": 4.1059, "step": 55260 }, { "epoch": 2.3810139122194944, "learning_rate": 8.664806395311254e-07, "loss": 4.1404, "step": 55280 }, { "epoch": 2.3818753499590817, "learning_rate": 8.664321575556465e-07, "loss": 4.2836, "step": 55300 }, { "epoch": 2.382736787698669, "learning_rate": 8.663836755801676e-07, "loss": 4.2736, "step": 55320 }, { "epoch": 2.3835982254382566, "learning_rate": 8.663351936046887e-07, "loss": 4.0531, "step": 55340 }, { "epoch": 2.384459663177844, "learning_rate": 8.662867116292098e-07, "loss": 4.2524, "step": 55360 }, { "epoch": 2.385321100917431, "learning_rate": 8.662382296537309e-07, "loss": 4.307, "step": 55380 }, { "epoch": 2.3861825386570183, "learning_rate": 8.66189747678252e-07, "loss": 4.2786, "step": 55400 }, { "epoch": 2.387043976396606, "learning_rate": 8.661412657027731e-07, "loss": 3.9889, "step": 55420 }, { "epoch": 2.3879054141361933, "learning_rate": 8.660927837272943e-07, "loss": 4.381, "step": 55440 }, { "epoch": 2.3887668518757805, "learning_rate": 8.660443017518154e-07, "loss": 4.3136, "step": 55460 }, { "epoch": 2.389628289615368, "learning_rate": 8.659958197763364e-07, "loss": 4.1068, "step": 55480 }, { "epoch": 2.3904897273549555, "learning_rate": 8.659473378008575e-07, "loss": 4.2248, "step": 55500 }, { "epoch": 2.3913511650945427, "learning_rate": 8.658988558253787e-07, "loss": 3.9636, "step": 55520 }, { "epoch": 2.39221260283413, "learning_rate": 8.658503738498997e-07, "loss": 4.3415, "step": 55540 }, { "epoch": 2.3930740405737176, "learning_rate": 8.658018918744207e-07, "loss": 4.2074, "step": 55560 }, { "epoch": 2.393935478313305, "learning_rate": 8.65753409898942e-07, "loss": 4.472, "step": 55580 }, { "epoch": 2.394796916052892, "learning_rate": 8.657049279234632e-07, "loss": 4.221, "step": 55600 }, { "epoch": 2.39565835379248, "learning_rate": 8.656564459479841e-07, "loss": 4.027, "step": 55620 }, { "epoch": 2.396519791532067, "learning_rate": 8.656079639725052e-07, "loss": 4.1817, "step": 55640 }, { "epoch": 2.3973812292716543, "learning_rate": 8.655594819970265e-07, "loss": 3.85, "step": 55660 }, { "epoch": 2.3982426670112416, "learning_rate": 8.655110000215476e-07, "loss": 4.1537, "step": 55680 }, { "epoch": 2.3991041047508292, "learning_rate": 8.654625180460686e-07, "loss": 4.1673, "step": 55700 }, { "epoch": 2.3999655424904165, "learning_rate": 8.654140360705897e-07, "loss": 4.3035, "step": 55720 }, { "epoch": 2.4008269802300037, "learning_rate": 8.653655540951108e-07, "loss": 4.3662, "step": 55740 }, { "epoch": 2.4016884179695914, "learning_rate": 8.65317072119632e-07, "loss": 4.3216, "step": 55760 }, { "epoch": 2.4025498557091787, "learning_rate": 8.65268590144153e-07, "loss": 4.1805, "step": 55780 }, { "epoch": 2.403411293448766, "learning_rate": 8.652201081686741e-07, "loss": 4.1321, "step": 55800 }, { "epoch": 2.404272731188353, "learning_rate": 8.651716261931953e-07, "loss": 4.3409, "step": 55820 }, { "epoch": 2.405134168927941, "learning_rate": 8.651231442177163e-07, "loss": 4.2178, "step": 55840 }, { "epoch": 2.405995606667528, "learning_rate": 8.650746622422375e-07, "loss": 4.1659, "step": 55860 }, { "epoch": 2.4068570444071153, "learning_rate": 8.650261802667585e-07, "loss": 3.9914, "step": 55880 }, { "epoch": 2.407718482146703, "learning_rate": 8.649776982912797e-07, "loss": 4.1824, "step": 55900 }, { "epoch": 2.4085799198862903, "learning_rate": 8.649292163158007e-07, "loss": 4.2067, "step": 55920 }, { "epoch": 2.4094413576258775, "learning_rate": 8.648807343403219e-07, "loss": 4.3716, "step": 55940 }, { "epoch": 2.4103027953654648, "learning_rate": 8.64832252364843e-07, "loss": 4.183, "step": 55960 }, { "epoch": 2.4111642331050525, "learning_rate": 8.647837703893642e-07, "loss": 4.3275, "step": 55980 }, { "epoch": 2.4120256708446397, "learning_rate": 8.647352884138851e-07, "loss": 4.0832, "step": 56000 }, { "epoch": 2.412887108584227, "learning_rate": 8.646868064384063e-07, "loss": 4.0189, "step": 56020 }, { "epoch": 2.4137485463238146, "learning_rate": 8.646383244629274e-07, "loss": 4.3236, "step": 56040 }, { "epoch": 2.414609984063402, "learning_rate": 8.645898424874486e-07, "loss": 4.2682, "step": 56060 }, { "epoch": 2.415471421802989, "learning_rate": 8.645413605119695e-07, "loss": 4.2826, "step": 56080 }, { "epoch": 2.4163328595425764, "learning_rate": 8.644928785364907e-07, "loss": 4.0921, "step": 56100 }, { "epoch": 2.417194297282164, "learning_rate": 8.644443965610118e-07, "loss": 4.0919, "step": 56120 }, { "epoch": 2.4180557350217513, "learning_rate": 8.64395914585533e-07, "loss": 3.8716, "step": 56140 }, { "epoch": 2.4189171727613386, "learning_rate": 8.64347432610054e-07, "loss": 4.2843, "step": 56160 }, { "epoch": 2.4197786105009262, "learning_rate": 8.642989506345751e-07, "loss": 4.3531, "step": 56180 }, { "epoch": 2.4206400482405135, "learning_rate": 8.642504686590963e-07, "loss": 3.9195, "step": 56200 }, { "epoch": 2.4215014859801007, "learning_rate": 8.642019866836174e-07, "loss": 4.4277, "step": 56220 }, { "epoch": 2.422362923719688, "learning_rate": 8.641535047081385e-07, "loss": 4.2536, "step": 56240 }, { "epoch": 2.4232243614592757, "learning_rate": 8.641050227326595e-07, "loss": 4.2423, "step": 56260 }, { "epoch": 2.424085799198863, "learning_rate": 8.640565407571807e-07, "loss": 4.4274, "step": 56280 }, { "epoch": 2.42494723693845, "learning_rate": 8.640080587817018e-07, "loss": 4.3172, "step": 56300 }, { "epoch": 2.425808674678038, "learning_rate": 8.639595768062229e-07, "loss": 4.1643, "step": 56320 }, { "epoch": 2.426670112417625, "learning_rate": 8.63911094830744e-07, "loss": 4.1716, "step": 56340 }, { "epoch": 2.4275315501572123, "learning_rate": 8.638626128552652e-07, "loss": 4.2735, "step": 56360 }, { "epoch": 2.4283929878967996, "learning_rate": 8.638141308797861e-07, "loss": 4.2517, "step": 56380 }, { "epoch": 2.4292544256363873, "learning_rate": 8.637656489043073e-07, "loss": 4.2051, "step": 56400 }, { "epoch": 2.4301158633759745, "learning_rate": 8.637171669288284e-07, "loss": 4.1685, "step": 56420 }, { "epoch": 2.4309773011155618, "learning_rate": 8.636686849533496e-07, "loss": 4.0869, "step": 56440 }, { "epoch": 2.4318387388551495, "learning_rate": 8.636202029778706e-07, "loss": 4.0233, "step": 56460 }, { "epoch": 2.4327001765947367, "learning_rate": 8.635717210023918e-07, "loss": 4.2422, "step": 56480 }, { "epoch": 2.433561614334324, "learning_rate": 8.635232390269128e-07, "loss": 4.2122, "step": 56500 }, { "epoch": 2.434423052073911, "learning_rate": 8.634747570514339e-07, "loss": 4.0499, "step": 56520 }, { "epoch": 2.435284489813499, "learning_rate": 8.63426275075955e-07, "loss": 4.3853, "step": 56540 }, { "epoch": 2.436145927553086, "learning_rate": 8.633777931004762e-07, "loss": 4.4201, "step": 56560 }, { "epoch": 2.4370073652926734, "learning_rate": 8.633293111249973e-07, "loss": 4.1895, "step": 56580 }, { "epoch": 2.437868803032261, "learning_rate": 8.632808291495185e-07, "loss": 4.3688, "step": 56600 }, { "epoch": 2.4387302407718483, "learning_rate": 8.632323471740395e-07, "loss": 4.3445, "step": 56620 }, { "epoch": 2.4395916785114355, "learning_rate": 8.631838651985605e-07, "loss": 4.0405, "step": 56640 }, { "epoch": 2.440453116251023, "learning_rate": 8.631353832230817e-07, "loss": 4.1405, "step": 56660 }, { "epoch": 2.4413145539906105, "learning_rate": 8.630869012476028e-07, "loss": 4.1484, "step": 56680 }, { "epoch": 2.4421759917301977, "learning_rate": 8.630384192721239e-07, "loss": 4.2479, "step": 56700 }, { "epoch": 2.443037429469785, "learning_rate": 8.62989937296645e-07, "loss": 4.1576, "step": 56720 }, { "epoch": 2.4438988672093727, "learning_rate": 8.629414553211662e-07, "loss": 4.3753, "step": 56740 }, { "epoch": 2.44476030494896, "learning_rate": 8.628929733456872e-07, "loss": 4.4392, "step": 56760 }, { "epoch": 2.445621742688547, "learning_rate": 8.628444913702083e-07, "loss": 4.1372, "step": 56780 }, { "epoch": 2.4464831804281344, "learning_rate": 8.627960093947294e-07, "loss": 4.1564, "step": 56800 }, { "epoch": 2.447344618167722, "learning_rate": 8.627475274192505e-07, "loss": 4.1573, "step": 56820 }, { "epoch": 2.4482060559073093, "learning_rate": 8.626990454437717e-07, "loss": 4.0543, "step": 56840 }, { "epoch": 2.4490674936468966, "learning_rate": 8.626505634682928e-07, "loss": 4.077, "step": 56860 }, { "epoch": 2.4499289313864843, "learning_rate": 8.626020814928139e-07, "loss": 4.2958, "step": 56880 }, { "epoch": 2.4507903691260715, "learning_rate": 8.625535995173349e-07, "loss": 3.9361, "step": 56900 }, { "epoch": 2.4516518068656588, "learning_rate": 8.625051175418561e-07, "loss": 4.3521, "step": 56920 }, { "epoch": 2.452513244605246, "learning_rate": 8.624566355663772e-07, "loss": 4.432, "step": 56940 }, { "epoch": 2.4533746823448337, "learning_rate": 8.624081535908983e-07, "loss": 3.9227, "step": 56960 }, { "epoch": 2.454236120084421, "learning_rate": 8.623596716154194e-07, "loss": 4.1412, "step": 56980 }, { "epoch": 2.455097557824008, "learning_rate": 8.623111896399405e-07, "loss": 4.073, "step": 57000 }, { "epoch": 2.455958995563596, "learning_rate": 8.622627076644616e-07, "loss": 4.2698, "step": 57020 }, { "epoch": 2.456820433303183, "learning_rate": 8.622142256889827e-07, "loss": 4.2295, "step": 57040 }, { "epoch": 2.4576818710427704, "learning_rate": 8.621657437135038e-07, "loss": 4.3758, "step": 57060 }, { "epoch": 2.4585433087823576, "learning_rate": 8.621172617380249e-07, "loss": 4.4118, "step": 57080 }, { "epoch": 2.4594047465219453, "learning_rate": 8.620687797625461e-07, "loss": 4.3054, "step": 57100 }, { "epoch": 2.4602661842615325, "learning_rate": 8.620202977870671e-07, "loss": 4.1352, "step": 57120 }, { "epoch": 2.46112762200112, "learning_rate": 8.619718158115882e-07, "loss": 4.2934, "step": 57140 }, { "epoch": 2.4619890597407075, "learning_rate": 8.619233338361093e-07, "loss": 4.1653, "step": 57160 }, { "epoch": 2.4628504974802947, "learning_rate": 8.618748518606304e-07, "loss": 4.2276, "step": 57180 }, { "epoch": 2.463711935219882, "learning_rate": 8.618263698851515e-07, "loss": 4.1756, "step": 57200 }, { "epoch": 2.464573372959469, "learning_rate": 8.617778879096727e-07, "loss": 4.2888, "step": 57220 }, { "epoch": 2.465434810699057, "learning_rate": 8.617294059341938e-07, "loss": 4.1995, "step": 57240 }, { "epoch": 2.466296248438644, "learning_rate": 8.616809239587149e-07, "loss": 4.2819, "step": 57260 }, { "epoch": 2.4671576861782314, "learning_rate": 8.616324419832359e-07, "loss": 4.3963, "step": 57280 }, { "epoch": 2.468019123917819, "learning_rate": 8.615839600077571e-07, "loss": 4.3, "step": 57300 }, { "epoch": 2.4688805616574063, "learning_rate": 8.615354780322782e-07, "loss": 4.0509, "step": 57320 }, { "epoch": 2.4697419993969936, "learning_rate": 8.614869960567991e-07, "loss": 4.3817, "step": 57340 }, { "epoch": 2.470603437136581, "learning_rate": 8.614385140813204e-07, "loss": 4.2557, "step": 57360 }, { "epoch": 2.4714648748761685, "learning_rate": 8.613900321058416e-07, "loss": 4.3308, "step": 57380 }, { "epoch": 2.4723263126157558, "learning_rate": 8.613415501303626e-07, "loss": 4.3975, "step": 57400 }, { "epoch": 2.473187750355343, "learning_rate": 8.612930681548836e-07, "loss": 4.1042, "step": 57420 }, { "epoch": 2.4740491880949307, "learning_rate": 8.612445861794049e-07, "loss": 4.2089, "step": 57440 }, { "epoch": 2.474910625834518, "learning_rate": 8.611961042039259e-07, "loss": 4.1088, "step": 57460 }, { "epoch": 2.475772063574105, "learning_rate": 8.611476222284471e-07, "loss": 4.3419, "step": 57480 }, { "epoch": 2.4766335013136924, "learning_rate": 8.610991402529681e-07, "loss": 4.2666, "step": 57500 }, { "epoch": 2.47749493905328, "learning_rate": 8.610506582774892e-07, "loss": 4.252, "step": 57520 }, { "epoch": 2.4783563767928674, "learning_rate": 8.610021763020103e-07, "loss": 4.0077, "step": 57540 }, { "epoch": 2.4792178145324546, "learning_rate": 8.609536943265315e-07, "loss": 3.9771, "step": 57560 }, { "epoch": 2.480079252272042, "learning_rate": 8.609052123510525e-07, "loss": 4.2792, "step": 57580 }, { "epoch": 2.4809406900116295, "learning_rate": 8.608567303755737e-07, "loss": 4.3579, "step": 57600 }, { "epoch": 2.481802127751217, "learning_rate": 8.608082484000948e-07, "loss": 4.29, "step": 57620 }, { "epoch": 2.482663565490804, "learning_rate": 8.60759766424616e-07, "loss": 4.3093, "step": 57640 }, { "epoch": 2.4835250032303917, "learning_rate": 8.607112844491369e-07, "loss": 4.3589, "step": 57660 }, { "epoch": 2.484386440969979, "learning_rate": 8.606628024736581e-07, "loss": 4.4272, "step": 57680 }, { "epoch": 2.485247878709566, "learning_rate": 8.606143204981792e-07, "loss": 4.0898, "step": 57700 }, { "epoch": 2.4861093164491535, "learning_rate": 8.605658385227002e-07, "loss": 4.2245, "step": 57720 }, { "epoch": 2.486970754188741, "learning_rate": 8.605173565472214e-07, "loss": 4.4094, "step": 57740 }, { "epoch": 2.4878321919283284, "learning_rate": 8.604688745717426e-07, "loss": 4.1925, "step": 57760 }, { "epoch": 2.4886936296679156, "learning_rate": 8.604203925962636e-07, "loss": 4.2971, "step": 57780 }, { "epoch": 2.489555067407503, "learning_rate": 8.603719106207846e-07, "loss": 4.3134, "step": 57800 }, { "epoch": 2.4904165051470906, "learning_rate": 8.603234286453058e-07, "loss": 4.0966, "step": 57820 }, { "epoch": 2.491277942886678, "learning_rate": 8.60274946669827e-07, "loss": 4.1384, "step": 57840 }, { "epoch": 2.492139380626265, "learning_rate": 8.60226464694348e-07, "loss": 4.3345, "step": 57860 }, { "epoch": 2.4930008183658527, "learning_rate": 8.601779827188691e-07, "loss": 4.0805, "step": 57880 }, { "epoch": 2.49386225610544, "learning_rate": 8.601295007433902e-07, "loss": 4.3075, "step": 57900 }, { "epoch": 2.4947236938450272, "learning_rate": 8.600810187679114e-07, "loss": 3.9731, "step": 57920 }, { "epoch": 2.4955851315846145, "learning_rate": 8.600325367924325e-07, "loss": 4.2603, "step": 57940 }, { "epoch": 2.496446569324202, "learning_rate": 8.599840548169535e-07, "loss": 4.26, "step": 57960 }, { "epoch": 2.4973080070637894, "learning_rate": 8.599355728414747e-07, "loss": 3.9662, "step": 57980 }, { "epoch": 2.4981694448033767, "learning_rate": 8.598870908659958e-07, "loss": 4.4137, "step": 58000 }, { "epoch": 2.4990308825429643, "learning_rate": 8.59838608890517e-07, "loss": 4.3187, "step": 58020 }, { "epoch": 2.4998923202825516, "learning_rate": 8.597901269150379e-07, "loss": 4.3463, "step": 58040 }, { "epoch": 2.500753758022139, "learning_rate": 8.597416449395591e-07, "loss": 4.1218, "step": 58060 }, { "epoch": 2.501615195761726, "learning_rate": 8.596931629640802e-07, "loss": 4.2526, "step": 58080 }, { "epoch": 2.5024766335013138, "learning_rate": 8.596446809886013e-07, "loss": 4.4045, "step": 58100 }, { "epoch": 2.503338071240901, "learning_rate": 8.595961990131224e-07, "loss": 4.1303, "step": 58120 }, { "epoch": 2.5041995089804887, "learning_rate": 8.595477170376436e-07, "loss": 4.0786, "step": 58140 }, { "epoch": 2.505060946720076, "learning_rate": 8.594992350621646e-07, "loss": 4.2704, "step": 58160 }, { "epoch": 2.505922384459663, "learning_rate": 8.594507530866857e-07, "loss": 4.2644, "step": 58180 }, { "epoch": 2.5067838221992504, "learning_rate": 8.594022711112068e-07, "loss": 4.4116, "step": 58200 }, { "epoch": 2.5076452599388377, "learning_rate": 8.59353789135728e-07, "loss": 4.0617, "step": 58220 }, { "epoch": 2.5085066976784254, "learning_rate": 8.593053071602491e-07, "loss": 4.3488, "step": 58240 }, { "epoch": 2.5093681354180126, "learning_rate": 8.592568251847701e-07, "loss": 4.213, "step": 58260 }, { "epoch": 2.5102295731576, "learning_rate": 8.592083432092912e-07, "loss": 4.5169, "step": 58280 }, { "epoch": 2.5110910108971876, "learning_rate": 8.591598612338124e-07, "loss": 4.0503, "step": 58300 }, { "epoch": 2.511952448636775, "learning_rate": 8.591113792583334e-07, "loss": 3.8825, "step": 58320 }, { "epoch": 2.512813886376362, "learning_rate": 8.590628972828545e-07, "loss": 4.281, "step": 58340 }, { "epoch": 2.5136753241159493, "learning_rate": 8.590144153073757e-07, "loss": 4.2192, "step": 58360 }, { "epoch": 2.514536761855537, "learning_rate": 8.58965933331897e-07, "loss": 4.2605, "step": 58380 }, { "epoch": 2.5153981995951242, "learning_rate": 8.58917451356418e-07, "loss": 4.209, "step": 58400 }, { "epoch": 2.5162596373347115, "learning_rate": 8.588689693809389e-07, "loss": 4.2857, "step": 58420 }, { "epoch": 2.517121075074299, "learning_rate": 8.588204874054601e-07, "loss": 4.1435, "step": 58440 }, { "epoch": 2.5179825128138864, "learning_rate": 8.587720054299813e-07, "loss": 4.3966, "step": 58460 }, { "epoch": 2.5188439505534737, "learning_rate": 8.587235234545023e-07, "loss": 4.1411, "step": 58480 }, { "epoch": 2.519705388293061, "learning_rate": 8.586750414790234e-07, "loss": 4.2864, "step": 58500 }, { "epoch": 2.5205668260326486, "learning_rate": 8.586265595035446e-07, "loss": 4.035, "step": 58520 }, { "epoch": 2.521428263772236, "learning_rate": 8.585780775280656e-07, "loss": 4.0988, "step": 58540 }, { "epoch": 2.522289701511823, "learning_rate": 8.585295955525867e-07, "loss": 4.2534, "step": 58560 }, { "epoch": 2.5231511392514108, "learning_rate": 8.584811135771078e-07, "loss": 4.1708, "step": 58580 }, { "epoch": 2.524012576990998, "learning_rate": 8.58432631601629e-07, "loss": 4.3406, "step": 58600 }, { "epoch": 2.5248740147305853, "learning_rate": 8.5838414962615e-07, "loss": 4.4411, "step": 58620 }, { "epoch": 2.5257354524701725, "learning_rate": 8.583356676506712e-07, "loss": 4.1217, "step": 58640 }, { "epoch": 2.52659689020976, "learning_rate": 8.582871856751923e-07, "loss": 4.4278, "step": 58660 }, { "epoch": 2.5274583279493474, "learning_rate": 8.582387036997134e-07, "loss": 4.3738, "step": 58680 }, { "epoch": 2.5283197656889347, "learning_rate": 8.581902217242345e-07, "loss": 4.1657, "step": 58700 }, { "epoch": 2.5291812034285224, "learning_rate": 8.581417397487556e-07, "loss": 4.0269, "step": 58720 }, { "epoch": 2.5300426411681096, "learning_rate": 8.580932577732767e-07, "loss": 4.2034, "step": 58740 }, { "epoch": 2.530904078907697, "learning_rate": 8.580447757977979e-07, "loss": 4.26, "step": 58760 }, { "epoch": 2.531765516647284, "learning_rate": 8.579962938223189e-07, "loss": 4.0627, "step": 58780 }, { "epoch": 2.532626954386872, "learning_rate": 8.579478118468399e-07, "loss": 4.4826, "step": 58800 }, { "epoch": 2.533488392126459, "learning_rate": 8.578993298713611e-07, "loss": 4.2246, "step": 58820 }, { "epoch": 2.5343498298660463, "learning_rate": 8.578508478958823e-07, "loss": 4.0814, "step": 58840 }, { "epoch": 2.535211267605634, "learning_rate": 8.578023659204033e-07, "loss": 4.4911, "step": 58860 }, { "epoch": 2.5360727053452212, "learning_rate": 8.577538839449244e-07, "loss": 4.3088, "step": 58880 }, { "epoch": 2.5369341430848085, "learning_rate": 8.577054019694456e-07, "loss": 4.1783, "step": 58900 }, { "epoch": 2.5377955808243957, "learning_rate": 8.576569199939667e-07, "loss": 4.0773, "step": 58920 }, { "epoch": 2.5386570185639834, "learning_rate": 8.576084380184877e-07, "loss": 4.2445, "step": 58940 }, { "epoch": 2.5395184563035706, "learning_rate": 8.575599560430088e-07, "loss": 4.4122, "step": 58960 }, { "epoch": 2.540379894043158, "learning_rate": 8.5751147406753e-07, "loss": 4.0924, "step": 58980 }, { "epoch": 2.5412413317827456, "learning_rate": 8.574629920920512e-07, "loss": 4.169, "step": 59000 }, { "epoch": 2.542102769522333, "learning_rate": 8.574145101165722e-07, "loss": 4.268, "step": 59020 }, { "epoch": 2.54296420726192, "learning_rate": 8.573660281410933e-07, "loss": 4.3032, "step": 59040 }, { "epoch": 2.5438256450015073, "learning_rate": 8.573175461656144e-07, "loss": 4.2648, "step": 59060 }, { "epoch": 2.544687082741095, "learning_rate": 8.572690641901354e-07, "loss": 4.4249, "step": 59080 }, { "epoch": 2.5455485204806823, "learning_rate": 8.572205822146566e-07, "loss": 4.3171, "step": 59100 }, { "epoch": 2.5464099582202695, "learning_rate": 8.571721002391776e-07, "loss": 4.2584, "step": 59120 }, { "epoch": 2.547271395959857, "learning_rate": 8.571236182636989e-07, "loss": 4.3728, "step": 59140 }, { "epoch": 2.5481328336994444, "learning_rate": 8.570751362882199e-07, "loss": 4.3094, "step": 59160 }, { "epoch": 2.5489942714390317, "learning_rate": 8.57026654312741e-07, "loss": 3.9504, "step": 59180 }, { "epoch": 2.549855709178619, "learning_rate": 8.569781723372621e-07, "loss": 4.1956, "step": 59200 }, { "epoch": 2.5507171469182066, "learning_rate": 8.569296903617834e-07, "loss": 4.2037, "step": 59220 }, { "epoch": 2.551578584657794, "learning_rate": 8.568812083863043e-07, "loss": 4.1132, "step": 59240 }, { "epoch": 2.552440022397381, "learning_rate": 8.568327264108255e-07, "loss": 4.3493, "step": 59260 }, { "epoch": 2.553301460136969, "learning_rate": 8.567842444353466e-07, "loss": 4.361, "step": 59280 }, { "epoch": 2.554162897876556, "learning_rate": 8.567357624598676e-07, "loss": 4.1195, "step": 59300 }, { "epoch": 2.5550243356161433, "learning_rate": 8.566872804843887e-07, "loss": 4.1201, "step": 59320 }, { "epoch": 2.5558857733557305, "learning_rate": 8.566387985089098e-07, "loss": 4.3441, "step": 59340 }, { "epoch": 2.556747211095318, "learning_rate": 8.56590316533431e-07, "loss": 4.1668, "step": 59360 }, { "epoch": 2.5576086488349055, "learning_rate": 8.565418345579521e-07, "loss": 4.391, "step": 59380 }, { "epoch": 2.5584700865744927, "learning_rate": 8.564933525824732e-07, "loss": 4.1836, "step": 59400 }, { "epoch": 2.5593315243140804, "learning_rate": 8.564448706069943e-07, "loss": 4.0879, "step": 59420 }, { "epoch": 2.5601929620536676, "learning_rate": 8.563963886315154e-07, "loss": 4.187, "step": 59440 }, { "epoch": 2.561054399793255, "learning_rate": 8.563479066560365e-07, "loss": 4.1479, "step": 59460 }, { "epoch": 2.561915837532842, "learning_rate": 8.562994246805576e-07, "loss": 4.1809, "step": 59480 }, { "epoch": 2.56277727527243, "learning_rate": 8.562509427050787e-07, "loss": 4.191, "step": 59500 }, { "epoch": 2.563638713012017, "learning_rate": 8.562024607295999e-07, "loss": 4.4552, "step": 59520 }, { "epoch": 2.5645001507516043, "learning_rate": 8.56153978754121e-07, "loss": 4.3285, "step": 59540 }, { "epoch": 2.565361588491192, "learning_rate": 8.56105496778642e-07, "loss": 4.2826, "step": 59560 }, { "epoch": 2.5662230262307792, "learning_rate": 8.560570148031631e-07, "loss": 4.0214, "step": 59580 }, { "epoch": 2.5670844639703665, "learning_rate": 8.560085328276842e-07, "loss": 4.0768, "step": 59600 }, { "epoch": 2.5679459017099537, "learning_rate": 8.559600508522053e-07, "loss": 4.1235, "step": 59620 }, { "epoch": 2.5688073394495414, "learning_rate": 8.559115688767264e-07, "loss": 4.2986, "step": 59640 }, { "epoch": 2.5696687771891287, "learning_rate": 8.558630869012476e-07, "loss": 3.9553, "step": 59660 }, { "epoch": 2.570530214928716, "learning_rate": 8.558146049257686e-07, "loss": 4.144, "step": 59680 }, { "epoch": 2.5713916526683036, "learning_rate": 8.557661229502897e-07, "loss": 4.1962, "step": 59700 }, { "epoch": 2.572253090407891, "learning_rate": 8.557176409748109e-07, "loss": 4.3642, "step": 59720 }, { "epoch": 2.573114528147478, "learning_rate": 8.55669158999332e-07, "loss": 4.0517, "step": 59740 }, { "epoch": 2.5739759658870653, "learning_rate": 8.556206770238531e-07, "loss": 4.0824, "step": 59760 }, { "epoch": 2.574837403626653, "learning_rate": 8.555721950483742e-07, "loss": 4.178, "step": 59780 }, { "epoch": 2.5756988413662403, "learning_rate": 8.555237130728954e-07, "loss": 4.347, "step": 59800 }, { "epoch": 2.5765602791058275, "learning_rate": 8.554752310974164e-07, "loss": 4.3382, "step": 59820 }, { "epoch": 2.577421716845415, "learning_rate": 8.554267491219375e-07, "loss": 4.1127, "step": 59840 }, { "epoch": 2.5782831545850025, "learning_rate": 8.553782671464586e-07, "loss": 4.0289, "step": 59860 }, { "epoch": 2.5791445923245897, "learning_rate": 8.553297851709797e-07, "loss": 4.1871, "step": 59880 }, { "epoch": 2.580006030064177, "learning_rate": 8.552813031955009e-07, "loss": 4.2729, "step": 59900 }, { "epoch": 2.5808674678037646, "learning_rate": 8.55232821220022e-07, "loss": 4.1083, "step": 59920 }, { "epoch": 2.581728905543352, "learning_rate": 8.55184339244543e-07, "loss": 4.0924, "step": 59940 }, { "epoch": 2.582590343282939, "learning_rate": 8.55135857269064e-07, "loss": 4.134, "step": 59960 }, { "epoch": 2.583451781022527, "learning_rate": 8.550873752935852e-07, "loss": 4.1005, "step": 59980 }, { "epoch": 2.584313218762114, "learning_rate": 8.550388933181064e-07, "loss": 4.0589, "step": 60000 }, { "epoch": 2.5851746565017013, "learning_rate": 8.549904113426275e-07, "loss": 4.3462, "step": 60020 }, { "epoch": 2.5860360942412886, "learning_rate": 8.549419293671486e-07, "loss": 4.1581, "step": 60040 }, { "epoch": 2.5868975319808762, "learning_rate": 8.548934473916696e-07, "loss": 4.3692, "step": 60060 }, { "epoch": 2.5877589697204635, "learning_rate": 8.548449654161908e-07, "loss": 4.0618, "step": 60080 }, { "epoch": 2.5886204074600507, "learning_rate": 8.547964834407119e-07, "loss": 4.3576, "step": 60100 }, { "epoch": 2.5894818451996384, "learning_rate": 8.54748001465233e-07, "loss": 4.3794, "step": 60120 }, { "epoch": 2.5903432829392257, "learning_rate": 8.546995194897541e-07, "loss": 4.1958, "step": 60140 }, { "epoch": 2.591204720678813, "learning_rate": 8.546510375142752e-07, "loss": 4.2342, "step": 60160 }, { "epoch": 2.5920661584184, "learning_rate": 8.546025555387964e-07, "loss": 4.2702, "step": 60180 }, { "epoch": 2.592927596157988, "learning_rate": 8.545540735633173e-07, "loss": 4.0517, "step": 60200 }, { "epoch": 2.593789033897575, "learning_rate": 8.545055915878385e-07, "loss": 4.1766, "step": 60220 }, { "epoch": 2.5946504716371623, "learning_rate": 8.544571096123596e-07, "loss": 4.0639, "step": 60240 }, { "epoch": 2.59551190937675, "learning_rate": 8.544086276368808e-07, "loss": 4.3518, "step": 60260 }, { "epoch": 2.5963733471163373, "learning_rate": 8.543601456614018e-07, "loss": 4.4055, "step": 60280 }, { "epoch": 2.5972347848559245, "learning_rate": 8.54311663685923e-07, "loss": 4.3399, "step": 60300 }, { "epoch": 2.5980962225955118, "learning_rate": 8.54263181710444e-07, "loss": 4.1884, "step": 60320 }, { "epoch": 2.598957660335099, "learning_rate": 8.542146997349652e-07, "loss": 4.3731, "step": 60340 }, { "epoch": 2.5998190980746867, "learning_rate": 8.541662177594862e-07, "loss": 4.5374, "step": 60360 }, { "epoch": 2.600680535814274, "learning_rate": 8.541177357840074e-07, "loss": 4.1441, "step": 60380 }, { "epoch": 2.6015419735538616, "learning_rate": 8.540692538085285e-07, "loss": 4.2451, "step": 60400 }, { "epoch": 2.602403411293449, "learning_rate": 8.540207718330496e-07, "loss": 4.172, "step": 60420 }, { "epoch": 2.603264849033036, "learning_rate": 8.539722898575707e-07, "loss": 4.1974, "step": 60440 }, { "epoch": 2.6041262867726234, "learning_rate": 8.539238078820918e-07, "loss": 4.2203, "step": 60460 }, { "epoch": 2.6049877245122106, "learning_rate": 8.53875325906613e-07, "loss": 4.241, "step": 60480 }, { "epoch": 2.6058491622517983, "learning_rate": 8.53826843931134e-07, "loss": 4.2997, "step": 60500 }, { "epoch": 2.6067105999913855, "learning_rate": 8.537783619556551e-07, "loss": 4.0746, "step": 60520 }, { "epoch": 2.6075720377309732, "learning_rate": 8.537298799801763e-07, "loss": 4.1195, "step": 60540 }, { "epoch": 2.6084334754705605, "learning_rate": 8.536813980046974e-07, "loss": 4.2102, "step": 60560 }, { "epoch": 2.6092949132101477, "learning_rate": 8.536329160292183e-07, "loss": 4.1293, "step": 60580 }, { "epoch": 2.610156350949735, "learning_rate": 8.535844340537395e-07, "loss": 4.1909, "step": 60600 }, { "epoch": 2.611017788689322, "learning_rate": 8.535359520782607e-07, "loss": 4.0423, "step": 60620 }, { "epoch": 2.61187922642891, "learning_rate": 8.534874701027818e-07, "loss": 4.2171, "step": 60640 }, { "epoch": 2.612740664168497, "learning_rate": 8.534389881273028e-07, "loss": 3.9872, "step": 60660 }, { "epoch": 2.613602101908085, "learning_rate": 8.53390506151824e-07, "loss": 4.1679, "step": 60680 }, { "epoch": 2.614463539647672, "learning_rate": 8.53342024176345e-07, "loss": 4.2011, "step": 60700 }, { "epoch": 2.6153249773872593, "learning_rate": 8.532935422008662e-07, "loss": 4.1579, "step": 60720 }, { "epoch": 2.6161864151268466, "learning_rate": 8.532450602253872e-07, "loss": 4.3445, "step": 60740 }, { "epoch": 2.617047852866434, "learning_rate": 8.531965782499084e-07, "loss": 4.2757, "step": 60760 }, { "epoch": 2.6179092906060215, "learning_rate": 8.531480962744295e-07, "loss": 4.1861, "step": 60780 }, { "epoch": 2.6187707283456088, "learning_rate": 8.530996142989507e-07, "loss": 4.3216, "step": 60800 }, { "epoch": 2.6196321660851964, "learning_rate": 8.530511323234717e-07, "loss": 4.3104, "step": 60820 }, { "epoch": 2.6204936038247837, "learning_rate": 8.530026503479928e-07, "loss": 4.31, "step": 60840 }, { "epoch": 2.621355041564371, "learning_rate": 8.529541683725139e-07, "loss": 4.2184, "step": 60860 }, { "epoch": 2.622216479303958, "learning_rate": 8.52905686397035e-07, "loss": 4.0942, "step": 60880 }, { "epoch": 2.6230779170435454, "learning_rate": 8.52857204421556e-07, "loss": 4.3643, "step": 60900 }, { "epoch": 2.623939354783133, "learning_rate": 8.528087224460773e-07, "loss": 3.9566, "step": 60920 }, { "epoch": 2.6248007925227204, "learning_rate": 8.527602404705984e-07, "loss": 4.2772, "step": 60940 }, { "epoch": 2.625662230262308, "learning_rate": 8.527117584951193e-07, "loss": 4.1774, "step": 60960 }, { "epoch": 2.6265236680018953, "learning_rate": 8.526632765196405e-07, "loss": 4.1943, "step": 60980 }, { "epoch": 2.6273851057414825, "learning_rate": 8.526147945441618e-07, "loss": 4.1812, "step": 61000 }, { "epoch": 2.62824654348107, "learning_rate": 8.525663125686828e-07, "loss": 4.3691, "step": 61020 }, { "epoch": 2.629107981220657, "learning_rate": 8.525178305932038e-07, "loss": 4.3042, "step": 61040 }, { "epoch": 2.6299694189602447, "learning_rate": 8.52469348617725e-07, "loss": 4.2198, "step": 61060 }, { "epoch": 2.630830856699832, "learning_rate": 8.524208666422461e-07, "loss": 4.1068, "step": 61080 }, { "epoch": 2.6316922944394197, "learning_rate": 8.523723846667672e-07, "loss": 4.1193, "step": 61100 }, { "epoch": 2.632553732179007, "learning_rate": 8.523239026912882e-07, "loss": 4.4358, "step": 61120 }, { "epoch": 2.633415169918594, "learning_rate": 8.522754207158094e-07, "loss": 4.2313, "step": 61140 }, { "epoch": 2.6342766076581814, "learning_rate": 8.522269387403306e-07, "loss": 4.2971, "step": 61160 }, { "epoch": 2.6351380453977686, "learning_rate": 8.521784567648516e-07, "loss": 4.2572, "step": 61180 }, { "epoch": 2.6359994831373563, "learning_rate": 8.521299747893727e-07, "loss": 4.2868, "step": 61200 }, { "epoch": 2.6368609208769436, "learning_rate": 8.520814928138938e-07, "loss": 4.1524, "step": 61220 }, { "epoch": 2.6377223586165313, "learning_rate": 8.520330108384149e-07, "loss": 4.267, "step": 61240 }, { "epoch": 2.6385837963561185, "learning_rate": 8.51984528862936e-07, "loss": 4.1479, "step": 61260 }, { "epoch": 2.6394452340957058, "learning_rate": 8.519360468874571e-07, "loss": 4.1027, "step": 61280 }, { "epoch": 2.640306671835293, "learning_rate": 8.518875649119783e-07, "loss": 4.0376, "step": 61300 }, { "epoch": 2.6411681095748802, "learning_rate": 8.518390829364994e-07, "loss": 4.268, "step": 61320 }, { "epoch": 2.642029547314468, "learning_rate": 8.517906009610204e-07, "loss": 4.4516, "step": 61340 }, { "epoch": 2.642890985054055, "learning_rate": 8.517421189855415e-07, "loss": 4.144, "step": 61360 }, { "epoch": 2.643752422793643, "learning_rate": 8.516936370100627e-07, "loss": 4.213, "step": 61380 }, { "epoch": 2.64461386053323, "learning_rate": 8.516451550345838e-07, "loss": 4.2411, "step": 61400 }, { "epoch": 2.6454752982728174, "learning_rate": 8.515966730591048e-07, "loss": 4.125, "step": 61420 }, { "epoch": 2.6463367360124046, "learning_rate": 8.51548191083626e-07, "loss": 4.2793, "step": 61440 }, { "epoch": 2.647198173751992, "learning_rate": 8.514997091081471e-07, "loss": 4.1936, "step": 61460 }, { "epoch": 2.6480596114915795, "learning_rate": 8.514512271326681e-07, "loss": 4.3829, "step": 61480 }, { "epoch": 2.648921049231167, "learning_rate": 8.514027451571892e-07, "loss": 4.0414, "step": 61500 }, { "epoch": 2.649782486970754, "learning_rate": 8.513542631817104e-07, "loss": 4.3418, "step": 61520 }, { "epoch": 2.6506439247103417, "learning_rate": 8.513057812062316e-07, "loss": 4.2777, "step": 61540 }, { "epoch": 2.651505362449929, "learning_rate": 8.512572992307526e-07, "loss": 4.075, "step": 61560 }, { "epoch": 2.652366800189516, "learning_rate": 8.512088172552737e-07, "loss": 4.2111, "step": 61580 }, { "epoch": 2.6532282379291035, "learning_rate": 8.511603352797948e-07, "loss": 4.0313, "step": 61600 }, { "epoch": 2.654089675668691, "learning_rate": 8.51111853304316e-07, "loss": 4.1522, "step": 61620 }, { "epoch": 2.6549511134082784, "learning_rate": 8.51063371328837e-07, "loss": 4.1889, "step": 61640 }, { "epoch": 2.6558125511478656, "learning_rate": 8.510148893533581e-07, "loss": 4.0174, "step": 61660 }, { "epoch": 2.6566739888874533, "learning_rate": 8.509664073778793e-07, "loss": 4.1852, "step": 61680 }, { "epoch": 2.6575354266270406, "learning_rate": 8.509179254024005e-07, "loss": 4.2706, "step": 61700 }, { "epoch": 2.658396864366628, "learning_rate": 8.508694434269214e-07, "loss": 4.218, "step": 61720 }, { "epoch": 2.659258302106215, "learning_rate": 8.508209614514424e-07, "loss": 4.1876, "step": 61740 }, { "epoch": 2.6601197398458027, "learning_rate": 8.507724794759637e-07, "loss": 4.1555, "step": 61760 }, { "epoch": 2.66098117758539, "learning_rate": 8.507239975004847e-07, "loss": 3.943, "step": 61780 }, { "epoch": 2.6618426153249772, "learning_rate": 8.506755155250059e-07, "loss": 4.1855, "step": 61800 }, { "epoch": 2.662704053064565, "learning_rate": 8.50627033549527e-07, "loss": 4.0555, "step": 61820 }, { "epoch": 2.663565490804152, "learning_rate": 8.505785515740481e-07, "loss": 4.0709, "step": 61840 }, { "epoch": 2.6644269285437394, "learning_rate": 8.505300695985691e-07, "loss": 4.2282, "step": 61860 }, { "epoch": 2.6652883662833267, "learning_rate": 8.504815876230903e-07, "loss": 4.0509, "step": 61880 }, { "epoch": 2.6661498040229143, "learning_rate": 8.504331056476114e-07, "loss": 4.0596, "step": 61900 }, { "epoch": 2.6670112417625016, "learning_rate": 8.503846236721326e-07, "loss": 4.1527, "step": 61920 }, { "epoch": 2.667872679502089, "learning_rate": 8.503361416966536e-07, "loss": 4.3536, "step": 61940 }, { "epoch": 2.6687341172416765, "learning_rate": 8.502876597211748e-07, "loss": 4.0764, "step": 61960 }, { "epoch": 2.6695955549812638, "learning_rate": 8.502391777456958e-07, "loss": 4.1755, "step": 61980 }, { "epoch": 2.670456992720851, "learning_rate": 8.50190695770217e-07, "loss": 4.2831, "step": 62000 }, { "epoch": 2.6713184304604383, "learning_rate": 8.50142213794738e-07, "loss": 4.3237, "step": 62020 }, { "epoch": 2.672179868200026, "learning_rate": 8.500937318192591e-07, "loss": 4.186, "step": 62040 }, { "epoch": 2.673041305939613, "learning_rate": 8.500452498437803e-07, "loss": 4.3383, "step": 62060 }, { "epoch": 2.6739027436792004, "learning_rate": 8.499967678683015e-07, "loss": 3.7935, "step": 62080 }, { "epoch": 2.674764181418788, "learning_rate": 8.499482858928224e-07, "loss": 4.1606, "step": 62100 }, { "epoch": 2.6756256191583754, "learning_rate": 8.498998039173435e-07, "loss": 4.2875, "step": 62120 }, { "epoch": 2.6764870568979626, "learning_rate": 8.498513219418647e-07, "loss": 4.3184, "step": 62140 }, { "epoch": 2.67734849463755, "learning_rate": 8.498028399663858e-07, "loss": 4.2195, "step": 62160 }, { "epoch": 2.6782099323771376, "learning_rate": 8.497543579909069e-07, "loss": 4.3262, "step": 62180 }, { "epoch": 2.679071370116725, "learning_rate": 8.49705876015428e-07, "loss": 4.2036, "step": 62200 }, { "epoch": 2.679932807856312, "learning_rate": 8.496573940399492e-07, "loss": 4.1811, "step": 62220 }, { "epoch": 2.6807942455958997, "learning_rate": 8.496089120644702e-07, "loss": 4.2556, "step": 62240 }, { "epoch": 2.681655683335487, "learning_rate": 8.495604300889914e-07, "loss": 4.1667, "step": 62260 }, { "epoch": 2.6825171210750742, "learning_rate": 8.495119481135124e-07, "loss": 4.0655, "step": 62280 }, { "epoch": 2.6833785588146615, "learning_rate": 8.494634661380336e-07, "loss": 4.0451, "step": 62300 }, { "epoch": 2.684239996554249, "learning_rate": 8.494149841625546e-07, "loss": 4.3132, "step": 62320 }, { "epoch": 2.6851014342938364, "learning_rate": 8.493665021870758e-07, "loss": 4.3183, "step": 62340 }, { "epoch": 2.6859628720334237, "learning_rate": 8.493180202115968e-07, "loss": 4.0798, "step": 62360 }, { "epoch": 2.6868243097730113, "learning_rate": 8.49269538236118e-07, "loss": 4.362, "step": 62380 }, { "epoch": 2.6876857475125986, "learning_rate": 8.49221056260639e-07, "loss": 4.1945, "step": 62400 }, { "epoch": 2.688547185252186, "learning_rate": 8.491725742851602e-07, "loss": 4.2235, "step": 62420 }, { "epoch": 2.689408622991773, "learning_rate": 8.491240923096813e-07, "loss": 4.1094, "step": 62440 }, { "epoch": 2.6902700607313608, "learning_rate": 8.490756103342024e-07, "loss": 4.0557, "step": 62460 }, { "epoch": 2.691131498470948, "learning_rate": 8.490271283587234e-07, "loss": 4.2701, "step": 62480 }, { "epoch": 2.6919929362105353, "learning_rate": 8.489786463832446e-07, "loss": 4.1211, "step": 62500 }, { "epoch": 2.692854373950123, "learning_rate": 8.489301644077657e-07, "loss": 4.2872, "step": 62520 }, { "epoch": 2.69371581168971, "learning_rate": 8.488816824322868e-07, "loss": 4.1406, "step": 62540 }, { "epoch": 2.6945772494292974, "learning_rate": 8.488332004568079e-07, "loss": 4.2378, "step": 62560 }, { "epoch": 2.6954386871688847, "learning_rate": 8.48784718481329e-07, "loss": 4.2389, "step": 62580 }, { "epoch": 2.6963001249084724, "learning_rate": 8.487362365058502e-07, "loss": 4.1908, "step": 62600 }, { "epoch": 2.6971615626480596, "learning_rate": 8.486877545303712e-07, "loss": 4.269, "step": 62620 }, { "epoch": 2.698023000387647, "learning_rate": 8.486392725548923e-07, "loss": 4.0371, "step": 62640 }, { "epoch": 2.6988844381272346, "learning_rate": 8.485907905794134e-07, "loss": 4.2435, "step": 62660 }, { "epoch": 2.699745875866822, "learning_rate": 8.485423086039344e-07, "loss": 4.1919, "step": 62680 }, { "epoch": 2.700607313606409, "learning_rate": 8.484938266284557e-07, "loss": 4.0822, "step": 62700 }, { "epoch": 2.7014687513459963, "learning_rate": 8.484453446529768e-07, "loss": 4.2258, "step": 62720 }, { "epoch": 2.702330189085584, "learning_rate": 8.483968626774978e-07, "loss": 4.0884, "step": 62740 }, { "epoch": 2.7031916268251712, "learning_rate": 8.483483807020189e-07, "loss": 4.2268, "step": 62760 }, { "epoch": 2.7040530645647585, "learning_rate": 8.482998987265402e-07, "loss": 4.0974, "step": 62780 }, { "epoch": 2.704914502304346, "learning_rate": 8.482514167510612e-07, "loss": 4.2737, "step": 62800 }, { "epoch": 2.7057759400439334, "learning_rate": 8.482029347755823e-07, "loss": 4.3173, "step": 62820 }, { "epoch": 2.7066373777835206, "learning_rate": 8.481544528001034e-07, "loss": 4.0712, "step": 62840 }, { "epoch": 2.707498815523108, "learning_rate": 8.481059708246245e-07, "loss": 4.4412, "step": 62860 }, { "epoch": 2.7083602532626956, "learning_rate": 8.480574888491456e-07, "loss": 4.271, "step": 62880 }, { "epoch": 2.709221691002283, "learning_rate": 8.480090068736667e-07, "loss": 4.1806, "step": 62900 }, { "epoch": 2.71008312874187, "learning_rate": 8.479605248981878e-07, "loss": 4.0364, "step": 62920 }, { "epoch": 2.7109445664814578, "learning_rate": 8.479120429227089e-07, "loss": 4.1013, "step": 62940 }, { "epoch": 2.711806004221045, "learning_rate": 8.478635609472301e-07, "loss": 4.0226, "step": 62960 }, { "epoch": 2.7126674419606323, "learning_rate": 8.478150789717512e-07, "loss": 4.1229, "step": 62980 }, { "epoch": 2.7135288797002195, "learning_rate": 8.477665969962722e-07, "loss": 4.1475, "step": 63000 }, { "epoch": 2.714390317439807, "learning_rate": 8.477181150207933e-07, "loss": 4.1741, "step": 63020 }, { "epoch": 2.7152517551793944, "learning_rate": 8.476696330453145e-07, "loss": 4.3025, "step": 63040 }, { "epoch": 2.7161131929189817, "learning_rate": 8.476211510698355e-07, "loss": 4.2863, "step": 63060 }, { "epoch": 2.7169746306585694, "learning_rate": 8.475726690943567e-07, "loss": 4.1415, "step": 63080 }, { "epoch": 2.7178360683981566, "learning_rate": 8.475241871188778e-07, "loss": 4.1979, "step": 63100 }, { "epoch": 2.718697506137744, "learning_rate": 8.474757051433988e-07, "loss": 4.2951, "step": 63120 }, { "epoch": 2.719558943877331, "learning_rate": 8.474272231679199e-07, "loss": 4.0424, "step": 63140 }, { "epoch": 2.720420381616919, "learning_rate": 8.473787411924411e-07, "loss": 4.2505, "step": 63160 }, { "epoch": 2.721281819356506, "learning_rate": 8.473302592169622e-07, "loss": 4.4026, "step": 63180 }, { "epoch": 2.7221432570960933, "learning_rate": 8.472817772414833e-07, "loss": 4.0589, "step": 63200 }, { "epoch": 2.723004694835681, "learning_rate": 8.472332952660044e-07, "loss": 4.1424, "step": 63220 }, { "epoch": 2.723866132575268, "learning_rate": 8.471848132905255e-07, "loss": 4.256, "step": 63240 }, { "epoch": 2.7247275703148555, "learning_rate": 8.471363313150466e-07, "loss": 4.1204, "step": 63260 }, { "epoch": 2.7255890080544427, "learning_rate": 8.470878493395676e-07, "loss": 4.1596, "step": 63280 }, { "epoch": 2.7264504457940304, "learning_rate": 8.470393673640888e-07, "loss": 4.0859, "step": 63300 }, { "epoch": 2.7273118835336176, "learning_rate": 8.4699088538861e-07, "loss": 4.032, "step": 63320 }, { "epoch": 2.728173321273205, "learning_rate": 8.469424034131311e-07, "loss": 4.3392, "step": 63340 }, { "epoch": 2.7290347590127926, "learning_rate": 8.468939214376521e-07, "loss": 4.18, "step": 63360 }, { "epoch": 2.72989619675238, "learning_rate": 8.468454394621732e-07, "loss": 4.2426, "step": 63380 }, { "epoch": 2.730757634491967, "learning_rate": 8.467969574866943e-07, "loss": 4.1187, "step": 63400 }, { "epoch": 2.7316190722315543, "learning_rate": 8.467484755112155e-07, "loss": 4.1211, "step": 63420 }, { "epoch": 2.732480509971142, "learning_rate": 8.466999935357365e-07, "loss": 4.1815, "step": 63440 }, { "epoch": 2.7333419477107292, "learning_rate": 8.466515115602577e-07, "loss": 4.1454, "step": 63460 }, { "epoch": 2.7342033854503165, "learning_rate": 8.466030295847788e-07, "loss": 4.381, "step": 63480 }, { "epoch": 2.735064823189904, "learning_rate": 8.465545476092999e-07, "loss": 4.3725, "step": 63500 }, { "epoch": 2.7359262609294914, "learning_rate": 8.465060656338208e-07, "loss": 4.0545, "step": 63520 }, { "epoch": 2.7367876986690787, "learning_rate": 8.464575836583421e-07, "loss": 4.2509, "step": 63540 }, { "epoch": 2.737649136408666, "learning_rate": 8.464091016828632e-07, "loss": 4.0918, "step": 63560 }, { "epoch": 2.738510574148253, "learning_rate": 8.463606197073844e-07, "loss": 4.2106, "step": 63580 }, { "epoch": 2.739372011887841, "learning_rate": 8.463121377319054e-07, "loss": 4.145, "step": 63600 }, { "epoch": 2.740233449627428, "learning_rate": 8.462636557564265e-07, "loss": 4.2587, "step": 63620 }, { "epoch": 2.741094887367016, "learning_rate": 8.462151737809476e-07, "loss": 4.2361, "step": 63640 }, { "epoch": 2.741956325106603, "learning_rate": 8.461666918054686e-07, "loss": 4.3539, "step": 63660 }, { "epoch": 2.7428177628461903, "learning_rate": 8.461182098299898e-07, "loss": 4.2672, "step": 63680 }, { "epoch": 2.7436792005857775, "learning_rate": 8.46069727854511e-07, "loss": 4.306, "step": 63700 }, { "epoch": 2.7445406383253648, "learning_rate": 8.460212458790321e-07, "loss": 4.2876, "step": 63720 }, { "epoch": 2.7454020760649525, "learning_rate": 8.459727639035531e-07, "loss": 4.1529, "step": 63740 }, { "epoch": 2.7462635138045397, "learning_rate": 8.459242819280742e-07, "loss": 4.1421, "step": 63760 }, { "epoch": 2.7471249515441274, "learning_rate": 8.458757999525954e-07, "loss": 4.208, "step": 63780 }, { "epoch": 2.7479863892837146, "learning_rate": 8.458273179771165e-07, "loss": 4.2952, "step": 63800 }, { "epoch": 2.748847827023302, "learning_rate": 8.457788360016375e-07, "loss": 4.0587, "step": 63820 }, { "epoch": 2.749709264762889, "learning_rate": 8.457303540261587e-07, "loss": 4.1058, "step": 63840 }, { "epoch": 2.7505707025024764, "learning_rate": 8.456818720506799e-07, "loss": 4.1121, "step": 63860 }, { "epoch": 2.751432140242064, "learning_rate": 8.456333900752009e-07, "loss": 4.0427, "step": 63880 }, { "epoch": 2.7522935779816513, "learning_rate": 8.455849080997219e-07, "loss": 3.9421, "step": 63900 }, { "epoch": 2.753155015721239, "learning_rate": 8.455364261242431e-07, "loss": 4.2178, "step": 63920 }, { "epoch": 2.7540164534608262, "learning_rate": 8.454879441487641e-07, "loss": 4.3995, "step": 63940 }, { "epoch": 2.7548778912004135, "learning_rate": 8.454394621732854e-07, "loss": 3.9356, "step": 63960 }, { "epoch": 2.7557393289400007, "learning_rate": 8.453909801978064e-07, "loss": 4.1646, "step": 63980 }, { "epoch": 2.756600766679588, "learning_rate": 8.453424982223276e-07, "loss": 3.9561, "step": 64000 }, { "epoch": 2.7574622044191757, "learning_rate": 8.452940162468486e-07, "loss": 4.2556, "step": 64020 }, { "epoch": 2.758323642158763, "learning_rate": 8.452455342713698e-07, "loss": 4.1118, "step": 64040 }, { "epoch": 2.7591850798983506, "learning_rate": 8.451970522958908e-07, "loss": 4.0717, "step": 64060 }, { "epoch": 2.760046517637938, "learning_rate": 8.45148570320412e-07, "loss": 4.2172, "step": 64080 }, { "epoch": 2.760907955377525, "learning_rate": 8.451000883449331e-07, "loss": 4.1552, "step": 64100 }, { "epoch": 2.7617693931171123, "learning_rate": 8.450516063694542e-07, "loss": 4.2209, "step": 64120 }, { "epoch": 2.7626308308566996, "learning_rate": 8.450031243939752e-07, "loss": 4.1111, "step": 64140 }, { "epoch": 2.7634922685962873, "learning_rate": 8.449546424184964e-07, "loss": 4.0866, "step": 64160 }, { "epoch": 2.7643537063358745, "learning_rate": 8.449061604430175e-07, "loss": 4.1659, "step": 64180 }, { "epoch": 2.765215144075462, "learning_rate": 8.448576784675385e-07, "loss": 4.3124, "step": 64200 }, { "epoch": 2.7660765818150495, "learning_rate": 8.448091964920597e-07, "loss": 4.1531, "step": 64220 }, { "epoch": 2.7669380195546367, "learning_rate": 8.447607145165809e-07, "loss": 4.2296, "step": 64240 }, { "epoch": 2.767799457294224, "learning_rate": 8.447122325411018e-07, "loss": 4.1856, "step": 64260 }, { "epoch": 2.768660895033811, "learning_rate": 8.446637505656229e-07, "loss": 4.2284, "step": 64280 }, { "epoch": 2.769522332773399, "learning_rate": 8.446152685901441e-07, "loss": 4.2686, "step": 64300 }, { "epoch": 2.770383770512986, "learning_rate": 8.445667866146653e-07, "loss": 4.1992, "step": 64320 }, { "epoch": 2.771245208252574, "learning_rate": 8.445183046391863e-07, "loss": 4.2428, "step": 64340 }, { "epoch": 2.772106645992161, "learning_rate": 8.444698226637074e-07, "loss": 4.3385, "step": 64360 }, { "epoch": 2.7729680837317483, "learning_rate": 8.444213406882286e-07, "loss": 4.0539, "step": 64380 }, { "epoch": 2.7738295214713355, "learning_rate": 8.443728587127497e-07, "loss": 4.073, "step": 64400 }, { "epoch": 2.774690959210923, "learning_rate": 8.443243767372707e-07, "loss": 4.1707, "step": 64420 }, { "epoch": 2.7755523969505105, "learning_rate": 8.442758947617918e-07, "loss": 4.1407, "step": 64440 }, { "epoch": 2.7764138346900977, "learning_rate": 8.442274127863129e-07, "loss": 4.2477, "step": 64460 }, { "epoch": 2.7772752724296854, "learning_rate": 8.441789308108341e-07, "loss": 4.3696, "step": 64480 }, { "epoch": 2.7781367101692727, "learning_rate": 8.441304488353552e-07, "loss": 4.2808, "step": 64500 }, { "epoch": 2.77899814790886, "learning_rate": 8.440819668598762e-07, "loss": 4.248, "step": 64520 }, { "epoch": 2.779859585648447, "learning_rate": 8.440334848843974e-07, "loss": 4.3766, "step": 64540 }, { "epoch": 2.7807210233880344, "learning_rate": 8.439850029089184e-07, "loss": 4.157, "step": 64560 }, { "epoch": 2.781582461127622, "learning_rate": 8.439365209334396e-07, "loss": 4.2217, "step": 64580 }, { "epoch": 2.7824438988672093, "learning_rate": 8.438880389579607e-07, "loss": 4.1429, "step": 64600 }, { "epoch": 2.783305336606797, "learning_rate": 8.438395569824819e-07, "loss": 4.1362, "step": 64620 }, { "epoch": 2.7841667743463843, "learning_rate": 8.437910750070029e-07, "loss": 4.2166, "step": 64640 }, { "epoch": 2.7850282120859715, "learning_rate": 8.43742593031524e-07, "loss": 4.1578, "step": 64660 }, { "epoch": 2.7858896498255588, "learning_rate": 8.436941110560451e-07, "loss": 4.399, "step": 64680 }, { "epoch": 2.786751087565146, "learning_rate": 8.436456290805663e-07, "loss": 4.2696, "step": 64700 }, { "epoch": 2.7876125253047337, "learning_rate": 8.435971471050873e-07, "loss": 4.1585, "step": 64720 }, { "epoch": 2.788473963044321, "learning_rate": 8.435486651296084e-07, "loss": 4.026, "step": 64740 }, { "epoch": 2.789335400783908, "learning_rate": 8.435001831541296e-07, "loss": 4.2457, "step": 64760 }, { "epoch": 2.790196838523496, "learning_rate": 8.434517011786507e-07, "loss": 4.2927, "step": 64780 }, { "epoch": 2.791058276263083, "learning_rate": 8.434032192031717e-07, "loss": 4.2799, "step": 64800 }, { "epoch": 2.7919197140026704, "learning_rate": 8.433547372276928e-07, "loss": 4.1319, "step": 64820 }, { "epoch": 2.7927811517422576, "learning_rate": 8.43306255252214e-07, "loss": 4.1158, "step": 64840 }, { "epoch": 2.7936425894818453, "learning_rate": 8.432577732767352e-07, "loss": 4.2568, "step": 64860 }, { "epoch": 2.7945040272214325, "learning_rate": 8.432092913012562e-07, "loss": 4.2391, "step": 64880 }, { "epoch": 2.79536546496102, "learning_rate": 8.431608093257772e-07, "loss": 4.167, "step": 64900 }, { "epoch": 2.7962269027006075, "learning_rate": 8.431123273502984e-07, "loss": 4.2472, "step": 64920 }, { "epoch": 2.7970883404401947, "learning_rate": 8.430638453748195e-07, "loss": 4.1453, "step": 64940 }, { "epoch": 2.797949778179782, "learning_rate": 8.430153633993406e-07, "loss": 4.0949, "step": 64960 }, { "epoch": 2.798811215919369, "learning_rate": 8.429668814238617e-07, "loss": 4.1396, "step": 64980 }, { "epoch": 2.799672653658957, "learning_rate": 8.429183994483829e-07, "loss": 4.0352, "step": 65000 }, { "epoch": 2.800534091398544, "learning_rate": 8.428699174729039e-07, "loss": 4.0997, "step": 65020 }, { "epoch": 2.8013955291381314, "learning_rate": 8.42821435497425e-07, "loss": 4.2631, "step": 65040 }, { "epoch": 2.802256966877719, "learning_rate": 8.427729535219461e-07, "loss": 4.198, "step": 65060 }, { "epoch": 2.8031184046173063, "learning_rate": 8.427244715464673e-07, "loss": 4.3659, "step": 65080 }, { "epoch": 2.8039798423568936, "learning_rate": 8.426759895709883e-07, "loss": 3.993, "step": 65100 }, { "epoch": 2.804841280096481, "learning_rate": 8.426275075955095e-07, "loss": 4.1925, "step": 65120 }, { "epoch": 2.8057027178360685, "learning_rate": 8.425790256200306e-07, "loss": 4.4249, "step": 65140 }, { "epoch": 2.8065641555756558, "learning_rate": 8.425305436445517e-07, "loss": 4.1671, "step": 65160 }, { "epoch": 2.807425593315243, "learning_rate": 8.424820616690727e-07, "loss": 3.9986, "step": 65180 }, { "epoch": 2.8082870310548307, "learning_rate": 8.424335796935939e-07, "loss": 4.22, "step": 65200 }, { "epoch": 2.809148468794418, "learning_rate": 8.42385097718115e-07, "loss": 4.3503, "step": 65220 }, { "epoch": 2.810009906534005, "learning_rate": 8.423366157426362e-07, "loss": 4.1263, "step": 65240 }, { "epoch": 2.8108713442735924, "learning_rate": 8.422881337671572e-07, "loss": 4.4143, "step": 65260 }, { "epoch": 2.81173278201318, "learning_rate": 8.422396517916783e-07, "loss": 4.0707, "step": 65280 }, { "epoch": 2.8125942197527674, "learning_rate": 8.421911698161993e-07, "loss": 4.052, "step": 65300 }, { "epoch": 2.8134556574923546, "learning_rate": 8.421426878407205e-07, "loss": 4.1191, "step": 65320 }, { "epoch": 2.8143170952319423, "learning_rate": 8.420942058652416e-07, "loss": 4.0559, "step": 65340 }, { "epoch": 2.8151785329715295, "learning_rate": 8.420457238897627e-07, "loss": 4.0588, "step": 65360 }, { "epoch": 2.816039970711117, "learning_rate": 8.419972419142839e-07, "loss": 4.1563, "step": 65380 }, { "epoch": 2.816901408450704, "learning_rate": 8.419487599388049e-07, "loss": 4.1521, "step": 65400 }, { "epoch": 2.8177628461902917, "learning_rate": 8.41900277963326e-07, "loss": 4.202, "step": 65420 }, { "epoch": 2.818624283929879, "learning_rate": 8.418517959878471e-07, "loss": 4.1619, "step": 65440 }, { "epoch": 2.819485721669466, "learning_rate": 8.418033140123683e-07, "loss": 4.2922, "step": 65460 }, { "epoch": 2.820347159409054, "learning_rate": 8.417548320368894e-07, "loss": 4.2528, "step": 65480 }, { "epoch": 2.821208597148641, "learning_rate": 8.417063500614105e-07, "loss": 4.2084, "step": 65500 }, { "epoch": 2.8220700348882284, "learning_rate": 8.416578680859316e-07, "loss": 4.2309, "step": 65520 }, { "epoch": 2.8229314726278156, "learning_rate": 8.416093861104526e-07, "loss": 4.2342, "step": 65540 }, { "epoch": 2.8237929103674033, "learning_rate": 8.415609041349737e-07, "loss": 4.0922, "step": 65560 }, { "epoch": 2.8246543481069906, "learning_rate": 8.415124221594949e-07, "loss": 3.9988, "step": 65580 }, { "epoch": 2.825515785846578, "learning_rate": 8.41463940184016e-07, "loss": 4.2548, "step": 65600 }, { "epoch": 2.8263772235861655, "learning_rate": 8.414154582085371e-07, "loss": 4.024, "step": 65620 }, { "epoch": 2.8272386613257527, "learning_rate": 8.413669762330582e-07, "loss": 4.1746, "step": 65640 }, { "epoch": 2.82810009906534, "learning_rate": 8.413184942575793e-07, "loss": 4.1492, "step": 65660 }, { "epoch": 2.8289615368049272, "learning_rate": 8.412700122821004e-07, "loss": 4.3199, "step": 65680 }, { "epoch": 2.829822974544515, "learning_rate": 8.412215303066215e-07, "loss": 4.2033, "step": 65700 }, { "epoch": 2.830684412284102, "learning_rate": 8.411730483311425e-07, "loss": 4.2661, "step": 65720 }, { "epoch": 2.8315458500236894, "learning_rate": 8.411245663556638e-07, "loss": 4.1234, "step": 65740 }, { "epoch": 2.832407287763277, "learning_rate": 8.410760843801849e-07, "loss": 3.9953, "step": 65760 }, { "epoch": 2.8332687255028643, "learning_rate": 8.41027602404706e-07, "loss": 3.9306, "step": 65780 }, { "epoch": 2.8341301632424516, "learning_rate": 8.40979120429227e-07, "loss": 4.3457, "step": 65800 }, { "epoch": 2.834991600982039, "learning_rate": 8.409306384537481e-07, "loss": 4.3391, "step": 65820 }, { "epoch": 2.8358530387216265, "learning_rate": 8.408821564782692e-07, "loss": 4.1781, "step": 65840 }, { "epoch": 2.8367144764612138, "learning_rate": 8.408336745027904e-07, "loss": 4.1102, "step": 65860 }, { "epoch": 2.837575914200801, "learning_rate": 8.407851925273115e-07, "loss": 4.2409, "step": 65880 }, { "epoch": 2.8384373519403887, "learning_rate": 8.407367105518326e-07, "loss": 4.28, "step": 65900 }, { "epoch": 2.839298789679976, "learning_rate": 8.406882285763536e-07, "loss": 4.5097, "step": 65920 }, { "epoch": 2.840160227419563, "learning_rate": 8.406397466008748e-07, "loss": 4.4996, "step": 65940 }, { "epoch": 2.8410216651591504, "learning_rate": 8.405912646253959e-07, "loss": 4.5238, "step": 65960 }, { "epoch": 2.841883102898738, "learning_rate": 8.40542782649917e-07, "loss": 4.374, "step": 65980 }, { "epoch": 2.8427445406383254, "learning_rate": 8.404943006744381e-07, "loss": 4.3341, "step": 66000 }, { "epoch": 2.8436059783779126, "learning_rate": 8.404458186989593e-07, "loss": 4.2999, "step": 66020 }, { "epoch": 2.8444674161175003, "learning_rate": 8.403973367234803e-07, "loss": 4.0023, "step": 66040 }, { "epoch": 2.8453288538570876, "learning_rate": 8.403488547480014e-07, "loss": 4.3215, "step": 66060 }, { "epoch": 2.846190291596675, "learning_rate": 8.403003727725225e-07, "loss": 4.4189, "step": 66080 }, { "epoch": 2.847051729336262, "learning_rate": 8.402518907970436e-07, "loss": 4.0156, "step": 66100 }, { "epoch": 2.8479131670758497, "learning_rate": 8.402034088215648e-07, "loss": 3.8661, "step": 66120 }, { "epoch": 2.848774604815437, "learning_rate": 8.401549268460858e-07, "loss": 4.1853, "step": 66140 }, { "epoch": 2.8496360425550242, "learning_rate": 8.40106444870607e-07, "loss": 4.0573, "step": 66160 }, { "epoch": 2.850497480294612, "learning_rate": 8.40057962895128e-07, "loss": 4.1377, "step": 66180 }, { "epoch": 2.851358918034199, "learning_rate": 8.400094809196492e-07, "loss": 4.17, "step": 66200 }, { "epoch": 2.8522203557737864, "learning_rate": 8.399609989441702e-07, "loss": 4.1114, "step": 66220 }, { "epoch": 2.8530817935133737, "learning_rate": 8.399125169686913e-07, "loss": 4.0018, "step": 66240 }, { "epoch": 2.8539432312529613, "learning_rate": 8.398640349932125e-07, "loss": 4.0871, "step": 66260 }, { "epoch": 2.8548046689925486, "learning_rate": 8.398155530177337e-07, "loss": 4.1926, "step": 66280 }, { "epoch": 2.855666106732136, "learning_rate": 8.397670710422546e-07, "loss": 4.2544, "step": 66300 }, { "epoch": 2.8565275444717235, "learning_rate": 8.397185890667758e-07, "loss": 4.093, "step": 66320 }, { "epoch": 2.8573889822113108, "learning_rate": 8.396701070912969e-07, "loss": 4.1454, "step": 66340 }, { "epoch": 2.858250419950898, "learning_rate": 8.39621625115818e-07, "loss": 4.237, "step": 66360 }, { "epoch": 2.8591118576904853, "learning_rate": 8.395731431403391e-07, "loss": 4.1079, "step": 66380 }, { "epoch": 2.859973295430073, "learning_rate": 8.395246611648603e-07, "loss": 4.1858, "step": 66400 }, { "epoch": 2.86083473316966, "learning_rate": 8.394761791893814e-07, "loss": 4.2199, "step": 66420 }, { "epoch": 2.8616961709092474, "learning_rate": 8.394276972139023e-07, "loss": 4.1503, "step": 66440 }, { "epoch": 2.862557608648835, "learning_rate": 8.393792152384235e-07, "loss": 3.92, "step": 66460 }, { "epoch": 2.8634190463884224, "learning_rate": 8.393307332629447e-07, "loss": 4.0939, "step": 66480 }, { "epoch": 2.8642804841280096, "learning_rate": 8.392822512874658e-07, "loss": 4.3083, "step": 66500 }, { "epoch": 2.865141921867597, "learning_rate": 8.392337693119868e-07, "loss": 4.5096, "step": 66520 }, { "epoch": 2.8660033596071846, "learning_rate": 8.39185287336508e-07, "loss": 4.1628, "step": 66540 }, { "epoch": 2.866864797346772, "learning_rate": 8.391368053610291e-07, "loss": 4.1477, "step": 66560 }, { "epoch": 2.867726235086359, "learning_rate": 8.390883233855502e-07, "loss": 3.9537, "step": 66580 }, { "epoch": 2.8685876728259467, "learning_rate": 8.390398414100712e-07, "loss": 4.0073, "step": 66600 }, { "epoch": 2.869449110565534, "learning_rate": 8.389913594345924e-07, "loss": 4.1379, "step": 66620 }, { "epoch": 2.8703105483051212, "learning_rate": 8.389428774591135e-07, "loss": 4.5399, "step": 66640 }, { "epoch": 2.8711719860447085, "learning_rate": 8.388943954836347e-07, "loss": 4.172, "step": 66660 }, { "epoch": 2.8720334237842957, "learning_rate": 8.388459135081556e-07, "loss": 4.1137, "step": 66680 }, { "epoch": 2.8728948615238834, "learning_rate": 8.387974315326768e-07, "loss": 4.3093, "step": 66700 }, { "epoch": 2.8737562992634706, "learning_rate": 8.387489495571979e-07, "loss": 4.2004, "step": 66720 }, { "epoch": 2.8746177370030583, "learning_rate": 8.387004675817191e-07, "loss": 4.2117, "step": 66740 }, { "epoch": 2.8754791747426456, "learning_rate": 8.386519856062401e-07, "loss": 3.9785, "step": 66760 }, { "epoch": 2.876340612482233, "learning_rate": 8.386035036307613e-07, "loss": 4.2004, "step": 66780 }, { "epoch": 2.87720205022182, "learning_rate": 8.385550216552824e-07, "loss": 4.3149, "step": 66800 }, { "epoch": 2.8780634879614073, "learning_rate": 8.385065396798034e-07, "loss": 4.0467, "step": 66820 }, { "epoch": 2.878924925700995, "learning_rate": 8.384580577043245e-07, "loss": 4.2308, "step": 66840 }, { "epoch": 2.8797863634405823, "learning_rate": 8.384095757288457e-07, "loss": 4.2297, "step": 66860 }, { "epoch": 2.88064780118017, "learning_rate": 8.383610937533668e-07, "loss": 4.1531, "step": 66880 }, { "epoch": 2.881509238919757, "learning_rate": 8.383126117778878e-07, "loss": 4.127, "step": 66900 }, { "epoch": 2.8823706766593444, "learning_rate": 8.38264129802409e-07, "loss": 4.2216, "step": 66920 }, { "epoch": 2.8832321143989317, "learning_rate": 8.382156478269301e-07, "loss": 4.1503, "step": 66940 }, { "epoch": 2.884093552138519, "learning_rate": 8.381671658514512e-07, "loss": 4.1333, "step": 66960 }, { "epoch": 2.8849549898781066, "learning_rate": 8.381186838759722e-07, "loss": 4.2919, "step": 66980 }, { "epoch": 2.885816427617694, "learning_rate": 8.380702019004934e-07, "loss": 4.2744, "step": 67000 }, { "epoch": 2.8866778653572815, "learning_rate": 8.380217199250146e-07, "loss": 4.3332, "step": 67020 }, { "epoch": 2.887539303096869, "learning_rate": 8.379732379495357e-07, "loss": 4.237, "step": 67040 }, { "epoch": 2.888400740836456, "learning_rate": 8.379247559740567e-07, "loss": 4.0655, "step": 67060 }, { "epoch": 2.8892621785760433, "learning_rate": 8.378762739985777e-07, "loss": 4.3082, "step": 67080 }, { "epoch": 2.8901236163156305, "learning_rate": 8.37827792023099e-07, "loss": 4.2381, "step": 67100 }, { "epoch": 2.890985054055218, "learning_rate": 8.3777931004762e-07, "loss": 4.0887, "step": 67120 }, { "epoch": 2.8918464917948055, "learning_rate": 8.377308280721411e-07, "loss": 4.1576, "step": 67140 }, { "epoch": 2.892707929534393, "learning_rate": 8.376823460966623e-07, "loss": 4.1743, "step": 67160 }, { "epoch": 2.8935693672739804, "learning_rate": 8.376338641211834e-07, "loss": 4.2787, "step": 67180 }, { "epoch": 2.8944308050135676, "learning_rate": 8.375853821457044e-07, "loss": 4.2371, "step": 67200 }, { "epoch": 2.895292242753155, "learning_rate": 8.375369001702255e-07, "loss": 4.2381, "step": 67220 }, { "epoch": 2.896153680492742, "learning_rate": 8.374884181947467e-07, "loss": 4.3043, "step": 67240 }, { "epoch": 2.89701511823233, "learning_rate": 8.374399362192678e-07, "loss": 4.1704, "step": 67260 }, { "epoch": 2.897876555971917, "learning_rate": 8.373914542437889e-07, "loss": 4.0632, "step": 67280 }, { "epoch": 2.8987379937115048, "learning_rate": 8.3734297226831e-07, "loss": 4.035, "step": 67300 }, { "epoch": 2.899599431451092, "learning_rate": 8.372944902928311e-07, "loss": 4.2782, "step": 67320 }, { "epoch": 2.9004608691906792, "learning_rate": 8.372460083173522e-07, "loss": 4.1251, "step": 67340 }, { "epoch": 2.9013223069302665, "learning_rate": 8.371975263418733e-07, "loss": 4.3405, "step": 67360 }, { "epoch": 2.9021837446698537, "learning_rate": 8.371490443663944e-07, "loss": 4.1674, "step": 67380 }, { "epoch": 2.9030451824094414, "learning_rate": 8.371005623909156e-07, "loss": 4.2904, "step": 67400 }, { "epoch": 2.9039066201490287, "learning_rate": 8.370520804154366e-07, "loss": 4.2848, "step": 67420 }, { "epoch": 2.9047680578886164, "learning_rate": 8.370035984399577e-07, "loss": 4.369, "step": 67440 }, { "epoch": 2.9056294956282036, "learning_rate": 8.369551164644788e-07, "loss": 4.2133, "step": 67460 }, { "epoch": 2.906490933367791, "learning_rate": 8.36906634489e-07, "loss": 4.4579, "step": 67480 }, { "epoch": 2.907352371107378, "learning_rate": 8.368581525135209e-07, "loss": 4.0013, "step": 67500 }, { "epoch": 2.9082138088469653, "learning_rate": 8.368096705380421e-07, "loss": 4.0972, "step": 67520 }, { "epoch": 2.909075246586553, "learning_rate": 8.367611885625633e-07, "loss": 4.4095, "step": 67540 }, { "epoch": 2.9099366843261403, "learning_rate": 8.367127065870845e-07, "loss": 4.2958, "step": 67560 }, { "epoch": 2.910798122065728, "learning_rate": 8.366642246116054e-07, "loss": 4.249, "step": 67580 }, { "epoch": 2.911659559805315, "learning_rate": 8.366157426361265e-07, "loss": 4.1099, "step": 67600 }, { "epoch": 2.9125209975449025, "learning_rate": 8.365672606606477e-07, "loss": 4.3728, "step": 67620 }, { "epoch": 2.9133824352844897, "learning_rate": 8.365187786851689e-07, "loss": 4.1975, "step": 67640 }, { "epoch": 2.914243873024077, "learning_rate": 8.364702967096899e-07, "loss": 4.329, "step": 67660 }, { "epoch": 2.9151053107636646, "learning_rate": 8.36421814734211e-07, "loss": 4.2475, "step": 67680 }, { "epoch": 2.915966748503252, "learning_rate": 8.363733327587321e-07, "loss": 4.1461, "step": 67700 }, { "epoch": 2.9168281862428396, "learning_rate": 8.363248507832531e-07, "loss": 4.0494, "step": 67720 }, { "epoch": 2.917689623982427, "learning_rate": 8.362763688077743e-07, "loss": 4.1619, "step": 67740 }, { "epoch": 2.918551061722014, "learning_rate": 8.362278868322954e-07, "loss": 4.1775, "step": 67760 }, { "epoch": 2.9194124994616013, "learning_rate": 8.361794048568166e-07, "loss": 4.0533, "step": 67780 }, { "epoch": 2.9202739372011886, "learning_rate": 8.361309228813376e-07, "loss": 4.1892, "step": 67800 }, { "epoch": 2.9211353749407762, "learning_rate": 8.360824409058587e-07, "loss": 4.1913, "step": 67820 }, { "epoch": 2.9219968126803635, "learning_rate": 8.360339589303798e-07, "loss": 4.0711, "step": 67840 }, { "epoch": 2.9228582504199507, "learning_rate": 8.35985476954901e-07, "loss": 4.2778, "step": 67860 }, { "epoch": 2.9237196881595384, "learning_rate": 8.35936994979422e-07, "loss": 4.3031, "step": 67880 }, { "epoch": 2.9245811258991257, "learning_rate": 8.358885130039432e-07, "loss": 4.032, "step": 67900 }, { "epoch": 2.925442563638713, "learning_rate": 8.358400310284643e-07, "loss": 4.1971, "step": 67920 }, { "epoch": 2.9263040013783, "learning_rate": 8.357915490529855e-07, "loss": 4.133, "step": 67940 }, { "epoch": 2.927165439117888, "learning_rate": 8.357430670775064e-07, "loss": 4.1434, "step": 67960 }, { "epoch": 2.928026876857475, "learning_rate": 8.356945851020275e-07, "loss": 4.1361, "step": 67980 }, { "epoch": 2.9288883145970623, "learning_rate": 8.356461031265487e-07, "loss": 4.1048, "step": 68000 }, { "epoch": 2.92974975233665, "learning_rate": 8.355976211510697e-07, "loss": 4.108, "step": 68020 }, { "epoch": 2.9306111900762373, "learning_rate": 8.355491391755909e-07, "loss": 4.3268, "step": 68040 }, { "epoch": 2.9314726278158245, "learning_rate": 8.35500657200112e-07, "loss": 4.0361, "step": 68060 }, { "epoch": 2.9323340655554118, "learning_rate": 8.354521752246331e-07, "loss": 3.9429, "step": 68080 }, { "epoch": 2.9331955032949995, "learning_rate": 8.354036932491542e-07, "loss": 4.4815, "step": 68100 }, { "epoch": 2.9340569410345867, "learning_rate": 8.353552112736753e-07, "loss": 3.9123, "step": 68120 }, { "epoch": 2.934918378774174, "learning_rate": 8.353067292981964e-07, "loss": 4.309, "step": 68140 }, { "epoch": 2.9357798165137616, "learning_rate": 8.352582473227176e-07, "loss": 4.1525, "step": 68160 }, { "epoch": 2.936641254253349, "learning_rate": 8.352097653472387e-07, "loss": 4.2099, "step": 68180 }, { "epoch": 2.937502691992936, "learning_rate": 8.351612833717598e-07, "loss": 4.2891, "step": 68200 }, { "epoch": 2.9383641297325234, "learning_rate": 8.351128013962808e-07, "loss": 3.9832, "step": 68220 }, { "epoch": 2.939225567472111, "learning_rate": 8.35064319420802e-07, "loss": 4.2352, "step": 68240 }, { "epoch": 2.9400870052116983, "learning_rate": 8.35015837445323e-07, "loss": 4.1817, "step": 68260 }, { "epoch": 2.9409484429512855, "learning_rate": 8.349673554698442e-07, "loss": 4.0247, "step": 68280 }, { "epoch": 2.9418098806908732, "learning_rate": 8.349188734943653e-07, "loss": 4.0533, "step": 68300 }, { "epoch": 2.9426713184304605, "learning_rate": 8.348703915188865e-07, "loss": 4.1722, "step": 68320 }, { "epoch": 2.9435327561700477, "learning_rate": 8.348219095434073e-07, "loss": 4.1471, "step": 68340 }, { "epoch": 2.944394193909635, "learning_rate": 8.347734275679286e-07, "loss": 4.1911, "step": 68360 }, { "epoch": 2.9452556316492227, "learning_rate": 8.347249455924497e-07, "loss": 4.0995, "step": 68380 }, { "epoch": 2.94611706938881, "learning_rate": 8.346764636169708e-07, "loss": 4.1429, "step": 68400 }, { "epoch": 2.946978507128397, "learning_rate": 8.346279816414919e-07, "loss": 4.2546, "step": 68420 }, { "epoch": 2.947839944867985, "learning_rate": 8.345794996660131e-07, "loss": 4.003, "step": 68440 }, { "epoch": 2.948701382607572, "learning_rate": 8.345310176905341e-07, "loss": 4.0601, "step": 68460 }, { "epoch": 2.9495628203471593, "learning_rate": 8.344825357150552e-07, "loss": 4.2808, "step": 68480 }, { "epoch": 2.9504242580867466, "learning_rate": 8.344340537395763e-07, "loss": 4.3243, "step": 68500 }, { "epoch": 2.9512856958263343, "learning_rate": 8.343855717640974e-07, "loss": 4.1212, "step": 68520 }, { "epoch": 2.9521471335659215, "learning_rate": 8.343370897886186e-07, "loss": 4.0958, "step": 68540 }, { "epoch": 2.9530085713055088, "learning_rate": 8.342886078131397e-07, "loss": 4.3868, "step": 68560 }, { "epoch": 2.9538700090450964, "learning_rate": 8.342401258376608e-07, "loss": 4.2354, "step": 68580 }, { "epoch": 2.9547314467846837, "learning_rate": 8.341916438621818e-07, "loss": 4.3807, "step": 68600 }, { "epoch": 2.955592884524271, "learning_rate": 8.34143161886703e-07, "loss": 4.1667, "step": 68620 }, { "epoch": 2.956454322263858, "learning_rate": 8.340946799112241e-07, "loss": 4.0863, "step": 68640 }, { "epoch": 2.957315760003446, "learning_rate": 8.340461979357452e-07, "loss": 4.1569, "step": 68660 }, { "epoch": 2.958177197743033, "learning_rate": 8.339977159602663e-07, "loss": 4.1465, "step": 68680 }, { "epoch": 2.9590386354826204, "learning_rate": 8.339492339847874e-07, "loss": 4.0999, "step": 68700 }, { "epoch": 2.959900073222208, "learning_rate": 8.339007520093085e-07, "loss": 4.0144, "step": 68720 }, { "epoch": 2.9607615109617953, "learning_rate": 8.338522700338296e-07, "loss": 3.8672, "step": 68740 }, { "epoch": 2.9616229487013825, "learning_rate": 8.338037880583507e-07, "loss": 4.0771, "step": 68760 }, { "epoch": 2.96248438644097, "learning_rate": 8.337553060828718e-07, "loss": 4.144, "step": 68780 }, { "epoch": 2.9633458241805575, "learning_rate": 8.337068241073929e-07, "loss": 3.9042, "step": 68800 }, { "epoch": 2.9642072619201447, "learning_rate": 8.336583421319141e-07, "loss": 4.2654, "step": 68820 }, { "epoch": 2.965068699659732, "learning_rate": 8.336098601564352e-07, "loss": 4.2632, "step": 68840 }, { "epoch": 2.9659301373993197, "learning_rate": 8.335613781809561e-07, "loss": 4.1627, "step": 68860 }, { "epoch": 2.966791575138907, "learning_rate": 8.335128962054773e-07, "loss": 4.2268, "step": 68880 }, { "epoch": 2.967653012878494, "learning_rate": 8.334644142299985e-07, "loss": 4.0903, "step": 68900 }, { "epoch": 2.9685144506180814, "learning_rate": 8.334159322545196e-07, "loss": 4.4831, "step": 68920 }, { "epoch": 2.969375888357669, "learning_rate": 8.333674502790407e-07, "loss": 4.0449, "step": 68940 }, { "epoch": 2.9702373260972563, "learning_rate": 8.333189683035618e-07, "loss": 4.0883, "step": 68960 }, { "epoch": 2.9710987638368436, "learning_rate": 8.332704863280829e-07, "loss": 4.1052, "step": 68980 }, { "epoch": 2.9719602015764313, "learning_rate": 8.332220043526039e-07, "loss": 4.0703, "step": 69000 }, { "epoch": 2.9728216393160185, "learning_rate": 8.331735223771251e-07, "loss": 4.2334, "step": 69020 }, { "epoch": 2.9736830770556058, "learning_rate": 8.331250404016462e-07, "loss": 4.1308, "step": 69040 }, { "epoch": 2.974544514795193, "learning_rate": 8.330765584261673e-07, "loss": 4.23, "step": 69060 }, { "epoch": 2.9754059525347807, "learning_rate": 8.330280764506884e-07, "loss": 3.9961, "step": 69080 }, { "epoch": 2.976267390274368, "learning_rate": 8.329795944752095e-07, "loss": 4.2471, "step": 69100 }, { "epoch": 2.977128828013955, "learning_rate": 8.329311124997306e-07, "loss": 4.2375, "step": 69120 }, { "epoch": 2.977990265753543, "learning_rate": 8.328826305242517e-07, "loss": 4.2749, "step": 69140 }, { "epoch": 2.97885170349313, "learning_rate": 8.328341485487728e-07, "loss": 4.0304, "step": 69160 }, { "epoch": 2.9797131412327174, "learning_rate": 8.32785666573294e-07, "loss": 4.1121, "step": 69180 }, { "epoch": 2.9805745789723046, "learning_rate": 8.327371845978151e-07, "loss": 4.1717, "step": 69200 }, { "epoch": 2.9814360167118923, "learning_rate": 8.326887026223362e-07, "loss": 3.996, "step": 69220 }, { "epoch": 2.9822974544514795, "learning_rate": 8.326402206468572e-07, "loss": 4.3253, "step": 69240 }, { "epoch": 2.983158892191067, "learning_rate": 8.325917386713784e-07, "loss": 4.0697, "step": 69260 }, { "epoch": 2.9840203299306545, "learning_rate": 8.325432566958994e-07, "loss": 4.4012, "step": 69280 }, { "epoch": 2.9848817676702417, "learning_rate": 8.324947747204205e-07, "loss": 3.992, "step": 69300 }, { "epoch": 2.985743205409829, "learning_rate": 8.324462927449417e-07, "loss": 4.241, "step": 69320 }, { "epoch": 2.986604643149416, "learning_rate": 8.323978107694628e-07, "loss": 4.3471, "step": 69340 }, { "epoch": 2.987466080889004, "learning_rate": 8.323493287939839e-07, "loss": 4.1748, "step": 69360 }, { "epoch": 2.988327518628591, "learning_rate": 8.323008468185049e-07, "loss": 4.332, "step": 69380 }, { "epoch": 2.9891889563681784, "learning_rate": 8.322523648430261e-07, "loss": 3.9955, "step": 69400 }, { "epoch": 2.990050394107766, "learning_rate": 8.322038828675472e-07, "loss": 4.2962, "step": 69420 }, { "epoch": 2.9909118318473533, "learning_rate": 8.321554008920684e-07, "loss": 4.0723, "step": 69440 }, { "epoch": 2.9917732695869406, "learning_rate": 8.321069189165894e-07, "loss": 3.9664, "step": 69460 }, { "epoch": 2.992634707326528, "learning_rate": 8.320584369411105e-07, "loss": 4.2113, "step": 69480 }, { "epoch": 2.9934961450661155, "learning_rate": 8.320099549656316e-07, "loss": 3.9184, "step": 69500 }, { "epoch": 2.9943575828057027, "learning_rate": 8.319614729901528e-07, "loss": 4.489, "step": 69520 }, { "epoch": 2.99521902054529, "learning_rate": 8.319129910146738e-07, "loss": 4.3433, "step": 69540 }, { "epoch": 2.9960804582848777, "learning_rate": 8.31864509039195e-07, "loss": 4.1775, "step": 69560 }, { "epoch": 2.996941896024465, "learning_rate": 8.318160270637161e-07, "loss": 4.1645, "step": 69580 }, { "epoch": 2.997803333764052, "learning_rate": 8.317675450882371e-07, "loss": 4.0936, "step": 69600 }, { "epoch": 2.9986647715036394, "learning_rate": 8.317190631127582e-07, "loss": 4.243, "step": 69620 }, { "epoch": 2.999526209243227, "learning_rate": 8.316705811372794e-07, "loss": 4.3322, "step": 69640 }, { "epoch": 3.0003876469828143, "learning_rate": 8.316220991618005e-07, "loss": 4.1925, "step": 69660 }, { "epoch": 3.0012490847224016, "learning_rate": 8.315736171863215e-07, "loss": 4.1922, "step": 69680 }, { "epoch": 3.002110522461989, "learning_rate": 8.315251352108427e-07, "loss": 4.2554, "step": 69700 }, { "epoch": 3.0029719602015765, "learning_rate": 8.314766532353639e-07, "loss": 4.001, "step": 69720 }, { "epoch": 3.0038333979411638, "learning_rate": 8.314281712598849e-07, "loss": 4.1319, "step": 69740 }, { "epoch": 3.004694835680751, "learning_rate": 8.313796892844059e-07, "loss": 4.1174, "step": 69760 }, { "epoch": 3.0055562734203387, "learning_rate": 8.313312073089271e-07, "loss": 4.0008, "step": 69780 }, { "epoch": 3.006417711159926, "learning_rate": 8.312827253334482e-07, "loss": 4.1544, "step": 69800 }, { "epoch": 3.007279148899513, "learning_rate": 8.312342433579694e-07, "loss": 4.3267, "step": 69820 }, { "epoch": 3.0081405866391004, "learning_rate": 8.311857613824904e-07, "loss": 4.0394, "step": 69840 }, { "epoch": 3.009002024378688, "learning_rate": 8.311372794070115e-07, "loss": 4.3174, "step": 69860 }, { "epoch": 3.0098634621182754, "learning_rate": 8.310887974315326e-07, "loss": 4.0971, "step": 69880 }, { "epoch": 3.0107248998578626, "learning_rate": 8.310403154560537e-07, "loss": 4.0695, "step": 69900 }, { "epoch": 3.0115863375974503, "learning_rate": 8.309918334805748e-07, "loss": 4.3721, "step": 69920 }, { "epoch": 3.0124477753370376, "learning_rate": 8.30943351505096e-07, "loss": 4.2357, "step": 69940 }, { "epoch": 3.013309213076625, "learning_rate": 8.308948695296171e-07, "loss": 4.1923, "step": 69960 }, { "epoch": 3.014170650816212, "learning_rate": 8.308463875541382e-07, "loss": 4.2256, "step": 69980 }, { "epoch": 3.0150320885557997, "learning_rate": 8.307979055786592e-07, "loss": 4.2484, "step": 70000 }, { "epoch": 3.015893526295387, "learning_rate": 8.307494236031804e-07, "loss": 4.1018, "step": 70020 }, { "epoch": 3.0167549640349742, "learning_rate": 8.307009416277015e-07, "loss": 4.3796, "step": 70040 }, { "epoch": 3.017616401774562, "learning_rate": 8.306524596522226e-07, "loss": 4.2395, "step": 70060 }, { "epoch": 3.018477839514149, "learning_rate": 8.306039776767437e-07, "loss": 3.9913, "step": 70080 }, { "epoch": 3.0193392772537364, "learning_rate": 8.305554957012649e-07, "loss": 4.1971, "step": 70100 }, { "epoch": 3.0202007149933237, "learning_rate": 8.305070137257857e-07, "loss": 4.1074, "step": 70120 }, { "epoch": 3.0210621527329113, "learning_rate": 8.304585317503069e-07, "loss": 4.2932, "step": 70140 }, { "epoch": 3.0219235904724986, "learning_rate": 8.304100497748281e-07, "loss": 4.269, "step": 70160 }, { "epoch": 3.022785028212086, "learning_rate": 8.303615677993493e-07, "loss": 4.1816, "step": 70180 }, { "epoch": 3.0236464659516735, "learning_rate": 8.303130858238704e-07, "loss": 4.2207, "step": 70200 }, { "epoch": 3.0245079036912608, "learning_rate": 8.302646038483915e-07, "loss": 4.1439, "step": 70220 }, { "epoch": 3.025369341430848, "learning_rate": 8.302161218729125e-07, "loss": 3.9728, "step": 70240 }, { "epoch": 3.0262307791704353, "learning_rate": 8.301676398974337e-07, "loss": 3.9491, "step": 70260 }, { "epoch": 3.027092216910023, "learning_rate": 8.301191579219547e-07, "loss": 4.1278, "step": 70280 }, { "epoch": 3.02795365464961, "learning_rate": 8.300706759464758e-07, "loss": 4.1904, "step": 70300 }, { "epoch": 3.0288150923891974, "learning_rate": 8.30022193970997e-07, "loss": 3.9599, "step": 70320 }, { "epoch": 3.029676530128785, "learning_rate": 8.299737119955182e-07, "loss": 4.3606, "step": 70340 }, { "epoch": 3.0305379678683724, "learning_rate": 8.299252300200392e-07, "loss": 4.0411, "step": 70360 }, { "epoch": 3.0313994056079596, "learning_rate": 8.298767480445602e-07, "loss": 4.1926, "step": 70380 }, { "epoch": 3.032260843347547, "learning_rate": 8.298282660690814e-07, "loss": 4.1736, "step": 70400 }, { "epoch": 3.0331222810871346, "learning_rate": 8.297797840936025e-07, "loss": 4.1216, "step": 70420 }, { "epoch": 3.033983718826722, "learning_rate": 8.297313021181236e-07, "loss": 4.0971, "step": 70440 }, { "epoch": 3.034845156566309, "learning_rate": 8.296828201426447e-07, "loss": 4.0617, "step": 70460 }, { "epoch": 3.0357065943058967, "learning_rate": 8.296343381671659e-07, "loss": 3.9797, "step": 70480 }, { "epoch": 3.036568032045484, "learning_rate": 8.295858561916868e-07, "loss": 4.2322, "step": 70500 }, { "epoch": 3.0374294697850712, "learning_rate": 8.29537374216208e-07, "loss": 4.1371, "step": 70520 }, { "epoch": 3.0382909075246585, "learning_rate": 8.294888922407291e-07, "loss": 4.0835, "step": 70540 }, { "epoch": 3.039152345264246, "learning_rate": 8.294404102652503e-07, "loss": 4.0377, "step": 70560 }, { "epoch": 3.0400137830038334, "learning_rate": 8.293919282897713e-07, "loss": 4.1484, "step": 70580 }, { "epoch": 3.0408752207434206, "learning_rate": 8.293434463142925e-07, "loss": 4.0844, "step": 70600 }, { "epoch": 3.0417366584830083, "learning_rate": 8.292949643388136e-07, "loss": 4.1345, "step": 70620 }, { "epoch": 3.0425980962225956, "learning_rate": 8.292464823633346e-07, "loss": 4.1762, "step": 70640 }, { "epoch": 3.043459533962183, "learning_rate": 8.291980003878557e-07, "loss": 4.1371, "step": 70660 }, { "epoch": 3.04432097170177, "learning_rate": 8.291495184123768e-07, "loss": 4.0444, "step": 70680 }, { "epoch": 3.0451824094413578, "learning_rate": 8.29101036436898e-07, "loss": 4.0261, "step": 70700 }, { "epoch": 3.046043847180945, "learning_rate": 8.290525544614192e-07, "loss": 4.31, "step": 70720 }, { "epoch": 3.0469052849205323, "learning_rate": 8.290040724859403e-07, "loss": 4.0623, "step": 70740 }, { "epoch": 3.04776672266012, "learning_rate": 8.289555905104612e-07, "loss": 4.2966, "step": 70760 }, { "epoch": 3.048628160399707, "learning_rate": 8.289071085349824e-07, "loss": 4.2703, "step": 70780 }, { "epoch": 3.0494895981392944, "learning_rate": 8.288586265595036e-07, "loss": 4.2118, "step": 70800 }, { "epoch": 3.0503510358788817, "learning_rate": 8.288101445840246e-07, "loss": 4.1231, "step": 70820 }, { "epoch": 3.0512124736184694, "learning_rate": 8.287616626085457e-07, "loss": 4.0968, "step": 70840 }, { "epoch": 3.0520739113580566, "learning_rate": 8.287131806330669e-07, "loss": 4.1668, "step": 70860 }, { "epoch": 3.052935349097644, "learning_rate": 8.286646986575879e-07, "loss": 4.217, "step": 70880 }, { "epoch": 3.0537967868372315, "learning_rate": 8.28616216682109e-07, "loss": 4.3483, "step": 70900 }, { "epoch": 3.054658224576819, "learning_rate": 8.285677347066301e-07, "loss": 3.878, "step": 70920 }, { "epoch": 3.055519662316406, "learning_rate": 8.285192527311513e-07, "loss": 4.1455, "step": 70940 }, { "epoch": 3.0563811000559933, "learning_rate": 8.284707707556723e-07, "loss": 4.2554, "step": 70960 }, { "epoch": 3.057242537795581, "learning_rate": 8.284222887801935e-07, "loss": 3.9374, "step": 70980 }, { "epoch": 3.058103975535168, "learning_rate": 8.283738068047146e-07, "loss": 4.2194, "step": 71000 }, { "epoch": 3.0589654132747555, "learning_rate": 8.283253248292357e-07, "loss": 4.1952, "step": 71020 }, { "epoch": 3.059826851014343, "learning_rate": 8.282768428537567e-07, "loss": 4.0305, "step": 71040 }, { "epoch": 3.0606882887539304, "learning_rate": 8.282283608782779e-07, "loss": 4.0013, "step": 71060 }, { "epoch": 3.0615497264935176, "learning_rate": 8.28179878902799e-07, "loss": 4.3159, "step": 71080 }, { "epoch": 3.062411164233105, "learning_rate": 8.281313969273202e-07, "loss": 4.2653, "step": 71100 }, { "epoch": 3.0632726019726926, "learning_rate": 8.280829149518412e-07, "loss": 4.2117, "step": 71120 }, { "epoch": 3.06413403971228, "learning_rate": 8.280344329763623e-07, "loss": 3.9771, "step": 71140 }, { "epoch": 3.064995477451867, "learning_rate": 8.279859510008834e-07, "loss": 4.323, "step": 71160 }, { "epoch": 3.0658569151914543, "learning_rate": 8.279374690254045e-07, "loss": 4.0561, "step": 71180 }, { "epoch": 3.066718352931042, "learning_rate": 8.278889870499256e-07, "loss": 4.0266, "step": 71200 }, { "epoch": 3.0675797906706292, "learning_rate": 8.278405050744467e-07, "loss": 4.2683, "step": 71220 }, { "epoch": 3.0684412284102165, "learning_rate": 8.277920230989679e-07, "loss": 4.1258, "step": 71240 }, { "epoch": 3.069302666149804, "learning_rate": 8.277435411234889e-07, "loss": 4.164, "step": 71260 }, { "epoch": 3.0701641038893914, "learning_rate": 8.2769505914801e-07, "loss": 4.1158, "step": 71280 }, { "epoch": 3.0710255416289787, "learning_rate": 8.276465771725311e-07, "loss": 4.2193, "step": 71300 }, { "epoch": 3.071886979368566, "learning_rate": 8.275980951970523e-07, "loss": 3.9963, "step": 71320 }, { "epoch": 3.0727484171081536, "learning_rate": 8.275496132215734e-07, "loss": 4.2502, "step": 71340 }, { "epoch": 3.073609854847741, "learning_rate": 8.275011312460945e-07, "loss": 4.0821, "step": 71360 }, { "epoch": 3.074471292587328, "learning_rate": 8.274526492706156e-07, "loss": 4.2623, "step": 71380 }, { "epoch": 3.075332730326916, "learning_rate": 8.274041672951367e-07, "loss": 4.1051, "step": 71400 }, { "epoch": 3.076194168066503, "learning_rate": 8.273556853196578e-07, "loss": 4.1241, "step": 71420 }, { "epoch": 3.0770556058060903, "learning_rate": 8.273072033441789e-07, "loss": 4.2241, "step": 71440 }, { "epoch": 3.0779170435456775, "learning_rate": 8.272587213687e-07, "loss": 4.1199, "step": 71460 }, { "epoch": 3.078778481285265, "learning_rate": 8.272102393932211e-07, "loss": 4.0423, "step": 71480 }, { "epoch": 3.0796399190248525, "learning_rate": 8.271617574177422e-07, "loss": 4.0132, "step": 71500 }, { "epoch": 3.0805013567644397, "learning_rate": 8.271132754422633e-07, "loss": 4.1384, "step": 71520 }, { "epoch": 3.0813627945040274, "learning_rate": 8.270647934667844e-07, "loss": 4.0186, "step": 71540 }, { "epoch": 3.0822242322436146, "learning_rate": 8.270163114913055e-07, "loss": 4.2077, "step": 71560 }, { "epoch": 3.083085669983202, "learning_rate": 8.269678295158266e-07, "loss": 4.304, "step": 71580 }, { "epoch": 3.083947107722789, "learning_rate": 8.269193475403478e-07, "loss": 4.227, "step": 71600 }, { "epoch": 3.084808545462377, "learning_rate": 8.268708655648689e-07, "loss": 4.2034, "step": 71620 }, { "epoch": 3.085669983201964, "learning_rate": 8.268223835893899e-07, "loss": 4.1402, "step": 71640 }, { "epoch": 3.0865314209415513, "learning_rate": 8.26773901613911e-07, "loss": 4.2338, "step": 71660 }, { "epoch": 3.087392858681139, "learning_rate": 8.267254196384322e-07, "loss": 4.1142, "step": 71680 }, { "epoch": 3.0882542964207262, "learning_rate": 8.266769376629533e-07, "loss": 4.2268, "step": 71700 }, { "epoch": 3.0891157341603135, "learning_rate": 8.266284556874744e-07, "loss": 4.2228, "step": 71720 }, { "epoch": 3.0899771718999007, "learning_rate": 8.265799737119955e-07, "loss": 4.3599, "step": 71740 }, { "epoch": 3.0908386096394884, "learning_rate": 8.265314917365166e-07, "loss": 4.185, "step": 71760 }, { "epoch": 3.0917000473790757, "learning_rate": 8.264830097610376e-07, "loss": 4.0645, "step": 71780 }, { "epoch": 3.092561485118663, "learning_rate": 8.264345277855588e-07, "loss": 4.1716, "step": 71800 }, { "epoch": 3.0934229228582506, "learning_rate": 8.263860458100799e-07, "loss": 4.3095, "step": 71820 }, { "epoch": 3.094284360597838, "learning_rate": 8.26337563834601e-07, "loss": 4.1156, "step": 71840 }, { "epoch": 3.095145798337425, "learning_rate": 8.262890818591221e-07, "loss": 4.3397, "step": 71860 }, { "epoch": 3.0960072360770123, "learning_rate": 8.262405998836433e-07, "loss": 4.1411, "step": 71880 }, { "epoch": 3.0968686738166, "learning_rate": 8.261921179081642e-07, "loss": 4.4665, "step": 71900 }, { "epoch": 3.0977301115561873, "learning_rate": 8.261436359326854e-07, "loss": 4.3184, "step": 71920 }, { "epoch": 3.0985915492957745, "learning_rate": 8.260951539572065e-07, "loss": 4.1395, "step": 71940 }, { "epoch": 3.099452987035362, "learning_rate": 8.260466719817277e-07, "loss": 4.1781, "step": 71960 }, { "epoch": 3.1003144247749495, "learning_rate": 8.259981900062488e-07, "loss": 4.1504, "step": 71980 }, { "epoch": 3.1011758625145367, "learning_rate": 8.2594970803077e-07, "loss": 4.1366, "step": 72000 }, { "epoch": 3.102037300254124, "learning_rate": 8.259012260552909e-07, "loss": 3.9553, "step": 72020 }, { "epoch": 3.1028987379937116, "learning_rate": 8.25852744079812e-07, "loss": 4.1866, "step": 72040 }, { "epoch": 3.103760175733299, "learning_rate": 8.258042621043332e-07, "loss": 4.118, "step": 72060 }, { "epoch": 3.104621613472886, "learning_rate": 8.257557801288542e-07, "loss": 4.0653, "step": 72080 }, { "epoch": 3.105483051212474, "learning_rate": 8.257072981533754e-07, "loss": 4.1731, "step": 72100 }, { "epoch": 3.106344488952061, "learning_rate": 8.256588161778965e-07, "loss": 4.2102, "step": 72120 }, { "epoch": 3.1072059266916483, "learning_rate": 8.256103342024177e-07, "loss": 4.0365, "step": 72140 }, { "epoch": 3.1080673644312355, "learning_rate": 8.255618522269386e-07, "loss": 4.0914, "step": 72160 }, { "epoch": 3.1089288021708232, "learning_rate": 8.255133702514598e-07, "loss": 4.2207, "step": 72180 }, { "epoch": 3.1097902399104105, "learning_rate": 8.254648882759809e-07, "loss": 4.2689, "step": 72200 }, { "epoch": 3.1106516776499977, "learning_rate": 8.254164063005021e-07, "loss": 4.0893, "step": 72220 }, { "epoch": 3.1115131153895854, "learning_rate": 8.253679243250231e-07, "loss": 4.086, "step": 72240 }, { "epoch": 3.1123745531291727, "learning_rate": 8.253194423495443e-07, "loss": 4.2711, "step": 72260 }, { "epoch": 3.11323599086876, "learning_rate": 8.252709603740653e-07, "loss": 4.1362, "step": 72280 }, { "epoch": 3.114097428608347, "learning_rate": 8.252224783985864e-07, "loss": 4.294, "step": 72300 }, { "epoch": 3.114958866347935, "learning_rate": 8.251739964231075e-07, "loss": 3.9799, "step": 72320 }, { "epoch": 3.115820304087522, "learning_rate": 8.251255144476287e-07, "loss": 4.2485, "step": 72340 }, { "epoch": 3.1166817418271093, "learning_rate": 8.250770324721498e-07, "loss": 3.9589, "step": 72360 }, { "epoch": 3.117543179566697, "learning_rate": 8.250285504966708e-07, "loss": 4.2144, "step": 72380 }, { "epoch": 3.1184046173062843, "learning_rate": 8.24980068521192e-07, "loss": 4.0097, "step": 72400 }, { "epoch": 3.1192660550458715, "learning_rate": 8.24931586545713e-07, "loss": 3.9143, "step": 72420 }, { "epoch": 3.1201274927854588, "learning_rate": 8.248831045702342e-07, "loss": 4.2846, "step": 72440 }, { "epoch": 3.1209889305250464, "learning_rate": 8.248346225947552e-07, "loss": 4.1769, "step": 72460 }, { "epoch": 3.1218503682646337, "learning_rate": 8.247861406192764e-07, "loss": 4.1483, "step": 72480 }, { "epoch": 3.122711806004221, "learning_rate": 8.247376586437976e-07, "loss": 4.0892, "step": 72500 }, { "epoch": 3.1235732437438086, "learning_rate": 8.246891766683188e-07, "loss": 4.2878, "step": 72520 }, { "epoch": 3.124434681483396, "learning_rate": 8.246406946928396e-07, "loss": 4.2783, "step": 72540 }, { "epoch": 3.125296119222983, "learning_rate": 8.245922127173608e-07, "loss": 4.2123, "step": 72560 }, { "epoch": 3.1261575569625704, "learning_rate": 8.245437307418819e-07, "loss": 4.2162, "step": 72580 }, { "epoch": 3.127018994702158, "learning_rate": 8.244952487664031e-07, "loss": 4.0662, "step": 72600 }, { "epoch": 3.1278804324417453, "learning_rate": 8.244467667909241e-07, "loss": 4.1451, "step": 72620 }, { "epoch": 3.1287418701813325, "learning_rate": 8.243982848154453e-07, "loss": 3.9821, "step": 72640 }, { "epoch": 3.1296033079209202, "learning_rate": 8.243498028399663e-07, "loss": 4.0284, "step": 72660 }, { "epoch": 3.1304647456605075, "learning_rate": 8.243013208644875e-07, "loss": 4.1408, "step": 72680 }, { "epoch": 3.1313261834000947, "learning_rate": 8.242528388890085e-07, "loss": 4.0879, "step": 72700 }, { "epoch": 3.132187621139682, "learning_rate": 8.242043569135297e-07, "loss": 4.2258, "step": 72720 }, { "epoch": 3.1330490588792697, "learning_rate": 8.241558749380508e-07, "loss": 4.1793, "step": 72740 }, { "epoch": 3.133910496618857, "learning_rate": 8.24107392962572e-07, "loss": 4.1964, "step": 72760 }, { "epoch": 3.134771934358444, "learning_rate": 8.24058910987093e-07, "loss": 3.9955, "step": 72780 }, { "epoch": 3.135633372098032, "learning_rate": 8.240104290116141e-07, "loss": 4.1838, "step": 72800 }, { "epoch": 3.136494809837619, "learning_rate": 8.239619470361352e-07, "loss": 4.0517, "step": 72820 }, { "epoch": 3.1373562475772063, "learning_rate": 8.239134650606563e-07, "loss": 4.2192, "step": 72840 }, { "epoch": 3.1382176853167936, "learning_rate": 8.238649830851774e-07, "loss": 4.1532, "step": 72860 }, { "epoch": 3.1390791230563813, "learning_rate": 8.238165011096986e-07, "loss": 4.0666, "step": 72880 }, { "epoch": 3.1399405607959685, "learning_rate": 8.237680191342197e-07, "loss": 4.0273, "step": 72900 }, { "epoch": 3.1408019985355558, "learning_rate": 8.237195371587406e-07, "loss": 4.2761, "step": 72920 }, { "epoch": 3.1416634362751434, "learning_rate": 8.236710551832618e-07, "loss": 4.4122, "step": 72940 }, { "epoch": 3.1425248740147307, "learning_rate": 8.23622573207783e-07, "loss": 4.2766, "step": 72960 }, { "epoch": 3.143386311754318, "learning_rate": 8.23574091232304e-07, "loss": 3.9567, "step": 72980 }, { "epoch": 3.144247749493905, "learning_rate": 8.235256092568251e-07, "loss": 4.1298, "step": 73000 }, { "epoch": 3.145109187233493, "learning_rate": 8.234771272813463e-07, "loss": 4.2312, "step": 73020 }, { "epoch": 3.14597062497308, "learning_rate": 8.234286453058674e-07, "loss": 4.339, "step": 73040 }, { "epoch": 3.1468320627126674, "learning_rate": 8.233801633303884e-07, "loss": 4.1877, "step": 73060 }, { "epoch": 3.147693500452255, "learning_rate": 8.233316813549095e-07, "loss": 4.1944, "step": 73080 }, { "epoch": 3.1485549381918423, "learning_rate": 8.232831993794307e-07, "loss": 3.9856, "step": 73100 }, { "epoch": 3.1494163759314295, "learning_rate": 8.232347174039518e-07, "loss": 3.9332, "step": 73120 }, { "epoch": 3.150277813671017, "learning_rate": 8.231862354284729e-07, "loss": 3.9824, "step": 73140 }, { "epoch": 3.1511392514106045, "learning_rate": 8.23137753452994e-07, "loss": 4.0645, "step": 73160 }, { "epoch": 3.1520006891501917, "learning_rate": 8.230892714775151e-07, "loss": 4.3052, "step": 73180 }, { "epoch": 3.152862126889779, "learning_rate": 8.230407895020362e-07, "loss": 4.2081, "step": 73200 }, { "epoch": 3.153723564629366, "learning_rate": 8.229923075265573e-07, "loss": 4.3285, "step": 73220 }, { "epoch": 3.154585002368954, "learning_rate": 8.229438255510784e-07, "loss": 4.1318, "step": 73240 }, { "epoch": 3.155446440108541, "learning_rate": 8.228953435755996e-07, "loss": 4.1297, "step": 73260 }, { "epoch": 3.1563078778481284, "learning_rate": 8.228468616001207e-07, "loss": 4.1799, "step": 73280 }, { "epoch": 3.157169315587716, "learning_rate": 8.227983796246417e-07, "loss": 4.1386, "step": 73300 }, { "epoch": 3.1580307533273033, "learning_rate": 8.227498976491628e-07, "loss": 3.901, "step": 73320 }, { "epoch": 3.1588921910668906, "learning_rate": 8.22701415673684e-07, "loss": 4.0359, "step": 73340 }, { "epoch": 3.159753628806478, "learning_rate": 8.22652933698205e-07, "loss": 4.4535, "step": 73360 }, { "epoch": 3.1606150665460655, "learning_rate": 8.226044517227261e-07, "loss": 4.1214, "step": 73380 }, { "epoch": 3.1614765042856527, "learning_rate": 8.225559697472473e-07, "loss": 4.0811, "step": 73400 }, { "epoch": 3.16233794202524, "learning_rate": 8.225074877717684e-07, "loss": 4.3156, "step": 73420 }, { "epoch": 3.1631993797648277, "learning_rate": 8.224590057962894e-07, "loss": 4.1207, "step": 73440 }, { "epoch": 3.164060817504415, "learning_rate": 8.224105238208105e-07, "loss": 3.9015, "step": 73460 }, { "epoch": 3.164922255244002, "learning_rate": 8.223620418453317e-07, "loss": 4.2393, "step": 73480 }, { "epoch": 3.1657836929835894, "learning_rate": 8.223135598698529e-07, "loss": 4.0271, "step": 73500 }, { "epoch": 3.166645130723177, "learning_rate": 8.222650778943739e-07, "loss": 4.0875, "step": 73520 }, { "epoch": 3.1675065684627643, "learning_rate": 8.22216595918895e-07, "loss": 4.0711, "step": 73540 }, { "epoch": 3.1683680062023516, "learning_rate": 8.221681139434161e-07, "loss": 4.228, "step": 73560 }, { "epoch": 3.1692294439419393, "learning_rate": 8.221196319679373e-07, "loss": 4.3021, "step": 73580 }, { "epoch": 3.1700908816815265, "learning_rate": 8.220711499924583e-07, "loss": 4.0456, "step": 73600 }, { "epoch": 3.1709523194211138, "learning_rate": 8.220226680169794e-07, "loss": 4.1921, "step": 73620 }, { "epoch": 3.171813757160701, "learning_rate": 8.219741860415006e-07, "loss": 4.2232, "step": 73640 }, { "epoch": 3.1726751949002887, "learning_rate": 8.219257040660216e-07, "loss": 4.3585, "step": 73660 }, { "epoch": 3.173536632639876, "learning_rate": 8.218772220905426e-07, "loss": 4.1887, "step": 73680 }, { "epoch": 3.174398070379463, "learning_rate": 8.218287401150638e-07, "loss": 3.9149, "step": 73700 }, { "epoch": 3.175259508119051, "learning_rate": 8.21780258139585e-07, "loss": 4.0955, "step": 73720 }, { "epoch": 3.176120945858638, "learning_rate": 8.21731776164106e-07, "loss": 3.9769, "step": 73740 }, { "epoch": 3.1769823835982254, "learning_rate": 8.216832941886272e-07, "loss": 4.1118, "step": 73760 }, { "epoch": 3.1778438213378126, "learning_rate": 8.216348122131484e-07, "loss": 4.1992, "step": 73780 }, { "epoch": 3.1787052590774003, "learning_rate": 8.215863302376694e-07, "loss": 4.0834, "step": 73800 }, { "epoch": 3.1795666968169876, "learning_rate": 8.215378482621904e-07, "loss": 4.041, "step": 73820 }, { "epoch": 3.180428134556575, "learning_rate": 8.214893662867116e-07, "loss": 4.1296, "step": 73840 }, { "epoch": 3.1812895722961625, "learning_rate": 8.214408843112327e-07, "loss": 4.1386, "step": 73860 }, { "epoch": 3.1821510100357497, "learning_rate": 8.213924023357539e-07, "loss": 4.5436, "step": 73880 }, { "epoch": 3.183012447775337, "learning_rate": 8.213439203602749e-07, "loss": 4.0356, "step": 73900 }, { "epoch": 3.1838738855149242, "learning_rate": 8.21295438384796e-07, "loss": 4.2839, "step": 73920 }, { "epoch": 3.184735323254512, "learning_rate": 8.212469564093171e-07, "loss": 4.2784, "step": 73940 }, { "epoch": 3.185596760994099, "learning_rate": 8.211984744338382e-07, "loss": 4.1857, "step": 73960 }, { "epoch": 3.1864581987336864, "learning_rate": 8.211499924583593e-07, "loss": 4.0885, "step": 73980 }, { "epoch": 3.187319636473274, "learning_rate": 8.211015104828804e-07, "loss": 4.1883, "step": 74000 }, { "epoch": 3.1881810742128613, "learning_rate": 8.210530285074016e-07, "loss": 3.9912, "step": 74020 }, { "epoch": 3.1890425119524486, "learning_rate": 8.210045465319227e-07, "loss": 4.3197, "step": 74040 }, { "epoch": 3.189903949692036, "learning_rate": 8.209560645564437e-07, "loss": 4.183, "step": 74060 }, { "epoch": 3.1907653874316235, "learning_rate": 8.209075825809648e-07, "loss": 4.1738, "step": 74080 }, { "epoch": 3.1916268251712108, "learning_rate": 8.20859100605486e-07, "loss": 4.2608, "step": 74100 }, { "epoch": 3.192488262910798, "learning_rate": 8.208106186300071e-07, "loss": 4.0824, "step": 74120 }, { "epoch": 3.1933497006503853, "learning_rate": 8.207621366545282e-07, "loss": 4.1715, "step": 74140 }, { "epoch": 3.194211138389973, "learning_rate": 8.207136546790493e-07, "loss": 4.0035, "step": 74160 }, { "epoch": 3.19507257612956, "learning_rate": 8.206651727035705e-07, "loss": 4.2476, "step": 74180 }, { "epoch": 3.1959340138691474, "learning_rate": 8.206166907280914e-07, "loss": 4.189, "step": 74200 }, { "epoch": 3.196795451608735, "learning_rate": 8.205682087526126e-07, "loss": 4.1405, "step": 74220 }, { "epoch": 3.1976568893483224, "learning_rate": 8.205197267771337e-07, "loss": 4.1466, "step": 74240 }, { "epoch": 3.1985183270879096, "learning_rate": 8.204712448016549e-07, "loss": 4.0237, "step": 74260 }, { "epoch": 3.199379764827497, "learning_rate": 8.204227628261759e-07, "loss": 4.168, "step": 74280 }, { "epoch": 3.2002412025670846, "learning_rate": 8.203742808506972e-07, "loss": 4.1347, "step": 74300 }, { "epoch": 3.201102640306672, "learning_rate": 8.203257988752181e-07, "loss": 3.9789, "step": 74320 }, { "epoch": 3.201964078046259, "learning_rate": 8.202773168997392e-07, "loss": 3.9598, "step": 74340 }, { "epoch": 3.2028255157858467, "learning_rate": 8.202288349242603e-07, "loss": 3.911, "step": 74360 }, { "epoch": 3.203686953525434, "learning_rate": 8.201803529487815e-07, "loss": 4.0397, "step": 74380 }, { "epoch": 3.2045483912650212, "learning_rate": 8.201318709733026e-07, "loss": 4.0569, "step": 74400 }, { "epoch": 3.2054098290046085, "learning_rate": 8.200833889978237e-07, "loss": 4.0304, "step": 74420 }, { "epoch": 3.206271266744196, "learning_rate": 8.200349070223447e-07, "loss": 4.16, "step": 74440 }, { "epoch": 3.2071327044837834, "learning_rate": 8.199864250468658e-07, "loss": 4.3619, "step": 74460 }, { "epoch": 3.2079941422233706, "learning_rate": 8.19937943071387e-07, "loss": 4.2681, "step": 74480 }, { "epoch": 3.2088555799629583, "learning_rate": 8.198894610959081e-07, "loss": 4.1484, "step": 74500 }, { "epoch": 3.2097170177025456, "learning_rate": 8.198409791204292e-07, "loss": 4.0033, "step": 74520 }, { "epoch": 3.210578455442133, "learning_rate": 8.197924971449503e-07, "loss": 4.0437, "step": 74540 }, { "epoch": 3.21143989318172, "learning_rate": 8.197440151694715e-07, "loss": 4.1846, "step": 74560 }, { "epoch": 3.2123013309213078, "learning_rate": 8.196955331939925e-07, "loss": 4.3057, "step": 74580 }, { "epoch": 3.213162768660895, "learning_rate": 8.196470512185136e-07, "loss": 4.0408, "step": 74600 }, { "epoch": 3.2140242064004823, "learning_rate": 8.195985692430348e-07, "loss": 4.1349, "step": 74620 }, { "epoch": 3.21488564414007, "learning_rate": 8.195500872675558e-07, "loss": 4.0433, "step": 74640 }, { "epoch": 3.215747081879657, "learning_rate": 8.19501605292077e-07, "loss": 4.12, "step": 74660 }, { "epoch": 3.2166085196192444, "learning_rate": 8.194531233165981e-07, "loss": 4.1219, "step": 74680 }, { "epoch": 3.2174699573588317, "learning_rate": 8.194046413411191e-07, "loss": 4.2315, "step": 74700 }, { "epoch": 3.2183313950984194, "learning_rate": 8.193561593656402e-07, "loss": 4.139, "step": 74720 }, { "epoch": 3.2191928328380066, "learning_rate": 8.193076773901613e-07, "loss": 4.004, "step": 74740 }, { "epoch": 3.220054270577594, "learning_rate": 8.192591954146825e-07, "loss": 4.3765, "step": 74760 }, { "epoch": 3.2209157083171815, "learning_rate": 8.192107134392036e-07, "loss": 4.3537, "step": 74780 }, { "epoch": 3.221777146056769, "learning_rate": 8.191622314637247e-07, "loss": 4.1181, "step": 74800 }, { "epoch": 3.222638583796356, "learning_rate": 8.191137494882457e-07, "loss": 4.3242, "step": 74820 }, { "epoch": 3.2235000215359433, "learning_rate": 8.190652675127669e-07, "loss": 4.1551, "step": 74840 }, { "epoch": 3.224361459275531, "learning_rate": 8.19016785537288e-07, "loss": 4.2335, "step": 74860 }, { "epoch": 3.225222897015118, "learning_rate": 8.189683035618091e-07, "loss": 4.1713, "step": 74880 }, { "epoch": 3.2260843347547055, "learning_rate": 8.189198215863302e-07, "loss": 4.1811, "step": 74900 }, { "epoch": 3.226945772494293, "learning_rate": 8.188713396108514e-07, "loss": 4.0747, "step": 74920 }, { "epoch": 3.2278072102338804, "learning_rate": 8.188228576353724e-07, "loss": 4.1766, "step": 74940 }, { "epoch": 3.2286686479734676, "learning_rate": 8.187743756598935e-07, "loss": 4.0803, "step": 74960 }, { "epoch": 3.229530085713055, "learning_rate": 8.187258936844146e-07, "loss": 4.2386, "step": 74980 }, { "epoch": 3.2303915234526426, "learning_rate": 8.186774117089357e-07, "loss": 4.05, "step": 75000 }, { "epoch": 3.23125296119223, "learning_rate": 8.186289297334568e-07, "loss": 4.1587, "step": 75020 }, { "epoch": 3.232114398931817, "learning_rate": 8.18580447757978e-07, "loss": 4.2797, "step": 75040 }, { "epoch": 3.2329758366714048, "learning_rate": 8.185319657824991e-07, "loss": 4.1528, "step": 75060 }, { "epoch": 3.233837274410992, "learning_rate": 8.184834838070201e-07, "loss": 4.2551, "step": 75080 }, { "epoch": 3.2346987121505792, "learning_rate": 8.184350018315412e-07, "loss": 4.0345, "step": 75100 }, { "epoch": 3.2355601498901665, "learning_rate": 8.183865198560624e-07, "loss": 4.0762, "step": 75120 }, { "epoch": 3.236421587629754, "learning_rate": 8.183380378805835e-07, "loss": 4.0778, "step": 75140 }, { "epoch": 3.2372830253693414, "learning_rate": 8.182895559051046e-07, "loss": 4.2175, "step": 75160 }, { "epoch": 3.2381444631089287, "learning_rate": 8.182410739296257e-07, "loss": 4.0608, "step": 75180 }, { "epoch": 3.2390059008485164, "learning_rate": 8.181925919541468e-07, "loss": 3.9309, "step": 75200 }, { "epoch": 3.2398673385881036, "learning_rate": 8.181441099786679e-07, "loss": 4.2056, "step": 75220 }, { "epoch": 3.240728776327691, "learning_rate": 8.180956280031889e-07, "loss": 4.0909, "step": 75240 }, { "epoch": 3.241590214067278, "learning_rate": 8.180471460277101e-07, "loss": 4.293, "step": 75260 }, { "epoch": 3.242451651806866, "learning_rate": 8.179986640522312e-07, "loss": 3.9907, "step": 75280 }, { "epoch": 3.243313089546453, "learning_rate": 8.179501820767524e-07, "loss": 4.128, "step": 75300 }, { "epoch": 3.2441745272860403, "learning_rate": 8.179017001012734e-07, "loss": 4.2091, "step": 75320 }, { "epoch": 3.245035965025628, "learning_rate": 8.178532181257945e-07, "loss": 4.3179, "step": 75340 }, { "epoch": 3.245897402765215, "learning_rate": 8.178047361503156e-07, "loss": 3.9396, "step": 75360 }, { "epoch": 3.2467588405048025, "learning_rate": 8.177562541748368e-07, "loss": 4.4707, "step": 75380 }, { "epoch": 3.2476202782443897, "learning_rate": 8.177077721993578e-07, "loss": 4.0212, "step": 75400 }, { "epoch": 3.2484817159839774, "learning_rate": 8.17659290223879e-07, "loss": 4.2023, "step": 75420 }, { "epoch": 3.2493431537235646, "learning_rate": 8.176108082484001e-07, "loss": 4.2866, "step": 75440 }, { "epoch": 3.250204591463152, "learning_rate": 8.17562326272921e-07, "loss": 4.0377, "step": 75460 }, { "epoch": 3.2510660292027396, "learning_rate": 8.175138442974422e-07, "loss": 4.1171, "step": 75480 }, { "epoch": 3.251927466942327, "learning_rate": 8.174653623219634e-07, "loss": 4.1174, "step": 75500 }, { "epoch": 3.252788904681914, "learning_rate": 8.174168803464845e-07, "loss": 3.9708, "step": 75520 }, { "epoch": 3.2536503424215013, "learning_rate": 8.173683983710055e-07, "loss": 4.0468, "step": 75540 }, { "epoch": 3.254511780161089, "learning_rate": 8.173199163955268e-07, "loss": 4.3976, "step": 75560 }, { "epoch": 3.2553732179006762, "learning_rate": 8.172714344200478e-07, "loss": 4.2645, "step": 75580 }, { "epoch": 3.2562346556402635, "learning_rate": 8.172229524445689e-07, "loss": 4.3369, "step": 75600 }, { "epoch": 3.257096093379851, "learning_rate": 8.171744704690899e-07, "loss": 4.2552, "step": 75620 }, { "epoch": 3.2579575311194384, "learning_rate": 8.171259884936111e-07, "loss": 4.0754, "step": 75640 }, { "epoch": 3.2588189688590257, "learning_rate": 8.170775065181323e-07, "loss": 4.2354, "step": 75660 }, { "epoch": 3.259680406598613, "learning_rate": 8.170290245426534e-07, "loss": 3.9267, "step": 75680 }, { "epoch": 3.2605418443382006, "learning_rate": 8.169805425671744e-07, "loss": 4.1187, "step": 75700 }, { "epoch": 3.261403282077788, "learning_rate": 8.169320605916955e-07, "loss": 4.119, "step": 75720 }, { "epoch": 3.262264719817375, "learning_rate": 8.168835786162167e-07, "loss": 3.9992, "step": 75740 }, { "epoch": 3.263126157556963, "learning_rate": 8.168350966407378e-07, "loss": 4.1048, "step": 75760 }, { "epoch": 3.26398759529655, "learning_rate": 8.167866146652588e-07, "loss": 4.1138, "step": 75780 }, { "epoch": 3.2648490330361373, "learning_rate": 8.1673813268978e-07, "loss": 4.3028, "step": 75800 }, { "epoch": 3.2657104707757245, "learning_rate": 8.166896507143011e-07, "loss": 4.0169, "step": 75820 }, { "epoch": 3.266571908515312, "learning_rate": 8.166411687388221e-07, "loss": 4.1142, "step": 75840 }, { "epoch": 3.2674333462548995, "learning_rate": 8.165926867633432e-07, "loss": 4.0902, "step": 75860 }, { "epoch": 3.2682947839944867, "learning_rate": 8.165442047878644e-07, "loss": 4.2813, "step": 75880 }, { "epoch": 3.2691562217340744, "learning_rate": 8.164957228123855e-07, "loss": 4.2515, "step": 75900 }, { "epoch": 3.2700176594736616, "learning_rate": 8.164472408369066e-07, "loss": 4.1882, "step": 75920 }, { "epoch": 3.270879097213249, "learning_rate": 8.163987588614277e-07, "loss": 4.1375, "step": 75940 }, { "epoch": 3.271740534952836, "learning_rate": 8.163502768859489e-07, "loss": 4.1215, "step": 75960 }, { "epoch": 3.272601972692424, "learning_rate": 8.163017949104699e-07, "loss": 4.1708, "step": 75980 }, { "epoch": 3.273463410432011, "learning_rate": 8.16253312934991e-07, "loss": 4.1516, "step": 76000 }, { "epoch": 3.2743248481715983, "learning_rate": 8.162048309595121e-07, "loss": 3.9983, "step": 76020 }, { "epoch": 3.275186285911186, "learning_rate": 8.161563489840333e-07, "loss": 4.249, "step": 76040 }, { "epoch": 3.2760477236507732, "learning_rate": 8.161078670085544e-07, "loss": 3.9864, "step": 76060 }, { "epoch": 3.2769091613903605, "learning_rate": 8.160593850330754e-07, "loss": 4.0451, "step": 76080 }, { "epoch": 3.2777705991299477, "learning_rate": 8.160109030575965e-07, "loss": 4.2359, "step": 76100 }, { "epoch": 3.2786320368695354, "learning_rate": 8.159624210821177e-07, "loss": 4.2358, "step": 76120 }, { "epoch": 3.2794934746091227, "learning_rate": 8.159139391066387e-07, "loss": 4.3616, "step": 76140 }, { "epoch": 3.28035491234871, "learning_rate": 8.158654571311598e-07, "loss": 4.0616, "step": 76160 }, { "epoch": 3.2812163500882976, "learning_rate": 8.15816975155681e-07, "loss": 3.9489, "step": 76180 }, { "epoch": 3.282077787827885, "learning_rate": 8.157684931802022e-07, "loss": 4.3266, "step": 76200 }, { "epoch": 3.282939225567472, "learning_rate": 8.157200112047231e-07, "loss": 4.308, "step": 76220 }, { "epoch": 3.2838006633070593, "learning_rate": 8.156715292292442e-07, "loss": 3.9456, "step": 76240 }, { "epoch": 3.284662101046647, "learning_rate": 8.156230472537654e-07, "loss": 3.9706, "step": 76260 }, { "epoch": 3.2855235387862343, "learning_rate": 8.155745652782866e-07, "loss": 4.3075, "step": 76280 }, { "epoch": 3.2863849765258215, "learning_rate": 8.155260833028076e-07, "loss": 4.0391, "step": 76300 }, { "epoch": 3.287246414265409, "learning_rate": 8.154776013273287e-07, "loss": 4.1471, "step": 76320 }, { "epoch": 3.2881078520049964, "learning_rate": 8.154291193518499e-07, "loss": 4.1654, "step": 76340 }, { "epoch": 3.2889692897445837, "learning_rate": 8.153806373763709e-07, "loss": 4.0594, "step": 76360 }, { "epoch": 3.289830727484171, "learning_rate": 8.15332155400892e-07, "loss": 4.0196, "step": 76380 }, { "epoch": 3.2906921652237586, "learning_rate": 8.152836734254132e-07, "loss": 3.8641, "step": 76400 }, { "epoch": 3.291553602963346, "learning_rate": 8.152351914499343e-07, "loss": 4.3033, "step": 76420 }, { "epoch": 3.292415040702933, "learning_rate": 8.151867094744553e-07, "loss": 4.0645, "step": 76440 }, { "epoch": 3.293276478442521, "learning_rate": 8.151382274989765e-07, "loss": 3.9754, "step": 76460 }, { "epoch": 3.294137916182108, "learning_rate": 8.150897455234975e-07, "loss": 4.2018, "step": 76480 }, { "epoch": 3.2949993539216953, "learning_rate": 8.150412635480187e-07, "loss": 4.0713, "step": 76500 }, { "epoch": 3.2958607916612825, "learning_rate": 8.149927815725397e-07, "loss": 4.0749, "step": 76520 }, { "epoch": 3.2967222294008702, "learning_rate": 8.149442995970609e-07, "loss": 4.0327, "step": 76540 }, { "epoch": 3.2975836671404575, "learning_rate": 8.14895817621582e-07, "loss": 4.3854, "step": 76560 }, { "epoch": 3.2984451048800447, "learning_rate": 8.148473356461032e-07, "loss": 4.0602, "step": 76580 }, { "epoch": 3.2993065426196324, "learning_rate": 8.147988536706242e-07, "loss": 4.0853, "step": 76600 }, { "epoch": 3.3001679803592197, "learning_rate": 8.147503716951452e-07, "loss": 4.1677, "step": 76620 }, { "epoch": 3.301029418098807, "learning_rate": 8.147018897196664e-07, "loss": 4.0639, "step": 76640 }, { "epoch": 3.301890855838394, "learning_rate": 8.146534077441876e-07, "loss": 4.2143, "step": 76660 }, { "epoch": 3.302752293577982, "learning_rate": 8.146049257687086e-07, "loss": 4.1781, "step": 76680 }, { "epoch": 3.303613731317569, "learning_rate": 8.145564437932297e-07, "loss": 4.1863, "step": 76700 }, { "epoch": 3.3044751690571563, "learning_rate": 8.145079618177509e-07, "loss": 3.9404, "step": 76720 }, { "epoch": 3.305336606796744, "learning_rate": 8.14459479842272e-07, "loss": 4.2133, "step": 76740 }, { "epoch": 3.3061980445363313, "learning_rate": 8.14410997866793e-07, "loss": 4.0929, "step": 76760 }, { "epoch": 3.3070594822759185, "learning_rate": 8.143625158913141e-07, "loss": 4.0367, "step": 76780 }, { "epoch": 3.3079209200155058, "learning_rate": 8.143140339158353e-07, "loss": 4.2925, "step": 76800 }, { "epoch": 3.3087823577550934, "learning_rate": 8.142655519403564e-07, "loss": 3.9315, "step": 76820 }, { "epoch": 3.3096437954946807, "learning_rate": 8.142170699648775e-07, "loss": 4.1322, "step": 76840 }, { "epoch": 3.310505233234268, "learning_rate": 8.141685879893985e-07, "loss": 4.1594, "step": 76860 }, { "epoch": 3.311366670973855, "learning_rate": 8.141201060139197e-07, "loss": 4.1882, "step": 76880 }, { "epoch": 3.312228108713443, "learning_rate": 8.140716240384407e-07, "loss": 4.0579, "step": 76900 }, { "epoch": 3.31308954645303, "learning_rate": 8.140231420629619e-07, "loss": 4.0531, "step": 76920 }, { "epoch": 3.3139509841926174, "learning_rate": 8.13974660087483e-07, "loss": 4.188, "step": 76940 }, { "epoch": 3.3148124219322046, "learning_rate": 8.139261781120042e-07, "loss": 4.1626, "step": 76960 }, { "epoch": 3.3156738596717923, "learning_rate": 8.138776961365252e-07, "loss": 3.9643, "step": 76980 }, { "epoch": 3.3165352974113795, "learning_rate": 8.138292141610463e-07, "loss": 4.1296, "step": 77000 }, { "epoch": 3.317396735150967, "learning_rate": 8.137807321855674e-07, "loss": 4.1892, "step": 77020 }, { "epoch": 3.3182581728905545, "learning_rate": 8.137322502100886e-07, "loss": 4.1864, "step": 77040 }, { "epoch": 3.3191196106301417, "learning_rate": 8.136837682346096e-07, "loss": 4.311, "step": 77060 }, { "epoch": 3.319981048369729, "learning_rate": 8.136352862591308e-07, "loss": 4.1749, "step": 77080 }, { "epoch": 3.320842486109316, "learning_rate": 8.135868042836519e-07, "loss": 3.9428, "step": 77100 }, { "epoch": 3.321703923848904, "learning_rate": 8.135383223081729e-07, "loss": 4.302, "step": 77120 }, { "epoch": 3.322565361588491, "learning_rate": 8.13489840332694e-07, "loss": 4.0964, "step": 77140 }, { "epoch": 3.3234267993280784, "learning_rate": 8.134413583572151e-07, "loss": 4.1406, "step": 77160 }, { "epoch": 3.324288237067666, "learning_rate": 8.133928763817363e-07, "loss": 4.2126, "step": 77180 }, { "epoch": 3.3251496748072533, "learning_rate": 8.133443944062574e-07, "loss": 4.0669, "step": 77200 }, { "epoch": 3.3260111125468406, "learning_rate": 8.132959124307785e-07, "loss": 4.2019, "step": 77220 }, { "epoch": 3.326872550286428, "learning_rate": 8.132474304552995e-07, "loss": 4.1299, "step": 77240 }, { "epoch": 3.3277339880260155, "learning_rate": 8.131989484798207e-07, "loss": 4.1683, "step": 77260 }, { "epoch": 3.3285954257656027, "learning_rate": 8.131504665043418e-07, "loss": 3.9748, "step": 77280 }, { "epoch": 3.32945686350519, "learning_rate": 8.131019845288629e-07, "loss": 4.1484, "step": 77300 }, { "epoch": 3.3303183012447777, "learning_rate": 8.13053502553384e-07, "loss": 4.0326, "step": 77320 }, { "epoch": 3.331179738984365, "learning_rate": 8.130050205779053e-07, "loss": 3.9336, "step": 77340 }, { "epoch": 3.332041176723952, "learning_rate": 8.129565386024262e-07, "loss": 4.1124, "step": 77360 }, { "epoch": 3.3329026144635394, "learning_rate": 8.129080566269473e-07, "loss": 4.0678, "step": 77380 }, { "epoch": 3.333764052203127, "learning_rate": 8.128595746514684e-07, "loss": 4.011, "step": 77400 }, { "epoch": 3.3346254899427143, "learning_rate": 8.128110926759895e-07, "loss": 4.171, "step": 77420 }, { "epoch": 3.3354869276823016, "learning_rate": 8.127626107005106e-07, "loss": 4.1553, "step": 77440 }, { "epoch": 3.3363483654218893, "learning_rate": 8.127141287250318e-07, "loss": 4.1368, "step": 77460 }, { "epoch": 3.3372098031614765, "learning_rate": 8.126656467495529e-07, "loss": 3.9005, "step": 77480 }, { "epoch": 3.3380712409010638, "learning_rate": 8.126171647740739e-07, "loss": 4.0312, "step": 77500 }, { "epoch": 3.338932678640651, "learning_rate": 8.12568682798595e-07, "loss": 4.1016, "step": 77520 }, { "epoch": 3.3397941163802387, "learning_rate": 8.125202008231162e-07, "loss": 4.2076, "step": 77540 }, { "epoch": 3.340655554119826, "learning_rate": 8.124717188476373e-07, "loss": 4.1423, "step": 77560 }, { "epoch": 3.341516991859413, "learning_rate": 8.124232368721584e-07, "loss": 4.0852, "step": 77580 }, { "epoch": 3.342378429599001, "learning_rate": 8.123747548966795e-07, "loss": 4.0709, "step": 77600 }, { "epoch": 3.343239867338588, "learning_rate": 8.123262729212006e-07, "loss": 4.0597, "step": 77620 }, { "epoch": 3.3441013050781754, "learning_rate": 8.122777909457217e-07, "loss": 4.1672, "step": 77640 }, { "epoch": 3.3449627428177626, "learning_rate": 8.122293089702428e-07, "loss": 4.0025, "step": 77660 }, { "epoch": 3.3458241805573503, "learning_rate": 8.121808269947639e-07, "loss": 4.0862, "step": 77680 }, { "epoch": 3.3466856182969376, "learning_rate": 8.12132345019285e-07, "loss": 4.168, "step": 77700 }, { "epoch": 3.347547056036525, "learning_rate": 8.120838630438061e-07, "loss": 4.3096, "step": 77720 }, { "epoch": 3.3484084937761125, "learning_rate": 8.120353810683273e-07, "loss": 4.2381, "step": 77740 }, { "epoch": 3.3492699315156997, "learning_rate": 8.119868990928483e-07, "loss": 3.9373, "step": 77760 }, { "epoch": 3.350131369255287, "learning_rate": 8.119384171173694e-07, "loss": 4.136, "step": 77780 }, { "epoch": 3.3509928069948742, "learning_rate": 8.118899351418905e-07, "loss": 4.0034, "step": 77800 }, { "epoch": 3.351854244734462, "learning_rate": 8.118414531664117e-07, "loss": 4.0162, "step": 77820 }, { "epoch": 3.352715682474049, "learning_rate": 8.117929711909328e-07, "loss": 4.0684, "step": 77840 }, { "epoch": 3.3535771202136364, "learning_rate": 8.117444892154539e-07, "loss": 4.2784, "step": 77860 }, { "epoch": 3.354438557953224, "learning_rate": 8.116960072399749e-07, "loss": 4.1786, "step": 77880 }, { "epoch": 3.3552999956928113, "learning_rate": 8.116475252644961e-07, "loss": 4.1515, "step": 77900 }, { "epoch": 3.3561614334323986, "learning_rate": 8.115990432890172e-07, "loss": 4.2021, "step": 77920 }, { "epoch": 3.357022871171986, "learning_rate": 8.115505613135383e-07, "loss": 4.1623, "step": 77940 }, { "epoch": 3.3578843089115735, "learning_rate": 8.115020793380594e-07, "loss": 4.0848, "step": 77960 }, { "epoch": 3.3587457466511608, "learning_rate": 8.114535973625805e-07, "loss": 4.214, "step": 77980 }, { "epoch": 3.359607184390748, "learning_rate": 8.114051153871016e-07, "loss": 4.2203, "step": 78000 }, { "epoch": 3.3604686221303357, "learning_rate": 8.113566334116226e-07, "loss": 4.0898, "step": 78020 }, { "epoch": 3.361330059869923, "learning_rate": 8.113081514361438e-07, "loss": 4.2142, "step": 78040 }, { "epoch": 3.36219149760951, "learning_rate": 8.112596694606649e-07, "loss": 4.0743, "step": 78060 }, { "epoch": 3.3630529353490974, "learning_rate": 8.112111874851861e-07, "loss": 3.9457, "step": 78080 }, { "epoch": 3.363914373088685, "learning_rate": 8.111627055097071e-07, "loss": 4.1495, "step": 78100 }, { "epoch": 3.3647758108282724, "learning_rate": 8.111142235342283e-07, "loss": 4.2278, "step": 78120 }, { "epoch": 3.3656372485678596, "learning_rate": 8.110657415587493e-07, "loss": 4.1487, "step": 78140 }, { "epoch": 3.3664986863074473, "learning_rate": 8.110172595832705e-07, "loss": 3.9095, "step": 78160 }, { "epoch": 3.3673601240470346, "learning_rate": 8.109687776077916e-07, "loss": 4.0588, "step": 78180 }, { "epoch": 3.368221561786622, "learning_rate": 8.109202956323127e-07, "loss": 4.28, "step": 78200 }, { "epoch": 3.369082999526209, "learning_rate": 8.108718136568338e-07, "loss": 4.0036, "step": 78220 }, { "epoch": 3.3699444372657967, "learning_rate": 8.108233316813549e-07, "loss": 4.1697, "step": 78240 }, { "epoch": 3.370805875005384, "learning_rate": 8.107748497058759e-07, "loss": 4.0887, "step": 78260 }, { "epoch": 3.3716673127449712, "learning_rate": 8.107263677303971e-07, "loss": 4.2456, "step": 78280 }, { "epoch": 3.372528750484559, "learning_rate": 8.106778857549182e-07, "loss": 4.1127, "step": 78300 }, { "epoch": 3.373390188224146, "learning_rate": 8.106294037794392e-07, "loss": 4.0247, "step": 78320 }, { "epoch": 3.3742516259637334, "learning_rate": 8.105809218039604e-07, "loss": 4.2261, "step": 78340 }, { "epoch": 3.3751130637033206, "learning_rate": 8.105324398284816e-07, "loss": 4.2271, "step": 78360 }, { "epoch": 3.3759745014429083, "learning_rate": 8.104839578530027e-07, "loss": 4.0098, "step": 78380 }, { "epoch": 3.3768359391824956, "learning_rate": 8.104354758775236e-07, "loss": 4.2418, "step": 78400 }, { "epoch": 3.377697376922083, "learning_rate": 8.103869939020448e-07, "loss": 4.0991, "step": 78420 }, { "epoch": 3.3785588146616705, "learning_rate": 8.10338511926566e-07, "loss": 4.18, "step": 78440 }, { "epoch": 3.3794202524012578, "learning_rate": 8.102900299510871e-07, "loss": 4.0663, "step": 78460 }, { "epoch": 3.380281690140845, "learning_rate": 8.102415479756081e-07, "loss": 4.3074, "step": 78480 }, { "epoch": 3.3811431278804323, "learning_rate": 8.101930660001293e-07, "loss": 3.9928, "step": 78500 }, { "epoch": 3.38200456562002, "learning_rate": 8.101445840246503e-07, "loss": 4.0386, "step": 78520 }, { "epoch": 3.382866003359607, "learning_rate": 8.100961020491715e-07, "loss": 4.1422, "step": 78540 }, { "epoch": 3.3837274410991944, "learning_rate": 8.100476200736925e-07, "loss": 4.1758, "step": 78560 }, { "epoch": 3.384588878838782, "learning_rate": 8.099991380982137e-07, "loss": 4.0686, "step": 78580 }, { "epoch": 3.3854503165783694, "learning_rate": 8.099506561227349e-07, "loss": 4.2067, "step": 78600 }, { "epoch": 3.3863117543179566, "learning_rate": 8.09902174147256e-07, "loss": 4.0233, "step": 78620 }, { "epoch": 3.387173192057544, "learning_rate": 8.098536921717769e-07, "loss": 4.0246, "step": 78640 }, { "epoch": 3.3880346297971315, "learning_rate": 8.098052101962981e-07, "loss": 4.0137, "step": 78660 }, { "epoch": 3.388896067536719, "learning_rate": 8.097567282208192e-07, "loss": 4.1523, "step": 78680 }, { "epoch": 3.389757505276306, "learning_rate": 8.097082462453403e-07, "loss": 4.0293, "step": 78700 }, { "epoch": 3.3906189430158937, "learning_rate": 8.096597642698614e-07, "loss": 4.1168, "step": 78720 }, { "epoch": 3.391480380755481, "learning_rate": 8.096112822943826e-07, "loss": 4.0346, "step": 78740 }, { "epoch": 3.392341818495068, "learning_rate": 8.095628003189037e-07, "loss": 4.0009, "step": 78760 }, { "epoch": 3.3932032562346555, "learning_rate": 8.095143183434246e-07, "loss": 4.0161, "step": 78780 }, { "epoch": 3.394064693974243, "learning_rate": 8.094658363679458e-07, "loss": 4.1611, "step": 78800 }, { "epoch": 3.3949261317138304, "learning_rate": 8.09417354392467e-07, "loss": 4.1065, "step": 78820 }, { "epoch": 3.3957875694534176, "learning_rate": 8.093688724169881e-07, "loss": 4.0982, "step": 78840 }, { "epoch": 3.3966490071930053, "learning_rate": 8.093203904415091e-07, "loss": 4.2407, "step": 78860 }, { "epoch": 3.3975104449325926, "learning_rate": 8.092719084660303e-07, "loss": 4.1065, "step": 78880 }, { "epoch": 3.39837188267218, "learning_rate": 8.092234264905514e-07, "loss": 4.0542, "step": 78900 }, { "epoch": 3.399233320411767, "learning_rate": 8.091749445150725e-07, "loss": 3.9918, "step": 78920 }, { "epoch": 3.4000947581513548, "learning_rate": 8.091264625395935e-07, "loss": 4.2903, "step": 78940 }, { "epoch": 3.400956195890942, "learning_rate": 8.090779805641147e-07, "loss": 4.0658, "step": 78960 }, { "epoch": 3.4018176336305292, "learning_rate": 8.090294985886359e-07, "loss": 4.1685, "step": 78980 }, { "epoch": 3.402679071370117, "learning_rate": 8.08981016613157e-07, "loss": 4.1341, "step": 79000 }, { "epoch": 3.403540509109704, "learning_rate": 8.089325346376779e-07, "loss": 3.9424, "step": 79020 }, { "epoch": 3.4044019468492914, "learning_rate": 8.088840526621991e-07, "loss": 4.1204, "step": 79040 }, { "epoch": 3.4052633845888787, "learning_rate": 8.088355706867202e-07, "loss": 4.1246, "step": 79060 }, { "epoch": 3.4061248223284664, "learning_rate": 8.087870887112413e-07, "loss": 4.1989, "step": 79080 }, { "epoch": 3.4069862600680536, "learning_rate": 8.087386067357624e-07, "loss": 4.139, "step": 79100 }, { "epoch": 3.407847697807641, "learning_rate": 8.086901247602837e-07, "loss": 4.0823, "step": 79120 }, { "epoch": 3.4087091355472285, "learning_rate": 8.086416427848047e-07, "loss": 4.0169, "step": 79140 }, { "epoch": 3.409570573286816, "learning_rate": 8.085931608093257e-07, "loss": 4.0076, "step": 79160 }, { "epoch": 3.410432011026403, "learning_rate": 8.085446788338468e-07, "loss": 4.1152, "step": 79180 }, { "epoch": 3.4112934487659903, "learning_rate": 8.08496196858368e-07, "loss": 4.0021, "step": 79200 }, { "epoch": 3.412154886505578, "learning_rate": 8.08447714882889e-07, "loss": 4.0888, "step": 79220 }, { "epoch": 3.413016324245165, "learning_rate": 8.083992329074102e-07, "loss": 4.0184, "step": 79240 }, { "epoch": 3.4138777619847525, "learning_rate": 8.083507509319313e-07, "loss": 4.0808, "step": 79260 }, { "epoch": 3.41473919972434, "learning_rate": 8.083022689564524e-07, "loss": 4.2175, "step": 79280 }, { "epoch": 3.4156006374639274, "learning_rate": 8.082537869809734e-07, "loss": 4.1338, "step": 79300 }, { "epoch": 3.4164620752035146, "learning_rate": 8.082053050054945e-07, "loss": 4.032, "step": 79320 }, { "epoch": 3.417323512943102, "learning_rate": 8.081568230300157e-07, "loss": 4.4536, "step": 79340 }, { "epoch": 3.4181849506826896, "learning_rate": 8.081083410545369e-07, "loss": 4.1274, "step": 79360 }, { "epoch": 3.419046388422277, "learning_rate": 8.080598590790579e-07, "loss": 4.0179, "step": 79380 }, { "epoch": 3.419907826161864, "learning_rate": 8.08011377103579e-07, "loss": 4.0286, "step": 79400 }, { "epoch": 3.4207692639014518, "learning_rate": 8.079628951281001e-07, "loss": 4.1304, "step": 79420 }, { "epoch": 3.421630701641039, "learning_rate": 8.079144131526213e-07, "loss": 3.9421, "step": 79440 }, { "epoch": 3.4224921393806262, "learning_rate": 8.078659311771423e-07, "loss": 4.1418, "step": 79460 }, { "epoch": 3.4233535771202135, "learning_rate": 8.078174492016634e-07, "loss": 4.3102, "step": 79480 }, { "epoch": 3.424215014859801, "learning_rate": 8.077689672261846e-07, "loss": 3.9138, "step": 79500 }, { "epoch": 3.4250764525993884, "learning_rate": 8.077204852507058e-07, "loss": 3.9956, "step": 79520 }, { "epoch": 3.4259378903389757, "learning_rate": 8.076720032752267e-07, "loss": 4.123, "step": 79540 }, { "epoch": 3.4267993280785634, "learning_rate": 8.076235212997478e-07, "loss": 4.0827, "step": 79560 }, { "epoch": 3.4276607658181506, "learning_rate": 8.07575039324269e-07, "loss": 3.9543, "step": 79580 }, { "epoch": 3.428522203557738, "learning_rate": 8.0752655734879e-07, "loss": 4.219, "step": 79600 }, { "epoch": 3.429383641297325, "learning_rate": 8.074780753733112e-07, "loss": 4.2555, "step": 79620 }, { "epoch": 3.430245079036913, "learning_rate": 8.074295933978323e-07, "loss": 4.0652, "step": 79640 }, { "epoch": 3.4311065167765, "learning_rate": 8.073811114223534e-07, "loss": 4.0591, "step": 79660 }, { "epoch": 3.4319679545160873, "learning_rate": 8.073326294468744e-07, "loss": 4.1497, "step": 79680 }, { "epoch": 3.432829392255675, "learning_rate": 8.072841474713956e-07, "loss": 3.9374, "step": 79700 }, { "epoch": 3.433690829995262, "learning_rate": 8.072356654959167e-07, "loss": 4.2275, "step": 79720 }, { "epoch": 3.4345522677348495, "learning_rate": 8.071871835204379e-07, "loss": 4.0602, "step": 79740 }, { "epoch": 3.4354137054744367, "learning_rate": 8.071387015449589e-07, "loss": 4.0096, "step": 79760 }, { "epoch": 3.4362751432140244, "learning_rate": 8.0709021956948e-07, "loss": 4.1275, "step": 79780 }, { "epoch": 3.4371365809536116, "learning_rate": 8.070417375940011e-07, "loss": 4.0328, "step": 79800 }, { "epoch": 3.437998018693199, "learning_rate": 8.069932556185223e-07, "loss": 4.162, "step": 79820 }, { "epoch": 3.4388594564327866, "learning_rate": 8.069447736430433e-07, "loss": 4.3089, "step": 79840 }, { "epoch": 3.439720894172374, "learning_rate": 8.068962916675643e-07, "loss": 3.8873, "step": 79860 }, { "epoch": 3.440582331911961, "learning_rate": 8.068478096920856e-07, "loss": 4.2677, "step": 79880 }, { "epoch": 3.4414437696515483, "learning_rate": 8.067993277166068e-07, "loss": 4.2398, "step": 79900 }, { "epoch": 3.442305207391136, "learning_rate": 8.067508457411277e-07, "loss": 3.9996, "step": 79920 }, { "epoch": 3.4431666451307232, "learning_rate": 8.067023637656488e-07, "loss": 3.9097, "step": 79940 }, { "epoch": 3.4440280828703105, "learning_rate": 8.066538817901701e-07, "loss": 4.1524, "step": 79960 }, { "epoch": 3.4448895206098977, "learning_rate": 8.066053998146911e-07, "loss": 4.1301, "step": 79980 }, { "epoch": 3.4457509583494854, "learning_rate": 8.065569178392122e-07, "loss": 4.2439, "step": 80000 }, { "epoch": 3.4466123960890727, "learning_rate": 8.065084358637333e-07, "loss": 4.1968, "step": 80020 }, { "epoch": 3.44747383382866, "learning_rate": 8.064599538882544e-07, "loss": 3.9466, "step": 80040 }, { "epoch": 3.4483352715682476, "learning_rate": 8.064114719127755e-07, "loss": 4.2493, "step": 80060 }, { "epoch": 3.449196709307835, "learning_rate": 8.063629899372966e-07, "loss": 4.1672, "step": 80080 }, { "epoch": 3.450058147047422, "learning_rate": 8.063145079618177e-07, "loss": 3.9515, "step": 80100 }, { "epoch": 3.4509195847870093, "learning_rate": 8.062660259863389e-07, "loss": 3.8628, "step": 80120 }, { "epoch": 3.451781022526597, "learning_rate": 8.062175440108599e-07, "loss": 4.1605, "step": 80140 }, { "epoch": 3.4526424602661843, "learning_rate": 8.061690620353811e-07, "loss": 4.0109, "step": 80160 }, { "epoch": 3.4535038980057715, "learning_rate": 8.061205800599021e-07, "loss": 3.985, "step": 80180 }, { "epoch": 3.4543653357453588, "learning_rate": 8.060720980844232e-07, "loss": 4.147, "step": 80200 }, { "epoch": 3.4552267734849464, "learning_rate": 8.060236161089443e-07, "loss": 4.2112, "step": 80220 }, { "epoch": 3.4560882112245337, "learning_rate": 8.059751341334655e-07, "loss": 4.1059, "step": 80240 }, { "epoch": 3.456949648964121, "learning_rate": 8.059266521579866e-07, "loss": 4.1683, "step": 80260 }, { "epoch": 3.4578110867037086, "learning_rate": 8.058781701825077e-07, "loss": 4.3456, "step": 80280 }, { "epoch": 3.458672524443296, "learning_rate": 8.058296882070287e-07, "loss": 4.1346, "step": 80300 }, { "epoch": 3.459533962182883, "learning_rate": 8.057812062315499e-07, "loss": 4.1153, "step": 80320 }, { "epoch": 3.4603953999224704, "learning_rate": 8.05732724256071e-07, "loss": 3.8168, "step": 80340 }, { "epoch": 3.461256837662058, "learning_rate": 8.056842422805921e-07, "loss": 4.1554, "step": 80360 }, { "epoch": 3.4621182754016453, "learning_rate": 8.056357603051133e-07, "loss": 3.9205, "step": 80380 }, { "epoch": 3.4629797131412325, "learning_rate": 8.055872783296343e-07, "loss": 4.0034, "step": 80400 }, { "epoch": 3.4638411508808202, "learning_rate": 8.055387963541554e-07, "loss": 4.233, "step": 80420 }, { "epoch": 3.4647025886204075, "learning_rate": 8.054903143786765e-07, "loss": 4.2769, "step": 80440 }, { "epoch": 3.4655640263599947, "learning_rate": 8.054418324031976e-07, "loss": 3.8424, "step": 80460 }, { "epoch": 3.466425464099582, "learning_rate": 8.053933504277187e-07, "loss": 3.8662, "step": 80480 }, { "epoch": 3.4672869018391697, "learning_rate": 8.053448684522399e-07, "loss": 3.9894, "step": 80500 }, { "epoch": 3.468148339578757, "learning_rate": 8.05296386476761e-07, "loss": 4.0765, "step": 80520 }, { "epoch": 3.469009777318344, "learning_rate": 8.052479045012821e-07, "loss": 4.0221, "step": 80540 }, { "epoch": 3.469871215057932, "learning_rate": 8.051994225258031e-07, "loss": 4.044, "step": 80560 }, { "epoch": 3.470732652797519, "learning_rate": 8.051509405503242e-07, "loss": 4.0196, "step": 80580 }, { "epoch": 3.4715940905371063, "learning_rate": 8.051024585748454e-07, "loss": 4.1307, "step": 80600 }, { "epoch": 3.4724555282766936, "learning_rate": 8.050539765993665e-07, "loss": 3.9635, "step": 80620 }, { "epoch": 3.4733169660162813, "learning_rate": 8.050054946238876e-07, "loss": 4.1048, "step": 80640 }, { "epoch": 3.4741784037558685, "learning_rate": 8.049570126484087e-07, "loss": 3.9423, "step": 80660 }, { "epoch": 3.4750398414954558, "learning_rate": 8.049085306729297e-07, "loss": 3.9662, "step": 80680 }, { "epoch": 3.4759012792350434, "learning_rate": 8.048600486974509e-07, "loss": 4.0199, "step": 80700 }, { "epoch": 3.4767627169746307, "learning_rate": 8.04811566721972e-07, "loss": 4.1143, "step": 80720 }, { "epoch": 3.477624154714218, "learning_rate": 8.047630847464931e-07, "loss": 4.2284, "step": 80740 }, { "epoch": 3.478485592453805, "learning_rate": 8.047146027710142e-07, "loss": 4.204, "step": 80760 }, { "epoch": 3.479347030193393, "learning_rate": 8.046661207955354e-07, "loss": 3.8848, "step": 80780 }, { "epoch": 3.48020846793298, "learning_rate": 8.046176388200563e-07, "loss": 4.0762, "step": 80800 }, { "epoch": 3.4810699056725674, "learning_rate": 8.045691568445775e-07, "loss": 4.0738, "step": 80820 }, { "epoch": 3.481931343412155, "learning_rate": 8.045206748690986e-07, "loss": 4.0487, "step": 80840 }, { "epoch": 3.4827927811517423, "learning_rate": 8.044721928936198e-07, "loss": 4.0359, "step": 80860 }, { "epoch": 3.4836542188913295, "learning_rate": 8.044237109181408e-07, "loss": 3.9768, "step": 80880 }, { "epoch": 3.484515656630917, "learning_rate": 8.043752289426621e-07, "loss": 4.0637, "step": 80900 }, { "epoch": 3.4853770943705045, "learning_rate": 8.043267469671831e-07, "loss": 3.9452, "step": 80920 }, { "epoch": 3.4862385321100917, "learning_rate": 8.042782649917041e-07, "loss": 3.9939, "step": 80940 }, { "epoch": 3.487099969849679, "learning_rate": 8.042297830162252e-07, "loss": 4.086, "step": 80960 }, { "epoch": 3.4879614075892666, "learning_rate": 8.041813010407464e-07, "loss": 4.0662, "step": 80980 }, { "epoch": 3.488822845328854, "learning_rate": 8.041328190652675e-07, "loss": 4.1216, "step": 81000 }, { "epoch": 3.489684283068441, "learning_rate": 8.040843370897886e-07, "loss": 3.9984, "step": 81020 }, { "epoch": 3.4905457208080284, "learning_rate": 8.040358551143097e-07, "loss": 3.9435, "step": 81040 }, { "epoch": 3.491407158547616, "learning_rate": 8.039873731388308e-07, "loss": 4.1261, "step": 81060 }, { "epoch": 3.4922685962872033, "learning_rate": 8.039388911633519e-07, "loss": 4.124, "step": 81080 }, { "epoch": 3.4931300340267906, "learning_rate": 8.03890409187873e-07, "loss": 3.9723, "step": 81100 }, { "epoch": 3.4939914717663783, "learning_rate": 8.038419272123941e-07, "loss": 3.957, "step": 81120 }, { "epoch": 3.4948529095059655, "learning_rate": 8.037934452369152e-07, "loss": 3.9559, "step": 81140 }, { "epoch": 3.4957143472455527, "learning_rate": 8.037449632614364e-07, "loss": 4.1893, "step": 81160 }, { "epoch": 3.49657578498514, "learning_rate": 8.036964812859574e-07, "loss": 4.1786, "step": 81180 }, { "epoch": 3.4974372227247277, "learning_rate": 8.036479993104785e-07, "loss": 4.1742, "step": 81200 }, { "epoch": 3.498298660464315, "learning_rate": 8.035995173349997e-07, "loss": 4.2267, "step": 81220 }, { "epoch": 3.499160098203902, "learning_rate": 8.035510353595208e-07, "loss": 4.245, "step": 81240 }, { "epoch": 3.50002153594349, "learning_rate": 8.035025533840418e-07, "loss": 4.1625, "step": 81260 }, { "epoch": 3.500882973683077, "learning_rate": 8.03454071408563e-07, "loss": 3.9312, "step": 81280 }, { "epoch": 3.5017444114226643, "learning_rate": 8.034055894330841e-07, "loss": 4.0937, "step": 81300 }, { "epoch": 3.5026058491622516, "learning_rate": 8.033571074576052e-07, "loss": 4.0108, "step": 81320 }, { "epoch": 3.5034672869018393, "learning_rate": 8.033086254821262e-07, "loss": 4.133, "step": 81340 }, { "epoch": 3.5043287246414265, "learning_rate": 8.032601435066474e-07, "loss": 4.1074, "step": 81360 }, { "epoch": 3.5051901623810138, "learning_rate": 8.032116615311685e-07, "loss": 3.9485, "step": 81380 }, { "epoch": 3.5060516001206015, "learning_rate": 8.031631795556897e-07, "loss": 4.1104, "step": 81400 }, { "epoch": 3.5069130378601887, "learning_rate": 8.031146975802107e-07, "loss": 4.1912, "step": 81420 }, { "epoch": 3.507774475599776, "learning_rate": 8.030662156047318e-07, "loss": 4.0675, "step": 81440 }, { "epoch": 3.508635913339363, "learning_rate": 8.030177336292529e-07, "loss": 4.0326, "step": 81460 }, { "epoch": 3.509497351078951, "learning_rate": 8.029692516537739e-07, "loss": 3.8711, "step": 81480 }, { "epoch": 3.510358788818538, "learning_rate": 8.029207696782951e-07, "loss": 4.0017, "step": 81500 }, { "epoch": 3.5112202265581254, "learning_rate": 8.028722877028163e-07, "loss": 4.1761, "step": 81520 }, { "epoch": 3.512081664297713, "learning_rate": 8.028238057273374e-07, "loss": 4.0444, "step": 81540 }, { "epoch": 3.5129431020373003, "learning_rate": 8.027753237518584e-07, "loss": 4.0998, "step": 81560 }, { "epoch": 3.5138045397768876, "learning_rate": 8.027268417763795e-07, "loss": 4.1265, "step": 81580 }, { "epoch": 3.514665977516475, "learning_rate": 8.026783598009007e-07, "loss": 4.1821, "step": 81600 }, { "epoch": 3.5155274152560625, "learning_rate": 8.026298778254218e-07, "loss": 3.9004, "step": 81620 }, { "epoch": 3.5163888529956497, "learning_rate": 8.025813958499427e-07, "loss": 4.1587, "step": 81640 }, { "epoch": 3.517250290735237, "learning_rate": 8.02532913874464e-07, "loss": 4.0986, "step": 81660 }, { "epoch": 3.5181117284748247, "learning_rate": 8.024844318989851e-07, "loss": 4.1177, "step": 81680 }, { "epoch": 3.518973166214412, "learning_rate": 8.024359499235062e-07, "loss": 4.1484, "step": 81700 }, { "epoch": 3.519834603953999, "learning_rate": 8.023874679480272e-07, "loss": 3.9485, "step": 81720 }, { "epoch": 3.5206960416935864, "learning_rate": 8.023389859725485e-07, "loss": 4.1861, "step": 81740 }, { "epoch": 3.521557479433174, "learning_rate": 8.022905039970695e-07, "loss": 4.1655, "step": 81760 }, { "epoch": 3.5224189171727613, "learning_rate": 8.022420220215906e-07, "loss": 4.0772, "step": 81780 }, { "epoch": 3.5232803549123486, "learning_rate": 8.021935400461117e-07, "loss": 3.9857, "step": 81800 }, { "epoch": 3.5241417926519363, "learning_rate": 8.021450580706328e-07, "loss": 3.828, "step": 81820 }, { "epoch": 3.5250032303915235, "learning_rate": 8.020965760951539e-07, "loss": 4.0148, "step": 81840 }, { "epoch": 3.5258646681311108, "learning_rate": 8.02048094119675e-07, "loss": 4.0338, "step": 81860 }, { "epoch": 3.526726105870698, "learning_rate": 8.019996121441961e-07, "loss": 4.0018, "step": 81880 }, { "epoch": 3.5275875436102857, "learning_rate": 8.019511301687173e-07, "loss": 4.0669, "step": 81900 }, { "epoch": 3.528448981349873, "learning_rate": 8.019026481932384e-07, "loss": 4.2146, "step": 81920 }, { "epoch": 3.52931041908946, "learning_rate": 8.018541662177595e-07, "loss": 4.1122, "step": 81940 }, { "epoch": 3.530171856829048, "learning_rate": 8.018056842422805e-07, "loss": 4.1816, "step": 81960 }, { "epoch": 3.531033294568635, "learning_rate": 8.017572022668017e-07, "loss": 4.1509, "step": 81980 }, { "epoch": 3.5318947323082224, "learning_rate": 8.017087202913228e-07, "loss": 4.1564, "step": 82000 }, { "epoch": 3.5327561700478096, "learning_rate": 8.016602383158438e-07, "loss": 4.0108, "step": 82020 }, { "epoch": 3.5336176077873973, "learning_rate": 8.01611756340365e-07, "loss": 4.1146, "step": 82040 }, { "epoch": 3.5344790455269846, "learning_rate": 8.015632743648862e-07, "loss": 4.1704, "step": 82060 }, { "epoch": 3.535340483266572, "learning_rate": 8.015147923894071e-07, "loss": 3.7886, "step": 82080 }, { "epoch": 3.5362019210061595, "learning_rate": 8.014663104139282e-07, "loss": 4.2163, "step": 82100 }, { "epoch": 3.5370633587457467, "learning_rate": 8.014178284384494e-07, "loss": 4.0676, "step": 82120 }, { "epoch": 3.537924796485334, "learning_rate": 8.013693464629706e-07, "loss": 4.1845, "step": 82140 }, { "epoch": 3.5387862342249212, "learning_rate": 8.013208644874917e-07, "loss": 4.2671, "step": 82160 }, { "epoch": 3.539647671964509, "learning_rate": 8.012723825120127e-07, "loss": 3.924, "step": 82180 }, { "epoch": 3.540509109704096, "learning_rate": 8.012239005365338e-07, "loss": 3.8993, "step": 82200 }, { "epoch": 3.5413705474436834, "learning_rate": 8.011754185610549e-07, "loss": 3.9327, "step": 82220 }, { "epoch": 3.542231985183271, "learning_rate": 8.01126936585576e-07, "loss": 3.818, "step": 82240 }, { "epoch": 3.5430934229228583, "learning_rate": 8.010784546100971e-07, "loss": 3.9274, "step": 82260 }, { "epoch": 3.5439548606624456, "learning_rate": 8.010299726346183e-07, "loss": 4.0804, "step": 82280 }, { "epoch": 3.544816298402033, "learning_rate": 8.009814906591394e-07, "loss": 3.9787, "step": 82300 }, { "epoch": 3.5456777361416205, "learning_rate": 8.009330086836605e-07, "loss": 4.2806, "step": 82320 }, { "epoch": 3.5465391738812078, "learning_rate": 8.008845267081815e-07, "loss": 4.0621, "step": 82340 }, { "epoch": 3.547400611620795, "learning_rate": 8.008360447327027e-07, "loss": 4.116, "step": 82360 }, { "epoch": 3.5482620493603827, "learning_rate": 8.007875627572237e-07, "loss": 4.0408, "step": 82380 }, { "epoch": 3.54912348709997, "learning_rate": 8.007390807817449e-07, "loss": 4.0807, "step": 82400 }, { "epoch": 3.549984924839557, "learning_rate": 8.00690598806266e-07, "loss": 4.076, "step": 82420 }, { "epoch": 3.5508463625791444, "learning_rate": 8.006421168307872e-07, "loss": 3.9425, "step": 82440 }, { "epoch": 3.551707800318732, "learning_rate": 8.005936348553081e-07, "loss": 4.1282, "step": 82460 }, { "epoch": 3.5525692380583194, "learning_rate": 8.005451528798293e-07, "loss": 4.0561, "step": 82480 }, { "epoch": 3.5534306757979066, "learning_rate": 8.004966709043504e-07, "loss": 4.1453, "step": 82500 }, { "epoch": 3.5542921135374943, "learning_rate": 8.004481889288716e-07, "loss": 4.0067, "step": 82520 }, { "epoch": 3.5551535512770815, "learning_rate": 8.003997069533926e-07, "loss": 4.0551, "step": 82540 }, { "epoch": 3.556014989016669, "learning_rate": 8.003512249779137e-07, "loss": 4.0325, "step": 82560 }, { "epoch": 3.556876426756256, "learning_rate": 8.003027430024348e-07, "loss": 4.1309, "step": 82580 }, { "epoch": 3.5577378644958437, "learning_rate": 8.00254261026956e-07, "loss": 4.1027, "step": 82600 }, { "epoch": 3.558599302235431, "learning_rate": 8.00205779051477e-07, "loss": 4.0887, "step": 82620 }, { "epoch": 3.559460739975018, "learning_rate": 8.001572970759981e-07, "loss": 4.1003, "step": 82640 }, { "epoch": 3.560322177714606, "learning_rate": 8.001088151005193e-07, "loss": 3.9115, "step": 82660 }, { "epoch": 3.561183615454193, "learning_rate": 8.000603331250406e-07, "loss": 3.8889, "step": 82680 }, { "epoch": 3.5620450531937804, "learning_rate": 8.000118511495615e-07, "loss": 4.2752, "step": 82700 }, { "epoch": 3.5629064909333676, "learning_rate": 7.999633691740825e-07, "loss": 4.1105, "step": 82720 }, { "epoch": 3.563767928672955, "learning_rate": 7.999148871986037e-07, "loss": 3.9305, "step": 82740 }, { "epoch": 3.5646293664125426, "learning_rate": 7.998664052231247e-07, "loss": 4.221, "step": 82760 }, { "epoch": 3.56549080415213, "learning_rate": 7.998179232476459e-07, "loss": 3.8642, "step": 82780 }, { "epoch": 3.5663522418917175, "learning_rate": 7.99769441272167e-07, "loss": 4.0657, "step": 82800 }, { "epoch": 3.5672136796313048, "learning_rate": 7.997209592966882e-07, "loss": 4.0627, "step": 82820 }, { "epoch": 3.568075117370892, "learning_rate": 7.996724773212091e-07, "loss": 3.9574, "step": 82840 }, { "epoch": 3.5689365551104792, "learning_rate": 7.996239953457303e-07, "loss": 4.0379, "step": 82860 }, { "epoch": 3.5697979928500665, "learning_rate": 7.995755133702514e-07, "loss": 3.9904, "step": 82880 }, { "epoch": 3.570659430589654, "learning_rate": 7.995270313947726e-07, "loss": 3.9798, "step": 82900 }, { "epoch": 3.5715208683292414, "learning_rate": 7.994785494192936e-07, "loss": 3.9636, "step": 82920 }, { "epoch": 3.572382306068829, "learning_rate": 7.994300674438148e-07, "loss": 4.0279, "step": 82940 }, { "epoch": 3.5732437438084164, "learning_rate": 7.993815854683359e-07, "loss": 3.9741, "step": 82960 }, { "epoch": 3.5741051815480036, "learning_rate": 7.99333103492857e-07, "loss": 3.9285, "step": 82980 }, { "epoch": 3.574966619287591, "learning_rate": 7.992846215173781e-07, "loss": 4.0888, "step": 83000 }, { "epoch": 3.575828057027178, "learning_rate": 7.992361395418992e-07, "loss": 3.9734, "step": 83020 }, { "epoch": 3.576689494766766, "learning_rate": 7.991876575664203e-07, "loss": 4.2585, "step": 83040 }, { "epoch": 3.577550932506353, "learning_rate": 7.991391755909414e-07, "loss": 3.9455, "step": 83060 }, { "epoch": 3.5784123702459407, "learning_rate": 7.990906936154625e-07, "loss": 4.0289, "step": 83080 }, { "epoch": 3.579273807985528, "learning_rate": 7.990422116399835e-07, "loss": 4.2913, "step": 83100 }, { "epoch": 3.580135245725115, "learning_rate": 7.989937296645047e-07, "loss": 3.8413, "step": 83120 }, { "epoch": 3.5809966834647025, "learning_rate": 7.989452476890258e-07, "loss": 4.0535, "step": 83140 }, { "epoch": 3.5818581212042897, "learning_rate": 7.988967657135469e-07, "loss": 4.0097, "step": 83160 }, { "epoch": 3.5827195589438774, "learning_rate": 7.98848283738068e-07, "loss": 3.9713, "step": 83180 }, { "epoch": 3.5835809966834646, "learning_rate": 7.987998017625892e-07, "loss": 3.8081, "step": 83200 }, { "epoch": 3.5844424344230523, "learning_rate": 7.987513197871102e-07, "loss": 4.1517, "step": 83220 }, { "epoch": 3.5853038721626396, "learning_rate": 7.987028378116313e-07, "loss": 4.1589, "step": 83240 }, { "epoch": 3.586165309902227, "learning_rate": 7.986543558361524e-07, "loss": 4.0807, "step": 83260 }, { "epoch": 3.587026747641814, "learning_rate": 7.986058738606736e-07, "loss": 4.1116, "step": 83280 }, { "epoch": 3.5878881853814013, "learning_rate": 7.985573918851946e-07, "loss": 3.9938, "step": 83300 }, { "epoch": 3.588749623120989, "learning_rate": 7.985089099097158e-07, "loss": 4.0718, "step": 83320 }, { "epoch": 3.5896110608605762, "learning_rate": 7.984604279342369e-07, "loss": 3.8139, "step": 83340 }, { "epoch": 3.590472498600164, "learning_rate": 7.984119459587579e-07, "loss": 4.0234, "step": 83360 }, { "epoch": 3.591333936339751, "learning_rate": 7.98363463983279e-07, "loss": 4.1785, "step": 83380 }, { "epoch": 3.5921953740793384, "learning_rate": 7.983149820078002e-07, "loss": 3.8668, "step": 83400 }, { "epoch": 3.5930568118189257, "learning_rate": 7.982665000323212e-07, "loss": 3.9668, "step": 83420 }, { "epoch": 3.593918249558513, "learning_rate": 7.982180180568424e-07, "loss": 3.9363, "step": 83440 }, { "epoch": 3.5947796872981006, "learning_rate": 7.981695360813635e-07, "loss": 3.9967, "step": 83460 }, { "epoch": 3.595641125037688, "learning_rate": 7.981210541058846e-07, "loss": 3.844, "step": 83480 }, { "epoch": 3.5965025627772755, "learning_rate": 7.980725721304057e-07, "loss": 3.9268, "step": 83500 }, { "epoch": 3.597364000516863, "learning_rate": 7.980240901549269e-07, "loss": 4.1531, "step": 83520 }, { "epoch": 3.59822543825645, "learning_rate": 7.979756081794479e-07, "loss": 3.9779, "step": 83540 }, { "epoch": 3.5990868759960373, "learning_rate": 7.979271262039691e-07, "loss": 3.9095, "step": 83560 }, { "epoch": 3.5999483137356245, "learning_rate": 7.978786442284902e-07, "loss": 3.9255, "step": 83580 }, { "epoch": 3.600809751475212, "learning_rate": 7.978301622530112e-07, "loss": 3.8338, "step": 83600 }, { "epoch": 3.6016711892147995, "learning_rate": 7.977816802775323e-07, "loss": 4.0991, "step": 83620 }, { "epoch": 3.602532626954387, "learning_rate": 7.977331983020534e-07, "loss": 4.0504, "step": 83640 }, { "epoch": 3.6033940646939744, "learning_rate": 7.976847163265745e-07, "loss": 4.2887, "step": 83660 }, { "epoch": 3.6042555024335616, "learning_rate": 7.976362343510957e-07, "loss": 4.0872, "step": 83680 }, { "epoch": 3.605116940173149, "learning_rate": 7.975877523756168e-07, "loss": 4.2421, "step": 83700 }, { "epoch": 3.605978377912736, "learning_rate": 7.975392704001379e-07, "loss": 4.0565, "step": 83720 }, { "epoch": 3.606839815652324, "learning_rate": 7.974907884246589e-07, "loss": 4.077, "step": 83740 }, { "epoch": 3.607701253391911, "learning_rate": 7.974423064491801e-07, "loss": 4.0384, "step": 83760 }, { "epoch": 3.6085626911314987, "learning_rate": 7.973938244737012e-07, "loss": 3.9828, "step": 83780 }, { "epoch": 3.609424128871086, "learning_rate": 7.973453424982223e-07, "loss": 3.8051, "step": 83800 }, { "epoch": 3.6102855666106732, "learning_rate": 7.972968605227434e-07, "loss": 4.0368, "step": 83820 }, { "epoch": 3.6111470043502605, "learning_rate": 7.972483785472645e-07, "loss": 3.9145, "step": 83840 }, { "epoch": 3.6120084420898477, "learning_rate": 7.971998965717856e-07, "loss": 3.9743, "step": 83860 }, { "epoch": 3.6128698798294354, "learning_rate": 7.971514145963067e-07, "loss": 3.9877, "step": 83880 }, { "epoch": 3.6137313175690227, "learning_rate": 7.971029326208278e-07, "loss": 4.1971, "step": 83900 }, { "epoch": 3.61459275530861, "learning_rate": 7.970544506453489e-07, "loss": 4.0465, "step": 83920 }, { "epoch": 3.6154541930481976, "learning_rate": 7.9700596866987e-07, "loss": 4.0264, "step": 83940 }, { "epoch": 3.616315630787785, "learning_rate": 7.969574866943911e-07, "loss": 4.0337, "step": 83960 }, { "epoch": 3.617177068527372, "learning_rate": 7.969090047189122e-07, "loss": 3.9109, "step": 83980 }, { "epoch": 3.6180385062669593, "learning_rate": 7.968605227434333e-07, "loss": 3.8259, "step": 84000 }, { "epoch": 3.618899944006547, "learning_rate": 7.968120407679545e-07, "loss": 4.1498, "step": 84020 }, { "epoch": 3.6197613817461343, "learning_rate": 7.967635587924755e-07, "loss": 4.1478, "step": 84040 }, { "epoch": 3.6206228194857215, "learning_rate": 7.967150768169967e-07, "loss": 3.9209, "step": 84060 }, { "epoch": 3.621484257225309, "learning_rate": 7.966665948415178e-07, "loss": 3.9723, "step": 84080 }, { "epoch": 3.6223456949648964, "learning_rate": 7.96618112866039e-07, "loss": 3.8666, "step": 84100 }, { "epoch": 3.6232071327044837, "learning_rate": 7.965696308905599e-07, "loss": 3.7975, "step": 84120 }, { "epoch": 3.624068570444071, "learning_rate": 7.965211489150811e-07, "loss": 4.0348, "step": 84140 }, { "epoch": 3.6249300081836586, "learning_rate": 7.964726669396022e-07, "loss": 4.156, "step": 84160 }, { "epoch": 3.625791445923246, "learning_rate": 7.964241849641233e-07, "loss": 4.0054, "step": 84180 }, { "epoch": 3.626652883662833, "learning_rate": 7.963757029886444e-07, "loss": 4.2446, "step": 84200 }, { "epoch": 3.627514321402421, "learning_rate": 7.963272210131656e-07, "loss": 4.0446, "step": 84220 }, { "epoch": 3.628375759142008, "learning_rate": 7.962787390376866e-07, "loss": 4.0529, "step": 84240 }, { "epoch": 3.6292371968815953, "learning_rate": 7.962302570622075e-07, "loss": 3.8629, "step": 84260 }, { "epoch": 3.6300986346211825, "learning_rate": 7.961817750867288e-07, "loss": 3.9902, "step": 84280 }, { "epoch": 3.6309600723607702, "learning_rate": 7.9613329311125e-07, "loss": 3.8487, "step": 84300 }, { "epoch": 3.6318215101003575, "learning_rate": 7.960848111357711e-07, "loss": 3.9716, "step": 84320 }, { "epoch": 3.6326829478399447, "learning_rate": 7.960363291602921e-07, "loss": 3.9602, "step": 84340 }, { "epoch": 3.6335443855795324, "learning_rate": 7.959878471848132e-07, "loss": 3.9671, "step": 84360 }, { "epoch": 3.6344058233191197, "learning_rate": 7.959393652093343e-07, "loss": 3.8171, "step": 84380 }, { "epoch": 3.635267261058707, "learning_rate": 7.958908832338555e-07, "loss": 3.9961, "step": 84400 }, { "epoch": 3.636128698798294, "learning_rate": 7.958424012583765e-07, "loss": 4.049, "step": 84420 }, { "epoch": 3.636990136537882, "learning_rate": 7.957939192828977e-07, "loss": 4.0956, "step": 84440 }, { "epoch": 3.637851574277469, "learning_rate": 7.957454373074188e-07, "loss": 3.9901, "step": 84460 }, { "epoch": 3.6387130120170563, "learning_rate": 7.9569695533194e-07, "loss": 3.8085, "step": 84480 }, { "epoch": 3.639574449756644, "learning_rate": 7.956484733564609e-07, "loss": 4.0237, "step": 84500 }, { "epoch": 3.6404358874962313, "learning_rate": 7.955999913809821e-07, "loss": 3.8252, "step": 84520 }, { "epoch": 3.6412973252358185, "learning_rate": 7.955515094055032e-07, "loss": 3.9908, "step": 84540 }, { "epoch": 3.6421587629754058, "learning_rate": 7.955030274300244e-07, "loss": 3.9034, "step": 84560 }, { "epoch": 3.6430202007149934, "learning_rate": 7.954545454545454e-07, "loss": 3.8876, "step": 84580 }, { "epoch": 3.6438816384545807, "learning_rate": 7.954060634790666e-07, "loss": 4.1822, "step": 84600 }, { "epoch": 3.644743076194168, "learning_rate": 7.953575815035876e-07, "loss": 3.8962, "step": 84620 }, { "epoch": 3.6456045139337556, "learning_rate": 7.953090995281087e-07, "loss": 3.9814, "step": 84640 }, { "epoch": 3.646465951673343, "learning_rate": 7.952606175526298e-07, "loss": 3.759, "step": 84660 }, { "epoch": 3.64732738941293, "learning_rate": 7.95212135577151e-07, "loss": 3.8853, "step": 84680 }, { "epoch": 3.6481888271525174, "learning_rate": 7.951636536016721e-07, "loss": 3.8057, "step": 84700 }, { "epoch": 3.649050264892105, "learning_rate": 7.951151716261931e-07, "loss": 4.0741, "step": 84720 }, { "epoch": 3.6499117026316923, "learning_rate": 7.950666896507143e-07, "loss": 3.9503, "step": 84740 }, { "epoch": 3.6507731403712795, "learning_rate": 7.950182076752354e-07, "loss": 3.9285, "step": 84760 }, { "epoch": 3.6516345781108672, "learning_rate": 7.949697256997566e-07, "loss": 3.9181, "step": 84780 }, { "epoch": 3.6524960158504545, "learning_rate": 7.949212437242775e-07, "loss": 3.9765, "step": 84800 }, { "epoch": 3.6533574535900417, "learning_rate": 7.948727617487987e-07, "loss": 4.0099, "step": 84820 }, { "epoch": 3.654218891329629, "learning_rate": 7.948242797733199e-07, "loss": 3.8433, "step": 84840 }, { "epoch": 3.6550803290692166, "learning_rate": 7.94775797797841e-07, "loss": 4.1787, "step": 84860 }, { "epoch": 3.655941766808804, "learning_rate": 7.947273158223619e-07, "loss": 3.6115, "step": 84880 }, { "epoch": 3.656803204548391, "learning_rate": 7.946788338468831e-07, "loss": 4.1178, "step": 84900 }, { "epoch": 3.657664642287979, "learning_rate": 7.946303518714042e-07, "loss": 4.0226, "step": 84920 }, { "epoch": 3.658526080027566, "learning_rate": 7.945818698959253e-07, "loss": 4.1416, "step": 84940 }, { "epoch": 3.6593875177671533, "learning_rate": 7.945333879204464e-07, "loss": 3.9804, "step": 84960 }, { "epoch": 3.6602489555067406, "learning_rate": 7.944849059449676e-07, "loss": 4.0505, "step": 84980 }, { "epoch": 3.6611103932463283, "learning_rate": 7.944364239694886e-07, "loss": 3.7466, "step": 85000 }, { "epoch": 3.6619718309859155, "learning_rate": 7.943879419940097e-07, "loss": 3.9504, "step": 85020 }, { "epoch": 3.6628332687255027, "learning_rate": 7.943394600185308e-07, "loss": 4.1651, "step": 85040 }, { "epoch": 3.6636947064650904, "learning_rate": 7.94290978043052e-07, "loss": 4.0615, "step": 85060 }, { "epoch": 3.6645561442046777, "learning_rate": 7.942424960675731e-07, "loss": 3.938, "step": 85080 }, { "epoch": 3.665417581944265, "learning_rate": 7.941940140920942e-07, "loss": 3.7943, "step": 85100 }, { "epoch": 3.666279019683852, "learning_rate": 7.941455321166153e-07, "loss": 4.0416, "step": 85120 }, { "epoch": 3.66714045742344, "learning_rate": 7.940970501411364e-07, "loss": 3.9876, "step": 85140 }, { "epoch": 3.668001895163027, "learning_rate": 7.940485681656574e-07, "loss": 3.9365, "step": 85160 }, { "epoch": 3.6688633329026143, "learning_rate": 7.940000861901785e-07, "loss": 3.9608, "step": 85180 }, { "epoch": 3.669724770642202, "learning_rate": 7.939516042146996e-07, "loss": 3.8254, "step": 85200 }, { "epoch": 3.6705862083817893, "learning_rate": 7.939031222392209e-07, "loss": 3.8515, "step": 85220 }, { "epoch": 3.6714476461213765, "learning_rate": 7.93854640263742e-07, "loss": 4.2451, "step": 85240 }, { "epoch": 3.6723090838609638, "learning_rate": 7.938061582882629e-07, "loss": 3.9635, "step": 85260 }, { "epoch": 3.6731705216005515, "learning_rate": 7.937576763127841e-07, "loss": 4.0323, "step": 85280 }, { "epoch": 3.6740319593401387, "learning_rate": 7.937091943373054e-07, "loss": 3.9351, "step": 85300 }, { "epoch": 3.674893397079726, "learning_rate": 7.936607123618263e-07, "loss": 4.14, "step": 85320 }, { "epoch": 3.6757548348193136, "learning_rate": 7.936122303863474e-07, "loss": 3.8565, "step": 85340 }, { "epoch": 3.676616272558901, "learning_rate": 7.935637484108686e-07, "loss": 3.9187, "step": 85360 }, { "epoch": 3.677477710298488, "learning_rate": 7.935152664353897e-07, "loss": 4.1863, "step": 85380 }, { "epoch": 3.6783391480380754, "learning_rate": 7.934667844599107e-07, "loss": 4.061, "step": 85400 }, { "epoch": 3.679200585777663, "learning_rate": 7.934183024844318e-07, "loss": 3.716, "step": 85420 }, { "epoch": 3.6800620235172503, "learning_rate": 7.93369820508953e-07, "loss": 4.04, "step": 85440 }, { "epoch": 3.6809234612568376, "learning_rate": 7.93321338533474e-07, "loss": 3.9894, "step": 85460 }, { "epoch": 3.6817848989964252, "learning_rate": 7.932728565579952e-07, "loss": 3.9257, "step": 85480 }, { "epoch": 3.6826463367360125, "learning_rate": 7.932243745825163e-07, "loss": 4.038, "step": 85500 }, { "epoch": 3.6835077744755997, "learning_rate": 7.931758926070374e-07, "loss": 4.2045, "step": 85520 }, { "epoch": 3.684369212215187, "learning_rate": 7.931274106315584e-07, "loss": 3.9042, "step": 85540 }, { "epoch": 3.6852306499547747, "learning_rate": 7.930789286560796e-07, "loss": 4.0177, "step": 85560 }, { "epoch": 3.686092087694362, "learning_rate": 7.930304466806007e-07, "loss": 3.7278, "step": 85580 }, { "epoch": 3.686953525433949, "learning_rate": 7.929819647051219e-07, "loss": 3.944, "step": 85600 }, { "epoch": 3.687814963173537, "learning_rate": 7.929334827296429e-07, "loss": 3.8217, "step": 85620 }, { "epoch": 3.688676400913124, "learning_rate": 7.92885000754164e-07, "loss": 3.9619, "step": 85640 }, { "epoch": 3.6895378386527113, "learning_rate": 7.928365187786851e-07, "loss": 3.9583, "step": 85660 }, { "epoch": 3.6903992763922986, "learning_rate": 7.927880368032063e-07, "loss": 3.8675, "step": 85680 }, { "epoch": 3.6912607141318863, "learning_rate": 7.927395548277273e-07, "loss": 4.0699, "step": 85700 }, { "epoch": 3.6921221518714735, "learning_rate": 7.926910728522484e-07, "loss": 4.1263, "step": 85720 }, { "epoch": 3.6929835896110608, "learning_rate": 7.926425908767696e-07, "loss": 3.924, "step": 85740 }, { "epoch": 3.6938450273506485, "learning_rate": 7.925941089012907e-07, "loss": 4.0332, "step": 85760 }, { "epoch": 3.6947064650902357, "learning_rate": 7.925456269258117e-07, "loss": 3.9763, "step": 85780 }, { "epoch": 3.695567902829823, "learning_rate": 7.924971449503328e-07, "loss": 3.9511, "step": 85800 }, { "epoch": 3.69642934056941, "learning_rate": 7.92448662974854e-07, "loss": 3.9879, "step": 85820 }, { "epoch": 3.697290778308998, "learning_rate": 7.924001809993752e-07, "loss": 3.9871, "step": 85840 }, { "epoch": 3.698152216048585, "learning_rate": 7.923516990238962e-07, "loss": 4.0096, "step": 85860 }, { "epoch": 3.6990136537881724, "learning_rate": 7.923032170484173e-07, "loss": 3.8202, "step": 85880 }, { "epoch": 3.69987509152776, "learning_rate": 7.922547350729384e-07, "loss": 3.9721, "step": 85900 }, { "epoch": 3.7007365292673473, "learning_rate": 7.922062530974595e-07, "loss": 3.9915, "step": 85920 }, { "epoch": 3.7015979670069346, "learning_rate": 7.921577711219806e-07, "loss": 4.0234, "step": 85940 }, { "epoch": 3.702459404746522, "learning_rate": 7.921092891465017e-07, "loss": 3.8978, "step": 85960 }, { "epoch": 3.703320842486109, "learning_rate": 7.920608071710229e-07, "loss": 3.907, "step": 85980 }, { "epoch": 3.7041822802256967, "learning_rate": 7.920123251955439e-07, "loss": 4.0054, "step": 86000 }, { "epoch": 3.705043717965284, "learning_rate": 7.91963843220065e-07, "loss": 4.0066, "step": 86020 }, { "epoch": 3.7059051557048717, "learning_rate": 7.91915361244586e-07, "loss": 3.9255, "step": 86040 }, { "epoch": 3.706766593444459, "learning_rate": 7.918668792691073e-07, "loss": 3.7505, "step": 86060 }, { "epoch": 3.707628031184046, "learning_rate": 7.918183972936283e-07, "loss": 3.9397, "step": 86080 }, { "epoch": 3.7084894689236334, "learning_rate": 7.917699153181495e-07, "loss": 3.9661, "step": 86100 }, { "epoch": 3.7093509066632206, "learning_rate": 7.917214333426706e-07, "loss": 4.1445, "step": 86120 }, { "epoch": 3.7102123444028083, "learning_rate": 7.916729513671916e-07, "loss": 3.8973, "step": 86140 }, { "epoch": 3.7110737821423956, "learning_rate": 7.916244693917127e-07, "loss": 3.9122, "step": 86160 }, { "epoch": 3.7119352198819833, "learning_rate": 7.915759874162339e-07, "loss": 4.002, "step": 86180 }, { "epoch": 3.7127966576215705, "learning_rate": 7.91527505440755e-07, "loss": 3.9136, "step": 86200 }, { "epoch": 3.7136580953611578, "learning_rate": 7.914790234652761e-07, "loss": 3.8946, "step": 86220 }, { "epoch": 3.714519533100745, "learning_rate": 7.914305414897972e-07, "loss": 3.9535, "step": 86240 }, { "epoch": 3.7153809708403323, "learning_rate": 7.913820595143183e-07, "loss": 3.8481, "step": 86260 }, { "epoch": 3.71624240857992, "learning_rate": 7.913335775388394e-07, "loss": 4.0469, "step": 86280 }, { "epoch": 3.717103846319507, "learning_rate": 7.912850955633605e-07, "loss": 3.9189, "step": 86300 }, { "epoch": 3.717965284059095, "learning_rate": 7.912366135878816e-07, "loss": 3.9028, "step": 86320 }, { "epoch": 3.718826721798682, "learning_rate": 7.911881316124027e-07, "loss": 3.9573, "step": 86340 }, { "epoch": 3.7196881595382694, "learning_rate": 7.911396496369239e-07, "loss": 3.8091, "step": 86360 }, { "epoch": 3.7205495972778566, "learning_rate": 7.91091167661445e-07, "loss": 3.5977, "step": 86380 }, { "epoch": 3.721411035017444, "learning_rate": 7.91042685685966e-07, "loss": 3.75, "step": 86400 }, { "epoch": 3.7222724727570315, "learning_rate": 7.909942037104871e-07, "loss": 4.0414, "step": 86420 }, { "epoch": 3.723133910496619, "learning_rate": 7.909457217350082e-07, "loss": 3.8814, "step": 86440 }, { "epoch": 3.7239953482362065, "learning_rate": 7.908972397595294e-07, "loss": 3.775, "step": 86460 }, { "epoch": 3.7248567859757937, "learning_rate": 7.908487577840505e-07, "loss": 3.9186, "step": 86480 }, { "epoch": 3.725718223715381, "learning_rate": 7.908002758085716e-07, "loss": 4.0321, "step": 86500 }, { "epoch": 3.726579661454968, "learning_rate": 7.907517938330927e-07, "loss": 3.9766, "step": 86520 }, { "epoch": 3.7274410991945555, "learning_rate": 7.907033118576137e-07, "loss": 3.8738, "step": 86540 }, { "epoch": 3.728302536934143, "learning_rate": 7.90654829882135e-07, "loss": 3.8979, "step": 86560 }, { "epoch": 3.7291639746737304, "learning_rate": 7.90606347906656e-07, "loss": 3.818, "step": 86580 }, { "epoch": 3.730025412413318, "learning_rate": 7.905578659311771e-07, "loss": 4.043, "step": 86600 }, { "epoch": 3.7308868501529053, "learning_rate": 7.905093839556982e-07, "loss": 3.9054, "step": 86620 }, { "epoch": 3.7317482878924926, "learning_rate": 7.904609019802194e-07, "loss": 3.9729, "step": 86640 }, { "epoch": 3.73260972563208, "learning_rate": 7.904124200047404e-07, "loss": 3.903, "step": 86660 }, { "epoch": 3.733471163371667, "learning_rate": 7.903639380292615e-07, "loss": 3.9281, "step": 86680 }, { "epoch": 3.7343326011112548, "learning_rate": 7.903154560537826e-07, "loss": 3.9402, "step": 86700 }, { "epoch": 3.735194038850842, "learning_rate": 7.902669740783038e-07, "loss": 3.896, "step": 86720 }, { "epoch": 3.7360554765904297, "learning_rate": 7.902184921028249e-07, "loss": 4.0376, "step": 86740 }, { "epoch": 3.736916914330017, "learning_rate": 7.90170010127346e-07, "loss": 3.6785, "step": 86760 }, { "epoch": 3.737778352069604, "learning_rate": 7.90121528151867e-07, "loss": 3.961, "step": 86780 }, { "epoch": 3.7386397898091914, "learning_rate": 7.900730461763881e-07, "loss": 4.054, "step": 86800 }, { "epoch": 3.7395012275487787, "learning_rate": 7.900245642009092e-07, "loss": 3.9651, "step": 86820 }, { "epoch": 3.7403626652883664, "learning_rate": 7.899760822254304e-07, "loss": 4.0509, "step": 86840 }, { "epoch": 3.7412241030279536, "learning_rate": 7.899276002499515e-07, "loss": 3.9688, "step": 86860 }, { "epoch": 3.7420855407675413, "learning_rate": 7.898791182744726e-07, "loss": 4.0626, "step": 86880 }, { "epoch": 3.7429469785071285, "learning_rate": 7.898306362989937e-07, "loss": 3.946, "step": 86900 }, { "epoch": 3.743808416246716, "learning_rate": 7.897821543235148e-07, "loss": 3.9239, "step": 86920 }, { "epoch": 3.744669853986303, "learning_rate": 7.897336723480359e-07, "loss": 3.8131, "step": 86940 }, { "epoch": 3.7455312917258903, "learning_rate": 7.89685190372557e-07, "loss": 3.8613, "step": 86960 }, { "epoch": 3.746392729465478, "learning_rate": 7.89636708397078e-07, "loss": 3.9029, "step": 86980 }, { "epoch": 3.747254167205065, "learning_rate": 7.895882264215993e-07, "loss": 3.8923, "step": 87000 }, { "epoch": 3.748115604944653, "learning_rate": 7.895397444461204e-07, "loss": 3.7386, "step": 87020 }, { "epoch": 3.74897704268424, "learning_rate": 7.894912624706413e-07, "loss": 3.8649, "step": 87040 }, { "epoch": 3.7498384804238274, "learning_rate": 7.894427804951625e-07, "loss": 3.9059, "step": 87060 }, { "epoch": 3.7506999181634146, "learning_rate": 7.893942985196836e-07, "loss": 3.8373, "step": 87080 }, { "epoch": 3.751561355903002, "learning_rate": 7.893458165442048e-07, "loss": 4.1028, "step": 87100 }, { "epoch": 3.7524227936425896, "learning_rate": 7.892973345687258e-07, "loss": 3.7742, "step": 87120 }, { "epoch": 3.753284231382177, "learning_rate": 7.89248852593247e-07, "loss": 3.928, "step": 87140 }, { "epoch": 3.754145669121764, "learning_rate": 7.892003706177681e-07, "loss": 3.9705, "step": 87160 }, { "epoch": 3.7550071068613518, "learning_rate": 7.891518886422892e-07, "loss": 3.8655, "step": 87180 }, { "epoch": 3.755868544600939, "learning_rate": 7.891034066668102e-07, "loss": 3.8469, "step": 87200 }, { "epoch": 3.7567299823405262, "learning_rate": 7.890549246913314e-07, "loss": 4.0724, "step": 87220 }, { "epoch": 3.7575914200801135, "learning_rate": 7.890064427158525e-07, "loss": 4.0248, "step": 87240 }, { "epoch": 3.758452857819701, "learning_rate": 7.889579607403737e-07, "loss": 3.7624, "step": 87260 }, { "epoch": 3.7593142955592884, "learning_rate": 7.889094787648947e-07, "loss": 3.872, "step": 87280 }, { "epoch": 3.7601757332988757, "learning_rate": 7.888609967894158e-07, "loss": 3.9052, "step": 87300 }, { "epoch": 3.7610371710384634, "learning_rate": 7.888125148139369e-07, "loss": 4.0138, "step": 87320 }, { "epoch": 3.7618986087780506, "learning_rate": 7.88764032838458e-07, "loss": 3.5288, "step": 87340 }, { "epoch": 3.762760046517638, "learning_rate": 7.887155508629791e-07, "loss": 3.7521, "step": 87360 }, { "epoch": 3.763621484257225, "learning_rate": 7.886670688875003e-07, "loss": 4.0931, "step": 87380 }, { "epoch": 3.764482921996813, "learning_rate": 7.886185869120214e-07, "loss": 3.9274, "step": 87400 }, { "epoch": 3.7653443597364, "learning_rate": 7.885701049365423e-07, "loss": 3.9735, "step": 87420 }, { "epoch": 3.7662057974759873, "learning_rate": 7.885216229610635e-07, "loss": 4.0142, "step": 87440 }, { "epoch": 3.767067235215575, "learning_rate": 7.884731409855847e-07, "loss": 3.7194, "step": 87460 }, { "epoch": 3.767928672955162, "learning_rate": 7.884246590101058e-07, "loss": 3.8846, "step": 87480 }, { "epoch": 3.7687901106947495, "learning_rate": 7.883761770346268e-07, "loss": 3.9781, "step": 87500 }, { "epoch": 3.7696515484343367, "learning_rate": 7.88327695059148e-07, "loss": 3.6471, "step": 87520 }, { "epoch": 3.7705129861739244, "learning_rate": 7.882792130836691e-07, "loss": 4.0283, "step": 87540 }, { "epoch": 3.7713744239135116, "learning_rate": 7.882307311081902e-07, "loss": 3.8996, "step": 87560 }, { "epoch": 3.772235861653099, "learning_rate": 7.881822491327112e-07, "loss": 3.6923, "step": 87580 }, { "epoch": 3.7730972993926866, "learning_rate": 7.881337671572324e-07, "loss": 3.7923, "step": 87600 }, { "epoch": 3.773958737132274, "learning_rate": 7.880852851817535e-07, "loss": 4.0788, "step": 87620 }, { "epoch": 3.774820174871861, "learning_rate": 7.880368032062747e-07, "loss": 4.1749, "step": 87640 }, { "epoch": 3.7756816126114483, "learning_rate": 7.879883212307957e-07, "loss": 3.9226, "step": 87660 }, { "epoch": 3.776543050351036, "learning_rate": 7.879398392553168e-07, "loss": 3.8808, "step": 87680 }, { "epoch": 3.7774044880906232, "learning_rate": 7.878913572798379e-07, "loss": 3.9336, "step": 87700 }, { "epoch": 3.7782659258302105, "learning_rate": 7.87842875304359e-07, "loss": 3.9363, "step": 87720 }, { "epoch": 3.779127363569798, "learning_rate": 7.877943933288801e-07, "loss": 3.7732, "step": 87740 }, { "epoch": 3.7799888013093854, "learning_rate": 7.877459113534013e-07, "loss": 3.9161, "step": 87760 }, { "epoch": 3.7808502390489727, "learning_rate": 7.876974293779224e-07, "loss": 3.738, "step": 87780 }, { "epoch": 3.78171167678856, "learning_rate": 7.876489474024434e-07, "loss": 3.8053, "step": 87800 }, { "epoch": 3.7825731145281476, "learning_rate": 7.876004654269644e-07, "loss": 3.9258, "step": 87820 }, { "epoch": 3.783434552267735, "learning_rate": 7.875519834514857e-07, "loss": 3.8916, "step": 87840 }, { "epoch": 3.784295990007322, "learning_rate": 7.875035014760068e-07, "loss": 3.6987, "step": 87860 }, { "epoch": 3.7851574277469098, "learning_rate": 7.874550195005278e-07, "loss": 3.9392, "step": 87880 }, { "epoch": 3.786018865486497, "learning_rate": 7.87406537525049e-07, "loss": 3.8059, "step": 87900 }, { "epoch": 3.7868803032260843, "learning_rate": 7.873580555495701e-07, "loss": 3.7662, "step": 87920 }, { "epoch": 3.7877417409656715, "learning_rate": 7.873095735740912e-07, "loss": 3.9632, "step": 87940 }, { "epoch": 3.788603178705259, "learning_rate": 7.872610915986122e-07, "loss": 3.9151, "step": 87960 }, { "epoch": 3.7894646164448464, "learning_rate": 7.872126096231334e-07, "loss": 3.8365, "step": 87980 }, { "epoch": 3.7903260541844337, "learning_rate": 7.871641276476546e-07, "loss": 3.7737, "step": 88000 }, { "epoch": 3.7911874919240214, "learning_rate": 7.871156456721756e-07, "loss": 3.7817, "step": 88020 }, { "epoch": 3.7920489296636086, "learning_rate": 7.870671636966967e-07, "loss": 3.7157, "step": 88040 }, { "epoch": 3.792910367403196, "learning_rate": 7.870186817212178e-07, "loss": 3.7524, "step": 88060 }, { "epoch": 3.793771805142783, "learning_rate": 7.86970199745739e-07, "loss": 3.8023, "step": 88080 }, { "epoch": 3.794633242882371, "learning_rate": 7.8692171777026e-07, "loss": 3.7815, "step": 88100 }, { "epoch": 3.795494680621958, "learning_rate": 7.868732357947811e-07, "loss": 4.0132, "step": 88120 }, { "epoch": 3.7963561183615453, "learning_rate": 7.868247538193023e-07, "loss": 3.9462, "step": 88140 }, { "epoch": 3.797217556101133, "learning_rate": 7.867762718438234e-07, "loss": 3.7257, "step": 88160 }, { "epoch": 3.7980789938407202, "learning_rate": 7.867277898683444e-07, "loss": 3.9017, "step": 88180 }, { "epoch": 3.7989404315803075, "learning_rate": 7.866793078928655e-07, "loss": 3.7896, "step": 88200 }, { "epoch": 3.7998018693198947, "learning_rate": 7.866308259173867e-07, "loss": 3.5694, "step": 88220 }, { "epoch": 3.8006633070594824, "learning_rate": 7.865823439419076e-07, "loss": 3.8739, "step": 88240 }, { "epoch": 3.8015247447990697, "learning_rate": 7.865338619664289e-07, "loss": 3.9219, "step": 88260 }, { "epoch": 3.802386182538657, "learning_rate": 7.8648537999095e-07, "loss": 4.0126, "step": 88280 }, { "epoch": 3.8032476202782446, "learning_rate": 7.864368980154712e-07, "loss": 3.8891, "step": 88300 }, { "epoch": 3.804109058017832, "learning_rate": 7.863884160399921e-07, "loss": 3.9425, "step": 88320 }, { "epoch": 3.804970495757419, "learning_rate": 7.863399340645134e-07, "loss": 4.0016, "step": 88340 }, { "epoch": 3.8058319334970063, "learning_rate": 7.862914520890344e-07, "loss": 3.8384, "step": 88360 }, { "epoch": 3.806693371236594, "learning_rate": 7.862429701135556e-07, "loss": 4.0418, "step": 88380 }, { "epoch": 3.8075548089761813, "learning_rate": 7.861944881380766e-07, "loss": 3.6954, "step": 88400 }, { "epoch": 3.8084162467157685, "learning_rate": 7.861460061625977e-07, "loss": 3.8187, "step": 88420 }, { "epoch": 3.809277684455356, "learning_rate": 7.860975241871188e-07, "loss": 3.8867, "step": 88440 }, { "epoch": 3.8101391221949434, "learning_rate": 7.8604904221164e-07, "loss": 3.9452, "step": 88460 }, { "epoch": 3.8110005599345307, "learning_rate": 7.86000560236161e-07, "loss": 3.8708, "step": 88480 }, { "epoch": 3.811861997674118, "learning_rate": 7.859520782606821e-07, "loss": 3.834, "step": 88500 }, { "epoch": 3.8127234354137056, "learning_rate": 7.859035962852033e-07, "loss": 3.9532, "step": 88520 }, { "epoch": 3.813584873153293, "learning_rate": 7.858551143097245e-07, "loss": 3.7738, "step": 88540 }, { "epoch": 3.81444631089288, "learning_rate": 7.858066323342454e-07, "loss": 3.7843, "step": 88560 }, { "epoch": 3.815307748632468, "learning_rate": 7.857581503587665e-07, "loss": 3.7998, "step": 88580 }, { "epoch": 3.816169186372055, "learning_rate": 7.857096683832877e-07, "loss": 3.6708, "step": 88600 }, { "epoch": 3.8170306241116423, "learning_rate": 7.856611864078089e-07, "loss": 4.0309, "step": 88620 }, { "epoch": 3.8178920618512295, "learning_rate": 7.856127044323299e-07, "loss": 3.959, "step": 88640 }, { "epoch": 3.8187534995908172, "learning_rate": 7.85564222456851e-07, "loss": 3.9605, "step": 88660 }, { "epoch": 3.8196149373304045, "learning_rate": 7.855157404813722e-07, "loss": 3.8793, "step": 88680 }, { "epoch": 3.8204763750699917, "learning_rate": 7.854672585058931e-07, "loss": 3.8449, "step": 88700 }, { "epoch": 3.8213378128095794, "learning_rate": 7.854187765304143e-07, "loss": 3.7767, "step": 88720 }, { "epoch": 3.8221992505491666, "learning_rate": 7.853702945549354e-07, "loss": 3.8814, "step": 88740 }, { "epoch": 3.823060688288754, "learning_rate": 7.853218125794565e-07, "loss": 4.0609, "step": 88760 }, { "epoch": 3.823922126028341, "learning_rate": 7.852733306039776e-07, "loss": 3.9216, "step": 88780 }, { "epoch": 3.824783563767929, "learning_rate": 7.852248486284988e-07, "loss": 3.6541, "step": 88800 }, { "epoch": 3.825645001507516, "learning_rate": 7.851763666530198e-07, "loss": 3.8322, "step": 88820 }, { "epoch": 3.8265064392471033, "learning_rate": 7.85127884677541e-07, "loss": 3.7899, "step": 88840 }, { "epoch": 3.827367876986691, "learning_rate": 7.85079402702062e-07, "loss": 3.7304, "step": 88860 }, { "epoch": 3.8282293147262783, "learning_rate": 7.850309207265832e-07, "loss": 3.9979, "step": 88880 }, { "epoch": 3.8290907524658655, "learning_rate": 7.849824387511043e-07, "loss": 3.8461, "step": 88900 }, { "epoch": 3.8299521902054527, "learning_rate": 7.849339567756255e-07, "loss": 3.843, "step": 88920 }, { "epoch": 3.8308136279450404, "learning_rate": 7.848854748001465e-07, "loss": 3.9031, "step": 88940 }, { "epoch": 3.8316750656846277, "learning_rate": 7.848369928246675e-07, "loss": 3.9929, "step": 88960 }, { "epoch": 3.832536503424215, "learning_rate": 7.847885108491887e-07, "loss": 3.7308, "step": 88980 }, { "epoch": 3.8333979411638026, "learning_rate": 7.847400288737098e-07, "loss": 3.9517, "step": 89000 }, { "epoch": 3.83425937890339, "learning_rate": 7.846915468982309e-07, "loss": 3.6881, "step": 89020 }, { "epoch": 3.835120816642977, "learning_rate": 7.84643064922752e-07, "loss": 3.919, "step": 89040 }, { "epoch": 3.8359822543825643, "learning_rate": 7.845945829472732e-07, "loss": 4.0234, "step": 89060 }, { "epoch": 3.8368436921221516, "learning_rate": 7.845461009717942e-07, "loss": 3.8104, "step": 89080 }, { "epoch": 3.8377051298617393, "learning_rate": 7.844976189963153e-07, "loss": 3.9132, "step": 89100 }, { "epoch": 3.8385665676013265, "learning_rate": 7.844491370208364e-07, "loss": 3.9795, "step": 89120 }, { "epoch": 3.839428005340914, "learning_rate": 7.844006550453576e-07, "loss": 3.9139, "step": 89140 }, { "epoch": 3.8402894430805015, "learning_rate": 7.843521730698787e-07, "loss": 4.0388, "step": 89160 }, { "epoch": 3.8411508808200887, "learning_rate": 7.843036910943998e-07, "loss": 3.8791, "step": 89180 }, { "epoch": 3.842012318559676, "learning_rate": 7.842552091189208e-07, "loss": 3.9315, "step": 89200 }, { "epoch": 3.842873756299263, "learning_rate": 7.84206727143442e-07, "loss": 3.7865, "step": 89220 }, { "epoch": 3.843735194038851, "learning_rate": 7.84158245167963e-07, "loss": 3.8102, "step": 89240 }, { "epoch": 3.844596631778438, "learning_rate": 7.841097631924842e-07, "loss": 3.8697, "step": 89260 }, { "epoch": 3.845458069518026, "learning_rate": 7.840612812170053e-07, "loss": 3.7959, "step": 89280 }, { "epoch": 3.846319507257613, "learning_rate": 7.840127992415264e-07, "loss": 4.142, "step": 89300 }, { "epoch": 3.8471809449972003, "learning_rate": 7.839643172660475e-07, "loss": 3.6295, "step": 89320 }, { "epoch": 3.8480423827367876, "learning_rate": 7.839158352905686e-07, "loss": 3.7501, "step": 89340 }, { "epoch": 3.848903820476375, "learning_rate": 7.838673533150897e-07, "loss": 4.1259, "step": 89360 }, { "epoch": 3.8497652582159625, "learning_rate": 7.838188713396108e-07, "loss": 3.7878, "step": 89380 }, { "epoch": 3.8506266959555497, "learning_rate": 7.837703893641319e-07, "loss": 3.8972, "step": 89400 }, { "epoch": 3.8514881336951374, "learning_rate": 7.837219073886531e-07, "loss": 3.8955, "step": 89420 }, { "epoch": 3.8523495714347247, "learning_rate": 7.836734254131742e-07, "loss": 3.7758, "step": 89440 }, { "epoch": 3.853211009174312, "learning_rate": 7.836249434376952e-07, "loss": 3.7494, "step": 89460 }, { "epoch": 3.854072446913899, "learning_rate": 7.835764614622163e-07, "loss": 3.8453, "step": 89480 }, { "epoch": 3.8549338846534864, "learning_rate": 7.835279794867374e-07, "loss": 3.7828, "step": 89500 }, { "epoch": 3.855795322393074, "learning_rate": 7.834794975112586e-07, "loss": 3.6256, "step": 89520 }, { "epoch": 3.8566567601326613, "learning_rate": 7.834310155357797e-07, "loss": 3.9478, "step": 89540 }, { "epoch": 3.857518197872249, "learning_rate": 7.833825335603008e-07, "loss": 3.8812, "step": 89560 }, { "epoch": 3.8583796356118363, "learning_rate": 7.833340515848219e-07, "loss": 3.8641, "step": 89580 }, { "epoch": 3.8592410733514235, "learning_rate": 7.832855696093428e-07, "loss": 3.7482, "step": 89600 }, { "epoch": 3.8601025110910108, "learning_rate": 7.832370876338641e-07, "loss": 3.9785, "step": 89620 }, { "epoch": 3.860963948830598, "learning_rate": 7.831886056583852e-07, "loss": 3.7922, "step": 89640 }, { "epoch": 3.8618253865701857, "learning_rate": 7.831401236829063e-07, "loss": 3.7549, "step": 89660 }, { "epoch": 3.862686824309773, "learning_rate": 7.830916417074274e-07, "loss": 3.8124, "step": 89680 }, { "epoch": 3.8635482620493606, "learning_rate": 7.830431597319485e-07, "loss": 3.6733, "step": 89700 }, { "epoch": 3.864409699788948, "learning_rate": 7.829946777564696e-07, "loss": 3.85, "step": 89720 }, { "epoch": 3.865271137528535, "learning_rate": 7.829461957809907e-07, "loss": 3.8004, "step": 89740 }, { "epoch": 3.8661325752681224, "learning_rate": 7.828977138055118e-07, "loss": 3.9266, "step": 89760 }, { "epoch": 3.8669940130077096, "learning_rate": 7.828492318300329e-07, "loss": 3.7967, "step": 89780 }, { "epoch": 3.8678554507472973, "learning_rate": 7.828007498545541e-07, "loss": 3.8047, "step": 89800 }, { "epoch": 3.8687168884868846, "learning_rate": 7.827522678790752e-07, "loss": 3.9963, "step": 89820 }, { "epoch": 3.8695783262264722, "learning_rate": 7.827037859035962e-07, "loss": 4.0488, "step": 89840 }, { "epoch": 3.8704397639660595, "learning_rate": 7.826553039281173e-07, "loss": 3.8202, "step": 89860 }, { "epoch": 3.8713012017056467, "learning_rate": 7.826068219526385e-07, "loss": 3.8694, "step": 89880 }, { "epoch": 3.872162639445234, "learning_rate": 7.825583399771595e-07, "loss": 3.8098, "step": 89900 }, { "epoch": 3.8730240771848212, "learning_rate": 7.825098580016807e-07, "loss": 3.7308, "step": 89920 }, { "epoch": 3.873885514924409, "learning_rate": 7.824613760262018e-07, "loss": 3.7651, "step": 89940 }, { "epoch": 3.874746952663996, "learning_rate": 7.824128940507229e-07, "loss": 3.9445, "step": 89960 }, { "epoch": 3.875608390403584, "learning_rate": 7.823644120752439e-07, "loss": 3.6094, "step": 89980 }, { "epoch": 3.876469828143171, "learning_rate": 7.823159300997651e-07, "loss": 3.7239, "step": 90000 }, { "epoch": 3.8773312658827583, "learning_rate": 7.822674481242861e-07, "loss": 3.8739, "step": 90020 }, { "epoch": 3.8781927036223456, "learning_rate": 7.822189661488073e-07, "loss": 3.4923, "step": 90040 }, { "epoch": 3.879054141361933, "learning_rate": 7.821704841733284e-07, "loss": 3.7335, "step": 90060 }, { "epoch": 3.8799155791015205, "learning_rate": 7.821220021978496e-07, "loss": 3.8102, "step": 90080 }, { "epoch": 3.8807770168411078, "learning_rate": 7.820735202223706e-07, "loss": 3.7155, "step": 90100 }, { "epoch": 3.8816384545806955, "learning_rate": 7.820250382468916e-07, "loss": 3.7802, "step": 90120 }, { "epoch": 3.8824998923202827, "learning_rate": 7.819765562714128e-07, "loss": 3.7528, "step": 90140 }, { "epoch": 3.88336133005987, "learning_rate": 7.81928074295934e-07, "loss": 3.9415, "step": 90160 }, { "epoch": 3.884222767799457, "learning_rate": 7.818795923204551e-07, "loss": 3.947, "step": 90180 }, { "epoch": 3.8850842055390444, "learning_rate": 7.818311103449761e-07, "loss": 3.639, "step": 90200 }, { "epoch": 3.885945643278632, "learning_rate": 7.817826283694972e-07, "loss": 4.0083, "step": 90220 }, { "epoch": 3.8868070810182194, "learning_rate": 7.817341463940184e-07, "loss": 3.925, "step": 90240 }, { "epoch": 3.887668518757807, "learning_rate": 7.816856644185395e-07, "loss": 3.8864, "step": 90260 }, { "epoch": 3.8885299564973943, "learning_rate": 7.816371824430605e-07, "loss": 3.8101, "step": 90280 }, { "epoch": 3.8893913942369815, "learning_rate": 7.815887004675817e-07, "loss": 3.9873, "step": 90300 }, { "epoch": 3.890252831976569, "learning_rate": 7.815402184921028e-07, "loss": 3.7457, "step": 90320 }, { "epoch": 3.891114269716156, "learning_rate": 7.814917365166239e-07, "loss": 3.925, "step": 90340 }, { "epoch": 3.8919757074557437, "learning_rate": 7.814432545411449e-07, "loss": 3.6213, "step": 90360 }, { "epoch": 3.892837145195331, "learning_rate": 7.813947725656661e-07, "loss": 3.6758, "step": 90380 }, { "epoch": 3.893698582934918, "learning_rate": 7.813462905901872e-07, "loss": 3.7121, "step": 90400 }, { "epoch": 3.894560020674506, "learning_rate": 7.812978086147084e-07, "loss": 3.8724, "step": 90420 }, { "epoch": 3.895421458414093, "learning_rate": 7.812493266392294e-07, "loss": 3.8405, "step": 90440 }, { "epoch": 3.8962828961536804, "learning_rate": 7.812008446637506e-07, "loss": 3.8385, "step": 90460 }, { "epoch": 3.8971443338932676, "learning_rate": 7.811523626882716e-07, "loss": 3.7286, "step": 90480 }, { "epoch": 3.8980057716328553, "learning_rate": 7.811038807127927e-07, "loss": 3.7813, "step": 90500 }, { "epoch": 3.8988672093724426, "learning_rate": 7.810553987373138e-07, "loss": 3.8708, "step": 90520 }, { "epoch": 3.89972864711203, "learning_rate": 7.810069167618349e-07, "loss": 3.9136, "step": 90540 }, { "epoch": 3.9005900848516175, "learning_rate": 7.809584347863561e-07, "loss": 3.8608, "step": 90560 }, { "epoch": 3.9014515225912048, "learning_rate": 7.809099528108771e-07, "loss": 3.8121, "step": 90580 }, { "epoch": 3.902312960330792, "learning_rate": 7.808614708353982e-07, "loss": 3.8127, "step": 90600 }, { "epoch": 3.9031743980703792, "learning_rate": 7.808129888599194e-07, "loss": 3.7044, "step": 90620 }, { "epoch": 3.904035835809967, "learning_rate": 7.807645068844405e-07, "loss": 3.7509, "step": 90640 }, { "epoch": 3.904897273549554, "learning_rate": 7.807160249089615e-07, "loss": 3.7746, "step": 90660 }, { "epoch": 3.9057587112891414, "learning_rate": 7.806675429334827e-07, "loss": 3.7436, "step": 90680 }, { "epoch": 3.906620149028729, "learning_rate": 7.806190609580039e-07, "loss": 3.6755, "step": 90700 }, { "epoch": 3.9074815867683164, "learning_rate": 7.80570578982525e-07, "loss": 3.7125, "step": 90720 }, { "epoch": 3.9083430245079036, "learning_rate": 7.805220970070459e-07, "loss": 3.9075, "step": 90740 }, { "epoch": 3.909204462247491, "learning_rate": 7.804736150315671e-07, "loss": 3.9323, "step": 90760 }, { "epoch": 3.9100658999870785, "learning_rate": 7.804251330560883e-07, "loss": 3.6171, "step": 90780 }, { "epoch": 3.910927337726666, "learning_rate": 7.803766510806094e-07, "loss": 3.712, "step": 90800 }, { "epoch": 3.911788775466253, "learning_rate": 7.803281691051304e-07, "loss": 3.8687, "step": 90820 }, { "epoch": 3.9126502132058407, "learning_rate": 7.802796871296516e-07, "loss": 3.7504, "step": 90840 }, { "epoch": 3.913511650945428, "learning_rate": 7.802312051541725e-07, "loss": 3.818, "step": 90860 }, { "epoch": 3.914373088685015, "learning_rate": 7.801827231786937e-07, "loss": 3.8207, "step": 90880 }, { "epoch": 3.9152345264246025, "learning_rate": 7.801342412032148e-07, "loss": 3.8593, "step": 90900 }, { "epoch": 3.91609596416419, "learning_rate": 7.80085759227736e-07, "loss": 3.8238, "step": 90920 }, { "epoch": 3.9169574019037774, "learning_rate": 7.800372772522571e-07, "loss": 3.8924, "step": 90940 }, { "epoch": 3.9178188396433646, "learning_rate": 7.799887952767782e-07, "loss": 3.7231, "step": 90960 }, { "epoch": 3.9186802773829523, "learning_rate": 7.799403133012992e-07, "loss": 3.8398, "step": 90980 }, { "epoch": 3.9195417151225396, "learning_rate": 7.798918313258204e-07, "loss": 3.6378, "step": 91000 }, { "epoch": 3.920403152862127, "learning_rate": 7.798433493503415e-07, "loss": 4.0435, "step": 91020 }, { "epoch": 3.921264590601714, "learning_rate": 7.797948673748626e-07, "loss": 3.9161, "step": 91040 }, { "epoch": 3.9221260283413018, "learning_rate": 7.797463853993837e-07, "loss": 3.929, "step": 91060 }, { "epoch": 3.922987466080889, "learning_rate": 7.796979034239049e-07, "loss": 3.8256, "step": 91080 }, { "epoch": 3.9238489038204762, "learning_rate": 7.79649421448426e-07, "loss": 3.7158, "step": 91100 }, { "epoch": 3.924710341560064, "learning_rate": 7.796009394729469e-07, "loss": 3.684, "step": 91120 }, { "epoch": 3.925571779299651, "learning_rate": 7.795524574974681e-07, "loss": 3.6719, "step": 91140 }, { "epoch": 3.9264332170392384, "learning_rate": 7.795039755219893e-07, "loss": 3.9501, "step": 91160 }, { "epoch": 3.9272946547788257, "learning_rate": 7.794554935465103e-07, "loss": 3.8401, "step": 91180 }, { "epoch": 3.9281560925184134, "learning_rate": 7.794070115710314e-07, "loss": 3.7076, "step": 91200 }, { "epoch": 3.9290175302580006, "learning_rate": 7.793585295955526e-07, "loss": 3.7346, "step": 91220 }, { "epoch": 3.929878967997588, "learning_rate": 7.793100476200737e-07, "loss": 3.7959, "step": 91240 }, { "epoch": 3.9307404057371755, "learning_rate": 7.792615656445947e-07, "loss": 3.7263, "step": 91260 }, { "epoch": 3.931601843476763, "learning_rate": 7.792130836691158e-07, "loss": 3.7003, "step": 91280 }, { "epoch": 3.93246328121635, "learning_rate": 7.79164601693637e-07, "loss": 3.7147, "step": 91300 }, { "epoch": 3.9333247189559373, "learning_rate": 7.791161197181582e-07, "loss": 3.917, "step": 91320 }, { "epoch": 3.934186156695525, "learning_rate": 7.790676377426792e-07, "loss": 3.83, "step": 91340 }, { "epoch": 3.935047594435112, "learning_rate": 7.790191557672003e-07, "loss": 3.6783, "step": 91360 }, { "epoch": 3.9359090321746995, "learning_rate": 7.789706737917213e-07, "loss": 4.1309, "step": 91380 }, { "epoch": 3.936770469914287, "learning_rate": 7.789221918162424e-07, "loss": 3.6555, "step": 91400 }, { "epoch": 3.9376319076538744, "learning_rate": 7.788737098407636e-07, "loss": 3.735, "step": 91420 }, { "epoch": 3.9384933453934616, "learning_rate": 7.788252278652847e-07, "loss": 3.7647, "step": 91440 }, { "epoch": 3.939354783133049, "learning_rate": 7.787767458898059e-07, "loss": 3.8945, "step": 91460 }, { "epoch": 3.9402162208726366, "learning_rate": 7.787282639143269e-07, "loss": 3.9107, "step": 91480 }, { "epoch": 3.941077658612224, "learning_rate": 7.78679781938848e-07, "loss": 3.7594, "step": 91500 }, { "epoch": 3.941939096351811, "learning_rate": 7.786312999633691e-07, "loss": 3.9104, "step": 91520 }, { "epoch": 3.9428005340913987, "learning_rate": 7.785828179878903e-07, "loss": 3.777, "step": 91540 }, { "epoch": 3.943661971830986, "learning_rate": 7.785343360124113e-07, "loss": 3.76, "step": 91560 }, { "epoch": 3.9445234095705732, "learning_rate": 7.784858540369325e-07, "loss": 3.7582, "step": 91580 }, { "epoch": 3.9453848473101605, "learning_rate": 7.784373720614536e-07, "loss": 3.845, "step": 91600 }, { "epoch": 3.946246285049748, "learning_rate": 7.783888900859747e-07, "loss": 3.8376, "step": 91620 }, { "epoch": 3.9471077227893354, "learning_rate": 7.783404081104957e-07, "loss": 3.6206, "step": 91640 }, { "epoch": 3.9479691605289227, "learning_rate": 7.782919261350168e-07, "loss": 3.7836, "step": 91660 }, { "epoch": 3.9488305982685104, "learning_rate": 7.78243444159538e-07, "loss": 4.0539, "step": 91680 }, { "epoch": 3.9496920360080976, "learning_rate": 7.781949621840592e-07, "loss": 3.8973, "step": 91700 }, { "epoch": 3.950553473747685, "learning_rate": 7.781464802085802e-07, "loss": 3.7282, "step": 91720 }, { "epoch": 3.951414911487272, "learning_rate": 7.780979982331013e-07, "loss": 3.8039, "step": 91740 }, { "epoch": 3.9522763492268598, "learning_rate": 7.780495162576224e-07, "loss": 3.8443, "step": 91760 }, { "epoch": 3.953137786966447, "learning_rate": 7.780010342821435e-07, "loss": 3.8807, "step": 91780 }, { "epoch": 3.9539992247060343, "learning_rate": 7.779525523066645e-07, "loss": 3.641, "step": 91800 }, { "epoch": 3.954860662445622, "learning_rate": 7.779040703311857e-07, "loss": 3.9185, "step": 91820 }, { "epoch": 3.955722100185209, "learning_rate": 7.778555883557069e-07, "loss": 3.8025, "step": 91840 }, { "epoch": 3.9565835379247964, "learning_rate": 7.77807106380228e-07, "loss": 3.7172, "step": 91860 }, { "epoch": 3.9574449756643837, "learning_rate": 7.77758624404749e-07, "loss": 3.6519, "step": 91880 }, { "epoch": 3.9583064134039714, "learning_rate": 7.777101424292701e-07, "loss": 3.7237, "step": 91900 }, { "epoch": 3.9591678511435586, "learning_rate": 7.776616604537913e-07, "loss": 3.8187, "step": 91920 }, { "epoch": 3.960029288883146, "learning_rate": 7.776131784783123e-07, "loss": 3.6389, "step": 91940 }, { "epoch": 3.9608907266227336, "learning_rate": 7.775646965028335e-07, "loss": 3.8033, "step": 91960 }, { "epoch": 3.961752164362321, "learning_rate": 7.775162145273546e-07, "loss": 3.7367, "step": 91980 }, { "epoch": 3.962613602101908, "learning_rate": 7.774677325518757e-07, "loss": 3.5215, "step": 92000 }, { "epoch": 3.9634750398414953, "learning_rate": 7.774192505763967e-07, "loss": 3.7358, "step": 92020 }, { "epoch": 3.964336477581083, "learning_rate": 7.773707686009179e-07, "loss": 3.8196, "step": 92040 }, { "epoch": 3.9651979153206702, "learning_rate": 7.77322286625439e-07, "loss": 3.8156, "step": 92060 }, { "epoch": 3.9660593530602575, "learning_rate": 7.772738046499602e-07, "loss": 3.7631, "step": 92080 }, { "epoch": 3.966920790799845, "learning_rate": 7.772253226744812e-07, "loss": 3.7843, "step": 92100 }, { "epoch": 3.9677822285394324, "learning_rate": 7.771768406990023e-07, "loss": 3.8394, "step": 92120 }, { "epoch": 3.9686436662790197, "learning_rate": 7.771283587235234e-07, "loss": 3.8936, "step": 92140 }, { "epoch": 3.969505104018607, "learning_rate": 7.770798767480445e-07, "loss": 3.8979, "step": 92160 }, { "epoch": 3.9703665417581946, "learning_rate": 7.770313947725656e-07, "loss": 3.8724, "step": 92180 }, { "epoch": 3.971227979497782, "learning_rate": 7.769829127970867e-07, "loss": 3.8303, "step": 92200 }, { "epoch": 3.972089417237369, "learning_rate": 7.769344308216079e-07, "loss": 3.8259, "step": 92220 }, { "epoch": 3.9729508549769568, "learning_rate": 7.76885948846129e-07, "loss": 3.7621, "step": 92240 }, { "epoch": 3.973812292716544, "learning_rate": 7.7683746687065e-07, "loss": 3.6755, "step": 92260 }, { "epoch": 3.9746737304561313, "learning_rate": 7.767889848951711e-07, "loss": 3.6209, "step": 92280 }, { "epoch": 3.9755351681957185, "learning_rate": 7.767405029196923e-07, "loss": 3.6571, "step": 92300 }, { "epoch": 3.9763966059353058, "learning_rate": 7.766920209442133e-07, "loss": 3.761, "step": 92320 }, { "epoch": 3.9772580436748934, "learning_rate": 7.766435389687345e-07, "loss": 3.6781, "step": 92340 }, { "epoch": 3.9781194814144807, "learning_rate": 7.765950569932556e-07, "loss": 3.7519, "step": 92360 }, { "epoch": 3.9789809191540684, "learning_rate": 7.765465750177766e-07, "loss": 3.6434, "step": 92380 }, { "epoch": 3.9798423568936556, "learning_rate": 7.764980930422978e-07, "loss": 3.7371, "step": 92400 }, { "epoch": 3.980703794633243, "learning_rate": 7.764496110668189e-07, "loss": 3.6178, "step": 92420 }, { "epoch": 3.98156523237283, "learning_rate": 7.7640112909134e-07, "loss": 3.6757, "step": 92440 }, { "epoch": 3.9824266701124174, "learning_rate": 7.763526471158611e-07, "loss": 3.8007, "step": 92460 }, { "epoch": 3.983288107852005, "learning_rate": 7.763041651403822e-07, "loss": 3.7136, "step": 92480 }, { "epoch": 3.9841495455915923, "learning_rate": 7.762556831649034e-07, "loss": 3.8083, "step": 92500 }, { "epoch": 3.98501098333118, "learning_rate": 7.762072011894244e-07, "loss": 3.8293, "step": 92520 }, { "epoch": 3.9858724210707672, "learning_rate": 7.761587192139455e-07, "loss": 3.6425, "step": 92540 }, { "epoch": 3.9867338588103545, "learning_rate": 7.761102372384666e-07, "loss": 3.8174, "step": 92560 }, { "epoch": 3.9875952965499417, "learning_rate": 7.760617552629878e-07, "loss": 3.7386, "step": 92580 }, { "epoch": 3.988456734289529, "learning_rate": 7.760132732875089e-07, "loss": 3.6856, "step": 92600 }, { "epoch": 3.9893181720291166, "learning_rate": 7.7596479131203e-07, "loss": 3.7373, "step": 92620 }, { "epoch": 3.990179609768704, "learning_rate": 7.759163093365509e-07, "loss": 3.6376, "step": 92640 }, { "epoch": 3.9910410475082916, "learning_rate": 7.758678273610722e-07, "loss": 3.7604, "step": 92660 }, { "epoch": 3.991902485247879, "learning_rate": 7.758193453855932e-07, "loss": 3.7319, "step": 92680 }, { "epoch": 3.992763922987466, "learning_rate": 7.757708634101144e-07, "loss": 3.6841, "step": 92700 }, { "epoch": 3.9936253607270533, "learning_rate": 7.757223814346355e-07, "loss": 3.6348, "step": 92720 }, { "epoch": 3.9944867984666406, "learning_rate": 7.756738994591567e-07, "loss": 3.951, "step": 92740 }, { "epoch": 3.9953482362062283, "learning_rate": 7.756254174836776e-07, "loss": 3.8506, "step": 92760 }, { "epoch": 3.9962096739458155, "learning_rate": 7.755769355081988e-07, "loss": 3.7258, "step": 92780 }, { "epoch": 3.997071111685403, "learning_rate": 7.755284535327199e-07, "loss": 3.7346, "step": 92800 }, { "epoch": 3.9979325494249904, "learning_rate": 7.75479971557241e-07, "loss": 3.6512, "step": 92820 }, { "epoch": 3.9987939871645777, "learning_rate": 7.754314895817621e-07, "loss": 3.7337, "step": 92840 }, { "epoch": 3.999655424904165, "learning_rate": 7.753830076062833e-07, "loss": 3.9018, "step": 92860 } ], "logging_steps": 20, "max_steps": 371472, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 10000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0160427286939238e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }