{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 18384, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.2608695652173915e-07, "loss": 1.057, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.884057971014494e-07, "loss": 1.0404, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.0144927536231885e-06, "loss": 1.0415, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.3405797101449278e-06, "loss": 1.0467, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.7028985507246378e-06, "loss": 1.0394, "step": 50 }, { "epoch": 0.01, "learning_rate": 2.065217391304348e-06, "loss": 1.0325, "step": 60 }, { "epoch": 0.01, "learning_rate": 2.4275362318840583e-06, "loss": 1.0236, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.7898550724637686e-06, "loss": 1.0099, "step": 80 }, { "epoch": 0.01, "learning_rate": 3.152173913043479e-06, "loss": 1.024, "step": 90 }, { "epoch": 0.02, "learning_rate": 3.5144927536231887e-06, "loss": 0.9877, "step": 100 }, { "epoch": 0.02, "learning_rate": 3.8768115942028985e-06, "loss": 0.9921, "step": 110 }, { "epoch": 0.02, "learning_rate": 4.239130434782609e-06, "loss": 0.982, "step": 120 }, { "epoch": 0.02, "learning_rate": 4.601449275362319e-06, "loss": 0.9623, "step": 130 }, { "epoch": 0.02, "learning_rate": 4.963768115942029e-06, "loss": 0.9441, "step": 140 }, { "epoch": 0.02, "learning_rate": 5.3260869565217395e-06, "loss": 0.9225, "step": 150 }, { "epoch": 0.03, "learning_rate": 5.688405797101449e-06, "loss": 0.9129, "step": 160 }, { "epoch": 0.03, "learning_rate": 6.05072463768116e-06, "loss": 0.8737, "step": 170 }, { "epoch": 0.03, "learning_rate": 6.41304347826087e-06, "loss": 0.8554, "step": 180 }, { "epoch": 0.03, "learning_rate": 6.7753623188405805e-06, "loss": 0.8249, "step": 190 }, { "epoch": 0.03, "learning_rate": 7.13768115942029e-06, "loss": 0.796, "step": 200 }, { "epoch": 0.03, "learning_rate": 7.500000000000001e-06, "loss": 0.7771, "step": 210 }, { "epoch": 0.04, "learning_rate": 7.862318840579712e-06, "loss": 0.7432, "step": 220 }, { "epoch": 0.04, "learning_rate": 8.22463768115942e-06, "loss": 0.7279, "step": 230 }, { "epoch": 0.04, "learning_rate": 8.586956521739131e-06, "loss": 0.7114, "step": 240 }, { "epoch": 0.04, "learning_rate": 8.94927536231884e-06, "loss": 0.6932, "step": 250 }, { "epoch": 0.04, "learning_rate": 9.275362318840581e-06, "loss": 0.726, "step": 260 }, { "epoch": 0.04, "learning_rate": 9.63768115942029e-06, "loss": 0.6796, "step": 270 }, { "epoch": 0.05, "learning_rate": 1e-05, "loss": 0.6754, "step": 280 }, { "epoch": 0.05, "learning_rate": 1.036231884057971e-05, "loss": 0.6558, "step": 290 }, { "epoch": 0.05, "learning_rate": 1.0724637681159422e-05, "loss": 0.6553, "step": 300 }, { "epoch": 0.05, "learning_rate": 1.1086956521739131e-05, "loss": 0.6355, "step": 310 }, { "epoch": 0.05, "learning_rate": 1.1449275362318842e-05, "loss": 0.6364, "step": 320 }, { "epoch": 0.05, "learning_rate": 1.181159420289855e-05, "loss": 0.6412, "step": 330 }, { "epoch": 0.06, "learning_rate": 1.2173913043478263e-05, "loss": 0.6304, "step": 340 }, { "epoch": 0.06, "learning_rate": 1.2536231884057972e-05, "loss": 0.6308, "step": 350 }, { "epoch": 0.06, "learning_rate": 1.2898550724637681e-05, "loss": 0.6139, "step": 360 }, { "epoch": 0.06, "learning_rate": 1.3260869565217392e-05, "loss": 0.6292, "step": 370 }, { "epoch": 0.06, "learning_rate": 1.3623188405797103e-05, "loss": 0.6259, "step": 380 }, { "epoch": 0.06, "learning_rate": 1.3985507246376813e-05, "loss": 0.6165, "step": 390 }, { "epoch": 0.07, "learning_rate": 1.4347826086956522e-05, "loss": 0.6156, "step": 400 }, { "epoch": 0.07, "learning_rate": 1.4710144927536235e-05, "loss": 0.622, "step": 410 }, { "epoch": 0.07, "learning_rate": 1.5072463768115944e-05, "loss": 0.6195, "step": 420 }, { "epoch": 0.07, "learning_rate": 1.5434782608695654e-05, "loss": 0.612, "step": 430 }, { "epoch": 0.07, "learning_rate": 1.5797101449275363e-05, "loss": 0.6231, "step": 440 }, { "epoch": 0.07, "learning_rate": 1.6159420289855076e-05, "loss": 0.6172, "step": 450 }, { "epoch": 0.08, "learning_rate": 1.6521739130434785e-05, "loss": 0.6111, "step": 460 }, { "epoch": 0.08, "learning_rate": 1.6884057971014494e-05, "loss": 0.5962, "step": 470 }, { "epoch": 0.08, "learning_rate": 1.7246376811594206e-05, "loss": 0.621, "step": 480 }, { "epoch": 0.08, "learning_rate": 1.7608695652173915e-05, "loss": 0.5874, "step": 490 }, { "epoch": 0.08, "learning_rate": 1.7971014492753624e-05, "loss": 0.5981, "step": 500 }, { "epoch": 0.08, "learning_rate": 1.8333333333333333e-05, "loss": 0.6084, "step": 510 }, { "epoch": 0.08, "learning_rate": 1.8695652173913045e-05, "loss": 0.6028, "step": 520 }, { "epoch": 0.09, "learning_rate": 1.9057971014492754e-05, "loss": 0.6004, "step": 530 }, { "epoch": 0.09, "learning_rate": 1.9420289855072467e-05, "loss": 0.5926, "step": 540 }, { "epoch": 0.09, "learning_rate": 1.9782608695652176e-05, "loss": 0.5886, "step": 550 }, { "epoch": 0.09, "learning_rate": 1.9995513683266043e-05, "loss": 0.5958, "step": 560 }, { "epoch": 0.09, "learning_rate": 1.9984297891431138e-05, "loss": 0.596, "step": 570 }, { "epoch": 0.09, "learning_rate": 1.9973082099596232e-05, "loss": 0.5946, "step": 580 }, { "epoch": 0.1, "learning_rate": 1.996186630776133e-05, "loss": 0.6011, "step": 590 }, { "epoch": 0.1, "learning_rate": 1.9950650515926425e-05, "loss": 0.5956, "step": 600 }, { "epoch": 0.1, "learning_rate": 1.9939434724091523e-05, "loss": 0.5838, "step": 610 }, { "epoch": 0.1, "learning_rate": 1.992821893225662e-05, "loss": 0.5925, "step": 620 }, { "epoch": 0.1, "learning_rate": 1.9917003140421716e-05, "loss": 0.5697, "step": 630 }, { "epoch": 0.1, "learning_rate": 1.990578734858681e-05, "loss": 0.5766, "step": 640 }, { "epoch": 0.11, "learning_rate": 1.989457155675191e-05, "loss": 0.5821, "step": 650 }, { "epoch": 0.11, "learning_rate": 1.9883355764917006e-05, "loss": 0.5945, "step": 660 }, { "epoch": 0.11, "learning_rate": 1.98721399730821e-05, "loss": 0.5798, "step": 670 }, { "epoch": 0.11, "learning_rate": 1.98609241812472e-05, "loss": 0.5778, "step": 680 }, { "epoch": 0.11, "learning_rate": 1.9849708389412294e-05, "loss": 0.5762, "step": 690 }, { "epoch": 0.11, "learning_rate": 1.983849259757739e-05, "loss": 0.5755, "step": 700 }, { "epoch": 0.12, "learning_rate": 1.9827276805742487e-05, "loss": 0.5721, "step": 710 }, { "epoch": 0.12, "learning_rate": 1.9816061013907585e-05, "loss": 0.5892, "step": 720 }, { "epoch": 0.12, "learning_rate": 1.980484522207268e-05, "loss": 0.5829, "step": 730 }, { "epoch": 0.12, "learning_rate": 1.9793629430237777e-05, "loss": 0.5775, "step": 740 }, { "epoch": 0.12, "learning_rate": 1.9782413638402872e-05, "loss": 0.5792, "step": 750 }, { "epoch": 0.12, "learning_rate": 1.977119784656797e-05, "loss": 0.579, "step": 760 }, { "epoch": 0.13, "learning_rate": 1.9759982054733068e-05, "loss": 0.5767, "step": 770 }, { "epoch": 0.13, "learning_rate": 1.9748766262898163e-05, "loss": 0.5719, "step": 780 }, { "epoch": 0.13, "learning_rate": 1.9737550471063257e-05, "loss": 0.5804, "step": 790 }, { "epoch": 0.13, "learning_rate": 1.9726334679228355e-05, "loss": 0.5908, "step": 800 }, { "epoch": 0.13, "learning_rate": 1.971511888739345e-05, "loss": 0.5737, "step": 810 }, { "epoch": 0.13, "learning_rate": 1.9703903095558548e-05, "loss": 0.5715, "step": 820 }, { "epoch": 0.14, "learning_rate": 1.9692687303723646e-05, "loss": 0.5866, "step": 830 }, { "epoch": 0.14, "learning_rate": 1.968147151188874e-05, "loss": 0.5663, "step": 840 }, { "epoch": 0.14, "learning_rate": 1.9670255720053836e-05, "loss": 0.5799, "step": 850 }, { "epoch": 0.14, "learning_rate": 1.9659039928218934e-05, "loss": 0.5682, "step": 860 }, { "epoch": 0.14, "learning_rate": 1.964782413638403e-05, "loss": 0.5795, "step": 870 }, { "epoch": 0.14, "learning_rate": 1.9636608344549126e-05, "loss": 0.5683, "step": 880 }, { "epoch": 0.15, "learning_rate": 1.9625392552714224e-05, "loss": 0.5683, "step": 890 }, { "epoch": 0.15, "learning_rate": 1.961417676087932e-05, "loss": 0.5645, "step": 900 }, { "epoch": 0.15, "learning_rate": 1.9602960969044417e-05, "loss": 0.572, "step": 910 }, { "epoch": 0.15, "learning_rate": 1.9591745177209515e-05, "loss": 0.5552, "step": 920 }, { "epoch": 0.15, "learning_rate": 1.958052938537461e-05, "loss": 0.5713, "step": 930 }, { "epoch": 0.15, "learning_rate": 1.9569313593539704e-05, "loss": 0.5688, "step": 940 }, { "epoch": 0.16, "learning_rate": 1.9558097801704803e-05, "loss": 0.5666, "step": 950 }, { "epoch": 0.16, "learning_rate": 1.9546882009869897e-05, "loss": 0.566, "step": 960 }, { "epoch": 0.16, "learning_rate": 1.9535666218034995e-05, "loss": 0.5658, "step": 970 }, { "epoch": 0.16, "learning_rate": 1.9524450426200093e-05, "loss": 0.5718, "step": 980 }, { "epoch": 0.16, "learning_rate": 1.9513234634365188e-05, "loss": 0.5559, "step": 990 }, { "epoch": 0.16, "learning_rate": 1.9502018842530283e-05, "loss": 0.5644, "step": 1000 }, { "epoch": 0.16, "learning_rate": 1.949080305069538e-05, "loss": 0.5648, "step": 1010 }, { "epoch": 0.17, "learning_rate": 1.947958725886048e-05, "loss": 0.562, "step": 1020 }, { "epoch": 0.17, "learning_rate": 1.9468371467025573e-05, "loss": 0.5652, "step": 1030 }, { "epoch": 0.17, "learning_rate": 1.945715567519067e-05, "loss": 0.5679, "step": 1040 }, { "epoch": 0.17, "learning_rate": 1.9445939883355766e-05, "loss": 0.5643, "step": 1050 }, { "epoch": 0.17, "learning_rate": 1.943472409152086e-05, "loss": 0.5566, "step": 1060 }, { "epoch": 0.17, "learning_rate": 1.942350829968596e-05, "loss": 0.566, "step": 1070 }, { "epoch": 0.18, "learning_rate": 1.9412292507851057e-05, "loss": 0.5593, "step": 1080 }, { "epoch": 0.18, "learning_rate": 1.940107671601615e-05, "loss": 0.5719, "step": 1090 }, { "epoch": 0.18, "learning_rate": 1.938986092418125e-05, "loss": 0.5686, "step": 1100 }, { "epoch": 0.18, "learning_rate": 1.9378645132346344e-05, "loss": 0.5606, "step": 1110 }, { "epoch": 0.18, "learning_rate": 1.9367429340511442e-05, "loss": 0.5578, "step": 1120 }, { "epoch": 0.18, "learning_rate": 1.935621354867654e-05, "loss": 0.5519, "step": 1130 }, { "epoch": 0.19, "learning_rate": 1.9344997756841635e-05, "loss": 0.5563, "step": 1140 }, { "epoch": 0.19, "learning_rate": 1.933378196500673e-05, "loss": 0.5595, "step": 1150 }, { "epoch": 0.19, "learning_rate": 1.9322566173171828e-05, "loss": 0.55, "step": 1160 }, { "epoch": 0.19, "learning_rate": 1.9311350381336926e-05, "loss": 0.5602, "step": 1170 }, { "epoch": 0.19, "learning_rate": 1.930013458950202e-05, "loss": 0.5553, "step": 1180 }, { "epoch": 0.19, "learning_rate": 1.928891879766712e-05, "loss": 0.5724, "step": 1190 }, { "epoch": 0.2, "learning_rate": 1.9277703005832213e-05, "loss": 0.5535, "step": 1200 }, { "epoch": 0.2, "learning_rate": 1.9266487213997308e-05, "loss": 0.5615, "step": 1210 }, { "epoch": 0.2, "learning_rate": 1.9255271422162406e-05, "loss": 0.5656, "step": 1220 }, { "epoch": 0.2, "learning_rate": 1.9244055630327504e-05, "loss": 0.5501, "step": 1230 }, { "epoch": 0.2, "learning_rate": 1.92328398384926e-05, "loss": 0.5659, "step": 1240 }, { "epoch": 0.2, "learning_rate": 1.9221624046657697e-05, "loss": 0.5625, "step": 1250 }, { "epoch": 0.21, "learning_rate": 1.921040825482279e-05, "loss": 0.5602, "step": 1260 }, { "epoch": 0.21, "learning_rate": 1.919919246298789e-05, "loss": 0.5519, "step": 1270 }, { "epoch": 0.21, "learning_rate": 1.9187976671152987e-05, "loss": 0.549, "step": 1280 }, { "epoch": 0.21, "learning_rate": 1.9176760879318082e-05, "loss": 0.5636, "step": 1290 }, { "epoch": 0.21, "learning_rate": 1.9165545087483177e-05, "loss": 0.553, "step": 1300 }, { "epoch": 0.21, "learning_rate": 1.9154329295648275e-05, "loss": 0.5597, "step": 1310 }, { "epoch": 0.22, "learning_rate": 1.914311350381337e-05, "loss": 0.5419, "step": 1320 }, { "epoch": 0.22, "learning_rate": 1.9131897711978467e-05, "loss": 0.5368, "step": 1330 }, { "epoch": 0.22, "learning_rate": 1.9120681920143566e-05, "loss": 0.555, "step": 1340 }, { "epoch": 0.22, "learning_rate": 1.910946612830866e-05, "loss": 0.5579, "step": 1350 }, { "epoch": 0.22, "learning_rate": 1.9098250336473755e-05, "loss": 0.5479, "step": 1360 }, { "epoch": 0.22, "learning_rate": 1.9087034544638853e-05, "loss": 0.563, "step": 1370 }, { "epoch": 0.23, "learning_rate": 1.907581875280395e-05, "loss": 0.5491, "step": 1380 }, { "epoch": 0.23, "learning_rate": 1.9064602960969046e-05, "loss": 0.5491, "step": 1390 }, { "epoch": 0.23, "learning_rate": 1.9053387169134144e-05, "loss": 0.5511, "step": 1400 }, { "epoch": 0.23, "learning_rate": 1.904217137729924e-05, "loss": 0.5562, "step": 1410 }, { "epoch": 0.23, "learning_rate": 1.9030955585464333e-05, "loss": 0.5568, "step": 1420 }, { "epoch": 0.23, "learning_rate": 1.901973979362943e-05, "loss": 0.5497, "step": 1430 }, { "epoch": 0.23, "learning_rate": 1.900852400179453e-05, "loss": 0.5571, "step": 1440 }, { "epoch": 0.24, "learning_rate": 1.8997308209959624e-05, "loss": 0.5538, "step": 1450 }, { "epoch": 0.24, "learning_rate": 1.8986092418124722e-05, "loss": 0.5479, "step": 1460 }, { "epoch": 0.24, "learning_rate": 1.8974876626289816e-05, "loss": 0.556, "step": 1470 }, { "epoch": 0.24, "learning_rate": 1.8963660834454915e-05, "loss": 0.5588, "step": 1480 }, { "epoch": 0.24, "learning_rate": 1.8952445042620013e-05, "loss": 0.5358, "step": 1490 }, { "epoch": 0.24, "learning_rate": 1.8941229250785107e-05, "loss": 0.5424, "step": 1500 }, { "epoch": 0.25, "learning_rate": 1.8930013458950202e-05, "loss": 0.5487, "step": 1510 }, { "epoch": 0.25, "learning_rate": 1.89187976671153e-05, "loss": 0.5383, "step": 1520 }, { "epoch": 0.25, "learning_rate": 1.8907581875280398e-05, "loss": 0.5493, "step": 1530 }, { "epoch": 0.25, "learning_rate": 1.8896366083445493e-05, "loss": 0.5604, "step": 1540 }, { "epoch": 0.25, "learning_rate": 1.888515029161059e-05, "loss": 0.5501, "step": 1550 }, { "epoch": 0.25, "learning_rate": 1.8873934499775685e-05, "loss": 0.5419, "step": 1560 }, { "epoch": 0.26, "learning_rate": 1.886271870794078e-05, "loss": 0.5444, "step": 1570 }, { "epoch": 0.26, "learning_rate": 1.8851502916105878e-05, "loss": 0.5508, "step": 1580 }, { "epoch": 0.26, "learning_rate": 1.8840287124270976e-05, "loss": 0.5488, "step": 1590 }, { "epoch": 0.26, "learning_rate": 1.882907133243607e-05, "loss": 0.5653, "step": 1600 }, { "epoch": 0.26, "learning_rate": 1.881785554060117e-05, "loss": 0.5458, "step": 1610 }, { "epoch": 0.26, "learning_rate": 1.8806639748766264e-05, "loss": 0.5548, "step": 1620 }, { "epoch": 0.27, "learning_rate": 1.879542395693136e-05, "loss": 0.5379, "step": 1630 }, { "epoch": 0.27, "learning_rate": 1.878420816509646e-05, "loss": 0.5567, "step": 1640 }, { "epoch": 0.27, "learning_rate": 1.8772992373261554e-05, "loss": 0.5523, "step": 1650 }, { "epoch": 0.27, "learning_rate": 1.876177658142665e-05, "loss": 0.5509, "step": 1660 }, { "epoch": 0.27, "learning_rate": 1.8750560789591747e-05, "loss": 0.5523, "step": 1670 }, { "epoch": 0.27, "learning_rate": 1.873934499775684e-05, "loss": 0.5454, "step": 1680 }, { "epoch": 0.28, "learning_rate": 1.872812920592194e-05, "loss": 0.5513, "step": 1690 }, { "epoch": 0.28, "learning_rate": 1.8716913414087038e-05, "loss": 0.5462, "step": 1700 }, { "epoch": 0.28, "learning_rate": 1.8705697622252132e-05, "loss": 0.5468, "step": 1710 }, { "epoch": 0.28, "learning_rate": 1.8694481830417227e-05, "loss": 0.5398, "step": 1720 }, { "epoch": 0.28, "learning_rate": 1.8683266038582325e-05, "loss": 0.5456, "step": 1730 }, { "epoch": 0.28, "learning_rate": 1.8672050246747423e-05, "loss": 0.5361, "step": 1740 }, { "epoch": 0.29, "learning_rate": 1.8660834454912518e-05, "loss": 0.5508, "step": 1750 }, { "epoch": 0.29, "learning_rate": 1.8649618663077616e-05, "loss": 0.5339, "step": 1760 }, { "epoch": 0.29, "learning_rate": 1.863840287124271e-05, "loss": 0.5403, "step": 1770 }, { "epoch": 0.29, "learning_rate": 1.8627187079407805e-05, "loss": 0.548, "step": 1780 }, { "epoch": 0.29, "learning_rate": 1.8615971287572903e-05, "loss": 0.5424, "step": 1790 }, { "epoch": 0.29, "learning_rate": 1.8604755495738e-05, "loss": 0.5378, "step": 1800 }, { "epoch": 0.3, "learning_rate": 1.8593539703903096e-05, "loss": 0.5549, "step": 1810 }, { "epoch": 0.3, "learning_rate": 1.8582323912068194e-05, "loss": 0.5467, "step": 1820 }, { "epoch": 0.3, "learning_rate": 1.857110812023329e-05, "loss": 0.5449, "step": 1830 }, { "epoch": 0.3, "learning_rate": 1.8559892328398387e-05, "loss": 0.545, "step": 1840 }, { "epoch": 0.3, "learning_rate": 1.8548676536563485e-05, "loss": 0.5476, "step": 1850 }, { "epoch": 0.3, "learning_rate": 1.853746074472858e-05, "loss": 0.5323, "step": 1860 }, { "epoch": 0.31, "learning_rate": 1.8526244952893674e-05, "loss": 0.5532, "step": 1870 }, { "epoch": 0.31, "learning_rate": 1.8515029161058772e-05, "loss": 0.5405, "step": 1880 }, { "epoch": 0.31, "learning_rate": 1.850381336922387e-05, "loss": 0.5494, "step": 1890 }, { "epoch": 0.31, "learning_rate": 1.8492597577388965e-05, "loss": 0.5471, "step": 1900 }, { "epoch": 0.31, "learning_rate": 1.8481381785554063e-05, "loss": 0.5387, "step": 1910 }, { "epoch": 0.31, "learning_rate": 1.8470165993719158e-05, "loss": 0.5436, "step": 1920 }, { "epoch": 0.31, "learning_rate": 1.8458950201884252e-05, "loss": 0.5469, "step": 1930 }, { "epoch": 0.32, "learning_rate": 1.844773441004935e-05, "loss": 0.529, "step": 1940 }, { "epoch": 0.32, "learning_rate": 1.843651861821445e-05, "loss": 0.5439, "step": 1950 }, { "epoch": 0.32, "learning_rate": 1.8425302826379543e-05, "loss": 0.5408, "step": 1960 }, { "epoch": 0.32, "learning_rate": 1.841408703454464e-05, "loss": 0.5361, "step": 1970 }, { "epoch": 0.32, "learning_rate": 1.8402871242709736e-05, "loss": 0.5388, "step": 1980 }, { "epoch": 0.32, "learning_rate": 1.8391655450874834e-05, "loss": 0.5369, "step": 1990 }, { "epoch": 0.33, "learning_rate": 1.8380439659039932e-05, "loss": 0.5366, "step": 2000 }, { "epoch": 0.33, "learning_rate": 1.8369223867205027e-05, "loss": 0.5493, "step": 2010 }, { "epoch": 0.33, "learning_rate": 1.835800807537012e-05, "loss": 0.5346, "step": 2020 }, { "epoch": 0.33, "learning_rate": 1.834679228353522e-05, "loss": 0.5455, "step": 2030 }, { "epoch": 0.33, "learning_rate": 1.8335576491700314e-05, "loss": 0.5452, "step": 2040 }, { "epoch": 0.33, "learning_rate": 1.8324360699865412e-05, "loss": 0.5398, "step": 2050 }, { "epoch": 0.34, "learning_rate": 1.831314490803051e-05, "loss": 0.5558, "step": 2060 }, { "epoch": 0.34, "learning_rate": 1.8301929116195605e-05, "loss": 0.5367, "step": 2070 }, { "epoch": 0.34, "learning_rate": 1.82907133243607e-05, "loss": 0.5377, "step": 2080 }, { "epoch": 0.34, "learning_rate": 1.8279497532525797e-05, "loss": 0.5487, "step": 2090 }, { "epoch": 0.34, "learning_rate": 1.8268281740690895e-05, "loss": 0.5455, "step": 2100 }, { "epoch": 0.34, "learning_rate": 1.825706594885599e-05, "loss": 0.5506, "step": 2110 }, { "epoch": 0.35, "learning_rate": 1.8245850157021088e-05, "loss": 0.533, "step": 2120 }, { "epoch": 0.35, "learning_rate": 1.8234634365186183e-05, "loss": 0.5375, "step": 2130 }, { "epoch": 0.35, "learning_rate": 1.8223418573351277e-05, "loss": 0.5261, "step": 2140 }, { "epoch": 0.35, "learning_rate": 1.8212202781516376e-05, "loss": 0.5478, "step": 2150 }, { "epoch": 0.35, "learning_rate": 1.8200986989681474e-05, "loss": 0.5414, "step": 2160 }, { "epoch": 0.35, "learning_rate": 1.8189771197846568e-05, "loss": 0.5471, "step": 2170 }, { "epoch": 0.36, "learning_rate": 1.8178555406011666e-05, "loss": 0.53, "step": 2180 }, { "epoch": 0.36, "learning_rate": 1.816733961417676e-05, "loss": 0.5265, "step": 2190 }, { "epoch": 0.36, "learning_rate": 1.815612382234186e-05, "loss": 0.5309, "step": 2200 }, { "epoch": 0.36, "learning_rate": 1.8144908030506957e-05, "loss": 0.5479, "step": 2210 }, { "epoch": 0.36, "learning_rate": 1.8133692238672052e-05, "loss": 0.546, "step": 2220 }, { "epoch": 0.36, "learning_rate": 1.8122476446837146e-05, "loss": 0.5355, "step": 2230 }, { "epoch": 0.37, "learning_rate": 1.8111260655002244e-05, "loss": 0.5308, "step": 2240 }, { "epoch": 0.37, "learning_rate": 1.8100044863167343e-05, "loss": 0.5306, "step": 2250 }, { "epoch": 0.37, "learning_rate": 1.8088829071332437e-05, "loss": 0.54, "step": 2260 }, { "epoch": 0.37, "learning_rate": 1.8077613279497535e-05, "loss": 0.5279, "step": 2270 }, { "epoch": 0.37, "learning_rate": 1.806639748766263e-05, "loss": 0.5311, "step": 2280 }, { "epoch": 0.37, "learning_rate": 1.8055181695827725e-05, "loss": 0.5315, "step": 2290 }, { "epoch": 0.38, "learning_rate": 1.8043965903992823e-05, "loss": 0.5456, "step": 2300 }, { "epoch": 0.38, "learning_rate": 1.803275011215792e-05, "loss": 0.5342, "step": 2310 }, { "epoch": 0.38, "learning_rate": 1.8021534320323015e-05, "loss": 0.5445, "step": 2320 }, { "epoch": 0.38, "learning_rate": 1.8010318528488113e-05, "loss": 0.5295, "step": 2330 }, { "epoch": 0.38, "learning_rate": 1.7999102736653208e-05, "loss": 0.5337, "step": 2340 }, { "epoch": 0.38, "learning_rate": 1.7987886944818306e-05, "loss": 0.5375, "step": 2350 }, { "epoch": 0.39, "learning_rate": 1.7976671152983404e-05, "loss": 0.5292, "step": 2360 }, { "epoch": 0.39, "learning_rate": 1.79654553611485e-05, "loss": 0.5365, "step": 2370 }, { "epoch": 0.39, "learning_rate": 1.7954239569313593e-05, "loss": 0.5428, "step": 2380 }, { "epoch": 0.39, "learning_rate": 1.794302377747869e-05, "loss": 0.5489, "step": 2390 }, { "epoch": 0.39, "learning_rate": 1.7931807985643786e-05, "loss": 0.5344, "step": 2400 }, { "epoch": 0.39, "learning_rate": 1.7920592193808884e-05, "loss": 0.5293, "step": 2410 }, { "epoch": 0.39, "learning_rate": 1.7909376401973982e-05, "loss": 0.5282, "step": 2420 }, { "epoch": 0.4, "learning_rate": 1.7898160610139077e-05, "loss": 0.5259, "step": 2430 }, { "epoch": 0.4, "learning_rate": 1.788694481830417e-05, "loss": 0.5316, "step": 2440 }, { "epoch": 0.4, "learning_rate": 1.787572902646927e-05, "loss": 0.5329, "step": 2450 }, { "epoch": 0.4, "learning_rate": 1.7864513234634368e-05, "loss": 0.515, "step": 2460 }, { "epoch": 0.4, "learning_rate": 1.7853297442799462e-05, "loss": 0.5393, "step": 2470 }, { "epoch": 0.4, "learning_rate": 1.784208165096456e-05, "loss": 0.5301, "step": 2480 }, { "epoch": 0.41, "learning_rate": 1.7830865859129655e-05, "loss": 0.5246, "step": 2490 }, { "epoch": 0.41, "learning_rate": 1.7819650067294753e-05, "loss": 0.5313, "step": 2500 }, { "epoch": 0.41, "learning_rate": 1.7808434275459848e-05, "loss": 0.5329, "step": 2510 }, { "epoch": 0.41, "learning_rate": 1.7797218483624946e-05, "loss": 0.5419, "step": 2520 }, { "epoch": 0.41, "learning_rate": 1.778600269179004e-05, "loss": 0.5322, "step": 2530 }, { "epoch": 0.41, "learning_rate": 1.777478689995514e-05, "loss": 0.5385, "step": 2540 }, { "epoch": 0.42, "learning_rate": 1.7763571108120233e-05, "loss": 0.5218, "step": 2550 }, { "epoch": 0.42, "learning_rate": 1.775235531628533e-05, "loss": 0.5205, "step": 2560 }, { "epoch": 0.42, "learning_rate": 1.774113952445043e-05, "loss": 0.5293, "step": 2570 }, { "epoch": 0.42, "learning_rate": 1.7729923732615524e-05, "loss": 0.5289, "step": 2580 }, { "epoch": 0.42, "learning_rate": 1.771870794078062e-05, "loss": 0.5492, "step": 2590 }, { "epoch": 0.42, "learning_rate": 1.7707492148945717e-05, "loss": 0.5246, "step": 2600 }, { "epoch": 0.43, "learning_rate": 1.7696276357110815e-05, "loss": 0.5331, "step": 2610 }, { "epoch": 0.43, "learning_rate": 1.768506056527591e-05, "loss": 0.5423, "step": 2620 }, { "epoch": 0.43, "learning_rate": 1.7673844773441007e-05, "loss": 0.533, "step": 2630 }, { "epoch": 0.43, "learning_rate": 1.7662628981606102e-05, "loss": 0.538, "step": 2640 }, { "epoch": 0.43, "learning_rate": 1.7651413189771197e-05, "loss": 0.5277, "step": 2650 }, { "epoch": 0.43, "learning_rate": 1.7640197397936295e-05, "loss": 0.5307, "step": 2660 }, { "epoch": 0.44, "learning_rate": 1.7628981606101393e-05, "loss": 0.5279, "step": 2670 }, { "epoch": 0.44, "learning_rate": 1.7617765814266488e-05, "loss": 0.5358, "step": 2680 }, { "epoch": 0.44, "learning_rate": 1.7606550022431586e-05, "loss": 0.5265, "step": 2690 }, { "epoch": 0.44, "learning_rate": 1.759533423059668e-05, "loss": 0.5285, "step": 2700 }, { "epoch": 0.44, "learning_rate": 1.758411843876178e-05, "loss": 0.5303, "step": 2710 }, { "epoch": 0.44, "learning_rate": 1.7572902646926876e-05, "loss": 0.5431, "step": 2720 }, { "epoch": 0.45, "learning_rate": 1.756168685509197e-05, "loss": 0.5312, "step": 2730 }, { "epoch": 0.45, "learning_rate": 1.7550471063257066e-05, "loss": 0.5296, "step": 2740 }, { "epoch": 0.45, "learning_rate": 1.7539255271422164e-05, "loss": 0.5282, "step": 2750 }, { "epoch": 0.45, "learning_rate": 1.752803947958726e-05, "loss": 0.5378, "step": 2760 }, { "epoch": 0.45, "learning_rate": 1.7516823687752356e-05, "loss": 0.5372, "step": 2770 }, { "epoch": 0.45, "learning_rate": 1.7505607895917455e-05, "loss": 0.5235, "step": 2780 }, { "epoch": 0.46, "learning_rate": 1.749439210408255e-05, "loss": 0.5319, "step": 2790 }, { "epoch": 0.46, "learning_rate": 1.7483176312247647e-05, "loss": 0.522, "step": 2800 }, { "epoch": 0.46, "learning_rate": 1.7471960520412742e-05, "loss": 0.5346, "step": 2810 }, { "epoch": 0.46, "learning_rate": 1.746074472857784e-05, "loss": 0.5313, "step": 2820 }, { "epoch": 0.46, "learning_rate": 1.7449528936742938e-05, "loss": 0.5459, "step": 2830 }, { "epoch": 0.46, "learning_rate": 1.7438313144908033e-05, "loss": 0.5291, "step": 2840 }, { "epoch": 0.47, "learning_rate": 1.7427097353073127e-05, "loss": 0.5398, "step": 2850 }, { "epoch": 0.47, "learning_rate": 1.7415881561238225e-05, "loss": 0.5225, "step": 2860 }, { "epoch": 0.47, "learning_rate": 1.740466576940332e-05, "loss": 0.5232, "step": 2870 }, { "epoch": 0.47, "learning_rate": 1.7393449977568418e-05, "loss": 0.5315, "step": 2880 }, { "epoch": 0.47, "learning_rate": 1.7382234185733516e-05, "loss": 0.5323, "step": 2890 }, { "epoch": 0.47, "learning_rate": 1.737101839389861e-05, "loss": 0.5278, "step": 2900 }, { "epoch": 0.47, "learning_rate": 1.7359802602063705e-05, "loss": 0.5367, "step": 2910 }, { "epoch": 0.48, "learning_rate": 1.7348586810228804e-05, "loss": 0.5203, "step": 2920 }, { "epoch": 0.48, "learning_rate": 1.73373710183939e-05, "loss": 0.5267, "step": 2930 }, { "epoch": 0.48, "learning_rate": 1.7326155226558996e-05, "loss": 0.5389, "step": 2940 }, { "epoch": 0.48, "learning_rate": 1.7314939434724094e-05, "loss": 0.5327, "step": 2950 }, { "epoch": 0.48, "learning_rate": 1.730372364288919e-05, "loss": 0.5294, "step": 2960 }, { "epoch": 0.48, "learning_rate": 1.7292507851054287e-05, "loss": 0.5287, "step": 2970 }, { "epoch": 0.49, "learning_rate": 1.728129205921938e-05, "loss": 0.5358, "step": 2980 }, { "epoch": 0.49, "learning_rate": 1.727007626738448e-05, "loss": 0.5345, "step": 2990 }, { "epoch": 0.49, "learning_rate": 1.7258860475549574e-05, "loss": 0.5265, "step": 3000 }, { "epoch": 0.49, "learning_rate": 1.7247644683714672e-05, "loss": 0.525, "step": 3010 }, { "epoch": 0.49, "learning_rate": 1.7236428891879767e-05, "loss": 0.5299, "step": 3020 }, { "epoch": 0.49, "learning_rate": 1.7225213100044865e-05, "loss": 0.5295, "step": 3030 }, { "epoch": 0.5, "learning_rate": 1.7213997308209963e-05, "loss": 0.5317, "step": 3040 }, { "epoch": 0.5, "learning_rate": 1.7202781516375058e-05, "loss": 0.5275, "step": 3050 }, { "epoch": 0.5, "learning_rate": 1.7191565724540153e-05, "loss": 0.5304, "step": 3060 }, { "epoch": 0.5, "learning_rate": 1.718034993270525e-05, "loss": 0.5288, "step": 3070 }, { "epoch": 0.5, "learning_rate": 1.716913414087035e-05, "loss": 0.5197, "step": 3080 }, { "epoch": 0.5, "learning_rate": 1.7157918349035443e-05, "loss": 0.5305, "step": 3090 }, { "epoch": 0.51, "learning_rate": 1.714670255720054e-05, "loss": 0.5205, "step": 3100 }, { "epoch": 0.51, "learning_rate": 1.7135486765365636e-05, "loss": 0.5306, "step": 3110 }, { "epoch": 0.51, "learning_rate": 1.712427097353073e-05, "loss": 0.5228, "step": 3120 }, { "epoch": 0.51, "learning_rate": 1.711305518169583e-05, "loss": 0.5241, "step": 3130 }, { "epoch": 0.51, "learning_rate": 1.7101839389860927e-05, "loss": 0.5302, "step": 3140 }, { "epoch": 0.51, "learning_rate": 1.709062359802602e-05, "loss": 0.5254, "step": 3150 }, { "epoch": 0.52, "learning_rate": 1.707940780619112e-05, "loss": 0.529, "step": 3160 }, { "epoch": 0.52, "learning_rate": 1.7068192014356214e-05, "loss": 0.5224, "step": 3170 }, { "epoch": 0.52, "learning_rate": 1.7056976222521312e-05, "loss": 0.5328, "step": 3180 }, { "epoch": 0.52, "learning_rate": 1.704576043068641e-05, "loss": 0.5248, "step": 3190 }, { "epoch": 0.52, "learning_rate": 1.7034544638851505e-05, "loss": 0.5255, "step": 3200 }, { "epoch": 0.52, "learning_rate": 1.70233288470166e-05, "loss": 0.5227, "step": 3210 }, { "epoch": 0.53, "learning_rate": 1.7012113055181698e-05, "loss": 0.5266, "step": 3220 }, { "epoch": 0.53, "learning_rate": 1.7000897263346792e-05, "loss": 0.5202, "step": 3230 }, { "epoch": 0.53, "learning_rate": 1.698968147151189e-05, "loss": 0.5281, "step": 3240 }, { "epoch": 0.53, "learning_rate": 1.697846567967699e-05, "loss": 0.5326, "step": 3250 }, { "epoch": 0.53, "learning_rate": 1.6967249887842083e-05, "loss": 0.5226, "step": 3260 }, { "epoch": 0.53, "learning_rate": 1.6956034096007178e-05, "loss": 0.5169, "step": 3270 }, { "epoch": 0.54, "learning_rate": 1.6944818304172276e-05, "loss": 0.5308, "step": 3280 }, { "epoch": 0.54, "learning_rate": 1.6933602512337374e-05, "loss": 0.5189, "step": 3290 }, { "epoch": 0.54, "learning_rate": 1.692238672050247e-05, "loss": 0.5162, "step": 3300 }, { "epoch": 0.54, "learning_rate": 1.6911170928667567e-05, "loss": 0.5273, "step": 3310 }, { "epoch": 0.54, "learning_rate": 1.689995513683266e-05, "loss": 0.5184, "step": 3320 }, { "epoch": 0.54, "learning_rate": 1.688873934499776e-05, "loss": 0.5327, "step": 3330 }, { "epoch": 0.55, "learning_rate": 1.6877523553162857e-05, "loss": 0.5266, "step": 3340 }, { "epoch": 0.55, "learning_rate": 1.6866307761327952e-05, "loss": 0.5298, "step": 3350 }, { "epoch": 0.55, "learning_rate": 1.6855091969493047e-05, "loss": 0.529, "step": 3360 }, { "epoch": 0.55, "learning_rate": 1.6843876177658145e-05, "loss": 0.5149, "step": 3370 }, { "epoch": 0.55, "learning_rate": 1.683266038582324e-05, "loss": 0.5194, "step": 3380 }, { "epoch": 0.55, "learning_rate": 1.6821444593988337e-05, "loss": 0.5307, "step": 3390 }, { "epoch": 0.55, "learning_rate": 1.6810228802153435e-05, "loss": 0.5248, "step": 3400 }, { "epoch": 0.56, "learning_rate": 1.679901301031853e-05, "loss": 0.5214, "step": 3410 }, { "epoch": 0.56, "learning_rate": 1.6787797218483625e-05, "loss": 0.5306, "step": 3420 }, { "epoch": 0.56, "learning_rate": 1.6776581426648723e-05, "loss": 0.5154, "step": 3430 }, { "epoch": 0.56, "learning_rate": 1.676536563481382e-05, "loss": 0.5237, "step": 3440 }, { "epoch": 0.56, "learning_rate": 1.6754149842978916e-05, "loss": 0.5081, "step": 3450 }, { "epoch": 0.56, "learning_rate": 1.6742934051144014e-05, "loss": 0.5244, "step": 3460 }, { "epoch": 0.57, "learning_rate": 1.6731718259309108e-05, "loss": 0.5235, "step": 3470 }, { "epoch": 0.57, "learning_rate": 1.6720502467474203e-05, "loss": 0.5234, "step": 3480 }, { "epoch": 0.57, "learning_rate": 1.67092866756393e-05, "loss": 0.5189, "step": 3490 }, { "epoch": 0.57, "learning_rate": 1.66980708838044e-05, "loss": 0.5247, "step": 3500 }, { "epoch": 0.57, "learning_rate": 1.6686855091969494e-05, "loss": 0.5196, "step": 3510 }, { "epoch": 0.57, "learning_rate": 1.6675639300134592e-05, "loss": 0.5155, "step": 3520 }, { "epoch": 0.58, "learning_rate": 1.6664423508299686e-05, "loss": 0.5308, "step": 3530 }, { "epoch": 0.58, "learning_rate": 1.6653207716464784e-05, "loss": 0.5132, "step": 3540 }, { "epoch": 0.58, "learning_rate": 1.6641991924629883e-05, "loss": 0.5259, "step": 3550 }, { "epoch": 0.58, "learning_rate": 1.6630776132794977e-05, "loss": 0.5314, "step": 3560 }, { "epoch": 0.58, "learning_rate": 1.6619560340960072e-05, "loss": 0.5243, "step": 3570 }, { "epoch": 0.58, "learning_rate": 1.660834454912517e-05, "loss": 0.5169, "step": 3580 }, { "epoch": 0.59, "learning_rate": 1.6597128757290265e-05, "loss": 0.5337, "step": 3590 }, { "epoch": 0.59, "learning_rate": 1.6585912965455363e-05, "loss": 0.5289, "step": 3600 }, { "epoch": 0.59, "learning_rate": 1.657469717362046e-05, "loss": 0.5277, "step": 3610 }, { "epoch": 0.59, "learning_rate": 1.6563481381785555e-05, "loss": 0.5248, "step": 3620 }, { "epoch": 0.59, "learning_rate": 1.655226558995065e-05, "loss": 0.5183, "step": 3630 }, { "epoch": 0.59, "learning_rate": 1.6541049798115748e-05, "loss": 0.5115, "step": 3640 }, { "epoch": 0.6, "learning_rate": 1.6529834006280846e-05, "loss": 0.5304, "step": 3650 }, { "epoch": 0.6, "learning_rate": 1.651861821444594e-05, "loss": 0.521, "step": 3660 }, { "epoch": 0.6, "learning_rate": 1.650740242261104e-05, "loss": 0.5214, "step": 3670 }, { "epoch": 0.6, "learning_rate": 1.6496186630776133e-05, "loss": 0.5096, "step": 3680 }, { "epoch": 0.6, "learning_rate": 1.648497083894123e-05, "loss": 0.5259, "step": 3690 }, { "epoch": 0.6, "learning_rate": 1.647375504710633e-05, "loss": 0.522, "step": 3700 }, { "epoch": 0.61, "learning_rate": 1.6462539255271424e-05, "loss": 0.5248, "step": 3710 }, { "epoch": 0.61, "learning_rate": 1.645132346343652e-05, "loss": 0.5199, "step": 3720 }, { "epoch": 0.61, "learning_rate": 1.6440107671601617e-05, "loss": 0.5139, "step": 3730 }, { "epoch": 0.61, "learning_rate": 1.642889187976671e-05, "loss": 0.5187, "step": 3740 }, { "epoch": 0.61, "learning_rate": 1.641767608793181e-05, "loss": 0.5287, "step": 3750 }, { "epoch": 0.61, "learning_rate": 1.6406460296096908e-05, "loss": 0.5186, "step": 3760 }, { "epoch": 0.62, "learning_rate": 1.6395244504262002e-05, "loss": 0.5163, "step": 3770 }, { "epoch": 0.62, "learning_rate": 1.6384028712427097e-05, "loss": 0.5234, "step": 3780 }, { "epoch": 0.62, "learning_rate": 1.6372812920592195e-05, "loss": 0.5194, "step": 3790 }, { "epoch": 0.62, "learning_rate": 1.6361597128757293e-05, "loss": 0.5202, "step": 3800 }, { "epoch": 0.62, "learning_rate": 1.6350381336922388e-05, "loss": 0.5079, "step": 3810 }, { "epoch": 0.62, "learning_rate": 1.6339165545087486e-05, "loss": 0.5285, "step": 3820 }, { "epoch": 0.62, "learning_rate": 1.632794975325258e-05, "loss": 0.5293, "step": 3830 }, { "epoch": 0.63, "learning_rate": 1.6316733961417675e-05, "loss": 0.5281, "step": 3840 }, { "epoch": 0.63, "learning_rate": 1.6305518169582773e-05, "loss": 0.5256, "step": 3850 }, { "epoch": 0.63, "learning_rate": 1.629430237774787e-05, "loss": 0.5185, "step": 3860 }, { "epoch": 0.63, "learning_rate": 1.6283086585912966e-05, "loss": 0.5191, "step": 3870 }, { "epoch": 0.63, "learning_rate": 1.6271870794078064e-05, "loss": 0.5282, "step": 3880 }, { "epoch": 0.63, "learning_rate": 1.626065500224316e-05, "loss": 0.5249, "step": 3890 }, { "epoch": 0.64, "learning_rate": 1.6249439210408257e-05, "loss": 0.5072, "step": 3900 }, { "epoch": 0.64, "learning_rate": 1.6238223418573355e-05, "loss": 0.5154, "step": 3910 }, { "epoch": 0.64, "learning_rate": 1.622700762673845e-05, "loss": 0.5154, "step": 3920 }, { "epoch": 0.64, "learning_rate": 1.6215791834903544e-05, "loss": 0.5181, "step": 3930 }, { "epoch": 0.64, "learning_rate": 1.6204576043068642e-05, "loss": 0.516, "step": 3940 }, { "epoch": 0.64, "learning_rate": 1.6193360251233737e-05, "loss": 0.519, "step": 3950 }, { "epoch": 0.65, "learning_rate": 1.6182144459398835e-05, "loss": 0.5253, "step": 3960 }, { "epoch": 0.65, "learning_rate": 1.6170928667563933e-05, "loss": 0.5235, "step": 3970 }, { "epoch": 0.65, "learning_rate": 1.6159712875729028e-05, "loss": 0.5187, "step": 3980 }, { "epoch": 0.65, "learning_rate": 1.6148497083894122e-05, "loss": 0.5098, "step": 3990 }, { "epoch": 0.65, "learning_rate": 1.613728129205922e-05, "loss": 0.5143, "step": 4000 }, { "epoch": 0.65, "learning_rate": 1.612606550022432e-05, "loss": 0.5114, "step": 4010 }, { "epoch": 0.66, "learning_rate": 1.6114849708389413e-05, "loss": 0.5152, "step": 4020 }, { "epoch": 0.66, "learning_rate": 1.610363391655451e-05, "loss": 0.508, "step": 4030 }, { "epoch": 0.66, "learning_rate": 1.6092418124719606e-05, "loss": 0.5062, "step": 4040 }, { "epoch": 0.66, "learning_rate": 1.6081202332884704e-05, "loss": 0.5186, "step": 4050 }, { "epoch": 0.66, "learning_rate": 1.6069986541049802e-05, "loss": 0.5312, "step": 4060 }, { "epoch": 0.66, "learning_rate": 1.6058770749214896e-05, "loss": 0.5238, "step": 4070 }, { "epoch": 0.67, "learning_rate": 1.604755495737999e-05, "loss": 0.5026, "step": 4080 }, { "epoch": 0.67, "learning_rate": 1.603633916554509e-05, "loss": 0.5239, "step": 4090 }, { "epoch": 0.67, "learning_rate": 1.6025123373710184e-05, "loss": 0.5185, "step": 4100 }, { "epoch": 0.67, "learning_rate": 1.6013907581875282e-05, "loss": 0.5162, "step": 4110 }, { "epoch": 0.67, "learning_rate": 1.600269179004038e-05, "loss": 0.5212, "step": 4120 }, { "epoch": 0.67, "learning_rate": 1.5991475998205475e-05, "loss": 0.5128, "step": 4130 }, { "epoch": 0.68, "learning_rate": 1.598026020637057e-05, "loss": 0.5188, "step": 4140 }, { "epoch": 0.68, "learning_rate": 1.5969044414535667e-05, "loss": 0.506, "step": 4150 }, { "epoch": 0.68, "learning_rate": 1.5957828622700765e-05, "loss": 0.5128, "step": 4160 }, { "epoch": 0.68, "learning_rate": 1.594661283086586e-05, "loss": 0.5244, "step": 4170 }, { "epoch": 0.68, "learning_rate": 1.5935397039030958e-05, "loss": 0.5078, "step": 4180 }, { "epoch": 0.68, "learning_rate": 1.5924181247196053e-05, "loss": 0.5319, "step": 4190 }, { "epoch": 0.69, "learning_rate": 1.5912965455361147e-05, "loss": 0.5186, "step": 4200 }, { "epoch": 0.69, "learning_rate": 1.5901749663526245e-05, "loss": 0.5107, "step": 4210 }, { "epoch": 0.69, "learning_rate": 1.5890533871691344e-05, "loss": 0.5131, "step": 4220 }, { "epoch": 0.69, "learning_rate": 1.5879318079856438e-05, "loss": 0.5136, "step": 4230 }, { "epoch": 0.69, "learning_rate": 1.5868102288021536e-05, "loss": 0.5059, "step": 4240 }, { "epoch": 0.69, "learning_rate": 1.585688649618663e-05, "loss": 0.5064, "step": 4250 }, { "epoch": 0.7, "learning_rate": 1.584567070435173e-05, "loss": 0.5063, "step": 4260 }, { "epoch": 0.7, "learning_rate": 1.5834454912516827e-05, "loss": 0.5301, "step": 4270 }, { "epoch": 0.7, "learning_rate": 1.582323912068192e-05, "loss": 0.5228, "step": 4280 }, { "epoch": 0.7, "learning_rate": 1.5812023328847016e-05, "loss": 0.5207, "step": 4290 }, { "epoch": 0.7, "learning_rate": 1.5800807537012114e-05, "loss": 0.5064, "step": 4300 }, { "epoch": 0.7, "learning_rate": 1.578959174517721e-05, "loss": 0.5148, "step": 4310 }, { "epoch": 0.7, "learning_rate": 1.5778375953342307e-05, "loss": 0.5181, "step": 4320 }, { "epoch": 0.71, "learning_rate": 1.5767160161507405e-05, "loss": 0.5093, "step": 4330 }, { "epoch": 0.71, "learning_rate": 1.57559443696725e-05, "loss": 0.5074, "step": 4340 }, { "epoch": 0.71, "learning_rate": 1.5744728577837594e-05, "loss": 0.5226, "step": 4350 }, { "epoch": 0.71, "learning_rate": 1.5733512786002693e-05, "loss": 0.5122, "step": 4360 }, { "epoch": 0.71, "learning_rate": 1.572229699416779e-05, "loss": 0.5197, "step": 4370 }, { "epoch": 0.71, "learning_rate": 1.5711081202332885e-05, "loss": 0.5143, "step": 4380 }, { "epoch": 0.72, "learning_rate": 1.5699865410497983e-05, "loss": 0.5138, "step": 4390 }, { "epoch": 0.72, "learning_rate": 1.5688649618663078e-05, "loss": 0.5326, "step": 4400 }, { "epoch": 0.72, "learning_rate": 1.5677433826828176e-05, "loss": 0.5139, "step": 4410 }, { "epoch": 0.72, "learning_rate": 1.5666218034993274e-05, "loss": 0.5208, "step": 4420 }, { "epoch": 0.72, "learning_rate": 1.565500224315837e-05, "loss": 0.5114, "step": 4430 }, { "epoch": 0.72, "learning_rate": 1.5643786451323463e-05, "loss": 0.5302, "step": 4440 }, { "epoch": 0.73, "learning_rate": 1.563257065948856e-05, "loss": 0.5207, "step": 4450 }, { "epoch": 0.73, "learning_rate": 1.5621354867653656e-05, "loss": 0.4986, "step": 4460 }, { "epoch": 0.73, "learning_rate": 1.5610139075818754e-05, "loss": 0.5175, "step": 4470 }, { "epoch": 0.73, "learning_rate": 1.5598923283983852e-05, "loss": 0.5168, "step": 4480 }, { "epoch": 0.73, "learning_rate": 1.5587707492148947e-05, "loss": 0.5169, "step": 4490 }, { "epoch": 0.73, "learning_rate": 1.557649170031404e-05, "loss": 0.5121, "step": 4500 }, { "epoch": 0.74, "learning_rate": 1.556527590847914e-05, "loss": 0.5094, "step": 4510 }, { "epoch": 0.74, "learning_rate": 1.5554060116644238e-05, "loss": 0.5169, "step": 4520 }, { "epoch": 0.74, "learning_rate": 1.5542844324809332e-05, "loss": 0.5004, "step": 4530 }, { "epoch": 0.74, "learning_rate": 1.553162853297443e-05, "loss": 0.5154, "step": 4540 }, { "epoch": 0.74, "learning_rate": 1.5520412741139525e-05, "loss": 0.5143, "step": 4550 }, { "epoch": 0.74, "learning_rate": 1.550919694930462e-05, "loss": 0.5234, "step": 4560 }, { "epoch": 0.75, "learning_rate": 1.5497981157469718e-05, "loss": 0.5072, "step": 4570 }, { "epoch": 0.75, "learning_rate": 1.5486765365634816e-05, "loss": 0.5196, "step": 4580 }, { "epoch": 0.75, "learning_rate": 1.547554957379991e-05, "loss": 0.5093, "step": 4590 }, { "epoch": 0.75, "learning_rate": 1.546433378196501e-05, "loss": 0.508, "step": 4600 }, { "epoch": 0.75, "learning_rate": 1.5453117990130103e-05, "loss": 0.5118, "step": 4610 }, { "epoch": 0.75, "learning_rate": 1.54419021982952e-05, "loss": 0.5169, "step": 4620 }, { "epoch": 0.76, "learning_rate": 1.54306864064603e-05, "loss": 0.5196, "step": 4630 }, { "epoch": 0.76, "learning_rate": 1.5419470614625394e-05, "loss": 0.5169, "step": 4640 }, { "epoch": 0.76, "learning_rate": 1.540825482279049e-05, "loss": 0.5043, "step": 4650 }, { "epoch": 0.76, "learning_rate": 1.5397039030955587e-05, "loss": 0.5099, "step": 4660 }, { "epoch": 0.76, "learning_rate": 1.5385823239120685e-05, "loss": 0.5285, "step": 4670 }, { "epoch": 0.76, "learning_rate": 1.537460744728578e-05, "loss": 0.5192, "step": 4680 }, { "epoch": 0.77, "learning_rate": 1.5363391655450877e-05, "loss": 0.521, "step": 4690 }, { "epoch": 0.77, "learning_rate": 1.5352175863615972e-05, "loss": 0.5209, "step": 4700 }, { "epoch": 0.77, "learning_rate": 1.5340960071781067e-05, "loss": 0.5177, "step": 4710 }, { "epoch": 0.77, "learning_rate": 1.5329744279946165e-05, "loss": 0.504, "step": 4720 }, { "epoch": 0.77, "learning_rate": 1.5318528488111263e-05, "loss": 0.5031, "step": 4730 }, { "epoch": 0.77, "learning_rate": 1.5307312696276357e-05, "loss": 0.5065, "step": 4740 }, { "epoch": 0.78, "learning_rate": 1.5296096904441456e-05, "loss": 0.5102, "step": 4750 }, { "epoch": 0.78, "learning_rate": 1.528488111260655e-05, "loss": 0.5086, "step": 4760 }, { "epoch": 0.78, "learning_rate": 1.5273665320771648e-05, "loss": 0.5045, "step": 4770 }, { "epoch": 0.78, "learning_rate": 1.5262449528936746e-05, "loss": 0.503, "step": 4780 }, { "epoch": 0.78, "learning_rate": 1.5251233737101841e-05, "loss": 0.5178, "step": 4790 }, { "epoch": 0.78, "learning_rate": 1.5240017945266936e-05, "loss": 0.5174, "step": 4800 }, { "epoch": 0.78, "learning_rate": 1.5228802153432034e-05, "loss": 0.5183, "step": 4810 }, { "epoch": 0.79, "learning_rate": 1.521758636159713e-05, "loss": 0.5003, "step": 4820 }, { "epoch": 0.79, "learning_rate": 1.5206370569762225e-05, "loss": 0.5163, "step": 4830 }, { "epoch": 0.79, "learning_rate": 1.5195154777927323e-05, "loss": 0.5069, "step": 4840 }, { "epoch": 0.79, "learning_rate": 1.5183938986092419e-05, "loss": 0.5132, "step": 4850 }, { "epoch": 0.79, "learning_rate": 1.5172723194257515e-05, "loss": 0.5199, "step": 4860 }, { "epoch": 0.79, "learning_rate": 1.5161507402422614e-05, "loss": 0.5211, "step": 4870 }, { "epoch": 0.8, "learning_rate": 1.5150291610587708e-05, "loss": 0.5, "step": 4880 }, { "epoch": 0.8, "learning_rate": 1.5139075818752805e-05, "loss": 0.5073, "step": 4890 }, { "epoch": 0.8, "learning_rate": 1.5127860026917903e-05, "loss": 0.5107, "step": 4900 }, { "epoch": 0.8, "learning_rate": 1.5116644235082997e-05, "loss": 0.5222, "step": 4910 }, { "epoch": 0.8, "learning_rate": 1.5105428443248094e-05, "loss": 0.5136, "step": 4920 }, { "epoch": 0.8, "learning_rate": 1.5094212651413192e-05, "loss": 0.5058, "step": 4930 }, { "epoch": 0.81, "learning_rate": 1.5082996859578288e-05, "loss": 0.514, "step": 4940 }, { "epoch": 0.81, "learning_rate": 1.5071781067743383e-05, "loss": 0.5073, "step": 4950 }, { "epoch": 0.81, "learning_rate": 1.506056527590848e-05, "loss": 0.5185, "step": 4960 }, { "epoch": 0.81, "learning_rate": 1.5049349484073577e-05, "loss": 0.5202, "step": 4970 }, { "epoch": 0.81, "learning_rate": 1.5038133692238672e-05, "loss": 0.5137, "step": 4980 }, { "epoch": 0.81, "learning_rate": 1.502691790040377e-05, "loss": 0.5035, "step": 4990 }, { "epoch": 0.82, "learning_rate": 1.5015702108568866e-05, "loss": 0.5166, "step": 5000 }, { "epoch": 0.82, "learning_rate": 1.500448631673396e-05, "loss": 0.5113, "step": 5010 }, { "epoch": 0.82, "learning_rate": 1.4993270524899059e-05, "loss": 0.5061, "step": 5020 }, { "epoch": 0.82, "learning_rate": 1.4982054733064155e-05, "loss": 0.4983, "step": 5030 }, { "epoch": 0.82, "learning_rate": 1.4970838941229252e-05, "loss": 0.5184, "step": 5040 }, { "epoch": 0.82, "learning_rate": 1.495962314939435e-05, "loss": 0.5146, "step": 5050 }, { "epoch": 0.83, "learning_rate": 1.4948407357559444e-05, "loss": 0.5032, "step": 5060 }, { "epoch": 0.83, "learning_rate": 1.493719156572454e-05, "loss": 0.5022, "step": 5070 }, { "epoch": 0.83, "learning_rate": 1.4925975773889639e-05, "loss": 0.518, "step": 5080 }, { "epoch": 0.83, "learning_rate": 1.4914759982054733e-05, "loss": 0.5044, "step": 5090 }, { "epoch": 0.83, "learning_rate": 1.490354419021983e-05, "loss": 0.5065, "step": 5100 }, { "epoch": 0.83, "learning_rate": 1.4892328398384928e-05, "loss": 0.506, "step": 5110 }, { "epoch": 0.84, "learning_rate": 1.4881112606550024e-05, "loss": 0.5093, "step": 5120 }, { "epoch": 0.84, "learning_rate": 1.4869896814715119e-05, "loss": 0.5114, "step": 5130 }, { "epoch": 0.84, "learning_rate": 1.4858681022880217e-05, "loss": 0.5172, "step": 5140 }, { "epoch": 0.84, "learning_rate": 1.4847465231045313e-05, "loss": 0.5236, "step": 5150 }, { "epoch": 0.84, "learning_rate": 1.4836249439210408e-05, "loss": 0.5068, "step": 5160 }, { "epoch": 0.84, "learning_rate": 1.4825033647375506e-05, "loss": 0.5078, "step": 5170 }, { "epoch": 0.85, "learning_rate": 1.4813817855540602e-05, "loss": 0.5083, "step": 5180 }, { "epoch": 0.85, "learning_rate": 1.4802602063705697e-05, "loss": 0.51, "step": 5190 }, { "epoch": 0.85, "learning_rate": 1.4791386271870795e-05, "loss": 0.5009, "step": 5200 }, { "epoch": 0.85, "learning_rate": 1.4780170480035891e-05, "loss": 0.5067, "step": 5210 }, { "epoch": 0.85, "learning_rate": 1.4768954688200988e-05, "loss": 0.5092, "step": 5220 }, { "epoch": 0.85, "learning_rate": 1.4757738896366086e-05, "loss": 0.4984, "step": 5230 }, { "epoch": 0.86, "learning_rate": 1.474652310453118e-05, "loss": 0.5114, "step": 5240 }, { "epoch": 0.86, "learning_rate": 1.4735307312696277e-05, "loss": 0.5106, "step": 5250 }, { "epoch": 0.86, "learning_rate": 1.4724091520861375e-05, "loss": 0.5154, "step": 5260 }, { "epoch": 0.86, "learning_rate": 1.471287572902647e-05, "loss": 0.5065, "step": 5270 }, { "epoch": 0.86, "learning_rate": 1.4701659937191568e-05, "loss": 0.5049, "step": 5280 }, { "epoch": 0.86, "learning_rate": 1.4690444145356664e-05, "loss": 0.5126, "step": 5290 }, { "epoch": 0.86, "learning_rate": 1.467922835352176e-05, "loss": 0.5086, "step": 5300 }, { "epoch": 0.87, "learning_rate": 1.4668012561686857e-05, "loss": 0.5081, "step": 5310 }, { "epoch": 0.87, "learning_rate": 1.4656796769851953e-05, "loss": 0.5125, "step": 5320 }, { "epoch": 0.87, "learning_rate": 1.464558097801705e-05, "loss": 0.5097, "step": 5330 }, { "epoch": 0.87, "learning_rate": 1.4634365186182147e-05, "loss": 0.498, "step": 5340 }, { "epoch": 0.87, "learning_rate": 1.4623149394347242e-05, "loss": 0.5011, "step": 5350 }, { "epoch": 0.87, "learning_rate": 1.4611933602512338e-05, "loss": 0.519, "step": 5360 }, { "epoch": 0.88, "learning_rate": 1.4600717810677436e-05, "loss": 0.4988, "step": 5370 }, { "epoch": 0.88, "learning_rate": 1.4589502018842531e-05, "loss": 0.5005, "step": 5380 }, { "epoch": 0.88, "learning_rate": 1.4578286227007627e-05, "loss": 0.5051, "step": 5390 }, { "epoch": 0.88, "learning_rate": 1.4567070435172726e-05, "loss": 0.495, "step": 5400 }, { "epoch": 0.88, "learning_rate": 1.4555854643337822e-05, "loss": 0.5107, "step": 5410 }, { "epoch": 0.88, "learning_rate": 1.4544638851502917e-05, "loss": 0.5104, "step": 5420 }, { "epoch": 0.89, "learning_rate": 1.4533423059668015e-05, "loss": 0.5104, "step": 5430 }, { "epoch": 0.89, "learning_rate": 1.4522207267833111e-05, "loss": 0.5118, "step": 5440 }, { "epoch": 0.89, "learning_rate": 1.4510991475998206e-05, "loss": 0.5085, "step": 5450 }, { "epoch": 0.89, "learning_rate": 1.4499775684163304e-05, "loss": 0.4994, "step": 5460 }, { "epoch": 0.89, "learning_rate": 1.44885598923284e-05, "loss": 0.4985, "step": 5470 }, { "epoch": 0.89, "learning_rate": 1.4477344100493496e-05, "loss": 0.4968, "step": 5480 }, { "epoch": 0.9, "learning_rate": 1.4466128308658593e-05, "loss": 0.5028, "step": 5490 }, { "epoch": 0.9, "learning_rate": 1.4454912516823689e-05, "loss": 0.5092, "step": 5500 }, { "epoch": 0.9, "learning_rate": 1.4443696724988785e-05, "loss": 0.5083, "step": 5510 }, { "epoch": 0.9, "learning_rate": 1.4432480933153884e-05, "loss": 0.4946, "step": 5520 }, { "epoch": 0.9, "learning_rate": 1.4421265141318978e-05, "loss": 0.5098, "step": 5530 }, { "epoch": 0.9, "learning_rate": 1.4410049349484075e-05, "loss": 0.5119, "step": 5540 }, { "epoch": 0.91, "learning_rate": 1.4398833557649173e-05, "loss": 0.499, "step": 5550 }, { "epoch": 0.91, "learning_rate": 1.4387617765814267e-05, "loss": 0.5077, "step": 5560 }, { "epoch": 0.91, "learning_rate": 1.4376401973979364e-05, "loss": 0.5167, "step": 5570 }, { "epoch": 0.91, "learning_rate": 1.4365186182144462e-05, "loss": 0.5103, "step": 5580 }, { "epoch": 0.91, "learning_rate": 1.4353970390309558e-05, "loss": 0.5073, "step": 5590 }, { "epoch": 0.91, "learning_rate": 1.4342754598474653e-05, "loss": 0.5183, "step": 5600 }, { "epoch": 0.92, "learning_rate": 1.433153880663975e-05, "loss": 0.4945, "step": 5610 }, { "epoch": 0.92, "learning_rate": 1.4320323014804847e-05, "loss": 0.5049, "step": 5620 }, { "epoch": 0.92, "learning_rate": 1.4309107222969942e-05, "loss": 0.4994, "step": 5630 }, { "epoch": 0.92, "learning_rate": 1.429789143113504e-05, "loss": 0.4904, "step": 5640 }, { "epoch": 0.92, "learning_rate": 1.4286675639300136e-05, "loss": 0.5215, "step": 5650 }, { "epoch": 0.92, "learning_rate": 1.4275459847465233e-05, "loss": 0.5026, "step": 5660 }, { "epoch": 0.93, "learning_rate": 1.4264244055630329e-05, "loss": 0.5094, "step": 5670 }, { "epoch": 0.93, "learning_rate": 1.4253028263795425e-05, "loss": 0.5058, "step": 5680 }, { "epoch": 0.93, "learning_rate": 1.4241812471960522e-05, "loss": 0.5149, "step": 5690 }, { "epoch": 0.93, "learning_rate": 1.423059668012562e-05, "loss": 0.5156, "step": 5700 }, { "epoch": 0.93, "learning_rate": 1.4219380888290714e-05, "loss": 0.5042, "step": 5710 }, { "epoch": 0.93, "learning_rate": 1.420816509645581e-05, "loss": 0.4958, "step": 5720 }, { "epoch": 0.94, "learning_rate": 1.4196949304620909e-05, "loss": 0.5071, "step": 5730 }, { "epoch": 0.94, "learning_rate": 1.4185733512786003e-05, "loss": 0.4999, "step": 5740 }, { "epoch": 0.94, "learning_rate": 1.41745177209511e-05, "loss": 0.5089, "step": 5750 }, { "epoch": 0.94, "learning_rate": 1.4163301929116198e-05, "loss": 0.4997, "step": 5760 }, { "epoch": 0.94, "learning_rate": 1.4152086137281294e-05, "loss": 0.5034, "step": 5770 }, { "epoch": 0.94, "learning_rate": 1.4140870345446389e-05, "loss": 0.4994, "step": 5780 }, { "epoch": 0.94, "learning_rate": 1.4129654553611487e-05, "loss": 0.5106, "step": 5790 }, { "epoch": 0.95, "learning_rate": 1.4118438761776583e-05, "loss": 0.5032, "step": 5800 }, { "epoch": 0.95, "learning_rate": 1.4107222969941678e-05, "loss": 0.5118, "step": 5810 }, { "epoch": 0.95, "learning_rate": 1.4096007178106776e-05, "loss": 0.5173, "step": 5820 }, { "epoch": 0.95, "learning_rate": 1.4084791386271872e-05, "loss": 0.5107, "step": 5830 }, { "epoch": 0.95, "learning_rate": 1.4073575594436969e-05, "loss": 0.5117, "step": 5840 }, { "epoch": 0.95, "learning_rate": 1.4062359802602065e-05, "loss": 0.5038, "step": 5850 }, { "epoch": 0.96, "learning_rate": 1.4051144010767161e-05, "loss": 0.5058, "step": 5860 }, { "epoch": 0.96, "learning_rate": 1.4039928218932258e-05, "loss": 0.5131, "step": 5870 }, { "epoch": 0.96, "learning_rate": 1.4028712427097356e-05, "loss": 0.492, "step": 5880 }, { "epoch": 0.96, "learning_rate": 1.401749663526245e-05, "loss": 0.5011, "step": 5890 }, { "epoch": 0.96, "learning_rate": 1.4006280843427547e-05, "loss": 0.5045, "step": 5900 }, { "epoch": 0.96, "learning_rate": 1.3995065051592645e-05, "loss": 0.5069, "step": 5910 }, { "epoch": 0.97, "learning_rate": 1.398384925975774e-05, "loss": 0.4989, "step": 5920 }, { "epoch": 0.97, "learning_rate": 1.3972633467922836e-05, "loss": 0.4968, "step": 5930 }, { "epoch": 0.97, "learning_rate": 1.3961417676087934e-05, "loss": 0.4994, "step": 5940 }, { "epoch": 0.97, "learning_rate": 1.395020188425303e-05, "loss": 0.4964, "step": 5950 }, { "epoch": 0.97, "learning_rate": 1.3938986092418125e-05, "loss": 0.5107, "step": 5960 }, { "epoch": 0.97, "learning_rate": 1.3927770300583223e-05, "loss": 0.503, "step": 5970 }, { "epoch": 0.98, "learning_rate": 1.391655450874832e-05, "loss": 0.5122, "step": 5980 }, { "epoch": 0.98, "learning_rate": 1.3905338716913414e-05, "loss": 0.4971, "step": 5990 }, { "epoch": 0.98, "learning_rate": 1.3894122925078512e-05, "loss": 0.5108, "step": 6000 }, { "epoch": 0.98, "learning_rate": 1.3882907133243608e-05, "loss": 0.4972, "step": 6010 }, { "epoch": 0.98, "learning_rate": 1.3871691341408705e-05, "loss": 0.5065, "step": 6020 }, { "epoch": 0.98, "learning_rate": 1.3860475549573801e-05, "loss": 0.5199, "step": 6030 }, { "epoch": 0.99, "learning_rate": 1.3849259757738897e-05, "loss": 0.498, "step": 6040 }, { "epoch": 0.99, "learning_rate": 1.3838043965903994e-05, "loss": 0.5128, "step": 6050 }, { "epoch": 0.99, "learning_rate": 1.3826828174069092e-05, "loss": 0.4961, "step": 6060 }, { "epoch": 0.99, "learning_rate": 1.3815612382234187e-05, "loss": 0.4963, "step": 6070 }, { "epoch": 0.99, "learning_rate": 1.3804396590399283e-05, "loss": 0.5079, "step": 6080 }, { "epoch": 0.99, "learning_rate": 1.3793180798564381e-05, "loss": 0.5068, "step": 6090 }, { "epoch": 1.0, "learning_rate": 1.3781965006729476e-05, "loss": 0.4963, "step": 6100 }, { "epoch": 1.0, "learning_rate": 1.3770749214894572e-05, "loss": 0.5092, "step": 6110 }, { "epoch": 1.0, "learning_rate": 1.375953342305967e-05, "loss": 0.5008, "step": 6120 }, { "epoch": 1.0, "learning_rate": 1.3748317631224766e-05, "loss": 0.5106, "step": 6130 }, { "epoch": 1.0, "learning_rate": 1.3737101839389861e-05, "loss": 0.4955, "step": 6140 }, { "epoch": 1.0, "learning_rate": 1.3725886047554959e-05, "loss": 0.4905, "step": 6150 }, { "epoch": 1.01, "learning_rate": 1.3714670255720055e-05, "loss": 0.4966, "step": 6160 }, { "epoch": 1.01, "learning_rate": 1.370345446388515e-05, "loss": 0.4885, "step": 6170 }, { "epoch": 1.01, "learning_rate": 1.3692238672050248e-05, "loss": 0.5109, "step": 6180 }, { "epoch": 1.01, "learning_rate": 1.3681022880215345e-05, "loss": 0.5029, "step": 6190 }, { "epoch": 1.01, "learning_rate": 1.3669807088380441e-05, "loss": 0.4944, "step": 6200 }, { "epoch": 1.01, "learning_rate": 1.3658591296545537e-05, "loss": 0.494, "step": 6210 }, { "epoch": 1.02, "learning_rate": 1.3647375504710634e-05, "loss": 0.5075, "step": 6220 }, { "epoch": 1.02, "learning_rate": 1.363615971287573e-05, "loss": 0.5079, "step": 6230 }, { "epoch": 1.02, "learning_rate": 1.3624943921040828e-05, "loss": 0.495, "step": 6240 }, { "epoch": 1.02, "learning_rate": 1.3613728129205923e-05, "loss": 0.506, "step": 6250 }, { "epoch": 1.02, "learning_rate": 1.3602512337371019e-05, "loss": 0.4918, "step": 6260 }, { "epoch": 1.02, "learning_rate": 1.3591296545536117e-05, "loss": 0.4981, "step": 6270 }, { "epoch": 1.02, "learning_rate": 1.3580080753701212e-05, "loss": 0.4978, "step": 6280 }, { "epoch": 1.03, "learning_rate": 1.3568864961866308e-05, "loss": 0.4942, "step": 6290 }, { "epoch": 1.03, "learning_rate": 1.3557649170031406e-05, "loss": 0.5062, "step": 6300 }, { "epoch": 1.03, "learning_rate": 1.3546433378196503e-05, "loss": 0.5008, "step": 6310 }, { "epoch": 1.03, "learning_rate": 1.3535217586361597e-05, "loss": 0.5065, "step": 6320 }, { "epoch": 1.03, "learning_rate": 1.3524001794526695e-05, "loss": 0.5129, "step": 6330 }, { "epoch": 1.03, "learning_rate": 1.3512786002691792e-05, "loss": 0.5002, "step": 6340 }, { "epoch": 1.04, "learning_rate": 1.3501570210856886e-05, "loss": 0.5028, "step": 6350 }, { "epoch": 1.04, "learning_rate": 1.3490354419021984e-05, "loss": 0.4931, "step": 6360 }, { "epoch": 1.04, "learning_rate": 1.347913862718708e-05, "loss": 0.4971, "step": 6370 }, { "epoch": 1.04, "learning_rate": 1.3467922835352177e-05, "loss": 0.4965, "step": 6380 }, { "epoch": 1.04, "learning_rate": 1.3456707043517273e-05, "loss": 0.5136, "step": 6390 }, { "epoch": 1.04, "learning_rate": 1.344549125168237e-05, "loss": 0.4982, "step": 6400 }, { "epoch": 1.05, "learning_rate": 1.3434275459847466e-05, "loss": 0.4885, "step": 6410 }, { "epoch": 1.05, "learning_rate": 1.3423059668012564e-05, "loss": 0.4949, "step": 6420 }, { "epoch": 1.05, "learning_rate": 1.3411843876177659e-05, "loss": 0.5015, "step": 6430 }, { "epoch": 1.05, "learning_rate": 1.3400628084342755e-05, "loss": 0.4977, "step": 6440 }, { "epoch": 1.05, "learning_rate": 1.3389412292507853e-05, "loss": 0.5005, "step": 6450 }, { "epoch": 1.05, "learning_rate": 1.3378196500672948e-05, "loss": 0.4907, "step": 6460 }, { "epoch": 1.06, "learning_rate": 1.3366980708838044e-05, "loss": 0.509, "step": 6470 }, { "epoch": 1.06, "learning_rate": 1.3355764917003142e-05, "loss": 0.4904, "step": 6480 }, { "epoch": 1.06, "learning_rate": 1.3344549125168239e-05, "loss": 0.488, "step": 6490 }, { "epoch": 1.06, "learning_rate": 1.3333333333333333e-05, "loss": 0.4863, "step": 6500 }, { "epoch": 1.06, "learning_rate": 1.3322117541498431e-05, "loss": 0.5017, "step": 6510 }, { "epoch": 1.06, "learning_rate": 1.3310901749663528e-05, "loss": 0.5029, "step": 6520 }, { "epoch": 1.07, "learning_rate": 1.3299685957828622e-05, "loss": 0.507, "step": 6530 }, { "epoch": 1.07, "learning_rate": 1.328847016599372e-05, "loss": 0.4966, "step": 6540 }, { "epoch": 1.07, "learning_rate": 1.3277254374158817e-05, "loss": 0.5035, "step": 6550 }, { "epoch": 1.07, "learning_rate": 1.3266038582323913e-05, "loss": 0.4877, "step": 6560 }, { "epoch": 1.07, "learning_rate": 1.325482279048901e-05, "loss": 0.4896, "step": 6570 }, { "epoch": 1.07, "learning_rate": 1.3243606998654106e-05, "loss": 0.5011, "step": 6580 }, { "epoch": 1.08, "learning_rate": 1.3232391206819202e-05, "loss": 0.5059, "step": 6590 }, { "epoch": 1.08, "learning_rate": 1.32211754149843e-05, "loss": 0.5017, "step": 6600 }, { "epoch": 1.08, "learning_rate": 1.3209959623149395e-05, "loss": 0.4982, "step": 6610 }, { "epoch": 1.08, "learning_rate": 1.3198743831314491e-05, "loss": 0.5009, "step": 6620 }, { "epoch": 1.08, "learning_rate": 1.318752803947959e-05, "loss": 0.4909, "step": 6630 }, { "epoch": 1.08, "learning_rate": 1.3176312247644684e-05, "loss": 0.5027, "step": 6640 }, { "epoch": 1.09, "learning_rate": 1.316509645580978e-05, "loss": 0.4997, "step": 6650 }, { "epoch": 1.09, "learning_rate": 1.3153880663974878e-05, "loss": 0.5015, "step": 6660 }, { "epoch": 1.09, "learning_rate": 1.3142664872139975e-05, "loss": 0.4974, "step": 6670 }, { "epoch": 1.09, "learning_rate": 1.313144908030507e-05, "loss": 0.4913, "step": 6680 }, { "epoch": 1.09, "learning_rate": 1.3120233288470167e-05, "loss": 0.4968, "step": 6690 }, { "epoch": 1.09, "learning_rate": 1.3109017496635264e-05, "loss": 0.4932, "step": 6700 }, { "epoch": 1.09, "learning_rate": 1.3097801704800358e-05, "loss": 0.5002, "step": 6710 }, { "epoch": 1.1, "learning_rate": 1.3086585912965457e-05, "loss": 0.5007, "step": 6720 }, { "epoch": 1.1, "learning_rate": 1.3075370121130553e-05, "loss": 0.5003, "step": 6730 }, { "epoch": 1.1, "learning_rate": 1.306415432929565e-05, "loss": 0.4896, "step": 6740 }, { "epoch": 1.1, "learning_rate": 1.3052938537460747e-05, "loss": 0.5006, "step": 6750 }, { "epoch": 1.1, "learning_rate": 1.3041722745625842e-05, "loss": 0.4972, "step": 6760 }, { "epoch": 1.1, "learning_rate": 1.3030506953790938e-05, "loss": 0.4971, "step": 6770 }, { "epoch": 1.11, "learning_rate": 1.3019291161956036e-05, "loss": 0.4925, "step": 6780 }, { "epoch": 1.11, "learning_rate": 1.3008075370121131e-05, "loss": 0.5005, "step": 6790 }, { "epoch": 1.11, "learning_rate": 1.2996859578286227e-05, "loss": 0.4981, "step": 6800 }, { "epoch": 1.11, "learning_rate": 1.2985643786451325e-05, "loss": 0.5063, "step": 6810 }, { "epoch": 1.11, "learning_rate": 1.297442799461642e-05, "loss": 0.4991, "step": 6820 }, { "epoch": 1.11, "learning_rate": 1.2963212202781516e-05, "loss": 0.4969, "step": 6830 }, { "epoch": 1.12, "learning_rate": 1.2951996410946615e-05, "loss": 0.491, "step": 6840 }, { "epoch": 1.12, "learning_rate": 1.2940780619111711e-05, "loss": 0.5001, "step": 6850 }, { "epoch": 1.12, "learning_rate": 1.2929564827276806e-05, "loss": 0.501, "step": 6860 }, { "epoch": 1.12, "learning_rate": 1.2918349035441904e-05, "loss": 0.5133, "step": 6870 }, { "epoch": 1.12, "learning_rate": 1.2907133243607e-05, "loss": 0.4921, "step": 6880 }, { "epoch": 1.12, "learning_rate": 1.2895917451772095e-05, "loss": 0.5047, "step": 6890 }, { "epoch": 1.13, "learning_rate": 1.2884701659937193e-05, "loss": 0.4917, "step": 6900 }, { "epoch": 1.13, "learning_rate": 1.2873485868102289e-05, "loss": 0.5017, "step": 6910 }, { "epoch": 1.13, "learning_rate": 1.2862270076267385e-05, "loss": 0.5029, "step": 6920 }, { "epoch": 1.13, "learning_rate": 1.2851054284432483e-05, "loss": 0.4866, "step": 6930 }, { "epoch": 1.13, "learning_rate": 1.2839838492597578e-05, "loss": 0.4978, "step": 6940 }, { "epoch": 1.13, "learning_rate": 1.2828622700762674e-05, "loss": 0.4744, "step": 6950 }, { "epoch": 1.14, "learning_rate": 1.2817406908927773e-05, "loss": 0.486, "step": 6960 }, { "epoch": 1.14, "learning_rate": 1.2806191117092867e-05, "loss": 0.4956, "step": 6970 }, { "epoch": 1.14, "learning_rate": 1.2794975325257964e-05, "loss": 0.4935, "step": 6980 }, { "epoch": 1.14, "learning_rate": 1.2783759533423062e-05, "loss": 0.5023, "step": 6990 }, { "epoch": 1.14, "learning_rate": 1.2772543741588156e-05, "loss": 0.4937, "step": 7000 }, { "epoch": 1.14, "learning_rate": 1.2761327949753253e-05, "loss": 0.4873, "step": 7010 }, { "epoch": 1.15, "learning_rate": 1.275011215791835e-05, "loss": 0.5001, "step": 7020 }, { "epoch": 1.15, "learning_rate": 1.2738896366083447e-05, "loss": 0.4909, "step": 7030 }, { "epoch": 1.15, "learning_rate": 1.2727680574248542e-05, "loss": 0.49, "step": 7040 }, { "epoch": 1.15, "learning_rate": 1.271646478241364e-05, "loss": 0.501, "step": 7050 }, { "epoch": 1.15, "learning_rate": 1.2705248990578736e-05, "loss": 0.4924, "step": 7060 }, { "epoch": 1.15, "learning_rate": 1.269403319874383e-05, "loss": 0.4927, "step": 7070 }, { "epoch": 1.16, "learning_rate": 1.2682817406908929e-05, "loss": 0.5003, "step": 7080 }, { "epoch": 1.16, "learning_rate": 1.2671601615074025e-05, "loss": 0.4894, "step": 7090 }, { "epoch": 1.16, "learning_rate": 1.2660385823239122e-05, "loss": 0.4939, "step": 7100 }, { "epoch": 1.16, "learning_rate": 1.264917003140422e-05, "loss": 0.4937, "step": 7110 }, { "epoch": 1.16, "learning_rate": 1.2637954239569314e-05, "loss": 0.4839, "step": 7120 }, { "epoch": 1.16, "learning_rate": 1.262673844773441e-05, "loss": 0.5071, "step": 7130 }, { "epoch": 1.17, "learning_rate": 1.2615522655899509e-05, "loss": 0.4893, "step": 7140 }, { "epoch": 1.17, "learning_rate": 1.2604306864064603e-05, "loss": 0.4954, "step": 7150 }, { "epoch": 1.17, "learning_rate": 1.25930910722297e-05, "loss": 0.4876, "step": 7160 }, { "epoch": 1.17, "learning_rate": 1.2581875280394798e-05, "loss": 0.5056, "step": 7170 }, { "epoch": 1.17, "learning_rate": 1.2570659488559892e-05, "loss": 0.4937, "step": 7180 }, { "epoch": 1.17, "learning_rate": 1.2559443696724989e-05, "loss": 0.4979, "step": 7190 }, { "epoch": 1.17, "learning_rate": 1.2548227904890087e-05, "loss": 0.4967, "step": 7200 }, { "epoch": 1.18, "learning_rate": 1.2537012113055183e-05, "loss": 0.4812, "step": 7210 }, { "epoch": 1.18, "learning_rate": 1.2525796321220278e-05, "loss": 0.4944, "step": 7220 }, { "epoch": 1.18, "learning_rate": 1.2514580529385376e-05, "loss": 0.4977, "step": 7230 }, { "epoch": 1.18, "learning_rate": 1.2503364737550472e-05, "loss": 0.4907, "step": 7240 }, { "epoch": 1.18, "learning_rate": 1.2492148945715567e-05, "loss": 0.4902, "step": 7250 }, { "epoch": 1.18, "learning_rate": 1.2480933153880665e-05, "loss": 0.4908, "step": 7260 }, { "epoch": 1.19, "learning_rate": 1.2469717362045761e-05, "loss": 0.5047, "step": 7270 }, { "epoch": 1.19, "learning_rate": 1.2458501570210858e-05, "loss": 0.4909, "step": 7280 }, { "epoch": 1.19, "learning_rate": 1.2447285778375956e-05, "loss": 0.4877, "step": 7290 }, { "epoch": 1.19, "learning_rate": 1.243606998654105e-05, "loss": 0.4972, "step": 7300 }, { "epoch": 1.19, "learning_rate": 1.2424854194706147e-05, "loss": 0.4952, "step": 7310 }, { "epoch": 1.19, "learning_rate": 1.2413638402871245e-05, "loss": 0.4881, "step": 7320 }, { "epoch": 1.2, "learning_rate": 1.240242261103634e-05, "loss": 0.4944, "step": 7330 }, { "epoch": 1.2, "learning_rate": 1.2391206819201436e-05, "loss": 0.4959, "step": 7340 }, { "epoch": 1.2, "learning_rate": 1.2379991027366534e-05, "loss": 0.4963, "step": 7350 }, { "epoch": 1.2, "learning_rate": 1.2368775235531628e-05, "loss": 0.5035, "step": 7360 }, { "epoch": 1.2, "learning_rate": 1.2357559443696725e-05, "loss": 0.5018, "step": 7370 }, { "epoch": 1.2, "learning_rate": 1.2346343651861823e-05, "loss": 0.4973, "step": 7380 }, { "epoch": 1.21, "learning_rate": 1.233512786002692e-05, "loss": 0.4998, "step": 7390 }, { "epoch": 1.21, "learning_rate": 1.2323912068192014e-05, "loss": 0.5015, "step": 7400 }, { "epoch": 1.21, "learning_rate": 1.2312696276357112e-05, "loss": 0.4906, "step": 7410 }, { "epoch": 1.21, "learning_rate": 1.2301480484522208e-05, "loss": 0.4865, "step": 7420 }, { "epoch": 1.21, "learning_rate": 1.2290264692687303e-05, "loss": 0.4918, "step": 7430 }, { "epoch": 1.21, "learning_rate": 1.2279048900852401e-05, "loss": 0.4913, "step": 7440 }, { "epoch": 1.22, "learning_rate": 1.2267833109017497e-05, "loss": 0.4915, "step": 7450 }, { "epoch": 1.22, "learning_rate": 1.2256617317182594e-05, "loss": 0.5034, "step": 7460 }, { "epoch": 1.22, "learning_rate": 1.2245401525347692e-05, "loss": 0.478, "step": 7470 }, { "epoch": 1.22, "learning_rate": 1.2234185733512786e-05, "loss": 0.4911, "step": 7480 }, { "epoch": 1.22, "learning_rate": 1.2222969941677883e-05, "loss": 0.4918, "step": 7490 }, { "epoch": 1.22, "learning_rate": 1.2211754149842981e-05, "loss": 0.4844, "step": 7500 }, { "epoch": 1.23, "learning_rate": 1.2200538358008076e-05, "loss": 0.488, "step": 7510 }, { "epoch": 1.23, "learning_rate": 1.2189322566173172e-05, "loss": 0.4928, "step": 7520 }, { "epoch": 1.23, "learning_rate": 1.217810677433827e-05, "loss": 0.4913, "step": 7530 }, { "epoch": 1.23, "learning_rate": 1.2166890982503365e-05, "loss": 0.4841, "step": 7540 }, { "epoch": 1.23, "learning_rate": 1.2155675190668461e-05, "loss": 0.4758, "step": 7550 }, { "epoch": 1.23, "learning_rate": 1.2144459398833559e-05, "loss": 0.4977, "step": 7560 }, { "epoch": 1.24, "learning_rate": 1.2133243606998655e-05, "loss": 0.4991, "step": 7570 }, { "epoch": 1.24, "learning_rate": 1.212202781516375e-05, "loss": 0.5023, "step": 7580 }, { "epoch": 1.24, "learning_rate": 1.2110812023328848e-05, "loss": 0.4849, "step": 7590 }, { "epoch": 1.24, "learning_rate": 1.2099596231493944e-05, "loss": 0.4867, "step": 7600 }, { "epoch": 1.24, "learning_rate": 1.2088380439659039e-05, "loss": 0.4962, "step": 7610 }, { "epoch": 1.24, "learning_rate": 1.2077164647824137e-05, "loss": 0.4983, "step": 7620 }, { "epoch": 1.25, "learning_rate": 1.2065948855989234e-05, "loss": 0.4894, "step": 7630 }, { "epoch": 1.25, "learning_rate": 1.205473306415433e-05, "loss": 0.4896, "step": 7640 }, { "epoch": 1.25, "learning_rate": 1.2043517272319428e-05, "loss": 0.4961, "step": 7650 }, { "epoch": 1.25, "learning_rate": 1.2032301480484523e-05, "loss": 0.4857, "step": 7660 }, { "epoch": 1.25, "learning_rate": 1.2021085688649619e-05, "loss": 0.491, "step": 7670 }, { "epoch": 1.25, "learning_rate": 1.2009869896814717e-05, "loss": 0.4829, "step": 7680 }, { "epoch": 1.25, "learning_rate": 1.1998654104979812e-05, "loss": 0.4778, "step": 7690 }, { "epoch": 1.26, "learning_rate": 1.1987438313144908e-05, "loss": 0.4795, "step": 7700 }, { "epoch": 1.26, "learning_rate": 1.1976222521310006e-05, "loss": 0.4923, "step": 7710 }, { "epoch": 1.26, "learning_rate": 1.19650067294751e-05, "loss": 0.4903, "step": 7720 }, { "epoch": 1.26, "learning_rate": 1.1953790937640197e-05, "loss": 0.5006, "step": 7730 }, { "epoch": 1.26, "learning_rate": 1.1942575145805295e-05, "loss": 0.4916, "step": 7740 }, { "epoch": 1.26, "learning_rate": 1.1931359353970392e-05, "loss": 0.496, "step": 7750 }, { "epoch": 1.27, "learning_rate": 1.192014356213549e-05, "loss": 0.4908, "step": 7760 }, { "epoch": 1.27, "learning_rate": 1.1908927770300584e-05, "loss": 0.4964, "step": 7770 }, { "epoch": 1.27, "learning_rate": 1.189771197846568e-05, "loss": 0.4954, "step": 7780 }, { "epoch": 1.27, "learning_rate": 1.1886496186630779e-05, "loss": 0.4817, "step": 7790 }, { "epoch": 1.27, "learning_rate": 1.1875280394795873e-05, "loss": 0.4917, "step": 7800 }, { "epoch": 1.27, "learning_rate": 1.186406460296097e-05, "loss": 0.4877, "step": 7810 }, { "epoch": 1.28, "learning_rate": 1.1852848811126068e-05, "loss": 0.488, "step": 7820 }, { "epoch": 1.28, "learning_rate": 1.1841633019291164e-05, "loss": 0.4982, "step": 7830 }, { "epoch": 1.28, "learning_rate": 1.1830417227456259e-05, "loss": 0.4874, "step": 7840 }, { "epoch": 1.28, "learning_rate": 1.1819201435621357e-05, "loss": 0.4925, "step": 7850 }, { "epoch": 1.28, "learning_rate": 1.1807985643786453e-05, "loss": 0.499, "step": 7860 }, { "epoch": 1.28, "learning_rate": 1.1796769851951548e-05, "loss": 0.4916, "step": 7870 }, { "epoch": 1.29, "learning_rate": 1.1785554060116646e-05, "loss": 0.4835, "step": 7880 }, { "epoch": 1.29, "learning_rate": 1.1774338268281742e-05, "loss": 0.4978, "step": 7890 }, { "epoch": 1.29, "learning_rate": 1.1763122476446837e-05, "loss": 0.4952, "step": 7900 }, { "epoch": 1.29, "learning_rate": 1.1751906684611935e-05, "loss": 0.4839, "step": 7910 }, { "epoch": 1.29, "learning_rate": 1.1740690892777031e-05, "loss": 0.4841, "step": 7920 }, { "epoch": 1.29, "learning_rate": 1.1729475100942128e-05, "loss": 0.4884, "step": 7930 }, { "epoch": 1.3, "learning_rate": 1.1718259309107226e-05, "loss": 0.4948, "step": 7940 }, { "epoch": 1.3, "learning_rate": 1.170704351727232e-05, "loss": 0.4888, "step": 7950 }, { "epoch": 1.3, "learning_rate": 1.1695827725437417e-05, "loss": 0.4922, "step": 7960 }, { "epoch": 1.3, "learning_rate": 1.1684611933602515e-05, "loss": 0.4939, "step": 7970 }, { "epoch": 1.3, "learning_rate": 1.167339614176761e-05, "loss": 0.4905, "step": 7980 }, { "epoch": 1.3, "learning_rate": 1.1662180349932706e-05, "loss": 0.4913, "step": 7990 }, { "epoch": 1.31, "learning_rate": 1.1650964558097804e-05, "loss": 0.4926, "step": 8000 }, { "epoch": 1.31, "learning_rate": 1.16397487662629e-05, "loss": 0.496, "step": 8010 }, { "epoch": 1.31, "learning_rate": 1.1628532974427995e-05, "loss": 0.4878, "step": 8020 }, { "epoch": 1.31, "learning_rate": 1.1617317182593093e-05, "loss": 0.4944, "step": 8030 }, { "epoch": 1.31, "learning_rate": 1.160610139075819e-05, "loss": 0.4954, "step": 8040 }, { "epoch": 1.31, "learning_rate": 1.1594885598923284e-05, "loss": 0.502, "step": 8050 }, { "epoch": 1.32, "learning_rate": 1.1583669807088382e-05, "loss": 0.4789, "step": 8060 }, { "epoch": 1.32, "learning_rate": 1.1572454015253478e-05, "loss": 0.4862, "step": 8070 }, { "epoch": 1.32, "learning_rate": 1.1561238223418573e-05, "loss": 0.5012, "step": 8080 }, { "epoch": 1.32, "learning_rate": 1.1550022431583671e-05, "loss": 0.4831, "step": 8090 }, { "epoch": 1.32, "learning_rate": 1.1538806639748767e-05, "loss": 0.4823, "step": 8100 }, { "epoch": 1.32, "learning_rate": 1.1527590847913864e-05, "loss": 0.4948, "step": 8110 }, { "epoch": 1.33, "learning_rate": 1.1516375056078962e-05, "loss": 0.4874, "step": 8120 }, { "epoch": 1.33, "learning_rate": 1.1505159264244056e-05, "loss": 0.4847, "step": 8130 }, { "epoch": 1.33, "learning_rate": 1.1493943472409153e-05, "loss": 0.4985, "step": 8140 }, { "epoch": 1.33, "learning_rate": 1.1482727680574251e-05, "loss": 0.4889, "step": 8150 }, { "epoch": 1.33, "learning_rate": 1.1471511888739346e-05, "loss": 0.493, "step": 8160 }, { "epoch": 1.33, "learning_rate": 1.1460296096904442e-05, "loss": 0.4945, "step": 8170 }, { "epoch": 1.33, "learning_rate": 1.144908030506954e-05, "loss": 0.4813, "step": 8180 }, { "epoch": 1.34, "learning_rate": 1.1437864513234636e-05, "loss": 0.487, "step": 8190 }, { "epoch": 1.34, "learning_rate": 1.1426648721399731e-05, "loss": 0.4875, "step": 8200 }, { "epoch": 1.34, "learning_rate": 1.1415432929564829e-05, "loss": 0.4893, "step": 8210 }, { "epoch": 1.34, "learning_rate": 1.1404217137729925e-05, "loss": 0.4984, "step": 8220 }, { "epoch": 1.34, "learning_rate": 1.139300134589502e-05, "loss": 0.5001, "step": 8230 }, { "epoch": 1.34, "learning_rate": 1.1381785554060118e-05, "loss": 0.4866, "step": 8240 }, { "epoch": 1.35, "learning_rate": 1.1370569762225214e-05, "loss": 0.4904, "step": 8250 }, { "epoch": 1.35, "learning_rate": 1.135935397039031e-05, "loss": 0.4808, "step": 8260 }, { "epoch": 1.35, "learning_rate": 1.1348138178555407e-05, "loss": 0.4902, "step": 8270 }, { "epoch": 1.35, "learning_rate": 1.1336922386720504e-05, "loss": 0.4877, "step": 8280 }, { "epoch": 1.35, "learning_rate": 1.13257065948856e-05, "loss": 0.4904, "step": 8290 }, { "epoch": 1.35, "learning_rate": 1.1314490803050698e-05, "loss": 0.4923, "step": 8300 }, { "epoch": 1.36, "learning_rate": 1.1303275011215793e-05, "loss": 0.4984, "step": 8310 }, { "epoch": 1.36, "learning_rate": 1.1292059219380889e-05, "loss": 0.487, "step": 8320 }, { "epoch": 1.36, "learning_rate": 1.1280843427545987e-05, "loss": 0.4958, "step": 8330 }, { "epoch": 1.36, "learning_rate": 1.1269627635711082e-05, "loss": 0.4884, "step": 8340 }, { "epoch": 1.36, "learning_rate": 1.1258411843876178e-05, "loss": 0.4885, "step": 8350 }, { "epoch": 1.36, "learning_rate": 1.1247196052041276e-05, "loss": 0.4871, "step": 8360 }, { "epoch": 1.37, "learning_rate": 1.1235980260206372e-05, "loss": 0.4961, "step": 8370 }, { "epoch": 1.37, "learning_rate": 1.1224764468371467e-05, "loss": 0.4842, "step": 8380 }, { "epoch": 1.37, "learning_rate": 1.1213548676536565e-05, "loss": 0.4891, "step": 8390 }, { "epoch": 1.37, "learning_rate": 1.1202332884701661e-05, "loss": 0.4836, "step": 8400 }, { "epoch": 1.37, "learning_rate": 1.1191117092866756e-05, "loss": 0.4893, "step": 8410 }, { "epoch": 1.37, "learning_rate": 1.1179901301031854e-05, "loss": 0.4785, "step": 8420 }, { "epoch": 1.38, "learning_rate": 1.116868550919695e-05, "loss": 0.4926, "step": 8430 }, { "epoch": 1.38, "learning_rate": 1.1157469717362047e-05, "loss": 0.4804, "step": 8440 }, { "epoch": 1.38, "learning_rate": 1.1146253925527143e-05, "loss": 0.4888, "step": 8450 }, { "epoch": 1.38, "learning_rate": 1.113503813369224e-05, "loss": 0.5088, "step": 8460 }, { "epoch": 1.38, "learning_rate": 1.1123822341857336e-05, "loss": 0.4904, "step": 8470 }, { "epoch": 1.38, "learning_rate": 1.1112606550022434e-05, "loss": 0.4913, "step": 8480 }, { "epoch": 1.39, "learning_rate": 1.1101390758187529e-05, "loss": 0.4901, "step": 8490 }, { "epoch": 1.39, "learning_rate": 1.1090174966352625e-05, "loss": 0.4887, "step": 8500 }, { "epoch": 1.39, "learning_rate": 1.1078959174517723e-05, "loss": 0.4816, "step": 8510 }, { "epoch": 1.39, "learning_rate": 1.1067743382682818e-05, "loss": 0.4989, "step": 8520 }, { "epoch": 1.39, "learning_rate": 1.1056527590847914e-05, "loss": 0.4807, "step": 8530 }, { "epoch": 1.39, "learning_rate": 1.1045311799013012e-05, "loss": 0.4929, "step": 8540 }, { "epoch": 1.4, "learning_rate": 1.1034096007178109e-05, "loss": 0.4752, "step": 8550 }, { "epoch": 1.4, "learning_rate": 1.1022880215343203e-05, "loss": 0.4822, "step": 8560 }, { "epoch": 1.4, "learning_rate": 1.1011664423508301e-05, "loss": 0.491, "step": 8570 }, { "epoch": 1.4, "learning_rate": 1.1000448631673398e-05, "loss": 0.4987, "step": 8580 }, { "epoch": 1.4, "learning_rate": 1.0989232839838492e-05, "loss": 0.4867, "step": 8590 }, { "epoch": 1.4, "learning_rate": 1.097801704800359e-05, "loss": 0.493, "step": 8600 }, { "epoch": 1.41, "learning_rate": 1.0966801256168687e-05, "loss": 0.5023, "step": 8610 }, { "epoch": 1.41, "learning_rate": 1.0955585464333783e-05, "loss": 0.4931, "step": 8620 }, { "epoch": 1.41, "learning_rate": 1.094436967249888e-05, "loss": 0.4875, "step": 8630 }, { "epoch": 1.41, "learning_rate": 1.0933153880663976e-05, "loss": 0.4818, "step": 8640 }, { "epoch": 1.41, "learning_rate": 1.0921938088829072e-05, "loss": 0.4828, "step": 8650 }, { "epoch": 1.41, "learning_rate": 1.091072229699417e-05, "loss": 0.4897, "step": 8660 }, { "epoch": 1.41, "learning_rate": 1.0899506505159265e-05, "loss": 0.48, "step": 8670 }, { "epoch": 1.42, "learning_rate": 1.0888290713324361e-05, "loss": 0.4865, "step": 8680 }, { "epoch": 1.42, "learning_rate": 1.087707492148946e-05, "loss": 0.4941, "step": 8690 }, { "epoch": 1.42, "learning_rate": 1.0865859129654554e-05, "loss": 0.4858, "step": 8700 }, { "epoch": 1.42, "learning_rate": 1.085464333781965e-05, "loss": 0.4864, "step": 8710 }, { "epoch": 1.42, "learning_rate": 1.0843427545984748e-05, "loss": 0.4864, "step": 8720 }, { "epoch": 1.42, "learning_rate": 1.0832211754149845e-05, "loss": 0.5011, "step": 8730 }, { "epoch": 1.43, "learning_rate": 1.082099596231494e-05, "loss": 0.4841, "step": 8740 }, { "epoch": 1.43, "learning_rate": 1.0809780170480037e-05, "loss": 0.4966, "step": 8750 }, { "epoch": 1.43, "learning_rate": 1.0798564378645134e-05, "loss": 0.5004, "step": 8760 }, { "epoch": 1.43, "learning_rate": 1.0787348586810228e-05, "loss": 0.4976, "step": 8770 }, { "epoch": 1.43, "learning_rate": 1.0776132794975326e-05, "loss": 0.481, "step": 8780 }, { "epoch": 1.43, "learning_rate": 1.0764917003140423e-05, "loss": 0.4866, "step": 8790 }, { "epoch": 1.44, "learning_rate": 1.075370121130552e-05, "loss": 0.496, "step": 8800 }, { "epoch": 1.44, "learning_rate": 1.0742485419470616e-05, "loss": 0.492, "step": 8810 }, { "epoch": 1.44, "learning_rate": 1.0731269627635712e-05, "loss": 0.4819, "step": 8820 }, { "epoch": 1.44, "learning_rate": 1.0720053835800808e-05, "loss": 0.492, "step": 8830 }, { "epoch": 1.44, "learning_rate": 1.0708838043965906e-05, "loss": 0.4818, "step": 8840 }, { "epoch": 1.44, "learning_rate": 1.0697622252131001e-05, "loss": 0.4845, "step": 8850 }, { "epoch": 1.45, "learning_rate": 1.0686406460296097e-05, "loss": 0.5005, "step": 8860 }, { "epoch": 1.45, "learning_rate": 1.0675190668461195e-05, "loss": 0.4761, "step": 8870 }, { "epoch": 1.45, "learning_rate": 1.066397487662629e-05, "loss": 0.4906, "step": 8880 }, { "epoch": 1.45, "learning_rate": 1.0652759084791386e-05, "loss": 0.4873, "step": 8890 }, { "epoch": 1.45, "learning_rate": 1.0641543292956484e-05, "loss": 0.4991, "step": 8900 }, { "epoch": 1.45, "learning_rate": 1.063032750112158e-05, "loss": 0.491, "step": 8910 }, { "epoch": 1.46, "learning_rate": 1.0619111709286675e-05, "loss": 0.4937, "step": 8920 }, { "epoch": 1.46, "learning_rate": 1.0607895917451774e-05, "loss": 0.482, "step": 8930 }, { "epoch": 1.46, "learning_rate": 1.059668012561687e-05, "loss": 0.4956, "step": 8940 }, { "epoch": 1.46, "learning_rate": 1.0585464333781965e-05, "loss": 0.4852, "step": 8950 }, { "epoch": 1.46, "learning_rate": 1.0574248541947063e-05, "loss": 0.4891, "step": 8960 }, { "epoch": 1.46, "learning_rate": 1.0563032750112159e-05, "loss": 0.4873, "step": 8970 }, { "epoch": 1.47, "learning_rate": 1.0551816958277255e-05, "loss": 0.4929, "step": 8980 }, { "epoch": 1.47, "learning_rate": 1.0540601166442352e-05, "loss": 0.4868, "step": 8990 }, { "epoch": 1.47, "learning_rate": 1.0529385374607448e-05, "loss": 0.4931, "step": 9000 }, { "epoch": 1.47, "learning_rate": 1.0518169582772544e-05, "loss": 0.4922, "step": 9010 }, { "epoch": 1.47, "learning_rate": 1.0506953790937642e-05, "loss": 0.4791, "step": 9020 }, { "epoch": 1.47, "learning_rate": 1.0495737999102737e-05, "loss": 0.4915, "step": 9030 }, { "epoch": 1.48, "learning_rate": 1.0484522207267833e-05, "loss": 0.4853, "step": 9040 }, { "epoch": 1.48, "learning_rate": 1.0473306415432931e-05, "loss": 0.4826, "step": 9050 }, { "epoch": 1.48, "learning_rate": 1.0462090623598026e-05, "loss": 0.4948, "step": 9060 }, { "epoch": 1.48, "learning_rate": 1.0450874831763123e-05, "loss": 0.4932, "step": 9070 }, { "epoch": 1.48, "learning_rate": 1.043965903992822e-05, "loss": 0.4855, "step": 9080 }, { "epoch": 1.48, "learning_rate": 1.0428443248093317e-05, "loss": 0.4912, "step": 9090 }, { "epoch": 1.48, "learning_rate": 1.0417227456258412e-05, "loss": 0.4895, "step": 9100 }, { "epoch": 1.49, "learning_rate": 1.040601166442351e-05, "loss": 0.4907, "step": 9110 }, { "epoch": 1.49, "learning_rate": 1.0394795872588606e-05, "loss": 0.486, "step": 9120 }, { "epoch": 1.49, "learning_rate": 1.03835800807537e-05, "loss": 0.4858, "step": 9130 }, { "epoch": 1.49, "learning_rate": 1.0372364288918799e-05, "loss": 0.4949, "step": 9140 }, { "epoch": 1.49, "learning_rate": 1.0361148497083895e-05, "loss": 0.4826, "step": 9150 }, { "epoch": 1.49, "learning_rate": 1.0349932705248991e-05, "loss": 0.4847, "step": 9160 }, { "epoch": 1.5, "learning_rate": 1.0338716913414088e-05, "loss": 0.4926, "step": 9170 }, { "epoch": 1.5, "learning_rate": 1.0327501121579184e-05, "loss": 0.4815, "step": 9180 }, { "epoch": 1.5, "learning_rate": 1.031628532974428e-05, "loss": 0.489, "step": 9190 }, { "epoch": 1.5, "learning_rate": 1.0305069537909379e-05, "loss": 0.4925, "step": 9200 }, { "epoch": 1.5, "learning_rate": 1.0293853746074473e-05, "loss": 0.4765, "step": 9210 }, { "epoch": 1.5, "learning_rate": 1.028263795423957e-05, "loss": 0.4857, "step": 9220 }, { "epoch": 1.51, "learning_rate": 1.0271422162404668e-05, "loss": 0.4905, "step": 9230 }, { "epoch": 1.51, "learning_rate": 1.0260206370569762e-05, "loss": 0.4887, "step": 9240 }, { "epoch": 1.51, "learning_rate": 1.0248990578734859e-05, "loss": 0.4823, "step": 9250 }, { "epoch": 1.51, "learning_rate": 1.0237774786899957e-05, "loss": 0.4873, "step": 9260 }, { "epoch": 1.51, "learning_rate": 1.0226558995065053e-05, "loss": 0.4931, "step": 9270 }, { "epoch": 1.51, "learning_rate": 1.0215343203230148e-05, "loss": 0.4917, "step": 9280 }, { "epoch": 1.52, "learning_rate": 1.0204127411395246e-05, "loss": 0.4895, "step": 9290 }, { "epoch": 1.52, "learning_rate": 1.0192911619560342e-05, "loss": 0.4763, "step": 9300 }, { "epoch": 1.52, "learning_rate": 1.0181695827725437e-05, "loss": 0.4917, "step": 9310 }, { "epoch": 1.52, "learning_rate": 1.0170480035890535e-05, "loss": 0.4755, "step": 9320 }, { "epoch": 1.52, "learning_rate": 1.0159264244055631e-05, "loss": 0.503, "step": 9330 }, { "epoch": 1.52, "learning_rate": 1.0148048452220728e-05, "loss": 0.4761, "step": 9340 }, { "epoch": 1.53, "learning_rate": 1.0136832660385824e-05, "loss": 0.479, "step": 9350 }, { "epoch": 1.53, "learning_rate": 1.012561686855092e-05, "loss": 0.4863, "step": 9360 }, { "epoch": 1.53, "learning_rate": 1.0114401076716017e-05, "loss": 0.491, "step": 9370 }, { "epoch": 1.53, "learning_rate": 1.0103185284881115e-05, "loss": 0.4828, "step": 9380 }, { "epoch": 1.53, "learning_rate": 1.009196949304621e-05, "loss": 0.4839, "step": 9390 }, { "epoch": 1.53, "learning_rate": 1.0080753701211306e-05, "loss": 0.4931, "step": 9400 }, { "epoch": 1.54, "learning_rate": 1.0069537909376404e-05, "loss": 0.4743, "step": 9410 }, { "epoch": 1.54, "learning_rate": 1.0058322117541498e-05, "loss": 0.4825, "step": 9420 }, { "epoch": 1.54, "learning_rate": 1.0047106325706595e-05, "loss": 0.4883, "step": 9430 }, { "epoch": 1.54, "learning_rate": 1.0035890533871693e-05, "loss": 0.4772, "step": 9440 }, { "epoch": 1.54, "learning_rate": 1.002467474203679e-05, "loss": 0.4868, "step": 9450 }, { "epoch": 1.54, "learning_rate": 1.0013458950201884e-05, "loss": 0.4832, "step": 9460 }, { "epoch": 1.55, "learning_rate": 1.0002243158366982e-05, "loss": 0.4858, "step": 9470 }, { "epoch": 1.55, "learning_rate": 9.991027366532078e-06, "loss": 0.4846, "step": 9480 }, { "epoch": 1.55, "learning_rate": 9.979811574697175e-06, "loss": 0.486, "step": 9490 }, { "epoch": 1.55, "learning_rate": 9.968595782862271e-06, "loss": 0.487, "step": 9500 }, { "epoch": 1.55, "learning_rate": 9.957379991027367e-06, "loss": 0.4783, "step": 9510 }, { "epoch": 1.55, "learning_rate": 9.946164199192464e-06, "loss": 0.4837, "step": 9520 }, { "epoch": 1.56, "learning_rate": 9.93494840735756e-06, "loss": 0.4794, "step": 9530 }, { "epoch": 1.56, "learning_rate": 9.923732615522656e-06, "loss": 0.4787, "step": 9540 }, { "epoch": 1.56, "learning_rate": 9.912516823687753e-06, "loss": 0.4906, "step": 9550 }, { "epoch": 1.56, "learning_rate": 9.901301031852849e-06, "loss": 0.4844, "step": 9560 }, { "epoch": 1.56, "learning_rate": 9.890085240017945e-06, "loss": 0.4828, "step": 9570 }, { "epoch": 1.56, "learning_rate": 9.878869448183044e-06, "loss": 0.4763, "step": 9580 }, { "epoch": 1.56, "learning_rate": 9.867653656348138e-06, "loss": 0.4879, "step": 9590 }, { "epoch": 1.57, "learning_rate": 9.856437864513235e-06, "loss": 0.4978, "step": 9600 }, { "epoch": 1.57, "learning_rate": 9.845222072678333e-06, "loss": 0.4805, "step": 9610 }, { "epoch": 1.57, "learning_rate": 9.834006280843429e-06, "loss": 0.4924, "step": 9620 }, { "epoch": 1.57, "learning_rate": 9.822790489008525e-06, "loss": 0.4909, "step": 9630 }, { "epoch": 1.57, "learning_rate": 9.811574697173622e-06, "loss": 0.4831, "step": 9640 }, { "epoch": 1.57, "learning_rate": 9.800358905338718e-06, "loss": 0.485, "step": 9650 }, { "epoch": 1.58, "learning_rate": 9.789143113503814e-06, "loss": 0.4802, "step": 9660 }, { "epoch": 1.58, "learning_rate": 9.77792732166891e-06, "loss": 0.4951, "step": 9670 }, { "epoch": 1.58, "learning_rate": 9.766711529834007e-06, "loss": 0.4784, "step": 9680 }, { "epoch": 1.58, "learning_rate": 9.755495737999103e-06, "loss": 0.4788, "step": 9690 }, { "epoch": 1.58, "learning_rate": 9.7442799461642e-06, "loss": 0.4798, "step": 9700 }, { "epoch": 1.58, "learning_rate": 9.733064154329296e-06, "loss": 0.4858, "step": 9710 }, { "epoch": 1.59, "learning_rate": 9.721848362494393e-06, "loss": 0.4845, "step": 9720 }, { "epoch": 1.59, "learning_rate": 9.710632570659489e-06, "loss": 0.4871, "step": 9730 }, { "epoch": 1.59, "learning_rate": 9.699416778824587e-06, "loss": 0.4809, "step": 9740 }, { "epoch": 1.59, "learning_rate": 9.688200986989682e-06, "loss": 0.4902, "step": 9750 }, { "epoch": 1.59, "learning_rate": 9.67698519515478e-06, "loss": 0.4961, "step": 9760 }, { "epoch": 1.59, "learning_rate": 9.665769403319876e-06, "loss": 0.4749, "step": 9770 }, { "epoch": 1.6, "learning_rate": 9.65455361148497e-06, "loss": 0.4922, "step": 9780 }, { "epoch": 1.6, "learning_rate": 9.643337819650069e-06, "loss": 0.4824, "step": 9790 }, { "epoch": 1.6, "learning_rate": 9.632122027815165e-06, "loss": 0.4853, "step": 9800 }, { "epoch": 1.6, "learning_rate": 9.620906235980261e-06, "loss": 0.4808, "step": 9810 }, { "epoch": 1.6, "learning_rate": 9.609690444145358e-06, "loss": 0.4878, "step": 9820 }, { "epoch": 1.6, "learning_rate": 9.598474652310454e-06, "loss": 0.4847, "step": 9830 }, { "epoch": 1.61, "learning_rate": 9.58725886047555e-06, "loss": 0.4722, "step": 9840 }, { "epoch": 1.61, "learning_rate": 9.576043068640647e-06, "loss": 0.492, "step": 9850 }, { "epoch": 1.61, "learning_rate": 9.564827276805743e-06, "loss": 0.4868, "step": 9860 }, { "epoch": 1.61, "learning_rate": 9.55361148497084e-06, "loss": 0.4877, "step": 9870 }, { "epoch": 1.61, "learning_rate": 9.542395693135936e-06, "loss": 0.4925, "step": 9880 }, { "epoch": 1.61, "learning_rate": 9.531179901301032e-06, "loss": 0.487, "step": 9890 }, { "epoch": 1.62, "learning_rate": 9.519964109466129e-06, "loss": 0.477, "step": 9900 }, { "epoch": 1.62, "learning_rate": 9.508748317631225e-06, "loss": 0.4781, "step": 9910 }, { "epoch": 1.62, "learning_rate": 9.497532525796323e-06, "loss": 0.4943, "step": 9920 }, { "epoch": 1.62, "learning_rate": 9.486316733961418e-06, "loss": 0.4877, "step": 9930 }, { "epoch": 1.62, "learning_rate": 9.475100942126516e-06, "loss": 0.4861, "step": 9940 }, { "epoch": 1.62, "learning_rate": 9.463885150291612e-06, "loss": 0.4761, "step": 9950 }, { "epoch": 1.63, "learning_rate": 9.452669358456707e-06, "loss": 0.4803, "step": 9960 }, { "epoch": 1.63, "learning_rate": 9.441453566621805e-06, "loss": 0.4799, "step": 9970 }, { "epoch": 1.63, "learning_rate": 9.430237774786901e-06, "loss": 0.4706, "step": 9980 }, { "epoch": 1.63, "learning_rate": 9.419021982951998e-06, "loss": 0.4929, "step": 9990 }, { "epoch": 1.63, "learning_rate": 9.407806191117094e-06, "loss": 0.4891, "step": 10000 }, { "epoch": 1.63, "learning_rate": 9.39659039928219e-06, "loss": 0.4708, "step": 10010 }, { "epoch": 1.64, "learning_rate": 9.385374607447287e-06, "loss": 0.4793, "step": 10020 }, { "epoch": 1.64, "learning_rate": 9.374158815612383e-06, "loss": 0.4819, "step": 10030 }, { "epoch": 1.64, "learning_rate": 9.36294302377748e-06, "loss": 0.4805, "step": 10040 }, { "epoch": 1.64, "learning_rate": 9.351727231942576e-06, "loss": 0.4882, "step": 10050 }, { "epoch": 1.64, "learning_rate": 9.340511440107672e-06, "loss": 0.4785, "step": 10060 }, { "epoch": 1.64, "learning_rate": 9.329295648272768e-06, "loss": 0.4856, "step": 10070 }, { "epoch": 1.64, "learning_rate": 9.318079856437865e-06, "loss": 0.4885, "step": 10080 }, { "epoch": 1.65, "learning_rate": 9.306864064602961e-06, "loss": 0.4737, "step": 10090 }, { "epoch": 1.65, "learning_rate": 9.29564827276806e-06, "loss": 0.4882, "step": 10100 }, { "epoch": 1.65, "learning_rate": 9.284432480933154e-06, "loss": 0.4874, "step": 10110 }, { "epoch": 1.65, "learning_rate": 9.273216689098252e-06, "loss": 0.4861, "step": 10120 }, { "epoch": 1.65, "learning_rate": 9.262000897263348e-06, "loss": 0.4882, "step": 10130 }, { "epoch": 1.65, "learning_rate": 9.250785105428443e-06, "loss": 0.4722, "step": 10140 }, { "epoch": 1.66, "learning_rate": 9.239569313593541e-06, "loss": 0.4803, "step": 10150 }, { "epoch": 1.66, "learning_rate": 9.228353521758637e-06, "loss": 0.467, "step": 10160 }, { "epoch": 1.66, "learning_rate": 9.217137729923734e-06, "loss": 0.4861, "step": 10170 }, { "epoch": 1.66, "learning_rate": 9.20592193808883e-06, "loss": 0.4878, "step": 10180 }, { "epoch": 1.66, "learning_rate": 9.194706146253926e-06, "loss": 0.4742, "step": 10190 }, { "epoch": 1.66, "learning_rate": 9.183490354419023e-06, "loss": 0.4746, "step": 10200 }, { "epoch": 1.67, "learning_rate": 9.172274562584119e-06, "loss": 0.4853, "step": 10210 }, { "epoch": 1.67, "learning_rate": 9.161058770749215e-06, "loss": 0.4875, "step": 10220 }, { "epoch": 1.67, "learning_rate": 9.149842978914312e-06, "loss": 0.4843, "step": 10230 }, { "epoch": 1.67, "learning_rate": 9.138627187079408e-06, "loss": 0.4984, "step": 10240 }, { "epoch": 1.67, "learning_rate": 9.127411395244505e-06, "loss": 0.485, "step": 10250 }, { "epoch": 1.67, "learning_rate": 9.116195603409601e-06, "loss": 0.4859, "step": 10260 }, { "epoch": 1.68, "learning_rate": 9.104979811574697e-06, "loss": 0.4807, "step": 10270 }, { "epoch": 1.68, "learning_rate": 9.093764019739795e-06, "loss": 0.4842, "step": 10280 }, { "epoch": 1.68, "learning_rate": 9.08254822790489e-06, "loss": 0.4626, "step": 10290 }, { "epoch": 1.68, "learning_rate": 9.071332436069988e-06, "loss": 0.4593, "step": 10300 }, { "epoch": 1.68, "learning_rate": 9.060116644235084e-06, "loss": 0.4902, "step": 10310 }, { "epoch": 1.68, "learning_rate": 9.048900852400179e-06, "loss": 0.4693, "step": 10320 }, { "epoch": 1.69, "learning_rate": 9.037685060565277e-06, "loss": 0.4949, "step": 10330 }, { "epoch": 1.69, "learning_rate": 9.026469268730373e-06, "loss": 0.4833, "step": 10340 }, { "epoch": 1.69, "learning_rate": 9.01525347689547e-06, "loss": 0.4874, "step": 10350 }, { "epoch": 1.69, "learning_rate": 9.004037685060566e-06, "loss": 0.4766, "step": 10360 }, { "epoch": 1.69, "learning_rate": 8.992821893225663e-06, "loss": 0.4739, "step": 10370 }, { "epoch": 1.69, "learning_rate": 8.981606101390759e-06, "loss": 0.4683, "step": 10380 }, { "epoch": 1.7, "learning_rate": 8.970390309555855e-06, "loss": 0.4887, "step": 10390 }, { "epoch": 1.7, "learning_rate": 8.959174517720952e-06, "loss": 0.4944, "step": 10400 }, { "epoch": 1.7, "learning_rate": 8.947958725886048e-06, "loss": 0.4843, "step": 10410 }, { "epoch": 1.7, "learning_rate": 8.936742934051144e-06, "loss": 0.4733, "step": 10420 }, { "epoch": 1.7, "learning_rate": 8.925527142216242e-06, "loss": 0.476, "step": 10430 }, { "epoch": 1.7, "learning_rate": 8.914311350381337e-06, "loss": 0.4806, "step": 10440 }, { "epoch": 1.71, "learning_rate": 8.903095558546433e-06, "loss": 0.49, "step": 10450 }, { "epoch": 1.71, "learning_rate": 8.891879766711531e-06, "loss": 0.472, "step": 10460 }, { "epoch": 1.71, "learning_rate": 8.880663974876626e-06, "loss": 0.4827, "step": 10470 }, { "epoch": 1.71, "learning_rate": 8.869448183041724e-06, "loss": 0.4752, "step": 10480 }, { "epoch": 1.71, "learning_rate": 8.85823239120682e-06, "loss": 0.4922, "step": 10490 }, { "epoch": 1.71, "learning_rate": 8.847016599371915e-06, "loss": 0.487, "step": 10500 }, { "epoch": 1.72, "learning_rate": 8.835800807537013e-06, "loss": 0.4892, "step": 10510 }, { "epoch": 1.72, "learning_rate": 8.82458501570211e-06, "loss": 0.4788, "step": 10520 }, { "epoch": 1.72, "learning_rate": 8.813369223867206e-06, "loss": 0.473, "step": 10530 }, { "epoch": 1.72, "learning_rate": 8.802153432032302e-06, "loss": 0.4749, "step": 10540 }, { "epoch": 1.72, "learning_rate": 8.790937640197399e-06, "loss": 0.4792, "step": 10550 }, { "epoch": 1.72, "learning_rate": 8.779721848362495e-06, "loss": 0.4705, "step": 10560 }, { "epoch": 1.72, "learning_rate": 8.768506056527591e-06, "loss": 0.4854, "step": 10570 }, { "epoch": 1.73, "learning_rate": 8.757290264692688e-06, "loss": 0.49, "step": 10580 }, { "epoch": 1.73, "learning_rate": 8.746074472857784e-06, "loss": 0.4821, "step": 10590 }, { "epoch": 1.73, "learning_rate": 8.73485868102288e-06, "loss": 0.4762, "step": 10600 }, { "epoch": 1.73, "learning_rate": 8.723642889187978e-06, "loss": 0.4871, "step": 10610 }, { "epoch": 1.73, "learning_rate": 8.712427097353073e-06, "loss": 0.4743, "step": 10620 }, { "epoch": 1.73, "learning_rate": 8.70121130551817e-06, "loss": 0.4749, "step": 10630 }, { "epoch": 1.74, "learning_rate": 8.689995513683268e-06, "loss": 0.4855, "step": 10640 }, { "epoch": 1.74, "learning_rate": 8.678779721848362e-06, "loss": 0.4769, "step": 10650 }, { "epoch": 1.74, "learning_rate": 8.66756393001346e-06, "loss": 0.478, "step": 10660 }, { "epoch": 1.74, "learning_rate": 8.656348138178557e-06, "loss": 0.4818, "step": 10670 }, { "epoch": 1.74, "learning_rate": 8.645132346343651e-06, "loss": 0.469, "step": 10680 }, { "epoch": 1.74, "learning_rate": 8.63391655450875e-06, "loss": 0.4792, "step": 10690 }, { "epoch": 1.75, "learning_rate": 8.622700762673846e-06, "loss": 0.4864, "step": 10700 }, { "epoch": 1.75, "learning_rate": 8.611484970838942e-06, "loss": 0.4849, "step": 10710 }, { "epoch": 1.75, "learning_rate": 8.600269179004038e-06, "loss": 0.4867, "step": 10720 }, { "epoch": 1.75, "learning_rate": 8.589053387169135e-06, "loss": 0.4735, "step": 10730 }, { "epoch": 1.75, "learning_rate": 8.577837595334231e-06, "loss": 0.49, "step": 10740 }, { "epoch": 1.75, "learning_rate": 8.566621803499327e-06, "loss": 0.4854, "step": 10750 }, { "epoch": 1.76, "learning_rate": 8.555406011664424e-06, "loss": 0.4781, "step": 10760 }, { "epoch": 1.76, "learning_rate": 8.54419021982952e-06, "loss": 0.4723, "step": 10770 }, { "epoch": 1.76, "learning_rate": 8.532974427994617e-06, "loss": 0.4835, "step": 10780 }, { "epoch": 1.76, "learning_rate": 8.521758636159715e-06, "loss": 0.4752, "step": 10790 }, { "epoch": 1.76, "learning_rate": 8.51054284432481e-06, "loss": 0.4745, "step": 10800 }, { "epoch": 1.76, "learning_rate": 8.499327052489906e-06, "loss": 0.4869, "step": 10810 }, { "epoch": 1.77, "learning_rate": 8.488111260655004e-06, "loss": 0.4875, "step": 10820 }, { "epoch": 1.77, "learning_rate": 8.476895468820098e-06, "loss": 0.4837, "step": 10830 }, { "epoch": 1.77, "learning_rate": 8.465679676985196e-06, "loss": 0.477, "step": 10840 }, { "epoch": 1.77, "learning_rate": 8.454463885150293e-06, "loss": 0.4963, "step": 10850 }, { "epoch": 1.77, "learning_rate": 8.443248093315389e-06, "loss": 0.486, "step": 10860 }, { "epoch": 1.77, "learning_rate": 8.432032301480485e-06, "loss": 0.4843, "step": 10870 }, { "epoch": 1.78, "learning_rate": 8.420816509645582e-06, "loss": 0.4866, "step": 10880 }, { "epoch": 1.78, "learning_rate": 8.409600717810678e-06, "loss": 0.4873, "step": 10890 }, { "epoch": 1.78, "learning_rate": 8.398384925975775e-06, "loss": 0.4797, "step": 10900 }, { "epoch": 1.78, "learning_rate": 8.387169134140871e-06, "loss": 0.4702, "step": 10910 }, { "epoch": 1.78, "learning_rate": 8.375953342305967e-06, "loss": 0.4971, "step": 10920 }, { "epoch": 1.78, "learning_rate": 8.364737550471064e-06, "loss": 0.4778, "step": 10930 }, { "epoch": 1.79, "learning_rate": 8.35352175863616e-06, "loss": 0.4836, "step": 10940 }, { "epoch": 1.79, "learning_rate": 8.342305966801258e-06, "loss": 0.4871, "step": 10950 }, { "epoch": 1.79, "learning_rate": 8.331090174966353e-06, "loss": 0.4664, "step": 10960 }, { "epoch": 1.79, "learning_rate": 8.31987438313145e-06, "loss": 0.4833, "step": 10970 }, { "epoch": 1.79, "learning_rate": 8.308658591296547e-06, "loss": 0.4874, "step": 10980 }, { "epoch": 1.79, "learning_rate": 8.297442799461642e-06, "loss": 0.4779, "step": 10990 }, { "epoch": 1.8, "learning_rate": 8.28622700762674e-06, "loss": 0.4789, "step": 11000 }, { "epoch": 1.8, "learning_rate": 8.275011215791836e-06, "loss": 0.4619, "step": 11010 }, { "epoch": 1.8, "learning_rate": 8.263795423956933e-06, "loss": 0.4737, "step": 11020 }, { "epoch": 1.8, "learning_rate": 8.252579632122029e-06, "loss": 0.4787, "step": 11030 }, { "epoch": 1.8, "learning_rate": 8.241363840287125e-06, "loss": 0.4809, "step": 11040 }, { "epoch": 1.8, "learning_rate": 8.230148048452222e-06, "loss": 0.479, "step": 11050 }, { "epoch": 1.8, "learning_rate": 8.218932256617318e-06, "loss": 0.4817, "step": 11060 }, { "epoch": 1.81, "learning_rate": 8.207716464782414e-06, "loss": 0.4586, "step": 11070 }, { "epoch": 1.81, "learning_rate": 8.19650067294751e-06, "loss": 0.488, "step": 11080 }, { "epoch": 1.81, "learning_rate": 8.185284881112607e-06, "loss": 0.4714, "step": 11090 }, { "epoch": 1.81, "learning_rate": 8.174069089277703e-06, "loss": 0.4828, "step": 11100 }, { "epoch": 1.81, "learning_rate": 8.1628532974428e-06, "loss": 0.4935, "step": 11110 }, { "epoch": 1.81, "learning_rate": 8.151637505607896e-06, "loss": 0.4799, "step": 11120 }, { "epoch": 1.82, "learning_rate": 8.140421713772994e-06, "loss": 0.483, "step": 11130 }, { "epoch": 1.82, "learning_rate": 8.129205921938089e-06, "loss": 0.4798, "step": 11140 }, { "epoch": 1.82, "learning_rate": 8.117990130103187e-06, "loss": 0.4727, "step": 11150 }, { "epoch": 1.82, "learning_rate": 8.106774338268283e-06, "loss": 0.4746, "step": 11160 }, { "epoch": 1.82, "learning_rate": 8.095558546433378e-06, "loss": 0.4844, "step": 11170 }, { "epoch": 1.82, "learning_rate": 8.084342754598476e-06, "loss": 0.4718, "step": 11180 }, { "epoch": 1.83, "learning_rate": 8.073126962763572e-06, "loss": 0.4758, "step": 11190 }, { "epoch": 1.83, "learning_rate": 8.061911170928669e-06, "loss": 0.4762, "step": 11200 }, { "epoch": 1.83, "learning_rate": 8.050695379093765e-06, "loss": 0.4765, "step": 11210 }, { "epoch": 1.83, "learning_rate": 8.039479587258861e-06, "loss": 0.4748, "step": 11220 }, { "epoch": 1.83, "learning_rate": 8.028263795423958e-06, "loss": 0.481, "step": 11230 }, { "epoch": 1.83, "learning_rate": 8.017048003589054e-06, "loss": 0.4715, "step": 11240 }, { "epoch": 1.84, "learning_rate": 8.00583221175415e-06, "loss": 0.4802, "step": 11250 }, { "epoch": 1.84, "learning_rate": 7.994616419919247e-06, "loss": 0.4812, "step": 11260 }, { "epoch": 1.84, "learning_rate": 7.983400628084343e-06, "loss": 0.4891, "step": 11270 }, { "epoch": 1.84, "learning_rate": 7.97218483624944e-06, "loss": 0.4914, "step": 11280 }, { "epoch": 1.84, "learning_rate": 7.960969044414536e-06, "loss": 0.4749, "step": 11290 }, { "epoch": 1.84, "learning_rate": 7.949753252579632e-06, "loss": 0.4697, "step": 11300 }, { "epoch": 1.85, "learning_rate": 7.93853746074473e-06, "loss": 0.4848, "step": 11310 }, { "epoch": 1.85, "learning_rate": 7.927321668909825e-06, "loss": 0.4861, "step": 11320 }, { "epoch": 1.85, "learning_rate": 7.916105877074923e-06, "loss": 0.4813, "step": 11330 }, { "epoch": 1.85, "learning_rate": 7.90489008524002e-06, "loss": 0.4799, "step": 11340 }, { "epoch": 1.85, "learning_rate": 7.893674293405114e-06, "loss": 0.4856, "step": 11350 }, { "epoch": 1.85, "learning_rate": 7.882458501570212e-06, "loss": 0.4823, "step": 11360 }, { "epoch": 1.86, "learning_rate": 7.871242709735308e-06, "loss": 0.4806, "step": 11370 }, { "epoch": 1.86, "learning_rate": 7.860026917900405e-06, "loss": 0.4802, "step": 11380 }, { "epoch": 1.86, "learning_rate": 7.848811126065501e-06, "loss": 0.489, "step": 11390 }, { "epoch": 1.86, "learning_rate": 7.837595334230597e-06, "loss": 0.4811, "step": 11400 }, { "epoch": 1.86, "learning_rate": 7.826379542395694e-06, "loss": 0.4777, "step": 11410 }, { "epoch": 1.86, "learning_rate": 7.81516375056079e-06, "loss": 0.4803, "step": 11420 }, { "epoch": 1.87, "learning_rate": 7.803947958725887e-06, "loss": 0.4757, "step": 11430 }, { "epoch": 1.87, "learning_rate": 7.792732166890983e-06, "loss": 0.4696, "step": 11440 }, { "epoch": 1.87, "learning_rate": 7.78151637505608e-06, "loss": 0.4754, "step": 11450 }, { "epoch": 1.87, "learning_rate": 7.770300583221176e-06, "loss": 0.4764, "step": 11460 }, { "epoch": 1.87, "learning_rate": 7.759084791386272e-06, "loss": 0.4717, "step": 11470 }, { "epoch": 1.87, "learning_rate": 7.747868999551368e-06, "loss": 0.4794, "step": 11480 }, { "epoch": 1.88, "learning_rate": 7.736653207716466e-06, "loss": 0.4758, "step": 11490 }, { "epoch": 1.88, "learning_rate": 7.725437415881561e-06, "loss": 0.4767, "step": 11500 }, { "epoch": 1.88, "learning_rate": 7.714221624046659e-06, "loss": 0.4745, "step": 11510 }, { "epoch": 1.88, "learning_rate": 7.703005832211755e-06, "loss": 0.475, "step": 11520 }, { "epoch": 1.88, "learning_rate": 7.69179004037685e-06, "loss": 0.4748, "step": 11530 }, { "epoch": 1.88, "learning_rate": 7.680574248541948e-06, "loss": 0.4808, "step": 11540 }, { "epoch": 1.88, "learning_rate": 7.669358456707045e-06, "loss": 0.4655, "step": 11550 }, { "epoch": 1.89, "learning_rate": 7.658142664872141e-06, "loss": 0.4846, "step": 11560 }, { "epoch": 1.89, "learning_rate": 7.646926873037237e-06, "loss": 0.4688, "step": 11570 }, { "epoch": 1.89, "learning_rate": 7.635711081202334e-06, "loss": 0.4665, "step": 11580 }, { "epoch": 1.89, "learning_rate": 7.62449528936743e-06, "loss": 0.4782, "step": 11590 }, { "epoch": 1.89, "learning_rate": 7.613279497532526e-06, "loss": 0.4785, "step": 11600 }, { "epoch": 1.89, "learning_rate": 7.6020637056976235e-06, "loss": 0.4821, "step": 11610 }, { "epoch": 1.9, "learning_rate": 7.590847913862719e-06, "loss": 0.4835, "step": 11620 }, { "epoch": 1.9, "learning_rate": 7.579632122027815e-06, "loss": 0.4875, "step": 11630 }, { "epoch": 1.9, "learning_rate": 7.568416330192913e-06, "loss": 0.4837, "step": 11640 }, { "epoch": 1.9, "learning_rate": 7.557200538358008e-06, "loss": 0.4819, "step": 11650 }, { "epoch": 1.9, "learning_rate": 7.545984746523105e-06, "loss": 0.4704, "step": 11660 }, { "epoch": 1.9, "learning_rate": 7.534768954688202e-06, "loss": 0.477, "step": 11670 }, { "epoch": 1.91, "learning_rate": 7.523553162853298e-06, "loss": 0.4685, "step": 11680 }, { "epoch": 1.91, "learning_rate": 7.512337371018394e-06, "loss": 0.4822, "step": 11690 }, { "epoch": 1.91, "learning_rate": 7.5011215791834916e-06, "loss": 0.4747, "step": 11700 }, { "epoch": 1.91, "learning_rate": 7.489905787348587e-06, "loss": 0.4768, "step": 11710 }, { "epoch": 1.91, "learning_rate": 7.4786899955136834e-06, "loss": 0.4722, "step": 11720 }, { "epoch": 1.91, "learning_rate": 7.467474203678781e-06, "loss": 0.4909, "step": 11730 }, { "epoch": 1.92, "learning_rate": 7.456258411843876e-06, "loss": 0.4749, "step": 11740 }, { "epoch": 1.92, "learning_rate": 7.445042620008973e-06, "loss": 0.4766, "step": 11750 }, { "epoch": 1.92, "learning_rate": 7.43382682817407e-06, "loss": 0.4767, "step": 11760 }, { "epoch": 1.92, "learning_rate": 7.422611036339166e-06, "loss": 0.4737, "step": 11770 }, { "epoch": 1.92, "learning_rate": 7.4113952445042624e-06, "loss": 0.4827, "step": 11780 }, { "epoch": 1.92, "learning_rate": 7.40017945266936e-06, "loss": 0.4819, "step": 11790 }, { "epoch": 1.93, "learning_rate": 7.388963660834455e-06, "loss": 0.4827, "step": 11800 }, { "epoch": 1.93, "learning_rate": 7.3777478689995515e-06, "loss": 0.4795, "step": 11810 }, { "epoch": 1.93, "learning_rate": 7.366532077164649e-06, "loss": 0.4636, "step": 11820 }, { "epoch": 1.93, "learning_rate": 7.355316285329744e-06, "loss": 0.4824, "step": 11830 }, { "epoch": 1.93, "learning_rate": 7.344100493494841e-06, "loss": 0.4732, "step": 11840 }, { "epoch": 1.93, "learning_rate": 7.332884701659938e-06, "loss": 0.4798, "step": 11850 }, { "epoch": 1.94, "learning_rate": 7.321668909825034e-06, "loss": 0.4813, "step": 11860 }, { "epoch": 1.94, "learning_rate": 7.3104531179901305e-06, "loss": 0.4709, "step": 11870 }, { "epoch": 1.94, "learning_rate": 7.299237326155228e-06, "loss": 0.4833, "step": 11880 }, { "epoch": 1.94, "learning_rate": 7.288021534320323e-06, "loss": 0.4772, "step": 11890 }, { "epoch": 1.94, "learning_rate": 7.2768057424854196e-06, "loss": 0.4654, "step": 11900 }, { "epoch": 1.94, "learning_rate": 7.265589950650517e-06, "loss": 0.4779, "step": 11910 }, { "epoch": 1.95, "learning_rate": 7.254374158815612e-06, "loss": 0.4738, "step": 11920 }, { "epoch": 1.95, "learning_rate": 7.2431583669807095e-06, "loss": 0.4783, "step": 11930 }, { "epoch": 1.95, "learning_rate": 7.231942575145806e-06, "loss": 0.4798, "step": 11940 }, { "epoch": 1.95, "learning_rate": 7.220726783310902e-06, "loss": 0.4845, "step": 11950 }, { "epoch": 1.95, "learning_rate": 7.2095109914759986e-06, "loss": 0.4701, "step": 11960 }, { "epoch": 1.95, "learning_rate": 7.198295199641096e-06, "loss": 0.4759, "step": 11970 }, { "epoch": 1.95, "learning_rate": 7.187079407806191e-06, "loss": 0.4781, "step": 11980 }, { "epoch": 1.96, "learning_rate": 7.175863615971288e-06, "loss": 0.4689, "step": 11990 }, { "epoch": 1.96, "learning_rate": 7.164647824136385e-06, "loss": 0.4796, "step": 12000 }, { "epoch": 1.96, "learning_rate": 7.15343203230148e-06, "loss": 0.4801, "step": 12010 }, { "epoch": 1.96, "learning_rate": 7.1422162404665775e-06, "loss": 0.4685, "step": 12020 }, { "epoch": 1.96, "learning_rate": 7.131000448631674e-06, "loss": 0.4834, "step": 12030 }, { "epoch": 1.96, "learning_rate": 7.11978465679677e-06, "loss": 0.474, "step": 12040 }, { "epoch": 1.97, "learning_rate": 7.108568864961867e-06, "loss": 0.4703, "step": 12050 }, { "epoch": 1.97, "learning_rate": 7.097353073126964e-06, "loss": 0.4895, "step": 12060 }, { "epoch": 1.97, "learning_rate": 7.086137281292059e-06, "loss": 0.4765, "step": 12070 }, { "epoch": 1.97, "learning_rate": 7.074921489457156e-06, "loss": 0.4661, "step": 12080 }, { "epoch": 1.97, "learning_rate": 7.063705697622253e-06, "loss": 0.4753, "step": 12090 }, { "epoch": 1.97, "learning_rate": 7.05248990578735e-06, "loss": 0.4835, "step": 12100 }, { "epoch": 1.98, "learning_rate": 7.041274113952446e-06, "loss": 0.4747, "step": 12110 }, { "epoch": 1.98, "learning_rate": 7.030058322117542e-06, "loss": 0.4751, "step": 12120 }, { "epoch": 1.98, "learning_rate": 7.018842530282639e-06, "loss": 0.4702, "step": 12130 }, { "epoch": 1.98, "learning_rate": 7.007626738447735e-06, "loss": 0.4744, "step": 12140 }, { "epoch": 1.98, "learning_rate": 6.996410946612832e-06, "loss": 0.4729, "step": 12150 }, { "epoch": 1.98, "learning_rate": 6.985195154777928e-06, "loss": 0.4689, "step": 12160 }, { "epoch": 1.99, "learning_rate": 6.973979362943024e-06, "loss": 0.4664, "step": 12170 }, { "epoch": 1.99, "learning_rate": 6.962763571108121e-06, "loss": 0.4741, "step": 12180 }, { "epoch": 1.99, "learning_rate": 6.951547779273218e-06, "loss": 0.4676, "step": 12190 }, { "epoch": 1.99, "learning_rate": 6.940331987438314e-06, "loss": 0.4841, "step": 12200 }, { "epoch": 1.99, "learning_rate": 6.92911619560341e-06, "loss": 0.4848, "step": 12210 }, { "epoch": 1.99, "learning_rate": 6.917900403768507e-06, "loss": 0.4792, "step": 12220 }, { "epoch": 2.0, "learning_rate": 6.906684611933603e-06, "loss": 0.4863, "step": 12230 }, { "epoch": 2.0, "learning_rate": 6.8954688200987e-06, "loss": 0.4789, "step": 12240 }, { "epoch": 2.0, "learning_rate": 6.884253028263796e-06, "loss": 0.4726, "step": 12250 }, { "epoch": 2.0, "learning_rate": 6.873037236428892e-06, "loss": 0.4762, "step": 12260 }, { "epoch": 2.0, "learning_rate": 6.861821444593989e-06, "loss": 0.4746, "step": 12270 }, { "epoch": 2.0, "learning_rate": 6.850605652759086e-06, "loss": 0.4637, "step": 12280 }, { "epoch": 2.01, "learning_rate": 6.839389860924182e-06, "loss": 0.4619, "step": 12290 }, { "epoch": 2.01, "learning_rate": 6.828174069089278e-06, "loss": 0.4725, "step": 12300 }, { "epoch": 2.01, "learning_rate": 6.816958277254375e-06, "loss": 0.4737, "step": 12310 }, { "epoch": 2.01, "learning_rate": 6.805742485419471e-06, "loss": 0.4792, "step": 12320 }, { "epoch": 2.01, "learning_rate": 6.794526693584568e-06, "loss": 0.4797, "step": 12330 }, { "epoch": 2.01, "learning_rate": 6.783310901749664e-06, "loss": 0.4707, "step": 12340 }, { "epoch": 2.02, "learning_rate": 6.77209510991476e-06, "loss": 0.4752, "step": 12350 }, { "epoch": 2.02, "learning_rate": 6.760879318079857e-06, "loss": 0.4748, "step": 12360 }, { "epoch": 2.02, "learning_rate": 6.749663526244954e-06, "loss": 0.471, "step": 12370 }, { "epoch": 2.02, "learning_rate": 6.73844773441005e-06, "loss": 0.475, "step": 12380 }, { "epoch": 2.02, "learning_rate": 6.727231942575146e-06, "loss": 0.4686, "step": 12390 }, { "epoch": 2.02, "learning_rate": 6.716016150740243e-06, "loss": 0.4706, "step": 12400 }, { "epoch": 2.03, "learning_rate": 6.704800358905339e-06, "loss": 0.4615, "step": 12410 }, { "epoch": 2.03, "learning_rate": 6.693584567070436e-06, "loss": 0.4762, "step": 12420 }, { "epoch": 2.03, "learning_rate": 6.6823687752355324e-06, "loss": 0.4815, "step": 12430 }, { "epoch": 2.03, "learning_rate": 6.671152983400628e-06, "loss": 0.477, "step": 12440 }, { "epoch": 2.03, "learning_rate": 6.659937191565725e-06, "loss": 0.4645, "step": 12450 }, { "epoch": 2.03, "learning_rate": 6.648721399730822e-06, "loss": 0.4692, "step": 12460 }, { "epoch": 2.03, "learning_rate": 6.637505607895918e-06, "loss": 0.4739, "step": 12470 }, { "epoch": 2.04, "learning_rate": 6.626289816061014e-06, "loss": 0.4704, "step": 12480 }, { "epoch": 2.04, "learning_rate": 6.615074024226111e-06, "loss": 0.4788, "step": 12490 }, { "epoch": 2.04, "learning_rate": 6.603858232391207e-06, "loss": 0.4717, "step": 12500 }, { "epoch": 2.04, "learning_rate": 6.592642440556304e-06, "loss": 0.4595, "step": 12510 }, { "epoch": 2.04, "learning_rate": 6.5814266487214005e-06, "loss": 0.4764, "step": 12520 }, { "epoch": 2.04, "learning_rate": 6.570210856886496e-06, "loss": 0.4757, "step": 12530 }, { "epoch": 2.05, "learning_rate": 6.558995065051593e-06, "loss": 0.4757, "step": 12540 }, { "epoch": 2.05, "learning_rate": 6.54777927321669e-06, "loss": 0.474, "step": 12550 }, { "epoch": 2.05, "learning_rate": 6.536563481381786e-06, "loss": 0.4786, "step": 12560 }, { "epoch": 2.05, "learning_rate": 6.525347689546882e-06, "loss": 0.4662, "step": 12570 }, { "epoch": 2.05, "learning_rate": 6.5141318977119795e-06, "loss": 0.4684, "step": 12580 }, { "epoch": 2.05, "learning_rate": 6.502916105877075e-06, "loss": 0.4727, "step": 12590 }, { "epoch": 2.06, "learning_rate": 6.491700314042172e-06, "loss": 0.4913, "step": 12600 }, { "epoch": 2.06, "learning_rate": 6.4804845222072685e-06, "loss": 0.4618, "step": 12610 }, { "epoch": 2.06, "learning_rate": 6.469268730372364e-06, "loss": 0.4695, "step": 12620 }, { "epoch": 2.06, "learning_rate": 6.458052938537461e-06, "loss": 0.4704, "step": 12630 }, { "epoch": 2.06, "learning_rate": 6.4468371467025585e-06, "loss": 0.4812, "step": 12640 }, { "epoch": 2.06, "learning_rate": 6.435621354867654e-06, "loss": 0.4707, "step": 12650 }, { "epoch": 2.07, "learning_rate": 6.42440556303275e-06, "loss": 0.4845, "step": 12660 }, { "epoch": 2.07, "learning_rate": 6.4131897711978475e-06, "loss": 0.469, "step": 12670 }, { "epoch": 2.07, "learning_rate": 6.401973979362943e-06, "loss": 0.4649, "step": 12680 }, { "epoch": 2.07, "learning_rate": 6.39075818752804e-06, "loss": 0.4684, "step": 12690 }, { "epoch": 2.07, "learning_rate": 6.379542395693137e-06, "loss": 0.4688, "step": 12700 }, { "epoch": 2.07, "learning_rate": 6.368326603858232e-06, "loss": 0.4748, "step": 12710 }, { "epoch": 2.08, "learning_rate": 6.357110812023329e-06, "loss": 0.4631, "step": 12720 }, { "epoch": 2.08, "learning_rate": 6.3458950201884265e-06, "loss": 0.4814, "step": 12730 }, { "epoch": 2.08, "learning_rate": 6.334679228353522e-06, "loss": 0.4598, "step": 12740 }, { "epoch": 2.08, "learning_rate": 6.323463436518618e-06, "loss": 0.4807, "step": 12750 }, { "epoch": 2.08, "learning_rate": 6.312247644683716e-06, "loss": 0.4775, "step": 12760 }, { "epoch": 2.08, "learning_rate": 6.301031852848811e-06, "loss": 0.464, "step": 12770 }, { "epoch": 2.09, "learning_rate": 6.289816061013908e-06, "loss": 0.4724, "step": 12780 }, { "epoch": 2.09, "learning_rate": 6.278600269179005e-06, "loss": 0.4737, "step": 12790 }, { "epoch": 2.09, "learning_rate": 6.2673844773441e-06, "loss": 0.4737, "step": 12800 }, { "epoch": 2.09, "learning_rate": 6.256168685509197e-06, "loss": 0.4748, "step": 12810 }, { "epoch": 2.09, "learning_rate": 6.244952893674295e-06, "loss": 0.4736, "step": 12820 }, { "epoch": 2.09, "learning_rate": 6.23373710183939e-06, "loss": 0.483, "step": 12830 }, { "epoch": 2.1, "learning_rate": 6.2225213100044865e-06, "loss": 0.4687, "step": 12840 }, { "epoch": 2.1, "learning_rate": 6.211305518169584e-06, "loss": 0.4716, "step": 12850 }, { "epoch": 2.1, "learning_rate": 6.200089726334679e-06, "loss": 0.4723, "step": 12860 }, { "epoch": 2.1, "learning_rate": 6.188873934499776e-06, "loss": 0.4714, "step": 12870 }, { "epoch": 2.1, "learning_rate": 6.177658142664873e-06, "loss": 0.4782, "step": 12880 }, { "epoch": 2.1, "learning_rate": 6.166442350829969e-06, "loss": 0.4804, "step": 12890 }, { "epoch": 2.11, "learning_rate": 6.1552265589950654e-06, "loss": 0.4793, "step": 12900 }, { "epoch": 2.11, "learning_rate": 6.144010767160163e-06, "loss": 0.4639, "step": 12910 }, { "epoch": 2.11, "learning_rate": 6.132794975325258e-06, "loss": 0.4628, "step": 12920 }, { "epoch": 2.11, "learning_rate": 6.1215791834903545e-06, "loss": 0.4746, "step": 12930 }, { "epoch": 2.11, "learning_rate": 6.110363391655452e-06, "loss": 0.4625, "step": 12940 }, { "epoch": 2.11, "learning_rate": 6.099147599820547e-06, "loss": 0.4649, "step": 12950 }, { "epoch": 2.11, "learning_rate": 6.0879318079856444e-06, "loss": 0.4696, "step": 12960 }, { "epoch": 2.12, "learning_rate": 6.076716016150741e-06, "loss": 0.4631, "step": 12970 }, { "epoch": 2.12, "learning_rate": 6.065500224315837e-06, "loss": 0.4634, "step": 12980 }, { "epoch": 2.12, "learning_rate": 6.0542844324809335e-06, "loss": 0.4702, "step": 12990 }, { "epoch": 2.12, "learning_rate": 6.043068640646031e-06, "loss": 0.4643, "step": 13000 }, { "epoch": 2.12, "learning_rate": 6.031852848811126e-06, "loss": 0.4724, "step": 13010 }, { "epoch": 2.12, "learning_rate": 6.020637056976223e-06, "loss": 0.4738, "step": 13020 }, { "epoch": 2.13, "learning_rate": 6.00942126514132e-06, "loss": 0.4656, "step": 13030 }, { "epoch": 2.13, "learning_rate": 5.998205473306415e-06, "loss": 0.4733, "step": 13040 }, { "epoch": 2.13, "learning_rate": 5.9869896814715125e-06, "loss": 0.4786, "step": 13050 }, { "epoch": 2.13, "learning_rate": 5.975773889636609e-06, "loss": 0.4657, "step": 13060 }, { "epoch": 2.13, "learning_rate": 5.964558097801705e-06, "loss": 0.4794, "step": 13070 }, { "epoch": 2.13, "learning_rate": 5.9533423059668016e-06, "loss": 0.4775, "step": 13080 }, { "epoch": 2.14, "learning_rate": 5.942126514131899e-06, "loss": 0.4779, "step": 13090 }, { "epoch": 2.14, "learning_rate": 5.930910722296994e-06, "loss": 0.4712, "step": 13100 }, { "epoch": 2.14, "learning_rate": 5.919694930462091e-06, "loss": 0.467, "step": 13110 }, { "epoch": 2.14, "learning_rate": 5.908479138627188e-06, "loss": 0.4607, "step": 13120 }, { "epoch": 2.14, "learning_rate": 5.897263346792283e-06, "loss": 0.4673, "step": 13130 }, { "epoch": 2.14, "learning_rate": 5.8860475549573806e-06, "loss": 0.459, "step": 13140 }, { "epoch": 2.15, "learning_rate": 5.874831763122477e-06, "loss": 0.4771, "step": 13150 }, { "epoch": 2.15, "learning_rate": 5.863615971287573e-06, "loss": 0.4767, "step": 13160 }, { "epoch": 2.15, "learning_rate": 5.85240017945267e-06, "loss": 0.464, "step": 13170 }, { "epoch": 2.15, "learning_rate": 5.841184387617767e-06, "loss": 0.4724, "step": 13180 }, { "epoch": 2.15, "learning_rate": 5.829968595782862e-06, "loss": 0.464, "step": 13190 }, { "epoch": 2.15, "learning_rate": 5.818752803947959e-06, "loss": 0.4793, "step": 13200 }, { "epoch": 2.16, "learning_rate": 5.807537012113056e-06, "loss": 0.4777, "step": 13210 }, { "epoch": 2.16, "learning_rate": 5.796321220278151e-06, "loss": 0.4756, "step": 13220 }, { "epoch": 2.16, "learning_rate": 5.785105428443249e-06, "loss": 0.4695, "step": 13230 }, { "epoch": 2.16, "learning_rate": 5.773889636608345e-06, "loss": 0.4699, "step": 13240 }, { "epoch": 2.16, "learning_rate": 5.762673844773441e-06, "loss": 0.4676, "step": 13250 }, { "epoch": 2.16, "learning_rate": 5.751458052938538e-06, "loss": 0.4714, "step": 13260 }, { "epoch": 2.17, "learning_rate": 5.740242261103635e-06, "loss": 0.475, "step": 13270 }, { "epoch": 2.17, "learning_rate": 5.72902646926873e-06, "loss": 0.4803, "step": 13280 }, { "epoch": 2.17, "learning_rate": 5.717810677433827e-06, "loss": 0.4757, "step": 13290 }, { "epoch": 2.17, "learning_rate": 5.706594885598924e-06, "loss": 0.4742, "step": 13300 }, { "epoch": 2.17, "learning_rate": 5.6953790937640195e-06, "loss": 0.4717, "step": 13310 }, { "epoch": 2.17, "learning_rate": 5.684163301929117e-06, "loss": 0.4766, "step": 13320 }, { "epoch": 2.18, "learning_rate": 5.672947510094213e-06, "loss": 0.4793, "step": 13330 }, { "epoch": 2.18, "learning_rate": 5.66173171825931e-06, "loss": 0.4717, "step": 13340 }, { "epoch": 2.18, "learning_rate": 5.650515926424406e-06, "loss": 0.4591, "step": 13350 }, { "epoch": 2.18, "learning_rate": 5.639300134589503e-06, "loss": 0.463, "step": 13360 }, { "epoch": 2.18, "learning_rate": 5.628084342754599e-06, "loss": 0.4669, "step": 13370 }, { "epoch": 2.18, "learning_rate": 5.616868550919695e-06, "loss": 0.4773, "step": 13380 }, { "epoch": 2.19, "learning_rate": 5.605652759084792e-06, "loss": 0.4642, "step": 13390 }, { "epoch": 2.19, "learning_rate": 5.594436967249889e-06, "loss": 0.4689, "step": 13400 }, { "epoch": 2.19, "learning_rate": 5.583221175414985e-06, "loss": 0.4697, "step": 13410 }, { "epoch": 2.19, "learning_rate": 5.572005383580081e-06, "loss": 0.4627, "step": 13420 }, { "epoch": 2.19, "learning_rate": 5.560789591745178e-06, "loss": 0.4595, "step": 13430 }, { "epoch": 2.19, "learning_rate": 5.549573799910274e-06, "loss": 0.4661, "step": 13440 }, { "epoch": 2.19, "learning_rate": 5.538358008075371e-06, "loss": 0.4616, "step": 13450 }, { "epoch": 2.2, "learning_rate": 5.527142216240467e-06, "loss": 0.4599, "step": 13460 }, { "epoch": 2.2, "learning_rate": 5.515926424405563e-06, "loss": 0.4659, "step": 13470 }, { "epoch": 2.2, "learning_rate": 5.50471063257066e-06, "loss": 0.4645, "step": 13480 }, { "epoch": 2.2, "learning_rate": 5.493494840735757e-06, "loss": 0.4646, "step": 13490 }, { "epoch": 2.2, "learning_rate": 5.482279048900853e-06, "loss": 0.4703, "step": 13500 }, { "epoch": 2.2, "learning_rate": 5.471063257065949e-06, "loss": 0.4717, "step": 13510 }, { "epoch": 2.21, "learning_rate": 5.459847465231046e-06, "loss": 0.4822, "step": 13520 }, { "epoch": 2.21, "learning_rate": 5.448631673396142e-06, "loss": 0.4635, "step": 13530 }, { "epoch": 2.21, "learning_rate": 5.437415881561239e-06, "loss": 0.4709, "step": 13540 }, { "epoch": 2.21, "learning_rate": 5.4262000897263354e-06, "loss": 0.4734, "step": 13550 }, { "epoch": 2.21, "learning_rate": 5.414984297891431e-06, "loss": 0.4664, "step": 13560 }, { "epoch": 2.21, "learning_rate": 5.403768506056528e-06, "loss": 0.472, "step": 13570 }, { "epoch": 2.22, "learning_rate": 5.392552714221625e-06, "loss": 0.4685, "step": 13580 }, { "epoch": 2.22, "learning_rate": 5.381336922386721e-06, "loss": 0.4605, "step": 13590 }, { "epoch": 2.22, "learning_rate": 5.370121130551817e-06, "loss": 0.4594, "step": 13600 }, { "epoch": 2.22, "learning_rate": 5.3589053387169144e-06, "loss": 0.4852, "step": 13610 }, { "epoch": 2.22, "learning_rate": 5.34768954688201e-06, "loss": 0.4696, "step": 13620 }, { "epoch": 2.22, "learning_rate": 5.336473755047107e-06, "loss": 0.4669, "step": 13630 }, { "epoch": 2.23, "learning_rate": 5.3252579632122035e-06, "loss": 0.4792, "step": 13640 }, { "epoch": 2.23, "learning_rate": 5.314042171377299e-06, "loss": 0.461, "step": 13650 }, { "epoch": 2.23, "learning_rate": 5.302826379542396e-06, "loss": 0.4772, "step": 13660 }, { "epoch": 2.23, "learning_rate": 5.2916105877074934e-06, "loss": 0.4671, "step": 13670 }, { "epoch": 2.23, "learning_rate": 5.280394795872589e-06, "loss": 0.4792, "step": 13680 }, { "epoch": 2.23, "learning_rate": 5.269179004037685e-06, "loss": 0.4604, "step": 13690 }, { "epoch": 2.24, "learning_rate": 5.2579632122027825e-06, "loss": 0.4778, "step": 13700 }, { "epoch": 2.24, "learning_rate": 5.246747420367878e-06, "loss": 0.4726, "step": 13710 }, { "epoch": 2.24, "learning_rate": 5.235531628532975e-06, "loss": 0.4681, "step": 13720 }, { "epoch": 2.24, "learning_rate": 5.2243158366980716e-06, "loss": 0.4716, "step": 13730 }, { "epoch": 2.24, "learning_rate": 5.213100044863167e-06, "loss": 0.4725, "step": 13740 }, { "epoch": 2.24, "learning_rate": 5.201884253028264e-06, "loss": 0.4678, "step": 13750 }, { "epoch": 2.25, "learning_rate": 5.1906684611933615e-06, "loss": 0.4672, "step": 13760 }, { "epoch": 2.25, "learning_rate": 5.179452669358457e-06, "loss": 0.4768, "step": 13770 }, { "epoch": 2.25, "learning_rate": 5.168236877523553e-06, "loss": 0.4675, "step": 13780 }, { "epoch": 2.25, "learning_rate": 5.1570210856886506e-06, "loss": 0.4757, "step": 13790 }, { "epoch": 2.25, "learning_rate": 5.145805293853746e-06, "loss": 0.472, "step": 13800 }, { "epoch": 2.25, "learning_rate": 5.134589502018843e-06, "loss": 0.4828, "step": 13810 }, { "epoch": 2.26, "learning_rate": 5.12337371018394e-06, "loss": 0.4626, "step": 13820 }, { "epoch": 2.26, "learning_rate": 5.112157918349035e-06, "loss": 0.4668, "step": 13830 }, { "epoch": 2.26, "learning_rate": 5.100942126514132e-06, "loss": 0.4738, "step": 13840 }, { "epoch": 2.26, "learning_rate": 5.0897263346792296e-06, "loss": 0.4797, "step": 13850 }, { "epoch": 2.26, "learning_rate": 5.078510542844325e-06, "loss": 0.4635, "step": 13860 }, { "epoch": 2.26, "learning_rate": 5.067294751009421e-06, "loss": 0.482, "step": 13870 }, { "epoch": 2.27, "learning_rate": 5.056078959174519e-06, "loss": 0.4805, "step": 13880 }, { "epoch": 2.27, "learning_rate": 5.044863167339614e-06, "loss": 0.4657, "step": 13890 }, { "epoch": 2.27, "learning_rate": 5.033647375504711e-06, "loss": 0.4603, "step": 13900 }, { "epoch": 2.27, "learning_rate": 5.022431583669808e-06, "loss": 0.46, "step": 13910 }, { "epoch": 2.27, "learning_rate": 5.011215791834903e-06, "loss": 0.4665, "step": 13920 }, { "epoch": 2.27, "learning_rate": 5e-06, "loss": 0.467, "step": 13930 }, { "epoch": 2.27, "learning_rate": 4.988784208165097e-06, "loss": 0.4726, "step": 13940 }, { "epoch": 2.28, "learning_rate": 4.977568416330193e-06, "loss": 0.4608, "step": 13950 }, { "epoch": 2.28, "learning_rate": 4.9663526244952895e-06, "loss": 0.4735, "step": 13960 }, { "epoch": 2.28, "learning_rate": 4.955136832660386e-06, "loss": 0.4627, "step": 13970 }, { "epoch": 2.28, "learning_rate": 4.943921040825483e-06, "loss": 0.4797, "step": 13980 }, { "epoch": 2.28, "learning_rate": 4.932705248990579e-06, "loss": 0.4708, "step": 13990 }, { "epoch": 2.28, "learning_rate": 4.921489457155676e-06, "loss": 0.4673, "step": 14000 }, { "epoch": 2.29, "learning_rate": 4.910273665320772e-06, "loss": 0.4703, "step": 14010 }, { "epoch": 2.29, "learning_rate": 4.8990578734858685e-06, "loss": 0.4644, "step": 14020 }, { "epoch": 2.29, "learning_rate": 4.887842081650965e-06, "loss": 0.4712, "step": 14030 }, { "epoch": 2.29, "learning_rate": 4.876626289816061e-06, "loss": 0.4643, "step": 14040 }, { "epoch": 2.29, "learning_rate": 4.8654104979811575e-06, "loss": 0.4795, "step": 14050 }, { "epoch": 2.29, "learning_rate": 4.854194706146254e-06, "loss": 0.4697, "step": 14060 }, { "epoch": 2.3, "learning_rate": 4.842978914311351e-06, "loss": 0.4651, "step": 14070 }, { "epoch": 2.3, "learning_rate": 4.8317631224764475e-06, "loss": 0.4607, "step": 14080 }, { "epoch": 2.3, "learning_rate": 4.820547330641544e-06, "loss": 0.4746, "step": 14090 }, { "epoch": 2.3, "learning_rate": 4.80933153880664e-06, "loss": 0.4804, "step": 14100 }, { "epoch": 2.3, "learning_rate": 4.7981157469717365e-06, "loss": 0.4538, "step": 14110 }, { "epoch": 2.3, "learning_rate": 4.786899955136833e-06, "loss": 0.4744, "step": 14120 }, { "epoch": 2.31, "learning_rate": 4.775684163301929e-06, "loss": 0.4747, "step": 14130 }, { "epoch": 2.31, "learning_rate": 4.764468371467026e-06, "loss": 0.4603, "step": 14140 }, { "epoch": 2.31, "learning_rate": 4.753252579632122e-06, "loss": 0.4742, "step": 14150 }, { "epoch": 2.31, "learning_rate": 4.742036787797219e-06, "loss": 0.4678, "step": 14160 }, { "epoch": 2.31, "learning_rate": 4.7308209959623155e-06, "loss": 0.4707, "step": 14170 }, { "epoch": 2.31, "learning_rate": 4.719605204127412e-06, "loss": 0.4724, "step": 14180 }, { "epoch": 2.32, "learning_rate": 4.708389412292508e-06, "loss": 0.4657, "step": 14190 }, { "epoch": 2.32, "learning_rate": 4.697173620457605e-06, "loss": 0.4748, "step": 14200 }, { "epoch": 2.32, "learning_rate": 4.685957828622701e-06, "loss": 0.4663, "step": 14210 }, { "epoch": 2.32, "learning_rate": 4.674742036787797e-06, "loss": 0.4656, "step": 14220 }, { "epoch": 2.32, "learning_rate": 4.663526244952894e-06, "loss": 0.4611, "step": 14230 }, { "epoch": 2.32, "learning_rate": 4.652310453117991e-06, "loss": 0.4658, "step": 14240 }, { "epoch": 2.33, "learning_rate": 4.641094661283087e-06, "loss": 0.4741, "step": 14250 }, { "epoch": 2.33, "learning_rate": 4.629878869448184e-06, "loss": 0.4728, "step": 14260 }, { "epoch": 2.33, "learning_rate": 4.61866307761328e-06, "loss": 0.4627, "step": 14270 }, { "epoch": 2.33, "learning_rate": 4.607447285778376e-06, "loss": 0.4702, "step": 14280 }, { "epoch": 2.33, "learning_rate": 4.596231493943473e-06, "loss": 0.4744, "step": 14290 }, { "epoch": 2.33, "learning_rate": 4.58501570210857e-06, "loss": 0.4606, "step": 14300 }, { "epoch": 2.34, "learning_rate": 4.573799910273665e-06, "loss": 0.467, "step": 14310 }, { "epoch": 2.34, "learning_rate": 4.562584118438762e-06, "loss": 0.4593, "step": 14320 }, { "epoch": 2.34, "learning_rate": 4.551368326603859e-06, "loss": 0.4724, "step": 14330 }, { "epoch": 2.34, "learning_rate": 4.540152534768955e-06, "loss": 0.4659, "step": 14340 }, { "epoch": 2.34, "learning_rate": 4.528936742934052e-06, "loss": 0.4653, "step": 14350 }, { "epoch": 2.34, "learning_rate": 4.517720951099148e-06, "loss": 0.4571, "step": 14360 }, { "epoch": 2.34, "learning_rate": 4.506505159264244e-06, "loss": 0.4711, "step": 14370 }, { "epoch": 2.35, "learning_rate": 4.495289367429341e-06, "loss": 0.4657, "step": 14380 }, { "epoch": 2.35, "learning_rate": 4.484073575594438e-06, "loss": 0.4671, "step": 14390 }, { "epoch": 2.35, "learning_rate": 4.4728577837595334e-06, "loss": 0.4677, "step": 14400 }, { "epoch": 2.35, "learning_rate": 4.46164199192463e-06, "loss": 0.4751, "step": 14410 }, { "epoch": 2.35, "learning_rate": 4.450426200089727e-06, "loss": 0.4614, "step": 14420 }, { "epoch": 2.35, "learning_rate": 4.439210408254823e-06, "loss": 0.4771, "step": 14430 }, { "epoch": 2.36, "learning_rate": 4.42799461641992e-06, "loss": 0.4573, "step": 14440 }, { "epoch": 2.36, "learning_rate": 4.416778824585016e-06, "loss": 0.4673, "step": 14450 }, { "epoch": 2.36, "learning_rate": 4.4055630327501124e-06, "loss": 0.4746, "step": 14460 }, { "epoch": 2.36, "learning_rate": 4.394347240915209e-06, "loss": 0.4792, "step": 14470 }, { "epoch": 2.36, "learning_rate": 4.383131449080306e-06, "loss": 0.4728, "step": 14480 }, { "epoch": 2.36, "learning_rate": 4.3719156572454015e-06, "loss": 0.4674, "step": 14490 }, { "epoch": 2.37, "learning_rate": 4.360699865410498e-06, "loss": 0.4759, "step": 14500 }, { "epoch": 2.37, "learning_rate": 4.349484073575595e-06, "loss": 0.478, "step": 14510 }, { "epoch": 2.37, "learning_rate": 4.338268281740691e-06, "loss": 0.464, "step": 14520 }, { "epoch": 2.37, "learning_rate": 4.327052489905788e-06, "loss": 0.4801, "step": 14530 }, { "epoch": 2.37, "learning_rate": 4.315836698070884e-06, "loss": 0.4704, "step": 14540 }, { "epoch": 2.37, "learning_rate": 4.3046209062359805e-06, "loss": 0.4778, "step": 14550 }, { "epoch": 2.38, "learning_rate": 4.293405114401077e-06, "loss": 0.4783, "step": 14560 }, { "epoch": 2.38, "learning_rate": 4.282189322566174e-06, "loss": 0.4821, "step": 14570 }, { "epoch": 2.38, "learning_rate": 4.2709735307312696e-06, "loss": 0.4607, "step": 14580 }, { "epoch": 2.38, "learning_rate": 4.259757738896366e-06, "loss": 0.4752, "step": 14590 }, { "epoch": 2.38, "learning_rate": 4.248541947061463e-06, "loss": 0.4672, "step": 14600 }, { "epoch": 2.38, "learning_rate": 4.2373261552265595e-06, "loss": 0.466, "step": 14610 }, { "epoch": 2.39, "learning_rate": 4.226110363391656e-06, "loss": 0.4711, "step": 14620 }, { "epoch": 2.39, "learning_rate": 4.214894571556752e-06, "loss": 0.4813, "step": 14630 }, { "epoch": 2.39, "learning_rate": 4.2036787797218485e-06, "loss": 0.4731, "step": 14640 }, { "epoch": 2.39, "learning_rate": 4.192462987886945e-06, "loss": 0.4656, "step": 14650 }, { "epoch": 2.39, "learning_rate": 4.181247196052042e-06, "loss": 0.4605, "step": 14660 }, { "epoch": 2.39, "learning_rate": 4.170031404217138e-06, "loss": 0.4535, "step": 14670 }, { "epoch": 2.4, "learning_rate": 4.158815612382234e-06, "loss": 0.4601, "step": 14680 }, { "epoch": 2.4, "learning_rate": 4.147599820547331e-06, "loss": 0.4617, "step": 14690 }, { "epoch": 2.4, "learning_rate": 4.1363840287124275e-06, "loss": 0.4691, "step": 14700 }, { "epoch": 2.4, "learning_rate": 4.125168236877524e-06, "loss": 0.4707, "step": 14710 }, { "epoch": 2.4, "learning_rate": 4.11395244504262e-06, "loss": 0.4678, "step": 14720 }, { "epoch": 2.4, "learning_rate": 4.102736653207717e-06, "loss": 0.468, "step": 14730 }, { "epoch": 2.41, "learning_rate": 4.091520861372813e-06, "loss": 0.4675, "step": 14740 }, { "epoch": 2.41, "learning_rate": 4.08030506953791e-06, "loss": 0.4746, "step": 14750 }, { "epoch": 2.41, "learning_rate": 4.069089277703006e-06, "loss": 0.4729, "step": 14760 }, { "epoch": 2.41, "learning_rate": 4.057873485868102e-06, "loss": 0.4681, "step": 14770 }, { "epoch": 2.41, "learning_rate": 4.046657694033199e-06, "loss": 0.4726, "step": 14780 }, { "epoch": 2.41, "learning_rate": 4.035441902198296e-06, "loss": 0.472, "step": 14790 }, { "epoch": 2.42, "learning_rate": 4.024226110363392e-06, "loss": 0.4532, "step": 14800 }, { "epoch": 2.42, "learning_rate": 4.013010318528488e-06, "loss": 0.475, "step": 14810 }, { "epoch": 2.42, "learning_rate": 4.001794526693585e-06, "loss": 0.4659, "step": 14820 }, { "epoch": 2.42, "learning_rate": 3.990578734858681e-06, "loss": 0.4747, "step": 14830 }, { "epoch": 2.42, "learning_rate": 3.979362943023778e-06, "loss": 0.4697, "step": 14840 }, { "epoch": 2.42, "learning_rate": 3.968147151188875e-06, "loss": 0.4709, "step": 14850 }, { "epoch": 2.42, "learning_rate": 3.956931359353971e-06, "loss": 0.4738, "step": 14860 }, { "epoch": 2.43, "learning_rate": 3.945715567519067e-06, "loss": 0.4628, "step": 14870 }, { "epoch": 2.43, "learning_rate": 3.934499775684164e-06, "loss": 0.4695, "step": 14880 }, { "epoch": 2.43, "learning_rate": 3.92328398384926e-06, "loss": 0.4789, "step": 14890 }, { "epoch": 2.43, "learning_rate": 3.912068192014356e-06, "loss": 0.462, "step": 14900 }, { "epoch": 2.43, "learning_rate": 3.900852400179453e-06, "loss": 0.4543, "step": 14910 }, { "epoch": 2.43, "learning_rate": 3.88963660834455e-06, "loss": 0.4577, "step": 14920 }, { "epoch": 2.44, "learning_rate": 3.878420816509646e-06, "loss": 0.4619, "step": 14930 }, { "epoch": 2.44, "learning_rate": 3.867205024674743e-06, "loss": 0.4723, "step": 14940 }, { "epoch": 2.44, "learning_rate": 3.855989232839839e-06, "loss": 0.4687, "step": 14950 }, { "epoch": 2.44, "learning_rate": 3.844773441004935e-06, "loss": 0.4745, "step": 14960 }, { "epoch": 2.44, "learning_rate": 3.833557649170032e-06, "loss": 0.4795, "step": 14970 }, { "epoch": 2.44, "learning_rate": 3.822341857335128e-06, "loss": 0.4591, "step": 14980 }, { "epoch": 2.45, "learning_rate": 3.811126065500225e-06, "loss": 0.4607, "step": 14990 }, { "epoch": 2.45, "learning_rate": 3.799910273665321e-06, "loss": 0.4693, "step": 15000 }, { "epoch": 2.45, "learning_rate": 3.7886944818304176e-06, "loss": 0.4798, "step": 15010 }, { "epoch": 2.45, "learning_rate": 3.777478689995514e-06, "loss": 0.4655, "step": 15020 }, { "epoch": 2.45, "learning_rate": 3.7662628981606103e-06, "loss": 0.4543, "step": 15030 }, { "epoch": 2.45, "learning_rate": 3.755047106325707e-06, "loss": 0.4617, "step": 15040 }, { "epoch": 2.46, "learning_rate": 3.7438313144908034e-06, "loss": 0.4731, "step": 15050 }, { "epoch": 2.46, "learning_rate": 3.7337371018393903e-06, "loss": 0.4608, "step": 15060 }, { "epoch": 2.46, "learning_rate": 3.7225213100044867e-06, "loss": 0.4767, "step": 15070 }, { "epoch": 2.46, "learning_rate": 3.711305518169583e-06, "loss": 0.4679, "step": 15080 }, { "epoch": 2.46, "learning_rate": 3.70008972633468e-06, "loss": 0.4646, "step": 15090 }, { "epoch": 2.46, "learning_rate": 3.6888739344997758e-06, "loss": 0.4713, "step": 15100 }, { "epoch": 2.47, "learning_rate": 3.677658142664872e-06, "loss": 0.4628, "step": 15110 }, { "epoch": 2.47, "learning_rate": 3.666442350829969e-06, "loss": 0.4609, "step": 15120 }, { "epoch": 2.47, "learning_rate": 3.6552265589950652e-06, "loss": 0.4664, "step": 15130 }, { "epoch": 2.47, "learning_rate": 3.6440107671601616e-06, "loss": 0.4698, "step": 15140 }, { "epoch": 2.47, "learning_rate": 3.6327949753252584e-06, "loss": 0.4703, "step": 15150 }, { "epoch": 2.47, "learning_rate": 3.6215791834903547e-06, "loss": 0.4652, "step": 15160 }, { "epoch": 2.48, "learning_rate": 3.610363391655451e-06, "loss": 0.4676, "step": 15170 }, { "epoch": 2.48, "learning_rate": 3.599147599820548e-06, "loss": 0.4586, "step": 15180 }, { "epoch": 2.48, "learning_rate": 3.587931807985644e-06, "loss": 0.4713, "step": 15190 }, { "epoch": 2.48, "learning_rate": 3.57671601615074e-06, "loss": 0.4608, "step": 15200 }, { "epoch": 2.48, "learning_rate": 3.565500224315837e-06, "loss": 0.4606, "step": 15210 }, { "epoch": 2.48, "learning_rate": 3.5542844324809333e-06, "loss": 0.4805, "step": 15220 }, { "epoch": 2.49, "learning_rate": 3.5430686406460297e-06, "loss": 0.4572, "step": 15230 }, { "epoch": 2.49, "learning_rate": 3.5318528488111264e-06, "loss": 0.4591, "step": 15240 }, { "epoch": 2.49, "learning_rate": 3.520637056976223e-06, "loss": 0.4635, "step": 15250 }, { "epoch": 2.49, "learning_rate": 3.5094212651413196e-06, "loss": 0.4734, "step": 15260 }, { "epoch": 2.49, "learning_rate": 3.498205473306416e-06, "loss": 0.4624, "step": 15270 }, { "epoch": 2.49, "learning_rate": 3.486989681471512e-06, "loss": 0.4719, "step": 15280 }, { "epoch": 2.5, "learning_rate": 3.475773889636609e-06, "loss": 0.4617, "step": 15290 }, { "epoch": 2.5, "learning_rate": 3.464558097801705e-06, "loss": 0.4742, "step": 15300 }, { "epoch": 2.5, "learning_rate": 3.4533423059668014e-06, "loss": 0.47, "step": 15310 }, { "epoch": 2.5, "learning_rate": 3.442126514131898e-06, "loss": 0.4725, "step": 15320 }, { "epoch": 2.5, "learning_rate": 3.4309107222969945e-06, "loss": 0.4618, "step": 15330 }, { "epoch": 2.5, "learning_rate": 3.419694930462091e-06, "loss": 0.4651, "step": 15340 }, { "epoch": 2.5, "learning_rate": 3.4084791386271876e-06, "loss": 0.4662, "step": 15350 }, { "epoch": 2.51, "learning_rate": 3.397263346792284e-06, "loss": 0.4773, "step": 15360 }, { "epoch": 2.51, "learning_rate": 3.38604755495738e-06, "loss": 0.457, "step": 15370 }, { "epoch": 2.51, "learning_rate": 3.374831763122477e-06, "loss": 0.4621, "step": 15380 }, { "epoch": 2.51, "learning_rate": 3.363615971287573e-06, "loss": 0.472, "step": 15390 }, { "epoch": 2.51, "learning_rate": 3.3524001794526694e-06, "loss": 0.4784, "step": 15400 }, { "epoch": 2.51, "learning_rate": 3.3411843876177662e-06, "loss": 0.4743, "step": 15410 }, { "epoch": 2.52, "learning_rate": 3.3299685957828626e-06, "loss": 0.4677, "step": 15420 }, { "epoch": 2.52, "learning_rate": 3.318752803947959e-06, "loss": 0.4707, "step": 15430 }, { "epoch": 2.52, "learning_rate": 3.3075370121130557e-06, "loss": 0.4629, "step": 15440 }, { "epoch": 2.52, "learning_rate": 3.296321220278152e-06, "loss": 0.4771, "step": 15450 }, { "epoch": 2.52, "learning_rate": 3.285105428443248e-06, "loss": 0.4681, "step": 15460 }, { "epoch": 2.52, "learning_rate": 3.273889636608345e-06, "loss": 0.4653, "step": 15470 }, { "epoch": 2.53, "learning_rate": 3.262673844773441e-06, "loss": 0.459, "step": 15480 }, { "epoch": 2.53, "learning_rate": 3.2514580529385375e-06, "loss": 0.471, "step": 15490 }, { "epoch": 2.53, "learning_rate": 3.2402422611036343e-06, "loss": 0.464, "step": 15500 }, { "epoch": 2.53, "learning_rate": 3.2290264692687306e-06, "loss": 0.4774, "step": 15510 }, { "epoch": 2.53, "learning_rate": 3.217810677433827e-06, "loss": 0.4736, "step": 15520 }, { "epoch": 2.53, "learning_rate": 3.2065948855989238e-06, "loss": 0.4702, "step": 15530 }, { "epoch": 2.54, "learning_rate": 3.19537909376402e-06, "loss": 0.4598, "step": 15540 }, { "epoch": 2.54, "learning_rate": 3.184163301929116e-06, "loss": 0.4565, "step": 15550 }, { "epoch": 2.54, "learning_rate": 3.1729475100942133e-06, "loss": 0.4617, "step": 15560 }, { "epoch": 2.54, "learning_rate": 3.161731718259309e-06, "loss": 0.4555, "step": 15570 }, { "epoch": 2.54, "learning_rate": 3.1505159264244056e-06, "loss": 0.4594, "step": 15580 }, { "epoch": 2.54, "learning_rate": 3.1393001345895023e-06, "loss": 0.4625, "step": 15590 }, { "epoch": 2.55, "learning_rate": 3.1280843427545987e-06, "loss": 0.4703, "step": 15600 }, { "epoch": 2.55, "learning_rate": 3.116868550919695e-06, "loss": 0.4745, "step": 15610 }, { "epoch": 2.55, "learning_rate": 3.105652759084792e-06, "loss": 0.477, "step": 15620 }, { "epoch": 2.55, "learning_rate": 3.094436967249888e-06, "loss": 0.4619, "step": 15630 }, { "epoch": 2.55, "learning_rate": 3.0832211754149845e-06, "loss": 0.4658, "step": 15640 }, { "epoch": 2.55, "learning_rate": 3.0720053835800813e-06, "loss": 0.4684, "step": 15650 }, { "epoch": 2.56, "learning_rate": 3.0607895917451773e-06, "loss": 0.461, "step": 15660 }, { "epoch": 2.56, "learning_rate": 3.0495737999102736e-06, "loss": 0.4716, "step": 15670 }, { "epoch": 2.56, "learning_rate": 3.0383580080753704e-06, "loss": 0.4632, "step": 15680 }, { "epoch": 2.56, "learning_rate": 3.0271422162404668e-06, "loss": 0.4726, "step": 15690 }, { "epoch": 2.56, "learning_rate": 3.015926424405563e-06, "loss": 0.4733, "step": 15700 }, { "epoch": 2.56, "learning_rate": 3.00471063257066e-06, "loss": 0.4668, "step": 15710 }, { "epoch": 2.57, "learning_rate": 2.9934948407357563e-06, "loss": 0.466, "step": 15720 }, { "epoch": 2.57, "learning_rate": 2.9822790489008526e-06, "loss": 0.4708, "step": 15730 }, { "epoch": 2.57, "learning_rate": 2.9710632570659494e-06, "loss": 0.4719, "step": 15740 }, { "epoch": 2.57, "learning_rate": 2.9598474652310453e-06, "loss": 0.4757, "step": 15750 }, { "epoch": 2.57, "learning_rate": 2.9486316733961417e-06, "loss": 0.4775, "step": 15760 }, { "epoch": 2.57, "learning_rate": 2.9374158815612385e-06, "loss": 0.4569, "step": 15770 }, { "epoch": 2.58, "learning_rate": 2.926200089726335e-06, "loss": 0.4758, "step": 15780 }, { "epoch": 2.58, "learning_rate": 2.914984297891431e-06, "loss": 0.4654, "step": 15790 }, { "epoch": 2.58, "learning_rate": 2.903768506056528e-06, "loss": 0.4641, "step": 15800 }, { "epoch": 2.58, "learning_rate": 2.8925527142216243e-06, "loss": 0.4675, "step": 15810 }, { "epoch": 2.58, "learning_rate": 2.8813369223867207e-06, "loss": 0.4771, "step": 15820 }, { "epoch": 2.58, "learning_rate": 2.8701211305518175e-06, "loss": 0.4653, "step": 15830 }, { "epoch": 2.58, "learning_rate": 2.8589053387169134e-06, "loss": 0.4646, "step": 15840 }, { "epoch": 2.59, "learning_rate": 2.8476895468820097e-06, "loss": 0.4671, "step": 15850 }, { "epoch": 2.59, "learning_rate": 2.8364737550471065e-06, "loss": 0.4733, "step": 15860 }, { "epoch": 2.59, "learning_rate": 2.825257963212203e-06, "loss": 0.4629, "step": 15870 }, { "epoch": 2.59, "learning_rate": 2.8140421713772997e-06, "loss": 0.4803, "step": 15880 }, { "epoch": 2.59, "learning_rate": 2.802826379542396e-06, "loss": 0.465, "step": 15890 }, { "epoch": 2.59, "learning_rate": 2.7916105877074924e-06, "loss": 0.466, "step": 15900 }, { "epoch": 2.6, "learning_rate": 2.780394795872589e-06, "loss": 0.4667, "step": 15910 }, { "epoch": 2.6, "learning_rate": 2.7691790040376855e-06, "loss": 0.4703, "step": 15920 }, { "epoch": 2.6, "learning_rate": 2.7579632122027814e-06, "loss": 0.4772, "step": 15930 }, { "epoch": 2.6, "learning_rate": 2.7467474203678787e-06, "loss": 0.4728, "step": 15940 }, { "epoch": 2.6, "learning_rate": 2.7355316285329746e-06, "loss": 0.4724, "step": 15950 }, { "epoch": 2.6, "learning_rate": 2.724315836698071e-06, "loss": 0.4666, "step": 15960 }, { "epoch": 2.61, "learning_rate": 2.7131000448631677e-06, "loss": 0.4648, "step": 15970 }, { "epoch": 2.61, "learning_rate": 2.701884253028264e-06, "loss": 0.4647, "step": 15980 }, { "epoch": 2.61, "learning_rate": 2.6906684611933604e-06, "loss": 0.4679, "step": 15990 }, { "epoch": 2.61, "learning_rate": 2.6794526693584572e-06, "loss": 0.4604, "step": 16000 }, { "epoch": 2.61, "learning_rate": 2.6682368775235536e-06, "loss": 0.4632, "step": 16010 }, { "epoch": 2.61, "learning_rate": 2.6570210856886495e-06, "loss": 0.4593, "step": 16020 }, { "epoch": 2.62, "learning_rate": 2.6458052938537467e-06, "loss": 0.4646, "step": 16030 }, { "epoch": 2.62, "learning_rate": 2.6345895020188426e-06, "loss": 0.459, "step": 16040 }, { "epoch": 2.62, "learning_rate": 2.623373710183939e-06, "loss": 0.4707, "step": 16050 }, { "epoch": 2.62, "learning_rate": 2.6121579183490358e-06, "loss": 0.462, "step": 16060 }, { "epoch": 2.62, "learning_rate": 2.600942126514132e-06, "loss": 0.4696, "step": 16070 }, { "epoch": 2.62, "learning_rate": 2.5897263346792285e-06, "loss": 0.4678, "step": 16080 }, { "epoch": 2.63, "learning_rate": 2.5785105428443253e-06, "loss": 0.4669, "step": 16090 }, { "epoch": 2.63, "learning_rate": 2.5672947510094216e-06, "loss": 0.4619, "step": 16100 }, { "epoch": 2.63, "learning_rate": 2.5560789591745176e-06, "loss": 0.4588, "step": 16110 }, { "epoch": 2.63, "learning_rate": 2.5448631673396148e-06, "loss": 0.4608, "step": 16120 }, { "epoch": 2.63, "learning_rate": 2.5336473755047107e-06, "loss": 0.4564, "step": 16130 }, { "epoch": 2.63, "learning_rate": 2.522431583669807e-06, "loss": 0.4682, "step": 16140 }, { "epoch": 2.64, "learning_rate": 2.511215791834904e-06, "loss": 0.4623, "step": 16150 }, { "epoch": 2.64, "learning_rate": 2.5e-06, "loss": 0.4652, "step": 16160 }, { "epoch": 2.64, "learning_rate": 2.4887842081650966e-06, "loss": 0.4679, "step": 16170 }, { "epoch": 2.64, "learning_rate": 2.477568416330193e-06, "loss": 0.4719, "step": 16180 }, { "epoch": 2.64, "learning_rate": 2.4663526244952897e-06, "loss": 0.4835, "step": 16190 }, { "epoch": 2.64, "learning_rate": 2.455136832660386e-06, "loss": 0.4721, "step": 16200 }, { "epoch": 2.65, "learning_rate": 2.4439210408254824e-06, "loss": 0.4616, "step": 16210 }, { "epoch": 2.65, "learning_rate": 2.4327052489905788e-06, "loss": 0.4592, "step": 16220 }, { "epoch": 2.65, "learning_rate": 2.4214894571556756e-06, "loss": 0.4607, "step": 16230 }, { "epoch": 2.65, "learning_rate": 2.410273665320772e-06, "loss": 0.4716, "step": 16240 }, { "epoch": 2.65, "learning_rate": 2.3990578734858683e-06, "loss": 0.4689, "step": 16250 }, { "epoch": 2.65, "learning_rate": 2.3878420816509646e-06, "loss": 0.4712, "step": 16260 }, { "epoch": 2.66, "learning_rate": 2.376626289816061e-06, "loss": 0.4771, "step": 16270 }, { "epoch": 2.66, "learning_rate": 2.3654104979811578e-06, "loss": 0.4687, "step": 16280 }, { "epoch": 2.66, "learning_rate": 2.354194706146254e-06, "loss": 0.4683, "step": 16290 }, { "epoch": 2.66, "learning_rate": 2.3429789143113505e-06, "loss": 0.4672, "step": 16300 }, { "epoch": 2.66, "learning_rate": 2.331763122476447e-06, "loss": 0.4772, "step": 16310 }, { "epoch": 2.66, "learning_rate": 2.3205473306415436e-06, "loss": 0.4696, "step": 16320 }, { "epoch": 2.66, "learning_rate": 2.30933153880664e-06, "loss": 0.4691, "step": 16330 }, { "epoch": 2.67, "learning_rate": 2.2981157469717363e-06, "loss": 0.4646, "step": 16340 }, { "epoch": 2.67, "learning_rate": 2.2868999551368327e-06, "loss": 0.4736, "step": 16350 }, { "epoch": 2.67, "learning_rate": 2.2756841633019295e-06, "loss": 0.4831, "step": 16360 }, { "epoch": 2.67, "learning_rate": 2.264468371467026e-06, "loss": 0.4555, "step": 16370 }, { "epoch": 2.67, "learning_rate": 2.253252579632122e-06, "loss": 0.4751, "step": 16380 }, { "epoch": 2.67, "learning_rate": 2.242036787797219e-06, "loss": 0.472, "step": 16390 }, { "epoch": 2.68, "learning_rate": 2.230820995962315e-06, "loss": 0.4743, "step": 16400 }, { "epoch": 2.68, "learning_rate": 2.2196052041274117e-06, "loss": 0.4657, "step": 16410 }, { "epoch": 2.68, "learning_rate": 2.208389412292508e-06, "loss": 0.459, "step": 16420 }, { "epoch": 2.68, "learning_rate": 2.1971736204576044e-06, "loss": 0.4586, "step": 16430 }, { "epoch": 2.68, "learning_rate": 2.1859578286227007e-06, "loss": 0.4627, "step": 16440 }, { "epoch": 2.68, "learning_rate": 2.1747420367877975e-06, "loss": 0.4665, "step": 16450 }, { "epoch": 2.69, "learning_rate": 2.163526244952894e-06, "loss": 0.4625, "step": 16460 }, { "epoch": 2.69, "learning_rate": 2.1523104531179902e-06, "loss": 0.4576, "step": 16470 }, { "epoch": 2.69, "learning_rate": 2.141094661283087e-06, "loss": 0.465, "step": 16480 }, { "epoch": 2.69, "learning_rate": 2.129878869448183e-06, "loss": 0.4622, "step": 16490 }, { "epoch": 2.69, "learning_rate": 2.1186630776132797e-06, "loss": 0.4624, "step": 16500 }, { "epoch": 2.69, "learning_rate": 2.107447285778376e-06, "loss": 0.4644, "step": 16510 }, { "epoch": 2.7, "learning_rate": 2.0962314939434725e-06, "loss": 0.472, "step": 16520 }, { "epoch": 2.7, "learning_rate": 2.085015702108569e-06, "loss": 0.4694, "step": 16530 }, { "epoch": 2.7, "learning_rate": 2.0737999102736656e-06, "loss": 0.4766, "step": 16540 }, { "epoch": 2.7, "learning_rate": 2.062584118438762e-06, "loss": 0.467, "step": 16550 }, { "epoch": 2.7, "learning_rate": 2.0513683266038583e-06, "loss": 0.4701, "step": 16560 }, { "epoch": 2.7, "learning_rate": 2.040152534768955e-06, "loss": 0.4611, "step": 16570 }, { "epoch": 2.71, "learning_rate": 2.028936742934051e-06, "loss": 0.4631, "step": 16580 }, { "epoch": 2.71, "learning_rate": 2.017720951099148e-06, "loss": 0.4672, "step": 16590 }, { "epoch": 2.71, "learning_rate": 2.006505159264244e-06, "loss": 0.4649, "step": 16600 }, { "epoch": 2.71, "learning_rate": 1.9952893674293405e-06, "loss": 0.4703, "step": 16610 }, { "epoch": 2.71, "learning_rate": 1.9840735755944373e-06, "loss": 0.4685, "step": 16620 }, { "epoch": 2.71, "learning_rate": 1.9728577837595337e-06, "loss": 0.4678, "step": 16630 }, { "epoch": 2.72, "learning_rate": 1.96164199192463e-06, "loss": 0.4651, "step": 16640 }, { "epoch": 2.72, "learning_rate": 1.9504262000897264e-06, "loss": 0.4585, "step": 16650 }, { "epoch": 2.72, "learning_rate": 1.939210408254823e-06, "loss": 0.4764, "step": 16660 }, { "epoch": 2.72, "learning_rate": 1.9279946164199195e-06, "loss": 0.4656, "step": 16670 }, { "epoch": 2.72, "learning_rate": 1.916778824585016e-06, "loss": 0.4627, "step": 16680 }, { "epoch": 2.72, "learning_rate": 1.9055630327501124e-06, "loss": 0.4646, "step": 16690 }, { "epoch": 2.73, "learning_rate": 1.8943472409152088e-06, "loss": 0.4678, "step": 16700 }, { "epoch": 2.73, "learning_rate": 1.8831314490803051e-06, "loss": 0.4778, "step": 16710 }, { "epoch": 2.73, "learning_rate": 1.8719156572454017e-06, "loss": 0.4632, "step": 16720 }, { "epoch": 2.73, "learning_rate": 1.8606998654104983e-06, "loss": 0.474, "step": 16730 }, { "epoch": 2.73, "learning_rate": 1.8494840735755944e-06, "loss": 0.4667, "step": 16740 }, { "epoch": 2.73, "learning_rate": 1.838268281740691e-06, "loss": 0.4698, "step": 16750 }, { "epoch": 2.73, "learning_rate": 1.8270524899057876e-06, "loss": 0.4575, "step": 16760 }, { "epoch": 2.74, "learning_rate": 1.815836698070884e-06, "loss": 0.4755, "step": 16770 }, { "epoch": 2.74, "learning_rate": 1.8046209062359805e-06, "loss": 0.4809, "step": 16780 }, { "epoch": 2.74, "learning_rate": 1.7934051144010769e-06, "loss": 0.4655, "step": 16790 }, { "epoch": 2.74, "learning_rate": 1.7821893225661732e-06, "loss": 0.4685, "step": 16800 }, { "epoch": 2.74, "learning_rate": 1.7709735307312698e-06, "loss": 0.4659, "step": 16810 }, { "epoch": 2.74, "learning_rate": 1.7597577388963663e-06, "loss": 0.4676, "step": 16820 }, { "epoch": 2.75, "learning_rate": 1.7485419470614625e-06, "loss": 0.4582, "step": 16830 }, { "epoch": 2.75, "learning_rate": 1.737326155226559e-06, "loss": 0.4692, "step": 16840 }, { "epoch": 2.75, "learning_rate": 1.7261103633916556e-06, "loss": 0.4674, "step": 16850 }, { "epoch": 2.75, "learning_rate": 1.714894571556752e-06, "loss": 0.4644, "step": 16860 }, { "epoch": 2.75, "learning_rate": 1.7036787797218486e-06, "loss": 0.4727, "step": 16870 }, { "epoch": 2.75, "learning_rate": 1.6924629878869451e-06, "loss": 0.4748, "step": 16880 }, { "epoch": 2.76, "learning_rate": 1.6812471960520413e-06, "loss": 0.4653, "step": 16890 }, { "epoch": 2.76, "learning_rate": 1.6700314042171378e-06, "loss": 0.4676, "step": 16900 }, { "epoch": 2.76, "learning_rate": 1.6588156123822344e-06, "loss": 0.4725, "step": 16910 }, { "epoch": 2.76, "learning_rate": 1.6475998205473306e-06, "loss": 0.4715, "step": 16920 }, { "epoch": 2.76, "learning_rate": 1.6363840287124271e-06, "loss": 0.4602, "step": 16930 }, { "epoch": 2.76, "learning_rate": 1.6251682368775237e-06, "loss": 0.4657, "step": 16940 }, { "epoch": 2.77, "learning_rate": 1.6139524450426203e-06, "loss": 0.4735, "step": 16950 }, { "epoch": 2.77, "learning_rate": 1.6027366532077166e-06, "loss": 0.4625, "step": 16960 }, { "epoch": 2.77, "learning_rate": 1.5915208613728132e-06, "loss": 0.4575, "step": 16970 }, { "epoch": 2.77, "learning_rate": 1.5803050695379095e-06, "loss": 0.4619, "step": 16980 }, { "epoch": 2.77, "learning_rate": 1.569089277703006e-06, "loss": 0.4673, "step": 16990 }, { "epoch": 2.77, "learning_rate": 1.5578734858681025e-06, "loss": 0.4607, "step": 17000 }, { "epoch": 2.78, "learning_rate": 1.546657694033199e-06, "loss": 0.4702, "step": 17010 }, { "epoch": 2.78, "learning_rate": 1.5354419021982952e-06, "loss": 0.4622, "step": 17020 }, { "epoch": 2.78, "learning_rate": 1.5242261103633918e-06, "loss": 0.46, "step": 17030 }, { "epoch": 2.78, "learning_rate": 1.5130103185284883e-06, "loss": 0.4759, "step": 17040 }, { "epoch": 2.78, "learning_rate": 1.5017945266935847e-06, "loss": 0.4663, "step": 17050 }, { "epoch": 2.78, "learning_rate": 1.4905787348586812e-06, "loss": 0.4773, "step": 17060 }, { "epoch": 2.79, "learning_rate": 1.4793629430237776e-06, "loss": 0.4588, "step": 17070 }, { "epoch": 2.79, "learning_rate": 1.468147151188874e-06, "loss": 0.4705, "step": 17080 }, { "epoch": 2.79, "learning_rate": 1.4569313593539705e-06, "loss": 0.466, "step": 17090 }, { "epoch": 2.79, "learning_rate": 1.445715567519067e-06, "loss": 0.4704, "step": 17100 }, { "epoch": 2.79, "learning_rate": 1.4344997756841632e-06, "loss": 0.4631, "step": 17110 }, { "epoch": 2.79, "learning_rate": 1.4232839838492598e-06, "loss": 0.4658, "step": 17120 }, { "epoch": 2.8, "learning_rate": 1.4120681920143564e-06, "loss": 0.4629, "step": 17130 }, { "epoch": 2.8, "learning_rate": 1.4008524001794527e-06, "loss": 0.4614, "step": 17140 }, { "epoch": 2.8, "learning_rate": 1.3896366083445493e-06, "loss": 0.4742, "step": 17150 }, { "epoch": 2.8, "learning_rate": 1.3784208165096457e-06, "loss": 0.4547, "step": 17160 }, { "epoch": 2.8, "learning_rate": 1.367205024674742e-06, "loss": 0.4481, "step": 17170 }, { "epoch": 2.8, "learning_rate": 1.3559892328398386e-06, "loss": 0.4628, "step": 17180 }, { "epoch": 2.81, "learning_rate": 1.3447734410049352e-06, "loss": 0.47, "step": 17190 }, { "epoch": 2.81, "learning_rate": 1.3335576491700313e-06, "loss": 0.4543, "step": 17200 }, { "epoch": 2.81, "learning_rate": 1.3223418573351279e-06, "loss": 0.4646, "step": 17210 }, { "epoch": 2.81, "learning_rate": 1.3111260655002244e-06, "loss": 0.4705, "step": 17220 }, { "epoch": 2.81, "learning_rate": 1.2999102736653208e-06, "loss": 0.4649, "step": 17230 }, { "epoch": 2.81, "learning_rate": 1.2886944818304174e-06, "loss": 0.4637, "step": 17240 }, { "epoch": 2.81, "learning_rate": 1.2774786899955137e-06, "loss": 0.4602, "step": 17250 }, { "epoch": 2.82, "learning_rate": 1.2662628981606103e-06, "loss": 0.4743, "step": 17260 }, { "epoch": 2.82, "learning_rate": 1.2550471063257067e-06, "loss": 0.4649, "step": 17270 }, { "epoch": 2.82, "learning_rate": 1.2438313144908032e-06, "loss": 0.4541, "step": 17280 }, { "epoch": 2.82, "learning_rate": 1.2326155226558996e-06, "loss": 0.4619, "step": 17290 }, { "epoch": 2.82, "learning_rate": 1.221399730820996e-06, "loss": 0.4783, "step": 17300 }, { "epoch": 2.82, "learning_rate": 1.2101839389860925e-06, "loss": 0.451, "step": 17310 }, { "epoch": 2.83, "learning_rate": 1.1989681471511889e-06, "loss": 0.4639, "step": 17320 }, { "epoch": 2.83, "learning_rate": 1.1877523553162854e-06, "loss": 0.4599, "step": 17330 }, { "epoch": 2.83, "learning_rate": 1.176536563481382e-06, "loss": 0.4702, "step": 17340 }, { "epoch": 2.83, "learning_rate": 1.1653207716464784e-06, "loss": 0.4689, "step": 17350 }, { "epoch": 2.83, "learning_rate": 1.154104979811575e-06, "loss": 0.4683, "step": 17360 }, { "epoch": 2.83, "learning_rate": 1.1428891879766713e-06, "loss": 0.4672, "step": 17370 }, { "epoch": 2.84, "learning_rate": 1.1316733961417676e-06, "loss": 0.4728, "step": 17380 }, { "epoch": 2.84, "learning_rate": 1.1204576043068642e-06, "loss": 0.4736, "step": 17390 }, { "epoch": 2.84, "learning_rate": 1.1092418124719606e-06, "loss": 0.461, "step": 17400 }, { "epoch": 2.84, "learning_rate": 1.098026020637057e-06, "loss": 0.4564, "step": 17410 }, { "epoch": 2.84, "learning_rate": 1.0868102288021535e-06, "loss": 0.4573, "step": 17420 }, { "epoch": 2.84, "learning_rate": 1.07559443696725e-06, "loss": 0.4646, "step": 17430 }, { "epoch": 2.85, "learning_rate": 1.0643786451323464e-06, "loss": 0.4579, "step": 17440 }, { "epoch": 2.85, "learning_rate": 1.053162853297443e-06, "loss": 0.4557, "step": 17450 }, { "epoch": 2.85, "learning_rate": 1.0419470614625394e-06, "loss": 0.4654, "step": 17460 }, { "epoch": 2.85, "learning_rate": 1.0307312696276357e-06, "loss": 0.4673, "step": 17470 }, { "epoch": 2.85, "learning_rate": 1.0195154777927323e-06, "loss": 0.4602, "step": 17480 }, { "epoch": 2.85, "learning_rate": 1.0082996859578286e-06, "loss": 0.4536, "step": 17490 }, { "epoch": 2.86, "learning_rate": 9.970838941229252e-07, "loss": 0.4669, "step": 17500 }, { "epoch": 2.86, "learning_rate": 9.858681022880216e-07, "loss": 0.4633, "step": 17510 }, { "epoch": 2.86, "learning_rate": 9.746523104531181e-07, "loss": 0.468, "step": 17520 }, { "epoch": 2.86, "learning_rate": 9.634365186182145e-07, "loss": 0.4733, "step": 17530 }, { "epoch": 2.86, "learning_rate": 9.52220726783311e-07, "loss": 0.476, "step": 17540 }, { "epoch": 2.86, "learning_rate": 9.410049349484074e-07, "loss": 0.4666, "step": 17550 }, { "epoch": 2.87, "learning_rate": 9.29789143113504e-07, "loss": 0.4684, "step": 17560 }, { "epoch": 2.87, "learning_rate": 9.185733512786003e-07, "loss": 0.4673, "step": 17570 }, { "epoch": 2.87, "learning_rate": 9.073575594436967e-07, "loss": 0.4658, "step": 17580 }, { "epoch": 2.87, "learning_rate": 8.961417676087933e-07, "loss": 0.4626, "step": 17590 }, { "epoch": 2.87, "learning_rate": 8.849259757738897e-07, "loss": 0.4619, "step": 17600 }, { "epoch": 2.87, "learning_rate": 8.737101839389861e-07, "loss": 0.4563, "step": 17610 }, { "epoch": 2.88, "learning_rate": 8.624943921040827e-07, "loss": 0.4705, "step": 17620 }, { "epoch": 2.88, "learning_rate": 8.51278600269179e-07, "loss": 0.4573, "step": 17630 }, { "epoch": 2.88, "learning_rate": 8.400628084342756e-07, "loss": 0.4791, "step": 17640 }, { "epoch": 2.88, "learning_rate": 8.28847016599372e-07, "loss": 0.458, "step": 17650 }, { "epoch": 2.88, "learning_rate": 8.176312247644684e-07, "loss": 0.4609, "step": 17660 }, { "epoch": 2.88, "learning_rate": 8.06415432929565e-07, "loss": 0.4632, "step": 17670 }, { "epoch": 2.89, "learning_rate": 7.951996410946613e-07, "loss": 0.458, "step": 17680 }, { "epoch": 2.89, "learning_rate": 7.839838492597578e-07, "loss": 0.4806, "step": 17690 }, { "epoch": 2.89, "learning_rate": 7.727680574248544e-07, "loss": 0.467, "step": 17700 }, { "epoch": 2.89, "learning_rate": 7.615522655899507e-07, "loss": 0.4666, "step": 17710 }, { "epoch": 2.89, "learning_rate": 7.503364737550471e-07, "loss": 0.4675, "step": 17720 }, { "epoch": 2.89, "learning_rate": 7.391206819201436e-07, "loss": 0.4607, "step": 17730 }, { "epoch": 2.89, "learning_rate": 7.279048900852401e-07, "loss": 0.4656, "step": 17740 }, { "epoch": 2.9, "learning_rate": 7.166890982503365e-07, "loss": 0.462, "step": 17750 }, { "epoch": 2.9, "learning_rate": 7.05473306415433e-07, "loss": 0.4694, "step": 17760 }, { "epoch": 2.9, "learning_rate": 6.942575145805294e-07, "loss": 0.4644, "step": 17770 }, { "epoch": 2.9, "learning_rate": 6.830417227456259e-07, "loss": 0.4688, "step": 17780 }, { "epoch": 2.9, "learning_rate": 6.718259309107224e-07, "loss": 0.4758, "step": 17790 }, { "epoch": 2.9, "learning_rate": 6.606101390758188e-07, "loss": 0.4581, "step": 17800 }, { "epoch": 2.91, "learning_rate": 6.493943472409153e-07, "loss": 0.4693, "step": 17810 }, { "epoch": 2.91, "learning_rate": 6.381785554060117e-07, "loss": 0.4669, "step": 17820 }, { "epoch": 2.91, "learning_rate": 6.269627635711082e-07, "loss": 0.4576, "step": 17830 }, { "epoch": 2.91, "learning_rate": 6.157469717362046e-07, "loss": 0.4635, "step": 17840 }, { "epoch": 2.91, "learning_rate": 6.045311799013011e-07, "loss": 0.4788, "step": 17850 }, { "epoch": 2.91, "learning_rate": 5.933153880663976e-07, "loss": 0.4627, "step": 17860 }, { "epoch": 2.92, "learning_rate": 5.820995962314939e-07, "loss": 0.4625, "step": 17870 }, { "epoch": 2.92, "learning_rate": 5.708838043965905e-07, "loss": 0.4626, "step": 17880 }, { "epoch": 2.92, "learning_rate": 5.596680125616869e-07, "loss": 0.4663, "step": 17890 }, { "epoch": 2.92, "learning_rate": 5.484522207267833e-07, "loss": 0.4782, "step": 17900 }, { "epoch": 2.92, "learning_rate": 5.372364288918798e-07, "loss": 0.4608, "step": 17910 }, { "epoch": 2.92, "learning_rate": 5.260206370569762e-07, "loss": 0.4672, "step": 17920 }, { "epoch": 2.93, "learning_rate": 5.148048452220728e-07, "loss": 0.4614, "step": 17930 }, { "epoch": 2.93, "learning_rate": 5.035890533871692e-07, "loss": 0.4591, "step": 17940 }, { "epoch": 2.93, "learning_rate": 4.923732615522656e-07, "loss": 0.4624, "step": 17950 }, { "epoch": 2.93, "learning_rate": 4.811574697173621e-07, "loss": 0.4589, "step": 17960 }, { "epoch": 2.93, "learning_rate": 4.699416778824585e-07, "loss": 0.4598, "step": 17970 }, { "epoch": 2.93, "learning_rate": 4.58725886047555e-07, "loss": 0.4641, "step": 17980 }, { "epoch": 2.94, "learning_rate": 4.4751009421265147e-07, "loss": 0.4718, "step": 17990 }, { "epoch": 2.94, "learning_rate": 4.3629430237774793e-07, "loss": 0.4635, "step": 18000 }, { "epoch": 2.94, "learning_rate": 4.2507851054284434e-07, "loss": 0.4641, "step": 18010 }, { "epoch": 2.94, "learning_rate": 4.138627187079408e-07, "loss": 0.48, "step": 18020 }, { "epoch": 2.94, "learning_rate": 4.0264692687303727e-07, "loss": 0.4724, "step": 18030 }, { "epoch": 2.94, "learning_rate": 3.914311350381337e-07, "loss": 0.4706, "step": 18040 }, { "epoch": 2.95, "learning_rate": 3.8021534320323014e-07, "loss": 0.4717, "step": 18050 }, { "epoch": 2.95, "learning_rate": 3.6899955136832666e-07, "loss": 0.4662, "step": 18060 }, { "epoch": 2.95, "learning_rate": 3.577837595334231e-07, "loss": 0.4572, "step": 18070 }, { "epoch": 2.95, "learning_rate": 3.4656796769851953e-07, "loss": 0.466, "step": 18080 }, { "epoch": 2.95, "learning_rate": 3.35352175863616e-07, "loss": 0.47, "step": 18090 }, { "epoch": 2.95, "learning_rate": 3.2413638402871246e-07, "loss": 0.4666, "step": 18100 }, { "epoch": 2.96, "learning_rate": 3.1292059219380887e-07, "loss": 0.4632, "step": 18110 }, { "epoch": 2.96, "learning_rate": 3.0170480035890533e-07, "loss": 0.4671, "step": 18120 }, { "epoch": 2.96, "learning_rate": 2.9048900852400185e-07, "loss": 0.4745, "step": 18130 }, { "epoch": 2.96, "learning_rate": 2.7927321668909826e-07, "loss": 0.4666, "step": 18140 }, { "epoch": 2.96, "learning_rate": 2.680574248541947e-07, "loss": 0.4636, "step": 18150 }, { "epoch": 2.96, "learning_rate": 2.568416330192912e-07, "loss": 0.4618, "step": 18160 }, { "epoch": 2.97, "learning_rate": 2.4562584118438765e-07, "loss": 0.4618, "step": 18170 }, { "epoch": 2.97, "learning_rate": 2.344100493494841e-07, "loss": 0.479, "step": 18180 }, { "epoch": 2.97, "learning_rate": 2.2319425751458055e-07, "loss": 0.4626, "step": 18190 }, { "epoch": 2.97, "learning_rate": 2.11978465679677e-07, "loss": 0.4701, "step": 18200 }, { "epoch": 2.97, "learning_rate": 2.0076267384477345e-07, "loss": 0.4585, "step": 18210 }, { "epoch": 2.97, "learning_rate": 1.895468820098699e-07, "loss": 0.4665, "step": 18220 }, { "epoch": 2.97, "learning_rate": 1.7833109017496637e-07, "loss": 0.4693, "step": 18230 }, { "epoch": 2.98, "learning_rate": 1.671152983400628e-07, "loss": 0.4653, "step": 18240 }, { "epoch": 2.98, "learning_rate": 1.5589950650515927e-07, "loss": 0.4653, "step": 18250 }, { "epoch": 2.98, "learning_rate": 1.4468371467025574e-07, "loss": 0.4652, "step": 18260 }, { "epoch": 2.98, "learning_rate": 1.3346792283535217e-07, "loss": 0.4618, "step": 18270 }, { "epoch": 2.98, "learning_rate": 1.2225213100044864e-07, "loss": 0.4677, "step": 18280 }, { "epoch": 2.98, "learning_rate": 1.110363391655451e-07, "loss": 0.4629, "step": 18290 }, { "epoch": 2.99, "learning_rate": 9.982054733064156e-08, "loss": 0.4664, "step": 18300 }, { "epoch": 2.99, "learning_rate": 8.8604755495738e-08, "loss": 0.4599, "step": 18310 }, { "epoch": 2.99, "learning_rate": 7.738896366083446e-08, "loss": 0.4586, "step": 18320 }, { "epoch": 2.99, "learning_rate": 6.617317182593091e-08, "loss": 0.4506, "step": 18330 }, { "epoch": 2.99, "learning_rate": 5.4957379991027376e-08, "loss": 0.467, "step": 18340 }, { "epoch": 2.99, "learning_rate": 4.374158815612382e-08, "loss": 0.4655, "step": 18350 }, { "epoch": 3.0, "learning_rate": 3.252579632122028e-08, "loss": 0.4675, "step": 18360 }, { "epoch": 3.0, "learning_rate": 2.131000448631674e-08, "loss": 0.4673, "step": 18370 }, { "epoch": 3.0, "learning_rate": 1.009421265141319e-08, "loss": 0.4584, "step": 18380 } ], "logging_steps": 10, "max_steps": 18384, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.2862587547397652e+19, "trial_name": null, "trial_params": null }