diff --git "a/checkpoint-18384/trainer_state.json" "b/checkpoint-18384/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-18384/trainer_state.json" @@ -0,0 +1,11047 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 18384, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.2608695652173915e-07, + "loss": 1.057, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 6.884057971014494e-07, + "loss": 1.0404, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.0144927536231885e-06, + "loss": 1.0415, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 1.3405797101449278e-06, + "loss": 1.0467, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 1.7028985507246378e-06, + "loss": 1.0394, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2.065217391304348e-06, + "loss": 1.0325, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 2.4275362318840583e-06, + "loss": 1.0236, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 2.7898550724637686e-06, + "loss": 1.0099, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 3.152173913043479e-06, + "loss": 1.024, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 3.5144927536231887e-06, + "loss": 0.9877, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 3.8768115942028985e-06, + "loss": 0.9921, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 4.239130434782609e-06, + "loss": 0.982, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 4.601449275362319e-06, + "loss": 0.9623, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 4.963768115942029e-06, + "loss": 0.9441, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 5.3260869565217395e-06, + "loss": 0.9225, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 5.688405797101449e-06, + "loss": 0.9129, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 6.05072463768116e-06, + "loss": 0.8737, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 6.41304347826087e-06, + "loss": 0.8554, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 6.7753623188405805e-06, + "loss": 0.8249, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 7.13768115942029e-06, + "loss": 0.796, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 7.500000000000001e-06, + "loss": 0.7771, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 7.862318840579712e-06, + "loss": 0.7432, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 8.22463768115942e-06, + "loss": 0.7279, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 8.586956521739131e-06, + "loss": 0.7114, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 8.94927536231884e-06, + "loss": 0.6932, + "step": 250 + }, + { + "epoch": 0.04, + "learning_rate": 9.275362318840581e-06, + "loss": 0.726, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 9.63768115942029e-06, + "loss": 0.6796, + "step": 270 + }, + { + "epoch": 0.05, + "learning_rate": 1e-05, + "loss": 0.6754, + "step": 280 + }, + { + "epoch": 0.05, + "learning_rate": 1.036231884057971e-05, + "loss": 0.6558, + "step": 290 + }, + { + "epoch": 0.05, + "learning_rate": 1.0724637681159422e-05, + "loss": 0.6553, + "step": 300 + }, + { + "epoch": 0.05, + "learning_rate": 1.1086956521739131e-05, + "loss": 0.6355, + "step": 310 + }, + { + "epoch": 0.05, + "learning_rate": 1.1449275362318842e-05, + "loss": 0.6364, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 1.181159420289855e-05, + "loss": 0.6412, + "step": 330 + }, + { + "epoch": 0.06, + "learning_rate": 1.2173913043478263e-05, + "loss": 0.6304, + "step": 340 + }, + { + "epoch": 0.06, + "learning_rate": 1.2536231884057972e-05, + "loss": 0.6308, + "step": 350 + }, + { + "epoch": 0.06, + "learning_rate": 1.2898550724637681e-05, + "loss": 0.6139, + "step": 360 + }, + { + "epoch": 0.06, + "learning_rate": 1.3260869565217392e-05, + "loss": 0.6292, + "step": 370 + }, + { + "epoch": 0.06, + "learning_rate": 1.3623188405797103e-05, + "loss": 0.6259, + "step": 380 + }, + { + "epoch": 0.06, + "learning_rate": 1.3985507246376813e-05, + "loss": 0.6165, + "step": 390 + }, + { + "epoch": 0.07, + "learning_rate": 1.4347826086956522e-05, + "loss": 0.6156, + "step": 400 + }, + { + "epoch": 0.07, + "learning_rate": 1.4710144927536235e-05, + "loss": 0.622, + "step": 410 + }, + { + "epoch": 0.07, + "learning_rate": 1.5072463768115944e-05, + "loss": 0.6195, + "step": 420 + }, + { + "epoch": 0.07, + "learning_rate": 1.5434782608695654e-05, + "loss": 0.612, + "step": 430 + }, + { + "epoch": 0.07, + "learning_rate": 1.5797101449275363e-05, + "loss": 0.6231, + "step": 440 + }, + { + "epoch": 0.07, + "learning_rate": 1.6159420289855076e-05, + "loss": 0.6172, + "step": 450 + }, + { + "epoch": 0.08, + "learning_rate": 1.6521739130434785e-05, + "loss": 0.6111, + "step": 460 + }, + { + "epoch": 0.08, + "learning_rate": 1.6884057971014494e-05, + "loss": 0.5962, + "step": 470 + }, + { + "epoch": 0.08, + "learning_rate": 1.7246376811594206e-05, + "loss": 0.621, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 1.7608695652173915e-05, + "loss": 0.5874, + "step": 490 + }, + { + "epoch": 0.08, + "learning_rate": 1.7971014492753624e-05, + "loss": 0.5981, + "step": 500 + }, + { + "epoch": 0.08, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.6084, + "step": 510 + }, + { + "epoch": 0.08, + "learning_rate": 1.8695652173913045e-05, + "loss": 0.6028, + "step": 520 + }, + { + "epoch": 0.09, + "learning_rate": 1.9057971014492754e-05, + "loss": 0.6004, + "step": 530 + }, + { + "epoch": 0.09, + "learning_rate": 1.9420289855072467e-05, + "loss": 0.5926, + "step": 540 + }, + { + "epoch": 0.09, + "learning_rate": 1.9782608695652176e-05, + "loss": 0.5886, + "step": 550 + }, + { + "epoch": 0.09, + "learning_rate": 1.9995513683266043e-05, + "loss": 0.5958, + "step": 560 + }, + { + "epoch": 0.09, + "learning_rate": 1.9984297891431138e-05, + "loss": 0.596, + "step": 570 + }, + { + "epoch": 0.09, + "learning_rate": 1.9973082099596232e-05, + "loss": 0.5946, + "step": 580 + }, + { + "epoch": 0.1, + "learning_rate": 1.996186630776133e-05, + "loss": 0.6011, + "step": 590 + }, + { + "epoch": 0.1, + "learning_rate": 1.9950650515926425e-05, + "loss": 0.5956, + "step": 600 + }, + { + "epoch": 0.1, + "learning_rate": 1.9939434724091523e-05, + "loss": 0.5838, + "step": 610 + }, + { + "epoch": 0.1, + "learning_rate": 1.992821893225662e-05, + "loss": 0.5925, + "step": 620 + }, + { + "epoch": 0.1, + "learning_rate": 1.9917003140421716e-05, + "loss": 0.5697, + "step": 630 + }, + { + "epoch": 0.1, + "learning_rate": 1.990578734858681e-05, + "loss": 0.5766, + "step": 640 + }, + { + "epoch": 0.11, + "learning_rate": 1.989457155675191e-05, + "loss": 0.5821, + "step": 650 + }, + { + "epoch": 0.11, + "learning_rate": 1.9883355764917006e-05, + "loss": 0.5945, + "step": 660 + }, + { + "epoch": 0.11, + "learning_rate": 1.98721399730821e-05, + "loss": 0.5798, + "step": 670 + }, + { + "epoch": 0.11, + "learning_rate": 1.98609241812472e-05, + "loss": 0.5778, + "step": 680 + }, + { + "epoch": 0.11, + "learning_rate": 1.9849708389412294e-05, + "loss": 0.5762, + "step": 690 + }, + { + "epoch": 0.11, + "learning_rate": 1.983849259757739e-05, + "loss": 0.5755, + "step": 700 + }, + { + "epoch": 0.12, + "learning_rate": 1.9827276805742487e-05, + "loss": 0.5721, + "step": 710 + }, + { + "epoch": 0.12, + "learning_rate": 1.9816061013907585e-05, + "loss": 0.5892, + "step": 720 + }, + { + "epoch": 0.12, + "learning_rate": 1.980484522207268e-05, + "loss": 0.5829, + "step": 730 + }, + { + "epoch": 0.12, + "learning_rate": 1.9793629430237777e-05, + "loss": 0.5775, + "step": 740 + }, + { + "epoch": 0.12, + "learning_rate": 1.9782413638402872e-05, + "loss": 0.5792, + "step": 750 + }, + { + "epoch": 0.12, + "learning_rate": 1.977119784656797e-05, + "loss": 0.579, + "step": 760 + }, + { + "epoch": 0.13, + "learning_rate": 1.9759982054733068e-05, + "loss": 0.5767, + "step": 770 + }, + { + "epoch": 0.13, + "learning_rate": 1.9748766262898163e-05, + "loss": 0.5719, + "step": 780 + }, + { + "epoch": 0.13, + "learning_rate": 1.9737550471063257e-05, + "loss": 0.5804, + "step": 790 + }, + { + "epoch": 0.13, + "learning_rate": 1.9726334679228355e-05, + "loss": 0.5908, + "step": 800 + }, + { + "epoch": 0.13, + "learning_rate": 1.971511888739345e-05, + "loss": 0.5737, + "step": 810 + }, + { + "epoch": 0.13, + "learning_rate": 1.9703903095558548e-05, + "loss": 0.5715, + "step": 820 + }, + { + "epoch": 0.14, + "learning_rate": 1.9692687303723646e-05, + "loss": 0.5866, + "step": 830 + }, + { + "epoch": 0.14, + "learning_rate": 1.968147151188874e-05, + "loss": 0.5663, + "step": 840 + }, + { + "epoch": 0.14, + "learning_rate": 1.9670255720053836e-05, + "loss": 0.5799, + "step": 850 + }, + { + "epoch": 0.14, + "learning_rate": 1.9659039928218934e-05, + "loss": 0.5682, + "step": 860 + }, + { + "epoch": 0.14, + "learning_rate": 1.964782413638403e-05, + "loss": 0.5795, + "step": 870 + }, + { + "epoch": 0.14, + "learning_rate": 1.9636608344549126e-05, + "loss": 0.5683, + "step": 880 + }, + { + "epoch": 0.15, + "learning_rate": 1.9625392552714224e-05, + "loss": 0.5683, + "step": 890 + }, + { + "epoch": 0.15, + "learning_rate": 1.961417676087932e-05, + "loss": 0.5645, + "step": 900 + }, + { + "epoch": 0.15, + "learning_rate": 1.9602960969044417e-05, + "loss": 0.572, + "step": 910 + }, + { + "epoch": 0.15, + "learning_rate": 1.9591745177209515e-05, + "loss": 0.5552, + "step": 920 + }, + { + "epoch": 0.15, + "learning_rate": 1.958052938537461e-05, + "loss": 0.5713, + "step": 930 + }, + { + "epoch": 0.15, + "learning_rate": 1.9569313593539704e-05, + "loss": 0.5688, + "step": 940 + }, + { + "epoch": 0.16, + "learning_rate": 1.9558097801704803e-05, + "loss": 0.5666, + "step": 950 + }, + { + "epoch": 0.16, + "learning_rate": 1.9546882009869897e-05, + "loss": 0.566, + "step": 960 + }, + { + "epoch": 0.16, + "learning_rate": 1.9535666218034995e-05, + "loss": 0.5658, + "step": 970 + }, + { + "epoch": 0.16, + "learning_rate": 1.9524450426200093e-05, + "loss": 0.5718, + "step": 980 + }, + { + "epoch": 0.16, + "learning_rate": 1.9513234634365188e-05, + "loss": 0.5559, + "step": 990 + }, + { + "epoch": 0.16, + "learning_rate": 1.9502018842530283e-05, + "loss": 0.5644, + "step": 1000 + }, + { + "epoch": 0.16, + "learning_rate": 1.949080305069538e-05, + "loss": 0.5648, + "step": 1010 + }, + { + "epoch": 0.17, + "learning_rate": 1.947958725886048e-05, + "loss": 0.562, + "step": 1020 + }, + { + "epoch": 0.17, + "learning_rate": 1.9468371467025573e-05, + "loss": 0.5652, + "step": 1030 + }, + { + "epoch": 0.17, + "learning_rate": 1.945715567519067e-05, + "loss": 0.5679, + "step": 1040 + }, + { + "epoch": 0.17, + "learning_rate": 1.9445939883355766e-05, + "loss": 0.5643, + "step": 1050 + }, + { + "epoch": 0.17, + "learning_rate": 1.943472409152086e-05, + "loss": 0.5566, + "step": 1060 + }, + { + "epoch": 0.17, + "learning_rate": 1.942350829968596e-05, + "loss": 0.566, + "step": 1070 + }, + { + "epoch": 0.18, + "learning_rate": 1.9412292507851057e-05, + "loss": 0.5593, + "step": 1080 + }, + { + "epoch": 0.18, + "learning_rate": 1.940107671601615e-05, + "loss": 0.5719, + "step": 1090 + }, + { + "epoch": 0.18, + "learning_rate": 1.938986092418125e-05, + "loss": 0.5686, + "step": 1100 + }, + { + "epoch": 0.18, + "learning_rate": 1.9378645132346344e-05, + "loss": 0.5606, + "step": 1110 + }, + { + "epoch": 0.18, + "learning_rate": 1.9367429340511442e-05, + "loss": 0.5578, + "step": 1120 + }, + { + "epoch": 0.18, + "learning_rate": 1.935621354867654e-05, + "loss": 0.5519, + "step": 1130 + }, + { + "epoch": 0.19, + "learning_rate": 1.9344997756841635e-05, + "loss": 0.5563, + "step": 1140 + }, + { + "epoch": 0.19, + "learning_rate": 1.933378196500673e-05, + "loss": 0.5595, + "step": 1150 + }, + { + "epoch": 0.19, + "learning_rate": 1.9322566173171828e-05, + "loss": 0.55, + "step": 1160 + }, + { + "epoch": 0.19, + "learning_rate": 1.9311350381336926e-05, + "loss": 0.5602, + "step": 1170 + }, + { + "epoch": 0.19, + "learning_rate": 1.930013458950202e-05, + "loss": 0.5553, + "step": 1180 + }, + { + "epoch": 0.19, + "learning_rate": 1.928891879766712e-05, + "loss": 0.5724, + "step": 1190 + }, + { + "epoch": 0.2, + "learning_rate": 1.9277703005832213e-05, + "loss": 0.5535, + "step": 1200 + }, + { + "epoch": 0.2, + "learning_rate": 1.9266487213997308e-05, + "loss": 0.5615, + "step": 1210 + }, + { + "epoch": 0.2, + "learning_rate": 1.9255271422162406e-05, + "loss": 0.5656, + "step": 1220 + }, + { + "epoch": 0.2, + "learning_rate": 1.9244055630327504e-05, + "loss": 0.5501, + "step": 1230 + }, + { + "epoch": 0.2, + "learning_rate": 1.92328398384926e-05, + "loss": 0.5659, + "step": 1240 + }, + { + "epoch": 0.2, + "learning_rate": 1.9221624046657697e-05, + "loss": 0.5625, + "step": 1250 + }, + { + "epoch": 0.21, + "learning_rate": 1.921040825482279e-05, + "loss": 0.5602, + "step": 1260 + }, + { + "epoch": 0.21, + "learning_rate": 1.919919246298789e-05, + "loss": 0.5519, + "step": 1270 + }, + { + "epoch": 0.21, + "learning_rate": 1.9187976671152987e-05, + "loss": 0.549, + "step": 1280 + }, + { + "epoch": 0.21, + "learning_rate": 1.9176760879318082e-05, + "loss": 0.5636, + "step": 1290 + }, + { + "epoch": 0.21, + "learning_rate": 1.9165545087483177e-05, + "loss": 0.553, + "step": 1300 + }, + { + "epoch": 0.21, + "learning_rate": 1.9154329295648275e-05, + "loss": 0.5597, + "step": 1310 + }, + { + "epoch": 0.22, + "learning_rate": 1.914311350381337e-05, + "loss": 0.5419, + "step": 1320 + }, + { + "epoch": 0.22, + "learning_rate": 1.9131897711978467e-05, + "loss": 0.5368, + "step": 1330 + }, + { + "epoch": 0.22, + "learning_rate": 1.9120681920143566e-05, + "loss": 0.555, + "step": 1340 + }, + { + "epoch": 0.22, + "learning_rate": 1.910946612830866e-05, + "loss": 0.5579, + "step": 1350 + }, + { + "epoch": 0.22, + "learning_rate": 1.9098250336473755e-05, + "loss": 0.5479, + "step": 1360 + }, + { + "epoch": 0.22, + "learning_rate": 1.9087034544638853e-05, + "loss": 0.563, + "step": 1370 + }, + { + "epoch": 0.23, + "learning_rate": 1.907581875280395e-05, + "loss": 0.5491, + "step": 1380 + }, + { + "epoch": 0.23, + "learning_rate": 1.9064602960969046e-05, + "loss": 0.5491, + "step": 1390 + }, + { + "epoch": 0.23, + "learning_rate": 1.9053387169134144e-05, + "loss": 0.5511, + "step": 1400 + }, + { + "epoch": 0.23, + "learning_rate": 1.904217137729924e-05, + "loss": 0.5562, + "step": 1410 + }, + { + "epoch": 0.23, + "learning_rate": 1.9030955585464333e-05, + "loss": 0.5568, + "step": 1420 + }, + { + "epoch": 0.23, + "learning_rate": 1.901973979362943e-05, + "loss": 0.5497, + "step": 1430 + }, + { + "epoch": 0.23, + "learning_rate": 1.900852400179453e-05, + "loss": 0.5571, + "step": 1440 + }, + { + "epoch": 0.24, + "learning_rate": 1.8997308209959624e-05, + "loss": 0.5538, + "step": 1450 + }, + { + "epoch": 0.24, + "learning_rate": 1.8986092418124722e-05, + "loss": 0.5479, + "step": 1460 + }, + { + "epoch": 0.24, + "learning_rate": 1.8974876626289816e-05, + "loss": 0.556, + "step": 1470 + }, + { + "epoch": 0.24, + "learning_rate": 1.8963660834454915e-05, + "loss": 0.5588, + "step": 1480 + }, + { + "epoch": 0.24, + "learning_rate": 1.8952445042620013e-05, + "loss": 0.5358, + "step": 1490 + }, + { + "epoch": 0.24, + "learning_rate": 1.8941229250785107e-05, + "loss": 0.5424, + "step": 1500 + }, + { + "epoch": 0.25, + "learning_rate": 1.8930013458950202e-05, + "loss": 0.5487, + "step": 1510 + }, + { + "epoch": 0.25, + "learning_rate": 1.89187976671153e-05, + "loss": 0.5383, + "step": 1520 + }, + { + "epoch": 0.25, + "learning_rate": 1.8907581875280398e-05, + "loss": 0.5493, + "step": 1530 + }, + { + "epoch": 0.25, + "learning_rate": 1.8896366083445493e-05, + "loss": 0.5604, + "step": 1540 + }, + { + "epoch": 0.25, + "learning_rate": 1.888515029161059e-05, + "loss": 0.5501, + "step": 1550 + }, + { + "epoch": 0.25, + "learning_rate": 1.8873934499775685e-05, + "loss": 0.5419, + "step": 1560 + }, + { + "epoch": 0.26, + "learning_rate": 1.886271870794078e-05, + "loss": 0.5444, + "step": 1570 + }, + { + "epoch": 0.26, + "learning_rate": 1.8851502916105878e-05, + "loss": 0.5508, + "step": 1580 + }, + { + "epoch": 0.26, + "learning_rate": 1.8840287124270976e-05, + "loss": 0.5488, + "step": 1590 + }, + { + "epoch": 0.26, + "learning_rate": 1.882907133243607e-05, + "loss": 0.5653, + "step": 1600 + }, + { + "epoch": 0.26, + "learning_rate": 1.881785554060117e-05, + "loss": 0.5458, + "step": 1610 + }, + { + "epoch": 0.26, + "learning_rate": 1.8806639748766264e-05, + "loss": 0.5548, + "step": 1620 + }, + { + "epoch": 0.27, + "learning_rate": 1.879542395693136e-05, + "loss": 0.5379, + "step": 1630 + }, + { + "epoch": 0.27, + "learning_rate": 1.878420816509646e-05, + "loss": 0.5567, + "step": 1640 + }, + { + "epoch": 0.27, + "learning_rate": 1.8772992373261554e-05, + "loss": 0.5523, + "step": 1650 + }, + { + "epoch": 0.27, + "learning_rate": 1.876177658142665e-05, + "loss": 0.5509, + "step": 1660 + }, + { + "epoch": 0.27, + "learning_rate": 1.8750560789591747e-05, + "loss": 0.5523, + "step": 1670 + }, + { + "epoch": 0.27, + "learning_rate": 1.873934499775684e-05, + "loss": 0.5454, + "step": 1680 + }, + { + "epoch": 0.28, + "learning_rate": 1.872812920592194e-05, + "loss": 0.5513, + "step": 1690 + }, + { + "epoch": 0.28, + "learning_rate": 1.8716913414087038e-05, + "loss": 0.5462, + "step": 1700 + }, + { + "epoch": 0.28, + "learning_rate": 1.8705697622252132e-05, + "loss": 0.5468, + "step": 1710 + }, + { + "epoch": 0.28, + "learning_rate": 1.8694481830417227e-05, + "loss": 0.5398, + "step": 1720 + }, + { + "epoch": 0.28, + "learning_rate": 1.8683266038582325e-05, + "loss": 0.5456, + "step": 1730 + }, + { + "epoch": 0.28, + "learning_rate": 1.8672050246747423e-05, + "loss": 0.5361, + "step": 1740 + }, + { + "epoch": 0.29, + "learning_rate": 1.8660834454912518e-05, + "loss": 0.5508, + "step": 1750 + }, + { + "epoch": 0.29, + "learning_rate": 1.8649618663077616e-05, + "loss": 0.5339, + "step": 1760 + }, + { + "epoch": 0.29, + "learning_rate": 1.863840287124271e-05, + "loss": 0.5403, + "step": 1770 + }, + { + "epoch": 0.29, + "learning_rate": 1.8627187079407805e-05, + "loss": 0.548, + "step": 1780 + }, + { + "epoch": 0.29, + "learning_rate": 1.8615971287572903e-05, + "loss": 0.5424, + "step": 1790 + }, + { + "epoch": 0.29, + "learning_rate": 1.8604755495738e-05, + "loss": 0.5378, + "step": 1800 + }, + { + "epoch": 0.3, + "learning_rate": 1.8593539703903096e-05, + "loss": 0.5549, + "step": 1810 + }, + { + "epoch": 0.3, + "learning_rate": 1.8582323912068194e-05, + "loss": 0.5467, + "step": 1820 + }, + { + "epoch": 0.3, + "learning_rate": 1.857110812023329e-05, + "loss": 0.5449, + "step": 1830 + }, + { + "epoch": 0.3, + "learning_rate": 1.8559892328398387e-05, + "loss": 0.545, + "step": 1840 + }, + { + "epoch": 0.3, + "learning_rate": 1.8548676536563485e-05, + "loss": 0.5476, + "step": 1850 + }, + { + "epoch": 0.3, + "learning_rate": 1.853746074472858e-05, + "loss": 0.5323, + "step": 1860 + }, + { + "epoch": 0.31, + "learning_rate": 1.8526244952893674e-05, + "loss": 0.5532, + "step": 1870 + }, + { + "epoch": 0.31, + "learning_rate": 1.8515029161058772e-05, + "loss": 0.5405, + "step": 1880 + }, + { + "epoch": 0.31, + "learning_rate": 1.850381336922387e-05, + "loss": 0.5494, + "step": 1890 + }, + { + "epoch": 0.31, + "learning_rate": 1.8492597577388965e-05, + "loss": 0.5471, + "step": 1900 + }, + { + "epoch": 0.31, + "learning_rate": 1.8481381785554063e-05, + "loss": 0.5387, + "step": 1910 + }, + { + "epoch": 0.31, + "learning_rate": 1.8470165993719158e-05, + "loss": 0.5436, + "step": 1920 + }, + { + "epoch": 0.31, + "learning_rate": 1.8458950201884252e-05, + "loss": 0.5469, + "step": 1930 + }, + { + "epoch": 0.32, + "learning_rate": 1.844773441004935e-05, + "loss": 0.529, + "step": 1940 + }, + { + "epoch": 0.32, + "learning_rate": 1.843651861821445e-05, + "loss": 0.5439, + "step": 1950 + }, + { + "epoch": 0.32, + "learning_rate": 1.8425302826379543e-05, + "loss": 0.5408, + "step": 1960 + }, + { + "epoch": 0.32, + "learning_rate": 1.841408703454464e-05, + "loss": 0.5361, + "step": 1970 + }, + { + "epoch": 0.32, + "learning_rate": 1.8402871242709736e-05, + "loss": 0.5388, + "step": 1980 + }, + { + "epoch": 0.32, + "learning_rate": 1.8391655450874834e-05, + "loss": 0.5369, + "step": 1990 + }, + { + "epoch": 0.33, + "learning_rate": 1.8380439659039932e-05, + "loss": 0.5366, + "step": 2000 + }, + { + "epoch": 0.33, + "learning_rate": 1.8369223867205027e-05, + "loss": 0.5493, + "step": 2010 + }, + { + "epoch": 0.33, + "learning_rate": 1.835800807537012e-05, + "loss": 0.5346, + "step": 2020 + }, + { + "epoch": 0.33, + "learning_rate": 1.834679228353522e-05, + "loss": 0.5455, + "step": 2030 + }, + { + "epoch": 0.33, + "learning_rate": 1.8335576491700314e-05, + "loss": 0.5452, + "step": 2040 + }, + { + "epoch": 0.33, + "learning_rate": 1.8324360699865412e-05, + "loss": 0.5398, + "step": 2050 + }, + { + "epoch": 0.34, + "learning_rate": 1.831314490803051e-05, + "loss": 0.5558, + "step": 2060 + }, + { + "epoch": 0.34, + "learning_rate": 1.8301929116195605e-05, + "loss": 0.5367, + "step": 2070 + }, + { + "epoch": 0.34, + "learning_rate": 1.82907133243607e-05, + "loss": 0.5377, + "step": 2080 + }, + { + "epoch": 0.34, + "learning_rate": 1.8279497532525797e-05, + "loss": 0.5487, + "step": 2090 + }, + { + "epoch": 0.34, + "learning_rate": 1.8268281740690895e-05, + "loss": 0.5455, + "step": 2100 + }, + { + "epoch": 0.34, + "learning_rate": 1.825706594885599e-05, + "loss": 0.5506, + "step": 2110 + }, + { + "epoch": 0.35, + "learning_rate": 1.8245850157021088e-05, + "loss": 0.533, + "step": 2120 + }, + { + "epoch": 0.35, + "learning_rate": 1.8234634365186183e-05, + "loss": 0.5375, + "step": 2130 + }, + { + "epoch": 0.35, + "learning_rate": 1.8223418573351277e-05, + "loss": 0.5261, + "step": 2140 + }, + { + "epoch": 0.35, + "learning_rate": 1.8212202781516376e-05, + "loss": 0.5478, + "step": 2150 + }, + { + "epoch": 0.35, + "learning_rate": 1.8200986989681474e-05, + "loss": 0.5414, + "step": 2160 + }, + { + "epoch": 0.35, + "learning_rate": 1.8189771197846568e-05, + "loss": 0.5471, + "step": 2170 + }, + { + "epoch": 0.36, + "learning_rate": 1.8178555406011666e-05, + "loss": 0.53, + "step": 2180 + }, + { + "epoch": 0.36, + "learning_rate": 1.816733961417676e-05, + "loss": 0.5265, + "step": 2190 + }, + { + "epoch": 0.36, + "learning_rate": 1.815612382234186e-05, + "loss": 0.5309, + "step": 2200 + }, + { + "epoch": 0.36, + "learning_rate": 1.8144908030506957e-05, + "loss": 0.5479, + "step": 2210 + }, + { + "epoch": 0.36, + "learning_rate": 1.8133692238672052e-05, + "loss": 0.546, + "step": 2220 + }, + { + "epoch": 0.36, + "learning_rate": 1.8122476446837146e-05, + "loss": 0.5355, + "step": 2230 + }, + { + "epoch": 0.37, + "learning_rate": 1.8111260655002244e-05, + "loss": 0.5308, + "step": 2240 + }, + { + "epoch": 0.37, + "learning_rate": 1.8100044863167343e-05, + "loss": 0.5306, + "step": 2250 + }, + { + "epoch": 0.37, + "learning_rate": 1.8088829071332437e-05, + "loss": 0.54, + "step": 2260 + }, + { + "epoch": 0.37, + "learning_rate": 1.8077613279497535e-05, + "loss": 0.5279, + "step": 2270 + }, + { + "epoch": 0.37, + "learning_rate": 1.806639748766263e-05, + "loss": 0.5311, + "step": 2280 + }, + { + "epoch": 0.37, + "learning_rate": 1.8055181695827725e-05, + "loss": 0.5315, + "step": 2290 + }, + { + "epoch": 0.38, + "learning_rate": 1.8043965903992823e-05, + "loss": 0.5456, + "step": 2300 + }, + { + "epoch": 0.38, + "learning_rate": 1.803275011215792e-05, + "loss": 0.5342, + "step": 2310 + }, + { + "epoch": 0.38, + "learning_rate": 1.8021534320323015e-05, + "loss": 0.5445, + "step": 2320 + }, + { + "epoch": 0.38, + "learning_rate": 1.8010318528488113e-05, + "loss": 0.5295, + "step": 2330 + }, + { + "epoch": 0.38, + "learning_rate": 1.7999102736653208e-05, + "loss": 0.5337, + "step": 2340 + }, + { + "epoch": 0.38, + "learning_rate": 1.7987886944818306e-05, + "loss": 0.5375, + "step": 2350 + }, + { + "epoch": 0.39, + "learning_rate": 1.7976671152983404e-05, + "loss": 0.5292, + "step": 2360 + }, + { + "epoch": 0.39, + "learning_rate": 1.79654553611485e-05, + "loss": 0.5365, + "step": 2370 + }, + { + "epoch": 0.39, + "learning_rate": 1.7954239569313593e-05, + "loss": 0.5428, + "step": 2380 + }, + { + "epoch": 0.39, + "learning_rate": 1.794302377747869e-05, + "loss": 0.5489, + "step": 2390 + }, + { + "epoch": 0.39, + "learning_rate": 1.7931807985643786e-05, + "loss": 0.5344, + "step": 2400 + }, + { + "epoch": 0.39, + "learning_rate": 1.7920592193808884e-05, + "loss": 0.5293, + "step": 2410 + }, + { + "epoch": 0.39, + "learning_rate": 1.7909376401973982e-05, + "loss": 0.5282, + "step": 2420 + }, + { + "epoch": 0.4, + "learning_rate": 1.7898160610139077e-05, + "loss": 0.5259, + "step": 2430 + }, + { + "epoch": 0.4, + "learning_rate": 1.788694481830417e-05, + "loss": 0.5316, + "step": 2440 + }, + { + "epoch": 0.4, + "learning_rate": 1.787572902646927e-05, + "loss": 0.5329, + "step": 2450 + }, + { + "epoch": 0.4, + "learning_rate": 1.7864513234634368e-05, + "loss": 0.515, + "step": 2460 + }, + { + "epoch": 0.4, + "learning_rate": 1.7853297442799462e-05, + "loss": 0.5393, + "step": 2470 + }, + { + "epoch": 0.4, + "learning_rate": 1.784208165096456e-05, + "loss": 0.5301, + "step": 2480 + }, + { + "epoch": 0.41, + "learning_rate": 1.7830865859129655e-05, + "loss": 0.5246, + "step": 2490 + }, + { + "epoch": 0.41, + "learning_rate": 1.7819650067294753e-05, + "loss": 0.5313, + "step": 2500 + }, + { + "epoch": 0.41, + "learning_rate": 1.7808434275459848e-05, + "loss": 0.5329, + "step": 2510 + }, + { + "epoch": 0.41, + "learning_rate": 1.7797218483624946e-05, + "loss": 0.5419, + "step": 2520 + }, + { + "epoch": 0.41, + "learning_rate": 1.778600269179004e-05, + "loss": 0.5322, + "step": 2530 + }, + { + "epoch": 0.41, + "learning_rate": 1.777478689995514e-05, + "loss": 0.5385, + "step": 2540 + }, + { + "epoch": 0.42, + "learning_rate": 1.7763571108120233e-05, + "loss": 0.5218, + "step": 2550 + }, + { + "epoch": 0.42, + "learning_rate": 1.775235531628533e-05, + "loss": 0.5205, + "step": 2560 + }, + { + "epoch": 0.42, + "learning_rate": 1.774113952445043e-05, + "loss": 0.5293, + "step": 2570 + }, + { + "epoch": 0.42, + "learning_rate": 1.7729923732615524e-05, + "loss": 0.5289, + "step": 2580 + }, + { + "epoch": 0.42, + "learning_rate": 1.771870794078062e-05, + "loss": 0.5492, + "step": 2590 + }, + { + "epoch": 0.42, + "learning_rate": 1.7707492148945717e-05, + "loss": 0.5246, + "step": 2600 + }, + { + "epoch": 0.43, + "learning_rate": 1.7696276357110815e-05, + "loss": 0.5331, + "step": 2610 + }, + { + "epoch": 0.43, + "learning_rate": 1.768506056527591e-05, + "loss": 0.5423, + "step": 2620 + }, + { + "epoch": 0.43, + "learning_rate": 1.7673844773441007e-05, + "loss": 0.533, + "step": 2630 + }, + { + "epoch": 0.43, + "learning_rate": 1.7662628981606102e-05, + "loss": 0.538, + "step": 2640 + }, + { + "epoch": 0.43, + "learning_rate": 1.7651413189771197e-05, + "loss": 0.5277, + "step": 2650 + }, + { + "epoch": 0.43, + "learning_rate": 1.7640197397936295e-05, + "loss": 0.5307, + "step": 2660 + }, + { + "epoch": 0.44, + "learning_rate": 1.7628981606101393e-05, + "loss": 0.5279, + "step": 2670 + }, + { + "epoch": 0.44, + "learning_rate": 1.7617765814266488e-05, + "loss": 0.5358, + "step": 2680 + }, + { + "epoch": 0.44, + "learning_rate": 1.7606550022431586e-05, + "loss": 0.5265, + "step": 2690 + }, + { + "epoch": 0.44, + "learning_rate": 1.759533423059668e-05, + "loss": 0.5285, + "step": 2700 + }, + { + "epoch": 0.44, + "learning_rate": 1.758411843876178e-05, + "loss": 0.5303, + "step": 2710 + }, + { + "epoch": 0.44, + "learning_rate": 1.7572902646926876e-05, + "loss": 0.5431, + "step": 2720 + }, + { + "epoch": 0.45, + "learning_rate": 1.756168685509197e-05, + "loss": 0.5312, + "step": 2730 + }, + { + "epoch": 0.45, + "learning_rate": 1.7550471063257066e-05, + "loss": 0.5296, + "step": 2740 + }, + { + "epoch": 0.45, + "learning_rate": 1.7539255271422164e-05, + "loss": 0.5282, + "step": 2750 + }, + { + "epoch": 0.45, + "learning_rate": 1.752803947958726e-05, + "loss": 0.5378, + "step": 2760 + }, + { + "epoch": 0.45, + "learning_rate": 1.7516823687752356e-05, + "loss": 0.5372, + "step": 2770 + }, + { + "epoch": 0.45, + "learning_rate": 1.7505607895917455e-05, + "loss": 0.5235, + "step": 2780 + }, + { + "epoch": 0.46, + "learning_rate": 1.749439210408255e-05, + "loss": 0.5319, + "step": 2790 + }, + { + "epoch": 0.46, + "learning_rate": 1.7483176312247647e-05, + "loss": 0.522, + "step": 2800 + }, + { + "epoch": 0.46, + "learning_rate": 1.7471960520412742e-05, + "loss": 0.5346, + "step": 2810 + }, + { + "epoch": 0.46, + "learning_rate": 1.746074472857784e-05, + "loss": 0.5313, + "step": 2820 + }, + { + "epoch": 0.46, + "learning_rate": 1.7449528936742938e-05, + "loss": 0.5459, + "step": 2830 + }, + { + "epoch": 0.46, + "learning_rate": 1.7438313144908033e-05, + "loss": 0.5291, + "step": 2840 + }, + { + "epoch": 0.47, + "learning_rate": 1.7427097353073127e-05, + "loss": 0.5398, + "step": 2850 + }, + { + "epoch": 0.47, + "learning_rate": 1.7415881561238225e-05, + "loss": 0.5225, + "step": 2860 + }, + { + "epoch": 0.47, + "learning_rate": 1.740466576940332e-05, + "loss": 0.5232, + "step": 2870 + }, + { + "epoch": 0.47, + "learning_rate": 1.7393449977568418e-05, + "loss": 0.5315, + "step": 2880 + }, + { + "epoch": 0.47, + "learning_rate": 1.7382234185733516e-05, + "loss": 0.5323, + "step": 2890 + }, + { + "epoch": 0.47, + "learning_rate": 1.737101839389861e-05, + "loss": 0.5278, + "step": 2900 + }, + { + "epoch": 0.47, + "learning_rate": 1.7359802602063705e-05, + "loss": 0.5367, + "step": 2910 + }, + { + "epoch": 0.48, + "learning_rate": 1.7348586810228804e-05, + "loss": 0.5203, + "step": 2920 + }, + { + "epoch": 0.48, + "learning_rate": 1.73373710183939e-05, + "loss": 0.5267, + "step": 2930 + }, + { + "epoch": 0.48, + "learning_rate": 1.7326155226558996e-05, + "loss": 0.5389, + "step": 2940 + }, + { + "epoch": 0.48, + "learning_rate": 1.7314939434724094e-05, + "loss": 0.5327, + "step": 2950 + }, + { + "epoch": 0.48, + "learning_rate": 1.730372364288919e-05, + "loss": 0.5294, + "step": 2960 + }, + { + "epoch": 0.48, + "learning_rate": 1.7292507851054287e-05, + "loss": 0.5287, + "step": 2970 + }, + { + "epoch": 0.49, + "learning_rate": 1.728129205921938e-05, + "loss": 0.5358, + "step": 2980 + }, + { + "epoch": 0.49, + "learning_rate": 1.727007626738448e-05, + "loss": 0.5345, + "step": 2990 + }, + { + "epoch": 0.49, + "learning_rate": 1.7258860475549574e-05, + "loss": 0.5265, + "step": 3000 + }, + { + "epoch": 0.49, + "learning_rate": 1.7247644683714672e-05, + "loss": 0.525, + "step": 3010 + }, + { + "epoch": 0.49, + "learning_rate": 1.7236428891879767e-05, + "loss": 0.5299, + "step": 3020 + }, + { + "epoch": 0.49, + "learning_rate": 1.7225213100044865e-05, + "loss": 0.5295, + "step": 3030 + }, + { + "epoch": 0.5, + "learning_rate": 1.7213997308209963e-05, + "loss": 0.5317, + "step": 3040 + }, + { + "epoch": 0.5, + "learning_rate": 1.7202781516375058e-05, + "loss": 0.5275, + "step": 3050 + }, + { + "epoch": 0.5, + "learning_rate": 1.7191565724540153e-05, + "loss": 0.5304, + "step": 3060 + }, + { + "epoch": 0.5, + "learning_rate": 1.718034993270525e-05, + "loss": 0.5288, + "step": 3070 + }, + { + "epoch": 0.5, + "learning_rate": 1.716913414087035e-05, + "loss": 0.5197, + "step": 3080 + }, + { + "epoch": 0.5, + "learning_rate": 1.7157918349035443e-05, + "loss": 0.5305, + "step": 3090 + }, + { + "epoch": 0.51, + "learning_rate": 1.714670255720054e-05, + "loss": 0.5205, + "step": 3100 + }, + { + "epoch": 0.51, + "learning_rate": 1.7135486765365636e-05, + "loss": 0.5306, + "step": 3110 + }, + { + "epoch": 0.51, + "learning_rate": 1.712427097353073e-05, + "loss": 0.5228, + "step": 3120 + }, + { + "epoch": 0.51, + "learning_rate": 1.711305518169583e-05, + "loss": 0.5241, + "step": 3130 + }, + { + "epoch": 0.51, + "learning_rate": 1.7101839389860927e-05, + "loss": 0.5302, + "step": 3140 + }, + { + "epoch": 0.51, + "learning_rate": 1.709062359802602e-05, + "loss": 0.5254, + "step": 3150 + }, + { + "epoch": 0.52, + "learning_rate": 1.707940780619112e-05, + "loss": 0.529, + "step": 3160 + }, + { + "epoch": 0.52, + "learning_rate": 1.7068192014356214e-05, + "loss": 0.5224, + "step": 3170 + }, + { + "epoch": 0.52, + "learning_rate": 1.7056976222521312e-05, + "loss": 0.5328, + "step": 3180 + }, + { + "epoch": 0.52, + "learning_rate": 1.704576043068641e-05, + "loss": 0.5248, + "step": 3190 + }, + { + "epoch": 0.52, + "learning_rate": 1.7034544638851505e-05, + "loss": 0.5255, + "step": 3200 + }, + { + "epoch": 0.52, + "learning_rate": 1.70233288470166e-05, + "loss": 0.5227, + "step": 3210 + }, + { + "epoch": 0.53, + "learning_rate": 1.7012113055181698e-05, + "loss": 0.5266, + "step": 3220 + }, + { + "epoch": 0.53, + "learning_rate": 1.7000897263346792e-05, + "loss": 0.5202, + "step": 3230 + }, + { + "epoch": 0.53, + "learning_rate": 1.698968147151189e-05, + "loss": 0.5281, + "step": 3240 + }, + { + "epoch": 0.53, + "learning_rate": 1.697846567967699e-05, + "loss": 0.5326, + "step": 3250 + }, + { + "epoch": 0.53, + "learning_rate": 1.6967249887842083e-05, + "loss": 0.5226, + "step": 3260 + }, + { + "epoch": 0.53, + "learning_rate": 1.6956034096007178e-05, + "loss": 0.5169, + "step": 3270 + }, + { + "epoch": 0.54, + "learning_rate": 1.6944818304172276e-05, + "loss": 0.5308, + "step": 3280 + }, + { + "epoch": 0.54, + "learning_rate": 1.6933602512337374e-05, + "loss": 0.5189, + "step": 3290 + }, + { + "epoch": 0.54, + "learning_rate": 1.692238672050247e-05, + "loss": 0.5162, + "step": 3300 + }, + { + "epoch": 0.54, + "learning_rate": 1.6911170928667567e-05, + "loss": 0.5273, + "step": 3310 + }, + { + "epoch": 0.54, + "learning_rate": 1.689995513683266e-05, + "loss": 0.5184, + "step": 3320 + }, + { + "epoch": 0.54, + "learning_rate": 1.688873934499776e-05, + "loss": 0.5327, + "step": 3330 + }, + { + "epoch": 0.55, + "learning_rate": 1.6877523553162857e-05, + "loss": 0.5266, + "step": 3340 + }, + { + "epoch": 0.55, + "learning_rate": 1.6866307761327952e-05, + "loss": 0.5298, + "step": 3350 + }, + { + "epoch": 0.55, + "learning_rate": 1.6855091969493047e-05, + "loss": 0.529, + "step": 3360 + }, + { + "epoch": 0.55, + "learning_rate": 1.6843876177658145e-05, + "loss": 0.5149, + "step": 3370 + }, + { + "epoch": 0.55, + "learning_rate": 1.683266038582324e-05, + "loss": 0.5194, + "step": 3380 + }, + { + "epoch": 0.55, + "learning_rate": 1.6821444593988337e-05, + "loss": 0.5307, + "step": 3390 + }, + { + "epoch": 0.55, + "learning_rate": 1.6810228802153435e-05, + "loss": 0.5248, + "step": 3400 + }, + { + "epoch": 0.56, + "learning_rate": 1.679901301031853e-05, + "loss": 0.5214, + "step": 3410 + }, + { + "epoch": 0.56, + "learning_rate": 1.6787797218483625e-05, + "loss": 0.5306, + "step": 3420 + }, + { + "epoch": 0.56, + "learning_rate": 1.6776581426648723e-05, + "loss": 0.5154, + "step": 3430 + }, + { + "epoch": 0.56, + "learning_rate": 1.676536563481382e-05, + "loss": 0.5237, + "step": 3440 + }, + { + "epoch": 0.56, + "learning_rate": 1.6754149842978916e-05, + "loss": 0.5081, + "step": 3450 + }, + { + "epoch": 0.56, + "learning_rate": 1.6742934051144014e-05, + "loss": 0.5244, + "step": 3460 + }, + { + "epoch": 0.57, + "learning_rate": 1.6731718259309108e-05, + "loss": 0.5235, + "step": 3470 + }, + { + "epoch": 0.57, + "learning_rate": 1.6720502467474203e-05, + "loss": 0.5234, + "step": 3480 + }, + { + "epoch": 0.57, + "learning_rate": 1.67092866756393e-05, + "loss": 0.5189, + "step": 3490 + }, + { + "epoch": 0.57, + "learning_rate": 1.66980708838044e-05, + "loss": 0.5247, + "step": 3500 + }, + { + "epoch": 0.57, + "learning_rate": 1.6686855091969494e-05, + "loss": 0.5196, + "step": 3510 + }, + { + "epoch": 0.57, + "learning_rate": 1.6675639300134592e-05, + "loss": 0.5155, + "step": 3520 + }, + { + "epoch": 0.58, + "learning_rate": 1.6664423508299686e-05, + "loss": 0.5308, + "step": 3530 + }, + { + "epoch": 0.58, + "learning_rate": 1.6653207716464784e-05, + "loss": 0.5132, + "step": 3540 + }, + { + "epoch": 0.58, + "learning_rate": 1.6641991924629883e-05, + "loss": 0.5259, + "step": 3550 + }, + { + "epoch": 0.58, + "learning_rate": 1.6630776132794977e-05, + "loss": 0.5314, + "step": 3560 + }, + { + "epoch": 0.58, + "learning_rate": 1.6619560340960072e-05, + "loss": 0.5243, + "step": 3570 + }, + { + "epoch": 0.58, + "learning_rate": 1.660834454912517e-05, + "loss": 0.5169, + "step": 3580 + }, + { + "epoch": 0.59, + "learning_rate": 1.6597128757290265e-05, + "loss": 0.5337, + "step": 3590 + }, + { + "epoch": 0.59, + "learning_rate": 1.6585912965455363e-05, + "loss": 0.5289, + "step": 3600 + }, + { + "epoch": 0.59, + "learning_rate": 1.657469717362046e-05, + "loss": 0.5277, + "step": 3610 + }, + { + "epoch": 0.59, + "learning_rate": 1.6563481381785555e-05, + "loss": 0.5248, + "step": 3620 + }, + { + "epoch": 0.59, + "learning_rate": 1.655226558995065e-05, + "loss": 0.5183, + "step": 3630 + }, + { + "epoch": 0.59, + "learning_rate": 1.6541049798115748e-05, + "loss": 0.5115, + "step": 3640 + }, + { + "epoch": 0.6, + "learning_rate": 1.6529834006280846e-05, + "loss": 0.5304, + "step": 3650 + }, + { + "epoch": 0.6, + "learning_rate": 1.651861821444594e-05, + "loss": 0.521, + "step": 3660 + }, + { + "epoch": 0.6, + "learning_rate": 1.650740242261104e-05, + "loss": 0.5214, + "step": 3670 + }, + { + "epoch": 0.6, + "learning_rate": 1.6496186630776133e-05, + "loss": 0.5096, + "step": 3680 + }, + { + "epoch": 0.6, + "learning_rate": 1.648497083894123e-05, + "loss": 0.5259, + "step": 3690 + }, + { + "epoch": 0.6, + "learning_rate": 1.647375504710633e-05, + "loss": 0.522, + "step": 3700 + }, + { + "epoch": 0.61, + "learning_rate": 1.6462539255271424e-05, + "loss": 0.5248, + "step": 3710 + }, + { + "epoch": 0.61, + "learning_rate": 1.645132346343652e-05, + "loss": 0.5199, + "step": 3720 + }, + { + "epoch": 0.61, + "learning_rate": 1.6440107671601617e-05, + "loss": 0.5139, + "step": 3730 + }, + { + "epoch": 0.61, + "learning_rate": 1.642889187976671e-05, + "loss": 0.5187, + "step": 3740 + }, + { + "epoch": 0.61, + "learning_rate": 1.641767608793181e-05, + "loss": 0.5287, + "step": 3750 + }, + { + "epoch": 0.61, + "learning_rate": 1.6406460296096908e-05, + "loss": 0.5186, + "step": 3760 + }, + { + "epoch": 0.62, + "learning_rate": 1.6395244504262002e-05, + "loss": 0.5163, + "step": 3770 + }, + { + "epoch": 0.62, + "learning_rate": 1.6384028712427097e-05, + "loss": 0.5234, + "step": 3780 + }, + { + "epoch": 0.62, + "learning_rate": 1.6372812920592195e-05, + "loss": 0.5194, + "step": 3790 + }, + { + "epoch": 0.62, + "learning_rate": 1.6361597128757293e-05, + "loss": 0.5202, + "step": 3800 + }, + { + "epoch": 0.62, + "learning_rate": 1.6350381336922388e-05, + "loss": 0.5079, + "step": 3810 + }, + { + "epoch": 0.62, + "learning_rate": 1.6339165545087486e-05, + "loss": 0.5285, + "step": 3820 + }, + { + "epoch": 0.62, + "learning_rate": 1.632794975325258e-05, + "loss": 0.5293, + "step": 3830 + }, + { + "epoch": 0.63, + "learning_rate": 1.6316733961417675e-05, + "loss": 0.5281, + "step": 3840 + }, + { + "epoch": 0.63, + "learning_rate": 1.6305518169582773e-05, + "loss": 0.5256, + "step": 3850 + }, + { + "epoch": 0.63, + "learning_rate": 1.629430237774787e-05, + "loss": 0.5185, + "step": 3860 + }, + { + "epoch": 0.63, + "learning_rate": 1.6283086585912966e-05, + "loss": 0.5191, + "step": 3870 + }, + { + "epoch": 0.63, + "learning_rate": 1.6271870794078064e-05, + "loss": 0.5282, + "step": 3880 + }, + { + "epoch": 0.63, + "learning_rate": 1.626065500224316e-05, + "loss": 0.5249, + "step": 3890 + }, + { + "epoch": 0.64, + "learning_rate": 1.6249439210408257e-05, + "loss": 0.5072, + "step": 3900 + }, + { + "epoch": 0.64, + "learning_rate": 1.6238223418573355e-05, + "loss": 0.5154, + "step": 3910 + }, + { + "epoch": 0.64, + "learning_rate": 1.622700762673845e-05, + "loss": 0.5154, + "step": 3920 + }, + { + "epoch": 0.64, + "learning_rate": 1.6215791834903544e-05, + "loss": 0.5181, + "step": 3930 + }, + { + "epoch": 0.64, + "learning_rate": 1.6204576043068642e-05, + "loss": 0.516, + "step": 3940 + }, + { + "epoch": 0.64, + "learning_rate": 1.6193360251233737e-05, + "loss": 0.519, + "step": 3950 + }, + { + "epoch": 0.65, + "learning_rate": 1.6182144459398835e-05, + "loss": 0.5253, + "step": 3960 + }, + { + "epoch": 0.65, + "learning_rate": 1.6170928667563933e-05, + "loss": 0.5235, + "step": 3970 + }, + { + "epoch": 0.65, + "learning_rate": 1.6159712875729028e-05, + "loss": 0.5187, + "step": 3980 + }, + { + "epoch": 0.65, + "learning_rate": 1.6148497083894122e-05, + "loss": 0.5098, + "step": 3990 + }, + { + "epoch": 0.65, + "learning_rate": 1.613728129205922e-05, + "loss": 0.5143, + "step": 4000 + }, + { + "epoch": 0.65, + "learning_rate": 1.612606550022432e-05, + "loss": 0.5114, + "step": 4010 + }, + { + "epoch": 0.66, + "learning_rate": 1.6114849708389413e-05, + "loss": 0.5152, + "step": 4020 + }, + { + "epoch": 0.66, + "learning_rate": 1.610363391655451e-05, + "loss": 0.508, + "step": 4030 + }, + { + "epoch": 0.66, + "learning_rate": 1.6092418124719606e-05, + "loss": 0.5062, + "step": 4040 + }, + { + "epoch": 0.66, + "learning_rate": 1.6081202332884704e-05, + "loss": 0.5186, + "step": 4050 + }, + { + "epoch": 0.66, + "learning_rate": 1.6069986541049802e-05, + "loss": 0.5312, + "step": 4060 + }, + { + "epoch": 0.66, + "learning_rate": 1.6058770749214896e-05, + "loss": 0.5238, + "step": 4070 + }, + { + "epoch": 0.67, + "learning_rate": 1.604755495737999e-05, + "loss": 0.5026, + "step": 4080 + }, + { + "epoch": 0.67, + "learning_rate": 1.603633916554509e-05, + "loss": 0.5239, + "step": 4090 + }, + { + "epoch": 0.67, + "learning_rate": 1.6025123373710184e-05, + "loss": 0.5185, + "step": 4100 + }, + { + "epoch": 0.67, + "learning_rate": 1.6013907581875282e-05, + "loss": 0.5162, + "step": 4110 + }, + { + "epoch": 0.67, + "learning_rate": 1.600269179004038e-05, + "loss": 0.5212, + "step": 4120 + }, + { + "epoch": 0.67, + "learning_rate": 1.5991475998205475e-05, + "loss": 0.5128, + "step": 4130 + }, + { + "epoch": 0.68, + "learning_rate": 1.598026020637057e-05, + "loss": 0.5188, + "step": 4140 + }, + { + "epoch": 0.68, + "learning_rate": 1.5969044414535667e-05, + "loss": 0.506, + "step": 4150 + }, + { + "epoch": 0.68, + "learning_rate": 1.5957828622700765e-05, + "loss": 0.5128, + "step": 4160 + }, + { + "epoch": 0.68, + "learning_rate": 1.594661283086586e-05, + "loss": 0.5244, + "step": 4170 + }, + { + "epoch": 0.68, + "learning_rate": 1.5935397039030958e-05, + "loss": 0.5078, + "step": 4180 + }, + { + "epoch": 0.68, + "learning_rate": 1.5924181247196053e-05, + "loss": 0.5319, + "step": 4190 + }, + { + "epoch": 0.69, + "learning_rate": 1.5912965455361147e-05, + "loss": 0.5186, + "step": 4200 + }, + { + "epoch": 0.69, + "learning_rate": 1.5901749663526245e-05, + "loss": 0.5107, + "step": 4210 + }, + { + "epoch": 0.69, + "learning_rate": 1.5890533871691344e-05, + "loss": 0.5131, + "step": 4220 + }, + { + "epoch": 0.69, + "learning_rate": 1.5879318079856438e-05, + "loss": 0.5136, + "step": 4230 + }, + { + "epoch": 0.69, + "learning_rate": 1.5868102288021536e-05, + "loss": 0.5059, + "step": 4240 + }, + { + "epoch": 0.69, + "learning_rate": 1.585688649618663e-05, + "loss": 0.5064, + "step": 4250 + }, + { + "epoch": 0.7, + "learning_rate": 1.584567070435173e-05, + "loss": 0.5063, + "step": 4260 + }, + { + "epoch": 0.7, + "learning_rate": 1.5834454912516827e-05, + "loss": 0.5301, + "step": 4270 + }, + { + "epoch": 0.7, + "learning_rate": 1.582323912068192e-05, + "loss": 0.5228, + "step": 4280 + }, + { + "epoch": 0.7, + "learning_rate": 1.5812023328847016e-05, + "loss": 0.5207, + "step": 4290 + }, + { + "epoch": 0.7, + "learning_rate": 1.5800807537012114e-05, + "loss": 0.5064, + "step": 4300 + }, + { + "epoch": 0.7, + "learning_rate": 1.578959174517721e-05, + "loss": 0.5148, + "step": 4310 + }, + { + "epoch": 0.7, + "learning_rate": 1.5778375953342307e-05, + "loss": 0.5181, + "step": 4320 + }, + { + "epoch": 0.71, + "learning_rate": 1.5767160161507405e-05, + "loss": 0.5093, + "step": 4330 + }, + { + "epoch": 0.71, + "learning_rate": 1.57559443696725e-05, + "loss": 0.5074, + "step": 4340 + }, + { + "epoch": 0.71, + "learning_rate": 1.5744728577837594e-05, + "loss": 0.5226, + "step": 4350 + }, + { + "epoch": 0.71, + "learning_rate": 1.5733512786002693e-05, + "loss": 0.5122, + "step": 4360 + }, + { + "epoch": 0.71, + "learning_rate": 1.572229699416779e-05, + "loss": 0.5197, + "step": 4370 + }, + { + "epoch": 0.71, + "learning_rate": 1.5711081202332885e-05, + "loss": 0.5143, + "step": 4380 + }, + { + "epoch": 0.72, + "learning_rate": 1.5699865410497983e-05, + "loss": 0.5138, + "step": 4390 + }, + { + "epoch": 0.72, + "learning_rate": 1.5688649618663078e-05, + "loss": 0.5326, + "step": 4400 + }, + { + "epoch": 0.72, + "learning_rate": 1.5677433826828176e-05, + "loss": 0.5139, + "step": 4410 + }, + { + "epoch": 0.72, + "learning_rate": 1.5666218034993274e-05, + "loss": 0.5208, + "step": 4420 + }, + { + "epoch": 0.72, + "learning_rate": 1.565500224315837e-05, + "loss": 0.5114, + "step": 4430 + }, + { + "epoch": 0.72, + "learning_rate": 1.5643786451323463e-05, + "loss": 0.5302, + "step": 4440 + }, + { + "epoch": 0.73, + "learning_rate": 1.563257065948856e-05, + "loss": 0.5207, + "step": 4450 + }, + { + "epoch": 0.73, + "learning_rate": 1.5621354867653656e-05, + "loss": 0.4986, + "step": 4460 + }, + { + "epoch": 0.73, + "learning_rate": 1.5610139075818754e-05, + "loss": 0.5175, + "step": 4470 + }, + { + "epoch": 0.73, + "learning_rate": 1.5598923283983852e-05, + "loss": 0.5168, + "step": 4480 + }, + { + "epoch": 0.73, + "learning_rate": 1.5587707492148947e-05, + "loss": 0.5169, + "step": 4490 + }, + { + "epoch": 0.73, + "learning_rate": 1.557649170031404e-05, + "loss": 0.5121, + "step": 4500 + }, + { + "epoch": 0.74, + "learning_rate": 1.556527590847914e-05, + "loss": 0.5094, + "step": 4510 + }, + { + "epoch": 0.74, + "learning_rate": 1.5554060116644238e-05, + "loss": 0.5169, + "step": 4520 + }, + { + "epoch": 0.74, + "learning_rate": 1.5542844324809332e-05, + "loss": 0.5004, + "step": 4530 + }, + { + "epoch": 0.74, + "learning_rate": 1.553162853297443e-05, + "loss": 0.5154, + "step": 4540 + }, + { + "epoch": 0.74, + "learning_rate": 1.5520412741139525e-05, + "loss": 0.5143, + "step": 4550 + }, + { + "epoch": 0.74, + "learning_rate": 1.550919694930462e-05, + "loss": 0.5234, + "step": 4560 + }, + { + "epoch": 0.75, + "learning_rate": 1.5497981157469718e-05, + "loss": 0.5072, + "step": 4570 + }, + { + "epoch": 0.75, + "learning_rate": 1.5486765365634816e-05, + "loss": 0.5196, + "step": 4580 + }, + { + "epoch": 0.75, + "learning_rate": 1.547554957379991e-05, + "loss": 0.5093, + "step": 4590 + }, + { + "epoch": 0.75, + "learning_rate": 1.546433378196501e-05, + "loss": 0.508, + "step": 4600 + }, + { + "epoch": 0.75, + "learning_rate": 1.5453117990130103e-05, + "loss": 0.5118, + "step": 4610 + }, + { + "epoch": 0.75, + "learning_rate": 1.54419021982952e-05, + "loss": 0.5169, + "step": 4620 + }, + { + "epoch": 0.76, + "learning_rate": 1.54306864064603e-05, + "loss": 0.5196, + "step": 4630 + }, + { + "epoch": 0.76, + "learning_rate": 1.5419470614625394e-05, + "loss": 0.5169, + "step": 4640 + }, + { + "epoch": 0.76, + "learning_rate": 1.540825482279049e-05, + "loss": 0.5043, + "step": 4650 + }, + { + "epoch": 0.76, + "learning_rate": 1.5397039030955587e-05, + "loss": 0.5099, + "step": 4660 + }, + { + "epoch": 0.76, + "learning_rate": 1.5385823239120685e-05, + "loss": 0.5285, + "step": 4670 + }, + { + "epoch": 0.76, + "learning_rate": 1.537460744728578e-05, + "loss": 0.5192, + "step": 4680 + }, + { + "epoch": 0.77, + "learning_rate": 1.5363391655450877e-05, + "loss": 0.521, + "step": 4690 + }, + { + "epoch": 0.77, + "learning_rate": 1.5352175863615972e-05, + "loss": 0.5209, + "step": 4700 + }, + { + "epoch": 0.77, + "learning_rate": 1.5340960071781067e-05, + "loss": 0.5177, + "step": 4710 + }, + { + "epoch": 0.77, + "learning_rate": 1.5329744279946165e-05, + "loss": 0.504, + "step": 4720 + }, + { + "epoch": 0.77, + "learning_rate": 1.5318528488111263e-05, + "loss": 0.5031, + "step": 4730 + }, + { + "epoch": 0.77, + "learning_rate": 1.5307312696276357e-05, + "loss": 0.5065, + "step": 4740 + }, + { + "epoch": 0.78, + "learning_rate": 1.5296096904441456e-05, + "loss": 0.5102, + "step": 4750 + }, + { + "epoch": 0.78, + "learning_rate": 1.528488111260655e-05, + "loss": 0.5086, + "step": 4760 + }, + { + "epoch": 0.78, + "learning_rate": 1.5273665320771648e-05, + "loss": 0.5045, + "step": 4770 + }, + { + "epoch": 0.78, + "learning_rate": 1.5262449528936746e-05, + "loss": 0.503, + "step": 4780 + }, + { + "epoch": 0.78, + "learning_rate": 1.5251233737101841e-05, + "loss": 0.5178, + "step": 4790 + }, + { + "epoch": 0.78, + "learning_rate": 1.5240017945266936e-05, + "loss": 0.5174, + "step": 4800 + }, + { + "epoch": 0.78, + "learning_rate": 1.5228802153432034e-05, + "loss": 0.5183, + "step": 4810 + }, + { + "epoch": 0.79, + "learning_rate": 1.521758636159713e-05, + "loss": 0.5003, + "step": 4820 + }, + { + "epoch": 0.79, + "learning_rate": 1.5206370569762225e-05, + "loss": 0.5163, + "step": 4830 + }, + { + "epoch": 0.79, + "learning_rate": 1.5195154777927323e-05, + "loss": 0.5069, + "step": 4840 + }, + { + "epoch": 0.79, + "learning_rate": 1.5183938986092419e-05, + "loss": 0.5132, + "step": 4850 + }, + { + "epoch": 0.79, + "learning_rate": 1.5172723194257515e-05, + "loss": 0.5199, + "step": 4860 + }, + { + "epoch": 0.79, + "learning_rate": 1.5161507402422614e-05, + "loss": 0.5211, + "step": 4870 + }, + { + "epoch": 0.8, + "learning_rate": 1.5150291610587708e-05, + "loss": 0.5, + "step": 4880 + }, + { + "epoch": 0.8, + "learning_rate": 1.5139075818752805e-05, + "loss": 0.5073, + "step": 4890 + }, + { + "epoch": 0.8, + "learning_rate": 1.5127860026917903e-05, + "loss": 0.5107, + "step": 4900 + }, + { + "epoch": 0.8, + "learning_rate": 1.5116644235082997e-05, + "loss": 0.5222, + "step": 4910 + }, + { + "epoch": 0.8, + "learning_rate": 1.5105428443248094e-05, + "loss": 0.5136, + "step": 4920 + }, + { + "epoch": 0.8, + "learning_rate": 1.5094212651413192e-05, + "loss": 0.5058, + "step": 4930 + }, + { + "epoch": 0.81, + "learning_rate": 1.5082996859578288e-05, + "loss": 0.514, + "step": 4940 + }, + { + "epoch": 0.81, + "learning_rate": 1.5071781067743383e-05, + "loss": 0.5073, + "step": 4950 + }, + { + "epoch": 0.81, + "learning_rate": 1.506056527590848e-05, + "loss": 0.5185, + "step": 4960 + }, + { + "epoch": 0.81, + "learning_rate": 1.5049349484073577e-05, + "loss": 0.5202, + "step": 4970 + }, + { + "epoch": 0.81, + "learning_rate": 1.5038133692238672e-05, + "loss": 0.5137, + "step": 4980 + }, + { + "epoch": 0.81, + "learning_rate": 1.502691790040377e-05, + "loss": 0.5035, + "step": 4990 + }, + { + "epoch": 0.82, + "learning_rate": 1.5015702108568866e-05, + "loss": 0.5166, + "step": 5000 + }, + { + "epoch": 0.82, + "learning_rate": 1.500448631673396e-05, + "loss": 0.5113, + "step": 5010 + }, + { + "epoch": 0.82, + "learning_rate": 1.4993270524899059e-05, + "loss": 0.5061, + "step": 5020 + }, + { + "epoch": 0.82, + "learning_rate": 1.4982054733064155e-05, + "loss": 0.4983, + "step": 5030 + }, + { + "epoch": 0.82, + "learning_rate": 1.4970838941229252e-05, + "loss": 0.5184, + "step": 5040 + }, + { + "epoch": 0.82, + "learning_rate": 1.495962314939435e-05, + "loss": 0.5146, + "step": 5050 + }, + { + "epoch": 0.83, + "learning_rate": 1.4948407357559444e-05, + "loss": 0.5032, + "step": 5060 + }, + { + "epoch": 0.83, + "learning_rate": 1.493719156572454e-05, + "loss": 0.5022, + "step": 5070 + }, + { + "epoch": 0.83, + "learning_rate": 1.4925975773889639e-05, + "loss": 0.518, + "step": 5080 + }, + { + "epoch": 0.83, + "learning_rate": 1.4914759982054733e-05, + "loss": 0.5044, + "step": 5090 + }, + { + "epoch": 0.83, + "learning_rate": 1.490354419021983e-05, + "loss": 0.5065, + "step": 5100 + }, + { + "epoch": 0.83, + "learning_rate": 1.4892328398384928e-05, + "loss": 0.506, + "step": 5110 + }, + { + "epoch": 0.84, + "learning_rate": 1.4881112606550024e-05, + "loss": 0.5093, + "step": 5120 + }, + { + "epoch": 0.84, + "learning_rate": 1.4869896814715119e-05, + "loss": 0.5114, + "step": 5130 + }, + { + "epoch": 0.84, + "learning_rate": 1.4858681022880217e-05, + "loss": 0.5172, + "step": 5140 + }, + { + "epoch": 0.84, + "learning_rate": 1.4847465231045313e-05, + "loss": 0.5236, + "step": 5150 + }, + { + "epoch": 0.84, + "learning_rate": 1.4836249439210408e-05, + "loss": 0.5068, + "step": 5160 + }, + { + "epoch": 0.84, + "learning_rate": 1.4825033647375506e-05, + "loss": 0.5078, + "step": 5170 + }, + { + "epoch": 0.85, + "learning_rate": 1.4813817855540602e-05, + "loss": 0.5083, + "step": 5180 + }, + { + "epoch": 0.85, + "learning_rate": 1.4802602063705697e-05, + "loss": 0.51, + "step": 5190 + }, + { + "epoch": 0.85, + "learning_rate": 1.4791386271870795e-05, + "loss": 0.5009, + "step": 5200 + }, + { + "epoch": 0.85, + "learning_rate": 1.4780170480035891e-05, + "loss": 0.5067, + "step": 5210 + }, + { + "epoch": 0.85, + "learning_rate": 1.4768954688200988e-05, + "loss": 0.5092, + "step": 5220 + }, + { + "epoch": 0.85, + "learning_rate": 1.4757738896366086e-05, + "loss": 0.4984, + "step": 5230 + }, + { + "epoch": 0.86, + "learning_rate": 1.474652310453118e-05, + "loss": 0.5114, + "step": 5240 + }, + { + "epoch": 0.86, + "learning_rate": 1.4735307312696277e-05, + "loss": 0.5106, + "step": 5250 + }, + { + "epoch": 0.86, + "learning_rate": 1.4724091520861375e-05, + "loss": 0.5154, + "step": 5260 + }, + { + "epoch": 0.86, + "learning_rate": 1.471287572902647e-05, + "loss": 0.5065, + "step": 5270 + }, + { + "epoch": 0.86, + "learning_rate": 1.4701659937191568e-05, + "loss": 0.5049, + "step": 5280 + }, + { + "epoch": 0.86, + "learning_rate": 1.4690444145356664e-05, + "loss": 0.5126, + "step": 5290 + }, + { + "epoch": 0.86, + "learning_rate": 1.467922835352176e-05, + "loss": 0.5086, + "step": 5300 + }, + { + "epoch": 0.87, + "learning_rate": 1.4668012561686857e-05, + "loss": 0.5081, + "step": 5310 + }, + { + "epoch": 0.87, + "learning_rate": 1.4656796769851953e-05, + "loss": 0.5125, + "step": 5320 + }, + { + "epoch": 0.87, + "learning_rate": 1.464558097801705e-05, + "loss": 0.5097, + "step": 5330 + }, + { + "epoch": 0.87, + "learning_rate": 1.4634365186182147e-05, + "loss": 0.498, + "step": 5340 + }, + { + "epoch": 0.87, + "learning_rate": 1.4623149394347242e-05, + "loss": 0.5011, + "step": 5350 + }, + { + "epoch": 0.87, + "learning_rate": 1.4611933602512338e-05, + "loss": 0.519, + "step": 5360 + }, + { + "epoch": 0.88, + "learning_rate": 1.4600717810677436e-05, + "loss": 0.4988, + "step": 5370 + }, + { + "epoch": 0.88, + "learning_rate": 1.4589502018842531e-05, + "loss": 0.5005, + "step": 5380 + }, + { + "epoch": 0.88, + "learning_rate": 1.4578286227007627e-05, + "loss": 0.5051, + "step": 5390 + }, + { + "epoch": 0.88, + "learning_rate": 1.4567070435172726e-05, + "loss": 0.495, + "step": 5400 + }, + { + "epoch": 0.88, + "learning_rate": 1.4555854643337822e-05, + "loss": 0.5107, + "step": 5410 + }, + { + "epoch": 0.88, + "learning_rate": 1.4544638851502917e-05, + "loss": 0.5104, + "step": 5420 + }, + { + "epoch": 0.89, + "learning_rate": 1.4533423059668015e-05, + "loss": 0.5104, + "step": 5430 + }, + { + "epoch": 0.89, + "learning_rate": 1.4522207267833111e-05, + "loss": 0.5118, + "step": 5440 + }, + { + "epoch": 0.89, + "learning_rate": 1.4510991475998206e-05, + "loss": 0.5085, + "step": 5450 + }, + { + "epoch": 0.89, + "learning_rate": 1.4499775684163304e-05, + "loss": 0.4994, + "step": 5460 + }, + { + "epoch": 0.89, + "learning_rate": 1.44885598923284e-05, + "loss": 0.4985, + "step": 5470 + }, + { + "epoch": 0.89, + "learning_rate": 1.4477344100493496e-05, + "loss": 0.4968, + "step": 5480 + }, + { + "epoch": 0.9, + "learning_rate": 1.4466128308658593e-05, + "loss": 0.5028, + "step": 5490 + }, + { + "epoch": 0.9, + "learning_rate": 1.4454912516823689e-05, + "loss": 0.5092, + "step": 5500 + }, + { + "epoch": 0.9, + "learning_rate": 1.4443696724988785e-05, + "loss": 0.5083, + "step": 5510 + }, + { + "epoch": 0.9, + "learning_rate": 1.4432480933153884e-05, + "loss": 0.4946, + "step": 5520 + }, + { + "epoch": 0.9, + "learning_rate": 1.4421265141318978e-05, + "loss": 0.5098, + "step": 5530 + }, + { + "epoch": 0.9, + "learning_rate": 1.4410049349484075e-05, + "loss": 0.5119, + "step": 5540 + }, + { + "epoch": 0.91, + "learning_rate": 1.4398833557649173e-05, + "loss": 0.499, + "step": 5550 + }, + { + "epoch": 0.91, + "learning_rate": 1.4387617765814267e-05, + "loss": 0.5077, + "step": 5560 + }, + { + "epoch": 0.91, + "learning_rate": 1.4376401973979364e-05, + "loss": 0.5167, + "step": 5570 + }, + { + "epoch": 0.91, + "learning_rate": 1.4365186182144462e-05, + "loss": 0.5103, + "step": 5580 + }, + { + "epoch": 0.91, + "learning_rate": 1.4353970390309558e-05, + "loss": 0.5073, + "step": 5590 + }, + { + "epoch": 0.91, + "learning_rate": 1.4342754598474653e-05, + "loss": 0.5183, + "step": 5600 + }, + { + "epoch": 0.92, + "learning_rate": 1.433153880663975e-05, + "loss": 0.4945, + "step": 5610 + }, + { + "epoch": 0.92, + "learning_rate": 1.4320323014804847e-05, + "loss": 0.5049, + "step": 5620 + }, + { + "epoch": 0.92, + "learning_rate": 1.4309107222969942e-05, + "loss": 0.4994, + "step": 5630 + }, + { + "epoch": 0.92, + "learning_rate": 1.429789143113504e-05, + "loss": 0.4904, + "step": 5640 + }, + { + "epoch": 0.92, + "learning_rate": 1.4286675639300136e-05, + "loss": 0.5215, + "step": 5650 + }, + { + "epoch": 0.92, + "learning_rate": 1.4275459847465233e-05, + "loss": 0.5026, + "step": 5660 + }, + { + "epoch": 0.93, + "learning_rate": 1.4264244055630329e-05, + "loss": 0.5094, + "step": 5670 + }, + { + "epoch": 0.93, + "learning_rate": 1.4253028263795425e-05, + "loss": 0.5058, + "step": 5680 + }, + { + "epoch": 0.93, + "learning_rate": 1.4241812471960522e-05, + "loss": 0.5149, + "step": 5690 + }, + { + "epoch": 0.93, + "learning_rate": 1.423059668012562e-05, + "loss": 0.5156, + "step": 5700 + }, + { + "epoch": 0.93, + "learning_rate": 1.4219380888290714e-05, + "loss": 0.5042, + "step": 5710 + }, + { + "epoch": 0.93, + "learning_rate": 1.420816509645581e-05, + "loss": 0.4958, + "step": 5720 + }, + { + "epoch": 0.94, + "learning_rate": 1.4196949304620909e-05, + "loss": 0.5071, + "step": 5730 + }, + { + "epoch": 0.94, + "learning_rate": 1.4185733512786003e-05, + "loss": 0.4999, + "step": 5740 + }, + { + "epoch": 0.94, + "learning_rate": 1.41745177209511e-05, + "loss": 0.5089, + "step": 5750 + }, + { + "epoch": 0.94, + "learning_rate": 1.4163301929116198e-05, + "loss": 0.4997, + "step": 5760 + }, + { + "epoch": 0.94, + "learning_rate": 1.4152086137281294e-05, + "loss": 0.5034, + "step": 5770 + }, + { + "epoch": 0.94, + "learning_rate": 1.4140870345446389e-05, + "loss": 0.4994, + "step": 5780 + }, + { + "epoch": 0.94, + "learning_rate": 1.4129654553611487e-05, + "loss": 0.5106, + "step": 5790 + }, + { + "epoch": 0.95, + "learning_rate": 1.4118438761776583e-05, + "loss": 0.5032, + "step": 5800 + }, + { + "epoch": 0.95, + "learning_rate": 1.4107222969941678e-05, + "loss": 0.5118, + "step": 5810 + }, + { + "epoch": 0.95, + "learning_rate": 1.4096007178106776e-05, + "loss": 0.5173, + "step": 5820 + }, + { + "epoch": 0.95, + "learning_rate": 1.4084791386271872e-05, + "loss": 0.5107, + "step": 5830 + }, + { + "epoch": 0.95, + "learning_rate": 1.4073575594436969e-05, + "loss": 0.5117, + "step": 5840 + }, + { + "epoch": 0.95, + "learning_rate": 1.4062359802602065e-05, + "loss": 0.5038, + "step": 5850 + }, + { + "epoch": 0.96, + "learning_rate": 1.4051144010767161e-05, + "loss": 0.5058, + "step": 5860 + }, + { + "epoch": 0.96, + "learning_rate": 1.4039928218932258e-05, + "loss": 0.5131, + "step": 5870 + }, + { + "epoch": 0.96, + "learning_rate": 1.4028712427097356e-05, + "loss": 0.492, + "step": 5880 + }, + { + "epoch": 0.96, + "learning_rate": 1.401749663526245e-05, + "loss": 0.5011, + "step": 5890 + }, + { + "epoch": 0.96, + "learning_rate": 1.4006280843427547e-05, + "loss": 0.5045, + "step": 5900 + }, + { + "epoch": 0.96, + "learning_rate": 1.3995065051592645e-05, + "loss": 0.5069, + "step": 5910 + }, + { + "epoch": 0.97, + "learning_rate": 1.398384925975774e-05, + "loss": 0.4989, + "step": 5920 + }, + { + "epoch": 0.97, + "learning_rate": 1.3972633467922836e-05, + "loss": 0.4968, + "step": 5930 + }, + { + "epoch": 0.97, + "learning_rate": 1.3961417676087934e-05, + "loss": 0.4994, + "step": 5940 + }, + { + "epoch": 0.97, + "learning_rate": 1.395020188425303e-05, + "loss": 0.4964, + "step": 5950 + }, + { + "epoch": 0.97, + "learning_rate": 1.3938986092418125e-05, + "loss": 0.5107, + "step": 5960 + }, + { + "epoch": 0.97, + "learning_rate": 1.3927770300583223e-05, + "loss": 0.503, + "step": 5970 + }, + { + "epoch": 0.98, + "learning_rate": 1.391655450874832e-05, + "loss": 0.5122, + "step": 5980 + }, + { + "epoch": 0.98, + "learning_rate": 1.3905338716913414e-05, + "loss": 0.4971, + "step": 5990 + }, + { + "epoch": 0.98, + "learning_rate": 1.3894122925078512e-05, + "loss": 0.5108, + "step": 6000 + }, + { + "epoch": 0.98, + "learning_rate": 1.3882907133243608e-05, + "loss": 0.4972, + "step": 6010 + }, + { + "epoch": 0.98, + "learning_rate": 1.3871691341408705e-05, + "loss": 0.5065, + "step": 6020 + }, + { + "epoch": 0.98, + "learning_rate": 1.3860475549573801e-05, + "loss": 0.5199, + "step": 6030 + }, + { + "epoch": 0.99, + "learning_rate": 1.3849259757738897e-05, + "loss": 0.498, + "step": 6040 + }, + { + "epoch": 0.99, + "learning_rate": 1.3838043965903994e-05, + "loss": 0.5128, + "step": 6050 + }, + { + "epoch": 0.99, + "learning_rate": 1.3826828174069092e-05, + "loss": 0.4961, + "step": 6060 + }, + { + "epoch": 0.99, + "learning_rate": 1.3815612382234187e-05, + "loss": 0.4963, + "step": 6070 + }, + { + "epoch": 0.99, + "learning_rate": 1.3804396590399283e-05, + "loss": 0.5079, + "step": 6080 + }, + { + "epoch": 0.99, + "learning_rate": 1.3793180798564381e-05, + "loss": 0.5068, + "step": 6090 + }, + { + "epoch": 1.0, + "learning_rate": 1.3781965006729476e-05, + "loss": 0.4963, + "step": 6100 + }, + { + "epoch": 1.0, + "learning_rate": 1.3770749214894572e-05, + "loss": 0.5092, + "step": 6110 + }, + { + "epoch": 1.0, + "learning_rate": 1.375953342305967e-05, + "loss": 0.5008, + "step": 6120 + }, + { + "epoch": 1.0, + "learning_rate": 1.3748317631224766e-05, + "loss": 0.5106, + "step": 6130 + }, + { + "epoch": 1.0, + "learning_rate": 1.3737101839389861e-05, + "loss": 0.4955, + "step": 6140 + }, + { + "epoch": 1.0, + "learning_rate": 1.3725886047554959e-05, + "loss": 0.4905, + "step": 6150 + }, + { + "epoch": 1.01, + "learning_rate": 1.3714670255720055e-05, + "loss": 0.4966, + "step": 6160 + }, + { + "epoch": 1.01, + "learning_rate": 1.370345446388515e-05, + "loss": 0.4885, + "step": 6170 + }, + { + "epoch": 1.01, + "learning_rate": 1.3692238672050248e-05, + "loss": 0.5109, + "step": 6180 + }, + { + "epoch": 1.01, + "learning_rate": 1.3681022880215345e-05, + "loss": 0.5029, + "step": 6190 + }, + { + "epoch": 1.01, + "learning_rate": 1.3669807088380441e-05, + "loss": 0.4944, + "step": 6200 + }, + { + "epoch": 1.01, + "learning_rate": 1.3658591296545537e-05, + "loss": 0.494, + "step": 6210 + }, + { + "epoch": 1.02, + "learning_rate": 1.3647375504710634e-05, + "loss": 0.5075, + "step": 6220 + }, + { + "epoch": 1.02, + "learning_rate": 1.363615971287573e-05, + "loss": 0.5079, + "step": 6230 + }, + { + "epoch": 1.02, + "learning_rate": 1.3624943921040828e-05, + "loss": 0.495, + "step": 6240 + }, + { + "epoch": 1.02, + "learning_rate": 1.3613728129205923e-05, + "loss": 0.506, + "step": 6250 + }, + { + "epoch": 1.02, + "learning_rate": 1.3602512337371019e-05, + "loss": 0.4918, + "step": 6260 + }, + { + "epoch": 1.02, + "learning_rate": 1.3591296545536117e-05, + "loss": 0.4981, + "step": 6270 + }, + { + "epoch": 1.02, + "learning_rate": 1.3580080753701212e-05, + "loss": 0.4978, + "step": 6280 + }, + { + "epoch": 1.03, + "learning_rate": 1.3568864961866308e-05, + "loss": 0.4942, + "step": 6290 + }, + { + "epoch": 1.03, + "learning_rate": 1.3557649170031406e-05, + "loss": 0.5062, + "step": 6300 + }, + { + "epoch": 1.03, + "learning_rate": 1.3546433378196503e-05, + "loss": 0.5008, + "step": 6310 + }, + { + "epoch": 1.03, + "learning_rate": 1.3535217586361597e-05, + "loss": 0.5065, + "step": 6320 + }, + { + "epoch": 1.03, + "learning_rate": 1.3524001794526695e-05, + "loss": 0.5129, + "step": 6330 + }, + { + "epoch": 1.03, + "learning_rate": 1.3512786002691792e-05, + "loss": 0.5002, + "step": 6340 + }, + { + "epoch": 1.04, + "learning_rate": 1.3501570210856886e-05, + "loss": 0.5028, + "step": 6350 + }, + { + "epoch": 1.04, + "learning_rate": 1.3490354419021984e-05, + "loss": 0.4931, + "step": 6360 + }, + { + "epoch": 1.04, + "learning_rate": 1.347913862718708e-05, + "loss": 0.4971, + "step": 6370 + }, + { + "epoch": 1.04, + "learning_rate": 1.3467922835352177e-05, + "loss": 0.4965, + "step": 6380 + }, + { + "epoch": 1.04, + "learning_rate": 1.3456707043517273e-05, + "loss": 0.5136, + "step": 6390 + }, + { + "epoch": 1.04, + "learning_rate": 1.344549125168237e-05, + "loss": 0.4982, + "step": 6400 + }, + { + "epoch": 1.05, + "learning_rate": 1.3434275459847466e-05, + "loss": 0.4885, + "step": 6410 + }, + { + "epoch": 1.05, + "learning_rate": 1.3423059668012564e-05, + "loss": 0.4949, + "step": 6420 + }, + { + "epoch": 1.05, + "learning_rate": 1.3411843876177659e-05, + "loss": 0.5015, + "step": 6430 + }, + { + "epoch": 1.05, + "learning_rate": 1.3400628084342755e-05, + "loss": 0.4977, + "step": 6440 + }, + { + "epoch": 1.05, + "learning_rate": 1.3389412292507853e-05, + "loss": 0.5005, + "step": 6450 + }, + { + "epoch": 1.05, + "learning_rate": 1.3378196500672948e-05, + "loss": 0.4907, + "step": 6460 + }, + { + "epoch": 1.06, + "learning_rate": 1.3366980708838044e-05, + "loss": 0.509, + "step": 6470 + }, + { + "epoch": 1.06, + "learning_rate": 1.3355764917003142e-05, + "loss": 0.4904, + "step": 6480 + }, + { + "epoch": 1.06, + "learning_rate": 1.3344549125168239e-05, + "loss": 0.488, + "step": 6490 + }, + { + "epoch": 1.06, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.4863, + "step": 6500 + }, + { + "epoch": 1.06, + "learning_rate": 1.3322117541498431e-05, + "loss": 0.5017, + "step": 6510 + }, + { + "epoch": 1.06, + "learning_rate": 1.3310901749663528e-05, + "loss": 0.5029, + "step": 6520 + }, + { + "epoch": 1.07, + "learning_rate": 1.3299685957828622e-05, + "loss": 0.507, + "step": 6530 + }, + { + "epoch": 1.07, + "learning_rate": 1.328847016599372e-05, + "loss": 0.4966, + "step": 6540 + }, + { + "epoch": 1.07, + "learning_rate": 1.3277254374158817e-05, + "loss": 0.5035, + "step": 6550 + }, + { + "epoch": 1.07, + "learning_rate": 1.3266038582323913e-05, + "loss": 0.4877, + "step": 6560 + }, + { + "epoch": 1.07, + "learning_rate": 1.325482279048901e-05, + "loss": 0.4896, + "step": 6570 + }, + { + "epoch": 1.07, + "learning_rate": 1.3243606998654106e-05, + "loss": 0.5011, + "step": 6580 + }, + { + "epoch": 1.08, + "learning_rate": 1.3232391206819202e-05, + "loss": 0.5059, + "step": 6590 + }, + { + "epoch": 1.08, + "learning_rate": 1.32211754149843e-05, + "loss": 0.5017, + "step": 6600 + }, + { + "epoch": 1.08, + "learning_rate": 1.3209959623149395e-05, + "loss": 0.4982, + "step": 6610 + }, + { + "epoch": 1.08, + "learning_rate": 1.3198743831314491e-05, + "loss": 0.5009, + "step": 6620 + }, + { + "epoch": 1.08, + "learning_rate": 1.318752803947959e-05, + "loss": 0.4909, + "step": 6630 + }, + { + "epoch": 1.08, + "learning_rate": 1.3176312247644684e-05, + "loss": 0.5027, + "step": 6640 + }, + { + "epoch": 1.09, + "learning_rate": 1.316509645580978e-05, + "loss": 0.4997, + "step": 6650 + }, + { + "epoch": 1.09, + "learning_rate": 1.3153880663974878e-05, + "loss": 0.5015, + "step": 6660 + }, + { + "epoch": 1.09, + "learning_rate": 1.3142664872139975e-05, + "loss": 0.4974, + "step": 6670 + }, + { + "epoch": 1.09, + "learning_rate": 1.313144908030507e-05, + "loss": 0.4913, + "step": 6680 + }, + { + "epoch": 1.09, + "learning_rate": 1.3120233288470167e-05, + "loss": 0.4968, + "step": 6690 + }, + { + "epoch": 1.09, + "learning_rate": 1.3109017496635264e-05, + "loss": 0.4932, + "step": 6700 + }, + { + "epoch": 1.09, + "learning_rate": 1.3097801704800358e-05, + "loss": 0.5002, + "step": 6710 + }, + { + "epoch": 1.1, + "learning_rate": 1.3086585912965457e-05, + "loss": 0.5007, + "step": 6720 + }, + { + "epoch": 1.1, + "learning_rate": 1.3075370121130553e-05, + "loss": 0.5003, + "step": 6730 + }, + { + "epoch": 1.1, + "learning_rate": 1.306415432929565e-05, + "loss": 0.4896, + "step": 6740 + }, + { + "epoch": 1.1, + "learning_rate": 1.3052938537460747e-05, + "loss": 0.5006, + "step": 6750 + }, + { + "epoch": 1.1, + "learning_rate": 1.3041722745625842e-05, + "loss": 0.4972, + "step": 6760 + }, + { + "epoch": 1.1, + "learning_rate": 1.3030506953790938e-05, + "loss": 0.4971, + "step": 6770 + }, + { + "epoch": 1.11, + "learning_rate": 1.3019291161956036e-05, + "loss": 0.4925, + "step": 6780 + }, + { + "epoch": 1.11, + "learning_rate": 1.3008075370121131e-05, + "loss": 0.5005, + "step": 6790 + }, + { + "epoch": 1.11, + "learning_rate": 1.2996859578286227e-05, + "loss": 0.4981, + "step": 6800 + }, + { + "epoch": 1.11, + "learning_rate": 1.2985643786451325e-05, + "loss": 0.5063, + "step": 6810 + }, + { + "epoch": 1.11, + "learning_rate": 1.297442799461642e-05, + "loss": 0.4991, + "step": 6820 + }, + { + "epoch": 1.11, + "learning_rate": 1.2963212202781516e-05, + "loss": 0.4969, + "step": 6830 + }, + { + "epoch": 1.12, + "learning_rate": 1.2951996410946615e-05, + "loss": 0.491, + "step": 6840 + }, + { + "epoch": 1.12, + "learning_rate": 1.2940780619111711e-05, + "loss": 0.5001, + "step": 6850 + }, + { + "epoch": 1.12, + "learning_rate": 1.2929564827276806e-05, + "loss": 0.501, + "step": 6860 + }, + { + "epoch": 1.12, + "learning_rate": 1.2918349035441904e-05, + "loss": 0.5133, + "step": 6870 + }, + { + "epoch": 1.12, + "learning_rate": 1.2907133243607e-05, + "loss": 0.4921, + "step": 6880 + }, + { + "epoch": 1.12, + "learning_rate": 1.2895917451772095e-05, + "loss": 0.5047, + "step": 6890 + }, + { + "epoch": 1.13, + "learning_rate": 1.2884701659937193e-05, + "loss": 0.4917, + "step": 6900 + }, + { + "epoch": 1.13, + "learning_rate": 1.2873485868102289e-05, + "loss": 0.5017, + "step": 6910 + }, + { + "epoch": 1.13, + "learning_rate": 1.2862270076267385e-05, + "loss": 0.5029, + "step": 6920 + }, + { + "epoch": 1.13, + "learning_rate": 1.2851054284432483e-05, + "loss": 0.4866, + "step": 6930 + }, + { + "epoch": 1.13, + "learning_rate": 1.2839838492597578e-05, + "loss": 0.4978, + "step": 6940 + }, + { + "epoch": 1.13, + "learning_rate": 1.2828622700762674e-05, + "loss": 0.4744, + "step": 6950 + }, + { + "epoch": 1.14, + "learning_rate": 1.2817406908927773e-05, + "loss": 0.486, + "step": 6960 + }, + { + "epoch": 1.14, + "learning_rate": 1.2806191117092867e-05, + "loss": 0.4956, + "step": 6970 + }, + { + "epoch": 1.14, + "learning_rate": 1.2794975325257964e-05, + "loss": 0.4935, + "step": 6980 + }, + { + "epoch": 1.14, + "learning_rate": 1.2783759533423062e-05, + "loss": 0.5023, + "step": 6990 + }, + { + "epoch": 1.14, + "learning_rate": 1.2772543741588156e-05, + "loss": 0.4937, + "step": 7000 + }, + { + "epoch": 1.14, + "learning_rate": 1.2761327949753253e-05, + "loss": 0.4873, + "step": 7010 + }, + { + "epoch": 1.15, + "learning_rate": 1.275011215791835e-05, + "loss": 0.5001, + "step": 7020 + }, + { + "epoch": 1.15, + "learning_rate": 1.2738896366083447e-05, + "loss": 0.4909, + "step": 7030 + }, + { + "epoch": 1.15, + "learning_rate": 1.2727680574248542e-05, + "loss": 0.49, + "step": 7040 + }, + { + "epoch": 1.15, + "learning_rate": 1.271646478241364e-05, + "loss": 0.501, + "step": 7050 + }, + { + "epoch": 1.15, + "learning_rate": 1.2705248990578736e-05, + "loss": 0.4924, + "step": 7060 + }, + { + "epoch": 1.15, + "learning_rate": 1.269403319874383e-05, + "loss": 0.4927, + "step": 7070 + }, + { + "epoch": 1.16, + "learning_rate": 1.2682817406908929e-05, + "loss": 0.5003, + "step": 7080 + }, + { + "epoch": 1.16, + "learning_rate": 1.2671601615074025e-05, + "loss": 0.4894, + "step": 7090 + }, + { + "epoch": 1.16, + "learning_rate": 1.2660385823239122e-05, + "loss": 0.4939, + "step": 7100 + }, + { + "epoch": 1.16, + "learning_rate": 1.264917003140422e-05, + "loss": 0.4937, + "step": 7110 + }, + { + "epoch": 1.16, + "learning_rate": 1.2637954239569314e-05, + "loss": 0.4839, + "step": 7120 + }, + { + "epoch": 1.16, + "learning_rate": 1.262673844773441e-05, + "loss": 0.5071, + "step": 7130 + }, + { + "epoch": 1.17, + "learning_rate": 1.2615522655899509e-05, + "loss": 0.4893, + "step": 7140 + }, + { + "epoch": 1.17, + "learning_rate": 1.2604306864064603e-05, + "loss": 0.4954, + "step": 7150 + }, + { + "epoch": 1.17, + "learning_rate": 1.25930910722297e-05, + "loss": 0.4876, + "step": 7160 + }, + { + "epoch": 1.17, + "learning_rate": 1.2581875280394798e-05, + "loss": 0.5056, + "step": 7170 + }, + { + "epoch": 1.17, + "learning_rate": 1.2570659488559892e-05, + "loss": 0.4937, + "step": 7180 + }, + { + "epoch": 1.17, + "learning_rate": 1.2559443696724989e-05, + "loss": 0.4979, + "step": 7190 + }, + { + "epoch": 1.17, + "learning_rate": 1.2548227904890087e-05, + "loss": 0.4967, + "step": 7200 + }, + { + "epoch": 1.18, + "learning_rate": 1.2537012113055183e-05, + "loss": 0.4812, + "step": 7210 + }, + { + "epoch": 1.18, + "learning_rate": 1.2525796321220278e-05, + "loss": 0.4944, + "step": 7220 + }, + { + "epoch": 1.18, + "learning_rate": 1.2514580529385376e-05, + "loss": 0.4977, + "step": 7230 + }, + { + "epoch": 1.18, + "learning_rate": 1.2503364737550472e-05, + "loss": 0.4907, + "step": 7240 + }, + { + "epoch": 1.18, + "learning_rate": 1.2492148945715567e-05, + "loss": 0.4902, + "step": 7250 + }, + { + "epoch": 1.18, + "learning_rate": 1.2480933153880665e-05, + "loss": 0.4908, + "step": 7260 + }, + { + "epoch": 1.19, + "learning_rate": 1.2469717362045761e-05, + "loss": 0.5047, + "step": 7270 + }, + { + "epoch": 1.19, + "learning_rate": 1.2458501570210858e-05, + "loss": 0.4909, + "step": 7280 + }, + { + "epoch": 1.19, + "learning_rate": 1.2447285778375956e-05, + "loss": 0.4877, + "step": 7290 + }, + { + "epoch": 1.19, + "learning_rate": 1.243606998654105e-05, + "loss": 0.4972, + "step": 7300 + }, + { + "epoch": 1.19, + "learning_rate": 1.2424854194706147e-05, + "loss": 0.4952, + "step": 7310 + }, + { + "epoch": 1.19, + "learning_rate": 1.2413638402871245e-05, + "loss": 0.4881, + "step": 7320 + }, + { + "epoch": 1.2, + "learning_rate": 1.240242261103634e-05, + "loss": 0.4944, + "step": 7330 + }, + { + "epoch": 1.2, + "learning_rate": 1.2391206819201436e-05, + "loss": 0.4959, + "step": 7340 + }, + { + "epoch": 1.2, + "learning_rate": 1.2379991027366534e-05, + "loss": 0.4963, + "step": 7350 + }, + { + "epoch": 1.2, + "learning_rate": 1.2368775235531628e-05, + "loss": 0.5035, + "step": 7360 + }, + { + "epoch": 1.2, + "learning_rate": 1.2357559443696725e-05, + "loss": 0.5018, + "step": 7370 + }, + { + "epoch": 1.2, + "learning_rate": 1.2346343651861823e-05, + "loss": 0.4973, + "step": 7380 + }, + { + "epoch": 1.21, + "learning_rate": 1.233512786002692e-05, + "loss": 0.4998, + "step": 7390 + }, + { + "epoch": 1.21, + "learning_rate": 1.2323912068192014e-05, + "loss": 0.5015, + "step": 7400 + }, + { + "epoch": 1.21, + "learning_rate": 1.2312696276357112e-05, + "loss": 0.4906, + "step": 7410 + }, + { + "epoch": 1.21, + "learning_rate": 1.2301480484522208e-05, + "loss": 0.4865, + "step": 7420 + }, + { + "epoch": 1.21, + "learning_rate": 1.2290264692687303e-05, + "loss": 0.4918, + "step": 7430 + }, + { + "epoch": 1.21, + "learning_rate": 1.2279048900852401e-05, + "loss": 0.4913, + "step": 7440 + }, + { + "epoch": 1.22, + "learning_rate": 1.2267833109017497e-05, + "loss": 0.4915, + "step": 7450 + }, + { + "epoch": 1.22, + "learning_rate": 1.2256617317182594e-05, + "loss": 0.5034, + "step": 7460 + }, + { + "epoch": 1.22, + "learning_rate": 1.2245401525347692e-05, + "loss": 0.478, + "step": 7470 + }, + { + "epoch": 1.22, + "learning_rate": 1.2234185733512786e-05, + "loss": 0.4911, + "step": 7480 + }, + { + "epoch": 1.22, + "learning_rate": 1.2222969941677883e-05, + "loss": 0.4918, + "step": 7490 + }, + { + "epoch": 1.22, + "learning_rate": 1.2211754149842981e-05, + "loss": 0.4844, + "step": 7500 + }, + { + "epoch": 1.23, + "learning_rate": 1.2200538358008076e-05, + "loss": 0.488, + "step": 7510 + }, + { + "epoch": 1.23, + "learning_rate": 1.2189322566173172e-05, + "loss": 0.4928, + "step": 7520 + }, + { + "epoch": 1.23, + "learning_rate": 1.217810677433827e-05, + "loss": 0.4913, + "step": 7530 + }, + { + "epoch": 1.23, + "learning_rate": 1.2166890982503365e-05, + "loss": 0.4841, + "step": 7540 + }, + { + "epoch": 1.23, + "learning_rate": 1.2155675190668461e-05, + "loss": 0.4758, + "step": 7550 + }, + { + "epoch": 1.23, + "learning_rate": 1.2144459398833559e-05, + "loss": 0.4977, + "step": 7560 + }, + { + "epoch": 1.24, + "learning_rate": 1.2133243606998655e-05, + "loss": 0.4991, + "step": 7570 + }, + { + "epoch": 1.24, + "learning_rate": 1.212202781516375e-05, + "loss": 0.5023, + "step": 7580 + }, + { + "epoch": 1.24, + "learning_rate": 1.2110812023328848e-05, + "loss": 0.4849, + "step": 7590 + }, + { + "epoch": 1.24, + "learning_rate": 1.2099596231493944e-05, + "loss": 0.4867, + "step": 7600 + }, + { + "epoch": 1.24, + "learning_rate": 1.2088380439659039e-05, + "loss": 0.4962, + "step": 7610 + }, + { + "epoch": 1.24, + "learning_rate": 1.2077164647824137e-05, + "loss": 0.4983, + "step": 7620 + }, + { + "epoch": 1.25, + "learning_rate": 1.2065948855989234e-05, + "loss": 0.4894, + "step": 7630 + }, + { + "epoch": 1.25, + "learning_rate": 1.205473306415433e-05, + "loss": 0.4896, + "step": 7640 + }, + { + "epoch": 1.25, + "learning_rate": 1.2043517272319428e-05, + "loss": 0.4961, + "step": 7650 + }, + { + "epoch": 1.25, + "learning_rate": 1.2032301480484523e-05, + "loss": 0.4857, + "step": 7660 + }, + { + "epoch": 1.25, + "learning_rate": 1.2021085688649619e-05, + "loss": 0.491, + "step": 7670 + }, + { + "epoch": 1.25, + "learning_rate": 1.2009869896814717e-05, + "loss": 0.4829, + "step": 7680 + }, + { + "epoch": 1.25, + "learning_rate": 1.1998654104979812e-05, + "loss": 0.4778, + "step": 7690 + }, + { + "epoch": 1.26, + "learning_rate": 1.1987438313144908e-05, + "loss": 0.4795, + "step": 7700 + }, + { + "epoch": 1.26, + "learning_rate": 1.1976222521310006e-05, + "loss": 0.4923, + "step": 7710 + }, + { + "epoch": 1.26, + "learning_rate": 1.19650067294751e-05, + "loss": 0.4903, + "step": 7720 + }, + { + "epoch": 1.26, + "learning_rate": 1.1953790937640197e-05, + "loss": 0.5006, + "step": 7730 + }, + { + "epoch": 1.26, + "learning_rate": 1.1942575145805295e-05, + "loss": 0.4916, + "step": 7740 + }, + { + "epoch": 1.26, + "learning_rate": 1.1931359353970392e-05, + "loss": 0.496, + "step": 7750 + }, + { + "epoch": 1.27, + "learning_rate": 1.192014356213549e-05, + "loss": 0.4908, + "step": 7760 + }, + { + "epoch": 1.27, + "learning_rate": 1.1908927770300584e-05, + "loss": 0.4964, + "step": 7770 + }, + { + "epoch": 1.27, + "learning_rate": 1.189771197846568e-05, + "loss": 0.4954, + "step": 7780 + }, + { + "epoch": 1.27, + "learning_rate": 1.1886496186630779e-05, + "loss": 0.4817, + "step": 7790 + }, + { + "epoch": 1.27, + "learning_rate": 1.1875280394795873e-05, + "loss": 0.4917, + "step": 7800 + }, + { + "epoch": 1.27, + "learning_rate": 1.186406460296097e-05, + "loss": 0.4877, + "step": 7810 + }, + { + "epoch": 1.28, + "learning_rate": 1.1852848811126068e-05, + "loss": 0.488, + "step": 7820 + }, + { + "epoch": 1.28, + "learning_rate": 1.1841633019291164e-05, + "loss": 0.4982, + "step": 7830 + }, + { + "epoch": 1.28, + "learning_rate": 1.1830417227456259e-05, + "loss": 0.4874, + "step": 7840 + }, + { + "epoch": 1.28, + "learning_rate": 1.1819201435621357e-05, + "loss": 0.4925, + "step": 7850 + }, + { + "epoch": 1.28, + "learning_rate": 1.1807985643786453e-05, + "loss": 0.499, + "step": 7860 + }, + { + "epoch": 1.28, + "learning_rate": 1.1796769851951548e-05, + "loss": 0.4916, + "step": 7870 + }, + { + "epoch": 1.29, + "learning_rate": 1.1785554060116646e-05, + "loss": 0.4835, + "step": 7880 + }, + { + "epoch": 1.29, + "learning_rate": 1.1774338268281742e-05, + "loss": 0.4978, + "step": 7890 + }, + { + "epoch": 1.29, + "learning_rate": 1.1763122476446837e-05, + "loss": 0.4952, + "step": 7900 + }, + { + "epoch": 1.29, + "learning_rate": 1.1751906684611935e-05, + "loss": 0.4839, + "step": 7910 + }, + { + "epoch": 1.29, + "learning_rate": 1.1740690892777031e-05, + "loss": 0.4841, + "step": 7920 + }, + { + "epoch": 1.29, + "learning_rate": 1.1729475100942128e-05, + "loss": 0.4884, + "step": 7930 + }, + { + "epoch": 1.3, + "learning_rate": 1.1718259309107226e-05, + "loss": 0.4948, + "step": 7940 + }, + { + "epoch": 1.3, + "learning_rate": 1.170704351727232e-05, + "loss": 0.4888, + "step": 7950 + }, + { + "epoch": 1.3, + "learning_rate": 1.1695827725437417e-05, + "loss": 0.4922, + "step": 7960 + }, + { + "epoch": 1.3, + "learning_rate": 1.1684611933602515e-05, + "loss": 0.4939, + "step": 7970 + }, + { + "epoch": 1.3, + "learning_rate": 1.167339614176761e-05, + "loss": 0.4905, + "step": 7980 + }, + { + "epoch": 1.3, + "learning_rate": 1.1662180349932706e-05, + "loss": 0.4913, + "step": 7990 + }, + { + "epoch": 1.31, + "learning_rate": 1.1650964558097804e-05, + "loss": 0.4926, + "step": 8000 + }, + { + "epoch": 1.31, + "learning_rate": 1.16397487662629e-05, + "loss": 0.496, + "step": 8010 + }, + { + "epoch": 1.31, + "learning_rate": 1.1628532974427995e-05, + "loss": 0.4878, + "step": 8020 + }, + { + "epoch": 1.31, + "learning_rate": 1.1617317182593093e-05, + "loss": 0.4944, + "step": 8030 + }, + { + "epoch": 1.31, + "learning_rate": 1.160610139075819e-05, + "loss": 0.4954, + "step": 8040 + }, + { + "epoch": 1.31, + "learning_rate": 1.1594885598923284e-05, + "loss": 0.502, + "step": 8050 + }, + { + "epoch": 1.32, + "learning_rate": 1.1583669807088382e-05, + "loss": 0.4789, + "step": 8060 + }, + { + "epoch": 1.32, + "learning_rate": 1.1572454015253478e-05, + "loss": 0.4862, + "step": 8070 + }, + { + "epoch": 1.32, + "learning_rate": 1.1561238223418573e-05, + "loss": 0.5012, + "step": 8080 + }, + { + "epoch": 1.32, + "learning_rate": 1.1550022431583671e-05, + "loss": 0.4831, + "step": 8090 + }, + { + "epoch": 1.32, + "learning_rate": 1.1538806639748767e-05, + "loss": 0.4823, + "step": 8100 + }, + { + "epoch": 1.32, + "learning_rate": 1.1527590847913864e-05, + "loss": 0.4948, + "step": 8110 + }, + { + "epoch": 1.33, + "learning_rate": 1.1516375056078962e-05, + "loss": 0.4874, + "step": 8120 + }, + { + "epoch": 1.33, + "learning_rate": 1.1505159264244056e-05, + "loss": 0.4847, + "step": 8130 + }, + { + "epoch": 1.33, + "learning_rate": 1.1493943472409153e-05, + "loss": 0.4985, + "step": 8140 + }, + { + "epoch": 1.33, + "learning_rate": 1.1482727680574251e-05, + "loss": 0.4889, + "step": 8150 + }, + { + "epoch": 1.33, + "learning_rate": 1.1471511888739346e-05, + "loss": 0.493, + "step": 8160 + }, + { + "epoch": 1.33, + "learning_rate": 1.1460296096904442e-05, + "loss": 0.4945, + "step": 8170 + }, + { + "epoch": 1.33, + "learning_rate": 1.144908030506954e-05, + "loss": 0.4813, + "step": 8180 + }, + { + "epoch": 1.34, + "learning_rate": 1.1437864513234636e-05, + "loss": 0.487, + "step": 8190 + }, + { + "epoch": 1.34, + "learning_rate": 1.1426648721399731e-05, + "loss": 0.4875, + "step": 8200 + }, + { + "epoch": 1.34, + "learning_rate": 1.1415432929564829e-05, + "loss": 0.4893, + "step": 8210 + }, + { + "epoch": 1.34, + "learning_rate": 1.1404217137729925e-05, + "loss": 0.4984, + "step": 8220 + }, + { + "epoch": 1.34, + "learning_rate": 1.139300134589502e-05, + "loss": 0.5001, + "step": 8230 + }, + { + "epoch": 1.34, + "learning_rate": 1.1381785554060118e-05, + "loss": 0.4866, + "step": 8240 + }, + { + "epoch": 1.35, + "learning_rate": 1.1370569762225214e-05, + "loss": 0.4904, + "step": 8250 + }, + { + "epoch": 1.35, + "learning_rate": 1.135935397039031e-05, + "loss": 0.4808, + "step": 8260 + }, + { + "epoch": 1.35, + "learning_rate": 1.1348138178555407e-05, + "loss": 0.4902, + "step": 8270 + }, + { + "epoch": 1.35, + "learning_rate": 1.1336922386720504e-05, + "loss": 0.4877, + "step": 8280 + }, + { + "epoch": 1.35, + "learning_rate": 1.13257065948856e-05, + "loss": 0.4904, + "step": 8290 + }, + { + "epoch": 1.35, + "learning_rate": 1.1314490803050698e-05, + "loss": 0.4923, + "step": 8300 + }, + { + "epoch": 1.36, + "learning_rate": 1.1303275011215793e-05, + "loss": 0.4984, + "step": 8310 + }, + { + "epoch": 1.36, + "learning_rate": 1.1292059219380889e-05, + "loss": 0.487, + "step": 8320 + }, + { + "epoch": 1.36, + "learning_rate": 1.1280843427545987e-05, + "loss": 0.4958, + "step": 8330 + }, + { + "epoch": 1.36, + "learning_rate": 1.1269627635711082e-05, + "loss": 0.4884, + "step": 8340 + }, + { + "epoch": 1.36, + "learning_rate": 1.1258411843876178e-05, + "loss": 0.4885, + "step": 8350 + }, + { + "epoch": 1.36, + "learning_rate": 1.1247196052041276e-05, + "loss": 0.4871, + "step": 8360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1235980260206372e-05, + "loss": 0.4961, + "step": 8370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1224764468371467e-05, + "loss": 0.4842, + "step": 8380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1213548676536565e-05, + "loss": 0.4891, + "step": 8390 + }, + { + "epoch": 1.37, + "learning_rate": 1.1202332884701661e-05, + "loss": 0.4836, + "step": 8400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1191117092866756e-05, + "loss": 0.4893, + "step": 8410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1179901301031854e-05, + "loss": 0.4785, + "step": 8420 + }, + { + "epoch": 1.38, + "learning_rate": 1.116868550919695e-05, + "loss": 0.4926, + "step": 8430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1157469717362047e-05, + "loss": 0.4804, + "step": 8440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1146253925527143e-05, + "loss": 0.4888, + "step": 8450 + }, + { + "epoch": 1.38, + "learning_rate": 1.113503813369224e-05, + "loss": 0.5088, + "step": 8460 + }, + { + "epoch": 1.38, + "learning_rate": 1.1123822341857336e-05, + "loss": 0.4904, + "step": 8470 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112606550022434e-05, + "loss": 0.4913, + "step": 8480 + }, + { + "epoch": 1.39, + "learning_rate": 1.1101390758187529e-05, + "loss": 0.4901, + "step": 8490 + }, + { + "epoch": 1.39, + "learning_rate": 1.1090174966352625e-05, + "loss": 0.4887, + "step": 8500 + }, + { + "epoch": 1.39, + "learning_rate": 1.1078959174517723e-05, + "loss": 0.4816, + "step": 8510 + }, + { + "epoch": 1.39, + "learning_rate": 1.1067743382682818e-05, + "loss": 0.4989, + "step": 8520 + }, + { + "epoch": 1.39, + "learning_rate": 1.1056527590847914e-05, + "loss": 0.4807, + "step": 8530 + }, + { + "epoch": 1.39, + "learning_rate": 1.1045311799013012e-05, + "loss": 0.4929, + "step": 8540 + }, + { + "epoch": 1.4, + "learning_rate": 1.1034096007178109e-05, + "loss": 0.4752, + "step": 8550 + }, + { + "epoch": 1.4, + "learning_rate": 1.1022880215343203e-05, + "loss": 0.4822, + "step": 8560 + }, + { + "epoch": 1.4, + "learning_rate": 1.1011664423508301e-05, + "loss": 0.491, + "step": 8570 + }, + { + "epoch": 1.4, + "learning_rate": 1.1000448631673398e-05, + "loss": 0.4987, + "step": 8580 + }, + { + "epoch": 1.4, + "learning_rate": 1.0989232839838492e-05, + "loss": 0.4867, + "step": 8590 + }, + { + "epoch": 1.4, + "learning_rate": 1.097801704800359e-05, + "loss": 0.493, + "step": 8600 + }, + { + "epoch": 1.41, + "learning_rate": 1.0966801256168687e-05, + "loss": 0.5023, + "step": 8610 + }, + { + "epoch": 1.41, + "learning_rate": 1.0955585464333783e-05, + "loss": 0.4931, + "step": 8620 + }, + { + "epoch": 1.41, + "learning_rate": 1.094436967249888e-05, + "loss": 0.4875, + "step": 8630 + }, + { + "epoch": 1.41, + "learning_rate": 1.0933153880663976e-05, + "loss": 0.4818, + "step": 8640 + }, + { + "epoch": 1.41, + "learning_rate": 1.0921938088829072e-05, + "loss": 0.4828, + "step": 8650 + }, + { + "epoch": 1.41, + "learning_rate": 1.091072229699417e-05, + "loss": 0.4897, + "step": 8660 + }, + { + "epoch": 1.41, + "learning_rate": 1.0899506505159265e-05, + "loss": 0.48, + "step": 8670 + }, + { + "epoch": 1.42, + "learning_rate": 1.0888290713324361e-05, + "loss": 0.4865, + "step": 8680 + }, + { + "epoch": 1.42, + "learning_rate": 1.087707492148946e-05, + "loss": 0.4941, + "step": 8690 + }, + { + "epoch": 1.42, + "learning_rate": 1.0865859129654554e-05, + "loss": 0.4858, + "step": 8700 + }, + { + "epoch": 1.42, + "learning_rate": 1.085464333781965e-05, + "loss": 0.4864, + "step": 8710 + }, + { + "epoch": 1.42, + "learning_rate": 1.0843427545984748e-05, + "loss": 0.4864, + "step": 8720 + }, + { + "epoch": 1.42, + "learning_rate": 1.0832211754149845e-05, + "loss": 0.5011, + "step": 8730 + }, + { + "epoch": 1.43, + "learning_rate": 1.082099596231494e-05, + "loss": 0.4841, + "step": 8740 + }, + { + "epoch": 1.43, + "learning_rate": 1.0809780170480037e-05, + "loss": 0.4966, + "step": 8750 + }, + { + "epoch": 1.43, + "learning_rate": 1.0798564378645134e-05, + "loss": 0.5004, + "step": 8760 + }, + { + "epoch": 1.43, + "learning_rate": 1.0787348586810228e-05, + "loss": 0.4976, + "step": 8770 + }, + { + "epoch": 1.43, + "learning_rate": 1.0776132794975326e-05, + "loss": 0.481, + "step": 8780 + }, + { + "epoch": 1.43, + "learning_rate": 1.0764917003140423e-05, + "loss": 0.4866, + "step": 8790 + }, + { + "epoch": 1.44, + "learning_rate": 1.075370121130552e-05, + "loss": 0.496, + "step": 8800 + }, + { + "epoch": 1.44, + "learning_rate": 1.0742485419470616e-05, + "loss": 0.492, + "step": 8810 + }, + { + "epoch": 1.44, + "learning_rate": 1.0731269627635712e-05, + "loss": 0.4819, + "step": 8820 + }, + { + "epoch": 1.44, + "learning_rate": 1.0720053835800808e-05, + "loss": 0.492, + "step": 8830 + }, + { + "epoch": 1.44, + "learning_rate": 1.0708838043965906e-05, + "loss": 0.4818, + "step": 8840 + }, + { + "epoch": 1.44, + "learning_rate": 1.0697622252131001e-05, + "loss": 0.4845, + "step": 8850 + }, + { + "epoch": 1.45, + "learning_rate": 1.0686406460296097e-05, + "loss": 0.5005, + "step": 8860 + }, + { + "epoch": 1.45, + "learning_rate": 1.0675190668461195e-05, + "loss": 0.4761, + "step": 8870 + }, + { + "epoch": 1.45, + "learning_rate": 1.066397487662629e-05, + "loss": 0.4906, + "step": 8880 + }, + { + "epoch": 1.45, + "learning_rate": 1.0652759084791386e-05, + "loss": 0.4873, + "step": 8890 + }, + { + "epoch": 1.45, + "learning_rate": 1.0641543292956484e-05, + "loss": 0.4991, + "step": 8900 + }, + { + "epoch": 1.45, + "learning_rate": 1.063032750112158e-05, + "loss": 0.491, + "step": 8910 + }, + { + "epoch": 1.46, + "learning_rate": 1.0619111709286675e-05, + "loss": 0.4937, + "step": 8920 + }, + { + "epoch": 1.46, + "learning_rate": 1.0607895917451774e-05, + "loss": 0.482, + "step": 8930 + }, + { + "epoch": 1.46, + "learning_rate": 1.059668012561687e-05, + "loss": 0.4956, + "step": 8940 + }, + { + "epoch": 1.46, + "learning_rate": 1.0585464333781965e-05, + "loss": 0.4852, + "step": 8950 + }, + { + "epoch": 1.46, + "learning_rate": 1.0574248541947063e-05, + "loss": 0.4891, + "step": 8960 + }, + { + "epoch": 1.46, + "learning_rate": 1.0563032750112159e-05, + "loss": 0.4873, + "step": 8970 + }, + { + "epoch": 1.47, + "learning_rate": 1.0551816958277255e-05, + "loss": 0.4929, + "step": 8980 + }, + { + "epoch": 1.47, + "learning_rate": 1.0540601166442352e-05, + "loss": 0.4868, + "step": 8990 + }, + { + "epoch": 1.47, + "learning_rate": 1.0529385374607448e-05, + "loss": 0.4931, + "step": 9000 + }, + { + "epoch": 1.47, + "learning_rate": 1.0518169582772544e-05, + "loss": 0.4922, + "step": 9010 + }, + { + "epoch": 1.47, + "learning_rate": 1.0506953790937642e-05, + "loss": 0.4791, + "step": 9020 + }, + { + "epoch": 1.47, + "learning_rate": 1.0495737999102737e-05, + "loss": 0.4915, + "step": 9030 + }, + { + "epoch": 1.48, + "learning_rate": 1.0484522207267833e-05, + "loss": 0.4853, + "step": 9040 + }, + { + "epoch": 1.48, + "learning_rate": 1.0473306415432931e-05, + "loss": 0.4826, + "step": 9050 + }, + { + "epoch": 1.48, + "learning_rate": 1.0462090623598026e-05, + "loss": 0.4948, + "step": 9060 + }, + { + "epoch": 1.48, + "learning_rate": 1.0450874831763123e-05, + "loss": 0.4932, + "step": 9070 + }, + { + "epoch": 1.48, + "learning_rate": 1.043965903992822e-05, + "loss": 0.4855, + "step": 9080 + }, + { + "epoch": 1.48, + "learning_rate": 1.0428443248093317e-05, + "loss": 0.4912, + "step": 9090 + }, + { + "epoch": 1.48, + "learning_rate": 1.0417227456258412e-05, + "loss": 0.4895, + "step": 9100 + }, + { + "epoch": 1.49, + "learning_rate": 1.040601166442351e-05, + "loss": 0.4907, + "step": 9110 + }, + { + "epoch": 1.49, + "learning_rate": 1.0394795872588606e-05, + "loss": 0.486, + "step": 9120 + }, + { + "epoch": 1.49, + "learning_rate": 1.03835800807537e-05, + "loss": 0.4858, + "step": 9130 + }, + { + "epoch": 1.49, + "learning_rate": 1.0372364288918799e-05, + "loss": 0.4949, + "step": 9140 + }, + { + "epoch": 1.49, + "learning_rate": 1.0361148497083895e-05, + "loss": 0.4826, + "step": 9150 + }, + { + "epoch": 1.49, + "learning_rate": 1.0349932705248991e-05, + "loss": 0.4847, + "step": 9160 + }, + { + "epoch": 1.5, + "learning_rate": 1.0338716913414088e-05, + "loss": 0.4926, + "step": 9170 + }, + { + "epoch": 1.5, + "learning_rate": 1.0327501121579184e-05, + "loss": 0.4815, + "step": 9180 + }, + { + "epoch": 1.5, + "learning_rate": 1.031628532974428e-05, + "loss": 0.489, + "step": 9190 + }, + { + "epoch": 1.5, + "learning_rate": 1.0305069537909379e-05, + "loss": 0.4925, + "step": 9200 + }, + { + "epoch": 1.5, + "learning_rate": 1.0293853746074473e-05, + "loss": 0.4765, + "step": 9210 + }, + { + "epoch": 1.5, + "learning_rate": 1.028263795423957e-05, + "loss": 0.4857, + "step": 9220 + }, + { + "epoch": 1.51, + "learning_rate": 1.0271422162404668e-05, + "loss": 0.4905, + "step": 9230 + }, + { + "epoch": 1.51, + "learning_rate": 1.0260206370569762e-05, + "loss": 0.4887, + "step": 9240 + }, + { + "epoch": 1.51, + "learning_rate": 1.0248990578734859e-05, + "loss": 0.4823, + "step": 9250 + }, + { + "epoch": 1.51, + "learning_rate": 1.0237774786899957e-05, + "loss": 0.4873, + "step": 9260 + }, + { + "epoch": 1.51, + "learning_rate": 1.0226558995065053e-05, + "loss": 0.4931, + "step": 9270 + }, + { + "epoch": 1.51, + "learning_rate": 1.0215343203230148e-05, + "loss": 0.4917, + "step": 9280 + }, + { + "epoch": 1.52, + "learning_rate": 1.0204127411395246e-05, + "loss": 0.4895, + "step": 9290 + }, + { + "epoch": 1.52, + "learning_rate": 1.0192911619560342e-05, + "loss": 0.4763, + "step": 9300 + }, + { + "epoch": 1.52, + "learning_rate": 1.0181695827725437e-05, + "loss": 0.4917, + "step": 9310 + }, + { + "epoch": 1.52, + "learning_rate": 1.0170480035890535e-05, + "loss": 0.4755, + "step": 9320 + }, + { + "epoch": 1.52, + "learning_rate": 1.0159264244055631e-05, + "loss": 0.503, + "step": 9330 + }, + { + "epoch": 1.52, + "learning_rate": 1.0148048452220728e-05, + "loss": 0.4761, + "step": 9340 + }, + { + "epoch": 1.53, + "learning_rate": 1.0136832660385824e-05, + "loss": 0.479, + "step": 9350 + }, + { + "epoch": 1.53, + "learning_rate": 1.012561686855092e-05, + "loss": 0.4863, + "step": 9360 + }, + { + "epoch": 1.53, + "learning_rate": 1.0114401076716017e-05, + "loss": 0.491, + "step": 9370 + }, + { + "epoch": 1.53, + "learning_rate": 1.0103185284881115e-05, + "loss": 0.4828, + "step": 9380 + }, + { + "epoch": 1.53, + "learning_rate": 1.009196949304621e-05, + "loss": 0.4839, + "step": 9390 + }, + { + "epoch": 1.53, + "learning_rate": 1.0080753701211306e-05, + "loss": 0.4931, + "step": 9400 + }, + { + "epoch": 1.54, + "learning_rate": 1.0069537909376404e-05, + "loss": 0.4743, + "step": 9410 + }, + { + "epoch": 1.54, + "learning_rate": 1.0058322117541498e-05, + "loss": 0.4825, + "step": 9420 + }, + { + "epoch": 1.54, + "learning_rate": 1.0047106325706595e-05, + "loss": 0.4883, + "step": 9430 + }, + { + "epoch": 1.54, + "learning_rate": 1.0035890533871693e-05, + "loss": 0.4772, + "step": 9440 + }, + { + "epoch": 1.54, + "learning_rate": 1.002467474203679e-05, + "loss": 0.4868, + "step": 9450 + }, + { + "epoch": 1.54, + "learning_rate": 1.0013458950201884e-05, + "loss": 0.4832, + "step": 9460 + }, + { + "epoch": 1.55, + "learning_rate": 1.0002243158366982e-05, + "loss": 0.4858, + "step": 9470 + }, + { + "epoch": 1.55, + "learning_rate": 9.991027366532078e-06, + "loss": 0.4846, + "step": 9480 + }, + { + "epoch": 1.55, + "learning_rate": 9.979811574697175e-06, + "loss": 0.486, + "step": 9490 + }, + { + "epoch": 1.55, + "learning_rate": 9.968595782862271e-06, + "loss": 0.487, + "step": 9500 + }, + { + "epoch": 1.55, + "learning_rate": 9.957379991027367e-06, + "loss": 0.4783, + "step": 9510 + }, + { + "epoch": 1.55, + "learning_rate": 9.946164199192464e-06, + "loss": 0.4837, + "step": 9520 + }, + { + "epoch": 1.56, + "learning_rate": 9.93494840735756e-06, + "loss": 0.4794, + "step": 9530 + }, + { + "epoch": 1.56, + "learning_rate": 9.923732615522656e-06, + "loss": 0.4787, + "step": 9540 + }, + { + "epoch": 1.56, + "learning_rate": 9.912516823687753e-06, + "loss": 0.4906, + "step": 9550 + }, + { + "epoch": 1.56, + "learning_rate": 9.901301031852849e-06, + "loss": 0.4844, + "step": 9560 + }, + { + "epoch": 1.56, + "learning_rate": 9.890085240017945e-06, + "loss": 0.4828, + "step": 9570 + }, + { + "epoch": 1.56, + "learning_rate": 9.878869448183044e-06, + "loss": 0.4763, + "step": 9580 + }, + { + "epoch": 1.56, + "learning_rate": 9.867653656348138e-06, + "loss": 0.4879, + "step": 9590 + }, + { + "epoch": 1.57, + "learning_rate": 9.856437864513235e-06, + "loss": 0.4978, + "step": 9600 + }, + { + "epoch": 1.57, + "learning_rate": 9.845222072678333e-06, + "loss": 0.4805, + "step": 9610 + }, + { + "epoch": 1.57, + "learning_rate": 9.834006280843429e-06, + "loss": 0.4924, + "step": 9620 + }, + { + "epoch": 1.57, + "learning_rate": 9.822790489008525e-06, + "loss": 0.4909, + "step": 9630 + }, + { + "epoch": 1.57, + "learning_rate": 9.811574697173622e-06, + "loss": 0.4831, + "step": 9640 + }, + { + "epoch": 1.57, + "learning_rate": 9.800358905338718e-06, + "loss": 0.485, + "step": 9650 + }, + { + "epoch": 1.58, + "learning_rate": 9.789143113503814e-06, + "loss": 0.4802, + "step": 9660 + }, + { + "epoch": 1.58, + "learning_rate": 9.77792732166891e-06, + "loss": 0.4951, + "step": 9670 + }, + { + "epoch": 1.58, + "learning_rate": 9.766711529834007e-06, + "loss": 0.4784, + "step": 9680 + }, + { + "epoch": 1.58, + "learning_rate": 9.755495737999103e-06, + "loss": 0.4788, + "step": 9690 + }, + { + "epoch": 1.58, + "learning_rate": 9.7442799461642e-06, + "loss": 0.4798, + "step": 9700 + }, + { + "epoch": 1.58, + "learning_rate": 9.733064154329296e-06, + "loss": 0.4858, + "step": 9710 + }, + { + "epoch": 1.59, + "learning_rate": 9.721848362494393e-06, + "loss": 0.4845, + "step": 9720 + }, + { + "epoch": 1.59, + "learning_rate": 9.710632570659489e-06, + "loss": 0.4871, + "step": 9730 + }, + { + "epoch": 1.59, + "learning_rate": 9.699416778824587e-06, + "loss": 0.4809, + "step": 9740 + }, + { + "epoch": 1.59, + "learning_rate": 9.688200986989682e-06, + "loss": 0.4902, + "step": 9750 + }, + { + "epoch": 1.59, + "learning_rate": 9.67698519515478e-06, + "loss": 0.4961, + "step": 9760 + }, + { + "epoch": 1.59, + "learning_rate": 9.665769403319876e-06, + "loss": 0.4749, + "step": 9770 + }, + { + "epoch": 1.6, + "learning_rate": 9.65455361148497e-06, + "loss": 0.4922, + "step": 9780 + }, + { + "epoch": 1.6, + "learning_rate": 9.643337819650069e-06, + "loss": 0.4824, + "step": 9790 + }, + { + "epoch": 1.6, + "learning_rate": 9.632122027815165e-06, + "loss": 0.4853, + "step": 9800 + }, + { + "epoch": 1.6, + "learning_rate": 9.620906235980261e-06, + "loss": 0.4808, + "step": 9810 + }, + { + "epoch": 1.6, + "learning_rate": 9.609690444145358e-06, + "loss": 0.4878, + "step": 9820 + }, + { + "epoch": 1.6, + "learning_rate": 9.598474652310454e-06, + "loss": 0.4847, + "step": 9830 + }, + { + "epoch": 1.61, + "learning_rate": 9.58725886047555e-06, + "loss": 0.4722, + "step": 9840 + }, + { + "epoch": 1.61, + "learning_rate": 9.576043068640647e-06, + "loss": 0.492, + "step": 9850 + }, + { + "epoch": 1.61, + "learning_rate": 9.564827276805743e-06, + "loss": 0.4868, + "step": 9860 + }, + { + "epoch": 1.61, + "learning_rate": 9.55361148497084e-06, + "loss": 0.4877, + "step": 9870 + }, + { + "epoch": 1.61, + "learning_rate": 9.542395693135936e-06, + "loss": 0.4925, + "step": 9880 + }, + { + "epoch": 1.61, + "learning_rate": 9.531179901301032e-06, + "loss": 0.487, + "step": 9890 + }, + { + "epoch": 1.62, + "learning_rate": 9.519964109466129e-06, + "loss": 0.477, + "step": 9900 + }, + { + "epoch": 1.62, + "learning_rate": 9.508748317631225e-06, + "loss": 0.4781, + "step": 9910 + }, + { + "epoch": 1.62, + "learning_rate": 9.497532525796323e-06, + "loss": 0.4943, + "step": 9920 + }, + { + "epoch": 1.62, + "learning_rate": 9.486316733961418e-06, + "loss": 0.4877, + "step": 9930 + }, + { + "epoch": 1.62, + "learning_rate": 9.475100942126516e-06, + "loss": 0.4861, + "step": 9940 + }, + { + "epoch": 1.62, + "learning_rate": 9.463885150291612e-06, + "loss": 0.4761, + "step": 9950 + }, + { + "epoch": 1.63, + "learning_rate": 9.452669358456707e-06, + "loss": 0.4803, + "step": 9960 + }, + { + "epoch": 1.63, + "learning_rate": 9.441453566621805e-06, + "loss": 0.4799, + "step": 9970 + }, + { + "epoch": 1.63, + "learning_rate": 9.430237774786901e-06, + "loss": 0.4706, + "step": 9980 + }, + { + "epoch": 1.63, + "learning_rate": 9.419021982951998e-06, + "loss": 0.4929, + "step": 9990 + }, + { + "epoch": 1.63, + "learning_rate": 9.407806191117094e-06, + "loss": 0.4891, + "step": 10000 + }, + { + "epoch": 1.63, + "learning_rate": 9.39659039928219e-06, + "loss": 0.4708, + "step": 10010 + }, + { + "epoch": 1.64, + "learning_rate": 9.385374607447287e-06, + "loss": 0.4793, + "step": 10020 + }, + { + "epoch": 1.64, + "learning_rate": 9.374158815612383e-06, + "loss": 0.4819, + "step": 10030 + }, + { + "epoch": 1.64, + "learning_rate": 9.36294302377748e-06, + "loss": 0.4805, + "step": 10040 + }, + { + "epoch": 1.64, + "learning_rate": 9.351727231942576e-06, + "loss": 0.4882, + "step": 10050 + }, + { + "epoch": 1.64, + "learning_rate": 9.340511440107672e-06, + "loss": 0.4785, + "step": 10060 + }, + { + "epoch": 1.64, + "learning_rate": 9.329295648272768e-06, + "loss": 0.4856, + "step": 10070 + }, + { + "epoch": 1.64, + "learning_rate": 9.318079856437865e-06, + "loss": 0.4885, + "step": 10080 + }, + { + "epoch": 1.65, + "learning_rate": 9.306864064602961e-06, + "loss": 0.4737, + "step": 10090 + }, + { + "epoch": 1.65, + "learning_rate": 9.29564827276806e-06, + "loss": 0.4882, + "step": 10100 + }, + { + "epoch": 1.65, + "learning_rate": 9.284432480933154e-06, + "loss": 0.4874, + "step": 10110 + }, + { + "epoch": 1.65, + "learning_rate": 9.273216689098252e-06, + "loss": 0.4861, + "step": 10120 + }, + { + "epoch": 1.65, + "learning_rate": 9.262000897263348e-06, + "loss": 0.4882, + "step": 10130 + }, + { + "epoch": 1.65, + "learning_rate": 9.250785105428443e-06, + "loss": 0.4722, + "step": 10140 + }, + { + "epoch": 1.66, + "learning_rate": 9.239569313593541e-06, + "loss": 0.4803, + "step": 10150 + }, + { + "epoch": 1.66, + "learning_rate": 9.228353521758637e-06, + "loss": 0.467, + "step": 10160 + }, + { + "epoch": 1.66, + "learning_rate": 9.217137729923734e-06, + "loss": 0.4861, + "step": 10170 + }, + { + "epoch": 1.66, + "learning_rate": 9.20592193808883e-06, + "loss": 0.4878, + "step": 10180 + }, + { + "epoch": 1.66, + "learning_rate": 9.194706146253926e-06, + "loss": 0.4742, + "step": 10190 + }, + { + "epoch": 1.66, + "learning_rate": 9.183490354419023e-06, + "loss": 0.4746, + "step": 10200 + }, + { + "epoch": 1.67, + "learning_rate": 9.172274562584119e-06, + "loss": 0.4853, + "step": 10210 + }, + { + "epoch": 1.67, + "learning_rate": 9.161058770749215e-06, + "loss": 0.4875, + "step": 10220 + }, + { + "epoch": 1.67, + "learning_rate": 9.149842978914312e-06, + "loss": 0.4843, + "step": 10230 + }, + { + "epoch": 1.67, + "learning_rate": 9.138627187079408e-06, + "loss": 0.4984, + "step": 10240 + }, + { + "epoch": 1.67, + "learning_rate": 9.127411395244505e-06, + "loss": 0.485, + "step": 10250 + }, + { + "epoch": 1.67, + "learning_rate": 9.116195603409601e-06, + "loss": 0.4859, + "step": 10260 + }, + { + "epoch": 1.68, + "learning_rate": 9.104979811574697e-06, + "loss": 0.4807, + "step": 10270 + }, + { + "epoch": 1.68, + "learning_rate": 9.093764019739795e-06, + "loss": 0.4842, + "step": 10280 + }, + { + "epoch": 1.68, + "learning_rate": 9.08254822790489e-06, + "loss": 0.4626, + "step": 10290 + }, + { + "epoch": 1.68, + "learning_rate": 9.071332436069988e-06, + "loss": 0.4593, + "step": 10300 + }, + { + "epoch": 1.68, + "learning_rate": 9.060116644235084e-06, + "loss": 0.4902, + "step": 10310 + }, + { + "epoch": 1.68, + "learning_rate": 9.048900852400179e-06, + "loss": 0.4693, + "step": 10320 + }, + { + "epoch": 1.69, + "learning_rate": 9.037685060565277e-06, + "loss": 0.4949, + "step": 10330 + }, + { + "epoch": 1.69, + "learning_rate": 9.026469268730373e-06, + "loss": 0.4833, + "step": 10340 + }, + { + "epoch": 1.69, + "learning_rate": 9.01525347689547e-06, + "loss": 0.4874, + "step": 10350 + }, + { + "epoch": 1.69, + "learning_rate": 9.004037685060566e-06, + "loss": 0.4766, + "step": 10360 + }, + { + "epoch": 1.69, + "learning_rate": 8.992821893225663e-06, + "loss": 0.4739, + "step": 10370 + }, + { + "epoch": 1.69, + "learning_rate": 8.981606101390759e-06, + "loss": 0.4683, + "step": 10380 + }, + { + "epoch": 1.7, + "learning_rate": 8.970390309555855e-06, + "loss": 0.4887, + "step": 10390 + }, + { + "epoch": 1.7, + "learning_rate": 8.959174517720952e-06, + "loss": 0.4944, + "step": 10400 + }, + { + "epoch": 1.7, + "learning_rate": 8.947958725886048e-06, + "loss": 0.4843, + "step": 10410 + }, + { + "epoch": 1.7, + "learning_rate": 8.936742934051144e-06, + "loss": 0.4733, + "step": 10420 + }, + { + "epoch": 1.7, + "learning_rate": 8.925527142216242e-06, + "loss": 0.476, + "step": 10430 + }, + { + "epoch": 1.7, + "learning_rate": 8.914311350381337e-06, + "loss": 0.4806, + "step": 10440 + }, + { + "epoch": 1.71, + "learning_rate": 8.903095558546433e-06, + "loss": 0.49, + "step": 10450 + }, + { + "epoch": 1.71, + "learning_rate": 8.891879766711531e-06, + "loss": 0.472, + "step": 10460 + }, + { + "epoch": 1.71, + "learning_rate": 8.880663974876626e-06, + "loss": 0.4827, + "step": 10470 + }, + { + "epoch": 1.71, + "learning_rate": 8.869448183041724e-06, + "loss": 0.4752, + "step": 10480 + }, + { + "epoch": 1.71, + "learning_rate": 8.85823239120682e-06, + "loss": 0.4922, + "step": 10490 + }, + { + "epoch": 1.71, + "learning_rate": 8.847016599371915e-06, + "loss": 0.487, + "step": 10500 + }, + { + "epoch": 1.72, + "learning_rate": 8.835800807537013e-06, + "loss": 0.4892, + "step": 10510 + }, + { + "epoch": 1.72, + "learning_rate": 8.82458501570211e-06, + "loss": 0.4788, + "step": 10520 + }, + { + "epoch": 1.72, + "learning_rate": 8.813369223867206e-06, + "loss": 0.473, + "step": 10530 + }, + { + "epoch": 1.72, + "learning_rate": 8.802153432032302e-06, + "loss": 0.4749, + "step": 10540 + }, + { + "epoch": 1.72, + "learning_rate": 8.790937640197399e-06, + "loss": 0.4792, + "step": 10550 + }, + { + "epoch": 1.72, + "learning_rate": 8.779721848362495e-06, + "loss": 0.4705, + "step": 10560 + }, + { + "epoch": 1.72, + "learning_rate": 8.768506056527591e-06, + "loss": 0.4854, + "step": 10570 + }, + { + "epoch": 1.73, + "learning_rate": 8.757290264692688e-06, + "loss": 0.49, + "step": 10580 + }, + { + "epoch": 1.73, + "learning_rate": 8.746074472857784e-06, + "loss": 0.4821, + "step": 10590 + }, + { + "epoch": 1.73, + "learning_rate": 8.73485868102288e-06, + "loss": 0.4762, + "step": 10600 + }, + { + "epoch": 1.73, + "learning_rate": 8.723642889187978e-06, + "loss": 0.4871, + "step": 10610 + }, + { + "epoch": 1.73, + "learning_rate": 8.712427097353073e-06, + "loss": 0.4743, + "step": 10620 + }, + { + "epoch": 1.73, + "learning_rate": 8.70121130551817e-06, + "loss": 0.4749, + "step": 10630 + }, + { + "epoch": 1.74, + "learning_rate": 8.689995513683268e-06, + "loss": 0.4855, + "step": 10640 + }, + { + "epoch": 1.74, + "learning_rate": 8.678779721848362e-06, + "loss": 0.4769, + "step": 10650 + }, + { + "epoch": 1.74, + "learning_rate": 8.66756393001346e-06, + "loss": 0.478, + "step": 10660 + }, + { + "epoch": 1.74, + "learning_rate": 8.656348138178557e-06, + "loss": 0.4818, + "step": 10670 + }, + { + "epoch": 1.74, + "learning_rate": 8.645132346343651e-06, + "loss": 0.469, + "step": 10680 + }, + { + "epoch": 1.74, + "learning_rate": 8.63391655450875e-06, + "loss": 0.4792, + "step": 10690 + }, + { + "epoch": 1.75, + "learning_rate": 8.622700762673846e-06, + "loss": 0.4864, + "step": 10700 + }, + { + "epoch": 1.75, + "learning_rate": 8.611484970838942e-06, + "loss": 0.4849, + "step": 10710 + }, + { + "epoch": 1.75, + "learning_rate": 8.600269179004038e-06, + "loss": 0.4867, + "step": 10720 + }, + { + "epoch": 1.75, + "learning_rate": 8.589053387169135e-06, + "loss": 0.4735, + "step": 10730 + }, + { + "epoch": 1.75, + "learning_rate": 8.577837595334231e-06, + "loss": 0.49, + "step": 10740 + }, + { + "epoch": 1.75, + "learning_rate": 8.566621803499327e-06, + "loss": 0.4854, + "step": 10750 + }, + { + "epoch": 1.76, + "learning_rate": 8.555406011664424e-06, + "loss": 0.4781, + "step": 10760 + }, + { + "epoch": 1.76, + "learning_rate": 8.54419021982952e-06, + "loss": 0.4723, + "step": 10770 + }, + { + "epoch": 1.76, + "learning_rate": 8.532974427994617e-06, + "loss": 0.4835, + "step": 10780 + }, + { + "epoch": 1.76, + "learning_rate": 8.521758636159715e-06, + "loss": 0.4752, + "step": 10790 + }, + { + "epoch": 1.76, + "learning_rate": 8.51054284432481e-06, + "loss": 0.4745, + "step": 10800 + }, + { + "epoch": 1.76, + "learning_rate": 8.499327052489906e-06, + "loss": 0.4869, + "step": 10810 + }, + { + "epoch": 1.77, + "learning_rate": 8.488111260655004e-06, + "loss": 0.4875, + "step": 10820 + }, + { + "epoch": 1.77, + "learning_rate": 8.476895468820098e-06, + "loss": 0.4837, + "step": 10830 + }, + { + "epoch": 1.77, + "learning_rate": 8.465679676985196e-06, + "loss": 0.477, + "step": 10840 + }, + { + "epoch": 1.77, + "learning_rate": 8.454463885150293e-06, + "loss": 0.4963, + "step": 10850 + }, + { + "epoch": 1.77, + "learning_rate": 8.443248093315389e-06, + "loss": 0.486, + "step": 10860 + }, + { + "epoch": 1.77, + "learning_rate": 8.432032301480485e-06, + "loss": 0.4843, + "step": 10870 + }, + { + "epoch": 1.78, + "learning_rate": 8.420816509645582e-06, + "loss": 0.4866, + "step": 10880 + }, + { + "epoch": 1.78, + "learning_rate": 8.409600717810678e-06, + "loss": 0.4873, + "step": 10890 + }, + { + "epoch": 1.78, + "learning_rate": 8.398384925975775e-06, + "loss": 0.4797, + "step": 10900 + }, + { + "epoch": 1.78, + "learning_rate": 8.387169134140871e-06, + "loss": 0.4702, + "step": 10910 + }, + { + "epoch": 1.78, + "learning_rate": 8.375953342305967e-06, + "loss": 0.4971, + "step": 10920 + }, + { + "epoch": 1.78, + "learning_rate": 8.364737550471064e-06, + "loss": 0.4778, + "step": 10930 + }, + { + "epoch": 1.79, + "learning_rate": 8.35352175863616e-06, + "loss": 0.4836, + "step": 10940 + }, + { + "epoch": 1.79, + "learning_rate": 8.342305966801258e-06, + "loss": 0.4871, + "step": 10950 + }, + { + "epoch": 1.79, + "learning_rate": 8.331090174966353e-06, + "loss": 0.4664, + "step": 10960 + }, + { + "epoch": 1.79, + "learning_rate": 8.31987438313145e-06, + "loss": 0.4833, + "step": 10970 + }, + { + "epoch": 1.79, + "learning_rate": 8.308658591296547e-06, + "loss": 0.4874, + "step": 10980 + }, + { + "epoch": 1.79, + "learning_rate": 8.297442799461642e-06, + "loss": 0.4779, + "step": 10990 + }, + { + "epoch": 1.8, + "learning_rate": 8.28622700762674e-06, + "loss": 0.4789, + "step": 11000 + }, + { + "epoch": 1.8, + "learning_rate": 8.275011215791836e-06, + "loss": 0.4619, + "step": 11010 + }, + { + "epoch": 1.8, + "learning_rate": 8.263795423956933e-06, + "loss": 0.4737, + "step": 11020 + }, + { + "epoch": 1.8, + "learning_rate": 8.252579632122029e-06, + "loss": 0.4787, + "step": 11030 + }, + { + "epoch": 1.8, + "learning_rate": 8.241363840287125e-06, + "loss": 0.4809, + "step": 11040 + }, + { + "epoch": 1.8, + "learning_rate": 8.230148048452222e-06, + "loss": 0.479, + "step": 11050 + }, + { + "epoch": 1.8, + "learning_rate": 8.218932256617318e-06, + "loss": 0.4817, + "step": 11060 + }, + { + "epoch": 1.81, + "learning_rate": 8.207716464782414e-06, + "loss": 0.4586, + "step": 11070 + }, + { + "epoch": 1.81, + "learning_rate": 8.19650067294751e-06, + "loss": 0.488, + "step": 11080 + }, + { + "epoch": 1.81, + "learning_rate": 8.185284881112607e-06, + "loss": 0.4714, + "step": 11090 + }, + { + "epoch": 1.81, + "learning_rate": 8.174069089277703e-06, + "loss": 0.4828, + "step": 11100 + }, + { + "epoch": 1.81, + "learning_rate": 8.1628532974428e-06, + "loss": 0.4935, + "step": 11110 + }, + { + "epoch": 1.81, + "learning_rate": 8.151637505607896e-06, + "loss": 0.4799, + "step": 11120 + }, + { + "epoch": 1.82, + "learning_rate": 8.140421713772994e-06, + "loss": 0.483, + "step": 11130 + }, + { + "epoch": 1.82, + "learning_rate": 8.129205921938089e-06, + "loss": 0.4798, + "step": 11140 + }, + { + "epoch": 1.82, + "learning_rate": 8.117990130103187e-06, + "loss": 0.4727, + "step": 11150 + }, + { + "epoch": 1.82, + "learning_rate": 8.106774338268283e-06, + "loss": 0.4746, + "step": 11160 + }, + { + "epoch": 1.82, + "learning_rate": 8.095558546433378e-06, + "loss": 0.4844, + "step": 11170 + }, + { + "epoch": 1.82, + "learning_rate": 8.084342754598476e-06, + "loss": 0.4718, + "step": 11180 + }, + { + "epoch": 1.83, + "learning_rate": 8.073126962763572e-06, + "loss": 0.4758, + "step": 11190 + }, + { + "epoch": 1.83, + "learning_rate": 8.061911170928669e-06, + "loss": 0.4762, + "step": 11200 + }, + { + "epoch": 1.83, + "learning_rate": 8.050695379093765e-06, + "loss": 0.4765, + "step": 11210 + }, + { + "epoch": 1.83, + "learning_rate": 8.039479587258861e-06, + "loss": 0.4748, + "step": 11220 + }, + { + "epoch": 1.83, + "learning_rate": 8.028263795423958e-06, + "loss": 0.481, + "step": 11230 + }, + { + "epoch": 1.83, + "learning_rate": 8.017048003589054e-06, + "loss": 0.4715, + "step": 11240 + }, + { + "epoch": 1.84, + "learning_rate": 8.00583221175415e-06, + "loss": 0.4802, + "step": 11250 + }, + { + "epoch": 1.84, + "learning_rate": 7.994616419919247e-06, + "loss": 0.4812, + "step": 11260 + }, + { + "epoch": 1.84, + "learning_rate": 7.983400628084343e-06, + "loss": 0.4891, + "step": 11270 + }, + { + "epoch": 1.84, + "learning_rate": 7.97218483624944e-06, + "loss": 0.4914, + "step": 11280 + }, + { + "epoch": 1.84, + "learning_rate": 7.960969044414536e-06, + "loss": 0.4749, + "step": 11290 + }, + { + "epoch": 1.84, + "learning_rate": 7.949753252579632e-06, + "loss": 0.4697, + "step": 11300 + }, + { + "epoch": 1.85, + "learning_rate": 7.93853746074473e-06, + "loss": 0.4848, + "step": 11310 + }, + { + "epoch": 1.85, + "learning_rate": 7.927321668909825e-06, + "loss": 0.4861, + "step": 11320 + }, + { + "epoch": 1.85, + "learning_rate": 7.916105877074923e-06, + "loss": 0.4813, + "step": 11330 + }, + { + "epoch": 1.85, + "learning_rate": 7.90489008524002e-06, + "loss": 0.4799, + "step": 11340 + }, + { + "epoch": 1.85, + "learning_rate": 7.893674293405114e-06, + "loss": 0.4856, + "step": 11350 + }, + { + "epoch": 1.85, + "learning_rate": 7.882458501570212e-06, + "loss": 0.4823, + "step": 11360 + }, + { + "epoch": 1.86, + "learning_rate": 7.871242709735308e-06, + "loss": 0.4806, + "step": 11370 + }, + { + "epoch": 1.86, + "learning_rate": 7.860026917900405e-06, + "loss": 0.4802, + "step": 11380 + }, + { + "epoch": 1.86, + "learning_rate": 7.848811126065501e-06, + "loss": 0.489, + "step": 11390 + }, + { + "epoch": 1.86, + "learning_rate": 7.837595334230597e-06, + "loss": 0.4811, + "step": 11400 + }, + { + "epoch": 1.86, + "learning_rate": 7.826379542395694e-06, + "loss": 0.4777, + "step": 11410 + }, + { + "epoch": 1.86, + "learning_rate": 7.81516375056079e-06, + "loss": 0.4803, + "step": 11420 + }, + { + "epoch": 1.87, + "learning_rate": 7.803947958725887e-06, + "loss": 0.4757, + "step": 11430 + }, + { + "epoch": 1.87, + "learning_rate": 7.792732166890983e-06, + "loss": 0.4696, + "step": 11440 + }, + { + "epoch": 1.87, + "learning_rate": 7.78151637505608e-06, + "loss": 0.4754, + "step": 11450 + }, + { + "epoch": 1.87, + "learning_rate": 7.770300583221176e-06, + "loss": 0.4764, + "step": 11460 + }, + { + "epoch": 1.87, + "learning_rate": 7.759084791386272e-06, + "loss": 0.4717, + "step": 11470 + }, + { + "epoch": 1.87, + "learning_rate": 7.747868999551368e-06, + "loss": 0.4794, + "step": 11480 + }, + { + "epoch": 1.88, + "learning_rate": 7.736653207716466e-06, + "loss": 0.4758, + "step": 11490 + }, + { + "epoch": 1.88, + "learning_rate": 7.725437415881561e-06, + "loss": 0.4767, + "step": 11500 + }, + { + "epoch": 1.88, + "learning_rate": 7.714221624046659e-06, + "loss": 0.4745, + "step": 11510 + }, + { + "epoch": 1.88, + "learning_rate": 7.703005832211755e-06, + "loss": 0.475, + "step": 11520 + }, + { + "epoch": 1.88, + "learning_rate": 7.69179004037685e-06, + "loss": 0.4748, + "step": 11530 + }, + { + "epoch": 1.88, + "learning_rate": 7.680574248541948e-06, + "loss": 0.4808, + "step": 11540 + }, + { + "epoch": 1.88, + "learning_rate": 7.669358456707045e-06, + "loss": 0.4655, + "step": 11550 + }, + { + "epoch": 1.89, + "learning_rate": 7.658142664872141e-06, + "loss": 0.4846, + "step": 11560 + }, + { + "epoch": 1.89, + "learning_rate": 7.646926873037237e-06, + "loss": 0.4688, + "step": 11570 + }, + { + "epoch": 1.89, + "learning_rate": 7.635711081202334e-06, + "loss": 0.4665, + "step": 11580 + }, + { + "epoch": 1.89, + "learning_rate": 7.62449528936743e-06, + "loss": 0.4782, + "step": 11590 + }, + { + "epoch": 1.89, + "learning_rate": 7.613279497532526e-06, + "loss": 0.4785, + "step": 11600 + }, + { + "epoch": 1.89, + "learning_rate": 7.6020637056976235e-06, + "loss": 0.4821, + "step": 11610 + }, + { + "epoch": 1.9, + "learning_rate": 7.590847913862719e-06, + "loss": 0.4835, + "step": 11620 + }, + { + "epoch": 1.9, + "learning_rate": 7.579632122027815e-06, + "loss": 0.4875, + "step": 11630 + }, + { + "epoch": 1.9, + "learning_rate": 7.568416330192913e-06, + "loss": 0.4837, + "step": 11640 + }, + { + "epoch": 1.9, + "learning_rate": 7.557200538358008e-06, + "loss": 0.4819, + "step": 11650 + }, + { + "epoch": 1.9, + "learning_rate": 7.545984746523105e-06, + "loss": 0.4704, + "step": 11660 + }, + { + "epoch": 1.9, + "learning_rate": 7.534768954688202e-06, + "loss": 0.477, + "step": 11670 + }, + { + "epoch": 1.91, + "learning_rate": 7.523553162853298e-06, + "loss": 0.4685, + "step": 11680 + }, + { + "epoch": 1.91, + "learning_rate": 7.512337371018394e-06, + "loss": 0.4822, + "step": 11690 + }, + { + "epoch": 1.91, + "learning_rate": 7.5011215791834916e-06, + "loss": 0.4747, + "step": 11700 + }, + { + "epoch": 1.91, + "learning_rate": 7.489905787348587e-06, + "loss": 0.4768, + "step": 11710 + }, + { + "epoch": 1.91, + "learning_rate": 7.4786899955136834e-06, + "loss": 0.4722, + "step": 11720 + }, + { + "epoch": 1.91, + "learning_rate": 7.467474203678781e-06, + "loss": 0.4909, + "step": 11730 + }, + { + "epoch": 1.92, + "learning_rate": 7.456258411843876e-06, + "loss": 0.4749, + "step": 11740 + }, + { + "epoch": 1.92, + "learning_rate": 7.445042620008973e-06, + "loss": 0.4766, + "step": 11750 + }, + { + "epoch": 1.92, + "learning_rate": 7.43382682817407e-06, + "loss": 0.4767, + "step": 11760 + }, + { + "epoch": 1.92, + "learning_rate": 7.422611036339166e-06, + "loss": 0.4737, + "step": 11770 + }, + { + "epoch": 1.92, + "learning_rate": 7.4113952445042624e-06, + "loss": 0.4827, + "step": 11780 + }, + { + "epoch": 1.92, + "learning_rate": 7.40017945266936e-06, + "loss": 0.4819, + "step": 11790 + }, + { + "epoch": 1.93, + "learning_rate": 7.388963660834455e-06, + "loss": 0.4827, + "step": 11800 + }, + { + "epoch": 1.93, + "learning_rate": 7.3777478689995515e-06, + "loss": 0.4795, + "step": 11810 + }, + { + "epoch": 1.93, + "learning_rate": 7.366532077164649e-06, + "loss": 0.4636, + "step": 11820 + }, + { + "epoch": 1.93, + "learning_rate": 7.355316285329744e-06, + "loss": 0.4824, + "step": 11830 + }, + { + "epoch": 1.93, + "learning_rate": 7.344100493494841e-06, + "loss": 0.4732, + "step": 11840 + }, + { + "epoch": 1.93, + "learning_rate": 7.332884701659938e-06, + "loss": 0.4798, + "step": 11850 + }, + { + "epoch": 1.94, + "learning_rate": 7.321668909825034e-06, + "loss": 0.4813, + "step": 11860 + }, + { + "epoch": 1.94, + "learning_rate": 7.3104531179901305e-06, + "loss": 0.4709, + "step": 11870 + }, + { + "epoch": 1.94, + "learning_rate": 7.299237326155228e-06, + "loss": 0.4833, + "step": 11880 + }, + { + "epoch": 1.94, + "learning_rate": 7.288021534320323e-06, + "loss": 0.4772, + "step": 11890 + }, + { + "epoch": 1.94, + "learning_rate": 7.2768057424854196e-06, + "loss": 0.4654, + "step": 11900 + }, + { + "epoch": 1.94, + "learning_rate": 7.265589950650517e-06, + "loss": 0.4779, + "step": 11910 + }, + { + "epoch": 1.95, + "learning_rate": 7.254374158815612e-06, + "loss": 0.4738, + "step": 11920 + }, + { + "epoch": 1.95, + "learning_rate": 7.2431583669807095e-06, + "loss": 0.4783, + "step": 11930 + }, + { + "epoch": 1.95, + "learning_rate": 7.231942575145806e-06, + "loss": 0.4798, + "step": 11940 + }, + { + "epoch": 1.95, + "learning_rate": 7.220726783310902e-06, + "loss": 0.4845, + "step": 11950 + }, + { + "epoch": 1.95, + "learning_rate": 7.2095109914759986e-06, + "loss": 0.4701, + "step": 11960 + }, + { + "epoch": 1.95, + "learning_rate": 7.198295199641096e-06, + "loss": 0.4759, + "step": 11970 + }, + { + "epoch": 1.95, + "learning_rate": 7.187079407806191e-06, + "loss": 0.4781, + "step": 11980 + }, + { + "epoch": 1.96, + "learning_rate": 7.175863615971288e-06, + "loss": 0.4689, + "step": 11990 + }, + { + "epoch": 1.96, + "learning_rate": 7.164647824136385e-06, + "loss": 0.4796, + "step": 12000 + }, + { + "epoch": 1.96, + "learning_rate": 7.15343203230148e-06, + "loss": 0.4801, + "step": 12010 + }, + { + "epoch": 1.96, + "learning_rate": 7.1422162404665775e-06, + "loss": 0.4685, + "step": 12020 + }, + { + "epoch": 1.96, + "learning_rate": 7.131000448631674e-06, + "loss": 0.4834, + "step": 12030 + }, + { + "epoch": 1.96, + "learning_rate": 7.11978465679677e-06, + "loss": 0.474, + "step": 12040 + }, + { + "epoch": 1.97, + "learning_rate": 7.108568864961867e-06, + "loss": 0.4703, + "step": 12050 + }, + { + "epoch": 1.97, + "learning_rate": 7.097353073126964e-06, + "loss": 0.4895, + "step": 12060 + }, + { + "epoch": 1.97, + "learning_rate": 7.086137281292059e-06, + "loss": 0.4765, + "step": 12070 + }, + { + "epoch": 1.97, + "learning_rate": 7.074921489457156e-06, + "loss": 0.4661, + "step": 12080 + }, + { + "epoch": 1.97, + "learning_rate": 7.063705697622253e-06, + "loss": 0.4753, + "step": 12090 + }, + { + "epoch": 1.97, + "learning_rate": 7.05248990578735e-06, + "loss": 0.4835, + "step": 12100 + }, + { + "epoch": 1.98, + "learning_rate": 7.041274113952446e-06, + "loss": 0.4747, + "step": 12110 + }, + { + "epoch": 1.98, + "learning_rate": 7.030058322117542e-06, + "loss": 0.4751, + "step": 12120 + }, + { + "epoch": 1.98, + "learning_rate": 7.018842530282639e-06, + "loss": 0.4702, + "step": 12130 + }, + { + "epoch": 1.98, + "learning_rate": 7.007626738447735e-06, + "loss": 0.4744, + "step": 12140 + }, + { + "epoch": 1.98, + "learning_rate": 6.996410946612832e-06, + "loss": 0.4729, + "step": 12150 + }, + { + "epoch": 1.98, + "learning_rate": 6.985195154777928e-06, + "loss": 0.4689, + "step": 12160 + }, + { + "epoch": 1.99, + "learning_rate": 6.973979362943024e-06, + "loss": 0.4664, + "step": 12170 + }, + { + "epoch": 1.99, + "learning_rate": 6.962763571108121e-06, + "loss": 0.4741, + "step": 12180 + }, + { + "epoch": 1.99, + "learning_rate": 6.951547779273218e-06, + "loss": 0.4676, + "step": 12190 + }, + { + "epoch": 1.99, + "learning_rate": 6.940331987438314e-06, + "loss": 0.4841, + "step": 12200 + }, + { + "epoch": 1.99, + "learning_rate": 6.92911619560341e-06, + "loss": 0.4848, + "step": 12210 + }, + { + "epoch": 1.99, + "learning_rate": 6.917900403768507e-06, + "loss": 0.4792, + "step": 12220 + }, + { + "epoch": 2.0, + "learning_rate": 6.906684611933603e-06, + "loss": 0.4863, + "step": 12230 + }, + { + "epoch": 2.0, + "learning_rate": 6.8954688200987e-06, + "loss": 0.4789, + "step": 12240 + }, + { + "epoch": 2.0, + "learning_rate": 6.884253028263796e-06, + "loss": 0.4726, + "step": 12250 + }, + { + "epoch": 2.0, + "learning_rate": 6.873037236428892e-06, + "loss": 0.4762, + "step": 12260 + }, + { + "epoch": 2.0, + "learning_rate": 6.861821444593989e-06, + "loss": 0.4746, + "step": 12270 + }, + { + "epoch": 2.0, + "learning_rate": 6.850605652759086e-06, + "loss": 0.4637, + "step": 12280 + }, + { + "epoch": 2.01, + "learning_rate": 6.839389860924182e-06, + "loss": 0.4619, + "step": 12290 + }, + { + "epoch": 2.01, + "learning_rate": 6.828174069089278e-06, + "loss": 0.4725, + "step": 12300 + }, + { + "epoch": 2.01, + "learning_rate": 6.816958277254375e-06, + "loss": 0.4737, + "step": 12310 + }, + { + "epoch": 2.01, + "learning_rate": 6.805742485419471e-06, + "loss": 0.4792, + "step": 12320 + }, + { + "epoch": 2.01, + "learning_rate": 6.794526693584568e-06, + "loss": 0.4797, + "step": 12330 + }, + { + "epoch": 2.01, + "learning_rate": 6.783310901749664e-06, + "loss": 0.4707, + "step": 12340 + }, + { + "epoch": 2.02, + "learning_rate": 6.77209510991476e-06, + "loss": 0.4752, + "step": 12350 + }, + { + "epoch": 2.02, + "learning_rate": 6.760879318079857e-06, + "loss": 0.4748, + "step": 12360 + }, + { + "epoch": 2.02, + "learning_rate": 6.749663526244954e-06, + "loss": 0.471, + "step": 12370 + }, + { + "epoch": 2.02, + "learning_rate": 6.73844773441005e-06, + "loss": 0.475, + "step": 12380 + }, + { + "epoch": 2.02, + "learning_rate": 6.727231942575146e-06, + "loss": 0.4686, + "step": 12390 + }, + { + "epoch": 2.02, + "learning_rate": 6.716016150740243e-06, + "loss": 0.4706, + "step": 12400 + }, + { + "epoch": 2.03, + "learning_rate": 6.704800358905339e-06, + "loss": 0.4615, + "step": 12410 + }, + { + "epoch": 2.03, + "learning_rate": 6.693584567070436e-06, + "loss": 0.4762, + "step": 12420 + }, + { + "epoch": 2.03, + "learning_rate": 6.6823687752355324e-06, + "loss": 0.4815, + "step": 12430 + }, + { + "epoch": 2.03, + "learning_rate": 6.671152983400628e-06, + "loss": 0.477, + "step": 12440 + }, + { + "epoch": 2.03, + "learning_rate": 6.659937191565725e-06, + "loss": 0.4645, + "step": 12450 + }, + { + "epoch": 2.03, + "learning_rate": 6.648721399730822e-06, + "loss": 0.4692, + "step": 12460 + }, + { + "epoch": 2.03, + "learning_rate": 6.637505607895918e-06, + "loss": 0.4739, + "step": 12470 + }, + { + "epoch": 2.04, + "learning_rate": 6.626289816061014e-06, + "loss": 0.4704, + "step": 12480 + }, + { + "epoch": 2.04, + "learning_rate": 6.615074024226111e-06, + "loss": 0.4788, + "step": 12490 + }, + { + "epoch": 2.04, + "learning_rate": 6.603858232391207e-06, + "loss": 0.4717, + "step": 12500 + }, + { + "epoch": 2.04, + "learning_rate": 6.592642440556304e-06, + "loss": 0.4595, + "step": 12510 + }, + { + "epoch": 2.04, + "learning_rate": 6.5814266487214005e-06, + "loss": 0.4764, + "step": 12520 + }, + { + "epoch": 2.04, + "learning_rate": 6.570210856886496e-06, + "loss": 0.4757, + "step": 12530 + }, + { + "epoch": 2.05, + "learning_rate": 6.558995065051593e-06, + "loss": 0.4757, + "step": 12540 + }, + { + "epoch": 2.05, + "learning_rate": 6.54777927321669e-06, + "loss": 0.474, + "step": 12550 + }, + { + "epoch": 2.05, + "learning_rate": 6.536563481381786e-06, + "loss": 0.4786, + "step": 12560 + }, + { + "epoch": 2.05, + "learning_rate": 6.525347689546882e-06, + "loss": 0.4662, + "step": 12570 + }, + { + "epoch": 2.05, + "learning_rate": 6.5141318977119795e-06, + "loss": 0.4684, + "step": 12580 + }, + { + "epoch": 2.05, + "learning_rate": 6.502916105877075e-06, + "loss": 0.4727, + "step": 12590 + }, + { + "epoch": 2.06, + "learning_rate": 6.491700314042172e-06, + "loss": 0.4913, + "step": 12600 + }, + { + "epoch": 2.06, + "learning_rate": 6.4804845222072685e-06, + "loss": 0.4618, + "step": 12610 + }, + { + "epoch": 2.06, + "learning_rate": 6.469268730372364e-06, + "loss": 0.4695, + "step": 12620 + }, + { + "epoch": 2.06, + "learning_rate": 6.458052938537461e-06, + "loss": 0.4704, + "step": 12630 + }, + { + "epoch": 2.06, + "learning_rate": 6.4468371467025585e-06, + "loss": 0.4812, + "step": 12640 + }, + { + "epoch": 2.06, + "learning_rate": 6.435621354867654e-06, + "loss": 0.4707, + "step": 12650 + }, + { + "epoch": 2.07, + "learning_rate": 6.42440556303275e-06, + "loss": 0.4845, + "step": 12660 + }, + { + "epoch": 2.07, + "learning_rate": 6.4131897711978475e-06, + "loss": 0.469, + "step": 12670 + }, + { + "epoch": 2.07, + "learning_rate": 6.401973979362943e-06, + "loss": 0.4649, + "step": 12680 + }, + { + "epoch": 2.07, + "learning_rate": 6.39075818752804e-06, + "loss": 0.4684, + "step": 12690 + }, + { + "epoch": 2.07, + "learning_rate": 6.379542395693137e-06, + "loss": 0.4688, + "step": 12700 + }, + { + "epoch": 2.07, + "learning_rate": 6.368326603858232e-06, + "loss": 0.4748, + "step": 12710 + }, + { + "epoch": 2.08, + "learning_rate": 6.357110812023329e-06, + "loss": 0.4631, + "step": 12720 + }, + { + "epoch": 2.08, + "learning_rate": 6.3458950201884265e-06, + "loss": 0.4814, + "step": 12730 + }, + { + "epoch": 2.08, + "learning_rate": 6.334679228353522e-06, + "loss": 0.4598, + "step": 12740 + }, + { + "epoch": 2.08, + "learning_rate": 6.323463436518618e-06, + "loss": 0.4807, + "step": 12750 + }, + { + "epoch": 2.08, + "learning_rate": 6.312247644683716e-06, + "loss": 0.4775, + "step": 12760 + }, + { + "epoch": 2.08, + "learning_rate": 6.301031852848811e-06, + "loss": 0.464, + "step": 12770 + }, + { + "epoch": 2.09, + "learning_rate": 6.289816061013908e-06, + "loss": 0.4724, + "step": 12780 + }, + { + "epoch": 2.09, + "learning_rate": 6.278600269179005e-06, + "loss": 0.4737, + "step": 12790 + }, + { + "epoch": 2.09, + "learning_rate": 6.2673844773441e-06, + "loss": 0.4737, + "step": 12800 + }, + { + "epoch": 2.09, + "learning_rate": 6.256168685509197e-06, + "loss": 0.4748, + "step": 12810 + }, + { + "epoch": 2.09, + "learning_rate": 6.244952893674295e-06, + "loss": 0.4736, + "step": 12820 + }, + { + "epoch": 2.09, + "learning_rate": 6.23373710183939e-06, + "loss": 0.483, + "step": 12830 + }, + { + "epoch": 2.1, + "learning_rate": 6.2225213100044865e-06, + "loss": 0.4687, + "step": 12840 + }, + { + "epoch": 2.1, + "learning_rate": 6.211305518169584e-06, + "loss": 0.4716, + "step": 12850 + }, + { + "epoch": 2.1, + "learning_rate": 6.200089726334679e-06, + "loss": 0.4723, + "step": 12860 + }, + { + "epoch": 2.1, + "learning_rate": 6.188873934499776e-06, + "loss": 0.4714, + "step": 12870 + }, + { + "epoch": 2.1, + "learning_rate": 6.177658142664873e-06, + "loss": 0.4782, + "step": 12880 + }, + { + "epoch": 2.1, + "learning_rate": 6.166442350829969e-06, + "loss": 0.4804, + "step": 12890 + }, + { + "epoch": 2.11, + "learning_rate": 6.1552265589950654e-06, + "loss": 0.4793, + "step": 12900 + }, + { + "epoch": 2.11, + "learning_rate": 6.144010767160163e-06, + "loss": 0.4639, + "step": 12910 + }, + { + "epoch": 2.11, + "learning_rate": 6.132794975325258e-06, + "loss": 0.4628, + "step": 12920 + }, + { + "epoch": 2.11, + "learning_rate": 6.1215791834903545e-06, + "loss": 0.4746, + "step": 12930 + }, + { + "epoch": 2.11, + "learning_rate": 6.110363391655452e-06, + "loss": 0.4625, + "step": 12940 + }, + { + "epoch": 2.11, + "learning_rate": 6.099147599820547e-06, + "loss": 0.4649, + "step": 12950 + }, + { + "epoch": 2.11, + "learning_rate": 6.0879318079856444e-06, + "loss": 0.4696, + "step": 12960 + }, + { + "epoch": 2.12, + "learning_rate": 6.076716016150741e-06, + "loss": 0.4631, + "step": 12970 + }, + { + "epoch": 2.12, + "learning_rate": 6.065500224315837e-06, + "loss": 0.4634, + "step": 12980 + }, + { + "epoch": 2.12, + "learning_rate": 6.0542844324809335e-06, + "loss": 0.4702, + "step": 12990 + }, + { + "epoch": 2.12, + "learning_rate": 6.043068640646031e-06, + "loss": 0.4643, + "step": 13000 + }, + { + "epoch": 2.12, + "learning_rate": 6.031852848811126e-06, + "loss": 0.4724, + "step": 13010 + }, + { + "epoch": 2.12, + "learning_rate": 6.020637056976223e-06, + "loss": 0.4738, + "step": 13020 + }, + { + "epoch": 2.13, + "learning_rate": 6.00942126514132e-06, + "loss": 0.4656, + "step": 13030 + }, + { + "epoch": 2.13, + "learning_rate": 5.998205473306415e-06, + "loss": 0.4733, + "step": 13040 + }, + { + "epoch": 2.13, + "learning_rate": 5.9869896814715125e-06, + "loss": 0.4786, + "step": 13050 + }, + { + "epoch": 2.13, + "learning_rate": 5.975773889636609e-06, + "loss": 0.4657, + "step": 13060 + }, + { + "epoch": 2.13, + "learning_rate": 5.964558097801705e-06, + "loss": 0.4794, + "step": 13070 + }, + { + "epoch": 2.13, + "learning_rate": 5.9533423059668016e-06, + "loss": 0.4775, + "step": 13080 + }, + { + "epoch": 2.14, + "learning_rate": 5.942126514131899e-06, + "loss": 0.4779, + "step": 13090 + }, + { + "epoch": 2.14, + "learning_rate": 5.930910722296994e-06, + "loss": 0.4712, + "step": 13100 + }, + { + "epoch": 2.14, + "learning_rate": 5.919694930462091e-06, + "loss": 0.467, + "step": 13110 + }, + { + "epoch": 2.14, + "learning_rate": 5.908479138627188e-06, + "loss": 0.4607, + "step": 13120 + }, + { + "epoch": 2.14, + "learning_rate": 5.897263346792283e-06, + "loss": 0.4673, + "step": 13130 + }, + { + "epoch": 2.14, + "learning_rate": 5.8860475549573806e-06, + "loss": 0.459, + "step": 13140 + }, + { + "epoch": 2.15, + "learning_rate": 5.874831763122477e-06, + "loss": 0.4771, + "step": 13150 + }, + { + "epoch": 2.15, + "learning_rate": 5.863615971287573e-06, + "loss": 0.4767, + "step": 13160 + }, + { + "epoch": 2.15, + "learning_rate": 5.85240017945267e-06, + "loss": 0.464, + "step": 13170 + }, + { + "epoch": 2.15, + "learning_rate": 5.841184387617767e-06, + "loss": 0.4724, + "step": 13180 + }, + { + "epoch": 2.15, + "learning_rate": 5.829968595782862e-06, + "loss": 0.464, + "step": 13190 + }, + { + "epoch": 2.15, + "learning_rate": 5.818752803947959e-06, + "loss": 0.4793, + "step": 13200 + }, + { + "epoch": 2.16, + "learning_rate": 5.807537012113056e-06, + "loss": 0.4777, + "step": 13210 + }, + { + "epoch": 2.16, + "learning_rate": 5.796321220278151e-06, + "loss": 0.4756, + "step": 13220 + }, + { + "epoch": 2.16, + "learning_rate": 5.785105428443249e-06, + "loss": 0.4695, + "step": 13230 + }, + { + "epoch": 2.16, + "learning_rate": 5.773889636608345e-06, + "loss": 0.4699, + "step": 13240 + }, + { + "epoch": 2.16, + "learning_rate": 5.762673844773441e-06, + "loss": 0.4676, + "step": 13250 + }, + { + "epoch": 2.16, + "learning_rate": 5.751458052938538e-06, + "loss": 0.4714, + "step": 13260 + }, + { + "epoch": 2.17, + "learning_rate": 5.740242261103635e-06, + "loss": 0.475, + "step": 13270 + }, + { + "epoch": 2.17, + "learning_rate": 5.72902646926873e-06, + "loss": 0.4803, + "step": 13280 + }, + { + "epoch": 2.17, + "learning_rate": 5.717810677433827e-06, + "loss": 0.4757, + "step": 13290 + }, + { + "epoch": 2.17, + "learning_rate": 5.706594885598924e-06, + "loss": 0.4742, + "step": 13300 + }, + { + "epoch": 2.17, + "learning_rate": 5.6953790937640195e-06, + "loss": 0.4717, + "step": 13310 + }, + { + "epoch": 2.17, + "learning_rate": 5.684163301929117e-06, + "loss": 0.4766, + "step": 13320 + }, + { + "epoch": 2.18, + "learning_rate": 5.672947510094213e-06, + "loss": 0.4793, + "step": 13330 + }, + { + "epoch": 2.18, + "learning_rate": 5.66173171825931e-06, + "loss": 0.4717, + "step": 13340 + }, + { + "epoch": 2.18, + "learning_rate": 5.650515926424406e-06, + "loss": 0.4591, + "step": 13350 + }, + { + "epoch": 2.18, + "learning_rate": 5.639300134589503e-06, + "loss": 0.463, + "step": 13360 + }, + { + "epoch": 2.18, + "learning_rate": 5.628084342754599e-06, + "loss": 0.4669, + "step": 13370 + }, + { + "epoch": 2.18, + "learning_rate": 5.616868550919695e-06, + "loss": 0.4773, + "step": 13380 + }, + { + "epoch": 2.19, + "learning_rate": 5.605652759084792e-06, + "loss": 0.4642, + "step": 13390 + }, + { + "epoch": 2.19, + "learning_rate": 5.594436967249889e-06, + "loss": 0.4689, + "step": 13400 + }, + { + "epoch": 2.19, + "learning_rate": 5.583221175414985e-06, + "loss": 0.4697, + "step": 13410 + }, + { + "epoch": 2.19, + "learning_rate": 5.572005383580081e-06, + "loss": 0.4627, + "step": 13420 + }, + { + "epoch": 2.19, + "learning_rate": 5.560789591745178e-06, + "loss": 0.4595, + "step": 13430 + }, + { + "epoch": 2.19, + "learning_rate": 5.549573799910274e-06, + "loss": 0.4661, + "step": 13440 + }, + { + "epoch": 2.19, + "learning_rate": 5.538358008075371e-06, + "loss": 0.4616, + "step": 13450 + }, + { + "epoch": 2.2, + "learning_rate": 5.527142216240467e-06, + "loss": 0.4599, + "step": 13460 + }, + { + "epoch": 2.2, + "learning_rate": 5.515926424405563e-06, + "loss": 0.4659, + "step": 13470 + }, + { + "epoch": 2.2, + "learning_rate": 5.50471063257066e-06, + "loss": 0.4645, + "step": 13480 + }, + { + "epoch": 2.2, + "learning_rate": 5.493494840735757e-06, + "loss": 0.4646, + "step": 13490 + }, + { + "epoch": 2.2, + "learning_rate": 5.482279048900853e-06, + "loss": 0.4703, + "step": 13500 + }, + { + "epoch": 2.2, + "learning_rate": 5.471063257065949e-06, + "loss": 0.4717, + "step": 13510 + }, + { + "epoch": 2.21, + "learning_rate": 5.459847465231046e-06, + "loss": 0.4822, + "step": 13520 + }, + { + "epoch": 2.21, + "learning_rate": 5.448631673396142e-06, + "loss": 0.4635, + "step": 13530 + }, + { + "epoch": 2.21, + "learning_rate": 5.437415881561239e-06, + "loss": 0.4709, + "step": 13540 + }, + { + "epoch": 2.21, + "learning_rate": 5.4262000897263354e-06, + "loss": 0.4734, + "step": 13550 + }, + { + "epoch": 2.21, + "learning_rate": 5.414984297891431e-06, + "loss": 0.4664, + "step": 13560 + }, + { + "epoch": 2.21, + "learning_rate": 5.403768506056528e-06, + "loss": 0.472, + "step": 13570 + }, + { + "epoch": 2.22, + "learning_rate": 5.392552714221625e-06, + "loss": 0.4685, + "step": 13580 + }, + { + "epoch": 2.22, + "learning_rate": 5.381336922386721e-06, + "loss": 0.4605, + "step": 13590 + }, + { + "epoch": 2.22, + "learning_rate": 5.370121130551817e-06, + "loss": 0.4594, + "step": 13600 + }, + { + "epoch": 2.22, + "learning_rate": 5.3589053387169144e-06, + "loss": 0.4852, + "step": 13610 + }, + { + "epoch": 2.22, + "learning_rate": 5.34768954688201e-06, + "loss": 0.4696, + "step": 13620 + }, + { + "epoch": 2.22, + "learning_rate": 5.336473755047107e-06, + "loss": 0.4669, + "step": 13630 + }, + { + "epoch": 2.23, + "learning_rate": 5.3252579632122035e-06, + "loss": 0.4792, + "step": 13640 + }, + { + "epoch": 2.23, + "learning_rate": 5.314042171377299e-06, + "loss": 0.461, + "step": 13650 + }, + { + "epoch": 2.23, + "learning_rate": 5.302826379542396e-06, + "loss": 0.4772, + "step": 13660 + }, + { + "epoch": 2.23, + "learning_rate": 5.2916105877074934e-06, + "loss": 0.4671, + "step": 13670 + }, + { + "epoch": 2.23, + "learning_rate": 5.280394795872589e-06, + "loss": 0.4792, + "step": 13680 + }, + { + "epoch": 2.23, + "learning_rate": 5.269179004037685e-06, + "loss": 0.4604, + "step": 13690 + }, + { + "epoch": 2.24, + "learning_rate": 5.2579632122027825e-06, + "loss": 0.4778, + "step": 13700 + }, + { + "epoch": 2.24, + "learning_rate": 5.246747420367878e-06, + "loss": 0.4726, + "step": 13710 + }, + { + "epoch": 2.24, + "learning_rate": 5.235531628532975e-06, + "loss": 0.4681, + "step": 13720 + }, + { + "epoch": 2.24, + "learning_rate": 5.2243158366980716e-06, + "loss": 0.4716, + "step": 13730 + }, + { + "epoch": 2.24, + "learning_rate": 5.213100044863167e-06, + "loss": 0.4725, + "step": 13740 + }, + { + "epoch": 2.24, + "learning_rate": 5.201884253028264e-06, + "loss": 0.4678, + "step": 13750 + }, + { + "epoch": 2.25, + "learning_rate": 5.1906684611933615e-06, + "loss": 0.4672, + "step": 13760 + }, + { + "epoch": 2.25, + "learning_rate": 5.179452669358457e-06, + "loss": 0.4768, + "step": 13770 + }, + { + "epoch": 2.25, + "learning_rate": 5.168236877523553e-06, + "loss": 0.4675, + "step": 13780 + }, + { + "epoch": 2.25, + "learning_rate": 5.1570210856886506e-06, + "loss": 0.4757, + "step": 13790 + }, + { + "epoch": 2.25, + "learning_rate": 5.145805293853746e-06, + "loss": 0.472, + "step": 13800 + }, + { + "epoch": 2.25, + "learning_rate": 5.134589502018843e-06, + "loss": 0.4828, + "step": 13810 + }, + { + "epoch": 2.26, + "learning_rate": 5.12337371018394e-06, + "loss": 0.4626, + "step": 13820 + }, + { + "epoch": 2.26, + "learning_rate": 5.112157918349035e-06, + "loss": 0.4668, + "step": 13830 + }, + { + "epoch": 2.26, + "learning_rate": 5.100942126514132e-06, + "loss": 0.4738, + "step": 13840 + }, + { + "epoch": 2.26, + "learning_rate": 5.0897263346792296e-06, + "loss": 0.4797, + "step": 13850 + }, + { + "epoch": 2.26, + "learning_rate": 5.078510542844325e-06, + "loss": 0.4635, + "step": 13860 + }, + { + "epoch": 2.26, + "learning_rate": 5.067294751009421e-06, + "loss": 0.482, + "step": 13870 + }, + { + "epoch": 2.27, + "learning_rate": 5.056078959174519e-06, + "loss": 0.4805, + "step": 13880 + }, + { + "epoch": 2.27, + "learning_rate": 5.044863167339614e-06, + "loss": 0.4657, + "step": 13890 + }, + { + "epoch": 2.27, + "learning_rate": 5.033647375504711e-06, + "loss": 0.4603, + "step": 13900 + }, + { + "epoch": 2.27, + "learning_rate": 5.022431583669808e-06, + "loss": 0.46, + "step": 13910 + }, + { + "epoch": 2.27, + "learning_rate": 5.011215791834903e-06, + "loss": 0.4665, + "step": 13920 + }, + { + "epoch": 2.27, + "learning_rate": 5e-06, + "loss": 0.467, + "step": 13930 + }, + { + "epoch": 2.27, + "learning_rate": 4.988784208165097e-06, + "loss": 0.4726, + "step": 13940 + }, + { + "epoch": 2.28, + "learning_rate": 4.977568416330193e-06, + "loss": 0.4608, + "step": 13950 + }, + { + "epoch": 2.28, + "learning_rate": 4.9663526244952895e-06, + "loss": 0.4735, + "step": 13960 + }, + { + "epoch": 2.28, + "learning_rate": 4.955136832660386e-06, + "loss": 0.4627, + "step": 13970 + }, + { + "epoch": 2.28, + "learning_rate": 4.943921040825483e-06, + "loss": 0.4797, + "step": 13980 + }, + { + "epoch": 2.28, + "learning_rate": 4.932705248990579e-06, + "loss": 0.4708, + "step": 13990 + }, + { + "epoch": 2.28, + "learning_rate": 4.921489457155676e-06, + "loss": 0.4673, + "step": 14000 + }, + { + "epoch": 2.29, + "learning_rate": 4.910273665320772e-06, + "loss": 0.4703, + "step": 14010 + }, + { + "epoch": 2.29, + "learning_rate": 4.8990578734858685e-06, + "loss": 0.4644, + "step": 14020 + }, + { + "epoch": 2.29, + "learning_rate": 4.887842081650965e-06, + "loss": 0.4712, + "step": 14030 + }, + { + "epoch": 2.29, + "learning_rate": 4.876626289816061e-06, + "loss": 0.4643, + "step": 14040 + }, + { + "epoch": 2.29, + "learning_rate": 4.8654104979811575e-06, + "loss": 0.4795, + "step": 14050 + }, + { + "epoch": 2.29, + "learning_rate": 4.854194706146254e-06, + "loss": 0.4697, + "step": 14060 + }, + { + "epoch": 2.3, + "learning_rate": 4.842978914311351e-06, + "loss": 0.4651, + "step": 14070 + }, + { + "epoch": 2.3, + "learning_rate": 4.8317631224764475e-06, + "loss": 0.4607, + "step": 14080 + }, + { + "epoch": 2.3, + "learning_rate": 4.820547330641544e-06, + "loss": 0.4746, + "step": 14090 + }, + { + "epoch": 2.3, + "learning_rate": 4.80933153880664e-06, + "loss": 0.4804, + "step": 14100 + }, + { + "epoch": 2.3, + "learning_rate": 4.7981157469717365e-06, + "loss": 0.4538, + "step": 14110 + }, + { + "epoch": 2.3, + "learning_rate": 4.786899955136833e-06, + "loss": 0.4744, + "step": 14120 + }, + { + "epoch": 2.31, + "learning_rate": 4.775684163301929e-06, + "loss": 0.4747, + "step": 14130 + }, + { + "epoch": 2.31, + "learning_rate": 4.764468371467026e-06, + "loss": 0.4603, + "step": 14140 + }, + { + "epoch": 2.31, + "learning_rate": 4.753252579632122e-06, + "loss": 0.4742, + "step": 14150 + }, + { + "epoch": 2.31, + "learning_rate": 4.742036787797219e-06, + "loss": 0.4678, + "step": 14160 + }, + { + "epoch": 2.31, + "learning_rate": 4.7308209959623155e-06, + "loss": 0.4707, + "step": 14170 + }, + { + "epoch": 2.31, + "learning_rate": 4.719605204127412e-06, + "loss": 0.4724, + "step": 14180 + }, + { + "epoch": 2.32, + "learning_rate": 4.708389412292508e-06, + "loss": 0.4657, + "step": 14190 + }, + { + "epoch": 2.32, + "learning_rate": 4.697173620457605e-06, + "loss": 0.4748, + "step": 14200 + }, + { + "epoch": 2.32, + "learning_rate": 4.685957828622701e-06, + "loss": 0.4663, + "step": 14210 + }, + { + "epoch": 2.32, + "learning_rate": 4.674742036787797e-06, + "loss": 0.4656, + "step": 14220 + }, + { + "epoch": 2.32, + "learning_rate": 4.663526244952894e-06, + "loss": 0.4611, + "step": 14230 + }, + { + "epoch": 2.32, + "learning_rate": 4.652310453117991e-06, + "loss": 0.4658, + "step": 14240 + }, + { + "epoch": 2.33, + "learning_rate": 4.641094661283087e-06, + "loss": 0.4741, + "step": 14250 + }, + { + "epoch": 2.33, + "learning_rate": 4.629878869448184e-06, + "loss": 0.4728, + "step": 14260 + }, + { + "epoch": 2.33, + "learning_rate": 4.61866307761328e-06, + "loss": 0.4627, + "step": 14270 + }, + { + "epoch": 2.33, + "learning_rate": 4.607447285778376e-06, + "loss": 0.4702, + "step": 14280 + }, + { + "epoch": 2.33, + "learning_rate": 4.596231493943473e-06, + "loss": 0.4744, + "step": 14290 + }, + { + "epoch": 2.33, + "learning_rate": 4.58501570210857e-06, + "loss": 0.4606, + "step": 14300 + }, + { + "epoch": 2.34, + "learning_rate": 4.573799910273665e-06, + "loss": 0.467, + "step": 14310 + }, + { + "epoch": 2.34, + "learning_rate": 4.562584118438762e-06, + "loss": 0.4593, + "step": 14320 + }, + { + "epoch": 2.34, + "learning_rate": 4.551368326603859e-06, + "loss": 0.4724, + "step": 14330 + }, + { + "epoch": 2.34, + "learning_rate": 4.540152534768955e-06, + "loss": 0.4659, + "step": 14340 + }, + { + "epoch": 2.34, + "learning_rate": 4.528936742934052e-06, + "loss": 0.4653, + "step": 14350 + }, + { + "epoch": 2.34, + "learning_rate": 4.517720951099148e-06, + "loss": 0.4571, + "step": 14360 + }, + { + "epoch": 2.34, + "learning_rate": 4.506505159264244e-06, + "loss": 0.4711, + "step": 14370 + }, + { + "epoch": 2.35, + "learning_rate": 4.495289367429341e-06, + "loss": 0.4657, + "step": 14380 + }, + { + "epoch": 2.35, + "learning_rate": 4.484073575594438e-06, + "loss": 0.4671, + "step": 14390 + }, + { + "epoch": 2.35, + "learning_rate": 4.4728577837595334e-06, + "loss": 0.4677, + "step": 14400 + }, + { + "epoch": 2.35, + "learning_rate": 4.46164199192463e-06, + "loss": 0.4751, + "step": 14410 + }, + { + "epoch": 2.35, + "learning_rate": 4.450426200089727e-06, + "loss": 0.4614, + "step": 14420 + }, + { + "epoch": 2.35, + "learning_rate": 4.439210408254823e-06, + "loss": 0.4771, + "step": 14430 + }, + { + "epoch": 2.36, + "learning_rate": 4.42799461641992e-06, + "loss": 0.4573, + "step": 14440 + }, + { + "epoch": 2.36, + "learning_rate": 4.416778824585016e-06, + "loss": 0.4673, + "step": 14450 + }, + { + "epoch": 2.36, + "learning_rate": 4.4055630327501124e-06, + "loss": 0.4746, + "step": 14460 + }, + { + "epoch": 2.36, + "learning_rate": 4.394347240915209e-06, + "loss": 0.4792, + "step": 14470 + }, + { + "epoch": 2.36, + "learning_rate": 4.383131449080306e-06, + "loss": 0.4728, + "step": 14480 + }, + { + "epoch": 2.36, + "learning_rate": 4.3719156572454015e-06, + "loss": 0.4674, + "step": 14490 + }, + { + "epoch": 2.37, + "learning_rate": 4.360699865410498e-06, + "loss": 0.4759, + "step": 14500 + }, + { + "epoch": 2.37, + "learning_rate": 4.349484073575595e-06, + "loss": 0.478, + "step": 14510 + }, + { + "epoch": 2.37, + "learning_rate": 4.338268281740691e-06, + "loss": 0.464, + "step": 14520 + }, + { + "epoch": 2.37, + "learning_rate": 4.327052489905788e-06, + "loss": 0.4801, + "step": 14530 + }, + { + "epoch": 2.37, + "learning_rate": 4.315836698070884e-06, + "loss": 0.4704, + "step": 14540 + }, + { + "epoch": 2.37, + "learning_rate": 4.3046209062359805e-06, + "loss": 0.4778, + "step": 14550 + }, + { + "epoch": 2.38, + "learning_rate": 4.293405114401077e-06, + "loss": 0.4783, + "step": 14560 + }, + { + "epoch": 2.38, + "learning_rate": 4.282189322566174e-06, + "loss": 0.4821, + "step": 14570 + }, + { + "epoch": 2.38, + "learning_rate": 4.2709735307312696e-06, + "loss": 0.4607, + "step": 14580 + }, + { + "epoch": 2.38, + "learning_rate": 4.259757738896366e-06, + "loss": 0.4752, + "step": 14590 + }, + { + "epoch": 2.38, + "learning_rate": 4.248541947061463e-06, + "loss": 0.4672, + "step": 14600 + }, + { + "epoch": 2.38, + "learning_rate": 4.2373261552265595e-06, + "loss": 0.466, + "step": 14610 + }, + { + "epoch": 2.39, + "learning_rate": 4.226110363391656e-06, + "loss": 0.4711, + "step": 14620 + }, + { + "epoch": 2.39, + "learning_rate": 4.214894571556752e-06, + "loss": 0.4813, + "step": 14630 + }, + { + "epoch": 2.39, + "learning_rate": 4.2036787797218485e-06, + "loss": 0.4731, + "step": 14640 + }, + { + "epoch": 2.39, + "learning_rate": 4.192462987886945e-06, + "loss": 0.4656, + "step": 14650 + }, + { + "epoch": 2.39, + "learning_rate": 4.181247196052042e-06, + "loss": 0.4605, + "step": 14660 + }, + { + "epoch": 2.39, + "learning_rate": 4.170031404217138e-06, + "loss": 0.4535, + "step": 14670 + }, + { + "epoch": 2.4, + "learning_rate": 4.158815612382234e-06, + "loss": 0.4601, + "step": 14680 + }, + { + "epoch": 2.4, + "learning_rate": 4.147599820547331e-06, + "loss": 0.4617, + "step": 14690 + }, + { + "epoch": 2.4, + "learning_rate": 4.1363840287124275e-06, + "loss": 0.4691, + "step": 14700 + }, + { + "epoch": 2.4, + "learning_rate": 4.125168236877524e-06, + "loss": 0.4707, + "step": 14710 + }, + { + "epoch": 2.4, + "learning_rate": 4.11395244504262e-06, + "loss": 0.4678, + "step": 14720 + }, + { + "epoch": 2.4, + "learning_rate": 4.102736653207717e-06, + "loss": 0.468, + "step": 14730 + }, + { + "epoch": 2.41, + "learning_rate": 4.091520861372813e-06, + "loss": 0.4675, + "step": 14740 + }, + { + "epoch": 2.41, + "learning_rate": 4.08030506953791e-06, + "loss": 0.4746, + "step": 14750 + }, + { + "epoch": 2.41, + "learning_rate": 4.069089277703006e-06, + "loss": 0.4729, + "step": 14760 + }, + { + "epoch": 2.41, + "learning_rate": 4.057873485868102e-06, + "loss": 0.4681, + "step": 14770 + }, + { + "epoch": 2.41, + "learning_rate": 4.046657694033199e-06, + "loss": 0.4726, + "step": 14780 + }, + { + "epoch": 2.41, + "learning_rate": 4.035441902198296e-06, + "loss": 0.472, + "step": 14790 + }, + { + "epoch": 2.42, + "learning_rate": 4.024226110363392e-06, + "loss": 0.4532, + "step": 14800 + }, + { + "epoch": 2.42, + "learning_rate": 4.013010318528488e-06, + "loss": 0.475, + "step": 14810 + }, + { + "epoch": 2.42, + "learning_rate": 4.001794526693585e-06, + "loss": 0.4659, + "step": 14820 + }, + { + "epoch": 2.42, + "learning_rate": 3.990578734858681e-06, + "loss": 0.4747, + "step": 14830 + }, + { + "epoch": 2.42, + "learning_rate": 3.979362943023778e-06, + "loss": 0.4697, + "step": 14840 + }, + { + "epoch": 2.42, + "learning_rate": 3.968147151188875e-06, + "loss": 0.4709, + "step": 14850 + }, + { + "epoch": 2.42, + "learning_rate": 3.956931359353971e-06, + "loss": 0.4738, + "step": 14860 + }, + { + "epoch": 2.43, + "learning_rate": 3.945715567519067e-06, + "loss": 0.4628, + "step": 14870 + }, + { + "epoch": 2.43, + "learning_rate": 3.934499775684164e-06, + "loss": 0.4695, + "step": 14880 + }, + { + "epoch": 2.43, + "learning_rate": 3.92328398384926e-06, + "loss": 0.4789, + "step": 14890 + }, + { + "epoch": 2.43, + "learning_rate": 3.912068192014356e-06, + "loss": 0.462, + "step": 14900 + }, + { + "epoch": 2.43, + "learning_rate": 3.900852400179453e-06, + "loss": 0.4543, + "step": 14910 + }, + { + "epoch": 2.43, + "learning_rate": 3.88963660834455e-06, + "loss": 0.4577, + "step": 14920 + }, + { + "epoch": 2.44, + "learning_rate": 3.878420816509646e-06, + "loss": 0.4619, + "step": 14930 + }, + { + "epoch": 2.44, + "learning_rate": 3.867205024674743e-06, + "loss": 0.4723, + "step": 14940 + }, + { + "epoch": 2.44, + "learning_rate": 3.855989232839839e-06, + "loss": 0.4687, + "step": 14950 + }, + { + "epoch": 2.44, + "learning_rate": 3.844773441004935e-06, + "loss": 0.4745, + "step": 14960 + }, + { + "epoch": 2.44, + "learning_rate": 3.833557649170032e-06, + "loss": 0.4795, + "step": 14970 + }, + { + "epoch": 2.44, + "learning_rate": 3.822341857335128e-06, + "loss": 0.4591, + "step": 14980 + }, + { + "epoch": 2.45, + "learning_rate": 3.811126065500225e-06, + "loss": 0.4607, + "step": 14990 + }, + { + "epoch": 2.45, + "learning_rate": 3.799910273665321e-06, + "loss": 0.4693, + "step": 15000 + }, + { + "epoch": 2.45, + "learning_rate": 3.7886944818304176e-06, + "loss": 0.4798, + "step": 15010 + }, + { + "epoch": 2.45, + "learning_rate": 3.777478689995514e-06, + "loss": 0.4655, + "step": 15020 + }, + { + "epoch": 2.45, + "learning_rate": 3.7662628981606103e-06, + "loss": 0.4543, + "step": 15030 + }, + { + "epoch": 2.45, + "learning_rate": 3.755047106325707e-06, + "loss": 0.4617, + "step": 15040 + }, + { + "epoch": 2.46, + "learning_rate": 3.7438313144908034e-06, + "loss": 0.4731, + "step": 15050 + }, + { + "epoch": 2.46, + "learning_rate": 3.7337371018393903e-06, + "loss": 0.4608, + "step": 15060 + }, + { + "epoch": 2.46, + "learning_rate": 3.7225213100044867e-06, + "loss": 0.4767, + "step": 15070 + }, + { + "epoch": 2.46, + "learning_rate": 3.711305518169583e-06, + "loss": 0.4679, + "step": 15080 + }, + { + "epoch": 2.46, + "learning_rate": 3.70008972633468e-06, + "loss": 0.4646, + "step": 15090 + }, + { + "epoch": 2.46, + "learning_rate": 3.6888739344997758e-06, + "loss": 0.4713, + "step": 15100 + }, + { + "epoch": 2.47, + "learning_rate": 3.677658142664872e-06, + "loss": 0.4628, + "step": 15110 + }, + { + "epoch": 2.47, + "learning_rate": 3.666442350829969e-06, + "loss": 0.4609, + "step": 15120 + }, + { + "epoch": 2.47, + "learning_rate": 3.6552265589950652e-06, + "loss": 0.4664, + "step": 15130 + }, + { + "epoch": 2.47, + "learning_rate": 3.6440107671601616e-06, + "loss": 0.4698, + "step": 15140 + }, + { + "epoch": 2.47, + "learning_rate": 3.6327949753252584e-06, + "loss": 0.4703, + "step": 15150 + }, + { + "epoch": 2.47, + "learning_rate": 3.6215791834903547e-06, + "loss": 0.4652, + "step": 15160 + }, + { + "epoch": 2.48, + "learning_rate": 3.610363391655451e-06, + "loss": 0.4676, + "step": 15170 + }, + { + "epoch": 2.48, + "learning_rate": 3.599147599820548e-06, + "loss": 0.4586, + "step": 15180 + }, + { + "epoch": 2.48, + "learning_rate": 3.587931807985644e-06, + "loss": 0.4713, + "step": 15190 + }, + { + "epoch": 2.48, + "learning_rate": 3.57671601615074e-06, + "loss": 0.4608, + "step": 15200 + }, + { + "epoch": 2.48, + "learning_rate": 3.565500224315837e-06, + "loss": 0.4606, + "step": 15210 + }, + { + "epoch": 2.48, + "learning_rate": 3.5542844324809333e-06, + "loss": 0.4805, + "step": 15220 + }, + { + "epoch": 2.49, + "learning_rate": 3.5430686406460297e-06, + "loss": 0.4572, + "step": 15230 + }, + { + "epoch": 2.49, + "learning_rate": 3.5318528488111264e-06, + "loss": 0.4591, + "step": 15240 + }, + { + "epoch": 2.49, + "learning_rate": 3.520637056976223e-06, + "loss": 0.4635, + "step": 15250 + }, + { + "epoch": 2.49, + "learning_rate": 3.5094212651413196e-06, + "loss": 0.4734, + "step": 15260 + }, + { + "epoch": 2.49, + "learning_rate": 3.498205473306416e-06, + "loss": 0.4624, + "step": 15270 + }, + { + "epoch": 2.49, + "learning_rate": 3.486989681471512e-06, + "loss": 0.4719, + "step": 15280 + }, + { + "epoch": 2.5, + "learning_rate": 3.475773889636609e-06, + "loss": 0.4617, + "step": 15290 + }, + { + "epoch": 2.5, + "learning_rate": 3.464558097801705e-06, + "loss": 0.4742, + "step": 15300 + }, + { + "epoch": 2.5, + "learning_rate": 3.4533423059668014e-06, + "loss": 0.47, + "step": 15310 + }, + { + "epoch": 2.5, + "learning_rate": 3.442126514131898e-06, + "loss": 0.4725, + "step": 15320 + }, + { + "epoch": 2.5, + "learning_rate": 3.4309107222969945e-06, + "loss": 0.4618, + "step": 15330 + }, + { + "epoch": 2.5, + "learning_rate": 3.419694930462091e-06, + "loss": 0.4651, + "step": 15340 + }, + { + "epoch": 2.5, + "learning_rate": 3.4084791386271876e-06, + "loss": 0.4662, + "step": 15350 + }, + { + "epoch": 2.51, + "learning_rate": 3.397263346792284e-06, + "loss": 0.4773, + "step": 15360 + }, + { + "epoch": 2.51, + "learning_rate": 3.38604755495738e-06, + "loss": 0.457, + "step": 15370 + }, + { + "epoch": 2.51, + "learning_rate": 3.374831763122477e-06, + "loss": 0.4621, + "step": 15380 + }, + { + "epoch": 2.51, + "learning_rate": 3.363615971287573e-06, + "loss": 0.472, + "step": 15390 + }, + { + "epoch": 2.51, + "learning_rate": 3.3524001794526694e-06, + "loss": 0.4784, + "step": 15400 + }, + { + "epoch": 2.51, + "learning_rate": 3.3411843876177662e-06, + "loss": 0.4743, + "step": 15410 + }, + { + "epoch": 2.52, + "learning_rate": 3.3299685957828626e-06, + "loss": 0.4677, + "step": 15420 + }, + { + "epoch": 2.52, + "learning_rate": 3.318752803947959e-06, + "loss": 0.4707, + "step": 15430 + }, + { + "epoch": 2.52, + "learning_rate": 3.3075370121130557e-06, + "loss": 0.4629, + "step": 15440 + }, + { + "epoch": 2.52, + "learning_rate": 3.296321220278152e-06, + "loss": 0.4771, + "step": 15450 + }, + { + "epoch": 2.52, + "learning_rate": 3.285105428443248e-06, + "loss": 0.4681, + "step": 15460 + }, + { + "epoch": 2.52, + "learning_rate": 3.273889636608345e-06, + "loss": 0.4653, + "step": 15470 + }, + { + "epoch": 2.53, + "learning_rate": 3.262673844773441e-06, + "loss": 0.459, + "step": 15480 + }, + { + "epoch": 2.53, + "learning_rate": 3.2514580529385375e-06, + "loss": 0.471, + "step": 15490 + }, + { + "epoch": 2.53, + "learning_rate": 3.2402422611036343e-06, + "loss": 0.464, + "step": 15500 + }, + { + "epoch": 2.53, + "learning_rate": 3.2290264692687306e-06, + "loss": 0.4774, + "step": 15510 + }, + { + "epoch": 2.53, + "learning_rate": 3.217810677433827e-06, + "loss": 0.4736, + "step": 15520 + }, + { + "epoch": 2.53, + "learning_rate": 3.2065948855989238e-06, + "loss": 0.4702, + "step": 15530 + }, + { + "epoch": 2.54, + "learning_rate": 3.19537909376402e-06, + "loss": 0.4598, + "step": 15540 + }, + { + "epoch": 2.54, + "learning_rate": 3.184163301929116e-06, + "loss": 0.4565, + "step": 15550 + }, + { + "epoch": 2.54, + "learning_rate": 3.1729475100942133e-06, + "loss": 0.4617, + "step": 15560 + }, + { + "epoch": 2.54, + "learning_rate": 3.161731718259309e-06, + "loss": 0.4555, + "step": 15570 + }, + { + "epoch": 2.54, + "learning_rate": 3.1505159264244056e-06, + "loss": 0.4594, + "step": 15580 + }, + { + "epoch": 2.54, + "learning_rate": 3.1393001345895023e-06, + "loss": 0.4625, + "step": 15590 + }, + { + "epoch": 2.55, + "learning_rate": 3.1280843427545987e-06, + "loss": 0.4703, + "step": 15600 + }, + { + "epoch": 2.55, + "learning_rate": 3.116868550919695e-06, + "loss": 0.4745, + "step": 15610 + }, + { + "epoch": 2.55, + "learning_rate": 3.105652759084792e-06, + "loss": 0.477, + "step": 15620 + }, + { + "epoch": 2.55, + "learning_rate": 3.094436967249888e-06, + "loss": 0.4619, + "step": 15630 + }, + { + "epoch": 2.55, + "learning_rate": 3.0832211754149845e-06, + "loss": 0.4658, + "step": 15640 + }, + { + "epoch": 2.55, + "learning_rate": 3.0720053835800813e-06, + "loss": 0.4684, + "step": 15650 + }, + { + "epoch": 2.56, + "learning_rate": 3.0607895917451773e-06, + "loss": 0.461, + "step": 15660 + }, + { + "epoch": 2.56, + "learning_rate": 3.0495737999102736e-06, + "loss": 0.4716, + "step": 15670 + }, + { + "epoch": 2.56, + "learning_rate": 3.0383580080753704e-06, + "loss": 0.4632, + "step": 15680 + }, + { + "epoch": 2.56, + "learning_rate": 3.0271422162404668e-06, + "loss": 0.4726, + "step": 15690 + }, + { + "epoch": 2.56, + "learning_rate": 3.015926424405563e-06, + "loss": 0.4733, + "step": 15700 + }, + { + "epoch": 2.56, + "learning_rate": 3.00471063257066e-06, + "loss": 0.4668, + "step": 15710 + }, + { + "epoch": 2.57, + "learning_rate": 2.9934948407357563e-06, + "loss": 0.466, + "step": 15720 + }, + { + "epoch": 2.57, + "learning_rate": 2.9822790489008526e-06, + "loss": 0.4708, + "step": 15730 + }, + { + "epoch": 2.57, + "learning_rate": 2.9710632570659494e-06, + "loss": 0.4719, + "step": 15740 + }, + { + "epoch": 2.57, + "learning_rate": 2.9598474652310453e-06, + "loss": 0.4757, + "step": 15750 + }, + { + "epoch": 2.57, + "learning_rate": 2.9486316733961417e-06, + "loss": 0.4775, + "step": 15760 + }, + { + "epoch": 2.57, + "learning_rate": 2.9374158815612385e-06, + "loss": 0.4569, + "step": 15770 + }, + { + "epoch": 2.58, + "learning_rate": 2.926200089726335e-06, + "loss": 0.4758, + "step": 15780 + }, + { + "epoch": 2.58, + "learning_rate": 2.914984297891431e-06, + "loss": 0.4654, + "step": 15790 + }, + { + "epoch": 2.58, + "learning_rate": 2.903768506056528e-06, + "loss": 0.4641, + "step": 15800 + }, + { + "epoch": 2.58, + "learning_rate": 2.8925527142216243e-06, + "loss": 0.4675, + "step": 15810 + }, + { + "epoch": 2.58, + "learning_rate": 2.8813369223867207e-06, + "loss": 0.4771, + "step": 15820 + }, + { + "epoch": 2.58, + "learning_rate": 2.8701211305518175e-06, + "loss": 0.4653, + "step": 15830 + }, + { + "epoch": 2.58, + "learning_rate": 2.8589053387169134e-06, + "loss": 0.4646, + "step": 15840 + }, + { + "epoch": 2.59, + "learning_rate": 2.8476895468820097e-06, + "loss": 0.4671, + "step": 15850 + }, + { + "epoch": 2.59, + "learning_rate": 2.8364737550471065e-06, + "loss": 0.4733, + "step": 15860 + }, + { + "epoch": 2.59, + "learning_rate": 2.825257963212203e-06, + "loss": 0.4629, + "step": 15870 + }, + { + "epoch": 2.59, + "learning_rate": 2.8140421713772997e-06, + "loss": 0.4803, + "step": 15880 + }, + { + "epoch": 2.59, + "learning_rate": 2.802826379542396e-06, + "loss": 0.465, + "step": 15890 + }, + { + "epoch": 2.59, + "learning_rate": 2.7916105877074924e-06, + "loss": 0.466, + "step": 15900 + }, + { + "epoch": 2.6, + "learning_rate": 2.780394795872589e-06, + "loss": 0.4667, + "step": 15910 + }, + { + "epoch": 2.6, + "learning_rate": 2.7691790040376855e-06, + "loss": 0.4703, + "step": 15920 + }, + { + "epoch": 2.6, + "learning_rate": 2.7579632122027814e-06, + "loss": 0.4772, + "step": 15930 + }, + { + "epoch": 2.6, + "learning_rate": 2.7467474203678787e-06, + "loss": 0.4728, + "step": 15940 + }, + { + "epoch": 2.6, + "learning_rate": 2.7355316285329746e-06, + "loss": 0.4724, + "step": 15950 + }, + { + "epoch": 2.6, + "learning_rate": 2.724315836698071e-06, + "loss": 0.4666, + "step": 15960 + }, + { + "epoch": 2.61, + "learning_rate": 2.7131000448631677e-06, + "loss": 0.4648, + "step": 15970 + }, + { + "epoch": 2.61, + "learning_rate": 2.701884253028264e-06, + "loss": 0.4647, + "step": 15980 + }, + { + "epoch": 2.61, + "learning_rate": 2.6906684611933604e-06, + "loss": 0.4679, + "step": 15990 + }, + { + "epoch": 2.61, + "learning_rate": 2.6794526693584572e-06, + "loss": 0.4604, + "step": 16000 + }, + { + "epoch": 2.61, + "learning_rate": 2.6682368775235536e-06, + "loss": 0.4632, + "step": 16010 + }, + { + "epoch": 2.61, + "learning_rate": 2.6570210856886495e-06, + "loss": 0.4593, + "step": 16020 + }, + { + "epoch": 2.62, + "learning_rate": 2.6458052938537467e-06, + "loss": 0.4646, + "step": 16030 + }, + { + "epoch": 2.62, + "learning_rate": 2.6345895020188426e-06, + "loss": 0.459, + "step": 16040 + }, + { + "epoch": 2.62, + "learning_rate": 2.623373710183939e-06, + "loss": 0.4707, + "step": 16050 + }, + { + "epoch": 2.62, + "learning_rate": 2.6121579183490358e-06, + "loss": 0.462, + "step": 16060 + }, + { + "epoch": 2.62, + "learning_rate": 2.600942126514132e-06, + "loss": 0.4696, + "step": 16070 + }, + { + "epoch": 2.62, + "learning_rate": 2.5897263346792285e-06, + "loss": 0.4678, + "step": 16080 + }, + { + "epoch": 2.63, + "learning_rate": 2.5785105428443253e-06, + "loss": 0.4669, + "step": 16090 + }, + { + "epoch": 2.63, + "learning_rate": 2.5672947510094216e-06, + "loss": 0.4619, + "step": 16100 + }, + { + "epoch": 2.63, + "learning_rate": 2.5560789591745176e-06, + "loss": 0.4588, + "step": 16110 + }, + { + "epoch": 2.63, + "learning_rate": 2.5448631673396148e-06, + "loss": 0.4608, + "step": 16120 + }, + { + "epoch": 2.63, + "learning_rate": 2.5336473755047107e-06, + "loss": 0.4564, + "step": 16130 + }, + { + "epoch": 2.63, + "learning_rate": 2.522431583669807e-06, + "loss": 0.4682, + "step": 16140 + }, + { + "epoch": 2.64, + "learning_rate": 2.511215791834904e-06, + "loss": 0.4623, + "step": 16150 + }, + { + "epoch": 2.64, + "learning_rate": 2.5e-06, + "loss": 0.4652, + "step": 16160 + }, + { + "epoch": 2.64, + "learning_rate": 2.4887842081650966e-06, + "loss": 0.4679, + "step": 16170 + }, + { + "epoch": 2.64, + "learning_rate": 2.477568416330193e-06, + "loss": 0.4719, + "step": 16180 + }, + { + "epoch": 2.64, + "learning_rate": 2.4663526244952897e-06, + "loss": 0.4835, + "step": 16190 + }, + { + "epoch": 2.64, + "learning_rate": 2.455136832660386e-06, + "loss": 0.4721, + "step": 16200 + }, + { + "epoch": 2.65, + "learning_rate": 2.4439210408254824e-06, + "loss": 0.4616, + "step": 16210 + }, + { + "epoch": 2.65, + "learning_rate": 2.4327052489905788e-06, + "loss": 0.4592, + "step": 16220 + }, + { + "epoch": 2.65, + "learning_rate": 2.4214894571556756e-06, + "loss": 0.4607, + "step": 16230 + }, + { + "epoch": 2.65, + "learning_rate": 2.410273665320772e-06, + "loss": 0.4716, + "step": 16240 + }, + { + "epoch": 2.65, + "learning_rate": 2.3990578734858683e-06, + "loss": 0.4689, + "step": 16250 + }, + { + "epoch": 2.65, + "learning_rate": 2.3878420816509646e-06, + "loss": 0.4712, + "step": 16260 + }, + { + "epoch": 2.66, + "learning_rate": 2.376626289816061e-06, + "loss": 0.4771, + "step": 16270 + }, + { + "epoch": 2.66, + "learning_rate": 2.3654104979811578e-06, + "loss": 0.4687, + "step": 16280 + }, + { + "epoch": 2.66, + "learning_rate": 2.354194706146254e-06, + "loss": 0.4683, + "step": 16290 + }, + { + "epoch": 2.66, + "learning_rate": 2.3429789143113505e-06, + "loss": 0.4672, + "step": 16300 + }, + { + "epoch": 2.66, + "learning_rate": 2.331763122476447e-06, + "loss": 0.4772, + "step": 16310 + }, + { + "epoch": 2.66, + "learning_rate": 2.3205473306415436e-06, + "loss": 0.4696, + "step": 16320 + }, + { + "epoch": 2.66, + "learning_rate": 2.30933153880664e-06, + "loss": 0.4691, + "step": 16330 + }, + { + "epoch": 2.67, + "learning_rate": 2.2981157469717363e-06, + "loss": 0.4646, + "step": 16340 + }, + { + "epoch": 2.67, + "learning_rate": 2.2868999551368327e-06, + "loss": 0.4736, + "step": 16350 + }, + { + "epoch": 2.67, + "learning_rate": 2.2756841633019295e-06, + "loss": 0.4831, + "step": 16360 + }, + { + "epoch": 2.67, + "learning_rate": 2.264468371467026e-06, + "loss": 0.4555, + "step": 16370 + }, + { + "epoch": 2.67, + "learning_rate": 2.253252579632122e-06, + "loss": 0.4751, + "step": 16380 + }, + { + "epoch": 2.67, + "learning_rate": 2.242036787797219e-06, + "loss": 0.472, + "step": 16390 + }, + { + "epoch": 2.68, + "learning_rate": 2.230820995962315e-06, + "loss": 0.4743, + "step": 16400 + }, + { + "epoch": 2.68, + "learning_rate": 2.2196052041274117e-06, + "loss": 0.4657, + "step": 16410 + }, + { + "epoch": 2.68, + "learning_rate": 2.208389412292508e-06, + "loss": 0.459, + "step": 16420 + }, + { + "epoch": 2.68, + "learning_rate": 2.1971736204576044e-06, + "loss": 0.4586, + "step": 16430 + }, + { + "epoch": 2.68, + "learning_rate": 2.1859578286227007e-06, + "loss": 0.4627, + "step": 16440 + }, + { + "epoch": 2.68, + "learning_rate": 2.1747420367877975e-06, + "loss": 0.4665, + "step": 16450 + }, + { + "epoch": 2.69, + "learning_rate": 2.163526244952894e-06, + "loss": 0.4625, + "step": 16460 + }, + { + "epoch": 2.69, + "learning_rate": 2.1523104531179902e-06, + "loss": 0.4576, + "step": 16470 + }, + { + "epoch": 2.69, + "learning_rate": 2.141094661283087e-06, + "loss": 0.465, + "step": 16480 + }, + { + "epoch": 2.69, + "learning_rate": 2.129878869448183e-06, + "loss": 0.4622, + "step": 16490 + }, + { + "epoch": 2.69, + "learning_rate": 2.1186630776132797e-06, + "loss": 0.4624, + "step": 16500 + }, + { + "epoch": 2.69, + "learning_rate": 2.107447285778376e-06, + "loss": 0.4644, + "step": 16510 + }, + { + "epoch": 2.7, + "learning_rate": 2.0962314939434725e-06, + "loss": 0.472, + "step": 16520 + }, + { + "epoch": 2.7, + "learning_rate": 2.085015702108569e-06, + "loss": 0.4694, + "step": 16530 + }, + { + "epoch": 2.7, + "learning_rate": 2.0737999102736656e-06, + "loss": 0.4766, + "step": 16540 + }, + { + "epoch": 2.7, + "learning_rate": 2.062584118438762e-06, + "loss": 0.467, + "step": 16550 + }, + { + "epoch": 2.7, + "learning_rate": 2.0513683266038583e-06, + "loss": 0.4701, + "step": 16560 + }, + { + "epoch": 2.7, + "learning_rate": 2.040152534768955e-06, + "loss": 0.4611, + "step": 16570 + }, + { + "epoch": 2.71, + "learning_rate": 2.028936742934051e-06, + "loss": 0.4631, + "step": 16580 + }, + { + "epoch": 2.71, + "learning_rate": 2.017720951099148e-06, + "loss": 0.4672, + "step": 16590 + }, + { + "epoch": 2.71, + "learning_rate": 2.006505159264244e-06, + "loss": 0.4649, + "step": 16600 + }, + { + "epoch": 2.71, + "learning_rate": 1.9952893674293405e-06, + "loss": 0.4703, + "step": 16610 + }, + { + "epoch": 2.71, + "learning_rate": 1.9840735755944373e-06, + "loss": 0.4685, + "step": 16620 + }, + { + "epoch": 2.71, + "learning_rate": 1.9728577837595337e-06, + "loss": 0.4678, + "step": 16630 + }, + { + "epoch": 2.72, + "learning_rate": 1.96164199192463e-06, + "loss": 0.4651, + "step": 16640 + }, + { + "epoch": 2.72, + "learning_rate": 1.9504262000897264e-06, + "loss": 0.4585, + "step": 16650 + }, + { + "epoch": 2.72, + "learning_rate": 1.939210408254823e-06, + "loss": 0.4764, + "step": 16660 + }, + { + "epoch": 2.72, + "learning_rate": 1.9279946164199195e-06, + "loss": 0.4656, + "step": 16670 + }, + { + "epoch": 2.72, + "learning_rate": 1.916778824585016e-06, + "loss": 0.4627, + "step": 16680 + }, + { + "epoch": 2.72, + "learning_rate": 1.9055630327501124e-06, + "loss": 0.4646, + "step": 16690 + }, + { + "epoch": 2.73, + "learning_rate": 1.8943472409152088e-06, + "loss": 0.4678, + "step": 16700 + }, + { + "epoch": 2.73, + "learning_rate": 1.8831314490803051e-06, + "loss": 0.4778, + "step": 16710 + }, + { + "epoch": 2.73, + "learning_rate": 1.8719156572454017e-06, + "loss": 0.4632, + "step": 16720 + }, + { + "epoch": 2.73, + "learning_rate": 1.8606998654104983e-06, + "loss": 0.474, + "step": 16730 + }, + { + "epoch": 2.73, + "learning_rate": 1.8494840735755944e-06, + "loss": 0.4667, + "step": 16740 + }, + { + "epoch": 2.73, + "learning_rate": 1.838268281740691e-06, + "loss": 0.4698, + "step": 16750 + }, + { + "epoch": 2.73, + "learning_rate": 1.8270524899057876e-06, + "loss": 0.4575, + "step": 16760 + }, + { + "epoch": 2.74, + "learning_rate": 1.815836698070884e-06, + "loss": 0.4755, + "step": 16770 + }, + { + "epoch": 2.74, + "learning_rate": 1.8046209062359805e-06, + "loss": 0.4809, + "step": 16780 + }, + { + "epoch": 2.74, + "learning_rate": 1.7934051144010769e-06, + "loss": 0.4655, + "step": 16790 + }, + { + "epoch": 2.74, + "learning_rate": 1.7821893225661732e-06, + "loss": 0.4685, + "step": 16800 + }, + { + "epoch": 2.74, + "learning_rate": 1.7709735307312698e-06, + "loss": 0.4659, + "step": 16810 + }, + { + "epoch": 2.74, + "learning_rate": 1.7597577388963663e-06, + "loss": 0.4676, + "step": 16820 + }, + { + "epoch": 2.75, + "learning_rate": 1.7485419470614625e-06, + "loss": 0.4582, + "step": 16830 + }, + { + "epoch": 2.75, + "learning_rate": 1.737326155226559e-06, + "loss": 0.4692, + "step": 16840 + }, + { + "epoch": 2.75, + "learning_rate": 1.7261103633916556e-06, + "loss": 0.4674, + "step": 16850 + }, + { + "epoch": 2.75, + "learning_rate": 1.714894571556752e-06, + "loss": 0.4644, + "step": 16860 + }, + { + "epoch": 2.75, + "learning_rate": 1.7036787797218486e-06, + "loss": 0.4727, + "step": 16870 + }, + { + "epoch": 2.75, + "learning_rate": 1.6924629878869451e-06, + "loss": 0.4748, + "step": 16880 + }, + { + "epoch": 2.76, + "learning_rate": 1.6812471960520413e-06, + "loss": 0.4653, + "step": 16890 + }, + { + "epoch": 2.76, + "learning_rate": 1.6700314042171378e-06, + "loss": 0.4676, + "step": 16900 + }, + { + "epoch": 2.76, + "learning_rate": 1.6588156123822344e-06, + "loss": 0.4725, + "step": 16910 + }, + { + "epoch": 2.76, + "learning_rate": 1.6475998205473306e-06, + "loss": 0.4715, + "step": 16920 + }, + { + "epoch": 2.76, + "learning_rate": 1.6363840287124271e-06, + "loss": 0.4602, + "step": 16930 + }, + { + "epoch": 2.76, + "learning_rate": 1.6251682368775237e-06, + "loss": 0.4657, + "step": 16940 + }, + { + "epoch": 2.77, + "learning_rate": 1.6139524450426203e-06, + "loss": 0.4735, + "step": 16950 + }, + { + "epoch": 2.77, + "learning_rate": 1.6027366532077166e-06, + "loss": 0.4625, + "step": 16960 + }, + { + "epoch": 2.77, + "learning_rate": 1.5915208613728132e-06, + "loss": 0.4575, + "step": 16970 + }, + { + "epoch": 2.77, + "learning_rate": 1.5803050695379095e-06, + "loss": 0.4619, + "step": 16980 + }, + { + "epoch": 2.77, + "learning_rate": 1.569089277703006e-06, + "loss": 0.4673, + "step": 16990 + }, + { + "epoch": 2.77, + "learning_rate": 1.5578734858681025e-06, + "loss": 0.4607, + "step": 17000 + }, + { + "epoch": 2.78, + "learning_rate": 1.546657694033199e-06, + "loss": 0.4702, + "step": 17010 + }, + { + "epoch": 2.78, + "learning_rate": 1.5354419021982952e-06, + "loss": 0.4622, + "step": 17020 + }, + { + "epoch": 2.78, + "learning_rate": 1.5242261103633918e-06, + "loss": 0.46, + "step": 17030 + }, + { + "epoch": 2.78, + "learning_rate": 1.5130103185284883e-06, + "loss": 0.4759, + "step": 17040 + }, + { + "epoch": 2.78, + "learning_rate": 1.5017945266935847e-06, + "loss": 0.4663, + "step": 17050 + }, + { + "epoch": 2.78, + "learning_rate": 1.4905787348586812e-06, + "loss": 0.4773, + "step": 17060 + }, + { + "epoch": 2.79, + "learning_rate": 1.4793629430237776e-06, + "loss": 0.4588, + "step": 17070 + }, + { + "epoch": 2.79, + "learning_rate": 1.468147151188874e-06, + "loss": 0.4705, + "step": 17080 + }, + { + "epoch": 2.79, + "learning_rate": 1.4569313593539705e-06, + "loss": 0.466, + "step": 17090 + }, + { + "epoch": 2.79, + "learning_rate": 1.445715567519067e-06, + "loss": 0.4704, + "step": 17100 + }, + { + "epoch": 2.79, + "learning_rate": 1.4344997756841632e-06, + "loss": 0.4631, + "step": 17110 + }, + { + "epoch": 2.79, + "learning_rate": 1.4232839838492598e-06, + "loss": 0.4658, + "step": 17120 + }, + { + "epoch": 2.8, + "learning_rate": 1.4120681920143564e-06, + "loss": 0.4629, + "step": 17130 + }, + { + "epoch": 2.8, + "learning_rate": 1.4008524001794527e-06, + "loss": 0.4614, + "step": 17140 + }, + { + "epoch": 2.8, + "learning_rate": 1.3896366083445493e-06, + "loss": 0.4742, + "step": 17150 + }, + { + "epoch": 2.8, + "learning_rate": 1.3784208165096457e-06, + "loss": 0.4547, + "step": 17160 + }, + { + "epoch": 2.8, + "learning_rate": 1.367205024674742e-06, + "loss": 0.4481, + "step": 17170 + }, + { + "epoch": 2.8, + "learning_rate": 1.3559892328398386e-06, + "loss": 0.4628, + "step": 17180 + }, + { + "epoch": 2.81, + "learning_rate": 1.3447734410049352e-06, + "loss": 0.47, + "step": 17190 + }, + { + "epoch": 2.81, + "learning_rate": 1.3335576491700313e-06, + "loss": 0.4543, + "step": 17200 + }, + { + "epoch": 2.81, + "learning_rate": 1.3223418573351279e-06, + "loss": 0.4646, + "step": 17210 + }, + { + "epoch": 2.81, + "learning_rate": 1.3111260655002244e-06, + "loss": 0.4705, + "step": 17220 + }, + { + "epoch": 2.81, + "learning_rate": 1.2999102736653208e-06, + "loss": 0.4649, + "step": 17230 + }, + { + "epoch": 2.81, + "learning_rate": 1.2886944818304174e-06, + "loss": 0.4637, + "step": 17240 + }, + { + "epoch": 2.81, + "learning_rate": 1.2774786899955137e-06, + "loss": 0.4602, + "step": 17250 + }, + { + "epoch": 2.82, + "learning_rate": 1.2662628981606103e-06, + "loss": 0.4743, + "step": 17260 + }, + { + "epoch": 2.82, + "learning_rate": 1.2550471063257067e-06, + "loss": 0.4649, + "step": 17270 + }, + { + "epoch": 2.82, + "learning_rate": 1.2438313144908032e-06, + "loss": 0.4541, + "step": 17280 + }, + { + "epoch": 2.82, + "learning_rate": 1.2326155226558996e-06, + "loss": 0.4619, + "step": 17290 + }, + { + "epoch": 2.82, + "learning_rate": 1.221399730820996e-06, + "loss": 0.4783, + "step": 17300 + }, + { + "epoch": 2.82, + "learning_rate": 1.2101839389860925e-06, + "loss": 0.451, + "step": 17310 + }, + { + "epoch": 2.83, + "learning_rate": 1.1989681471511889e-06, + "loss": 0.4639, + "step": 17320 + }, + { + "epoch": 2.83, + "learning_rate": 1.1877523553162854e-06, + "loss": 0.4599, + "step": 17330 + }, + { + "epoch": 2.83, + "learning_rate": 1.176536563481382e-06, + "loss": 0.4702, + "step": 17340 + }, + { + "epoch": 2.83, + "learning_rate": 1.1653207716464784e-06, + "loss": 0.4689, + "step": 17350 + }, + { + "epoch": 2.83, + "learning_rate": 1.154104979811575e-06, + "loss": 0.4683, + "step": 17360 + }, + { + "epoch": 2.83, + "learning_rate": 1.1428891879766713e-06, + "loss": 0.4672, + "step": 17370 + }, + { + "epoch": 2.84, + "learning_rate": 1.1316733961417676e-06, + "loss": 0.4728, + "step": 17380 + }, + { + "epoch": 2.84, + "learning_rate": 1.1204576043068642e-06, + "loss": 0.4736, + "step": 17390 + }, + { + "epoch": 2.84, + "learning_rate": 1.1092418124719606e-06, + "loss": 0.461, + "step": 17400 + }, + { + "epoch": 2.84, + "learning_rate": 1.098026020637057e-06, + "loss": 0.4564, + "step": 17410 + }, + { + "epoch": 2.84, + "learning_rate": 1.0868102288021535e-06, + "loss": 0.4573, + "step": 17420 + }, + { + "epoch": 2.84, + "learning_rate": 1.07559443696725e-06, + "loss": 0.4646, + "step": 17430 + }, + { + "epoch": 2.85, + "learning_rate": 1.0643786451323464e-06, + "loss": 0.4579, + "step": 17440 + }, + { + "epoch": 2.85, + "learning_rate": 1.053162853297443e-06, + "loss": 0.4557, + "step": 17450 + }, + { + "epoch": 2.85, + "learning_rate": 1.0419470614625394e-06, + "loss": 0.4654, + "step": 17460 + }, + { + "epoch": 2.85, + "learning_rate": 1.0307312696276357e-06, + "loss": 0.4673, + "step": 17470 + }, + { + "epoch": 2.85, + "learning_rate": 1.0195154777927323e-06, + "loss": 0.4602, + "step": 17480 + }, + { + "epoch": 2.85, + "learning_rate": 1.0082996859578286e-06, + "loss": 0.4536, + "step": 17490 + }, + { + "epoch": 2.86, + "learning_rate": 9.970838941229252e-07, + "loss": 0.4669, + "step": 17500 + }, + { + "epoch": 2.86, + "learning_rate": 9.858681022880216e-07, + "loss": 0.4633, + "step": 17510 + }, + { + "epoch": 2.86, + "learning_rate": 9.746523104531181e-07, + "loss": 0.468, + "step": 17520 + }, + { + "epoch": 2.86, + "learning_rate": 9.634365186182145e-07, + "loss": 0.4733, + "step": 17530 + }, + { + "epoch": 2.86, + "learning_rate": 9.52220726783311e-07, + "loss": 0.476, + "step": 17540 + }, + { + "epoch": 2.86, + "learning_rate": 9.410049349484074e-07, + "loss": 0.4666, + "step": 17550 + }, + { + "epoch": 2.87, + "learning_rate": 9.29789143113504e-07, + "loss": 0.4684, + "step": 17560 + }, + { + "epoch": 2.87, + "learning_rate": 9.185733512786003e-07, + "loss": 0.4673, + "step": 17570 + }, + { + "epoch": 2.87, + "learning_rate": 9.073575594436967e-07, + "loss": 0.4658, + "step": 17580 + }, + { + "epoch": 2.87, + "learning_rate": 8.961417676087933e-07, + "loss": 0.4626, + "step": 17590 + }, + { + "epoch": 2.87, + "learning_rate": 8.849259757738897e-07, + "loss": 0.4619, + "step": 17600 + }, + { + "epoch": 2.87, + "learning_rate": 8.737101839389861e-07, + "loss": 0.4563, + "step": 17610 + }, + { + "epoch": 2.88, + "learning_rate": 8.624943921040827e-07, + "loss": 0.4705, + "step": 17620 + }, + { + "epoch": 2.88, + "learning_rate": 8.51278600269179e-07, + "loss": 0.4573, + "step": 17630 + }, + { + "epoch": 2.88, + "learning_rate": 8.400628084342756e-07, + "loss": 0.4791, + "step": 17640 + }, + { + "epoch": 2.88, + "learning_rate": 8.28847016599372e-07, + "loss": 0.458, + "step": 17650 + }, + { + "epoch": 2.88, + "learning_rate": 8.176312247644684e-07, + "loss": 0.4609, + "step": 17660 + }, + { + "epoch": 2.88, + "learning_rate": 8.06415432929565e-07, + "loss": 0.4632, + "step": 17670 + }, + { + "epoch": 2.89, + "learning_rate": 7.951996410946613e-07, + "loss": 0.458, + "step": 17680 + }, + { + "epoch": 2.89, + "learning_rate": 7.839838492597578e-07, + "loss": 0.4806, + "step": 17690 + }, + { + "epoch": 2.89, + "learning_rate": 7.727680574248544e-07, + "loss": 0.467, + "step": 17700 + }, + { + "epoch": 2.89, + "learning_rate": 7.615522655899507e-07, + "loss": 0.4666, + "step": 17710 + }, + { + "epoch": 2.89, + "learning_rate": 7.503364737550471e-07, + "loss": 0.4675, + "step": 17720 + }, + { + "epoch": 2.89, + "learning_rate": 7.391206819201436e-07, + "loss": 0.4607, + "step": 17730 + }, + { + "epoch": 2.89, + "learning_rate": 7.279048900852401e-07, + "loss": 0.4656, + "step": 17740 + }, + { + "epoch": 2.9, + "learning_rate": 7.166890982503365e-07, + "loss": 0.462, + "step": 17750 + }, + { + "epoch": 2.9, + "learning_rate": 7.05473306415433e-07, + "loss": 0.4694, + "step": 17760 + }, + { + "epoch": 2.9, + "learning_rate": 6.942575145805294e-07, + "loss": 0.4644, + "step": 17770 + }, + { + "epoch": 2.9, + "learning_rate": 6.830417227456259e-07, + "loss": 0.4688, + "step": 17780 + }, + { + "epoch": 2.9, + "learning_rate": 6.718259309107224e-07, + "loss": 0.4758, + "step": 17790 + }, + { + "epoch": 2.9, + "learning_rate": 6.606101390758188e-07, + "loss": 0.4581, + "step": 17800 + }, + { + "epoch": 2.91, + "learning_rate": 6.493943472409153e-07, + "loss": 0.4693, + "step": 17810 + }, + { + "epoch": 2.91, + "learning_rate": 6.381785554060117e-07, + "loss": 0.4669, + "step": 17820 + }, + { + "epoch": 2.91, + "learning_rate": 6.269627635711082e-07, + "loss": 0.4576, + "step": 17830 + }, + { + "epoch": 2.91, + "learning_rate": 6.157469717362046e-07, + "loss": 0.4635, + "step": 17840 + }, + { + "epoch": 2.91, + "learning_rate": 6.045311799013011e-07, + "loss": 0.4788, + "step": 17850 + }, + { + "epoch": 2.91, + "learning_rate": 5.933153880663976e-07, + "loss": 0.4627, + "step": 17860 + }, + { + "epoch": 2.92, + "learning_rate": 5.820995962314939e-07, + "loss": 0.4625, + "step": 17870 + }, + { + "epoch": 2.92, + "learning_rate": 5.708838043965905e-07, + "loss": 0.4626, + "step": 17880 + }, + { + "epoch": 2.92, + "learning_rate": 5.596680125616869e-07, + "loss": 0.4663, + "step": 17890 + }, + { + "epoch": 2.92, + "learning_rate": 5.484522207267833e-07, + "loss": 0.4782, + "step": 17900 + }, + { + "epoch": 2.92, + "learning_rate": 5.372364288918798e-07, + "loss": 0.4608, + "step": 17910 + }, + { + "epoch": 2.92, + "learning_rate": 5.260206370569762e-07, + "loss": 0.4672, + "step": 17920 + }, + { + "epoch": 2.93, + "learning_rate": 5.148048452220728e-07, + "loss": 0.4614, + "step": 17930 + }, + { + "epoch": 2.93, + "learning_rate": 5.035890533871692e-07, + "loss": 0.4591, + "step": 17940 + }, + { + "epoch": 2.93, + "learning_rate": 4.923732615522656e-07, + "loss": 0.4624, + "step": 17950 + }, + { + "epoch": 2.93, + "learning_rate": 4.811574697173621e-07, + "loss": 0.4589, + "step": 17960 + }, + { + "epoch": 2.93, + "learning_rate": 4.699416778824585e-07, + "loss": 0.4598, + "step": 17970 + }, + { + "epoch": 2.93, + "learning_rate": 4.58725886047555e-07, + "loss": 0.4641, + "step": 17980 + }, + { + "epoch": 2.94, + "learning_rate": 4.4751009421265147e-07, + "loss": 0.4718, + "step": 17990 + }, + { + "epoch": 2.94, + "learning_rate": 4.3629430237774793e-07, + "loss": 0.4635, + "step": 18000 + }, + { + "epoch": 2.94, + "learning_rate": 4.2507851054284434e-07, + "loss": 0.4641, + "step": 18010 + }, + { + "epoch": 2.94, + "learning_rate": 4.138627187079408e-07, + "loss": 0.48, + "step": 18020 + }, + { + "epoch": 2.94, + "learning_rate": 4.0264692687303727e-07, + "loss": 0.4724, + "step": 18030 + }, + { + "epoch": 2.94, + "learning_rate": 3.914311350381337e-07, + "loss": 0.4706, + "step": 18040 + }, + { + "epoch": 2.95, + "learning_rate": 3.8021534320323014e-07, + "loss": 0.4717, + "step": 18050 + }, + { + "epoch": 2.95, + "learning_rate": 3.6899955136832666e-07, + "loss": 0.4662, + "step": 18060 + }, + { + "epoch": 2.95, + "learning_rate": 3.577837595334231e-07, + "loss": 0.4572, + "step": 18070 + }, + { + "epoch": 2.95, + "learning_rate": 3.4656796769851953e-07, + "loss": 0.466, + "step": 18080 + }, + { + "epoch": 2.95, + "learning_rate": 3.35352175863616e-07, + "loss": 0.47, + "step": 18090 + }, + { + "epoch": 2.95, + "learning_rate": 3.2413638402871246e-07, + "loss": 0.4666, + "step": 18100 + }, + { + "epoch": 2.96, + "learning_rate": 3.1292059219380887e-07, + "loss": 0.4632, + "step": 18110 + }, + { + "epoch": 2.96, + "learning_rate": 3.0170480035890533e-07, + "loss": 0.4671, + "step": 18120 + }, + { + "epoch": 2.96, + "learning_rate": 2.9048900852400185e-07, + "loss": 0.4745, + "step": 18130 + }, + { + "epoch": 2.96, + "learning_rate": 2.7927321668909826e-07, + "loss": 0.4666, + "step": 18140 + }, + { + "epoch": 2.96, + "learning_rate": 2.680574248541947e-07, + "loss": 0.4636, + "step": 18150 + }, + { + "epoch": 2.96, + "learning_rate": 2.568416330192912e-07, + "loss": 0.4618, + "step": 18160 + }, + { + "epoch": 2.97, + "learning_rate": 2.4562584118438765e-07, + "loss": 0.4618, + "step": 18170 + }, + { + "epoch": 2.97, + "learning_rate": 2.344100493494841e-07, + "loss": 0.479, + "step": 18180 + }, + { + "epoch": 2.97, + "learning_rate": 2.2319425751458055e-07, + "loss": 0.4626, + "step": 18190 + }, + { + "epoch": 2.97, + "learning_rate": 2.11978465679677e-07, + "loss": 0.4701, + "step": 18200 + }, + { + "epoch": 2.97, + "learning_rate": 2.0076267384477345e-07, + "loss": 0.4585, + "step": 18210 + }, + { + "epoch": 2.97, + "learning_rate": 1.895468820098699e-07, + "loss": 0.4665, + "step": 18220 + }, + { + "epoch": 2.97, + "learning_rate": 1.7833109017496637e-07, + "loss": 0.4693, + "step": 18230 + }, + { + "epoch": 2.98, + "learning_rate": 1.671152983400628e-07, + "loss": 0.4653, + "step": 18240 + }, + { + "epoch": 2.98, + "learning_rate": 1.5589950650515927e-07, + "loss": 0.4653, + "step": 18250 + }, + { + "epoch": 2.98, + "learning_rate": 1.4468371467025574e-07, + "loss": 0.4652, + "step": 18260 + }, + { + "epoch": 2.98, + "learning_rate": 1.3346792283535217e-07, + "loss": 0.4618, + "step": 18270 + }, + { + "epoch": 2.98, + "learning_rate": 1.2225213100044864e-07, + "loss": 0.4677, + "step": 18280 + }, + { + "epoch": 2.98, + "learning_rate": 1.110363391655451e-07, + "loss": 0.4629, + "step": 18290 + }, + { + "epoch": 2.99, + "learning_rate": 9.982054733064156e-08, + "loss": 0.4664, + "step": 18300 + }, + { + "epoch": 2.99, + "learning_rate": 8.8604755495738e-08, + "loss": 0.4599, + "step": 18310 + }, + { + "epoch": 2.99, + "learning_rate": 7.738896366083446e-08, + "loss": 0.4586, + "step": 18320 + }, + { + "epoch": 2.99, + "learning_rate": 6.617317182593091e-08, + "loss": 0.4506, + "step": 18330 + }, + { + "epoch": 2.99, + "learning_rate": 5.4957379991027376e-08, + "loss": 0.467, + "step": 18340 + }, + { + "epoch": 2.99, + "learning_rate": 4.374158815612382e-08, + "loss": 0.4655, + "step": 18350 + }, + { + "epoch": 3.0, + "learning_rate": 3.252579632122028e-08, + "loss": 0.4675, + "step": 18360 + }, + { + "epoch": 3.0, + "learning_rate": 2.131000448631674e-08, + "loss": 0.4673, + "step": 18370 + }, + { + "epoch": 3.0, + "learning_rate": 1.009421265141319e-08, + "loss": 0.4584, + "step": 18380 + } + ], + "logging_steps": 10, + "max_steps": 18384, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 1.2862587547397652e+19, + "trial_name": null, + "trial_params": null +}