{ "best_metric": null, "best_model_checkpoint": null, "epoch": 34.0, "global_step": 6862662, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9998768845092475e-05, "loss": 3.3856, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.999753025866639e-05, "loss": 3.1861, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.999629167224031e-05, "loss": 3.1416, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9995053085814226e-05, "loss": 3.0688, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.999381449938814e-05, "loss": 3.0467, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.999257591296206e-05, "loss": 3.0166, "step": 3000 }, { "epoch": 0.02, "learning_rate": 4.999133980370883e-05, "loss": 3.0395, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.9990101217282745e-05, "loss": 2.9982, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.9988862630856655e-05, "loss": 2.9531, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.9987626521603424e-05, "loss": 2.985, "step": 5000 }, { "epoch": 0.03, "learning_rate": 4.998638793517734e-05, "loss": 2.9528, "step": 5500 }, { "epoch": 0.03, "learning_rate": 4.998514934875126e-05, "loss": 2.9339, "step": 6000 }, { "epoch": 0.03, "learning_rate": 4.9983910762325175e-05, "loss": 2.9353, "step": 6500 }, { "epoch": 0.03, "learning_rate": 4.998267217589909e-05, "loss": 2.9251, "step": 7000 }, { "epoch": 0.04, "learning_rate": 4.998143358947301e-05, "loss": 2.9249, "step": 7500 }, { "epoch": 0.04, "learning_rate": 4.9980195003046926e-05, "loss": 2.922, "step": 8000 }, { "epoch": 0.04, "learning_rate": 4.997895641662084e-05, "loss": 2.9129, "step": 8500 }, { "epoch": 0.04, "learning_rate": 4.997771783019476e-05, "loss": 2.8805, "step": 9000 }, { "epoch": 0.05, "learning_rate": 4.997647924376868e-05, "loss": 2.8891, "step": 9500 }, { "epoch": 0.05, "learning_rate": 4.9975240657342594e-05, "loss": 2.8833, "step": 10000 }, { "epoch": 0.05, "learning_rate": 4.997400207091651e-05, "loss": 2.9069, "step": 10500 }, { "epoch": 0.05, "learning_rate": 4.997276596166328e-05, "loss": 2.8774, "step": 11000 }, { "epoch": 0.06, "learning_rate": 4.997152737523719e-05, "loss": 2.8608, "step": 11500 }, { "epoch": 0.06, "learning_rate": 4.9970288788811106e-05, "loss": 2.8853, "step": 12000 }, { "epoch": 0.06, "learning_rate": 4.996905020238502e-05, "loss": 2.854, "step": 12500 }, { "epoch": 0.06, "learning_rate": 4.996781161595894e-05, "loss": 2.8447, "step": 13000 }, { "epoch": 0.07, "learning_rate": 4.996657550670571e-05, "loss": 2.8466, "step": 13500 }, { "epoch": 0.07, "learning_rate": 4.9965336920279626e-05, "loss": 2.8537, "step": 14000 }, { "epoch": 0.07, "learning_rate": 4.996409833385354e-05, "loss": 2.8332, "step": 14500 }, { "epoch": 0.07, "learning_rate": 4.996285974742746e-05, "loss": 2.8452, "step": 15000 }, { "epoch": 0.08, "learning_rate": 4.996162363817423e-05, "loss": 2.8432, "step": 15500 }, { "epoch": 0.08, "learning_rate": 4.9960385051748146e-05, "loss": 2.846, "step": 16000 }, { "epoch": 0.08, "learning_rate": 4.995914646532206e-05, "loss": 2.8418, "step": 16500 }, { "epoch": 0.08, "learning_rate": 4.995790787889598e-05, "loss": 2.8401, "step": 17000 }, { "epoch": 0.09, "learning_rate": 4.9956669292469896e-05, "loss": 2.8253, "step": 17500 }, { "epoch": 0.09, "learning_rate": 4.9955430706043807e-05, "loss": 2.8343, "step": 18000 }, { "epoch": 0.09, "learning_rate": 4.9954192119617723e-05, "loss": 2.8231, "step": 18500 }, { "epoch": 0.09, "learning_rate": 4.995295353319164e-05, "loss": 2.7981, "step": 19000 }, { "epoch": 0.1, "learning_rate": 4.995171742393841e-05, "loss": 2.8358, "step": 19500 }, { "epoch": 0.1, "learning_rate": 4.9950478837512326e-05, "loss": 2.825, "step": 20000 }, { "epoch": 0.1, "learning_rate": 4.994924025108624e-05, "loss": 2.7965, "step": 20500 }, { "epoch": 0.1, "learning_rate": 4.994800166466016e-05, "loss": 2.8224, "step": 21000 }, { "epoch": 0.11, "learning_rate": 4.9946768032579774e-05, "loss": 2.8108, "step": 21500 }, { "epoch": 0.11, "learning_rate": 4.994552944615369e-05, "loss": 2.8076, "step": 22000 }, { "epoch": 0.11, "learning_rate": 4.994429085972761e-05, "loss": 2.8013, "step": 22500 }, { "epoch": 0.11, "learning_rate": 4.9943052273301525e-05, "loss": 2.818, "step": 23000 }, { "epoch": 0.12, "learning_rate": 4.99418161640483e-05, "loss": 2.8436, "step": 23500 }, { "epoch": 0.12, "learning_rate": 4.994057757762222e-05, "loss": 2.8163, "step": 24000 }, { "epoch": 0.12, "learning_rate": 4.993933899119613e-05, "loss": 2.7991, "step": 24500 }, { "epoch": 0.12, "learning_rate": 4.9938100404770044e-05, "loss": 2.7893, "step": 25000 }, { "epoch": 0.13, "learning_rate": 4.993686181834396e-05, "loss": 2.7931, "step": 25500 }, { "epoch": 0.13, "learning_rate": 4.993562323191788e-05, "loss": 2.8015, "step": 26000 }, { "epoch": 0.13, "learning_rate": 4.9934384645491795e-05, "loss": 2.7764, "step": 26500 }, { "epoch": 0.13, "learning_rate": 4.993314605906571e-05, "loss": 2.7875, "step": 27000 }, { "epoch": 0.14, "learning_rate": 4.993190747263963e-05, "loss": 2.8176, "step": 27500 }, { "epoch": 0.14, "learning_rate": 4.9930668886213546e-05, "loss": 2.7744, "step": 28000 }, { "epoch": 0.14, "learning_rate": 4.992943029978746e-05, "loss": 2.8011, "step": 28500 }, { "epoch": 0.14, "learning_rate": 4.9928194190534225e-05, "loss": 2.7897, "step": 29000 }, { "epoch": 0.15, "learning_rate": 4.992695560410814e-05, "loss": 2.7773, "step": 29500 }, { "epoch": 0.15, "learning_rate": 4.992571701768206e-05, "loss": 2.7895, "step": 30000 }, { "epoch": 0.15, "learning_rate": 4.9924478431255976e-05, "loss": 2.795, "step": 30500 }, { "epoch": 0.15, "learning_rate": 4.992323984482989e-05, "loss": 2.7796, "step": 31000 }, { "epoch": 0.16, "learning_rate": 4.992200125840381e-05, "loss": 2.7868, "step": 31500 }, { "epoch": 0.16, "learning_rate": 4.9920762671977726e-05, "loss": 2.8024, "step": 32000 }, { "epoch": 0.16, "learning_rate": 4.991952408555164e-05, "loss": 2.7609, "step": 32500 }, { "epoch": 0.16, "learning_rate": 4.9918290453471264e-05, "loss": 2.7815, "step": 33000 }, { "epoch": 0.17, "learning_rate": 4.991705186704518e-05, "loss": 2.7409, "step": 33500 }, { "epoch": 0.17, "learning_rate": 4.99158132806191e-05, "loss": 2.7607, "step": 34000 }, { "epoch": 0.17, "learning_rate": 4.9914574694193015e-05, "loss": 2.7929, "step": 34500 }, { "epoch": 0.17, "learning_rate": 4.9913336107766925e-05, "loss": 2.782, "step": 35000 }, { "epoch": 0.18, "learning_rate": 4.99120999985137e-05, "loss": 2.785, "step": 35500 }, { "epoch": 0.18, "learning_rate": 4.991086141208762e-05, "loss": 2.7697, "step": 36000 }, { "epoch": 0.18, "learning_rate": 4.9909622825661534e-05, "loss": 2.7626, "step": 36500 }, { "epoch": 0.18, "learning_rate": 4.99083867164083e-05, "loss": 2.7679, "step": 37000 }, { "epoch": 0.19, "learning_rate": 4.990714812998222e-05, "loss": 2.7565, "step": 37500 }, { "epoch": 0.19, "learning_rate": 4.990590954355614e-05, "loss": 2.7848, "step": 38000 }, { "epoch": 0.19, "learning_rate": 4.99046734343029e-05, "loss": 2.7555, "step": 38500 }, { "epoch": 0.19, "learning_rate": 4.9903434847876816e-05, "loss": 2.7742, "step": 39000 }, { "epoch": 0.2, "learning_rate": 4.990219626145073e-05, "loss": 2.7821, "step": 39500 }, { "epoch": 0.2, "learning_rate": 4.990095767502465e-05, "loss": 2.7616, "step": 40000 }, { "epoch": 0.2, "learning_rate": 4.989971908859857e-05, "loss": 2.7711, "step": 40500 }, { "epoch": 0.2, "learning_rate": 4.9898480502172484e-05, "loss": 2.7499, "step": 41000 }, { "epoch": 0.21, "learning_rate": 4.98972419157464e-05, "loss": 2.7549, "step": 41500 }, { "epoch": 0.21, "learning_rate": 4.989600332932032e-05, "loss": 2.7782, "step": 42000 }, { "epoch": 0.21, "learning_rate": 4.9894764742894234e-05, "loss": 2.748, "step": 42500 }, { "epoch": 0.21, "learning_rate": 4.9893528633641e-05, "loss": 2.7325, "step": 43000 }, { "epoch": 0.22, "learning_rate": 4.989229004721492e-05, "loss": 2.7488, "step": 43500 }, { "epoch": 0.22, "learning_rate": 4.989105146078884e-05, "loss": 2.7606, "step": 44000 }, { "epoch": 0.22, "learning_rate": 4.9889812874362754e-05, "loss": 2.7445, "step": 44500 }, { "epoch": 0.22, "learning_rate": 4.988857428793667e-05, "loss": 2.76, "step": 45000 }, { "epoch": 0.23, "learning_rate": 4.988733570151059e-05, "loss": 2.7375, "step": 45500 }, { "epoch": 0.23, "learning_rate": 4.98860971150845e-05, "loss": 2.7624, "step": 46000 }, { "epoch": 0.23, "learning_rate": 4.9884858528658415e-05, "loss": 2.7588, "step": 46500 }, { "epoch": 0.23, "learning_rate": 4.9883622419405184e-05, "loss": 2.7508, "step": 47000 }, { "epoch": 0.24, "learning_rate": 4.988238631015195e-05, "loss": 2.7562, "step": 47500 }, { "epoch": 0.24, "learning_rate": 4.988114772372587e-05, "loss": 2.7546, "step": 48000 }, { "epoch": 0.24, "learning_rate": 4.9879909137299787e-05, "loss": 2.7657, "step": 48500 }, { "epoch": 0.24, "learning_rate": 4.9878670550873703e-05, "loss": 2.741, "step": 49000 }, { "epoch": 0.25, "learning_rate": 4.9877434441620465e-05, "loss": 2.7813, "step": 49500 }, { "epoch": 0.25, "learning_rate": 4.987619585519438e-05, "loss": 2.7575, "step": 50000 }, { "epoch": 0.25, "learning_rate": 4.98749572687683e-05, "loss": 2.7419, "step": 50500 }, { "epoch": 0.25, "learning_rate": 4.9873718682342216e-05, "loss": 2.7422, "step": 51000 }, { "epoch": 0.26, "learning_rate": 4.987248009591613e-05, "loss": 2.7333, "step": 51500 }, { "epoch": 0.26, "learning_rate": 4.987124150949005e-05, "loss": 2.743, "step": 52000 }, { "epoch": 0.26, "learning_rate": 4.987000540023682e-05, "loss": 2.7217, "step": 52500 }, { "epoch": 0.26, "learning_rate": 4.9868766813810736e-05, "loss": 2.7355, "step": 53000 }, { "epoch": 0.27, "learning_rate": 4.986752822738465e-05, "loss": 2.7393, "step": 53500 }, { "epoch": 0.27, "learning_rate": 4.986628964095857e-05, "loss": 2.7279, "step": 54000 }, { "epoch": 0.27, "learning_rate": 4.986505105453249e-05, "loss": 2.7535, "step": 54500 }, { "epoch": 0.27, "learning_rate": 4.9863812468106404e-05, "loss": 2.7389, "step": 55000 }, { "epoch": 0.27, "learning_rate": 4.986257635885317e-05, "loss": 2.7488, "step": 55500 }, { "epoch": 0.28, "learning_rate": 4.986134024959994e-05, "loss": 2.7452, "step": 56000 }, { "epoch": 0.28, "learning_rate": 4.986010166317385e-05, "loss": 2.7503, "step": 56500 }, { "epoch": 0.28, "learning_rate": 4.985886307674777e-05, "loss": 2.7213, "step": 57000 }, { "epoch": 0.28, "learning_rate": 4.9857624490321685e-05, "loss": 2.7421, "step": 57500 }, { "epoch": 0.29, "learning_rate": 4.98563859038956e-05, "loss": 2.7295, "step": 58000 }, { "epoch": 0.29, "learning_rate": 4.985514731746952e-05, "loss": 2.7164, "step": 58500 }, { "epoch": 0.29, "learning_rate": 4.9853908731043436e-05, "loss": 2.7262, "step": 59000 }, { "epoch": 0.29, "learning_rate": 4.985267014461735e-05, "loss": 2.734, "step": 59500 }, { "epoch": 0.3, "learning_rate": 4.985143155819127e-05, "loss": 2.7186, "step": 60000 }, { "epoch": 0.3, "learning_rate": 4.985019297176519e-05, "loss": 2.739, "step": 60500 }, { "epoch": 0.3, "learning_rate": 4.9848954385339104e-05, "loss": 2.7356, "step": 61000 }, { "epoch": 0.3, "learning_rate": 4.984771579891302e-05, "loss": 2.7406, "step": 61500 }, { "epoch": 0.31, "learning_rate": 4.984647968965979e-05, "loss": 2.7167, "step": 62000 }, { "epoch": 0.31, "learning_rate": 4.9845241103233706e-05, "loss": 2.6958, "step": 62500 }, { "epoch": 0.31, "learning_rate": 4.984400499398047e-05, "loss": 2.7223, "step": 63000 }, { "epoch": 0.31, "learning_rate": 4.9842766407554385e-05, "loss": 2.7279, "step": 63500 }, { "epoch": 0.32, "learning_rate": 4.98415278211283e-05, "loss": 2.7186, "step": 64000 }, { "epoch": 0.32, "learning_rate": 4.984029171187508e-05, "loss": 2.7359, "step": 64500 }, { "epoch": 0.32, "learning_rate": 4.9839053125448995e-05, "loss": 2.7288, "step": 65000 }, { "epoch": 0.32, "learning_rate": 4.983781453902291e-05, "loss": 2.7157, "step": 65500 }, { "epoch": 0.33, "learning_rate": 4.983657595259682e-05, "loss": 2.7194, "step": 66000 }, { "epoch": 0.33, "learning_rate": 4.983533736617074e-05, "loss": 2.7106, "step": 66500 }, { "epoch": 0.33, "learning_rate": 4.9834098779744656e-05, "loss": 2.7313, "step": 67000 }, { "epoch": 0.33, "learning_rate": 4.983286019331857e-05, "loss": 2.7072, "step": 67500 }, { "epoch": 0.34, "learning_rate": 4.983162160689249e-05, "loss": 2.7392, "step": 68000 }, { "epoch": 0.34, "learning_rate": 4.9830383020466406e-05, "loss": 2.7083, "step": 68500 }, { "epoch": 0.34, "learning_rate": 4.982914691121317e-05, "loss": 2.7407, "step": 69000 }, { "epoch": 0.34, "learning_rate": 4.9827908324787085e-05, "loss": 2.7374, "step": 69500 }, { "epoch": 0.35, "learning_rate": 4.9826669738361e-05, "loss": 2.7151, "step": 70000 }, { "epoch": 0.35, "learning_rate": 4.982543115193492e-05, "loss": 2.7057, "step": 70500 }, { "epoch": 0.35, "learning_rate": 4.9824192565508836e-05, "loss": 2.7337, "step": 71000 }, { "epoch": 0.35, "learning_rate": 4.982295397908275e-05, "loss": 2.7017, "step": 71500 }, { "epoch": 0.36, "learning_rate": 4.982171786982952e-05, "loss": 2.7105, "step": 72000 }, { "epoch": 0.36, "learning_rate": 4.982047928340344e-05, "loss": 2.7399, "step": 72500 }, { "epoch": 0.36, "learning_rate": 4.981924565132306e-05, "loss": 2.707, "step": 73000 }, { "epoch": 0.36, "learning_rate": 4.981800954206983e-05, "loss": 2.7199, "step": 73500 }, { "epoch": 0.37, "learning_rate": 4.9816770955643745e-05, "loss": 2.7107, "step": 74000 }, { "epoch": 0.37, "learning_rate": 4.981553236921766e-05, "loss": 2.7136, "step": 74500 }, { "epoch": 0.37, "learning_rate": 4.981429378279158e-05, "loss": 2.711, "step": 75000 }, { "epoch": 0.37, "learning_rate": 4.9813055196365496e-05, "loss": 2.705, "step": 75500 }, { "epoch": 0.38, "learning_rate": 4.981181908711226e-05, "loss": 2.672, "step": 76000 }, { "epoch": 0.38, "learning_rate": 4.9810580500686175e-05, "loss": 2.7155, "step": 76500 }, { "epoch": 0.38, "learning_rate": 4.980934191426009e-05, "loss": 2.7097, "step": 77000 }, { "epoch": 0.38, "learning_rate": 4.980810332783401e-05, "loss": 2.7019, "step": 77500 }, { "epoch": 0.39, "learning_rate": 4.9806864741407926e-05, "loss": 2.7046, "step": 78000 }, { "epoch": 0.39, "learning_rate": 4.980562615498184e-05, "loss": 2.7231, "step": 78500 }, { "epoch": 0.39, "learning_rate": 4.980438756855576e-05, "loss": 2.7074, "step": 79000 }, { "epoch": 0.39, "learning_rate": 4.980314898212968e-05, "loss": 2.7098, "step": 79500 }, { "epoch": 0.4, "learning_rate": 4.9801910395703594e-05, "loss": 2.7095, "step": 80000 }, { "epoch": 0.4, "learning_rate": 4.980067180927751e-05, "loss": 2.6981, "step": 80500 }, { "epoch": 0.4, "learning_rate": 4.979943322285143e-05, "loss": 2.7144, "step": 81000 }, { "epoch": 0.4, "learning_rate": 4.9798194636425344e-05, "loss": 2.7018, "step": 81500 }, { "epoch": 0.41, "learning_rate": 4.979695604999926e-05, "loss": 2.7057, "step": 82000 }, { "epoch": 0.41, "learning_rate": 4.979571746357318e-05, "loss": 2.6865, "step": 82500 }, { "epoch": 0.41, "learning_rate": 4.9794478877147095e-05, "loss": 2.7394, "step": 83000 }, { "epoch": 0.41, "learning_rate": 4.979324029072101e-05, "loss": 2.7101, "step": 83500 }, { "epoch": 0.42, "learning_rate": 4.979200170429493e-05, "loss": 2.7016, "step": 84000 }, { "epoch": 0.42, "learning_rate": 4.979076311786884e-05, "loss": 2.6752, "step": 84500 }, { "epoch": 0.42, "learning_rate": 4.9789524531442756e-05, "loss": 2.7344, "step": 85000 }, { "epoch": 0.42, "learning_rate": 4.978828594501667e-05, "loss": 2.7173, "step": 85500 }, { "epoch": 0.43, "learning_rate": 4.978704735859059e-05, "loss": 2.7205, "step": 86000 }, { "epoch": 0.43, "learning_rate": 4.978581124933736e-05, "loss": 2.6739, "step": 86500 }, { "epoch": 0.43, "learning_rate": 4.9784572662911276e-05, "loss": 2.7142, "step": 87000 }, { "epoch": 0.43, "learning_rate": 4.978333407648519e-05, "loss": 2.6928, "step": 87500 }, { "epoch": 0.44, "learning_rate": 4.97820954900591e-05, "loss": 2.722, "step": 88000 }, { "epoch": 0.44, "learning_rate": 4.978085690363302e-05, "loss": 2.7314, "step": 88500 }, { "epoch": 0.44, "learning_rate": 4.9779620794379795e-05, "loss": 2.7161, "step": 89000 }, { "epoch": 0.44, "learning_rate": 4.977838220795371e-05, "loss": 2.7042, "step": 89500 }, { "epoch": 0.45, "learning_rate": 4.977714362152763e-05, "loss": 2.7171, "step": 90000 }, { "epoch": 0.45, "learning_rate": 4.9775905035101546e-05, "loss": 2.7061, "step": 90500 }, { "epoch": 0.45, "learning_rate": 4.977466892584831e-05, "loss": 2.6724, "step": 91000 }, { "epoch": 0.45, "learning_rate": 4.9773430339422225e-05, "loss": 2.6936, "step": 91500 }, { "epoch": 0.46, "learning_rate": 4.977219175299614e-05, "loss": 2.709, "step": 92000 }, { "epoch": 0.46, "learning_rate": 4.977095316657006e-05, "loss": 2.7078, "step": 92500 }, { "epoch": 0.46, "learning_rate": 4.976971705731683e-05, "loss": 2.6722, "step": 93000 }, { "epoch": 0.46, "learning_rate": 4.9768478470890745e-05, "loss": 2.6995, "step": 93500 }, { "epoch": 0.47, "learning_rate": 4.976723988446466e-05, "loss": 2.6995, "step": 94000 }, { "epoch": 0.47, "learning_rate": 4.976600129803858e-05, "loss": 2.6942, "step": 94500 }, { "epoch": 0.47, "learning_rate": 4.9764762711612495e-05, "loss": 2.69, "step": 95000 }, { "epoch": 0.47, "learning_rate": 4.9763526602359264e-05, "loss": 2.7196, "step": 95500 }, { "epoch": 0.48, "learning_rate": 4.976228801593318e-05, "loss": 2.6655, "step": 96000 }, { "epoch": 0.48, "learning_rate": 4.97610494295071e-05, "loss": 2.7035, "step": 96500 }, { "epoch": 0.48, "learning_rate": 4.9759810843081015e-05, "loss": 2.7225, "step": 97000 }, { "epoch": 0.48, "learning_rate": 4.9758572256654925e-05, "loss": 2.6971, "step": 97500 }, { "epoch": 0.49, "learning_rate": 4.975733367022884e-05, "loss": 2.7039, "step": 98000 }, { "epoch": 0.49, "learning_rate": 4.975609508380276e-05, "loss": 2.6814, "step": 98500 }, { "epoch": 0.49, "learning_rate": 4.9754856497376676e-05, "loss": 2.7068, "step": 99000 }, { "epoch": 0.49, "learning_rate": 4.975361791095059e-05, "loss": 2.6989, "step": 99500 }, { "epoch": 0.5, "learning_rate": 4.975237932452451e-05, "loss": 2.6904, "step": 100000 }, { "epoch": 0.5, "learning_rate": 4.975114073809842e-05, "loss": 2.6863, "step": 100500 }, { "epoch": 0.5, "learning_rate": 4.974990215167234e-05, "loss": 2.6785, "step": 101000 }, { "epoch": 0.5, "learning_rate": 4.974866604241911e-05, "loss": 2.6996, "step": 101500 }, { "epoch": 0.51, "learning_rate": 4.974742993316588e-05, "loss": 2.7096, "step": 102000 }, { "epoch": 0.51, "learning_rate": 4.97461913467398e-05, "loss": 2.6979, "step": 102500 }, { "epoch": 0.51, "learning_rate": 4.9744952760313715e-05, "loss": 2.6932, "step": 103000 }, { "epoch": 0.51, "learning_rate": 4.974371417388763e-05, "loss": 2.6986, "step": 103500 }, { "epoch": 0.52, "learning_rate": 4.974247558746155e-05, "loss": 2.6888, "step": 104000 }, { "epoch": 0.52, "learning_rate": 4.974123700103546e-05, "loss": 2.6854, "step": 104500 }, { "epoch": 0.52, "learning_rate": 4.974000089178223e-05, "loss": 2.697, "step": 105000 }, { "epoch": 0.52, "learning_rate": 4.9738762305356145e-05, "loss": 2.6931, "step": 105500 }, { "epoch": 0.53, "learning_rate": 4.973752371893006e-05, "loss": 2.7098, "step": 106000 }, { "epoch": 0.53, "learning_rate": 4.973628513250398e-05, "loss": 2.6989, "step": 106500 }, { "epoch": 0.53, "learning_rate": 4.9735046546077896e-05, "loss": 2.7059, "step": 107000 }, { "epoch": 0.53, "learning_rate": 4.9733810436824664e-05, "loss": 2.6612, "step": 107500 }, { "epoch": 0.54, "learning_rate": 4.973257185039858e-05, "loss": 2.6945, "step": 108000 }, { "epoch": 0.54, "learning_rate": 4.97313332639725e-05, "loss": 2.6951, "step": 108500 }, { "epoch": 0.54, "learning_rate": 4.9730094677546415e-05, "loss": 2.6871, "step": 109000 }, { "epoch": 0.54, "learning_rate": 4.972885609112033e-05, "loss": 2.6909, "step": 109500 }, { "epoch": 0.54, "learning_rate": 4.972761750469425e-05, "loss": 2.6948, "step": 110000 }, { "epoch": 0.55, "learning_rate": 4.972638139544101e-05, "loss": 2.7037, "step": 110500 }, { "epoch": 0.55, "learning_rate": 4.972514280901493e-05, "loss": 2.6804, "step": 111000 }, { "epoch": 0.55, "learning_rate": 4.9723904222588845e-05, "loss": 2.6895, "step": 111500 }, { "epoch": 0.55, "learning_rate": 4.972266563616276e-05, "loss": 2.6878, "step": 112000 }, { "epoch": 0.56, "learning_rate": 4.972142704973668e-05, "loss": 2.6916, "step": 112500 }, { "epoch": 0.56, "learning_rate": 4.9720188463310596e-05, "loss": 2.6843, "step": 113000 }, { "epoch": 0.56, "learning_rate": 4.9718952354057365e-05, "loss": 2.6903, "step": 113500 }, { "epoch": 0.56, "learning_rate": 4.971771376763128e-05, "loss": 2.6777, "step": 114000 }, { "epoch": 0.57, "learning_rate": 4.97164751812052e-05, "loss": 2.7029, "step": 114500 }, { "epoch": 0.57, "learning_rate": 4.9715236594779115e-05, "loss": 2.7026, "step": 115000 }, { "epoch": 0.57, "learning_rate": 4.971399800835303e-05, "loss": 2.6807, "step": 115500 }, { "epoch": 0.57, "learning_rate": 4.971275942192695e-05, "loss": 2.6868, "step": 116000 }, { "epoch": 0.58, "learning_rate": 4.9711520835500866e-05, "loss": 2.6976, "step": 116500 }, { "epoch": 0.58, "learning_rate": 4.971028472624763e-05, "loss": 2.6817, "step": 117000 }, { "epoch": 0.58, "learning_rate": 4.9709046139821545e-05, "loss": 2.6925, "step": 117500 }, { "epoch": 0.58, "learning_rate": 4.970780755339546e-05, "loss": 2.6972, "step": 118000 }, { "epoch": 0.59, "learning_rate": 4.970656896696938e-05, "loss": 2.6853, "step": 118500 }, { "epoch": 0.59, "learning_rate": 4.9705330380543296e-05, "loss": 2.6735, "step": 119000 }, { "epoch": 0.59, "learning_rate": 4.970409179411721e-05, "loss": 2.702, "step": 119500 }, { "epoch": 0.59, "learning_rate": 4.970285320769113e-05, "loss": 2.6876, "step": 120000 }, { "epoch": 0.6, "learning_rate": 4.970161462126505e-05, "loss": 2.6829, "step": 120500 }, { "epoch": 0.6, "learning_rate": 4.9700378512011815e-05, "loss": 2.6596, "step": 121000 }, { "epoch": 0.6, "learning_rate": 4.969913992558573e-05, "loss": 2.6907, "step": 121500 }, { "epoch": 0.6, "learning_rate": 4.969790133915965e-05, "loss": 2.6968, "step": 122000 }, { "epoch": 0.61, "learning_rate": 4.9696662752733566e-05, "loss": 2.6804, "step": 122500 }, { "epoch": 0.61, "learning_rate": 4.969542664348033e-05, "loss": 2.6914, "step": 123000 }, { "epoch": 0.61, "learning_rate": 4.96941905342271e-05, "loss": 2.6812, "step": 123500 }, { "epoch": 0.61, "learning_rate": 4.9692951947801014e-05, "loss": 2.6607, "step": 124000 }, { "epoch": 0.62, "learning_rate": 4.969171336137493e-05, "loss": 2.686, "step": 124500 }, { "epoch": 0.62, "learning_rate": 4.969047477494885e-05, "loss": 2.6591, "step": 125000 }, { "epoch": 0.62, "learning_rate": 4.9689236188522765e-05, "loss": 2.6824, "step": 125500 }, { "epoch": 0.62, "learning_rate": 4.968799760209668e-05, "loss": 2.694, "step": 126000 }, { "epoch": 0.63, "learning_rate": 4.968676149284345e-05, "loss": 2.6881, "step": 126500 }, { "epoch": 0.63, "learning_rate": 4.968552290641737e-05, "loss": 2.7259, "step": 127000 }, { "epoch": 0.63, "learning_rate": 4.9684284319991284e-05, "loss": 2.68, "step": 127500 }, { "epoch": 0.63, "learning_rate": 4.9683045733565195e-05, "loss": 2.6683, "step": 128000 }, { "epoch": 0.64, "learning_rate": 4.968180714713911e-05, "loss": 2.6481, "step": 128500 }, { "epoch": 0.64, "learning_rate": 4.968056856071303e-05, "loss": 2.6635, "step": 129000 }, { "epoch": 0.64, "learning_rate": 4.96793324514598e-05, "loss": 2.6736, "step": 129500 }, { "epoch": 0.64, "learning_rate": 4.9678093865033714e-05, "loss": 2.7035, "step": 130000 }, { "epoch": 0.65, "learning_rate": 4.967685527860763e-05, "loss": 2.6893, "step": 130500 }, { "epoch": 0.65, "learning_rate": 4.967561669218155e-05, "loss": 2.6946, "step": 131000 }, { "epoch": 0.65, "learning_rate": 4.9674378105755465e-05, "loss": 2.6781, "step": 131500 }, { "epoch": 0.65, "learning_rate": 4.967313951932938e-05, "loss": 2.6633, "step": 132000 }, { "epoch": 0.66, "learning_rate": 4.96719009329033e-05, "loss": 2.6894, "step": 132500 }, { "epoch": 0.66, "learning_rate": 4.9670662346477216e-05, "loss": 2.6659, "step": 133000 }, { "epoch": 0.66, "learning_rate": 4.966942376005113e-05, "loss": 2.668, "step": 133500 }, { "epoch": 0.66, "learning_rate": 4.966818517362505e-05, "loss": 2.6745, "step": 134000 }, { "epoch": 0.67, "learning_rate": 4.9666946587198966e-05, "loss": 2.6407, "step": 134500 }, { "epoch": 0.67, "learning_rate": 4.966571047794573e-05, "loss": 2.6832, "step": 135000 }, { "epoch": 0.67, "learning_rate": 4.96644743686925e-05, "loss": 2.6761, "step": 135500 }, { "epoch": 0.67, "learning_rate": 4.9663235782266414e-05, "loss": 2.7023, "step": 136000 }, { "epoch": 0.68, "learning_rate": 4.966199719584033e-05, "loss": 2.6568, "step": 136500 }, { "epoch": 0.68, "learning_rate": 4.966075860941425e-05, "loss": 2.6889, "step": 137000 }, { "epoch": 0.68, "learning_rate": 4.9659520022988165e-05, "loss": 2.6811, "step": 137500 }, { "epoch": 0.68, "learning_rate": 4.965828391373494e-05, "loss": 2.6648, "step": 138000 }, { "epoch": 0.69, "learning_rate": 4.965704532730885e-05, "loss": 2.668, "step": 138500 }, { "epoch": 0.69, "learning_rate": 4.965580674088277e-05, "loss": 2.6594, "step": 139000 }, { "epoch": 0.69, "learning_rate": 4.9654568154456685e-05, "loss": 2.6537, "step": 139500 }, { "epoch": 0.69, "learning_rate": 4.96533295680306e-05, "loss": 2.7002, "step": 140000 }, { "epoch": 0.7, "learning_rate": 4.965209345877737e-05, "loss": 2.679, "step": 140500 }, { "epoch": 0.7, "learning_rate": 4.965085487235129e-05, "loss": 2.6813, "step": 141000 }, { "epoch": 0.7, "learning_rate": 4.9649618763098056e-05, "loss": 2.6977, "step": 141500 }, { "epoch": 0.7, "learning_rate": 4.964838017667197e-05, "loss": 2.6734, "step": 142000 }, { "epoch": 0.71, "learning_rate": 4.964714159024589e-05, "loss": 2.6827, "step": 142500 }, { "epoch": 0.71, "learning_rate": 4.964590300381981e-05, "loss": 2.6551, "step": 143000 }, { "epoch": 0.71, "learning_rate": 4.9644664417393724e-05, "loss": 2.6671, "step": 143500 }, { "epoch": 0.71, "learning_rate": 4.964342583096764e-05, "loss": 2.6576, "step": 144000 }, { "epoch": 0.72, "learning_rate": 4.96421897217144e-05, "loss": 2.6725, "step": 144500 }, { "epoch": 0.72, "learning_rate": 4.964095113528832e-05, "loss": 2.6767, "step": 145000 }, { "epoch": 0.72, "learning_rate": 4.963971254886224e-05, "loss": 2.6836, "step": 145500 }, { "epoch": 0.72, "learning_rate": 4.9638473962436154e-05, "loss": 2.6378, "step": 146000 }, { "epoch": 0.73, "learning_rate": 4.963723537601007e-05, "loss": 2.6721, "step": 146500 }, { "epoch": 0.73, "learning_rate": 4.963599678958399e-05, "loss": 2.7062, "step": 147000 }, { "epoch": 0.73, "learning_rate": 4.9634758203157904e-05, "loss": 2.6502, "step": 147500 }, { "epoch": 0.73, "learning_rate": 4.9633519616731815e-05, "loss": 2.6751, "step": 148000 }, { "epoch": 0.74, "learning_rate": 4.963228103030573e-05, "loss": 2.6675, "step": 148500 }, { "epoch": 0.74, "learning_rate": 4.963104244387965e-05, "loss": 2.6572, "step": 149000 }, { "epoch": 0.74, "learning_rate": 4.9629803857453565e-05, "loss": 2.6735, "step": 149500 }, { "epoch": 0.74, "learning_rate": 4.962856774820034e-05, "loss": 2.6621, "step": 150000 }, { "epoch": 0.75, "learning_rate": 4.962732916177426e-05, "loss": 2.6808, "step": 150500 }, { "epoch": 0.75, "learning_rate": 4.962609057534817e-05, "loss": 2.6627, "step": 151000 }, { "epoch": 0.75, "learning_rate": 4.9624851988922085e-05, "loss": 2.6605, "step": 151500 }, { "epoch": 0.75, "learning_rate": 4.9623613402496e-05, "loss": 2.6853, "step": 152000 }, { "epoch": 0.76, "learning_rate": 4.962237481606992e-05, "loss": 2.6536, "step": 152500 }, { "epoch": 0.76, "learning_rate": 4.9621136229643836e-05, "loss": 2.6741, "step": 153000 }, { "epoch": 0.76, "learning_rate": 4.961989764321775e-05, "loss": 2.6637, "step": 153500 }, { "epoch": 0.76, "learning_rate": 4.9618666488310225e-05, "loss": 2.65, "step": 154000 }, { "epoch": 0.77, "learning_rate": 4.961742790188414e-05, "loss": 2.6598, "step": 154500 }, { "epoch": 0.77, "learning_rate": 4.961618931545806e-05, "loss": 2.6615, "step": 155000 }, { "epoch": 0.77, "learning_rate": 4.9614950729031976e-05, "loss": 2.6592, "step": 155500 }, { "epoch": 0.77, "learning_rate": 4.9613712142605886e-05, "loss": 2.6707, "step": 156000 }, { "epoch": 0.78, "learning_rate": 4.96124735561798e-05, "loss": 2.6291, "step": 156500 }, { "epoch": 0.78, "learning_rate": 4.961123496975372e-05, "loss": 2.6811, "step": 157000 }, { "epoch": 0.78, "learning_rate": 4.960999638332764e-05, "loss": 2.6534, "step": 157500 }, { "epoch": 0.78, "learning_rate": 4.9608757796901554e-05, "loss": 2.6623, "step": 158000 }, { "epoch": 0.79, "learning_rate": 4.960751921047547e-05, "loss": 2.6498, "step": 158500 }, { "epoch": 0.79, "learning_rate": 4.960628062404939e-05, "loss": 2.6617, "step": 159000 }, { "epoch": 0.79, "learning_rate": 4.9605042037623305e-05, "loss": 2.6545, "step": 159500 }, { "epoch": 0.79, "learning_rate": 4.960380345119722e-05, "loss": 2.6516, "step": 160000 }, { "epoch": 0.8, "learning_rate": 4.960256734194399e-05, "loss": 2.6722, "step": 160500 }, { "epoch": 0.8, "learning_rate": 4.960132875551791e-05, "loss": 2.6724, "step": 161000 }, { "epoch": 0.8, "learning_rate": 4.9600090169091824e-05, "loss": 2.6304, "step": 161500 }, { "epoch": 0.8, "learning_rate": 4.959885158266574e-05, "loss": 2.6624, "step": 162000 }, { "epoch": 0.81, "learning_rate": 4.959761299623966e-05, "loss": 2.6667, "step": 162500 }, { "epoch": 0.81, "learning_rate": 4.9596374409813575e-05, "loss": 2.6568, "step": 163000 }, { "epoch": 0.81, "learning_rate": 4.959513830056034e-05, "loss": 2.6541, "step": 163500 }, { "epoch": 0.81, "learning_rate": 4.9593899714134254e-05, "loss": 2.6696, "step": 164000 }, { "epoch": 0.81, "learning_rate": 4.959266112770817e-05, "loss": 2.6907, "step": 164500 }, { "epoch": 0.82, "learning_rate": 4.959142254128209e-05, "loss": 2.656, "step": 165000 }, { "epoch": 0.82, "learning_rate": 4.9590183954856005e-05, "loss": 2.6893, "step": 165500 }, { "epoch": 0.82, "learning_rate": 4.9588950322775625e-05, "loss": 2.6747, "step": 166000 }, { "epoch": 0.82, "learning_rate": 4.958771173634954e-05, "loss": 2.6528, "step": 166500 }, { "epoch": 0.83, "learning_rate": 4.958647314992346e-05, "loss": 2.6685, "step": 167000 }, { "epoch": 0.83, "learning_rate": 4.9585234563497376e-05, "loss": 2.6476, "step": 167500 }, { "epoch": 0.83, "learning_rate": 4.958399597707129e-05, "loss": 2.66, "step": 168000 }, { "epoch": 0.83, "learning_rate": 4.958275739064521e-05, "loss": 2.6539, "step": 168500 }, { "epoch": 0.84, "learning_rate": 4.958151880421913e-05, "loss": 2.6832, "step": 169000 }, { "epoch": 0.84, "learning_rate": 4.958028021779304e-05, "loss": 2.6409, "step": 169500 }, { "epoch": 0.84, "learning_rate": 4.9579044108539806e-05, "loss": 2.6694, "step": 170000 }, { "epoch": 0.84, "learning_rate": 4.957780552211372e-05, "loss": 2.6599, "step": 170500 }, { "epoch": 0.85, "learning_rate": 4.957656693568764e-05, "loss": 2.6693, "step": 171000 }, { "epoch": 0.85, "learning_rate": 4.957532834926156e-05, "loss": 2.6476, "step": 171500 }, { "epoch": 0.85, "learning_rate": 4.9574089762835474e-05, "loss": 2.6592, "step": 172000 }, { "epoch": 0.85, "learning_rate": 4.957285117640939e-05, "loss": 2.6631, "step": 172500 }, { "epoch": 0.86, "learning_rate": 4.957161258998331e-05, "loss": 2.6674, "step": 173000 }, { "epoch": 0.86, "learning_rate": 4.9570374003557224e-05, "loss": 2.6603, "step": 173500 }, { "epoch": 0.86, "learning_rate": 4.956913541713114e-05, "loss": 2.6461, "step": 174000 }, { "epoch": 0.86, "learning_rate": 4.956789930787791e-05, "loss": 2.6766, "step": 174500 }, { "epoch": 0.87, "learning_rate": 4.956666072145183e-05, "loss": 2.6398, "step": 175000 }, { "epoch": 0.87, "learning_rate": 4.956542461219859e-05, "loss": 2.6699, "step": 175500 }, { "epoch": 0.87, "learning_rate": 4.9564186025772506e-05, "loss": 2.6702, "step": 176000 }, { "epoch": 0.87, "learning_rate": 4.956294743934642e-05, "loss": 2.6357, "step": 176500 }, { "epoch": 0.88, "learning_rate": 4.956170885292034e-05, "loss": 2.6474, "step": 177000 }, { "epoch": 0.88, "learning_rate": 4.956047026649426e-05, "loss": 2.6496, "step": 177500 }, { "epoch": 0.88, "learning_rate": 4.9559231680068174e-05, "loss": 2.664, "step": 178000 }, { "epoch": 0.88, "learning_rate": 4.955799557081494e-05, "loss": 2.6964, "step": 178500 }, { "epoch": 0.89, "learning_rate": 4.955675698438886e-05, "loss": 2.6707, "step": 179000 }, { "epoch": 0.89, "learning_rate": 4.9555518397962776e-05, "loss": 2.654, "step": 179500 }, { "epoch": 0.89, "learning_rate": 4.955427981153669e-05, "loss": 2.6789, "step": 180000 }, { "epoch": 0.89, "learning_rate": 4.9553043702283455e-05, "loss": 2.6482, "step": 180500 }, { "epoch": 0.9, "learning_rate": 4.955180511585737e-05, "loss": 2.6471, "step": 181000 }, { "epoch": 0.9, "learning_rate": 4.955056652943129e-05, "loss": 2.6439, "step": 181500 }, { "epoch": 0.9, "learning_rate": 4.9549327943005206e-05, "loss": 2.6462, "step": 182000 }, { "epoch": 0.9, "learning_rate": 4.954808935657912e-05, "loss": 2.6688, "step": 182500 }, { "epoch": 0.91, "learning_rate": 4.954685077015304e-05, "loss": 2.6614, "step": 183000 }, { "epoch": 0.91, "learning_rate": 4.954561218372696e-05, "loss": 2.673, "step": 183500 }, { "epoch": 0.91, "learning_rate": 4.9544373597300874e-05, "loss": 2.6636, "step": 184000 }, { "epoch": 0.91, "learning_rate": 4.954313748804764e-05, "loss": 2.6533, "step": 184500 }, { "epoch": 0.92, "learning_rate": 4.954189890162156e-05, "loss": 2.6585, "step": 185000 }, { "epoch": 0.92, "learning_rate": 4.9540660315195477e-05, "loss": 2.6397, "step": 185500 }, { "epoch": 0.92, "learning_rate": 4.9539421728769394e-05, "loss": 2.6475, "step": 186000 }, { "epoch": 0.92, "learning_rate": 4.953818314234331e-05, "loss": 2.658, "step": 186500 }, { "epoch": 0.93, "learning_rate": 4.953694455591723e-05, "loss": 2.6754, "step": 187000 }, { "epoch": 0.93, "learning_rate": 4.9535705969491144e-05, "loss": 2.6488, "step": 187500 }, { "epoch": 0.93, "learning_rate": 4.953446738306506e-05, "loss": 2.6816, "step": 188000 }, { "epoch": 0.93, "learning_rate": 4.953322879663898e-05, "loss": 2.6558, "step": 188500 }, { "epoch": 0.94, "learning_rate": 4.953199268738574e-05, "loss": 2.6288, "step": 189000 }, { "epoch": 0.94, "learning_rate": 4.953075410095966e-05, "loss": 2.6406, "step": 189500 }, { "epoch": 0.94, "learning_rate": 4.9529517991706426e-05, "loss": 2.6695, "step": 190000 }, { "epoch": 0.94, "learning_rate": 4.952827940528034e-05, "loss": 2.6565, "step": 190500 }, { "epoch": 0.95, "learning_rate": 4.952704081885426e-05, "loss": 2.6577, "step": 191000 }, { "epoch": 0.95, "learning_rate": 4.952580470960103e-05, "loss": 2.6499, "step": 191500 }, { "epoch": 0.95, "learning_rate": 4.95245686003478e-05, "loss": 2.6423, "step": 192000 }, { "epoch": 0.95, "learning_rate": 4.9523330013921714e-05, "loss": 2.6735, "step": 192500 }, { "epoch": 0.96, "learning_rate": 4.952209142749563e-05, "loss": 2.6441, "step": 193000 }, { "epoch": 0.96, "learning_rate": 4.952085284106955e-05, "loss": 2.6782, "step": 193500 }, { "epoch": 0.96, "learning_rate": 4.9519614254643465e-05, "loss": 2.6542, "step": 194000 }, { "epoch": 0.96, "learning_rate": 4.951837566821738e-05, "loss": 2.6564, "step": 194500 }, { "epoch": 0.97, "learning_rate": 4.95171370817913e-05, "loss": 2.6682, "step": 195000 }, { "epoch": 0.97, "learning_rate": 4.951589849536521e-05, "loss": 2.6803, "step": 195500 }, { "epoch": 0.97, "learning_rate": 4.9514659908939126e-05, "loss": 2.6768, "step": 196000 }, { "epoch": 0.97, "learning_rate": 4.951342132251304e-05, "loss": 2.652, "step": 196500 }, { "epoch": 0.98, "learning_rate": 4.951218273608696e-05, "loss": 2.6467, "step": 197000 }, { "epoch": 0.98, "learning_rate": 4.951094414966088e-05, "loss": 2.631, "step": 197500 }, { "epoch": 0.98, "learning_rate": 4.9509705563234794e-05, "loss": 2.6418, "step": 198000 }, { "epoch": 0.98, "learning_rate": 4.950846697680871e-05, "loss": 2.6617, "step": 198500 }, { "epoch": 0.99, "learning_rate": 4.950722839038263e-05, "loss": 2.6553, "step": 199000 }, { "epoch": 0.99, "learning_rate": 4.9505989803956545e-05, "loss": 2.6369, "step": 199500 }, { "epoch": 0.99, "learning_rate": 4.9504753694703307e-05, "loss": 2.6282, "step": 200000 }, { "epoch": 0.99, "learning_rate": 4.9503515108277224e-05, "loss": 2.65, "step": 200500 }, { "epoch": 1.0, "learning_rate": 4.950227652185114e-05, "loss": 2.6651, "step": 201000 }, { "epoch": 1.0, "learning_rate": 4.950103793542506e-05, "loss": 2.6763, "step": 201500 }, { "epoch": 1.0, "eval_accuracy": 0.6236883143982033, "eval_accuracy_mlm": 0.5761070598072291, "eval_accuracy_nsp": 0.8479990900497727, "eval_loss": 2.567748546600342, "eval_runtime": 145.8951, "eval_samples_per_second": 1747.55, "eval_steps_per_second": 72.819, "step": 201843 }, { "epoch": 1.0, "learning_rate": 4.9499801826171826e-05, "loss": 2.6422, "step": 202000 }, { "epoch": 1.0, "learning_rate": 4.949856323974574e-05, "loss": 2.6314, "step": 202500 }, { "epoch": 1.01, "learning_rate": 4.949732465331966e-05, "loss": 2.6069, "step": 203000 }, { "epoch": 1.01, "learning_rate": 4.949608606689358e-05, "loss": 2.621, "step": 203500 }, { "epoch": 1.01, "learning_rate": 4.9494847480467494e-05, "loss": 2.6315, "step": 204000 }, { "epoch": 1.01, "learning_rate": 4.949361137121426e-05, "loss": 2.6192, "step": 204500 }, { "epoch": 1.02, "learning_rate": 4.949237278478818e-05, "loss": 2.6241, "step": 205000 }, { "epoch": 1.02, "learning_rate": 4.949113667553495e-05, "loss": 2.6318, "step": 205500 }, { "epoch": 1.02, "learning_rate": 4.9489898089108865e-05, "loss": 2.6222, "step": 206000 }, { "epoch": 1.02, "learning_rate": 4.948865950268278e-05, "loss": 2.6478, "step": 206500 }, { "epoch": 1.03, "learning_rate": 4.94874209162567e-05, "loss": 2.6204, "step": 207000 }, { "epoch": 1.03, "learning_rate": 4.9486182329830616e-05, "loss": 2.6316, "step": 207500 }, { "epoch": 1.03, "learning_rate": 4.9484946220577385e-05, "loss": 2.6242, "step": 208000 }, { "epoch": 1.03, "learning_rate": 4.94837076341513e-05, "loss": 2.5915, "step": 208500 }, { "epoch": 1.04, "learning_rate": 4.948246904772522e-05, "loss": 2.6172, "step": 209000 }, { "epoch": 1.04, "learning_rate": 4.9481230461299136e-05, "loss": 2.6155, "step": 209500 }, { "epoch": 1.04, "learning_rate": 4.947999187487305e-05, "loss": 2.6064, "step": 210000 }, { "epoch": 1.04, "learning_rate": 4.947875328844697e-05, "loss": 2.6333, "step": 210500 }, { "epoch": 1.05, "learning_rate": 4.947751470202088e-05, "loss": 2.6295, "step": 211000 }, { "epoch": 1.05, "learning_rate": 4.94762761155948e-05, "loss": 2.6262, "step": 211500 }, { "epoch": 1.05, "learning_rate": 4.9475037529168714e-05, "loss": 2.598, "step": 212000 }, { "epoch": 1.05, "learning_rate": 4.947379894274263e-05, "loss": 2.6367, "step": 212500 }, { "epoch": 1.06, "learning_rate": 4.947256035631655e-05, "loss": 2.6183, "step": 213000 }, { "epoch": 1.06, "learning_rate": 4.947132176989046e-05, "loss": 2.6535, "step": 213500 }, { "epoch": 1.06, "learning_rate": 4.9470083183464375e-05, "loss": 2.6075, "step": 214000 }, { "epoch": 1.06, "learning_rate": 4.946884707421114e-05, "loss": 2.5924, "step": 214500 }, { "epoch": 1.07, "learning_rate": 4.946760848778506e-05, "loss": 2.6268, "step": 215000 }, { "epoch": 1.07, "learning_rate": 4.946636990135898e-05, "loss": 2.6207, "step": 215500 }, { "epoch": 1.07, "learning_rate": 4.9465131314932894e-05, "loss": 2.5849, "step": 216000 }, { "epoch": 1.07, "learning_rate": 4.946389520567967e-05, "loss": 2.6141, "step": 216500 }, { "epoch": 1.08, "learning_rate": 4.946265661925359e-05, "loss": 2.6114, "step": 217000 }, { "epoch": 1.08, "learning_rate": 4.94614180328275e-05, "loss": 2.6018, "step": 217500 }, { "epoch": 1.08, "learning_rate": 4.9460179446401414e-05, "loss": 2.6197, "step": 218000 }, { "epoch": 1.08, "learning_rate": 4.945894085997533e-05, "loss": 2.6252, "step": 218500 }, { "epoch": 1.09, "learning_rate": 4.945770227354925e-05, "loss": 2.6027, "step": 219000 }, { "epoch": 1.09, "learning_rate": 4.9456463687123165e-05, "loss": 2.6315, "step": 219500 }, { "epoch": 1.09, "learning_rate": 4.9455225100697075e-05, "loss": 2.6094, "step": 220000 }, { "epoch": 1.09, "learning_rate": 4.9453988991443843e-05, "loss": 2.6246, "step": 220500 }, { "epoch": 1.09, "learning_rate": 4.945275040501776e-05, "loss": 2.6303, "step": 221000 }, { "epoch": 1.1, "learning_rate": 4.945151181859168e-05, "loss": 2.6081, "step": 221500 }, { "epoch": 1.1, "learning_rate": 4.9450273232165594e-05, "loss": 2.6129, "step": 222000 }, { "epoch": 1.1, "learning_rate": 4.944903464573951e-05, "loss": 2.6229, "step": 222500 }, { "epoch": 1.1, "learning_rate": 4.944780101365913e-05, "loss": 2.594, "step": 223000 }, { "epoch": 1.11, "learning_rate": 4.944656242723305e-05, "loss": 2.621, "step": 223500 }, { "epoch": 1.11, "learning_rate": 4.9445323840806966e-05, "loss": 2.6097, "step": 224000 }, { "epoch": 1.11, "learning_rate": 4.944408525438088e-05, "loss": 2.6152, "step": 224500 }, { "epoch": 1.11, "learning_rate": 4.94428466679548e-05, "loss": 2.6206, "step": 225000 }, { "epoch": 1.12, "learning_rate": 4.944161055870157e-05, "loss": 2.6391, "step": 225500 }, { "epoch": 1.12, "learning_rate": 4.9440371972275485e-05, "loss": 2.6487, "step": 226000 }, { "epoch": 1.12, "learning_rate": 4.94391333858494e-05, "loss": 2.5981, "step": 226500 }, { "epoch": 1.12, "learning_rate": 4.943789479942332e-05, "loss": 2.6175, "step": 227000 }, { "epoch": 1.13, "learning_rate": 4.9436656212997236e-05, "loss": 2.6332, "step": 227500 }, { "epoch": 1.13, "learning_rate": 4.9435420103744e-05, "loss": 2.6234, "step": 228000 }, { "epoch": 1.13, "learning_rate": 4.9434181517317915e-05, "loss": 2.6201, "step": 228500 }, { "epoch": 1.13, "learning_rate": 4.943294293089183e-05, "loss": 2.6235, "step": 229000 }, { "epoch": 1.14, "learning_rate": 4.943170434446575e-05, "loss": 2.6339, "step": 229500 }, { "epoch": 1.14, "learning_rate": 4.9430465758039666e-05, "loss": 2.5977, "step": 230000 }, { "epoch": 1.14, "learning_rate": 4.942922717161358e-05, "loss": 2.612, "step": 230500 }, { "epoch": 1.14, "learning_rate": 4.942799106236035e-05, "loss": 2.6263, "step": 231000 }, { "epoch": 1.15, "learning_rate": 4.942675247593427e-05, "loss": 2.6239, "step": 231500 }, { "epoch": 1.15, "learning_rate": 4.9425513889508185e-05, "loss": 2.6085, "step": 232000 }, { "epoch": 1.15, "learning_rate": 4.94242753030821e-05, "loss": 2.6082, "step": 232500 }, { "epoch": 1.15, "learning_rate": 4.942303671665602e-05, "loss": 2.6257, "step": 233000 }, { "epoch": 1.16, "learning_rate": 4.9421798130229936e-05, "loss": 2.6365, "step": 233500 }, { "epoch": 1.16, "learning_rate": 4.942055954380385e-05, "loss": 2.6226, "step": 234000 }, { "epoch": 1.16, "learning_rate": 4.9419323434550615e-05, "loss": 2.6333, "step": 234500 }, { "epoch": 1.16, "learning_rate": 4.941808484812453e-05, "loss": 2.6214, "step": 235000 }, { "epoch": 1.17, "learning_rate": 4.941684626169845e-05, "loss": 2.614, "step": 235500 }, { "epoch": 1.17, "learning_rate": 4.941561015244522e-05, "loss": 2.5987, "step": 236000 }, { "epoch": 1.17, "learning_rate": 4.9414371566019135e-05, "loss": 2.6155, "step": 236500 }, { "epoch": 1.17, "learning_rate": 4.941313297959305e-05, "loss": 2.6424, "step": 237000 }, { "epoch": 1.18, "learning_rate": 4.941189439316697e-05, "loss": 2.6057, "step": 237500 }, { "epoch": 1.18, "learning_rate": 4.9410655806740886e-05, "loss": 2.6237, "step": 238000 }, { "epoch": 1.18, "learning_rate": 4.94094172203148e-05, "loss": 2.6184, "step": 238500 }, { "epoch": 1.18, "learning_rate": 4.940818111106157e-05, "loss": 2.6616, "step": 239000 }, { "epoch": 1.19, "learning_rate": 4.940694252463549e-05, "loss": 2.6191, "step": 239500 }, { "epoch": 1.19, "learning_rate": 4.9405703938209405e-05, "loss": 2.5849, "step": 240000 }, { "epoch": 1.19, "learning_rate": 4.940446535178332e-05, "loss": 2.6178, "step": 240500 }, { "epoch": 1.19, "learning_rate": 4.940322676535723e-05, "loss": 2.6466, "step": 241000 }, { "epoch": 1.2, "learning_rate": 4.940198817893115e-05, "loss": 2.6025, "step": 241500 }, { "epoch": 1.2, "learning_rate": 4.940075206967792e-05, "loss": 2.5923, "step": 242000 }, { "epoch": 1.2, "learning_rate": 4.9399513483251835e-05, "loss": 2.6381, "step": 242500 }, { "epoch": 1.2, "learning_rate": 4.939827489682575e-05, "loss": 2.5819, "step": 243000 }, { "epoch": 1.21, "learning_rate": 4.939703631039967e-05, "loss": 2.6115, "step": 243500 }, { "epoch": 1.21, "learning_rate": 4.9395797723973586e-05, "loss": 2.6289, "step": 244000 }, { "epoch": 1.21, "learning_rate": 4.93945591375475e-05, "loss": 2.5913, "step": 244500 }, { "epoch": 1.21, "learning_rate": 4.939332302829427e-05, "loss": 2.6194, "step": 245000 }, { "epoch": 1.22, "learning_rate": 4.939208444186819e-05, "loss": 2.6232, "step": 245500 }, { "epoch": 1.22, "learning_rate": 4.9390845855442105e-05, "loss": 2.6176, "step": 246000 }, { "epoch": 1.22, "learning_rate": 4.938960726901602e-05, "loss": 2.6158, "step": 246500 }, { "epoch": 1.22, "learning_rate": 4.938836868258994e-05, "loss": 2.6155, "step": 247000 }, { "epoch": 1.23, "learning_rate": 4.9387130096163856e-05, "loss": 2.6407, "step": 247500 }, { "epoch": 1.23, "learning_rate": 4.9385891509737766e-05, "loss": 2.6334, "step": 248000 }, { "epoch": 1.23, "learning_rate": 4.938465292331168e-05, "loss": 2.62, "step": 248500 }, { "epoch": 1.23, "learning_rate": 4.93834143368856e-05, "loss": 2.6266, "step": 249000 }, { "epoch": 1.24, "learning_rate": 4.938217575045952e-05, "loss": 2.612, "step": 249500 }, { "epoch": 1.24, "learning_rate": 4.9380937164033434e-05, "loss": 2.6358, "step": 250000 }, { "epoch": 1.24, "learning_rate": 4.937969857760735e-05, "loss": 2.6289, "step": 250500 }, { "epoch": 1.24, "learning_rate": 4.937846246835412e-05, "loss": 2.6102, "step": 251000 }, { "epoch": 1.25, "learning_rate": 4.9377223881928037e-05, "loss": 2.6004, "step": 251500 }, { "epoch": 1.25, "learning_rate": 4.9375985295501954e-05, "loss": 2.607, "step": 252000 }, { "epoch": 1.25, "learning_rate": 4.937475166342157e-05, "loss": 2.6224, "step": 252500 }, { "epoch": 1.25, "learning_rate": 4.9373513076995484e-05, "loss": 2.6319, "step": 253000 }, { "epoch": 1.26, "learning_rate": 4.93722744905694e-05, "loss": 2.6201, "step": 253500 }, { "epoch": 1.26, "learning_rate": 4.937103590414332e-05, "loss": 2.605, "step": 254000 }, { "epoch": 1.26, "learning_rate": 4.9369797317717235e-05, "loss": 2.6106, "step": 254500 }, { "epoch": 1.26, "learning_rate": 4.936855873129115e-05, "loss": 2.5989, "step": 255000 }, { "epoch": 1.27, "learning_rate": 4.936732014486507e-05, "loss": 2.6324, "step": 255500 }, { "epoch": 1.27, "learning_rate": 4.9366081558438986e-05, "loss": 2.615, "step": 256000 }, { "epoch": 1.27, "learning_rate": 4.9364845449185755e-05, "loss": 2.6136, "step": 256500 }, { "epoch": 1.27, "learning_rate": 4.936360686275967e-05, "loss": 2.6346, "step": 257000 }, { "epoch": 1.28, "learning_rate": 4.936236827633359e-05, "loss": 2.6109, "step": 257500 }, { "epoch": 1.28, "learning_rate": 4.9361129689907506e-05, "loss": 2.629, "step": 258000 }, { "epoch": 1.28, "learning_rate": 4.935989110348142e-05, "loss": 2.6382, "step": 258500 }, { "epoch": 1.28, "learning_rate": 4.935865251705534e-05, "loss": 2.652, "step": 259000 }, { "epoch": 1.29, "learning_rate": 4.9357413930629256e-05, "loss": 2.5949, "step": 259500 }, { "epoch": 1.29, "learning_rate": 4.935617534420317e-05, "loss": 2.6228, "step": 260000 }, { "epoch": 1.29, "learning_rate": 4.935493675777709e-05, "loss": 2.5946, "step": 260500 }, { "epoch": 1.29, "learning_rate": 4.935369817135101e-05, "loss": 2.6195, "step": 261000 }, { "epoch": 1.3, "learning_rate": 4.935245958492492e-05, "loss": 2.6184, "step": 261500 }, { "epoch": 1.3, "learning_rate": 4.9351223475671686e-05, "loss": 2.6138, "step": 262000 }, { "epoch": 1.3, "learning_rate": 4.93499848892456e-05, "loss": 2.5975, "step": 262500 }, { "epoch": 1.3, "learning_rate": 4.934874630281952e-05, "loss": 2.6182, "step": 263000 }, { "epoch": 1.31, "learning_rate": 4.934750771639344e-05, "loss": 2.6013, "step": 263500 }, { "epoch": 1.31, "learning_rate": 4.9346269129967354e-05, "loss": 2.6424, "step": 264000 }, { "epoch": 1.31, "learning_rate": 4.934503302071412e-05, "loss": 2.6397, "step": 264500 }, { "epoch": 1.31, "learning_rate": 4.934379443428804e-05, "loss": 2.6183, "step": 265000 }, { "epoch": 1.32, "learning_rate": 4.9342555847861956e-05, "loss": 2.6236, "step": 265500 }, { "epoch": 1.32, "learning_rate": 4.934131726143587e-05, "loss": 2.6169, "step": 266000 }, { "epoch": 1.32, "learning_rate": 4.934007867500979e-05, "loss": 2.6104, "step": 266500 }, { "epoch": 1.32, "learning_rate": 4.933884008858371e-05, "loss": 2.6056, "step": 267000 }, { "epoch": 1.33, "learning_rate": 4.9337601502157624e-05, "loss": 2.6044, "step": 267500 }, { "epoch": 1.33, "learning_rate": 4.9336362915731534e-05, "loss": 2.5988, "step": 268000 }, { "epoch": 1.33, "learning_rate": 4.933512432930545e-05, "loss": 2.64, "step": 268500 }, { "epoch": 1.33, "learning_rate": 4.933388822005222e-05, "loss": 2.6459, "step": 269000 }, { "epoch": 1.34, "learning_rate": 4.933264963362614e-05, "loss": 2.6069, "step": 269500 }, { "epoch": 1.34, "learning_rate": 4.9331413524372906e-05, "loss": 2.6332, "step": 270000 }, { "epoch": 1.34, "learning_rate": 4.933017493794682e-05, "loss": 2.6211, "step": 270500 }, { "epoch": 1.34, "learning_rate": 4.932893635152074e-05, "loss": 2.637, "step": 271000 }, { "epoch": 1.35, "learning_rate": 4.9327697765094657e-05, "loss": 2.6399, "step": 271500 }, { "epoch": 1.35, "learning_rate": 4.9326459178668573e-05, "loss": 2.6234, "step": 272000 }, { "epoch": 1.35, "learning_rate": 4.9325223069415336e-05, "loss": 2.6221, "step": 272500 }, { "epoch": 1.35, "learning_rate": 4.932398448298925e-05, "loss": 2.6133, "step": 273000 }, { "epoch": 1.36, "learning_rate": 4.932274589656317e-05, "loss": 2.614, "step": 273500 }, { "epoch": 1.36, "learning_rate": 4.9321507310137086e-05, "loss": 2.6026, "step": 274000 }, { "epoch": 1.36, "learning_rate": 4.9320268723711e-05, "loss": 2.5999, "step": 274500 }, { "epoch": 1.36, "learning_rate": 4.931903013728492e-05, "loss": 2.6374, "step": 275000 }, { "epoch": 1.36, "learning_rate": 4.931779155085884e-05, "loss": 2.6182, "step": 275500 }, { "epoch": 1.37, "learning_rate": 4.9316552964432754e-05, "loss": 2.6048, "step": 276000 }, { "epoch": 1.37, "learning_rate": 4.931531685517952e-05, "loss": 2.6308, "step": 276500 }, { "epoch": 1.37, "learning_rate": 4.931408074592629e-05, "loss": 2.6062, "step": 277000 }, { "epoch": 1.37, "learning_rate": 4.931284215950021e-05, "loss": 2.6164, "step": 277500 }, { "epoch": 1.38, "learning_rate": 4.9311603573074126e-05, "loss": 2.6278, "step": 278000 }, { "epoch": 1.38, "learning_rate": 4.9310364986648036e-05, "loss": 2.6067, "step": 278500 }, { "epoch": 1.38, "learning_rate": 4.930912640022195e-05, "loss": 2.6246, "step": 279000 }, { "epoch": 1.38, "learning_rate": 4.930788781379587e-05, "loss": 2.6262, "step": 279500 }, { "epoch": 1.39, "learning_rate": 4.9306649227369786e-05, "loss": 2.6116, "step": 280000 }, { "epoch": 1.39, "learning_rate": 4.93054106409437e-05, "loss": 2.6325, "step": 280500 }, { "epoch": 1.39, "learning_rate": 4.930417453169047e-05, "loss": 2.5979, "step": 281000 }, { "epoch": 1.39, "learning_rate": 4.930293594526439e-05, "loss": 2.6106, "step": 281500 }, { "epoch": 1.4, "learning_rate": 4.9301699836011165e-05, "loss": 2.6094, "step": 282000 }, { "epoch": 1.4, "learning_rate": 4.9300461249585075e-05, "loss": 2.6065, "step": 282500 }, { "epoch": 1.4, "learning_rate": 4.929922266315899e-05, "loss": 2.5977, "step": 283000 }, { "epoch": 1.4, "learning_rate": 4.929798407673291e-05, "loss": 2.6309, "step": 283500 }, { "epoch": 1.41, "learning_rate": 4.9296745490306826e-05, "loss": 2.6134, "step": 284000 }, { "epoch": 1.41, "learning_rate": 4.929550690388074e-05, "loss": 2.6412, "step": 284500 }, { "epoch": 1.41, "learning_rate": 4.929426831745465e-05, "loss": 2.6118, "step": 285000 }, { "epoch": 1.41, "learning_rate": 4.929302973102857e-05, "loss": 2.5835, "step": 285500 }, { "epoch": 1.42, "learning_rate": 4.9291791144602487e-05, "loss": 2.5993, "step": 286000 }, { "epoch": 1.42, "learning_rate": 4.9290555035349255e-05, "loss": 2.614, "step": 286500 }, { "epoch": 1.42, "learning_rate": 4.928931892609603e-05, "loss": 2.5988, "step": 287000 }, { "epoch": 1.42, "learning_rate": 4.928808033966995e-05, "loss": 2.6119, "step": 287500 }, { "epoch": 1.43, "learning_rate": 4.9286841753243865e-05, "loss": 2.6121, "step": 288000 }, { "epoch": 1.43, "learning_rate": 4.928560316681778e-05, "loss": 2.6205, "step": 288500 }, { "epoch": 1.43, "learning_rate": 4.92843645803917e-05, "loss": 2.6136, "step": 289000 }, { "epoch": 1.43, "learning_rate": 4.928312847113846e-05, "loss": 2.6125, "step": 289500 }, { "epoch": 1.44, "learning_rate": 4.928188988471238e-05, "loss": 2.6199, "step": 290000 }, { "epoch": 1.44, "learning_rate": 4.9280651298286295e-05, "loss": 2.612, "step": 290500 }, { "epoch": 1.44, "learning_rate": 4.927941271186021e-05, "loss": 2.5957, "step": 291000 }, { "epoch": 1.44, "learning_rate": 4.927817412543413e-05, "loss": 2.5938, "step": 291500 }, { "epoch": 1.45, "learning_rate": 4.9276935539008045e-05, "loss": 2.6138, "step": 292000 }, { "epoch": 1.45, "learning_rate": 4.9275699429754814e-05, "loss": 2.6162, "step": 292500 }, { "epoch": 1.45, "learning_rate": 4.927446084332873e-05, "loss": 2.6214, "step": 293000 }, { "epoch": 1.45, "learning_rate": 4.927322225690265e-05, "loss": 2.6009, "step": 293500 }, { "epoch": 1.46, "learning_rate": 4.9271983670476565e-05, "loss": 2.6031, "step": 294000 }, { "epoch": 1.46, "learning_rate": 4.927074508405048e-05, "loss": 2.5915, "step": 294500 }, { "epoch": 1.46, "learning_rate": 4.92695064976244e-05, "loss": 2.6289, "step": 295000 }, { "epoch": 1.46, "learning_rate": 4.9268267911198316e-05, "loss": 2.6141, "step": 295500 }, { "epoch": 1.47, "learning_rate": 4.9267029324772226e-05, "loss": 2.6233, "step": 296000 }, { "epoch": 1.47, "learning_rate": 4.926579073834614e-05, "loss": 2.6347, "step": 296500 }, { "epoch": 1.47, "learning_rate": 4.926455215192006e-05, "loss": 2.619, "step": 297000 }, { "epoch": 1.47, "learning_rate": 4.926331604266683e-05, "loss": 2.5973, "step": 297500 }, { "epoch": 1.48, "learning_rate": 4.9262077456240745e-05, "loss": 2.5999, "step": 298000 }, { "epoch": 1.48, "learning_rate": 4.926083886981466e-05, "loss": 2.6199, "step": 298500 }, { "epoch": 1.48, "learning_rate": 4.925960028338857e-05, "loss": 2.6028, "step": 299000 }, { "epoch": 1.48, "learning_rate": 4.925836417413535e-05, "loss": 2.5981, "step": 299500 }, { "epoch": 1.49, "learning_rate": 4.925712806488211e-05, "loss": 2.6159, "step": 300000 }, { "epoch": 1.49, "learning_rate": 4.925588947845603e-05, "loss": 2.6048, "step": 300500 }, { "epoch": 1.49, "learning_rate": 4.9254650892029944e-05, "loss": 2.5982, "step": 301000 }, { "epoch": 1.49, "learning_rate": 4.925341230560386e-05, "loss": 2.6199, "step": 301500 }, { "epoch": 1.5, "learning_rate": 4.925217619635063e-05, "loss": 2.6351, "step": 302000 }, { "epoch": 1.5, "learning_rate": 4.925093760992455e-05, "loss": 2.6151, "step": 302500 }, { "epoch": 1.5, "learning_rate": 4.9249701500671315e-05, "loss": 2.5925, "step": 303000 }, { "epoch": 1.5, "learning_rate": 4.924846291424523e-05, "loss": 2.5977, "step": 303500 }, { "epoch": 1.51, "learning_rate": 4.924722432781915e-05, "loss": 2.6201, "step": 304000 }, { "epoch": 1.51, "learning_rate": 4.9245985741393066e-05, "loss": 2.6132, "step": 304500 }, { "epoch": 1.51, "learning_rate": 4.924474715496698e-05, "loss": 2.6258, "step": 305000 }, { "epoch": 1.51, "learning_rate": 4.92435085685409e-05, "loss": 2.6223, "step": 305500 }, { "epoch": 1.52, "learning_rate": 4.924226998211482e-05, "loss": 2.6127, "step": 306000 }, { "epoch": 1.52, "learning_rate": 4.924103139568873e-05, "loss": 2.6071, "step": 306500 }, { "epoch": 1.52, "learning_rate": 4.9239795286435496e-05, "loss": 2.6147, "step": 307000 }, { "epoch": 1.52, "learning_rate": 4.923855670000941e-05, "loss": 2.6054, "step": 307500 }, { "epoch": 1.53, "learning_rate": 4.923731811358333e-05, "loss": 2.597, "step": 308000 }, { "epoch": 1.53, "learning_rate": 4.9236082004330105e-05, "loss": 2.599, "step": 308500 }, { "epoch": 1.53, "learning_rate": 4.9234843417904016e-05, "loss": 2.5992, "step": 309000 }, { "epoch": 1.53, "learning_rate": 4.923360483147793e-05, "loss": 2.6026, "step": 309500 }, { "epoch": 1.54, "learning_rate": 4.923236624505185e-05, "loss": 2.5938, "step": 310000 }, { "epoch": 1.54, "learning_rate": 4.9231127658625766e-05, "loss": 2.6023, "step": 310500 }, { "epoch": 1.54, "learning_rate": 4.922988907219968e-05, "loss": 2.586, "step": 311000 }, { "epoch": 1.54, "learning_rate": 4.92286504857736e-05, "loss": 2.6111, "step": 311500 }, { "epoch": 1.55, "learning_rate": 4.922741189934752e-05, "loss": 2.6103, "step": 312000 }, { "epoch": 1.55, "learning_rate": 4.922617579009428e-05, "loss": 2.598, "step": 312500 }, { "epoch": 1.55, "learning_rate": 4.9224937203668196e-05, "loss": 2.5982, "step": 313000 }, { "epoch": 1.55, "learning_rate": 4.922369861724211e-05, "loss": 2.6226, "step": 313500 }, { "epoch": 1.56, "learning_rate": 4.922246250798889e-05, "loss": 2.6292, "step": 314000 }, { "epoch": 1.56, "learning_rate": 4.9221223921562806e-05, "loss": 2.6045, "step": 314500 }, { "epoch": 1.56, "learning_rate": 4.921998533513672e-05, "loss": 2.596, "step": 315000 }, { "epoch": 1.56, "learning_rate": 4.921874674871063e-05, "loss": 2.5956, "step": 315500 }, { "epoch": 1.57, "learning_rate": 4.921750816228455e-05, "loss": 2.5923, "step": 316000 }, { "epoch": 1.57, "learning_rate": 4.9216269575858467e-05, "loss": 2.5815, "step": 316500 }, { "epoch": 1.57, "learning_rate": 4.9215030989432383e-05, "loss": 2.6302, "step": 317000 }, { "epoch": 1.57, "learning_rate": 4.92137924030063e-05, "loss": 2.5967, "step": 317500 }, { "epoch": 1.58, "learning_rate": 4.921255381658022e-05, "loss": 2.611, "step": 318000 }, { "epoch": 1.58, "learning_rate": 4.9211315230154134e-05, "loss": 2.5912, "step": 318500 }, { "epoch": 1.58, "learning_rate": 4.921007664372805e-05, "loss": 2.6341, "step": 319000 }, { "epoch": 1.58, "learning_rate": 4.920883805730197e-05, "loss": 2.5869, "step": 319500 }, { "epoch": 1.59, "learning_rate": 4.920759947087588e-05, "loss": 2.648, "step": 320000 }, { "epoch": 1.59, "learning_rate": 4.9206360884449795e-05, "loss": 2.6136, "step": 320500 }, { "epoch": 1.59, "learning_rate": 4.9205124775196564e-05, "loss": 2.5998, "step": 321000 }, { "epoch": 1.59, "learning_rate": 4.920388618877048e-05, "loss": 2.5955, "step": 321500 }, { "epoch": 1.6, "learning_rate": 4.920265007951725e-05, "loss": 2.5852, "step": 322000 }, { "epoch": 1.6, "learning_rate": 4.920141149309117e-05, "loss": 2.6069, "step": 322500 }, { "epoch": 1.6, "learning_rate": 4.9200172906665084e-05, "loss": 2.5923, "step": 323000 }, { "epoch": 1.6, "learning_rate": 4.9198934320239e-05, "loss": 2.6257, "step": 323500 }, { "epoch": 1.61, "learning_rate": 4.919769573381292e-05, "loss": 2.6118, "step": 324000 }, { "epoch": 1.61, "learning_rate": 4.9196459624559686e-05, "loss": 2.6192, "step": 324500 }, { "epoch": 1.61, "learning_rate": 4.9195221038133596e-05, "loss": 2.6028, "step": 325000 }, { "epoch": 1.61, "learning_rate": 4.919398245170751e-05, "loss": 2.6064, "step": 325500 }, { "epoch": 1.62, "learning_rate": 4.919274386528143e-05, "loss": 2.6158, "step": 326000 }, { "epoch": 1.62, "learning_rate": 4.919150527885535e-05, "loss": 2.5847, "step": 326500 }, { "epoch": 1.62, "learning_rate": 4.9190266692429264e-05, "loss": 2.5866, "step": 327000 }, { "epoch": 1.62, "learning_rate": 4.918902810600318e-05, "loss": 2.6172, "step": 327500 }, { "epoch": 1.63, "learning_rate": 4.91877895195771e-05, "loss": 2.6314, "step": 328000 }, { "epoch": 1.63, "learning_rate": 4.918655341032387e-05, "loss": 2.6196, "step": 328500 }, { "epoch": 1.63, "learning_rate": 4.9185317301070636e-05, "loss": 2.6346, "step": 329000 }, { "epoch": 1.63, "learning_rate": 4.918407871464455e-05, "loss": 2.6118, "step": 329500 }, { "epoch": 1.63, "learning_rate": 4.918284012821847e-05, "loss": 2.5716, "step": 330000 }, { "epoch": 1.64, "learning_rate": 4.9181601541792386e-05, "loss": 2.626, "step": 330500 }, { "epoch": 1.64, "learning_rate": 4.9180362955366297e-05, "loss": 2.6039, "step": 331000 }, { "epoch": 1.64, "learning_rate": 4.9179124368940213e-05, "loss": 2.5957, "step": 331500 }, { "epoch": 1.64, "learning_rate": 4.917788825968699e-05, "loss": 2.6115, "step": 332000 }, { "epoch": 1.65, "learning_rate": 4.9176649673260906e-05, "loss": 2.6013, "step": 332500 }, { "epoch": 1.65, "learning_rate": 4.917541108683482e-05, "loss": 2.6131, "step": 333000 }, { "epoch": 1.65, "learning_rate": 4.917417250040874e-05, "loss": 2.5955, "step": 333500 }, { "epoch": 1.65, "learning_rate": 4.917293391398265e-05, "loss": 2.615, "step": 334000 }, { "epoch": 1.66, "learning_rate": 4.917169780472942e-05, "loss": 2.5883, "step": 334500 }, { "epoch": 1.66, "learning_rate": 4.9170459218303336e-05, "loss": 2.6009, "step": 335000 }, { "epoch": 1.66, "learning_rate": 4.916922063187725e-05, "loss": 2.6009, "step": 335500 }, { "epoch": 1.66, "learning_rate": 4.916798204545117e-05, "loss": 2.6285, "step": 336000 }, { "epoch": 1.67, "learning_rate": 4.916674593619794e-05, "loss": 2.6007, "step": 336500 }, { "epoch": 1.67, "learning_rate": 4.9165507349771855e-05, "loss": 2.5952, "step": 337000 }, { "epoch": 1.67, "learning_rate": 4.916426876334577e-05, "loss": 2.5738, "step": 337500 }, { "epoch": 1.67, "learning_rate": 4.916303017691969e-05, "loss": 2.6101, "step": 338000 }, { "epoch": 1.68, "learning_rate": 4.916179406766646e-05, "loss": 2.5811, "step": 338500 }, { "epoch": 1.68, "learning_rate": 4.9160555481240375e-05, "loss": 2.5833, "step": 339000 }, { "epoch": 1.68, "learning_rate": 4.915931689481429e-05, "loss": 2.6013, "step": 339500 }, { "epoch": 1.68, "learning_rate": 4.915807830838821e-05, "loss": 2.6163, "step": 340000 }, { "epoch": 1.69, "learning_rate": 4.9156839721962126e-05, "loss": 2.5969, "step": 340500 }, { "epoch": 1.69, "learning_rate": 4.9155601135536036e-05, "loss": 2.5872, "step": 341000 }, { "epoch": 1.69, "learning_rate": 4.9154365026282805e-05, "loss": 2.5693, "step": 341500 }, { "epoch": 1.69, "learning_rate": 4.915312643985672e-05, "loss": 2.5994, "step": 342000 }, { "epoch": 1.7, "learning_rate": 4.915188785343064e-05, "loss": 2.6187, "step": 342500 }, { "epoch": 1.7, "learning_rate": 4.9150649267004555e-05, "loss": 2.5982, "step": 343000 }, { "epoch": 1.7, "learning_rate": 4.914941068057847e-05, "loss": 2.5968, "step": 343500 }, { "epoch": 1.7, "learning_rate": 4.914817209415239e-05, "loss": 2.5756, "step": 344000 }, { "epoch": 1.71, "learning_rate": 4.9146933507726306e-05, "loss": 2.5962, "step": 344500 }, { "epoch": 1.71, "learning_rate": 4.914569492130022e-05, "loss": 2.6133, "step": 345000 }, { "epoch": 1.71, "learning_rate": 4.914445881204699e-05, "loss": 2.6052, "step": 345500 }, { "epoch": 1.71, "learning_rate": 4.914322022562091e-05, "loss": 2.6143, "step": 346000 }, { "epoch": 1.72, "learning_rate": 4.9141981639194826e-05, "loss": 2.6077, "step": 346500 }, { "epoch": 1.72, "learning_rate": 4.914074552994159e-05, "loss": 2.6198, "step": 347000 }, { "epoch": 1.72, "learning_rate": 4.9139506943515505e-05, "loss": 2.6435, "step": 347500 }, { "epoch": 1.72, "learning_rate": 4.913826835708942e-05, "loss": 2.6145, "step": 348000 }, { "epoch": 1.73, "learning_rate": 4.913702977066334e-05, "loss": 2.5931, "step": 348500 }, { "epoch": 1.73, "learning_rate": 4.9135791184237256e-05, "loss": 2.6031, "step": 349000 }, { "epoch": 1.73, "learning_rate": 4.913455259781117e-05, "loss": 2.599, "step": 349500 }, { "epoch": 1.73, "learning_rate": 4.913331401138509e-05, "loss": 2.6163, "step": 350000 }, { "epoch": 1.74, "learning_rate": 4.9132075424959006e-05, "loss": 2.6104, "step": 350500 }, { "epoch": 1.74, "learning_rate": 4.913083683853292e-05, "loss": 2.6251, "step": 351000 }, { "epoch": 1.74, "learning_rate": 4.912959825210684e-05, "loss": 2.6044, "step": 351500 }, { "epoch": 1.74, "learning_rate": 4.912835966568076e-05, "loss": 2.6131, "step": 352000 }, { "epoch": 1.75, "learning_rate": 4.9127121079254674e-05, "loss": 2.5796, "step": 352500 }, { "epoch": 1.75, "learning_rate": 4.9125882492828584e-05, "loss": 2.6012, "step": 353000 }, { "epoch": 1.75, "learning_rate": 4.912464638357536e-05, "loss": 2.6166, "step": 353500 }, { "epoch": 1.75, "learning_rate": 4.912340779714928e-05, "loss": 2.6058, "step": 354000 }, { "epoch": 1.76, "learning_rate": 4.912216921072319e-05, "loss": 2.6076, "step": 354500 }, { "epoch": 1.76, "learning_rate": 4.9120930624297104e-05, "loss": 2.5742, "step": 355000 }, { "epoch": 1.76, "learning_rate": 4.911969203787102e-05, "loss": 2.5899, "step": 355500 }, { "epoch": 1.76, "learning_rate": 4.911845592861779e-05, "loss": 2.6106, "step": 356000 }, { "epoch": 1.77, "learning_rate": 4.9117217342191706e-05, "loss": 2.6205, "step": 356500 }, { "epoch": 1.77, "learning_rate": 4.9115978755765623e-05, "loss": 2.6254, "step": 357000 }, { "epoch": 1.77, "learning_rate": 4.911474016933954e-05, "loss": 2.6239, "step": 357500 }, { "epoch": 1.77, "learning_rate": 4.911350158291346e-05, "loss": 2.6001, "step": 358000 }, { "epoch": 1.78, "learning_rate": 4.9112262996487374e-05, "loss": 2.5861, "step": 358500 }, { "epoch": 1.78, "learning_rate": 4.911102688723414e-05, "loss": 2.6104, "step": 359000 }, { "epoch": 1.78, "learning_rate": 4.910978830080806e-05, "loss": 2.5978, "step": 359500 }, { "epoch": 1.78, "learning_rate": 4.910854971438198e-05, "loss": 2.5799, "step": 360000 }, { "epoch": 1.79, "learning_rate": 4.910731360512874e-05, "loss": 2.6136, "step": 360500 }, { "epoch": 1.79, "learning_rate": 4.9106075018702656e-05, "loss": 2.6105, "step": 361000 }, { "epoch": 1.79, "learning_rate": 4.910483643227657e-05, "loss": 2.5947, "step": 361500 }, { "epoch": 1.79, "learning_rate": 4.910359784585049e-05, "loss": 2.5731, "step": 362000 }, { "epoch": 1.8, "learning_rate": 4.910236421377011e-05, "loss": 2.6102, "step": 362500 }, { "epoch": 1.8, "learning_rate": 4.910112562734403e-05, "loss": 2.6048, "step": 363000 }, { "epoch": 1.8, "learning_rate": 4.9099887040917944e-05, "loss": 2.6181, "step": 363500 }, { "epoch": 1.8, "learning_rate": 4.909864845449186e-05, "loss": 2.5967, "step": 364000 }, { "epoch": 1.81, "learning_rate": 4.909740986806577e-05, "loss": 2.6098, "step": 364500 }, { "epoch": 1.81, "learning_rate": 4.909617128163969e-05, "loss": 2.6129, "step": 365000 }, { "epoch": 1.81, "learning_rate": 4.9094932695213605e-05, "loss": 2.5773, "step": 365500 }, { "epoch": 1.81, "learning_rate": 4.9093696585960374e-05, "loss": 2.6096, "step": 366000 }, { "epoch": 1.82, "learning_rate": 4.909245799953429e-05, "loss": 2.6181, "step": 366500 }, { "epoch": 1.82, "learning_rate": 4.909121941310821e-05, "loss": 2.5896, "step": 367000 }, { "epoch": 1.82, "learning_rate": 4.9089980826682125e-05, "loss": 2.5851, "step": 367500 }, { "epoch": 1.82, "learning_rate": 4.90887447174289e-05, "loss": 2.6158, "step": 368000 }, { "epoch": 1.83, "learning_rate": 4.908750613100282e-05, "loss": 2.5965, "step": 368500 }, { "epoch": 1.83, "learning_rate": 4.908626754457673e-05, "loss": 2.6232, "step": 369000 }, { "epoch": 1.83, "learning_rate": 4.9085028958150644e-05, "loss": 2.5897, "step": 369500 }, { "epoch": 1.83, "learning_rate": 4.908379037172456e-05, "loss": 2.577, "step": 370000 }, { "epoch": 1.84, "learning_rate": 4.908255178529848e-05, "loss": 2.6109, "step": 370500 }, { "epoch": 1.84, "learning_rate": 4.9081313198872395e-05, "loss": 2.602, "step": 371000 }, { "epoch": 1.84, "learning_rate": 4.9080074612446305e-05, "loss": 2.5674, "step": 371500 }, { "epoch": 1.84, "learning_rate": 4.907883602602022e-05, "loss": 2.5986, "step": 372000 }, { "epoch": 1.85, "learning_rate": 4.907759743959414e-05, "loss": 2.6051, "step": 372500 }, { "epoch": 1.85, "learning_rate": 4.9076358853168056e-05, "loss": 2.5999, "step": 373000 }, { "epoch": 1.85, "learning_rate": 4.907512026674197e-05, "loss": 2.6057, "step": 373500 }, { "epoch": 1.85, "learning_rate": 4.907388168031589e-05, "loss": 2.5942, "step": 374000 }, { "epoch": 1.86, "learning_rate": 4.907264309388981e-05, "loss": 2.6297, "step": 374500 }, { "epoch": 1.86, "learning_rate": 4.9071404507463724e-05, "loss": 2.5977, "step": 375000 }, { "epoch": 1.86, "learning_rate": 4.907016592103764e-05, "loss": 2.5997, "step": 375500 }, { "epoch": 1.86, "learning_rate": 4.906892733461156e-05, "loss": 2.6261, "step": 376000 }, { "epoch": 1.87, "learning_rate": 4.906769370253118e-05, "loss": 2.6139, "step": 376500 }, { "epoch": 1.87, "learning_rate": 4.9066455116105095e-05, "loss": 2.6229, "step": 377000 }, { "epoch": 1.87, "learning_rate": 4.906521652967901e-05, "loss": 2.6, "step": 377500 }, { "epoch": 1.87, "learning_rate": 4.906397794325292e-05, "loss": 2.5966, "step": 378000 }, { "epoch": 1.88, "learning_rate": 4.906273935682684e-05, "loss": 2.5734, "step": 378500 }, { "epoch": 1.88, "learning_rate": 4.9061500770400756e-05, "loss": 2.6094, "step": 379000 }, { "epoch": 1.88, "learning_rate": 4.906026218397467e-05, "loss": 2.5933, "step": 379500 }, { "epoch": 1.88, "learning_rate": 4.905902359754859e-05, "loss": 2.6042, "step": 380000 }, { "epoch": 1.89, "learning_rate": 4.905778501112251e-05, "loss": 2.6221, "step": 380500 }, { "epoch": 1.89, "learning_rate": 4.9056548901869276e-05, "loss": 2.5897, "step": 381000 }, { "epoch": 1.89, "learning_rate": 4.905531279261605e-05, "loss": 2.5985, "step": 381500 }, { "epoch": 1.89, "learning_rate": 4.9054076683362813e-05, "loss": 2.6131, "step": 382000 }, { "epoch": 1.9, "learning_rate": 4.905283809693673e-05, "loss": 2.6016, "step": 382500 }, { "epoch": 1.9, "learning_rate": 4.905159951051065e-05, "loss": 2.6079, "step": 383000 }, { "epoch": 1.9, "learning_rate": 4.9050360924084564e-05, "loss": 2.6093, "step": 383500 }, { "epoch": 1.9, "learning_rate": 4.904912233765848e-05, "loss": 2.6203, "step": 384000 }, { "epoch": 1.9, "learning_rate": 4.90478837512324e-05, "loss": 2.6072, "step": 384500 }, { "epoch": 1.91, "learning_rate": 4.904664516480631e-05, "loss": 2.5929, "step": 385000 }, { "epoch": 1.91, "learning_rate": 4.9045406578380225e-05, "loss": 2.6029, "step": 385500 }, { "epoch": 1.91, "learning_rate": 4.9044170469127e-05, "loss": 2.6103, "step": 386000 }, { "epoch": 1.91, "learning_rate": 4.904293188270092e-05, "loss": 2.5676, "step": 386500 }, { "epoch": 1.92, "learning_rate": 4.9041693296274835e-05, "loss": 2.6016, "step": 387000 }, { "epoch": 1.92, "learning_rate": 4.904045470984875e-05, "loss": 2.5875, "step": 387500 }, { "epoch": 1.92, "learning_rate": 4.903921612342266e-05, "loss": 2.6147, "step": 388000 }, { "epoch": 1.92, "learning_rate": 4.903797753699658e-05, "loss": 2.573, "step": 388500 }, { "epoch": 1.93, "learning_rate": 4.903674142774335e-05, "loss": 2.6102, "step": 389000 }, { "epoch": 1.93, "learning_rate": 4.9035502841317264e-05, "loss": 2.6018, "step": 389500 }, { "epoch": 1.93, "learning_rate": 4.903426425489118e-05, "loss": 2.5944, "step": 390000 }, { "epoch": 1.93, "learning_rate": 4.90330256684651e-05, "loss": 2.6172, "step": 390500 }, { "epoch": 1.94, "learning_rate": 4.903178708203901e-05, "loss": 2.5899, "step": 391000 }, { "epoch": 1.94, "learning_rate": 4.9030548495612925e-05, "loss": 2.5876, "step": 391500 }, { "epoch": 1.94, "learning_rate": 4.902930990918684e-05, "loss": 2.5859, "step": 392000 }, { "epoch": 1.94, "learning_rate": 4.902807132276076e-05, "loss": 2.586, "step": 392500 }, { "epoch": 1.95, "learning_rate": 4.9026832736334676e-05, "loss": 2.6084, "step": 393000 }, { "epoch": 1.95, "learning_rate": 4.902559662708145e-05, "loss": 2.6038, "step": 393500 }, { "epoch": 1.95, "learning_rate": 4.9024360517828214e-05, "loss": 2.5836, "step": 394000 }, { "epoch": 1.95, "learning_rate": 4.902312193140213e-05, "loss": 2.5929, "step": 394500 }, { "epoch": 1.96, "learning_rate": 4.90218858221489e-05, "loss": 2.5957, "step": 395000 }, { "epoch": 1.96, "learning_rate": 4.9020647235722816e-05, "loss": 2.582, "step": 395500 }, { "epoch": 1.96, "learning_rate": 4.901940864929673e-05, "loss": 2.6287, "step": 396000 }, { "epoch": 1.96, "learning_rate": 4.901817006287065e-05, "loss": 2.6059, "step": 396500 }, { "epoch": 1.97, "learning_rate": 4.901693147644457e-05, "loss": 2.5855, "step": 397000 }, { "epoch": 1.97, "learning_rate": 4.9015692890018484e-05, "loss": 2.5951, "step": 397500 }, { "epoch": 1.97, "learning_rate": 4.90144543035924e-05, "loss": 2.5817, "step": 398000 }, { "epoch": 1.97, "learning_rate": 4.901321571716632e-05, "loss": 2.6157, "step": 398500 }, { "epoch": 1.98, "learning_rate": 4.9011977130740235e-05, "loss": 2.596, "step": 399000 }, { "epoch": 1.98, "learning_rate": 4.901073854431415e-05, "loss": 2.5786, "step": 399500 }, { "epoch": 1.98, "learning_rate": 4.900949995788807e-05, "loss": 2.5946, "step": 400000 }, { "epoch": 1.98, "learning_rate": 4.900826384863483e-05, "loss": 2.6155, "step": 400500 }, { "epoch": 1.99, "learning_rate": 4.900702526220875e-05, "loss": 2.5889, "step": 401000 }, { "epoch": 1.99, "learning_rate": 4.9005786675782665e-05, "loss": 2.5796, "step": 401500 }, { "epoch": 1.99, "learning_rate": 4.900454808935658e-05, "loss": 2.5993, "step": 402000 }, { "epoch": 1.99, "learning_rate": 4.900331198010335e-05, "loss": 2.5894, "step": 402500 }, { "epoch": 2.0, "learning_rate": 4.900207339367727e-05, "loss": 2.6215, "step": 403000 }, { "epoch": 2.0, "learning_rate": 4.9000834807251184e-05, "loss": 2.5994, "step": 403500 }, { "epoch": 2.0, "eval_accuracy": 0.6286475772528078, "eval_accuracy_mlm": 0.5813388926339267, "eval_accuracy_nsp": 0.8516075133648939, "eval_loss": 2.524798631668091, "eval_runtime": 146.063, "eval_samples_per_second": 1745.542, "eval_steps_per_second": 72.736, "step": 403686 }, { "epoch": 2.0, "learning_rate": 4.89995962208251e-05, "loss": 2.5887, "step": 404000 }, { "epoch": 2.0, "learning_rate": 4.899835763439902e-05, "loss": 2.5834, "step": 404500 }, { "epoch": 2.01, "learning_rate": 4.8997119047972935e-05, "loss": 2.5753, "step": 405000 }, { "epoch": 2.01, "learning_rate": 4.8995882938719704e-05, "loss": 2.5542, "step": 405500 }, { "epoch": 2.01, "learning_rate": 4.8994644352293614e-05, "loss": 2.5592, "step": 406000 }, { "epoch": 2.01, "learning_rate": 4.899340576586753e-05, "loss": 2.5818, "step": 406500 }, { "epoch": 2.02, "learning_rate": 4.899216717944145e-05, "loss": 2.5571, "step": 407000 }, { "epoch": 2.02, "learning_rate": 4.8990928593015365e-05, "loss": 2.5746, "step": 407500 }, { "epoch": 2.02, "learning_rate": 4.8989692483762133e-05, "loss": 2.5663, "step": 408000 }, { "epoch": 2.02, "learning_rate": 4.898845389733605e-05, "loss": 2.5716, "step": 408500 }, { "epoch": 2.03, "learning_rate": 4.898721778808282e-05, "loss": 2.5595, "step": 409000 }, { "epoch": 2.03, "learning_rate": 4.8985979201656736e-05, "loss": 2.5811, "step": 409500 }, { "epoch": 2.03, "learning_rate": 4.898474061523065e-05, "loss": 2.5488, "step": 410000 }, { "epoch": 2.03, "learning_rate": 4.898350202880457e-05, "loss": 2.5651, "step": 410500 }, { "epoch": 2.04, "learning_rate": 4.898226344237849e-05, "loss": 2.5839, "step": 411000 }, { "epoch": 2.04, "learning_rate": 4.8981024855952404e-05, "loss": 2.577, "step": 411500 }, { "epoch": 2.04, "learning_rate": 4.897978626952632e-05, "loss": 2.5854, "step": 412000 }, { "epoch": 2.04, "learning_rate": 4.897855016027308e-05, "loss": 2.5607, "step": 412500 }, { "epoch": 2.05, "learning_rate": 4.8977311573847e-05, "loss": 2.5672, "step": 413000 }, { "epoch": 2.05, "learning_rate": 4.897607298742092e-05, "loss": 2.5866, "step": 413500 }, { "epoch": 2.05, "learning_rate": 4.8974834400994834e-05, "loss": 2.6007, "step": 414000 }, { "epoch": 2.05, "learning_rate": 4.897359581456875e-05, "loss": 2.5648, "step": 414500 }, { "epoch": 2.06, "learning_rate": 4.897235722814267e-05, "loss": 2.5832, "step": 415000 }, { "epoch": 2.06, "learning_rate": 4.8971118641716584e-05, "loss": 2.5968, "step": 415500 }, { "epoch": 2.06, "learning_rate": 4.896988253246335e-05, "loss": 2.5548, "step": 416000 }, { "epoch": 2.06, "learning_rate": 4.896864394603727e-05, "loss": 2.5688, "step": 416500 }, { "epoch": 2.07, "learning_rate": 4.896740535961119e-05, "loss": 2.5836, "step": 417000 }, { "epoch": 2.07, "learning_rate": 4.8966166773185104e-05, "loss": 2.571, "step": 417500 }, { "epoch": 2.07, "learning_rate": 4.896492818675902e-05, "loss": 2.5721, "step": 418000 }, { "epoch": 2.07, "learning_rate": 4.896368960033294e-05, "loss": 2.561, "step": 418500 }, { "epoch": 2.08, "learning_rate": 4.8962451013906855e-05, "loss": 2.5737, "step": 419000 }, { "epoch": 2.08, "learning_rate": 4.8961212427480765e-05, "loss": 2.559, "step": 419500 }, { "epoch": 2.08, "learning_rate": 4.895997384105468e-05, "loss": 2.5928, "step": 420000 }, { "epoch": 2.08, "learning_rate": 4.89587352546286e-05, "loss": 2.5754, "step": 420500 }, { "epoch": 2.09, "learning_rate": 4.895749914537537e-05, "loss": 2.5631, "step": 421000 }, { "epoch": 2.09, "learning_rate": 4.8956260558949285e-05, "loss": 2.5715, "step": 421500 }, { "epoch": 2.09, "learning_rate": 4.89550219725232e-05, "loss": 2.5942, "step": 422000 }, { "epoch": 2.09, "learning_rate": 4.895378338609712e-05, "loss": 2.5579, "step": 422500 }, { "epoch": 2.1, "learning_rate": 4.8952544799671035e-05, "loss": 2.5612, "step": 423000 }, { "epoch": 2.1, "learning_rate": 4.895130621324495e-05, "loss": 2.5945, "step": 423500 }, { "epoch": 2.1, "learning_rate": 4.895007010399172e-05, "loss": 2.547, "step": 424000 }, { "epoch": 2.1, "learning_rate": 4.894883151756564e-05, "loss": 2.5908, "step": 424500 }, { "epoch": 2.11, "learning_rate": 4.8947592931139555e-05, "loss": 2.5855, "step": 425000 }, { "epoch": 2.11, "learning_rate": 4.894635434471347e-05, "loss": 2.5796, "step": 425500 }, { "epoch": 2.11, "learning_rate": 4.894511575828739e-05, "loss": 2.5654, "step": 426000 }, { "epoch": 2.11, "learning_rate": 4.89438771718613e-05, "loss": 2.5478, "step": 426500 }, { "epoch": 2.12, "learning_rate": 4.8942638585435216e-05, "loss": 2.5889, "step": 427000 }, { "epoch": 2.12, "learning_rate": 4.894139999900913e-05, "loss": 2.5634, "step": 427500 }, { "epoch": 2.12, "learning_rate": 4.894016141258305e-05, "loss": 2.5461, "step": 428000 }, { "epoch": 2.12, "learning_rate": 4.893892530332982e-05, "loss": 2.5366, "step": 428500 }, { "epoch": 2.13, "learning_rate": 4.893768919407659e-05, "loss": 2.5651, "step": 429000 }, { "epoch": 2.13, "learning_rate": 4.8936450607650504e-05, "loss": 2.5911, "step": 429500 }, { "epoch": 2.13, "learning_rate": 4.893521202122442e-05, "loss": 2.5801, "step": 430000 }, { "epoch": 2.13, "learning_rate": 4.893397591197118e-05, "loss": 2.5771, "step": 430500 }, { "epoch": 2.14, "learning_rate": 4.89327373255451e-05, "loss": 2.5864, "step": 431000 }, { "epoch": 2.14, "learning_rate": 4.8931501216291876e-05, "loss": 2.5512, "step": 431500 }, { "epoch": 2.14, "learning_rate": 4.893026262986579e-05, "loss": 2.5651, "step": 432000 }, { "epoch": 2.14, "learning_rate": 4.89290240434397e-05, "loss": 2.5588, "step": 432500 }, { "epoch": 2.15, "learning_rate": 4.892778545701362e-05, "loss": 2.5744, "step": 433000 }, { "epoch": 2.15, "learning_rate": 4.892654687058754e-05, "loss": 2.5913, "step": 433500 }, { "epoch": 2.15, "learning_rate": 4.8925308284161454e-05, "loss": 2.5761, "step": 434000 }, { "epoch": 2.15, "learning_rate": 4.892406969773537e-05, "loss": 2.5819, "step": 434500 }, { "epoch": 2.16, "learning_rate": 4.892283111130929e-05, "loss": 2.5598, "step": 435000 }, { "epoch": 2.16, "learning_rate": 4.8921592524883204e-05, "loss": 2.574, "step": 435500 }, { "epoch": 2.16, "learning_rate": 4.892035393845712e-05, "loss": 2.5842, "step": 436000 }, { "epoch": 2.16, "learning_rate": 4.891911535203104e-05, "loss": 2.5627, "step": 436500 }, { "epoch": 2.17, "learning_rate": 4.8917876765604955e-05, "loss": 2.5587, "step": 437000 }, { "epoch": 2.17, "learning_rate": 4.891663817917887e-05, "loss": 2.5872, "step": 437500 }, { "epoch": 2.17, "learning_rate": 4.891539959275279e-05, "loss": 2.5496, "step": 438000 }, { "epoch": 2.17, "learning_rate": 4.891416348349955e-05, "loss": 2.5884, "step": 438500 }, { "epoch": 2.17, "learning_rate": 4.891292489707347e-05, "loss": 2.5475, "step": 439000 }, { "epoch": 2.18, "learning_rate": 4.8911686310647385e-05, "loss": 2.5432, "step": 439500 }, { "epoch": 2.18, "learning_rate": 4.8910450201394154e-05, "loss": 2.5509, "step": 440000 }, { "epoch": 2.18, "learning_rate": 4.890921161496807e-05, "loss": 2.5689, "step": 440500 }, { "epoch": 2.18, "learning_rate": 4.890797302854199e-05, "loss": 2.5687, "step": 441000 }, { "epoch": 2.19, "learning_rate": 4.8906734442115904e-05, "loss": 2.5651, "step": 441500 }, { "epoch": 2.19, "learning_rate": 4.890549585568982e-05, "loss": 2.5761, "step": 442000 }, { "epoch": 2.19, "learning_rate": 4.8904267177955146e-05, "loss": 2.5642, "step": 442500 }, { "epoch": 2.19, "learning_rate": 4.890302859152906e-05, "loss": 2.5557, "step": 443000 }, { "epoch": 2.2, "learning_rate": 4.890179000510298e-05, "loss": 2.575, "step": 443500 }, { "epoch": 2.2, "learning_rate": 4.890055141867689e-05, "loss": 2.5907, "step": 444000 }, { "epoch": 2.2, "learning_rate": 4.889931283225081e-05, "loss": 2.5731, "step": 444500 }, { "epoch": 2.2, "learning_rate": 4.8898074245824724e-05, "loss": 2.5548, "step": 445000 }, { "epoch": 2.21, "learning_rate": 4.889683565939864e-05, "loss": 2.5467, "step": 445500 }, { "epoch": 2.21, "learning_rate": 4.889559707297256e-05, "loss": 2.592, "step": 446000 }, { "epoch": 2.21, "learning_rate": 4.8894358486546475e-05, "loss": 2.5996, "step": 446500 }, { "epoch": 2.21, "learning_rate": 4.889311990012039e-05, "loss": 2.5768, "step": 447000 }, { "epoch": 2.22, "learning_rate": 4.889188131369431e-05, "loss": 2.5667, "step": 447500 }, { "epoch": 2.22, "learning_rate": 4.8890642727268225e-05, "loss": 2.5699, "step": 448000 }, { "epoch": 2.22, "learning_rate": 4.888940414084214e-05, "loss": 2.5794, "step": 448500 }, { "epoch": 2.22, "learning_rate": 4.888816555441606e-05, "loss": 2.5778, "step": 449000 }, { "epoch": 2.23, "learning_rate": 4.8886926967989976e-05, "loss": 2.5842, "step": 449500 }, { "epoch": 2.23, "learning_rate": 4.888568838156389e-05, "loss": 2.5859, "step": 450000 }, { "epoch": 2.23, "learning_rate": 4.888444979513781e-05, "loss": 2.5658, "step": 450500 }, { "epoch": 2.23, "learning_rate": 4.888321120871172e-05, "loss": 2.5744, "step": 451000 }, { "epoch": 2.24, "learning_rate": 4.888197262228564e-05, "loss": 2.5542, "step": 451500 }, { "epoch": 2.24, "learning_rate": 4.8880734035859554e-05, "loss": 2.5659, "step": 452000 }, { "epoch": 2.24, "learning_rate": 4.887949544943347e-05, "loss": 2.5667, "step": 452500 }, { "epoch": 2.24, "learning_rate": 4.887825686300739e-05, "loss": 2.5916, "step": 453000 }, { "epoch": 2.25, "learning_rate": 4.887702075375416e-05, "loss": 2.5724, "step": 453500 }, { "epoch": 2.25, "learning_rate": 4.8875782167328074e-05, "loss": 2.5604, "step": 454000 }, { "epoch": 2.25, "learning_rate": 4.887454358090199e-05, "loss": 2.556, "step": 454500 }, { "epoch": 2.25, "learning_rate": 4.887330499447591e-05, "loss": 2.569, "step": 455000 }, { "epoch": 2.26, "learning_rate": 4.8872066408049824e-05, "loss": 2.5667, "step": 455500 }, { "epoch": 2.26, "learning_rate": 4.887082782162374e-05, "loss": 2.5849, "step": 456000 }, { "epoch": 2.26, "learning_rate": 4.886958923519765e-05, "loss": 2.5702, "step": 456500 }, { "epoch": 2.26, "learning_rate": 4.886835064877157e-05, "loss": 2.5687, "step": 457000 }, { "epoch": 2.27, "learning_rate": 4.886711453951834e-05, "loss": 2.5787, "step": 457500 }, { "epoch": 2.27, "learning_rate": 4.8865875953092254e-05, "loss": 2.5678, "step": 458000 }, { "epoch": 2.27, "learning_rate": 4.886463736666617e-05, "loss": 2.5795, "step": 458500 }, { "epoch": 2.27, "learning_rate": 4.8863401257412947e-05, "loss": 2.5772, "step": 459000 }, { "epoch": 2.28, "learning_rate": 4.8862162670986864e-05, "loss": 2.5546, "step": 459500 }, { "epoch": 2.28, "learning_rate": 4.886092408456078e-05, "loss": 2.5824, "step": 460000 }, { "epoch": 2.28, "learning_rate": 4.885968549813469e-05, "loss": 2.5854, "step": 460500 }, { "epoch": 2.28, "learning_rate": 4.885844691170861e-05, "loss": 2.5993, "step": 461000 }, { "epoch": 2.29, "learning_rate": 4.8857208325282524e-05, "loss": 2.5536, "step": 461500 }, { "epoch": 2.29, "learning_rate": 4.885596973885644e-05, "loss": 2.5607, "step": 462000 }, { "epoch": 2.29, "learning_rate": 4.885473362960321e-05, "loss": 2.5777, "step": 462500 }, { "epoch": 2.29, "learning_rate": 4.885349504317713e-05, "loss": 2.5489, "step": 463000 }, { "epoch": 2.3, "learning_rate": 4.885225645675104e-05, "loss": 2.5935, "step": 463500 }, { "epoch": 2.3, "learning_rate": 4.8851017870324954e-05, "loss": 2.5599, "step": 464000 }, { "epoch": 2.3, "learning_rate": 4.884978176107173e-05, "loss": 2.5644, "step": 464500 }, { "epoch": 2.3, "learning_rate": 4.884854317464565e-05, "loss": 2.5661, "step": 465000 }, { "epoch": 2.31, "learning_rate": 4.8847304588219564e-05, "loss": 2.5732, "step": 465500 }, { "epoch": 2.31, "learning_rate": 4.884606600179348e-05, "loss": 2.5702, "step": 466000 }, { "epoch": 2.31, "learning_rate": 4.88448274153674e-05, "loss": 2.5586, "step": 466500 }, { "epoch": 2.31, "learning_rate": 4.884359130611416e-05, "loss": 2.574, "step": 467000 }, { "epoch": 2.32, "learning_rate": 4.8842352719688076e-05, "loss": 2.5618, "step": 467500 }, { "epoch": 2.32, "learning_rate": 4.884111413326199e-05, "loss": 2.5526, "step": 468000 }, { "epoch": 2.32, "learning_rate": 4.883987554683591e-05, "loss": 2.5749, "step": 468500 }, { "epoch": 2.32, "learning_rate": 4.883863696040983e-05, "loss": 2.5694, "step": 469000 }, { "epoch": 2.33, "learning_rate": 4.8837398373983744e-05, "loss": 2.5888, "step": 469500 }, { "epoch": 2.33, "learning_rate": 4.8836159787557654e-05, "loss": 2.5595, "step": 470000 }, { "epoch": 2.33, "learning_rate": 4.883492120113157e-05, "loss": 2.5967, "step": 470500 }, { "epoch": 2.33, "learning_rate": 4.883368261470549e-05, "loss": 2.5484, "step": 471000 }, { "epoch": 2.34, "learning_rate": 4.8832444028279405e-05, "loss": 2.5715, "step": 471500 }, { "epoch": 2.34, "learning_rate": 4.883120544185332e-05, "loss": 2.5756, "step": 472000 }, { "epoch": 2.34, "learning_rate": 4.882996685542724e-05, "loss": 2.5513, "step": 472500 }, { "epoch": 2.34, "learning_rate": 4.8828728269001156e-05, "loss": 2.5952, "step": 473000 }, { "epoch": 2.35, "learning_rate": 4.8827492159747925e-05, "loss": 2.5728, "step": 473500 }, { "epoch": 2.35, "learning_rate": 4.882625357332184e-05, "loss": 2.577, "step": 474000 }, { "epoch": 2.35, "learning_rate": 4.882501498689576e-05, "loss": 2.6014, "step": 474500 }, { "epoch": 2.35, "learning_rate": 4.882377887764253e-05, "loss": 2.5732, "step": 475000 }, { "epoch": 2.36, "learning_rate": 4.8822540291216444e-05, "loss": 2.559, "step": 475500 }, { "epoch": 2.36, "learning_rate": 4.8821301704790354e-05, "loss": 2.5574, "step": 476000 }, { "epoch": 2.36, "learning_rate": 4.882006311836427e-05, "loss": 2.5518, "step": 476500 }, { "epoch": 2.36, "learning_rate": 4.881882453193819e-05, "loss": 2.5777, "step": 477000 }, { "epoch": 2.37, "learning_rate": 4.8817585945512105e-05, "loss": 2.5795, "step": 477500 }, { "epoch": 2.37, "learning_rate": 4.881634735908602e-05, "loss": 2.5723, "step": 478000 }, { "epoch": 2.37, "learning_rate": 4.881510877265994e-05, "loss": 2.5972, "step": 478500 }, { "epoch": 2.37, "learning_rate": 4.8813872663406715e-05, "loss": 2.5405, "step": 479000 }, { "epoch": 2.38, "learning_rate": 4.8812634076980625e-05, "loss": 2.6003, "step": 479500 }, { "epoch": 2.38, "learning_rate": 4.881139549055454e-05, "loss": 2.5426, "step": 480000 }, { "epoch": 2.38, "learning_rate": 4.881015690412846e-05, "loss": 2.587, "step": 480500 }, { "epoch": 2.38, "learning_rate": 4.8808918317702376e-05, "loss": 2.5803, "step": 481000 }, { "epoch": 2.39, "learning_rate": 4.880767973127629e-05, "loss": 2.5914, "step": 481500 }, { "epoch": 2.39, "learning_rate": 4.880644114485021e-05, "loss": 2.5622, "step": 482000 }, { "epoch": 2.39, "learning_rate": 4.8805202558424126e-05, "loss": 2.5813, "step": 482500 }, { "epoch": 2.39, "learning_rate": 4.880396397199804e-05, "loss": 2.5929, "step": 483000 }, { "epoch": 2.4, "learning_rate": 4.8802727862744805e-05, "loss": 2.5919, "step": 483500 }, { "epoch": 2.4, "learning_rate": 4.880148927631872e-05, "loss": 2.5565, "step": 484000 }, { "epoch": 2.4, "learning_rate": 4.88002531670655e-05, "loss": 2.5604, "step": 484500 }, { "epoch": 2.4, "learning_rate": 4.8799014580639415e-05, "loss": 2.5575, "step": 485000 }, { "epoch": 2.41, "learning_rate": 4.8797775994213325e-05, "loss": 2.5634, "step": 485500 }, { "epoch": 2.41, "learning_rate": 4.879653740778724e-05, "loss": 2.5762, "step": 486000 }, { "epoch": 2.41, "learning_rate": 4.879529882136116e-05, "loss": 2.5648, "step": 486500 }, { "epoch": 2.41, "learning_rate": 4.8794060234935076e-05, "loss": 2.5592, "step": 487000 }, { "epoch": 2.42, "learning_rate": 4.879282164850899e-05, "loss": 2.563, "step": 487500 }, { "epoch": 2.42, "learning_rate": 4.879158553925576e-05, "loss": 2.6136, "step": 488000 }, { "epoch": 2.42, "learning_rate": 4.879034695282967e-05, "loss": 2.5676, "step": 488500 }, { "epoch": 2.42, "learning_rate": 4.878910836640359e-05, "loss": 2.5787, "step": 489000 }, { "epoch": 2.43, "learning_rate": 4.8787872257150364e-05, "loss": 2.5805, "step": 489500 }, { "epoch": 2.43, "learning_rate": 4.878663367072428e-05, "loss": 2.5883, "step": 490000 }, { "epoch": 2.43, "learning_rate": 4.87853950842982e-05, "loss": 2.5649, "step": 490500 }, { "epoch": 2.43, "learning_rate": 4.878415897504497e-05, "loss": 2.5689, "step": 491000 }, { "epoch": 2.44, "learning_rate": 4.878292038861888e-05, "loss": 2.5621, "step": 491500 }, { "epoch": 2.44, "learning_rate": 4.8781681802192794e-05, "loss": 2.586, "step": 492000 }, { "epoch": 2.44, "learning_rate": 4.878044321576671e-05, "loss": 2.5845, "step": 492500 }, { "epoch": 2.44, "learning_rate": 4.877920462934063e-05, "loss": 2.5749, "step": 493000 }, { "epoch": 2.44, "learning_rate": 4.8777966042914545e-05, "loss": 2.5925, "step": 493500 }, { "epoch": 2.45, "learning_rate": 4.877672745648846e-05, "loss": 2.5829, "step": 494000 }, { "epoch": 2.45, "learning_rate": 4.877549134723523e-05, "loss": 2.5584, "step": 494500 }, { "epoch": 2.45, "learning_rate": 4.877425276080915e-05, "loss": 2.562, "step": 495000 }, { "epoch": 2.45, "learning_rate": 4.8773014174383064e-05, "loss": 2.5678, "step": 495500 }, { "epoch": 2.46, "learning_rate": 4.877177558795698e-05, "loss": 2.5516, "step": 496000 }, { "epoch": 2.46, "learning_rate": 4.87705370015309e-05, "loss": 2.567, "step": 496500 }, { "epoch": 2.46, "learning_rate": 4.8769298415104815e-05, "loss": 2.5885, "step": 497000 }, { "epoch": 2.46, "learning_rate": 4.876805982867873e-05, "loss": 2.5813, "step": 497500 }, { "epoch": 2.47, "learning_rate": 4.876682124225264e-05, "loss": 2.5801, "step": 498000 }, { "epoch": 2.47, "learning_rate": 4.876558265582656e-05, "loss": 2.567, "step": 498500 }, { "epoch": 2.47, "learning_rate": 4.8764344069400476e-05, "loss": 2.5659, "step": 499000 }, { "epoch": 2.47, "learning_rate": 4.876310548297439e-05, "loss": 2.572, "step": 499500 }, { "epoch": 2.48, "learning_rate": 4.876186689654831e-05, "loss": 2.5771, "step": 500000 }, { "epoch": 2.48, "learning_rate": 4.876062831012223e-05, "loss": 2.5942, "step": 500500 }, { "epoch": 2.48, "learning_rate": 4.8759389723696144e-05, "loss": 2.58, "step": 501000 }, { "epoch": 2.48, "learning_rate": 4.875815113727006e-05, "loss": 2.5474, "step": 501500 }, { "epoch": 2.49, "learning_rate": 4.875691502801682e-05, "loss": 2.5899, "step": 502000 }, { "epoch": 2.49, "learning_rate": 4.87556789187636e-05, "loss": 2.581, "step": 502500 }, { "epoch": 2.49, "learning_rate": 4.8754440332337515e-05, "loss": 2.5766, "step": 503000 }, { "epoch": 2.49, "learning_rate": 4.875320174591143e-05, "loss": 2.5461, "step": 503500 }, { "epoch": 2.5, "learning_rate": 4.875196315948535e-05, "loss": 2.5437, "step": 504000 }, { "epoch": 2.5, "learning_rate": 4.875072457305926e-05, "loss": 2.566, "step": 504500 }, { "epoch": 2.5, "learning_rate": 4.8749485986633176e-05, "loss": 2.5592, "step": 505000 }, { "epoch": 2.5, "learning_rate": 4.8748249877379945e-05, "loss": 2.5718, "step": 505500 }, { "epoch": 2.51, "learning_rate": 4.874701129095386e-05, "loss": 2.554, "step": 506000 }, { "epoch": 2.51, "learning_rate": 4.874577270452778e-05, "loss": 2.5343, "step": 506500 }, { "epoch": 2.51, "learning_rate": 4.8744534118101696e-05, "loss": 2.5618, "step": 507000 }, { "epoch": 2.51, "learning_rate": 4.8743295531675606e-05, "loss": 2.5622, "step": 507500 }, { "epoch": 2.52, "learning_rate": 4.874205694524952e-05, "loss": 2.5814, "step": 508000 }, { "epoch": 2.52, "learning_rate": 4.874081835882344e-05, "loss": 2.5951, "step": 508500 }, { "epoch": 2.52, "learning_rate": 4.8739582249570215e-05, "loss": 2.5568, "step": 509000 }, { "epoch": 2.52, "learning_rate": 4.873834366314413e-05, "loss": 2.5576, "step": 509500 }, { "epoch": 2.53, "learning_rate": 4.873710507671805e-05, "loss": 2.5658, "step": 510000 }, { "epoch": 2.53, "learning_rate": 4.873586649029196e-05, "loss": 2.5628, "step": 510500 }, { "epoch": 2.53, "learning_rate": 4.8734630381038735e-05, "loss": 2.5771, "step": 511000 }, { "epoch": 2.53, "learning_rate": 4.8733391794612645e-05, "loss": 2.5623, "step": 511500 }, { "epoch": 2.54, "learning_rate": 4.873215320818656e-05, "loss": 2.552, "step": 512000 }, { "epoch": 2.54, "learning_rate": 4.873091462176048e-05, "loss": 2.5771, "step": 512500 }, { "epoch": 2.54, "learning_rate": 4.8729676035334396e-05, "loss": 2.5683, "step": 513000 }, { "epoch": 2.54, "learning_rate": 4.872843744890831e-05, "loss": 2.5772, "step": 513500 }, { "epoch": 2.55, "learning_rate": 4.872719886248222e-05, "loss": 2.5589, "step": 514000 }, { "epoch": 2.55, "learning_rate": 4.872596027605614e-05, "loss": 2.5793, "step": 514500 }, { "epoch": 2.55, "learning_rate": 4.872472168963006e-05, "loss": 2.5705, "step": 515000 }, { "epoch": 2.55, "learning_rate": 4.8723483103203974e-05, "loss": 2.5768, "step": 515500 }, { "epoch": 2.56, "learning_rate": 4.872224451677789e-05, "loss": 2.5792, "step": 516000 }, { "epoch": 2.56, "learning_rate": 4.872100593035181e-05, "loss": 2.538, "step": 516500 }, { "epoch": 2.56, "learning_rate": 4.8719767343925724e-05, "loss": 2.5786, "step": 517000 }, { "epoch": 2.56, "learning_rate": 4.871852875749964e-05, "loss": 2.5611, "step": 517500 }, { "epoch": 2.57, "learning_rate": 4.871729017107356e-05, "loss": 2.5601, "step": 518000 }, { "epoch": 2.57, "learning_rate": 4.8716051584647475e-05, "loss": 2.5772, "step": 518500 }, { "epoch": 2.57, "learning_rate": 4.8714817952567096e-05, "loss": 2.5865, "step": 519000 }, { "epoch": 2.57, "learning_rate": 4.871357936614101e-05, "loss": 2.5592, "step": 519500 }, { "epoch": 2.58, "learning_rate": 4.871234077971493e-05, "loss": 2.5937, "step": 520000 }, { "epoch": 2.58, "learning_rate": 4.871110219328885e-05, "loss": 2.5601, "step": 520500 }, { "epoch": 2.58, "learning_rate": 4.870986360686276e-05, "loss": 2.5464, "step": 521000 }, { "epoch": 2.58, "learning_rate": 4.8708625020436674e-05, "loss": 2.5565, "step": 521500 }, { "epoch": 2.59, "learning_rate": 4.870738643401059e-05, "loss": 2.5702, "step": 522000 }, { "epoch": 2.59, "learning_rate": 4.870614784758451e-05, "loss": 2.5839, "step": 522500 }, { "epoch": 2.59, "learning_rate": 4.8704911738331276e-05, "loss": 2.5712, "step": 523000 }, { "epoch": 2.59, "learning_rate": 4.8703673151905193e-05, "loss": 2.5777, "step": 523500 }, { "epoch": 2.6, "learning_rate": 4.870243704265197e-05, "loss": 2.56, "step": 524000 }, { "epoch": 2.6, "learning_rate": 4.8701198456225886e-05, "loss": 2.5688, "step": 524500 }, { "epoch": 2.6, "learning_rate": 4.8699959869799796e-05, "loss": 2.5656, "step": 525000 }, { "epoch": 2.6, "learning_rate": 4.8698723760546565e-05, "loss": 2.5649, "step": 525500 }, { "epoch": 2.61, "learning_rate": 4.869748517412048e-05, "loss": 2.5436, "step": 526000 }, { "epoch": 2.61, "learning_rate": 4.86962465876944e-05, "loss": 2.5476, "step": 526500 }, { "epoch": 2.61, "learning_rate": 4.8695008001268316e-05, "loss": 2.5696, "step": 527000 }, { "epoch": 2.61, "learning_rate": 4.869376941484223e-05, "loss": 2.5495, "step": 527500 }, { "epoch": 2.62, "learning_rate": 4.869253082841615e-05, "loss": 2.5808, "step": 528000 }, { "epoch": 2.62, "learning_rate": 4.8691292241990066e-05, "loss": 2.5641, "step": 528500 }, { "epoch": 2.62, "learning_rate": 4.869005365556398e-05, "loss": 2.5714, "step": 529000 }, { "epoch": 2.62, "learning_rate": 4.8688815069137894e-05, "loss": 2.5913, "step": 529500 }, { "epoch": 2.63, "learning_rate": 4.868757648271181e-05, "loss": 2.5783, "step": 530000 }, { "epoch": 2.63, "learning_rate": 4.868633789628573e-05, "loss": 2.57, "step": 530500 }, { "epoch": 2.63, "learning_rate": 4.86851017870325e-05, "loss": 2.5547, "step": 531000 }, { "epoch": 2.63, "learning_rate": 4.868386320060642e-05, "loss": 2.5468, "step": 531500 }, { "epoch": 2.64, "learning_rate": 4.868262461418033e-05, "loss": 2.5864, "step": 532000 }, { "epoch": 2.64, "learning_rate": 4.868138602775425e-05, "loss": 2.5843, "step": 532500 }, { "epoch": 2.64, "learning_rate": 4.8680147441328164e-05, "loss": 2.5719, "step": 533000 }, { "epoch": 2.64, "learning_rate": 4.867891133207493e-05, "loss": 2.5732, "step": 533500 }, { "epoch": 2.65, "learning_rate": 4.867767274564885e-05, "loss": 2.5765, "step": 534000 }, { "epoch": 2.65, "learning_rate": 4.8676434159222767e-05, "loss": 2.5757, "step": 534500 }, { "epoch": 2.65, "learning_rate": 4.8675195572796683e-05, "loss": 2.5774, "step": 535000 }, { "epoch": 2.65, "learning_rate": 4.8673956986370594e-05, "loss": 2.554, "step": 535500 }, { "epoch": 2.66, "learning_rate": 4.867272087711737e-05, "loss": 2.5533, "step": 536000 }, { "epoch": 2.66, "learning_rate": 4.8671482290691286e-05, "loss": 2.5772, "step": 536500 }, { "epoch": 2.66, "learning_rate": 4.86702437042652e-05, "loss": 2.5513, "step": 537000 }, { "epoch": 2.66, "learning_rate": 4.866900511783912e-05, "loss": 2.5708, "step": 537500 }, { "epoch": 2.67, "learning_rate": 4.866776653141304e-05, "loss": 2.5684, "step": 538000 }, { "epoch": 2.67, "learning_rate": 4.866652794498695e-05, "loss": 2.5832, "step": 538500 }, { "epoch": 2.67, "learning_rate": 4.8665291835733716e-05, "loss": 2.5732, "step": 539000 }, { "epoch": 2.67, "learning_rate": 4.866405324930763e-05, "loss": 2.5757, "step": 539500 }, { "epoch": 2.68, "learning_rate": 4.86628171400544e-05, "loss": 2.5808, "step": 540000 }, { "epoch": 2.68, "learning_rate": 4.866157855362832e-05, "loss": 2.5977, "step": 540500 }, { "epoch": 2.68, "learning_rate": 4.8660339967202236e-05, "loss": 2.5599, "step": 541000 }, { "epoch": 2.68, "learning_rate": 4.865910138077615e-05, "loss": 2.5493, "step": 541500 }, { "epoch": 2.69, "learning_rate": 4.865786279435007e-05, "loss": 2.5864, "step": 542000 }, { "epoch": 2.69, "learning_rate": 4.8656624207923986e-05, "loss": 2.5611, "step": 542500 }, { "epoch": 2.69, "learning_rate": 4.86553856214979e-05, "loss": 2.5601, "step": 543000 }, { "epoch": 2.69, "learning_rate": 4.865414703507182e-05, "loss": 2.5715, "step": 543500 }, { "epoch": 2.7, "learning_rate": 4.865291092581858e-05, "loss": 2.5968, "step": 544000 }, { "epoch": 2.7, "learning_rate": 4.86516723393925e-05, "loss": 2.561, "step": 544500 }, { "epoch": 2.7, "learning_rate": 4.8650433752966416e-05, "loss": 2.593, "step": 545000 }, { "epoch": 2.7, "learning_rate": 4.864919516654033e-05, "loss": 2.5692, "step": 545500 }, { "epoch": 2.71, "learning_rate": 4.864795658011425e-05, "loss": 2.5501, "step": 546000 }, { "epoch": 2.71, "learning_rate": 4.864671799368817e-05, "loss": 2.5733, "step": 546500 }, { "epoch": 2.71, "learning_rate": 4.8645481884434936e-05, "loss": 2.5761, "step": 547000 }, { "epoch": 2.71, "learning_rate": 4.864424329800885e-05, "loss": 2.5604, "step": 547500 }, { "epoch": 2.71, "learning_rate": 4.864300471158277e-05, "loss": 2.5876, "step": 548000 }, { "epoch": 2.72, "learning_rate": 4.8641766125156686e-05, "loss": 2.5696, "step": 548500 }, { "epoch": 2.72, "learning_rate": 4.86405275387306e-05, "loss": 2.5717, "step": 549000 }, { "epoch": 2.72, "learning_rate": 4.863928895230452e-05, "loss": 2.5383, "step": 549500 }, { "epoch": 2.72, "learning_rate": 4.863805284305128e-05, "loss": 2.5773, "step": 550000 }, { "epoch": 2.73, "learning_rate": 4.863681673379805e-05, "loss": 2.5818, "step": 550500 }, { "epoch": 2.73, "learning_rate": 4.863557814737197e-05, "loss": 2.5148, "step": 551000 }, { "epoch": 2.73, "learning_rate": 4.8634339560945885e-05, "loss": 2.5935, "step": 551500 }, { "epoch": 2.73, "learning_rate": 4.86331009745198e-05, "loss": 2.5585, "step": 552000 }, { "epoch": 2.74, "learning_rate": 4.863186238809372e-05, "loss": 2.5496, "step": 552500 }, { "epoch": 2.74, "learning_rate": 4.8630623801667636e-05, "loss": 2.5555, "step": 553000 }, { "epoch": 2.74, "learning_rate": 4.862938521524155e-05, "loss": 2.547, "step": 553500 }, { "epoch": 2.74, "learning_rate": 4.862814662881547e-05, "loss": 2.567, "step": 554000 }, { "epoch": 2.75, "learning_rate": 4.8626908042389387e-05, "loss": 2.554, "step": 554500 }, { "epoch": 2.75, "learning_rate": 4.8625669455963303e-05, "loss": 2.5619, "step": 555000 }, { "epoch": 2.75, "learning_rate": 4.862443582388292e-05, "loss": 2.5697, "step": 555500 }, { "epoch": 2.75, "learning_rate": 4.8623197237456834e-05, "loss": 2.5523, "step": 556000 }, { "epoch": 2.76, "learning_rate": 4.862195865103075e-05, "loss": 2.568, "step": 556500 }, { "epoch": 2.76, "learning_rate": 4.862072006460467e-05, "loss": 2.5945, "step": 557000 }, { "epoch": 2.76, "learning_rate": 4.861948643252429e-05, "loss": 2.5638, "step": 557500 }, { "epoch": 2.76, "learning_rate": 4.8618247846098206e-05, "loss": 2.56, "step": 558000 }, { "epoch": 2.77, "learning_rate": 4.861700925967212e-05, "loss": 2.5682, "step": 558500 }, { "epoch": 2.77, "learning_rate": 4.861577067324604e-05, "loss": 2.5404, "step": 559000 }, { "epoch": 2.77, "learning_rate": 4.8614532086819957e-05, "loss": 2.5794, "step": 559500 }, { "epoch": 2.77, "learning_rate": 4.8613293500393873e-05, "loss": 2.5669, "step": 560000 }, { "epoch": 2.78, "learning_rate": 4.861205491396779e-05, "loss": 2.5423, "step": 560500 }, { "epoch": 2.78, "learning_rate": 4.86108163275417e-05, "loss": 2.5711, "step": 561000 }, { "epoch": 2.78, "learning_rate": 4.860957774111562e-05, "loss": 2.5841, "step": 561500 }, { "epoch": 2.78, "learning_rate": 4.8608339154689534e-05, "loss": 2.563, "step": 562000 }, { "epoch": 2.79, "learning_rate": 4.860710056826345e-05, "loss": 2.5802, "step": 562500 }, { "epoch": 2.79, "learning_rate": 4.860586198183737e-05, "loss": 2.5727, "step": 563000 }, { "epoch": 2.79, "learning_rate": 4.8604625872584144e-05, "loss": 2.5798, "step": 563500 }, { "epoch": 2.79, "learning_rate": 4.860338728615806e-05, "loss": 2.5647, "step": 564000 }, { "epoch": 2.8, "learning_rate": 4.860214869973197e-05, "loss": 2.5512, "step": 564500 }, { "epoch": 2.8, "learning_rate": 4.860091011330589e-05, "loss": 2.5654, "step": 565000 }, { "epoch": 2.8, "learning_rate": 4.8599671526879805e-05, "loss": 2.5424, "step": 565500 }, { "epoch": 2.8, "learning_rate": 4.859843294045372e-05, "loss": 2.554, "step": 566000 }, { "epoch": 2.81, "learning_rate": 4.859719435402764e-05, "loss": 2.5894, "step": 566500 }, { "epoch": 2.81, "learning_rate": 4.8595955767601556e-05, "loss": 2.551, "step": 567000 }, { "epoch": 2.81, "learning_rate": 4.859471965834832e-05, "loss": 2.5805, "step": 567500 }, { "epoch": 2.81, "learning_rate": 4.8593481071922235e-05, "loss": 2.5897, "step": 568000 }, { "epoch": 2.82, "learning_rate": 4.859224248549615e-05, "loss": 2.5625, "step": 568500 }, { "epoch": 2.82, "learning_rate": 4.859100389907007e-05, "loss": 2.5616, "step": 569000 }, { "epoch": 2.82, "learning_rate": 4.8589765312643985e-05, "loss": 2.5768, "step": 569500 }, { "epoch": 2.82, "learning_rate": 4.858852920339076e-05, "loss": 2.5554, "step": 570000 }, { "epoch": 2.83, "learning_rate": 4.858729061696467e-05, "loss": 2.5912, "step": 570500 }, { "epoch": 2.83, "learning_rate": 4.858605203053859e-05, "loss": 2.5492, "step": 571000 }, { "epoch": 2.83, "learning_rate": 4.858481592128536e-05, "loss": 2.5554, "step": 571500 }, { "epoch": 2.83, "learning_rate": 4.8583577334859274e-05, "loss": 2.5641, "step": 572000 }, { "epoch": 2.84, "learning_rate": 4.858233874843319e-05, "loss": 2.5709, "step": 572500 }, { "epoch": 2.84, "learning_rate": 4.858110016200711e-05, "loss": 2.5975, "step": 573000 }, { "epoch": 2.84, "learning_rate": 4.857986157558102e-05, "loss": 2.5723, "step": 573500 }, { "epoch": 2.84, "learning_rate": 4.8578622989154935e-05, "loss": 2.5557, "step": 574000 }, { "epoch": 2.85, "learning_rate": 4.857738440272885e-05, "loss": 2.538, "step": 574500 }, { "epoch": 2.85, "learning_rate": 4.857614581630277e-05, "loss": 2.5816, "step": 575000 }, { "epoch": 2.85, "learning_rate": 4.8574907229876685e-05, "loss": 2.5707, "step": 575500 }, { "epoch": 2.85, "learning_rate": 4.85736686434506e-05, "loss": 2.5592, "step": 576000 }, { "epoch": 2.86, "learning_rate": 4.857243005702452e-05, "loss": 2.5493, "step": 576500 }, { "epoch": 2.86, "learning_rate": 4.8571191470598436e-05, "loss": 2.5495, "step": 577000 }, { "epoch": 2.86, "learning_rate": 4.8569955361345205e-05, "loss": 2.5573, "step": 577500 }, { "epoch": 2.86, "learning_rate": 4.8568719252091974e-05, "loss": 2.5596, "step": 578000 }, { "epoch": 2.87, "learning_rate": 4.856748066566589e-05, "loss": 2.5427, "step": 578500 }, { "epoch": 2.87, "learning_rate": 4.856624207923981e-05, "loss": 2.5828, "step": 579000 }, { "epoch": 2.87, "learning_rate": 4.8565003492813725e-05, "loss": 2.5622, "step": 579500 }, { "epoch": 2.87, "learning_rate": 4.8563764906387635e-05, "loss": 2.5681, "step": 580000 }, { "epoch": 2.88, "learning_rate": 4.856252631996155e-05, "loss": 2.5767, "step": 580500 }, { "epoch": 2.88, "learning_rate": 4.856128773353547e-05, "loss": 2.5359, "step": 581000 }, { "epoch": 2.88, "learning_rate": 4.8560049147109386e-05, "loss": 2.5966, "step": 581500 }, { "epoch": 2.88, "learning_rate": 4.85588105606833e-05, "loss": 2.5783, "step": 582000 }, { "epoch": 2.89, "learning_rate": 4.855757445143008e-05, "loss": 2.563, "step": 582500 }, { "epoch": 2.89, "learning_rate": 4.855633586500399e-05, "loss": 2.5594, "step": 583000 }, { "epoch": 2.89, "learning_rate": 4.855509975575076e-05, "loss": 2.5744, "step": 583500 }, { "epoch": 2.89, "learning_rate": 4.8553861169324674e-05, "loss": 2.5556, "step": 584000 }, { "epoch": 2.9, "learning_rate": 4.855262258289859e-05, "loss": 2.5552, "step": 584500 }, { "epoch": 2.9, "learning_rate": 4.855138399647251e-05, "loss": 2.5402, "step": 585000 }, { "epoch": 2.9, "learning_rate": 4.8550145410046425e-05, "loss": 2.553, "step": 585500 }, { "epoch": 2.9, "learning_rate": 4.8548906823620335e-05, "loss": 2.5668, "step": 586000 }, { "epoch": 2.91, "learning_rate": 4.854766823719425e-05, "loss": 2.5855, "step": 586500 }, { "epoch": 2.91, "learning_rate": 4.854643212794103e-05, "loss": 2.5822, "step": 587000 }, { "epoch": 2.91, "learning_rate": 4.8545193541514944e-05, "loss": 2.5603, "step": 587500 }, { "epoch": 2.91, "learning_rate": 4.854395495508886e-05, "loss": 2.5564, "step": 588000 }, { "epoch": 2.92, "learning_rate": 4.854271884583563e-05, "loss": 2.5659, "step": 588500 }, { "epoch": 2.92, "learning_rate": 4.854148025940955e-05, "loss": 2.5704, "step": 589000 }, { "epoch": 2.92, "learning_rate": 4.8540241672983464e-05, "loss": 2.584, "step": 589500 }, { "epoch": 2.92, "learning_rate": 4.8539003086557374e-05, "loss": 2.5611, "step": 590000 }, { "epoch": 2.93, "learning_rate": 4.853776697730414e-05, "loss": 2.5538, "step": 590500 }, { "epoch": 2.93, "learning_rate": 4.853652839087806e-05, "loss": 2.5419, "step": 591000 }, { "epoch": 2.93, "learning_rate": 4.853528980445198e-05, "loss": 2.5637, "step": 591500 }, { "epoch": 2.93, "learning_rate": 4.8534051218025894e-05, "loss": 2.5852, "step": 592000 }, { "epoch": 2.94, "learning_rate": 4.853281263159981e-05, "loss": 2.5701, "step": 592500 }, { "epoch": 2.94, "learning_rate": 4.853157404517373e-05, "loss": 2.5697, "step": 593000 }, { "epoch": 2.94, "learning_rate": 4.8530335458747644e-05, "loss": 2.5454, "step": 593500 }, { "epoch": 2.94, "learning_rate": 4.852909687232156e-05, "loss": 2.5414, "step": 594000 }, { "epoch": 2.95, "learning_rate": 4.852785828589548e-05, "loss": 2.5856, "step": 594500 }, { "epoch": 2.95, "learning_rate": 4.8526619699469395e-05, "loss": 2.5922, "step": 595000 }, { "epoch": 2.95, "learning_rate": 4.8525381113043305e-05, "loss": 2.5762, "step": 595500 }, { "epoch": 2.95, "learning_rate": 4.852414252661722e-05, "loss": 2.5752, "step": 596000 }, { "epoch": 2.96, "learning_rate": 4.852290394019114e-05, "loss": 2.5849, "step": 596500 }, { "epoch": 2.96, "learning_rate": 4.852166783093791e-05, "loss": 2.5798, "step": 597000 }, { "epoch": 2.96, "learning_rate": 4.8520429244511825e-05, "loss": 2.5716, "step": 597500 }, { "epoch": 2.96, "learning_rate": 4.8519193135258594e-05, "loss": 2.559, "step": 598000 }, { "epoch": 2.97, "learning_rate": 4.851795454883251e-05, "loss": 2.5591, "step": 598500 }, { "epoch": 2.97, "learning_rate": 4.851671596240643e-05, "loss": 2.582, "step": 599000 }, { "epoch": 2.97, "learning_rate": 4.8515477375980345e-05, "loss": 2.581, "step": 599500 }, { "epoch": 2.97, "learning_rate": 4.851423878955426e-05, "loss": 2.5779, "step": 600000 }, { "epoch": 2.98, "learning_rate": 4.851300268030103e-05, "loss": 2.5746, "step": 600500 }, { "epoch": 2.98, "learning_rate": 4.851176409387495e-05, "loss": 2.5805, "step": 601000 }, { "epoch": 2.98, "learning_rate": 4.8510525507448864e-05, "loss": 2.5754, "step": 601500 }, { "epoch": 2.98, "learning_rate": 4.850928692102278e-05, "loss": 2.5709, "step": 602000 }, { "epoch": 2.98, "learning_rate": 4.85080483345967e-05, "loss": 2.5627, "step": 602500 }, { "epoch": 2.99, "learning_rate": 4.8506809748170615e-05, "loss": 2.5808, "step": 603000 }, { "epoch": 2.99, "learning_rate": 4.8505571161744525e-05, "loss": 2.5847, "step": 603500 }, { "epoch": 2.99, "learning_rate": 4.850433257531844e-05, "loss": 2.5929, "step": 604000 }, { "epoch": 2.99, "learning_rate": 4.850309398889236e-05, "loss": 2.5613, "step": 604500 }, { "epoch": 3.0, "learning_rate": 4.850185787963913e-05, "loss": 2.5579, "step": 605000 }, { "epoch": 3.0, "learning_rate": 4.8500619293213045e-05, "loss": 2.5798, "step": 605500 }, { "epoch": 3.0, "eval_accuracy": 0.6309935793243545, "eval_accuracy_mlm": 0.583263169880887, "eval_accuracy_nsp": 0.856231786287207, "eval_loss": 2.503361225128174, "eval_runtime": 145.9652, "eval_samples_per_second": 1746.711, "eval_steps_per_second": 72.784, "step": 605529 }, { "epoch": 3.0, "learning_rate": 4.8499383183959814e-05, "loss": 2.5333, "step": 606000 }, { "epoch": 3.0, "learning_rate": 4.849814459753373e-05, "loss": 2.5139, "step": 606500 }, { "epoch": 3.01, "learning_rate": 4.849690601110765e-05, "loss": 2.5247, "step": 607000 }, { "epoch": 3.01, "learning_rate": 4.849566990185441e-05, "loss": 2.5196, "step": 607500 }, { "epoch": 3.01, "learning_rate": 4.8494431315428326e-05, "loss": 2.5171, "step": 608000 }, { "epoch": 3.01, "learning_rate": 4.849319272900224e-05, "loss": 2.5036, "step": 608500 }, { "epoch": 3.02, "learning_rate": 4.849195414257616e-05, "loss": 2.5135, "step": 609000 }, { "epoch": 3.02, "learning_rate": 4.849071555615008e-05, "loss": 2.5322, "step": 609500 }, { "epoch": 3.02, "learning_rate": 4.8489476969723994e-05, "loss": 2.5542, "step": 610000 }, { "epoch": 3.02, "learning_rate": 4.848823838329791e-05, "loss": 2.5499, "step": 610500 }, { "epoch": 3.03, "learning_rate": 4.848699979687183e-05, "loss": 2.5478, "step": 611000 }, { "epoch": 3.03, "learning_rate": 4.84857636876186e-05, "loss": 2.5333, "step": 611500 }, { "epoch": 3.03, "learning_rate": 4.8484525101192514e-05, "loss": 2.552, "step": 612000 }, { "epoch": 3.03, "learning_rate": 4.848328651476643e-05, "loss": 2.5408, "step": 612500 }, { "epoch": 3.04, "learning_rate": 4.848204792834035e-05, "loss": 2.5539, "step": 613000 }, { "epoch": 3.04, "learning_rate": 4.8480809341914264e-05, "loss": 2.5355, "step": 613500 }, { "epoch": 3.04, "learning_rate": 4.847957075548818e-05, "loss": 2.5418, "step": 614000 }, { "epoch": 3.04, "learning_rate": 4.84783321690621e-05, "loss": 2.5373, "step": 614500 }, { "epoch": 3.05, "learning_rate": 4.847709605980886e-05, "loss": 2.539, "step": 615000 }, { "epoch": 3.05, "learning_rate": 4.847585747338278e-05, "loss": 2.5402, "step": 615500 }, { "epoch": 3.05, "learning_rate": 4.8474618886956694e-05, "loss": 2.5299, "step": 616000 }, { "epoch": 3.05, "learning_rate": 4.847338030053061e-05, "loss": 2.5361, "step": 616500 }, { "epoch": 3.06, "learning_rate": 4.847214171410453e-05, "loss": 2.5464, "step": 617000 }, { "epoch": 3.06, "learning_rate": 4.8470903127678445e-05, "loss": 2.5522, "step": 617500 }, { "epoch": 3.06, "learning_rate": 4.846966454125236e-05, "loss": 2.5553, "step": 618000 }, { "epoch": 3.06, "learning_rate": 4.846842595482628e-05, "loss": 2.5591, "step": 618500 }, { "epoch": 3.07, "learning_rate": 4.8467187368400196e-05, "loss": 2.54, "step": 619000 }, { "epoch": 3.07, "learning_rate": 4.846594878197411e-05, "loss": 2.5405, "step": 619500 }, { "epoch": 3.07, "learning_rate": 4.846471267272088e-05, "loss": 2.5232, "step": 620000 }, { "epoch": 3.07, "learning_rate": 4.84634740862948e-05, "loss": 2.5347, "step": 620500 }, { "epoch": 3.08, "learning_rate": 4.8462235499868715e-05, "loss": 2.528, "step": 621000 }, { "epoch": 3.08, "learning_rate": 4.846099939061548e-05, "loss": 2.5322, "step": 621500 }, { "epoch": 3.08, "learning_rate": 4.8459760804189394e-05, "loss": 2.5317, "step": 622000 }, { "epoch": 3.08, "learning_rate": 4.845852221776331e-05, "loss": 2.5383, "step": 622500 }, { "epoch": 3.09, "learning_rate": 4.845728363133723e-05, "loss": 2.5493, "step": 623000 }, { "epoch": 3.09, "learning_rate": 4.8456045044911145e-05, "loss": 2.538, "step": 623500 }, { "epoch": 3.09, "learning_rate": 4.845480645848506e-05, "loss": 2.5492, "step": 624000 }, { "epoch": 3.09, "learning_rate": 4.845356787205898e-05, "loss": 2.5289, "step": 624500 }, { "epoch": 3.1, "learning_rate": 4.8452329285632896e-05, "loss": 2.566, "step": 625000 }, { "epoch": 3.1, "learning_rate": 4.8451093176379665e-05, "loss": 2.5407, "step": 625500 }, { "epoch": 3.1, "learning_rate": 4.844985458995358e-05, "loss": 2.5053, "step": 626000 }, { "epoch": 3.1, "learning_rate": 4.84486160035275e-05, "loss": 2.5606, "step": 626500 }, { "epoch": 3.11, "learning_rate": 4.844737989427427e-05, "loss": 2.5307, "step": 627000 }, { "epoch": 3.11, "learning_rate": 4.844614130784818e-05, "loss": 2.5339, "step": 627500 }, { "epoch": 3.11, "learning_rate": 4.8444902721422094e-05, "loss": 2.5584, "step": 628000 }, { "epoch": 3.11, "learning_rate": 4.844366413499601e-05, "loss": 2.5237, "step": 628500 }, { "epoch": 3.12, "learning_rate": 4.844242554856993e-05, "loss": 2.5553, "step": 629000 }, { "epoch": 3.12, "learning_rate": 4.84411894393167e-05, "loss": 2.5506, "step": 629500 }, { "epoch": 3.12, "learning_rate": 4.8439950852890614e-05, "loss": 2.5469, "step": 630000 }, { "epoch": 3.12, "learning_rate": 4.843871226646453e-05, "loss": 2.551, "step": 630500 }, { "epoch": 3.13, "learning_rate": 4.843747368003845e-05, "loss": 2.5408, "step": 631000 }, { "epoch": 3.13, "learning_rate": 4.8436235093612365e-05, "loss": 2.5641, "step": 631500 }, { "epoch": 3.13, "learning_rate": 4.843499650718628e-05, "loss": 2.5317, "step": 632000 }, { "epoch": 3.13, "learning_rate": 4.84337579207602e-05, "loss": 2.528, "step": 632500 }, { "epoch": 3.14, "learning_rate": 4.8432519334334116e-05, "loss": 2.5114, "step": 633000 }, { "epoch": 3.14, "learning_rate": 4.843128074790803e-05, "loss": 2.5307, "step": 633500 }, { "epoch": 3.14, "learning_rate": 4.843004216148195e-05, "loss": 2.5363, "step": 634000 }, { "epoch": 3.14, "learning_rate": 4.8428803575055866e-05, "loss": 2.5329, "step": 634500 }, { "epoch": 3.15, "learning_rate": 4.842756498862978e-05, "loss": 2.5189, "step": 635000 }, { "epoch": 3.15, "learning_rate": 4.8426328879376545e-05, "loss": 2.5262, "step": 635500 }, { "epoch": 3.15, "learning_rate": 4.842509029295046e-05, "loss": 2.5593, "step": 636000 }, { "epoch": 3.15, "learning_rate": 4.842385418369723e-05, "loss": 2.5337, "step": 636500 }, { "epoch": 3.16, "learning_rate": 4.842261559727115e-05, "loss": 2.5343, "step": 637000 }, { "epoch": 3.16, "learning_rate": 4.8421377010845065e-05, "loss": 2.5407, "step": 637500 }, { "epoch": 3.16, "learning_rate": 4.842013842441898e-05, "loss": 2.5609, "step": 638000 }, { "epoch": 3.16, "learning_rate": 4.84188998379929e-05, "loss": 2.5219, "step": 638500 }, { "epoch": 3.17, "learning_rate": 4.8417661251566816e-05, "loss": 2.5684, "step": 639000 }, { "epoch": 3.17, "learning_rate": 4.841642266514073e-05, "loss": 2.5422, "step": 639500 }, { "epoch": 3.17, "learning_rate": 4.841518407871465e-05, "loss": 2.5736, "step": 640000 }, { "epoch": 3.17, "learning_rate": 4.841394796946142e-05, "loss": 2.5538, "step": 640500 }, { "epoch": 3.18, "learning_rate": 4.841270938303533e-05, "loss": 2.5404, "step": 641000 }, { "epoch": 3.18, "learning_rate": 4.8411470796609245e-05, "loss": 2.5472, "step": 641500 }, { "epoch": 3.18, "learning_rate": 4.841023221018316e-05, "loss": 2.5434, "step": 642000 }, { "epoch": 3.18, "learning_rate": 4.840899610092993e-05, "loss": 2.5179, "step": 642500 }, { "epoch": 3.19, "learning_rate": 4.840775751450385e-05, "loss": 2.5491, "step": 643000 }, { "epoch": 3.19, "learning_rate": 4.8406518928077765e-05, "loss": 2.5372, "step": 643500 }, { "epoch": 3.19, "learning_rate": 4.840528034165168e-05, "loss": 2.544, "step": 644000 }, { "epoch": 3.19, "learning_rate": 4.84040417552256e-05, "loss": 2.5429, "step": 644500 }, { "epoch": 3.2, "learning_rate": 4.8402803168799516e-05, "loss": 2.5307, "step": 645000 }, { "epoch": 3.2, "learning_rate": 4.840156458237343e-05, "loss": 2.553, "step": 645500 }, { "epoch": 3.2, "learning_rate": 4.84003284731202e-05, "loss": 2.5597, "step": 646000 }, { "epoch": 3.2, "learning_rate": 4.839908988669412e-05, "loss": 2.5398, "step": 646500 }, { "epoch": 3.21, "learning_rate": 4.8397851300268035e-05, "loss": 2.5258, "step": 647000 }, { "epoch": 3.21, "learning_rate": 4.8396612713841946e-05, "loss": 2.525, "step": 647500 }, { "epoch": 3.21, "learning_rate": 4.839537412741586e-05, "loss": 2.5487, "step": 648000 }, { "epoch": 3.21, "learning_rate": 4.839413801816263e-05, "loss": 2.5557, "step": 648500 }, { "epoch": 3.22, "learning_rate": 4.839289943173655e-05, "loss": 2.5434, "step": 649000 }, { "epoch": 3.22, "learning_rate": 4.8391660845310465e-05, "loss": 2.5495, "step": 649500 }, { "epoch": 3.22, "learning_rate": 4.839042225888438e-05, "loss": 2.5504, "step": 650000 }, { "epoch": 3.22, "learning_rate": 4.838918614963115e-05, "loss": 2.5101, "step": 650500 }, { "epoch": 3.23, "learning_rate": 4.838794756320507e-05, "loss": 2.5414, "step": 651000 }, { "epoch": 3.23, "learning_rate": 4.8386708976778985e-05, "loss": 2.5399, "step": 651500 }, { "epoch": 3.23, "learning_rate": 4.83854703903529e-05, "loss": 2.5554, "step": 652000 }, { "epoch": 3.23, "learning_rate": 4.8384234281099664e-05, "loss": 2.5515, "step": 652500 }, { "epoch": 3.24, "learning_rate": 4.838299569467358e-05, "loss": 2.5307, "step": 653000 }, { "epoch": 3.24, "learning_rate": 4.83817571082475e-05, "loss": 2.539, "step": 653500 }, { "epoch": 3.24, "learning_rate": 4.8380518521821415e-05, "loss": 2.5284, "step": 654000 }, { "epoch": 3.24, "learning_rate": 4.837928241256819e-05, "loss": 2.5652, "step": 654500 }, { "epoch": 3.25, "learning_rate": 4.837804382614211e-05, "loss": 2.5605, "step": 655000 }, { "epoch": 3.25, "learning_rate": 4.837680523971602e-05, "loss": 2.5348, "step": 655500 }, { "epoch": 3.25, "learning_rate": 4.8375569130462786e-05, "loss": 2.5298, "step": 656000 }, { "epoch": 3.25, "learning_rate": 4.83743305440367e-05, "loss": 2.5437, "step": 656500 }, { "epoch": 3.26, "learning_rate": 4.837309195761062e-05, "loss": 2.5365, "step": 657000 }, { "epoch": 3.26, "learning_rate": 4.837185337118454e-05, "loss": 2.5457, "step": 657500 }, { "epoch": 3.26, "learning_rate": 4.8370614784758454e-05, "loss": 2.5365, "step": 658000 }, { "epoch": 3.26, "learning_rate": 4.836937619833237e-05, "loss": 2.5297, "step": 658500 }, { "epoch": 3.26, "learning_rate": 4.836813761190628e-05, "loss": 2.5214, "step": 659000 }, { "epoch": 3.27, "learning_rate": 4.83668990254802e-05, "loss": 2.5343, "step": 659500 }, { "epoch": 3.27, "learning_rate": 4.8365660439054115e-05, "loss": 2.535, "step": 660000 }, { "epoch": 3.27, "learning_rate": 4.836442432980089e-05, "loss": 2.5523, "step": 660500 }, { "epoch": 3.27, "learning_rate": 4.836318574337481e-05, "loss": 2.5577, "step": 661000 }, { "epoch": 3.28, "learning_rate": 4.8361947156948724e-05, "loss": 2.5796, "step": 661500 }, { "epoch": 3.28, "learning_rate": 4.8360708570522634e-05, "loss": 2.5263, "step": 662000 }, { "epoch": 3.28, "learning_rate": 4.835946998409655e-05, "loss": 2.535, "step": 662500 }, { "epoch": 3.28, "learning_rate": 4.835823139767047e-05, "loss": 2.5555, "step": 663000 }, { "epoch": 3.29, "learning_rate": 4.835699528841724e-05, "loss": 2.5393, "step": 663500 }, { "epoch": 3.29, "learning_rate": 4.8355756701991154e-05, "loss": 2.5416, "step": 664000 }, { "epoch": 3.29, "learning_rate": 4.835451811556507e-05, "loss": 2.5387, "step": 664500 }, { "epoch": 3.29, "learning_rate": 4.835327952913898e-05, "loss": 2.5604, "step": 665000 }, { "epoch": 3.3, "learning_rate": 4.83520409427129e-05, "loss": 2.565, "step": 665500 }, { "epoch": 3.3, "learning_rate": 4.8350802356286815e-05, "loss": 2.5364, "step": 666000 }, { "epoch": 3.3, "learning_rate": 4.834956376986073e-05, "loss": 2.5461, "step": 666500 }, { "epoch": 3.3, "learning_rate": 4.834832518343465e-05, "loss": 2.5692, "step": 667000 }, { "epoch": 3.31, "learning_rate": 4.8347089074181424e-05, "loss": 2.5424, "step": 667500 }, { "epoch": 3.31, "learning_rate": 4.834585296492819e-05, "loss": 2.557, "step": 668000 }, { "epoch": 3.31, "learning_rate": 4.834461437850211e-05, "loss": 2.5454, "step": 668500 }, { "epoch": 3.31, "learning_rate": 4.834337579207602e-05, "loss": 2.5403, "step": 669000 }, { "epoch": 3.32, "learning_rate": 4.834213720564994e-05, "loss": 2.5301, "step": 669500 }, { "epoch": 3.32, "learning_rate": 4.8340898619223854e-05, "loss": 2.5211, "step": 670000 }, { "epoch": 3.32, "learning_rate": 4.833966003279777e-05, "loss": 2.5401, "step": 670500 }, { "epoch": 3.32, "learning_rate": 4.833842144637169e-05, "loss": 2.541, "step": 671000 }, { "epoch": 3.33, "learning_rate": 4.833718533711846e-05, "loss": 2.5565, "step": 671500 }, { "epoch": 3.33, "learning_rate": 4.8335946750692374e-05, "loss": 2.5404, "step": 672000 }, { "epoch": 3.33, "learning_rate": 4.833470816426629e-05, "loss": 2.567, "step": 672500 }, { "epoch": 3.33, "learning_rate": 4.833346957784021e-05, "loss": 2.5351, "step": 673000 }, { "epoch": 3.34, "learning_rate": 4.8332230991414124e-05, "loss": 2.5467, "step": 673500 }, { "epoch": 3.34, "learning_rate": 4.833099240498804e-05, "loss": 2.5485, "step": 674000 }, { "epoch": 3.34, "learning_rate": 4.832975381856195e-05, "loss": 2.5353, "step": 674500 }, { "epoch": 3.34, "learning_rate": 4.832851523213587e-05, "loss": 2.5495, "step": 675000 }, { "epoch": 3.35, "learning_rate": 4.832728160005549e-05, "loss": 2.5532, "step": 675500 }, { "epoch": 3.35, "learning_rate": 4.8326043013629406e-05, "loss": 2.5324, "step": 676000 }, { "epoch": 3.35, "learning_rate": 4.832480442720332e-05, "loss": 2.538, "step": 676500 }, { "epoch": 3.35, "learning_rate": 4.832356584077724e-05, "loss": 2.5226, "step": 677000 }, { "epoch": 3.36, "learning_rate": 4.832232973152401e-05, "loss": 2.5422, "step": 677500 }, { "epoch": 3.36, "learning_rate": 4.8321091145097926e-05, "loss": 2.5271, "step": 678000 }, { "epoch": 3.36, "learning_rate": 4.831985255867184e-05, "loss": 2.5118, "step": 678500 }, { "epoch": 3.36, "learning_rate": 4.831861397224576e-05, "loss": 2.5408, "step": 679000 }, { "epoch": 3.37, "learning_rate": 4.831737786299252e-05, "loss": 2.5397, "step": 679500 }, { "epoch": 3.37, "learning_rate": 4.831613927656644e-05, "loss": 2.5553, "step": 680000 }, { "epoch": 3.37, "learning_rate": 4.8314900690140355e-05, "loss": 2.5429, "step": 680500 }, { "epoch": 3.37, "learning_rate": 4.831366210371427e-05, "loss": 2.5535, "step": 681000 }, { "epoch": 3.38, "learning_rate": 4.831242351728819e-05, "loss": 2.5647, "step": 681500 }, { "epoch": 3.38, "learning_rate": 4.8311184930862106e-05, "loss": 2.5757, "step": 682000 }, { "epoch": 3.38, "learning_rate": 4.830994634443602e-05, "loss": 2.5574, "step": 682500 }, { "epoch": 3.38, "learning_rate": 4.830870775800994e-05, "loss": 2.53, "step": 683000 }, { "epoch": 3.39, "learning_rate": 4.830746917158386e-05, "loss": 2.5279, "step": 683500 }, { "epoch": 3.39, "learning_rate": 4.8306233062330626e-05, "loss": 2.5323, "step": 684000 }, { "epoch": 3.39, "learning_rate": 4.830499447590454e-05, "loss": 2.5433, "step": 684500 }, { "epoch": 3.39, "learning_rate": 4.830375588947846e-05, "loss": 2.5779, "step": 685000 }, { "epoch": 3.4, "learning_rate": 4.8302517303052376e-05, "loss": 2.5304, "step": 685500 }, { "epoch": 3.4, "learning_rate": 4.830128119379914e-05, "loss": 2.5579, "step": 686000 }, { "epoch": 3.4, "learning_rate": 4.8300045084545914e-05, "loss": 2.5429, "step": 686500 }, { "epoch": 3.4, "learning_rate": 4.829880649811983e-05, "loss": 2.5222, "step": 687000 }, { "epoch": 3.41, "learning_rate": 4.829756791169374e-05, "loss": 2.5562, "step": 687500 }, { "epoch": 3.41, "learning_rate": 4.829632932526766e-05, "loss": 2.579, "step": 688000 }, { "epoch": 3.41, "learning_rate": 4.8295090738841575e-05, "loss": 2.5623, "step": 688500 }, { "epoch": 3.41, "learning_rate": 4.829385215241549e-05, "loss": 2.543, "step": 689000 }, { "epoch": 3.42, "learning_rate": 4.829261356598941e-05, "loss": 2.5659, "step": 689500 }, { "epoch": 3.42, "learning_rate": 4.8291374979563326e-05, "loss": 2.5464, "step": 690000 }, { "epoch": 3.42, "learning_rate": 4.829013639313724e-05, "loss": 2.529, "step": 690500 }, { "epoch": 3.42, "learning_rate": 4.828889780671116e-05, "loss": 2.5534, "step": 691000 }, { "epoch": 3.43, "learning_rate": 4.828765922028508e-05, "loss": 2.5394, "step": 691500 }, { "epoch": 3.43, "learning_rate": 4.8286420633858994e-05, "loss": 2.5359, "step": 692000 }, { "epoch": 3.43, "learning_rate": 4.8285184524605756e-05, "loss": 2.5425, "step": 692500 }, { "epoch": 3.43, "learning_rate": 4.828394593817967e-05, "loss": 2.5602, "step": 693000 }, { "epoch": 3.44, "learning_rate": 4.828270735175359e-05, "loss": 2.5164, "step": 693500 }, { "epoch": 3.44, "learning_rate": 4.8281468765327506e-05, "loss": 2.5582, "step": 694000 }, { "epoch": 3.44, "learning_rate": 4.8280232656074275e-05, "loss": 2.5522, "step": 694500 }, { "epoch": 3.44, "learning_rate": 4.827899406964819e-05, "loss": 2.5528, "step": 695000 }, { "epoch": 3.45, "learning_rate": 4.827775548322211e-05, "loss": 2.5459, "step": 695500 }, { "epoch": 3.45, "learning_rate": 4.8276516896796026e-05, "loss": 2.5493, "step": 696000 }, { "epoch": 3.45, "learning_rate": 4.827527831036994e-05, "loss": 2.5516, "step": 696500 }, { "epoch": 3.45, "learning_rate": 4.827403972394386e-05, "loss": 2.5535, "step": 697000 }, { "epoch": 3.46, "learning_rate": 4.827280113751778e-05, "loss": 2.5477, "step": 697500 }, { "epoch": 3.46, "learning_rate": 4.8271562551091694e-05, "loss": 2.5291, "step": 698000 }, { "epoch": 3.46, "learning_rate": 4.827032396466561e-05, "loss": 2.5557, "step": 698500 }, { "epoch": 3.46, "learning_rate": 4.826908537823953e-05, "loss": 2.5559, "step": 699000 }, { "epoch": 3.47, "learning_rate": 4.8267846791813444e-05, "loss": 2.5445, "step": 699500 }, { "epoch": 3.47, "learning_rate": 4.8266610682560206e-05, "loss": 2.5345, "step": 700000 }, { "epoch": 3.47, "learning_rate": 4.8265374573306975e-05, "loss": 2.563, "step": 700500 }, { "epoch": 3.47, "learning_rate": 4.826413598688089e-05, "loss": 2.5561, "step": 701000 }, { "epoch": 3.48, "learning_rate": 4.826289740045481e-05, "loss": 2.5687, "step": 701500 }, { "epoch": 3.48, "learning_rate": 4.8261658814028726e-05, "loss": 2.544, "step": 702000 }, { "epoch": 3.48, "learning_rate": 4.826042022760264e-05, "loss": 2.5362, "step": 702500 }, { "epoch": 3.48, "learning_rate": 4.825918164117656e-05, "loss": 2.5309, "step": 703000 }, { "epoch": 3.49, "learning_rate": 4.825794305475048e-05, "loss": 2.5421, "step": 703500 }, { "epoch": 3.49, "learning_rate": 4.8256704468324394e-05, "loss": 2.5295, "step": 704000 }, { "epoch": 3.49, "learning_rate": 4.825546588189831e-05, "loss": 2.5354, "step": 704500 }, { "epoch": 3.49, "learning_rate": 4.825422977264508e-05, "loss": 2.5441, "step": 705000 }, { "epoch": 3.5, "learning_rate": 4.8252991186218996e-05, "loss": 2.5388, "step": 705500 }, { "epoch": 3.5, "learning_rate": 4.8251752599792907e-05, "loss": 2.5489, "step": 706000 }, { "epoch": 3.5, "learning_rate": 4.8250514013366824e-05, "loss": 2.5435, "step": 706500 }, { "epoch": 3.5, "learning_rate": 4.824927790411359e-05, "loss": 2.5567, "step": 707000 }, { "epoch": 3.51, "learning_rate": 4.824803931768751e-05, "loss": 2.5394, "step": 707500 }, { "epoch": 3.51, "learning_rate": 4.8246800731261426e-05, "loss": 2.5111, "step": 708000 }, { "epoch": 3.51, "learning_rate": 4.824556214483534e-05, "loss": 2.5513, "step": 708500 }, { "epoch": 3.51, "learning_rate": 4.824432355840926e-05, "loss": 2.5387, "step": 709000 }, { "epoch": 3.52, "learning_rate": 4.824308497198318e-05, "loss": 2.5602, "step": 709500 }, { "epoch": 3.52, "learning_rate": 4.8241846385557094e-05, "loss": 2.549, "step": 710000 }, { "epoch": 3.52, "learning_rate": 4.824060779913101e-05, "loss": 2.5252, "step": 710500 }, { "epoch": 3.52, "learning_rate": 4.823937416705063e-05, "loss": 2.5328, "step": 711000 }, { "epoch": 3.53, "learning_rate": 4.82381380577974e-05, "loss": 2.522, "step": 711500 }, { "epoch": 3.53, "learning_rate": 4.823689947137132e-05, "loss": 2.5387, "step": 712000 }, { "epoch": 3.53, "learning_rate": 4.8235660884945234e-05, "loss": 2.5568, "step": 712500 }, { "epoch": 3.53, "learning_rate": 4.823442229851915e-05, "loss": 2.5318, "step": 713000 }, { "epoch": 3.53, "learning_rate": 4.823318371209307e-05, "loss": 2.5472, "step": 713500 }, { "epoch": 3.54, "learning_rate": 4.8231945125666985e-05, "loss": 2.5452, "step": 714000 }, { "epoch": 3.54, "learning_rate": 4.823070901641375e-05, "loss": 2.5537, "step": 714500 }, { "epoch": 3.54, "learning_rate": 4.8229470429987664e-05, "loss": 2.5685, "step": 715000 }, { "epoch": 3.54, "learning_rate": 4.822823184356158e-05, "loss": 2.5505, "step": 715500 }, { "epoch": 3.55, "learning_rate": 4.82269932571355e-05, "loss": 2.5497, "step": 716000 }, { "epoch": 3.55, "learning_rate": 4.8225754670709415e-05, "loss": 2.529, "step": 716500 }, { "epoch": 3.55, "learning_rate": 4.822451608428333e-05, "loss": 2.5528, "step": 717000 }, { "epoch": 3.55, "learning_rate": 4.822327749785725e-05, "loss": 2.5036, "step": 717500 }, { "epoch": 3.56, "learning_rate": 4.8222038911431166e-05, "loss": 2.5104, "step": 718000 }, { "epoch": 3.56, "learning_rate": 4.822080032500508e-05, "loss": 2.5557, "step": 718500 }, { "epoch": 3.56, "learning_rate": 4.821956173857899e-05, "loss": 2.5172, "step": 719000 }, { "epoch": 3.56, "learning_rate": 4.821832562932577e-05, "loss": 2.5314, "step": 719500 }, { "epoch": 3.57, "learning_rate": 4.821708952007254e-05, "loss": 2.5631, "step": 720000 }, { "epoch": 3.57, "learning_rate": 4.821585093364645e-05, "loss": 2.5495, "step": 720500 }, { "epoch": 3.57, "learning_rate": 4.8214612347220364e-05, "loss": 2.5225, "step": 721000 }, { "epoch": 3.57, "learning_rate": 4.821337376079428e-05, "loss": 2.5358, "step": 721500 }, { "epoch": 3.58, "learning_rate": 4.82121351743682e-05, "loss": 2.5573, "step": 722000 }, { "epoch": 3.58, "learning_rate": 4.8210896587942115e-05, "loss": 2.5512, "step": 722500 }, { "epoch": 3.58, "learning_rate": 4.820965800151603e-05, "loss": 2.5373, "step": 723000 }, { "epoch": 3.58, "learning_rate": 4.820841941508995e-05, "loss": 2.5567, "step": 723500 }, { "epoch": 3.59, "learning_rate": 4.8207180828663866e-05, "loss": 2.5367, "step": 724000 }, { "epoch": 3.59, "learning_rate": 4.820594224223778e-05, "loss": 2.558, "step": 724500 }, { "epoch": 3.59, "learning_rate": 4.820470613298455e-05, "loss": 2.5263, "step": 725000 }, { "epoch": 3.59, "learning_rate": 4.820346754655847e-05, "loss": 2.5248, "step": 725500 }, { "epoch": 3.6, "learning_rate": 4.8202228960132385e-05, "loss": 2.5235, "step": 726000 }, { "epoch": 3.6, "learning_rate": 4.82009903737063e-05, "loss": 2.5575, "step": 726500 }, { "epoch": 3.6, "learning_rate": 4.819975178728022e-05, "loss": 2.5402, "step": 727000 }, { "epoch": 3.6, "learning_rate": 4.8198513200854136e-05, "loss": 2.5331, "step": 727500 }, { "epoch": 3.61, "learning_rate": 4.8197274614428046e-05, "loss": 2.5494, "step": 728000 }, { "epoch": 3.61, "learning_rate": 4.819603602800196e-05, "loss": 2.5456, "step": 728500 }, { "epoch": 3.61, "learning_rate": 4.819479991874873e-05, "loss": 2.5465, "step": 729000 }, { "epoch": 3.61, "learning_rate": 4.819356133232265e-05, "loss": 2.5497, "step": 729500 }, { "epoch": 3.62, "learning_rate": 4.8192322745896566e-05, "loss": 2.5535, "step": 730000 }, { "epoch": 3.62, "learning_rate": 4.819108415947048e-05, "loss": 2.5379, "step": 730500 }, { "epoch": 3.62, "learning_rate": 4.81898455730444e-05, "loss": 2.5503, "step": 731000 }, { "epoch": 3.62, "learning_rate": 4.818860698661831e-05, "loss": 2.5508, "step": 731500 }, { "epoch": 3.63, "learning_rate": 4.818736840019223e-05, "loss": 2.5412, "step": 732000 }, { "epoch": 3.63, "learning_rate": 4.8186129813766144e-05, "loss": 2.5646, "step": 732500 }, { "epoch": 3.63, "learning_rate": 4.818489370451292e-05, "loss": 2.554, "step": 733000 }, { "epoch": 3.63, "learning_rate": 4.8183655118086836e-05, "loss": 2.5507, "step": 733500 }, { "epoch": 3.64, "learning_rate": 4.818241653166075e-05, "loss": 2.5684, "step": 734000 }, { "epoch": 3.64, "learning_rate": 4.818117794523466e-05, "loss": 2.5564, "step": 734500 }, { "epoch": 3.64, "learning_rate": 4.817993935880858e-05, "loss": 2.5357, "step": 735000 }, { "epoch": 3.64, "learning_rate": 4.81787057267282e-05, "loss": 2.5358, "step": 735500 }, { "epoch": 3.65, "learning_rate": 4.817746961747497e-05, "loss": 2.5506, "step": 736000 }, { "epoch": 3.65, "learning_rate": 4.8176231031048887e-05, "loss": 2.5443, "step": 736500 }, { "epoch": 3.65, "learning_rate": 4.8174992444622804e-05, "loss": 2.5271, "step": 737000 }, { "epoch": 3.65, "learning_rate": 4.817375633536957e-05, "loss": 2.5738, "step": 737500 }, { "epoch": 3.66, "learning_rate": 4.817251774894348e-05, "loss": 2.5456, "step": 738000 }, { "epoch": 3.66, "learning_rate": 4.81712791625174e-05, "loss": 2.5431, "step": 738500 }, { "epoch": 3.66, "learning_rate": 4.8170040576091316e-05, "loss": 2.5176, "step": 739000 }, { "epoch": 3.66, "learning_rate": 4.816880198966523e-05, "loss": 2.5471, "step": 739500 }, { "epoch": 3.67, "learning_rate": 4.816756340323915e-05, "loss": 2.5422, "step": 740000 }, { "epoch": 3.67, "learning_rate": 4.816632481681307e-05, "loss": 2.5587, "step": 740500 }, { "epoch": 3.67, "learning_rate": 4.8165086230386984e-05, "loss": 2.5371, "step": 741000 }, { "epoch": 3.67, "learning_rate": 4.81638476439609e-05, "loss": 2.5467, "step": 741500 }, { "epoch": 3.68, "learning_rate": 4.816260905753482e-05, "loss": 2.5384, "step": 742000 }, { "epoch": 3.68, "learning_rate": 4.8161370471108735e-05, "loss": 2.5456, "step": 742500 }, { "epoch": 3.68, "learning_rate": 4.816013188468265e-05, "loss": 2.5356, "step": 743000 }, { "epoch": 3.68, "learning_rate": 4.815889329825657e-05, "loss": 2.5309, "step": 743500 }, { "epoch": 3.69, "learning_rate": 4.815765718900334e-05, "loss": 2.5272, "step": 744000 }, { "epoch": 3.69, "learning_rate": 4.8156418602577254e-05, "loss": 2.5665, "step": 744500 }, { "epoch": 3.69, "learning_rate": 4.8155182493324016e-05, "loss": 2.5413, "step": 745000 }, { "epoch": 3.69, "learning_rate": 4.815394390689793e-05, "loss": 2.568, "step": 745500 }, { "epoch": 3.7, "learning_rate": 4.815270532047185e-05, "loss": 2.5461, "step": 746000 }, { "epoch": 3.7, "learning_rate": 4.815146673404577e-05, "loss": 2.5391, "step": 746500 }, { "epoch": 3.7, "learning_rate": 4.8150228147619684e-05, "loss": 2.5244, "step": 747000 }, { "epoch": 3.7, "learning_rate": 4.81489895611936e-05, "loss": 2.544, "step": 747500 }, { "epoch": 3.71, "learning_rate": 4.814775097476752e-05, "loss": 2.5462, "step": 748000 }, { "epoch": 3.71, "learning_rate": 4.8146512388341435e-05, "loss": 2.5465, "step": 748500 }, { "epoch": 3.71, "learning_rate": 4.814527380191535e-05, "loss": 2.5428, "step": 749000 }, { "epoch": 3.71, "learning_rate": 4.814403769266212e-05, "loss": 2.555, "step": 749500 }, { "epoch": 3.72, "learning_rate": 4.814280158340889e-05, "loss": 2.563, "step": 750000 }, { "epoch": 3.72, "learning_rate": 4.8141562996982806e-05, "loss": 2.5506, "step": 750500 }, { "epoch": 3.72, "learning_rate": 4.8140324410556717e-05, "loss": 2.5451, "step": 751000 }, { "epoch": 3.72, "learning_rate": 4.8139085824130633e-05, "loss": 2.544, "step": 751500 }, { "epoch": 3.73, "learning_rate": 4.813784971487741e-05, "loss": 2.5365, "step": 752000 }, { "epoch": 3.73, "learning_rate": 4.8136611128451326e-05, "loss": 2.5606, "step": 752500 }, { "epoch": 3.73, "learning_rate": 4.813537254202524e-05, "loss": 2.5512, "step": 753000 }, { "epoch": 3.73, "learning_rate": 4.813413395559916e-05, "loss": 2.5219, "step": 753500 }, { "epoch": 3.74, "learning_rate": 4.813289784634593e-05, "loss": 2.5419, "step": 754000 }, { "epoch": 3.74, "learning_rate": 4.813166173709269e-05, "loss": 2.5459, "step": 754500 }, { "epoch": 3.74, "learning_rate": 4.813042315066661e-05, "loss": 2.5357, "step": 755000 }, { "epoch": 3.74, "learning_rate": 4.8129184564240525e-05, "loss": 2.5289, "step": 755500 }, { "epoch": 3.75, "learning_rate": 4.812794597781444e-05, "loss": 2.5534, "step": 756000 }, { "epoch": 3.75, "learning_rate": 4.812670739138836e-05, "loss": 2.5353, "step": 756500 }, { "epoch": 3.75, "learning_rate": 4.8125468804962275e-05, "loss": 2.5421, "step": 757000 }, { "epoch": 3.75, "learning_rate": 4.8124232695709044e-05, "loss": 2.5251, "step": 757500 }, { "epoch": 3.76, "learning_rate": 4.812299410928296e-05, "loss": 2.5294, "step": 758000 }, { "epoch": 3.76, "learning_rate": 4.812175552285688e-05, "loss": 2.5252, "step": 758500 }, { "epoch": 3.76, "learning_rate": 4.8120516936430795e-05, "loss": 2.5331, "step": 759000 }, { "epoch": 3.76, "learning_rate": 4.811927835000471e-05, "loss": 2.5705, "step": 759500 }, { "epoch": 3.77, "learning_rate": 4.811803976357863e-05, "loss": 2.5623, "step": 760000 }, { "epoch": 3.77, "learning_rate": 4.8116801177152546e-05, "loss": 2.5405, "step": 760500 }, { "epoch": 3.77, "learning_rate": 4.811556259072646e-05, "loss": 2.525, "step": 761000 }, { "epoch": 3.77, "learning_rate": 4.811432400430038e-05, "loss": 2.5315, "step": 761500 }, { "epoch": 3.78, "learning_rate": 4.811308541787429e-05, "loss": 2.5486, "step": 762000 }, { "epoch": 3.78, "learning_rate": 4.811184683144821e-05, "loss": 2.549, "step": 762500 }, { "epoch": 3.78, "learning_rate": 4.8110608245022124e-05, "loss": 2.5359, "step": 763000 }, { "epoch": 3.78, "learning_rate": 4.810936965859604e-05, "loss": 2.5533, "step": 763500 }, { "epoch": 3.79, "learning_rate": 4.810813354934281e-05, "loss": 2.5451, "step": 764000 }, { "epoch": 3.79, "learning_rate": 4.8106894962916726e-05, "loss": 2.5645, "step": 764500 }, { "epoch": 3.79, "learning_rate": 4.810565637649064e-05, "loss": 2.5328, "step": 765000 }, { "epoch": 3.79, "learning_rate": 4.810441779006456e-05, "loss": 2.538, "step": 765500 }, { "epoch": 3.8, "learning_rate": 4.810318168081133e-05, "loss": 2.5277, "step": 766000 }, { "epoch": 3.8, "learning_rate": 4.8101943094385246e-05, "loss": 2.5509, "step": 766500 }, { "epoch": 3.8, "learning_rate": 4.810070450795916e-05, "loss": 2.558, "step": 767000 }, { "epoch": 3.8, "learning_rate": 4.809946592153308e-05, "loss": 2.5482, "step": 767500 }, { "epoch": 3.8, "learning_rate": 4.8098227335107e-05, "loss": 2.5564, "step": 768000 }, { "epoch": 3.81, "learning_rate": 4.809699122585376e-05, "loss": 2.5368, "step": 768500 }, { "epoch": 3.81, "learning_rate": 4.809575511660053e-05, "loss": 2.5808, "step": 769000 }, { "epoch": 3.81, "learning_rate": 4.8094516530174444e-05, "loss": 2.5585, "step": 769500 }, { "epoch": 3.81, "learning_rate": 4.809327794374836e-05, "loss": 2.5754, "step": 770000 }, { "epoch": 3.82, "learning_rate": 4.809203935732228e-05, "loss": 2.5389, "step": 770500 }, { "epoch": 3.82, "learning_rate": 4.8090800770896195e-05, "loss": 2.5298, "step": 771000 }, { "epoch": 3.82, "learning_rate": 4.808956218447011e-05, "loss": 2.5495, "step": 771500 }, { "epoch": 3.82, "learning_rate": 4.808832359804403e-05, "loss": 2.5579, "step": 772000 }, { "epoch": 3.83, "learning_rate": 4.808708748879079e-05, "loss": 2.5362, "step": 772500 }, { "epoch": 3.83, "learning_rate": 4.808584890236471e-05, "loss": 2.5724, "step": 773000 }, { "epoch": 3.83, "learning_rate": 4.8084610315938625e-05, "loss": 2.5317, "step": 773500 }, { "epoch": 3.83, "learning_rate": 4.808337172951254e-05, "loss": 2.5207, "step": 774000 }, { "epoch": 3.84, "learning_rate": 4.808213314308646e-05, "loss": 2.5329, "step": 774500 }, { "epoch": 3.84, "learning_rate": 4.8080894556660376e-05, "loss": 2.5449, "step": 775000 }, { "epoch": 3.84, "learning_rate": 4.807965597023429e-05, "loss": 2.5359, "step": 775500 }, { "epoch": 3.84, "learning_rate": 4.807841986098106e-05, "loss": 2.5733, "step": 776000 }, { "epoch": 3.85, "learning_rate": 4.807718375172783e-05, "loss": 2.543, "step": 776500 }, { "epoch": 3.85, "learning_rate": 4.807594516530175e-05, "loss": 2.5573, "step": 777000 }, { "epoch": 3.85, "learning_rate": 4.8074706578875664e-05, "loss": 2.5554, "step": 777500 }, { "epoch": 3.85, "learning_rate": 4.807346799244958e-05, "loss": 2.5289, "step": 778000 }, { "epoch": 3.86, "learning_rate": 4.807222940602349e-05, "loss": 2.5436, "step": 778500 }, { "epoch": 3.86, "learning_rate": 4.807099081959741e-05, "loss": 2.5695, "step": 779000 }, { "epoch": 3.86, "learning_rate": 4.8069752233171325e-05, "loss": 2.5339, "step": 779500 }, { "epoch": 3.86, "learning_rate": 4.806851364674524e-05, "loss": 2.5585, "step": 780000 }, { "epoch": 3.87, "learning_rate": 4.806727506031916e-05, "loss": 2.5488, "step": 780500 }, { "epoch": 3.87, "learning_rate": 4.8066036473893076e-05, "loss": 2.55, "step": 781000 }, { "epoch": 3.87, "learning_rate": 4.806479788746699e-05, "loss": 2.524, "step": 781500 }, { "epoch": 3.87, "learning_rate": 4.806355930104091e-05, "loss": 2.5214, "step": 782000 }, { "epoch": 3.88, "learning_rate": 4.806232071461483e-05, "loss": 2.5534, "step": 782500 }, { "epoch": 3.88, "learning_rate": 4.8061082128188744e-05, "loss": 2.5238, "step": 783000 }, { "epoch": 3.88, "learning_rate": 4.805984354176266e-05, "loss": 2.5444, "step": 783500 }, { "epoch": 3.88, "learning_rate": 4.805860495533658e-05, "loss": 2.5237, "step": 784000 }, { "epoch": 3.89, "learning_rate": 4.8057366368910494e-05, "loss": 2.5258, "step": 784500 }, { "epoch": 3.89, "learning_rate": 4.8056127782484404e-05, "loss": 2.529, "step": 785000 }, { "epoch": 3.89, "learning_rate": 4.805488919605832e-05, "loss": 2.5469, "step": 785500 }, { "epoch": 3.89, "learning_rate": 4.805365060963224e-05, "loss": 2.5389, "step": 786000 }, { "epoch": 3.9, "learning_rate": 4.8052414500379014e-05, "loss": 2.5637, "step": 786500 }, { "epoch": 3.9, "learning_rate": 4.805117591395293e-05, "loss": 2.5438, "step": 787000 }, { "epoch": 3.9, "learning_rate": 4.804993732752685e-05, "loss": 2.5465, "step": 787500 }, { "epoch": 3.9, "learning_rate": 4.804869874110076e-05, "loss": 2.5289, "step": 788000 }, { "epoch": 3.91, "learning_rate": 4.804746263184753e-05, "loss": 2.5684, "step": 788500 }, { "epoch": 3.91, "learning_rate": 4.8046226522594296e-05, "loss": 2.5539, "step": 789000 }, { "epoch": 3.91, "learning_rate": 4.804498793616821e-05, "loss": 2.5358, "step": 789500 }, { "epoch": 3.91, "learning_rate": 4.804374934974213e-05, "loss": 2.5412, "step": 790000 }, { "epoch": 3.92, "learning_rate": 4.8042510763316046e-05, "loss": 2.5593, "step": 790500 }, { "epoch": 3.92, "learning_rate": 4.804127217688996e-05, "loss": 2.5245, "step": 791000 }, { "epoch": 3.92, "learning_rate": 4.804003359046388e-05, "loss": 2.5548, "step": 791500 }, { "epoch": 3.92, "learning_rate": 4.80387950040378e-05, "loss": 2.5387, "step": 792000 }, { "epoch": 3.93, "learning_rate": 4.8037556417611714e-05, "loss": 2.529, "step": 792500 }, { "epoch": 3.93, "learning_rate": 4.8036320308358476e-05, "loss": 2.5248, "step": 793000 }, { "epoch": 3.93, "learning_rate": 4.803508172193239e-05, "loss": 2.5407, "step": 793500 }, { "epoch": 3.93, "learning_rate": 4.803384313550631e-05, "loss": 2.5353, "step": 794000 }, { "epoch": 3.94, "learning_rate": 4.803260454908023e-05, "loss": 2.5249, "step": 794500 }, { "epoch": 3.94, "learning_rate": 4.8031368439826996e-05, "loss": 2.5362, "step": 795000 }, { "epoch": 3.94, "learning_rate": 4.803012985340091e-05, "loss": 2.5309, "step": 795500 }, { "epoch": 3.94, "learning_rate": 4.802889126697483e-05, "loss": 2.5317, "step": 796000 }, { "epoch": 3.95, "learning_rate": 4.8027652680548746e-05, "loss": 2.5299, "step": 796500 }, { "epoch": 3.95, "learning_rate": 4.8026416571295515e-05, "loss": 2.5359, "step": 797000 }, { "epoch": 3.95, "learning_rate": 4.8025182939215136e-05, "loss": 2.5481, "step": 797500 }, { "epoch": 3.95, "learning_rate": 4.802394435278905e-05, "loss": 2.5442, "step": 798000 }, { "epoch": 3.96, "learning_rate": 4.802270576636297e-05, "loss": 2.5507, "step": 798500 }, { "epoch": 3.96, "learning_rate": 4.802146717993689e-05, "loss": 2.5488, "step": 799000 }, { "epoch": 3.96, "learning_rate": 4.8020228593510804e-05, "loss": 2.5537, "step": 799500 }, { "epoch": 3.96, "learning_rate": 4.801899000708472e-05, "loss": 2.5368, "step": 800000 }, { "epoch": 3.97, "learning_rate": 4.801775142065864e-05, "loss": 2.5376, "step": 800500 }, { "epoch": 3.97, "learning_rate": 4.8016512834232554e-05, "loss": 2.5168, "step": 801000 }, { "epoch": 3.97, "learning_rate": 4.8015276724979317e-05, "loss": 2.5168, "step": 801500 }, { "epoch": 3.97, "learning_rate": 4.8014038138553233e-05, "loss": 2.5698, "step": 802000 }, { "epoch": 3.98, "learning_rate": 4.801279955212715e-05, "loss": 2.5456, "step": 802500 }, { "epoch": 3.98, "learning_rate": 4.801156096570107e-05, "loss": 2.5533, "step": 803000 }, { "epoch": 3.98, "learning_rate": 4.8010322379274984e-05, "loss": 2.5442, "step": 803500 }, { "epoch": 3.98, "learning_rate": 4.80090837928489e-05, "loss": 2.5533, "step": 804000 }, { "epoch": 3.99, "learning_rate": 4.800784768359567e-05, "loss": 2.5392, "step": 804500 }, { "epoch": 3.99, "learning_rate": 4.800661157434244e-05, "loss": 2.5661, "step": 805000 }, { "epoch": 3.99, "learning_rate": 4.8005372987916356e-05, "loss": 2.5666, "step": 805500 }, { "epoch": 3.99, "learning_rate": 4.800413440149027e-05, "loss": 2.5285, "step": 806000 }, { "epoch": 4.0, "learning_rate": 4.800289581506418e-05, "loss": 2.5249, "step": 806500 }, { "epoch": 4.0, "learning_rate": 4.800165970581095e-05, "loss": 2.5377, "step": 807000 }, { "epoch": 4.0, "eval_accuracy": 0.6340747980022936, "eval_accuracy_mlm": 0.5867373422775914, "eval_accuracy_nsp": 0.8572672468906766, "eval_loss": 2.4791698455810547, "eval_runtime": 145.9611, "eval_samples_per_second": 1746.76, "eval_steps_per_second": 72.787, "step": 807372 }, { "epoch": 4.0, "learning_rate": 4.800042111938487e-05, "loss": 2.5294, "step": 807500 }, { "epoch": 4.0, "learning_rate": 4.7999182532958785e-05, "loss": 2.4976, "step": 808000 }, { "epoch": 4.01, "learning_rate": 4.79979439465327e-05, "loss": 2.5014, "step": 808500 }, { "epoch": 4.01, "learning_rate": 4.799670536010662e-05, "loss": 2.5063, "step": 809000 }, { "epoch": 4.01, "learning_rate": 4.7995466773680536e-05, "loss": 2.5132, "step": 809500 }, { "epoch": 4.01, "learning_rate": 4.799422818725445e-05, "loss": 2.5018, "step": 810000 }, { "epoch": 4.02, "learning_rate": 4.799298960082837e-05, "loss": 2.5117, "step": 810500 }, { "epoch": 4.02, "learning_rate": 4.799175101440229e-05, "loss": 2.5232, "step": 811000 }, { "epoch": 4.02, "learning_rate": 4.7990514905149056e-05, "loss": 2.5043, "step": 811500 }, { "epoch": 4.02, "learning_rate": 4.798927631872297e-05, "loss": 2.5114, "step": 812000 }, { "epoch": 4.03, "learning_rate": 4.798803773229689e-05, "loss": 2.5253, "step": 812500 }, { "epoch": 4.03, "learning_rate": 4.7986799145870807e-05, "loss": 2.5181, "step": 813000 }, { "epoch": 4.03, "learning_rate": 4.798556055944472e-05, "loss": 2.5291, "step": 813500 }, { "epoch": 4.03, "learning_rate": 4.7984321973018634e-05, "loss": 2.499, "step": 814000 }, { "epoch": 4.04, "learning_rate": 4.798308338659255e-05, "loss": 2.5222, "step": 814500 }, { "epoch": 4.04, "learning_rate": 4.798184480016647e-05, "loss": 2.5236, "step": 815000 }, { "epoch": 4.04, "learning_rate": 4.7980606213740384e-05, "loss": 2.5314, "step": 815500 }, { "epoch": 4.04, "learning_rate": 4.797937010448715e-05, "loss": 2.5234, "step": 816000 }, { "epoch": 4.05, "learning_rate": 4.797813151806107e-05, "loss": 2.4951, "step": 816500 }, { "epoch": 4.05, "learning_rate": 4.797689293163499e-05, "loss": 2.529, "step": 817000 }, { "epoch": 4.05, "learning_rate": 4.7975654345208904e-05, "loss": 2.5055, "step": 817500 }, { "epoch": 4.05, "learning_rate": 4.797441575878282e-05, "loss": 2.4981, "step": 818000 }, { "epoch": 4.06, "learning_rate": 4.797317964952959e-05, "loss": 2.512, "step": 818500 }, { "epoch": 4.06, "learning_rate": 4.797194106310351e-05, "loss": 2.515, "step": 819000 }, { "epoch": 4.06, "learning_rate": 4.7970702476677424e-05, "loss": 2.4995, "step": 819500 }, { "epoch": 4.06, "learning_rate": 4.7969463890251334e-05, "loss": 2.5222, "step": 820000 }, { "epoch": 4.07, "learning_rate": 4.79682277809981e-05, "loss": 2.5096, "step": 820500 }, { "epoch": 4.07, "learning_rate": 4.796698919457202e-05, "loss": 2.5198, "step": 821000 }, { "epoch": 4.07, "learning_rate": 4.796575308531879e-05, "loss": 2.5011, "step": 821500 }, { "epoch": 4.07, "learning_rate": 4.7964514498892705e-05, "loss": 2.5134, "step": 822000 }, { "epoch": 4.07, "learning_rate": 4.796327591246662e-05, "loss": 2.486, "step": 822500 }, { "epoch": 4.08, "learning_rate": 4.796203732604054e-05, "loss": 2.5452, "step": 823000 }, { "epoch": 4.08, "learning_rate": 4.7960798739614456e-05, "loss": 2.5249, "step": 823500 }, { "epoch": 4.08, "learning_rate": 4.795956015318837e-05, "loss": 2.5277, "step": 824000 }, { "epoch": 4.08, "learning_rate": 4.795832156676229e-05, "loss": 2.517, "step": 824500 }, { "epoch": 4.09, "learning_rate": 4.795708298033621e-05, "loss": 2.5276, "step": 825000 }, { "epoch": 4.09, "learning_rate": 4.7955844393910124e-05, "loss": 2.5128, "step": 825500 }, { "epoch": 4.09, "learning_rate": 4.7954610761829744e-05, "loss": 2.5389, "step": 826000 }, { "epoch": 4.09, "learning_rate": 4.795337217540366e-05, "loss": 2.5206, "step": 826500 }, { "epoch": 4.1, "learning_rate": 4.795213358897757e-05, "loss": 2.5004, "step": 827000 }, { "epoch": 4.1, "learning_rate": 4.795089747972435e-05, "loss": 2.5165, "step": 827500 }, { "epoch": 4.1, "learning_rate": 4.794965889329826e-05, "loss": 2.5113, "step": 828000 }, { "epoch": 4.1, "learning_rate": 4.7948420306872174e-05, "loss": 2.4997, "step": 828500 }, { "epoch": 4.11, "learning_rate": 4.794718172044609e-05, "loss": 2.5323, "step": 829000 }, { "epoch": 4.11, "learning_rate": 4.794594313402001e-05, "loss": 2.5239, "step": 829500 }, { "epoch": 4.11, "learning_rate": 4.794470454759392e-05, "loss": 2.5154, "step": 830000 }, { "epoch": 4.11, "learning_rate": 4.7943465961167835e-05, "loss": 2.5174, "step": 830500 }, { "epoch": 4.12, "learning_rate": 4.794222737474175e-05, "loss": 2.5047, "step": 831000 }, { "epoch": 4.12, "learning_rate": 4.794098878831567e-05, "loss": 2.5309, "step": 831500 }, { "epoch": 4.12, "learning_rate": 4.7939750201889586e-05, "loss": 2.5058, "step": 832000 }, { "epoch": 4.12, "learning_rate": 4.793851409263636e-05, "loss": 2.5134, "step": 832500 }, { "epoch": 4.13, "learning_rate": 4.793727550621028e-05, "loss": 2.5142, "step": 833000 }, { "epoch": 4.13, "learning_rate": 4.793603691978419e-05, "loss": 2.5138, "step": 833500 }, { "epoch": 4.13, "learning_rate": 4.7934798333358106e-05, "loss": 2.5339, "step": 834000 }, { "epoch": 4.13, "learning_rate": 4.793355974693202e-05, "loss": 2.5123, "step": 834500 }, { "epoch": 4.14, "learning_rate": 4.793232363767879e-05, "loss": 2.5296, "step": 835000 }, { "epoch": 4.14, "learning_rate": 4.793108752842556e-05, "loss": 2.5339, "step": 835500 }, { "epoch": 4.14, "learning_rate": 4.792984894199948e-05, "loss": 2.538, "step": 836000 }, { "epoch": 4.14, "learning_rate": 4.7928610355573394e-05, "loss": 2.5208, "step": 836500 }, { "epoch": 4.15, "learning_rate": 4.792737176914731e-05, "loss": 2.4823, "step": 837000 }, { "epoch": 4.15, "learning_rate": 4.792613318272123e-05, "loss": 2.5293, "step": 837500 }, { "epoch": 4.15, "learning_rate": 4.7924894596295145e-05, "loss": 2.5136, "step": 838000 }, { "epoch": 4.15, "learning_rate": 4.792365600986906e-05, "loss": 2.497, "step": 838500 }, { "epoch": 4.16, "learning_rate": 4.792241742344298e-05, "loss": 2.5301, "step": 839000 }, { "epoch": 4.16, "learning_rate": 4.792117883701689e-05, "loss": 2.5154, "step": 839500 }, { "epoch": 4.16, "learning_rate": 4.7919940250590806e-05, "loss": 2.5086, "step": 840000 }, { "epoch": 4.16, "learning_rate": 4.791870166416472e-05, "loss": 2.506, "step": 840500 }, { "epoch": 4.17, "learning_rate": 4.791746307773864e-05, "loss": 2.5198, "step": 841000 }, { "epoch": 4.17, "learning_rate": 4.7916224491312556e-05, "loss": 2.5042, "step": 841500 }, { "epoch": 4.17, "learning_rate": 4.791499085923218e-05, "loss": 2.5122, "step": 842000 }, { "epoch": 4.17, "learning_rate": 4.7913752272806094e-05, "loss": 2.5176, "step": 842500 }, { "epoch": 4.18, "learning_rate": 4.791251368638001e-05, "loss": 2.5354, "step": 843000 }, { "epoch": 4.18, "learning_rate": 4.791127509995393e-05, "loss": 2.515, "step": 843500 }, { "epoch": 4.18, "learning_rate": 4.7910036513527845e-05, "loss": 2.5099, "step": 844000 }, { "epoch": 4.18, "learning_rate": 4.790879792710176e-05, "loss": 2.5267, "step": 844500 }, { "epoch": 4.19, "learning_rate": 4.790755934067568e-05, "loss": 2.5115, "step": 845000 }, { "epoch": 4.19, "learning_rate": 4.7906320754249596e-05, "loss": 2.5051, "step": 845500 }, { "epoch": 4.19, "learning_rate": 4.7905082167823506e-05, "loss": 2.5294, "step": 846000 }, { "epoch": 4.19, "learning_rate": 4.790384605857028e-05, "loss": 2.5381, "step": 846500 }, { "epoch": 4.2, "learning_rate": 4.79026074721442e-05, "loss": 2.509, "step": 847000 }, { "epoch": 4.2, "learning_rate": 4.7901368885718115e-05, "loss": 2.5301, "step": 847500 }, { "epoch": 4.2, "learning_rate": 4.7900130299292025e-05, "loss": 2.5011, "step": 848000 }, { "epoch": 4.2, "learning_rate": 4.789889171286594e-05, "loss": 2.5179, "step": 848500 }, { "epoch": 4.21, "learning_rate": 4.789765312643986e-05, "loss": 2.5437, "step": 849000 }, { "epoch": 4.21, "learning_rate": 4.789641701718663e-05, "loss": 2.5317, "step": 849500 }, { "epoch": 4.21, "learning_rate": 4.78951809079334e-05, "loss": 2.5221, "step": 850000 }, { "epoch": 4.21, "learning_rate": 4.7893942321507314e-05, "loss": 2.5152, "step": 850500 }, { "epoch": 4.22, "learning_rate": 4.789270621225408e-05, "loss": 2.5534, "step": 851000 }, { "epoch": 4.22, "learning_rate": 4.789146762582799e-05, "loss": 2.5463, "step": 851500 }, { "epoch": 4.22, "learning_rate": 4.789022903940191e-05, "loss": 2.528, "step": 852000 }, { "epoch": 4.22, "learning_rate": 4.7888990452975827e-05, "loss": 2.5066, "step": 852500 }, { "epoch": 4.23, "learning_rate": 4.7887751866549744e-05, "loss": 2.5129, "step": 853000 }, { "epoch": 4.23, "learning_rate": 4.788651328012366e-05, "loss": 2.5264, "step": 853500 }, { "epoch": 4.23, "learning_rate": 4.788527469369758e-05, "loss": 2.5151, "step": 854000 }, { "epoch": 4.23, "learning_rate": 4.7884036107271494e-05, "loss": 2.5214, "step": 854500 }, { "epoch": 4.24, "learning_rate": 4.788279752084541e-05, "loss": 2.5097, "step": 855000 }, { "epoch": 4.24, "learning_rate": 4.788155893441933e-05, "loss": 2.5109, "step": 855500 }, { "epoch": 4.24, "learning_rate": 4.7880320347993245e-05, "loss": 2.522, "step": 856000 }, { "epoch": 4.24, "learning_rate": 4.7879084238740014e-05, "loss": 2.5233, "step": 856500 }, { "epoch": 4.25, "learning_rate": 4.787784565231393e-05, "loss": 2.5345, "step": 857000 }, { "epoch": 4.25, "learning_rate": 4.787660706588785e-05, "loss": 2.5216, "step": 857500 }, { "epoch": 4.25, "learning_rate": 4.7875368479461765e-05, "loss": 2.565, "step": 858000 }, { "epoch": 4.25, "learning_rate": 4.787412989303568e-05, "loss": 2.5278, "step": 858500 }, { "epoch": 4.26, "learning_rate": 4.7872893783782444e-05, "loss": 2.5257, "step": 859000 }, { "epoch": 4.26, "learning_rate": 4.787165519735636e-05, "loss": 2.5161, "step": 859500 }, { "epoch": 4.26, "learning_rate": 4.787041661093028e-05, "loss": 2.5183, "step": 860000 }, { "epoch": 4.26, "learning_rate": 4.7869178024504194e-05, "loss": 2.52, "step": 860500 }, { "epoch": 4.27, "learning_rate": 4.786793943807811e-05, "loss": 2.5174, "step": 861000 }, { "epoch": 4.27, "learning_rate": 4.786670085165203e-05, "loss": 2.5502, "step": 861500 }, { "epoch": 4.27, "learning_rate": 4.78654647423988e-05, "loss": 2.5172, "step": 862000 }, { "epoch": 4.27, "learning_rate": 4.7864226155972714e-05, "loss": 2.5307, "step": 862500 }, { "epoch": 4.28, "learning_rate": 4.786298756954663e-05, "loss": 2.5203, "step": 863000 }, { "epoch": 4.28, "learning_rate": 4.786174898312055e-05, "loss": 2.528, "step": 863500 }, { "epoch": 4.28, "learning_rate": 4.7860510396694465e-05, "loss": 2.5293, "step": 864000 }, { "epoch": 4.28, "learning_rate": 4.785927181026838e-05, "loss": 2.5237, "step": 864500 }, { "epoch": 4.29, "learning_rate": 4.78580332238423e-05, "loss": 2.5579, "step": 865000 }, { "epoch": 4.29, "learning_rate": 4.7856794637416216e-05, "loss": 2.5067, "step": 865500 }, { "epoch": 4.29, "learning_rate": 4.785555605099013e-05, "loss": 2.5152, "step": 866000 }, { "epoch": 4.29, "learning_rate": 4.785431746456405e-05, "loss": 2.5302, "step": 866500 }, { "epoch": 4.3, "learning_rate": 4.785308135531081e-05, "loss": 2.5225, "step": 867000 }, { "epoch": 4.3, "learning_rate": 4.785184524605758e-05, "loss": 2.507, "step": 867500 }, { "epoch": 4.3, "learning_rate": 4.78506066596315e-05, "loss": 2.5427, "step": 868000 }, { "epoch": 4.3, "learning_rate": 4.7849370550378266e-05, "loss": 2.508, "step": 868500 }, { "epoch": 4.31, "learning_rate": 4.784813196395218e-05, "loss": 2.5365, "step": 869000 }, { "epoch": 4.31, "learning_rate": 4.78468933775261e-05, "loss": 2.5121, "step": 869500 }, { "epoch": 4.31, "learning_rate": 4.784565479110002e-05, "loss": 2.519, "step": 870000 }, { "epoch": 4.31, "learning_rate": 4.7844416204673934e-05, "loss": 2.5295, "step": 870500 }, { "epoch": 4.32, "learning_rate": 4.784317761824785e-05, "loss": 2.5398, "step": 871000 }, { "epoch": 4.32, "learning_rate": 4.784193903182176e-05, "loss": 2.5191, "step": 871500 }, { "epoch": 4.32, "learning_rate": 4.784070044539568e-05, "loss": 2.523, "step": 872000 }, { "epoch": 4.32, "learning_rate": 4.7839461858969595e-05, "loss": 2.5589, "step": 872500 }, { "epoch": 4.33, "learning_rate": 4.783822327254351e-05, "loss": 2.5345, "step": 873000 }, { "epoch": 4.33, "learning_rate": 4.783698468611743e-05, "loss": 2.5171, "step": 873500 }, { "epoch": 4.33, "learning_rate": 4.7835746099691345e-05, "loss": 2.5048, "step": 874000 }, { "epoch": 4.33, "learning_rate": 4.783450751326526e-05, "loss": 2.5152, "step": 874500 }, { "epoch": 4.34, "learning_rate": 4.783327140401203e-05, "loss": 2.5189, "step": 875000 }, { "epoch": 4.34, "learning_rate": 4.783203281758595e-05, "loss": 2.506, "step": 875500 }, { "epoch": 4.34, "learning_rate": 4.7830794231159865e-05, "loss": 2.5281, "step": 876000 }, { "epoch": 4.34, "learning_rate": 4.782955564473378e-05, "loss": 2.5282, "step": 876500 }, { "epoch": 4.34, "learning_rate": 4.782831953548055e-05, "loss": 2.4956, "step": 877000 }, { "epoch": 4.35, "learning_rate": 4.782708094905447e-05, "loss": 2.5356, "step": 877500 }, { "epoch": 4.35, "learning_rate": 4.7825842362628385e-05, "loss": 2.5127, "step": 878000 }, { "epoch": 4.35, "learning_rate": 4.7824603776202295e-05, "loss": 2.536, "step": 878500 }, { "epoch": 4.35, "learning_rate": 4.782336518977621e-05, "loss": 2.5432, "step": 879000 }, { "epoch": 4.36, "learning_rate": 4.782212660335013e-05, "loss": 2.5164, "step": 879500 }, { "epoch": 4.36, "learning_rate": 4.78208904940969e-05, "loss": 2.5096, "step": 880000 }, { "epoch": 4.36, "learning_rate": 4.7819651907670814e-05, "loss": 2.5279, "step": 880500 }, { "epoch": 4.36, "learning_rate": 4.781841332124473e-05, "loss": 2.5284, "step": 881000 }, { "epoch": 4.37, "learning_rate": 4.781717473481865e-05, "loss": 2.496, "step": 881500 }, { "epoch": 4.37, "learning_rate": 4.7815936148392565e-05, "loss": 2.5169, "step": 882000 }, { "epoch": 4.37, "learning_rate": 4.781469756196648e-05, "loss": 2.5015, "step": 882500 }, { "epoch": 4.37, "learning_rate": 4.781346145271325e-05, "loss": 2.5389, "step": 883000 }, { "epoch": 4.38, "learning_rate": 4.781222286628717e-05, "loss": 2.5235, "step": 883500 }, { "epoch": 4.38, "learning_rate": 4.7810984279861085e-05, "loss": 2.5245, "step": 884000 }, { "epoch": 4.38, "learning_rate": 4.780974817060785e-05, "loss": 2.5214, "step": 884500 }, { "epoch": 4.38, "learning_rate": 4.780851206135462e-05, "loss": 2.5422, "step": 885000 }, { "epoch": 4.39, "learning_rate": 4.780727347492854e-05, "loss": 2.5327, "step": 885500 }, { "epoch": 4.39, "learning_rate": 4.7806034888502456e-05, "loss": 2.5164, "step": 886000 }, { "epoch": 4.39, "learning_rate": 4.780479630207637e-05, "loss": 2.5537, "step": 886500 }, { "epoch": 4.39, "learning_rate": 4.780355771565028e-05, "loss": 2.513, "step": 887000 }, { "epoch": 4.4, "learning_rate": 4.78023191292242e-05, "loss": 2.5196, "step": 887500 }, { "epoch": 4.4, "learning_rate": 4.780108054279812e-05, "loss": 2.5014, "step": 888000 }, { "epoch": 4.4, "learning_rate": 4.7799841956372034e-05, "loss": 2.5319, "step": 888500 }, { "epoch": 4.4, "learning_rate": 4.779860336994595e-05, "loss": 2.5317, "step": 889000 }, { "epoch": 4.41, "learning_rate": 4.779736478351987e-05, "loss": 2.5447, "step": 889500 }, { "epoch": 4.41, "learning_rate": 4.7796126197093785e-05, "loss": 2.513, "step": 890000 }, { "epoch": 4.41, "learning_rate": 4.77948876106677e-05, "loss": 2.5189, "step": 890500 }, { "epoch": 4.41, "learning_rate": 4.7793651501414464e-05, "loss": 2.5308, "step": 891000 }, { "epoch": 4.42, "learning_rate": 4.779241291498838e-05, "loss": 2.5264, "step": 891500 }, { "epoch": 4.42, "learning_rate": 4.77911743285623e-05, "loss": 2.5147, "step": 892000 }, { "epoch": 4.42, "learning_rate": 4.7789935742136215e-05, "loss": 2.5411, "step": 892500 }, { "epoch": 4.42, "learning_rate": 4.7788702110055835e-05, "loss": 2.496, "step": 893000 }, { "epoch": 4.43, "learning_rate": 4.778746352362975e-05, "loss": 2.5043, "step": 893500 }, { "epoch": 4.43, "learning_rate": 4.778622493720367e-05, "loss": 2.51, "step": 894000 }, { "epoch": 4.43, "learning_rate": 4.7784986350777586e-05, "loss": 2.518, "step": 894500 }, { "epoch": 4.43, "learning_rate": 4.77837477643515e-05, "loss": 2.5077, "step": 895000 }, { "epoch": 4.44, "learning_rate": 4.778250917792542e-05, "loss": 2.512, "step": 895500 }, { "epoch": 4.44, "learning_rate": 4.778127306867219e-05, "loss": 2.5042, "step": 896000 }, { "epoch": 4.44, "learning_rate": 4.7780034482246106e-05, "loss": 2.5105, "step": 896500 }, { "epoch": 4.44, "learning_rate": 4.777879589582002e-05, "loss": 2.5393, "step": 897000 }, { "epoch": 4.45, "learning_rate": 4.777755730939394e-05, "loss": 2.5374, "step": 897500 }, { "epoch": 4.45, "learning_rate": 4.7776318722967856e-05, "loss": 2.4974, "step": 898000 }, { "epoch": 4.45, "learning_rate": 4.7775080136541773e-05, "loss": 2.5217, "step": 898500 }, { "epoch": 4.45, "learning_rate": 4.777384155011569e-05, "loss": 2.5425, "step": 899000 }, { "epoch": 4.46, "learning_rate": 4.77726029636896e-05, "loss": 2.5461, "step": 899500 }, { "epoch": 4.46, "learning_rate": 4.777136933160922e-05, "loss": 2.5432, "step": 900000 }, { "epoch": 4.46, "learning_rate": 4.777013074518314e-05, "loss": 2.5105, "step": 900500 }, { "epoch": 4.46, "learning_rate": 4.7768892158757055e-05, "loss": 2.5198, "step": 901000 }, { "epoch": 4.47, "learning_rate": 4.7767656049503824e-05, "loss": 2.5226, "step": 901500 }, { "epoch": 4.47, "learning_rate": 4.776641746307774e-05, "loss": 2.5397, "step": 902000 }, { "epoch": 4.47, "learning_rate": 4.776517887665166e-05, "loss": 2.5173, "step": 902500 }, { "epoch": 4.47, "learning_rate": 4.7763940290225575e-05, "loss": 2.5123, "step": 903000 }, { "epoch": 4.48, "learning_rate": 4.776270170379949e-05, "loss": 2.5164, "step": 903500 }, { "epoch": 4.48, "learning_rate": 4.776146311737341e-05, "loss": 2.5277, "step": 904000 }, { "epoch": 4.48, "learning_rate": 4.7760224530947325e-05, "loss": 2.5085, "step": 904500 }, { "epoch": 4.48, "learning_rate": 4.775898594452124e-05, "loss": 2.5297, "step": 905000 }, { "epoch": 4.49, "learning_rate": 4.775774735809516e-05, "loss": 2.5393, "step": 905500 }, { "epoch": 4.49, "learning_rate": 4.775651124884192e-05, "loss": 2.5383, "step": 906000 }, { "epoch": 4.49, "learning_rate": 4.775527266241584e-05, "loss": 2.5199, "step": 906500 }, { "epoch": 4.49, "learning_rate": 4.7754034075989755e-05, "loss": 2.5323, "step": 907000 }, { "epoch": 4.5, "learning_rate": 4.775279548956367e-05, "loss": 2.5211, "step": 907500 }, { "epoch": 4.5, "learning_rate": 4.775155690313759e-05, "loss": 2.5245, "step": 908000 }, { "epoch": 4.5, "learning_rate": 4.7750318316711506e-05, "loss": 2.5016, "step": 908500 }, { "epoch": 4.5, "learning_rate": 4.774907973028542e-05, "loss": 2.5191, "step": 909000 }, { "epoch": 4.51, "learning_rate": 4.774784114385934e-05, "loss": 2.5471, "step": 909500 }, { "epoch": 4.51, "learning_rate": 4.774660255743326e-05, "loss": 2.5372, "step": 910000 }, { "epoch": 4.51, "learning_rate": 4.7745363971007174e-05, "loss": 2.52, "step": 910500 }, { "epoch": 4.51, "learning_rate": 4.774412538458109e-05, "loss": 2.512, "step": 911000 }, { "epoch": 4.52, "learning_rate": 4.774288679815501e-05, "loss": 2.5325, "step": 911500 }, { "epoch": 4.52, "learning_rate": 4.774164821172892e-05, "loss": 2.5194, "step": 912000 }, { "epoch": 4.52, "learning_rate": 4.7740409625302835e-05, "loss": 2.5473, "step": 912500 }, { "epoch": 4.52, "learning_rate": 4.773917103887675e-05, "loss": 2.5345, "step": 913000 }, { "epoch": 4.53, "learning_rate": 4.773793245245067e-05, "loss": 2.5285, "step": 913500 }, { "epoch": 4.53, "learning_rate": 4.7736693866024585e-05, "loss": 2.5442, "step": 914000 }, { "epoch": 4.53, "learning_rate": 4.7735457756771354e-05, "loss": 2.5358, "step": 914500 }, { "epoch": 4.53, "learning_rate": 4.773421917034527e-05, "loss": 2.5193, "step": 915000 }, { "epoch": 4.54, "learning_rate": 4.773298306109204e-05, "loss": 2.5213, "step": 915500 }, { "epoch": 4.54, "learning_rate": 4.773174447466596e-05, "loss": 2.5143, "step": 916000 }, { "epoch": 4.54, "learning_rate": 4.7730505888239874e-05, "loss": 2.507, "step": 916500 }, { "epoch": 4.54, "learning_rate": 4.772926977898664e-05, "loss": 2.5088, "step": 917000 }, { "epoch": 4.55, "learning_rate": 4.772803119256056e-05, "loss": 2.538, "step": 917500 }, { "epoch": 4.55, "learning_rate": 4.7726792606134476e-05, "loss": 2.5344, "step": 918000 }, { "epoch": 4.55, "learning_rate": 4.7725554019708393e-05, "loss": 2.5217, "step": 918500 }, { "epoch": 4.55, "learning_rate": 4.772431543328231e-05, "loss": 2.5035, "step": 919000 }, { "epoch": 4.56, "learning_rate": 4.772307684685623e-05, "loss": 2.5022, "step": 919500 }, { "epoch": 4.56, "learning_rate": 4.772183826043014e-05, "loss": 2.535, "step": 920000 }, { "epoch": 4.56, "learning_rate": 4.7720599674004054e-05, "loss": 2.481, "step": 920500 }, { "epoch": 4.56, "learning_rate": 4.771936356475082e-05, "loss": 2.5258, "step": 921000 }, { "epoch": 4.57, "learning_rate": 4.771812497832474e-05, "loss": 2.5533, "step": 921500 }, { "epoch": 4.57, "learning_rate": 4.771688639189866e-05, "loss": 2.5407, "step": 922000 }, { "epoch": 4.57, "learning_rate": 4.7715647805472574e-05, "loss": 2.5222, "step": 922500 }, { "epoch": 4.57, "learning_rate": 4.771440921904649e-05, "loss": 2.5202, "step": 923000 }, { "epoch": 4.58, "learning_rate": 4.7713178064138963e-05, "loss": 2.5326, "step": 923500 }, { "epoch": 4.58, "learning_rate": 4.771193947771288e-05, "loss": 2.5148, "step": 924000 }, { "epoch": 4.58, "learning_rate": 4.77107008912868e-05, "loss": 2.5138, "step": 924500 }, { "epoch": 4.58, "learning_rate": 4.770946230486071e-05, "loss": 2.4793, "step": 925000 }, { "epoch": 4.59, "learning_rate": 4.7708223718434624e-05, "loss": 2.5133, "step": 925500 }, { "epoch": 4.59, "learning_rate": 4.770698513200854e-05, "loss": 2.5307, "step": 926000 }, { "epoch": 4.59, "learning_rate": 4.770574902275532e-05, "loss": 2.5068, "step": 926500 }, { "epoch": 4.59, "learning_rate": 4.7704510436329234e-05, "loss": 2.528, "step": 927000 }, { "epoch": 4.6, "learning_rate": 4.7703271849903144e-05, "loss": 2.5122, "step": 927500 }, { "epoch": 4.6, "learning_rate": 4.770203326347706e-05, "loss": 2.5372, "step": 928000 }, { "epoch": 4.6, "learning_rate": 4.770079467705098e-05, "loss": 2.5282, "step": 928500 }, { "epoch": 4.6, "learning_rate": 4.7699556090624895e-05, "loss": 2.5162, "step": 929000 }, { "epoch": 4.61, "learning_rate": 4.769831750419881e-05, "loss": 2.5279, "step": 929500 }, { "epoch": 4.61, "learning_rate": 4.769707891777272e-05, "loss": 2.5302, "step": 930000 }, { "epoch": 4.61, "learning_rate": 4.769584033134664e-05, "loss": 2.5265, "step": 930500 }, { "epoch": 4.61, "learning_rate": 4.7694601744920556e-05, "loss": 2.5167, "step": 931000 }, { "epoch": 4.61, "learning_rate": 4.769336315849447e-05, "loss": 2.5033, "step": 931500 }, { "epoch": 4.62, "learning_rate": 4.769212457206839e-05, "loss": 2.5239, "step": 932000 }, { "epoch": 4.62, "learning_rate": 4.769088846281516e-05, "loss": 2.5485, "step": 932500 }, { "epoch": 4.62, "learning_rate": 4.7689649876389075e-05, "loss": 2.5165, "step": 933000 }, { "epoch": 4.62, "learning_rate": 4.768841376713585e-05, "loss": 2.5295, "step": 933500 }, { "epoch": 4.63, "learning_rate": 4.768717518070977e-05, "loss": 2.5282, "step": 934000 }, { "epoch": 4.63, "learning_rate": 4.768593659428368e-05, "loss": 2.505, "step": 934500 }, { "epoch": 4.63, "learning_rate": 4.768470048503045e-05, "loss": 2.5145, "step": 935000 }, { "epoch": 4.63, "learning_rate": 4.7683461898604364e-05, "loss": 2.5239, "step": 935500 }, { "epoch": 4.64, "learning_rate": 4.768222331217828e-05, "loss": 2.512, "step": 936000 }, { "epoch": 4.64, "learning_rate": 4.76809847257522e-05, "loss": 2.5265, "step": 936500 }, { "epoch": 4.64, "learning_rate": 4.7679746139326114e-05, "loss": 2.528, "step": 937000 }, { "epoch": 4.64, "learning_rate": 4.7678507552900025e-05, "loss": 2.5295, "step": 937500 }, { "epoch": 4.65, "learning_rate": 4.767726896647394e-05, "loss": 2.4952, "step": 938000 }, { "epoch": 4.65, "learning_rate": 4.767603038004786e-05, "loss": 2.5174, "step": 938500 }, { "epoch": 4.65, "learning_rate": 4.7674791793621775e-05, "loss": 2.5244, "step": 939000 }, { "epoch": 4.65, "learning_rate": 4.767355320719569e-05, "loss": 2.5318, "step": 939500 }, { "epoch": 4.66, "learning_rate": 4.767231462076961e-05, "loss": 2.5375, "step": 940000 }, { "epoch": 4.66, "learning_rate": 4.7671078511516385e-05, "loss": 2.5271, "step": 940500 }, { "epoch": 4.66, "learning_rate": 4.7669839925090295e-05, "loss": 2.5511, "step": 941000 }, { "epoch": 4.66, "learning_rate": 4.766860133866421e-05, "loss": 2.5225, "step": 941500 }, { "epoch": 4.67, "learning_rate": 4.766736275223813e-05, "loss": 2.5055, "step": 942000 }, { "epoch": 4.67, "learning_rate": 4.7666124165812046e-05, "loss": 2.5206, "step": 942500 }, { "epoch": 4.67, "learning_rate": 4.766488557938596e-05, "loss": 2.5182, "step": 943000 }, { "epoch": 4.67, "learning_rate": 4.766364947013273e-05, "loss": 2.5254, "step": 943500 }, { "epoch": 4.68, "learning_rate": 4.766241088370664e-05, "loss": 2.4945, "step": 944000 }, { "epoch": 4.68, "learning_rate": 4.766117229728056e-05, "loss": 2.5185, "step": 944500 }, { "epoch": 4.68, "learning_rate": 4.7659933710854476e-05, "loss": 2.5111, "step": 945000 }, { "epoch": 4.68, "learning_rate": 4.765869512442839e-05, "loss": 2.5186, "step": 945500 }, { "epoch": 4.69, "learning_rate": 4.765745653800231e-05, "loss": 2.5001, "step": 946000 }, { "epoch": 4.69, "learning_rate": 4.7656217951576226e-05, "loss": 2.5132, "step": 946500 }, { "epoch": 4.69, "learning_rate": 4.7654981842322995e-05, "loss": 2.5195, "step": 947000 }, { "epoch": 4.69, "learning_rate": 4.765374325589691e-05, "loss": 2.5179, "step": 947500 }, { "epoch": 4.7, "learning_rate": 4.765250466947083e-05, "loss": 2.5338, "step": 948000 }, { "epoch": 4.7, "learning_rate": 4.7651266083044746e-05, "loss": 2.5614, "step": 948500 }, { "epoch": 4.7, "learning_rate": 4.7650029973791515e-05, "loss": 2.5336, "step": 949000 }, { "epoch": 4.7, "learning_rate": 4.764879138736543e-05, "loss": 2.4901, "step": 949500 }, { "epoch": 4.71, "learning_rate": 4.76475552781122e-05, "loss": 2.5283, "step": 950000 }, { "epoch": 4.71, "learning_rate": 4.764631669168612e-05, "loss": 2.5171, "step": 950500 }, { "epoch": 4.71, "learning_rate": 4.764508058243288e-05, "loss": 2.5353, "step": 951000 }, { "epoch": 4.71, "learning_rate": 4.7643841996006796e-05, "loss": 2.5246, "step": 951500 }, { "epoch": 4.72, "learning_rate": 4.764260340958071e-05, "loss": 2.5173, "step": 952000 }, { "epoch": 4.72, "learning_rate": 4.764136482315463e-05, "loss": 2.5125, "step": 952500 }, { "epoch": 4.72, "learning_rate": 4.764012623672855e-05, "loss": 2.5237, "step": 953000 }, { "epoch": 4.72, "learning_rate": 4.7638887650302464e-05, "loss": 2.5033, "step": 953500 }, { "epoch": 4.73, "learning_rate": 4.763764906387638e-05, "loss": 2.5233, "step": 954000 }, { "epoch": 4.73, "learning_rate": 4.76364104774503e-05, "loss": 2.5408, "step": 954500 }, { "epoch": 4.73, "learning_rate": 4.7635171891024215e-05, "loss": 2.5335, "step": 955000 }, { "epoch": 4.73, "learning_rate": 4.763393330459813e-05, "loss": 2.5481, "step": 955500 }, { "epoch": 4.74, "learning_rate": 4.763269471817205e-05, "loss": 2.5219, "step": 956000 }, { "epoch": 4.74, "learning_rate": 4.763145613174596e-05, "loss": 2.5032, "step": 956500 }, { "epoch": 4.74, "learning_rate": 4.7630217545319876e-05, "loss": 2.5114, "step": 957000 }, { "epoch": 4.74, "learning_rate": 4.762897895889379e-05, "loss": 2.5286, "step": 957500 }, { "epoch": 4.75, "learning_rate": 4.762774284964057e-05, "loss": 2.5163, "step": 958000 }, { "epoch": 4.75, "learning_rate": 4.7626504263214485e-05, "loss": 2.5127, "step": 958500 }, { "epoch": 4.75, "learning_rate": 4.76252656767884e-05, "loss": 2.5466, "step": 959000 }, { "epoch": 4.75, "learning_rate": 4.762402709036231e-05, "loss": 2.5094, "step": 959500 }, { "epoch": 4.76, "learning_rate": 4.762278850393623e-05, "loss": 2.5276, "step": 960000 }, { "epoch": 4.76, "learning_rate": 4.7621549917510146e-05, "loss": 2.5316, "step": 960500 }, { "epoch": 4.76, "learning_rate": 4.762031133108406e-05, "loss": 2.5051, "step": 961000 }, { "epoch": 4.76, "learning_rate": 4.761907522183083e-05, "loss": 2.495, "step": 961500 }, { "epoch": 4.77, "learning_rate": 4.761783663540475e-05, "loss": 2.5125, "step": 962000 }, { "epoch": 4.77, "learning_rate": 4.761659804897866e-05, "loss": 2.5293, "step": 962500 }, { "epoch": 4.77, "learning_rate": 4.7615359462552576e-05, "loss": 2.5661, "step": 963000 }, { "epoch": 4.77, "learning_rate": 4.761412087612649e-05, "loss": 2.5403, "step": 963500 }, { "epoch": 4.78, "learning_rate": 4.761288228970041e-05, "loss": 2.4998, "step": 964000 }, { "epoch": 4.78, "learning_rate": 4.761164370327433e-05, "loss": 2.5193, "step": 964500 }, { "epoch": 4.78, "learning_rate": 4.7610405116848244e-05, "loss": 2.5213, "step": 965000 }, { "epoch": 4.78, "learning_rate": 4.760916653042216e-05, "loss": 2.5163, "step": 965500 }, { "epoch": 4.79, "learning_rate": 4.760792794399608e-05, "loss": 2.5139, "step": 966000 }, { "epoch": 4.79, "learning_rate": 4.7606689357569994e-05, "loss": 2.5008, "step": 966500 }, { "epoch": 4.79, "learning_rate": 4.760545324831676e-05, "loss": 2.5123, "step": 967000 }, { "epoch": 4.79, "learning_rate": 4.760421466189068e-05, "loss": 2.519, "step": 967500 }, { "epoch": 4.8, "learning_rate": 4.76029760754646e-05, "loss": 2.5064, "step": 968000 }, { "epoch": 4.8, "learning_rate": 4.7601739966211366e-05, "loss": 2.5199, "step": 968500 }, { "epoch": 4.8, "learning_rate": 4.7600501379785276e-05, "loss": 2.5431, "step": 969000 }, { "epoch": 4.8, "learning_rate": 4.759926279335919e-05, "loss": 2.5357, "step": 969500 }, { "epoch": 4.81, "learning_rate": 4.759802420693311e-05, "loss": 2.5141, "step": 970000 }, { "epoch": 4.81, "learning_rate": 4.759678562050703e-05, "loss": 2.5247, "step": 970500 }, { "epoch": 4.81, "learning_rate": 4.7595547034080944e-05, "loss": 2.5246, "step": 971000 }, { "epoch": 4.81, "learning_rate": 4.759430844765486e-05, "loss": 2.5307, "step": 971500 }, { "epoch": 4.82, "learning_rate": 4.759307233840163e-05, "loss": 2.5291, "step": 972000 }, { "epoch": 4.82, "learning_rate": 4.7591833751975546e-05, "loss": 2.5249, "step": 972500 }, { "epoch": 4.82, "learning_rate": 4.759059516554946e-05, "loss": 2.5034, "step": 973000 }, { "epoch": 4.82, "learning_rate": 4.758935657912338e-05, "loss": 2.4944, "step": 973500 }, { "epoch": 4.83, "learning_rate": 4.75881179926973e-05, "loss": 2.5173, "step": 974000 }, { "epoch": 4.83, "learning_rate": 4.7586881883444066e-05, "loss": 2.5288, "step": 974500 }, { "epoch": 4.83, "learning_rate": 4.7585643297017976e-05, "loss": 2.5382, "step": 975000 }, { "epoch": 4.83, "learning_rate": 4.758440471059189e-05, "loss": 2.5082, "step": 975500 }, { "epoch": 4.84, "learning_rate": 4.758316612416581e-05, "loss": 2.4987, "step": 976000 }, { "epoch": 4.84, "learning_rate": 4.758192753773973e-05, "loss": 2.5288, "step": 976500 }, { "epoch": 4.84, "learning_rate": 4.7580693905659354e-05, "loss": 2.5066, "step": 977000 }, { "epoch": 4.84, "learning_rate": 4.757945531923327e-05, "loss": 2.5299, "step": 977500 }, { "epoch": 4.85, "learning_rate": 4.757821673280718e-05, "loss": 2.5367, "step": 978000 }, { "epoch": 4.85, "learning_rate": 4.75769781463811e-05, "loss": 2.5034, "step": 978500 }, { "epoch": 4.85, "learning_rate": 4.7575739559955015e-05, "loss": 2.5142, "step": 979000 }, { "epoch": 4.85, "learning_rate": 4.757450097352893e-05, "loss": 2.5349, "step": 979500 }, { "epoch": 4.86, "learning_rate": 4.757326238710285e-05, "loss": 2.5567, "step": 980000 }, { "epoch": 4.86, "learning_rate": 4.7572023800676766e-05, "loss": 2.5057, "step": 980500 }, { "epoch": 4.86, "learning_rate": 4.757078521425068e-05, "loss": 2.5152, "step": 981000 }, { "epoch": 4.86, "learning_rate": 4.756954662782459e-05, "loss": 2.5161, "step": 981500 }, { "epoch": 4.87, "learning_rate": 4.756830804139851e-05, "loss": 2.4941, "step": 982000 }, { "epoch": 4.87, "learning_rate": 4.756706945497243e-05, "loss": 2.5139, "step": 982500 }, { "epoch": 4.87, "learning_rate": 4.7565830868546344e-05, "loss": 2.5407, "step": 983000 }, { "epoch": 4.87, "learning_rate": 4.756459475929312e-05, "loss": 2.5312, "step": 983500 }, { "epoch": 4.88, "learning_rate": 4.7563356172867036e-05, "loss": 2.5106, "step": 984000 }, { "epoch": 4.88, "learning_rate": 4.756211758644095e-05, "loss": 2.5268, "step": 984500 }, { "epoch": 4.88, "learning_rate": 4.7560879000014864e-05, "loss": 2.537, "step": 985000 }, { "epoch": 4.88, "learning_rate": 4.755964041358878e-05, "loss": 2.5343, "step": 985500 }, { "epoch": 4.88, "learning_rate": 4.75584018271627e-05, "loss": 2.5492, "step": 986000 }, { "epoch": 4.89, "learning_rate": 4.7557163240736614e-05, "loss": 2.5359, "step": 986500 }, { "epoch": 4.89, "learning_rate": 4.755592465431053e-05, "loss": 2.5285, "step": 987000 }, { "epoch": 4.89, "learning_rate": 4.755468854505729e-05, "loss": 2.5059, "step": 987500 }, { "epoch": 4.89, "learning_rate": 4.755344995863121e-05, "loss": 2.5226, "step": 988000 }, { "epoch": 4.9, "learning_rate": 4.755221137220513e-05, "loss": 2.5191, "step": 988500 }, { "epoch": 4.9, "learning_rate": 4.75509752629519e-05, "loss": 2.5028, "step": 989000 }, { "epoch": 4.9, "learning_rate": 4.754973915369867e-05, "loss": 2.5224, "step": 989500 }, { "epoch": 4.9, "learning_rate": 4.7548503044445434e-05, "loss": 2.5351, "step": 990000 }, { "epoch": 4.91, "learning_rate": 4.754726445801935e-05, "loss": 2.5295, "step": 990500 }, { "epoch": 4.91, "learning_rate": 4.754602587159327e-05, "loss": 2.5256, "step": 991000 }, { "epoch": 4.91, "learning_rate": 4.7544787285167184e-05, "loss": 2.5175, "step": 991500 }, { "epoch": 4.91, "learning_rate": 4.75435486987411e-05, "loss": 2.5368, "step": 992000 }, { "epoch": 4.92, "learning_rate": 4.754231011231502e-05, "loss": 2.5184, "step": 992500 }, { "epoch": 4.92, "learning_rate": 4.7541071525888935e-05, "loss": 2.5307, "step": 993000 }, { "epoch": 4.92, "learning_rate": 4.753983293946285e-05, "loss": 2.5217, "step": 993500 }, { "epoch": 4.92, "learning_rate": 4.753859435303677e-05, "loss": 2.5294, "step": 994000 }, { "epoch": 4.93, "learning_rate": 4.753735824378354e-05, "loss": 2.505, "step": 994500 }, { "epoch": 4.93, "learning_rate": 4.7536119657357455e-05, "loss": 2.5285, "step": 995000 }, { "epoch": 4.93, "learning_rate": 4.753488107093137e-05, "loss": 2.5311, "step": 995500 }, { "epoch": 4.93, "learning_rate": 4.753364248450529e-05, "loss": 2.5233, "step": 996000 }, { "epoch": 4.94, "learning_rate": 4.7532403898079206e-05, "loss": 2.5115, "step": 996500 }, { "epoch": 4.94, "learning_rate": 4.753116531165312e-05, "loss": 2.5477, "step": 997000 }, { "epoch": 4.94, "learning_rate": 4.7529929202399885e-05, "loss": 2.5459, "step": 997500 }, { "epoch": 4.94, "learning_rate": 4.75286906159738e-05, "loss": 2.5032, "step": 998000 }, { "epoch": 4.95, "learning_rate": 4.752745202954772e-05, "loss": 2.506, "step": 998500 }, { "epoch": 4.95, "learning_rate": 4.7526213443121635e-05, "loss": 2.5271, "step": 999000 }, { "epoch": 4.95, "learning_rate": 4.7524977333868404e-05, "loss": 2.5142, "step": 999500 }, { "epoch": 4.95, "learning_rate": 4.752373874744232e-05, "loss": 2.5061, "step": 1000000 }, { "epoch": 4.96, "learning_rate": 4.752250016101624e-05, "loss": 2.5104, "step": 1000500 }, { "epoch": 4.96, "learning_rate": 4.7521261574590155e-05, "loss": 2.5104, "step": 1001000 }, { "epoch": 4.96, "learning_rate": 4.752002298816407e-05, "loss": 2.5243, "step": 1001500 }, { "epoch": 4.96, "learning_rate": 4.751878440173799e-05, "loss": 2.5186, "step": 1002000 }, { "epoch": 4.97, "learning_rate": 4.7517545815311906e-05, "loss": 2.52, "step": 1002500 }, { "epoch": 4.97, "learning_rate": 4.751630722888582e-05, "loss": 2.5164, "step": 1003000 }, { "epoch": 4.97, "learning_rate": 4.751506864245974e-05, "loss": 2.5276, "step": 1003500 }, { "epoch": 4.97, "learning_rate": 4.7513830056033656e-05, "loss": 2.5109, "step": 1004000 }, { "epoch": 4.98, "learning_rate": 4.751259146960757e-05, "loss": 2.5262, "step": 1004500 }, { "epoch": 4.98, "learning_rate": 4.7511352883181484e-05, "loss": 2.5065, "step": 1005000 }, { "epoch": 4.98, "learning_rate": 4.751011677392825e-05, "loss": 2.5033, "step": 1005500 }, { "epoch": 4.98, "learning_rate": 4.750887818750217e-05, "loss": 2.5218, "step": 1006000 }, { "epoch": 4.99, "learning_rate": 4.7507639601076086e-05, "loss": 2.5046, "step": 1006500 }, { "epoch": 4.99, "learning_rate": 4.750640101465e-05, "loss": 2.5356, "step": 1007000 }, { "epoch": 4.99, "learning_rate": 4.750516490539677e-05, "loss": 2.5308, "step": 1007500 }, { "epoch": 4.99, "learning_rate": 4.750392879614354e-05, "loss": 2.5266, "step": 1008000 }, { "epoch": 5.0, "learning_rate": 4.750269020971745e-05, "loss": 2.5191, "step": 1008500 }, { "epoch": 5.0, "learning_rate": 4.750145162329137e-05, "loss": 2.5071, "step": 1009000 }, { "epoch": 5.0, "eval_accuracy": 0.635423503873417, "eval_accuracy_mlm": 0.588502945831283, "eval_accuracy_nsp": 0.8568240383747975, "eval_loss": 2.4675350189208984, "eval_runtime": 146.0196, "eval_samples_per_second": 1746.06, "eval_steps_per_second": 72.757, "step": 1009215 }, { "epoch": 5.0, "learning_rate": 4.7500213036865285e-05, "loss": 2.5021, "step": 1009500 }, { "epoch": 5.0, "learning_rate": 4.74989744504392e-05, "loss": 2.4848, "step": 1010000 }, { "epoch": 5.01, "learning_rate": 4.749773586401312e-05, "loss": 2.4841, "step": 1010500 }, { "epoch": 5.01, "learning_rate": 4.7496497277587036e-05, "loss": 2.4968, "step": 1011000 }, { "epoch": 5.01, "learning_rate": 4.749525869116095e-05, "loss": 2.4742, "step": 1011500 }, { "epoch": 5.01, "learning_rate": 4.749402010473487e-05, "loss": 2.5161, "step": 1012000 }, { "epoch": 5.02, "learning_rate": 4.7492781518308786e-05, "loss": 2.496, "step": 1012500 }, { "epoch": 5.02, "learning_rate": 4.74915429318827e-05, "loss": 2.4761, "step": 1013000 }, { "epoch": 5.02, "learning_rate": 4.749030434545662e-05, "loss": 2.514, "step": 1013500 }, { "epoch": 5.02, "learning_rate": 4.748906823620339e-05, "loss": 2.4858, "step": 1014000 }, { "epoch": 5.03, "learning_rate": 4.7487829649777306e-05, "loss": 2.5104, "step": 1014500 }, { "epoch": 5.03, "learning_rate": 4.7486593540524075e-05, "loss": 2.4841, "step": 1015000 }, { "epoch": 5.03, "learning_rate": 4.7485354954097985e-05, "loss": 2.4851, "step": 1015500 }, { "epoch": 5.03, "learning_rate": 4.748411884484476e-05, "loss": 2.4796, "step": 1016000 }, { "epoch": 5.04, "learning_rate": 4.748288025841867e-05, "loss": 2.512, "step": 1016500 }, { "epoch": 5.04, "learning_rate": 4.7481644149165446e-05, "loss": 2.4949, "step": 1017000 }, { "epoch": 5.04, "learning_rate": 4.748040556273936e-05, "loss": 2.4787, "step": 1017500 }, { "epoch": 5.04, "learning_rate": 4.747916697631328e-05, "loss": 2.4854, "step": 1018000 }, { "epoch": 5.05, "learning_rate": 4.74779283898872e-05, "loss": 2.505, "step": 1018500 }, { "epoch": 5.05, "learning_rate": 4.7476689803461114e-05, "loss": 2.4824, "step": 1019000 }, { "epoch": 5.05, "learning_rate": 4.7475451217035024e-05, "loss": 2.5006, "step": 1019500 }, { "epoch": 5.05, "learning_rate": 4.747421263060894e-05, "loss": 2.4915, "step": 1020000 }, { "epoch": 5.06, "learning_rate": 4.747297652135571e-05, "loss": 2.5021, "step": 1020500 }, { "epoch": 5.06, "learning_rate": 4.747173793492963e-05, "loss": 2.4845, "step": 1021000 }, { "epoch": 5.06, "learning_rate": 4.7470499348503544e-05, "loss": 2.4846, "step": 1021500 }, { "epoch": 5.06, "learning_rate": 4.746926076207746e-05, "loss": 2.4703, "step": 1022000 }, { "epoch": 5.07, "learning_rate": 4.746802465282423e-05, "loss": 2.5154, "step": 1022500 }, { "epoch": 5.07, "learning_rate": 4.7466786066398146e-05, "loss": 2.4895, "step": 1023000 }, { "epoch": 5.07, "learning_rate": 4.746554747997206e-05, "loss": 2.4909, "step": 1023500 }, { "epoch": 5.07, "learning_rate": 4.746430889354598e-05, "loss": 2.4931, "step": 1024000 }, { "epoch": 5.08, "learning_rate": 4.74630703071199e-05, "loss": 2.4837, "step": 1024500 }, { "epoch": 5.08, "learning_rate": 4.746183667503951e-05, "loss": 2.499, "step": 1025000 }, { "epoch": 5.08, "learning_rate": 4.746059808861343e-05, "loss": 2.5291, "step": 1025500 }, { "epoch": 5.08, "learning_rate": 4.7459359502187345e-05, "loss": 2.4858, "step": 1026000 }, { "epoch": 5.09, "learning_rate": 4.745812091576126e-05, "loss": 2.4969, "step": 1026500 }, { "epoch": 5.09, "learning_rate": 4.745688232933518e-05, "loss": 2.4516, "step": 1027000 }, { "epoch": 5.09, "learning_rate": 4.7455643742909096e-05, "loss": 2.5106, "step": 1027500 }, { "epoch": 5.09, "learning_rate": 4.745440515648301e-05, "loss": 2.4804, "step": 1028000 }, { "epoch": 5.1, "learning_rate": 4.745316657005693e-05, "loss": 2.4969, "step": 1028500 }, { "epoch": 5.1, "learning_rate": 4.7451927983630846e-05, "loss": 2.5142, "step": 1029000 }, { "epoch": 5.1, "learning_rate": 4.745068939720476e-05, "loss": 2.4816, "step": 1029500 }, { "epoch": 5.1, "learning_rate": 4.744945081077868e-05, "loss": 2.4882, "step": 1030000 }, { "epoch": 5.11, "learning_rate": 4.74482122243526e-05, "loss": 2.4912, "step": 1030500 }, { "epoch": 5.11, "learning_rate": 4.7446973637926514e-05, "loss": 2.4795, "step": 1031000 }, { "epoch": 5.11, "learning_rate": 4.744573505150043e-05, "loss": 2.498, "step": 1031500 }, { "epoch": 5.11, "learning_rate": 4.744449646507434e-05, "loss": 2.5025, "step": 1032000 }, { "epoch": 5.12, "learning_rate": 4.744325787864826e-05, "loss": 2.4867, "step": 1032500 }, { "epoch": 5.12, "learning_rate": 4.7442019292222175e-05, "loss": 2.5047, "step": 1033000 }, { "epoch": 5.12, "learning_rate": 4.744078070579609e-05, "loss": 2.493, "step": 1033500 }, { "epoch": 5.12, "learning_rate": 4.743954211937001e-05, "loss": 2.4766, "step": 1034000 }, { "epoch": 5.13, "learning_rate": 4.7438303532943926e-05, "loss": 2.4811, "step": 1034500 }, { "epoch": 5.13, "learning_rate": 4.743706742369069e-05, "loss": 2.4792, "step": 1035000 }, { "epoch": 5.13, "learning_rate": 4.7435828837264605e-05, "loss": 2.4694, "step": 1035500 }, { "epoch": 5.13, "learning_rate": 4.743459272801138e-05, "loss": 2.4567, "step": 1036000 }, { "epoch": 5.14, "learning_rate": 4.7433359095930994e-05, "loss": 2.4914, "step": 1036500 }, { "epoch": 5.14, "learning_rate": 4.743212050950491e-05, "loss": 2.4963, "step": 1037000 }, { "epoch": 5.14, "learning_rate": 4.743088192307883e-05, "loss": 2.5135, "step": 1037500 }, { "epoch": 5.14, "learning_rate": 4.7429643336652745e-05, "loss": 2.5065, "step": 1038000 }, { "epoch": 5.15, "learning_rate": 4.742840475022666e-05, "loss": 2.5155, "step": 1038500 }, { "epoch": 5.15, "learning_rate": 4.742716616380058e-05, "loss": 2.5172, "step": 1039000 }, { "epoch": 5.15, "learning_rate": 4.7425927577374496e-05, "loss": 2.4787, "step": 1039500 }, { "epoch": 5.15, "learning_rate": 4.742468899094841e-05, "loss": 2.4847, "step": 1040000 }, { "epoch": 5.15, "learning_rate": 4.742345040452233e-05, "loss": 2.5204, "step": 1040500 }, { "epoch": 5.16, "learning_rate": 4.742221181809625e-05, "loss": 2.4876, "step": 1041000 }, { "epoch": 5.16, "learning_rate": 4.7420973231670164e-05, "loss": 2.5044, "step": 1041500 }, { "epoch": 5.16, "learning_rate": 4.741973464524408e-05, "loss": 2.4813, "step": 1042000 }, { "epoch": 5.16, "learning_rate": 4.7418496058818e-05, "loss": 2.4927, "step": 1042500 }, { "epoch": 5.17, "learning_rate": 4.7417257472391914e-05, "loss": 2.5097, "step": 1043000 }, { "epoch": 5.17, "learning_rate": 4.741601888596583e-05, "loss": 2.5111, "step": 1043500 }, { "epoch": 5.17, "learning_rate": 4.741478277671259e-05, "loss": 2.5181, "step": 1044000 }, { "epoch": 5.17, "learning_rate": 4.741354419028651e-05, "loss": 2.5167, "step": 1044500 }, { "epoch": 5.18, "learning_rate": 4.741230560386043e-05, "loss": 2.5013, "step": 1045000 }, { "epoch": 5.18, "learning_rate": 4.7411067017434344e-05, "loss": 2.516, "step": 1045500 }, { "epoch": 5.18, "learning_rate": 4.740982843100826e-05, "loss": 2.4898, "step": 1046000 }, { "epoch": 5.18, "learning_rate": 4.740858984458218e-05, "loss": 2.5233, "step": 1046500 }, { "epoch": 5.19, "learning_rate": 4.7407351258156095e-05, "loss": 2.5021, "step": 1047000 }, { "epoch": 5.19, "learning_rate": 4.7406115148902864e-05, "loss": 2.4977, "step": 1047500 }, { "epoch": 5.19, "learning_rate": 4.740487656247678e-05, "loss": 2.5229, "step": 1048000 }, { "epoch": 5.19, "learning_rate": 4.74036379760507e-05, "loss": 2.5149, "step": 1048500 }, { "epoch": 5.2, "learning_rate": 4.7402399389624615e-05, "loss": 2.5227, "step": 1049000 }, { "epoch": 5.2, "learning_rate": 4.740116328037138e-05, "loss": 2.5052, "step": 1049500 }, { "epoch": 5.2, "learning_rate": 4.7399924693945293e-05, "loss": 2.5153, "step": 1050000 }, { "epoch": 5.2, "learning_rate": 4.739868610751921e-05, "loss": 2.5106, "step": 1050500 }, { "epoch": 5.21, "learning_rate": 4.739744752109313e-05, "loss": 2.4979, "step": 1051000 }, { "epoch": 5.21, "learning_rate": 4.7396208934667044e-05, "loss": 2.4952, "step": 1051500 }, { "epoch": 5.21, "learning_rate": 4.739497034824096e-05, "loss": 2.5124, "step": 1052000 }, { "epoch": 5.21, "learning_rate": 4.739373176181488e-05, "loss": 2.4948, "step": 1052500 }, { "epoch": 5.22, "learning_rate": 4.7392493175388795e-05, "loss": 2.5208, "step": 1053000 }, { "epoch": 5.22, "learning_rate": 4.7391257066135564e-05, "loss": 2.4947, "step": 1053500 }, { "epoch": 5.22, "learning_rate": 4.7390023434055185e-05, "loss": 2.505, "step": 1054000 }, { "epoch": 5.22, "learning_rate": 4.7388784847629095e-05, "loss": 2.5084, "step": 1054500 }, { "epoch": 5.23, "learning_rate": 4.738754626120301e-05, "loss": 2.5115, "step": 1055000 }, { "epoch": 5.23, "learning_rate": 4.738630767477693e-05, "loss": 2.5227, "step": 1055500 }, { "epoch": 5.23, "learning_rate": 4.7385069088350846e-05, "loss": 2.5089, "step": 1056000 }, { "epoch": 5.23, "learning_rate": 4.738383050192476e-05, "loss": 2.5291, "step": 1056500 }, { "epoch": 5.24, "learning_rate": 4.738259191549868e-05, "loss": 2.5091, "step": 1057000 }, { "epoch": 5.24, "learning_rate": 4.7381353329072596e-05, "loss": 2.4981, "step": 1057500 }, { "epoch": 5.24, "learning_rate": 4.738011474264651e-05, "loss": 2.4948, "step": 1058000 }, { "epoch": 5.24, "learning_rate": 4.737887615622043e-05, "loss": 2.5134, "step": 1058500 }, { "epoch": 5.25, "learning_rate": 4.737763756979435e-05, "loss": 2.528, "step": 1059000 }, { "epoch": 5.25, "learning_rate": 4.7376398983368264e-05, "loss": 2.5002, "step": 1059500 }, { "epoch": 5.25, "learning_rate": 4.737516287411503e-05, "loss": 2.4989, "step": 1060000 }, { "epoch": 5.25, "learning_rate": 4.737392428768895e-05, "loss": 2.5081, "step": 1060500 }, { "epoch": 5.26, "learning_rate": 4.737268570126287e-05, "loss": 2.4678, "step": 1061000 }, { "epoch": 5.26, "learning_rate": 4.7371447114836784e-05, "loss": 2.5176, "step": 1061500 }, { "epoch": 5.26, "learning_rate": 4.73702085284107e-05, "loss": 2.5324, "step": 1062000 }, { "epoch": 5.26, "learning_rate": 4.736897489633032e-05, "loss": 2.4977, "step": 1062500 }, { "epoch": 5.27, "learning_rate": 4.736773630990424e-05, "loss": 2.4846, "step": 1063000 }, { "epoch": 5.27, "learning_rate": 4.7366497723478155e-05, "loss": 2.4992, "step": 1063500 }, { "epoch": 5.27, "learning_rate": 4.7365259137052065e-05, "loss": 2.5183, "step": 1064000 }, { "epoch": 5.27, "learning_rate": 4.736402055062598e-05, "loss": 2.5053, "step": 1064500 }, { "epoch": 5.28, "learning_rate": 4.73627819641999e-05, "loss": 2.5262, "step": 1065000 }, { "epoch": 5.28, "learning_rate": 4.7361543377773816e-05, "loss": 2.515, "step": 1065500 }, { "epoch": 5.28, "learning_rate": 4.736030479134773e-05, "loss": 2.4938, "step": 1066000 }, { "epoch": 5.28, "learning_rate": 4.735906620492165e-05, "loss": 2.5149, "step": 1066500 }, { "epoch": 5.29, "learning_rate": 4.735782761849557e-05, "loss": 2.5019, "step": 1067000 }, { "epoch": 5.29, "learning_rate": 4.735659150924233e-05, "loss": 2.5112, "step": 1067500 }, { "epoch": 5.29, "learning_rate": 4.7355352922816246e-05, "loss": 2.5042, "step": 1068000 }, { "epoch": 5.29, "learning_rate": 4.735411681356302e-05, "loss": 2.5022, "step": 1068500 }, { "epoch": 5.3, "learning_rate": 4.735287822713694e-05, "loss": 2.5294, "step": 1069000 }, { "epoch": 5.3, "learning_rate": 4.7351639640710855e-05, "loss": 2.4906, "step": 1069500 }, { "epoch": 5.3, "learning_rate": 4.735040105428477e-05, "loss": 2.497, "step": 1070000 }, { "epoch": 5.3, "learning_rate": 4.734916246785868e-05, "loss": 2.5093, "step": 1070500 }, { "epoch": 5.31, "learning_rate": 4.73479238814326e-05, "loss": 2.4948, "step": 1071000 }, { "epoch": 5.31, "learning_rate": 4.7346685295006516e-05, "loss": 2.5087, "step": 1071500 }, { "epoch": 5.31, "learning_rate": 4.734544670858043e-05, "loss": 2.5076, "step": 1072000 }, { "epoch": 5.31, "learning_rate": 4.734420812215435e-05, "loss": 2.4856, "step": 1072500 }, { "epoch": 5.32, "learning_rate": 4.734296953572827e-05, "loss": 2.4979, "step": 1073000 }, { "epoch": 5.32, "learning_rate": 4.7341730949302184e-05, "loss": 2.4767, "step": 1073500 }, { "epoch": 5.32, "learning_rate": 4.7340494840048946e-05, "loss": 2.4792, "step": 1074000 }, { "epoch": 5.32, "learning_rate": 4.733925625362286e-05, "loss": 2.4931, "step": 1074500 }, { "epoch": 5.33, "learning_rate": 4.733801766719678e-05, "loss": 2.4973, "step": 1075000 }, { "epoch": 5.33, "learning_rate": 4.73367790807707e-05, "loss": 2.4988, "step": 1075500 }, { "epoch": 5.33, "learning_rate": 4.7335540494344614e-05, "loss": 2.4931, "step": 1076000 }, { "epoch": 5.33, "learning_rate": 4.733430190791853e-05, "loss": 2.5197, "step": 1076500 }, { "epoch": 5.34, "learning_rate": 4.733306332149245e-05, "loss": 2.4846, "step": 1077000 }, { "epoch": 5.34, "learning_rate": 4.7331824735066364e-05, "loss": 2.5041, "step": 1077500 }, { "epoch": 5.34, "learning_rate": 4.733058862581313e-05, "loss": 2.4894, "step": 1078000 }, { "epoch": 5.34, "learning_rate": 4.732935003938705e-05, "loss": 2.5004, "step": 1078500 }, { "epoch": 5.35, "learning_rate": 4.732811145296097e-05, "loss": 2.5042, "step": 1079000 }, { "epoch": 5.35, "learning_rate": 4.7326875343707736e-05, "loss": 2.4997, "step": 1079500 }, { "epoch": 5.35, "learning_rate": 4.732563675728165e-05, "loss": 2.5063, "step": 1080000 }, { "epoch": 5.35, "learning_rate": 4.732439817085556e-05, "loss": 2.5086, "step": 1080500 }, { "epoch": 5.36, "learning_rate": 4.732315958442948e-05, "loss": 2.4812, "step": 1081000 }, { "epoch": 5.36, "learning_rate": 4.73219209980034e-05, "loss": 2.4943, "step": 1081500 }, { "epoch": 5.36, "learning_rate": 4.7320682411577314e-05, "loss": 2.5031, "step": 1082000 }, { "epoch": 5.36, "learning_rate": 4.731944382515123e-05, "loss": 2.5015, "step": 1082500 }, { "epoch": 5.37, "learning_rate": 4.731820523872515e-05, "loss": 2.5043, "step": 1083000 }, { "epoch": 5.37, "learning_rate": 4.7316966652299064e-05, "loss": 2.5235, "step": 1083500 }, { "epoch": 5.37, "learning_rate": 4.731572806587298e-05, "loss": 2.491, "step": 1084000 }, { "epoch": 5.37, "learning_rate": 4.73144894794469e-05, "loss": 2.4956, "step": 1084500 }, { "epoch": 5.38, "learning_rate": 4.7313250893020815e-05, "loss": 2.5086, "step": 1085000 }, { "epoch": 5.38, "learning_rate": 4.731201230659473e-05, "loss": 2.534, "step": 1085500 }, { "epoch": 5.38, "learning_rate": 4.73107761973415e-05, "loss": 2.5147, "step": 1086000 }, { "epoch": 5.38, "learning_rate": 4.730953761091542e-05, "loss": 2.5247, "step": 1086500 }, { "epoch": 5.39, "learning_rate": 4.7308299024489335e-05, "loss": 2.5079, "step": 1087000 }, { "epoch": 5.39, "learning_rate": 4.730706043806325e-05, "loss": 2.5156, "step": 1087500 }, { "epoch": 5.39, "learning_rate": 4.730582185163717e-05, "loss": 2.4809, "step": 1088000 }, { "epoch": 5.39, "learning_rate": 4.730458574238393e-05, "loss": 2.5121, "step": 1088500 }, { "epoch": 5.4, "learning_rate": 4.730334715595785e-05, "loss": 2.5155, "step": 1089000 }, { "epoch": 5.4, "learning_rate": 4.7302108569531765e-05, "loss": 2.4882, "step": 1089500 }, { "epoch": 5.4, "learning_rate": 4.730086998310568e-05, "loss": 2.4941, "step": 1090000 }, { "epoch": 5.4, "learning_rate": 4.72996313966796e-05, "loss": 2.4765, "step": 1090500 }, { "epoch": 5.41, "learning_rate": 4.7298392810253515e-05, "loss": 2.4786, "step": 1091000 }, { "epoch": 5.41, "learning_rate": 4.729715422382743e-05, "loss": 2.5199, "step": 1091500 }, { "epoch": 5.41, "learning_rate": 4.72959181145742e-05, "loss": 2.4885, "step": 1092000 }, { "epoch": 5.41, "learning_rate": 4.729468200532097e-05, "loss": 2.523, "step": 1092500 }, { "epoch": 5.42, "learning_rate": 4.729344589606774e-05, "loss": 2.5936, "step": 1093000 }, { "epoch": 5.42, "learning_rate": 4.7292207309641656e-05, "loss": 2.5146, "step": 1093500 }, { "epoch": 5.42, "learning_rate": 4.729096872321557e-05, "loss": 2.5291, "step": 1094000 }, { "epoch": 5.42, "learning_rate": 4.728973013678949e-05, "loss": 2.5103, "step": 1094500 }, { "epoch": 5.43, "learning_rate": 4.7288491550363406e-05, "loss": 2.5062, "step": 1095000 }, { "epoch": 5.43, "learning_rate": 4.728725791828302e-05, "loss": 2.5114, "step": 1095500 }, { "epoch": 5.43, "learning_rate": 4.728602180902979e-05, "loss": 2.5239, "step": 1096000 }, { "epoch": 5.43, "learning_rate": 4.7284783222603706e-05, "loss": 2.5374, "step": 1096500 }, { "epoch": 5.43, "learning_rate": 4.728354463617762e-05, "loss": 2.5018, "step": 1097000 }, { "epoch": 5.44, "learning_rate": 4.728230604975154e-05, "loss": 2.4786, "step": 1097500 }, { "epoch": 5.44, "learning_rate": 4.7281069940498316e-05, "loss": 2.5172, "step": 1098000 }, { "epoch": 5.44, "learning_rate": 4.727983135407223e-05, "loss": 2.5365, "step": 1098500 }, { "epoch": 5.44, "learning_rate": 4.727859276764614e-05, "loss": 2.6215, "step": 1099000 }, { "epoch": 5.45, "learning_rate": 4.727735418122006e-05, "loss": 2.5575, "step": 1099500 }, { "epoch": 5.45, "learning_rate": 4.7276115594793977e-05, "loss": 2.549, "step": 1100000 }, { "epoch": 5.45, "learning_rate": 4.7274877008367893e-05, "loss": 2.538, "step": 1100500 }, { "epoch": 5.45, "learning_rate": 4.727364089911466e-05, "loss": 2.698, "step": 1101000 }, { "epoch": 5.46, "learning_rate": 4.727240231268858e-05, "loss": 2.6845, "step": 1101500 }, { "epoch": 5.46, "learning_rate": 4.727116372626249e-05, "loss": 2.6379, "step": 1102000 }, { "epoch": 5.46, "learning_rate": 4.7269925139836406e-05, "loss": 2.5858, "step": 1102500 }, { "epoch": 5.46, "learning_rate": 4.726868655341032e-05, "loss": 2.5711, "step": 1103000 }, { "epoch": 5.47, "learning_rate": 4.7267452921329944e-05, "loss": 2.6267, "step": 1103500 }, { "epoch": 5.47, "learning_rate": 4.726621681207671e-05, "loss": 2.6757, "step": 1104000 }, { "epoch": 5.47, "learning_rate": 4.726497822565063e-05, "loss": 2.6265, "step": 1104500 }, { "epoch": 5.47, "learning_rate": 4.7263739639224547e-05, "loss": 2.6267, "step": 1105000 }, { "epoch": 5.48, "learning_rate": 4.7262501052798463e-05, "loss": 2.6607, "step": 1105500 }, { "epoch": 5.48, "learning_rate": 4.726126246637238e-05, "loss": 2.5848, "step": 1106000 }, { "epoch": 5.48, "learning_rate": 4.72600238799463e-05, "loss": 2.5908, "step": 1106500 }, { "epoch": 5.48, "learning_rate": 4.7258785293520214e-05, "loss": 2.6656, "step": 1107000 }, { "epoch": 5.49, "learning_rate": 4.725754918426698e-05, "loss": 2.5915, "step": 1107500 }, { "epoch": 5.49, "learning_rate": 4.72563105978409e-05, "loss": 2.6518, "step": 1108000 }, { "epoch": 5.49, "learning_rate": 4.725507201141482e-05, "loss": 2.617, "step": 1108500 }, { "epoch": 5.49, "learning_rate": 4.725383342498873e-05, "loss": 2.6337, "step": 1109000 }, { "epoch": 5.5, "learning_rate": 4.7252594838562644e-05, "loss": 2.5885, "step": 1109500 }, { "epoch": 5.5, "learning_rate": 4.725135625213656e-05, "loss": 2.5465, "step": 1110000 }, { "epoch": 5.5, "learning_rate": 4.725011766571048e-05, "loss": 2.5822, "step": 1110500 }, { "epoch": 5.5, "learning_rate": 4.7248879079284395e-05, "loss": 2.6102, "step": 1111000 }, { "epoch": 5.51, "learning_rate": 4.724764049285831e-05, "loss": 2.6242, "step": 1111500 }, { "epoch": 5.51, "learning_rate": 4.724640438360508e-05, "loss": 2.6015, "step": 1112000 }, { "epoch": 5.51, "learning_rate": 4.7245165797179e-05, "loss": 2.5667, "step": 1112500 }, { "epoch": 5.51, "learning_rate": 4.7243927210752914e-05, "loss": 2.5967, "step": 1113000 }, { "epoch": 5.52, "learning_rate": 4.724268862432683e-05, "loss": 2.591, "step": 1113500 }, { "epoch": 5.52, "learning_rate": 4.72414525150736e-05, "loss": 2.6024, "step": 1114000 }, { "epoch": 5.52, "learning_rate": 4.724021392864752e-05, "loss": 2.6239, "step": 1114500 }, { "epoch": 5.52, "learning_rate": 4.7238975342221434e-05, "loss": 2.6354, "step": 1115000 }, { "epoch": 5.53, "learning_rate": 4.723773675579535e-05, "loss": 2.6408, "step": 1115500 }, { "epoch": 5.53, "learning_rate": 4.723649816936926e-05, "loss": 2.6031, "step": 1116000 }, { "epoch": 5.53, "learning_rate": 4.723525958294318e-05, "loss": 2.6564, "step": 1116500 }, { "epoch": 5.53, "learning_rate": 4.7234020996517095e-05, "loss": 2.5826, "step": 1117000 }, { "epoch": 5.54, "learning_rate": 4.723278241009101e-05, "loss": 2.583, "step": 1117500 }, { "epoch": 5.54, "learning_rate": 4.723154382366493e-05, "loss": 2.5424, "step": 1118000 }, { "epoch": 5.54, "learning_rate": 4.7230305237238846e-05, "loss": 2.5366, "step": 1118500 }, { "epoch": 5.54, "learning_rate": 4.722906665081276e-05, "loss": 2.5275, "step": 1119000 }, { "epoch": 5.55, "learning_rate": 4.722782806438668e-05, "loss": 2.546, "step": 1119500 }, { "epoch": 5.55, "learning_rate": 4.7226589477960596e-05, "loss": 2.5238, "step": 1120000 }, { "epoch": 5.55, "learning_rate": 4.7225350891534513e-05, "loss": 2.535, "step": 1120500 }, { "epoch": 5.55, "learning_rate": 4.7224112305108424e-05, "loss": 2.52, "step": 1121000 }, { "epoch": 5.56, "learning_rate": 4.722287371868234e-05, "loss": 2.5252, "step": 1121500 }, { "epoch": 5.56, "learning_rate": 4.722163513225626e-05, "loss": 2.544, "step": 1122000 }, { "epoch": 5.56, "learning_rate": 4.7220396545830174e-05, "loss": 2.5424, "step": 1122500 }, { "epoch": 5.56, "learning_rate": 4.721915795940409e-05, "loss": 2.5489, "step": 1123000 }, { "epoch": 5.57, "learning_rate": 4.721791937297801e-05, "loss": 2.5057, "step": 1123500 }, { "epoch": 5.57, "learning_rate": 4.7216680786551925e-05, "loss": 2.5233, "step": 1124000 }, { "epoch": 5.57, "learning_rate": 4.721544220012584e-05, "loss": 2.53, "step": 1124500 }, { "epoch": 5.57, "learning_rate": 4.721420361369976e-05, "loss": 2.5091, "step": 1125000 }, { "epoch": 5.58, "learning_rate": 4.7212965027273676e-05, "loss": 2.5253, "step": 1125500 }, { "epoch": 5.58, "learning_rate": 4.7211728918020445e-05, "loss": 2.5188, "step": 1126000 }, { "epoch": 5.58, "learning_rate": 4.721049033159436e-05, "loss": 2.522, "step": 1126500 }, { "epoch": 5.58, "learning_rate": 4.720925669951398e-05, "loss": 2.5627, "step": 1127000 }, { "epoch": 5.59, "learning_rate": 4.72080181130879e-05, "loss": 2.526, "step": 1127500 }, { "epoch": 5.59, "learning_rate": 4.7206779526661816e-05, "loss": 2.5394, "step": 1128000 }, { "epoch": 5.59, "learning_rate": 4.720554094023573e-05, "loss": 2.5182, "step": 1128500 }, { "epoch": 5.59, "learning_rate": 4.720430235380965e-05, "loss": 2.568, "step": 1129000 }, { "epoch": 5.6, "learning_rate": 4.720306624455641e-05, "loss": 2.5795, "step": 1129500 }, { "epoch": 5.6, "learning_rate": 4.720182765813033e-05, "loss": 2.6119, "step": 1130000 }, { "epoch": 5.6, "learning_rate": 4.7200589071704246e-05, "loss": 2.5682, "step": 1130500 }, { "epoch": 5.6, "learning_rate": 4.719935048527816e-05, "loss": 2.5436, "step": 1131000 }, { "epoch": 5.61, "learning_rate": 4.719811189885208e-05, "loss": 2.5482, "step": 1131500 }, { "epoch": 5.61, "learning_rate": 4.7196873312426e-05, "loss": 2.5595, "step": 1132000 }, { "epoch": 5.61, "learning_rate": 4.7195634725999914e-05, "loss": 2.5303, "step": 1132500 }, { "epoch": 5.61, "learning_rate": 4.719439861674668e-05, "loss": 2.5571, "step": 1133000 }, { "epoch": 5.62, "learning_rate": 4.71931600303206e-05, "loss": 2.5494, "step": 1133500 }, { "epoch": 5.62, "learning_rate": 4.7191921443894516e-05, "loss": 2.5194, "step": 1134000 }, { "epoch": 5.62, "learning_rate": 4.719068285746843e-05, "loss": 2.5303, "step": 1134500 }, { "epoch": 5.62, "learning_rate": 4.718944427104235e-05, "loss": 2.5406, "step": 1135000 }, { "epoch": 5.63, "learning_rate": 4.718820568461627e-05, "loss": 2.5231, "step": 1135500 }, { "epoch": 5.63, "learning_rate": 4.7186967098190184e-05, "loss": 2.5372, "step": 1136000 }, { "epoch": 5.63, "learning_rate": 4.7185728511764094e-05, "loss": 2.5714, "step": 1136500 }, { "epoch": 5.63, "learning_rate": 4.718448992533801e-05, "loss": 2.5142, "step": 1137000 }, { "epoch": 5.64, "learning_rate": 4.718325381608478e-05, "loss": 2.5527, "step": 1137500 }, { "epoch": 5.64, "learning_rate": 4.71820152296587e-05, "loss": 2.574, "step": 1138000 }, { "epoch": 5.64, "learning_rate": 4.7180776643232614e-05, "loss": 2.5483, "step": 1138500 }, { "epoch": 5.64, "learning_rate": 4.717953805680653e-05, "loss": 2.5275, "step": 1139000 }, { "epoch": 5.65, "learning_rate": 4.717829947038044e-05, "loss": 2.5176, "step": 1139500 }, { "epoch": 5.65, "learning_rate": 4.717706088395436e-05, "loss": 2.5255, "step": 1140000 }, { "epoch": 5.65, "learning_rate": 4.717582477470113e-05, "loss": 2.5277, "step": 1140500 }, { "epoch": 5.65, "learning_rate": 4.717458618827505e-05, "loss": 2.5264, "step": 1141000 }, { "epoch": 5.66, "learning_rate": 4.717334760184897e-05, "loss": 2.5172, "step": 1141500 }, { "epoch": 5.66, "learning_rate": 4.7172109015422884e-05, "loss": 2.5394, "step": 1142000 }, { "epoch": 5.66, "learning_rate": 4.71708704289968e-05, "loss": 2.4941, "step": 1142500 }, { "epoch": 5.66, "learning_rate": 4.716963184257071e-05, "loss": 2.5234, "step": 1143000 }, { "epoch": 5.67, "learning_rate": 4.716839325614463e-05, "loss": 2.5233, "step": 1143500 }, { "epoch": 5.67, "learning_rate": 4.71671571468914e-05, "loss": 2.5376, "step": 1144000 }, { "epoch": 5.67, "learning_rate": 4.7165918560465314e-05, "loss": 2.5352, "step": 1144500 }, { "epoch": 5.67, "learning_rate": 4.716467997403923e-05, "loss": 2.5087, "step": 1145000 }, { "epoch": 5.68, "learning_rate": 4.716344138761315e-05, "loss": 2.5188, "step": 1145500 }, { "epoch": 5.68, "learning_rate": 4.716220280118706e-05, "loss": 2.5193, "step": 1146000 }, { "epoch": 5.68, "learning_rate": 4.7160964214760975e-05, "loss": 2.5462, "step": 1146500 }, { "epoch": 5.68, "learning_rate": 4.71597305826806e-05, "loss": 2.5272, "step": 1147000 }, { "epoch": 5.69, "learning_rate": 4.715849199625452e-05, "loss": 2.5139, "step": 1147500 }, { "epoch": 5.69, "learning_rate": 4.7157253409828436e-05, "loss": 2.5406, "step": 1148000 }, { "epoch": 5.69, "learning_rate": 4.715601482340235e-05, "loss": 2.5199, "step": 1148500 }, { "epoch": 5.69, "learning_rate": 4.715477623697627e-05, "loss": 2.4929, "step": 1149000 }, { "epoch": 5.7, "learning_rate": 4.715354012772303e-05, "loss": 2.5136, "step": 1149500 }, { "epoch": 5.7, "learning_rate": 4.715230154129695e-05, "loss": 2.5388, "step": 1150000 }, { "epoch": 5.7, "learning_rate": 4.7151062954870866e-05, "loss": 2.5158, "step": 1150500 }, { "epoch": 5.7, "learning_rate": 4.714982436844478e-05, "loss": 2.4921, "step": 1151000 }, { "epoch": 5.7, "learning_rate": 4.71485857820187e-05, "loss": 2.5272, "step": 1151500 }, { "epoch": 5.71, "learning_rate": 4.714734719559262e-05, "loss": 2.5284, "step": 1152000 }, { "epoch": 5.71, "learning_rate": 4.7146108609166534e-05, "loss": 2.5297, "step": 1152500 }, { "epoch": 5.71, "learning_rate": 4.714487002274045e-05, "loss": 2.5186, "step": 1153000 }, { "epoch": 5.71, "learning_rate": 4.714363143631437e-05, "loss": 2.4969, "step": 1153500 }, { "epoch": 5.72, "learning_rate": 4.7142392849888284e-05, "loss": 2.5141, "step": 1154000 }, { "epoch": 5.72, "learning_rate": 4.71411542634622e-05, "loss": 2.5465, "step": 1154500 }, { "epoch": 5.72, "learning_rate": 4.713991567703612e-05, "loss": 2.5263, "step": 1155000 }, { "epoch": 5.72, "learning_rate": 4.713867709061003e-05, "loss": 2.5282, "step": 1155500 }, { "epoch": 5.73, "learning_rate": 4.713744345852965e-05, "loss": 2.5718, "step": 1156000 }, { "epoch": 5.73, "learning_rate": 4.7136204872103566e-05, "loss": 2.5614, "step": 1156500 }, { "epoch": 5.73, "learning_rate": 4.7134968762850335e-05, "loss": 2.5299, "step": 1157000 }, { "epoch": 5.73, "learning_rate": 4.713373017642425e-05, "loss": 2.5281, "step": 1157500 }, { "epoch": 5.74, "learning_rate": 4.713249158999817e-05, "loss": 2.503, "step": 1158000 }, { "epoch": 5.74, "learning_rate": 4.7131253003572086e-05, "loss": 2.5163, "step": 1158500 }, { "epoch": 5.74, "learning_rate": 4.7130014417146e-05, "loss": 2.5324, "step": 1159000 }, { "epoch": 5.74, "learning_rate": 4.712877583071992e-05, "loss": 2.522, "step": 1159500 }, { "epoch": 5.75, "learning_rate": 4.7127537244293836e-05, "loss": 2.55, "step": 1160000 }, { "epoch": 5.75, "learning_rate": 4.712629865786775e-05, "loss": 2.5301, "step": 1160500 }, { "epoch": 5.75, "learning_rate": 4.712506007144167e-05, "loss": 2.5052, "step": 1161000 }, { "epoch": 5.75, "learning_rate": 4.712382148501559e-05, "loss": 2.4877, "step": 1161500 }, { "epoch": 5.76, "learning_rate": 4.7122582898589504e-05, "loss": 2.5126, "step": 1162000 }, { "epoch": 5.76, "learning_rate": 4.712134431216342e-05, "loss": 2.5242, "step": 1162500 }, { "epoch": 5.76, "learning_rate": 4.712010572573734e-05, "loss": 2.5206, "step": 1163000 }, { "epoch": 5.76, "learning_rate": 4.711886713931125e-05, "loss": 2.5255, "step": 1163500 }, { "epoch": 5.77, "learning_rate": 4.711763103005802e-05, "loss": 2.5042, "step": 1164000 }, { "epoch": 5.77, "learning_rate": 4.7116392443631934e-05, "loss": 2.5521, "step": 1164500 }, { "epoch": 5.77, "learning_rate": 4.711515385720585e-05, "loss": 2.5334, "step": 1165000 }, { "epoch": 5.77, "learning_rate": 4.711391527077977e-05, "loss": 2.536, "step": 1165500 }, { "epoch": 5.78, "learning_rate": 4.7112676684353685e-05, "loss": 2.5293, "step": 1166000 }, { "epoch": 5.78, "learning_rate": 4.71114380979276e-05, "loss": 2.5166, "step": 1166500 }, { "epoch": 5.78, "learning_rate": 4.711019951150152e-05, "loss": 2.4953, "step": 1167000 }, { "epoch": 5.78, "learning_rate": 4.7108960925075435e-05, "loss": 2.4882, "step": 1167500 }, { "epoch": 5.79, "learning_rate": 4.7107724815822204e-05, "loss": 2.5227, "step": 1168000 }, { "epoch": 5.79, "learning_rate": 4.710648622939612e-05, "loss": 2.514, "step": 1168500 }, { "epoch": 5.79, "learning_rate": 4.710524764297004e-05, "loss": 2.5371, "step": 1169000 }, { "epoch": 5.79, "learning_rate": 4.7104009056543955e-05, "loss": 2.5194, "step": 1169500 }, { "epoch": 5.8, "learning_rate": 4.7102770470117865e-05, "loss": 2.5025, "step": 1170000 }, { "epoch": 5.8, "learning_rate": 4.710153188369178e-05, "loss": 2.5248, "step": 1170500 }, { "epoch": 5.8, "learning_rate": 4.71002932972657e-05, "loss": 2.4913, "step": 1171000 }, { "epoch": 5.8, "learning_rate": 4.7099054710839616e-05, "loss": 2.5103, "step": 1171500 }, { "epoch": 5.81, "learning_rate": 4.709781612441353e-05, "loss": 2.4856, "step": 1172000 }, { "epoch": 5.81, "learning_rate": 4.709657753798744e-05, "loss": 2.536, "step": 1172500 }, { "epoch": 5.81, "learning_rate": 4.709534142873422e-05, "loss": 2.4982, "step": 1173000 }, { "epoch": 5.81, "learning_rate": 4.7094102842308136e-05, "loss": 2.5312, "step": 1173500 }, { "epoch": 5.82, "learning_rate": 4.7092866733054904e-05, "loss": 2.4911, "step": 1174000 }, { "epoch": 5.82, "learning_rate": 4.709162814662882e-05, "loss": 2.5099, "step": 1174500 }, { "epoch": 5.82, "learning_rate": 4.709038956020274e-05, "loss": 2.5238, "step": 1175000 }, { "epoch": 5.82, "learning_rate": 4.7089150973776655e-05, "loss": 2.5279, "step": 1175500 }, { "epoch": 5.83, "learning_rate": 4.708791486452342e-05, "loss": 2.5004, "step": 1176000 }, { "epoch": 5.83, "learning_rate": 4.7086676278097334e-05, "loss": 2.5314, "step": 1176500 }, { "epoch": 5.83, "learning_rate": 4.708543769167125e-05, "loss": 2.5071, "step": 1177000 }, { "epoch": 5.83, "learning_rate": 4.708419910524517e-05, "loss": 2.5197, "step": 1177500 }, { "epoch": 5.84, "learning_rate": 4.7082960518819085e-05, "loss": 2.5249, "step": 1178000 }, { "epoch": 5.84, "learning_rate": 4.7081721932393e-05, "loss": 2.5054, "step": 1178500 }, { "epoch": 5.84, "learning_rate": 4.708048334596692e-05, "loss": 2.5106, "step": 1179000 }, { "epoch": 5.84, "learning_rate": 4.7079244759540836e-05, "loss": 2.5198, "step": 1179500 }, { "epoch": 5.85, "learning_rate": 4.7078008650287604e-05, "loss": 2.5138, "step": 1180000 }, { "epoch": 5.85, "learning_rate": 4.707677006386152e-05, "loss": 2.5156, "step": 1180500 }, { "epoch": 5.85, "learning_rate": 4.707553147743544e-05, "loss": 2.5207, "step": 1181000 }, { "epoch": 5.85, "learning_rate": 4.7074292891009355e-05, "loss": 2.4657, "step": 1181500 }, { "epoch": 5.86, "learning_rate": 4.707305430458327e-05, "loss": 2.4906, "step": 1182000 }, { "epoch": 5.86, "learning_rate": 4.707181571815719e-05, "loss": 2.4839, "step": 1182500 }, { "epoch": 5.86, "learning_rate": 4.7070577131731106e-05, "loss": 2.514, "step": 1183000 }, { "epoch": 5.86, "learning_rate": 4.7069338545305016e-05, "loss": 2.4829, "step": 1183500 }, { "epoch": 5.87, "learning_rate": 4.706809995887893e-05, "loss": 2.5311, "step": 1184000 }, { "epoch": 5.87, "learning_rate": 4.706686137245285e-05, "loss": 2.5153, "step": 1184500 }, { "epoch": 5.87, "learning_rate": 4.706562774037247e-05, "loss": 2.5128, "step": 1185000 }, { "epoch": 5.87, "learning_rate": 4.706438915394639e-05, "loss": 2.5017, "step": 1185500 }, { "epoch": 5.88, "learning_rate": 4.7063153044693156e-05, "loss": 2.5261, "step": 1186000 }, { "epoch": 5.88, "learning_rate": 4.7061914458267073e-05, "loss": 2.5201, "step": 1186500 }, { "epoch": 5.88, "learning_rate": 4.7060675871840984e-05, "loss": 2.5188, "step": 1187000 }, { "epoch": 5.88, "learning_rate": 4.70594372854149e-05, "loss": 2.534, "step": 1187500 }, { "epoch": 5.89, "learning_rate": 4.705819869898882e-05, "loss": 2.5313, "step": 1188000 }, { "epoch": 5.89, "learning_rate": 4.7056960112562734e-05, "loss": 2.5164, "step": 1188500 }, { "epoch": 5.89, "learning_rate": 4.705572152613665e-05, "loss": 2.5065, "step": 1189000 }, { "epoch": 5.89, "learning_rate": 4.705448293971057e-05, "loss": 2.5141, "step": 1189500 }, { "epoch": 5.9, "learning_rate": 4.7053244353284485e-05, "loss": 2.4913, "step": 1190000 }, { "epoch": 5.9, "learning_rate": 4.70520057668584e-05, "loss": 2.5257, "step": 1190500 }, { "epoch": 5.9, "learning_rate": 4.705076965760517e-05, "loss": 2.5149, "step": 1191000 }, { "epoch": 5.9, "learning_rate": 4.704953107117909e-05, "loss": 2.524, "step": 1191500 }, { "epoch": 5.91, "learning_rate": 4.7048292484753005e-05, "loss": 2.5101, "step": 1192000 }, { "epoch": 5.91, "learning_rate": 4.704705389832692e-05, "loss": 2.5176, "step": 1192500 }, { "epoch": 5.91, "learning_rate": 4.704581531190084e-05, "loss": 2.5155, "step": 1193000 }, { "epoch": 5.91, "learning_rate": 4.7044576725474756e-05, "loss": 2.4957, "step": 1193500 }, { "epoch": 5.92, "learning_rate": 4.704333813904867e-05, "loss": 2.5396, "step": 1194000 }, { "epoch": 5.92, "learning_rate": 4.7042102029795434e-05, "loss": 2.5241, "step": 1194500 }, { "epoch": 5.92, "learning_rate": 4.704086344336935e-05, "loss": 2.5134, "step": 1195000 }, { "epoch": 5.92, "learning_rate": 4.703962485694327e-05, "loss": 2.5014, "step": 1195500 }, { "epoch": 5.93, "learning_rate": 4.7038386270517185e-05, "loss": 2.5075, "step": 1196000 }, { "epoch": 5.93, "learning_rate": 4.70371476840911e-05, "loss": 2.5256, "step": 1196500 }, { "epoch": 5.93, "learning_rate": 4.703590909766502e-05, "loss": 2.4918, "step": 1197000 }, { "epoch": 5.93, "learning_rate": 4.7034670511238936e-05, "loss": 2.4843, "step": 1197500 }, { "epoch": 5.94, "learning_rate": 4.703343192481285e-05, "loss": 2.5463, "step": 1198000 }, { "epoch": 5.94, "learning_rate": 4.703219333838677e-05, "loss": 2.4987, "step": 1198500 }, { "epoch": 5.94, "learning_rate": 4.703095475196068e-05, "loss": 2.5182, "step": 1199000 }, { "epoch": 5.94, "learning_rate": 4.702972111988031e-05, "loss": 2.5022, "step": 1199500 }, { "epoch": 5.95, "learning_rate": 4.7028482533454224e-05, "loss": 2.5201, "step": 1200000 }, { "epoch": 5.95, "learning_rate": 4.7027243947028135e-05, "loss": 2.5196, "step": 1200500 }, { "epoch": 5.95, "learning_rate": 4.702600536060205e-05, "loss": 2.5223, "step": 1201000 }, { "epoch": 5.95, "learning_rate": 4.702476677417597e-05, "loss": 2.5153, "step": 1201500 }, { "epoch": 5.96, "learning_rate": 4.7023528187749885e-05, "loss": 2.5415, "step": 1202000 }, { "epoch": 5.96, "learning_rate": 4.70222896013238e-05, "loss": 2.529, "step": 1202500 }, { "epoch": 5.96, "learning_rate": 4.702105349207057e-05, "loss": 2.5326, "step": 1203000 }, { "epoch": 5.96, "learning_rate": 4.701981490564449e-05, "loss": 2.5434, "step": 1203500 }, { "epoch": 5.97, "learning_rate": 4.7018576319218405e-05, "loss": 2.5482, "step": 1204000 }, { "epoch": 5.97, "learning_rate": 4.701733773279232e-05, "loss": 2.5158, "step": 1204500 }, { "epoch": 5.97, "learning_rate": 4.701609914636624e-05, "loss": 2.5229, "step": 1205000 }, { "epoch": 5.97, "learning_rate": 4.7014860559940156e-05, "loss": 2.5232, "step": 1205500 }, { "epoch": 5.97, "learning_rate": 4.7013624450686925e-05, "loss": 2.5345, "step": 1206000 }, { "epoch": 5.98, "learning_rate": 4.701238586426084e-05, "loss": 2.4912, "step": 1206500 }, { "epoch": 5.98, "learning_rate": 4.701114727783475e-05, "loss": 2.5206, "step": 1207000 }, { "epoch": 5.98, "learning_rate": 4.700990869140867e-05, "loss": 2.4784, "step": 1207500 }, { "epoch": 5.98, "learning_rate": 4.7008670104982586e-05, "loss": 2.4862, "step": 1208000 }, { "epoch": 5.99, "learning_rate": 4.70074315185565e-05, "loss": 2.5107, "step": 1208500 }, { "epoch": 5.99, "learning_rate": 4.700619293213042e-05, "loss": 2.5165, "step": 1209000 }, { "epoch": 5.99, "learning_rate": 4.700495682287719e-05, "loss": 2.5188, "step": 1209500 }, { "epoch": 5.99, "learning_rate": 4.7003718236451105e-05, "loss": 2.4935, "step": 1210000 }, { "epoch": 6.0, "learning_rate": 4.700247965002502e-05, "loss": 2.5275, "step": 1210500 }, { "epoch": 6.0, "learning_rate": 4.700124106359894e-05, "loss": 2.5212, "step": 1211000 }, { "epoch": 6.0, "eval_accuracy": 0.6358168518016798, "eval_accuracy_mlm": 0.5887641252682907, "eval_accuracy_nsp": 0.8578281213842226, "eval_loss": 2.467965841293335, "eval_runtime": 145.9334, "eval_samples_per_second": 1747.092, "eval_steps_per_second": 72.8, "step": 1211058 }, { "epoch": 6.0, "learning_rate": 4.7000002477172856e-05, "loss": 2.4902, "step": 1211500 }, { "epoch": 6.0, "learning_rate": 4.699876389074677e-05, "loss": 2.493, "step": 1212000 }, { "epoch": 6.01, "learning_rate": 4.699752530432069e-05, "loss": 2.5038, "step": 1212500 }, { "epoch": 6.01, "learning_rate": 4.699628671789461e-05, "loss": 2.4571, "step": 1213000 }, { "epoch": 6.01, "learning_rate": 4.6995048131468524e-05, "loss": 2.4688, "step": 1213500 }, { "epoch": 6.01, "learning_rate": 4.699380954504244e-05, "loss": 2.5051, "step": 1214000 }, { "epoch": 6.02, "learning_rate": 4.699257095861635e-05, "loss": 2.4776, "step": 1214500 }, { "epoch": 6.02, "learning_rate": 4.699133237219027e-05, "loss": 2.4888, "step": 1215000 }, { "epoch": 6.02, "learning_rate": 4.6990096262937036e-05, "loss": 2.4839, "step": 1215500 }, { "epoch": 6.02, "learning_rate": 4.698885767651095e-05, "loss": 2.4875, "step": 1216000 }, { "epoch": 6.03, "learning_rate": 4.698762156725772e-05, "loss": 2.5075, "step": 1216500 }, { "epoch": 6.03, "learning_rate": 4.698638545800449e-05, "loss": 2.4875, "step": 1217000 }, { "epoch": 6.03, "learning_rate": 4.698514687157841e-05, "loss": 2.4917, "step": 1217500 }, { "epoch": 6.03, "learning_rate": 4.6983908285152325e-05, "loss": 2.4866, "step": 1218000 }, { "epoch": 6.04, "learning_rate": 4.698266969872624e-05, "loss": 2.5019, "step": 1218500 }, { "epoch": 6.04, "learning_rate": 4.698143111230016e-05, "loss": 2.4532, "step": 1219000 }, { "epoch": 6.04, "learning_rate": 4.6980192525874076e-05, "loss": 2.4699, "step": 1219500 }, { "epoch": 6.04, "learning_rate": 4.697895393944799e-05, "loss": 2.5117, "step": 1220000 }, { "epoch": 6.05, "learning_rate": 4.69777153530219e-05, "loss": 2.4897, "step": 1220500 }, { "epoch": 6.05, "learning_rate": 4.697647676659582e-05, "loss": 2.4558, "step": 1221000 }, { "epoch": 6.05, "learning_rate": 4.6975238180169737e-05, "loss": 2.4775, "step": 1221500 }, { "epoch": 6.05, "learning_rate": 4.6973999593743653e-05, "loss": 2.4828, "step": 1222000 }, { "epoch": 6.06, "learning_rate": 4.697276100731757e-05, "loss": 2.487, "step": 1222500 }, { "epoch": 6.06, "learning_rate": 4.697152242089149e-05, "loss": 2.4936, "step": 1223000 }, { "epoch": 6.06, "learning_rate": 4.6970283834465404e-05, "loss": 2.489, "step": 1223500 }, { "epoch": 6.06, "learning_rate": 4.6969045248039314e-05, "loss": 2.4504, "step": 1224000 }, { "epoch": 6.07, "learning_rate": 4.696780666161323e-05, "loss": 2.4844, "step": 1224500 }, { "epoch": 6.07, "learning_rate": 4.696656807518715e-05, "loss": 2.4735, "step": 1225000 }, { "epoch": 6.07, "learning_rate": 4.6965329488761065e-05, "loss": 2.4685, "step": 1225500 }, { "epoch": 6.07, "learning_rate": 4.696409090233498e-05, "loss": 2.4795, "step": 1226000 }, { "epoch": 6.08, "learning_rate": 4.69628523159089e-05, "loss": 2.4918, "step": 1226500 }, { "epoch": 6.08, "learning_rate": 4.696161620665567e-05, "loss": 2.4753, "step": 1227000 }, { "epoch": 6.08, "learning_rate": 4.696038009740244e-05, "loss": 2.4695, "step": 1227500 }, { "epoch": 6.08, "learning_rate": 4.6959141510976354e-05, "loss": 2.4831, "step": 1228000 }, { "epoch": 6.09, "learning_rate": 4.695790292455027e-05, "loss": 2.4936, "step": 1228500 }, { "epoch": 6.09, "learning_rate": 4.695666433812419e-05, "loss": 2.4876, "step": 1229000 }, { "epoch": 6.09, "learning_rate": 4.6955425751698104e-05, "loss": 2.4676, "step": 1229500 }, { "epoch": 6.09, "learning_rate": 4.695418716527202e-05, "loss": 2.4873, "step": 1230000 }, { "epoch": 6.1, "learning_rate": 4.695295105601879e-05, "loss": 2.4812, "step": 1230500 }, { "epoch": 6.1, "learning_rate": 4.695171246959271e-05, "loss": 2.4859, "step": 1231000 }, { "epoch": 6.1, "learning_rate": 4.6950473883166624e-05, "loss": 2.4744, "step": 1231500 }, { "epoch": 6.1, "learning_rate": 4.694923529674054e-05, "loss": 2.4792, "step": 1232000 }, { "epoch": 6.11, "learning_rate": 4.694799918748731e-05, "loss": 2.4847, "step": 1232500 }, { "epoch": 6.11, "learning_rate": 4.694676060106123e-05, "loss": 2.5086, "step": 1233000 }, { "epoch": 6.11, "learning_rate": 4.6945522014635144e-05, "loss": 2.4921, "step": 1233500 }, { "epoch": 6.11, "learning_rate": 4.6944283428209054e-05, "loss": 2.471, "step": 1234000 }, { "epoch": 6.12, "learning_rate": 4.694304484178297e-05, "loss": 2.4858, "step": 1234500 }, { "epoch": 6.12, "learning_rate": 4.694180625535689e-05, "loss": 2.4519, "step": 1235000 }, { "epoch": 6.12, "learning_rate": 4.6940567668930805e-05, "loss": 2.4916, "step": 1235500 }, { "epoch": 6.12, "learning_rate": 4.693932908250472e-05, "loss": 2.4775, "step": 1236000 }, { "epoch": 6.13, "learning_rate": 4.693809049607863e-05, "loss": 2.483, "step": 1236500 }, { "epoch": 6.13, "learning_rate": 4.693685190965255e-05, "loss": 2.4979, "step": 1237000 }, { "epoch": 6.13, "learning_rate": 4.6935613323226465e-05, "loss": 2.4822, "step": 1237500 }, { "epoch": 6.13, "learning_rate": 4.693437473680038e-05, "loss": 2.4846, "step": 1238000 }, { "epoch": 6.14, "learning_rate": 4.69331361503743e-05, "loss": 2.4769, "step": 1238500 }, { "epoch": 6.14, "learning_rate": 4.6931900041121075e-05, "loss": 2.5055, "step": 1239000 }, { "epoch": 6.14, "learning_rate": 4.6930661454694985e-05, "loss": 2.4656, "step": 1239500 }, { "epoch": 6.14, "learning_rate": 4.69294228682689e-05, "loss": 2.5074, "step": 1240000 }, { "epoch": 6.15, "learning_rate": 4.692818428184282e-05, "loss": 2.4729, "step": 1240500 }, { "epoch": 6.15, "learning_rate": 4.692694817258959e-05, "loss": 2.4935, "step": 1241000 }, { "epoch": 6.15, "learning_rate": 4.6925709586163505e-05, "loss": 2.4919, "step": 1241500 }, { "epoch": 6.15, "learning_rate": 4.692447099973742e-05, "loss": 2.4865, "step": 1242000 }, { "epoch": 6.16, "learning_rate": 4.692323241331134e-05, "loss": 2.4812, "step": 1242500 }, { "epoch": 6.16, "learning_rate": 4.6921993826885255e-05, "loss": 2.483, "step": 1243000 }, { "epoch": 6.16, "learning_rate": 4.6920757717632024e-05, "loss": 2.4902, "step": 1243500 }, { "epoch": 6.16, "learning_rate": 4.691951913120594e-05, "loss": 2.47, "step": 1244000 }, { "epoch": 6.17, "learning_rate": 4.691828054477986e-05, "loss": 2.4547, "step": 1244500 }, { "epoch": 6.17, "learning_rate": 4.6917041958353775e-05, "loss": 2.4744, "step": 1245000 }, { "epoch": 6.17, "learning_rate": 4.691580337192769e-05, "loss": 2.4984, "step": 1245500 }, { "epoch": 6.17, "learning_rate": 4.69145647855016e-05, "loss": 2.4811, "step": 1246000 }, { "epoch": 6.18, "learning_rate": 4.691332619907552e-05, "loss": 2.4872, "step": 1246500 }, { "epoch": 6.18, "learning_rate": 4.6912090089822295e-05, "loss": 2.481, "step": 1247000 }, { "epoch": 6.18, "learning_rate": 4.691085150339621e-05, "loss": 2.4718, "step": 1247500 }, { "epoch": 6.18, "learning_rate": 4.690961291697012e-05, "loss": 2.4848, "step": 1248000 }, { "epoch": 6.19, "learning_rate": 4.690837433054404e-05, "loss": 2.483, "step": 1248500 }, { "epoch": 6.19, "learning_rate": 4.6907135744117956e-05, "loss": 2.4704, "step": 1249000 }, { "epoch": 6.19, "learning_rate": 4.690589715769187e-05, "loss": 2.4682, "step": 1249500 }, { "epoch": 6.19, "learning_rate": 4.690466104843864e-05, "loss": 2.4647, "step": 1250000 }, { "epoch": 6.2, "learning_rate": 4.690342493918541e-05, "loss": 2.4835, "step": 1250500 }, { "epoch": 6.2, "learning_rate": 4.690218635275933e-05, "loss": 2.4615, "step": 1251000 }, { "epoch": 6.2, "learning_rate": 4.6900947766333244e-05, "loss": 2.4774, "step": 1251500 }, { "epoch": 6.2, "learning_rate": 4.689970917990716e-05, "loss": 2.4772, "step": 1252000 }, { "epoch": 6.21, "learning_rate": 4.689847059348108e-05, "loss": 2.4977, "step": 1252500 }, { "epoch": 6.21, "learning_rate": 4.6897232007054995e-05, "loss": 2.5088, "step": 1253000 }, { "epoch": 6.21, "learning_rate": 4.689599342062891e-05, "loss": 2.4723, "step": 1253500 }, { "epoch": 6.21, "learning_rate": 4.689475483420283e-05, "loss": 2.4965, "step": 1254000 }, { "epoch": 6.22, "learning_rate": 4.689351624777674e-05, "loss": 2.4822, "step": 1254500 }, { "epoch": 6.22, "learning_rate": 4.689228013852351e-05, "loss": 2.5036, "step": 1255000 }, { "epoch": 6.22, "learning_rate": 4.6891041552097424e-05, "loss": 2.4874, "step": 1255500 }, { "epoch": 6.22, "learning_rate": 4.688980544284419e-05, "loss": 2.4798, "step": 1256000 }, { "epoch": 6.23, "learning_rate": 4.688856685641811e-05, "loss": 2.4956, "step": 1256500 }, { "epoch": 6.23, "learning_rate": 4.688733074716488e-05, "loss": 2.4933, "step": 1257000 }, { "epoch": 6.23, "learning_rate": 4.6886092160738796e-05, "loss": 2.5024, "step": 1257500 }, { "epoch": 6.23, "learning_rate": 4.6884853574312706e-05, "loss": 2.4811, "step": 1258000 }, { "epoch": 6.24, "learning_rate": 4.688361498788662e-05, "loss": 2.4707, "step": 1258500 }, { "epoch": 6.24, "learning_rate": 4.688237887863339e-05, "loss": 2.4857, "step": 1259000 }, { "epoch": 6.24, "learning_rate": 4.688114029220731e-05, "loss": 2.5269, "step": 1259500 }, { "epoch": 6.24, "learning_rate": 4.6879901705781226e-05, "loss": 2.4841, "step": 1260000 }, { "epoch": 6.24, "learning_rate": 4.687866311935514e-05, "loss": 2.5034, "step": 1260500 }, { "epoch": 6.25, "learning_rate": 4.687742453292906e-05, "loss": 2.4715, "step": 1261000 }, { "epoch": 6.25, "learning_rate": 4.6876185946502976e-05, "loss": 2.4924, "step": 1261500 }, { "epoch": 6.25, "learning_rate": 4.6874947360076893e-05, "loss": 2.5025, "step": 1262000 }, { "epoch": 6.25, "learning_rate": 4.687370877365081e-05, "loss": 2.4942, "step": 1262500 }, { "epoch": 6.26, "learning_rate": 4.687247018722473e-05, "loss": 2.4852, "step": 1263000 }, { "epoch": 6.26, "learning_rate": 4.6871231600798644e-05, "loss": 2.4815, "step": 1263500 }, { "epoch": 6.26, "learning_rate": 4.686999301437256e-05, "loss": 2.4833, "step": 1264000 }, { "epoch": 6.26, "learning_rate": 4.686875690511932e-05, "loss": 2.5097, "step": 1264500 }, { "epoch": 6.27, "learning_rate": 4.686751831869324e-05, "loss": 2.5067, "step": 1265000 }, { "epoch": 6.27, "learning_rate": 4.686627973226716e-05, "loss": 2.5029, "step": 1265500 }, { "epoch": 6.27, "learning_rate": 4.6865041145841074e-05, "loss": 2.4842, "step": 1266000 }, { "epoch": 6.27, "learning_rate": 4.686380255941499e-05, "loss": 2.4757, "step": 1266500 }, { "epoch": 6.28, "learning_rate": 4.686256397298891e-05, "loss": 2.4729, "step": 1267000 }, { "epoch": 6.28, "learning_rate": 4.6861325386562825e-05, "loss": 2.4995, "step": 1267500 }, { "epoch": 6.28, "learning_rate": 4.686008680013674e-05, "loss": 2.5044, "step": 1268000 }, { "epoch": 6.28, "learning_rate": 4.685884821371066e-05, "loss": 2.483, "step": 1268500 }, { "epoch": 6.29, "learning_rate": 4.685761458163028e-05, "loss": 2.5019, "step": 1269000 }, { "epoch": 6.29, "learning_rate": 4.6856375995204196e-05, "loss": 2.5024, "step": 1269500 }, { "epoch": 6.29, "learning_rate": 4.685513740877811e-05, "loss": 2.4707, "step": 1270000 }, { "epoch": 6.29, "learning_rate": 4.685389882235203e-05, "loss": 2.4647, "step": 1270500 }, { "epoch": 6.3, "learning_rate": 4.68526627130988e-05, "loss": 2.4922, "step": 1271000 }, { "epoch": 6.3, "learning_rate": 4.685142412667271e-05, "loss": 2.5003, "step": 1271500 }, { "epoch": 6.3, "learning_rate": 4.6850185540246626e-05, "loss": 2.4678, "step": 1272000 }, { "epoch": 6.3, "learning_rate": 4.684894695382054e-05, "loss": 2.4832, "step": 1272500 }, { "epoch": 6.31, "learning_rate": 4.684771084456732e-05, "loss": 2.4824, "step": 1273000 }, { "epoch": 6.31, "learning_rate": 4.6846472258141235e-05, "loss": 2.4748, "step": 1273500 }, { "epoch": 6.31, "learning_rate": 4.684523367171515e-05, "loss": 2.5172, "step": 1274000 }, { "epoch": 6.31, "learning_rate": 4.684399508528906e-05, "loss": 2.4525, "step": 1274500 }, { "epoch": 6.32, "learning_rate": 4.684275649886298e-05, "loss": 2.4783, "step": 1275000 }, { "epoch": 6.32, "learning_rate": 4.6841517912436896e-05, "loss": 2.5077, "step": 1275500 }, { "epoch": 6.32, "learning_rate": 4.6840281803183665e-05, "loss": 2.4907, "step": 1276000 }, { "epoch": 6.32, "learning_rate": 4.683904321675758e-05, "loss": 2.464, "step": 1276500 }, { "epoch": 6.33, "learning_rate": 4.68378046303315e-05, "loss": 2.4991, "step": 1277000 }, { "epoch": 6.33, "learning_rate": 4.6836566043905416e-05, "loss": 2.5031, "step": 1277500 }, { "epoch": 6.33, "learning_rate": 4.6835327457479326e-05, "loss": 2.4927, "step": 1278000 }, { "epoch": 6.33, "learning_rate": 4.683408887105324e-05, "loss": 2.4785, "step": 1278500 }, { "epoch": 6.34, "learning_rate": 4.683285028462716e-05, "loss": 2.465, "step": 1279000 }, { "epoch": 6.34, "learning_rate": 4.683161169820108e-05, "loss": 2.4818, "step": 1279500 }, { "epoch": 6.34, "learning_rate": 4.6830373111774994e-05, "loss": 2.5001, "step": 1280000 }, { "epoch": 6.34, "learning_rate": 4.682913452534891e-05, "loss": 2.4687, "step": 1280500 }, { "epoch": 6.35, "learning_rate": 4.682789841609568e-05, "loss": 2.4916, "step": 1281000 }, { "epoch": 6.35, "learning_rate": 4.6826659829669596e-05, "loss": 2.4898, "step": 1281500 }, { "epoch": 6.35, "learning_rate": 4.682542124324351e-05, "loss": 2.4705, "step": 1282000 }, { "epoch": 6.35, "learning_rate": 4.682418265681743e-05, "loss": 2.4925, "step": 1282500 }, { "epoch": 6.36, "learning_rate": 4.682294407039135e-05, "loss": 2.4848, "step": 1283000 }, { "epoch": 6.36, "learning_rate": 4.6821705483965264e-05, "loss": 2.4765, "step": 1283500 }, { "epoch": 6.36, "learning_rate": 4.6820469374712026e-05, "loss": 2.4905, "step": 1284000 }, { "epoch": 6.36, "learning_rate": 4.681923078828594e-05, "loss": 2.4852, "step": 1284500 }, { "epoch": 6.37, "learning_rate": 4.681799220185986e-05, "loss": 2.4962, "step": 1285000 }, { "epoch": 6.37, "learning_rate": 4.681675361543378e-05, "loss": 2.4493, "step": 1285500 }, { "epoch": 6.37, "learning_rate": 4.6815515029007694e-05, "loss": 2.4891, "step": 1286000 }, { "epoch": 6.37, "learning_rate": 4.681427644258161e-05, "loss": 2.4824, "step": 1286500 }, { "epoch": 6.38, "learning_rate": 4.681303785615553e-05, "loss": 2.4763, "step": 1287000 }, { "epoch": 6.38, "learning_rate": 4.6811799269729445e-05, "loss": 2.5012, "step": 1287500 }, { "epoch": 6.38, "learning_rate": 4.681056068330336e-05, "loss": 2.4999, "step": 1288000 }, { "epoch": 6.38, "learning_rate": 4.680932209687728e-05, "loss": 2.4742, "step": 1288500 }, { "epoch": 6.39, "learning_rate": 4.680808598762405e-05, "loss": 2.4774, "step": 1289000 }, { "epoch": 6.39, "learning_rate": 4.6806847401197964e-05, "loss": 2.4746, "step": 1289500 }, { "epoch": 6.39, "learning_rate": 4.680560881477188e-05, "loss": 2.4768, "step": 1290000 }, { "epoch": 6.39, "learning_rate": 4.68043702283458e-05, "loss": 2.4646, "step": 1290500 }, { "epoch": 6.4, "learning_rate": 4.6803131641919715e-05, "loss": 2.4499, "step": 1291000 }, { "epoch": 6.4, "learning_rate": 4.680189553266648e-05, "loss": 2.4654, "step": 1291500 }, { "epoch": 6.4, "learning_rate": 4.6800656946240394e-05, "loss": 2.4719, "step": 1292000 }, { "epoch": 6.4, "learning_rate": 4.679941835981431e-05, "loss": 2.4862, "step": 1292500 }, { "epoch": 6.41, "learning_rate": 4.6798182250561087e-05, "loss": 2.4977, "step": 1293000 }, { "epoch": 6.41, "learning_rate": 4.679694614130785e-05, "loss": 2.4832, "step": 1293500 }, { "epoch": 6.41, "learning_rate": 4.6795707554881766e-05, "loss": 2.5195, "step": 1294000 }, { "epoch": 6.41, "learning_rate": 4.679446896845568e-05, "loss": 2.4994, "step": 1294500 }, { "epoch": 6.42, "learning_rate": 4.67932303820296e-05, "loss": 2.4854, "step": 1295000 }, { "epoch": 6.42, "learning_rate": 4.6791991795603516e-05, "loss": 2.4757, "step": 1295500 }, { "epoch": 6.42, "learning_rate": 4.679075320917743e-05, "loss": 2.5, "step": 1296000 }, { "epoch": 6.42, "learning_rate": 4.678951462275134e-05, "loss": 2.4907, "step": 1296500 }, { "epoch": 6.43, "learning_rate": 4.678827603632526e-05, "loss": 2.5016, "step": 1297000 }, { "epoch": 6.43, "learning_rate": 4.678703744989918e-05, "loss": 2.5061, "step": 1297500 }, { "epoch": 6.43, "learning_rate": 4.6785798863473094e-05, "loss": 2.4797, "step": 1298000 }, { "epoch": 6.43, "learning_rate": 4.678456027704701e-05, "loss": 2.5274, "step": 1298500 }, { "epoch": 6.44, "learning_rate": 4.678332169062093e-05, "loss": 2.489, "step": 1299000 }, { "epoch": 6.44, "learning_rate": 4.6782083104194845e-05, "loss": 2.4759, "step": 1299500 }, { "epoch": 6.44, "learning_rate": 4.6780846994941614e-05, "loss": 2.4618, "step": 1300000 }, { "epoch": 6.44, "learning_rate": 4.677961088568838e-05, "loss": 2.4962, "step": 1300500 }, { "epoch": 6.45, "learning_rate": 4.67783722992623e-05, "loss": 2.485, "step": 1301000 }, { "epoch": 6.45, "learning_rate": 4.6777133712836216e-05, "loss": 2.4917, "step": 1301500 }, { "epoch": 6.45, "learning_rate": 4.677589512641013e-05, "loss": 2.4872, "step": 1302000 }, { "epoch": 6.45, "learning_rate": 4.677465653998405e-05, "loss": 2.4978, "step": 1302500 }, { "epoch": 6.46, "learning_rate": 4.677342043073082e-05, "loss": 2.486, "step": 1303000 }, { "epoch": 6.46, "learning_rate": 4.6772181844304736e-05, "loss": 2.5053, "step": 1303500 }, { "epoch": 6.46, "learning_rate": 4.6770945735051505e-05, "loss": 2.4912, "step": 1304000 }, { "epoch": 6.46, "learning_rate": 4.676970714862542e-05, "loss": 2.4926, "step": 1304500 }, { "epoch": 6.47, "learning_rate": 4.676846856219934e-05, "loss": 2.4839, "step": 1305000 }, { "epoch": 6.47, "learning_rate": 4.6767229975773256e-05, "loss": 2.4895, "step": 1305500 }, { "epoch": 6.47, "learning_rate": 4.6765991389347166e-05, "loss": 2.5066, "step": 1306000 }, { "epoch": 6.47, "learning_rate": 4.6764755280093935e-05, "loss": 2.4824, "step": 1306500 }, { "epoch": 6.48, "learning_rate": 4.676351669366785e-05, "loss": 2.482, "step": 1307000 }, { "epoch": 6.48, "learning_rate": 4.676227810724177e-05, "loss": 2.4795, "step": 1307500 }, { "epoch": 6.48, "learning_rate": 4.6761039520815685e-05, "loss": 2.5152, "step": 1308000 }, { "epoch": 6.48, "learning_rate": 4.67598009343896e-05, "loss": 2.4962, "step": 1308500 }, { "epoch": 6.49, "learning_rate": 4.675856234796352e-05, "loss": 2.499, "step": 1309000 }, { "epoch": 6.49, "learning_rate": 4.6757323761537436e-05, "loss": 2.5016, "step": 1309500 }, { "epoch": 6.49, "learning_rate": 4.675608517511135e-05, "loss": 2.5232, "step": 1310000 }, { "epoch": 6.49, "learning_rate": 4.675484658868527e-05, "loss": 2.4946, "step": 1310500 }, { "epoch": 6.5, "learning_rate": 4.675361047943204e-05, "loss": 2.4769, "step": 1311000 }, { "epoch": 6.5, "learning_rate": 4.6752371893005956e-05, "loss": 2.4588, "step": 1311500 }, { "epoch": 6.5, "learning_rate": 4.675113578375272e-05, "loss": 2.514, "step": 1312000 }, { "epoch": 6.5, "learning_rate": 4.6749897197326635e-05, "loss": 2.5118, "step": 1312500 }, { "epoch": 6.51, "learning_rate": 4.674865861090055e-05, "loss": 2.4955, "step": 1313000 }, { "epoch": 6.51, "learning_rate": 4.674742002447447e-05, "loss": 2.4849, "step": 1313500 }, { "epoch": 6.51, "learning_rate": 4.6746181438048385e-05, "loss": 2.4894, "step": 1314000 }, { "epoch": 6.51, "learning_rate": 4.67449428516223e-05, "loss": 2.4816, "step": 1314500 }, { "epoch": 6.51, "learning_rate": 4.674370426519622e-05, "loss": 2.4829, "step": 1315000 }, { "epoch": 6.52, "learning_rate": 4.6742465678770136e-05, "loss": 2.4969, "step": 1315500 }, { "epoch": 6.52, "learning_rate": 4.6741229569516905e-05, "loss": 2.4945, "step": 1316000 }, { "epoch": 6.52, "learning_rate": 4.673999346026367e-05, "loss": 2.4751, "step": 1316500 }, { "epoch": 6.52, "learning_rate": 4.6738754873837584e-05, "loss": 2.5224, "step": 1317000 }, { "epoch": 6.53, "learning_rate": 4.67375162874115e-05, "loss": 2.5149, "step": 1317500 }, { "epoch": 6.53, "learning_rate": 4.673627770098542e-05, "loss": 2.5144, "step": 1318000 }, { "epoch": 6.53, "learning_rate": 4.6735039114559335e-05, "loss": 2.4687, "step": 1318500 }, { "epoch": 6.53, "learning_rate": 4.673380052813325e-05, "loss": 2.4861, "step": 1319000 }, { "epoch": 6.54, "learning_rate": 4.673256441888002e-05, "loss": 2.5114, "step": 1319500 }, { "epoch": 6.54, "learning_rate": 4.673132583245394e-05, "loss": 2.482, "step": 1320000 }, { "epoch": 6.54, "learning_rate": 4.6730087246027854e-05, "loss": 2.5231, "step": 1320500 }, { "epoch": 6.54, "learning_rate": 4.672885113677462e-05, "loss": 2.511, "step": 1321000 }, { "epoch": 6.55, "learning_rate": 4.672761502752139e-05, "loss": 2.512, "step": 1321500 }, { "epoch": 6.55, "learning_rate": 4.672637644109531e-05, "loss": 2.5054, "step": 1322000 }, { "epoch": 6.55, "learning_rate": 4.6725137854669226e-05, "loss": 2.502, "step": 1322500 }, { "epoch": 6.55, "learning_rate": 4.6723901745415995e-05, "loss": 2.4838, "step": 1323000 }, { "epoch": 6.56, "learning_rate": 4.672266315898991e-05, "loss": 2.5259, "step": 1323500 }, { "epoch": 6.56, "learning_rate": 4.672142457256383e-05, "loss": 2.5036, "step": 1324000 }, { "epoch": 6.56, "learning_rate": 4.6720185986137745e-05, "loss": 2.5052, "step": 1324500 }, { "epoch": 6.56, "learning_rate": 4.671894739971166e-05, "loss": 2.4959, "step": 1325000 }, { "epoch": 6.57, "learning_rate": 4.671770881328558e-05, "loss": 2.5092, "step": 1325500 }, { "epoch": 6.57, "learning_rate": 4.6716470226859496e-05, "loss": 2.5141, "step": 1326000 }, { "epoch": 6.57, "learning_rate": 4.671523164043341e-05, "loss": 2.5202, "step": 1326500 }, { "epoch": 6.57, "learning_rate": 4.671399305400732e-05, "loss": 2.4919, "step": 1327000 }, { "epoch": 6.58, "learning_rate": 4.671275446758124e-05, "loss": 2.4807, "step": 1327500 }, { "epoch": 6.58, "learning_rate": 4.671151588115516e-05, "loss": 2.4948, "step": 1328000 }, { "epoch": 6.58, "learning_rate": 4.6710277294729074e-05, "loss": 2.51, "step": 1328500 }, { "epoch": 6.58, "learning_rate": 4.670903870830299e-05, "loss": 2.4887, "step": 1329000 }, { "epoch": 6.59, "learning_rate": 4.67078001218769e-05, "loss": 2.4943, "step": 1329500 }, { "epoch": 6.59, "learning_rate": 4.670656153545082e-05, "loss": 2.5102, "step": 1330000 }, { "epoch": 6.59, "learning_rate": 4.6705322949024735e-05, "loss": 2.4674, "step": 1330500 }, { "epoch": 6.59, "learning_rate": 4.670408436259865e-05, "loss": 2.4995, "step": 1331000 }, { "epoch": 6.6, "learning_rate": 4.670285073051828e-05, "loss": 2.5186, "step": 1331500 }, { "epoch": 6.6, "learning_rate": 4.6701612144092196e-05, "loss": 2.485, "step": 1332000 }, { "epoch": 6.6, "learning_rate": 4.670037355766611e-05, "loss": 2.487, "step": 1332500 }, { "epoch": 6.6, "learning_rate": 4.669913497124003e-05, "loss": 2.5017, "step": 1333000 }, { "epoch": 6.61, "learning_rate": 4.669789886198679e-05, "loss": 2.4821, "step": 1333500 }, { "epoch": 6.61, "learning_rate": 4.669666027556071e-05, "loss": 2.4815, "step": 1334000 }, { "epoch": 6.61, "learning_rate": 4.6695421689134626e-05, "loss": 2.4971, "step": 1334500 }, { "epoch": 6.61, "learning_rate": 4.669418310270854e-05, "loss": 2.5461, "step": 1335000 }, { "epoch": 6.62, "learning_rate": 4.669294451628246e-05, "loss": 2.514, "step": 1335500 }, { "epoch": 6.62, "learning_rate": 4.669170592985638e-05, "loss": 2.5117, "step": 1336000 }, { "epoch": 6.62, "learning_rate": 4.6690467343430294e-05, "loss": 2.5189, "step": 1336500 }, { "epoch": 6.62, "learning_rate": 4.668922875700421e-05, "loss": 2.4931, "step": 1337000 }, { "epoch": 6.63, "learning_rate": 4.668799017057813e-05, "loss": 2.5089, "step": 1337500 }, { "epoch": 6.63, "learning_rate": 4.668675158415204e-05, "loss": 2.5183, "step": 1338000 }, { "epoch": 6.63, "learning_rate": 4.6685512997725955e-05, "loss": 2.5247, "step": 1338500 }, { "epoch": 6.63, "learning_rate": 4.668427441129987e-05, "loss": 2.4885, "step": 1339000 }, { "epoch": 6.64, "learning_rate": 4.668303830204665e-05, "loss": 2.5007, "step": 1339500 }, { "epoch": 6.64, "learning_rate": 4.6681799715620564e-05, "loss": 2.483, "step": 1340000 }, { "epoch": 6.64, "learning_rate": 4.668056112919448e-05, "loss": 2.5025, "step": 1340500 }, { "epoch": 6.64, "learning_rate": 4.667932254276839e-05, "loss": 2.4848, "step": 1341000 }, { "epoch": 6.65, "learning_rate": 4.667808395634231e-05, "loss": 2.5055, "step": 1341500 }, { "epoch": 6.65, "learning_rate": 4.6676845369916225e-05, "loss": 2.4936, "step": 1342000 }, { "epoch": 6.65, "learning_rate": 4.6675609260662994e-05, "loss": 2.5113, "step": 1342500 }, { "epoch": 6.65, "learning_rate": 4.667437315140976e-05, "loss": 2.4663, "step": 1343000 }, { "epoch": 6.66, "learning_rate": 4.667313456498368e-05, "loss": 2.4992, "step": 1343500 }, { "epoch": 6.66, "learning_rate": 4.66718959785576e-05, "loss": 2.4944, "step": 1344000 }, { "epoch": 6.66, "learning_rate": 4.6670657392131514e-05, "loss": 2.4973, "step": 1344500 }, { "epoch": 6.66, "learning_rate": 4.666941880570543e-05, "loss": 2.469, "step": 1345000 }, { "epoch": 6.67, "learning_rate": 4.666818021927935e-05, "loss": 2.5108, "step": 1345500 }, { "epoch": 6.67, "learning_rate": 4.6666941632853264e-05, "loss": 2.4903, "step": 1346000 }, { "epoch": 6.67, "learning_rate": 4.666570304642718e-05, "loss": 2.4975, "step": 1346500 }, { "epoch": 6.67, "learning_rate": 4.66644644600011e-05, "loss": 2.5192, "step": 1347000 }, { "epoch": 6.68, "learning_rate": 4.666322587357501e-05, "loss": 2.5035, "step": 1347500 }, { "epoch": 6.68, "learning_rate": 4.666198976432178e-05, "loss": 2.4976, "step": 1348000 }, { "epoch": 6.68, "learning_rate": 4.6660751177895694e-05, "loss": 2.5002, "step": 1348500 }, { "epoch": 6.68, "learning_rate": 4.665951506864246e-05, "loss": 2.5158, "step": 1349000 }, { "epoch": 6.69, "learning_rate": 4.665827648221638e-05, "loss": 2.4809, "step": 1349500 }, { "epoch": 6.69, "learning_rate": 4.66570378957903e-05, "loss": 2.4874, "step": 1350000 }, { "epoch": 6.69, "learning_rate": 4.6655799309364214e-05, "loss": 2.5005, "step": 1350500 }, { "epoch": 6.69, "learning_rate": 4.665456072293813e-05, "loss": 2.4906, "step": 1351000 }, { "epoch": 6.7, "learning_rate": 4.665332213651205e-05, "loss": 2.5107, "step": 1351500 }, { "epoch": 6.7, "learning_rate": 4.6652083550085964e-05, "loss": 2.4649, "step": 1352000 }, { "epoch": 6.7, "learning_rate": 4.665084496365988e-05, "loss": 2.4886, "step": 1352500 }, { "epoch": 6.7, "learning_rate": 4.66496063772338e-05, "loss": 2.5207, "step": 1353000 }, { "epoch": 6.71, "learning_rate": 4.664837026798056e-05, "loss": 2.4898, "step": 1353500 }, { "epoch": 6.71, "learning_rate": 4.664713168155448e-05, "loss": 2.5024, "step": 1354000 }, { "epoch": 6.71, "learning_rate": 4.6645893095128394e-05, "loss": 2.517, "step": 1354500 }, { "epoch": 6.71, "learning_rate": 4.664465450870231e-05, "loss": 2.4937, "step": 1355000 }, { "epoch": 6.72, "learning_rate": 4.664341592227623e-05, "loss": 2.4872, "step": 1355500 }, { "epoch": 6.72, "learning_rate": 4.6642177335850145e-05, "loss": 2.5017, "step": 1356000 }, { "epoch": 6.72, "learning_rate": 4.6640938749424055e-05, "loss": 2.5051, "step": 1356500 }, { "epoch": 6.72, "learning_rate": 4.663970016299797e-05, "loss": 2.477, "step": 1357000 }, { "epoch": 6.73, "learning_rate": 4.663846157657189e-05, "loss": 2.5128, "step": 1357500 }, { "epoch": 6.73, "learning_rate": 4.6637222990145806e-05, "loss": 2.4908, "step": 1358000 }, { "epoch": 6.73, "learning_rate": 4.663598440371972e-05, "loss": 2.4832, "step": 1358500 }, { "epoch": 6.73, "learning_rate": 4.66347482944665e-05, "loss": 2.4844, "step": 1359000 }, { "epoch": 6.74, "learning_rate": 4.6633509708040415e-05, "loss": 2.5207, "step": 1359500 }, { "epoch": 6.74, "learning_rate": 4.663227359878718e-05, "loss": 2.496, "step": 1360000 }, { "epoch": 6.74, "learning_rate": 4.6631035012361094e-05, "loss": 2.5076, "step": 1360500 }, { "epoch": 6.74, "learning_rate": 4.662979642593501e-05, "loss": 2.4935, "step": 1361000 }, { "epoch": 6.75, "learning_rate": 4.662855783950893e-05, "loss": 2.4879, "step": 1361500 }, { "epoch": 6.75, "learning_rate": 4.6627319253082845e-05, "loss": 2.4986, "step": 1362000 }, { "epoch": 6.75, "learning_rate": 4.6626083143829614e-05, "loss": 2.515, "step": 1362500 }, { "epoch": 6.75, "learning_rate": 4.662484455740353e-05, "loss": 2.501, "step": 1363000 }, { "epoch": 6.76, "learning_rate": 4.662360597097745e-05, "loss": 2.4869, "step": 1363500 }, { "epoch": 6.76, "learning_rate": 4.6622367384551365e-05, "loss": 2.4833, "step": 1364000 }, { "epoch": 6.76, "learning_rate": 4.662113127529813e-05, "loss": 2.4934, "step": 1364500 }, { "epoch": 6.76, "learning_rate": 4.6619895166044896e-05, "loss": 2.48, "step": 1365000 }, { "epoch": 6.77, "learning_rate": 4.661865657961881e-05, "loss": 2.4729, "step": 1365500 }, { "epoch": 6.77, "learning_rate": 4.661741799319273e-05, "loss": 2.4902, "step": 1366000 }, { "epoch": 6.77, "learning_rate": 4.6616179406766646e-05, "loss": 2.5141, "step": 1366500 }, { "epoch": 6.77, "learning_rate": 4.661494082034056e-05, "loss": 2.4894, "step": 1367000 }, { "epoch": 6.78, "learning_rate": 4.661370223391448e-05, "loss": 2.4918, "step": 1367500 }, { "epoch": 6.78, "learning_rate": 4.66124636474884e-05, "loss": 2.5045, "step": 1368000 }, { "epoch": 6.78, "learning_rate": 4.6611225061062314e-05, "loss": 2.4838, "step": 1368500 }, { "epoch": 6.78, "learning_rate": 4.660998647463623e-05, "loss": 2.487, "step": 1369000 }, { "epoch": 6.78, "learning_rate": 4.660874788821015e-05, "loss": 2.5039, "step": 1369500 }, { "epoch": 6.79, "learning_rate": 4.6607509301784065e-05, "loss": 2.5063, "step": 1370000 }, { "epoch": 6.79, "learning_rate": 4.660627071535798e-05, "loss": 2.5151, "step": 1370500 }, { "epoch": 6.79, "learning_rate": 4.6605039560450454e-05, "loss": 2.4945, "step": 1371000 }, { "epoch": 6.79, "learning_rate": 4.660380097402437e-05, "loss": 2.4963, "step": 1371500 }, { "epoch": 6.8, "learning_rate": 4.660256238759829e-05, "loss": 2.5327, "step": 1372000 }, { "epoch": 6.8, "learning_rate": 4.660132627834505e-05, "loss": 2.4942, "step": 1372500 }, { "epoch": 6.8, "learning_rate": 4.660008769191897e-05, "loss": 2.512, "step": 1373000 }, { "epoch": 6.8, "learning_rate": 4.6598849105492884e-05, "loss": 2.4764, "step": 1373500 }, { "epoch": 6.81, "learning_rate": 4.65976105190668e-05, "loss": 2.4803, "step": 1374000 }, { "epoch": 6.81, "learning_rate": 4.659637193264072e-05, "loss": 2.4855, "step": 1374500 }, { "epoch": 6.81, "learning_rate": 4.6595133346214635e-05, "loss": 2.4853, "step": 1375000 }, { "epoch": 6.81, "learning_rate": 4.6593897236961404e-05, "loss": 2.4791, "step": 1375500 }, { "epoch": 6.82, "learning_rate": 4.659265865053532e-05, "loss": 2.4887, "step": 1376000 }, { "epoch": 6.82, "learning_rate": 4.659142006410924e-05, "loss": 2.4982, "step": 1376500 }, { "epoch": 6.82, "learning_rate": 4.6590181477683154e-05, "loss": 2.5017, "step": 1377000 }, { "epoch": 6.82, "learning_rate": 4.658894536842992e-05, "loss": 2.5206, "step": 1377500 }, { "epoch": 6.83, "learning_rate": 4.658770678200384e-05, "loss": 2.4919, "step": 1378000 }, { "epoch": 6.83, "learning_rate": 4.658646819557776e-05, "loss": 2.5036, "step": 1378500 }, { "epoch": 6.83, "learning_rate": 4.658522960915167e-05, "loss": 2.4691, "step": 1379000 }, { "epoch": 6.83, "learning_rate": 4.6583991022725584e-05, "loss": 2.4853, "step": 1379500 }, { "epoch": 6.84, "learning_rate": 4.65827524362995e-05, "loss": 2.5148, "step": 1380000 }, { "epoch": 6.84, "learning_rate": 4.658151384987342e-05, "loss": 2.5151, "step": 1380500 }, { "epoch": 6.84, "learning_rate": 4.6580275263447335e-05, "loss": 2.4836, "step": 1381000 }, { "epoch": 6.84, "learning_rate": 4.657903667702125e-05, "loss": 2.4898, "step": 1381500 }, { "epoch": 6.85, "learning_rate": 4.657779809059517e-05, "loss": 2.4917, "step": 1382000 }, { "epoch": 6.85, "learning_rate": 4.657655950416908e-05, "loss": 2.5036, "step": 1382500 }, { "epoch": 6.85, "learning_rate": 4.6575320917742996e-05, "loss": 2.4992, "step": 1383000 }, { "epoch": 6.85, "learning_rate": 4.657408233131691e-05, "loss": 2.502, "step": 1383500 }, { "epoch": 6.86, "learning_rate": 4.657284374489083e-05, "loss": 2.4944, "step": 1384000 }, { "epoch": 6.86, "learning_rate": 4.657160515846475e-05, "loss": 2.4917, "step": 1384500 }, { "epoch": 6.86, "learning_rate": 4.6570366572038664e-05, "loss": 2.4888, "step": 1385000 }, { "epoch": 6.86, "learning_rate": 4.656912798561258e-05, "loss": 2.485, "step": 1385500 }, { "epoch": 6.87, "learning_rate": 4.65678893991865e-05, "loss": 2.4949, "step": 1386000 }, { "epoch": 6.87, "learning_rate": 4.6566653289933266e-05, "loss": 2.4753, "step": 1386500 }, { "epoch": 6.87, "learning_rate": 4.656541470350718e-05, "loss": 2.5044, "step": 1387000 }, { "epoch": 6.87, "learning_rate": 4.65641761170811e-05, "loss": 2.5088, "step": 1387500 }, { "epoch": 6.88, "learning_rate": 4.656293753065502e-05, "loss": 2.4922, "step": 1388000 }, { "epoch": 6.88, "learning_rate": 4.6561698944228934e-05, "loss": 2.5054, "step": 1388500 }, { "epoch": 6.88, "learning_rate": 4.656046035780285e-05, "loss": 2.4948, "step": 1389000 }, { "epoch": 6.88, "learning_rate": 4.655922177137677e-05, "loss": 2.5002, "step": 1389500 }, { "epoch": 6.89, "learning_rate": 4.655798566212353e-05, "loss": 2.4978, "step": 1390000 }, { "epoch": 6.89, "learning_rate": 4.655674707569745e-05, "loss": 2.5017, "step": 1390500 }, { "epoch": 6.89, "learning_rate": 4.6555508489271364e-05, "loss": 2.4925, "step": 1391000 }, { "epoch": 6.89, "learning_rate": 4.655426990284528e-05, "loss": 2.4932, "step": 1391500 }, { "epoch": 6.9, "learning_rate": 4.655303379359205e-05, "loss": 2.5058, "step": 1392000 }, { "epoch": 6.9, "learning_rate": 4.6551795207165966e-05, "loss": 2.4799, "step": 1392500 }, { "epoch": 6.9, "learning_rate": 4.655055662073988e-05, "loss": 2.5225, "step": 1393000 }, { "epoch": 6.9, "learning_rate": 4.65493180343138e-05, "loss": 2.4859, "step": 1393500 }, { "epoch": 6.91, "learning_rate": 4.654807944788772e-05, "loss": 2.521, "step": 1394000 }, { "epoch": 6.91, "learning_rate": 4.6546840861461634e-05, "loss": 2.4728, "step": 1394500 }, { "epoch": 6.91, "learning_rate": 4.6545604752208396e-05, "loss": 2.4708, "step": 1395000 }, { "epoch": 6.91, "learning_rate": 4.654436616578231e-05, "loss": 2.4922, "step": 1395500 }, { "epoch": 6.92, "learning_rate": 4.654312757935623e-05, "loss": 2.4731, "step": 1396000 }, { "epoch": 6.92, "learning_rate": 4.654188899293015e-05, "loss": 2.4955, "step": 1396500 }, { "epoch": 6.92, "learning_rate": 4.6540650406504064e-05, "loss": 2.4996, "step": 1397000 }, { "epoch": 6.92, "learning_rate": 4.653941182007798e-05, "loss": 2.4988, "step": 1397500 }, { "epoch": 6.93, "learning_rate": 4.65381732336519e-05, "loss": 2.476, "step": 1398000 }, { "epoch": 6.93, "learning_rate": 4.6536934647225815e-05, "loss": 2.487, "step": 1398500 }, { "epoch": 6.93, "learning_rate": 4.6535698537972583e-05, "loss": 2.4695, "step": 1399000 }, { "epoch": 6.93, "learning_rate": 4.65344599515465e-05, "loss": 2.4825, "step": 1399500 }, { "epoch": 6.94, "learning_rate": 4.653322136512042e-05, "loss": 2.4803, "step": 1400000 }, { "epoch": 6.94, "learning_rate": 4.6531982778694334e-05, "loss": 2.4772, "step": 1400500 }, { "epoch": 6.94, "learning_rate": 4.653074419226825e-05, "loss": 2.4992, "step": 1401000 }, { "epoch": 6.94, "learning_rate": 4.652950560584217e-05, "loss": 2.4755, "step": 1401500 }, { "epoch": 6.95, "learning_rate": 4.6528267019416085e-05, "loss": 2.486, "step": 1402000 }, { "epoch": 6.95, "learning_rate": 4.652703091016285e-05, "loss": 2.5144, "step": 1402500 }, { "epoch": 6.95, "learning_rate": 4.652579480090962e-05, "loss": 2.4947, "step": 1403000 }, { "epoch": 6.95, "learning_rate": 4.652455621448354e-05, "loss": 2.4977, "step": 1403500 }, { "epoch": 6.96, "learning_rate": 4.652331762805745e-05, "loss": 2.508, "step": 1404000 }, { "epoch": 6.96, "learning_rate": 4.652207904163137e-05, "loss": 2.5197, "step": 1404500 }, { "epoch": 6.96, "learning_rate": 4.6520840455205284e-05, "loss": 2.4885, "step": 1405000 }, { "epoch": 6.96, "learning_rate": 4.65196018687792e-05, "loss": 2.4976, "step": 1405500 }, { "epoch": 6.97, "learning_rate": 4.651836328235312e-05, "loss": 2.4476, "step": 1406000 }, { "epoch": 6.97, "learning_rate": 4.6517124695927034e-05, "loss": 2.5003, "step": 1406500 }, { "epoch": 6.97, "learning_rate": 4.651588610950095e-05, "loss": 2.4807, "step": 1407000 }, { "epoch": 6.97, "learning_rate": 4.651464752307487e-05, "loss": 2.5, "step": 1407500 }, { "epoch": 6.98, "learning_rate": 4.6513408936648785e-05, "loss": 2.5051, "step": 1408000 }, { "epoch": 6.98, "learning_rate": 4.65121703502227e-05, "loss": 2.4952, "step": 1408500 }, { "epoch": 6.98, "learning_rate": 4.651093176379662e-05, "loss": 2.4864, "step": 1409000 }, { "epoch": 6.98, "learning_rate": 4.650969565454338e-05, "loss": 2.4935, "step": 1409500 }, { "epoch": 6.99, "learning_rate": 4.65084570681173e-05, "loss": 2.5068, "step": 1410000 }, { "epoch": 6.99, "learning_rate": 4.650722095886407e-05, "loss": 2.5097, "step": 1410500 }, { "epoch": 6.99, "learning_rate": 4.6505982372437984e-05, "loss": 2.4744, "step": 1411000 }, { "epoch": 6.99, "learning_rate": 4.65047437860119e-05, "loss": 2.4966, "step": 1411500 }, { "epoch": 7.0, "learning_rate": 4.650350519958582e-05, "loss": 2.4788, "step": 1412000 }, { "epoch": 7.0, "learning_rate": 4.6502266613159735e-05, "loss": 2.4876, "step": 1412500 }, { "epoch": 7.0, "eval_accuracy": 0.6390809050019882, "eval_accuracy_mlm": 0.5919321078827258, "eval_accuracy_nsp": 0.8611933683454986, "eval_loss": 2.4429469108581543, "eval_runtime": 145.8536, "eval_samples_per_second": 1748.048, "eval_steps_per_second": 72.84, "step": 1412901 }, { "epoch": 7.0, "learning_rate": 4.650102802673365e-05, "loss": 2.4845, "step": 1413000 }, { "epoch": 7.0, "learning_rate": 4.649978944030757e-05, "loss": 2.4572, "step": 1413500 }, { "epoch": 7.01, "learning_rate": 4.6498550853881485e-05, "loss": 2.4539, "step": 1414000 }, { "epoch": 7.01, "learning_rate": 4.6497314744628254e-05, "loss": 2.4568, "step": 1414500 }, { "epoch": 7.01, "learning_rate": 4.6496076158202164e-05, "loss": 2.4616, "step": 1415000 }, { "epoch": 7.01, "learning_rate": 4.649483757177608e-05, "loss": 2.4551, "step": 1415500 }, { "epoch": 7.02, "learning_rate": 4.649360146252286e-05, "loss": 2.4791, "step": 1416000 }, { "epoch": 7.02, "learning_rate": 4.649236287609677e-05, "loss": 2.481, "step": 1416500 }, { "epoch": 7.02, "learning_rate": 4.6491124289670684e-05, "loss": 2.4784, "step": 1417000 }, { "epoch": 7.02, "learning_rate": 4.64898857032446e-05, "loss": 2.467, "step": 1417500 }, { "epoch": 7.03, "learning_rate": 4.648864711681852e-05, "loss": 2.4805, "step": 1418000 }, { "epoch": 7.03, "learning_rate": 4.6487408530392435e-05, "loss": 2.4275, "step": 1418500 }, { "epoch": 7.03, "learning_rate": 4.648616994396635e-05, "loss": 2.4766, "step": 1419000 }, { "epoch": 7.03, "learning_rate": 4.648493135754027e-05, "loss": 2.4559, "step": 1419500 }, { "epoch": 7.04, "learning_rate": 4.6483692771114185e-05, "loss": 2.5089, "step": 1420000 }, { "epoch": 7.04, "learning_rate": 4.64824541846881e-05, "loss": 2.4865, "step": 1420500 }, { "epoch": 7.04, "learning_rate": 4.648121559826202e-05, "loss": 2.4508, "step": 1421000 }, { "epoch": 7.04, "learning_rate": 4.6479977011835936e-05, "loss": 2.4655, "step": 1421500 }, { "epoch": 7.05, "learning_rate": 4.647873842540985e-05, "loss": 2.4671, "step": 1422000 }, { "epoch": 7.05, "learning_rate": 4.647749983898377e-05, "loss": 2.4695, "step": 1422500 }, { "epoch": 7.05, "learning_rate": 4.647626372973053e-05, "loss": 2.4488, "step": 1423000 }, { "epoch": 7.05, "learning_rate": 4.647502514330445e-05, "loss": 2.4521, "step": 1423500 }, { "epoch": 7.05, "learning_rate": 4.647378903405122e-05, "loss": 2.4709, "step": 1424000 }, { "epoch": 7.06, "learning_rate": 4.647255540197084e-05, "loss": 2.4587, "step": 1424500 }, { "epoch": 7.06, "learning_rate": 4.6471316815544755e-05, "loss": 2.4739, "step": 1425000 }, { "epoch": 7.06, "learning_rate": 4.647007822911867e-05, "loss": 2.4977, "step": 1425500 }, { "epoch": 7.06, "learning_rate": 4.646883964269259e-05, "loss": 2.4725, "step": 1426000 }, { "epoch": 7.07, "learning_rate": 4.6467601056266506e-05, "loss": 2.4716, "step": 1426500 }, { "epoch": 7.07, "learning_rate": 4.646636246984042e-05, "loss": 2.4874, "step": 1427000 }, { "epoch": 7.07, "learning_rate": 4.646512388341434e-05, "loss": 2.4674, "step": 1427500 }, { "epoch": 7.07, "learning_rate": 4.646388529698826e-05, "loss": 2.4617, "step": 1428000 }, { "epoch": 7.08, "learning_rate": 4.6462646710562174e-05, "loss": 2.4703, "step": 1428500 }, { "epoch": 7.08, "learning_rate": 4.6461408124136084e-05, "loss": 2.4612, "step": 1429000 }, { "epoch": 7.08, "learning_rate": 4.646016953771e-05, "loss": 2.4505, "step": 1429500 }, { "epoch": 7.08, "learning_rate": 4.645893095128392e-05, "loss": 2.4645, "step": 1430000 }, { "epoch": 7.09, "learning_rate": 4.6457692364857835e-05, "loss": 2.4425, "step": 1430500 }, { "epoch": 7.09, "learning_rate": 4.645645377843175e-05, "loss": 2.4478, "step": 1431000 }, { "epoch": 7.09, "learning_rate": 4.645521519200567e-05, "loss": 2.4645, "step": 1431500 }, { "epoch": 7.09, "learning_rate": 4.6453979082752444e-05, "loss": 2.4471, "step": 1432000 }, { "epoch": 7.1, "learning_rate": 4.6452740496326354e-05, "loss": 2.4378, "step": 1432500 }, { "epoch": 7.1, "learning_rate": 4.645150190990027e-05, "loss": 2.4872, "step": 1433000 }, { "epoch": 7.1, "learning_rate": 4.645026332347419e-05, "loss": 2.4629, "step": 1433500 }, { "epoch": 7.1, "learning_rate": 4.6449024737048105e-05, "loss": 2.4646, "step": 1434000 }, { "epoch": 7.11, "learning_rate": 4.644778615062202e-05, "loss": 2.4917, "step": 1434500 }, { "epoch": 7.11, "learning_rate": 4.644654756419594e-05, "loss": 2.4704, "step": 1435000 }, { "epoch": 7.11, "learning_rate": 4.644530897776985e-05, "loss": 2.4439, "step": 1435500 }, { "epoch": 7.11, "learning_rate": 4.6444070391343766e-05, "loss": 2.458, "step": 1436000 }, { "epoch": 7.12, "learning_rate": 4.6442834282090535e-05, "loss": 2.4867, "step": 1436500 }, { "epoch": 7.12, "learning_rate": 4.644159569566445e-05, "loss": 2.4849, "step": 1437000 }, { "epoch": 7.12, "learning_rate": 4.644035710923837e-05, "loss": 2.4636, "step": 1437500 }, { "epoch": 7.12, "learning_rate": 4.6439118522812286e-05, "loss": 2.4742, "step": 1438000 }, { "epoch": 7.13, "learning_rate": 4.643788736790476e-05, "loss": 2.4822, "step": 1438500 }, { "epoch": 7.13, "learning_rate": 4.6436648781478675e-05, "loss": 2.4591, "step": 1439000 }, { "epoch": 7.13, "learning_rate": 4.643541019505259e-05, "loss": 2.4727, "step": 1439500 }, { "epoch": 7.13, "learning_rate": 4.643417408579936e-05, "loss": 2.4814, "step": 1440000 }, { "epoch": 7.14, "learning_rate": 4.643293549937328e-05, "loss": 2.4816, "step": 1440500 }, { "epoch": 7.14, "learning_rate": 4.6431696912947195e-05, "loss": 2.4745, "step": 1441000 }, { "epoch": 7.14, "learning_rate": 4.643045832652111e-05, "loss": 2.467, "step": 1441500 }, { "epoch": 7.14, "learning_rate": 4.642921974009503e-05, "loss": 2.4839, "step": 1442000 }, { "epoch": 7.15, "learning_rate": 4.6427981153668946e-05, "loss": 2.4538, "step": 1442500 }, { "epoch": 7.15, "learning_rate": 4.6426742567242856e-05, "loss": 2.4355, "step": 1443000 }, { "epoch": 7.15, "learning_rate": 4.642550398081677e-05, "loss": 2.479, "step": 1443500 }, { "epoch": 7.15, "learning_rate": 4.642426539439069e-05, "loss": 2.4619, "step": 1444000 }, { "epoch": 7.16, "learning_rate": 4.642302680796461e-05, "loss": 2.4853, "step": 1444500 }, { "epoch": 7.16, "learning_rate": 4.6421788221538524e-05, "loss": 2.4502, "step": 1445000 }, { "epoch": 7.16, "learning_rate": 4.642054963511244e-05, "loss": 2.4703, "step": 1445500 }, { "epoch": 7.16, "learning_rate": 4.641931104868636e-05, "loss": 2.4679, "step": 1446000 }, { "epoch": 7.17, "learning_rate": 4.6418072462260274e-05, "loss": 2.4749, "step": 1446500 }, { "epoch": 7.17, "learning_rate": 4.641683387583419e-05, "loss": 2.4873, "step": 1447000 }, { "epoch": 7.17, "learning_rate": 4.641559528940811e-05, "loss": 2.4585, "step": 1447500 }, { "epoch": 7.17, "learning_rate": 4.641435670298202e-05, "loss": 2.4642, "step": 1448000 }, { "epoch": 7.18, "learning_rate": 4.6413120593728794e-05, "loss": 2.4696, "step": 1448500 }, { "epoch": 7.18, "learning_rate": 4.641188200730271e-05, "loss": 2.4597, "step": 1449000 }, { "epoch": 7.18, "learning_rate": 4.641064342087663e-05, "loss": 2.4897, "step": 1449500 }, { "epoch": 7.18, "learning_rate": 4.640940731162339e-05, "loss": 2.4707, "step": 1450000 }, { "epoch": 7.19, "learning_rate": 4.640816872519731e-05, "loss": 2.4917, "step": 1450500 }, { "epoch": 7.19, "learning_rate": 4.6406930138771224e-05, "loss": 2.4877, "step": 1451000 }, { "epoch": 7.19, "learning_rate": 4.640569155234514e-05, "loss": 2.4417, "step": 1451500 }, { "epoch": 7.19, "learning_rate": 4.640445544309191e-05, "loss": 2.4632, "step": 1452000 }, { "epoch": 7.2, "learning_rate": 4.6403216856665826e-05, "loss": 2.5164, "step": 1452500 }, { "epoch": 7.2, "learning_rate": 4.640197827023974e-05, "loss": 2.476, "step": 1453000 }, { "epoch": 7.2, "learning_rate": 4.640073968381366e-05, "loss": 2.4726, "step": 1453500 }, { "epoch": 7.2, "learning_rate": 4.639950357456043e-05, "loss": 2.4642, "step": 1454000 }, { "epoch": 7.21, "learning_rate": 4.6398264988134346e-05, "loss": 2.4863, "step": 1454500 }, { "epoch": 7.21, "learning_rate": 4.639702640170826e-05, "loss": 2.4524, "step": 1455000 }, { "epoch": 7.21, "learning_rate": 4.639578781528218e-05, "loss": 2.4725, "step": 1455500 }, { "epoch": 7.21, "learning_rate": 4.639455170602894e-05, "loss": 2.4505, "step": 1456000 }, { "epoch": 7.22, "learning_rate": 4.639331311960286e-05, "loss": 2.4743, "step": 1456500 }, { "epoch": 7.22, "learning_rate": 4.6392074533176776e-05, "loss": 2.4581, "step": 1457000 }, { "epoch": 7.22, "learning_rate": 4.639083594675069e-05, "loss": 2.4725, "step": 1457500 }, { "epoch": 7.22, "learning_rate": 4.638959736032461e-05, "loss": 2.4704, "step": 1458000 }, { "epoch": 7.23, "learning_rate": 4.6388358773898526e-05, "loss": 2.4718, "step": 1458500 }, { "epoch": 7.23, "learning_rate": 4.638712018747244e-05, "loss": 2.4867, "step": 1459000 }, { "epoch": 7.23, "learning_rate": 4.638588407821921e-05, "loss": 2.4673, "step": 1459500 }, { "epoch": 7.23, "learning_rate": 4.638464549179313e-05, "loss": 2.4954, "step": 1460000 }, { "epoch": 7.24, "learning_rate": 4.6383406905367046e-05, "loss": 2.4766, "step": 1460500 }, { "epoch": 7.24, "learning_rate": 4.638216831894096e-05, "loss": 2.4681, "step": 1461000 }, { "epoch": 7.24, "learning_rate": 4.638092973251488e-05, "loss": 2.4931, "step": 1461500 }, { "epoch": 7.24, "learning_rate": 4.63796911460888e-05, "loss": 2.4671, "step": 1462000 }, { "epoch": 7.25, "learning_rate": 4.6378452559662714e-05, "loss": 2.4901, "step": 1462500 }, { "epoch": 7.25, "learning_rate": 4.637721397323663e-05, "loss": 2.4586, "step": 1463000 }, { "epoch": 7.25, "learning_rate": 4.637597538681054e-05, "loss": 2.4707, "step": 1463500 }, { "epoch": 7.25, "learning_rate": 4.637474175473016e-05, "loss": 2.4735, "step": 1464000 }, { "epoch": 7.26, "learning_rate": 4.637350316830408e-05, "loss": 2.4678, "step": 1464500 }, { "epoch": 7.26, "learning_rate": 4.6372264581877995e-05, "loss": 2.485, "step": 1465000 }, { "epoch": 7.26, "learning_rate": 4.637102599545191e-05, "loss": 2.4831, "step": 1465500 }, { "epoch": 7.26, "learning_rate": 4.636978740902583e-05, "loss": 2.4813, "step": 1466000 }, { "epoch": 7.27, "learning_rate": 4.6368548822599746e-05, "loss": 2.4825, "step": 1466500 }, { "epoch": 7.27, "learning_rate": 4.6367312713346515e-05, "loss": 2.4867, "step": 1467000 }, { "epoch": 7.27, "learning_rate": 4.6366074126920425e-05, "loss": 2.4784, "step": 1467500 }, { "epoch": 7.27, "learning_rate": 4.636483554049434e-05, "loss": 2.4478, "step": 1468000 }, { "epoch": 7.28, "learning_rate": 4.636359695406826e-05, "loss": 2.4608, "step": 1468500 }, { "epoch": 7.28, "learning_rate": 4.6362358367642176e-05, "loss": 2.4759, "step": 1469000 }, { "epoch": 7.28, "learning_rate": 4.636111978121609e-05, "loss": 2.4748, "step": 1469500 }, { "epoch": 7.28, "learning_rate": 4.635988119479001e-05, "loss": 2.4616, "step": 1470000 }, { "epoch": 7.29, "learning_rate": 4.635864260836393e-05, "loss": 2.4663, "step": 1470500 }, { "epoch": 7.29, "learning_rate": 4.6357404021937844e-05, "loss": 2.4643, "step": 1471000 }, { "epoch": 7.29, "learning_rate": 4.635616543551176e-05, "loss": 2.482, "step": 1471500 }, { "epoch": 7.29, "learning_rate": 4.635492932625853e-05, "loss": 2.4848, "step": 1472000 }, { "epoch": 7.3, "learning_rate": 4.6353690739832446e-05, "loss": 2.4771, "step": 1472500 }, { "epoch": 7.3, "learning_rate": 4.635245215340636e-05, "loss": 2.4642, "step": 1473000 }, { "epoch": 7.3, "learning_rate": 4.635121356698028e-05, "loss": 2.4742, "step": 1473500 }, { "epoch": 7.3, "learning_rate": 4.63499749805542e-05, "loss": 2.4884, "step": 1474000 }, { "epoch": 7.31, "learning_rate": 4.6348736394128114e-05, "loss": 2.4658, "step": 1474500 }, { "epoch": 7.31, "learning_rate": 4.634749780770203e-05, "loss": 2.49, "step": 1475000 }, { "epoch": 7.31, "learning_rate": 4.634625922127595e-05, "loss": 2.4885, "step": 1475500 }, { "epoch": 7.31, "learning_rate": 4.634502558919557e-05, "loss": 2.4919, "step": 1476000 }, { "epoch": 7.32, "learning_rate": 4.634378700276948e-05, "loss": 2.4588, "step": 1476500 }, { "epoch": 7.32, "learning_rate": 4.6342548416343396e-05, "loss": 2.4474, "step": 1477000 }, { "epoch": 7.32, "learning_rate": 4.634130982991731e-05, "loss": 2.4867, "step": 1477500 }, { "epoch": 7.32, "learning_rate": 4.634007124349123e-05, "loss": 2.4749, "step": 1478000 }, { "epoch": 7.33, "learning_rate": 4.6338832657065146e-05, "loss": 2.4603, "step": 1478500 }, { "epoch": 7.33, "learning_rate": 4.633759407063906e-05, "loss": 2.4551, "step": 1479000 }, { "epoch": 7.33, "learning_rate": 4.633635548421298e-05, "loss": 2.4716, "step": 1479500 }, { "epoch": 7.33, "learning_rate": 4.633511937495974e-05, "loss": 2.4879, "step": 1480000 }, { "epoch": 7.33, "learning_rate": 4.633388078853366e-05, "loss": 2.4664, "step": 1480500 }, { "epoch": 7.34, "learning_rate": 4.6332642202107576e-05, "loss": 2.4596, "step": 1481000 }, { "epoch": 7.34, "learning_rate": 4.633140361568149e-05, "loss": 2.5133, "step": 1481500 }, { "epoch": 7.34, "learning_rate": 4.633016502925541e-05, "loss": 2.4767, "step": 1482000 }, { "epoch": 7.34, "learning_rate": 4.632892644282933e-05, "loss": 2.4489, "step": 1482500 }, { "epoch": 7.35, "learning_rate": 4.6327687856403244e-05, "loss": 2.4446, "step": 1483000 }, { "epoch": 7.35, "learning_rate": 4.632644926997716e-05, "loss": 2.4838, "step": 1483500 }, { "epoch": 7.35, "learning_rate": 4.632521563789679e-05, "loss": 2.4423, "step": 1484000 }, { "epoch": 7.35, "learning_rate": 4.63239770514707e-05, "loss": 2.4774, "step": 1484500 }, { "epoch": 7.36, "learning_rate": 4.6322738465044615e-05, "loss": 2.4497, "step": 1485000 }, { "epoch": 7.36, "learning_rate": 4.632149987861853e-05, "loss": 2.4882, "step": 1485500 }, { "epoch": 7.36, "learning_rate": 4.63202637693653e-05, "loss": 2.4747, "step": 1486000 }, { "epoch": 7.36, "learning_rate": 4.631902518293922e-05, "loss": 2.4873, "step": 1486500 }, { "epoch": 7.37, "learning_rate": 4.6317786596513135e-05, "loss": 2.4719, "step": 1487000 }, { "epoch": 7.37, "learning_rate": 4.631654801008705e-05, "loss": 2.466, "step": 1487500 }, { "epoch": 7.37, "learning_rate": 4.631530942366097e-05, "loss": 2.5012, "step": 1488000 }, { "epoch": 7.37, "learning_rate": 4.6314070837234886e-05, "loss": 2.4666, "step": 1488500 }, { "epoch": 7.38, "learning_rate": 4.6312832250808796e-05, "loss": 2.476, "step": 1489000 }, { "epoch": 7.38, "learning_rate": 4.631159366438271e-05, "loss": 2.4908, "step": 1489500 }, { "epoch": 7.38, "learning_rate": 4.631035507795663e-05, "loss": 2.4713, "step": 1490000 }, { "epoch": 7.38, "learning_rate": 4.630911649153055e-05, "loss": 2.4714, "step": 1490500 }, { "epoch": 7.39, "learning_rate": 4.6307877905104464e-05, "loss": 2.4822, "step": 1491000 }, { "epoch": 7.39, "learning_rate": 4.630663931867838e-05, "loss": 2.468, "step": 1491500 }, { "epoch": 7.39, "learning_rate": 4.63054007322523e-05, "loss": 2.4597, "step": 1492000 }, { "epoch": 7.39, "learning_rate": 4.6304162145826214e-05, "loss": 2.4886, "step": 1492500 }, { "epoch": 7.4, "learning_rate": 4.630292355940013e-05, "loss": 2.4621, "step": 1493000 }, { "epoch": 7.4, "learning_rate": 4.630168745014689e-05, "loss": 2.4916, "step": 1493500 }, { "epoch": 7.4, "learning_rate": 4.630044886372081e-05, "loss": 2.5105, "step": 1494000 }, { "epoch": 7.4, "learning_rate": 4.629921027729473e-05, "loss": 2.4965, "step": 1494500 }, { "epoch": 7.41, "learning_rate": 4.62979741680415e-05, "loss": 2.4962, "step": 1495000 }, { "epoch": 7.41, "learning_rate": 4.629673558161541e-05, "loss": 2.4789, "step": 1495500 }, { "epoch": 7.41, "learning_rate": 4.629549947236219e-05, "loss": 2.4785, "step": 1496000 }, { "epoch": 7.41, "learning_rate": 4.6294260885936105e-05, "loss": 2.4689, "step": 1496500 }, { "epoch": 7.42, "learning_rate": 4.629302229951002e-05, "loss": 2.5094, "step": 1497000 }, { "epoch": 7.42, "learning_rate": 4.629178371308394e-05, "loss": 2.4664, "step": 1497500 }, { "epoch": 7.42, "learning_rate": 4.629054512665785e-05, "loss": 2.4711, "step": 1498000 }, { "epoch": 7.42, "learning_rate": 4.6289306540231766e-05, "loss": 2.4982, "step": 1498500 }, { "epoch": 7.43, "learning_rate": 4.628806795380568e-05, "loss": 2.4652, "step": 1499000 }, { "epoch": 7.43, "learning_rate": 4.62868293673796e-05, "loss": 2.464, "step": 1499500 }, { "epoch": 7.43, "learning_rate": 4.628559078095352e-05, "loss": 2.4816, "step": 1500000 }, { "epoch": 7.43, "learning_rate": 4.628435219452743e-05, "loss": 2.461, "step": 1500500 }, { "epoch": 7.44, "learning_rate": 4.6283113608101344e-05, "loss": 2.4604, "step": 1501000 }, { "epoch": 7.44, "learning_rate": 4.628187502167526e-05, "loss": 2.4845, "step": 1501500 }, { "epoch": 7.44, "learning_rate": 4.628063643524918e-05, "loss": 2.4844, "step": 1502000 }, { "epoch": 7.44, "learning_rate": 4.6279397848823095e-05, "loss": 2.4942, "step": 1502500 }, { "epoch": 7.45, "learning_rate": 4.627815926239701e-05, "loss": 2.4517, "step": 1503000 }, { "epoch": 7.45, "learning_rate": 4.627692067597093e-05, "loss": 2.4613, "step": 1503500 }, { "epoch": 7.45, "learning_rate": 4.62756845667177e-05, "loss": 2.4785, "step": 1504000 }, { "epoch": 7.45, "learning_rate": 4.6274445980291615e-05, "loss": 2.4746, "step": 1504500 }, { "epoch": 7.46, "learning_rate": 4.627320739386553e-05, "loss": 2.4834, "step": 1505000 }, { "epoch": 7.46, "learning_rate": 4.627196880743945e-05, "loss": 2.5137, "step": 1505500 }, { "epoch": 7.46, "learning_rate": 4.6270730221013365e-05, "loss": 2.4748, "step": 1506000 }, { "epoch": 7.46, "learning_rate": 4.626949163458728e-05, "loss": 2.459, "step": 1506500 }, { "epoch": 7.47, "learning_rate": 4.62682530481612e-05, "loss": 2.4794, "step": 1507000 }, { "epoch": 7.47, "learning_rate": 4.6267014461735116e-05, "loss": 2.4742, "step": 1507500 }, { "epoch": 7.47, "learning_rate": 4.626577587530903e-05, "loss": 2.4774, "step": 1508000 }, { "epoch": 7.47, "learning_rate": 4.626453728888295e-05, "loss": 2.4662, "step": 1508500 }, { "epoch": 7.48, "learning_rate": 4.626330117962971e-05, "loss": 2.4627, "step": 1509000 }, { "epoch": 7.48, "learning_rate": 4.626206259320363e-05, "loss": 2.4786, "step": 1509500 }, { "epoch": 7.48, "learning_rate": 4.62608264839504e-05, "loss": 2.4726, "step": 1510000 }, { "epoch": 7.48, "learning_rate": 4.6259590374697173e-05, "loss": 2.4954, "step": 1510500 }, { "epoch": 7.49, "learning_rate": 4.6258351788271084e-05, "loss": 2.4493, "step": 1511000 }, { "epoch": 7.49, "learning_rate": 4.6257113201845e-05, "loss": 2.4704, "step": 1511500 }, { "epoch": 7.49, "learning_rate": 4.625587461541892e-05, "loss": 2.4773, "step": 1512000 }, { "epoch": 7.49, "learning_rate": 4.6254636028992834e-05, "loss": 2.4571, "step": 1512500 }, { "epoch": 7.5, "learning_rate": 4.62533999197396e-05, "loss": 2.4826, "step": 1513000 }, { "epoch": 7.5, "learning_rate": 4.625216133331352e-05, "loss": 2.4711, "step": 1513500 }, { "epoch": 7.5, "learning_rate": 4.625092274688743e-05, "loss": 2.5154, "step": 1514000 }, { "epoch": 7.5, "learning_rate": 4.624968416046135e-05, "loss": 2.4689, "step": 1514500 }, { "epoch": 7.51, "learning_rate": 4.624844805120812e-05, "loss": 2.4558, "step": 1515000 }, { "epoch": 7.51, "learning_rate": 4.624720946478204e-05, "loss": 2.489, "step": 1515500 }, { "epoch": 7.51, "learning_rate": 4.624597087835596e-05, "loss": 2.4775, "step": 1516000 }, { "epoch": 7.51, "learning_rate": 4.6244732291929874e-05, "loss": 2.5, "step": 1516500 }, { "epoch": 7.52, "learning_rate": 4.6243496182676636e-05, "loss": 2.5144, "step": 1517000 }, { "epoch": 7.52, "learning_rate": 4.624225759625055e-05, "loss": 2.4776, "step": 1517500 }, { "epoch": 7.52, "learning_rate": 4.624101900982447e-05, "loss": 2.5141, "step": 1518000 }, { "epoch": 7.52, "learning_rate": 4.6239780423398386e-05, "loss": 2.4846, "step": 1518500 }, { "epoch": 7.53, "learning_rate": 4.62385418369723e-05, "loss": 2.4617, "step": 1519000 }, { "epoch": 7.53, "learning_rate": 4.623730325054622e-05, "loss": 2.4419, "step": 1519500 }, { "epoch": 7.53, "learning_rate": 4.623606466412014e-05, "loss": 2.4669, "step": 1520000 }, { "epoch": 7.53, "learning_rate": 4.623482607769405e-05, "loss": 2.4905, "step": 1520500 }, { "epoch": 7.54, "learning_rate": 4.6233587491267964e-05, "loss": 2.4972, "step": 1521000 }, { "epoch": 7.54, "learning_rate": 4.623234890484188e-05, "loss": 2.4975, "step": 1521500 }, { "epoch": 7.54, "learning_rate": 4.62311103184158e-05, "loss": 2.4753, "step": 1522000 }, { "epoch": 7.54, "learning_rate": 4.6229871731989715e-05, "loss": 2.4884, "step": 1522500 }, { "epoch": 7.55, "learning_rate": 4.622863314556363e-05, "loss": 2.4908, "step": 1523000 }, { "epoch": 7.55, "learning_rate": 4.62273970363104e-05, "loss": 2.4883, "step": 1523500 }, { "epoch": 7.55, "learning_rate": 4.622615844988432e-05, "loss": 2.48, "step": 1524000 }, { "epoch": 7.55, "learning_rate": 4.6224919863458235e-05, "loss": 2.4724, "step": 1524500 }, { "epoch": 7.56, "learning_rate": 4.622368127703215e-05, "loss": 2.4895, "step": 1525000 }, { "epoch": 7.56, "learning_rate": 4.622244516777892e-05, "loss": 2.4562, "step": 1525500 }, { "epoch": 7.56, "learning_rate": 4.622120658135284e-05, "loss": 2.4682, "step": 1526000 }, { "epoch": 7.56, "learning_rate": 4.621996799492675e-05, "loss": 2.4951, "step": 1526500 }, { "epoch": 7.57, "learning_rate": 4.6218729408500664e-05, "loss": 2.4807, "step": 1527000 }, { "epoch": 7.57, "learning_rate": 4.621749082207458e-05, "loss": 2.51, "step": 1527500 }, { "epoch": 7.57, "learning_rate": 4.621625471282136e-05, "loss": 2.465, "step": 1528000 }, { "epoch": 7.57, "learning_rate": 4.621501860356812e-05, "loss": 2.4833, "step": 1528500 }, { "epoch": 7.58, "learning_rate": 4.6213780017142036e-05, "loss": 2.4647, "step": 1529000 }, { "epoch": 7.58, "learning_rate": 4.621254143071595e-05, "loss": 2.461, "step": 1529500 }, { "epoch": 7.58, "learning_rate": 4.621130284428987e-05, "loss": 2.4943, "step": 1530000 }, { "epoch": 7.58, "learning_rate": 4.6210064257863787e-05, "loss": 2.4836, "step": 1530500 }, { "epoch": 7.59, "learning_rate": 4.6208825671437704e-05, "loss": 2.4815, "step": 1531000 }, { "epoch": 7.59, "learning_rate": 4.620758956218447e-05, "loss": 2.4911, "step": 1531500 }, { "epoch": 7.59, "learning_rate": 4.620635097575839e-05, "loss": 2.5123, "step": 1532000 }, { "epoch": 7.59, "learning_rate": 4.6205112389332306e-05, "loss": 2.4855, "step": 1532500 }, { "epoch": 7.6, "learning_rate": 4.620387380290622e-05, "loss": 2.4682, "step": 1533000 }, { "epoch": 7.6, "learning_rate": 4.620263521648014e-05, "loss": 2.4504, "step": 1533500 }, { "epoch": 7.6, "learning_rate": 4.620139663005406e-05, "loss": 2.4983, "step": 1534000 }, { "epoch": 7.6, "learning_rate": 4.6200158043627974e-05, "loss": 2.444, "step": 1534500 }, { "epoch": 7.6, "learning_rate": 4.619891945720189e-05, "loss": 2.4729, "step": 1535000 }, { "epoch": 7.61, "learning_rate": 4.619768087077581e-05, "loss": 2.4705, "step": 1535500 }, { "epoch": 7.61, "learning_rate": 4.619644476152257e-05, "loss": 2.4669, "step": 1536000 }, { "epoch": 7.61, "learning_rate": 4.619520617509649e-05, "loss": 2.4638, "step": 1536500 }, { "epoch": 7.61, "learning_rate": 4.6193967588670404e-05, "loss": 2.4943, "step": 1537000 }, { "epoch": 7.62, "learning_rate": 4.6192733956590024e-05, "loss": 2.4793, "step": 1537500 }, { "epoch": 7.62, "learning_rate": 4.619149537016394e-05, "loss": 2.4636, "step": 1538000 }, { "epoch": 7.62, "learning_rate": 4.619025678373786e-05, "loss": 2.4792, "step": 1538500 }, { "epoch": 7.62, "learning_rate": 4.6189018197311775e-05, "loss": 2.4809, "step": 1539000 }, { "epoch": 7.63, "learning_rate": 4.618777961088569e-05, "loss": 2.4845, "step": 1539500 }, { "epoch": 7.63, "learning_rate": 4.618654102445961e-05, "loss": 2.4399, "step": 1540000 }, { "epoch": 7.63, "learning_rate": 4.6185302438033526e-05, "loss": 2.4847, "step": 1540500 }, { "epoch": 7.63, "learning_rate": 4.618406385160744e-05, "loss": 2.4646, "step": 1541000 }, { "epoch": 7.64, "learning_rate": 4.618282526518136e-05, "loss": 2.4743, "step": 1541500 }, { "epoch": 7.64, "learning_rate": 4.618158667875527e-05, "loss": 2.4826, "step": 1542000 }, { "epoch": 7.64, "learning_rate": 4.618034809232919e-05, "loss": 2.5099, "step": 1542500 }, { "epoch": 7.64, "learning_rate": 4.6179109505903104e-05, "loss": 2.478, "step": 1543000 }, { "epoch": 7.65, "learning_rate": 4.617787091947702e-05, "loss": 2.4752, "step": 1543500 }, { "epoch": 7.65, "learning_rate": 4.617663481022379e-05, "loss": 2.4662, "step": 1544000 }, { "epoch": 7.65, "learning_rate": 4.6175396223797706e-05, "loss": 2.48, "step": 1544500 }, { "epoch": 7.65, "learning_rate": 4.617415763737162e-05, "loss": 2.498, "step": 1545000 }, { "epoch": 7.66, "learning_rate": 4.617291905094554e-05, "loss": 2.4554, "step": 1545500 }, { "epoch": 7.66, "learning_rate": 4.617168046451946e-05, "loss": 2.481, "step": 1546000 }, { "epoch": 7.66, "learning_rate": 4.6170444355266226e-05, "loss": 2.4825, "step": 1546500 }, { "epoch": 7.66, "learning_rate": 4.616920576884014e-05, "loss": 2.4594, "step": 1547000 }, { "epoch": 7.67, "learning_rate": 4.616796718241406e-05, "loss": 2.4847, "step": 1547500 }, { "epoch": 7.67, "learning_rate": 4.616672859598798e-05, "loss": 2.4623, "step": 1548000 }, { "epoch": 7.67, "learning_rate": 4.616549000956189e-05, "loss": 2.4656, "step": 1548500 }, { "epoch": 7.67, "learning_rate": 4.6164251423135804e-05, "loss": 2.4649, "step": 1549000 }, { "epoch": 7.68, "learning_rate": 4.616301531388257e-05, "loss": 2.474, "step": 1549500 }, { "epoch": 7.68, "learning_rate": 4.616177672745649e-05, "loss": 2.4778, "step": 1550000 }, { "epoch": 7.68, "learning_rate": 4.616054061820326e-05, "loss": 2.4788, "step": 1550500 }, { "epoch": 7.68, "learning_rate": 4.6159302031777175e-05, "loss": 2.4836, "step": 1551000 }, { "epoch": 7.69, "learning_rate": 4.615806344535109e-05, "loss": 2.4977, "step": 1551500 }, { "epoch": 7.69, "learning_rate": 4.615682733609786e-05, "loss": 2.4888, "step": 1552000 }, { "epoch": 7.69, "learning_rate": 4.615558874967177e-05, "loss": 2.4875, "step": 1552500 }, { "epoch": 7.69, "learning_rate": 4.61543551175914e-05, "loss": 2.503, "step": 1553000 }, { "epoch": 7.7, "learning_rate": 4.6153116531165316e-05, "loss": 2.5001, "step": 1553500 }, { "epoch": 7.7, "learning_rate": 4.615187794473923e-05, "loss": 2.4949, "step": 1554000 }, { "epoch": 7.7, "learning_rate": 4.615063935831315e-05, "loss": 2.4934, "step": 1554500 }, { "epoch": 7.7, "learning_rate": 4.6149400771887066e-05, "loss": 2.4785, "step": 1555000 }, { "epoch": 7.71, "learning_rate": 4.614816218546098e-05, "loss": 2.4767, "step": 1555500 }, { "epoch": 7.71, "learning_rate": 4.61469235990349e-05, "loss": 2.4847, "step": 1556000 }, { "epoch": 7.71, "learning_rate": 4.614568501260881e-05, "loss": 2.5089, "step": 1556500 }, { "epoch": 7.71, "learning_rate": 4.614444642618273e-05, "loss": 2.4748, "step": 1557000 }, { "epoch": 7.72, "learning_rate": 4.6143207839756644e-05, "loss": 2.4601, "step": 1557500 }, { "epoch": 7.72, "learning_rate": 4.614196925333056e-05, "loss": 2.5098, "step": 1558000 }, { "epoch": 7.72, "learning_rate": 4.614073066690447e-05, "loss": 2.4963, "step": 1558500 }, { "epoch": 7.72, "learning_rate": 4.613949208047839e-05, "loss": 2.4725, "step": 1559000 }, { "epoch": 7.73, "learning_rate": 4.6138253494052305e-05, "loss": 2.4686, "step": 1559500 }, { "epoch": 7.73, "learning_rate": 4.613701490762622e-05, "loss": 2.4655, "step": 1560000 }, { "epoch": 7.73, "learning_rate": 4.613577632120014e-05, "loss": 2.4731, "step": 1560500 }, { "epoch": 7.73, "learning_rate": 4.6134537734774056e-05, "loss": 2.4864, "step": 1561000 }, { "epoch": 7.74, "learning_rate": 4.613329914834797e-05, "loss": 2.5237, "step": 1561500 }, { "epoch": 7.74, "learning_rate": 4.613206056192189e-05, "loss": 2.4653, "step": 1562000 }, { "epoch": 7.74, "learning_rate": 4.613082197549581e-05, "loss": 2.4584, "step": 1562500 }, { "epoch": 7.74, "learning_rate": 4.6129583389069724e-05, "loss": 2.4773, "step": 1563000 }, { "epoch": 7.75, "learning_rate": 4.612834727981649e-05, "loss": 2.4909, "step": 1563500 }, { "epoch": 7.75, "learning_rate": 4.612711117056326e-05, "loss": 2.4713, "step": 1564000 }, { "epoch": 7.75, "learning_rate": 4.612587258413718e-05, "loss": 2.4771, "step": 1564500 }, { "epoch": 7.75, "learning_rate": 4.6124633997711095e-05, "loss": 2.4854, "step": 1565000 }, { "epoch": 7.76, "learning_rate": 4.6123397888457864e-05, "loss": 2.4965, "step": 1565500 }, { "epoch": 7.76, "learning_rate": 4.612215930203178e-05, "loss": 2.4689, "step": 1566000 }, { "epoch": 7.76, "learning_rate": 4.61209207156057e-05, "loss": 2.4708, "step": 1566500 }, { "epoch": 7.76, "learning_rate": 4.6119682129179615e-05, "loss": 2.4845, "step": 1567000 }, { "epoch": 7.77, "learning_rate": 4.611844354275353e-05, "loss": 2.473, "step": 1567500 }, { "epoch": 7.77, "learning_rate": 4.611720495632744e-05, "loss": 2.4954, "step": 1568000 }, { "epoch": 7.77, "learning_rate": 4.611596636990136e-05, "loss": 2.4764, "step": 1568500 }, { "epoch": 7.77, "learning_rate": 4.6114727783475276e-05, "loss": 2.4517, "step": 1569000 }, { "epoch": 7.78, "learning_rate": 4.611348919704919e-05, "loss": 2.4814, "step": 1569500 }, { "epoch": 7.78, "learning_rate": 4.611225061062311e-05, "loss": 2.4804, "step": 1570000 }, { "epoch": 7.78, "learning_rate": 4.6111012024197027e-05, "loss": 2.4678, "step": 1570500 }, { "epoch": 7.78, "learning_rate": 4.6109773437770943e-05, "loss": 2.4607, "step": 1571000 }, { "epoch": 7.79, "learning_rate": 4.610853485134486e-05, "loss": 2.4835, "step": 1571500 }, { "epoch": 7.79, "learning_rate": 4.610729874209162e-05, "loss": 2.4995, "step": 1572000 }, { "epoch": 7.79, "learning_rate": 4.610606015566554e-05, "loss": 2.4387, "step": 1572500 }, { "epoch": 7.79, "learning_rate": 4.6104824046412315e-05, "loss": 2.451, "step": 1573000 }, { "epoch": 7.8, "learning_rate": 4.610358545998623e-05, "loss": 2.4665, "step": 1573500 }, { "epoch": 7.8, "learning_rate": 4.610234687356015e-05, "loss": 2.4839, "step": 1574000 }, { "epoch": 7.8, "learning_rate": 4.610110828713406e-05, "loss": 2.4874, "step": 1574500 }, { "epoch": 7.8, "learning_rate": 4.6099869700707976e-05, "loss": 2.4841, "step": 1575000 }, { "epoch": 7.81, "learning_rate": 4.609863111428189e-05, "loss": 2.4755, "step": 1575500 }, { "epoch": 7.81, "learning_rate": 4.609739252785581e-05, "loss": 2.4554, "step": 1576000 }, { "epoch": 7.81, "learning_rate": 4.609615394142973e-05, "loss": 2.4956, "step": 1576500 }, { "epoch": 7.81, "learning_rate": 4.6094915355003644e-05, "loss": 2.4772, "step": 1577000 }, { "epoch": 7.82, "learning_rate": 4.609367676857756e-05, "loss": 2.4684, "step": 1577500 }, { "epoch": 7.82, "learning_rate": 4.609244065932433e-05, "loss": 2.4815, "step": 1578000 }, { "epoch": 7.82, "learning_rate": 4.6091202072898246e-05, "loss": 2.4774, "step": 1578500 }, { "epoch": 7.82, "learning_rate": 4.6089963486472156e-05, "loss": 2.4714, "step": 1579000 }, { "epoch": 7.83, "learning_rate": 4.608872490004607e-05, "loss": 2.4645, "step": 1579500 }, { "epoch": 7.83, "learning_rate": 4.608748631361999e-05, "loss": 2.485, "step": 1580000 }, { "epoch": 7.83, "learning_rate": 4.608624772719391e-05, "loss": 2.4805, "step": 1580500 }, { "epoch": 7.83, "learning_rate": 4.6085009140767824e-05, "loss": 2.4852, "step": 1581000 }, { "epoch": 7.84, "learning_rate": 4.608377055434174e-05, "loss": 2.4744, "step": 1581500 }, { "epoch": 7.84, "learning_rate": 4.608253196791566e-05, "loss": 2.4791, "step": 1582000 }, { "epoch": 7.84, "learning_rate": 4.608129585866243e-05, "loss": 2.4555, "step": 1582500 }, { "epoch": 7.84, "learning_rate": 4.60800597494092e-05, "loss": 2.4637, "step": 1583000 }, { "epoch": 7.85, "learning_rate": 4.607882116298311e-05, "loss": 2.4863, "step": 1583500 }, { "epoch": 7.85, "learning_rate": 4.607758257655703e-05, "loss": 2.4752, "step": 1584000 }, { "epoch": 7.85, "learning_rate": 4.6076343990130946e-05, "loss": 2.4594, "step": 1584500 }, { "epoch": 7.85, "learning_rate": 4.6075107880877715e-05, "loss": 2.4782, "step": 1585000 }, { "epoch": 7.86, "learning_rate": 4.607386929445163e-05, "loss": 2.4713, "step": 1585500 }, { "epoch": 7.86, "learning_rate": 4.607263070802555e-05, "loss": 2.4576, "step": 1586000 }, { "epoch": 7.86, "learning_rate": 4.607139459877232e-05, "loss": 2.4664, "step": 1586500 }, { "epoch": 7.86, "learning_rate": 4.6070156012346235e-05, "loss": 2.4834, "step": 1587000 }, { "epoch": 7.87, "learning_rate": 4.606891742592015e-05, "loss": 2.5103, "step": 1587500 }, { "epoch": 7.87, "learning_rate": 4.6067681316666914e-05, "loss": 2.4813, "step": 1588000 }, { "epoch": 7.87, "learning_rate": 4.606644273024083e-05, "loss": 2.4637, "step": 1588500 }, { "epoch": 7.87, "learning_rate": 4.606520414381475e-05, "loss": 2.4753, "step": 1589000 }, { "epoch": 7.87, "learning_rate": 4.6063965557388665e-05, "loss": 2.4819, "step": 1589500 }, { "epoch": 7.88, "learning_rate": 4.606272697096258e-05, "loss": 2.4511, "step": 1590000 }, { "epoch": 7.88, "learning_rate": 4.60614883845365e-05, "loss": 2.4717, "step": 1590500 }, { "epoch": 7.88, "learning_rate": 4.6060249798110415e-05, "loss": 2.489, "step": 1591000 }, { "epoch": 7.88, "learning_rate": 4.605901121168433e-05, "loss": 2.4643, "step": 1591500 }, { "epoch": 7.89, "learning_rate": 4.605777262525825e-05, "loss": 2.4807, "step": 1592000 }, { "epoch": 7.89, "learning_rate": 4.605653651600502e-05, "loss": 2.474, "step": 1592500 }, { "epoch": 7.89, "learning_rate": 4.6055297929578935e-05, "loss": 2.4841, "step": 1593000 }, { "epoch": 7.89, "learning_rate": 4.605405934315285e-05, "loss": 2.4805, "step": 1593500 }, { "epoch": 7.9, "learning_rate": 4.605282075672677e-05, "loss": 2.4747, "step": 1594000 }, { "epoch": 7.9, "learning_rate": 4.6051582170300686e-05, "loss": 2.4757, "step": 1594500 }, { "epoch": 7.9, "learning_rate": 4.60503435838746e-05, "loss": 2.4795, "step": 1595000 }, { "epoch": 7.9, "learning_rate": 4.604910499744852e-05, "loss": 2.4725, "step": 1595500 }, { "epoch": 7.91, "learning_rate": 4.604786641102243e-05, "loss": 2.4809, "step": 1596000 }, { "epoch": 7.91, "learning_rate": 4.604662782459635e-05, "loss": 2.4966, "step": 1596500 }, { "epoch": 7.91, "learning_rate": 4.6045389238170264e-05, "loss": 2.4485, "step": 1597000 }, { "epoch": 7.91, "learning_rate": 4.604415065174418e-05, "loss": 2.4603, "step": 1597500 }, { "epoch": 7.92, "learning_rate": 4.60429120653181e-05, "loss": 2.4999, "step": 1598000 }, { "epoch": 7.92, "learning_rate": 4.6041673478892014e-05, "loss": 2.4641, "step": 1598500 }, { "epoch": 7.92, "learning_rate": 4.604043736963878e-05, "loss": 2.473, "step": 1599000 }, { "epoch": 7.92, "learning_rate": 4.603919878321269e-05, "loss": 2.4771, "step": 1599500 }, { "epoch": 7.93, "learning_rate": 4.603796267395947e-05, "loss": 2.4965, "step": 1600000 }, { "epoch": 7.93, "learning_rate": 4.6036724087533386e-05, "loss": 2.4797, "step": 1600500 }, { "epoch": 7.93, "learning_rate": 4.60354855011073e-05, "loss": 2.4846, "step": 1601000 }, { "epoch": 7.93, "learning_rate": 4.603424691468122e-05, "loss": 2.4528, "step": 1601500 }, { "epoch": 7.94, "learning_rate": 4.6033008328255137e-05, "loss": 2.4811, "step": 1602000 }, { "epoch": 7.94, "learning_rate": 4.603176974182905e-05, "loss": 2.4997, "step": 1602500 }, { "epoch": 7.94, "learning_rate": 4.6030533632575816e-05, "loss": 2.4839, "step": 1603000 }, { "epoch": 7.94, "learning_rate": 4.602929504614973e-05, "loss": 2.4663, "step": 1603500 }, { "epoch": 7.95, "learning_rate": 4.60280589368965e-05, "loss": 2.4979, "step": 1604000 }, { "epoch": 7.95, "learning_rate": 4.602682035047042e-05, "loss": 2.4584, "step": 1604500 }, { "epoch": 7.95, "learning_rate": 4.6025581764044335e-05, "loss": 2.4736, "step": 1605000 }, { "epoch": 7.95, "learning_rate": 4.602434317761825e-05, "loss": 2.4942, "step": 1605500 }, { "epoch": 7.96, "learning_rate": 4.602310459119217e-05, "loss": 2.4717, "step": 1606000 }, { "epoch": 7.96, "learning_rate": 4.6021866004766086e-05, "loss": 2.4614, "step": 1606500 }, { "epoch": 7.96, "learning_rate": 4.602062741834e-05, "loss": 2.4844, "step": 1607000 }, { "epoch": 7.96, "learning_rate": 4.601938883191392e-05, "loss": 2.469, "step": 1607500 }, { "epoch": 7.97, "learning_rate": 4.601815272266068e-05, "loss": 2.48, "step": 1608000 }, { "epoch": 7.97, "learning_rate": 4.60169141362346e-05, "loss": 2.4687, "step": 1608500 }, { "epoch": 7.97, "learning_rate": 4.6015675549808516e-05, "loss": 2.4747, "step": 1609000 }, { "epoch": 7.97, "learning_rate": 4.601443696338243e-05, "loss": 2.4615, "step": 1609500 }, { "epoch": 7.98, "learning_rate": 4.601319837695635e-05, "loss": 2.5101, "step": 1610000 }, { "epoch": 7.98, "learning_rate": 4.6011959790530266e-05, "loss": 2.4775, "step": 1610500 }, { "epoch": 7.98, "learning_rate": 4.6010723681277035e-05, "loss": 2.4492, "step": 1611000 }, { "epoch": 7.98, "learning_rate": 4.600948509485095e-05, "loss": 2.49, "step": 1611500 }, { "epoch": 7.99, "learning_rate": 4.600824650842487e-05, "loss": 2.4681, "step": 1612000 }, { "epoch": 7.99, "learning_rate": 4.6007007921998786e-05, "loss": 2.4747, "step": 1612500 }, { "epoch": 7.99, "learning_rate": 4.60057693355727e-05, "loss": 2.4882, "step": 1613000 }, { "epoch": 7.99, "learning_rate": 4.600453074914662e-05, "loss": 2.4759, "step": 1613500 }, { "epoch": 8.0, "learning_rate": 4.600329216272054e-05, "loss": 2.4905, "step": 1614000 }, { "epoch": 8.0, "learning_rate": 4.6002053576294454e-05, "loss": 2.4694, "step": 1614500 }, { "epoch": 8.0, "eval_accuracy": 0.6397272831364085, "eval_accuracy_mlm": 0.59286278093972, "eval_accuracy_nsp": 0.8609541141909092, "eval_loss": 2.4369935989379883, "eval_runtime": 147.379, "eval_samples_per_second": 1729.955, "eval_steps_per_second": 72.086, "step": 1614744 }, { "epoch": 8.0, "learning_rate": 4.6000814989868364e-05, "loss": 2.4571, "step": 1615000 }, { "epoch": 8.0, "learning_rate": 4.599957640344228e-05, "loss": 2.4342, "step": 1615500 }, { "epoch": 8.01, "learning_rate": 4.599834029418905e-05, "loss": 2.4282, "step": 1616000 }, { "epoch": 8.01, "learning_rate": 4.599710418493582e-05, "loss": 2.4413, "step": 1616500 }, { "epoch": 8.01, "learning_rate": 4.5995865598509735e-05, "loss": 2.4362, "step": 1617000 }, { "epoch": 8.01, "learning_rate": 4.599462701208365e-05, "loss": 2.4393, "step": 1617500 }, { "epoch": 8.02, "learning_rate": 4.599338842565757e-05, "loss": 2.4598, "step": 1618000 }, { "epoch": 8.02, "learning_rate": 4.599215231640434e-05, "loss": 2.4666, "step": 1618500 }, { "epoch": 8.02, "learning_rate": 4.5990913729978255e-05, "loss": 2.4484, "step": 1619000 }, { "epoch": 8.02, "learning_rate": 4.598967514355217e-05, "loss": 2.4482, "step": 1619500 }, { "epoch": 8.03, "learning_rate": 4.598843655712609e-05, "loss": 2.4395, "step": 1620000 }, { "epoch": 8.03, "learning_rate": 4.598720044787285e-05, "loss": 2.4311, "step": 1620500 }, { "epoch": 8.03, "learning_rate": 4.598596186144677e-05, "loss": 2.438, "step": 1621000 }, { "epoch": 8.03, "learning_rate": 4.5984723275020685e-05, "loss": 2.4487, "step": 1621500 }, { "epoch": 8.04, "learning_rate": 4.59834846885946e-05, "loss": 2.4532, "step": 1622000 }, { "epoch": 8.04, "learning_rate": 4.598224610216852e-05, "loss": 2.4735, "step": 1622500 }, { "epoch": 8.04, "learning_rate": 4.5981007515742436e-05, "loss": 2.4447, "step": 1623000 }, { "epoch": 8.04, "learning_rate": 4.597976892931635e-05, "loss": 2.4488, "step": 1623500 }, { "epoch": 8.05, "learning_rate": 4.597853034289027e-05, "loss": 2.437, "step": 1624000 }, { "epoch": 8.05, "learning_rate": 4.597729423363704e-05, "loss": 2.4128, "step": 1624500 }, { "epoch": 8.05, "learning_rate": 4.5976055647210955e-05, "loss": 2.4574, "step": 1625000 }, { "epoch": 8.05, "learning_rate": 4.5974822015130576e-05, "loss": 2.4699, "step": 1625500 }, { "epoch": 8.06, "learning_rate": 4.597358342870449e-05, "loss": 2.4535, "step": 1626000 }, { "epoch": 8.06, "learning_rate": 4.597234484227841e-05, "loss": 2.463, "step": 1626500 }, { "epoch": 8.06, "learning_rate": 4.5971106255852327e-05, "loss": 2.477, "step": 1627000 }, { "epoch": 8.06, "learning_rate": 4.5969867669426244e-05, "loss": 2.4738, "step": 1627500 }, { "epoch": 8.07, "learning_rate": 4.5968629083000154e-05, "loss": 2.4388, "step": 1628000 }, { "epoch": 8.07, "learning_rate": 4.596739049657407e-05, "loss": 2.4269, "step": 1628500 }, { "epoch": 8.07, "learning_rate": 4.596615191014799e-05, "loss": 2.453, "step": 1629000 }, { "epoch": 8.07, "learning_rate": 4.5964913323721904e-05, "loss": 2.4559, "step": 1629500 }, { "epoch": 8.08, "learning_rate": 4.596367473729582e-05, "loss": 2.4375, "step": 1630000 }, { "epoch": 8.08, "learning_rate": 4.596243615086974e-05, "loss": 2.4509, "step": 1630500 }, { "epoch": 8.08, "learning_rate": 4.5961197564443655e-05, "loss": 2.4442, "step": 1631000 }, { "epoch": 8.08, "learning_rate": 4.595995897801757e-05, "loss": 2.4474, "step": 1631500 }, { "epoch": 8.09, "learning_rate": 4.595872039159149e-05, "loss": 2.4552, "step": 1632000 }, { "epoch": 8.09, "learning_rate": 4.5957481805165406e-05, "loss": 2.4695, "step": 1632500 }, { "epoch": 8.09, "learning_rate": 4.595624321873932e-05, "loss": 2.4425, "step": 1633000 }, { "epoch": 8.09, "learning_rate": 4.595500463231324e-05, "loss": 2.4627, "step": 1633500 }, { "epoch": 8.1, "learning_rate": 4.595376852306e-05, "loss": 2.4539, "step": 1634000 }, { "epoch": 8.1, "learning_rate": 4.595252993663392e-05, "loss": 2.4307, "step": 1634500 }, { "epoch": 8.1, "learning_rate": 4.5951291350207836e-05, "loss": 2.446, "step": 1635000 }, { "epoch": 8.1, "learning_rate": 4.595005276378175e-05, "loss": 2.4706, "step": 1635500 }, { "epoch": 8.11, "learning_rate": 4.594881417735567e-05, "loss": 2.4612, "step": 1636000 }, { "epoch": 8.11, "learning_rate": 4.594757806810244e-05, "loss": 2.4764, "step": 1636500 }, { "epoch": 8.11, "learning_rate": 4.5946339481676355e-05, "loss": 2.4155, "step": 1637000 }, { "epoch": 8.11, "learning_rate": 4.594510337242312e-05, "loss": 2.4596, "step": 1637500 }, { "epoch": 8.12, "learning_rate": 4.5943864785997034e-05, "loss": 2.4651, "step": 1638000 }, { "epoch": 8.12, "learning_rate": 4.594262619957095e-05, "loss": 2.4594, "step": 1638500 }, { "epoch": 8.12, "learning_rate": 4.594138761314487e-05, "loss": 2.4223, "step": 1639000 }, { "epoch": 8.12, "learning_rate": 4.5940149026718785e-05, "loss": 2.4587, "step": 1639500 }, { "epoch": 8.13, "learning_rate": 4.59389104402927e-05, "loss": 2.4586, "step": 1640000 }, { "epoch": 8.13, "learning_rate": 4.593767185386662e-05, "loss": 2.4567, "step": 1640500 }, { "epoch": 8.13, "learning_rate": 4.5936433267440536e-05, "loss": 2.4287, "step": 1641000 }, { "epoch": 8.13, "learning_rate": 4.593519468101445e-05, "loss": 2.4658, "step": 1641500 }, { "epoch": 8.14, "learning_rate": 4.593395609458837e-05, "loss": 2.4436, "step": 1642000 }, { "epoch": 8.14, "learning_rate": 4.593271750816229e-05, "loss": 2.4596, "step": 1642500 }, { "epoch": 8.14, "learning_rate": 4.5931478921736204e-05, "loss": 2.4541, "step": 1643000 }, { "epoch": 8.14, "learning_rate": 4.593024281248297e-05, "loss": 2.4404, "step": 1643500 }, { "epoch": 8.14, "learning_rate": 4.592900422605689e-05, "loss": 2.4754, "step": 1644000 }, { "epoch": 8.15, "learning_rate": 4.592776811680365e-05, "loss": 2.4406, "step": 1644500 }, { "epoch": 8.15, "learning_rate": 4.592652953037757e-05, "loss": 2.4571, "step": 1645000 }, { "epoch": 8.15, "learning_rate": 4.5925290943951485e-05, "loss": 2.4375, "step": 1645500 }, { "epoch": 8.15, "learning_rate": 4.59240523575254e-05, "loss": 2.4535, "step": 1646000 }, { "epoch": 8.16, "learning_rate": 4.592281377109932e-05, "loss": 2.4388, "step": 1646500 }, { "epoch": 8.16, "learning_rate": 4.5921575184673236e-05, "loss": 2.4679, "step": 1647000 }, { "epoch": 8.16, "learning_rate": 4.592033659824715e-05, "loss": 2.443, "step": 1647500 }, { "epoch": 8.16, "learning_rate": 4.591909801182107e-05, "loss": 2.449, "step": 1648000 }, { "epoch": 8.17, "learning_rate": 4.591786190256784e-05, "loss": 2.463, "step": 1648500 }, { "epoch": 8.17, "learning_rate": 4.5916623316141756e-05, "loss": 2.4809, "step": 1649000 }, { "epoch": 8.17, "learning_rate": 4.591538472971567e-05, "loss": 2.4569, "step": 1649500 }, { "epoch": 8.17, "learning_rate": 4.591414614328959e-05, "loss": 2.4376, "step": 1650000 }, { "epoch": 8.18, "learning_rate": 4.5912907556863506e-05, "loss": 2.4565, "step": 1650500 }, { "epoch": 8.18, "learning_rate": 4.591166897043742e-05, "loss": 2.448, "step": 1651000 }, { "epoch": 8.18, "learning_rate": 4.591043038401134e-05, "loss": 2.4753, "step": 1651500 }, { "epoch": 8.18, "learning_rate": 4.590919179758526e-05, "loss": 2.4302, "step": 1652000 }, { "epoch": 8.19, "learning_rate": 4.5907953211159174e-05, "loss": 2.4589, "step": 1652500 }, { "epoch": 8.19, "learning_rate": 4.590671462473309e-05, "loss": 2.454, "step": 1653000 }, { "epoch": 8.19, "learning_rate": 4.590547603830701e-05, "loss": 2.442, "step": 1653500 }, { "epoch": 8.19, "learning_rate": 4.590423992905377e-05, "loss": 2.4509, "step": 1654000 }, { "epoch": 8.2, "learning_rate": 4.590300134262769e-05, "loss": 2.4578, "step": 1654500 }, { "epoch": 8.2, "learning_rate": 4.5901765233374456e-05, "loss": 2.4688, "step": 1655000 }, { "epoch": 8.2, "learning_rate": 4.590052664694837e-05, "loss": 2.4823, "step": 1655500 }, { "epoch": 8.2, "learning_rate": 4.589928806052229e-05, "loss": 2.4619, "step": 1656000 }, { "epoch": 8.21, "learning_rate": 4.5898049474096207e-05, "loss": 2.443, "step": 1656500 }, { "epoch": 8.21, "learning_rate": 4.5896810887670123e-05, "loss": 2.4455, "step": 1657000 }, { "epoch": 8.21, "learning_rate": 4.5895574778416885e-05, "loss": 2.4619, "step": 1657500 }, { "epoch": 8.21, "learning_rate": 4.589433866916366e-05, "loss": 2.4513, "step": 1658000 }, { "epoch": 8.22, "learning_rate": 4.589310008273758e-05, "loss": 2.4317, "step": 1658500 }, { "epoch": 8.22, "learning_rate": 4.5891861496311495e-05, "loss": 2.4583, "step": 1659000 }, { "epoch": 8.22, "learning_rate": 4.5890625387058264e-05, "loss": 2.4789, "step": 1659500 }, { "epoch": 8.22, "learning_rate": 4.588938680063218e-05, "loss": 2.4496, "step": 1660000 }, { "epoch": 8.23, "learning_rate": 4.58881482142061e-05, "loss": 2.4006, "step": 1660500 }, { "epoch": 8.23, "learning_rate": 4.5886909627780015e-05, "loss": 2.4678, "step": 1661000 }, { "epoch": 8.23, "learning_rate": 4.588567104135393e-05, "loss": 2.4779, "step": 1661500 }, { "epoch": 8.23, "learning_rate": 4.588443245492784e-05, "loss": 2.4671, "step": 1662000 }, { "epoch": 8.24, "learning_rate": 4.588319386850176e-05, "loss": 2.457, "step": 1662500 }, { "epoch": 8.24, "learning_rate": 4.5881955282075675e-05, "loss": 2.466, "step": 1663000 }, { "epoch": 8.24, "learning_rate": 4.588071669564959e-05, "loss": 2.4845, "step": 1663500 }, { "epoch": 8.24, "learning_rate": 4.587947810922351e-05, "loss": 2.4621, "step": 1664000 }, { "epoch": 8.25, "learning_rate": 4.587823952279742e-05, "loss": 2.455, "step": 1664500 }, { "epoch": 8.25, "learning_rate": 4.5877000936371336e-05, "loss": 2.4329, "step": 1665000 }, { "epoch": 8.25, "learning_rate": 4.587576234994525e-05, "loss": 2.4424, "step": 1665500 }, { "epoch": 8.25, "learning_rate": 4.587452376351917e-05, "loss": 2.4667, "step": 1666000 }, { "epoch": 8.26, "learning_rate": 4.587328765426594e-05, "loss": 2.4531, "step": 1666500 }, { "epoch": 8.26, "learning_rate": 4.5872049067839856e-05, "loss": 2.4527, "step": 1667000 }, { "epoch": 8.26, "learning_rate": 4.587081048141377e-05, "loss": 2.4457, "step": 1667500 }, { "epoch": 8.26, "learning_rate": 4.586957189498769e-05, "loss": 2.4557, "step": 1668000 }, { "epoch": 8.27, "learning_rate": 4.586833330856161e-05, "loss": 2.4627, "step": 1668500 }, { "epoch": 8.27, "learning_rate": 4.5867094722135524e-05, "loss": 2.4349, "step": 1669000 }, { "epoch": 8.27, "learning_rate": 4.586585613570944e-05, "loss": 2.4301, "step": 1669500 }, { "epoch": 8.27, "learning_rate": 4.586461754928336e-05, "loss": 2.4527, "step": 1670000 }, { "epoch": 8.28, "learning_rate": 4.5863378962857274e-05, "loss": 2.4991, "step": 1670500 }, { "epoch": 8.28, "learning_rate": 4.5862142853604037e-05, "loss": 2.4676, "step": 1671000 }, { "epoch": 8.28, "learning_rate": 4.5860904267177953e-05, "loss": 2.4682, "step": 1671500 }, { "epoch": 8.28, "learning_rate": 4.585966568075187e-05, "loss": 2.4547, "step": 1672000 }, { "epoch": 8.29, "learning_rate": 4.585842709432579e-05, "loss": 2.454, "step": 1672500 }, { "epoch": 8.29, "learning_rate": 4.5857190985072556e-05, "loss": 2.4367, "step": 1673000 }, { "epoch": 8.29, "learning_rate": 4.585595239864647e-05, "loss": 2.4829, "step": 1673500 }, { "epoch": 8.29, "learning_rate": 4.585471381222039e-05, "loss": 2.4399, "step": 1674000 }, { "epoch": 8.3, "learning_rate": 4.585347522579431e-05, "loss": 2.4632, "step": 1674500 }, { "epoch": 8.3, "learning_rate": 4.5852236639368224e-05, "loss": 2.4526, "step": 1675000 }, { "epoch": 8.3, "learning_rate": 4.585099805294214e-05, "loss": 2.4805, "step": 1675500 }, { "epoch": 8.3, "learning_rate": 4.584976194368891e-05, "loss": 2.4633, "step": 1676000 }, { "epoch": 8.31, "learning_rate": 4.5848523357262826e-05, "loss": 2.4558, "step": 1676500 }, { "epoch": 8.31, "learning_rate": 4.5847284770836743e-05, "loss": 2.4494, "step": 1677000 }, { "epoch": 8.31, "learning_rate": 4.584604618441066e-05, "loss": 2.4412, "step": 1677500 }, { "epoch": 8.31, "learning_rate": 4.584481255233028e-05, "loss": 2.4841, "step": 1678000 }, { "epoch": 8.32, "learning_rate": 4.58435739659042e-05, "loss": 2.4545, "step": 1678500 }, { "epoch": 8.32, "learning_rate": 4.5842335379478115e-05, "loss": 2.4488, "step": 1679000 }, { "epoch": 8.32, "learning_rate": 4.584109679305203e-05, "loss": 2.454, "step": 1679500 }, { "epoch": 8.32, "learning_rate": 4.583985820662595e-05, "loss": 2.4556, "step": 1680000 }, { "epoch": 8.33, "learning_rate": 4.5838619620199866e-05, "loss": 2.4793, "step": 1680500 }, { "epoch": 8.33, "learning_rate": 4.5837381033773776e-05, "loss": 2.4807, "step": 1681000 }, { "epoch": 8.33, "learning_rate": 4.583614244734769e-05, "loss": 2.4468, "step": 1681500 }, { "epoch": 8.33, "learning_rate": 4.583490386092161e-05, "loss": 2.4587, "step": 1682000 }, { "epoch": 8.34, "learning_rate": 4.583366775166838e-05, "loss": 2.4789, "step": 1682500 }, { "epoch": 8.34, "learning_rate": 4.583243164241515e-05, "loss": 2.4712, "step": 1683000 }, { "epoch": 8.34, "learning_rate": 4.5831193055989064e-05, "loss": 2.4732, "step": 1683500 }, { "epoch": 8.34, "learning_rate": 4.582995446956298e-05, "loss": 2.4527, "step": 1684000 }, { "epoch": 8.35, "learning_rate": 4.582871836030975e-05, "loss": 2.4415, "step": 1684500 }, { "epoch": 8.35, "learning_rate": 4.582747977388367e-05, "loss": 2.4473, "step": 1685000 }, { "epoch": 8.35, "learning_rate": 4.582624118745758e-05, "loss": 2.4513, "step": 1685500 }, { "epoch": 8.35, "learning_rate": 4.5825002601031494e-05, "loss": 2.4484, "step": 1686000 }, { "epoch": 8.36, "learning_rate": 4.582376649177826e-05, "loss": 2.454, "step": 1686500 }, { "epoch": 8.36, "learning_rate": 4.582252790535218e-05, "loss": 2.4445, "step": 1687000 }, { "epoch": 8.36, "learning_rate": 4.58212893189261e-05, "loss": 2.4324, "step": 1687500 }, { "epoch": 8.36, "learning_rate": 4.5820050732500014e-05, "loss": 2.4387, "step": 1688000 }, { "epoch": 8.37, "learning_rate": 4.581881214607393e-05, "loss": 2.4302, "step": 1688500 }, { "epoch": 8.37, "learning_rate": 4.581757355964785e-05, "loss": 2.4632, "step": 1689000 }, { "epoch": 8.37, "learning_rate": 4.5816334973221764e-05, "loss": 2.4692, "step": 1689500 }, { "epoch": 8.37, "learning_rate": 4.581509638679568e-05, "loss": 2.4576, "step": 1690000 }, { "epoch": 8.38, "learning_rate": 4.58138578003696e-05, "loss": 2.454, "step": 1690500 }, { "epoch": 8.38, "learning_rate": 4.5812619213943515e-05, "loss": 2.4575, "step": 1691000 }, { "epoch": 8.38, "learning_rate": 4.581138062751743e-05, "loss": 2.4608, "step": 1691500 }, { "epoch": 8.38, "learning_rate": 4.581014204109135e-05, "loss": 2.4546, "step": 1692000 }, { "epoch": 8.39, "learning_rate": 4.580890593183811e-05, "loss": 2.4915, "step": 1692500 }, { "epoch": 8.39, "learning_rate": 4.580766734541203e-05, "loss": 2.4625, "step": 1693000 }, { "epoch": 8.39, "learning_rate": 4.5806428758985945e-05, "loss": 2.4287, "step": 1693500 }, { "epoch": 8.39, "learning_rate": 4.580519017255986e-05, "loss": 2.45, "step": 1694000 }, { "epoch": 8.4, "learning_rate": 4.580395406330663e-05, "loss": 2.4605, "step": 1694500 }, { "epoch": 8.4, "learning_rate": 4.580271547688055e-05, "loss": 2.4412, "step": 1695000 }, { "epoch": 8.4, "learning_rate": 4.5801476890454464e-05, "loss": 2.4722, "step": 1695500 }, { "epoch": 8.4, "learning_rate": 4.580023830402838e-05, "loss": 2.4585, "step": 1696000 }, { "epoch": 8.41, "learning_rate": 4.57989997176023e-05, "loss": 2.4503, "step": 1696500 }, { "epoch": 8.41, "learning_rate": 4.579776360834907e-05, "loss": 2.4558, "step": 1697000 }, { "epoch": 8.41, "learning_rate": 4.5796525021922984e-05, "loss": 2.4635, "step": 1697500 }, { "epoch": 8.41, "learning_rate": 4.5795288912669746e-05, "loss": 2.4488, "step": 1698000 }, { "epoch": 8.41, "learning_rate": 4.579405032624366e-05, "loss": 2.4235, "step": 1698500 }, { "epoch": 8.42, "learning_rate": 4.579281173981758e-05, "loss": 2.471, "step": 1699000 }, { "epoch": 8.42, "learning_rate": 4.5791575630564356e-05, "loss": 2.4608, "step": 1699500 }, { "epoch": 8.42, "learning_rate": 4.579033704413827e-05, "loss": 2.4558, "step": 1700000 }, { "epoch": 8.42, "learning_rate": 4.578909845771218e-05, "loss": 2.4506, "step": 1700500 }, { "epoch": 8.43, "learning_rate": 4.57878598712861e-05, "loss": 2.4858, "step": 1701000 }, { "epoch": 8.43, "learning_rate": 4.5786621284860016e-05, "loss": 2.4645, "step": 1701500 }, { "epoch": 8.43, "learning_rate": 4.5785385175606785e-05, "loss": 2.4548, "step": 1702000 }, { "epoch": 8.43, "learning_rate": 4.57841465891807e-05, "loss": 2.4721, "step": 1702500 }, { "epoch": 8.44, "learning_rate": 4.578290800275462e-05, "loss": 2.4628, "step": 1703000 }, { "epoch": 8.44, "learning_rate": 4.578166941632853e-05, "loss": 2.4656, "step": 1703500 }, { "epoch": 8.44, "learning_rate": 4.5780430829902446e-05, "loss": 2.4542, "step": 1704000 }, { "epoch": 8.44, "learning_rate": 4.577919224347636e-05, "loss": 2.4838, "step": 1704500 }, { "epoch": 8.45, "learning_rate": 4.577795365705028e-05, "loss": 2.4511, "step": 1705000 }, { "epoch": 8.45, "learning_rate": 4.57767150706242e-05, "loss": 2.4663, "step": 1705500 }, { "epoch": 8.45, "learning_rate": 4.5775476484198114e-05, "loss": 2.4657, "step": 1706000 }, { "epoch": 8.45, "learning_rate": 4.577423789777203e-05, "loss": 2.4396, "step": 1706500 }, { "epoch": 8.46, "learning_rate": 4.577299931134595e-05, "loss": 2.4544, "step": 1707000 }, { "epoch": 8.46, "learning_rate": 4.5771760724919865e-05, "loss": 2.4735, "step": 1707500 }, { "epoch": 8.46, "learning_rate": 4.577052213849378e-05, "loss": 2.4564, "step": 1708000 }, { "epoch": 8.46, "learning_rate": 4.57692835520677e-05, "loss": 2.4455, "step": 1708500 }, { "epoch": 8.47, "learning_rate": 4.576804744281447e-05, "loss": 2.4593, "step": 1709000 }, { "epoch": 8.47, "learning_rate": 4.5766808856388384e-05, "loss": 2.468, "step": 1709500 }, { "epoch": 8.47, "learning_rate": 4.57655702699623e-05, "loss": 2.4607, "step": 1710000 }, { "epoch": 8.47, "learning_rate": 4.576433168353622e-05, "loss": 2.4671, "step": 1710500 }, { "epoch": 8.48, "learning_rate": 4.5763093097110135e-05, "loss": 2.4854, "step": 1711000 }, { "epoch": 8.48, "learning_rate": 4.576185451068405e-05, "loss": 2.4792, "step": 1711500 }, { "epoch": 8.48, "learning_rate": 4.576061592425797e-05, "loss": 2.4591, "step": 1712000 }, { "epoch": 8.48, "learning_rate": 4.575937733783188e-05, "loss": 2.464, "step": 1712500 }, { "epoch": 8.49, "learning_rate": 4.5758138751405796e-05, "loss": 2.4626, "step": 1713000 }, { "epoch": 8.49, "learning_rate": 4.575690016497971e-05, "loss": 2.4861, "step": 1713500 }, { "epoch": 8.49, "learning_rate": 4.5755666532899334e-05, "loss": 2.4691, "step": 1714000 }, { "epoch": 8.49, "learning_rate": 4.575442794647325e-05, "loss": 2.4656, "step": 1714500 }, { "epoch": 8.5, "learning_rate": 4.575318936004717e-05, "loss": 2.4999, "step": 1715000 }, { "epoch": 8.5, "learning_rate": 4.5751950773621084e-05, "loss": 2.4376, "step": 1715500 }, { "epoch": 8.5, "learning_rate": 4.5750712187195e-05, "loss": 2.4519, "step": 1716000 }, { "epoch": 8.5, "learning_rate": 4.5749476077941763e-05, "loss": 2.4505, "step": 1716500 }, { "epoch": 8.51, "learning_rate": 4.574823749151568e-05, "loss": 2.4783, "step": 1717000 }, { "epoch": 8.51, "learning_rate": 4.57469989050896e-05, "loss": 2.437, "step": 1717500 }, { "epoch": 8.51, "learning_rate": 4.5745760318663514e-05, "loss": 2.4739, "step": 1718000 }, { "epoch": 8.51, "learning_rate": 4.574452420941029e-05, "loss": 2.4648, "step": 1718500 }, { "epoch": 8.52, "learning_rate": 4.574328562298421e-05, "loss": 2.4603, "step": 1719000 }, { "epoch": 8.52, "learning_rate": 4.574204703655812e-05, "loss": 2.4343, "step": 1719500 }, { "epoch": 8.52, "learning_rate": 4.5740808450132034e-05, "loss": 2.4705, "step": 1720000 }, { "epoch": 8.52, "learning_rate": 4.573956986370595e-05, "loss": 2.4513, "step": 1720500 }, { "epoch": 8.53, "learning_rate": 4.573833127727987e-05, "loss": 2.4702, "step": 1721000 }, { "epoch": 8.53, "learning_rate": 4.5737092690853785e-05, "loss": 2.4558, "step": 1721500 }, { "epoch": 8.53, "learning_rate": 4.57358541044277e-05, "loss": 2.4708, "step": 1722000 }, { "epoch": 8.53, "learning_rate": 4.573461551800162e-05, "loss": 2.4531, "step": 1722500 }, { "epoch": 8.54, "learning_rate": 4.5733376931575535e-05, "loss": 2.4685, "step": 1723000 }, { "epoch": 8.54, "learning_rate": 4.573213834514945e-05, "loss": 2.4622, "step": 1723500 }, { "epoch": 8.54, "learning_rate": 4.573089975872337e-05, "loss": 2.4663, "step": 1724000 }, { "epoch": 8.54, "learning_rate": 4.572966364947013e-05, "loss": 2.4865, "step": 1724500 }, { "epoch": 8.55, "learning_rate": 4.572842506304405e-05, "loss": 2.4635, "step": 1725000 }, { "epoch": 8.55, "learning_rate": 4.5727186476617965e-05, "loss": 2.4426, "step": 1725500 }, { "epoch": 8.55, "learning_rate": 4.572594789019188e-05, "loss": 2.4789, "step": 1726000 }, { "epoch": 8.55, "learning_rate": 4.57247093037658e-05, "loss": 2.4653, "step": 1726500 }, { "epoch": 8.56, "learning_rate": 4.5723470717339716e-05, "loss": 2.4574, "step": 1727000 }, { "epoch": 8.56, "learning_rate": 4.572223213091363e-05, "loss": 2.447, "step": 1727500 }, { "epoch": 8.56, "learning_rate": 4.572099354448755e-05, "loss": 2.4368, "step": 1728000 }, { "epoch": 8.56, "learning_rate": 4.571975495806147e-05, "loss": 2.4754, "step": 1728500 }, { "epoch": 8.57, "learning_rate": 4.5718516371635384e-05, "loss": 2.4884, "step": 1729000 }, { "epoch": 8.57, "learning_rate": 4.57172777852093e-05, "loss": 2.4793, "step": 1729500 }, { "epoch": 8.57, "learning_rate": 4.571603919878322e-05, "loss": 2.4638, "step": 1730000 }, { "epoch": 8.57, "learning_rate": 4.5714803089529986e-05, "loss": 2.4545, "step": 1730500 }, { "epoch": 8.58, "learning_rate": 4.57135645031039e-05, "loss": 2.4627, "step": 1731000 }, { "epoch": 8.58, "learning_rate": 4.571232591667782e-05, "loss": 2.46, "step": 1731500 }, { "epoch": 8.58, "learning_rate": 4.571108733025174e-05, "loss": 2.4664, "step": 1732000 }, { "epoch": 8.58, "learning_rate": 4.570985369817135e-05, "loss": 2.468, "step": 1732500 }, { "epoch": 8.59, "learning_rate": 4.570861511174527e-05, "loss": 2.4468, "step": 1733000 }, { "epoch": 8.59, "learning_rate": 4.5707376525319185e-05, "loss": 2.4756, "step": 1733500 }, { "epoch": 8.59, "learning_rate": 4.57061379388931e-05, "loss": 2.4659, "step": 1734000 }, { "epoch": 8.59, "learning_rate": 4.570489935246702e-05, "loss": 2.4663, "step": 1734500 }, { "epoch": 8.6, "learning_rate": 4.570366324321379e-05, "loss": 2.4561, "step": 1735000 }, { "epoch": 8.6, "learning_rate": 4.5702424656787704e-05, "loss": 2.4384, "step": 1735500 }, { "epoch": 8.6, "learning_rate": 4.5701186070361615e-05, "loss": 2.4622, "step": 1736000 }, { "epoch": 8.6, "learning_rate": 4.569994748393553e-05, "loss": 2.4654, "step": 1736500 }, { "epoch": 8.61, "learning_rate": 4.569870889750945e-05, "loss": 2.4673, "step": 1737000 }, { "epoch": 8.61, "learning_rate": 4.5697472788256224e-05, "loss": 2.4546, "step": 1737500 }, { "epoch": 8.61, "learning_rate": 4.569623667900299e-05, "loss": 2.4721, "step": 1738000 }, { "epoch": 8.61, "learning_rate": 4.569499809257691e-05, "loss": 2.4357, "step": 1738500 }, { "epoch": 8.62, "learning_rate": 4.569375950615083e-05, "loss": 2.4688, "step": 1739000 }, { "epoch": 8.62, "learning_rate": 4.5692520919724744e-05, "loss": 2.4617, "step": 1739500 }, { "epoch": 8.62, "learning_rate": 4.569128233329866e-05, "loss": 2.4783, "step": 1740000 }, { "epoch": 8.62, "learning_rate": 4.569004374687257e-05, "loss": 2.4474, "step": 1740500 }, { "epoch": 8.63, "learning_rate": 4.568880516044649e-05, "loss": 2.476, "step": 1741000 }, { "epoch": 8.63, "learning_rate": 4.5687566574020405e-05, "loss": 2.4684, "step": 1741500 }, { "epoch": 8.63, "learning_rate": 4.568632798759432e-05, "loss": 2.4567, "step": 1742000 }, { "epoch": 8.63, "learning_rate": 4.568508940116824e-05, "loss": 2.4522, "step": 1742500 }, { "epoch": 8.64, "learning_rate": 4.568385081474215e-05, "loss": 2.4613, "step": 1743000 }, { "epoch": 8.64, "learning_rate": 4.5682612228316065e-05, "loss": 2.481, "step": 1743500 }, { "epoch": 8.64, "learning_rate": 4.568137611906284e-05, "loss": 2.4624, "step": 1744000 }, { "epoch": 8.64, "learning_rate": 4.568013753263675e-05, "loss": 2.4642, "step": 1744500 }, { "epoch": 8.65, "learning_rate": 4.567889894621067e-05, "loss": 2.4412, "step": 1745000 }, { "epoch": 8.65, "learning_rate": 4.5677660359784585e-05, "loss": 2.4577, "step": 1745500 }, { "epoch": 8.65, "learning_rate": 4.56764217733585e-05, "loss": 2.4878, "step": 1746000 }, { "epoch": 8.65, "learning_rate": 4.567518566410528e-05, "loss": 2.4542, "step": 1746500 }, { "epoch": 8.66, "learning_rate": 4.567394707767919e-05, "loss": 2.466, "step": 1747000 }, { "epoch": 8.66, "learning_rate": 4.5672708491253105e-05, "loss": 2.4531, "step": 1747500 }, { "epoch": 8.66, "learning_rate": 4.567146990482702e-05, "loss": 2.4706, "step": 1748000 }, { "epoch": 8.66, "learning_rate": 4.567023131840094e-05, "loss": 2.4771, "step": 1748500 }, { "epoch": 8.67, "learning_rate": 4.5668992731974855e-05, "loss": 2.4667, "step": 1749000 }, { "epoch": 8.67, "learning_rate": 4.5667754145548766e-05, "loss": 2.4672, "step": 1749500 }, { "epoch": 8.67, "learning_rate": 4.566651803629554e-05, "loss": 2.4299, "step": 1750000 }, { "epoch": 8.67, "learning_rate": 4.566527944986945e-05, "loss": 2.4456, "step": 1750500 }, { "epoch": 8.68, "learning_rate": 4.566404334061623e-05, "loss": 2.4839, "step": 1751000 }, { "epoch": 8.68, "learning_rate": 4.5662804754190144e-05, "loss": 2.453, "step": 1751500 }, { "epoch": 8.68, "learning_rate": 4.5661568644936906e-05, "loss": 2.4527, "step": 1752000 }, { "epoch": 8.68, "learning_rate": 4.566033005851082e-05, "loss": 2.4506, "step": 1752500 }, { "epoch": 8.68, "learning_rate": 4.565909147208474e-05, "loss": 2.4345, "step": 1753000 }, { "epoch": 8.69, "learning_rate": 4.565785288565866e-05, "loss": 2.4588, "step": 1753500 }, { "epoch": 8.69, "learning_rate": 4.5656614299232574e-05, "loss": 2.4735, "step": 1754000 }, { "epoch": 8.69, "learning_rate": 4.565537571280649e-05, "loss": 2.4679, "step": 1754500 }, { "epoch": 8.69, "learning_rate": 4.565413712638041e-05, "loss": 2.4841, "step": 1755000 }, { "epoch": 8.7, "learning_rate": 4.5652898539954324e-05, "loss": 2.4871, "step": 1755500 }, { "epoch": 8.7, "learning_rate": 4.565165995352824e-05, "loss": 2.4499, "step": 1756000 }, { "epoch": 8.7, "learning_rate": 4.565042136710216e-05, "loss": 2.4511, "step": 1756500 }, { "epoch": 8.7, "learning_rate": 4.564918278067607e-05, "loss": 2.4915, "step": 1757000 }, { "epoch": 8.71, "learning_rate": 4.5647944194249985e-05, "loss": 2.4732, "step": 1757500 }, { "epoch": 8.71, "learning_rate": 4.56467056078239e-05, "loss": 2.4507, "step": 1758000 }, { "epoch": 8.71, "learning_rate": 4.564546702139782e-05, "loss": 2.4399, "step": 1758500 }, { "epoch": 8.71, "learning_rate": 4.5644228434971736e-05, "loss": 2.4765, "step": 1759000 }, { "epoch": 8.72, "learning_rate": 4.564299480289136e-05, "loss": 2.4684, "step": 1759500 }, { "epoch": 8.72, "learning_rate": 4.5641756216465274e-05, "loss": 2.4655, "step": 1760000 }, { "epoch": 8.72, "learning_rate": 4.564051763003919e-05, "loss": 2.4645, "step": 1760500 }, { "epoch": 8.72, "learning_rate": 4.563927904361311e-05, "loss": 2.4831, "step": 1761000 }, { "epoch": 8.73, "learning_rate": 4.5638040457187025e-05, "loss": 2.4761, "step": 1761500 }, { "epoch": 8.73, "learning_rate": 4.563680187076094e-05, "loss": 2.4824, "step": 1762000 }, { "epoch": 8.73, "learning_rate": 4.563556328433486e-05, "loss": 2.4461, "step": 1762500 }, { "epoch": 8.73, "learning_rate": 4.563432469790877e-05, "loss": 2.4469, "step": 1763000 }, { "epoch": 8.74, "learning_rate": 4.5633086111482685e-05, "loss": 2.4606, "step": 1763500 }, { "epoch": 8.74, "learning_rate": 4.56318475250566e-05, "loss": 2.4706, "step": 1764000 }, { "epoch": 8.74, "learning_rate": 4.563061141580338e-05, "loss": 2.4447, "step": 1764500 }, { "epoch": 8.74, "learning_rate": 4.562937530655014e-05, "loss": 2.4601, "step": 1765000 }, { "epoch": 8.75, "learning_rate": 4.562813672012406e-05, "loss": 2.4485, "step": 1765500 }, { "epoch": 8.75, "learning_rate": 4.5626898133697974e-05, "loss": 2.4624, "step": 1766000 }, { "epoch": 8.75, "learning_rate": 4.562565954727189e-05, "loss": 2.5026, "step": 1766500 }, { "epoch": 8.75, "learning_rate": 4.562442096084581e-05, "loss": 2.4555, "step": 1767000 }, { "epoch": 8.76, "learning_rate": 4.5623182374419725e-05, "loss": 2.4672, "step": 1767500 }, { "epoch": 8.76, "learning_rate": 4.562194378799364e-05, "loss": 2.4704, "step": 1768000 }, { "epoch": 8.76, "learning_rate": 4.5620712633086114e-05, "loss": 2.5025, "step": 1768500 }, { "epoch": 8.76, "learning_rate": 4.561947404666003e-05, "loss": 2.4781, "step": 1769000 }, { "epoch": 8.77, "learning_rate": 4.561823546023395e-05, "loss": 2.4675, "step": 1769500 }, { "epoch": 8.77, "learning_rate": 4.561699687380786e-05, "loss": 2.4574, "step": 1770000 }, { "epoch": 8.77, "learning_rate": 4.5615758287381775e-05, "loss": 2.4715, "step": 1770500 }, { "epoch": 8.77, "learning_rate": 4.561451970095569e-05, "loss": 2.4416, "step": 1771000 }, { "epoch": 8.78, "learning_rate": 4.561328111452961e-05, "loss": 2.4696, "step": 1771500 }, { "epoch": 8.78, "learning_rate": 4.5612042528103526e-05, "loss": 2.4609, "step": 1772000 }, { "epoch": 8.78, "learning_rate": 4.561080394167744e-05, "loss": 2.4613, "step": 1772500 }, { "epoch": 8.78, "learning_rate": 4.560956535525136e-05, "loss": 2.4924, "step": 1773000 }, { "epoch": 8.79, "learning_rate": 4.560832676882528e-05, "loss": 2.4574, "step": 1773500 }, { "epoch": 8.79, "learning_rate": 4.5607088182399194e-05, "loss": 2.479, "step": 1774000 }, { "epoch": 8.79, "learning_rate": 4.560584959597311e-05, "loss": 2.4495, "step": 1774500 }, { "epoch": 8.79, "learning_rate": 4.560461100954703e-05, "loss": 2.4675, "step": 1775000 }, { "epoch": 8.8, "learning_rate": 4.5603372423120944e-05, "loss": 2.472, "step": 1775500 }, { "epoch": 8.8, "learning_rate": 4.560213631386771e-05, "loss": 2.4506, "step": 1776000 }, { "epoch": 8.8, "learning_rate": 4.560089772744163e-05, "loss": 2.453, "step": 1776500 }, { "epoch": 8.8, "learning_rate": 4.559965914101555e-05, "loss": 2.4638, "step": 1777000 }, { "epoch": 8.81, "learning_rate": 4.559842055458946e-05, "loss": 2.4947, "step": 1777500 }, { "epoch": 8.81, "learning_rate": 4.5597181968163374e-05, "loss": 2.4435, "step": 1778000 }, { "epoch": 8.81, "learning_rate": 4.559594338173729e-05, "loss": 2.4739, "step": 1778500 }, { "epoch": 8.81, "learning_rate": 4.559470727248406e-05, "loss": 2.435, "step": 1779000 }, { "epoch": 8.82, "learning_rate": 4.559346868605798e-05, "loss": 2.4844, "step": 1779500 }, { "epoch": 8.82, "learning_rate": 4.5592230099631894e-05, "loss": 2.4432, "step": 1780000 }, { "epoch": 8.82, "learning_rate": 4.559099151320581e-05, "loss": 2.4639, "step": 1780500 }, { "epoch": 8.82, "learning_rate": 4.558975540395258e-05, "loss": 2.4533, "step": 1781000 }, { "epoch": 8.83, "learning_rate": 4.558851929469935e-05, "loss": 2.4545, "step": 1781500 }, { "epoch": 8.83, "learning_rate": 4.5587280708273265e-05, "loss": 2.4481, "step": 1782000 }, { "epoch": 8.83, "learning_rate": 4.5586042121847175e-05, "loss": 2.4613, "step": 1782500 }, { "epoch": 8.83, "learning_rate": 4.558480353542109e-05, "loss": 2.4775, "step": 1783000 }, { "epoch": 8.84, "learning_rate": 4.558356494899501e-05, "loss": 2.4433, "step": 1783500 }, { "epoch": 8.84, "learning_rate": 4.5582326362568926e-05, "loss": 2.4653, "step": 1784000 }, { "epoch": 8.84, "learning_rate": 4.558108777614284e-05, "loss": 2.4877, "step": 1784500 }, { "epoch": 8.84, "learning_rate": 4.557984918971676e-05, "loss": 2.4707, "step": 1785000 }, { "epoch": 8.85, "learning_rate": 4.557861060329068e-05, "loss": 2.4879, "step": 1785500 }, { "epoch": 8.85, "learning_rate": 4.5577372016864594e-05, "loss": 2.4454, "step": 1786000 }, { "epoch": 8.85, "learning_rate": 4.557613343043851e-05, "loss": 2.458, "step": 1786500 }, { "epoch": 8.85, "learning_rate": 4.557489732118528e-05, "loss": 2.4683, "step": 1787000 }, { "epoch": 8.86, "learning_rate": 4.5573658734759196e-05, "loss": 2.4623, "step": 1787500 }, { "epoch": 8.86, "learning_rate": 4.5572420148333113e-05, "loss": 2.4821, "step": 1788000 }, { "epoch": 8.86, "learning_rate": 4.557118156190703e-05, "loss": 2.456, "step": 1788500 }, { "epoch": 8.86, "learning_rate": 4.556994297548095e-05, "loss": 2.4476, "step": 1789000 }, { "epoch": 8.87, "learning_rate": 4.5568704389054864e-05, "loss": 2.4679, "step": 1789500 }, { "epoch": 8.87, "learning_rate": 4.556746580262878e-05, "loss": 2.4734, "step": 1790000 }, { "epoch": 8.87, "learning_rate": 4.55662272162027e-05, "loss": 2.4734, "step": 1790500 }, { "epoch": 8.87, "learning_rate": 4.556498862977661e-05, "loss": 2.4945, "step": 1791000 }, { "epoch": 8.88, "learning_rate": 4.5563750043350525e-05, "loss": 2.4307, "step": 1791500 }, { "epoch": 8.88, "learning_rate": 4.5562513934097294e-05, "loss": 2.4534, "step": 1792000 }, { "epoch": 8.88, "learning_rate": 4.556127534767121e-05, "loss": 2.4577, "step": 1792500 }, { "epoch": 8.88, "learning_rate": 4.556003676124513e-05, "loss": 2.4634, "step": 1793000 }, { "epoch": 8.89, "learning_rate": 4.5558798174819045e-05, "loss": 2.4808, "step": 1793500 }, { "epoch": 8.89, "learning_rate": 4.555755958839296e-05, "loss": 2.4439, "step": 1794000 }, { "epoch": 8.89, "learning_rate": 4.555632100196688e-05, "loss": 2.4453, "step": 1794500 }, { "epoch": 8.89, "learning_rate": 4.555508489271365e-05, "loss": 2.442, "step": 1795000 }, { "epoch": 8.9, "learning_rate": 4.5553846306287564e-05, "loss": 2.4453, "step": 1795500 }, { "epoch": 8.9, "learning_rate": 4.555260771986148e-05, "loss": 2.4428, "step": 1796000 }, { "epoch": 8.9, "learning_rate": 4.555137161060824e-05, "loss": 2.48, "step": 1796500 }, { "epoch": 8.9, "learning_rate": 4.555013302418216e-05, "loss": 2.4658, "step": 1797000 }, { "epoch": 8.91, "learning_rate": 4.554889443775608e-05, "loss": 2.4606, "step": 1797500 }, { "epoch": 8.91, "learning_rate": 4.5547655851329994e-05, "loss": 2.449, "step": 1798000 }, { "epoch": 8.91, "learning_rate": 4.554641726490391e-05, "loss": 2.4647, "step": 1798500 }, { "epoch": 8.91, "learning_rate": 4.554517867847783e-05, "loss": 2.4644, "step": 1799000 }, { "epoch": 8.92, "learning_rate": 4.5543940092051745e-05, "loss": 2.4737, "step": 1799500 }, { "epoch": 8.92, "learning_rate": 4.554270150562566e-05, "loss": 2.491, "step": 1800000 }, { "epoch": 8.92, "learning_rate": 4.554146291919958e-05, "loss": 2.4842, "step": 1800500 }, { "epoch": 8.92, "learning_rate": 4.55402292871192e-05, "loss": 2.4706, "step": 1801000 }, { "epoch": 8.93, "learning_rate": 4.553899070069311e-05, "loss": 2.4853, "step": 1801500 }, { "epoch": 8.93, "learning_rate": 4.5537752114267026e-05, "loss": 2.4699, "step": 1802000 }, { "epoch": 8.93, "learning_rate": 4.5536513527840943e-05, "loss": 2.4815, "step": 1802500 }, { "epoch": 8.93, "learning_rate": 4.553527494141486e-05, "loss": 2.458, "step": 1803000 }, { "epoch": 8.94, "learning_rate": 4.553403635498878e-05, "loss": 2.4418, "step": 1803500 }, { "epoch": 8.94, "learning_rate": 4.5532797768562694e-05, "loss": 2.4691, "step": 1804000 }, { "epoch": 8.94, "learning_rate": 4.553155918213661e-05, "loss": 2.4687, "step": 1804500 }, { "epoch": 8.94, "learning_rate": 4.553032059571053e-05, "loss": 2.4617, "step": 1805000 }, { "epoch": 8.95, "learning_rate": 4.55290844864573e-05, "loss": 2.491, "step": 1805500 }, { "epoch": 8.95, "learning_rate": 4.5527845900031214e-05, "loss": 2.4481, "step": 1806000 }, { "epoch": 8.95, "learning_rate": 4.552660731360513e-05, "loss": 2.447, "step": 1806500 }, { "epoch": 8.95, "learning_rate": 4.552536872717905e-05, "loss": 2.4697, "step": 1807000 }, { "epoch": 8.95, "learning_rate": 4.5524132617925816e-05, "loss": 2.4696, "step": 1807500 }, { "epoch": 8.96, "learning_rate": 4.5522894031499727e-05, "loss": 2.4883, "step": 1808000 }, { "epoch": 8.96, "learning_rate": 4.5521655445073644e-05, "loss": 2.429, "step": 1808500 }, { "epoch": 8.96, "learning_rate": 4.552041685864756e-05, "loss": 2.4636, "step": 1809000 }, { "epoch": 8.96, "learning_rate": 4.5519180749394336e-05, "loss": 2.4663, "step": 1809500 }, { "epoch": 8.97, "learning_rate": 4.5517944640141105e-05, "loss": 2.4526, "step": 1810000 }, { "epoch": 8.97, "learning_rate": 4.551670605371502e-05, "loss": 2.4824, "step": 1810500 }, { "epoch": 8.97, "learning_rate": 4.551546746728894e-05, "loss": 2.4671, "step": 1811000 }, { "epoch": 8.97, "learning_rate": 4.5514228880862856e-05, "loss": 2.4767, "step": 1811500 }, { "epoch": 8.98, "learning_rate": 4.551299029443677e-05, "loss": 2.4661, "step": 1812000 }, { "epoch": 8.98, "learning_rate": 4.551175170801068e-05, "loss": 2.4633, "step": 1812500 }, { "epoch": 8.98, "learning_rate": 4.55105131215846e-05, "loss": 2.4379, "step": 1813000 }, { "epoch": 8.98, "learning_rate": 4.5509274535158517e-05, "loss": 2.4667, "step": 1813500 }, { "epoch": 8.99, "learning_rate": 4.5508035948732434e-05, "loss": 2.4808, "step": 1814000 }, { "epoch": 8.99, "learning_rate": 4.55067998394792e-05, "loss": 2.4708, "step": 1814500 }, { "epoch": 8.99, "learning_rate": 4.550556125305312e-05, "loss": 2.4644, "step": 1815000 }, { "epoch": 8.99, "learning_rate": 4.5504322666627036e-05, "loss": 2.4861, "step": 1815500 }, { "epoch": 9.0, "learning_rate": 4.550308408020095e-05, "loss": 2.4598, "step": 1816000 }, { "epoch": 9.0, "learning_rate": 4.550184549377487e-05, "loss": 2.4818, "step": 1816500 }, { "epoch": 9.0, "eval_accuracy": 0.6403843342409302, "eval_accuracy_mlm": 0.5934790457919371, "eval_accuracy_nsp": 0.8615189108837107, "eval_loss": 2.434657096862793, "eval_runtime": 145.7954, "eval_samples_per_second": 1748.745, "eval_steps_per_second": 72.869, "step": 1816587 }, { "epoch": 9.0, "learning_rate": 4.550060938452164e-05, "loss": 2.424, "step": 1817000 }, { "epoch": 9.0, "learning_rate": 4.5499370798095556e-05, "loss": 2.426, "step": 1817500 }, { "epoch": 9.01, "learning_rate": 4.549813221166947e-05, "loss": 2.4208, "step": 1818000 }, { "epoch": 9.01, "learning_rate": 4.549689362524339e-05, "loss": 2.4265, "step": 1818500 }, { "epoch": 9.01, "learning_rate": 4.549565751599015e-05, "loss": 2.4424, "step": 1819000 }, { "epoch": 9.01, "learning_rate": 4.549441892956407e-05, "loss": 2.4181, "step": 1819500 }, { "epoch": 9.02, "learning_rate": 4.5493180343137986e-05, "loss": 2.4305, "step": 1820000 }, { "epoch": 9.02, "learning_rate": 4.54919417567119e-05, "loss": 2.4542, "step": 1820500 }, { "epoch": 9.02, "learning_rate": 4.549070317028582e-05, "loss": 2.4495, "step": 1821000 }, { "epoch": 9.02, "learning_rate": 4.5489464583859736e-05, "loss": 2.4497, "step": 1821500 }, { "epoch": 9.03, "learning_rate": 4.5488228474606505e-05, "loss": 2.442, "step": 1822000 }, { "epoch": 9.03, "learning_rate": 4.548698988818042e-05, "loss": 2.4593, "step": 1822500 }, { "epoch": 9.03, "learning_rate": 4.548575130175434e-05, "loss": 2.4229, "step": 1823000 }, { "epoch": 9.03, "learning_rate": 4.5484512715328256e-05, "loss": 2.4115, "step": 1823500 }, { "epoch": 9.04, "learning_rate": 4.548327412890217e-05, "loss": 2.4466, "step": 1824000 }, { "epoch": 9.04, "learning_rate": 4.548203554247609e-05, "loss": 2.4517, "step": 1824500 }, { "epoch": 9.04, "learning_rate": 4.548079695605001e-05, "loss": 2.4276, "step": 1825000 }, { "epoch": 9.04, "learning_rate": 4.5479558369623924e-05, "loss": 2.4327, "step": 1825500 }, { "epoch": 9.05, "learning_rate": 4.5478319783197834e-05, "loss": 2.4248, "step": 1826000 }, { "epoch": 9.05, "learning_rate": 4.547708119677175e-05, "loss": 2.4232, "step": 1826500 }, { "epoch": 9.05, "learning_rate": 4.547584508751852e-05, "loss": 2.4336, "step": 1827000 }, { "epoch": 9.05, "learning_rate": 4.547460897826529e-05, "loss": 2.4489, "step": 1827500 }, { "epoch": 9.06, "learning_rate": 4.5473370391839205e-05, "loss": 2.4391, "step": 1828000 }, { "epoch": 9.06, "learning_rate": 4.5472134282585974e-05, "loss": 2.4429, "step": 1828500 }, { "epoch": 9.06, "learning_rate": 4.5470895696159884e-05, "loss": 2.4328, "step": 1829000 }, { "epoch": 9.06, "learning_rate": 4.54696571097338e-05, "loss": 2.4022, "step": 1829500 }, { "epoch": 9.07, "learning_rate": 4.546841852330772e-05, "loss": 2.4167, "step": 1830000 }, { "epoch": 9.07, "learning_rate": 4.5467179936881635e-05, "loss": 2.4667, "step": 1830500 }, { "epoch": 9.07, "learning_rate": 4.546594135045555e-05, "loss": 2.4303, "step": 1831000 }, { "epoch": 9.07, "learning_rate": 4.546470276402947e-05, "loss": 2.4245, "step": 1831500 }, { "epoch": 9.08, "learning_rate": 4.5463464177603386e-05, "loss": 2.4439, "step": 1832000 }, { "epoch": 9.08, "learning_rate": 4.54622255911773e-05, "loss": 2.447, "step": 1832500 }, { "epoch": 9.08, "learning_rate": 4.546098700475122e-05, "loss": 2.4434, "step": 1833000 }, { "epoch": 9.08, "learning_rate": 4.5459748418325137e-05, "loss": 2.4311, "step": 1833500 }, { "epoch": 9.09, "learning_rate": 4.5458509831899053e-05, "loss": 2.4513, "step": 1834000 }, { "epoch": 9.09, "learning_rate": 4.545727372264582e-05, "loss": 2.4607, "step": 1834500 }, { "epoch": 9.09, "learning_rate": 4.545603513621974e-05, "loss": 2.4291, "step": 1835000 }, { "epoch": 9.09, "learning_rate": 4.5454796549793656e-05, "loss": 2.4439, "step": 1835500 }, { "epoch": 9.1, "learning_rate": 4.545355796336757e-05, "loss": 2.4741, "step": 1836000 }, { "epoch": 9.1, "learning_rate": 4.5452321854114335e-05, "loss": 2.4591, "step": 1836500 }, { "epoch": 9.1, "learning_rate": 4.545108326768825e-05, "loss": 2.4535, "step": 1837000 }, { "epoch": 9.1, "learning_rate": 4.544984468126217e-05, "loss": 2.4464, "step": 1837500 }, { "epoch": 9.11, "learning_rate": 4.5448606094836086e-05, "loss": 2.4405, "step": 1838000 }, { "epoch": 9.11, "learning_rate": 4.544736750841e-05, "loss": 2.4423, "step": 1838500 }, { "epoch": 9.11, "learning_rate": 4.544612892198392e-05, "loss": 2.4383, "step": 1839000 }, { "epoch": 9.11, "learning_rate": 4.544489033555784e-05, "loss": 2.4421, "step": 1839500 }, { "epoch": 9.12, "learning_rate": 4.5443651749131754e-05, "loss": 2.4476, "step": 1840000 }, { "epoch": 9.12, "learning_rate": 4.544241316270567e-05, "loss": 2.4121, "step": 1840500 }, { "epoch": 9.12, "learning_rate": 4.544117457627959e-05, "loss": 2.4078, "step": 1841000 }, { "epoch": 9.12, "learning_rate": 4.5439935989853504e-05, "loss": 2.4349, "step": 1841500 }, { "epoch": 9.13, "learning_rate": 4.543869988060027e-05, "loss": 2.4562, "step": 1842000 }, { "epoch": 9.13, "learning_rate": 4.543746129417419e-05, "loss": 2.404, "step": 1842500 }, { "epoch": 9.13, "learning_rate": 4.543622270774811e-05, "loss": 2.4584, "step": 1843000 }, { "epoch": 9.13, "learning_rate": 4.543498659849487e-05, "loss": 2.4404, "step": 1843500 }, { "epoch": 9.14, "learning_rate": 4.5433748012068786e-05, "loss": 2.4358, "step": 1844000 }, { "epoch": 9.14, "learning_rate": 4.54325094256427e-05, "loss": 2.4061, "step": 1844500 }, { "epoch": 9.14, "learning_rate": 4.543127083921662e-05, "loss": 2.4233, "step": 1845000 }, { "epoch": 9.14, "learning_rate": 4.543003472996339e-05, "loss": 2.4524, "step": 1845500 }, { "epoch": 9.15, "learning_rate": 4.542879862071016e-05, "loss": 2.4392, "step": 1846000 }, { "epoch": 9.15, "learning_rate": 4.5427562511456926e-05, "loss": 2.4372, "step": 1846500 }, { "epoch": 9.15, "learning_rate": 4.542632392503084e-05, "loss": 2.4474, "step": 1847000 }, { "epoch": 9.15, "learning_rate": 4.542508533860476e-05, "loss": 2.4468, "step": 1847500 }, { "epoch": 9.16, "learning_rate": 4.542384675217868e-05, "loss": 2.4432, "step": 1848000 }, { "epoch": 9.16, "learning_rate": 4.5422608165752594e-05, "loss": 2.4501, "step": 1848500 }, { "epoch": 9.16, "learning_rate": 4.5421369579326504e-05, "loss": 2.4495, "step": 1849000 }, { "epoch": 9.16, "learning_rate": 4.542013099290042e-05, "loss": 2.4418, "step": 1849500 }, { "epoch": 9.17, "learning_rate": 4.541889240647434e-05, "loss": 2.4484, "step": 1850000 }, { "epoch": 9.17, "learning_rate": 4.5417653820048255e-05, "loss": 2.4385, "step": 1850500 }, { "epoch": 9.17, "learning_rate": 4.541641523362217e-05, "loss": 2.4258, "step": 1851000 }, { "epoch": 9.17, "learning_rate": 4.541517664719609e-05, "loss": 2.4504, "step": 1851500 }, { "epoch": 9.18, "learning_rate": 4.5413938060770006e-05, "loss": 2.458, "step": 1852000 }, { "epoch": 9.18, "learning_rate": 4.541269947434392e-05, "loss": 2.4314, "step": 1852500 }, { "epoch": 9.18, "learning_rate": 4.541146088791784e-05, "loss": 2.436, "step": 1853000 }, { "epoch": 9.18, "learning_rate": 4.5410222301491757e-05, "loss": 2.426, "step": 1853500 }, { "epoch": 9.19, "learning_rate": 4.5408983715065673e-05, "loss": 2.435, "step": 1854000 }, { "epoch": 9.19, "learning_rate": 4.540774512863959e-05, "loss": 2.4389, "step": 1854500 }, { "epoch": 9.19, "learning_rate": 4.540650901938636e-05, "loss": 2.461, "step": 1855000 }, { "epoch": 9.19, "learning_rate": 4.5405270432960276e-05, "loss": 2.4472, "step": 1855500 }, { "epoch": 9.2, "learning_rate": 4.540403432370704e-05, "loss": 2.4388, "step": 1856000 }, { "epoch": 9.2, "learning_rate": 4.5402795737280955e-05, "loss": 2.4427, "step": 1856500 }, { "epoch": 9.2, "learning_rate": 4.540155715085487e-05, "loss": 2.4563, "step": 1857000 }, { "epoch": 9.2, "learning_rate": 4.540032104160165e-05, "loss": 2.4632, "step": 1857500 }, { "epoch": 9.21, "learning_rate": 4.539908245517556e-05, "loss": 2.4482, "step": 1858000 }, { "epoch": 9.21, "learning_rate": 4.5397843868749475e-05, "loss": 2.4332, "step": 1858500 }, { "epoch": 9.21, "learning_rate": 4.539660528232339e-05, "loss": 2.4423, "step": 1859000 }, { "epoch": 9.21, "learning_rate": 4.539536669589731e-05, "loss": 2.4584, "step": 1859500 }, { "epoch": 9.22, "learning_rate": 4.5394128109471225e-05, "loss": 2.4123, "step": 1860000 }, { "epoch": 9.22, "learning_rate": 4.539288952304514e-05, "loss": 2.4395, "step": 1860500 }, { "epoch": 9.22, "learning_rate": 4.539165093661906e-05, "loss": 2.4833, "step": 1861000 }, { "epoch": 9.22, "learning_rate": 4.5390412350192976e-05, "loss": 2.4595, "step": 1861500 }, { "epoch": 9.22, "learning_rate": 4.538917376376689e-05, "loss": 2.4364, "step": 1862000 }, { "epoch": 9.23, "learning_rate": 4.538793517734081e-05, "loss": 2.4205, "step": 1862500 }, { "epoch": 9.23, "learning_rate": 4.538669659091472e-05, "loss": 2.4233, "step": 1863000 }, { "epoch": 9.23, "learning_rate": 4.538545800448864e-05, "loss": 2.4394, "step": 1863500 }, { "epoch": 9.23, "learning_rate": 4.5384219418062554e-05, "loss": 2.4386, "step": 1864000 }, { "epoch": 9.24, "learning_rate": 4.538298330880932e-05, "loss": 2.4466, "step": 1864500 }, { "epoch": 9.24, "learning_rate": 4.538174472238324e-05, "loss": 2.4483, "step": 1865000 }, { "epoch": 9.24, "learning_rate": 4.538050613595716e-05, "loss": 2.454, "step": 1865500 }, { "epoch": 9.24, "learning_rate": 4.5379267549531074e-05, "loss": 2.4675, "step": 1866000 }, { "epoch": 9.25, "learning_rate": 4.537803144027784e-05, "loss": 2.4575, "step": 1866500 }, { "epoch": 9.25, "learning_rate": 4.537679285385176e-05, "loss": 2.4484, "step": 1867000 }, { "epoch": 9.25, "learning_rate": 4.5375554267425676e-05, "loss": 2.4436, "step": 1867500 }, { "epoch": 9.25, "learning_rate": 4.537431568099959e-05, "loss": 2.4271, "step": 1868000 }, { "epoch": 9.26, "learning_rate": 4.537307709457351e-05, "loss": 2.4645, "step": 1868500 }, { "epoch": 9.26, "learning_rate": 4.537183850814743e-05, "loss": 2.4648, "step": 1869000 }, { "epoch": 9.26, "learning_rate": 4.537059992172134e-05, "loss": 2.4415, "step": 1869500 }, { "epoch": 9.26, "learning_rate": 4.5369363812468106e-05, "loss": 2.4357, "step": 1870000 }, { "epoch": 9.27, "learning_rate": 4.536812522604202e-05, "loss": 2.4403, "step": 1870500 }, { "epoch": 9.27, "learning_rate": 4.536688663961594e-05, "loss": 2.4475, "step": 1871000 }, { "epoch": 9.27, "learning_rate": 4.536564805318986e-05, "loss": 2.4376, "step": 1871500 }, { "epoch": 9.27, "learning_rate": 4.5364411943936626e-05, "loss": 2.4186, "step": 1872000 }, { "epoch": 9.28, "learning_rate": 4.536317335751054e-05, "loss": 2.4547, "step": 1872500 }, { "epoch": 9.28, "learning_rate": 4.536193477108446e-05, "loss": 2.4273, "step": 1873000 }, { "epoch": 9.28, "learning_rate": 4.5360696184658376e-05, "loss": 2.453, "step": 1873500 }, { "epoch": 9.28, "learning_rate": 4.5359457598232293e-05, "loss": 2.4315, "step": 1874000 }, { "epoch": 9.29, "learning_rate": 4.535821901180621e-05, "loss": 2.4556, "step": 1874500 }, { "epoch": 9.29, "learning_rate": 4.535698042538013e-05, "loss": 2.4386, "step": 1875000 }, { "epoch": 9.29, "learning_rate": 4.5355741838954044e-05, "loss": 2.4383, "step": 1875500 }, { "epoch": 9.29, "learning_rate": 4.535450325252796e-05, "loss": 2.4127, "step": 1876000 }, { "epoch": 9.3, "learning_rate": 4.535326466610187e-05, "loss": 2.4339, "step": 1876500 }, { "epoch": 9.3, "learning_rate": 4.535202607967579e-05, "loss": 2.4542, "step": 1877000 }, { "epoch": 9.3, "learning_rate": 4.5350787493249705e-05, "loss": 2.4275, "step": 1877500 }, { "epoch": 9.3, "learning_rate": 4.5349551383996474e-05, "loss": 2.4308, "step": 1878000 }, { "epoch": 9.31, "learning_rate": 4.534831279757039e-05, "loss": 2.4147, "step": 1878500 }, { "epoch": 9.31, "learning_rate": 4.534707421114431e-05, "loss": 2.4667, "step": 1879000 }, { "epoch": 9.31, "learning_rate": 4.5345835624718225e-05, "loss": 2.4236, "step": 1879500 }, { "epoch": 9.31, "learning_rate": 4.534459703829214e-05, "loss": 2.4343, "step": 1880000 }, { "epoch": 9.32, "learning_rate": 4.534335845186606e-05, "loss": 2.4451, "step": 1880500 }, { "epoch": 9.32, "learning_rate": 4.5342119865439975e-05, "loss": 2.4541, "step": 1881000 }, { "epoch": 9.32, "learning_rate": 4.534088127901389e-05, "loss": 2.4212, "step": 1881500 }, { "epoch": 9.32, "learning_rate": 4.533964516976066e-05, "loss": 2.4321, "step": 1882000 }, { "epoch": 9.33, "learning_rate": 4.533840658333458e-05, "loss": 2.4481, "step": 1882500 }, { "epoch": 9.33, "learning_rate": 4.533716799690849e-05, "loss": 2.4062, "step": 1883000 }, { "epoch": 9.33, "learning_rate": 4.5335929410482405e-05, "loss": 2.4301, "step": 1883500 }, { "epoch": 9.33, "learning_rate": 4.533469082405632e-05, "loss": 2.4163, "step": 1884000 }, { "epoch": 9.34, "learning_rate": 4.533345471480309e-05, "loss": 2.4449, "step": 1884500 }, { "epoch": 9.34, "learning_rate": 4.533221612837701e-05, "loss": 2.4703, "step": 1885000 }, { "epoch": 9.34, "learning_rate": 4.5330977541950925e-05, "loss": 2.4336, "step": 1885500 }, { "epoch": 9.34, "learning_rate": 4.5329741432697694e-05, "loss": 2.4546, "step": 1886000 }, { "epoch": 9.35, "learning_rate": 4.532850284627161e-05, "loss": 2.4623, "step": 1886500 }, { "epoch": 9.35, "learning_rate": 4.532726425984553e-05, "loss": 2.4571, "step": 1887000 }, { "epoch": 9.35, "learning_rate": 4.5326025673419444e-05, "loss": 2.4461, "step": 1887500 }, { "epoch": 9.35, "learning_rate": 4.532478708699336e-05, "loss": 2.4349, "step": 1888000 }, { "epoch": 9.36, "learning_rate": 4.532354850056728e-05, "loss": 2.4261, "step": 1888500 }, { "epoch": 9.36, "learning_rate": 4.5322309914141195e-05, "loss": 2.4164, "step": 1889000 }, { "epoch": 9.36, "learning_rate": 4.532107132771511e-05, "loss": 2.462, "step": 1889500 }, { "epoch": 9.36, "learning_rate": 4.5319835218461874e-05, "loss": 2.4433, "step": 1890000 }, { "epoch": 9.37, "learning_rate": 4.531859663203579e-05, "loss": 2.4574, "step": 1890500 }, { "epoch": 9.37, "learning_rate": 4.531735804560971e-05, "loss": 2.4477, "step": 1891000 }, { "epoch": 9.37, "learning_rate": 4.5316119459183625e-05, "loss": 2.4444, "step": 1891500 }, { "epoch": 9.37, "learning_rate": 4.531488087275754e-05, "loss": 2.4569, "step": 1892000 }, { "epoch": 9.38, "learning_rate": 4.531364476350431e-05, "loss": 2.4576, "step": 1892500 }, { "epoch": 9.38, "learning_rate": 4.531240617707823e-05, "loss": 2.4366, "step": 1893000 }, { "epoch": 9.38, "learning_rate": 4.5311167590652145e-05, "loss": 2.455, "step": 1893500 }, { "epoch": 9.38, "learning_rate": 4.530992900422606e-05, "loss": 2.4634, "step": 1894000 }, { "epoch": 9.39, "learning_rate": 4.530869041779998e-05, "loss": 2.4578, "step": 1894500 }, { "epoch": 9.39, "learning_rate": 4.530745430854674e-05, "loss": 2.4391, "step": 1895000 }, { "epoch": 9.39, "learning_rate": 4.530621572212066e-05, "loss": 2.4376, "step": 1895500 }, { "epoch": 9.39, "learning_rate": 4.5304977135694574e-05, "loss": 2.4466, "step": 1896000 }, { "epoch": 9.4, "learning_rate": 4.530373854926849e-05, "loss": 2.429, "step": 1896500 }, { "epoch": 9.4, "learning_rate": 4.530249996284241e-05, "loss": 2.4456, "step": 1897000 }, { "epoch": 9.4, "learning_rate": 4.5301261376416325e-05, "loss": 2.4541, "step": 1897500 }, { "epoch": 9.4, "learning_rate": 4.530002278999024e-05, "loss": 2.4651, "step": 1898000 }, { "epoch": 9.41, "learning_rate": 4.529878668073701e-05, "loss": 2.4453, "step": 1898500 }, { "epoch": 9.41, "learning_rate": 4.529754809431093e-05, "loss": 2.4451, "step": 1899000 }, { "epoch": 9.41, "learning_rate": 4.5296309507884845e-05, "loss": 2.4266, "step": 1899500 }, { "epoch": 9.41, "learning_rate": 4.529507092145876e-05, "loss": 2.455, "step": 1900000 }, { "epoch": 9.42, "learning_rate": 4.529383233503268e-05, "loss": 2.4657, "step": 1900500 }, { "epoch": 9.42, "learning_rate": 4.5292593748606595e-05, "loss": 2.4469, "step": 1901000 }, { "epoch": 9.42, "learning_rate": 4.529135516218051e-05, "loss": 2.4411, "step": 1901500 }, { "epoch": 9.42, "learning_rate": 4.5290121530100126e-05, "loss": 2.4473, "step": 1902000 }, { "epoch": 9.43, "learning_rate": 4.528888294367404e-05, "loss": 2.4457, "step": 1902500 }, { "epoch": 9.43, "learning_rate": 4.528764435724796e-05, "loss": 2.4583, "step": 1903000 }, { "epoch": 9.43, "learning_rate": 4.528640577082188e-05, "loss": 2.4524, "step": 1903500 }, { "epoch": 9.43, "learning_rate": 4.5285167184395794e-05, "loss": 2.4532, "step": 1904000 }, { "epoch": 9.44, "learning_rate": 4.528392859796971e-05, "loss": 2.4809, "step": 1904500 }, { "epoch": 9.44, "learning_rate": 4.528269001154363e-05, "loss": 2.4212, "step": 1905000 }, { "epoch": 9.44, "learning_rate": 4.5281451425117545e-05, "loss": 2.4673, "step": 1905500 }, { "epoch": 9.44, "learning_rate": 4.528021283869146e-05, "loss": 2.4051, "step": 1906000 }, { "epoch": 9.45, "learning_rate": 4.527897425226538e-05, "loss": 2.4195, "step": 1906500 }, { "epoch": 9.45, "learning_rate": 4.5277735665839296e-05, "loss": 2.4623, "step": 1907000 }, { "epoch": 9.45, "learning_rate": 4.527649955658606e-05, "loss": 2.4388, "step": 1907500 }, { "epoch": 9.45, "learning_rate": 4.5275260970159975e-05, "loss": 2.4585, "step": 1908000 }, { "epoch": 9.46, "learning_rate": 4.527402238373389e-05, "loss": 2.4361, "step": 1908500 }, { "epoch": 9.46, "learning_rate": 4.527278379730781e-05, "loss": 2.442, "step": 1909000 }, { "epoch": 9.46, "learning_rate": 4.5271545210881725e-05, "loss": 2.4635, "step": 1909500 }, { "epoch": 9.46, "learning_rate": 4.527030662445564e-05, "loss": 2.4437, "step": 1910000 }, { "epoch": 9.47, "learning_rate": 4.526906803802956e-05, "loss": 2.4507, "step": 1910500 }, { "epoch": 9.47, "learning_rate": 4.5267829451603476e-05, "loss": 2.4323, "step": 1911000 }, { "epoch": 9.47, "learning_rate": 4.526659086517739e-05, "loss": 2.45, "step": 1911500 }, { "epoch": 9.47, "learning_rate": 4.526535227875131e-05, "loss": 2.4588, "step": 1912000 }, { "epoch": 9.48, "learning_rate": 4.526411369232523e-05, "loss": 2.4592, "step": 1912500 }, { "epoch": 9.48, "learning_rate": 4.5262875105899144e-05, "loss": 2.4493, "step": 1913000 }, { "epoch": 9.48, "learning_rate": 4.526163899664591e-05, "loss": 2.4565, "step": 1913500 }, { "epoch": 9.48, "learning_rate": 4.5260402887392675e-05, "loss": 2.4562, "step": 1914000 }, { "epoch": 9.49, "learning_rate": 4.525916430096659e-05, "loss": 2.463, "step": 1914500 }, { "epoch": 9.49, "learning_rate": 4.525792819171336e-05, "loss": 2.4384, "step": 1915000 }, { "epoch": 9.49, "learning_rate": 4.525668960528728e-05, "loss": 2.429, "step": 1915500 }, { "epoch": 9.49, "learning_rate": 4.5255451018861194e-05, "loss": 2.4265, "step": 1916000 }, { "epoch": 9.5, "learning_rate": 4.525421243243511e-05, "loss": 2.457, "step": 1916500 }, { "epoch": 9.5, "learning_rate": 4.525297384600903e-05, "loss": 2.4622, "step": 1917000 }, { "epoch": 9.5, "learning_rate": 4.52517377367558e-05, "loss": 2.4689, "step": 1917500 }, { "epoch": 9.5, "learning_rate": 4.5250499150329714e-05, "loss": 2.4288, "step": 1918000 }, { "epoch": 9.5, "learning_rate": 4.524926304107648e-05, "loss": 2.454, "step": 1918500 }, { "epoch": 9.51, "learning_rate": 4.52480244546504e-05, "loss": 2.4701, "step": 1919000 }, { "epoch": 9.51, "learning_rate": 4.5246785868224317e-05, "loss": 2.4518, "step": 1919500 }, { "epoch": 9.51, "learning_rate": 4.5245547281798233e-05, "loss": 2.4624, "step": 1920000 }, { "epoch": 9.51, "learning_rate": 4.5244308695372144e-05, "loss": 2.4445, "step": 1920500 }, { "epoch": 9.52, "learning_rate": 4.524307010894606e-05, "loss": 2.4495, "step": 1921000 }, { "epoch": 9.52, "learning_rate": 4.524183152251998e-05, "loss": 2.4558, "step": 1921500 }, { "epoch": 9.52, "learning_rate": 4.5240592936093894e-05, "loss": 2.4547, "step": 1922000 }, { "epoch": 9.52, "learning_rate": 4.523935682684067e-05, "loss": 2.441, "step": 1922500 }, { "epoch": 9.53, "learning_rate": 4.523811824041459e-05, "loss": 2.4597, "step": 1923000 }, { "epoch": 9.53, "learning_rate": 4.5236879653988504e-05, "loss": 2.444, "step": 1923500 }, { "epoch": 9.53, "learning_rate": 4.523564602190812e-05, "loss": 2.4567, "step": 1924000 }, { "epoch": 9.53, "learning_rate": 4.5234407435482035e-05, "loss": 2.4353, "step": 1924500 }, { "epoch": 9.54, "learning_rate": 4.523316884905595e-05, "loss": 2.4716, "step": 1925000 }, { "epoch": 9.54, "learning_rate": 4.523193026262987e-05, "loss": 2.4343, "step": 1925500 }, { "epoch": 9.54, "learning_rate": 4.5230691676203785e-05, "loss": 2.4463, "step": 1926000 }, { "epoch": 9.54, "learning_rate": 4.52294530897777e-05, "loss": 2.438, "step": 1926500 }, { "epoch": 9.55, "learning_rate": 4.522821450335162e-05, "loss": 2.4469, "step": 1927000 }, { "epoch": 9.55, "learning_rate": 4.5226975916925536e-05, "loss": 2.4329, "step": 1927500 }, { "epoch": 9.55, "learning_rate": 4.522573733049945e-05, "loss": 2.4451, "step": 1928000 }, { "epoch": 9.55, "learning_rate": 4.522449874407337e-05, "loss": 2.4711, "step": 1928500 }, { "epoch": 9.56, "learning_rate": 4.522326015764729e-05, "loss": 2.4351, "step": 1929000 }, { "epoch": 9.56, "learning_rate": 4.5222021571221204e-05, "loss": 2.4218, "step": 1929500 }, { "epoch": 9.56, "learning_rate": 4.5220782984795114e-05, "loss": 2.437, "step": 1930000 }, { "epoch": 9.56, "learning_rate": 4.521954439836903e-05, "loss": 2.4446, "step": 1930500 }, { "epoch": 9.57, "learning_rate": 4.52183082891158e-05, "loss": 2.4568, "step": 1931000 }, { "epoch": 9.57, "learning_rate": 4.521706970268972e-05, "loss": 2.4456, "step": 1931500 }, { "epoch": 9.57, "learning_rate": 4.5215831116263634e-05, "loss": 2.435, "step": 1932000 }, { "epoch": 9.57, "learning_rate": 4.521459252983755e-05, "loss": 2.4542, "step": 1932500 }, { "epoch": 9.58, "learning_rate": 4.521335394341146e-05, "loss": 2.4418, "step": 1933000 }, { "epoch": 9.58, "learning_rate": 4.521211535698538e-05, "loss": 2.4507, "step": 1933500 }, { "epoch": 9.58, "learning_rate": 4.5210876770559295e-05, "loss": 2.4544, "step": 1934000 }, { "epoch": 9.58, "learning_rate": 4.520963818413321e-05, "loss": 2.4652, "step": 1934500 }, { "epoch": 9.59, "learning_rate": 4.520840207487999e-05, "loss": 2.4531, "step": 1935000 }, { "epoch": 9.59, "learning_rate": 4.5207163488453904e-05, "loss": 2.4594, "step": 1935500 }, { "epoch": 9.59, "learning_rate": 4.520592490202782e-05, "loss": 2.453, "step": 1936000 }, { "epoch": 9.59, "learning_rate": 4.5204693747120294e-05, "loss": 2.4411, "step": 1936500 }, { "epoch": 9.6, "learning_rate": 4.5203455160694204e-05, "loss": 2.4588, "step": 1937000 }, { "epoch": 9.6, "learning_rate": 4.520221657426812e-05, "loss": 2.4649, "step": 1937500 }, { "epoch": 9.6, "learning_rate": 4.520097798784204e-05, "loss": 2.4384, "step": 1938000 }, { "epoch": 9.6, "learning_rate": 4.5199739401415955e-05, "loss": 2.4294, "step": 1938500 }, { "epoch": 9.61, "learning_rate": 4.519850081498987e-05, "loss": 2.4468, "step": 1939000 }, { "epoch": 9.61, "learning_rate": 4.519726222856379e-05, "loss": 2.451, "step": 1939500 }, { "epoch": 9.61, "learning_rate": 4.5196023642137705e-05, "loss": 2.4349, "step": 1940000 }, { "epoch": 9.61, "learning_rate": 4.519478505571162e-05, "loss": 2.439, "step": 1940500 }, { "epoch": 9.62, "learning_rate": 4.519354646928554e-05, "loss": 2.4513, "step": 1941000 }, { "epoch": 9.62, "learning_rate": 4.519230788285945e-05, "loss": 2.4451, "step": 1941500 }, { "epoch": 9.62, "learning_rate": 4.5191069296433366e-05, "loss": 2.4526, "step": 1942000 }, { "epoch": 9.62, "learning_rate": 4.518983071000728e-05, "loss": 2.4393, "step": 1942500 }, { "epoch": 9.63, "learning_rate": 4.51885921235812e-05, "loss": 2.4453, "step": 1943000 }, { "epoch": 9.63, "learning_rate": 4.518735353715512e-05, "loss": 2.4257, "step": 1943500 }, { "epoch": 9.63, "learning_rate": 4.5186114950729034e-05, "loss": 2.4565, "step": 1944000 }, { "epoch": 9.63, "learning_rate": 4.51848788414758e-05, "loss": 2.4816, "step": 1944500 }, { "epoch": 9.64, "learning_rate": 4.518364273222257e-05, "loss": 2.4714, "step": 1945000 }, { "epoch": 9.64, "learning_rate": 4.518240662296934e-05, "loss": 2.4386, "step": 1945500 }, { "epoch": 9.64, "learning_rate": 4.518116803654326e-05, "loss": 2.4524, "step": 1946000 }, { "epoch": 9.64, "learning_rate": 4.517992945011717e-05, "loss": 2.4481, "step": 1946500 }, { "epoch": 9.65, "learning_rate": 4.5178690863691084e-05, "loss": 2.4431, "step": 1947000 }, { "epoch": 9.65, "learning_rate": 4.5177452277265e-05, "loss": 2.4608, "step": 1947500 }, { "epoch": 9.65, "learning_rate": 4.517621369083892e-05, "loss": 2.4459, "step": 1948000 }, { "epoch": 9.65, "learning_rate": 4.5174977581585694e-05, "loss": 2.4347, "step": 1948500 }, { "epoch": 9.66, "learning_rate": 4.517373899515961e-05, "loss": 2.4307, "step": 1949000 }, { "epoch": 9.66, "learning_rate": 4.517250040873352e-05, "loss": 2.4474, "step": 1949500 }, { "epoch": 9.66, "learning_rate": 4.517126182230744e-05, "loss": 2.4829, "step": 1950000 }, { "epoch": 9.66, "learning_rate": 4.5170023235881355e-05, "loss": 2.4657, "step": 1950500 }, { "epoch": 9.67, "learning_rate": 4.516878464945527e-05, "loss": 2.4238, "step": 1951000 }, { "epoch": 9.67, "learning_rate": 4.516754854020204e-05, "loss": 2.4664, "step": 1951500 }, { "epoch": 9.67, "learning_rate": 4.516630995377596e-05, "loss": 2.4432, "step": 1952000 }, { "epoch": 9.67, "learning_rate": 4.516507136734987e-05, "loss": 2.4409, "step": 1952500 }, { "epoch": 9.68, "learning_rate": 4.5163832780923785e-05, "loss": 2.4568, "step": 1953000 }, { "epoch": 9.68, "learning_rate": 4.51625941944977e-05, "loss": 2.4436, "step": 1953500 }, { "epoch": 9.68, "learning_rate": 4.516135560807162e-05, "loss": 2.4369, "step": 1954000 }, { "epoch": 9.68, "learning_rate": 4.5160117021645535e-05, "loss": 2.4344, "step": 1954500 }, { "epoch": 9.69, "learning_rate": 4.515887843521945e-05, "loss": 2.4531, "step": 1955000 }, { "epoch": 9.69, "learning_rate": 4.515764232596622e-05, "loss": 2.4491, "step": 1955500 }, { "epoch": 9.69, "learning_rate": 4.515640373954014e-05, "loss": 2.4332, "step": 1956000 }, { "epoch": 9.69, "learning_rate": 4.5155165153114055e-05, "loss": 2.4424, "step": 1956500 }, { "epoch": 9.7, "learning_rate": 4.515392656668797e-05, "loss": 2.4608, "step": 1957000 }, { "epoch": 9.7, "learning_rate": 4.515268798026189e-05, "loss": 2.444, "step": 1957500 }, { "epoch": 9.7, "learning_rate": 4.5151449393835806e-05, "loss": 2.4751, "step": 1958000 }, { "epoch": 9.7, "learning_rate": 4.5150213284582574e-05, "loss": 2.4599, "step": 1958500 }, { "epoch": 9.71, "learning_rate": 4.5148974698156485e-05, "loss": 2.4676, "step": 1959000 }, { "epoch": 9.71, "learning_rate": 4.51477361117304e-05, "loss": 2.4294, "step": 1959500 }, { "epoch": 9.71, "learning_rate": 4.514649752530432e-05, "loss": 2.4494, "step": 1960000 }, { "epoch": 9.71, "learning_rate": 4.5145263893223946e-05, "loss": 2.4253, "step": 1960500 }, { "epoch": 9.72, "learning_rate": 4.514402530679786e-05, "loss": 2.4498, "step": 1961000 }, { "epoch": 9.72, "learning_rate": 4.514278672037178e-05, "loss": 2.4453, "step": 1961500 }, { "epoch": 9.72, "learning_rate": 4.51415481339457e-05, "loss": 2.4619, "step": 1962000 }, { "epoch": 9.72, "learning_rate": 4.514030954751961e-05, "loss": 2.4357, "step": 1962500 }, { "epoch": 9.73, "learning_rate": 4.5139070961093524e-05, "loss": 2.4439, "step": 1963000 }, { "epoch": 9.73, "learning_rate": 4.513783485184029e-05, "loss": 2.4647, "step": 1963500 }, { "epoch": 9.73, "learning_rate": 4.513659626541421e-05, "loss": 2.4701, "step": 1964000 }, { "epoch": 9.73, "learning_rate": 4.5135357678988126e-05, "loss": 2.4486, "step": 1964500 }, { "epoch": 9.74, "learning_rate": 4.5134119092562043e-05, "loss": 2.4749, "step": 1965000 }, { "epoch": 9.74, "learning_rate": 4.513288050613596e-05, "loss": 2.4462, "step": 1965500 }, { "epoch": 9.74, "learning_rate": 4.513164191970988e-05, "loss": 2.452, "step": 1966000 }, { "epoch": 9.74, "learning_rate": 4.5130403333283794e-05, "loss": 2.4414, "step": 1966500 }, { "epoch": 9.75, "learning_rate": 4.512916474685771e-05, "loss": 2.4396, "step": 1967000 }, { "epoch": 9.75, "learning_rate": 4.512792616043163e-05, "loss": 2.4279, "step": 1967500 }, { "epoch": 9.75, "learning_rate": 4.512668757400554e-05, "loss": 2.4615, "step": 1968000 }, { "epoch": 9.75, "learning_rate": 4.5125451464752314e-05, "loss": 2.4715, "step": 1968500 }, { "epoch": 9.76, "learning_rate": 4.512421287832623e-05, "loss": 2.4397, "step": 1969000 }, { "epoch": 9.76, "learning_rate": 4.512297429190014e-05, "loss": 2.4726, "step": 1969500 }, { "epoch": 9.76, "learning_rate": 4.512173570547406e-05, "loss": 2.4296, "step": 1970000 }, { "epoch": 9.76, "learning_rate": 4.5120497119047975e-05, "loss": 2.4543, "step": 1970500 }, { "epoch": 9.77, "learning_rate": 4.511925853262189e-05, "loss": 2.4283, "step": 1971000 }, { "epoch": 9.77, "learning_rate": 4.511801994619581e-05, "loss": 2.4433, "step": 1971500 }, { "epoch": 9.77, "learning_rate": 4.511678135976972e-05, "loss": 2.431, "step": 1972000 }, { "epoch": 9.77, "learning_rate": 4.5115542773343636e-05, "loss": 2.4314, "step": 1972500 }, { "epoch": 9.77, "learning_rate": 4.511430666409041e-05, "loss": 2.4454, "step": 1973000 }, { "epoch": 9.78, "learning_rate": 4.511306807766433e-05, "loss": 2.4631, "step": 1973500 }, { "epoch": 9.78, "learning_rate": 4.5111829491238245e-05, "loss": 2.4612, "step": 1974000 }, { "epoch": 9.78, "learning_rate": 4.5110590904812155e-05, "loss": 2.4322, "step": 1974500 }, { "epoch": 9.78, "learning_rate": 4.510935231838607e-05, "loss": 2.4259, "step": 1975000 }, { "epoch": 9.79, "learning_rate": 4.510811620913285e-05, "loss": 2.4497, "step": 1975500 }, { "epoch": 9.79, "learning_rate": 4.510687762270676e-05, "loss": 2.4183, "step": 1976000 }, { "epoch": 9.79, "learning_rate": 4.5105639036280675e-05, "loss": 2.4897, "step": 1976500 }, { "epoch": 9.79, "learning_rate": 4.510440044985459e-05, "loss": 2.4587, "step": 1977000 }, { "epoch": 9.8, "learning_rate": 4.510316434060136e-05, "loss": 2.4401, "step": 1977500 }, { "epoch": 9.8, "learning_rate": 4.510192575417528e-05, "loss": 2.4357, "step": 1978000 }, { "epoch": 9.8, "learning_rate": 4.5100687167749194e-05, "loss": 2.4482, "step": 1978500 }, { "epoch": 9.8, "learning_rate": 4.509944858132311e-05, "loss": 2.4574, "step": 1979000 }, { "epoch": 9.81, "learning_rate": 4.509821247206988e-05, "loss": 2.4316, "step": 1979500 }, { "epoch": 9.81, "learning_rate": 4.50969738856438e-05, "loss": 2.4638, "step": 1980000 }, { "epoch": 9.81, "learning_rate": 4.5095735299217714e-05, "loss": 2.4208, "step": 1980500 }, { "epoch": 9.81, "learning_rate": 4.509449671279163e-05, "loss": 2.4633, "step": 1981000 }, { "epoch": 9.82, "learning_rate": 4.5093263080711245e-05, "loss": 2.4471, "step": 1981500 }, { "epoch": 9.82, "learning_rate": 4.509202697145802e-05, "loss": 2.4612, "step": 1982000 }, { "epoch": 9.82, "learning_rate": 4.509078838503194e-05, "loss": 2.4536, "step": 1982500 }, { "epoch": 9.82, "learning_rate": 4.50895522757787e-05, "loss": 2.4203, "step": 1983000 }, { "epoch": 9.83, "learning_rate": 4.5088313689352616e-05, "loss": 2.4689, "step": 1983500 }, { "epoch": 9.83, "learning_rate": 4.508707510292653e-05, "loss": 2.4635, "step": 1984000 }, { "epoch": 9.83, "learning_rate": 4.508583651650045e-05, "loss": 2.4427, "step": 1984500 }, { "epoch": 9.83, "learning_rate": 4.508459793007437e-05, "loss": 2.4488, "step": 1985000 }, { "epoch": 9.84, "learning_rate": 4.5083359343648284e-05, "loss": 2.4687, "step": 1985500 }, { "epoch": 9.84, "learning_rate": 4.50821207572222e-05, "loss": 2.4482, "step": 1986000 }, { "epoch": 9.84, "learning_rate": 4.508088217079612e-05, "loss": 2.4457, "step": 1986500 }, { "epoch": 9.84, "learning_rate": 4.5079643584370035e-05, "loss": 2.4492, "step": 1987000 }, { "epoch": 9.85, "learning_rate": 4.5078404997943945e-05, "loss": 2.468, "step": 1987500 }, { "epoch": 9.85, "learning_rate": 4.507716888869072e-05, "loss": 2.4591, "step": 1988000 }, { "epoch": 9.85, "learning_rate": 4.507593030226464e-05, "loss": 2.4664, "step": 1988500 }, { "epoch": 9.85, "learning_rate": 4.5074691715838554e-05, "loss": 2.4511, "step": 1989000 }, { "epoch": 9.86, "learning_rate": 4.507345312941247e-05, "loss": 2.4497, "step": 1989500 }, { "epoch": 9.86, "learning_rate": 4.507221454298639e-05, "loss": 2.4432, "step": 1990000 }, { "epoch": 9.86, "learning_rate": 4.50709759565603e-05, "loss": 2.4519, "step": 1990500 }, { "epoch": 9.86, "learning_rate": 4.5069737370134215e-05, "loss": 2.4516, "step": 1991000 }, { "epoch": 9.87, "learning_rate": 4.506849878370813e-05, "loss": 2.4417, "step": 1991500 }, { "epoch": 9.87, "learning_rate": 4.506726019728205e-05, "loss": 2.464, "step": 1992000 }, { "epoch": 9.87, "learning_rate": 4.5066021610855966e-05, "loss": 2.4332, "step": 1992500 }, { "epoch": 9.87, "learning_rate": 4.5064783024429876e-05, "loss": 2.4693, "step": 1993000 }, { "epoch": 9.88, "learning_rate": 4.506354443800379e-05, "loss": 2.4477, "step": 1993500 }, { "epoch": 9.88, "learning_rate": 4.506230585157771e-05, "loss": 2.4455, "step": 1994000 }, { "epoch": 9.88, "learning_rate": 4.506106974232448e-05, "loss": 2.4597, "step": 1994500 }, { "epoch": 9.88, "learning_rate": 4.5059833633071255e-05, "loss": 2.446, "step": 1995000 }, { "epoch": 9.89, "learning_rate": 4.505859504664517e-05, "loss": 2.4577, "step": 1995500 }, { "epoch": 9.89, "learning_rate": 4.505735646021909e-05, "loss": 2.4678, "step": 1996000 }, { "epoch": 9.89, "learning_rate": 4.5056117873793005e-05, "loss": 2.4562, "step": 1996500 }, { "epoch": 9.89, "learning_rate": 4.5054879287366916e-05, "loss": 2.4657, "step": 1997000 }, { "epoch": 9.9, "learning_rate": 4.505364070094083e-05, "loss": 2.433, "step": 1997500 }, { "epoch": 9.9, "learning_rate": 4.505240211451475e-05, "loss": 2.4243, "step": 1998000 }, { "epoch": 9.9, "learning_rate": 4.5051163528088666e-05, "loss": 2.4324, "step": 1998500 }, { "epoch": 9.9, "learning_rate": 4.504992494166258e-05, "loss": 2.4494, "step": 1999000 }, { "epoch": 9.91, "learning_rate": 4.50486863552365e-05, "loss": 2.4347, "step": 1999500 }, { "epoch": 9.91, "learning_rate": 4.504744776881041e-05, "loss": 2.4345, "step": 2000000 }, { "epoch": 9.91, "learning_rate": 4.504620918238433e-05, "loss": 2.4564, "step": 2000500 }, { "epoch": 9.91, "learning_rate": 4.5044973073131096e-05, "loss": 2.4482, "step": 2001000 }, { "epoch": 9.92, "learning_rate": 4.504373448670501e-05, "loss": 2.4592, "step": 2001500 }, { "epoch": 9.92, "learning_rate": 4.504249590027893e-05, "loss": 2.4584, "step": 2002000 }, { "epoch": 9.92, "learning_rate": 4.504125731385285e-05, "loss": 2.4937, "step": 2002500 }, { "epoch": 9.92, "learning_rate": 4.5040018727426764e-05, "loss": 2.4234, "step": 2003000 }, { "epoch": 9.93, "learning_rate": 4.503878014100068e-05, "loss": 2.4364, "step": 2003500 }, { "epoch": 9.93, "learning_rate": 4.50375415545746e-05, "loss": 2.4408, "step": 2004000 }, { "epoch": 9.93, "learning_rate": 4.5036302968148515e-05, "loss": 2.4403, "step": 2004500 }, { "epoch": 9.93, "learning_rate": 4.503506685889528e-05, "loss": 2.4583, "step": 2005000 }, { "epoch": 9.94, "learning_rate": 4.503383074964205e-05, "loss": 2.4657, "step": 2005500 }, { "epoch": 9.94, "learning_rate": 4.503259216321597e-05, "loss": 2.4634, "step": 2006000 }, { "epoch": 9.94, "learning_rate": 4.503135357678988e-05, "loss": 2.4743, "step": 2006500 }, { "epoch": 9.94, "learning_rate": 4.5030114990363796e-05, "loss": 2.4383, "step": 2007000 }, { "epoch": 9.95, "learning_rate": 4.502887640393771e-05, "loss": 2.4592, "step": 2007500 }, { "epoch": 9.95, "learning_rate": 4.502764029468449e-05, "loss": 2.458, "step": 2008000 }, { "epoch": 9.95, "learning_rate": 4.5026401708258406e-05, "loss": 2.4377, "step": 2008500 }, { "epoch": 9.95, "learning_rate": 4.502516312183232e-05, "loss": 2.4445, "step": 2009000 }, { "epoch": 9.96, "learning_rate": 4.502392453540623e-05, "loss": 2.4318, "step": 2009500 }, { "epoch": 9.96, "learning_rate": 4.502268594898015e-05, "loss": 2.4583, "step": 2010000 }, { "epoch": 9.96, "learning_rate": 4.502144983972692e-05, "loss": 2.4609, "step": 2010500 }, { "epoch": 9.96, "learning_rate": 4.5020211253300835e-05, "loss": 2.4503, "step": 2011000 }, { "epoch": 9.97, "learning_rate": 4.501897266687475e-05, "loss": 2.4607, "step": 2011500 }, { "epoch": 9.97, "learning_rate": 4.501773408044867e-05, "loss": 2.4503, "step": 2012000 }, { "epoch": 9.97, "learning_rate": 4.501649549402258e-05, "loss": 2.4397, "step": 2012500 }, { "epoch": 9.97, "learning_rate": 4.5015256907596496e-05, "loss": 2.4582, "step": 2013000 }, { "epoch": 9.98, "learning_rate": 4.501401832117041e-05, "loss": 2.438, "step": 2013500 }, { "epoch": 9.98, "learning_rate": 4.501277973474433e-05, "loss": 2.462, "step": 2014000 }, { "epoch": 9.98, "learning_rate": 4.5011543625491106e-05, "loss": 2.4676, "step": 2014500 }, { "epoch": 9.98, "learning_rate": 4.501030751623787e-05, "loss": 2.4411, "step": 2015000 }, { "epoch": 9.99, "learning_rate": 4.5009068929811785e-05, "loss": 2.4521, "step": 2015500 }, { "epoch": 9.99, "learning_rate": 4.50078303433857e-05, "loss": 2.4713, "step": 2016000 }, { "epoch": 9.99, "learning_rate": 4.500659175695962e-05, "loss": 2.432, "step": 2016500 }, { "epoch": 9.99, "learning_rate": 4.5005353170533535e-05, "loss": 2.4529, "step": 2017000 }, { "epoch": 10.0, "learning_rate": 4.500411458410745e-05, "loss": 2.4451, "step": 2017500 }, { "epoch": 10.0, "learning_rate": 4.500287847485422e-05, "loss": 2.4712, "step": 2018000 }, { "epoch": 10.0, "eval_accuracy": 0.6423499793751883, "eval_accuracy_mlm": 0.5954648160103292, "eval_accuracy_nsp": 0.8633937221278716, "eval_loss": 2.427269697189331, "eval_runtime": 146.0474, "eval_samples_per_second": 1745.728, "eval_steps_per_second": 72.743, "step": 2018430 }, { "epoch": 10.0, "learning_rate": 4.500164236560099e-05, "loss": 2.4274, "step": 2018500 }, { "epoch": 10.0, "learning_rate": 4.500040377917491e-05, "loss": 2.4295, "step": 2019000 }, { "epoch": 10.01, "learning_rate": 4.4999165192748824e-05, "loss": 2.3972, "step": 2019500 }, { "epoch": 10.01, "learning_rate": 4.499792660632274e-05, "loss": 2.4053, "step": 2020000 }, { "epoch": 10.01, "learning_rate": 4.499668801989666e-05, "loss": 2.4178, "step": 2020500 }, { "epoch": 10.01, "learning_rate": 4.499544943347057e-05, "loss": 2.4151, "step": 2021000 }, { "epoch": 10.02, "learning_rate": 4.4994210847044485e-05, "loss": 2.4157, "step": 2021500 }, { "epoch": 10.02, "learning_rate": 4.49929722606184e-05, "loss": 2.4145, "step": 2022000 }, { "epoch": 10.02, "learning_rate": 4.499173367419232e-05, "loss": 2.4341, "step": 2022500 }, { "epoch": 10.02, "learning_rate": 4.499050004211194e-05, "loss": 2.4384, "step": 2023000 }, { "epoch": 10.03, "learning_rate": 4.4989261455685856e-05, "loss": 2.4209, "step": 2023500 }, { "epoch": 10.03, "learning_rate": 4.498802286925977e-05, "loss": 2.4392, "step": 2024000 }, { "epoch": 10.03, "learning_rate": 4.498678428283369e-05, "loss": 2.4234, "step": 2024500 }, { "epoch": 10.03, "learning_rate": 4.498554569640761e-05, "loss": 2.412, "step": 2025000 }, { "epoch": 10.04, "learning_rate": 4.4984307109981524e-05, "loss": 2.4119, "step": 2025500 }, { "epoch": 10.04, "learning_rate": 4.498306852355544e-05, "loss": 2.4244, "step": 2026000 }, { "epoch": 10.04, "learning_rate": 4.498182993712936e-05, "loss": 2.4498, "step": 2026500 }, { "epoch": 10.04, "learning_rate": 4.4980591350703275e-05, "loss": 2.4358, "step": 2027000 }, { "epoch": 10.04, "learning_rate": 4.497935276427719e-05, "loss": 2.4245, "step": 2027500 }, { "epoch": 10.05, "learning_rate": 4.497811913219681e-05, "loss": 2.4284, "step": 2028000 }, { "epoch": 10.05, "learning_rate": 4.497688054577073e-05, "loss": 2.4131, "step": 2028500 }, { "epoch": 10.05, "learning_rate": 4.497564443651749e-05, "loss": 2.4187, "step": 2029000 }, { "epoch": 10.05, "learning_rate": 4.497440585009141e-05, "loss": 2.4189, "step": 2029500 }, { "epoch": 10.06, "learning_rate": 4.4973167263665325e-05, "loss": 2.4364, "step": 2030000 }, { "epoch": 10.06, "learning_rate": 4.497192867723924e-05, "loss": 2.4455, "step": 2030500 }, { "epoch": 10.06, "learning_rate": 4.497069009081316e-05, "loss": 2.4064, "step": 2031000 }, { "epoch": 10.06, "learning_rate": 4.4969451504387076e-05, "loss": 2.4481, "step": 2031500 }, { "epoch": 10.07, "learning_rate": 4.4968212917960986e-05, "loss": 2.4151, "step": 2032000 }, { "epoch": 10.07, "learning_rate": 4.49669743315349e-05, "loss": 2.4254, "step": 2032500 }, { "epoch": 10.07, "learning_rate": 4.496573574510882e-05, "loss": 2.428, "step": 2033000 }, { "epoch": 10.07, "learning_rate": 4.496449715868274e-05, "loss": 2.4263, "step": 2033500 }, { "epoch": 10.08, "learning_rate": 4.4963258572256654e-05, "loss": 2.4114, "step": 2034000 }, { "epoch": 10.08, "learning_rate": 4.496201998583057e-05, "loss": 2.4332, "step": 2034500 }, { "epoch": 10.08, "learning_rate": 4.496078139940449e-05, "loss": 2.4325, "step": 2035000 }, { "epoch": 10.08, "learning_rate": 4.4959545290151257e-05, "loss": 2.4009, "step": 2035500 }, { "epoch": 10.09, "learning_rate": 4.4958306703725173e-05, "loss": 2.435, "step": 2036000 }, { "epoch": 10.09, "learning_rate": 4.495706811729909e-05, "loss": 2.4103, "step": 2036500 }, { "epoch": 10.09, "learning_rate": 4.495582953087301e-05, "loss": 2.4359, "step": 2037000 }, { "epoch": 10.09, "learning_rate": 4.4954590944446924e-05, "loss": 2.4256, "step": 2037500 }, { "epoch": 10.1, "learning_rate": 4.495335235802084e-05, "loss": 2.4242, "step": 2038000 }, { "epoch": 10.1, "learning_rate": 4.495211377159476e-05, "loss": 2.4267, "step": 2038500 }, { "epoch": 10.1, "learning_rate": 4.4950875185168675e-05, "loss": 2.4368, "step": 2039000 }, { "epoch": 10.1, "learning_rate": 4.4949641553088296e-05, "loss": 2.4211, "step": 2039500 }, { "epoch": 10.11, "learning_rate": 4.494840296666221e-05, "loss": 2.4296, "step": 2040000 }, { "epoch": 10.11, "learning_rate": 4.494716438023613e-05, "loss": 2.4401, "step": 2040500 }, { "epoch": 10.11, "learning_rate": 4.4945925793810047e-05, "loss": 2.4441, "step": 2041000 }, { "epoch": 10.11, "learning_rate": 4.494468720738396e-05, "loss": 2.4434, "step": 2041500 }, { "epoch": 10.12, "learning_rate": 4.4943448620957874e-05, "loss": 2.4475, "step": 2042000 }, { "epoch": 10.12, "learning_rate": 4.494221003453179e-05, "loss": 2.4398, "step": 2042500 }, { "epoch": 10.12, "learning_rate": 4.494097144810571e-05, "loss": 2.4278, "step": 2043000 }, { "epoch": 10.12, "learning_rate": 4.4939732861679624e-05, "loss": 2.4079, "step": 2043500 }, { "epoch": 10.13, "learning_rate": 4.493849675242639e-05, "loss": 2.4734, "step": 2044000 }, { "epoch": 10.13, "learning_rate": 4.49372581660003e-05, "loss": 2.4516, "step": 2044500 }, { "epoch": 10.13, "learning_rate": 4.493601957957422e-05, "loss": 2.4456, "step": 2045000 }, { "epoch": 10.13, "learning_rate": 4.493478099314814e-05, "loss": 2.4319, "step": 2045500 }, { "epoch": 10.14, "learning_rate": 4.4933542406722054e-05, "loss": 2.4348, "step": 2046000 }, { "epoch": 10.14, "learning_rate": 4.493230382029597e-05, "loss": 2.4237, "step": 2046500 }, { "epoch": 10.14, "learning_rate": 4.493106771104275e-05, "loss": 2.4202, "step": 2047000 }, { "epoch": 10.14, "learning_rate": 4.492982912461666e-05, "loss": 2.421, "step": 2047500 }, { "epoch": 10.15, "learning_rate": 4.492859301536343e-05, "loss": 2.4291, "step": 2048000 }, { "epoch": 10.15, "learning_rate": 4.4927356906110194e-05, "loss": 2.422, "step": 2048500 }, { "epoch": 10.15, "learning_rate": 4.492611831968411e-05, "loss": 2.4306, "step": 2049000 }, { "epoch": 10.15, "learning_rate": 4.492487973325803e-05, "loss": 2.4154, "step": 2049500 }, { "epoch": 10.16, "learning_rate": 4.4923641146831945e-05, "loss": 2.4207, "step": 2050000 }, { "epoch": 10.16, "learning_rate": 4.492240256040586e-05, "loss": 2.4277, "step": 2050500 }, { "epoch": 10.16, "learning_rate": 4.492116397397978e-05, "loss": 2.4082, "step": 2051000 }, { "epoch": 10.16, "learning_rate": 4.4919925387553696e-05, "loss": 2.4284, "step": 2051500 }, { "epoch": 10.17, "learning_rate": 4.491868680112761e-05, "loss": 2.4324, "step": 2052000 }, { "epoch": 10.17, "learning_rate": 4.491744821470153e-05, "loss": 2.418, "step": 2052500 }, { "epoch": 10.17, "learning_rate": 4.491620962827545e-05, "loss": 2.4193, "step": 2053000 }, { "epoch": 10.17, "learning_rate": 4.4914971041849364e-05, "loss": 2.4341, "step": 2053500 }, { "epoch": 10.18, "learning_rate": 4.4913732455423274e-05, "loss": 2.4189, "step": 2054000 }, { "epoch": 10.18, "learning_rate": 4.491249634617005e-05, "loss": 2.4323, "step": 2054500 }, { "epoch": 10.18, "learning_rate": 4.4911257759743966e-05, "loss": 2.4587, "step": 2055000 }, { "epoch": 10.18, "learning_rate": 4.491002165049073e-05, "loss": 2.4278, "step": 2055500 }, { "epoch": 10.19, "learning_rate": 4.4908783064064645e-05, "loss": 2.4202, "step": 2056000 }, { "epoch": 10.19, "learning_rate": 4.490754447763856e-05, "loss": 2.4481, "step": 2056500 }, { "epoch": 10.19, "learning_rate": 4.490630589121248e-05, "loss": 2.4114, "step": 2057000 }, { "epoch": 10.19, "learning_rate": 4.490506978195925e-05, "loss": 2.4111, "step": 2057500 }, { "epoch": 10.2, "learning_rate": 4.4903831195533165e-05, "loss": 2.4391, "step": 2058000 }, { "epoch": 10.2, "learning_rate": 4.490259260910708e-05, "loss": 2.4327, "step": 2058500 }, { "epoch": 10.2, "learning_rate": 4.4901354022681e-05, "loss": 2.4279, "step": 2059000 }, { "epoch": 10.2, "learning_rate": 4.4900115436254916e-05, "loss": 2.4355, "step": 2059500 }, { "epoch": 10.21, "learning_rate": 4.4898881804174536e-05, "loss": 2.4252, "step": 2060000 }, { "epoch": 10.21, "learning_rate": 4.489764321774845e-05, "loss": 2.4196, "step": 2060500 }, { "epoch": 10.21, "learning_rate": 4.4896404631322363e-05, "loss": 2.4315, "step": 2061000 }, { "epoch": 10.21, "learning_rate": 4.489516604489628e-05, "loss": 2.4199, "step": 2061500 }, { "epoch": 10.22, "learning_rate": 4.48939274584702e-05, "loss": 2.4235, "step": 2062000 }, { "epoch": 10.22, "learning_rate": 4.4892688872044114e-05, "loss": 2.3984, "step": 2062500 }, { "epoch": 10.22, "learning_rate": 4.489145028561803e-05, "loss": 2.4339, "step": 2063000 }, { "epoch": 10.22, "learning_rate": 4.489021169919195e-05, "loss": 2.3881, "step": 2063500 }, { "epoch": 10.23, "learning_rate": 4.4888973112765865e-05, "loss": 2.4048, "step": 2064000 }, { "epoch": 10.23, "learning_rate": 4.488773452633978e-05, "loss": 2.426, "step": 2064500 }, { "epoch": 10.23, "learning_rate": 4.48864959399137e-05, "loss": 2.4169, "step": 2065000 }, { "epoch": 10.23, "learning_rate": 4.4885257353487616e-05, "loss": 2.4291, "step": 2065500 }, { "epoch": 10.24, "learning_rate": 4.488402124423438e-05, "loss": 2.422, "step": 2066000 }, { "epoch": 10.24, "learning_rate": 4.4882782657808295e-05, "loss": 2.4173, "step": 2066500 }, { "epoch": 10.24, "learning_rate": 4.488154407138221e-05, "loss": 2.415, "step": 2067000 }, { "epoch": 10.24, "learning_rate": 4.488030548495613e-05, "loss": 2.4058, "step": 2067500 }, { "epoch": 10.25, "learning_rate": 4.4879066898530046e-05, "loss": 2.4282, "step": 2068000 }, { "epoch": 10.25, "learning_rate": 4.4877830789276814e-05, "loss": 2.4527, "step": 2068500 }, { "epoch": 10.25, "learning_rate": 4.487659220285073e-05, "loss": 2.4305, "step": 2069000 }, { "epoch": 10.25, "learning_rate": 4.487535361642465e-05, "loss": 2.4422, "step": 2069500 }, { "epoch": 10.26, "learning_rate": 4.4874115029998565e-05, "loss": 2.4138, "step": 2070000 }, { "epoch": 10.26, "learning_rate": 4.4872878920745334e-05, "loss": 2.4326, "step": 2070500 }, { "epoch": 10.26, "learning_rate": 4.487164033431925e-05, "loss": 2.4561, "step": 2071000 }, { "epoch": 10.26, "learning_rate": 4.487040174789317e-05, "loss": 2.4219, "step": 2071500 }, { "epoch": 10.27, "learning_rate": 4.4869163161467085e-05, "loss": 2.4363, "step": 2072000 }, { "epoch": 10.27, "learning_rate": 4.4867924575040995e-05, "loss": 2.4182, "step": 2072500 }, { "epoch": 10.27, "learning_rate": 4.486668598861491e-05, "loss": 2.4498, "step": 2073000 }, { "epoch": 10.27, "learning_rate": 4.486544740218883e-05, "loss": 2.4262, "step": 2073500 }, { "epoch": 10.28, "learning_rate": 4.4864208815762746e-05, "loss": 2.4582, "step": 2074000 }, { "epoch": 10.28, "learning_rate": 4.4862972706509515e-05, "loss": 2.4353, "step": 2074500 }, { "epoch": 10.28, "learning_rate": 4.486173412008343e-05, "loss": 2.4298, "step": 2075000 }, { "epoch": 10.28, "learning_rate": 4.486049553365735e-05, "loss": 2.4233, "step": 2075500 }, { "epoch": 10.29, "learning_rate": 4.4859259424404124e-05, "loss": 2.4328, "step": 2076000 }, { "epoch": 10.29, "learning_rate": 4.4858023315150886e-05, "loss": 2.4591, "step": 2076500 }, { "epoch": 10.29, "learning_rate": 4.48567847287248e-05, "loss": 2.4568, "step": 2077000 }, { "epoch": 10.29, "learning_rate": 4.485554614229872e-05, "loss": 2.4302, "step": 2077500 }, { "epoch": 10.3, "learning_rate": 4.485430755587264e-05, "loss": 2.4473, "step": 2078000 }, { "epoch": 10.3, "learning_rate": 4.4853071446619406e-05, "loss": 2.4297, "step": 2078500 }, { "epoch": 10.3, "learning_rate": 4.4851835337366174e-05, "loss": 2.4359, "step": 2079000 }, { "epoch": 10.3, "learning_rate": 4.485059675094009e-05, "loss": 2.4481, "step": 2079500 }, { "epoch": 10.31, "learning_rate": 4.4849358164514e-05, "loss": 2.4354, "step": 2080000 }, { "epoch": 10.31, "learning_rate": 4.484811957808792e-05, "loss": 2.435, "step": 2080500 }, { "epoch": 10.31, "learning_rate": 4.484688346883469e-05, "loss": 2.4235, "step": 2081000 }, { "epoch": 10.31, "learning_rate": 4.4845644882408604e-05, "loss": 2.419, "step": 2081500 }, { "epoch": 10.31, "learning_rate": 4.484440629598252e-05, "loss": 2.4439, "step": 2082000 }, { "epoch": 10.32, "learning_rate": 4.484316770955644e-05, "loss": 2.423, "step": 2082500 }, { "epoch": 10.32, "learning_rate": 4.4841929123130355e-05, "loss": 2.4478, "step": 2083000 }, { "epoch": 10.32, "learning_rate": 4.484069053670427e-05, "loss": 2.4356, "step": 2083500 }, { "epoch": 10.32, "learning_rate": 4.483945195027819e-05, "loss": 2.4367, "step": 2084000 }, { "epoch": 10.33, "learning_rate": 4.4838213363852106e-05, "loss": 2.4409, "step": 2084500 }, { "epoch": 10.33, "learning_rate": 4.483697477742602e-05, "loss": 2.4238, "step": 2085000 }, { "epoch": 10.33, "learning_rate": 4.483573619099994e-05, "loss": 2.4366, "step": 2085500 }, { "epoch": 10.33, "learning_rate": 4.4834497604573856e-05, "loss": 2.4146, "step": 2086000 }, { "epoch": 10.34, "learning_rate": 4.4833259018147773e-05, "loss": 2.4158, "step": 2086500 }, { "epoch": 10.34, "learning_rate": 4.483202043172169e-05, "loss": 2.4511, "step": 2087000 }, { "epoch": 10.34, "learning_rate": 4.483078184529561e-05, "loss": 2.42, "step": 2087500 }, { "epoch": 10.34, "learning_rate": 4.4829543258869524e-05, "loss": 2.4163, "step": 2088000 }, { "epoch": 10.35, "learning_rate": 4.482830467244344e-05, "loss": 2.4279, "step": 2088500 }, { "epoch": 10.35, "learning_rate": 4.482706608601735e-05, "loss": 2.451, "step": 2089000 }, { "epoch": 10.35, "learning_rate": 4.482582749959127e-05, "loss": 2.4472, "step": 2089500 }, { "epoch": 10.35, "learning_rate": 4.4824588913165185e-05, "loss": 2.4138, "step": 2090000 }, { "epoch": 10.36, "learning_rate": 4.48233503267391e-05, "loss": 2.4263, "step": 2090500 }, { "epoch": 10.36, "learning_rate": 4.482211174031302e-05, "loss": 2.4066, "step": 2091000 }, { "epoch": 10.36, "learning_rate": 4.482087563105979e-05, "loss": 2.4424, "step": 2091500 }, { "epoch": 10.36, "learning_rate": 4.48196370446337e-05, "loss": 2.4485, "step": 2092000 }, { "epoch": 10.37, "learning_rate": 4.4818398458207615e-05, "loss": 2.4437, "step": 2092500 }, { "epoch": 10.37, "learning_rate": 4.481716234895439e-05, "loss": 2.4505, "step": 2093000 }, { "epoch": 10.37, "learning_rate": 4.481592376252831e-05, "loss": 2.4309, "step": 2093500 }, { "epoch": 10.37, "learning_rate": 4.4814685176102224e-05, "loss": 2.4408, "step": 2094000 }, { "epoch": 10.38, "learning_rate": 4.481344658967614e-05, "loss": 2.4208, "step": 2094500 }, { "epoch": 10.38, "learning_rate": 4.481220800325005e-05, "loss": 2.3977, "step": 2095000 }, { "epoch": 10.38, "learning_rate": 4.481097189399682e-05, "loss": 2.4398, "step": 2095500 }, { "epoch": 10.38, "learning_rate": 4.480973330757074e-05, "loss": 2.4252, "step": 2096000 }, { "epoch": 10.39, "learning_rate": 4.4808494721144654e-05, "loss": 2.4061, "step": 2096500 }, { "epoch": 10.39, "learning_rate": 4.480725613471857e-05, "loss": 2.4416, "step": 2097000 }, { "epoch": 10.39, "learning_rate": 4.480601754829249e-05, "loss": 2.4346, "step": 2097500 }, { "epoch": 10.39, "learning_rate": 4.4804778961866405e-05, "loss": 2.4364, "step": 2098000 }, { "epoch": 10.4, "learning_rate": 4.4803540375440315e-05, "loss": 2.4197, "step": 2098500 }, { "epoch": 10.4, "learning_rate": 4.480230178901423e-05, "loss": 2.423, "step": 2099000 }, { "epoch": 10.4, "learning_rate": 4.480106320258815e-05, "loss": 2.4381, "step": 2099500 }, { "epoch": 10.4, "learning_rate": 4.4799824616162066e-05, "loss": 2.4665, "step": 2100000 }, { "epoch": 10.41, "learning_rate": 4.479858602973598e-05, "loss": 2.4273, "step": 2100500 }, { "epoch": 10.41, "learning_rate": 4.479734992048276e-05, "loss": 2.4372, "step": 2101000 }, { "epoch": 10.41, "learning_rate": 4.479611133405667e-05, "loss": 2.429, "step": 2101500 }, { "epoch": 10.41, "learning_rate": 4.4794872747630585e-05, "loss": 2.4399, "step": 2102000 }, { "epoch": 10.42, "learning_rate": 4.47936341612045e-05, "loss": 2.4392, "step": 2102500 }, { "epoch": 10.42, "learning_rate": 4.479239805195127e-05, "loss": 2.4241, "step": 2103000 }, { "epoch": 10.42, "learning_rate": 4.479115946552519e-05, "loss": 2.4679, "step": 2103500 }, { "epoch": 10.42, "learning_rate": 4.4789920879099105e-05, "loss": 2.4386, "step": 2104000 }, { "epoch": 10.43, "learning_rate": 4.4788682292673015e-05, "loss": 2.4394, "step": 2104500 }, { "epoch": 10.43, "learning_rate": 4.478744370624693e-05, "loss": 2.4168, "step": 2105000 }, { "epoch": 10.43, "learning_rate": 4.478620511982085e-05, "loss": 2.4562, "step": 2105500 }, { "epoch": 10.43, "learning_rate": 4.4784969010567625e-05, "loss": 2.4321, "step": 2106000 }, { "epoch": 10.44, "learning_rate": 4.478373042414154e-05, "loss": 2.4339, "step": 2106500 }, { "epoch": 10.44, "learning_rate": 4.478249183771546e-05, "loss": 2.4364, "step": 2107000 }, { "epoch": 10.44, "learning_rate": 4.478125325128937e-05, "loss": 2.3967, "step": 2107500 }, { "epoch": 10.44, "learning_rate": 4.4780014664863286e-05, "loss": 2.4516, "step": 2108000 }, { "epoch": 10.45, "learning_rate": 4.47787760784372e-05, "loss": 2.4217, "step": 2108500 }, { "epoch": 10.45, "learning_rate": 4.477753749201112e-05, "loss": 2.418, "step": 2109000 }, { "epoch": 10.45, "learning_rate": 4.4776298905585036e-05, "loss": 2.4375, "step": 2109500 }, { "epoch": 10.45, "learning_rate": 4.477506031915895e-05, "loss": 2.4179, "step": 2110000 }, { "epoch": 10.46, "learning_rate": 4.477382173273287e-05, "loss": 2.4415, "step": 2110500 }, { "epoch": 10.46, "learning_rate": 4.477258562347963e-05, "loss": 2.4335, "step": 2111000 }, { "epoch": 10.46, "learning_rate": 4.477134703705355e-05, "loss": 2.4394, "step": 2111500 }, { "epoch": 10.46, "learning_rate": 4.4770108450627466e-05, "loss": 2.441, "step": 2112000 }, { "epoch": 10.47, "learning_rate": 4.476886986420138e-05, "loss": 2.4271, "step": 2112500 }, { "epoch": 10.47, "learning_rate": 4.47676312777753e-05, "loss": 2.4607, "step": 2113000 }, { "epoch": 10.47, "learning_rate": 4.476639269134922e-05, "loss": 2.4462, "step": 2113500 }, { "epoch": 10.47, "learning_rate": 4.4765154104923134e-05, "loss": 2.4493, "step": 2114000 }, { "epoch": 10.48, "learning_rate": 4.47639179956699e-05, "loss": 2.4344, "step": 2114500 }, { "epoch": 10.48, "learning_rate": 4.476267940924382e-05, "loss": 2.4534, "step": 2115000 }, { "epoch": 10.48, "learning_rate": 4.4761440822817736e-05, "loss": 2.4273, "step": 2115500 }, { "epoch": 10.48, "learning_rate": 4.476020223639165e-05, "loss": 2.4293, "step": 2116000 }, { "epoch": 10.49, "learning_rate": 4.475896612713842e-05, "loss": 2.4547, "step": 2116500 }, { "epoch": 10.49, "learning_rate": 4.475773001788519e-05, "loss": 2.4524, "step": 2117000 }, { "epoch": 10.49, "learning_rate": 4.475649143145911e-05, "loss": 2.4532, "step": 2117500 }, { "epoch": 10.49, "learning_rate": 4.4755252845033025e-05, "loss": 2.4419, "step": 2118000 }, { "epoch": 10.5, "learning_rate": 4.475401425860694e-05, "loss": 2.4467, "step": 2118500 }, { "epoch": 10.5, "learning_rate": 4.475277567218086e-05, "loss": 2.4355, "step": 2119000 }, { "epoch": 10.5, "learning_rate": 4.475153956292763e-05, "loss": 2.4465, "step": 2119500 }, { "epoch": 10.5, "learning_rate": 4.4750300976501544e-05, "loss": 2.4484, "step": 2120000 }, { "epoch": 10.51, "learning_rate": 4.474906239007546e-05, "loss": 2.4718, "step": 2120500 }, { "epoch": 10.51, "learning_rate": 4.474782380364937e-05, "loss": 2.4429, "step": 2121000 }, { "epoch": 10.51, "learning_rate": 4.474658521722329e-05, "loss": 2.461, "step": 2121500 }, { "epoch": 10.51, "learning_rate": 4.4745346630797205e-05, "loss": 2.4328, "step": 2122000 }, { "epoch": 10.52, "learning_rate": 4.474410804437112e-05, "loss": 2.457, "step": 2122500 }, { "epoch": 10.52, "learning_rate": 4.474286945794504e-05, "loss": 2.4409, "step": 2123000 }, { "epoch": 10.52, "learning_rate": 4.474163087151895e-05, "loss": 2.4528, "step": 2123500 }, { "epoch": 10.52, "learning_rate": 4.4740392285092866e-05, "loss": 2.4398, "step": 2124000 }, { "epoch": 10.53, "learning_rate": 4.473915617583964e-05, "loss": 2.419, "step": 2124500 }, { "epoch": 10.53, "learning_rate": 4.473791758941356e-05, "loss": 2.4351, "step": 2125000 }, { "epoch": 10.53, "learning_rate": 4.4736679002987476e-05, "loss": 2.4138, "step": 2125500 }, { "epoch": 10.53, "learning_rate": 4.473544041656139e-05, "loss": 2.4318, "step": 2126000 }, { "epoch": 10.54, "learning_rate": 4.47342018301353e-05, "loss": 2.4301, "step": 2126500 }, { "epoch": 10.54, "learning_rate": 4.473296324370922e-05, "loss": 2.4201, "step": 2127000 }, { "epoch": 10.54, "learning_rate": 4.473172465728314e-05, "loss": 2.439, "step": 2127500 }, { "epoch": 10.54, "learning_rate": 4.4730488548029905e-05, "loss": 2.4401, "step": 2128000 }, { "epoch": 10.55, "learning_rate": 4.472924996160382e-05, "loss": 2.4265, "step": 2128500 }, { "epoch": 10.55, "learning_rate": 4.472801137517774e-05, "loss": 2.4333, "step": 2129000 }, { "epoch": 10.55, "learning_rate": 4.4726772788751656e-05, "loss": 2.4448, "step": 2129500 }, { "epoch": 10.55, "learning_rate": 4.4725534202325566e-05, "loss": 2.4431, "step": 2130000 }, { "epoch": 10.56, "learning_rate": 4.472429561589948e-05, "loss": 2.4097, "step": 2130500 }, { "epoch": 10.56, "learning_rate": 4.47230570294734e-05, "loss": 2.4432, "step": 2131000 }, { "epoch": 10.56, "learning_rate": 4.472181844304732e-05, "loss": 2.4459, "step": 2131500 }, { "epoch": 10.56, "learning_rate": 4.472058233379409e-05, "loss": 2.4461, "step": 2132000 }, { "epoch": 10.57, "learning_rate": 4.4719343747368e-05, "loss": 2.4396, "step": 2132500 }, { "epoch": 10.57, "learning_rate": 4.471810763811478e-05, "loss": 2.4402, "step": 2133000 }, { "epoch": 10.57, "learning_rate": 4.4716869051688695e-05, "loss": 2.4444, "step": 2133500 }, { "epoch": 10.57, "learning_rate": 4.471563046526261e-05, "loss": 2.4261, "step": 2134000 }, { "epoch": 10.58, "learning_rate": 4.471439187883652e-05, "loss": 2.4395, "step": 2134500 }, { "epoch": 10.58, "learning_rate": 4.471315329241044e-05, "loss": 2.4222, "step": 2135000 }, { "epoch": 10.58, "learning_rate": 4.4711914705984356e-05, "loss": 2.4348, "step": 2135500 }, { "epoch": 10.58, "learning_rate": 4.4710678596731125e-05, "loss": 2.4261, "step": 2136000 }, { "epoch": 10.58, "learning_rate": 4.470944001030504e-05, "loss": 2.4462, "step": 2136500 }, { "epoch": 10.59, "learning_rate": 4.470820142387896e-05, "loss": 2.4554, "step": 2137000 }, { "epoch": 10.59, "learning_rate": 4.4706962837452876e-05, "loss": 2.4547, "step": 2137500 }, { "epoch": 10.59, "learning_rate": 4.470572425102679e-05, "loss": 2.4258, "step": 2138000 }, { "epoch": 10.59, "learning_rate": 4.470448566460071e-05, "loss": 2.4368, "step": 2138500 }, { "epoch": 10.6, "learning_rate": 4.470324707817462e-05, "loss": 2.4417, "step": 2139000 }, { "epoch": 10.6, "learning_rate": 4.470200849174854e-05, "loss": 2.4279, "step": 2139500 }, { "epoch": 10.6, "learning_rate": 4.470077238249531e-05, "loss": 2.4251, "step": 2140000 }, { "epoch": 10.6, "learning_rate": 4.469953379606923e-05, "loss": 2.4378, "step": 2140500 }, { "epoch": 10.61, "learning_rate": 4.469829520964314e-05, "loss": 2.4518, "step": 2141000 }, { "epoch": 10.61, "learning_rate": 4.4697056623217057e-05, "loss": 2.4638, "step": 2141500 }, { "epoch": 10.61, "learning_rate": 4.4695818036790973e-05, "loss": 2.4405, "step": 2142000 }, { "epoch": 10.61, "learning_rate": 4.469457945036489e-05, "loss": 2.444, "step": 2142500 }, { "epoch": 10.62, "learning_rate": 4.469334334111166e-05, "loss": 2.4427, "step": 2143000 }, { "epoch": 10.62, "learning_rate": 4.4692104754685576e-05, "loss": 2.4335, "step": 2143500 }, { "epoch": 10.62, "learning_rate": 4.469086616825949e-05, "loss": 2.4229, "step": 2144000 }, { "epoch": 10.62, "learning_rate": 4.468962758183341e-05, "loss": 2.4291, "step": 2144500 }, { "epoch": 10.63, "learning_rate": 4.468838899540732e-05, "loss": 2.4256, "step": 2145000 }, { "epoch": 10.63, "learning_rate": 4.4687152886154096e-05, "loss": 2.4089, "step": 2145500 }, { "epoch": 10.63, "learning_rate": 4.468591429972801e-05, "loss": 2.4351, "step": 2146000 }, { "epoch": 10.63, "learning_rate": 4.468467571330193e-05, "loss": 2.4628, "step": 2146500 }, { "epoch": 10.64, "learning_rate": 4.468343960404869e-05, "loss": 2.4254, "step": 2147000 }, { "epoch": 10.64, "learning_rate": 4.468220349479546e-05, "loss": 2.4318, "step": 2147500 }, { "epoch": 10.64, "learning_rate": 4.468096490836938e-05, "loss": 2.4339, "step": 2148000 }, { "epoch": 10.64, "learning_rate": 4.4679726321943294e-05, "loss": 2.4517, "step": 2148500 }, { "epoch": 10.65, "learning_rate": 4.467848773551721e-05, "loss": 2.4626, "step": 2149000 }, { "epoch": 10.65, "learning_rate": 4.467725162626398e-05, "loss": 2.4349, "step": 2149500 }, { "epoch": 10.65, "learning_rate": 4.467601551701075e-05, "loss": 2.4428, "step": 2150000 }, { "epoch": 10.65, "learning_rate": 4.4674776930584666e-05, "loss": 2.4328, "step": 2150500 }, { "epoch": 10.66, "learning_rate": 4.467353834415858e-05, "loss": 2.4354, "step": 2151000 }, { "epoch": 10.66, "learning_rate": 4.46722997577325e-05, "loss": 2.4413, "step": 2151500 }, { "epoch": 10.66, "learning_rate": 4.467106117130641e-05, "loss": 2.4063, "step": 2152000 }, { "epoch": 10.66, "learning_rate": 4.466982258488033e-05, "loss": 2.4428, "step": 2152500 }, { "epoch": 10.67, "learning_rate": 4.4668583998454244e-05, "loss": 2.4312, "step": 2153000 }, { "epoch": 10.67, "learning_rate": 4.466734541202816e-05, "loss": 2.4444, "step": 2153500 }, { "epoch": 10.67, "learning_rate": 4.466610682560208e-05, "loss": 2.4416, "step": 2154000 }, { "epoch": 10.67, "learning_rate": 4.4664868239175994e-05, "loss": 2.4138, "step": 2154500 }, { "epoch": 10.68, "learning_rate": 4.466362965274991e-05, "loss": 2.4313, "step": 2155000 }, { "epoch": 10.68, "learning_rate": 4.466239106632383e-05, "loss": 2.419, "step": 2155500 }, { "epoch": 10.68, "learning_rate": 4.4661152479897745e-05, "loss": 2.4566, "step": 2156000 }, { "epoch": 10.68, "learning_rate": 4.4659916370644514e-05, "loss": 2.4375, "step": 2156500 }, { "epoch": 10.69, "learning_rate": 4.465867778421843e-05, "loss": 2.4304, "step": 2157000 }, { "epoch": 10.69, "learning_rate": 4.465743919779235e-05, "loss": 2.4378, "step": 2157500 }, { "epoch": 10.69, "learning_rate": 4.465620061136626e-05, "loss": 2.4383, "step": 2158000 }, { "epoch": 10.69, "learning_rate": 4.4654962024940175e-05, "loss": 2.4265, "step": 2158500 }, { "epoch": 10.7, "learning_rate": 4.4653725915686944e-05, "loss": 2.4481, "step": 2159000 }, { "epoch": 10.7, "learning_rate": 4.465248732926086e-05, "loss": 2.4098, "step": 2159500 }, { "epoch": 10.7, "learning_rate": 4.4651251220007636e-05, "loss": 2.4331, "step": 2160000 }, { "epoch": 10.7, "learning_rate": 4.465001263358155e-05, "loss": 2.419, "step": 2160500 }, { "epoch": 10.71, "learning_rate": 4.464877404715547e-05, "loss": 2.43, "step": 2161000 }, { "epoch": 10.71, "learning_rate": 4.464753546072938e-05, "loss": 2.4379, "step": 2161500 }, { "epoch": 10.71, "learning_rate": 4.46462968743033e-05, "loss": 2.4677, "step": 2162000 }, { "epoch": 10.71, "learning_rate": 4.4645058287877214e-05, "loss": 2.431, "step": 2162500 }, { "epoch": 10.72, "learning_rate": 4.464381970145113e-05, "loss": 2.4455, "step": 2163000 }, { "epoch": 10.72, "learning_rate": 4.464258111502505e-05, "loss": 2.4502, "step": 2163500 }, { "epoch": 10.72, "learning_rate": 4.4641342528598965e-05, "loss": 2.4302, "step": 2164000 }, { "epoch": 10.72, "learning_rate": 4.464010641934573e-05, "loss": 2.4198, "step": 2164500 }, { "epoch": 10.73, "learning_rate": 4.4638867832919644e-05, "loss": 2.4332, "step": 2165000 }, { "epoch": 10.73, "learning_rate": 4.463762924649356e-05, "loss": 2.4356, "step": 2165500 }, { "epoch": 10.73, "learning_rate": 4.463639066006748e-05, "loss": 2.422, "step": 2166000 }, { "epoch": 10.73, "learning_rate": 4.4635152073641395e-05, "loss": 2.4193, "step": 2166500 }, { "epoch": 10.74, "learning_rate": 4.463391348721531e-05, "loss": 2.4501, "step": 2167000 }, { "epoch": 10.74, "learning_rate": 4.463267490078923e-05, "loss": 2.4621, "step": 2167500 }, { "epoch": 10.74, "learning_rate": 4.4631436314363145e-05, "loss": 2.4338, "step": 2168000 }, { "epoch": 10.74, "learning_rate": 4.4630200205109914e-05, "loss": 2.4167, "step": 2168500 }, { "epoch": 10.75, "learning_rate": 4.462896161868383e-05, "loss": 2.4113, "step": 2169000 }, { "epoch": 10.75, "learning_rate": 4.462772303225775e-05, "loss": 2.4667, "step": 2169500 }, { "epoch": 10.75, "learning_rate": 4.462648692300452e-05, "loss": 2.4525, "step": 2170000 }, { "epoch": 10.75, "learning_rate": 4.4625248336578434e-05, "loss": 2.4503, "step": 2170500 }, { "epoch": 10.76, "learning_rate": 4.4624009750152344e-05, "loss": 2.4539, "step": 2171000 }, { "epoch": 10.76, "learning_rate": 4.462277116372626e-05, "loss": 2.4368, "step": 2171500 }, { "epoch": 10.76, "learning_rate": 4.462153257730018e-05, "loss": 2.4427, "step": 2172000 }, { "epoch": 10.76, "learning_rate": 4.4620293990874095e-05, "loss": 2.4173, "step": 2172500 }, { "epoch": 10.77, "learning_rate": 4.461905788162087e-05, "loss": 2.4316, "step": 2173000 }, { "epoch": 10.77, "learning_rate": 4.461781929519479e-05, "loss": 2.4271, "step": 2173500 }, { "epoch": 10.77, "learning_rate": 4.461658318594155e-05, "loss": 2.4018, "step": 2174000 }, { "epoch": 10.77, "learning_rate": 4.4615344599515466e-05, "loss": 2.427, "step": 2174500 }, { "epoch": 10.78, "learning_rate": 4.461410601308938e-05, "loss": 2.4359, "step": 2175000 }, { "epoch": 10.78, "learning_rate": 4.46128674266633e-05, "loss": 2.4292, "step": 2175500 }, { "epoch": 10.78, "learning_rate": 4.461163131741007e-05, "loss": 2.4656, "step": 2176000 }, { "epoch": 10.78, "learning_rate": 4.4610392730983986e-05, "loss": 2.4482, "step": 2176500 }, { "epoch": 10.79, "learning_rate": 4.46091541445579e-05, "loss": 2.4331, "step": 2177000 }, { "epoch": 10.79, "learning_rate": 4.460791555813182e-05, "loss": 2.4638, "step": 2177500 }, { "epoch": 10.79, "learning_rate": 4.4606676971705737e-05, "loss": 2.4594, "step": 2178000 }, { "epoch": 10.79, "learning_rate": 4.4605438385279654e-05, "loss": 2.4464, "step": 2178500 }, { "epoch": 10.8, "learning_rate": 4.460419979885357e-05, "loss": 2.4558, "step": 2179000 }, { "epoch": 10.8, "learning_rate": 4.460296121242749e-05, "loss": 2.4394, "step": 2179500 }, { "epoch": 10.8, "learning_rate": 4.46017226260014e-05, "loss": 2.4307, "step": 2180000 }, { "epoch": 10.8, "learning_rate": 4.4600484039575314e-05, "loss": 2.4527, "step": 2180500 }, { "epoch": 10.81, "learning_rate": 4.459924545314923e-05, "loss": 2.4379, "step": 2181000 }, { "epoch": 10.81, "learning_rate": 4.459800686672315e-05, "loss": 2.4372, "step": 2181500 }, { "epoch": 10.81, "learning_rate": 4.4596768280297065e-05, "loss": 2.4373, "step": 2182000 }, { "epoch": 10.81, "learning_rate": 4.4595532171043834e-05, "loss": 2.4486, "step": 2182500 }, { "epoch": 10.82, "learning_rate": 4.459429358461775e-05, "loss": 2.4385, "step": 2183000 }, { "epoch": 10.82, "learning_rate": 4.459305747536452e-05, "loss": 2.4328, "step": 2183500 }, { "epoch": 10.82, "learning_rate": 4.459181888893844e-05, "loss": 2.4374, "step": 2184000 }, { "epoch": 10.82, "learning_rate": 4.4590580302512354e-05, "loss": 2.4451, "step": 2184500 }, { "epoch": 10.83, "learning_rate": 4.458934171608627e-05, "loss": 2.4589, "step": 2185000 }, { "epoch": 10.83, "learning_rate": 4.458810312966019e-05, "loss": 2.451, "step": 2185500 }, { "epoch": 10.83, "learning_rate": 4.458686702040695e-05, "loss": 2.418, "step": 2186000 }, { "epoch": 10.83, "learning_rate": 4.4585628433980866e-05, "loss": 2.4478, "step": 2186500 }, { "epoch": 10.84, "learning_rate": 4.4584389847554783e-05, "loss": 2.4271, "step": 2187000 }, { "epoch": 10.84, "learning_rate": 4.45831512611287e-05, "loss": 2.4425, "step": 2187500 }, { "epoch": 10.84, "learning_rate": 4.458191267470262e-05, "loss": 2.452, "step": 2188000 }, { "epoch": 10.84, "learning_rate": 4.4580674088276534e-05, "loss": 2.4415, "step": 2188500 }, { "epoch": 10.85, "learning_rate": 4.457943550185045e-05, "loss": 2.4458, "step": 2189000 }, { "epoch": 10.85, "learning_rate": 4.457819691542436e-05, "loss": 2.4406, "step": 2189500 }, { "epoch": 10.85, "learning_rate": 4.457696080617114e-05, "loss": 2.4222, "step": 2190000 }, { "epoch": 10.85, "learning_rate": 4.4575722219745054e-05, "loss": 2.458, "step": 2190500 }, { "epoch": 10.85, "learning_rate": 4.457448363331897e-05, "loss": 2.4399, "step": 2191000 }, { "epoch": 10.86, "learning_rate": 4.457324504689289e-05, "loss": 2.4346, "step": 2191500 }, { "epoch": 10.86, "learning_rate": 4.4572008937639656e-05, "loss": 2.4664, "step": 2192000 }, { "epoch": 10.86, "learning_rate": 4.4570770351213567e-05, "loss": 2.4396, "step": 2192500 }, { "epoch": 10.86, "learning_rate": 4.4569531764787484e-05, "loss": 2.4306, "step": 2193000 }, { "epoch": 10.87, "learning_rate": 4.45682931783614e-05, "loss": 2.4317, "step": 2193500 }, { "epoch": 10.87, "learning_rate": 4.456705459193532e-05, "loss": 2.4156, "step": 2194000 }, { "epoch": 10.87, "learning_rate": 4.4565816005509234e-05, "loss": 2.444, "step": 2194500 }, { "epoch": 10.87, "learning_rate": 4.456457741908315e-05, "loss": 2.4486, "step": 2195000 }, { "epoch": 10.88, "learning_rate": 4.456333883265707e-05, "loss": 2.4602, "step": 2195500 }, { "epoch": 10.88, "learning_rate": 4.456210024623098e-05, "loss": 2.4304, "step": 2196000 }, { "epoch": 10.88, "learning_rate": 4.4560861659804895e-05, "loss": 2.4429, "step": 2196500 }, { "epoch": 10.88, "learning_rate": 4.455962307337881e-05, "loss": 2.4199, "step": 2197000 }, { "epoch": 10.89, "learning_rate": 4.455838448695273e-05, "loss": 2.4348, "step": 2197500 }, { "epoch": 10.89, "learning_rate": 4.4557145900526646e-05, "loss": 2.4346, "step": 2198000 }, { "epoch": 10.89, "learning_rate": 4.455590979127342e-05, "loss": 2.4507, "step": 2198500 }, { "epoch": 10.89, "learning_rate": 4.455467120484733e-05, "loss": 2.443, "step": 2199000 }, { "epoch": 10.9, "learning_rate": 4.455343261842125e-05, "loss": 2.4219, "step": 2199500 }, { "epoch": 10.9, "learning_rate": 4.455219650916802e-05, "loss": 2.4231, "step": 2200000 }, { "epoch": 10.9, "learning_rate": 4.4550957922741934e-05, "loss": 2.4443, "step": 2200500 }, { "epoch": 10.9, "learning_rate": 4.454971933631585e-05, "loss": 2.4357, "step": 2201000 }, { "epoch": 10.91, "learning_rate": 4.454848570423547e-05, "loss": 2.4494, "step": 2201500 }, { "epoch": 10.91, "learning_rate": 4.454724711780939e-05, "loss": 2.4197, "step": 2202000 }, { "epoch": 10.91, "learning_rate": 4.4546008531383306e-05, "loss": 2.4512, "step": 2202500 }, { "epoch": 10.91, "learning_rate": 4.454476994495722e-05, "loss": 2.445, "step": 2203000 }, { "epoch": 10.92, "learning_rate": 4.454353135853114e-05, "loss": 2.449, "step": 2203500 }, { "epoch": 10.92, "learning_rate": 4.45422952492779e-05, "loss": 2.4383, "step": 2204000 }, { "epoch": 10.92, "learning_rate": 4.454105666285182e-05, "loss": 2.4595, "step": 2204500 }, { "epoch": 10.92, "learning_rate": 4.4539818076425736e-05, "loss": 2.4192, "step": 2205000 }, { "epoch": 10.93, "learning_rate": 4.453857948999965e-05, "loss": 2.4381, "step": 2205500 }, { "epoch": 10.93, "learning_rate": 4.453734090357357e-05, "loss": 2.429, "step": 2206000 }, { "epoch": 10.93, "learning_rate": 4.453610479432034e-05, "loss": 2.4534, "step": 2206500 }, { "epoch": 10.93, "learning_rate": 4.4534866207894255e-05, "loss": 2.4423, "step": 2207000 }, { "epoch": 10.94, "learning_rate": 4.453362762146817e-05, "loss": 2.4272, "step": 2207500 }, { "epoch": 10.94, "learning_rate": 4.453239151221494e-05, "loss": 2.4552, "step": 2208000 }, { "epoch": 10.94, "learning_rate": 4.453115292578886e-05, "loss": 2.4172, "step": 2208500 }, { "epoch": 10.94, "learning_rate": 4.4529914339362775e-05, "loss": 2.4602, "step": 2209000 }, { "epoch": 10.95, "learning_rate": 4.4528675752936685e-05, "loss": 2.4529, "step": 2209500 }, { "epoch": 10.95, "learning_rate": 4.45274371665106e-05, "loss": 2.4541, "step": 2210000 }, { "epoch": 10.95, "learning_rate": 4.452619858008452e-05, "loss": 2.4447, "step": 2210500 }, { "epoch": 10.95, "learning_rate": 4.4524959993658436e-05, "loss": 2.4507, "step": 2211000 }, { "epoch": 10.96, "learning_rate": 4.452372140723235e-05, "loss": 2.4288, "step": 2211500 }, { "epoch": 10.96, "learning_rate": 4.452248282080627e-05, "loss": 2.4157, "step": 2212000 }, { "epoch": 10.96, "learning_rate": 4.4521244234380187e-05, "loss": 2.4446, "step": 2212500 }, { "epoch": 10.96, "learning_rate": 4.4520005647954103e-05, "loss": 2.4222, "step": 2213000 }, { "epoch": 10.97, "learning_rate": 4.451876953870087e-05, "loss": 2.4498, "step": 2213500 }, { "epoch": 10.97, "learning_rate": 4.451753095227479e-05, "loss": 2.4253, "step": 2214000 }, { "epoch": 10.97, "learning_rate": 4.4516292365848706e-05, "loss": 2.4469, "step": 2214500 }, { "epoch": 10.97, "learning_rate": 4.451505377942262e-05, "loss": 2.4336, "step": 2215000 }, { "epoch": 10.98, "learning_rate": 4.451381519299654e-05, "loss": 2.4368, "step": 2215500 }, { "epoch": 10.98, "learning_rate": 4.451257908374331e-05, "loss": 2.4251, "step": 2216000 }, { "epoch": 10.98, "learning_rate": 4.451134297449008e-05, "loss": 2.4311, "step": 2216500 }, { "epoch": 10.98, "learning_rate": 4.4510104388063995e-05, "loss": 2.4489, "step": 2217000 }, { "epoch": 10.99, "learning_rate": 4.450886580163791e-05, "loss": 2.4204, "step": 2217500 }, { "epoch": 10.99, "learning_rate": 4.450762721521183e-05, "loss": 2.4315, "step": 2218000 }, { "epoch": 10.99, "learning_rate": 4.450638862878574e-05, "loss": 2.4432, "step": 2218500 }, { "epoch": 10.99, "learning_rate": 4.4505150042359655e-05, "loss": 2.4581, "step": 2219000 }, { "epoch": 11.0, "learning_rate": 4.450391145593357e-05, "loss": 2.4339, "step": 2219500 }, { "epoch": 11.0, "learning_rate": 4.450267286950749e-05, "loss": 2.4147, "step": 2220000 }, { "epoch": 11.0, "eval_accuracy": 0.6435940740781358, "eval_accuracy_mlm": 0.596974884280312, "eval_accuracy_nsp": 0.863742797861617, "eval_loss": 2.406216859817505, "eval_runtime": 146.12, "eval_samples_per_second": 1744.86, "eval_steps_per_second": 72.707, "step": 2220273 }, { "epoch": 11.0, "learning_rate": 4.4501434283081406e-05, "loss": 2.4115, "step": 2220500 }, { "epoch": 11.0, "learning_rate": 4.450019569665532e-05, "loss": 2.3987, "step": 2221000 }, { "epoch": 11.01, "learning_rate": 4.449895958740209e-05, "loss": 2.392, "step": 2221500 }, { "epoch": 11.01, "learning_rate": 4.449772100097601e-05, "loss": 2.3835, "step": 2222000 }, { "epoch": 11.01, "learning_rate": 4.4496482414549926e-05, "loss": 2.4125, "step": 2222500 }, { "epoch": 11.01, "learning_rate": 4.4495243828123836e-05, "loss": 2.4218, "step": 2223000 }, { "epoch": 11.02, "learning_rate": 4.449400524169775e-05, "loss": 2.4222, "step": 2223500 }, { "epoch": 11.02, "learning_rate": 4.449276665527167e-05, "loss": 2.3963, "step": 2224000 }, { "epoch": 11.02, "learning_rate": 4.449152806884559e-05, "loss": 2.4003, "step": 2224500 }, { "epoch": 11.02, "learning_rate": 4.4490289482419504e-05, "loss": 2.4131, "step": 2225000 }, { "epoch": 11.03, "learning_rate": 4.448905089599342e-05, "loss": 2.426, "step": 2225500 }, { "epoch": 11.03, "learning_rate": 4.448781230956734e-05, "loss": 2.4027, "step": 2226000 }, { "epoch": 11.03, "learning_rate": 4.4486573723141255e-05, "loss": 2.4331, "step": 2226500 }, { "epoch": 11.03, "learning_rate": 4.448533761388802e-05, "loss": 2.4009, "step": 2227000 }, { "epoch": 11.04, "learning_rate": 4.448409902746194e-05, "loss": 2.4009, "step": 2227500 }, { "epoch": 11.04, "learning_rate": 4.448286044103586e-05, "loss": 2.4034, "step": 2228000 }, { "epoch": 11.04, "learning_rate": 4.4481621854609774e-05, "loss": 2.4055, "step": 2228500 }, { "epoch": 11.04, "learning_rate": 4.448038326818369e-05, "loss": 2.3944, "step": 2229000 }, { "epoch": 11.05, "learning_rate": 4.447914468175761e-05, "loss": 2.4153, "step": 2229500 }, { "epoch": 11.05, "learning_rate": 4.4477906095331525e-05, "loss": 2.4075, "step": 2230000 }, { "epoch": 11.05, "learning_rate": 4.447666750890544e-05, "loss": 2.4052, "step": 2230500 }, { "epoch": 11.05, "learning_rate": 4.447542892247936e-05, "loss": 2.4021, "step": 2231000 }, { "epoch": 11.06, "learning_rate": 4.447419281322612e-05, "loss": 2.3982, "step": 2231500 }, { "epoch": 11.06, "learning_rate": 4.447295422680004e-05, "loss": 2.4161, "step": 2232000 }, { "epoch": 11.06, "learning_rate": 4.4471715640373955e-05, "loss": 2.4319, "step": 2232500 }, { "epoch": 11.06, "learning_rate": 4.447047705394787e-05, "loss": 2.4021, "step": 2233000 }, { "epoch": 11.07, "learning_rate": 4.446923846752179e-05, "loss": 2.4029, "step": 2233500 }, { "epoch": 11.07, "learning_rate": 4.4467999881095705e-05, "loss": 2.4136, "step": 2234000 }, { "epoch": 11.07, "learning_rate": 4.446676129466962e-05, "loss": 2.4027, "step": 2234500 }, { "epoch": 11.07, "learning_rate": 4.446552518541639e-05, "loss": 2.4079, "step": 2235000 }, { "epoch": 11.08, "learning_rate": 4.446428659899031e-05, "loss": 2.4259, "step": 2235500 }, { "epoch": 11.08, "learning_rate": 4.4463048012564225e-05, "loss": 2.4226, "step": 2236000 }, { "epoch": 11.08, "learning_rate": 4.446180942613814e-05, "loss": 2.4198, "step": 2236500 }, { "epoch": 11.08, "learning_rate": 4.446057083971206e-05, "loss": 2.3826, "step": 2237000 }, { "epoch": 11.09, "learning_rate": 4.4459332253285976e-05, "loss": 2.4163, "step": 2237500 }, { "epoch": 11.09, "learning_rate": 4.445809366685989e-05, "loss": 2.403, "step": 2238000 }, { "epoch": 11.09, "learning_rate": 4.445685508043381e-05, "loss": 2.4073, "step": 2238500 }, { "epoch": 11.09, "learning_rate": 4.445561897118057e-05, "loss": 2.4129, "step": 2239000 }, { "epoch": 11.1, "learning_rate": 4.445438038475449e-05, "loss": 2.4009, "step": 2239500 }, { "epoch": 11.1, "learning_rate": 4.4453141798328406e-05, "loss": 2.4227, "step": 2240000 }, { "epoch": 11.1, "learning_rate": 4.445190321190232e-05, "loss": 2.4006, "step": 2240500 }, { "epoch": 11.1, "learning_rate": 4.445066710264909e-05, "loss": 2.3994, "step": 2241000 }, { "epoch": 11.11, "learning_rate": 4.444942851622301e-05, "loss": 2.4206, "step": 2241500 }, { "epoch": 11.11, "learning_rate": 4.4448189929796925e-05, "loss": 2.4191, "step": 2242000 }, { "epoch": 11.11, "learning_rate": 4.4446953820543694e-05, "loss": 2.3991, "step": 2242500 }, { "epoch": 11.11, "learning_rate": 4.444571523411761e-05, "loss": 2.4203, "step": 2243000 }, { "epoch": 11.12, "learning_rate": 4.444447664769152e-05, "loss": 2.4075, "step": 2243500 }, { "epoch": 11.12, "learning_rate": 4.444323806126544e-05, "loss": 2.4173, "step": 2244000 }, { "epoch": 11.12, "learning_rate": 4.4441999474839355e-05, "loss": 2.4242, "step": 2244500 }, { "epoch": 11.12, "learning_rate": 4.444076088841327e-05, "loss": 2.4012, "step": 2245000 }, { "epoch": 11.12, "learning_rate": 4.443952230198719e-05, "loss": 2.4417, "step": 2245500 }, { "epoch": 11.13, "learning_rate": 4.4438283715561106e-05, "loss": 2.3909, "step": 2246000 }, { "epoch": 11.13, "learning_rate": 4.443704512913502e-05, "loss": 2.4142, "step": 2246500 }, { "epoch": 11.13, "learning_rate": 4.443580901988179e-05, "loss": 2.4145, "step": 2247000 }, { "epoch": 11.13, "learning_rate": 4.443457043345571e-05, "loss": 2.4039, "step": 2247500 }, { "epoch": 11.14, "learning_rate": 4.4433331847029625e-05, "loss": 2.4166, "step": 2248000 }, { "epoch": 11.14, "learning_rate": 4.443209326060354e-05, "loss": 2.4373, "step": 2248500 }, { "epoch": 11.14, "learning_rate": 4.443085467417746e-05, "loss": 2.3937, "step": 2249000 }, { "epoch": 11.14, "learning_rate": 4.4429616087751376e-05, "loss": 2.4121, "step": 2249500 }, { "epoch": 11.15, "learning_rate": 4.442837997849814e-05, "loss": 2.4216, "step": 2250000 }, { "epoch": 11.15, "learning_rate": 4.4427141392072055e-05, "loss": 2.4062, "step": 2250500 }, { "epoch": 11.15, "learning_rate": 4.442590280564597e-05, "loss": 2.3873, "step": 2251000 }, { "epoch": 11.15, "learning_rate": 4.442466421921989e-05, "loss": 2.42, "step": 2251500 }, { "epoch": 11.16, "learning_rate": 4.4423425632793806e-05, "loss": 2.3876, "step": 2252000 }, { "epoch": 11.16, "learning_rate": 4.442218704636772e-05, "loss": 2.4366, "step": 2252500 }, { "epoch": 11.16, "learning_rate": 4.442095093711449e-05, "loss": 2.4273, "step": 2253000 }, { "epoch": 11.16, "learning_rate": 4.441971235068841e-05, "loss": 2.4109, "step": 2253500 }, { "epoch": 11.17, "learning_rate": 4.4418473764262325e-05, "loss": 2.4171, "step": 2254000 }, { "epoch": 11.17, "learning_rate": 4.441723517783624e-05, "loss": 2.4281, "step": 2254500 }, { "epoch": 11.17, "learning_rate": 4.441599659141016e-05, "loss": 2.4044, "step": 2255000 }, { "epoch": 11.17, "learning_rate": 4.441476048215693e-05, "loss": 2.3969, "step": 2255500 }, { "epoch": 11.18, "learning_rate": 4.4413521895730845e-05, "loss": 2.4335, "step": 2256000 }, { "epoch": 11.18, "learning_rate": 4.441228330930476e-05, "loss": 2.4196, "step": 2256500 }, { "epoch": 11.18, "learning_rate": 4.441104472287867e-05, "loss": 2.4018, "step": 2257000 }, { "epoch": 11.18, "learning_rate": 4.440980613645259e-05, "loss": 2.4158, "step": 2257500 }, { "epoch": 11.19, "learning_rate": 4.4408567550026506e-05, "loss": 2.4362, "step": 2258000 }, { "epoch": 11.19, "learning_rate": 4.4407331440773275e-05, "loss": 2.4383, "step": 2258500 }, { "epoch": 11.19, "learning_rate": 4.440609285434719e-05, "loss": 2.4255, "step": 2259000 }, { "epoch": 11.19, "learning_rate": 4.440485426792111e-05, "loss": 2.4434, "step": 2259500 }, { "epoch": 11.2, "learning_rate": 4.4403615681495026e-05, "loss": 2.4004, "step": 2260000 }, { "epoch": 11.2, "learning_rate": 4.440237709506894e-05, "loss": 2.4139, "step": 2260500 }, { "epoch": 11.2, "learning_rate": 4.440113850864286e-05, "loss": 2.4226, "step": 2261000 }, { "epoch": 11.2, "learning_rate": 4.439990239938963e-05, "loss": 2.4301, "step": 2261500 }, { "epoch": 11.21, "learning_rate": 4.4398663812963545e-05, "loss": 2.4222, "step": 2262000 }, { "epoch": 11.21, "learning_rate": 4.439742522653746e-05, "loss": 2.4225, "step": 2262500 }, { "epoch": 11.21, "learning_rate": 4.439618664011138e-05, "loss": 2.4212, "step": 2263000 }, { "epoch": 11.21, "learning_rate": 4.439494805368529e-05, "loss": 2.4104, "step": 2263500 }, { "epoch": 11.22, "learning_rate": 4.439371194443206e-05, "loss": 2.4221, "step": 2264000 }, { "epoch": 11.22, "learning_rate": 4.4392473358005975e-05, "loss": 2.4218, "step": 2264500 }, { "epoch": 11.22, "learning_rate": 4.439123477157989e-05, "loss": 2.4348, "step": 2265000 }, { "epoch": 11.22, "learning_rate": 4.438999618515381e-05, "loss": 2.4062, "step": 2265500 }, { "epoch": 11.23, "learning_rate": 4.4388757598727726e-05, "loss": 2.4061, "step": 2266000 }, { "epoch": 11.23, "learning_rate": 4.438751901230164e-05, "loss": 2.4101, "step": 2266500 }, { "epoch": 11.23, "learning_rate": 4.438628290304841e-05, "loss": 2.4078, "step": 2267000 }, { "epoch": 11.23, "learning_rate": 4.438504431662233e-05, "loss": 2.4107, "step": 2267500 }, { "epoch": 11.24, "learning_rate": 4.4383805730196245e-05, "loss": 2.412, "step": 2268000 }, { "epoch": 11.24, "learning_rate": 4.438256714377016e-05, "loss": 2.4134, "step": 2268500 }, { "epoch": 11.24, "learning_rate": 4.4381331034516924e-05, "loss": 2.4223, "step": 2269000 }, { "epoch": 11.24, "learning_rate": 4.438009244809084e-05, "loss": 2.4328, "step": 2269500 }, { "epoch": 11.25, "learning_rate": 4.437885386166476e-05, "loss": 2.4224, "step": 2270000 }, { "epoch": 11.25, "learning_rate": 4.4377615275238675e-05, "loss": 2.4191, "step": 2270500 }, { "epoch": 11.25, "learning_rate": 4.437637916598545e-05, "loss": 2.4188, "step": 2271000 }, { "epoch": 11.25, "learning_rate": 4.437514057955936e-05, "loss": 2.4173, "step": 2271500 }, { "epoch": 11.26, "learning_rate": 4.437390199313328e-05, "loss": 2.3998, "step": 2272000 }, { "epoch": 11.26, "learning_rate": 4.4372663406707195e-05, "loss": 2.4385, "step": 2272500 }, { "epoch": 11.26, "learning_rate": 4.437142482028111e-05, "loss": 2.3898, "step": 2273000 }, { "epoch": 11.26, "learning_rate": 4.437018623385503e-05, "loss": 2.4457, "step": 2273500 }, { "epoch": 11.27, "learning_rate": 4.4368947647428945e-05, "loss": 2.4537, "step": 2274000 }, { "epoch": 11.27, "learning_rate": 4.436770906100286e-05, "loss": 2.4273, "step": 2274500 }, { "epoch": 11.27, "learning_rate": 4.436647047457678e-05, "loss": 2.4318, "step": 2275000 }, { "epoch": 11.27, "learning_rate": 4.4365231888150696e-05, "loss": 2.4194, "step": 2275500 }, { "epoch": 11.28, "learning_rate": 4.436399330172461e-05, "loss": 2.4104, "step": 2276000 }, { "epoch": 11.28, "learning_rate": 4.4362757192471375e-05, "loss": 2.4375, "step": 2276500 }, { "epoch": 11.28, "learning_rate": 4.436152108321815e-05, "loss": 2.4367, "step": 2277000 }, { "epoch": 11.28, "learning_rate": 4.436028249679207e-05, "loss": 2.4193, "step": 2277500 }, { "epoch": 11.29, "learning_rate": 4.435904638753883e-05, "loss": 2.4272, "step": 2278000 }, { "epoch": 11.29, "learning_rate": 4.4357807801112747e-05, "loss": 2.441, "step": 2278500 }, { "epoch": 11.29, "learning_rate": 4.4356569214686664e-05, "loss": 2.4035, "step": 2279000 }, { "epoch": 11.29, "learning_rate": 4.435533062826058e-05, "loss": 2.4012, "step": 2279500 }, { "epoch": 11.3, "learning_rate": 4.43540920418345e-05, "loss": 2.4267, "step": 2280000 }, { "epoch": 11.3, "learning_rate": 4.4352853455408414e-05, "loss": 2.4075, "step": 2280500 }, { "epoch": 11.3, "learning_rate": 4.4351614868982324e-05, "loss": 2.4112, "step": 2281000 }, { "epoch": 11.3, "learning_rate": 4.435037628255624e-05, "loss": 2.4298, "step": 2281500 }, { "epoch": 11.31, "learning_rate": 4.434913769613016e-05, "loss": 2.4208, "step": 2282000 }, { "epoch": 11.31, "learning_rate": 4.4347899109704075e-05, "loss": 2.4449, "step": 2282500 }, { "epoch": 11.31, "learning_rate": 4.434666052327799e-05, "loss": 2.4218, "step": 2283000 }, { "epoch": 11.31, "learning_rate": 4.434542193685191e-05, "loss": 2.4302, "step": 2283500 }, { "epoch": 11.32, "learning_rate": 4.4344183350425826e-05, "loss": 2.4257, "step": 2284000 }, { "epoch": 11.32, "learning_rate": 4.4342947241172595e-05, "loss": 2.4383, "step": 2284500 }, { "epoch": 11.32, "learning_rate": 4.434170865474651e-05, "loss": 2.3936, "step": 2285000 }, { "epoch": 11.32, "learning_rate": 4.434047254549328e-05, "loss": 2.4184, "step": 2285500 }, { "epoch": 11.33, "learning_rate": 4.43392339590672e-05, "loss": 2.413, "step": 2286000 }, { "epoch": 11.33, "learning_rate": 4.4337995372641114e-05, "loss": 2.4455, "step": 2286500 }, { "epoch": 11.33, "learning_rate": 4.4336756786215025e-05, "loss": 2.4197, "step": 2287000 }, { "epoch": 11.33, "learning_rate": 4.433551819978894e-05, "loss": 2.4045, "step": 2287500 }, { "epoch": 11.34, "learning_rate": 4.433427961336286e-05, "loss": 2.4081, "step": 2288000 }, { "epoch": 11.34, "learning_rate": 4.4333041026936775e-05, "loss": 2.4123, "step": 2288500 }, { "epoch": 11.34, "learning_rate": 4.433180244051069e-05, "loss": 2.4212, "step": 2289000 }, { "epoch": 11.34, "learning_rate": 4.433056633125747e-05, "loss": 2.4364, "step": 2289500 }, { "epoch": 11.35, "learning_rate": 4.4329327744831385e-05, "loss": 2.4244, "step": 2290000 }, { "epoch": 11.35, "learning_rate": 4.4328091635578154e-05, "loss": 2.439, "step": 2290500 }, { "epoch": 11.35, "learning_rate": 4.432685304915207e-05, "loss": 2.4081, "step": 2291000 }, { "epoch": 11.35, "learning_rate": 4.432561446272598e-05, "loss": 2.4084, "step": 2291500 }, { "epoch": 11.36, "learning_rate": 4.43243758762999e-05, "loss": 2.4155, "step": 2292000 }, { "epoch": 11.36, "learning_rate": 4.4323137289873815e-05, "loss": 2.4206, "step": 2292500 }, { "epoch": 11.36, "learning_rate": 4.432189870344773e-05, "loss": 2.4201, "step": 2293000 }, { "epoch": 11.36, "learning_rate": 4.43206625941945e-05, "loss": 2.4092, "step": 2293500 }, { "epoch": 11.37, "learning_rate": 4.431942400776842e-05, "loss": 2.4223, "step": 2294000 }, { "epoch": 11.37, "learning_rate": 4.4318185421342334e-05, "loss": 2.4183, "step": 2294500 }, { "epoch": 11.37, "learning_rate": 4.431694683491625e-05, "loss": 2.424, "step": 2295000 }, { "epoch": 11.37, "learning_rate": 4.431570824849017e-05, "loss": 2.4262, "step": 2295500 }, { "epoch": 11.38, "learning_rate": 4.4314469662064085e-05, "loss": 2.4002, "step": 2296000 }, { "epoch": 11.38, "learning_rate": 4.4313231075637995e-05, "loss": 2.4274, "step": 2296500 }, { "epoch": 11.38, "learning_rate": 4.431199496638477e-05, "loss": 2.4295, "step": 2297000 }, { "epoch": 11.38, "learning_rate": 4.431075637995869e-05, "loss": 2.4161, "step": 2297500 }, { "epoch": 11.39, "learning_rate": 4.4309517793532605e-05, "loss": 2.4199, "step": 2298000 }, { "epoch": 11.39, "learning_rate": 4.4308279207106515e-05, "loss": 2.4241, "step": 2298500 }, { "epoch": 11.39, "learning_rate": 4.430704062068043e-05, "loss": 2.4267, "step": 2299000 }, { "epoch": 11.39, "learning_rate": 4.430580203425435e-05, "loss": 2.4256, "step": 2299500 }, { "epoch": 11.39, "learning_rate": 4.4304563447828265e-05, "loss": 2.4038, "step": 2300000 }, { "epoch": 11.4, "learning_rate": 4.4303324861402176e-05, "loss": 2.419, "step": 2300500 }, { "epoch": 11.4, "learning_rate": 4.430208627497609e-05, "loss": 2.4068, "step": 2301000 }, { "epoch": 11.4, "learning_rate": 4.430085016572287e-05, "loss": 2.4023, "step": 2301500 }, { "epoch": 11.4, "learning_rate": 4.4299611579296785e-05, "loss": 2.4277, "step": 2302000 }, { "epoch": 11.41, "learning_rate": 4.42983729928707e-05, "loss": 2.4221, "step": 2302500 }, { "epoch": 11.41, "learning_rate": 4.429713440644461e-05, "loss": 2.429, "step": 2303000 }, { "epoch": 11.41, "learning_rate": 4.429589582001853e-05, "loss": 2.429, "step": 2303500 }, { "epoch": 11.41, "learning_rate": 4.4294657233592446e-05, "loss": 2.4134, "step": 2304000 }, { "epoch": 11.42, "learning_rate": 4.429341864716636e-05, "loss": 2.4422, "step": 2304500 }, { "epoch": 11.42, "learning_rate": 4.429218253791313e-05, "loss": 2.438, "step": 2305000 }, { "epoch": 11.42, "learning_rate": 4.429094395148705e-05, "loss": 2.4249, "step": 2305500 }, { "epoch": 11.42, "learning_rate": 4.428970784223382e-05, "loss": 2.4145, "step": 2306000 }, { "epoch": 11.43, "learning_rate": 4.4288469255807734e-05, "loss": 2.4202, "step": 2306500 }, { "epoch": 11.43, "learning_rate": 4.428723066938165e-05, "loss": 2.4222, "step": 2307000 }, { "epoch": 11.43, "learning_rate": 4.428599208295557e-05, "loss": 2.4162, "step": 2307500 }, { "epoch": 11.43, "learning_rate": 4.428475597370234e-05, "loss": 2.4277, "step": 2308000 }, { "epoch": 11.44, "learning_rate": 4.4283517387276254e-05, "loss": 2.4503, "step": 2308500 }, { "epoch": 11.44, "learning_rate": 4.4282281278023016e-05, "loss": 2.4174, "step": 2309000 }, { "epoch": 11.44, "learning_rate": 4.428104269159693e-05, "loss": 2.4125, "step": 2309500 }, { "epoch": 11.44, "learning_rate": 4.427980410517085e-05, "loss": 2.4267, "step": 2310000 }, { "epoch": 11.45, "learning_rate": 4.427856551874477e-05, "loss": 2.4294, "step": 2310500 }, { "epoch": 11.45, "learning_rate": 4.4277326932318684e-05, "loss": 2.4163, "step": 2311000 }, { "epoch": 11.45, "learning_rate": 4.42760883458926e-05, "loss": 2.406, "step": 2311500 }, { "epoch": 11.45, "learning_rate": 4.427484975946652e-05, "loss": 2.4035, "step": 2312000 }, { "epoch": 11.46, "learning_rate": 4.4273611173040435e-05, "loss": 2.4194, "step": 2312500 }, { "epoch": 11.46, "learning_rate": 4.42723750637872e-05, "loss": 2.4203, "step": 2313000 }, { "epoch": 11.46, "learning_rate": 4.427113647736112e-05, "loss": 2.4323, "step": 2313500 }, { "epoch": 11.46, "learning_rate": 4.426989789093504e-05, "loss": 2.4078, "step": 2314000 }, { "epoch": 11.47, "learning_rate": 4.4268659304508954e-05, "loss": 2.4321, "step": 2314500 }, { "epoch": 11.47, "learning_rate": 4.426742071808287e-05, "loss": 2.4087, "step": 2315000 }, { "epoch": 11.47, "learning_rate": 4.426618213165679e-05, "loss": 2.4277, "step": 2315500 }, { "epoch": 11.47, "learning_rate": 4.4264943545230705e-05, "loss": 2.41, "step": 2316000 }, { "epoch": 11.48, "learning_rate": 4.426370495880462e-05, "loss": 2.4286, "step": 2316500 }, { "epoch": 11.48, "learning_rate": 4.426246637237854e-05, "loss": 2.4222, "step": 2317000 }, { "epoch": 11.48, "learning_rate": 4.4261227785952456e-05, "loss": 2.4014, "step": 2317500 }, { "epoch": 11.48, "learning_rate": 4.425998919952637e-05, "loss": 2.4204, "step": 2318000 }, { "epoch": 11.49, "learning_rate": 4.425875061310028e-05, "loss": 2.3936, "step": 2318500 }, { "epoch": 11.49, "learning_rate": 4.42575120266742e-05, "loss": 2.4184, "step": 2319000 }, { "epoch": 11.49, "learning_rate": 4.425627591742097e-05, "loss": 2.4393, "step": 2319500 }, { "epoch": 11.49, "learning_rate": 4.4255037330994885e-05, "loss": 2.4238, "step": 2320000 }, { "epoch": 11.5, "learning_rate": 4.4253801221741654e-05, "loss": 2.4157, "step": 2320500 }, { "epoch": 11.5, "learning_rate": 4.425256263531557e-05, "loss": 2.4123, "step": 2321000 }, { "epoch": 11.5, "learning_rate": 4.425132404888949e-05, "loss": 2.4241, "step": 2321500 }, { "epoch": 11.5, "learning_rate": 4.425008793963625e-05, "loss": 2.4004, "step": 2322000 }, { "epoch": 11.51, "learning_rate": 4.424884935321017e-05, "loss": 2.4401, "step": 2322500 }, { "epoch": 11.51, "learning_rate": 4.4247610766784084e-05, "loss": 2.4317, "step": 2323000 }, { "epoch": 11.51, "learning_rate": 4.4246372180358e-05, "loss": 2.4115, "step": 2323500 }, { "epoch": 11.51, "learning_rate": 4.424513359393192e-05, "loss": 2.4012, "step": 2324000 }, { "epoch": 11.52, "learning_rate": 4.424389748467869e-05, "loss": 2.4115, "step": 2324500 }, { "epoch": 11.52, "learning_rate": 4.4242658898252604e-05, "loss": 2.392, "step": 2325000 }, { "epoch": 11.52, "learning_rate": 4.424142031182652e-05, "loss": 2.4307, "step": 2325500 }, { "epoch": 11.52, "learning_rate": 4.424018172540044e-05, "loss": 2.4094, "step": 2326000 }, { "epoch": 11.53, "learning_rate": 4.4238943138974354e-05, "loss": 2.4216, "step": 2326500 }, { "epoch": 11.53, "learning_rate": 4.423770455254827e-05, "loss": 2.4106, "step": 2327000 }, { "epoch": 11.53, "learning_rate": 4.423646596612219e-05, "loss": 2.4049, "step": 2327500 }, { "epoch": 11.53, "learning_rate": 4.4235227379696105e-05, "loss": 2.4113, "step": 2328000 }, { "epoch": 11.54, "learning_rate": 4.423398879327002e-05, "loss": 2.4132, "step": 2328500 }, { "epoch": 11.54, "learning_rate": 4.423275020684394e-05, "loss": 2.4204, "step": 2329000 }, { "epoch": 11.54, "learning_rate": 4.42315140975907e-05, "loss": 2.4299, "step": 2329500 }, { "epoch": 11.54, "learning_rate": 4.423027551116462e-05, "loss": 2.419, "step": 2330000 }, { "epoch": 11.55, "learning_rate": 4.4229036924738535e-05, "loss": 2.4446, "step": 2330500 }, { "epoch": 11.55, "learning_rate": 4.422779833831245e-05, "loss": 2.4427, "step": 2331000 }, { "epoch": 11.55, "learning_rate": 4.422655975188637e-05, "loss": 2.4376, "step": 2331500 }, { "epoch": 11.55, "learning_rate": 4.4225321165460286e-05, "loss": 2.4157, "step": 2332000 }, { "epoch": 11.56, "learning_rate": 4.42240825790342e-05, "loss": 2.4434, "step": 2332500 }, { "epoch": 11.56, "learning_rate": 4.422284399260812e-05, "loss": 2.4163, "step": 2333000 }, { "epoch": 11.56, "learning_rate": 4.4221605406182036e-05, "loss": 2.4218, "step": 2333500 }, { "epoch": 11.56, "learning_rate": 4.4220369296928805e-05, "loss": 2.4099, "step": 2334000 }, { "epoch": 11.57, "learning_rate": 4.421913071050272e-05, "loss": 2.4475, "step": 2334500 }, { "epoch": 11.57, "learning_rate": 4.421789460124949e-05, "loss": 2.4169, "step": 2335000 }, { "epoch": 11.57, "learning_rate": 4.42166560148234e-05, "loss": 2.4357, "step": 2335500 }, { "epoch": 11.57, "learning_rate": 4.421541742839732e-05, "loss": 2.4186, "step": 2336000 }, { "epoch": 11.58, "learning_rate": 4.4214178841971235e-05, "loss": 2.438, "step": 2336500 }, { "epoch": 11.58, "learning_rate": 4.421294025554515e-05, "loss": 2.4189, "step": 2337000 }, { "epoch": 11.58, "learning_rate": 4.421170414629192e-05, "loss": 2.4401, "step": 2337500 }, { "epoch": 11.58, "learning_rate": 4.421046555986584e-05, "loss": 2.4355, "step": 2338000 }, { "epoch": 11.59, "learning_rate": 4.4209226973439755e-05, "loss": 2.4278, "step": 2338500 }, { "epoch": 11.59, "learning_rate": 4.420798838701367e-05, "loss": 2.4345, "step": 2339000 }, { "epoch": 11.59, "learning_rate": 4.420674980058759e-05, "loss": 2.4363, "step": 2339500 }, { "epoch": 11.59, "learning_rate": 4.4205511214161505e-05, "loss": 2.4035, "step": 2340000 }, { "epoch": 11.6, "learning_rate": 4.4204275104908274e-05, "loss": 2.4417, "step": 2340500 }, { "epoch": 11.6, "learning_rate": 4.420303651848219e-05, "loss": 2.4219, "step": 2341000 }, { "epoch": 11.6, "learning_rate": 4.420179793205611e-05, "loss": 2.4465, "step": 2341500 }, { "epoch": 11.6, "learning_rate": 4.420056182280287e-05, "loss": 2.4282, "step": 2342000 }, { "epoch": 11.61, "learning_rate": 4.419932323637679e-05, "loss": 2.4328, "step": 2342500 }, { "epoch": 11.61, "learning_rate": 4.4198084649950704e-05, "loss": 2.4098, "step": 2343000 }, { "epoch": 11.61, "learning_rate": 4.419684606352462e-05, "loss": 2.4214, "step": 2343500 }, { "epoch": 11.61, "learning_rate": 4.419560747709854e-05, "loss": 2.442, "step": 2344000 }, { "epoch": 11.62, "learning_rate": 4.4194368890672455e-05, "loss": 2.4259, "step": 2344500 }, { "epoch": 11.62, "learning_rate": 4.419313030424637e-05, "loss": 2.4322, "step": 2345000 }, { "epoch": 11.62, "learning_rate": 4.419189171782029e-05, "loss": 2.4194, "step": 2345500 }, { "epoch": 11.62, "learning_rate": 4.4190653131394206e-05, "loss": 2.4452, "step": 2346000 }, { "epoch": 11.63, "learning_rate": 4.418941454496812e-05, "loss": 2.4208, "step": 2346500 }, { "epoch": 11.63, "learning_rate": 4.418817595854204e-05, "loss": 2.4285, "step": 2347000 }, { "epoch": 11.63, "learning_rate": 4.418693984928881e-05, "loss": 2.4065, "step": 2347500 }, { "epoch": 11.63, "learning_rate": 4.418570374003557e-05, "loss": 2.42, "step": 2348000 }, { "epoch": 11.64, "learning_rate": 4.418446515360949e-05, "loss": 2.4344, "step": 2348500 }, { "epoch": 11.64, "learning_rate": 4.4183226567183404e-05, "loss": 2.4254, "step": 2349000 }, { "epoch": 11.64, "learning_rate": 4.418198798075732e-05, "loss": 2.4336, "step": 2349500 }, { "epoch": 11.64, "learning_rate": 4.418074939433124e-05, "loss": 2.4124, "step": 2350000 }, { "epoch": 11.65, "learning_rate": 4.4179510807905155e-05, "loss": 2.4268, "step": 2350500 }, { "epoch": 11.65, "learning_rate": 4.417827222147907e-05, "loss": 2.4369, "step": 2351000 }, { "epoch": 11.65, "learning_rate": 4.417703363505299e-05, "loss": 2.4431, "step": 2351500 }, { "epoch": 11.65, "learning_rate": 4.4175795048626906e-05, "loss": 2.4248, "step": 2352000 }, { "epoch": 11.66, "learning_rate": 4.417455646220082e-05, "loss": 2.3927, "step": 2352500 }, { "epoch": 11.66, "learning_rate": 4.417331787577474e-05, "loss": 2.4374, "step": 2353000 }, { "epoch": 11.66, "learning_rate": 4.4172079289348656e-05, "loss": 2.4195, "step": 2353500 }, { "epoch": 11.66, "learning_rate": 4.417084070292257e-05, "loss": 2.4295, "step": 2354000 }, { "epoch": 11.67, "learning_rate": 4.416960459366934e-05, "loss": 2.4114, "step": 2354500 }, { "epoch": 11.67, "learning_rate": 4.416836600724326e-05, "loss": 2.4245, "step": 2355000 }, { "epoch": 11.67, "learning_rate": 4.416712742081717e-05, "loss": 2.4333, "step": 2355500 }, { "epoch": 11.67, "learning_rate": 4.416589131156394e-05, "loss": 2.4191, "step": 2356000 }, { "epoch": 11.67, "learning_rate": 4.4164652725137855e-05, "loss": 2.3961, "step": 2356500 }, { "epoch": 11.68, "learning_rate": 4.4163416615884624e-05, "loss": 2.4087, "step": 2357000 }, { "epoch": 11.68, "learning_rate": 4.416217802945854e-05, "loss": 2.4227, "step": 2357500 }, { "epoch": 11.68, "learning_rate": 4.416093944303246e-05, "loss": 2.4177, "step": 2358000 }, { "epoch": 11.68, "learning_rate": 4.4159700856606375e-05, "loss": 2.4067, "step": 2358500 }, { "epoch": 11.69, "learning_rate": 4.415846227018029e-05, "loss": 2.4215, "step": 2359000 }, { "epoch": 11.69, "learning_rate": 4.4157226160927054e-05, "loss": 2.4338, "step": 2359500 }, { "epoch": 11.69, "learning_rate": 4.415598757450097e-05, "loss": 2.4406, "step": 2360000 }, { "epoch": 11.69, "learning_rate": 4.415474898807489e-05, "loss": 2.4358, "step": 2360500 }, { "epoch": 11.7, "learning_rate": 4.4153510401648804e-05, "loss": 2.4452, "step": 2361000 }, { "epoch": 11.7, "learning_rate": 4.415227181522272e-05, "loss": 2.4173, "step": 2361500 }, { "epoch": 11.7, "learning_rate": 4.415103322879664e-05, "loss": 2.4111, "step": 2362000 }, { "epoch": 11.7, "learning_rate": 4.4149794642370555e-05, "loss": 2.4227, "step": 2362500 }, { "epoch": 11.71, "learning_rate": 4.414855605594447e-05, "loss": 2.4011, "step": 2363000 }, { "epoch": 11.71, "learning_rate": 4.414731994669124e-05, "loss": 2.4455, "step": 2363500 }, { "epoch": 11.71, "learning_rate": 4.414608136026516e-05, "loss": 2.4459, "step": 2364000 }, { "epoch": 11.71, "learning_rate": 4.4144842773839075e-05, "loss": 2.4196, "step": 2364500 }, { "epoch": 11.72, "learning_rate": 4.414360418741299e-05, "loss": 2.4346, "step": 2365000 }, { "epoch": 11.72, "learning_rate": 4.414236560098691e-05, "loss": 2.4307, "step": 2365500 }, { "epoch": 11.72, "learning_rate": 4.414113196890653e-05, "loss": 2.4386, "step": 2366000 }, { "epoch": 11.72, "learning_rate": 4.4139893382480446e-05, "loss": 2.4333, "step": 2366500 }, { "epoch": 11.73, "learning_rate": 4.413865479605436e-05, "loss": 2.4199, "step": 2367000 }, { "epoch": 11.73, "learning_rate": 4.413741620962828e-05, "loss": 2.4048, "step": 2367500 }, { "epoch": 11.73, "learning_rate": 4.41361776232022e-05, "loss": 2.4399, "step": 2368000 }, { "epoch": 11.73, "learning_rate": 4.4134939036776114e-05, "loss": 2.4399, "step": 2368500 }, { "epoch": 11.74, "learning_rate": 4.4133700450350024e-05, "loss": 2.4265, "step": 2369000 }, { "epoch": 11.74, "learning_rate": 4.413246186392394e-05, "loss": 2.4374, "step": 2369500 }, { "epoch": 11.74, "learning_rate": 4.413122327749786e-05, "loss": 2.4099, "step": 2370000 }, { "epoch": 11.74, "learning_rate": 4.4129984691071775e-05, "loss": 2.3965, "step": 2370500 }, { "epoch": 11.75, "learning_rate": 4.4128748581818544e-05, "loss": 2.4322, "step": 2371000 }, { "epoch": 11.75, "learning_rate": 4.412750999539246e-05, "loss": 2.4613, "step": 2371500 }, { "epoch": 11.75, "learning_rate": 4.412627140896638e-05, "loss": 2.4158, "step": 2372000 }, { "epoch": 11.75, "learning_rate": 4.412503282254029e-05, "loss": 2.4332, "step": 2372500 }, { "epoch": 11.76, "learning_rate": 4.4123794236114205e-05, "loss": 2.4335, "step": 2373000 }, { "epoch": 11.76, "learning_rate": 4.412255812686098e-05, "loss": 2.4264, "step": 2373500 }, { "epoch": 11.76, "learning_rate": 4.41213195404349e-05, "loss": 2.3871, "step": 2374000 }, { "epoch": 11.76, "learning_rate": 4.4120080954008814e-05, "loss": 2.4349, "step": 2374500 }, { "epoch": 11.77, "learning_rate": 4.411884236758273e-05, "loss": 2.4322, "step": 2375000 }, { "epoch": 11.77, "learning_rate": 4.411760378115664e-05, "loss": 2.4205, "step": 2375500 }, { "epoch": 11.77, "learning_rate": 4.411637014907626e-05, "loss": 2.427, "step": 2376000 }, { "epoch": 11.77, "learning_rate": 4.411513403982303e-05, "loss": 2.4348, "step": 2376500 }, { "epoch": 11.78, "learning_rate": 4.411389545339695e-05, "loss": 2.4465, "step": 2377000 }, { "epoch": 11.78, "learning_rate": 4.4112656866970864e-05, "loss": 2.4235, "step": 2377500 }, { "epoch": 11.78, "learning_rate": 4.411141828054478e-05, "loss": 2.4328, "step": 2378000 }, { "epoch": 11.78, "learning_rate": 4.41101796941187e-05, "loss": 2.3982, "step": 2378500 }, { "epoch": 11.79, "learning_rate": 4.4108941107692615e-05, "loss": 2.4139, "step": 2379000 }, { "epoch": 11.79, "learning_rate": 4.410770252126653e-05, "loss": 2.4416, "step": 2379500 }, { "epoch": 11.79, "learning_rate": 4.410646393484045e-05, "loss": 2.4149, "step": 2380000 }, { "epoch": 11.79, "learning_rate": 4.4105225348414366e-05, "loss": 2.4093, "step": 2380500 }, { "epoch": 11.8, "learning_rate": 4.410398676198828e-05, "loss": 2.4322, "step": 2381000 }, { "epoch": 11.8, "learning_rate": 4.41027481755622e-05, "loss": 2.4391, "step": 2381500 }, { "epoch": 11.8, "learning_rate": 4.410150958913612e-05, "loss": 2.4264, "step": 2382000 }, { "epoch": 11.8, "learning_rate": 4.4100271002710034e-05, "loss": 2.4302, "step": 2382500 }, { "epoch": 11.81, "learning_rate": 4.409903241628395e-05, "loss": 2.4337, "step": 2383000 }, { "epoch": 11.81, "learning_rate": 4.409779630703071e-05, "loss": 2.404, "step": 2383500 }, { "epoch": 11.81, "learning_rate": 4.409655772060463e-05, "loss": 2.4127, "step": 2384000 }, { "epoch": 11.81, "learning_rate": 4.4095319134178547e-05, "loss": 2.4261, "step": 2384500 }, { "epoch": 11.82, "learning_rate": 4.4094080547752463e-05, "loss": 2.4263, "step": 2385000 }, { "epoch": 11.82, "learning_rate": 4.409284196132638e-05, "loss": 2.4344, "step": 2385500 }, { "epoch": 11.82, "learning_rate": 4.4091608329246e-05, "loss": 2.4243, "step": 2386000 }, { "epoch": 11.82, "learning_rate": 4.409036974281992e-05, "loss": 2.4161, "step": 2386500 }, { "epoch": 11.83, "learning_rate": 4.408913115639383e-05, "loss": 2.4354, "step": 2387000 }, { "epoch": 11.83, "learning_rate": 4.4087895047140604e-05, "loss": 2.398, "step": 2387500 }, { "epoch": 11.83, "learning_rate": 4.408665646071452e-05, "loss": 2.4091, "step": 2388000 }, { "epoch": 11.83, "learning_rate": 4.408541787428843e-05, "loss": 2.4046, "step": 2388500 }, { "epoch": 11.84, "learning_rate": 4.408417928786235e-05, "loss": 2.4116, "step": 2389000 }, { "epoch": 11.84, "learning_rate": 4.4082940701436265e-05, "loss": 2.4345, "step": 2389500 }, { "epoch": 11.84, "learning_rate": 4.408170211501018e-05, "loss": 2.4339, "step": 2390000 }, { "epoch": 11.84, "learning_rate": 4.40804635285841e-05, "loss": 2.4014, "step": 2390500 }, { "epoch": 11.85, "learning_rate": 4.4079224942158015e-05, "loss": 2.4364, "step": 2391000 }, { "epoch": 11.85, "learning_rate": 4.407798635573193e-05, "loss": 2.4408, "step": 2391500 }, { "epoch": 11.85, "learning_rate": 4.407674776930585e-05, "loss": 2.4218, "step": 2392000 }, { "epoch": 11.85, "learning_rate": 4.4075509182879766e-05, "loss": 2.4203, "step": 2392500 }, { "epoch": 11.86, "learning_rate": 4.407427059645368e-05, "loss": 2.4449, "step": 2393000 }, { "epoch": 11.86, "learning_rate": 4.40730320100276e-05, "loss": 2.4359, "step": 2393500 }, { "epoch": 11.86, "learning_rate": 4.407179342360152e-05, "loss": 2.4611, "step": 2394000 }, { "epoch": 11.86, "learning_rate": 4.4070554837175434e-05, "loss": 2.4342, "step": 2394500 }, { "epoch": 11.87, "learning_rate": 4.406931625074935e-05, "loss": 2.4223, "step": 2395000 }, { "epoch": 11.87, "learning_rate": 4.406807766432327e-05, "loss": 2.4426, "step": 2395500 }, { "epoch": 11.87, "learning_rate": 4.4066839077897185e-05, "loss": 2.4305, "step": 2396000 }, { "epoch": 11.87, "learning_rate": 4.406560296864395e-05, "loss": 2.4321, "step": 2396500 }, { "epoch": 11.88, "learning_rate": 4.4064366859390716e-05, "loss": 2.4089, "step": 2397000 }, { "epoch": 11.88, "learning_rate": 4.4063133227310336e-05, "loss": 2.414, "step": 2397500 }, { "epoch": 11.88, "learning_rate": 4.406189464088425e-05, "loss": 2.4312, "step": 2398000 }, { "epoch": 11.88, "learning_rate": 4.406065605445817e-05, "loss": 2.405, "step": 2398500 }, { "epoch": 11.89, "learning_rate": 4.405941746803209e-05, "loss": 2.4159, "step": 2399000 }, { "epoch": 11.89, "learning_rate": 4.4058178881606004e-05, "loss": 2.417, "step": 2399500 }, { "epoch": 11.89, "learning_rate": 4.405694277235277e-05, "loss": 2.3933, "step": 2400000 }, { "epoch": 11.89, "learning_rate": 4.405570418592669e-05, "loss": 2.4296, "step": 2400500 }, { "epoch": 11.9, "learning_rate": 4.405446559950061e-05, "loss": 2.4389, "step": 2401000 }, { "epoch": 11.9, "learning_rate": 4.4053227013074524e-05, "loss": 2.4468, "step": 2401500 }, { "epoch": 11.9, "learning_rate": 4.405198842664844e-05, "loss": 2.4335, "step": 2402000 }, { "epoch": 11.9, "learning_rate": 4.405074984022236e-05, "loss": 2.4179, "step": 2402500 }, { "epoch": 11.91, "learning_rate": 4.4049511253796274e-05, "loss": 2.4189, "step": 2403000 }, { "epoch": 11.91, "learning_rate": 4.404827266737019e-05, "loss": 2.4311, "step": 2403500 }, { "epoch": 11.91, "learning_rate": 4.40470340809441e-05, "loss": 2.4236, "step": 2404000 }, { "epoch": 11.91, "learning_rate": 4.404579549451802e-05, "loss": 2.4249, "step": 2404500 }, { "epoch": 11.92, "learning_rate": 4.4044556908091935e-05, "loss": 2.4524, "step": 2405000 }, { "epoch": 11.92, "learning_rate": 4.404331832166585e-05, "loss": 2.4291, "step": 2405500 }, { "epoch": 11.92, "learning_rate": 4.404207973523977e-05, "loss": 2.4173, "step": 2406000 }, { "epoch": 11.92, "learning_rate": 4.4040841148813686e-05, "loss": 2.448, "step": 2406500 }, { "epoch": 11.93, "learning_rate": 4.4039602562387596e-05, "loss": 2.4346, "step": 2407000 }, { "epoch": 11.93, "learning_rate": 4.403836397596151e-05, "loss": 2.4152, "step": 2407500 }, { "epoch": 11.93, "learning_rate": 4.403712538953543e-05, "loss": 2.4002, "step": 2408000 }, { "epoch": 11.93, "learning_rate": 4.40358892802822e-05, "loss": 2.419, "step": 2408500 }, { "epoch": 11.94, "learning_rate": 4.4034653171028975e-05, "loss": 2.4215, "step": 2409000 }, { "epoch": 11.94, "learning_rate": 4.403341458460289e-05, "loss": 2.4047, "step": 2409500 }, { "epoch": 11.94, "learning_rate": 4.4032178475349653e-05, "loss": 2.4267, "step": 2410000 }, { "epoch": 11.94, "learning_rate": 4.403093988892357e-05, "loss": 2.4454, "step": 2410500 }, { "epoch": 11.94, "learning_rate": 4.402970130249749e-05, "loss": 2.3954, "step": 2411000 }, { "epoch": 11.95, "learning_rate": 4.4028462716071404e-05, "loss": 2.4412, "step": 2411500 }, { "epoch": 11.95, "learning_rate": 4.402722412964532e-05, "loss": 2.4209, "step": 2412000 }, { "epoch": 11.95, "learning_rate": 4.402598554321924e-05, "loss": 2.4415, "step": 2412500 }, { "epoch": 11.95, "learning_rate": 4.4024746956793155e-05, "loss": 2.408, "step": 2413000 }, { "epoch": 11.96, "learning_rate": 4.4023508370367065e-05, "loss": 2.4469, "step": 2413500 }, { "epoch": 11.96, "learning_rate": 4.402226978394098e-05, "loss": 2.4386, "step": 2414000 }, { "epoch": 11.96, "learning_rate": 4.402103367468776e-05, "loss": 2.4394, "step": 2414500 }, { "epoch": 11.96, "learning_rate": 4.401979756543452e-05, "loss": 2.426, "step": 2415000 }, { "epoch": 11.97, "learning_rate": 4.401855897900844e-05, "loss": 2.4471, "step": 2415500 }, { "epoch": 11.97, "learning_rate": 4.4017320392582354e-05, "loss": 2.4293, "step": 2416000 }, { "epoch": 11.97, "learning_rate": 4.401608180615627e-05, "loss": 2.4297, "step": 2416500 }, { "epoch": 11.97, "learning_rate": 4.401484321973019e-05, "loss": 2.4321, "step": 2417000 }, { "epoch": 11.98, "learning_rate": 4.4013604633304104e-05, "loss": 2.4261, "step": 2417500 }, { "epoch": 11.98, "learning_rate": 4.401236604687802e-05, "loss": 2.4338, "step": 2418000 }, { "epoch": 11.98, "learning_rate": 4.401112746045194e-05, "loss": 2.4304, "step": 2418500 }, { "epoch": 11.98, "learning_rate": 4.400989135119871e-05, "loss": 2.3984, "step": 2419000 }, { "epoch": 11.99, "learning_rate": 4.4008652764772624e-05, "loss": 2.437, "step": 2419500 }, { "epoch": 11.99, "learning_rate": 4.400741665551939e-05, "loss": 2.4412, "step": 2420000 }, { "epoch": 11.99, "learning_rate": 4.400617806909331e-05, "loss": 2.4605, "step": 2420500 }, { "epoch": 11.99, "learning_rate": 4.400493948266723e-05, "loss": 2.4154, "step": 2421000 }, { "epoch": 12.0, "learning_rate": 4.400370089624114e-05, "loss": 2.4555, "step": 2421500 }, { "epoch": 12.0, "learning_rate": 4.4002462309815054e-05, "loss": 2.4047, "step": 2422000 }, { "epoch": 12.0, "eval_accuracy": 0.6447791117813911, "eval_accuracy_mlm": 0.5983567225929853, "eval_accuracy_nsp": 0.8637231868653391, "eval_loss": 2.398054361343384, "eval_runtime": 146.0821, "eval_samples_per_second": 1745.314, "eval_steps_per_second": 72.726, "step": 2422116 }, { "epoch": 12.0, "learning_rate": 4.400122372338897e-05, "loss": 2.3898, "step": 2422500 }, { "epoch": 12.0, "learning_rate": 4.399998513696289e-05, "loss": 2.4349, "step": 2423000 }, { "epoch": 12.01, "learning_rate": 4.3998746550536804e-05, "loss": 2.3875, "step": 2423500 }, { "epoch": 12.01, "learning_rate": 4.399750796411072e-05, "loss": 2.3812, "step": 2424000 }, { "epoch": 12.01, "learning_rate": 4.399626937768464e-05, "loss": 2.3915, "step": 2424500 }, { "epoch": 12.01, "learning_rate": 4.3995030791258555e-05, "loss": 2.4071, "step": 2425000 }, { "epoch": 12.02, "learning_rate": 4.399379220483247e-05, "loss": 2.3903, "step": 2425500 }, { "epoch": 12.02, "learning_rate": 4.399255609557924e-05, "loss": 2.3863, "step": 2426000 }, { "epoch": 12.02, "learning_rate": 4.399131998632601e-05, "loss": 2.3889, "step": 2426500 }, { "epoch": 12.02, "learning_rate": 4.399008139989993e-05, "loss": 2.3892, "step": 2427000 }, { "epoch": 12.03, "learning_rate": 4.3988842813473844e-05, "loss": 2.4049, "step": 2427500 }, { "epoch": 12.03, "learning_rate": 4.398760422704776e-05, "loss": 2.3953, "step": 2428000 }, { "epoch": 12.03, "learning_rate": 4.398636564062167e-05, "loss": 2.3994, "step": 2428500 }, { "epoch": 12.03, "learning_rate": 4.398512705419559e-05, "loss": 2.3911, "step": 2429000 }, { "epoch": 12.04, "learning_rate": 4.3983888467769505e-05, "loss": 2.4036, "step": 2429500 }, { "epoch": 12.04, "learning_rate": 4.3982652358516273e-05, "loss": 2.3935, "step": 2430000 }, { "epoch": 12.04, "learning_rate": 4.398141624926304e-05, "loss": 2.4119, "step": 2430500 }, { "epoch": 12.04, "learning_rate": 4.398018014000981e-05, "loss": 2.4128, "step": 2431000 }, { "epoch": 12.05, "learning_rate": 4.397894155358373e-05, "loss": 2.3834, "step": 2431500 }, { "epoch": 12.05, "learning_rate": 4.3977702967157645e-05, "loss": 2.402, "step": 2432000 }, { "epoch": 12.05, "learning_rate": 4.397646438073156e-05, "loss": 2.3786, "step": 2432500 }, { "epoch": 12.05, "learning_rate": 4.397522579430547e-05, "loss": 2.3985, "step": 2433000 }, { "epoch": 12.06, "learning_rate": 4.397398720787939e-05, "loss": 2.3975, "step": 2433500 }, { "epoch": 12.06, "learning_rate": 4.3972748621453306e-05, "loss": 2.395, "step": 2434000 }, { "epoch": 12.06, "learning_rate": 4.397151251220008e-05, "loss": 2.3914, "step": 2434500 }, { "epoch": 12.06, "learning_rate": 4.397027640294685e-05, "loss": 2.4154, "step": 2435000 }, { "epoch": 12.07, "learning_rate": 4.396903781652077e-05, "loss": 2.4228, "step": 2435500 }, { "epoch": 12.07, "learning_rate": 4.396779923009468e-05, "loss": 2.4234, "step": 2436000 }, { "epoch": 12.07, "learning_rate": 4.3966560643668594e-05, "loss": 2.4103, "step": 2436500 }, { "epoch": 12.07, "learning_rate": 4.396532205724251e-05, "loss": 2.4163, "step": 2437000 }, { "epoch": 12.08, "learning_rate": 4.396408347081643e-05, "loss": 2.408, "step": 2437500 }, { "epoch": 12.08, "learning_rate": 4.3962844884390345e-05, "loss": 2.3927, "step": 2438000 }, { "epoch": 12.08, "learning_rate": 4.396160629796426e-05, "loss": 2.3937, "step": 2438500 }, { "epoch": 12.08, "learning_rate": 4.396036771153817e-05, "loss": 2.3874, "step": 2439000 }, { "epoch": 12.09, "learning_rate": 4.395912912511209e-05, "loss": 2.4065, "step": 2439500 }, { "epoch": 12.09, "learning_rate": 4.3957890538686006e-05, "loss": 2.3979, "step": 2440000 }, { "epoch": 12.09, "learning_rate": 4.395665442943278e-05, "loss": 2.3901, "step": 2440500 }, { "epoch": 12.09, "learning_rate": 4.39554158430067e-05, "loss": 2.4009, "step": 2441000 }, { "epoch": 12.1, "learning_rate": 4.3954177256580615e-05, "loss": 2.396, "step": 2441500 }, { "epoch": 12.1, "learning_rate": 4.3952941147327384e-05, "loss": 2.4246, "step": 2442000 }, { "epoch": 12.1, "learning_rate": 4.39517025609013e-05, "loss": 2.4031, "step": 2442500 }, { "epoch": 12.1, "learning_rate": 4.395046397447521e-05, "loss": 2.4032, "step": 2443000 }, { "epoch": 12.11, "learning_rate": 4.394922538804913e-05, "loss": 2.3833, "step": 2443500 }, { "epoch": 12.11, "learning_rate": 4.3947986801623045e-05, "loss": 2.3917, "step": 2444000 }, { "epoch": 12.11, "learning_rate": 4.394674821519696e-05, "loss": 2.4239, "step": 2444500 }, { "epoch": 12.11, "learning_rate": 4.394550962877088e-05, "loss": 2.4095, "step": 2445000 }, { "epoch": 12.12, "learning_rate": 4.394427104234479e-05, "loss": 2.4093, "step": 2445500 }, { "epoch": 12.12, "learning_rate": 4.3943032455918706e-05, "loss": 2.4138, "step": 2446000 }, { "epoch": 12.12, "learning_rate": 4.394179386949262e-05, "loss": 2.3883, "step": 2446500 }, { "epoch": 12.12, "learning_rate": 4.394055528306654e-05, "loss": 2.3971, "step": 2447000 }, { "epoch": 12.13, "learning_rate": 4.393931669664046e-05, "loss": 2.4231, "step": 2447500 }, { "epoch": 12.13, "learning_rate": 4.3938078110214374e-05, "loss": 2.4131, "step": 2448000 }, { "epoch": 12.13, "learning_rate": 4.393683952378829e-05, "loss": 2.4189, "step": 2448500 }, { "epoch": 12.13, "learning_rate": 4.393560093736221e-05, "loss": 2.3864, "step": 2449000 }, { "epoch": 12.14, "learning_rate": 4.3934362350936125e-05, "loss": 2.4134, "step": 2449500 }, { "epoch": 12.14, "learning_rate": 4.393312376451004e-05, "loss": 2.3885, "step": 2450000 }, { "epoch": 12.14, "learning_rate": 4.393188517808396e-05, "loss": 2.4138, "step": 2450500 }, { "epoch": 12.14, "learning_rate": 4.393064906883073e-05, "loss": 2.42, "step": 2451000 }, { "epoch": 12.15, "learning_rate": 4.3929410482404644e-05, "loss": 2.4087, "step": 2451500 }, { "epoch": 12.15, "learning_rate": 4.392817189597856e-05, "loss": 2.4067, "step": 2452000 }, { "epoch": 12.15, "learning_rate": 4.392693330955248e-05, "loss": 2.4229, "step": 2452500 }, { "epoch": 12.15, "learning_rate": 4.3925694723126395e-05, "loss": 2.3739, "step": 2453000 }, { "epoch": 12.16, "learning_rate": 4.392445613670031e-05, "loss": 2.3939, "step": 2453500 }, { "epoch": 12.16, "learning_rate": 4.3923220027447074e-05, "loss": 2.4108, "step": 2454000 }, { "epoch": 12.16, "learning_rate": 4.392198391819384e-05, "loss": 2.3995, "step": 2454500 }, { "epoch": 12.16, "learning_rate": 4.392074533176776e-05, "loss": 2.396, "step": 2455000 }, { "epoch": 12.17, "learning_rate": 4.3919506745341677e-05, "loss": 2.4091, "step": 2455500 }, { "epoch": 12.17, "learning_rate": 4.3918268158915594e-05, "loss": 2.4122, "step": 2456000 }, { "epoch": 12.17, "learning_rate": 4.391702957248951e-05, "loss": 2.3943, "step": 2456500 }, { "epoch": 12.17, "learning_rate": 4.391579098606343e-05, "loss": 2.4066, "step": 2457000 }, { "epoch": 12.18, "learning_rate": 4.3914552399637344e-05, "loss": 2.4244, "step": 2457500 }, { "epoch": 12.18, "learning_rate": 4.391331381321126e-05, "loss": 2.386, "step": 2458000 }, { "epoch": 12.18, "learning_rate": 4.391207522678518e-05, "loss": 2.4081, "step": 2458500 }, { "epoch": 12.18, "learning_rate": 4.3910836640359095e-05, "loss": 2.376, "step": 2459000 }, { "epoch": 12.19, "learning_rate": 4.390959805393301e-05, "loss": 2.4178, "step": 2459500 }, { "epoch": 12.19, "learning_rate": 4.3908361944679774e-05, "loss": 2.4064, "step": 2460000 }, { "epoch": 12.19, "learning_rate": 4.390712335825369e-05, "loss": 2.4019, "step": 2460500 }, { "epoch": 12.19, "learning_rate": 4.390588477182761e-05, "loss": 2.3993, "step": 2461000 }, { "epoch": 12.2, "learning_rate": 4.3904646185401525e-05, "loss": 2.3876, "step": 2461500 }, { "epoch": 12.2, "learning_rate": 4.390340759897544e-05, "loss": 2.388, "step": 2462000 }, { "epoch": 12.2, "learning_rate": 4.390216901254936e-05, "loss": 2.4285, "step": 2462500 }, { "epoch": 12.2, "learning_rate": 4.390093290329613e-05, "loss": 2.4094, "step": 2463000 }, { "epoch": 12.21, "learning_rate": 4.3899696794042896e-05, "loss": 2.3865, "step": 2463500 }, { "epoch": 12.21, "learning_rate": 4.389845820761681e-05, "loss": 2.4204, "step": 2464000 }, { "epoch": 12.21, "learning_rate": 4.389721962119073e-05, "loss": 2.4236, "step": 2464500 }, { "epoch": 12.21, "learning_rate": 4.389598103476465e-05, "loss": 2.3961, "step": 2465000 }, { "epoch": 12.21, "learning_rate": 4.3894744925511416e-05, "loss": 2.397, "step": 2465500 }, { "epoch": 12.22, "learning_rate": 4.389350633908533e-05, "loss": 2.3895, "step": 2466000 }, { "epoch": 12.22, "learning_rate": 4.389226775265925e-05, "loss": 2.4102, "step": 2466500 }, { "epoch": 12.22, "learning_rate": 4.389102916623316e-05, "loss": 2.405, "step": 2467000 }, { "epoch": 12.22, "learning_rate": 4.388979057980708e-05, "loss": 2.4291, "step": 2467500 }, { "epoch": 12.23, "learning_rate": 4.3888551993380994e-05, "loss": 2.4213, "step": 2468000 }, { "epoch": 12.23, "learning_rate": 4.388731340695491e-05, "loss": 2.4009, "step": 2468500 }, { "epoch": 12.23, "learning_rate": 4.388607482052883e-05, "loss": 2.4047, "step": 2469000 }, { "epoch": 12.23, "learning_rate": 4.3884836234102745e-05, "loss": 2.385, "step": 2469500 }, { "epoch": 12.24, "learning_rate": 4.388360012484951e-05, "loss": 2.4111, "step": 2470000 }, { "epoch": 12.24, "learning_rate": 4.388236153842343e-05, "loss": 2.4104, "step": 2470500 }, { "epoch": 12.24, "learning_rate": 4.388112295199735e-05, "loss": 2.4294, "step": 2471000 }, { "epoch": 12.24, "learning_rate": 4.3879884365571264e-05, "loss": 2.4056, "step": 2471500 }, { "epoch": 12.25, "learning_rate": 4.387864577914518e-05, "loss": 2.396, "step": 2472000 }, { "epoch": 12.25, "learning_rate": 4.387740719271909e-05, "loss": 2.3991, "step": 2472500 }, { "epoch": 12.25, "learning_rate": 4.387617108346587e-05, "loss": 2.3729, "step": 2473000 }, { "epoch": 12.25, "learning_rate": 4.387493249703978e-05, "loss": 2.4009, "step": 2473500 }, { "epoch": 12.26, "learning_rate": 4.3873693910613694e-05, "loss": 2.4272, "step": 2474000 }, { "epoch": 12.26, "learning_rate": 4.387245532418761e-05, "loss": 2.4015, "step": 2474500 }, { "epoch": 12.26, "learning_rate": 4.387121673776153e-05, "loss": 2.4204, "step": 2475000 }, { "epoch": 12.26, "learning_rate": 4.3869978151335445e-05, "loss": 2.4145, "step": 2475500 }, { "epoch": 12.27, "learning_rate": 4.386873956490936e-05, "loss": 2.4175, "step": 2476000 }, { "epoch": 12.27, "learning_rate": 4.386750097848328e-05, "loss": 2.3936, "step": 2476500 }, { "epoch": 12.27, "learning_rate": 4.3866262392057195e-05, "loss": 2.382, "step": 2477000 }, { "epoch": 12.27, "learning_rate": 4.3865026282803964e-05, "loss": 2.4075, "step": 2477500 }, { "epoch": 12.28, "learning_rate": 4.386378769637788e-05, "loss": 2.3898, "step": 2478000 }, { "epoch": 12.28, "learning_rate": 4.38625491099518e-05, "loss": 2.4162, "step": 2478500 }, { "epoch": 12.28, "learning_rate": 4.386131052352571e-05, "loss": 2.4163, "step": 2479000 }, { "epoch": 12.28, "learning_rate": 4.3860071937099625e-05, "loss": 2.4165, "step": 2479500 }, { "epoch": 12.29, "learning_rate": 4.385883335067354e-05, "loss": 2.4039, "step": 2480000 }, { "epoch": 12.29, "learning_rate": 4.385759476424746e-05, "loss": 2.4346, "step": 2480500 }, { "epoch": 12.29, "learning_rate": 4.385635865499423e-05, "loss": 2.427, "step": 2481000 }, { "epoch": 12.29, "learning_rate": 4.3855120068568145e-05, "loss": 2.3986, "step": 2481500 }, { "epoch": 12.3, "learning_rate": 4.385388148214206e-05, "loss": 2.4292, "step": 2482000 }, { "epoch": 12.3, "learning_rate": 4.385264537288884e-05, "loss": 2.4181, "step": 2482500 }, { "epoch": 12.3, "learning_rate": 4.385140678646275e-05, "loss": 2.4078, "step": 2483000 }, { "epoch": 12.3, "learning_rate": 4.3850168200036664e-05, "loss": 2.426, "step": 2483500 }, { "epoch": 12.31, "learning_rate": 4.384892961361058e-05, "loss": 2.4333, "step": 2484000 }, { "epoch": 12.31, "learning_rate": 4.38476910271845e-05, "loss": 2.4103, "step": 2484500 }, { "epoch": 12.31, "learning_rate": 4.3846452440758415e-05, "loss": 2.418, "step": 2485000 }, { "epoch": 12.31, "learning_rate": 4.384521385433233e-05, "loss": 2.399, "step": 2485500 }, { "epoch": 12.32, "learning_rate": 4.384397526790624e-05, "loss": 2.408, "step": 2486000 }, { "epoch": 12.32, "learning_rate": 4.384273668148016e-05, "loss": 2.3786, "step": 2486500 }, { "epoch": 12.32, "learning_rate": 4.3841498095054076e-05, "loss": 2.434, "step": 2487000 }, { "epoch": 12.32, "learning_rate": 4.384025950862799e-05, "loss": 2.4179, "step": 2487500 }, { "epoch": 12.33, "learning_rate": 4.383902339937476e-05, "loss": 2.4173, "step": 2488000 }, { "epoch": 12.33, "learning_rate": 4.383778729012154e-05, "loss": 2.4215, "step": 2488500 }, { "epoch": 12.33, "learning_rate": 4.383655365804115e-05, "loss": 2.4273, "step": 2489000 }, { "epoch": 12.33, "learning_rate": 4.383531507161507e-05, "loss": 2.4262, "step": 2489500 }, { "epoch": 12.34, "learning_rate": 4.3834076485188985e-05, "loss": 2.4236, "step": 2490000 }, { "epoch": 12.34, "learning_rate": 4.38328378987629e-05, "loss": 2.3886, "step": 2490500 }, { "epoch": 12.34, "learning_rate": 4.383159931233682e-05, "loss": 2.4172, "step": 2491000 }, { "epoch": 12.34, "learning_rate": 4.3830360725910736e-05, "loss": 2.4084, "step": 2491500 }, { "epoch": 12.35, "learning_rate": 4.382912213948465e-05, "loss": 2.4236, "step": 2492000 }, { "epoch": 12.35, "learning_rate": 4.382788355305857e-05, "loss": 2.4233, "step": 2492500 }, { "epoch": 12.35, "learning_rate": 4.382664496663249e-05, "loss": 2.4178, "step": 2493000 }, { "epoch": 12.35, "learning_rate": 4.3825406380206404e-05, "loss": 2.3815, "step": 2493500 }, { "epoch": 12.36, "learning_rate": 4.382416779378032e-05, "loss": 2.4237, "step": 2494000 }, { "epoch": 12.36, "learning_rate": 4.382292920735424e-05, "loss": 2.397, "step": 2494500 }, { "epoch": 12.36, "learning_rate": 4.3821690620928154e-05, "loss": 2.4008, "step": 2495000 }, { "epoch": 12.36, "learning_rate": 4.3820452034502065e-05, "loss": 2.4192, "step": 2495500 }, { "epoch": 12.37, "learning_rate": 4.3819215925248833e-05, "loss": 2.3788, "step": 2496000 }, { "epoch": 12.37, "learning_rate": 4.381797733882275e-05, "loss": 2.3979, "step": 2496500 }, { "epoch": 12.37, "learning_rate": 4.381673875239667e-05, "loss": 2.3989, "step": 2497000 }, { "epoch": 12.37, "learning_rate": 4.3815500165970584e-05, "loss": 2.4017, "step": 2497500 }, { "epoch": 12.38, "learning_rate": 4.38142615795445e-05, "loss": 2.4174, "step": 2498000 }, { "epoch": 12.38, "learning_rate": 4.381302299311841e-05, "loss": 2.4145, "step": 2498500 }, { "epoch": 12.38, "learning_rate": 4.381178440669233e-05, "loss": 2.4302, "step": 2499000 }, { "epoch": 12.38, "learning_rate": 4.3810545820266245e-05, "loss": 2.4266, "step": 2499500 }, { "epoch": 12.39, "learning_rate": 4.380930971101302e-05, "loss": 2.3984, "step": 2500000 }, { "epoch": 12.39, "learning_rate": 4.380807360175978e-05, "loss": 2.4286, "step": 2500500 }, { "epoch": 12.39, "learning_rate": 4.38068350153337e-05, "loss": 2.3976, "step": 2501000 }, { "epoch": 12.39, "learning_rate": 4.380559890608047e-05, "loss": 2.4343, "step": 2501500 }, { "epoch": 12.4, "learning_rate": 4.3804360319654385e-05, "loss": 2.4084, "step": 2502000 }, { "epoch": 12.4, "learning_rate": 4.38031217332283e-05, "loss": 2.4167, "step": 2502500 }, { "epoch": 12.4, "learning_rate": 4.380188314680222e-05, "loss": 2.4184, "step": 2503000 }, { "epoch": 12.4, "learning_rate": 4.3800644560376136e-05, "loss": 2.424, "step": 2503500 }, { "epoch": 12.41, "learning_rate": 4.3799408451122905e-05, "loss": 2.4085, "step": 2504000 }, { "epoch": 12.41, "learning_rate": 4.379816986469682e-05, "loss": 2.4307, "step": 2504500 }, { "epoch": 12.41, "learning_rate": 4.379693127827074e-05, "loss": 2.4117, "step": 2505000 }, { "epoch": 12.41, "learning_rate": 4.3795692691844656e-05, "loss": 2.3954, "step": 2505500 }, { "epoch": 12.42, "learning_rate": 4.379445658259142e-05, "loss": 2.4061, "step": 2506000 }, { "epoch": 12.42, "learning_rate": 4.3793217996165335e-05, "loss": 2.4096, "step": 2506500 }, { "epoch": 12.42, "learning_rate": 4.379197940973925e-05, "loss": 2.3931, "step": 2507000 }, { "epoch": 12.42, "learning_rate": 4.379074082331317e-05, "loss": 2.4206, "step": 2507500 }, { "epoch": 12.43, "learning_rate": 4.3789502236887086e-05, "loss": 2.4262, "step": 2508000 }, { "epoch": 12.43, "learning_rate": 4.3788263650461e-05, "loss": 2.3758, "step": 2508500 }, { "epoch": 12.43, "learning_rate": 4.378702506403492e-05, "loss": 2.3879, "step": 2509000 }, { "epoch": 12.43, "learning_rate": 4.3785786477608836e-05, "loss": 2.4278, "step": 2509500 }, { "epoch": 12.44, "learning_rate": 4.3784550368355605e-05, "loss": 2.4015, "step": 2510000 }, { "epoch": 12.44, "learning_rate": 4.378331178192952e-05, "loss": 2.4261, "step": 2510500 }, { "epoch": 12.44, "learning_rate": 4.378207319550344e-05, "loss": 2.4078, "step": 2511000 }, { "epoch": 12.44, "learning_rate": 4.3780834609077356e-05, "loss": 2.3968, "step": 2511500 }, { "epoch": 12.45, "learning_rate": 4.377959602265127e-05, "loss": 2.3812, "step": 2512000 }, { "epoch": 12.45, "learning_rate": 4.377835743622519e-05, "loss": 2.3986, "step": 2512500 }, { "epoch": 12.45, "learning_rate": 4.377711884979911e-05, "loss": 2.4222, "step": 2513000 }, { "epoch": 12.45, "learning_rate": 4.377588026337302e-05, "loss": 2.4022, "step": 2513500 }, { "epoch": 12.46, "learning_rate": 4.3774641676946934e-05, "loss": 2.4151, "step": 2514000 }, { "epoch": 12.46, "learning_rate": 4.377340309052085e-05, "loss": 2.4226, "step": 2514500 }, { "epoch": 12.46, "learning_rate": 4.377216450409477e-05, "loss": 2.3848, "step": 2515000 }, { "epoch": 12.46, "learning_rate": 4.3770925917668685e-05, "loss": 2.4285, "step": 2515500 }, { "epoch": 12.47, "learning_rate": 4.37696873312426e-05, "loss": 2.4243, "step": 2516000 }, { "epoch": 12.47, "learning_rate": 4.376845122198937e-05, "loss": 2.4008, "step": 2516500 }, { "epoch": 12.47, "learning_rate": 4.376721263556329e-05, "loss": 2.4064, "step": 2517000 }, { "epoch": 12.47, "learning_rate": 4.3765974049137204e-05, "loss": 2.4022, "step": 2517500 }, { "epoch": 12.48, "learning_rate": 4.376473793988397e-05, "loss": 2.3923, "step": 2518000 }, { "epoch": 12.48, "learning_rate": 4.376349935345789e-05, "loss": 2.4132, "step": 2518500 }, { "epoch": 12.48, "learning_rate": 4.376226076703181e-05, "loss": 2.4178, "step": 2519000 }, { "epoch": 12.48, "learning_rate": 4.3761022180605724e-05, "loss": 2.4013, "step": 2519500 }, { "epoch": 12.48, "learning_rate": 4.375978359417964e-05, "loss": 2.4077, "step": 2520000 }, { "epoch": 12.49, "learning_rate": 4.375854500775355e-05, "loss": 2.4075, "step": 2520500 }, { "epoch": 12.49, "learning_rate": 4.375730642132747e-05, "loss": 2.4111, "step": 2521000 }, { "epoch": 12.49, "learning_rate": 4.375607031207424e-05, "loss": 2.3895, "step": 2521500 }, { "epoch": 12.49, "learning_rate": 4.3754831725648154e-05, "loss": 2.3763, "step": 2522000 }, { "epoch": 12.5, "learning_rate": 4.375359313922207e-05, "loss": 2.4228, "step": 2522500 }, { "epoch": 12.5, "learning_rate": 4.375235455279599e-05, "loss": 2.3938, "step": 2523000 }, { "epoch": 12.5, "learning_rate": 4.3751115966369904e-05, "loss": 2.404, "step": 2523500 }, { "epoch": 12.5, "learning_rate": 4.374987985711667e-05, "loss": 2.4042, "step": 2524000 }, { "epoch": 12.51, "learning_rate": 4.374864127069059e-05, "loss": 2.4155, "step": 2524500 }, { "epoch": 12.51, "learning_rate": 4.374740268426451e-05, "loss": 2.4128, "step": 2525000 }, { "epoch": 12.51, "learning_rate": 4.3746164097838424e-05, "loss": 2.3995, "step": 2525500 }, { "epoch": 12.51, "learning_rate": 4.374492551141234e-05, "loss": 2.3965, "step": 2526000 }, { "epoch": 12.52, "learning_rate": 4.374368692498626e-05, "loss": 2.426, "step": 2526500 }, { "epoch": 12.52, "learning_rate": 4.3742448338560175e-05, "loss": 2.4275, "step": 2527000 }, { "epoch": 12.52, "learning_rate": 4.3741209752134085e-05, "loss": 2.407, "step": 2527500 }, { "epoch": 12.52, "learning_rate": 4.3739973642880854e-05, "loss": 2.376, "step": 2528000 }, { "epoch": 12.53, "learning_rate": 4.373873505645477e-05, "loss": 2.4029, "step": 2528500 }, { "epoch": 12.53, "learning_rate": 4.373749647002869e-05, "loss": 2.4195, "step": 2529000 }, { "epoch": 12.53, "learning_rate": 4.3736257883602604e-05, "loss": 2.3921, "step": 2529500 }, { "epoch": 12.53, "learning_rate": 4.373501929717652e-05, "loss": 2.4134, "step": 2530000 }, { "epoch": 12.54, "learning_rate": 4.3733785665096135e-05, "loss": 2.4015, "step": 2530500 }, { "epoch": 12.54, "learning_rate": 4.373254707867005e-05, "loss": 2.4044, "step": 2531000 }, { "epoch": 12.54, "learning_rate": 4.373130849224397e-05, "loss": 2.4226, "step": 2531500 }, { "epoch": 12.54, "learning_rate": 4.3730069905817886e-05, "loss": 2.4238, "step": 2532000 }, { "epoch": 12.55, "learning_rate": 4.37288313193918e-05, "loss": 2.3989, "step": 2532500 }, { "epoch": 12.55, "learning_rate": 4.372759273296572e-05, "loss": 2.4198, "step": 2533000 }, { "epoch": 12.55, "learning_rate": 4.372635414653964e-05, "loss": 2.3914, "step": 2533500 }, { "epoch": 12.55, "learning_rate": 4.3725115560113554e-05, "loss": 2.4127, "step": 2534000 }, { "epoch": 12.56, "learning_rate": 4.372387697368747e-05, "loss": 2.4067, "step": 2534500 }, { "epoch": 12.56, "learning_rate": 4.372264086443424e-05, "loss": 2.4096, "step": 2535000 }, { "epoch": 12.56, "learning_rate": 4.3721402278008156e-05, "loss": 2.392, "step": 2535500 }, { "epoch": 12.56, "learning_rate": 4.372016369158207e-05, "loss": 2.4025, "step": 2536000 }, { "epoch": 12.57, "learning_rate": 4.371892510515599e-05, "loss": 2.3878, "step": 2536500 }, { "epoch": 12.57, "learning_rate": 4.371768651872991e-05, "loss": 2.428, "step": 2537000 }, { "epoch": 12.57, "learning_rate": 4.3716447932303824e-05, "loss": 2.3988, "step": 2537500 }, { "epoch": 12.57, "learning_rate": 4.371520934587774e-05, "loss": 2.4164, "step": 2538000 }, { "epoch": 12.58, "learning_rate": 4.37139732366245e-05, "loss": 2.4146, "step": 2538500 }, { "epoch": 12.58, "learning_rate": 4.371273465019842e-05, "loss": 2.4205, "step": 2539000 }, { "epoch": 12.58, "learning_rate": 4.3711498540945196e-05, "loss": 2.4266, "step": 2539500 }, { "epoch": 12.58, "learning_rate": 4.3710259954519106e-05, "loss": 2.405, "step": 2540000 }, { "epoch": 12.59, "learning_rate": 4.370902384526588e-05, "loss": 2.4024, "step": 2540500 }, { "epoch": 12.59, "learning_rate": 4.37077852588398e-05, "loss": 2.4214, "step": 2541000 }, { "epoch": 12.59, "learning_rate": 4.370654667241371e-05, "loss": 2.4359, "step": 2541500 }, { "epoch": 12.59, "learning_rate": 4.3705308085987625e-05, "loss": 2.4429, "step": 2542000 }, { "epoch": 12.6, "learning_rate": 4.370406949956154e-05, "loss": 2.4162, "step": 2542500 }, { "epoch": 12.6, "learning_rate": 4.370283339030831e-05, "loss": 2.4065, "step": 2543000 }, { "epoch": 12.6, "learning_rate": 4.370159480388223e-05, "loss": 2.4123, "step": 2543500 }, { "epoch": 12.6, "learning_rate": 4.3700356217456145e-05, "loss": 2.3914, "step": 2544000 }, { "epoch": 12.61, "learning_rate": 4.369911763103006e-05, "loss": 2.4199, "step": 2544500 }, { "epoch": 12.61, "learning_rate": 4.369787904460398e-05, "loss": 2.4165, "step": 2545000 }, { "epoch": 12.61, "learning_rate": 4.3696640458177896e-05, "loss": 2.4082, "step": 2545500 }, { "epoch": 12.61, "learning_rate": 4.3695401871751806e-05, "loss": 2.4234, "step": 2546000 }, { "epoch": 12.62, "learning_rate": 4.369416576249858e-05, "loss": 2.4354, "step": 2546500 }, { "epoch": 12.62, "learning_rate": 4.36929271760725e-05, "loss": 2.434, "step": 2547000 }, { "epoch": 12.62, "learning_rate": 4.3691688589646415e-05, "loss": 2.3977, "step": 2547500 }, { "epoch": 12.62, "learning_rate": 4.369045000322033e-05, "loss": 2.3974, "step": 2548000 }, { "epoch": 12.63, "learning_rate": 4.368921141679424e-05, "loss": 2.4089, "step": 2548500 }, { "epoch": 12.63, "learning_rate": 4.368797283036816e-05, "loss": 2.4315, "step": 2549000 }, { "epoch": 12.63, "learning_rate": 4.3686734243942076e-05, "loss": 2.4032, "step": 2549500 }, { "epoch": 12.63, "learning_rate": 4.368549565751599e-05, "loss": 2.4246, "step": 2550000 }, { "epoch": 12.64, "learning_rate": 4.368425707108991e-05, "loss": 2.3949, "step": 2550500 }, { "epoch": 12.64, "learning_rate": 4.368302096183668e-05, "loss": 2.4226, "step": 2551000 }, { "epoch": 12.64, "learning_rate": 4.3681782375410596e-05, "loss": 2.4192, "step": 2551500 }, { "epoch": 12.64, "learning_rate": 4.368054378898451e-05, "loss": 2.4099, "step": 2552000 }, { "epoch": 12.65, "learning_rate": 4.367930520255842e-05, "loss": 2.4061, "step": 2552500 }, { "epoch": 12.65, "learning_rate": 4.367806661613234e-05, "loss": 2.4307, "step": 2553000 }, { "epoch": 12.65, "learning_rate": 4.367683298405196e-05, "loss": 2.3988, "step": 2553500 }, { "epoch": 12.65, "learning_rate": 4.367559439762588e-05, "loss": 2.3905, "step": 2554000 }, { "epoch": 12.66, "learning_rate": 4.3674355811199794e-05, "loss": 2.4154, "step": 2554500 }, { "epoch": 12.66, "learning_rate": 4.367311722477371e-05, "loss": 2.4097, "step": 2555000 }, { "epoch": 12.66, "learning_rate": 4.367187863834763e-05, "loss": 2.4109, "step": 2555500 }, { "epoch": 12.66, "learning_rate": 4.3670640051921545e-05, "loss": 2.3961, "step": 2556000 }, { "epoch": 12.67, "learning_rate": 4.366940146549546e-05, "loss": 2.4039, "step": 2556500 }, { "epoch": 12.67, "learning_rate": 4.366816287906938e-05, "loss": 2.3979, "step": 2557000 }, { "epoch": 12.67, "learning_rate": 4.366692676981615e-05, "loss": 2.4154, "step": 2557500 }, { "epoch": 12.67, "learning_rate": 4.3665688183390065e-05, "loss": 2.4105, "step": 2558000 }, { "epoch": 12.68, "learning_rate": 4.366444959696398e-05, "loss": 2.4075, "step": 2558500 }, { "epoch": 12.68, "learning_rate": 4.36632110105379e-05, "loss": 2.4176, "step": 2559000 }, { "epoch": 12.68, "learning_rate": 4.3661972424111816e-05, "loss": 2.4234, "step": 2559500 }, { "epoch": 12.68, "learning_rate": 4.366073631485858e-05, "loss": 2.414, "step": 2560000 }, { "epoch": 12.69, "learning_rate": 4.3659497728432495e-05, "loss": 2.4034, "step": 2560500 }, { "epoch": 12.69, "learning_rate": 4.365825914200641e-05, "loss": 2.4229, "step": 2561000 }, { "epoch": 12.69, "learning_rate": 4.365702055558033e-05, "loss": 2.3905, "step": 2561500 }, { "epoch": 12.69, "learning_rate": 4.3655781969154245e-05, "loss": 2.4277, "step": 2562000 }, { "epoch": 12.7, "learning_rate": 4.365454338272816e-05, "loss": 2.4012, "step": 2562500 }, { "epoch": 12.7, "learning_rate": 4.365330479630208e-05, "loss": 2.4179, "step": 2563000 }, { "epoch": 12.7, "learning_rate": 4.365206868704885e-05, "loss": 2.4339, "step": 2563500 }, { "epoch": 12.7, "learning_rate": 4.3650830100622765e-05, "loss": 2.4012, "step": 2564000 }, { "epoch": 12.71, "learning_rate": 4.364959151419668e-05, "loss": 2.4154, "step": 2564500 }, { "epoch": 12.71, "learning_rate": 4.36483529277706e-05, "loss": 2.3908, "step": 2565000 }, { "epoch": 12.71, "learning_rate": 4.3647114341344516e-05, "loss": 2.4239, "step": 2565500 }, { "epoch": 12.71, "learning_rate": 4.364587575491843e-05, "loss": 2.403, "step": 2566000 }, { "epoch": 12.72, "learning_rate": 4.364463716849235e-05, "loss": 2.3735, "step": 2566500 }, { "epoch": 12.72, "learning_rate": 4.3643398582066267e-05, "loss": 2.4058, "step": 2567000 }, { "epoch": 12.72, "learning_rate": 4.364216247281303e-05, "loss": 2.4092, "step": 2567500 }, { "epoch": 12.72, "learning_rate": 4.36409263635598e-05, "loss": 2.4055, "step": 2568000 }, { "epoch": 12.73, "learning_rate": 4.3639687777133714e-05, "loss": 2.4084, "step": 2568500 }, { "epoch": 12.73, "learning_rate": 4.363844919070763e-05, "loss": 2.4078, "step": 2569000 }, { "epoch": 12.73, "learning_rate": 4.363721060428155e-05, "loss": 2.3979, "step": 2569500 }, { "epoch": 12.73, "learning_rate": 4.363597449502832e-05, "loss": 2.4249, "step": 2570000 }, { "epoch": 12.74, "learning_rate": 4.3634735908602234e-05, "loss": 2.4067, "step": 2570500 }, { "epoch": 12.74, "learning_rate": 4.3633499799349e-05, "loss": 2.4111, "step": 2571000 }, { "epoch": 12.74, "learning_rate": 4.363226121292291e-05, "loss": 2.3941, "step": 2571500 }, { "epoch": 12.74, "learning_rate": 4.363102262649683e-05, "loss": 2.4572, "step": 2572000 }, { "epoch": 12.75, "learning_rate": 4.362978404007075e-05, "loss": 2.428, "step": 2572500 }, { "epoch": 12.75, "learning_rate": 4.3628545453644664e-05, "loss": 2.4121, "step": 2573000 }, { "epoch": 12.75, "learning_rate": 4.362730686721858e-05, "loss": 2.4407, "step": 2573500 }, { "epoch": 12.75, "learning_rate": 4.36260682807925e-05, "loss": 2.4202, "step": 2574000 }, { "epoch": 12.75, "learning_rate": 4.3624829694366414e-05, "loss": 2.3971, "step": 2574500 }, { "epoch": 12.76, "learning_rate": 4.362359358511318e-05, "loss": 2.4459, "step": 2575000 }, { "epoch": 12.76, "learning_rate": 4.36223549986871e-05, "loss": 2.4038, "step": 2575500 }, { "epoch": 12.76, "learning_rate": 4.362111641226102e-05, "loss": 2.4283, "step": 2576000 }, { "epoch": 12.76, "learning_rate": 4.3619877825834934e-05, "loss": 2.4301, "step": 2576500 }, { "epoch": 12.77, "learning_rate": 4.361863923940885e-05, "loss": 2.4261, "step": 2577000 }, { "epoch": 12.77, "learning_rate": 4.361740313015562e-05, "loss": 2.4179, "step": 2577500 }, { "epoch": 12.77, "learning_rate": 4.361616702090239e-05, "loss": 2.4112, "step": 2578000 }, { "epoch": 12.77, "learning_rate": 4.3614928434476305e-05, "loss": 2.4111, "step": 2578500 }, { "epoch": 12.78, "learning_rate": 4.361368984805022e-05, "loss": 2.4339, "step": 2579000 }, { "epoch": 12.78, "learning_rate": 4.361245126162414e-05, "loss": 2.4154, "step": 2579500 }, { "epoch": 12.78, "learning_rate": 4.3611212675198056e-05, "loss": 2.4092, "step": 2580000 }, { "epoch": 12.78, "learning_rate": 4.360997408877197e-05, "loss": 2.4017, "step": 2580500 }, { "epoch": 12.79, "learning_rate": 4.3608737979518735e-05, "loss": 2.3993, "step": 2581000 }, { "epoch": 12.79, "learning_rate": 4.360749939309265e-05, "loss": 2.4092, "step": 2581500 }, { "epoch": 12.79, "learning_rate": 4.360626080666657e-05, "loss": 2.4148, "step": 2582000 }, { "epoch": 12.79, "learning_rate": 4.3605022220240486e-05, "loss": 2.4223, "step": 2582500 }, { "epoch": 12.8, "learning_rate": 4.36037836338144e-05, "loss": 2.4065, "step": 2583000 }, { "epoch": 12.8, "learning_rate": 4.360254504738832e-05, "loss": 2.4275, "step": 2583500 }, { "epoch": 12.8, "learning_rate": 4.360130646096223e-05, "loss": 2.3997, "step": 2584000 }, { "epoch": 12.8, "learning_rate": 4.360006787453615e-05, "loss": 2.4098, "step": 2584500 }, { "epoch": 12.81, "learning_rate": 4.3598829288110064e-05, "loss": 2.4142, "step": 2585000 }, { "epoch": 12.81, "learning_rate": 4.359759070168398e-05, "loss": 2.4112, "step": 2585500 }, { "epoch": 12.81, "learning_rate": 4.3596354592430756e-05, "loss": 2.4065, "step": 2586000 }, { "epoch": 12.81, "learning_rate": 4.359511600600467e-05, "loss": 2.4327, "step": 2586500 }, { "epoch": 12.82, "learning_rate": 4.3593879896751435e-05, "loss": 2.4185, "step": 2587000 }, { "epoch": 12.82, "learning_rate": 4.359264131032535e-05, "loss": 2.3929, "step": 2587500 }, { "epoch": 12.82, "learning_rate": 4.359140272389927e-05, "loss": 2.4129, "step": 2588000 }, { "epoch": 12.82, "learning_rate": 4.3590164137473186e-05, "loss": 2.4261, "step": 2588500 }, { "epoch": 12.83, "learning_rate": 4.35889255510471e-05, "loss": 2.4036, "step": 2589000 }, { "epoch": 12.83, "learning_rate": 4.358768696462102e-05, "loss": 2.4252, "step": 2589500 }, { "epoch": 12.83, "learning_rate": 4.358644837819494e-05, "loss": 2.4257, "step": 2590000 }, { "epoch": 12.83, "learning_rate": 4.358520979176885e-05, "loss": 2.3993, "step": 2590500 }, { "epoch": 12.84, "learning_rate": 4.3583971205342764e-05, "loss": 2.4082, "step": 2591000 }, { "epoch": 12.84, "learning_rate": 4.358273261891668e-05, "loss": 2.417, "step": 2591500 }, { "epoch": 12.84, "learning_rate": 4.35814940324906e-05, "loss": 2.4277, "step": 2592000 }, { "epoch": 12.84, "learning_rate": 4.3580257923237373e-05, "loss": 2.4523, "step": 2592500 }, { "epoch": 12.85, "learning_rate": 4.357901933681129e-05, "loss": 2.4193, "step": 2593000 }, { "epoch": 12.85, "learning_rate": 4.35777807503852e-05, "loss": 2.403, "step": 2593500 }, { "epoch": 12.85, "learning_rate": 4.357654216395912e-05, "loss": 2.4337, "step": 2594000 }, { "epoch": 12.85, "learning_rate": 4.3575303577533034e-05, "loss": 2.4078, "step": 2594500 }, { "epoch": 12.86, "learning_rate": 4.35740674682798e-05, "loss": 2.4119, "step": 2595000 }, { "epoch": 12.86, "learning_rate": 4.357282888185372e-05, "loss": 2.4088, "step": 2595500 }, { "epoch": 12.86, "learning_rate": 4.357159029542764e-05, "loss": 2.4246, "step": 2596000 }, { "epoch": 12.86, "learning_rate": 4.357035170900155e-05, "loss": 2.4001, "step": 2596500 }, { "epoch": 12.87, "learning_rate": 4.3569113122575464e-05, "loss": 2.4112, "step": 2597000 }, { "epoch": 12.87, "learning_rate": 4.356787701332224e-05, "loss": 2.4181, "step": 2597500 }, { "epoch": 12.87, "learning_rate": 4.356664090406901e-05, "loss": 2.43, "step": 2598000 }, { "epoch": 12.87, "learning_rate": 4.356540479481577e-05, "loss": 2.4409, "step": 2598500 }, { "epoch": 12.88, "learning_rate": 4.3564168685562546e-05, "loss": 2.4328, "step": 2599000 }, { "epoch": 12.88, "learning_rate": 4.356293009913646e-05, "loss": 2.4351, "step": 2599500 }, { "epoch": 12.88, "learning_rate": 4.356169151271038e-05, "loss": 2.4067, "step": 2600000 }, { "epoch": 12.88, "learning_rate": 4.356045292628429e-05, "loss": 2.4088, "step": 2600500 }, { "epoch": 12.89, "learning_rate": 4.355921433985821e-05, "loss": 2.4367, "step": 2601000 }, { "epoch": 12.89, "learning_rate": 4.3557975753432124e-05, "loss": 2.4095, "step": 2601500 }, { "epoch": 12.89, "learning_rate": 4.355673716700604e-05, "loss": 2.4283, "step": 2602000 }, { "epoch": 12.89, "learning_rate": 4.355549858057996e-05, "loss": 2.4218, "step": 2602500 }, { "epoch": 12.9, "learning_rate": 4.3554259994153875e-05, "loss": 2.4259, "step": 2603000 }, { "epoch": 12.9, "learning_rate": 4.355302140772779e-05, "loss": 2.3895, "step": 2603500 }, { "epoch": 12.9, "learning_rate": 4.355178282130171e-05, "loss": 2.416, "step": 2604000 }, { "epoch": 12.9, "learning_rate": 4.3550544234875626e-05, "loss": 2.3923, "step": 2604500 }, { "epoch": 12.91, "learning_rate": 4.354930812562239e-05, "loss": 2.3966, "step": 2605000 }, { "epoch": 12.91, "learning_rate": 4.3548069539196305e-05, "loss": 2.4156, "step": 2605500 }, { "epoch": 12.91, "learning_rate": 4.354683095277022e-05, "loss": 2.4116, "step": 2606000 }, { "epoch": 12.91, "learning_rate": 4.354559236634414e-05, "loss": 2.4211, "step": 2606500 }, { "epoch": 12.92, "learning_rate": 4.3544353779918055e-05, "loss": 2.4306, "step": 2607000 }, { "epoch": 12.92, "learning_rate": 4.354311519349197e-05, "loss": 2.3948, "step": 2607500 }, { "epoch": 12.92, "learning_rate": 4.354187660706589e-05, "loss": 2.4124, "step": 2608000 }, { "epoch": 12.92, "learning_rate": 4.3540638020639806e-05, "loss": 2.4227, "step": 2608500 }, { "epoch": 12.93, "learning_rate": 4.353939943421372e-05, "loss": 2.4166, "step": 2609000 }, { "epoch": 12.93, "learning_rate": 4.353816084778764e-05, "loss": 2.4322, "step": 2609500 }, { "epoch": 12.93, "learning_rate": 4.353692226136156e-05, "loss": 2.4191, "step": 2610000 }, { "epoch": 12.93, "learning_rate": 4.3535686152108326e-05, "loss": 2.424, "step": 2610500 }, { "epoch": 12.94, "learning_rate": 4.353444756568224e-05, "loss": 2.4131, "step": 2611000 }, { "epoch": 12.94, "learning_rate": 4.353320897925616e-05, "loss": 2.422, "step": 2611500 }, { "epoch": 12.94, "learning_rate": 4.3531970392830076e-05, "loss": 2.3948, "step": 2612000 }, { "epoch": 12.94, "learning_rate": 4.3530731806403993e-05, "loss": 2.4332, "step": 2612500 }, { "epoch": 12.95, "learning_rate": 4.352949321997791e-05, "loss": 2.4331, "step": 2613000 }, { "epoch": 12.95, "learning_rate": 4.352825463355182e-05, "loss": 2.4107, "step": 2613500 }, { "epoch": 12.95, "learning_rate": 4.352701604712574e-05, "loss": 2.4368, "step": 2614000 }, { "epoch": 12.95, "learning_rate": 4.3525779937872506e-05, "loss": 2.4095, "step": 2614500 }, { "epoch": 12.96, "learning_rate": 4.352454135144642e-05, "loss": 2.3953, "step": 2615000 }, { "epoch": 12.96, "learning_rate": 4.352330276502034e-05, "loss": 2.411, "step": 2615500 }, { "epoch": 12.96, "learning_rate": 4.352206417859426e-05, "loss": 2.3921, "step": 2616000 }, { "epoch": 12.96, "learning_rate": 4.3520828069341026e-05, "loss": 2.4145, "step": 2616500 }, { "epoch": 12.97, "learning_rate": 4.351958948291494e-05, "loss": 2.4006, "step": 2617000 }, { "epoch": 12.97, "learning_rate": 4.351835089648886e-05, "loss": 2.4215, "step": 2617500 }, { "epoch": 12.97, "learning_rate": 4.3517112310062777e-05, "loss": 2.4077, "step": 2618000 }, { "epoch": 12.97, "learning_rate": 4.3515873723636694e-05, "loss": 2.4019, "step": 2618500 }, { "epoch": 12.98, "learning_rate": 4.351463513721061e-05, "loss": 2.4322, "step": 2619000 }, { "epoch": 12.98, "learning_rate": 4.351339655078453e-05, "loss": 2.4438, "step": 2619500 }, { "epoch": 12.98, "learning_rate": 4.3512157964358444e-05, "loss": 2.3985, "step": 2620000 }, { "epoch": 12.98, "learning_rate": 4.3510919377932354e-05, "loss": 2.4106, "step": 2620500 }, { "epoch": 12.99, "learning_rate": 4.350968326867912e-05, "loss": 2.4127, "step": 2621000 }, { "epoch": 12.99, "learning_rate": 4.350844468225304e-05, "loss": 2.3968, "step": 2621500 }, { "epoch": 12.99, "learning_rate": 4.350720609582696e-05, "loss": 2.4192, "step": 2622000 }, { "epoch": 12.99, "learning_rate": 4.3505967509400874e-05, "loss": 2.4349, "step": 2622500 }, { "epoch": 13.0, "learning_rate": 4.350472892297479e-05, "loss": 2.403, "step": 2623000 }, { "epoch": 13.0, "learning_rate": 4.350349281372156e-05, "loss": 2.4137, "step": 2623500 }, { "epoch": 13.0, "eval_accuracy": 0.6461218698848585, "eval_accuracy_mlm": 0.5996847093159505, "eval_accuracy_nsp": 0.8652920665675657, "eval_loss": 2.396927833557129, "eval_runtime": 146.13, "eval_samples_per_second": 1744.741, "eval_steps_per_second": 72.702, "step": 2623959 }, { "epoch": 13.0, "learning_rate": 4.350225422729548e-05, "loss": 2.3977, "step": 2624000 }, { "epoch": 13.0, "learning_rate": 4.3501015640869394e-05, "loss": 2.374, "step": 2624500 }, { "epoch": 13.01, "learning_rate": 4.3499779531616156e-05, "loss": 2.3743, "step": 2625000 }, { "epoch": 13.01, "learning_rate": 4.349854094519007e-05, "loss": 2.3774, "step": 2625500 }, { "epoch": 13.01, "learning_rate": 4.349730483593684e-05, "loss": 2.3954, "step": 2626000 }, { "epoch": 13.01, "learning_rate": 4.349606624951076e-05, "loss": 2.3685, "step": 2626500 }, { "epoch": 13.02, "learning_rate": 4.3494827663084675e-05, "loss": 2.3846, "step": 2627000 }, { "epoch": 13.02, "learning_rate": 4.349358907665859e-05, "loss": 2.4169, "step": 2627500 }, { "epoch": 13.02, "learning_rate": 4.349235049023251e-05, "loss": 2.3881, "step": 2628000 }, { "epoch": 13.02, "learning_rate": 4.3491111903806426e-05, "loss": 2.3866, "step": 2628500 }, { "epoch": 13.02, "learning_rate": 4.3489875794553195e-05, "loss": 2.3958, "step": 2629000 }, { "epoch": 13.03, "learning_rate": 4.3488639685299964e-05, "loss": 2.3686, "step": 2629500 }, { "epoch": 13.03, "learning_rate": 4.348740109887388e-05, "loss": 2.3922, "step": 2630000 }, { "epoch": 13.03, "learning_rate": 4.34861625124478e-05, "loss": 2.3662, "step": 2630500 }, { "epoch": 13.03, "learning_rate": 4.3484923926021714e-05, "loss": 2.3793, "step": 2631000 }, { "epoch": 13.04, "learning_rate": 4.3483685339595625e-05, "loss": 2.3712, "step": 2631500 }, { "epoch": 13.04, "learning_rate": 4.348244675316954e-05, "loss": 2.4067, "step": 2632000 }, { "epoch": 13.04, "learning_rate": 4.348120816674346e-05, "loss": 2.3861, "step": 2632500 }, { "epoch": 13.04, "learning_rate": 4.3479972057490234e-05, "loss": 2.4153, "step": 2633000 }, { "epoch": 13.05, "learning_rate": 4.347873347106415e-05, "loss": 2.3681, "step": 2633500 }, { "epoch": 13.05, "learning_rate": 4.347749488463807e-05, "loss": 2.3843, "step": 2634000 }, { "epoch": 13.05, "learning_rate": 4.347625629821198e-05, "loss": 2.3793, "step": 2634500 }, { "epoch": 13.05, "learning_rate": 4.3475017711785895e-05, "loss": 2.3923, "step": 2635000 }, { "epoch": 13.06, "learning_rate": 4.347377912535981e-05, "loss": 2.3799, "step": 2635500 }, { "epoch": 13.06, "learning_rate": 4.347254053893373e-05, "loss": 2.3595, "step": 2636000 }, { "epoch": 13.06, "learning_rate": 4.3471301952507646e-05, "loss": 2.4064, "step": 2636500 }, { "epoch": 13.06, "learning_rate": 4.3470063366081556e-05, "loss": 2.4083, "step": 2637000 }, { "epoch": 13.07, "learning_rate": 4.346882477965547e-05, "loss": 2.3916, "step": 2637500 }, { "epoch": 13.07, "learning_rate": 4.346758619322939e-05, "loss": 2.372, "step": 2638000 }, { "epoch": 13.07, "learning_rate": 4.346634760680331e-05, "loss": 2.3781, "step": 2638500 }, { "epoch": 13.07, "learning_rate": 4.3465109020377224e-05, "loss": 2.408, "step": 2639000 }, { "epoch": 13.08, "learning_rate": 4.346387043395114e-05, "loss": 2.4136, "step": 2639500 }, { "epoch": 13.08, "learning_rate": 4.346263432469791e-05, "loss": 2.3621, "step": 2640000 }, { "epoch": 13.08, "learning_rate": 4.3461398215444685e-05, "loss": 2.3862, "step": 2640500 }, { "epoch": 13.08, "learning_rate": 4.3460159629018595e-05, "loss": 2.3855, "step": 2641000 }, { "epoch": 13.09, "learning_rate": 4.345892104259251e-05, "loss": 2.4046, "step": 2641500 }, { "epoch": 13.09, "learning_rate": 4.345768245616643e-05, "loss": 2.3887, "step": 2642000 }, { "epoch": 13.09, "learning_rate": 4.3456443869740346e-05, "loss": 2.3702, "step": 2642500 }, { "epoch": 13.09, "learning_rate": 4.3455207760487115e-05, "loss": 2.4123, "step": 2643000 }, { "epoch": 13.1, "learning_rate": 4.345396917406103e-05, "loss": 2.3679, "step": 2643500 }, { "epoch": 13.1, "learning_rate": 4.345273058763494e-05, "loss": 2.3806, "step": 2644000 }, { "epoch": 13.1, "learning_rate": 4.345149200120886e-05, "loss": 2.4071, "step": 2644500 }, { "epoch": 13.1, "learning_rate": 4.3450253414782776e-05, "loss": 2.3682, "step": 2645000 }, { "epoch": 13.11, "learning_rate": 4.344901482835669e-05, "loss": 2.375, "step": 2645500 }, { "epoch": 13.11, "learning_rate": 4.344777624193061e-05, "loss": 2.3903, "step": 2646000 }, { "epoch": 13.11, "learning_rate": 4.3446537655504526e-05, "loss": 2.3944, "step": 2646500 }, { "epoch": 13.11, "learning_rate": 4.344529906907844e-05, "loss": 2.3925, "step": 2647000 }, { "epoch": 13.12, "learning_rate": 4.344406048265236e-05, "loss": 2.3984, "step": 2647500 }, { "epoch": 13.12, "learning_rate": 4.344282189622628e-05, "loss": 2.4028, "step": 2648000 }, { "epoch": 13.12, "learning_rate": 4.3441583309800194e-05, "loss": 2.3867, "step": 2648500 }, { "epoch": 13.12, "learning_rate": 4.344034472337411e-05, "loss": 2.4064, "step": 2649000 }, { "epoch": 13.13, "learning_rate": 4.343910613694803e-05, "loss": 2.3914, "step": 2649500 }, { "epoch": 13.13, "learning_rate": 4.34378700276948e-05, "loss": 2.4027, "step": 2650000 }, { "epoch": 13.13, "learning_rate": 4.343663391844156e-05, "loss": 2.4052, "step": 2650500 }, { "epoch": 13.13, "learning_rate": 4.3435395332015476e-05, "loss": 2.3882, "step": 2651000 }, { "epoch": 13.14, "learning_rate": 4.3434161699935096e-05, "loss": 2.3821, "step": 2651500 }, { "epoch": 13.14, "learning_rate": 4.3432923113509013e-05, "loss": 2.4076, "step": 2652000 }, { "epoch": 13.14, "learning_rate": 4.343168452708293e-05, "loss": 2.3935, "step": 2652500 }, { "epoch": 13.14, "learning_rate": 4.34304484178297e-05, "loss": 2.3977, "step": 2653000 }, { "epoch": 13.15, "learning_rate": 4.3429209831403616e-05, "loss": 2.3899, "step": 2653500 }, { "epoch": 13.15, "learning_rate": 4.342797124497753e-05, "loss": 2.3781, "step": 2654000 }, { "epoch": 13.15, "learning_rate": 4.342673265855145e-05, "loss": 2.3914, "step": 2654500 }, { "epoch": 13.15, "learning_rate": 4.342549407212537e-05, "loss": 2.3926, "step": 2655000 }, { "epoch": 13.16, "learning_rate": 4.3424255485699284e-05, "loss": 2.4267, "step": 2655500 }, { "epoch": 13.16, "learning_rate": 4.34230168992732e-05, "loss": 2.3881, "step": 2656000 }, { "epoch": 13.16, "learning_rate": 4.342177831284712e-05, "loss": 2.3844, "step": 2656500 }, { "epoch": 13.16, "learning_rate": 4.3420539726421035e-05, "loss": 2.4005, "step": 2657000 }, { "epoch": 13.17, "learning_rate": 4.341930113999495e-05, "loss": 2.3819, "step": 2657500 }, { "epoch": 13.17, "learning_rate": 4.341806255356887e-05, "loss": 2.395, "step": 2658000 }, { "epoch": 13.17, "learning_rate": 4.341682644431563e-05, "loss": 2.371, "step": 2658500 }, { "epoch": 13.17, "learning_rate": 4.341558785788955e-05, "loss": 2.3832, "step": 2659000 }, { "epoch": 13.18, "learning_rate": 4.3414349271463464e-05, "loss": 2.391, "step": 2659500 }, { "epoch": 13.18, "learning_rate": 4.341311068503738e-05, "loss": 2.3821, "step": 2660000 }, { "epoch": 13.18, "learning_rate": 4.34118720986113e-05, "loss": 2.3738, "step": 2660500 }, { "epoch": 13.18, "learning_rate": 4.3410633512185215e-05, "loss": 2.398, "step": 2661000 }, { "epoch": 13.19, "learning_rate": 4.340939492575913e-05, "loss": 2.395, "step": 2661500 }, { "epoch": 13.19, "learning_rate": 4.340815633933305e-05, "loss": 2.3963, "step": 2662000 }, { "epoch": 13.19, "learning_rate": 4.3406917752906966e-05, "loss": 2.3994, "step": 2662500 }, { "epoch": 13.19, "learning_rate": 4.3405679166480876e-05, "loss": 2.3915, "step": 2663000 }, { "epoch": 13.2, "learning_rate": 4.340444058005479e-05, "loss": 2.4018, "step": 2663500 }, { "epoch": 13.2, "learning_rate": 4.340320199362871e-05, "loss": 2.3685, "step": 2664000 }, { "epoch": 13.2, "learning_rate": 4.340196340720263e-05, "loss": 2.4209, "step": 2664500 }, { "epoch": 13.2, "learning_rate": 4.3400724820776544e-05, "loss": 2.406, "step": 2665000 }, { "epoch": 13.21, "learning_rate": 4.339948623435046e-05, "loss": 2.3968, "step": 2665500 }, { "epoch": 13.21, "learning_rate": 4.339824764792438e-05, "loss": 2.3927, "step": 2666000 }, { "epoch": 13.21, "learning_rate": 4.3397011538671146e-05, "loss": 2.3701, "step": 2666500 }, { "epoch": 13.21, "learning_rate": 4.339577295224506e-05, "loss": 2.364, "step": 2667000 }, { "epoch": 13.22, "learning_rate": 4.339453436581898e-05, "loss": 2.3787, "step": 2667500 }, { "epoch": 13.22, "learning_rate": 4.33932957793929e-05, "loss": 2.3985, "step": 2668000 }, { "epoch": 13.22, "learning_rate": 4.3392057192966814e-05, "loss": 2.379, "step": 2668500 }, { "epoch": 13.22, "learning_rate": 4.339081860654073e-05, "loss": 2.3979, "step": 2669000 }, { "epoch": 13.23, "learning_rate": 4.338958002011465e-05, "loss": 2.3876, "step": 2669500 }, { "epoch": 13.23, "learning_rate": 4.3388341433688565e-05, "loss": 2.4065, "step": 2670000 }, { "epoch": 13.23, "learning_rate": 4.338710284726248e-05, "loss": 2.3983, "step": 2670500 }, { "epoch": 13.23, "learning_rate": 4.338586426083639e-05, "loss": 2.4001, "step": 2671000 }, { "epoch": 13.24, "learning_rate": 4.338462567441031e-05, "loss": 2.4228, "step": 2671500 }, { "epoch": 13.24, "learning_rate": 4.338338956515708e-05, "loss": 2.3995, "step": 2672000 }, { "epoch": 13.24, "learning_rate": 4.3382153455903847e-05, "loss": 2.3867, "step": 2672500 }, { "epoch": 13.24, "learning_rate": 4.3380914869477763e-05, "loss": 2.3989, "step": 2673000 }, { "epoch": 13.25, "learning_rate": 4.337967628305168e-05, "loss": 2.4051, "step": 2673500 }, { "epoch": 13.25, "learning_rate": 4.33784376966256e-05, "loss": 2.4195, "step": 2674000 }, { "epoch": 13.25, "learning_rate": 4.3377199110199514e-05, "loss": 2.4004, "step": 2674500 }, { "epoch": 13.25, "learning_rate": 4.337596052377343e-05, "loss": 2.3939, "step": 2675000 }, { "epoch": 13.26, "learning_rate": 4.337472193734735e-05, "loss": 2.3849, "step": 2675500 }, { "epoch": 13.26, "learning_rate": 4.3373483350921265e-05, "loss": 2.3963, "step": 2676000 }, { "epoch": 13.26, "learning_rate": 4.337224476449518e-05, "loss": 2.4051, "step": 2676500 }, { "epoch": 13.26, "learning_rate": 4.3371008655241944e-05, "loss": 2.3968, "step": 2677000 }, { "epoch": 13.27, "learning_rate": 4.336977006881586e-05, "loss": 2.4036, "step": 2677500 }, { "epoch": 13.27, "learning_rate": 4.336853148238978e-05, "loss": 2.4113, "step": 2678000 }, { "epoch": 13.27, "learning_rate": 4.3367292895963695e-05, "loss": 2.4127, "step": 2678500 }, { "epoch": 13.27, "learning_rate": 4.336605430953761e-05, "loss": 2.3876, "step": 2679000 }, { "epoch": 13.28, "learning_rate": 4.336481572311153e-05, "loss": 2.4134, "step": 2679500 }, { "epoch": 13.28, "learning_rate": 4.33635796138583e-05, "loss": 2.4021, "step": 2680000 }, { "epoch": 13.28, "learning_rate": 4.3362343504605066e-05, "loss": 2.401, "step": 2680500 }, { "epoch": 13.28, "learning_rate": 4.336110491817898e-05, "loss": 2.3873, "step": 2681000 }, { "epoch": 13.29, "learning_rate": 4.335986633175289e-05, "loss": 2.394, "step": 2681500 }, { "epoch": 13.29, "learning_rate": 4.335863022249967e-05, "loss": 2.4049, "step": 2682000 }, { "epoch": 13.29, "learning_rate": 4.3357391636073586e-05, "loss": 2.3955, "step": 2682500 }, { "epoch": 13.29, "learning_rate": 4.33561530496475e-05, "loss": 2.4062, "step": 2683000 }, { "epoch": 13.29, "learning_rate": 4.335491446322142e-05, "loss": 2.427, "step": 2683500 }, { "epoch": 13.3, "learning_rate": 4.335367587679534e-05, "loss": 2.4073, "step": 2684000 }, { "epoch": 13.3, "learning_rate": 4.3352437290369254e-05, "loss": 2.4097, "step": 2684500 }, { "epoch": 13.3, "learning_rate": 4.3351198703943164e-05, "loss": 2.3962, "step": 2685000 }, { "epoch": 13.3, "learning_rate": 4.334996011751708e-05, "loss": 2.3851, "step": 2685500 }, { "epoch": 13.31, "learning_rate": 4.3348721531091e-05, "loss": 2.4041, "step": 2686000 }, { "epoch": 13.31, "learning_rate": 4.3347482944664914e-05, "loss": 2.3944, "step": 2686500 }, { "epoch": 13.31, "learning_rate": 4.334624435823883e-05, "loss": 2.4004, "step": 2687000 }, { "epoch": 13.31, "learning_rate": 4.334501072615845e-05, "loss": 2.3767, "step": 2687500 }, { "epoch": 13.32, "learning_rate": 4.334377213973237e-05, "loss": 2.3833, "step": 2688000 }, { "epoch": 13.32, "learning_rate": 4.3342533553306286e-05, "loss": 2.3789, "step": 2688500 }, { "epoch": 13.32, "learning_rate": 4.3341297444053055e-05, "loss": 2.4131, "step": 2689000 }, { "epoch": 13.32, "learning_rate": 4.334005885762697e-05, "loss": 2.404, "step": 2689500 }, { "epoch": 13.33, "learning_rate": 4.333882027120089e-05, "loss": 2.4251, "step": 2690000 }, { "epoch": 13.33, "learning_rate": 4.3337581684774806e-05, "loss": 2.4047, "step": 2690500 }, { "epoch": 13.33, "learning_rate": 4.333634309834872e-05, "loss": 2.384, "step": 2691000 }, { "epoch": 13.33, "learning_rate": 4.333510451192264e-05, "loss": 2.4168, "step": 2691500 }, { "epoch": 13.34, "learning_rate": 4.333386592549655e-05, "loss": 2.3786, "step": 2692000 }, { "epoch": 13.34, "learning_rate": 4.3332627339070467e-05, "loss": 2.3896, "step": 2692500 }, { "epoch": 13.34, "learning_rate": 4.3331388752644383e-05, "loss": 2.3878, "step": 2693000 }, { "epoch": 13.34, "learning_rate": 4.33301501662183e-05, "loss": 2.3996, "step": 2693500 }, { "epoch": 13.35, "learning_rate": 4.332891157979222e-05, "loss": 2.4324, "step": 2694000 }, { "epoch": 13.35, "learning_rate": 4.3327675470538986e-05, "loss": 2.3555, "step": 2694500 }, { "epoch": 13.35, "learning_rate": 4.33264368841129e-05, "loss": 2.4025, "step": 2695000 }, { "epoch": 13.35, "learning_rate": 4.332520077485967e-05, "loss": 2.4122, "step": 2695500 }, { "epoch": 13.36, "learning_rate": 4.332396218843359e-05, "loss": 2.3975, "step": 2696000 }, { "epoch": 13.36, "learning_rate": 4.3322723602007506e-05, "loss": 2.3986, "step": 2696500 }, { "epoch": 13.36, "learning_rate": 4.332148501558142e-05, "loss": 2.378, "step": 2697000 }, { "epoch": 13.36, "learning_rate": 4.332024642915534e-05, "loss": 2.4197, "step": 2697500 }, { "epoch": 13.37, "learning_rate": 4.3319007842729256e-05, "loss": 2.4088, "step": 2698000 }, { "epoch": 13.37, "learning_rate": 4.3317769256303173e-05, "loss": 2.427, "step": 2698500 }, { "epoch": 13.37, "learning_rate": 4.3316530669877084e-05, "loss": 2.4063, "step": 2699000 }, { "epoch": 13.37, "learning_rate": 4.3315292083451e-05, "loss": 2.4056, "step": 2699500 }, { "epoch": 13.38, "learning_rate": 4.331405349702492e-05, "loss": 2.3971, "step": 2700000 }, { "epoch": 13.38, "learning_rate": 4.3312814910598834e-05, "loss": 2.4082, "step": 2700500 }, { "epoch": 13.38, "learning_rate": 4.331157632417275e-05, "loss": 2.401, "step": 2701000 }, { "epoch": 13.38, "learning_rate": 4.331033773774666e-05, "loss": 2.3885, "step": 2701500 }, { "epoch": 13.39, "learning_rate": 4.330909915132058e-05, "loss": 2.3773, "step": 2702000 }, { "epoch": 13.39, "learning_rate": 4.3307860564894495e-05, "loss": 2.441, "step": 2702500 }, { "epoch": 13.39, "learning_rate": 4.330662197846841e-05, "loss": 2.4094, "step": 2703000 }, { "epoch": 13.39, "learning_rate": 4.330538339204233e-05, "loss": 2.4056, "step": 2703500 }, { "epoch": 13.4, "learning_rate": 4.3304144805616246e-05, "loss": 2.4129, "step": 2704000 }, { "epoch": 13.4, "learning_rate": 4.3302908696363015e-05, "loss": 2.3796, "step": 2704500 }, { "epoch": 13.4, "learning_rate": 4.330167010993693e-05, "loss": 2.3743, "step": 2705000 }, { "epoch": 13.4, "learning_rate": 4.330043152351085e-05, "loss": 2.4091, "step": 2705500 }, { "epoch": 13.41, "learning_rate": 4.329919789143047e-05, "loss": 2.3977, "step": 2706000 }, { "epoch": 13.41, "learning_rate": 4.3297959305004386e-05, "loss": 2.3935, "step": 2706500 }, { "epoch": 13.41, "learning_rate": 4.32967207185783e-05, "loss": 2.4121, "step": 2707000 }, { "epoch": 13.41, "learning_rate": 4.329548213215222e-05, "loss": 2.4118, "step": 2707500 }, { "epoch": 13.42, "learning_rate": 4.329424354572614e-05, "loss": 2.4029, "step": 2708000 }, { "epoch": 13.42, "learning_rate": 4.3293007436472906e-05, "loss": 2.3758, "step": 2708500 }, { "epoch": 13.42, "learning_rate": 4.329176885004682e-05, "loss": 2.3919, "step": 2709000 }, { "epoch": 13.42, "learning_rate": 4.329053026362074e-05, "loss": 2.408, "step": 2709500 }, { "epoch": 13.43, "learning_rate": 4.328929167719466e-05, "loss": 2.3962, "step": 2710000 }, { "epoch": 13.43, "learning_rate": 4.3288053090768574e-05, "loss": 2.3936, "step": 2710500 }, { "epoch": 13.43, "learning_rate": 4.328681450434249e-05, "loss": 2.402, "step": 2711000 }, { "epoch": 13.43, "learning_rate": 4.328557839508925e-05, "loss": 2.3929, "step": 2711500 }, { "epoch": 13.44, "learning_rate": 4.328433980866317e-05, "loss": 2.3941, "step": 2712000 }, { "epoch": 13.44, "learning_rate": 4.3283101222237086e-05, "loss": 2.396, "step": 2712500 }, { "epoch": 13.44, "learning_rate": 4.3281862635811003e-05, "loss": 2.4242, "step": 2713000 }, { "epoch": 13.44, "learning_rate": 4.328062404938492e-05, "loss": 2.4014, "step": 2713500 }, { "epoch": 13.45, "learning_rate": 4.327938546295884e-05, "loss": 2.3839, "step": 2714000 }, { "epoch": 13.45, "learning_rate": 4.3278146876532754e-05, "loss": 2.4211, "step": 2714500 }, { "epoch": 13.45, "learning_rate": 4.327691076727952e-05, "loss": 2.3848, "step": 2715000 }, { "epoch": 13.45, "learning_rate": 4.327567218085344e-05, "loss": 2.4099, "step": 2715500 }, { "epoch": 13.46, "learning_rate": 4.327443359442736e-05, "loss": 2.4064, "step": 2716000 }, { "epoch": 13.46, "learning_rate": 4.3273195008001274e-05, "loss": 2.4105, "step": 2716500 }, { "epoch": 13.46, "learning_rate": 4.327195642157519e-05, "loss": 2.4022, "step": 2717000 }, { "epoch": 13.46, "learning_rate": 4.327071783514911e-05, "loss": 2.3729, "step": 2717500 }, { "epoch": 13.47, "learning_rate": 4.3269479248723025e-05, "loss": 2.4031, "step": 2718000 }, { "epoch": 13.47, "learning_rate": 4.326824066229694e-05, "loss": 2.3839, "step": 2718500 }, { "epoch": 13.47, "learning_rate": 4.3267004553043704e-05, "loss": 2.4294, "step": 2719000 }, { "epoch": 13.47, "learning_rate": 4.326576596661762e-05, "loss": 2.4108, "step": 2719500 }, { "epoch": 13.48, "learning_rate": 4.326452738019154e-05, "loss": 2.4364, "step": 2720000 }, { "epoch": 13.48, "learning_rate": 4.3263288793765454e-05, "loss": 2.4068, "step": 2720500 }, { "epoch": 13.48, "learning_rate": 4.326205020733937e-05, "loss": 2.4131, "step": 2721000 }, { "epoch": 13.48, "learning_rate": 4.326081162091329e-05, "loss": 2.3866, "step": 2721500 }, { "epoch": 13.49, "learning_rate": 4.325957551166006e-05, "loss": 2.388, "step": 2722000 }, { "epoch": 13.49, "learning_rate": 4.3258336925233974e-05, "loss": 2.3977, "step": 2722500 }, { "epoch": 13.49, "learning_rate": 4.3257100815980736e-05, "loss": 2.4153, "step": 2723000 }, { "epoch": 13.49, "learning_rate": 4.325586222955465e-05, "loss": 2.3835, "step": 2723500 }, { "epoch": 13.5, "learning_rate": 4.325462364312857e-05, "loss": 2.4049, "step": 2724000 }, { "epoch": 13.5, "learning_rate": 4.325338505670249e-05, "loss": 2.4043, "step": 2724500 }, { "epoch": 13.5, "learning_rate": 4.3252146470276404e-05, "loss": 2.4012, "step": 2725000 }, { "epoch": 13.5, "learning_rate": 4.325090788385032e-05, "loss": 2.3883, "step": 2725500 }, { "epoch": 13.51, "learning_rate": 4.324966929742424e-05, "loss": 2.4236, "step": 2726000 }, { "epoch": 13.51, "learning_rate": 4.3248430710998154e-05, "loss": 2.3839, "step": 2726500 }, { "epoch": 13.51, "learning_rate": 4.324719212457207e-05, "loss": 2.4157, "step": 2727000 }, { "epoch": 13.51, "learning_rate": 4.324595353814599e-05, "loss": 2.3927, "step": 2727500 }, { "epoch": 13.52, "learning_rate": 4.324471742889276e-05, "loss": 2.388, "step": 2728000 }, { "epoch": 13.52, "learning_rate": 4.3243481319639526e-05, "loss": 2.3996, "step": 2728500 }, { "epoch": 13.52, "learning_rate": 4.3242242733213436e-05, "loss": 2.4144, "step": 2729000 }, { "epoch": 13.52, "learning_rate": 4.324100414678735e-05, "loss": 2.4069, "step": 2729500 }, { "epoch": 13.53, "learning_rate": 4.323976556036127e-05, "loss": 2.4112, "step": 2730000 }, { "epoch": 13.53, "learning_rate": 4.323852697393519e-05, "loss": 2.4076, "step": 2730500 }, { "epoch": 13.53, "learning_rate": 4.3237288387509104e-05, "loss": 2.4015, "step": 2731000 }, { "epoch": 13.53, "learning_rate": 4.323604980108302e-05, "loss": 2.4073, "step": 2731500 }, { "epoch": 13.54, "learning_rate": 4.323481369182979e-05, "loss": 2.428, "step": 2732000 }, { "epoch": 13.54, "learning_rate": 4.3233575105403706e-05, "loss": 2.3869, "step": 2732500 }, { "epoch": 13.54, "learning_rate": 4.323233651897762e-05, "loss": 2.411, "step": 2733000 }, { "epoch": 13.54, "learning_rate": 4.323109793255154e-05, "loss": 2.3869, "step": 2733500 }, { "epoch": 13.55, "learning_rate": 4.322985934612546e-05, "loss": 2.4143, "step": 2734000 }, { "epoch": 13.55, "learning_rate": 4.3228620759699374e-05, "loss": 2.4027, "step": 2734500 }, { "epoch": 13.55, "learning_rate": 4.322738217327329e-05, "loss": 2.3911, "step": 2735000 }, { "epoch": 13.55, "learning_rate": 4.322614606402006e-05, "loss": 2.4308, "step": 2735500 }, { "epoch": 13.56, "learning_rate": 4.322490747759397e-05, "loss": 2.4117, "step": 2736000 }, { "epoch": 13.56, "learning_rate": 4.322366889116789e-05, "loss": 2.4093, "step": 2736500 }, { "epoch": 13.56, "learning_rate": 4.3222430304741804e-05, "loss": 2.372, "step": 2737000 }, { "epoch": 13.56, "learning_rate": 4.322119171831572e-05, "loss": 2.3907, "step": 2737500 }, { "epoch": 13.56, "learning_rate": 4.321995313188964e-05, "loss": 2.3867, "step": 2738000 }, { "epoch": 13.57, "learning_rate": 4.3218714545463555e-05, "loss": 2.3667, "step": 2738500 }, { "epoch": 13.57, "learning_rate": 4.3217478436210323e-05, "loss": 2.3809, "step": 2739000 }, { "epoch": 13.57, "learning_rate": 4.321623984978424e-05, "loss": 2.4062, "step": 2739500 }, { "epoch": 13.57, "learning_rate": 4.321500126335816e-05, "loss": 2.396, "step": 2740000 }, { "epoch": 13.58, "learning_rate": 4.3213762676932074e-05, "loss": 2.4295, "step": 2740500 }, { "epoch": 13.58, "learning_rate": 4.321252409050599e-05, "loss": 2.4223, "step": 2741000 }, { "epoch": 13.58, "learning_rate": 4.321128550407991e-05, "loss": 2.4312, "step": 2741500 }, { "epoch": 13.58, "learning_rate": 4.3210046917653825e-05, "loss": 2.408, "step": 2742000 }, { "epoch": 13.59, "learning_rate": 4.320880833122774e-05, "loss": 2.391, "step": 2742500 }, { "epoch": 13.59, "learning_rate": 4.320756974480166e-05, "loss": 2.4148, "step": 2743000 }, { "epoch": 13.59, "learning_rate": 4.320633363554842e-05, "loss": 2.3847, "step": 2743500 }, { "epoch": 13.59, "learning_rate": 4.320509504912234e-05, "loss": 2.4125, "step": 2744000 }, { "epoch": 13.6, "learning_rate": 4.3203856462696255e-05, "loss": 2.4068, "step": 2744500 }, { "epoch": 13.6, "learning_rate": 4.320261787627017e-05, "loss": 2.3831, "step": 2745000 }, { "epoch": 13.6, "learning_rate": 4.320137928984409e-05, "loss": 2.4067, "step": 2745500 }, { "epoch": 13.6, "learning_rate": 4.3200140703418006e-05, "loss": 2.3968, "step": 2746000 }, { "epoch": 13.61, "learning_rate": 4.319890211699192e-05, "loss": 2.4172, "step": 2746500 }, { "epoch": 13.61, "learning_rate": 4.319766353056584e-05, "loss": 2.4011, "step": 2747000 }, { "epoch": 13.61, "learning_rate": 4.319642494413975e-05, "loss": 2.4083, "step": 2747500 }, { "epoch": 13.61, "learning_rate": 4.3195188834886525e-05, "loss": 2.4248, "step": 2748000 }, { "epoch": 13.62, "learning_rate": 4.319395024846044e-05, "loss": 2.3911, "step": 2748500 }, { "epoch": 13.62, "learning_rate": 4.319271166203436e-05, "loss": 2.3895, "step": 2749000 }, { "epoch": 13.62, "learning_rate": 4.3191473075608276e-05, "loss": 2.4364, "step": 2749500 }, { "epoch": 13.62, "learning_rate": 4.319023448918219e-05, "loss": 2.4193, "step": 2750000 }, { "epoch": 13.63, "learning_rate": 4.3188998379928955e-05, "loss": 2.4033, "step": 2750500 }, { "epoch": 13.63, "learning_rate": 4.318775979350287e-05, "loss": 2.4104, "step": 2751000 }, { "epoch": 13.63, "learning_rate": 4.318652120707679e-05, "loss": 2.4298, "step": 2751500 }, { "epoch": 13.63, "learning_rate": 4.3185282620650706e-05, "loss": 2.3979, "step": 2752000 }, { "epoch": 13.64, "learning_rate": 4.318404403422462e-05, "loss": 2.39, "step": 2752500 }, { "epoch": 13.64, "learning_rate": 4.318280792497139e-05, "loss": 2.4094, "step": 2753000 }, { "epoch": 13.64, "learning_rate": 4.318156933854531e-05, "loss": 2.4306, "step": 2753500 }, { "epoch": 13.64, "learning_rate": 4.3180330752119225e-05, "loss": 2.4091, "step": 2754000 }, { "epoch": 13.65, "learning_rate": 4.317909216569314e-05, "loss": 2.4295, "step": 2754500 }, { "epoch": 13.65, "learning_rate": 4.317785357926706e-05, "loss": 2.4031, "step": 2755000 }, { "epoch": 13.65, "learning_rate": 4.317661747001383e-05, "loss": 2.4377, "step": 2755500 }, { "epoch": 13.65, "learning_rate": 4.3175378883587745e-05, "loss": 2.3773, "step": 2756000 }, { "epoch": 13.66, "learning_rate": 4.3174140297161655e-05, "loss": 2.4163, "step": 2756500 }, { "epoch": 13.66, "learning_rate": 4.317290171073557e-05, "loss": 2.4139, "step": 2757000 }, { "epoch": 13.66, "learning_rate": 4.317166312430949e-05, "loss": 2.4116, "step": 2757500 }, { "epoch": 13.66, "learning_rate": 4.3170424537883406e-05, "loss": 2.391, "step": 2758000 }, { "epoch": 13.67, "learning_rate": 4.316918595145732e-05, "loss": 2.4302, "step": 2758500 }, { "epoch": 13.67, "learning_rate": 4.316794984220409e-05, "loss": 2.4052, "step": 2759000 }, { "epoch": 13.67, "learning_rate": 4.316671373295086e-05, "loss": 2.4154, "step": 2759500 }, { "epoch": 13.67, "learning_rate": 4.316547514652478e-05, "loss": 2.3856, "step": 2760000 }, { "epoch": 13.68, "learning_rate": 4.3164236560098694e-05, "loss": 2.4001, "step": 2760500 }, { "epoch": 13.68, "learning_rate": 4.316299797367261e-05, "loss": 2.4079, "step": 2761000 }, { "epoch": 13.68, "learning_rate": 4.316175938724653e-05, "loss": 2.3876, "step": 2761500 }, { "epoch": 13.68, "learning_rate": 4.3160520800820445e-05, "loss": 2.4033, "step": 2762000 }, { "epoch": 13.69, "learning_rate": 4.315928221439436e-05, "loss": 2.4024, "step": 2762500 }, { "epoch": 13.69, "learning_rate": 4.3158046105141124e-05, "loss": 2.3802, "step": 2763000 }, { "epoch": 13.69, "learning_rate": 4.315680751871504e-05, "loss": 2.4104, "step": 2763500 }, { "epoch": 13.69, "learning_rate": 4.315556893228896e-05, "loss": 2.3841, "step": 2764000 }, { "epoch": 13.7, "learning_rate": 4.315433282303573e-05, "loss": 2.3901, "step": 2764500 }, { "epoch": 13.7, "learning_rate": 4.3153094236609644e-05, "loss": 2.4044, "step": 2765000 }, { "epoch": 13.7, "learning_rate": 4.315185565018356e-05, "loss": 2.4209, "step": 2765500 }, { "epoch": 13.7, "learning_rate": 4.315061706375748e-05, "loss": 2.3937, "step": 2766000 }, { "epoch": 13.71, "learning_rate": 4.3149380954504246e-05, "loss": 2.41, "step": 2766500 }, { "epoch": 13.71, "learning_rate": 4.3148142368078156e-05, "loss": 2.4149, "step": 2767000 }, { "epoch": 13.71, "learning_rate": 4.314690378165207e-05, "loss": 2.4024, "step": 2767500 }, { "epoch": 13.71, "learning_rate": 4.314566519522599e-05, "loss": 2.4355, "step": 2768000 }, { "epoch": 13.72, "learning_rate": 4.314442660879991e-05, "loss": 2.4085, "step": 2768500 }, { "epoch": 13.72, "learning_rate": 4.3143188022373824e-05, "loss": 2.3709, "step": 2769000 }, { "epoch": 13.72, "learning_rate": 4.314194943594774e-05, "loss": 2.397, "step": 2769500 }, { "epoch": 13.72, "learning_rate": 4.314071084952166e-05, "loss": 2.3901, "step": 2770000 }, { "epoch": 13.73, "learning_rate": 4.3139472263095575e-05, "loss": 2.4026, "step": 2770500 }, { "epoch": 13.73, "learning_rate": 4.313823367666949e-05, "loss": 2.4013, "step": 2771000 }, { "epoch": 13.73, "learning_rate": 4.313699756741626e-05, "loss": 2.3871, "step": 2771500 }, { "epoch": 13.73, "learning_rate": 4.313575898099018e-05, "loss": 2.4078, "step": 2772000 }, { "epoch": 13.74, "learning_rate": 4.3134520394564094e-05, "loss": 2.4151, "step": 2772500 }, { "epoch": 13.74, "learning_rate": 4.313328180813801e-05, "loss": 2.4024, "step": 2773000 }, { "epoch": 13.74, "learning_rate": 4.313204322171193e-05, "loss": 2.4182, "step": 2773500 }, { "epoch": 13.74, "learning_rate": 4.3130804635285845e-05, "loss": 2.3841, "step": 2774000 }, { "epoch": 13.75, "learning_rate": 4.312956604885976e-05, "loss": 2.3987, "step": 2774500 }, { "epoch": 13.75, "learning_rate": 4.312832746243368e-05, "loss": 2.3907, "step": 2775000 }, { "epoch": 13.75, "learning_rate": 4.3127088876007596e-05, "loss": 2.4168, "step": 2775500 }, { "epoch": 13.75, "learning_rate": 4.312585028958151e-05, "loss": 2.4074, "step": 2776000 }, { "epoch": 13.76, "learning_rate": 4.312461170315542e-05, "loss": 2.3894, "step": 2776500 }, { "epoch": 13.76, "learning_rate": 4.312337311672934e-05, "loss": 2.3936, "step": 2777000 }, { "epoch": 13.76, "learning_rate": 4.312213453030326e-05, "loss": 2.3887, "step": 2777500 }, { "epoch": 13.76, "learning_rate": 4.3120895943877174e-05, "loss": 2.4014, "step": 2778000 }, { "epoch": 13.77, "learning_rate": 4.311965735745109e-05, "loss": 2.4004, "step": 2778500 }, { "epoch": 13.77, "learning_rate": 4.3118418771025e-05, "loss": 2.4207, "step": 2779000 }, { "epoch": 13.77, "learning_rate": 4.3117182661771777e-05, "loss": 2.4038, "step": 2779500 }, { "epoch": 13.77, "learning_rate": 4.3115944075345694e-05, "loss": 2.417, "step": 2780000 }, { "epoch": 13.78, "learning_rate": 4.311470548891961e-05, "loss": 2.4107, "step": 2780500 }, { "epoch": 13.78, "learning_rate": 4.311346690249353e-05, "loss": 2.4136, "step": 2781000 }, { "epoch": 13.78, "learning_rate": 4.3112228316067444e-05, "loss": 2.3845, "step": 2781500 }, { "epoch": 13.78, "learning_rate": 4.311099220681421e-05, "loss": 2.43, "step": 2782000 }, { "epoch": 13.79, "learning_rate": 4.3109756097560975e-05, "loss": 2.4391, "step": 2782500 }, { "epoch": 13.79, "learning_rate": 4.3108519988307744e-05, "loss": 2.417, "step": 2783000 }, { "epoch": 13.79, "learning_rate": 4.310728387905452e-05, "loss": 2.4119, "step": 2783500 }, { "epoch": 13.79, "learning_rate": 4.310604529262843e-05, "loss": 2.4281, "step": 2784000 }, { "epoch": 13.8, "learning_rate": 4.31048091833752e-05, "loss": 2.4254, "step": 2784500 }, { "epoch": 13.8, "learning_rate": 4.3103570596949115e-05, "loss": 2.3977, "step": 2785000 }, { "epoch": 13.8, "learning_rate": 4.310233201052303e-05, "loss": 2.4255, "step": 2785500 }, { "epoch": 13.8, "learning_rate": 4.310109342409695e-05, "loss": 2.393, "step": 2786000 }, { "epoch": 13.81, "learning_rate": 4.3099854837670866e-05, "loss": 2.4038, "step": 2786500 }, { "epoch": 13.81, "learning_rate": 4.309861625124478e-05, "loss": 2.4045, "step": 2787000 }, { "epoch": 13.81, "learning_rate": 4.30973776648187e-05, "loss": 2.4029, "step": 2787500 }, { "epoch": 13.81, "learning_rate": 4.309613907839262e-05, "loss": 2.4214, "step": 2788000 }, { "epoch": 13.82, "learning_rate": 4.3094902969139386e-05, "loss": 2.3809, "step": 2788500 }, { "epoch": 13.82, "learning_rate": 4.30936643827133e-05, "loss": 2.402, "step": 2789000 }, { "epoch": 13.82, "learning_rate": 4.309242579628722e-05, "loss": 2.3671, "step": 2789500 }, { "epoch": 13.82, "learning_rate": 4.3091187209861137e-05, "loss": 2.391, "step": 2790000 }, { "epoch": 13.83, "learning_rate": 4.30899511006079e-05, "loss": 2.4029, "step": 2790500 }, { "epoch": 13.83, "learning_rate": 4.3088712514181816e-05, "loss": 2.3973, "step": 2791000 }, { "epoch": 13.83, "learning_rate": 4.308747392775573e-05, "loss": 2.3823, "step": 2791500 }, { "epoch": 13.83, "learning_rate": 4.308623534132965e-05, "loss": 2.4324, "step": 2792000 }, { "epoch": 13.84, "learning_rate": 4.3084996754903566e-05, "loss": 2.4068, "step": 2792500 }, { "epoch": 13.84, "learning_rate": 4.308375816847748e-05, "loss": 2.4183, "step": 2793000 }, { "epoch": 13.84, "learning_rate": 4.30825195820514e-05, "loss": 2.4176, "step": 2793500 }, { "epoch": 13.84, "learning_rate": 4.308128099562532e-05, "loss": 2.41, "step": 2794000 }, { "epoch": 13.84, "learning_rate": 4.3080042409199234e-05, "loss": 2.3977, "step": 2794500 }, { "epoch": 13.85, "learning_rate": 4.3078806299946e-05, "loss": 2.3854, "step": 2795000 }, { "epoch": 13.85, "learning_rate": 4.3077570190692765e-05, "loss": 2.4014, "step": 2795500 }, { "epoch": 13.85, "learning_rate": 4.307633160426668e-05, "loss": 2.432, "step": 2796000 }, { "epoch": 13.85, "learning_rate": 4.30750930178406e-05, "loss": 2.4016, "step": 2796500 }, { "epoch": 13.86, "learning_rate": 4.3073854431414516e-05, "loss": 2.4061, "step": 2797000 }, { "epoch": 13.86, "learning_rate": 4.307261584498843e-05, "loss": 2.3972, "step": 2797500 }, { "epoch": 13.86, "learning_rate": 4.307137725856235e-05, "loss": 2.3841, "step": 2798000 }, { "epoch": 13.86, "learning_rate": 4.307014114930912e-05, "loss": 2.3905, "step": 2798500 }, { "epoch": 13.87, "learning_rate": 4.3068902562883035e-05, "loss": 2.412, "step": 2799000 }, { "epoch": 13.87, "learning_rate": 4.306766397645695e-05, "loss": 2.3943, "step": 2799500 }, { "epoch": 13.87, "learning_rate": 4.306642539003087e-05, "loss": 2.3898, "step": 2800000 }, { "epoch": 13.87, "learning_rate": 4.3065186803604786e-05, "loss": 2.3945, "step": 2800500 }, { "epoch": 13.88, "learning_rate": 4.30639482171787e-05, "loss": 2.4051, "step": 2801000 }, { "epoch": 13.88, "learning_rate": 4.306270963075262e-05, "loss": 2.3813, "step": 2801500 }, { "epoch": 13.88, "learning_rate": 4.306147104432654e-05, "loss": 2.3979, "step": 2802000 }, { "epoch": 13.88, "learning_rate": 4.3060232457900454e-05, "loss": 2.4034, "step": 2802500 }, { "epoch": 13.89, "learning_rate": 4.305899387147437e-05, "loss": 2.399, "step": 2803000 }, { "epoch": 13.89, "learning_rate": 4.305775528504829e-05, "loss": 2.3926, "step": 2803500 }, { "epoch": 13.89, "learning_rate": 4.3056516698622205e-05, "loss": 2.3909, "step": 2804000 }, { "epoch": 13.89, "learning_rate": 4.3055278112196115e-05, "loss": 2.4082, "step": 2804500 }, { "epoch": 13.9, "learning_rate": 4.305403952577003e-05, "loss": 2.3697, "step": 2805000 }, { "epoch": 13.9, "learning_rate": 4.30528034165168e-05, "loss": 2.4134, "step": 2805500 }, { "epoch": 13.9, "learning_rate": 4.305156483009072e-05, "loss": 2.3691, "step": 2806000 }, { "epoch": 13.9, "learning_rate": 4.3050326243664634e-05, "loss": 2.388, "step": 2806500 }, { "epoch": 13.91, "learning_rate": 4.304908765723855e-05, "loss": 2.4274, "step": 2807000 }, { "epoch": 13.91, "learning_rate": 4.304784907081246e-05, "loss": 2.4141, "step": 2807500 }, { "epoch": 13.91, "learning_rate": 4.304661048438638e-05, "loss": 2.3835, "step": 2808000 }, { "epoch": 13.91, "learning_rate": 4.3045371897960295e-05, "loss": 2.4, "step": 2808500 }, { "epoch": 13.92, "learning_rate": 4.304413331153421e-05, "loss": 2.4225, "step": 2809000 }, { "epoch": 13.92, "learning_rate": 4.304289472510813e-05, "loss": 2.3942, "step": 2809500 }, { "epoch": 13.92, "learning_rate": 4.3041658615854905e-05, "loss": 2.3977, "step": 2810000 }, { "epoch": 13.92, "learning_rate": 4.304042250660167e-05, "loss": 2.4204, "step": 2810500 }, { "epoch": 13.93, "learning_rate": 4.3039183920175584e-05, "loss": 2.4132, "step": 2811000 }, { "epoch": 13.93, "learning_rate": 4.30379453337495e-05, "loss": 2.4255, "step": 2811500 }, { "epoch": 13.93, "learning_rate": 4.303670674732342e-05, "loss": 2.4183, "step": 2812000 }, { "epoch": 13.93, "learning_rate": 4.3035468160897334e-05, "loss": 2.3954, "step": 2812500 }, { "epoch": 13.94, "learning_rate": 4.30342320516441e-05, "loss": 2.4053, "step": 2813000 }, { "epoch": 13.94, "learning_rate": 4.303299594239087e-05, "loss": 2.3931, "step": 2813500 }, { "epoch": 13.94, "learning_rate": 4.303175735596479e-05, "loss": 2.4367, "step": 2814000 }, { "epoch": 13.94, "learning_rate": 4.30305187695387e-05, "loss": 2.4039, "step": 2814500 }, { "epoch": 13.95, "learning_rate": 4.3029280183112616e-05, "loss": 2.3991, "step": 2815000 }, { "epoch": 13.95, "learning_rate": 4.302804159668653e-05, "loss": 2.4093, "step": 2815500 }, { "epoch": 13.95, "learning_rate": 4.302680301026045e-05, "loss": 2.3957, "step": 2816000 }, { "epoch": 13.95, "learning_rate": 4.302556442383437e-05, "loss": 2.4145, "step": 2816500 }, { "epoch": 13.96, "learning_rate": 4.3024325837408284e-05, "loss": 2.4256, "step": 2817000 }, { "epoch": 13.96, "learning_rate": 4.30230872509822e-05, "loss": 2.412, "step": 2817500 }, { "epoch": 13.96, "learning_rate": 4.302184866455612e-05, "loss": 2.412, "step": 2818000 }, { "epoch": 13.96, "learning_rate": 4.3020612555302886e-05, "loss": 2.4011, "step": 2818500 }, { "epoch": 13.97, "learning_rate": 4.3019376446049655e-05, "loss": 2.4118, "step": 2819000 }, { "epoch": 13.97, "learning_rate": 4.301813785962357e-05, "loss": 2.4149, "step": 2819500 }, { "epoch": 13.97, "learning_rate": 4.301689927319749e-05, "loss": 2.4283, "step": 2820000 }, { "epoch": 13.97, "learning_rate": 4.3015660686771406e-05, "loss": 2.3776, "step": 2820500 }, { "epoch": 13.98, "learning_rate": 4.301442457751817e-05, "loss": 2.4164, "step": 2821000 }, { "epoch": 13.98, "learning_rate": 4.3013185991092085e-05, "loss": 2.3833, "step": 2821500 }, { "epoch": 13.98, "learning_rate": 4.3011947404666e-05, "loss": 2.384, "step": 2822000 }, { "epoch": 13.98, "learning_rate": 4.301070881823992e-05, "loss": 2.4177, "step": 2822500 }, { "epoch": 13.99, "learning_rate": 4.3009470231813836e-05, "loss": 2.4214, "step": 2823000 }, { "epoch": 13.99, "learning_rate": 4.300823164538775e-05, "loss": 2.4005, "step": 2823500 }, { "epoch": 13.99, "learning_rate": 4.300699305896167e-05, "loss": 2.3818, "step": 2824000 }, { "epoch": 13.99, "learning_rate": 4.3005754472535587e-05, "loss": 2.3987, "step": 2824500 }, { "epoch": 14.0, "learning_rate": 4.3004515886109503e-05, "loss": 2.4102, "step": 2825000 }, { "epoch": 14.0, "learning_rate": 4.300327729968342e-05, "loss": 2.4001, "step": 2825500 }, { "epoch": 14.0, "eval_accuracy": 0.6461582624983472, "eval_accuracy_mlm": 0.5998228536990704, "eval_accuracy_nsp": 0.8650881122062763, "eval_loss": 2.389559268951416, "eval_runtime": 145.7903, "eval_samples_per_second": 1748.807, "eval_steps_per_second": 72.872, "step": 2825802 }, { "epoch": 14.0, "learning_rate": 4.300204119043019e-05, "loss": 2.3643, "step": 2826000 }, { "epoch": 14.0, "learning_rate": 4.3000802604004106e-05, "loss": 2.3761, "step": 2826500 }, { "epoch": 14.01, "learning_rate": 4.299956401757802e-05, "loss": 2.3749, "step": 2827000 }, { "epoch": 14.01, "learning_rate": 4.299832543115194e-05, "loss": 2.3892, "step": 2827500 }, { "epoch": 14.01, "learning_rate": 4.299708684472585e-05, "loss": 2.3667, "step": 2828000 }, { "epoch": 14.01, "learning_rate": 4.299585073547262e-05, "loss": 2.364, "step": 2828500 }, { "epoch": 14.02, "learning_rate": 4.2994612149046536e-05, "loss": 2.3782, "step": 2829000 }, { "epoch": 14.02, "learning_rate": 4.299337356262045e-05, "loss": 2.371, "step": 2829500 }, { "epoch": 14.02, "learning_rate": 4.299213497619437e-05, "loss": 2.349, "step": 2830000 }, { "epoch": 14.02, "learning_rate": 4.299089638976829e-05, "loss": 2.3903, "step": 2830500 }, { "epoch": 14.03, "learning_rate": 4.2989657803342204e-05, "loss": 2.374, "step": 2831000 }, { "epoch": 14.03, "learning_rate": 4.298842169408897e-05, "loss": 2.378, "step": 2831500 }, { "epoch": 14.03, "learning_rate": 4.298718310766289e-05, "loss": 2.3867, "step": 2832000 }, { "epoch": 14.03, "learning_rate": 4.2985944521236806e-05, "loss": 2.366, "step": 2832500 }, { "epoch": 14.04, "learning_rate": 4.298470593481072e-05, "loss": 2.3584, "step": 2833000 }, { "epoch": 14.04, "learning_rate": 4.298346734838464e-05, "loss": 2.3585, "step": 2833500 }, { "epoch": 14.04, "learning_rate": 4.298222876195856e-05, "loss": 2.3845, "step": 2834000 }, { "epoch": 14.04, "learning_rate": 4.2980990175532474e-05, "loss": 2.3867, "step": 2834500 }, { "epoch": 14.05, "learning_rate": 4.2979751589106384e-05, "loss": 2.3844, "step": 2835000 }, { "epoch": 14.05, "learning_rate": 4.29785130026803e-05, "loss": 2.3945, "step": 2835500 }, { "epoch": 14.05, "learning_rate": 4.297727689342707e-05, "loss": 2.3819, "step": 2836000 }, { "epoch": 14.05, "learning_rate": 4.297603830700099e-05, "loss": 2.3963, "step": 2836500 }, { "epoch": 14.06, "learning_rate": 4.2974799720574904e-05, "loss": 2.3649, "step": 2837000 }, { "epoch": 14.06, "learning_rate": 4.297356113414882e-05, "loss": 2.3759, "step": 2837500 }, { "epoch": 14.06, "learning_rate": 4.297232254772274e-05, "loss": 2.3773, "step": 2838000 }, { "epoch": 14.06, "learning_rate": 4.2971083961296655e-05, "loss": 2.3649, "step": 2838500 }, { "epoch": 14.07, "learning_rate": 4.296984785204342e-05, "loss": 2.3764, "step": 2839000 }, { "epoch": 14.07, "learning_rate": 4.296860926561734e-05, "loss": 2.3793, "step": 2839500 }, { "epoch": 14.07, "learning_rate": 4.29673731563641e-05, "loss": 2.4066, "step": 2840000 }, { "epoch": 14.07, "learning_rate": 4.296613704711088e-05, "loss": 2.377, "step": 2840500 }, { "epoch": 14.08, "learning_rate": 4.2964898460684795e-05, "loss": 2.3808, "step": 2841000 }, { "epoch": 14.08, "learning_rate": 4.296365987425871e-05, "loss": 2.4138, "step": 2841500 }, { "epoch": 14.08, "learning_rate": 4.296242128783263e-05, "loss": 2.3763, "step": 2842000 }, { "epoch": 14.08, "learning_rate": 4.296118270140654e-05, "loss": 2.3747, "step": 2842500 }, { "epoch": 14.09, "learning_rate": 4.2959944114980456e-05, "loss": 2.3829, "step": 2843000 }, { "epoch": 14.09, "learning_rate": 4.295870552855437e-05, "loss": 2.4047, "step": 2843500 }, { "epoch": 14.09, "learning_rate": 4.295746694212829e-05, "loss": 2.3814, "step": 2844000 }, { "epoch": 14.09, "learning_rate": 4.2956228355702207e-05, "loss": 2.387, "step": 2844500 }, { "epoch": 14.1, "learning_rate": 4.2954989769276123e-05, "loss": 2.3661, "step": 2845000 }, { "epoch": 14.1, "learning_rate": 4.295375118285004e-05, "loss": 2.4036, "step": 2845500 }, { "epoch": 14.1, "learning_rate": 4.29525150735968e-05, "loss": 2.3828, "step": 2846000 }, { "epoch": 14.1, "learning_rate": 4.295127648717072e-05, "loss": 2.3651, "step": 2846500 }, { "epoch": 14.11, "learning_rate": 4.2950037900744636e-05, "loss": 2.3889, "step": 2847000 }, { "epoch": 14.11, "learning_rate": 4.294879931431855e-05, "loss": 2.3733, "step": 2847500 }, { "epoch": 14.11, "learning_rate": 4.294756072789247e-05, "loss": 2.3593, "step": 2848000 }, { "epoch": 14.11, "learning_rate": 4.294632214146639e-05, "loss": 2.3742, "step": 2848500 }, { "epoch": 14.11, "learning_rate": 4.2945083555040304e-05, "loss": 2.3745, "step": 2849000 }, { "epoch": 14.12, "learning_rate": 4.294384496861422e-05, "loss": 2.3674, "step": 2849500 }, { "epoch": 14.12, "learning_rate": 4.294260638218814e-05, "loss": 2.3607, "step": 2850000 }, { "epoch": 14.12, "learning_rate": 4.294137027293491e-05, "loss": 2.383, "step": 2850500 }, { "epoch": 14.12, "learning_rate": 4.2940131686508824e-05, "loss": 2.3761, "step": 2851000 }, { "epoch": 14.13, "learning_rate": 4.293889310008274e-05, "loss": 2.3966, "step": 2851500 }, { "epoch": 14.13, "learning_rate": 4.293765451365666e-05, "loss": 2.3912, "step": 2852000 }, { "epoch": 14.13, "learning_rate": 4.293641840440342e-05, "loss": 2.3719, "step": 2852500 }, { "epoch": 14.13, "learning_rate": 4.2935179817977336e-05, "loss": 2.3608, "step": 2853000 }, { "epoch": 14.14, "learning_rate": 4.293394123155125e-05, "loss": 2.3762, "step": 2853500 }, { "epoch": 14.14, "learning_rate": 4.293270264512517e-05, "loss": 2.3916, "step": 2854000 }, { "epoch": 14.14, "learning_rate": 4.293146405869909e-05, "loss": 2.3773, "step": 2854500 }, { "epoch": 14.14, "learning_rate": 4.2930225472273004e-05, "loss": 2.3872, "step": 2855000 }, { "epoch": 14.15, "learning_rate": 4.292898688584692e-05, "loss": 2.3776, "step": 2855500 }, { "epoch": 14.15, "learning_rate": 4.292775077659369e-05, "loss": 2.4074, "step": 2856000 }, { "epoch": 14.15, "learning_rate": 4.292651219016761e-05, "loss": 2.379, "step": 2856500 }, { "epoch": 14.15, "learning_rate": 4.2925273603741524e-05, "loss": 2.3803, "step": 2857000 }, { "epoch": 14.16, "learning_rate": 4.292403501731544e-05, "loss": 2.3807, "step": 2857500 }, { "epoch": 14.16, "learning_rate": 4.292279643088936e-05, "loss": 2.3591, "step": 2858000 }, { "epoch": 14.16, "learning_rate": 4.2921557844463274e-05, "loss": 2.3725, "step": 2858500 }, { "epoch": 14.16, "learning_rate": 4.292031925803719e-05, "loss": 2.3891, "step": 2859000 }, { "epoch": 14.17, "learning_rate": 4.291908067161111e-05, "loss": 2.3872, "step": 2859500 }, { "epoch": 14.17, "learning_rate": 4.291784456235787e-05, "loss": 2.3672, "step": 2860000 }, { "epoch": 14.17, "learning_rate": 4.2916608453104646e-05, "loss": 2.3655, "step": 2860500 }, { "epoch": 14.17, "learning_rate": 4.2915369866678556e-05, "loss": 2.403, "step": 2861000 }, { "epoch": 14.18, "learning_rate": 4.291413375742533e-05, "loss": 2.4014, "step": 2861500 }, { "epoch": 14.18, "learning_rate": 4.291289517099925e-05, "loss": 2.3719, "step": 2862000 }, { "epoch": 14.18, "learning_rate": 4.2911656584573166e-05, "loss": 2.377, "step": 2862500 }, { "epoch": 14.18, "learning_rate": 4.2910417998147076e-05, "loss": 2.3801, "step": 2863000 }, { "epoch": 14.19, "learning_rate": 4.290917941172099e-05, "loss": 2.3656, "step": 2863500 }, { "epoch": 14.19, "learning_rate": 4.290794082529491e-05, "loss": 2.4083, "step": 2864000 }, { "epoch": 14.19, "learning_rate": 4.2906702238868826e-05, "loss": 2.3794, "step": 2864500 }, { "epoch": 14.19, "learning_rate": 4.2905463652442743e-05, "loss": 2.3929, "step": 2865000 }, { "epoch": 14.2, "learning_rate": 4.2904225066016654e-05, "loss": 2.3909, "step": 2865500 }, { "epoch": 14.2, "learning_rate": 4.290298647959057e-05, "loss": 2.4093, "step": 2866000 }, { "epoch": 14.2, "learning_rate": 4.290174789316449e-05, "loss": 2.3792, "step": 2866500 }, { "epoch": 14.2, "learning_rate": 4.2900509306738404e-05, "loss": 2.3973, "step": 2867000 }, { "epoch": 14.21, "learning_rate": 4.289927072031232e-05, "loss": 2.3682, "step": 2867500 }, { "epoch": 14.21, "learning_rate": 4.289803213388624e-05, "loss": 2.3672, "step": 2868000 }, { "epoch": 14.21, "learning_rate": 4.2896793547460155e-05, "loss": 2.3825, "step": 2868500 }, { "epoch": 14.21, "learning_rate": 4.289555496103407e-05, "loss": 2.3766, "step": 2869000 }, { "epoch": 14.22, "learning_rate": 4.289431885178084e-05, "loss": 2.3758, "step": 2869500 }, { "epoch": 14.22, "learning_rate": 4.289308026535476e-05, "loss": 2.37, "step": 2870000 }, { "epoch": 14.22, "learning_rate": 4.2891844156101527e-05, "loss": 2.3608, "step": 2870500 }, { "epoch": 14.22, "learning_rate": 4.2890605569675444e-05, "loss": 2.3819, "step": 2871000 }, { "epoch": 14.23, "learning_rate": 4.288936698324936e-05, "loss": 2.3681, "step": 2871500 }, { "epoch": 14.23, "learning_rate": 4.288813087399613e-05, "loss": 2.3852, "step": 2872000 }, { "epoch": 14.23, "learning_rate": 4.2886892287570046e-05, "loss": 2.3943, "step": 2872500 }, { "epoch": 14.23, "learning_rate": 4.288565370114396e-05, "loss": 2.3918, "step": 2873000 }, { "epoch": 14.24, "learning_rate": 4.288441511471787e-05, "loss": 2.363, "step": 2873500 }, { "epoch": 14.24, "learning_rate": 4.288317652829179e-05, "loss": 2.3689, "step": 2874000 }, { "epoch": 14.24, "learning_rate": 4.2881940419038566e-05, "loss": 2.3561, "step": 2874500 }, { "epoch": 14.24, "learning_rate": 4.288070183261248e-05, "loss": 2.3556, "step": 2875000 }, { "epoch": 14.25, "learning_rate": 4.28794632461864e-05, "loss": 2.3698, "step": 2875500 }, { "epoch": 14.25, "learning_rate": 4.2878224659760317e-05, "loss": 2.3819, "step": 2876000 }, { "epoch": 14.25, "learning_rate": 4.287698607333423e-05, "loss": 2.3529, "step": 2876500 }, { "epoch": 14.25, "learning_rate": 4.2875747486908144e-05, "loss": 2.3704, "step": 2877000 }, { "epoch": 14.26, "learning_rate": 4.287450890048206e-05, "loss": 2.399, "step": 2877500 }, { "epoch": 14.26, "learning_rate": 4.287327031405598e-05, "loss": 2.3887, "step": 2878000 }, { "epoch": 14.26, "learning_rate": 4.2872031727629894e-05, "loss": 2.3905, "step": 2878500 }, { "epoch": 14.26, "learning_rate": 4.2870793141203805e-05, "loss": 2.3566, "step": 2879000 }, { "epoch": 14.27, "learning_rate": 4.286955703195058e-05, "loss": 2.4, "step": 2879500 }, { "epoch": 14.27, "learning_rate": 4.286831844552449e-05, "loss": 2.3926, "step": 2880000 }, { "epoch": 14.27, "learning_rate": 4.286707985909841e-05, "loss": 2.3815, "step": 2880500 }, { "epoch": 14.27, "learning_rate": 4.2865841272672324e-05, "loss": 2.369, "step": 2881000 }, { "epoch": 14.28, "learning_rate": 4.286460268624624e-05, "loss": 2.3836, "step": 2881500 }, { "epoch": 14.28, "learning_rate": 4.286336409982016e-05, "loss": 2.4035, "step": 2882000 }, { "epoch": 14.28, "learning_rate": 4.2862127990566934e-05, "loss": 2.3799, "step": 2882500 }, { "epoch": 14.28, "learning_rate": 4.2860889404140844e-05, "loss": 2.4322, "step": 2883000 }, { "epoch": 14.29, "learning_rate": 4.285965081771476e-05, "loss": 2.4124, "step": 2883500 }, { "epoch": 14.29, "learning_rate": 4.285841223128868e-05, "loss": 2.3728, "step": 2884000 }, { "epoch": 14.29, "learning_rate": 4.2857173644862595e-05, "loss": 2.3791, "step": 2884500 }, { "epoch": 14.29, "learning_rate": 4.285593505843651e-05, "loss": 2.3921, "step": 2885000 }, { "epoch": 14.3, "learning_rate": 4.285469647201042e-05, "loss": 2.3687, "step": 2885500 }, { "epoch": 14.3, "learning_rate": 4.285346036275719e-05, "loss": 2.3897, "step": 2886000 }, { "epoch": 14.3, "learning_rate": 4.285222177633111e-05, "loss": 2.3735, "step": 2886500 }, { "epoch": 14.3, "learning_rate": 4.2850983189905024e-05, "loss": 2.3695, "step": 2887000 }, { "epoch": 14.31, "learning_rate": 4.284974460347894e-05, "loss": 2.3833, "step": 2887500 }, { "epoch": 14.31, "learning_rate": 4.284850601705286e-05, "loss": 2.3874, "step": 2888000 }, { "epoch": 14.31, "learning_rate": 4.2847267430626775e-05, "loss": 2.4175, "step": 2888500 }, { "epoch": 14.31, "learning_rate": 4.284603132137355e-05, "loss": 2.3706, "step": 2889000 }, { "epoch": 14.32, "learning_rate": 4.2844797689293165e-05, "loss": 2.3967, "step": 2889500 }, { "epoch": 14.32, "learning_rate": 4.2843561580039933e-05, "loss": 2.3934, "step": 2890000 }, { "epoch": 14.32, "learning_rate": 4.284232299361385e-05, "loss": 2.3733, "step": 2890500 }, { "epoch": 14.32, "learning_rate": 4.284108440718777e-05, "loss": 2.3728, "step": 2891000 }, { "epoch": 14.33, "learning_rate": 4.2839845820761684e-05, "loss": 2.3741, "step": 2891500 }, { "epoch": 14.33, "learning_rate": 4.28386072343356e-05, "loss": 2.406, "step": 2892000 }, { "epoch": 14.33, "learning_rate": 4.283736864790952e-05, "loss": 2.3878, "step": 2892500 }, { "epoch": 14.33, "learning_rate": 4.283613006148343e-05, "loss": 2.3857, "step": 2893000 }, { "epoch": 14.34, "learning_rate": 4.2834891475057345e-05, "loss": 2.3929, "step": 2893500 }, { "epoch": 14.34, "learning_rate": 4.2833655365804114e-05, "loss": 2.3873, "step": 2894000 }, { "epoch": 14.34, "learning_rate": 4.283241925655089e-05, "loss": 2.3801, "step": 2894500 }, { "epoch": 14.34, "learning_rate": 4.2831180670124806e-05, "loss": 2.3904, "step": 2895000 }, { "epoch": 14.35, "learning_rate": 4.282994208369872e-05, "loss": 2.4079, "step": 2895500 }, { "epoch": 14.35, "learning_rate": 4.2828703497272634e-05, "loss": 2.418, "step": 2896000 }, { "epoch": 14.35, "learning_rate": 4.282746491084655e-05, "loss": 2.3692, "step": 2896500 }, { "epoch": 14.35, "learning_rate": 4.282622632442047e-05, "loss": 2.377, "step": 2897000 }, { "epoch": 14.36, "learning_rate": 4.2824987737994384e-05, "loss": 2.3925, "step": 2897500 }, { "epoch": 14.36, "learning_rate": 4.28237491515683e-05, "loss": 2.3754, "step": 2898000 }, { "epoch": 14.36, "learning_rate": 4.282251056514222e-05, "loss": 2.3814, "step": 2898500 }, { "epoch": 14.36, "learning_rate": 4.2821271978716135e-05, "loss": 2.3872, "step": 2899000 }, { "epoch": 14.37, "learning_rate": 4.282003339229005e-05, "loss": 2.3753, "step": 2899500 }, { "epoch": 14.37, "learning_rate": 4.281879480586396e-05, "loss": 2.4065, "step": 2900000 }, { "epoch": 14.37, "learning_rate": 4.281755621943788e-05, "loss": 2.3957, "step": 2900500 }, { "epoch": 14.37, "learning_rate": 4.2816317633011796e-05, "loss": 2.4159, "step": 2901000 }, { "epoch": 14.38, "learning_rate": 4.281507904658571e-05, "loss": 2.4126, "step": 2901500 }, { "epoch": 14.38, "learning_rate": 4.281384046015963e-05, "loss": 2.3595, "step": 2902000 }, { "epoch": 14.38, "learning_rate": 4.281260187373355e-05, "loss": 2.3574, "step": 2902500 }, { "epoch": 14.38, "learning_rate": 4.281136824165317e-05, "loss": 2.3769, "step": 2903000 }, { "epoch": 14.38, "learning_rate": 4.2810129655227084e-05, "loss": 2.395, "step": 2903500 }, { "epoch": 14.39, "learning_rate": 4.2808891068801e-05, "loss": 2.4074, "step": 2904000 }, { "epoch": 14.39, "learning_rate": 4.280765495954777e-05, "loss": 2.3847, "step": 2904500 }, { "epoch": 14.39, "learning_rate": 4.280641637312169e-05, "loss": 2.4074, "step": 2905000 }, { "epoch": 14.39, "learning_rate": 4.28051777866956e-05, "loss": 2.3715, "step": 2905500 }, { "epoch": 14.4, "learning_rate": 4.2803939200269514e-05, "loss": 2.3871, "step": 2906000 }, { "epoch": 14.4, "learning_rate": 4.280270061384343e-05, "loss": 2.4013, "step": 2906500 }, { "epoch": 14.4, "learning_rate": 4.280146202741735e-05, "loss": 2.3838, "step": 2907000 }, { "epoch": 14.4, "learning_rate": 4.2800223440991265e-05, "loss": 2.4154, "step": 2907500 }, { "epoch": 14.41, "learning_rate": 4.279898485456518e-05, "loss": 2.3687, "step": 2908000 }, { "epoch": 14.41, "learning_rate": 4.27977462681391e-05, "loss": 2.396, "step": 2908500 }, { "epoch": 14.41, "learning_rate": 4.2796507681713016e-05, "loss": 2.3786, "step": 2909000 }, { "epoch": 14.41, "learning_rate": 4.2795271572459785e-05, "loss": 2.3984, "step": 2909500 }, { "epoch": 14.42, "learning_rate": 4.279403546320655e-05, "loss": 2.3943, "step": 2910000 }, { "epoch": 14.42, "learning_rate": 4.279279687678047e-05, "loss": 2.3614, "step": 2910500 }, { "epoch": 14.42, "learning_rate": 4.279155829035439e-05, "loss": 2.3771, "step": 2911000 }, { "epoch": 14.42, "learning_rate": 4.2790319703928304e-05, "loss": 2.3691, "step": 2911500 }, { "epoch": 14.43, "learning_rate": 4.2789081117502214e-05, "loss": 2.392, "step": 2912000 }, { "epoch": 14.43, "learning_rate": 4.278784253107613e-05, "loss": 2.4161, "step": 2912500 }, { "epoch": 14.43, "learning_rate": 4.278660394465005e-05, "loss": 2.3881, "step": 2913000 }, { "epoch": 14.43, "learning_rate": 4.2785365358223965e-05, "loss": 2.3568, "step": 2913500 }, { "epoch": 14.44, "learning_rate": 4.278412677179788e-05, "loss": 2.4166, "step": 2914000 }, { "epoch": 14.44, "learning_rate": 4.278289066254466e-05, "loss": 2.4008, "step": 2914500 }, { "epoch": 14.44, "learning_rate": 4.278165207611857e-05, "loss": 2.3719, "step": 2915000 }, { "epoch": 14.44, "learning_rate": 4.2780413489692485e-05, "loss": 2.4054, "step": 2915500 }, { "epoch": 14.45, "learning_rate": 4.27791749032664e-05, "loss": 2.3981, "step": 2916000 }, { "epoch": 14.45, "learning_rate": 4.277793631684032e-05, "loss": 2.4088, "step": 2916500 }, { "epoch": 14.45, "learning_rate": 4.2776697730414235e-05, "loss": 2.4193, "step": 2917000 }, { "epoch": 14.45, "learning_rate": 4.277545914398815e-05, "loss": 2.3723, "step": 2917500 }, { "epoch": 14.46, "learning_rate": 4.277422055756207e-05, "loss": 2.3896, "step": 2918000 }, { "epoch": 14.46, "learning_rate": 4.2772981971135986e-05, "loss": 2.3889, "step": 2918500 }, { "epoch": 14.46, "learning_rate": 4.27717433847099e-05, "loss": 2.3773, "step": 2919000 }, { "epoch": 14.46, "learning_rate": 4.277050479828382e-05, "loss": 2.4043, "step": 2919500 }, { "epoch": 14.47, "learning_rate": 4.276927116620344e-05, "loss": 2.3883, "step": 2920000 }, { "epoch": 14.47, "learning_rate": 4.276803257977736e-05, "loss": 2.3812, "step": 2920500 }, { "epoch": 14.47, "learning_rate": 4.276679894769697e-05, "loss": 2.4031, "step": 2921000 }, { "epoch": 14.47, "learning_rate": 4.276556036127089e-05, "loss": 2.3853, "step": 2921500 }, { "epoch": 14.48, "learning_rate": 4.2764321774844806e-05, "loss": 2.4171, "step": 2922000 }, { "epoch": 14.48, "learning_rate": 4.276308318841872e-05, "loss": 2.3994, "step": 2922500 }, { "epoch": 14.48, "learning_rate": 4.276184460199264e-05, "loss": 2.3933, "step": 2923000 }, { "epoch": 14.48, "learning_rate": 4.276060849273941e-05, "loss": 2.3946, "step": 2923500 }, { "epoch": 14.49, "learning_rate": 4.2759369906313325e-05, "loss": 2.4314, "step": 2924000 }, { "epoch": 14.49, "learning_rate": 4.275813131988724e-05, "loss": 2.3782, "step": 2924500 }, { "epoch": 14.49, "learning_rate": 4.275689273346116e-05, "loss": 2.3963, "step": 2925000 }, { "epoch": 14.49, "learning_rate": 4.2755654147035076e-05, "loss": 2.4168, "step": 2925500 }, { "epoch": 14.5, "learning_rate": 4.275441556060899e-05, "loss": 2.3955, "step": 2926000 }, { "epoch": 14.5, "learning_rate": 4.275317697418291e-05, "loss": 2.4109, "step": 2926500 }, { "epoch": 14.5, "learning_rate": 4.275193838775683e-05, "loss": 2.3643, "step": 2927000 }, { "epoch": 14.5, "learning_rate": 4.2750699801330744e-05, "loss": 2.3692, "step": 2927500 }, { "epoch": 14.51, "learning_rate": 4.2749461214904654e-05, "loss": 2.4234, "step": 2928000 }, { "epoch": 14.51, "learning_rate": 4.274822262847857e-05, "loss": 2.4136, "step": 2928500 }, { "epoch": 14.51, "learning_rate": 4.274698404205249e-05, "loss": 2.3882, "step": 2929000 }, { "epoch": 14.51, "learning_rate": 4.2745745455626405e-05, "loss": 2.3713, "step": 2929500 }, { "epoch": 14.52, "learning_rate": 4.274450686920032e-05, "loss": 2.3898, "step": 2930000 }, { "epoch": 14.52, "learning_rate": 4.274326828277423e-05, "loss": 2.4151, "step": 2930500 }, { "epoch": 14.52, "learning_rate": 4.274202969634815e-05, "loss": 2.3902, "step": 2931000 }, { "epoch": 14.52, "learning_rate": 4.2740793587094924e-05, "loss": 2.3814, "step": 2931500 }, { "epoch": 14.53, "learning_rate": 4.273955500066884e-05, "loss": 2.3855, "step": 2932000 }, { "epoch": 14.53, "learning_rate": 4.273831641424276e-05, "loss": 2.4006, "step": 2932500 }, { "epoch": 14.53, "learning_rate": 4.2737077827816675e-05, "loss": 2.3651, "step": 2933000 }, { "epoch": 14.53, "learning_rate": 4.2735839241390585e-05, "loss": 2.3846, "step": 2933500 }, { "epoch": 14.54, "learning_rate": 4.273460313213736e-05, "loss": 2.3763, "step": 2934000 }, { "epoch": 14.54, "learning_rate": 4.273336454571127e-05, "loss": 2.382, "step": 2934500 }, { "epoch": 14.54, "learning_rate": 4.273212595928519e-05, "loss": 2.3847, "step": 2935000 }, { "epoch": 14.54, "learning_rate": 4.2730887372859105e-05, "loss": 2.3934, "step": 2935500 }, { "epoch": 14.55, "learning_rate": 4.2729651263605873e-05, "loss": 2.3807, "step": 2936000 }, { "epoch": 14.55, "learning_rate": 4.272841515435264e-05, "loss": 2.3737, "step": 2936500 }, { "epoch": 14.55, "learning_rate": 4.272717656792656e-05, "loss": 2.396, "step": 2937000 }, { "epoch": 14.55, "learning_rate": 4.2725937981500476e-05, "loss": 2.397, "step": 2937500 }, { "epoch": 14.56, "learning_rate": 4.272469939507439e-05, "loss": 2.3834, "step": 2938000 }, { "epoch": 14.56, "learning_rate": 4.272346080864831e-05, "loss": 2.383, "step": 2938500 }, { "epoch": 14.56, "learning_rate": 4.272222222222223e-05, "loss": 2.3652, "step": 2939000 }, { "epoch": 14.56, "learning_rate": 4.2720983635796144e-05, "loss": 2.3763, "step": 2939500 }, { "epoch": 14.57, "learning_rate": 4.271974504937006e-05, "loss": 2.4087, "step": 2940000 }, { "epoch": 14.57, "learning_rate": 4.271850646294398e-05, "loss": 2.3843, "step": 2940500 }, { "epoch": 14.57, "learning_rate": 4.2717267876517895e-05, "loss": 2.3971, "step": 2941000 }, { "epoch": 14.57, "learning_rate": 4.2716029290091805e-05, "loss": 2.3815, "step": 2941500 }, { "epoch": 14.58, "learning_rate": 4.271479070366572e-05, "loss": 2.4035, "step": 2942000 }, { "epoch": 14.58, "learning_rate": 4.271355211723964e-05, "loss": 2.3709, "step": 2942500 }, { "epoch": 14.58, "learning_rate": 4.2712313530813556e-05, "loss": 2.3951, "step": 2943000 }, { "epoch": 14.58, "learning_rate": 4.271107494438747e-05, "loss": 2.3758, "step": 2943500 }, { "epoch": 14.59, "learning_rate": 4.270983635796138e-05, "loss": 2.4007, "step": 2944000 }, { "epoch": 14.59, "learning_rate": 4.270860024870816e-05, "loss": 2.4097, "step": 2944500 }, { "epoch": 14.59, "learning_rate": 4.2707361662282075e-05, "loss": 2.4025, "step": 2945000 }, { "epoch": 14.59, "learning_rate": 4.2706125553028844e-05, "loss": 2.3816, "step": 2945500 }, { "epoch": 14.6, "learning_rate": 4.270488696660276e-05, "loss": 2.3982, "step": 2946000 }, { "epoch": 14.6, "learning_rate": 4.270364838017668e-05, "loss": 2.3793, "step": 2946500 }, { "epoch": 14.6, "learning_rate": 4.2702409793750595e-05, "loss": 2.389, "step": 2947000 }, { "epoch": 14.6, "learning_rate": 4.270117120732451e-05, "loss": 2.362, "step": 2947500 }, { "epoch": 14.61, "learning_rate": 4.2699935098071274e-05, "loss": 2.3914, "step": 2948000 }, { "epoch": 14.61, "learning_rate": 4.269869651164519e-05, "loss": 2.3902, "step": 2948500 }, { "epoch": 14.61, "learning_rate": 4.269745792521911e-05, "loss": 2.3968, "step": 2949000 }, { "epoch": 14.61, "learning_rate": 4.2696219338793024e-05, "loss": 2.3966, "step": 2949500 }, { "epoch": 14.62, "learning_rate": 4.269498075236694e-05, "loss": 2.395, "step": 2950000 }, { "epoch": 14.62, "learning_rate": 4.269374216594086e-05, "loss": 2.3799, "step": 2950500 }, { "epoch": 14.62, "learning_rate": 4.2692503579514775e-05, "loss": 2.3837, "step": 2951000 }, { "epoch": 14.62, "learning_rate": 4.2691267470261544e-05, "loss": 2.3736, "step": 2951500 }, { "epoch": 14.63, "learning_rate": 4.269002888383546e-05, "loss": 2.3967, "step": 2952000 }, { "epoch": 14.63, "learning_rate": 4.268879029740938e-05, "loss": 2.4221, "step": 2952500 }, { "epoch": 14.63, "learning_rate": 4.2687551710983295e-05, "loss": 2.3803, "step": 2953000 }, { "epoch": 14.63, "learning_rate": 4.268631560173006e-05, "loss": 2.3863, "step": 2953500 }, { "epoch": 14.64, "learning_rate": 4.2685077015303974e-05, "loss": 2.391, "step": 2954000 }, { "epoch": 14.64, "learning_rate": 4.268383842887789e-05, "loss": 2.374, "step": 2954500 }, { "epoch": 14.64, "learning_rate": 4.268259984245181e-05, "loss": 2.3949, "step": 2955000 }, { "epoch": 14.64, "learning_rate": 4.2681363733198577e-05, "loss": 2.3759, "step": 2955500 }, { "epoch": 14.65, "learning_rate": 4.2680125146772493e-05, "loss": 2.387, "step": 2956000 }, { "epoch": 14.65, "learning_rate": 4.267888656034641e-05, "loss": 2.3696, "step": 2956500 }, { "epoch": 14.65, "learning_rate": 4.267764797392033e-05, "loss": 2.4066, "step": 2957000 }, { "epoch": 14.65, "learning_rate": 4.2676409387494244e-05, "loss": 2.4101, "step": 2957500 }, { "epoch": 14.65, "learning_rate": 4.267517327824101e-05, "loss": 2.3882, "step": 2958000 }, { "epoch": 14.66, "learning_rate": 4.267393716898778e-05, "loss": 2.4137, "step": 2958500 }, { "epoch": 14.66, "learning_rate": 4.26726985825617e-05, "loss": 2.3827, "step": 2959000 }, { "epoch": 14.66, "learning_rate": 4.267145999613561e-05, "loss": 2.4004, "step": 2959500 }, { "epoch": 14.66, "learning_rate": 4.2670221409709526e-05, "loss": 2.3833, "step": 2960000 }, { "epoch": 14.67, "learning_rate": 4.26689853004563e-05, "loss": 2.402, "step": 2960500 }, { "epoch": 14.67, "learning_rate": 4.266774671403022e-05, "loss": 2.4208, "step": 2961000 }, { "epoch": 14.67, "learning_rate": 4.2666508127604135e-05, "loss": 2.4217, "step": 2961500 }, { "epoch": 14.67, "learning_rate": 4.266526954117805e-05, "loss": 2.3977, "step": 2962000 }, { "epoch": 14.68, "learning_rate": 4.266403095475196e-05, "loss": 2.398, "step": 2962500 }, { "epoch": 14.68, "learning_rate": 4.266279236832588e-05, "loss": 2.3978, "step": 2963000 }, { "epoch": 14.68, "learning_rate": 4.2661553781899796e-05, "loss": 2.4067, "step": 2963500 }, { "epoch": 14.68, "learning_rate": 4.266031519547371e-05, "loss": 2.4094, "step": 2964000 }, { "epoch": 14.69, "learning_rate": 4.265907660904763e-05, "loss": 2.3881, "step": 2964500 }, { "epoch": 14.69, "learning_rate": 4.265783802262154e-05, "loss": 2.3953, "step": 2965000 }, { "epoch": 14.69, "learning_rate": 4.265659943619546e-05, "loss": 2.3919, "step": 2965500 }, { "epoch": 14.69, "learning_rate": 4.2655363326942226e-05, "loss": 2.3778, "step": 2966000 }, { "epoch": 14.7, "learning_rate": 4.265412474051614e-05, "loss": 2.3833, "step": 2966500 }, { "epoch": 14.7, "learning_rate": 4.265288615409006e-05, "loss": 2.3947, "step": 2967000 }, { "epoch": 14.7, "learning_rate": 4.265164756766398e-05, "loss": 2.3884, "step": 2967500 }, { "epoch": 14.7, "learning_rate": 4.2650408981237894e-05, "loss": 2.3881, "step": 2968000 }, { "epoch": 14.71, "learning_rate": 4.264917039481181e-05, "loss": 2.4038, "step": 2968500 }, { "epoch": 14.71, "learning_rate": 4.264793180838573e-05, "loss": 2.412, "step": 2969000 }, { "epoch": 14.71, "learning_rate": 4.2646693221959644e-05, "loss": 2.3893, "step": 2969500 }, { "epoch": 14.71, "learning_rate": 4.264545463553356e-05, "loss": 2.38, "step": 2970000 }, { "epoch": 14.72, "learning_rate": 4.264421604910748e-05, "loss": 2.4093, "step": 2970500 }, { "epoch": 14.72, "learning_rate": 4.264297993985425e-05, "loss": 2.3932, "step": 2971000 }, { "epoch": 14.72, "learning_rate": 4.2641741353428164e-05, "loss": 2.4133, "step": 2971500 }, { "epoch": 14.72, "learning_rate": 4.2640502767002074e-05, "loss": 2.3936, "step": 2972000 }, { "epoch": 14.73, "learning_rate": 4.263926418057599e-05, "loss": 2.405, "step": 2972500 }, { "epoch": 14.73, "learning_rate": 4.263802559414991e-05, "loss": 2.3882, "step": 2973000 }, { "epoch": 14.73, "learning_rate": 4.2636787007723825e-05, "loss": 2.3862, "step": 2973500 }, { "epoch": 14.73, "learning_rate": 4.263554842129774e-05, "loss": 2.3741, "step": 2974000 }, { "epoch": 14.74, "learning_rate": 4.263430983487166e-05, "loss": 2.4191, "step": 2974500 }, { "epoch": 14.74, "learning_rate": 4.2633071248445576e-05, "loss": 2.4142, "step": 2975000 }, { "epoch": 14.74, "learning_rate": 4.263183266201949e-05, "loss": 2.3749, "step": 2975500 }, { "epoch": 14.74, "learning_rate": 4.263059407559341e-05, "loss": 2.3808, "step": 2976000 }, { "epoch": 14.75, "learning_rate": 4.262935796634018e-05, "loss": 2.3935, "step": 2976500 }, { "epoch": 14.75, "learning_rate": 4.2628119379914095e-05, "loss": 2.3867, "step": 2977000 }, { "epoch": 14.75, "learning_rate": 4.2626883270660864e-05, "loss": 2.3977, "step": 2977500 }, { "epoch": 14.75, "learning_rate": 4.262564468423478e-05, "loss": 2.3814, "step": 2978000 }, { "epoch": 14.76, "learning_rate": 4.262440609780869e-05, "loss": 2.3813, "step": 2978500 }, { "epoch": 14.76, "learning_rate": 4.262316751138261e-05, "loss": 2.3986, "step": 2979000 }, { "epoch": 14.76, "learning_rate": 4.262193140212938e-05, "loss": 2.3936, "step": 2979500 }, { "epoch": 14.76, "learning_rate": 4.2620692815703294e-05, "loss": 2.403, "step": 2980000 }, { "epoch": 14.77, "learning_rate": 4.261945422927721e-05, "loss": 2.391, "step": 2980500 }, { "epoch": 14.77, "learning_rate": 4.261821564285113e-05, "loss": 2.4022, "step": 2981000 }, { "epoch": 14.77, "learning_rate": 4.2616977056425045e-05, "loss": 2.3994, "step": 2981500 }, { "epoch": 14.77, "learning_rate": 4.261573846999896e-05, "loss": 2.4071, "step": 2982000 }, { "epoch": 14.78, "learning_rate": 4.261449988357288e-05, "loss": 2.402, "step": 2982500 }, { "epoch": 14.78, "learning_rate": 4.2613261297146795e-05, "loss": 2.377, "step": 2983000 }, { "epoch": 14.78, "learning_rate": 4.261202271072071e-05, "loss": 2.3869, "step": 2983500 }, { "epoch": 14.78, "learning_rate": 4.261078412429463e-05, "loss": 2.3832, "step": 2984000 }, { "epoch": 14.79, "learning_rate": 4.26095480150414e-05, "loss": 2.4119, "step": 2984500 }, { "epoch": 14.79, "learning_rate": 4.2608309428615315e-05, "loss": 2.4104, "step": 2985000 }, { "epoch": 14.79, "learning_rate": 4.2607070842189225e-05, "loss": 2.3959, "step": 2985500 }, { "epoch": 14.79, "learning_rate": 4.260583225576314e-05, "loss": 2.4221, "step": 2986000 }, { "epoch": 14.8, "learning_rate": 4.260459366933706e-05, "loss": 2.4011, "step": 2986500 }, { "epoch": 14.8, "learning_rate": 4.2603355082910976e-05, "loss": 2.3937, "step": 2987000 }, { "epoch": 14.8, "learning_rate": 4.260211649648489e-05, "loss": 2.3794, "step": 2987500 }, { "epoch": 14.8, "learning_rate": 4.260088038723166e-05, "loss": 2.4179, "step": 2988000 }, { "epoch": 14.81, "learning_rate": 4.259964180080558e-05, "loss": 2.4193, "step": 2988500 }, { "epoch": 14.81, "learning_rate": 4.2598403214379496e-05, "loss": 2.3833, "step": 2989000 }, { "epoch": 14.81, "learning_rate": 4.2597167105126264e-05, "loss": 2.3923, "step": 2989500 }, { "epoch": 14.81, "learning_rate": 4.259592851870018e-05, "loss": 2.394, "step": 2990000 }, { "epoch": 14.82, "learning_rate": 4.25946899322741e-05, "loss": 2.3941, "step": 2990500 }, { "epoch": 14.82, "learning_rate": 4.2593451345848015e-05, "loss": 2.4062, "step": 2991000 }, { "epoch": 14.82, "learning_rate": 4.259221275942193e-05, "loss": 2.3848, "step": 2991500 }, { "epoch": 14.82, "learning_rate": 4.259097417299584e-05, "loss": 2.3924, "step": 2992000 }, { "epoch": 14.83, "learning_rate": 4.258973558656976e-05, "loss": 2.4166, "step": 2992500 }, { "epoch": 14.83, "learning_rate": 4.2588497000143676e-05, "loss": 2.3901, "step": 2993000 }, { "epoch": 14.83, "learning_rate": 4.258725841371759e-05, "loss": 2.4053, "step": 2993500 }, { "epoch": 14.83, "learning_rate": 4.258601982729151e-05, "loss": 2.3861, "step": 2994000 }, { "epoch": 14.84, "learning_rate": 4.258478124086543e-05, "loss": 2.3877, "step": 2994500 }, { "epoch": 14.84, "learning_rate": 4.2583542654439344e-05, "loss": 2.4219, "step": 2995000 }, { "epoch": 14.84, "learning_rate": 4.258230654518611e-05, "loss": 2.4288, "step": 2995500 }, { "epoch": 14.84, "learning_rate": 4.258106795876003e-05, "loss": 2.388, "step": 2996000 }, { "epoch": 14.85, "learning_rate": 4.2579829372333947e-05, "loss": 2.391, "step": 2996500 }, { "epoch": 14.85, "learning_rate": 4.2578590785907863e-05, "loss": 2.4086, "step": 2997000 }, { "epoch": 14.85, "learning_rate": 4.257735219948178e-05, "loss": 2.3844, "step": 2997500 }, { "epoch": 14.85, "learning_rate": 4.25761136130557e-05, "loss": 2.3956, "step": 2998000 }, { "epoch": 14.86, "learning_rate": 4.2574875026629614e-05, "loss": 2.3777, "step": 2998500 }, { "epoch": 14.86, "learning_rate": 4.257363644020353e-05, "loss": 2.3943, "step": 2999000 }, { "epoch": 14.86, "learning_rate": 4.2572402808123145e-05, "loss": 2.3853, "step": 2999500 }, { "epoch": 14.86, "learning_rate": 4.257116422169706e-05, "loss": 2.388, "step": 3000000 }, { "epoch": 14.87, "learning_rate": 4.256992563527098e-05, "loss": 2.4066, "step": 3000500 }, { "epoch": 14.87, "learning_rate": 4.2568687048844896e-05, "loss": 2.3979, "step": 3001000 }, { "epoch": 14.87, "learning_rate": 4.256744846241881e-05, "loss": 2.392, "step": 3001500 }, { "epoch": 14.87, "learning_rate": 4.256620987599273e-05, "loss": 2.4233, "step": 3002000 }, { "epoch": 14.88, "learning_rate": 4.256497128956665e-05, "loss": 2.3834, "step": 3002500 }, { "epoch": 14.88, "learning_rate": 4.2563732703140564e-05, "loss": 2.3635, "step": 3003000 }, { "epoch": 14.88, "learning_rate": 4.256249659388733e-05, "loss": 2.4039, "step": 3003500 }, { "epoch": 14.88, "learning_rate": 4.256125800746125e-05, "loss": 2.3884, "step": 3004000 }, { "epoch": 14.89, "learning_rate": 4.2560019421035166e-05, "loss": 2.4051, "step": 3004500 }, { "epoch": 14.89, "learning_rate": 4.255878083460908e-05, "loss": 2.3943, "step": 3005000 }, { "epoch": 14.89, "learning_rate": 4.255754224818299e-05, "loss": 2.4152, "step": 3005500 }, { "epoch": 14.89, "learning_rate": 4.255630613892976e-05, "loss": 2.3811, "step": 3006000 }, { "epoch": 14.9, "learning_rate": 4.255506755250368e-05, "loss": 2.393, "step": 3006500 }, { "epoch": 14.9, "learning_rate": 4.2553828966077596e-05, "loss": 2.4022, "step": 3007000 }, { "epoch": 14.9, "learning_rate": 4.255259037965151e-05, "loss": 2.3587, "step": 3007500 }, { "epoch": 14.9, "learning_rate": 4.255135179322543e-05, "loss": 2.4104, "step": 3008000 }, { "epoch": 14.91, "learning_rate": 4.255011320679935e-05, "loss": 2.3866, "step": 3008500 }, { "epoch": 14.91, "learning_rate": 4.2548874620373264e-05, "loss": 2.4138, "step": 3009000 }, { "epoch": 14.91, "learning_rate": 4.254763603394718e-05, "loss": 2.3889, "step": 3009500 }, { "epoch": 14.91, "learning_rate": 4.254639992469395e-05, "loss": 2.4053, "step": 3010000 }, { "epoch": 14.92, "learning_rate": 4.2545161338267866e-05, "loss": 2.3929, "step": 3010500 }, { "epoch": 14.92, "learning_rate": 4.254392275184178e-05, "loss": 2.401, "step": 3011000 }, { "epoch": 14.92, "learning_rate": 4.25426841654157e-05, "loss": 2.3921, "step": 3011500 }, { "epoch": 14.92, "learning_rate": 4.254144557898962e-05, "loss": 2.4238, "step": 3012000 }, { "epoch": 14.92, "learning_rate": 4.254020699256353e-05, "loss": 2.3746, "step": 3012500 }, { "epoch": 14.93, "learning_rate": 4.2538968406137444e-05, "loss": 2.3839, "step": 3013000 }, { "epoch": 14.93, "learning_rate": 4.253773229688421e-05, "loss": 2.3947, "step": 3013500 }, { "epoch": 14.93, "learning_rate": 4.253649371045813e-05, "loss": 2.402, "step": 3014000 }, { "epoch": 14.93, "learning_rate": 4.253525512403205e-05, "loss": 2.3758, "step": 3014500 }, { "epoch": 14.94, "learning_rate": 4.2534016537605964e-05, "loss": 2.3933, "step": 3015000 }, { "epoch": 14.94, "learning_rate": 4.253278290552558e-05, "loss": 2.3876, "step": 3015500 }, { "epoch": 14.94, "learning_rate": 4.2531544319099495e-05, "loss": 2.3971, "step": 3016000 }, { "epoch": 14.94, "learning_rate": 4.253030573267341e-05, "loss": 2.4107, "step": 3016500 }, { "epoch": 14.95, "learning_rate": 4.252906714624733e-05, "loss": 2.4113, "step": 3017000 }, { "epoch": 14.95, "learning_rate": 4.2527831036994104e-05, "loss": 2.3868, "step": 3017500 }, { "epoch": 14.95, "learning_rate": 4.252659245056802e-05, "loss": 2.3966, "step": 3018000 }, { "epoch": 14.95, "learning_rate": 4.252535386414194e-05, "loss": 2.3974, "step": 3018500 }, { "epoch": 14.96, "learning_rate": 4.252411527771585e-05, "loss": 2.3664, "step": 3019000 }, { "epoch": 14.96, "learning_rate": 4.2522876691289765e-05, "loss": 2.3885, "step": 3019500 }, { "epoch": 14.96, "learning_rate": 4.252163810486368e-05, "loss": 2.3988, "step": 3020000 }, { "epoch": 14.96, "learning_rate": 4.252040199561045e-05, "loss": 2.3894, "step": 3020500 }, { "epoch": 14.97, "learning_rate": 4.251916340918437e-05, "loss": 2.3897, "step": 3021000 }, { "epoch": 14.97, "learning_rate": 4.2517924822758285e-05, "loss": 2.3855, "step": 3021500 }, { "epoch": 14.97, "learning_rate": 4.2516688713505053e-05, "loss": 2.3951, "step": 3022000 }, { "epoch": 14.97, "learning_rate": 4.251545260425182e-05, "loss": 2.438, "step": 3022500 }, { "epoch": 14.98, "learning_rate": 4.251421401782574e-05, "loss": 2.3995, "step": 3023000 }, { "epoch": 14.98, "learning_rate": 4.2512975431399656e-05, "loss": 2.4106, "step": 3023500 }, { "epoch": 14.98, "learning_rate": 4.251173684497357e-05, "loss": 2.3756, "step": 3024000 }, { "epoch": 14.98, "learning_rate": 4.2510500735720335e-05, "loss": 2.3789, "step": 3024500 }, { "epoch": 14.99, "learning_rate": 4.250926214929425e-05, "loss": 2.4049, "step": 3025000 }, { "epoch": 14.99, "learning_rate": 4.250802356286817e-05, "loss": 2.363, "step": 3025500 }, { "epoch": 14.99, "learning_rate": 4.2506784976442086e-05, "loss": 2.4208, "step": 3026000 }, { "epoch": 14.99, "learning_rate": 4.2505546390016e-05, "loss": 2.3791, "step": 3026500 }, { "epoch": 15.0, "learning_rate": 4.250430780358992e-05, "loss": 2.3832, "step": 3027000 }, { "epoch": 15.0, "learning_rate": 4.250306921716384e-05, "loss": 2.3903, "step": 3027500 }, { "epoch": 15.0, "eval_accuracy": 0.6470159402014201, "eval_accuracy_mlm": 0.6010111516244826, "eval_accuracy_nsp": 0.8638683082377951, "eval_loss": 2.399909496307373, "eval_runtime": 145.758, "eval_samples_per_second": 1749.194, "eval_steps_per_second": 72.888, "step": 3027645 }, { "epoch": 15.0, "learning_rate": 4.2501830630737754e-05, "loss": 2.3627, "step": 3028000 }, { "epoch": 15.0, "learning_rate": 4.250059204431167e-05, "loss": 2.3456, "step": 3028500 }, { "epoch": 15.01, "learning_rate": 4.249935593505844e-05, "loss": 2.3451, "step": 3029000 }, { "epoch": 15.01, "learning_rate": 4.2498117348632356e-05, "loss": 2.3651, "step": 3029500 }, { "epoch": 15.01, "learning_rate": 4.249687876220627e-05, "loss": 2.3518, "step": 3030000 }, { "epoch": 15.01, "learning_rate": 4.249564017578019e-05, "loss": 2.3489, "step": 3030500 }, { "epoch": 15.02, "learning_rate": 4.249440158935411e-05, "loss": 2.3564, "step": 3031000 }, { "epoch": 15.02, "learning_rate": 4.2493163002928024e-05, "loss": 2.3755, "step": 3031500 }, { "epoch": 15.02, "learning_rate": 4.249192441650194e-05, "loss": 2.3875, "step": 3032000 }, { "epoch": 15.02, "learning_rate": 4.249068583007586e-05, "loss": 2.3706, "step": 3032500 }, { "epoch": 15.03, "learning_rate": 4.2489447243649775e-05, "loss": 2.3564, "step": 3033000 }, { "epoch": 15.03, "learning_rate": 4.2488208657223685e-05, "loss": 2.3672, "step": 3033500 }, { "epoch": 15.03, "learning_rate": 4.24869700707976e-05, "loss": 2.3532, "step": 3034000 }, { "epoch": 15.03, "learning_rate": 4.248573148437152e-05, "loss": 2.3531, "step": 3034500 }, { "epoch": 15.04, "learning_rate": 4.2484492897945436e-05, "loss": 2.3578, "step": 3035000 }, { "epoch": 15.04, "learning_rate": 4.2483256788692204e-05, "loss": 2.4104, "step": 3035500 }, { "epoch": 15.04, "learning_rate": 4.248201820226612e-05, "loss": 2.3724, "step": 3036000 }, { "epoch": 15.04, "learning_rate": 4.248077961584004e-05, "loss": 2.3303, "step": 3036500 }, { "epoch": 15.05, "learning_rate": 4.2479541029413955e-05, "loss": 2.3409, "step": 3037000 }, { "epoch": 15.05, "learning_rate": 4.2478302442987865e-05, "loss": 2.3438, "step": 3037500 }, { "epoch": 15.05, "learning_rate": 4.247706633373464e-05, "loss": 2.3988, "step": 3038000 }, { "epoch": 15.05, "learning_rate": 4.247582774730856e-05, "loss": 2.3621, "step": 3038500 }, { "epoch": 15.06, "learning_rate": 4.247459163805532e-05, "loss": 2.4002, "step": 3039000 }, { "epoch": 15.06, "learning_rate": 4.247335305162924e-05, "loss": 2.3571, "step": 3039500 }, { "epoch": 15.06, "learning_rate": 4.2472114465203154e-05, "loss": 2.3597, "step": 3040000 }, { "epoch": 15.06, "learning_rate": 4.247087587877707e-05, "loss": 2.3572, "step": 3040500 }, { "epoch": 15.07, "learning_rate": 4.246963729235099e-05, "loss": 2.391, "step": 3041000 }, { "epoch": 15.07, "learning_rate": 4.2468398705924905e-05, "loss": 2.3717, "step": 3041500 }, { "epoch": 15.07, "learning_rate": 4.246716011949882e-05, "loss": 2.3924, "step": 3042000 }, { "epoch": 15.07, "learning_rate": 4.246592153307274e-05, "loss": 2.3618, "step": 3042500 }, { "epoch": 15.08, "learning_rate": 4.2464682946646655e-05, "loss": 2.3624, "step": 3043000 }, { "epoch": 15.08, "learning_rate": 4.246344436022057e-05, "loss": 2.368, "step": 3043500 }, { "epoch": 15.08, "learning_rate": 4.246220577379448e-05, "loss": 2.3958, "step": 3044000 }, { "epoch": 15.08, "learning_rate": 4.24609671873684e-05, "loss": 2.3759, "step": 3044500 }, { "epoch": 15.09, "learning_rate": 4.2459728600942316e-05, "loss": 2.3554, "step": 3045000 }, { "epoch": 15.09, "learning_rate": 4.245849496886194e-05, "loss": 2.3552, "step": 3045500 }, { "epoch": 15.09, "learning_rate": 4.2457256382435854e-05, "loss": 2.3811, "step": 3046000 }, { "epoch": 15.09, "learning_rate": 4.245601779600977e-05, "loss": 2.381, "step": 3046500 }, { "epoch": 15.1, "learning_rate": 4.245477920958369e-05, "loss": 2.3725, "step": 3047000 }, { "epoch": 15.1, "learning_rate": 4.245354310033046e-05, "loss": 2.3783, "step": 3047500 }, { "epoch": 15.1, "learning_rate": 4.2452304513904374e-05, "loss": 2.3869, "step": 3048000 }, { "epoch": 15.1, "learning_rate": 4.245106592747829e-05, "loss": 2.3689, "step": 3048500 }, { "epoch": 15.11, "learning_rate": 4.244982734105221e-05, "loss": 2.3686, "step": 3049000 }, { "epoch": 15.11, "learning_rate": 4.2448588754626124e-05, "loss": 2.3877, "step": 3049500 }, { "epoch": 15.11, "learning_rate": 4.244735016820004e-05, "loss": 2.3803, "step": 3050000 }, { "epoch": 15.11, "learning_rate": 4.244611158177396e-05, "loss": 2.3927, "step": 3050500 }, { "epoch": 15.12, "learning_rate": 4.2444872995347875e-05, "loss": 2.3387, "step": 3051000 }, { "epoch": 15.12, "learning_rate": 4.244363688609464e-05, "loss": 2.3727, "step": 3051500 }, { "epoch": 15.12, "learning_rate": 4.2442398299668554e-05, "loss": 2.3679, "step": 3052000 }, { "epoch": 15.12, "learning_rate": 4.244115971324247e-05, "loss": 2.3728, "step": 3052500 }, { "epoch": 15.13, "learning_rate": 4.243992112681639e-05, "loss": 2.3888, "step": 3053000 }, { "epoch": 15.13, "learning_rate": 4.2438682540390305e-05, "loss": 2.3701, "step": 3053500 }, { "epoch": 15.13, "learning_rate": 4.243744395396422e-05, "loss": 2.363, "step": 3054000 }, { "epoch": 15.13, "learning_rate": 4.243620784471099e-05, "loss": 2.3638, "step": 3054500 }, { "epoch": 15.14, "learning_rate": 4.243496925828491e-05, "loss": 2.3703, "step": 3055000 }, { "epoch": 15.14, "learning_rate": 4.2433730671858824e-05, "loss": 2.3799, "step": 3055500 }, { "epoch": 15.14, "learning_rate": 4.243249208543274e-05, "loss": 2.3817, "step": 3056000 }, { "epoch": 15.14, "learning_rate": 4.243125349900666e-05, "loss": 2.3796, "step": 3056500 }, { "epoch": 15.15, "learning_rate": 4.243001738975342e-05, "loss": 2.3622, "step": 3057000 }, { "epoch": 15.15, "learning_rate": 4.242877880332734e-05, "loss": 2.3801, "step": 3057500 }, { "epoch": 15.15, "learning_rate": 4.2427542694074106e-05, "loss": 2.3878, "step": 3058000 }, { "epoch": 15.15, "learning_rate": 4.242630410764802e-05, "loss": 2.3574, "step": 3058500 }, { "epoch": 15.16, "learning_rate": 4.242506552122194e-05, "loss": 2.3479, "step": 3059000 }, { "epoch": 15.16, "learning_rate": 4.242382693479586e-05, "loss": 2.3529, "step": 3059500 }, { "epoch": 15.16, "learning_rate": 4.2422588348369774e-05, "loss": 2.3862, "step": 3060000 }, { "epoch": 15.16, "learning_rate": 4.242134976194369e-05, "loss": 2.3654, "step": 3060500 }, { "epoch": 15.17, "learning_rate": 4.242011117551761e-05, "loss": 2.3518, "step": 3061000 }, { "epoch": 15.17, "learning_rate": 4.2418875066264376e-05, "loss": 2.374, "step": 3061500 }, { "epoch": 15.17, "learning_rate": 4.241763647983829e-05, "loss": 2.3655, "step": 3062000 }, { "epoch": 15.17, "learning_rate": 4.241639789341221e-05, "loss": 2.3658, "step": 3062500 }, { "epoch": 15.18, "learning_rate": 4.241515930698613e-05, "loss": 2.3602, "step": 3063000 }, { "epoch": 15.18, "learning_rate": 4.2413920720560044e-05, "loss": 2.3609, "step": 3063500 }, { "epoch": 15.18, "learning_rate": 4.2412682134133954e-05, "loss": 2.367, "step": 3064000 }, { "epoch": 15.18, "learning_rate": 4.241144354770787e-05, "loss": 2.3744, "step": 3064500 }, { "epoch": 15.19, "learning_rate": 4.241020496128179e-05, "loss": 2.3696, "step": 3065000 }, { "epoch": 15.19, "learning_rate": 4.2408966374855705e-05, "loss": 2.3808, "step": 3065500 }, { "epoch": 15.19, "learning_rate": 4.2407730265602474e-05, "loss": 2.3886, "step": 3066000 }, { "epoch": 15.19, "learning_rate": 4.240649167917639e-05, "loss": 2.3662, "step": 3066500 }, { "epoch": 15.19, "learning_rate": 4.240525309275031e-05, "loss": 2.3931, "step": 3067000 }, { "epoch": 15.2, "learning_rate": 4.2404014506324225e-05, "loss": 2.3764, "step": 3067500 }, { "epoch": 15.2, "learning_rate": 4.240277591989814e-05, "loss": 2.3745, "step": 3068000 }, { "epoch": 15.2, "learning_rate": 4.240153733347206e-05, "loss": 2.3827, "step": 3068500 }, { "epoch": 15.2, "learning_rate": 4.240030122421883e-05, "loss": 2.3736, "step": 3069000 }, { "epoch": 15.21, "learning_rate": 4.2399062637792744e-05, "loss": 2.3926, "step": 3069500 }, { "epoch": 15.21, "learning_rate": 4.239782405136666e-05, "loss": 2.3844, "step": 3070000 }, { "epoch": 15.21, "learning_rate": 4.239658546494057e-05, "loss": 2.3614, "step": 3070500 }, { "epoch": 15.21, "learning_rate": 4.239534687851449e-05, "loss": 2.3672, "step": 3071000 }, { "epoch": 15.22, "learning_rate": 4.2394108292088405e-05, "loss": 2.3647, "step": 3071500 }, { "epoch": 15.22, "learning_rate": 4.239286970566232e-05, "loss": 2.3832, "step": 3072000 }, { "epoch": 15.22, "learning_rate": 4.239163111923624e-05, "loss": 2.3962, "step": 3072500 }, { "epoch": 15.22, "learning_rate": 4.239039500998301e-05, "loss": 2.361, "step": 3073000 }, { "epoch": 15.23, "learning_rate": 4.238915890072978e-05, "loss": 2.3841, "step": 3073500 }, { "epoch": 15.23, "learning_rate": 4.2387920314303694e-05, "loss": 2.3729, "step": 3074000 }, { "epoch": 15.23, "learning_rate": 4.238668172787761e-05, "loss": 2.3962, "step": 3074500 }, { "epoch": 15.23, "learning_rate": 4.238544314145153e-05, "loss": 2.3965, "step": 3075000 }, { "epoch": 15.24, "learning_rate": 4.2384204555025444e-05, "loss": 2.3928, "step": 3075500 }, { "epoch": 15.24, "learning_rate": 4.238296596859936e-05, "loss": 2.3878, "step": 3076000 }, { "epoch": 15.24, "learning_rate": 4.238172985934612e-05, "loss": 2.3677, "step": 3076500 }, { "epoch": 15.24, "learning_rate": 4.238049127292004e-05, "loss": 2.3718, "step": 3077000 }, { "epoch": 15.25, "learning_rate": 4.237925268649396e-05, "loss": 2.3981, "step": 3077500 }, { "epoch": 15.25, "learning_rate": 4.2378014100067874e-05, "loss": 2.3709, "step": 3078000 }, { "epoch": 15.25, "learning_rate": 4.237677551364179e-05, "loss": 2.4063, "step": 3078500 }, { "epoch": 15.25, "learning_rate": 4.237553940438856e-05, "loss": 2.3766, "step": 3079000 }, { "epoch": 15.26, "learning_rate": 4.237430081796248e-05, "loss": 2.3836, "step": 3079500 }, { "epoch": 15.26, "learning_rate": 4.2373064708709246e-05, "loss": 2.3741, "step": 3080000 }, { "epoch": 15.26, "learning_rate": 4.237182612228316e-05, "loss": 2.3866, "step": 3080500 }, { "epoch": 15.26, "learning_rate": 4.237058753585708e-05, "loss": 2.3761, "step": 3081000 }, { "epoch": 15.27, "learning_rate": 4.2369348949430996e-05, "loss": 2.3606, "step": 3081500 }, { "epoch": 15.27, "learning_rate": 4.2368110363004907e-05, "loss": 2.3544, "step": 3082000 }, { "epoch": 15.27, "learning_rate": 4.2366871776578824e-05, "loss": 2.3828, "step": 3082500 }, { "epoch": 15.27, "learning_rate": 4.236563319015274e-05, "loss": 2.3494, "step": 3083000 }, { "epoch": 15.28, "learning_rate": 4.236439460372666e-05, "loss": 2.3749, "step": 3083500 }, { "epoch": 15.28, "learning_rate": 4.2363156017300574e-05, "loss": 2.384, "step": 3084000 }, { "epoch": 15.28, "learning_rate": 4.236191990804735e-05, "loss": 2.3721, "step": 3084500 }, { "epoch": 15.28, "learning_rate": 4.236068132162126e-05, "loss": 2.367, "step": 3085000 }, { "epoch": 15.29, "learning_rate": 4.235944273519518e-05, "loss": 2.3508, "step": 3085500 }, { "epoch": 15.29, "learning_rate": 4.2358204148769094e-05, "loss": 2.3788, "step": 3086000 }, { "epoch": 15.29, "learning_rate": 4.235696556234301e-05, "loss": 2.3785, "step": 3086500 }, { "epoch": 15.29, "learning_rate": 4.235572697591693e-05, "loss": 2.3908, "step": 3087000 }, { "epoch": 15.3, "learning_rate": 4.2354488389490845e-05, "loss": 2.3881, "step": 3087500 }, { "epoch": 15.3, "learning_rate": 4.235324980306476e-05, "loss": 2.3729, "step": 3088000 }, { "epoch": 15.3, "learning_rate": 4.2352013693811524e-05, "loss": 2.3611, "step": 3088500 }, { "epoch": 15.3, "learning_rate": 4.235077510738544e-05, "loss": 2.3854, "step": 3089000 }, { "epoch": 15.31, "learning_rate": 4.2349538998132216e-05, "loss": 2.3698, "step": 3089500 }, { "epoch": 15.31, "learning_rate": 4.234830041170613e-05, "loss": 2.3766, "step": 3090000 }, { "epoch": 15.31, "learning_rate": 4.234706182528005e-05, "loss": 2.366, "step": 3090500 }, { "epoch": 15.31, "learning_rate": 4.234582323885397e-05, "loss": 2.3723, "step": 3091000 }, { "epoch": 15.32, "learning_rate": 4.234458465242788e-05, "loss": 2.378, "step": 3091500 }, { "epoch": 15.32, "learning_rate": 4.2343346066001794e-05, "loss": 2.3886, "step": 3092000 }, { "epoch": 15.32, "learning_rate": 4.234210747957571e-05, "loss": 2.3767, "step": 3092500 }, { "epoch": 15.32, "learning_rate": 4.234087137032248e-05, "loss": 2.3718, "step": 3093000 }, { "epoch": 15.33, "learning_rate": 4.23396327838964e-05, "loss": 2.3711, "step": 3093500 }, { "epoch": 15.33, "learning_rate": 4.2338394197470314e-05, "loss": 2.3911, "step": 3094000 }, { "epoch": 15.33, "learning_rate": 4.2337155611044224e-05, "loss": 2.3506, "step": 3094500 }, { "epoch": 15.33, "learning_rate": 4.2335919501791e-05, "loss": 2.3763, "step": 3095000 }, { "epoch": 15.34, "learning_rate": 4.2334680915364916e-05, "loss": 2.3951, "step": 3095500 }, { "epoch": 15.34, "learning_rate": 4.233344232893883e-05, "loss": 2.3904, "step": 3096000 }, { "epoch": 15.34, "learning_rate": 4.233220374251275e-05, "loss": 2.3658, "step": 3096500 }, { "epoch": 15.34, "learning_rate": 4.233096515608667e-05, "loss": 2.3671, "step": 3097000 }, { "epoch": 15.35, "learning_rate": 4.232972656966058e-05, "loss": 2.3921, "step": 3097500 }, { "epoch": 15.35, "learning_rate": 4.2328487983234494e-05, "loss": 2.3858, "step": 3098000 }, { "epoch": 15.35, "learning_rate": 4.232724939680841e-05, "loss": 2.3708, "step": 3098500 }, { "epoch": 15.35, "learning_rate": 4.232601081038233e-05, "loss": 2.3545, "step": 3099000 }, { "epoch": 15.36, "learning_rate": 4.2324772223956245e-05, "loss": 2.3699, "step": 3099500 }, { "epoch": 15.36, "learning_rate": 4.232353363753016e-05, "loss": 2.3685, "step": 3100000 }, { "epoch": 15.36, "learning_rate": 4.232229505110408e-05, "loss": 2.3597, "step": 3100500 }, { "epoch": 15.36, "learning_rate": 4.2321056464677996e-05, "loss": 2.4052, "step": 3101000 }, { "epoch": 15.37, "learning_rate": 4.231981787825191e-05, "loss": 2.3693, "step": 3101500 }, { "epoch": 15.37, "learning_rate": 4.231857929182583e-05, "loss": 2.3889, "step": 3102000 }, { "epoch": 15.37, "learning_rate": 4.2317340705399746e-05, "loss": 2.3852, "step": 3102500 }, { "epoch": 15.37, "learning_rate": 4.2316102118973663e-05, "loss": 2.3749, "step": 3103000 }, { "epoch": 15.38, "learning_rate": 4.2314866009720425e-05, "loss": 2.3614, "step": 3103500 }, { "epoch": 15.38, "learning_rate": 4.231362742329434e-05, "loss": 2.3763, "step": 3104000 }, { "epoch": 15.38, "learning_rate": 4.231238883686826e-05, "loss": 2.3818, "step": 3104500 }, { "epoch": 15.38, "learning_rate": 4.2311150250442176e-05, "loss": 2.3593, "step": 3105000 }, { "epoch": 15.39, "learning_rate": 4.2309914141188945e-05, "loss": 2.3944, "step": 3105500 }, { "epoch": 15.39, "learning_rate": 4.2308678031935714e-05, "loss": 2.3794, "step": 3106000 }, { "epoch": 15.39, "learning_rate": 4.230743944550963e-05, "loss": 2.3482, "step": 3106500 }, { "epoch": 15.39, "learning_rate": 4.230620085908355e-05, "loss": 2.3852, "step": 3107000 }, { "epoch": 15.4, "learning_rate": 4.2304962272657465e-05, "loss": 2.3996, "step": 3107500 }, { "epoch": 15.4, "learning_rate": 4.2303723686231375e-05, "loss": 2.3387, "step": 3108000 }, { "epoch": 15.4, "learning_rate": 4.230248509980529e-05, "loss": 2.3473, "step": 3108500 }, { "epoch": 15.4, "learning_rate": 4.230124899055207e-05, "loss": 2.3551, "step": 3109000 }, { "epoch": 15.41, "learning_rate": 4.2300012881298836e-05, "loss": 2.3676, "step": 3109500 }, { "epoch": 15.41, "learning_rate": 4.229877429487275e-05, "loss": 2.3606, "step": 3110000 }, { "epoch": 15.41, "learning_rate": 4.229753570844667e-05, "loss": 2.3807, "step": 3110500 }, { "epoch": 15.41, "learning_rate": 4.229629712202059e-05, "loss": 2.3688, "step": 3111000 }, { "epoch": 15.42, "learning_rate": 4.2295058535594504e-05, "loss": 2.4027, "step": 3111500 }, { "epoch": 15.42, "learning_rate": 4.2293819949168414e-05, "loss": 2.3676, "step": 3112000 }, { "epoch": 15.42, "learning_rate": 4.229258136274233e-05, "loss": 2.3845, "step": 3112500 }, { "epoch": 15.42, "learning_rate": 4.229134277631625e-05, "loss": 2.3844, "step": 3113000 }, { "epoch": 15.43, "learning_rate": 4.2290104189890165e-05, "loss": 2.3842, "step": 3113500 }, { "epoch": 15.43, "learning_rate": 4.2288868080636934e-05, "loss": 2.3777, "step": 3114000 }, { "epoch": 15.43, "learning_rate": 4.228762949421085e-05, "loss": 2.3698, "step": 3114500 }, { "epoch": 15.43, "learning_rate": 4.228639090778477e-05, "loss": 2.3828, "step": 3115000 }, { "epoch": 15.44, "learning_rate": 4.2285154798531536e-05, "loss": 2.3369, "step": 3115500 }, { "epoch": 15.44, "learning_rate": 4.228391621210545e-05, "loss": 2.3914, "step": 3116000 }, { "epoch": 15.44, "learning_rate": 4.228267762567937e-05, "loss": 2.3863, "step": 3116500 }, { "epoch": 15.44, "learning_rate": 4.228144151642613e-05, "loss": 2.3908, "step": 3117000 }, { "epoch": 15.45, "learning_rate": 4.228020293000005e-05, "loss": 2.3652, "step": 3117500 }, { "epoch": 15.45, "learning_rate": 4.2278964343573966e-05, "loss": 2.3764, "step": 3118000 }, { "epoch": 15.45, "learning_rate": 4.227772575714788e-05, "loss": 2.4005, "step": 3118500 }, { "epoch": 15.45, "learning_rate": 4.22764871707218e-05, "loss": 2.3834, "step": 3119000 }, { "epoch": 15.46, "learning_rate": 4.227525106146857e-05, "loss": 2.3881, "step": 3119500 }, { "epoch": 15.46, "learning_rate": 4.2274012475042486e-05, "loss": 2.3676, "step": 3120000 }, { "epoch": 15.46, "learning_rate": 4.22727738886164e-05, "loss": 2.3671, "step": 3120500 }, { "epoch": 15.46, "learning_rate": 4.227153530219032e-05, "loss": 2.3702, "step": 3121000 }, { "epoch": 15.46, "learning_rate": 4.2270296715764236e-05, "loss": 2.3961, "step": 3121500 }, { "epoch": 15.47, "learning_rate": 4.226905812933815e-05, "loss": 2.3922, "step": 3122000 }, { "epoch": 15.47, "learning_rate": 4.226781954291207e-05, "loss": 2.3649, "step": 3122500 }, { "epoch": 15.47, "learning_rate": 4.226658095648599e-05, "loss": 2.3649, "step": 3123000 }, { "epoch": 15.47, "learning_rate": 4.2265342370059904e-05, "loss": 2.393, "step": 3123500 }, { "epoch": 15.48, "learning_rate": 4.226410378363382e-05, "loss": 2.3866, "step": 3124000 }, { "epoch": 15.48, "learning_rate": 4.226286519720774e-05, "loss": 2.3817, "step": 3124500 }, { "epoch": 15.48, "learning_rate": 4.2261626610781655e-05, "loss": 2.3549, "step": 3125000 }, { "epoch": 15.48, "learning_rate": 4.2260388024355565e-05, "loss": 2.3664, "step": 3125500 }, { "epoch": 15.49, "learning_rate": 4.225914943792948e-05, "loss": 2.3663, "step": 3126000 }, { "epoch": 15.49, "learning_rate": 4.22579158058491e-05, "loss": 2.3709, "step": 3126500 }, { "epoch": 15.49, "learning_rate": 4.225667721942302e-05, "loss": 2.368, "step": 3127000 }, { "epoch": 15.49, "learning_rate": 4.2255438632996936e-05, "loss": 2.3716, "step": 3127500 }, { "epoch": 15.5, "learning_rate": 4.2254200046570853e-05, "loss": 2.4181, "step": 3128000 }, { "epoch": 15.5, "learning_rate": 4.225296146014477e-05, "loss": 2.3836, "step": 3128500 }, { "epoch": 15.5, "learning_rate": 4.225172287371869e-05, "loss": 2.3661, "step": 3129000 }, { "epoch": 15.5, "learning_rate": 4.225048676446545e-05, "loss": 2.3753, "step": 3129500 }, { "epoch": 15.51, "learning_rate": 4.2249248178039366e-05, "loss": 2.3983, "step": 3130000 }, { "epoch": 15.51, "learning_rate": 4.224800959161328e-05, "loss": 2.3943, "step": 3130500 }, { "epoch": 15.51, "learning_rate": 4.22467710051872e-05, "loss": 2.3889, "step": 3131000 }, { "epoch": 15.51, "learning_rate": 4.224553241876112e-05, "loss": 2.3742, "step": 3131500 }, { "epoch": 15.52, "learning_rate": 4.2244293832335034e-05, "loss": 2.3783, "step": 3132000 }, { "epoch": 15.52, "learning_rate": 4.224305524590895e-05, "loss": 2.4068, "step": 3132500 }, { "epoch": 15.52, "learning_rate": 4.224181665948287e-05, "loss": 2.3924, "step": 3133000 }, { "epoch": 15.52, "learning_rate": 4.2240580550229637e-05, "loss": 2.386, "step": 3133500 }, { "epoch": 15.53, "learning_rate": 4.2239341963803554e-05, "loss": 2.3976, "step": 3134000 }, { "epoch": 15.53, "learning_rate": 4.223810337737747e-05, "loss": 2.369, "step": 3134500 }, { "epoch": 15.53, "learning_rate": 4.223686479095139e-05, "loss": 2.3854, "step": 3135000 }, { "epoch": 15.53, "learning_rate": 4.2235626204525304e-05, "loss": 2.392, "step": 3135500 }, { "epoch": 15.54, "learning_rate": 4.223438761809922e-05, "loss": 2.3792, "step": 3136000 }, { "epoch": 15.54, "learning_rate": 4.223314903167314e-05, "loss": 2.357, "step": 3136500 }, { "epoch": 15.54, "learning_rate": 4.2231910445247055e-05, "loss": 2.3669, "step": 3137000 }, { "epoch": 15.54, "learning_rate": 4.223067185882097e-05, "loss": 2.3775, "step": 3137500 }, { "epoch": 15.55, "learning_rate": 4.2229435749567734e-05, "loss": 2.3768, "step": 3138000 }, { "epoch": 15.55, "learning_rate": 4.222819716314165e-05, "loss": 2.3868, "step": 3138500 }, { "epoch": 15.55, "learning_rate": 4.222695857671557e-05, "loss": 2.3805, "step": 3139000 }, { "epoch": 15.55, "learning_rate": 4.2225719990289485e-05, "loss": 2.3855, "step": 3139500 }, { "epoch": 15.56, "learning_rate": 4.22244814038634e-05, "loss": 2.364, "step": 3140000 }, { "epoch": 15.56, "learning_rate": 4.222324281743732e-05, "loss": 2.3673, "step": 3140500 }, { "epoch": 15.56, "learning_rate": 4.222200670818409e-05, "loss": 2.3848, "step": 3141000 }, { "epoch": 15.56, "learning_rate": 4.2220768121758004e-05, "loss": 2.3857, "step": 3141500 }, { "epoch": 15.57, "learning_rate": 4.221953201250477e-05, "loss": 2.3872, "step": 3142000 }, { "epoch": 15.57, "learning_rate": 4.2218293426078683e-05, "loss": 2.3764, "step": 3142500 }, { "epoch": 15.57, "learning_rate": 4.22170548396526e-05, "loss": 2.3927, "step": 3143000 }, { "epoch": 15.57, "learning_rate": 4.221581625322652e-05, "loss": 2.3675, "step": 3143500 }, { "epoch": 15.58, "learning_rate": 4.2214577666800434e-05, "loss": 2.3898, "step": 3144000 }, { "epoch": 15.58, "learning_rate": 4.221333908037435e-05, "loss": 2.3677, "step": 3144500 }, { "epoch": 15.58, "learning_rate": 4.221210049394827e-05, "loss": 2.4042, "step": 3145000 }, { "epoch": 15.58, "learning_rate": 4.2210861907522185e-05, "loss": 2.3829, "step": 3145500 }, { "epoch": 15.59, "learning_rate": 4.2209625798268954e-05, "loss": 2.3929, "step": 3146000 }, { "epoch": 15.59, "learning_rate": 4.220838721184287e-05, "loss": 2.3803, "step": 3146500 }, { "epoch": 15.59, "learning_rate": 4.220714862541679e-05, "loss": 2.3651, "step": 3147000 }, { "epoch": 15.59, "learning_rate": 4.2205910038990705e-05, "loss": 2.4064, "step": 3147500 }, { "epoch": 15.6, "learning_rate": 4.220467392973747e-05, "loss": 2.3791, "step": 3148000 }, { "epoch": 15.6, "learning_rate": 4.220343534331139e-05, "loss": 2.402, "step": 3148500 }, { "epoch": 15.6, "learning_rate": 4.22021967568853e-05, "loss": 2.3714, "step": 3149000 }, { "epoch": 15.6, "learning_rate": 4.220095817045922e-05, "loss": 2.3842, "step": 3149500 }, { "epoch": 15.61, "learning_rate": 4.2199719584033134e-05, "loss": 2.3855, "step": 3150000 }, { "epoch": 15.61, "learning_rate": 4.21984834747799e-05, "loss": 2.4002, "step": 3150500 }, { "epoch": 15.61, "learning_rate": 4.219724488835382e-05, "loss": 2.3798, "step": 3151000 }, { "epoch": 15.61, "learning_rate": 4.219600630192774e-05, "loss": 2.3679, "step": 3151500 }, { "epoch": 15.62, "learning_rate": 4.2194770192674506e-05, "loss": 2.3694, "step": 3152000 }, { "epoch": 15.62, "learning_rate": 4.219353160624842e-05, "loss": 2.3997, "step": 3152500 }, { "epoch": 15.62, "learning_rate": 4.219229301982234e-05, "loss": 2.3808, "step": 3153000 }, { "epoch": 15.62, "learning_rate": 4.2191054433396257e-05, "loss": 2.3822, "step": 3153500 }, { "epoch": 15.63, "learning_rate": 4.2189815846970173e-05, "loss": 2.3799, "step": 3154000 }, { "epoch": 15.63, "learning_rate": 4.218857726054409e-05, "loss": 2.3874, "step": 3154500 }, { "epoch": 15.63, "learning_rate": 4.218733867411801e-05, "loss": 2.387, "step": 3155000 }, { "epoch": 15.63, "learning_rate": 4.2186100087691924e-05, "loss": 2.3953, "step": 3155500 }, { "epoch": 15.64, "learning_rate": 4.2184861501265834e-05, "loss": 2.3654, "step": 3156000 }, { "epoch": 15.64, "learning_rate": 4.218362291483975e-05, "loss": 2.3823, "step": 3156500 }, { "epoch": 15.64, "learning_rate": 4.218238432841367e-05, "loss": 2.3889, "step": 3157000 }, { "epoch": 15.64, "learning_rate": 4.218114821916044e-05, "loss": 2.3922, "step": 3157500 }, { "epoch": 15.65, "learning_rate": 4.2179909632734354e-05, "loss": 2.395, "step": 3158000 }, { "epoch": 15.65, "learning_rate": 4.217867352348112e-05, "loss": 2.3761, "step": 3158500 }, { "epoch": 15.65, "learning_rate": 4.217743493705504e-05, "loss": 2.4031, "step": 3159000 }, { "epoch": 15.65, "learning_rate": 4.217619635062896e-05, "loss": 2.3753, "step": 3159500 }, { "epoch": 15.66, "learning_rate": 4.2174957764202874e-05, "loss": 2.3735, "step": 3160000 }, { "epoch": 15.66, "learning_rate": 4.217371917777679e-05, "loss": 2.3785, "step": 3160500 }, { "epoch": 15.66, "learning_rate": 4.217248059135071e-05, "loss": 2.3862, "step": 3161000 }, { "epoch": 15.66, "learning_rate": 4.217124448209747e-05, "loss": 2.3566, "step": 3161500 }, { "epoch": 15.67, "learning_rate": 4.2170008372844245e-05, "loss": 2.3831, "step": 3162000 }, { "epoch": 15.67, "learning_rate": 4.216876978641816e-05, "loss": 2.3836, "step": 3162500 }, { "epoch": 15.67, "learning_rate": 4.216753119999208e-05, "loss": 2.3862, "step": 3163000 }, { "epoch": 15.67, "learning_rate": 4.2166292613565996e-05, "loss": 2.3876, "step": 3163500 }, { "epoch": 15.68, "learning_rate": 4.2165054027139906e-05, "loss": 2.3782, "step": 3164000 }, { "epoch": 15.68, "learning_rate": 4.216381544071382e-05, "loss": 2.3743, "step": 3164500 }, { "epoch": 15.68, "learning_rate": 4.216257685428774e-05, "loss": 2.3789, "step": 3165000 }, { "epoch": 15.68, "learning_rate": 4.216133826786166e-05, "loss": 2.3919, "step": 3165500 }, { "epoch": 15.69, "learning_rate": 4.2160099681435574e-05, "loss": 2.3701, "step": 3166000 }, { "epoch": 15.69, "learning_rate": 4.215886109500949e-05, "loss": 2.3719, "step": 3166500 }, { "epoch": 15.69, "learning_rate": 4.215762250858341e-05, "loss": 2.3928, "step": 3167000 }, { "epoch": 15.69, "learning_rate": 4.2156383922157325e-05, "loss": 2.3984, "step": 3167500 }, { "epoch": 15.7, "learning_rate": 4.215514533573124e-05, "loss": 2.3644, "step": 3168000 }, { "epoch": 15.7, "learning_rate": 4.215390674930516e-05, "loss": 2.3855, "step": 3168500 }, { "epoch": 15.7, "learning_rate": 4.2152668162879075e-05, "loss": 2.3751, "step": 3169000 }, { "epoch": 15.7, "learning_rate": 4.2151429576452985e-05, "loss": 2.3593, "step": 3169500 }, { "epoch": 15.71, "learning_rate": 4.2150193467199754e-05, "loss": 2.3574, "step": 3170000 }, { "epoch": 15.71, "learning_rate": 4.214895488077367e-05, "loss": 2.3706, "step": 3170500 }, { "epoch": 15.71, "learning_rate": 4.214771629434759e-05, "loss": 2.3809, "step": 3171000 }, { "epoch": 15.71, "learning_rate": 4.214648018509436e-05, "loss": 2.3843, "step": 3171500 }, { "epoch": 15.72, "learning_rate": 4.2145241598668274e-05, "loss": 2.3904, "step": 3172000 }, { "epoch": 15.72, "learning_rate": 4.214400301224219e-05, "loss": 2.38, "step": 3172500 }, { "epoch": 15.72, "learning_rate": 4.214276442581611e-05, "loss": 2.3769, "step": 3173000 }, { "epoch": 15.72, "learning_rate": 4.2141525839390025e-05, "loss": 2.3672, "step": 3173500 }, { "epoch": 15.73, "learning_rate": 4.214028725296394e-05, "loss": 2.4197, "step": 3174000 }, { "epoch": 15.73, "learning_rate": 4.213904866653786e-05, "loss": 2.3741, "step": 3174500 }, { "epoch": 15.73, "learning_rate": 4.2137810080111775e-05, "loss": 2.3853, "step": 3175000 }, { "epoch": 15.73, "learning_rate": 4.213657149368569e-05, "loss": 2.3921, "step": 3175500 }, { "epoch": 15.74, "learning_rate": 4.213533290725961e-05, "loss": 2.3486, "step": 3176000 }, { "epoch": 15.74, "learning_rate": 4.213409679800637e-05, "loss": 2.3594, "step": 3176500 }, { "epoch": 15.74, "learning_rate": 4.213285821158029e-05, "loss": 2.3819, "step": 3177000 }, { "epoch": 15.74, "learning_rate": 4.2131619625154205e-05, "loss": 2.3789, "step": 3177500 }, { "epoch": 15.74, "learning_rate": 4.213038103872812e-05, "loss": 2.3747, "step": 3178000 }, { "epoch": 15.75, "learning_rate": 4.212914245230204e-05, "loss": 2.3711, "step": 3178500 }, { "epoch": 15.75, "learning_rate": 4.2127903865875956e-05, "loss": 2.3928, "step": 3179000 }, { "epoch": 15.75, "learning_rate": 4.2126667756622725e-05, "loss": 2.3843, "step": 3179500 }, { "epoch": 15.75, "learning_rate": 4.212543164736949e-05, "loss": 2.4232, "step": 3180000 }, { "epoch": 15.76, "learning_rate": 4.2124193060943404e-05, "loss": 2.3803, "step": 3180500 }, { "epoch": 15.76, "learning_rate": 4.212295447451732e-05, "loss": 2.3913, "step": 3181000 }, { "epoch": 15.76, "learning_rate": 4.212171588809124e-05, "loss": 2.3512, "step": 3181500 }, { "epoch": 15.76, "learning_rate": 4.2120477301665155e-05, "loss": 2.3715, "step": 3182000 }, { "epoch": 15.77, "learning_rate": 4.211923871523907e-05, "loss": 2.3803, "step": 3182500 }, { "epoch": 15.77, "learning_rate": 4.211800012881299e-05, "loss": 2.3765, "step": 3183000 }, { "epoch": 15.77, "learning_rate": 4.211676401955976e-05, "loss": 2.3654, "step": 3183500 }, { "epoch": 15.77, "learning_rate": 4.2115525433133674e-05, "loss": 2.3893, "step": 3184000 }, { "epoch": 15.78, "learning_rate": 4.211428684670759e-05, "loss": 2.3841, "step": 3184500 }, { "epoch": 15.78, "learning_rate": 4.211304826028151e-05, "loss": 2.3901, "step": 3185000 }, { "epoch": 15.78, "learning_rate": 4.2111809673855425e-05, "loss": 2.3614, "step": 3185500 }, { "epoch": 15.78, "learning_rate": 4.211057108742934e-05, "loss": 2.3858, "step": 3186000 }, { "epoch": 15.79, "learning_rate": 4.210933250100326e-05, "loss": 2.3817, "step": 3186500 }, { "epoch": 15.79, "learning_rate": 4.210809886892288e-05, "loss": 2.3566, "step": 3187000 }, { "epoch": 15.79, "learning_rate": 4.2106860282496796e-05, "loss": 2.3557, "step": 3187500 }, { "epoch": 15.79, "learning_rate": 4.210562169607071e-05, "loss": 2.3717, "step": 3188000 }, { "epoch": 15.8, "learning_rate": 4.210438310964463e-05, "loss": 2.3864, "step": 3188500 }, { "epoch": 15.8, "learning_rate": 4.210314452321854e-05, "loss": 2.3699, "step": 3189000 }, { "epoch": 15.8, "learning_rate": 4.210190593679246e-05, "loss": 2.4, "step": 3189500 }, { "epoch": 15.8, "learning_rate": 4.2100667350366374e-05, "loss": 2.3853, "step": 3190000 }, { "epoch": 15.81, "learning_rate": 4.209942876394029e-05, "loss": 2.3952, "step": 3190500 }, { "epoch": 15.81, "learning_rate": 4.209819017751421e-05, "loss": 2.3948, "step": 3191000 }, { "epoch": 15.81, "learning_rate": 4.2096951591088125e-05, "loss": 2.3522, "step": 3191500 }, { "epoch": 15.81, "learning_rate": 4.209571300466204e-05, "loss": 2.3915, "step": 3192000 }, { "epoch": 15.82, "learning_rate": 4.209447441823596e-05, "loss": 2.3876, "step": 3192500 }, { "epoch": 15.82, "learning_rate": 4.2093235831809876e-05, "loss": 2.4019, "step": 3193000 }, { "epoch": 15.82, "learning_rate": 4.209199724538379e-05, "loss": 2.3881, "step": 3193500 }, { "epoch": 15.82, "learning_rate": 4.209075865895771e-05, "loss": 2.4025, "step": 3194000 }, { "epoch": 15.83, "learning_rate": 4.208952254970447e-05, "loss": 2.3753, "step": 3194500 }, { "epoch": 15.83, "learning_rate": 4.208828644045124e-05, "loss": 2.4118, "step": 3195000 }, { "epoch": 15.83, "learning_rate": 4.208704785402516e-05, "loss": 2.3851, "step": 3195500 }, { "epoch": 15.83, "learning_rate": 4.2085809267599074e-05, "loss": 2.3801, "step": 3196000 }, { "epoch": 15.84, "learning_rate": 4.208457315834585e-05, "loss": 2.394, "step": 3196500 }, { "epoch": 15.84, "learning_rate": 4.208333457191977e-05, "loss": 2.4099, "step": 3197000 }, { "epoch": 15.84, "learning_rate": 4.208209598549368e-05, "loss": 2.3875, "step": 3197500 }, { "epoch": 15.84, "learning_rate": 4.2080857399067594e-05, "loss": 2.3896, "step": 3198000 }, { "epoch": 15.85, "learning_rate": 4.207961881264151e-05, "loss": 2.3944, "step": 3198500 }, { "epoch": 15.85, "learning_rate": 4.207838022621543e-05, "loss": 2.3874, "step": 3199000 }, { "epoch": 15.85, "learning_rate": 4.2077141639789345e-05, "loss": 2.3929, "step": 3199500 }, { "epoch": 15.85, "learning_rate": 4.2075903053363255e-05, "loss": 2.3987, "step": 3200000 }, { "epoch": 15.86, "learning_rate": 4.207466694411003e-05, "loss": 2.3844, "step": 3200500 }, { "epoch": 15.86, "learning_rate": 4.207342835768395e-05, "loss": 2.3909, "step": 3201000 }, { "epoch": 15.86, "learning_rate": 4.207218977125786e-05, "loss": 2.3669, "step": 3201500 }, { "epoch": 15.86, "learning_rate": 4.2070951184831774e-05, "loss": 2.3722, "step": 3202000 }, { "epoch": 15.87, "learning_rate": 4.206971259840569e-05, "loss": 2.3922, "step": 3202500 }, { "epoch": 15.87, "learning_rate": 4.206847401197961e-05, "loss": 2.3647, "step": 3203000 }, { "epoch": 15.87, "learning_rate": 4.2067235425553525e-05, "loss": 2.3798, "step": 3203500 }, { "epoch": 15.87, "learning_rate": 4.206599683912744e-05, "loss": 2.3789, "step": 3204000 }, { "epoch": 15.88, "learning_rate": 4.206475825270136e-05, "loss": 2.3835, "step": 3204500 }, { "epoch": 15.88, "learning_rate": 4.2063519666275276e-05, "loss": 2.4027, "step": 3205000 }, { "epoch": 15.88, "learning_rate": 4.206228107984919e-05, "loss": 2.3981, "step": 3205500 }, { "epoch": 15.88, "learning_rate": 4.206104249342311e-05, "loss": 2.3786, "step": 3206000 }, { "epoch": 15.89, "learning_rate": 4.205980390699703e-05, "loss": 2.3917, "step": 3206500 }, { "epoch": 15.89, "learning_rate": 4.2058565320570944e-05, "loss": 2.381, "step": 3207000 }, { "epoch": 15.89, "learning_rate": 4.2057329211317706e-05, "loss": 2.4097, "step": 3207500 }, { "epoch": 15.89, "learning_rate": 4.205609062489162e-05, "loss": 2.37, "step": 3208000 }, { "epoch": 15.9, "learning_rate": 4.205485203846554e-05, "loss": 2.3886, "step": 3208500 }, { "epoch": 15.9, "learning_rate": 4.2053613452039457e-05, "loss": 2.3526, "step": 3209000 }, { "epoch": 15.9, "learning_rate": 4.2052374865613374e-05, "loss": 2.3746, "step": 3209500 }, { "epoch": 15.9, "learning_rate": 4.205113875636014e-05, "loss": 2.397, "step": 3210000 }, { "epoch": 15.91, "learning_rate": 4.204990016993406e-05, "loss": 2.3853, "step": 3210500 }, { "epoch": 15.91, "learning_rate": 4.2048661583507976e-05, "loss": 2.4041, "step": 3211000 }, { "epoch": 15.91, "learning_rate": 4.204742299708189e-05, "loss": 2.3686, "step": 3211500 }, { "epoch": 15.91, "learning_rate": 4.204618441065581e-05, "loss": 2.3785, "step": 3212000 }, { "epoch": 15.92, "learning_rate": 4.204494582422973e-05, "loss": 2.4122, "step": 3212500 }, { "epoch": 15.92, "learning_rate": 4.2043707237803644e-05, "loss": 2.3736, "step": 3213000 }, { "epoch": 15.92, "learning_rate": 4.204246865137756e-05, "loss": 2.3955, "step": 3213500 }, { "epoch": 15.92, "learning_rate": 4.204123006495148e-05, "loss": 2.4139, "step": 3214000 }, { "epoch": 15.93, "learning_rate": 4.2039991478525395e-05, "loss": 2.385, "step": 3214500 }, { "epoch": 15.93, "learning_rate": 4.203875536927216e-05, "loss": 2.3506, "step": 3215000 }, { "epoch": 15.93, "learning_rate": 4.2037516782846074e-05, "loss": 2.3841, "step": 3215500 }, { "epoch": 15.93, "learning_rate": 4.203627819641999e-05, "loss": 2.3781, "step": 3216000 }, { "epoch": 15.94, "learning_rate": 4.203503960999391e-05, "loss": 2.3828, "step": 3216500 }, { "epoch": 15.94, "learning_rate": 4.2033801023567824e-05, "loss": 2.402, "step": 3217000 }, { "epoch": 15.94, "learning_rate": 4.203256243714174e-05, "loss": 2.3728, "step": 3217500 }, { "epoch": 15.94, "learning_rate": 4.203132632788851e-05, "loss": 2.3832, "step": 3218000 }, { "epoch": 15.95, "learning_rate": 4.203008774146243e-05, "loss": 2.3773, "step": 3218500 }, { "epoch": 15.95, "learning_rate": 4.2028849155036344e-05, "loss": 2.3783, "step": 3219000 }, { "epoch": 15.95, "learning_rate": 4.202761056861026e-05, "loss": 2.401, "step": 3219500 }, { "epoch": 15.95, "learning_rate": 4.202637445935702e-05, "loss": 2.4113, "step": 3220000 }, { "epoch": 15.96, "learning_rate": 4.202513587293094e-05, "loss": 2.3905, "step": 3220500 }, { "epoch": 15.96, "learning_rate": 4.202389728650486e-05, "loss": 2.3798, "step": 3221000 }, { "epoch": 15.96, "learning_rate": 4.2022658700078774e-05, "loss": 2.3802, "step": 3221500 }, { "epoch": 15.96, "learning_rate": 4.202142011365269e-05, "loss": 2.3822, "step": 3222000 }, { "epoch": 15.97, "learning_rate": 4.202018152722661e-05, "loss": 2.3768, "step": 3222500 }, { "epoch": 15.97, "learning_rate": 4.2018942940800525e-05, "loss": 2.373, "step": 3223000 }, { "epoch": 15.97, "learning_rate": 4.201770683154729e-05, "loss": 2.3979, "step": 3223500 }, { "epoch": 15.97, "learning_rate": 4.201646824512121e-05, "loss": 2.4065, "step": 3224000 }, { "epoch": 15.98, "learning_rate": 4.201522965869513e-05, "loss": 2.3761, "step": 3224500 }, { "epoch": 15.98, "learning_rate": 4.2013991072269044e-05, "loss": 2.3741, "step": 3225000 }, { "epoch": 15.98, "learning_rate": 4.201275248584296e-05, "loss": 2.354, "step": 3225500 }, { "epoch": 15.98, "learning_rate": 4.201151637658973e-05, "loss": 2.3871, "step": 3226000 }, { "epoch": 15.99, "learning_rate": 4.201027779016365e-05, "loss": 2.3685, "step": 3226500 }, { "epoch": 15.99, "learning_rate": 4.200903920373756e-05, "loss": 2.3788, "step": 3227000 }, { "epoch": 15.99, "learning_rate": 4.2007805571657184e-05, "loss": 2.365, "step": 3227500 }, { "epoch": 15.99, "learning_rate": 4.20065669852311e-05, "loss": 2.3865, "step": 3228000 }, { "epoch": 16.0, "learning_rate": 4.200532839880502e-05, "loss": 2.3693, "step": 3228500 }, { "epoch": 16.0, "learning_rate": 4.200409228955178e-05, "loss": 2.3664, "step": 3229000 }, { "epoch": 16.0, "eval_accuracy": 0.6492868439902124, "eval_accuracy_mlm": 0.6034055016175841, "eval_accuracy_nsp": 0.8654450323385329, "eval_loss": 2.379127025604248, "eval_runtime": 145.9487, "eval_samples_per_second": 1746.908, "eval_steps_per_second": 72.793, "step": 3229488 }, { "epoch": 16.0, "learning_rate": 4.20028537031257e-05, "loss": 2.3991, "step": 3229500 }, { "epoch": 16.0, "learning_rate": 4.2001615116699614e-05, "loss": 2.3311, "step": 3230000 }, { "epoch": 16.01, "learning_rate": 4.200037653027353e-05, "loss": 2.379, "step": 3230500 }, { "epoch": 16.01, "learning_rate": 4.199913794384745e-05, "loss": 2.3483, "step": 3231000 }, { "epoch": 16.01, "learning_rate": 4.1997899357421365e-05, "loss": 2.3625, "step": 3231500 }, { "epoch": 16.01, "learning_rate": 4.199666077099528e-05, "loss": 2.3739, "step": 3232000 }, { "epoch": 16.01, "learning_rate": 4.199542218456919e-05, "loss": 2.3595, "step": 3232500 }, { "epoch": 16.02, "learning_rate": 4.199418359814311e-05, "loss": 2.3672, "step": 3233000 }, { "epoch": 16.02, "learning_rate": 4.1992947488889885e-05, "loss": 2.3369, "step": 3233500 }, { "epoch": 16.02, "learning_rate": 4.19917089024638e-05, "loss": 2.3675, "step": 3234000 }, { "epoch": 16.02, "learning_rate": 4.199047031603772e-05, "loss": 2.3456, "step": 3234500 }, { "epoch": 16.03, "learning_rate": 4.1989231729611635e-05, "loss": 2.345, "step": 3235000 }, { "epoch": 16.03, "learning_rate": 4.1987993143185545e-05, "loss": 2.3621, "step": 3235500 }, { "epoch": 16.03, "learning_rate": 4.198675455675946e-05, "loss": 2.3692, "step": 3236000 }, { "epoch": 16.03, "learning_rate": 4.198551597033338e-05, "loss": 2.3583, "step": 3236500 }, { "epoch": 16.04, "learning_rate": 4.1984277383907296e-05, "loss": 2.3604, "step": 3237000 }, { "epoch": 16.04, "learning_rate": 4.198303879748121e-05, "loss": 2.3367, "step": 3237500 }, { "epoch": 16.04, "learning_rate": 4.198180268822798e-05, "loss": 2.3517, "step": 3238000 }, { "epoch": 16.04, "learning_rate": 4.19805641018019e-05, "loss": 2.3562, "step": 3238500 }, { "epoch": 16.05, "learning_rate": 4.197932551537581e-05, "loss": 2.3516, "step": 3239000 }, { "epoch": 16.05, "learning_rate": 4.1978086928949726e-05, "loss": 2.3629, "step": 3239500 }, { "epoch": 16.05, "learning_rate": 4.197684834252364e-05, "loss": 2.3508, "step": 3240000 }, { "epoch": 16.05, "learning_rate": 4.197560975609756e-05, "loss": 2.3546, "step": 3240500 }, { "epoch": 16.06, "learning_rate": 4.1974373646844335e-05, "loss": 2.3317, "step": 3241000 }, { "epoch": 16.06, "learning_rate": 4.19731375375911e-05, "loss": 2.3723, "step": 3241500 }, { "epoch": 16.06, "learning_rate": 4.1971898951165014e-05, "loss": 2.3662, "step": 3242000 }, { "epoch": 16.06, "learning_rate": 4.197066036473893e-05, "loss": 2.3793, "step": 3242500 }, { "epoch": 16.07, "learning_rate": 4.196942177831285e-05, "loss": 2.3432, "step": 3243000 }, { "epoch": 16.07, "learning_rate": 4.196818566905962e-05, "loss": 2.357, "step": 3243500 }, { "epoch": 16.07, "learning_rate": 4.1966947082633534e-05, "loss": 2.3268, "step": 3244000 }, { "epoch": 16.07, "learning_rate": 4.196570849620745e-05, "loss": 2.3552, "step": 3244500 }, { "epoch": 16.08, "learning_rate": 4.196446990978137e-05, "loss": 2.3761, "step": 3245000 }, { "epoch": 16.08, "learning_rate": 4.1963231323355285e-05, "loss": 2.3717, "step": 3245500 }, { "epoch": 16.08, "learning_rate": 4.19619927369292e-05, "loss": 2.3475, "step": 3246000 }, { "epoch": 16.08, "learning_rate": 4.196075415050312e-05, "loss": 2.3493, "step": 3246500 }, { "epoch": 16.09, "learning_rate": 4.1959515564077036e-05, "loss": 2.3632, "step": 3247000 }, { "epoch": 16.09, "learning_rate": 4.1958279454823804e-05, "loss": 2.3624, "step": 3247500 }, { "epoch": 16.09, "learning_rate": 4.1957040868397715e-05, "loss": 2.3572, "step": 3248000 }, { "epoch": 16.09, "learning_rate": 4.195580228197163e-05, "loss": 2.3751, "step": 3248500 }, { "epoch": 16.1, "learning_rate": 4.195456369554555e-05, "loss": 2.3589, "step": 3249000 }, { "epoch": 16.1, "learning_rate": 4.195332758629232e-05, "loss": 2.3459, "step": 3249500 }, { "epoch": 16.1, "learning_rate": 4.1952088999866234e-05, "loss": 2.3791, "step": 3250000 }, { "epoch": 16.1, "learning_rate": 4.195085041344015e-05, "loss": 2.3528, "step": 3250500 }, { "epoch": 16.11, "learning_rate": 4.194961182701407e-05, "loss": 2.3381, "step": 3251000 }, { "epoch": 16.11, "learning_rate": 4.1948373240587985e-05, "loss": 2.3431, "step": 3251500 }, { "epoch": 16.11, "learning_rate": 4.19471346541619e-05, "loss": 2.3478, "step": 3252000 }, { "epoch": 16.11, "learning_rate": 4.194589606773582e-05, "loss": 2.3681, "step": 3252500 }, { "epoch": 16.12, "learning_rate": 4.1944657481309736e-05, "loss": 2.3455, "step": 3253000 }, { "epoch": 16.12, "learning_rate": 4.194341889488365e-05, "loss": 2.3379, "step": 3253500 }, { "epoch": 16.12, "learning_rate": 4.194218030845757e-05, "loss": 2.3439, "step": 3254000 }, { "epoch": 16.12, "learning_rate": 4.194094172203148e-05, "loss": 2.3919, "step": 3254500 }, { "epoch": 16.13, "learning_rate": 4.193970561277825e-05, "loss": 2.3615, "step": 3255000 }, { "epoch": 16.13, "learning_rate": 4.1938467026352165e-05, "loss": 2.3455, "step": 3255500 }, { "epoch": 16.13, "learning_rate": 4.193722843992608e-05, "loss": 2.3646, "step": 3256000 }, { "epoch": 16.13, "learning_rate": 4.19359898535e-05, "loss": 2.3457, "step": 3256500 }, { "epoch": 16.14, "learning_rate": 4.1934751267073916e-05, "loss": 2.364, "step": 3257000 }, { "epoch": 16.14, "learning_rate": 4.1933512680647826e-05, "loss": 2.3615, "step": 3257500 }, { "epoch": 16.14, "learning_rate": 4.193227409422174e-05, "loss": 2.3757, "step": 3258000 }, { "epoch": 16.14, "learning_rate": 4.193104046214137e-05, "loss": 2.3399, "step": 3258500 }, { "epoch": 16.15, "learning_rate": 4.192980187571529e-05, "loss": 2.3593, "step": 3259000 }, { "epoch": 16.15, "learning_rate": 4.192856576646205e-05, "loss": 2.378, "step": 3259500 }, { "epoch": 16.15, "learning_rate": 4.192732718003597e-05, "loss": 2.362, "step": 3260000 }, { "epoch": 16.15, "learning_rate": 4.1926088593609884e-05, "loss": 2.381, "step": 3260500 }, { "epoch": 16.16, "learning_rate": 4.19248500071838e-05, "loss": 2.3708, "step": 3261000 }, { "epoch": 16.16, "learning_rate": 4.192361142075772e-05, "loss": 2.3763, "step": 3261500 }, { "epoch": 16.16, "learning_rate": 4.1922372834331634e-05, "loss": 2.3587, "step": 3262000 }, { "epoch": 16.16, "learning_rate": 4.192113424790555e-05, "loss": 2.3468, "step": 3262500 }, { "epoch": 16.17, "learning_rate": 4.191989566147947e-05, "loss": 2.3629, "step": 3263000 }, { "epoch": 16.17, "learning_rate": 4.1918657075053385e-05, "loss": 2.3905, "step": 3263500 }, { "epoch": 16.17, "learning_rate": 4.1917423442973006e-05, "loss": 2.3469, "step": 3264000 }, { "epoch": 16.17, "learning_rate": 4.191618485654692e-05, "loss": 2.3685, "step": 3264500 }, { "epoch": 16.18, "learning_rate": 4.191494627012083e-05, "loss": 2.3798, "step": 3265000 }, { "epoch": 16.18, "learning_rate": 4.191370768369475e-05, "loss": 2.368, "step": 3265500 }, { "epoch": 16.18, "learning_rate": 4.191246909726867e-05, "loss": 2.3569, "step": 3266000 }, { "epoch": 16.18, "learning_rate": 4.1911230510842584e-05, "loss": 2.3642, "step": 3266500 }, { "epoch": 16.19, "learning_rate": 4.19099919244165e-05, "loss": 2.339, "step": 3267000 }, { "epoch": 16.19, "learning_rate": 4.190875333799042e-05, "loss": 2.392, "step": 3267500 }, { "epoch": 16.19, "learning_rate": 4.1907514751564335e-05, "loss": 2.3535, "step": 3268000 }, { "epoch": 16.19, "learning_rate": 4.190627616513825e-05, "loss": 2.3608, "step": 3268500 }, { "epoch": 16.2, "learning_rate": 4.190504005588502e-05, "loss": 2.3881, "step": 3269000 }, { "epoch": 16.2, "learning_rate": 4.190380146945894e-05, "loss": 2.354, "step": 3269500 }, { "epoch": 16.2, "learning_rate": 4.1902562883032854e-05, "loss": 2.3587, "step": 3270000 }, { "epoch": 16.2, "learning_rate": 4.190132429660677e-05, "loss": 2.375, "step": 3270500 }, { "epoch": 16.21, "learning_rate": 4.190008571018069e-05, "loss": 2.3826, "step": 3271000 }, { "epoch": 16.21, "learning_rate": 4.1898847123754605e-05, "loss": 2.3526, "step": 3271500 }, { "epoch": 16.21, "learning_rate": 4.189760853732852e-05, "loss": 2.3849, "step": 3272000 }, { "epoch": 16.21, "learning_rate": 4.189636995090244e-05, "loss": 2.3827, "step": 3272500 }, { "epoch": 16.22, "learning_rate": 4.1895131364476356e-05, "loss": 2.3505, "step": 3273000 }, { "epoch": 16.22, "learning_rate": 4.189389277805027e-05, "loss": 2.3436, "step": 3273500 }, { "epoch": 16.22, "learning_rate": 4.189265419162419e-05, "loss": 2.3404, "step": 3274000 }, { "epoch": 16.22, "learning_rate": 4.1891415605198106e-05, "loss": 2.3904, "step": 3274500 }, { "epoch": 16.23, "learning_rate": 4.189017701877202e-05, "loss": 2.3878, "step": 3275000 }, { "epoch": 16.23, "learning_rate": 4.188894338669164e-05, "loss": 2.372, "step": 3275500 }, { "epoch": 16.23, "learning_rate": 4.1887704800265554e-05, "loss": 2.3541, "step": 3276000 }, { "epoch": 16.23, "learning_rate": 4.188646621383947e-05, "loss": 2.3633, "step": 3276500 }, { "epoch": 16.24, "learning_rate": 4.188522762741339e-05, "loss": 2.3915, "step": 3277000 }, { "epoch": 16.24, "learning_rate": 4.1883989040987305e-05, "loss": 2.3363, "step": 3277500 }, { "epoch": 16.24, "learning_rate": 4.188275045456122e-05, "loss": 2.3568, "step": 3278000 }, { "epoch": 16.24, "learning_rate": 4.188151186813514e-05, "loss": 2.3588, "step": 3278500 }, { "epoch": 16.25, "learning_rate": 4.18802757588819e-05, "loss": 2.3645, "step": 3279000 }, { "epoch": 16.25, "learning_rate": 4.187903717245582e-05, "loss": 2.3591, "step": 3279500 }, { "epoch": 16.25, "learning_rate": 4.1877798586029735e-05, "loss": 2.3448, "step": 3280000 }, { "epoch": 16.25, "learning_rate": 4.187655999960365e-05, "loss": 2.3665, "step": 3280500 }, { "epoch": 16.26, "learning_rate": 4.187532389035042e-05, "loss": 2.3366, "step": 3281000 }, { "epoch": 16.26, "learning_rate": 4.187408530392434e-05, "loss": 2.3512, "step": 3281500 }, { "epoch": 16.26, "learning_rate": 4.1872846717498254e-05, "loss": 2.3426, "step": 3282000 }, { "epoch": 16.26, "learning_rate": 4.187160813107217e-05, "loss": 2.3714, "step": 3282500 }, { "epoch": 16.27, "learning_rate": 4.187036954464609e-05, "loss": 2.3823, "step": 3283000 }, { "epoch": 16.27, "learning_rate": 4.186913343539286e-05, "loss": 2.3549, "step": 3283500 }, { "epoch": 16.27, "learning_rate": 4.1867894848966774e-05, "loss": 2.3734, "step": 3284000 }, { "epoch": 16.27, "learning_rate": 4.186665626254069e-05, "loss": 2.372, "step": 3284500 }, { "epoch": 16.28, "learning_rate": 4.18654176761146e-05, "loss": 2.3525, "step": 3285000 }, { "epoch": 16.28, "learning_rate": 4.186417908968852e-05, "loss": 2.3635, "step": 3285500 }, { "epoch": 16.28, "learning_rate": 4.1862940503262435e-05, "loss": 2.3709, "step": 3286000 }, { "epoch": 16.28, "learning_rate": 4.186170191683635e-05, "loss": 2.364, "step": 3286500 }, { "epoch": 16.28, "learning_rate": 4.186046333041027e-05, "loss": 2.3777, "step": 3287000 }, { "epoch": 16.29, "learning_rate": 4.185922722115704e-05, "loss": 2.3548, "step": 3287500 }, { "epoch": 16.29, "learning_rate": 4.1857988634730954e-05, "loss": 2.3503, "step": 3288000 }, { "epoch": 16.29, "learning_rate": 4.185675004830487e-05, "loss": 2.3702, "step": 3288500 }, { "epoch": 16.29, "learning_rate": 4.185551146187879e-05, "loss": 2.3839, "step": 3289000 }, { "epoch": 16.3, "learning_rate": 4.185427535262556e-05, "loss": 2.3612, "step": 3289500 }, { "epoch": 16.3, "learning_rate": 4.1853039243372326e-05, "loss": 2.3803, "step": 3290000 }, { "epoch": 16.3, "learning_rate": 4.185180065694624e-05, "loss": 2.3574, "step": 3290500 }, { "epoch": 16.3, "learning_rate": 4.185056207052016e-05, "loss": 2.3609, "step": 3291000 }, { "epoch": 16.31, "learning_rate": 4.184932348409408e-05, "loss": 2.3628, "step": 3291500 }, { "epoch": 16.31, "learning_rate": 4.1848087374840846e-05, "loss": 2.3595, "step": 3292000 }, { "epoch": 16.31, "learning_rate": 4.1846851265587614e-05, "loss": 2.3629, "step": 3292500 }, { "epoch": 16.31, "learning_rate": 4.1845612679161525e-05, "loss": 2.3337, "step": 3293000 }, { "epoch": 16.32, "learning_rate": 4.184437409273544e-05, "loss": 2.3881, "step": 3293500 }, { "epoch": 16.32, "learning_rate": 4.184313550630936e-05, "loss": 2.3534, "step": 3294000 }, { "epoch": 16.32, "learning_rate": 4.184189939705613e-05, "loss": 2.4006, "step": 3294500 }, { "epoch": 16.32, "learning_rate": 4.1840660810630044e-05, "loss": 2.351, "step": 3295000 }, { "epoch": 16.33, "learning_rate": 4.183942222420396e-05, "loss": 2.3661, "step": 3295500 }, { "epoch": 16.33, "learning_rate": 4.183818363777788e-05, "loss": 2.3705, "step": 3296000 }, { "epoch": 16.33, "learning_rate": 4.1836945051351795e-05, "loss": 2.3444, "step": 3296500 }, { "epoch": 16.33, "learning_rate": 4.183570646492571e-05, "loss": 2.3706, "step": 3297000 }, { "epoch": 16.34, "learning_rate": 4.183446787849963e-05, "loss": 2.3554, "step": 3297500 }, { "epoch": 16.34, "learning_rate": 4.1833229292073546e-05, "loss": 2.372, "step": 3298000 }, { "epoch": 16.34, "learning_rate": 4.183199070564746e-05, "loss": 2.3748, "step": 3298500 }, { "epoch": 16.34, "learning_rate": 4.183075211922138e-05, "loss": 2.3779, "step": 3299000 }, { "epoch": 16.35, "learning_rate": 4.1829513532795296e-05, "loss": 2.3628, "step": 3299500 }, { "epoch": 16.35, "learning_rate": 4.182827494636921e-05, "loss": 2.3664, "step": 3300000 }, { "epoch": 16.35, "learning_rate": 4.182703635994313e-05, "loss": 2.3823, "step": 3300500 }, { "epoch": 16.35, "learning_rate": 4.182579777351705e-05, "loss": 2.3607, "step": 3301000 }, { "epoch": 16.36, "learning_rate": 4.1824559187090964e-05, "loss": 2.3748, "step": 3301500 }, { "epoch": 16.36, "learning_rate": 4.1823320600664874e-05, "loss": 2.3491, "step": 3302000 }, { "epoch": 16.36, "learning_rate": 4.182208201423879e-05, "loss": 2.3577, "step": 3302500 }, { "epoch": 16.36, "learning_rate": 4.182084342781271e-05, "loss": 2.3337, "step": 3303000 }, { "epoch": 16.37, "learning_rate": 4.1819604841386625e-05, "loss": 2.3717, "step": 3303500 }, { "epoch": 16.37, "learning_rate": 4.181836625496054e-05, "loss": 2.3637, "step": 3304000 }, { "epoch": 16.37, "learning_rate": 4.181712766853446e-05, "loss": 2.3689, "step": 3304500 }, { "epoch": 16.37, "learning_rate": 4.181589155928122e-05, "loss": 2.3704, "step": 3305000 }, { "epoch": 16.38, "learning_rate": 4.181465297285514e-05, "loss": 2.3826, "step": 3305500 }, { "epoch": 16.38, "learning_rate": 4.1813414386429055e-05, "loss": 2.3344, "step": 3306000 }, { "epoch": 16.38, "learning_rate": 4.181217580000297e-05, "loss": 2.3534, "step": 3306500 }, { "epoch": 16.38, "learning_rate": 4.181093721357689e-05, "loss": 2.3628, "step": 3307000 }, { "epoch": 16.39, "learning_rate": 4.1809698627150806e-05, "loss": 2.3458, "step": 3307500 }, { "epoch": 16.39, "learning_rate": 4.180846251789758e-05, "loss": 2.3597, "step": 3308000 }, { "epoch": 16.39, "learning_rate": 4.180722640864434e-05, "loss": 2.3612, "step": 3308500 }, { "epoch": 16.39, "learning_rate": 4.180598782221826e-05, "loss": 2.348, "step": 3309000 }, { "epoch": 16.4, "learning_rate": 4.180474923579218e-05, "loss": 2.3849, "step": 3309500 }, { "epoch": 16.4, "learning_rate": 4.1803510649366094e-05, "loss": 2.3843, "step": 3310000 }, { "epoch": 16.4, "learning_rate": 4.180227206294001e-05, "loss": 2.3771, "step": 3310500 }, { "epoch": 16.4, "learning_rate": 4.180103347651393e-05, "loss": 2.3771, "step": 3311000 }, { "epoch": 16.41, "learning_rate": 4.179979489008784e-05, "loss": 2.3497, "step": 3311500 }, { "epoch": 16.41, "learning_rate": 4.1798556303661755e-05, "loss": 2.348, "step": 3312000 }, { "epoch": 16.41, "learning_rate": 4.179732019440853e-05, "loss": 2.3576, "step": 3312500 }, { "epoch": 16.41, "learning_rate": 4.179608160798245e-05, "loss": 2.3733, "step": 3313000 }, { "epoch": 16.42, "learning_rate": 4.1794843021556364e-05, "loss": 2.3712, "step": 3313500 }, { "epoch": 16.42, "learning_rate": 4.179360443513028e-05, "loss": 2.3685, "step": 3314000 }, { "epoch": 16.42, "learning_rate": 4.179236832587704e-05, "loss": 2.3851, "step": 3314500 }, { "epoch": 16.42, "learning_rate": 4.179112973945096e-05, "loss": 2.3961, "step": 3315000 }, { "epoch": 16.43, "learning_rate": 4.178989115302488e-05, "loss": 2.4049, "step": 3315500 }, { "epoch": 16.43, "learning_rate": 4.1788652566598794e-05, "loss": 2.3543, "step": 3316000 }, { "epoch": 16.43, "learning_rate": 4.178741398017271e-05, "loss": 2.3707, "step": 3316500 }, { "epoch": 16.43, "learning_rate": 4.178617539374663e-05, "loss": 2.3482, "step": 3317000 }, { "epoch": 16.44, "learning_rate": 4.178493680732054e-05, "loss": 2.3832, "step": 3317500 }, { "epoch": 16.44, "learning_rate": 4.1783700698067314e-05, "loss": 2.4093, "step": 3318000 }, { "epoch": 16.44, "learning_rate": 4.178246458881408e-05, "loss": 2.3686, "step": 3318500 }, { "epoch": 16.44, "learning_rate": 4.1781226002388e-05, "loss": 2.3364, "step": 3319000 }, { "epoch": 16.45, "learning_rate": 4.177998989313476e-05, "loss": 2.3651, "step": 3319500 }, { "epoch": 16.45, "learning_rate": 4.177875130670868e-05, "loss": 2.3806, "step": 3320000 }, { "epoch": 16.45, "learning_rate": 4.1777512720282595e-05, "loss": 2.3776, "step": 3320500 }, { "epoch": 16.45, "learning_rate": 4.177627413385651e-05, "loss": 2.3673, "step": 3321000 }, { "epoch": 16.46, "learning_rate": 4.177503554743043e-05, "loss": 2.3572, "step": 3321500 }, { "epoch": 16.46, "learning_rate": 4.1773796961004346e-05, "loss": 2.3464, "step": 3322000 }, { "epoch": 16.46, "learning_rate": 4.177255837457826e-05, "loss": 2.3861, "step": 3322500 }, { "epoch": 16.46, "learning_rate": 4.177131978815218e-05, "loss": 2.3693, "step": 3323000 }, { "epoch": 16.47, "learning_rate": 4.177008367889895e-05, "loss": 2.362, "step": 3323500 }, { "epoch": 16.47, "learning_rate": 4.1768845092472866e-05, "loss": 2.3583, "step": 3324000 }, { "epoch": 16.47, "learning_rate": 4.176760650604678e-05, "loss": 2.4084, "step": 3324500 }, { "epoch": 16.47, "learning_rate": 4.17663679196207e-05, "loss": 2.3874, "step": 3325000 }, { "epoch": 16.48, "learning_rate": 4.1765129333194617e-05, "loss": 2.4014, "step": 3325500 }, { "epoch": 16.48, "learning_rate": 4.1763890746768533e-05, "loss": 2.3739, "step": 3326000 }, { "epoch": 16.48, "learning_rate": 4.1762652160342444e-05, "loss": 2.362, "step": 3326500 }, { "epoch": 16.48, "learning_rate": 4.176141357391636e-05, "loss": 2.3732, "step": 3327000 }, { "epoch": 16.49, "learning_rate": 4.176017498749028e-05, "loss": 2.3626, "step": 3327500 }, { "epoch": 16.49, "learning_rate": 4.1758936401064194e-05, "loss": 2.3443, "step": 3328000 }, { "epoch": 16.49, "learning_rate": 4.175770029181096e-05, "loss": 2.3804, "step": 3328500 }, { "epoch": 16.49, "learning_rate": 4.175646170538488e-05, "loss": 2.3758, "step": 3329000 }, { "epoch": 16.5, "learning_rate": 4.17552231189588e-05, "loss": 2.3673, "step": 3329500 }, { "epoch": 16.5, "learning_rate": 4.1753984532532714e-05, "loss": 2.3549, "step": 3330000 }, { "epoch": 16.5, "learning_rate": 4.175274594610663e-05, "loss": 2.362, "step": 3330500 }, { "epoch": 16.5, "learning_rate": 4.175150735968055e-05, "loss": 2.3912, "step": 3331000 }, { "epoch": 16.51, "learning_rate": 4.1750268773254465e-05, "loss": 2.3859, "step": 3331500 }, { "epoch": 16.51, "learning_rate": 4.174903018682838e-05, "loss": 2.3581, "step": 3332000 }, { "epoch": 16.51, "learning_rate": 4.17477916004023e-05, "loss": 2.3842, "step": 3332500 }, { "epoch": 16.51, "learning_rate": 4.1746553013976216e-05, "loss": 2.3812, "step": 3333000 }, { "epoch": 16.52, "learning_rate": 4.1745314427550126e-05, "loss": 2.3868, "step": 3333500 }, { "epoch": 16.52, "learning_rate": 4.174407584112404e-05, "loss": 2.3852, "step": 3334000 }, { "epoch": 16.52, "learning_rate": 4.174283725469796e-05, "loss": 2.3561, "step": 3334500 }, { "epoch": 16.52, "learning_rate": 4.1741598668271877e-05, "loss": 2.3751, "step": 3335000 }, { "epoch": 16.53, "learning_rate": 4.1740362559018645e-05, "loss": 2.3717, "step": 3335500 }, { "epoch": 16.53, "learning_rate": 4.173912397259256e-05, "loss": 2.3852, "step": 3336000 }, { "epoch": 16.53, "learning_rate": 4.173788786333933e-05, "loss": 2.4165, "step": 3336500 }, { "epoch": 16.53, "learning_rate": 4.173664927691325e-05, "loss": 2.3534, "step": 3337000 }, { "epoch": 16.54, "learning_rate": 4.1735410690487165e-05, "loss": 2.3673, "step": 3337500 }, { "epoch": 16.54, "learning_rate": 4.173417210406108e-05, "loss": 2.3468, "step": 3338000 }, { "epoch": 16.54, "learning_rate": 4.1732933517635e-05, "loss": 2.3575, "step": 3338500 }, { "epoch": 16.54, "learning_rate": 4.1731694931208916e-05, "loss": 2.4043, "step": 3339000 }, { "epoch": 16.55, "learning_rate": 4.1730456344782826e-05, "loss": 2.3744, "step": 3339500 }, { "epoch": 16.55, "learning_rate": 4.172921775835674e-05, "loss": 2.3968, "step": 3340000 }, { "epoch": 16.55, "learning_rate": 4.172797917193066e-05, "loss": 2.389, "step": 3340500 }, { "epoch": 16.55, "learning_rate": 4.172674306267743e-05, "loss": 2.3549, "step": 3341000 }, { "epoch": 16.55, "learning_rate": 4.17255069534242e-05, "loss": 2.3684, "step": 3341500 }, { "epoch": 16.56, "learning_rate": 4.1724268366998114e-05, "loss": 2.379, "step": 3342000 }, { "epoch": 16.56, "learning_rate": 4.172302978057203e-05, "loss": 2.3729, "step": 3342500 }, { "epoch": 16.56, "learning_rate": 4.172179119414595e-05, "loss": 2.3568, "step": 3343000 }, { "epoch": 16.56, "learning_rate": 4.172055508489272e-05, "loss": 2.3773, "step": 3343500 }, { "epoch": 16.57, "learning_rate": 4.1719316498466634e-05, "loss": 2.3581, "step": 3344000 }, { "epoch": 16.57, "learning_rate": 4.171807791204055e-05, "loss": 2.368, "step": 3344500 }, { "epoch": 16.57, "learning_rate": 4.171683932561447e-05, "loss": 2.3668, "step": 3345000 }, { "epoch": 16.57, "learning_rate": 4.1715600739188385e-05, "loss": 2.3714, "step": 3345500 }, { "epoch": 16.58, "learning_rate": 4.17143621527623e-05, "loss": 2.3788, "step": 3346000 }, { "epoch": 16.58, "learning_rate": 4.171312356633622e-05, "loss": 2.3737, "step": 3346500 }, { "epoch": 16.58, "learning_rate": 4.171188497991013e-05, "loss": 2.3877, "step": 3347000 }, { "epoch": 16.58, "learning_rate": 4.1710646393484046e-05, "loss": 2.359, "step": 3347500 }, { "epoch": 16.59, "learning_rate": 4.170940780705796e-05, "loss": 2.3579, "step": 3348000 }, { "epoch": 16.59, "learning_rate": 4.170817169780473e-05, "loss": 2.3688, "step": 3348500 }, { "epoch": 16.59, "learning_rate": 4.170693311137865e-05, "loss": 2.3799, "step": 3349000 }, { "epoch": 16.59, "learning_rate": 4.170569700212542e-05, "loss": 2.3597, "step": 3349500 }, { "epoch": 16.6, "learning_rate": 4.1704458415699334e-05, "loss": 2.3678, "step": 3350000 }, { "epoch": 16.6, "learning_rate": 4.170321982927325e-05, "loss": 2.3693, "step": 3350500 }, { "epoch": 16.6, "learning_rate": 4.170198124284717e-05, "loss": 2.3535, "step": 3351000 }, { "epoch": 16.6, "learning_rate": 4.1700742656421085e-05, "loss": 2.3655, "step": 3351500 }, { "epoch": 16.61, "learning_rate": 4.169950654716785e-05, "loss": 2.3376, "step": 3352000 }, { "epoch": 16.61, "learning_rate": 4.1698267960741764e-05, "loss": 2.3532, "step": 3352500 }, { "epoch": 16.61, "learning_rate": 4.169702937431568e-05, "loss": 2.3747, "step": 3353000 }, { "epoch": 16.61, "learning_rate": 4.16957907878896e-05, "loss": 2.3603, "step": 3353500 }, { "epoch": 16.62, "learning_rate": 4.1694552201463515e-05, "loss": 2.377, "step": 3354000 }, { "epoch": 16.62, "learning_rate": 4.169331361503743e-05, "loss": 2.3917, "step": 3354500 }, { "epoch": 16.62, "learning_rate": 4.16920775057842e-05, "loss": 2.3773, "step": 3355000 }, { "epoch": 16.62, "learning_rate": 4.169083891935812e-05, "loss": 2.3885, "step": 3355500 }, { "epoch": 16.63, "learning_rate": 4.1689600332932034e-05, "loss": 2.3602, "step": 3356000 }, { "epoch": 16.63, "learning_rate": 4.16883642236788e-05, "loss": 2.3732, "step": 3356500 }, { "epoch": 16.63, "learning_rate": 4.168712563725271e-05, "loss": 2.3674, "step": 3357000 }, { "epoch": 16.63, "learning_rate": 4.168588705082663e-05, "loss": 2.3854, "step": 3357500 }, { "epoch": 16.64, "learning_rate": 4.168464846440055e-05, "loss": 2.3712, "step": 3358000 }, { "epoch": 16.64, "learning_rate": 4.1683409877974464e-05, "loss": 2.3552, "step": 3358500 }, { "epoch": 16.64, "learning_rate": 4.168217129154838e-05, "loss": 2.3662, "step": 3359000 }, { "epoch": 16.64, "learning_rate": 4.168093518229515e-05, "loss": 2.3816, "step": 3359500 }, { "epoch": 16.65, "learning_rate": 4.1679696595869067e-05, "loss": 2.3567, "step": 3360000 }, { "epoch": 16.65, "learning_rate": 4.1678458009442983e-05, "loss": 2.3709, "step": 3360500 }, { "epoch": 16.65, "learning_rate": 4.16772194230169e-05, "loss": 2.3764, "step": 3361000 }, { "epoch": 16.65, "learning_rate": 4.167598083659082e-05, "loss": 2.375, "step": 3361500 }, { "epoch": 16.66, "learning_rate": 4.1674742250164734e-05, "loss": 2.3625, "step": 3362000 }, { "epoch": 16.66, "learning_rate": 4.167350366373865e-05, "loss": 2.3537, "step": 3362500 }, { "epoch": 16.66, "learning_rate": 4.167226507731257e-05, "loss": 2.3476, "step": 3363000 }, { "epoch": 16.66, "learning_rate": 4.1671026490886485e-05, "loss": 2.4053, "step": 3363500 }, { "epoch": 16.67, "learning_rate": 4.16697879044604e-05, "loss": 2.3621, "step": 3364000 }, { "epoch": 16.67, "learning_rate": 4.166854931803432e-05, "loss": 2.3565, "step": 3364500 }, { "epoch": 16.67, "learning_rate": 4.1667310731608236e-05, "loss": 2.4182, "step": 3365000 }, { "epoch": 16.67, "learning_rate": 4.1666074622355e-05, "loss": 2.378, "step": 3365500 }, { "epoch": 16.68, "learning_rate": 4.1664836035928915e-05, "loss": 2.3553, "step": 3366000 }, { "epoch": 16.68, "learning_rate": 4.166359744950283e-05, "loss": 2.3641, "step": 3366500 }, { "epoch": 16.68, "learning_rate": 4.166235886307675e-05, "loss": 2.3704, "step": 3367000 }, { "epoch": 16.68, "learning_rate": 4.1661120276650666e-05, "loss": 2.3547, "step": 3367500 }, { "epoch": 16.69, "learning_rate": 4.165988169022458e-05, "loss": 2.3821, "step": 3368000 }, { "epoch": 16.69, "learning_rate": 4.16586431037985e-05, "loss": 2.3792, "step": 3368500 }, { "epoch": 16.69, "learning_rate": 4.165740699454527e-05, "loss": 2.3716, "step": 3369000 }, { "epoch": 16.69, "learning_rate": 4.1656168408119185e-05, "loss": 2.3796, "step": 3369500 }, { "epoch": 16.7, "learning_rate": 4.16549298216931e-05, "loss": 2.3907, "step": 3370000 }, { "epoch": 16.7, "learning_rate": 4.165369123526702e-05, "loss": 2.3926, "step": 3370500 }, { "epoch": 16.7, "learning_rate": 4.165245512601378e-05, "loss": 2.3875, "step": 3371000 }, { "epoch": 16.7, "learning_rate": 4.16512165395877e-05, "loss": 2.3632, "step": 3371500 }, { "epoch": 16.71, "learning_rate": 4.1649977953161615e-05, "loss": 2.3904, "step": 3372000 }, { "epoch": 16.71, "learning_rate": 4.1648741843908384e-05, "loss": 2.3739, "step": 3372500 }, { "epoch": 16.71, "learning_rate": 4.16475032574823e-05, "loss": 2.3697, "step": 3373000 }, { "epoch": 16.71, "learning_rate": 4.164626467105622e-05, "loss": 2.3728, "step": 3373500 }, { "epoch": 16.72, "learning_rate": 4.164502856180299e-05, "loss": 2.3865, "step": 3374000 }, { "epoch": 16.72, "learning_rate": 4.16437899753769e-05, "loss": 2.3485, "step": 3374500 }, { "epoch": 16.72, "learning_rate": 4.164255138895082e-05, "loss": 2.3721, "step": 3375000 }, { "epoch": 16.72, "learning_rate": 4.164131280252474e-05, "loss": 2.3634, "step": 3375500 }, { "epoch": 16.73, "learning_rate": 4.1640076693271506e-05, "loss": 2.3643, "step": 3376000 }, { "epoch": 16.73, "learning_rate": 4.163883810684542e-05, "loss": 2.3706, "step": 3376500 }, { "epoch": 16.73, "learning_rate": 4.163759952041934e-05, "loss": 2.3708, "step": 3377000 }, { "epoch": 16.73, "learning_rate": 4.163636093399325e-05, "loss": 2.3637, "step": 3377500 }, { "epoch": 16.74, "learning_rate": 4.163512234756717e-05, "loss": 2.378, "step": 3378000 }, { "epoch": 16.74, "learning_rate": 4.1633883761141084e-05, "loss": 2.3886, "step": 3378500 }, { "epoch": 16.74, "learning_rate": 4.1632645174715e-05, "loss": 2.3935, "step": 3379000 }, { "epoch": 16.74, "learning_rate": 4.163140658828892e-05, "loss": 2.3653, "step": 3379500 }, { "epoch": 16.75, "learning_rate": 4.1630168001862835e-05, "loss": 2.3607, "step": 3380000 }, { "epoch": 16.75, "learning_rate": 4.162892941543675e-05, "loss": 2.3704, "step": 3380500 }, { "epoch": 16.75, "learning_rate": 4.162769082901067e-05, "loss": 2.3722, "step": 3381000 }, { "epoch": 16.75, "learning_rate": 4.1626452242584585e-05, "loss": 2.3535, "step": 3381500 }, { "epoch": 16.76, "learning_rate": 4.16252136561585e-05, "loss": 2.3634, "step": 3382000 }, { "epoch": 16.76, "learning_rate": 4.162397754690527e-05, "loss": 2.3759, "step": 3382500 }, { "epoch": 16.76, "learning_rate": 4.162273896047919e-05, "loss": 2.3764, "step": 3383000 }, { "epoch": 16.76, "learning_rate": 4.1621500374053105e-05, "loss": 2.3739, "step": 3383500 }, { "epoch": 16.77, "learning_rate": 4.1620261787627015e-05, "loss": 2.3762, "step": 3384000 }, { "epoch": 16.77, "learning_rate": 4.161902320120093e-05, "loss": 2.3656, "step": 3384500 }, { "epoch": 16.77, "learning_rate": 4.16177870919477e-05, "loss": 2.3756, "step": 3385000 }, { "epoch": 16.77, "learning_rate": 4.161654850552162e-05, "loss": 2.3657, "step": 3385500 }, { "epoch": 16.78, "learning_rate": 4.1615309919095535e-05, "loss": 2.3836, "step": 3386000 }, { "epoch": 16.78, "learning_rate": 4.161407133266945e-05, "loss": 2.3893, "step": 3386500 }, { "epoch": 16.78, "learning_rate": 4.161283274624337e-05, "loss": 2.3778, "step": 3387000 }, { "epoch": 16.78, "learning_rate": 4.1611594159817286e-05, "loss": 2.3606, "step": 3387500 }, { "epoch": 16.79, "learning_rate": 4.16103555733912e-05, "loss": 2.3618, "step": 3388000 }, { "epoch": 16.79, "learning_rate": 4.160911698696512e-05, "loss": 2.3565, "step": 3388500 }, { "epoch": 16.79, "learning_rate": 4.160788087771189e-05, "loss": 2.3636, "step": 3389000 }, { "epoch": 16.79, "learning_rate": 4.1606642291285805e-05, "loss": 2.3672, "step": 3389500 }, { "epoch": 16.8, "learning_rate": 4.160540370485972e-05, "loss": 2.3587, "step": 3390000 }, { "epoch": 16.8, "learning_rate": 4.160416511843364e-05, "loss": 2.3782, "step": 3390500 }, { "epoch": 16.8, "learning_rate": 4.160292653200755e-05, "loss": 2.3666, "step": 3391000 }, { "epoch": 16.8, "learning_rate": 4.160169042275432e-05, "loss": 2.3514, "step": 3391500 }, { "epoch": 16.81, "learning_rate": 4.1600451836328235e-05, "loss": 2.3646, "step": 3392000 }, { "epoch": 16.81, "learning_rate": 4.159921324990215e-05, "loss": 2.3601, "step": 3392500 }, { "epoch": 16.81, "learning_rate": 4.159797466347607e-05, "loss": 2.3792, "step": 3393000 }, { "epoch": 16.81, "learning_rate": 4.1596736077049986e-05, "loss": 2.355, "step": 3393500 }, { "epoch": 16.82, "learning_rate": 4.1595499967796754e-05, "loss": 2.3727, "step": 3394000 }, { "epoch": 16.82, "learning_rate": 4.159426138137067e-05, "loss": 2.3774, "step": 3394500 }, { "epoch": 16.82, "learning_rate": 4.159302279494459e-05, "loss": 2.3607, "step": 3395000 }, { "epoch": 16.82, "learning_rate": 4.1591784208518505e-05, "loss": 2.3917, "step": 3395500 }, { "epoch": 16.82, "learning_rate": 4.159054562209242e-05, "loss": 2.3518, "step": 3396000 }, { "epoch": 16.83, "learning_rate": 4.158930703566634e-05, "loss": 2.3856, "step": 3396500 }, { "epoch": 16.83, "learning_rate": 4.1588068449240256e-05, "loss": 2.387, "step": 3397000 }, { "epoch": 16.83, "learning_rate": 4.1586829862814166e-05, "loss": 2.3707, "step": 3397500 }, { "epoch": 16.83, "learning_rate": 4.158559127638808e-05, "loss": 2.3603, "step": 3398000 }, { "epoch": 16.84, "learning_rate": 4.158435764430771e-05, "loss": 2.3707, "step": 3398500 }, { "epoch": 16.84, "learning_rate": 4.158311905788163e-05, "loss": 2.3861, "step": 3399000 }, { "epoch": 16.84, "learning_rate": 4.158188047145554e-05, "loss": 2.3695, "step": 3399500 }, { "epoch": 16.84, "learning_rate": 4.1580641885029455e-05, "loss": 2.3408, "step": 3400000 }, { "epoch": 16.85, "learning_rate": 4.157940329860337e-05, "loss": 2.3676, "step": 3400500 }, { "epoch": 16.85, "learning_rate": 4.157816718935014e-05, "loss": 2.3638, "step": 3401000 }, { "epoch": 16.85, "learning_rate": 4.157692860292406e-05, "loss": 2.3828, "step": 3401500 }, { "epoch": 16.85, "learning_rate": 4.1575690016497974e-05, "loss": 2.3853, "step": 3402000 }, { "epoch": 16.86, "learning_rate": 4.1574451430071884e-05, "loss": 2.3722, "step": 3402500 }, { "epoch": 16.86, "learning_rate": 4.15732128436458e-05, "loss": 2.3818, "step": 3403000 }, { "epoch": 16.86, "learning_rate": 4.157197425721972e-05, "loss": 2.3493, "step": 3403500 }, { "epoch": 16.86, "learning_rate": 4.1570735670793635e-05, "loss": 2.3795, "step": 3404000 }, { "epoch": 16.87, "learning_rate": 4.156949708436755e-05, "loss": 2.3624, "step": 3404500 }, { "epoch": 16.87, "learning_rate": 4.156826097511433e-05, "loss": 2.3783, "step": 3405000 }, { "epoch": 16.87, "learning_rate": 4.1567022388688245e-05, "loss": 2.379, "step": 3405500 }, { "epoch": 16.87, "learning_rate": 4.1565783802262155e-05, "loss": 2.3758, "step": 3406000 }, { "epoch": 16.88, "learning_rate": 4.156454521583607e-05, "loss": 2.3549, "step": 3406500 }, { "epoch": 16.88, "learning_rate": 4.156330662940999e-05, "loss": 2.3986, "step": 3407000 }, { "epoch": 16.88, "learning_rate": 4.1562068042983905e-05, "loss": 2.3615, "step": 3407500 }, { "epoch": 16.88, "learning_rate": 4.156082945655782e-05, "loss": 2.3583, "step": 3408000 }, { "epoch": 16.89, "learning_rate": 4.155959334730459e-05, "loss": 2.3854, "step": 3408500 }, { "epoch": 16.89, "learning_rate": 4.15583547608785e-05, "loss": 2.3673, "step": 3409000 }, { "epoch": 16.89, "learning_rate": 4.155711617445242e-05, "loss": 2.3642, "step": 3409500 }, { "epoch": 16.89, "learning_rate": 4.1555877588026335e-05, "loss": 2.3299, "step": 3410000 }, { "epoch": 16.9, "learning_rate": 4.155463900160025e-05, "loss": 2.3807, "step": 3410500 }, { "epoch": 16.9, "learning_rate": 4.155340041517417e-05, "loss": 2.3767, "step": 3411000 }, { "epoch": 16.9, "learning_rate": 4.1552161828748086e-05, "loss": 2.3789, "step": 3411500 }, { "epoch": 16.9, "learning_rate": 4.1550923242322e-05, "loss": 2.3876, "step": 3412000 }, { "epoch": 16.91, "learning_rate": 4.154968713306877e-05, "loss": 2.3517, "step": 3412500 }, { "epoch": 16.91, "learning_rate": 4.154844854664269e-05, "loss": 2.4007, "step": 3413000 }, { "epoch": 16.91, "learning_rate": 4.1547209960216606e-05, "loss": 2.3944, "step": 3413500 }, { "epoch": 16.91, "learning_rate": 4.154597137379052e-05, "loss": 2.3647, "step": 3414000 }, { "epoch": 16.92, "learning_rate": 4.154473278736444e-05, "loss": 2.3501, "step": 3414500 }, { "epoch": 16.92, "learning_rate": 4.1543494200938356e-05, "loss": 2.3941, "step": 3415000 }, { "epoch": 16.92, "learning_rate": 4.154225561451227e-05, "loss": 2.3809, "step": 3415500 }, { "epoch": 16.92, "learning_rate": 4.154101702808619e-05, "loss": 2.3663, "step": 3416000 }, { "epoch": 16.93, "learning_rate": 4.153978091883295e-05, "loss": 2.3529, "step": 3416500 }, { "epoch": 16.93, "learning_rate": 4.153854233240687e-05, "loss": 2.3712, "step": 3417000 }, { "epoch": 16.93, "learning_rate": 4.1537306223153645e-05, "loss": 2.3917, "step": 3417500 }, { "epoch": 16.93, "learning_rate": 4.153606763672756e-05, "loss": 2.3671, "step": 3418000 }, { "epoch": 16.94, "learning_rate": 4.153482905030147e-05, "loss": 2.3691, "step": 3418500 }, { "epoch": 16.94, "learning_rate": 4.153359046387539e-05, "loss": 2.3647, "step": 3419000 }, { "epoch": 16.94, "learning_rate": 4.1532351877449306e-05, "loss": 2.3684, "step": 3419500 }, { "epoch": 16.94, "learning_rate": 4.153111329102322e-05, "loss": 2.3566, "step": 3420000 }, { "epoch": 16.95, "learning_rate": 4.152987718176999e-05, "loss": 2.3672, "step": 3420500 }, { "epoch": 16.95, "learning_rate": 4.152863859534391e-05, "loss": 2.383, "step": 3421000 }, { "epoch": 16.95, "learning_rate": 4.152740248609068e-05, "loss": 2.3671, "step": 3421500 }, { "epoch": 16.95, "learning_rate": 4.1526163899664594e-05, "loss": 2.3578, "step": 3422000 }, { "epoch": 16.96, "learning_rate": 4.152492531323851e-05, "loss": 2.3837, "step": 3422500 }, { "epoch": 16.96, "learning_rate": 4.152368672681243e-05, "loss": 2.3824, "step": 3423000 }, { "epoch": 16.96, "learning_rate": 4.1522448140386345e-05, "loss": 2.3624, "step": 3423500 }, { "epoch": 16.96, "learning_rate": 4.152120955396026e-05, "loss": 2.3848, "step": 3424000 }, { "epoch": 16.97, "learning_rate": 4.151997096753417e-05, "loss": 2.379, "step": 3424500 }, { "epoch": 16.97, "learning_rate": 4.151873238110809e-05, "loss": 2.3548, "step": 3425000 }, { "epoch": 16.97, "learning_rate": 4.1517493794682006e-05, "loss": 2.363, "step": 3425500 }, { "epoch": 16.97, "learning_rate": 4.151625520825592e-05, "loss": 2.3791, "step": 3426000 }, { "epoch": 16.98, "learning_rate": 4.151501662182984e-05, "loss": 2.3776, "step": 3426500 }, { "epoch": 16.98, "learning_rate": 4.151378051257661e-05, "loss": 2.3708, "step": 3427000 }, { "epoch": 16.98, "learning_rate": 4.1512541926150525e-05, "loss": 2.3777, "step": 3427500 }, { "epoch": 16.98, "learning_rate": 4.1511303339724436e-05, "loss": 2.3577, "step": 3428000 }, { "epoch": 16.99, "learning_rate": 4.151006475329835e-05, "loss": 2.3726, "step": 3428500 }, { "epoch": 16.99, "learning_rate": 4.150882616687227e-05, "loss": 2.393, "step": 3429000 }, { "epoch": 16.99, "learning_rate": 4.1507590057619045e-05, "loss": 2.356, "step": 3429500 }, { "epoch": 16.99, "learning_rate": 4.150635147119296e-05, "loss": 2.3563, "step": 3430000 }, { "epoch": 17.0, "learning_rate": 4.150511288476688e-05, "loss": 2.3842, "step": 3430500 }, { "epoch": 17.0, "learning_rate": 4.150387429834079e-05, "loss": 2.3678, "step": 3431000 }, { "epoch": 17.0, "eval_accuracy": 0.6498075948776978, "eval_accuracy_mlm": 0.6043656048690601, "eval_accuracy_nsp": 0.8643428943477186, "eval_loss": 2.376680374145508, "eval_runtime": 145.8417, "eval_samples_per_second": 1748.189, "eval_steps_per_second": 72.846, "step": 3431331 }, { "epoch": 17.0, "learning_rate": 4.1502638189087565e-05, "loss": 2.3765, "step": 3431500 }, { "epoch": 17.0, "learning_rate": 4.150139960266148e-05, "loss": 2.3414, "step": 3432000 }, { "epoch": 17.01, "learning_rate": 4.150016101623539e-05, "loss": 2.3531, "step": 3432500 }, { "epoch": 17.01, "learning_rate": 4.149892242980931e-05, "loss": 2.3628, "step": 3433000 }, { "epoch": 17.01, "learning_rate": 4.1497683843383226e-05, "loss": 2.3441, "step": 3433500 }, { "epoch": 17.01, "learning_rate": 4.149644525695714e-05, "loss": 2.3688, "step": 3434000 }, { "epoch": 17.02, "learning_rate": 4.149520667053106e-05, "loss": 2.3296, "step": 3434500 }, { "epoch": 17.02, "learning_rate": 4.149396808410497e-05, "loss": 2.3295, "step": 3435000 }, { "epoch": 17.02, "learning_rate": 4.1492729497678886e-05, "loss": 2.3542, "step": 3435500 }, { "epoch": 17.02, "learning_rate": 4.1491490911252803e-05, "loss": 2.3233, "step": 3436000 }, { "epoch": 17.03, "learning_rate": 4.149025232482672e-05, "loss": 2.3492, "step": 3436500 }, { "epoch": 17.03, "learning_rate": 4.148901373840064e-05, "loss": 2.341, "step": 3437000 }, { "epoch": 17.03, "learning_rate": 4.1487775151974554e-05, "loss": 2.356, "step": 3437500 }, { "epoch": 17.03, "learning_rate": 4.148653656554847e-05, "loss": 2.3461, "step": 3438000 }, { "epoch": 17.04, "learning_rate": 4.148529797912239e-05, "loss": 2.3431, "step": 3438500 }, { "epoch": 17.04, "learning_rate": 4.148406186986916e-05, "loss": 2.3379, "step": 3439000 }, { "epoch": 17.04, "learning_rate": 4.1482823283443074e-05, "loss": 2.3413, "step": 3439500 }, { "epoch": 17.04, "learning_rate": 4.148158469701699e-05, "loss": 2.3506, "step": 3440000 }, { "epoch": 17.05, "learning_rate": 4.148034858776376e-05, "loss": 2.3299, "step": 3440500 }, { "epoch": 17.05, "learning_rate": 4.1479110001337676e-05, "loss": 2.3431, "step": 3441000 }, { "epoch": 17.05, "learning_rate": 4.147787141491159e-05, "loss": 2.3623, "step": 3441500 }, { "epoch": 17.05, "learning_rate": 4.1476632828485504e-05, "loss": 2.3722, "step": 3442000 }, { "epoch": 17.06, "learning_rate": 4.147539424205942e-05, "loss": 2.3552, "step": 3442500 }, { "epoch": 17.06, "learning_rate": 4.147415565563334e-05, "loss": 2.3468, "step": 3443000 }, { "epoch": 17.06, "learning_rate": 4.1472917069207254e-05, "loss": 2.3322, "step": 3443500 }, { "epoch": 17.06, "learning_rate": 4.147167848278117e-05, "loss": 2.3492, "step": 3444000 }, { "epoch": 17.07, "learning_rate": 4.147044237352794e-05, "loss": 2.3549, "step": 3444500 }, { "epoch": 17.07, "learning_rate": 4.146920378710186e-05, "loss": 2.373, "step": 3445000 }, { "epoch": 17.07, "learning_rate": 4.1467965200675774e-05, "loss": 2.3629, "step": 3445500 }, { "epoch": 17.07, "learning_rate": 4.146672909142254e-05, "loss": 2.3718, "step": 3446000 }, { "epoch": 17.08, "learning_rate": 4.146549050499646e-05, "loss": 2.3301, "step": 3446500 }, { "epoch": 17.08, "learning_rate": 4.1464251918570377e-05, "loss": 2.3385, "step": 3447000 }, { "epoch": 17.08, "learning_rate": 4.1463013332144294e-05, "loss": 2.3374, "step": 3447500 }, { "epoch": 17.08, "learning_rate": 4.146177474571821e-05, "loss": 2.3562, "step": 3448000 }, { "epoch": 17.09, "learning_rate": 4.146053615929212e-05, "loss": 2.3514, "step": 3448500 }, { "epoch": 17.09, "learning_rate": 4.145929757286604e-05, "loss": 2.3568, "step": 3449000 }, { "epoch": 17.09, "learning_rate": 4.1458058986439954e-05, "loss": 2.3525, "step": 3449500 }, { "epoch": 17.09, "learning_rate": 4.145682287718672e-05, "loss": 2.3269, "step": 3450000 }, { "epoch": 17.09, "learning_rate": 4.145558429076064e-05, "loss": 2.3618, "step": 3450500 }, { "epoch": 17.1, "learning_rate": 4.145434570433456e-05, "loss": 2.3438, "step": 3451000 }, { "epoch": 17.1, "learning_rate": 4.1453107117908474e-05, "loss": 2.341, "step": 3451500 }, { "epoch": 17.1, "learning_rate": 4.145186853148239e-05, "loss": 2.3567, "step": 3452000 }, { "epoch": 17.1, "learning_rate": 4.145063489940201e-05, "loss": 2.3433, "step": 3452500 }, { "epoch": 17.11, "learning_rate": 4.144939631297593e-05, "loss": 2.3524, "step": 3453000 }, { "epoch": 17.11, "learning_rate": 4.1448157726549846e-05, "loss": 2.3444, "step": 3453500 }, { "epoch": 17.11, "learning_rate": 4.144691914012376e-05, "loss": 2.3531, "step": 3454000 }, { "epoch": 17.11, "learning_rate": 4.144568055369768e-05, "loss": 2.3459, "step": 3454500 }, { "epoch": 17.12, "learning_rate": 4.1444441967271596e-05, "loss": 2.35, "step": 3455000 }, { "epoch": 17.12, "learning_rate": 4.144320338084551e-05, "loss": 2.338, "step": 3455500 }, { "epoch": 17.12, "learning_rate": 4.1441964794419423e-05, "loss": 2.3489, "step": 3456000 }, { "epoch": 17.12, "learning_rate": 4.14407286851662e-05, "loss": 2.3561, "step": 3456500 }, { "epoch": 17.13, "learning_rate": 4.1439490098740116e-05, "loss": 2.3618, "step": 3457000 }, { "epoch": 17.13, "learning_rate": 4.143825151231403e-05, "loss": 2.3614, "step": 3457500 }, { "epoch": 17.13, "learning_rate": 4.143701292588795e-05, "loss": 2.3375, "step": 3458000 }, { "epoch": 17.13, "learning_rate": 4.143577433946187e-05, "loss": 2.3632, "step": 3458500 }, { "epoch": 17.14, "learning_rate": 4.143453823020863e-05, "loss": 2.341, "step": 3459000 }, { "epoch": 17.14, "learning_rate": 4.1433299643782546e-05, "loss": 2.3918, "step": 3459500 }, { "epoch": 17.14, "learning_rate": 4.143206105735646e-05, "loss": 2.3394, "step": 3460000 }, { "epoch": 17.14, "learning_rate": 4.143082247093038e-05, "loss": 2.3564, "step": 3460500 }, { "epoch": 17.15, "learning_rate": 4.1429583884504296e-05, "loss": 2.351, "step": 3461000 }, { "epoch": 17.15, "learning_rate": 4.142834529807821e-05, "loss": 2.3515, "step": 3461500 }, { "epoch": 17.15, "learning_rate": 4.142710918882498e-05, "loss": 2.343, "step": 3462000 }, { "epoch": 17.15, "learning_rate": 4.14258706023989e-05, "loss": 2.3226, "step": 3462500 }, { "epoch": 17.16, "learning_rate": 4.1424632015972816e-05, "loss": 2.35, "step": 3463000 }, { "epoch": 17.16, "learning_rate": 4.142339342954673e-05, "loss": 2.3612, "step": 3463500 }, { "epoch": 17.16, "learning_rate": 4.142215484312065e-05, "loss": 2.3746, "step": 3464000 }, { "epoch": 17.16, "learning_rate": 4.142091625669457e-05, "loss": 2.3619, "step": 3464500 }, { "epoch": 17.17, "learning_rate": 4.141967767026848e-05, "loss": 2.3721, "step": 3465000 }, { "epoch": 17.17, "learning_rate": 4.1418439083842394e-05, "loss": 2.3639, "step": 3465500 }, { "epoch": 17.17, "learning_rate": 4.1417205451762015e-05, "loss": 2.343, "step": 3466000 }, { "epoch": 17.17, "learning_rate": 4.141596686533593e-05, "loss": 2.3616, "step": 3466500 }, { "epoch": 17.18, "learning_rate": 4.141472827890985e-05, "loss": 2.3572, "step": 3467000 }, { "epoch": 17.18, "learning_rate": 4.1413489692483765e-05, "loss": 2.374, "step": 3467500 }, { "epoch": 17.18, "learning_rate": 4.1412253583230534e-05, "loss": 2.3437, "step": 3468000 }, { "epoch": 17.18, "learning_rate": 4.141101499680445e-05, "loss": 2.3703, "step": 3468500 }, { "epoch": 17.19, "learning_rate": 4.140977641037837e-05, "loss": 2.3546, "step": 3469000 }, { "epoch": 17.19, "learning_rate": 4.140853782395228e-05, "loss": 2.3631, "step": 3469500 }, { "epoch": 17.19, "learning_rate": 4.140730171469905e-05, "loss": 2.3685, "step": 3470000 }, { "epoch": 17.19, "learning_rate": 4.1406063128272964e-05, "loss": 2.3637, "step": 3470500 }, { "epoch": 17.2, "learning_rate": 4.140482454184688e-05, "loss": 2.3558, "step": 3471000 }, { "epoch": 17.2, "learning_rate": 4.14035859554208e-05, "loss": 2.3688, "step": 3471500 }, { "epoch": 17.2, "learning_rate": 4.1402347368994715e-05, "loss": 2.343, "step": 3472000 }, { "epoch": 17.2, "learning_rate": 4.140110878256863e-05, "loss": 2.364, "step": 3472500 }, { "epoch": 17.21, "learning_rate": 4.139987019614255e-05, "loss": 2.3704, "step": 3473000 }, { "epoch": 17.21, "learning_rate": 4.1398631609716465e-05, "loss": 2.3474, "step": 3473500 }, { "epoch": 17.21, "learning_rate": 4.1397395500463234e-05, "loss": 2.3439, "step": 3474000 }, { "epoch": 17.21, "learning_rate": 4.139615691403715e-05, "loss": 2.3453, "step": 3474500 }, { "epoch": 17.22, "learning_rate": 4.139491832761107e-05, "loss": 2.3463, "step": 3475000 }, { "epoch": 17.22, "learning_rate": 4.1393679741184985e-05, "loss": 2.3682, "step": 3475500 }, { "epoch": 17.22, "learning_rate": 4.1392441154758895e-05, "loss": 2.3424, "step": 3476000 }, { "epoch": 17.22, "learning_rate": 4.139120256833281e-05, "loss": 2.3605, "step": 3476500 }, { "epoch": 17.23, "learning_rate": 4.138996398190673e-05, "loss": 2.3513, "step": 3477000 }, { "epoch": 17.23, "learning_rate": 4.13887278726535e-05, "loss": 2.3756, "step": 3477500 }, { "epoch": 17.23, "learning_rate": 4.1387489286227415e-05, "loss": 2.3382, "step": 3478000 }, { "epoch": 17.23, "learning_rate": 4.138625069980133e-05, "loss": 2.3326, "step": 3478500 }, { "epoch": 17.24, "learning_rate": 4.138501211337525e-05, "loss": 2.3489, "step": 3479000 }, { "epoch": 17.24, "learning_rate": 4.1383773526949166e-05, "loss": 2.3513, "step": 3479500 }, { "epoch": 17.24, "learning_rate": 4.1382537417695934e-05, "loss": 2.3649, "step": 3480000 }, { "epoch": 17.24, "learning_rate": 4.13813013084427e-05, "loss": 2.3805, "step": 3480500 }, { "epoch": 17.25, "learning_rate": 4.138006272201662e-05, "loss": 2.3487, "step": 3481000 }, { "epoch": 17.25, "learning_rate": 4.137882413559053e-05, "loss": 2.3675, "step": 3481500 }, { "epoch": 17.25, "learning_rate": 4.137758554916445e-05, "loss": 2.3361, "step": 3482000 }, { "epoch": 17.25, "learning_rate": 4.1376346962738364e-05, "loss": 2.3631, "step": 3482500 }, { "epoch": 17.26, "learning_rate": 4.137510837631228e-05, "loss": 2.3593, "step": 3483000 }, { "epoch": 17.26, "learning_rate": 4.13738697898862e-05, "loss": 2.3565, "step": 3483500 }, { "epoch": 17.26, "learning_rate": 4.1372631203460115e-05, "loss": 2.353, "step": 3484000 }, { "epoch": 17.26, "learning_rate": 4.137139261703403e-05, "loss": 2.3434, "step": 3484500 }, { "epoch": 17.27, "learning_rate": 4.137015403060795e-05, "loss": 2.3584, "step": 3485000 }, { "epoch": 17.27, "learning_rate": 4.1368915444181866e-05, "loss": 2.3592, "step": 3485500 }, { "epoch": 17.27, "learning_rate": 4.136767685775578e-05, "loss": 2.3616, "step": 3486000 }, { "epoch": 17.27, "learning_rate": 4.13664382713297e-05, "loss": 2.361, "step": 3486500 }, { "epoch": 17.28, "learning_rate": 4.1365199684903617e-05, "loss": 2.3554, "step": 3487000 }, { "epoch": 17.28, "learning_rate": 4.1363961098477533e-05, "loss": 2.3607, "step": 3487500 }, { "epoch": 17.28, "learning_rate": 4.136272251205145e-05, "loss": 2.3504, "step": 3488000 }, { "epoch": 17.28, "learning_rate": 4.136148640279822e-05, "loss": 2.3489, "step": 3488500 }, { "epoch": 17.29, "learning_rate": 4.1360247816372136e-05, "loss": 2.3515, "step": 3489000 }, { "epoch": 17.29, "learning_rate": 4.1359009229946046e-05, "loss": 2.3622, "step": 3489500 }, { "epoch": 17.29, "learning_rate": 4.1357773120692815e-05, "loss": 2.366, "step": 3490000 }, { "epoch": 17.29, "learning_rate": 4.135653453426673e-05, "loss": 2.3397, "step": 3490500 }, { "epoch": 17.3, "learning_rate": 4.135529594784065e-05, "loss": 2.3427, "step": 3491000 }, { "epoch": 17.3, "learning_rate": 4.1354057361414566e-05, "loss": 2.3803, "step": 3491500 }, { "epoch": 17.3, "learning_rate": 4.135281877498848e-05, "loss": 2.3359, "step": 3492000 }, { "epoch": 17.3, "learning_rate": 4.13515801885624e-05, "loss": 2.3294, "step": 3492500 }, { "epoch": 17.31, "learning_rate": 4.135034160213632e-05, "loss": 2.3517, "step": 3493000 }, { "epoch": 17.31, "learning_rate": 4.1349103015710234e-05, "loss": 2.3769, "step": 3493500 }, { "epoch": 17.31, "learning_rate": 4.134786442928415e-05, "loss": 2.388, "step": 3494000 }, { "epoch": 17.31, "learning_rate": 4.134662584285807e-05, "loss": 2.3651, "step": 3494500 }, { "epoch": 17.32, "learning_rate": 4.1345387256431984e-05, "loss": 2.3778, "step": 3495000 }, { "epoch": 17.32, "learning_rate": 4.13441486700059e-05, "loss": 2.3529, "step": 3495500 }, { "epoch": 17.32, "learning_rate": 4.134291008357982e-05, "loss": 2.3574, "step": 3496000 }, { "epoch": 17.32, "learning_rate": 4.134167149715373e-05, "loss": 2.3382, "step": 3496500 }, { "epoch": 17.33, "learning_rate": 4.13404353879005e-05, "loss": 2.3456, "step": 3497000 }, { "epoch": 17.33, "learning_rate": 4.1339196801474414e-05, "loss": 2.3635, "step": 3497500 }, { "epoch": 17.33, "learning_rate": 4.133795821504833e-05, "loss": 2.3514, "step": 3498000 }, { "epoch": 17.33, "learning_rate": 4.133671962862225e-05, "loss": 2.352, "step": 3498500 }, { "epoch": 17.34, "learning_rate": 4.1335481042196165e-05, "loss": 2.3373, "step": 3499000 }, { "epoch": 17.34, "learning_rate": 4.1334242455770075e-05, "loss": 2.3498, "step": 3499500 }, { "epoch": 17.34, "learning_rate": 4.133300386934399e-05, "loss": 2.3483, "step": 3500000 }, { "epoch": 17.34, "learning_rate": 4.133176528291791e-05, "loss": 2.3468, "step": 3500500 }, { "epoch": 17.35, "learning_rate": 4.1330526696491826e-05, "loss": 2.3388, "step": 3501000 }, { "epoch": 17.35, "learning_rate": 4.132928811006574e-05, "loss": 2.3492, "step": 3501500 }, { "epoch": 17.35, "learning_rate": 4.132805200081252e-05, "loss": 2.3273, "step": 3502000 }, { "epoch": 17.35, "learning_rate": 4.132681341438643e-05, "loss": 2.3493, "step": 3502500 }, { "epoch": 17.36, "learning_rate": 4.132557978230605e-05, "loss": 2.3476, "step": 3503000 }, { "epoch": 17.36, "learning_rate": 4.1324341195879966e-05, "loss": 2.3567, "step": 3503500 }, { "epoch": 17.36, "learning_rate": 4.1323105086626735e-05, "loss": 2.3562, "step": 3504000 }, { "epoch": 17.36, "learning_rate": 4.132186650020065e-05, "loss": 2.3567, "step": 3504500 }, { "epoch": 17.36, "learning_rate": 4.132062791377457e-05, "loss": 2.3373, "step": 3505000 }, { "epoch": 17.37, "learning_rate": 4.1319389327348486e-05, "loss": 2.3705, "step": 3505500 }, { "epoch": 17.37, "learning_rate": 4.13181507409224e-05, "loss": 2.36, "step": 3506000 }, { "epoch": 17.37, "learning_rate": 4.131691215449632e-05, "loss": 2.3511, "step": 3506500 }, { "epoch": 17.37, "learning_rate": 4.1315673568070236e-05, "loss": 2.3651, "step": 3507000 }, { "epoch": 17.38, "learning_rate": 4.1314434981644153e-05, "loss": 2.358, "step": 3507500 }, { "epoch": 17.38, "learning_rate": 4.1313198872390915e-05, "loss": 2.3668, "step": 3508000 }, { "epoch": 17.38, "learning_rate": 4.131196028596483e-05, "loss": 2.3245, "step": 3508500 }, { "epoch": 17.38, "learning_rate": 4.131072169953875e-05, "loss": 2.3623, "step": 3509000 }, { "epoch": 17.39, "learning_rate": 4.1309483113112666e-05, "loss": 2.3565, "step": 3509500 }, { "epoch": 17.39, "learning_rate": 4.130824452668658e-05, "loss": 2.3437, "step": 3510000 }, { "epoch": 17.39, "learning_rate": 4.13070059402605e-05, "loss": 2.3466, "step": 3510500 }, { "epoch": 17.39, "learning_rate": 4.130576735383442e-05, "loss": 2.383, "step": 3511000 }, { "epoch": 17.4, "learning_rate": 4.1304528767408334e-05, "loss": 2.3689, "step": 3511500 }, { "epoch": 17.4, "learning_rate": 4.130329018098225e-05, "loss": 2.3632, "step": 3512000 }, { "epoch": 17.4, "learning_rate": 4.130205159455617e-05, "loss": 2.3213, "step": 3512500 }, { "epoch": 17.4, "learning_rate": 4.1300813008130085e-05, "loss": 2.3577, "step": 3513000 }, { "epoch": 17.41, "learning_rate": 4.1299576898876854e-05, "loss": 2.3611, "step": 3513500 }, { "epoch": 17.41, "learning_rate": 4.129833831245077e-05, "loss": 2.3616, "step": 3514000 }, { "epoch": 17.41, "learning_rate": 4.129709972602469e-05, "loss": 2.3532, "step": 3514500 }, { "epoch": 17.41, "learning_rate": 4.1295861139598604e-05, "loss": 2.3241, "step": 3515000 }, { "epoch": 17.42, "learning_rate": 4.129462255317252e-05, "loss": 2.3725, "step": 3515500 }, { "epoch": 17.42, "learning_rate": 4.129338396674644e-05, "loss": 2.3333, "step": 3516000 }, { "epoch": 17.42, "learning_rate": 4.1292145380320355e-05, "loss": 2.3379, "step": 3516500 }, { "epoch": 17.42, "learning_rate": 4.129090927106712e-05, "loss": 2.3909, "step": 3517000 }, { "epoch": 17.43, "learning_rate": 4.1289670684641034e-05, "loss": 2.3664, "step": 3517500 }, { "epoch": 17.43, "learning_rate": 4.128843209821495e-05, "loss": 2.3661, "step": 3518000 }, { "epoch": 17.43, "learning_rate": 4.128719351178887e-05, "loss": 2.3586, "step": 3518500 }, { "epoch": 17.43, "learning_rate": 4.1285954925362785e-05, "loss": 2.3723, "step": 3519000 }, { "epoch": 17.44, "learning_rate": 4.12847163389367e-05, "loss": 2.3486, "step": 3519500 }, { "epoch": 17.44, "learning_rate": 4.128347775251062e-05, "loss": 2.3571, "step": 3520000 }, { "epoch": 17.44, "learning_rate": 4.128224164325739e-05, "loss": 2.3583, "step": 3520500 }, { "epoch": 17.44, "learning_rate": 4.1281003056831304e-05, "loss": 2.4005, "step": 3521000 }, { "epoch": 17.45, "learning_rate": 4.127976447040522e-05, "loss": 2.3775, "step": 3521500 }, { "epoch": 17.45, "learning_rate": 4.127852588397914e-05, "loss": 2.3687, "step": 3522000 }, { "epoch": 17.45, "learning_rate": 4.1277287297553055e-05, "loss": 2.3569, "step": 3522500 }, { "epoch": 17.45, "learning_rate": 4.127605118829982e-05, "loss": 2.3676, "step": 3523000 }, { "epoch": 17.46, "learning_rate": 4.1274815079046586e-05, "loss": 2.3535, "step": 3523500 }, { "epoch": 17.46, "learning_rate": 4.12735764926205e-05, "loss": 2.368, "step": 3524000 }, { "epoch": 17.46, "learning_rate": 4.127233790619442e-05, "loss": 2.3668, "step": 3524500 }, { "epoch": 17.46, "learning_rate": 4.127109931976834e-05, "loss": 2.3525, "step": 3525000 }, { "epoch": 17.47, "learning_rate": 4.1269860733342254e-05, "loss": 2.3639, "step": 3525500 }, { "epoch": 17.47, "learning_rate": 4.126862214691617e-05, "loss": 2.3634, "step": 3526000 }, { "epoch": 17.47, "learning_rate": 4.126738603766294e-05, "loss": 2.3497, "step": 3526500 }, { "epoch": 17.47, "learning_rate": 4.126614992840971e-05, "loss": 2.3586, "step": 3527000 }, { "epoch": 17.48, "learning_rate": 4.1264911341983625e-05, "loss": 2.3447, "step": 3527500 }, { "epoch": 17.48, "learning_rate": 4.126367275555754e-05, "loss": 2.3595, "step": 3528000 }, { "epoch": 17.48, "learning_rate": 4.126243416913145e-05, "loss": 2.3406, "step": 3528500 }, { "epoch": 17.48, "learning_rate": 4.126119805987823e-05, "loss": 2.3242, "step": 3529000 }, { "epoch": 17.49, "learning_rate": 4.1259959473452145e-05, "loss": 2.3484, "step": 3529500 }, { "epoch": 17.49, "learning_rate": 4.125872336419891e-05, "loss": 2.3547, "step": 3530000 }, { "epoch": 17.49, "learning_rate": 4.1257484777772824e-05, "loss": 2.3682, "step": 3530500 }, { "epoch": 17.49, "learning_rate": 4.125624619134674e-05, "loss": 2.352, "step": 3531000 }, { "epoch": 17.5, "learning_rate": 4.125500760492066e-05, "loss": 2.3719, "step": 3531500 }, { "epoch": 17.5, "learning_rate": 4.1253769018494575e-05, "loss": 2.3395, "step": 3532000 }, { "epoch": 17.5, "learning_rate": 4.125253043206849e-05, "loss": 2.3595, "step": 3532500 }, { "epoch": 17.5, "learning_rate": 4.125129184564241e-05, "loss": 2.3707, "step": 3533000 }, { "epoch": 17.51, "learning_rate": 4.1250053259216325e-05, "loss": 2.3691, "step": 3533500 }, { "epoch": 17.51, "learning_rate": 4.124881467279024e-05, "loss": 2.3668, "step": 3534000 }, { "epoch": 17.51, "learning_rate": 4.124757608636415e-05, "loss": 2.3482, "step": 3534500 }, { "epoch": 17.51, "learning_rate": 4.124633749993807e-05, "loss": 2.3422, "step": 3535000 }, { "epoch": 17.52, "learning_rate": 4.1245098913511986e-05, "loss": 2.3515, "step": 3535500 }, { "epoch": 17.52, "learning_rate": 4.12438603270859e-05, "loss": 2.3495, "step": 3536000 }, { "epoch": 17.52, "learning_rate": 4.124262174065982e-05, "loss": 2.3579, "step": 3536500 }, { "epoch": 17.52, "learning_rate": 4.124138315423374e-05, "loss": 2.3655, "step": 3537000 }, { "epoch": 17.53, "learning_rate": 4.1240144567807654e-05, "loss": 2.3543, "step": 3537500 }, { "epoch": 17.53, "learning_rate": 4.123890598138157e-05, "loss": 2.3707, "step": 3538000 }, { "epoch": 17.53, "learning_rate": 4.123766987212834e-05, "loss": 2.3427, "step": 3538500 }, { "epoch": 17.53, "learning_rate": 4.123643128570226e-05, "loss": 2.3651, "step": 3539000 }, { "epoch": 17.54, "learning_rate": 4.1235192699276174e-05, "loss": 2.3747, "step": 3539500 }, { "epoch": 17.54, "learning_rate": 4.123395411285009e-05, "loss": 2.3532, "step": 3540000 }, { "epoch": 17.54, "learning_rate": 4.1232715526424e-05, "loss": 2.3638, "step": 3540500 }, { "epoch": 17.54, "learning_rate": 4.123147941717077e-05, "loss": 2.3649, "step": 3541000 }, { "epoch": 17.55, "learning_rate": 4.1230240830744686e-05, "loss": 2.3551, "step": 3541500 }, { "epoch": 17.55, "learning_rate": 4.12290022443186e-05, "loss": 2.3676, "step": 3542000 }, { "epoch": 17.55, "learning_rate": 4.122776365789252e-05, "loss": 2.3658, "step": 3542500 }, { "epoch": 17.55, "learning_rate": 4.122652507146644e-05, "loss": 2.3553, "step": 3543000 }, { "epoch": 17.56, "learning_rate": 4.1225286485040354e-05, "loss": 2.3745, "step": 3543500 }, { "epoch": 17.56, "learning_rate": 4.122405037578712e-05, "loss": 2.3211, "step": 3544000 }, { "epoch": 17.56, "learning_rate": 4.122281178936104e-05, "loss": 2.3367, "step": 3544500 }, { "epoch": 17.56, "learning_rate": 4.122157320293496e-05, "loss": 2.3815, "step": 3545000 }, { "epoch": 17.57, "learning_rate": 4.1220334616508874e-05, "loss": 2.3569, "step": 3545500 }, { "epoch": 17.57, "learning_rate": 4.121909603008279e-05, "loss": 2.3516, "step": 3546000 }, { "epoch": 17.57, "learning_rate": 4.121785992082956e-05, "loss": 2.3911, "step": 3546500 }, { "epoch": 17.57, "learning_rate": 4.121662133440347e-05, "loss": 2.3477, "step": 3547000 }, { "epoch": 17.58, "learning_rate": 4.1215382747977387e-05, "loss": 2.3665, "step": 3547500 }, { "epoch": 17.58, "learning_rate": 4.1214144161551304e-05, "loss": 2.3727, "step": 3548000 }, { "epoch": 17.58, "learning_rate": 4.121290557512522e-05, "loss": 2.3339, "step": 3548500 }, { "epoch": 17.58, "learning_rate": 4.121166698869914e-05, "loss": 2.391, "step": 3549000 }, { "epoch": 17.59, "learning_rate": 4.1210428402273054e-05, "loss": 2.3653, "step": 3549500 }, { "epoch": 17.59, "learning_rate": 4.120918981584697e-05, "loss": 2.3585, "step": 3550000 }, { "epoch": 17.59, "learning_rate": 4.120795122942089e-05, "loss": 2.3839, "step": 3550500 }, { "epoch": 17.59, "learning_rate": 4.1206712642994805e-05, "loss": 2.3457, "step": 3551000 }, { "epoch": 17.6, "learning_rate": 4.120547405656872e-05, "loss": 2.3383, "step": 3551500 }, { "epoch": 17.6, "learning_rate": 4.120423547014264e-05, "loss": 2.3665, "step": 3552000 }, { "epoch": 17.6, "learning_rate": 4.1202996883716556e-05, "loss": 2.3762, "step": 3552500 }, { "epoch": 17.6, "learning_rate": 4.120175829729047e-05, "loss": 2.364, "step": 3553000 }, { "epoch": 17.61, "learning_rate": 4.120051971086439e-05, "loss": 2.3739, "step": 3553500 }, { "epoch": 17.61, "learning_rate": 4.119928360161115e-05, "loss": 2.3595, "step": 3554000 }, { "epoch": 17.61, "learning_rate": 4.119804501518507e-05, "loss": 2.3607, "step": 3554500 }, { "epoch": 17.61, "learning_rate": 4.1196806428758986e-05, "loss": 2.3675, "step": 3555000 }, { "epoch": 17.62, "learning_rate": 4.11955678423329e-05, "loss": 2.3651, "step": 3555500 }, { "epoch": 17.62, "learning_rate": 4.119432925590682e-05, "loss": 2.3478, "step": 3556000 }, { "epoch": 17.62, "learning_rate": 4.1193090669480736e-05, "loss": 2.3354, "step": 3556500 }, { "epoch": 17.62, "learning_rate": 4.119185208305465e-05, "loss": 2.3537, "step": 3557000 }, { "epoch": 17.63, "learning_rate": 4.1190618450974274e-05, "loss": 2.3613, "step": 3557500 }, { "epoch": 17.63, "learning_rate": 4.118937986454819e-05, "loss": 2.3725, "step": 3558000 }, { "epoch": 17.63, "learning_rate": 4.118814127812211e-05, "loss": 2.3538, "step": 3558500 }, { "epoch": 17.63, "learning_rate": 4.1186902691696025e-05, "loss": 2.3833, "step": 3559000 }, { "epoch": 17.63, "learning_rate": 4.118566410526994e-05, "loss": 2.3447, "step": 3559500 }, { "epoch": 17.64, "learning_rate": 4.118442551884386e-05, "loss": 2.3642, "step": 3560000 }, { "epoch": 17.64, "learning_rate": 4.118318693241777e-05, "loss": 2.3562, "step": 3560500 }, { "epoch": 17.64, "learning_rate": 4.118195082316454e-05, "loss": 2.3684, "step": 3561000 }, { "epoch": 17.64, "learning_rate": 4.118071471391131e-05, "loss": 2.376, "step": 3561500 }, { "epoch": 17.65, "learning_rate": 4.117947612748523e-05, "loss": 2.3549, "step": 3562000 }, { "epoch": 17.65, "learning_rate": 4.117823754105914e-05, "loss": 2.3716, "step": 3562500 }, { "epoch": 17.65, "learning_rate": 4.117699895463306e-05, "loss": 2.3731, "step": 3563000 }, { "epoch": 17.65, "learning_rate": 4.1175760368206974e-05, "loss": 2.3784, "step": 3563500 }, { "epoch": 17.66, "learning_rate": 4.117452425895374e-05, "loss": 2.375, "step": 3564000 }, { "epoch": 17.66, "learning_rate": 4.117328567252766e-05, "loss": 2.3604, "step": 3564500 }, { "epoch": 17.66, "learning_rate": 4.117204708610158e-05, "loss": 2.3685, "step": 3565000 }, { "epoch": 17.66, "learning_rate": 4.1170808499675494e-05, "loss": 2.365, "step": 3565500 }, { "epoch": 17.67, "learning_rate": 4.1169569913249404e-05, "loss": 2.3495, "step": 3566000 }, { "epoch": 17.67, "learning_rate": 4.116833132682332e-05, "loss": 2.3564, "step": 3566500 }, { "epoch": 17.67, "learning_rate": 4.116709274039724e-05, "loss": 2.3713, "step": 3567000 }, { "epoch": 17.67, "learning_rate": 4.1165854153971155e-05, "loss": 2.3543, "step": 3567500 }, { "epoch": 17.68, "learning_rate": 4.116461556754507e-05, "loss": 2.3739, "step": 3568000 }, { "epoch": 17.68, "learning_rate": 4.116337945829185e-05, "loss": 2.3615, "step": 3568500 }, { "epoch": 17.68, "learning_rate": 4.116214087186576e-05, "loss": 2.3605, "step": 3569000 }, { "epoch": 17.68, "learning_rate": 4.1160902285439674e-05, "loss": 2.3517, "step": 3569500 }, { "epoch": 17.69, "learning_rate": 4.115966369901359e-05, "loss": 2.3571, "step": 3570000 }, { "epoch": 17.69, "learning_rate": 4.115842511258751e-05, "loss": 2.3669, "step": 3570500 }, { "epoch": 17.69, "learning_rate": 4.115718900333428e-05, "loss": 2.377, "step": 3571000 }, { "epoch": 17.69, "learning_rate": 4.1155950416908194e-05, "loss": 2.3674, "step": 3571500 }, { "epoch": 17.7, "learning_rate": 4.1154711830482104e-05, "loss": 2.3401, "step": 3572000 }, { "epoch": 17.7, "learning_rate": 4.115347572122888e-05, "loss": 2.3685, "step": 3572500 }, { "epoch": 17.7, "learning_rate": 4.1152237134802797e-05, "loss": 2.3623, "step": 3573000 }, { "epoch": 17.7, "learning_rate": 4.1150998548376713e-05, "loss": 2.3706, "step": 3573500 }, { "epoch": 17.71, "learning_rate": 4.114975996195063e-05, "loss": 2.3436, "step": 3574000 }, { "epoch": 17.71, "learning_rate": 4.114852137552455e-05, "loss": 2.386, "step": 3574500 }, { "epoch": 17.71, "learning_rate": 4.1147282789098464e-05, "loss": 2.3572, "step": 3575000 }, { "epoch": 17.71, "learning_rate": 4.1146044202672374e-05, "loss": 2.3548, "step": 3575500 }, { "epoch": 17.72, "learning_rate": 4.114480561624629e-05, "loss": 2.3364, "step": 3576000 }, { "epoch": 17.72, "learning_rate": 4.114356702982021e-05, "loss": 2.3401, "step": 3576500 }, { "epoch": 17.72, "learning_rate": 4.1142328443394125e-05, "loss": 2.3559, "step": 3577000 }, { "epoch": 17.72, "learning_rate": 4.1141094811313746e-05, "loss": 2.3307, "step": 3577500 }, { "epoch": 17.73, "learning_rate": 4.113985622488766e-05, "loss": 2.3549, "step": 3578000 }, { "epoch": 17.73, "learning_rate": 4.113861763846158e-05, "loss": 2.3539, "step": 3578500 }, { "epoch": 17.73, "learning_rate": 4.11373790520355e-05, "loss": 2.3734, "step": 3579000 }, { "epoch": 17.73, "learning_rate": 4.1136140465609414e-05, "loss": 2.3802, "step": 3579500 }, { "epoch": 17.74, "learning_rate": 4.113490187918333e-05, "loss": 2.3796, "step": 3580000 }, { "epoch": 17.74, "learning_rate": 4.113366329275725e-05, "loss": 2.3417, "step": 3580500 }, { "epoch": 17.74, "learning_rate": 4.1132424706331164e-05, "loss": 2.3862, "step": 3581000 }, { "epoch": 17.74, "learning_rate": 4.1131186119905075e-05, "loss": 2.3848, "step": 3581500 }, { "epoch": 17.75, "learning_rate": 4.112994753347899e-05, "loss": 2.3474, "step": 3582000 }, { "epoch": 17.75, "learning_rate": 4.112870894705291e-05, "loss": 2.3563, "step": 3582500 }, { "epoch": 17.75, "learning_rate": 4.1127470360626825e-05, "loss": 2.3501, "step": 3583000 }, { "epoch": 17.75, "learning_rate": 4.112623177420074e-05, "loss": 2.3553, "step": 3583500 }, { "epoch": 17.76, "learning_rate": 4.112499566494751e-05, "loss": 2.3721, "step": 3584000 }, { "epoch": 17.76, "learning_rate": 4.112375707852142e-05, "loss": 2.3584, "step": 3584500 }, { "epoch": 17.76, "learning_rate": 4.112251849209534e-05, "loss": 2.3551, "step": 3585000 }, { "epoch": 17.76, "learning_rate": 4.1121279905669255e-05, "loss": 2.3418, "step": 3585500 }, { "epoch": 17.77, "learning_rate": 4.112004379641603e-05, "loss": 2.3532, "step": 3586000 }, { "epoch": 17.77, "learning_rate": 4.111880520998995e-05, "loss": 2.3619, "step": 3586500 }, { "epoch": 17.77, "learning_rate": 4.1117566623563864e-05, "loss": 2.3691, "step": 3587000 }, { "epoch": 17.77, "learning_rate": 4.111632803713778e-05, "loss": 2.3613, "step": 3587500 }, { "epoch": 17.78, "learning_rate": 4.111508945071169e-05, "loss": 2.3628, "step": 3588000 }, { "epoch": 17.78, "learning_rate": 4.111385581863131e-05, "loss": 2.3733, "step": 3588500 }, { "epoch": 17.78, "learning_rate": 4.111261723220523e-05, "loss": 2.35, "step": 3589000 }, { "epoch": 17.78, "learning_rate": 4.1111378645779146e-05, "loss": 2.3595, "step": 3589500 }, { "epoch": 17.79, "learning_rate": 4.111014005935306e-05, "loss": 2.3707, "step": 3590000 }, { "epoch": 17.79, "learning_rate": 4.110890147292698e-05, "loss": 2.3446, "step": 3590500 }, { "epoch": 17.79, "learning_rate": 4.11076628865009e-05, "loss": 2.3831, "step": 3591000 }, { "epoch": 17.79, "learning_rate": 4.1106424300074814e-05, "loss": 2.3779, "step": 3591500 }, { "epoch": 17.8, "learning_rate": 4.110518571364873e-05, "loss": 2.3586, "step": 3592000 }, { "epoch": 17.8, "learning_rate": 4.11039496043955e-05, "loss": 2.3604, "step": 3592500 }, { "epoch": 17.8, "learning_rate": 4.1102711017969416e-05, "loss": 2.3887, "step": 3593000 }, { "epoch": 17.8, "learning_rate": 4.1101472431543333e-05, "loss": 2.3616, "step": 3593500 }, { "epoch": 17.81, "learning_rate": 4.110023384511725e-05, "loss": 2.3608, "step": 3594000 }, { "epoch": 17.81, "learning_rate": 4.109899773586401e-05, "loss": 2.3625, "step": 3594500 }, { "epoch": 17.81, "learning_rate": 4.109775914943793e-05, "loss": 2.3792, "step": 3595000 }, { "epoch": 17.81, "learning_rate": 4.1096520563011846e-05, "loss": 2.3811, "step": 3595500 }, { "epoch": 17.82, "learning_rate": 4.109528197658576e-05, "loss": 2.3449, "step": 3596000 }, { "epoch": 17.82, "learning_rate": 4.109404339015968e-05, "loss": 2.3525, "step": 3596500 }, { "epoch": 17.82, "learning_rate": 4.10928048037336e-05, "loss": 2.365, "step": 3597000 }, { "epoch": 17.82, "learning_rate": 4.1091568694480366e-05, "loss": 2.3526, "step": 3597500 }, { "epoch": 17.83, "learning_rate": 4.109033010805428e-05, "loss": 2.3738, "step": 3598000 }, { "epoch": 17.83, "learning_rate": 4.10890915216282e-05, "loss": 2.3432, "step": 3598500 }, { "epoch": 17.83, "learning_rate": 4.1087852935202117e-05, "loss": 2.3489, "step": 3599000 }, { "epoch": 17.83, "learning_rate": 4.1086614348776034e-05, "loss": 2.3774, "step": 3599500 }, { "epoch": 17.84, "learning_rate": 4.108537576234995e-05, "loss": 2.3676, "step": 3600000 }, { "epoch": 17.84, "learning_rate": 4.108413717592387e-05, "loss": 2.3776, "step": 3600500 }, { "epoch": 17.84, "learning_rate": 4.1082898589497784e-05, "loss": 2.3718, "step": 3601000 }, { "epoch": 17.84, "learning_rate": 4.10816649574174e-05, "loss": 2.3682, "step": 3601500 }, { "epoch": 17.85, "learning_rate": 4.1080426370991315e-05, "loss": 2.376, "step": 3602000 }, { "epoch": 17.85, "learning_rate": 4.107919026173809e-05, "loss": 2.3763, "step": 3602500 }, { "epoch": 17.85, "learning_rate": 4.1077951675312e-05, "loss": 2.3482, "step": 3603000 }, { "epoch": 17.85, "learning_rate": 4.107671308888592e-05, "loss": 2.3469, "step": 3603500 }, { "epoch": 17.86, "learning_rate": 4.1075474502459835e-05, "loss": 2.3561, "step": 3604000 }, { "epoch": 17.86, "learning_rate": 4.107423591603375e-05, "loss": 2.3922, "step": 3604500 }, { "epoch": 17.86, "learning_rate": 4.107299732960767e-05, "loss": 2.3394, "step": 3605000 }, { "epoch": 17.86, "learning_rate": 4.107175874318158e-05, "loss": 2.3683, "step": 3605500 }, { "epoch": 17.87, "learning_rate": 4.1070520156755496e-05, "loss": 2.3565, "step": 3606000 }, { "epoch": 17.87, "learning_rate": 4.106928157032941e-05, "loss": 2.3693, "step": 3606500 }, { "epoch": 17.87, "learning_rate": 4.106804298390333e-05, "loss": 2.3589, "step": 3607000 }, { "epoch": 17.87, "learning_rate": 4.1066804397477246e-05, "loss": 2.3629, "step": 3607500 }, { "epoch": 17.88, "learning_rate": 4.1065565811051163e-05, "loss": 2.3639, "step": 3608000 }, { "epoch": 17.88, "learning_rate": 4.106432970179793e-05, "loss": 2.3378, "step": 3608500 }, { "epoch": 17.88, "learning_rate": 4.106309111537185e-05, "loss": 2.3463, "step": 3609000 }, { "epoch": 17.88, "learning_rate": 4.1061855006118625e-05, "loss": 2.3587, "step": 3609500 }, { "epoch": 17.89, "learning_rate": 4.1060616419692535e-05, "loss": 2.3465, "step": 3610000 }, { "epoch": 17.89, "learning_rate": 4.105937783326645e-05, "loss": 2.3632, "step": 3610500 }, { "epoch": 17.89, "learning_rate": 4.105813924684037e-05, "loss": 2.3575, "step": 3611000 }, { "epoch": 17.89, "learning_rate": 4.1056900660414286e-05, "loss": 2.3853, "step": 3611500 }, { "epoch": 17.9, "learning_rate": 4.1055662073988196e-05, "loss": 2.3734, "step": 3612000 }, { "epoch": 17.9, "learning_rate": 4.105442348756211e-05, "loss": 2.3292, "step": 3612500 }, { "epoch": 17.9, "learning_rate": 4.105318490113603e-05, "loss": 2.3488, "step": 3613000 }, { "epoch": 17.9, "learning_rate": 4.10519487918828e-05, "loss": 2.3648, "step": 3613500 }, { "epoch": 17.91, "learning_rate": 4.1050710205456715e-05, "loss": 2.3435, "step": 3614000 }, { "epoch": 17.91, "learning_rate": 4.104947161903063e-05, "loss": 2.3709, "step": 3614500 }, { "epoch": 17.91, "learning_rate": 4.104823303260455e-05, "loss": 2.375, "step": 3615000 }, { "epoch": 17.91, "learning_rate": 4.1046994446178466e-05, "loss": 2.3707, "step": 3615500 }, { "epoch": 17.91, "learning_rate": 4.104575585975238e-05, "loss": 2.3493, "step": 3616000 }, { "epoch": 17.92, "learning_rate": 4.10445172733263e-05, "loss": 2.3454, "step": 3616500 }, { "epoch": 17.92, "learning_rate": 4.104327868690022e-05, "loss": 2.34, "step": 3617000 }, { "epoch": 17.92, "learning_rate": 4.1042042577646986e-05, "loss": 2.3497, "step": 3617500 }, { "epoch": 17.92, "learning_rate": 4.1040806468393755e-05, "loss": 2.3711, "step": 3618000 }, { "epoch": 17.93, "learning_rate": 4.103956788196767e-05, "loss": 2.3608, "step": 3618500 }, { "epoch": 17.93, "learning_rate": 4.103832929554159e-05, "loss": 2.3884, "step": 3619000 }, { "epoch": 17.93, "learning_rate": 4.10370907091155e-05, "loss": 2.3432, "step": 3619500 }, { "epoch": 17.93, "learning_rate": 4.1035852122689416e-05, "loss": 2.3545, "step": 3620000 }, { "epoch": 17.94, "learning_rate": 4.103462096778189e-05, "loss": 2.3426, "step": 3620500 }, { "epoch": 17.94, "learning_rate": 4.1033382381355805e-05, "loss": 2.3625, "step": 3621000 }, { "epoch": 17.94, "learning_rate": 4.103214379492972e-05, "loss": 2.3656, "step": 3621500 }, { "epoch": 17.94, "learning_rate": 4.103090520850364e-05, "loss": 2.3802, "step": 3622000 }, { "epoch": 17.95, "learning_rate": 4.1029666622077556e-05, "loss": 2.3537, "step": 3622500 }, { "epoch": 17.95, "learning_rate": 4.102842803565147e-05, "loss": 2.3691, "step": 3623000 }, { "epoch": 17.95, "learning_rate": 4.102718944922539e-05, "loss": 2.3754, "step": 3623500 }, { "epoch": 17.95, "learning_rate": 4.1025950862799307e-05, "loss": 2.3664, "step": 3624000 }, { "epoch": 17.96, "learning_rate": 4.1024712276373224e-05, "loss": 2.3663, "step": 3624500 }, { "epoch": 17.96, "learning_rate": 4.102347368994714e-05, "loss": 2.3661, "step": 3625000 }, { "epoch": 17.96, "learning_rate": 4.102223510352106e-05, "loss": 2.3567, "step": 3625500 }, { "epoch": 17.96, "learning_rate": 4.1020998994267826e-05, "loss": 2.3632, "step": 3626000 }, { "epoch": 17.97, "learning_rate": 4.1019760407841736e-05, "loss": 2.3703, "step": 3626500 }, { "epoch": 17.97, "learning_rate": 4.101852182141565e-05, "loss": 2.3692, "step": 3627000 }, { "epoch": 17.97, "learning_rate": 4.101728323498957e-05, "loss": 2.3708, "step": 3627500 }, { "epoch": 17.97, "learning_rate": 4.101604464856349e-05, "loss": 2.3665, "step": 3628000 }, { "epoch": 17.98, "learning_rate": 4.1014806062137404e-05, "loss": 2.3794, "step": 3628500 }, { "epoch": 17.98, "learning_rate": 4.101356747571132e-05, "loss": 2.3282, "step": 3629000 }, { "epoch": 17.98, "learning_rate": 4.101232888928524e-05, "loss": 2.3399, "step": 3629500 }, { "epoch": 17.98, "learning_rate": 4.1011090302859155e-05, "loss": 2.3481, "step": 3630000 }, { "epoch": 17.99, "learning_rate": 4.1009854193605924e-05, "loss": 2.3503, "step": 3630500 }, { "epoch": 17.99, "learning_rate": 4.100861560717984e-05, "loss": 2.3472, "step": 3631000 }, { "epoch": 17.99, "learning_rate": 4.100737702075376e-05, "loss": 2.3613, "step": 3631500 }, { "epoch": 17.99, "learning_rate": 4.1006140911500526e-05, "loss": 2.3675, "step": 3632000 }, { "epoch": 18.0, "learning_rate": 4.100490232507444e-05, "loss": 2.3705, "step": 3632500 }, { "epoch": 18.0, "learning_rate": 4.100366373864836e-05, "loss": 2.3473, "step": 3633000 }, { "epoch": 18.0, "eval_accuracy": 0.650584751161539, "eval_accuracy_mlm": 0.6049741663885951, "eval_accuracy_nsp": 0.8655666205154554, "eval_loss": 2.37389874458313, "eval_runtime": 145.8488, "eval_samples_per_second": 1748.105, "eval_steps_per_second": 72.843, "step": 3633174 }, { "epoch": 18.0, "learning_rate": 4.100242515222227e-05, "loss": 2.3426, "step": 3633500 }, { "epoch": 18.0, "learning_rate": 4.100118904296904e-05, "loss": 2.3235, "step": 3634000 }, { "epoch": 18.01, "learning_rate": 4.0999950456542956e-05, "loss": 2.3318, "step": 3634500 }, { "epoch": 18.01, "learning_rate": 4.099871187011687e-05, "loss": 2.3237, "step": 3635000 }, { "epoch": 18.01, "learning_rate": 4.099747328369079e-05, "loss": 2.3424, "step": 3635500 }, { "epoch": 18.01, "learning_rate": 4.099623469726471e-05, "loss": 2.3453, "step": 3636000 }, { "epoch": 18.02, "learning_rate": 4.0994996110838624e-05, "loss": 2.3344, "step": 3636500 }, { "epoch": 18.02, "learning_rate": 4.099375752441254e-05, "loss": 2.3098, "step": 3637000 }, { "epoch": 18.02, "learning_rate": 4.099251893798646e-05, "loss": 2.3288, "step": 3637500 }, { "epoch": 18.02, "learning_rate": 4.0991282828733226e-05, "loss": 2.3565, "step": 3638000 }, { "epoch": 18.03, "learning_rate": 4.099004424230714e-05, "loss": 2.3382, "step": 3638500 }, { "epoch": 18.03, "learning_rate": 4.098880565588106e-05, "loss": 2.3385, "step": 3639000 }, { "epoch": 18.03, "learning_rate": 4.098756954662782e-05, "loss": 2.3369, "step": 3639500 }, { "epoch": 18.03, "learning_rate": 4.098633096020174e-05, "loss": 2.3324, "step": 3640000 }, { "epoch": 18.04, "learning_rate": 4.0985092373775656e-05, "loss": 2.3385, "step": 3640500 }, { "epoch": 18.04, "learning_rate": 4.098385378734957e-05, "loss": 2.341, "step": 3641000 }, { "epoch": 18.04, "learning_rate": 4.098261520092349e-05, "loss": 2.3253, "step": 3641500 }, { "epoch": 18.04, "learning_rate": 4.098137661449741e-05, "loss": 2.3427, "step": 3642000 }, { "epoch": 18.05, "learning_rate": 4.0980138028071324e-05, "loss": 2.3467, "step": 3642500 }, { "epoch": 18.05, "learning_rate": 4.097889944164524e-05, "loss": 2.3307, "step": 3643000 }, { "epoch": 18.05, "learning_rate": 4.097766333239201e-05, "loss": 2.3384, "step": 3643500 }, { "epoch": 18.05, "learning_rate": 4.0976424745965927e-05, "loss": 2.3376, "step": 3644000 }, { "epoch": 18.06, "learning_rate": 4.0975186159539843e-05, "loss": 2.3253, "step": 3644500 }, { "epoch": 18.06, "learning_rate": 4.097394757311376e-05, "loss": 2.347, "step": 3645000 }, { "epoch": 18.06, "learning_rate": 4.097270898668768e-05, "loss": 2.3095, "step": 3645500 }, { "epoch": 18.06, "learning_rate": 4.0971470400261594e-05, "loss": 2.3029, "step": 3646000 }, { "epoch": 18.07, "learning_rate": 4.097023181383551e-05, "loss": 2.3188, "step": 3646500 }, { "epoch": 18.07, "learning_rate": 4.096899322740942e-05, "loss": 2.3294, "step": 3647000 }, { "epoch": 18.07, "learning_rate": 4.096775464098334e-05, "loss": 2.3405, "step": 3647500 }, { "epoch": 18.07, "learning_rate": 4.0966516054557255e-05, "loss": 2.3273, "step": 3648000 }, { "epoch": 18.08, "learning_rate": 4.096527746813117e-05, "loss": 2.3155, "step": 3648500 }, { "epoch": 18.08, "learning_rate": 4.096403888170509e-05, "loss": 2.3269, "step": 3649000 }, { "epoch": 18.08, "learning_rate": 4.0962800295279006e-05, "loss": 2.3336, "step": 3649500 }, { "epoch": 18.08, "learning_rate": 4.0961564186025775e-05, "loss": 2.3234, "step": 3650000 }, { "epoch": 18.09, "learning_rate": 4.096032559959969e-05, "loss": 2.3282, "step": 3650500 }, { "epoch": 18.09, "learning_rate": 4.095908701317361e-05, "loss": 2.3405, "step": 3651000 }, { "epoch": 18.09, "learning_rate": 4.0957848426747526e-05, "loss": 2.317, "step": 3651500 }, { "epoch": 18.09, "learning_rate": 4.0956612317494294e-05, "loss": 2.3369, "step": 3652000 }, { "epoch": 18.1, "learning_rate": 4.095537373106821e-05, "loss": 2.3423, "step": 3652500 }, { "epoch": 18.1, "learning_rate": 4.095413762181497e-05, "loss": 2.3363, "step": 3653000 }, { "epoch": 18.1, "learning_rate": 4.095289903538889e-05, "loss": 2.3515, "step": 3653500 }, { "epoch": 18.1, "learning_rate": 4.095166044896281e-05, "loss": 2.3327, "step": 3654000 }, { "epoch": 18.11, "learning_rate": 4.0950421862536724e-05, "loss": 2.3208, "step": 3654500 }, { "epoch": 18.11, "learning_rate": 4.094918327611064e-05, "loss": 2.3527, "step": 3655000 }, { "epoch": 18.11, "learning_rate": 4.094794468968456e-05, "loss": 2.352, "step": 3655500 }, { "epoch": 18.11, "learning_rate": 4.0946706103258475e-05, "loss": 2.3502, "step": 3656000 }, { "epoch": 18.12, "learning_rate": 4.094546751683239e-05, "loss": 2.3427, "step": 3656500 }, { "epoch": 18.12, "learning_rate": 4.094423140757916e-05, "loss": 2.3514, "step": 3657000 }, { "epoch": 18.12, "learning_rate": 4.094299282115308e-05, "loss": 2.3399, "step": 3657500 }, { "epoch": 18.12, "learning_rate": 4.0941754234726995e-05, "loss": 2.3461, "step": 3658000 }, { "epoch": 18.13, "learning_rate": 4.094051564830091e-05, "loss": 2.3321, "step": 3658500 }, { "epoch": 18.13, "learning_rate": 4.093927706187483e-05, "loss": 2.3296, "step": 3659000 }, { "epoch": 18.13, "learning_rate": 4.093804095262159e-05, "loss": 2.338, "step": 3659500 }, { "epoch": 18.13, "learning_rate": 4.093680236619551e-05, "loss": 2.3325, "step": 3660000 }, { "epoch": 18.14, "learning_rate": 4.093556625694228e-05, "loss": 2.3297, "step": 3660500 }, { "epoch": 18.14, "learning_rate": 4.093432767051619e-05, "loss": 2.3328, "step": 3661000 }, { "epoch": 18.14, "learning_rate": 4.093308908409011e-05, "loss": 2.354, "step": 3661500 }, { "epoch": 18.14, "learning_rate": 4.093185049766403e-05, "loss": 2.3395, "step": 3662000 }, { "epoch": 18.15, "learning_rate": 4.0930611911237944e-05, "loss": 2.3472, "step": 3662500 }, { "epoch": 18.15, "learning_rate": 4.092937332481186e-05, "loss": 2.3441, "step": 3663000 }, { "epoch": 18.15, "learning_rate": 4.092813473838578e-05, "loss": 2.3015, "step": 3663500 }, { "epoch": 18.15, "learning_rate": 4.092689862913254e-05, "loss": 2.3355, "step": 3664000 }, { "epoch": 18.16, "learning_rate": 4.092566004270646e-05, "loss": 2.3456, "step": 3664500 }, { "epoch": 18.16, "learning_rate": 4.0924421456280374e-05, "loss": 2.3623, "step": 3665000 }, { "epoch": 18.16, "learning_rate": 4.092318286985429e-05, "loss": 2.3409, "step": 3665500 }, { "epoch": 18.16, "learning_rate": 4.092194428342821e-05, "loss": 2.3148, "step": 3666000 }, { "epoch": 18.17, "learning_rate": 4.0920705697002124e-05, "loss": 2.3218, "step": 3666500 }, { "epoch": 18.17, "learning_rate": 4.091946711057604e-05, "loss": 2.3479, "step": 3667000 }, { "epoch": 18.17, "learning_rate": 4.091822852414996e-05, "loss": 2.3304, "step": 3667500 }, { "epoch": 18.17, "learning_rate": 4.0916989937723875e-05, "loss": 2.3497, "step": 3668000 }, { "epoch": 18.18, "learning_rate": 4.0915753828470644e-05, "loss": 2.3362, "step": 3668500 }, { "epoch": 18.18, "learning_rate": 4.091451771921741e-05, "loss": 2.3458, "step": 3669000 }, { "epoch": 18.18, "learning_rate": 4.091327913279133e-05, "loss": 2.3631, "step": 3669500 }, { "epoch": 18.18, "learning_rate": 4.091204054636525e-05, "loss": 2.3432, "step": 3670000 }, { "epoch": 18.18, "learning_rate": 4.091080195993916e-05, "loss": 2.3421, "step": 3670500 }, { "epoch": 18.19, "learning_rate": 4.0909563373513074e-05, "loss": 2.3361, "step": 3671000 }, { "epoch": 18.19, "learning_rate": 4.090832478708699e-05, "loss": 2.3355, "step": 3671500 }, { "epoch": 18.19, "learning_rate": 4.090708620066091e-05, "loss": 2.3339, "step": 3672000 }, { "epoch": 18.19, "learning_rate": 4.0905847614234825e-05, "loss": 2.3367, "step": 3672500 }, { "epoch": 18.2, "learning_rate": 4.090460902780874e-05, "loss": 2.3447, "step": 3673000 }, { "epoch": 18.2, "learning_rate": 4.090337291855551e-05, "loss": 2.3549, "step": 3673500 }, { "epoch": 18.2, "learning_rate": 4.090213433212943e-05, "loss": 2.3378, "step": 3674000 }, { "epoch": 18.2, "learning_rate": 4.0900895745703344e-05, "loss": 2.3436, "step": 3674500 }, { "epoch": 18.21, "learning_rate": 4.089965715927726e-05, "loss": 2.3406, "step": 3675000 }, { "epoch": 18.21, "learning_rate": 4.089841857285118e-05, "loss": 2.3393, "step": 3675500 }, { "epoch": 18.21, "learning_rate": 4.089718246359795e-05, "loss": 2.3336, "step": 3676000 }, { "epoch": 18.21, "learning_rate": 4.0895943877171864e-05, "loss": 2.3367, "step": 3676500 }, { "epoch": 18.22, "learning_rate": 4.089470529074578e-05, "loss": 2.3672, "step": 3677000 }, { "epoch": 18.22, "learning_rate": 4.089346670431969e-05, "loss": 2.3312, "step": 3677500 }, { "epoch": 18.22, "learning_rate": 4.089222811789361e-05, "loss": 2.35, "step": 3678000 }, { "epoch": 18.22, "learning_rate": 4.0890989531467525e-05, "loss": 2.3514, "step": 3678500 }, { "epoch": 18.23, "learning_rate": 4.08897534222143e-05, "loss": 2.369, "step": 3679000 }, { "epoch": 18.23, "learning_rate": 4.088851483578821e-05, "loss": 2.3395, "step": 3679500 }, { "epoch": 18.23, "learning_rate": 4.088727624936213e-05, "loss": 2.3569, "step": 3680000 }, { "epoch": 18.23, "learning_rate": 4.0886037662936044e-05, "loss": 2.3456, "step": 3680500 }, { "epoch": 18.24, "learning_rate": 4.088480155368282e-05, "loss": 2.3367, "step": 3681000 }, { "epoch": 18.24, "learning_rate": 4.088356544442958e-05, "loss": 2.3175, "step": 3681500 }, { "epoch": 18.24, "learning_rate": 4.08823268580035e-05, "loss": 2.3266, "step": 3682000 }, { "epoch": 18.24, "learning_rate": 4.0881088271577416e-05, "loss": 2.3512, "step": 3682500 }, { "epoch": 18.25, "learning_rate": 4.087984968515133e-05, "loss": 2.3464, "step": 3683000 }, { "epoch": 18.25, "learning_rate": 4.08786135758981e-05, "loss": 2.3319, "step": 3683500 }, { "epoch": 18.25, "learning_rate": 4.087737498947202e-05, "loss": 2.3545, "step": 3684000 }, { "epoch": 18.25, "learning_rate": 4.0876136403045935e-05, "loss": 2.3483, "step": 3684500 }, { "epoch": 18.26, "learning_rate": 4.087489781661985e-05, "loss": 2.3413, "step": 3685000 }, { "epoch": 18.26, "learning_rate": 4.087365923019377e-05, "loss": 2.3222, "step": 3685500 }, { "epoch": 18.26, "learning_rate": 4.0872420643767686e-05, "loss": 2.3522, "step": 3686000 }, { "epoch": 18.26, "learning_rate": 4.08711820573416e-05, "loss": 2.3184, "step": 3686500 }, { "epoch": 18.27, "learning_rate": 4.086994347091552e-05, "loss": 2.3553, "step": 3687000 }, { "epoch": 18.27, "learning_rate": 4.086870736166228e-05, "loss": 2.3474, "step": 3687500 }, { "epoch": 18.27, "learning_rate": 4.08674687752362e-05, "loss": 2.3626, "step": 3688000 }, { "epoch": 18.27, "learning_rate": 4.086623266598297e-05, "loss": 2.3262, "step": 3688500 }, { "epoch": 18.28, "learning_rate": 4.0864994079556885e-05, "loss": 2.3548, "step": 3689000 }, { "epoch": 18.28, "learning_rate": 4.08637554931308e-05, "loss": 2.3467, "step": 3689500 }, { "epoch": 18.28, "learning_rate": 4.086251690670472e-05, "loss": 2.3482, "step": 3690000 }, { "epoch": 18.28, "learning_rate": 4.086128079745149e-05, "loss": 2.3704, "step": 3690500 }, { "epoch": 18.29, "learning_rate": 4.0860042211025404e-05, "loss": 2.3454, "step": 3691000 }, { "epoch": 18.29, "learning_rate": 4.085880610177217e-05, "loss": 2.3512, "step": 3691500 }, { "epoch": 18.29, "learning_rate": 4.085756751534609e-05, "loss": 2.3291, "step": 3692000 }, { "epoch": 18.29, "learning_rate": 4.085632892892001e-05, "loss": 2.3378, "step": 3692500 }, { "epoch": 18.3, "learning_rate": 4.085509034249392e-05, "loss": 2.3274, "step": 3693000 }, { "epoch": 18.3, "learning_rate": 4.0853851756067834e-05, "loss": 2.3324, "step": 3693500 }, { "epoch": 18.3, "learning_rate": 4.085261316964175e-05, "loss": 2.3558, "step": 3694000 }, { "epoch": 18.3, "learning_rate": 4.085137458321567e-05, "loss": 2.3529, "step": 3694500 }, { "epoch": 18.31, "learning_rate": 4.0850135996789585e-05, "loss": 2.3572, "step": 3695000 }, { "epoch": 18.31, "learning_rate": 4.08488974103635e-05, "loss": 2.3294, "step": 3695500 }, { "epoch": 18.31, "learning_rate": 4.084765882393742e-05, "loss": 2.3592, "step": 3696000 }, { "epoch": 18.31, "learning_rate": 4.084642271468419e-05, "loss": 2.3392, "step": 3696500 }, { "epoch": 18.32, "learning_rate": 4.0845184128258104e-05, "loss": 2.3154, "step": 3697000 }, { "epoch": 18.32, "learning_rate": 4.084394801900487e-05, "loss": 2.3509, "step": 3697500 }, { "epoch": 18.32, "learning_rate": 4.084270943257879e-05, "loss": 2.3477, "step": 3698000 }, { "epoch": 18.32, "learning_rate": 4.084147084615271e-05, "loss": 2.3218, "step": 3698500 }, { "epoch": 18.33, "learning_rate": 4.084023225972662e-05, "loss": 2.3387, "step": 3699000 }, { "epoch": 18.33, "learning_rate": 4.0838993673300534e-05, "loss": 2.3438, "step": 3699500 }, { "epoch": 18.33, "learning_rate": 4.083775508687445e-05, "loss": 2.341, "step": 3700000 }, { "epoch": 18.33, "learning_rate": 4.083651650044837e-05, "loss": 2.3482, "step": 3700500 }, { "epoch": 18.34, "learning_rate": 4.0835277914022285e-05, "loss": 2.3661, "step": 3701000 }, { "epoch": 18.34, "learning_rate": 4.08340393275962e-05, "loss": 2.3362, "step": 3701500 }, { "epoch": 18.34, "learning_rate": 4.083280074117012e-05, "loss": 2.3077, "step": 3702000 }, { "epoch": 18.34, "learning_rate": 4.083156463191689e-05, "loss": 2.362, "step": 3702500 }, { "epoch": 18.35, "learning_rate": 4.0830326045490804e-05, "loss": 2.3652, "step": 3703000 }, { "epoch": 18.35, "learning_rate": 4.082908745906472e-05, "loss": 2.3664, "step": 3703500 }, { "epoch": 18.35, "learning_rate": 4.082784887263864e-05, "loss": 2.3331, "step": 3704000 }, { "epoch": 18.35, "learning_rate": 4.0826610286212555e-05, "loss": 2.3267, "step": 3704500 }, { "epoch": 18.36, "learning_rate": 4.0825371699786465e-05, "loss": 2.3312, "step": 3705000 }, { "epoch": 18.36, "learning_rate": 4.082413311336038e-05, "loss": 2.362, "step": 3705500 }, { "epoch": 18.36, "learning_rate": 4.082289700410715e-05, "loss": 2.3326, "step": 3706000 }, { "epoch": 18.36, "learning_rate": 4.082165841768107e-05, "loss": 2.3602, "step": 3706500 }, { "epoch": 18.37, "learning_rate": 4.0820419831254985e-05, "loss": 2.3623, "step": 3707000 }, { "epoch": 18.37, "learning_rate": 4.08191812448289e-05, "loss": 2.3526, "step": 3707500 }, { "epoch": 18.37, "learning_rate": 4.081794265840282e-05, "loss": 2.3439, "step": 3708000 }, { "epoch": 18.37, "learning_rate": 4.081670654914959e-05, "loss": 2.3465, "step": 3708500 }, { "epoch": 18.38, "learning_rate": 4.0815467962723505e-05, "loss": 2.3447, "step": 3709000 }, { "epoch": 18.38, "learning_rate": 4.081422937629742e-05, "loss": 2.3459, "step": 3709500 }, { "epoch": 18.38, "learning_rate": 4.081299078987134e-05, "loss": 2.3666, "step": 3710000 }, { "epoch": 18.38, "learning_rate": 4.0811752203445255e-05, "loss": 2.3043, "step": 3710500 }, { "epoch": 18.39, "learning_rate": 4.0810516094192024e-05, "loss": 2.3246, "step": 3711000 }, { "epoch": 18.39, "learning_rate": 4.0809277507765934e-05, "loss": 2.3471, "step": 3711500 }, { "epoch": 18.39, "learning_rate": 4.080803892133985e-05, "loss": 2.3432, "step": 3712000 }, { "epoch": 18.39, "learning_rate": 4.080680033491377e-05, "loss": 2.3355, "step": 3712500 }, { "epoch": 18.4, "learning_rate": 4.0805564225660544e-05, "loss": 2.3684, "step": 3713000 }, { "epoch": 18.4, "learning_rate": 4.080432563923446e-05, "loss": 2.366, "step": 3713500 }, { "epoch": 18.4, "learning_rate": 4.080308705280838e-05, "loss": 2.3494, "step": 3714000 }, { "epoch": 18.4, "learning_rate": 4.080184846638229e-05, "loss": 2.3516, "step": 3714500 }, { "epoch": 18.41, "learning_rate": 4.0800609879956205e-05, "loss": 2.3581, "step": 3715000 }, { "epoch": 18.41, "learning_rate": 4.079937129353012e-05, "loss": 2.3587, "step": 3715500 }, { "epoch": 18.41, "learning_rate": 4.079813270710404e-05, "loss": 2.3318, "step": 3716000 }, { "epoch": 18.41, "learning_rate": 4.0796894120677956e-05, "loss": 2.3423, "step": 3716500 }, { "epoch": 18.42, "learning_rate": 4.079565553425187e-05, "loss": 2.338, "step": 3717000 }, { "epoch": 18.42, "learning_rate": 4.079441942499864e-05, "loss": 2.3467, "step": 3717500 }, { "epoch": 18.42, "learning_rate": 4.079318331574541e-05, "loss": 2.3822, "step": 3718000 }, { "epoch": 18.42, "learning_rate": 4.079194472931933e-05, "loss": 2.3626, "step": 3718500 }, { "epoch": 18.43, "learning_rate": 4.0790706142893244e-05, "loss": 2.3683, "step": 3719000 }, { "epoch": 18.43, "learning_rate": 4.078946755646716e-05, "loss": 2.3831, "step": 3719500 }, { "epoch": 18.43, "learning_rate": 4.078822897004108e-05, "loss": 2.3206, "step": 3720000 }, { "epoch": 18.43, "learning_rate": 4.0786990383614995e-05, "loss": 2.3446, "step": 3720500 }, { "epoch": 18.44, "learning_rate": 4.0785751797188905e-05, "loss": 2.3532, "step": 3721000 }, { "epoch": 18.44, "learning_rate": 4.078451321076282e-05, "loss": 2.3127, "step": 3721500 }, { "epoch": 18.44, "learning_rate": 4.078327462433674e-05, "loss": 2.3387, "step": 3722000 }, { "epoch": 18.44, "learning_rate": 4.0782036037910656e-05, "loss": 2.3392, "step": 3722500 }, { "epoch": 18.45, "learning_rate": 4.0780799928657424e-05, "loss": 2.3358, "step": 3723000 }, { "epoch": 18.45, "learning_rate": 4.077956134223134e-05, "loss": 2.3367, "step": 3723500 }, { "epoch": 18.45, "learning_rate": 4.077832275580525e-05, "loss": 2.375, "step": 3724000 }, { "epoch": 18.45, "learning_rate": 4.077708416937917e-05, "loss": 2.3369, "step": 3724500 }, { "epoch": 18.45, "learning_rate": 4.0775845582953085e-05, "loss": 2.3509, "step": 3725000 }, { "epoch": 18.46, "learning_rate": 4.0774606996527e-05, "loss": 2.3558, "step": 3725500 }, { "epoch": 18.46, "learning_rate": 4.077336841010092e-05, "loss": 2.338, "step": 3726000 }, { "epoch": 18.46, "learning_rate": 4.0772129823674836e-05, "loss": 2.351, "step": 3726500 }, { "epoch": 18.46, "learning_rate": 4.0770893714421605e-05, "loss": 2.3565, "step": 3727000 }, { "epoch": 18.47, "learning_rate": 4.076965512799552e-05, "loss": 2.3638, "step": 3727500 }, { "epoch": 18.47, "learning_rate": 4.076841654156944e-05, "loss": 2.3696, "step": 3728000 }, { "epoch": 18.47, "learning_rate": 4.0767177955143356e-05, "loss": 2.3566, "step": 3728500 }, { "epoch": 18.47, "learning_rate": 4.076593936871727e-05, "loss": 2.3472, "step": 3729000 }, { "epoch": 18.48, "learning_rate": 4.076470325946404e-05, "loss": 2.3396, "step": 3729500 }, { "epoch": 18.48, "learning_rate": 4.076346467303796e-05, "loss": 2.3794, "step": 3730000 }, { "epoch": 18.48, "learning_rate": 4.076222608661187e-05, "loss": 2.3788, "step": 3730500 }, { "epoch": 18.48, "learning_rate": 4.0760987500185786e-05, "loss": 2.3517, "step": 3731000 }, { "epoch": 18.49, "learning_rate": 4.075975139093256e-05, "loss": 2.3382, "step": 3731500 }, { "epoch": 18.49, "learning_rate": 4.075851280450648e-05, "loss": 2.3403, "step": 3732000 }, { "epoch": 18.49, "learning_rate": 4.0757274218080395e-05, "loss": 2.3588, "step": 3732500 }, { "epoch": 18.49, "learning_rate": 4.075603563165431e-05, "loss": 2.3352, "step": 3733000 }, { "epoch": 18.5, "learning_rate": 4.075479704522822e-05, "loss": 2.3494, "step": 3733500 }, { "epoch": 18.5, "learning_rate": 4.075355845880214e-05, "loss": 2.3463, "step": 3734000 }, { "epoch": 18.5, "learning_rate": 4.0752319872376056e-05, "loss": 2.3804, "step": 3734500 }, { "epoch": 18.5, "learning_rate": 4.075108128594997e-05, "loss": 2.3203, "step": 3735000 }, { "epoch": 18.51, "learning_rate": 4.074984269952389e-05, "loss": 2.3402, "step": 3735500 }, { "epoch": 18.51, "learning_rate": 4.074860659027066e-05, "loss": 2.3319, "step": 3736000 }, { "epoch": 18.51, "learning_rate": 4.074737048101743e-05, "loss": 2.3261, "step": 3736500 }, { "epoch": 18.51, "learning_rate": 4.0746131894591344e-05, "loss": 2.3539, "step": 3737000 }, { "epoch": 18.52, "learning_rate": 4.074489330816526e-05, "loss": 2.3372, "step": 3737500 }, { "epoch": 18.52, "learning_rate": 4.074365472173918e-05, "loss": 2.3349, "step": 3738000 }, { "epoch": 18.52, "learning_rate": 4.0742416135313095e-05, "loss": 2.3208, "step": 3738500 }, { "epoch": 18.52, "learning_rate": 4.0741180026059864e-05, "loss": 2.3495, "step": 3739000 }, { "epoch": 18.53, "learning_rate": 4.073994143963378e-05, "loss": 2.3535, "step": 3739500 }, { "epoch": 18.53, "learning_rate": 4.073870285320769e-05, "loss": 2.3686, "step": 3740000 }, { "epoch": 18.53, "learning_rate": 4.073746426678161e-05, "loss": 2.3508, "step": 3740500 }, { "epoch": 18.53, "learning_rate": 4.0736225680355525e-05, "loss": 2.3398, "step": 3741000 }, { "epoch": 18.54, "learning_rate": 4.073498709392944e-05, "loss": 2.3602, "step": 3741500 }, { "epoch": 18.54, "learning_rate": 4.073375098467621e-05, "loss": 2.344, "step": 3742000 }, { "epoch": 18.54, "learning_rate": 4.073251239825013e-05, "loss": 2.3464, "step": 3742500 }, { "epoch": 18.54, "learning_rate": 4.0731273811824044e-05, "loss": 2.3433, "step": 3743000 }, { "epoch": 18.55, "learning_rate": 4.073003522539796e-05, "loss": 2.3592, "step": 3743500 }, { "epoch": 18.55, "learning_rate": 4.072879663897188e-05, "loss": 2.318, "step": 3744000 }, { "epoch": 18.55, "learning_rate": 4.072756052971865e-05, "loss": 2.3456, "step": 3744500 }, { "epoch": 18.55, "learning_rate": 4.0726321943292564e-05, "loss": 2.3568, "step": 3745000 }, { "epoch": 18.56, "learning_rate": 4.072508335686648e-05, "loss": 2.3698, "step": 3745500 }, { "epoch": 18.56, "learning_rate": 4.07238447704404e-05, "loss": 2.3393, "step": 3746000 }, { "epoch": 18.56, "learning_rate": 4.072260618401431e-05, "loss": 2.3585, "step": 3746500 }, { "epoch": 18.56, "learning_rate": 4.0721367597588225e-05, "loss": 2.3521, "step": 3747000 }, { "epoch": 18.57, "learning_rate": 4.072012901116214e-05, "loss": 2.3465, "step": 3747500 }, { "epoch": 18.57, "learning_rate": 4.071889042473606e-05, "loss": 2.3389, "step": 3748000 }, { "epoch": 18.57, "learning_rate": 4.071765431548283e-05, "loss": 2.3552, "step": 3748500 }, { "epoch": 18.57, "learning_rate": 4.0716415729056745e-05, "loss": 2.3512, "step": 3749000 }, { "epoch": 18.58, "learning_rate": 4.071517714263066e-05, "loss": 2.3437, "step": 3749500 }, { "epoch": 18.58, "learning_rate": 4.071393855620458e-05, "loss": 2.3615, "step": 3750000 }, { "epoch": 18.58, "learning_rate": 4.0712699969778495e-05, "loss": 2.3265, "step": 3750500 }, { "epoch": 18.58, "learning_rate": 4.071146138335241e-05, "loss": 2.3665, "step": 3751000 }, { "epoch": 18.59, "learning_rate": 4.071022527409918e-05, "loss": 2.345, "step": 3751500 }, { "epoch": 18.59, "learning_rate": 4.07089866876731e-05, "loss": 2.3294, "step": 3752000 }, { "epoch": 18.59, "learning_rate": 4.0707748101247015e-05, "loss": 2.3589, "step": 3752500 }, { "epoch": 18.59, "learning_rate": 4.070650951482093e-05, "loss": 2.3329, "step": 3753000 }, { "epoch": 18.6, "learning_rate": 4.070527092839484e-05, "loss": 2.3413, "step": 3753500 }, { "epoch": 18.6, "learning_rate": 4.070403729631446e-05, "loss": 2.3506, "step": 3754000 }, { "epoch": 18.6, "learning_rate": 4.070279870988838e-05, "loss": 2.3563, "step": 3754500 }, { "epoch": 18.6, "learning_rate": 4.0701560123462297e-05, "loss": 2.3641, "step": 3755000 }, { "epoch": 18.61, "learning_rate": 4.0700321537036213e-05, "loss": 2.3566, "step": 3755500 }, { "epoch": 18.61, "learning_rate": 4.069908295061013e-05, "loss": 2.3409, "step": 3756000 }, { "epoch": 18.61, "learning_rate": 4.06978468413569e-05, "loss": 2.3472, "step": 3756500 }, { "epoch": 18.61, "learning_rate": 4.069660825493081e-05, "loss": 2.3627, "step": 3757000 }, { "epoch": 18.62, "learning_rate": 4.0695369668504726e-05, "loss": 2.3429, "step": 3757500 }, { "epoch": 18.62, "learning_rate": 4.069413108207864e-05, "loss": 2.3491, "step": 3758000 }, { "epoch": 18.62, "learning_rate": 4.069289497282542e-05, "loss": 2.3293, "step": 3758500 }, { "epoch": 18.62, "learning_rate": 4.069165886357219e-05, "loss": 2.365, "step": 3759000 }, { "epoch": 18.63, "learning_rate": 4.0690420277146105e-05, "loss": 2.3383, "step": 3759500 }, { "epoch": 18.63, "learning_rate": 4.068918169072002e-05, "loss": 2.3502, "step": 3760000 }, { "epoch": 18.63, "learning_rate": 4.068794310429394e-05, "loss": 2.3331, "step": 3760500 }, { "epoch": 18.63, "learning_rate": 4.068670451786785e-05, "loss": 2.3463, "step": 3761000 }, { "epoch": 18.64, "learning_rate": 4.0685465931441765e-05, "loss": 2.3493, "step": 3761500 }, { "epoch": 18.64, "learning_rate": 4.068422734501568e-05, "loss": 2.3455, "step": 3762000 }, { "epoch": 18.64, "learning_rate": 4.06829887585896e-05, "loss": 2.3398, "step": 3762500 }, { "epoch": 18.64, "learning_rate": 4.0681750172163516e-05, "loss": 2.3684, "step": 3763000 }, { "epoch": 18.65, "learning_rate": 4.0680514062910285e-05, "loss": 2.3276, "step": 3763500 }, { "epoch": 18.65, "learning_rate": 4.06792754764842e-05, "loss": 2.3632, "step": 3764000 }, { "epoch": 18.65, "learning_rate": 4.067803689005812e-05, "loss": 2.3577, "step": 3764500 }, { "epoch": 18.65, "learning_rate": 4.0676798303632036e-05, "loss": 2.3654, "step": 3765000 }, { "epoch": 18.66, "learning_rate": 4.0675562194378805e-05, "loss": 2.3418, "step": 3765500 }, { "epoch": 18.66, "learning_rate": 4.067432360795272e-05, "loss": 2.3504, "step": 3766000 }, { "epoch": 18.66, "learning_rate": 4.067308502152664e-05, "loss": 2.3258, "step": 3766500 }, { "epoch": 18.66, "learning_rate": 4.0671846435100555e-05, "loss": 2.3525, "step": 3767000 }, { "epoch": 18.67, "learning_rate": 4.067060784867447e-05, "loss": 2.3618, "step": 3767500 }, { "epoch": 18.67, "learning_rate": 4.066936926224838e-05, "loss": 2.3604, "step": 3768000 }, { "epoch": 18.67, "learning_rate": 4.06681306758223e-05, "loss": 2.365, "step": 3768500 }, { "epoch": 18.67, "learning_rate": 4.0666892089396216e-05, "loss": 2.3799, "step": 3769000 }, { "epoch": 18.68, "learning_rate": 4.066565350297013e-05, "loss": 2.3418, "step": 3769500 }, { "epoch": 18.68, "learning_rate": 4.066441491654405e-05, "loss": 2.3537, "step": 3770000 }, { "epoch": 18.68, "learning_rate": 4.066317633011796e-05, "loss": 2.363, "step": 3770500 }, { "epoch": 18.68, "learning_rate": 4.066193774369188e-05, "loss": 2.3617, "step": 3771000 }, { "epoch": 18.69, "learning_rate": 4.0660699157265794e-05, "loss": 2.3339, "step": 3771500 }, { "epoch": 18.69, "learning_rate": 4.065946057083971e-05, "loss": 2.3676, "step": 3772000 }, { "epoch": 18.69, "learning_rate": 4.065822446158648e-05, "loss": 2.3482, "step": 3772500 }, { "epoch": 18.69, "learning_rate": 4.06569858751604e-05, "loss": 2.333, "step": 3773000 }, { "epoch": 18.7, "learning_rate": 4.0655747288734314e-05, "loss": 2.3455, "step": 3773500 }, { "epoch": 18.7, "learning_rate": 4.065450870230823e-05, "loss": 2.3702, "step": 3774000 }, { "epoch": 18.7, "learning_rate": 4.065327011588215e-05, "loss": 2.3753, "step": 3774500 }, { "epoch": 18.7, "learning_rate": 4.0652034006628917e-05, "loss": 2.3434, "step": 3775000 }, { "epoch": 18.71, "learning_rate": 4.0650795420202833e-05, "loss": 2.3595, "step": 3775500 }, { "epoch": 18.71, "learning_rate": 4.064955683377675e-05, "loss": 2.3349, "step": 3776000 }, { "epoch": 18.71, "learning_rate": 4.064831824735067e-05, "loss": 2.3468, "step": 3776500 }, { "epoch": 18.71, "learning_rate": 4.064707966092458e-05, "loss": 2.35, "step": 3777000 }, { "epoch": 18.72, "learning_rate": 4.0645841074498494e-05, "loss": 2.3771, "step": 3777500 }, { "epoch": 18.72, "learning_rate": 4.064460496524526e-05, "loss": 2.3921, "step": 3778000 }, { "epoch": 18.72, "learning_rate": 4.064336637881918e-05, "loss": 2.3522, "step": 3778500 }, { "epoch": 18.72, "learning_rate": 4.06421277923931e-05, "loss": 2.3284, "step": 3779000 }, { "epoch": 18.72, "learning_rate": 4.0640889205967014e-05, "loss": 2.3193, "step": 3779500 }, { "epoch": 18.73, "learning_rate": 4.063965061954093e-05, "loss": 2.3292, "step": 3780000 }, { "epoch": 18.73, "learning_rate": 4.063841203311485e-05, "loss": 2.3412, "step": 3780500 }, { "epoch": 18.73, "learning_rate": 4.0637173446688765e-05, "loss": 2.3477, "step": 3781000 }, { "epoch": 18.73, "learning_rate": 4.063593486026268e-05, "loss": 2.3427, "step": 3781500 }, { "epoch": 18.74, "learning_rate": 4.063469875100945e-05, "loss": 2.359, "step": 3782000 }, { "epoch": 18.74, "learning_rate": 4.063346264175622e-05, "loss": 2.3708, "step": 3782500 }, { "epoch": 18.74, "learning_rate": 4.0632224055330136e-05, "loss": 2.3456, "step": 3783000 }, { "epoch": 18.74, "learning_rate": 4.063098546890405e-05, "loss": 2.3478, "step": 3783500 }, { "epoch": 18.75, "learning_rate": 4.062974935965082e-05, "loss": 2.3417, "step": 3784000 }, { "epoch": 18.75, "learning_rate": 4.062851077322474e-05, "loss": 2.3459, "step": 3784500 }, { "epoch": 18.75, "learning_rate": 4.0627272186798656e-05, "loss": 2.3443, "step": 3785000 }, { "epoch": 18.75, "learning_rate": 4.062603360037257e-05, "loss": 2.3511, "step": 3785500 }, { "epoch": 18.76, "learning_rate": 4.062479501394649e-05, "loss": 2.3571, "step": 3786000 }, { "epoch": 18.76, "learning_rate": 4.062355890469325e-05, "loss": 2.3711, "step": 3786500 }, { "epoch": 18.76, "learning_rate": 4.062232031826717e-05, "loss": 2.3704, "step": 3787000 }, { "epoch": 18.76, "learning_rate": 4.0621081731841086e-05, "loss": 2.3424, "step": 3787500 }, { "epoch": 18.77, "learning_rate": 4.0619843145415e-05, "loss": 2.3654, "step": 3788000 }, { "epoch": 18.77, "learning_rate": 4.061860455898892e-05, "loss": 2.3397, "step": 3788500 }, { "epoch": 18.77, "learning_rate": 4.0617365972562836e-05, "loss": 2.3416, "step": 3789000 }, { "epoch": 18.77, "learning_rate": 4.061612738613675e-05, "loss": 2.3722, "step": 3789500 }, { "epoch": 18.78, "learning_rate": 4.061488879971067e-05, "loss": 2.3566, "step": 3790000 }, { "epoch": 18.78, "learning_rate": 4.061365269045744e-05, "loss": 2.3691, "step": 3790500 }, { "epoch": 18.78, "learning_rate": 4.0612414104031356e-05, "loss": 2.3407, "step": 3791000 }, { "epoch": 18.78, "learning_rate": 4.061117551760527e-05, "loss": 2.3298, "step": 3791500 }, { "epoch": 18.79, "learning_rate": 4.060993693117919e-05, "loss": 2.3593, "step": 3792000 }, { "epoch": 18.79, "learning_rate": 4.060869834475311e-05, "loss": 2.3477, "step": 3792500 }, { "epoch": 18.79, "learning_rate": 4.0607459758327024e-05, "loss": 2.3582, "step": 3793000 }, { "epoch": 18.79, "learning_rate": 4.0606223649073786e-05, "loss": 2.3396, "step": 3793500 }, { "epoch": 18.8, "learning_rate": 4.06049850626477e-05, "loss": 2.3546, "step": 3794000 }, { "epoch": 18.8, "learning_rate": 4.060374647622162e-05, "loss": 2.3364, "step": 3794500 }, { "epoch": 18.8, "learning_rate": 4.0602507889795536e-05, "loss": 2.3689, "step": 3795000 }, { "epoch": 18.8, "learning_rate": 4.0601269303369453e-05, "loss": 2.3572, "step": 3795500 }, { "epoch": 18.81, "learning_rate": 4.060003071694337e-05, "loss": 2.3567, "step": 3796000 }, { "epoch": 18.81, "learning_rate": 4.059879213051728e-05, "loss": 2.3234, "step": 3796500 }, { "epoch": 18.81, "learning_rate": 4.05975535440912e-05, "loss": 2.3548, "step": 3797000 }, { "epoch": 18.81, "learning_rate": 4.0596314957665114e-05, "loss": 2.3628, "step": 3797500 }, { "epoch": 18.82, "learning_rate": 4.059507884841189e-05, "loss": 2.3432, "step": 3798000 }, { "epoch": 18.82, "learning_rate": 4.059384026198581e-05, "loss": 2.3482, "step": 3798500 }, { "epoch": 18.82, "learning_rate": 4.0592601675559724e-05, "loss": 2.3262, "step": 3799000 }, { "epoch": 18.82, "learning_rate": 4.059136308913364e-05, "loss": 2.354, "step": 3799500 }, { "epoch": 18.83, "learning_rate": 4.05901269798804e-05, "loss": 2.3366, "step": 3800000 }, { "epoch": 18.83, "learning_rate": 4.058889087062717e-05, "loss": 2.3351, "step": 3800500 }, { "epoch": 18.83, "learning_rate": 4.058765228420109e-05, "loss": 2.3698, "step": 3801000 }, { "epoch": 18.83, "learning_rate": 4.0586413697775005e-05, "loss": 2.3652, "step": 3801500 }, { "epoch": 18.84, "learning_rate": 4.058517511134892e-05, "loss": 2.3572, "step": 3802000 }, { "epoch": 18.84, "learning_rate": 4.058393900209569e-05, "loss": 2.3613, "step": 3802500 }, { "epoch": 18.84, "learning_rate": 4.058270041566961e-05, "loss": 2.3577, "step": 3803000 }, { "epoch": 18.84, "learning_rate": 4.0581461829243525e-05, "loss": 2.3717, "step": 3803500 }, { "epoch": 18.85, "learning_rate": 4.058022324281744e-05, "loss": 2.3315, "step": 3804000 }, { "epoch": 18.85, "learning_rate": 4.057898465639136e-05, "loss": 2.375, "step": 3804500 }, { "epoch": 18.85, "learning_rate": 4.057774606996527e-05, "loss": 2.3772, "step": 3805000 }, { "epoch": 18.85, "learning_rate": 4.0576507483539186e-05, "loss": 2.3574, "step": 3805500 }, { "epoch": 18.86, "learning_rate": 4.05752688971131e-05, "loss": 2.3731, "step": 3806000 }, { "epoch": 18.86, "learning_rate": 4.057403031068702e-05, "loss": 2.3567, "step": 3806500 }, { "epoch": 18.86, "learning_rate": 4.057279172426094e-05, "loss": 2.3155, "step": 3807000 }, { "epoch": 18.86, "learning_rate": 4.0571553137834854e-05, "loss": 2.362, "step": 3807500 }, { "epoch": 18.87, "learning_rate": 4.057031455140877e-05, "loss": 2.3639, "step": 3808000 }, { "epoch": 18.87, "learning_rate": 4.056907596498269e-05, "loss": 2.3458, "step": 3808500 }, { "epoch": 18.87, "learning_rate": 4.05678373785566e-05, "loss": 2.3485, "step": 3809000 }, { "epoch": 18.87, "learning_rate": 4.0566598792130515e-05, "loss": 2.3583, "step": 3809500 }, { "epoch": 18.88, "learning_rate": 4.056536516005014e-05, "loss": 2.3601, "step": 3810000 }, { "epoch": 18.88, "learning_rate": 4.056412657362406e-05, "loss": 2.3376, "step": 3810500 }, { "epoch": 18.88, "learning_rate": 4.0562887987197976e-05, "loss": 2.3372, "step": 3811000 }, { "epoch": 18.88, "learning_rate": 4.056165187794474e-05, "loss": 2.3443, "step": 3811500 }, { "epoch": 18.89, "learning_rate": 4.0560413291518655e-05, "loss": 2.349, "step": 3812000 }, { "epoch": 18.89, "learning_rate": 4.055917470509257e-05, "loss": 2.3483, "step": 3812500 }, { "epoch": 18.89, "learning_rate": 4.055793611866649e-05, "loss": 2.3419, "step": 3813000 }, { "epoch": 18.89, "learning_rate": 4.0556697532240406e-05, "loss": 2.3571, "step": 3813500 }, { "epoch": 18.9, "learning_rate": 4.055545894581432e-05, "loss": 2.3455, "step": 3814000 }, { "epoch": 18.9, "learning_rate": 4.055422035938824e-05, "loss": 2.3596, "step": 3814500 }, { "epoch": 18.9, "learning_rate": 4.0552981772962156e-05, "loss": 2.3755, "step": 3815000 }, { "epoch": 18.9, "learning_rate": 4.055174318653607e-05, "loss": 2.3312, "step": 3815500 }, { "epoch": 18.91, "learning_rate": 4.055050460010999e-05, "loss": 2.356, "step": 3816000 }, { "epoch": 18.91, "learning_rate": 4.054926601368391e-05, "loss": 2.3591, "step": 3816500 }, { "epoch": 18.91, "learning_rate": 4.0548027427257824e-05, "loss": 2.3418, "step": 3817000 }, { "epoch": 18.91, "learning_rate": 4.054678884083174e-05, "loss": 2.3178, "step": 3817500 }, { "epoch": 18.92, "learning_rate": 4.054555025440566e-05, "loss": 2.3461, "step": 3818000 }, { "epoch": 18.92, "learning_rate": 4.054431166797957e-05, "loss": 2.3436, "step": 3818500 }, { "epoch": 18.92, "learning_rate": 4.0543073081553485e-05, "loss": 2.3313, "step": 3819000 }, { "epoch": 18.92, "learning_rate": 4.05418344951274e-05, "loss": 2.3429, "step": 3819500 }, { "epoch": 18.93, "learning_rate": 4.054059838587417e-05, "loss": 2.3593, "step": 3820000 }, { "epoch": 18.93, "learning_rate": 4.053936227662094e-05, "loss": 2.3424, "step": 3820500 }, { "epoch": 18.93, "learning_rate": 4.0538123690194857e-05, "loss": 2.3253, "step": 3821000 }, { "epoch": 18.93, "learning_rate": 4.0536885103768774e-05, "loss": 2.3515, "step": 3821500 }, { "epoch": 18.94, "learning_rate": 4.053564651734269e-05, "loss": 2.3587, "step": 3822000 }, { "epoch": 18.94, "learning_rate": 4.053440793091661e-05, "loss": 2.3128, "step": 3822500 }, { "epoch": 18.94, "learning_rate": 4.0533169344490524e-05, "loss": 2.3619, "step": 3823000 }, { "epoch": 18.94, "learning_rate": 4.053193075806444e-05, "loss": 2.3424, "step": 3823500 }, { "epoch": 18.95, "learning_rate": 4.053069217163836e-05, "loss": 2.3527, "step": 3824000 }, { "epoch": 18.95, "learning_rate": 4.0529453585212275e-05, "loss": 2.3656, "step": 3824500 }, { "epoch": 18.95, "learning_rate": 4.0528217475959044e-05, "loss": 2.3551, "step": 3825000 }, { "epoch": 18.95, "learning_rate": 4.0526978889532954e-05, "loss": 2.3346, "step": 3825500 }, { "epoch": 18.96, "learning_rate": 4.052574278027972e-05, "loss": 2.3258, "step": 3826000 }, { "epoch": 18.96, "learning_rate": 4.052450419385364e-05, "loss": 2.3451, "step": 3826500 }, { "epoch": 18.96, "learning_rate": 4.052326560742756e-05, "loss": 2.3521, "step": 3827000 }, { "epoch": 18.96, "learning_rate": 4.0522027021001474e-05, "loss": 2.3527, "step": 3827500 }, { "epoch": 18.97, "learning_rate": 4.052078843457539e-05, "loss": 2.3459, "step": 3828000 }, { "epoch": 18.97, "learning_rate": 4.051954984814931e-05, "loss": 2.3684, "step": 3828500 }, { "epoch": 18.97, "learning_rate": 4.0518311261723224e-05, "loss": 2.3569, "step": 3829000 }, { "epoch": 18.97, "learning_rate": 4.051707267529714e-05, "loss": 2.368, "step": 3829500 }, { "epoch": 18.98, "learning_rate": 4.051583408887106e-05, "loss": 2.3412, "step": 3830000 }, { "epoch": 18.98, "learning_rate": 4.0514595502444975e-05, "loss": 2.3685, "step": 3830500 }, { "epoch": 18.98, "learning_rate": 4.0513356916018885e-05, "loss": 2.3667, "step": 3831000 }, { "epoch": 18.98, "learning_rate": 4.05121183295928e-05, "loss": 2.333, "step": 3831500 }, { "epoch": 18.99, "learning_rate": 4.051088469751242e-05, "loss": 2.3416, "step": 3832000 }, { "epoch": 18.99, "learning_rate": 4.050964611108634e-05, "loss": 2.344, "step": 3832500 }, { "epoch": 18.99, "learning_rate": 4.050840752466026e-05, "loss": 2.3712, "step": 3833000 }, { "epoch": 18.99, "learning_rate": 4.0507168938234174e-05, "loss": 2.3576, "step": 3833500 }, { "epoch": 18.99, "learning_rate": 4.050593282898094e-05, "loss": 2.3196, "step": 3834000 }, { "epoch": 19.0, "learning_rate": 4.050469671972771e-05, "loss": 2.3713, "step": 3834500 }, { "epoch": 19.0, "learning_rate": 4.050345813330163e-05, "loss": 2.3427, "step": 3835000 }, { "epoch": 19.0, "eval_accuracy": 0.6510998864965143, "eval_accuracy_mlm": 0.6054676656033432, "eval_accuracy_nsp": 0.8664373487501912, "eval_loss": 2.3667972087860107, "eval_runtime": 145.68, "eval_samples_per_second": 1750.131, "eval_steps_per_second": 72.927, "step": 3835017 }, { "epoch": 19.0, "learning_rate": 4.050221954687554e-05, "loss": 2.3276, "step": 3835500 }, { "epoch": 19.0, "learning_rate": 4.0500983437622314e-05, "loss": 2.3316, "step": 3836000 }, { "epoch": 19.01, "learning_rate": 4.049974485119623e-05, "loss": 2.3063, "step": 3836500 }, { "epoch": 19.01, "learning_rate": 4.049850626477015e-05, "loss": 2.3161, "step": 3837000 }, { "epoch": 19.01, "learning_rate": 4.0497267678344065e-05, "loss": 2.3319, "step": 3837500 }, { "epoch": 19.01, "learning_rate": 4.0496031569090834e-05, "loss": 2.3158, "step": 3838000 }, { "epoch": 19.02, "learning_rate": 4.049479298266475e-05, "loss": 2.3323, "step": 3838500 }, { "epoch": 19.02, "learning_rate": 4.049355439623867e-05, "loss": 2.3132, "step": 3839000 }, { "epoch": 19.02, "learning_rate": 4.049231580981258e-05, "loss": 2.3162, "step": 3839500 }, { "epoch": 19.02, "learning_rate": 4.0491077223386495e-05, "loss": 2.3272, "step": 3840000 }, { "epoch": 19.03, "learning_rate": 4.048983863696041e-05, "loss": 2.3237, "step": 3840500 }, { "epoch": 19.03, "learning_rate": 4.048860005053433e-05, "loss": 2.3282, "step": 3841000 }, { "epoch": 19.03, "learning_rate": 4.0487361464108245e-05, "loss": 2.3251, "step": 3841500 }, { "epoch": 19.03, "learning_rate": 4.0486122877682156e-05, "loss": 2.3182, "step": 3842000 }, { "epoch": 19.04, "learning_rate": 4.048488429125607e-05, "loss": 2.3366, "step": 3842500 }, { "epoch": 19.04, "learning_rate": 4.048364570482999e-05, "loss": 2.312, "step": 3843000 }, { "epoch": 19.04, "learning_rate": 4.0482407118403906e-05, "loss": 2.3254, "step": 3843500 }, { "epoch": 19.04, "learning_rate": 4.048116853197782e-05, "loss": 2.3187, "step": 3844000 }, { "epoch": 19.05, "learning_rate": 4.047992994555174e-05, "loss": 2.3117, "step": 3844500 }, { "epoch": 19.05, "learning_rate": 4.047869135912566e-05, "loss": 2.3228, "step": 3845000 }, { "epoch": 19.05, "learning_rate": 4.0477452772699574e-05, "loss": 2.301, "step": 3845500 }, { "epoch": 19.05, "learning_rate": 4.047621666344634e-05, "loss": 2.3024, "step": 3846000 }, { "epoch": 19.06, "learning_rate": 4.047497807702026e-05, "loss": 2.3125, "step": 3846500 }, { "epoch": 19.06, "learning_rate": 4.047373949059418e-05, "loss": 2.3355, "step": 3847000 }, { "epoch": 19.06, "learning_rate": 4.0472500904168094e-05, "loss": 2.3184, "step": 3847500 }, { "epoch": 19.06, "learning_rate": 4.047126231774201e-05, "loss": 2.2946, "step": 3848000 }, { "epoch": 19.07, "learning_rate": 4.047002620848878e-05, "loss": 2.3503, "step": 3848500 }, { "epoch": 19.07, "learning_rate": 4.046878762206269e-05, "loss": 2.3138, "step": 3849000 }, { "epoch": 19.07, "learning_rate": 4.0467549035636606e-05, "loss": 2.3092, "step": 3849500 }, { "epoch": 19.07, "learning_rate": 4.046631044921052e-05, "loss": 2.3133, "step": 3850000 }, { "epoch": 19.08, "learning_rate": 4.046507186278444e-05, "loss": 2.337, "step": 3850500 }, { "epoch": 19.08, "learning_rate": 4.046383327635836e-05, "loss": 2.3352, "step": 3851000 }, { "epoch": 19.08, "learning_rate": 4.0462594689932274e-05, "loss": 2.3084, "step": 3851500 }, { "epoch": 19.08, "learning_rate": 4.046135858067904e-05, "loss": 2.3219, "step": 3852000 }, { "epoch": 19.09, "learning_rate": 4.046011999425296e-05, "loss": 2.3261, "step": 3852500 }, { "epoch": 19.09, "learning_rate": 4.045888140782688e-05, "loss": 2.3286, "step": 3853000 }, { "epoch": 19.09, "learning_rate": 4.0457642821400794e-05, "loss": 2.2986, "step": 3853500 }, { "epoch": 19.09, "learning_rate": 4.045640423497471e-05, "loss": 2.3174, "step": 3854000 }, { "epoch": 19.1, "learning_rate": 4.045516564854863e-05, "loss": 2.3252, "step": 3854500 }, { "epoch": 19.1, "learning_rate": 4.0453927062122545e-05, "loss": 2.3386, "step": 3855000 }, { "epoch": 19.1, "learning_rate": 4.0452690952869307e-05, "loss": 2.3323, "step": 3855500 }, { "epoch": 19.1, "learning_rate": 4.0451452366443223e-05, "loss": 2.3168, "step": 3856000 }, { "epoch": 19.11, "learning_rate": 4.045021378001714e-05, "loss": 2.3312, "step": 3856500 }, { "epoch": 19.11, "learning_rate": 4.044897519359106e-05, "loss": 2.3278, "step": 3857000 }, { "epoch": 19.11, "learning_rate": 4.0447736607164974e-05, "loss": 2.3539, "step": 3857500 }, { "epoch": 19.11, "learning_rate": 4.044649802073889e-05, "loss": 2.3532, "step": 3858000 }, { "epoch": 19.12, "learning_rate": 4.044525943431281e-05, "loss": 2.3201, "step": 3858500 }, { "epoch": 19.12, "learning_rate": 4.0444020847886725e-05, "loss": 2.2934, "step": 3859000 }, { "epoch": 19.12, "learning_rate": 4.044278226146064e-05, "loss": 2.3019, "step": 3859500 }, { "epoch": 19.12, "learning_rate": 4.044154367503456e-05, "loss": 2.3039, "step": 3860000 }, { "epoch": 19.13, "learning_rate": 4.0440305088608476e-05, "loss": 2.3172, "step": 3860500 }, { "epoch": 19.13, "learning_rate": 4.0439071456528097e-05, "loss": 2.3581, "step": 3861000 }, { "epoch": 19.13, "learning_rate": 4.0437832870102013e-05, "loss": 2.3188, "step": 3861500 }, { "epoch": 19.13, "learning_rate": 4.043659428367593e-05, "loss": 2.3124, "step": 3862000 }, { "epoch": 19.14, "learning_rate": 4.043535569724984e-05, "loss": 2.3261, "step": 3862500 }, { "epoch": 19.14, "learning_rate": 4.043411958799661e-05, "loss": 2.3299, "step": 3863000 }, { "epoch": 19.14, "learning_rate": 4.0432881001570526e-05, "loss": 2.3548, "step": 3863500 }, { "epoch": 19.14, "learning_rate": 4.043164241514444e-05, "loss": 2.3506, "step": 3864000 }, { "epoch": 19.15, "learning_rate": 4.043040382871836e-05, "loss": 2.3457, "step": 3864500 }, { "epoch": 19.15, "learning_rate": 4.0429167719465136e-05, "loss": 2.3462, "step": 3865000 }, { "epoch": 19.15, "learning_rate": 4.042792913303905e-05, "loss": 2.3082, "step": 3865500 }, { "epoch": 19.15, "learning_rate": 4.042669054661296e-05, "loss": 2.3479, "step": 3866000 }, { "epoch": 19.16, "learning_rate": 4.042545196018688e-05, "loss": 2.3207, "step": 3866500 }, { "epoch": 19.16, "learning_rate": 4.042421585093365e-05, "loss": 2.332, "step": 3867000 }, { "epoch": 19.16, "learning_rate": 4.0422977264507565e-05, "loss": 2.3244, "step": 3867500 }, { "epoch": 19.16, "learning_rate": 4.042173867808148e-05, "loss": 2.3418, "step": 3868000 }, { "epoch": 19.17, "learning_rate": 4.04205000916554e-05, "loss": 2.3337, "step": 3868500 }, { "epoch": 19.17, "learning_rate": 4.041926150522931e-05, "loss": 2.3363, "step": 3869000 }, { "epoch": 19.17, "learning_rate": 4.0418022918803226e-05, "loss": 2.3161, "step": 3869500 }, { "epoch": 19.17, "learning_rate": 4.041678433237714e-05, "loss": 2.3293, "step": 3870000 }, { "epoch": 19.18, "learning_rate": 4.041554574595106e-05, "loss": 2.3562, "step": 3870500 }, { "epoch": 19.18, "learning_rate": 4.041430715952498e-05, "loss": 2.3249, "step": 3871000 }, { "epoch": 19.18, "learning_rate": 4.0413068573098894e-05, "loss": 2.3386, "step": 3871500 }, { "epoch": 19.18, "learning_rate": 4.041182998667281e-05, "loss": 2.3363, "step": 3872000 }, { "epoch": 19.19, "learning_rate": 4.041059140024673e-05, "loss": 2.3261, "step": 3872500 }, { "epoch": 19.19, "learning_rate": 4.04093552909935e-05, "loss": 2.3164, "step": 3873000 }, { "epoch": 19.19, "learning_rate": 4.0408119181740266e-05, "loss": 2.3363, "step": 3873500 }, { "epoch": 19.19, "learning_rate": 4.040688059531418e-05, "loss": 2.3191, "step": 3874000 }, { "epoch": 19.2, "learning_rate": 4.04056420088881e-05, "loss": 2.3325, "step": 3874500 }, { "epoch": 19.2, "learning_rate": 4.0404403422462016e-05, "loss": 2.3294, "step": 3875000 }, { "epoch": 19.2, "learning_rate": 4.0403164836035927e-05, "loss": 2.348, "step": 3875500 }, { "epoch": 19.2, "learning_rate": 4.0401926249609843e-05, "loss": 2.3279, "step": 3876000 }, { "epoch": 19.21, "learning_rate": 4.040068766318376e-05, "loss": 2.3494, "step": 3876500 }, { "epoch": 19.21, "learning_rate": 4.0399451553930536e-05, "loss": 2.3361, "step": 3877000 }, { "epoch": 19.21, "learning_rate": 4.039821296750445e-05, "loss": 2.329, "step": 3877500 }, { "epoch": 19.21, "learning_rate": 4.039697438107837e-05, "loss": 2.3137, "step": 3878000 }, { "epoch": 19.22, "learning_rate": 4.039573579465228e-05, "loss": 2.3535, "step": 3878500 }, { "epoch": 19.22, "learning_rate": 4.03944972082262e-05, "loss": 2.3296, "step": 3879000 }, { "epoch": 19.22, "learning_rate": 4.0393258621800114e-05, "loss": 2.3558, "step": 3879500 }, { "epoch": 19.22, "learning_rate": 4.039202251254688e-05, "loss": 2.3215, "step": 3880000 }, { "epoch": 19.23, "learning_rate": 4.03907839261208e-05, "loss": 2.3023, "step": 3880500 }, { "epoch": 19.23, "learning_rate": 4.038954781686757e-05, "loss": 2.3208, "step": 3881000 }, { "epoch": 19.23, "learning_rate": 4.0388309230441485e-05, "loss": 2.3491, "step": 3881500 }, { "epoch": 19.23, "learning_rate": 4.0387073121188254e-05, "loss": 2.3284, "step": 3882000 }, { "epoch": 19.24, "learning_rate": 4.038583453476217e-05, "loss": 2.3285, "step": 3882500 }, { "epoch": 19.24, "learning_rate": 4.038459594833609e-05, "loss": 2.3254, "step": 3883000 }, { "epoch": 19.24, "learning_rate": 4.038335736191e-05, "loss": 2.3343, "step": 3883500 }, { "epoch": 19.24, "learning_rate": 4.0382118775483915e-05, "loss": 2.3346, "step": 3884000 }, { "epoch": 19.25, "learning_rate": 4.038088018905783e-05, "loss": 2.3331, "step": 3884500 }, { "epoch": 19.25, "learning_rate": 4.037964160263175e-05, "loss": 2.3303, "step": 3885000 }, { "epoch": 19.25, "learning_rate": 4.0378403016205666e-05, "loss": 2.3329, "step": 3885500 }, { "epoch": 19.25, "learning_rate": 4.037716442977958e-05, "loss": 2.3502, "step": 3886000 }, { "epoch": 19.26, "learning_rate": 4.03759258433535e-05, "loss": 2.3128, "step": 3886500 }, { "epoch": 19.26, "learning_rate": 4.0374687256927417e-05, "loss": 2.3384, "step": 3887000 }, { "epoch": 19.26, "learning_rate": 4.0373448670501334e-05, "loss": 2.3581, "step": 3887500 }, { "epoch": 19.26, "learning_rate": 4.0372210084075244e-05, "loss": 2.3329, "step": 3888000 }, { "epoch": 19.26, "learning_rate": 4.037097397482202e-05, "loss": 2.3289, "step": 3888500 }, { "epoch": 19.27, "learning_rate": 4.0369735388395936e-05, "loss": 2.3199, "step": 3889000 }, { "epoch": 19.27, "learning_rate": 4.036849680196985e-05, "loss": 2.3655, "step": 3889500 }, { "epoch": 19.27, "learning_rate": 4.036725821554377e-05, "loss": 2.3308, "step": 3890000 }, { "epoch": 19.27, "learning_rate": 4.036601962911769e-05, "loss": 2.3335, "step": 3890500 }, { "epoch": 19.28, "learning_rate": 4.03647810426916e-05, "loss": 2.3548, "step": 3891000 }, { "epoch": 19.28, "learning_rate": 4.0363542456265514e-05, "loss": 2.3258, "step": 3891500 }, { "epoch": 19.28, "learning_rate": 4.036230386983943e-05, "loss": 2.3499, "step": 3892000 }, { "epoch": 19.28, "learning_rate": 4.036106528341335e-05, "loss": 2.3421, "step": 3892500 }, { "epoch": 19.29, "learning_rate": 4.035982917416012e-05, "loss": 2.3172, "step": 3893000 }, { "epoch": 19.29, "learning_rate": 4.0358593064906886e-05, "loss": 2.3416, "step": 3893500 }, { "epoch": 19.29, "learning_rate": 4.03573544784808e-05, "loss": 2.3245, "step": 3894000 }, { "epoch": 19.29, "learning_rate": 4.035611589205472e-05, "loss": 2.3291, "step": 3894500 }, { "epoch": 19.3, "learning_rate": 4.035487978280149e-05, "loss": 2.3485, "step": 3895000 }, { "epoch": 19.3, "learning_rate": 4.0353641196375405e-05, "loss": 2.3216, "step": 3895500 }, { "epoch": 19.3, "learning_rate": 4.035240260994932e-05, "loss": 2.352, "step": 3896000 }, { "epoch": 19.3, "learning_rate": 4.035116402352324e-05, "loss": 2.3244, "step": 3896500 }, { "epoch": 19.31, "learning_rate": 4.034992543709715e-05, "loss": 2.347, "step": 3897000 }, { "epoch": 19.31, "learning_rate": 4.0348686850671066e-05, "loss": 2.3369, "step": 3897500 }, { "epoch": 19.31, "learning_rate": 4.034744826424498e-05, "loss": 2.31, "step": 3898000 }, { "epoch": 19.31, "learning_rate": 4.03462096778189e-05, "loss": 2.3318, "step": 3898500 }, { "epoch": 19.32, "learning_rate": 4.034497109139282e-05, "loss": 2.3256, "step": 3899000 }, { "epoch": 19.32, "learning_rate": 4.0343732504966734e-05, "loss": 2.3221, "step": 3899500 }, { "epoch": 19.32, "learning_rate": 4.03424963957135e-05, "loss": 2.3195, "step": 3900000 }, { "epoch": 19.32, "learning_rate": 4.034125780928742e-05, "loss": 2.3262, "step": 3900500 }, { "epoch": 19.33, "learning_rate": 4.0340019222861336e-05, "loss": 2.3251, "step": 3901000 }, { "epoch": 19.33, "learning_rate": 4.033878063643525e-05, "loss": 2.3431, "step": 3901500 }, { "epoch": 19.33, "learning_rate": 4.033754452718202e-05, "loss": 2.3286, "step": 3902000 }, { "epoch": 19.33, "learning_rate": 4.033630594075594e-05, "loss": 2.3164, "step": 3902500 }, { "epoch": 19.34, "learning_rate": 4.03350698315027e-05, "loss": 2.3303, "step": 3903000 }, { "epoch": 19.34, "learning_rate": 4.033383124507662e-05, "loss": 2.3216, "step": 3903500 }, { "epoch": 19.34, "learning_rate": 4.033259513582339e-05, "loss": 2.3386, "step": 3904000 }, { "epoch": 19.34, "learning_rate": 4.0331356549397304e-05, "loss": 2.3393, "step": 3904500 }, { "epoch": 19.35, "learning_rate": 4.033011796297122e-05, "loss": 2.342, "step": 3905000 }, { "epoch": 19.35, "learning_rate": 4.032887937654514e-05, "loss": 2.3289, "step": 3905500 }, { "epoch": 19.35, "learning_rate": 4.0327640790119055e-05, "loss": 2.3325, "step": 3906000 }, { "epoch": 19.35, "learning_rate": 4.032640220369297e-05, "loss": 2.3331, "step": 3906500 }, { "epoch": 19.36, "learning_rate": 4.032516361726689e-05, "loss": 2.3308, "step": 3907000 }, { "epoch": 19.36, "learning_rate": 4.0323925030840805e-05, "loss": 2.3238, "step": 3907500 }, { "epoch": 19.36, "learning_rate": 4.032268644441472e-05, "loss": 2.3196, "step": 3908000 }, { "epoch": 19.36, "learning_rate": 4.032144785798864e-05, "loss": 2.3406, "step": 3908500 }, { "epoch": 19.37, "learning_rate": 4.0320209271562556e-05, "loss": 2.3241, "step": 3909000 }, { "epoch": 19.37, "learning_rate": 4.031897068513647e-05, "loss": 2.3429, "step": 3909500 }, { "epoch": 19.37, "learning_rate": 4.031773209871039e-05, "loss": 2.3428, "step": 3910000 }, { "epoch": 19.37, "learning_rate": 4.03164935122843e-05, "loss": 2.3308, "step": 3910500 }, { "epoch": 19.38, "learning_rate": 4.031525492585822e-05, "loss": 2.3214, "step": 3911000 }, { "epoch": 19.38, "learning_rate": 4.0314016339432134e-05, "loss": 2.3418, "step": 3911500 }, { "epoch": 19.38, "learning_rate": 4.031277775300605e-05, "loss": 2.3363, "step": 3912000 }, { "epoch": 19.38, "learning_rate": 4.031153916657997e-05, "loss": 2.3595, "step": 3912500 }, { "epoch": 19.39, "learning_rate": 4.031030058015388e-05, "loss": 2.3229, "step": 3913000 }, { "epoch": 19.39, "learning_rate": 4.0309064470900654e-05, "loss": 2.323, "step": 3913500 }, { "epoch": 19.39, "learning_rate": 4.030782836164742e-05, "loss": 2.3263, "step": 3914000 }, { "epoch": 19.39, "learning_rate": 4.030658977522134e-05, "loss": 2.3366, "step": 3914500 }, { "epoch": 19.4, "learning_rate": 4.0305351188795256e-05, "loss": 2.3512, "step": 3915000 }, { "epoch": 19.4, "learning_rate": 4.030411260236917e-05, "loss": 2.3201, "step": 3915500 }, { "epoch": 19.4, "learning_rate": 4.030287401594309e-05, "loss": 2.338, "step": 3916000 }, { "epoch": 19.4, "learning_rate": 4.030163542951701e-05, "loss": 2.3361, "step": 3916500 }, { "epoch": 19.41, "learning_rate": 4.0300396843090924e-05, "loss": 2.3406, "step": 3917000 }, { "epoch": 19.41, "learning_rate": 4.0299158256664834e-05, "loss": 2.3373, "step": 3917500 }, { "epoch": 19.41, "learning_rate": 4.029791967023875e-05, "loss": 2.3003, "step": 3918000 }, { "epoch": 19.41, "learning_rate": 4.029668108381267e-05, "loss": 2.3411, "step": 3918500 }, { "epoch": 19.42, "learning_rate": 4.029544497455944e-05, "loss": 2.3372, "step": 3919000 }, { "epoch": 19.42, "learning_rate": 4.0294206388133354e-05, "loss": 2.3438, "step": 3919500 }, { "epoch": 19.42, "learning_rate": 4.029296780170727e-05, "loss": 2.3411, "step": 3920000 }, { "epoch": 19.42, "learning_rate": 4.029172921528119e-05, "loss": 2.3428, "step": 3920500 }, { "epoch": 19.43, "learning_rate": 4.0290493106027956e-05, "loss": 2.3244, "step": 3921000 }, { "epoch": 19.43, "learning_rate": 4.028925451960187e-05, "loss": 2.339, "step": 3921500 }, { "epoch": 19.43, "learning_rate": 4.028801593317579e-05, "loss": 2.3394, "step": 3922000 }, { "epoch": 19.43, "learning_rate": 4.028677734674971e-05, "loss": 2.3725, "step": 3922500 }, { "epoch": 19.44, "learning_rate": 4.028554123749647e-05, "loss": 2.3452, "step": 3923000 }, { "epoch": 19.44, "learning_rate": 4.0284302651070386e-05, "loss": 2.307, "step": 3923500 }, { "epoch": 19.44, "learning_rate": 4.0283066541817155e-05, "loss": 2.3612, "step": 3924000 }, { "epoch": 19.44, "learning_rate": 4.028182795539107e-05, "loss": 2.3514, "step": 3924500 }, { "epoch": 19.45, "learning_rate": 4.028058936896499e-05, "loss": 2.3375, "step": 3925000 }, { "epoch": 19.45, "learning_rate": 4.0279350782538906e-05, "loss": 2.342, "step": 3925500 }, { "epoch": 19.45, "learning_rate": 4.027811219611282e-05, "loss": 2.3437, "step": 3926000 }, { "epoch": 19.45, "learning_rate": 4.027687608685959e-05, "loss": 2.3342, "step": 3926500 }, { "epoch": 19.46, "learning_rate": 4.027563750043351e-05, "loss": 2.3393, "step": 3927000 }, { "epoch": 19.46, "learning_rate": 4.027439891400742e-05, "loss": 2.3505, "step": 3927500 }, { "epoch": 19.46, "learning_rate": 4.0273160327581335e-05, "loss": 2.3358, "step": 3928000 }, { "epoch": 19.46, "learning_rate": 4.027192174115525e-05, "loss": 2.3349, "step": 3928500 }, { "epoch": 19.47, "learning_rate": 4.027068315472917e-05, "loss": 2.3537, "step": 3929000 }, { "epoch": 19.47, "learning_rate": 4.026944704547594e-05, "loss": 2.323, "step": 3929500 }, { "epoch": 19.47, "learning_rate": 4.0268208459049855e-05, "loss": 2.3303, "step": 3930000 }, { "epoch": 19.47, "learning_rate": 4.026696987262377e-05, "loss": 2.3301, "step": 3930500 }, { "epoch": 19.48, "learning_rate": 4.026573128619769e-05, "loss": 2.3492, "step": 3931000 }, { "epoch": 19.48, "learning_rate": 4.0264492699771606e-05, "loss": 2.3522, "step": 3931500 }, { "epoch": 19.48, "learning_rate": 4.026325411334552e-05, "loss": 2.3272, "step": 3932000 }, { "epoch": 19.48, "learning_rate": 4.026201552691944e-05, "loss": 2.3068, "step": 3932500 }, { "epoch": 19.49, "learning_rate": 4.026077694049336e-05, "loss": 2.3614, "step": 3933000 }, { "epoch": 19.49, "learning_rate": 4.0259540831240125e-05, "loss": 2.3525, "step": 3933500 }, { "epoch": 19.49, "learning_rate": 4.0258304721986894e-05, "loss": 2.3418, "step": 3934000 }, { "epoch": 19.49, "learning_rate": 4.025706613556081e-05, "loss": 2.3367, "step": 3934500 }, { "epoch": 19.5, "learning_rate": 4.025582754913473e-05, "loss": 2.3377, "step": 3935000 }, { "epoch": 19.5, "learning_rate": 4.025458896270864e-05, "loss": 2.3424, "step": 3935500 }, { "epoch": 19.5, "learning_rate": 4.0253350376282555e-05, "loss": 2.3364, "step": 3936000 }, { "epoch": 19.5, "learning_rate": 4.025211178985647e-05, "loss": 2.3356, "step": 3936500 }, { "epoch": 19.51, "learning_rate": 4.025087320343039e-05, "loss": 2.3515, "step": 3937000 }, { "epoch": 19.51, "learning_rate": 4.0249637094177165e-05, "loss": 2.3304, "step": 3937500 }, { "epoch": 19.51, "learning_rate": 4.024839850775108e-05, "loss": 2.3372, "step": 3938000 }, { "epoch": 19.51, "learning_rate": 4.024715992132499e-05, "loss": 2.3153, "step": 3938500 }, { "epoch": 19.52, "learning_rate": 4.024592133489891e-05, "loss": 2.3227, "step": 3939000 }, { "epoch": 19.52, "learning_rate": 4.024468522564568e-05, "loss": 2.3313, "step": 3939500 }, { "epoch": 19.52, "learning_rate": 4.0243446639219594e-05, "loss": 2.3482, "step": 3940000 }, { "epoch": 19.52, "learning_rate": 4.024221052996636e-05, "loss": 2.305, "step": 3940500 }, { "epoch": 19.53, "learning_rate": 4.024097194354028e-05, "loss": 2.3352, "step": 3941000 }, { "epoch": 19.53, "learning_rate": 4.02397333571142e-05, "loss": 2.3291, "step": 3941500 }, { "epoch": 19.53, "learning_rate": 4.0238494770688114e-05, "loss": 2.333, "step": 3942000 }, { "epoch": 19.53, "learning_rate": 4.0237258661434876e-05, "loss": 2.3466, "step": 3942500 }, { "epoch": 19.53, "learning_rate": 4.023602007500879e-05, "loss": 2.3585, "step": 3943000 }, { "epoch": 19.54, "learning_rate": 4.023478148858271e-05, "loss": 2.3453, "step": 3943500 }, { "epoch": 19.54, "learning_rate": 4.023354290215663e-05, "loss": 2.3602, "step": 3944000 }, { "epoch": 19.54, "learning_rate": 4.0232304315730544e-05, "loss": 2.3127, "step": 3944500 }, { "epoch": 19.54, "learning_rate": 4.023106572930446e-05, "loss": 2.331, "step": 3945000 }, { "epoch": 19.55, "learning_rate": 4.022982714287838e-05, "loss": 2.3243, "step": 3945500 }, { "epoch": 19.55, "learning_rate": 4.0228588556452295e-05, "loss": 2.3414, "step": 3946000 }, { "epoch": 19.55, "learning_rate": 4.022734997002621e-05, "loss": 2.3321, "step": 3946500 }, { "epoch": 19.55, "learning_rate": 4.022611138360013e-05, "loss": 2.3295, "step": 3947000 }, { "epoch": 19.56, "learning_rate": 4.0224872797174045e-05, "loss": 2.3272, "step": 3947500 }, { "epoch": 19.56, "learning_rate": 4.0223634210747955e-05, "loss": 2.3229, "step": 3948000 }, { "epoch": 19.56, "learning_rate": 4.022239810149473e-05, "loss": 2.3459, "step": 3948500 }, { "epoch": 19.56, "learning_rate": 4.022115951506865e-05, "loss": 2.3256, "step": 3949000 }, { "epoch": 19.57, "learning_rate": 4.0219920928642565e-05, "loss": 2.3478, "step": 3949500 }, { "epoch": 19.57, "learning_rate": 4.021868234221648e-05, "loss": 2.3261, "step": 3950000 }, { "epoch": 19.57, "learning_rate": 4.02174437557904e-05, "loss": 2.3297, "step": 3950500 }, { "epoch": 19.57, "learning_rate": 4.021620764653716e-05, "loss": 2.326, "step": 3951000 }, { "epoch": 19.58, "learning_rate": 4.021496906011108e-05, "loss": 2.33, "step": 3951500 }, { "epoch": 19.58, "learning_rate": 4.0213730473684995e-05, "loss": 2.3451, "step": 3952000 }, { "epoch": 19.58, "learning_rate": 4.021249188725891e-05, "loss": 2.3306, "step": 3952500 }, { "epoch": 19.58, "learning_rate": 4.021125330083283e-05, "loss": 2.3403, "step": 3953000 }, { "epoch": 19.59, "learning_rate": 4.0210014714406745e-05, "loss": 2.3302, "step": 3953500 }, { "epoch": 19.59, "learning_rate": 4.0208776127980656e-05, "loss": 2.3446, "step": 3954000 }, { "epoch": 19.59, "learning_rate": 4.020754001872743e-05, "loss": 2.3467, "step": 3954500 }, { "epoch": 19.59, "learning_rate": 4.020630143230135e-05, "loss": 2.3475, "step": 3955000 }, { "epoch": 19.6, "learning_rate": 4.0205062845875265e-05, "loss": 2.3296, "step": 3955500 }, { "epoch": 19.6, "learning_rate": 4.020382425944918e-05, "loss": 2.3456, "step": 3956000 }, { "epoch": 19.6, "learning_rate": 4.02025856730231e-05, "loss": 2.3194, "step": 3956500 }, { "epoch": 19.6, "learning_rate": 4.020134956376986e-05, "loss": 2.3377, "step": 3957000 }, { "epoch": 19.61, "learning_rate": 4.020011097734378e-05, "loss": 2.3476, "step": 3957500 }, { "epoch": 19.61, "learning_rate": 4.0198872390917695e-05, "loss": 2.3331, "step": 3958000 }, { "epoch": 19.61, "learning_rate": 4.0197636281664464e-05, "loss": 2.3387, "step": 3958500 }, { "epoch": 19.61, "learning_rate": 4.019639769523838e-05, "loss": 2.3505, "step": 3959000 }, { "epoch": 19.62, "learning_rate": 4.01951591088123e-05, "loss": 2.3324, "step": 3959500 }, { "epoch": 19.62, "learning_rate": 4.0193920522386214e-05, "loss": 2.32, "step": 3960000 }, { "epoch": 19.62, "learning_rate": 4.019268193596013e-05, "loss": 2.3419, "step": 3960500 }, { "epoch": 19.62, "learning_rate": 4.019144334953405e-05, "loss": 2.3614, "step": 3961000 }, { "epoch": 19.63, "learning_rate": 4.0190204763107965e-05, "loss": 2.338, "step": 3961500 }, { "epoch": 19.63, "learning_rate": 4.018896865385473e-05, "loss": 2.3367, "step": 3962000 }, { "epoch": 19.63, "learning_rate": 4.0187730067428644e-05, "loss": 2.3342, "step": 3962500 }, { "epoch": 19.63, "learning_rate": 4.018649148100256e-05, "loss": 2.3435, "step": 3963000 }, { "epoch": 19.64, "learning_rate": 4.018525537174933e-05, "loss": 2.3708, "step": 3963500 }, { "epoch": 19.64, "learning_rate": 4.018401678532325e-05, "loss": 2.3477, "step": 3964000 }, { "epoch": 19.64, "learning_rate": 4.0182780676070016e-05, "loss": 2.3401, "step": 3964500 }, { "epoch": 19.64, "learning_rate": 4.018154208964393e-05, "loss": 2.2965, "step": 3965000 }, { "epoch": 19.65, "learning_rate": 4.018030350321785e-05, "loss": 2.3713, "step": 3965500 }, { "epoch": 19.65, "learning_rate": 4.0179064916791766e-05, "loss": 2.3456, "step": 3966000 }, { "epoch": 19.65, "learning_rate": 4.017782633036568e-05, "loss": 2.3476, "step": 3966500 }, { "epoch": 19.65, "learning_rate": 4.01765877439396e-05, "loss": 2.3343, "step": 3967000 }, { "epoch": 19.66, "learning_rate": 4.017534915751352e-05, "loss": 2.3137, "step": 3967500 }, { "epoch": 19.66, "learning_rate": 4.0174110571087434e-05, "loss": 2.3458, "step": 3968000 }, { "epoch": 19.66, "learning_rate": 4.017287198466135e-05, "loss": 2.3405, "step": 3968500 }, { "epoch": 19.66, "learning_rate": 4.017163339823526e-05, "loss": 2.3362, "step": 3969000 }, { "epoch": 19.67, "learning_rate": 4.017039481180918e-05, "loss": 2.3175, "step": 3969500 }, { "epoch": 19.67, "learning_rate": 4.0169156225383095e-05, "loss": 2.3659, "step": 3970000 }, { "epoch": 19.67, "learning_rate": 4.016791763895701e-05, "loss": 2.3572, "step": 3970500 }, { "epoch": 19.67, "learning_rate": 4.016668400687663e-05, "loss": 2.3597, "step": 3971000 }, { "epoch": 19.68, "learning_rate": 4.016544542045055e-05, "loss": 2.3242, "step": 3971500 }, { "epoch": 19.68, "learning_rate": 4.0164206834024466e-05, "loss": 2.3366, "step": 3972000 }, { "epoch": 19.68, "learning_rate": 4.0162968247598383e-05, "loss": 2.3619, "step": 3972500 }, { "epoch": 19.68, "learning_rate": 4.01617296611723e-05, "loss": 2.3342, "step": 3973000 }, { "epoch": 19.69, "learning_rate": 4.016049107474622e-05, "loss": 2.3261, "step": 3973500 }, { "epoch": 19.69, "learning_rate": 4.0159252488320134e-05, "loss": 2.3447, "step": 3974000 }, { "epoch": 19.69, "learning_rate": 4.015801390189405e-05, "loss": 2.3384, "step": 3974500 }, { "epoch": 19.69, "learning_rate": 4.015677531546797e-05, "loss": 2.3568, "step": 3975000 }, { "epoch": 19.7, "learning_rate": 4.015553672904188e-05, "loss": 2.3522, "step": 3975500 }, { "epoch": 19.7, "learning_rate": 4.0154298142615795e-05, "loss": 2.3391, "step": 3976000 }, { "epoch": 19.7, "learning_rate": 4.015305955618971e-05, "loss": 2.3405, "step": 3976500 }, { "epoch": 19.7, "learning_rate": 4.015182096976363e-05, "loss": 2.3432, "step": 3977000 }, { "epoch": 19.71, "learning_rate": 4.0150582383337546e-05, "loss": 2.3234, "step": 3977500 }, { "epoch": 19.71, "learning_rate": 4.0149346274084315e-05, "loss": 2.3282, "step": 3978000 }, { "epoch": 19.71, "learning_rate": 4.014810768765823e-05, "loss": 2.3459, "step": 3978500 }, { "epoch": 19.71, "learning_rate": 4.014686910123215e-05, "loss": 2.3324, "step": 3979000 }, { "epoch": 19.72, "learning_rate": 4.0145630514806066e-05, "loss": 2.3331, "step": 3979500 }, { "epoch": 19.72, "learning_rate": 4.014439192837998e-05, "loss": 2.3306, "step": 3980000 }, { "epoch": 19.72, "learning_rate": 4.014315581912675e-05, "loss": 2.3346, "step": 3980500 }, { "epoch": 19.72, "learning_rate": 4.014191723270067e-05, "loss": 2.3382, "step": 3981000 }, { "epoch": 19.73, "learning_rate": 4.0140678646274585e-05, "loss": 2.3328, "step": 3981500 }, { "epoch": 19.73, "learning_rate": 4.01394400598485e-05, "loss": 2.3343, "step": 3982000 }, { "epoch": 19.73, "learning_rate": 4.013820147342241e-05, "loss": 2.3313, "step": 3982500 }, { "epoch": 19.73, "learning_rate": 4.013696288699633e-05, "loss": 2.3678, "step": 3983000 }, { "epoch": 19.74, "learning_rate": 4.0135724300570246e-05, "loss": 2.3607, "step": 3983500 }, { "epoch": 19.74, "learning_rate": 4.013448571414416e-05, "loss": 2.3278, "step": 3984000 }, { "epoch": 19.74, "learning_rate": 4.013324712771808e-05, "loss": 2.3487, "step": 3984500 }, { "epoch": 19.74, "learning_rate": 4.0132008541292e-05, "loss": 2.3366, "step": 3985000 }, { "epoch": 19.75, "learning_rate": 4.013076995486591e-05, "loss": 2.364, "step": 3985500 }, { "epoch": 19.75, "learning_rate": 4.012953384561268e-05, "loss": 2.3186, "step": 3986000 }, { "epoch": 19.75, "learning_rate": 4.012829773635945e-05, "loss": 2.3758, "step": 3986500 }, { "epoch": 19.75, "learning_rate": 4.012705914993337e-05, "loss": 2.3873, "step": 3987000 }, { "epoch": 19.76, "learning_rate": 4.0125820563507285e-05, "loss": 2.3456, "step": 3987500 }, { "epoch": 19.76, "learning_rate": 4.01245819770812e-05, "loss": 2.342, "step": 3988000 }, { "epoch": 19.76, "learning_rate": 4.012334339065512e-05, "loss": 2.3393, "step": 3988500 }, { "epoch": 19.76, "learning_rate": 4.012210480422903e-05, "loss": 2.3263, "step": 3989000 }, { "epoch": 19.77, "learning_rate": 4.0120866217802946e-05, "loss": 2.3438, "step": 3989500 }, { "epoch": 19.77, "learning_rate": 4.011962763137686e-05, "loss": 2.348, "step": 3990000 }, { "epoch": 19.77, "learning_rate": 4.011838904495078e-05, "loss": 2.3395, "step": 3990500 }, { "epoch": 19.77, "learning_rate": 4.011715293569755e-05, "loss": 2.3636, "step": 3991000 }, { "epoch": 19.78, "learning_rate": 4.0115914349271466e-05, "loss": 2.3862, "step": 3991500 }, { "epoch": 19.78, "learning_rate": 4.0114678240018235e-05, "loss": 2.3051, "step": 3992000 }, { "epoch": 19.78, "learning_rate": 4.011343965359215e-05, "loss": 2.3611, "step": 3992500 }, { "epoch": 19.78, "learning_rate": 4.011220106716607e-05, "loss": 2.3512, "step": 3993000 }, { "epoch": 19.79, "learning_rate": 4.0110962480739985e-05, "loss": 2.3359, "step": 3993500 }, { "epoch": 19.79, "learning_rate": 4.01097238943139e-05, "loss": 2.3261, "step": 3994000 }, { "epoch": 19.79, "learning_rate": 4.010848530788782e-05, "loss": 2.369, "step": 3994500 }, { "epoch": 19.79, "learning_rate": 4.010724919863458e-05, "loss": 2.3553, "step": 3995000 }, { "epoch": 19.8, "learning_rate": 4.01060106122085e-05, "loss": 2.3294, "step": 3995500 }, { "epoch": 19.8, "learning_rate": 4.0104772025782415e-05, "loss": 2.3247, "step": 3996000 }, { "epoch": 19.8, "learning_rate": 4.010353343935633e-05, "loss": 2.3215, "step": 3996500 }, { "epoch": 19.8, "learning_rate": 4.010229485293025e-05, "loss": 2.3778, "step": 3997000 }, { "epoch": 19.8, "learning_rate": 4.0101056266504166e-05, "loss": 2.3558, "step": 3997500 }, { "epoch": 19.81, "learning_rate": 4.009981768007808e-05, "loss": 2.3269, "step": 3998000 }, { "epoch": 19.81, "learning_rate": 4.0098579093652e-05, "loss": 2.3752, "step": 3998500 }, { "epoch": 19.81, "learning_rate": 4.009734050722592e-05, "loss": 2.3359, "step": 3999000 }, { "epoch": 19.81, "learning_rate": 4.0096101920799834e-05, "loss": 2.3454, "step": 3999500 }, { "epoch": 19.82, "learning_rate": 4.009486333437375e-05, "loss": 2.3231, "step": 4000000 }, { "epoch": 19.82, "learning_rate": 4.009362474794767e-05, "loss": 2.3238, "step": 4000500 }, { "epoch": 19.82, "learning_rate": 4.009238616152158e-05, "loss": 2.3168, "step": 4001000 }, { "epoch": 19.82, "learning_rate": 4.00911525294412e-05, "loss": 2.3225, "step": 4001500 }, { "epoch": 19.83, "learning_rate": 4.0089913943015115e-05, "loss": 2.3095, "step": 4002000 }, { "epoch": 19.83, "learning_rate": 4.008867535658903e-05, "loss": 2.3578, "step": 4002500 }, { "epoch": 19.83, "learning_rate": 4.008743677016295e-05, "loss": 2.3386, "step": 4003000 }, { "epoch": 19.83, "learning_rate": 4.0086198183736866e-05, "loss": 2.3406, "step": 4003500 }, { "epoch": 19.84, "learning_rate": 4.008496455165649e-05, "loss": 2.3178, "step": 4004000 }, { "epoch": 19.84, "learning_rate": 4.0083725965230404e-05, "loss": 2.3481, "step": 4004500 }, { "epoch": 19.84, "learning_rate": 4.008248737880432e-05, "loss": 2.3606, "step": 4005000 }, { "epoch": 19.84, "learning_rate": 4.008124879237824e-05, "loss": 2.3519, "step": 4005500 }, { "epoch": 19.85, "learning_rate": 4.008001020595215e-05, "loss": 2.3332, "step": 4006000 }, { "epoch": 19.85, "learning_rate": 4.0078771619526065e-05, "loss": 2.3432, "step": 4006500 }, { "epoch": 19.85, "learning_rate": 4.007753551027284e-05, "loss": 2.3641, "step": 4007000 }, { "epoch": 19.85, "learning_rate": 4.007629692384676e-05, "loss": 2.3344, "step": 4007500 }, { "epoch": 19.86, "learning_rate": 4.007505833742067e-05, "loss": 2.3358, "step": 4008000 }, { "epoch": 19.86, "learning_rate": 4.0073819750994584e-05, "loss": 2.3221, "step": 4008500 }, { "epoch": 19.86, "learning_rate": 4.00725811645685e-05, "loss": 2.3158, "step": 4009000 }, { "epoch": 19.86, "learning_rate": 4.007134505531528e-05, "loss": 2.3348, "step": 4009500 }, { "epoch": 19.87, "learning_rate": 4.0070106468889194e-05, "loss": 2.3408, "step": 4010000 }, { "epoch": 19.87, "learning_rate": 4.0068870359635956e-05, "loss": 2.339, "step": 4010500 }, { "epoch": 19.87, "learning_rate": 4.0067634250382724e-05, "loss": 2.3199, "step": 4011000 }, { "epoch": 19.87, "learning_rate": 4.006639566395664e-05, "loss": 2.3297, "step": 4011500 }, { "epoch": 19.88, "learning_rate": 4.006515707753056e-05, "loss": 2.3532, "step": 4012000 }, { "epoch": 19.88, "learning_rate": 4.0063918491104475e-05, "loss": 2.3398, "step": 4012500 }, { "epoch": 19.88, "learning_rate": 4.006267990467839e-05, "loss": 2.328, "step": 4013000 }, { "epoch": 19.88, "learning_rate": 4.006144131825231e-05, "loss": 2.3538, "step": 4013500 }, { "epoch": 19.89, "learning_rate": 4.0060202731826226e-05, "loss": 2.3285, "step": 4014000 }, { "epoch": 19.89, "learning_rate": 4.005896414540014e-05, "loss": 2.3619, "step": 4014500 }, { "epoch": 19.89, "learning_rate": 4.005772555897406e-05, "loss": 2.3521, "step": 4015000 }, { "epoch": 19.89, "learning_rate": 4.005648697254798e-05, "loss": 2.3502, "step": 4015500 }, { "epoch": 19.9, "learning_rate": 4.0055248386121894e-05, "loss": 2.3306, "step": 4016000 }, { "epoch": 19.9, "learning_rate": 4.0054012276868656e-05, "loss": 2.3506, "step": 4016500 }, { "epoch": 19.9, "learning_rate": 4.005277369044257e-05, "loss": 2.3259, "step": 4017000 }, { "epoch": 19.9, "learning_rate": 4.005153758118934e-05, "loss": 2.3518, "step": 4017500 }, { "epoch": 19.91, "learning_rate": 4.005029899476326e-05, "loss": 2.3261, "step": 4018000 }, { "epoch": 19.91, "learning_rate": 4.0049060408337175e-05, "loss": 2.3528, "step": 4018500 }, { "epoch": 19.91, "learning_rate": 4.004782182191109e-05, "loss": 2.3506, "step": 4019000 }, { "epoch": 19.91, "learning_rate": 4.004658323548501e-05, "loss": 2.3426, "step": 4019500 }, { "epoch": 19.92, "learning_rate": 4.0045344649058926e-05, "loss": 2.3404, "step": 4020000 }, { "epoch": 19.92, "learning_rate": 4.004410606263284e-05, "loss": 2.373, "step": 4020500 }, { "epoch": 19.92, "learning_rate": 4.004286747620676e-05, "loss": 2.3435, "step": 4021000 }, { "epoch": 19.92, "learning_rate": 4.004163136695352e-05, "loss": 2.329, "step": 4021500 }, { "epoch": 19.93, "learning_rate": 4.004039278052744e-05, "loss": 2.336, "step": 4022000 }, { "epoch": 19.93, "learning_rate": 4.0039154194101356e-05, "loss": 2.3432, "step": 4022500 }, { "epoch": 19.93, "learning_rate": 4.003791560767527e-05, "loss": 2.3376, "step": 4023000 }, { "epoch": 19.93, "learning_rate": 4.003667702124919e-05, "loss": 2.3447, "step": 4023500 }, { "epoch": 19.94, "learning_rate": 4.003543843482311e-05, "loss": 2.3564, "step": 4024000 }, { "epoch": 19.94, "learning_rate": 4.0034202325569875e-05, "loss": 2.3494, "step": 4024500 }, { "epoch": 19.94, "learning_rate": 4.003296373914379e-05, "loss": 2.3439, "step": 4025000 }, { "epoch": 19.94, "learning_rate": 4.003172515271771e-05, "loss": 2.3499, "step": 4025500 }, { "epoch": 19.95, "learning_rate": 4.003048904346448e-05, "loss": 2.311, "step": 4026000 }, { "epoch": 19.95, "learning_rate": 4.0029250457038395e-05, "loss": 2.3349, "step": 4026500 }, { "epoch": 19.95, "learning_rate": 4.0028011870612305e-05, "loss": 2.3582, "step": 4027000 }, { "epoch": 19.95, "learning_rate": 4.002677328418622e-05, "loss": 2.3338, "step": 4027500 }, { "epoch": 19.96, "learning_rate": 4.002553469776014e-05, "loss": 2.3483, "step": 4028000 }, { "epoch": 19.96, "learning_rate": 4.0024296111334056e-05, "loss": 2.3628, "step": 4028500 }, { "epoch": 19.96, "learning_rate": 4.002305752490797e-05, "loss": 2.3416, "step": 4029000 }, { "epoch": 19.96, "learning_rate": 4.002182141565474e-05, "loss": 2.3288, "step": 4029500 }, { "epoch": 19.97, "learning_rate": 4.002058282922866e-05, "loss": 2.3303, "step": 4030000 }, { "epoch": 19.97, "learning_rate": 4.0019344242802576e-05, "loss": 2.3498, "step": 4030500 }, { "epoch": 19.97, "learning_rate": 4.001810565637649e-05, "loss": 2.3482, "step": 4031000 }, { "epoch": 19.97, "learning_rate": 4.001686706995041e-05, "loss": 2.3286, "step": 4031500 }, { "epoch": 19.98, "learning_rate": 4.0015628483524326e-05, "loss": 2.3441, "step": 4032000 }, { "epoch": 19.98, "learning_rate": 4.001438989709824e-05, "loss": 2.3522, "step": 4032500 }, { "epoch": 19.98, "learning_rate": 4.001315131067216e-05, "loss": 2.3592, "step": 4033000 }, { "epoch": 19.98, "learning_rate": 4.001191272424608e-05, "loss": 2.3551, "step": 4033500 }, { "epoch": 19.99, "learning_rate": 4.0010674137819994e-05, "loss": 2.3237, "step": 4034000 }, { "epoch": 19.99, "learning_rate": 4.000943555139391e-05, "loss": 2.3385, "step": 4034500 }, { "epoch": 19.99, "learning_rate": 4.000819696496783e-05, "loss": 2.3485, "step": 4035000 }, { "epoch": 19.99, "learning_rate": 4.0006958378541745e-05, "loss": 2.3399, "step": 4035500 }, { "epoch": 20.0, "learning_rate": 4.000572226928851e-05, "loss": 2.3312, "step": 4036000 }, { "epoch": 20.0, "learning_rate": 4.0004483682862424e-05, "loss": 2.3215, "step": 4036500 }, { "epoch": 20.0, "eval_accuracy": 0.6514623229212978, "eval_accuracy_mlm": 0.6059239055867153, "eval_accuracy_nsp": 0.8662569275844352, "eval_loss": 2.360621213912964, "eval_runtime": 145.9157, "eval_samples_per_second": 1747.304, "eval_steps_per_second": 72.809, "step": 4036860 }, { "epoch": 20.0, "learning_rate": 4.000324509643634e-05, "loss": 2.3361, "step": 4037000 }, { "epoch": 20.0, "learning_rate": 4.000200651001026e-05, "loss": 2.331, "step": 4037500 }, { "epoch": 20.01, "learning_rate": 4.0000767923584175e-05, "loss": 2.3296, "step": 4038000 }, { "epoch": 20.01, "learning_rate": 3.999952933715809e-05, "loss": 2.324, "step": 4038500 }, { "epoch": 20.01, "learning_rate": 3.999829075073201e-05, "loss": 2.274, "step": 4039000 }, { "epoch": 20.01, "learning_rate": 3.999705216430592e-05, "loss": 2.3324, "step": 4039500 }, { "epoch": 20.02, "learning_rate": 3.9995813577879836e-05, "loss": 2.3256, "step": 4040000 }, { "epoch": 20.02, "learning_rate": 3.999457499145375e-05, "loss": 2.2992, "step": 4040500 }, { "epoch": 20.02, "learning_rate": 3.999333640502767e-05, "loss": 2.2902, "step": 4041000 }, { "epoch": 20.02, "learning_rate": 3.9992097818601586e-05, "loss": 2.2879, "step": 4041500 }, { "epoch": 20.03, "learning_rate": 3.999086170934836e-05, "loss": 2.3138, "step": 4042000 }, { "epoch": 20.03, "learning_rate": 3.998962312292227e-05, "loss": 2.3257, "step": 4042500 }, { "epoch": 20.03, "learning_rate": 3.998838453649619e-05, "loss": 2.3098, "step": 4043000 }, { "epoch": 20.03, "learning_rate": 3.9987145950070106e-05, "loss": 2.2984, "step": 4043500 }, { "epoch": 20.04, "learning_rate": 3.998590736364402e-05, "loss": 2.2918, "step": 4044000 }, { "epoch": 20.04, "learning_rate": 3.998466877721794e-05, "loss": 2.2826, "step": 4044500 }, { "epoch": 20.04, "learning_rate": 3.998343019079186e-05, "loss": 2.3165, "step": 4045000 }, { "epoch": 20.04, "learning_rate": 3.998219408153862e-05, "loss": 2.3079, "step": 4045500 }, { "epoch": 20.05, "learning_rate": 3.9980955495112536e-05, "loss": 2.3265, "step": 4046000 }, { "epoch": 20.05, "learning_rate": 3.997971690868645e-05, "loss": 2.3006, "step": 4046500 }, { "epoch": 20.05, "learning_rate": 3.997848079943323e-05, "loss": 2.3074, "step": 4047000 }, { "epoch": 20.05, "learning_rate": 3.9977242213007145e-05, "loss": 2.3117, "step": 4047500 }, { "epoch": 20.06, "learning_rate": 3.997600362658106e-05, "loss": 2.3119, "step": 4048000 }, { "epoch": 20.06, "learning_rate": 3.997476504015497e-05, "loss": 2.3299, "step": 4048500 }, { "epoch": 20.06, "learning_rate": 3.997352645372889e-05, "loss": 2.3168, "step": 4049000 }, { "epoch": 20.06, "learning_rate": 3.9972287867302806e-05, "loss": 2.3114, "step": 4049500 }, { "epoch": 20.07, "learning_rate": 3.997104928087672e-05, "loss": 2.3004, "step": 4050000 }, { "epoch": 20.07, "learning_rate": 3.996981069445064e-05, "loss": 2.3204, "step": 4050500 }, { "epoch": 20.07, "learning_rate": 3.996857458519741e-05, "loss": 2.3282, "step": 4051000 }, { "epoch": 20.07, "learning_rate": 3.9967335998771326e-05, "loss": 2.3293, "step": 4051500 }, { "epoch": 20.08, "learning_rate": 3.9966097412345236e-05, "loss": 2.3379, "step": 4052000 }, { "epoch": 20.08, "learning_rate": 3.996485882591915e-05, "loss": 2.3334, "step": 4052500 }, { "epoch": 20.08, "learning_rate": 3.996362271666593e-05, "loss": 2.3042, "step": 4053000 }, { "epoch": 20.08, "learning_rate": 3.9962384130239845e-05, "loss": 2.3357, "step": 4053500 }, { "epoch": 20.08, "learning_rate": 3.9961148020986614e-05, "loss": 2.3007, "step": 4054000 }, { "epoch": 20.09, "learning_rate": 3.9959909434560524e-05, "loss": 2.3134, "step": 4054500 }, { "epoch": 20.09, "learning_rate": 3.995867084813444e-05, "loss": 2.3141, "step": 4055000 }, { "epoch": 20.09, "learning_rate": 3.995743226170836e-05, "loss": 2.3416, "step": 4055500 }, { "epoch": 20.09, "learning_rate": 3.9956193675282275e-05, "loss": 2.3416, "step": 4056000 }, { "epoch": 20.1, "learning_rate": 3.995495508885619e-05, "loss": 2.3061, "step": 4056500 }, { "epoch": 20.1, "learning_rate": 3.995371650243011e-05, "loss": 2.3172, "step": 4057000 }, { "epoch": 20.1, "learning_rate": 3.9952477916004026e-05, "loss": 2.3176, "step": 4057500 }, { "epoch": 20.1, "learning_rate": 3.9951241806750795e-05, "loss": 2.3158, "step": 4058000 }, { "epoch": 20.11, "learning_rate": 3.995000322032471e-05, "loss": 2.3287, "step": 4058500 }, { "epoch": 20.11, "learning_rate": 3.994876711107148e-05, "loss": 2.3434, "step": 4059000 }, { "epoch": 20.11, "learning_rate": 3.99475285246454e-05, "loss": 2.3069, "step": 4059500 }, { "epoch": 20.11, "learning_rate": 3.9946289938219314e-05, "loss": 2.3125, "step": 4060000 }, { "epoch": 20.12, "learning_rate": 3.994505135179323e-05, "loss": 2.3109, "step": 4060500 }, { "epoch": 20.12, "learning_rate": 3.994381276536714e-05, "loss": 2.3453, "step": 4061000 }, { "epoch": 20.12, "learning_rate": 3.994257417894106e-05, "loss": 2.339, "step": 4061500 }, { "epoch": 20.12, "learning_rate": 3.9941335592514975e-05, "loss": 2.3199, "step": 4062000 }, { "epoch": 20.13, "learning_rate": 3.994009700608889e-05, "loss": 2.3294, "step": 4062500 }, { "epoch": 20.13, "learning_rate": 3.993885841966281e-05, "loss": 2.3076, "step": 4063000 }, { "epoch": 20.13, "learning_rate": 3.9937619833236726e-05, "loss": 2.3158, "step": 4063500 }, { "epoch": 20.13, "learning_rate": 3.993638124681064e-05, "loss": 2.3061, "step": 4064000 }, { "epoch": 20.14, "learning_rate": 3.993514266038455e-05, "loss": 2.343, "step": 4064500 }, { "epoch": 20.14, "learning_rate": 3.993390655113133e-05, "loss": 2.3145, "step": 4065000 }, { "epoch": 20.14, "learning_rate": 3.9932667964705246e-05, "loss": 2.2968, "step": 4065500 }, { "epoch": 20.14, "learning_rate": 3.993142937827916e-05, "loss": 2.3266, "step": 4066000 }, { "epoch": 20.15, "learning_rate": 3.993019079185308e-05, "loss": 2.3402, "step": 4066500 }, { "epoch": 20.15, "learning_rate": 3.9928952205426996e-05, "loss": 2.3157, "step": 4067000 }, { "epoch": 20.15, "learning_rate": 3.992771857334661e-05, "loss": 2.3301, "step": 4067500 }, { "epoch": 20.15, "learning_rate": 3.992647998692053e-05, "loss": 2.3513, "step": 4068000 }, { "epoch": 20.16, "learning_rate": 3.9925241400494444e-05, "loss": 2.3077, "step": 4068500 }, { "epoch": 20.16, "learning_rate": 3.992400281406836e-05, "loss": 2.3166, "step": 4069000 }, { "epoch": 20.16, "learning_rate": 3.992276422764228e-05, "loss": 2.3251, "step": 4069500 }, { "epoch": 20.16, "learning_rate": 3.992152811838905e-05, "loss": 2.3054, "step": 4070000 }, { "epoch": 20.17, "learning_rate": 3.9920289531962964e-05, "loss": 2.322, "step": 4070500 }, { "epoch": 20.17, "learning_rate": 3.9919053422709726e-05, "loss": 2.3153, "step": 4071000 }, { "epoch": 20.17, "learning_rate": 3.991781483628364e-05, "loss": 2.2919, "step": 4071500 }, { "epoch": 20.17, "learning_rate": 3.991657624985756e-05, "loss": 2.3338, "step": 4072000 }, { "epoch": 20.18, "learning_rate": 3.9915337663431476e-05, "loss": 2.3325, "step": 4072500 }, { "epoch": 20.18, "learning_rate": 3.9914099077005393e-05, "loss": 2.3226, "step": 4073000 }, { "epoch": 20.18, "learning_rate": 3.991286049057931e-05, "loss": 2.3341, "step": 4073500 }, { "epoch": 20.18, "learning_rate": 3.991162190415323e-05, "loss": 2.2985, "step": 4074000 }, { "epoch": 20.19, "learning_rate": 3.9910383317727144e-05, "loss": 2.3138, "step": 4074500 }, { "epoch": 20.19, "learning_rate": 3.990914473130106e-05, "loss": 2.3167, "step": 4075000 }, { "epoch": 20.19, "learning_rate": 3.990790614487498e-05, "loss": 2.3082, "step": 4075500 }, { "epoch": 20.19, "learning_rate": 3.9906667558448895e-05, "loss": 2.3136, "step": 4076000 }, { "epoch": 20.2, "learning_rate": 3.990542897202281e-05, "loss": 2.3414, "step": 4076500 }, { "epoch": 20.2, "learning_rate": 3.990419038559673e-05, "loss": 2.298, "step": 4077000 }, { "epoch": 20.2, "learning_rate": 3.99029542763435e-05, "loss": 2.317, "step": 4077500 }, { "epoch": 20.2, "learning_rate": 3.990171816709026e-05, "loss": 2.3172, "step": 4078000 }, { "epoch": 20.21, "learning_rate": 3.9900479580664177e-05, "loss": 2.3035, "step": 4078500 }, { "epoch": 20.21, "learning_rate": 3.9899240994238094e-05, "loss": 2.3391, "step": 4079000 }, { "epoch": 20.21, "learning_rate": 3.989800240781201e-05, "loss": 2.3139, "step": 4079500 }, { "epoch": 20.21, "learning_rate": 3.989676382138593e-05, "loss": 2.297, "step": 4080000 }, { "epoch": 20.22, "learning_rate": 3.9895527712132696e-05, "loss": 2.326, "step": 4080500 }, { "epoch": 20.22, "learning_rate": 3.989428912570661e-05, "loss": 2.3263, "step": 4081000 }, { "epoch": 20.22, "learning_rate": 3.989305053928053e-05, "loss": 2.3248, "step": 4081500 }, { "epoch": 20.22, "learning_rate": 3.98918144300273e-05, "loss": 2.3094, "step": 4082000 }, { "epoch": 20.23, "learning_rate": 3.9890575843601216e-05, "loss": 2.3216, "step": 4082500 }, { "epoch": 20.23, "learning_rate": 3.988933725717513e-05, "loss": 2.3138, "step": 4083000 }, { "epoch": 20.23, "learning_rate": 3.988809867074905e-05, "loss": 2.3215, "step": 4083500 }, { "epoch": 20.23, "learning_rate": 3.9886860084322967e-05, "loss": 2.3122, "step": 4084000 }, { "epoch": 20.24, "learning_rate": 3.988562149789688e-05, "loss": 2.3316, "step": 4084500 }, { "epoch": 20.24, "learning_rate": 3.988438538864365e-05, "loss": 2.324, "step": 4085000 }, { "epoch": 20.24, "learning_rate": 3.988314680221757e-05, "loss": 2.3268, "step": 4085500 }, { "epoch": 20.24, "learning_rate": 3.9881908215791486e-05, "loss": 2.2985, "step": 4086000 }, { "epoch": 20.25, "learning_rate": 3.98806696293654e-05, "loss": 2.3345, "step": 4086500 }, { "epoch": 20.25, "learning_rate": 3.987943104293931e-05, "loss": 2.3098, "step": 4087000 }, { "epoch": 20.25, "learning_rate": 3.987819245651323e-05, "loss": 2.3195, "step": 4087500 }, { "epoch": 20.25, "learning_rate": 3.987695387008715e-05, "loss": 2.373, "step": 4088000 }, { "epoch": 20.26, "learning_rate": 3.9875715283661064e-05, "loss": 2.3466, "step": 4088500 }, { "epoch": 20.26, "learning_rate": 3.987447669723498e-05, "loss": 2.3485, "step": 4089000 }, { "epoch": 20.26, "learning_rate": 3.98732381108089e-05, "loss": 2.3326, "step": 4089500 }, { "epoch": 20.26, "learning_rate": 3.9871999524382815e-05, "loss": 2.2901, "step": 4090000 }, { "epoch": 20.27, "learning_rate": 3.987076093795673e-05, "loss": 2.3226, "step": 4090500 }, { "epoch": 20.27, "learning_rate": 3.98695248287035e-05, "loss": 2.3075, "step": 4091000 }, { "epoch": 20.27, "learning_rate": 3.986828624227741e-05, "loss": 2.2915, "step": 4091500 }, { "epoch": 20.27, "learning_rate": 3.986704765585133e-05, "loss": 2.3465, "step": 4092000 }, { "epoch": 20.28, "learning_rate": 3.9865809069425245e-05, "loss": 2.3153, "step": 4092500 }, { "epoch": 20.28, "learning_rate": 3.986457048299916e-05, "loss": 2.3102, "step": 4093000 }, { "epoch": 20.28, "learning_rate": 3.986333189657308e-05, "loss": 2.3314, "step": 4093500 }, { "epoch": 20.28, "learning_rate": 3.9862093310146995e-05, "loss": 2.3403, "step": 4094000 }, { "epoch": 20.29, "learning_rate": 3.9860857200893764e-05, "loss": 2.3266, "step": 4094500 }, { "epoch": 20.29, "learning_rate": 3.985962109164054e-05, "loss": 2.3155, "step": 4095000 }, { "epoch": 20.29, "learning_rate": 3.985838250521445e-05, "loss": 2.3508, "step": 4095500 }, { "epoch": 20.29, "learning_rate": 3.985714391878837e-05, "loss": 2.32, "step": 4096000 }, { "epoch": 20.3, "learning_rate": 3.9855905332362284e-05, "loss": 2.328, "step": 4096500 }, { "epoch": 20.3, "learning_rate": 3.98546667459362e-05, "loss": 2.3088, "step": 4097000 }, { "epoch": 20.3, "learning_rate": 3.985343063668297e-05, "loss": 2.3212, "step": 4097500 }, { "epoch": 20.3, "learning_rate": 3.9852192050256886e-05, "loss": 2.3098, "step": 4098000 }, { "epoch": 20.31, "learning_rate": 3.98509534638308e-05, "loss": 2.32, "step": 4098500 }, { "epoch": 20.31, "learning_rate": 3.984971487740472e-05, "loss": 2.3258, "step": 4099000 }, { "epoch": 20.31, "learning_rate": 3.984847629097863e-05, "loss": 2.3186, "step": 4099500 }, { "epoch": 20.31, "learning_rate": 3.984723770455255e-05, "loss": 2.3401, "step": 4100000 }, { "epoch": 20.32, "learning_rate": 3.984600159529932e-05, "loss": 2.3358, "step": 4100500 }, { "epoch": 20.32, "learning_rate": 3.984476300887324e-05, "loss": 2.323, "step": 4101000 }, { "epoch": 20.32, "learning_rate": 3.984352442244716e-05, "loss": 2.3239, "step": 4101500 }, { "epoch": 20.32, "learning_rate": 3.9842285836021074e-05, "loss": 2.2897, "step": 4102000 }, { "epoch": 20.33, "learning_rate": 3.9841049726767836e-05, "loss": 2.3372, "step": 4102500 }, { "epoch": 20.33, "learning_rate": 3.983981114034175e-05, "loss": 2.3183, "step": 4103000 }, { "epoch": 20.33, "learning_rate": 3.983857255391567e-05, "loss": 2.3244, "step": 4103500 }, { "epoch": 20.33, "learning_rate": 3.9837333967489587e-05, "loss": 2.341, "step": 4104000 }, { "epoch": 20.34, "learning_rate": 3.9836095381063503e-05, "loss": 2.3251, "step": 4104500 }, { "epoch": 20.34, "learning_rate": 3.983485679463742e-05, "loss": 2.3282, "step": 4105000 }, { "epoch": 20.34, "learning_rate": 3.983361820821133e-05, "loss": 2.3602, "step": 4105500 }, { "epoch": 20.34, "learning_rate": 3.983237962178525e-05, "loss": 2.3238, "step": 4106000 }, { "epoch": 20.35, "learning_rate": 3.9831141035359164e-05, "loss": 2.3484, "step": 4106500 }, { "epoch": 20.35, "learning_rate": 3.982990244893308e-05, "loss": 2.3466, "step": 4107000 }, { "epoch": 20.35, "learning_rate": 3.9828663862507e-05, "loss": 2.3151, "step": 4107500 }, { "epoch": 20.35, "learning_rate": 3.9827427753253774e-05, "loss": 2.3078, "step": 4108000 }, { "epoch": 20.35, "learning_rate": 3.9826189166827684e-05, "loss": 2.3361, "step": 4108500 }, { "epoch": 20.36, "learning_rate": 3.98249505804016e-05, "loss": 2.3302, "step": 4109000 }, { "epoch": 20.36, "learning_rate": 3.982371199397552e-05, "loss": 2.3368, "step": 4109500 }, { "epoch": 20.36, "learning_rate": 3.9822473407549435e-05, "loss": 2.3268, "step": 4110000 }, { "epoch": 20.36, "learning_rate": 3.982123482112335e-05, "loss": 2.337, "step": 4110500 }, { "epoch": 20.37, "learning_rate": 3.981999623469727e-05, "loss": 2.3263, "step": 4111000 }, { "epoch": 20.37, "learning_rate": 3.981875764827118e-05, "loss": 2.3309, "step": 4111500 }, { "epoch": 20.37, "learning_rate": 3.9817519061845096e-05, "loss": 2.3406, "step": 4112000 }, { "epoch": 20.37, "learning_rate": 3.981628047541901e-05, "loss": 2.304, "step": 4112500 }, { "epoch": 20.38, "learning_rate": 3.981504188899293e-05, "loss": 2.3435, "step": 4113000 }, { "epoch": 20.38, "learning_rate": 3.9813803302566847e-05, "loss": 2.3374, "step": 4113500 }, { "epoch": 20.38, "learning_rate": 3.9812567193313615e-05, "loss": 2.334, "step": 4114000 }, { "epoch": 20.38, "learning_rate": 3.981132860688753e-05, "loss": 2.3147, "step": 4114500 }, { "epoch": 20.39, "learning_rate": 3.981009002046145e-05, "loss": 2.33, "step": 4115000 }, { "epoch": 20.39, "learning_rate": 3.9808851434035366e-05, "loss": 2.3088, "step": 4115500 }, { "epoch": 20.39, "learning_rate": 3.980761284760928e-05, "loss": 2.3281, "step": 4116000 }, { "epoch": 20.39, "learning_rate": 3.98063742611832e-05, "loss": 2.3257, "step": 4116500 }, { "epoch": 20.4, "learning_rate": 3.980513815192997e-05, "loss": 2.2962, "step": 4117000 }, { "epoch": 20.4, "learning_rate": 3.9803899565503886e-05, "loss": 2.3362, "step": 4117500 }, { "epoch": 20.4, "learning_rate": 3.98026609790778e-05, "loss": 2.3423, "step": 4118000 }, { "epoch": 20.4, "learning_rate": 3.980142239265171e-05, "loss": 2.2976, "step": 4118500 }, { "epoch": 20.41, "learning_rate": 3.980018380622563e-05, "loss": 2.3139, "step": 4119000 }, { "epoch": 20.41, "learning_rate": 3.979894521979955e-05, "loss": 2.3334, "step": 4119500 }, { "epoch": 20.41, "learning_rate": 3.9797709110546315e-05, "loss": 2.3412, "step": 4120000 }, { "epoch": 20.41, "learning_rate": 3.979647052412023e-05, "loss": 2.3465, "step": 4120500 }, { "epoch": 20.42, "learning_rate": 3.979523193769415e-05, "loss": 2.3413, "step": 4121000 }, { "epoch": 20.42, "learning_rate": 3.9793993351268066e-05, "loss": 2.3351, "step": 4121500 }, { "epoch": 20.42, "learning_rate": 3.979275476484198e-05, "loss": 2.3251, "step": 4122000 }, { "epoch": 20.42, "learning_rate": 3.979151865558875e-05, "loss": 2.3504, "step": 4122500 }, { "epoch": 20.43, "learning_rate": 3.979028006916267e-05, "loss": 2.3297, "step": 4123000 }, { "epoch": 20.43, "learning_rate": 3.9789041482736586e-05, "loss": 2.3087, "step": 4123500 }, { "epoch": 20.43, "learning_rate": 3.97878028963105e-05, "loss": 2.3201, "step": 4124000 }, { "epoch": 20.43, "learning_rate": 3.978656430988442e-05, "loss": 2.2992, "step": 4124500 }, { "epoch": 20.44, "learning_rate": 3.978532572345833e-05, "loss": 2.3279, "step": 4125000 }, { "epoch": 20.44, "learning_rate": 3.97840896142051e-05, "loss": 2.3199, "step": 4125500 }, { "epoch": 20.44, "learning_rate": 3.9782851027779016e-05, "loss": 2.3297, "step": 4126000 }, { "epoch": 20.44, "learning_rate": 3.978161244135293e-05, "loss": 2.3423, "step": 4126500 }, { "epoch": 20.45, "learning_rate": 3.978037385492685e-05, "loss": 2.3141, "step": 4127000 }, { "epoch": 20.45, "learning_rate": 3.9779135268500766e-05, "loss": 2.336, "step": 4127500 }, { "epoch": 20.45, "learning_rate": 3.977789668207468e-05, "loss": 2.3278, "step": 4128000 }, { "epoch": 20.45, "learning_rate": 3.97766580956486e-05, "loss": 2.3016, "step": 4128500 }, { "epoch": 20.46, "learning_rate": 3.977541950922252e-05, "loss": 2.3267, "step": 4129000 }, { "epoch": 20.46, "learning_rate": 3.9774183399969286e-05, "loss": 2.3293, "step": 4129500 }, { "epoch": 20.46, "learning_rate": 3.97729448135432e-05, "loss": 2.3389, "step": 4130000 }, { "epoch": 20.46, "learning_rate": 3.977170622711712e-05, "loss": 2.3131, "step": 4130500 }, { "epoch": 20.47, "learning_rate": 3.977046764069104e-05, "loss": 2.3192, "step": 4131000 }, { "epoch": 20.47, "learning_rate": 3.9769229054264954e-05, "loss": 2.3528, "step": 4131500 }, { "epoch": 20.47, "learning_rate": 3.9767992945011716e-05, "loss": 2.3322, "step": 4132000 }, { "epoch": 20.47, "learning_rate": 3.976675435858563e-05, "loss": 2.3396, "step": 4132500 }, { "epoch": 20.48, "learning_rate": 3.976551824933241e-05, "loss": 2.3243, "step": 4133000 }, { "epoch": 20.48, "learning_rate": 3.976427966290632e-05, "loss": 2.3496, "step": 4133500 }, { "epoch": 20.48, "learning_rate": 3.9763041076480235e-05, "loss": 2.3338, "step": 4134000 }, { "epoch": 20.48, "learning_rate": 3.976180249005415e-05, "loss": 2.2961, "step": 4134500 }, { "epoch": 20.49, "learning_rate": 3.976056390362807e-05, "loss": 2.3068, "step": 4135000 }, { "epoch": 20.49, "learning_rate": 3.9759325317201986e-05, "loss": 2.358, "step": 4135500 }, { "epoch": 20.49, "learning_rate": 3.97580867307759e-05, "loss": 2.2942, "step": 4136000 }, { "epoch": 20.49, "learning_rate": 3.975684814434982e-05, "loss": 2.3191, "step": 4136500 }, { "epoch": 20.5, "learning_rate": 3.975561203509658e-05, "loss": 2.3382, "step": 4137000 }, { "epoch": 20.5, "learning_rate": 3.97543734486705e-05, "loss": 2.3275, "step": 4137500 }, { "epoch": 20.5, "learning_rate": 3.9753134862244416e-05, "loss": 2.352, "step": 4138000 }, { "epoch": 20.5, "learning_rate": 3.975189627581833e-05, "loss": 2.3277, "step": 4138500 }, { "epoch": 20.51, "learning_rate": 3.975066016656511e-05, "loss": 2.3214, "step": 4139000 }, { "epoch": 20.51, "learning_rate": 3.974942405731187e-05, "loss": 2.3368, "step": 4139500 }, { "epoch": 20.51, "learning_rate": 3.974818547088579e-05, "loss": 2.3367, "step": 4140000 }, { "epoch": 20.51, "learning_rate": 3.9746946884459704e-05, "loss": 2.3376, "step": 4140500 }, { "epoch": 20.52, "learning_rate": 3.974570829803362e-05, "loss": 2.3505, "step": 4141000 }, { "epoch": 20.52, "learning_rate": 3.974447218878039e-05, "loss": 2.3357, "step": 4141500 }, { "epoch": 20.52, "learning_rate": 3.974323607952716e-05, "loss": 2.326, "step": 4142000 }, { "epoch": 20.52, "learning_rate": 3.9741997493101076e-05, "loss": 2.3177, "step": 4142500 }, { "epoch": 20.53, "learning_rate": 3.974075890667499e-05, "loss": 2.3331, "step": 4143000 }, { "epoch": 20.53, "learning_rate": 3.973952032024891e-05, "loss": 2.3333, "step": 4143500 }, { "epoch": 20.53, "learning_rate": 3.9738281733822826e-05, "loss": 2.3378, "step": 4144000 }, { "epoch": 20.53, "learning_rate": 3.9737043147396743e-05, "loss": 2.3116, "step": 4144500 }, { "epoch": 20.54, "learning_rate": 3.973580456097066e-05, "loss": 2.3071, "step": 4145000 }, { "epoch": 20.54, "learning_rate": 3.973456597454458e-05, "loss": 2.302, "step": 4145500 }, { "epoch": 20.54, "learning_rate": 3.9733327388118494e-05, "loss": 2.3401, "step": 4146000 }, { "epoch": 20.54, "learning_rate": 3.9732088801692404e-05, "loss": 2.3107, "step": 4146500 }, { "epoch": 20.55, "learning_rate": 3.973085021526632e-05, "loss": 2.3115, "step": 4147000 }, { "epoch": 20.55, "learning_rate": 3.972961162884024e-05, "loss": 2.3198, "step": 4147500 }, { "epoch": 20.55, "learning_rate": 3.9728373042414155e-05, "loss": 2.3458, "step": 4148000 }, { "epoch": 20.55, "learning_rate": 3.972713445598807e-05, "loss": 2.3251, "step": 4148500 }, { "epoch": 20.56, "learning_rate": 3.972589586956199e-05, "loss": 2.3449, "step": 4149000 }, { "epoch": 20.56, "learning_rate": 3.97246572831359e-05, "loss": 2.3381, "step": 4149500 }, { "epoch": 20.56, "learning_rate": 3.9723421173882675e-05, "loss": 2.3345, "step": 4150000 }, { "epoch": 20.56, "learning_rate": 3.972218258745659e-05, "loss": 2.2837, "step": 4150500 }, { "epoch": 20.57, "learning_rate": 3.972094400103051e-05, "loss": 2.3166, "step": 4151000 }, { "epoch": 20.57, "learning_rate": 3.9719705414604426e-05, "loss": 2.3352, "step": 4151500 }, { "epoch": 20.57, "learning_rate": 3.971846682817834e-05, "loss": 2.3251, "step": 4152000 }, { "epoch": 20.57, "learning_rate": 3.971723071892511e-05, "loss": 2.3379, "step": 4152500 }, { "epoch": 20.58, "learning_rate": 3.971599213249902e-05, "loss": 2.3292, "step": 4153000 }, { "epoch": 20.58, "learning_rate": 3.971475354607294e-05, "loss": 2.3387, "step": 4153500 }, { "epoch": 20.58, "learning_rate": 3.9713514959646855e-05, "loss": 2.3423, "step": 4154000 }, { "epoch": 20.58, "learning_rate": 3.971227637322077e-05, "loss": 2.3238, "step": 4154500 }, { "epoch": 20.59, "learning_rate": 3.971104026396754e-05, "loss": 2.3356, "step": 4155000 }, { "epoch": 20.59, "learning_rate": 3.970980167754146e-05, "loss": 2.3428, "step": 4155500 }, { "epoch": 20.59, "learning_rate": 3.9708563091115375e-05, "loss": 2.3381, "step": 4156000 }, { "epoch": 20.59, "learning_rate": 3.970732450468929e-05, "loss": 2.335, "step": 4156500 }, { "epoch": 20.6, "learning_rate": 3.970608591826321e-05, "loss": 2.3374, "step": 4157000 }, { "epoch": 20.6, "learning_rate": 3.9704847331837126e-05, "loss": 2.3173, "step": 4157500 }, { "epoch": 20.6, "learning_rate": 3.9703611222583894e-05, "loss": 2.3357, "step": 4158000 }, { "epoch": 20.6, "learning_rate": 3.970237263615781e-05, "loss": 2.3303, "step": 4158500 }, { "epoch": 20.61, "learning_rate": 3.9701136526904573e-05, "loss": 2.3437, "step": 4159000 }, { "epoch": 20.61, "learning_rate": 3.969989794047849e-05, "loss": 2.2999, "step": 4159500 }, { "epoch": 20.61, "learning_rate": 3.969865935405241e-05, "loss": 2.3308, "step": 4160000 }, { "epoch": 20.61, "learning_rate": 3.9697420767626324e-05, "loss": 2.3444, "step": 4160500 }, { "epoch": 20.62, "learning_rate": 3.969618218120024e-05, "loss": 2.3279, "step": 4161000 }, { "epoch": 20.62, "learning_rate": 3.969494359477416e-05, "loss": 2.3423, "step": 4161500 }, { "epoch": 20.62, "learning_rate": 3.9693705008348075e-05, "loss": 2.352, "step": 4162000 }, { "epoch": 20.62, "learning_rate": 3.969246642192199e-05, "loss": 2.3336, "step": 4162500 }, { "epoch": 20.62, "learning_rate": 3.969122783549591e-05, "loss": 2.3079, "step": 4163000 }, { "epoch": 20.63, "learning_rate": 3.9689989249069826e-05, "loss": 2.3315, "step": 4163500 }, { "epoch": 20.63, "learning_rate": 3.9688753139816595e-05, "loss": 2.3208, "step": 4164000 }, { "epoch": 20.63, "learning_rate": 3.968751455339051e-05, "loss": 2.3149, "step": 4164500 }, { "epoch": 20.63, "learning_rate": 3.968627596696443e-05, "loss": 2.3026, "step": 4165000 }, { "epoch": 20.64, "learning_rate": 3.9685037380538345e-05, "loss": 2.3623, "step": 4165500 }, { "epoch": 20.64, "learning_rate": 3.968379879411226e-05, "loss": 2.3316, "step": 4166000 }, { "epoch": 20.64, "learning_rate": 3.968256020768617e-05, "loss": 2.3202, "step": 4166500 }, { "epoch": 20.64, "learning_rate": 3.968132162126009e-05, "loss": 2.3328, "step": 4167000 }, { "epoch": 20.65, "learning_rate": 3.9680083034834006e-05, "loss": 2.3363, "step": 4167500 }, { "epoch": 20.65, "learning_rate": 3.9678846925580775e-05, "loss": 2.339, "step": 4168000 }, { "epoch": 20.65, "learning_rate": 3.967760833915469e-05, "loss": 2.3511, "step": 4168500 }, { "epoch": 20.65, "learning_rate": 3.967636975272861e-05, "loss": 2.3296, "step": 4169000 }, { "epoch": 20.66, "learning_rate": 3.967513364347538e-05, "loss": 2.3214, "step": 4169500 }, { "epoch": 20.66, "learning_rate": 3.9673895057049295e-05, "loss": 2.3289, "step": 4170000 }, { "epoch": 20.66, "learning_rate": 3.967265647062321e-05, "loss": 2.333, "step": 4170500 }, { "epoch": 20.66, "learning_rate": 3.967141788419713e-05, "loss": 2.3358, "step": 4171000 }, { "epoch": 20.67, "learning_rate": 3.9670179297771045e-05, "loss": 2.3285, "step": 4171500 }, { "epoch": 20.67, "learning_rate": 3.966894318851781e-05, "loss": 2.3058, "step": 4172000 }, { "epoch": 20.67, "learning_rate": 3.9667704602091724e-05, "loss": 2.3333, "step": 4172500 }, { "epoch": 20.67, "learning_rate": 3.966646601566564e-05, "loss": 2.3302, "step": 4173000 }, { "epoch": 20.68, "learning_rate": 3.966522742923956e-05, "loss": 2.3255, "step": 4173500 }, { "epoch": 20.68, "learning_rate": 3.9663988842813475e-05, "loss": 2.3237, "step": 4174000 }, { "epoch": 20.68, "learning_rate": 3.966275025638739e-05, "loss": 2.3398, "step": 4174500 }, { "epoch": 20.68, "learning_rate": 3.966151166996131e-05, "loss": 2.3193, "step": 4175000 }, { "epoch": 20.69, "learning_rate": 3.9660273083535226e-05, "loss": 2.3686, "step": 4175500 }, { "epoch": 20.69, "learning_rate": 3.9659036974281995e-05, "loss": 2.3719, "step": 4176000 }, { "epoch": 20.69, "learning_rate": 3.965779838785591e-05, "loss": 2.3404, "step": 4176500 }, { "epoch": 20.69, "learning_rate": 3.965655980142983e-05, "loss": 2.333, "step": 4177000 }, { "epoch": 20.7, "learning_rate": 3.965532616934945e-05, "loss": 2.339, "step": 4177500 }, { "epoch": 20.7, "learning_rate": 3.965408758292336e-05, "loss": 2.3373, "step": 4178000 }, { "epoch": 20.7, "learning_rate": 3.9652851473670135e-05, "loss": 2.3554, "step": 4178500 }, { "epoch": 20.7, "learning_rate": 3.965161288724405e-05, "loss": 2.3353, "step": 4179000 }, { "epoch": 20.71, "learning_rate": 3.965037430081797e-05, "loss": 2.3286, "step": 4179500 }, { "epoch": 20.71, "learning_rate": 3.9649135714391886e-05, "loss": 2.3024, "step": 4180000 }, { "epoch": 20.71, "learning_rate": 3.964789960513865e-05, "loss": 2.3233, "step": 4180500 }, { "epoch": 20.71, "learning_rate": 3.9646661018712565e-05, "loss": 2.328, "step": 4181000 }, { "epoch": 20.72, "learning_rate": 3.964542243228648e-05, "loss": 2.301, "step": 4181500 }, { "epoch": 20.72, "learning_rate": 3.96441838458604e-05, "loss": 2.3744, "step": 4182000 }, { "epoch": 20.72, "learning_rate": 3.9642945259434316e-05, "loss": 2.3801, "step": 4182500 }, { "epoch": 20.72, "learning_rate": 3.964170667300823e-05, "loss": 2.3168, "step": 4183000 }, { "epoch": 20.73, "learning_rate": 3.964046808658215e-05, "loss": 2.3182, "step": 4183500 }, { "epoch": 20.73, "learning_rate": 3.9639229500156066e-05, "loss": 2.3429, "step": 4184000 }, { "epoch": 20.73, "learning_rate": 3.9637990913729977e-05, "loss": 2.3526, "step": 4184500 }, { "epoch": 20.73, "learning_rate": 3.9636752327303893e-05, "loss": 2.3447, "step": 4185000 }, { "epoch": 20.74, "learning_rate": 3.963551374087781e-05, "loss": 2.3361, "step": 4185500 }, { "epoch": 20.74, "learning_rate": 3.963427515445173e-05, "loss": 2.3486, "step": 4186000 }, { "epoch": 20.74, "learning_rate": 3.9633036568025644e-05, "loss": 2.3517, "step": 4186500 }, { "epoch": 20.74, "learning_rate": 3.963179798159956e-05, "loss": 2.3206, "step": 4187000 }, { "epoch": 20.75, "learning_rate": 3.963055939517348e-05, "loss": 2.3178, "step": 4187500 }, { "epoch": 20.75, "learning_rate": 3.9629320808747395e-05, "loss": 2.3548, "step": 4188000 }, { "epoch": 20.75, "learning_rate": 3.962808222232131e-05, "loss": 2.3043, "step": 4188500 }, { "epoch": 20.75, "learning_rate": 3.962684859024093e-05, "loss": 2.3362, "step": 4189000 }, { "epoch": 20.76, "learning_rate": 3.962561000381485e-05, "loss": 2.3417, "step": 4189500 }, { "epoch": 20.76, "learning_rate": 3.9624371417388767e-05, "loss": 2.3329, "step": 4190000 }, { "epoch": 20.76, "learning_rate": 3.962313283096268e-05, "loss": 2.3078, "step": 4190500 }, { "epoch": 20.76, "learning_rate": 3.9621894244536594e-05, "loss": 2.3241, "step": 4191000 }, { "epoch": 20.77, "learning_rate": 3.962065565811051e-05, "loss": 2.3591, "step": 4191500 }, { "epoch": 20.77, "learning_rate": 3.961941707168443e-05, "loss": 2.3285, "step": 4192000 }, { "epoch": 20.77, "learning_rate": 3.9618178485258344e-05, "loss": 2.3298, "step": 4192500 }, { "epoch": 20.77, "learning_rate": 3.961694237600512e-05, "loss": 2.3402, "step": 4193000 }, { "epoch": 20.78, "learning_rate": 3.961570378957903e-05, "loss": 2.3157, "step": 4193500 }, { "epoch": 20.78, "learning_rate": 3.961446520315295e-05, "loss": 2.3364, "step": 4194000 }, { "epoch": 20.78, "learning_rate": 3.9613226616726864e-05, "loss": 2.339, "step": 4194500 }, { "epoch": 20.78, "learning_rate": 3.961198803030078e-05, "loss": 2.3329, "step": 4195000 }, { "epoch": 20.79, "learning_rate": 3.961075192104755e-05, "loss": 2.3085, "step": 4195500 }, { "epoch": 20.79, "learning_rate": 3.960951333462147e-05, "loss": 2.3486, "step": 4196000 }, { "epoch": 20.79, "learning_rate": 3.9608277225368235e-05, "loss": 2.3269, "step": 4196500 }, { "epoch": 20.79, "learning_rate": 3.960703863894215e-05, "loss": 2.3502, "step": 4197000 }, { "epoch": 20.8, "learning_rate": 3.960580005251607e-05, "loss": 2.32, "step": 4197500 }, { "epoch": 20.8, "learning_rate": 3.960456642043568e-05, "loss": 2.3226, "step": 4198000 }, { "epoch": 20.8, "learning_rate": 3.96033278340096e-05, "loss": 2.3389, "step": 4198500 }, { "epoch": 20.8, "learning_rate": 3.960208924758352e-05, "loss": 2.321, "step": 4199000 }, { "epoch": 20.81, "learning_rate": 3.9600850661157434e-05, "loss": 2.3624, "step": 4199500 }, { "epoch": 20.81, "learning_rate": 3.959961207473135e-05, "loss": 2.3265, "step": 4200000 }, { "epoch": 20.81, "learning_rate": 3.959837596547812e-05, "loss": 2.3382, "step": 4200500 }, { "epoch": 20.81, "learning_rate": 3.959713737905204e-05, "loss": 2.3298, "step": 4201000 }, { "epoch": 20.82, "learning_rate": 3.9595898792625954e-05, "loss": 2.3306, "step": 4201500 }, { "epoch": 20.82, "learning_rate": 3.959466020619987e-05, "loss": 2.3273, "step": 4202000 }, { "epoch": 20.82, "learning_rate": 3.959342409694664e-05, "loss": 2.3362, "step": 4202500 }, { "epoch": 20.82, "learning_rate": 3.9592185510520556e-05, "loss": 2.3331, "step": 4203000 }, { "epoch": 20.83, "learning_rate": 3.9590946924094466e-05, "loss": 2.3285, "step": 4203500 }, { "epoch": 20.83, "learning_rate": 3.958970833766838e-05, "loss": 2.3242, "step": 4204000 }, { "epoch": 20.83, "learning_rate": 3.95884697512423e-05, "loss": 2.3425, "step": 4204500 }, { "epoch": 20.83, "learning_rate": 3.958723116481622e-05, "loss": 2.3424, "step": 4205000 }, { "epoch": 20.84, "learning_rate": 3.9585992578390134e-05, "loss": 2.3153, "step": 4205500 }, { "epoch": 20.84, "learning_rate": 3.958475399196405e-05, "loss": 2.3106, "step": 4206000 }, { "epoch": 20.84, "learning_rate": 3.958351540553797e-05, "loss": 2.3452, "step": 4206500 }, { "epoch": 20.84, "learning_rate": 3.9582276819111885e-05, "loss": 2.3583, "step": 4207000 }, { "epoch": 20.85, "learning_rate": 3.95810382326858e-05, "loss": 2.339, "step": 4207500 }, { "epoch": 20.85, "learning_rate": 3.957979964625972e-05, "loss": 2.3194, "step": 4208000 }, { "epoch": 20.85, "learning_rate": 3.957856353700649e-05, "loss": 2.3322, "step": 4208500 }, { "epoch": 20.85, "learning_rate": 3.9577324950580405e-05, "loss": 2.3334, "step": 4209000 }, { "epoch": 20.86, "learning_rate": 3.957608636415432e-05, "loss": 2.3398, "step": 4209500 }, { "epoch": 20.86, "learning_rate": 3.957484777772824e-05, "loss": 2.3255, "step": 4210000 }, { "epoch": 20.86, "learning_rate": 3.9573611668475e-05, "loss": 2.3186, "step": 4210500 }, { "epoch": 20.86, "learning_rate": 3.957237308204892e-05, "loss": 2.3099, "step": 4211000 }, { "epoch": 20.87, "learning_rate": 3.9571134495622834e-05, "loss": 2.3274, "step": 4211500 }, { "epoch": 20.87, "learning_rate": 3.956989590919675e-05, "loss": 2.3277, "step": 4212000 }, { "epoch": 20.87, "learning_rate": 3.956865732277067e-05, "loss": 2.3225, "step": 4212500 }, { "epoch": 20.87, "learning_rate": 3.9567418736344585e-05, "loss": 2.3221, "step": 4213000 }, { "epoch": 20.88, "learning_rate": 3.95661801499185e-05, "loss": 2.337, "step": 4213500 }, { "epoch": 20.88, "learning_rate": 3.956494156349242e-05, "loss": 2.3301, "step": 4214000 }, { "epoch": 20.88, "learning_rate": 3.9563702977066336e-05, "loss": 2.3094, "step": 4214500 }, { "epoch": 20.88, "learning_rate": 3.956246439064025e-05, "loss": 2.353, "step": 4215000 }, { "epoch": 20.89, "learning_rate": 3.956122580421417e-05, "loss": 2.3262, "step": 4215500 }, { "epoch": 20.89, "learning_rate": 3.955998969496094e-05, "loss": 2.3429, "step": 4216000 }, { "epoch": 20.89, "learning_rate": 3.95587535857077e-05, "loss": 2.3003, "step": 4216500 }, { "epoch": 20.89, "learning_rate": 3.955751499928162e-05, "loss": 2.3653, "step": 4217000 }, { "epoch": 20.89, "learning_rate": 3.9556276412855534e-05, "loss": 2.3238, "step": 4217500 }, { "epoch": 20.9, "learning_rate": 3.955503782642945e-05, "loss": 2.3313, "step": 4218000 }, { "epoch": 20.9, "learning_rate": 3.955379924000337e-05, "loss": 2.3488, "step": 4218500 }, { "epoch": 20.9, "learning_rate": 3.9552560653577285e-05, "loss": 2.3553, "step": 4219000 }, { "epoch": 20.9, "learning_rate": 3.95513220671512e-05, "loss": 2.3272, "step": 4219500 }, { "epoch": 20.91, "learning_rate": 3.955008348072512e-05, "loss": 2.3104, "step": 4220000 }, { "epoch": 20.91, "learning_rate": 3.9548844894299036e-05, "loss": 2.3523, "step": 4220500 }, { "epoch": 20.91, "learning_rate": 3.954760630787295e-05, "loss": 2.3315, "step": 4221000 }, { "epoch": 20.91, "learning_rate": 3.954636772144687e-05, "loss": 2.3421, "step": 4221500 }, { "epoch": 20.92, "learning_rate": 3.954512913502079e-05, "loss": 2.3485, "step": 4222000 }, { "epoch": 20.92, "learning_rate": 3.9543893025767556e-05, "loss": 2.3143, "step": 4222500 }, { "epoch": 20.92, "learning_rate": 3.954265443934147e-05, "loss": 2.3271, "step": 4223000 }, { "epoch": 20.92, "learning_rate": 3.954141585291539e-05, "loss": 2.3282, "step": 4223500 }, { "epoch": 20.93, "learning_rate": 3.9540177266489306e-05, "loss": 2.3492, "step": 4224000 }, { "epoch": 20.93, "learning_rate": 3.953894115723607e-05, "loss": 2.3315, "step": 4224500 }, { "epoch": 20.93, "learning_rate": 3.9537702570809985e-05, "loss": 2.3257, "step": 4225000 }, { "epoch": 20.93, "learning_rate": 3.95364639843839e-05, "loss": 2.351, "step": 4225500 }, { "epoch": 20.94, "learning_rate": 3.953522539795782e-05, "loss": 2.3266, "step": 4226000 }, { "epoch": 20.94, "learning_rate": 3.9533986811531736e-05, "loss": 2.3206, "step": 4226500 }, { "epoch": 20.94, "learning_rate": 3.953274822510565e-05, "loss": 2.343, "step": 4227000 }, { "epoch": 20.94, "learning_rate": 3.953150963867957e-05, "loss": 2.3361, "step": 4227500 }, { "epoch": 20.95, "learning_rate": 3.953027105225349e-05, "loss": 2.2889, "step": 4228000 }, { "epoch": 20.95, "learning_rate": 3.95290374201731e-05, "loss": 2.3352, "step": 4228500 }, { "epoch": 20.95, "learning_rate": 3.952779883374702e-05, "loss": 2.3433, "step": 4229000 }, { "epoch": 20.95, "learning_rate": 3.9526560247320935e-05, "loss": 2.326, "step": 4229500 }, { "epoch": 20.96, "learning_rate": 3.952532413806771e-05, "loss": 2.3601, "step": 4230000 }, { "epoch": 20.96, "learning_rate": 3.952408555164163e-05, "loss": 2.314, "step": 4230500 }, { "epoch": 20.96, "learning_rate": 3.9522846965215544e-05, "loss": 2.3232, "step": 4231000 }, { "epoch": 20.96, "learning_rate": 3.952160837878946e-05, "loss": 2.3366, "step": 4231500 }, { "epoch": 20.97, "learning_rate": 3.952036979236337e-05, "loss": 2.3331, "step": 4232000 }, { "epoch": 20.97, "learning_rate": 3.951913120593729e-05, "loss": 2.3387, "step": 4232500 }, { "epoch": 20.97, "learning_rate": 3.951789509668406e-05, "loss": 2.3342, "step": 4233000 }, { "epoch": 20.97, "learning_rate": 3.9516656510257974e-05, "loss": 2.3227, "step": 4233500 }, { "epoch": 20.98, "learning_rate": 3.951541792383189e-05, "loss": 2.3227, "step": 4234000 }, { "epoch": 20.98, "learning_rate": 3.951417933740581e-05, "loss": 2.3304, "step": 4234500 }, { "epoch": 20.98, "learning_rate": 3.951294075097972e-05, "loss": 2.3277, "step": 4235000 }, { "epoch": 20.98, "learning_rate": 3.9511702164553635e-05, "loss": 2.3255, "step": 4235500 }, { "epoch": 20.99, "learning_rate": 3.951046357812755e-05, "loss": 2.3444, "step": 4236000 }, { "epoch": 20.99, "learning_rate": 3.950922499170147e-05, "loss": 2.3233, "step": 4236500 }, { "epoch": 20.99, "learning_rate": 3.9507986405275386e-05, "loss": 2.3227, "step": 4237000 }, { "epoch": 20.99, "learning_rate": 3.95067478188493e-05, "loss": 2.3412, "step": 4237500 }, { "epoch": 21.0, "learning_rate": 3.950550923242322e-05, "loss": 2.336, "step": 4238000 }, { "epoch": 21.0, "learning_rate": 3.9504270645997136e-05, "loss": 2.3633, "step": 4238500 }, { "epoch": 21.0, "eval_accuracy": 0.6523621169343081, "eval_accuracy_mlm": 0.6068608911574469, "eval_accuracy_nsp": 0.8669707678489482, "eval_loss": 2.365056276321411, "eval_runtime": 145.9769, "eval_samples_per_second": 1746.57, "eval_steps_per_second": 72.779, "step": 4238703 }, { "epoch": 21.0, "learning_rate": 3.9503034536743905e-05, "loss": 2.2921, "step": 4239000 }, { "epoch": 21.0, "learning_rate": 3.950179595031782e-05, "loss": 2.3045, "step": 4239500 }, { "epoch": 21.01, "learning_rate": 3.950055736389174e-05, "loss": 2.2858, "step": 4240000 }, { "epoch": 21.01, "learning_rate": 3.9499318777465656e-05, "loss": 2.2861, "step": 4240500 }, { "epoch": 21.01, "learning_rate": 3.949808019103957e-05, "loss": 2.2879, "step": 4241000 }, { "epoch": 21.01, "learning_rate": 3.949684160461349e-05, "loss": 2.2669, "step": 4241500 }, { "epoch": 21.02, "learning_rate": 3.949560301818741e-05, "loss": 2.3146, "step": 4242000 }, { "epoch": 21.02, "learning_rate": 3.949436690893417e-05, "loss": 2.2891, "step": 4242500 }, { "epoch": 21.02, "learning_rate": 3.9493128322508086e-05, "loss": 2.3136, "step": 4243000 }, { "epoch": 21.02, "learning_rate": 3.9491889736082e-05, "loss": 2.2931, "step": 4243500 }, { "epoch": 21.03, "learning_rate": 3.949065114965592e-05, "loss": 2.3204, "step": 4244000 }, { "epoch": 21.03, "learning_rate": 3.9489412563229836e-05, "loss": 2.2862, "step": 4244500 }, { "epoch": 21.03, "learning_rate": 3.9488176453976605e-05, "loss": 2.2945, "step": 4245000 }, { "epoch": 21.03, "learning_rate": 3.948693786755052e-05, "loss": 2.3142, "step": 4245500 }, { "epoch": 21.04, "learning_rate": 3.948569928112444e-05, "loss": 2.305, "step": 4246000 }, { "epoch": 21.04, "learning_rate": 3.9484460694698356e-05, "loss": 2.2793, "step": 4246500 }, { "epoch": 21.04, "learning_rate": 3.948322210827227e-05, "loss": 2.3251, "step": 4247000 }, { "epoch": 21.04, "learning_rate": 3.948198352184619e-05, "loss": 2.3238, "step": 4247500 }, { "epoch": 21.05, "learning_rate": 3.948074493542011e-05, "loss": 2.2985, "step": 4248000 }, { "epoch": 21.05, "learning_rate": 3.9479506348994024e-05, "loss": 2.3153, "step": 4248500 }, { "epoch": 21.05, "learning_rate": 3.947826776256794e-05, "loss": 2.3251, "step": 4249000 }, { "epoch": 21.05, "learning_rate": 3.947702917614186e-05, "loss": 2.2942, "step": 4249500 }, { "epoch": 21.06, "learning_rate": 3.9475790589715775e-05, "loss": 2.2967, "step": 4250000 }, { "epoch": 21.06, "learning_rate": 3.947455200328969e-05, "loss": 2.3266, "step": 4250500 }, { "epoch": 21.06, "learning_rate": 3.947331341686361e-05, "loss": 2.3041, "step": 4251000 }, { "epoch": 21.06, "learning_rate": 3.947207730761037e-05, "loss": 2.3075, "step": 4251500 }, { "epoch": 21.07, "learning_rate": 3.947083872118429e-05, "loss": 2.3057, "step": 4252000 }, { "epoch": 21.07, "learning_rate": 3.9469600134758204e-05, "loss": 2.3371, "step": 4252500 }, { "epoch": 21.07, "learning_rate": 3.946836154833212e-05, "loss": 2.2894, "step": 4253000 }, { "epoch": 21.07, "learning_rate": 3.946712296190604e-05, "loss": 2.3192, "step": 4253500 }, { "epoch": 21.08, "learning_rate": 3.946588932982566e-05, "loss": 2.2943, "step": 4254000 }, { "epoch": 21.08, "learning_rate": 3.946465322057243e-05, "loss": 2.3263, "step": 4254500 }, { "epoch": 21.08, "learning_rate": 3.9463414634146345e-05, "loss": 2.292, "step": 4255000 }, { "epoch": 21.08, "learning_rate": 3.946217604772026e-05, "loss": 2.324, "step": 4255500 }, { "epoch": 21.09, "learning_rate": 3.946093746129418e-05, "loss": 2.3155, "step": 4256000 }, { "epoch": 21.09, "learning_rate": 3.9459698874868095e-05, "loss": 2.3074, "step": 4256500 }, { "epoch": 21.09, "learning_rate": 3.9458460288442006e-05, "loss": 2.2982, "step": 4257000 }, { "epoch": 21.09, "learning_rate": 3.945722170201592e-05, "loss": 2.3279, "step": 4257500 }, { "epoch": 21.1, "learning_rate": 3.945598311558984e-05, "loss": 2.3079, "step": 4258000 }, { "epoch": 21.1, "learning_rate": 3.9454744529163756e-05, "loss": 2.2971, "step": 4258500 }, { "epoch": 21.1, "learning_rate": 3.945350594273767e-05, "loss": 2.2837, "step": 4259000 }, { "epoch": 21.1, "learning_rate": 3.945226735631159e-05, "loss": 2.3128, "step": 4259500 }, { "epoch": 21.11, "learning_rate": 3.945103124705836e-05, "loss": 2.2928, "step": 4260000 }, { "epoch": 21.11, "learning_rate": 3.9449792660632276e-05, "loss": 2.3077, "step": 4260500 }, { "epoch": 21.11, "learning_rate": 3.944855407420619e-05, "loss": 2.2944, "step": 4261000 }, { "epoch": 21.11, "learning_rate": 3.944731548778011e-05, "loss": 2.3236, "step": 4261500 }, { "epoch": 21.12, "learning_rate": 3.944607690135402e-05, "loss": 2.3145, "step": 4262000 }, { "epoch": 21.12, "learning_rate": 3.944483831492794e-05, "loss": 2.2849, "step": 4262500 }, { "epoch": 21.12, "learning_rate": 3.9443602205674706e-05, "loss": 2.3208, "step": 4263000 }, { "epoch": 21.12, "learning_rate": 3.944236361924862e-05, "loss": 2.3103, "step": 4263500 }, { "epoch": 21.13, "learning_rate": 3.944112503282254e-05, "loss": 2.3213, "step": 4264000 }, { "epoch": 21.13, "learning_rate": 3.9439886446396456e-05, "loss": 2.3028, "step": 4264500 }, { "epoch": 21.13, "learning_rate": 3.943864785997037e-05, "loss": 2.3208, "step": 4265000 }, { "epoch": 21.13, "learning_rate": 3.943740927354429e-05, "loss": 2.2868, "step": 4265500 }, { "epoch": 21.14, "learning_rate": 3.943617316429106e-05, "loss": 2.3265, "step": 4266000 }, { "epoch": 21.14, "learning_rate": 3.9434934577864976e-05, "loss": 2.3104, "step": 4266500 }, { "epoch": 21.14, "learning_rate": 3.943369599143889e-05, "loss": 2.325, "step": 4267000 }, { "epoch": 21.14, "learning_rate": 3.943245740501281e-05, "loss": 2.3422, "step": 4267500 }, { "epoch": 21.15, "learning_rate": 3.943121881858673e-05, "loss": 2.3074, "step": 4268000 }, { "epoch": 21.15, "learning_rate": 3.9429980232160644e-05, "loss": 2.3229, "step": 4268500 }, { "epoch": 21.15, "learning_rate": 3.942874412290741e-05, "loss": 2.2827, "step": 4269000 }, { "epoch": 21.15, "learning_rate": 3.942750553648132e-05, "loss": 2.3152, "step": 4269500 }, { "epoch": 21.16, "learning_rate": 3.942626695005524e-05, "loss": 2.3225, "step": 4270000 }, { "epoch": 21.16, "learning_rate": 3.9425028363629157e-05, "loss": 2.3046, "step": 4270500 }, { "epoch": 21.16, "learning_rate": 3.9423789777203073e-05, "loss": 2.3032, "step": 4271000 }, { "epoch": 21.16, "learning_rate": 3.942255119077699e-05, "loss": 2.2956, "step": 4271500 }, { "epoch": 21.16, "learning_rate": 3.942131260435091e-05, "loss": 2.3496, "step": 4272000 }, { "epoch": 21.17, "learning_rate": 3.9420074017924824e-05, "loss": 2.3057, "step": 4272500 }, { "epoch": 21.17, "learning_rate": 3.941883790867159e-05, "loss": 2.2873, "step": 4273000 }, { "epoch": 21.17, "learning_rate": 3.941759932224551e-05, "loss": 2.2936, "step": 4273500 }, { "epoch": 21.17, "learning_rate": 3.941636073581943e-05, "loss": 2.3081, "step": 4274000 }, { "epoch": 21.18, "learning_rate": 3.9415122149393344e-05, "loss": 2.2982, "step": 4274500 }, { "epoch": 21.18, "learning_rate": 3.941388604014011e-05, "loss": 2.3144, "step": 4275000 }, { "epoch": 21.18, "learning_rate": 3.941264745371402e-05, "loss": 2.3072, "step": 4275500 }, { "epoch": 21.18, "learning_rate": 3.941140886728794e-05, "loss": 2.3279, "step": 4276000 }, { "epoch": 21.19, "learning_rate": 3.941017028086186e-05, "loss": 2.3102, "step": 4276500 }, { "epoch": 21.19, "learning_rate": 3.9408931694435774e-05, "loss": 2.298, "step": 4277000 }, { "epoch": 21.19, "learning_rate": 3.940769558518255e-05, "loss": 2.3229, "step": 4277500 }, { "epoch": 21.19, "learning_rate": 3.9406456998756466e-05, "loss": 2.3222, "step": 4278000 }, { "epoch": 21.2, "learning_rate": 3.940521841233038e-05, "loss": 2.2881, "step": 4278500 }, { "epoch": 21.2, "learning_rate": 3.940397982590429e-05, "loss": 2.3254, "step": 4279000 }, { "epoch": 21.2, "learning_rate": 3.940274123947821e-05, "loss": 2.3011, "step": 4279500 }, { "epoch": 21.2, "learning_rate": 3.940150265305213e-05, "loss": 2.3101, "step": 4280000 }, { "epoch": 21.21, "learning_rate": 3.9400264066626044e-05, "loss": 2.3173, "step": 4280500 }, { "epoch": 21.21, "learning_rate": 3.939902548019996e-05, "loss": 2.3111, "step": 4281000 }, { "epoch": 21.21, "learning_rate": 3.939778937094673e-05, "loss": 2.3002, "step": 4281500 }, { "epoch": 21.21, "learning_rate": 3.93965532616935e-05, "loss": 2.2919, "step": 4282000 }, { "epoch": 21.22, "learning_rate": 3.9395314675267415e-05, "loss": 2.2976, "step": 4282500 }, { "epoch": 21.22, "learning_rate": 3.9394078566014184e-05, "loss": 2.3086, "step": 4283000 }, { "epoch": 21.22, "learning_rate": 3.9392839979588094e-05, "loss": 2.3153, "step": 4283500 }, { "epoch": 21.22, "learning_rate": 3.939160387033486e-05, "loss": 2.3233, "step": 4284000 }, { "epoch": 21.23, "learning_rate": 3.939036528390878e-05, "loss": 2.3166, "step": 4284500 }, { "epoch": 21.23, "learning_rate": 3.9389129174655556e-05, "loss": 2.2879, "step": 4285000 }, { "epoch": 21.23, "learning_rate": 3.9387890588229466e-05, "loss": 2.2924, "step": 4285500 }, { "epoch": 21.23, "learning_rate": 3.938665200180338e-05, "loss": 2.3315, "step": 4286000 }, { "epoch": 21.24, "learning_rate": 3.93854134153773e-05, "loss": 2.2909, "step": 4286500 }, { "epoch": 21.24, "learning_rate": 3.938417482895122e-05, "loss": 2.3155, "step": 4287000 }, { "epoch": 21.24, "learning_rate": 3.9382936242525134e-05, "loss": 2.3186, "step": 4287500 }, { "epoch": 21.24, "learning_rate": 3.938169765609905e-05, "loss": 2.3072, "step": 4288000 }, { "epoch": 21.25, "learning_rate": 3.938046154684582e-05, "loss": 2.3222, "step": 4288500 }, { "epoch": 21.25, "learning_rate": 3.937922296041973e-05, "loss": 2.3089, "step": 4289000 }, { "epoch": 21.25, "learning_rate": 3.9377984373993646e-05, "loss": 2.3375, "step": 4289500 }, { "epoch": 21.25, "learning_rate": 3.937674578756756e-05, "loss": 2.3187, "step": 4290000 }, { "epoch": 21.26, "learning_rate": 3.937550720114148e-05, "loss": 2.3231, "step": 4290500 }, { "epoch": 21.26, "learning_rate": 3.93742686147154e-05, "loss": 2.2876, "step": 4291000 }, { "epoch": 21.26, "learning_rate": 3.9373030028289314e-05, "loss": 2.3264, "step": 4291500 }, { "epoch": 21.26, "learning_rate": 3.937179144186323e-05, "loss": 2.3282, "step": 4292000 }, { "epoch": 21.27, "learning_rate": 3.937055285543715e-05, "loss": 2.3319, "step": 4292500 }, { "epoch": 21.27, "learning_rate": 3.9369314269011065e-05, "loss": 2.3423, "step": 4293000 }, { "epoch": 21.27, "learning_rate": 3.936807568258498e-05, "loss": 2.286, "step": 4293500 }, { "epoch": 21.27, "learning_rate": 3.93668370961589e-05, "loss": 2.3192, "step": 4294000 }, { "epoch": 21.28, "learning_rate": 3.9365598509732816e-05, "loss": 2.3145, "step": 4294500 }, { "epoch": 21.28, "learning_rate": 3.936435992330673e-05, "loss": 2.3265, "step": 4295000 }, { "epoch": 21.28, "learning_rate": 3.936312133688065e-05, "loss": 2.334, "step": 4295500 }, { "epoch": 21.28, "learning_rate": 3.9361882750454566e-05, "loss": 2.3322, "step": 4296000 }, { "epoch": 21.29, "learning_rate": 3.9360646641201335e-05, "loss": 2.2928, "step": 4296500 }, { "epoch": 21.29, "learning_rate": 3.9359408054775245e-05, "loss": 2.314, "step": 4297000 }, { "epoch": 21.29, "learning_rate": 3.935816946834916e-05, "loss": 2.3296, "step": 4297500 }, { "epoch": 21.29, "learning_rate": 3.935693335909593e-05, "loss": 2.3279, "step": 4298000 }, { "epoch": 21.3, "learning_rate": 3.935569477266985e-05, "loss": 2.2969, "step": 4298500 }, { "epoch": 21.3, "learning_rate": 3.9354456186243765e-05, "loss": 2.3151, "step": 4299000 }, { "epoch": 21.3, "learning_rate": 3.935321759981768e-05, "loss": 2.3044, "step": 4299500 }, { "epoch": 21.3, "learning_rate": 3.935198149056445e-05, "loss": 2.3075, "step": 4300000 }, { "epoch": 21.31, "learning_rate": 3.935074290413837e-05, "loss": 2.2896, "step": 4300500 }, { "epoch": 21.31, "learning_rate": 3.9349504317712285e-05, "loss": 2.3119, "step": 4301000 }, { "epoch": 21.31, "learning_rate": 3.93482657312862e-05, "loss": 2.3079, "step": 4301500 }, { "epoch": 21.31, "learning_rate": 3.934702714486012e-05, "loss": 2.3173, "step": 4302000 }, { "epoch": 21.32, "learning_rate": 3.9345788558434035e-05, "loss": 2.3188, "step": 4302500 }, { "epoch": 21.32, "learning_rate": 3.934454997200795e-05, "loss": 2.2956, "step": 4303000 }, { "epoch": 21.32, "learning_rate": 3.934331138558186e-05, "loss": 2.3025, "step": 4303500 }, { "epoch": 21.32, "learning_rate": 3.934207279915578e-05, "loss": 2.3133, "step": 4304000 }, { "epoch": 21.33, "learning_rate": 3.9340834212729696e-05, "loss": 2.2965, "step": 4304500 }, { "epoch": 21.33, "learning_rate": 3.933959562630361e-05, "loss": 2.3176, "step": 4305000 }, { "epoch": 21.33, "learning_rate": 3.933835703987753e-05, "loss": 2.3177, "step": 4305500 }, { "epoch": 21.33, "learning_rate": 3.933711845345145e-05, "loss": 2.3032, "step": 4306000 }, { "epoch": 21.34, "learning_rate": 3.9335879867025364e-05, "loss": 2.2993, "step": 4306500 }, { "epoch": 21.34, "learning_rate": 3.9334641280599274e-05, "loss": 2.3499, "step": 4307000 }, { "epoch": 21.34, "learning_rate": 3.933340269417319e-05, "loss": 2.3083, "step": 4307500 }, { "epoch": 21.34, "learning_rate": 3.933216658491997e-05, "loss": 2.3264, "step": 4308000 }, { "epoch": 21.35, "learning_rate": 3.9330927998493884e-05, "loss": 2.3329, "step": 4308500 }, { "epoch": 21.35, "learning_rate": 3.93296894120678e-05, "loss": 2.2855, "step": 4309000 }, { "epoch": 21.35, "learning_rate": 3.932845082564172e-05, "loss": 2.3285, "step": 4309500 }, { "epoch": 21.35, "learning_rate": 3.932721223921563e-05, "loss": 2.2971, "step": 4310000 }, { "epoch": 21.36, "learning_rate": 3.9325976129962396e-05, "loss": 2.3157, "step": 4310500 }, { "epoch": 21.36, "learning_rate": 3.9324740020709165e-05, "loss": 2.3484, "step": 4311000 }, { "epoch": 21.36, "learning_rate": 3.932350143428308e-05, "loss": 2.3032, "step": 4311500 }, { "epoch": 21.36, "learning_rate": 3.932226532502985e-05, "loss": 2.3432, "step": 4312000 }, { "epoch": 21.37, "learning_rate": 3.932102673860377e-05, "loss": 2.3068, "step": 4312500 }, { "epoch": 21.37, "learning_rate": 3.9319788152177685e-05, "loss": 2.3444, "step": 4313000 }, { "epoch": 21.37, "learning_rate": 3.93185495657516e-05, "loss": 2.3294, "step": 4313500 }, { "epoch": 21.37, "learning_rate": 3.931731097932552e-05, "loss": 2.3024, "step": 4314000 }, { "epoch": 21.38, "learning_rate": 3.9316072392899436e-05, "loss": 2.3364, "step": 4314500 }, { "epoch": 21.38, "learning_rate": 3.931483380647335e-05, "loss": 2.3322, "step": 4315000 }, { "epoch": 21.38, "learning_rate": 3.931359522004727e-05, "loss": 2.3135, "step": 4315500 }, { "epoch": 21.38, "learning_rate": 3.9312356633621186e-05, "loss": 2.3176, "step": 4316000 }, { "epoch": 21.39, "learning_rate": 3.93111180471951e-05, "loss": 2.34, "step": 4316500 }, { "epoch": 21.39, "learning_rate": 3.9309879460769014e-05, "loss": 2.3245, "step": 4317000 }, { "epoch": 21.39, "learning_rate": 3.930864087434293e-05, "loss": 2.3008, "step": 4317500 }, { "epoch": 21.39, "learning_rate": 3.93074047650897e-05, "loss": 2.2976, "step": 4318000 }, { "epoch": 21.4, "learning_rate": 3.9306166178663616e-05, "loss": 2.3099, "step": 4318500 }, { "epoch": 21.4, "learning_rate": 3.9304930069410385e-05, "loss": 2.3081, "step": 4319000 }, { "epoch": 21.4, "learning_rate": 3.93036914829843e-05, "loss": 2.3236, "step": 4319500 }, { "epoch": 21.4, "learning_rate": 3.930245289655822e-05, "loss": 2.3435, "step": 4320000 }, { "epoch": 21.41, "learning_rate": 3.9301214310132136e-05, "loss": 2.3184, "step": 4320500 }, { "epoch": 21.41, "learning_rate": 3.929997572370605e-05, "loss": 2.3174, "step": 4321000 }, { "epoch": 21.41, "learning_rate": 3.929873713727997e-05, "loss": 2.3308, "step": 4321500 }, { "epoch": 21.41, "learning_rate": 3.9297498550853887e-05, "loss": 2.2802, "step": 4322000 }, { "epoch": 21.42, "learning_rate": 3.9296259964427804e-05, "loss": 2.2863, "step": 4322500 }, { "epoch": 21.42, "learning_rate": 3.929502137800172e-05, "loss": 2.3012, "step": 4323000 }, { "epoch": 21.42, "learning_rate": 3.929378526874848e-05, "loss": 2.3041, "step": 4323500 }, { "epoch": 21.42, "learning_rate": 3.929254915949525e-05, "loss": 2.343, "step": 4324000 }, { "epoch": 21.43, "learning_rate": 3.929131057306917e-05, "loss": 2.3007, "step": 4324500 }, { "epoch": 21.43, "learning_rate": 3.9290071986643085e-05, "loss": 2.3265, "step": 4325000 }, { "epoch": 21.43, "learning_rate": 3.9288833400217e-05, "loss": 2.3181, "step": 4325500 }, { "epoch": 21.43, "learning_rate": 3.928759481379092e-05, "loss": 2.3068, "step": 4326000 }, { "epoch": 21.43, "learning_rate": 3.9286356227364836e-05, "loss": 2.3173, "step": 4326500 }, { "epoch": 21.44, "learning_rate": 3.928511764093875e-05, "loss": 2.3145, "step": 4327000 }, { "epoch": 21.44, "learning_rate": 3.9283881531685515e-05, "loss": 2.3157, "step": 4327500 }, { "epoch": 21.44, "learning_rate": 3.928264294525943e-05, "loss": 2.3428, "step": 4328000 }, { "epoch": 21.44, "learning_rate": 3.928140435883335e-05, "loss": 2.3414, "step": 4328500 }, { "epoch": 21.45, "learning_rate": 3.9280165772407266e-05, "loss": 2.3233, "step": 4329000 }, { "epoch": 21.45, "learning_rate": 3.9278929663154034e-05, "loss": 2.3465, "step": 4329500 }, { "epoch": 21.45, "learning_rate": 3.927769107672795e-05, "loss": 2.3259, "step": 4330000 }, { "epoch": 21.45, "learning_rate": 3.927645249030187e-05, "loss": 2.3362, "step": 4330500 }, { "epoch": 21.46, "learning_rate": 3.9275213903875785e-05, "loss": 2.3229, "step": 4331000 }, { "epoch": 21.46, "learning_rate": 3.92739753174497e-05, "loss": 2.3119, "step": 4331500 }, { "epoch": 21.46, "learning_rate": 3.927273673102362e-05, "loss": 2.3136, "step": 4332000 }, { "epoch": 21.46, "learning_rate": 3.9271498144597536e-05, "loss": 2.332, "step": 4332500 }, { "epoch": 21.47, "learning_rate": 3.927025955817145e-05, "loss": 2.3427, "step": 4333000 }, { "epoch": 21.47, "learning_rate": 3.926902097174537e-05, "loss": 2.348, "step": 4333500 }, { "epoch": 21.47, "learning_rate": 3.926778486249213e-05, "loss": 2.3072, "step": 4334000 }, { "epoch": 21.47, "learning_rate": 3.926654627606605e-05, "loss": 2.2956, "step": 4334500 }, { "epoch": 21.48, "learning_rate": 3.9265310166812824e-05, "loss": 2.3138, "step": 4335000 }, { "epoch": 21.48, "learning_rate": 3.9264071580386735e-05, "loss": 2.296, "step": 4335500 }, { "epoch": 21.48, "learning_rate": 3.926283299396065e-05, "loss": 2.3195, "step": 4336000 }, { "epoch": 21.48, "learning_rate": 3.926159440753457e-05, "loss": 2.3196, "step": 4336500 }, { "epoch": 21.49, "learning_rate": 3.9260358298281344e-05, "loss": 2.2992, "step": 4337000 }, { "epoch": 21.49, "learning_rate": 3.925911971185526e-05, "loss": 2.3306, "step": 4337500 }, { "epoch": 21.49, "learning_rate": 3.925788112542918e-05, "loss": 2.3089, "step": 4338000 }, { "epoch": 21.49, "learning_rate": 3.925664253900309e-05, "loss": 2.3162, "step": 4338500 }, { "epoch": 21.5, "learning_rate": 3.9255403952577005e-05, "loss": 2.3234, "step": 4339000 }, { "epoch": 21.5, "learning_rate": 3.925416536615092e-05, "loss": 2.3258, "step": 4339500 }, { "epoch": 21.5, "learning_rate": 3.925292677972484e-05, "loss": 2.3317, "step": 4340000 }, { "epoch": 21.5, "learning_rate": 3.925168819329875e-05, "loss": 2.3034, "step": 4340500 }, { "epoch": 21.51, "learning_rate": 3.9250449606872666e-05, "loss": 2.3462, "step": 4341000 }, { "epoch": 21.51, "learning_rate": 3.924921102044658e-05, "loss": 2.3138, "step": 4341500 }, { "epoch": 21.51, "learning_rate": 3.924797491119335e-05, "loss": 2.3311, "step": 4342000 }, { "epoch": 21.51, "learning_rate": 3.924673632476727e-05, "loss": 2.3276, "step": 4342500 }, { "epoch": 21.52, "learning_rate": 3.9245497738341186e-05, "loss": 2.3321, "step": 4343000 }, { "epoch": 21.52, "learning_rate": 3.924426162908796e-05, "loss": 2.2764, "step": 4343500 }, { "epoch": 21.52, "learning_rate": 3.924302304266188e-05, "loss": 2.3127, "step": 4344000 }, { "epoch": 21.52, "learning_rate": 3.9241784456235795e-05, "loss": 2.3023, "step": 4344500 }, { "epoch": 21.53, "learning_rate": 3.9240545869809705e-05, "loss": 2.2963, "step": 4345000 }, { "epoch": 21.53, "learning_rate": 3.923930728338362e-05, "loss": 2.3223, "step": 4345500 }, { "epoch": 21.53, "learning_rate": 3.923806869695754e-05, "loss": 2.3039, "step": 4346000 }, { "epoch": 21.53, "learning_rate": 3.9236830110531456e-05, "loss": 2.3454, "step": 4346500 }, { "epoch": 21.54, "learning_rate": 3.923559152410537e-05, "loss": 2.3503, "step": 4347000 }, { "epoch": 21.54, "learning_rate": 3.923435293767928e-05, "loss": 2.3224, "step": 4347500 }, { "epoch": 21.54, "learning_rate": 3.923311682842605e-05, "loss": 2.3291, "step": 4348000 }, { "epoch": 21.54, "learning_rate": 3.923187824199997e-05, "loss": 2.3457, "step": 4348500 }, { "epoch": 21.55, "learning_rate": 3.9230639655573886e-05, "loss": 2.3133, "step": 4349000 }, { "epoch": 21.55, "learning_rate": 3.92294010691478e-05, "loss": 2.3439, "step": 4349500 }, { "epoch": 21.55, "learning_rate": 3.922816248272172e-05, "loss": 2.3106, "step": 4350000 }, { "epoch": 21.55, "learning_rate": 3.9226926373468495e-05, "loss": 2.3065, "step": 4350500 }, { "epoch": 21.56, "learning_rate": 3.922569026421526e-05, "loss": 2.3188, "step": 4351000 }, { "epoch": 21.56, "learning_rate": 3.9224451677789174e-05, "loss": 2.3561, "step": 4351500 }, { "epoch": 21.56, "learning_rate": 3.922321309136309e-05, "loss": 2.2994, "step": 4352000 }, { "epoch": 21.56, "learning_rate": 3.922197450493701e-05, "loss": 2.2983, "step": 4352500 }, { "epoch": 21.57, "learning_rate": 3.9220735918510925e-05, "loss": 2.2753, "step": 4353000 }, { "epoch": 21.57, "learning_rate": 3.921949733208484e-05, "loss": 2.3172, "step": 4353500 }, { "epoch": 21.57, "learning_rate": 3.921825874565876e-05, "loss": 2.3432, "step": 4354000 }, { "epoch": 21.57, "learning_rate": 3.921702015923267e-05, "loss": 2.3251, "step": 4354500 }, { "epoch": 21.58, "learning_rate": 3.9215781572806586e-05, "loss": 2.3145, "step": 4355000 }, { "epoch": 21.58, "learning_rate": 3.92145429863805e-05, "loss": 2.3169, "step": 4355500 }, { "epoch": 21.58, "learning_rate": 3.921330687712728e-05, "loss": 2.3224, "step": 4356000 }, { "epoch": 21.58, "learning_rate": 3.9212068290701195e-05, "loss": 2.2893, "step": 4356500 }, { "epoch": 21.59, "learning_rate": 3.921082970427511e-05, "loss": 2.3134, "step": 4357000 }, { "epoch": 21.59, "learning_rate": 3.920959111784902e-05, "loss": 2.3234, "step": 4357500 }, { "epoch": 21.59, "learning_rate": 3.920835500859579e-05, "loss": 2.2992, "step": 4358000 }, { "epoch": 21.59, "learning_rate": 3.920711642216971e-05, "loss": 2.3117, "step": 4358500 }, { "epoch": 21.6, "learning_rate": 3.9205877835743625e-05, "loss": 2.321, "step": 4359000 }, { "epoch": 21.6, "learning_rate": 3.920463924931754e-05, "loss": 2.2993, "step": 4359500 }, { "epoch": 21.6, "learning_rate": 3.920340314006431e-05, "loss": 2.319, "step": 4360000 }, { "epoch": 21.6, "learning_rate": 3.920216455363823e-05, "loss": 2.3492, "step": 4360500 }, { "epoch": 21.61, "learning_rate": 3.9200928444384996e-05, "loss": 2.3343, "step": 4361000 }, { "epoch": 21.61, "learning_rate": 3.919968985795891e-05, "loss": 2.3362, "step": 4361500 }, { "epoch": 21.61, "learning_rate": 3.9198451271532824e-05, "loss": 2.3155, "step": 4362000 }, { "epoch": 21.61, "learning_rate": 3.919721516227959e-05, "loss": 2.3279, "step": 4362500 }, { "epoch": 21.62, "learning_rate": 3.919597657585351e-05, "loss": 2.3272, "step": 4363000 }, { "epoch": 21.62, "learning_rate": 3.9194737989427426e-05, "loss": 2.3127, "step": 4363500 }, { "epoch": 21.62, "learning_rate": 3.919349940300134e-05, "loss": 2.3002, "step": 4364000 }, { "epoch": 21.62, "learning_rate": 3.919226081657526e-05, "loss": 2.3042, "step": 4364500 }, { "epoch": 21.63, "learning_rate": 3.919102223014918e-05, "loss": 2.302, "step": 4365000 }, { "epoch": 21.63, "learning_rate": 3.9189786120895946e-05, "loss": 2.3265, "step": 4365500 }, { "epoch": 21.63, "learning_rate": 3.918854753446986e-05, "loss": 2.3245, "step": 4366000 }, { "epoch": 21.63, "learning_rate": 3.918730894804378e-05, "loss": 2.3159, "step": 4366500 }, { "epoch": 21.64, "learning_rate": 3.918607283879055e-05, "loss": 2.3157, "step": 4367000 }, { "epoch": 21.64, "learning_rate": 3.918483425236446e-05, "loss": 2.3319, "step": 4367500 }, { "epoch": 21.64, "learning_rate": 3.9183595665938376e-05, "loss": 2.3131, "step": 4368000 }, { "epoch": 21.64, "learning_rate": 3.918235707951229e-05, "loss": 2.3323, "step": 4368500 }, { "epoch": 21.65, "learning_rate": 3.918111849308621e-05, "loss": 2.3466, "step": 4369000 }, { "epoch": 21.65, "learning_rate": 3.9179879906660126e-05, "loss": 2.3123, "step": 4369500 }, { "epoch": 21.65, "learning_rate": 3.917864132023404e-05, "loss": 2.296, "step": 4370000 }, { "epoch": 21.65, "learning_rate": 3.917740273380796e-05, "loss": 2.313, "step": 4370500 }, { "epoch": 21.66, "learning_rate": 3.917616414738188e-05, "loss": 2.3314, "step": 4371000 }, { "epoch": 21.66, "learning_rate": 3.9174928038128646e-05, "loss": 2.2958, "step": 4371500 }, { "epoch": 21.66, "learning_rate": 3.917368945170256e-05, "loss": 2.3307, "step": 4372000 }, { "epoch": 21.66, "learning_rate": 3.917245086527648e-05, "loss": 2.3187, "step": 4372500 }, { "epoch": 21.67, "learning_rate": 3.91712122788504e-05, "loss": 2.3269, "step": 4373000 }, { "epoch": 21.67, "learning_rate": 3.9169973692424314e-05, "loss": 2.3228, "step": 4373500 }, { "epoch": 21.67, "learning_rate": 3.916873510599823e-05, "loss": 2.3128, "step": 4374000 }, { "epoch": 21.67, "learning_rate": 3.916749651957215e-05, "loss": 2.3191, "step": 4374500 }, { "epoch": 21.68, "learning_rate": 3.9166257933146064e-05, "loss": 2.3091, "step": 4375000 }, { "epoch": 21.68, "learning_rate": 3.9165019346719975e-05, "loss": 2.3338, "step": 4375500 }, { "epoch": 21.68, "learning_rate": 3.916378076029389e-05, "loss": 2.3332, "step": 4376000 }, { "epoch": 21.68, "learning_rate": 3.916254465104066e-05, "loss": 2.3182, "step": 4376500 }, { "epoch": 21.69, "learning_rate": 3.916130606461458e-05, "loss": 2.3053, "step": 4377000 }, { "epoch": 21.69, "learning_rate": 3.9160067478188494e-05, "loss": 2.3355, "step": 4377500 }, { "epoch": 21.69, "learning_rate": 3.915882889176241e-05, "loss": 2.3166, "step": 4378000 }, { "epoch": 21.69, "learning_rate": 3.915759278250918e-05, "loss": 2.3133, "step": 4378500 }, { "epoch": 21.7, "learning_rate": 3.91563541960831e-05, "loss": 2.3149, "step": 4379000 }, { "epoch": 21.7, "learning_rate": 3.9155115609657014e-05, "loss": 2.3224, "step": 4379500 }, { "epoch": 21.7, "learning_rate": 3.915387702323093e-05, "loss": 2.3327, "step": 4380000 }, { "epoch": 21.7, "learning_rate": 3.915263843680485e-05, "loss": 2.3332, "step": 4380500 }, { "epoch": 21.7, "learning_rate": 3.9151399850378765e-05, "loss": 2.3271, "step": 4381000 }, { "epoch": 21.71, "learning_rate": 3.915016126395268e-05, "loss": 2.286, "step": 4381500 }, { "epoch": 21.71, "learning_rate": 3.914892267752659e-05, "loss": 2.3318, "step": 4382000 }, { "epoch": 21.71, "learning_rate": 3.914768409110051e-05, "loss": 2.3186, "step": 4382500 }, { "epoch": 21.71, "learning_rate": 3.9146445504674425e-05, "loss": 2.3394, "step": 4383000 }, { "epoch": 21.72, "learning_rate": 3.914520691824834e-05, "loss": 2.3411, "step": 4383500 }, { "epoch": 21.72, "learning_rate": 3.914396833182226e-05, "loss": 2.312, "step": 4384000 }, { "epoch": 21.72, "learning_rate": 3.914273222256903e-05, "loss": 2.3266, "step": 4384500 }, { "epoch": 21.72, "learning_rate": 3.9141493636142945e-05, "loss": 2.3351, "step": 4385000 }, { "epoch": 21.73, "learning_rate": 3.914025504971686e-05, "loss": 2.3116, "step": 4385500 }, { "epoch": 21.73, "learning_rate": 3.913901646329078e-05, "loss": 2.3258, "step": 4386000 }, { "epoch": 21.73, "learning_rate": 3.9137777876864696e-05, "loss": 2.3249, "step": 4386500 }, { "epoch": 21.73, "learning_rate": 3.9136541767611465e-05, "loss": 2.3161, "step": 4387000 }, { "epoch": 21.74, "learning_rate": 3.913530318118538e-05, "loss": 2.3343, "step": 4387500 }, { "epoch": 21.74, "learning_rate": 3.91340645947593e-05, "loss": 2.2996, "step": 4388000 }, { "epoch": 21.74, "learning_rate": 3.913282848550606e-05, "loss": 2.307, "step": 4388500 }, { "epoch": 21.74, "learning_rate": 3.913159485342569e-05, "loss": 2.308, "step": 4389000 }, { "epoch": 21.75, "learning_rate": 3.9130356266999605e-05, "loss": 2.3072, "step": 4389500 }, { "epoch": 21.75, "learning_rate": 3.9129117680573515e-05, "loss": 2.3254, "step": 4390000 }, { "epoch": 21.75, "learning_rate": 3.912787909414743e-05, "loss": 2.3295, "step": 4390500 }, { "epoch": 21.75, "learning_rate": 3.912664050772135e-05, "loss": 2.3067, "step": 4391000 }, { "epoch": 21.76, "learning_rate": 3.9125401921295266e-05, "loss": 2.3233, "step": 4391500 }, { "epoch": 21.76, "learning_rate": 3.912416333486918e-05, "loss": 2.3388, "step": 4392000 }, { "epoch": 21.76, "learning_rate": 3.912292474844309e-05, "loss": 2.2997, "step": 4392500 }, { "epoch": 21.76, "learning_rate": 3.912168616201701e-05, "loss": 2.3244, "step": 4393000 }, { "epoch": 21.77, "learning_rate": 3.912044757559093e-05, "loss": 2.3057, "step": 4393500 }, { "epoch": 21.77, "learning_rate": 3.9119208989164844e-05, "loss": 2.291, "step": 4394000 }, { "epoch": 21.77, "learning_rate": 3.911797040273876e-05, "loss": 2.3322, "step": 4394500 }, { "epoch": 21.77, "learning_rate": 3.911673181631268e-05, "loss": 2.3331, "step": 4395000 }, { "epoch": 21.78, "learning_rate": 3.9115493229886595e-05, "loss": 2.3059, "step": 4395500 }, { "epoch": 21.78, "learning_rate": 3.911425464346051e-05, "loss": 2.3113, "step": 4396000 }, { "epoch": 21.78, "learning_rate": 3.911301605703443e-05, "loss": 2.3046, "step": 4396500 }, { "epoch": 21.78, "learning_rate": 3.91117799477812e-05, "loss": 2.3182, "step": 4397000 }, { "epoch": 21.79, "learning_rate": 3.9110541361355114e-05, "loss": 2.3106, "step": 4397500 }, { "epoch": 21.79, "learning_rate": 3.910930277492903e-05, "loss": 2.3463, "step": 4398000 }, { "epoch": 21.79, "learning_rate": 3.910806418850295e-05, "loss": 2.3151, "step": 4398500 }, { "epoch": 21.79, "learning_rate": 3.9106825602076865e-05, "loss": 2.3151, "step": 4399000 }, { "epoch": 21.8, "learning_rate": 3.910558701565078e-05, "loss": 2.332, "step": 4399500 }, { "epoch": 21.8, "learning_rate": 3.91043484292247e-05, "loss": 2.3448, "step": 4400000 }, { "epoch": 21.8, "learning_rate": 3.9103109842798616e-05, "loss": 2.3485, "step": 4400500 }, { "epoch": 21.8, "learning_rate": 3.910187373354538e-05, "loss": 2.342, "step": 4401000 }, { "epoch": 21.81, "learning_rate": 3.9100635147119295e-05, "loss": 2.3263, "step": 4401500 }, { "epoch": 21.81, "learning_rate": 3.9099399037866063e-05, "loss": 2.3294, "step": 4402000 }, { "epoch": 21.81, "learning_rate": 3.909816292861284e-05, "loss": 2.3133, "step": 4402500 }, { "epoch": 21.81, "learning_rate": 3.9096924342186756e-05, "loss": 2.3127, "step": 4403000 }, { "epoch": 21.82, "learning_rate": 3.9095685755760666e-05, "loss": 2.3094, "step": 4403500 }, { "epoch": 21.82, "learning_rate": 3.909444716933458e-05, "loss": 2.2822, "step": 4404000 }, { "epoch": 21.82, "learning_rate": 3.90932085829085e-05, "loss": 2.3257, "step": 4404500 }, { "epoch": 21.82, "learning_rate": 3.909196999648242e-05, "loss": 2.2952, "step": 4405000 }, { "epoch": 21.83, "learning_rate": 3.9090731410056334e-05, "loss": 2.3526, "step": 4405500 }, { "epoch": 21.83, "learning_rate": 3.9089492823630244e-05, "loss": 2.3249, "step": 4406000 }, { "epoch": 21.83, "learning_rate": 3.908825423720416e-05, "loss": 2.3282, "step": 4406500 }, { "epoch": 21.83, "learning_rate": 3.9087018127950936e-05, "loss": 2.3322, "step": 4407000 }, { "epoch": 21.84, "learning_rate": 3.9085779541524853e-05, "loss": 2.3248, "step": 4407500 }, { "epoch": 21.84, "learning_rate": 3.9084540955098764e-05, "loss": 2.325, "step": 4408000 }, { "epoch": 21.84, "learning_rate": 3.908330236867268e-05, "loss": 2.3289, "step": 4408500 }, { "epoch": 21.84, "learning_rate": 3.90820637822466e-05, "loss": 2.3138, "step": 4409000 }, { "epoch": 21.85, "learning_rate": 3.908082767299337e-05, "loss": 2.3137, "step": 4409500 }, { "epoch": 21.85, "learning_rate": 3.907958908656728e-05, "loss": 2.3257, "step": 4410000 }, { "epoch": 21.85, "learning_rate": 3.90783505001412e-05, "loss": 2.322, "step": 4410500 }, { "epoch": 21.85, "learning_rate": 3.907711191371512e-05, "loss": 2.3273, "step": 4411000 }, { "epoch": 21.86, "learning_rate": 3.9075873327289034e-05, "loss": 2.3332, "step": 4411500 }, { "epoch": 21.86, "learning_rate": 3.907463474086295e-05, "loss": 2.337, "step": 4412000 }, { "epoch": 21.86, "learning_rate": 3.907339863160972e-05, "loss": 2.3351, "step": 4412500 }, { "epoch": 21.86, "learning_rate": 3.9072160045183637e-05, "loss": 2.3349, "step": 4413000 }, { "epoch": 21.87, "learning_rate": 3.9070921458757554e-05, "loss": 2.3297, "step": 4413500 }, { "epoch": 21.87, "learning_rate": 3.906968287233147e-05, "loss": 2.3183, "step": 4414000 }, { "epoch": 21.87, "learning_rate": 3.906844428590538e-05, "loss": 2.3166, "step": 4414500 }, { "epoch": 21.87, "learning_rate": 3.90672056994793e-05, "loss": 2.317, "step": 4415000 }, { "epoch": 21.88, "learning_rate": 3.9065967113053214e-05, "loss": 2.3059, "step": 4415500 }, { "epoch": 21.88, "learning_rate": 3.906472852662713e-05, "loss": 2.3185, "step": 4416000 }, { "epoch": 21.88, "learning_rate": 3.906349241737391e-05, "loss": 2.3452, "step": 4416500 }, { "epoch": 21.88, "learning_rate": 3.906225383094782e-05, "loss": 2.32, "step": 4417000 }, { "epoch": 21.89, "learning_rate": 3.9061015244521734e-05, "loss": 2.3385, "step": 4417500 }, { "epoch": 21.89, "learning_rate": 3.905977665809565e-05, "loss": 2.3351, "step": 4418000 }, { "epoch": 21.89, "learning_rate": 3.905853807166957e-05, "loss": 2.3168, "step": 4418500 }, { "epoch": 21.89, "learning_rate": 3.905730196241634e-05, "loss": 2.3263, "step": 4419000 }, { "epoch": 21.9, "learning_rate": 3.9056063375990254e-05, "loss": 2.331, "step": 4419500 }, { "epoch": 21.9, "learning_rate": 3.905482478956417e-05, "loss": 2.3449, "step": 4420000 }, { "epoch": 21.9, "learning_rate": 3.905358868031094e-05, "loss": 2.327, "step": 4420500 }, { "epoch": 21.9, "learning_rate": 3.9052350093884856e-05, "loss": 2.3243, "step": 4421000 }, { "epoch": 21.91, "learning_rate": 3.905111150745877e-05, "loss": 2.3226, "step": 4421500 }, { "epoch": 21.91, "learning_rate": 3.904987292103269e-05, "loss": 2.3242, "step": 4422000 }, { "epoch": 21.91, "learning_rate": 3.904863433460661e-05, "loss": 2.3421, "step": 4422500 }, { "epoch": 21.91, "learning_rate": 3.9047395748180524e-05, "loss": 2.3393, "step": 4423000 }, { "epoch": 21.92, "learning_rate": 3.9046157161754434e-05, "loss": 2.3457, "step": 4423500 }, { "epoch": 21.92, "learning_rate": 3.904491857532835e-05, "loss": 2.3174, "step": 4424000 }, { "epoch": 21.92, "learning_rate": 3.904367998890227e-05, "loss": 2.3136, "step": 4424500 }, { "epoch": 21.92, "learning_rate": 3.904244387964904e-05, "loss": 2.311, "step": 4425000 }, { "epoch": 21.93, "learning_rate": 3.9041205293222954e-05, "loss": 2.3206, "step": 4425500 }, { "epoch": 21.93, "learning_rate": 3.903996670679687e-05, "loss": 2.3157, "step": 4426000 }, { "epoch": 21.93, "learning_rate": 3.903872812037079e-05, "loss": 2.3178, "step": 4426500 }, { "epoch": 21.93, "learning_rate": 3.90374895339447e-05, "loss": 2.3013, "step": 4427000 }, { "epoch": 21.94, "learning_rate": 3.9036250947518615e-05, "loss": 2.3279, "step": 4427500 }, { "epoch": 21.94, "learning_rate": 3.903501236109253e-05, "loss": 2.3246, "step": 4428000 }, { "epoch": 21.94, "learning_rate": 3.903377377466645e-05, "loss": 2.3211, "step": 4428500 }, { "epoch": 21.94, "learning_rate": 3.9032535188240365e-05, "loss": 2.3386, "step": 4429000 }, { "epoch": 21.95, "learning_rate": 3.903129907898714e-05, "loss": 2.3297, "step": 4429500 }, { "epoch": 21.95, "learning_rate": 3.903006049256105e-05, "loss": 2.3189, "step": 4430000 }, { "epoch": 21.95, "learning_rate": 3.902882438330782e-05, "loss": 2.3271, "step": 4430500 }, { "epoch": 21.95, "learning_rate": 3.902758579688174e-05, "loss": 2.2818, "step": 4431000 }, { "epoch": 21.96, "learning_rate": 3.9026347210455654e-05, "loss": 2.2994, "step": 4431500 }, { "epoch": 21.96, "learning_rate": 3.902510862402957e-05, "loss": 2.3206, "step": 4432000 }, { "epoch": 21.96, "learning_rate": 3.902387003760349e-05, "loss": 2.3146, "step": 4432500 }, { "epoch": 21.96, "learning_rate": 3.90226314511774e-05, "loss": 2.325, "step": 4433000 }, { "epoch": 21.97, "learning_rate": 3.9021392864751315e-05, "loss": 2.3225, "step": 4433500 }, { "epoch": 21.97, "learning_rate": 3.902015675549809e-05, "loss": 2.3511, "step": 4434000 }, { "epoch": 21.97, "learning_rate": 3.901891816907201e-05, "loss": 2.307, "step": 4434500 }, { "epoch": 21.97, "learning_rate": 3.9017679582645924e-05, "loss": 2.3254, "step": 4435000 }, { "epoch": 21.98, "learning_rate": 3.901644099621984e-05, "loss": 2.3352, "step": 4435500 }, { "epoch": 21.98, "learning_rate": 3.901520240979376e-05, "loss": 2.337, "step": 4436000 }, { "epoch": 21.98, "learning_rate": 3.901396630054052e-05, "loss": 2.3028, "step": 4436500 }, { "epoch": 21.98, "learning_rate": 3.901272771411444e-05, "loss": 2.3097, "step": 4437000 }, { "epoch": 21.98, "learning_rate": 3.9011489127688354e-05, "loss": 2.3093, "step": 4437500 }, { "epoch": 21.99, "learning_rate": 3.901025054126227e-05, "loss": 2.348, "step": 4438000 }, { "epoch": 21.99, "learning_rate": 3.900901195483619e-05, "loss": 2.3076, "step": 4438500 }, { "epoch": 21.99, "learning_rate": 3.900777584558296e-05, "loss": 2.3348, "step": 4439000 }, { "epoch": 21.99, "learning_rate": 3.9006537259156874e-05, "loss": 2.3648, "step": 4439500 }, { "epoch": 22.0, "learning_rate": 3.900529867273079e-05, "loss": 2.3346, "step": 4440000 }, { "epoch": 22.0, "learning_rate": 3.900406008630471e-05, "loss": 2.3289, "step": 4440500 }, { "epoch": 22.0, "eval_accuracy": 0.6535946905725298, "eval_accuracy_mlm": 0.6084227722805187, "eval_accuracy_nsp": 0.86680995767947, "eval_loss": 2.3525571823120117, "eval_runtime": 145.9518, "eval_samples_per_second": 1746.871, "eval_steps_per_second": 72.791, "step": 4440546 }, { "epoch": 22.0, "learning_rate": 3.900282397705147e-05, "loss": 2.2772, "step": 4441000 }, { "epoch": 22.0, "learning_rate": 3.9001585390625386e-05, "loss": 2.2912, "step": 4441500 }, { "epoch": 22.01, "learning_rate": 3.90003468041993e-05, "loss": 2.282, "step": 4442000 }, { "epoch": 22.01, "learning_rate": 3.899911069494607e-05, "loss": 2.2739, "step": 4442500 }, { "epoch": 22.01, "learning_rate": 3.899787210851999e-05, "loss": 2.2682, "step": 4443000 }, { "epoch": 22.01, "learning_rate": 3.8996633522093906e-05, "loss": 2.2959, "step": 4443500 }, { "epoch": 22.02, "learning_rate": 3.899539493566782e-05, "loss": 2.2891, "step": 4444000 }, { "epoch": 22.02, "learning_rate": 3.899415634924174e-05, "loss": 2.2862, "step": 4444500 }, { "epoch": 22.02, "learning_rate": 3.899291776281566e-05, "loss": 2.3008, "step": 4445000 }, { "epoch": 22.02, "learning_rate": 3.8991679176389574e-05, "loss": 2.2792, "step": 4445500 }, { "epoch": 22.03, "learning_rate": 3.899044058996349e-05, "loss": 2.2976, "step": 4446000 }, { "epoch": 22.03, "learning_rate": 3.898920448071026e-05, "loss": 2.303, "step": 4446500 }, { "epoch": 22.03, "learning_rate": 3.898796589428417e-05, "loss": 2.2843, "step": 4447000 }, { "epoch": 22.03, "learning_rate": 3.8986727307858087e-05, "loss": 2.2869, "step": 4447500 }, { "epoch": 22.04, "learning_rate": 3.8985488721432003e-05, "loss": 2.2573, "step": 4448000 }, { "epoch": 22.04, "learning_rate": 3.898425013500592e-05, "loss": 2.2712, "step": 4448500 }, { "epoch": 22.04, "learning_rate": 3.898301154857984e-05, "loss": 2.275, "step": 4449000 }, { "epoch": 22.04, "learning_rate": 3.8981772962153754e-05, "loss": 2.2868, "step": 4449500 }, { "epoch": 22.05, "learning_rate": 3.898053437572767e-05, "loss": 2.3145, "step": 4450000 }, { "epoch": 22.05, "learning_rate": 3.897929578930159e-05, "loss": 2.3197, "step": 4450500 }, { "epoch": 22.05, "learning_rate": 3.8978057202875505e-05, "loss": 2.2986, "step": 4451000 }, { "epoch": 22.05, "learning_rate": 3.8976821093622274e-05, "loss": 2.2807, "step": 4451500 }, { "epoch": 22.06, "learning_rate": 3.897558498436904e-05, "loss": 2.3027, "step": 4452000 }, { "epoch": 22.06, "learning_rate": 3.897434639794296e-05, "loss": 2.2952, "step": 4452500 }, { "epoch": 22.06, "learning_rate": 3.8973107811516877e-05, "loss": 2.2791, "step": 4453000 }, { "epoch": 22.06, "learning_rate": 3.8971869225090793e-05, "loss": 2.2952, "step": 4453500 }, { "epoch": 22.07, "learning_rate": 3.8970630638664704e-05, "loss": 2.3056, "step": 4454000 }, { "epoch": 22.07, "learning_rate": 3.896939205223862e-05, "loss": 2.3098, "step": 4454500 }, { "epoch": 22.07, "learning_rate": 3.896815346581254e-05, "loss": 2.2978, "step": 4455000 }, { "epoch": 22.07, "learning_rate": 3.8966914879386454e-05, "loss": 2.306, "step": 4455500 }, { "epoch": 22.08, "learning_rate": 3.896567629296037e-05, "loss": 2.2921, "step": 4456000 }, { "epoch": 22.08, "learning_rate": 3.896443770653429e-05, "loss": 2.3061, "step": 4456500 }, { "epoch": 22.08, "learning_rate": 3.896320159728106e-05, "loss": 2.2947, "step": 4457000 }, { "epoch": 22.08, "learning_rate": 3.8961963010854974e-05, "loss": 2.2962, "step": 4457500 }, { "epoch": 22.09, "learning_rate": 3.896072442442889e-05, "loss": 2.2948, "step": 4458000 }, { "epoch": 22.09, "learning_rate": 3.895948831517566e-05, "loss": 2.3185, "step": 4458500 }, { "epoch": 22.09, "learning_rate": 3.895824972874958e-05, "loss": 2.3078, "step": 4459000 }, { "epoch": 22.09, "learning_rate": 3.8957011142323494e-05, "loss": 2.303, "step": 4459500 }, { "epoch": 22.1, "learning_rate": 3.8955775033070256e-05, "loss": 2.2979, "step": 4460000 }, { "epoch": 22.1, "learning_rate": 3.895453644664417e-05, "loss": 2.2898, "step": 4460500 }, { "epoch": 22.1, "learning_rate": 3.895330033739095e-05, "loss": 2.3058, "step": 4461000 }, { "epoch": 22.1, "learning_rate": 3.8952061750964865e-05, "loss": 2.2828, "step": 4461500 }, { "epoch": 22.11, "learning_rate": 3.8950823164538775e-05, "loss": 2.3222, "step": 4462000 }, { "epoch": 22.11, "learning_rate": 3.894958457811269e-05, "loss": 2.3003, "step": 4462500 }, { "epoch": 22.11, "learning_rate": 3.894834599168661e-05, "loss": 2.2981, "step": 4463000 }, { "epoch": 22.11, "learning_rate": 3.8947107405260526e-05, "loss": 2.3091, "step": 4463500 }, { "epoch": 22.12, "learning_rate": 3.894586881883444e-05, "loss": 2.271, "step": 4464000 }, { "epoch": 22.12, "learning_rate": 3.894463023240836e-05, "loss": 2.334, "step": 4464500 }, { "epoch": 22.12, "learning_rate": 3.894339164598228e-05, "loss": 2.2926, "step": 4465000 }, { "epoch": 22.12, "learning_rate": 3.8942153059556194e-05, "loss": 2.3064, "step": 4465500 }, { "epoch": 22.13, "learning_rate": 3.894091447313011e-05, "loss": 2.2886, "step": 4466000 }, { "epoch": 22.13, "learning_rate": 3.893967588670403e-05, "loss": 2.2827, "step": 4466500 }, { "epoch": 22.13, "learning_rate": 3.8938437300277944e-05, "loss": 2.2968, "step": 4467000 }, { "epoch": 22.13, "learning_rate": 3.8937201191024707e-05, "loss": 2.298, "step": 4467500 }, { "epoch": 22.14, "learning_rate": 3.8935962604598623e-05, "loss": 2.2935, "step": 4468000 }, { "epoch": 22.14, "learning_rate": 3.893472401817254e-05, "loss": 2.2881, "step": 4468500 }, { "epoch": 22.14, "learning_rate": 3.893348543174646e-05, "loss": 2.2915, "step": 4469000 }, { "epoch": 22.14, "learning_rate": 3.8932246845320374e-05, "loss": 2.3102, "step": 4469500 }, { "epoch": 22.15, "learning_rate": 3.893100825889429e-05, "loss": 2.3013, "step": 4470000 }, { "epoch": 22.15, "learning_rate": 3.892977214964106e-05, "loss": 2.285, "step": 4470500 }, { "epoch": 22.15, "learning_rate": 3.892853356321498e-05, "loss": 2.2956, "step": 4471000 }, { "epoch": 22.15, "learning_rate": 3.892729745396174e-05, "loss": 2.3063, "step": 4471500 }, { "epoch": 22.16, "learning_rate": 3.8926058867535656e-05, "loss": 2.3184, "step": 4472000 }, { "epoch": 22.16, "learning_rate": 3.892482028110957e-05, "loss": 2.2951, "step": 4472500 }, { "epoch": 22.16, "learning_rate": 3.892358169468349e-05, "loss": 2.2968, "step": 4473000 }, { "epoch": 22.16, "learning_rate": 3.892234310825741e-05, "loss": 2.2868, "step": 4473500 }, { "epoch": 22.17, "learning_rate": 3.892110699900418e-05, "loss": 2.2937, "step": 4474000 }, { "epoch": 22.17, "learning_rate": 3.891986841257809e-05, "loss": 2.2953, "step": 4474500 }, { "epoch": 22.17, "learning_rate": 3.891862982615201e-05, "loss": 2.3181, "step": 4475000 }, { "epoch": 22.17, "learning_rate": 3.8917391239725926e-05, "loss": 2.2859, "step": 4475500 }, { "epoch": 22.18, "learning_rate": 3.891615265329984e-05, "loss": 2.3111, "step": 4476000 }, { "epoch": 22.18, "learning_rate": 3.891491406687376e-05, "loss": 2.3094, "step": 4476500 }, { "epoch": 22.18, "learning_rate": 3.891367548044768e-05, "loss": 2.3165, "step": 4477000 }, { "epoch": 22.18, "learning_rate": 3.8912436894021594e-05, "loss": 2.3021, "step": 4477500 }, { "epoch": 22.19, "learning_rate": 3.891119830759551e-05, "loss": 2.2889, "step": 4478000 }, { "epoch": 22.19, "learning_rate": 3.890995972116943e-05, "loss": 2.3061, "step": 4478500 }, { "epoch": 22.19, "learning_rate": 3.8908721134743345e-05, "loss": 2.303, "step": 4479000 }, { "epoch": 22.19, "learning_rate": 3.890748254831726e-05, "loss": 2.2969, "step": 4479500 }, { "epoch": 22.2, "learning_rate": 3.890624891623688e-05, "loss": 2.2988, "step": 4480000 }, { "epoch": 22.2, "learning_rate": 3.890501032981079e-05, "loss": 2.3114, "step": 4480500 }, { "epoch": 22.2, "learning_rate": 3.890377174338471e-05, "loss": 2.3078, "step": 4481000 }, { "epoch": 22.2, "learning_rate": 3.8902533156958626e-05, "loss": 2.3274, "step": 4481500 }, { "epoch": 22.21, "learning_rate": 3.890129457053254e-05, "loss": 2.296, "step": 4482000 }, { "epoch": 22.21, "learning_rate": 3.890005598410646e-05, "loss": 2.3063, "step": 4482500 }, { "epoch": 22.21, "learning_rate": 3.889881739768038e-05, "loss": 2.2988, "step": 4483000 }, { "epoch": 22.21, "learning_rate": 3.8897578811254294e-05, "loss": 2.2946, "step": 4483500 }, { "epoch": 22.22, "learning_rate": 3.889634022482821e-05, "loss": 2.2692, "step": 4484000 }, { "epoch": 22.22, "learning_rate": 3.889510411557497e-05, "loss": 2.2717, "step": 4484500 }, { "epoch": 22.22, "learning_rate": 3.889386552914889e-05, "loss": 2.3011, "step": 4485000 }, { "epoch": 22.22, "learning_rate": 3.889262694272281e-05, "loss": 2.2948, "step": 4485500 }, { "epoch": 22.23, "learning_rate": 3.8891388356296724e-05, "loss": 2.2936, "step": 4486000 }, { "epoch": 22.23, "learning_rate": 3.889014976987064e-05, "loss": 2.2986, "step": 4486500 }, { "epoch": 22.23, "learning_rate": 3.888891366061741e-05, "loss": 2.3014, "step": 4487000 }, { "epoch": 22.23, "learning_rate": 3.8887675074191326e-05, "loss": 2.3245, "step": 4487500 }, { "epoch": 22.24, "learning_rate": 3.8886436487765243e-05, "loss": 2.3155, "step": 4488000 }, { "epoch": 22.24, "learning_rate": 3.888520037851201e-05, "loss": 2.3137, "step": 4488500 }, { "epoch": 22.24, "learning_rate": 3.888396179208593e-05, "loss": 2.3031, "step": 4489000 }, { "epoch": 22.24, "learning_rate": 3.8882723205659846e-05, "loss": 2.3025, "step": 4489500 }, { "epoch": 22.25, "learning_rate": 3.888148461923376e-05, "loss": 2.3273, "step": 4490000 }, { "epoch": 22.25, "learning_rate": 3.888024850998053e-05, "loss": 2.3053, "step": 4490500 }, { "epoch": 22.25, "learning_rate": 3.887900992355445e-05, "loss": 2.3006, "step": 4491000 }, { "epoch": 22.25, "learning_rate": 3.8877771337128366e-05, "loss": 2.3109, "step": 4491500 }, { "epoch": 22.25, "learning_rate": 3.887653275070228e-05, "loss": 2.3198, "step": 4492000 }, { "epoch": 22.26, "learning_rate": 3.887529664144905e-05, "loss": 2.3127, "step": 4492500 }, { "epoch": 22.26, "learning_rate": 3.887405805502297e-05, "loss": 2.2989, "step": 4493000 }, { "epoch": 22.26, "learning_rate": 3.8872819468596885e-05, "loss": 2.3253, "step": 4493500 }, { "epoch": 22.26, "learning_rate": 3.88715808821708e-05, "loss": 2.2805, "step": 4494000 }, { "epoch": 22.27, "learning_rate": 3.887034229574472e-05, "loss": 2.3011, "step": 4494500 }, { "epoch": 22.27, "learning_rate": 3.8869103709318636e-05, "loss": 2.3047, "step": 4495000 }, { "epoch": 22.27, "learning_rate": 3.8867865122892546e-05, "loss": 2.2922, "step": 4495500 }, { "epoch": 22.27, "learning_rate": 3.886662653646646e-05, "loss": 2.2869, "step": 4496000 }, { "epoch": 22.28, "learning_rate": 3.886538795004038e-05, "loss": 2.3214, "step": 4496500 }, { "epoch": 22.28, "learning_rate": 3.88641493636143e-05, "loss": 2.3244, "step": 4497000 }, { "epoch": 22.28, "learning_rate": 3.8862910777188214e-05, "loss": 2.3028, "step": 4497500 }, { "epoch": 22.28, "learning_rate": 3.8861672190762124e-05, "loss": 2.3, "step": 4498000 }, { "epoch": 22.29, "learning_rate": 3.88604360815089e-05, "loss": 2.2982, "step": 4498500 }, { "epoch": 22.29, "learning_rate": 3.8859197495082817e-05, "loss": 2.3043, "step": 4499000 }, { "epoch": 22.29, "learning_rate": 3.885795890865673e-05, "loss": 2.3042, "step": 4499500 }, { "epoch": 22.29, "learning_rate": 3.8856720322230644e-05, "loss": 2.3022, "step": 4500000 }, { "epoch": 22.3, "learning_rate": 3.885548173580456e-05, "loss": 2.3037, "step": 4500500 }, { "epoch": 22.3, "learning_rate": 3.885424314937848e-05, "loss": 2.2896, "step": 4501000 }, { "epoch": 22.3, "learning_rate": 3.88530095172981e-05, "loss": 2.2943, "step": 4501500 }, { "epoch": 22.3, "learning_rate": 3.8851770930872015e-05, "loss": 2.2988, "step": 4502000 }, { "epoch": 22.31, "learning_rate": 3.8850534821618784e-05, "loss": 2.3111, "step": 4502500 }, { "epoch": 22.31, "learning_rate": 3.88492962351927e-05, "loss": 2.3171, "step": 4503000 }, { "epoch": 22.31, "learning_rate": 3.884805764876662e-05, "loss": 2.2992, "step": 4503500 }, { "epoch": 22.31, "learning_rate": 3.8846819062340535e-05, "loss": 2.3312, "step": 4504000 }, { "epoch": 22.32, "learning_rate": 3.884558047591445e-05, "loss": 2.3045, "step": 4504500 }, { "epoch": 22.32, "learning_rate": 3.884434188948837e-05, "loss": 2.3171, "step": 4505000 }, { "epoch": 22.32, "learning_rate": 3.8843103303062286e-05, "loss": 2.3009, "step": 4505500 }, { "epoch": 22.32, "learning_rate": 3.88418647166362e-05, "loss": 2.3001, "step": 4506000 }, { "epoch": 22.33, "learning_rate": 3.884062613021012e-05, "loss": 2.3133, "step": 4506500 }, { "epoch": 22.33, "learning_rate": 3.883939002095688e-05, "loss": 2.2882, "step": 4507000 }, { "epoch": 22.33, "learning_rate": 3.88381514345308e-05, "loss": 2.3329, "step": 4507500 }, { "epoch": 22.33, "learning_rate": 3.8836912848104715e-05, "loss": 2.3059, "step": 4508000 }, { "epoch": 22.34, "learning_rate": 3.883567426167863e-05, "loss": 2.2917, "step": 4508500 }, { "epoch": 22.34, "learning_rate": 3.883443567525255e-05, "loss": 2.2894, "step": 4509000 }, { "epoch": 22.34, "learning_rate": 3.8833197088826466e-05, "loss": 2.3078, "step": 4509500 }, { "epoch": 22.34, "learning_rate": 3.883195850240038e-05, "loss": 2.3134, "step": 4510000 }, { "epoch": 22.35, "learning_rate": 3.88307199159743e-05, "loss": 2.3113, "step": 4510500 }, { "epoch": 22.35, "learning_rate": 3.882948132954822e-05, "loss": 2.2759, "step": 4511000 }, { "epoch": 22.35, "learning_rate": 3.8828242743122134e-05, "loss": 2.3251, "step": 4511500 }, { "epoch": 22.35, "learning_rate": 3.8827004156696044e-05, "loss": 2.3013, "step": 4512000 }, { "epoch": 22.36, "learning_rate": 3.882576557026996e-05, "loss": 2.2991, "step": 4512500 }, { "epoch": 22.36, "learning_rate": 3.882453193818958e-05, "loss": 2.3148, "step": 4513000 }, { "epoch": 22.36, "learning_rate": 3.88232933517635e-05, "loss": 2.3174, "step": 4513500 }, { "epoch": 22.36, "learning_rate": 3.882205724251027e-05, "loss": 2.2938, "step": 4514000 }, { "epoch": 22.37, "learning_rate": 3.8820818656084184e-05, "loss": 2.3036, "step": 4514500 }, { "epoch": 22.37, "learning_rate": 3.88195800696581e-05, "loss": 2.2946, "step": 4515000 }, { "epoch": 22.37, "learning_rate": 3.881834148323202e-05, "loss": 2.3099, "step": 4515500 }, { "epoch": 22.37, "learning_rate": 3.8817102896805935e-05, "loss": 2.3022, "step": 4516000 }, { "epoch": 22.38, "learning_rate": 3.881586431037985e-05, "loss": 2.309, "step": 4516500 }, { "epoch": 22.38, "learning_rate": 3.881462820112662e-05, "loss": 2.2991, "step": 4517000 }, { "epoch": 22.38, "learning_rate": 3.881338961470054e-05, "loss": 2.2815, "step": 4517500 }, { "epoch": 22.38, "learning_rate": 3.8812151028274455e-05, "loss": 2.3127, "step": 4518000 }, { "epoch": 22.39, "learning_rate": 3.881091244184837e-05, "loss": 2.3122, "step": 4518500 }, { "epoch": 22.39, "learning_rate": 3.880967385542228e-05, "loss": 2.305, "step": 4519000 }, { "epoch": 22.39, "learning_rate": 3.88084352689962e-05, "loss": 2.3096, "step": 4519500 }, { "epoch": 22.39, "learning_rate": 3.8807196682570116e-05, "loss": 2.2711, "step": 4520000 }, { "epoch": 22.4, "learning_rate": 3.880595809614403e-05, "loss": 2.3383, "step": 4520500 }, { "epoch": 22.4, "learning_rate": 3.880471950971795e-05, "loss": 2.2977, "step": 4521000 }, { "epoch": 22.4, "learning_rate": 3.8803480923291866e-05, "loss": 2.3228, "step": 4521500 }, { "epoch": 22.4, "learning_rate": 3.880224233686578e-05, "loss": 2.3133, "step": 4522000 }, { "epoch": 22.41, "learning_rate": 3.880100622761255e-05, "loss": 2.3178, "step": 4522500 }, { "epoch": 22.41, "learning_rate": 3.879976764118647e-05, "loss": 2.2977, "step": 4523000 }, { "epoch": 22.41, "learning_rate": 3.8798529054760386e-05, "loss": 2.3113, "step": 4523500 }, { "epoch": 22.41, "learning_rate": 3.87972904683343e-05, "loss": 2.2864, "step": 4524000 }, { "epoch": 22.42, "learning_rate": 3.879605435908107e-05, "loss": 2.2973, "step": 4524500 }, { "epoch": 22.42, "learning_rate": 3.879481577265499e-05, "loss": 2.3181, "step": 4525000 }, { "epoch": 22.42, "learning_rate": 3.8793577186228905e-05, "loss": 2.3087, "step": 4525500 }, { "epoch": 22.42, "learning_rate": 3.8792338599802816e-05, "loss": 2.3024, "step": 4526000 }, { "epoch": 22.43, "learning_rate": 3.879110001337673e-05, "loss": 2.297, "step": 4526500 }, { "epoch": 22.43, "learning_rate": 3.878986142695065e-05, "loss": 2.2964, "step": 4527000 }, { "epoch": 22.43, "learning_rate": 3.878862531769742e-05, "loss": 2.3192, "step": 4527500 }, { "epoch": 22.43, "learning_rate": 3.8787386731271335e-05, "loss": 2.2818, "step": 4528000 }, { "epoch": 22.44, "learning_rate": 3.878614814484525e-05, "loss": 2.3023, "step": 4528500 }, { "epoch": 22.44, "learning_rate": 3.878490955841917e-05, "loss": 2.3361, "step": 4529000 }, { "epoch": 22.44, "learning_rate": 3.8783670971993086e-05, "loss": 2.3235, "step": 4529500 }, { "epoch": 22.44, "learning_rate": 3.8782432385567e-05, "loss": 2.2829, "step": 4530000 }, { "epoch": 22.45, "learning_rate": 3.878119379914092e-05, "loss": 2.3039, "step": 4530500 }, { "epoch": 22.45, "learning_rate": 3.877995521271484e-05, "loss": 2.2703, "step": 4531000 }, { "epoch": 22.45, "learning_rate": 3.8778719103461606e-05, "loss": 2.2963, "step": 4531500 }, { "epoch": 22.45, "learning_rate": 3.877748051703552e-05, "loss": 2.3257, "step": 4532000 }, { "epoch": 22.46, "learning_rate": 3.877624193060943e-05, "loss": 2.3125, "step": 4532500 }, { "epoch": 22.46, "learning_rate": 3.877500334418335e-05, "loss": 2.3036, "step": 4533000 }, { "epoch": 22.46, "learning_rate": 3.877376723493012e-05, "loss": 2.3172, "step": 4533500 }, { "epoch": 22.46, "learning_rate": 3.8772528648504035e-05, "loss": 2.3257, "step": 4534000 }, { "epoch": 22.47, "learning_rate": 3.8771292539250804e-05, "loss": 2.2861, "step": 4534500 }, { "epoch": 22.47, "learning_rate": 3.877005395282472e-05, "loss": 2.3049, "step": 4535000 }, { "epoch": 22.47, "learning_rate": 3.876881536639864e-05, "loss": 2.3128, "step": 4535500 }, { "epoch": 22.47, "learning_rate": 3.8767576779972555e-05, "loss": 2.3123, "step": 4536000 }, { "epoch": 22.48, "learning_rate": 3.8766340670719324e-05, "loss": 2.3054, "step": 4536500 }, { "epoch": 22.48, "learning_rate": 3.876510208429324e-05, "loss": 2.3278, "step": 4537000 }, { "epoch": 22.48, "learning_rate": 3.876386349786715e-05, "loss": 2.3234, "step": 4537500 }, { "epoch": 22.48, "learning_rate": 3.876262491144107e-05, "loss": 2.3163, "step": 4538000 }, { "epoch": 22.49, "learning_rate": 3.8761386325014985e-05, "loss": 2.3147, "step": 4538500 }, { "epoch": 22.49, "learning_rate": 3.87601477385889e-05, "loss": 2.325, "step": 4539000 }, { "epoch": 22.49, "learning_rate": 3.875890915216282e-05, "loss": 2.3072, "step": 4539500 }, { "epoch": 22.49, "learning_rate": 3.8757670565736735e-05, "loss": 2.3123, "step": 4540000 }, { "epoch": 22.5, "learning_rate": 3.875643197931065e-05, "loss": 2.3073, "step": 4540500 }, { "epoch": 22.5, "learning_rate": 3.875519587005742e-05, "loss": 2.2933, "step": 4541000 }, { "epoch": 22.5, "learning_rate": 3.875395976080419e-05, "loss": 2.3299, "step": 4541500 }, { "epoch": 22.5, "learning_rate": 3.875272117437811e-05, "loss": 2.3161, "step": 4542000 }, { "epoch": 22.51, "learning_rate": 3.8751482587952024e-05, "loss": 2.3171, "step": 4542500 }, { "epoch": 22.51, "learning_rate": 3.875024400152594e-05, "loss": 2.3235, "step": 4543000 }, { "epoch": 22.51, "learning_rate": 3.874900541509986e-05, "loss": 2.3079, "step": 4543500 }, { "epoch": 22.51, "learning_rate": 3.8747769305846627e-05, "loss": 2.313, "step": 4544000 }, { "epoch": 22.52, "learning_rate": 3.8746530719420543e-05, "loss": 2.3353, "step": 4544500 }, { "epoch": 22.52, "learning_rate": 3.874529213299446e-05, "loss": 2.3102, "step": 4545000 }, { "epoch": 22.52, "learning_rate": 3.874405354656838e-05, "loss": 2.3287, "step": 4545500 }, { "epoch": 22.52, "learning_rate": 3.8742817437315146e-05, "loss": 2.3158, "step": 4546000 }, { "epoch": 22.52, "learning_rate": 3.874157885088906e-05, "loss": 2.2897, "step": 4546500 }, { "epoch": 22.53, "learning_rate": 3.874034026446297e-05, "loss": 2.3011, "step": 4547000 }, { "epoch": 22.53, "learning_rate": 3.873910167803689e-05, "loss": 2.2923, "step": 4547500 }, { "epoch": 22.53, "learning_rate": 3.873786309161081e-05, "loss": 2.2992, "step": 4548000 }, { "epoch": 22.53, "learning_rate": 3.8736624505184724e-05, "loss": 2.3284, "step": 4548500 }, { "epoch": 22.54, "learning_rate": 3.873538839593149e-05, "loss": 2.3066, "step": 4549000 }, { "epoch": 22.54, "learning_rate": 3.873414980950541e-05, "loss": 2.3289, "step": 4549500 }, { "epoch": 22.54, "learning_rate": 3.873291122307933e-05, "loss": 2.2994, "step": 4550000 }, { "epoch": 22.54, "learning_rate": 3.8731672636653244e-05, "loss": 2.3128, "step": 4550500 }, { "epoch": 22.55, "learning_rate": 3.873043405022716e-05, "loss": 2.3113, "step": 4551000 }, { "epoch": 22.55, "learning_rate": 3.872919794097393e-05, "loss": 2.3115, "step": 4551500 }, { "epoch": 22.55, "learning_rate": 3.8727959354547846e-05, "loss": 2.3109, "step": 4552000 }, { "epoch": 22.55, "learning_rate": 3.872672076812176e-05, "loss": 2.32, "step": 4552500 }, { "epoch": 22.56, "learning_rate": 3.872548218169568e-05, "loss": 2.3099, "step": 4553000 }, { "epoch": 22.56, "learning_rate": 3.872424607244244e-05, "loss": 2.3342, "step": 4553500 }, { "epoch": 22.56, "learning_rate": 3.872300996318921e-05, "loss": 2.2928, "step": 4554000 }, { "epoch": 22.56, "learning_rate": 3.872177137676313e-05, "loss": 2.3082, "step": 4554500 }, { "epoch": 22.57, "learning_rate": 3.8720532790337045e-05, "loss": 2.326, "step": 4555000 }, { "epoch": 22.57, "learning_rate": 3.871929420391096e-05, "loss": 2.3155, "step": 4555500 }, { "epoch": 22.57, "learning_rate": 3.871805561748488e-05, "loss": 2.299, "step": 4556000 }, { "epoch": 22.57, "learning_rate": 3.8716817031058796e-05, "loss": 2.3201, "step": 4556500 }, { "epoch": 22.58, "learning_rate": 3.871557844463271e-05, "loss": 2.2982, "step": 4557000 }, { "epoch": 22.58, "learning_rate": 3.871433985820663e-05, "loss": 2.3207, "step": 4557500 }, { "epoch": 22.58, "learning_rate": 3.8713101271780546e-05, "loss": 2.316, "step": 4558000 }, { "epoch": 22.58, "learning_rate": 3.871186516252731e-05, "loss": 2.3293, "step": 4558500 }, { "epoch": 22.59, "learning_rate": 3.8710626576101225e-05, "loss": 2.3033, "step": 4559000 }, { "epoch": 22.59, "learning_rate": 3.870938798967514e-05, "loss": 2.3159, "step": 4559500 }, { "epoch": 22.59, "learning_rate": 3.870814940324906e-05, "loss": 2.3366, "step": 4560000 }, { "epoch": 22.59, "learning_rate": 3.8706910816822976e-05, "loss": 2.3203, "step": 4560500 }, { "epoch": 22.6, "learning_rate": 3.870567223039689e-05, "loss": 2.32, "step": 4561000 }, { "epoch": 22.6, "learning_rate": 3.870443364397081e-05, "loss": 2.315, "step": 4561500 }, { "epoch": 22.6, "learning_rate": 3.870319505754473e-05, "loss": 2.3213, "step": 4562000 }, { "epoch": 22.6, "learning_rate": 3.8701956471118644e-05, "loss": 2.3211, "step": 4562500 }, { "epoch": 22.61, "learning_rate": 3.870072036186541e-05, "loss": 2.3087, "step": 4563000 }, { "epoch": 22.61, "learning_rate": 3.869948177543933e-05, "loss": 2.3402, "step": 4563500 }, { "epoch": 22.61, "learning_rate": 3.8698243189013247e-05, "loss": 2.3021, "step": 4564000 }, { "epoch": 22.61, "learning_rate": 3.8697004602587163e-05, "loss": 2.3204, "step": 4564500 }, { "epoch": 22.62, "learning_rate": 3.869576601616108e-05, "loss": 2.2966, "step": 4565000 }, { "epoch": 22.62, "learning_rate": 3.869452990690784e-05, "loss": 2.3114, "step": 4565500 }, { "epoch": 22.62, "learning_rate": 3.869329132048176e-05, "loss": 2.3284, "step": 4566000 }, { "epoch": 22.62, "learning_rate": 3.8692052734055676e-05, "loss": 2.3001, "step": 4566500 }, { "epoch": 22.63, "learning_rate": 3.869081414762959e-05, "loss": 2.3107, "step": 4567000 }, { "epoch": 22.63, "learning_rate": 3.868957556120351e-05, "loss": 2.322, "step": 4567500 }, { "epoch": 22.63, "learning_rate": 3.868833697477743e-05, "loss": 2.3449, "step": 4568000 }, { "epoch": 22.63, "learning_rate": 3.8687098388351344e-05, "loss": 2.3089, "step": 4568500 }, { "epoch": 22.64, "learning_rate": 3.868585980192526e-05, "loss": 2.3135, "step": 4569000 }, { "epoch": 22.64, "learning_rate": 3.868462121549918e-05, "loss": 2.3005, "step": 4569500 }, { "epoch": 22.64, "learning_rate": 3.868338510624595e-05, "loss": 2.3158, "step": 4570000 }, { "epoch": 22.64, "learning_rate": 3.868214899699271e-05, "loss": 2.3041, "step": 4570500 }, { "epoch": 22.65, "learning_rate": 3.8680910410566626e-05, "loss": 2.328, "step": 4571000 }, { "epoch": 22.65, "learning_rate": 3.867967182414054e-05, "loss": 2.3015, "step": 4571500 }, { "epoch": 22.65, "learning_rate": 3.867843323771446e-05, "loss": 2.3389, "step": 4572000 }, { "epoch": 22.65, "learning_rate": 3.867719712846123e-05, "loss": 2.2967, "step": 4572500 }, { "epoch": 22.66, "learning_rate": 3.8675961019208004e-05, "loss": 2.3422, "step": 4573000 }, { "epoch": 22.66, "learning_rate": 3.867472243278192e-05, "loss": 2.3099, "step": 4573500 }, { "epoch": 22.66, "learning_rate": 3.867348384635584e-05, "loss": 2.326, "step": 4574000 }, { "epoch": 22.66, "learning_rate": 3.8672245259929755e-05, "loss": 2.3225, "step": 4574500 }, { "epoch": 22.67, "learning_rate": 3.8671006673503665e-05, "loss": 2.3198, "step": 4575000 }, { "epoch": 22.67, "learning_rate": 3.866976808707758e-05, "loss": 2.3108, "step": 4575500 }, { "epoch": 22.67, "learning_rate": 3.86685295006515e-05, "loss": 2.3139, "step": 4576000 }, { "epoch": 22.67, "learning_rate": 3.8667290914225416e-05, "loss": 2.3271, "step": 4576500 }, { "epoch": 22.68, "learning_rate": 3.866605232779933e-05, "loss": 2.3365, "step": 4577000 }, { "epoch": 22.68, "learning_rate": 3.86648162185461e-05, "loss": 2.316, "step": 4577500 }, { "epoch": 22.68, "learning_rate": 3.866357763212002e-05, "loss": 2.3204, "step": 4578000 }, { "epoch": 22.68, "learning_rate": 3.8662339045693935e-05, "loss": 2.3074, "step": 4578500 }, { "epoch": 22.69, "learning_rate": 3.8661100459267845e-05, "loss": 2.3012, "step": 4579000 }, { "epoch": 22.69, "learning_rate": 3.865986187284176e-05, "loss": 2.3167, "step": 4579500 }, { "epoch": 22.69, "learning_rate": 3.865862328641568e-05, "loss": 2.3111, "step": 4580000 }, { "epoch": 22.69, "learning_rate": 3.8657387177162455e-05, "loss": 2.3244, "step": 4580500 }, { "epoch": 22.7, "learning_rate": 3.865614859073637e-05, "loss": 2.3031, "step": 4581000 }, { "epoch": 22.7, "learning_rate": 3.865491000431028e-05, "loss": 2.3241, "step": 4581500 }, { "epoch": 22.7, "learning_rate": 3.86536714178842e-05, "loss": 2.2923, "step": 4582000 }, { "epoch": 22.7, "learning_rate": 3.865243530863097e-05, "loss": 2.2862, "step": 4582500 }, { "epoch": 22.71, "learning_rate": 3.8651196722204885e-05, "loss": 2.3268, "step": 4583000 }, { "epoch": 22.71, "learning_rate": 3.86499581357788e-05, "loss": 2.3275, "step": 4583500 }, { "epoch": 22.71, "learning_rate": 3.864871954935272e-05, "loss": 2.3048, "step": 4584000 }, { "epoch": 22.71, "learning_rate": 3.8647480962926635e-05, "loss": 2.3134, "step": 4584500 }, { "epoch": 22.72, "learning_rate": 3.8646244853673404e-05, "loss": 2.304, "step": 4585000 }, { "epoch": 22.72, "learning_rate": 3.864500626724732e-05, "loss": 2.2997, "step": 4585500 }, { "epoch": 22.72, "learning_rate": 3.864376768082124e-05, "loss": 2.3093, "step": 4586000 }, { "epoch": 22.72, "learning_rate": 3.8642529094395155e-05, "loss": 2.3179, "step": 4586500 }, { "epoch": 22.73, "learning_rate": 3.864129050796907e-05, "loss": 2.317, "step": 4587000 }, { "epoch": 22.73, "learning_rate": 3.8640054398715834e-05, "loss": 2.3088, "step": 4587500 }, { "epoch": 22.73, "learning_rate": 3.86388182894626e-05, "loss": 2.2975, "step": 4588000 }, { "epoch": 22.73, "learning_rate": 3.863757970303652e-05, "loss": 2.3084, "step": 4588500 }, { "epoch": 22.74, "learning_rate": 3.8636341116610437e-05, "loss": 2.2775, "step": 4589000 }, { "epoch": 22.74, "learning_rate": 3.8635102530184353e-05, "loss": 2.2953, "step": 4589500 }, { "epoch": 22.74, "learning_rate": 3.863386642093112e-05, "loss": 2.3188, "step": 4590000 }, { "epoch": 22.74, "learning_rate": 3.863262783450504e-05, "loss": 2.3119, "step": 4590500 }, { "epoch": 22.75, "learning_rate": 3.8631389248078956e-05, "loss": 2.2956, "step": 4591000 }, { "epoch": 22.75, "learning_rate": 3.863015066165287e-05, "loss": 2.2869, "step": 4591500 }, { "epoch": 22.75, "learning_rate": 3.862891207522678e-05, "loss": 2.3249, "step": 4592000 }, { "epoch": 22.75, "learning_rate": 3.86276734888007e-05, "loss": 2.3244, "step": 4592500 }, { "epoch": 22.76, "learning_rate": 3.862643737954747e-05, "loss": 2.3138, "step": 4593000 }, { "epoch": 22.76, "learning_rate": 3.8625198793121386e-05, "loss": 2.2791, "step": 4593500 }, { "epoch": 22.76, "learning_rate": 3.86239602066953e-05, "loss": 2.313, "step": 4594000 }, { "epoch": 22.76, "learning_rate": 3.862272409744208e-05, "loss": 2.3295, "step": 4594500 }, { "epoch": 22.77, "learning_rate": 3.862148551101599e-05, "loss": 2.3138, "step": 4595000 }, { "epoch": 22.77, "learning_rate": 3.8620246924589905e-05, "loss": 2.2907, "step": 4595500 }, { "epoch": 22.77, "learning_rate": 3.861900833816382e-05, "loss": 2.3033, "step": 4596000 }, { "epoch": 22.77, "learning_rate": 3.861776975173774e-05, "loss": 2.3326, "step": 4596500 }, { "epoch": 22.78, "learning_rate": 3.8616531165311656e-05, "loss": 2.3104, "step": 4597000 }, { "epoch": 22.78, "learning_rate": 3.861529257888557e-05, "loss": 2.3412, "step": 4597500 }, { "epoch": 22.78, "learning_rate": 3.861405399245949e-05, "loss": 2.3166, "step": 4598000 }, { "epoch": 22.78, "learning_rate": 3.86128154060334e-05, "loss": 2.3151, "step": 4598500 }, { "epoch": 22.79, "learning_rate": 3.861157681960732e-05, "loss": 2.325, "step": 4599000 }, { "epoch": 22.79, "learning_rate": 3.8610338233181234e-05, "loss": 2.307, "step": 4599500 }, { "epoch": 22.79, "learning_rate": 3.860909964675515e-05, "loss": 2.3147, "step": 4600000 }, { "epoch": 22.79, "learning_rate": 3.860786106032907e-05, "loss": 2.3078, "step": 4600500 }, { "epoch": 22.79, "learning_rate": 3.8606622473902985e-05, "loss": 2.3078, "step": 4601000 }, { "epoch": 22.8, "learning_rate": 3.86053838874769e-05, "loss": 2.3242, "step": 4601500 }, { "epoch": 22.8, "learning_rate": 3.860414777822367e-05, "loss": 2.3032, "step": 4602000 }, { "epoch": 22.8, "learning_rate": 3.860290919179759e-05, "loss": 2.3121, "step": 4602500 }, { "epoch": 22.8, "learning_rate": 3.8601670605371504e-05, "loss": 2.3143, "step": 4603000 }, { "epoch": 22.81, "learning_rate": 3.860043201894542e-05, "loss": 2.3154, "step": 4603500 }, { "epoch": 22.81, "learning_rate": 3.859919343251934e-05, "loss": 2.327, "step": 4604000 }, { "epoch": 22.81, "learning_rate": 3.8597954846093255e-05, "loss": 2.3061, "step": 4604500 }, { "epoch": 22.81, "learning_rate": 3.859671625966717e-05, "loss": 2.3172, "step": 4605000 }, { "epoch": 22.82, "learning_rate": 3.859547767324109e-05, "loss": 2.3136, "step": 4605500 }, { "epoch": 22.82, "learning_rate": 3.8594239086815006e-05, "loss": 2.3018, "step": 4606000 }, { "epoch": 22.82, "learning_rate": 3.859300297756177e-05, "loss": 2.2998, "step": 4606500 }, { "epoch": 22.82, "learning_rate": 3.8591764391135685e-05, "loss": 2.3314, "step": 4607000 }, { "epoch": 22.83, "learning_rate": 3.85905258047096e-05, "loss": 2.3057, "step": 4607500 }, { "epoch": 22.83, "learning_rate": 3.858928721828352e-05, "loss": 2.3195, "step": 4608000 }, { "epoch": 22.83, "learning_rate": 3.8588048631857436e-05, "loss": 2.3239, "step": 4608500 }, { "epoch": 22.83, "learning_rate": 3.8586812522604205e-05, "loss": 2.293, "step": 4609000 }, { "epoch": 22.84, "learning_rate": 3.858557393617812e-05, "loss": 2.3212, "step": 4609500 }, { "epoch": 22.84, "learning_rate": 3.858433534975204e-05, "loss": 2.3048, "step": 4610000 }, { "epoch": 22.84, "learning_rate": 3.8583096763325955e-05, "loss": 2.2934, "step": 4610500 }, { "epoch": 22.84, "learning_rate": 3.858185817689987e-05, "loss": 2.2999, "step": 4611000 }, { "epoch": 22.85, "learning_rate": 3.858062206764664e-05, "loss": 2.3105, "step": 4611500 }, { "epoch": 22.85, "learning_rate": 3.857938348122055e-05, "loss": 2.3013, "step": 4612000 }, { "epoch": 22.85, "learning_rate": 3.857814489479447e-05, "loss": 2.3245, "step": 4612500 }, { "epoch": 22.85, "learning_rate": 3.8576911262714096e-05, "loss": 2.311, "step": 4613000 }, { "epoch": 22.86, "learning_rate": 3.857567267628801e-05, "loss": 2.3275, "step": 4613500 }, { "epoch": 22.86, "learning_rate": 3.857443408986192e-05, "loss": 2.3123, "step": 4614000 }, { "epoch": 22.86, "learning_rate": 3.857319550343584e-05, "loss": 2.3256, "step": 4614500 }, { "epoch": 22.86, "learning_rate": 3.8571956917009757e-05, "loss": 2.3182, "step": 4615000 }, { "epoch": 22.87, "learning_rate": 3.8570718330583674e-05, "loss": 2.3452, "step": 4615500 }, { "epoch": 22.87, "learning_rate": 3.856947974415759e-05, "loss": 2.3253, "step": 4616000 }, { "epoch": 22.87, "learning_rate": 3.856824115773151e-05, "loss": 2.3012, "step": 4616500 }, { "epoch": 22.87, "learning_rate": 3.8567002571305424e-05, "loss": 2.3173, "step": 4617000 }, { "epoch": 22.88, "learning_rate": 3.856576398487934e-05, "loss": 2.3155, "step": 4617500 }, { "epoch": 22.88, "learning_rate": 3.856452539845326e-05, "loss": 2.2929, "step": 4618000 }, { "epoch": 22.88, "learning_rate": 3.8563286812027175e-05, "loss": 2.3329, "step": 4618500 }, { "epoch": 22.88, "learning_rate": 3.8562048225601085e-05, "loss": 2.3319, "step": 4619000 }, { "epoch": 22.89, "learning_rate": 3.8560809639175e-05, "loss": 2.3235, "step": 4619500 }, { "epoch": 22.89, "learning_rate": 3.855957600709462e-05, "loss": 2.3148, "step": 4620000 }, { "epoch": 22.89, "learning_rate": 3.855833742066854e-05, "loss": 2.3293, "step": 4620500 }, { "epoch": 22.89, "learning_rate": 3.855709883424246e-05, "loss": 2.2721, "step": 4621000 }, { "epoch": 22.9, "learning_rate": 3.8555860247816374e-05, "loss": 2.3437, "step": 4621500 }, { "epoch": 22.9, "learning_rate": 3.855462166139029e-05, "loss": 2.3259, "step": 4622000 }, { "epoch": 22.9, "learning_rate": 3.855338307496421e-05, "loss": 2.3164, "step": 4622500 }, { "epoch": 22.9, "learning_rate": 3.8552146965710976e-05, "loss": 2.3135, "step": 4623000 }, { "epoch": 22.91, "learning_rate": 3.8550910856457745e-05, "loss": 2.3246, "step": 4623500 }, { "epoch": 22.91, "learning_rate": 3.854967227003166e-05, "loss": 2.3097, "step": 4624000 }, { "epoch": 22.91, "learning_rate": 3.854843368360558e-05, "loss": 2.3233, "step": 4624500 }, { "epoch": 22.91, "learning_rate": 3.8547195097179496e-05, "loss": 2.3195, "step": 4625000 }, { "epoch": 22.92, "learning_rate": 3.854595651075341e-05, "loss": 2.3101, "step": 4625500 }, { "epoch": 22.92, "learning_rate": 3.854471792432733e-05, "loss": 2.3006, "step": 4626000 }, { "epoch": 22.92, "learning_rate": 3.854347933790124e-05, "loss": 2.3205, "step": 4626500 }, { "epoch": 22.92, "learning_rate": 3.854224075147516e-05, "loss": 2.3168, "step": 4627000 }, { "epoch": 22.93, "learning_rate": 3.8541002165049074e-05, "loss": 2.3087, "step": 4627500 }, { "epoch": 22.93, "learning_rate": 3.853976357862299e-05, "loss": 2.3106, "step": 4628000 }, { "epoch": 22.93, "learning_rate": 3.853852746936976e-05, "loss": 2.3153, "step": 4628500 }, { "epoch": 22.93, "learning_rate": 3.8537288882943676e-05, "loss": 2.3313, "step": 4629000 }, { "epoch": 22.94, "learning_rate": 3.8536050296517587e-05, "loss": 2.3148, "step": 4629500 }, { "epoch": 22.94, "learning_rate": 3.8534811710091504e-05, "loss": 2.3088, "step": 4630000 }, { "epoch": 22.94, "learning_rate": 3.853357312366542e-05, "loss": 2.3086, "step": 4630500 }, { "epoch": 22.94, "learning_rate": 3.853233453723934e-05, "loss": 2.3283, "step": 4631000 }, { "epoch": 22.95, "learning_rate": 3.853109842798611e-05, "loss": 2.3124, "step": 4631500 }, { "epoch": 22.95, "learning_rate": 3.852985984156003e-05, "loss": 2.3007, "step": 4632000 }, { "epoch": 22.95, "learning_rate": 3.852862125513394e-05, "loss": 2.318, "step": 4632500 }, { "epoch": 22.95, "learning_rate": 3.852738266870786e-05, "loss": 2.3008, "step": 4633000 }, { "epoch": 22.96, "learning_rate": 3.8526144082281774e-05, "loss": 2.3385, "step": 4633500 }, { "epoch": 22.96, "learning_rate": 3.852490549585569e-05, "loss": 2.3005, "step": 4634000 }, { "epoch": 22.96, "learning_rate": 3.852366690942961e-05, "loss": 2.2903, "step": 4634500 }, { "epoch": 22.96, "learning_rate": 3.8522428323003525e-05, "loss": 2.3239, "step": 4635000 }, { "epoch": 22.97, "learning_rate": 3.8521192213750294e-05, "loss": 2.3164, "step": 4635500 }, { "epoch": 22.97, "learning_rate": 3.8519953627324204e-05, "loss": 2.3282, "step": 4636000 }, { "epoch": 22.97, "learning_rate": 3.851871504089812e-05, "loss": 2.3116, "step": 4636500 }, { "epoch": 22.97, "learning_rate": 3.851747645447204e-05, "loss": 2.3066, "step": 4637000 }, { "epoch": 22.98, "learning_rate": 3.8516237868045954e-05, "loss": 2.3298, "step": 4637500 }, { "epoch": 22.98, "learning_rate": 3.851499928161987e-05, "loss": 2.3011, "step": 4638000 }, { "epoch": 22.98, "learning_rate": 3.851376069519379e-05, "loss": 2.33, "step": 4638500 }, { "epoch": 22.98, "learning_rate": 3.851252458594056e-05, "loss": 2.2978, "step": 4639000 }, { "epoch": 22.99, "learning_rate": 3.8511285999514474e-05, "loss": 2.3144, "step": 4639500 }, { "epoch": 22.99, "learning_rate": 3.851004741308839e-05, "loss": 2.3008, "step": 4640000 }, { "epoch": 22.99, "learning_rate": 3.850880882666231e-05, "loss": 2.3002, "step": 4640500 }, { "epoch": 22.99, "learning_rate": 3.8507570240236225e-05, "loss": 2.3052, "step": 4641000 }, { "epoch": 23.0, "learning_rate": 3.850633165381014e-05, "loss": 2.3447, "step": 4641500 }, { "epoch": 23.0, "learning_rate": 3.850509306738406e-05, "loss": 2.2959, "step": 4642000 }, { "epoch": 23.0, "eval_accuracy": 0.6535051236946925, "eval_accuracy_mlm": 0.6085347615669268, "eval_accuracy_nsp": 0.8653273663608658, "eval_loss": 2.3427975177764893, "eval_runtime": 145.7935, "eval_samples_per_second": 1748.768, "eval_steps_per_second": 72.87, "step": 4642389 }, { "epoch": 23.0, "learning_rate": 3.8503854480957976e-05, "loss": 2.2864, "step": 4642500 }, { "epoch": 23.0, "learning_rate": 3.850261589453189e-05, "loss": 2.2797, "step": 4643000 }, { "epoch": 23.01, "learning_rate": 3.850137730810581e-05, "loss": 2.2687, "step": 4643500 }, { "epoch": 23.01, "learning_rate": 3.8500138721679726e-05, "loss": 2.2998, "step": 4644000 }, { "epoch": 23.01, "learning_rate": 3.849890261242649e-05, "loss": 2.2772, "step": 4644500 }, { "epoch": 23.01, "learning_rate": 3.8497664026000405e-05, "loss": 2.2775, "step": 4645000 }, { "epoch": 23.02, "learning_rate": 3.849642543957432e-05, "loss": 2.2837, "step": 4645500 }, { "epoch": 23.02, "learning_rate": 3.849518685314824e-05, "loss": 2.2925, "step": 4646000 }, { "epoch": 23.02, "learning_rate": 3.849395074389501e-05, "loss": 2.2727, "step": 4646500 }, { "epoch": 23.02, "learning_rate": 3.8492712157468925e-05, "loss": 2.2708, "step": 4647000 }, { "epoch": 23.03, "learning_rate": 3.849147357104284e-05, "loss": 2.2694, "step": 4647500 }, { "epoch": 23.03, "learning_rate": 3.849023498461676e-05, "loss": 2.2986, "step": 4648000 }, { "epoch": 23.03, "learning_rate": 3.8488996398190676e-05, "loss": 2.2675, "step": 4648500 }, { "epoch": 23.03, "learning_rate": 3.848775781176459e-05, "loss": 2.2932, "step": 4649000 }, { "epoch": 23.04, "learning_rate": 3.848651922533851e-05, "loss": 2.2778, "step": 4649500 }, { "epoch": 23.04, "learning_rate": 3.8485280638912427e-05, "loss": 2.2582, "step": 4650000 }, { "epoch": 23.04, "learning_rate": 3.848404452965919e-05, "loss": 2.2853, "step": 4650500 }, { "epoch": 23.04, "learning_rate": 3.8482805943233105e-05, "loss": 2.2806, "step": 4651000 }, { "epoch": 23.05, "learning_rate": 3.848156735680702e-05, "loss": 2.3017, "step": 4651500 }, { "epoch": 23.05, "learning_rate": 3.848032877038094e-05, "loss": 2.2922, "step": 4652000 }, { "epoch": 23.05, "learning_rate": 3.8479090183954856e-05, "loss": 2.2852, "step": 4652500 }, { "epoch": 23.05, "learning_rate": 3.847785159752877e-05, "loss": 2.2834, "step": 4653000 }, { "epoch": 23.06, "learning_rate": 3.847661301110269e-05, "loss": 2.2794, "step": 4653500 }, { "epoch": 23.06, "learning_rate": 3.847537690184946e-05, "loss": 2.2736, "step": 4654000 }, { "epoch": 23.06, "learning_rate": 3.8474138315423376e-05, "loss": 2.2817, "step": 4654500 }, { "epoch": 23.06, "learning_rate": 3.847289972899729e-05, "loss": 2.3046, "step": 4655000 }, { "epoch": 23.06, "learning_rate": 3.847166114257121e-05, "loss": 2.2827, "step": 4655500 }, { "epoch": 23.07, "learning_rate": 3.847042255614513e-05, "loss": 2.3029, "step": 4656000 }, { "epoch": 23.07, "learning_rate": 3.8469183969719044e-05, "loss": 2.2855, "step": 4656500 }, { "epoch": 23.07, "learning_rate": 3.846794538329296e-05, "loss": 2.2797, "step": 4657000 }, { "epoch": 23.07, "learning_rate": 3.846670927403972e-05, "loss": 2.269, "step": 4657500 }, { "epoch": 23.08, "learning_rate": 3.846547068761364e-05, "loss": 2.3017, "step": 4658000 }, { "epoch": 23.08, "learning_rate": 3.8464232101187556e-05, "loss": 2.3036, "step": 4658500 }, { "epoch": 23.08, "learning_rate": 3.846299351476147e-05, "loss": 2.2968, "step": 4659000 }, { "epoch": 23.08, "learning_rate": 3.846175492833539e-05, "loss": 2.2592, "step": 4659500 }, { "epoch": 23.09, "learning_rate": 3.846051634190931e-05, "loss": 2.2923, "step": 4660000 }, { "epoch": 23.09, "learning_rate": 3.8459277755483224e-05, "loss": 2.2877, "step": 4660500 }, { "epoch": 23.09, "learning_rate": 3.845804164622999e-05, "loss": 2.2687, "step": 4661000 }, { "epoch": 23.09, "learning_rate": 3.845680305980391e-05, "loss": 2.2669, "step": 4661500 }, { "epoch": 23.1, "learning_rate": 3.845556447337783e-05, "loss": 2.2845, "step": 4662000 }, { "epoch": 23.1, "learning_rate": 3.8454325886951744e-05, "loss": 2.2866, "step": 4662500 }, { "epoch": 23.1, "learning_rate": 3.845308730052566e-05, "loss": 2.3037, "step": 4663000 }, { "epoch": 23.1, "learning_rate": 3.845184871409958e-05, "loss": 2.2609, "step": 4663500 }, { "epoch": 23.11, "learning_rate": 3.8450610127673494e-05, "loss": 2.2816, "step": 4664000 }, { "epoch": 23.11, "learning_rate": 3.844937154124741e-05, "loss": 2.2791, "step": 4664500 }, { "epoch": 23.11, "learning_rate": 3.8448135431994173e-05, "loss": 2.3103, "step": 4665000 }, { "epoch": 23.11, "learning_rate": 3.844689932274094e-05, "loss": 2.2907, "step": 4665500 }, { "epoch": 23.12, "learning_rate": 3.844566073631486e-05, "loss": 2.296, "step": 4666000 }, { "epoch": 23.12, "learning_rate": 3.8444422149888776e-05, "loss": 2.2811, "step": 4666500 }, { "epoch": 23.12, "learning_rate": 3.844318356346269e-05, "loss": 2.2881, "step": 4667000 }, { "epoch": 23.12, "learning_rate": 3.844194497703661e-05, "loss": 2.2979, "step": 4667500 }, { "epoch": 23.13, "learning_rate": 3.844070886778338e-05, "loss": 2.3004, "step": 4668000 }, { "epoch": 23.13, "learning_rate": 3.8439470281357296e-05, "loss": 2.2805, "step": 4668500 }, { "epoch": 23.13, "learning_rate": 3.843823169493121e-05, "loss": 2.2995, "step": 4669000 }, { "epoch": 23.13, "learning_rate": 3.843699310850512e-05, "loss": 2.2929, "step": 4669500 }, { "epoch": 23.14, "learning_rate": 3.843575452207904e-05, "loss": 2.3062, "step": 4670000 }, { "epoch": 23.14, "learning_rate": 3.843451593565296e-05, "loss": 2.3233, "step": 4670500 }, { "epoch": 23.14, "learning_rate": 3.8433277349226874e-05, "loss": 2.3191, "step": 4671000 }, { "epoch": 23.14, "learning_rate": 3.843203876280079e-05, "loss": 2.2908, "step": 4671500 }, { "epoch": 23.15, "learning_rate": 3.843080265354756e-05, "loss": 2.267, "step": 4672000 }, { "epoch": 23.15, "learning_rate": 3.842956902146718e-05, "loss": 2.2694, "step": 4672500 }, { "epoch": 23.15, "learning_rate": 3.84283304350411e-05, "loss": 2.2964, "step": 4673000 }, { "epoch": 23.15, "learning_rate": 3.8427091848615014e-05, "loss": 2.3116, "step": 4673500 }, { "epoch": 23.16, "learning_rate": 3.842585573936178e-05, "loss": 2.2689, "step": 4674000 }, { "epoch": 23.16, "learning_rate": 3.84246171529357e-05, "loss": 2.2976, "step": 4674500 }, { "epoch": 23.16, "learning_rate": 3.8423378566509617e-05, "loss": 2.3003, "step": 4675000 }, { "epoch": 23.16, "learning_rate": 3.8422139980083533e-05, "loss": 2.3213, "step": 4675500 }, { "epoch": 23.17, "learning_rate": 3.842090139365745e-05, "loss": 2.287, "step": 4676000 }, { "epoch": 23.17, "learning_rate": 3.841966280723137e-05, "loss": 2.303, "step": 4676500 }, { "epoch": 23.17, "learning_rate": 3.8418424220805284e-05, "loss": 2.2827, "step": 4677000 }, { "epoch": 23.17, "learning_rate": 3.84171856343792e-05, "loss": 2.2881, "step": 4677500 }, { "epoch": 23.18, "learning_rate": 3.841594704795312e-05, "loss": 2.3183, "step": 4678000 }, { "epoch": 23.18, "learning_rate": 3.8414708461527035e-05, "loss": 2.3166, "step": 4678500 }, { "epoch": 23.18, "learning_rate": 3.841346987510095e-05, "loss": 2.3065, "step": 4679000 }, { "epoch": 23.18, "learning_rate": 3.841223128867486e-05, "loss": 2.278, "step": 4679500 }, { "epoch": 23.19, "learning_rate": 3.841099270224878e-05, "loss": 2.3138, "step": 4680000 }, { "epoch": 23.19, "learning_rate": 3.8409754115822696e-05, "loss": 2.3044, "step": 4680500 }, { "epoch": 23.19, "learning_rate": 3.840851552939661e-05, "loss": 2.2684, "step": 4681000 }, { "epoch": 23.19, "learning_rate": 3.840727694297053e-05, "loss": 2.293, "step": 4681500 }, { "epoch": 23.2, "learning_rate": 3.840603835654445e-05, "loss": 2.3256, "step": 4682000 }, { "epoch": 23.2, "learning_rate": 3.840480224729121e-05, "loss": 2.2807, "step": 4682500 }, { "epoch": 23.2, "learning_rate": 3.8403563660865126e-05, "loss": 2.2796, "step": 4683000 }, { "epoch": 23.2, "learning_rate": 3.840232507443904e-05, "loss": 2.3154, "step": 4683500 }, { "epoch": 23.21, "learning_rate": 3.840108648801296e-05, "loss": 2.2889, "step": 4684000 }, { "epoch": 23.21, "learning_rate": 3.8399847901586876e-05, "loss": 2.2843, "step": 4684500 }, { "epoch": 23.21, "learning_rate": 3.839861179233365e-05, "loss": 2.2907, "step": 4685000 }, { "epoch": 23.21, "learning_rate": 3.839737320590757e-05, "loss": 2.3155, "step": 4685500 }, { "epoch": 23.22, "learning_rate": 3.839613461948148e-05, "loss": 2.3057, "step": 4686000 }, { "epoch": 23.22, "learning_rate": 3.8394896033055396e-05, "loss": 2.2936, "step": 4686500 }, { "epoch": 23.22, "learning_rate": 3.839365744662931e-05, "loss": 2.3158, "step": 4687000 }, { "epoch": 23.22, "learning_rate": 3.839241886020323e-05, "loss": 2.2809, "step": 4687500 }, { "epoch": 23.23, "learning_rate": 3.839118027377715e-05, "loss": 2.2988, "step": 4688000 }, { "epoch": 23.23, "learning_rate": 3.8389941687351064e-05, "loss": 2.2952, "step": 4688500 }, { "epoch": 23.23, "learning_rate": 3.838870310092498e-05, "loss": 2.2915, "step": 4689000 }, { "epoch": 23.23, "learning_rate": 3.83874645144989e-05, "loss": 2.2737, "step": 4689500 }, { "epoch": 23.24, "learning_rate": 3.838622592807281e-05, "loss": 2.2757, "step": 4690000 }, { "epoch": 23.24, "learning_rate": 3.8384989818819577e-05, "loss": 2.3108, "step": 4690500 }, { "epoch": 23.24, "learning_rate": 3.838375370956635e-05, "loss": 2.3196, "step": 4691000 }, { "epoch": 23.24, "learning_rate": 3.838251512314027e-05, "loss": 2.2907, "step": 4691500 }, { "epoch": 23.25, "learning_rate": 3.838127653671418e-05, "loss": 2.2718, "step": 4692000 }, { "epoch": 23.25, "learning_rate": 3.8380037950288096e-05, "loss": 2.2993, "step": 4692500 }, { "epoch": 23.25, "learning_rate": 3.8378801841034865e-05, "loss": 2.3027, "step": 4693000 }, { "epoch": 23.25, "learning_rate": 3.837756325460878e-05, "loss": 2.29, "step": 4693500 }, { "epoch": 23.26, "learning_rate": 3.83763246681827e-05, "loss": 2.2988, "step": 4694000 }, { "epoch": 23.26, "learning_rate": 3.8375086081756616e-05, "loss": 2.2946, "step": 4694500 }, { "epoch": 23.26, "learning_rate": 3.8373847495330526e-05, "loss": 2.3021, "step": 4695000 }, { "epoch": 23.26, "learning_rate": 3.837260890890444e-05, "loss": 2.2775, "step": 4695500 }, { "epoch": 23.27, "learning_rate": 3.837137032247836e-05, "loss": 2.2802, "step": 4696000 }, { "epoch": 23.27, "learning_rate": 3.837013173605228e-05, "loss": 2.3041, "step": 4696500 }, { "epoch": 23.27, "learning_rate": 3.8368893149626194e-05, "loss": 2.2622, "step": 4697000 }, { "epoch": 23.27, "learning_rate": 3.836765704037297e-05, "loss": 2.305, "step": 4697500 }, { "epoch": 23.28, "learning_rate": 3.8366418453946886e-05, "loss": 2.3045, "step": 4698000 }, { "epoch": 23.28, "learning_rate": 3.836518234469365e-05, "loss": 2.282, "step": 4698500 }, { "epoch": 23.28, "learning_rate": 3.8363943758267565e-05, "loss": 2.2845, "step": 4699000 }, { "epoch": 23.28, "learning_rate": 3.836270517184148e-05, "loss": 2.2755, "step": 4699500 }, { "epoch": 23.29, "learning_rate": 3.83614665854154e-05, "loss": 2.2895, "step": 4700000 }, { "epoch": 23.29, "learning_rate": 3.8360227998989316e-05, "loss": 2.2926, "step": 4700500 }, { "epoch": 23.29, "learning_rate": 3.835898941256323e-05, "loss": 2.2743, "step": 4701000 }, { "epoch": 23.29, "learning_rate": 3.835775330331e-05, "loss": 2.2907, "step": 4701500 }, { "epoch": 23.3, "learning_rate": 3.835651471688392e-05, "loss": 2.2967, "step": 4702000 }, { "epoch": 23.3, "learning_rate": 3.8355276130457836e-05, "loss": 2.3121, "step": 4702500 }, { "epoch": 23.3, "learning_rate": 3.835403754403175e-05, "loss": 2.3018, "step": 4703000 }, { "epoch": 23.3, "learning_rate": 3.835279895760567e-05, "loss": 2.2903, "step": 4703500 }, { "epoch": 23.31, "learning_rate": 3.8351560371179586e-05, "loss": 2.3101, "step": 4704000 }, { "epoch": 23.31, "learning_rate": 3.8350321784753496e-05, "loss": 2.2976, "step": 4704500 }, { "epoch": 23.31, "learning_rate": 3.8349085675500265e-05, "loss": 2.2819, "step": 4705000 }, { "epoch": 23.31, "learning_rate": 3.834784708907418e-05, "loss": 2.2727, "step": 4705500 }, { "epoch": 23.32, "learning_rate": 3.834661097982095e-05, "loss": 2.2911, "step": 4706000 }, { "epoch": 23.32, "learning_rate": 3.834537239339487e-05, "loss": 2.3118, "step": 4706500 }, { "epoch": 23.32, "learning_rate": 3.8344133806968785e-05, "loss": 2.3047, "step": 4707000 }, { "epoch": 23.32, "learning_rate": 3.83428952205427e-05, "loss": 2.3097, "step": 4707500 }, { "epoch": 23.33, "learning_rate": 3.834165663411662e-05, "loss": 2.2727, "step": 4708000 }, { "epoch": 23.33, "learning_rate": 3.8340418047690536e-05, "loss": 2.313, "step": 4708500 }, { "epoch": 23.33, "learning_rate": 3.833917946126445e-05, "loss": 2.295, "step": 4709000 }, { "epoch": 23.33, "learning_rate": 3.833794087483837e-05, "loss": 2.3, "step": 4709500 }, { "epoch": 23.33, "learning_rate": 3.8336702288412286e-05, "loss": 2.287, "step": 4710000 }, { "epoch": 23.34, "learning_rate": 3.83354637019862e-05, "loss": 2.2963, "step": 4710500 }, { "epoch": 23.34, "learning_rate": 3.8334227592732965e-05, "loss": 2.2942, "step": 4711000 }, { "epoch": 23.34, "learning_rate": 3.833298900630688e-05, "loss": 2.2964, "step": 4711500 }, { "epoch": 23.34, "learning_rate": 3.83317504198808e-05, "loss": 2.3011, "step": 4712000 }, { "epoch": 23.35, "learning_rate": 3.8330511833454716e-05, "loss": 2.2922, "step": 4712500 }, { "epoch": 23.35, "learning_rate": 3.832927324702863e-05, "loss": 2.2892, "step": 4713000 }, { "epoch": 23.35, "learning_rate": 3.832803466060255e-05, "loss": 2.3098, "step": 4713500 }, { "epoch": 23.35, "learning_rate": 3.832679607417646e-05, "loss": 2.2946, "step": 4714000 }, { "epoch": 23.36, "learning_rate": 3.8325559964923236e-05, "loss": 2.2958, "step": 4714500 }, { "epoch": 23.36, "learning_rate": 3.832432137849715e-05, "loss": 2.3095, "step": 4715000 }, { "epoch": 23.36, "learning_rate": 3.832308279207107e-05, "loss": 2.2891, "step": 4715500 }, { "epoch": 23.36, "learning_rate": 3.8321844205644987e-05, "loss": 2.2835, "step": 4716000 }, { "epoch": 23.37, "learning_rate": 3.8320605619218903e-05, "loss": 2.3211, "step": 4716500 }, { "epoch": 23.37, "learning_rate": 3.831936950996567e-05, "loss": 2.2844, "step": 4717000 }, { "epoch": 23.37, "learning_rate": 3.831813092353958e-05, "loss": 2.298, "step": 4717500 }, { "epoch": 23.37, "learning_rate": 3.83168923371135e-05, "loss": 2.2937, "step": 4718000 }, { "epoch": 23.38, "learning_rate": 3.8315653750687416e-05, "loss": 2.2833, "step": 4718500 }, { "epoch": 23.38, "learning_rate": 3.831441516426133e-05, "loss": 2.2817, "step": 4719000 }, { "epoch": 23.38, "learning_rate": 3.831317657783525e-05, "loss": 2.2641, "step": 4719500 }, { "epoch": 23.38, "learning_rate": 3.831193799140916e-05, "loss": 2.3006, "step": 4720000 }, { "epoch": 23.39, "learning_rate": 3.8310701882155936e-05, "loss": 2.3117, "step": 4720500 }, { "epoch": 23.39, "learning_rate": 3.830946329572985e-05, "loss": 2.2952, "step": 4721000 }, { "epoch": 23.39, "learning_rate": 3.830822470930377e-05, "loss": 2.2941, "step": 4721500 }, { "epoch": 23.39, "learning_rate": 3.830698612287769e-05, "loss": 2.303, "step": 4722000 }, { "epoch": 23.4, "learning_rate": 3.8305747536451604e-05, "loss": 2.2916, "step": 4722500 }, { "epoch": 23.4, "learning_rate": 3.830451142719837e-05, "loss": 2.2864, "step": 4723000 }, { "epoch": 23.4, "learning_rate": 3.830327284077229e-05, "loss": 2.2994, "step": 4723500 }, { "epoch": 23.4, "learning_rate": 3.8302034254346206e-05, "loss": 2.3089, "step": 4724000 }, { "epoch": 23.41, "learning_rate": 3.830079814509297e-05, "loss": 2.3033, "step": 4724500 }, { "epoch": 23.41, "learning_rate": 3.8299559558666885e-05, "loss": 2.2952, "step": 4725000 }, { "epoch": 23.41, "learning_rate": 3.82983209722408e-05, "loss": 2.3225, "step": 4725500 }, { "epoch": 23.41, "learning_rate": 3.829708238581472e-05, "loss": 2.2881, "step": 4726000 }, { "epoch": 23.42, "learning_rate": 3.8295843799388636e-05, "loss": 2.2867, "step": 4726500 }, { "epoch": 23.42, "learning_rate": 3.829460521296255e-05, "loss": 2.2851, "step": 4727000 }, { "epoch": 23.42, "learning_rate": 3.829336662653647e-05, "loss": 2.2913, "step": 4727500 }, { "epoch": 23.42, "learning_rate": 3.829212804011039e-05, "loss": 2.2991, "step": 4728000 }, { "epoch": 23.43, "learning_rate": 3.8290889453684304e-05, "loss": 2.2934, "step": 4728500 }, { "epoch": 23.43, "learning_rate": 3.828965086725822e-05, "loss": 2.2761, "step": 4729000 }, { "epoch": 23.43, "learning_rate": 3.828841228083213e-05, "loss": 2.3159, "step": 4729500 }, { "epoch": 23.43, "learning_rate": 3.828717369440605e-05, "loss": 2.3028, "step": 4730000 }, { "epoch": 23.44, "learning_rate": 3.8285935107979965e-05, "loss": 2.3088, "step": 4730500 }, { "epoch": 23.44, "learning_rate": 3.828469652155388e-05, "loss": 2.2843, "step": 4731000 }, { "epoch": 23.44, "learning_rate": 3.828346041230065e-05, "loss": 2.3045, "step": 4731500 }, { "epoch": 23.44, "learning_rate": 3.828222182587457e-05, "loss": 2.2686, "step": 4732000 }, { "epoch": 23.45, "learning_rate": 3.8280983239448484e-05, "loss": 2.2848, "step": 4732500 }, { "epoch": 23.45, "learning_rate": 3.82797446530224e-05, "loss": 2.2904, "step": 4733000 }, { "epoch": 23.45, "learning_rate": 3.827850854376917e-05, "loss": 2.3025, "step": 4733500 }, { "epoch": 23.45, "learning_rate": 3.827726995734309e-05, "loss": 2.312, "step": 4734000 }, { "epoch": 23.46, "learning_rate": 3.8276031370917004e-05, "loss": 2.2943, "step": 4734500 }, { "epoch": 23.46, "learning_rate": 3.827479526166377e-05, "loss": 2.2959, "step": 4735000 }, { "epoch": 23.46, "learning_rate": 3.8273559152410535e-05, "loss": 2.284, "step": 4735500 }, { "epoch": 23.46, "learning_rate": 3.827232056598445e-05, "loss": 2.2884, "step": 4736000 }, { "epoch": 23.47, "learning_rate": 3.827108197955837e-05, "loss": 2.3079, "step": 4736500 }, { "epoch": 23.47, "learning_rate": 3.8269843393132285e-05, "loss": 2.2866, "step": 4737000 }, { "epoch": 23.47, "learning_rate": 3.82686048067062e-05, "loss": 2.283, "step": 4737500 }, { "epoch": 23.47, "learning_rate": 3.826736622028012e-05, "loss": 2.3011, "step": 4738000 }, { "epoch": 23.48, "learning_rate": 3.8266127633854036e-05, "loss": 2.2811, "step": 4738500 }, { "epoch": 23.48, "learning_rate": 3.826488904742795e-05, "loss": 2.2937, "step": 4739000 }, { "epoch": 23.48, "learning_rate": 3.826365046100187e-05, "loss": 2.2939, "step": 4739500 }, { "epoch": 23.48, "learning_rate": 3.826241187457579e-05, "loss": 2.3019, "step": 4740000 }, { "epoch": 23.49, "learning_rate": 3.8261173288149704e-05, "loss": 2.3265, "step": 4740500 }, { "epoch": 23.49, "learning_rate": 3.825993470172362e-05, "loss": 2.2916, "step": 4741000 }, { "epoch": 23.49, "learning_rate": 3.825869611529754e-05, "loss": 2.2738, "step": 4741500 }, { "epoch": 23.49, "learning_rate": 3.825746000604431e-05, "loss": 2.2937, "step": 4742000 }, { "epoch": 23.5, "learning_rate": 3.8256221419618224e-05, "loss": 2.2827, "step": 4742500 }, { "epoch": 23.5, "learning_rate": 3.825498283319214e-05, "loss": 2.2835, "step": 4743000 }, { "epoch": 23.5, "learning_rate": 3.825374424676606e-05, "loss": 2.3097, "step": 4743500 }, { "epoch": 23.5, "learning_rate": 3.8252505660339974e-05, "loss": 2.3038, "step": 4744000 }, { "epoch": 23.51, "learning_rate": 3.8251267073913884e-05, "loss": 2.3029, "step": 4744500 }, { "epoch": 23.51, "learning_rate": 3.82500284874878e-05, "loss": 2.327, "step": 4745000 }, { "epoch": 23.51, "learning_rate": 3.824878990106172e-05, "loss": 2.3, "step": 4745500 }, { "epoch": 23.51, "learning_rate": 3.8247551314635635e-05, "loss": 2.2879, "step": 4746000 }, { "epoch": 23.52, "learning_rate": 3.8246315205382404e-05, "loss": 2.2921, "step": 4746500 }, { "epoch": 23.52, "learning_rate": 3.824507661895632e-05, "loss": 2.2748, "step": 4747000 }, { "epoch": 23.52, "learning_rate": 3.824383803253024e-05, "loss": 2.26, "step": 4747500 }, { "epoch": 23.52, "learning_rate": 3.8242599446104155e-05, "loss": 2.3142, "step": 4748000 }, { "epoch": 23.53, "learning_rate": 3.8241360859678065e-05, "loss": 2.2673, "step": 4748500 }, { "epoch": 23.53, "learning_rate": 3.824012475042484e-05, "loss": 2.2915, "step": 4749000 }, { "epoch": 23.53, "learning_rate": 3.823888616399876e-05, "loss": 2.2943, "step": 4749500 }, { "epoch": 23.53, "learning_rate": 3.8237647577572674e-05, "loss": 2.2901, "step": 4750000 }, { "epoch": 23.54, "learning_rate": 3.823640899114659e-05, "loss": 2.3014, "step": 4750500 }, { "epoch": 23.54, "learning_rate": 3.8235172881893353e-05, "loss": 2.3051, "step": 4751000 }, { "epoch": 23.54, "learning_rate": 3.823393429546727e-05, "loss": 2.3029, "step": 4751500 }, { "epoch": 23.54, "learning_rate": 3.823269570904119e-05, "loss": 2.3158, "step": 4752000 }, { "epoch": 23.55, "learning_rate": 3.8231459599787956e-05, "loss": 2.3026, "step": 4752500 }, { "epoch": 23.55, "learning_rate": 3.823022101336187e-05, "loss": 2.2962, "step": 4753000 }, { "epoch": 23.55, "learning_rate": 3.822898242693579e-05, "loss": 2.3032, "step": 4753500 }, { "epoch": 23.55, "learning_rate": 3.822774384050971e-05, "loss": 2.2858, "step": 4754000 }, { "epoch": 23.56, "learning_rate": 3.8226505254083624e-05, "loss": 2.312, "step": 4754500 }, { "epoch": 23.56, "learning_rate": 3.8225269144830386e-05, "loss": 2.3003, "step": 4755000 }, { "epoch": 23.56, "learning_rate": 3.82240305584043e-05, "loss": 2.3145, "step": 4755500 }, { "epoch": 23.56, "learning_rate": 3.822279197197822e-05, "loss": 2.2898, "step": 4756000 }, { "epoch": 23.57, "learning_rate": 3.822155338555214e-05, "loss": 2.2888, "step": 4756500 }, { "epoch": 23.57, "learning_rate": 3.8220314799126054e-05, "loss": 2.3015, "step": 4757000 }, { "epoch": 23.57, "learning_rate": 3.821907621269997e-05, "loss": 2.2855, "step": 4757500 }, { "epoch": 23.57, "learning_rate": 3.821784010344674e-05, "loss": 2.3261, "step": 4758000 }, { "epoch": 23.58, "learning_rate": 3.8216601517020656e-05, "loss": 2.2987, "step": 4758500 }, { "epoch": 23.58, "learning_rate": 3.821536293059457e-05, "loss": 2.2882, "step": 4759000 }, { "epoch": 23.58, "learning_rate": 3.8214129298514194e-05, "loss": 2.3058, "step": 4759500 }, { "epoch": 23.58, "learning_rate": 3.821289071208811e-05, "loss": 2.3102, "step": 4760000 }, { "epoch": 23.59, "learning_rate": 3.821165460283488e-05, "loss": 2.3256, "step": 4760500 }, { "epoch": 23.59, "learning_rate": 3.8210416016408796e-05, "loss": 2.3053, "step": 4761000 }, { "epoch": 23.59, "learning_rate": 3.8209177429982713e-05, "loss": 2.3171, "step": 4761500 }, { "epoch": 23.59, "learning_rate": 3.820793884355663e-05, "loss": 2.3362, "step": 4762000 }, { "epoch": 23.6, "learning_rate": 3.820670025713055e-05, "loss": 2.3121, "step": 4762500 }, { "epoch": 23.6, "learning_rate": 3.8205461670704464e-05, "loss": 2.3041, "step": 4763000 }, { "epoch": 23.6, "learning_rate": 3.820422308427838e-05, "loss": 2.2931, "step": 4763500 }, { "epoch": 23.6, "learning_rate": 3.82029844978523e-05, "loss": 2.3172, "step": 4764000 }, { "epoch": 23.6, "learning_rate": 3.820174591142621e-05, "loss": 2.3078, "step": 4764500 }, { "epoch": 23.61, "learning_rate": 3.8200507325000125e-05, "loss": 2.2917, "step": 4765000 }, { "epoch": 23.61, "learning_rate": 3.819926873857404e-05, "loss": 2.3073, "step": 4765500 }, { "epoch": 23.61, "learning_rate": 3.819803015214796e-05, "loss": 2.2881, "step": 4766000 }, { "epoch": 23.61, "learning_rate": 3.8196791565721876e-05, "loss": 2.335, "step": 4766500 }, { "epoch": 23.62, "learning_rate": 3.819555297929579e-05, "loss": 2.2791, "step": 4767000 }, { "epoch": 23.62, "learning_rate": 3.819431439286971e-05, "loss": 2.2827, "step": 4767500 }, { "epoch": 23.62, "learning_rate": 3.819307580644363e-05, "loss": 2.3115, "step": 4768000 }, { "epoch": 23.62, "learning_rate": 3.819183722001754e-05, "loss": 2.3083, "step": 4768500 }, { "epoch": 23.63, "learning_rate": 3.8190598633591454e-05, "loss": 2.3059, "step": 4769000 }, { "epoch": 23.63, "learning_rate": 3.818936252433822e-05, "loss": 2.2958, "step": 4769500 }, { "epoch": 23.63, "learning_rate": 3.8188126415085e-05, "loss": 2.3038, "step": 4770000 }, { "epoch": 23.63, "learning_rate": 3.8186887828658915e-05, "loss": 2.3175, "step": 4770500 }, { "epoch": 23.64, "learning_rate": 3.8185649242232825e-05, "loss": 2.2936, "step": 4771000 }, { "epoch": 23.64, "learning_rate": 3.818441065580674e-05, "loss": 2.2973, "step": 4771500 }, { "epoch": 23.64, "learning_rate": 3.818317206938066e-05, "loss": 2.2993, "step": 4772000 }, { "epoch": 23.64, "learning_rate": 3.8181933482954576e-05, "loss": 2.3115, "step": 4772500 }, { "epoch": 23.65, "learning_rate": 3.818069489652849e-05, "loss": 2.3108, "step": 4773000 }, { "epoch": 23.65, "learning_rate": 3.817945631010241e-05, "loss": 2.3075, "step": 4773500 }, { "epoch": 23.65, "learning_rate": 3.817821772367633e-05, "loss": 2.2759, "step": 4774000 }, { "epoch": 23.65, "learning_rate": 3.817698409159595e-05, "loss": 2.3221, "step": 4774500 }, { "epoch": 23.66, "learning_rate": 3.8175747982342716e-05, "loss": 2.2834, "step": 4775000 }, { "epoch": 23.66, "learning_rate": 3.817450939591663e-05, "loss": 2.2967, "step": 4775500 }, { "epoch": 23.66, "learning_rate": 3.8173270809490543e-05, "loss": 2.2973, "step": 4776000 }, { "epoch": 23.66, "learning_rate": 3.817203222306446e-05, "loss": 2.2763, "step": 4776500 }, { "epoch": 23.67, "learning_rate": 3.817079611381123e-05, "loss": 2.2766, "step": 4777000 }, { "epoch": 23.67, "learning_rate": 3.8169557527385146e-05, "loss": 2.3263, "step": 4777500 }, { "epoch": 23.67, "learning_rate": 3.816831894095906e-05, "loss": 2.2982, "step": 4778000 }, { "epoch": 23.67, "learning_rate": 3.816708035453298e-05, "loss": 2.3112, "step": 4778500 }, { "epoch": 23.68, "learning_rate": 3.81658417681069e-05, "loss": 2.3014, "step": 4779000 }, { "epoch": 23.68, "learning_rate": 3.8164603181680814e-05, "loss": 2.286, "step": 4779500 }, { "epoch": 23.68, "learning_rate": 3.816336459525473e-05, "loss": 2.3127, "step": 4780000 }, { "epoch": 23.68, "learning_rate": 3.816212600882865e-05, "loss": 2.3024, "step": 4780500 }, { "epoch": 23.69, "learning_rate": 3.8160887422402565e-05, "loss": 2.2597, "step": 4781000 }, { "epoch": 23.69, "learning_rate": 3.815964883597648e-05, "loss": 2.2918, "step": 4781500 }, { "epoch": 23.69, "learning_rate": 3.81584102495504e-05, "loss": 2.2857, "step": 4782000 }, { "epoch": 23.69, "learning_rate": 3.8157171663124315e-05, "loss": 2.3056, "step": 4782500 }, { "epoch": 23.7, "learning_rate": 3.815593307669823e-05, "loss": 2.2823, "step": 4783000 }, { "epoch": 23.7, "learning_rate": 3.815469449027214e-05, "loss": 2.2994, "step": 4783500 }, { "epoch": 23.7, "learning_rate": 3.815345590384606e-05, "loss": 2.2764, "step": 4784000 }, { "epoch": 23.7, "learning_rate": 3.8152217317419976e-05, "loss": 2.2975, "step": 4784500 }, { "epoch": 23.71, "learning_rate": 3.815097873099389e-05, "loss": 2.2998, "step": 4785000 }, { "epoch": 23.71, "learning_rate": 3.814974014456781e-05, "loss": 2.293, "step": 4785500 }, { "epoch": 23.71, "learning_rate": 3.814850403531458e-05, "loss": 2.2987, "step": 4786000 }, { "epoch": 23.71, "learning_rate": 3.814726544888849e-05, "loss": 2.2947, "step": 4786500 }, { "epoch": 23.72, "learning_rate": 3.8146026862462406e-05, "loss": 2.296, "step": 4787000 }, { "epoch": 23.72, "learning_rate": 3.814478827603632e-05, "loss": 2.3184, "step": 4787500 }, { "epoch": 23.72, "learning_rate": 3.814354968961024e-05, "loss": 2.3023, "step": 4788000 }, { "epoch": 23.72, "learning_rate": 3.814231110318416e-05, "loss": 2.2848, "step": 4788500 }, { "epoch": 23.73, "learning_rate": 3.8141072516758074e-05, "loss": 2.2869, "step": 4789000 }, { "epoch": 23.73, "learning_rate": 3.813983640750484e-05, "loss": 2.3372, "step": 4789500 }, { "epoch": 23.73, "learning_rate": 3.813859782107876e-05, "loss": 2.3091, "step": 4790000 }, { "epoch": 23.73, "learning_rate": 3.8137359234652676e-05, "loss": 2.2714, "step": 4790500 }, { "epoch": 23.74, "learning_rate": 3.813612064822659e-05, "loss": 2.2939, "step": 4791000 }, { "epoch": 23.74, "learning_rate": 3.813488206180051e-05, "loss": 2.3059, "step": 4791500 }, { "epoch": 23.74, "learning_rate": 3.813364347537443e-05, "loss": 2.2798, "step": 4792000 }, { "epoch": 23.74, "learning_rate": 3.8132407366121196e-05, "loss": 2.2939, "step": 4792500 }, { "epoch": 23.75, "learning_rate": 3.8131168779695106e-05, "loss": 2.2965, "step": 4793000 }, { "epoch": 23.75, "learning_rate": 3.812993019326902e-05, "loss": 2.2802, "step": 4793500 }, { "epoch": 23.75, "learning_rate": 3.81286940840158e-05, "loss": 2.3213, "step": 4794000 }, { "epoch": 23.75, "learning_rate": 3.8127455497589716e-05, "loss": 2.3187, "step": 4794500 }, { "epoch": 23.76, "learning_rate": 3.812621691116363e-05, "loss": 2.314, "step": 4795000 }, { "epoch": 23.76, "learning_rate": 3.812497832473755e-05, "loss": 2.283, "step": 4795500 }, { "epoch": 23.76, "learning_rate": 3.812373973831146e-05, "loss": 2.3006, "step": 4796000 }, { "epoch": 23.76, "learning_rate": 3.812250362905823e-05, "loss": 2.3053, "step": 4796500 }, { "epoch": 23.77, "learning_rate": 3.8121265042632145e-05, "loss": 2.2898, "step": 4797000 }, { "epoch": 23.77, "learning_rate": 3.812002645620606e-05, "loss": 2.3042, "step": 4797500 }, { "epoch": 23.77, "learning_rate": 3.811878786977998e-05, "loss": 2.3032, "step": 4798000 }, { "epoch": 23.77, "learning_rate": 3.8117549283353896e-05, "loss": 2.2994, "step": 4798500 }, { "epoch": 23.78, "learning_rate": 3.8116310696927806e-05, "loss": 2.3109, "step": 4799000 }, { "epoch": 23.78, "learning_rate": 3.811507211050172e-05, "loss": 2.3052, "step": 4799500 }, { "epoch": 23.78, "learning_rate": 3.81138360012485e-05, "loss": 2.2868, "step": 4800000 }, { "epoch": 23.78, "learning_rate": 3.8112597414822416e-05, "loss": 2.3109, "step": 4800500 }, { "epoch": 23.79, "learning_rate": 3.811135882839633e-05, "loss": 2.3048, "step": 4801000 }, { "epoch": 23.79, "learning_rate": 3.811012024197025e-05, "loss": 2.3028, "step": 4801500 }, { "epoch": 23.79, "learning_rate": 3.810888165554416e-05, "loss": 2.3225, "step": 4802000 }, { "epoch": 23.79, "learning_rate": 3.810764306911808e-05, "loss": 2.2936, "step": 4802500 }, { "epoch": 23.8, "learning_rate": 3.8106404482691994e-05, "loss": 2.3293, "step": 4803000 }, { "epoch": 23.8, "learning_rate": 3.810516589626591e-05, "loss": 2.3076, "step": 4803500 }, { "epoch": 23.8, "learning_rate": 3.810392730983983e-05, "loss": 2.2963, "step": 4804000 }, { "epoch": 23.8, "learning_rate": 3.8102688723413744e-05, "loss": 2.2903, "step": 4804500 }, { "epoch": 23.81, "learning_rate": 3.810145261416051e-05, "loss": 2.3059, "step": 4805000 }, { "epoch": 23.81, "learning_rate": 3.810021402773442e-05, "loss": 2.2973, "step": 4805500 }, { "epoch": 23.81, "learning_rate": 3.809897544130834e-05, "loss": 2.2988, "step": 4806000 }, { "epoch": 23.81, "learning_rate": 3.809773685488226e-05, "loss": 2.2914, "step": 4806500 }, { "epoch": 23.82, "learning_rate": 3.8096498268456174e-05, "loss": 2.3179, "step": 4807000 }, { "epoch": 23.82, "learning_rate": 3.809525968203009e-05, "loss": 2.3134, "step": 4807500 }, { "epoch": 23.82, "learning_rate": 3.809402357277687e-05, "loss": 2.28, "step": 4808000 }, { "epoch": 23.82, "learning_rate": 3.809278498635078e-05, "loss": 2.3142, "step": 4808500 }, { "epoch": 23.83, "learning_rate": 3.8091546399924694e-05, "loss": 2.2888, "step": 4809000 }, { "epoch": 23.83, "learning_rate": 3.809030781349861e-05, "loss": 2.3241, "step": 4809500 }, { "epoch": 23.83, "learning_rate": 3.808906922707253e-05, "loss": 2.3173, "step": 4810000 }, { "epoch": 23.83, "learning_rate": 3.8087830640646445e-05, "loss": 2.2905, "step": 4810500 }, { "epoch": 23.84, "learning_rate": 3.8086597008566065e-05, "loss": 2.3409, "step": 4811000 }, { "epoch": 23.84, "learning_rate": 3.808535842213998e-05, "loss": 2.3207, "step": 4811500 }, { "epoch": 23.84, "learning_rate": 3.80841198357139e-05, "loss": 2.3175, "step": 4812000 }, { "epoch": 23.84, "learning_rate": 3.808288372646067e-05, "loss": 2.3065, "step": 4812500 }, { "epoch": 23.85, "learning_rate": 3.8081645140034585e-05, "loss": 2.2876, "step": 4813000 }, { "epoch": 23.85, "learning_rate": 3.80804065536085e-05, "loss": 2.3141, "step": 4813500 }, { "epoch": 23.85, "learning_rate": 3.8079170444355264e-05, "loss": 2.2991, "step": 4814000 }, { "epoch": 23.85, "learning_rate": 3.807793185792918e-05, "loss": 2.2971, "step": 4814500 }, { "epoch": 23.86, "learning_rate": 3.80766932715031e-05, "loss": 2.2939, "step": 4815000 }, { "epoch": 23.86, "learning_rate": 3.8075454685077015e-05, "loss": 2.3154, "step": 4815500 }, { "epoch": 23.86, "learning_rate": 3.807421609865093e-05, "loss": 2.3036, "step": 4816000 }, { "epoch": 23.86, "learning_rate": 3.807297751222485e-05, "loss": 2.3105, "step": 4816500 }, { "epoch": 23.87, "learning_rate": 3.8071738925798765e-05, "loss": 2.281, "step": 4817000 }, { "epoch": 23.87, "learning_rate": 3.807050033937268e-05, "loss": 2.2819, "step": 4817500 }, { "epoch": 23.87, "learning_rate": 3.80692617529466e-05, "loss": 2.2728, "step": 4818000 }, { "epoch": 23.87, "learning_rate": 3.8068023166520516e-05, "loss": 2.3142, "step": 4818500 }, { "epoch": 23.87, "learning_rate": 3.806678458009443e-05, "loss": 2.3092, "step": 4819000 }, { "epoch": 23.88, "learning_rate": 3.806554599366835e-05, "loss": 2.3183, "step": 4819500 }, { "epoch": 23.88, "learning_rate": 3.806430740724227e-05, "loss": 2.2989, "step": 4820000 }, { "epoch": 23.88, "learning_rate": 3.8063071297989036e-05, "loss": 2.2973, "step": 4820500 }, { "epoch": 23.88, "learning_rate": 3.8061837665908656e-05, "loss": 2.3134, "step": 4821000 }, { "epoch": 23.89, "learning_rate": 3.8060599079482567e-05, "loss": 2.3101, "step": 4821500 }, { "epoch": 23.89, "learning_rate": 3.8059360493056483e-05, "loss": 2.3073, "step": 4822000 }, { "epoch": 23.89, "learning_rate": 3.80581219066304e-05, "loss": 2.2903, "step": 4822500 }, { "epoch": 23.89, "learning_rate": 3.805688332020432e-05, "loss": 2.2952, "step": 4823000 }, { "epoch": 23.9, "learning_rate": 3.8055644733778234e-05, "loss": 2.334, "step": 4823500 }, { "epoch": 23.9, "learning_rate": 3.805440614735215e-05, "loss": 2.3168, "step": 4824000 }, { "epoch": 23.9, "learning_rate": 3.805316756092607e-05, "loss": 2.3011, "step": 4824500 }, { "epoch": 23.9, "learning_rate": 3.805193145167284e-05, "loss": 2.3194, "step": 4825000 }, { "epoch": 23.91, "learning_rate": 3.8050692865246754e-05, "loss": 2.313, "step": 4825500 }, { "epoch": 23.91, "learning_rate": 3.804945427882067e-05, "loss": 2.3036, "step": 4826000 }, { "epoch": 23.91, "learning_rate": 3.804821569239458e-05, "loss": 2.2979, "step": 4826500 }, { "epoch": 23.91, "learning_rate": 3.80469771059685e-05, "loss": 2.2836, "step": 4827000 }, { "epoch": 23.92, "learning_rate": 3.8045738519542415e-05, "loss": 2.3093, "step": 4827500 }, { "epoch": 23.92, "learning_rate": 3.8044502410289184e-05, "loss": 2.2908, "step": 4828000 }, { "epoch": 23.92, "learning_rate": 3.80432638238631e-05, "loss": 2.3154, "step": 4828500 }, { "epoch": 23.92, "learning_rate": 3.804202523743702e-05, "loss": 2.3275, "step": 4829000 }, { "epoch": 23.93, "learning_rate": 3.8040786651010934e-05, "loss": 2.3259, "step": 4829500 }, { "epoch": 23.93, "learning_rate": 3.803954806458485e-05, "loss": 2.3006, "step": 4830000 }, { "epoch": 23.93, "learning_rate": 3.803830947815877e-05, "loss": 2.3199, "step": 4830500 }, { "epoch": 23.93, "learning_rate": 3.8037070891732685e-05, "loss": 2.3036, "step": 4831000 }, { "epoch": 23.94, "learning_rate": 3.80358323053066e-05, "loss": 2.2679, "step": 4831500 }, { "epoch": 23.94, "learning_rate": 3.803459619605337e-05, "loss": 2.3195, "step": 4832000 }, { "epoch": 23.94, "learning_rate": 3.803335760962729e-05, "loss": 2.3192, "step": 4832500 }, { "epoch": 23.94, "learning_rate": 3.803212150037406e-05, "loss": 2.3352, "step": 4833000 }, { "epoch": 23.95, "learning_rate": 3.8030885391120825e-05, "loss": 2.3116, "step": 4833500 }, { "epoch": 23.95, "learning_rate": 3.802964680469474e-05, "loss": 2.3202, "step": 4834000 }, { "epoch": 23.95, "learning_rate": 3.802840821826866e-05, "loss": 2.3153, "step": 4834500 }, { "epoch": 23.95, "learning_rate": 3.8027169631842576e-05, "loss": 2.3291, "step": 4835000 }, { "epoch": 23.96, "learning_rate": 3.802593104541649e-05, "loss": 2.2869, "step": 4835500 }, { "epoch": 23.96, "learning_rate": 3.802469245899041e-05, "loss": 2.3052, "step": 4836000 }, { "epoch": 23.96, "learning_rate": 3.802345387256433e-05, "loss": 2.313, "step": 4836500 }, { "epoch": 23.96, "learning_rate": 3.802221776331109e-05, "loss": 2.2973, "step": 4837000 }, { "epoch": 23.97, "learning_rate": 3.8020979176885006e-05, "loss": 2.3024, "step": 4837500 }, { "epoch": 23.97, "learning_rate": 3.801974059045892e-05, "loss": 2.3348, "step": 4838000 }, { "epoch": 23.97, "learning_rate": 3.801850200403284e-05, "loss": 2.3053, "step": 4838500 }, { "epoch": 23.97, "learning_rate": 3.801726341760676e-05, "loss": 2.3055, "step": 4839000 }, { "epoch": 23.98, "learning_rate": 3.8016024831180674e-05, "loss": 2.3312, "step": 4839500 }, { "epoch": 23.98, "learning_rate": 3.801478872192744e-05, "loss": 2.2953, "step": 4840000 }, { "epoch": 23.98, "learning_rate": 3.801355013550136e-05, "loss": 2.3206, "step": 4840500 }, { "epoch": 23.98, "learning_rate": 3.8012311549075276e-05, "loss": 2.3084, "step": 4841000 }, { "epoch": 23.99, "learning_rate": 3.801107296264919e-05, "loss": 2.2991, "step": 4841500 }, { "epoch": 23.99, "learning_rate": 3.800983437622311e-05, "loss": 2.2901, "step": 4842000 }, { "epoch": 23.99, "learning_rate": 3.800859578979703e-05, "loss": 2.3141, "step": 4842500 }, { "epoch": 23.99, "learning_rate": 3.8007357203370944e-05, "loss": 2.3149, "step": 4843000 }, { "epoch": 24.0, "learning_rate": 3.8006118616944854e-05, "loss": 2.2997, "step": 4843500 }, { "epoch": 24.0, "learning_rate": 3.800488003051877e-05, "loss": 2.3124, "step": 4844000 }, { "epoch": 24.0, "eval_accuracy": 0.6541513737695038, "eval_accuracy_mlm": 0.608915092455769, "eval_accuracy_nsp": 0.8673943653685494, "eval_loss": 2.3393051624298096, "eval_runtime": 146.2902, "eval_samples_per_second": 1742.831, "eval_steps_per_second": 72.623, "step": 4844232 }, { "epoch": 24.0, "learning_rate": 3.800364144409269e-05, "loss": 2.2969, "step": 4844500 }, { "epoch": 24.0, "learning_rate": 3.8002402857666605e-05, "loss": 2.2555, "step": 4845000 }, { "epoch": 24.01, "learning_rate": 3.800116427124052e-05, "loss": 2.2708, "step": 4845500 }, { "epoch": 24.01, "learning_rate": 3.799992568481444e-05, "loss": 2.2948, "step": 4846000 }, { "epoch": 24.01, "learning_rate": 3.7998687098388356e-05, "loss": 2.277, "step": 4846500 }, { "epoch": 24.01, "learning_rate": 3.7997448511962266e-05, "loss": 2.249, "step": 4847000 }, { "epoch": 24.02, "learning_rate": 3.7996212402709035e-05, "loss": 2.2613, "step": 4847500 }, { "epoch": 24.02, "learning_rate": 3.799497629345581e-05, "loss": 2.2796, "step": 4848000 }, { "epoch": 24.02, "learning_rate": 3.799373770702973e-05, "loss": 2.2798, "step": 4848500 }, { "epoch": 24.02, "learning_rate": 3.7992499120603644e-05, "loss": 2.2457, "step": 4849000 }, { "epoch": 24.03, "learning_rate": 3.7991260534177554e-05, "loss": 2.2718, "step": 4849500 }, { "epoch": 24.03, "learning_rate": 3.799002194775147e-05, "loss": 2.2644, "step": 4850000 }, { "epoch": 24.03, "learning_rate": 3.798878336132539e-05, "loss": 2.282, "step": 4850500 }, { "epoch": 24.03, "learning_rate": 3.7987544774899305e-05, "loss": 2.2948, "step": 4851000 }, { "epoch": 24.04, "learning_rate": 3.7986308665646074e-05, "loss": 2.2776, "step": 4851500 }, { "epoch": 24.04, "learning_rate": 3.798507007921999e-05, "loss": 2.2824, "step": 4852000 }, { "epoch": 24.04, "learning_rate": 3.798383149279391e-05, "loss": 2.2988, "step": 4852500 }, { "epoch": 24.04, "learning_rate": 3.798259290636782e-05, "loss": 2.296, "step": 4853000 }, { "epoch": 24.05, "learning_rate": 3.7981356797114594e-05, "loss": 2.2601, "step": 4853500 }, { "epoch": 24.05, "learning_rate": 3.798011821068851e-05, "loss": 2.2779, "step": 4854000 }, { "epoch": 24.05, "learning_rate": 3.797887962426243e-05, "loss": 2.2901, "step": 4854500 }, { "epoch": 24.05, "learning_rate": 3.7977641037836344e-05, "loss": 2.286, "step": 4855000 }, { "epoch": 24.06, "learning_rate": 3.797640245141026e-05, "loss": 2.2828, "step": 4855500 }, { "epoch": 24.06, "learning_rate": 3.797516386498417e-05, "loss": 2.2735, "step": 4856000 }, { "epoch": 24.06, "learning_rate": 3.797392527855809e-05, "loss": 2.2708, "step": 4856500 }, { "epoch": 24.06, "learning_rate": 3.7972686692132005e-05, "loss": 2.2948, "step": 4857000 }, { "epoch": 24.07, "learning_rate": 3.7971450582878774e-05, "loss": 2.2892, "step": 4857500 }, { "epoch": 24.07, "learning_rate": 3.797021199645269e-05, "loss": 2.2911, "step": 4858000 }, { "epoch": 24.07, "learning_rate": 3.796897341002661e-05, "loss": 2.2749, "step": 4858500 }, { "epoch": 24.07, "learning_rate": 3.796773482360052e-05, "loss": 2.3011, "step": 4859000 }, { "epoch": 24.08, "learning_rate": 3.7966496237174435e-05, "loss": 2.2884, "step": 4859500 }, { "epoch": 24.08, "learning_rate": 3.796525765074835e-05, "loss": 2.2755, "step": 4860000 }, { "epoch": 24.08, "learning_rate": 3.796401906432227e-05, "loss": 2.3002, "step": 4860500 }, { "epoch": 24.08, "learning_rate": 3.7962780477896186e-05, "loss": 2.2786, "step": 4861000 }, { "epoch": 24.09, "learning_rate": 3.79615418914701e-05, "loss": 2.2814, "step": 4861500 }, { "epoch": 24.09, "learning_rate": 3.796030330504402e-05, "loss": 2.2544, "step": 4862000 }, { "epoch": 24.09, "learning_rate": 3.795906719579079e-05, "loss": 2.2841, "step": 4862500 }, { "epoch": 24.09, "learning_rate": 3.7957828609364705e-05, "loss": 2.3162, "step": 4863000 }, { "epoch": 24.1, "learning_rate": 3.7956592500111474e-05, "loss": 2.2993, "step": 4863500 }, { "epoch": 24.1, "learning_rate": 3.795535391368539e-05, "loss": 2.288, "step": 4864000 }, { "epoch": 24.1, "learning_rate": 3.795411532725931e-05, "loss": 2.2672, "step": 4864500 }, { "epoch": 24.1, "learning_rate": 3.795287921800608e-05, "loss": 2.2889, "step": 4865000 }, { "epoch": 24.11, "learning_rate": 3.7951640631579994e-05, "loss": 2.2822, "step": 4865500 }, { "epoch": 24.11, "learning_rate": 3.795040204515391e-05, "loss": 2.271, "step": 4866000 }, { "epoch": 24.11, "learning_rate": 3.794916345872783e-05, "loss": 2.2691, "step": 4866500 }, { "epoch": 24.11, "learning_rate": 3.7947924872301745e-05, "loss": 2.2968, "step": 4867000 }, { "epoch": 24.12, "learning_rate": 3.794668876304851e-05, "loss": 2.2602, "step": 4867500 }, { "epoch": 24.12, "learning_rate": 3.7945450176622424e-05, "loss": 2.2793, "step": 4868000 }, { "epoch": 24.12, "learning_rate": 3.794421159019634e-05, "loss": 2.2726, "step": 4868500 }, { "epoch": 24.12, "learning_rate": 3.794297300377026e-05, "loss": 2.2902, "step": 4869000 }, { "epoch": 24.13, "learning_rate": 3.7941734417344174e-05, "loss": 2.2575, "step": 4869500 }, { "epoch": 24.13, "learning_rate": 3.794049583091809e-05, "loss": 2.2759, "step": 4870000 }, { "epoch": 24.13, "learning_rate": 3.793925724449201e-05, "loss": 2.2869, "step": 4870500 }, { "epoch": 24.13, "learning_rate": 3.7938018658065925e-05, "loss": 2.2826, "step": 4871000 }, { "epoch": 24.14, "learning_rate": 3.7936780071639835e-05, "loss": 2.2913, "step": 4871500 }, { "epoch": 24.14, "learning_rate": 3.793554396238661e-05, "loss": 2.3063, "step": 4872000 }, { "epoch": 24.14, "learning_rate": 3.793430537596053e-05, "loss": 2.2777, "step": 4872500 }, { "epoch": 24.14, "learning_rate": 3.7933066789534445e-05, "loss": 2.2694, "step": 4873000 }, { "epoch": 24.15, "learning_rate": 3.793182820310836e-05, "loss": 2.2867, "step": 4873500 }, { "epoch": 24.15, "learning_rate": 3.793058961668228e-05, "loss": 2.2729, "step": 4874000 }, { "epoch": 24.15, "learning_rate": 3.792935350742905e-05, "loss": 2.2849, "step": 4874500 }, { "epoch": 24.15, "learning_rate": 3.792811492100296e-05, "loss": 2.2694, "step": 4875000 }, { "epoch": 24.15, "learning_rate": 3.7926876334576874e-05, "loss": 2.263, "step": 4875500 }, { "epoch": 24.16, "learning_rate": 3.792563774815079e-05, "loss": 2.2894, "step": 4876000 }, { "epoch": 24.16, "learning_rate": 3.792439916172471e-05, "loss": 2.2946, "step": 4876500 }, { "epoch": 24.16, "learning_rate": 3.792316305247148e-05, "loss": 2.2824, "step": 4877000 }, { "epoch": 24.16, "learning_rate": 3.7921924466045394e-05, "loss": 2.2882, "step": 4877500 }, { "epoch": 24.17, "learning_rate": 3.792068587961931e-05, "loss": 2.2966, "step": 4878000 }, { "epoch": 24.17, "learning_rate": 3.791944729319323e-05, "loss": 2.2884, "step": 4878500 }, { "epoch": 24.17, "learning_rate": 3.7918208706767145e-05, "loss": 2.2832, "step": 4879000 }, { "epoch": 24.17, "learning_rate": 3.7916972597513914e-05, "loss": 2.268, "step": 4879500 }, { "epoch": 24.18, "learning_rate": 3.791573401108783e-05, "loss": 2.2686, "step": 4880000 }, { "epoch": 24.18, "learning_rate": 3.791449542466175e-05, "loss": 2.2891, "step": 4880500 }, { "epoch": 24.18, "learning_rate": 3.7913256838235664e-05, "loss": 2.2771, "step": 4881000 }, { "epoch": 24.18, "learning_rate": 3.7912018251809575e-05, "loss": 2.3122, "step": 4881500 }, { "epoch": 24.19, "learning_rate": 3.791077966538349e-05, "loss": 2.2735, "step": 4882000 }, { "epoch": 24.19, "learning_rate": 3.790954107895741e-05, "loss": 2.2336, "step": 4882500 }, { "epoch": 24.19, "learning_rate": 3.7908302492531325e-05, "loss": 2.2546, "step": 4883000 }, { "epoch": 24.19, "learning_rate": 3.7907066383278094e-05, "loss": 2.2733, "step": 4883500 }, { "epoch": 24.2, "learning_rate": 3.790582779685201e-05, "loss": 2.2813, "step": 4884000 }, { "epoch": 24.2, "learning_rate": 3.790459168759878e-05, "loss": 2.2616, "step": 4884500 }, { "epoch": 24.2, "learning_rate": 3.79033531011727e-05, "loss": 2.2972, "step": 4885000 }, { "epoch": 24.2, "learning_rate": 3.7902114514746614e-05, "loss": 2.2637, "step": 4885500 }, { "epoch": 24.21, "learning_rate": 3.790087592832053e-05, "loss": 2.2805, "step": 4886000 }, { "epoch": 24.21, "learning_rate": 3.789963734189445e-05, "loss": 2.2822, "step": 4886500 }, { "epoch": 24.21, "learning_rate": 3.7898398755468365e-05, "loss": 2.2969, "step": 4887000 }, { "epoch": 24.21, "learning_rate": 3.789716016904228e-05, "loss": 2.2901, "step": 4887500 }, { "epoch": 24.22, "learning_rate": 3.78959215826162e-05, "loss": 2.2994, "step": 4888000 }, { "epoch": 24.22, "learning_rate": 3.789468299619011e-05, "loss": 2.2735, "step": 4888500 }, { "epoch": 24.22, "learning_rate": 3.7893444409764025e-05, "loss": 2.2762, "step": 4889000 }, { "epoch": 24.22, "learning_rate": 3.789220582333794e-05, "loss": 2.3055, "step": 4889500 }, { "epoch": 24.23, "learning_rate": 3.789096971408471e-05, "loss": 2.2829, "step": 4890000 }, { "epoch": 24.23, "learning_rate": 3.788973112765863e-05, "loss": 2.2869, "step": 4890500 }, { "epoch": 24.23, "learning_rate": 3.78884950184054e-05, "loss": 2.284, "step": 4891000 }, { "epoch": 24.23, "learning_rate": 3.7887256431979314e-05, "loss": 2.2953, "step": 4891500 }, { "epoch": 24.24, "learning_rate": 3.788601784555323e-05, "loss": 2.2887, "step": 4892000 }, { "epoch": 24.24, "learning_rate": 3.788477925912715e-05, "loss": 2.2865, "step": 4892500 }, { "epoch": 24.24, "learning_rate": 3.7883540672701065e-05, "loss": 2.2958, "step": 4893000 }, { "epoch": 24.24, "learning_rate": 3.788230208627498e-05, "loss": 2.2692, "step": 4893500 }, { "epoch": 24.25, "learning_rate": 3.7881065977021744e-05, "loss": 2.2848, "step": 4894000 }, { "epoch": 24.25, "learning_rate": 3.787982986776851e-05, "loss": 2.2996, "step": 4894500 }, { "epoch": 24.25, "learning_rate": 3.787859128134243e-05, "loss": 2.2844, "step": 4895000 }, { "epoch": 24.25, "learning_rate": 3.7877352694916346e-05, "loss": 2.2728, "step": 4895500 }, { "epoch": 24.26, "learning_rate": 3.7876116585663115e-05, "loss": 2.2742, "step": 4896000 }, { "epoch": 24.26, "learning_rate": 3.787487799923703e-05, "loss": 2.2734, "step": 4896500 }, { "epoch": 24.26, "learning_rate": 3.787363941281095e-05, "loss": 2.3034, "step": 4897000 }, { "epoch": 24.26, "learning_rate": 3.7872400826384866e-05, "loss": 2.3104, "step": 4897500 }, { "epoch": 24.27, "learning_rate": 3.787116223995878e-05, "loss": 2.2996, "step": 4898000 }, { "epoch": 24.27, "learning_rate": 3.786992365353269e-05, "loss": 2.2914, "step": 4898500 }, { "epoch": 24.27, "learning_rate": 3.786868506710661e-05, "loss": 2.2983, "step": 4899000 }, { "epoch": 24.27, "learning_rate": 3.786744648068053e-05, "loss": 2.3012, "step": 4899500 }, { "epoch": 24.28, "learning_rate": 3.78662103714273e-05, "loss": 2.2913, "step": 4900000 }, { "epoch": 24.28, "learning_rate": 3.786497178500121e-05, "loss": 2.2849, "step": 4900500 }, { "epoch": 24.28, "learning_rate": 3.786373319857513e-05, "loss": 2.2893, "step": 4901000 }, { "epoch": 24.28, "learning_rate": 3.7862494612149046e-05, "loss": 2.2917, "step": 4901500 }, { "epoch": 24.29, "learning_rate": 3.786125602572296e-05, "loss": 2.2711, "step": 4902000 }, { "epoch": 24.29, "learning_rate": 3.786001743929688e-05, "loss": 2.2723, "step": 4902500 }, { "epoch": 24.29, "learning_rate": 3.78587788528708e-05, "loss": 2.2921, "step": 4903000 }, { "epoch": 24.29, "learning_rate": 3.7857540266444714e-05, "loss": 2.2999, "step": 4903500 }, { "epoch": 24.3, "learning_rate": 3.785630168001863e-05, "loss": 2.2939, "step": 4904000 }, { "epoch": 24.3, "learning_rate": 3.78550655707654e-05, "loss": 2.3084, "step": 4904500 }, { "epoch": 24.3, "learning_rate": 3.785382698433931e-05, "loss": 2.311, "step": 4905000 }, { "epoch": 24.3, "learning_rate": 3.785258839791323e-05, "loss": 2.2878, "step": 4905500 }, { "epoch": 24.31, "learning_rate": 3.7851349811487144e-05, "loss": 2.273, "step": 4906000 }, { "epoch": 24.31, "learning_rate": 3.785011122506106e-05, "loss": 2.2828, "step": 4906500 }, { "epoch": 24.31, "learning_rate": 3.784887263863498e-05, "loss": 2.2966, "step": 4907000 }, { "epoch": 24.31, "learning_rate": 3.7847634052208895e-05, "loss": 2.2957, "step": 4907500 }, { "epoch": 24.32, "learning_rate": 3.784639546578281e-05, "loss": 2.2923, "step": 4908000 }, { "epoch": 24.32, "learning_rate": 3.784515687935673e-05, "loss": 2.2919, "step": 4908500 }, { "epoch": 24.32, "learning_rate": 3.7843918292930645e-05, "loss": 2.2741, "step": 4909000 }, { "epoch": 24.32, "learning_rate": 3.784267970650456e-05, "loss": 2.3029, "step": 4909500 }, { "epoch": 24.33, "learning_rate": 3.784144112007848e-05, "loss": 2.2969, "step": 4910000 }, { "epoch": 24.33, "learning_rate": 3.784020501082525e-05, "loss": 2.2778, "step": 4910500 }, { "epoch": 24.33, "learning_rate": 3.7838966424399165e-05, "loss": 2.3072, "step": 4911000 }, { "epoch": 24.33, "learning_rate": 3.783772783797308e-05, "loss": 2.2775, "step": 4911500 }, { "epoch": 24.34, "learning_rate": 3.7836489251547e-05, "loss": 2.2819, "step": 4912000 }, { "epoch": 24.34, "learning_rate": 3.783525314229376e-05, "loss": 2.2748, "step": 4912500 }, { "epoch": 24.34, "learning_rate": 3.783401703304053e-05, "loss": 2.3047, "step": 4913000 }, { "epoch": 24.34, "learning_rate": 3.783277844661445e-05, "loss": 2.2986, "step": 4913500 }, { "epoch": 24.35, "learning_rate": 3.7831539860188364e-05, "loss": 2.3051, "step": 4914000 }, { "epoch": 24.35, "learning_rate": 3.783030127376228e-05, "loss": 2.2865, "step": 4914500 }, { "epoch": 24.35, "learning_rate": 3.78290626873362e-05, "loss": 2.3101, "step": 4915000 }, { "epoch": 24.35, "learning_rate": 3.7827824100910114e-05, "loss": 2.3132, "step": 4915500 }, { "epoch": 24.36, "learning_rate": 3.782658551448403e-05, "loss": 2.2878, "step": 4916000 }, { "epoch": 24.36, "learning_rate": 3.78253494052308e-05, "loss": 2.2744, "step": 4916500 }, { "epoch": 24.36, "learning_rate": 3.782411329597757e-05, "loss": 2.2969, "step": 4917000 }, { "epoch": 24.36, "learning_rate": 3.7822874709551486e-05, "loss": 2.2918, "step": 4917500 }, { "epoch": 24.37, "learning_rate": 3.7821638600298255e-05, "loss": 2.2919, "step": 4918000 }, { "epoch": 24.37, "learning_rate": 3.782040001387217e-05, "loss": 2.3031, "step": 4918500 }, { "epoch": 24.37, "learning_rate": 3.781916142744609e-05, "loss": 2.2955, "step": 4919000 }, { "epoch": 24.37, "learning_rate": 3.7817922841020005e-05, "loss": 2.2644, "step": 4919500 }, { "epoch": 24.38, "learning_rate": 3.781668425459392e-05, "loss": 2.3126, "step": 4920000 }, { "epoch": 24.38, "learning_rate": 3.781544566816784e-05, "loss": 2.2968, "step": 4920500 }, { "epoch": 24.38, "learning_rate": 3.7814207081741756e-05, "loss": 2.2943, "step": 4921000 }, { "epoch": 24.38, "learning_rate": 3.781296849531567e-05, "loss": 2.3009, "step": 4921500 }, { "epoch": 24.39, "learning_rate": 3.781172990888958e-05, "loss": 2.2804, "step": 4922000 }, { "epoch": 24.39, "learning_rate": 3.781049379963635e-05, "loss": 2.2773, "step": 4922500 }, { "epoch": 24.39, "learning_rate": 3.780925521321027e-05, "loss": 2.3084, "step": 4923000 }, { "epoch": 24.39, "learning_rate": 3.7808016626784186e-05, "loss": 2.3082, "step": 4923500 }, { "epoch": 24.4, "learning_rate": 3.78067780403581e-05, "loss": 2.2789, "step": 4924000 }, { "epoch": 24.4, "learning_rate": 3.780554193110487e-05, "loss": 2.2776, "step": 4924500 }, { "epoch": 24.4, "learning_rate": 3.780430334467879e-05, "loss": 2.2606, "step": 4925000 }, { "epoch": 24.4, "learning_rate": 3.7803064758252706e-05, "loss": 2.3084, "step": 4925500 }, { "epoch": 24.41, "learning_rate": 3.780182617182662e-05, "loss": 2.2744, "step": 4926000 }, { "epoch": 24.41, "learning_rate": 3.780058758540054e-05, "loss": 2.2805, "step": 4926500 }, { "epoch": 24.41, "learning_rate": 3.7799348998974456e-05, "loss": 2.2899, "step": 4927000 }, { "epoch": 24.41, "learning_rate": 3.779811041254837e-05, "loss": 2.2927, "step": 4927500 }, { "epoch": 24.42, "learning_rate": 3.7796874303295135e-05, "loss": 2.3032, "step": 4928000 }, { "epoch": 24.42, "learning_rate": 3.779563571686905e-05, "loss": 2.2916, "step": 4928500 }, { "epoch": 24.42, "learning_rate": 3.779439713044297e-05, "loss": 2.284, "step": 4929000 }, { "epoch": 24.42, "learning_rate": 3.7793158544016886e-05, "loss": 2.2702, "step": 4929500 }, { "epoch": 24.42, "learning_rate": 3.7791922434763655e-05, "loss": 2.2966, "step": 4930000 }, { "epoch": 24.43, "learning_rate": 3.779068384833757e-05, "loss": 2.2612, "step": 4930500 }, { "epoch": 24.43, "learning_rate": 3.778944526191149e-05, "loss": 2.304, "step": 4931000 }, { "epoch": 24.43, "learning_rate": 3.7788206675485406e-05, "loss": 2.2901, "step": 4931500 }, { "epoch": 24.43, "learning_rate": 3.778696808905932e-05, "loss": 2.2847, "step": 4932000 }, { "epoch": 24.44, "learning_rate": 3.778572950263324e-05, "loss": 2.2852, "step": 4932500 }, { "epoch": 24.44, "learning_rate": 3.7784490916207156e-05, "loss": 2.2935, "step": 4933000 }, { "epoch": 24.44, "learning_rate": 3.7783252329781073e-05, "loss": 2.2765, "step": 4933500 }, { "epoch": 24.44, "learning_rate": 3.778201374335499e-05, "loss": 2.3069, "step": 4934000 }, { "epoch": 24.45, "learning_rate": 3.77807751569289e-05, "loss": 2.3149, "step": 4934500 }, { "epoch": 24.45, "learning_rate": 3.777953657050282e-05, "loss": 2.3075, "step": 4935000 }, { "epoch": 24.45, "learning_rate": 3.7778297984076734e-05, "loss": 2.2755, "step": 4935500 }, { "epoch": 24.45, "learning_rate": 3.777705939765065e-05, "loss": 2.2864, "step": 4936000 }, { "epoch": 24.46, "learning_rate": 3.777582328839742e-05, "loss": 2.3105, "step": 4936500 }, { "epoch": 24.46, "learning_rate": 3.777458470197134e-05, "loss": 2.2645, "step": 4937000 }, { "epoch": 24.46, "learning_rate": 3.7773346115545254e-05, "loss": 2.2963, "step": 4937500 }, { "epoch": 24.46, "learning_rate": 3.7772107529119164e-05, "loss": 2.3063, "step": 4938000 }, { "epoch": 24.47, "learning_rate": 3.777086894269308e-05, "loss": 2.309, "step": 4938500 }, { "epoch": 24.47, "learning_rate": 3.7769630356267e-05, "loss": 2.309, "step": 4939000 }, { "epoch": 24.47, "learning_rate": 3.7768391769840915e-05, "loss": 2.2765, "step": 4939500 }, { "epoch": 24.47, "learning_rate": 3.776715318341483e-05, "loss": 2.2682, "step": 4940000 }, { "epoch": 24.48, "learning_rate": 3.776591459698875e-05, "loss": 2.2867, "step": 4940500 }, { "epoch": 24.48, "learning_rate": 3.7764676010562666e-05, "loss": 2.2626, "step": 4941000 }, { "epoch": 24.48, "learning_rate": 3.776343742413658e-05, "loss": 2.2847, "step": 4941500 }, { "epoch": 24.48, "learning_rate": 3.776220131488335e-05, "loss": 2.2891, "step": 4942000 }, { "epoch": 24.49, "learning_rate": 3.776096272845727e-05, "loss": 2.2844, "step": 4942500 }, { "epoch": 24.49, "learning_rate": 3.7759724142031185e-05, "loss": 2.2788, "step": 4943000 }, { "epoch": 24.49, "learning_rate": 3.7758488032777954e-05, "loss": 2.2986, "step": 4943500 }, { "epoch": 24.49, "learning_rate": 3.7757249446351864e-05, "loss": 2.2886, "step": 4944000 }, { "epoch": 24.5, "learning_rate": 3.775601085992578e-05, "loss": 2.3065, "step": 4944500 }, { "epoch": 24.5, "learning_rate": 3.77547722734997e-05, "loss": 2.2977, "step": 4945000 }, { "epoch": 24.5, "learning_rate": 3.7753536164246474e-05, "loss": 2.2616, "step": 4945500 }, { "epoch": 24.5, "learning_rate": 3.775229757782039e-05, "loss": 2.2727, "step": 4946000 }, { "epoch": 24.51, "learning_rate": 3.775105899139431e-05, "loss": 2.2846, "step": 4946500 }, { "epoch": 24.51, "learning_rate": 3.774982040496822e-05, "loss": 2.2827, "step": 4947000 }, { "epoch": 24.51, "learning_rate": 3.7748581818542135e-05, "loss": 2.2623, "step": 4947500 }, { "epoch": 24.51, "learning_rate": 3.774734323211605e-05, "loss": 2.3012, "step": 4948000 }, { "epoch": 24.52, "learning_rate": 3.774610712286282e-05, "loss": 2.2657, "step": 4948500 }, { "epoch": 24.52, "learning_rate": 3.774486853643674e-05, "loss": 2.279, "step": 4949000 }, { "epoch": 24.52, "learning_rate": 3.7743629950010654e-05, "loss": 2.3157, "step": 4949500 }, { "epoch": 24.52, "learning_rate": 3.774239136358457e-05, "loss": 2.2974, "step": 4950000 }, { "epoch": 24.53, "learning_rate": 3.774115277715848e-05, "loss": 2.2873, "step": 4950500 }, { "epoch": 24.53, "learning_rate": 3.773991666790526e-05, "loss": 2.2983, "step": 4951000 }, { "epoch": 24.53, "learning_rate": 3.7738678081479174e-05, "loss": 2.2926, "step": 4951500 }, { "epoch": 24.53, "learning_rate": 3.773743949505309e-05, "loss": 2.2734, "step": 4952000 }, { "epoch": 24.54, "learning_rate": 3.773620090862701e-05, "loss": 2.3017, "step": 4952500 }, { "epoch": 24.54, "learning_rate": 3.7734962322200925e-05, "loss": 2.3132, "step": 4953000 }, { "epoch": 24.54, "learning_rate": 3.7733723735774835e-05, "loss": 2.2891, "step": 4953500 }, { "epoch": 24.54, "learning_rate": 3.773248514934875e-05, "loss": 2.2889, "step": 4954000 }, { "epoch": 24.55, "learning_rate": 3.773124656292267e-05, "loss": 2.3073, "step": 4954500 }, { "epoch": 24.55, "learning_rate": 3.7730007976496585e-05, "loss": 2.2684, "step": 4955000 }, { "epoch": 24.55, "learning_rate": 3.7728774344416206e-05, "loss": 2.2905, "step": 4955500 }, { "epoch": 24.55, "learning_rate": 3.772753575799012e-05, "loss": 2.2836, "step": 4956000 }, { "epoch": 24.56, "learning_rate": 3.772629717156404e-05, "loss": 2.2936, "step": 4956500 }, { "epoch": 24.56, "learning_rate": 3.772505858513796e-05, "loss": 2.3092, "step": 4957000 }, { "epoch": 24.56, "learning_rate": 3.7723822475884726e-05, "loss": 2.2971, "step": 4957500 }, { "epoch": 24.56, "learning_rate": 3.772258388945864e-05, "loss": 2.2959, "step": 4958000 }, { "epoch": 24.57, "learning_rate": 3.772134530303256e-05, "loss": 2.2656, "step": 4958500 }, { "epoch": 24.57, "learning_rate": 3.7720106716606477e-05, "loss": 2.2685, "step": 4959000 }, { "epoch": 24.57, "learning_rate": 3.7718868130180393e-05, "loss": 2.2871, "step": 4959500 }, { "epoch": 24.57, "learning_rate": 3.7717629543754304e-05, "loss": 2.2768, "step": 4960000 }, { "epoch": 24.58, "learning_rate": 3.771639095732822e-05, "loss": 2.3032, "step": 4960500 }, { "epoch": 24.58, "learning_rate": 3.771515237090214e-05, "loss": 2.2941, "step": 4961000 }, { "epoch": 24.58, "learning_rate": 3.7713913784476054e-05, "loss": 2.2861, "step": 4961500 }, { "epoch": 24.58, "learning_rate": 3.771267519804997e-05, "loss": 2.2827, "step": 4962000 }, { "epoch": 24.59, "learning_rate": 3.771143661162389e-05, "loss": 2.2945, "step": 4962500 }, { "epoch": 24.59, "learning_rate": 3.77101980251978e-05, "loss": 2.292, "step": 4963000 }, { "epoch": 24.59, "learning_rate": 3.7708959438771715e-05, "loss": 2.3007, "step": 4963500 }, { "epoch": 24.59, "learning_rate": 3.770772085234563e-05, "loss": 2.2972, "step": 4964000 }, { "epoch": 24.6, "learning_rate": 3.770648226591955e-05, "loss": 2.2883, "step": 4964500 }, { "epoch": 24.6, "learning_rate": 3.7705246156666325e-05, "loss": 2.3001, "step": 4965000 }, { "epoch": 24.6, "learning_rate": 3.7704010047413094e-05, "loss": 2.2894, "step": 4965500 }, { "epoch": 24.6, "learning_rate": 3.770277146098701e-05, "loss": 2.2897, "step": 4966000 }, { "epoch": 24.61, "learning_rate": 3.770153287456093e-05, "loss": 2.267, "step": 4966500 }, { "epoch": 24.61, "learning_rate": 3.770029428813484e-05, "loss": 2.2917, "step": 4967000 }, { "epoch": 24.61, "learning_rate": 3.7699055701708755e-05, "loss": 2.3047, "step": 4967500 }, { "epoch": 24.61, "learning_rate": 3.769781711528267e-05, "loss": 2.2945, "step": 4968000 }, { "epoch": 24.62, "learning_rate": 3.769657852885659e-05, "loss": 2.2779, "step": 4968500 }, { "epoch": 24.62, "learning_rate": 3.769534241960336e-05, "loss": 2.2889, "step": 4969000 }, { "epoch": 24.62, "learning_rate": 3.7694103833177274e-05, "loss": 2.2736, "step": 4969500 }, { "epoch": 24.62, "learning_rate": 3.769286524675119e-05, "loss": 2.2839, "step": 4970000 }, { "epoch": 24.63, "learning_rate": 3.769162666032511e-05, "loss": 2.2858, "step": 4970500 }, { "epoch": 24.63, "learning_rate": 3.7690388073899025e-05, "loss": 2.2925, "step": 4971000 }, { "epoch": 24.63, "learning_rate": 3.768914948747294e-05, "loss": 2.2977, "step": 4971500 }, { "epoch": 24.63, "learning_rate": 3.768791090104685e-05, "loss": 2.2755, "step": 4972000 }, { "epoch": 24.64, "learning_rate": 3.768667231462077e-05, "loss": 2.2916, "step": 4972500 }, { "epoch": 24.64, "learning_rate": 3.7685433728194686e-05, "loss": 2.2846, "step": 4973000 }, { "epoch": 24.64, "learning_rate": 3.76841951417686e-05, "loss": 2.3258, "step": 4973500 }, { "epoch": 24.64, "learning_rate": 3.768295903251537e-05, "loss": 2.3021, "step": 4974000 }, { "epoch": 24.65, "learning_rate": 3.768172044608929e-05, "loss": 2.29, "step": 4974500 }, { "epoch": 24.65, "learning_rate": 3.7680481859663205e-05, "loss": 2.2926, "step": 4975000 }, { "epoch": 24.65, "learning_rate": 3.7679245750409974e-05, "loss": 2.3141, "step": 4975500 }, { "epoch": 24.65, "learning_rate": 3.767800716398389e-05, "loss": 2.3305, "step": 4976000 }, { "epoch": 24.66, "learning_rate": 3.767676857755781e-05, "loss": 2.298, "step": 4976500 }, { "epoch": 24.66, "learning_rate": 3.7675529991131725e-05, "loss": 2.2815, "step": 4977000 }, { "epoch": 24.66, "learning_rate": 3.767429140470564e-05, "loss": 2.2933, "step": 4977500 }, { "epoch": 24.66, "learning_rate": 3.767305281827956e-05, "loss": 2.2923, "step": 4978000 }, { "epoch": 24.67, "learning_rate": 3.767181423185347e-05, "loss": 2.2987, "step": 4978500 }, { "epoch": 24.67, "learning_rate": 3.7670575645427386e-05, "loss": 2.2733, "step": 4979000 }, { "epoch": 24.67, "learning_rate": 3.76693370590013e-05, "loss": 2.2781, "step": 4979500 }, { "epoch": 24.67, "learning_rate": 3.766809847257522e-05, "loss": 2.2993, "step": 4980000 }, { "epoch": 24.68, "learning_rate": 3.766685988614914e-05, "loss": 2.2971, "step": 4980500 }, { "epoch": 24.68, "learning_rate": 3.7665621299723054e-05, "loss": 2.2793, "step": 4981000 }, { "epoch": 24.68, "learning_rate": 3.766438519046982e-05, "loss": 2.3047, "step": 4981500 }, { "epoch": 24.68, "learning_rate": 3.766314908121659e-05, "loss": 2.2659, "step": 4982000 }, { "epoch": 24.69, "learning_rate": 3.766191049479051e-05, "loss": 2.2947, "step": 4982500 }, { "epoch": 24.69, "learning_rate": 3.7660671908364425e-05, "loss": 2.2981, "step": 4983000 }, { "epoch": 24.69, "learning_rate": 3.765943332193834e-05, "loss": 2.3044, "step": 4983500 }, { "epoch": 24.69, "learning_rate": 3.765819473551226e-05, "loss": 2.2545, "step": 4984000 }, { "epoch": 24.69, "learning_rate": 3.765695614908617e-05, "loss": 2.2985, "step": 4984500 }, { "epoch": 24.7, "learning_rate": 3.7655717562660086e-05, "loss": 2.2902, "step": 4985000 }, { "epoch": 24.7, "learning_rate": 3.765448145340686e-05, "loss": 2.2621, "step": 4985500 }, { "epoch": 24.7, "learning_rate": 3.765324286698078e-05, "loss": 2.2897, "step": 4986000 }, { "epoch": 24.7, "learning_rate": 3.7652004280554696e-05, "loss": 2.2919, "step": 4986500 }, { "epoch": 24.71, "learning_rate": 3.7650765694128606e-05, "loss": 2.2849, "step": 4987000 }, { "epoch": 24.71, "learning_rate": 3.764952710770252e-05, "loss": 2.2823, "step": 4987500 }, { "epoch": 24.71, "learning_rate": 3.764828852127644e-05, "loss": 2.3095, "step": 4988000 }, { "epoch": 24.71, "learning_rate": 3.764705241202321e-05, "loss": 2.3133, "step": 4988500 }, { "epoch": 24.72, "learning_rate": 3.7645813825597125e-05, "loss": 2.2789, "step": 4989000 }, { "epoch": 24.72, "learning_rate": 3.764457523917104e-05, "loss": 2.2912, "step": 4989500 }, { "epoch": 24.72, "learning_rate": 3.764333665274496e-05, "loss": 2.2882, "step": 4990000 }, { "epoch": 24.72, "learning_rate": 3.7642098066318876e-05, "loss": 2.2744, "step": 4990500 }, { "epoch": 24.73, "learning_rate": 3.7640859479892786e-05, "loss": 2.2954, "step": 4991000 }, { "epoch": 24.73, "learning_rate": 3.76396208934667e-05, "loss": 2.2657, "step": 4991500 }, { "epoch": 24.73, "learning_rate": 3.763838230704062e-05, "loss": 2.2872, "step": 4992000 }, { "epoch": 24.73, "learning_rate": 3.763714372061454e-05, "loss": 2.3018, "step": 4992500 }, { "epoch": 24.74, "learning_rate": 3.7635905134188454e-05, "loss": 2.276, "step": 4993000 }, { "epoch": 24.74, "learning_rate": 3.763466654776237e-05, "loss": 2.2895, "step": 4993500 }, { "epoch": 24.74, "learning_rate": 3.763342796133629e-05, "loss": 2.3095, "step": 4994000 }, { "epoch": 24.74, "learning_rate": 3.763219185208306e-05, "loss": 2.2876, "step": 4994500 }, { "epoch": 24.75, "learning_rate": 3.7630955742829825e-05, "loss": 2.3048, "step": 4995000 }, { "epoch": 24.75, "learning_rate": 3.762971715640374e-05, "loss": 2.296, "step": 4995500 }, { "epoch": 24.75, "learning_rate": 3.762847856997766e-05, "loss": 2.2919, "step": 4996000 }, { "epoch": 24.75, "learning_rate": 3.7627239983551576e-05, "loss": 2.2907, "step": 4996500 }, { "epoch": 24.76, "learning_rate": 3.7626001397125486e-05, "loss": 2.3036, "step": 4997000 }, { "epoch": 24.76, "learning_rate": 3.76247628106994e-05, "loss": 2.2775, "step": 4997500 }, { "epoch": 24.76, "learning_rate": 3.762352422427332e-05, "loss": 2.3023, "step": 4998000 }, { "epoch": 24.76, "learning_rate": 3.762228563784724e-05, "loss": 2.2945, "step": 4998500 }, { "epoch": 24.77, "learning_rate": 3.762104952859401e-05, "loss": 2.2875, "step": 4999000 }, { "epoch": 24.77, "learning_rate": 3.761981094216793e-05, "loss": 2.3163, "step": 4999500 }, { "epoch": 24.77, "learning_rate": 3.7618572355741847e-05, "loss": 2.3003, "step": 5000000 }, { "epoch": 24.77, "learning_rate": 3.761733376931576e-05, "loss": 2.2923, "step": 5000500 }, { "epoch": 24.78, "learning_rate": 3.7616095182889674e-05, "loss": 2.3032, "step": 5001000 }, { "epoch": 24.78, "learning_rate": 3.761485659646359e-05, "loss": 2.2881, "step": 5001500 }, { "epoch": 24.78, "learning_rate": 3.761361801003751e-05, "loss": 2.2852, "step": 5002000 }, { "epoch": 24.78, "learning_rate": 3.7612379423611424e-05, "loss": 2.2943, "step": 5002500 }, { "epoch": 24.79, "learning_rate": 3.7611140837185335e-05, "loss": 2.2967, "step": 5003000 }, { "epoch": 24.79, "learning_rate": 3.760990225075925e-05, "loss": 2.3114, "step": 5003500 }, { "epoch": 24.79, "learning_rate": 3.760866614150602e-05, "loss": 2.301, "step": 5004000 }, { "epoch": 24.79, "learning_rate": 3.7607430032252796e-05, "loss": 2.2613, "step": 5004500 }, { "epoch": 24.8, "learning_rate": 3.760619144582671e-05, "loss": 2.2814, "step": 5005000 }, { "epoch": 24.8, "learning_rate": 3.760495285940063e-05, "loss": 2.2881, "step": 5005500 }, { "epoch": 24.8, "learning_rate": 3.760371427297455e-05, "loss": 2.2897, "step": 5006000 }, { "epoch": 24.8, "learning_rate": 3.760247568654846e-05, "loss": 2.2766, "step": 5006500 }, { "epoch": 24.81, "learning_rate": 3.7601237100122374e-05, "loss": 2.2921, "step": 5007000 }, { "epoch": 24.81, "learning_rate": 3.759999851369629e-05, "loss": 2.3077, "step": 5007500 }, { "epoch": 24.81, "learning_rate": 3.759875992727021e-05, "loss": 2.2995, "step": 5008000 }, { "epoch": 24.81, "learning_rate": 3.7597521340844125e-05, "loss": 2.3194, "step": 5008500 }, { "epoch": 24.82, "learning_rate": 3.759628275441804e-05, "loss": 2.2753, "step": 5009000 }, { "epoch": 24.82, "learning_rate": 3.759504416799196e-05, "loss": 2.2873, "step": 5009500 }, { "epoch": 24.82, "learning_rate": 3.759380558156587e-05, "loss": 2.279, "step": 5010000 }, { "epoch": 24.82, "learning_rate": 3.759256947231264e-05, "loss": 2.2831, "step": 5010500 }, { "epoch": 24.83, "learning_rate": 3.7591330885886554e-05, "loss": 2.2996, "step": 5011000 }, { "epoch": 24.83, "learning_rate": 3.759009229946047e-05, "loss": 2.3105, "step": 5011500 }, { "epoch": 24.83, "learning_rate": 3.758885371303439e-05, "loss": 2.3228, "step": 5012000 }, { "epoch": 24.83, "learning_rate": 3.7587615126608305e-05, "loss": 2.3095, "step": 5012500 }, { "epoch": 24.84, "learning_rate": 3.7586379017355074e-05, "loss": 2.2962, "step": 5013000 }, { "epoch": 24.84, "learning_rate": 3.758514043092899e-05, "loss": 2.3076, "step": 5013500 }, { "epoch": 24.84, "learning_rate": 3.758390432167576e-05, "loss": 2.2853, "step": 5014000 }, { "epoch": 24.84, "learning_rate": 3.7582665735249677e-05, "loss": 2.2836, "step": 5014500 }, { "epoch": 24.85, "learning_rate": 3.7581427148823594e-05, "loss": 2.2983, "step": 5015000 }, { "epoch": 24.85, "learning_rate": 3.758018856239751e-05, "loss": 2.2958, "step": 5015500 }, { "epoch": 24.85, "learning_rate": 3.757894997597142e-05, "loss": 2.312, "step": 5016000 }, { "epoch": 24.85, "learning_rate": 3.757771138954534e-05, "loss": 2.2927, "step": 5016500 }, { "epoch": 24.86, "learning_rate": 3.7576472803119254e-05, "loss": 2.3107, "step": 5017000 }, { "epoch": 24.86, "learning_rate": 3.757523421669317e-05, "loss": 2.2668, "step": 5017500 }, { "epoch": 24.86, "learning_rate": 3.757399563026709e-05, "loss": 2.29, "step": 5018000 }, { "epoch": 24.86, "learning_rate": 3.7572757043841005e-05, "loss": 2.2804, "step": 5018500 }, { "epoch": 24.87, "learning_rate": 3.757151845741492e-05, "loss": 2.301, "step": 5019000 }, { "epoch": 24.87, "learning_rate": 3.757027987098884e-05, "loss": 2.2906, "step": 5019500 }, { "epoch": 24.87, "learning_rate": 3.756904376173561e-05, "loss": 2.264, "step": 5020000 }, { "epoch": 24.87, "learning_rate": 3.756780765248238e-05, "loss": 2.3048, "step": 5020500 }, { "epoch": 24.88, "learning_rate": 3.7566571543229146e-05, "loss": 2.2908, "step": 5021000 }, { "epoch": 24.88, "learning_rate": 3.756533295680306e-05, "loss": 2.31, "step": 5021500 }, { "epoch": 24.88, "learning_rate": 3.756409437037698e-05, "loss": 2.2986, "step": 5022000 }, { "epoch": 24.88, "learning_rate": 3.7562855783950896e-05, "loss": 2.2729, "step": 5022500 }, { "epoch": 24.89, "learning_rate": 3.756161719752481e-05, "loss": 2.2964, "step": 5023000 }, { "epoch": 24.89, "learning_rate": 3.756038108827158e-05, "loss": 2.2741, "step": 5023500 }, { "epoch": 24.89, "learning_rate": 3.75591425018455e-05, "loss": 2.315, "step": 5024000 }, { "epoch": 24.89, "learning_rate": 3.755790391541941e-05, "loss": 2.2977, "step": 5024500 }, { "epoch": 24.9, "learning_rate": 3.7556665328993326e-05, "loss": 2.292, "step": 5025000 }, { "epoch": 24.9, "learning_rate": 3.755542674256724e-05, "loss": 2.285, "step": 5025500 }, { "epoch": 24.9, "learning_rate": 3.755418815614116e-05, "loss": 2.3066, "step": 5026000 }, { "epoch": 24.9, "learning_rate": 3.755295204688793e-05, "loss": 2.2951, "step": 5026500 }, { "epoch": 24.91, "learning_rate": 3.7551713460461846e-05, "loss": 2.2866, "step": 5027000 }, { "epoch": 24.91, "learning_rate": 3.755047487403576e-05, "loss": 2.2931, "step": 5027500 }, { "epoch": 24.91, "learning_rate": 3.754923628760968e-05, "loss": 2.2865, "step": 5028000 }, { "epoch": 24.91, "learning_rate": 3.7547997701183596e-05, "loss": 2.2993, "step": 5028500 }, { "epoch": 24.92, "learning_rate": 3.7546761591930365e-05, "loss": 2.3104, "step": 5029000 }, { "epoch": 24.92, "learning_rate": 3.754552300550428e-05, "loss": 2.2981, "step": 5029500 }, { "epoch": 24.92, "learning_rate": 3.75442844190782e-05, "loss": 2.3134, "step": 5030000 }, { "epoch": 24.92, "learning_rate": 3.7543045832652116e-05, "loss": 2.3015, "step": 5030500 }, { "epoch": 24.93, "learning_rate": 3.7541807246226026e-05, "loss": 2.2976, "step": 5031000 }, { "epoch": 24.93, "learning_rate": 3.754056865979994e-05, "loss": 2.3091, "step": 5031500 }, { "epoch": 24.93, "learning_rate": 3.753933007337386e-05, "loss": 2.2988, "step": 5032000 }, { "epoch": 24.93, "learning_rate": 3.753809396412063e-05, "loss": 2.2839, "step": 5032500 }, { "epoch": 24.94, "learning_rate": 3.75368578548674e-05, "loss": 2.297, "step": 5033000 }, { "epoch": 24.94, "learning_rate": 3.7535619268441315e-05, "loss": 2.2998, "step": 5033500 }, { "epoch": 24.94, "learning_rate": 3.753438068201523e-05, "loss": 2.2997, "step": 5034000 }, { "epoch": 24.94, "learning_rate": 3.753314209558915e-05, "loss": 2.2748, "step": 5034500 }, { "epoch": 24.95, "learning_rate": 3.7531903509163065e-05, "loss": 2.3065, "step": 5035000 }, { "epoch": 24.95, "learning_rate": 3.753066492273698e-05, "loss": 2.3051, "step": 5035500 }, { "epoch": 24.95, "learning_rate": 3.75294263363109e-05, "loss": 2.2992, "step": 5036000 }, { "epoch": 24.95, "learning_rate": 3.7528187749884816e-05, "loss": 2.3059, "step": 5036500 }, { "epoch": 24.96, "learning_rate": 3.752694916345873e-05, "loss": 2.2977, "step": 5037000 }, { "epoch": 24.96, "learning_rate": 3.752571057703265e-05, "loss": 2.289, "step": 5037500 }, { "epoch": 24.96, "learning_rate": 3.752447199060656e-05, "loss": 2.2699, "step": 5038000 }, { "epoch": 24.96, "learning_rate": 3.752323340418048e-05, "loss": 2.2974, "step": 5038500 }, { "epoch": 24.96, "learning_rate": 3.7521994817754394e-05, "loss": 2.2897, "step": 5039000 }, { "epoch": 24.97, "learning_rate": 3.752075623132831e-05, "loss": 2.3178, "step": 5039500 }, { "epoch": 24.97, "learning_rate": 3.751951764490223e-05, "loss": 2.3062, "step": 5040000 }, { "epoch": 24.97, "learning_rate": 3.7518279058476145e-05, "loss": 2.3189, "step": 5040500 }, { "epoch": 24.97, "learning_rate": 3.7517040472050055e-05, "loss": 2.299, "step": 5041000 }, { "epoch": 24.98, "learning_rate": 3.751580188562397e-05, "loss": 2.3129, "step": 5041500 }, { "epoch": 24.98, "learning_rate": 3.751456329919789e-05, "loss": 2.3282, "step": 5042000 }, { "epoch": 24.98, "learning_rate": 3.7513324712771806e-05, "loss": 2.2736, "step": 5042500 }, { "epoch": 24.98, "learning_rate": 3.751208612634572e-05, "loss": 2.3151, "step": 5043000 }, { "epoch": 24.99, "learning_rate": 3.751085249426535e-05, "loss": 2.2894, "step": 5043500 }, { "epoch": 24.99, "learning_rate": 3.750961390783927e-05, "loss": 2.3101, "step": 5044000 }, { "epoch": 24.99, "learning_rate": 3.750837532141318e-05, "loss": 2.3174, "step": 5044500 }, { "epoch": 24.99, "learning_rate": 3.7507136734987094e-05, "loss": 2.311, "step": 5045000 }, { "epoch": 25.0, "learning_rate": 3.750589814856101e-05, "loss": 2.3085, "step": 5045500 }, { "epoch": 25.0, "learning_rate": 3.750465956213493e-05, "loss": 2.2712, "step": 5046000 }, { "epoch": 25.0, "eval_accuracy": 0.654987755169183, "eval_accuracy_mlm": 0.6101664086430171, "eval_accuracy_nsp": 0.8666530697092474, "eval_loss": 2.337043523788452, "eval_runtime": 145.9167, "eval_samples_per_second": 1747.291, "eval_steps_per_second": 72.809, "step": 5046075 }, { "epoch": 25.0, "learning_rate": 3.7503420975708845e-05, "loss": 2.2487, "step": 5046500 }, { "epoch": 25.0, "learning_rate": 3.7502184866455614e-05, "loss": 2.254, "step": 5047000 }, { "epoch": 25.01, "learning_rate": 3.750094628002953e-05, "loss": 2.2641, "step": 5047500 }, { "epoch": 25.01, "learning_rate": 3.74997101707763e-05, "loss": 2.2432, "step": 5048000 }, { "epoch": 25.01, "learning_rate": 3.7498471584350216e-05, "loss": 2.2432, "step": 5048500 }, { "epoch": 25.01, "learning_rate": 3.749723299792413e-05, "loss": 2.2643, "step": 5049000 }, { "epoch": 25.02, "learning_rate": 3.749599441149805e-05, "loss": 2.2568, "step": 5049500 }, { "epoch": 25.02, "learning_rate": 3.749475582507197e-05, "loss": 2.2875, "step": 5050000 }, { "epoch": 25.02, "learning_rate": 3.749351971581873e-05, "loss": 2.2394, "step": 5050500 }, { "epoch": 25.02, "learning_rate": 3.74922836065655e-05, "loss": 2.2771, "step": 5051000 }, { "epoch": 25.03, "learning_rate": 3.7491045020139415e-05, "loss": 2.3011, "step": 5051500 }, { "epoch": 25.03, "learning_rate": 3.748980643371333e-05, "loss": 2.2375, "step": 5052000 }, { "epoch": 25.03, "learning_rate": 3.748856784728725e-05, "loss": 2.2646, "step": 5052500 }, { "epoch": 25.03, "learning_rate": 3.7487329260861166e-05, "loss": 2.2578, "step": 5053000 }, { "epoch": 25.04, "learning_rate": 3.748609067443508e-05, "loss": 2.2628, "step": 5053500 }, { "epoch": 25.04, "learning_rate": 3.7484852088009e-05, "loss": 2.2763, "step": 5054000 }, { "epoch": 25.04, "learning_rate": 3.7483613501582917e-05, "loss": 2.2344, "step": 5054500 }, { "epoch": 25.04, "learning_rate": 3.7482374915156833e-05, "loss": 2.2395, "step": 5055000 }, { "epoch": 25.05, "learning_rate": 3.7481138805903595e-05, "loss": 2.2747, "step": 5055500 }, { "epoch": 25.05, "learning_rate": 3.747990021947751e-05, "loss": 2.2727, "step": 5056000 }, { "epoch": 25.05, "learning_rate": 3.747866411022429e-05, "loss": 2.2932, "step": 5056500 }, { "epoch": 25.05, "learning_rate": 3.7477425523798205e-05, "loss": 2.2713, "step": 5057000 }, { "epoch": 25.06, "learning_rate": 3.7476186937372115e-05, "loss": 2.2643, "step": 5057500 }, { "epoch": 25.06, "learning_rate": 3.747494835094603e-05, "loss": 2.2898, "step": 5058000 }, { "epoch": 25.06, "learning_rate": 3.747370976451995e-05, "loss": 2.2548, "step": 5058500 }, { "epoch": 25.06, "learning_rate": 3.7472471178093866e-05, "loss": 2.2443, "step": 5059000 }, { "epoch": 25.07, "learning_rate": 3.747123259166778e-05, "loss": 2.2565, "step": 5059500 }, { "epoch": 25.07, "learning_rate": 3.74699940052417e-05, "loss": 2.2639, "step": 5060000 }, { "epoch": 25.07, "learning_rate": 3.746875541881562e-05, "loss": 2.266, "step": 5060500 }, { "epoch": 25.07, "learning_rate": 3.7467516832389534e-05, "loss": 2.2691, "step": 5061000 }, { "epoch": 25.08, "learning_rate": 3.746627824596345e-05, "loss": 2.2544, "step": 5061500 }, { "epoch": 25.08, "learning_rate": 3.746504213671021e-05, "loss": 2.2926, "step": 5062000 }, { "epoch": 25.08, "learning_rate": 3.746380355028413e-05, "loss": 2.2788, "step": 5062500 }, { "epoch": 25.08, "learning_rate": 3.7462564963858046e-05, "loss": 2.2783, "step": 5063000 }, { "epoch": 25.09, "learning_rate": 3.7461328854604815e-05, "loss": 2.279, "step": 5063500 }, { "epoch": 25.09, "learning_rate": 3.746009274535159e-05, "loss": 2.3008, "step": 5064000 }, { "epoch": 25.09, "learning_rate": 3.745885415892551e-05, "loss": 2.2739, "step": 5064500 }, { "epoch": 25.09, "learning_rate": 3.7457615572499425e-05, "loss": 2.2622, "step": 5065000 }, { "epoch": 25.1, "learning_rate": 3.745637698607334e-05, "loss": 2.2654, "step": 5065500 }, { "epoch": 25.1, "learning_rate": 3.745513839964725e-05, "loss": 2.2805, "step": 5066000 }, { "epoch": 25.1, "learning_rate": 3.745389981322117e-05, "loss": 2.2743, "step": 5066500 }, { "epoch": 25.1, "learning_rate": 3.7452661226795086e-05, "loss": 2.2902, "step": 5067000 }, { "epoch": 25.11, "learning_rate": 3.7451422640369e-05, "loss": 2.2832, "step": 5067500 }, { "epoch": 25.11, "learning_rate": 3.745018405394292e-05, "loss": 2.2714, "step": 5068000 }, { "epoch": 25.11, "learning_rate": 3.744894546751683e-05, "loss": 2.2632, "step": 5068500 }, { "epoch": 25.11, "learning_rate": 3.7447706881090747e-05, "loss": 2.2622, "step": 5069000 }, { "epoch": 25.12, "learning_rate": 3.7446468294664663e-05, "loss": 2.2784, "step": 5069500 }, { "epoch": 25.12, "learning_rate": 3.744522970823858e-05, "loss": 2.257, "step": 5070000 }, { "epoch": 25.12, "learning_rate": 3.744399607615821e-05, "loss": 2.2936, "step": 5070500 }, { "epoch": 25.12, "learning_rate": 3.7442757489732125e-05, "loss": 2.2487, "step": 5071000 }, { "epoch": 25.13, "learning_rate": 3.744151890330604e-05, "loss": 2.281, "step": 5071500 }, { "epoch": 25.13, "learning_rate": 3.744028031687996e-05, "loss": 2.2606, "step": 5072000 }, { "epoch": 25.13, "learning_rate": 3.743904173045387e-05, "loss": 2.2643, "step": 5072500 }, { "epoch": 25.13, "learning_rate": 3.7437803144027786e-05, "loss": 2.2806, "step": 5073000 }, { "epoch": 25.14, "learning_rate": 3.74365645576017e-05, "loss": 2.2664, "step": 5073500 }, { "epoch": 25.14, "learning_rate": 3.743532844834847e-05, "loss": 2.261, "step": 5074000 }, { "epoch": 25.14, "learning_rate": 3.743408986192239e-05, "loss": 2.2754, "step": 5074500 }, { "epoch": 25.14, "learning_rate": 3.743285375266916e-05, "loss": 2.2802, "step": 5075000 }, { "epoch": 25.15, "learning_rate": 3.7431615166243074e-05, "loss": 2.2682, "step": 5075500 }, { "epoch": 25.15, "learning_rate": 3.743037657981699e-05, "loss": 2.2538, "step": 5076000 }, { "epoch": 25.15, "learning_rate": 3.742913799339091e-05, "loss": 2.2676, "step": 5076500 }, { "epoch": 25.15, "learning_rate": 3.7427899406964825e-05, "loss": 2.291, "step": 5077000 }, { "epoch": 25.16, "learning_rate": 3.742666082053874e-05, "loss": 2.2751, "step": 5077500 }, { "epoch": 25.16, "learning_rate": 3.742542223411266e-05, "loss": 2.2786, "step": 5078000 }, { "epoch": 25.16, "learning_rate": 3.742418612485942e-05, "loss": 2.2768, "step": 5078500 }, { "epoch": 25.16, "learning_rate": 3.742294753843334e-05, "loss": 2.2702, "step": 5079000 }, { "epoch": 25.17, "learning_rate": 3.7421708952007255e-05, "loss": 2.2698, "step": 5079500 }, { "epoch": 25.17, "learning_rate": 3.7420472842754023e-05, "loss": 2.2873, "step": 5080000 }, { "epoch": 25.17, "learning_rate": 3.741923425632794e-05, "loss": 2.2968, "step": 5080500 }, { "epoch": 25.17, "learning_rate": 3.741799566990186e-05, "loss": 2.2721, "step": 5081000 }, { "epoch": 25.18, "learning_rate": 3.7416757083475774e-05, "loss": 2.2607, "step": 5081500 }, { "epoch": 25.18, "learning_rate": 3.741551849704969e-05, "loss": 2.2801, "step": 5082000 }, { "epoch": 25.18, "learning_rate": 3.741427991062361e-05, "loss": 2.2657, "step": 5082500 }, { "epoch": 25.18, "learning_rate": 3.7413041324197525e-05, "loss": 2.2593, "step": 5083000 }, { "epoch": 25.19, "learning_rate": 3.741180273777144e-05, "loss": 2.2759, "step": 5083500 }, { "epoch": 25.19, "learning_rate": 3.741056415134536e-05, "loss": 2.2836, "step": 5084000 }, { "epoch": 25.19, "learning_rate": 3.7409325564919276e-05, "loss": 2.2825, "step": 5084500 }, { "epoch": 25.19, "learning_rate": 3.740808697849319e-05, "loss": 2.2724, "step": 5085000 }, { "epoch": 25.2, "learning_rate": 3.74068483920671e-05, "loss": 2.268, "step": 5085500 }, { "epoch": 25.2, "learning_rate": 3.740560980564102e-05, "loss": 2.2915, "step": 5086000 }, { "epoch": 25.2, "learning_rate": 3.740437121921494e-05, "loss": 2.2578, "step": 5086500 }, { "epoch": 25.2, "learning_rate": 3.7403132632788854e-05, "loss": 2.2763, "step": 5087000 }, { "epoch": 25.21, "learning_rate": 3.740189404636277e-05, "loss": 2.2847, "step": 5087500 }, { "epoch": 25.21, "learning_rate": 3.740065545993669e-05, "loss": 2.2892, "step": 5088000 }, { "epoch": 25.21, "learning_rate": 3.73994168735106e-05, "loss": 2.2638, "step": 5088500 }, { "epoch": 25.21, "learning_rate": 3.7398178287084515e-05, "loss": 2.2464, "step": 5089000 }, { "epoch": 25.22, "learning_rate": 3.739693970065843e-05, "loss": 2.2865, "step": 5089500 }, { "epoch": 25.22, "learning_rate": 3.739570111423235e-05, "loss": 2.2407, "step": 5090000 }, { "epoch": 25.22, "learning_rate": 3.7394462527806265e-05, "loss": 2.2847, "step": 5090500 }, { "epoch": 25.22, "learning_rate": 3.739322394138018e-05, "loss": 2.2936, "step": 5091000 }, { "epoch": 25.23, "learning_rate": 3.73919853549541e-05, "loss": 2.2751, "step": 5091500 }, { "epoch": 25.23, "learning_rate": 3.7390746768528016e-05, "loss": 2.2734, "step": 5092000 }, { "epoch": 25.23, "learning_rate": 3.738951313644764e-05, "loss": 2.2856, "step": 5092500 }, { "epoch": 25.23, "learning_rate": 3.7388274550021554e-05, "loss": 2.2731, "step": 5093000 }, { "epoch": 25.23, "learning_rate": 3.738703596359547e-05, "loss": 2.2916, "step": 5093500 }, { "epoch": 25.24, "learning_rate": 3.738579737716939e-05, "loss": 2.2879, "step": 5094000 }, { "epoch": 25.24, "learning_rate": 3.7384561267916156e-05, "loss": 2.2828, "step": 5094500 }, { "epoch": 25.24, "learning_rate": 3.7383325158662925e-05, "loss": 2.2666, "step": 5095000 }, { "epoch": 25.24, "learning_rate": 3.738208657223684e-05, "loss": 2.2738, "step": 5095500 }, { "epoch": 25.25, "learning_rate": 3.738084798581076e-05, "loss": 2.2947, "step": 5096000 }, { "epoch": 25.25, "learning_rate": 3.7379609399384676e-05, "loss": 2.2574, "step": 5096500 }, { "epoch": 25.25, "learning_rate": 3.737837081295859e-05, "loss": 2.2668, "step": 5097000 }, { "epoch": 25.25, "learning_rate": 3.737713222653251e-05, "loss": 2.2884, "step": 5097500 }, { "epoch": 25.26, "learning_rate": 3.737589364010642e-05, "loss": 2.2407, "step": 5098000 }, { "epoch": 25.26, "learning_rate": 3.737465505368034e-05, "loss": 2.2656, "step": 5098500 }, { "epoch": 25.26, "learning_rate": 3.7373416467254254e-05, "loss": 2.2799, "step": 5099000 }, { "epoch": 25.26, "learning_rate": 3.737217788082817e-05, "loss": 2.2688, "step": 5099500 }, { "epoch": 25.27, "learning_rate": 3.737093929440209e-05, "loss": 2.284, "step": 5100000 }, { "epoch": 25.27, "learning_rate": 3.7369703185148857e-05, "loss": 2.2631, "step": 5100500 }, { "epoch": 25.27, "learning_rate": 3.736846459872277e-05, "loss": 2.3051, "step": 5101000 }, { "epoch": 25.27, "learning_rate": 3.7367226012296684e-05, "loss": 2.2549, "step": 5101500 }, { "epoch": 25.28, "learning_rate": 3.73659874258706e-05, "loss": 2.2622, "step": 5102000 }, { "epoch": 25.28, "learning_rate": 3.736474883944452e-05, "loss": 2.2823, "step": 5102500 }, { "epoch": 25.28, "learning_rate": 3.7363510253018434e-05, "loss": 2.2786, "step": 5103000 }, { "epoch": 25.28, "learning_rate": 3.736227166659235e-05, "loss": 2.3078, "step": 5103500 }, { "epoch": 25.29, "learning_rate": 3.736103803451197e-05, "loss": 2.3087, "step": 5104000 }, { "epoch": 25.29, "learning_rate": 3.735980192525874e-05, "loss": 2.2929, "step": 5104500 }, { "epoch": 25.29, "learning_rate": 3.735856333883266e-05, "loss": 2.2852, "step": 5105000 }, { "epoch": 25.29, "learning_rate": 3.7357324752406575e-05, "loss": 2.2776, "step": 5105500 }, { "epoch": 25.3, "learning_rate": 3.735608616598049e-05, "loss": 2.2995, "step": 5106000 }, { "epoch": 25.3, "learning_rate": 3.735484757955441e-05, "loss": 2.2672, "step": 5106500 }, { "epoch": 25.3, "learning_rate": 3.7353608993128326e-05, "loss": 2.2895, "step": 5107000 }, { "epoch": 25.3, "learning_rate": 3.735237040670224e-05, "loss": 2.277, "step": 5107500 }, { "epoch": 25.31, "learning_rate": 3.735113182027616e-05, "loss": 2.275, "step": 5108000 }, { "epoch": 25.31, "learning_rate": 3.7349893233850076e-05, "loss": 2.2546, "step": 5108500 }, { "epoch": 25.31, "learning_rate": 3.734865464742399e-05, "loss": 2.295, "step": 5109000 }, { "epoch": 25.31, "learning_rate": 3.7347418538170755e-05, "loss": 2.2752, "step": 5109500 }, { "epoch": 25.32, "learning_rate": 3.734617995174467e-05, "loss": 2.2777, "step": 5110000 }, { "epoch": 25.32, "learning_rate": 3.734494136531859e-05, "loss": 2.2843, "step": 5110500 }, { "epoch": 25.32, "learning_rate": 3.7343702778892506e-05, "loss": 2.2714, "step": 5111000 }, { "epoch": 25.32, "learning_rate": 3.734246419246642e-05, "loss": 2.2732, "step": 5111500 }, { "epoch": 25.33, "learning_rate": 3.734122560604034e-05, "loss": 2.3063, "step": 5112000 }, { "epoch": 25.33, "learning_rate": 3.733998701961426e-05, "loss": 2.2975, "step": 5112500 }, { "epoch": 25.33, "learning_rate": 3.7338748433188174e-05, "loss": 2.291, "step": 5113000 }, { "epoch": 25.33, "learning_rate": 3.7337509846762084e-05, "loss": 2.2564, "step": 5113500 }, { "epoch": 25.34, "learning_rate": 3.7336271260336e-05, "loss": 2.2872, "step": 5114000 }, { "epoch": 25.34, "learning_rate": 3.733503267390992e-05, "loss": 2.2867, "step": 5114500 }, { "epoch": 25.34, "learning_rate": 3.7333794087483835e-05, "loss": 2.243, "step": 5115000 }, { "epoch": 25.34, "learning_rate": 3.733255797823061e-05, "loss": 2.2655, "step": 5115500 }, { "epoch": 25.35, "learning_rate": 3.733131939180453e-05, "loss": 2.2926, "step": 5116000 }, { "epoch": 25.35, "learning_rate": 3.733008080537844e-05, "loss": 2.2715, "step": 5116500 }, { "epoch": 25.35, "learning_rate": 3.7328842218952354e-05, "loss": 2.2757, "step": 5117000 }, { "epoch": 25.35, "learning_rate": 3.732760363252627e-05, "loss": 2.2806, "step": 5117500 }, { "epoch": 25.36, "learning_rate": 3.732636752327304e-05, "loss": 2.2805, "step": 5118000 }, { "epoch": 25.36, "learning_rate": 3.732513141401981e-05, "loss": 2.2542, "step": 5118500 }, { "epoch": 25.36, "learning_rate": 3.7323892827593726e-05, "loss": 2.277, "step": 5119000 }, { "epoch": 25.36, "learning_rate": 3.732265424116764e-05, "loss": 2.2745, "step": 5119500 }, { "epoch": 25.37, "learning_rate": 3.732141565474156e-05, "loss": 2.2858, "step": 5120000 }, { "epoch": 25.37, "learning_rate": 3.7320177068315477e-05, "loss": 2.2666, "step": 5120500 }, { "epoch": 25.37, "learning_rate": 3.7318938481889393e-05, "loss": 2.2903, "step": 5121000 }, { "epoch": 25.37, "learning_rate": 3.731769989546331e-05, "loss": 2.2642, "step": 5121500 }, { "epoch": 25.38, "learning_rate": 3.731646130903723e-05, "loss": 2.2661, "step": 5122000 }, { "epoch": 25.38, "learning_rate": 3.7315222722611144e-05, "loss": 2.2305, "step": 5122500 }, { "epoch": 25.38, "learning_rate": 3.7313984136185054e-05, "loss": 2.2755, "step": 5123000 }, { "epoch": 25.38, "learning_rate": 3.731274554975897e-05, "loss": 2.2832, "step": 5123500 }, { "epoch": 25.39, "learning_rate": 3.731150696333289e-05, "loss": 2.2592, "step": 5124000 }, { "epoch": 25.39, "learning_rate": 3.7310268376906805e-05, "loss": 2.2704, "step": 5124500 }, { "epoch": 25.39, "learning_rate": 3.7309032267653574e-05, "loss": 2.2827, "step": 5125000 }, { "epoch": 25.39, "learning_rate": 3.730779368122749e-05, "loss": 2.2715, "step": 5125500 }, { "epoch": 25.4, "learning_rate": 3.73065550948014e-05, "loss": 2.2773, "step": 5126000 }, { "epoch": 25.4, "learning_rate": 3.730531650837532e-05, "loss": 2.2667, "step": 5126500 }, { "epoch": 25.4, "learning_rate": 3.7304080399122094e-05, "loss": 2.3061, "step": 5127000 }, { "epoch": 25.4, "learning_rate": 3.730284428986886e-05, "loss": 2.2737, "step": 5127500 }, { "epoch": 25.41, "learning_rate": 3.730160570344278e-05, "loss": 2.2715, "step": 5128000 }, { "epoch": 25.41, "learning_rate": 3.7300367117016696e-05, "loss": 2.2898, "step": 5128500 }, { "epoch": 25.41, "learning_rate": 3.729912853059061e-05, "loss": 2.2592, "step": 5129000 }, { "epoch": 25.41, "learning_rate": 3.729788994416453e-05, "loss": 2.2816, "step": 5129500 }, { "epoch": 25.42, "learning_rate": 3.729665135773844e-05, "loss": 2.2692, "step": 5130000 }, { "epoch": 25.42, "learning_rate": 3.729541277131236e-05, "loss": 2.2715, "step": 5130500 }, { "epoch": 25.42, "learning_rate": 3.7294174184886274e-05, "loss": 2.28, "step": 5131000 }, { "epoch": 25.42, "learning_rate": 3.729293807563304e-05, "loss": 2.2883, "step": 5131500 }, { "epoch": 25.43, "learning_rate": 3.729169948920696e-05, "loss": 2.2569, "step": 5132000 }, { "epoch": 25.43, "learning_rate": 3.729046337995373e-05, "loss": 2.2704, "step": 5132500 }, { "epoch": 25.43, "learning_rate": 3.7289224793527646e-05, "loss": 2.279, "step": 5133000 }, { "epoch": 25.43, "learning_rate": 3.728798620710156e-05, "loss": 2.2866, "step": 5133500 }, { "epoch": 25.44, "learning_rate": 3.728674762067548e-05, "loss": 2.2773, "step": 5134000 }, { "epoch": 25.44, "learning_rate": 3.7285509034249396e-05, "loss": 2.3014, "step": 5134500 }, { "epoch": 25.44, "learning_rate": 3.728427044782331e-05, "loss": 2.269, "step": 5135000 }, { "epoch": 25.44, "learning_rate": 3.728303186139723e-05, "loss": 2.2995, "step": 5135500 }, { "epoch": 25.45, "learning_rate": 3.728179327497115e-05, "loss": 2.2661, "step": 5136000 }, { "epoch": 25.45, "learning_rate": 3.7280554688545064e-05, "loss": 2.2761, "step": 5136500 }, { "epoch": 25.45, "learning_rate": 3.7279316102118974e-05, "loss": 2.2874, "step": 5137000 }, { "epoch": 25.45, "learning_rate": 3.727807751569289e-05, "loss": 2.2651, "step": 5137500 }, { "epoch": 25.46, "learning_rate": 3.727683892926681e-05, "loss": 2.2692, "step": 5138000 }, { "epoch": 25.46, "learning_rate": 3.7275600342840725e-05, "loss": 2.2918, "step": 5138500 }, { "epoch": 25.46, "learning_rate": 3.7274364233587494e-05, "loss": 2.288, "step": 5139000 }, { "epoch": 25.46, "learning_rate": 3.727312564716141e-05, "loss": 2.2863, "step": 5139500 }, { "epoch": 25.47, "learning_rate": 3.727188706073533e-05, "loss": 2.2988, "step": 5140000 }, { "epoch": 25.47, "learning_rate": 3.7270648474309245e-05, "loss": 2.2731, "step": 5140500 }, { "epoch": 25.47, "learning_rate": 3.726940988788316e-05, "loss": 2.2773, "step": 5141000 }, { "epoch": 25.47, "learning_rate": 3.726817130145707e-05, "loss": 2.2942, "step": 5141500 }, { "epoch": 25.48, "learning_rate": 3.726693519220385e-05, "loss": 2.2827, "step": 5142000 }, { "epoch": 25.48, "learning_rate": 3.7265696605777764e-05, "loss": 2.2954, "step": 5142500 }, { "epoch": 25.48, "learning_rate": 3.726445801935168e-05, "loss": 2.2909, "step": 5143000 }, { "epoch": 25.48, "learning_rate": 3.726322191009844e-05, "loss": 2.3019, "step": 5143500 }, { "epoch": 25.49, "learning_rate": 3.726198332367236e-05, "loss": 2.2943, "step": 5144000 }, { "epoch": 25.49, "learning_rate": 3.726074473724628e-05, "loss": 2.2736, "step": 5144500 }, { "epoch": 25.49, "learning_rate": 3.7259506150820194e-05, "loss": 2.2876, "step": 5145000 }, { "epoch": 25.49, "learning_rate": 3.725826756439411e-05, "loss": 2.2856, "step": 5145500 }, { "epoch": 25.5, "learning_rate": 3.725702897796803e-05, "loss": 2.272, "step": 5146000 }, { "epoch": 25.5, "learning_rate": 3.7255790391541945e-05, "loss": 2.2921, "step": 5146500 }, { "epoch": 25.5, "learning_rate": 3.725455180511586e-05, "loss": 2.2833, "step": 5147000 }, { "epoch": 25.5, "learning_rate": 3.725331321868978e-05, "loss": 2.2587, "step": 5147500 }, { "epoch": 25.5, "learning_rate": 3.725207463226369e-05, "loss": 2.2883, "step": 5148000 }, { "epoch": 25.51, "learning_rate": 3.7250836045837606e-05, "loss": 2.2676, "step": 5148500 }, { "epoch": 25.51, "learning_rate": 3.724959993658438e-05, "loss": 2.2797, "step": 5149000 }, { "epoch": 25.51, "learning_rate": 3.72483613501583e-05, "loss": 2.2961, "step": 5149500 }, { "epoch": 25.51, "learning_rate": 3.724712524090506e-05, "loss": 2.2984, "step": 5150000 }, { "epoch": 25.52, "learning_rate": 3.724588665447898e-05, "loss": 2.2816, "step": 5150500 }, { "epoch": 25.52, "learning_rate": 3.7244648068052894e-05, "loss": 2.2632, "step": 5151000 }, { "epoch": 25.52, "learning_rate": 3.724340948162681e-05, "loss": 2.2847, "step": 5151500 }, { "epoch": 25.52, "learning_rate": 3.724217089520073e-05, "loss": 2.2749, "step": 5152000 }, { "epoch": 25.53, "learning_rate": 3.7240932308774645e-05, "loss": 2.3042, "step": 5152500 }, { "epoch": 25.53, "learning_rate": 3.723969372234856e-05, "loss": 2.2551, "step": 5153000 }, { "epoch": 25.53, "learning_rate": 3.723845513592248e-05, "loss": 2.2692, "step": 5153500 }, { "epoch": 25.53, "learning_rate": 3.723721654949639e-05, "loss": 2.2742, "step": 5154000 }, { "epoch": 25.54, "learning_rate": 3.7235980440243164e-05, "loss": 2.2607, "step": 5154500 }, { "epoch": 25.54, "learning_rate": 3.7234744330989927e-05, "loss": 2.3025, "step": 5155000 }, { "epoch": 25.54, "learning_rate": 3.7233505744563843e-05, "loss": 2.2741, "step": 5155500 }, { "epoch": 25.54, "learning_rate": 3.723226715813776e-05, "loss": 2.2887, "step": 5156000 }, { "epoch": 25.55, "learning_rate": 3.723102857171168e-05, "loss": 2.2849, "step": 5156500 }, { "epoch": 25.55, "learning_rate": 3.7229789985285594e-05, "loss": 2.2911, "step": 5157000 }, { "epoch": 25.55, "learning_rate": 3.722855139885951e-05, "loss": 2.266, "step": 5157500 }, { "epoch": 25.55, "learning_rate": 3.722731281243343e-05, "loss": 2.2631, "step": 5158000 }, { "epoch": 25.56, "learning_rate": 3.7226074226007345e-05, "loss": 2.2779, "step": 5158500 }, { "epoch": 25.56, "learning_rate": 3.722483563958126e-05, "loss": 2.2959, "step": 5159000 }, { "epoch": 25.56, "learning_rate": 3.722359705315518e-05, "loss": 2.306, "step": 5159500 }, { "epoch": 25.56, "learning_rate": 3.7222358466729096e-05, "loss": 2.2844, "step": 5160000 }, { "epoch": 25.57, "learning_rate": 3.7221119880303006e-05, "loss": 2.2897, "step": 5160500 }, { "epoch": 25.57, "learning_rate": 3.721988377104978e-05, "loss": 2.2778, "step": 5161000 }, { "epoch": 25.57, "learning_rate": 3.7218647661796544e-05, "loss": 2.2916, "step": 5161500 }, { "epoch": 25.57, "learning_rate": 3.721740907537046e-05, "loss": 2.2833, "step": 5162000 }, { "epoch": 25.58, "learning_rate": 3.721617048894438e-05, "loss": 2.2951, "step": 5162500 }, { "epoch": 25.58, "learning_rate": 3.7214931902518294e-05, "loss": 2.273, "step": 5163000 }, { "epoch": 25.58, "learning_rate": 3.721369331609221e-05, "loss": 2.2695, "step": 5163500 }, { "epoch": 25.58, "learning_rate": 3.721245720683898e-05, "loss": 2.2833, "step": 5164000 }, { "epoch": 25.59, "learning_rate": 3.72112186204129e-05, "loss": 2.2957, "step": 5164500 }, { "epoch": 25.59, "learning_rate": 3.7209980033986814e-05, "loss": 2.2693, "step": 5165000 }, { "epoch": 25.59, "learning_rate": 3.720874144756073e-05, "loss": 2.2783, "step": 5165500 }, { "epoch": 25.59, "learning_rate": 3.720750286113465e-05, "loss": 2.2865, "step": 5166000 }, { "epoch": 25.6, "learning_rate": 3.7206264274708565e-05, "loss": 2.2855, "step": 5166500 }, { "epoch": 25.6, "learning_rate": 3.720502568828248e-05, "loss": 2.2972, "step": 5167000 }, { "epoch": 25.6, "learning_rate": 3.72037871018564e-05, "loss": 2.2696, "step": 5167500 }, { "epoch": 25.6, "learning_rate": 3.720255099260316e-05, "loss": 2.2739, "step": 5168000 }, { "epoch": 25.61, "learning_rate": 3.720131488334993e-05, "loss": 2.2988, "step": 5168500 }, { "epoch": 25.61, "learning_rate": 3.7200078774096705e-05, "loss": 2.2543, "step": 5169000 }, { "epoch": 25.61, "learning_rate": 3.719884018767062e-05, "loss": 2.2768, "step": 5169500 }, { "epoch": 25.61, "learning_rate": 3.719760160124454e-05, "loss": 2.2944, "step": 5170000 }, { "epoch": 25.62, "learning_rate": 3.719636301481845e-05, "loss": 2.2929, "step": 5170500 }, { "epoch": 25.62, "learning_rate": 3.7195124428392366e-05, "loss": 2.273, "step": 5171000 }, { "epoch": 25.62, "learning_rate": 3.719388584196628e-05, "loss": 2.2615, "step": 5171500 }, { "epoch": 25.62, "learning_rate": 3.71926472555402e-05, "loss": 2.2707, "step": 5172000 }, { "epoch": 25.63, "learning_rate": 3.719140866911412e-05, "loss": 2.288, "step": 5172500 }, { "epoch": 25.63, "learning_rate": 3.7190170082688034e-05, "loss": 2.2828, "step": 5173000 }, { "epoch": 25.63, "learning_rate": 3.718893149626195e-05, "loss": 2.2858, "step": 5173500 }, { "epoch": 25.63, "learning_rate": 3.718769290983586e-05, "loss": 2.2594, "step": 5174000 }, { "epoch": 25.64, "learning_rate": 3.718645432340978e-05, "loss": 2.2852, "step": 5174500 }, { "epoch": 25.64, "learning_rate": 3.7185215736983695e-05, "loss": 2.2674, "step": 5175000 }, { "epoch": 25.64, "learning_rate": 3.718398210490332e-05, "loss": 2.2819, "step": 5175500 }, { "epoch": 25.64, "learning_rate": 3.718274351847724e-05, "loss": 2.2753, "step": 5176000 }, { "epoch": 25.65, "learning_rate": 3.718150493205115e-05, "loss": 2.2723, "step": 5176500 }, { "epoch": 25.65, "learning_rate": 3.7180266345625066e-05, "loss": 2.2838, "step": 5177000 }, { "epoch": 25.65, "learning_rate": 3.717902775919898e-05, "loss": 2.2957, "step": 5177500 }, { "epoch": 25.65, "learning_rate": 3.717779164994575e-05, "loss": 2.2762, "step": 5178000 }, { "epoch": 25.66, "learning_rate": 3.717655306351967e-05, "loss": 2.2829, "step": 5178500 }, { "epoch": 25.66, "learning_rate": 3.7175314477093586e-05, "loss": 2.2563, "step": 5179000 }, { "epoch": 25.66, "learning_rate": 3.71740758906675e-05, "loss": 2.2887, "step": 5179500 }, { "epoch": 25.66, "learning_rate": 3.717283730424141e-05, "loss": 2.283, "step": 5180000 }, { "epoch": 25.67, "learning_rate": 3.717160119498819e-05, "loss": 2.2762, "step": 5180500 }, { "epoch": 25.67, "learning_rate": 3.7170362608562105e-05, "loss": 2.2929, "step": 5181000 }, { "epoch": 25.67, "learning_rate": 3.716912649930887e-05, "loss": 2.2826, "step": 5181500 }, { "epoch": 25.67, "learning_rate": 3.7167887912882784e-05, "loss": 2.3166, "step": 5182000 }, { "epoch": 25.68, "learning_rate": 3.71666493264567e-05, "loss": 2.2852, "step": 5182500 }, { "epoch": 25.68, "learning_rate": 3.716541074003062e-05, "loss": 2.2891, "step": 5183000 }, { "epoch": 25.68, "learning_rate": 3.7164172153604535e-05, "loss": 2.2888, "step": 5183500 }, { "epoch": 25.68, "learning_rate": 3.7162936044351304e-05, "loss": 2.2808, "step": 5184000 }, { "epoch": 25.69, "learning_rate": 3.716169745792522e-05, "loss": 2.2693, "step": 5184500 }, { "epoch": 25.69, "learning_rate": 3.716046134867199e-05, "loss": 2.2618, "step": 5185000 }, { "epoch": 25.69, "learning_rate": 3.7159222762245906e-05, "loss": 2.2966, "step": 5185500 }, { "epoch": 25.69, "learning_rate": 3.7157984175819823e-05, "loss": 2.2962, "step": 5186000 }, { "epoch": 25.7, "learning_rate": 3.715674558939374e-05, "loss": 2.2637, "step": 5186500 }, { "epoch": 25.7, "learning_rate": 3.715550700296766e-05, "loss": 2.3125, "step": 5187000 }, { "epoch": 25.7, "learning_rate": 3.715427089371442e-05, "loss": 2.2799, "step": 5187500 }, { "epoch": 25.7, "learning_rate": 3.7153032307288336e-05, "loss": 2.3013, "step": 5188000 }, { "epoch": 25.71, "learning_rate": 3.715179372086225e-05, "loss": 2.2987, "step": 5188500 }, { "epoch": 25.71, "learning_rate": 3.715055513443617e-05, "loss": 2.3231, "step": 5189000 }, { "epoch": 25.71, "learning_rate": 3.714931654801009e-05, "loss": 2.2836, "step": 5189500 }, { "epoch": 25.71, "learning_rate": 3.7148077961584004e-05, "loss": 2.2828, "step": 5190000 }, { "epoch": 25.72, "learning_rate": 3.7146844329503625e-05, "loss": 2.2943, "step": 5190500 }, { "epoch": 25.72, "learning_rate": 3.714560574307754e-05, "loss": 2.2569, "step": 5191000 }, { "epoch": 25.72, "learning_rate": 3.714436715665146e-05, "loss": 2.2996, "step": 5191500 }, { "epoch": 25.72, "learning_rate": 3.7143128570225375e-05, "loss": 2.2977, "step": 5192000 }, { "epoch": 25.73, "learning_rate": 3.714188998379929e-05, "loss": 2.2883, "step": 5192500 }, { "epoch": 25.73, "learning_rate": 3.71406513973732e-05, "loss": 2.2551, "step": 5193000 }, { "epoch": 25.73, "learning_rate": 3.713941281094712e-05, "loss": 2.2708, "step": 5193500 }, { "epoch": 25.73, "learning_rate": 3.7138174224521036e-05, "loss": 2.2823, "step": 5194000 }, { "epoch": 25.74, "learning_rate": 3.713693563809495e-05, "loss": 2.274, "step": 5194500 }, { "epoch": 25.74, "learning_rate": 3.713569705166887e-05, "loss": 2.2976, "step": 5195000 }, { "epoch": 25.74, "learning_rate": 3.713445846524279e-05, "loss": 2.2881, "step": 5195500 }, { "epoch": 25.74, "learning_rate": 3.7133219878816704e-05, "loss": 2.2842, "step": 5196000 }, { "epoch": 25.75, "learning_rate": 3.713198129239062e-05, "loss": 2.2757, "step": 5196500 }, { "epoch": 25.75, "learning_rate": 3.713074270596454e-05, "loss": 2.2781, "step": 5197000 }, { "epoch": 25.75, "learning_rate": 3.7129504119538455e-05, "loss": 2.2648, "step": 5197500 }, { "epoch": 25.75, "learning_rate": 3.712826553311237e-05, "loss": 2.2698, "step": 5198000 }, { "epoch": 25.76, "learning_rate": 3.712702942385914e-05, "loss": 2.2989, "step": 5198500 }, { "epoch": 25.76, "learning_rate": 3.712579083743306e-05, "loss": 2.2976, "step": 5199000 }, { "epoch": 25.76, "learning_rate": 3.7124552251006974e-05, "loss": 2.2934, "step": 5199500 }, { "epoch": 25.76, "learning_rate": 3.712331366458089e-05, "loss": 2.2713, "step": 5200000 }, { "epoch": 25.77, "learning_rate": 3.7122077555327653e-05, "loss": 2.3012, "step": 5200500 }, { "epoch": 25.77, "learning_rate": 3.712083896890157e-05, "loss": 2.2833, "step": 5201000 }, { "epoch": 25.77, "learning_rate": 3.711960038247549e-05, "loss": 2.2911, "step": 5201500 }, { "epoch": 25.77, "learning_rate": 3.7118361796049404e-05, "loss": 2.2916, "step": 5202000 }, { "epoch": 25.77, "learning_rate": 3.711712320962332e-05, "loss": 2.2596, "step": 5202500 }, { "epoch": 25.78, "learning_rate": 3.711588710037009e-05, "loss": 2.2809, "step": 5203000 }, { "epoch": 25.78, "learning_rate": 3.711464851394401e-05, "loss": 2.3006, "step": 5203500 }, { "epoch": 25.78, "learning_rate": 3.7113409927517924e-05, "loss": 2.2742, "step": 5204000 }, { "epoch": 25.78, "learning_rate": 3.711217134109184e-05, "loss": 2.2681, "step": 5204500 }, { "epoch": 25.79, "learning_rate": 3.711093275466576e-05, "loss": 2.3087, "step": 5205000 }, { "epoch": 25.79, "learning_rate": 3.7109694168239675e-05, "loss": 2.3044, "step": 5205500 }, { "epoch": 25.79, "learning_rate": 3.710845558181359e-05, "loss": 2.277, "step": 5206000 }, { "epoch": 25.79, "learning_rate": 3.710721699538751e-05, "loss": 2.2873, "step": 5206500 }, { "epoch": 25.8, "learning_rate": 3.710598088613427e-05, "loss": 2.2831, "step": 5207000 }, { "epoch": 25.8, "learning_rate": 3.710474229970819e-05, "loss": 2.2702, "step": 5207500 }, { "epoch": 25.8, "learning_rate": 3.710350619045496e-05, "loss": 2.2777, "step": 5208000 }, { "epoch": 25.8, "learning_rate": 3.710226760402887e-05, "loss": 2.2703, "step": 5208500 }, { "epoch": 25.81, "learning_rate": 3.710102901760279e-05, "loss": 2.2656, "step": 5209000 }, { "epoch": 25.81, "learning_rate": 3.709979043117671e-05, "loss": 2.3058, "step": 5209500 }, { "epoch": 25.81, "learning_rate": 3.7098551844750624e-05, "loss": 2.3062, "step": 5210000 }, { "epoch": 25.81, "learning_rate": 3.709731573549739e-05, "loss": 2.2778, "step": 5210500 }, { "epoch": 25.82, "learning_rate": 3.709607714907131e-05, "loss": 2.2868, "step": 5211000 }, { "epoch": 25.82, "learning_rate": 3.7094838562645227e-05, "loss": 2.2837, "step": 5211500 }, { "epoch": 25.82, "learning_rate": 3.709359997621914e-05, "loss": 2.2617, "step": 5212000 }, { "epoch": 25.82, "learning_rate": 3.7092361389793054e-05, "loss": 2.2789, "step": 5212500 }, { "epoch": 25.83, "learning_rate": 3.709112528053983e-05, "loss": 2.3062, "step": 5213000 }, { "epoch": 25.83, "learning_rate": 3.7089886694113746e-05, "loss": 2.2737, "step": 5213500 }, { "epoch": 25.83, "learning_rate": 3.708864810768766e-05, "loss": 2.2862, "step": 5214000 }, { "epoch": 25.83, "learning_rate": 3.708740952126158e-05, "loss": 2.2864, "step": 5214500 }, { "epoch": 25.84, "learning_rate": 3.708617093483549e-05, "loss": 2.3172, "step": 5215000 }, { "epoch": 25.84, "learning_rate": 3.708493234840941e-05, "loss": 2.2644, "step": 5215500 }, { "epoch": 25.84, "learning_rate": 3.7083693761983324e-05, "loss": 2.2906, "step": 5216000 }, { "epoch": 25.84, "learning_rate": 3.708245765273009e-05, "loss": 2.277, "step": 5216500 }, { "epoch": 25.85, "learning_rate": 3.708121906630401e-05, "loss": 2.2867, "step": 5217000 }, { "epoch": 25.85, "learning_rate": 3.707998047987793e-05, "loss": 2.2931, "step": 5217500 }, { "epoch": 25.85, "learning_rate": 3.7078744370624695e-05, "loss": 2.2907, "step": 5218000 }, { "epoch": 25.85, "learning_rate": 3.707750578419861e-05, "loss": 2.2581, "step": 5218500 }, { "epoch": 25.86, "learning_rate": 3.707626719777253e-05, "loss": 2.3109, "step": 5219000 }, { "epoch": 25.86, "learning_rate": 3.7075028611346446e-05, "loss": 2.293, "step": 5219500 }, { "epoch": 25.86, "learning_rate": 3.707379002492036e-05, "loss": 2.311, "step": 5220000 }, { "epoch": 25.86, "learning_rate": 3.707255143849428e-05, "loss": 2.2732, "step": 5220500 }, { "epoch": 25.87, "learning_rate": 3.707131285206819e-05, "loss": 2.2977, "step": 5221000 }, { "epoch": 25.87, "learning_rate": 3.707007426564211e-05, "loss": 2.2984, "step": 5221500 }, { "epoch": 25.87, "learning_rate": 3.706883815638888e-05, "loss": 2.2718, "step": 5222000 }, { "epoch": 25.87, "learning_rate": 3.7067602047135645e-05, "loss": 2.2732, "step": 5222500 }, { "epoch": 25.88, "learning_rate": 3.706636346070956e-05, "loss": 2.2941, "step": 5223000 }, { "epoch": 25.88, "learning_rate": 3.706512487428348e-05, "loss": 2.2739, "step": 5223500 }, { "epoch": 25.88, "learning_rate": 3.7063886287857396e-05, "loss": 2.265, "step": 5224000 }, { "epoch": 25.88, "learning_rate": 3.706264770143131e-05, "loss": 2.2956, "step": 5224500 }, { "epoch": 25.89, "learning_rate": 3.706140911500523e-05, "loss": 2.2913, "step": 5225000 }, { "epoch": 25.89, "learning_rate": 3.7060170528579146e-05, "loss": 2.2667, "step": 5225500 }, { "epoch": 25.89, "learning_rate": 3.705893194215306e-05, "loss": 2.3244, "step": 5226000 }, { "epoch": 25.89, "learning_rate": 3.705769335572698e-05, "loss": 2.2802, "step": 5226500 }, { "epoch": 25.9, "learning_rate": 3.70564547693009e-05, "loss": 2.2817, "step": 5227000 }, { "epoch": 25.9, "learning_rate": 3.705521618287481e-05, "loss": 2.3061, "step": 5227500 }, { "epoch": 25.9, "learning_rate": 3.7053977596448724e-05, "loss": 2.2814, "step": 5228000 }, { "epoch": 25.9, "learning_rate": 3.705273901002264e-05, "loss": 2.2876, "step": 5228500 }, { "epoch": 25.91, "learning_rate": 3.705150042359656e-05, "loss": 2.2637, "step": 5229000 }, { "epoch": 25.91, "learning_rate": 3.7050261837170475e-05, "loss": 2.2767, "step": 5229500 }, { "epoch": 25.91, "learning_rate": 3.704902325074439e-05, "loss": 2.2874, "step": 5230000 }, { "epoch": 25.91, "learning_rate": 3.704778466431831e-05, "loss": 2.2886, "step": 5230500 }, { "epoch": 25.92, "learning_rate": 3.7046546077892226e-05, "loss": 2.2975, "step": 5231000 }, { "epoch": 25.92, "learning_rate": 3.704530749146614e-05, "loss": 2.2706, "step": 5231500 }, { "epoch": 25.92, "learning_rate": 3.7044071382212905e-05, "loss": 2.2877, "step": 5232000 }, { "epoch": 25.92, "learning_rate": 3.704283279578682e-05, "loss": 2.3207, "step": 5232500 }, { "epoch": 25.93, "learning_rate": 3.704159420936074e-05, "loss": 2.2841, "step": 5233000 }, { "epoch": 25.93, "learning_rate": 3.7040355622934656e-05, "loss": 2.2769, "step": 5233500 }, { "epoch": 25.93, "learning_rate": 3.7039119513681424e-05, "loss": 2.2918, "step": 5234000 }, { "epoch": 25.93, "learning_rate": 3.703788092725534e-05, "loss": 2.288, "step": 5234500 }, { "epoch": 25.94, "learning_rate": 3.703664234082926e-05, "loss": 2.2764, "step": 5235000 }, { "epoch": 25.94, "learning_rate": 3.7035403754403175e-05, "loss": 2.283, "step": 5235500 }, { "epoch": 25.94, "learning_rate": 3.703416764514995e-05, "loss": 2.2929, "step": 5236000 }, { "epoch": 25.94, "learning_rate": 3.703292905872386e-05, "loss": 2.2911, "step": 5236500 }, { "epoch": 25.95, "learning_rate": 3.703169047229778e-05, "loss": 2.2815, "step": 5237000 }, { "epoch": 25.95, "learning_rate": 3.7030451885871695e-05, "loss": 2.3038, "step": 5237500 }, { "epoch": 25.95, "learning_rate": 3.702921329944561e-05, "loss": 2.2901, "step": 5238000 }, { "epoch": 25.95, "learning_rate": 3.702797471301953e-05, "loss": 2.3041, "step": 5238500 }, { "epoch": 25.96, "learning_rate": 3.702673612659344e-05, "loss": 2.2909, "step": 5239000 }, { "epoch": 25.96, "learning_rate": 3.7025497540167356e-05, "loss": 2.3005, "step": 5239500 }, { "epoch": 25.96, "learning_rate": 3.7024261430914125e-05, "loss": 2.3043, "step": 5240000 }, { "epoch": 25.96, "learning_rate": 3.702302284448804e-05, "loss": 2.3069, "step": 5240500 }, { "epoch": 25.97, "learning_rate": 3.702178425806196e-05, "loss": 2.2887, "step": 5241000 }, { "epoch": 25.97, "learning_rate": 3.7020545671635875e-05, "loss": 2.2661, "step": 5241500 }, { "epoch": 25.97, "learning_rate": 3.701930708520979e-05, "loss": 2.2777, "step": 5242000 }, { "epoch": 25.97, "learning_rate": 3.701806849878371e-05, "loss": 2.2992, "step": 5242500 }, { "epoch": 25.98, "learning_rate": 3.701683238953048e-05, "loss": 2.2748, "step": 5243000 }, { "epoch": 25.98, "learning_rate": 3.701559628027725e-05, "loss": 2.2756, "step": 5243500 }, { "epoch": 25.98, "learning_rate": 3.7014357693851164e-05, "loss": 2.2784, "step": 5244000 }, { "epoch": 25.98, "learning_rate": 3.701311910742508e-05, "loss": 2.307, "step": 5244500 }, { "epoch": 25.99, "learning_rate": 3.7011880520999e-05, "loss": 2.2752, "step": 5245000 }, { "epoch": 25.99, "learning_rate": 3.7010641934572914e-05, "loss": 2.2948, "step": 5245500 }, { "epoch": 25.99, "learning_rate": 3.7009403348146825e-05, "loss": 2.3109, "step": 5246000 }, { "epoch": 25.99, "learning_rate": 3.700816476172074e-05, "loss": 2.2798, "step": 5246500 }, { "epoch": 26.0, "learning_rate": 3.700692617529466e-05, "loss": 2.3082, "step": 5247000 }, { "epoch": 26.0, "learning_rate": 3.7005687588868575e-05, "loss": 2.3089, "step": 5247500 }, { "epoch": 26.0, "eval_accuracy": 0.6544683759370017, "eval_accuracy_mlm": 0.6095992095473112, "eval_accuracy_nsp": 0.8660647398209124, "eval_loss": 2.3457705974578857, "eval_runtime": 146.2347, "eval_samples_per_second": 1743.491, "eval_steps_per_second": 72.65, "step": 5247918 }, { "epoch": 26.0, "learning_rate": 3.700444900244249e-05, "loss": 2.275, "step": 5248000 }, { "epoch": 26.0, "learning_rate": 3.700321041601641e-05, "loss": 2.2462, "step": 5248500 }, { "epoch": 26.01, "learning_rate": 3.700197430676318e-05, "loss": 2.2828, "step": 5249000 }, { "epoch": 26.01, "learning_rate": 3.7000735720337095e-05, "loss": 2.2497, "step": 5249500 }, { "epoch": 26.01, "learning_rate": 3.699949713391101e-05, "loss": 2.2561, "step": 5250000 }, { "epoch": 26.01, "learning_rate": 3.699825854748493e-05, "loss": 2.2692, "step": 5250500 }, { "epoch": 26.02, "learning_rate": 3.6997019961058846e-05, "loss": 2.2704, "step": 5251000 }, { "epoch": 26.02, "learning_rate": 3.699578137463276e-05, "loss": 2.2654, "step": 5251500 }, { "epoch": 26.02, "learning_rate": 3.699454278820668e-05, "loss": 2.2692, "step": 5252000 }, { "epoch": 26.02, "learning_rate": 3.699330420178059e-05, "loss": 2.2392, "step": 5252500 }, { "epoch": 26.03, "learning_rate": 3.699206561535451e-05, "loss": 2.2405, "step": 5253000 }, { "epoch": 26.03, "learning_rate": 3.6990831983274134e-05, "loss": 2.2633, "step": 5253500 }, { "epoch": 26.03, "learning_rate": 3.6989595874020896e-05, "loss": 2.2523, "step": 5254000 }, { "epoch": 26.03, "learning_rate": 3.698835728759481e-05, "loss": 2.2422, "step": 5254500 }, { "epoch": 26.04, "learning_rate": 3.698711870116873e-05, "loss": 2.2852, "step": 5255000 }, { "epoch": 26.04, "learning_rate": 3.698588011474265e-05, "loss": 2.2703, "step": 5255500 }, { "epoch": 26.04, "learning_rate": 3.6984641528316564e-05, "loss": 2.2614, "step": 5256000 }, { "epoch": 26.04, "learning_rate": 3.698340541906333e-05, "loss": 2.2694, "step": 5256500 }, { "epoch": 26.04, "learning_rate": 3.698216683263725e-05, "loss": 2.2442, "step": 5257000 }, { "epoch": 26.05, "learning_rate": 3.698092824621117e-05, "loss": 2.263, "step": 5257500 }, { "epoch": 26.05, "learning_rate": 3.6979689659785084e-05, "loss": 2.2511, "step": 5258000 }, { "epoch": 26.05, "learning_rate": 3.6978451073359e-05, "loss": 2.2405, "step": 5258500 }, { "epoch": 26.05, "learning_rate": 3.697721248693292e-05, "loss": 2.2604, "step": 5259000 }, { "epoch": 26.06, "learning_rate": 3.6975973900506834e-05, "loss": 2.2556, "step": 5259500 }, { "epoch": 26.06, "learning_rate": 3.697473531408075e-05, "loss": 2.2919, "step": 5260000 }, { "epoch": 26.06, "learning_rate": 3.697349672765467e-05, "loss": 2.2512, "step": 5260500 }, { "epoch": 26.06, "learning_rate": 3.6972258141228585e-05, "loss": 2.2531, "step": 5261000 }, { "epoch": 26.07, "learning_rate": 3.697102203197535e-05, "loss": 2.2625, "step": 5261500 }, { "epoch": 26.07, "learning_rate": 3.6969783445549264e-05, "loss": 2.2725, "step": 5262000 }, { "epoch": 26.07, "learning_rate": 3.696854733629603e-05, "loss": 2.2693, "step": 5262500 }, { "epoch": 26.07, "learning_rate": 3.696730874986995e-05, "loss": 2.2746, "step": 5263000 }, { "epoch": 26.08, "learning_rate": 3.696607016344387e-05, "loss": 2.2666, "step": 5263500 }, { "epoch": 26.08, "learning_rate": 3.6964831577017784e-05, "loss": 2.271, "step": 5264000 }, { "epoch": 26.08, "learning_rate": 3.69635929905917e-05, "loss": 2.2448, "step": 5264500 }, { "epoch": 26.08, "learning_rate": 3.696235440416562e-05, "loss": 2.283, "step": 5265000 }, { "epoch": 26.09, "learning_rate": 3.6961118294912386e-05, "loss": 2.2575, "step": 5265500 }, { "epoch": 26.09, "learning_rate": 3.695988218565915e-05, "loss": 2.2743, "step": 5266000 }, { "epoch": 26.09, "learning_rate": 3.6958643599233065e-05, "loss": 2.2608, "step": 5266500 }, { "epoch": 26.09, "learning_rate": 3.695740501280698e-05, "loss": 2.2905, "step": 5267000 }, { "epoch": 26.1, "learning_rate": 3.69561664263809e-05, "loss": 2.2947, "step": 5267500 }, { "epoch": 26.1, "learning_rate": 3.6954927839954816e-05, "loss": 2.2509, "step": 5268000 }, { "epoch": 26.1, "learning_rate": 3.695368925352873e-05, "loss": 2.2685, "step": 5268500 }, { "epoch": 26.1, "learning_rate": 3.695245066710265e-05, "loss": 2.2794, "step": 5269000 }, { "epoch": 26.11, "learning_rate": 3.695121208067657e-05, "loss": 2.2516, "step": 5269500 }, { "epoch": 26.11, "learning_rate": 3.6949973494250484e-05, "loss": 2.2744, "step": 5270000 }, { "epoch": 26.11, "learning_rate": 3.69487349078244e-05, "loss": 2.2572, "step": 5270500 }, { "epoch": 26.11, "learning_rate": 3.694749632139832e-05, "loss": 2.2396, "step": 5271000 }, { "epoch": 26.12, "learning_rate": 3.6946257734972235e-05, "loss": 2.2823, "step": 5271500 }, { "epoch": 26.12, "learning_rate": 3.694501914854615e-05, "loss": 2.2637, "step": 5272000 }, { "epoch": 26.12, "learning_rate": 3.694378056212007e-05, "loss": 2.2612, "step": 5272500 }, { "epoch": 26.12, "learning_rate": 3.6942541975693985e-05, "loss": 2.2399, "step": 5273000 }, { "epoch": 26.13, "learning_rate": 3.69413033892679e-05, "loss": 2.2862, "step": 5273500 }, { "epoch": 26.13, "learning_rate": 3.6940067280014664e-05, "loss": 2.284, "step": 5274000 }, { "epoch": 26.13, "learning_rate": 3.693882869358858e-05, "loss": 2.2642, "step": 5274500 }, { "epoch": 26.13, "learning_rate": 3.69375901071625e-05, "loss": 2.2629, "step": 5275000 }, { "epoch": 26.14, "learning_rate": 3.6936351520736415e-05, "loss": 2.2659, "step": 5275500 }, { "epoch": 26.14, "learning_rate": 3.693511293431033e-05, "loss": 2.2365, "step": 5276000 }, { "epoch": 26.14, "learning_rate": 3.693387434788425e-05, "loss": 2.2769, "step": 5276500 }, { "epoch": 26.14, "learning_rate": 3.693263823863102e-05, "loss": 2.2433, "step": 5277000 }, { "epoch": 26.15, "learning_rate": 3.6931399652204935e-05, "loss": 2.2665, "step": 5277500 }, { "epoch": 26.15, "learning_rate": 3.693016106577885e-05, "loss": 2.2601, "step": 5278000 }, { "epoch": 26.15, "learning_rate": 3.692892247935277e-05, "loss": 2.3155, "step": 5278500 }, { "epoch": 26.15, "learning_rate": 3.6927683892926685e-05, "loss": 2.2518, "step": 5279000 }, { "epoch": 26.16, "learning_rate": 3.69264453065006e-05, "loss": 2.2622, "step": 5279500 }, { "epoch": 26.16, "learning_rate": 3.692520672007452e-05, "loss": 2.2623, "step": 5280000 }, { "epoch": 26.16, "learning_rate": 3.692397061082128e-05, "loss": 2.2732, "step": 5280500 }, { "epoch": 26.16, "learning_rate": 3.69227320243952e-05, "loss": 2.2425, "step": 5281000 }, { "epoch": 26.17, "learning_rate": 3.6921493437969115e-05, "loss": 2.2449, "step": 5281500 }, { "epoch": 26.17, "learning_rate": 3.692025485154303e-05, "loss": 2.2487, "step": 5282000 }, { "epoch": 26.17, "learning_rate": 3.691901626511695e-05, "loss": 2.2494, "step": 5282500 }, { "epoch": 26.17, "learning_rate": 3.6917777678690866e-05, "loss": 2.2683, "step": 5283000 }, { "epoch": 26.18, "learning_rate": 3.6916539092264776e-05, "loss": 2.2414, "step": 5283500 }, { "epoch": 26.18, "learning_rate": 3.691530050583869e-05, "loss": 2.2555, "step": 5284000 }, { "epoch": 26.18, "learning_rate": 3.691406439658547e-05, "loss": 2.246, "step": 5284500 }, { "epoch": 26.18, "learning_rate": 3.6912825810159386e-05, "loss": 2.2715, "step": 5285000 }, { "epoch": 26.19, "learning_rate": 3.69115872237333e-05, "loss": 2.2586, "step": 5285500 }, { "epoch": 26.19, "learning_rate": 3.691034863730722e-05, "loss": 2.2493, "step": 5286000 }, { "epoch": 26.19, "learning_rate": 3.690911500522683e-05, "loss": 2.2628, "step": 5286500 }, { "epoch": 26.19, "learning_rate": 3.690787641880075e-05, "loss": 2.2646, "step": 5287000 }, { "epoch": 26.2, "learning_rate": 3.690663783237467e-05, "loss": 2.245, "step": 5287500 }, { "epoch": 26.2, "learning_rate": 3.6905399245948584e-05, "loss": 2.2657, "step": 5288000 }, { "epoch": 26.2, "learning_rate": 3.69041606595225e-05, "loss": 2.2547, "step": 5288500 }, { "epoch": 26.2, "learning_rate": 3.690292207309642e-05, "loss": 2.2863, "step": 5289000 }, { "epoch": 26.21, "learning_rate": 3.6901683486670335e-05, "loss": 2.2627, "step": 5289500 }, { "epoch": 26.21, "learning_rate": 3.690044490024425e-05, "loss": 2.2421, "step": 5290000 }, { "epoch": 26.21, "learning_rate": 3.689920631381817e-05, "loss": 2.2575, "step": 5290500 }, { "epoch": 26.21, "learning_rate": 3.6897967727392086e-05, "loss": 2.2554, "step": 5291000 }, { "epoch": 26.22, "learning_rate": 3.6896729140966e-05, "loss": 2.2682, "step": 5291500 }, { "epoch": 26.22, "learning_rate": 3.689549055453992e-05, "loss": 2.257, "step": 5292000 }, { "epoch": 26.22, "learning_rate": 3.6894256922459534e-05, "loss": 2.2817, "step": 5292500 }, { "epoch": 26.22, "learning_rate": 3.689301833603345e-05, "loss": 2.2867, "step": 5293000 }, { "epoch": 26.23, "learning_rate": 3.689177974960737e-05, "loss": 2.2552, "step": 5293500 }, { "epoch": 26.23, "learning_rate": 3.6890541163181284e-05, "loss": 2.2692, "step": 5294000 }, { "epoch": 26.23, "learning_rate": 3.68893025767552e-05, "loss": 2.2514, "step": 5294500 }, { "epoch": 26.23, "learning_rate": 3.688806399032912e-05, "loss": 2.2587, "step": 5295000 }, { "epoch": 26.24, "learning_rate": 3.6886825403903035e-05, "loss": 2.2817, "step": 5295500 }, { "epoch": 26.24, "learning_rate": 3.688558681747695e-05, "loss": 2.2631, "step": 5296000 }, { "epoch": 26.24, "learning_rate": 3.688434823105087e-05, "loss": 2.2627, "step": 5296500 }, { "epoch": 26.24, "learning_rate": 3.688311212179764e-05, "loss": 2.2805, "step": 5297000 }, { "epoch": 26.25, "learning_rate": 3.6881873535371555e-05, "loss": 2.2687, "step": 5297500 }, { "epoch": 26.25, "learning_rate": 3.688063494894547e-05, "loss": 2.2834, "step": 5298000 }, { "epoch": 26.25, "learning_rate": 3.687939636251939e-05, "loss": 2.2436, "step": 5298500 }, { "epoch": 26.25, "learning_rate": 3.687816025326615e-05, "loss": 2.2389, "step": 5299000 }, { "epoch": 26.26, "learning_rate": 3.687692166684007e-05, "loss": 2.2489, "step": 5299500 }, { "epoch": 26.26, "learning_rate": 3.6875683080413984e-05, "loss": 2.2658, "step": 5300000 }, { "epoch": 26.26, "learning_rate": 3.68744444939879e-05, "loss": 2.2578, "step": 5300500 }, { "epoch": 26.26, "learning_rate": 3.687320590756182e-05, "loss": 2.2667, "step": 5301000 }, { "epoch": 26.27, "learning_rate": 3.6871967321135735e-05, "loss": 2.2481, "step": 5301500 }, { "epoch": 26.27, "learning_rate": 3.687072873470965e-05, "loss": 2.2475, "step": 5302000 }, { "epoch": 26.27, "learning_rate": 3.686949014828357e-05, "loss": 2.2658, "step": 5302500 }, { "epoch": 26.27, "learning_rate": 3.6868251561857486e-05, "loss": 2.2964, "step": 5303000 }, { "epoch": 26.28, "learning_rate": 3.68670129754314e-05, "loss": 2.2901, "step": 5303500 }, { "epoch": 26.28, "learning_rate": 3.686577438900532e-05, "loss": 2.2668, "step": 5304000 }, { "epoch": 26.28, "learning_rate": 3.686453827975209e-05, "loss": 2.289, "step": 5304500 }, { "epoch": 26.28, "learning_rate": 3.6863299693326006e-05, "loss": 2.2806, "step": 5305000 }, { "epoch": 26.29, "learning_rate": 3.686206110689992e-05, "loss": 2.2532, "step": 5305500 }, { "epoch": 26.29, "learning_rate": 3.686082252047384e-05, "loss": 2.2487, "step": 5306000 }, { "epoch": 26.29, "learning_rate": 3.6859583934047756e-05, "loss": 2.2815, "step": 5306500 }, { "epoch": 26.29, "learning_rate": 3.685834534762167e-05, "loss": 2.2663, "step": 5307000 }, { "epoch": 26.3, "learning_rate": 3.6857109238368435e-05, "loss": 2.2659, "step": 5307500 }, { "epoch": 26.3, "learning_rate": 3.685587065194235e-05, "loss": 2.2551, "step": 5308000 }, { "epoch": 26.3, "learning_rate": 3.685463206551627e-05, "loss": 2.2631, "step": 5308500 }, { "epoch": 26.3, "learning_rate": 3.6853393479090186e-05, "loss": 2.2611, "step": 5309000 }, { "epoch": 26.31, "learning_rate": 3.68521548926641e-05, "loss": 2.2766, "step": 5309500 }, { "epoch": 26.31, "learning_rate": 3.685091630623802e-05, "loss": 2.2725, "step": 5310000 }, { "epoch": 26.31, "learning_rate": 3.684967771981194e-05, "loss": 2.2788, "step": 5310500 }, { "epoch": 26.31, "learning_rate": 3.6848439133385854e-05, "loss": 2.2321, "step": 5311000 }, { "epoch": 26.32, "learning_rate": 3.684720302413262e-05, "loss": 2.2733, "step": 5311500 }, { "epoch": 26.32, "learning_rate": 3.684596443770654e-05, "loss": 2.2867, "step": 5312000 }, { "epoch": 26.32, "learning_rate": 3.6844725851280456e-05, "loss": 2.2839, "step": 5312500 }, { "epoch": 26.32, "learning_rate": 3.6843487264854373e-05, "loss": 2.2721, "step": 5313000 }, { "epoch": 26.32, "learning_rate": 3.6842251155601135e-05, "loss": 2.2814, "step": 5313500 }, { "epoch": 26.33, "learning_rate": 3.6841015046347904e-05, "loss": 2.2994, "step": 5314000 }, { "epoch": 26.33, "learning_rate": 3.683977645992182e-05, "loss": 2.2346, "step": 5314500 }, { "epoch": 26.33, "learning_rate": 3.683853787349574e-05, "loss": 2.272, "step": 5315000 }, { "epoch": 26.33, "learning_rate": 3.6837299287069655e-05, "loss": 2.2787, "step": 5315500 }, { "epoch": 26.34, "learning_rate": 3.683606070064357e-05, "loss": 2.2815, "step": 5316000 }, { "epoch": 26.34, "learning_rate": 3.683482211421749e-05, "loss": 2.2951, "step": 5316500 }, { "epoch": 26.34, "learning_rate": 3.6833583527791406e-05, "loss": 2.2583, "step": 5317000 }, { "epoch": 26.34, "learning_rate": 3.683234741853817e-05, "loss": 2.2752, "step": 5317500 }, { "epoch": 26.35, "learning_rate": 3.6831111309284943e-05, "loss": 2.245, "step": 5318000 }, { "epoch": 26.35, "learning_rate": 3.682987520003171e-05, "loss": 2.2642, "step": 5318500 }, { "epoch": 26.35, "learning_rate": 3.6828639090778474e-05, "loss": 2.2769, "step": 5319000 }, { "epoch": 26.35, "learning_rate": 3.682740050435239e-05, "loss": 2.2546, "step": 5319500 }, { "epoch": 26.36, "learning_rate": 3.682616191792631e-05, "loss": 2.2726, "step": 5320000 }, { "epoch": 26.36, "learning_rate": 3.6824923331500225e-05, "loss": 2.247, "step": 5320500 }, { "epoch": 26.36, "learning_rate": 3.682368474507414e-05, "loss": 2.2547, "step": 5321000 }, { "epoch": 26.36, "learning_rate": 3.682244615864806e-05, "loss": 2.2627, "step": 5321500 }, { "epoch": 26.37, "learning_rate": 3.6821207572221976e-05, "loss": 2.2813, "step": 5322000 }, { "epoch": 26.37, "learning_rate": 3.681996898579589e-05, "loss": 2.2566, "step": 5322500 }, { "epoch": 26.37, "learning_rate": 3.681873039936981e-05, "loss": 2.2719, "step": 5323000 }, { "epoch": 26.37, "learning_rate": 3.681749181294373e-05, "loss": 2.2736, "step": 5323500 }, { "epoch": 26.38, "learning_rate": 3.6816253226517644e-05, "loss": 2.2618, "step": 5324000 }, { "epoch": 26.38, "learning_rate": 3.681501464009156e-05, "loss": 2.2542, "step": 5324500 }, { "epoch": 26.38, "learning_rate": 3.681377605366547e-05, "loss": 2.262, "step": 5325000 }, { "epoch": 26.38, "learning_rate": 3.6812539944412246e-05, "loss": 2.2745, "step": 5325500 }, { "epoch": 26.39, "learning_rate": 3.681130135798616e-05, "loss": 2.2565, "step": 5326000 }, { "epoch": 26.39, "learning_rate": 3.681006277156008e-05, "loss": 2.2563, "step": 5326500 }, { "epoch": 26.39, "learning_rate": 3.6808824185134e-05, "loss": 2.2395, "step": 5327000 }, { "epoch": 26.39, "learning_rate": 3.6807585598707914e-05, "loss": 2.2644, "step": 5327500 }, { "epoch": 26.4, "learning_rate": 3.6806347012281824e-05, "loss": 2.2437, "step": 5328000 }, { "epoch": 26.4, "learning_rate": 3.680510842585574e-05, "loss": 2.2721, "step": 5328500 }, { "epoch": 26.4, "learning_rate": 3.680386983942966e-05, "loss": 2.273, "step": 5329000 }, { "epoch": 26.4, "learning_rate": 3.6802631253003575e-05, "loss": 2.2778, "step": 5329500 }, { "epoch": 26.41, "learning_rate": 3.680139266657749e-05, "loss": 2.2782, "step": 5330000 }, { "epoch": 26.41, "learning_rate": 3.680015408015141e-05, "loss": 2.2676, "step": 5330500 }, { "epoch": 26.41, "learning_rate": 3.679891549372532e-05, "loss": 2.2632, "step": 5331000 }, { "epoch": 26.41, "learning_rate": 3.6797676907299236e-05, "loss": 2.2902, "step": 5331500 }, { "epoch": 26.42, "learning_rate": 3.679643832087315e-05, "loss": 2.2698, "step": 5332000 }, { "epoch": 26.42, "learning_rate": 3.679519973444707e-05, "loss": 2.2682, "step": 5332500 }, { "epoch": 26.42, "learning_rate": 3.6793961148020987e-05, "loss": 2.2695, "step": 5333000 }, { "epoch": 26.42, "learning_rate": 3.6792722561594904e-05, "loss": 2.2699, "step": 5333500 }, { "epoch": 26.43, "learning_rate": 3.679148645234167e-05, "loss": 2.2628, "step": 5334000 }, { "epoch": 26.43, "learning_rate": 3.679025034308844e-05, "loss": 2.2823, "step": 5334500 }, { "epoch": 26.43, "learning_rate": 3.678901175666236e-05, "loss": 2.2675, "step": 5335000 }, { "epoch": 26.43, "learning_rate": 3.6787773170236275e-05, "loss": 2.2469, "step": 5335500 }, { "epoch": 26.44, "learning_rate": 3.678653458381019e-05, "loss": 2.2607, "step": 5336000 }, { "epoch": 26.44, "learning_rate": 3.678529599738411e-05, "loss": 2.2687, "step": 5336500 }, { "epoch": 26.44, "learning_rate": 3.6784057410958026e-05, "loss": 2.2789, "step": 5337000 }, { "epoch": 26.44, "learning_rate": 3.678281882453194e-05, "loss": 2.2953, "step": 5337500 }, { "epoch": 26.45, "learning_rate": 3.678158023810585e-05, "loss": 2.2404, "step": 5338000 }, { "epoch": 26.45, "learning_rate": 3.678034165167977e-05, "loss": 2.282, "step": 5338500 }, { "epoch": 26.45, "learning_rate": 3.677910306525369e-05, "loss": 2.2767, "step": 5339000 }, { "epoch": 26.45, "learning_rate": 3.6777864478827604e-05, "loss": 2.2794, "step": 5339500 }, { "epoch": 26.46, "learning_rate": 3.677662589240152e-05, "loss": 2.2838, "step": 5340000 }, { "epoch": 26.46, "learning_rate": 3.677538730597544e-05, "loss": 2.2583, "step": 5340500 }, { "epoch": 26.46, "learning_rate": 3.6774151196722206e-05, "loss": 2.2644, "step": 5341000 }, { "epoch": 26.46, "learning_rate": 3.677291261029612e-05, "loss": 2.2895, "step": 5341500 }, { "epoch": 26.47, "learning_rate": 3.677167402387004e-05, "loss": 2.2884, "step": 5342000 }, { "epoch": 26.47, "learning_rate": 3.677043543744396e-05, "loss": 2.2567, "step": 5342500 }, { "epoch": 26.47, "learning_rate": 3.6769196851017874e-05, "loss": 2.2824, "step": 5343000 }, { "epoch": 26.47, "learning_rate": 3.676796074176464e-05, "loss": 2.2754, "step": 5343500 }, { "epoch": 26.48, "learning_rate": 3.676672215533856e-05, "loss": 2.2883, "step": 5344000 }, { "epoch": 26.48, "learning_rate": 3.676548604608532e-05, "loss": 2.2732, "step": 5344500 }, { "epoch": 26.48, "learning_rate": 3.676424745965924e-05, "loss": 2.2879, "step": 5345000 }, { "epoch": 26.48, "learning_rate": 3.6763008873233156e-05, "loss": 2.2564, "step": 5345500 }, { "epoch": 26.49, "learning_rate": 3.676177028680707e-05, "loss": 2.2885, "step": 5346000 }, { "epoch": 26.49, "learning_rate": 3.676053170038099e-05, "loss": 2.2796, "step": 5346500 }, { "epoch": 26.49, "learning_rate": 3.6759293113954906e-05, "loss": 2.2713, "step": 5347000 }, { "epoch": 26.49, "learning_rate": 3.6758057004701675e-05, "loss": 2.2911, "step": 5347500 }, { "epoch": 26.5, "learning_rate": 3.6756823372621296e-05, "loss": 2.2539, "step": 5348000 }, { "epoch": 26.5, "learning_rate": 3.675558478619521e-05, "loss": 2.2742, "step": 5348500 }, { "epoch": 26.5, "learning_rate": 3.675434619976913e-05, "loss": 2.246, "step": 5349000 }, { "epoch": 26.5, "learning_rate": 3.675310761334305e-05, "loss": 2.2775, "step": 5349500 }, { "epoch": 26.51, "learning_rate": 3.6751871504089816e-05, "loss": 2.2543, "step": 5350000 }, { "epoch": 26.51, "learning_rate": 3.675063291766373e-05, "loss": 2.2928, "step": 5350500 }, { "epoch": 26.51, "learning_rate": 3.674939433123765e-05, "loss": 2.2686, "step": 5351000 }, { "epoch": 26.51, "learning_rate": 3.6748155744811566e-05, "loss": 2.2733, "step": 5351500 }, { "epoch": 26.52, "learning_rate": 3.674691715838548e-05, "loss": 2.2779, "step": 5352000 }, { "epoch": 26.52, "learning_rate": 3.6745678571959393e-05, "loss": 2.2912, "step": 5352500 }, { "epoch": 26.52, "learning_rate": 3.674444246270616e-05, "loss": 2.2687, "step": 5353000 }, { "epoch": 26.52, "learning_rate": 3.674320387628008e-05, "loss": 2.2662, "step": 5353500 }, { "epoch": 26.53, "learning_rate": 3.674196776702685e-05, "loss": 2.2396, "step": 5354000 }, { "epoch": 26.53, "learning_rate": 3.6740729180600765e-05, "loss": 2.2742, "step": 5354500 }, { "epoch": 26.53, "learning_rate": 3.673949059417468e-05, "loss": 2.2846, "step": 5355000 }, { "epoch": 26.53, "learning_rate": 3.67382520077486e-05, "loss": 2.2667, "step": 5355500 }, { "epoch": 26.54, "learning_rate": 3.6737013421322516e-05, "loss": 2.2765, "step": 5356000 }, { "epoch": 26.54, "learning_rate": 3.673577483489643e-05, "loss": 2.251, "step": 5356500 }, { "epoch": 26.54, "learning_rate": 3.673453624847035e-05, "loss": 2.2586, "step": 5357000 }, { "epoch": 26.54, "learning_rate": 3.6733297662044266e-05, "loss": 2.2858, "step": 5357500 }, { "epoch": 26.55, "learning_rate": 3.673205907561818e-05, "loss": 2.2807, "step": 5358000 }, { "epoch": 26.55, "learning_rate": 3.6730822966364945e-05, "loss": 2.2795, "step": 5358500 }, { "epoch": 26.55, "learning_rate": 3.672958437993886e-05, "loss": 2.2698, "step": 5359000 }, { "epoch": 26.55, "learning_rate": 3.672834579351278e-05, "loss": 2.2692, "step": 5359500 }, { "epoch": 26.56, "learning_rate": 3.6727107207086696e-05, "loss": 2.2697, "step": 5360000 }, { "epoch": 26.56, "learning_rate": 3.672586862066061e-05, "loss": 2.2799, "step": 5360500 }, { "epoch": 26.56, "learning_rate": 3.672463003423453e-05, "loss": 2.2751, "step": 5361000 }, { "epoch": 26.56, "learning_rate": 3.672339144780845e-05, "loss": 2.2689, "step": 5361500 }, { "epoch": 26.57, "learning_rate": 3.6722152861382364e-05, "loss": 2.2778, "step": 5362000 }, { "epoch": 26.57, "learning_rate": 3.672091675212913e-05, "loss": 2.2961, "step": 5362500 }, { "epoch": 26.57, "learning_rate": 3.671967816570305e-05, "loss": 2.2765, "step": 5363000 }, { "epoch": 26.57, "learning_rate": 3.6718439579276967e-05, "loss": 2.2756, "step": 5363500 }, { "epoch": 26.58, "learning_rate": 3.6717200992850884e-05, "loss": 2.3013, "step": 5364000 }, { "epoch": 26.58, "learning_rate": 3.67159624064248e-05, "loss": 2.2892, "step": 5364500 }, { "epoch": 26.58, "learning_rate": 3.671473125151727e-05, "loss": 2.3011, "step": 5365000 }, { "epoch": 26.58, "learning_rate": 3.671349266509119e-05, "loss": 2.3148, "step": 5365500 }, { "epoch": 26.59, "learning_rate": 3.671225407866511e-05, "loss": 2.2778, "step": 5366000 }, { "epoch": 26.59, "learning_rate": 3.671101549223902e-05, "loss": 2.2993, "step": 5366500 }, { "epoch": 26.59, "learning_rate": 3.6709776905812934e-05, "loss": 2.2779, "step": 5367000 }, { "epoch": 26.59, "learning_rate": 3.670853831938685e-05, "loss": 2.2696, "step": 5367500 }, { "epoch": 26.59, "learning_rate": 3.670729973296077e-05, "loss": 2.2576, "step": 5368000 }, { "epoch": 26.6, "learning_rate": 3.6706061146534685e-05, "loss": 2.2611, "step": 5368500 }, { "epoch": 26.6, "learning_rate": 3.6704825037281454e-05, "loss": 2.2756, "step": 5369000 }, { "epoch": 26.6, "learning_rate": 3.670358645085537e-05, "loss": 2.2703, "step": 5369500 }, { "epoch": 26.6, "learning_rate": 3.670234786442929e-05, "loss": 2.2862, "step": 5370000 }, { "epoch": 26.61, "learning_rate": 3.6701109278003204e-05, "loss": 2.3082, "step": 5370500 }, { "epoch": 26.61, "learning_rate": 3.669987069157712e-05, "loss": 2.2528, "step": 5371000 }, { "epoch": 26.61, "learning_rate": 3.669863210515104e-05, "loss": 2.2901, "step": 5371500 }, { "epoch": 26.61, "learning_rate": 3.6697393518724955e-05, "loss": 2.2658, "step": 5372000 }, { "epoch": 26.62, "learning_rate": 3.6696154932298865e-05, "loss": 2.2874, "step": 5372500 }, { "epoch": 26.62, "learning_rate": 3.669491634587278e-05, "loss": 2.2565, "step": 5373000 }, { "epoch": 26.62, "learning_rate": 3.66936777594467e-05, "loss": 2.2681, "step": 5373500 }, { "epoch": 26.62, "learning_rate": 3.669244412736632e-05, "loss": 2.2731, "step": 5374000 }, { "epoch": 26.63, "learning_rate": 3.669120554094024e-05, "loss": 2.3088, "step": 5374500 }, { "epoch": 26.63, "learning_rate": 3.6689966954514154e-05, "loss": 2.2603, "step": 5375000 }, { "epoch": 26.63, "learning_rate": 3.668872836808807e-05, "loss": 2.2773, "step": 5375500 }, { "epoch": 26.63, "learning_rate": 3.668748978166199e-05, "loss": 2.2609, "step": 5376000 }, { "epoch": 26.64, "learning_rate": 3.6686251195235904e-05, "loss": 2.2708, "step": 5376500 }, { "epoch": 26.64, "learning_rate": 3.668501260880982e-05, "loss": 2.2872, "step": 5377000 }, { "epoch": 26.64, "learning_rate": 3.668377402238374e-05, "loss": 2.2752, "step": 5377500 }, { "epoch": 26.64, "learning_rate": 3.6682535435957655e-05, "loss": 2.2825, "step": 5378000 }, { "epoch": 26.65, "learning_rate": 3.6681296849531565e-05, "loss": 2.2562, "step": 5378500 }, { "epoch": 26.65, "learning_rate": 3.668005826310548e-05, "loss": 2.2711, "step": 5379000 }, { "epoch": 26.65, "learning_rate": 3.667882215385226e-05, "loss": 2.2773, "step": 5379500 }, { "epoch": 26.65, "learning_rate": 3.667758356742617e-05, "loss": 2.2731, "step": 5380000 }, { "epoch": 26.66, "learning_rate": 3.6676344981000085e-05, "loss": 2.253, "step": 5380500 }, { "epoch": 26.66, "learning_rate": 3.6675106394574e-05, "loss": 2.2824, "step": 5381000 }, { "epoch": 26.66, "learning_rate": 3.667387028532077e-05, "loss": 2.2933, "step": 5381500 }, { "epoch": 26.66, "learning_rate": 3.667263417606754e-05, "loss": 2.2788, "step": 5382000 }, { "epoch": 26.67, "learning_rate": 3.6671395589641456e-05, "loss": 2.2608, "step": 5382500 }, { "epoch": 26.67, "learning_rate": 3.667015700321537e-05, "loss": 2.2762, "step": 5383000 }, { "epoch": 26.67, "learning_rate": 3.666891841678929e-05, "loss": 2.2692, "step": 5383500 }, { "epoch": 26.67, "learning_rate": 3.666767983036321e-05, "loss": 2.2757, "step": 5384000 }, { "epoch": 26.68, "learning_rate": 3.666644372110997e-05, "loss": 2.2969, "step": 5384500 }, { "epoch": 26.68, "learning_rate": 3.6665205134683886e-05, "loss": 2.2756, "step": 5385000 }, { "epoch": 26.68, "learning_rate": 3.66639665482578e-05, "loss": 2.3052, "step": 5385500 }, { "epoch": 26.68, "learning_rate": 3.666272796183172e-05, "loss": 2.2807, "step": 5386000 }, { "epoch": 26.69, "learning_rate": 3.666148937540564e-05, "loss": 2.2769, "step": 5386500 }, { "epoch": 26.69, "learning_rate": 3.6660250788979554e-05, "loss": 2.2956, "step": 5387000 }, { "epoch": 26.69, "learning_rate": 3.665901220255347e-05, "loss": 2.2773, "step": 5387500 }, { "epoch": 26.69, "learning_rate": 3.665777361612739e-05, "loss": 2.2996, "step": 5388000 }, { "epoch": 26.7, "learning_rate": 3.6656535029701305e-05, "loss": 2.2919, "step": 5388500 }, { "epoch": 26.7, "learning_rate": 3.665529644327522e-05, "loss": 2.2895, "step": 5389000 }, { "epoch": 26.7, "learning_rate": 3.665406033402199e-05, "loss": 2.2703, "step": 5389500 }, { "epoch": 26.7, "learning_rate": 3.665282174759591e-05, "loss": 2.2458, "step": 5390000 }, { "epoch": 26.71, "learning_rate": 3.6651583161169824e-05, "loss": 2.2799, "step": 5390500 }, { "epoch": 26.71, "learning_rate": 3.665034457474374e-05, "loss": 2.272, "step": 5391000 }, { "epoch": 26.71, "learning_rate": 3.664910598831766e-05, "loss": 2.2755, "step": 5391500 }, { "epoch": 26.71, "learning_rate": 3.6647867401891575e-05, "loss": 2.284, "step": 5392000 }, { "epoch": 26.72, "learning_rate": 3.664662881546549e-05, "loss": 2.2968, "step": 5392500 }, { "epoch": 26.72, "learning_rate": 3.664539022903941e-05, "loss": 2.2897, "step": 5393000 }, { "epoch": 26.72, "learning_rate": 3.664415164261332e-05, "loss": 2.2828, "step": 5393500 }, { "epoch": 26.72, "learning_rate": 3.6642913056187236e-05, "loss": 2.2667, "step": 5394000 }, { "epoch": 26.73, "learning_rate": 3.6641676946934005e-05, "loss": 2.2811, "step": 5394500 }, { "epoch": 26.73, "learning_rate": 3.6640440837680774e-05, "loss": 2.2899, "step": 5395000 }, { "epoch": 26.73, "learning_rate": 3.663920225125469e-05, "loss": 2.2946, "step": 5395500 }, { "epoch": 26.73, "learning_rate": 3.663796366482861e-05, "loss": 2.2743, "step": 5396000 }, { "epoch": 26.74, "learning_rate": 3.6636725078402524e-05, "loss": 2.2719, "step": 5396500 }, { "epoch": 26.74, "learning_rate": 3.663548649197644e-05, "loss": 2.2736, "step": 5397000 }, { "epoch": 26.74, "learning_rate": 3.663424790555036e-05, "loss": 2.2547, "step": 5397500 }, { "epoch": 26.74, "learning_rate": 3.6633009319124275e-05, "loss": 2.2961, "step": 5398000 }, { "epoch": 26.75, "learning_rate": 3.663177073269819e-05, "loss": 2.2816, "step": 5398500 }, { "epoch": 26.75, "learning_rate": 3.663053214627211e-05, "loss": 2.2523, "step": 5399000 }, { "epoch": 26.75, "learning_rate": 3.662929603701887e-05, "loss": 2.2643, "step": 5399500 }, { "epoch": 26.75, "learning_rate": 3.662805745059279e-05, "loss": 2.2791, "step": 5400000 }, { "epoch": 26.76, "learning_rate": 3.6626818864166705e-05, "loss": 2.2853, "step": 5400500 }, { "epoch": 26.76, "learning_rate": 3.662558027774062e-05, "loss": 2.282, "step": 5401000 }, { "epoch": 26.76, "learning_rate": 3.662434169131454e-05, "loss": 2.2665, "step": 5401500 }, { "epoch": 26.76, "learning_rate": 3.662310558206131e-05, "loss": 2.2855, "step": 5402000 }, { "epoch": 26.77, "learning_rate": 3.6621866995635225e-05, "loss": 2.2654, "step": 5402500 }, { "epoch": 26.77, "learning_rate": 3.662062840920914e-05, "loss": 2.2647, "step": 5403000 }, { "epoch": 26.77, "learning_rate": 3.661938982278306e-05, "loss": 2.2608, "step": 5403500 }, { "epoch": 26.77, "learning_rate": 3.661815371352982e-05, "loss": 2.2639, "step": 5404000 }, { "epoch": 26.78, "learning_rate": 3.661691512710374e-05, "loss": 2.274, "step": 5404500 }, { "epoch": 26.78, "learning_rate": 3.6615676540677654e-05, "loss": 2.2579, "step": 5405000 }, { "epoch": 26.78, "learning_rate": 3.661443795425157e-05, "loss": 2.2698, "step": 5405500 }, { "epoch": 26.78, "learning_rate": 3.661319936782549e-05, "loss": 2.2665, "step": 5406000 }, { "epoch": 26.79, "learning_rate": 3.6611960781399405e-05, "loss": 2.2833, "step": 5406500 }, { "epoch": 26.79, "learning_rate": 3.6610724672146174e-05, "loss": 2.2807, "step": 5407000 }, { "epoch": 26.79, "learning_rate": 3.660948608572009e-05, "loss": 2.2814, "step": 5407500 }, { "epoch": 26.79, "learning_rate": 3.660824749929401e-05, "loss": 2.2917, "step": 5408000 }, { "epoch": 26.8, "learning_rate": 3.6607008912867925e-05, "loss": 2.2824, "step": 5408500 }, { "epoch": 26.8, "learning_rate": 3.660577032644184e-05, "loss": 2.2577, "step": 5409000 }, { "epoch": 26.8, "learning_rate": 3.660453174001576e-05, "loss": 2.2902, "step": 5409500 }, { "epoch": 26.8, "learning_rate": 3.6603293153589675e-05, "loss": 2.2665, "step": 5410000 }, { "epoch": 26.81, "learning_rate": 3.660205456716359e-05, "loss": 2.2558, "step": 5410500 }, { "epoch": 26.81, "learning_rate": 3.660081598073751e-05, "loss": 2.291, "step": 5411000 }, { "epoch": 26.81, "learning_rate": 3.6599577394311426e-05, "loss": 2.2716, "step": 5411500 }, { "epoch": 26.81, "learning_rate": 3.659833880788534e-05, "loss": 2.286, "step": 5412000 }, { "epoch": 26.82, "learning_rate": 3.6597102698632105e-05, "loss": 2.2877, "step": 5412500 }, { "epoch": 26.82, "learning_rate": 3.659586411220602e-05, "loss": 2.272, "step": 5413000 }, { "epoch": 26.82, "learning_rate": 3.659462552577994e-05, "loss": 2.2849, "step": 5413500 }, { "epoch": 26.82, "learning_rate": 3.6593386939353856e-05, "loss": 2.2769, "step": 5414000 }, { "epoch": 26.83, "learning_rate": 3.659214835292777e-05, "loss": 2.2784, "step": 5414500 }, { "epoch": 26.83, "learning_rate": 3.659090976650169e-05, "loss": 2.2649, "step": 5415000 }, { "epoch": 26.83, "learning_rate": 3.658967365724846e-05, "loss": 2.2982, "step": 5415500 }, { "epoch": 26.83, "learning_rate": 3.658843754799523e-05, "loss": 2.2614, "step": 5416000 }, { "epoch": 26.84, "learning_rate": 3.6587198961569144e-05, "loss": 2.3042, "step": 5416500 }, { "epoch": 26.84, "learning_rate": 3.658596037514306e-05, "loss": 2.3011, "step": 5417000 }, { "epoch": 26.84, "learning_rate": 3.658472178871697e-05, "loss": 2.2909, "step": 5417500 }, { "epoch": 26.84, "learning_rate": 3.658348320229089e-05, "loss": 2.2738, "step": 5418000 }, { "epoch": 26.85, "learning_rate": 3.6582244615864805e-05, "loss": 2.2702, "step": 5418500 }, { "epoch": 26.85, "learning_rate": 3.658100602943872e-05, "loss": 2.2858, "step": 5419000 }, { "epoch": 26.85, "learning_rate": 3.657976744301264e-05, "loss": 2.2621, "step": 5419500 }, { "epoch": 26.85, "learning_rate": 3.6578528856586556e-05, "loss": 2.2872, "step": 5420000 }, { "epoch": 26.86, "learning_rate": 3.657729027016047e-05, "loss": 2.2968, "step": 5420500 }, { "epoch": 26.86, "learning_rate": 3.657605416090724e-05, "loss": 2.2733, "step": 5421000 }, { "epoch": 26.86, "learning_rate": 3.657481557448116e-05, "loss": 2.2552, "step": 5421500 }, { "epoch": 26.86, "learning_rate": 3.6573576988055076e-05, "loss": 2.2804, "step": 5422000 }, { "epoch": 26.86, "learning_rate": 3.657233840162899e-05, "loss": 2.2929, "step": 5422500 }, { "epoch": 26.87, "learning_rate": 3.657110229237576e-05, "loss": 2.2556, "step": 5423000 }, { "epoch": 26.87, "learning_rate": 3.656986370594968e-05, "loss": 2.2761, "step": 5423500 }, { "epoch": 26.87, "learning_rate": 3.656862511952359e-05, "loss": 2.289, "step": 5424000 }, { "epoch": 26.87, "learning_rate": 3.6567386533097505e-05, "loss": 2.2732, "step": 5424500 }, { "epoch": 26.88, "learning_rate": 3.6566150423844274e-05, "loss": 2.3058, "step": 5425000 }, { "epoch": 26.88, "learning_rate": 3.656491183741819e-05, "loss": 2.2575, "step": 5425500 }, { "epoch": 26.88, "learning_rate": 3.656367325099211e-05, "loss": 2.2856, "step": 5426000 }, { "epoch": 26.88, "learning_rate": 3.6562434664566025e-05, "loss": 2.2945, "step": 5426500 }, { "epoch": 26.89, "learning_rate": 3.656119607813994e-05, "loss": 2.2855, "step": 5427000 }, { "epoch": 26.89, "learning_rate": 3.655995749171386e-05, "loss": 2.2709, "step": 5427500 }, { "epoch": 26.89, "learning_rate": 3.655872138246063e-05, "loss": 2.2528, "step": 5428000 }, { "epoch": 26.89, "learning_rate": 3.6557485273207397e-05, "loss": 2.2848, "step": 5428500 }, { "epoch": 26.9, "learning_rate": 3.6556246686781313e-05, "loss": 2.2729, "step": 5429000 }, { "epoch": 26.9, "learning_rate": 3.6555008100355224e-05, "loss": 2.3043, "step": 5429500 }, { "epoch": 26.9, "learning_rate": 3.655376951392914e-05, "loss": 2.2512, "step": 5430000 }, { "epoch": 26.9, "learning_rate": 3.655253092750306e-05, "loss": 2.2953, "step": 5430500 }, { "epoch": 26.91, "learning_rate": 3.6551292341076974e-05, "loss": 2.2616, "step": 5431000 }, { "epoch": 26.91, "learning_rate": 3.655005375465089e-05, "loss": 2.2824, "step": 5431500 }, { "epoch": 26.91, "learning_rate": 3.654881516822481e-05, "loss": 2.2861, "step": 5432000 }, { "epoch": 26.91, "learning_rate": 3.6547576581798725e-05, "loss": 2.2852, "step": 5432500 }, { "epoch": 26.92, "learning_rate": 3.654633799537264e-05, "loss": 2.2819, "step": 5433000 }, { "epoch": 26.92, "learning_rate": 3.654509940894656e-05, "loss": 2.2683, "step": 5433500 }, { "epoch": 26.92, "learning_rate": 3.6543860822520476e-05, "loss": 2.2694, "step": 5434000 }, { "epoch": 26.92, "learning_rate": 3.654262223609439e-05, "loss": 2.2698, "step": 5434500 }, { "epoch": 26.93, "learning_rate": 3.654138364966831e-05, "loss": 2.2743, "step": 5435000 }, { "epoch": 26.93, "learning_rate": 3.654014506324223e-05, "loss": 2.2672, "step": 5435500 }, { "epoch": 26.93, "learning_rate": 3.6538906476816144e-05, "loss": 2.2739, "step": 5436000 }, { "epoch": 26.93, "learning_rate": 3.653767036756291e-05, "loss": 2.2586, "step": 5436500 }, { "epoch": 26.94, "learning_rate": 3.653643178113683e-05, "loss": 2.29, "step": 5437000 }, { "epoch": 26.94, "learning_rate": 3.653519319471074e-05, "loss": 2.2828, "step": 5437500 }, { "epoch": 26.94, "learning_rate": 3.6533954608284656e-05, "loss": 2.2699, "step": 5438000 }, { "epoch": 26.94, "learning_rate": 3.6532716021858573e-05, "loss": 2.2833, "step": 5438500 }, { "epoch": 26.95, "learning_rate": 3.653147743543249e-05, "loss": 2.2968, "step": 5439000 }, { "epoch": 26.95, "learning_rate": 3.653023884900641e-05, "loss": 2.2807, "step": 5439500 }, { "epoch": 26.95, "learning_rate": 3.6529000262580324e-05, "loss": 2.2711, "step": 5440000 }, { "epoch": 26.95, "learning_rate": 3.652776167615424e-05, "loss": 2.2559, "step": 5440500 }, { "epoch": 26.96, "learning_rate": 3.652652308972815e-05, "loss": 2.2752, "step": 5441000 }, { "epoch": 26.96, "learning_rate": 3.652528450330207e-05, "loss": 2.2879, "step": 5441500 }, { "epoch": 26.96, "learning_rate": 3.6524050871221696e-05, "loss": 2.2852, "step": 5442000 }, { "epoch": 26.96, "learning_rate": 3.652281228479561e-05, "loss": 2.2834, "step": 5442500 }, { "epoch": 26.97, "learning_rate": 3.652157369836953e-05, "loss": 2.2683, "step": 5443000 }, { "epoch": 26.97, "learning_rate": 3.6520335111943446e-05, "loss": 2.2996, "step": 5443500 }, { "epoch": 26.97, "learning_rate": 3.651909652551736e-05, "loss": 2.2578, "step": 5444000 }, { "epoch": 26.97, "learning_rate": 3.6517857939091274e-05, "loss": 2.2798, "step": 5444500 }, { "epoch": 26.98, "learning_rate": 3.651661935266519e-05, "loss": 2.287, "step": 5445000 }, { "epoch": 26.98, "learning_rate": 3.651538076623911e-05, "loss": 2.2635, "step": 5445500 }, { "epoch": 26.98, "learning_rate": 3.6514144656985876e-05, "loss": 2.2767, "step": 5446000 }, { "epoch": 26.98, "learning_rate": 3.651290607055979e-05, "loss": 2.2714, "step": 5446500 }, { "epoch": 26.99, "learning_rate": 3.651166748413371e-05, "loss": 2.2901, "step": 5447000 }, { "epoch": 26.99, "learning_rate": 3.651042889770763e-05, "loss": 2.2857, "step": 5447500 }, { "epoch": 26.99, "learning_rate": 3.6509190311281544e-05, "loss": 2.2839, "step": 5448000 }, { "epoch": 26.99, "learning_rate": 3.650795172485546e-05, "loss": 2.2809, "step": 5448500 }, { "epoch": 27.0, "learning_rate": 3.650671313842938e-05, "loss": 2.2747, "step": 5449000 }, { "epoch": 27.0, "learning_rate": 3.6505474552003295e-05, "loss": 2.291, "step": 5449500 }, { "epoch": 27.0, "eval_accuracy": 0.6562619972139041, "eval_accuracy_mlm": 0.6113849397655421, "eval_accuracy_nsp": 0.8681356610278516, "eval_loss": 2.3292908668518066, "eval_runtime": 146.2467, "eval_samples_per_second": 1743.349, "eval_steps_per_second": 72.644, "step": 5449761 }, { "epoch": 27.0, "learning_rate": 3.650423596557721e-05, "loss": 2.2555, "step": 5450000 }, { "epoch": 27.0, "learning_rate": 3.650299985632398e-05, "loss": 2.257, "step": 5450500 }, { "epoch": 27.01, "learning_rate": 3.6501766224243594e-05, "loss": 2.2329, "step": 5451000 }, { "epoch": 27.01, "learning_rate": 3.650052763781751e-05, "loss": 2.2794, "step": 5451500 }, { "epoch": 27.01, "learning_rate": 3.649928905139143e-05, "loss": 2.2362, "step": 5452000 }, { "epoch": 27.01, "learning_rate": 3.6498050464965345e-05, "loss": 2.2553, "step": 5452500 }, { "epoch": 27.02, "learning_rate": 3.649681187853926e-05, "loss": 2.2592, "step": 5453000 }, { "epoch": 27.02, "learning_rate": 3.649557329211318e-05, "loss": 2.2158, "step": 5453500 }, { "epoch": 27.02, "learning_rate": 3.6494334705687096e-05, "loss": 2.2312, "step": 5454000 }, { "epoch": 27.02, "learning_rate": 3.649309611926101e-05, "loss": 2.2288, "step": 5454500 }, { "epoch": 27.03, "learning_rate": 3.649185753283493e-05, "loss": 2.2151, "step": 5455000 }, { "epoch": 27.03, "learning_rate": 3.649061894640885e-05, "loss": 2.2561, "step": 5455500 }, { "epoch": 27.03, "learning_rate": 3.6489380359982764e-05, "loss": 2.2237, "step": 5456000 }, { "epoch": 27.03, "learning_rate": 3.648814177355668e-05, "loss": 2.2344, "step": 5456500 }, { "epoch": 27.04, "learning_rate": 3.64869031871306e-05, "loss": 2.2628, "step": 5457000 }, { "epoch": 27.04, "learning_rate": 3.648566707787736e-05, "loss": 2.2354, "step": 5457500 }, { "epoch": 27.04, "learning_rate": 3.6484428491451276e-05, "loss": 2.2272, "step": 5458000 }, { "epoch": 27.04, "learning_rate": 3.648318990502519e-05, "loss": 2.2534, "step": 5458500 }, { "epoch": 27.05, "learning_rate": 3.648195131859911e-05, "loss": 2.2329, "step": 5459000 }, { "epoch": 27.05, "learning_rate": 3.648071273217303e-05, "loss": 2.2415, "step": 5459500 }, { "epoch": 27.05, "learning_rate": 3.6479474145746944e-05, "loss": 2.2537, "step": 5460000 }, { "epoch": 27.05, "learning_rate": 3.647823803649371e-05, "loss": 2.2636, "step": 5460500 }, { "epoch": 27.06, "learning_rate": 3.647699945006763e-05, "loss": 2.2439, "step": 5461000 }, { "epoch": 27.06, "learning_rate": 3.647576086364155e-05, "loss": 2.264, "step": 5461500 }, { "epoch": 27.06, "learning_rate": 3.6474522277215464e-05, "loss": 2.2518, "step": 5462000 }, { "epoch": 27.06, "learning_rate": 3.6473286167962226e-05, "loss": 2.2465, "step": 5462500 }, { "epoch": 27.07, "learning_rate": 3.647204758153614e-05, "loss": 2.246, "step": 5463000 }, { "epoch": 27.07, "learning_rate": 3.647080899511006e-05, "loss": 2.2395, "step": 5463500 }, { "epoch": 27.07, "learning_rate": 3.6469570408683977e-05, "loss": 2.2537, "step": 5464000 }, { "epoch": 27.07, "learning_rate": 3.6468331822257893e-05, "loss": 2.2474, "step": 5464500 }, { "epoch": 27.08, "learning_rate": 3.646709323583181e-05, "loss": 2.2647, "step": 5465000 }, { "epoch": 27.08, "learning_rate": 3.646585712657858e-05, "loss": 2.2496, "step": 5465500 }, { "epoch": 27.08, "learning_rate": 3.646462101732535e-05, "loss": 2.2495, "step": 5466000 }, { "epoch": 27.08, "learning_rate": 3.6463382430899265e-05, "loss": 2.2468, "step": 5466500 }, { "epoch": 27.09, "learning_rate": 3.646214384447318e-05, "loss": 2.2772, "step": 5467000 }, { "epoch": 27.09, "learning_rate": 3.64609052580471e-05, "loss": 2.2372, "step": 5467500 }, { "epoch": 27.09, "learning_rate": 3.645966914879387e-05, "loss": 2.2775, "step": 5468000 }, { "epoch": 27.09, "learning_rate": 3.6458430562367785e-05, "loss": 2.2419, "step": 5468500 }, { "epoch": 27.1, "learning_rate": 3.64571919759417e-05, "loss": 2.2398, "step": 5469000 }, { "epoch": 27.1, "learning_rate": 3.645595586668847e-05, "loss": 2.2505, "step": 5469500 }, { "epoch": 27.1, "learning_rate": 3.645471728026239e-05, "loss": 2.259, "step": 5470000 }, { "epoch": 27.1, "learning_rate": 3.6453478693836304e-05, "loss": 2.2549, "step": 5470500 }, { "epoch": 27.11, "learning_rate": 3.645224010741022e-05, "loss": 2.256, "step": 5471000 }, { "epoch": 27.11, "learning_rate": 3.645100152098414e-05, "loss": 2.2535, "step": 5471500 }, { "epoch": 27.11, "learning_rate": 3.6449762934558055e-05, "loss": 2.2698, "step": 5472000 }, { "epoch": 27.11, "learning_rate": 3.6448524348131965e-05, "loss": 2.2286, "step": 5472500 }, { "epoch": 27.12, "learning_rate": 3.644728576170588e-05, "loss": 2.2582, "step": 5473000 }, { "epoch": 27.12, "learning_rate": 3.64460471752798e-05, "loss": 2.2486, "step": 5473500 }, { "epoch": 27.12, "learning_rate": 3.644481106602657e-05, "loss": 2.261, "step": 5474000 }, { "epoch": 27.12, "learning_rate": 3.6443572479600485e-05, "loss": 2.2363, "step": 5474500 }, { "epoch": 27.13, "learning_rate": 3.64423338931744e-05, "loss": 2.2753, "step": 5475000 }, { "epoch": 27.13, "learning_rate": 3.644109530674832e-05, "loss": 2.2233, "step": 5475500 }, { "epoch": 27.13, "learning_rate": 3.643985672032223e-05, "loss": 2.2671, "step": 5476000 }, { "epoch": 27.13, "learning_rate": 3.6438618133896146e-05, "loss": 2.2589, "step": 5476500 }, { "epoch": 27.13, "learning_rate": 3.643737954747006e-05, "loss": 2.2706, "step": 5477000 }, { "epoch": 27.14, "learning_rate": 3.643614096104398e-05, "loss": 2.247, "step": 5477500 }, { "epoch": 27.14, "learning_rate": 3.6434902374617896e-05, "loss": 2.2405, "step": 5478000 }, { "epoch": 27.14, "learning_rate": 3.643366378819181e-05, "loss": 2.2682, "step": 5478500 }, { "epoch": 27.14, "learning_rate": 3.643242520176573e-05, "loss": 2.2618, "step": 5479000 }, { "epoch": 27.15, "learning_rate": 3.643118661533965e-05, "loss": 2.2553, "step": 5479500 }, { "epoch": 27.15, "learning_rate": 3.6429948028913564e-05, "loss": 2.2585, "step": 5480000 }, { "epoch": 27.15, "learning_rate": 3.642870944248748e-05, "loss": 2.2591, "step": 5480500 }, { "epoch": 27.15, "learning_rate": 3.642747333323425e-05, "loss": 2.2141, "step": 5481000 }, { "epoch": 27.16, "learning_rate": 3.642623474680816e-05, "loss": 2.2618, "step": 5481500 }, { "epoch": 27.16, "learning_rate": 3.642499616038208e-05, "loss": 2.2731, "step": 5482000 }, { "epoch": 27.16, "learning_rate": 3.6423757573955994e-05, "loss": 2.2593, "step": 5482500 }, { "epoch": 27.16, "learning_rate": 3.642252146470276e-05, "loss": 2.2595, "step": 5483000 }, { "epoch": 27.17, "learning_rate": 3.642128287827668e-05, "loss": 2.2376, "step": 5483500 }, { "epoch": 27.17, "learning_rate": 3.6420044291850597e-05, "loss": 2.2437, "step": 5484000 }, { "epoch": 27.17, "learning_rate": 3.6418805705424513e-05, "loss": 2.2427, "step": 5484500 }, { "epoch": 27.17, "learning_rate": 3.641756711899843e-05, "loss": 2.2614, "step": 5485000 }, { "epoch": 27.18, "learning_rate": 3.641633348691805e-05, "loss": 2.2662, "step": 5485500 }, { "epoch": 27.18, "learning_rate": 3.641509490049197e-05, "loss": 2.232, "step": 5486000 }, { "epoch": 27.18, "learning_rate": 3.6413856314065885e-05, "loss": 2.2682, "step": 5486500 }, { "epoch": 27.18, "learning_rate": 3.64126177276398e-05, "loss": 2.2515, "step": 5487000 }, { "epoch": 27.19, "learning_rate": 3.641138161838657e-05, "loss": 2.2346, "step": 5487500 }, { "epoch": 27.19, "learning_rate": 3.641014303196049e-05, "loss": 2.2527, "step": 5488000 }, { "epoch": 27.19, "learning_rate": 3.6408904445534405e-05, "loss": 2.2485, "step": 5488500 }, { "epoch": 27.19, "learning_rate": 3.640766585910832e-05, "loss": 2.2585, "step": 5489000 }, { "epoch": 27.2, "learning_rate": 3.640642727268224e-05, "loss": 2.2364, "step": 5489500 }, { "epoch": 27.2, "learning_rate": 3.6405188686256155e-05, "loss": 2.257, "step": 5490000 }, { "epoch": 27.2, "learning_rate": 3.640395009983007e-05, "loss": 2.2554, "step": 5490500 }, { "epoch": 27.2, "learning_rate": 3.640271151340399e-05, "loss": 2.2422, "step": 5491000 }, { "epoch": 27.21, "learning_rate": 3.6401472926977906e-05, "loss": 2.2549, "step": 5491500 }, { "epoch": 27.21, "learning_rate": 3.640023681772467e-05, "loss": 2.2699, "step": 5492000 }, { "epoch": 27.21, "learning_rate": 3.6398998231298585e-05, "loss": 2.266, "step": 5492500 }, { "epoch": 27.21, "learning_rate": 3.63977596448725e-05, "loss": 2.2627, "step": 5493000 }, { "epoch": 27.22, "learning_rate": 3.639652105844642e-05, "loss": 2.2377, "step": 5493500 }, { "epoch": 27.22, "learning_rate": 3.6395282472020336e-05, "loss": 2.2849, "step": 5494000 }, { "epoch": 27.22, "learning_rate": 3.6394046362767105e-05, "loss": 2.2537, "step": 5494500 }, { "epoch": 27.22, "learning_rate": 3.6392810253513873e-05, "loss": 2.2768, "step": 5495000 }, { "epoch": 27.23, "learning_rate": 3.639157166708779e-05, "loss": 2.2535, "step": 5495500 }, { "epoch": 27.23, "learning_rate": 3.63903330806617e-05, "loss": 2.2404, "step": 5496000 }, { "epoch": 27.23, "learning_rate": 3.638909697140847e-05, "loss": 2.2792, "step": 5496500 }, { "epoch": 27.23, "learning_rate": 3.6387858384982386e-05, "loss": 2.2611, "step": 5497000 }, { "epoch": 27.24, "learning_rate": 3.63866197985563e-05, "loss": 2.2887, "step": 5497500 }, { "epoch": 27.24, "learning_rate": 3.638538121213022e-05, "loss": 2.2696, "step": 5498000 }, { "epoch": 27.24, "learning_rate": 3.638414262570414e-05, "loss": 2.2436, "step": 5498500 }, { "epoch": 27.24, "learning_rate": 3.6382904039278054e-05, "loss": 2.2483, "step": 5499000 }, { "epoch": 27.25, "learning_rate": 3.638166545285197e-05, "loss": 2.2444, "step": 5499500 }, { "epoch": 27.25, "learning_rate": 3.638042686642589e-05, "loss": 2.2416, "step": 5500000 }, { "epoch": 27.25, "learning_rate": 3.637919075717266e-05, "loss": 2.2547, "step": 5500500 }, { "epoch": 27.25, "learning_rate": 3.6377952170746574e-05, "loss": 2.2881, "step": 5501000 }, { "epoch": 27.26, "learning_rate": 3.637671358432049e-05, "loss": 2.2535, "step": 5501500 }, { "epoch": 27.26, "learning_rate": 3.637547747506725e-05, "loss": 2.2535, "step": 5502000 }, { "epoch": 27.26, "learning_rate": 3.637423888864117e-05, "loss": 2.2756, "step": 5502500 }, { "epoch": 27.26, "learning_rate": 3.6373000302215086e-05, "loss": 2.2454, "step": 5503000 }, { "epoch": 27.27, "learning_rate": 3.6371761715789e-05, "loss": 2.2552, "step": 5503500 }, { "epoch": 27.27, "learning_rate": 3.637052312936292e-05, "loss": 2.2726, "step": 5504000 }, { "epoch": 27.27, "learning_rate": 3.636928454293684e-05, "loss": 2.2531, "step": 5504500 }, { "epoch": 27.27, "learning_rate": 3.6368045956510754e-05, "loss": 2.2683, "step": 5505000 }, { "epoch": 27.28, "learning_rate": 3.636680737008467e-05, "loss": 2.2521, "step": 5505500 }, { "epoch": 27.28, "learning_rate": 3.636556878365859e-05, "loss": 2.25, "step": 5506000 }, { "epoch": 27.28, "learning_rate": 3.6364330197232505e-05, "loss": 2.2742, "step": 5506500 }, { "epoch": 27.28, "learning_rate": 3.636309161080642e-05, "loss": 2.261, "step": 5507000 }, { "epoch": 27.29, "learning_rate": 3.636185302438034e-05, "loss": 2.2644, "step": 5507500 }, { "epoch": 27.29, "learning_rate": 3.636061691512711e-05, "loss": 2.2741, "step": 5508000 }, { "epoch": 27.29, "learning_rate": 3.6359378328701024e-05, "loss": 2.2615, "step": 5508500 }, { "epoch": 27.29, "learning_rate": 3.6358142219447787e-05, "loss": 2.235, "step": 5509000 }, { "epoch": 27.3, "learning_rate": 3.6356903633021703e-05, "loss": 2.2733, "step": 5509500 }, { "epoch": 27.3, "learning_rate": 3.635566504659562e-05, "loss": 2.254, "step": 5510000 }, { "epoch": 27.3, "learning_rate": 3.635442646016954e-05, "loss": 2.2436, "step": 5510500 }, { "epoch": 27.3, "learning_rate": 3.6353187873743454e-05, "loss": 2.2736, "step": 5511000 }, { "epoch": 27.31, "learning_rate": 3.635194928731737e-05, "loss": 2.2374, "step": 5511500 }, { "epoch": 27.31, "learning_rate": 3.635071070089129e-05, "loss": 2.2574, "step": 5512000 }, { "epoch": 27.31, "learning_rate": 3.6349472114465205e-05, "loss": 2.253, "step": 5512500 }, { "epoch": 27.31, "learning_rate": 3.634823352803912e-05, "loss": 2.2539, "step": 5513000 }, { "epoch": 27.32, "learning_rate": 3.634699494161304e-05, "loss": 2.2727, "step": 5513500 }, { "epoch": 27.32, "learning_rate": 3.6345756355186956e-05, "loss": 2.2654, "step": 5514000 }, { "epoch": 27.32, "learning_rate": 3.634451776876087e-05, "loss": 2.2546, "step": 5514500 }, { "epoch": 27.32, "learning_rate": 3.634327918233479e-05, "loss": 2.2389, "step": 5515000 }, { "epoch": 27.33, "learning_rate": 3.6342040595908707e-05, "loss": 2.2643, "step": 5515500 }, { "epoch": 27.33, "learning_rate": 3.6340802009482624e-05, "loss": 2.2547, "step": 5516000 }, { "epoch": 27.33, "learning_rate": 3.633956342305654e-05, "loss": 2.2357, "step": 5516500 }, { "epoch": 27.33, "learning_rate": 3.633832483663045e-05, "loss": 2.2576, "step": 5517000 }, { "epoch": 27.34, "learning_rate": 3.633708625020437e-05, "loss": 2.2713, "step": 5517500 }, { "epoch": 27.34, "learning_rate": 3.633585261812399e-05, "loss": 2.2306, "step": 5518000 }, { "epoch": 27.34, "learning_rate": 3.6334614031697905e-05, "loss": 2.2457, "step": 5518500 }, { "epoch": 27.34, "learning_rate": 3.633337544527182e-05, "loss": 2.2603, "step": 5519000 }, { "epoch": 27.35, "learning_rate": 3.633213685884574e-05, "loss": 2.2669, "step": 5519500 }, { "epoch": 27.35, "learning_rate": 3.6330898272419656e-05, "loss": 2.2569, "step": 5520000 }, { "epoch": 27.35, "learning_rate": 3.632965968599357e-05, "loss": 2.2568, "step": 5520500 }, { "epoch": 27.35, "learning_rate": 3.632842109956749e-05, "loss": 2.2558, "step": 5521000 }, { "epoch": 27.36, "learning_rate": 3.632718251314141e-05, "loss": 2.2727, "step": 5521500 }, { "epoch": 27.36, "learning_rate": 3.6325943926715324e-05, "loss": 2.2767, "step": 5522000 }, { "epoch": 27.36, "learning_rate": 3.632470534028924e-05, "loss": 2.2559, "step": 5522500 }, { "epoch": 27.36, "learning_rate": 3.632346675386315e-05, "loss": 2.2577, "step": 5523000 }, { "epoch": 27.37, "learning_rate": 3.632223064460992e-05, "loss": 2.2861, "step": 5523500 }, { "epoch": 27.37, "learning_rate": 3.6320992058183836e-05, "loss": 2.256, "step": 5524000 }, { "epoch": 27.37, "learning_rate": 3.6319753471757753e-05, "loss": 2.2271, "step": 5524500 }, { "epoch": 27.37, "learning_rate": 3.631851488533167e-05, "loss": 2.261, "step": 5525000 }, { "epoch": 27.38, "learning_rate": 3.631727629890559e-05, "loss": 2.245, "step": 5525500 }, { "epoch": 27.38, "learning_rate": 3.63160377124795e-05, "loss": 2.2664, "step": 5526000 }, { "epoch": 27.38, "learning_rate": 3.6314799126053414e-05, "loss": 2.2493, "step": 5526500 }, { "epoch": 27.38, "learning_rate": 3.631356301680019e-05, "loss": 2.2382, "step": 5527000 }, { "epoch": 27.39, "learning_rate": 3.631232443037411e-05, "loss": 2.252, "step": 5527500 }, { "epoch": 27.39, "learning_rate": 3.6311085843948024e-05, "loss": 2.2755, "step": 5528000 }, { "epoch": 27.39, "learning_rate": 3.630984725752194e-05, "loss": 2.2891, "step": 5528500 }, { "epoch": 27.39, "learning_rate": 3.630861114826871e-05, "loss": 2.2629, "step": 5529000 }, { "epoch": 27.4, "learning_rate": 3.630737256184262e-05, "loss": 2.2718, "step": 5529500 }, { "epoch": 27.4, "learning_rate": 3.630613892976224e-05, "loss": 2.2516, "step": 5530000 }, { "epoch": 27.4, "learning_rate": 3.630490034333616e-05, "loss": 2.2641, "step": 5530500 }, { "epoch": 27.4, "learning_rate": 3.6303661756910074e-05, "loss": 2.2786, "step": 5531000 }, { "epoch": 27.4, "learning_rate": 3.630242317048399e-05, "loss": 2.2593, "step": 5531500 }, { "epoch": 27.41, "learning_rate": 3.630118458405791e-05, "loss": 2.2493, "step": 5532000 }, { "epoch": 27.41, "learning_rate": 3.6299945997631825e-05, "loss": 2.2607, "step": 5532500 }, { "epoch": 27.41, "learning_rate": 3.629870741120574e-05, "loss": 2.2367, "step": 5533000 }, { "epoch": 27.41, "learning_rate": 3.629746882477966e-05, "loss": 2.2638, "step": 5533500 }, { "epoch": 27.42, "learning_rate": 3.6296230238353576e-05, "loss": 2.2509, "step": 5534000 }, { "epoch": 27.42, "learning_rate": 3.629499165192749e-05, "loss": 2.2706, "step": 5534500 }, { "epoch": 27.42, "learning_rate": 3.6293755542674255e-05, "loss": 2.2787, "step": 5535000 }, { "epoch": 27.42, "learning_rate": 3.629251695624817e-05, "loss": 2.254, "step": 5535500 }, { "epoch": 27.43, "learning_rate": 3.629127836982209e-05, "loss": 2.2543, "step": 5536000 }, { "epoch": 27.43, "learning_rate": 3.629004226056886e-05, "loss": 2.2467, "step": 5536500 }, { "epoch": 27.43, "learning_rate": 3.6288803674142774e-05, "loss": 2.2442, "step": 5537000 }, { "epoch": 27.43, "learning_rate": 3.628756508771669e-05, "loss": 2.2459, "step": 5537500 }, { "epoch": 27.44, "learning_rate": 3.628632897846346e-05, "loss": 2.285, "step": 5538000 }, { "epoch": 27.44, "learning_rate": 3.628509039203738e-05, "loss": 2.2431, "step": 5538500 }, { "epoch": 27.44, "learning_rate": 3.6283851805611294e-05, "loss": 2.2626, "step": 5539000 }, { "epoch": 27.44, "learning_rate": 3.628261321918521e-05, "loss": 2.2379, "step": 5539500 }, { "epoch": 27.45, "learning_rate": 3.628137710993198e-05, "loss": 2.2729, "step": 5540000 }, { "epoch": 27.45, "learning_rate": 3.6280138523505897e-05, "loss": 2.2661, "step": 5540500 }, { "epoch": 27.45, "learning_rate": 3.6278899937079814e-05, "loss": 2.2461, "step": 5541000 }, { "epoch": 27.45, "learning_rate": 3.627766135065373e-05, "loss": 2.2775, "step": 5541500 }, { "epoch": 27.46, "learning_rate": 3.627642276422765e-05, "loss": 2.2716, "step": 5542000 }, { "epoch": 27.46, "learning_rate": 3.627518417780156e-05, "loss": 2.2505, "step": 5542500 }, { "epoch": 27.46, "learning_rate": 3.6273945591375474e-05, "loss": 2.2486, "step": 5543000 }, { "epoch": 27.46, "learning_rate": 3.627270700494939e-05, "loss": 2.2634, "step": 5543500 }, { "epoch": 27.47, "learning_rate": 3.627146841852331e-05, "loss": 2.2735, "step": 5544000 }, { "epoch": 27.47, "learning_rate": 3.6270229832097225e-05, "loss": 2.2689, "step": 5544500 }, { "epoch": 27.47, "learning_rate": 3.626899124567114e-05, "loss": 2.2402, "step": 5545000 }, { "epoch": 27.47, "learning_rate": 3.626775265924506e-05, "loss": 2.2754, "step": 5545500 }, { "epoch": 27.48, "learning_rate": 3.6266514072818976e-05, "loss": 2.2596, "step": 5546000 }, { "epoch": 27.48, "learning_rate": 3.626527548639289e-05, "loss": 2.2925, "step": 5546500 }, { "epoch": 27.48, "learning_rate": 3.626403689996681e-05, "loss": 2.2827, "step": 5547000 }, { "epoch": 27.48, "learning_rate": 3.626279831354073e-05, "loss": 2.2446, "step": 5547500 }, { "epoch": 27.49, "learning_rate": 3.6261559727114644e-05, "loss": 2.2501, "step": 5548000 }, { "epoch": 27.49, "learning_rate": 3.626032114068856e-05, "loss": 2.2617, "step": 5548500 }, { "epoch": 27.49, "learning_rate": 3.625908255426248e-05, "loss": 2.2591, "step": 5549000 }, { "epoch": 27.49, "learning_rate": 3.6257843967836395e-05, "loss": 2.2573, "step": 5549500 }, { "epoch": 27.5, "learning_rate": 3.6256605381410305e-05, "loss": 2.2601, "step": 5550000 }, { "epoch": 27.5, "learning_rate": 3.625536679498422e-05, "loss": 2.2669, "step": 5550500 }, { "epoch": 27.5, "learning_rate": 3.625412820855814e-05, "loss": 2.2688, "step": 5551000 }, { "epoch": 27.5, "learning_rate": 3.625289209930491e-05, "loss": 2.256, "step": 5551500 }, { "epoch": 27.51, "learning_rate": 3.6251653512878824e-05, "loss": 2.2706, "step": 5552000 }, { "epoch": 27.51, "learning_rate": 3.625041492645274e-05, "loss": 2.2595, "step": 5552500 }, { "epoch": 27.51, "learning_rate": 3.624917634002666e-05, "loss": 2.271, "step": 5553000 }, { "epoch": 27.51, "learning_rate": 3.6247937753600575e-05, "loss": 2.2814, "step": 5553500 }, { "epoch": 27.52, "learning_rate": 3.624669916717449e-05, "loss": 2.277, "step": 5554000 }, { "epoch": 27.52, "learning_rate": 3.62454605807484e-05, "loss": 2.2732, "step": 5554500 }, { "epoch": 27.52, "learning_rate": 3.624422199432232e-05, "loss": 2.2527, "step": 5555000 }, { "epoch": 27.52, "learning_rate": 3.624298836224194e-05, "loss": 2.2457, "step": 5555500 }, { "epoch": 27.53, "learning_rate": 3.624174977581586e-05, "loss": 2.2607, "step": 5556000 }, { "epoch": 27.53, "learning_rate": 3.6240511189389774e-05, "loss": 2.2696, "step": 5556500 }, { "epoch": 27.53, "learning_rate": 3.623927260296369e-05, "loss": 2.2735, "step": 5557000 }, { "epoch": 27.53, "learning_rate": 3.623803401653761e-05, "loss": 2.2682, "step": 5557500 }, { "epoch": 27.54, "learning_rate": 3.6236795430111524e-05, "loss": 2.2556, "step": 5558000 }, { "epoch": 27.54, "learning_rate": 3.623555684368544e-05, "loss": 2.263, "step": 5558500 }, { "epoch": 27.54, "learning_rate": 3.623432073443221e-05, "loss": 2.2585, "step": 5559000 }, { "epoch": 27.54, "learning_rate": 3.623308214800613e-05, "loss": 2.2543, "step": 5559500 }, { "epoch": 27.55, "learning_rate": 3.6231843561580044e-05, "loss": 2.2579, "step": 5560000 }, { "epoch": 27.55, "learning_rate": 3.623060497515396e-05, "loss": 2.2519, "step": 5560500 }, { "epoch": 27.55, "learning_rate": 3.622936638872788e-05, "loss": 2.2483, "step": 5561000 }, { "epoch": 27.55, "learning_rate": 3.6228127802301795e-05, "loss": 2.2644, "step": 5561500 }, { "epoch": 27.56, "learning_rate": 3.622688921587571e-05, "loss": 2.2766, "step": 5562000 }, { "epoch": 27.56, "learning_rate": 3.622565062944963e-05, "loss": 2.2594, "step": 5562500 }, { "epoch": 27.56, "learning_rate": 3.6224412043023546e-05, "loss": 2.2581, "step": 5563000 }, { "epoch": 27.56, "learning_rate": 3.6223173456597456e-05, "loss": 2.2555, "step": 5563500 }, { "epoch": 27.57, "learning_rate": 3.6221937347344225e-05, "loss": 2.2705, "step": 5564000 }, { "epoch": 27.57, "learning_rate": 3.622069876091814e-05, "loss": 2.2884, "step": 5564500 }, { "epoch": 27.57, "learning_rate": 3.621946017449206e-05, "loss": 2.2761, "step": 5565000 }, { "epoch": 27.57, "learning_rate": 3.6218221588065975e-05, "loss": 2.2661, "step": 5565500 }, { "epoch": 27.58, "learning_rate": 3.621698300163989e-05, "loss": 2.2659, "step": 5566000 }, { "epoch": 27.58, "learning_rate": 3.621574441521381e-05, "loss": 2.2419, "step": 5566500 }, { "epoch": 27.58, "learning_rate": 3.621450582878772e-05, "loss": 2.2708, "step": 5567000 }, { "epoch": 27.58, "learning_rate": 3.6213269719534495e-05, "loss": 2.2641, "step": 5567500 }, { "epoch": 27.59, "learning_rate": 3.621203113310841e-05, "loss": 2.2466, "step": 5568000 }, { "epoch": 27.59, "learning_rate": 3.621079254668233e-05, "loss": 2.2507, "step": 5568500 }, { "epoch": 27.59, "learning_rate": 3.6209553960256246e-05, "loss": 2.2843, "step": 5569000 }, { "epoch": 27.59, "learning_rate": 3.620831785100301e-05, "loss": 2.2604, "step": 5569500 }, { "epoch": 27.6, "learning_rate": 3.6207079264576925e-05, "loss": 2.2593, "step": 5570000 }, { "epoch": 27.6, "learning_rate": 3.620584067815084e-05, "loss": 2.2368, "step": 5570500 }, { "epoch": 27.6, "learning_rate": 3.620460209172476e-05, "loss": 2.2762, "step": 5571000 }, { "epoch": 27.6, "learning_rate": 3.6203363505298675e-05, "loss": 2.2359, "step": 5571500 }, { "epoch": 27.61, "learning_rate": 3.6202127396045444e-05, "loss": 2.2649, "step": 5572000 }, { "epoch": 27.61, "learning_rate": 3.6200893763965065e-05, "loss": 2.2789, "step": 5572500 }, { "epoch": 27.61, "learning_rate": 3.619965517753898e-05, "loss": 2.2694, "step": 5573000 }, { "epoch": 27.61, "learning_rate": 3.61984165911129e-05, "loss": 2.3009, "step": 5573500 }, { "epoch": 27.62, "learning_rate": 3.619717800468681e-05, "loss": 2.2679, "step": 5574000 }, { "epoch": 27.62, "learning_rate": 3.6195939418260726e-05, "loss": 2.2534, "step": 5574500 }, { "epoch": 27.62, "learning_rate": 3.619470083183464e-05, "loss": 2.2697, "step": 5575000 }, { "epoch": 27.62, "learning_rate": 3.619346224540856e-05, "loss": 2.2451, "step": 5575500 }, { "epoch": 27.63, "learning_rate": 3.619222365898248e-05, "loss": 2.2733, "step": 5576000 }, { "epoch": 27.63, "learning_rate": 3.6190985072556394e-05, "loss": 2.2543, "step": 5576500 }, { "epoch": 27.63, "learning_rate": 3.618974896330316e-05, "loss": 2.2541, "step": 5577000 }, { "epoch": 27.63, "learning_rate": 3.618851037687708e-05, "loss": 2.2615, "step": 5577500 }, { "epoch": 27.64, "learning_rate": 3.6187271790450996e-05, "loss": 2.2746, "step": 5578000 }, { "epoch": 27.64, "learning_rate": 3.6186035681197765e-05, "loss": 2.2567, "step": 5578500 }, { "epoch": 27.64, "learning_rate": 3.6184799571944534e-05, "loss": 2.2616, "step": 5579000 }, { "epoch": 27.64, "learning_rate": 3.618356098551845e-05, "loss": 2.2827, "step": 5579500 }, { "epoch": 27.65, "learning_rate": 3.618232239909237e-05, "loss": 2.2811, "step": 5580000 }, { "epoch": 27.65, "learning_rate": 3.6181083812666285e-05, "loss": 2.2912, "step": 5580500 }, { "epoch": 27.65, "learning_rate": 3.61798452262402e-05, "loss": 2.31, "step": 5581000 }, { "epoch": 27.65, "learning_rate": 3.617860663981412e-05, "loss": 2.2568, "step": 5581500 }, { "epoch": 27.66, "learning_rate": 3.6177368053388035e-05, "loss": 2.273, "step": 5582000 }, { "epoch": 27.66, "learning_rate": 3.617612946696195e-05, "loss": 2.2506, "step": 5582500 }, { "epoch": 27.66, "learning_rate": 3.617489088053586e-05, "loss": 2.2841, "step": 5583000 }, { "epoch": 27.66, "learning_rate": 3.617365477128263e-05, "loss": 2.2624, "step": 5583500 }, { "epoch": 27.67, "learning_rate": 3.617241618485655e-05, "loss": 2.2689, "step": 5584000 }, { "epoch": 27.67, "learning_rate": 3.6171177598430465e-05, "loss": 2.262, "step": 5584500 }, { "epoch": 27.67, "learning_rate": 3.616993901200438e-05, "loss": 2.2513, "step": 5585000 }, { "epoch": 27.67, "learning_rate": 3.61687004255783e-05, "loss": 2.2472, "step": 5585500 }, { "epoch": 27.67, "learning_rate": 3.6167461839152216e-05, "loss": 2.2559, "step": 5586000 }, { "epoch": 27.68, "learning_rate": 3.6166223252726126e-05, "loss": 2.2452, "step": 5586500 }, { "epoch": 27.68, "learning_rate": 3.616498466630004e-05, "loss": 2.2461, "step": 5587000 }, { "epoch": 27.68, "learning_rate": 3.616374607987396e-05, "loss": 2.2635, "step": 5587500 }, { "epoch": 27.68, "learning_rate": 3.616251244779358e-05, "loss": 2.2708, "step": 5588000 }, { "epoch": 27.69, "learning_rate": 3.61612738613675e-05, "loss": 2.2443, "step": 5588500 }, { "epoch": 27.69, "learning_rate": 3.6160035274941415e-05, "loss": 2.2791, "step": 5589000 }, { "epoch": 27.69, "learning_rate": 3.615879668851533e-05, "loss": 2.2916, "step": 5589500 }, { "epoch": 27.69, "learning_rate": 3.615755810208925e-05, "loss": 2.2805, "step": 5590000 }, { "epoch": 27.7, "learning_rate": 3.6156319515663165e-05, "loss": 2.2724, "step": 5590500 }, { "epoch": 27.7, "learning_rate": 3.615508092923708e-05, "loss": 2.2532, "step": 5591000 }, { "epoch": 27.7, "learning_rate": 3.6153842342811e-05, "loss": 2.2645, "step": 5591500 }, { "epoch": 27.7, "learning_rate": 3.6152603756384916e-05, "loss": 2.264, "step": 5592000 }, { "epoch": 27.71, "learning_rate": 3.6151365169958826e-05, "loss": 2.2729, "step": 5592500 }, { "epoch": 27.71, "learning_rate": 3.615012658353274e-05, "loss": 2.3001, "step": 5593000 }, { "epoch": 27.71, "learning_rate": 3.614888799710666e-05, "loss": 2.2556, "step": 5593500 }, { "epoch": 27.71, "learning_rate": 3.6147651887853436e-05, "loss": 2.2536, "step": 5594000 }, { "epoch": 27.72, "learning_rate": 3.614641330142735e-05, "loss": 2.2779, "step": 5594500 }, { "epoch": 27.72, "learning_rate": 3.614517471500127e-05, "loss": 2.2704, "step": 5595000 }, { "epoch": 27.72, "learning_rate": 3.614393612857518e-05, "loss": 2.2851, "step": 5595500 }, { "epoch": 27.72, "learning_rate": 3.6142697542149097e-05, "loss": 2.2879, "step": 5596000 }, { "epoch": 27.73, "learning_rate": 3.6141461432895865e-05, "loss": 2.2571, "step": 5596500 }, { "epoch": 27.73, "learning_rate": 3.614022284646978e-05, "loss": 2.2481, "step": 5597000 }, { "epoch": 27.73, "learning_rate": 3.61389842600437e-05, "loss": 2.2497, "step": 5597500 }, { "epoch": 27.73, "learning_rate": 3.6137745673617616e-05, "loss": 2.2729, "step": 5598000 }, { "epoch": 27.74, "learning_rate": 3.6136509564364385e-05, "loss": 2.2782, "step": 5598500 }, { "epoch": 27.74, "learning_rate": 3.61352709779383e-05, "loss": 2.2591, "step": 5599000 }, { "epoch": 27.74, "learning_rate": 3.613403239151222e-05, "loss": 2.2868, "step": 5599500 }, { "epoch": 27.74, "learning_rate": 3.6132793805086136e-05, "loss": 2.2517, "step": 5600000 }, { "epoch": 27.75, "learning_rate": 3.613155521866005e-05, "loss": 2.2596, "step": 5600500 }, { "epoch": 27.75, "learning_rate": 3.613031663223397e-05, "loss": 2.2664, "step": 5601000 }, { "epoch": 27.75, "learning_rate": 3.6129078045807887e-05, "loss": 2.2664, "step": 5601500 }, { "epoch": 27.75, "learning_rate": 3.61278394593818e-05, "loss": 2.2545, "step": 5602000 }, { "epoch": 27.76, "learning_rate": 3.6126600872955714e-05, "loss": 2.2648, "step": 5602500 }, { "epoch": 27.76, "learning_rate": 3.612536476370248e-05, "loss": 2.2745, "step": 5603000 }, { "epoch": 27.76, "learning_rate": 3.612412865444925e-05, "loss": 2.2538, "step": 5603500 }, { "epoch": 27.76, "learning_rate": 3.612289006802317e-05, "loss": 2.2435, "step": 5604000 }, { "epoch": 27.77, "learning_rate": 3.6121651481597085e-05, "loss": 2.2784, "step": 5604500 }, { "epoch": 27.77, "learning_rate": 3.6120412895171e-05, "loss": 2.2727, "step": 5605000 }, { "epoch": 27.77, "learning_rate": 3.611917430874492e-05, "loss": 2.2786, "step": 5605500 }, { "epoch": 27.77, "learning_rate": 3.6117935722318836e-05, "loss": 2.2708, "step": 5606000 }, { "epoch": 27.78, "learning_rate": 3.6116699613065605e-05, "loss": 2.268, "step": 5606500 }, { "epoch": 27.78, "learning_rate": 3.611546102663952e-05, "loss": 2.2608, "step": 5607000 }, { "epoch": 27.78, "learning_rate": 3.611422244021344e-05, "loss": 2.2927, "step": 5607500 }, { "epoch": 27.78, "learning_rate": 3.6112983853787356e-05, "loss": 2.2539, "step": 5608000 }, { "epoch": 27.79, "learning_rate": 3.6111745267361266e-05, "loss": 2.2372, "step": 5608500 }, { "epoch": 27.79, "learning_rate": 3.611050668093518e-05, "loss": 2.2446, "step": 5609000 }, { "epoch": 27.79, "learning_rate": 3.61092680945091e-05, "loss": 2.2775, "step": 5609500 }, { "epoch": 27.79, "learning_rate": 3.610803446242872e-05, "loss": 2.2643, "step": 5610000 }, { "epoch": 27.8, "learning_rate": 3.610679587600264e-05, "loss": 2.2885, "step": 5610500 }, { "epoch": 27.8, "learning_rate": 3.6105557289576554e-05, "loss": 2.2784, "step": 5611000 }, { "epoch": 27.8, "learning_rate": 3.610431870315047e-05, "loss": 2.2686, "step": 5611500 }, { "epoch": 27.8, "learning_rate": 3.610308011672439e-05, "loss": 2.2745, "step": 5612000 }, { "epoch": 27.81, "learning_rate": 3.6101841530298305e-05, "loss": 2.2697, "step": 5612500 }, { "epoch": 27.81, "learning_rate": 3.610060294387222e-05, "loss": 2.2637, "step": 5613000 }, { "epoch": 27.81, "learning_rate": 3.609936435744614e-05, "loss": 2.2522, "step": 5613500 }, { "epoch": 27.81, "learning_rate": 3.6098125771020056e-05, "loss": 2.2637, "step": 5614000 }, { "epoch": 27.82, "learning_rate": 3.609688718459397e-05, "loss": 2.275, "step": 5614500 }, { "epoch": 27.82, "learning_rate": 3.6095653552513586e-05, "loss": 2.2715, "step": 5615000 }, { "epoch": 27.82, "learning_rate": 3.6094414966087503e-05, "loss": 2.2733, "step": 5615500 }, { "epoch": 27.82, "learning_rate": 3.609317637966142e-05, "loss": 2.2502, "step": 5616000 }, { "epoch": 27.83, "learning_rate": 3.609193779323534e-05, "loss": 2.2829, "step": 5616500 }, { "epoch": 27.83, "learning_rate": 3.6090699206809254e-05, "loss": 2.2659, "step": 5617000 }, { "epoch": 27.83, "learning_rate": 3.608946062038317e-05, "loss": 2.2579, "step": 5617500 }, { "epoch": 27.83, "learning_rate": 3.608822203395709e-05, "loss": 2.2465, "step": 5618000 }, { "epoch": 27.84, "learning_rate": 3.6086983447531005e-05, "loss": 2.2796, "step": 5618500 }, { "epoch": 27.84, "learning_rate": 3.608574486110492e-05, "loss": 2.2645, "step": 5619000 }, { "epoch": 27.84, "learning_rate": 3.608450627467884e-05, "loss": 2.2538, "step": 5619500 }, { "epoch": 27.84, "learning_rate": 3.6083267688252756e-05, "loss": 2.2571, "step": 5620000 }, { "epoch": 27.85, "learning_rate": 3.608202910182667e-05, "loss": 2.2596, "step": 5620500 }, { "epoch": 27.85, "learning_rate": 3.608079051540059e-05, "loss": 2.2624, "step": 5621000 }, { "epoch": 27.85, "learning_rate": 3.6079551928974507e-05, "loss": 2.2717, "step": 5621500 }, { "epoch": 27.85, "learning_rate": 3.607831829689412e-05, "loss": 2.2788, "step": 5622000 }, { "epoch": 27.86, "learning_rate": 3.607707971046804e-05, "loss": 2.2695, "step": 5622500 }, { "epoch": 27.86, "learning_rate": 3.6075841124041954e-05, "loss": 2.2771, "step": 5623000 }, { "epoch": 27.86, "learning_rate": 3.607460253761587e-05, "loss": 2.2837, "step": 5623500 }, { "epoch": 27.86, "learning_rate": 3.607336395118979e-05, "loss": 2.2757, "step": 5624000 }, { "epoch": 27.87, "learning_rate": 3.6072125364763705e-05, "loss": 2.2864, "step": 5624500 }, { "epoch": 27.87, "learning_rate": 3.607088677833762e-05, "loss": 2.2869, "step": 5625000 }, { "epoch": 27.87, "learning_rate": 3.606964819191154e-05, "loss": 2.2667, "step": 5625500 }, { "epoch": 27.87, "learning_rate": 3.6068409605485456e-05, "loss": 2.2932, "step": 5626000 }, { "epoch": 27.88, "learning_rate": 3.606717349623222e-05, "loss": 2.2459, "step": 5626500 }, { "epoch": 27.88, "learning_rate": 3.6065934909806135e-05, "loss": 2.2514, "step": 5627000 }, { "epoch": 27.88, "learning_rate": 3.606469632338005e-05, "loss": 2.2673, "step": 5627500 }, { "epoch": 27.88, "learning_rate": 3.606345773695397e-05, "loss": 2.2727, "step": 5628000 }, { "epoch": 27.89, "learning_rate": 3.6062219150527886e-05, "loss": 2.2612, "step": 5628500 }, { "epoch": 27.89, "learning_rate": 3.6060983041274654e-05, "loss": 2.2404, "step": 5629000 }, { "epoch": 27.89, "learning_rate": 3.605974445484857e-05, "loss": 2.2698, "step": 5629500 }, { "epoch": 27.89, "learning_rate": 3.605850834559534e-05, "loss": 2.2836, "step": 5630000 }, { "epoch": 27.9, "learning_rate": 3.605726975916926e-05, "loss": 2.2787, "step": 5630500 }, { "epoch": 27.9, "learning_rate": 3.6056031172743174e-05, "loss": 2.2894, "step": 5631000 }, { "epoch": 27.9, "learning_rate": 3.605479506348994e-05, "loss": 2.249, "step": 5631500 }, { "epoch": 27.9, "learning_rate": 3.605355647706386e-05, "loss": 2.2716, "step": 5632000 }, { "epoch": 27.91, "learning_rate": 3.605231789063778e-05, "loss": 2.2512, "step": 5632500 }, { "epoch": 27.91, "learning_rate": 3.6051079304211694e-05, "loss": 2.2884, "step": 5633000 }, { "epoch": 27.91, "learning_rate": 3.604984071778561e-05, "loss": 2.2531, "step": 5633500 }, { "epoch": 27.91, "learning_rate": 3.604860213135952e-05, "loss": 2.2674, "step": 5634000 }, { "epoch": 27.92, "learning_rate": 3.604736354493344e-05, "loss": 2.2461, "step": 5634500 }, { "epoch": 27.92, "learning_rate": 3.604612743568021e-05, "loss": 2.2815, "step": 5635000 }, { "epoch": 27.92, "learning_rate": 3.604488884925413e-05, "loss": 2.2463, "step": 5635500 }, { "epoch": 27.92, "learning_rate": 3.604365026282804e-05, "loss": 2.2841, "step": 5636000 }, { "epoch": 27.93, "learning_rate": 3.604241167640196e-05, "loss": 2.2488, "step": 5636500 }, { "epoch": 27.93, "learning_rate": 3.6041173089975874e-05, "loss": 2.2632, "step": 5637000 }, { "epoch": 27.93, "learning_rate": 3.603993698072264e-05, "loss": 2.2737, "step": 5637500 }, { "epoch": 27.93, "learning_rate": 3.603869839429656e-05, "loss": 2.2868, "step": 5638000 }, { "epoch": 27.94, "learning_rate": 3.603745980787048e-05, "loss": 2.244, "step": 5638500 }, { "epoch": 27.94, "learning_rate": 3.6036221221444394e-05, "loss": 2.2787, "step": 5639000 }, { "epoch": 27.94, "learning_rate": 3.603498263501831e-05, "loss": 2.2593, "step": 5639500 }, { "epoch": 27.94, "learning_rate": 3.603374404859222e-05, "loss": 2.2728, "step": 5640000 }, { "epoch": 27.94, "learning_rate": 3.603250546216614e-05, "loss": 2.2731, "step": 5640500 }, { "epoch": 27.95, "learning_rate": 3.6031266875740055e-05, "loss": 2.2936, "step": 5641000 }, { "epoch": 27.95, "learning_rate": 3.603003076648683e-05, "loss": 2.2646, "step": 5641500 }, { "epoch": 27.95, "learning_rate": 3.602879218006075e-05, "loss": 2.3049, "step": 5642000 }, { "epoch": 27.95, "learning_rate": 3.6027553593634664e-05, "loss": 2.2743, "step": 5642500 }, { "epoch": 27.96, "learning_rate": 3.6026315007208574e-05, "loss": 2.2442, "step": 5643000 }, { "epoch": 27.96, "learning_rate": 3.602507642078249e-05, "loss": 2.2908, "step": 5643500 }, { "epoch": 27.96, "learning_rate": 3.602383783435641e-05, "loss": 2.2864, "step": 5644000 }, { "epoch": 27.96, "learning_rate": 3.6022599247930325e-05, "loss": 2.2823, "step": 5644500 }, { "epoch": 27.97, "learning_rate": 3.602136066150424e-05, "loss": 2.2878, "step": 5645000 }, { "epoch": 27.97, "learning_rate": 3.602012207507815e-05, "loss": 2.2742, "step": 5645500 }, { "epoch": 27.97, "learning_rate": 3.601888844299778e-05, "loss": 2.2998, "step": 5646000 }, { "epoch": 27.97, "learning_rate": 3.6017649856571697e-05, "loss": 2.2767, "step": 5646500 }, { "epoch": 27.98, "learning_rate": 3.6016411270145613e-05, "loss": 2.264, "step": 5647000 }, { "epoch": 27.98, "learning_rate": 3.601517268371953e-05, "loss": 2.2855, "step": 5647500 }, { "epoch": 27.98, "learning_rate": 3.601393409729345e-05, "loss": 2.2364, "step": 5648000 }, { "epoch": 27.98, "learning_rate": 3.6012695510867364e-05, "loss": 2.2728, "step": 5648500 }, { "epoch": 27.99, "learning_rate": 3.601145692444128e-05, "loss": 2.2712, "step": 5649000 }, { "epoch": 27.99, "learning_rate": 3.601021833801519e-05, "loss": 2.2799, "step": 5649500 }, { "epoch": 27.99, "learning_rate": 3.600897975158911e-05, "loss": 2.2532, "step": 5650000 }, { "epoch": 27.99, "learning_rate": 3.6007741165163025e-05, "loss": 2.2599, "step": 5650500 }, { "epoch": 28.0, "learning_rate": 3.600650257873694e-05, "loss": 2.2928, "step": 5651000 }, { "epoch": 28.0, "learning_rate": 3.600526399231086e-05, "loss": 2.2934, "step": 5651500 }, { "epoch": 28.0, "eval_accuracy": 0.6571310181504103, "eval_accuracy_mlm": 0.6123743861697156, "eval_accuracy_nsp": 0.8682572492047741, "eval_loss": 2.3233656883239746, "eval_runtime": 146.0438, "eval_samples_per_second": 1745.771, "eval_steps_per_second": 72.745, "step": 5651604 }, { "epoch": 28.0, "learning_rate": 3.600402540588477e-05, "loss": 2.2378, "step": 5652000 }, { "epoch": 28.0, "learning_rate": 3.6002786819458686e-05, "loss": 2.2507, "step": 5652500 }, { "epoch": 28.01, "learning_rate": 3.6001550710205455e-05, "loss": 2.2047, "step": 5653000 }, { "epoch": 28.01, "learning_rate": 3.600031212377937e-05, "loss": 2.2663, "step": 5653500 }, { "epoch": 28.01, "learning_rate": 3.599907353735329e-05, "loss": 2.2253, "step": 5654000 }, { "epoch": 28.01, "learning_rate": 3.5997834950927206e-05, "loss": 2.2129, "step": 5654500 }, { "epoch": 28.02, "learning_rate": 3.599659636450112e-05, "loss": 2.2342, "step": 5655000 }, { "epoch": 28.02, "learning_rate": 3.599535777807504e-05, "loss": 2.2216, "step": 5655500 }, { "epoch": 28.02, "learning_rate": 3.5994119191648957e-05, "loss": 2.211, "step": 5656000 }, { "epoch": 28.02, "learning_rate": 3.5992883082395725e-05, "loss": 2.2144, "step": 5656500 }, { "epoch": 28.03, "learning_rate": 3.599164449596964e-05, "loss": 2.2563, "step": 5657000 }, { "epoch": 28.03, "learning_rate": 3.599040590954356e-05, "loss": 2.2285, "step": 5657500 }, { "epoch": 28.03, "learning_rate": 3.5989167323117476e-05, "loss": 2.2173, "step": 5658000 }, { "epoch": 28.03, "learning_rate": 3.598792873669139e-05, "loss": 2.2218, "step": 5658500 }, { "epoch": 28.04, "learning_rate": 3.5986692627438155e-05, "loss": 2.2319, "step": 5659000 }, { "epoch": 28.04, "learning_rate": 3.598545404101207e-05, "loss": 2.2306, "step": 5659500 }, { "epoch": 28.04, "learning_rate": 3.598421793175885e-05, "loss": 2.2447, "step": 5660000 }, { "epoch": 28.04, "learning_rate": 3.5982979345332765e-05, "loss": 2.26, "step": 5660500 }, { "epoch": 28.05, "learning_rate": 3.598174075890668e-05, "loss": 2.2248, "step": 5661000 }, { "epoch": 28.05, "learning_rate": 3.59805021724806e-05, "loss": 2.2226, "step": 5661500 }, { "epoch": 28.05, "learning_rate": 3.597926358605451e-05, "loss": 2.2503, "step": 5662000 }, { "epoch": 28.05, "learning_rate": 3.5978024999628425e-05, "loss": 2.2375, "step": 5662500 }, { "epoch": 28.06, "learning_rate": 3.597678641320234e-05, "loss": 2.2391, "step": 5663000 }, { "epoch": 28.06, "learning_rate": 3.597554782677626e-05, "loss": 2.2428, "step": 5663500 }, { "epoch": 28.06, "learning_rate": 3.5974309240350176e-05, "loss": 2.2463, "step": 5664000 }, { "epoch": 28.06, "learning_rate": 3.597307065392409e-05, "loss": 2.2465, "step": 5664500 }, { "epoch": 28.07, "learning_rate": 3.597183206749801e-05, "loss": 2.2468, "step": 5665000 }, { "epoch": 28.07, "learning_rate": 3.597059348107192e-05, "loss": 2.2688, "step": 5665500 }, { "epoch": 28.07, "learning_rate": 3.596935737181869e-05, "loss": 2.2244, "step": 5666000 }, { "epoch": 28.07, "learning_rate": 3.5968118785392606e-05, "loss": 2.2423, "step": 5666500 }, { "epoch": 28.08, "learning_rate": 3.596688019896652e-05, "loss": 2.2341, "step": 5667000 }, { "epoch": 28.08, "learning_rate": 3.596564161254044e-05, "loss": 2.2489, "step": 5667500 }, { "epoch": 28.08, "learning_rate": 3.596440302611436e-05, "loss": 2.2332, "step": 5668000 }, { "epoch": 28.08, "learning_rate": 3.596316939403398e-05, "loss": 2.2225, "step": 5668500 }, { "epoch": 28.09, "learning_rate": 3.5961930807607894e-05, "loss": 2.2174, "step": 5669000 }, { "epoch": 28.09, "learning_rate": 3.596069222118181e-05, "loss": 2.2281, "step": 5669500 }, { "epoch": 28.09, "learning_rate": 3.595945363475573e-05, "loss": 2.2196, "step": 5670000 }, { "epoch": 28.09, "learning_rate": 3.5958215048329645e-05, "loss": 2.273, "step": 5670500 }, { "epoch": 28.1, "learning_rate": 3.595697646190356e-05, "loss": 2.2395, "step": 5671000 }, { "epoch": 28.1, "learning_rate": 3.595573787547747e-05, "loss": 2.2184, "step": 5671500 }, { "epoch": 28.1, "learning_rate": 3.595449928905139e-05, "loss": 2.2504, "step": 5672000 }, { "epoch": 28.1, "learning_rate": 3.5953260702625306e-05, "loss": 2.2414, "step": 5672500 }, { "epoch": 28.11, "learning_rate": 3.595202459337208e-05, "loss": 2.2489, "step": 5673000 }, { "epoch": 28.11, "learning_rate": 3.5950786006946e-05, "loss": 2.2339, "step": 5673500 }, { "epoch": 28.11, "learning_rate": 3.5949547420519916e-05, "loss": 2.2422, "step": 5674000 }, { "epoch": 28.11, "learning_rate": 3.594831626561239e-05, "loss": 2.2523, "step": 5674500 }, { "epoch": 28.12, "learning_rate": 3.59470776791863e-05, "loss": 2.2267, "step": 5675000 }, { "epoch": 28.12, "learning_rate": 3.594584156993307e-05, "loss": 2.2262, "step": 5675500 }, { "epoch": 28.12, "learning_rate": 3.5944602983506984e-05, "loss": 2.2335, "step": 5676000 }, { "epoch": 28.12, "learning_rate": 3.59433643970809e-05, "loss": 2.2257, "step": 5676500 }, { "epoch": 28.13, "learning_rate": 3.594212581065482e-05, "loss": 2.2219, "step": 5677000 }, { "epoch": 28.13, "learning_rate": 3.5940887224228735e-05, "loss": 2.2501, "step": 5677500 }, { "epoch": 28.13, "learning_rate": 3.5939648637802645e-05, "loss": 2.2395, "step": 5678000 }, { "epoch": 28.13, "learning_rate": 3.593841005137656e-05, "loss": 2.2518, "step": 5678500 }, { "epoch": 28.14, "learning_rate": 3.593717394212334e-05, "loss": 2.2567, "step": 5679000 }, { "epoch": 28.14, "learning_rate": 3.5935935355697254e-05, "loss": 2.2426, "step": 5679500 }, { "epoch": 28.14, "learning_rate": 3.593469676927117e-05, "loss": 2.2331, "step": 5680000 }, { "epoch": 28.14, "learning_rate": 3.593346066001794e-05, "loss": 2.2449, "step": 5680500 }, { "epoch": 28.15, "learning_rate": 3.593222207359185e-05, "loss": 2.2522, "step": 5681000 }, { "epoch": 28.15, "learning_rate": 3.593098348716577e-05, "loss": 2.238, "step": 5681500 }, { "epoch": 28.15, "learning_rate": 3.5929744900739684e-05, "loss": 2.2348, "step": 5682000 }, { "epoch": 28.15, "learning_rate": 3.59285063143136e-05, "loss": 2.2584, "step": 5682500 }, { "epoch": 28.16, "learning_rate": 3.592726772788752e-05, "loss": 2.2451, "step": 5683000 }, { "epoch": 28.16, "learning_rate": 3.5926029141461435e-05, "loss": 2.2386, "step": 5683500 }, { "epoch": 28.16, "learning_rate": 3.592479055503535e-05, "loss": 2.2602, "step": 5684000 }, { "epoch": 28.16, "learning_rate": 3.592355196860926e-05, "loss": 2.2475, "step": 5684500 }, { "epoch": 28.17, "learning_rate": 3.592231338218318e-05, "loss": 2.2594, "step": 5685000 }, { "epoch": 28.17, "learning_rate": 3.5921074795757096e-05, "loss": 2.2606, "step": 5685500 }, { "epoch": 28.17, "learning_rate": 3.591983620933101e-05, "loss": 2.2404, "step": 5686000 }, { "epoch": 28.17, "learning_rate": 3.591859762290493e-05, "loss": 2.219, "step": 5686500 }, { "epoch": 28.18, "learning_rate": 3.591735903647885e-05, "loss": 2.2316, "step": 5687000 }, { "epoch": 28.18, "learning_rate": 3.5916122927225615e-05, "loss": 2.2293, "step": 5687500 }, { "epoch": 28.18, "learning_rate": 3.591488434079953e-05, "loss": 2.2417, "step": 5688000 }, { "epoch": 28.18, "learning_rate": 3.591364575437345e-05, "loss": 2.2511, "step": 5688500 }, { "epoch": 28.19, "learning_rate": 3.5912407167947366e-05, "loss": 2.2389, "step": 5689000 }, { "epoch": 28.19, "learning_rate": 3.591116858152128e-05, "loss": 2.2493, "step": 5689500 }, { "epoch": 28.19, "learning_rate": 3.59099299950952e-05, "loss": 2.2295, "step": 5690000 }, { "epoch": 28.19, "learning_rate": 3.590869140866912e-05, "loss": 2.2424, "step": 5690500 }, { "epoch": 28.2, "learning_rate": 3.5907452822243034e-05, "loss": 2.267, "step": 5691000 }, { "epoch": 28.2, "learning_rate": 3.590621423581695e-05, "loss": 2.2283, "step": 5691500 }, { "epoch": 28.2, "learning_rate": 3.590497564939087e-05, "loss": 2.2434, "step": 5692000 }, { "epoch": 28.2, "learning_rate": 3.590373954013763e-05, "loss": 2.2637, "step": 5692500 }, { "epoch": 28.21, "learning_rate": 3.590250095371155e-05, "loss": 2.2305, "step": 5693000 }, { "epoch": 28.21, "learning_rate": 3.5901262367285464e-05, "loss": 2.2433, "step": 5693500 }, { "epoch": 28.21, "learning_rate": 3.590002873520509e-05, "loss": 2.2236, "step": 5694000 }, { "epoch": 28.21, "learning_rate": 3.5898790148779e-05, "loss": 2.261, "step": 5694500 }, { "epoch": 28.21, "learning_rate": 3.589755156235292e-05, "loss": 2.2614, "step": 5695000 }, { "epoch": 28.22, "learning_rate": 3.5896312975926835e-05, "loss": 2.2338, "step": 5695500 }, { "epoch": 28.22, "learning_rate": 3.589507438950075e-05, "loss": 2.2459, "step": 5696000 }, { "epoch": 28.22, "learning_rate": 3.589383580307467e-05, "loss": 2.2287, "step": 5696500 }, { "epoch": 28.22, "learning_rate": 3.589259721664858e-05, "loss": 2.2181, "step": 5697000 }, { "epoch": 28.23, "learning_rate": 3.5891358630222496e-05, "loss": 2.2656, "step": 5697500 }, { "epoch": 28.23, "learning_rate": 3.589012252096927e-05, "loss": 2.2618, "step": 5698000 }, { "epoch": 28.23, "learning_rate": 3.588888393454319e-05, "loss": 2.2414, "step": 5698500 }, { "epoch": 28.23, "learning_rate": 3.5887645348117106e-05, "loss": 2.2461, "step": 5699000 }, { "epoch": 28.24, "learning_rate": 3.588640676169102e-05, "loss": 2.2572, "step": 5699500 }, { "epoch": 28.24, "learning_rate": 3.588516817526493e-05, "loss": 2.2578, "step": 5700000 }, { "epoch": 28.24, "learning_rate": 3.588392958883885e-05, "loss": 2.2657, "step": 5700500 }, { "epoch": 28.24, "learning_rate": 3.5882691002412766e-05, "loss": 2.2494, "step": 5701000 }, { "epoch": 28.25, "learning_rate": 3.5881452415986683e-05, "loss": 2.259, "step": 5701500 }, { "epoch": 28.25, "learning_rate": 3.58802138295606e-05, "loss": 2.2313, "step": 5702000 }, { "epoch": 28.25, "learning_rate": 3.587897772030737e-05, "loss": 2.2567, "step": 5702500 }, { "epoch": 28.25, "learning_rate": 3.5877739133881286e-05, "loss": 2.2508, "step": 5703000 }, { "epoch": 28.26, "learning_rate": 3.58765005474552e-05, "loss": 2.2611, "step": 5703500 }, { "epoch": 28.26, "learning_rate": 3.587526196102911e-05, "loss": 2.2495, "step": 5704000 }, { "epoch": 28.26, "learning_rate": 3.587402337460303e-05, "loss": 2.2509, "step": 5704500 }, { "epoch": 28.26, "learning_rate": 3.587278478817695e-05, "loss": 2.2434, "step": 5705000 }, { "epoch": 28.27, "learning_rate": 3.5871546201750864e-05, "loss": 2.2263, "step": 5705500 }, { "epoch": 28.27, "learning_rate": 3.587030761532478e-05, "loss": 2.2574, "step": 5706000 }, { "epoch": 28.27, "learning_rate": 3.58690690288987e-05, "loss": 2.2488, "step": 5706500 }, { "epoch": 28.27, "learning_rate": 3.5867835396818325e-05, "loss": 2.2452, "step": 5707000 }, { "epoch": 28.28, "learning_rate": 3.586659681039224e-05, "loss": 2.2347, "step": 5707500 }, { "epoch": 28.28, "learning_rate": 3.586535822396615e-05, "loss": 2.2418, "step": 5708000 }, { "epoch": 28.28, "learning_rate": 3.586411963754007e-05, "loss": 2.2475, "step": 5708500 }, { "epoch": 28.28, "learning_rate": 3.5862881051113986e-05, "loss": 2.2471, "step": 5709000 }, { "epoch": 28.29, "learning_rate": 3.58616424646879e-05, "loss": 2.2539, "step": 5709500 }, { "epoch": 28.29, "learning_rate": 3.586040387826182e-05, "loss": 2.2612, "step": 5710000 }, { "epoch": 28.29, "learning_rate": 3.585916529183573e-05, "loss": 2.248, "step": 5710500 }, { "epoch": 28.29, "learning_rate": 3.5857929182582506e-05, "loss": 2.2418, "step": 5711000 }, { "epoch": 28.3, "learning_rate": 3.585669059615642e-05, "loss": 2.2432, "step": 5711500 }, { "epoch": 28.3, "learning_rate": 3.585545200973034e-05, "loss": 2.2694, "step": 5712000 }, { "epoch": 28.3, "learning_rate": 3.585421342330425e-05, "loss": 2.2284, "step": 5712500 }, { "epoch": 28.3, "learning_rate": 3.585297483687817e-05, "loss": 2.2573, "step": 5713000 }, { "epoch": 28.31, "learning_rate": 3.585173872762494e-05, "loss": 2.2553, "step": 5713500 }, { "epoch": 28.31, "learning_rate": 3.585050014119886e-05, "loss": 2.2186, "step": 5714000 }, { "epoch": 28.31, "learning_rate": 3.5849261554772776e-05, "loss": 2.2606, "step": 5714500 }, { "epoch": 28.31, "learning_rate": 3.5848022968346686e-05, "loss": 2.2582, "step": 5715000 }, { "epoch": 28.32, "learning_rate": 3.58467843819206e-05, "loss": 2.2227, "step": 5715500 }, { "epoch": 28.32, "learning_rate": 3.584554579549452e-05, "loss": 2.254, "step": 5716000 }, { "epoch": 28.32, "learning_rate": 3.584430720906844e-05, "loss": 2.2496, "step": 5716500 }, { "epoch": 28.32, "learning_rate": 3.5843068622642354e-05, "loss": 2.2388, "step": 5717000 }, { "epoch": 28.33, "learning_rate": 3.5841830036216264e-05, "loss": 2.243, "step": 5717500 }, { "epoch": 28.33, "learning_rate": 3.584059144979018e-05, "loss": 2.2567, "step": 5718000 }, { "epoch": 28.33, "learning_rate": 3.58393528633641e-05, "loss": 2.2261, "step": 5718500 }, { "epoch": 28.33, "learning_rate": 3.5838114276938015e-05, "loss": 2.2367, "step": 5719000 }, { "epoch": 28.34, "learning_rate": 3.5836878167684784e-05, "loss": 2.272, "step": 5719500 }, { "epoch": 28.34, "learning_rate": 3.583564205843156e-05, "loss": 2.2329, "step": 5720000 }, { "epoch": 28.34, "learning_rate": 3.583440594917832e-05, "loss": 2.2338, "step": 5720500 }, { "epoch": 28.34, "learning_rate": 3.583316736275224e-05, "loss": 2.2532, "step": 5721000 }, { "epoch": 28.35, "learning_rate": 3.583193125349901e-05, "loss": 2.2604, "step": 5721500 }, { "epoch": 28.35, "learning_rate": 3.5830692667072924e-05, "loss": 2.274, "step": 5722000 }, { "epoch": 28.35, "learning_rate": 3.582945408064684e-05, "loss": 2.243, "step": 5722500 }, { "epoch": 28.35, "learning_rate": 3.582821549422076e-05, "loss": 2.2256, "step": 5723000 }, { "epoch": 28.36, "learning_rate": 3.5826976907794675e-05, "loss": 2.2452, "step": 5723500 }, { "epoch": 28.36, "learning_rate": 3.582573832136859e-05, "loss": 2.2638, "step": 5724000 }, { "epoch": 28.36, "learning_rate": 3.582449973494251e-05, "loss": 2.2412, "step": 5724500 }, { "epoch": 28.36, "learning_rate": 3.5823261148516426e-05, "loss": 2.2771, "step": 5725000 }, { "epoch": 28.37, "learning_rate": 3.582202256209034e-05, "loss": 2.2621, "step": 5725500 }, { "epoch": 28.37, "learning_rate": 3.5820788930009956e-05, "loss": 2.23, "step": 5726000 }, { "epoch": 28.37, "learning_rate": 3.5819550343583873e-05, "loss": 2.2686, "step": 5726500 }, { "epoch": 28.37, "learning_rate": 3.581831175715779e-05, "loss": 2.2551, "step": 5727000 }, { "epoch": 28.38, "learning_rate": 3.581707317073171e-05, "loss": 2.2563, "step": 5727500 }, { "epoch": 28.38, "learning_rate": 3.5815834584305624e-05, "loss": 2.2388, "step": 5728000 }, { "epoch": 28.38, "learning_rate": 3.581459599787954e-05, "loss": 2.2399, "step": 5728500 }, { "epoch": 28.38, "learning_rate": 3.581335741145346e-05, "loss": 2.2514, "step": 5729000 }, { "epoch": 28.39, "learning_rate": 3.581212130220023e-05, "loss": 2.2183, "step": 5729500 }, { "epoch": 28.39, "learning_rate": 3.5810885192946996e-05, "loss": 2.236, "step": 5730000 }, { "epoch": 28.39, "learning_rate": 3.580964660652091e-05, "loss": 2.2386, "step": 5730500 }, { "epoch": 28.39, "learning_rate": 3.580840802009483e-05, "loss": 2.251, "step": 5731000 }, { "epoch": 28.4, "learning_rate": 3.5807169433668746e-05, "loss": 2.2459, "step": 5731500 }, { "epoch": 28.4, "learning_rate": 3.5805930847242657e-05, "loss": 2.2252, "step": 5732000 }, { "epoch": 28.4, "learning_rate": 3.5804692260816574e-05, "loss": 2.247, "step": 5732500 }, { "epoch": 28.4, "learning_rate": 3.580345367439049e-05, "loss": 2.2305, "step": 5733000 }, { "epoch": 28.41, "learning_rate": 3.580221508796441e-05, "loss": 2.2407, "step": 5733500 }, { "epoch": 28.41, "learning_rate": 3.5800976501538324e-05, "loss": 2.251, "step": 5734000 }, { "epoch": 28.41, "learning_rate": 3.579973791511224e-05, "loss": 2.2658, "step": 5734500 }, { "epoch": 28.41, "learning_rate": 3.579849932868616e-05, "loss": 2.2874, "step": 5735000 }, { "epoch": 28.42, "learning_rate": 3.5797260742260075e-05, "loss": 2.2551, "step": 5735500 }, { "epoch": 28.42, "learning_rate": 3.579602215583399e-05, "loss": 2.2694, "step": 5736000 }, { "epoch": 28.42, "learning_rate": 3.579478356940791e-05, "loss": 2.2369, "step": 5736500 }, { "epoch": 28.42, "learning_rate": 3.5793544982981826e-05, "loss": 2.2427, "step": 5737000 }, { "epoch": 28.43, "learning_rate": 3.579230639655574e-05, "loss": 2.2528, "step": 5737500 }, { "epoch": 28.43, "learning_rate": 3.579106781012966e-05, "loss": 2.252, "step": 5738000 }, { "epoch": 28.43, "learning_rate": 3.578982922370358e-05, "loss": 2.25, "step": 5738500 }, { "epoch": 28.43, "learning_rate": 3.5788590637277494e-05, "loss": 2.278, "step": 5739000 }, { "epoch": 28.44, "learning_rate": 3.578735205085141e-05, "loss": 2.2768, "step": 5739500 }, { "epoch": 28.44, "learning_rate": 3.578611346442533e-05, "loss": 2.2398, "step": 5740000 }, { "epoch": 28.44, "learning_rate": 3.578487735517209e-05, "loss": 2.2404, "step": 5740500 }, { "epoch": 28.44, "learning_rate": 3.5783638768746006e-05, "loss": 2.2455, "step": 5741000 }, { "epoch": 28.45, "learning_rate": 3.578240018231992e-05, "loss": 2.2767, "step": 5741500 }, { "epoch": 28.45, "learning_rate": 3.578116159589384e-05, "loss": 2.2687, "step": 5742000 }, { "epoch": 28.45, "learning_rate": 3.577992300946776e-05, "loss": 2.2557, "step": 5742500 }, { "epoch": 28.45, "learning_rate": 3.5778684423041674e-05, "loss": 2.25, "step": 5743000 }, { "epoch": 28.46, "learning_rate": 3.577744583661559e-05, "loss": 2.2547, "step": 5743500 }, { "epoch": 28.46, "learning_rate": 3.57762072501895e-05, "loss": 2.2511, "step": 5744000 }, { "epoch": 28.46, "learning_rate": 3.577497114093628e-05, "loss": 2.2485, "step": 5744500 }, { "epoch": 28.46, "learning_rate": 3.577373503168304e-05, "loss": 2.2567, "step": 5745000 }, { "epoch": 28.47, "learning_rate": 3.577249892242981e-05, "loss": 2.2858, "step": 5745500 }, { "epoch": 28.47, "learning_rate": 3.5771260336003725e-05, "loss": 2.2511, "step": 5746000 }, { "epoch": 28.47, "learning_rate": 3.577002174957764e-05, "loss": 2.248, "step": 5746500 }, { "epoch": 28.47, "learning_rate": 3.576878564032442e-05, "loss": 2.2604, "step": 5747000 }, { "epoch": 28.48, "learning_rate": 3.576754705389833e-05, "loss": 2.2871, "step": 5747500 }, { "epoch": 28.48, "learning_rate": 3.5766308467472244e-05, "loss": 2.2474, "step": 5748000 }, { "epoch": 28.48, "learning_rate": 3.576506988104616e-05, "loss": 2.2581, "step": 5748500 }, { "epoch": 28.48, "learning_rate": 3.576383129462008e-05, "loss": 2.228, "step": 5749000 }, { "epoch": 28.49, "learning_rate": 3.5762592708193995e-05, "loss": 2.2505, "step": 5749500 }, { "epoch": 28.49, "learning_rate": 3.576135412176791e-05, "loss": 2.2651, "step": 5750000 }, { "epoch": 28.49, "learning_rate": 3.576011553534183e-05, "loss": 2.24, "step": 5750500 }, { "epoch": 28.49, "learning_rate": 3.5758876948915746e-05, "loss": 2.2268, "step": 5751000 }, { "epoch": 28.49, "learning_rate": 3.575763836248966e-05, "loss": 2.2565, "step": 5751500 }, { "epoch": 28.5, "learning_rate": 3.575639977606357e-05, "loss": 2.25, "step": 5752000 }, { "epoch": 28.5, "learning_rate": 3.575516118963749e-05, "loss": 2.2529, "step": 5752500 }, { "epoch": 28.5, "learning_rate": 3.575392508038426e-05, "loss": 2.2352, "step": 5753000 }, { "epoch": 28.5, "learning_rate": 3.5752686493958175e-05, "loss": 2.2627, "step": 5753500 }, { "epoch": 28.51, "learning_rate": 3.575144790753209e-05, "loss": 2.2574, "step": 5754000 }, { "epoch": 28.51, "learning_rate": 3.575020932110601e-05, "loss": 2.2656, "step": 5754500 }, { "epoch": 28.51, "learning_rate": 3.574897321185278e-05, "loss": 2.2663, "step": 5755000 }, { "epoch": 28.51, "learning_rate": 3.5747734625426695e-05, "loss": 2.2273, "step": 5755500 }, { "epoch": 28.52, "learning_rate": 3.574649603900061e-05, "loss": 2.2567, "step": 5756000 }, { "epoch": 28.52, "learning_rate": 3.574525745257453e-05, "loss": 2.2755, "step": 5756500 }, { "epoch": 28.52, "learning_rate": 3.5744018866148446e-05, "loss": 2.246, "step": 5757000 }, { "epoch": 28.52, "learning_rate": 3.574278027972236e-05, "loss": 2.2647, "step": 5757500 }, { "epoch": 28.53, "learning_rate": 3.574154169329628e-05, "loss": 2.2595, "step": 5758000 }, { "epoch": 28.53, "learning_rate": 3.574030558404304e-05, "loss": 2.2468, "step": 5758500 }, { "epoch": 28.53, "learning_rate": 3.573906699761696e-05, "loss": 2.2384, "step": 5759000 }, { "epoch": 28.53, "learning_rate": 3.5737828411190876e-05, "loss": 2.2599, "step": 5759500 }, { "epoch": 28.54, "learning_rate": 3.573658982476479e-05, "loss": 2.2706, "step": 5760000 }, { "epoch": 28.54, "learning_rate": 3.573535123833871e-05, "loss": 2.2765, "step": 5760500 }, { "epoch": 28.54, "learning_rate": 3.5734112651912626e-05, "loss": 2.2622, "step": 5761000 }, { "epoch": 28.54, "learning_rate": 3.573287406548654e-05, "loss": 2.2588, "step": 5761500 }, { "epoch": 28.55, "learning_rate": 3.573163547906046e-05, "loss": 2.2274, "step": 5762000 }, { "epoch": 28.55, "learning_rate": 3.573039689263438e-05, "loss": 2.264, "step": 5762500 }, { "epoch": 28.55, "learning_rate": 3.5729158306208294e-05, "loss": 2.2544, "step": 5763000 }, { "epoch": 28.55, "learning_rate": 3.572792219695506e-05, "loss": 2.2477, "step": 5763500 }, { "epoch": 28.56, "learning_rate": 3.572668361052898e-05, "loss": 2.2682, "step": 5764000 }, { "epoch": 28.56, "learning_rate": 3.57254450241029e-05, "loss": 2.238, "step": 5764500 }, { "epoch": 28.56, "learning_rate": 3.5724206437676814e-05, "loss": 2.2259, "step": 5765000 }, { "epoch": 28.56, "learning_rate": 3.5722970328423576e-05, "loss": 2.2703, "step": 5765500 }, { "epoch": 28.57, "learning_rate": 3.572173174199749e-05, "loss": 2.2302, "step": 5766000 }, { "epoch": 28.57, "learning_rate": 3.572049315557141e-05, "loss": 2.2499, "step": 5766500 }, { "epoch": 28.57, "learning_rate": 3.5719254569145326e-05, "loss": 2.2483, "step": 5767000 }, { "epoch": 28.57, "learning_rate": 3.5718015982719243e-05, "loss": 2.272, "step": 5767500 }, { "epoch": 28.58, "learning_rate": 3.571677987346601e-05, "loss": 2.2575, "step": 5768000 }, { "epoch": 28.58, "learning_rate": 3.571554128703993e-05, "loss": 2.2488, "step": 5768500 }, { "epoch": 28.58, "learning_rate": 3.5714302700613846e-05, "loss": 2.2621, "step": 5769000 }, { "epoch": 28.58, "learning_rate": 3.571306411418776e-05, "loss": 2.2267, "step": 5769500 }, { "epoch": 28.59, "learning_rate": 3.571182552776168e-05, "loss": 2.2648, "step": 5770000 }, { "epoch": 28.59, "learning_rate": 3.57105869413356e-05, "loss": 2.235, "step": 5770500 }, { "epoch": 28.59, "learning_rate": 3.5709348354909514e-05, "loss": 2.2069, "step": 5771000 }, { "epoch": 28.59, "learning_rate": 3.570810976848343e-05, "loss": 2.2329, "step": 5771500 }, { "epoch": 28.6, "learning_rate": 3.570687365923019e-05, "loss": 2.2703, "step": 5772000 }, { "epoch": 28.6, "learning_rate": 3.570563507280411e-05, "loss": 2.2646, "step": 5772500 }, { "epoch": 28.6, "learning_rate": 3.570439648637803e-05, "loss": 2.2603, "step": 5773000 }, { "epoch": 28.6, "learning_rate": 3.5703160377124795e-05, "loss": 2.2573, "step": 5773500 }, { "epoch": 28.61, "learning_rate": 3.5701924267871564e-05, "loss": 2.2794, "step": 5774000 }, { "epoch": 28.61, "learning_rate": 3.570068568144548e-05, "loss": 2.247, "step": 5774500 }, { "epoch": 28.61, "learning_rate": 3.56994470950194e-05, "loss": 2.2595, "step": 5775000 }, { "epoch": 28.61, "learning_rate": 3.569820850859331e-05, "loss": 2.2653, "step": 5775500 }, { "epoch": 28.62, "learning_rate": 3.5696972399340084e-05, "loss": 2.2649, "step": 5776000 }, { "epoch": 28.62, "learning_rate": 3.5695733812914e-05, "loss": 2.2686, "step": 5776500 }, { "epoch": 28.62, "learning_rate": 3.569449522648792e-05, "loss": 2.2608, "step": 5777000 }, { "epoch": 28.62, "learning_rate": 3.5693256640061835e-05, "loss": 2.2304, "step": 5777500 }, { "epoch": 28.63, "learning_rate": 3.5692020530808603e-05, "loss": 2.2519, "step": 5778000 }, { "epoch": 28.63, "learning_rate": 3.569078194438252e-05, "loss": 2.2614, "step": 5778500 }, { "epoch": 28.63, "learning_rate": 3.568954335795644e-05, "loss": 2.2653, "step": 5779000 }, { "epoch": 28.63, "learning_rate": 3.5688304771530354e-05, "loss": 2.2743, "step": 5779500 }, { "epoch": 28.64, "learning_rate": 3.5687066185104264e-05, "loss": 2.2705, "step": 5780000 }, { "epoch": 28.64, "learning_rate": 3.568583007585103e-05, "loss": 2.2531, "step": 5780500 }, { "epoch": 28.64, "learning_rate": 3.568459148942495e-05, "loss": 2.2766, "step": 5781000 }, { "epoch": 28.64, "learning_rate": 3.568335290299887e-05, "loss": 2.2445, "step": 5781500 }, { "epoch": 28.65, "learning_rate": 3.5682114316572784e-05, "loss": 2.2499, "step": 5782000 }, { "epoch": 28.65, "learning_rate": 3.56808757301467e-05, "loss": 2.2532, "step": 5782500 }, { "epoch": 28.65, "learning_rate": 3.567963714372062e-05, "loss": 2.2463, "step": 5783000 }, { "epoch": 28.65, "learning_rate": 3.5678398557294535e-05, "loss": 2.2268, "step": 5783500 }, { "epoch": 28.66, "learning_rate": 3.567715997086845e-05, "loss": 2.2623, "step": 5784000 }, { "epoch": 28.66, "learning_rate": 3.567592138444237e-05, "loss": 2.2633, "step": 5784500 }, { "epoch": 28.66, "learning_rate": 3.567468279801628e-05, "loss": 2.2451, "step": 5785000 }, { "epoch": 28.66, "learning_rate": 3.5673444211590196e-05, "loss": 2.2643, "step": 5785500 }, { "epoch": 28.67, "learning_rate": 3.567220562516411e-05, "loss": 2.2295, "step": 5786000 }, { "epoch": 28.67, "learning_rate": 3.567096703873803e-05, "loss": 2.2327, "step": 5786500 }, { "epoch": 28.67, "learning_rate": 3.5669728452311946e-05, "loss": 2.2591, "step": 5787000 }, { "epoch": 28.67, "learning_rate": 3.5668489865885863e-05, "loss": 2.2324, "step": 5787500 }, { "epoch": 28.68, "learning_rate": 3.566725375663263e-05, "loss": 2.279, "step": 5788000 }, { "epoch": 28.68, "learning_rate": 3.566601517020655e-05, "loss": 2.2687, "step": 5788500 }, { "epoch": 28.68, "learning_rate": 3.566477658378046e-05, "loss": 2.2327, "step": 5789000 }, { "epoch": 28.68, "learning_rate": 3.5663537997354376e-05, "loss": 2.2751, "step": 5789500 }, { "epoch": 28.69, "learning_rate": 3.566230188810115e-05, "loss": 2.2476, "step": 5790000 }, { "epoch": 28.69, "learning_rate": 3.566106330167507e-05, "loss": 2.2564, "step": 5790500 }, { "epoch": 28.69, "learning_rate": 3.5659824715248986e-05, "loss": 2.2629, "step": 5791000 }, { "epoch": 28.69, "learning_rate": 3.5658586128822896e-05, "loss": 2.2568, "step": 5791500 }, { "epoch": 28.7, "learning_rate": 3.565735001956967e-05, "loss": 2.2414, "step": 5792000 }, { "epoch": 28.7, "learning_rate": 3.565611143314359e-05, "loss": 2.2787, "step": 5792500 }, { "epoch": 28.7, "learning_rate": 3.565487532389035e-05, "loss": 2.2533, "step": 5793000 }, { "epoch": 28.7, "learning_rate": 3.565363673746427e-05, "loss": 2.2566, "step": 5793500 }, { "epoch": 28.71, "learning_rate": 3.5652400628211036e-05, "loss": 2.2658, "step": 5794000 }, { "epoch": 28.71, "learning_rate": 3.565116204178495e-05, "loss": 2.2591, "step": 5794500 }, { "epoch": 28.71, "learning_rate": 3.564992345535887e-05, "loss": 2.2673, "step": 5795000 }, { "epoch": 28.71, "learning_rate": 3.564868486893279e-05, "loss": 2.2577, "step": 5795500 }, { "epoch": 28.72, "learning_rate": 3.5647446282506704e-05, "loss": 2.2438, "step": 5796000 }, { "epoch": 28.72, "learning_rate": 3.564620769608062e-05, "loss": 2.2575, "step": 5796500 }, { "epoch": 28.72, "learning_rate": 3.564496910965454e-05, "loss": 2.2445, "step": 5797000 }, { "epoch": 28.72, "learning_rate": 3.5643730523228455e-05, "loss": 2.2777, "step": 5797500 }, { "epoch": 28.73, "learning_rate": 3.564249193680237e-05, "loss": 2.2571, "step": 5798000 }, { "epoch": 28.73, "learning_rate": 3.564125335037629e-05, "loss": 2.2519, "step": 5798500 }, { "epoch": 28.73, "learning_rate": 3.5640014763950205e-05, "loss": 2.2186, "step": 5799000 }, { "epoch": 28.73, "learning_rate": 3.563877617752412e-05, "loss": 2.2601, "step": 5799500 }, { "epoch": 28.74, "learning_rate": 3.563753759109803e-05, "loss": 2.2653, "step": 5800000 }, { "epoch": 28.74, "learning_rate": 3.563629900467195e-05, "loss": 2.2519, "step": 5800500 }, { "epoch": 28.74, "learning_rate": 3.563506289541872e-05, "loss": 2.2545, "step": 5801000 }, { "epoch": 28.74, "learning_rate": 3.5633824308992635e-05, "loss": 2.2799, "step": 5801500 }, { "epoch": 28.75, "learning_rate": 3.563258572256655e-05, "loss": 2.2447, "step": 5802000 }, { "epoch": 28.75, "learning_rate": 3.563134961331332e-05, "loss": 2.297, "step": 5802500 }, { "epoch": 28.75, "learning_rate": 3.563011102688724e-05, "loss": 2.2558, "step": 5803000 }, { "epoch": 28.75, "learning_rate": 3.5628872440461155e-05, "loss": 2.2682, "step": 5803500 }, { "epoch": 28.76, "learning_rate": 3.562763385403507e-05, "loss": 2.2454, "step": 5804000 }, { "epoch": 28.76, "learning_rate": 3.562639526760899e-05, "loss": 2.2382, "step": 5804500 }, { "epoch": 28.76, "learning_rate": 3.5625156681182905e-05, "loss": 2.2444, "step": 5805000 }, { "epoch": 28.76, "learning_rate": 3.562391809475682e-05, "loss": 2.2791, "step": 5805500 }, { "epoch": 28.76, "learning_rate": 3.562267950833074e-05, "loss": 2.2489, "step": 5806000 }, { "epoch": 28.77, "learning_rate": 3.56214433990775e-05, "loss": 2.2662, "step": 5806500 }, { "epoch": 28.77, "learning_rate": 3.562020481265142e-05, "loss": 2.241, "step": 5807000 }, { "epoch": 28.77, "learning_rate": 3.5618966226225335e-05, "loss": 2.2429, "step": 5807500 }, { "epoch": 28.77, "learning_rate": 3.561772763979925e-05, "loss": 2.2777, "step": 5808000 }, { "epoch": 28.78, "learning_rate": 3.561648905337317e-05, "loss": 2.2731, "step": 5808500 }, { "epoch": 28.78, "learning_rate": 3.5615250466947086e-05, "loss": 2.2572, "step": 5809000 }, { "epoch": 28.78, "learning_rate": 3.5614014357693855e-05, "loss": 2.2283, "step": 5809500 }, { "epoch": 28.78, "learning_rate": 3.561277577126777e-05, "loss": 2.2644, "step": 5810000 }, { "epoch": 28.79, "learning_rate": 3.561153718484169e-05, "loss": 2.2584, "step": 5810500 }, { "epoch": 28.79, "learning_rate": 3.5610298598415606e-05, "loss": 2.2588, "step": 5811000 }, { "epoch": 28.79, "learning_rate": 3.560906001198952e-05, "loss": 2.263, "step": 5811500 }, { "epoch": 28.79, "learning_rate": 3.5607826379909136e-05, "loss": 2.2713, "step": 5812000 }, { "epoch": 28.8, "learning_rate": 3.560658779348305e-05, "loss": 2.2487, "step": 5812500 }, { "epoch": 28.8, "learning_rate": 3.560534920705697e-05, "loss": 2.2539, "step": 5813000 }, { "epoch": 28.8, "learning_rate": 3.560411062063089e-05, "loss": 2.2511, "step": 5813500 }, { "epoch": 28.8, "learning_rate": 3.5602872034204804e-05, "loss": 2.2785, "step": 5814000 }, { "epoch": 28.81, "learning_rate": 3.560163592495157e-05, "loss": 2.2514, "step": 5814500 }, { "epoch": 28.81, "learning_rate": 3.560039733852549e-05, "loss": 2.2603, "step": 5815000 }, { "epoch": 28.81, "learning_rate": 3.559915875209941e-05, "loss": 2.2606, "step": 5815500 }, { "epoch": 28.81, "learning_rate": 3.5597920165673324e-05, "loss": 2.247, "step": 5816000 }, { "epoch": 28.82, "learning_rate": 3.559668157924724e-05, "loss": 2.2564, "step": 5816500 }, { "epoch": 28.82, "learning_rate": 3.559544299282115e-05, "loss": 2.2528, "step": 5817000 }, { "epoch": 28.82, "learning_rate": 3.559420688356792e-05, "loss": 2.2559, "step": 5817500 }, { "epoch": 28.82, "learning_rate": 3.5592968297141837e-05, "loss": 2.2693, "step": 5818000 }, { "epoch": 28.83, "learning_rate": 3.559173218788861e-05, "loss": 2.2497, "step": 5818500 }, { "epoch": 28.83, "learning_rate": 3.559049360146253e-05, "loss": 2.2595, "step": 5819000 }, { "epoch": 28.83, "learning_rate": 3.5589255015036446e-05, "loss": 2.2414, "step": 5819500 }, { "epoch": 28.83, "learning_rate": 3.5588016428610356e-05, "loss": 2.2523, "step": 5820000 }, { "epoch": 28.84, "learning_rate": 3.558677784218427e-05, "loss": 2.2844, "step": 5820500 }, { "epoch": 28.84, "learning_rate": 3.558553925575819e-05, "loss": 2.2532, "step": 5821000 }, { "epoch": 28.84, "learning_rate": 3.558430314650496e-05, "loss": 2.245, "step": 5821500 }, { "epoch": 28.84, "learning_rate": 3.5583064560078876e-05, "loss": 2.2501, "step": 5822000 }, { "epoch": 28.85, "learning_rate": 3.558182597365279e-05, "loss": 2.2561, "step": 5822500 }, { "epoch": 28.85, "learning_rate": 3.558058738722671e-05, "loss": 2.2602, "step": 5823000 }, { "epoch": 28.85, "learning_rate": 3.557935127797348e-05, "loss": 2.2512, "step": 5823500 }, { "epoch": 28.85, "learning_rate": 3.5578112691547395e-05, "loss": 2.2484, "step": 5824000 }, { "epoch": 28.86, "learning_rate": 3.557687410512131e-05, "loss": 2.2785, "step": 5824500 }, { "epoch": 28.86, "learning_rate": 3.557563551869523e-05, "loss": 2.2271, "step": 5825000 }, { "epoch": 28.86, "learning_rate": 3.5574396932269146e-05, "loss": 2.264, "step": 5825500 }, { "epoch": 28.86, "learning_rate": 3.557315834584306e-05, "loss": 2.2774, "step": 5826000 }, { "epoch": 28.87, "learning_rate": 3.557191975941697e-05, "loss": 2.2599, "step": 5826500 }, { "epoch": 28.87, "learning_rate": 3.557068117299089e-05, "loss": 2.2745, "step": 5827000 }, { "epoch": 28.87, "learning_rate": 3.556944258656481e-05, "loss": 2.2626, "step": 5827500 }, { "epoch": 28.87, "learning_rate": 3.5568206477311576e-05, "loss": 2.2708, "step": 5828000 }, { "epoch": 28.88, "learning_rate": 3.556696789088549e-05, "loss": 2.277, "step": 5828500 }, { "epoch": 28.88, "learning_rate": 3.556572930445941e-05, "loss": 2.2848, "step": 5829000 }, { "epoch": 28.88, "learning_rate": 3.556449319520618e-05, "loss": 2.2448, "step": 5829500 }, { "epoch": 28.88, "learning_rate": 3.5563254608780095e-05, "loss": 2.2617, "step": 5830000 }, { "epoch": 28.89, "learning_rate": 3.556201602235401e-05, "loss": 2.2809, "step": 5830500 }, { "epoch": 28.89, "learning_rate": 3.556077743592793e-05, "loss": 2.2586, "step": 5831000 }, { "epoch": 28.89, "learning_rate": 3.5559538849501846e-05, "loss": 2.232, "step": 5831500 }, { "epoch": 28.89, "learning_rate": 3.555830026307576e-05, "loss": 2.2542, "step": 5832000 }, { "epoch": 28.9, "learning_rate": 3.555706167664967e-05, "loss": 2.2311, "step": 5832500 }, { "epoch": 28.9, "learning_rate": 3.555582556739644e-05, "loss": 2.2316, "step": 5833000 }, { "epoch": 28.9, "learning_rate": 3.555458945814321e-05, "loss": 2.2274, "step": 5833500 }, { "epoch": 28.9, "learning_rate": 3.555335087171713e-05, "loss": 2.2638, "step": 5834000 }, { "epoch": 28.91, "learning_rate": 3.5552112285291045e-05, "loss": 2.2413, "step": 5834500 }, { "epoch": 28.91, "learning_rate": 3.555087369886496e-05, "loss": 2.2671, "step": 5835000 }, { "epoch": 28.91, "learning_rate": 3.554963511243888e-05, "loss": 2.2601, "step": 5835500 }, { "epoch": 28.91, "learning_rate": 3.5548396526012796e-05, "loss": 2.2831, "step": 5836000 }, { "epoch": 28.92, "learning_rate": 3.554715793958671e-05, "loss": 2.2749, "step": 5836500 }, { "epoch": 28.92, "learning_rate": 3.554591935316063e-05, "loss": 2.2563, "step": 5837000 }, { "epoch": 28.92, "learning_rate": 3.5544680766734546e-05, "loss": 2.285, "step": 5837500 }, { "epoch": 28.92, "learning_rate": 3.554344713465416e-05, "loss": 2.2429, "step": 5838000 }, { "epoch": 28.93, "learning_rate": 3.554220854822808e-05, "loss": 2.2898, "step": 5838500 }, { "epoch": 28.93, "learning_rate": 3.5540969961801994e-05, "loss": 2.243, "step": 5839000 }, { "epoch": 28.93, "learning_rate": 3.553973137537591e-05, "loss": 2.2507, "step": 5839500 }, { "epoch": 28.93, "learning_rate": 3.553849278894983e-05, "loss": 2.2531, "step": 5840000 }, { "epoch": 28.94, "learning_rate": 3.5537254202523745e-05, "loss": 2.2665, "step": 5840500 }, { "epoch": 28.94, "learning_rate": 3.553601561609766e-05, "loss": 2.2682, "step": 5841000 }, { "epoch": 28.94, "learning_rate": 3.553477702967158e-05, "loss": 2.2489, "step": 5841500 }, { "epoch": 28.94, "learning_rate": 3.5533538443245496e-05, "loss": 2.2418, "step": 5842000 }, { "epoch": 28.95, "learning_rate": 3.5532302333992265e-05, "loss": 2.2649, "step": 5842500 }, { "epoch": 28.95, "learning_rate": 3.553106374756618e-05, "loss": 2.2535, "step": 5843000 }, { "epoch": 28.95, "learning_rate": 3.55298251611401e-05, "loss": 2.2434, "step": 5843500 }, { "epoch": 28.95, "learning_rate": 3.5528586574714015e-05, "loss": 2.2766, "step": 5844000 }, { "epoch": 28.96, "learning_rate": 3.552734798828793e-05, "loss": 2.2398, "step": 5844500 }, { "epoch": 28.96, "learning_rate": 3.552610940186184e-05, "loss": 2.2472, "step": 5845000 }, { "epoch": 28.96, "learning_rate": 3.552487081543576e-05, "loss": 2.2474, "step": 5845500 }, { "epoch": 28.96, "learning_rate": 3.5523632229009676e-05, "loss": 2.2566, "step": 5846000 }, { "epoch": 28.97, "learning_rate": 3.552239364258359e-05, "loss": 2.2593, "step": 5846500 }, { "epoch": 28.97, "learning_rate": 3.552115505615751e-05, "loss": 2.2721, "step": 5847000 }, { "epoch": 28.97, "learning_rate": 3.551992142407713e-05, "loss": 2.2743, "step": 5847500 }, { "epoch": 28.97, "learning_rate": 3.551868283765105e-05, "loss": 2.2448, "step": 5848000 }, { "epoch": 28.98, "learning_rate": 3.5517444251224965e-05, "loss": 2.289, "step": 5848500 }, { "epoch": 28.98, "learning_rate": 3.551620566479888e-05, "loss": 2.264, "step": 5849000 }, { "epoch": 28.98, "learning_rate": 3.55149670783728e-05, "loss": 2.2393, "step": 5849500 }, { "epoch": 28.98, "learning_rate": 3.5513728491946715e-05, "loss": 2.241, "step": 5850000 }, { "epoch": 28.99, "learning_rate": 3.551249238269348e-05, "loss": 2.232, "step": 5850500 }, { "epoch": 28.99, "learning_rate": 3.5511253796267394e-05, "loss": 2.2528, "step": 5851000 }, { "epoch": 28.99, "learning_rate": 3.551001520984131e-05, "loss": 2.2253, "step": 5851500 }, { "epoch": 28.99, "learning_rate": 3.550877662341523e-05, "loss": 2.2537, "step": 5852000 }, { "epoch": 29.0, "learning_rate": 3.5507538036989145e-05, "loss": 2.2496, "step": 5852500 }, { "epoch": 29.0, "learning_rate": 3.550629945056306e-05, "loss": 2.2503, "step": 5853000 }, { "epoch": 29.0, "eval_accuracy": 0.6575265605183357, "eval_accuracy_mlm": 0.6131820592582494, "eval_accuracy_nsp": 0.8667511246906365, "eval_loss": 2.3342039585113525, "eval_runtime": 146.0117, "eval_samples_per_second": 1746.155, "eval_steps_per_second": 72.761, "step": 5853447 }, { "epoch": 29.0, "learning_rate": 3.550506581848268e-05, "loss": 2.2472, "step": 5853500 }, { "epoch": 29.0, "learning_rate": 3.55038272320566e-05, "loss": 2.2281, "step": 5854000 }, { "epoch": 29.01, "learning_rate": 3.550258864563052e-05, "loss": 2.233, "step": 5854500 }, { "epoch": 29.01, "learning_rate": 3.550135005920443e-05, "loss": 2.2422, "step": 5855000 }, { "epoch": 29.01, "learning_rate": 3.5500111472778344e-05, "loss": 2.2156, "step": 5855500 }, { "epoch": 29.01, "learning_rate": 3.549887288635226e-05, "loss": 2.2308, "step": 5856000 }, { "epoch": 29.02, "learning_rate": 3.549763429992618e-05, "loss": 2.2383, "step": 5856500 }, { "epoch": 29.02, "learning_rate": 3.5496395713500095e-05, "loss": 2.2424, "step": 5857000 }, { "epoch": 29.02, "learning_rate": 3.549515712707401e-05, "loss": 2.2365, "step": 5857500 }, { "epoch": 29.02, "learning_rate": 3.549391854064793e-05, "loss": 2.2243, "step": 5858000 }, { "epoch": 29.03, "learning_rate": 3.54926824313947e-05, "loss": 2.2149, "step": 5858500 }, { "epoch": 29.03, "learning_rate": 3.549144632214147e-05, "loss": 2.2196, "step": 5859000 }, { "epoch": 29.03, "learning_rate": 3.549020773571538e-05, "loss": 2.2177, "step": 5859500 }, { "epoch": 29.03, "learning_rate": 3.54889691492893e-05, "loss": 2.2114, "step": 5860000 }, { "epoch": 29.03, "learning_rate": 3.548773056286322e-05, "loss": 2.2293, "step": 5860500 }, { "epoch": 29.04, "learning_rate": 3.5486491976437134e-05, "loss": 2.2463, "step": 5861000 }, { "epoch": 29.04, "learning_rate": 3.548525339001105e-05, "loss": 2.2409, "step": 5861500 }, { "epoch": 29.04, "learning_rate": 3.548401480358496e-05, "loss": 2.2488, "step": 5862000 }, { "epoch": 29.04, "learning_rate": 3.548277621715888e-05, "loss": 2.2524, "step": 5862500 }, { "epoch": 29.05, "learning_rate": 3.5481537630732795e-05, "loss": 2.2146, "step": 5863000 }, { "epoch": 29.05, "learning_rate": 3.548029904430671e-05, "loss": 2.2241, "step": 5863500 }, { "epoch": 29.05, "learning_rate": 3.547906045788063e-05, "loss": 2.2167, "step": 5864000 }, { "epoch": 29.05, "learning_rate": 3.5477821871454545e-05, "loss": 2.2205, "step": 5864500 }, { "epoch": 29.06, "learning_rate": 3.547658328502846e-05, "loss": 2.2322, "step": 5865000 }, { "epoch": 29.06, "learning_rate": 3.547534469860238e-05, "loss": 2.2218, "step": 5865500 }, { "epoch": 29.06, "learning_rate": 3.5474106112176296e-05, "loss": 2.2323, "step": 5866000 }, { "epoch": 29.06, "learning_rate": 3.547286752575021e-05, "loss": 2.2263, "step": 5866500 }, { "epoch": 29.07, "learning_rate": 3.547162893932413e-05, "loss": 2.1892, "step": 5867000 }, { "epoch": 29.07, "learning_rate": 3.547039035289805e-05, "loss": 2.2291, "step": 5867500 }, { "epoch": 29.07, "learning_rate": 3.5469151766471964e-05, "loss": 2.2354, "step": 5868000 }, { "epoch": 29.07, "learning_rate": 3.546791318004588e-05, "loss": 2.2302, "step": 5868500 }, { "epoch": 29.08, "learning_rate": 3.54666745936198e-05, "loss": 2.2066, "step": 5869000 }, { "epoch": 29.08, "learning_rate": 3.5465436007193715e-05, "loss": 2.219, "step": 5869500 }, { "epoch": 29.08, "learning_rate": 3.5464197420767625e-05, "loss": 2.2361, "step": 5870000 }, { "epoch": 29.08, "learning_rate": 3.54629613115144e-05, "loss": 2.2123, "step": 5870500 }, { "epoch": 29.09, "learning_rate": 3.546172272508832e-05, "loss": 2.2175, "step": 5871000 }, { "epoch": 29.09, "learning_rate": 3.546048661583508e-05, "loss": 2.2331, "step": 5871500 }, { "epoch": 29.09, "learning_rate": 3.545925050658185e-05, "loss": 2.2405, "step": 5872000 }, { "epoch": 29.09, "learning_rate": 3.5458011920155765e-05, "loss": 2.2258, "step": 5872500 }, { "epoch": 29.1, "learning_rate": 3.5456775810902534e-05, "loss": 2.2055, "step": 5873000 }, { "epoch": 29.1, "learning_rate": 3.545553722447645e-05, "loss": 2.2264, "step": 5873500 }, { "epoch": 29.1, "learning_rate": 3.545429863805037e-05, "loss": 2.2461, "step": 5874000 }, { "epoch": 29.1, "learning_rate": 3.5453060051624285e-05, "loss": 2.2322, "step": 5874500 }, { "epoch": 29.11, "learning_rate": 3.54518214651982e-05, "loss": 2.2506, "step": 5875000 }, { "epoch": 29.11, "learning_rate": 3.545058287877211e-05, "loss": 2.2381, "step": 5875500 }, { "epoch": 29.11, "learning_rate": 3.544934429234603e-05, "loss": 2.2179, "step": 5876000 }, { "epoch": 29.11, "learning_rate": 3.5448105705919946e-05, "loss": 2.2353, "step": 5876500 }, { "epoch": 29.12, "learning_rate": 3.544686711949386e-05, "loss": 2.2316, "step": 5877000 }, { "epoch": 29.12, "learning_rate": 3.544563101024063e-05, "loss": 2.2306, "step": 5877500 }, { "epoch": 29.12, "learning_rate": 3.544439242381455e-05, "loss": 2.2229, "step": 5878000 }, { "epoch": 29.12, "learning_rate": 3.5443153837388465e-05, "loss": 2.2512, "step": 5878500 }, { "epoch": 29.13, "learning_rate": 3.544191525096238e-05, "loss": 2.2306, "step": 5879000 }, { "epoch": 29.13, "learning_rate": 3.54406766645363e-05, "loss": 2.2534, "step": 5879500 }, { "epoch": 29.13, "learning_rate": 3.543944055528307e-05, "loss": 2.2212, "step": 5880000 }, { "epoch": 29.13, "learning_rate": 3.5438201968856985e-05, "loss": 2.2609, "step": 5880500 }, { "epoch": 29.14, "learning_rate": 3.54369633824309e-05, "loss": 2.2195, "step": 5881000 }, { "epoch": 29.14, "learning_rate": 3.543572727317767e-05, "loss": 2.2432, "step": 5881500 }, { "epoch": 29.14, "learning_rate": 3.543448868675159e-05, "loss": 2.2379, "step": 5882000 }, { "epoch": 29.14, "learning_rate": 3.5433250100325504e-05, "loss": 2.2313, "step": 5882500 }, { "epoch": 29.15, "learning_rate": 3.543201151389942e-05, "loss": 2.2383, "step": 5883000 }, { "epoch": 29.15, "learning_rate": 3.543077292747333e-05, "loss": 2.2507, "step": 5883500 }, { "epoch": 29.15, "learning_rate": 3.542953434104725e-05, "loss": 2.2331, "step": 5884000 }, { "epoch": 29.15, "learning_rate": 3.5428295754621165e-05, "loss": 2.2345, "step": 5884500 }, { "epoch": 29.16, "learning_rate": 3.542705964536794e-05, "loss": 2.2113, "step": 5885000 }, { "epoch": 29.16, "learning_rate": 3.542582105894186e-05, "loss": 2.2003, "step": 5885500 }, { "epoch": 29.16, "learning_rate": 3.5424582472515775e-05, "loss": 2.2238, "step": 5886000 }, { "epoch": 29.16, "learning_rate": 3.5423343886089685e-05, "loss": 2.2471, "step": 5886500 }, { "epoch": 29.17, "learning_rate": 3.54221052996636e-05, "loss": 2.1966, "step": 5887000 }, { "epoch": 29.17, "learning_rate": 3.542086671323752e-05, "loss": 2.2397, "step": 5887500 }, { "epoch": 29.17, "learning_rate": 3.5419628126811436e-05, "loss": 2.2606, "step": 5888000 }, { "epoch": 29.17, "learning_rate": 3.541838954038535e-05, "loss": 2.2447, "step": 5888500 }, { "epoch": 29.18, "learning_rate": 3.541715095395926e-05, "loss": 2.2425, "step": 5889000 }, { "epoch": 29.18, "learning_rate": 3.541591236753318e-05, "loss": 2.226, "step": 5889500 }, { "epoch": 29.18, "learning_rate": 3.54146737811071e-05, "loss": 2.2204, "step": 5890000 }, { "epoch": 29.18, "learning_rate": 3.5413437671853866e-05, "loss": 2.2296, "step": 5890500 }, { "epoch": 29.19, "learning_rate": 3.541220156260064e-05, "loss": 2.2481, "step": 5891000 }, { "epoch": 29.19, "learning_rate": 3.541096297617456e-05, "loss": 2.2228, "step": 5891500 }, { "epoch": 29.19, "learning_rate": 3.5409724389748475e-05, "loss": 2.2576, "step": 5892000 }, { "epoch": 29.19, "learning_rate": 3.5408485803322385e-05, "loss": 2.2459, "step": 5892500 }, { "epoch": 29.2, "learning_rate": 3.54072472168963e-05, "loss": 2.2061, "step": 5893000 }, { "epoch": 29.2, "learning_rate": 3.540600863047022e-05, "loss": 2.2236, "step": 5893500 }, { "epoch": 29.2, "learning_rate": 3.5404770044044136e-05, "loss": 2.2591, "step": 5894000 }, { "epoch": 29.2, "learning_rate": 3.540353145761805e-05, "loss": 2.2513, "step": 5894500 }, { "epoch": 29.21, "learning_rate": 3.540229534836482e-05, "loss": 2.2299, "step": 5895000 }, { "epoch": 29.21, "learning_rate": 3.540105676193874e-05, "loss": 2.2314, "step": 5895500 }, { "epoch": 29.21, "learning_rate": 3.539981817551265e-05, "loss": 2.2248, "step": 5896000 }, { "epoch": 29.21, "learning_rate": 3.5398579589086566e-05, "loss": 2.2565, "step": 5896500 }, { "epoch": 29.22, "learning_rate": 3.539734100266048e-05, "loss": 2.207, "step": 5897000 }, { "epoch": 29.22, "learning_rate": 3.53961024162344e-05, "loss": 2.2213, "step": 5897500 }, { "epoch": 29.22, "learning_rate": 3.5394863829808316e-05, "loss": 2.2324, "step": 5898000 }, { "epoch": 29.22, "learning_rate": 3.539362524338223e-05, "loss": 2.2222, "step": 5898500 }, { "epoch": 29.23, "learning_rate": 3.5392389134129e-05, "loss": 2.2263, "step": 5899000 }, { "epoch": 29.23, "learning_rate": 3.539115054770292e-05, "loss": 2.2123, "step": 5899500 }, { "epoch": 29.23, "learning_rate": 3.5389911961276836e-05, "loss": 2.2744, "step": 5900000 }, { "epoch": 29.23, "learning_rate": 3.538867337485075e-05, "loss": 2.231, "step": 5900500 }, { "epoch": 29.24, "learning_rate": 3.538743478842467e-05, "loss": 2.2395, "step": 5901000 }, { "epoch": 29.24, "learning_rate": 3.538619867917144e-05, "loss": 2.2213, "step": 5901500 }, { "epoch": 29.24, "learning_rate": 3.538496256991821e-05, "loss": 2.2558, "step": 5902000 }, { "epoch": 29.24, "learning_rate": 3.5383723983492124e-05, "loss": 2.234, "step": 5902500 }, { "epoch": 29.25, "learning_rate": 3.538248539706604e-05, "loss": 2.2555, "step": 5903000 }, { "epoch": 29.25, "learning_rate": 3.538124681063996e-05, "loss": 2.245, "step": 5903500 }, { "epoch": 29.25, "learning_rate": 3.5380008224213875e-05, "loss": 2.239, "step": 5904000 }, { "epoch": 29.25, "learning_rate": 3.537876963778779e-05, "loss": 2.2479, "step": 5904500 }, { "epoch": 29.26, "learning_rate": 3.53775310513617e-05, "loss": 2.2461, "step": 5905000 }, { "epoch": 29.26, "learning_rate": 3.537629494210847e-05, "loss": 2.2416, "step": 5905500 }, { "epoch": 29.26, "learning_rate": 3.537505635568239e-05, "loss": 2.2077, "step": 5906000 }, { "epoch": 29.26, "learning_rate": 3.5373817769256305e-05, "loss": 2.2354, "step": 5906500 }, { "epoch": 29.27, "learning_rate": 3.537257918283022e-05, "loss": 2.2289, "step": 5907000 }, { "epoch": 29.27, "learning_rate": 3.537134059640414e-05, "loss": 2.2317, "step": 5907500 }, { "epoch": 29.27, "learning_rate": 3.5370102009978056e-05, "loss": 2.2199, "step": 5908000 }, { "epoch": 29.27, "learning_rate": 3.5368863423551966e-05, "loss": 2.2475, "step": 5908500 }, { "epoch": 29.28, "learning_rate": 3.536762483712588e-05, "loss": 2.2429, "step": 5909000 }, { "epoch": 29.28, "learning_rate": 3.53663862506998e-05, "loss": 2.2527, "step": 5909500 }, { "epoch": 29.28, "learning_rate": 3.5365150141446575e-05, "loss": 2.2502, "step": 5910000 }, { "epoch": 29.28, "learning_rate": 3.536391403219334e-05, "loss": 2.2442, "step": 5910500 }, { "epoch": 29.29, "learning_rate": 3.5362675445767254e-05, "loss": 2.2257, "step": 5911000 }, { "epoch": 29.29, "learning_rate": 3.536143685934117e-05, "loss": 2.2407, "step": 5911500 }, { "epoch": 29.29, "learning_rate": 3.536019827291509e-05, "loss": 2.238, "step": 5912000 }, { "epoch": 29.29, "learning_rate": 3.5358959686489005e-05, "loss": 2.2623, "step": 5912500 }, { "epoch": 29.3, "learning_rate": 3.535772110006292e-05, "loss": 2.228, "step": 5913000 }, { "epoch": 29.3, "learning_rate": 3.535648251363684e-05, "loss": 2.2201, "step": 5913500 }, { "epoch": 29.3, "learning_rate": 3.535524640438361e-05, "loss": 2.2372, "step": 5914000 }, { "epoch": 29.3, "learning_rate": 3.5354007817957525e-05, "loss": 2.2437, "step": 5914500 }, { "epoch": 29.3, "learning_rate": 3.535276923153144e-05, "loss": 2.2395, "step": 5915000 }, { "epoch": 29.31, "learning_rate": 3.535153312227821e-05, "loss": 2.223, "step": 5915500 }, { "epoch": 29.31, "learning_rate": 3.535029453585213e-05, "loss": 2.2415, "step": 5916000 }, { "epoch": 29.31, "learning_rate": 3.5349055949426044e-05, "loss": 2.2512, "step": 5916500 }, { "epoch": 29.31, "learning_rate": 3.5347817362999954e-05, "loss": 2.2071, "step": 5917000 }, { "epoch": 29.32, "learning_rate": 3.534657877657387e-05, "loss": 2.2317, "step": 5917500 }, { "epoch": 29.32, "learning_rate": 3.534534019014779e-05, "loss": 2.2262, "step": 5918000 }, { "epoch": 29.32, "learning_rate": 3.5344101603721705e-05, "loss": 2.2189, "step": 5918500 }, { "epoch": 29.32, "learning_rate": 3.534286301729562e-05, "loss": 2.2357, "step": 5919000 }, { "epoch": 29.33, "learning_rate": 3.534162690804239e-05, "loss": 2.245, "step": 5919500 }, { "epoch": 29.33, "learning_rate": 3.534038832161631e-05, "loss": 2.2388, "step": 5920000 }, { "epoch": 29.33, "learning_rate": 3.5339149735190225e-05, "loss": 2.2373, "step": 5920500 }, { "epoch": 29.33, "learning_rate": 3.533791114876414e-05, "loss": 2.2406, "step": 5921000 }, { "epoch": 29.34, "learning_rate": 3.533667256233806e-05, "loss": 2.2509, "step": 5921500 }, { "epoch": 29.34, "learning_rate": 3.5335433975911976e-05, "loss": 2.2308, "step": 5922000 }, { "epoch": 29.34, "learning_rate": 3.533419538948589e-05, "loss": 2.2571, "step": 5922500 }, { "epoch": 29.34, "learning_rate": 3.533295680305981e-05, "loss": 2.2244, "step": 5923000 }, { "epoch": 29.35, "learning_rate": 3.5331718216633726e-05, "loss": 2.2334, "step": 5923500 }, { "epoch": 29.35, "learning_rate": 3.5330479630207637e-05, "loss": 2.2752, "step": 5924000 }, { "epoch": 29.35, "learning_rate": 3.5329241043781553e-05, "loss": 2.2395, "step": 5924500 }, { "epoch": 29.35, "learning_rate": 3.532800245735547e-05, "loss": 2.2595, "step": 5925000 }, { "epoch": 29.36, "learning_rate": 3.532676387092939e-05, "loss": 2.2495, "step": 5925500 }, { "epoch": 29.36, "learning_rate": 3.5325525284503304e-05, "loss": 2.2548, "step": 5926000 }, { "epoch": 29.36, "learning_rate": 3.532428669807722e-05, "loss": 2.2415, "step": 5926500 }, { "epoch": 29.36, "learning_rate": 3.532304811165114e-05, "loss": 2.2515, "step": 5927000 }, { "epoch": 29.37, "learning_rate": 3.532181447957076e-05, "loss": 2.2455, "step": 5927500 }, { "epoch": 29.37, "learning_rate": 3.532057837031753e-05, "loss": 2.2333, "step": 5928000 }, { "epoch": 29.37, "learning_rate": 3.5319339783891445e-05, "loss": 2.2581, "step": 5928500 }, { "epoch": 29.37, "learning_rate": 3.531810119746536e-05, "loss": 2.2516, "step": 5929000 }, { "epoch": 29.38, "learning_rate": 3.5316865088212123e-05, "loss": 2.2583, "step": 5929500 }, { "epoch": 29.38, "learning_rate": 3.53156289789589e-05, "loss": 2.2352, "step": 5930000 }, { "epoch": 29.38, "learning_rate": 3.5314390392532816e-05, "loss": 2.2317, "step": 5930500 }, { "epoch": 29.38, "learning_rate": 3.5313151806106726e-05, "loss": 2.2426, "step": 5931000 }, { "epoch": 29.39, "learning_rate": 3.531191321968064e-05, "loss": 2.2369, "step": 5931500 }, { "epoch": 29.39, "learning_rate": 3.531067463325456e-05, "loss": 2.2255, "step": 5932000 }, { "epoch": 29.39, "learning_rate": 3.530943604682848e-05, "loss": 2.2445, "step": 5932500 }, { "epoch": 29.39, "learning_rate": 3.5308197460402394e-05, "loss": 2.2298, "step": 5933000 }, { "epoch": 29.4, "learning_rate": 3.530695887397631e-05, "loss": 2.2297, "step": 5933500 }, { "epoch": 29.4, "learning_rate": 3.530572028755023e-05, "loss": 2.2726, "step": 5934000 }, { "epoch": 29.4, "learning_rate": 3.5304481701124145e-05, "loss": 2.2399, "step": 5934500 }, { "epoch": 29.4, "learning_rate": 3.530324311469806e-05, "loss": 2.262, "step": 5935000 }, { "epoch": 29.41, "learning_rate": 3.530200452827198e-05, "loss": 2.2255, "step": 5935500 }, { "epoch": 29.41, "learning_rate": 3.5300765941845895e-05, "loss": 2.2357, "step": 5936000 }, { "epoch": 29.41, "learning_rate": 3.529952735541981e-05, "loss": 2.2195, "step": 5936500 }, { "epoch": 29.41, "learning_rate": 3.5298291246166574e-05, "loss": 2.2428, "step": 5937000 }, { "epoch": 29.42, "learning_rate": 3.529705513691334e-05, "loss": 2.2513, "step": 5937500 }, { "epoch": 29.42, "learning_rate": 3.529581655048726e-05, "loss": 2.2211, "step": 5938000 }, { "epoch": 29.42, "learning_rate": 3.529457796406118e-05, "loss": 2.2356, "step": 5938500 }, { "epoch": 29.42, "learning_rate": 3.5293339377635094e-05, "loss": 2.2263, "step": 5939000 }, { "epoch": 29.43, "learning_rate": 3.529210079120901e-05, "loss": 2.2276, "step": 5939500 }, { "epoch": 29.43, "learning_rate": 3.529086220478293e-05, "loss": 2.2462, "step": 5940000 }, { "epoch": 29.43, "learning_rate": 3.5289623618356845e-05, "loss": 2.1902, "step": 5940500 }, { "epoch": 29.43, "learning_rate": 3.528838503193076e-05, "loss": 2.26, "step": 5941000 }, { "epoch": 29.44, "learning_rate": 3.528714644550468e-05, "loss": 2.2554, "step": 5941500 }, { "epoch": 29.44, "learning_rate": 3.5285907859078596e-05, "loss": 2.2377, "step": 5942000 }, { "epoch": 29.44, "learning_rate": 3.528466927265251e-05, "loss": 2.2355, "step": 5942500 }, { "epoch": 29.44, "learning_rate": 3.528343068622643e-05, "loss": 2.2532, "step": 5943000 }, { "epoch": 29.45, "learning_rate": 3.528219705414604e-05, "loss": 2.2435, "step": 5943500 }, { "epoch": 29.45, "learning_rate": 3.528096094489282e-05, "loss": 2.2458, "step": 5944000 }, { "epoch": 29.45, "learning_rate": 3.527972235846673e-05, "loss": 2.2236, "step": 5944500 }, { "epoch": 29.45, "learning_rate": 3.5278483772040646e-05, "loss": 2.2477, "step": 5945000 }, { "epoch": 29.46, "learning_rate": 3.527724518561456e-05, "loss": 2.2348, "step": 5945500 }, { "epoch": 29.46, "learning_rate": 3.527600659918848e-05, "loss": 2.2568, "step": 5946000 }, { "epoch": 29.46, "learning_rate": 3.52747680127624e-05, "loss": 2.2553, "step": 5946500 }, { "epoch": 29.46, "learning_rate": 3.527352942633631e-05, "loss": 2.246, "step": 5947000 }, { "epoch": 29.47, "learning_rate": 3.5272290839910224e-05, "loss": 2.258, "step": 5947500 }, { "epoch": 29.47, "learning_rate": 3.527105225348414e-05, "loss": 2.2518, "step": 5948000 }, { "epoch": 29.47, "learning_rate": 3.5269816144230916e-05, "loss": 2.2239, "step": 5948500 }, { "epoch": 29.47, "learning_rate": 3.526857755780483e-05, "loss": 2.2415, "step": 5949000 }, { "epoch": 29.48, "learning_rate": 3.5267338971378743e-05, "loss": 2.256, "step": 5949500 }, { "epoch": 29.48, "learning_rate": 3.526610038495266e-05, "loss": 2.2548, "step": 5950000 }, { "epoch": 29.48, "learning_rate": 3.5264864275699436e-05, "loss": 2.2458, "step": 5950500 }, { "epoch": 29.48, "learning_rate": 3.526362568927335e-05, "loss": 2.2636, "step": 5951000 }, { "epoch": 29.49, "learning_rate": 3.526238710284726e-05, "loss": 2.221, "step": 5951500 }, { "epoch": 29.49, "learning_rate": 3.526114851642118e-05, "loss": 2.2384, "step": 5952000 }, { "epoch": 29.49, "learning_rate": 3.525991240716795e-05, "loss": 2.2514, "step": 5952500 }, { "epoch": 29.49, "learning_rate": 3.5258673820741866e-05, "loss": 2.2481, "step": 5953000 }, { "epoch": 29.5, "learning_rate": 3.525743523431578e-05, "loss": 2.205, "step": 5953500 }, { "epoch": 29.5, "learning_rate": 3.52561966478897e-05, "loss": 2.2412, "step": 5954000 }, { "epoch": 29.5, "learning_rate": 3.5254958061463617e-05, "loss": 2.2481, "step": 5954500 }, { "epoch": 29.5, "learning_rate": 3.5253719475037533e-05, "loss": 2.239, "step": 5955000 }, { "epoch": 29.51, "learning_rate": 3.52524833657843e-05, "loss": 2.2468, "step": 5955500 }, { "epoch": 29.51, "learning_rate": 3.525124477935822e-05, "loss": 2.2548, "step": 5956000 }, { "epoch": 29.51, "learning_rate": 3.5250006192932136e-05, "loss": 2.2493, "step": 5956500 }, { "epoch": 29.51, "learning_rate": 3.524876760650605e-05, "loss": 2.2602, "step": 5957000 }, { "epoch": 29.52, "learning_rate": 3.5247531497252815e-05, "loss": 2.255, "step": 5957500 }, { "epoch": 29.52, "learning_rate": 3.524629291082673e-05, "loss": 2.2757, "step": 5958000 }, { "epoch": 29.52, "learning_rate": 3.524505432440065e-05, "loss": 2.2548, "step": 5958500 }, { "epoch": 29.52, "learning_rate": 3.5243815737974566e-05, "loss": 2.2651, "step": 5959000 }, { "epoch": 29.53, "learning_rate": 3.524257715154848e-05, "loss": 2.2414, "step": 5959500 }, { "epoch": 29.53, "learning_rate": 3.52413385651224e-05, "loss": 2.2512, "step": 5960000 }, { "epoch": 29.53, "learning_rate": 3.524009997869632e-05, "loss": 2.2631, "step": 5960500 }, { "epoch": 29.53, "learning_rate": 3.5238861392270234e-05, "loss": 2.254, "step": 5961000 }, { "epoch": 29.54, "learning_rate": 3.523762280584415e-05, "loss": 2.2334, "step": 5961500 }, { "epoch": 29.54, "learning_rate": 3.523638421941806e-05, "loss": 2.2553, "step": 5962000 }, { "epoch": 29.54, "learning_rate": 3.5235148110164836e-05, "loss": 2.2297, "step": 5962500 }, { "epoch": 29.54, "learning_rate": 3.523390952373875e-05, "loss": 2.2426, "step": 5963000 }, { "epoch": 29.55, "learning_rate": 3.5232673414485515e-05, "loss": 2.2523, "step": 5963500 }, { "epoch": 29.55, "learning_rate": 3.523143482805943e-05, "loss": 2.2493, "step": 5964000 }, { "epoch": 29.55, "learning_rate": 3.523019624163335e-05, "loss": 2.2559, "step": 5964500 }, { "epoch": 29.55, "learning_rate": 3.5228957655207266e-05, "loss": 2.2758, "step": 5965000 }, { "epoch": 29.56, "learning_rate": 3.522771906878118e-05, "loss": 2.2565, "step": 5965500 }, { "epoch": 29.56, "learning_rate": 3.52264804823551e-05, "loss": 2.2483, "step": 5966000 }, { "epoch": 29.56, "learning_rate": 3.522524189592902e-05, "loss": 2.2588, "step": 5966500 }, { "epoch": 29.56, "learning_rate": 3.5224003309502934e-05, "loss": 2.2445, "step": 5967000 }, { "epoch": 29.57, "learning_rate": 3.52227672002497e-05, "loss": 2.274, "step": 5967500 }, { "epoch": 29.57, "learning_rate": 3.522152861382362e-05, "loss": 2.2289, "step": 5968000 }, { "epoch": 29.57, "learning_rate": 3.5220290027397536e-05, "loss": 2.2546, "step": 5968500 }, { "epoch": 29.57, "learning_rate": 3.52190539181443e-05, "loss": 2.2503, "step": 5969000 }, { "epoch": 29.57, "learning_rate": 3.5217815331718215e-05, "loss": 2.2506, "step": 5969500 }, { "epoch": 29.58, "learning_rate": 3.521657674529213e-05, "loss": 2.251, "step": 5970000 }, { "epoch": 29.58, "learning_rate": 3.52153406360389e-05, "loss": 2.2336, "step": 5970500 }, { "epoch": 29.58, "learning_rate": 3.521410204961282e-05, "loss": 2.2587, "step": 5971000 }, { "epoch": 29.58, "learning_rate": 3.5212863463186735e-05, "loss": 2.2595, "step": 5971500 }, { "epoch": 29.59, "learning_rate": 3.5211627353933504e-05, "loss": 2.2517, "step": 5972000 }, { "epoch": 29.59, "learning_rate": 3.521038876750742e-05, "loss": 2.258, "step": 5972500 }, { "epoch": 29.59, "learning_rate": 3.520915018108134e-05, "loss": 2.251, "step": 5973000 }, { "epoch": 29.59, "learning_rate": 3.5207911594655255e-05, "loss": 2.2584, "step": 5973500 }, { "epoch": 29.6, "learning_rate": 3.520667300822917e-05, "loss": 2.2444, "step": 5974000 }, { "epoch": 29.6, "learning_rate": 3.520543442180309e-05, "loss": 2.2307, "step": 5974500 }, { "epoch": 29.6, "learning_rate": 3.5204195835377e-05, "loss": 2.2548, "step": 5975000 }, { "epoch": 29.6, "learning_rate": 3.5202957248950915e-05, "loss": 2.2333, "step": 5975500 }, { "epoch": 29.61, "learning_rate": 3.520171866252483e-05, "loss": 2.2488, "step": 5976000 }, { "epoch": 29.61, "learning_rate": 3.520048007609875e-05, "loss": 2.2246, "step": 5976500 }, { "epoch": 29.61, "learning_rate": 3.5199241489672666e-05, "loss": 2.2331, "step": 5977000 }, { "epoch": 29.61, "learning_rate": 3.519800290324658e-05, "loss": 2.2524, "step": 5977500 }, { "epoch": 29.62, "learning_rate": 3.51967643168205e-05, "loss": 2.2479, "step": 5978000 }, { "epoch": 29.62, "learning_rate": 3.519552573039442e-05, "loss": 2.2346, "step": 5978500 }, { "epoch": 29.62, "learning_rate": 3.5194287143968334e-05, "loss": 2.2478, "step": 5979000 }, { "epoch": 29.62, "learning_rate": 3.519304855754225e-05, "loss": 2.2465, "step": 5979500 }, { "epoch": 29.63, "learning_rate": 3.519180997111617e-05, "loss": 2.2337, "step": 5980000 }, { "epoch": 29.63, "learning_rate": 3.5190571384690085e-05, "loss": 2.2392, "step": 5980500 }, { "epoch": 29.63, "learning_rate": 3.5189335275436854e-05, "loss": 2.2563, "step": 5981000 }, { "epoch": 29.63, "learning_rate": 3.518809668901077e-05, "loss": 2.2376, "step": 5981500 }, { "epoch": 29.64, "learning_rate": 3.5186863056930384e-05, "loss": 2.2474, "step": 5982000 }, { "epoch": 29.64, "learning_rate": 3.51856244705043e-05, "loss": 2.2415, "step": 5982500 }, { "epoch": 29.64, "learning_rate": 3.518438588407822e-05, "loss": 2.2424, "step": 5983000 }, { "epoch": 29.64, "learning_rate": 3.5183147297652135e-05, "loss": 2.2372, "step": 5983500 }, { "epoch": 29.65, "learning_rate": 3.518190871122605e-05, "loss": 2.2606, "step": 5984000 }, { "epoch": 29.65, "learning_rate": 3.518067260197282e-05, "loss": 2.2476, "step": 5984500 }, { "epoch": 29.65, "learning_rate": 3.517943401554674e-05, "loss": 2.2595, "step": 5985000 }, { "epoch": 29.65, "learning_rate": 3.5178195429120655e-05, "loss": 2.2325, "step": 5985500 }, { "epoch": 29.66, "learning_rate": 3.517695684269457e-05, "loss": 2.255, "step": 5986000 }, { "epoch": 29.66, "learning_rate": 3.517571825626849e-05, "loss": 2.2224, "step": 5986500 }, { "epoch": 29.66, "learning_rate": 3.5174479669842406e-05, "loss": 2.2385, "step": 5987000 }, { "epoch": 29.66, "learning_rate": 3.517324108341632e-05, "loss": 2.2482, "step": 5987500 }, { "epoch": 29.67, "learning_rate": 3.517200249699024e-05, "loss": 2.2485, "step": 5988000 }, { "epoch": 29.67, "learning_rate": 3.517076391056415e-05, "loss": 2.2526, "step": 5988500 }, { "epoch": 29.67, "learning_rate": 3.5169525324138066e-05, "loss": 2.2193, "step": 5989000 }, { "epoch": 29.67, "learning_rate": 3.5168286737711983e-05, "loss": 2.2374, "step": 5989500 }, { "epoch": 29.68, "learning_rate": 3.51670481512859e-05, "loss": 2.2634, "step": 5990000 }, { "epoch": 29.68, "learning_rate": 3.516580956485982e-05, "loss": 2.24, "step": 5990500 }, { "epoch": 29.68, "learning_rate": 3.5164570978433734e-05, "loss": 2.2492, "step": 5991000 }, { "epoch": 29.68, "learning_rate": 3.516333239200765e-05, "loss": 2.265, "step": 5991500 }, { "epoch": 29.69, "learning_rate": 3.516209380558157e-05, "loss": 2.2368, "step": 5992000 }, { "epoch": 29.69, "learning_rate": 3.5160855219155485e-05, "loss": 2.2397, "step": 5992500 }, { "epoch": 29.69, "learning_rate": 3.51596166327294e-05, "loss": 2.2589, "step": 5993000 }, { "epoch": 29.69, "learning_rate": 3.515837804630331e-05, "loss": 2.246, "step": 5993500 }, { "epoch": 29.7, "learning_rate": 3.515714193705009e-05, "loss": 2.2362, "step": 5994000 }, { "epoch": 29.7, "learning_rate": 3.5155905827796856e-05, "loss": 2.2473, "step": 5994500 }, { "epoch": 29.7, "learning_rate": 3.515466724137077e-05, "loss": 2.2488, "step": 5995000 }, { "epoch": 29.7, "learning_rate": 3.5153428654944684e-05, "loss": 2.2277, "step": 5995500 }, { "epoch": 29.71, "learning_rate": 3.51521900685186e-05, "loss": 2.2283, "step": 5996000 }, { "epoch": 29.71, "learning_rate": 3.515095148209252e-05, "loss": 2.245, "step": 5996500 }, { "epoch": 29.71, "learning_rate": 3.5149715372839286e-05, "loss": 2.2349, "step": 5997000 }, { "epoch": 29.71, "learning_rate": 3.51484767864132e-05, "loss": 2.2465, "step": 5997500 }, { "epoch": 29.72, "learning_rate": 3.514723819998712e-05, "loss": 2.2382, "step": 5998000 }, { "epoch": 29.72, "learning_rate": 3.514599961356104e-05, "loss": 2.2591, "step": 5998500 }, { "epoch": 29.72, "learning_rate": 3.5144761027134954e-05, "loss": 2.2273, "step": 5999000 }, { "epoch": 29.72, "learning_rate": 3.514352244070887e-05, "loss": 2.2438, "step": 5999500 }, { "epoch": 29.73, "learning_rate": 3.514228385428279e-05, "loss": 2.2216, "step": 6000000 }, { "epoch": 29.73, "learning_rate": 3.5141045267856705e-05, "loss": 2.2397, "step": 6000500 }, { "epoch": 29.73, "learning_rate": 3.513980668143062e-05, "loss": 2.2442, "step": 6001000 }, { "epoch": 29.73, "learning_rate": 3.513856809500454e-05, "loss": 2.2634, "step": 6001500 }, { "epoch": 29.74, "learning_rate": 3.5137329508578455e-05, "loss": 2.2353, "step": 6002000 }, { "epoch": 29.74, "learning_rate": 3.5136090922152366e-05, "loss": 2.2581, "step": 6002500 }, { "epoch": 29.74, "learning_rate": 3.513485233572628e-05, "loss": 2.2491, "step": 6003000 }, { "epoch": 29.74, "learning_rate": 3.51336137493002e-05, "loss": 2.2587, "step": 6003500 }, { "epoch": 29.75, "learning_rate": 3.513238011721982e-05, "loss": 2.2485, "step": 6004000 }, { "epoch": 29.75, "learning_rate": 3.513114153079374e-05, "loss": 2.2202, "step": 6004500 }, { "epoch": 29.75, "learning_rate": 3.5129902944367654e-05, "loss": 2.2374, "step": 6005000 }, { "epoch": 29.75, "learning_rate": 3.512866435794157e-05, "loss": 2.2464, "step": 6005500 }, { "epoch": 29.76, "learning_rate": 3.512742824868834e-05, "loss": 2.2466, "step": 6006000 }, { "epoch": 29.76, "learning_rate": 3.512618966226226e-05, "loss": 2.2135, "step": 6006500 }, { "epoch": 29.76, "learning_rate": 3.5124951075836174e-05, "loss": 2.2327, "step": 6007000 }, { "epoch": 29.76, "learning_rate": 3.512371248941009e-05, "loss": 2.2538, "step": 6007500 }, { "epoch": 29.77, "learning_rate": 3.512247390298401e-05, "loss": 2.241, "step": 6008000 }, { "epoch": 29.77, "learning_rate": 3.512123779373077e-05, "loss": 2.234, "step": 6008500 }, { "epoch": 29.77, "learning_rate": 3.5119999207304686e-05, "loss": 2.2631, "step": 6009000 }, { "epoch": 29.77, "learning_rate": 3.51187606208786e-05, "loss": 2.2543, "step": 6009500 }, { "epoch": 29.78, "learning_rate": 3.511752203445252e-05, "loss": 2.2295, "step": 6010000 }, { "epoch": 29.78, "learning_rate": 3.511628344802644e-05, "loss": 2.2397, "step": 6010500 }, { "epoch": 29.78, "learning_rate": 3.5115044861600354e-05, "loss": 2.2528, "step": 6011000 }, { "epoch": 29.78, "learning_rate": 3.511380627517427e-05, "loss": 2.2429, "step": 6011500 }, { "epoch": 29.79, "learning_rate": 3.511256768874819e-05, "loss": 2.2785, "step": 6012000 }, { "epoch": 29.79, "learning_rate": 3.5111329102322105e-05, "loss": 2.2572, "step": 6012500 }, { "epoch": 29.79, "learning_rate": 3.511009051589602e-05, "loss": 2.2329, "step": 6013000 }, { "epoch": 29.79, "learning_rate": 3.510885192946994e-05, "loss": 2.2503, "step": 6013500 }, { "epoch": 29.8, "learning_rate": 3.5107613343043856e-05, "loss": 2.2507, "step": 6014000 }, { "epoch": 29.8, "learning_rate": 3.510637475661777e-05, "loss": 2.2361, "step": 6014500 }, { "epoch": 29.8, "learning_rate": 3.5105141124537387e-05, "loss": 2.2559, "step": 6015000 }, { "epoch": 29.8, "learning_rate": 3.5103902538111303e-05, "loss": 2.2426, "step": 6015500 }, { "epoch": 29.81, "learning_rate": 3.510266395168522e-05, "loss": 2.2477, "step": 6016000 }, { "epoch": 29.81, "learning_rate": 3.510142536525914e-05, "loss": 2.235, "step": 6016500 }, { "epoch": 29.81, "learning_rate": 3.5100186778833054e-05, "loss": 2.2402, "step": 6017000 }, { "epoch": 29.81, "learning_rate": 3.509894819240697e-05, "loss": 2.2432, "step": 6017500 }, { "epoch": 29.82, "learning_rate": 3.509770960598089e-05, "loss": 2.2734, "step": 6018000 }, { "epoch": 29.82, "learning_rate": 3.5096471019554805e-05, "loss": 2.2639, "step": 6018500 }, { "epoch": 29.82, "learning_rate": 3.5095234910301574e-05, "loss": 2.2591, "step": 6019000 }, { "epoch": 29.82, "learning_rate": 3.509399632387549e-05, "loss": 2.2149, "step": 6019500 }, { "epoch": 29.83, "learning_rate": 3.509276021462225e-05, "loss": 2.2437, "step": 6020000 }, { "epoch": 29.83, "learning_rate": 3.509152162819617e-05, "loss": 2.2636, "step": 6020500 }, { "epoch": 29.83, "learning_rate": 3.509028304177009e-05, "loss": 2.2386, "step": 6021000 }, { "epoch": 29.83, "learning_rate": 3.5089044455344004e-05, "loss": 2.2542, "step": 6021500 }, { "epoch": 29.84, "learning_rate": 3.508780586891792e-05, "loss": 2.2627, "step": 6022000 }, { "epoch": 29.84, "learning_rate": 3.508656975966469e-05, "loss": 2.2434, "step": 6022500 }, { "epoch": 29.84, "learning_rate": 3.5085331173238606e-05, "loss": 2.2404, "step": 6023000 }, { "epoch": 29.84, "learning_rate": 3.508409258681252e-05, "loss": 2.2144, "step": 6023500 }, { "epoch": 29.84, "learning_rate": 3.508285400038644e-05, "loss": 2.2302, "step": 6024000 }, { "epoch": 29.85, "learning_rate": 3.508161541396036e-05, "loss": 2.2015, "step": 6024500 }, { "epoch": 29.85, "learning_rate": 3.5080379304707126e-05, "loss": 2.2531, "step": 6025000 }, { "epoch": 29.85, "learning_rate": 3.507914071828104e-05, "loss": 2.2415, "step": 6025500 }, { "epoch": 29.85, "learning_rate": 3.507790213185495e-05, "loss": 2.2345, "step": 6026000 }, { "epoch": 29.86, "learning_rate": 3.507666354542887e-05, "loss": 2.246, "step": 6026500 }, { "epoch": 29.86, "learning_rate": 3.507542495900279e-05, "loss": 2.2626, "step": 6027000 }, { "epoch": 29.86, "learning_rate": 3.5074186372576704e-05, "loss": 2.2182, "step": 6027500 }, { "epoch": 29.86, "learning_rate": 3.507295026332348e-05, "loss": 2.2315, "step": 6028000 }, { "epoch": 29.87, "learning_rate": 3.507171167689739e-05, "loss": 2.26, "step": 6028500 }, { "epoch": 29.87, "learning_rate": 3.5070473090471306e-05, "loss": 2.2435, "step": 6029000 }, { "epoch": 29.87, "learning_rate": 3.506923450404522e-05, "loss": 2.2568, "step": 6029500 }, { "epoch": 29.87, "learning_rate": 3.506799591761914e-05, "loss": 2.2584, "step": 6030000 }, { "epoch": 29.88, "learning_rate": 3.506675733119306e-05, "loss": 2.2313, "step": 6030500 }, { "epoch": 29.88, "learning_rate": 3.5065518744766974e-05, "loss": 2.2419, "step": 6031000 }, { "epoch": 29.88, "learning_rate": 3.506428015834089e-05, "loss": 2.2516, "step": 6031500 }, { "epoch": 29.88, "learning_rate": 3.506304157191481e-05, "loss": 2.2528, "step": 6032000 }, { "epoch": 29.89, "learning_rate": 3.506180793983443e-05, "loss": 2.2518, "step": 6032500 }, { "epoch": 29.89, "learning_rate": 3.5060569353408346e-05, "loss": 2.2389, "step": 6033000 }, { "epoch": 29.89, "learning_rate": 3.505933076698226e-05, "loss": 2.2587, "step": 6033500 }, { "epoch": 29.89, "learning_rate": 3.505809218055618e-05, "loss": 2.2524, "step": 6034000 }, { "epoch": 29.9, "learning_rate": 3.505685607130295e-05, "loss": 2.2516, "step": 6034500 }, { "epoch": 29.9, "learning_rate": 3.5055617484876865e-05, "loss": 2.244, "step": 6035000 }, { "epoch": 29.9, "learning_rate": 3.505437889845078e-05, "loss": 2.2784, "step": 6035500 }, { "epoch": 29.9, "learning_rate": 3.50531403120247e-05, "loss": 2.2512, "step": 6036000 }, { "epoch": 29.91, "learning_rate": 3.5051901725598616e-05, "loss": 2.2421, "step": 6036500 }, { "epoch": 29.91, "learning_rate": 3.5050663139172526e-05, "loss": 2.2322, "step": 6037000 }, { "epoch": 29.91, "learning_rate": 3.5049427029919295e-05, "loss": 2.2487, "step": 6037500 }, { "epoch": 29.91, "learning_rate": 3.5048190920666064e-05, "loss": 2.2687, "step": 6038000 }, { "epoch": 29.92, "learning_rate": 3.504695233423998e-05, "loss": 2.2331, "step": 6038500 }, { "epoch": 29.92, "learning_rate": 3.50457137478139e-05, "loss": 2.2588, "step": 6039000 }, { "epoch": 29.92, "learning_rate": 3.5044475161387815e-05, "loss": 2.2409, "step": 6039500 }, { "epoch": 29.92, "learning_rate": 3.504323657496173e-05, "loss": 2.2369, "step": 6040000 }, { "epoch": 29.93, "learning_rate": 3.504199798853565e-05, "loss": 2.2599, "step": 6040500 }, { "epoch": 29.93, "learning_rate": 3.504076683362812e-05, "loss": 2.2484, "step": 6041000 }, { "epoch": 29.93, "learning_rate": 3.503952824720204e-05, "loss": 2.2689, "step": 6041500 }, { "epoch": 29.93, "learning_rate": 3.5038289660775955e-05, "loss": 2.2367, "step": 6042000 }, { "epoch": 29.94, "learning_rate": 3.503705107434987e-05, "loss": 2.2461, "step": 6042500 }, { "epoch": 29.94, "learning_rate": 3.503581248792379e-05, "loss": 2.2407, "step": 6043000 }, { "epoch": 29.94, "learning_rate": 3.5034573901497706e-05, "loss": 2.2564, "step": 6043500 }, { "epoch": 29.94, "learning_rate": 3.503333531507162e-05, "loss": 2.2676, "step": 6044000 }, { "epoch": 29.95, "learning_rate": 3.503209672864553e-05, "loss": 2.2478, "step": 6044500 }, { "epoch": 29.95, "learning_rate": 3.503085814221945e-05, "loss": 2.2507, "step": 6045000 }, { "epoch": 29.95, "learning_rate": 3.5029619555793367e-05, "loss": 2.2605, "step": 6045500 }, { "epoch": 29.95, "learning_rate": 3.5028383446540135e-05, "loss": 2.2413, "step": 6046000 }, { "epoch": 29.96, "learning_rate": 3.502714486011405e-05, "loss": 2.2463, "step": 6046500 }, { "epoch": 29.96, "learning_rate": 3.502590627368797e-05, "loss": 2.2444, "step": 6047000 }, { "epoch": 29.96, "learning_rate": 3.5024667687261886e-05, "loss": 2.2214, "step": 6047500 }, { "epoch": 29.96, "learning_rate": 3.5023429100835796e-05, "loss": 2.2366, "step": 6048000 }, { "epoch": 29.97, "learning_rate": 3.502219051440971e-05, "loss": 2.2388, "step": 6048500 }, { "epoch": 29.97, "learning_rate": 3.502095192798363e-05, "loss": 2.265, "step": 6049000 }, { "epoch": 29.97, "learning_rate": 3.501971334155755e-05, "loss": 2.2427, "step": 6049500 }, { "epoch": 29.97, "learning_rate": 3.5018474755131464e-05, "loss": 2.243, "step": 6050000 }, { "epoch": 29.98, "learning_rate": 3.501723616870538e-05, "loss": 2.2914, "step": 6050500 }, { "epoch": 29.98, "learning_rate": 3.50159975822793e-05, "loss": 2.2452, "step": 6051000 }, { "epoch": 29.98, "learning_rate": 3.5014758995853215e-05, "loss": 2.2627, "step": 6051500 }, { "epoch": 29.98, "learning_rate": 3.501352040942713e-05, "loss": 2.2594, "step": 6052000 }, { "epoch": 29.99, "learning_rate": 3.50122843001739e-05, "loss": 2.2361, "step": 6052500 }, { "epoch": 29.99, "learning_rate": 3.501104571374782e-05, "loss": 2.2669, "step": 6053000 }, { "epoch": 29.99, "learning_rate": 3.500980712732173e-05, "loss": 2.264, "step": 6053500 }, { "epoch": 29.99, "learning_rate": 3.5008568540895645e-05, "loss": 2.244, "step": 6054000 }, { "epoch": 30.0, "learning_rate": 3.500732995446956e-05, "loss": 2.2693, "step": 6054500 }, { "epoch": 30.0, "learning_rate": 3.500609384521633e-05, "loss": 2.2239, "step": 6055000 }, { "epoch": 30.0, "eval_accuracy": 0.6577156623596847, "eval_accuracy_mlm": 0.6136358534754317, "eval_accuracy_nsp": 0.8656725198953558, "eval_loss": 2.330178737640381, "eval_runtime": 146.0024, "eval_samples_per_second": 1746.266, "eval_steps_per_second": 72.766, "step": 6055290 }, { "epoch": 30.0, "learning_rate": 3.500485525879025e-05, "loss": 2.2413, "step": 6055500 }, { "epoch": 30.0, "learning_rate": 3.5003616672364164e-05, "loss": 2.2212, "step": 6056000 }, { "epoch": 30.01, "learning_rate": 3.500237808593808e-05, "loss": 2.2128, "step": 6056500 }, { "epoch": 30.01, "learning_rate": 3.500114197668485e-05, "loss": 2.225, "step": 6057000 }, { "epoch": 30.01, "learning_rate": 3.499990339025877e-05, "loss": 2.2007, "step": 6057500 }, { "epoch": 30.01, "learning_rate": 3.4998664803832684e-05, "loss": 2.2145, "step": 6058000 }, { "epoch": 30.02, "learning_rate": 3.499742869457945e-05, "loss": 2.2361, "step": 6058500 }, { "epoch": 30.02, "learning_rate": 3.499619010815337e-05, "loss": 2.2315, "step": 6059000 }, { "epoch": 30.02, "learning_rate": 3.4994951521727286e-05, "loss": 2.2215, "step": 6059500 }, { "epoch": 30.02, "learning_rate": 3.49937129353012e-05, "loss": 2.2348, "step": 6060000 }, { "epoch": 30.03, "learning_rate": 3.4992474348875113e-05, "loss": 2.198, "step": 6060500 }, { "epoch": 30.03, "learning_rate": 3.499123576244903e-05, "loss": 2.2208, "step": 6061000 }, { "epoch": 30.03, "learning_rate": 3.498999717602295e-05, "loss": 2.2319, "step": 6061500 }, { "epoch": 30.03, "learning_rate": 3.4988758589596864e-05, "loss": 2.1845, "step": 6062000 }, { "epoch": 30.04, "learning_rate": 3.498752000317078e-05, "loss": 2.2193, "step": 6062500 }, { "epoch": 30.04, "learning_rate": 3.49862814167447e-05, "loss": 2.215, "step": 6063000 }, { "epoch": 30.04, "learning_rate": 3.4985042830318615e-05, "loss": 2.2272, "step": 6063500 }, { "epoch": 30.04, "learning_rate": 3.4983806721065384e-05, "loss": 2.2079, "step": 6064000 }, { "epoch": 30.05, "learning_rate": 3.49825681346393e-05, "loss": 2.2012, "step": 6064500 }, { "epoch": 30.05, "learning_rate": 3.498132954821322e-05, "loss": 2.2302, "step": 6065000 }, { "epoch": 30.05, "learning_rate": 3.4980090961787135e-05, "loss": 2.2268, "step": 6065500 }, { "epoch": 30.05, "learning_rate": 3.497885237536105e-05, "loss": 2.2134, "step": 6066000 }, { "epoch": 30.06, "learning_rate": 3.497761378893497e-05, "loss": 2.2258, "step": 6066500 }, { "epoch": 30.06, "learning_rate": 3.497637520250888e-05, "loss": 2.244, "step": 6067000 }, { "epoch": 30.06, "learning_rate": 3.4975136616082796e-05, "loss": 2.2202, "step": 6067500 }, { "epoch": 30.06, "learning_rate": 3.4973900506829564e-05, "loss": 2.2133, "step": 6068000 }, { "epoch": 30.07, "learning_rate": 3.497266192040348e-05, "loss": 2.2548, "step": 6068500 }, { "epoch": 30.07, "learning_rate": 3.49714233339774e-05, "loss": 2.2356, "step": 6069000 }, { "epoch": 30.07, "learning_rate": 3.497018970189702e-05, "loss": 2.2452, "step": 6069500 }, { "epoch": 30.07, "learning_rate": 3.4968951115470936e-05, "loss": 2.2254, "step": 6070000 }, { "epoch": 30.08, "learning_rate": 3.4967715006217705e-05, "loss": 2.2346, "step": 6070500 }, { "epoch": 30.08, "learning_rate": 3.496647641979162e-05, "loss": 2.2092, "step": 6071000 }, { "epoch": 30.08, "learning_rate": 3.496523783336554e-05, "loss": 2.2229, "step": 6071500 }, { "epoch": 30.08, "learning_rate": 3.4963999246939455e-05, "loss": 2.2513, "step": 6072000 }, { "epoch": 30.09, "learning_rate": 3.496276066051337e-05, "loss": 2.2273, "step": 6072500 }, { "epoch": 30.09, "learning_rate": 3.496152207408729e-05, "loss": 2.2222, "step": 6073000 }, { "epoch": 30.09, "learning_rate": 3.4960283487661206e-05, "loss": 2.2396, "step": 6073500 }, { "epoch": 30.09, "learning_rate": 3.495904490123512e-05, "loss": 2.2165, "step": 6074000 }, { "epoch": 30.1, "learning_rate": 3.495780879198189e-05, "loss": 2.1869, "step": 6074500 }, { "epoch": 30.1, "learning_rate": 3.49565702055558e-05, "loss": 2.2352, "step": 6075000 }, { "epoch": 30.1, "learning_rate": 3.495533161912972e-05, "loss": 2.2344, "step": 6075500 }, { "epoch": 30.1, "learning_rate": 3.4954093032703636e-05, "loss": 2.2467, "step": 6076000 }, { "epoch": 30.11, "learning_rate": 3.495285444627755e-05, "loss": 2.2179, "step": 6076500 }, { "epoch": 30.11, "learning_rate": 3.495161585985147e-05, "loss": 2.2305, "step": 6077000 }, { "epoch": 30.11, "learning_rate": 3.495037727342539e-05, "loss": 2.2301, "step": 6077500 }, { "epoch": 30.11, "learning_rate": 3.494914364134501e-05, "loss": 2.2311, "step": 6078000 }, { "epoch": 30.11, "learning_rate": 3.4947905054918924e-05, "loss": 2.2474, "step": 6078500 }, { "epoch": 30.12, "learning_rate": 3.494666646849284e-05, "loss": 2.2372, "step": 6079000 }, { "epoch": 30.12, "learning_rate": 3.494542788206676e-05, "loss": 2.2275, "step": 6079500 }, { "epoch": 30.12, "learning_rate": 3.4944189295640675e-05, "loss": 2.2308, "step": 6080000 }, { "epoch": 30.12, "learning_rate": 3.494295070921459e-05, "loss": 2.2416, "step": 6080500 }, { "epoch": 30.13, "learning_rate": 3.494171212278851e-05, "loss": 2.2317, "step": 6081000 }, { "epoch": 30.13, "learning_rate": 3.494047353636242e-05, "loss": 2.1961, "step": 6081500 }, { "epoch": 30.13, "learning_rate": 3.4939234949936336e-05, "loss": 2.2102, "step": 6082000 }, { "epoch": 30.13, "learning_rate": 3.493799636351025e-05, "loss": 2.2449, "step": 6082500 }, { "epoch": 30.14, "learning_rate": 3.493676025425702e-05, "loss": 2.2392, "step": 6083000 }, { "epoch": 30.14, "learning_rate": 3.493552166783094e-05, "loss": 2.2121, "step": 6083500 }, { "epoch": 30.14, "learning_rate": 3.4934283081404856e-05, "loss": 2.2322, "step": 6084000 }, { "epoch": 30.14, "learning_rate": 3.493304449497877e-05, "loss": 2.2107, "step": 6084500 }, { "epoch": 30.15, "learning_rate": 3.493180590855269e-05, "loss": 2.2081, "step": 6085000 }, { "epoch": 30.15, "learning_rate": 3.4930567322126606e-05, "loss": 2.2319, "step": 6085500 }, { "epoch": 30.15, "learning_rate": 3.4929328735700523e-05, "loss": 2.2277, "step": 6086000 }, { "epoch": 30.15, "learning_rate": 3.492809262644729e-05, "loss": 2.2258, "step": 6086500 }, { "epoch": 30.16, "learning_rate": 3.492685404002121e-05, "loss": 2.224, "step": 6087000 }, { "epoch": 30.16, "learning_rate": 3.4925615453595126e-05, "loss": 2.2234, "step": 6087500 }, { "epoch": 30.16, "learning_rate": 3.492437934434189e-05, "loss": 2.2201, "step": 6088000 }, { "epoch": 30.16, "learning_rate": 3.4923140757915805e-05, "loss": 2.2361, "step": 6088500 }, { "epoch": 30.17, "learning_rate": 3.492190217148972e-05, "loss": 2.2149, "step": 6089000 }, { "epoch": 30.17, "learning_rate": 3.492066358506364e-05, "loss": 2.2179, "step": 6089500 }, { "epoch": 30.17, "learning_rate": 3.4919424998637556e-05, "loss": 2.2521, "step": 6090000 }, { "epoch": 30.17, "learning_rate": 3.491818641221147e-05, "loss": 2.2198, "step": 6090500 }, { "epoch": 30.18, "learning_rate": 3.491694782578539e-05, "loss": 2.2251, "step": 6091000 }, { "epoch": 30.18, "learning_rate": 3.4915709239359307e-05, "loss": 2.2303, "step": 6091500 }, { "epoch": 30.18, "learning_rate": 3.4914470652933224e-05, "loss": 2.2546, "step": 6092000 }, { "epoch": 30.18, "learning_rate": 3.491323206650714e-05, "loss": 2.2206, "step": 6092500 }, { "epoch": 30.19, "learning_rate": 3.491199595725391e-05, "loss": 2.2407, "step": 6093000 }, { "epoch": 30.19, "learning_rate": 3.4910757370827826e-05, "loss": 2.2124, "step": 6093500 }, { "epoch": 30.19, "learning_rate": 3.490951878440174e-05, "loss": 2.2374, "step": 6094000 }, { "epoch": 30.19, "learning_rate": 3.490828019797566e-05, "loss": 2.2252, "step": 6094500 }, { "epoch": 30.2, "learning_rate": 3.490704161154957e-05, "loss": 2.2356, "step": 6095000 }, { "epoch": 30.2, "learning_rate": 3.490580302512349e-05, "loss": 2.2251, "step": 6095500 }, { "epoch": 30.2, "learning_rate": 3.4904564438697404e-05, "loss": 2.2153, "step": 6096000 }, { "epoch": 30.2, "learning_rate": 3.490332585227132e-05, "loss": 2.2476, "step": 6096500 }, { "epoch": 30.21, "learning_rate": 3.490208726584524e-05, "loss": 2.2666, "step": 6097000 }, { "epoch": 30.21, "learning_rate": 3.4900848679419155e-05, "loss": 2.2188, "step": 6097500 }, { "epoch": 30.21, "learning_rate": 3.4899610092993065e-05, "loss": 2.2538, "step": 6098000 }, { "epoch": 30.21, "learning_rate": 3.489837398373984e-05, "loss": 2.2291, "step": 6098500 }, { "epoch": 30.22, "learning_rate": 3.489713539731376e-05, "loss": 2.2363, "step": 6099000 }, { "epoch": 30.22, "learning_rate": 3.4895896810887674e-05, "loss": 2.2546, "step": 6099500 }, { "epoch": 30.22, "learning_rate": 3.489465822446159e-05, "loss": 2.2155, "step": 6100000 }, { "epoch": 30.22, "learning_rate": 3.489341963803551e-05, "loss": 2.2316, "step": 6100500 }, { "epoch": 30.23, "learning_rate": 3.489218105160942e-05, "loss": 2.2429, "step": 6101000 }, { "epoch": 30.23, "learning_rate": 3.4890944942356194e-05, "loss": 2.2284, "step": 6101500 }, { "epoch": 30.23, "learning_rate": 3.4889708833102956e-05, "loss": 2.2208, "step": 6102000 }, { "epoch": 30.23, "learning_rate": 3.488847024667687e-05, "loss": 2.2401, "step": 6102500 }, { "epoch": 30.24, "learning_rate": 3.488723166025079e-05, "loss": 2.2289, "step": 6103000 }, { "epoch": 30.24, "learning_rate": 3.488599307382471e-05, "loss": 2.2329, "step": 6103500 }, { "epoch": 30.24, "learning_rate": 3.4884754487398624e-05, "loss": 2.2288, "step": 6104000 }, { "epoch": 30.24, "learning_rate": 3.488351590097254e-05, "loss": 2.2316, "step": 6104500 }, { "epoch": 30.25, "learning_rate": 3.488227731454646e-05, "loss": 2.2553, "step": 6105000 }, { "epoch": 30.25, "learning_rate": 3.4881038728120375e-05, "loss": 2.21, "step": 6105500 }, { "epoch": 30.25, "learning_rate": 3.487980261886714e-05, "loss": 2.2134, "step": 6106000 }, { "epoch": 30.25, "learning_rate": 3.487856403244106e-05, "loss": 2.2151, "step": 6106500 }, { "epoch": 30.26, "learning_rate": 3.487732544601498e-05, "loss": 2.229, "step": 6107000 }, { "epoch": 30.26, "learning_rate": 3.4876086859588894e-05, "loss": 2.2177, "step": 6107500 }, { "epoch": 30.26, "learning_rate": 3.4874850750335656e-05, "loss": 2.2384, "step": 6108000 }, { "epoch": 30.26, "learning_rate": 3.487361216390957e-05, "loss": 2.2151, "step": 6108500 }, { "epoch": 30.27, "learning_rate": 3.487237357748349e-05, "loss": 2.2436, "step": 6109000 }, { "epoch": 30.27, "learning_rate": 3.487113746823026e-05, "loss": 2.2404, "step": 6109500 }, { "epoch": 30.27, "learning_rate": 3.4869898881804176e-05, "loss": 2.2555, "step": 6110000 }, { "epoch": 30.27, "learning_rate": 3.486866029537809e-05, "loss": 2.2404, "step": 6110500 }, { "epoch": 30.28, "learning_rate": 3.486742170895201e-05, "loss": 2.2173, "step": 6111000 }, { "epoch": 30.28, "learning_rate": 3.486618559969878e-05, "loss": 2.2282, "step": 6111500 }, { "epoch": 30.28, "learning_rate": 3.486494701327269e-05, "loss": 2.2582, "step": 6112000 }, { "epoch": 30.28, "learning_rate": 3.4863708426846606e-05, "loss": 2.2264, "step": 6112500 }, { "epoch": 30.29, "learning_rate": 3.486246984042052e-05, "loss": 2.2294, "step": 6113000 }, { "epoch": 30.29, "learning_rate": 3.486123125399444e-05, "loss": 2.2493, "step": 6113500 }, { "epoch": 30.29, "learning_rate": 3.4859992667568356e-05, "loss": 2.2327, "step": 6114000 }, { "epoch": 30.29, "learning_rate": 3.485875408114227e-05, "loss": 2.2406, "step": 6114500 }, { "epoch": 30.3, "learning_rate": 3.485751549471619e-05, "loss": 2.2053, "step": 6115000 }, { "epoch": 30.3, "learning_rate": 3.485627690829011e-05, "loss": 2.2343, "step": 6115500 }, { "epoch": 30.3, "learning_rate": 3.4855040799036876e-05, "loss": 2.2061, "step": 6116000 }, { "epoch": 30.3, "learning_rate": 3.4853804689783645e-05, "loss": 2.2168, "step": 6116500 }, { "epoch": 30.31, "learning_rate": 3.485256610335756e-05, "loss": 2.2477, "step": 6117000 }, { "epoch": 30.31, "learning_rate": 3.485132751693148e-05, "loss": 2.233, "step": 6117500 }, { "epoch": 30.31, "learning_rate": 3.4850088930505395e-05, "loss": 2.2193, "step": 6118000 }, { "epoch": 30.31, "learning_rate": 3.484885034407931e-05, "loss": 2.224, "step": 6118500 }, { "epoch": 30.32, "learning_rate": 3.484761175765322e-05, "loss": 2.221, "step": 6119000 }, { "epoch": 30.32, "learning_rate": 3.48463756484e-05, "loss": 2.2206, "step": 6119500 }, { "epoch": 30.32, "learning_rate": 3.4845137061973915e-05, "loss": 2.2106, "step": 6120000 }, { "epoch": 30.32, "learning_rate": 3.4843898475547825e-05, "loss": 2.2688, "step": 6120500 }, { "epoch": 30.33, "learning_rate": 3.484265988912174e-05, "loss": 2.224, "step": 6121000 }, { "epoch": 30.33, "learning_rate": 3.484142130269566e-05, "loss": 2.2378, "step": 6121500 }, { "epoch": 30.33, "learning_rate": 3.4840182716269576e-05, "loss": 2.1927, "step": 6122000 }, { "epoch": 30.33, "learning_rate": 3.483894412984349e-05, "loss": 2.2423, "step": 6122500 }, { "epoch": 30.34, "learning_rate": 3.483770554341741e-05, "loss": 2.2092, "step": 6123000 }, { "epoch": 30.34, "learning_rate": 3.483646695699133e-05, "loss": 2.242, "step": 6123500 }, { "epoch": 30.34, "learning_rate": 3.4835228370565244e-05, "loss": 2.217, "step": 6124000 }, { "epoch": 30.34, "learning_rate": 3.483398978413916e-05, "loss": 2.2285, "step": 6124500 }, { "epoch": 30.35, "learning_rate": 3.483275119771308e-05, "loss": 2.2339, "step": 6125000 }, { "epoch": 30.35, "learning_rate": 3.4831512611286995e-05, "loss": 2.235, "step": 6125500 }, { "epoch": 30.35, "learning_rate": 3.483027402486091e-05, "loss": 2.2277, "step": 6126000 }, { "epoch": 30.35, "learning_rate": 3.482903543843483e-05, "loss": 2.2282, "step": 6126500 }, { "epoch": 30.36, "learning_rate": 3.482779932918159e-05, "loss": 2.2184, "step": 6127000 }, { "epoch": 30.36, "learning_rate": 3.482656321992836e-05, "loss": 2.2337, "step": 6127500 }, { "epoch": 30.36, "learning_rate": 3.4825324633502276e-05, "loss": 2.256, "step": 6128000 }, { "epoch": 30.36, "learning_rate": 3.482408852424905e-05, "loss": 2.2668, "step": 6128500 }, { "epoch": 30.37, "learning_rate": 3.482284993782297e-05, "loss": 2.254, "step": 6129000 }, { "epoch": 30.37, "learning_rate": 3.482161382856973e-05, "loss": 2.2228, "step": 6129500 }, { "epoch": 30.37, "learning_rate": 3.482037524214365e-05, "loss": 2.2304, "step": 6130000 }, { "epoch": 30.37, "learning_rate": 3.4819136655717565e-05, "loss": 2.2172, "step": 6130500 }, { "epoch": 30.38, "learning_rate": 3.481789806929148e-05, "loss": 2.2406, "step": 6131000 }, { "epoch": 30.38, "learning_rate": 3.481666196003825e-05, "loss": 2.2339, "step": 6131500 }, { "epoch": 30.38, "learning_rate": 3.481542337361217e-05, "loss": 2.2278, "step": 6132000 }, { "epoch": 30.38, "learning_rate": 3.4814184787186084e-05, "loss": 2.2436, "step": 6132500 }, { "epoch": 30.39, "learning_rate": 3.481294620076e-05, "loss": 2.2639, "step": 6133000 }, { "epoch": 30.39, "learning_rate": 3.481170761433392e-05, "loss": 2.2498, "step": 6133500 }, { "epoch": 30.39, "learning_rate": 3.481047150508068e-05, "loss": 2.2274, "step": 6134000 }, { "epoch": 30.39, "learning_rate": 3.48092329186546e-05, "loss": 2.251, "step": 6134500 }, { "epoch": 30.39, "learning_rate": 3.4807994332228514e-05, "loss": 2.2089, "step": 6135000 }, { "epoch": 30.4, "learning_rate": 3.480675574580243e-05, "loss": 2.2332, "step": 6135500 }, { "epoch": 30.4, "learning_rate": 3.48055196365492e-05, "loss": 2.2195, "step": 6136000 }, { "epoch": 30.4, "learning_rate": 3.4804281050123117e-05, "loss": 2.2192, "step": 6136500 }, { "epoch": 30.4, "learning_rate": 3.4803042463697033e-05, "loss": 2.2242, "step": 6137000 }, { "epoch": 30.41, "learning_rate": 3.480180387727095e-05, "loss": 2.2118, "step": 6137500 }, { "epoch": 30.41, "learning_rate": 3.480056529084487e-05, "loss": 2.2459, "step": 6138000 }, { "epoch": 30.41, "learning_rate": 3.4799326704418784e-05, "loss": 2.2424, "step": 6138500 }, { "epoch": 30.41, "learning_rate": 3.47980881179927e-05, "loss": 2.2348, "step": 6139000 }, { "epoch": 30.42, "learning_rate": 3.479684953156662e-05, "loss": 2.2165, "step": 6139500 }, { "epoch": 30.42, "learning_rate": 3.4795610945140535e-05, "loss": 2.2224, "step": 6140000 }, { "epoch": 30.42, "learning_rate": 3.47943748358873e-05, "loss": 2.2588, "step": 6140500 }, { "epoch": 30.42, "learning_rate": 3.4793138726634066e-05, "loss": 2.2282, "step": 6141000 }, { "epoch": 30.43, "learning_rate": 3.479190014020798e-05, "loss": 2.2549, "step": 6141500 }, { "epoch": 30.43, "learning_rate": 3.47906615537819e-05, "loss": 2.2456, "step": 6142000 }, { "epoch": 30.43, "learning_rate": 3.478942296735582e-05, "loss": 2.2379, "step": 6142500 }, { "epoch": 30.43, "learning_rate": 3.4788184380929734e-05, "loss": 2.2362, "step": 6143000 }, { "epoch": 30.44, "learning_rate": 3.478694579450365e-05, "loss": 2.2371, "step": 6143500 }, { "epoch": 30.44, "learning_rate": 3.478570720807757e-05, "loss": 2.2568, "step": 6144000 }, { "epoch": 30.44, "learning_rate": 3.4784468621651484e-05, "loss": 2.2387, "step": 6144500 }, { "epoch": 30.44, "learning_rate": 3.47832300352254e-05, "loss": 2.2459, "step": 6145000 }, { "epoch": 30.45, "learning_rate": 3.478199144879932e-05, "loss": 2.2056, "step": 6145500 }, { "epoch": 30.45, "learning_rate": 3.4780752862373235e-05, "loss": 2.2531, "step": 6146000 }, { "epoch": 30.45, "learning_rate": 3.477951427594715e-05, "loss": 2.2194, "step": 6146500 }, { "epoch": 30.45, "learning_rate": 3.477827568952107e-05, "loss": 2.2402, "step": 6147000 }, { "epoch": 30.46, "learning_rate": 3.4777037103094986e-05, "loss": 2.2502, "step": 6147500 }, { "epoch": 30.46, "learning_rate": 3.47757985166689e-05, "loss": 2.2541, "step": 6148000 }, { "epoch": 30.46, "learning_rate": 3.477455993024281e-05, "loss": 2.2304, "step": 6148500 }, { "epoch": 30.46, "learning_rate": 3.477332134381673e-05, "loss": 2.2356, "step": 6149000 }, { "epoch": 30.47, "learning_rate": 3.47720852345635e-05, "loss": 2.2525, "step": 6149500 }, { "epoch": 30.47, "learning_rate": 3.477084912531027e-05, "loss": 2.2437, "step": 6150000 }, { "epoch": 30.47, "learning_rate": 3.4769610538884185e-05, "loss": 2.2182, "step": 6150500 }, { "epoch": 30.47, "learning_rate": 3.47683719524581e-05, "loss": 2.2214, "step": 6151000 }, { "epoch": 30.48, "learning_rate": 3.476713336603202e-05, "loss": 2.2403, "step": 6151500 }, { "epoch": 30.48, "learning_rate": 3.4765894779605935e-05, "loss": 2.2509, "step": 6152000 }, { "epoch": 30.48, "learning_rate": 3.476465619317985e-05, "loss": 2.2426, "step": 6152500 }, { "epoch": 30.48, "learning_rate": 3.476342008392662e-05, "loss": 2.2147, "step": 6153000 }, { "epoch": 30.49, "learning_rate": 3.476218149750053e-05, "loss": 2.2379, "step": 6153500 }, { "epoch": 30.49, "learning_rate": 3.476094291107445e-05, "loss": 2.1983, "step": 6154000 }, { "epoch": 30.49, "learning_rate": 3.4759704324648365e-05, "loss": 2.2076, "step": 6154500 }, { "epoch": 30.49, "learning_rate": 3.4758468215395134e-05, "loss": 2.22, "step": 6155000 }, { "epoch": 30.5, "learning_rate": 3.475722962896905e-05, "loss": 2.2341, "step": 6155500 }, { "epoch": 30.5, "learning_rate": 3.475599351971582e-05, "loss": 2.2196, "step": 6156000 }, { "epoch": 30.5, "learning_rate": 3.475475741046259e-05, "loss": 2.2361, "step": 6156500 }, { "epoch": 30.5, "learning_rate": 3.4753518824036505e-05, "loss": 2.2396, "step": 6157000 }, { "epoch": 30.51, "learning_rate": 3.475228023761042e-05, "loss": 2.2363, "step": 6157500 }, { "epoch": 30.51, "learning_rate": 3.475104165118434e-05, "loss": 2.235, "step": 6158000 }, { "epoch": 30.51, "learning_rate": 3.474980306475825e-05, "loss": 2.247, "step": 6158500 }, { "epoch": 30.51, "learning_rate": 3.4748564478332166e-05, "loss": 2.2427, "step": 6159000 }, { "epoch": 30.52, "learning_rate": 3.474732589190608e-05, "loss": 2.2194, "step": 6159500 }, { "epoch": 30.52, "learning_rate": 3.474608730548e-05, "loss": 2.2395, "step": 6160000 }, { "epoch": 30.52, "learning_rate": 3.474484871905392e-05, "loss": 2.2607, "step": 6160500 }, { "epoch": 30.52, "learning_rate": 3.4743610132627834e-05, "loss": 2.2377, "step": 6161000 }, { "epoch": 30.53, "learning_rate": 3.474237154620175e-05, "loss": 2.2333, "step": 6161500 }, { "epoch": 30.53, "learning_rate": 3.474113295977567e-05, "loss": 2.226, "step": 6162000 }, { "epoch": 30.53, "learning_rate": 3.4739894373349585e-05, "loss": 2.2342, "step": 6162500 }, { "epoch": 30.53, "learning_rate": 3.4738658264096354e-05, "loss": 2.2176, "step": 6163000 }, { "epoch": 30.54, "learning_rate": 3.473741967767027e-05, "loss": 2.2501, "step": 6163500 }, { "epoch": 30.54, "learning_rate": 3.473618109124419e-05, "loss": 2.2182, "step": 6164000 }, { "epoch": 30.54, "learning_rate": 3.4734942504818104e-05, "loss": 2.2549, "step": 6164500 }, { "epoch": 30.54, "learning_rate": 3.473370391839202e-05, "loss": 2.2135, "step": 6165000 }, { "epoch": 30.55, "learning_rate": 3.473246533196594e-05, "loss": 2.2249, "step": 6165500 }, { "epoch": 30.55, "learning_rate": 3.47312292227127e-05, "loss": 2.2566, "step": 6166000 }, { "epoch": 30.55, "learning_rate": 3.4729993113459476e-05, "loss": 2.2149, "step": 6166500 }, { "epoch": 30.55, "learning_rate": 3.472875452703339e-05, "loss": 2.2436, "step": 6167000 }, { "epoch": 30.56, "learning_rate": 3.472751594060731e-05, "loss": 2.2307, "step": 6167500 }, { "epoch": 30.56, "learning_rate": 3.472627983135407e-05, "loss": 2.2567, "step": 6168000 }, { "epoch": 30.56, "learning_rate": 3.472504124492799e-05, "loss": 2.198, "step": 6168500 }, { "epoch": 30.56, "learning_rate": 3.4723802658501906e-05, "loss": 2.2303, "step": 6169000 }, { "epoch": 30.57, "learning_rate": 3.472256407207582e-05, "loss": 2.229, "step": 6169500 }, { "epoch": 30.57, "learning_rate": 3.472132548564974e-05, "loss": 2.2487, "step": 6170000 }, { "epoch": 30.57, "learning_rate": 3.4720086899223656e-05, "loss": 2.2399, "step": 6170500 }, { "epoch": 30.57, "learning_rate": 3.4718848312797567e-05, "loss": 2.2207, "step": 6171000 }, { "epoch": 30.58, "learning_rate": 3.4717609726371483e-05, "loss": 2.2181, "step": 6171500 }, { "epoch": 30.58, "learning_rate": 3.47163711399454e-05, "loss": 2.2427, "step": 6172000 }, { "epoch": 30.58, "learning_rate": 3.471513255351932e-05, "loss": 2.2214, "step": 6172500 }, { "epoch": 30.58, "learning_rate": 3.4713893967093234e-05, "loss": 2.2395, "step": 6173000 }, { "epoch": 30.59, "learning_rate": 3.471265538066715e-05, "loss": 2.2376, "step": 6173500 }, { "epoch": 30.59, "learning_rate": 3.471141679424107e-05, "loss": 2.2301, "step": 6174000 }, { "epoch": 30.59, "learning_rate": 3.4710178207814985e-05, "loss": 2.2256, "step": 6174500 }, { "epoch": 30.59, "learning_rate": 3.47089396213889e-05, "loss": 2.2518, "step": 6175000 }, { "epoch": 30.6, "learning_rate": 3.470770103496282e-05, "loss": 2.2303, "step": 6175500 }, { "epoch": 30.6, "learning_rate": 3.4706462448536736e-05, "loss": 2.2201, "step": 6176000 }, { "epoch": 30.6, "learning_rate": 3.470522386211065e-05, "loss": 2.224, "step": 6176500 }, { "epoch": 30.6, "learning_rate": 3.470398527568457e-05, "loss": 2.2206, "step": 6177000 }, { "epoch": 30.61, "learning_rate": 3.4702746689258487e-05, "loss": 2.2475, "step": 6177500 }, { "epoch": 30.61, "learning_rate": 3.4701510580005255e-05, "loss": 2.2503, "step": 6178000 }, { "epoch": 30.61, "learning_rate": 3.470027199357917e-05, "loss": 2.2358, "step": 6178500 }, { "epoch": 30.61, "learning_rate": 3.4699035884325934e-05, "loss": 2.2493, "step": 6179000 }, { "epoch": 30.62, "learning_rate": 3.469779729789985e-05, "loss": 2.2415, "step": 6179500 }, { "epoch": 30.62, "learning_rate": 3.469655871147377e-05, "loss": 2.1972, "step": 6180000 }, { "epoch": 30.62, "learning_rate": 3.4695325079393396e-05, "loss": 2.2483, "step": 6180500 }, { "epoch": 30.62, "learning_rate": 3.469408649296731e-05, "loss": 2.2393, "step": 6181000 }, { "epoch": 30.63, "learning_rate": 3.469284790654122e-05, "loss": 2.2498, "step": 6181500 }, { "epoch": 30.63, "learning_rate": 3.469160932011514e-05, "loss": 2.2415, "step": 6182000 }, { "epoch": 30.63, "learning_rate": 3.4690370733689057e-05, "loss": 2.2731, "step": 6182500 }, { "epoch": 30.63, "learning_rate": 3.4689132147262974e-05, "loss": 2.265, "step": 6183000 }, { "epoch": 30.64, "learning_rate": 3.468789356083689e-05, "loss": 2.2388, "step": 6183500 }, { "epoch": 30.64, "learning_rate": 3.468665745158366e-05, "loss": 2.2341, "step": 6184000 }, { "epoch": 30.64, "learning_rate": 3.4685418865157576e-05, "loss": 2.2543, "step": 6184500 }, { "epoch": 30.64, "learning_rate": 3.468418027873149e-05, "loss": 2.2344, "step": 6185000 }, { "epoch": 30.65, "learning_rate": 3.468294169230541e-05, "loss": 2.2658, "step": 6185500 }, { "epoch": 30.65, "learning_rate": 3.468170310587933e-05, "loss": 2.2603, "step": 6186000 }, { "epoch": 30.65, "learning_rate": 3.4680466996626096e-05, "loss": 2.2264, "step": 6186500 }, { "epoch": 30.65, "learning_rate": 3.467922841020001e-05, "loss": 2.2426, "step": 6187000 }, { "epoch": 30.66, "learning_rate": 3.467798982377393e-05, "loss": 2.2392, "step": 6187500 }, { "epoch": 30.66, "learning_rate": 3.467675123734784e-05, "loss": 2.2032, "step": 6188000 }, { "epoch": 30.66, "learning_rate": 3.467551265092176e-05, "loss": 2.2448, "step": 6188500 }, { "epoch": 30.66, "learning_rate": 3.4674274064495674e-05, "loss": 2.2308, "step": 6189000 }, { "epoch": 30.66, "learning_rate": 3.467303547806959e-05, "loss": 2.2433, "step": 6189500 }, { "epoch": 30.67, "learning_rate": 3.467179689164351e-05, "loss": 2.2387, "step": 6190000 }, { "epoch": 30.67, "learning_rate": 3.4670560782390276e-05, "loss": 2.2341, "step": 6190500 }, { "epoch": 30.67, "learning_rate": 3.466932219596419e-05, "loss": 2.2467, "step": 6191000 }, { "epoch": 30.67, "learning_rate": 3.466808360953811e-05, "loss": 2.2455, "step": 6191500 }, { "epoch": 30.68, "learning_rate": 3.466684502311203e-05, "loss": 2.2521, "step": 6192000 }, { "epoch": 30.68, "learning_rate": 3.4665606436685944e-05, "loss": 2.2215, "step": 6192500 }, { "epoch": 30.68, "learning_rate": 3.4664367850259854e-05, "loss": 2.2362, "step": 6193000 }, { "epoch": 30.68, "learning_rate": 3.466313174100663e-05, "loss": 2.2413, "step": 6193500 }, { "epoch": 30.69, "learning_rate": 3.466189315458055e-05, "loss": 2.2268, "step": 6194000 }, { "epoch": 30.69, "learning_rate": 3.4660654568154464e-05, "loss": 2.2273, "step": 6194500 }, { "epoch": 30.69, "learning_rate": 3.4659415981728374e-05, "loss": 2.2315, "step": 6195000 }, { "epoch": 30.69, "learning_rate": 3.465817987247514e-05, "loss": 2.2424, "step": 6195500 }, { "epoch": 30.7, "learning_rate": 3.465694128604906e-05, "loss": 2.2435, "step": 6196000 }, { "epoch": 30.7, "learning_rate": 3.465570517679583e-05, "loss": 2.2266, "step": 6196500 }, { "epoch": 30.7, "learning_rate": 3.4654466590369745e-05, "loss": 2.2315, "step": 6197000 }, { "epoch": 30.7, "learning_rate": 3.465322800394366e-05, "loss": 2.2434, "step": 6197500 }, { "epoch": 30.71, "learning_rate": 3.465198941751758e-05, "loss": 2.2493, "step": 6198000 }, { "epoch": 30.71, "learning_rate": 3.4650750831091496e-05, "loss": 2.2375, "step": 6198500 }, { "epoch": 30.71, "learning_rate": 3.464951224466541e-05, "loss": 2.2524, "step": 6199000 }, { "epoch": 30.71, "learning_rate": 3.4648276135412175e-05, "loss": 2.2466, "step": 6199500 }, { "epoch": 30.72, "learning_rate": 3.464703754898609e-05, "loss": 2.2237, "step": 6200000 }, { "epoch": 30.72, "learning_rate": 3.464579896256001e-05, "loss": 2.2402, "step": 6200500 }, { "epoch": 30.72, "learning_rate": 3.4644560376133926e-05, "loss": 2.246, "step": 6201000 }, { "epoch": 30.72, "learning_rate": 3.464332178970784e-05, "loss": 2.2472, "step": 6201500 }, { "epoch": 30.73, "learning_rate": 3.464208320328176e-05, "loss": 2.2377, "step": 6202000 }, { "epoch": 30.73, "learning_rate": 3.4640844616855677e-05, "loss": 2.27, "step": 6202500 }, { "epoch": 30.73, "learning_rate": 3.4639606030429594e-05, "loss": 2.2133, "step": 6203000 }, { "epoch": 30.73, "learning_rate": 3.463836744400351e-05, "loss": 2.2362, "step": 6203500 }, { "epoch": 30.74, "learning_rate": 3.463712885757743e-05, "loss": 2.2287, "step": 6204000 }, { "epoch": 30.74, "learning_rate": 3.4635892748324196e-05, "loss": 2.2377, "step": 6204500 }, { "epoch": 30.74, "learning_rate": 3.463465416189811e-05, "loss": 2.2474, "step": 6205000 }, { "epoch": 30.74, "learning_rate": 3.463341557547203e-05, "loss": 2.2293, "step": 6205500 }, { "epoch": 30.75, "learning_rate": 3.463217698904595e-05, "loss": 2.2539, "step": 6206000 }, { "epoch": 30.75, "learning_rate": 3.463094087979271e-05, "loss": 2.2161, "step": 6206500 }, { "epoch": 30.75, "learning_rate": 3.4629702293366626e-05, "loss": 2.2283, "step": 6207000 }, { "epoch": 30.75, "learning_rate": 3.462846370694054e-05, "loss": 2.2341, "step": 6207500 }, { "epoch": 30.76, "learning_rate": 3.462722512051446e-05, "loss": 2.2445, "step": 6208000 }, { "epoch": 30.76, "learning_rate": 3.462598653408838e-05, "loss": 2.2316, "step": 6208500 }, { "epoch": 30.76, "learning_rate": 3.4624747947662294e-05, "loss": 2.242, "step": 6209000 }, { "epoch": 30.76, "learning_rate": 3.462350936123621e-05, "loss": 2.245, "step": 6209500 }, { "epoch": 30.77, "learning_rate": 3.462227325198298e-05, "loss": 2.253, "step": 6210000 }, { "epoch": 30.77, "learning_rate": 3.4621034665556896e-05, "loss": 2.2291, "step": 6210500 }, { "epoch": 30.77, "learning_rate": 3.461979607913081e-05, "loss": 2.2505, "step": 6211000 }, { "epoch": 30.77, "learning_rate": 3.461855996987758e-05, "loss": 2.2503, "step": 6211500 }, { "epoch": 30.78, "learning_rate": 3.461732138345149e-05, "loss": 2.2289, "step": 6212000 }, { "epoch": 30.78, "learning_rate": 3.461608527419826e-05, "loss": 2.2613, "step": 6212500 }, { "epoch": 30.78, "learning_rate": 3.461484668777218e-05, "loss": 2.2444, "step": 6213000 }, { "epoch": 30.78, "learning_rate": 3.4613608101346095e-05, "loss": 2.2416, "step": 6213500 }, { "epoch": 30.79, "learning_rate": 3.461236951492001e-05, "loss": 2.2211, "step": 6214000 }, { "epoch": 30.79, "learning_rate": 3.461113092849393e-05, "loss": 2.2169, "step": 6214500 }, { "epoch": 30.79, "learning_rate": 3.4609892342067846e-05, "loss": 2.2292, "step": 6215000 }, { "epoch": 30.79, "learning_rate": 3.460865375564176e-05, "loss": 2.2373, "step": 6215500 }, { "epoch": 30.8, "learning_rate": 3.460741516921568e-05, "loss": 2.2433, "step": 6216000 }, { "epoch": 30.8, "learning_rate": 3.4606176582789596e-05, "loss": 2.2271, "step": 6216500 }, { "epoch": 30.8, "learning_rate": 3.460493799636351e-05, "loss": 2.2382, "step": 6217000 }, { "epoch": 30.8, "learning_rate": 3.460369940993743e-05, "loss": 2.2384, "step": 6217500 }, { "epoch": 30.81, "learning_rate": 3.460246082351135e-05, "loss": 2.241, "step": 6218000 }, { "epoch": 30.81, "learning_rate": 3.4601222237085264e-05, "loss": 2.2461, "step": 6218500 }, { "epoch": 30.81, "learning_rate": 3.459998365065918e-05, "loss": 2.2499, "step": 6219000 }, { "epoch": 30.81, "learning_rate": 3.45987450642331e-05, "loss": 2.2335, "step": 6219500 }, { "epoch": 30.82, "learning_rate": 3.4597506477807015e-05, "loss": 2.2417, "step": 6220000 }, { "epoch": 30.82, "learning_rate": 3.459626789138093e-05, "loss": 2.2655, "step": 6220500 }, { "epoch": 30.82, "learning_rate": 3.4595034259300546e-05, "loss": 2.2529, "step": 6221000 }, { "epoch": 30.82, "learning_rate": 3.459379567287446e-05, "loss": 2.2362, "step": 6221500 }, { "epoch": 30.83, "learning_rate": 3.459255708644838e-05, "loss": 2.2316, "step": 6222000 }, { "epoch": 30.83, "learning_rate": 3.4591318500022297e-05, "loss": 2.2179, "step": 6222500 }, { "epoch": 30.83, "learning_rate": 3.4590079913596213e-05, "loss": 2.2264, "step": 6223000 }, { "epoch": 30.83, "learning_rate": 3.458884132717013e-05, "loss": 2.2518, "step": 6223500 }, { "epoch": 30.84, "learning_rate": 3.45876052179169e-05, "loss": 2.2177, "step": 6224000 }, { "epoch": 30.84, "learning_rate": 3.4586366631490816e-05, "loss": 2.2561, "step": 6224500 }, { "epoch": 30.84, "learning_rate": 3.458512804506473e-05, "loss": 2.2601, "step": 6225000 }, { "epoch": 30.84, "learning_rate": 3.458388945863864e-05, "loss": 2.2479, "step": 6225500 }, { "epoch": 30.85, "learning_rate": 3.458265582655827e-05, "loss": 2.2314, "step": 6226000 }, { "epoch": 30.85, "learning_rate": 3.458141724013219e-05, "loss": 2.2399, "step": 6226500 }, { "epoch": 30.85, "learning_rate": 3.4580178653706105e-05, "loss": 2.2339, "step": 6227000 }, { "epoch": 30.85, "learning_rate": 3.457894006728002e-05, "loss": 2.2389, "step": 6227500 }, { "epoch": 30.86, "learning_rate": 3.457770148085393e-05, "loss": 2.2303, "step": 6228000 }, { "epoch": 30.86, "learning_rate": 3.457646289442785e-05, "loss": 2.2388, "step": 6228500 }, { "epoch": 30.86, "learning_rate": 3.4575224308001765e-05, "loss": 2.2413, "step": 6229000 }, { "epoch": 30.86, "learning_rate": 3.457398572157568e-05, "loss": 2.2108, "step": 6229500 }, { "epoch": 30.87, "learning_rate": 3.45727471351496e-05, "loss": 2.2406, "step": 6230000 }, { "epoch": 30.87, "learning_rate": 3.457151102589637e-05, "loss": 2.2665, "step": 6230500 }, { "epoch": 30.87, "learning_rate": 3.457027243947028e-05, "loss": 2.2316, "step": 6231000 }, { "epoch": 30.87, "learning_rate": 3.4569033853044195e-05, "loss": 2.2667, "step": 6231500 }, { "epoch": 30.88, "learning_rate": 3.456779526661811e-05, "loss": 2.2364, "step": 6232000 }, { "epoch": 30.88, "learning_rate": 3.456655668019203e-05, "loss": 2.2286, "step": 6232500 }, { "epoch": 30.88, "learning_rate": 3.4565318093765946e-05, "loss": 2.251, "step": 6233000 }, { "epoch": 30.88, "learning_rate": 3.456407950733986e-05, "loss": 2.2181, "step": 6233500 }, { "epoch": 30.89, "learning_rate": 3.456284092091378e-05, "loss": 2.2578, "step": 6234000 }, { "epoch": 30.89, "learning_rate": 3.456160481166055e-05, "loss": 2.268, "step": 6234500 }, { "epoch": 30.89, "learning_rate": 3.4560366225234466e-05, "loss": 2.2286, "step": 6235000 }, { "epoch": 30.89, "learning_rate": 3.455912763880838e-05, "loss": 2.2199, "step": 6235500 }, { "epoch": 30.9, "learning_rate": 3.45578890523823e-05, "loss": 2.2377, "step": 6236000 }, { "epoch": 30.9, "learning_rate": 3.4556650465956216e-05, "loss": 2.238, "step": 6236500 }, { "epoch": 30.9, "learning_rate": 3.455541187953013e-05, "loss": 2.2317, "step": 6237000 }, { "epoch": 30.9, "learning_rate": 3.455417329310405e-05, "loss": 2.2309, "step": 6237500 }, { "epoch": 30.91, "learning_rate": 3.455293470667797e-05, "loss": 2.2461, "step": 6238000 }, { "epoch": 30.91, "learning_rate": 3.4551696120251884e-05, "loss": 2.2172, "step": 6238500 }, { "epoch": 30.91, "learning_rate": 3.4550457533825794e-05, "loss": 2.2666, "step": 6239000 }, { "epoch": 30.91, "learning_rate": 3.454921894739971e-05, "loss": 2.2173, "step": 6239500 }, { "epoch": 30.92, "learning_rate": 3.454798036097363e-05, "loss": 2.2493, "step": 6240000 }, { "epoch": 30.92, "learning_rate": 3.4546741774547545e-05, "loss": 2.2422, "step": 6240500 }, { "epoch": 30.92, "learning_rate": 3.4545505665294314e-05, "loss": 2.2461, "step": 6241000 }, { "epoch": 30.92, "learning_rate": 3.454426707886823e-05, "loss": 2.2144, "step": 6241500 }, { "epoch": 30.93, "learning_rate": 3.4543030969615e-05, "loss": 2.241, "step": 6242000 }, { "epoch": 30.93, "learning_rate": 3.4541792383188917e-05, "loss": 2.2454, "step": 6242500 }, { "epoch": 30.93, "learning_rate": 3.4540553796762833e-05, "loss": 2.2331, "step": 6243000 }, { "epoch": 30.93, "learning_rate": 3.453931521033675e-05, "loss": 2.241, "step": 6243500 }, { "epoch": 30.93, "learning_rate": 3.453807910108351e-05, "loss": 2.2426, "step": 6244000 }, { "epoch": 30.94, "learning_rate": 3.453684051465743e-05, "loss": 2.2353, "step": 6244500 }, { "epoch": 30.94, "learning_rate": 3.4535601928231346e-05, "loss": 2.2339, "step": 6245000 }, { "epoch": 30.94, "learning_rate": 3.453436334180526e-05, "loss": 2.2464, "step": 6245500 }, { "epoch": 30.94, "learning_rate": 3.453312723255204e-05, "loss": 2.247, "step": 6246000 }, { "epoch": 30.95, "learning_rate": 3.453188864612595e-05, "loss": 2.242, "step": 6246500 }, { "epoch": 30.95, "learning_rate": 3.4530650059699866e-05, "loss": 2.2535, "step": 6247000 }, { "epoch": 30.95, "learning_rate": 3.452941147327378e-05, "loss": 2.2252, "step": 6247500 }, { "epoch": 30.95, "learning_rate": 3.45281728868477e-05, "loss": 2.2676, "step": 6248000 }, { "epoch": 30.96, "learning_rate": 3.452693677759447e-05, "loss": 2.2438, "step": 6248500 }, { "epoch": 30.96, "learning_rate": 3.4525698191168385e-05, "loss": 2.2296, "step": 6249000 }, { "epoch": 30.96, "learning_rate": 3.45244596047423e-05, "loss": 2.254, "step": 6249500 }, { "epoch": 30.96, "learning_rate": 3.452322101831621e-05, "loss": 2.2367, "step": 6250000 }, { "epoch": 30.97, "learning_rate": 3.452198243189013e-05, "loss": 2.2447, "step": 6250500 }, { "epoch": 30.97, "learning_rate": 3.4520746322636905e-05, "loss": 2.2428, "step": 6251000 }, { "epoch": 30.97, "learning_rate": 3.451950773621082e-05, "loss": 2.224, "step": 6251500 }, { "epoch": 30.97, "learning_rate": 3.451826914978474e-05, "loss": 2.2347, "step": 6252000 }, { "epoch": 30.98, "learning_rate": 3.4517030563358656e-05, "loss": 2.2252, "step": 6252500 }, { "epoch": 30.98, "learning_rate": 3.451579445410542e-05, "loss": 2.2325, "step": 6253000 }, { "epoch": 30.98, "learning_rate": 3.4514555867679335e-05, "loss": 2.2505, "step": 6253500 }, { "epoch": 30.98, "learning_rate": 3.4513319758426104e-05, "loss": 2.25, "step": 6254000 }, { "epoch": 30.99, "learning_rate": 3.451208117200002e-05, "loss": 2.2485, "step": 6254500 }, { "epoch": 30.99, "learning_rate": 3.451084258557394e-05, "loss": 2.2436, "step": 6255000 }, { "epoch": 30.99, "learning_rate": 3.4509603999147854e-05, "loss": 2.2469, "step": 6255500 }, { "epoch": 30.99, "learning_rate": 3.450836788989462e-05, "loss": 2.2563, "step": 6256000 }, { "epoch": 31.0, "learning_rate": 3.450712930346854e-05, "loss": 2.2587, "step": 6256500 }, { "epoch": 31.0, "learning_rate": 3.450589071704246e-05, "loss": 2.235, "step": 6257000 }, { "epoch": 31.0, "eval_accuracy": 0.6597478495877519, "eval_accuracy_mlm": 0.6156466741652056, "eval_accuracy_nsp": 0.8678179628881506, "eval_loss": 2.3193306922912598, "eval_runtime": 145.8562, "eval_samples_per_second": 1748.017, "eval_steps_per_second": 72.839, "step": 6257133 }, { "epoch": 31.0, "learning_rate": 3.4504652130616374e-05, "loss": 2.1808, "step": 6257500 }, { "epoch": 31.0, "learning_rate": 3.4503416021363136e-05, "loss": 2.213, "step": 6258000 }, { "epoch": 31.01, "learning_rate": 3.450217743493705e-05, "loss": 2.2195, "step": 6258500 }, { "epoch": 31.01, "learning_rate": 3.450093884851097e-05, "loss": 2.2085, "step": 6259000 }, { "epoch": 31.01, "learning_rate": 3.449970026208489e-05, "loss": 2.2248, "step": 6259500 }, { "epoch": 31.01, "learning_rate": 3.4498461675658804e-05, "loss": 2.2321, "step": 6260000 }, { "epoch": 31.02, "learning_rate": 3.449722308923272e-05, "loss": 2.2127, "step": 6260500 }, { "epoch": 31.02, "learning_rate": 3.449598450280664e-05, "loss": 2.1961, "step": 6261000 }, { "epoch": 31.02, "learning_rate": 3.4494745916380554e-05, "loss": 2.2115, "step": 6261500 }, { "epoch": 31.02, "learning_rate": 3.449350980712732e-05, "loss": 2.1928, "step": 6262000 }, { "epoch": 31.03, "learning_rate": 3.449227122070124e-05, "loss": 2.2092, "step": 6262500 }, { "epoch": 31.03, "learning_rate": 3.449103263427516e-05, "loss": 2.2159, "step": 6263000 }, { "epoch": 31.03, "learning_rate": 3.4489794047849074e-05, "loss": 2.189, "step": 6263500 }, { "epoch": 31.03, "learning_rate": 3.448855546142299e-05, "loss": 2.21, "step": 6264000 }, { "epoch": 31.04, "learning_rate": 3.448731687499691e-05, "loss": 2.2087, "step": 6264500 }, { "epoch": 31.04, "learning_rate": 3.4486078288570825e-05, "loss": 2.2178, "step": 6265000 }, { "epoch": 31.04, "learning_rate": 3.448483970214474e-05, "loss": 2.2034, "step": 6265500 }, { "epoch": 31.04, "learning_rate": 3.448360111571866e-05, "loss": 2.2245, "step": 6266000 }, { "epoch": 31.05, "learning_rate": 3.448236252929257e-05, "loss": 2.203, "step": 6266500 }, { "epoch": 31.05, "learning_rate": 3.4481123942866486e-05, "loss": 2.1872, "step": 6267000 }, { "epoch": 31.05, "learning_rate": 3.4479887833613255e-05, "loss": 2.1915, "step": 6267500 }, { "epoch": 31.05, "learning_rate": 3.447864924718717e-05, "loss": 2.2446, "step": 6268000 }, { "epoch": 31.06, "learning_rate": 3.447741066076109e-05, "loss": 2.2023, "step": 6268500 }, { "epoch": 31.06, "learning_rate": 3.447617455150786e-05, "loss": 2.2039, "step": 6269000 }, { "epoch": 31.06, "learning_rate": 3.4474935965081774e-05, "loss": 2.2216, "step": 6269500 }, { "epoch": 31.06, "learning_rate": 3.447369737865569e-05, "loss": 2.2161, "step": 6270000 }, { "epoch": 31.07, "learning_rate": 3.447245879222961e-05, "loss": 2.1849, "step": 6270500 }, { "epoch": 31.07, "learning_rate": 3.4471220205803525e-05, "loss": 2.2113, "step": 6271000 }, { "epoch": 31.07, "learning_rate": 3.446998161937744e-05, "loss": 2.2109, "step": 6271500 }, { "epoch": 31.07, "learning_rate": 3.446874303295136e-05, "loss": 2.2381, "step": 6272000 }, { "epoch": 31.08, "learning_rate": 3.4467504446525276e-05, "loss": 2.2078, "step": 6272500 }, { "epoch": 31.08, "learning_rate": 3.446626586009919e-05, "loss": 2.2187, "step": 6273000 }, { "epoch": 31.08, "learning_rate": 3.44650272736731e-05, "loss": 2.2024, "step": 6273500 }, { "epoch": 31.08, "learning_rate": 3.446378868724702e-05, "loss": 2.2256, "step": 6274000 }, { "epoch": 31.09, "learning_rate": 3.446255010082094e-05, "loss": 2.2071, "step": 6274500 }, { "epoch": 31.09, "learning_rate": 3.4461311514394854e-05, "loss": 2.2189, "step": 6275000 }, { "epoch": 31.09, "learning_rate": 3.446007540514162e-05, "loss": 2.2198, "step": 6275500 }, { "epoch": 31.09, "learning_rate": 3.445883929588839e-05, "loss": 2.219, "step": 6276000 }, { "epoch": 31.1, "learning_rate": 3.445760070946231e-05, "loss": 2.218, "step": 6276500 }, { "epoch": 31.1, "learning_rate": 3.4456362123036225e-05, "loss": 2.2116, "step": 6277000 }, { "epoch": 31.1, "learning_rate": 3.445512353661014e-05, "loss": 2.2026, "step": 6277500 }, { "epoch": 31.1, "learning_rate": 3.445388495018406e-05, "loss": 2.2124, "step": 6278000 }, { "epoch": 31.11, "learning_rate": 3.4452646363757976e-05, "loss": 2.2316, "step": 6278500 }, { "epoch": 31.11, "learning_rate": 3.445140777733189e-05, "loss": 2.2332, "step": 6279000 }, { "epoch": 31.11, "learning_rate": 3.445016919090581e-05, "loss": 2.2116, "step": 6279500 }, { "epoch": 31.11, "learning_rate": 3.444893060447973e-05, "loss": 2.2152, "step": 6280000 }, { "epoch": 31.12, "learning_rate": 3.444769201805364e-05, "loss": 2.211, "step": 6280500 }, { "epoch": 31.12, "learning_rate": 3.4446453431627554e-05, "loss": 2.2078, "step": 6281000 }, { "epoch": 31.12, "learning_rate": 3.444521484520147e-05, "loss": 2.2311, "step": 6281500 }, { "epoch": 31.12, "learning_rate": 3.444397625877539e-05, "loss": 2.208, "step": 6282000 }, { "epoch": 31.13, "learning_rate": 3.44427376723493e-05, "loss": 2.2156, "step": 6282500 }, { "epoch": 31.13, "learning_rate": 3.444150156309607e-05, "loss": 2.2341, "step": 6283000 }, { "epoch": 31.13, "learning_rate": 3.444026545384284e-05, "loss": 2.2018, "step": 6283500 }, { "epoch": 31.13, "learning_rate": 3.4439029344589604e-05, "loss": 2.2093, "step": 6284000 }, { "epoch": 31.14, "learning_rate": 3.443779075816352e-05, "loss": 2.2189, "step": 6284500 }, { "epoch": 31.14, "learning_rate": 3.443655217173744e-05, "loss": 2.2098, "step": 6285000 }, { "epoch": 31.14, "learning_rate": 3.4435318539657066e-05, "loss": 2.2191, "step": 6285500 }, { "epoch": 31.14, "learning_rate": 3.443407995323098e-05, "loss": 2.2183, "step": 6286000 }, { "epoch": 31.15, "learning_rate": 3.44328413668049e-05, "loss": 2.2258, "step": 6286500 }, { "epoch": 31.15, "learning_rate": 3.4431602780378816e-05, "loss": 2.2116, "step": 6287000 }, { "epoch": 31.15, "learning_rate": 3.443036419395273e-05, "loss": 2.2322, "step": 6287500 }, { "epoch": 31.15, "learning_rate": 3.4429128084699495e-05, "loss": 2.212, "step": 6288000 }, { "epoch": 31.16, "learning_rate": 3.442788949827341e-05, "loss": 2.2144, "step": 6288500 }, { "epoch": 31.16, "learning_rate": 3.442665091184733e-05, "loss": 2.1998, "step": 6289000 }, { "epoch": 31.16, "learning_rate": 3.4425412325421246e-05, "loss": 2.1758, "step": 6289500 }, { "epoch": 31.16, "learning_rate": 3.442417373899516e-05, "loss": 2.2297, "step": 6290000 }, { "epoch": 31.17, "learning_rate": 3.442293515256908e-05, "loss": 2.211, "step": 6290500 }, { "epoch": 31.17, "learning_rate": 3.442169656614299e-05, "loss": 2.212, "step": 6291000 }, { "epoch": 31.17, "learning_rate": 3.442045797971691e-05, "loss": 2.2227, "step": 6291500 }, { "epoch": 31.17, "learning_rate": 3.4419219393290824e-05, "loss": 2.2025, "step": 6292000 }, { "epoch": 31.18, "learning_rate": 3.441798080686474e-05, "loss": 2.2118, "step": 6292500 }, { "epoch": 31.18, "learning_rate": 3.441674222043866e-05, "loss": 2.235, "step": 6293000 }, { "epoch": 31.18, "learning_rate": 3.4415503634012575e-05, "loss": 2.2199, "step": 6293500 }, { "epoch": 31.18, "learning_rate": 3.441426504758649e-05, "loss": 2.2552, "step": 6294000 }, { "epoch": 31.19, "learning_rate": 3.441302646116041e-05, "loss": 2.207, "step": 6294500 }, { "epoch": 31.19, "learning_rate": 3.4411787874734325e-05, "loss": 2.2162, "step": 6295000 }, { "epoch": 31.19, "learning_rate": 3.4410551765481094e-05, "loss": 2.2181, "step": 6295500 }, { "epoch": 31.19, "learning_rate": 3.440931565622786e-05, "loss": 2.228, "step": 6296000 }, { "epoch": 31.2, "learning_rate": 3.440807706980178e-05, "loss": 2.2119, "step": 6296500 }, { "epoch": 31.2, "learning_rate": 3.44068384833757e-05, "loss": 2.2169, "step": 6297000 }, { "epoch": 31.2, "learning_rate": 3.440559989694961e-05, "loss": 2.2164, "step": 6297500 }, { "epoch": 31.2, "learning_rate": 3.4404361310523524e-05, "loss": 2.2253, "step": 6298000 }, { "epoch": 31.2, "learning_rate": 3.440312272409744e-05, "loss": 2.2121, "step": 6298500 }, { "epoch": 31.21, "learning_rate": 3.440188413767136e-05, "loss": 2.2298, "step": 6299000 }, { "epoch": 31.21, "learning_rate": 3.4400645551245275e-05, "loss": 2.2243, "step": 6299500 }, { "epoch": 31.21, "learning_rate": 3.439940696481919e-05, "loss": 2.2202, "step": 6300000 }, { "epoch": 31.21, "learning_rate": 3.439817085556596e-05, "loss": 2.2288, "step": 6300500 }, { "epoch": 31.22, "learning_rate": 3.439693226913988e-05, "loss": 2.2252, "step": 6301000 }, { "epoch": 31.22, "learning_rate": 3.4395693682713794e-05, "loss": 2.2148, "step": 6301500 }, { "epoch": 31.22, "learning_rate": 3.439445509628771e-05, "loss": 2.2291, "step": 6302000 }, { "epoch": 31.22, "learning_rate": 3.439321650986163e-05, "loss": 2.2196, "step": 6302500 }, { "epoch": 31.23, "learning_rate": 3.4391977923435545e-05, "loss": 2.2144, "step": 6303000 }, { "epoch": 31.23, "learning_rate": 3.439073933700946e-05, "loss": 2.2116, "step": 6303500 }, { "epoch": 31.23, "learning_rate": 3.438950075058337e-05, "loss": 2.2102, "step": 6304000 }, { "epoch": 31.23, "learning_rate": 3.438826216415729e-05, "loss": 2.2304, "step": 6304500 }, { "epoch": 31.24, "learning_rate": 3.438702605490406e-05, "loss": 2.2181, "step": 6305000 }, { "epoch": 31.24, "learning_rate": 3.438579242282368e-05, "loss": 2.2039, "step": 6305500 }, { "epoch": 31.24, "learning_rate": 3.438455631357045e-05, "loss": 2.2286, "step": 6306000 }, { "epoch": 31.24, "learning_rate": 3.4383317727144364e-05, "loss": 2.2183, "step": 6306500 }, { "epoch": 31.25, "learning_rate": 3.438207914071828e-05, "loss": 2.2127, "step": 6307000 }, { "epoch": 31.25, "learning_rate": 3.43808405542922e-05, "loss": 2.2101, "step": 6307500 }, { "epoch": 31.25, "learning_rate": 3.4379601967866115e-05, "loss": 2.203, "step": 6308000 }, { "epoch": 31.25, "learning_rate": 3.437836338144003e-05, "loss": 2.2003, "step": 6308500 }, { "epoch": 31.26, "learning_rate": 3.437712479501395e-05, "loss": 2.1954, "step": 6309000 }, { "epoch": 31.26, "learning_rate": 3.4375886208587866e-05, "loss": 2.2006, "step": 6309500 }, { "epoch": 31.26, "learning_rate": 3.437464762216178e-05, "loss": 2.2085, "step": 6310000 }, { "epoch": 31.26, "learning_rate": 3.43734090357357e-05, "loss": 2.2127, "step": 6310500 }, { "epoch": 31.27, "learning_rate": 3.437217292648247e-05, "loss": 2.2205, "step": 6311000 }, { "epoch": 31.27, "learning_rate": 3.437093434005638e-05, "loss": 2.2085, "step": 6311500 }, { "epoch": 31.27, "learning_rate": 3.4369695753630296e-05, "loss": 2.2141, "step": 6312000 }, { "epoch": 31.27, "learning_rate": 3.436845716720421e-05, "loss": 2.226, "step": 6312500 }, { "epoch": 31.28, "learning_rate": 3.436722105795098e-05, "loss": 2.2442, "step": 6313000 }, { "epoch": 31.28, "learning_rate": 3.43659824715249e-05, "loss": 2.2363, "step": 6313500 }, { "epoch": 31.28, "learning_rate": 3.436474636227167e-05, "loss": 2.208, "step": 6314000 }, { "epoch": 31.28, "learning_rate": 3.4363507775845584e-05, "loss": 2.233, "step": 6314500 }, { "epoch": 31.29, "learning_rate": 3.43622691894195e-05, "loss": 2.2111, "step": 6315000 }, { "epoch": 31.29, "learning_rate": 3.436103060299342e-05, "loss": 2.2063, "step": 6315500 }, { "epoch": 31.29, "learning_rate": 3.4359792016567335e-05, "loss": 2.2115, "step": 6316000 }, { "epoch": 31.29, "learning_rate": 3.435855343014125e-05, "loss": 2.1919, "step": 6316500 }, { "epoch": 31.3, "learning_rate": 3.435731484371517e-05, "loss": 2.2389, "step": 6317000 }, { "epoch": 31.3, "learning_rate": 3.4356076257289086e-05, "loss": 2.2189, "step": 6317500 }, { "epoch": 31.3, "learning_rate": 3.435484014803585e-05, "loss": 2.2316, "step": 6318000 }, { "epoch": 31.3, "learning_rate": 3.4353601561609765e-05, "loss": 2.2294, "step": 6318500 }, { "epoch": 31.31, "learning_rate": 3.435236297518368e-05, "loss": 2.228, "step": 6319000 }, { "epoch": 31.31, "learning_rate": 3.43511243887576e-05, "loss": 2.2099, "step": 6319500 }, { "epoch": 31.31, "learning_rate": 3.4349885802331515e-05, "loss": 2.2214, "step": 6320000 }, { "epoch": 31.31, "learning_rate": 3.434864721590543e-05, "loss": 2.2146, "step": 6320500 }, { "epoch": 31.32, "learning_rate": 3.434740862947935e-05, "loss": 2.2259, "step": 6321000 }, { "epoch": 31.32, "learning_rate": 3.4346170043053266e-05, "loss": 2.2005, "step": 6321500 }, { "epoch": 31.32, "learning_rate": 3.434493145662718e-05, "loss": 2.2291, "step": 6322000 }, { "epoch": 31.32, "learning_rate": 3.43436928702011e-05, "loss": 2.219, "step": 6322500 }, { "epoch": 31.33, "learning_rate": 3.434245428377502e-05, "loss": 2.1981, "step": 6323000 }, { "epoch": 31.33, "learning_rate": 3.4341215697348934e-05, "loss": 2.2405, "step": 6323500 }, { "epoch": 31.33, "learning_rate": 3.433997711092285e-05, "loss": 2.1992, "step": 6324000 }, { "epoch": 31.33, "learning_rate": 3.433874100166962e-05, "loss": 2.2415, "step": 6324500 }, { "epoch": 31.34, "learning_rate": 3.433750241524353e-05, "loss": 2.2222, "step": 6325000 }, { "epoch": 31.34, "learning_rate": 3.433626382881745e-05, "loss": 2.2247, "step": 6325500 }, { "epoch": 31.34, "learning_rate": 3.4335025242391364e-05, "loss": 2.2394, "step": 6326000 }, { "epoch": 31.34, "learning_rate": 3.433378665596528e-05, "loss": 2.2441, "step": 6326500 }, { "epoch": 31.35, "learning_rate": 3.43325480695392e-05, "loss": 2.2145, "step": 6327000 }, { "epoch": 31.35, "learning_rate": 3.4331309483113115e-05, "loss": 2.2068, "step": 6327500 }, { "epoch": 31.35, "learning_rate": 3.433007089668703e-05, "loss": 2.2079, "step": 6328000 }, { "epoch": 31.35, "learning_rate": 3.432883231026094e-05, "loss": 2.2071, "step": 6328500 }, { "epoch": 31.36, "learning_rate": 3.432759620100772e-05, "loss": 2.2209, "step": 6329000 }, { "epoch": 31.36, "learning_rate": 3.4326357614581634e-05, "loss": 2.2367, "step": 6329500 }, { "epoch": 31.36, "learning_rate": 3.432511902815555e-05, "loss": 2.2124, "step": 6330000 }, { "epoch": 31.36, "learning_rate": 3.432388044172947e-05, "loss": 2.2072, "step": 6330500 }, { "epoch": 31.37, "learning_rate": 3.4322641855303385e-05, "loss": 2.2324, "step": 6331000 }, { "epoch": 31.37, "learning_rate": 3.4321408223223e-05, "loss": 2.2163, "step": 6331500 }, { "epoch": 31.37, "learning_rate": 3.4320172113969774e-05, "loss": 2.2097, "step": 6332000 }, { "epoch": 31.37, "learning_rate": 3.4318936004716536e-05, "loss": 2.2137, "step": 6332500 }, { "epoch": 31.38, "learning_rate": 3.431769741829045e-05, "loss": 2.2035, "step": 6333000 }, { "epoch": 31.38, "learning_rate": 3.431645883186437e-05, "loss": 2.2252, "step": 6333500 }, { "epoch": 31.38, "learning_rate": 3.431522024543829e-05, "loss": 2.2297, "step": 6334000 }, { "epoch": 31.38, "learning_rate": 3.4313981659012204e-05, "loss": 2.2349, "step": 6334500 }, { "epoch": 31.39, "learning_rate": 3.431274307258612e-05, "loss": 2.2302, "step": 6335000 }, { "epoch": 31.39, "learning_rate": 3.431150696333289e-05, "loss": 2.2064, "step": 6335500 }, { "epoch": 31.39, "learning_rate": 3.431026837690681e-05, "loss": 2.2251, "step": 6336000 }, { "epoch": 31.39, "learning_rate": 3.4309029790480724e-05, "loss": 2.238, "step": 6336500 }, { "epoch": 31.4, "learning_rate": 3.430779120405464e-05, "loss": 2.2249, "step": 6337000 }, { "epoch": 31.4, "learning_rate": 3.430655261762856e-05, "loss": 2.2352, "step": 6337500 }, { "epoch": 31.4, "learning_rate": 3.4305316508375326e-05, "loss": 2.2189, "step": 6338000 }, { "epoch": 31.4, "learning_rate": 3.430407792194924e-05, "loss": 2.2131, "step": 6338500 }, { "epoch": 31.41, "learning_rate": 3.430283933552316e-05, "loss": 2.2507, "step": 6339000 }, { "epoch": 31.41, "learning_rate": 3.430160074909707e-05, "loss": 2.2302, "step": 6339500 }, { "epoch": 31.41, "learning_rate": 3.430036216267099e-05, "loss": 2.2123, "step": 6340000 }, { "epoch": 31.41, "learning_rate": 3.4299123576244904e-05, "loss": 2.2245, "step": 6340500 }, { "epoch": 31.42, "learning_rate": 3.429788498981882e-05, "loss": 2.2152, "step": 6341000 }, { "epoch": 31.42, "learning_rate": 3.429664888056559e-05, "loss": 2.2311, "step": 6341500 }, { "epoch": 31.42, "learning_rate": 3.429541029413951e-05, "loss": 2.2054, "step": 6342000 }, { "epoch": 31.42, "learning_rate": 3.4294171707713424e-05, "loss": 2.2194, "step": 6342500 }, { "epoch": 31.43, "learning_rate": 3.429293312128734e-05, "loss": 2.2208, "step": 6343000 }, { "epoch": 31.43, "learning_rate": 3.429169453486126e-05, "loss": 2.2581, "step": 6343500 }, { "epoch": 31.43, "learning_rate": 3.4290455948435175e-05, "loss": 2.1989, "step": 6344000 }, { "epoch": 31.43, "learning_rate": 3.428921736200909e-05, "loss": 2.2191, "step": 6344500 }, { "epoch": 31.44, "learning_rate": 3.4287978775583e-05, "loss": 2.239, "step": 6345000 }, { "epoch": 31.44, "learning_rate": 3.428674018915692e-05, "loss": 2.2267, "step": 6345500 }, { "epoch": 31.44, "learning_rate": 3.428550407990369e-05, "loss": 2.2187, "step": 6346000 }, { "epoch": 31.44, "learning_rate": 3.4284265493477604e-05, "loss": 2.2207, "step": 6346500 }, { "epoch": 31.45, "learning_rate": 3.428302690705152e-05, "loss": 2.2389, "step": 6347000 }, { "epoch": 31.45, "learning_rate": 3.428178832062544e-05, "loss": 2.2213, "step": 6347500 }, { "epoch": 31.45, "learning_rate": 3.4280549734199355e-05, "loss": 2.2062, "step": 6348000 }, { "epoch": 31.45, "learning_rate": 3.4279311147773265e-05, "loss": 2.218, "step": 6348500 }, { "epoch": 31.46, "learning_rate": 3.427807256134718e-05, "loss": 2.2122, "step": 6349000 }, { "epoch": 31.46, "learning_rate": 3.42768339749211e-05, "loss": 2.2257, "step": 6349500 }, { "epoch": 31.46, "learning_rate": 3.4275595388495016e-05, "loss": 2.2072, "step": 6350000 }, { "epoch": 31.46, "learning_rate": 3.427435927924179e-05, "loss": 2.2411, "step": 6350500 }, { "epoch": 31.47, "learning_rate": 3.427312316998856e-05, "loss": 2.2063, "step": 6351000 }, { "epoch": 31.47, "learning_rate": 3.427188458356248e-05, "loss": 2.2173, "step": 6351500 }, { "epoch": 31.47, "learning_rate": 3.4270645997136394e-05, "loss": 2.2306, "step": 6352000 }, { "epoch": 31.47, "learning_rate": 3.426940741071031e-05, "loss": 2.2277, "step": 6352500 }, { "epoch": 31.47, "learning_rate": 3.426816882428422e-05, "loss": 2.229, "step": 6353000 }, { "epoch": 31.48, "learning_rate": 3.426693023785814e-05, "loss": 2.2136, "step": 6353500 }, { "epoch": 31.48, "learning_rate": 3.4265691651432055e-05, "loss": 2.2245, "step": 6354000 }, { "epoch": 31.48, "learning_rate": 3.426445306500597e-05, "loss": 2.2111, "step": 6354500 }, { "epoch": 31.48, "learning_rate": 3.426321447857989e-05, "loss": 2.2525, "step": 6355000 }, { "epoch": 31.49, "learning_rate": 3.42619758921538e-05, "loss": 2.2219, "step": 6355500 }, { "epoch": 31.49, "learning_rate": 3.4260737305727716e-05, "loss": 2.2086, "step": 6356000 }, { "epoch": 31.49, "learning_rate": 3.425949871930163e-05, "loss": 2.2263, "step": 6356500 }, { "epoch": 31.49, "learning_rate": 3.425826013287555e-05, "loss": 2.2265, "step": 6357000 }, { "epoch": 31.5, "learning_rate": 3.425702154644947e-05, "loss": 2.2005, "step": 6357500 }, { "epoch": 31.5, "learning_rate": 3.4255782960023384e-05, "loss": 2.2232, "step": 6358000 }, { "epoch": 31.5, "learning_rate": 3.42545443735973e-05, "loss": 2.2305, "step": 6358500 }, { "epoch": 31.5, "learning_rate": 3.425330826434407e-05, "loss": 2.1797, "step": 6359000 }, { "epoch": 31.51, "learning_rate": 3.425206967791799e-05, "loss": 2.2228, "step": 6359500 }, { "epoch": 31.51, "learning_rate": 3.4250831091491904e-05, "loss": 2.2035, "step": 6360000 }, { "epoch": 31.51, "learning_rate": 3.424959498223867e-05, "loss": 2.2232, "step": 6360500 }, { "epoch": 31.51, "learning_rate": 3.424835639581259e-05, "loss": 2.2096, "step": 6361000 }, { "epoch": 31.52, "learning_rate": 3.4247117809386506e-05, "loss": 2.203, "step": 6361500 }, { "epoch": 31.52, "learning_rate": 3.4245879222960416e-05, "loss": 2.2527, "step": 6362000 }, { "epoch": 31.52, "learning_rate": 3.424464063653433e-05, "loss": 2.2428, "step": 6362500 }, { "epoch": 31.52, "learning_rate": 3.424340452728111e-05, "loss": 2.1951, "step": 6363000 }, { "epoch": 31.53, "learning_rate": 3.424216594085502e-05, "loss": 2.2294, "step": 6363500 }, { "epoch": 31.53, "learning_rate": 3.4240927354428936e-05, "loss": 2.2303, "step": 6364000 }, { "epoch": 31.53, "learning_rate": 3.423968876800285e-05, "loss": 2.2402, "step": 6364500 }, { "epoch": 31.53, "learning_rate": 3.423845265874963e-05, "loss": 2.2149, "step": 6365000 }, { "epoch": 31.54, "learning_rate": 3.423721654949639e-05, "loss": 2.2285, "step": 6365500 }, { "epoch": 31.54, "learning_rate": 3.423597796307031e-05, "loss": 2.2355, "step": 6366000 }, { "epoch": 31.54, "learning_rate": 3.4234739376644224e-05, "loss": 2.2315, "step": 6366500 }, { "epoch": 31.54, "learning_rate": 3.423350079021814e-05, "loss": 2.2164, "step": 6367000 }, { "epoch": 31.55, "learning_rate": 3.423226220379206e-05, "loss": 2.2494, "step": 6367500 }, { "epoch": 31.55, "learning_rate": 3.4231023617365975e-05, "loss": 2.21, "step": 6368000 }, { "epoch": 31.55, "learning_rate": 3.4229787508112744e-05, "loss": 2.2279, "step": 6368500 }, { "epoch": 31.55, "learning_rate": 3.422854892168666e-05, "loss": 2.2203, "step": 6369000 }, { "epoch": 31.56, "learning_rate": 3.422731033526058e-05, "loss": 2.2472, "step": 6369500 }, { "epoch": 31.56, "learning_rate": 3.4226071748834495e-05, "loss": 2.2179, "step": 6370000 }, { "epoch": 31.56, "learning_rate": 3.422483316240841e-05, "loss": 2.2375, "step": 6370500 }, { "epoch": 31.56, "learning_rate": 3.422359457598233e-05, "loss": 2.2469, "step": 6371000 }, { "epoch": 31.57, "learning_rate": 3.4222355989556246e-05, "loss": 2.2507, "step": 6371500 }, { "epoch": 31.57, "learning_rate": 3.422111740313016e-05, "loss": 2.2195, "step": 6372000 }, { "epoch": 31.57, "learning_rate": 3.421987881670408e-05, "loss": 2.2346, "step": 6372500 }, { "epoch": 31.57, "learning_rate": 3.421864023027799e-05, "loss": 2.2264, "step": 6373000 }, { "epoch": 31.58, "learning_rate": 3.4217401643851906e-05, "loss": 2.2479, "step": 6373500 }, { "epoch": 31.58, "learning_rate": 3.421616305742582e-05, "loss": 2.2097, "step": 6374000 }, { "epoch": 31.58, "learning_rate": 3.421492694817259e-05, "loss": 2.23, "step": 6374500 }, { "epoch": 31.58, "learning_rate": 3.421368836174651e-05, "loss": 2.2047, "step": 6375000 }, { "epoch": 31.59, "learning_rate": 3.4212449775320426e-05, "loss": 2.2001, "step": 6375500 }, { "epoch": 31.59, "learning_rate": 3.4211211188894336e-05, "loss": 2.1968, "step": 6376000 }, { "epoch": 31.59, "learning_rate": 3.420997507964111e-05, "loss": 2.219, "step": 6376500 }, { "epoch": 31.59, "learning_rate": 3.420873649321503e-05, "loss": 2.2418, "step": 6377000 }, { "epoch": 31.6, "learning_rate": 3.4207497906788946e-05, "loss": 2.2385, "step": 6377500 }, { "epoch": 31.6, "learning_rate": 3.420625932036286e-05, "loss": 2.2359, "step": 6378000 }, { "epoch": 31.6, "learning_rate": 3.420502073393678e-05, "loss": 2.2245, "step": 6378500 }, { "epoch": 31.6, "learning_rate": 3.420378462468354e-05, "loss": 2.231, "step": 6379000 }, { "epoch": 31.61, "learning_rate": 3.420254603825746e-05, "loss": 2.1829, "step": 6379500 }, { "epoch": 31.61, "learning_rate": 3.420130992900423e-05, "loss": 2.2169, "step": 6380000 }, { "epoch": 31.61, "learning_rate": 3.4200071342578144e-05, "loss": 2.2441, "step": 6380500 }, { "epoch": 31.61, "learning_rate": 3.419883275615206e-05, "loss": 2.2208, "step": 6381000 }, { "epoch": 31.62, "learning_rate": 3.419759416972598e-05, "loss": 2.2438, "step": 6381500 }, { "epoch": 31.62, "learning_rate": 3.4196355583299895e-05, "loss": 2.2072, "step": 6382000 }, { "epoch": 31.62, "learning_rate": 3.419511699687381e-05, "loss": 2.2324, "step": 6382500 }, { "epoch": 31.62, "learning_rate": 3.419387841044773e-05, "loss": 2.2065, "step": 6383000 }, { "epoch": 31.63, "learning_rate": 3.4192639824021646e-05, "loss": 2.2306, "step": 6383500 }, { "epoch": 31.63, "learning_rate": 3.419140123759556e-05, "loss": 2.2126, "step": 6384000 }, { "epoch": 31.63, "learning_rate": 3.419016265116948e-05, "loss": 2.2266, "step": 6384500 }, { "epoch": 31.63, "learning_rate": 3.4188924064743397e-05, "loss": 2.2249, "step": 6385000 }, { "epoch": 31.64, "learning_rate": 3.418768547831731e-05, "loss": 2.221, "step": 6385500 }, { "epoch": 31.64, "learning_rate": 3.4186446891891224e-05, "loss": 2.245, "step": 6386000 }, { "epoch": 31.64, "learning_rate": 3.418520830546514e-05, "loss": 2.2187, "step": 6386500 }, { "epoch": 31.64, "learning_rate": 3.418396971903906e-05, "loss": 2.2144, "step": 6387000 }, { "epoch": 31.65, "learning_rate": 3.4182731132612974e-05, "loss": 2.2362, "step": 6387500 }, { "epoch": 31.65, "learning_rate": 3.418149254618689e-05, "loss": 2.1898, "step": 6388000 }, { "epoch": 31.65, "learning_rate": 3.418025643693365e-05, "loss": 2.2426, "step": 6388500 }, { "epoch": 31.65, "learning_rate": 3.417901785050757e-05, "loss": 2.2156, "step": 6389000 }, { "epoch": 31.66, "learning_rate": 3.417777926408149e-05, "loss": 2.2327, "step": 6389500 }, { "epoch": 31.66, "learning_rate": 3.4176540677655404e-05, "loss": 2.2054, "step": 6390000 }, { "epoch": 31.66, "learning_rate": 3.417530209122932e-05, "loss": 2.2216, "step": 6390500 }, { "epoch": 31.66, "learning_rate": 3.417406350480324e-05, "loss": 2.2077, "step": 6391000 }, { "epoch": 31.67, "learning_rate": 3.4172824918377155e-05, "loss": 2.2122, "step": 6391500 }, { "epoch": 31.67, "learning_rate": 3.417158633195107e-05, "loss": 2.2206, "step": 6392000 }, { "epoch": 31.67, "learning_rate": 3.417034774552499e-05, "loss": 2.2276, "step": 6392500 }, { "epoch": 31.67, "learning_rate": 3.4169109159098906e-05, "loss": 2.2207, "step": 6393000 }, { "epoch": 31.68, "learning_rate": 3.416787057267282e-05, "loss": 2.2294, "step": 6393500 }, { "epoch": 31.68, "learning_rate": 3.416663446341959e-05, "loss": 2.2208, "step": 6394000 }, { "epoch": 31.68, "learning_rate": 3.416539835416636e-05, "loss": 2.2432, "step": 6394500 }, { "epoch": 31.68, "learning_rate": 3.416415976774027e-05, "loss": 2.2231, "step": 6395000 }, { "epoch": 31.69, "learning_rate": 3.416292118131419e-05, "loss": 2.2351, "step": 6395500 }, { "epoch": 31.69, "learning_rate": 3.4161682594888104e-05, "loss": 2.219, "step": 6396000 }, { "epoch": 31.69, "learning_rate": 3.416044400846202e-05, "loss": 2.2291, "step": 6396500 }, { "epoch": 31.69, "learning_rate": 3.415920542203594e-05, "loss": 2.2375, "step": 6397000 }, { "epoch": 31.7, "learning_rate": 3.4157966835609855e-05, "loss": 2.2292, "step": 6397500 }, { "epoch": 31.7, "learning_rate": 3.415672824918377e-05, "loss": 2.2253, "step": 6398000 }, { "epoch": 31.7, "learning_rate": 3.415548966275769e-05, "loss": 2.2333, "step": 6398500 }, { "epoch": 31.7, "learning_rate": 3.415425603067731e-05, "loss": 2.2152, "step": 6399000 }, { "epoch": 31.71, "learning_rate": 3.415301992142408e-05, "loss": 2.2123, "step": 6399500 }, { "epoch": 31.71, "learning_rate": 3.415178381217085e-05, "loss": 2.235, "step": 6400000 }, { "epoch": 31.71, "learning_rate": 3.4150545225744764e-05, "loss": 2.2219, "step": 6400500 }, { "epoch": 31.71, "learning_rate": 3.414930663931868e-05, "loss": 2.2168, "step": 6401000 }, { "epoch": 31.72, "learning_rate": 3.41480680528926e-05, "loss": 2.2214, "step": 6401500 }, { "epoch": 31.72, "learning_rate": 3.4146829466466515e-05, "loss": 2.2568, "step": 6402000 }, { "epoch": 31.72, "learning_rate": 3.414559088004043e-05, "loss": 2.1955, "step": 6402500 }, { "epoch": 31.72, "learning_rate": 3.4144354770787194e-05, "loss": 2.2304, "step": 6403000 }, { "epoch": 31.73, "learning_rate": 3.414311618436111e-05, "loss": 2.2254, "step": 6403500 }, { "epoch": 31.73, "learning_rate": 3.414187759793503e-05, "loss": 2.2343, "step": 6404000 }, { "epoch": 31.73, "learning_rate": 3.4140639011508945e-05, "loss": 2.2122, "step": 6404500 }, { "epoch": 31.73, "learning_rate": 3.413940042508286e-05, "loss": 2.2378, "step": 6405000 }, { "epoch": 31.74, "learning_rate": 3.413816431582963e-05, "loss": 2.2572, "step": 6405500 }, { "epoch": 31.74, "learning_rate": 3.413692572940355e-05, "loss": 2.2494, "step": 6406000 }, { "epoch": 31.74, "learning_rate": 3.4135687142977464e-05, "loss": 2.2201, "step": 6406500 }, { "epoch": 31.74, "learning_rate": 3.413444855655138e-05, "loss": 2.2259, "step": 6407000 }, { "epoch": 31.74, "learning_rate": 3.41332099701253e-05, "loss": 2.2272, "step": 6407500 }, { "epoch": 31.75, "learning_rate": 3.4131971383699215e-05, "loss": 2.235, "step": 6408000 }, { "epoch": 31.75, "learning_rate": 3.413073279727313e-05, "loss": 2.2403, "step": 6408500 }, { "epoch": 31.75, "learning_rate": 3.412949421084705e-05, "loss": 2.246, "step": 6409000 }, { "epoch": 31.75, "learning_rate": 3.4128255624420966e-05, "loss": 2.2335, "step": 6409500 }, { "epoch": 31.76, "learning_rate": 3.412701703799488e-05, "loss": 2.247, "step": 6410000 }, { "epoch": 31.76, "learning_rate": 3.412577845156879e-05, "loss": 2.2347, "step": 6410500 }, { "epoch": 31.76, "learning_rate": 3.412453986514271e-05, "loss": 2.2364, "step": 6411000 }, { "epoch": 31.76, "learning_rate": 3.412330127871663e-05, "loss": 2.2555, "step": 6411500 }, { "epoch": 31.77, "learning_rate": 3.4122062692290544e-05, "loss": 2.2152, "step": 6412000 }, { "epoch": 31.77, "learning_rate": 3.412082658303731e-05, "loss": 2.2121, "step": 6412500 }, { "epoch": 31.77, "learning_rate": 3.411959047378408e-05, "loss": 2.2103, "step": 6413000 }, { "epoch": 31.77, "learning_rate": 3.411835436453085e-05, "loss": 2.211, "step": 6413500 }, { "epoch": 31.78, "learning_rate": 3.411711577810477e-05, "loss": 2.222, "step": 6414000 }, { "epoch": 31.78, "learning_rate": 3.411587719167868e-05, "loss": 2.2353, "step": 6414500 }, { "epoch": 31.78, "learning_rate": 3.4114638605252594e-05, "loss": 2.2561, "step": 6415000 }, { "epoch": 31.78, "learning_rate": 3.411340001882651e-05, "loss": 2.2272, "step": 6415500 }, { "epoch": 31.79, "learning_rate": 3.411216143240043e-05, "loss": 2.2308, "step": 6416000 }, { "epoch": 31.79, "learning_rate": 3.4110922845974345e-05, "loss": 2.2271, "step": 6416500 }, { "epoch": 31.79, "learning_rate": 3.410968425954826e-05, "loss": 2.203, "step": 6417000 }, { "epoch": 31.79, "learning_rate": 3.410844567312218e-05, "loss": 2.2542, "step": 6417500 }, { "epoch": 31.8, "learning_rate": 3.4107207086696096e-05, "loss": 2.2169, "step": 6418000 }, { "epoch": 31.8, "learning_rate": 3.410596850027001e-05, "loss": 2.225, "step": 6418500 }, { "epoch": 31.8, "learning_rate": 3.410473239101678e-05, "loss": 2.225, "step": 6419000 }, { "epoch": 31.8, "learning_rate": 3.41034938045907e-05, "loss": 2.24, "step": 6419500 }, { "epoch": 31.81, "learning_rate": 3.4102255218164615e-05, "loss": 2.2394, "step": 6420000 }, { "epoch": 31.81, "learning_rate": 3.410101663173853e-05, "loss": 2.255, "step": 6420500 }, { "epoch": 31.81, "learning_rate": 3.409977804531245e-05, "loss": 2.2297, "step": 6421000 }, { "epoch": 31.81, "learning_rate": 3.4098539458886366e-05, "loss": 2.2249, "step": 6421500 }, { "epoch": 31.82, "learning_rate": 3.409730087246028e-05, "loss": 2.2136, "step": 6422000 }, { "epoch": 31.82, "learning_rate": 3.40960622860342e-05, "loss": 2.252, "step": 6422500 }, { "epoch": 31.82, "learning_rate": 3.409482369960812e-05, "loss": 2.213, "step": 6423000 }, { "epoch": 31.82, "learning_rate": 3.4093585113182034e-05, "loss": 2.2281, "step": 6423500 }, { "epoch": 31.83, "learning_rate": 3.4092346526755944e-05, "loss": 2.2149, "step": 6424000 }, { "epoch": 31.83, "learning_rate": 3.409110794032986e-05, "loss": 2.2163, "step": 6424500 }, { "epoch": 31.83, "learning_rate": 3.408987183107663e-05, "loss": 2.2166, "step": 6425000 }, { "epoch": 31.83, "learning_rate": 3.408863324465055e-05, "loss": 2.2333, "step": 6425500 }, { "epoch": 31.84, "learning_rate": 3.4087397135397315e-05, "loss": 2.2312, "step": 6426000 }, { "epoch": 31.84, "learning_rate": 3.4086161026144084e-05, "loss": 2.226, "step": 6426500 }, { "epoch": 31.84, "learning_rate": 3.4084922439718e-05, "loss": 2.2535, "step": 6427000 }, { "epoch": 31.84, "learning_rate": 3.408368385329191e-05, "loss": 2.2257, "step": 6427500 }, { "epoch": 31.85, "learning_rate": 3.408244526686583e-05, "loss": 2.232, "step": 6428000 }, { "epoch": 31.85, "learning_rate": 3.4081206680439745e-05, "loss": 2.2277, "step": 6428500 }, { "epoch": 31.85, "learning_rate": 3.407996809401366e-05, "loss": 2.215, "step": 6429000 }, { "epoch": 31.85, "learning_rate": 3.407872950758758e-05, "loss": 2.2315, "step": 6429500 }, { "epoch": 31.86, "learning_rate": 3.4077490921161496e-05, "loss": 2.2276, "step": 6430000 }, { "epoch": 31.86, "learning_rate": 3.4076254811908265e-05, "loss": 2.2121, "step": 6430500 }, { "epoch": 31.86, "learning_rate": 3.407501622548218e-05, "loss": 2.2239, "step": 6431000 }, { "epoch": 31.86, "learning_rate": 3.40737776390561e-05, "loss": 2.2147, "step": 6431500 }, { "epoch": 31.87, "learning_rate": 3.4072539052630016e-05, "loss": 2.2294, "step": 6432000 }, { "epoch": 31.87, "learning_rate": 3.407130046620393e-05, "loss": 2.2209, "step": 6432500 }, { "epoch": 31.87, "learning_rate": 3.407006187977785e-05, "loss": 2.2195, "step": 6433000 }, { "epoch": 31.87, "learning_rate": 3.4068823293351766e-05, "loss": 2.231, "step": 6433500 }, { "epoch": 31.88, "learning_rate": 3.406758470692568e-05, "loss": 2.2281, "step": 6434000 }, { "epoch": 31.88, "learning_rate": 3.40663461204996e-05, "loss": 2.2155, "step": 6434500 }, { "epoch": 31.88, "learning_rate": 3.406511248841922e-05, "loss": 2.2162, "step": 6435000 }, { "epoch": 31.88, "learning_rate": 3.406387390199314e-05, "loss": 2.217, "step": 6435500 }, { "epoch": 31.89, "learning_rate": 3.406263531556705e-05, "loss": 2.1886, "step": 6436000 }, { "epoch": 31.89, "learning_rate": 3.4061396729140965e-05, "loss": 2.2471, "step": 6436500 }, { "epoch": 31.89, "learning_rate": 3.406015814271488e-05, "loss": 2.2321, "step": 6437000 }, { "epoch": 31.89, "learning_rate": 3.40589195562888e-05, "loss": 2.2454, "step": 6437500 }, { "epoch": 31.9, "learning_rate": 3.4057680969862716e-05, "loss": 2.2335, "step": 6438000 }, { "epoch": 31.9, "learning_rate": 3.4056444860609485e-05, "loss": 2.2461, "step": 6438500 }, { "epoch": 31.9, "learning_rate": 3.40552062741834e-05, "loss": 2.2311, "step": 6439000 }, { "epoch": 31.9, "learning_rate": 3.405396768775732e-05, "loss": 2.2144, "step": 6439500 }, { "epoch": 31.91, "learning_rate": 3.4052729101331235e-05, "loss": 2.2356, "step": 6440000 }, { "epoch": 31.91, "learning_rate": 3.405149051490515e-05, "loss": 2.2321, "step": 6440500 }, { "epoch": 31.91, "learning_rate": 3.405025440565192e-05, "loss": 2.2336, "step": 6441000 }, { "epoch": 31.91, "learning_rate": 3.404901581922584e-05, "loss": 2.2452, "step": 6441500 }, { "epoch": 31.92, "learning_rate": 3.4047777232799755e-05, "loss": 2.2066, "step": 6442000 }, { "epoch": 31.92, "learning_rate": 3.4046538646373665e-05, "loss": 2.2472, "step": 6442500 }, { "epoch": 31.92, "learning_rate": 3.404530005994758e-05, "loss": 2.2411, "step": 6443000 }, { "epoch": 31.92, "learning_rate": 3.40440664278672e-05, "loss": 2.2134, "step": 6443500 }, { "epoch": 31.93, "learning_rate": 3.404282784144112e-05, "loss": 2.2387, "step": 6444000 }, { "epoch": 31.93, "learning_rate": 3.4041589255015037e-05, "loss": 2.2511, "step": 6444500 }, { "epoch": 31.93, "learning_rate": 3.4040350668588953e-05, "loss": 2.1958, "step": 6445000 }, { "epoch": 31.93, "learning_rate": 3.403911208216287e-05, "loss": 2.2224, "step": 6445500 }, { "epoch": 31.94, "learning_rate": 3.403787349573679e-05, "loss": 2.1971, "step": 6446000 }, { "epoch": 31.94, "learning_rate": 3.4036634909310704e-05, "loss": 2.2369, "step": 6446500 }, { "epoch": 31.94, "learning_rate": 3.403539880005747e-05, "loss": 2.2274, "step": 6447000 }, { "epoch": 31.94, "learning_rate": 3.403416021363139e-05, "loss": 2.2175, "step": 6447500 }, { "epoch": 31.95, "learning_rate": 3.403292162720531e-05, "loss": 2.2479, "step": 6448000 }, { "epoch": 31.95, "learning_rate": 3.403168551795207e-05, "loss": 2.2391, "step": 6448500 }, { "epoch": 31.95, "learning_rate": 3.4030449408698845e-05, "loss": 2.2423, "step": 6449000 }, { "epoch": 31.95, "learning_rate": 3.4029210822272755e-05, "loss": 2.2215, "step": 6449500 }, { "epoch": 31.96, "learning_rate": 3.402797223584667e-05, "loss": 2.2542, "step": 6450000 }, { "epoch": 31.96, "learning_rate": 3.402673364942059e-05, "loss": 2.2291, "step": 6450500 }, { "epoch": 31.96, "learning_rate": 3.4025495062994505e-05, "loss": 2.2185, "step": 6451000 }, { "epoch": 31.96, "learning_rate": 3.402425895374128e-05, "loss": 2.2083, "step": 6451500 }, { "epoch": 31.97, "learning_rate": 3.40230203673152e-05, "loss": 2.2151, "step": 6452000 }, { "epoch": 31.97, "learning_rate": 3.402178178088911e-05, "loss": 2.2216, "step": 6452500 }, { "epoch": 31.97, "learning_rate": 3.4020543194463025e-05, "loss": 2.2025, "step": 6453000 }, { "epoch": 31.97, "learning_rate": 3.401930460803694e-05, "loss": 2.2272, "step": 6453500 }, { "epoch": 31.98, "learning_rate": 3.401806602161086e-05, "loss": 2.2315, "step": 6454000 }, { "epoch": 31.98, "learning_rate": 3.4016827435184776e-05, "loss": 2.2421, "step": 6454500 }, { "epoch": 31.98, "learning_rate": 3.4015588848758686e-05, "loss": 2.229, "step": 6455000 }, { "epoch": 31.98, "learning_rate": 3.40143502623326e-05, "loss": 2.2382, "step": 6455500 }, { "epoch": 31.99, "learning_rate": 3.401311167590652e-05, "loss": 2.2209, "step": 6456000 }, { "epoch": 31.99, "learning_rate": 3.401187308948044e-05, "loss": 2.2126, "step": 6456500 }, { "epoch": 31.99, "learning_rate": 3.4010634503054354e-05, "loss": 2.2322, "step": 6457000 }, { "epoch": 31.99, "learning_rate": 3.400939591662827e-05, "loss": 2.2372, "step": 6457500 }, { "epoch": 32.0, "learning_rate": 3.400815733020219e-05, "loss": 2.236, "step": 6458000 }, { "epoch": 32.0, "learning_rate": 3.4006918743776104e-05, "loss": 2.2363, "step": 6458500 }, { "epoch": 32.0, "eval_accuracy": 0.6593229081640353, "eval_accuracy_mlm": 0.6153610829620848, "eval_accuracy_nsp": 0.8667393580928698, "eval_loss": 2.3164103031158447, "eval_runtime": 145.6685, "eval_samples_per_second": 1750.269, "eval_steps_per_second": 72.933, "step": 6458976 }, { "epoch": 32.0, "learning_rate": 3.400568015735002e-05, "loss": 2.2388, "step": 6459000 }, { "epoch": 32.0, "learning_rate": 3.400444652526964e-05, "loss": 2.2307, "step": 6459500 }, { "epoch": 32.01, "learning_rate": 3.400320793884356e-05, "loss": 2.203, "step": 6460000 }, { "epoch": 32.01, "learning_rate": 3.4001969352417476e-05, "loss": 2.1939, "step": 6460500 }, { "epoch": 32.01, "learning_rate": 3.400073076599139e-05, "loss": 2.1834, "step": 6461000 }, { "epoch": 32.01, "learning_rate": 3.399949217956531e-05, "loss": 2.2082, "step": 6461500 }, { "epoch": 32.01, "learning_rate": 3.399825607031207e-05, "loss": 2.2135, "step": 6462000 }, { "epoch": 32.02, "learning_rate": 3.399701748388599e-05, "loss": 2.2041, "step": 6462500 }, { "epoch": 32.02, "learning_rate": 3.3995778897459906e-05, "loss": 2.1927, "step": 6463000 }, { "epoch": 32.02, "learning_rate": 3.399454031103382e-05, "loss": 2.1848, "step": 6463500 }, { "epoch": 32.02, "learning_rate": 3.399330172460774e-05, "loss": 2.2194, "step": 6464000 }, { "epoch": 32.03, "learning_rate": 3.3992063138181656e-05, "loss": 2.1935, "step": 6464500 }, { "epoch": 32.03, "learning_rate": 3.3990824551755573e-05, "loss": 2.22, "step": 6465000 }, { "epoch": 32.03, "learning_rate": 3.398958596532949e-05, "loss": 2.1954, "step": 6465500 }, { "epoch": 32.03, "learning_rate": 3.398834737890341e-05, "loss": 2.2214, "step": 6466000 }, { "epoch": 32.04, "learning_rate": 3.3987111269650176e-05, "loss": 2.1903, "step": 6466500 }, { "epoch": 32.04, "learning_rate": 3.398587268322409e-05, "loss": 2.1986, "step": 6467000 }, { "epoch": 32.04, "learning_rate": 3.398463409679801e-05, "loss": 2.1881, "step": 6467500 }, { "epoch": 32.04, "learning_rate": 3.398339798754477e-05, "loss": 2.2115, "step": 6468000 }, { "epoch": 32.05, "learning_rate": 3.398215940111869e-05, "loss": 2.2028, "step": 6468500 }, { "epoch": 32.05, "learning_rate": 3.3980923291865464e-05, "loss": 2.1965, "step": 6469000 }, { "epoch": 32.05, "learning_rate": 3.3979687182612227e-05, "loss": 2.2209, "step": 6469500 }, { "epoch": 32.05, "learning_rate": 3.3978448596186143e-05, "loss": 2.1943, "step": 6470000 }, { "epoch": 32.06, "learning_rate": 3.397721000976006e-05, "loss": 2.2164, "step": 6470500 }, { "epoch": 32.06, "learning_rate": 3.397597390050683e-05, "loss": 2.1946, "step": 6471000 }, { "epoch": 32.06, "learning_rate": 3.3974735314080746e-05, "loss": 2.2251, "step": 6471500 }, { "epoch": 32.06, "learning_rate": 3.397349672765466e-05, "loss": 2.1968, "step": 6472000 }, { "epoch": 32.07, "learning_rate": 3.397225814122858e-05, "loss": 2.212, "step": 6472500 }, { "epoch": 32.07, "learning_rate": 3.39710195548025e-05, "loss": 2.2174, "step": 6473000 }, { "epoch": 32.07, "learning_rate": 3.3969780968376414e-05, "loss": 2.2001, "step": 6473500 }, { "epoch": 32.07, "learning_rate": 3.396854238195033e-05, "loss": 2.2193, "step": 6474000 }, { "epoch": 32.08, "learning_rate": 3.396730379552425e-05, "loss": 2.2152, "step": 6474500 }, { "epoch": 32.08, "learning_rate": 3.3966065209098165e-05, "loss": 2.1806, "step": 6475000 }, { "epoch": 32.08, "learning_rate": 3.3964829099844933e-05, "loss": 2.2087, "step": 6475500 }, { "epoch": 32.08, "learning_rate": 3.396359051341885e-05, "loss": 2.1978, "step": 6476000 }, { "epoch": 32.09, "learning_rate": 3.396235192699276e-05, "loss": 2.2147, "step": 6476500 }, { "epoch": 32.09, "learning_rate": 3.396111334056668e-05, "loss": 2.2109, "step": 6477000 }, { "epoch": 32.09, "learning_rate": 3.3959874754140594e-05, "loss": 2.2149, "step": 6477500 }, { "epoch": 32.09, "learning_rate": 3.395863616771451e-05, "loss": 2.2203, "step": 6478000 }, { "epoch": 32.1, "learning_rate": 3.395739758128843e-05, "loss": 2.208, "step": 6478500 }, { "epoch": 32.1, "learning_rate": 3.3956158994862345e-05, "loss": 2.2123, "step": 6479000 }, { "epoch": 32.1, "learning_rate": 3.395492040843626e-05, "loss": 2.1831, "step": 6479500 }, { "epoch": 32.1, "learning_rate": 3.395368182201018e-05, "loss": 2.2049, "step": 6480000 }, { "epoch": 32.11, "learning_rate": 3.395244323558409e-05, "loss": 2.2029, "step": 6480500 }, { "epoch": 32.11, "learning_rate": 3.3951207126330865e-05, "loss": 2.1977, "step": 6481000 }, { "epoch": 32.11, "learning_rate": 3.394996853990478e-05, "loss": 2.2033, "step": 6481500 }, { "epoch": 32.11, "learning_rate": 3.39487299534787e-05, "loss": 2.2213, "step": 6482000 }, { "epoch": 32.12, "learning_rate": 3.394749384422547e-05, "loss": 2.2247, "step": 6482500 }, { "epoch": 32.12, "learning_rate": 3.394625525779938e-05, "loss": 2.2051, "step": 6483000 }, { "epoch": 32.12, "learning_rate": 3.3945016671373294e-05, "loss": 2.1888, "step": 6483500 }, { "epoch": 32.12, "learning_rate": 3.394377808494721e-05, "loss": 2.2, "step": 6484000 }, { "epoch": 32.13, "learning_rate": 3.394253949852113e-05, "loss": 2.2133, "step": 6484500 }, { "epoch": 32.13, "learning_rate": 3.3941300912095045e-05, "loss": 2.207, "step": 6485000 }, { "epoch": 32.13, "learning_rate": 3.394006232566896e-05, "loss": 2.2215, "step": 6485500 }, { "epoch": 32.13, "learning_rate": 3.393882373924288e-05, "loss": 2.2094, "step": 6486000 }, { "epoch": 32.14, "learning_rate": 3.3937585152816796e-05, "loss": 2.1998, "step": 6486500 }, { "epoch": 32.14, "learning_rate": 3.3936346566390706e-05, "loss": 2.1855, "step": 6487000 }, { "epoch": 32.14, "learning_rate": 3.393510797996462e-05, "loss": 2.2094, "step": 6487500 }, { "epoch": 32.14, "learning_rate": 3.393386939353854e-05, "loss": 2.1967, "step": 6488000 }, { "epoch": 32.15, "learning_rate": 3.393263080711246e-05, "loss": 2.2105, "step": 6488500 }, { "epoch": 32.15, "learning_rate": 3.3931392220686374e-05, "loss": 2.2006, "step": 6489000 }, { "epoch": 32.15, "learning_rate": 3.393015363426029e-05, "loss": 2.1944, "step": 6489500 }, { "epoch": 32.15, "learning_rate": 3.392891504783421e-05, "loss": 2.1904, "step": 6490000 }, { "epoch": 32.16, "learning_rate": 3.3927676461408125e-05, "loss": 2.1949, "step": 6490500 }, { "epoch": 32.16, "learning_rate": 3.392643787498204e-05, "loss": 2.23, "step": 6491000 }, { "epoch": 32.16, "learning_rate": 3.392519928855596e-05, "loss": 2.2266, "step": 6491500 }, { "epoch": 32.16, "learning_rate": 3.392396317930273e-05, "loss": 2.2044, "step": 6492000 }, { "epoch": 32.17, "learning_rate": 3.3922724592876644e-05, "loss": 2.2013, "step": 6492500 }, { "epoch": 32.17, "learning_rate": 3.392148600645056e-05, "loss": 2.2037, "step": 6493000 }, { "epoch": 32.17, "learning_rate": 3.392024742002448e-05, "loss": 2.213, "step": 6493500 }, { "epoch": 32.17, "learning_rate": 3.3919008833598395e-05, "loss": 2.2009, "step": 6494000 }, { "epoch": 32.18, "learning_rate": 3.391777272434516e-05, "loss": 2.1952, "step": 6494500 }, { "epoch": 32.18, "learning_rate": 3.3916534137919074e-05, "loss": 2.2143, "step": 6495000 }, { "epoch": 32.18, "learning_rate": 3.391529555149299e-05, "loss": 2.208, "step": 6495500 }, { "epoch": 32.18, "learning_rate": 3.391405696506691e-05, "loss": 2.1843, "step": 6496000 }, { "epoch": 32.19, "learning_rate": 3.3912818378640825e-05, "loss": 2.1988, "step": 6496500 }, { "epoch": 32.19, "learning_rate": 3.391157979221474e-05, "loss": 2.2156, "step": 6497000 }, { "epoch": 32.19, "learning_rate": 3.391034120578866e-05, "loss": 2.1969, "step": 6497500 }, { "epoch": 32.19, "learning_rate": 3.3909102619362576e-05, "loss": 2.1982, "step": 6498000 }, { "epoch": 32.2, "learning_rate": 3.3907866510109344e-05, "loss": 2.1881, "step": 6498500 }, { "epoch": 32.2, "learning_rate": 3.390663040085611e-05, "loss": 2.198, "step": 6499000 }, { "epoch": 32.2, "learning_rate": 3.390539181443002e-05, "loss": 2.2107, "step": 6499500 }, { "epoch": 32.2, "learning_rate": 3.390415322800394e-05, "loss": 2.2016, "step": 6500000 }, { "epoch": 32.21, "learning_rate": 3.390291464157786e-05, "loss": 2.2143, "step": 6500500 }, { "epoch": 32.21, "learning_rate": 3.390167853232463e-05, "loss": 2.2103, "step": 6501000 }, { "epoch": 32.21, "learning_rate": 3.390043994589855e-05, "loss": 2.1963, "step": 6501500 }, { "epoch": 32.21, "learning_rate": 3.389920383664532e-05, "loss": 2.2025, "step": 6502000 }, { "epoch": 32.22, "learning_rate": 3.3897965250219235e-05, "loss": 2.2115, "step": 6502500 }, { "epoch": 32.22, "learning_rate": 3.389672666379315e-05, "loss": 2.2162, "step": 6503000 }, { "epoch": 32.22, "learning_rate": 3.389548807736706e-05, "loss": 2.2093, "step": 6503500 }, { "epoch": 32.22, "learning_rate": 3.389424949094098e-05, "loss": 2.2149, "step": 6504000 }, { "epoch": 32.23, "learning_rate": 3.3893010904514896e-05, "loss": 2.2041, "step": 6504500 }, { "epoch": 32.23, "learning_rate": 3.389177231808881e-05, "loss": 2.1998, "step": 6505000 }, { "epoch": 32.23, "learning_rate": 3.389053373166273e-05, "loss": 2.1898, "step": 6505500 }, { "epoch": 32.23, "learning_rate": 3.388929514523664e-05, "loss": 2.2154, "step": 6506000 }, { "epoch": 32.24, "learning_rate": 3.388805655881056e-05, "loss": 2.2269, "step": 6506500 }, { "epoch": 32.24, "learning_rate": 3.3886817972384474e-05, "loss": 2.2098, "step": 6507000 }, { "epoch": 32.24, "learning_rate": 3.388557938595839e-05, "loss": 2.2206, "step": 6507500 }, { "epoch": 32.24, "learning_rate": 3.388434079953231e-05, "loss": 2.2283, "step": 6508000 }, { "epoch": 32.25, "learning_rate": 3.388310469027908e-05, "loss": 2.2005, "step": 6508500 }, { "epoch": 32.25, "learning_rate": 3.3881866103852994e-05, "loss": 2.2254, "step": 6509000 }, { "epoch": 32.25, "learning_rate": 3.388062751742691e-05, "loss": 2.1927, "step": 6509500 }, { "epoch": 32.25, "learning_rate": 3.387938893100083e-05, "loss": 2.201, "step": 6510000 }, { "epoch": 32.26, "learning_rate": 3.3878150344574745e-05, "loss": 2.1961, "step": 6510500 }, { "epoch": 32.26, "learning_rate": 3.387691175814866e-05, "loss": 2.1983, "step": 6511000 }, { "epoch": 32.26, "learning_rate": 3.387567317172258e-05, "loss": 2.2046, "step": 6511500 }, { "epoch": 32.26, "learning_rate": 3.3874434585296495e-05, "loss": 2.2265, "step": 6512000 }, { "epoch": 32.27, "learning_rate": 3.387319599887041e-05, "loss": 2.214, "step": 6512500 }, { "epoch": 32.27, "learning_rate": 3.387195741244433e-05, "loss": 2.194, "step": 6513000 }, { "epoch": 32.27, "learning_rate": 3.3870718826018246e-05, "loss": 2.2226, "step": 6513500 }, { "epoch": 32.27, "learning_rate": 3.386948271676501e-05, "loss": 2.1941, "step": 6514000 }, { "epoch": 32.28, "learning_rate": 3.3868246607511784e-05, "loss": 2.1788, "step": 6514500 }, { "epoch": 32.28, "learning_rate": 3.3867008021085694e-05, "loss": 2.205, "step": 6515000 }, { "epoch": 32.28, "learning_rate": 3.386576943465961e-05, "loss": 2.2079, "step": 6515500 }, { "epoch": 32.28, "learning_rate": 3.386453084823353e-05, "loss": 2.1918, "step": 6516000 }, { "epoch": 32.28, "learning_rate": 3.3863294738980303e-05, "loss": 2.2067, "step": 6516500 }, { "epoch": 32.29, "learning_rate": 3.3862056152554214e-05, "loss": 2.1896, "step": 6517000 }, { "epoch": 32.29, "learning_rate": 3.386081756612813e-05, "loss": 2.1909, "step": 6517500 }, { "epoch": 32.29, "learning_rate": 3.385957897970205e-05, "loss": 2.2219, "step": 6518000 }, { "epoch": 32.29, "learning_rate": 3.3858342870448816e-05, "loss": 2.2144, "step": 6518500 }, { "epoch": 32.3, "learning_rate": 3.385710428402273e-05, "loss": 2.2162, "step": 6519000 }, { "epoch": 32.3, "learning_rate": 3.385586569759665e-05, "loss": 2.2064, "step": 6519500 }, { "epoch": 32.3, "learning_rate": 3.385462958834342e-05, "loss": 2.2063, "step": 6520000 }, { "epoch": 32.3, "learning_rate": 3.3853391001917336e-05, "loss": 2.2012, "step": 6520500 }, { "epoch": 32.31, "learning_rate": 3.385215241549125e-05, "loss": 2.2028, "step": 6521000 }, { "epoch": 32.31, "learning_rate": 3.385091382906517e-05, "loss": 2.1945, "step": 6521500 }, { "epoch": 32.31, "learning_rate": 3.3849680196984784e-05, "loss": 2.2054, "step": 6522000 }, { "epoch": 32.31, "learning_rate": 3.38484416105587e-05, "loss": 2.2139, "step": 6522500 }, { "epoch": 32.32, "learning_rate": 3.384720302413262e-05, "loss": 2.203, "step": 6523000 }, { "epoch": 32.32, "learning_rate": 3.3845964437706534e-05, "loss": 2.2034, "step": 6523500 }, { "epoch": 32.32, "learning_rate": 3.384472585128045e-05, "loss": 2.2103, "step": 6524000 }, { "epoch": 32.32, "learning_rate": 3.384348726485437e-05, "loss": 2.2098, "step": 6524500 }, { "epoch": 32.33, "learning_rate": 3.3842248678428285e-05, "loss": 2.1964, "step": 6525000 }, { "epoch": 32.33, "learning_rate": 3.38410100920022e-05, "loss": 2.1948, "step": 6525500 }, { "epoch": 32.33, "learning_rate": 3.383977150557612e-05, "loss": 2.2176, "step": 6526000 }, { "epoch": 32.33, "learning_rate": 3.3838532919150036e-05, "loss": 2.2094, "step": 6526500 }, { "epoch": 32.34, "learning_rate": 3.383729433272395e-05, "loss": 2.2111, "step": 6527000 }, { "epoch": 32.34, "learning_rate": 3.383605574629787e-05, "loss": 2.2003, "step": 6527500 }, { "epoch": 32.34, "learning_rate": 3.383481715987179e-05, "loss": 2.2115, "step": 6528000 }, { "epoch": 32.34, "learning_rate": 3.3833578573445704e-05, "loss": 2.2292, "step": 6528500 }, { "epoch": 32.35, "learning_rate": 3.3832342464192466e-05, "loss": 2.2087, "step": 6529000 }, { "epoch": 32.35, "learning_rate": 3.383110387776638e-05, "loss": 2.2061, "step": 6529500 }, { "epoch": 32.35, "learning_rate": 3.38298652913403e-05, "loss": 2.2259, "step": 6530000 }, { "epoch": 32.35, "learning_rate": 3.3828626704914217e-05, "loss": 2.203, "step": 6530500 }, { "epoch": 32.36, "learning_rate": 3.3827388118488133e-05, "loss": 2.2353, "step": 6531000 }, { "epoch": 32.36, "learning_rate": 3.382614953206205e-05, "loss": 2.2215, "step": 6531500 }, { "epoch": 32.36, "learning_rate": 3.382491342280882e-05, "loss": 2.2307, "step": 6532000 }, { "epoch": 32.36, "learning_rate": 3.3823674836382736e-05, "loss": 2.2312, "step": 6532500 }, { "epoch": 32.37, "learning_rate": 3.382243624995665e-05, "loss": 2.2237, "step": 6533000 }, { "epoch": 32.37, "learning_rate": 3.382119766353057e-05, "loss": 2.2023, "step": 6533500 }, { "epoch": 32.37, "learning_rate": 3.381996155427733e-05, "loss": 2.2172, "step": 6534000 }, { "epoch": 32.37, "learning_rate": 3.381872296785125e-05, "loss": 2.2087, "step": 6534500 }, { "epoch": 32.38, "learning_rate": 3.3817484381425166e-05, "loss": 2.2205, "step": 6535000 }, { "epoch": 32.38, "learning_rate": 3.381624579499908e-05, "loss": 2.2083, "step": 6535500 }, { "epoch": 32.38, "learning_rate": 3.3815007208573e-05, "loss": 2.2052, "step": 6536000 }, { "epoch": 32.38, "learning_rate": 3.381376862214692e-05, "loss": 2.2252, "step": 6536500 }, { "epoch": 32.39, "learning_rate": 3.3812530035720834e-05, "loss": 2.1906, "step": 6537000 }, { "epoch": 32.39, "learning_rate": 3.381129144929475e-05, "loss": 2.2277, "step": 6537500 }, { "epoch": 32.39, "learning_rate": 3.381005286286867e-05, "loss": 2.2075, "step": 6538000 }, { "epoch": 32.39, "learning_rate": 3.3808816753615436e-05, "loss": 2.2181, "step": 6538500 }, { "epoch": 32.4, "learning_rate": 3.380757816718935e-05, "loss": 2.2153, "step": 6539000 }, { "epoch": 32.4, "learning_rate": 3.380634205793612e-05, "loss": 2.1944, "step": 6539500 }, { "epoch": 32.4, "learning_rate": 3.380510842585574e-05, "loss": 2.2176, "step": 6540000 }, { "epoch": 32.4, "learning_rate": 3.380387231660251e-05, "loss": 2.2233, "step": 6540500 }, { "epoch": 32.41, "learning_rate": 3.380263373017643e-05, "loss": 2.2152, "step": 6541000 }, { "epoch": 32.41, "learning_rate": 3.380139514375034e-05, "loss": 2.2154, "step": 6541500 }, { "epoch": 32.41, "learning_rate": 3.3800156557324255e-05, "loss": 2.2299, "step": 6542000 }, { "epoch": 32.41, "learning_rate": 3.379891797089817e-05, "loss": 2.2184, "step": 6542500 }, { "epoch": 32.42, "learning_rate": 3.379767938447209e-05, "loss": 2.2195, "step": 6543000 }, { "epoch": 32.42, "learning_rate": 3.379644327521886e-05, "loss": 2.2001, "step": 6543500 }, { "epoch": 32.42, "learning_rate": 3.3795204688792775e-05, "loss": 2.2322, "step": 6544000 }, { "epoch": 32.42, "learning_rate": 3.379396610236669e-05, "loss": 2.207, "step": 6544500 }, { "epoch": 32.43, "learning_rate": 3.379272751594061e-05, "loss": 2.2361, "step": 6545000 }, { "epoch": 32.43, "learning_rate": 3.3791488929514526e-05, "loss": 2.2226, "step": 6545500 }, { "epoch": 32.43, "learning_rate": 3.379025034308844e-05, "loss": 2.2174, "step": 6546000 }, { "epoch": 32.43, "learning_rate": 3.378901175666236e-05, "loss": 2.2221, "step": 6546500 }, { "epoch": 32.44, "learning_rate": 3.378777317023628e-05, "loss": 2.1688, "step": 6547000 }, { "epoch": 32.44, "learning_rate": 3.3786534583810194e-05, "loss": 2.2214, "step": 6547500 }, { "epoch": 32.44, "learning_rate": 3.378529599738411e-05, "loss": 2.2168, "step": 6548000 }, { "epoch": 32.44, "learning_rate": 3.378405741095803e-05, "loss": 2.2249, "step": 6548500 }, { "epoch": 32.45, "learning_rate": 3.3782818824531944e-05, "loss": 2.2091, "step": 6549000 }, { "epoch": 32.45, "learning_rate": 3.378158023810586e-05, "loss": 2.2079, "step": 6549500 }, { "epoch": 32.45, "learning_rate": 3.378034165167977e-05, "loss": 2.236, "step": 6550000 }, { "epoch": 32.45, "learning_rate": 3.377910306525369e-05, "loss": 2.219, "step": 6550500 }, { "epoch": 32.46, "learning_rate": 3.3777864478827605e-05, "loss": 2.212, "step": 6551000 }, { "epoch": 32.46, "learning_rate": 3.377662589240152e-05, "loss": 2.2248, "step": 6551500 }, { "epoch": 32.46, "learning_rate": 3.377538730597544e-05, "loss": 2.2177, "step": 6552000 }, { "epoch": 32.46, "learning_rate": 3.3774148719549356e-05, "loss": 2.2114, "step": 6552500 }, { "epoch": 32.47, "learning_rate": 3.377291013312327e-05, "loss": 2.2005, "step": 6553000 }, { "epoch": 32.47, "learning_rate": 3.377167154669719e-05, "loss": 2.2251, "step": 6553500 }, { "epoch": 32.47, "learning_rate": 3.37704329602711e-05, "loss": 2.2158, "step": 6554000 }, { "epoch": 32.47, "learning_rate": 3.376919685101787e-05, "loss": 2.1986, "step": 6554500 }, { "epoch": 32.48, "learning_rate": 3.3767960741764644e-05, "loss": 2.2362, "step": 6555000 }, { "epoch": 32.48, "learning_rate": 3.376672215533856e-05, "loss": 2.2072, "step": 6555500 }, { "epoch": 32.48, "learning_rate": 3.3765488523258175e-05, "loss": 2.2369, "step": 6556000 }, { "epoch": 32.48, "learning_rate": 3.376424993683209e-05, "loss": 2.2121, "step": 6556500 }, { "epoch": 32.49, "learning_rate": 3.376301135040601e-05, "loss": 2.2245, "step": 6557000 }, { "epoch": 32.49, "learning_rate": 3.3761772763979926e-05, "loss": 2.2273, "step": 6557500 }, { "epoch": 32.49, "learning_rate": 3.376053417755384e-05, "loss": 2.2315, "step": 6558000 }, { "epoch": 32.49, "learning_rate": 3.375929559112776e-05, "loss": 2.221, "step": 6558500 }, { "epoch": 32.5, "learning_rate": 3.375805700470168e-05, "loss": 2.2076, "step": 6559000 }, { "epoch": 32.5, "learning_rate": 3.3756820895448446e-05, "loss": 2.2375, "step": 6559500 }, { "epoch": 32.5, "learning_rate": 3.375558230902236e-05, "loss": 2.233, "step": 6560000 }, { "epoch": 32.5, "learning_rate": 3.375434372259628e-05, "loss": 2.2013, "step": 6560500 }, { "epoch": 32.51, "learning_rate": 3.3753105136170196e-05, "loss": 2.224, "step": 6561000 }, { "epoch": 32.51, "learning_rate": 3.375186654974411e-05, "loss": 2.2265, "step": 6561500 }, { "epoch": 32.51, "learning_rate": 3.3750627963318024e-05, "loss": 2.2233, "step": 6562000 }, { "epoch": 32.51, "learning_rate": 3.374938937689194e-05, "loss": 2.22, "step": 6562500 }, { "epoch": 32.52, "learning_rate": 3.374815079046586e-05, "loss": 2.2073, "step": 6563000 }, { "epoch": 32.52, "learning_rate": 3.3746912204039774e-05, "loss": 2.2204, "step": 6563500 }, { "epoch": 32.52, "learning_rate": 3.374567609478654e-05, "loss": 2.1987, "step": 6564000 }, { "epoch": 32.52, "learning_rate": 3.374443750836046e-05, "loss": 2.252, "step": 6564500 }, { "epoch": 32.53, "learning_rate": 3.374319892193438e-05, "loss": 2.2045, "step": 6565000 }, { "epoch": 32.53, "learning_rate": 3.3741960335508294e-05, "loss": 2.2147, "step": 6565500 }, { "epoch": 32.53, "learning_rate": 3.374072174908221e-05, "loss": 2.2299, "step": 6566000 }, { "epoch": 32.53, "learning_rate": 3.373948316265613e-05, "loss": 2.2087, "step": 6566500 }, { "epoch": 32.54, "learning_rate": 3.3738244576230045e-05, "loss": 2.2143, "step": 6567000 }, { "epoch": 32.54, "learning_rate": 3.373700598980396e-05, "loss": 2.2074, "step": 6567500 }, { "epoch": 32.54, "learning_rate": 3.373576740337788e-05, "loss": 2.2112, "step": 6568000 }, { "epoch": 32.54, "learning_rate": 3.373453129412464e-05, "loss": 2.1904, "step": 6568500 }, { "epoch": 32.55, "learning_rate": 3.373329518487141e-05, "loss": 2.1953, "step": 6569000 }, { "epoch": 32.55, "learning_rate": 3.3732056598445326e-05, "loss": 2.2283, "step": 6569500 }, { "epoch": 32.55, "learning_rate": 3.373081801201924e-05, "loss": 2.2039, "step": 6570000 }, { "epoch": 32.55, "learning_rate": 3.372957942559316e-05, "loss": 2.2101, "step": 6570500 }, { "epoch": 32.56, "learning_rate": 3.372834083916708e-05, "loss": 2.2203, "step": 6571000 }, { "epoch": 32.56, "learning_rate": 3.3727102252740994e-05, "loss": 2.1973, "step": 6571500 }, { "epoch": 32.56, "learning_rate": 3.372586366631491e-05, "loss": 2.212, "step": 6572000 }, { "epoch": 32.56, "learning_rate": 3.372462755706168e-05, "loss": 2.2298, "step": 6572500 }, { "epoch": 32.56, "learning_rate": 3.37233889706356e-05, "loss": 2.201, "step": 6573000 }, { "epoch": 32.57, "learning_rate": 3.372215286138236e-05, "loss": 2.2432, "step": 6573500 }, { "epoch": 32.57, "learning_rate": 3.3720914274956276e-05, "loss": 2.2075, "step": 6574000 }, { "epoch": 32.57, "learning_rate": 3.371967568853019e-05, "loss": 2.2052, "step": 6574500 }, { "epoch": 32.57, "learning_rate": 3.371843710210411e-05, "loss": 2.201, "step": 6575000 }, { "epoch": 32.58, "learning_rate": 3.3717198515678026e-05, "loss": 2.2347, "step": 6575500 }, { "epoch": 32.58, "learning_rate": 3.3715959929251943e-05, "loss": 2.2136, "step": 6576000 }, { "epoch": 32.58, "learning_rate": 3.371472134282586e-05, "loss": 2.2285, "step": 6576500 }, { "epoch": 32.58, "learning_rate": 3.371348275639978e-05, "loss": 2.2092, "step": 6577000 }, { "epoch": 32.59, "learning_rate": 3.3712244169973694e-05, "loss": 2.2303, "step": 6577500 }, { "epoch": 32.59, "learning_rate": 3.371100806072046e-05, "loss": 2.2372, "step": 6578000 }, { "epoch": 32.59, "learning_rate": 3.370976947429438e-05, "loss": 2.2142, "step": 6578500 }, { "epoch": 32.59, "learning_rate": 3.37085308878683e-05, "loss": 2.2163, "step": 6579000 }, { "epoch": 32.6, "learning_rate": 3.3707292301442214e-05, "loss": 2.2067, "step": 6579500 }, { "epoch": 32.6, "learning_rate": 3.370605371501613e-05, "loss": 2.2277, "step": 6580000 }, { "epoch": 32.6, "learning_rate": 3.370481512859005e-05, "loss": 2.2055, "step": 6580500 }, { "epoch": 32.6, "learning_rate": 3.3703576542163965e-05, "loss": 2.1938, "step": 6581000 }, { "epoch": 32.61, "learning_rate": 3.370233795573788e-05, "loss": 2.2145, "step": 6581500 }, { "epoch": 32.61, "learning_rate": 3.370109936931179e-05, "loss": 2.1997, "step": 6582000 }, { "epoch": 32.61, "learning_rate": 3.369986078288571e-05, "loss": 2.2109, "step": 6582500 }, { "epoch": 32.61, "learning_rate": 3.3698622196459625e-05, "loss": 2.2218, "step": 6583000 }, { "epoch": 32.62, "learning_rate": 3.369738361003354e-05, "loss": 2.2179, "step": 6583500 }, { "epoch": 32.62, "learning_rate": 3.369614502360746e-05, "loss": 2.2124, "step": 6584000 }, { "epoch": 32.62, "learning_rate": 3.369490891435423e-05, "loss": 2.218, "step": 6584500 }, { "epoch": 32.62, "learning_rate": 3.3693670327928145e-05, "loss": 2.2309, "step": 6585000 }, { "epoch": 32.63, "learning_rate": 3.369243174150206e-05, "loss": 2.1999, "step": 6585500 }, { "epoch": 32.63, "learning_rate": 3.369119315507598e-05, "loss": 2.2156, "step": 6586000 }, { "epoch": 32.63, "learning_rate": 3.368995704582275e-05, "loss": 2.2088, "step": 6586500 }, { "epoch": 32.63, "learning_rate": 3.3688718459396665e-05, "loss": 2.2409, "step": 6587000 }, { "epoch": 32.64, "learning_rate": 3.368748235014343e-05, "loss": 2.2263, "step": 6587500 }, { "epoch": 32.64, "learning_rate": 3.3686243763717344e-05, "loss": 2.2184, "step": 6588000 }, { "epoch": 32.64, "learning_rate": 3.368500517729126e-05, "loss": 2.2329, "step": 6588500 }, { "epoch": 32.64, "learning_rate": 3.368376659086518e-05, "loss": 2.2595, "step": 6589000 }, { "epoch": 32.65, "learning_rate": 3.3682528004439094e-05, "loss": 2.2049, "step": 6589500 }, { "epoch": 32.65, "learning_rate": 3.368128941801301e-05, "loss": 2.2098, "step": 6590000 }, { "epoch": 32.65, "learning_rate": 3.368005083158693e-05, "loss": 2.2009, "step": 6590500 }, { "epoch": 32.65, "learning_rate": 3.3678812245160845e-05, "loss": 2.2141, "step": 6591000 }, { "epoch": 32.66, "learning_rate": 3.367757365873476e-05, "loss": 2.2486, "step": 6591500 }, { "epoch": 32.66, "learning_rate": 3.367633507230868e-05, "loss": 2.1941, "step": 6592000 }, { "epoch": 32.66, "learning_rate": 3.3675096485882596e-05, "loss": 2.2165, "step": 6592500 }, { "epoch": 32.66, "learning_rate": 3.367385789945651e-05, "loss": 2.1934, "step": 6593000 }, { "epoch": 32.67, "learning_rate": 3.367261931303042e-05, "loss": 2.2103, "step": 6593500 }, { "epoch": 32.67, "learning_rate": 3.367138072660434e-05, "loss": 2.2143, "step": 6594000 }, { "epoch": 32.67, "learning_rate": 3.367014214017826e-05, "loss": 2.2099, "step": 6594500 }, { "epoch": 32.67, "learning_rate": 3.366890603092503e-05, "loss": 2.1974, "step": 6595000 }, { "epoch": 32.68, "learning_rate": 3.366766744449894e-05, "loss": 2.2372, "step": 6595500 }, { "epoch": 32.68, "learning_rate": 3.366643133524571e-05, "loss": 2.2234, "step": 6596000 }, { "epoch": 32.68, "learning_rate": 3.366519274881963e-05, "loss": 2.2023, "step": 6596500 }, { "epoch": 32.68, "learning_rate": 3.3663954162393545e-05, "loss": 2.2142, "step": 6597000 }, { "epoch": 32.69, "learning_rate": 3.366271557596746e-05, "loss": 2.2005, "step": 6597500 }, { "epoch": 32.69, "learning_rate": 3.366147698954138e-05, "loss": 2.219, "step": 6598000 }, { "epoch": 32.69, "learning_rate": 3.366024088028815e-05, "loss": 2.2181, "step": 6598500 }, { "epoch": 32.69, "learning_rate": 3.3659002293862065e-05, "loss": 2.2129, "step": 6599000 }, { "epoch": 32.7, "learning_rate": 3.365776618460883e-05, "loss": 2.2157, "step": 6599500 }, { "epoch": 32.7, "learning_rate": 3.3656527598182744e-05, "loss": 2.2164, "step": 6600000 }, { "epoch": 32.7, "learning_rate": 3.365528901175666e-05, "loss": 2.1974, "step": 6600500 }, { "epoch": 32.7, "learning_rate": 3.365405042533058e-05, "loss": 2.2306, "step": 6601000 }, { "epoch": 32.71, "learning_rate": 3.3652814316077347e-05, "loss": 2.2077, "step": 6601500 }, { "epoch": 32.71, "learning_rate": 3.3651575729651263e-05, "loss": 2.2149, "step": 6602000 }, { "epoch": 32.71, "learning_rate": 3.365033714322518e-05, "loss": 2.2067, "step": 6602500 }, { "epoch": 32.71, "learning_rate": 3.36490985567991e-05, "loss": 2.2259, "step": 6603000 }, { "epoch": 32.72, "learning_rate": 3.3647859970373014e-05, "loss": 2.2163, "step": 6603500 }, { "epoch": 32.72, "learning_rate": 3.364662138394693e-05, "loss": 2.2088, "step": 6604000 }, { "epoch": 32.72, "learning_rate": 3.364538279752085e-05, "loss": 2.2304, "step": 6604500 }, { "epoch": 32.72, "learning_rate": 3.3644144211094765e-05, "loss": 2.2071, "step": 6605000 }, { "epoch": 32.73, "learning_rate": 3.364290562466868e-05, "loss": 2.2017, "step": 6605500 }, { "epoch": 32.73, "learning_rate": 3.36416670382426e-05, "loss": 2.2221, "step": 6606000 }, { "epoch": 32.73, "learning_rate": 3.3640428451816516e-05, "loss": 2.2516, "step": 6606500 }, { "epoch": 32.73, "learning_rate": 3.363918986539043e-05, "loss": 2.2249, "step": 6607000 }, { "epoch": 32.74, "learning_rate": 3.363795127896435e-05, "loss": 2.2237, "step": 6607500 }, { "epoch": 32.74, "learning_rate": 3.363671269253827e-05, "loss": 2.2048, "step": 6608000 }, { "epoch": 32.74, "learning_rate": 3.3635474106112184e-05, "loss": 2.2148, "step": 6608500 }, { "epoch": 32.74, "learning_rate": 3.3634235519686094e-05, "loss": 2.2256, "step": 6609000 }, { "epoch": 32.75, "learning_rate": 3.363299693326001e-05, "loss": 2.2463, "step": 6609500 }, { "epoch": 32.75, "learning_rate": 3.363175834683393e-05, "loss": 2.2072, "step": 6610000 }, { "epoch": 32.75, "learning_rate": 3.3630519760407844e-05, "loss": 2.2111, "step": 6610500 }, { "epoch": 32.75, "learning_rate": 3.362928365115461e-05, "loss": 2.2146, "step": 6611000 }, { "epoch": 32.76, "learning_rate": 3.362804506472853e-05, "loss": 2.252, "step": 6611500 }, { "epoch": 32.76, "learning_rate": 3.362680647830245e-05, "loss": 2.2347, "step": 6612000 }, { "epoch": 32.76, "learning_rate": 3.362556789187636e-05, "loss": 2.2109, "step": 6612500 }, { "epoch": 32.76, "learning_rate": 3.3624329305450274e-05, "loss": 2.2187, "step": 6613000 }, { "epoch": 32.77, "learning_rate": 3.362309319619705e-05, "loss": 2.2218, "step": 6613500 }, { "epoch": 32.77, "learning_rate": 3.362185460977097e-05, "loss": 2.226, "step": 6614000 }, { "epoch": 32.77, "learning_rate": 3.3620616023344884e-05, "loss": 2.2319, "step": 6614500 }, { "epoch": 32.77, "learning_rate": 3.36193774369188e-05, "loss": 2.2312, "step": 6615000 }, { "epoch": 32.78, "learning_rate": 3.361814132766556e-05, "loss": 2.224, "step": 6615500 }, { "epoch": 32.78, "learning_rate": 3.361690274123948e-05, "loss": 2.242, "step": 6616000 }, { "epoch": 32.78, "learning_rate": 3.3615664154813396e-05, "loss": 2.2007, "step": 6616500 }, { "epoch": 32.78, "learning_rate": 3.361443052273302e-05, "loss": 2.2512, "step": 6617000 }, { "epoch": 32.79, "learning_rate": 3.3613191936306934e-05, "loss": 2.2141, "step": 6617500 }, { "epoch": 32.79, "learning_rate": 3.361195334988085e-05, "loss": 2.221, "step": 6618000 }, { "epoch": 32.79, "learning_rate": 3.361071476345477e-05, "loss": 2.2071, "step": 6618500 }, { "epoch": 32.79, "learning_rate": 3.360947617702868e-05, "loss": 2.2425, "step": 6619000 }, { "epoch": 32.8, "learning_rate": 3.3608237590602595e-05, "loss": 2.223, "step": 6619500 }, { "epoch": 32.8, "learning_rate": 3.360699900417651e-05, "loss": 2.2144, "step": 6620000 }, { "epoch": 32.8, "learning_rate": 3.360576041775043e-05, "loss": 2.2007, "step": 6620500 }, { "epoch": 32.8, "learning_rate": 3.3604521831324346e-05, "loss": 2.204, "step": 6621000 }, { "epoch": 32.81, "learning_rate": 3.3603285722071115e-05, "loss": 2.2528, "step": 6621500 }, { "epoch": 32.81, "learning_rate": 3.360204713564503e-05, "loss": 2.21, "step": 6622000 }, { "epoch": 32.81, "learning_rate": 3.360080854921895e-05, "loss": 2.2036, "step": 6622500 }, { "epoch": 32.81, "learning_rate": 3.3599569962792865e-05, "loss": 2.2467, "step": 6623000 }, { "epoch": 32.82, "learning_rate": 3.3598336330712486e-05, "loss": 2.214, "step": 6623500 }, { "epoch": 32.82, "learning_rate": 3.35970977442864e-05, "loss": 2.2221, "step": 6624000 }, { "epoch": 32.82, "learning_rate": 3.359585915786032e-05, "loss": 2.2069, "step": 6624500 }, { "epoch": 32.82, "learning_rate": 3.359462057143424e-05, "loss": 2.228, "step": 6625000 }, { "epoch": 32.83, "learning_rate": 3.359338198500815e-05, "loss": 2.2318, "step": 6625500 }, { "epoch": 32.83, "learning_rate": 3.359214587575492e-05, "loss": 2.2248, "step": 6626000 }, { "epoch": 32.83, "learning_rate": 3.359090728932884e-05, "loss": 2.2005, "step": 6626500 }, { "epoch": 32.83, "learning_rate": 3.3589673657248453e-05, "loss": 2.2361, "step": 6627000 }, { "epoch": 32.83, "learning_rate": 3.358843507082237e-05, "loss": 2.2029, "step": 6627500 }, { "epoch": 32.84, "learning_rate": 3.358719648439629e-05, "loss": 2.2289, "step": 6628000 }, { "epoch": 32.84, "learning_rate": 3.3585957897970204e-05, "loss": 2.2083, "step": 6628500 }, { "epoch": 32.84, "learning_rate": 3.358471931154412e-05, "loss": 2.2152, "step": 6629000 }, { "epoch": 32.84, "learning_rate": 3.358348072511804e-05, "loss": 2.2215, "step": 6629500 }, { "epoch": 32.85, "learning_rate": 3.3582242138691955e-05, "loss": 2.1913, "step": 6630000 }, { "epoch": 32.85, "learning_rate": 3.358100355226587e-05, "loss": 2.2186, "step": 6630500 }, { "epoch": 32.85, "learning_rate": 3.357976496583979e-05, "loss": 2.2363, "step": 6631000 }, { "epoch": 32.85, "learning_rate": 3.3578526379413706e-05, "loss": 2.2145, "step": 6631500 }, { "epoch": 32.86, "learning_rate": 3.357728779298762e-05, "loss": 2.2131, "step": 6632000 }, { "epoch": 32.86, "learning_rate": 3.357604920656154e-05, "loss": 2.2287, "step": 6632500 }, { "epoch": 32.86, "learning_rate": 3.357481062013546e-05, "loss": 2.2282, "step": 6633000 }, { "epoch": 32.86, "learning_rate": 3.3573572033709374e-05, "loss": 2.222, "step": 6633500 }, { "epoch": 32.87, "learning_rate": 3.357233344728329e-05, "loss": 2.2288, "step": 6634000 }, { "epoch": 32.87, "learning_rate": 3.357109486085721e-05, "loss": 2.2067, "step": 6634500 }, { "epoch": 32.87, "learning_rate": 3.356985627443112e-05, "loss": 2.2245, "step": 6635000 }, { "epoch": 32.87, "learning_rate": 3.3568620165177886e-05, "loss": 2.2202, "step": 6635500 }, { "epoch": 32.88, "learning_rate": 3.35673815787518e-05, "loss": 2.2116, "step": 6636000 }, { "epoch": 32.88, "learning_rate": 3.356614546949857e-05, "loss": 2.2298, "step": 6636500 }, { "epoch": 32.88, "learning_rate": 3.356490688307249e-05, "loss": 2.2243, "step": 6637000 }, { "epoch": 32.88, "learning_rate": 3.3563668296646406e-05, "loss": 2.2296, "step": 6637500 }, { "epoch": 32.89, "learning_rate": 3.356242971022032e-05, "loss": 2.2225, "step": 6638000 }, { "epoch": 32.89, "learning_rate": 3.356119112379424e-05, "loss": 2.1984, "step": 6638500 }, { "epoch": 32.89, "learning_rate": 3.355995253736816e-05, "loss": 2.2297, "step": 6639000 }, { "epoch": 32.89, "learning_rate": 3.3558713950942074e-05, "loss": 2.2113, "step": 6639500 }, { "epoch": 32.9, "learning_rate": 3.355747536451599e-05, "loss": 2.2124, "step": 6640000 }, { "epoch": 32.9, "learning_rate": 3.355623677808991e-05, "loss": 2.213, "step": 6640500 }, { "epoch": 32.9, "learning_rate": 3.355500066883667e-05, "loss": 2.1908, "step": 6641000 }, { "epoch": 32.9, "learning_rate": 3.3553762082410586e-05, "loss": 2.2046, "step": 6641500 }, { "epoch": 32.91, "learning_rate": 3.3552523495984503e-05, "loss": 2.2032, "step": 6642000 }, { "epoch": 32.91, "learning_rate": 3.355128738673127e-05, "loss": 2.1987, "step": 6642500 }, { "epoch": 32.91, "learning_rate": 3.355004880030519e-05, "loss": 2.2311, "step": 6643000 }, { "epoch": 32.91, "learning_rate": 3.3548810213879106e-05, "loss": 2.2356, "step": 6643500 }, { "epoch": 32.92, "learning_rate": 3.354757162745302e-05, "loss": 2.2262, "step": 6644000 }, { "epoch": 32.92, "learning_rate": 3.354633304102694e-05, "loss": 2.196, "step": 6644500 }, { "epoch": 32.92, "learning_rate": 3.354509445460086e-05, "loss": 2.2177, "step": 6645000 }, { "epoch": 32.92, "learning_rate": 3.3543855868174774e-05, "loss": 2.2002, "step": 6645500 }, { "epoch": 32.93, "learning_rate": 3.354261728174869e-05, "loss": 2.2402, "step": 6646000 }, { "epoch": 32.93, "learning_rate": 3.354137869532261e-05, "loss": 2.2201, "step": 6646500 }, { "epoch": 32.93, "learning_rate": 3.3540140108896525e-05, "loss": 2.2249, "step": 6647000 }, { "epoch": 32.93, "learning_rate": 3.3538901522470435e-05, "loss": 2.2081, "step": 6647500 }, { "epoch": 32.94, "learning_rate": 3.3537665413217204e-05, "loss": 2.2199, "step": 6648000 }, { "epoch": 32.94, "learning_rate": 3.353642682679112e-05, "loss": 2.2042, "step": 6648500 }, { "epoch": 32.94, "learning_rate": 3.353518824036504e-05, "loss": 2.2251, "step": 6649000 }, { "epoch": 32.94, "learning_rate": 3.3533952131111806e-05, "loss": 2.2405, "step": 6649500 }, { "epoch": 32.95, "learning_rate": 3.353271354468572e-05, "loss": 2.2288, "step": 6650000 }, { "epoch": 32.95, "learning_rate": 3.353147495825964e-05, "loss": 2.2055, "step": 6650500 }, { "epoch": 32.95, "learning_rate": 3.353023637183356e-05, "loss": 2.2243, "step": 6651000 }, { "epoch": 32.95, "learning_rate": 3.3528997785407474e-05, "loss": 2.2378, "step": 6651500 }, { "epoch": 32.96, "learning_rate": 3.352775919898139e-05, "loss": 2.2031, "step": 6652000 }, { "epoch": 32.96, "learning_rate": 3.352652061255531e-05, "loss": 2.2246, "step": 6652500 }, { "epoch": 32.96, "learning_rate": 3.3525282026129225e-05, "loss": 2.1836, "step": 6653000 }, { "epoch": 32.96, "learning_rate": 3.3524045916875994e-05, "loss": 2.2215, "step": 6653500 }, { "epoch": 32.97, "learning_rate": 3.3522807330449904e-05, "loss": 2.2132, "step": 6654000 }, { "epoch": 32.97, "learning_rate": 3.352157122119667e-05, "loss": 2.2263, "step": 6654500 }, { "epoch": 32.97, "learning_rate": 3.352033511194344e-05, "loss": 2.2455, "step": 6655000 }, { "epoch": 32.97, "learning_rate": 3.351909652551736e-05, "loss": 2.2298, "step": 6655500 }, { "epoch": 32.98, "learning_rate": 3.3517857939091275e-05, "loss": 2.2223, "step": 6656000 }, { "epoch": 32.98, "learning_rate": 3.351661935266519e-05, "loss": 2.2317, "step": 6656500 }, { "epoch": 32.98, "learning_rate": 3.351538324341196e-05, "loss": 2.2343, "step": 6657000 }, { "epoch": 32.98, "learning_rate": 3.351414465698587e-05, "loss": 2.2235, "step": 6657500 }, { "epoch": 32.99, "learning_rate": 3.351290607055979e-05, "loss": 2.2268, "step": 6658000 }, { "epoch": 32.99, "learning_rate": 3.3511667484133705e-05, "loss": 2.2339, "step": 6658500 }, { "epoch": 32.99, "learning_rate": 3.351042889770762e-05, "loss": 2.2136, "step": 6659000 }, { "epoch": 32.99, "learning_rate": 3.350919031128154e-05, "loss": 2.1893, "step": 6659500 }, { "epoch": 33.0, "learning_rate": 3.3507951724855456e-05, "loss": 2.2058, "step": 6660000 }, { "epoch": 33.0, "learning_rate": 3.350671313842937e-05, "loss": 2.2326, "step": 6660500 }, { "epoch": 33.0, "eval_accuracy": 0.660817023764832, "eval_accuracy_mlm": 0.6167854855212918, "eval_accuracy_nsp": 0.8684219815735079, "eval_loss": 2.316751480102539, "eval_runtime": 145.7034, "eval_samples_per_second": 1749.849, "eval_steps_per_second": 72.915, "step": 6660819 }, { "epoch": 33.0, "learning_rate": 3.350547455200329e-05, "loss": 2.1953, "step": 6661000 }, { "epoch": 33.0, "learning_rate": 3.3504235965577206e-05, "loss": 2.1906, "step": 6661500 }, { "epoch": 33.01, "learning_rate": 3.350299737915112e-05, "loss": 2.1715, "step": 6662000 }, { "epoch": 33.01, "learning_rate": 3.350175879272504e-05, "loss": 2.1679, "step": 6662500 }, { "epoch": 33.01, "learning_rate": 3.350052020629896e-05, "loss": 2.2096, "step": 6663000 }, { "epoch": 33.01, "learning_rate": 3.3499281619872874e-05, "loss": 2.1684, "step": 6663500 }, { "epoch": 33.02, "learning_rate": 3.349804551061964e-05, "loss": 2.1789, "step": 6664000 }, { "epoch": 33.02, "learning_rate": 3.349680692419356e-05, "loss": 2.1883, "step": 6664500 }, { "epoch": 33.02, "learning_rate": 3.349557081494032e-05, "loss": 2.1976, "step": 6665000 }, { "epoch": 33.02, "learning_rate": 3.349433222851424e-05, "loss": 2.1609, "step": 6665500 }, { "epoch": 33.03, "learning_rate": 3.3493093642088156e-05, "loss": 2.1907, "step": 6666000 }, { "epoch": 33.03, "learning_rate": 3.349185505566207e-05, "loss": 2.1754, "step": 6666500 }, { "epoch": 33.03, "learning_rate": 3.349061894640884e-05, "loss": 2.1871, "step": 6667000 }, { "epoch": 33.03, "learning_rate": 3.348938035998276e-05, "loss": 2.1858, "step": 6667500 }, { "epoch": 33.04, "learning_rate": 3.348814425072953e-05, "loss": 2.1992, "step": 6668000 }, { "epoch": 33.04, "learning_rate": 3.3486905664303444e-05, "loss": 2.1737, "step": 6668500 }, { "epoch": 33.04, "learning_rate": 3.348566707787736e-05, "loss": 2.1944, "step": 6669000 }, { "epoch": 33.04, "learning_rate": 3.348442849145128e-05, "loss": 2.1859, "step": 6669500 }, { "epoch": 33.05, "learning_rate": 3.348319238219805e-05, "loss": 2.1786, "step": 6670000 }, { "epoch": 33.05, "learning_rate": 3.3481953795771964e-05, "loss": 2.1959, "step": 6670500 }, { "epoch": 33.05, "learning_rate": 3.348071520934588e-05, "loss": 2.2086, "step": 6671000 }, { "epoch": 33.05, "learning_rate": 3.34794766229198e-05, "loss": 2.1758, "step": 6671500 }, { "epoch": 33.06, "learning_rate": 3.3478238036493715e-05, "loss": 2.1904, "step": 6672000 }, { "epoch": 33.06, "learning_rate": 3.347699945006763e-05, "loss": 2.1964, "step": 6672500 }, { "epoch": 33.06, "learning_rate": 3.347576086364154e-05, "loss": 2.1856, "step": 6673000 }, { "epoch": 33.06, "learning_rate": 3.347452227721546e-05, "loss": 2.1881, "step": 6673500 }, { "epoch": 33.07, "learning_rate": 3.3473283690789376e-05, "loss": 2.1798, "step": 6674000 }, { "epoch": 33.07, "learning_rate": 3.347204510436329e-05, "loss": 2.1917, "step": 6674500 }, { "epoch": 33.07, "learning_rate": 3.347080651793721e-05, "loss": 2.1751, "step": 6675000 }, { "epoch": 33.07, "learning_rate": 3.3469567931511126e-05, "loss": 2.2065, "step": 6675500 }, { "epoch": 33.08, "learning_rate": 3.346832934508504e-05, "loss": 2.1857, "step": 6676000 }, { "epoch": 33.08, "learning_rate": 3.346709075865896e-05, "loss": 2.1952, "step": 6676500 }, { "epoch": 33.08, "learning_rate": 3.346585217223288e-05, "loss": 2.1908, "step": 6677000 }, { "epoch": 33.08, "learning_rate": 3.3464613585806794e-05, "loss": 2.1971, "step": 6677500 }, { "epoch": 33.09, "learning_rate": 3.346337499938071e-05, "loss": 2.194, "step": 6678000 }, { "epoch": 33.09, "learning_rate": 3.346213889012747e-05, "loss": 2.164, "step": 6678500 }, { "epoch": 33.09, "learning_rate": 3.346090030370139e-05, "loss": 2.1589, "step": 6679000 }, { "epoch": 33.09, "learning_rate": 3.345966171727531e-05, "loss": 2.216, "step": 6679500 }, { "epoch": 33.1, "learning_rate": 3.3458423130849224e-05, "loss": 2.1777, "step": 6680000 }, { "epoch": 33.1, "learning_rate": 3.345718454442314e-05, "loss": 2.2036, "step": 6680500 }, { "epoch": 33.1, "learning_rate": 3.345594595799706e-05, "loss": 2.1821, "step": 6681000 }, { "epoch": 33.1, "learning_rate": 3.3454707371570975e-05, "loss": 2.1714, "step": 6681500 }, { "epoch": 33.1, "learning_rate": 3.345346878514489e-05, "loss": 2.199, "step": 6682000 }, { "epoch": 33.11, "learning_rate": 3.345223019871881e-05, "loss": 2.1905, "step": 6682500 }, { "epoch": 33.11, "learning_rate": 3.3450991612292725e-05, "loss": 2.1875, "step": 6683000 }, { "epoch": 33.11, "learning_rate": 3.344975302586664e-05, "loss": 2.1735, "step": 6683500 }, { "epoch": 33.11, "learning_rate": 3.344851443944056e-05, "loss": 2.2194, "step": 6684000 }, { "epoch": 33.12, "learning_rate": 3.3447275853014476e-05, "loss": 2.1537, "step": 6684500 }, { "epoch": 33.12, "learning_rate": 3.3446037266588386e-05, "loss": 2.209, "step": 6685000 }, { "epoch": 33.12, "learning_rate": 3.344480115733516e-05, "loss": 2.1662, "step": 6685500 }, { "epoch": 33.12, "learning_rate": 3.344356257090908e-05, "loss": 2.1802, "step": 6686000 }, { "epoch": 33.13, "learning_rate": 3.344232646165584e-05, "loss": 2.211, "step": 6686500 }, { "epoch": 33.13, "learning_rate": 3.344108787522976e-05, "loss": 2.1945, "step": 6687000 }, { "epoch": 33.13, "learning_rate": 3.3439849288803675e-05, "loss": 2.1963, "step": 6687500 }, { "epoch": 33.13, "learning_rate": 3.343861070237759e-05, "loss": 2.1963, "step": 6688000 }, { "epoch": 33.14, "learning_rate": 3.343737211595151e-05, "loss": 2.1822, "step": 6688500 }, { "epoch": 33.14, "learning_rate": 3.3436133529525425e-05, "loss": 2.1843, "step": 6689000 }, { "epoch": 33.14, "learning_rate": 3.343489494309934e-05, "loss": 2.1904, "step": 6689500 }, { "epoch": 33.14, "learning_rate": 3.343365635667326e-05, "loss": 2.1861, "step": 6690000 }, { "epoch": 33.15, "learning_rate": 3.3432417770247176e-05, "loss": 2.2003, "step": 6690500 }, { "epoch": 33.15, "learning_rate": 3.3431181660993945e-05, "loss": 2.1901, "step": 6691000 }, { "epoch": 33.15, "learning_rate": 3.342994307456786e-05, "loss": 2.2043, "step": 6691500 }, { "epoch": 33.15, "learning_rate": 3.342870448814178e-05, "loss": 2.198, "step": 6692000 }, { "epoch": 33.16, "learning_rate": 3.342746837888854e-05, "loss": 2.1838, "step": 6692500 }, { "epoch": 33.16, "learning_rate": 3.342622979246246e-05, "loss": 2.196, "step": 6693000 }, { "epoch": 33.16, "learning_rate": 3.342499368320923e-05, "loss": 2.1899, "step": 6693500 }, { "epoch": 33.16, "learning_rate": 3.3423757573956e-05, "loss": 2.1982, "step": 6694000 }, { "epoch": 33.17, "learning_rate": 3.342251898752992e-05, "loss": 2.1961, "step": 6694500 }, { "epoch": 33.17, "learning_rate": 3.342128040110383e-05, "loss": 2.2007, "step": 6695000 }, { "epoch": 33.17, "learning_rate": 3.3420041814677746e-05, "loss": 2.1791, "step": 6695500 }, { "epoch": 33.17, "learning_rate": 3.341880322825166e-05, "loss": 2.2048, "step": 6696000 }, { "epoch": 33.18, "learning_rate": 3.341756464182558e-05, "loss": 2.197, "step": 6696500 }, { "epoch": 33.18, "learning_rate": 3.34163260553995e-05, "loss": 2.2261, "step": 6697000 }, { "epoch": 33.18, "learning_rate": 3.3415089946146266e-05, "loss": 2.1916, "step": 6697500 }, { "epoch": 33.18, "learning_rate": 3.3413851359720176e-05, "loss": 2.1948, "step": 6698000 }, { "epoch": 33.19, "learning_rate": 3.341261277329409e-05, "loss": 2.1977, "step": 6698500 }, { "epoch": 33.19, "learning_rate": 3.341137418686801e-05, "loss": 2.1964, "step": 6699000 }, { "epoch": 33.19, "learning_rate": 3.341013560044193e-05, "loss": 2.2078, "step": 6699500 }, { "epoch": 33.19, "learning_rate": 3.3408897014015844e-05, "loss": 2.1995, "step": 6700000 }, { "epoch": 33.2, "learning_rate": 3.340765842758976e-05, "loss": 2.1763, "step": 6700500 }, { "epoch": 33.2, "learning_rate": 3.340641984116368e-05, "loss": 2.1989, "step": 6701000 }, { "epoch": 33.2, "learning_rate": 3.3405181254737595e-05, "loss": 2.1885, "step": 6701500 }, { "epoch": 33.2, "learning_rate": 3.340394266831151e-05, "loss": 2.1716, "step": 6702000 }, { "epoch": 33.21, "learning_rate": 3.340270408188543e-05, "loss": 2.2069, "step": 6702500 }, { "epoch": 33.21, "learning_rate": 3.3401465495459345e-05, "loss": 2.1995, "step": 6703000 }, { "epoch": 33.21, "learning_rate": 3.3400229386206114e-05, "loss": 2.2251, "step": 6703500 }, { "epoch": 33.21, "learning_rate": 3.339899079978003e-05, "loss": 2.2079, "step": 6704000 }, { "epoch": 33.22, "learning_rate": 3.339775221335394e-05, "loss": 2.1914, "step": 6704500 }, { "epoch": 33.22, "learning_rate": 3.339651610410071e-05, "loss": 2.1867, "step": 6705000 }, { "epoch": 33.22, "learning_rate": 3.339527751767463e-05, "loss": 2.2132, "step": 6705500 }, { "epoch": 33.22, "learning_rate": 3.33940414084214e-05, "loss": 2.1824, "step": 6706000 }, { "epoch": 33.23, "learning_rate": 3.339280282199532e-05, "loss": 2.1999, "step": 6706500 }, { "epoch": 33.23, "learning_rate": 3.3391564235569236e-05, "loss": 2.1785, "step": 6707000 }, { "epoch": 33.23, "learning_rate": 3.3390325649143147e-05, "loss": 2.2085, "step": 6707500 }, { "epoch": 33.23, "learning_rate": 3.3389087062717063e-05, "loss": 2.2031, "step": 6708000 }, { "epoch": 33.24, "learning_rate": 3.338784847629098e-05, "loss": 2.181, "step": 6708500 }, { "epoch": 33.24, "learning_rate": 3.33866098898649e-05, "loss": 2.2132, "step": 6709000 }, { "epoch": 33.24, "learning_rate": 3.3385371303438814e-05, "loss": 2.1918, "step": 6709500 }, { "epoch": 33.24, "learning_rate": 3.338413271701273e-05, "loss": 2.1732, "step": 6710000 }, { "epoch": 33.25, "learning_rate": 3.338289413058665e-05, "loss": 2.1797, "step": 6710500 }, { "epoch": 33.25, "learning_rate": 3.3381655544160565e-05, "loss": 2.2188, "step": 6711000 }, { "epoch": 33.25, "learning_rate": 3.3380416957734475e-05, "loss": 2.1945, "step": 6711500 }, { "epoch": 33.25, "learning_rate": 3.3379180848481244e-05, "loss": 2.1775, "step": 6712000 }, { "epoch": 33.26, "learning_rate": 3.337794226205516e-05, "loss": 2.2078, "step": 6712500 }, { "epoch": 33.26, "learning_rate": 3.337670367562908e-05, "loss": 2.1919, "step": 6713000 }, { "epoch": 33.26, "learning_rate": 3.3375465089202995e-05, "loss": 2.2168, "step": 6713500 }, { "epoch": 33.26, "learning_rate": 3.337422650277691e-05, "loss": 2.1926, "step": 6714000 }, { "epoch": 33.27, "learning_rate": 3.337299039352368e-05, "loss": 2.2111, "step": 6714500 }, { "epoch": 33.27, "learning_rate": 3.33717518070976e-05, "loss": 2.1717, "step": 6715000 }, { "epoch": 33.27, "learning_rate": 3.3370515697844366e-05, "loss": 2.1892, "step": 6715500 }, { "epoch": 33.27, "learning_rate": 3.336927711141828e-05, "loss": 2.1819, "step": 6716000 }, { "epoch": 33.28, "learning_rate": 3.33680385249922e-05, "loss": 2.1656, "step": 6716500 }, { "epoch": 33.28, "learning_rate": 3.336680241573897e-05, "loss": 2.1817, "step": 6717000 }, { "epoch": 33.28, "learning_rate": 3.3365563829312886e-05, "loss": 2.2084, "step": 6717500 }, { "epoch": 33.28, "learning_rate": 3.33643252428868e-05, "loss": 2.1876, "step": 6718000 }, { "epoch": 33.29, "learning_rate": 3.336308665646072e-05, "loss": 2.2138, "step": 6718500 }, { "epoch": 33.29, "learning_rate": 3.3361848070034637e-05, "loss": 2.2026, "step": 6719000 }, { "epoch": 33.29, "learning_rate": 3.3360609483608554e-05, "loss": 2.1882, "step": 6719500 }, { "epoch": 33.29, "learning_rate": 3.3359370897182464e-05, "loss": 2.2171, "step": 6720000 }, { "epoch": 33.3, "learning_rate": 3.335813231075638e-05, "loss": 2.1742, "step": 6720500 }, { "epoch": 33.3, "learning_rate": 3.33568937243303e-05, "loss": 2.2045, "step": 6721000 }, { "epoch": 33.3, "learning_rate": 3.3355655137904214e-05, "loss": 2.2029, "step": 6721500 }, { "epoch": 33.3, "learning_rate": 3.335441655147813e-05, "loss": 2.1792, "step": 6722000 }, { "epoch": 33.31, "learning_rate": 3.335317796505205e-05, "loss": 2.1961, "step": 6722500 }, { "epoch": 33.31, "learning_rate": 3.3351939378625965e-05, "loss": 2.2094, "step": 6723000 }, { "epoch": 33.31, "learning_rate": 3.335070079219988e-05, "loss": 2.2066, "step": 6723500 }, { "epoch": 33.31, "learning_rate": 3.33494622057738e-05, "loss": 2.1974, "step": 6724000 }, { "epoch": 33.32, "learning_rate": 3.3348223619347716e-05, "loss": 2.1959, "step": 6724500 }, { "epoch": 33.32, "learning_rate": 3.334698998726734e-05, "loss": 2.2206, "step": 6725000 }, { "epoch": 33.32, "learning_rate": 3.3345751400841254e-05, "loss": 2.1876, "step": 6725500 }, { "epoch": 33.32, "learning_rate": 3.334451281441517e-05, "loss": 2.1978, "step": 6726000 }, { "epoch": 33.33, "learning_rate": 3.334327422798908e-05, "loss": 2.1713, "step": 6726500 }, { "epoch": 33.33, "learning_rate": 3.3342035641563e-05, "loss": 2.2117, "step": 6727000 }, { "epoch": 33.33, "learning_rate": 3.3340799532309766e-05, "loss": 2.2119, "step": 6727500 }, { "epoch": 33.33, "learning_rate": 3.3339560945883683e-05, "loss": 2.191, "step": 6728000 }, { "epoch": 33.34, "learning_rate": 3.33383223594576e-05, "loss": 2.1935, "step": 6728500 }, { "epoch": 33.34, "learning_rate": 3.333708377303152e-05, "loss": 2.1783, "step": 6729000 }, { "epoch": 33.34, "learning_rate": 3.333584518660543e-05, "loss": 2.2009, "step": 6729500 }, { "epoch": 33.34, "learning_rate": 3.3334606600179344e-05, "loss": 2.1902, "step": 6730000 }, { "epoch": 33.35, "learning_rate": 3.333336801375326e-05, "loss": 2.2013, "step": 6730500 }, { "epoch": 33.35, "learning_rate": 3.333212942732718e-05, "loss": 2.2053, "step": 6731000 }, { "epoch": 33.35, "learning_rate": 3.3330893318073954e-05, "loss": 2.2068, "step": 6731500 }, { "epoch": 33.35, "learning_rate": 3.332965473164787e-05, "loss": 2.1893, "step": 6732000 }, { "epoch": 33.36, "learning_rate": 3.332841614522178e-05, "loss": 2.1971, "step": 6732500 }, { "epoch": 33.36, "learning_rate": 3.33271775587957e-05, "loss": 2.2038, "step": 6733000 }, { "epoch": 33.36, "learning_rate": 3.3325938972369615e-05, "loss": 2.1941, "step": 6733500 }, { "epoch": 33.36, "learning_rate": 3.3324705340289235e-05, "loss": 2.2214, "step": 6734000 }, { "epoch": 33.37, "learning_rate": 3.332346675386315e-05, "loss": 2.1986, "step": 6734500 }, { "epoch": 33.37, "learning_rate": 3.332222816743707e-05, "loss": 2.2197, "step": 6735000 }, { "epoch": 33.37, "learning_rate": 3.332099205818384e-05, "loss": 2.198, "step": 6735500 }, { "epoch": 33.37, "learning_rate": 3.3319753471757755e-05, "loss": 2.2016, "step": 6736000 }, { "epoch": 33.37, "learning_rate": 3.331851488533167e-05, "loss": 2.2062, "step": 6736500 }, { "epoch": 33.38, "learning_rate": 3.331727629890559e-05, "loss": 2.2213, "step": 6737000 }, { "epoch": 33.38, "learning_rate": 3.3316037712479506e-05, "loss": 2.2173, "step": 6737500 }, { "epoch": 33.38, "learning_rate": 3.331479912605342e-05, "loss": 2.196, "step": 6738000 }, { "epoch": 33.38, "learning_rate": 3.331356053962734e-05, "loss": 2.2056, "step": 6738500 }, { "epoch": 33.39, "learning_rate": 3.331232195320125e-05, "loss": 2.2122, "step": 6739000 }, { "epoch": 33.39, "learning_rate": 3.331108336677517e-05, "loss": 2.1967, "step": 6739500 }, { "epoch": 33.39, "learning_rate": 3.3309844780349084e-05, "loss": 2.2116, "step": 6740000 }, { "epoch": 33.39, "learning_rate": 3.3308606193923e-05, "loss": 2.2288, "step": 6740500 }, { "epoch": 33.4, "learning_rate": 3.330736760749692e-05, "loss": 2.1971, "step": 6741000 }, { "epoch": 33.4, "learning_rate": 3.3306129021070834e-05, "loss": 2.2274, "step": 6741500 }, { "epoch": 33.4, "learning_rate": 3.33048929118176e-05, "loss": 2.2, "step": 6742000 }, { "epoch": 33.4, "learning_rate": 3.330365432539152e-05, "loss": 2.2053, "step": 6742500 }, { "epoch": 33.41, "learning_rate": 3.330241573896544e-05, "loss": 2.2427, "step": 6743000 }, { "epoch": 33.41, "learning_rate": 3.3301177152539354e-05, "loss": 2.2006, "step": 6743500 }, { "epoch": 33.41, "learning_rate": 3.329994104328612e-05, "loss": 2.1955, "step": 6744000 }, { "epoch": 33.41, "learning_rate": 3.329870245686004e-05, "loss": 2.2025, "step": 6744500 }, { "epoch": 33.42, "learning_rate": 3.329746387043396e-05, "loss": 2.2052, "step": 6745000 }, { "epoch": 33.42, "learning_rate": 3.3296225284007874e-05, "loss": 2.199, "step": 6745500 }, { "epoch": 33.42, "learning_rate": 3.3294986697581784e-05, "loss": 2.2119, "step": 6746000 }, { "epoch": 33.42, "learning_rate": 3.329375058832855e-05, "loss": 2.1805, "step": 6746500 }, { "epoch": 33.43, "learning_rate": 3.329251200190247e-05, "loss": 2.2214, "step": 6747000 }, { "epoch": 33.43, "learning_rate": 3.3291273415476386e-05, "loss": 2.1932, "step": 6747500 }, { "epoch": 33.43, "learning_rate": 3.32900348290503e-05, "loss": 2.21, "step": 6748000 }, { "epoch": 33.43, "learning_rate": 3.328879871979707e-05, "loss": 2.1815, "step": 6748500 }, { "epoch": 33.44, "learning_rate": 3.328756013337099e-05, "loss": 2.2231, "step": 6749000 }, { "epoch": 33.44, "learning_rate": 3.3286321546944906e-05, "loss": 2.1896, "step": 6749500 }, { "epoch": 33.44, "learning_rate": 3.328508296051882e-05, "loss": 2.1942, "step": 6750000 }, { "epoch": 33.44, "learning_rate": 3.328384437409274e-05, "loss": 2.2055, "step": 6750500 }, { "epoch": 33.45, "learning_rate": 3.328260578766666e-05, "loss": 2.2069, "step": 6751000 }, { "epoch": 33.45, "learning_rate": 3.3281367201240574e-05, "loss": 2.2045, "step": 6751500 }, { "epoch": 33.45, "learning_rate": 3.3280131091987336e-05, "loss": 2.2114, "step": 6752000 }, { "epoch": 33.45, "learning_rate": 3.327889250556125e-05, "loss": 2.2043, "step": 6752500 }, { "epoch": 33.46, "learning_rate": 3.327765391913517e-05, "loss": 2.2053, "step": 6753000 }, { "epoch": 33.46, "learning_rate": 3.3276415332709087e-05, "loss": 2.2035, "step": 6753500 }, { "epoch": 33.46, "learning_rate": 3.3275179223455855e-05, "loss": 2.1932, "step": 6754000 }, { "epoch": 33.46, "learning_rate": 3.327394063702977e-05, "loss": 2.2068, "step": 6754500 }, { "epoch": 33.47, "learning_rate": 3.327270205060369e-05, "loss": 2.1834, "step": 6755000 }, { "epoch": 33.47, "learning_rate": 3.3271463464177606e-05, "loss": 2.1792, "step": 6755500 }, { "epoch": 33.47, "learning_rate": 3.327022735492437e-05, "loss": 2.1915, "step": 6756000 }, { "epoch": 33.47, "learning_rate": 3.3268988768498285e-05, "loss": 2.2222, "step": 6756500 }, { "epoch": 33.48, "learning_rate": 3.32677501820722e-05, "loss": 2.2125, "step": 6757000 }, { "epoch": 33.48, "learning_rate": 3.326651159564612e-05, "loss": 2.1967, "step": 6757500 }, { "epoch": 33.48, "learning_rate": 3.3265273009220036e-05, "loss": 2.2272, "step": 6758000 }, { "epoch": 33.48, "learning_rate": 3.326403442279395e-05, "loss": 2.2209, "step": 6758500 }, { "epoch": 33.49, "learning_rate": 3.326279831354072e-05, "loss": 2.2223, "step": 6759000 }, { "epoch": 33.49, "learning_rate": 3.326155972711464e-05, "loss": 2.21, "step": 6759500 }, { "epoch": 33.49, "learning_rate": 3.3260321140688556e-05, "loss": 2.2063, "step": 6760000 }, { "epoch": 33.49, "learning_rate": 3.325908255426247e-05, "loss": 2.2272, "step": 6760500 }, { "epoch": 33.5, "learning_rate": 3.325784396783639e-05, "loss": 2.1844, "step": 6761000 }, { "epoch": 33.5, "learning_rate": 3.3256605381410306e-05, "loss": 2.2001, "step": 6761500 }, { "epoch": 33.5, "learning_rate": 3.3255369272157075e-05, "loss": 2.1846, "step": 6762000 }, { "epoch": 33.5, "learning_rate": 3.3254133162903844e-05, "loss": 2.2296, "step": 6762500 }, { "epoch": 33.51, "learning_rate": 3.325289457647776e-05, "loss": 2.2138, "step": 6763000 }, { "epoch": 33.51, "learning_rate": 3.325165599005168e-05, "loss": 2.2029, "step": 6763500 }, { "epoch": 33.51, "learning_rate": 3.3250417403625595e-05, "loss": 2.1842, "step": 6764000 }, { "epoch": 33.51, "learning_rate": 3.3249181294372364e-05, "loss": 2.216, "step": 6764500 }, { "epoch": 33.52, "learning_rate": 3.324794270794628e-05, "loss": 2.2137, "step": 6765000 }, { "epoch": 33.52, "learning_rate": 3.32467041215202e-05, "loss": 2.1953, "step": 6765500 }, { "epoch": 33.52, "learning_rate": 3.324546801226696e-05, "loss": 2.209, "step": 6766000 }, { "epoch": 33.52, "learning_rate": 3.3244229425840876e-05, "loss": 2.2116, "step": 6766500 }, { "epoch": 33.53, "learning_rate": 3.324299083941479e-05, "loss": 2.2116, "step": 6767000 }, { "epoch": 33.53, "learning_rate": 3.324175225298871e-05, "loss": 2.2068, "step": 6767500 }, { "epoch": 33.53, "learning_rate": 3.324051366656263e-05, "loss": 2.1877, "step": 6768000 }, { "epoch": 33.53, "learning_rate": 3.3239275080136544e-05, "loss": 2.2073, "step": 6768500 }, { "epoch": 33.54, "learning_rate": 3.323803649371046e-05, "loss": 2.1873, "step": 6769000 }, { "epoch": 33.54, "learning_rate": 3.323679790728438e-05, "loss": 2.2067, "step": 6769500 }, { "epoch": 33.54, "learning_rate": 3.3235559320858295e-05, "loss": 2.1925, "step": 6770000 }, { "epoch": 33.54, "learning_rate": 3.3234320734432205e-05, "loss": 2.2227, "step": 6770500 }, { "epoch": 33.55, "learning_rate": 3.323308214800612e-05, "loss": 2.1975, "step": 6771000 }, { "epoch": 33.55, "learning_rate": 3.323184356158004e-05, "loss": 2.1838, "step": 6771500 }, { "epoch": 33.55, "learning_rate": 3.3230604975153956e-05, "loss": 2.2219, "step": 6772000 }, { "epoch": 33.55, "learning_rate": 3.322936638872787e-05, "loss": 2.1908, "step": 6772500 }, { "epoch": 33.56, "learning_rate": 3.322812780230179e-05, "loss": 2.1813, "step": 6773000 }, { "epoch": 33.56, "learning_rate": 3.3226889215875707e-05, "loss": 2.1919, "step": 6773500 }, { "epoch": 33.56, "learning_rate": 3.3225650629449623e-05, "loss": 2.2287, "step": 6774000 }, { "epoch": 33.56, "learning_rate": 3.322441204302354e-05, "loss": 2.2185, "step": 6774500 }, { "epoch": 33.57, "learning_rate": 3.322317345659746e-05, "loss": 2.1899, "step": 6775000 }, { "epoch": 33.57, "learning_rate": 3.3221934870171374e-05, "loss": 2.204, "step": 6775500 }, { "epoch": 33.57, "learning_rate": 3.322069628374529e-05, "loss": 2.2152, "step": 6776000 }, { "epoch": 33.57, "learning_rate": 3.321945769731921e-05, "loss": 2.1904, "step": 6776500 }, { "epoch": 33.58, "learning_rate": 3.3218219110893125e-05, "loss": 2.2197, "step": 6777000 }, { "epoch": 33.58, "learning_rate": 3.321698052446704e-05, "loss": 2.1919, "step": 6777500 }, { "epoch": 33.58, "learning_rate": 3.321574193804096e-05, "loss": 2.2174, "step": 6778000 }, { "epoch": 33.58, "learning_rate": 3.3214503351614876e-05, "loss": 2.1984, "step": 6778500 }, { "epoch": 33.59, "learning_rate": 3.321326971953449e-05, "loss": 2.2013, "step": 6779000 }, { "epoch": 33.59, "learning_rate": 3.3212033610281265e-05, "loss": 2.2408, "step": 6779500 }, { "epoch": 33.59, "learning_rate": 3.3210795023855175e-05, "loss": 2.2103, "step": 6780000 }, { "epoch": 33.59, "learning_rate": 3.320955643742909e-05, "loss": 2.1746, "step": 6780500 }, { "epoch": 33.6, "learning_rate": 3.320831785100301e-05, "loss": 2.2278, "step": 6781000 }, { "epoch": 33.6, "learning_rate": 3.3207079264576926e-05, "loss": 2.1785, "step": 6781500 }, { "epoch": 33.6, "learning_rate": 3.320584067815084e-05, "loss": 2.2099, "step": 6782000 }, { "epoch": 33.6, "learning_rate": 3.320460209172476e-05, "loss": 2.1942, "step": 6782500 }, { "epoch": 33.61, "learning_rate": 3.320336845964438e-05, "loss": 2.2025, "step": 6783000 }, { "epoch": 33.61, "learning_rate": 3.32021298732183e-05, "loss": 2.2365, "step": 6783500 }, { "epoch": 33.61, "learning_rate": 3.3200891286792215e-05, "loss": 2.2065, "step": 6784000 }, { "epoch": 33.61, "learning_rate": 3.319965270036613e-05, "loss": 2.1954, "step": 6784500 }, { "epoch": 33.62, "learning_rate": 3.319841411394005e-05, "loss": 2.2004, "step": 6785000 }, { "epoch": 33.62, "learning_rate": 3.3197175527513965e-05, "loss": 2.211, "step": 6785500 }, { "epoch": 33.62, "learning_rate": 3.319593694108788e-05, "loss": 2.2276, "step": 6786000 }, { "epoch": 33.62, "learning_rate": 3.319469835466179e-05, "loss": 2.2108, "step": 6786500 }, { "epoch": 33.63, "learning_rate": 3.319345976823571e-05, "loss": 2.2176, "step": 6787000 }, { "epoch": 33.63, "learning_rate": 3.3192221181809626e-05, "loss": 2.2349, "step": 6787500 }, { "epoch": 33.63, "learning_rate": 3.3190985072556395e-05, "loss": 2.2048, "step": 6788000 }, { "epoch": 33.63, "learning_rate": 3.318974648613031e-05, "loss": 2.1891, "step": 6788500 }, { "epoch": 33.64, "learning_rate": 3.318850789970423e-05, "loss": 2.2304, "step": 6789000 }, { "epoch": 33.64, "learning_rate": 3.318726931327814e-05, "loss": 2.21, "step": 6789500 }, { "epoch": 33.64, "learning_rate": 3.3186030726852056e-05, "loss": 2.1899, "step": 6790000 }, { "epoch": 33.64, "learning_rate": 3.318479214042597e-05, "loss": 2.2241, "step": 6790500 }, { "epoch": 33.64, "learning_rate": 3.318355355399989e-05, "loss": 2.2191, "step": 6791000 }, { "epoch": 33.65, "learning_rate": 3.318231496757381e-05, "loss": 2.2102, "step": 6791500 }, { "epoch": 33.65, "learning_rate": 3.318107885832058e-05, "loss": 2.1883, "step": 6792000 }, { "epoch": 33.65, "learning_rate": 3.317984027189449e-05, "loss": 2.1964, "step": 6792500 }, { "epoch": 33.65, "learning_rate": 3.317860168546841e-05, "loss": 2.2054, "step": 6793000 }, { "epoch": 33.66, "learning_rate": 3.3177363099042327e-05, "loss": 2.1987, "step": 6793500 }, { "epoch": 33.66, "learning_rate": 3.3176126989789095e-05, "loss": 2.2227, "step": 6794000 }, { "epoch": 33.66, "learning_rate": 3.317488840336301e-05, "loss": 2.217, "step": 6794500 }, { "epoch": 33.66, "learning_rate": 3.317364981693693e-05, "loss": 2.1929, "step": 6795000 }, { "epoch": 33.67, "learning_rate": 3.317241123051084e-05, "loss": 2.2129, "step": 6795500 }, { "epoch": 33.67, "learning_rate": 3.3171172644084756e-05, "loss": 2.2122, "step": 6796000 }, { "epoch": 33.67, "learning_rate": 3.316993405765867e-05, "loss": 2.2232, "step": 6796500 }, { "epoch": 33.67, "learning_rate": 3.316869547123259e-05, "loss": 2.2027, "step": 6797000 }, { "epoch": 33.68, "learning_rate": 3.316745688480651e-05, "loss": 2.1981, "step": 6797500 }, { "epoch": 33.68, "learning_rate": 3.3166218298380424e-05, "loss": 2.1982, "step": 6798000 }, { "epoch": 33.68, "learning_rate": 3.316497971195434e-05, "loss": 2.1927, "step": 6798500 }, { "epoch": 33.68, "learning_rate": 3.316374112552826e-05, "loss": 2.1843, "step": 6799000 }, { "epoch": 33.69, "learning_rate": 3.3162502539102175e-05, "loss": 2.2093, "step": 6799500 }, { "epoch": 33.69, "learning_rate": 3.3161266429848944e-05, "loss": 2.1841, "step": 6800000 }, { "epoch": 33.69, "learning_rate": 3.316003032059571e-05, "loss": 2.2062, "step": 6800500 }, { "epoch": 33.69, "learning_rate": 3.315879173416963e-05, "loss": 2.1887, "step": 6801000 }, { "epoch": 33.7, "learning_rate": 3.3157553147743546e-05, "loss": 2.2124, "step": 6801500 }, { "epoch": 33.7, "learning_rate": 3.3156314561317456e-05, "loss": 2.18, "step": 6802000 }, { "epoch": 33.7, "learning_rate": 3.315507597489137e-05, "loss": 2.2071, "step": 6802500 }, { "epoch": 33.7, "learning_rate": 3.315383986563815e-05, "loss": 2.1993, "step": 6803000 }, { "epoch": 33.71, "learning_rate": 3.315260375638492e-05, "loss": 2.1961, "step": 6803500 }, { "epoch": 33.71, "learning_rate": 3.3151365169958835e-05, "loss": 2.1915, "step": 6804000 }, { "epoch": 33.71, "learning_rate": 3.3150126583532745e-05, "loss": 2.1792, "step": 6804500 }, { "epoch": 33.71, "learning_rate": 3.314888799710666e-05, "loss": 2.192, "step": 6805000 }, { "epoch": 33.72, "learning_rate": 3.314764941068058e-05, "loss": 2.1913, "step": 6805500 }, { "epoch": 33.72, "learning_rate": 3.3146410824254496e-05, "loss": 2.1953, "step": 6806000 }, { "epoch": 33.72, "learning_rate": 3.314517223782841e-05, "loss": 2.1918, "step": 6806500 }, { "epoch": 33.72, "learning_rate": 3.314393365140233e-05, "loss": 2.189, "step": 6807000 }, { "epoch": 33.73, "learning_rate": 3.31426975421491e-05, "loss": 2.1915, "step": 6807500 }, { "epoch": 33.73, "learning_rate": 3.314146143289587e-05, "loss": 2.2173, "step": 6808000 }, { "epoch": 33.73, "learning_rate": 3.3140222846469784e-05, "loss": 2.2431, "step": 6808500 }, { "epoch": 33.73, "learning_rate": 3.31389842600437e-05, "loss": 2.2125, "step": 6809000 }, { "epoch": 33.74, "learning_rate": 3.313774815079046e-05, "loss": 2.224, "step": 6809500 }, { "epoch": 33.74, "learning_rate": 3.313651204153724e-05, "loss": 2.2055, "step": 6810000 }, { "epoch": 33.74, "learning_rate": 3.3135273455111155e-05, "loss": 2.21, "step": 6810500 }, { "epoch": 33.74, "learning_rate": 3.313403486868507e-05, "loss": 2.2161, "step": 6811000 }, { "epoch": 33.75, "learning_rate": 3.313279628225899e-05, "loss": 2.1847, "step": 6811500 }, { "epoch": 33.75, "learning_rate": 3.31315576958329e-05, "loss": 2.2189, "step": 6812000 }, { "epoch": 33.75, "learning_rate": 3.3130319109406816e-05, "loss": 2.2207, "step": 6812500 }, { "epoch": 33.75, "learning_rate": 3.312908052298073e-05, "loss": 2.1814, "step": 6813000 }, { "epoch": 33.76, "learning_rate": 3.312784193655465e-05, "loss": 2.2075, "step": 6813500 }, { "epoch": 33.76, "learning_rate": 3.312660335012857e-05, "loss": 2.2068, "step": 6814000 }, { "epoch": 33.76, "learning_rate": 3.3125364763702484e-05, "loss": 2.2041, "step": 6814500 }, { "epoch": 33.76, "learning_rate": 3.3124128654449246e-05, "loss": 2.2088, "step": 6815000 }, { "epoch": 33.77, "learning_rate": 3.312289006802316e-05, "loss": 2.2029, "step": 6815500 }, { "epoch": 33.77, "learning_rate": 3.312165148159708e-05, "loss": 2.1921, "step": 6816000 }, { "epoch": 33.77, "learning_rate": 3.3120412895171e-05, "loss": 2.216, "step": 6816500 }, { "epoch": 33.77, "learning_rate": 3.3119174308744914e-05, "loss": 2.195, "step": 6817000 }, { "epoch": 33.78, "learning_rate": 3.311793572231883e-05, "loss": 2.2238, "step": 6817500 }, { "epoch": 33.78, "learning_rate": 3.311669713589275e-05, "loss": 2.2144, "step": 6818000 }, { "epoch": 33.78, "learning_rate": 3.3115458549466665e-05, "loss": 2.2092, "step": 6818500 }, { "epoch": 33.78, "learning_rate": 3.3114222440213433e-05, "loss": 2.2108, "step": 6819000 }, { "epoch": 33.79, "learning_rate": 3.311298385378735e-05, "loss": 2.2043, "step": 6819500 }, { "epoch": 33.79, "learning_rate": 3.311174526736127e-05, "loss": 2.1984, "step": 6820000 }, { "epoch": 33.79, "learning_rate": 3.3110509158108036e-05, "loss": 2.1975, "step": 6820500 }, { "epoch": 33.79, "learning_rate": 3.310927057168195e-05, "loss": 2.1769, "step": 6821000 }, { "epoch": 33.8, "learning_rate": 3.310803198525586e-05, "loss": 2.1828, "step": 6821500 }, { "epoch": 33.8, "learning_rate": 3.310679339882978e-05, "loss": 2.2122, "step": 6822000 }, { "epoch": 33.8, "learning_rate": 3.31055548124037e-05, "loss": 2.1925, "step": 6822500 }, { "epoch": 33.8, "learning_rate": 3.3104316225977614e-05, "loss": 2.1946, "step": 6823000 }, { "epoch": 33.81, "learning_rate": 3.310307763955153e-05, "loss": 2.2058, "step": 6823500 }, { "epoch": 33.81, "learning_rate": 3.310183905312545e-05, "loss": 2.2011, "step": 6824000 }, { "epoch": 33.81, "learning_rate": 3.3100600466699365e-05, "loss": 2.2053, "step": 6824500 }, { "epoch": 33.81, "learning_rate": 3.3099364357446134e-05, "loss": 2.2091, "step": 6825000 }, { "epoch": 33.82, "learning_rate": 3.309812577102005e-05, "loss": 2.2024, "step": 6825500 }, { "epoch": 33.82, "learning_rate": 3.309688718459397e-05, "loss": 2.1989, "step": 6826000 }, { "epoch": 33.82, "learning_rate": 3.3095648598167884e-05, "loss": 2.197, "step": 6826500 }, { "epoch": 33.82, "learning_rate": 3.30944100117418e-05, "loss": 2.2224, "step": 6827000 }, { "epoch": 33.83, "learning_rate": 3.309317142531572e-05, "loss": 2.2263, "step": 6827500 }, { "epoch": 33.83, "learning_rate": 3.3091932838889635e-05, "loss": 2.2002, "step": 6828000 }, { "epoch": 33.83, "learning_rate": 3.309069425246355e-05, "loss": 2.2009, "step": 6828500 }, { "epoch": 33.83, "learning_rate": 3.308945566603747e-05, "loss": 2.2113, "step": 6829000 }, { "epoch": 33.84, "learning_rate": 3.3088217079611386e-05, "loss": 2.2273, "step": 6829500 }, { "epoch": 33.84, "learning_rate": 3.30869784931853e-05, "loss": 2.2292, "step": 6830000 }, { "epoch": 33.84, "learning_rate": 3.308573990675922e-05, "loss": 2.1992, "step": 6830500 }, { "epoch": 33.84, "learning_rate": 3.308450132033314e-05, "loss": 2.217, "step": 6831000 }, { "epoch": 33.85, "learning_rate": 3.308326273390705e-05, "loss": 2.2222, "step": 6831500 }, { "epoch": 33.85, "learning_rate": 3.3082026624653816e-05, "loss": 2.1777, "step": 6832000 }, { "epoch": 33.85, "learning_rate": 3.308078803822773e-05, "loss": 2.1957, "step": 6832500 }, { "epoch": 33.85, "learning_rate": 3.30795519289745e-05, "loss": 2.2035, "step": 6833000 }, { "epoch": 33.86, "learning_rate": 3.307831334254842e-05, "loss": 2.2322, "step": 6833500 }, { "epoch": 33.86, "learning_rate": 3.3077074756122335e-05, "loss": 2.2006, "step": 6834000 }, { "epoch": 33.86, "learning_rate": 3.307583616969625e-05, "loss": 2.2075, "step": 6834500 }, { "epoch": 33.86, "learning_rate": 3.3074600060443014e-05, "loss": 2.2022, "step": 6835000 }, { "epoch": 33.87, "learning_rate": 3.307336147401693e-05, "loss": 2.239, "step": 6835500 }, { "epoch": 33.87, "learning_rate": 3.307212288759085e-05, "loss": 2.2284, "step": 6836000 }, { "epoch": 33.87, "learning_rate": 3.3070884301164765e-05, "loss": 2.2016, "step": 6836500 }, { "epoch": 33.87, "learning_rate": 3.3069648191911534e-05, "loss": 2.2318, "step": 6837000 }, { "epoch": 33.88, "learning_rate": 3.306840960548545e-05, "loss": 2.2189, "step": 6837500 }, { "epoch": 33.88, "learning_rate": 3.306717101905937e-05, "loss": 2.2088, "step": 6838000 }, { "epoch": 33.88, "learning_rate": 3.3065932432633285e-05, "loss": 2.215, "step": 6838500 }, { "epoch": 33.88, "learning_rate": 3.30646938462072e-05, "loss": 2.2143, "step": 6839000 }, { "epoch": 33.89, "learning_rate": 3.306345525978112e-05, "loss": 2.2258, "step": 6839500 }, { "epoch": 33.89, "learning_rate": 3.3062216673355035e-05, "loss": 2.2073, "step": 6840000 }, { "epoch": 33.89, "learning_rate": 3.306097808692895e-05, "loss": 2.2141, "step": 6840500 }, { "epoch": 33.89, "learning_rate": 3.305973950050287e-05, "loss": 2.2215, "step": 6841000 }, { "epoch": 33.9, "learning_rate": 3.3058500914076786e-05, "loss": 2.2281, "step": 6841500 }, { "epoch": 33.9, "learning_rate": 3.30572623276507e-05, "loss": 2.1636, "step": 6842000 }, { "epoch": 33.9, "learning_rate": 3.305602374122462e-05, "loss": 2.2143, "step": 6842500 }, { "epoch": 33.9, "learning_rate": 3.305478515479854e-05, "loss": 2.2349, "step": 6843000 }, { "epoch": 33.91, "learning_rate": 3.30535490455453e-05, "loss": 2.2016, "step": 6843500 }, { "epoch": 33.91, "learning_rate": 3.305231293629207e-05, "loss": 2.1975, "step": 6844000 }, { "epoch": 33.91, "learning_rate": 3.3051074349865985e-05, "loss": 2.2012, "step": 6844500 }, { "epoch": 33.91, "learning_rate": 3.30498357634399e-05, "loss": 2.2201, "step": 6845000 }, { "epoch": 33.91, "learning_rate": 3.304859717701382e-05, "loss": 2.1905, "step": 6845500 }, { "epoch": 33.92, "learning_rate": 3.3047358590587735e-05, "loss": 2.2118, "step": 6846000 }, { "epoch": 33.92, "learning_rate": 3.304612000416165e-05, "loss": 2.2002, "step": 6846500 }, { "epoch": 33.92, "learning_rate": 3.304488141773557e-05, "loss": 2.2046, "step": 6847000 }, { "epoch": 33.92, "learning_rate": 3.3043642831309486e-05, "loss": 2.192, "step": 6847500 }, { "epoch": 33.93, "learning_rate": 3.304240672205625e-05, "loss": 2.2318, "step": 6848000 }, { "epoch": 33.93, "learning_rate": 3.3041168135630165e-05, "loss": 2.2128, "step": 6848500 }, { "epoch": 33.93, "learning_rate": 3.303992954920408e-05, "loss": 2.2345, "step": 6849000 }, { "epoch": 33.93, "learning_rate": 3.303869343995085e-05, "loss": 2.2106, "step": 6849500 }, { "epoch": 33.94, "learning_rate": 3.303745485352477e-05, "loss": 2.2147, "step": 6850000 }, { "epoch": 33.94, "learning_rate": 3.3036216267098685e-05, "loss": 2.2091, "step": 6850500 }, { "epoch": 33.94, "learning_rate": 3.30349776806726e-05, "loss": 2.1914, "step": 6851000 }, { "epoch": 33.94, "learning_rate": 3.303373909424652e-05, "loss": 2.2098, "step": 6851500 }, { "epoch": 33.95, "learning_rate": 3.3032500507820436e-05, "loss": 2.1919, "step": 6852000 }, { "epoch": 33.95, "learning_rate": 3.303126192139435e-05, "loss": 2.1861, "step": 6852500 }, { "epoch": 33.95, "learning_rate": 3.303002828931397e-05, "loss": 2.202, "step": 6853000 }, { "epoch": 33.95, "learning_rate": 3.302878970288789e-05, "loss": 2.2175, "step": 6853500 }, { "epoch": 33.96, "learning_rate": 3.302755111646181e-05, "loss": 2.2061, "step": 6854000 }, { "epoch": 33.96, "learning_rate": 3.3026312530035724e-05, "loss": 2.1985, "step": 6854500 }, { "epoch": 33.96, "learning_rate": 3.302507394360964e-05, "loss": 2.2186, "step": 6855000 }, { "epoch": 33.96, "learning_rate": 3.302383783435641e-05, "loss": 2.2056, "step": 6855500 }, { "epoch": 33.97, "learning_rate": 3.302259924793033e-05, "loss": 2.2069, "step": 6856000 }, { "epoch": 33.97, "learning_rate": 3.3021360661504244e-05, "loss": 2.2249, "step": 6856500 }, { "epoch": 33.97, "learning_rate": 3.302012207507816e-05, "loss": 2.2189, "step": 6857000 }, { "epoch": 33.97, "learning_rate": 3.301888348865208e-05, "loss": 2.2219, "step": 6857500 }, { "epoch": 33.98, "learning_rate": 3.3017644902225994e-05, "loss": 2.1775, "step": 6858000 }, { "epoch": 33.98, "learning_rate": 3.301640631579991e-05, "loss": 2.1861, "step": 6858500 }, { "epoch": 33.98, "learning_rate": 3.301516772937382e-05, "loss": 2.2028, "step": 6859000 }, { "epoch": 33.98, "learning_rate": 3.301393162012059e-05, "loss": 2.1903, "step": 6859500 }, { "epoch": 33.99, "learning_rate": 3.301269303369451e-05, "loss": 2.2199, "step": 6860000 }, { "epoch": 33.99, "learning_rate": 3.3011454447268424e-05, "loss": 2.2266, "step": 6860500 }, { "epoch": 33.99, "learning_rate": 3.301021586084234e-05, "loss": 2.223, "step": 6861000 }, { "epoch": 33.99, "learning_rate": 3.300897975158911e-05, "loss": 2.2017, "step": 6861500 }, { "epoch": 34.0, "learning_rate": 3.300774116516303e-05, "loss": 2.2275, "step": 6862000 }, { "epoch": 34.0, "learning_rate": 3.3006502578736944e-05, "loss": 2.1995, "step": 6862500 }, { "epoch": 34.0, "eval_accuracy": 0.6615834582009188, "eval_accuracy_mlm": 0.6176810786763514, "eval_accuracy_nsp": 0.8686612357280975, "eval_loss": 2.2982945442199707, "eval_runtime": 145.6589, "eval_samples_per_second": 1750.384, "eval_steps_per_second": 72.938, "step": 6862662 } ], "max_steps": 20184300, "num_train_epochs": 100, "total_flos": 8.884340001928284e+18, "trial_name": null, "trial_params": null }