diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,24342 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "global_step": 2018430, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9998768845092475e-05, + "loss": 3.3856, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753025866639e-05, + "loss": 3.1861, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999629167224031e-05, + "loss": 3.1416, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995053085814226e-05, + "loss": 3.0688, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999381449938814e-05, + "loss": 3.0467, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 4.999257591296206e-05, + "loss": 3.0166, + "step": 3000 + }, + { + "epoch": 0.02, + "learning_rate": 4.999133980370883e-05, + "loss": 3.0395, + "step": 3500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990101217282745e-05, + "loss": 2.9982, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988862630856655e-05, + "loss": 2.9531, + "step": 4500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987626521603424e-05, + "loss": 2.985, + "step": 5000 + }, + { + "epoch": 0.03, + "learning_rate": 4.998638793517734e-05, + "loss": 2.9528, + "step": 5500 + }, + { + "epoch": 0.03, + "learning_rate": 4.998514934875126e-05, + "loss": 2.9339, + "step": 6000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9983910762325175e-05, + "loss": 2.9353, + "step": 6500 + }, + { + "epoch": 0.03, + "learning_rate": 4.998267217589909e-05, + "loss": 2.9251, + "step": 7000 + }, + { + "epoch": 0.04, + "learning_rate": 4.998143358947301e-05, + "loss": 2.9249, + "step": 7500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9980195003046926e-05, + "loss": 2.922, + "step": 8000 + }, + { + "epoch": 0.04, + "learning_rate": 4.997895641662084e-05, + "loss": 2.9129, + "step": 8500 + }, + { + "epoch": 0.04, + "learning_rate": 4.997771783019476e-05, + "loss": 2.8805, + "step": 9000 + }, + { + "epoch": 0.05, + "learning_rate": 4.997647924376868e-05, + "loss": 2.8891, + "step": 9500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9975240657342594e-05, + "loss": 2.8833, + "step": 10000 + }, + { + "epoch": 0.05, + "learning_rate": 4.997400207091651e-05, + "loss": 2.9069, + "step": 10500 + }, + { + "epoch": 0.05, + "learning_rate": 4.997276596166328e-05, + "loss": 2.8774, + "step": 11000 + }, + { + "epoch": 0.06, + "learning_rate": 4.997152737523719e-05, + "loss": 2.8608, + "step": 11500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9970288788811106e-05, + "loss": 2.8853, + "step": 12000 + }, + { + "epoch": 0.06, + "learning_rate": 4.996905020238502e-05, + "loss": 2.854, + "step": 12500 + }, + { + "epoch": 0.06, + "learning_rate": 4.996781161595894e-05, + "loss": 2.8447, + "step": 13000 + }, + { + "epoch": 0.07, + "learning_rate": 4.996657550670571e-05, + "loss": 2.8466, + "step": 13500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9965336920279626e-05, + "loss": 2.8537, + "step": 14000 + }, + { + "epoch": 0.07, + "learning_rate": 4.996409833385354e-05, + "loss": 2.8332, + "step": 14500 + }, + { + "epoch": 0.07, + "learning_rate": 4.996285974742746e-05, + "loss": 2.8452, + "step": 15000 + }, + { + "epoch": 0.08, + "learning_rate": 4.996162363817423e-05, + "loss": 2.8432, + "step": 15500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9960385051748146e-05, + "loss": 2.846, + "step": 16000 + }, + { + "epoch": 0.08, + "learning_rate": 4.995914646532206e-05, + "loss": 2.8418, + "step": 16500 + }, + { + "epoch": 0.08, + "learning_rate": 4.995790787889598e-05, + "loss": 2.8401, + "step": 17000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9956669292469896e-05, + "loss": 2.8253, + "step": 17500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9955430706043807e-05, + "loss": 2.8343, + "step": 18000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9954192119617723e-05, + "loss": 2.8231, + "step": 18500 + }, + { + "epoch": 0.09, + "learning_rate": 4.995295353319164e-05, + "loss": 2.7981, + "step": 19000 + }, + { + "epoch": 0.1, + "learning_rate": 4.995171742393841e-05, + "loss": 2.8358, + "step": 19500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9950478837512326e-05, + "loss": 2.825, + "step": 20000 + }, + { + "epoch": 0.1, + "learning_rate": 4.994924025108624e-05, + "loss": 2.7965, + "step": 20500 + }, + { + "epoch": 0.1, + "learning_rate": 4.994800166466016e-05, + "loss": 2.8224, + "step": 21000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9946768032579774e-05, + "loss": 2.8108, + "step": 21500 + }, + { + "epoch": 0.11, + "learning_rate": 4.994552944615369e-05, + "loss": 2.8076, + "step": 22000 + }, + { + "epoch": 0.11, + "learning_rate": 4.994429085972761e-05, + "loss": 2.8013, + "step": 22500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9943052273301525e-05, + "loss": 2.818, + "step": 23000 + }, + { + "epoch": 0.12, + "learning_rate": 4.99418161640483e-05, + "loss": 2.8436, + "step": 23500 + }, + { + "epoch": 0.12, + "learning_rate": 4.994057757762222e-05, + "loss": 2.8163, + "step": 24000 + }, + { + "epoch": 0.12, + "learning_rate": 4.993933899119613e-05, + "loss": 2.7991, + "step": 24500 + }, + { + "epoch": 0.12, + "learning_rate": 4.9938100404770044e-05, + "loss": 2.7893, + "step": 25000 + }, + { + "epoch": 0.13, + "learning_rate": 4.993686181834396e-05, + "loss": 2.7931, + "step": 25500 + }, + { + "epoch": 0.13, + "learning_rate": 4.993562323191788e-05, + "loss": 2.8015, + "step": 26000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9934384645491795e-05, + "loss": 2.7764, + "step": 26500 + }, + { + "epoch": 0.13, + "learning_rate": 4.993314605906571e-05, + "loss": 2.7875, + "step": 27000 + }, + { + "epoch": 0.14, + "learning_rate": 4.993190747263963e-05, + "loss": 2.8176, + "step": 27500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9930668886213546e-05, + "loss": 2.7744, + "step": 28000 + }, + { + "epoch": 0.14, + "learning_rate": 4.992943029978746e-05, + "loss": 2.8011, + "step": 28500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9928194190534225e-05, + "loss": 2.7897, + "step": 29000 + }, + { + "epoch": 0.15, + "learning_rate": 4.992695560410814e-05, + "loss": 2.7773, + "step": 29500 + }, + { + "epoch": 0.15, + "learning_rate": 4.992571701768206e-05, + "loss": 2.7895, + "step": 30000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9924478431255976e-05, + "loss": 2.795, + "step": 30500 + }, + { + "epoch": 0.15, + "learning_rate": 4.992323984482989e-05, + "loss": 2.7796, + "step": 31000 + }, + { + "epoch": 0.16, + "learning_rate": 4.992200125840381e-05, + "loss": 2.7868, + "step": 31500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9920762671977726e-05, + "loss": 2.8024, + "step": 32000 + }, + { + "epoch": 0.16, + "learning_rate": 4.991952408555164e-05, + "loss": 2.7609, + "step": 32500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9918290453471264e-05, + "loss": 2.7815, + "step": 33000 + }, + { + "epoch": 0.17, + "learning_rate": 4.991705186704518e-05, + "loss": 2.7409, + "step": 33500 + }, + { + "epoch": 0.17, + "learning_rate": 4.99158132806191e-05, + "loss": 2.7607, + "step": 34000 + }, + { + "epoch": 0.17, + "learning_rate": 4.9914574694193015e-05, + "loss": 2.7929, + "step": 34500 + }, + { + "epoch": 0.17, + "learning_rate": 4.9913336107766925e-05, + "loss": 2.782, + "step": 35000 + }, + { + "epoch": 0.18, + "learning_rate": 4.99120999985137e-05, + "loss": 2.785, + "step": 35500 + }, + { + "epoch": 0.18, + "learning_rate": 4.991086141208762e-05, + "loss": 2.7697, + "step": 36000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9909622825661534e-05, + "loss": 2.7626, + "step": 36500 + }, + { + "epoch": 0.18, + "learning_rate": 4.99083867164083e-05, + "loss": 2.7679, + "step": 37000 + }, + { + "epoch": 0.19, + "learning_rate": 4.990714812998222e-05, + "loss": 2.7565, + "step": 37500 + }, + { + "epoch": 0.19, + "learning_rate": 4.990590954355614e-05, + "loss": 2.7848, + "step": 38000 + }, + { + "epoch": 0.19, + "learning_rate": 4.99046734343029e-05, + "loss": 2.7555, + "step": 38500 + }, + { + "epoch": 0.19, + "learning_rate": 4.9903434847876816e-05, + "loss": 2.7742, + "step": 39000 + }, + { + "epoch": 0.2, + "learning_rate": 4.990219626145073e-05, + "loss": 2.7821, + "step": 39500 + }, + { + "epoch": 0.2, + "learning_rate": 4.990095767502465e-05, + "loss": 2.7616, + "step": 40000 + }, + { + "epoch": 0.2, + "learning_rate": 4.989971908859857e-05, + "loss": 2.7711, + "step": 40500 + }, + { + "epoch": 0.2, + "learning_rate": 4.9898480502172484e-05, + "loss": 2.7499, + "step": 41000 + }, + { + "epoch": 0.21, + "learning_rate": 4.98972419157464e-05, + "loss": 2.7549, + "step": 41500 + }, + { + "epoch": 0.21, + "learning_rate": 4.989600332932032e-05, + "loss": 2.7782, + "step": 42000 + }, + { + "epoch": 0.21, + "learning_rate": 4.9894764742894234e-05, + "loss": 2.748, + "step": 42500 + }, + { + "epoch": 0.21, + "learning_rate": 4.9893528633641e-05, + "loss": 2.7325, + "step": 43000 + }, + { + "epoch": 0.22, + "learning_rate": 4.989229004721492e-05, + "loss": 2.7488, + "step": 43500 + }, + { + "epoch": 0.22, + "learning_rate": 4.989105146078884e-05, + "loss": 2.7606, + "step": 44000 + }, + { + "epoch": 0.22, + "learning_rate": 4.9889812874362754e-05, + "loss": 2.7445, + "step": 44500 + }, + { + "epoch": 0.22, + "learning_rate": 4.988857428793667e-05, + "loss": 2.76, + "step": 45000 + }, + { + "epoch": 0.23, + "learning_rate": 4.988733570151059e-05, + "loss": 2.7375, + "step": 45500 + }, + { + "epoch": 0.23, + "learning_rate": 4.98860971150845e-05, + "loss": 2.7624, + "step": 46000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9884858528658415e-05, + "loss": 2.7588, + "step": 46500 + }, + { + "epoch": 0.23, + "learning_rate": 4.9883622419405184e-05, + "loss": 2.7508, + "step": 47000 + }, + { + "epoch": 0.24, + "learning_rate": 4.988238631015195e-05, + "loss": 2.7562, + "step": 47500 + }, + { + "epoch": 0.24, + "learning_rate": 4.988114772372587e-05, + "loss": 2.7546, + "step": 48000 + }, + { + "epoch": 0.24, + "learning_rate": 4.9879909137299787e-05, + "loss": 2.7657, + "step": 48500 + }, + { + "epoch": 0.24, + "learning_rate": 4.9878670550873703e-05, + "loss": 2.741, + "step": 49000 + }, + { + "epoch": 0.25, + "learning_rate": 4.9877434441620465e-05, + "loss": 2.7813, + "step": 49500 + }, + { + "epoch": 0.25, + "learning_rate": 4.987619585519438e-05, + "loss": 2.7575, + "step": 50000 + }, + { + "epoch": 0.25, + "learning_rate": 4.98749572687683e-05, + "loss": 2.7419, + "step": 50500 + }, + { + "epoch": 0.25, + "learning_rate": 4.9873718682342216e-05, + "loss": 2.7422, + "step": 51000 + }, + { + "epoch": 0.26, + "learning_rate": 4.987248009591613e-05, + "loss": 2.7333, + "step": 51500 + }, + { + "epoch": 0.26, + "learning_rate": 4.987124150949005e-05, + "loss": 2.743, + "step": 52000 + }, + { + "epoch": 0.26, + "learning_rate": 4.987000540023682e-05, + "loss": 2.7217, + "step": 52500 + }, + { + "epoch": 0.26, + "learning_rate": 4.9868766813810736e-05, + "loss": 2.7355, + "step": 53000 + }, + { + "epoch": 0.27, + "learning_rate": 4.986752822738465e-05, + "loss": 2.7393, + "step": 53500 + }, + { + "epoch": 0.27, + "learning_rate": 4.986628964095857e-05, + "loss": 2.7279, + "step": 54000 + }, + { + "epoch": 0.27, + "learning_rate": 4.986505105453249e-05, + "loss": 2.7535, + "step": 54500 + }, + { + "epoch": 0.27, + "learning_rate": 4.9863812468106404e-05, + "loss": 2.7389, + "step": 55000 + }, + { + "epoch": 0.27, + "learning_rate": 4.986257635885317e-05, + "loss": 2.7488, + "step": 55500 + }, + { + "epoch": 0.28, + "learning_rate": 4.986134024959994e-05, + "loss": 2.7452, + "step": 56000 + }, + { + "epoch": 0.28, + "learning_rate": 4.986010166317385e-05, + "loss": 2.7503, + "step": 56500 + }, + { + "epoch": 0.28, + "learning_rate": 4.985886307674777e-05, + "loss": 2.7213, + "step": 57000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9857624490321685e-05, + "loss": 2.7421, + "step": 57500 + }, + { + "epoch": 0.29, + "learning_rate": 4.98563859038956e-05, + "loss": 2.7295, + "step": 58000 + }, + { + "epoch": 0.29, + "learning_rate": 4.985514731746952e-05, + "loss": 2.7164, + "step": 58500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9853908731043436e-05, + "loss": 2.7262, + "step": 59000 + }, + { + "epoch": 0.29, + "learning_rate": 4.985267014461735e-05, + "loss": 2.734, + "step": 59500 + }, + { + "epoch": 0.3, + "learning_rate": 4.985143155819127e-05, + "loss": 2.7186, + "step": 60000 + }, + { + "epoch": 0.3, + "learning_rate": 4.985019297176519e-05, + "loss": 2.739, + "step": 60500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9848954385339104e-05, + "loss": 2.7356, + "step": 61000 + }, + { + "epoch": 0.3, + "learning_rate": 4.984771579891302e-05, + "loss": 2.7406, + "step": 61500 + }, + { + "epoch": 0.31, + "learning_rate": 4.984647968965979e-05, + "loss": 2.7167, + "step": 62000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9845241103233706e-05, + "loss": 2.6958, + "step": 62500 + }, + { + "epoch": 0.31, + "learning_rate": 4.984400499398047e-05, + "loss": 2.7223, + "step": 63000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9842766407554385e-05, + "loss": 2.7279, + "step": 63500 + }, + { + "epoch": 0.32, + "learning_rate": 4.98415278211283e-05, + "loss": 2.7186, + "step": 64000 + }, + { + "epoch": 0.32, + "learning_rate": 4.984029171187508e-05, + "loss": 2.7359, + "step": 64500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9839053125448995e-05, + "loss": 2.7288, + "step": 65000 + }, + { + "epoch": 0.32, + "learning_rate": 4.983781453902291e-05, + "loss": 2.7157, + "step": 65500 + }, + { + "epoch": 0.33, + "learning_rate": 4.983657595259682e-05, + "loss": 2.7194, + "step": 66000 + }, + { + "epoch": 0.33, + "learning_rate": 4.983533736617074e-05, + "loss": 2.7106, + "step": 66500 + }, + { + "epoch": 0.33, + "learning_rate": 4.9834098779744656e-05, + "loss": 2.7313, + "step": 67000 + }, + { + "epoch": 0.33, + "learning_rate": 4.983286019331857e-05, + "loss": 2.7072, + "step": 67500 + }, + { + "epoch": 0.34, + "learning_rate": 4.983162160689249e-05, + "loss": 2.7392, + "step": 68000 + }, + { + "epoch": 0.34, + "learning_rate": 4.9830383020466406e-05, + "loss": 2.7083, + "step": 68500 + }, + { + "epoch": 0.34, + "learning_rate": 4.982914691121317e-05, + "loss": 2.7407, + "step": 69000 + }, + { + "epoch": 0.34, + "learning_rate": 4.9827908324787085e-05, + "loss": 2.7374, + "step": 69500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9826669738361e-05, + "loss": 2.7151, + "step": 70000 + }, + { + "epoch": 0.35, + "learning_rate": 4.982543115193492e-05, + "loss": 2.7057, + "step": 70500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9824192565508836e-05, + "loss": 2.7337, + "step": 71000 + }, + { + "epoch": 0.35, + "learning_rate": 4.982295397908275e-05, + "loss": 2.7017, + "step": 71500 + }, + { + "epoch": 0.36, + "learning_rate": 4.982171786982952e-05, + "loss": 2.7105, + "step": 72000 + }, + { + "epoch": 0.36, + "learning_rate": 4.982047928340344e-05, + "loss": 2.7399, + "step": 72500 + }, + { + "epoch": 0.36, + "learning_rate": 4.981924565132306e-05, + "loss": 2.707, + "step": 73000 + }, + { + "epoch": 0.36, + "learning_rate": 4.981800954206983e-05, + "loss": 2.7199, + "step": 73500 + }, + { + "epoch": 0.37, + "learning_rate": 4.9816770955643745e-05, + "loss": 2.7107, + "step": 74000 + }, + { + "epoch": 0.37, + "learning_rate": 4.981553236921766e-05, + "loss": 2.7136, + "step": 74500 + }, + { + "epoch": 0.37, + "learning_rate": 4.981429378279158e-05, + "loss": 2.711, + "step": 75000 + }, + { + "epoch": 0.37, + "learning_rate": 4.9813055196365496e-05, + "loss": 2.705, + "step": 75500 + }, + { + "epoch": 0.38, + "learning_rate": 4.981181908711226e-05, + "loss": 2.672, + "step": 76000 + }, + { + "epoch": 0.38, + "learning_rate": 4.9810580500686175e-05, + "loss": 2.7155, + "step": 76500 + }, + { + "epoch": 0.38, + "learning_rate": 4.980934191426009e-05, + "loss": 2.7097, + "step": 77000 + }, + { + "epoch": 0.38, + "learning_rate": 4.980810332783401e-05, + "loss": 2.7019, + "step": 77500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9806864741407926e-05, + "loss": 2.7046, + "step": 78000 + }, + { + "epoch": 0.39, + "learning_rate": 4.980562615498184e-05, + "loss": 2.7231, + "step": 78500 + }, + { + "epoch": 0.39, + "learning_rate": 4.980438756855576e-05, + "loss": 2.7074, + "step": 79000 + }, + { + "epoch": 0.39, + "learning_rate": 4.980314898212968e-05, + "loss": 2.7098, + "step": 79500 + }, + { + "epoch": 0.4, + "learning_rate": 4.9801910395703594e-05, + "loss": 2.7095, + "step": 80000 + }, + { + "epoch": 0.4, + "learning_rate": 4.980067180927751e-05, + "loss": 2.6981, + "step": 80500 + }, + { + "epoch": 0.4, + "learning_rate": 4.979943322285143e-05, + "loss": 2.7144, + "step": 81000 + }, + { + "epoch": 0.4, + "learning_rate": 4.9798194636425344e-05, + "loss": 2.7018, + "step": 81500 + }, + { + "epoch": 0.41, + "learning_rate": 4.979695604999926e-05, + "loss": 2.7057, + "step": 82000 + }, + { + "epoch": 0.41, + "learning_rate": 4.979571746357318e-05, + "loss": 2.6865, + "step": 82500 + }, + { + "epoch": 0.41, + "learning_rate": 4.9794478877147095e-05, + "loss": 2.7394, + "step": 83000 + }, + { + "epoch": 0.41, + "learning_rate": 4.979324029072101e-05, + "loss": 2.7101, + "step": 83500 + }, + { + "epoch": 0.42, + "learning_rate": 4.979200170429493e-05, + "loss": 2.7016, + "step": 84000 + }, + { + "epoch": 0.42, + "learning_rate": 4.979076311786884e-05, + "loss": 2.6752, + "step": 84500 + }, + { + "epoch": 0.42, + "learning_rate": 4.9789524531442756e-05, + "loss": 2.7344, + "step": 85000 + }, + { + "epoch": 0.42, + "learning_rate": 4.978828594501667e-05, + "loss": 2.7173, + "step": 85500 + }, + { + "epoch": 0.43, + "learning_rate": 4.978704735859059e-05, + "loss": 2.7205, + "step": 86000 + }, + { + "epoch": 0.43, + "learning_rate": 4.978581124933736e-05, + "loss": 2.6739, + "step": 86500 + }, + { + "epoch": 0.43, + "learning_rate": 4.9784572662911276e-05, + "loss": 2.7142, + "step": 87000 + }, + { + "epoch": 0.43, + "learning_rate": 4.978333407648519e-05, + "loss": 2.6928, + "step": 87500 + }, + { + "epoch": 0.44, + "learning_rate": 4.97820954900591e-05, + "loss": 2.722, + "step": 88000 + }, + { + "epoch": 0.44, + "learning_rate": 4.978085690363302e-05, + "loss": 2.7314, + "step": 88500 + }, + { + "epoch": 0.44, + "learning_rate": 4.9779620794379795e-05, + "loss": 2.7161, + "step": 89000 + }, + { + "epoch": 0.44, + "learning_rate": 4.977838220795371e-05, + "loss": 2.7042, + "step": 89500 + }, + { + "epoch": 0.45, + "learning_rate": 4.977714362152763e-05, + "loss": 2.7171, + "step": 90000 + }, + { + "epoch": 0.45, + "learning_rate": 4.9775905035101546e-05, + "loss": 2.7061, + "step": 90500 + }, + { + "epoch": 0.45, + "learning_rate": 4.977466892584831e-05, + "loss": 2.6724, + "step": 91000 + }, + { + "epoch": 0.45, + "learning_rate": 4.9773430339422225e-05, + "loss": 2.6936, + "step": 91500 + }, + { + "epoch": 0.46, + "learning_rate": 4.977219175299614e-05, + "loss": 2.709, + "step": 92000 + }, + { + "epoch": 0.46, + "learning_rate": 4.977095316657006e-05, + "loss": 2.7078, + "step": 92500 + }, + { + "epoch": 0.46, + "learning_rate": 4.976971705731683e-05, + "loss": 2.6722, + "step": 93000 + }, + { + "epoch": 0.46, + "learning_rate": 4.9768478470890745e-05, + "loss": 2.6995, + "step": 93500 + }, + { + "epoch": 0.47, + "learning_rate": 4.976723988446466e-05, + "loss": 2.6995, + "step": 94000 + }, + { + "epoch": 0.47, + "learning_rate": 4.976600129803858e-05, + "loss": 2.6942, + "step": 94500 + }, + { + "epoch": 0.47, + "learning_rate": 4.9764762711612495e-05, + "loss": 2.69, + "step": 95000 + }, + { + "epoch": 0.47, + "learning_rate": 4.9763526602359264e-05, + "loss": 2.7196, + "step": 95500 + }, + { + "epoch": 0.48, + "learning_rate": 4.976228801593318e-05, + "loss": 2.6655, + "step": 96000 + }, + { + "epoch": 0.48, + "learning_rate": 4.97610494295071e-05, + "loss": 2.7035, + "step": 96500 + }, + { + "epoch": 0.48, + "learning_rate": 4.9759810843081015e-05, + "loss": 2.7225, + "step": 97000 + }, + { + "epoch": 0.48, + "learning_rate": 4.9758572256654925e-05, + "loss": 2.6971, + "step": 97500 + }, + { + "epoch": 0.49, + "learning_rate": 4.975733367022884e-05, + "loss": 2.7039, + "step": 98000 + }, + { + "epoch": 0.49, + "learning_rate": 4.975609508380276e-05, + "loss": 2.6814, + "step": 98500 + }, + { + "epoch": 0.49, + "learning_rate": 4.9754856497376676e-05, + "loss": 2.7068, + "step": 99000 + }, + { + "epoch": 0.49, + "learning_rate": 4.975361791095059e-05, + "loss": 2.6989, + "step": 99500 + }, + { + "epoch": 0.5, + "learning_rate": 4.975237932452451e-05, + "loss": 2.6904, + "step": 100000 + }, + { + "epoch": 0.5, + "learning_rate": 4.975114073809842e-05, + "loss": 2.6863, + "step": 100500 + }, + { + "epoch": 0.5, + "learning_rate": 4.974990215167234e-05, + "loss": 2.6785, + "step": 101000 + }, + { + "epoch": 0.5, + "learning_rate": 4.974866604241911e-05, + "loss": 2.6996, + "step": 101500 + }, + { + "epoch": 0.51, + "learning_rate": 4.974742993316588e-05, + "loss": 2.7096, + "step": 102000 + }, + { + "epoch": 0.51, + "learning_rate": 4.97461913467398e-05, + "loss": 2.6979, + "step": 102500 + }, + { + "epoch": 0.51, + "learning_rate": 4.9744952760313715e-05, + "loss": 2.6932, + "step": 103000 + }, + { + "epoch": 0.51, + "learning_rate": 4.974371417388763e-05, + "loss": 2.6986, + "step": 103500 + }, + { + "epoch": 0.52, + "learning_rate": 4.974247558746155e-05, + "loss": 2.6888, + "step": 104000 + }, + { + "epoch": 0.52, + "learning_rate": 4.974123700103546e-05, + "loss": 2.6854, + "step": 104500 + }, + { + "epoch": 0.52, + "learning_rate": 4.974000089178223e-05, + "loss": 2.697, + "step": 105000 + }, + { + "epoch": 0.52, + "learning_rate": 4.9738762305356145e-05, + "loss": 2.6931, + "step": 105500 + }, + { + "epoch": 0.53, + "learning_rate": 4.973752371893006e-05, + "loss": 2.7098, + "step": 106000 + }, + { + "epoch": 0.53, + "learning_rate": 4.973628513250398e-05, + "loss": 2.6989, + "step": 106500 + }, + { + "epoch": 0.53, + "learning_rate": 4.9735046546077896e-05, + "loss": 2.7059, + "step": 107000 + }, + { + "epoch": 0.53, + "learning_rate": 4.9733810436824664e-05, + "loss": 2.6612, + "step": 107500 + }, + { + "epoch": 0.54, + "learning_rate": 4.973257185039858e-05, + "loss": 2.6945, + "step": 108000 + }, + { + "epoch": 0.54, + "learning_rate": 4.97313332639725e-05, + "loss": 2.6951, + "step": 108500 + }, + { + "epoch": 0.54, + "learning_rate": 4.9730094677546415e-05, + "loss": 2.6871, + "step": 109000 + }, + { + "epoch": 0.54, + "learning_rate": 4.972885609112033e-05, + "loss": 2.6909, + "step": 109500 + }, + { + "epoch": 0.54, + "learning_rate": 4.972761750469425e-05, + "loss": 2.6948, + "step": 110000 + }, + { + "epoch": 0.55, + "learning_rate": 4.972638139544101e-05, + "loss": 2.7037, + "step": 110500 + }, + { + "epoch": 0.55, + "learning_rate": 4.972514280901493e-05, + "loss": 2.6804, + "step": 111000 + }, + { + "epoch": 0.55, + "learning_rate": 4.9723904222588845e-05, + "loss": 2.6895, + "step": 111500 + }, + { + "epoch": 0.55, + "learning_rate": 4.972266563616276e-05, + "loss": 2.6878, + "step": 112000 + }, + { + "epoch": 0.56, + "learning_rate": 4.972142704973668e-05, + "loss": 2.6916, + "step": 112500 + }, + { + "epoch": 0.56, + "learning_rate": 4.9720188463310596e-05, + "loss": 2.6843, + "step": 113000 + }, + { + "epoch": 0.56, + "learning_rate": 4.9718952354057365e-05, + "loss": 2.6903, + "step": 113500 + }, + { + "epoch": 0.56, + "learning_rate": 4.971771376763128e-05, + "loss": 2.6777, + "step": 114000 + }, + { + "epoch": 0.57, + "learning_rate": 4.97164751812052e-05, + "loss": 2.7029, + "step": 114500 + }, + { + "epoch": 0.57, + "learning_rate": 4.9715236594779115e-05, + "loss": 2.7026, + "step": 115000 + }, + { + "epoch": 0.57, + "learning_rate": 4.971399800835303e-05, + "loss": 2.6807, + "step": 115500 + }, + { + "epoch": 0.57, + "learning_rate": 4.971275942192695e-05, + "loss": 2.6868, + "step": 116000 + }, + { + "epoch": 0.58, + "learning_rate": 4.9711520835500866e-05, + "loss": 2.6976, + "step": 116500 + }, + { + "epoch": 0.58, + "learning_rate": 4.971028472624763e-05, + "loss": 2.6817, + "step": 117000 + }, + { + "epoch": 0.58, + "learning_rate": 4.9709046139821545e-05, + "loss": 2.6925, + "step": 117500 + }, + { + "epoch": 0.58, + "learning_rate": 4.970780755339546e-05, + "loss": 2.6972, + "step": 118000 + }, + { + "epoch": 0.59, + "learning_rate": 4.970656896696938e-05, + "loss": 2.6853, + "step": 118500 + }, + { + "epoch": 0.59, + "learning_rate": 4.9705330380543296e-05, + "loss": 2.6735, + "step": 119000 + }, + { + "epoch": 0.59, + "learning_rate": 4.970409179411721e-05, + "loss": 2.702, + "step": 119500 + }, + { + "epoch": 0.59, + "learning_rate": 4.970285320769113e-05, + "loss": 2.6876, + "step": 120000 + }, + { + "epoch": 0.6, + "learning_rate": 4.970161462126505e-05, + "loss": 2.6829, + "step": 120500 + }, + { + "epoch": 0.6, + "learning_rate": 4.9700378512011815e-05, + "loss": 2.6596, + "step": 121000 + }, + { + "epoch": 0.6, + "learning_rate": 4.969913992558573e-05, + "loss": 2.6907, + "step": 121500 + }, + { + "epoch": 0.6, + "learning_rate": 4.969790133915965e-05, + "loss": 2.6968, + "step": 122000 + }, + { + "epoch": 0.61, + "learning_rate": 4.9696662752733566e-05, + "loss": 2.6804, + "step": 122500 + }, + { + "epoch": 0.61, + "learning_rate": 4.969542664348033e-05, + "loss": 2.6914, + "step": 123000 + }, + { + "epoch": 0.61, + "learning_rate": 4.96941905342271e-05, + "loss": 2.6812, + "step": 123500 + }, + { + "epoch": 0.61, + "learning_rate": 4.9692951947801014e-05, + "loss": 2.6607, + "step": 124000 + }, + { + "epoch": 0.62, + "learning_rate": 4.969171336137493e-05, + "loss": 2.686, + "step": 124500 + }, + { + "epoch": 0.62, + "learning_rate": 4.969047477494885e-05, + "loss": 2.6591, + "step": 125000 + }, + { + "epoch": 0.62, + "learning_rate": 4.9689236188522765e-05, + "loss": 2.6824, + "step": 125500 + }, + { + "epoch": 0.62, + "learning_rate": 4.968799760209668e-05, + "loss": 2.694, + "step": 126000 + }, + { + "epoch": 0.63, + "learning_rate": 4.968676149284345e-05, + "loss": 2.6881, + "step": 126500 + }, + { + "epoch": 0.63, + "learning_rate": 4.968552290641737e-05, + "loss": 2.7259, + "step": 127000 + }, + { + "epoch": 0.63, + "learning_rate": 4.9684284319991284e-05, + "loss": 2.68, + "step": 127500 + }, + { + "epoch": 0.63, + "learning_rate": 4.9683045733565195e-05, + "loss": 2.6683, + "step": 128000 + }, + { + "epoch": 0.64, + "learning_rate": 4.968180714713911e-05, + "loss": 2.6481, + "step": 128500 + }, + { + "epoch": 0.64, + "learning_rate": 4.968056856071303e-05, + "loss": 2.6635, + "step": 129000 + }, + { + "epoch": 0.64, + "learning_rate": 4.96793324514598e-05, + "loss": 2.6736, + "step": 129500 + }, + { + "epoch": 0.64, + "learning_rate": 4.9678093865033714e-05, + "loss": 2.7035, + "step": 130000 + }, + { + "epoch": 0.65, + "learning_rate": 4.967685527860763e-05, + "loss": 2.6893, + "step": 130500 + }, + { + "epoch": 0.65, + "learning_rate": 4.967561669218155e-05, + "loss": 2.6946, + "step": 131000 + }, + { + "epoch": 0.65, + "learning_rate": 4.9674378105755465e-05, + "loss": 2.6781, + "step": 131500 + }, + { + "epoch": 0.65, + "learning_rate": 4.967313951932938e-05, + "loss": 2.6633, + "step": 132000 + }, + { + "epoch": 0.66, + "learning_rate": 4.96719009329033e-05, + "loss": 2.6894, + "step": 132500 + }, + { + "epoch": 0.66, + "learning_rate": 4.9670662346477216e-05, + "loss": 2.6659, + "step": 133000 + }, + { + "epoch": 0.66, + "learning_rate": 4.966942376005113e-05, + "loss": 2.668, + "step": 133500 + }, + { + "epoch": 0.66, + "learning_rate": 4.966818517362505e-05, + "loss": 2.6745, + "step": 134000 + }, + { + "epoch": 0.67, + "learning_rate": 4.9666946587198966e-05, + "loss": 2.6407, + "step": 134500 + }, + { + "epoch": 0.67, + "learning_rate": 4.966571047794573e-05, + "loss": 2.6832, + "step": 135000 + }, + { + "epoch": 0.67, + "learning_rate": 4.96644743686925e-05, + "loss": 2.6761, + "step": 135500 + }, + { + "epoch": 0.67, + "learning_rate": 4.9663235782266414e-05, + "loss": 2.7023, + "step": 136000 + }, + { + "epoch": 0.68, + "learning_rate": 4.966199719584033e-05, + "loss": 2.6568, + "step": 136500 + }, + { + "epoch": 0.68, + "learning_rate": 4.966075860941425e-05, + "loss": 2.6889, + "step": 137000 + }, + { + "epoch": 0.68, + "learning_rate": 4.9659520022988165e-05, + "loss": 2.6811, + "step": 137500 + }, + { + "epoch": 0.68, + "learning_rate": 4.965828391373494e-05, + "loss": 2.6648, + "step": 138000 + }, + { + "epoch": 0.69, + "learning_rate": 4.965704532730885e-05, + "loss": 2.668, + "step": 138500 + }, + { + "epoch": 0.69, + "learning_rate": 4.965580674088277e-05, + "loss": 2.6594, + "step": 139000 + }, + { + "epoch": 0.69, + "learning_rate": 4.9654568154456685e-05, + "loss": 2.6537, + "step": 139500 + }, + { + "epoch": 0.69, + "learning_rate": 4.96533295680306e-05, + "loss": 2.7002, + "step": 140000 + }, + { + "epoch": 0.7, + "learning_rate": 4.965209345877737e-05, + "loss": 2.679, + "step": 140500 + }, + { + "epoch": 0.7, + "learning_rate": 4.965085487235129e-05, + "loss": 2.6813, + "step": 141000 + }, + { + "epoch": 0.7, + "learning_rate": 4.9649618763098056e-05, + "loss": 2.6977, + "step": 141500 + }, + { + "epoch": 0.7, + "learning_rate": 4.964838017667197e-05, + "loss": 2.6734, + "step": 142000 + }, + { + "epoch": 0.71, + "learning_rate": 4.964714159024589e-05, + "loss": 2.6827, + "step": 142500 + }, + { + "epoch": 0.71, + "learning_rate": 4.964590300381981e-05, + "loss": 2.6551, + "step": 143000 + }, + { + "epoch": 0.71, + "learning_rate": 4.9644664417393724e-05, + "loss": 2.6671, + "step": 143500 + }, + { + "epoch": 0.71, + "learning_rate": 4.964342583096764e-05, + "loss": 2.6576, + "step": 144000 + }, + { + "epoch": 0.72, + "learning_rate": 4.96421897217144e-05, + "loss": 2.6725, + "step": 144500 + }, + { + "epoch": 0.72, + "learning_rate": 4.964095113528832e-05, + "loss": 2.6767, + "step": 145000 + }, + { + "epoch": 0.72, + "learning_rate": 4.963971254886224e-05, + "loss": 2.6836, + "step": 145500 + }, + { + "epoch": 0.72, + "learning_rate": 4.9638473962436154e-05, + "loss": 2.6378, + "step": 146000 + }, + { + "epoch": 0.73, + "learning_rate": 4.963723537601007e-05, + "loss": 2.6721, + "step": 146500 + }, + { + "epoch": 0.73, + "learning_rate": 4.963599678958399e-05, + "loss": 2.7062, + "step": 147000 + }, + { + "epoch": 0.73, + "learning_rate": 4.9634758203157904e-05, + "loss": 2.6502, + "step": 147500 + }, + { + "epoch": 0.73, + "learning_rate": 4.9633519616731815e-05, + "loss": 2.6751, + "step": 148000 + }, + { + "epoch": 0.74, + "learning_rate": 4.963228103030573e-05, + "loss": 2.6675, + "step": 148500 + }, + { + "epoch": 0.74, + "learning_rate": 4.963104244387965e-05, + "loss": 2.6572, + "step": 149000 + }, + { + "epoch": 0.74, + "learning_rate": 4.9629803857453565e-05, + "loss": 2.6735, + "step": 149500 + }, + { + "epoch": 0.74, + "learning_rate": 4.962856774820034e-05, + "loss": 2.6621, + "step": 150000 + }, + { + "epoch": 0.75, + "learning_rate": 4.962732916177426e-05, + "loss": 2.6808, + "step": 150500 + }, + { + "epoch": 0.75, + "learning_rate": 4.962609057534817e-05, + "loss": 2.6627, + "step": 151000 + }, + { + "epoch": 0.75, + "learning_rate": 4.9624851988922085e-05, + "loss": 2.6605, + "step": 151500 + }, + { + "epoch": 0.75, + "learning_rate": 4.9623613402496e-05, + "loss": 2.6853, + "step": 152000 + }, + { + "epoch": 0.76, + "learning_rate": 4.962237481606992e-05, + "loss": 2.6536, + "step": 152500 + }, + { + "epoch": 0.76, + "learning_rate": 4.9621136229643836e-05, + "loss": 2.6741, + "step": 153000 + }, + { + "epoch": 0.76, + "learning_rate": 4.961989764321775e-05, + "loss": 2.6637, + "step": 153500 + }, + { + "epoch": 0.76, + "learning_rate": 4.9618666488310225e-05, + "loss": 2.65, + "step": 154000 + }, + { + "epoch": 0.77, + "learning_rate": 4.961742790188414e-05, + "loss": 2.6598, + "step": 154500 + }, + { + "epoch": 0.77, + "learning_rate": 4.961618931545806e-05, + "loss": 2.6615, + "step": 155000 + }, + { + "epoch": 0.77, + "learning_rate": 4.9614950729031976e-05, + "loss": 2.6592, + "step": 155500 + }, + { + "epoch": 0.77, + "learning_rate": 4.9613712142605886e-05, + "loss": 2.6707, + "step": 156000 + }, + { + "epoch": 0.78, + "learning_rate": 4.96124735561798e-05, + "loss": 2.6291, + "step": 156500 + }, + { + "epoch": 0.78, + "learning_rate": 4.961123496975372e-05, + "loss": 2.6811, + "step": 157000 + }, + { + "epoch": 0.78, + "learning_rate": 4.960999638332764e-05, + "loss": 2.6534, + "step": 157500 + }, + { + "epoch": 0.78, + "learning_rate": 4.9608757796901554e-05, + "loss": 2.6623, + "step": 158000 + }, + { + "epoch": 0.79, + "learning_rate": 4.960751921047547e-05, + "loss": 2.6498, + "step": 158500 + }, + { + "epoch": 0.79, + "learning_rate": 4.960628062404939e-05, + "loss": 2.6617, + "step": 159000 + }, + { + "epoch": 0.79, + "learning_rate": 4.9605042037623305e-05, + "loss": 2.6545, + "step": 159500 + }, + { + "epoch": 0.79, + "learning_rate": 4.960380345119722e-05, + "loss": 2.6516, + "step": 160000 + }, + { + "epoch": 0.8, + "learning_rate": 4.960256734194399e-05, + "loss": 2.6722, + "step": 160500 + }, + { + "epoch": 0.8, + "learning_rate": 4.960132875551791e-05, + "loss": 2.6724, + "step": 161000 + }, + { + "epoch": 0.8, + "learning_rate": 4.9600090169091824e-05, + "loss": 2.6304, + "step": 161500 + }, + { + "epoch": 0.8, + "learning_rate": 4.959885158266574e-05, + "loss": 2.6624, + "step": 162000 + }, + { + "epoch": 0.81, + "learning_rate": 4.959761299623966e-05, + "loss": 2.6667, + "step": 162500 + }, + { + "epoch": 0.81, + "learning_rate": 4.9596374409813575e-05, + "loss": 2.6568, + "step": 163000 + }, + { + "epoch": 0.81, + "learning_rate": 4.959513830056034e-05, + "loss": 2.6541, + "step": 163500 + }, + { + "epoch": 0.81, + "learning_rate": 4.9593899714134254e-05, + "loss": 2.6696, + "step": 164000 + }, + { + "epoch": 0.81, + "learning_rate": 4.959266112770817e-05, + "loss": 2.6907, + "step": 164500 + }, + { + "epoch": 0.82, + "learning_rate": 4.959142254128209e-05, + "loss": 2.656, + "step": 165000 + }, + { + "epoch": 0.82, + "learning_rate": 4.9590183954856005e-05, + "loss": 2.6893, + "step": 165500 + }, + { + "epoch": 0.82, + "learning_rate": 4.9588950322775625e-05, + "loss": 2.6747, + "step": 166000 + }, + { + "epoch": 0.82, + "learning_rate": 4.958771173634954e-05, + "loss": 2.6528, + "step": 166500 + }, + { + "epoch": 0.83, + "learning_rate": 4.958647314992346e-05, + "loss": 2.6685, + "step": 167000 + }, + { + "epoch": 0.83, + "learning_rate": 4.9585234563497376e-05, + "loss": 2.6476, + "step": 167500 + }, + { + "epoch": 0.83, + "learning_rate": 4.958399597707129e-05, + "loss": 2.66, + "step": 168000 + }, + { + "epoch": 0.83, + "learning_rate": 4.958275739064521e-05, + "loss": 2.6539, + "step": 168500 + }, + { + "epoch": 0.84, + "learning_rate": 4.958151880421913e-05, + "loss": 2.6832, + "step": 169000 + }, + { + "epoch": 0.84, + "learning_rate": 4.958028021779304e-05, + "loss": 2.6409, + "step": 169500 + }, + { + "epoch": 0.84, + "learning_rate": 4.9579044108539806e-05, + "loss": 2.6694, + "step": 170000 + }, + { + "epoch": 0.84, + "learning_rate": 4.957780552211372e-05, + "loss": 2.6599, + "step": 170500 + }, + { + "epoch": 0.85, + "learning_rate": 4.957656693568764e-05, + "loss": 2.6693, + "step": 171000 + }, + { + "epoch": 0.85, + "learning_rate": 4.957532834926156e-05, + "loss": 2.6476, + "step": 171500 + }, + { + "epoch": 0.85, + "learning_rate": 4.9574089762835474e-05, + "loss": 2.6592, + "step": 172000 + }, + { + "epoch": 0.85, + "learning_rate": 4.957285117640939e-05, + "loss": 2.6631, + "step": 172500 + }, + { + "epoch": 0.86, + "learning_rate": 4.957161258998331e-05, + "loss": 2.6674, + "step": 173000 + }, + { + "epoch": 0.86, + "learning_rate": 4.9570374003557224e-05, + "loss": 2.6603, + "step": 173500 + }, + { + "epoch": 0.86, + "learning_rate": 4.956913541713114e-05, + "loss": 2.6461, + "step": 174000 + }, + { + "epoch": 0.86, + "learning_rate": 4.956789930787791e-05, + "loss": 2.6766, + "step": 174500 + }, + { + "epoch": 0.87, + "learning_rate": 4.956666072145183e-05, + "loss": 2.6398, + "step": 175000 + }, + { + "epoch": 0.87, + "learning_rate": 4.956542461219859e-05, + "loss": 2.6699, + "step": 175500 + }, + { + "epoch": 0.87, + "learning_rate": 4.9564186025772506e-05, + "loss": 2.6702, + "step": 176000 + }, + { + "epoch": 0.87, + "learning_rate": 4.956294743934642e-05, + "loss": 2.6357, + "step": 176500 + }, + { + "epoch": 0.88, + "learning_rate": 4.956170885292034e-05, + "loss": 2.6474, + "step": 177000 + }, + { + "epoch": 0.88, + "learning_rate": 4.956047026649426e-05, + "loss": 2.6496, + "step": 177500 + }, + { + "epoch": 0.88, + "learning_rate": 4.9559231680068174e-05, + "loss": 2.664, + "step": 178000 + }, + { + "epoch": 0.88, + "learning_rate": 4.955799557081494e-05, + "loss": 2.6964, + "step": 178500 + }, + { + "epoch": 0.89, + "learning_rate": 4.955675698438886e-05, + "loss": 2.6707, + "step": 179000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9555518397962776e-05, + "loss": 2.654, + "step": 179500 + }, + { + "epoch": 0.89, + "learning_rate": 4.955427981153669e-05, + "loss": 2.6789, + "step": 180000 + }, + { + "epoch": 0.89, + "learning_rate": 4.9553043702283455e-05, + "loss": 2.6482, + "step": 180500 + }, + { + "epoch": 0.9, + "learning_rate": 4.955180511585737e-05, + "loss": 2.6471, + "step": 181000 + }, + { + "epoch": 0.9, + "learning_rate": 4.955056652943129e-05, + "loss": 2.6439, + "step": 181500 + }, + { + "epoch": 0.9, + "learning_rate": 4.9549327943005206e-05, + "loss": 2.6462, + "step": 182000 + }, + { + "epoch": 0.9, + "learning_rate": 4.954808935657912e-05, + "loss": 2.6688, + "step": 182500 + }, + { + "epoch": 0.91, + "learning_rate": 4.954685077015304e-05, + "loss": 2.6614, + "step": 183000 + }, + { + "epoch": 0.91, + "learning_rate": 4.954561218372696e-05, + "loss": 2.673, + "step": 183500 + }, + { + "epoch": 0.91, + "learning_rate": 4.9544373597300874e-05, + "loss": 2.6636, + "step": 184000 + }, + { + "epoch": 0.91, + "learning_rate": 4.954313748804764e-05, + "loss": 2.6533, + "step": 184500 + }, + { + "epoch": 0.92, + "learning_rate": 4.954189890162156e-05, + "loss": 2.6585, + "step": 185000 + }, + { + "epoch": 0.92, + "learning_rate": 4.9540660315195477e-05, + "loss": 2.6397, + "step": 185500 + }, + { + "epoch": 0.92, + "learning_rate": 4.9539421728769394e-05, + "loss": 2.6475, + "step": 186000 + }, + { + "epoch": 0.92, + "learning_rate": 4.953818314234331e-05, + "loss": 2.658, + "step": 186500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953694455591723e-05, + "loss": 2.6754, + "step": 187000 + }, + { + "epoch": 0.93, + "learning_rate": 4.9535705969491144e-05, + "loss": 2.6488, + "step": 187500 + }, + { + "epoch": 0.93, + "learning_rate": 4.953446738306506e-05, + "loss": 2.6816, + "step": 188000 + }, + { + "epoch": 0.93, + "learning_rate": 4.953322879663898e-05, + "loss": 2.6558, + "step": 188500 + }, + { + "epoch": 0.94, + "learning_rate": 4.953199268738574e-05, + "loss": 2.6288, + "step": 189000 + }, + { + "epoch": 0.94, + "learning_rate": 4.953075410095966e-05, + "loss": 2.6406, + "step": 189500 + }, + { + "epoch": 0.94, + "learning_rate": 4.9529517991706426e-05, + "loss": 2.6695, + "step": 190000 + }, + { + "epoch": 0.94, + "learning_rate": 4.952827940528034e-05, + "loss": 2.6565, + "step": 190500 + }, + { + "epoch": 0.95, + "learning_rate": 4.952704081885426e-05, + "loss": 2.6577, + "step": 191000 + }, + { + "epoch": 0.95, + "learning_rate": 4.952580470960103e-05, + "loss": 2.6499, + "step": 191500 + }, + { + "epoch": 0.95, + "learning_rate": 4.95245686003478e-05, + "loss": 2.6423, + "step": 192000 + }, + { + "epoch": 0.95, + "learning_rate": 4.9523330013921714e-05, + "loss": 2.6735, + "step": 192500 + }, + { + "epoch": 0.96, + "learning_rate": 4.952209142749563e-05, + "loss": 2.6441, + "step": 193000 + }, + { + "epoch": 0.96, + "learning_rate": 4.952085284106955e-05, + "loss": 2.6782, + "step": 193500 + }, + { + "epoch": 0.96, + "learning_rate": 4.9519614254643465e-05, + "loss": 2.6542, + "step": 194000 + }, + { + "epoch": 0.96, + "learning_rate": 4.951837566821738e-05, + "loss": 2.6564, + "step": 194500 + }, + { + "epoch": 0.97, + "learning_rate": 4.95171370817913e-05, + "loss": 2.6682, + "step": 195000 + }, + { + "epoch": 0.97, + "learning_rate": 4.951589849536521e-05, + "loss": 2.6803, + "step": 195500 + }, + { + "epoch": 0.97, + "learning_rate": 4.9514659908939126e-05, + "loss": 2.6768, + "step": 196000 + }, + { + "epoch": 0.97, + "learning_rate": 4.951342132251304e-05, + "loss": 2.652, + "step": 196500 + }, + { + "epoch": 0.98, + "learning_rate": 4.951218273608696e-05, + "loss": 2.6467, + "step": 197000 + }, + { + "epoch": 0.98, + "learning_rate": 4.951094414966088e-05, + "loss": 2.631, + "step": 197500 + }, + { + "epoch": 0.98, + "learning_rate": 4.9509705563234794e-05, + "loss": 2.6418, + "step": 198000 + }, + { + "epoch": 0.98, + "learning_rate": 4.950846697680871e-05, + "loss": 2.6617, + "step": 198500 + }, + { + "epoch": 0.99, + "learning_rate": 4.950722839038263e-05, + "loss": 2.6553, + "step": 199000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9505989803956545e-05, + "loss": 2.6369, + "step": 199500 + }, + { + "epoch": 0.99, + "learning_rate": 4.9504753694703307e-05, + "loss": 2.6282, + "step": 200000 + }, + { + "epoch": 0.99, + "learning_rate": 4.9503515108277224e-05, + "loss": 2.65, + "step": 200500 + }, + { + "epoch": 1.0, + "learning_rate": 4.950227652185114e-05, + "loss": 2.6651, + "step": 201000 + }, + { + "epoch": 1.0, + "learning_rate": 4.950103793542506e-05, + "loss": 2.6763, + "step": 201500 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.6236883143982033, + "eval_accuracy_mlm": 0.5761070598072291, + "eval_accuracy_nsp": 0.8479990900497727, + "eval_loss": 2.567748546600342, + "eval_runtime": 145.8951, + "eval_samples_per_second": 1747.55, + "eval_steps_per_second": 72.819, + "step": 201843 + }, + { + "epoch": 1.0, + "learning_rate": 4.9499801826171826e-05, + "loss": 2.6422, + "step": 202000 + }, + { + "epoch": 1.0, + "learning_rate": 4.949856323974574e-05, + "loss": 2.6314, + "step": 202500 + }, + { + "epoch": 1.01, + "learning_rate": 4.949732465331966e-05, + "loss": 2.6069, + "step": 203000 + }, + { + "epoch": 1.01, + "learning_rate": 4.949608606689358e-05, + "loss": 2.621, + "step": 203500 + }, + { + "epoch": 1.01, + "learning_rate": 4.9494847480467494e-05, + "loss": 2.6315, + "step": 204000 + }, + { + "epoch": 1.01, + "learning_rate": 4.949361137121426e-05, + "loss": 2.6192, + "step": 204500 + }, + { + "epoch": 1.02, + "learning_rate": 4.949237278478818e-05, + "loss": 2.6241, + "step": 205000 + }, + { + "epoch": 1.02, + "learning_rate": 4.949113667553495e-05, + "loss": 2.6318, + "step": 205500 + }, + { + "epoch": 1.02, + "learning_rate": 4.9489898089108865e-05, + "loss": 2.6222, + "step": 206000 + }, + { + "epoch": 1.02, + "learning_rate": 4.948865950268278e-05, + "loss": 2.6478, + "step": 206500 + }, + { + "epoch": 1.03, + "learning_rate": 4.94874209162567e-05, + "loss": 2.6204, + "step": 207000 + }, + { + "epoch": 1.03, + "learning_rate": 4.9486182329830616e-05, + "loss": 2.6316, + "step": 207500 + }, + { + "epoch": 1.03, + "learning_rate": 4.9484946220577385e-05, + "loss": 2.6242, + "step": 208000 + }, + { + "epoch": 1.03, + "learning_rate": 4.94837076341513e-05, + "loss": 2.5915, + "step": 208500 + }, + { + "epoch": 1.04, + "learning_rate": 4.948246904772522e-05, + "loss": 2.6172, + "step": 209000 + }, + { + "epoch": 1.04, + "learning_rate": 4.9481230461299136e-05, + "loss": 2.6155, + "step": 209500 + }, + { + "epoch": 1.04, + "learning_rate": 4.947999187487305e-05, + "loss": 2.6064, + "step": 210000 + }, + { + "epoch": 1.04, + "learning_rate": 4.947875328844697e-05, + "loss": 2.6333, + "step": 210500 + }, + { + "epoch": 1.05, + "learning_rate": 4.947751470202088e-05, + "loss": 2.6295, + "step": 211000 + }, + { + "epoch": 1.05, + "learning_rate": 4.94762761155948e-05, + "loss": 2.6262, + "step": 211500 + }, + { + "epoch": 1.05, + "learning_rate": 4.9475037529168714e-05, + "loss": 2.598, + "step": 212000 + }, + { + "epoch": 1.05, + "learning_rate": 4.947379894274263e-05, + "loss": 2.6367, + "step": 212500 + }, + { + "epoch": 1.06, + "learning_rate": 4.947256035631655e-05, + "loss": 2.6183, + "step": 213000 + }, + { + "epoch": 1.06, + "learning_rate": 4.947132176989046e-05, + "loss": 2.6535, + "step": 213500 + }, + { + "epoch": 1.06, + "learning_rate": 4.9470083183464375e-05, + "loss": 2.6075, + "step": 214000 + }, + { + "epoch": 1.06, + "learning_rate": 4.946884707421114e-05, + "loss": 2.5924, + "step": 214500 + }, + { + "epoch": 1.07, + "learning_rate": 4.946760848778506e-05, + "loss": 2.6268, + "step": 215000 + }, + { + "epoch": 1.07, + "learning_rate": 4.946636990135898e-05, + "loss": 2.6207, + "step": 215500 + }, + { + "epoch": 1.07, + "learning_rate": 4.9465131314932894e-05, + "loss": 2.5849, + "step": 216000 + }, + { + "epoch": 1.07, + "learning_rate": 4.946389520567967e-05, + "loss": 2.6141, + "step": 216500 + }, + { + "epoch": 1.08, + "learning_rate": 4.946265661925359e-05, + "loss": 2.6114, + "step": 217000 + }, + { + "epoch": 1.08, + "learning_rate": 4.94614180328275e-05, + "loss": 2.6018, + "step": 217500 + }, + { + "epoch": 1.08, + "learning_rate": 4.9460179446401414e-05, + "loss": 2.6197, + "step": 218000 + }, + { + "epoch": 1.08, + "learning_rate": 4.945894085997533e-05, + "loss": 2.6252, + "step": 218500 + }, + { + "epoch": 1.09, + "learning_rate": 4.945770227354925e-05, + "loss": 2.6027, + "step": 219000 + }, + { + "epoch": 1.09, + "learning_rate": 4.9456463687123165e-05, + "loss": 2.6315, + "step": 219500 + }, + { + "epoch": 1.09, + "learning_rate": 4.9455225100697075e-05, + "loss": 2.6094, + "step": 220000 + }, + { + "epoch": 1.09, + "learning_rate": 4.9453988991443843e-05, + "loss": 2.6246, + "step": 220500 + }, + { + "epoch": 1.09, + "learning_rate": 4.945275040501776e-05, + "loss": 2.6303, + "step": 221000 + }, + { + "epoch": 1.1, + "learning_rate": 4.945151181859168e-05, + "loss": 2.6081, + "step": 221500 + }, + { + "epoch": 1.1, + "learning_rate": 4.9450273232165594e-05, + "loss": 2.6129, + "step": 222000 + }, + { + "epoch": 1.1, + "learning_rate": 4.944903464573951e-05, + "loss": 2.6229, + "step": 222500 + }, + { + "epoch": 1.1, + "learning_rate": 4.944780101365913e-05, + "loss": 2.594, + "step": 223000 + }, + { + "epoch": 1.11, + "learning_rate": 4.944656242723305e-05, + "loss": 2.621, + "step": 223500 + }, + { + "epoch": 1.11, + "learning_rate": 4.9445323840806966e-05, + "loss": 2.6097, + "step": 224000 + }, + { + "epoch": 1.11, + "learning_rate": 4.944408525438088e-05, + "loss": 2.6152, + "step": 224500 + }, + { + "epoch": 1.11, + "learning_rate": 4.94428466679548e-05, + "loss": 2.6206, + "step": 225000 + }, + { + "epoch": 1.12, + "learning_rate": 4.944161055870157e-05, + "loss": 2.6391, + "step": 225500 + }, + { + "epoch": 1.12, + "learning_rate": 4.9440371972275485e-05, + "loss": 2.6487, + "step": 226000 + }, + { + "epoch": 1.12, + "learning_rate": 4.94391333858494e-05, + "loss": 2.5981, + "step": 226500 + }, + { + "epoch": 1.12, + "learning_rate": 4.943789479942332e-05, + "loss": 2.6175, + "step": 227000 + }, + { + "epoch": 1.13, + "learning_rate": 4.9436656212997236e-05, + "loss": 2.6332, + "step": 227500 + }, + { + "epoch": 1.13, + "learning_rate": 4.9435420103744e-05, + "loss": 2.6234, + "step": 228000 + }, + { + "epoch": 1.13, + "learning_rate": 4.9434181517317915e-05, + "loss": 2.6201, + "step": 228500 + }, + { + "epoch": 1.13, + "learning_rate": 4.943294293089183e-05, + "loss": 2.6235, + "step": 229000 + }, + { + "epoch": 1.14, + "learning_rate": 4.943170434446575e-05, + "loss": 2.6339, + "step": 229500 + }, + { + "epoch": 1.14, + "learning_rate": 4.9430465758039666e-05, + "loss": 2.5977, + "step": 230000 + }, + { + "epoch": 1.14, + "learning_rate": 4.942922717161358e-05, + "loss": 2.612, + "step": 230500 + }, + { + "epoch": 1.14, + "learning_rate": 4.942799106236035e-05, + "loss": 2.6263, + "step": 231000 + }, + { + "epoch": 1.15, + "learning_rate": 4.942675247593427e-05, + "loss": 2.6239, + "step": 231500 + }, + { + "epoch": 1.15, + "learning_rate": 4.9425513889508185e-05, + "loss": 2.6085, + "step": 232000 + }, + { + "epoch": 1.15, + "learning_rate": 4.94242753030821e-05, + "loss": 2.6082, + "step": 232500 + }, + { + "epoch": 1.15, + "learning_rate": 4.942303671665602e-05, + "loss": 2.6257, + "step": 233000 + }, + { + "epoch": 1.16, + "learning_rate": 4.9421798130229936e-05, + "loss": 2.6365, + "step": 233500 + }, + { + "epoch": 1.16, + "learning_rate": 4.942055954380385e-05, + "loss": 2.6226, + "step": 234000 + }, + { + "epoch": 1.16, + "learning_rate": 4.9419323434550615e-05, + "loss": 2.6333, + "step": 234500 + }, + { + "epoch": 1.16, + "learning_rate": 4.941808484812453e-05, + "loss": 2.6214, + "step": 235000 + }, + { + "epoch": 1.17, + "learning_rate": 4.941684626169845e-05, + "loss": 2.614, + "step": 235500 + }, + { + "epoch": 1.17, + "learning_rate": 4.941561015244522e-05, + "loss": 2.5987, + "step": 236000 + }, + { + "epoch": 1.17, + "learning_rate": 4.9414371566019135e-05, + "loss": 2.6155, + "step": 236500 + }, + { + "epoch": 1.17, + "learning_rate": 4.941313297959305e-05, + "loss": 2.6424, + "step": 237000 + }, + { + "epoch": 1.18, + "learning_rate": 4.941189439316697e-05, + "loss": 2.6057, + "step": 237500 + }, + { + "epoch": 1.18, + "learning_rate": 4.9410655806740886e-05, + "loss": 2.6237, + "step": 238000 + }, + { + "epoch": 1.18, + "learning_rate": 4.94094172203148e-05, + "loss": 2.6184, + "step": 238500 + }, + { + "epoch": 1.18, + "learning_rate": 4.940818111106157e-05, + "loss": 2.6616, + "step": 239000 + }, + { + "epoch": 1.19, + "learning_rate": 4.940694252463549e-05, + "loss": 2.6191, + "step": 239500 + }, + { + "epoch": 1.19, + "learning_rate": 4.9405703938209405e-05, + "loss": 2.5849, + "step": 240000 + }, + { + "epoch": 1.19, + "learning_rate": 4.940446535178332e-05, + "loss": 2.6178, + "step": 240500 + }, + { + "epoch": 1.19, + "learning_rate": 4.940322676535723e-05, + "loss": 2.6466, + "step": 241000 + }, + { + "epoch": 1.2, + "learning_rate": 4.940198817893115e-05, + "loss": 2.6025, + "step": 241500 + }, + { + "epoch": 1.2, + "learning_rate": 4.940075206967792e-05, + "loss": 2.5923, + "step": 242000 + }, + { + "epoch": 1.2, + "learning_rate": 4.9399513483251835e-05, + "loss": 2.6381, + "step": 242500 + }, + { + "epoch": 1.2, + "learning_rate": 4.939827489682575e-05, + "loss": 2.5819, + "step": 243000 + }, + { + "epoch": 1.21, + "learning_rate": 4.939703631039967e-05, + "loss": 2.6115, + "step": 243500 + }, + { + "epoch": 1.21, + "learning_rate": 4.9395797723973586e-05, + "loss": 2.6289, + "step": 244000 + }, + { + "epoch": 1.21, + "learning_rate": 4.93945591375475e-05, + "loss": 2.5913, + "step": 244500 + }, + { + "epoch": 1.21, + "learning_rate": 4.939332302829427e-05, + "loss": 2.6194, + "step": 245000 + }, + { + "epoch": 1.22, + "learning_rate": 4.939208444186819e-05, + "loss": 2.6232, + "step": 245500 + }, + { + "epoch": 1.22, + "learning_rate": 4.9390845855442105e-05, + "loss": 2.6176, + "step": 246000 + }, + { + "epoch": 1.22, + "learning_rate": 4.938960726901602e-05, + "loss": 2.6158, + "step": 246500 + }, + { + "epoch": 1.22, + "learning_rate": 4.938836868258994e-05, + "loss": 2.6155, + "step": 247000 + }, + { + "epoch": 1.23, + "learning_rate": 4.9387130096163856e-05, + "loss": 2.6407, + "step": 247500 + }, + { + "epoch": 1.23, + "learning_rate": 4.9385891509737766e-05, + "loss": 2.6334, + "step": 248000 + }, + { + "epoch": 1.23, + "learning_rate": 4.938465292331168e-05, + "loss": 2.62, + "step": 248500 + }, + { + "epoch": 1.23, + "learning_rate": 4.93834143368856e-05, + "loss": 2.6266, + "step": 249000 + }, + { + "epoch": 1.24, + "learning_rate": 4.938217575045952e-05, + "loss": 2.612, + "step": 249500 + }, + { + "epoch": 1.24, + "learning_rate": 4.9380937164033434e-05, + "loss": 2.6358, + "step": 250000 + }, + { + "epoch": 1.24, + "learning_rate": 4.937969857760735e-05, + "loss": 2.6289, + "step": 250500 + }, + { + "epoch": 1.24, + "learning_rate": 4.937846246835412e-05, + "loss": 2.6102, + "step": 251000 + }, + { + "epoch": 1.25, + "learning_rate": 4.9377223881928037e-05, + "loss": 2.6004, + "step": 251500 + }, + { + "epoch": 1.25, + "learning_rate": 4.9375985295501954e-05, + "loss": 2.607, + "step": 252000 + }, + { + "epoch": 1.25, + "learning_rate": 4.937475166342157e-05, + "loss": 2.6224, + "step": 252500 + }, + { + "epoch": 1.25, + "learning_rate": 4.9373513076995484e-05, + "loss": 2.6319, + "step": 253000 + }, + { + "epoch": 1.26, + "learning_rate": 4.93722744905694e-05, + "loss": 2.6201, + "step": 253500 + }, + { + "epoch": 1.26, + "learning_rate": 4.937103590414332e-05, + "loss": 2.605, + "step": 254000 + }, + { + "epoch": 1.26, + "learning_rate": 4.9369797317717235e-05, + "loss": 2.6106, + "step": 254500 + }, + { + "epoch": 1.26, + "learning_rate": 4.936855873129115e-05, + "loss": 2.5989, + "step": 255000 + }, + { + "epoch": 1.27, + "learning_rate": 4.936732014486507e-05, + "loss": 2.6324, + "step": 255500 + }, + { + "epoch": 1.27, + "learning_rate": 4.9366081558438986e-05, + "loss": 2.615, + "step": 256000 + }, + { + "epoch": 1.27, + "learning_rate": 4.9364845449185755e-05, + "loss": 2.6136, + "step": 256500 + }, + { + "epoch": 1.27, + "learning_rate": 4.936360686275967e-05, + "loss": 2.6346, + "step": 257000 + }, + { + "epoch": 1.28, + "learning_rate": 4.936236827633359e-05, + "loss": 2.6109, + "step": 257500 + }, + { + "epoch": 1.28, + "learning_rate": 4.9361129689907506e-05, + "loss": 2.629, + "step": 258000 + }, + { + "epoch": 1.28, + "learning_rate": 4.935989110348142e-05, + "loss": 2.6382, + "step": 258500 + }, + { + "epoch": 1.28, + "learning_rate": 4.935865251705534e-05, + "loss": 2.652, + "step": 259000 + }, + { + "epoch": 1.29, + "learning_rate": 4.9357413930629256e-05, + "loss": 2.5949, + "step": 259500 + }, + { + "epoch": 1.29, + "learning_rate": 4.935617534420317e-05, + "loss": 2.6228, + "step": 260000 + }, + { + "epoch": 1.29, + "learning_rate": 4.935493675777709e-05, + "loss": 2.5946, + "step": 260500 + }, + { + "epoch": 1.29, + "learning_rate": 4.935369817135101e-05, + "loss": 2.6195, + "step": 261000 + }, + { + "epoch": 1.3, + "learning_rate": 4.935245958492492e-05, + "loss": 2.6184, + "step": 261500 + }, + { + "epoch": 1.3, + "learning_rate": 4.9351223475671686e-05, + "loss": 2.6138, + "step": 262000 + }, + { + "epoch": 1.3, + "learning_rate": 4.93499848892456e-05, + "loss": 2.5975, + "step": 262500 + }, + { + "epoch": 1.3, + "learning_rate": 4.934874630281952e-05, + "loss": 2.6182, + "step": 263000 + }, + { + "epoch": 1.31, + "learning_rate": 4.934750771639344e-05, + "loss": 2.6013, + "step": 263500 + }, + { + "epoch": 1.31, + "learning_rate": 4.9346269129967354e-05, + "loss": 2.6424, + "step": 264000 + }, + { + "epoch": 1.31, + "learning_rate": 4.934503302071412e-05, + "loss": 2.6397, + "step": 264500 + }, + { + "epoch": 1.31, + "learning_rate": 4.934379443428804e-05, + "loss": 2.6183, + "step": 265000 + }, + { + "epoch": 1.32, + "learning_rate": 4.9342555847861956e-05, + "loss": 2.6236, + "step": 265500 + }, + { + "epoch": 1.32, + "learning_rate": 4.934131726143587e-05, + "loss": 2.6169, + "step": 266000 + }, + { + "epoch": 1.32, + "learning_rate": 4.934007867500979e-05, + "loss": 2.6104, + "step": 266500 + }, + { + "epoch": 1.32, + "learning_rate": 4.933884008858371e-05, + "loss": 2.6056, + "step": 267000 + }, + { + "epoch": 1.33, + "learning_rate": 4.9337601502157624e-05, + "loss": 2.6044, + "step": 267500 + }, + { + "epoch": 1.33, + "learning_rate": 4.9336362915731534e-05, + "loss": 2.5988, + "step": 268000 + }, + { + "epoch": 1.33, + "learning_rate": 4.933512432930545e-05, + "loss": 2.64, + "step": 268500 + }, + { + "epoch": 1.33, + "learning_rate": 4.933388822005222e-05, + "loss": 2.6459, + "step": 269000 + }, + { + "epoch": 1.34, + "learning_rate": 4.933264963362614e-05, + "loss": 2.6069, + "step": 269500 + }, + { + "epoch": 1.34, + "learning_rate": 4.9331413524372906e-05, + "loss": 2.6332, + "step": 270000 + }, + { + "epoch": 1.34, + "learning_rate": 4.933017493794682e-05, + "loss": 2.6211, + "step": 270500 + }, + { + "epoch": 1.34, + "learning_rate": 4.932893635152074e-05, + "loss": 2.637, + "step": 271000 + }, + { + "epoch": 1.35, + "learning_rate": 4.9327697765094657e-05, + "loss": 2.6399, + "step": 271500 + }, + { + "epoch": 1.35, + "learning_rate": 4.9326459178668573e-05, + "loss": 2.6234, + "step": 272000 + }, + { + "epoch": 1.35, + "learning_rate": 4.9325223069415336e-05, + "loss": 2.6221, + "step": 272500 + }, + { + "epoch": 1.35, + "learning_rate": 4.932398448298925e-05, + "loss": 2.6133, + "step": 273000 + }, + { + "epoch": 1.36, + "learning_rate": 4.932274589656317e-05, + "loss": 2.614, + "step": 273500 + }, + { + "epoch": 1.36, + "learning_rate": 4.9321507310137086e-05, + "loss": 2.6026, + "step": 274000 + }, + { + "epoch": 1.36, + "learning_rate": 4.9320268723711e-05, + "loss": 2.5999, + "step": 274500 + }, + { + "epoch": 1.36, + "learning_rate": 4.931903013728492e-05, + "loss": 2.6374, + "step": 275000 + }, + { + "epoch": 1.36, + "learning_rate": 4.931779155085884e-05, + "loss": 2.6182, + "step": 275500 + }, + { + "epoch": 1.37, + "learning_rate": 4.9316552964432754e-05, + "loss": 2.6048, + "step": 276000 + }, + { + "epoch": 1.37, + "learning_rate": 4.931531685517952e-05, + "loss": 2.6308, + "step": 276500 + }, + { + "epoch": 1.37, + "learning_rate": 4.931408074592629e-05, + "loss": 2.6062, + "step": 277000 + }, + { + "epoch": 1.37, + "learning_rate": 4.931284215950021e-05, + "loss": 2.6164, + "step": 277500 + }, + { + "epoch": 1.38, + "learning_rate": 4.9311603573074126e-05, + "loss": 2.6278, + "step": 278000 + }, + { + "epoch": 1.38, + "learning_rate": 4.9310364986648036e-05, + "loss": 2.6067, + "step": 278500 + }, + { + "epoch": 1.38, + "learning_rate": 4.930912640022195e-05, + "loss": 2.6246, + "step": 279000 + }, + { + "epoch": 1.38, + "learning_rate": 4.930788781379587e-05, + "loss": 2.6262, + "step": 279500 + }, + { + "epoch": 1.39, + "learning_rate": 4.9306649227369786e-05, + "loss": 2.6116, + "step": 280000 + }, + { + "epoch": 1.39, + "learning_rate": 4.93054106409437e-05, + "loss": 2.6325, + "step": 280500 + }, + { + "epoch": 1.39, + "learning_rate": 4.930417453169047e-05, + "loss": 2.5979, + "step": 281000 + }, + { + "epoch": 1.39, + "learning_rate": 4.930293594526439e-05, + "loss": 2.6106, + "step": 281500 + }, + { + "epoch": 1.4, + "learning_rate": 4.9301699836011165e-05, + "loss": 2.6094, + "step": 282000 + }, + { + "epoch": 1.4, + "learning_rate": 4.9300461249585075e-05, + "loss": 2.6065, + "step": 282500 + }, + { + "epoch": 1.4, + "learning_rate": 4.929922266315899e-05, + "loss": 2.5977, + "step": 283000 + }, + { + "epoch": 1.4, + "learning_rate": 4.929798407673291e-05, + "loss": 2.6309, + "step": 283500 + }, + { + "epoch": 1.41, + "learning_rate": 4.9296745490306826e-05, + "loss": 2.6134, + "step": 284000 + }, + { + "epoch": 1.41, + "learning_rate": 4.929550690388074e-05, + "loss": 2.6412, + "step": 284500 + }, + { + "epoch": 1.41, + "learning_rate": 4.929426831745465e-05, + "loss": 2.6118, + "step": 285000 + }, + { + "epoch": 1.41, + "learning_rate": 4.929302973102857e-05, + "loss": 2.5835, + "step": 285500 + }, + { + "epoch": 1.42, + "learning_rate": 4.9291791144602487e-05, + "loss": 2.5993, + "step": 286000 + }, + { + "epoch": 1.42, + "learning_rate": 4.9290555035349255e-05, + "loss": 2.614, + "step": 286500 + }, + { + "epoch": 1.42, + "learning_rate": 4.928931892609603e-05, + "loss": 2.5988, + "step": 287000 + }, + { + "epoch": 1.42, + "learning_rate": 4.928808033966995e-05, + "loss": 2.6119, + "step": 287500 + }, + { + "epoch": 1.43, + "learning_rate": 4.9286841753243865e-05, + "loss": 2.6121, + "step": 288000 + }, + { + "epoch": 1.43, + "learning_rate": 4.928560316681778e-05, + "loss": 2.6205, + "step": 288500 + }, + { + "epoch": 1.43, + "learning_rate": 4.92843645803917e-05, + "loss": 2.6136, + "step": 289000 + }, + { + "epoch": 1.43, + "learning_rate": 4.928312847113846e-05, + "loss": 2.6125, + "step": 289500 + }, + { + "epoch": 1.44, + "learning_rate": 4.928188988471238e-05, + "loss": 2.6199, + "step": 290000 + }, + { + "epoch": 1.44, + "learning_rate": 4.9280651298286295e-05, + "loss": 2.612, + "step": 290500 + }, + { + "epoch": 1.44, + "learning_rate": 4.927941271186021e-05, + "loss": 2.5957, + "step": 291000 + }, + { + "epoch": 1.44, + "learning_rate": 4.927817412543413e-05, + "loss": 2.5938, + "step": 291500 + }, + { + "epoch": 1.45, + "learning_rate": 4.9276935539008045e-05, + "loss": 2.6138, + "step": 292000 + }, + { + "epoch": 1.45, + "learning_rate": 4.9275699429754814e-05, + "loss": 2.6162, + "step": 292500 + }, + { + "epoch": 1.45, + "learning_rate": 4.927446084332873e-05, + "loss": 2.6214, + "step": 293000 + }, + { + "epoch": 1.45, + "learning_rate": 4.927322225690265e-05, + "loss": 2.6009, + "step": 293500 + }, + { + "epoch": 1.46, + "learning_rate": 4.9271983670476565e-05, + "loss": 2.6031, + "step": 294000 + }, + { + "epoch": 1.46, + "learning_rate": 4.927074508405048e-05, + "loss": 2.5915, + "step": 294500 + }, + { + "epoch": 1.46, + "learning_rate": 4.92695064976244e-05, + "loss": 2.6289, + "step": 295000 + }, + { + "epoch": 1.46, + "learning_rate": 4.9268267911198316e-05, + "loss": 2.6141, + "step": 295500 + }, + { + "epoch": 1.47, + "learning_rate": 4.9267029324772226e-05, + "loss": 2.6233, + "step": 296000 + }, + { + "epoch": 1.47, + "learning_rate": 4.926579073834614e-05, + "loss": 2.6347, + "step": 296500 + }, + { + "epoch": 1.47, + "learning_rate": 4.926455215192006e-05, + "loss": 2.619, + "step": 297000 + }, + { + "epoch": 1.47, + "learning_rate": 4.926331604266683e-05, + "loss": 2.5973, + "step": 297500 + }, + { + "epoch": 1.48, + "learning_rate": 4.9262077456240745e-05, + "loss": 2.5999, + "step": 298000 + }, + { + "epoch": 1.48, + "learning_rate": 4.926083886981466e-05, + "loss": 2.6199, + "step": 298500 + }, + { + "epoch": 1.48, + "learning_rate": 4.925960028338857e-05, + "loss": 2.6028, + "step": 299000 + }, + { + "epoch": 1.48, + "learning_rate": 4.925836417413535e-05, + "loss": 2.5981, + "step": 299500 + }, + { + "epoch": 1.49, + "learning_rate": 4.925712806488211e-05, + "loss": 2.6159, + "step": 300000 + }, + { + "epoch": 1.49, + "learning_rate": 4.925588947845603e-05, + "loss": 2.6048, + "step": 300500 + }, + { + "epoch": 1.49, + "learning_rate": 4.9254650892029944e-05, + "loss": 2.5982, + "step": 301000 + }, + { + "epoch": 1.49, + "learning_rate": 4.925341230560386e-05, + "loss": 2.6199, + "step": 301500 + }, + { + "epoch": 1.5, + "learning_rate": 4.925217619635063e-05, + "loss": 2.6351, + "step": 302000 + }, + { + "epoch": 1.5, + "learning_rate": 4.925093760992455e-05, + "loss": 2.6151, + "step": 302500 + }, + { + "epoch": 1.5, + "learning_rate": 4.9249701500671315e-05, + "loss": 2.5925, + "step": 303000 + }, + { + "epoch": 1.5, + "learning_rate": 4.924846291424523e-05, + "loss": 2.5977, + "step": 303500 + }, + { + "epoch": 1.51, + "learning_rate": 4.924722432781915e-05, + "loss": 2.6201, + "step": 304000 + }, + { + "epoch": 1.51, + "learning_rate": 4.9245985741393066e-05, + "loss": 2.6132, + "step": 304500 + }, + { + "epoch": 1.51, + "learning_rate": 4.924474715496698e-05, + "loss": 2.6258, + "step": 305000 + }, + { + "epoch": 1.51, + "learning_rate": 4.92435085685409e-05, + "loss": 2.6223, + "step": 305500 + }, + { + "epoch": 1.52, + "learning_rate": 4.924226998211482e-05, + "loss": 2.6127, + "step": 306000 + }, + { + "epoch": 1.52, + "learning_rate": 4.924103139568873e-05, + "loss": 2.6071, + "step": 306500 + }, + { + "epoch": 1.52, + "learning_rate": 4.9239795286435496e-05, + "loss": 2.6147, + "step": 307000 + }, + { + "epoch": 1.52, + "learning_rate": 4.923855670000941e-05, + "loss": 2.6054, + "step": 307500 + }, + { + "epoch": 1.53, + "learning_rate": 4.923731811358333e-05, + "loss": 2.597, + "step": 308000 + }, + { + "epoch": 1.53, + "learning_rate": 4.9236082004330105e-05, + "loss": 2.599, + "step": 308500 + }, + { + "epoch": 1.53, + "learning_rate": 4.9234843417904016e-05, + "loss": 2.5992, + "step": 309000 + }, + { + "epoch": 1.53, + "learning_rate": 4.923360483147793e-05, + "loss": 2.6026, + "step": 309500 + }, + { + "epoch": 1.54, + "learning_rate": 4.923236624505185e-05, + "loss": 2.5938, + "step": 310000 + }, + { + "epoch": 1.54, + "learning_rate": 4.9231127658625766e-05, + "loss": 2.6023, + "step": 310500 + }, + { + "epoch": 1.54, + "learning_rate": 4.922988907219968e-05, + "loss": 2.586, + "step": 311000 + }, + { + "epoch": 1.54, + "learning_rate": 4.92286504857736e-05, + "loss": 2.6111, + "step": 311500 + }, + { + "epoch": 1.55, + "learning_rate": 4.922741189934752e-05, + "loss": 2.6103, + "step": 312000 + }, + { + "epoch": 1.55, + "learning_rate": 4.922617579009428e-05, + "loss": 2.598, + "step": 312500 + }, + { + "epoch": 1.55, + "learning_rate": 4.9224937203668196e-05, + "loss": 2.5982, + "step": 313000 + }, + { + "epoch": 1.55, + "learning_rate": 4.922369861724211e-05, + "loss": 2.6226, + "step": 313500 + }, + { + "epoch": 1.56, + "learning_rate": 4.922246250798889e-05, + "loss": 2.6292, + "step": 314000 + }, + { + "epoch": 1.56, + "learning_rate": 4.9221223921562806e-05, + "loss": 2.6045, + "step": 314500 + }, + { + "epoch": 1.56, + "learning_rate": 4.921998533513672e-05, + "loss": 2.596, + "step": 315000 + }, + { + "epoch": 1.56, + "learning_rate": 4.921874674871063e-05, + "loss": 2.5956, + "step": 315500 + }, + { + "epoch": 1.57, + "learning_rate": 4.921750816228455e-05, + "loss": 2.5923, + "step": 316000 + }, + { + "epoch": 1.57, + "learning_rate": 4.9216269575858467e-05, + "loss": 2.5815, + "step": 316500 + }, + { + "epoch": 1.57, + "learning_rate": 4.9215030989432383e-05, + "loss": 2.6302, + "step": 317000 + }, + { + "epoch": 1.57, + "learning_rate": 4.92137924030063e-05, + "loss": 2.5967, + "step": 317500 + }, + { + "epoch": 1.58, + "learning_rate": 4.921255381658022e-05, + "loss": 2.611, + "step": 318000 + }, + { + "epoch": 1.58, + "learning_rate": 4.9211315230154134e-05, + "loss": 2.5912, + "step": 318500 + }, + { + "epoch": 1.58, + "learning_rate": 4.921007664372805e-05, + "loss": 2.6341, + "step": 319000 + }, + { + "epoch": 1.58, + "learning_rate": 4.920883805730197e-05, + "loss": 2.5869, + "step": 319500 + }, + { + "epoch": 1.59, + "learning_rate": 4.920759947087588e-05, + "loss": 2.648, + "step": 320000 + }, + { + "epoch": 1.59, + "learning_rate": 4.9206360884449795e-05, + "loss": 2.6136, + "step": 320500 + }, + { + "epoch": 1.59, + "learning_rate": 4.9205124775196564e-05, + "loss": 2.5998, + "step": 321000 + }, + { + "epoch": 1.59, + "learning_rate": 4.920388618877048e-05, + "loss": 2.5955, + "step": 321500 + }, + { + "epoch": 1.6, + "learning_rate": 4.920265007951725e-05, + "loss": 2.5852, + "step": 322000 + }, + { + "epoch": 1.6, + "learning_rate": 4.920141149309117e-05, + "loss": 2.6069, + "step": 322500 + }, + { + "epoch": 1.6, + "learning_rate": 4.9200172906665084e-05, + "loss": 2.5923, + "step": 323000 + }, + { + "epoch": 1.6, + "learning_rate": 4.9198934320239e-05, + "loss": 2.6257, + "step": 323500 + }, + { + "epoch": 1.61, + "learning_rate": 4.919769573381292e-05, + "loss": 2.6118, + "step": 324000 + }, + { + "epoch": 1.61, + "learning_rate": 4.9196459624559686e-05, + "loss": 2.6192, + "step": 324500 + }, + { + "epoch": 1.61, + "learning_rate": 4.9195221038133596e-05, + "loss": 2.6028, + "step": 325000 + }, + { + "epoch": 1.61, + "learning_rate": 4.919398245170751e-05, + "loss": 2.6064, + "step": 325500 + }, + { + "epoch": 1.62, + "learning_rate": 4.919274386528143e-05, + "loss": 2.6158, + "step": 326000 + }, + { + "epoch": 1.62, + "learning_rate": 4.919150527885535e-05, + "loss": 2.5847, + "step": 326500 + }, + { + "epoch": 1.62, + "learning_rate": 4.9190266692429264e-05, + "loss": 2.5866, + "step": 327000 + }, + { + "epoch": 1.62, + "learning_rate": 4.918902810600318e-05, + "loss": 2.6172, + "step": 327500 + }, + { + "epoch": 1.63, + "learning_rate": 4.91877895195771e-05, + "loss": 2.6314, + "step": 328000 + }, + { + "epoch": 1.63, + "learning_rate": 4.918655341032387e-05, + "loss": 2.6196, + "step": 328500 + }, + { + "epoch": 1.63, + "learning_rate": 4.9185317301070636e-05, + "loss": 2.6346, + "step": 329000 + }, + { + "epoch": 1.63, + "learning_rate": 4.918407871464455e-05, + "loss": 2.6118, + "step": 329500 + }, + { + "epoch": 1.63, + "learning_rate": 4.918284012821847e-05, + "loss": 2.5716, + "step": 330000 + }, + { + "epoch": 1.64, + "learning_rate": 4.9181601541792386e-05, + "loss": 2.626, + "step": 330500 + }, + { + "epoch": 1.64, + "learning_rate": 4.9180362955366297e-05, + "loss": 2.6039, + "step": 331000 + }, + { + "epoch": 1.64, + "learning_rate": 4.9179124368940213e-05, + "loss": 2.5957, + "step": 331500 + }, + { + "epoch": 1.64, + "learning_rate": 4.917788825968699e-05, + "loss": 2.6115, + "step": 332000 + }, + { + "epoch": 1.65, + "learning_rate": 4.9176649673260906e-05, + "loss": 2.6013, + "step": 332500 + }, + { + "epoch": 1.65, + "learning_rate": 4.917541108683482e-05, + "loss": 2.6131, + "step": 333000 + }, + { + "epoch": 1.65, + "learning_rate": 4.917417250040874e-05, + "loss": 2.5955, + "step": 333500 + }, + { + "epoch": 1.65, + "learning_rate": 4.917293391398265e-05, + "loss": 2.615, + "step": 334000 + }, + { + "epoch": 1.66, + "learning_rate": 4.917169780472942e-05, + "loss": 2.5883, + "step": 334500 + }, + { + "epoch": 1.66, + "learning_rate": 4.9170459218303336e-05, + "loss": 2.6009, + "step": 335000 + }, + { + "epoch": 1.66, + "learning_rate": 4.916922063187725e-05, + "loss": 2.6009, + "step": 335500 + }, + { + "epoch": 1.66, + "learning_rate": 4.916798204545117e-05, + "loss": 2.6285, + "step": 336000 + }, + { + "epoch": 1.67, + "learning_rate": 4.916674593619794e-05, + "loss": 2.6007, + "step": 336500 + }, + { + "epoch": 1.67, + "learning_rate": 4.9165507349771855e-05, + "loss": 2.5952, + "step": 337000 + }, + { + "epoch": 1.67, + "learning_rate": 4.916426876334577e-05, + "loss": 2.5738, + "step": 337500 + }, + { + "epoch": 1.67, + "learning_rate": 4.916303017691969e-05, + "loss": 2.6101, + "step": 338000 + }, + { + "epoch": 1.68, + "learning_rate": 4.916179406766646e-05, + "loss": 2.5811, + "step": 338500 + }, + { + "epoch": 1.68, + "learning_rate": 4.9160555481240375e-05, + "loss": 2.5833, + "step": 339000 + }, + { + "epoch": 1.68, + "learning_rate": 4.915931689481429e-05, + "loss": 2.6013, + "step": 339500 + }, + { + "epoch": 1.68, + "learning_rate": 4.915807830838821e-05, + "loss": 2.6163, + "step": 340000 + }, + { + "epoch": 1.69, + "learning_rate": 4.9156839721962126e-05, + "loss": 2.5969, + "step": 340500 + }, + { + "epoch": 1.69, + "learning_rate": 4.9155601135536036e-05, + "loss": 2.5872, + "step": 341000 + }, + { + "epoch": 1.69, + "learning_rate": 4.9154365026282805e-05, + "loss": 2.5693, + "step": 341500 + }, + { + "epoch": 1.69, + "learning_rate": 4.915312643985672e-05, + "loss": 2.5994, + "step": 342000 + }, + { + "epoch": 1.7, + "learning_rate": 4.915188785343064e-05, + "loss": 2.6187, + "step": 342500 + }, + { + "epoch": 1.7, + "learning_rate": 4.9150649267004555e-05, + "loss": 2.5982, + "step": 343000 + }, + { + "epoch": 1.7, + "learning_rate": 4.914941068057847e-05, + "loss": 2.5968, + "step": 343500 + }, + { + "epoch": 1.7, + "learning_rate": 4.914817209415239e-05, + "loss": 2.5756, + "step": 344000 + }, + { + "epoch": 1.71, + "learning_rate": 4.9146933507726306e-05, + "loss": 2.5962, + "step": 344500 + }, + { + "epoch": 1.71, + "learning_rate": 4.914569492130022e-05, + "loss": 2.6133, + "step": 345000 + }, + { + "epoch": 1.71, + "learning_rate": 4.914445881204699e-05, + "loss": 2.6052, + "step": 345500 + }, + { + "epoch": 1.71, + "learning_rate": 4.914322022562091e-05, + "loss": 2.6143, + "step": 346000 + }, + { + "epoch": 1.72, + "learning_rate": 4.9141981639194826e-05, + "loss": 2.6077, + "step": 346500 + }, + { + "epoch": 1.72, + "learning_rate": 4.914074552994159e-05, + "loss": 2.6198, + "step": 347000 + }, + { + "epoch": 1.72, + "learning_rate": 4.9139506943515505e-05, + "loss": 2.6435, + "step": 347500 + }, + { + "epoch": 1.72, + "learning_rate": 4.913826835708942e-05, + "loss": 2.6145, + "step": 348000 + }, + { + "epoch": 1.73, + "learning_rate": 4.913702977066334e-05, + "loss": 2.5931, + "step": 348500 + }, + { + "epoch": 1.73, + "learning_rate": 4.9135791184237256e-05, + "loss": 2.6031, + "step": 349000 + }, + { + "epoch": 1.73, + "learning_rate": 4.913455259781117e-05, + "loss": 2.599, + "step": 349500 + }, + { + "epoch": 1.73, + "learning_rate": 4.913331401138509e-05, + "loss": 2.6163, + "step": 350000 + }, + { + "epoch": 1.74, + "learning_rate": 4.9132075424959006e-05, + "loss": 2.6104, + "step": 350500 + }, + { + "epoch": 1.74, + "learning_rate": 4.913083683853292e-05, + "loss": 2.6251, + "step": 351000 + }, + { + "epoch": 1.74, + "learning_rate": 4.912959825210684e-05, + "loss": 2.6044, + "step": 351500 + }, + { + "epoch": 1.74, + "learning_rate": 4.912835966568076e-05, + "loss": 2.6131, + "step": 352000 + }, + { + "epoch": 1.75, + "learning_rate": 4.9127121079254674e-05, + "loss": 2.5796, + "step": 352500 + }, + { + "epoch": 1.75, + "learning_rate": 4.9125882492828584e-05, + "loss": 2.6012, + "step": 353000 + }, + { + "epoch": 1.75, + "learning_rate": 4.912464638357536e-05, + "loss": 2.6166, + "step": 353500 + }, + { + "epoch": 1.75, + "learning_rate": 4.912340779714928e-05, + "loss": 2.6058, + "step": 354000 + }, + { + "epoch": 1.76, + "learning_rate": 4.912216921072319e-05, + "loss": 2.6076, + "step": 354500 + }, + { + "epoch": 1.76, + "learning_rate": 4.9120930624297104e-05, + "loss": 2.5742, + "step": 355000 + }, + { + "epoch": 1.76, + "learning_rate": 4.911969203787102e-05, + "loss": 2.5899, + "step": 355500 + }, + { + "epoch": 1.76, + "learning_rate": 4.911845592861779e-05, + "loss": 2.6106, + "step": 356000 + }, + { + "epoch": 1.77, + "learning_rate": 4.9117217342191706e-05, + "loss": 2.6205, + "step": 356500 + }, + { + "epoch": 1.77, + "learning_rate": 4.9115978755765623e-05, + "loss": 2.6254, + "step": 357000 + }, + { + "epoch": 1.77, + "learning_rate": 4.911474016933954e-05, + "loss": 2.6239, + "step": 357500 + }, + { + "epoch": 1.77, + "learning_rate": 4.911350158291346e-05, + "loss": 2.6001, + "step": 358000 + }, + { + "epoch": 1.78, + "learning_rate": 4.9112262996487374e-05, + "loss": 2.5861, + "step": 358500 + }, + { + "epoch": 1.78, + "learning_rate": 4.911102688723414e-05, + "loss": 2.6104, + "step": 359000 + }, + { + "epoch": 1.78, + "learning_rate": 4.910978830080806e-05, + "loss": 2.5978, + "step": 359500 + }, + { + "epoch": 1.78, + "learning_rate": 4.910854971438198e-05, + "loss": 2.5799, + "step": 360000 + }, + { + "epoch": 1.79, + "learning_rate": 4.910731360512874e-05, + "loss": 2.6136, + "step": 360500 + }, + { + "epoch": 1.79, + "learning_rate": 4.9106075018702656e-05, + "loss": 2.6105, + "step": 361000 + }, + { + "epoch": 1.79, + "learning_rate": 4.910483643227657e-05, + "loss": 2.5947, + "step": 361500 + }, + { + "epoch": 1.79, + "learning_rate": 4.910359784585049e-05, + "loss": 2.5731, + "step": 362000 + }, + { + "epoch": 1.8, + "learning_rate": 4.910236421377011e-05, + "loss": 2.6102, + "step": 362500 + }, + { + "epoch": 1.8, + "learning_rate": 4.910112562734403e-05, + "loss": 2.6048, + "step": 363000 + }, + { + "epoch": 1.8, + "learning_rate": 4.9099887040917944e-05, + "loss": 2.6181, + "step": 363500 + }, + { + "epoch": 1.8, + "learning_rate": 4.909864845449186e-05, + "loss": 2.5967, + "step": 364000 + }, + { + "epoch": 1.81, + "learning_rate": 4.909740986806577e-05, + "loss": 2.6098, + "step": 364500 + }, + { + "epoch": 1.81, + "learning_rate": 4.909617128163969e-05, + "loss": 2.6129, + "step": 365000 + }, + { + "epoch": 1.81, + "learning_rate": 4.9094932695213605e-05, + "loss": 2.5773, + "step": 365500 + }, + { + "epoch": 1.81, + "learning_rate": 4.9093696585960374e-05, + "loss": 2.6096, + "step": 366000 + }, + { + "epoch": 1.82, + "learning_rate": 4.909245799953429e-05, + "loss": 2.6181, + "step": 366500 + }, + { + "epoch": 1.82, + "learning_rate": 4.909121941310821e-05, + "loss": 2.5896, + "step": 367000 + }, + { + "epoch": 1.82, + "learning_rate": 4.9089980826682125e-05, + "loss": 2.5851, + "step": 367500 + }, + { + "epoch": 1.82, + "learning_rate": 4.90887447174289e-05, + "loss": 2.6158, + "step": 368000 + }, + { + "epoch": 1.83, + "learning_rate": 4.908750613100282e-05, + "loss": 2.5965, + "step": 368500 + }, + { + "epoch": 1.83, + "learning_rate": 4.908626754457673e-05, + "loss": 2.6232, + "step": 369000 + }, + { + "epoch": 1.83, + "learning_rate": 4.9085028958150644e-05, + "loss": 2.5897, + "step": 369500 + }, + { + "epoch": 1.83, + "learning_rate": 4.908379037172456e-05, + "loss": 2.577, + "step": 370000 + }, + { + "epoch": 1.84, + "learning_rate": 4.908255178529848e-05, + "loss": 2.6109, + "step": 370500 + }, + { + "epoch": 1.84, + "learning_rate": 4.9081313198872395e-05, + "loss": 2.602, + "step": 371000 + }, + { + "epoch": 1.84, + "learning_rate": 4.9080074612446305e-05, + "loss": 2.5674, + "step": 371500 + }, + { + "epoch": 1.84, + "learning_rate": 4.907883602602022e-05, + "loss": 2.5986, + "step": 372000 + }, + { + "epoch": 1.85, + "learning_rate": 4.907759743959414e-05, + "loss": 2.6051, + "step": 372500 + }, + { + "epoch": 1.85, + "learning_rate": 4.9076358853168056e-05, + "loss": 2.5999, + "step": 373000 + }, + { + "epoch": 1.85, + "learning_rate": 4.907512026674197e-05, + "loss": 2.6057, + "step": 373500 + }, + { + "epoch": 1.85, + "learning_rate": 4.907388168031589e-05, + "loss": 2.5942, + "step": 374000 + }, + { + "epoch": 1.86, + "learning_rate": 4.907264309388981e-05, + "loss": 2.6297, + "step": 374500 + }, + { + "epoch": 1.86, + "learning_rate": 4.9071404507463724e-05, + "loss": 2.5977, + "step": 375000 + }, + { + "epoch": 1.86, + "learning_rate": 4.907016592103764e-05, + "loss": 2.5997, + "step": 375500 + }, + { + "epoch": 1.86, + "learning_rate": 4.906892733461156e-05, + "loss": 2.6261, + "step": 376000 + }, + { + "epoch": 1.87, + "learning_rate": 4.906769370253118e-05, + "loss": 2.6139, + "step": 376500 + }, + { + "epoch": 1.87, + "learning_rate": 4.9066455116105095e-05, + "loss": 2.6229, + "step": 377000 + }, + { + "epoch": 1.87, + "learning_rate": 4.906521652967901e-05, + "loss": 2.6, + "step": 377500 + }, + { + "epoch": 1.87, + "learning_rate": 4.906397794325292e-05, + "loss": 2.5966, + "step": 378000 + }, + { + "epoch": 1.88, + "learning_rate": 4.906273935682684e-05, + "loss": 2.5734, + "step": 378500 + }, + { + "epoch": 1.88, + "learning_rate": 4.9061500770400756e-05, + "loss": 2.6094, + "step": 379000 + }, + { + "epoch": 1.88, + "learning_rate": 4.906026218397467e-05, + "loss": 2.5933, + "step": 379500 + }, + { + "epoch": 1.88, + "learning_rate": 4.905902359754859e-05, + "loss": 2.6042, + "step": 380000 + }, + { + "epoch": 1.89, + "learning_rate": 4.905778501112251e-05, + "loss": 2.6221, + "step": 380500 + }, + { + "epoch": 1.89, + "learning_rate": 4.9056548901869276e-05, + "loss": 2.5897, + "step": 381000 + }, + { + "epoch": 1.89, + "learning_rate": 4.905531279261605e-05, + "loss": 2.5985, + "step": 381500 + }, + { + "epoch": 1.89, + "learning_rate": 4.9054076683362813e-05, + "loss": 2.6131, + "step": 382000 + }, + { + "epoch": 1.9, + "learning_rate": 4.905283809693673e-05, + "loss": 2.6016, + "step": 382500 + }, + { + "epoch": 1.9, + "learning_rate": 4.905159951051065e-05, + "loss": 2.6079, + "step": 383000 + }, + { + "epoch": 1.9, + "learning_rate": 4.9050360924084564e-05, + "loss": 2.6093, + "step": 383500 + }, + { + "epoch": 1.9, + "learning_rate": 4.904912233765848e-05, + "loss": 2.6203, + "step": 384000 + }, + { + "epoch": 1.9, + "learning_rate": 4.90478837512324e-05, + "loss": 2.6072, + "step": 384500 + }, + { + "epoch": 1.91, + "learning_rate": 4.904664516480631e-05, + "loss": 2.5929, + "step": 385000 + }, + { + "epoch": 1.91, + "learning_rate": 4.9045406578380225e-05, + "loss": 2.6029, + "step": 385500 + }, + { + "epoch": 1.91, + "learning_rate": 4.9044170469127e-05, + "loss": 2.6103, + "step": 386000 + }, + { + "epoch": 1.91, + "learning_rate": 4.904293188270092e-05, + "loss": 2.5676, + "step": 386500 + }, + { + "epoch": 1.92, + "learning_rate": 4.9041693296274835e-05, + "loss": 2.6016, + "step": 387000 + }, + { + "epoch": 1.92, + "learning_rate": 4.904045470984875e-05, + "loss": 2.5875, + "step": 387500 + }, + { + "epoch": 1.92, + "learning_rate": 4.903921612342266e-05, + "loss": 2.6147, + "step": 388000 + }, + { + "epoch": 1.92, + "learning_rate": 4.903797753699658e-05, + "loss": 2.573, + "step": 388500 + }, + { + "epoch": 1.93, + "learning_rate": 4.903674142774335e-05, + "loss": 2.6102, + "step": 389000 + }, + { + "epoch": 1.93, + "learning_rate": 4.9035502841317264e-05, + "loss": 2.6018, + "step": 389500 + }, + { + "epoch": 1.93, + "learning_rate": 4.903426425489118e-05, + "loss": 2.5944, + "step": 390000 + }, + { + "epoch": 1.93, + "learning_rate": 4.90330256684651e-05, + "loss": 2.6172, + "step": 390500 + }, + { + "epoch": 1.94, + "learning_rate": 4.903178708203901e-05, + "loss": 2.5899, + "step": 391000 + }, + { + "epoch": 1.94, + "learning_rate": 4.9030548495612925e-05, + "loss": 2.5876, + "step": 391500 + }, + { + "epoch": 1.94, + "learning_rate": 4.902930990918684e-05, + "loss": 2.5859, + "step": 392000 + }, + { + "epoch": 1.94, + "learning_rate": 4.902807132276076e-05, + "loss": 2.586, + "step": 392500 + }, + { + "epoch": 1.95, + "learning_rate": 4.9026832736334676e-05, + "loss": 2.6084, + "step": 393000 + }, + { + "epoch": 1.95, + "learning_rate": 4.902559662708145e-05, + "loss": 2.6038, + "step": 393500 + }, + { + "epoch": 1.95, + "learning_rate": 4.9024360517828214e-05, + "loss": 2.5836, + "step": 394000 + }, + { + "epoch": 1.95, + "learning_rate": 4.902312193140213e-05, + "loss": 2.5929, + "step": 394500 + }, + { + "epoch": 1.96, + "learning_rate": 4.90218858221489e-05, + "loss": 2.5957, + "step": 395000 + }, + { + "epoch": 1.96, + "learning_rate": 4.9020647235722816e-05, + "loss": 2.582, + "step": 395500 + }, + { + "epoch": 1.96, + "learning_rate": 4.901940864929673e-05, + "loss": 2.6287, + "step": 396000 + }, + { + "epoch": 1.96, + "learning_rate": 4.901817006287065e-05, + "loss": 2.6059, + "step": 396500 + }, + { + "epoch": 1.97, + "learning_rate": 4.901693147644457e-05, + "loss": 2.5855, + "step": 397000 + }, + { + "epoch": 1.97, + "learning_rate": 4.9015692890018484e-05, + "loss": 2.5951, + "step": 397500 + }, + { + "epoch": 1.97, + "learning_rate": 4.90144543035924e-05, + "loss": 2.5817, + "step": 398000 + }, + { + "epoch": 1.97, + "learning_rate": 4.901321571716632e-05, + "loss": 2.6157, + "step": 398500 + }, + { + "epoch": 1.98, + "learning_rate": 4.9011977130740235e-05, + "loss": 2.596, + "step": 399000 + }, + { + "epoch": 1.98, + "learning_rate": 4.901073854431415e-05, + "loss": 2.5786, + "step": 399500 + }, + { + "epoch": 1.98, + "learning_rate": 4.900949995788807e-05, + "loss": 2.5946, + "step": 400000 + }, + { + "epoch": 1.98, + "learning_rate": 4.900826384863483e-05, + "loss": 2.6155, + "step": 400500 + }, + { + "epoch": 1.99, + "learning_rate": 4.900702526220875e-05, + "loss": 2.5889, + "step": 401000 + }, + { + "epoch": 1.99, + "learning_rate": 4.9005786675782665e-05, + "loss": 2.5796, + "step": 401500 + }, + { + "epoch": 1.99, + "learning_rate": 4.900454808935658e-05, + "loss": 2.5993, + "step": 402000 + }, + { + "epoch": 1.99, + "learning_rate": 4.900331198010335e-05, + "loss": 2.5894, + "step": 402500 + }, + { + "epoch": 2.0, + "learning_rate": 4.900207339367727e-05, + "loss": 2.6215, + "step": 403000 + }, + { + "epoch": 2.0, + "learning_rate": 4.9000834807251184e-05, + "loss": 2.5994, + "step": 403500 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.6286475772528078, + "eval_accuracy_mlm": 0.5813388926339267, + "eval_accuracy_nsp": 0.8516075133648939, + "eval_loss": 2.524798631668091, + "eval_runtime": 146.063, + "eval_samples_per_second": 1745.542, + "eval_steps_per_second": 72.736, + "step": 403686 + }, + { + "epoch": 2.0, + "learning_rate": 4.89995962208251e-05, + "loss": 2.5887, + "step": 404000 + }, + { + "epoch": 2.0, + "learning_rate": 4.899835763439902e-05, + "loss": 2.5834, + "step": 404500 + }, + { + "epoch": 2.01, + "learning_rate": 4.8997119047972935e-05, + "loss": 2.5753, + "step": 405000 + }, + { + "epoch": 2.01, + "learning_rate": 4.8995882938719704e-05, + "loss": 2.5542, + "step": 405500 + }, + { + "epoch": 2.01, + "learning_rate": 4.8994644352293614e-05, + "loss": 2.5592, + "step": 406000 + }, + { + "epoch": 2.01, + "learning_rate": 4.899340576586753e-05, + "loss": 2.5818, + "step": 406500 + }, + { + "epoch": 2.02, + "learning_rate": 4.899216717944145e-05, + "loss": 2.5571, + "step": 407000 + }, + { + "epoch": 2.02, + "learning_rate": 4.8990928593015365e-05, + "loss": 2.5746, + "step": 407500 + }, + { + "epoch": 2.02, + "learning_rate": 4.8989692483762133e-05, + "loss": 2.5663, + "step": 408000 + }, + { + "epoch": 2.02, + "learning_rate": 4.898845389733605e-05, + "loss": 2.5716, + "step": 408500 + }, + { + "epoch": 2.03, + "learning_rate": 4.898721778808282e-05, + "loss": 2.5595, + "step": 409000 + }, + { + "epoch": 2.03, + "learning_rate": 4.8985979201656736e-05, + "loss": 2.5811, + "step": 409500 + }, + { + "epoch": 2.03, + "learning_rate": 4.898474061523065e-05, + "loss": 2.5488, + "step": 410000 + }, + { + "epoch": 2.03, + "learning_rate": 4.898350202880457e-05, + "loss": 2.5651, + "step": 410500 + }, + { + "epoch": 2.04, + "learning_rate": 4.898226344237849e-05, + "loss": 2.5839, + "step": 411000 + }, + { + "epoch": 2.04, + "learning_rate": 4.8981024855952404e-05, + "loss": 2.577, + "step": 411500 + }, + { + "epoch": 2.04, + "learning_rate": 4.897978626952632e-05, + "loss": 2.5854, + "step": 412000 + }, + { + "epoch": 2.04, + "learning_rate": 4.897855016027308e-05, + "loss": 2.5607, + "step": 412500 + }, + { + "epoch": 2.05, + "learning_rate": 4.8977311573847e-05, + "loss": 2.5672, + "step": 413000 + }, + { + "epoch": 2.05, + "learning_rate": 4.897607298742092e-05, + "loss": 2.5866, + "step": 413500 + }, + { + "epoch": 2.05, + "learning_rate": 4.8974834400994834e-05, + "loss": 2.6007, + "step": 414000 + }, + { + "epoch": 2.05, + "learning_rate": 4.897359581456875e-05, + "loss": 2.5648, + "step": 414500 + }, + { + "epoch": 2.06, + "learning_rate": 4.897235722814267e-05, + "loss": 2.5832, + "step": 415000 + }, + { + "epoch": 2.06, + "learning_rate": 4.8971118641716584e-05, + "loss": 2.5968, + "step": 415500 + }, + { + "epoch": 2.06, + "learning_rate": 4.896988253246335e-05, + "loss": 2.5548, + "step": 416000 + }, + { + "epoch": 2.06, + "learning_rate": 4.896864394603727e-05, + "loss": 2.5688, + "step": 416500 + }, + { + "epoch": 2.07, + "learning_rate": 4.896740535961119e-05, + "loss": 2.5836, + "step": 417000 + }, + { + "epoch": 2.07, + "learning_rate": 4.8966166773185104e-05, + "loss": 2.571, + "step": 417500 + }, + { + "epoch": 2.07, + "learning_rate": 4.896492818675902e-05, + "loss": 2.5721, + "step": 418000 + }, + { + "epoch": 2.07, + "learning_rate": 4.896368960033294e-05, + "loss": 2.561, + "step": 418500 + }, + { + "epoch": 2.08, + "learning_rate": 4.8962451013906855e-05, + "loss": 2.5737, + "step": 419000 + }, + { + "epoch": 2.08, + "learning_rate": 4.8961212427480765e-05, + "loss": 2.559, + "step": 419500 + }, + { + "epoch": 2.08, + "learning_rate": 4.895997384105468e-05, + "loss": 2.5928, + "step": 420000 + }, + { + "epoch": 2.08, + "learning_rate": 4.89587352546286e-05, + "loss": 2.5754, + "step": 420500 + }, + { + "epoch": 2.09, + "learning_rate": 4.895749914537537e-05, + "loss": 2.5631, + "step": 421000 + }, + { + "epoch": 2.09, + "learning_rate": 4.8956260558949285e-05, + "loss": 2.5715, + "step": 421500 + }, + { + "epoch": 2.09, + "learning_rate": 4.89550219725232e-05, + "loss": 2.5942, + "step": 422000 + }, + { + "epoch": 2.09, + "learning_rate": 4.895378338609712e-05, + "loss": 2.5579, + "step": 422500 + }, + { + "epoch": 2.1, + "learning_rate": 4.8952544799671035e-05, + "loss": 2.5612, + "step": 423000 + }, + { + "epoch": 2.1, + "learning_rate": 4.895130621324495e-05, + "loss": 2.5945, + "step": 423500 + }, + { + "epoch": 2.1, + "learning_rate": 4.895007010399172e-05, + "loss": 2.547, + "step": 424000 + }, + { + "epoch": 2.1, + "learning_rate": 4.894883151756564e-05, + "loss": 2.5908, + "step": 424500 + }, + { + "epoch": 2.11, + "learning_rate": 4.8947592931139555e-05, + "loss": 2.5855, + "step": 425000 + }, + { + "epoch": 2.11, + "learning_rate": 4.894635434471347e-05, + "loss": 2.5796, + "step": 425500 + }, + { + "epoch": 2.11, + "learning_rate": 4.894511575828739e-05, + "loss": 2.5654, + "step": 426000 + }, + { + "epoch": 2.11, + "learning_rate": 4.89438771718613e-05, + "loss": 2.5478, + "step": 426500 + }, + { + "epoch": 2.12, + "learning_rate": 4.8942638585435216e-05, + "loss": 2.5889, + "step": 427000 + }, + { + "epoch": 2.12, + "learning_rate": 4.894139999900913e-05, + "loss": 2.5634, + "step": 427500 + }, + { + "epoch": 2.12, + "learning_rate": 4.894016141258305e-05, + "loss": 2.5461, + "step": 428000 + }, + { + "epoch": 2.12, + "learning_rate": 4.893892530332982e-05, + "loss": 2.5366, + "step": 428500 + }, + { + "epoch": 2.13, + "learning_rate": 4.893768919407659e-05, + "loss": 2.5651, + "step": 429000 + }, + { + "epoch": 2.13, + "learning_rate": 4.8936450607650504e-05, + "loss": 2.5911, + "step": 429500 + }, + { + "epoch": 2.13, + "learning_rate": 4.893521202122442e-05, + "loss": 2.5801, + "step": 430000 + }, + { + "epoch": 2.13, + "learning_rate": 4.893397591197118e-05, + "loss": 2.5771, + "step": 430500 + }, + { + "epoch": 2.14, + "learning_rate": 4.89327373255451e-05, + "loss": 2.5864, + "step": 431000 + }, + { + "epoch": 2.14, + "learning_rate": 4.8931501216291876e-05, + "loss": 2.5512, + "step": 431500 + }, + { + "epoch": 2.14, + "learning_rate": 4.893026262986579e-05, + "loss": 2.5651, + "step": 432000 + }, + { + "epoch": 2.14, + "learning_rate": 4.89290240434397e-05, + "loss": 2.5588, + "step": 432500 + }, + { + "epoch": 2.15, + "learning_rate": 4.892778545701362e-05, + "loss": 2.5744, + "step": 433000 + }, + { + "epoch": 2.15, + "learning_rate": 4.892654687058754e-05, + "loss": 2.5913, + "step": 433500 + }, + { + "epoch": 2.15, + "learning_rate": 4.8925308284161454e-05, + "loss": 2.5761, + "step": 434000 + }, + { + "epoch": 2.15, + "learning_rate": 4.892406969773537e-05, + "loss": 2.5819, + "step": 434500 + }, + { + "epoch": 2.16, + "learning_rate": 4.892283111130929e-05, + "loss": 2.5598, + "step": 435000 + }, + { + "epoch": 2.16, + "learning_rate": 4.8921592524883204e-05, + "loss": 2.574, + "step": 435500 + }, + { + "epoch": 2.16, + "learning_rate": 4.892035393845712e-05, + "loss": 2.5842, + "step": 436000 + }, + { + "epoch": 2.16, + "learning_rate": 4.891911535203104e-05, + "loss": 2.5627, + "step": 436500 + }, + { + "epoch": 2.17, + "learning_rate": 4.8917876765604955e-05, + "loss": 2.5587, + "step": 437000 + }, + { + "epoch": 2.17, + "learning_rate": 4.891663817917887e-05, + "loss": 2.5872, + "step": 437500 + }, + { + "epoch": 2.17, + "learning_rate": 4.891539959275279e-05, + "loss": 2.5496, + "step": 438000 + }, + { + "epoch": 2.17, + "learning_rate": 4.891416348349955e-05, + "loss": 2.5884, + "step": 438500 + }, + { + "epoch": 2.17, + "learning_rate": 4.891292489707347e-05, + "loss": 2.5475, + "step": 439000 + }, + { + "epoch": 2.18, + "learning_rate": 4.8911686310647385e-05, + "loss": 2.5432, + "step": 439500 + }, + { + "epoch": 2.18, + "learning_rate": 4.8910450201394154e-05, + "loss": 2.5509, + "step": 440000 + }, + { + "epoch": 2.18, + "learning_rate": 4.890921161496807e-05, + "loss": 2.5689, + "step": 440500 + }, + { + "epoch": 2.18, + "learning_rate": 4.890797302854199e-05, + "loss": 2.5687, + "step": 441000 + }, + { + "epoch": 2.19, + "learning_rate": 4.8906734442115904e-05, + "loss": 2.5651, + "step": 441500 + }, + { + "epoch": 2.19, + "learning_rate": 4.890549585568982e-05, + "loss": 2.5761, + "step": 442000 + }, + { + "epoch": 2.19, + "learning_rate": 4.8904267177955146e-05, + "loss": 2.5642, + "step": 442500 + }, + { + "epoch": 2.19, + "learning_rate": 4.890302859152906e-05, + "loss": 2.5557, + "step": 443000 + }, + { + "epoch": 2.2, + "learning_rate": 4.890179000510298e-05, + "loss": 2.575, + "step": 443500 + }, + { + "epoch": 2.2, + "learning_rate": 4.890055141867689e-05, + "loss": 2.5907, + "step": 444000 + }, + { + "epoch": 2.2, + "learning_rate": 4.889931283225081e-05, + "loss": 2.5731, + "step": 444500 + }, + { + "epoch": 2.2, + "learning_rate": 4.8898074245824724e-05, + "loss": 2.5548, + "step": 445000 + }, + { + "epoch": 2.21, + "learning_rate": 4.889683565939864e-05, + "loss": 2.5467, + "step": 445500 + }, + { + "epoch": 2.21, + "learning_rate": 4.889559707297256e-05, + "loss": 2.592, + "step": 446000 + }, + { + "epoch": 2.21, + "learning_rate": 4.8894358486546475e-05, + "loss": 2.5996, + "step": 446500 + }, + { + "epoch": 2.21, + "learning_rate": 4.889311990012039e-05, + "loss": 2.5768, + "step": 447000 + }, + { + "epoch": 2.22, + "learning_rate": 4.889188131369431e-05, + "loss": 2.5667, + "step": 447500 + }, + { + "epoch": 2.22, + "learning_rate": 4.8890642727268225e-05, + "loss": 2.5699, + "step": 448000 + }, + { + "epoch": 2.22, + "learning_rate": 4.888940414084214e-05, + "loss": 2.5794, + "step": 448500 + }, + { + "epoch": 2.22, + "learning_rate": 4.888816555441606e-05, + "loss": 2.5778, + "step": 449000 + }, + { + "epoch": 2.23, + "learning_rate": 4.8886926967989976e-05, + "loss": 2.5842, + "step": 449500 + }, + { + "epoch": 2.23, + "learning_rate": 4.888568838156389e-05, + "loss": 2.5859, + "step": 450000 + }, + { + "epoch": 2.23, + "learning_rate": 4.888444979513781e-05, + "loss": 2.5658, + "step": 450500 + }, + { + "epoch": 2.23, + "learning_rate": 4.888321120871172e-05, + "loss": 2.5744, + "step": 451000 + }, + { + "epoch": 2.24, + "learning_rate": 4.888197262228564e-05, + "loss": 2.5542, + "step": 451500 + }, + { + "epoch": 2.24, + "learning_rate": 4.8880734035859554e-05, + "loss": 2.5659, + "step": 452000 + }, + { + "epoch": 2.24, + "learning_rate": 4.887949544943347e-05, + "loss": 2.5667, + "step": 452500 + }, + { + "epoch": 2.24, + "learning_rate": 4.887825686300739e-05, + "loss": 2.5916, + "step": 453000 + }, + { + "epoch": 2.25, + "learning_rate": 4.887702075375416e-05, + "loss": 2.5724, + "step": 453500 + }, + { + "epoch": 2.25, + "learning_rate": 4.8875782167328074e-05, + "loss": 2.5604, + "step": 454000 + }, + { + "epoch": 2.25, + "learning_rate": 4.887454358090199e-05, + "loss": 2.556, + "step": 454500 + }, + { + "epoch": 2.25, + "learning_rate": 4.887330499447591e-05, + "loss": 2.569, + "step": 455000 + }, + { + "epoch": 2.26, + "learning_rate": 4.8872066408049824e-05, + "loss": 2.5667, + "step": 455500 + }, + { + "epoch": 2.26, + "learning_rate": 4.887082782162374e-05, + "loss": 2.5849, + "step": 456000 + }, + { + "epoch": 2.26, + "learning_rate": 4.886958923519765e-05, + "loss": 2.5702, + "step": 456500 + }, + { + "epoch": 2.26, + "learning_rate": 4.886835064877157e-05, + "loss": 2.5687, + "step": 457000 + }, + { + "epoch": 2.27, + "learning_rate": 4.886711453951834e-05, + "loss": 2.5787, + "step": 457500 + }, + { + "epoch": 2.27, + "learning_rate": 4.8865875953092254e-05, + "loss": 2.5678, + "step": 458000 + }, + { + "epoch": 2.27, + "learning_rate": 4.886463736666617e-05, + "loss": 2.5795, + "step": 458500 + }, + { + "epoch": 2.27, + "learning_rate": 4.8863401257412947e-05, + "loss": 2.5772, + "step": 459000 + }, + { + "epoch": 2.28, + "learning_rate": 4.8862162670986864e-05, + "loss": 2.5546, + "step": 459500 + }, + { + "epoch": 2.28, + "learning_rate": 4.886092408456078e-05, + "loss": 2.5824, + "step": 460000 + }, + { + "epoch": 2.28, + "learning_rate": 4.885968549813469e-05, + "loss": 2.5854, + "step": 460500 + }, + { + "epoch": 2.28, + "learning_rate": 4.885844691170861e-05, + "loss": 2.5993, + "step": 461000 + }, + { + "epoch": 2.29, + "learning_rate": 4.8857208325282524e-05, + "loss": 2.5536, + "step": 461500 + }, + { + "epoch": 2.29, + "learning_rate": 4.885596973885644e-05, + "loss": 2.5607, + "step": 462000 + }, + { + "epoch": 2.29, + "learning_rate": 4.885473362960321e-05, + "loss": 2.5777, + "step": 462500 + }, + { + "epoch": 2.29, + "learning_rate": 4.885349504317713e-05, + "loss": 2.5489, + "step": 463000 + }, + { + "epoch": 2.3, + "learning_rate": 4.885225645675104e-05, + "loss": 2.5935, + "step": 463500 + }, + { + "epoch": 2.3, + "learning_rate": 4.8851017870324954e-05, + "loss": 2.5599, + "step": 464000 + }, + { + "epoch": 2.3, + "learning_rate": 4.884978176107173e-05, + "loss": 2.5644, + "step": 464500 + }, + { + "epoch": 2.3, + "learning_rate": 4.884854317464565e-05, + "loss": 2.5661, + "step": 465000 + }, + { + "epoch": 2.31, + "learning_rate": 4.8847304588219564e-05, + "loss": 2.5732, + "step": 465500 + }, + { + "epoch": 2.31, + "learning_rate": 4.884606600179348e-05, + "loss": 2.5702, + "step": 466000 + }, + { + "epoch": 2.31, + "learning_rate": 4.88448274153674e-05, + "loss": 2.5586, + "step": 466500 + }, + { + "epoch": 2.31, + "learning_rate": 4.884359130611416e-05, + "loss": 2.574, + "step": 467000 + }, + { + "epoch": 2.32, + "learning_rate": 4.8842352719688076e-05, + "loss": 2.5618, + "step": 467500 + }, + { + "epoch": 2.32, + "learning_rate": 4.884111413326199e-05, + "loss": 2.5526, + "step": 468000 + }, + { + "epoch": 2.32, + "learning_rate": 4.883987554683591e-05, + "loss": 2.5749, + "step": 468500 + }, + { + "epoch": 2.32, + "learning_rate": 4.883863696040983e-05, + "loss": 2.5694, + "step": 469000 + }, + { + "epoch": 2.33, + "learning_rate": 4.8837398373983744e-05, + "loss": 2.5888, + "step": 469500 + }, + { + "epoch": 2.33, + "learning_rate": 4.8836159787557654e-05, + "loss": 2.5595, + "step": 470000 + }, + { + "epoch": 2.33, + "learning_rate": 4.883492120113157e-05, + "loss": 2.5967, + "step": 470500 + }, + { + "epoch": 2.33, + "learning_rate": 4.883368261470549e-05, + "loss": 2.5484, + "step": 471000 + }, + { + "epoch": 2.34, + "learning_rate": 4.8832444028279405e-05, + "loss": 2.5715, + "step": 471500 + }, + { + "epoch": 2.34, + "learning_rate": 4.883120544185332e-05, + "loss": 2.5756, + "step": 472000 + }, + { + "epoch": 2.34, + "learning_rate": 4.882996685542724e-05, + "loss": 2.5513, + "step": 472500 + }, + { + "epoch": 2.34, + "learning_rate": 4.8828728269001156e-05, + "loss": 2.5952, + "step": 473000 + }, + { + "epoch": 2.35, + "learning_rate": 4.8827492159747925e-05, + "loss": 2.5728, + "step": 473500 + }, + { + "epoch": 2.35, + "learning_rate": 4.882625357332184e-05, + "loss": 2.577, + "step": 474000 + }, + { + "epoch": 2.35, + "learning_rate": 4.882501498689576e-05, + "loss": 2.6014, + "step": 474500 + }, + { + "epoch": 2.35, + "learning_rate": 4.882377887764253e-05, + "loss": 2.5732, + "step": 475000 + }, + { + "epoch": 2.36, + "learning_rate": 4.8822540291216444e-05, + "loss": 2.559, + "step": 475500 + }, + { + "epoch": 2.36, + "learning_rate": 4.8821301704790354e-05, + "loss": 2.5574, + "step": 476000 + }, + { + "epoch": 2.36, + "learning_rate": 4.882006311836427e-05, + "loss": 2.5518, + "step": 476500 + }, + { + "epoch": 2.36, + "learning_rate": 4.881882453193819e-05, + "loss": 2.5777, + "step": 477000 + }, + { + "epoch": 2.37, + "learning_rate": 4.8817585945512105e-05, + "loss": 2.5795, + "step": 477500 + }, + { + "epoch": 2.37, + "learning_rate": 4.881634735908602e-05, + "loss": 2.5723, + "step": 478000 + }, + { + "epoch": 2.37, + "learning_rate": 4.881510877265994e-05, + "loss": 2.5972, + "step": 478500 + }, + { + "epoch": 2.37, + "learning_rate": 4.8813872663406715e-05, + "loss": 2.5405, + "step": 479000 + }, + { + "epoch": 2.38, + "learning_rate": 4.8812634076980625e-05, + "loss": 2.6003, + "step": 479500 + }, + { + "epoch": 2.38, + "learning_rate": 4.881139549055454e-05, + "loss": 2.5426, + "step": 480000 + }, + { + "epoch": 2.38, + "learning_rate": 4.881015690412846e-05, + "loss": 2.587, + "step": 480500 + }, + { + "epoch": 2.38, + "learning_rate": 4.8808918317702376e-05, + "loss": 2.5803, + "step": 481000 + }, + { + "epoch": 2.39, + "learning_rate": 4.880767973127629e-05, + "loss": 2.5914, + "step": 481500 + }, + { + "epoch": 2.39, + "learning_rate": 4.880644114485021e-05, + "loss": 2.5622, + "step": 482000 + }, + { + "epoch": 2.39, + "learning_rate": 4.8805202558424126e-05, + "loss": 2.5813, + "step": 482500 + }, + { + "epoch": 2.39, + "learning_rate": 4.880396397199804e-05, + "loss": 2.5929, + "step": 483000 + }, + { + "epoch": 2.4, + "learning_rate": 4.8802727862744805e-05, + "loss": 2.5919, + "step": 483500 + }, + { + "epoch": 2.4, + "learning_rate": 4.880148927631872e-05, + "loss": 2.5565, + "step": 484000 + }, + { + "epoch": 2.4, + "learning_rate": 4.88002531670655e-05, + "loss": 2.5604, + "step": 484500 + }, + { + "epoch": 2.4, + "learning_rate": 4.8799014580639415e-05, + "loss": 2.5575, + "step": 485000 + }, + { + "epoch": 2.41, + "learning_rate": 4.8797775994213325e-05, + "loss": 2.5634, + "step": 485500 + }, + { + "epoch": 2.41, + "learning_rate": 4.879653740778724e-05, + "loss": 2.5762, + "step": 486000 + }, + { + "epoch": 2.41, + "learning_rate": 4.879529882136116e-05, + "loss": 2.5648, + "step": 486500 + }, + { + "epoch": 2.41, + "learning_rate": 4.8794060234935076e-05, + "loss": 2.5592, + "step": 487000 + }, + { + "epoch": 2.42, + "learning_rate": 4.879282164850899e-05, + "loss": 2.563, + "step": 487500 + }, + { + "epoch": 2.42, + "learning_rate": 4.879158553925576e-05, + "loss": 2.6136, + "step": 488000 + }, + { + "epoch": 2.42, + "learning_rate": 4.879034695282967e-05, + "loss": 2.5676, + "step": 488500 + }, + { + "epoch": 2.42, + "learning_rate": 4.878910836640359e-05, + "loss": 2.5787, + "step": 489000 + }, + { + "epoch": 2.43, + "learning_rate": 4.8787872257150364e-05, + "loss": 2.5805, + "step": 489500 + }, + { + "epoch": 2.43, + "learning_rate": 4.878663367072428e-05, + "loss": 2.5883, + "step": 490000 + }, + { + "epoch": 2.43, + "learning_rate": 4.87853950842982e-05, + "loss": 2.5649, + "step": 490500 + }, + { + "epoch": 2.43, + "learning_rate": 4.878415897504497e-05, + "loss": 2.5689, + "step": 491000 + }, + { + "epoch": 2.44, + "learning_rate": 4.878292038861888e-05, + "loss": 2.5621, + "step": 491500 + }, + { + "epoch": 2.44, + "learning_rate": 4.8781681802192794e-05, + "loss": 2.586, + "step": 492000 + }, + { + "epoch": 2.44, + "learning_rate": 4.878044321576671e-05, + "loss": 2.5845, + "step": 492500 + }, + { + "epoch": 2.44, + "learning_rate": 4.877920462934063e-05, + "loss": 2.5749, + "step": 493000 + }, + { + "epoch": 2.44, + "learning_rate": 4.8777966042914545e-05, + "loss": 2.5925, + "step": 493500 + }, + { + "epoch": 2.45, + "learning_rate": 4.877672745648846e-05, + "loss": 2.5829, + "step": 494000 + }, + { + "epoch": 2.45, + "learning_rate": 4.877549134723523e-05, + "loss": 2.5584, + "step": 494500 + }, + { + "epoch": 2.45, + "learning_rate": 4.877425276080915e-05, + "loss": 2.562, + "step": 495000 + }, + { + "epoch": 2.45, + "learning_rate": 4.8773014174383064e-05, + "loss": 2.5678, + "step": 495500 + }, + { + "epoch": 2.46, + "learning_rate": 4.877177558795698e-05, + "loss": 2.5516, + "step": 496000 + }, + { + "epoch": 2.46, + "learning_rate": 4.87705370015309e-05, + "loss": 2.567, + "step": 496500 + }, + { + "epoch": 2.46, + "learning_rate": 4.8769298415104815e-05, + "loss": 2.5885, + "step": 497000 + }, + { + "epoch": 2.46, + "learning_rate": 4.876805982867873e-05, + "loss": 2.5813, + "step": 497500 + }, + { + "epoch": 2.47, + "learning_rate": 4.876682124225264e-05, + "loss": 2.5801, + "step": 498000 + }, + { + "epoch": 2.47, + "learning_rate": 4.876558265582656e-05, + "loss": 2.567, + "step": 498500 + }, + { + "epoch": 2.47, + "learning_rate": 4.8764344069400476e-05, + "loss": 2.5659, + "step": 499000 + }, + { + "epoch": 2.47, + "learning_rate": 4.876310548297439e-05, + "loss": 2.572, + "step": 499500 + }, + { + "epoch": 2.48, + "learning_rate": 4.876186689654831e-05, + "loss": 2.5771, + "step": 500000 + }, + { + "epoch": 2.48, + "learning_rate": 4.876062831012223e-05, + "loss": 2.5942, + "step": 500500 + }, + { + "epoch": 2.48, + "learning_rate": 4.8759389723696144e-05, + "loss": 2.58, + "step": 501000 + }, + { + "epoch": 2.48, + "learning_rate": 4.875815113727006e-05, + "loss": 2.5474, + "step": 501500 + }, + { + "epoch": 2.49, + "learning_rate": 4.875691502801682e-05, + "loss": 2.5899, + "step": 502000 + }, + { + "epoch": 2.49, + "learning_rate": 4.87556789187636e-05, + "loss": 2.581, + "step": 502500 + }, + { + "epoch": 2.49, + "learning_rate": 4.8754440332337515e-05, + "loss": 2.5766, + "step": 503000 + }, + { + "epoch": 2.49, + "learning_rate": 4.875320174591143e-05, + "loss": 2.5461, + "step": 503500 + }, + { + "epoch": 2.5, + "learning_rate": 4.875196315948535e-05, + "loss": 2.5437, + "step": 504000 + }, + { + "epoch": 2.5, + "learning_rate": 4.875072457305926e-05, + "loss": 2.566, + "step": 504500 + }, + { + "epoch": 2.5, + "learning_rate": 4.8749485986633176e-05, + "loss": 2.5592, + "step": 505000 + }, + { + "epoch": 2.5, + "learning_rate": 4.8748249877379945e-05, + "loss": 2.5718, + "step": 505500 + }, + { + "epoch": 2.51, + "learning_rate": 4.874701129095386e-05, + "loss": 2.554, + "step": 506000 + }, + { + "epoch": 2.51, + "learning_rate": 4.874577270452778e-05, + "loss": 2.5343, + "step": 506500 + }, + { + "epoch": 2.51, + "learning_rate": 4.8744534118101696e-05, + "loss": 2.5618, + "step": 507000 + }, + { + "epoch": 2.51, + "learning_rate": 4.8743295531675606e-05, + "loss": 2.5622, + "step": 507500 + }, + { + "epoch": 2.52, + "learning_rate": 4.874205694524952e-05, + "loss": 2.5814, + "step": 508000 + }, + { + "epoch": 2.52, + "learning_rate": 4.874081835882344e-05, + "loss": 2.5951, + "step": 508500 + }, + { + "epoch": 2.52, + "learning_rate": 4.8739582249570215e-05, + "loss": 2.5568, + "step": 509000 + }, + { + "epoch": 2.52, + "learning_rate": 4.873834366314413e-05, + "loss": 2.5576, + "step": 509500 + }, + { + "epoch": 2.53, + "learning_rate": 4.873710507671805e-05, + "loss": 2.5658, + "step": 510000 + }, + { + "epoch": 2.53, + "learning_rate": 4.873586649029196e-05, + "loss": 2.5628, + "step": 510500 + }, + { + "epoch": 2.53, + "learning_rate": 4.8734630381038735e-05, + "loss": 2.5771, + "step": 511000 + }, + { + "epoch": 2.53, + "learning_rate": 4.8733391794612645e-05, + "loss": 2.5623, + "step": 511500 + }, + { + "epoch": 2.54, + "learning_rate": 4.873215320818656e-05, + "loss": 2.552, + "step": 512000 + }, + { + "epoch": 2.54, + "learning_rate": 4.873091462176048e-05, + "loss": 2.5771, + "step": 512500 + }, + { + "epoch": 2.54, + "learning_rate": 4.8729676035334396e-05, + "loss": 2.5683, + "step": 513000 + }, + { + "epoch": 2.54, + "learning_rate": 4.872843744890831e-05, + "loss": 2.5772, + "step": 513500 + }, + { + "epoch": 2.55, + "learning_rate": 4.872719886248222e-05, + "loss": 2.5589, + "step": 514000 + }, + { + "epoch": 2.55, + "learning_rate": 4.872596027605614e-05, + "loss": 2.5793, + "step": 514500 + }, + { + "epoch": 2.55, + "learning_rate": 4.872472168963006e-05, + "loss": 2.5705, + "step": 515000 + }, + { + "epoch": 2.55, + "learning_rate": 4.8723483103203974e-05, + "loss": 2.5768, + "step": 515500 + }, + { + "epoch": 2.56, + "learning_rate": 4.872224451677789e-05, + "loss": 2.5792, + "step": 516000 + }, + { + "epoch": 2.56, + "learning_rate": 4.872100593035181e-05, + "loss": 2.538, + "step": 516500 + }, + { + "epoch": 2.56, + "learning_rate": 4.8719767343925724e-05, + "loss": 2.5786, + "step": 517000 + }, + { + "epoch": 2.56, + "learning_rate": 4.871852875749964e-05, + "loss": 2.5611, + "step": 517500 + }, + { + "epoch": 2.57, + "learning_rate": 4.871729017107356e-05, + "loss": 2.5601, + "step": 518000 + }, + { + "epoch": 2.57, + "learning_rate": 4.8716051584647475e-05, + "loss": 2.5772, + "step": 518500 + }, + { + "epoch": 2.57, + "learning_rate": 4.8714817952567096e-05, + "loss": 2.5865, + "step": 519000 + }, + { + "epoch": 2.57, + "learning_rate": 4.871357936614101e-05, + "loss": 2.5592, + "step": 519500 + }, + { + "epoch": 2.58, + "learning_rate": 4.871234077971493e-05, + "loss": 2.5937, + "step": 520000 + }, + { + "epoch": 2.58, + "learning_rate": 4.871110219328885e-05, + "loss": 2.5601, + "step": 520500 + }, + { + "epoch": 2.58, + "learning_rate": 4.870986360686276e-05, + "loss": 2.5464, + "step": 521000 + }, + { + "epoch": 2.58, + "learning_rate": 4.8708625020436674e-05, + "loss": 2.5565, + "step": 521500 + }, + { + "epoch": 2.59, + "learning_rate": 4.870738643401059e-05, + "loss": 2.5702, + "step": 522000 + }, + { + "epoch": 2.59, + "learning_rate": 4.870614784758451e-05, + "loss": 2.5839, + "step": 522500 + }, + { + "epoch": 2.59, + "learning_rate": 4.8704911738331276e-05, + "loss": 2.5712, + "step": 523000 + }, + { + "epoch": 2.59, + "learning_rate": 4.8703673151905193e-05, + "loss": 2.5777, + "step": 523500 + }, + { + "epoch": 2.6, + "learning_rate": 4.870243704265197e-05, + "loss": 2.56, + "step": 524000 + }, + { + "epoch": 2.6, + "learning_rate": 4.8701198456225886e-05, + "loss": 2.5688, + "step": 524500 + }, + { + "epoch": 2.6, + "learning_rate": 4.8699959869799796e-05, + "loss": 2.5656, + "step": 525000 + }, + { + "epoch": 2.6, + "learning_rate": 4.8698723760546565e-05, + "loss": 2.5649, + "step": 525500 + }, + { + "epoch": 2.61, + "learning_rate": 4.869748517412048e-05, + "loss": 2.5436, + "step": 526000 + }, + { + "epoch": 2.61, + "learning_rate": 4.86962465876944e-05, + "loss": 2.5476, + "step": 526500 + }, + { + "epoch": 2.61, + "learning_rate": 4.8695008001268316e-05, + "loss": 2.5696, + "step": 527000 + }, + { + "epoch": 2.61, + "learning_rate": 4.869376941484223e-05, + "loss": 2.5495, + "step": 527500 + }, + { + "epoch": 2.62, + "learning_rate": 4.869253082841615e-05, + "loss": 2.5808, + "step": 528000 + }, + { + "epoch": 2.62, + "learning_rate": 4.8691292241990066e-05, + "loss": 2.5641, + "step": 528500 + }, + { + "epoch": 2.62, + "learning_rate": 4.869005365556398e-05, + "loss": 2.5714, + "step": 529000 + }, + { + "epoch": 2.62, + "learning_rate": 4.8688815069137894e-05, + "loss": 2.5913, + "step": 529500 + }, + { + "epoch": 2.63, + "learning_rate": 4.868757648271181e-05, + "loss": 2.5783, + "step": 530000 + }, + { + "epoch": 2.63, + "learning_rate": 4.868633789628573e-05, + "loss": 2.57, + "step": 530500 + }, + { + "epoch": 2.63, + "learning_rate": 4.86851017870325e-05, + "loss": 2.5547, + "step": 531000 + }, + { + "epoch": 2.63, + "learning_rate": 4.868386320060642e-05, + "loss": 2.5468, + "step": 531500 + }, + { + "epoch": 2.64, + "learning_rate": 4.868262461418033e-05, + "loss": 2.5864, + "step": 532000 + }, + { + "epoch": 2.64, + "learning_rate": 4.868138602775425e-05, + "loss": 2.5843, + "step": 532500 + }, + { + "epoch": 2.64, + "learning_rate": 4.8680147441328164e-05, + "loss": 2.5719, + "step": 533000 + }, + { + "epoch": 2.64, + "learning_rate": 4.867891133207493e-05, + "loss": 2.5732, + "step": 533500 + }, + { + "epoch": 2.65, + "learning_rate": 4.867767274564885e-05, + "loss": 2.5765, + "step": 534000 + }, + { + "epoch": 2.65, + "learning_rate": 4.8676434159222767e-05, + "loss": 2.5757, + "step": 534500 + }, + { + "epoch": 2.65, + "learning_rate": 4.8675195572796683e-05, + "loss": 2.5774, + "step": 535000 + }, + { + "epoch": 2.65, + "learning_rate": 4.8673956986370594e-05, + "loss": 2.554, + "step": 535500 + }, + { + "epoch": 2.66, + "learning_rate": 4.867272087711737e-05, + "loss": 2.5533, + "step": 536000 + }, + { + "epoch": 2.66, + "learning_rate": 4.8671482290691286e-05, + "loss": 2.5772, + "step": 536500 + }, + { + "epoch": 2.66, + "learning_rate": 4.86702437042652e-05, + "loss": 2.5513, + "step": 537000 + }, + { + "epoch": 2.66, + "learning_rate": 4.866900511783912e-05, + "loss": 2.5708, + "step": 537500 + }, + { + "epoch": 2.67, + "learning_rate": 4.866776653141304e-05, + "loss": 2.5684, + "step": 538000 + }, + { + "epoch": 2.67, + "learning_rate": 4.866652794498695e-05, + "loss": 2.5832, + "step": 538500 + }, + { + "epoch": 2.67, + "learning_rate": 4.8665291835733716e-05, + "loss": 2.5732, + "step": 539000 + }, + { + "epoch": 2.67, + "learning_rate": 4.866405324930763e-05, + "loss": 2.5757, + "step": 539500 + }, + { + "epoch": 2.68, + "learning_rate": 4.86628171400544e-05, + "loss": 2.5808, + "step": 540000 + }, + { + "epoch": 2.68, + "learning_rate": 4.866157855362832e-05, + "loss": 2.5977, + "step": 540500 + }, + { + "epoch": 2.68, + "learning_rate": 4.8660339967202236e-05, + "loss": 2.5599, + "step": 541000 + }, + { + "epoch": 2.68, + "learning_rate": 4.865910138077615e-05, + "loss": 2.5493, + "step": 541500 + }, + { + "epoch": 2.69, + "learning_rate": 4.865786279435007e-05, + "loss": 2.5864, + "step": 542000 + }, + { + "epoch": 2.69, + "learning_rate": 4.8656624207923986e-05, + "loss": 2.5611, + "step": 542500 + }, + { + "epoch": 2.69, + "learning_rate": 4.86553856214979e-05, + "loss": 2.5601, + "step": 543000 + }, + { + "epoch": 2.69, + "learning_rate": 4.865414703507182e-05, + "loss": 2.5715, + "step": 543500 + }, + { + "epoch": 2.7, + "learning_rate": 4.865291092581858e-05, + "loss": 2.5968, + "step": 544000 + }, + { + "epoch": 2.7, + "learning_rate": 4.86516723393925e-05, + "loss": 2.561, + "step": 544500 + }, + { + "epoch": 2.7, + "learning_rate": 4.8650433752966416e-05, + "loss": 2.593, + "step": 545000 + }, + { + "epoch": 2.7, + "learning_rate": 4.864919516654033e-05, + "loss": 2.5692, + "step": 545500 + }, + { + "epoch": 2.71, + "learning_rate": 4.864795658011425e-05, + "loss": 2.5501, + "step": 546000 + }, + { + "epoch": 2.71, + "learning_rate": 4.864671799368817e-05, + "loss": 2.5733, + "step": 546500 + }, + { + "epoch": 2.71, + "learning_rate": 4.8645481884434936e-05, + "loss": 2.5761, + "step": 547000 + }, + { + "epoch": 2.71, + "learning_rate": 4.864424329800885e-05, + "loss": 2.5604, + "step": 547500 + }, + { + "epoch": 2.71, + "learning_rate": 4.864300471158277e-05, + "loss": 2.5876, + "step": 548000 + }, + { + "epoch": 2.72, + "learning_rate": 4.8641766125156686e-05, + "loss": 2.5696, + "step": 548500 + }, + { + "epoch": 2.72, + "learning_rate": 4.86405275387306e-05, + "loss": 2.5717, + "step": 549000 + }, + { + "epoch": 2.72, + "learning_rate": 4.863928895230452e-05, + "loss": 2.5383, + "step": 549500 + }, + { + "epoch": 2.72, + "learning_rate": 4.863805284305128e-05, + "loss": 2.5773, + "step": 550000 + }, + { + "epoch": 2.73, + "learning_rate": 4.863681673379805e-05, + "loss": 2.5818, + "step": 550500 + }, + { + "epoch": 2.73, + "learning_rate": 4.863557814737197e-05, + "loss": 2.5148, + "step": 551000 + }, + { + "epoch": 2.73, + "learning_rate": 4.8634339560945885e-05, + "loss": 2.5935, + "step": 551500 + }, + { + "epoch": 2.73, + "learning_rate": 4.86331009745198e-05, + "loss": 2.5585, + "step": 552000 + }, + { + "epoch": 2.74, + "learning_rate": 4.863186238809372e-05, + "loss": 2.5496, + "step": 552500 + }, + { + "epoch": 2.74, + "learning_rate": 4.8630623801667636e-05, + "loss": 2.5555, + "step": 553000 + }, + { + "epoch": 2.74, + "learning_rate": 4.862938521524155e-05, + "loss": 2.547, + "step": 553500 + }, + { + "epoch": 2.74, + "learning_rate": 4.862814662881547e-05, + "loss": 2.567, + "step": 554000 + }, + { + "epoch": 2.75, + "learning_rate": 4.8626908042389387e-05, + "loss": 2.554, + "step": 554500 + }, + { + "epoch": 2.75, + "learning_rate": 4.8625669455963303e-05, + "loss": 2.5619, + "step": 555000 + }, + { + "epoch": 2.75, + "learning_rate": 4.862443582388292e-05, + "loss": 2.5697, + "step": 555500 + }, + { + "epoch": 2.75, + "learning_rate": 4.8623197237456834e-05, + "loss": 2.5523, + "step": 556000 + }, + { + "epoch": 2.76, + "learning_rate": 4.862195865103075e-05, + "loss": 2.568, + "step": 556500 + }, + { + "epoch": 2.76, + "learning_rate": 4.862072006460467e-05, + "loss": 2.5945, + "step": 557000 + }, + { + "epoch": 2.76, + "learning_rate": 4.861948643252429e-05, + "loss": 2.5638, + "step": 557500 + }, + { + "epoch": 2.76, + "learning_rate": 4.8618247846098206e-05, + "loss": 2.56, + "step": 558000 + }, + { + "epoch": 2.77, + "learning_rate": 4.861700925967212e-05, + "loss": 2.5682, + "step": 558500 + }, + { + "epoch": 2.77, + "learning_rate": 4.861577067324604e-05, + "loss": 2.5404, + "step": 559000 + }, + { + "epoch": 2.77, + "learning_rate": 4.8614532086819957e-05, + "loss": 2.5794, + "step": 559500 + }, + { + "epoch": 2.77, + "learning_rate": 4.8613293500393873e-05, + "loss": 2.5669, + "step": 560000 + }, + { + "epoch": 2.78, + "learning_rate": 4.861205491396779e-05, + "loss": 2.5423, + "step": 560500 + }, + { + "epoch": 2.78, + "learning_rate": 4.86108163275417e-05, + "loss": 2.5711, + "step": 561000 + }, + { + "epoch": 2.78, + "learning_rate": 4.860957774111562e-05, + "loss": 2.5841, + "step": 561500 + }, + { + "epoch": 2.78, + "learning_rate": 4.8608339154689534e-05, + "loss": 2.563, + "step": 562000 + }, + { + "epoch": 2.79, + "learning_rate": 4.860710056826345e-05, + "loss": 2.5802, + "step": 562500 + }, + { + "epoch": 2.79, + "learning_rate": 4.860586198183737e-05, + "loss": 2.5727, + "step": 563000 + }, + { + "epoch": 2.79, + "learning_rate": 4.8604625872584144e-05, + "loss": 2.5798, + "step": 563500 + }, + { + "epoch": 2.79, + "learning_rate": 4.860338728615806e-05, + "loss": 2.5647, + "step": 564000 + }, + { + "epoch": 2.8, + "learning_rate": 4.860214869973197e-05, + "loss": 2.5512, + "step": 564500 + }, + { + "epoch": 2.8, + "learning_rate": 4.860091011330589e-05, + "loss": 2.5654, + "step": 565000 + }, + { + "epoch": 2.8, + "learning_rate": 4.8599671526879805e-05, + "loss": 2.5424, + "step": 565500 + }, + { + "epoch": 2.8, + "learning_rate": 4.859843294045372e-05, + "loss": 2.554, + "step": 566000 + }, + { + "epoch": 2.81, + "learning_rate": 4.859719435402764e-05, + "loss": 2.5894, + "step": 566500 + }, + { + "epoch": 2.81, + "learning_rate": 4.8595955767601556e-05, + "loss": 2.551, + "step": 567000 + }, + { + "epoch": 2.81, + "learning_rate": 4.859471965834832e-05, + "loss": 2.5805, + "step": 567500 + }, + { + "epoch": 2.81, + "learning_rate": 4.8593481071922235e-05, + "loss": 2.5897, + "step": 568000 + }, + { + "epoch": 2.82, + "learning_rate": 4.859224248549615e-05, + "loss": 2.5625, + "step": 568500 + }, + { + "epoch": 2.82, + "learning_rate": 4.859100389907007e-05, + "loss": 2.5616, + "step": 569000 + }, + { + "epoch": 2.82, + "learning_rate": 4.8589765312643985e-05, + "loss": 2.5768, + "step": 569500 + }, + { + "epoch": 2.82, + "learning_rate": 4.858852920339076e-05, + "loss": 2.5554, + "step": 570000 + }, + { + "epoch": 2.83, + "learning_rate": 4.858729061696467e-05, + "loss": 2.5912, + "step": 570500 + }, + { + "epoch": 2.83, + "learning_rate": 4.858605203053859e-05, + "loss": 2.5492, + "step": 571000 + }, + { + "epoch": 2.83, + "learning_rate": 4.858481592128536e-05, + "loss": 2.5554, + "step": 571500 + }, + { + "epoch": 2.83, + "learning_rate": 4.8583577334859274e-05, + "loss": 2.5641, + "step": 572000 + }, + { + "epoch": 2.84, + "learning_rate": 4.858233874843319e-05, + "loss": 2.5709, + "step": 572500 + }, + { + "epoch": 2.84, + "learning_rate": 4.858110016200711e-05, + "loss": 2.5975, + "step": 573000 + }, + { + "epoch": 2.84, + "learning_rate": 4.857986157558102e-05, + "loss": 2.5723, + "step": 573500 + }, + { + "epoch": 2.84, + "learning_rate": 4.8578622989154935e-05, + "loss": 2.5557, + "step": 574000 + }, + { + "epoch": 2.85, + "learning_rate": 4.857738440272885e-05, + "loss": 2.538, + "step": 574500 + }, + { + "epoch": 2.85, + "learning_rate": 4.857614581630277e-05, + "loss": 2.5816, + "step": 575000 + }, + { + "epoch": 2.85, + "learning_rate": 4.8574907229876685e-05, + "loss": 2.5707, + "step": 575500 + }, + { + "epoch": 2.85, + "learning_rate": 4.85736686434506e-05, + "loss": 2.5592, + "step": 576000 + }, + { + "epoch": 2.86, + "learning_rate": 4.857243005702452e-05, + "loss": 2.5493, + "step": 576500 + }, + { + "epoch": 2.86, + "learning_rate": 4.8571191470598436e-05, + "loss": 2.5495, + "step": 577000 + }, + { + "epoch": 2.86, + "learning_rate": 4.8569955361345205e-05, + "loss": 2.5573, + "step": 577500 + }, + { + "epoch": 2.86, + "learning_rate": 4.8568719252091974e-05, + "loss": 2.5596, + "step": 578000 + }, + { + "epoch": 2.87, + "learning_rate": 4.856748066566589e-05, + "loss": 2.5427, + "step": 578500 + }, + { + "epoch": 2.87, + "learning_rate": 4.856624207923981e-05, + "loss": 2.5828, + "step": 579000 + }, + { + "epoch": 2.87, + "learning_rate": 4.8565003492813725e-05, + "loss": 2.5622, + "step": 579500 + }, + { + "epoch": 2.87, + "learning_rate": 4.8563764906387635e-05, + "loss": 2.5681, + "step": 580000 + }, + { + "epoch": 2.88, + "learning_rate": 4.856252631996155e-05, + "loss": 2.5767, + "step": 580500 + }, + { + "epoch": 2.88, + "learning_rate": 4.856128773353547e-05, + "loss": 2.5359, + "step": 581000 + }, + { + "epoch": 2.88, + "learning_rate": 4.8560049147109386e-05, + "loss": 2.5966, + "step": 581500 + }, + { + "epoch": 2.88, + "learning_rate": 4.85588105606833e-05, + "loss": 2.5783, + "step": 582000 + }, + { + "epoch": 2.89, + "learning_rate": 4.855757445143008e-05, + "loss": 2.563, + "step": 582500 + }, + { + "epoch": 2.89, + "learning_rate": 4.855633586500399e-05, + "loss": 2.5594, + "step": 583000 + }, + { + "epoch": 2.89, + "learning_rate": 4.855509975575076e-05, + "loss": 2.5744, + "step": 583500 + }, + { + "epoch": 2.89, + "learning_rate": 4.8553861169324674e-05, + "loss": 2.5556, + "step": 584000 + }, + { + "epoch": 2.9, + "learning_rate": 4.855262258289859e-05, + "loss": 2.5552, + "step": 584500 + }, + { + "epoch": 2.9, + "learning_rate": 4.855138399647251e-05, + "loss": 2.5402, + "step": 585000 + }, + { + "epoch": 2.9, + "learning_rate": 4.8550145410046425e-05, + "loss": 2.553, + "step": 585500 + }, + { + "epoch": 2.9, + "learning_rate": 4.8548906823620335e-05, + "loss": 2.5668, + "step": 586000 + }, + { + "epoch": 2.91, + "learning_rate": 4.854766823719425e-05, + "loss": 2.5855, + "step": 586500 + }, + { + "epoch": 2.91, + "learning_rate": 4.854643212794103e-05, + "loss": 2.5822, + "step": 587000 + }, + { + "epoch": 2.91, + "learning_rate": 4.8545193541514944e-05, + "loss": 2.5603, + "step": 587500 + }, + { + "epoch": 2.91, + "learning_rate": 4.854395495508886e-05, + "loss": 2.5564, + "step": 588000 + }, + { + "epoch": 2.92, + "learning_rate": 4.854271884583563e-05, + "loss": 2.5659, + "step": 588500 + }, + { + "epoch": 2.92, + "learning_rate": 4.854148025940955e-05, + "loss": 2.5704, + "step": 589000 + }, + { + "epoch": 2.92, + "learning_rate": 4.8540241672983464e-05, + "loss": 2.584, + "step": 589500 + }, + { + "epoch": 2.92, + "learning_rate": 4.8539003086557374e-05, + "loss": 2.5611, + "step": 590000 + }, + { + "epoch": 2.93, + "learning_rate": 4.853776697730414e-05, + "loss": 2.5538, + "step": 590500 + }, + { + "epoch": 2.93, + "learning_rate": 4.853652839087806e-05, + "loss": 2.5419, + "step": 591000 + }, + { + "epoch": 2.93, + "learning_rate": 4.853528980445198e-05, + "loss": 2.5637, + "step": 591500 + }, + { + "epoch": 2.93, + "learning_rate": 4.8534051218025894e-05, + "loss": 2.5852, + "step": 592000 + }, + { + "epoch": 2.94, + "learning_rate": 4.853281263159981e-05, + "loss": 2.5701, + "step": 592500 + }, + { + "epoch": 2.94, + "learning_rate": 4.853157404517373e-05, + "loss": 2.5697, + "step": 593000 + }, + { + "epoch": 2.94, + "learning_rate": 4.8530335458747644e-05, + "loss": 2.5454, + "step": 593500 + }, + { + "epoch": 2.94, + "learning_rate": 4.852909687232156e-05, + "loss": 2.5414, + "step": 594000 + }, + { + "epoch": 2.95, + "learning_rate": 4.852785828589548e-05, + "loss": 2.5856, + "step": 594500 + }, + { + "epoch": 2.95, + "learning_rate": 4.8526619699469395e-05, + "loss": 2.5922, + "step": 595000 + }, + { + "epoch": 2.95, + "learning_rate": 4.8525381113043305e-05, + "loss": 2.5762, + "step": 595500 + }, + { + "epoch": 2.95, + "learning_rate": 4.852414252661722e-05, + "loss": 2.5752, + "step": 596000 + }, + { + "epoch": 2.96, + "learning_rate": 4.852290394019114e-05, + "loss": 2.5849, + "step": 596500 + }, + { + "epoch": 2.96, + "learning_rate": 4.852166783093791e-05, + "loss": 2.5798, + "step": 597000 + }, + { + "epoch": 2.96, + "learning_rate": 4.8520429244511825e-05, + "loss": 2.5716, + "step": 597500 + }, + { + "epoch": 2.96, + "learning_rate": 4.8519193135258594e-05, + "loss": 2.559, + "step": 598000 + }, + { + "epoch": 2.97, + "learning_rate": 4.851795454883251e-05, + "loss": 2.5591, + "step": 598500 + }, + { + "epoch": 2.97, + "learning_rate": 4.851671596240643e-05, + "loss": 2.582, + "step": 599000 + }, + { + "epoch": 2.97, + "learning_rate": 4.8515477375980345e-05, + "loss": 2.581, + "step": 599500 + }, + { + "epoch": 2.97, + "learning_rate": 4.851423878955426e-05, + "loss": 2.5779, + "step": 600000 + }, + { + "epoch": 2.98, + "learning_rate": 4.851300268030103e-05, + "loss": 2.5746, + "step": 600500 + }, + { + "epoch": 2.98, + "learning_rate": 4.851176409387495e-05, + "loss": 2.5805, + "step": 601000 + }, + { + "epoch": 2.98, + "learning_rate": 4.8510525507448864e-05, + "loss": 2.5754, + "step": 601500 + }, + { + "epoch": 2.98, + "learning_rate": 4.850928692102278e-05, + "loss": 2.5709, + "step": 602000 + }, + { + "epoch": 2.98, + "learning_rate": 4.85080483345967e-05, + "loss": 2.5627, + "step": 602500 + }, + { + "epoch": 2.99, + "learning_rate": 4.8506809748170615e-05, + "loss": 2.5808, + "step": 603000 + }, + { + "epoch": 2.99, + "learning_rate": 4.8505571161744525e-05, + "loss": 2.5847, + "step": 603500 + }, + { + "epoch": 2.99, + "learning_rate": 4.850433257531844e-05, + "loss": 2.5929, + "step": 604000 + }, + { + "epoch": 2.99, + "learning_rate": 4.850309398889236e-05, + "loss": 2.5613, + "step": 604500 + }, + { + "epoch": 3.0, + "learning_rate": 4.850185787963913e-05, + "loss": 2.5579, + "step": 605000 + }, + { + "epoch": 3.0, + "learning_rate": 4.8500619293213045e-05, + "loss": 2.5798, + "step": 605500 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.6309935793243545, + "eval_accuracy_mlm": 0.583263169880887, + "eval_accuracy_nsp": 0.856231786287207, + "eval_loss": 2.503361225128174, + "eval_runtime": 145.9652, + "eval_samples_per_second": 1746.711, + "eval_steps_per_second": 72.784, + "step": 605529 + }, + { + "epoch": 3.0, + "learning_rate": 4.8499383183959814e-05, + "loss": 2.5333, + "step": 606000 + }, + { + "epoch": 3.0, + "learning_rate": 4.849814459753373e-05, + "loss": 2.5139, + "step": 606500 + }, + { + "epoch": 3.01, + "learning_rate": 4.849690601110765e-05, + "loss": 2.5247, + "step": 607000 + }, + { + "epoch": 3.01, + "learning_rate": 4.849566990185441e-05, + "loss": 2.5196, + "step": 607500 + }, + { + "epoch": 3.01, + "learning_rate": 4.8494431315428326e-05, + "loss": 2.5171, + "step": 608000 + }, + { + "epoch": 3.01, + "learning_rate": 4.849319272900224e-05, + "loss": 2.5036, + "step": 608500 + }, + { + "epoch": 3.02, + "learning_rate": 4.849195414257616e-05, + "loss": 2.5135, + "step": 609000 + }, + { + "epoch": 3.02, + "learning_rate": 4.849071555615008e-05, + "loss": 2.5322, + "step": 609500 + }, + { + "epoch": 3.02, + "learning_rate": 4.8489476969723994e-05, + "loss": 2.5542, + "step": 610000 + }, + { + "epoch": 3.02, + "learning_rate": 4.848823838329791e-05, + "loss": 2.5499, + "step": 610500 + }, + { + "epoch": 3.03, + "learning_rate": 4.848699979687183e-05, + "loss": 2.5478, + "step": 611000 + }, + { + "epoch": 3.03, + "learning_rate": 4.84857636876186e-05, + "loss": 2.5333, + "step": 611500 + }, + { + "epoch": 3.03, + "learning_rate": 4.8484525101192514e-05, + "loss": 2.552, + "step": 612000 + }, + { + "epoch": 3.03, + "learning_rate": 4.848328651476643e-05, + "loss": 2.5408, + "step": 612500 + }, + { + "epoch": 3.04, + "learning_rate": 4.848204792834035e-05, + "loss": 2.5539, + "step": 613000 + }, + { + "epoch": 3.04, + "learning_rate": 4.8480809341914264e-05, + "loss": 2.5355, + "step": 613500 + }, + { + "epoch": 3.04, + "learning_rate": 4.847957075548818e-05, + "loss": 2.5418, + "step": 614000 + }, + { + "epoch": 3.04, + "learning_rate": 4.84783321690621e-05, + "loss": 2.5373, + "step": 614500 + }, + { + "epoch": 3.05, + "learning_rate": 4.847709605980886e-05, + "loss": 2.539, + "step": 615000 + }, + { + "epoch": 3.05, + "learning_rate": 4.847585747338278e-05, + "loss": 2.5402, + "step": 615500 + }, + { + "epoch": 3.05, + "learning_rate": 4.8474618886956694e-05, + "loss": 2.5299, + "step": 616000 + }, + { + "epoch": 3.05, + "learning_rate": 4.847338030053061e-05, + "loss": 2.5361, + "step": 616500 + }, + { + "epoch": 3.06, + "learning_rate": 4.847214171410453e-05, + "loss": 2.5464, + "step": 617000 + }, + { + "epoch": 3.06, + "learning_rate": 4.8470903127678445e-05, + "loss": 2.5522, + "step": 617500 + }, + { + "epoch": 3.06, + "learning_rate": 4.846966454125236e-05, + "loss": 2.5553, + "step": 618000 + }, + { + "epoch": 3.06, + "learning_rate": 4.846842595482628e-05, + "loss": 2.5591, + "step": 618500 + }, + { + "epoch": 3.07, + "learning_rate": 4.8467187368400196e-05, + "loss": 2.54, + "step": 619000 + }, + { + "epoch": 3.07, + "learning_rate": 4.846594878197411e-05, + "loss": 2.5405, + "step": 619500 + }, + { + "epoch": 3.07, + "learning_rate": 4.846471267272088e-05, + "loss": 2.5232, + "step": 620000 + }, + { + "epoch": 3.07, + "learning_rate": 4.84634740862948e-05, + "loss": 2.5347, + "step": 620500 + }, + { + "epoch": 3.08, + "learning_rate": 4.8462235499868715e-05, + "loss": 2.528, + "step": 621000 + }, + { + "epoch": 3.08, + "learning_rate": 4.846099939061548e-05, + "loss": 2.5322, + "step": 621500 + }, + { + "epoch": 3.08, + "learning_rate": 4.8459760804189394e-05, + "loss": 2.5317, + "step": 622000 + }, + { + "epoch": 3.08, + "learning_rate": 4.845852221776331e-05, + "loss": 2.5383, + "step": 622500 + }, + { + "epoch": 3.09, + "learning_rate": 4.845728363133723e-05, + "loss": 2.5493, + "step": 623000 + }, + { + "epoch": 3.09, + "learning_rate": 4.8456045044911145e-05, + "loss": 2.538, + "step": 623500 + }, + { + "epoch": 3.09, + "learning_rate": 4.845480645848506e-05, + "loss": 2.5492, + "step": 624000 + }, + { + "epoch": 3.09, + "learning_rate": 4.845356787205898e-05, + "loss": 2.5289, + "step": 624500 + }, + { + "epoch": 3.1, + "learning_rate": 4.8452329285632896e-05, + "loss": 2.566, + "step": 625000 + }, + { + "epoch": 3.1, + "learning_rate": 4.8451093176379665e-05, + "loss": 2.5407, + "step": 625500 + }, + { + "epoch": 3.1, + "learning_rate": 4.844985458995358e-05, + "loss": 2.5053, + "step": 626000 + }, + { + "epoch": 3.1, + "learning_rate": 4.84486160035275e-05, + "loss": 2.5606, + "step": 626500 + }, + { + "epoch": 3.11, + "learning_rate": 4.844737989427427e-05, + "loss": 2.5307, + "step": 627000 + }, + { + "epoch": 3.11, + "learning_rate": 4.844614130784818e-05, + "loss": 2.5339, + "step": 627500 + }, + { + "epoch": 3.11, + "learning_rate": 4.8444902721422094e-05, + "loss": 2.5584, + "step": 628000 + }, + { + "epoch": 3.11, + "learning_rate": 4.844366413499601e-05, + "loss": 2.5237, + "step": 628500 + }, + { + "epoch": 3.12, + "learning_rate": 4.844242554856993e-05, + "loss": 2.5553, + "step": 629000 + }, + { + "epoch": 3.12, + "learning_rate": 4.84411894393167e-05, + "loss": 2.5506, + "step": 629500 + }, + { + "epoch": 3.12, + "learning_rate": 4.8439950852890614e-05, + "loss": 2.5469, + "step": 630000 + }, + { + "epoch": 3.12, + "learning_rate": 4.843871226646453e-05, + "loss": 2.551, + "step": 630500 + }, + { + "epoch": 3.13, + "learning_rate": 4.843747368003845e-05, + "loss": 2.5408, + "step": 631000 + }, + { + "epoch": 3.13, + "learning_rate": 4.8436235093612365e-05, + "loss": 2.5641, + "step": 631500 + }, + { + "epoch": 3.13, + "learning_rate": 4.843499650718628e-05, + "loss": 2.5317, + "step": 632000 + }, + { + "epoch": 3.13, + "learning_rate": 4.84337579207602e-05, + "loss": 2.528, + "step": 632500 + }, + { + "epoch": 3.14, + "learning_rate": 4.8432519334334116e-05, + "loss": 2.5114, + "step": 633000 + }, + { + "epoch": 3.14, + "learning_rate": 4.843128074790803e-05, + "loss": 2.5307, + "step": 633500 + }, + { + "epoch": 3.14, + "learning_rate": 4.843004216148195e-05, + "loss": 2.5363, + "step": 634000 + }, + { + "epoch": 3.14, + "learning_rate": 4.8428803575055866e-05, + "loss": 2.5329, + "step": 634500 + }, + { + "epoch": 3.15, + "learning_rate": 4.842756498862978e-05, + "loss": 2.5189, + "step": 635000 + }, + { + "epoch": 3.15, + "learning_rate": 4.8426328879376545e-05, + "loss": 2.5262, + "step": 635500 + }, + { + "epoch": 3.15, + "learning_rate": 4.842509029295046e-05, + "loss": 2.5593, + "step": 636000 + }, + { + "epoch": 3.15, + "learning_rate": 4.842385418369723e-05, + "loss": 2.5337, + "step": 636500 + }, + { + "epoch": 3.16, + "learning_rate": 4.842261559727115e-05, + "loss": 2.5343, + "step": 637000 + }, + { + "epoch": 3.16, + "learning_rate": 4.8421377010845065e-05, + "loss": 2.5407, + "step": 637500 + }, + { + "epoch": 3.16, + "learning_rate": 4.842013842441898e-05, + "loss": 2.5609, + "step": 638000 + }, + { + "epoch": 3.16, + "learning_rate": 4.84188998379929e-05, + "loss": 2.5219, + "step": 638500 + }, + { + "epoch": 3.17, + "learning_rate": 4.8417661251566816e-05, + "loss": 2.5684, + "step": 639000 + }, + { + "epoch": 3.17, + "learning_rate": 4.841642266514073e-05, + "loss": 2.5422, + "step": 639500 + }, + { + "epoch": 3.17, + "learning_rate": 4.841518407871465e-05, + "loss": 2.5736, + "step": 640000 + }, + { + "epoch": 3.17, + "learning_rate": 4.841394796946142e-05, + "loss": 2.5538, + "step": 640500 + }, + { + "epoch": 3.18, + "learning_rate": 4.841270938303533e-05, + "loss": 2.5404, + "step": 641000 + }, + { + "epoch": 3.18, + "learning_rate": 4.8411470796609245e-05, + "loss": 2.5472, + "step": 641500 + }, + { + "epoch": 3.18, + "learning_rate": 4.841023221018316e-05, + "loss": 2.5434, + "step": 642000 + }, + { + "epoch": 3.18, + "learning_rate": 4.840899610092993e-05, + "loss": 2.5179, + "step": 642500 + }, + { + "epoch": 3.19, + "learning_rate": 4.840775751450385e-05, + "loss": 2.5491, + "step": 643000 + }, + { + "epoch": 3.19, + "learning_rate": 4.8406518928077765e-05, + "loss": 2.5372, + "step": 643500 + }, + { + "epoch": 3.19, + "learning_rate": 4.840528034165168e-05, + "loss": 2.544, + "step": 644000 + }, + { + "epoch": 3.19, + "learning_rate": 4.84040417552256e-05, + "loss": 2.5429, + "step": 644500 + }, + { + "epoch": 3.2, + "learning_rate": 4.8402803168799516e-05, + "loss": 2.5307, + "step": 645000 + }, + { + "epoch": 3.2, + "learning_rate": 4.840156458237343e-05, + "loss": 2.553, + "step": 645500 + }, + { + "epoch": 3.2, + "learning_rate": 4.84003284731202e-05, + "loss": 2.5597, + "step": 646000 + }, + { + "epoch": 3.2, + "learning_rate": 4.839908988669412e-05, + "loss": 2.5398, + "step": 646500 + }, + { + "epoch": 3.21, + "learning_rate": 4.8397851300268035e-05, + "loss": 2.5258, + "step": 647000 + }, + { + "epoch": 3.21, + "learning_rate": 4.8396612713841946e-05, + "loss": 2.525, + "step": 647500 + }, + { + "epoch": 3.21, + "learning_rate": 4.839537412741586e-05, + "loss": 2.5487, + "step": 648000 + }, + { + "epoch": 3.21, + "learning_rate": 4.839413801816263e-05, + "loss": 2.5557, + "step": 648500 + }, + { + "epoch": 3.22, + "learning_rate": 4.839289943173655e-05, + "loss": 2.5434, + "step": 649000 + }, + { + "epoch": 3.22, + "learning_rate": 4.8391660845310465e-05, + "loss": 2.5495, + "step": 649500 + }, + { + "epoch": 3.22, + "learning_rate": 4.839042225888438e-05, + "loss": 2.5504, + "step": 650000 + }, + { + "epoch": 3.22, + "learning_rate": 4.838918614963115e-05, + "loss": 2.5101, + "step": 650500 + }, + { + "epoch": 3.23, + "learning_rate": 4.838794756320507e-05, + "loss": 2.5414, + "step": 651000 + }, + { + "epoch": 3.23, + "learning_rate": 4.8386708976778985e-05, + "loss": 2.5399, + "step": 651500 + }, + { + "epoch": 3.23, + "learning_rate": 4.83854703903529e-05, + "loss": 2.5554, + "step": 652000 + }, + { + "epoch": 3.23, + "learning_rate": 4.8384234281099664e-05, + "loss": 2.5515, + "step": 652500 + }, + { + "epoch": 3.24, + "learning_rate": 4.838299569467358e-05, + "loss": 2.5307, + "step": 653000 + }, + { + "epoch": 3.24, + "learning_rate": 4.83817571082475e-05, + "loss": 2.539, + "step": 653500 + }, + { + "epoch": 3.24, + "learning_rate": 4.8380518521821415e-05, + "loss": 2.5284, + "step": 654000 + }, + { + "epoch": 3.24, + "learning_rate": 4.837928241256819e-05, + "loss": 2.5652, + "step": 654500 + }, + { + "epoch": 3.25, + "learning_rate": 4.837804382614211e-05, + "loss": 2.5605, + "step": 655000 + }, + { + "epoch": 3.25, + "learning_rate": 4.837680523971602e-05, + "loss": 2.5348, + "step": 655500 + }, + { + "epoch": 3.25, + "learning_rate": 4.8375569130462786e-05, + "loss": 2.5298, + "step": 656000 + }, + { + "epoch": 3.25, + "learning_rate": 4.83743305440367e-05, + "loss": 2.5437, + "step": 656500 + }, + { + "epoch": 3.26, + "learning_rate": 4.837309195761062e-05, + "loss": 2.5365, + "step": 657000 + }, + { + "epoch": 3.26, + "learning_rate": 4.837185337118454e-05, + "loss": 2.5457, + "step": 657500 + }, + { + "epoch": 3.26, + "learning_rate": 4.8370614784758454e-05, + "loss": 2.5365, + "step": 658000 + }, + { + "epoch": 3.26, + "learning_rate": 4.836937619833237e-05, + "loss": 2.5297, + "step": 658500 + }, + { + "epoch": 3.26, + "learning_rate": 4.836813761190628e-05, + "loss": 2.5214, + "step": 659000 + }, + { + "epoch": 3.27, + "learning_rate": 4.83668990254802e-05, + "loss": 2.5343, + "step": 659500 + }, + { + "epoch": 3.27, + "learning_rate": 4.8365660439054115e-05, + "loss": 2.535, + "step": 660000 + }, + { + "epoch": 3.27, + "learning_rate": 4.836442432980089e-05, + "loss": 2.5523, + "step": 660500 + }, + { + "epoch": 3.27, + "learning_rate": 4.836318574337481e-05, + "loss": 2.5577, + "step": 661000 + }, + { + "epoch": 3.28, + "learning_rate": 4.8361947156948724e-05, + "loss": 2.5796, + "step": 661500 + }, + { + "epoch": 3.28, + "learning_rate": 4.8360708570522634e-05, + "loss": 2.5263, + "step": 662000 + }, + { + "epoch": 3.28, + "learning_rate": 4.835946998409655e-05, + "loss": 2.535, + "step": 662500 + }, + { + "epoch": 3.28, + "learning_rate": 4.835823139767047e-05, + "loss": 2.5555, + "step": 663000 + }, + { + "epoch": 3.29, + "learning_rate": 4.835699528841724e-05, + "loss": 2.5393, + "step": 663500 + }, + { + "epoch": 3.29, + "learning_rate": 4.8355756701991154e-05, + "loss": 2.5416, + "step": 664000 + }, + { + "epoch": 3.29, + "learning_rate": 4.835451811556507e-05, + "loss": 2.5387, + "step": 664500 + }, + { + "epoch": 3.29, + "learning_rate": 4.835327952913898e-05, + "loss": 2.5604, + "step": 665000 + }, + { + "epoch": 3.3, + "learning_rate": 4.83520409427129e-05, + "loss": 2.565, + "step": 665500 + }, + { + "epoch": 3.3, + "learning_rate": 4.8350802356286815e-05, + "loss": 2.5364, + "step": 666000 + }, + { + "epoch": 3.3, + "learning_rate": 4.834956376986073e-05, + "loss": 2.5461, + "step": 666500 + }, + { + "epoch": 3.3, + "learning_rate": 4.834832518343465e-05, + "loss": 2.5692, + "step": 667000 + }, + { + "epoch": 3.31, + "learning_rate": 4.8347089074181424e-05, + "loss": 2.5424, + "step": 667500 + }, + { + "epoch": 3.31, + "learning_rate": 4.834585296492819e-05, + "loss": 2.557, + "step": 668000 + }, + { + "epoch": 3.31, + "learning_rate": 4.834461437850211e-05, + "loss": 2.5454, + "step": 668500 + }, + { + "epoch": 3.31, + "learning_rate": 4.834337579207602e-05, + "loss": 2.5403, + "step": 669000 + }, + { + "epoch": 3.32, + "learning_rate": 4.834213720564994e-05, + "loss": 2.5301, + "step": 669500 + }, + { + "epoch": 3.32, + "learning_rate": 4.8340898619223854e-05, + "loss": 2.5211, + "step": 670000 + }, + { + "epoch": 3.32, + "learning_rate": 4.833966003279777e-05, + "loss": 2.5401, + "step": 670500 + }, + { + "epoch": 3.32, + "learning_rate": 4.833842144637169e-05, + "loss": 2.541, + "step": 671000 + }, + { + "epoch": 3.33, + "learning_rate": 4.833718533711846e-05, + "loss": 2.5565, + "step": 671500 + }, + { + "epoch": 3.33, + "learning_rate": 4.8335946750692374e-05, + "loss": 2.5404, + "step": 672000 + }, + { + "epoch": 3.33, + "learning_rate": 4.833470816426629e-05, + "loss": 2.567, + "step": 672500 + }, + { + "epoch": 3.33, + "learning_rate": 4.833346957784021e-05, + "loss": 2.5351, + "step": 673000 + }, + { + "epoch": 3.34, + "learning_rate": 4.8332230991414124e-05, + "loss": 2.5467, + "step": 673500 + }, + { + "epoch": 3.34, + "learning_rate": 4.833099240498804e-05, + "loss": 2.5485, + "step": 674000 + }, + { + "epoch": 3.34, + "learning_rate": 4.832975381856195e-05, + "loss": 2.5353, + "step": 674500 + }, + { + "epoch": 3.34, + "learning_rate": 4.832851523213587e-05, + "loss": 2.5495, + "step": 675000 + }, + { + "epoch": 3.35, + "learning_rate": 4.832728160005549e-05, + "loss": 2.5532, + "step": 675500 + }, + { + "epoch": 3.35, + "learning_rate": 4.8326043013629406e-05, + "loss": 2.5324, + "step": 676000 + }, + { + "epoch": 3.35, + "learning_rate": 4.832480442720332e-05, + "loss": 2.538, + "step": 676500 + }, + { + "epoch": 3.35, + "learning_rate": 4.832356584077724e-05, + "loss": 2.5226, + "step": 677000 + }, + { + "epoch": 3.36, + "learning_rate": 4.832232973152401e-05, + "loss": 2.5422, + "step": 677500 + }, + { + "epoch": 3.36, + "learning_rate": 4.8321091145097926e-05, + "loss": 2.5271, + "step": 678000 + }, + { + "epoch": 3.36, + "learning_rate": 4.831985255867184e-05, + "loss": 2.5118, + "step": 678500 + }, + { + "epoch": 3.36, + "learning_rate": 4.831861397224576e-05, + "loss": 2.5408, + "step": 679000 + }, + { + "epoch": 3.37, + "learning_rate": 4.831737786299252e-05, + "loss": 2.5397, + "step": 679500 + }, + { + "epoch": 3.37, + "learning_rate": 4.831613927656644e-05, + "loss": 2.5553, + "step": 680000 + }, + { + "epoch": 3.37, + "learning_rate": 4.8314900690140355e-05, + "loss": 2.5429, + "step": 680500 + }, + { + "epoch": 3.37, + "learning_rate": 4.831366210371427e-05, + "loss": 2.5535, + "step": 681000 + }, + { + "epoch": 3.38, + "learning_rate": 4.831242351728819e-05, + "loss": 2.5647, + "step": 681500 + }, + { + "epoch": 3.38, + "learning_rate": 4.8311184930862106e-05, + "loss": 2.5757, + "step": 682000 + }, + { + "epoch": 3.38, + "learning_rate": 4.830994634443602e-05, + "loss": 2.5574, + "step": 682500 + }, + { + "epoch": 3.38, + "learning_rate": 4.830870775800994e-05, + "loss": 2.53, + "step": 683000 + }, + { + "epoch": 3.39, + "learning_rate": 4.830746917158386e-05, + "loss": 2.5279, + "step": 683500 + }, + { + "epoch": 3.39, + "learning_rate": 4.8306233062330626e-05, + "loss": 2.5323, + "step": 684000 + }, + { + "epoch": 3.39, + "learning_rate": 4.830499447590454e-05, + "loss": 2.5433, + "step": 684500 + }, + { + "epoch": 3.39, + "learning_rate": 4.830375588947846e-05, + "loss": 2.5779, + "step": 685000 + }, + { + "epoch": 3.4, + "learning_rate": 4.8302517303052376e-05, + "loss": 2.5304, + "step": 685500 + }, + { + "epoch": 3.4, + "learning_rate": 4.830128119379914e-05, + "loss": 2.5579, + "step": 686000 + }, + { + "epoch": 3.4, + "learning_rate": 4.8300045084545914e-05, + "loss": 2.5429, + "step": 686500 + }, + { + "epoch": 3.4, + "learning_rate": 4.829880649811983e-05, + "loss": 2.5222, + "step": 687000 + }, + { + "epoch": 3.41, + "learning_rate": 4.829756791169374e-05, + "loss": 2.5562, + "step": 687500 + }, + { + "epoch": 3.41, + "learning_rate": 4.829632932526766e-05, + "loss": 2.579, + "step": 688000 + }, + { + "epoch": 3.41, + "learning_rate": 4.8295090738841575e-05, + "loss": 2.5623, + "step": 688500 + }, + { + "epoch": 3.41, + "learning_rate": 4.829385215241549e-05, + "loss": 2.543, + "step": 689000 + }, + { + "epoch": 3.42, + "learning_rate": 4.829261356598941e-05, + "loss": 2.5659, + "step": 689500 + }, + { + "epoch": 3.42, + "learning_rate": 4.8291374979563326e-05, + "loss": 2.5464, + "step": 690000 + }, + { + "epoch": 3.42, + "learning_rate": 4.829013639313724e-05, + "loss": 2.529, + "step": 690500 + }, + { + "epoch": 3.42, + "learning_rate": 4.828889780671116e-05, + "loss": 2.5534, + "step": 691000 + }, + { + "epoch": 3.43, + "learning_rate": 4.828765922028508e-05, + "loss": 2.5394, + "step": 691500 + }, + { + "epoch": 3.43, + "learning_rate": 4.8286420633858994e-05, + "loss": 2.5359, + "step": 692000 + }, + { + "epoch": 3.43, + "learning_rate": 4.8285184524605756e-05, + "loss": 2.5425, + "step": 692500 + }, + { + "epoch": 3.43, + "learning_rate": 4.828394593817967e-05, + "loss": 2.5602, + "step": 693000 + }, + { + "epoch": 3.44, + "learning_rate": 4.828270735175359e-05, + "loss": 2.5164, + "step": 693500 + }, + { + "epoch": 3.44, + "learning_rate": 4.8281468765327506e-05, + "loss": 2.5582, + "step": 694000 + }, + { + "epoch": 3.44, + "learning_rate": 4.8280232656074275e-05, + "loss": 2.5522, + "step": 694500 + }, + { + "epoch": 3.44, + "learning_rate": 4.827899406964819e-05, + "loss": 2.5528, + "step": 695000 + }, + { + "epoch": 3.45, + "learning_rate": 4.827775548322211e-05, + "loss": 2.5459, + "step": 695500 + }, + { + "epoch": 3.45, + "learning_rate": 4.8276516896796026e-05, + "loss": 2.5493, + "step": 696000 + }, + { + "epoch": 3.45, + "learning_rate": 4.827527831036994e-05, + "loss": 2.5516, + "step": 696500 + }, + { + "epoch": 3.45, + "learning_rate": 4.827403972394386e-05, + "loss": 2.5535, + "step": 697000 + }, + { + "epoch": 3.46, + "learning_rate": 4.827280113751778e-05, + "loss": 2.5477, + "step": 697500 + }, + { + "epoch": 3.46, + "learning_rate": 4.8271562551091694e-05, + "loss": 2.5291, + "step": 698000 + }, + { + "epoch": 3.46, + "learning_rate": 4.827032396466561e-05, + "loss": 2.5557, + "step": 698500 + }, + { + "epoch": 3.46, + "learning_rate": 4.826908537823953e-05, + "loss": 2.5559, + "step": 699000 + }, + { + "epoch": 3.47, + "learning_rate": 4.8267846791813444e-05, + "loss": 2.5445, + "step": 699500 + }, + { + "epoch": 3.47, + "learning_rate": 4.8266610682560206e-05, + "loss": 2.5345, + "step": 700000 + }, + { + "epoch": 3.47, + "learning_rate": 4.8265374573306975e-05, + "loss": 2.563, + "step": 700500 + }, + { + "epoch": 3.47, + "learning_rate": 4.826413598688089e-05, + "loss": 2.5561, + "step": 701000 + }, + { + "epoch": 3.48, + "learning_rate": 4.826289740045481e-05, + "loss": 2.5687, + "step": 701500 + }, + { + "epoch": 3.48, + "learning_rate": 4.8261658814028726e-05, + "loss": 2.544, + "step": 702000 + }, + { + "epoch": 3.48, + "learning_rate": 4.826042022760264e-05, + "loss": 2.5362, + "step": 702500 + }, + { + "epoch": 3.48, + "learning_rate": 4.825918164117656e-05, + "loss": 2.5309, + "step": 703000 + }, + { + "epoch": 3.49, + "learning_rate": 4.825794305475048e-05, + "loss": 2.5421, + "step": 703500 + }, + { + "epoch": 3.49, + "learning_rate": 4.8256704468324394e-05, + "loss": 2.5295, + "step": 704000 + }, + { + "epoch": 3.49, + "learning_rate": 4.825546588189831e-05, + "loss": 2.5354, + "step": 704500 + }, + { + "epoch": 3.49, + "learning_rate": 4.825422977264508e-05, + "loss": 2.5441, + "step": 705000 + }, + { + "epoch": 3.5, + "learning_rate": 4.8252991186218996e-05, + "loss": 2.5388, + "step": 705500 + }, + { + "epoch": 3.5, + "learning_rate": 4.8251752599792907e-05, + "loss": 2.5489, + "step": 706000 + }, + { + "epoch": 3.5, + "learning_rate": 4.8250514013366824e-05, + "loss": 2.5435, + "step": 706500 + }, + { + "epoch": 3.5, + "learning_rate": 4.824927790411359e-05, + "loss": 2.5567, + "step": 707000 + }, + { + "epoch": 3.51, + "learning_rate": 4.824803931768751e-05, + "loss": 2.5394, + "step": 707500 + }, + { + "epoch": 3.51, + "learning_rate": 4.8246800731261426e-05, + "loss": 2.5111, + "step": 708000 + }, + { + "epoch": 3.51, + "learning_rate": 4.824556214483534e-05, + "loss": 2.5513, + "step": 708500 + }, + { + "epoch": 3.51, + "learning_rate": 4.824432355840926e-05, + "loss": 2.5387, + "step": 709000 + }, + { + "epoch": 3.52, + "learning_rate": 4.824308497198318e-05, + "loss": 2.5602, + "step": 709500 + }, + { + "epoch": 3.52, + "learning_rate": 4.8241846385557094e-05, + "loss": 2.549, + "step": 710000 + }, + { + "epoch": 3.52, + "learning_rate": 4.824060779913101e-05, + "loss": 2.5252, + "step": 710500 + }, + { + "epoch": 3.52, + "learning_rate": 4.823937416705063e-05, + "loss": 2.5328, + "step": 711000 + }, + { + "epoch": 3.53, + "learning_rate": 4.82381380577974e-05, + "loss": 2.522, + "step": 711500 + }, + { + "epoch": 3.53, + "learning_rate": 4.823689947137132e-05, + "loss": 2.5387, + "step": 712000 + }, + { + "epoch": 3.53, + "learning_rate": 4.8235660884945234e-05, + "loss": 2.5568, + "step": 712500 + }, + { + "epoch": 3.53, + "learning_rate": 4.823442229851915e-05, + "loss": 2.5318, + "step": 713000 + }, + { + "epoch": 3.53, + "learning_rate": 4.823318371209307e-05, + "loss": 2.5472, + "step": 713500 + }, + { + "epoch": 3.54, + "learning_rate": 4.8231945125666985e-05, + "loss": 2.5452, + "step": 714000 + }, + { + "epoch": 3.54, + "learning_rate": 4.823070901641375e-05, + "loss": 2.5537, + "step": 714500 + }, + { + "epoch": 3.54, + "learning_rate": 4.8229470429987664e-05, + "loss": 2.5685, + "step": 715000 + }, + { + "epoch": 3.54, + "learning_rate": 4.822823184356158e-05, + "loss": 2.5505, + "step": 715500 + }, + { + "epoch": 3.55, + "learning_rate": 4.82269932571355e-05, + "loss": 2.5497, + "step": 716000 + }, + { + "epoch": 3.55, + "learning_rate": 4.8225754670709415e-05, + "loss": 2.529, + "step": 716500 + }, + { + "epoch": 3.55, + "learning_rate": 4.822451608428333e-05, + "loss": 2.5528, + "step": 717000 + }, + { + "epoch": 3.55, + "learning_rate": 4.822327749785725e-05, + "loss": 2.5036, + "step": 717500 + }, + { + "epoch": 3.56, + "learning_rate": 4.8222038911431166e-05, + "loss": 2.5104, + "step": 718000 + }, + { + "epoch": 3.56, + "learning_rate": 4.822080032500508e-05, + "loss": 2.5557, + "step": 718500 + }, + { + "epoch": 3.56, + "learning_rate": 4.821956173857899e-05, + "loss": 2.5172, + "step": 719000 + }, + { + "epoch": 3.56, + "learning_rate": 4.821832562932577e-05, + "loss": 2.5314, + "step": 719500 + }, + { + "epoch": 3.57, + "learning_rate": 4.821708952007254e-05, + "loss": 2.5631, + "step": 720000 + }, + { + "epoch": 3.57, + "learning_rate": 4.821585093364645e-05, + "loss": 2.5495, + "step": 720500 + }, + { + "epoch": 3.57, + "learning_rate": 4.8214612347220364e-05, + "loss": 2.5225, + "step": 721000 + }, + { + "epoch": 3.57, + "learning_rate": 4.821337376079428e-05, + "loss": 2.5358, + "step": 721500 + }, + { + "epoch": 3.58, + "learning_rate": 4.82121351743682e-05, + "loss": 2.5573, + "step": 722000 + }, + { + "epoch": 3.58, + "learning_rate": 4.8210896587942115e-05, + "loss": 2.5512, + "step": 722500 + }, + { + "epoch": 3.58, + "learning_rate": 4.820965800151603e-05, + "loss": 2.5373, + "step": 723000 + }, + { + "epoch": 3.58, + "learning_rate": 4.820841941508995e-05, + "loss": 2.5567, + "step": 723500 + }, + { + "epoch": 3.59, + "learning_rate": 4.8207180828663866e-05, + "loss": 2.5367, + "step": 724000 + }, + { + "epoch": 3.59, + "learning_rate": 4.820594224223778e-05, + "loss": 2.558, + "step": 724500 + }, + { + "epoch": 3.59, + "learning_rate": 4.820470613298455e-05, + "loss": 2.5263, + "step": 725000 + }, + { + "epoch": 3.59, + "learning_rate": 4.820346754655847e-05, + "loss": 2.5248, + "step": 725500 + }, + { + "epoch": 3.6, + "learning_rate": 4.8202228960132385e-05, + "loss": 2.5235, + "step": 726000 + }, + { + "epoch": 3.6, + "learning_rate": 4.82009903737063e-05, + "loss": 2.5575, + "step": 726500 + }, + { + "epoch": 3.6, + "learning_rate": 4.819975178728022e-05, + "loss": 2.5402, + "step": 727000 + }, + { + "epoch": 3.6, + "learning_rate": 4.8198513200854136e-05, + "loss": 2.5331, + "step": 727500 + }, + { + "epoch": 3.61, + "learning_rate": 4.8197274614428046e-05, + "loss": 2.5494, + "step": 728000 + }, + { + "epoch": 3.61, + "learning_rate": 4.819603602800196e-05, + "loss": 2.5456, + "step": 728500 + }, + { + "epoch": 3.61, + "learning_rate": 4.819479991874873e-05, + "loss": 2.5465, + "step": 729000 + }, + { + "epoch": 3.61, + "learning_rate": 4.819356133232265e-05, + "loss": 2.5497, + "step": 729500 + }, + { + "epoch": 3.62, + "learning_rate": 4.8192322745896566e-05, + "loss": 2.5535, + "step": 730000 + }, + { + "epoch": 3.62, + "learning_rate": 4.819108415947048e-05, + "loss": 2.5379, + "step": 730500 + }, + { + "epoch": 3.62, + "learning_rate": 4.81898455730444e-05, + "loss": 2.5503, + "step": 731000 + }, + { + "epoch": 3.62, + "learning_rate": 4.818860698661831e-05, + "loss": 2.5508, + "step": 731500 + }, + { + "epoch": 3.63, + "learning_rate": 4.818736840019223e-05, + "loss": 2.5412, + "step": 732000 + }, + { + "epoch": 3.63, + "learning_rate": 4.8186129813766144e-05, + "loss": 2.5646, + "step": 732500 + }, + { + "epoch": 3.63, + "learning_rate": 4.818489370451292e-05, + "loss": 2.554, + "step": 733000 + }, + { + "epoch": 3.63, + "learning_rate": 4.8183655118086836e-05, + "loss": 2.5507, + "step": 733500 + }, + { + "epoch": 3.64, + "learning_rate": 4.818241653166075e-05, + "loss": 2.5684, + "step": 734000 + }, + { + "epoch": 3.64, + "learning_rate": 4.818117794523466e-05, + "loss": 2.5564, + "step": 734500 + }, + { + "epoch": 3.64, + "learning_rate": 4.817993935880858e-05, + "loss": 2.5357, + "step": 735000 + }, + { + "epoch": 3.64, + "learning_rate": 4.81787057267282e-05, + "loss": 2.5358, + "step": 735500 + }, + { + "epoch": 3.65, + "learning_rate": 4.817746961747497e-05, + "loss": 2.5506, + "step": 736000 + }, + { + "epoch": 3.65, + "learning_rate": 4.8176231031048887e-05, + "loss": 2.5443, + "step": 736500 + }, + { + "epoch": 3.65, + "learning_rate": 4.8174992444622804e-05, + "loss": 2.5271, + "step": 737000 + }, + { + "epoch": 3.65, + "learning_rate": 4.817375633536957e-05, + "loss": 2.5738, + "step": 737500 + }, + { + "epoch": 3.66, + "learning_rate": 4.817251774894348e-05, + "loss": 2.5456, + "step": 738000 + }, + { + "epoch": 3.66, + "learning_rate": 4.81712791625174e-05, + "loss": 2.5431, + "step": 738500 + }, + { + "epoch": 3.66, + "learning_rate": 4.8170040576091316e-05, + "loss": 2.5176, + "step": 739000 + }, + { + "epoch": 3.66, + "learning_rate": 4.816880198966523e-05, + "loss": 2.5471, + "step": 739500 + }, + { + "epoch": 3.67, + "learning_rate": 4.816756340323915e-05, + "loss": 2.5422, + "step": 740000 + }, + { + "epoch": 3.67, + "learning_rate": 4.816632481681307e-05, + "loss": 2.5587, + "step": 740500 + }, + { + "epoch": 3.67, + "learning_rate": 4.8165086230386984e-05, + "loss": 2.5371, + "step": 741000 + }, + { + "epoch": 3.67, + "learning_rate": 4.81638476439609e-05, + "loss": 2.5467, + "step": 741500 + }, + { + "epoch": 3.68, + "learning_rate": 4.816260905753482e-05, + "loss": 2.5384, + "step": 742000 + }, + { + "epoch": 3.68, + "learning_rate": 4.8161370471108735e-05, + "loss": 2.5456, + "step": 742500 + }, + { + "epoch": 3.68, + "learning_rate": 4.816013188468265e-05, + "loss": 2.5356, + "step": 743000 + }, + { + "epoch": 3.68, + "learning_rate": 4.815889329825657e-05, + "loss": 2.5309, + "step": 743500 + }, + { + "epoch": 3.69, + "learning_rate": 4.815765718900334e-05, + "loss": 2.5272, + "step": 744000 + }, + { + "epoch": 3.69, + "learning_rate": 4.8156418602577254e-05, + "loss": 2.5665, + "step": 744500 + }, + { + "epoch": 3.69, + "learning_rate": 4.8155182493324016e-05, + "loss": 2.5413, + "step": 745000 + }, + { + "epoch": 3.69, + "learning_rate": 4.815394390689793e-05, + "loss": 2.568, + "step": 745500 + }, + { + "epoch": 3.7, + "learning_rate": 4.815270532047185e-05, + "loss": 2.5461, + "step": 746000 + }, + { + "epoch": 3.7, + "learning_rate": 4.815146673404577e-05, + "loss": 2.5391, + "step": 746500 + }, + { + "epoch": 3.7, + "learning_rate": 4.8150228147619684e-05, + "loss": 2.5244, + "step": 747000 + }, + { + "epoch": 3.7, + "learning_rate": 4.81489895611936e-05, + "loss": 2.544, + "step": 747500 + }, + { + "epoch": 3.71, + "learning_rate": 4.814775097476752e-05, + "loss": 2.5462, + "step": 748000 + }, + { + "epoch": 3.71, + "learning_rate": 4.8146512388341435e-05, + "loss": 2.5465, + "step": 748500 + }, + { + "epoch": 3.71, + "learning_rate": 4.814527380191535e-05, + "loss": 2.5428, + "step": 749000 + }, + { + "epoch": 3.71, + "learning_rate": 4.814403769266212e-05, + "loss": 2.555, + "step": 749500 + }, + { + "epoch": 3.72, + "learning_rate": 4.814280158340889e-05, + "loss": 2.563, + "step": 750000 + }, + { + "epoch": 3.72, + "learning_rate": 4.8141562996982806e-05, + "loss": 2.5506, + "step": 750500 + }, + { + "epoch": 3.72, + "learning_rate": 4.8140324410556717e-05, + "loss": 2.5451, + "step": 751000 + }, + { + "epoch": 3.72, + "learning_rate": 4.8139085824130633e-05, + "loss": 2.544, + "step": 751500 + }, + { + "epoch": 3.73, + "learning_rate": 4.813784971487741e-05, + "loss": 2.5365, + "step": 752000 + }, + { + "epoch": 3.73, + "learning_rate": 4.8136611128451326e-05, + "loss": 2.5606, + "step": 752500 + }, + { + "epoch": 3.73, + "learning_rate": 4.813537254202524e-05, + "loss": 2.5512, + "step": 753000 + }, + { + "epoch": 3.73, + "learning_rate": 4.813413395559916e-05, + "loss": 2.5219, + "step": 753500 + }, + { + "epoch": 3.74, + "learning_rate": 4.813289784634593e-05, + "loss": 2.5419, + "step": 754000 + }, + { + "epoch": 3.74, + "learning_rate": 4.813166173709269e-05, + "loss": 2.5459, + "step": 754500 + }, + { + "epoch": 3.74, + "learning_rate": 4.813042315066661e-05, + "loss": 2.5357, + "step": 755000 + }, + { + "epoch": 3.74, + "learning_rate": 4.8129184564240525e-05, + "loss": 2.5289, + "step": 755500 + }, + { + "epoch": 3.75, + "learning_rate": 4.812794597781444e-05, + "loss": 2.5534, + "step": 756000 + }, + { + "epoch": 3.75, + "learning_rate": 4.812670739138836e-05, + "loss": 2.5353, + "step": 756500 + }, + { + "epoch": 3.75, + "learning_rate": 4.8125468804962275e-05, + "loss": 2.5421, + "step": 757000 + }, + { + "epoch": 3.75, + "learning_rate": 4.8124232695709044e-05, + "loss": 2.5251, + "step": 757500 + }, + { + "epoch": 3.76, + "learning_rate": 4.812299410928296e-05, + "loss": 2.5294, + "step": 758000 + }, + { + "epoch": 3.76, + "learning_rate": 4.812175552285688e-05, + "loss": 2.5252, + "step": 758500 + }, + { + "epoch": 3.76, + "learning_rate": 4.8120516936430795e-05, + "loss": 2.5331, + "step": 759000 + }, + { + "epoch": 3.76, + "learning_rate": 4.811927835000471e-05, + "loss": 2.5705, + "step": 759500 + }, + { + "epoch": 3.77, + "learning_rate": 4.811803976357863e-05, + "loss": 2.5623, + "step": 760000 + }, + { + "epoch": 3.77, + "learning_rate": 4.8116801177152546e-05, + "loss": 2.5405, + "step": 760500 + }, + { + "epoch": 3.77, + "learning_rate": 4.811556259072646e-05, + "loss": 2.525, + "step": 761000 + }, + { + "epoch": 3.77, + "learning_rate": 4.811432400430038e-05, + "loss": 2.5315, + "step": 761500 + }, + { + "epoch": 3.78, + "learning_rate": 4.811308541787429e-05, + "loss": 2.5486, + "step": 762000 + }, + { + "epoch": 3.78, + "learning_rate": 4.811184683144821e-05, + "loss": 2.549, + "step": 762500 + }, + { + "epoch": 3.78, + "learning_rate": 4.8110608245022124e-05, + "loss": 2.5359, + "step": 763000 + }, + { + "epoch": 3.78, + "learning_rate": 4.810936965859604e-05, + "loss": 2.5533, + "step": 763500 + }, + { + "epoch": 3.79, + "learning_rate": 4.810813354934281e-05, + "loss": 2.5451, + "step": 764000 + }, + { + "epoch": 3.79, + "learning_rate": 4.8106894962916726e-05, + "loss": 2.5645, + "step": 764500 + }, + { + "epoch": 3.79, + "learning_rate": 4.810565637649064e-05, + "loss": 2.5328, + "step": 765000 + }, + { + "epoch": 3.79, + "learning_rate": 4.810441779006456e-05, + "loss": 2.538, + "step": 765500 + }, + { + "epoch": 3.8, + "learning_rate": 4.810318168081133e-05, + "loss": 2.5277, + "step": 766000 + }, + { + "epoch": 3.8, + "learning_rate": 4.8101943094385246e-05, + "loss": 2.5509, + "step": 766500 + }, + { + "epoch": 3.8, + "learning_rate": 4.810070450795916e-05, + "loss": 2.558, + "step": 767000 + }, + { + "epoch": 3.8, + "learning_rate": 4.809946592153308e-05, + "loss": 2.5482, + "step": 767500 + }, + { + "epoch": 3.8, + "learning_rate": 4.8098227335107e-05, + "loss": 2.5564, + "step": 768000 + }, + { + "epoch": 3.81, + "learning_rate": 4.809699122585376e-05, + "loss": 2.5368, + "step": 768500 + }, + { + "epoch": 3.81, + "learning_rate": 4.809575511660053e-05, + "loss": 2.5808, + "step": 769000 + }, + { + "epoch": 3.81, + "learning_rate": 4.8094516530174444e-05, + "loss": 2.5585, + "step": 769500 + }, + { + "epoch": 3.81, + "learning_rate": 4.809327794374836e-05, + "loss": 2.5754, + "step": 770000 + }, + { + "epoch": 3.82, + "learning_rate": 4.809203935732228e-05, + "loss": 2.5389, + "step": 770500 + }, + { + "epoch": 3.82, + "learning_rate": 4.8090800770896195e-05, + "loss": 2.5298, + "step": 771000 + }, + { + "epoch": 3.82, + "learning_rate": 4.808956218447011e-05, + "loss": 2.5495, + "step": 771500 + }, + { + "epoch": 3.82, + "learning_rate": 4.808832359804403e-05, + "loss": 2.5579, + "step": 772000 + }, + { + "epoch": 3.83, + "learning_rate": 4.808708748879079e-05, + "loss": 2.5362, + "step": 772500 + }, + { + "epoch": 3.83, + "learning_rate": 4.808584890236471e-05, + "loss": 2.5724, + "step": 773000 + }, + { + "epoch": 3.83, + "learning_rate": 4.8084610315938625e-05, + "loss": 2.5317, + "step": 773500 + }, + { + "epoch": 3.83, + "learning_rate": 4.808337172951254e-05, + "loss": 2.5207, + "step": 774000 + }, + { + "epoch": 3.84, + "learning_rate": 4.808213314308646e-05, + "loss": 2.5329, + "step": 774500 + }, + { + "epoch": 3.84, + "learning_rate": 4.8080894556660376e-05, + "loss": 2.5449, + "step": 775000 + }, + { + "epoch": 3.84, + "learning_rate": 4.807965597023429e-05, + "loss": 2.5359, + "step": 775500 + }, + { + "epoch": 3.84, + "learning_rate": 4.807841986098106e-05, + "loss": 2.5733, + "step": 776000 + }, + { + "epoch": 3.85, + "learning_rate": 4.807718375172783e-05, + "loss": 2.543, + "step": 776500 + }, + { + "epoch": 3.85, + "learning_rate": 4.807594516530175e-05, + "loss": 2.5573, + "step": 777000 + }, + { + "epoch": 3.85, + "learning_rate": 4.8074706578875664e-05, + "loss": 2.5554, + "step": 777500 + }, + { + "epoch": 3.85, + "learning_rate": 4.807346799244958e-05, + "loss": 2.5289, + "step": 778000 + }, + { + "epoch": 3.86, + "learning_rate": 4.807222940602349e-05, + "loss": 2.5436, + "step": 778500 + }, + { + "epoch": 3.86, + "learning_rate": 4.807099081959741e-05, + "loss": 2.5695, + "step": 779000 + }, + { + "epoch": 3.86, + "learning_rate": 4.8069752233171325e-05, + "loss": 2.5339, + "step": 779500 + }, + { + "epoch": 3.86, + "learning_rate": 4.806851364674524e-05, + "loss": 2.5585, + "step": 780000 + }, + { + "epoch": 3.87, + "learning_rate": 4.806727506031916e-05, + "loss": 2.5488, + "step": 780500 + }, + { + "epoch": 3.87, + "learning_rate": 4.8066036473893076e-05, + "loss": 2.55, + "step": 781000 + }, + { + "epoch": 3.87, + "learning_rate": 4.806479788746699e-05, + "loss": 2.524, + "step": 781500 + }, + { + "epoch": 3.87, + "learning_rate": 4.806355930104091e-05, + "loss": 2.5214, + "step": 782000 + }, + { + "epoch": 3.88, + "learning_rate": 4.806232071461483e-05, + "loss": 2.5534, + "step": 782500 + }, + { + "epoch": 3.88, + "learning_rate": 4.8061082128188744e-05, + "loss": 2.5238, + "step": 783000 + }, + { + "epoch": 3.88, + "learning_rate": 4.805984354176266e-05, + "loss": 2.5444, + "step": 783500 + }, + { + "epoch": 3.88, + "learning_rate": 4.805860495533658e-05, + "loss": 2.5237, + "step": 784000 + }, + { + "epoch": 3.89, + "learning_rate": 4.8057366368910494e-05, + "loss": 2.5258, + "step": 784500 + }, + { + "epoch": 3.89, + "learning_rate": 4.8056127782484404e-05, + "loss": 2.529, + "step": 785000 + }, + { + "epoch": 3.89, + "learning_rate": 4.805488919605832e-05, + "loss": 2.5469, + "step": 785500 + }, + { + "epoch": 3.89, + "learning_rate": 4.805365060963224e-05, + "loss": 2.5389, + "step": 786000 + }, + { + "epoch": 3.9, + "learning_rate": 4.8052414500379014e-05, + "loss": 2.5637, + "step": 786500 + }, + { + "epoch": 3.9, + "learning_rate": 4.805117591395293e-05, + "loss": 2.5438, + "step": 787000 + }, + { + "epoch": 3.9, + "learning_rate": 4.804993732752685e-05, + "loss": 2.5465, + "step": 787500 + }, + { + "epoch": 3.9, + "learning_rate": 4.804869874110076e-05, + "loss": 2.5289, + "step": 788000 + }, + { + "epoch": 3.91, + "learning_rate": 4.804746263184753e-05, + "loss": 2.5684, + "step": 788500 + }, + { + "epoch": 3.91, + "learning_rate": 4.8046226522594296e-05, + "loss": 2.5539, + "step": 789000 + }, + { + "epoch": 3.91, + "learning_rate": 4.804498793616821e-05, + "loss": 2.5358, + "step": 789500 + }, + { + "epoch": 3.91, + "learning_rate": 4.804374934974213e-05, + "loss": 2.5412, + "step": 790000 + }, + { + "epoch": 3.92, + "learning_rate": 4.8042510763316046e-05, + "loss": 2.5593, + "step": 790500 + }, + { + "epoch": 3.92, + "learning_rate": 4.804127217688996e-05, + "loss": 2.5245, + "step": 791000 + }, + { + "epoch": 3.92, + "learning_rate": 4.804003359046388e-05, + "loss": 2.5548, + "step": 791500 + }, + { + "epoch": 3.92, + "learning_rate": 4.80387950040378e-05, + "loss": 2.5387, + "step": 792000 + }, + { + "epoch": 3.93, + "learning_rate": 4.8037556417611714e-05, + "loss": 2.529, + "step": 792500 + }, + { + "epoch": 3.93, + "learning_rate": 4.8036320308358476e-05, + "loss": 2.5248, + "step": 793000 + }, + { + "epoch": 3.93, + "learning_rate": 4.803508172193239e-05, + "loss": 2.5407, + "step": 793500 + }, + { + "epoch": 3.93, + "learning_rate": 4.803384313550631e-05, + "loss": 2.5353, + "step": 794000 + }, + { + "epoch": 3.94, + "learning_rate": 4.803260454908023e-05, + "loss": 2.5249, + "step": 794500 + }, + { + "epoch": 3.94, + "learning_rate": 4.8031368439826996e-05, + "loss": 2.5362, + "step": 795000 + }, + { + "epoch": 3.94, + "learning_rate": 4.803012985340091e-05, + "loss": 2.5309, + "step": 795500 + }, + { + "epoch": 3.94, + "learning_rate": 4.802889126697483e-05, + "loss": 2.5317, + "step": 796000 + }, + { + "epoch": 3.95, + "learning_rate": 4.8027652680548746e-05, + "loss": 2.5299, + "step": 796500 + }, + { + "epoch": 3.95, + "learning_rate": 4.8026416571295515e-05, + "loss": 2.5359, + "step": 797000 + }, + { + "epoch": 3.95, + "learning_rate": 4.8025182939215136e-05, + "loss": 2.5481, + "step": 797500 + }, + { + "epoch": 3.95, + "learning_rate": 4.802394435278905e-05, + "loss": 2.5442, + "step": 798000 + }, + { + "epoch": 3.96, + "learning_rate": 4.802270576636297e-05, + "loss": 2.5507, + "step": 798500 + }, + { + "epoch": 3.96, + "learning_rate": 4.802146717993689e-05, + "loss": 2.5488, + "step": 799000 + }, + { + "epoch": 3.96, + "learning_rate": 4.8020228593510804e-05, + "loss": 2.5537, + "step": 799500 + }, + { + "epoch": 3.96, + "learning_rate": 4.801899000708472e-05, + "loss": 2.5368, + "step": 800000 + }, + { + "epoch": 3.97, + "learning_rate": 4.801775142065864e-05, + "loss": 2.5376, + "step": 800500 + }, + { + "epoch": 3.97, + "learning_rate": 4.8016512834232554e-05, + "loss": 2.5168, + "step": 801000 + }, + { + "epoch": 3.97, + "learning_rate": 4.8015276724979317e-05, + "loss": 2.5168, + "step": 801500 + }, + { + "epoch": 3.97, + "learning_rate": 4.8014038138553233e-05, + "loss": 2.5698, + "step": 802000 + }, + { + "epoch": 3.98, + "learning_rate": 4.801279955212715e-05, + "loss": 2.5456, + "step": 802500 + }, + { + "epoch": 3.98, + "learning_rate": 4.801156096570107e-05, + "loss": 2.5533, + "step": 803000 + }, + { + "epoch": 3.98, + "learning_rate": 4.8010322379274984e-05, + "loss": 2.5442, + "step": 803500 + }, + { + "epoch": 3.98, + "learning_rate": 4.80090837928489e-05, + "loss": 2.5533, + "step": 804000 + }, + { + "epoch": 3.99, + "learning_rate": 4.800784768359567e-05, + "loss": 2.5392, + "step": 804500 + }, + { + "epoch": 3.99, + "learning_rate": 4.800661157434244e-05, + "loss": 2.5661, + "step": 805000 + }, + { + "epoch": 3.99, + "learning_rate": 4.8005372987916356e-05, + "loss": 2.5666, + "step": 805500 + }, + { + "epoch": 3.99, + "learning_rate": 4.800413440149027e-05, + "loss": 2.5285, + "step": 806000 + }, + { + "epoch": 4.0, + "learning_rate": 4.800289581506418e-05, + "loss": 2.5249, + "step": 806500 + }, + { + "epoch": 4.0, + "learning_rate": 4.800165970581095e-05, + "loss": 2.5377, + "step": 807000 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6340747980022936, + "eval_accuracy_mlm": 0.5867373422775914, + "eval_accuracy_nsp": 0.8572672468906766, + "eval_loss": 2.4791698455810547, + "eval_runtime": 145.9611, + "eval_samples_per_second": 1746.76, + "eval_steps_per_second": 72.787, + "step": 807372 + }, + { + "epoch": 4.0, + "learning_rate": 4.800042111938487e-05, + "loss": 2.5294, + "step": 807500 + }, + { + "epoch": 4.0, + "learning_rate": 4.7999182532958785e-05, + "loss": 2.4976, + "step": 808000 + }, + { + "epoch": 4.01, + "learning_rate": 4.79979439465327e-05, + "loss": 2.5014, + "step": 808500 + }, + { + "epoch": 4.01, + "learning_rate": 4.799670536010662e-05, + "loss": 2.5063, + "step": 809000 + }, + { + "epoch": 4.01, + "learning_rate": 4.7995466773680536e-05, + "loss": 2.5132, + "step": 809500 + }, + { + "epoch": 4.01, + "learning_rate": 4.799422818725445e-05, + "loss": 2.5018, + "step": 810000 + }, + { + "epoch": 4.02, + "learning_rate": 4.799298960082837e-05, + "loss": 2.5117, + "step": 810500 + }, + { + "epoch": 4.02, + "learning_rate": 4.799175101440229e-05, + "loss": 2.5232, + "step": 811000 + }, + { + "epoch": 4.02, + "learning_rate": 4.7990514905149056e-05, + "loss": 2.5043, + "step": 811500 + }, + { + "epoch": 4.02, + "learning_rate": 4.798927631872297e-05, + "loss": 2.5114, + "step": 812000 + }, + { + "epoch": 4.03, + "learning_rate": 4.798803773229689e-05, + "loss": 2.5253, + "step": 812500 + }, + { + "epoch": 4.03, + "learning_rate": 4.7986799145870807e-05, + "loss": 2.5181, + "step": 813000 + }, + { + "epoch": 4.03, + "learning_rate": 4.798556055944472e-05, + "loss": 2.5291, + "step": 813500 + }, + { + "epoch": 4.03, + "learning_rate": 4.7984321973018634e-05, + "loss": 2.499, + "step": 814000 + }, + { + "epoch": 4.04, + "learning_rate": 4.798308338659255e-05, + "loss": 2.5222, + "step": 814500 + }, + { + "epoch": 4.04, + "learning_rate": 4.798184480016647e-05, + "loss": 2.5236, + "step": 815000 + }, + { + "epoch": 4.04, + "learning_rate": 4.7980606213740384e-05, + "loss": 2.5314, + "step": 815500 + }, + { + "epoch": 4.04, + "learning_rate": 4.797937010448715e-05, + "loss": 2.5234, + "step": 816000 + }, + { + "epoch": 4.05, + "learning_rate": 4.797813151806107e-05, + "loss": 2.4951, + "step": 816500 + }, + { + "epoch": 4.05, + "learning_rate": 4.797689293163499e-05, + "loss": 2.529, + "step": 817000 + }, + { + "epoch": 4.05, + "learning_rate": 4.7975654345208904e-05, + "loss": 2.5055, + "step": 817500 + }, + { + "epoch": 4.05, + "learning_rate": 4.797441575878282e-05, + "loss": 2.4981, + "step": 818000 + }, + { + "epoch": 4.06, + "learning_rate": 4.797317964952959e-05, + "loss": 2.512, + "step": 818500 + }, + { + "epoch": 4.06, + "learning_rate": 4.797194106310351e-05, + "loss": 2.515, + "step": 819000 + }, + { + "epoch": 4.06, + "learning_rate": 4.7970702476677424e-05, + "loss": 2.4995, + "step": 819500 + }, + { + "epoch": 4.06, + "learning_rate": 4.7969463890251334e-05, + "loss": 2.5222, + "step": 820000 + }, + { + "epoch": 4.07, + "learning_rate": 4.79682277809981e-05, + "loss": 2.5096, + "step": 820500 + }, + { + "epoch": 4.07, + "learning_rate": 4.796698919457202e-05, + "loss": 2.5198, + "step": 821000 + }, + { + "epoch": 4.07, + "learning_rate": 4.796575308531879e-05, + "loss": 2.5011, + "step": 821500 + }, + { + "epoch": 4.07, + "learning_rate": 4.7964514498892705e-05, + "loss": 2.5134, + "step": 822000 + }, + { + "epoch": 4.07, + "learning_rate": 4.796327591246662e-05, + "loss": 2.486, + "step": 822500 + }, + { + "epoch": 4.08, + "learning_rate": 4.796203732604054e-05, + "loss": 2.5452, + "step": 823000 + }, + { + "epoch": 4.08, + "learning_rate": 4.7960798739614456e-05, + "loss": 2.5249, + "step": 823500 + }, + { + "epoch": 4.08, + "learning_rate": 4.795956015318837e-05, + "loss": 2.5277, + "step": 824000 + }, + { + "epoch": 4.08, + "learning_rate": 4.795832156676229e-05, + "loss": 2.517, + "step": 824500 + }, + { + "epoch": 4.09, + "learning_rate": 4.795708298033621e-05, + "loss": 2.5276, + "step": 825000 + }, + { + "epoch": 4.09, + "learning_rate": 4.7955844393910124e-05, + "loss": 2.5128, + "step": 825500 + }, + { + "epoch": 4.09, + "learning_rate": 4.7954610761829744e-05, + "loss": 2.5389, + "step": 826000 + }, + { + "epoch": 4.09, + "learning_rate": 4.795337217540366e-05, + "loss": 2.5206, + "step": 826500 + }, + { + "epoch": 4.1, + "learning_rate": 4.795213358897757e-05, + "loss": 2.5004, + "step": 827000 + }, + { + "epoch": 4.1, + "learning_rate": 4.795089747972435e-05, + "loss": 2.5165, + "step": 827500 + }, + { + "epoch": 4.1, + "learning_rate": 4.794965889329826e-05, + "loss": 2.5113, + "step": 828000 + }, + { + "epoch": 4.1, + "learning_rate": 4.7948420306872174e-05, + "loss": 2.4997, + "step": 828500 + }, + { + "epoch": 4.11, + "learning_rate": 4.794718172044609e-05, + "loss": 2.5323, + "step": 829000 + }, + { + "epoch": 4.11, + "learning_rate": 4.794594313402001e-05, + "loss": 2.5239, + "step": 829500 + }, + { + "epoch": 4.11, + "learning_rate": 4.794470454759392e-05, + "loss": 2.5154, + "step": 830000 + }, + { + "epoch": 4.11, + "learning_rate": 4.7943465961167835e-05, + "loss": 2.5174, + "step": 830500 + }, + { + "epoch": 4.12, + "learning_rate": 4.794222737474175e-05, + "loss": 2.5047, + "step": 831000 + }, + { + "epoch": 4.12, + "learning_rate": 4.794098878831567e-05, + "loss": 2.5309, + "step": 831500 + }, + { + "epoch": 4.12, + "learning_rate": 4.7939750201889586e-05, + "loss": 2.5058, + "step": 832000 + }, + { + "epoch": 4.12, + "learning_rate": 4.793851409263636e-05, + "loss": 2.5134, + "step": 832500 + }, + { + "epoch": 4.13, + "learning_rate": 4.793727550621028e-05, + "loss": 2.5142, + "step": 833000 + }, + { + "epoch": 4.13, + "learning_rate": 4.793603691978419e-05, + "loss": 2.5138, + "step": 833500 + }, + { + "epoch": 4.13, + "learning_rate": 4.7934798333358106e-05, + "loss": 2.5339, + "step": 834000 + }, + { + "epoch": 4.13, + "learning_rate": 4.793355974693202e-05, + "loss": 2.5123, + "step": 834500 + }, + { + "epoch": 4.14, + "learning_rate": 4.793232363767879e-05, + "loss": 2.5296, + "step": 835000 + }, + { + "epoch": 4.14, + "learning_rate": 4.793108752842556e-05, + "loss": 2.5339, + "step": 835500 + }, + { + "epoch": 4.14, + "learning_rate": 4.792984894199948e-05, + "loss": 2.538, + "step": 836000 + }, + { + "epoch": 4.14, + "learning_rate": 4.7928610355573394e-05, + "loss": 2.5208, + "step": 836500 + }, + { + "epoch": 4.15, + "learning_rate": 4.792737176914731e-05, + "loss": 2.4823, + "step": 837000 + }, + { + "epoch": 4.15, + "learning_rate": 4.792613318272123e-05, + "loss": 2.5293, + "step": 837500 + }, + { + "epoch": 4.15, + "learning_rate": 4.7924894596295145e-05, + "loss": 2.5136, + "step": 838000 + }, + { + "epoch": 4.15, + "learning_rate": 4.792365600986906e-05, + "loss": 2.497, + "step": 838500 + }, + { + "epoch": 4.16, + "learning_rate": 4.792241742344298e-05, + "loss": 2.5301, + "step": 839000 + }, + { + "epoch": 4.16, + "learning_rate": 4.792117883701689e-05, + "loss": 2.5154, + "step": 839500 + }, + { + "epoch": 4.16, + "learning_rate": 4.7919940250590806e-05, + "loss": 2.5086, + "step": 840000 + }, + { + "epoch": 4.16, + "learning_rate": 4.791870166416472e-05, + "loss": 2.506, + "step": 840500 + }, + { + "epoch": 4.17, + "learning_rate": 4.791746307773864e-05, + "loss": 2.5198, + "step": 841000 + }, + { + "epoch": 4.17, + "learning_rate": 4.7916224491312556e-05, + "loss": 2.5042, + "step": 841500 + }, + { + "epoch": 4.17, + "learning_rate": 4.791499085923218e-05, + "loss": 2.5122, + "step": 842000 + }, + { + "epoch": 4.17, + "learning_rate": 4.7913752272806094e-05, + "loss": 2.5176, + "step": 842500 + }, + { + "epoch": 4.18, + "learning_rate": 4.791251368638001e-05, + "loss": 2.5354, + "step": 843000 + }, + { + "epoch": 4.18, + "learning_rate": 4.791127509995393e-05, + "loss": 2.515, + "step": 843500 + }, + { + "epoch": 4.18, + "learning_rate": 4.7910036513527845e-05, + "loss": 2.5099, + "step": 844000 + }, + { + "epoch": 4.18, + "learning_rate": 4.790879792710176e-05, + "loss": 2.5267, + "step": 844500 + }, + { + "epoch": 4.19, + "learning_rate": 4.790755934067568e-05, + "loss": 2.5115, + "step": 845000 + }, + { + "epoch": 4.19, + "learning_rate": 4.7906320754249596e-05, + "loss": 2.5051, + "step": 845500 + }, + { + "epoch": 4.19, + "learning_rate": 4.7905082167823506e-05, + "loss": 2.5294, + "step": 846000 + }, + { + "epoch": 4.19, + "learning_rate": 4.790384605857028e-05, + "loss": 2.5381, + "step": 846500 + }, + { + "epoch": 4.2, + "learning_rate": 4.79026074721442e-05, + "loss": 2.509, + "step": 847000 + }, + { + "epoch": 4.2, + "learning_rate": 4.7901368885718115e-05, + "loss": 2.5301, + "step": 847500 + }, + { + "epoch": 4.2, + "learning_rate": 4.7900130299292025e-05, + "loss": 2.5011, + "step": 848000 + }, + { + "epoch": 4.2, + "learning_rate": 4.789889171286594e-05, + "loss": 2.5179, + "step": 848500 + }, + { + "epoch": 4.21, + "learning_rate": 4.789765312643986e-05, + "loss": 2.5437, + "step": 849000 + }, + { + "epoch": 4.21, + "learning_rate": 4.789641701718663e-05, + "loss": 2.5317, + "step": 849500 + }, + { + "epoch": 4.21, + "learning_rate": 4.78951809079334e-05, + "loss": 2.5221, + "step": 850000 + }, + { + "epoch": 4.21, + "learning_rate": 4.7893942321507314e-05, + "loss": 2.5152, + "step": 850500 + }, + { + "epoch": 4.22, + "learning_rate": 4.789270621225408e-05, + "loss": 2.5534, + "step": 851000 + }, + { + "epoch": 4.22, + "learning_rate": 4.789146762582799e-05, + "loss": 2.5463, + "step": 851500 + }, + { + "epoch": 4.22, + "learning_rate": 4.789022903940191e-05, + "loss": 2.528, + "step": 852000 + }, + { + "epoch": 4.22, + "learning_rate": 4.7888990452975827e-05, + "loss": 2.5066, + "step": 852500 + }, + { + "epoch": 4.23, + "learning_rate": 4.7887751866549744e-05, + "loss": 2.5129, + "step": 853000 + }, + { + "epoch": 4.23, + "learning_rate": 4.788651328012366e-05, + "loss": 2.5264, + "step": 853500 + }, + { + "epoch": 4.23, + "learning_rate": 4.788527469369758e-05, + "loss": 2.5151, + "step": 854000 + }, + { + "epoch": 4.23, + "learning_rate": 4.7884036107271494e-05, + "loss": 2.5214, + "step": 854500 + }, + { + "epoch": 4.24, + "learning_rate": 4.788279752084541e-05, + "loss": 2.5097, + "step": 855000 + }, + { + "epoch": 4.24, + "learning_rate": 4.788155893441933e-05, + "loss": 2.5109, + "step": 855500 + }, + { + "epoch": 4.24, + "learning_rate": 4.7880320347993245e-05, + "loss": 2.522, + "step": 856000 + }, + { + "epoch": 4.24, + "learning_rate": 4.7879084238740014e-05, + "loss": 2.5233, + "step": 856500 + }, + { + "epoch": 4.25, + "learning_rate": 4.787784565231393e-05, + "loss": 2.5345, + "step": 857000 + }, + { + "epoch": 4.25, + "learning_rate": 4.787660706588785e-05, + "loss": 2.5216, + "step": 857500 + }, + { + "epoch": 4.25, + "learning_rate": 4.7875368479461765e-05, + "loss": 2.565, + "step": 858000 + }, + { + "epoch": 4.25, + "learning_rate": 4.787412989303568e-05, + "loss": 2.5278, + "step": 858500 + }, + { + "epoch": 4.26, + "learning_rate": 4.7872893783782444e-05, + "loss": 2.5257, + "step": 859000 + }, + { + "epoch": 4.26, + "learning_rate": 4.787165519735636e-05, + "loss": 2.5161, + "step": 859500 + }, + { + "epoch": 4.26, + "learning_rate": 4.787041661093028e-05, + "loss": 2.5183, + "step": 860000 + }, + { + "epoch": 4.26, + "learning_rate": 4.7869178024504194e-05, + "loss": 2.52, + "step": 860500 + }, + { + "epoch": 4.27, + "learning_rate": 4.786793943807811e-05, + "loss": 2.5174, + "step": 861000 + }, + { + "epoch": 4.27, + "learning_rate": 4.786670085165203e-05, + "loss": 2.5502, + "step": 861500 + }, + { + "epoch": 4.27, + "learning_rate": 4.78654647423988e-05, + "loss": 2.5172, + "step": 862000 + }, + { + "epoch": 4.27, + "learning_rate": 4.7864226155972714e-05, + "loss": 2.5307, + "step": 862500 + }, + { + "epoch": 4.28, + "learning_rate": 4.786298756954663e-05, + "loss": 2.5203, + "step": 863000 + }, + { + "epoch": 4.28, + "learning_rate": 4.786174898312055e-05, + "loss": 2.528, + "step": 863500 + }, + { + "epoch": 4.28, + "learning_rate": 4.7860510396694465e-05, + "loss": 2.5293, + "step": 864000 + }, + { + "epoch": 4.28, + "learning_rate": 4.785927181026838e-05, + "loss": 2.5237, + "step": 864500 + }, + { + "epoch": 4.29, + "learning_rate": 4.78580332238423e-05, + "loss": 2.5579, + "step": 865000 + }, + { + "epoch": 4.29, + "learning_rate": 4.7856794637416216e-05, + "loss": 2.5067, + "step": 865500 + }, + { + "epoch": 4.29, + "learning_rate": 4.785555605099013e-05, + "loss": 2.5152, + "step": 866000 + }, + { + "epoch": 4.29, + "learning_rate": 4.785431746456405e-05, + "loss": 2.5302, + "step": 866500 + }, + { + "epoch": 4.3, + "learning_rate": 4.785308135531081e-05, + "loss": 2.5225, + "step": 867000 + }, + { + "epoch": 4.3, + "learning_rate": 4.785184524605758e-05, + "loss": 2.507, + "step": 867500 + }, + { + "epoch": 4.3, + "learning_rate": 4.78506066596315e-05, + "loss": 2.5427, + "step": 868000 + }, + { + "epoch": 4.3, + "learning_rate": 4.7849370550378266e-05, + "loss": 2.508, + "step": 868500 + }, + { + "epoch": 4.31, + "learning_rate": 4.784813196395218e-05, + "loss": 2.5365, + "step": 869000 + }, + { + "epoch": 4.31, + "learning_rate": 4.78468933775261e-05, + "loss": 2.5121, + "step": 869500 + }, + { + "epoch": 4.31, + "learning_rate": 4.784565479110002e-05, + "loss": 2.519, + "step": 870000 + }, + { + "epoch": 4.31, + "learning_rate": 4.7844416204673934e-05, + "loss": 2.5295, + "step": 870500 + }, + { + "epoch": 4.32, + "learning_rate": 4.784317761824785e-05, + "loss": 2.5398, + "step": 871000 + }, + { + "epoch": 4.32, + "learning_rate": 4.784193903182176e-05, + "loss": 2.5191, + "step": 871500 + }, + { + "epoch": 4.32, + "learning_rate": 4.784070044539568e-05, + "loss": 2.523, + "step": 872000 + }, + { + "epoch": 4.32, + "learning_rate": 4.7839461858969595e-05, + "loss": 2.5589, + "step": 872500 + }, + { + "epoch": 4.33, + "learning_rate": 4.783822327254351e-05, + "loss": 2.5345, + "step": 873000 + }, + { + "epoch": 4.33, + "learning_rate": 4.783698468611743e-05, + "loss": 2.5171, + "step": 873500 + }, + { + "epoch": 4.33, + "learning_rate": 4.7835746099691345e-05, + "loss": 2.5048, + "step": 874000 + }, + { + "epoch": 4.33, + "learning_rate": 4.783450751326526e-05, + "loss": 2.5152, + "step": 874500 + }, + { + "epoch": 4.34, + "learning_rate": 4.783327140401203e-05, + "loss": 2.5189, + "step": 875000 + }, + { + "epoch": 4.34, + "learning_rate": 4.783203281758595e-05, + "loss": 2.506, + "step": 875500 + }, + { + "epoch": 4.34, + "learning_rate": 4.7830794231159865e-05, + "loss": 2.5281, + "step": 876000 + }, + { + "epoch": 4.34, + "learning_rate": 4.782955564473378e-05, + "loss": 2.5282, + "step": 876500 + }, + { + "epoch": 4.34, + "learning_rate": 4.782831953548055e-05, + "loss": 2.4956, + "step": 877000 + }, + { + "epoch": 4.35, + "learning_rate": 4.782708094905447e-05, + "loss": 2.5356, + "step": 877500 + }, + { + "epoch": 4.35, + "learning_rate": 4.7825842362628385e-05, + "loss": 2.5127, + "step": 878000 + }, + { + "epoch": 4.35, + "learning_rate": 4.7824603776202295e-05, + "loss": 2.536, + "step": 878500 + }, + { + "epoch": 4.35, + "learning_rate": 4.782336518977621e-05, + "loss": 2.5432, + "step": 879000 + }, + { + "epoch": 4.36, + "learning_rate": 4.782212660335013e-05, + "loss": 2.5164, + "step": 879500 + }, + { + "epoch": 4.36, + "learning_rate": 4.78208904940969e-05, + "loss": 2.5096, + "step": 880000 + }, + { + "epoch": 4.36, + "learning_rate": 4.7819651907670814e-05, + "loss": 2.5279, + "step": 880500 + }, + { + "epoch": 4.36, + "learning_rate": 4.781841332124473e-05, + "loss": 2.5284, + "step": 881000 + }, + { + "epoch": 4.37, + "learning_rate": 4.781717473481865e-05, + "loss": 2.496, + "step": 881500 + }, + { + "epoch": 4.37, + "learning_rate": 4.7815936148392565e-05, + "loss": 2.5169, + "step": 882000 + }, + { + "epoch": 4.37, + "learning_rate": 4.781469756196648e-05, + "loss": 2.5015, + "step": 882500 + }, + { + "epoch": 4.37, + "learning_rate": 4.781346145271325e-05, + "loss": 2.5389, + "step": 883000 + }, + { + "epoch": 4.38, + "learning_rate": 4.781222286628717e-05, + "loss": 2.5235, + "step": 883500 + }, + { + "epoch": 4.38, + "learning_rate": 4.7810984279861085e-05, + "loss": 2.5245, + "step": 884000 + }, + { + "epoch": 4.38, + "learning_rate": 4.780974817060785e-05, + "loss": 2.5214, + "step": 884500 + }, + { + "epoch": 4.38, + "learning_rate": 4.780851206135462e-05, + "loss": 2.5422, + "step": 885000 + }, + { + "epoch": 4.39, + "learning_rate": 4.780727347492854e-05, + "loss": 2.5327, + "step": 885500 + }, + { + "epoch": 4.39, + "learning_rate": 4.7806034888502456e-05, + "loss": 2.5164, + "step": 886000 + }, + { + "epoch": 4.39, + "learning_rate": 4.780479630207637e-05, + "loss": 2.5537, + "step": 886500 + }, + { + "epoch": 4.39, + "learning_rate": 4.780355771565028e-05, + "loss": 2.513, + "step": 887000 + }, + { + "epoch": 4.4, + "learning_rate": 4.78023191292242e-05, + "loss": 2.5196, + "step": 887500 + }, + { + "epoch": 4.4, + "learning_rate": 4.780108054279812e-05, + "loss": 2.5014, + "step": 888000 + }, + { + "epoch": 4.4, + "learning_rate": 4.7799841956372034e-05, + "loss": 2.5319, + "step": 888500 + }, + { + "epoch": 4.4, + "learning_rate": 4.779860336994595e-05, + "loss": 2.5317, + "step": 889000 + }, + { + "epoch": 4.41, + "learning_rate": 4.779736478351987e-05, + "loss": 2.5447, + "step": 889500 + }, + { + "epoch": 4.41, + "learning_rate": 4.7796126197093785e-05, + "loss": 2.513, + "step": 890000 + }, + { + "epoch": 4.41, + "learning_rate": 4.77948876106677e-05, + "loss": 2.5189, + "step": 890500 + }, + { + "epoch": 4.41, + "learning_rate": 4.7793651501414464e-05, + "loss": 2.5308, + "step": 891000 + }, + { + "epoch": 4.42, + "learning_rate": 4.779241291498838e-05, + "loss": 2.5264, + "step": 891500 + }, + { + "epoch": 4.42, + "learning_rate": 4.77911743285623e-05, + "loss": 2.5147, + "step": 892000 + }, + { + "epoch": 4.42, + "learning_rate": 4.7789935742136215e-05, + "loss": 2.5411, + "step": 892500 + }, + { + "epoch": 4.42, + "learning_rate": 4.7788702110055835e-05, + "loss": 2.496, + "step": 893000 + }, + { + "epoch": 4.43, + "learning_rate": 4.778746352362975e-05, + "loss": 2.5043, + "step": 893500 + }, + { + "epoch": 4.43, + "learning_rate": 4.778622493720367e-05, + "loss": 2.51, + "step": 894000 + }, + { + "epoch": 4.43, + "learning_rate": 4.7784986350777586e-05, + "loss": 2.518, + "step": 894500 + }, + { + "epoch": 4.43, + "learning_rate": 4.77837477643515e-05, + "loss": 2.5077, + "step": 895000 + }, + { + "epoch": 4.44, + "learning_rate": 4.778250917792542e-05, + "loss": 2.512, + "step": 895500 + }, + { + "epoch": 4.44, + "learning_rate": 4.778127306867219e-05, + "loss": 2.5042, + "step": 896000 + }, + { + "epoch": 4.44, + "learning_rate": 4.7780034482246106e-05, + "loss": 2.5105, + "step": 896500 + }, + { + "epoch": 4.44, + "learning_rate": 4.777879589582002e-05, + "loss": 2.5393, + "step": 897000 + }, + { + "epoch": 4.45, + "learning_rate": 4.777755730939394e-05, + "loss": 2.5374, + "step": 897500 + }, + { + "epoch": 4.45, + "learning_rate": 4.7776318722967856e-05, + "loss": 2.4974, + "step": 898000 + }, + { + "epoch": 4.45, + "learning_rate": 4.7775080136541773e-05, + "loss": 2.5217, + "step": 898500 + }, + { + "epoch": 4.45, + "learning_rate": 4.777384155011569e-05, + "loss": 2.5425, + "step": 899000 + }, + { + "epoch": 4.46, + "learning_rate": 4.77726029636896e-05, + "loss": 2.5461, + "step": 899500 + }, + { + "epoch": 4.46, + "learning_rate": 4.777136933160922e-05, + "loss": 2.5432, + "step": 900000 + }, + { + "epoch": 4.46, + "learning_rate": 4.777013074518314e-05, + "loss": 2.5105, + "step": 900500 + }, + { + "epoch": 4.46, + "learning_rate": 4.7768892158757055e-05, + "loss": 2.5198, + "step": 901000 + }, + { + "epoch": 4.47, + "learning_rate": 4.7767656049503824e-05, + "loss": 2.5226, + "step": 901500 + }, + { + "epoch": 4.47, + "learning_rate": 4.776641746307774e-05, + "loss": 2.5397, + "step": 902000 + }, + { + "epoch": 4.47, + "learning_rate": 4.776517887665166e-05, + "loss": 2.5173, + "step": 902500 + }, + { + "epoch": 4.47, + "learning_rate": 4.7763940290225575e-05, + "loss": 2.5123, + "step": 903000 + }, + { + "epoch": 4.48, + "learning_rate": 4.776270170379949e-05, + "loss": 2.5164, + "step": 903500 + }, + { + "epoch": 4.48, + "learning_rate": 4.776146311737341e-05, + "loss": 2.5277, + "step": 904000 + }, + { + "epoch": 4.48, + "learning_rate": 4.7760224530947325e-05, + "loss": 2.5085, + "step": 904500 + }, + { + "epoch": 4.48, + "learning_rate": 4.775898594452124e-05, + "loss": 2.5297, + "step": 905000 + }, + { + "epoch": 4.49, + "learning_rate": 4.775774735809516e-05, + "loss": 2.5393, + "step": 905500 + }, + { + "epoch": 4.49, + "learning_rate": 4.775651124884192e-05, + "loss": 2.5383, + "step": 906000 + }, + { + "epoch": 4.49, + "learning_rate": 4.775527266241584e-05, + "loss": 2.5199, + "step": 906500 + }, + { + "epoch": 4.49, + "learning_rate": 4.7754034075989755e-05, + "loss": 2.5323, + "step": 907000 + }, + { + "epoch": 4.5, + "learning_rate": 4.775279548956367e-05, + "loss": 2.5211, + "step": 907500 + }, + { + "epoch": 4.5, + "learning_rate": 4.775155690313759e-05, + "loss": 2.5245, + "step": 908000 + }, + { + "epoch": 4.5, + "learning_rate": 4.7750318316711506e-05, + "loss": 2.5016, + "step": 908500 + }, + { + "epoch": 4.5, + "learning_rate": 4.774907973028542e-05, + "loss": 2.5191, + "step": 909000 + }, + { + "epoch": 4.51, + "learning_rate": 4.774784114385934e-05, + "loss": 2.5471, + "step": 909500 + }, + { + "epoch": 4.51, + "learning_rate": 4.774660255743326e-05, + "loss": 2.5372, + "step": 910000 + }, + { + "epoch": 4.51, + "learning_rate": 4.7745363971007174e-05, + "loss": 2.52, + "step": 910500 + }, + { + "epoch": 4.51, + "learning_rate": 4.774412538458109e-05, + "loss": 2.512, + "step": 911000 + }, + { + "epoch": 4.52, + "learning_rate": 4.774288679815501e-05, + "loss": 2.5325, + "step": 911500 + }, + { + "epoch": 4.52, + "learning_rate": 4.774164821172892e-05, + "loss": 2.5194, + "step": 912000 + }, + { + "epoch": 4.52, + "learning_rate": 4.7740409625302835e-05, + "loss": 2.5473, + "step": 912500 + }, + { + "epoch": 4.52, + "learning_rate": 4.773917103887675e-05, + "loss": 2.5345, + "step": 913000 + }, + { + "epoch": 4.53, + "learning_rate": 4.773793245245067e-05, + "loss": 2.5285, + "step": 913500 + }, + { + "epoch": 4.53, + "learning_rate": 4.7736693866024585e-05, + "loss": 2.5442, + "step": 914000 + }, + { + "epoch": 4.53, + "learning_rate": 4.7735457756771354e-05, + "loss": 2.5358, + "step": 914500 + }, + { + "epoch": 4.53, + "learning_rate": 4.773421917034527e-05, + "loss": 2.5193, + "step": 915000 + }, + { + "epoch": 4.54, + "learning_rate": 4.773298306109204e-05, + "loss": 2.5213, + "step": 915500 + }, + { + "epoch": 4.54, + "learning_rate": 4.773174447466596e-05, + "loss": 2.5143, + "step": 916000 + }, + { + "epoch": 4.54, + "learning_rate": 4.7730505888239874e-05, + "loss": 2.507, + "step": 916500 + }, + { + "epoch": 4.54, + "learning_rate": 4.772926977898664e-05, + "loss": 2.5088, + "step": 917000 + }, + { + "epoch": 4.55, + "learning_rate": 4.772803119256056e-05, + "loss": 2.538, + "step": 917500 + }, + { + "epoch": 4.55, + "learning_rate": 4.7726792606134476e-05, + "loss": 2.5344, + "step": 918000 + }, + { + "epoch": 4.55, + "learning_rate": 4.7725554019708393e-05, + "loss": 2.5217, + "step": 918500 + }, + { + "epoch": 4.55, + "learning_rate": 4.772431543328231e-05, + "loss": 2.5035, + "step": 919000 + }, + { + "epoch": 4.56, + "learning_rate": 4.772307684685623e-05, + "loss": 2.5022, + "step": 919500 + }, + { + "epoch": 4.56, + "learning_rate": 4.772183826043014e-05, + "loss": 2.535, + "step": 920000 + }, + { + "epoch": 4.56, + "learning_rate": 4.7720599674004054e-05, + "loss": 2.481, + "step": 920500 + }, + { + "epoch": 4.56, + "learning_rate": 4.771936356475082e-05, + "loss": 2.5258, + "step": 921000 + }, + { + "epoch": 4.57, + "learning_rate": 4.771812497832474e-05, + "loss": 2.5533, + "step": 921500 + }, + { + "epoch": 4.57, + "learning_rate": 4.771688639189866e-05, + "loss": 2.5407, + "step": 922000 + }, + { + "epoch": 4.57, + "learning_rate": 4.7715647805472574e-05, + "loss": 2.5222, + "step": 922500 + }, + { + "epoch": 4.57, + "learning_rate": 4.771440921904649e-05, + "loss": 2.5202, + "step": 923000 + }, + { + "epoch": 4.58, + "learning_rate": 4.7713178064138963e-05, + "loss": 2.5326, + "step": 923500 + }, + { + "epoch": 4.58, + "learning_rate": 4.771193947771288e-05, + "loss": 2.5148, + "step": 924000 + }, + { + "epoch": 4.58, + "learning_rate": 4.77107008912868e-05, + "loss": 2.5138, + "step": 924500 + }, + { + "epoch": 4.58, + "learning_rate": 4.770946230486071e-05, + "loss": 2.4793, + "step": 925000 + }, + { + "epoch": 4.59, + "learning_rate": 4.7708223718434624e-05, + "loss": 2.5133, + "step": 925500 + }, + { + "epoch": 4.59, + "learning_rate": 4.770698513200854e-05, + "loss": 2.5307, + "step": 926000 + }, + { + "epoch": 4.59, + "learning_rate": 4.770574902275532e-05, + "loss": 2.5068, + "step": 926500 + }, + { + "epoch": 4.59, + "learning_rate": 4.7704510436329234e-05, + "loss": 2.528, + "step": 927000 + }, + { + "epoch": 4.6, + "learning_rate": 4.7703271849903144e-05, + "loss": 2.5122, + "step": 927500 + }, + { + "epoch": 4.6, + "learning_rate": 4.770203326347706e-05, + "loss": 2.5372, + "step": 928000 + }, + { + "epoch": 4.6, + "learning_rate": 4.770079467705098e-05, + "loss": 2.5282, + "step": 928500 + }, + { + "epoch": 4.6, + "learning_rate": 4.7699556090624895e-05, + "loss": 2.5162, + "step": 929000 + }, + { + "epoch": 4.61, + "learning_rate": 4.769831750419881e-05, + "loss": 2.5279, + "step": 929500 + }, + { + "epoch": 4.61, + "learning_rate": 4.769707891777272e-05, + "loss": 2.5302, + "step": 930000 + }, + { + "epoch": 4.61, + "learning_rate": 4.769584033134664e-05, + "loss": 2.5265, + "step": 930500 + }, + { + "epoch": 4.61, + "learning_rate": 4.7694601744920556e-05, + "loss": 2.5167, + "step": 931000 + }, + { + "epoch": 4.61, + "learning_rate": 4.769336315849447e-05, + "loss": 2.5033, + "step": 931500 + }, + { + "epoch": 4.62, + "learning_rate": 4.769212457206839e-05, + "loss": 2.5239, + "step": 932000 + }, + { + "epoch": 4.62, + "learning_rate": 4.769088846281516e-05, + "loss": 2.5485, + "step": 932500 + }, + { + "epoch": 4.62, + "learning_rate": 4.7689649876389075e-05, + "loss": 2.5165, + "step": 933000 + }, + { + "epoch": 4.62, + "learning_rate": 4.768841376713585e-05, + "loss": 2.5295, + "step": 933500 + }, + { + "epoch": 4.63, + "learning_rate": 4.768717518070977e-05, + "loss": 2.5282, + "step": 934000 + }, + { + "epoch": 4.63, + "learning_rate": 4.768593659428368e-05, + "loss": 2.505, + "step": 934500 + }, + { + "epoch": 4.63, + "learning_rate": 4.768470048503045e-05, + "loss": 2.5145, + "step": 935000 + }, + { + "epoch": 4.63, + "learning_rate": 4.7683461898604364e-05, + "loss": 2.5239, + "step": 935500 + }, + { + "epoch": 4.64, + "learning_rate": 4.768222331217828e-05, + "loss": 2.512, + "step": 936000 + }, + { + "epoch": 4.64, + "learning_rate": 4.76809847257522e-05, + "loss": 2.5265, + "step": 936500 + }, + { + "epoch": 4.64, + "learning_rate": 4.7679746139326114e-05, + "loss": 2.528, + "step": 937000 + }, + { + "epoch": 4.64, + "learning_rate": 4.7678507552900025e-05, + "loss": 2.5295, + "step": 937500 + }, + { + "epoch": 4.65, + "learning_rate": 4.767726896647394e-05, + "loss": 2.4952, + "step": 938000 + }, + { + "epoch": 4.65, + "learning_rate": 4.767603038004786e-05, + "loss": 2.5174, + "step": 938500 + }, + { + "epoch": 4.65, + "learning_rate": 4.7674791793621775e-05, + "loss": 2.5244, + "step": 939000 + }, + { + "epoch": 4.65, + "learning_rate": 4.767355320719569e-05, + "loss": 2.5318, + "step": 939500 + }, + { + "epoch": 4.66, + "learning_rate": 4.767231462076961e-05, + "loss": 2.5375, + "step": 940000 + }, + { + "epoch": 4.66, + "learning_rate": 4.7671078511516385e-05, + "loss": 2.5271, + "step": 940500 + }, + { + "epoch": 4.66, + "learning_rate": 4.7669839925090295e-05, + "loss": 2.5511, + "step": 941000 + }, + { + "epoch": 4.66, + "learning_rate": 4.766860133866421e-05, + "loss": 2.5225, + "step": 941500 + }, + { + "epoch": 4.67, + "learning_rate": 4.766736275223813e-05, + "loss": 2.5055, + "step": 942000 + }, + { + "epoch": 4.67, + "learning_rate": 4.7666124165812046e-05, + "loss": 2.5206, + "step": 942500 + }, + { + "epoch": 4.67, + "learning_rate": 4.766488557938596e-05, + "loss": 2.5182, + "step": 943000 + }, + { + "epoch": 4.67, + "learning_rate": 4.766364947013273e-05, + "loss": 2.5254, + "step": 943500 + }, + { + "epoch": 4.68, + "learning_rate": 4.766241088370664e-05, + "loss": 2.4945, + "step": 944000 + }, + { + "epoch": 4.68, + "learning_rate": 4.766117229728056e-05, + "loss": 2.5185, + "step": 944500 + }, + { + "epoch": 4.68, + "learning_rate": 4.7659933710854476e-05, + "loss": 2.5111, + "step": 945000 + }, + { + "epoch": 4.68, + "learning_rate": 4.765869512442839e-05, + "loss": 2.5186, + "step": 945500 + }, + { + "epoch": 4.69, + "learning_rate": 4.765745653800231e-05, + "loss": 2.5001, + "step": 946000 + }, + { + "epoch": 4.69, + "learning_rate": 4.7656217951576226e-05, + "loss": 2.5132, + "step": 946500 + }, + { + "epoch": 4.69, + "learning_rate": 4.7654981842322995e-05, + "loss": 2.5195, + "step": 947000 + }, + { + "epoch": 4.69, + "learning_rate": 4.765374325589691e-05, + "loss": 2.5179, + "step": 947500 + }, + { + "epoch": 4.7, + "learning_rate": 4.765250466947083e-05, + "loss": 2.5338, + "step": 948000 + }, + { + "epoch": 4.7, + "learning_rate": 4.7651266083044746e-05, + "loss": 2.5614, + "step": 948500 + }, + { + "epoch": 4.7, + "learning_rate": 4.7650029973791515e-05, + "loss": 2.5336, + "step": 949000 + }, + { + "epoch": 4.7, + "learning_rate": 4.764879138736543e-05, + "loss": 2.4901, + "step": 949500 + }, + { + "epoch": 4.71, + "learning_rate": 4.76475552781122e-05, + "loss": 2.5283, + "step": 950000 + }, + { + "epoch": 4.71, + "learning_rate": 4.764631669168612e-05, + "loss": 2.5171, + "step": 950500 + }, + { + "epoch": 4.71, + "learning_rate": 4.764508058243288e-05, + "loss": 2.5353, + "step": 951000 + }, + { + "epoch": 4.71, + "learning_rate": 4.7643841996006796e-05, + "loss": 2.5246, + "step": 951500 + }, + { + "epoch": 4.72, + "learning_rate": 4.764260340958071e-05, + "loss": 2.5173, + "step": 952000 + }, + { + "epoch": 4.72, + "learning_rate": 4.764136482315463e-05, + "loss": 2.5125, + "step": 952500 + }, + { + "epoch": 4.72, + "learning_rate": 4.764012623672855e-05, + "loss": 2.5237, + "step": 953000 + }, + { + "epoch": 4.72, + "learning_rate": 4.7638887650302464e-05, + "loss": 2.5033, + "step": 953500 + }, + { + "epoch": 4.73, + "learning_rate": 4.763764906387638e-05, + "loss": 2.5233, + "step": 954000 + }, + { + "epoch": 4.73, + "learning_rate": 4.76364104774503e-05, + "loss": 2.5408, + "step": 954500 + }, + { + "epoch": 4.73, + "learning_rate": 4.7635171891024215e-05, + "loss": 2.5335, + "step": 955000 + }, + { + "epoch": 4.73, + "learning_rate": 4.763393330459813e-05, + "loss": 2.5481, + "step": 955500 + }, + { + "epoch": 4.74, + "learning_rate": 4.763269471817205e-05, + "loss": 2.5219, + "step": 956000 + }, + { + "epoch": 4.74, + "learning_rate": 4.763145613174596e-05, + "loss": 2.5032, + "step": 956500 + }, + { + "epoch": 4.74, + "learning_rate": 4.7630217545319876e-05, + "loss": 2.5114, + "step": 957000 + }, + { + "epoch": 4.74, + "learning_rate": 4.762897895889379e-05, + "loss": 2.5286, + "step": 957500 + }, + { + "epoch": 4.75, + "learning_rate": 4.762774284964057e-05, + "loss": 2.5163, + "step": 958000 + }, + { + "epoch": 4.75, + "learning_rate": 4.7626504263214485e-05, + "loss": 2.5127, + "step": 958500 + }, + { + "epoch": 4.75, + "learning_rate": 4.76252656767884e-05, + "loss": 2.5466, + "step": 959000 + }, + { + "epoch": 4.75, + "learning_rate": 4.762402709036231e-05, + "loss": 2.5094, + "step": 959500 + }, + { + "epoch": 4.76, + "learning_rate": 4.762278850393623e-05, + "loss": 2.5276, + "step": 960000 + }, + { + "epoch": 4.76, + "learning_rate": 4.7621549917510146e-05, + "loss": 2.5316, + "step": 960500 + }, + { + "epoch": 4.76, + "learning_rate": 4.762031133108406e-05, + "loss": 2.5051, + "step": 961000 + }, + { + "epoch": 4.76, + "learning_rate": 4.761907522183083e-05, + "loss": 2.495, + "step": 961500 + }, + { + "epoch": 4.77, + "learning_rate": 4.761783663540475e-05, + "loss": 2.5125, + "step": 962000 + }, + { + "epoch": 4.77, + "learning_rate": 4.761659804897866e-05, + "loss": 2.5293, + "step": 962500 + }, + { + "epoch": 4.77, + "learning_rate": 4.7615359462552576e-05, + "loss": 2.5661, + "step": 963000 + }, + { + "epoch": 4.77, + "learning_rate": 4.761412087612649e-05, + "loss": 2.5403, + "step": 963500 + }, + { + "epoch": 4.78, + "learning_rate": 4.761288228970041e-05, + "loss": 2.4998, + "step": 964000 + }, + { + "epoch": 4.78, + "learning_rate": 4.761164370327433e-05, + "loss": 2.5193, + "step": 964500 + }, + { + "epoch": 4.78, + "learning_rate": 4.7610405116848244e-05, + "loss": 2.5213, + "step": 965000 + }, + { + "epoch": 4.78, + "learning_rate": 4.760916653042216e-05, + "loss": 2.5163, + "step": 965500 + }, + { + "epoch": 4.79, + "learning_rate": 4.760792794399608e-05, + "loss": 2.5139, + "step": 966000 + }, + { + "epoch": 4.79, + "learning_rate": 4.7606689357569994e-05, + "loss": 2.5008, + "step": 966500 + }, + { + "epoch": 4.79, + "learning_rate": 4.760545324831676e-05, + "loss": 2.5123, + "step": 967000 + }, + { + "epoch": 4.79, + "learning_rate": 4.760421466189068e-05, + "loss": 2.519, + "step": 967500 + }, + { + "epoch": 4.8, + "learning_rate": 4.76029760754646e-05, + "loss": 2.5064, + "step": 968000 + }, + { + "epoch": 4.8, + "learning_rate": 4.7601739966211366e-05, + "loss": 2.5199, + "step": 968500 + }, + { + "epoch": 4.8, + "learning_rate": 4.7600501379785276e-05, + "loss": 2.5431, + "step": 969000 + }, + { + "epoch": 4.8, + "learning_rate": 4.759926279335919e-05, + "loss": 2.5357, + "step": 969500 + }, + { + "epoch": 4.81, + "learning_rate": 4.759802420693311e-05, + "loss": 2.5141, + "step": 970000 + }, + { + "epoch": 4.81, + "learning_rate": 4.759678562050703e-05, + "loss": 2.5247, + "step": 970500 + }, + { + "epoch": 4.81, + "learning_rate": 4.7595547034080944e-05, + "loss": 2.5246, + "step": 971000 + }, + { + "epoch": 4.81, + "learning_rate": 4.759430844765486e-05, + "loss": 2.5307, + "step": 971500 + }, + { + "epoch": 4.82, + "learning_rate": 4.759307233840163e-05, + "loss": 2.5291, + "step": 972000 + }, + { + "epoch": 4.82, + "learning_rate": 4.7591833751975546e-05, + "loss": 2.5249, + "step": 972500 + }, + { + "epoch": 4.82, + "learning_rate": 4.759059516554946e-05, + "loss": 2.5034, + "step": 973000 + }, + { + "epoch": 4.82, + "learning_rate": 4.758935657912338e-05, + "loss": 2.4944, + "step": 973500 + }, + { + "epoch": 4.83, + "learning_rate": 4.75881179926973e-05, + "loss": 2.5173, + "step": 974000 + }, + { + "epoch": 4.83, + "learning_rate": 4.7586881883444066e-05, + "loss": 2.5288, + "step": 974500 + }, + { + "epoch": 4.83, + "learning_rate": 4.7585643297017976e-05, + "loss": 2.5382, + "step": 975000 + }, + { + "epoch": 4.83, + "learning_rate": 4.758440471059189e-05, + "loss": 2.5082, + "step": 975500 + }, + { + "epoch": 4.84, + "learning_rate": 4.758316612416581e-05, + "loss": 2.4987, + "step": 976000 + }, + { + "epoch": 4.84, + "learning_rate": 4.758192753773973e-05, + "loss": 2.5288, + "step": 976500 + }, + { + "epoch": 4.84, + "learning_rate": 4.7580693905659354e-05, + "loss": 2.5066, + "step": 977000 + }, + { + "epoch": 4.84, + "learning_rate": 4.757945531923327e-05, + "loss": 2.5299, + "step": 977500 + }, + { + "epoch": 4.85, + "learning_rate": 4.757821673280718e-05, + "loss": 2.5367, + "step": 978000 + }, + { + "epoch": 4.85, + "learning_rate": 4.75769781463811e-05, + "loss": 2.5034, + "step": 978500 + }, + { + "epoch": 4.85, + "learning_rate": 4.7575739559955015e-05, + "loss": 2.5142, + "step": 979000 + }, + { + "epoch": 4.85, + "learning_rate": 4.757450097352893e-05, + "loss": 2.5349, + "step": 979500 + }, + { + "epoch": 4.86, + "learning_rate": 4.757326238710285e-05, + "loss": 2.5567, + "step": 980000 + }, + { + "epoch": 4.86, + "learning_rate": 4.7572023800676766e-05, + "loss": 2.5057, + "step": 980500 + }, + { + "epoch": 4.86, + "learning_rate": 4.757078521425068e-05, + "loss": 2.5152, + "step": 981000 + }, + { + "epoch": 4.86, + "learning_rate": 4.756954662782459e-05, + "loss": 2.5161, + "step": 981500 + }, + { + "epoch": 4.87, + "learning_rate": 4.756830804139851e-05, + "loss": 2.4941, + "step": 982000 + }, + { + "epoch": 4.87, + "learning_rate": 4.756706945497243e-05, + "loss": 2.5139, + "step": 982500 + }, + { + "epoch": 4.87, + "learning_rate": 4.7565830868546344e-05, + "loss": 2.5407, + "step": 983000 + }, + { + "epoch": 4.87, + "learning_rate": 4.756459475929312e-05, + "loss": 2.5312, + "step": 983500 + }, + { + "epoch": 4.88, + "learning_rate": 4.7563356172867036e-05, + "loss": 2.5106, + "step": 984000 + }, + { + "epoch": 4.88, + "learning_rate": 4.756211758644095e-05, + "loss": 2.5268, + "step": 984500 + }, + { + "epoch": 4.88, + "learning_rate": 4.7560879000014864e-05, + "loss": 2.537, + "step": 985000 + }, + { + "epoch": 4.88, + "learning_rate": 4.755964041358878e-05, + "loss": 2.5343, + "step": 985500 + }, + { + "epoch": 4.88, + "learning_rate": 4.75584018271627e-05, + "loss": 2.5492, + "step": 986000 + }, + { + "epoch": 4.89, + "learning_rate": 4.7557163240736614e-05, + "loss": 2.5359, + "step": 986500 + }, + { + "epoch": 4.89, + "learning_rate": 4.755592465431053e-05, + "loss": 2.5285, + "step": 987000 + }, + { + "epoch": 4.89, + "learning_rate": 4.755468854505729e-05, + "loss": 2.5059, + "step": 987500 + }, + { + "epoch": 4.89, + "learning_rate": 4.755344995863121e-05, + "loss": 2.5226, + "step": 988000 + }, + { + "epoch": 4.9, + "learning_rate": 4.755221137220513e-05, + "loss": 2.5191, + "step": 988500 + }, + { + "epoch": 4.9, + "learning_rate": 4.75509752629519e-05, + "loss": 2.5028, + "step": 989000 + }, + { + "epoch": 4.9, + "learning_rate": 4.754973915369867e-05, + "loss": 2.5224, + "step": 989500 + }, + { + "epoch": 4.9, + "learning_rate": 4.7548503044445434e-05, + "loss": 2.5351, + "step": 990000 + }, + { + "epoch": 4.91, + "learning_rate": 4.754726445801935e-05, + "loss": 2.5295, + "step": 990500 + }, + { + "epoch": 4.91, + "learning_rate": 4.754602587159327e-05, + "loss": 2.5256, + "step": 991000 + }, + { + "epoch": 4.91, + "learning_rate": 4.7544787285167184e-05, + "loss": 2.5175, + "step": 991500 + }, + { + "epoch": 4.91, + "learning_rate": 4.75435486987411e-05, + "loss": 2.5368, + "step": 992000 + }, + { + "epoch": 4.92, + "learning_rate": 4.754231011231502e-05, + "loss": 2.5184, + "step": 992500 + }, + { + "epoch": 4.92, + "learning_rate": 4.7541071525888935e-05, + "loss": 2.5307, + "step": 993000 + }, + { + "epoch": 4.92, + "learning_rate": 4.753983293946285e-05, + "loss": 2.5217, + "step": 993500 + }, + { + "epoch": 4.92, + "learning_rate": 4.753859435303677e-05, + "loss": 2.5294, + "step": 994000 + }, + { + "epoch": 4.93, + "learning_rate": 4.753735824378354e-05, + "loss": 2.505, + "step": 994500 + }, + { + "epoch": 4.93, + "learning_rate": 4.7536119657357455e-05, + "loss": 2.5285, + "step": 995000 + }, + { + "epoch": 4.93, + "learning_rate": 4.753488107093137e-05, + "loss": 2.5311, + "step": 995500 + }, + { + "epoch": 4.93, + "learning_rate": 4.753364248450529e-05, + "loss": 2.5233, + "step": 996000 + }, + { + "epoch": 4.94, + "learning_rate": 4.7532403898079206e-05, + "loss": 2.5115, + "step": 996500 + }, + { + "epoch": 4.94, + "learning_rate": 4.753116531165312e-05, + "loss": 2.5477, + "step": 997000 + }, + { + "epoch": 4.94, + "learning_rate": 4.7529929202399885e-05, + "loss": 2.5459, + "step": 997500 + }, + { + "epoch": 4.94, + "learning_rate": 4.75286906159738e-05, + "loss": 2.5032, + "step": 998000 + }, + { + "epoch": 4.95, + "learning_rate": 4.752745202954772e-05, + "loss": 2.506, + "step": 998500 + }, + { + "epoch": 4.95, + "learning_rate": 4.7526213443121635e-05, + "loss": 2.5271, + "step": 999000 + }, + { + "epoch": 4.95, + "learning_rate": 4.7524977333868404e-05, + "loss": 2.5142, + "step": 999500 + }, + { + "epoch": 4.95, + "learning_rate": 4.752373874744232e-05, + "loss": 2.5061, + "step": 1000000 + }, + { + "epoch": 4.96, + "learning_rate": 4.752250016101624e-05, + "loss": 2.5104, + "step": 1000500 + }, + { + "epoch": 4.96, + "learning_rate": 4.7521261574590155e-05, + "loss": 2.5104, + "step": 1001000 + }, + { + "epoch": 4.96, + "learning_rate": 4.752002298816407e-05, + "loss": 2.5243, + "step": 1001500 + }, + { + "epoch": 4.96, + "learning_rate": 4.751878440173799e-05, + "loss": 2.5186, + "step": 1002000 + }, + { + "epoch": 4.97, + "learning_rate": 4.7517545815311906e-05, + "loss": 2.52, + "step": 1002500 + }, + { + "epoch": 4.97, + "learning_rate": 4.751630722888582e-05, + "loss": 2.5164, + "step": 1003000 + }, + { + "epoch": 4.97, + "learning_rate": 4.751506864245974e-05, + "loss": 2.5276, + "step": 1003500 + }, + { + "epoch": 4.97, + "learning_rate": 4.7513830056033656e-05, + "loss": 2.5109, + "step": 1004000 + }, + { + "epoch": 4.98, + "learning_rate": 4.751259146960757e-05, + "loss": 2.5262, + "step": 1004500 + }, + { + "epoch": 4.98, + "learning_rate": 4.7511352883181484e-05, + "loss": 2.5065, + "step": 1005000 + }, + { + "epoch": 4.98, + "learning_rate": 4.751011677392825e-05, + "loss": 2.5033, + "step": 1005500 + }, + { + "epoch": 4.98, + "learning_rate": 4.750887818750217e-05, + "loss": 2.5218, + "step": 1006000 + }, + { + "epoch": 4.99, + "learning_rate": 4.7507639601076086e-05, + "loss": 2.5046, + "step": 1006500 + }, + { + "epoch": 4.99, + "learning_rate": 4.750640101465e-05, + "loss": 2.5356, + "step": 1007000 + }, + { + "epoch": 4.99, + "learning_rate": 4.750516490539677e-05, + "loss": 2.5308, + "step": 1007500 + }, + { + "epoch": 4.99, + "learning_rate": 4.750392879614354e-05, + "loss": 2.5266, + "step": 1008000 + }, + { + "epoch": 5.0, + "learning_rate": 4.750269020971745e-05, + "loss": 2.5191, + "step": 1008500 + }, + { + "epoch": 5.0, + "learning_rate": 4.750145162329137e-05, + "loss": 2.5071, + "step": 1009000 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.635423503873417, + "eval_accuracy_mlm": 0.588502945831283, + "eval_accuracy_nsp": 0.8568240383747975, + "eval_loss": 2.4675350189208984, + "eval_runtime": 146.0196, + "eval_samples_per_second": 1746.06, + "eval_steps_per_second": 72.757, + "step": 1009215 + }, + { + "epoch": 5.0, + "learning_rate": 4.7500213036865285e-05, + "loss": 2.5021, + "step": 1009500 + }, + { + "epoch": 5.0, + "learning_rate": 4.74989744504392e-05, + "loss": 2.4848, + "step": 1010000 + }, + { + "epoch": 5.01, + "learning_rate": 4.749773586401312e-05, + "loss": 2.4841, + "step": 1010500 + }, + { + "epoch": 5.01, + "learning_rate": 4.7496497277587036e-05, + "loss": 2.4968, + "step": 1011000 + }, + { + "epoch": 5.01, + "learning_rate": 4.749525869116095e-05, + "loss": 2.4742, + "step": 1011500 + }, + { + "epoch": 5.01, + "learning_rate": 4.749402010473487e-05, + "loss": 2.5161, + "step": 1012000 + }, + { + "epoch": 5.02, + "learning_rate": 4.7492781518308786e-05, + "loss": 2.496, + "step": 1012500 + }, + { + "epoch": 5.02, + "learning_rate": 4.74915429318827e-05, + "loss": 2.4761, + "step": 1013000 + }, + { + "epoch": 5.02, + "learning_rate": 4.749030434545662e-05, + "loss": 2.514, + "step": 1013500 + }, + { + "epoch": 5.02, + "learning_rate": 4.748906823620339e-05, + "loss": 2.4858, + "step": 1014000 + }, + { + "epoch": 5.03, + "learning_rate": 4.7487829649777306e-05, + "loss": 2.5104, + "step": 1014500 + }, + { + "epoch": 5.03, + "learning_rate": 4.7486593540524075e-05, + "loss": 2.4841, + "step": 1015000 + }, + { + "epoch": 5.03, + "learning_rate": 4.7485354954097985e-05, + "loss": 2.4851, + "step": 1015500 + }, + { + "epoch": 5.03, + "learning_rate": 4.748411884484476e-05, + "loss": 2.4796, + "step": 1016000 + }, + { + "epoch": 5.04, + "learning_rate": 4.748288025841867e-05, + "loss": 2.512, + "step": 1016500 + }, + { + "epoch": 5.04, + "learning_rate": 4.7481644149165446e-05, + "loss": 2.4949, + "step": 1017000 + }, + { + "epoch": 5.04, + "learning_rate": 4.748040556273936e-05, + "loss": 2.4787, + "step": 1017500 + }, + { + "epoch": 5.04, + "learning_rate": 4.747916697631328e-05, + "loss": 2.4854, + "step": 1018000 + }, + { + "epoch": 5.05, + "learning_rate": 4.74779283898872e-05, + "loss": 2.505, + "step": 1018500 + }, + { + "epoch": 5.05, + "learning_rate": 4.7476689803461114e-05, + "loss": 2.4824, + "step": 1019000 + }, + { + "epoch": 5.05, + "learning_rate": 4.7475451217035024e-05, + "loss": 2.5006, + "step": 1019500 + }, + { + "epoch": 5.05, + "learning_rate": 4.747421263060894e-05, + "loss": 2.4915, + "step": 1020000 + }, + { + "epoch": 5.06, + "learning_rate": 4.747297652135571e-05, + "loss": 2.5021, + "step": 1020500 + }, + { + "epoch": 5.06, + "learning_rate": 4.747173793492963e-05, + "loss": 2.4845, + "step": 1021000 + }, + { + "epoch": 5.06, + "learning_rate": 4.7470499348503544e-05, + "loss": 2.4846, + "step": 1021500 + }, + { + "epoch": 5.06, + "learning_rate": 4.746926076207746e-05, + "loss": 2.4703, + "step": 1022000 + }, + { + "epoch": 5.07, + "learning_rate": 4.746802465282423e-05, + "loss": 2.5154, + "step": 1022500 + }, + { + "epoch": 5.07, + "learning_rate": 4.7466786066398146e-05, + "loss": 2.4895, + "step": 1023000 + }, + { + "epoch": 5.07, + "learning_rate": 4.746554747997206e-05, + "loss": 2.4909, + "step": 1023500 + }, + { + "epoch": 5.07, + "learning_rate": 4.746430889354598e-05, + "loss": 2.4931, + "step": 1024000 + }, + { + "epoch": 5.08, + "learning_rate": 4.74630703071199e-05, + "loss": 2.4837, + "step": 1024500 + }, + { + "epoch": 5.08, + "learning_rate": 4.746183667503951e-05, + "loss": 2.499, + "step": 1025000 + }, + { + "epoch": 5.08, + "learning_rate": 4.746059808861343e-05, + "loss": 2.5291, + "step": 1025500 + }, + { + "epoch": 5.08, + "learning_rate": 4.7459359502187345e-05, + "loss": 2.4858, + "step": 1026000 + }, + { + "epoch": 5.09, + "learning_rate": 4.745812091576126e-05, + "loss": 2.4969, + "step": 1026500 + }, + { + "epoch": 5.09, + "learning_rate": 4.745688232933518e-05, + "loss": 2.4516, + "step": 1027000 + }, + { + "epoch": 5.09, + "learning_rate": 4.7455643742909096e-05, + "loss": 2.5106, + "step": 1027500 + }, + { + "epoch": 5.09, + "learning_rate": 4.745440515648301e-05, + "loss": 2.4804, + "step": 1028000 + }, + { + "epoch": 5.1, + "learning_rate": 4.745316657005693e-05, + "loss": 2.4969, + "step": 1028500 + }, + { + "epoch": 5.1, + "learning_rate": 4.7451927983630846e-05, + "loss": 2.5142, + "step": 1029000 + }, + { + "epoch": 5.1, + "learning_rate": 4.745068939720476e-05, + "loss": 2.4816, + "step": 1029500 + }, + { + "epoch": 5.1, + "learning_rate": 4.744945081077868e-05, + "loss": 2.4882, + "step": 1030000 + }, + { + "epoch": 5.11, + "learning_rate": 4.74482122243526e-05, + "loss": 2.4912, + "step": 1030500 + }, + { + "epoch": 5.11, + "learning_rate": 4.7446973637926514e-05, + "loss": 2.4795, + "step": 1031000 + }, + { + "epoch": 5.11, + "learning_rate": 4.744573505150043e-05, + "loss": 2.498, + "step": 1031500 + }, + { + "epoch": 5.11, + "learning_rate": 4.744449646507434e-05, + "loss": 2.5025, + "step": 1032000 + }, + { + "epoch": 5.12, + "learning_rate": 4.744325787864826e-05, + "loss": 2.4867, + "step": 1032500 + }, + { + "epoch": 5.12, + "learning_rate": 4.7442019292222175e-05, + "loss": 2.5047, + "step": 1033000 + }, + { + "epoch": 5.12, + "learning_rate": 4.744078070579609e-05, + "loss": 2.493, + "step": 1033500 + }, + { + "epoch": 5.12, + "learning_rate": 4.743954211937001e-05, + "loss": 2.4766, + "step": 1034000 + }, + { + "epoch": 5.13, + "learning_rate": 4.7438303532943926e-05, + "loss": 2.4811, + "step": 1034500 + }, + { + "epoch": 5.13, + "learning_rate": 4.743706742369069e-05, + "loss": 2.4792, + "step": 1035000 + }, + { + "epoch": 5.13, + "learning_rate": 4.7435828837264605e-05, + "loss": 2.4694, + "step": 1035500 + }, + { + "epoch": 5.13, + "learning_rate": 4.743459272801138e-05, + "loss": 2.4567, + "step": 1036000 + }, + { + "epoch": 5.14, + "learning_rate": 4.7433359095930994e-05, + "loss": 2.4914, + "step": 1036500 + }, + { + "epoch": 5.14, + "learning_rate": 4.743212050950491e-05, + "loss": 2.4963, + "step": 1037000 + }, + { + "epoch": 5.14, + "learning_rate": 4.743088192307883e-05, + "loss": 2.5135, + "step": 1037500 + }, + { + "epoch": 5.14, + "learning_rate": 4.7429643336652745e-05, + "loss": 2.5065, + "step": 1038000 + }, + { + "epoch": 5.15, + "learning_rate": 4.742840475022666e-05, + "loss": 2.5155, + "step": 1038500 + }, + { + "epoch": 5.15, + "learning_rate": 4.742716616380058e-05, + "loss": 2.5172, + "step": 1039000 + }, + { + "epoch": 5.15, + "learning_rate": 4.7425927577374496e-05, + "loss": 2.4787, + "step": 1039500 + }, + { + "epoch": 5.15, + "learning_rate": 4.742468899094841e-05, + "loss": 2.4847, + "step": 1040000 + }, + { + "epoch": 5.15, + "learning_rate": 4.742345040452233e-05, + "loss": 2.5204, + "step": 1040500 + }, + { + "epoch": 5.16, + "learning_rate": 4.742221181809625e-05, + "loss": 2.4876, + "step": 1041000 + }, + { + "epoch": 5.16, + "learning_rate": 4.7420973231670164e-05, + "loss": 2.5044, + "step": 1041500 + }, + { + "epoch": 5.16, + "learning_rate": 4.741973464524408e-05, + "loss": 2.4813, + "step": 1042000 + }, + { + "epoch": 5.16, + "learning_rate": 4.7418496058818e-05, + "loss": 2.4927, + "step": 1042500 + }, + { + "epoch": 5.17, + "learning_rate": 4.7417257472391914e-05, + "loss": 2.5097, + "step": 1043000 + }, + { + "epoch": 5.17, + "learning_rate": 4.741601888596583e-05, + "loss": 2.5111, + "step": 1043500 + }, + { + "epoch": 5.17, + "learning_rate": 4.741478277671259e-05, + "loss": 2.5181, + "step": 1044000 + }, + { + "epoch": 5.17, + "learning_rate": 4.741354419028651e-05, + "loss": 2.5167, + "step": 1044500 + }, + { + "epoch": 5.18, + "learning_rate": 4.741230560386043e-05, + "loss": 2.5013, + "step": 1045000 + }, + { + "epoch": 5.18, + "learning_rate": 4.7411067017434344e-05, + "loss": 2.516, + "step": 1045500 + }, + { + "epoch": 5.18, + "learning_rate": 4.740982843100826e-05, + "loss": 2.4898, + "step": 1046000 + }, + { + "epoch": 5.18, + "learning_rate": 4.740858984458218e-05, + "loss": 2.5233, + "step": 1046500 + }, + { + "epoch": 5.19, + "learning_rate": 4.7407351258156095e-05, + "loss": 2.5021, + "step": 1047000 + }, + { + "epoch": 5.19, + "learning_rate": 4.7406115148902864e-05, + "loss": 2.4977, + "step": 1047500 + }, + { + "epoch": 5.19, + "learning_rate": 4.740487656247678e-05, + "loss": 2.5229, + "step": 1048000 + }, + { + "epoch": 5.19, + "learning_rate": 4.74036379760507e-05, + "loss": 2.5149, + "step": 1048500 + }, + { + "epoch": 5.2, + "learning_rate": 4.7402399389624615e-05, + "loss": 2.5227, + "step": 1049000 + }, + { + "epoch": 5.2, + "learning_rate": 4.740116328037138e-05, + "loss": 2.5052, + "step": 1049500 + }, + { + "epoch": 5.2, + "learning_rate": 4.7399924693945293e-05, + "loss": 2.5153, + "step": 1050000 + }, + { + "epoch": 5.2, + "learning_rate": 4.739868610751921e-05, + "loss": 2.5106, + "step": 1050500 + }, + { + "epoch": 5.21, + "learning_rate": 4.739744752109313e-05, + "loss": 2.4979, + "step": 1051000 + }, + { + "epoch": 5.21, + "learning_rate": 4.7396208934667044e-05, + "loss": 2.4952, + "step": 1051500 + }, + { + "epoch": 5.21, + "learning_rate": 4.739497034824096e-05, + "loss": 2.5124, + "step": 1052000 + }, + { + "epoch": 5.21, + "learning_rate": 4.739373176181488e-05, + "loss": 2.4948, + "step": 1052500 + }, + { + "epoch": 5.22, + "learning_rate": 4.7392493175388795e-05, + "loss": 2.5208, + "step": 1053000 + }, + { + "epoch": 5.22, + "learning_rate": 4.7391257066135564e-05, + "loss": 2.4947, + "step": 1053500 + }, + { + "epoch": 5.22, + "learning_rate": 4.7390023434055185e-05, + "loss": 2.505, + "step": 1054000 + }, + { + "epoch": 5.22, + "learning_rate": 4.7388784847629095e-05, + "loss": 2.5084, + "step": 1054500 + }, + { + "epoch": 5.23, + "learning_rate": 4.738754626120301e-05, + "loss": 2.5115, + "step": 1055000 + }, + { + "epoch": 5.23, + "learning_rate": 4.738630767477693e-05, + "loss": 2.5227, + "step": 1055500 + }, + { + "epoch": 5.23, + "learning_rate": 4.7385069088350846e-05, + "loss": 2.5089, + "step": 1056000 + }, + { + "epoch": 5.23, + "learning_rate": 4.738383050192476e-05, + "loss": 2.5291, + "step": 1056500 + }, + { + "epoch": 5.24, + "learning_rate": 4.738259191549868e-05, + "loss": 2.5091, + "step": 1057000 + }, + { + "epoch": 5.24, + "learning_rate": 4.7381353329072596e-05, + "loss": 2.4981, + "step": 1057500 + }, + { + "epoch": 5.24, + "learning_rate": 4.738011474264651e-05, + "loss": 2.4948, + "step": 1058000 + }, + { + "epoch": 5.24, + "learning_rate": 4.737887615622043e-05, + "loss": 2.5134, + "step": 1058500 + }, + { + "epoch": 5.25, + "learning_rate": 4.737763756979435e-05, + "loss": 2.528, + "step": 1059000 + }, + { + "epoch": 5.25, + "learning_rate": 4.7376398983368264e-05, + "loss": 2.5002, + "step": 1059500 + }, + { + "epoch": 5.25, + "learning_rate": 4.737516287411503e-05, + "loss": 2.4989, + "step": 1060000 + }, + { + "epoch": 5.25, + "learning_rate": 4.737392428768895e-05, + "loss": 2.5081, + "step": 1060500 + }, + { + "epoch": 5.26, + "learning_rate": 4.737268570126287e-05, + "loss": 2.4678, + "step": 1061000 + }, + { + "epoch": 5.26, + "learning_rate": 4.7371447114836784e-05, + "loss": 2.5176, + "step": 1061500 + }, + { + "epoch": 5.26, + "learning_rate": 4.73702085284107e-05, + "loss": 2.5324, + "step": 1062000 + }, + { + "epoch": 5.26, + "learning_rate": 4.736897489633032e-05, + "loss": 2.4977, + "step": 1062500 + }, + { + "epoch": 5.27, + "learning_rate": 4.736773630990424e-05, + "loss": 2.4846, + "step": 1063000 + }, + { + "epoch": 5.27, + "learning_rate": 4.7366497723478155e-05, + "loss": 2.4992, + "step": 1063500 + }, + { + "epoch": 5.27, + "learning_rate": 4.7365259137052065e-05, + "loss": 2.5183, + "step": 1064000 + }, + { + "epoch": 5.27, + "learning_rate": 4.736402055062598e-05, + "loss": 2.5053, + "step": 1064500 + }, + { + "epoch": 5.28, + "learning_rate": 4.73627819641999e-05, + "loss": 2.5262, + "step": 1065000 + }, + { + "epoch": 5.28, + "learning_rate": 4.7361543377773816e-05, + "loss": 2.515, + "step": 1065500 + }, + { + "epoch": 5.28, + "learning_rate": 4.736030479134773e-05, + "loss": 2.4938, + "step": 1066000 + }, + { + "epoch": 5.28, + "learning_rate": 4.735906620492165e-05, + "loss": 2.5149, + "step": 1066500 + }, + { + "epoch": 5.29, + "learning_rate": 4.735782761849557e-05, + "loss": 2.5019, + "step": 1067000 + }, + { + "epoch": 5.29, + "learning_rate": 4.735659150924233e-05, + "loss": 2.5112, + "step": 1067500 + }, + { + "epoch": 5.29, + "learning_rate": 4.7355352922816246e-05, + "loss": 2.5042, + "step": 1068000 + }, + { + "epoch": 5.29, + "learning_rate": 4.735411681356302e-05, + "loss": 2.5022, + "step": 1068500 + }, + { + "epoch": 5.3, + "learning_rate": 4.735287822713694e-05, + "loss": 2.5294, + "step": 1069000 + }, + { + "epoch": 5.3, + "learning_rate": 4.7351639640710855e-05, + "loss": 2.4906, + "step": 1069500 + }, + { + "epoch": 5.3, + "learning_rate": 4.735040105428477e-05, + "loss": 2.497, + "step": 1070000 + }, + { + "epoch": 5.3, + "learning_rate": 4.734916246785868e-05, + "loss": 2.5093, + "step": 1070500 + }, + { + "epoch": 5.31, + "learning_rate": 4.73479238814326e-05, + "loss": 2.4948, + "step": 1071000 + }, + { + "epoch": 5.31, + "learning_rate": 4.7346685295006516e-05, + "loss": 2.5087, + "step": 1071500 + }, + { + "epoch": 5.31, + "learning_rate": 4.734544670858043e-05, + "loss": 2.5076, + "step": 1072000 + }, + { + "epoch": 5.31, + "learning_rate": 4.734420812215435e-05, + "loss": 2.4856, + "step": 1072500 + }, + { + "epoch": 5.32, + "learning_rate": 4.734296953572827e-05, + "loss": 2.4979, + "step": 1073000 + }, + { + "epoch": 5.32, + "learning_rate": 4.7341730949302184e-05, + "loss": 2.4767, + "step": 1073500 + }, + { + "epoch": 5.32, + "learning_rate": 4.7340494840048946e-05, + "loss": 2.4792, + "step": 1074000 + }, + { + "epoch": 5.32, + "learning_rate": 4.733925625362286e-05, + "loss": 2.4931, + "step": 1074500 + }, + { + "epoch": 5.33, + "learning_rate": 4.733801766719678e-05, + "loss": 2.4973, + "step": 1075000 + }, + { + "epoch": 5.33, + "learning_rate": 4.73367790807707e-05, + "loss": 2.4988, + "step": 1075500 + }, + { + "epoch": 5.33, + "learning_rate": 4.7335540494344614e-05, + "loss": 2.4931, + "step": 1076000 + }, + { + "epoch": 5.33, + "learning_rate": 4.733430190791853e-05, + "loss": 2.5197, + "step": 1076500 + }, + { + "epoch": 5.34, + "learning_rate": 4.733306332149245e-05, + "loss": 2.4846, + "step": 1077000 + }, + { + "epoch": 5.34, + "learning_rate": 4.7331824735066364e-05, + "loss": 2.5041, + "step": 1077500 + }, + { + "epoch": 5.34, + "learning_rate": 4.733058862581313e-05, + "loss": 2.4894, + "step": 1078000 + }, + { + "epoch": 5.34, + "learning_rate": 4.732935003938705e-05, + "loss": 2.5004, + "step": 1078500 + }, + { + "epoch": 5.35, + "learning_rate": 4.732811145296097e-05, + "loss": 2.5042, + "step": 1079000 + }, + { + "epoch": 5.35, + "learning_rate": 4.7326875343707736e-05, + "loss": 2.4997, + "step": 1079500 + }, + { + "epoch": 5.35, + "learning_rate": 4.732563675728165e-05, + "loss": 2.5063, + "step": 1080000 + }, + { + "epoch": 5.35, + "learning_rate": 4.732439817085556e-05, + "loss": 2.5086, + "step": 1080500 + }, + { + "epoch": 5.36, + "learning_rate": 4.732315958442948e-05, + "loss": 2.4812, + "step": 1081000 + }, + { + "epoch": 5.36, + "learning_rate": 4.73219209980034e-05, + "loss": 2.4943, + "step": 1081500 + }, + { + "epoch": 5.36, + "learning_rate": 4.7320682411577314e-05, + "loss": 2.5031, + "step": 1082000 + }, + { + "epoch": 5.36, + "learning_rate": 4.731944382515123e-05, + "loss": 2.5015, + "step": 1082500 + }, + { + "epoch": 5.37, + "learning_rate": 4.731820523872515e-05, + "loss": 2.5043, + "step": 1083000 + }, + { + "epoch": 5.37, + "learning_rate": 4.7316966652299064e-05, + "loss": 2.5235, + "step": 1083500 + }, + { + "epoch": 5.37, + "learning_rate": 4.731572806587298e-05, + "loss": 2.491, + "step": 1084000 + }, + { + "epoch": 5.37, + "learning_rate": 4.73144894794469e-05, + "loss": 2.4956, + "step": 1084500 + }, + { + "epoch": 5.38, + "learning_rate": 4.7313250893020815e-05, + "loss": 2.5086, + "step": 1085000 + }, + { + "epoch": 5.38, + "learning_rate": 4.731201230659473e-05, + "loss": 2.534, + "step": 1085500 + }, + { + "epoch": 5.38, + "learning_rate": 4.73107761973415e-05, + "loss": 2.5147, + "step": 1086000 + }, + { + "epoch": 5.38, + "learning_rate": 4.730953761091542e-05, + "loss": 2.5247, + "step": 1086500 + }, + { + "epoch": 5.39, + "learning_rate": 4.7308299024489335e-05, + "loss": 2.5079, + "step": 1087000 + }, + { + "epoch": 5.39, + "learning_rate": 4.730706043806325e-05, + "loss": 2.5156, + "step": 1087500 + }, + { + "epoch": 5.39, + "learning_rate": 4.730582185163717e-05, + "loss": 2.4809, + "step": 1088000 + }, + { + "epoch": 5.39, + "learning_rate": 4.730458574238393e-05, + "loss": 2.5121, + "step": 1088500 + }, + { + "epoch": 5.4, + "learning_rate": 4.730334715595785e-05, + "loss": 2.5155, + "step": 1089000 + }, + { + "epoch": 5.4, + "learning_rate": 4.7302108569531765e-05, + "loss": 2.4882, + "step": 1089500 + }, + { + "epoch": 5.4, + "learning_rate": 4.730086998310568e-05, + "loss": 2.4941, + "step": 1090000 + }, + { + "epoch": 5.4, + "learning_rate": 4.72996313966796e-05, + "loss": 2.4765, + "step": 1090500 + }, + { + "epoch": 5.41, + "learning_rate": 4.7298392810253515e-05, + "loss": 2.4786, + "step": 1091000 + }, + { + "epoch": 5.41, + "learning_rate": 4.729715422382743e-05, + "loss": 2.5199, + "step": 1091500 + }, + { + "epoch": 5.41, + "learning_rate": 4.72959181145742e-05, + "loss": 2.4885, + "step": 1092000 + }, + { + "epoch": 5.41, + "learning_rate": 4.729468200532097e-05, + "loss": 2.523, + "step": 1092500 + }, + { + "epoch": 5.42, + "learning_rate": 4.729344589606774e-05, + "loss": 2.5936, + "step": 1093000 + }, + { + "epoch": 5.42, + "learning_rate": 4.7292207309641656e-05, + "loss": 2.5146, + "step": 1093500 + }, + { + "epoch": 5.42, + "learning_rate": 4.729096872321557e-05, + "loss": 2.5291, + "step": 1094000 + }, + { + "epoch": 5.42, + "learning_rate": 4.728973013678949e-05, + "loss": 2.5103, + "step": 1094500 + }, + { + "epoch": 5.43, + "learning_rate": 4.7288491550363406e-05, + "loss": 2.5062, + "step": 1095000 + }, + { + "epoch": 5.43, + "learning_rate": 4.728725791828302e-05, + "loss": 2.5114, + "step": 1095500 + }, + { + "epoch": 5.43, + "learning_rate": 4.728602180902979e-05, + "loss": 2.5239, + "step": 1096000 + }, + { + "epoch": 5.43, + "learning_rate": 4.7284783222603706e-05, + "loss": 2.5374, + "step": 1096500 + }, + { + "epoch": 5.43, + "learning_rate": 4.728354463617762e-05, + "loss": 2.5018, + "step": 1097000 + }, + { + "epoch": 5.44, + "learning_rate": 4.728230604975154e-05, + "loss": 2.4786, + "step": 1097500 + }, + { + "epoch": 5.44, + "learning_rate": 4.7281069940498316e-05, + "loss": 2.5172, + "step": 1098000 + }, + { + "epoch": 5.44, + "learning_rate": 4.727983135407223e-05, + "loss": 2.5365, + "step": 1098500 + }, + { + "epoch": 5.44, + "learning_rate": 4.727859276764614e-05, + "loss": 2.6215, + "step": 1099000 + }, + { + "epoch": 5.45, + "learning_rate": 4.727735418122006e-05, + "loss": 2.5575, + "step": 1099500 + }, + { + "epoch": 5.45, + "learning_rate": 4.7276115594793977e-05, + "loss": 2.549, + "step": 1100000 + }, + { + "epoch": 5.45, + "learning_rate": 4.7274877008367893e-05, + "loss": 2.538, + "step": 1100500 + }, + { + "epoch": 5.45, + "learning_rate": 4.727364089911466e-05, + "loss": 2.698, + "step": 1101000 + }, + { + "epoch": 5.46, + "learning_rate": 4.727240231268858e-05, + "loss": 2.6845, + "step": 1101500 + }, + { + "epoch": 5.46, + "learning_rate": 4.727116372626249e-05, + "loss": 2.6379, + "step": 1102000 + }, + { + "epoch": 5.46, + "learning_rate": 4.7269925139836406e-05, + "loss": 2.5858, + "step": 1102500 + }, + { + "epoch": 5.46, + "learning_rate": 4.726868655341032e-05, + "loss": 2.5711, + "step": 1103000 + }, + { + "epoch": 5.47, + "learning_rate": 4.7267452921329944e-05, + "loss": 2.6267, + "step": 1103500 + }, + { + "epoch": 5.47, + "learning_rate": 4.726621681207671e-05, + "loss": 2.6757, + "step": 1104000 + }, + { + "epoch": 5.47, + "learning_rate": 4.726497822565063e-05, + "loss": 2.6265, + "step": 1104500 + }, + { + "epoch": 5.47, + "learning_rate": 4.7263739639224547e-05, + "loss": 2.6267, + "step": 1105000 + }, + { + "epoch": 5.48, + "learning_rate": 4.7262501052798463e-05, + "loss": 2.6607, + "step": 1105500 + }, + { + "epoch": 5.48, + "learning_rate": 4.726126246637238e-05, + "loss": 2.5848, + "step": 1106000 + }, + { + "epoch": 5.48, + "learning_rate": 4.72600238799463e-05, + "loss": 2.5908, + "step": 1106500 + }, + { + "epoch": 5.48, + "learning_rate": 4.7258785293520214e-05, + "loss": 2.6656, + "step": 1107000 + }, + { + "epoch": 5.49, + "learning_rate": 4.725754918426698e-05, + "loss": 2.5915, + "step": 1107500 + }, + { + "epoch": 5.49, + "learning_rate": 4.72563105978409e-05, + "loss": 2.6518, + "step": 1108000 + }, + { + "epoch": 5.49, + "learning_rate": 4.725507201141482e-05, + "loss": 2.617, + "step": 1108500 + }, + { + "epoch": 5.49, + "learning_rate": 4.725383342498873e-05, + "loss": 2.6337, + "step": 1109000 + }, + { + "epoch": 5.5, + "learning_rate": 4.7252594838562644e-05, + "loss": 2.5885, + "step": 1109500 + }, + { + "epoch": 5.5, + "learning_rate": 4.725135625213656e-05, + "loss": 2.5465, + "step": 1110000 + }, + { + "epoch": 5.5, + "learning_rate": 4.725011766571048e-05, + "loss": 2.5822, + "step": 1110500 + }, + { + "epoch": 5.5, + "learning_rate": 4.7248879079284395e-05, + "loss": 2.6102, + "step": 1111000 + }, + { + "epoch": 5.51, + "learning_rate": 4.724764049285831e-05, + "loss": 2.6242, + "step": 1111500 + }, + { + "epoch": 5.51, + "learning_rate": 4.724640438360508e-05, + "loss": 2.6015, + "step": 1112000 + }, + { + "epoch": 5.51, + "learning_rate": 4.7245165797179e-05, + "loss": 2.5667, + "step": 1112500 + }, + { + "epoch": 5.51, + "learning_rate": 4.7243927210752914e-05, + "loss": 2.5967, + "step": 1113000 + }, + { + "epoch": 5.52, + "learning_rate": 4.724268862432683e-05, + "loss": 2.591, + "step": 1113500 + }, + { + "epoch": 5.52, + "learning_rate": 4.72414525150736e-05, + "loss": 2.6024, + "step": 1114000 + }, + { + "epoch": 5.52, + "learning_rate": 4.724021392864752e-05, + "loss": 2.6239, + "step": 1114500 + }, + { + "epoch": 5.52, + "learning_rate": 4.7238975342221434e-05, + "loss": 2.6354, + "step": 1115000 + }, + { + "epoch": 5.53, + "learning_rate": 4.723773675579535e-05, + "loss": 2.6408, + "step": 1115500 + }, + { + "epoch": 5.53, + "learning_rate": 4.723649816936926e-05, + "loss": 2.6031, + "step": 1116000 + }, + { + "epoch": 5.53, + "learning_rate": 4.723525958294318e-05, + "loss": 2.6564, + "step": 1116500 + }, + { + "epoch": 5.53, + "learning_rate": 4.7234020996517095e-05, + "loss": 2.5826, + "step": 1117000 + }, + { + "epoch": 5.54, + "learning_rate": 4.723278241009101e-05, + "loss": 2.583, + "step": 1117500 + }, + { + "epoch": 5.54, + "learning_rate": 4.723154382366493e-05, + "loss": 2.5424, + "step": 1118000 + }, + { + "epoch": 5.54, + "learning_rate": 4.7230305237238846e-05, + "loss": 2.5366, + "step": 1118500 + }, + { + "epoch": 5.54, + "learning_rate": 4.722906665081276e-05, + "loss": 2.5275, + "step": 1119000 + }, + { + "epoch": 5.55, + "learning_rate": 4.722782806438668e-05, + "loss": 2.546, + "step": 1119500 + }, + { + "epoch": 5.55, + "learning_rate": 4.7226589477960596e-05, + "loss": 2.5238, + "step": 1120000 + }, + { + "epoch": 5.55, + "learning_rate": 4.7225350891534513e-05, + "loss": 2.535, + "step": 1120500 + }, + { + "epoch": 5.55, + "learning_rate": 4.7224112305108424e-05, + "loss": 2.52, + "step": 1121000 + }, + { + "epoch": 5.56, + "learning_rate": 4.722287371868234e-05, + "loss": 2.5252, + "step": 1121500 + }, + { + "epoch": 5.56, + "learning_rate": 4.722163513225626e-05, + "loss": 2.544, + "step": 1122000 + }, + { + "epoch": 5.56, + "learning_rate": 4.7220396545830174e-05, + "loss": 2.5424, + "step": 1122500 + }, + { + "epoch": 5.56, + "learning_rate": 4.721915795940409e-05, + "loss": 2.5489, + "step": 1123000 + }, + { + "epoch": 5.57, + "learning_rate": 4.721791937297801e-05, + "loss": 2.5057, + "step": 1123500 + }, + { + "epoch": 5.57, + "learning_rate": 4.7216680786551925e-05, + "loss": 2.5233, + "step": 1124000 + }, + { + "epoch": 5.57, + "learning_rate": 4.721544220012584e-05, + "loss": 2.53, + "step": 1124500 + }, + { + "epoch": 5.57, + "learning_rate": 4.721420361369976e-05, + "loss": 2.5091, + "step": 1125000 + }, + { + "epoch": 5.58, + "learning_rate": 4.7212965027273676e-05, + "loss": 2.5253, + "step": 1125500 + }, + { + "epoch": 5.58, + "learning_rate": 4.7211728918020445e-05, + "loss": 2.5188, + "step": 1126000 + }, + { + "epoch": 5.58, + "learning_rate": 4.721049033159436e-05, + "loss": 2.522, + "step": 1126500 + }, + { + "epoch": 5.58, + "learning_rate": 4.720925669951398e-05, + "loss": 2.5627, + "step": 1127000 + }, + { + "epoch": 5.59, + "learning_rate": 4.72080181130879e-05, + "loss": 2.526, + "step": 1127500 + }, + { + "epoch": 5.59, + "learning_rate": 4.7206779526661816e-05, + "loss": 2.5394, + "step": 1128000 + }, + { + "epoch": 5.59, + "learning_rate": 4.720554094023573e-05, + "loss": 2.5182, + "step": 1128500 + }, + { + "epoch": 5.59, + "learning_rate": 4.720430235380965e-05, + "loss": 2.568, + "step": 1129000 + }, + { + "epoch": 5.6, + "learning_rate": 4.720306624455641e-05, + "loss": 2.5795, + "step": 1129500 + }, + { + "epoch": 5.6, + "learning_rate": 4.720182765813033e-05, + "loss": 2.6119, + "step": 1130000 + }, + { + "epoch": 5.6, + "learning_rate": 4.7200589071704246e-05, + "loss": 2.5682, + "step": 1130500 + }, + { + "epoch": 5.6, + "learning_rate": 4.719935048527816e-05, + "loss": 2.5436, + "step": 1131000 + }, + { + "epoch": 5.61, + "learning_rate": 4.719811189885208e-05, + "loss": 2.5482, + "step": 1131500 + }, + { + "epoch": 5.61, + "learning_rate": 4.7196873312426e-05, + "loss": 2.5595, + "step": 1132000 + }, + { + "epoch": 5.61, + "learning_rate": 4.7195634725999914e-05, + "loss": 2.5303, + "step": 1132500 + }, + { + "epoch": 5.61, + "learning_rate": 4.719439861674668e-05, + "loss": 2.5571, + "step": 1133000 + }, + { + "epoch": 5.62, + "learning_rate": 4.71931600303206e-05, + "loss": 2.5494, + "step": 1133500 + }, + { + "epoch": 5.62, + "learning_rate": 4.7191921443894516e-05, + "loss": 2.5194, + "step": 1134000 + }, + { + "epoch": 5.62, + "learning_rate": 4.719068285746843e-05, + "loss": 2.5303, + "step": 1134500 + }, + { + "epoch": 5.62, + "learning_rate": 4.718944427104235e-05, + "loss": 2.5406, + "step": 1135000 + }, + { + "epoch": 5.63, + "learning_rate": 4.718820568461627e-05, + "loss": 2.5231, + "step": 1135500 + }, + { + "epoch": 5.63, + "learning_rate": 4.7186967098190184e-05, + "loss": 2.5372, + "step": 1136000 + }, + { + "epoch": 5.63, + "learning_rate": 4.7185728511764094e-05, + "loss": 2.5714, + "step": 1136500 + }, + { + "epoch": 5.63, + "learning_rate": 4.718448992533801e-05, + "loss": 2.5142, + "step": 1137000 + }, + { + "epoch": 5.64, + "learning_rate": 4.718325381608478e-05, + "loss": 2.5527, + "step": 1137500 + }, + { + "epoch": 5.64, + "learning_rate": 4.71820152296587e-05, + "loss": 2.574, + "step": 1138000 + }, + { + "epoch": 5.64, + "learning_rate": 4.7180776643232614e-05, + "loss": 2.5483, + "step": 1138500 + }, + { + "epoch": 5.64, + "learning_rate": 4.717953805680653e-05, + "loss": 2.5275, + "step": 1139000 + }, + { + "epoch": 5.65, + "learning_rate": 4.717829947038044e-05, + "loss": 2.5176, + "step": 1139500 + }, + { + "epoch": 5.65, + "learning_rate": 4.717706088395436e-05, + "loss": 2.5255, + "step": 1140000 + }, + { + "epoch": 5.65, + "learning_rate": 4.717582477470113e-05, + "loss": 2.5277, + "step": 1140500 + }, + { + "epoch": 5.65, + "learning_rate": 4.717458618827505e-05, + "loss": 2.5264, + "step": 1141000 + }, + { + "epoch": 5.66, + "learning_rate": 4.717334760184897e-05, + "loss": 2.5172, + "step": 1141500 + }, + { + "epoch": 5.66, + "learning_rate": 4.7172109015422884e-05, + "loss": 2.5394, + "step": 1142000 + }, + { + "epoch": 5.66, + "learning_rate": 4.71708704289968e-05, + "loss": 2.4941, + "step": 1142500 + }, + { + "epoch": 5.66, + "learning_rate": 4.716963184257071e-05, + "loss": 2.5234, + "step": 1143000 + }, + { + "epoch": 5.67, + "learning_rate": 4.716839325614463e-05, + "loss": 2.5233, + "step": 1143500 + }, + { + "epoch": 5.67, + "learning_rate": 4.71671571468914e-05, + "loss": 2.5376, + "step": 1144000 + }, + { + "epoch": 5.67, + "learning_rate": 4.7165918560465314e-05, + "loss": 2.5352, + "step": 1144500 + }, + { + "epoch": 5.67, + "learning_rate": 4.716467997403923e-05, + "loss": 2.5087, + "step": 1145000 + }, + { + "epoch": 5.68, + "learning_rate": 4.716344138761315e-05, + "loss": 2.5188, + "step": 1145500 + }, + { + "epoch": 5.68, + "learning_rate": 4.716220280118706e-05, + "loss": 2.5193, + "step": 1146000 + }, + { + "epoch": 5.68, + "learning_rate": 4.7160964214760975e-05, + "loss": 2.5462, + "step": 1146500 + }, + { + "epoch": 5.68, + "learning_rate": 4.71597305826806e-05, + "loss": 2.5272, + "step": 1147000 + }, + { + "epoch": 5.69, + "learning_rate": 4.715849199625452e-05, + "loss": 2.5139, + "step": 1147500 + }, + { + "epoch": 5.69, + "learning_rate": 4.7157253409828436e-05, + "loss": 2.5406, + "step": 1148000 + }, + { + "epoch": 5.69, + "learning_rate": 4.715601482340235e-05, + "loss": 2.5199, + "step": 1148500 + }, + { + "epoch": 5.69, + "learning_rate": 4.715477623697627e-05, + "loss": 2.4929, + "step": 1149000 + }, + { + "epoch": 5.7, + "learning_rate": 4.715354012772303e-05, + "loss": 2.5136, + "step": 1149500 + }, + { + "epoch": 5.7, + "learning_rate": 4.715230154129695e-05, + "loss": 2.5388, + "step": 1150000 + }, + { + "epoch": 5.7, + "learning_rate": 4.7151062954870866e-05, + "loss": 2.5158, + "step": 1150500 + }, + { + "epoch": 5.7, + "learning_rate": 4.714982436844478e-05, + "loss": 2.4921, + "step": 1151000 + }, + { + "epoch": 5.7, + "learning_rate": 4.71485857820187e-05, + "loss": 2.5272, + "step": 1151500 + }, + { + "epoch": 5.71, + "learning_rate": 4.714734719559262e-05, + "loss": 2.5284, + "step": 1152000 + }, + { + "epoch": 5.71, + "learning_rate": 4.7146108609166534e-05, + "loss": 2.5297, + "step": 1152500 + }, + { + "epoch": 5.71, + "learning_rate": 4.714487002274045e-05, + "loss": 2.5186, + "step": 1153000 + }, + { + "epoch": 5.71, + "learning_rate": 4.714363143631437e-05, + "loss": 2.4969, + "step": 1153500 + }, + { + "epoch": 5.72, + "learning_rate": 4.7142392849888284e-05, + "loss": 2.5141, + "step": 1154000 + }, + { + "epoch": 5.72, + "learning_rate": 4.71411542634622e-05, + "loss": 2.5465, + "step": 1154500 + }, + { + "epoch": 5.72, + "learning_rate": 4.713991567703612e-05, + "loss": 2.5263, + "step": 1155000 + }, + { + "epoch": 5.72, + "learning_rate": 4.713867709061003e-05, + "loss": 2.5282, + "step": 1155500 + }, + { + "epoch": 5.73, + "learning_rate": 4.713744345852965e-05, + "loss": 2.5718, + "step": 1156000 + }, + { + "epoch": 5.73, + "learning_rate": 4.7136204872103566e-05, + "loss": 2.5614, + "step": 1156500 + }, + { + "epoch": 5.73, + "learning_rate": 4.7134968762850335e-05, + "loss": 2.5299, + "step": 1157000 + }, + { + "epoch": 5.73, + "learning_rate": 4.713373017642425e-05, + "loss": 2.5281, + "step": 1157500 + }, + { + "epoch": 5.74, + "learning_rate": 4.713249158999817e-05, + "loss": 2.503, + "step": 1158000 + }, + { + "epoch": 5.74, + "learning_rate": 4.7131253003572086e-05, + "loss": 2.5163, + "step": 1158500 + }, + { + "epoch": 5.74, + "learning_rate": 4.7130014417146e-05, + "loss": 2.5324, + "step": 1159000 + }, + { + "epoch": 5.74, + "learning_rate": 4.712877583071992e-05, + "loss": 2.522, + "step": 1159500 + }, + { + "epoch": 5.75, + "learning_rate": 4.7127537244293836e-05, + "loss": 2.55, + "step": 1160000 + }, + { + "epoch": 5.75, + "learning_rate": 4.712629865786775e-05, + "loss": 2.5301, + "step": 1160500 + }, + { + "epoch": 5.75, + "learning_rate": 4.712506007144167e-05, + "loss": 2.5052, + "step": 1161000 + }, + { + "epoch": 5.75, + "learning_rate": 4.712382148501559e-05, + "loss": 2.4877, + "step": 1161500 + }, + { + "epoch": 5.76, + "learning_rate": 4.7122582898589504e-05, + "loss": 2.5126, + "step": 1162000 + }, + { + "epoch": 5.76, + "learning_rate": 4.712134431216342e-05, + "loss": 2.5242, + "step": 1162500 + }, + { + "epoch": 5.76, + "learning_rate": 4.712010572573734e-05, + "loss": 2.5206, + "step": 1163000 + }, + { + "epoch": 5.76, + "learning_rate": 4.711886713931125e-05, + "loss": 2.5255, + "step": 1163500 + }, + { + "epoch": 5.77, + "learning_rate": 4.711763103005802e-05, + "loss": 2.5042, + "step": 1164000 + }, + { + "epoch": 5.77, + "learning_rate": 4.7116392443631934e-05, + "loss": 2.5521, + "step": 1164500 + }, + { + "epoch": 5.77, + "learning_rate": 4.711515385720585e-05, + "loss": 2.5334, + "step": 1165000 + }, + { + "epoch": 5.77, + "learning_rate": 4.711391527077977e-05, + "loss": 2.536, + "step": 1165500 + }, + { + "epoch": 5.78, + "learning_rate": 4.7112676684353685e-05, + "loss": 2.5293, + "step": 1166000 + }, + { + "epoch": 5.78, + "learning_rate": 4.71114380979276e-05, + "loss": 2.5166, + "step": 1166500 + }, + { + "epoch": 5.78, + "learning_rate": 4.711019951150152e-05, + "loss": 2.4953, + "step": 1167000 + }, + { + "epoch": 5.78, + "learning_rate": 4.7108960925075435e-05, + "loss": 2.4882, + "step": 1167500 + }, + { + "epoch": 5.79, + "learning_rate": 4.7107724815822204e-05, + "loss": 2.5227, + "step": 1168000 + }, + { + "epoch": 5.79, + "learning_rate": 4.710648622939612e-05, + "loss": 2.514, + "step": 1168500 + }, + { + "epoch": 5.79, + "learning_rate": 4.710524764297004e-05, + "loss": 2.5371, + "step": 1169000 + }, + { + "epoch": 5.79, + "learning_rate": 4.7104009056543955e-05, + "loss": 2.5194, + "step": 1169500 + }, + { + "epoch": 5.8, + "learning_rate": 4.7102770470117865e-05, + "loss": 2.5025, + "step": 1170000 + }, + { + "epoch": 5.8, + "learning_rate": 4.710153188369178e-05, + "loss": 2.5248, + "step": 1170500 + }, + { + "epoch": 5.8, + "learning_rate": 4.71002932972657e-05, + "loss": 2.4913, + "step": 1171000 + }, + { + "epoch": 5.8, + "learning_rate": 4.7099054710839616e-05, + "loss": 2.5103, + "step": 1171500 + }, + { + "epoch": 5.81, + "learning_rate": 4.709781612441353e-05, + "loss": 2.4856, + "step": 1172000 + }, + { + "epoch": 5.81, + "learning_rate": 4.709657753798744e-05, + "loss": 2.536, + "step": 1172500 + }, + { + "epoch": 5.81, + "learning_rate": 4.709534142873422e-05, + "loss": 2.4982, + "step": 1173000 + }, + { + "epoch": 5.81, + "learning_rate": 4.7094102842308136e-05, + "loss": 2.5312, + "step": 1173500 + }, + { + "epoch": 5.82, + "learning_rate": 4.7092866733054904e-05, + "loss": 2.4911, + "step": 1174000 + }, + { + "epoch": 5.82, + "learning_rate": 4.709162814662882e-05, + "loss": 2.5099, + "step": 1174500 + }, + { + "epoch": 5.82, + "learning_rate": 4.709038956020274e-05, + "loss": 2.5238, + "step": 1175000 + }, + { + "epoch": 5.82, + "learning_rate": 4.7089150973776655e-05, + "loss": 2.5279, + "step": 1175500 + }, + { + "epoch": 5.83, + "learning_rate": 4.708791486452342e-05, + "loss": 2.5004, + "step": 1176000 + }, + { + "epoch": 5.83, + "learning_rate": 4.7086676278097334e-05, + "loss": 2.5314, + "step": 1176500 + }, + { + "epoch": 5.83, + "learning_rate": 4.708543769167125e-05, + "loss": 2.5071, + "step": 1177000 + }, + { + "epoch": 5.83, + "learning_rate": 4.708419910524517e-05, + "loss": 2.5197, + "step": 1177500 + }, + { + "epoch": 5.84, + "learning_rate": 4.7082960518819085e-05, + "loss": 2.5249, + "step": 1178000 + }, + { + "epoch": 5.84, + "learning_rate": 4.7081721932393e-05, + "loss": 2.5054, + "step": 1178500 + }, + { + "epoch": 5.84, + "learning_rate": 4.708048334596692e-05, + "loss": 2.5106, + "step": 1179000 + }, + { + "epoch": 5.84, + "learning_rate": 4.7079244759540836e-05, + "loss": 2.5198, + "step": 1179500 + }, + { + "epoch": 5.85, + "learning_rate": 4.7078008650287604e-05, + "loss": 2.5138, + "step": 1180000 + }, + { + "epoch": 5.85, + "learning_rate": 4.707677006386152e-05, + "loss": 2.5156, + "step": 1180500 + }, + { + "epoch": 5.85, + "learning_rate": 4.707553147743544e-05, + "loss": 2.5207, + "step": 1181000 + }, + { + "epoch": 5.85, + "learning_rate": 4.7074292891009355e-05, + "loss": 2.4657, + "step": 1181500 + }, + { + "epoch": 5.86, + "learning_rate": 4.707305430458327e-05, + "loss": 2.4906, + "step": 1182000 + }, + { + "epoch": 5.86, + "learning_rate": 4.707181571815719e-05, + "loss": 2.4839, + "step": 1182500 + }, + { + "epoch": 5.86, + "learning_rate": 4.7070577131731106e-05, + "loss": 2.514, + "step": 1183000 + }, + { + "epoch": 5.86, + "learning_rate": 4.7069338545305016e-05, + "loss": 2.4829, + "step": 1183500 + }, + { + "epoch": 5.87, + "learning_rate": 4.706809995887893e-05, + "loss": 2.5311, + "step": 1184000 + }, + { + "epoch": 5.87, + "learning_rate": 4.706686137245285e-05, + "loss": 2.5153, + "step": 1184500 + }, + { + "epoch": 5.87, + "learning_rate": 4.706562774037247e-05, + "loss": 2.5128, + "step": 1185000 + }, + { + "epoch": 5.87, + "learning_rate": 4.706438915394639e-05, + "loss": 2.5017, + "step": 1185500 + }, + { + "epoch": 5.88, + "learning_rate": 4.7063153044693156e-05, + "loss": 2.5261, + "step": 1186000 + }, + { + "epoch": 5.88, + "learning_rate": 4.7061914458267073e-05, + "loss": 2.5201, + "step": 1186500 + }, + { + "epoch": 5.88, + "learning_rate": 4.7060675871840984e-05, + "loss": 2.5188, + "step": 1187000 + }, + { + "epoch": 5.88, + "learning_rate": 4.70594372854149e-05, + "loss": 2.534, + "step": 1187500 + }, + { + "epoch": 5.89, + "learning_rate": 4.705819869898882e-05, + "loss": 2.5313, + "step": 1188000 + }, + { + "epoch": 5.89, + "learning_rate": 4.7056960112562734e-05, + "loss": 2.5164, + "step": 1188500 + }, + { + "epoch": 5.89, + "learning_rate": 4.705572152613665e-05, + "loss": 2.5065, + "step": 1189000 + }, + { + "epoch": 5.89, + "learning_rate": 4.705448293971057e-05, + "loss": 2.5141, + "step": 1189500 + }, + { + "epoch": 5.9, + "learning_rate": 4.7053244353284485e-05, + "loss": 2.4913, + "step": 1190000 + }, + { + "epoch": 5.9, + "learning_rate": 4.70520057668584e-05, + "loss": 2.5257, + "step": 1190500 + }, + { + "epoch": 5.9, + "learning_rate": 4.705076965760517e-05, + "loss": 2.5149, + "step": 1191000 + }, + { + "epoch": 5.9, + "learning_rate": 4.704953107117909e-05, + "loss": 2.524, + "step": 1191500 + }, + { + "epoch": 5.91, + "learning_rate": 4.7048292484753005e-05, + "loss": 2.5101, + "step": 1192000 + }, + { + "epoch": 5.91, + "learning_rate": 4.704705389832692e-05, + "loss": 2.5176, + "step": 1192500 + }, + { + "epoch": 5.91, + "learning_rate": 4.704581531190084e-05, + "loss": 2.5155, + "step": 1193000 + }, + { + "epoch": 5.91, + "learning_rate": 4.7044576725474756e-05, + "loss": 2.4957, + "step": 1193500 + }, + { + "epoch": 5.92, + "learning_rate": 4.704333813904867e-05, + "loss": 2.5396, + "step": 1194000 + }, + { + "epoch": 5.92, + "learning_rate": 4.7042102029795434e-05, + "loss": 2.5241, + "step": 1194500 + }, + { + "epoch": 5.92, + "learning_rate": 4.704086344336935e-05, + "loss": 2.5134, + "step": 1195000 + }, + { + "epoch": 5.92, + "learning_rate": 4.703962485694327e-05, + "loss": 2.5014, + "step": 1195500 + }, + { + "epoch": 5.93, + "learning_rate": 4.7038386270517185e-05, + "loss": 2.5075, + "step": 1196000 + }, + { + "epoch": 5.93, + "learning_rate": 4.70371476840911e-05, + "loss": 2.5256, + "step": 1196500 + }, + { + "epoch": 5.93, + "learning_rate": 4.703590909766502e-05, + "loss": 2.4918, + "step": 1197000 + }, + { + "epoch": 5.93, + "learning_rate": 4.7034670511238936e-05, + "loss": 2.4843, + "step": 1197500 + }, + { + "epoch": 5.94, + "learning_rate": 4.703343192481285e-05, + "loss": 2.5463, + "step": 1198000 + }, + { + "epoch": 5.94, + "learning_rate": 4.703219333838677e-05, + "loss": 2.4987, + "step": 1198500 + }, + { + "epoch": 5.94, + "learning_rate": 4.703095475196068e-05, + "loss": 2.5182, + "step": 1199000 + }, + { + "epoch": 5.94, + "learning_rate": 4.702972111988031e-05, + "loss": 2.5022, + "step": 1199500 + }, + { + "epoch": 5.95, + "learning_rate": 4.7028482533454224e-05, + "loss": 2.5201, + "step": 1200000 + }, + { + "epoch": 5.95, + "learning_rate": 4.7027243947028135e-05, + "loss": 2.5196, + "step": 1200500 + }, + { + "epoch": 5.95, + "learning_rate": 4.702600536060205e-05, + "loss": 2.5223, + "step": 1201000 + }, + { + "epoch": 5.95, + "learning_rate": 4.702476677417597e-05, + "loss": 2.5153, + "step": 1201500 + }, + { + "epoch": 5.96, + "learning_rate": 4.7023528187749885e-05, + "loss": 2.5415, + "step": 1202000 + }, + { + "epoch": 5.96, + "learning_rate": 4.70222896013238e-05, + "loss": 2.529, + "step": 1202500 + }, + { + "epoch": 5.96, + "learning_rate": 4.702105349207057e-05, + "loss": 2.5326, + "step": 1203000 + }, + { + "epoch": 5.96, + "learning_rate": 4.701981490564449e-05, + "loss": 2.5434, + "step": 1203500 + }, + { + "epoch": 5.97, + "learning_rate": 4.7018576319218405e-05, + "loss": 2.5482, + "step": 1204000 + }, + { + "epoch": 5.97, + "learning_rate": 4.701733773279232e-05, + "loss": 2.5158, + "step": 1204500 + }, + { + "epoch": 5.97, + "learning_rate": 4.701609914636624e-05, + "loss": 2.5229, + "step": 1205000 + }, + { + "epoch": 5.97, + "learning_rate": 4.7014860559940156e-05, + "loss": 2.5232, + "step": 1205500 + }, + { + "epoch": 5.97, + "learning_rate": 4.7013624450686925e-05, + "loss": 2.5345, + "step": 1206000 + }, + { + "epoch": 5.98, + "learning_rate": 4.701238586426084e-05, + "loss": 2.4912, + "step": 1206500 + }, + { + "epoch": 5.98, + "learning_rate": 4.701114727783475e-05, + "loss": 2.5206, + "step": 1207000 + }, + { + "epoch": 5.98, + "learning_rate": 4.700990869140867e-05, + "loss": 2.4784, + "step": 1207500 + }, + { + "epoch": 5.98, + "learning_rate": 4.7008670104982586e-05, + "loss": 2.4862, + "step": 1208000 + }, + { + "epoch": 5.99, + "learning_rate": 4.70074315185565e-05, + "loss": 2.5107, + "step": 1208500 + }, + { + "epoch": 5.99, + "learning_rate": 4.700619293213042e-05, + "loss": 2.5165, + "step": 1209000 + }, + { + "epoch": 5.99, + "learning_rate": 4.700495682287719e-05, + "loss": 2.5188, + "step": 1209500 + }, + { + "epoch": 5.99, + "learning_rate": 4.7003718236451105e-05, + "loss": 2.4935, + "step": 1210000 + }, + { + "epoch": 6.0, + "learning_rate": 4.700247965002502e-05, + "loss": 2.5275, + "step": 1210500 + }, + { + "epoch": 6.0, + "learning_rate": 4.700124106359894e-05, + "loss": 2.5212, + "step": 1211000 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6358168518016798, + "eval_accuracy_mlm": 0.5887641252682907, + "eval_accuracy_nsp": 0.8578281213842226, + "eval_loss": 2.467965841293335, + "eval_runtime": 145.9334, + "eval_samples_per_second": 1747.092, + "eval_steps_per_second": 72.8, + "step": 1211058 + }, + { + "epoch": 6.0, + "learning_rate": 4.7000002477172856e-05, + "loss": 2.4902, + "step": 1211500 + }, + { + "epoch": 6.0, + "learning_rate": 4.699876389074677e-05, + "loss": 2.493, + "step": 1212000 + }, + { + "epoch": 6.01, + "learning_rate": 4.699752530432069e-05, + "loss": 2.5038, + "step": 1212500 + }, + { + "epoch": 6.01, + "learning_rate": 4.699628671789461e-05, + "loss": 2.4571, + "step": 1213000 + }, + { + "epoch": 6.01, + "learning_rate": 4.6995048131468524e-05, + "loss": 2.4688, + "step": 1213500 + }, + { + "epoch": 6.01, + "learning_rate": 4.699380954504244e-05, + "loss": 2.5051, + "step": 1214000 + }, + { + "epoch": 6.02, + "learning_rate": 4.699257095861635e-05, + "loss": 2.4776, + "step": 1214500 + }, + { + "epoch": 6.02, + "learning_rate": 4.699133237219027e-05, + "loss": 2.4888, + "step": 1215000 + }, + { + "epoch": 6.02, + "learning_rate": 4.6990096262937036e-05, + "loss": 2.4839, + "step": 1215500 + }, + { + "epoch": 6.02, + "learning_rate": 4.698885767651095e-05, + "loss": 2.4875, + "step": 1216000 + }, + { + "epoch": 6.03, + "learning_rate": 4.698762156725772e-05, + "loss": 2.5075, + "step": 1216500 + }, + { + "epoch": 6.03, + "learning_rate": 4.698638545800449e-05, + "loss": 2.4875, + "step": 1217000 + }, + { + "epoch": 6.03, + "learning_rate": 4.698514687157841e-05, + "loss": 2.4917, + "step": 1217500 + }, + { + "epoch": 6.03, + "learning_rate": 4.6983908285152325e-05, + "loss": 2.4866, + "step": 1218000 + }, + { + "epoch": 6.04, + "learning_rate": 4.698266969872624e-05, + "loss": 2.5019, + "step": 1218500 + }, + { + "epoch": 6.04, + "learning_rate": 4.698143111230016e-05, + "loss": 2.4532, + "step": 1219000 + }, + { + "epoch": 6.04, + "learning_rate": 4.6980192525874076e-05, + "loss": 2.4699, + "step": 1219500 + }, + { + "epoch": 6.04, + "learning_rate": 4.697895393944799e-05, + "loss": 2.5117, + "step": 1220000 + }, + { + "epoch": 6.05, + "learning_rate": 4.69777153530219e-05, + "loss": 2.4897, + "step": 1220500 + }, + { + "epoch": 6.05, + "learning_rate": 4.697647676659582e-05, + "loss": 2.4558, + "step": 1221000 + }, + { + "epoch": 6.05, + "learning_rate": 4.6975238180169737e-05, + "loss": 2.4775, + "step": 1221500 + }, + { + "epoch": 6.05, + "learning_rate": 4.6973999593743653e-05, + "loss": 2.4828, + "step": 1222000 + }, + { + "epoch": 6.06, + "learning_rate": 4.697276100731757e-05, + "loss": 2.487, + "step": 1222500 + }, + { + "epoch": 6.06, + "learning_rate": 4.697152242089149e-05, + "loss": 2.4936, + "step": 1223000 + }, + { + "epoch": 6.06, + "learning_rate": 4.6970283834465404e-05, + "loss": 2.489, + "step": 1223500 + }, + { + "epoch": 6.06, + "learning_rate": 4.6969045248039314e-05, + "loss": 2.4504, + "step": 1224000 + }, + { + "epoch": 6.07, + "learning_rate": 4.696780666161323e-05, + "loss": 2.4844, + "step": 1224500 + }, + { + "epoch": 6.07, + "learning_rate": 4.696656807518715e-05, + "loss": 2.4735, + "step": 1225000 + }, + { + "epoch": 6.07, + "learning_rate": 4.6965329488761065e-05, + "loss": 2.4685, + "step": 1225500 + }, + { + "epoch": 6.07, + "learning_rate": 4.696409090233498e-05, + "loss": 2.4795, + "step": 1226000 + }, + { + "epoch": 6.08, + "learning_rate": 4.69628523159089e-05, + "loss": 2.4918, + "step": 1226500 + }, + { + "epoch": 6.08, + "learning_rate": 4.696161620665567e-05, + "loss": 2.4753, + "step": 1227000 + }, + { + "epoch": 6.08, + "learning_rate": 4.696038009740244e-05, + "loss": 2.4695, + "step": 1227500 + }, + { + "epoch": 6.08, + "learning_rate": 4.6959141510976354e-05, + "loss": 2.4831, + "step": 1228000 + }, + { + "epoch": 6.09, + "learning_rate": 4.695790292455027e-05, + "loss": 2.4936, + "step": 1228500 + }, + { + "epoch": 6.09, + "learning_rate": 4.695666433812419e-05, + "loss": 2.4876, + "step": 1229000 + }, + { + "epoch": 6.09, + "learning_rate": 4.6955425751698104e-05, + "loss": 2.4676, + "step": 1229500 + }, + { + "epoch": 6.09, + "learning_rate": 4.695418716527202e-05, + "loss": 2.4873, + "step": 1230000 + }, + { + "epoch": 6.1, + "learning_rate": 4.695295105601879e-05, + "loss": 2.4812, + "step": 1230500 + }, + { + "epoch": 6.1, + "learning_rate": 4.695171246959271e-05, + "loss": 2.4859, + "step": 1231000 + }, + { + "epoch": 6.1, + "learning_rate": 4.6950473883166624e-05, + "loss": 2.4744, + "step": 1231500 + }, + { + "epoch": 6.1, + "learning_rate": 4.694923529674054e-05, + "loss": 2.4792, + "step": 1232000 + }, + { + "epoch": 6.11, + "learning_rate": 4.694799918748731e-05, + "loss": 2.4847, + "step": 1232500 + }, + { + "epoch": 6.11, + "learning_rate": 4.694676060106123e-05, + "loss": 2.5086, + "step": 1233000 + }, + { + "epoch": 6.11, + "learning_rate": 4.6945522014635144e-05, + "loss": 2.4921, + "step": 1233500 + }, + { + "epoch": 6.11, + "learning_rate": 4.6944283428209054e-05, + "loss": 2.471, + "step": 1234000 + }, + { + "epoch": 6.12, + "learning_rate": 4.694304484178297e-05, + "loss": 2.4858, + "step": 1234500 + }, + { + "epoch": 6.12, + "learning_rate": 4.694180625535689e-05, + "loss": 2.4519, + "step": 1235000 + }, + { + "epoch": 6.12, + "learning_rate": 4.6940567668930805e-05, + "loss": 2.4916, + "step": 1235500 + }, + { + "epoch": 6.12, + "learning_rate": 4.693932908250472e-05, + "loss": 2.4775, + "step": 1236000 + }, + { + "epoch": 6.13, + "learning_rate": 4.693809049607863e-05, + "loss": 2.483, + "step": 1236500 + }, + { + "epoch": 6.13, + "learning_rate": 4.693685190965255e-05, + "loss": 2.4979, + "step": 1237000 + }, + { + "epoch": 6.13, + "learning_rate": 4.6935613323226465e-05, + "loss": 2.4822, + "step": 1237500 + }, + { + "epoch": 6.13, + "learning_rate": 4.693437473680038e-05, + "loss": 2.4846, + "step": 1238000 + }, + { + "epoch": 6.14, + "learning_rate": 4.69331361503743e-05, + "loss": 2.4769, + "step": 1238500 + }, + { + "epoch": 6.14, + "learning_rate": 4.6931900041121075e-05, + "loss": 2.5055, + "step": 1239000 + }, + { + "epoch": 6.14, + "learning_rate": 4.6930661454694985e-05, + "loss": 2.4656, + "step": 1239500 + }, + { + "epoch": 6.14, + "learning_rate": 4.69294228682689e-05, + "loss": 2.5074, + "step": 1240000 + }, + { + "epoch": 6.15, + "learning_rate": 4.692818428184282e-05, + "loss": 2.4729, + "step": 1240500 + }, + { + "epoch": 6.15, + "learning_rate": 4.692694817258959e-05, + "loss": 2.4935, + "step": 1241000 + }, + { + "epoch": 6.15, + "learning_rate": 4.6925709586163505e-05, + "loss": 2.4919, + "step": 1241500 + }, + { + "epoch": 6.15, + "learning_rate": 4.692447099973742e-05, + "loss": 2.4865, + "step": 1242000 + }, + { + "epoch": 6.16, + "learning_rate": 4.692323241331134e-05, + "loss": 2.4812, + "step": 1242500 + }, + { + "epoch": 6.16, + "learning_rate": 4.6921993826885255e-05, + "loss": 2.483, + "step": 1243000 + }, + { + "epoch": 6.16, + "learning_rate": 4.6920757717632024e-05, + "loss": 2.4902, + "step": 1243500 + }, + { + "epoch": 6.16, + "learning_rate": 4.691951913120594e-05, + "loss": 2.47, + "step": 1244000 + }, + { + "epoch": 6.17, + "learning_rate": 4.691828054477986e-05, + "loss": 2.4547, + "step": 1244500 + }, + { + "epoch": 6.17, + "learning_rate": 4.6917041958353775e-05, + "loss": 2.4744, + "step": 1245000 + }, + { + "epoch": 6.17, + "learning_rate": 4.691580337192769e-05, + "loss": 2.4984, + "step": 1245500 + }, + { + "epoch": 6.17, + "learning_rate": 4.69145647855016e-05, + "loss": 2.4811, + "step": 1246000 + }, + { + "epoch": 6.18, + "learning_rate": 4.691332619907552e-05, + "loss": 2.4872, + "step": 1246500 + }, + { + "epoch": 6.18, + "learning_rate": 4.6912090089822295e-05, + "loss": 2.481, + "step": 1247000 + }, + { + "epoch": 6.18, + "learning_rate": 4.691085150339621e-05, + "loss": 2.4718, + "step": 1247500 + }, + { + "epoch": 6.18, + "learning_rate": 4.690961291697012e-05, + "loss": 2.4848, + "step": 1248000 + }, + { + "epoch": 6.19, + "learning_rate": 4.690837433054404e-05, + "loss": 2.483, + "step": 1248500 + }, + { + "epoch": 6.19, + "learning_rate": 4.6907135744117956e-05, + "loss": 2.4704, + "step": 1249000 + }, + { + "epoch": 6.19, + "learning_rate": 4.690589715769187e-05, + "loss": 2.4682, + "step": 1249500 + }, + { + "epoch": 6.19, + "learning_rate": 4.690466104843864e-05, + "loss": 2.4647, + "step": 1250000 + }, + { + "epoch": 6.2, + "learning_rate": 4.690342493918541e-05, + "loss": 2.4835, + "step": 1250500 + }, + { + "epoch": 6.2, + "learning_rate": 4.690218635275933e-05, + "loss": 2.4615, + "step": 1251000 + }, + { + "epoch": 6.2, + "learning_rate": 4.6900947766333244e-05, + "loss": 2.4774, + "step": 1251500 + }, + { + "epoch": 6.2, + "learning_rate": 4.689970917990716e-05, + "loss": 2.4772, + "step": 1252000 + }, + { + "epoch": 6.21, + "learning_rate": 4.689847059348108e-05, + "loss": 2.4977, + "step": 1252500 + }, + { + "epoch": 6.21, + "learning_rate": 4.6897232007054995e-05, + "loss": 2.5088, + "step": 1253000 + }, + { + "epoch": 6.21, + "learning_rate": 4.689599342062891e-05, + "loss": 2.4723, + "step": 1253500 + }, + { + "epoch": 6.21, + "learning_rate": 4.689475483420283e-05, + "loss": 2.4965, + "step": 1254000 + }, + { + "epoch": 6.22, + "learning_rate": 4.689351624777674e-05, + "loss": 2.4822, + "step": 1254500 + }, + { + "epoch": 6.22, + "learning_rate": 4.689228013852351e-05, + "loss": 2.5036, + "step": 1255000 + }, + { + "epoch": 6.22, + "learning_rate": 4.6891041552097424e-05, + "loss": 2.4874, + "step": 1255500 + }, + { + "epoch": 6.22, + "learning_rate": 4.688980544284419e-05, + "loss": 2.4798, + "step": 1256000 + }, + { + "epoch": 6.23, + "learning_rate": 4.688856685641811e-05, + "loss": 2.4956, + "step": 1256500 + }, + { + "epoch": 6.23, + "learning_rate": 4.688733074716488e-05, + "loss": 2.4933, + "step": 1257000 + }, + { + "epoch": 6.23, + "learning_rate": 4.6886092160738796e-05, + "loss": 2.5024, + "step": 1257500 + }, + { + "epoch": 6.23, + "learning_rate": 4.6884853574312706e-05, + "loss": 2.4811, + "step": 1258000 + }, + { + "epoch": 6.24, + "learning_rate": 4.688361498788662e-05, + "loss": 2.4707, + "step": 1258500 + }, + { + "epoch": 6.24, + "learning_rate": 4.688237887863339e-05, + "loss": 2.4857, + "step": 1259000 + }, + { + "epoch": 6.24, + "learning_rate": 4.688114029220731e-05, + "loss": 2.5269, + "step": 1259500 + }, + { + "epoch": 6.24, + "learning_rate": 4.6879901705781226e-05, + "loss": 2.4841, + "step": 1260000 + }, + { + "epoch": 6.24, + "learning_rate": 4.687866311935514e-05, + "loss": 2.5034, + "step": 1260500 + }, + { + "epoch": 6.25, + "learning_rate": 4.687742453292906e-05, + "loss": 2.4715, + "step": 1261000 + }, + { + "epoch": 6.25, + "learning_rate": 4.6876185946502976e-05, + "loss": 2.4924, + "step": 1261500 + }, + { + "epoch": 6.25, + "learning_rate": 4.6874947360076893e-05, + "loss": 2.5025, + "step": 1262000 + }, + { + "epoch": 6.25, + "learning_rate": 4.687370877365081e-05, + "loss": 2.4942, + "step": 1262500 + }, + { + "epoch": 6.26, + "learning_rate": 4.687247018722473e-05, + "loss": 2.4852, + "step": 1263000 + }, + { + "epoch": 6.26, + "learning_rate": 4.6871231600798644e-05, + "loss": 2.4815, + "step": 1263500 + }, + { + "epoch": 6.26, + "learning_rate": 4.686999301437256e-05, + "loss": 2.4833, + "step": 1264000 + }, + { + "epoch": 6.26, + "learning_rate": 4.686875690511932e-05, + "loss": 2.5097, + "step": 1264500 + }, + { + "epoch": 6.27, + "learning_rate": 4.686751831869324e-05, + "loss": 2.5067, + "step": 1265000 + }, + { + "epoch": 6.27, + "learning_rate": 4.686627973226716e-05, + "loss": 2.5029, + "step": 1265500 + }, + { + "epoch": 6.27, + "learning_rate": 4.6865041145841074e-05, + "loss": 2.4842, + "step": 1266000 + }, + { + "epoch": 6.27, + "learning_rate": 4.686380255941499e-05, + "loss": 2.4757, + "step": 1266500 + }, + { + "epoch": 6.28, + "learning_rate": 4.686256397298891e-05, + "loss": 2.4729, + "step": 1267000 + }, + { + "epoch": 6.28, + "learning_rate": 4.6861325386562825e-05, + "loss": 2.4995, + "step": 1267500 + }, + { + "epoch": 6.28, + "learning_rate": 4.686008680013674e-05, + "loss": 2.5044, + "step": 1268000 + }, + { + "epoch": 6.28, + "learning_rate": 4.685884821371066e-05, + "loss": 2.483, + "step": 1268500 + }, + { + "epoch": 6.29, + "learning_rate": 4.685761458163028e-05, + "loss": 2.5019, + "step": 1269000 + }, + { + "epoch": 6.29, + "learning_rate": 4.6856375995204196e-05, + "loss": 2.5024, + "step": 1269500 + }, + { + "epoch": 6.29, + "learning_rate": 4.685513740877811e-05, + "loss": 2.4707, + "step": 1270000 + }, + { + "epoch": 6.29, + "learning_rate": 4.685389882235203e-05, + "loss": 2.4647, + "step": 1270500 + }, + { + "epoch": 6.3, + "learning_rate": 4.68526627130988e-05, + "loss": 2.4922, + "step": 1271000 + }, + { + "epoch": 6.3, + "learning_rate": 4.685142412667271e-05, + "loss": 2.5003, + "step": 1271500 + }, + { + "epoch": 6.3, + "learning_rate": 4.6850185540246626e-05, + "loss": 2.4678, + "step": 1272000 + }, + { + "epoch": 6.3, + "learning_rate": 4.684894695382054e-05, + "loss": 2.4832, + "step": 1272500 + }, + { + "epoch": 6.31, + "learning_rate": 4.684771084456732e-05, + "loss": 2.4824, + "step": 1273000 + }, + { + "epoch": 6.31, + "learning_rate": 4.6846472258141235e-05, + "loss": 2.4748, + "step": 1273500 + }, + { + "epoch": 6.31, + "learning_rate": 4.684523367171515e-05, + "loss": 2.5172, + "step": 1274000 + }, + { + "epoch": 6.31, + "learning_rate": 4.684399508528906e-05, + "loss": 2.4525, + "step": 1274500 + }, + { + "epoch": 6.32, + "learning_rate": 4.684275649886298e-05, + "loss": 2.4783, + "step": 1275000 + }, + { + "epoch": 6.32, + "learning_rate": 4.6841517912436896e-05, + "loss": 2.5077, + "step": 1275500 + }, + { + "epoch": 6.32, + "learning_rate": 4.6840281803183665e-05, + "loss": 2.4907, + "step": 1276000 + }, + { + "epoch": 6.32, + "learning_rate": 4.683904321675758e-05, + "loss": 2.464, + "step": 1276500 + }, + { + "epoch": 6.33, + "learning_rate": 4.68378046303315e-05, + "loss": 2.4991, + "step": 1277000 + }, + { + "epoch": 6.33, + "learning_rate": 4.6836566043905416e-05, + "loss": 2.5031, + "step": 1277500 + }, + { + "epoch": 6.33, + "learning_rate": 4.6835327457479326e-05, + "loss": 2.4927, + "step": 1278000 + }, + { + "epoch": 6.33, + "learning_rate": 4.683408887105324e-05, + "loss": 2.4785, + "step": 1278500 + }, + { + "epoch": 6.34, + "learning_rate": 4.683285028462716e-05, + "loss": 2.465, + "step": 1279000 + }, + { + "epoch": 6.34, + "learning_rate": 4.683161169820108e-05, + "loss": 2.4818, + "step": 1279500 + }, + { + "epoch": 6.34, + "learning_rate": 4.6830373111774994e-05, + "loss": 2.5001, + "step": 1280000 + }, + { + "epoch": 6.34, + "learning_rate": 4.682913452534891e-05, + "loss": 2.4687, + "step": 1280500 + }, + { + "epoch": 6.35, + "learning_rate": 4.682789841609568e-05, + "loss": 2.4916, + "step": 1281000 + }, + { + "epoch": 6.35, + "learning_rate": 4.6826659829669596e-05, + "loss": 2.4898, + "step": 1281500 + }, + { + "epoch": 6.35, + "learning_rate": 4.682542124324351e-05, + "loss": 2.4705, + "step": 1282000 + }, + { + "epoch": 6.35, + "learning_rate": 4.682418265681743e-05, + "loss": 2.4925, + "step": 1282500 + }, + { + "epoch": 6.36, + "learning_rate": 4.682294407039135e-05, + "loss": 2.4848, + "step": 1283000 + }, + { + "epoch": 6.36, + "learning_rate": 4.6821705483965264e-05, + "loss": 2.4765, + "step": 1283500 + }, + { + "epoch": 6.36, + "learning_rate": 4.6820469374712026e-05, + "loss": 2.4905, + "step": 1284000 + }, + { + "epoch": 6.36, + "learning_rate": 4.681923078828594e-05, + "loss": 2.4852, + "step": 1284500 + }, + { + "epoch": 6.37, + "learning_rate": 4.681799220185986e-05, + "loss": 2.4962, + "step": 1285000 + }, + { + "epoch": 6.37, + "learning_rate": 4.681675361543378e-05, + "loss": 2.4493, + "step": 1285500 + }, + { + "epoch": 6.37, + "learning_rate": 4.6815515029007694e-05, + "loss": 2.4891, + "step": 1286000 + }, + { + "epoch": 6.37, + "learning_rate": 4.681427644258161e-05, + "loss": 2.4824, + "step": 1286500 + }, + { + "epoch": 6.38, + "learning_rate": 4.681303785615553e-05, + "loss": 2.4763, + "step": 1287000 + }, + { + "epoch": 6.38, + "learning_rate": 4.6811799269729445e-05, + "loss": 2.5012, + "step": 1287500 + }, + { + "epoch": 6.38, + "learning_rate": 4.681056068330336e-05, + "loss": 2.4999, + "step": 1288000 + }, + { + "epoch": 6.38, + "learning_rate": 4.680932209687728e-05, + "loss": 2.4742, + "step": 1288500 + }, + { + "epoch": 6.39, + "learning_rate": 4.680808598762405e-05, + "loss": 2.4774, + "step": 1289000 + }, + { + "epoch": 6.39, + "learning_rate": 4.6806847401197964e-05, + "loss": 2.4746, + "step": 1289500 + }, + { + "epoch": 6.39, + "learning_rate": 4.680560881477188e-05, + "loss": 2.4768, + "step": 1290000 + }, + { + "epoch": 6.39, + "learning_rate": 4.68043702283458e-05, + "loss": 2.4646, + "step": 1290500 + }, + { + "epoch": 6.4, + "learning_rate": 4.6803131641919715e-05, + "loss": 2.4499, + "step": 1291000 + }, + { + "epoch": 6.4, + "learning_rate": 4.680189553266648e-05, + "loss": 2.4654, + "step": 1291500 + }, + { + "epoch": 6.4, + "learning_rate": 4.6800656946240394e-05, + "loss": 2.4719, + "step": 1292000 + }, + { + "epoch": 6.4, + "learning_rate": 4.679941835981431e-05, + "loss": 2.4862, + "step": 1292500 + }, + { + "epoch": 6.41, + "learning_rate": 4.6798182250561087e-05, + "loss": 2.4977, + "step": 1293000 + }, + { + "epoch": 6.41, + "learning_rate": 4.679694614130785e-05, + "loss": 2.4832, + "step": 1293500 + }, + { + "epoch": 6.41, + "learning_rate": 4.6795707554881766e-05, + "loss": 2.5195, + "step": 1294000 + }, + { + "epoch": 6.41, + "learning_rate": 4.679446896845568e-05, + "loss": 2.4994, + "step": 1294500 + }, + { + "epoch": 6.42, + "learning_rate": 4.67932303820296e-05, + "loss": 2.4854, + "step": 1295000 + }, + { + "epoch": 6.42, + "learning_rate": 4.6791991795603516e-05, + "loss": 2.4757, + "step": 1295500 + }, + { + "epoch": 6.42, + "learning_rate": 4.679075320917743e-05, + "loss": 2.5, + "step": 1296000 + }, + { + "epoch": 6.42, + "learning_rate": 4.678951462275134e-05, + "loss": 2.4907, + "step": 1296500 + }, + { + "epoch": 6.43, + "learning_rate": 4.678827603632526e-05, + "loss": 2.5016, + "step": 1297000 + }, + { + "epoch": 6.43, + "learning_rate": 4.678703744989918e-05, + "loss": 2.5061, + "step": 1297500 + }, + { + "epoch": 6.43, + "learning_rate": 4.6785798863473094e-05, + "loss": 2.4797, + "step": 1298000 + }, + { + "epoch": 6.43, + "learning_rate": 4.678456027704701e-05, + "loss": 2.5274, + "step": 1298500 + }, + { + "epoch": 6.44, + "learning_rate": 4.678332169062093e-05, + "loss": 2.489, + "step": 1299000 + }, + { + "epoch": 6.44, + "learning_rate": 4.6782083104194845e-05, + "loss": 2.4759, + "step": 1299500 + }, + { + "epoch": 6.44, + "learning_rate": 4.6780846994941614e-05, + "loss": 2.4618, + "step": 1300000 + }, + { + "epoch": 6.44, + "learning_rate": 4.677961088568838e-05, + "loss": 2.4962, + "step": 1300500 + }, + { + "epoch": 6.45, + "learning_rate": 4.67783722992623e-05, + "loss": 2.485, + "step": 1301000 + }, + { + "epoch": 6.45, + "learning_rate": 4.6777133712836216e-05, + "loss": 2.4917, + "step": 1301500 + }, + { + "epoch": 6.45, + "learning_rate": 4.677589512641013e-05, + "loss": 2.4872, + "step": 1302000 + }, + { + "epoch": 6.45, + "learning_rate": 4.677465653998405e-05, + "loss": 2.4978, + "step": 1302500 + }, + { + "epoch": 6.46, + "learning_rate": 4.677342043073082e-05, + "loss": 2.486, + "step": 1303000 + }, + { + "epoch": 6.46, + "learning_rate": 4.6772181844304736e-05, + "loss": 2.5053, + "step": 1303500 + }, + { + "epoch": 6.46, + "learning_rate": 4.6770945735051505e-05, + "loss": 2.4912, + "step": 1304000 + }, + { + "epoch": 6.46, + "learning_rate": 4.676970714862542e-05, + "loss": 2.4926, + "step": 1304500 + }, + { + "epoch": 6.47, + "learning_rate": 4.676846856219934e-05, + "loss": 2.4839, + "step": 1305000 + }, + { + "epoch": 6.47, + "learning_rate": 4.6767229975773256e-05, + "loss": 2.4895, + "step": 1305500 + }, + { + "epoch": 6.47, + "learning_rate": 4.6765991389347166e-05, + "loss": 2.5066, + "step": 1306000 + }, + { + "epoch": 6.47, + "learning_rate": 4.6764755280093935e-05, + "loss": 2.4824, + "step": 1306500 + }, + { + "epoch": 6.48, + "learning_rate": 4.676351669366785e-05, + "loss": 2.482, + "step": 1307000 + }, + { + "epoch": 6.48, + "learning_rate": 4.676227810724177e-05, + "loss": 2.4795, + "step": 1307500 + }, + { + "epoch": 6.48, + "learning_rate": 4.6761039520815685e-05, + "loss": 2.5152, + "step": 1308000 + }, + { + "epoch": 6.48, + "learning_rate": 4.67598009343896e-05, + "loss": 2.4962, + "step": 1308500 + }, + { + "epoch": 6.49, + "learning_rate": 4.675856234796352e-05, + "loss": 2.499, + "step": 1309000 + }, + { + "epoch": 6.49, + "learning_rate": 4.6757323761537436e-05, + "loss": 2.5016, + "step": 1309500 + }, + { + "epoch": 6.49, + "learning_rate": 4.675608517511135e-05, + "loss": 2.5232, + "step": 1310000 + }, + { + "epoch": 6.49, + "learning_rate": 4.675484658868527e-05, + "loss": 2.4946, + "step": 1310500 + }, + { + "epoch": 6.5, + "learning_rate": 4.675361047943204e-05, + "loss": 2.4769, + "step": 1311000 + }, + { + "epoch": 6.5, + "learning_rate": 4.6752371893005956e-05, + "loss": 2.4588, + "step": 1311500 + }, + { + "epoch": 6.5, + "learning_rate": 4.675113578375272e-05, + "loss": 2.514, + "step": 1312000 + }, + { + "epoch": 6.5, + "learning_rate": 4.6749897197326635e-05, + "loss": 2.5118, + "step": 1312500 + }, + { + "epoch": 6.51, + "learning_rate": 4.674865861090055e-05, + "loss": 2.4955, + "step": 1313000 + }, + { + "epoch": 6.51, + "learning_rate": 4.674742002447447e-05, + "loss": 2.4849, + "step": 1313500 + }, + { + "epoch": 6.51, + "learning_rate": 4.6746181438048385e-05, + "loss": 2.4894, + "step": 1314000 + }, + { + "epoch": 6.51, + "learning_rate": 4.67449428516223e-05, + "loss": 2.4816, + "step": 1314500 + }, + { + "epoch": 6.51, + "learning_rate": 4.674370426519622e-05, + "loss": 2.4829, + "step": 1315000 + }, + { + "epoch": 6.52, + "learning_rate": 4.6742465678770136e-05, + "loss": 2.4969, + "step": 1315500 + }, + { + "epoch": 6.52, + "learning_rate": 4.6741229569516905e-05, + "loss": 2.4945, + "step": 1316000 + }, + { + "epoch": 6.52, + "learning_rate": 4.673999346026367e-05, + "loss": 2.4751, + "step": 1316500 + }, + { + "epoch": 6.52, + "learning_rate": 4.6738754873837584e-05, + "loss": 2.5224, + "step": 1317000 + }, + { + "epoch": 6.53, + "learning_rate": 4.67375162874115e-05, + "loss": 2.5149, + "step": 1317500 + }, + { + "epoch": 6.53, + "learning_rate": 4.673627770098542e-05, + "loss": 2.5144, + "step": 1318000 + }, + { + "epoch": 6.53, + "learning_rate": 4.6735039114559335e-05, + "loss": 2.4687, + "step": 1318500 + }, + { + "epoch": 6.53, + "learning_rate": 4.673380052813325e-05, + "loss": 2.4861, + "step": 1319000 + }, + { + "epoch": 6.54, + "learning_rate": 4.673256441888002e-05, + "loss": 2.5114, + "step": 1319500 + }, + { + "epoch": 6.54, + "learning_rate": 4.673132583245394e-05, + "loss": 2.482, + "step": 1320000 + }, + { + "epoch": 6.54, + "learning_rate": 4.6730087246027854e-05, + "loss": 2.5231, + "step": 1320500 + }, + { + "epoch": 6.54, + "learning_rate": 4.672885113677462e-05, + "loss": 2.511, + "step": 1321000 + }, + { + "epoch": 6.55, + "learning_rate": 4.672761502752139e-05, + "loss": 2.512, + "step": 1321500 + }, + { + "epoch": 6.55, + "learning_rate": 4.672637644109531e-05, + "loss": 2.5054, + "step": 1322000 + }, + { + "epoch": 6.55, + "learning_rate": 4.6725137854669226e-05, + "loss": 2.502, + "step": 1322500 + }, + { + "epoch": 6.55, + "learning_rate": 4.6723901745415995e-05, + "loss": 2.4838, + "step": 1323000 + }, + { + "epoch": 6.56, + "learning_rate": 4.672266315898991e-05, + "loss": 2.5259, + "step": 1323500 + }, + { + "epoch": 6.56, + "learning_rate": 4.672142457256383e-05, + "loss": 2.5036, + "step": 1324000 + }, + { + "epoch": 6.56, + "learning_rate": 4.6720185986137745e-05, + "loss": 2.5052, + "step": 1324500 + }, + { + "epoch": 6.56, + "learning_rate": 4.671894739971166e-05, + "loss": 2.4959, + "step": 1325000 + }, + { + "epoch": 6.57, + "learning_rate": 4.671770881328558e-05, + "loss": 2.5092, + "step": 1325500 + }, + { + "epoch": 6.57, + "learning_rate": 4.6716470226859496e-05, + "loss": 2.5141, + "step": 1326000 + }, + { + "epoch": 6.57, + "learning_rate": 4.671523164043341e-05, + "loss": 2.5202, + "step": 1326500 + }, + { + "epoch": 6.57, + "learning_rate": 4.671399305400732e-05, + "loss": 2.4919, + "step": 1327000 + }, + { + "epoch": 6.58, + "learning_rate": 4.671275446758124e-05, + "loss": 2.4807, + "step": 1327500 + }, + { + "epoch": 6.58, + "learning_rate": 4.671151588115516e-05, + "loss": 2.4948, + "step": 1328000 + }, + { + "epoch": 6.58, + "learning_rate": 4.6710277294729074e-05, + "loss": 2.51, + "step": 1328500 + }, + { + "epoch": 6.58, + "learning_rate": 4.670903870830299e-05, + "loss": 2.4887, + "step": 1329000 + }, + { + "epoch": 6.59, + "learning_rate": 4.67078001218769e-05, + "loss": 2.4943, + "step": 1329500 + }, + { + "epoch": 6.59, + "learning_rate": 4.670656153545082e-05, + "loss": 2.5102, + "step": 1330000 + }, + { + "epoch": 6.59, + "learning_rate": 4.6705322949024735e-05, + "loss": 2.4674, + "step": 1330500 + }, + { + "epoch": 6.59, + "learning_rate": 4.670408436259865e-05, + "loss": 2.4995, + "step": 1331000 + }, + { + "epoch": 6.6, + "learning_rate": 4.670285073051828e-05, + "loss": 2.5186, + "step": 1331500 + }, + { + "epoch": 6.6, + "learning_rate": 4.6701612144092196e-05, + "loss": 2.485, + "step": 1332000 + }, + { + "epoch": 6.6, + "learning_rate": 4.670037355766611e-05, + "loss": 2.487, + "step": 1332500 + }, + { + "epoch": 6.6, + "learning_rate": 4.669913497124003e-05, + "loss": 2.5017, + "step": 1333000 + }, + { + "epoch": 6.61, + "learning_rate": 4.669789886198679e-05, + "loss": 2.4821, + "step": 1333500 + }, + { + "epoch": 6.61, + "learning_rate": 4.669666027556071e-05, + "loss": 2.4815, + "step": 1334000 + }, + { + "epoch": 6.61, + "learning_rate": 4.6695421689134626e-05, + "loss": 2.4971, + "step": 1334500 + }, + { + "epoch": 6.61, + "learning_rate": 4.669418310270854e-05, + "loss": 2.5461, + "step": 1335000 + }, + { + "epoch": 6.62, + "learning_rate": 4.669294451628246e-05, + "loss": 2.514, + "step": 1335500 + }, + { + "epoch": 6.62, + "learning_rate": 4.669170592985638e-05, + "loss": 2.5117, + "step": 1336000 + }, + { + "epoch": 6.62, + "learning_rate": 4.6690467343430294e-05, + "loss": 2.5189, + "step": 1336500 + }, + { + "epoch": 6.62, + "learning_rate": 4.668922875700421e-05, + "loss": 2.4931, + "step": 1337000 + }, + { + "epoch": 6.63, + "learning_rate": 4.668799017057813e-05, + "loss": 2.5089, + "step": 1337500 + }, + { + "epoch": 6.63, + "learning_rate": 4.668675158415204e-05, + "loss": 2.5183, + "step": 1338000 + }, + { + "epoch": 6.63, + "learning_rate": 4.6685512997725955e-05, + "loss": 2.5247, + "step": 1338500 + }, + { + "epoch": 6.63, + "learning_rate": 4.668427441129987e-05, + "loss": 2.4885, + "step": 1339000 + }, + { + "epoch": 6.64, + "learning_rate": 4.668303830204665e-05, + "loss": 2.5007, + "step": 1339500 + }, + { + "epoch": 6.64, + "learning_rate": 4.6681799715620564e-05, + "loss": 2.483, + "step": 1340000 + }, + { + "epoch": 6.64, + "learning_rate": 4.668056112919448e-05, + "loss": 2.5025, + "step": 1340500 + }, + { + "epoch": 6.64, + "learning_rate": 4.667932254276839e-05, + "loss": 2.4848, + "step": 1341000 + }, + { + "epoch": 6.65, + "learning_rate": 4.667808395634231e-05, + "loss": 2.5055, + "step": 1341500 + }, + { + "epoch": 6.65, + "learning_rate": 4.6676845369916225e-05, + "loss": 2.4936, + "step": 1342000 + }, + { + "epoch": 6.65, + "learning_rate": 4.6675609260662994e-05, + "loss": 2.5113, + "step": 1342500 + }, + { + "epoch": 6.65, + "learning_rate": 4.667437315140976e-05, + "loss": 2.4663, + "step": 1343000 + }, + { + "epoch": 6.66, + "learning_rate": 4.667313456498368e-05, + "loss": 2.4992, + "step": 1343500 + }, + { + "epoch": 6.66, + "learning_rate": 4.66718959785576e-05, + "loss": 2.4944, + "step": 1344000 + }, + { + "epoch": 6.66, + "learning_rate": 4.6670657392131514e-05, + "loss": 2.4973, + "step": 1344500 + }, + { + "epoch": 6.66, + "learning_rate": 4.666941880570543e-05, + "loss": 2.469, + "step": 1345000 + }, + { + "epoch": 6.67, + "learning_rate": 4.666818021927935e-05, + "loss": 2.5108, + "step": 1345500 + }, + { + "epoch": 6.67, + "learning_rate": 4.6666941632853264e-05, + "loss": 2.4903, + "step": 1346000 + }, + { + "epoch": 6.67, + "learning_rate": 4.666570304642718e-05, + "loss": 2.4975, + "step": 1346500 + }, + { + "epoch": 6.67, + "learning_rate": 4.66644644600011e-05, + "loss": 2.5192, + "step": 1347000 + }, + { + "epoch": 6.68, + "learning_rate": 4.666322587357501e-05, + "loss": 2.5035, + "step": 1347500 + }, + { + "epoch": 6.68, + "learning_rate": 4.666198976432178e-05, + "loss": 2.4976, + "step": 1348000 + }, + { + "epoch": 6.68, + "learning_rate": 4.6660751177895694e-05, + "loss": 2.5002, + "step": 1348500 + }, + { + "epoch": 6.68, + "learning_rate": 4.665951506864246e-05, + "loss": 2.5158, + "step": 1349000 + }, + { + "epoch": 6.69, + "learning_rate": 4.665827648221638e-05, + "loss": 2.4809, + "step": 1349500 + }, + { + "epoch": 6.69, + "learning_rate": 4.66570378957903e-05, + "loss": 2.4874, + "step": 1350000 + }, + { + "epoch": 6.69, + "learning_rate": 4.6655799309364214e-05, + "loss": 2.5005, + "step": 1350500 + }, + { + "epoch": 6.69, + "learning_rate": 4.665456072293813e-05, + "loss": 2.4906, + "step": 1351000 + }, + { + "epoch": 6.7, + "learning_rate": 4.665332213651205e-05, + "loss": 2.5107, + "step": 1351500 + }, + { + "epoch": 6.7, + "learning_rate": 4.6652083550085964e-05, + "loss": 2.4649, + "step": 1352000 + }, + { + "epoch": 6.7, + "learning_rate": 4.665084496365988e-05, + "loss": 2.4886, + "step": 1352500 + }, + { + "epoch": 6.7, + "learning_rate": 4.66496063772338e-05, + "loss": 2.5207, + "step": 1353000 + }, + { + "epoch": 6.71, + "learning_rate": 4.664837026798056e-05, + "loss": 2.4898, + "step": 1353500 + }, + { + "epoch": 6.71, + "learning_rate": 4.664713168155448e-05, + "loss": 2.5024, + "step": 1354000 + }, + { + "epoch": 6.71, + "learning_rate": 4.6645893095128394e-05, + "loss": 2.517, + "step": 1354500 + }, + { + "epoch": 6.71, + "learning_rate": 4.664465450870231e-05, + "loss": 2.4937, + "step": 1355000 + }, + { + "epoch": 6.72, + "learning_rate": 4.664341592227623e-05, + "loss": 2.4872, + "step": 1355500 + }, + { + "epoch": 6.72, + "learning_rate": 4.6642177335850145e-05, + "loss": 2.5017, + "step": 1356000 + }, + { + "epoch": 6.72, + "learning_rate": 4.6640938749424055e-05, + "loss": 2.5051, + "step": 1356500 + }, + { + "epoch": 6.72, + "learning_rate": 4.663970016299797e-05, + "loss": 2.477, + "step": 1357000 + }, + { + "epoch": 6.73, + "learning_rate": 4.663846157657189e-05, + "loss": 2.5128, + "step": 1357500 + }, + { + "epoch": 6.73, + "learning_rate": 4.6637222990145806e-05, + "loss": 2.4908, + "step": 1358000 + }, + { + "epoch": 6.73, + "learning_rate": 4.663598440371972e-05, + "loss": 2.4832, + "step": 1358500 + }, + { + "epoch": 6.73, + "learning_rate": 4.66347482944665e-05, + "loss": 2.4844, + "step": 1359000 + }, + { + "epoch": 6.74, + "learning_rate": 4.6633509708040415e-05, + "loss": 2.5207, + "step": 1359500 + }, + { + "epoch": 6.74, + "learning_rate": 4.663227359878718e-05, + "loss": 2.496, + "step": 1360000 + }, + { + "epoch": 6.74, + "learning_rate": 4.6631035012361094e-05, + "loss": 2.5076, + "step": 1360500 + }, + { + "epoch": 6.74, + "learning_rate": 4.662979642593501e-05, + "loss": 2.4935, + "step": 1361000 + }, + { + "epoch": 6.75, + "learning_rate": 4.662855783950893e-05, + "loss": 2.4879, + "step": 1361500 + }, + { + "epoch": 6.75, + "learning_rate": 4.6627319253082845e-05, + "loss": 2.4986, + "step": 1362000 + }, + { + "epoch": 6.75, + "learning_rate": 4.6626083143829614e-05, + "loss": 2.515, + "step": 1362500 + }, + { + "epoch": 6.75, + "learning_rate": 4.662484455740353e-05, + "loss": 2.501, + "step": 1363000 + }, + { + "epoch": 6.76, + "learning_rate": 4.662360597097745e-05, + "loss": 2.4869, + "step": 1363500 + }, + { + "epoch": 6.76, + "learning_rate": 4.6622367384551365e-05, + "loss": 2.4833, + "step": 1364000 + }, + { + "epoch": 6.76, + "learning_rate": 4.662113127529813e-05, + "loss": 2.4934, + "step": 1364500 + }, + { + "epoch": 6.76, + "learning_rate": 4.6619895166044896e-05, + "loss": 2.48, + "step": 1365000 + }, + { + "epoch": 6.77, + "learning_rate": 4.661865657961881e-05, + "loss": 2.4729, + "step": 1365500 + }, + { + "epoch": 6.77, + "learning_rate": 4.661741799319273e-05, + "loss": 2.4902, + "step": 1366000 + }, + { + "epoch": 6.77, + "learning_rate": 4.6616179406766646e-05, + "loss": 2.5141, + "step": 1366500 + }, + { + "epoch": 6.77, + "learning_rate": 4.661494082034056e-05, + "loss": 2.4894, + "step": 1367000 + }, + { + "epoch": 6.78, + "learning_rate": 4.661370223391448e-05, + "loss": 2.4918, + "step": 1367500 + }, + { + "epoch": 6.78, + "learning_rate": 4.66124636474884e-05, + "loss": 2.5045, + "step": 1368000 + }, + { + "epoch": 6.78, + "learning_rate": 4.6611225061062314e-05, + "loss": 2.4838, + "step": 1368500 + }, + { + "epoch": 6.78, + "learning_rate": 4.660998647463623e-05, + "loss": 2.487, + "step": 1369000 + }, + { + "epoch": 6.78, + "learning_rate": 4.660874788821015e-05, + "loss": 2.5039, + "step": 1369500 + }, + { + "epoch": 6.79, + "learning_rate": 4.6607509301784065e-05, + "loss": 2.5063, + "step": 1370000 + }, + { + "epoch": 6.79, + "learning_rate": 4.660627071535798e-05, + "loss": 2.5151, + "step": 1370500 + }, + { + "epoch": 6.79, + "learning_rate": 4.6605039560450454e-05, + "loss": 2.4945, + "step": 1371000 + }, + { + "epoch": 6.79, + "learning_rate": 4.660380097402437e-05, + "loss": 2.4963, + "step": 1371500 + }, + { + "epoch": 6.8, + "learning_rate": 4.660256238759829e-05, + "loss": 2.5327, + "step": 1372000 + }, + { + "epoch": 6.8, + "learning_rate": 4.660132627834505e-05, + "loss": 2.4942, + "step": 1372500 + }, + { + "epoch": 6.8, + "learning_rate": 4.660008769191897e-05, + "loss": 2.512, + "step": 1373000 + }, + { + "epoch": 6.8, + "learning_rate": 4.6598849105492884e-05, + "loss": 2.4764, + "step": 1373500 + }, + { + "epoch": 6.81, + "learning_rate": 4.65976105190668e-05, + "loss": 2.4803, + "step": 1374000 + }, + { + "epoch": 6.81, + "learning_rate": 4.659637193264072e-05, + "loss": 2.4855, + "step": 1374500 + }, + { + "epoch": 6.81, + "learning_rate": 4.6595133346214635e-05, + "loss": 2.4853, + "step": 1375000 + }, + { + "epoch": 6.81, + "learning_rate": 4.6593897236961404e-05, + "loss": 2.4791, + "step": 1375500 + }, + { + "epoch": 6.82, + "learning_rate": 4.659265865053532e-05, + "loss": 2.4887, + "step": 1376000 + }, + { + "epoch": 6.82, + "learning_rate": 4.659142006410924e-05, + "loss": 2.4982, + "step": 1376500 + }, + { + "epoch": 6.82, + "learning_rate": 4.6590181477683154e-05, + "loss": 2.5017, + "step": 1377000 + }, + { + "epoch": 6.82, + "learning_rate": 4.658894536842992e-05, + "loss": 2.5206, + "step": 1377500 + }, + { + "epoch": 6.83, + "learning_rate": 4.658770678200384e-05, + "loss": 2.4919, + "step": 1378000 + }, + { + "epoch": 6.83, + "learning_rate": 4.658646819557776e-05, + "loss": 2.5036, + "step": 1378500 + }, + { + "epoch": 6.83, + "learning_rate": 4.658522960915167e-05, + "loss": 2.4691, + "step": 1379000 + }, + { + "epoch": 6.83, + "learning_rate": 4.6583991022725584e-05, + "loss": 2.4853, + "step": 1379500 + }, + { + "epoch": 6.84, + "learning_rate": 4.65827524362995e-05, + "loss": 2.5148, + "step": 1380000 + }, + { + "epoch": 6.84, + "learning_rate": 4.658151384987342e-05, + "loss": 2.5151, + "step": 1380500 + }, + { + "epoch": 6.84, + "learning_rate": 4.6580275263447335e-05, + "loss": 2.4836, + "step": 1381000 + }, + { + "epoch": 6.84, + "learning_rate": 4.657903667702125e-05, + "loss": 2.4898, + "step": 1381500 + }, + { + "epoch": 6.85, + "learning_rate": 4.657779809059517e-05, + "loss": 2.4917, + "step": 1382000 + }, + { + "epoch": 6.85, + "learning_rate": 4.657655950416908e-05, + "loss": 2.5036, + "step": 1382500 + }, + { + "epoch": 6.85, + "learning_rate": 4.6575320917742996e-05, + "loss": 2.4992, + "step": 1383000 + }, + { + "epoch": 6.85, + "learning_rate": 4.657408233131691e-05, + "loss": 2.502, + "step": 1383500 + }, + { + "epoch": 6.86, + "learning_rate": 4.657284374489083e-05, + "loss": 2.4944, + "step": 1384000 + }, + { + "epoch": 6.86, + "learning_rate": 4.657160515846475e-05, + "loss": 2.4917, + "step": 1384500 + }, + { + "epoch": 6.86, + "learning_rate": 4.6570366572038664e-05, + "loss": 2.4888, + "step": 1385000 + }, + { + "epoch": 6.86, + "learning_rate": 4.656912798561258e-05, + "loss": 2.485, + "step": 1385500 + }, + { + "epoch": 6.87, + "learning_rate": 4.65678893991865e-05, + "loss": 2.4949, + "step": 1386000 + }, + { + "epoch": 6.87, + "learning_rate": 4.6566653289933266e-05, + "loss": 2.4753, + "step": 1386500 + }, + { + "epoch": 6.87, + "learning_rate": 4.656541470350718e-05, + "loss": 2.5044, + "step": 1387000 + }, + { + "epoch": 6.87, + "learning_rate": 4.65641761170811e-05, + "loss": 2.5088, + "step": 1387500 + }, + { + "epoch": 6.88, + "learning_rate": 4.656293753065502e-05, + "loss": 2.4922, + "step": 1388000 + }, + { + "epoch": 6.88, + "learning_rate": 4.6561698944228934e-05, + "loss": 2.5054, + "step": 1388500 + }, + { + "epoch": 6.88, + "learning_rate": 4.656046035780285e-05, + "loss": 2.4948, + "step": 1389000 + }, + { + "epoch": 6.88, + "learning_rate": 4.655922177137677e-05, + "loss": 2.5002, + "step": 1389500 + }, + { + "epoch": 6.89, + "learning_rate": 4.655798566212353e-05, + "loss": 2.4978, + "step": 1390000 + }, + { + "epoch": 6.89, + "learning_rate": 4.655674707569745e-05, + "loss": 2.5017, + "step": 1390500 + }, + { + "epoch": 6.89, + "learning_rate": 4.6555508489271364e-05, + "loss": 2.4925, + "step": 1391000 + }, + { + "epoch": 6.89, + "learning_rate": 4.655426990284528e-05, + "loss": 2.4932, + "step": 1391500 + }, + { + "epoch": 6.9, + "learning_rate": 4.655303379359205e-05, + "loss": 2.5058, + "step": 1392000 + }, + { + "epoch": 6.9, + "learning_rate": 4.6551795207165966e-05, + "loss": 2.4799, + "step": 1392500 + }, + { + "epoch": 6.9, + "learning_rate": 4.655055662073988e-05, + "loss": 2.5225, + "step": 1393000 + }, + { + "epoch": 6.9, + "learning_rate": 4.65493180343138e-05, + "loss": 2.4859, + "step": 1393500 + }, + { + "epoch": 6.91, + "learning_rate": 4.654807944788772e-05, + "loss": 2.521, + "step": 1394000 + }, + { + "epoch": 6.91, + "learning_rate": 4.6546840861461634e-05, + "loss": 2.4728, + "step": 1394500 + }, + { + "epoch": 6.91, + "learning_rate": 4.6545604752208396e-05, + "loss": 2.4708, + "step": 1395000 + }, + { + "epoch": 6.91, + "learning_rate": 4.654436616578231e-05, + "loss": 2.4922, + "step": 1395500 + }, + { + "epoch": 6.92, + "learning_rate": 4.654312757935623e-05, + "loss": 2.4731, + "step": 1396000 + }, + { + "epoch": 6.92, + "learning_rate": 4.654188899293015e-05, + "loss": 2.4955, + "step": 1396500 + }, + { + "epoch": 6.92, + "learning_rate": 4.6540650406504064e-05, + "loss": 2.4996, + "step": 1397000 + }, + { + "epoch": 6.92, + "learning_rate": 4.653941182007798e-05, + "loss": 2.4988, + "step": 1397500 + }, + { + "epoch": 6.93, + "learning_rate": 4.65381732336519e-05, + "loss": 2.476, + "step": 1398000 + }, + { + "epoch": 6.93, + "learning_rate": 4.6536934647225815e-05, + "loss": 2.487, + "step": 1398500 + }, + { + "epoch": 6.93, + "learning_rate": 4.6535698537972583e-05, + "loss": 2.4695, + "step": 1399000 + }, + { + "epoch": 6.93, + "learning_rate": 4.65344599515465e-05, + "loss": 2.4825, + "step": 1399500 + }, + { + "epoch": 6.94, + "learning_rate": 4.653322136512042e-05, + "loss": 2.4803, + "step": 1400000 + }, + { + "epoch": 6.94, + "learning_rate": 4.6531982778694334e-05, + "loss": 2.4772, + "step": 1400500 + }, + { + "epoch": 6.94, + "learning_rate": 4.653074419226825e-05, + "loss": 2.4992, + "step": 1401000 + }, + { + "epoch": 6.94, + "learning_rate": 4.652950560584217e-05, + "loss": 2.4755, + "step": 1401500 + }, + { + "epoch": 6.95, + "learning_rate": 4.6528267019416085e-05, + "loss": 2.486, + "step": 1402000 + }, + { + "epoch": 6.95, + "learning_rate": 4.652703091016285e-05, + "loss": 2.5144, + "step": 1402500 + }, + { + "epoch": 6.95, + "learning_rate": 4.652579480090962e-05, + "loss": 2.4947, + "step": 1403000 + }, + { + "epoch": 6.95, + "learning_rate": 4.652455621448354e-05, + "loss": 2.4977, + "step": 1403500 + }, + { + "epoch": 6.96, + "learning_rate": 4.652331762805745e-05, + "loss": 2.508, + "step": 1404000 + }, + { + "epoch": 6.96, + "learning_rate": 4.652207904163137e-05, + "loss": 2.5197, + "step": 1404500 + }, + { + "epoch": 6.96, + "learning_rate": 4.6520840455205284e-05, + "loss": 2.4885, + "step": 1405000 + }, + { + "epoch": 6.96, + "learning_rate": 4.65196018687792e-05, + "loss": 2.4976, + "step": 1405500 + }, + { + "epoch": 6.97, + "learning_rate": 4.651836328235312e-05, + "loss": 2.4476, + "step": 1406000 + }, + { + "epoch": 6.97, + "learning_rate": 4.6517124695927034e-05, + "loss": 2.5003, + "step": 1406500 + }, + { + "epoch": 6.97, + "learning_rate": 4.651588610950095e-05, + "loss": 2.4807, + "step": 1407000 + }, + { + "epoch": 6.97, + "learning_rate": 4.651464752307487e-05, + "loss": 2.5, + "step": 1407500 + }, + { + "epoch": 6.98, + "learning_rate": 4.6513408936648785e-05, + "loss": 2.5051, + "step": 1408000 + }, + { + "epoch": 6.98, + "learning_rate": 4.65121703502227e-05, + "loss": 2.4952, + "step": 1408500 + }, + { + "epoch": 6.98, + "learning_rate": 4.651093176379662e-05, + "loss": 2.4864, + "step": 1409000 + }, + { + "epoch": 6.98, + "learning_rate": 4.650969565454338e-05, + "loss": 2.4935, + "step": 1409500 + }, + { + "epoch": 6.99, + "learning_rate": 4.65084570681173e-05, + "loss": 2.5068, + "step": 1410000 + }, + { + "epoch": 6.99, + "learning_rate": 4.650722095886407e-05, + "loss": 2.5097, + "step": 1410500 + }, + { + "epoch": 6.99, + "learning_rate": 4.6505982372437984e-05, + "loss": 2.4744, + "step": 1411000 + }, + { + "epoch": 6.99, + "learning_rate": 4.65047437860119e-05, + "loss": 2.4966, + "step": 1411500 + }, + { + "epoch": 7.0, + "learning_rate": 4.650350519958582e-05, + "loss": 2.4788, + "step": 1412000 + }, + { + "epoch": 7.0, + "learning_rate": 4.6502266613159735e-05, + "loss": 2.4876, + "step": 1412500 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.6390809050019882, + "eval_accuracy_mlm": 0.5919321078827258, + "eval_accuracy_nsp": 0.8611933683454986, + "eval_loss": 2.4429469108581543, + "eval_runtime": 145.8536, + "eval_samples_per_second": 1748.048, + "eval_steps_per_second": 72.84, + "step": 1412901 + }, + { + "epoch": 7.0, + "learning_rate": 4.650102802673365e-05, + "loss": 2.4845, + "step": 1413000 + }, + { + "epoch": 7.0, + "learning_rate": 4.649978944030757e-05, + "loss": 2.4572, + "step": 1413500 + }, + { + "epoch": 7.01, + "learning_rate": 4.6498550853881485e-05, + "loss": 2.4539, + "step": 1414000 + }, + { + "epoch": 7.01, + "learning_rate": 4.6497314744628254e-05, + "loss": 2.4568, + "step": 1414500 + }, + { + "epoch": 7.01, + "learning_rate": 4.6496076158202164e-05, + "loss": 2.4616, + "step": 1415000 + }, + { + "epoch": 7.01, + "learning_rate": 4.649483757177608e-05, + "loss": 2.4551, + "step": 1415500 + }, + { + "epoch": 7.02, + "learning_rate": 4.649360146252286e-05, + "loss": 2.4791, + "step": 1416000 + }, + { + "epoch": 7.02, + "learning_rate": 4.649236287609677e-05, + "loss": 2.481, + "step": 1416500 + }, + { + "epoch": 7.02, + "learning_rate": 4.6491124289670684e-05, + "loss": 2.4784, + "step": 1417000 + }, + { + "epoch": 7.02, + "learning_rate": 4.64898857032446e-05, + "loss": 2.467, + "step": 1417500 + }, + { + "epoch": 7.03, + "learning_rate": 4.648864711681852e-05, + "loss": 2.4805, + "step": 1418000 + }, + { + "epoch": 7.03, + "learning_rate": 4.6487408530392435e-05, + "loss": 2.4275, + "step": 1418500 + }, + { + "epoch": 7.03, + "learning_rate": 4.648616994396635e-05, + "loss": 2.4766, + "step": 1419000 + }, + { + "epoch": 7.03, + "learning_rate": 4.648493135754027e-05, + "loss": 2.4559, + "step": 1419500 + }, + { + "epoch": 7.04, + "learning_rate": 4.6483692771114185e-05, + "loss": 2.5089, + "step": 1420000 + }, + { + "epoch": 7.04, + "learning_rate": 4.64824541846881e-05, + "loss": 2.4865, + "step": 1420500 + }, + { + "epoch": 7.04, + "learning_rate": 4.648121559826202e-05, + "loss": 2.4508, + "step": 1421000 + }, + { + "epoch": 7.04, + "learning_rate": 4.6479977011835936e-05, + "loss": 2.4655, + "step": 1421500 + }, + { + "epoch": 7.05, + "learning_rate": 4.647873842540985e-05, + "loss": 2.4671, + "step": 1422000 + }, + { + "epoch": 7.05, + "learning_rate": 4.647749983898377e-05, + "loss": 2.4695, + "step": 1422500 + }, + { + "epoch": 7.05, + "learning_rate": 4.647626372973053e-05, + "loss": 2.4488, + "step": 1423000 + }, + { + "epoch": 7.05, + "learning_rate": 4.647502514330445e-05, + "loss": 2.4521, + "step": 1423500 + }, + { + "epoch": 7.05, + "learning_rate": 4.647378903405122e-05, + "loss": 2.4709, + "step": 1424000 + }, + { + "epoch": 7.06, + "learning_rate": 4.647255540197084e-05, + "loss": 2.4587, + "step": 1424500 + }, + { + "epoch": 7.06, + "learning_rate": 4.6471316815544755e-05, + "loss": 2.4739, + "step": 1425000 + }, + { + "epoch": 7.06, + "learning_rate": 4.647007822911867e-05, + "loss": 2.4977, + "step": 1425500 + }, + { + "epoch": 7.06, + "learning_rate": 4.646883964269259e-05, + "loss": 2.4725, + "step": 1426000 + }, + { + "epoch": 7.07, + "learning_rate": 4.6467601056266506e-05, + "loss": 2.4716, + "step": 1426500 + }, + { + "epoch": 7.07, + "learning_rate": 4.646636246984042e-05, + "loss": 2.4874, + "step": 1427000 + }, + { + "epoch": 7.07, + "learning_rate": 4.646512388341434e-05, + "loss": 2.4674, + "step": 1427500 + }, + { + "epoch": 7.07, + "learning_rate": 4.646388529698826e-05, + "loss": 2.4617, + "step": 1428000 + }, + { + "epoch": 7.08, + "learning_rate": 4.6462646710562174e-05, + "loss": 2.4703, + "step": 1428500 + }, + { + "epoch": 7.08, + "learning_rate": 4.6461408124136084e-05, + "loss": 2.4612, + "step": 1429000 + }, + { + "epoch": 7.08, + "learning_rate": 4.646016953771e-05, + "loss": 2.4505, + "step": 1429500 + }, + { + "epoch": 7.08, + "learning_rate": 4.645893095128392e-05, + "loss": 2.4645, + "step": 1430000 + }, + { + "epoch": 7.09, + "learning_rate": 4.6457692364857835e-05, + "loss": 2.4425, + "step": 1430500 + }, + { + "epoch": 7.09, + "learning_rate": 4.645645377843175e-05, + "loss": 2.4478, + "step": 1431000 + }, + { + "epoch": 7.09, + "learning_rate": 4.645521519200567e-05, + "loss": 2.4645, + "step": 1431500 + }, + { + "epoch": 7.09, + "learning_rate": 4.6453979082752444e-05, + "loss": 2.4471, + "step": 1432000 + }, + { + "epoch": 7.1, + "learning_rate": 4.6452740496326354e-05, + "loss": 2.4378, + "step": 1432500 + }, + { + "epoch": 7.1, + "learning_rate": 4.645150190990027e-05, + "loss": 2.4872, + "step": 1433000 + }, + { + "epoch": 7.1, + "learning_rate": 4.645026332347419e-05, + "loss": 2.4629, + "step": 1433500 + }, + { + "epoch": 7.1, + "learning_rate": 4.6449024737048105e-05, + "loss": 2.4646, + "step": 1434000 + }, + { + "epoch": 7.11, + "learning_rate": 4.644778615062202e-05, + "loss": 2.4917, + "step": 1434500 + }, + { + "epoch": 7.11, + "learning_rate": 4.644654756419594e-05, + "loss": 2.4704, + "step": 1435000 + }, + { + "epoch": 7.11, + "learning_rate": 4.644530897776985e-05, + "loss": 2.4439, + "step": 1435500 + }, + { + "epoch": 7.11, + "learning_rate": 4.6444070391343766e-05, + "loss": 2.458, + "step": 1436000 + }, + { + "epoch": 7.12, + "learning_rate": 4.6442834282090535e-05, + "loss": 2.4867, + "step": 1436500 + }, + { + "epoch": 7.12, + "learning_rate": 4.644159569566445e-05, + "loss": 2.4849, + "step": 1437000 + }, + { + "epoch": 7.12, + "learning_rate": 4.644035710923837e-05, + "loss": 2.4636, + "step": 1437500 + }, + { + "epoch": 7.12, + "learning_rate": 4.6439118522812286e-05, + "loss": 2.4742, + "step": 1438000 + }, + { + "epoch": 7.13, + "learning_rate": 4.643788736790476e-05, + "loss": 2.4822, + "step": 1438500 + }, + { + "epoch": 7.13, + "learning_rate": 4.6436648781478675e-05, + "loss": 2.4591, + "step": 1439000 + }, + { + "epoch": 7.13, + "learning_rate": 4.643541019505259e-05, + "loss": 2.4727, + "step": 1439500 + }, + { + "epoch": 7.13, + "learning_rate": 4.643417408579936e-05, + "loss": 2.4814, + "step": 1440000 + }, + { + "epoch": 7.14, + "learning_rate": 4.643293549937328e-05, + "loss": 2.4816, + "step": 1440500 + }, + { + "epoch": 7.14, + "learning_rate": 4.6431696912947195e-05, + "loss": 2.4745, + "step": 1441000 + }, + { + "epoch": 7.14, + "learning_rate": 4.643045832652111e-05, + "loss": 2.467, + "step": 1441500 + }, + { + "epoch": 7.14, + "learning_rate": 4.642921974009503e-05, + "loss": 2.4839, + "step": 1442000 + }, + { + "epoch": 7.15, + "learning_rate": 4.6427981153668946e-05, + "loss": 2.4538, + "step": 1442500 + }, + { + "epoch": 7.15, + "learning_rate": 4.6426742567242856e-05, + "loss": 2.4355, + "step": 1443000 + }, + { + "epoch": 7.15, + "learning_rate": 4.642550398081677e-05, + "loss": 2.479, + "step": 1443500 + }, + { + "epoch": 7.15, + "learning_rate": 4.642426539439069e-05, + "loss": 2.4619, + "step": 1444000 + }, + { + "epoch": 7.16, + "learning_rate": 4.642302680796461e-05, + "loss": 2.4853, + "step": 1444500 + }, + { + "epoch": 7.16, + "learning_rate": 4.6421788221538524e-05, + "loss": 2.4502, + "step": 1445000 + }, + { + "epoch": 7.16, + "learning_rate": 4.642054963511244e-05, + "loss": 2.4703, + "step": 1445500 + }, + { + "epoch": 7.16, + "learning_rate": 4.641931104868636e-05, + "loss": 2.4679, + "step": 1446000 + }, + { + "epoch": 7.17, + "learning_rate": 4.6418072462260274e-05, + "loss": 2.4749, + "step": 1446500 + }, + { + "epoch": 7.17, + "learning_rate": 4.641683387583419e-05, + "loss": 2.4873, + "step": 1447000 + }, + { + "epoch": 7.17, + "learning_rate": 4.641559528940811e-05, + "loss": 2.4585, + "step": 1447500 + }, + { + "epoch": 7.17, + "learning_rate": 4.641435670298202e-05, + "loss": 2.4642, + "step": 1448000 + }, + { + "epoch": 7.18, + "learning_rate": 4.6413120593728794e-05, + "loss": 2.4696, + "step": 1448500 + }, + { + "epoch": 7.18, + "learning_rate": 4.641188200730271e-05, + "loss": 2.4597, + "step": 1449000 + }, + { + "epoch": 7.18, + "learning_rate": 4.641064342087663e-05, + "loss": 2.4897, + "step": 1449500 + }, + { + "epoch": 7.18, + "learning_rate": 4.640940731162339e-05, + "loss": 2.4707, + "step": 1450000 + }, + { + "epoch": 7.19, + "learning_rate": 4.640816872519731e-05, + "loss": 2.4917, + "step": 1450500 + }, + { + "epoch": 7.19, + "learning_rate": 4.6406930138771224e-05, + "loss": 2.4877, + "step": 1451000 + }, + { + "epoch": 7.19, + "learning_rate": 4.640569155234514e-05, + "loss": 2.4417, + "step": 1451500 + }, + { + "epoch": 7.19, + "learning_rate": 4.640445544309191e-05, + "loss": 2.4632, + "step": 1452000 + }, + { + "epoch": 7.2, + "learning_rate": 4.6403216856665826e-05, + "loss": 2.5164, + "step": 1452500 + }, + { + "epoch": 7.2, + "learning_rate": 4.640197827023974e-05, + "loss": 2.476, + "step": 1453000 + }, + { + "epoch": 7.2, + "learning_rate": 4.640073968381366e-05, + "loss": 2.4726, + "step": 1453500 + }, + { + "epoch": 7.2, + "learning_rate": 4.639950357456043e-05, + "loss": 2.4642, + "step": 1454000 + }, + { + "epoch": 7.21, + "learning_rate": 4.6398264988134346e-05, + "loss": 2.4863, + "step": 1454500 + }, + { + "epoch": 7.21, + "learning_rate": 4.639702640170826e-05, + "loss": 2.4524, + "step": 1455000 + }, + { + "epoch": 7.21, + "learning_rate": 4.639578781528218e-05, + "loss": 2.4725, + "step": 1455500 + }, + { + "epoch": 7.21, + "learning_rate": 4.639455170602894e-05, + "loss": 2.4505, + "step": 1456000 + }, + { + "epoch": 7.22, + "learning_rate": 4.639331311960286e-05, + "loss": 2.4743, + "step": 1456500 + }, + { + "epoch": 7.22, + "learning_rate": 4.6392074533176776e-05, + "loss": 2.4581, + "step": 1457000 + }, + { + "epoch": 7.22, + "learning_rate": 4.639083594675069e-05, + "loss": 2.4725, + "step": 1457500 + }, + { + "epoch": 7.22, + "learning_rate": 4.638959736032461e-05, + "loss": 2.4704, + "step": 1458000 + }, + { + "epoch": 7.23, + "learning_rate": 4.6388358773898526e-05, + "loss": 2.4718, + "step": 1458500 + }, + { + "epoch": 7.23, + "learning_rate": 4.638712018747244e-05, + "loss": 2.4867, + "step": 1459000 + }, + { + "epoch": 7.23, + "learning_rate": 4.638588407821921e-05, + "loss": 2.4673, + "step": 1459500 + }, + { + "epoch": 7.23, + "learning_rate": 4.638464549179313e-05, + "loss": 2.4954, + "step": 1460000 + }, + { + "epoch": 7.24, + "learning_rate": 4.6383406905367046e-05, + "loss": 2.4766, + "step": 1460500 + }, + { + "epoch": 7.24, + "learning_rate": 4.638216831894096e-05, + "loss": 2.4681, + "step": 1461000 + }, + { + "epoch": 7.24, + "learning_rate": 4.638092973251488e-05, + "loss": 2.4931, + "step": 1461500 + }, + { + "epoch": 7.24, + "learning_rate": 4.63796911460888e-05, + "loss": 2.4671, + "step": 1462000 + }, + { + "epoch": 7.25, + "learning_rate": 4.6378452559662714e-05, + "loss": 2.4901, + "step": 1462500 + }, + { + "epoch": 7.25, + "learning_rate": 4.637721397323663e-05, + "loss": 2.4586, + "step": 1463000 + }, + { + "epoch": 7.25, + "learning_rate": 4.637597538681054e-05, + "loss": 2.4707, + "step": 1463500 + }, + { + "epoch": 7.25, + "learning_rate": 4.637474175473016e-05, + "loss": 2.4735, + "step": 1464000 + }, + { + "epoch": 7.26, + "learning_rate": 4.637350316830408e-05, + "loss": 2.4678, + "step": 1464500 + }, + { + "epoch": 7.26, + "learning_rate": 4.6372264581877995e-05, + "loss": 2.485, + "step": 1465000 + }, + { + "epoch": 7.26, + "learning_rate": 4.637102599545191e-05, + "loss": 2.4831, + "step": 1465500 + }, + { + "epoch": 7.26, + "learning_rate": 4.636978740902583e-05, + "loss": 2.4813, + "step": 1466000 + }, + { + "epoch": 7.27, + "learning_rate": 4.6368548822599746e-05, + "loss": 2.4825, + "step": 1466500 + }, + { + "epoch": 7.27, + "learning_rate": 4.6367312713346515e-05, + "loss": 2.4867, + "step": 1467000 + }, + { + "epoch": 7.27, + "learning_rate": 4.6366074126920425e-05, + "loss": 2.4784, + "step": 1467500 + }, + { + "epoch": 7.27, + "learning_rate": 4.636483554049434e-05, + "loss": 2.4478, + "step": 1468000 + }, + { + "epoch": 7.28, + "learning_rate": 4.636359695406826e-05, + "loss": 2.4608, + "step": 1468500 + }, + { + "epoch": 7.28, + "learning_rate": 4.6362358367642176e-05, + "loss": 2.4759, + "step": 1469000 + }, + { + "epoch": 7.28, + "learning_rate": 4.636111978121609e-05, + "loss": 2.4748, + "step": 1469500 + }, + { + "epoch": 7.28, + "learning_rate": 4.635988119479001e-05, + "loss": 2.4616, + "step": 1470000 + }, + { + "epoch": 7.29, + "learning_rate": 4.635864260836393e-05, + "loss": 2.4663, + "step": 1470500 + }, + { + "epoch": 7.29, + "learning_rate": 4.6357404021937844e-05, + "loss": 2.4643, + "step": 1471000 + }, + { + "epoch": 7.29, + "learning_rate": 4.635616543551176e-05, + "loss": 2.482, + "step": 1471500 + }, + { + "epoch": 7.29, + "learning_rate": 4.635492932625853e-05, + "loss": 2.4848, + "step": 1472000 + }, + { + "epoch": 7.3, + "learning_rate": 4.6353690739832446e-05, + "loss": 2.4771, + "step": 1472500 + }, + { + "epoch": 7.3, + "learning_rate": 4.635245215340636e-05, + "loss": 2.4642, + "step": 1473000 + }, + { + "epoch": 7.3, + "learning_rate": 4.635121356698028e-05, + "loss": 2.4742, + "step": 1473500 + }, + { + "epoch": 7.3, + "learning_rate": 4.63499749805542e-05, + "loss": 2.4884, + "step": 1474000 + }, + { + "epoch": 7.31, + "learning_rate": 4.6348736394128114e-05, + "loss": 2.4658, + "step": 1474500 + }, + { + "epoch": 7.31, + "learning_rate": 4.634749780770203e-05, + "loss": 2.49, + "step": 1475000 + }, + { + "epoch": 7.31, + "learning_rate": 4.634625922127595e-05, + "loss": 2.4885, + "step": 1475500 + }, + { + "epoch": 7.31, + "learning_rate": 4.634502558919557e-05, + "loss": 2.4919, + "step": 1476000 + }, + { + "epoch": 7.32, + "learning_rate": 4.634378700276948e-05, + "loss": 2.4588, + "step": 1476500 + }, + { + "epoch": 7.32, + "learning_rate": 4.6342548416343396e-05, + "loss": 2.4474, + "step": 1477000 + }, + { + "epoch": 7.32, + "learning_rate": 4.634130982991731e-05, + "loss": 2.4867, + "step": 1477500 + }, + { + "epoch": 7.32, + "learning_rate": 4.634007124349123e-05, + "loss": 2.4749, + "step": 1478000 + }, + { + "epoch": 7.33, + "learning_rate": 4.6338832657065146e-05, + "loss": 2.4603, + "step": 1478500 + }, + { + "epoch": 7.33, + "learning_rate": 4.633759407063906e-05, + "loss": 2.4551, + "step": 1479000 + }, + { + "epoch": 7.33, + "learning_rate": 4.633635548421298e-05, + "loss": 2.4716, + "step": 1479500 + }, + { + "epoch": 7.33, + "learning_rate": 4.633511937495974e-05, + "loss": 2.4879, + "step": 1480000 + }, + { + "epoch": 7.33, + "learning_rate": 4.633388078853366e-05, + "loss": 2.4664, + "step": 1480500 + }, + { + "epoch": 7.34, + "learning_rate": 4.6332642202107576e-05, + "loss": 2.4596, + "step": 1481000 + }, + { + "epoch": 7.34, + "learning_rate": 4.633140361568149e-05, + "loss": 2.5133, + "step": 1481500 + }, + { + "epoch": 7.34, + "learning_rate": 4.633016502925541e-05, + "loss": 2.4767, + "step": 1482000 + }, + { + "epoch": 7.34, + "learning_rate": 4.632892644282933e-05, + "loss": 2.4489, + "step": 1482500 + }, + { + "epoch": 7.35, + "learning_rate": 4.6327687856403244e-05, + "loss": 2.4446, + "step": 1483000 + }, + { + "epoch": 7.35, + "learning_rate": 4.632644926997716e-05, + "loss": 2.4838, + "step": 1483500 + }, + { + "epoch": 7.35, + "learning_rate": 4.632521563789679e-05, + "loss": 2.4423, + "step": 1484000 + }, + { + "epoch": 7.35, + "learning_rate": 4.63239770514707e-05, + "loss": 2.4774, + "step": 1484500 + }, + { + "epoch": 7.36, + "learning_rate": 4.6322738465044615e-05, + "loss": 2.4497, + "step": 1485000 + }, + { + "epoch": 7.36, + "learning_rate": 4.632149987861853e-05, + "loss": 2.4882, + "step": 1485500 + }, + { + "epoch": 7.36, + "learning_rate": 4.63202637693653e-05, + "loss": 2.4747, + "step": 1486000 + }, + { + "epoch": 7.36, + "learning_rate": 4.631902518293922e-05, + "loss": 2.4873, + "step": 1486500 + }, + { + "epoch": 7.37, + "learning_rate": 4.6317786596513135e-05, + "loss": 2.4719, + "step": 1487000 + }, + { + "epoch": 7.37, + "learning_rate": 4.631654801008705e-05, + "loss": 2.466, + "step": 1487500 + }, + { + "epoch": 7.37, + "learning_rate": 4.631530942366097e-05, + "loss": 2.5012, + "step": 1488000 + }, + { + "epoch": 7.37, + "learning_rate": 4.6314070837234886e-05, + "loss": 2.4666, + "step": 1488500 + }, + { + "epoch": 7.38, + "learning_rate": 4.6312832250808796e-05, + "loss": 2.476, + "step": 1489000 + }, + { + "epoch": 7.38, + "learning_rate": 4.631159366438271e-05, + "loss": 2.4908, + "step": 1489500 + }, + { + "epoch": 7.38, + "learning_rate": 4.631035507795663e-05, + "loss": 2.4713, + "step": 1490000 + }, + { + "epoch": 7.38, + "learning_rate": 4.630911649153055e-05, + "loss": 2.4714, + "step": 1490500 + }, + { + "epoch": 7.39, + "learning_rate": 4.6307877905104464e-05, + "loss": 2.4822, + "step": 1491000 + }, + { + "epoch": 7.39, + "learning_rate": 4.630663931867838e-05, + "loss": 2.468, + "step": 1491500 + }, + { + "epoch": 7.39, + "learning_rate": 4.63054007322523e-05, + "loss": 2.4597, + "step": 1492000 + }, + { + "epoch": 7.39, + "learning_rate": 4.6304162145826214e-05, + "loss": 2.4886, + "step": 1492500 + }, + { + "epoch": 7.4, + "learning_rate": 4.630292355940013e-05, + "loss": 2.4621, + "step": 1493000 + }, + { + "epoch": 7.4, + "learning_rate": 4.630168745014689e-05, + "loss": 2.4916, + "step": 1493500 + }, + { + "epoch": 7.4, + "learning_rate": 4.630044886372081e-05, + "loss": 2.5105, + "step": 1494000 + }, + { + "epoch": 7.4, + "learning_rate": 4.629921027729473e-05, + "loss": 2.4965, + "step": 1494500 + }, + { + "epoch": 7.41, + "learning_rate": 4.62979741680415e-05, + "loss": 2.4962, + "step": 1495000 + }, + { + "epoch": 7.41, + "learning_rate": 4.629673558161541e-05, + "loss": 2.4789, + "step": 1495500 + }, + { + "epoch": 7.41, + "learning_rate": 4.629549947236219e-05, + "loss": 2.4785, + "step": 1496000 + }, + { + "epoch": 7.41, + "learning_rate": 4.6294260885936105e-05, + "loss": 2.4689, + "step": 1496500 + }, + { + "epoch": 7.42, + "learning_rate": 4.629302229951002e-05, + "loss": 2.5094, + "step": 1497000 + }, + { + "epoch": 7.42, + "learning_rate": 4.629178371308394e-05, + "loss": 2.4664, + "step": 1497500 + }, + { + "epoch": 7.42, + "learning_rate": 4.629054512665785e-05, + "loss": 2.4711, + "step": 1498000 + }, + { + "epoch": 7.42, + "learning_rate": 4.6289306540231766e-05, + "loss": 2.4982, + "step": 1498500 + }, + { + "epoch": 7.43, + "learning_rate": 4.628806795380568e-05, + "loss": 2.4652, + "step": 1499000 + }, + { + "epoch": 7.43, + "learning_rate": 4.62868293673796e-05, + "loss": 2.464, + "step": 1499500 + }, + { + "epoch": 7.43, + "learning_rate": 4.628559078095352e-05, + "loss": 2.4816, + "step": 1500000 + }, + { + "epoch": 7.43, + "learning_rate": 4.628435219452743e-05, + "loss": 2.461, + "step": 1500500 + }, + { + "epoch": 7.44, + "learning_rate": 4.6283113608101344e-05, + "loss": 2.4604, + "step": 1501000 + }, + { + "epoch": 7.44, + "learning_rate": 4.628187502167526e-05, + "loss": 2.4845, + "step": 1501500 + }, + { + "epoch": 7.44, + "learning_rate": 4.628063643524918e-05, + "loss": 2.4844, + "step": 1502000 + }, + { + "epoch": 7.44, + "learning_rate": 4.6279397848823095e-05, + "loss": 2.4942, + "step": 1502500 + }, + { + "epoch": 7.45, + "learning_rate": 4.627815926239701e-05, + "loss": 2.4517, + "step": 1503000 + }, + { + "epoch": 7.45, + "learning_rate": 4.627692067597093e-05, + "loss": 2.4613, + "step": 1503500 + }, + { + "epoch": 7.45, + "learning_rate": 4.62756845667177e-05, + "loss": 2.4785, + "step": 1504000 + }, + { + "epoch": 7.45, + "learning_rate": 4.6274445980291615e-05, + "loss": 2.4746, + "step": 1504500 + }, + { + "epoch": 7.46, + "learning_rate": 4.627320739386553e-05, + "loss": 2.4834, + "step": 1505000 + }, + { + "epoch": 7.46, + "learning_rate": 4.627196880743945e-05, + "loss": 2.5137, + "step": 1505500 + }, + { + "epoch": 7.46, + "learning_rate": 4.6270730221013365e-05, + "loss": 2.4748, + "step": 1506000 + }, + { + "epoch": 7.46, + "learning_rate": 4.626949163458728e-05, + "loss": 2.459, + "step": 1506500 + }, + { + "epoch": 7.47, + "learning_rate": 4.62682530481612e-05, + "loss": 2.4794, + "step": 1507000 + }, + { + "epoch": 7.47, + "learning_rate": 4.6267014461735116e-05, + "loss": 2.4742, + "step": 1507500 + }, + { + "epoch": 7.47, + "learning_rate": 4.626577587530903e-05, + "loss": 2.4774, + "step": 1508000 + }, + { + "epoch": 7.47, + "learning_rate": 4.626453728888295e-05, + "loss": 2.4662, + "step": 1508500 + }, + { + "epoch": 7.48, + "learning_rate": 4.626330117962971e-05, + "loss": 2.4627, + "step": 1509000 + }, + { + "epoch": 7.48, + "learning_rate": 4.626206259320363e-05, + "loss": 2.4786, + "step": 1509500 + }, + { + "epoch": 7.48, + "learning_rate": 4.62608264839504e-05, + "loss": 2.4726, + "step": 1510000 + }, + { + "epoch": 7.48, + "learning_rate": 4.6259590374697173e-05, + "loss": 2.4954, + "step": 1510500 + }, + { + "epoch": 7.49, + "learning_rate": 4.6258351788271084e-05, + "loss": 2.4493, + "step": 1511000 + }, + { + "epoch": 7.49, + "learning_rate": 4.6257113201845e-05, + "loss": 2.4704, + "step": 1511500 + }, + { + "epoch": 7.49, + "learning_rate": 4.625587461541892e-05, + "loss": 2.4773, + "step": 1512000 + }, + { + "epoch": 7.49, + "learning_rate": 4.6254636028992834e-05, + "loss": 2.4571, + "step": 1512500 + }, + { + "epoch": 7.5, + "learning_rate": 4.62533999197396e-05, + "loss": 2.4826, + "step": 1513000 + }, + { + "epoch": 7.5, + "learning_rate": 4.625216133331352e-05, + "loss": 2.4711, + "step": 1513500 + }, + { + "epoch": 7.5, + "learning_rate": 4.625092274688743e-05, + "loss": 2.5154, + "step": 1514000 + }, + { + "epoch": 7.5, + "learning_rate": 4.624968416046135e-05, + "loss": 2.4689, + "step": 1514500 + }, + { + "epoch": 7.51, + "learning_rate": 4.624844805120812e-05, + "loss": 2.4558, + "step": 1515000 + }, + { + "epoch": 7.51, + "learning_rate": 4.624720946478204e-05, + "loss": 2.489, + "step": 1515500 + }, + { + "epoch": 7.51, + "learning_rate": 4.624597087835596e-05, + "loss": 2.4775, + "step": 1516000 + }, + { + "epoch": 7.51, + "learning_rate": 4.6244732291929874e-05, + "loss": 2.5, + "step": 1516500 + }, + { + "epoch": 7.52, + "learning_rate": 4.6243496182676636e-05, + "loss": 2.5144, + "step": 1517000 + }, + { + "epoch": 7.52, + "learning_rate": 4.624225759625055e-05, + "loss": 2.4776, + "step": 1517500 + }, + { + "epoch": 7.52, + "learning_rate": 4.624101900982447e-05, + "loss": 2.5141, + "step": 1518000 + }, + { + "epoch": 7.52, + "learning_rate": 4.6239780423398386e-05, + "loss": 2.4846, + "step": 1518500 + }, + { + "epoch": 7.53, + "learning_rate": 4.62385418369723e-05, + "loss": 2.4617, + "step": 1519000 + }, + { + "epoch": 7.53, + "learning_rate": 4.623730325054622e-05, + "loss": 2.4419, + "step": 1519500 + }, + { + "epoch": 7.53, + "learning_rate": 4.623606466412014e-05, + "loss": 2.4669, + "step": 1520000 + }, + { + "epoch": 7.53, + "learning_rate": 4.623482607769405e-05, + "loss": 2.4905, + "step": 1520500 + }, + { + "epoch": 7.54, + "learning_rate": 4.6233587491267964e-05, + "loss": 2.4972, + "step": 1521000 + }, + { + "epoch": 7.54, + "learning_rate": 4.623234890484188e-05, + "loss": 2.4975, + "step": 1521500 + }, + { + "epoch": 7.54, + "learning_rate": 4.62311103184158e-05, + "loss": 2.4753, + "step": 1522000 + }, + { + "epoch": 7.54, + "learning_rate": 4.6229871731989715e-05, + "loss": 2.4884, + "step": 1522500 + }, + { + "epoch": 7.55, + "learning_rate": 4.622863314556363e-05, + "loss": 2.4908, + "step": 1523000 + }, + { + "epoch": 7.55, + "learning_rate": 4.62273970363104e-05, + "loss": 2.4883, + "step": 1523500 + }, + { + "epoch": 7.55, + "learning_rate": 4.622615844988432e-05, + "loss": 2.48, + "step": 1524000 + }, + { + "epoch": 7.55, + "learning_rate": 4.6224919863458235e-05, + "loss": 2.4724, + "step": 1524500 + }, + { + "epoch": 7.56, + "learning_rate": 4.622368127703215e-05, + "loss": 2.4895, + "step": 1525000 + }, + { + "epoch": 7.56, + "learning_rate": 4.622244516777892e-05, + "loss": 2.4562, + "step": 1525500 + }, + { + "epoch": 7.56, + "learning_rate": 4.622120658135284e-05, + "loss": 2.4682, + "step": 1526000 + }, + { + "epoch": 7.56, + "learning_rate": 4.621996799492675e-05, + "loss": 2.4951, + "step": 1526500 + }, + { + "epoch": 7.57, + "learning_rate": 4.6218729408500664e-05, + "loss": 2.4807, + "step": 1527000 + }, + { + "epoch": 7.57, + "learning_rate": 4.621749082207458e-05, + "loss": 2.51, + "step": 1527500 + }, + { + "epoch": 7.57, + "learning_rate": 4.621625471282136e-05, + "loss": 2.465, + "step": 1528000 + }, + { + "epoch": 7.57, + "learning_rate": 4.621501860356812e-05, + "loss": 2.4833, + "step": 1528500 + }, + { + "epoch": 7.58, + "learning_rate": 4.6213780017142036e-05, + "loss": 2.4647, + "step": 1529000 + }, + { + "epoch": 7.58, + "learning_rate": 4.621254143071595e-05, + "loss": 2.461, + "step": 1529500 + }, + { + "epoch": 7.58, + "learning_rate": 4.621130284428987e-05, + "loss": 2.4943, + "step": 1530000 + }, + { + "epoch": 7.58, + "learning_rate": 4.6210064257863787e-05, + "loss": 2.4836, + "step": 1530500 + }, + { + "epoch": 7.59, + "learning_rate": 4.6208825671437704e-05, + "loss": 2.4815, + "step": 1531000 + }, + { + "epoch": 7.59, + "learning_rate": 4.620758956218447e-05, + "loss": 2.4911, + "step": 1531500 + }, + { + "epoch": 7.59, + "learning_rate": 4.620635097575839e-05, + "loss": 2.5123, + "step": 1532000 + }, + { + "epoch": 7.59, + "learning_rate": 4.6205112389332306e-05, + "loss": 2.4855, + "step": 1532500 + }, + { + "epoch": 7.6, + "learning_rate": 4.620387380290622e-05, + "loss": 2.4682, + "step": 1533000 + }, + { + "epoch": 7.6, + "learning_rate": 4.620263521648014e-05, + "loss": 2.4504, + "step": 1533500 + }, + { + "epoch": 7.6, + "learning_rate": 4.620139663005406e-05, + "loss": 2.4983, + "step": 1534000 + }, + { + "epoch": 7.6, + "learning_rate": 4.6200158043627974e-05, + "loss": 2.444, + "step": 1534500 + }, + { + "epoch": 7.6, + "learning_rate": 4.619891945720189e-05, + "loss": 2.4729, + "step": 1535000 + }, + { + "epoch": 7.61, + "learning_rate": 4.619768087077581e-05, + "loss": 2.4705, + "step": 1535500 + }, + { + "epoch": 7.61, + "learning_rate": 4.619644476152257e-05, + "loss": 2.4669, + "step": 1536000 + }, + { + "epoch": 7.61, + "learning_rate": 4.619520617509649e-05, + "loss": 2.4638, + "step": 1536500 + }, + { + "epoch": 7.61, + "learning_rate": 4.6193967588670404e-05, + "loss": 2.4943, + "step": 1537000 + }, + { + "epoch": 7.62, + "learning_rate": 4.6192733956590024e-05, + "loss": 2.4793, + "step": 1537500 + }, + { + "epoch": 7.62, + "learning_rate": 4.619149537016394e-05, + "loss": 2.4636, + "step": 1538000 + }, + { + "epoch": 7.62, + "learning_rate": 4.619025678373786e-05, + "loss": 2.4792, + "step": 1538500 + }, + { + "epoch": 7.62, + "learning_rate": 4.6189018197311775e-05, + "loss": 2.4809, + "step": 1539000 + }, + { + "epoch": 7.63, + "learning_rate": 4.618777961088569e-05, + "loss": 2.4845, + "step": 1539500 + }, + { + "epoch": 7.63, + "learning_rate": 4.618654102445961e-05, + "loss": 2.4399, + "step": 1540000 + }, + { + "epoch": 7.63, + "learning_rate": 4.6185302438033526e-05, + "loss": 2.4847, + "step": 1540500 + }, + { + "epoch": 7.63, + "learning_rate": 4.618406385160744e-05, + "loss": 2.4646, + "step": 1541000 + }, + { + "epoch": 7.64, + "learning_rate": 4.618282526518136e-05, + "loss": 2.4743, + "step": 1541500 + }, + { + "epoch": 7.64, + "learning_rate": 4.618158667875527e-05, + "loss": 2.4826, + "step": 1542000 + }, + { + "epoch": 7.64, + "learning_rate": 4.618034809232919e-05, + "loss": 2.5099, + "step": 1542500 + }, + { + "epoch": 7.64, + "learning_rate": 4.6179109505903104e-05, + "loss": 2.478, + "step": 1543000 + }, + { + "epoch": 7.65, + "learning_rate": 4.617787091947702e-05, + "loss": 2.4752, + "step": 1543500 + }, + { + "epoch": 7.65, + "learning_rate": 4.617663481022379e-05, + "loss": 2.4662, + "step": 1544000 + }, + { + "epoch": 7.65, + "learning_rate": 4.6175396223797706e-05, + "loss": 2.48, + "step": 1544500 + }, + { + "epoch": 7.65, + "learning_rate": 4.617415763737162e-05, + "loss": 2.498, + "step": 1545000 + }, + { + "epoch": 7.66, + "learning_rate": 4.617291905094554e-05, + "loss": 2.4554, + "step": 1545500 + }, + { + "epoch": 7.66, + "learning_rate": 4.617168046451946e-05, + "loss": 2.481, + "step": 1546000 + }, + { + "epoch": 7.66, + "learning_rate": 4.6170444355266226e-05, + "loss": 2.4825, + "step": 1546500 + }, + { + "epoch": 7.66, + "learning_rate": 4.616920576884014e-05, + "loss": 2.4594, + "step": 1547000 + }, + { + "epoch": 7.67, + "learning_rate": 4.616796718241406e-05, + "loss": 2.4847, + "step": 1547500 + }, + { + "epoch": 7.67, + "learning_rate": 4.616672859598798e-05, + "loss": 2.4623, + "step": 1548000 + }, + { + "epoch": 7.67, + "learning_rate": 4.616549000956189e-05, + "loss": 2.4656, + "step": 1548500 + }, + { + "epoch": 7.67, + "learning_rate": 4.6164251423135804e-05, + "loss": 2.4649, + "step": 1549000 + }, + { + "epoch": 7.68, + "learning_rate": 4.616301531388257e-05, + "loss": 2.474, + "step": 1549500 + }, + { + "epoch": 7.68, + "learning_rate": 4.616177672745649e-05, + "loss": 2.4778, + "step": 1550000 + }, + { + "epoch": 7.68, + "learning_rate": 4.616054061820326e-05, + "loss": 2.4788, + "step": 1550500 + }, + { + "epoch": 7.68, + "learning_rate": 4.6159302031777175e-05, + "loss": 2.4836, + "step": 1551000 + }, + { + "epoch": 7.69, + "learning_rate": 4.615806344535109e-05, + "loss": 2.4977, + "step": 1551500 + }, + { + "epoch": 7.69, + "learning_rate": 4.615682733609786e-05, + "loss": 2.4888, + "step": 1552000 + }, + { + "epoch": 7.69, + "learning_rate": 4.615558874967177e-05, + "loss": 2.4875, + "step": 1552500 + }, + { + "epoch": 7.69, + "learning_rate": 4.61543551175914e-05, + "loss": 2.503, + "step": 1553000 + }, + { + "epoch": 7.7, + "learning_rate": 4.6153116531165316e-05, + "loss": 2.5001, + "step": 1553500 + }, + { + "epoch": 7.7, + "learning_rate": 4.615187794473923e-05, + "loss": 2.4949, + "step": 1554000 + }, + { + "epoch": 7.7, + "learning_rate": 4.615063935831315e-05, + "loss": 2.4934, + "step": 1554500 + }, + { + "epoch": 7.7, + "learning_rate": 4.6149400771887066e-05, + "loss": 2.4785, + "step": 1555000 + }, + { + "epoch": 7.71, + "learning_rate": 4.614816218546098e-05, + "loss": 2.4767, + "step": 1555500 + }, + { + "epoch": 7.71, + "learning_rate": 4.61469235990349e-05, + "loss": 2.4847, + "step": 1556000 + }, + { + "epoch": 7.71, + "learning_rate": 4.614568501260881e-05, + "loss": 2.5089, + "step": 1556500 + }, + { + "epoch": 7.71, + "learning_rate": 4.614444642618273e-05, + "loss": 2.4748, + "step": 1557000 + }, + { + "epoch": 7.72, + "learning_rate": 4.6143207839756644e-05, + "loss": 2.4601, + "step": 1557500 + }, + { + "epoch": 7.72, + "learning_rate": 4.614196925333056e-05, + "loss": 2.5098, + "step": 1558000 + }, + { + "epoch": 7.72, + "learning_rate": 4.614073066690447e-05, + "loss": 2.4963, + "step": 1558500 + }, + { + "epoch": 7.72, + "learning_rate": 4.613949208047839e-05, + "loss": 2.4725, + "step": 1559000 + }, + { + "epoch": 7.73, + "learning_rate": 4.6138253494052305e-05, + "loss": 2.4686, + "step": 1559500 + }, + { + "epoch": 7.73, + "learning_rate": 4.613701490762622e-05, + "loss": 2.4655, + "step": 1560000 + }, + { + "epoch": 7.73, + "learning_rate": 4.613577632120014e-05, + "loss": 2.4731, + "step": 1560500 + }, + { + "epoch": 7.73, + "learning_rate": 4.6134537734774056e-05, + "loss": 2.4864, + "step": 1561000 + }, + { + "epoch": 7.74, + "learning_rate": 4.613329914834797e-05, + "loss": 2.5237, + "step": 1561500 + }, + { + "epoch": 7.74, + "learning_rate": 4.613206056192189e-05, + "loss": 2.4653, + "step": 1562000 + }, + { + "epoch": 7.74, + "learning_rate": 4.613082197549581e-05, + "loss": 2.4584, + "step": 1562500 + }, + { + "epoch": 7.74, + "learning_rate": 4.6129583389069724e-05, + "loss": 2.4773, + "step": 1563000 + }, + { + "epoch": 7.75, + "learning_rate": 4.612834727981649e-05, + "loss": 2.4909, + "step": 1563500 + }, + { + "epoch": 7.75, + "learning_rate": 4.612711117056326e-05, + "loss": 2.4713, + "step": 1564000 + }, + { + "epoch": 7.75, + "learning_rate": 4.612587258413718e-05, + "loss": 2.4771, + "step": 1564500 + }, + { + "epoch": 7.75, + "learning_rate": 4.6124633997711095e-05, + "loss": 2.4854, + "step": 1565000 + }, + { + "epoch": 7.76, + "learning_rate": 4.6123397888457864e-05, + "loss": 2.4965, + "step": 1565500 + }, + { + "epoch": 7.76, + "learning_rate": 4.612215930203178e-05, + "loss": 2.4689, + "step": 1566000 + }, + { + "epoch": 7.76, + "learning_rate": 4.61209207156057e-05, + "loss": 2.4708, + "step": 1566500 + }, + { + "epoch": 7.76, + "learning_rate": 4.6119682129179615e-05, + "loss": 2.4845, + "step": 1567000 + }, + { + "epoch": 7.77, + "learning_rate": 4.611844354275353e-05, + "loss": 2.473, + "step": 1567500 + }, + { + "epoch": 7.77, + "learning_rate": 4.611720495632744e-05, + "loss": 2.4954, + "step": 1568000 + }, + { + "epoch": 7.77, + "learning_rate": 4.611596636990136e-05, + "loss": 2.4764, + "step": 1568500 + }, + { + "epoch": 7.77, + "learning_rate": 4.6114727783475276e-05, + "loss": 2.4517, + "step": 1569000 + }, + { + "epoch": 7.78, + "learning_rate": 4.611348919704919e-05, + "loss": 2.4814, + "step": 1569500 + }, + { + "epoch": 7.78, + "learning_rate": 4.611225061062311e-05, + "loss": 2.4804, + "step": 1570000 + }, + { + "epoch": 7.78, + "learning_rate": 4.6111012024197027e-05, + "loss": 2.4678, + "step": 1570500 + }, + { + "epoch": 7.78, + "learning_rate": 4.6109773437770943e-05, + "loss": 2.4607, + "step": 1571000 + }, + { + "epoch": 7.79, + "learning_rate": 4.610853485134486e-05, + "loss": 2.4835, + "step": 1571500 + }, + { + "epoch": 7.79, + "learning_rate": 4.610729874209162e-05, + "loss": 2.4995, + "step": 1572000 + }, + { + "epoch": 7.79, + "learning_rate": 4.610606015566554e-05, + "loss": 2.4387, + "step": 1572500 + }, + { + "epoch": 7.79, + "learning_rate": 4.6104824046412315e-05, + "loss": 2.451, + "step": 1573000 + }, + { + "epoch": 7.8, + "learning_rate": 4.610358545998623e-05, + "loss": 2.4665, + "step": 1573500 + }, + { + "epoch": 7.8, + "learning_rate": 4.610234687356015e-05, + "loss": 2.4839, + "step": 1574000 + }, + { + "epoch": 7.8, + "learning_rate": 4.610110828713406e-05, + "loss": 2.4874, + "step": 1574500 + }, + { + "epoch": 7.8, + "learning_rate": 4.6099869700707976e-05, + "loss": 2.4841, + "step": 1575000 + }, + { + "epoch": 7.81, + "learning_rate": 4.609863111428189e-05, + "loss": 2.4755, + "step": 1575500 + }, + { + "epoch": 7.81, + "learning_rate": 4.609739252785581e-05, + "loss": 2.4554, + "step": 1576000 + }, + { + "epoch": 7.81, + "learning_rate": 4.609615394142973e-05, + "loss": 2.4956, + "step": 1576500 + }, + { + "epoch": 7.81, + "learning_rate": 4.6094915355003644e-05, + "loss": 2.4772, + "step": 1577000 + }, + { + "epoch": 7.82, + "learning_rate": 4.609367676857756e-05, + "loss": 2.4684, + "step": 1577500 + }, + { + "epoch": 7.82, + "learning_rate": 4.609244065932433e-05, + "loss": 2.4815, + "step": 1578000 + }, + { + "epoch": 7.82, + "learning_rate": 4.6091202072898246e-05, + "loss": 2.4774, + "step": 1578500 + }, + { + "epoch": 7.82, + "learning_rate": 4.6089963486472156e-05, + "loss": 2.4714, + "step": 1579000 + }, + { + "epoch": 7.83, + "learning_rate": 4.608872490004607e-05, + "loss": 2.4645, + "step": 1579500 + }, + { + "epoch": 7.83, + "learning_rate": 4.608748631361999e-05, + "loss": 2.485, + "step": 1580000 + }, + { + "epoch": 7.83, + "learning_rate": 4.608624772719391e-05, + "loss": 2.4805, + "step": 1580500 + }, + { + "epoch": 7.83, + "learning_rate": 4.6085009140767824e-05, + "loss": 2.4852, + "step": 1581000 + }, + { + "epoch": 7.84, + "learning_rate": 4.608377055434174e-05, + "loss": 2.4744, + "step": 1581500 + }, + { + "epoch": 7.84, + "learning_rate": 4.608253196791566e-05, + "loss": 2.4791, + "step": 1582000 + }, + { + "epoch": 7.84, + "learning_rate": 4.608129585866243e-05, + "loss": 2.4555, + "step": 1582500 + }, + { + "epoch": 7.84, + "learning_rate": 4.60800597494092e-05, + "loss": 2.4637, + "step": 1583000 + }, + { + "epoch": 7.85, + "learning_rate": 4.607882116298311e-05, + "loss": 2.4863, + "step": 1583500 + }, + { + "epoch": 7.85, + "learning_rate": 4.607758257655703e-05, + "loss": 2.4752, + "step": 1584000 + }, + { + "epoch": 7.85, + "learning_rate": 4.6076343990130946e-05, + "loss": 2.4594, + "step": 1584500 + }, + { + "epoch": 7.85, + "learning_rate": 4.6075107880877715e-05, + "loss": 2.4782, + "step": 1585000 + }, + { + "epoch": 7.86, + "learning_rate": 4.607386929445163e-05, + "loss": 2.4713, + "step": 1585500 + }, + { + "epoch": 7.86, + "learning_rate": 4.607263070802555e-05, + "loss": 2.4576, + "step": 1586000 + }, + { + "epoch": 7.86, + "learning_rate": 4.607139459877232e-05, + "loss": 2.4664, + "step": 1586500 + }, + { + "epoch": 7.86, + "learning_rate": 4.6070156012346235e-05, + "loss": 2.4834, + "step": 1587000 + }, + { + "epoch": 7.87, + "learning_rate": 4.606891742592015e-05, + "loss": 2.5103, + "step": 1587500 + }, + { + "epoch": 7.87, + "learning_rate": 4.6067681316666914e-05, + "loss": 2.4813, + "step": 1588000 + }, + { + "epoch": 7.87, + "learning_rate": 4.606644273024083e-05, + "loss": 2.4637, + "step": 1588500 + }, + { + "epoch": 7.87, + "learning_rate": 4.606520414381475e-05, + "loss": 2.4753, + "step": 1589000 + }, + { + "epoch": 7.87, + "learning_rate": 4.6063965557388665e-05, + "loss": 2.4819, + "step": 1589500 + }, + { + "epoch": 7.88, + "learning_rate": 4.606272697096258e-05, + "loss": 2.4511, + "step": 1590000 + }, + { + "epoch": 7.88, + "learning_rate": 4.60614883845365e-05, + "loss": 2.4717, + "step": 1590500 + }, + { + "epoch": 7.88, + "learning_rate": 4.6060249798110415e-05, + "loss": 2.489, + "step": 1591000 + }, + { + "epoch": 7.88, + "learning_rate": 4.605901121168433e-05, + "loss": 2.4643, + "step": 1591500 + }, + { + "epoch": 7.89, + "learning_rate": 4.605777262525825e-05, + "loss": 2.4807, + "step": 1592000 + }, + { + "epoch": 7.89, + "learning_rate": 4.605653651600502e-05, + "loss": 2.474, + "step": 1592500 + }, + { + "epoch": 7.89, + "learning_rate": 4.6055297929578935e-05, + "loss": 2.4841, + "step": 1593000 + }, + { + "epoch": 7.89, + "learning_rate": 4.605405934315285e-05, + "loss": 2.4805, + "step": 1593500 + }, + { + "epoch": 7.9, + "learning_rate": 4.605282075672677e-05, + "loss": 2.4747, + "step": 1594000 + }, + { + "epoch": 7.9, + "learning_rate": 4.6051582170300686e-05, + "loss": 2.4757, + "step": 1594500 + }, + { + "epoch": 7.9, + "learning_rate": 4.60503435838746e-05, + "loss": 2.4795, + "step": 1595000 + }, + { + "epoch": 7.9, + "learning_rate": 4.604910499744852e-05, + "loss": 2.4725, + "step": 1595500 + }, + { + "epoch": 7.91, + "learning_rate": 4.604786641102243e-05, + "loss": 2.4809, + "step": 1596000 + }, + { + "epoch": 7.91, + "learning_rate": 4.604662782459635e-05, + "loss": 2.4966, + "step": 1596500 + }, + { + "epoch": 7.91, + "learning_rate": 4.6045389238170264e-05, + "loss": 2.4485, + "step": 1597000 + }, + { + "epoch": 7.91, + "learning_rate": 4.604415065174418e-05, + "loss": 2.4603, + "step": 1597500 + }, + { + "epoch": 7.92, + "learning_rate": 4.60429120653181e-05, + "loss": 2.4999, + "step": 1598000 + }, + { + "epoch": 7.92, + "learning_rate": 4.6041673478892014e-05, + "loss": 2.4641, + "step": 1598500 + }, + { + "epoch": 7.92, + "learning_rate": 4.604043736963878e-05, + "loss": 2.473, + "step": 1599000 + }, + { + "epoch": 7.92, + "learning_rate": 4.603919878321269e-05, + "loss": 2.4771, + "step": 1599500 + }, + { + "epoch": 7.93, + "learning_rate": 4.603796267395947e-05, + "loss": 2.4965, + "step": 1600000 + }, + { + "epoch": 7.93, + "learning_rate": 4.6036724087533386e-05, + "loss": 2.4797, + "step": 1600500 + }, + { + "epoch": 7.93, + "learning_rate": 4.60354855011073e-05, + "loss": 2.4846, + "step": 1601000 + }, + { + "epoch": 7.93, + "learning_rate": 4.603424691468122e-05, + "loss": 2.4528, + "step": 1601500 + }, + { + "epoch": 7.94, + "learning_rate": 4.6033008328255137e-05, + "loss": 2.4811, + "step": 1602000 + }, + { + "epoch": 7.94, + "learning_rate": 4.603176974182905e-05, + "loss": 2.4997, + "step": 1602500 + }, + { + "epoch": 7.94, + "learning_rate": 4.6030533632575816e-05, + "loss": 2.4839, + "step": 1603000 + }, + { + "epoch": 7.94, + "learning_rate": 4.602929504614973e-05, + "loss": 2.4663, + "step": 1603500 + }, + { + "epoch": 7.95, + "learning_rate": 4.60280589368965e-05, + "loss": 2.4979, + "step": 1604000 + }, + { + "epoch": 7.95, + "learning_rate": 4.602682035047042e-05, + "loss": 2.4584, + "step": 1604500 + }, + { + "epoch": 7.95, + "learning_rate": 4.6025581764044335e-05, + "loss": 2.4736, + "step": 1605000 + }, + { + "epoch": 7.95, + "learning_rate": 4.602434317761825e-05, + "loss": 2.4942, + "step": 1605500 + }, + { + "epoch": 7.96, + "learning_rate": 4.602310459119217e-05, + "loss": 2.4717, + "step": 1606000 + }, + { + "epoch": 7.96, + "learning_rate": 4.6021866004766086e-05, + "loss": 2.4614, + "step": 1606500 + }, + { + "epoch": 7.96, + "learning_rate": 4.602062741834e-05, + "loss": 2.4844, + "step": 1607000 + }, + { + "epoch": 7.96, + "learning_rate": 4.601938883191392e-05, + "loss": 2.469, + "step": 1607500 + }, + { + "epoch": 7.97, + "learning_rate": 4.601815272266068e-05, + "loss": 2.48, + "step": 1608000 + }, + { + "epoch": 7.97, + "learning_rate": 4.60169141362346e-05, + "loss": 2.4687, + "step": 1608500 + }, + { + "epoch": 7.97, + "learning_rate": 4.6015675549808516e-05, + "loss": 2.4747, + "step": 1609000 + }, + { + "epoch": 7.97, + "learning_rate": 4.601443696338243e-05, + "loss": 2.4615, + "step": 1609500 + }, + { + "epoch": 7.98, + "learning_rate": 4.601319837695635e-05, + "loss": 2.5101, + "step": 1610000 + }, + { + "epoch": 7.98, + "learning_rate": 4.6011959790530266e-05, + "loss": 2.4775, + "step": 1610500 + }, + { + "epoch": 7.98, + "learning_rate": 4.6010723681277035e-05, + "loss": 2.4492, + "step": 1611000 + }, + { + "epoch": 7.98, + "learning_rate": 4.600948509485095e-05, + "loss": 2.49, + "step": 1611500 + }, + { + "epoch": 7.99, + "learning_rate": 4.600824650842487e-05, + "loss": 2.4681, + "step": 1612000 + }, + { + "epoch": 7.99, + "learning_rate": 4.6007007921998786e-05, + "loss": 2.4747, + "step": 1612500 + }, + { + "epoch": 7.99, + "learning_rate": 4.60057693355727e-05, + "loss": 2.4882, + "step": 1613000 + }, + { + "epoch": 7.99, + "learning_rate": 4.600453074914662e-05, + "loss": 2.4759, + "step": 1613500 + }, + { + "epoch": 8.0, + "learning_rate": 4.600329216272054e-05, + "loss": 2.4905, + "step": 1614000 + }, + { + "epoch": 8.0, + "learning_rate": 4.6002053576294454e-05, + "loss": 2.4694, + "step": 1614500 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.6397272831364085, + "eval_accuracy_mlm": 0.59286278093972, + "eval_accuracy_nsp": 0.8609541141909092, + "eval_loss": 2.4369935989379883, + "eval_runtime": 147.379, + "eval_samples_per_second": 1729.955, + "eval_steps_per_second": 72.086, + "step": 1614744 + }, + { + "epoch": 8.0, + "learning_rate": 4.6000814989868364e-05, + "loss": 2.4571, + "step": 1615000 + }, + { + "epoch": 8.0, + "learning_rate": 4.599957640344228e-05, + "loss": 2.4342, + "step": 1615500 + }, + { + "epoch": 8.01, + "learning_rate": 4.599834029418905e-05, + "loss": 2.4282, + "step": 1616000 + }, + { + "epoch": 8.01, + "learning_rate": 4.599710418493582e-05, + "loss": 2.4413, + "step": 1616500 + }, + { + "epoch": 8.01, + "learning_rate": 4.5995865598509735e-05, + "loss": 2.4362, + "step": 1617000 + }, + { + "epoch": 8.01, + "learning_rate": 4.599462701208365e-05, + "loss": 2.4393, + "step": 1617500 + }, + { + "epoch": 8.02, + "learning_rate": 4.599338842565757e-05, + "loss": 2.4598, + "step": 1618000 + }, + { + "epoch": 8.02, + "learning_rate": 4.599215231640434e-05, + "loss": 2.4666, + "step": 1618500 + }, + { + "epoch": 8.02, + "learning_rate": 4.5990913729978255e-05, + "loss": 2.4484, + "step": 1619000 + }, + { + "epoch": 8.02, + "learning_rate": 4.598967514355217e-05, + "loss": 2.4482, + "step": 1619500 + }, + { + "epoch": 8.03, + "learning_rate": 4.598843655712609e-05, + "loss": 2.4395, + "step": 1620000 + }, + { + "epoch": 8.03, + "learning_rate": 4.598720044787285e-05, + "loss": 2.4311, + "step": 1620500 + }, + { + "epoch": 8.03, + "learning_rate": 4.598596186144677e-05, + "loss": 2.438, + "step": 1621000 + }, + { + "epoch": 8.03, + "learning_rate": 4.5984723275020685e-05, + "loss": 2.4487, + "step": 1621500 + }, + { + "epoch": 8.04, + "learning_rate": 4.59834846885946e-05, + "loss": 2.4532, + "step": 1622000 + }, + { + "epoch": 8.04, + "learning_rate": 4.598224610216852e-05, + "loss": 2.4735, + "step": 1622500 + }, + { + "epoch": 8.04, + "learning_rate": 4.5981007515742436e-05, + "loss": 2.4447, + "step": 1623000 + }, + { + "epoch": 8.04, + "learning_rate": 4.597976892931635e-05, + "loss": 2.4488, + "step": 1623500 + }, + { + "epoch": 8.05, + "learning_rate": 4.597853034289027e-05, + "loss": 2.437, + "step": 1624000 + }, + { + "epoch": 8.05, + "learning_rate": 4.597729423363704e-05, + "loss": 2.4128, + "step": 1624500 + }, + { + "epoch": 8.05, + "learning_rate": 4.5976055647210955e-05, + "loss": 2.4574, + "step": 1625000 + }, + { + "epoch": 8.05, + "learning_rate": 4.5974822015130576e-05, + "loss": 2.4699, + "step": 1625500 + }, + { + "epoch": 8.06, + "learning_rate": 4.597358342870449e-05, + "loss": 2.4535, + "step": 1626000 + }, + { + "epoch": 8.06, + "learning_rate": 4.597234484227841e-05, + "loss": 2.463, + "step": 1626500 + }, + { + "epoch": 8.06, + "learning_rate": 4.5971106255852327e-05, + "loss": 2.477, + "step": 1627000 + }, + { + "epoch": 8.06, + "learning_rate": 4.5969867669426244e-05, + "loss": 2.4738, + "step": 1627500 + }, + { + "epoch": 8.07, + "learning_rate": 4.5968629083000154e-05, + "loss": 2.4388, + "step": 1628000 + }, + { + "epoch": 8.07, + "learning_rate": 4.596739049657407e-05, + "loss": 2.4269, + "step": 1628500 + }, + { + "epoch": 8.07, + "learning_rate": 4.596615191014799e-05, + "loss": 2.453, + "step": 1629000 + }, + { + "epoch": 8.07, + "learning_rate": 4.5964913323721904e-05, + "loss": 2.4559, + "step": 1629500 + }, + { + "epoch": 8.08, + "learning_rate": 4.596367473729582e-05, + "loss": 2.4375, + "step": 1630000 + }, + { + "epoch": 8.08, + "learning_rate": 4.596243615086974e-05, + "loss": 2.4509, + "step": 1630500 + }, + { + "epoch": 8.08, + "learning_rate": 4.5961197564443655e-05, + "loss": 2.4442, + "step": 1631000 + }, + { + "epoch": 8.08, + "learning_rate": 4.595995897801757e-05, + "loss": 2.4474, + "step": 1631500 + }, + { + "epoch": 8.09, + "learning_rate": 4.595872039159149e-05, + "loss": 2.4552, + "step": 1632000 + }, + { + "epoch": 8.09, + "learning_rate": 4.5957481805165406e-05, + "loss": 2.4695, + "step": 1632500 + }, + { + "epoch": 8.09, + "learning_rate": 4.595624321873932e-05, + "loss": 2.4425, + "step": 1633000 + }, + { + "epoch": 8.09, + "learning_rate": 4.595500463231324e-05, + "loss": 2.4627, + "step": 1633500 + }, + { + "epoch": 8.1, + "learning_rate": 4.595376852306e-05, + "loss": 2.4539, + "step": 1634000 + }, + { + "epoch": 8.1, + "learning_rate": 4.595252993663392e-05, + "loss": 2.4307, + "step": 1634500 + }, + { + "epoch": 8.1, + "learning_rate": 4.5951291350207836e-05, + "loss": 2.446, + "step": 1635000 + }, + { + "epoch": 8.1, + "learning_rate": 4.595005276378175e-05, + "loss": 2.4706, + "step": 1635500 + }, + { + "epoch": 8.11, + "learning_rate": 4.594881417735567e-05, + "loss": 2.4612, + "step": 1636000 + }, + { + "epoch": 8.11, + "learning_rate": 4.594757806810244e-05, + "loss": 2.4764, + "step": 1636500 + }, + { + "epoch": 8.11, + "learning_rate": 4.5946339481676355e-05, + "loss": 2.4155, + "step": 1637000 + }, + { + "epoch": 8.11, + "learning_rate": 4.594510337242312e-05, + "loss": 2.4596, + "step": 1637500 + }, + { + "epoch": 8.12, + "learning_rate": 4.5943864785997034e-05, + "loss": 2.4651, + "step": 1638000 + }, + { + "epoch": 8.12, + "learning_rate": 4.594262619957095e-05, + "loss": 2.4594, + "step": 1638500 + }, + { + "epoch": 8.12, + "learning_rate": 4.594138761314487e-05, + "loss": 2.4223, + "step": 1639000 + }, + { + "epoch": 8.12, + "learning_rate": 4.5940149026718785e-05, + "loss": 2.4587, + "step": 1639500 + }, + { + "epoch": 8.13, + "learning_rate": 4.59389104402927e-05, + "loss": 2.4586, + "step": 1640000 + }, + { + "epoch": 8.13, + "learning_rate": 4.593767185386662e-05, + "loss": 2.4567, + "step": 1640500 + }, + { + "epoch": 8.13, + "learning_rate": 4.5936433267440536e-05, + "loss": 2.4287, + "step": 1641000 + }, + { + "epoch": 8.13, + "learning_rate": 4.593519468101445e-05, + "loss": 2.4658, + "step": 1641500 + }, + { + "epoch": 8.14, + "learning_rate": 4.593395609458837e-05, + "loss": 2.4436, + "step": 1642000 + }, + { + "epoch": 8.14, + "learning_rate": 4.593271750816229e-05, + "loss": 2.4596, + "step": 1642500 + }, + { + "epoch": 8.14, + "learning_rate": 4.5931478921736204e-05, + "loss": 2.4541, + "step": 1643000 + }, + { + "epoch": 8.14, + "learning_rate": 4.593024281248297e-05, + "loss": 2.4404, + "step": 1643500 + }, + { + "epoch": 8.14, + "learning_rate": 4.592900422605689e-05, + "loss": 2.4754, + "step": 1644000 + }, + { + "epoch": 8.15, + "learning_rate": 4.592776811680365e-05, + "loss": 2.4406, + "step": 1644500 + }, + { + "epoch": 8.15, + "learning_rate": 4.592652953037757e-05, + "loss": 2.4571, + "step": 1645000 + }, + { + "epoch": 8.15, + "learning_rate": 4.5925290943951485e-05, + "loss": 2.4375, + "step": 1645500 + }, + { + "epoch": 8.15, + "learning_rate": 4.59240523575254e-05, + "loss": 2.4535, + "step": 1646000 + }, + { + "epoch": 8.16, + "learning_rate": 4.592281377109932e-05, + "loss": 2.4388, + "step": 1646500 + }, + { + "epoch": 8.16, + "learning_rate": 4.5921575184673236e-05, + "loss": 2.4679, + "step": 1647000 + }, + { + "epoch": 8.16, + "learning_rate": 4.592033659824715e-05, + "loss": 2.443, + "step": 1647500 + }, + { + "epoch": 8.16, + "learning_rate": 4.591909801182107e-05, + "loss": 2.449, + "step": 1648000 + }, + { + "epoch": 8.17, + "learning_rate": 4.591786190256784e-05, + "loss": 2.463, + "step": 1648500 + }, + { + "epoch": 8.17, + "learning_rate": 4.5916623316141756e-05, + "loss": 2.4809, + "step": 1649000 + }, + { + "epoch": 8.17, + "learning_rate": 4.591538472971567e-05, + "loss": 2.4569, + "step": 1649500 + }, + { + "epoch": 8.17, + "learning_rate": 4.591414614328959e-05, + "loss": 2.4376, + "step": 1650000 + }, + { + "epoch": 8.18, + "learning_rate": 4.5912907556863506e-05, + "loss": 2.4565, + "step": 1650500 + }, + { + "epoch": 8.18, + "learning_rate": 4.591166897043742e-05, + "loss": 2.448, + "step": 1651000 + }, + { + "epoch": 8.18, + "learning_rate": 4.591043038401134e-05, + "loss": 2.4753, + "step": 1651500 + }, + { + "epoch": 8.18, + "learning_rate": 4.590919179758526e-05, + "loss": 2.4302, + "step": 1652000 + }, + { + "epoch": 8.19, + "learning_rate": 4.5907953211159174e-05, + "loss": 2.4589, + "step": 1652500 + }, + { + "epoch": 8.19, + "learning_rate": 4.590671462473309e-05, + "loss": 2.454, + "step": 1653000 + }, + { + "epoch": 8.19, + "learning_rate": 4.590547603830701e-05, + "loss": 2.442, + "step": 1653500 + }, + { + "epoch": 8.19, + "learning_rate": 4.590423992905377e-05, + "loss": 2.4509, + "step": 1654000 + }, + { + "epoch": 8.2, + "learning_rate": 4.590300134262769e-05, + "loss": 2.4578, + "step": 1654500 + }, + { + "epoch": 8.2, + "learning_rate": 4.5901765233374456e-05, + "loss": 2.4688, + "step": 1655000 + }, + { + "epoch": 8.2, + "learning_rate": 4.590052664694837e-05, + "loss": 2.4823, + "step": 1655500 + }, + { + "epoch": 8.2, + "learning_rate": 4.589928806052229e-05, + "loss": 2.4619, + "step": 1656000 + }, + { + "epoch": 8.21, + "learning_rate": 4.5898049474096207e-05, + "loss": 2.443, + "step": 1656500 + }, + { + "epoch": 8.21, + "learning_rate": 4.5896810887670123e-05, + "loss": 2.4455, + "step": 1657000 + }, + { + "epoch": 8.21, + "learning_rate": 4.5895574778416885e-05, + "loss": 2.4619, + "step": 1657500 + }, + { + "epoch": 8.21, + "learning_rate": 4.589433866916366e-05, + "loss": 2.4513, + "step": 1658000 + }, + { + "epoch": 8.22, + "learning_rate": 4.589310008273758e-05, + "loss": 2.4317, + "step": 1658500 + }, + { + "epoch": 8.22, + "learning_rate": 4.5891861496311495e-05, + "loss": 2.4583, + "step": 1659000 + }, + { + "epoch": 8.22, + "learning_rate": 4.5890625387058264e-05, + "loss": 2.4789, + "step": 1659500 + }, + { + "epoch": 8.22, + "learning_rate": 4.588938680063218e-05, + "loss": 2.4496, + "step": 1660000 + }, + { + "epoch": 8.23, + "learning_rate": 4.58881482142061e-05, + "loss": 2.4006, + "step": 1660500 + }, + { + "epoch": 8.23, + "learning_rate": 4.5886909627780015e-05, + "loss": 2.4678, + "step": 1661000 + }, + { + "epoch": 8.23, + "learning_rate": 4.588567104135393e-05, + "loss": 2.4779, + "step": 1661500 + }, + { + "epoch": 8.23, + "learning_rate": 4.588443245492784e-05, + "loss": 2.4671, + "step": 1662000 + }, + { + "epoch": 8.24, + "learning_rate": 4.588319386850176e-05, + "loss": 2.457, + "step": 1662500 + }, + { + "epoch": 8.24, + "learning_rate": 4.5881955282075675e-05, + "loss": 2.466, + "step": 1663000 + }, + { + "epoch": 8.24, + "learning_rate": 4.588071669564959e-05, + "loss": 2.4845, + "step": 1663500 + }, + { + "epoch": 8.24, + "learning_rate": 4.587947810922351e-05, + "loss": 2.4621, + "step": 1664000 + }, + { + "epoch": 8.25, + "learning_rate": 4.587823952279742e-05, + "loss": 2.455, + "step": 1664500 + }, + { + "epoch": 8.25, + "learning_rate": 4.5877000936371336e-05, + "loss": 2.4329, + "step": 1665000 + }, + { + "epoch": 8.25, + "learning_rate": 4.587576234994525e-05, + "loss": 2.4424, + "step": 1665500 + }, + { + "epoch": 8.25, + "learning_rate": 4.587452376351917e-05, + "loss": 2.4667, + "step": 1666000 + }, + { + "epoch": 8.26, + "learning_rate": 4.587328765426594e-05, + "loss": 2.4531, + "step": 1666500 + }, + { + "epoch": 8.26, + "learning_rate": 4.5872049067839856e-05, + "loss": 2.4527, + "step": 1667000 + }, + { + "epoch": 8.26, + "learning_rate": 4.587081048141377e-05, + "loss": 2.4457, + "step": 1667500 + }, + { + "epoch": 8.26, + "learning_rate": 4.586957189498769e-05, + "loss": 2.4557, + "step": 1668000 + }, + { + "epoch": 8.27, + "learning_rate": 4.586833330856161e-05, + "loss": 2.4627, + "step": 1668500 + }, + { + "epoch": 8.27, + "learning_rate": 4.5867094722135524e-05, + "loss": 2.4349, + "step": 1669000 + }, + { + "epoch": 8.27, + "learning_rate": 4.586585613570944e-05, + "loss": 2.4301, + "step": 1669500 + }, + { + "epoch": 8.27, + "learning_rate": 4.586461754928336e-05, + "loss": 2.4527, + "step": 1670000 + }, + { + "epoch": 8.28, + "learning_rate": 4.5863378962857274e-05, + "loss": 2.4991, + "step": 1670500 + }, + { + "epoch": 8.28, + "learning_rate": 4.5862142853604037e-05, + "loss": 2.4676, + "step": 1671000 + }, + { + "epoch": 8.28, + "learning_rate": 4.5860904267177953e-05, + "loss": 2.4682, + "step": 1671500 + }, + { + "epoch": 8.28, + "learning_rate": 4.585966568075187e-05, + "loss": 2.4547, + "step": 1672000 + }, + { + "epoch": 8.29, + "learning_rate": 4.585842709432579e-05, + "loss": 2.454, + "step": 1672500 + }, + { + "epoch": 8.29, + "learning_rate": 4.5857190985072556e-05, + "loss": 2.4367, + "step": 1673000 + }, + { + "epoch": 8.29, + "learning_rate": 4.585595239864647e-05, + "loss": 2.4829, + "step": 1673500 + }, + { + "epoch": 8.29, + "learning_rate": 4.585471381222039e-05, + "loss": 2.4399, + "step": 1674000 + }, + { + "epoch": 8.3, + "learning_rate": 4.585347522579431e-05, + "loss": 2.4632, + "step": 1674500 + }, + { + "epoch": 8.3, + "learning_rate": 4.5852236639368224e-05, + "loss": 2.4526, + "step": 1675000 + }, + { + "epoch": 8.3, + "learning_rate": 4.585099805294214e-05, + "loss": 2.4805, + "step": 1675500 + }, + { + "epoch": 8.3, + "learning_rate": 4.584976194368891e-05, + "loss": 2.4633, + "step": 1676000 + }, + { + "epoch": 8.31, + "learning_rate": 4.5848523357262826e-05, + "loss": 2.4558, + "step": 1676500 + }, + { + "epoch": 8.31, + "learning_rate": 4.5847284770836743e-05, + "loss": 2.4494, + "step": 1677000 + }, + { + "epoch": 8.31, + "learning_rate": 4.584604618441066e-05, + "loss": 2.4412, + "step": 1677500 + }, + { + "epoch": 8.31, + "learning_rate": 4.584481255233028e-05, + "loss": 2.4841, + "step": 1678000 + }, + { + "epoch": 8.32, + "learning_rate": 4.58435739659042e-05, + "loss": 2.4545, + "step": 1678500 + }, + { + "epoch": 8.32, + "learning_rate": 4.5842335379478115e-05, + "loss": 2.4488, + "step": 1679000 + }, + { + "epoch": 8.32, + "learning_rate": 4.584109679305203e-05, + "loss": 2.454, + "step": 1679500 + }, + { + "epoch": 8.32, + "learning_rate": 4.583985820662595e-05, + "loss": 2.4556, + "step": 1680000 + }, + { + "epoch": 8.33, + "learning_rate": 4.5838619620199866e-05, + "loss": 2.4793, + "step": 1680500 + }, + { + "epoch": 8.33, + "learning_rate": 4.5837381033773776e-05, + "loss": 2.4807, + "step": 1681000 + }, + { + "epoch": 8.33, + "learning_rate": 4.583614244734769e-05, + "loss": 2.4468, + "step": 1681500 + }, + { + "epoch": 8.33, + "learning_rate": 4.583490386092161e-05, + "loss": 2.4587, + "step": 1682000 + }, + { + "epoch": 8.34, + "learning_rate": 4.583366775166838e-05, + "loss": 2.4789, + "step": 1682500 + }, + { + "epoch": 8.34, + "learning_rate": 4.583243164241515e-05, + "loss": 2.4712, + "step": 1683000 + }, + { + "epoch": 8.34, + "learning_rate": 4.5831193055989064e-05, + "loss": 2.4732, + "step": 1683500 + }, + { + "epoch": 8.34, + "learning_rate": 4.582995446956298e-05, + "loss": 2.4527, + "step": 1684000 + }, + { + "epoch": 8.35, + "learning_rate": 4.582871836030975e-05, + "loss": 2.4415, + "step": 1684500 + }, + { + "epoch": 8.35, + "learning_rate": 4.582747977388367e-05, + "loss": 2.4473, + "step": 1685000 + }, + { + "epoch": 8.35, + "learning_rate": 4.582624118745758e-05, + "loss": 2.4513, + "step": 1685500 + }, + { + "epoch": 8.35, + "learning_rate": 4.5825002601031494e-05, + "loss": 2.4484, + "step": 1686000 + }, + { + "epoch": 8.36, + "learning_rate": 4.582376649177826e-05, + "loss": 2.454, + "step": 1686500 + }, + { + "epoch": 8.36, + "learning_rate": 4.582252790535218e-05, + "loss": 2.4445, + "step": 1687000 + }, + { + "epoch": 8.36, + "learning_rate": 4.58212893189261e-05, + "loss": 2.4324, + "step": 1687500 + }, + { + "epoch": 8.36, + "learning_rate": 4.5820050732500014e-05, + "loss": 2.4387, + "step": 1688000 + }, + { + "epoch": 8.37, + "learning_rate": 4.581881214607393e-05, + "loss": 2.4302, + "step": 1688500 + }, + { + "epoch": 8.37, + "learning_rate": 4.581757355964785e-05, + "loss": 2.4632, + "step": 1689000 + }, + { + "epoch": 8.37, + "learning_rate": 4.5816334973221764e-05, + "loss": 2.4692, + "step": 1689500 + }, + { + "epoch": 8.37, + "learning_rate": 4.581509638679568e-05, + "loss": 2.4576, + "step": 1690000 + }, + { + "epoch": 8.38, + "learning_rate": 4.58138578003696e-05, + "loss": 2.454, + "step": 1690500 + }, + { + "epoch": 8.38, + "learning_rate": 4.5812619213943515e-05, + "loss": 2.4575, + "step": 1691000 + }, + { + "epoch": 8.38, + "learning_rate": 4.581138062751743e-05, + "loss": 2.4608, + "step": 1691500 + }, + { + "epoch": 8.38, + "learning_rate": 4.581014204109135e-05, + "loss": 2.4546, + "step": 1692000 + }, + { + "epoch": 8.39, + "learning_rate": 4.580890593183811e-05, + "loss": 2.4915, + "step": 1692500 + }, + { + "epoch": 8.39, + "learning_rate": 4.580766734541203e-05, + "loss": 2.4625, + "step": 1693000 + }, + { + "epoch": 8.39, + "learning_rate": 4.5806428758985945e-05, + "loss": 2.4287, + "step": 1693500 + }, + { + "epoch": 8.39, + "learning_rate": 4.580519017255986e-05, + "loss": 2.45, + "step": 1694000 + }, + { + "epoch": 8.4, + "learning_rate": 4.580395406330663e-05, + "loss": 2.4605, + "step": 1694500 + }, + { + "epoch": 8.4, + "learning_rate": 4.580271547688055e-05, + "loss": 2.4412, + "step": 1695000 + }, + { + "epoch": 8.4, + "learning_rate": 4.5801476890454464e-05, + "loss": 2.4722, + "step": 1695500 + }, + { + "epoch": 8.4, + "learning_rate": 4.580023830402838e-05, + "loss": 2.4585, + "step": 1696000 + }, + { + "epoch": 8.41, + "learning_rate": 4.57989997176023e-05, + "loss": 2.4503, + "step": 1696500 + }, + { + "epoch": 8.41, + "learning_rate": 4.579776360834907e-05, + "loss": 2.4558, + "step": 1697000 + }, + { + "epoch": 8.41, + "learning_rate": 4.5796525021922984e-05, + "loss": 2.4635, + "step": 1697500 + }, + { + "epoch": 8.41, + "learning_rate": 4.5795288912669746e-05, + "loss": 2.4488, + "step": 1698000 + }, + { + "epoch": 8.41, + "learning_rate": 4.579405032624366e-05, + "loss": 2.4235, + "step": 1698500 + }, + { + "epoch": 8.42, + "learning_rate": 4.579281173981758e-05, + "loss": 2.471, + "step": 1699000 + }, + { + "epoch": 8.42, + "learning_rate": 4.5791575630564356e-05, + "loss": 2.4608, + "step": 1699500 + }, + { + "epoch": 8.42, + "learning_rate": 4.579033704413827e-05, + "loss": 2.4558, + "step": 1700000 + }, + { + "epoch": 8.42, + "learning_rate": 4.578909845771218e-05, + "loss": 2.4506, + "step": 1700500 + }, + { + "epoch": 8.43, + "learning_rate": 4.57878598712861e-05, + "loss": 2.4858, + "step": 1701000 + }, + { + "epoch": 8.43, + "learning_rate": 4.5786621284860016e-05, + "loss": 2.4645, + "step": 1701500 + }, + { + "epoch": 8.43, + "learning_rate": 4.5785385175606785e-05, + "loss": 2.4548, + "step": 1702000 + }, + { + "epoch": 8.43, + "learning_rate": 4.57841465891807e-05, + "loss": 2.4721, + "step": 1702500 + }, + { + "epoch": 8.44, + "learning_rate": 4.578290800275462e-05, + "loss": 2.4628, + "step": 1703000 + }, + { + "epoch": 8.44, + "learning_rate": 4.578166941632853e-05, + "loss": 2.4656, + "step": 1703500 + }, + { + "epoch": 8.44, + "learning_rate": 4.5780430829902446e-05, + "loss": 2.4542, + "step": 1704000 + }, + { + "epoch": 8.44, + "learning_rate": 4.577919224347636e-05, + "loss": 2.4838, + "step": 1704500 + }, + { + "epoch": 8.45, + "learning_rate": 4.577795365705028e-05, + "loss": 2.4511, + "step": 1705000 + }, + { + "epoch": 8.45, + "learning_rate": 4.57767150706242e-05, + "loss": 2.4663, + "step": 1705500 + }, + { + "epoch": 8.45, + "learning_rate": 4.5775476484198114e-05, + "loss": 2.4657, + "step": 1706000 + }, + { + "epoch": 8.45, + "learning_rate": 4.577423789777203e-05, + "loss": 2.4396, + "step": 1706500 + }, + { + "epoch": 8.46, + "learning_rate": 4.577299931134595e-05, + "loss": 2.4544, + "step": 1707000 + }, + { + "epoch": 8.46, + "learning_rate": 4.5771760724919865e-05, + "loss": 2.4735, + "step": 1707500 + }, + { + "epoch": 8.46, + "learning_rate": 4.577052213849378e-05, + "loss": 2.4564, + "step": 1708000 + }, + { + "epoch": 8.46, + "learning_rate": 4.57692835520677e-05, + "loss": 2.4455, + "step": 1708500 + }, + { + "epoch": 8.47, + "learning_rate": 4.576804744281447e-05, + "loss": 2.4593, + "step": 1709000 + }, + { + "epoch": 8.47, + "learning_rate": 4.5766808856388384e-05, + "loss": 2.468, + "step": 1709500 + }, + { + "epoch": 8.47, + "learning_rate": 4.57655702699623e-05, + "loss": 2.4607, + "step": 1710000 + }, + { + "epoch": 8.47, + "learning_rate": 4.576433168353622e-05, + "loss": 2.4671, + "step": 1710500 + }, + { + "epoch": 8.48, + "learning_rate": 4.5763093097110135e-05, + "loss": 2.4854, + "step": 1711000 + }, + { + "epoch": 8.48, + "learning_rate": 4.576185451068405e-05, + "loss": 2.4792, + "step": 1711500 + }, + { + "epoch": 8.48, + "learning_rate": 4.576061592425797e-05, + "loss": 2.4591, + "step": 1712000 + }, + { + "epoch": 8.48, + "learning_rate": 4.575937733783188e-05, + "loss": 2.464, + "step": 1712500 + }, + { + "epoch": 8.49, + "learning_rate": 4.5758138751405796e-05, + "loss": 2.4626, + "step": 1713000 + }, + { + "epoch": 8.49, + "learning_rate": 4.575690016497971e-05, + "loss": 2.4861, + "step": 1713500 + }, + { + "epoch": 8.49, + "learning_rate": 4.5755666532899334e-05, + "loss": 2.4691, + "step": 1714000 + }, + { + "epoch": 8.49, + "learning_rate": 4.575442794647325e-05, + "loss": 2.4656, + "step": 1714500 + }, + { + "epoch": 8.5, + "learning_rate": 4.575318936004717e-05, + "loss": 2.4999, + "step": 1715000 + }, + { + "epoch": 8.5, + "learning_rate": 4.5751950773621084e-05, + "loss": 2.4376, + "step": 1715500 + }, + { + "epoch": 8.5, + "learning_rate": 4.5750712187195e-05, + "loss": 2.4519, + "step": 1716000 + }, + { + "epoch": 8.5, + "learning_rate": 4.5749476077941763e-05, + "loss": 2.4505, + "step": 1716500 + }, + { + "epoch": 8.51, + "learning_rate": 4.574823749151568e-05, + "loss": 2.4783, + "step": 1717000 + }, + { + "epoch": 8.51, + "learning_rate": 4.57469989050896e-05, + "loss": 2.437, + "step": 1717500 + }, + { + "epoch": 8.51, + "learning_rate": 4.5745760318663514e-05, + "loss": 2.4739, + "step": 1718000 + }, + { + "epoch": 8.51, + "learning_rate": 4.574452420941029e-05, + "loss": 2.4648, + "step": 1718500 + }, + { + "epoch": 8.52, + "learning_rate": 4.574328562298421e-05, + "loss": 2.4603, + "step": 1719000 + }, + { + "epoch": 8.52, + "learning_rate": 4.574204703655812e-05, + "loss": 2.4343, + "step": 1719500 + }, + { + "epoch": 8.52, + "learning_rate": 4.5740808450132034e-05, + "loss": 2.4705, + "step": 1720000 + }, + { + "epoch": 8.52, + "learning_rate": 4.573956986370595e-05, + "loss": 2.4513, + "step": 1720500 + }, + { + "epoch": 8.53, + "learning_rate": 4.573833127727987e-05, + "loss": 2.4702, + "step": 1721000 + }, + { + "epoch": 8.53, + "learning_rate": 4.5737092690853785e-05, + "loss": 2.4558, + "step": 1721500 + }, + { + "epoch": 8.53, + "learning_rate": 4.57358541044277e-05, + "loss": 2.4708, + "step": 1722000 + }, + { + "epoch": 8.53, + "learning_rate": 4.573461551800162e-05, + "loss": 2.4531, + "step": 1722500 + }, + { + "epoch": 8.54, + "learning_rate": 4.5733376931575535e-05, + "loss": 2.4685, + "step": 1723000 + }, + { + "epoch": 8.54, + "learning_rate": 4.573213834514945e-05, + "loss": 2.4622, + "step": 1723500 + }, + { + "epoch": 8.54, + "learning_rate": 4.573089975872337e-05, + "loss": 2.4663, + "step": 1724000 + }, + { + "epoch": 8.54, + "learning_rate": 4.572966364947013e-05, + "loss": 2.4865, + "step": 1724500 + }, + { + "epoch": 8.55, + "learning_rate": 4.572842506304405e-05, + "loss": 2.4635, + "step": 1725000 + }, + { + "epoch": 8.55, + "learning_rate": 4.5727186476617965e-05, + "loss": 2.4426, + "step": 1725500 + }, + { + "epoch": 8.55, + "learning_rate": 4.572594789019188e-05, + "loss": 2.4789, + "step": 1726000 + }, + { + "epoch": 8.55, + "learning_rate": 4.57247093037658e-05, + "loss": 2.4653, + "step": 1726500 + }, + { + "epoch": 8.56, + "learning_rate": 4.5723470717339716e-05, + "loss": 2.4574, + "step": 1727000 + }, + { + "epoch": 8.56, + "learning_rate": 4.572223213091363e-05, + "loss": 2.447, + "step": 1727500 + }, + { + "epoch": 8.56, + "learning_rate": 4.572099354448755e-05, + "loss": 2.4368, + "step": 1728000 + }, + { + "epoch": 8.56, + "learning_rate": 4.571975495806147e-05, + "loss": 2.4754, + "step": 1728500 + }, + { + "epoch": 8.57, + "learning_rate": 4.5718516371635384e-05, + "loss": 2.4884, + "step": 1729000 + }, + { + "epoch": 8.57, + "learning_rate": 4.57172777852093e-05, + "loss": 2.4793, + "step": 1729500 + }, + { + "epoch": 8.57, + "learning_rate": 4.571603919878322e-05, + "loss": 2.4638, + "step": 1730000 + }, + { + "epoch": 8.57, + "learning_rate": 4.5714803089529986e-05, + "loss": 2.4545, + "step": 1730500 + }, + { + "epoch": 8.58, + "learning_rate": 4.57135645031039e-05, + "loss": 2.4627, + "step": 1731000 + }, + { + "epoch": 8.58, + "learning_rate": 4.571232591667782e-05, + "loss": 2.46, + "step": 1731500 + }, + { + "epoch": 8.58, + "learning_rate": 4.571108733025174e-05, + "loss": 2.4664, + "step": 1732000 + }, + { + "epoch": 8.58, + "learning_rate": 4.570985369817135e-05, + "loss": 2.468, + "step": 1732500 + }, + { + "epoch": 8.59, + "learning_rate": 4.570861511174527e-05, + "loss": 2.4468, + "step": 1733000 + }, + { + "epoch": 8.59, + "learning_rate": 4.5707376525319185e-05, + "loss": 2.4756, + "step": 1733500 + }, + { + "epoch": 8.59, + "learning_rate": 4.57061379388931e-05, + "loss": 2.4659, + "step": 1734000 + }, + { + "epoch": 8.59, + "learning_rate": 4.570489935246702e-05, + "loss": 2.4663, + "step": 1734500 + }, + { + "epoch": 8.6, + "learning_rate": 4.570366324321379e-05, + "loss": 2.4561, + "step": 1735000 + }, + { + "epoch": 8.6, + "learning_rate": 4.5702424656787704e-05, + "loss": 2.4384, + "step": 1735500 + }, + { + "epoch": 8.6, + "learning_rate": 4.5701186070361615e-05, + "loss": 2.4622, + "step": 1736000 + }, + { + "epoch": 8.6, + "learning_rate": 4.569994748393553e-05, + "loss": 2.4654, + "step": 1736500 + }, + { + "epoch": 8.61, + "learning_rate": 4.569870889750945e-05, + "loss": 2.4673, + "step": 1737000 + }, + { + "epoch": 8.61, + "learning_rate": 4.5697472788256224e-05, + "loss": 2.4546, + "step": 1737500 + }, + { + "epoch": 8.61, + "learning_rate": 4.569623667900299e-05, + "loss": 2.4721, + "step": 1738000 + }, + { + "epoch": 8.61, + "learning_rate": 4.569499809257691e-05, + "loss": 2.4357, + "step": 1738500 + }, + { + "epoch": 8.62, + "learning_rate": 4.569375950615083e-05, + "loss": 2.4688, + "step": 1739000 + }, + { + "epoch": 8.62, + "learning_rate": 4.5692520919724744e-05, + "loss": 2.4617, + "step": 1739500 + }, + { + "epoch": 8.62, + "learning_rate": 4.569128233329866e-05, + "loss": 2.4783, + "step": 1740000 + }, + { + "epoch": 8.62, + "learning_rate": 4.569004374687257e-05, + "loss": 2.4474, + "step": 1740500 + }, + { + "epoch": 8.63, + "learning_rate": 4.568880516044649e-05, + "loss": 2.476, + "step": 1741000 + }, + { + "epoch": 8.63, + "learning_rate": 4.5687566574020405e-05, + "loss": 2.4684, + "step": 1741500 + }, + { + "epoch": 8.63, + "learning_rate": 4.568632798759432e-05, + "loss": 2.4567, + "step": 1742000 + }, + { + "epoch": 8.63, + "learning_rate": 4.568508940116824e-05, + "loss": 2.4522, + "step": 1742500 + }, + { + "epoch": 8.64, + "learning_rate": 4.568385081474215e-05, + "loss": 2.4613, + "step": 1743000 + }, + { + "epoch": 8.64, + "learning_rate": 4.5682612228316065e-05, + "loss": 2.481, + "step": 1743500 + }, + { + "epoch": 8.64, + "learning_rate": 4.568137611906284e-05, + "loss": 2.4624, + "step": 1744000 + }, + { + "epoch": 8.64, + "learning_rate": 4.568013753263675e-05, + "loss": 2.4642, + "step": 1744500 + }, + { + "epoch": 8.65, + "learning_rate": 4.567889894621067e-05, + "loss": 2.4412, + "step": 1745000 + }, + { + "epoch": 8.65, + "learning_rate": 4.5677660359784585e-05, + "loss": 2.4577, + "step": 1745500 + }, + { + "epoch": 8.65, + "learning_rate": 4.56764217733585e-05, + "loss": 2.4878, + "step": 1746000 + }, + { + "epoch": 8.65, + "learning_rate": 4.567518566410528e-05, + "loss": 2.4542, + "step": 1746500 + }, + { + "epoch": 8.66, + "learning_rate": 4.567394707767919e-05, + "loss": 2.466, + "step": 1747000 + }, + { + "epoch": 8.66, + "learning_rate": 4.5672708491253105e-05, + "loss": 2.4531, + "step": 1747500 + }, + { + "epoch": 8.66, + "learning_rate": 4.567146990482702e-05, + "loss": 2.4706, + "step": 1748000 + }, + { + "epoch": 8.66, + "learning_rate": 4.567023131840094e-05, + "loss": 2.4771, + "step": 1748500 + }, + { + "epoch": 8.67, + "learning_rate": 4.5668992731974855e-05, + "loss": 2.4667, + "step": 1749000 + }, + { + "epoch": 8.67, + "learning_rate": 4.5667754145548766e-05, + "loss": 2.4672, + "step": 1749500 + }, + { + "epoch": 8.67, + "learning_rate": 4.566651803629554e-05, + "loss": 2.4299, + "step": 1750000 + }, + { + "epoch": 8.67, + "learning_rate": 4.566527944986945e-05, + "loss": 2.4456, + "step": 1750500 + }, + { + "epoch": 8.68, + "learning_rate": 4.566404334061623e-05, + "loss": 2.4839, + "step": 1751000 + }, + { + "epoch": 8.68, + "learning_rate": 4.5662804754190144e-05, + "loss": 2.453, + "step": 1751500 + }, + { + "epoch": 8.68, + "learning_rate": 4.5661568644936906e-05, + "loss": 2.4527, + "step": 1752000 + }, + { + "epoch": 8.68, + "learning_rate": 4.566033005851082e-05, + "loss": 2.4506, + "step": 1752500 + }, + { + "epoch": 8.68, + "learning_rate": 4.565909147208474e-05, + "loss": 2.4345, + "step": 1753000 + }, + { + "epoch": 8.69, + "learning_rate": 4.565785288565866e-05, + "loss": 2.4588, + "step": 1753500 + }, + { + "epoch": 8.69, + "learning_rate": 4.5656614299232574e-05, + "loss": 2.4735, + "step": 1754000 + }, + { + "epoch": 8.69, + "learning_rate": 4.565537571280649e-05, + "loss": 2.4679, + "step": 1754500 + }, + { + "epoch": 8.69, + "learning_rate": 4.565413712638041e-05, + "loss": 2.4841, + "step": 1755000 + }, + { + "epoch": 8.7, + "learning_rate": 4.5652898539954324e-05, + "loss": 2.4871, + "step": 1755500 + }, + { + "epoch": 8.7, + "learning_rate": 4.565165995352824e-05, + "loss": 2.4499, + "step": 1756000 + }, + { + "epoch": 8.7, + "learning_rate": 4.565042136710216e-05, + "loss": 2.4511, + "step": 1756500 + }, + { + "epoch": 8.7, + "learning_rate": 4.564918278067607e-05, + "loss": 2.4915, + "step": 1757000 + }, + { + "epoch": 8.71, + "learning_rate": 4.5647944194249985e-05, + "loss": 2.4732, + "step": 1757500 + }, + { + "epoch": 8.71, + "learning_rate": 4.56467056078239e-05, + "loss": 2.4507, + "step": 1758000 + }, + { + "epoch": 8.71, + "learning_rate": 4.564546702139782e-05, + "loss": 2.4399, + "step": 1758500 + }, + { + "epoch": 8.71, + "learning_rate": 4.5644228434971736e-05, + "loss": 2.4765, + "step": 1759000 + }, + { + "epoch": 8.72, + "learning_rate": 4.564299480289136e-05, + "loss": 2.4684, + "step": 1759500 + }, + { + "epoch": 8.72, + "learning_rate": 4.5641756216465274e-05, + "loss": 2.4655, + "step": 1760000 + }, + { + "epoch": 8.72, + "learning_rate": 4.564051763003919e-05, + "loss": 2.4645, + "step": 1760500 + }, + { + "epoch": 8.72, + "learning_rate": 4.563927904361311e-05, + "loss": 2.4831, + "step": 1761000 + }, + { + "epoch": 8.73, + "learning_rate": 4.5638040457187025e-05, + "loss": 2.4761, + "step": 1761500 + }, + { + "epoch": 8.73, + "learning_rate": 4.563680187076094e-05, + "loss": 2.4824, + "step": 1762000 + }, + { + "epoch": 8.73, + "learning_rate": 4.563556328433486e-05, + "loss": 2.4461, + "step": 1762500 + }, + { + "epoch": 8.73, + "learning_rate": 4.563432469790877e-05, + "loss": 2.4469, + "step": 1763000 + }, + { + "epoch": 8.74, + "learning_rate": 4.5633086111482685e-05, + "loss": 2.4606, + "step": 1763500 + }, + { + "epoch": 8.74, + "learning_rate": 4.56318475250566e-05, + "loss": 2.4706, + "step": 1764000 + }, + { + "epoch": 8.74, + "learning_rate": 4.563061141580338e-05, + "loss": 2.4447, + "step": 1764500 + }, + { + "epoch": 8.74, + "learning_rate": 4.562937530655014e-05, + "loss": 2.4601, + "step": 1765000 + }, + { + "epoch": 8.75, + "learning_rate": 4.562813672012406e-05, + "loss": 2.4485, + "step": 1765500 + }, + { + "epoch": 8.75, + "learning_rate": 4.5626898133697974e-05, + "loss": 2.4624, + "step": 1766000 + }, + { + "epoch": 8.75, + "learning_rate": 4.562565954727189e-05, + "loss": 2.5026, + "step": 1766500 + }, + { + "epoch": 8.75, + "learning_rate": 4.562442096084581e-05, + "loss": 2.4555, + "step": 1767000 + }, + { + "epoch": 8.76, + "learning_rate": 4.5623182374419725e-05, + "loss": 2.4672, + "step": 1767500 + }, + { + "epoch": 8.76, + "learning_rate": 4.562194378799364e-05, + "loss": 2.4704, + "step": 1768000 + }, + { + "epoch": 8.76, + "learning_rate": 4.5620712633086114e-05, + "loss": 2.5025, + "step": 1768500 + }, + { + "epoch": 8.76, + "learning_rate": 4.561947404666003e-05, + "loss": 2.4781, + "step": 1769000 + }, + { + "epoch": 8.77, + "learning_rate": 4.561823546023395e-05, + "loss": 2.4675, + "step": 1769500 + }, + { + "epoch": 8.77, + "learning_rate": 4.561699687380786e-05, + "loss": 2.4574, + "step": 1770000 + }, + { + "epoch": 8.77, + "learning_rate": 4.5615758287381775e-05, + "loss": 2.4715, + "step": 1770500 + }, + { + "epoch": 8.77, + "learning_rate": 4.561451970095569e-05, + "loss": 2.4416, + "step": 1771000 + }, + { + "epoch": 8.78, + "learning_rate": 4.561328111452961e-05, + "loss": 2.4696, + "step": 1771500 + }, + { + "epoch": 8.78, + "learning_rate": 4.5612042528103526e-05, + "loss": 2.4609, + "step": 1772000 + }, + { + "epoch": 8.78, + "learning_rate": 4.561080394167744e-05, + "loss": 2.4613, + "step": 1772500 + }, + { + "epoch": 8.78, + "learning_rate": 4.560956535525136e-05, + "loss": 2.4924, + "step": 1773000 + }, + { + "epoch": 8.79, + "learning_rate": 4.560832676882528e-05, + "loss": 2.4574, + "step": 1773500 + }, + { + "epoch": 8.79, + "learning_rate": 4.5607088182399194e-05, + "loss": 2.479, + "step": 1774000 + }, + { + "epoch": 8.79, + "learning_rate": 4.560584959597311e-05, + "loss": 2.4495, + "step": 1774500 + }, + { + "epoch": 8.79, + "learning_rate": 4.560461100954703e-05, + "loss": 2.4675, + "step": 1775000 + }, + { + "epoch": 8.8, + "learning_rate": 4.5603372423120944e-05, + "loss": 2.472, + "step": 1775500 + }, + { + "epoch": 8.8, + "learning_rate": 4.560213631386771e-05, + "loss": 2.4506, + "step": 1776000 + }, + { + "epoch": 8.8, + "learning_rate": 4.560089772744163e-05, + "loss": 2.453, + "step": 1776500 + }, + { + "epoch": 8.8, + "learning_rate": 4.559965914101555e-05, + "loss": 2.4638, + "step": 1777000 + }, + { + "epoch": 8.81, + "learning_rate": 4.559842055458946e-05, + "loss": 2.4947, + "step": 1777500 + }, + { + "epoch": 8.81, + "learning_rate": 4.5597181968163374e-05, + "loss": 2.4435, + "step": 1778000 + }, + { + "epoch": 8.81, + "learning_rate": 4.559594338173729e-05, + "loss": 2.4739, + "step": 1778500 + }, + { + "epoch": 8.81, + "learning_rate": 4.559470727248406e-05, + "loss": 2.435, + "step": 1779000 + }, + { + "epoch": 8.82, + "learning_rate": 4.559346868605798e-05, + "loss": 2.4844, + "step": 1779500 + }, + { + "epoch": 8.82, + "learning_rate": 4.5592230099631894e-05, + "loss": 2.4432, + "step": 1780000 + }, + { + "epoch": 8.82, + "learning_rate": 4.559099151320581e-05, + "loss": 2.4639, + "step": 1780500 + }, + { + "epoch": 8.82, + "learning_rate": 4.558975540395258e-05, + "loss": 2.4533, + "step": 1781000 + }, + { + "epoch": 8.83, + "learning_rate": 4.558851929469935e-05, + "loss": 2.4545, + "step": 1781500 + }, + { + "epoch": 8.83, + "learning_rate": 4.5587280708273265e-05, + "loss": 2.4481, + "step": 1782000 + }, + { + "epoch": 8.83, + "learning_rate": 4.5586042121847175e-05, + "loss": 2.4613, + "step": 1782500 + }, + { + "epoch": 8.83, + "learning_rate": 4.558480353542109e-05, + "loss": 2.4775, + "step": 1783000 + }, + { + "epoch": 8.84, + "learning_rate": 4.558356494899501e-05, + "loss": 2.4433, + "step": 1783500 + }, + { + "epoch": 8.84, + "learning_rate": 4.5582326362568926e-05, + "loss": 2.4653, + "step": 1784000 + }, + { + "epoch": 8.84, + "learning_rate": 4.558108777614284e-05, + "loss": 2.4877, + "step": 1784500 + }, + { + "epoch": 8.84, + "learning_rate": 4.557984918971676e-05, + "loss": 2.4707, + "step": 1785000 + }, + { + "epoch": 8.85, + "learning_rate": 4.557861060329068e-05, + "loss": 2.4879, + "step": 1785500 + }, + { + "epoch": 8.85, + "learning_rate": 4.5577372016864594e-05, + "loss": 2.4454, + "step": 1786000 + }, + { + "epoch": 8.85, + "learning_rate": 4.557613343043851e-05, + "loss": 2.458, + "step": 1786500 + }, + { + "epoch": 8.85, + "learning_rate": 4.557489732118528e-05, + "loss": 2.4683, + "step": 1787000 + }, + { + "epoch": 8.86, + "learning_rate": 4.5573658734759196e-05, + "loss": 2.4623, + "step": 1787500 + }, + { + "epoch": 8.86, + "learning_rate": 4.5572420148333113e-05, + "loss": 2.4821, + "step": 1788000 + }, + { + "epoch": 8.86, + "learning_rate": 4.557118156190703e-05, + "loss": 2.456, + "step": 1788500 + }, + { + "epoch": 8.86, + "learning_rate": 4.556994297548095e-05, + "loss": 2.4476, + "step": 1789000 + }, + { + "epoch": 8.87, + "learning_rate": 4.5568704389054864e-05, + "loss": 2.4679, + "step": 1789500 + }, + { + "epoch": 8.87, + "learning_rate": 4.556746580262878e-05, + "loss": 2.4734, + "step": 1790000 + }, + { + "epoch": 8.87, + "learning_rate": 4.55662272162027e-05, + "loss": 2.4734, + "step": 1790500 + }, + { + "epoch": 8.87, + "learning_rate": 4.556498862977661e-05, + "loss": 2.4945, + "step": 1791000 + }, + { + "epoch": 8.88, + "learning_rate": 4.5563750043350525e-05, + "loss": 2.4307, + "step": 1791500 + }, + { + "epoch": 8.88, + "learning_rate": 4.5562513934097294e-05, + "loss": 2.4534, + "step": 1792000 + }, + { + "epoch": 8.88, + "learning_rate": 4.556127534767121e-05, + "loss": 2.4577, + "step": 1792500 + }, + { + "epoch": 8.88, + "learning_rate": 4.556003676124513e-05, + "loss": 2.4634, + "step": 1793000 + }, + { + "epoch": 8.89, + "learning_rate": 4.5558798174819045e-05, + "loss": 2.4808, + "step": 1793500 + }, + { + "epoch": 8.89, + "learning_rate": 4.555755958839296e-05, + "loss": 2.4439, + "step": 1794000 + }, + { + "epoch": 8.89, + "learning_rate": 4.555632100196688e-05, + "loss": 2.4453, + "step": 1794500 + }, + { + "epoch": 8.89, + "learning_rate": 4.555508489271365e-05, + "loss": 2.442, + "step": 1795000 + }, + { + "epoch": 8.9, + "learning_rate": 4.5553846306287564e-05, + "loss": 2.4453, + "step": 1795500 + }, + { + "epoch": 8.9, + "learning_rate": 4.555260771986148e-05, + "loss": 2.4428, + "step": 1796000 + }, + { + "epoch": 8.9, + "learning_rate": 4.555137161060824e-05, + "loss": 2.48, + "step": 1796500 + }, + { + "epoch": 8.9, + "learning_rate": 4.555013302418216e-05, + "loss": 2.4658, + "step": 1797000 + }, + { + "epoch": 8.91, + "learning_rate": 4.554889443775608e-05, + "loss": 2.4606, + "step": 1797500 + }, + { + "epoch": 8.91, + "learning_rate": 4.5547655851329994e-05, + "loss": 2.449, + "step": 1798000 + }, + { + "epoch": 8.91, + "learning_rate": 4.554641726490391e-05, + "loss": 2.4647, + "step": 1798500 + }, + { + "epoch": 8.91, + "learning_rate": 4.554517867847783e-05, + "loss": 2.4644, + "step": 1799000 + }, + { + "epoch": 8.92, + "learning_rate": 4.5543940092051745e-05, + "loss": 2.4737, + "step": 1799500 + }, + { + "epoch": 8.92, + "learning_rate": 4.554270150562566e-05, + "loss": 2.491, + "step": 1800000 + }, + { + "epoch": 8.92, + "learning_rate": 4.554146291919958e-05, + "loss": 2.4842, + "step": 1800500 + }, + { + "epoch": 8.92, + "learning_rate": 4.55402292871192e-05, + "loss": 2.4706, + "step": 1801000 + }, + { + "epoch": 8.93, + "learning_rate": 4.553899070069311e-05, + "loss": 2.4853, + "step": 1801500 + }, + { + "epoch": 8.93, + "learning_rate": 4.5537752114267026e-05, + "loss": 2.4699, + "step": 1802000 + }, + { + "epoch": 8.93, + "learning_rate": 4.5536513527840943e-05, + "loss": 2.4815, + "step": 1802500 + }, + { + "epoch": 8.93, + "learning_rate": 4.553527494141486e-05, + "loss": 2.458, + "step": 1803000 + }, + { + "epoch": 8.94, + "learning_rate": 4.553403635498878e-05, + "loss": 2.4418, + "step": 1803500 + }, + { + "epoch": 8.94, + "learning_rate": 4.5532797768562694e-05, + "loss": 2.4691, + "step": 1804000 + }, + { + "epoch": 8.94, + "learning_rate": 4.553155918213661e-05, + "loss": 2.4687, + "step": 1804500 + }, + { + "epoch": 8.94, + "learning_rate": 4.553032059571053e-05, + "loss": 2.4617, + "step": 1805000 + }, + { + "epoch": 8.95, + "learning_rate": 4.55290844864573e-05, + "loss": 2.491, + "step": 1805500 + }, + { + "epoch": 8.95, + "learning_rate": 4.5527845900031214e-05, + "loss": 2.4481, + "step": 1806000 + }, + { + "epoch": 8.95, + "learning_rate": 4.552660731360513e-05, + "loss": 2.447, + "step": 1806500 + }, + { + "epoch": 8.95, + "learning_rate": 4.552536872717905e-05, + "loss": 2.4697, + "step": 1807000 + }, + { + "epoch": 8.95, + "learning_rate": 4.5524132617925816e-05, + "loss": 2.4696, + "step": 1807500 + }, + { + "epoch": 8.96, + "learning_rate": 4.5522894031499727e-05, + "loss": 2.4883, + "step": 1808000 + }, + { + "epoch": 8.96, + "learning_rate": 4.5521655445073644e-05, + "loss": 2.429, + "step": 1808500 + }, + { + "epoch": 8.96, + "learning_rate": 4.552041685864756e-05, + "loss": 2.4636, + "step": 1809000 + }, + { + "epoch": 8.96, + "learning_rate": 4.5519180749394336e-05, + "loss": 2.4663, + "step": 1809500 + }, + { + "epoch": 8.97, + "learning_rate": 4.5517944640141105e-05, + "loss": 2.4526, + "step": 1810000 + }, + { + "epoch": 8.97, + "learning_rate": 4.551670605371502e-05, + "loss": 2.4824, + "step": 1810500 + }, + { + "epoch": 8.97, + "learning_rate": 4.551546746728894e-05, + "loss": 2.4671, + "step": 1811000 + }, + { + "epoch": 8.97, + "learning_rate": 4.5514228880862856e-05, + "loss": 2.4767, + "step": 1811500 + }, + { + "epoch": 8.98, + "learning_rate": 4.551299029443677e-05, + "loss": 2.4661, + "step": 1812000 + }, + { + "epoch": 8.98, + "learning_rate": 4.551175170801068e-05, + "loss": 2.4633, + "step": 1812500 + }, + { + "epoch": 8.98, + "learning_rate": 4.55105131215846e-05, + "loss": 2.4379, + "step": 1813000 + }, + { + "epoch": 8.98, + "learning_rate": 4.5509274535158517e-05, + "loss": 2.4667, + "step": 1813500 + }, + { + "epoch": 8.99, + "learning_rate": 4.5508035948732434e-05, + "loss": 2.4808, + "step": 1814000 + }, + { + "epoch": 8.99, + "learning_rate": 4.55067998394792e-05, + "loss": 2.4708, + "step": 1814500 + }, + { + "epoch": 8.99, + "learning_rate": 4.550556125305312e-05, + "loss": 2.4644, + "step": 1815000 + }, + { + "epoch": 8.99, + "learning_rate": 4.5504322666627036e-05, + "loss": 2.4861, + "step": 1815500 + }, + { + "epoch": 9.0, + "learning_rate": 4.550308408020095e-05, + "loss": 2.4598, + "step": 1816000 + }, + { + "epoch": 9.0, + "learning_rate": 4.550184549377487e-05, + "loss": 2.4818, + "step": 1816500 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.6403843342409302, + "eval_accuracy_mlm": 0.5934790457919371, + "eval_accuracy_nsp": 0.8615189108837107, + "eval_loss": 2.434657096862793, + "eval_runtime": 145.7954, + "eval_samples_per_second": 1748.745, + "eval_steps_per_second": 72.869, + "step": 1816587 + }, + { + "epoch": 9.0, + "learning_rate": 4.550060938452164e-05, + "loss": 2.424, + "step": 1817000 + }, + { + "epoch": 9.0, + "learning_rate": 4.5499370798095556e-05, + "loss": 2.426, + "step": 1817500 + }, + { + "epoch": 9.01, + "learning_rate": 4.549813221166947e-05, + "loss": 2.4208, + "step": 1818000 + }, + { + "epoch": 9.01, + "learning_rate": 4.549689362524339e-05, + "loss": 2.4265, + "step": 1818500 + }, + { + "epoch": 9.01, + "learning_rate": 4.549565751599015e-05, + "loss": 2.4424, + "step": 1819000 + }, + { + "epoch": 9.01, + "learning_rate": 4.549441892956407e-05, + "loss": 2.4181, + "step": 1819500 + }, + { + "epoch": 9.02, + "learning_rate": 4.5493180343137986e-05, + "loss": 2.4305, + "step": 1820000 + }, + { + "epoch": 9.02, + "learning_rate": 4.54919417567119e-05, + "loss": 2.4542, + "step": 1820500 + }, + { + "epoch": 9.02, + "learning_rate": 4.549070317028582e-05, + "loss": 2.4495, + "step": 1821000 + }, + { + "epoch": 9.02, + "learning_rate": 4.5489464583859736e-05, + "loss": 2.4497, + "step": 1821500 + }, + { + "epoch": 9.03, + "learning_rate": 4.5488228474606505e-05, + "loss": 2.442, + "step": 1822000 + }, + { + "epoch": 9.03, + "learning_rate": 4.548698988818042e-05, + "loss": 2.4593, + "step": 1822500 + }, + { + "epoch": 9.03, + "learning_rate": 4.548575130175434e-05, + "loss": 2.4229, + "step": 1823000 + }, + { + "epoch": 9.03, + "learning_rate": 4.5484512715328256e-05, + "loss": 2.4115, + "step": 1823500 + }, + { + "epoch": 9.04, + "learning_rate": 4.548327412890217e-05, + "loss": 2.4466, + "step": 1824000 + }, + { + "epoch": 9.04, + "learning_rate": 4.548203554247609e-05, + "loss": 2.4517, + "step": 1824500 + }, + { + "epoch": 9.04, + "learning_rate": 4.548079695605001e-05, + "loss": 2.4276, + "step": 1825000 + }, + { + "epoch": 9.04, + "learning_rate": 4.5479558369623924e-05, + "loss": 2.4327, + "step": 1825500 + }, + { + "epoch": 9.05, + "learning_rate": 4.5478319783197834e-05, + "loss": 2.4248, + "step": 1826000 + }, + { + "epoch": 9.05, + "learning_rate": 4.547708119677175e-05, + "loss": 2.4232, + "step": 1826500 + }, + { + "epoch": 9.05, + "learning_rate": 4.547584508751852e-05, + "loss": 2.4336, + "step": 1827000 + }, + { + "epoch": 9.05, + "learning_rate": 4.547460897826529e-05, + "loss": 2.4489, + "step": 1827500 + }, + { + "epoch": 9.06, + "learning_rate": 4.5473370391839205e-05, + "loss": 2.4391, + "step": 1828000 + }, + { + "epoch": 9.06, + "learning_rate": 4.5472134282585974e-05, + "loss": 2.4429, + "step": 1828500 + }, + { + "epoch": 9.06, + "learning_rate": 4.5470895696159884e-05, + "loss": 2.4328, + "step": 1829000 + }, + { + "epoch": 9.06, + "learning_rate": 4.54696571097338e-05, + "loss": 2.4022, + "step": 1829500 + }, + { + "epoch": 9.07, + "learning_rate": 4.546841852330772e-05, + "loss": 2.4167, + "step": 1830000 + }, + { + "epoch": 9.07, + "learning_rate": 4.5467179936881635e-05, + "loss": 2.4667, + "step": 1830500 + }, + { + "epoch": 9.07, + "learning_rate": 4.546594135045555e-05, + "loss": 2.4303, + "step": 1831000 + }, + { + "epoch": 9.07, + "learning_rate": 4.546470276402947e-05, + "loss": 2.4245, + "step": 1831500 + }, + { + "epoch": 9.08, + "learning_rate": 4.5463464177603386e-05, + "loss": 2.4439, + "step": 1832000 + }, + { + "epoch": 9.08, + "learning_rate": 4.54622255911773e-05, + "loss": 2.447, + "step": 1832500 + }, + { + "epoch": 9.08, + "learning_rate": 4.546098700475122e-05, + "loss": 2.4434, + "step": 1833000 + }, + { + "epoch": 9.08, + "learning_rate": 4.5459748418325137e-05, + "loss": 2.4311, + "step": 1833500 + }, + { + "epoch": 9.09, + "learning_rate": 4.5458509831899053e-05, + "loss": 2.4513, + "step": 1834000 + }, + { + "epoch": 9.09, + "learning_rate": 4.545727372264582e-05, + "loss": 2.4607, + "step": 1834500 + }, + { + "epoch": 9.09, + "learning_rate": 4.545603513621974e-05, + "loss": 2.4291, + "step": 1835000 + }, + { + "epoch": 9.09, + "learning_rate": 4.5454796549793656e-05, + "loss": 2.4439, + "step": 1835500 + }, + { + "epoch": 9.1, + "learning_rate": 4.545355796336757e-05, + "loss": 2.4741, + "step": 1836000 + }, + { + "epoch": 9.1, + "learning_rate": 4.5452321854114335e-05, + "loss": 2.4591, + "step": 1836500 + }, + { + "epoch": 9.1, + "learning_rate": 4.545108326768825e-05, + "loss": 2.4535, + "step": 1837000 + }, + { + "epoch": 9.1, + "learning_rate": 4.544984468126217e-05, + "loss": 2.4464, + "step": 1837500 + }, + { + "epoch": 9.11, + "learning_rate": 4.5448606094836086e-05, + "loss": 2.4405, + "step": 1838000 + }, + { + "epoch": 9.11, + "learning_rate": 4.544736750841e-05, + "loss": 2.4423, + "step": 1838500 + }, + { + "epoch": 9.11, + "learning_rate": 4.544612892198392e-05, + "loss": 2.4383, + "step": 1839000 + }, + { + "epoch": 9.11, + "learning_rate": 4.544489033555784e-05, + "loss": 2.4421, + "step": 1839500 + }, + { + "epoch": 9.12, + "learning_rate": 4.5443651749131754e-05, + "loss": 2.4476, + "step": 1840000 + }, + { + "epoch": 9.12, + "learning_rate": 4.544241316270567e-05, + "loss": 2.4121, + "step": 1840500 + }, + { + "epoch": 9.12, + "learning_rate": 4.544117457627959e-05, + "loss": 2.4078, + "step": 1841000 + }, + { + "epoch": 9.12, + "learning_rate": 4.5439935989853504e-05, + "loss": 2.4349, + "step": 1841500 + }, + { + "epoch": 9.13, + "learning_rate": 4.543869988060027e-05, + "loss": 2.4562, + "step": 1842000 + }, + { + "epoch": 9.13, + "learning_rate": 4.543746129417419e-05, + "loss": 2.404, + "step": 1842500 + }, + { + "epoch": 9.13, + "learning_rate": 4.543622270774811e-05, + "loss": 2.4584, + "step": 1843000 + }, + { + "epoch": 9.13, + "learning_rate": 4.543498659849487e-05, + "loss": 2.4404, + "step": 1843500 + }, + { + "epoch": 9.14, + "learning_rate": 4.5433748012068786e-05, + "loss": 2.4358, + "step": 1844000 + }, + { + "epoch": 9.14, + "learning_rate": 4.54325094256427e-05, + "loss": 2.4061, + "step": 1844500 + }, + { + "epoch": 9.14, + "learning_rate": 4.543127083921662e-05, + "loss": 2.4233, + "step": 1845000 + }, + { + "epoch": 9.14, + "learning_rate": 4.543003472996339e-05, + "loss": 2.4524, + "step": 1845500 + }, + { + "epoch": 9.15, + "learning_rate": 4.542879862071016e-05, + "loss": 2.4392, + "step": 1846000 + }, + { + "epoch": 9.15, + "learning_rate": 4.5427562511456926e-05, + "loss": 2.4372, + "step": 1846500 + }, + { + "epoch": 9.15, + "learning_rate": 4.542632392503084e-05, + "loss": 2.4474, + "step": 1847000 + }, + { + "epoch": 9.15, + "learning_rate": 4.542508533860476e-05, + "loss": 2.4468, + "step": 1847500 + }, + { + "epoch": 9.16, + "learning_rate": 4.542384675217868e-05, + "loss": 2.4432, + "step": 1848000 + }, + { + "epoch": 9.16, + "learning_rate": 4.5422608165752594e-05, + "loss": 2.4501, + "step": 1848500 + }, + { + "epoch": 9.16, + "learning_rate": 4.5421369579326504e-05, + "loss": 2.4495, + "step": 1849000 + }, + { + "epoch": 9.16, + "learning_rate": 4.542013099290042e-05, + "loss": 2.4418, + "step": 1849500 + }, + { + "epoch": 9.17, + "learning_rate": 4.541889240647434e-05, + "loss": 2.4484, + "step": 1850000 + }, + { + "epoch": 9.17, + "learning_rate": 4.5417653820048255e-05, + "loss": 2.4385, + "step": 1850500 + }, + { + "epoch": 9.17, + "learning_rate": 4.541641523362217e-05, + "loss": 2.4258, + "step": 1851000 + }, + { + "epoch": 9.17, + "learning_rate": 4.541517664719609e-05, + "loss": 2.4504, + "step": 1851500 + }, + { + "epoch": 9.18, + "learning_rate": 4.5413938060770006e-05, + "loss": 2.458, + "step": 1852000 + }, + { + "epoch": 9.18, + "learning_rate": 4.541269947434392e-05, + "loss": 2.4314, + "step": 1852500 + }, + { + "epoch": 9.18, + "learning_rate": 4.541146088791784e-05, + "loss": 2.436, + "step": 1853000 + }, + { + "epoch": 9.18, + "learning_rate": 4.5410222301491757e-05, + "loss": 2.426, + "step": 1853500 + }, + { + "epoch": 9.19, + "learning_rate": 4.5408983715065673e-05, + "loss": 2.435, + "step": 1854000 + }, + { + "epoch": 9.19, + "learning_rate": 4.540774512863959e-05, + "loss": 2.4389, + "step": 1854500 + }, + { + "epoch": 9.19, + "learning_rate": 4.540650901938636e-05, + "loss": 2.461, + "step": 1855000 + }, + { + "epoch": 9.19, + "learning_rate": 4.5405270432960276e-05, + "loss": 2.4472, + "step": 1855500 + }, + { + "epoch": 9.2, + "learning_rate": 4.540403432370704e-05, + "loss": 2.4388, + "step": 1856000 + }, + { + "epoch": 9.2, + "learning_rate": 4.5402795737280955e-05, + "loss": 2.4427, + "step": 1856500 + }, + { + "epoch": 9.2, + "learning_rate": 4.540155715085487e-05, + "loss": 2.4563, + "step": 1857000 + }, + { + "epoch": 9.2, + "learning_rate": 4.540032104160165e-05, + "loss": 2.4632, + "step": 1857500 + }, + { + "epoch": 9.21, + "learning_rate": 4.539908245517556e-05, + "loss": 2.4482, + "step": 1858000 + }, + { + "epoch": 9.21, + "learning_rate": 4.5397843868749475e-05, + "loss": 2.4332, + "step": 1858500 + }, + { + "epoch": 9.21, + "learning_rate": 4.539660528232339e-05, + "loss": 2.4423, + "step": 1859000 + }, + { + "epoch": 9.21, + "learning_rate": 4.539536669589731e-05, + "loss": 2.4584, + "step": 1859500 + }, + { + "epoch": 9.22, + "learning_rate": 4.5394128109471225e-05, + "loss": 2.4123, + "step": 1860000 + }, + { + "epoch": 9.22, + "learning_rate": 4.539288952304514e-05, + "loss": 2.4395, + "step": 1860500 + }, + { + "epoch": 9.22, + "learning_rate": 4.539165093661906e-05, + "loss": 2.4833, + "step": 1861000 + }, + { + "epoch": 9.22, + "learning_rate": 4.5390412350192976e-05, + "loss": 2.4595, + "step": 1861500 + }, + { + "epoch": 9.22, + "learning_rate": 4.538917376376689e-05, + "loss": 2.4364, + "step": 1862000 + }, + { + "epoch": 9.23, + "learning_rate": 4.538793517734081e-05, + "loss": 2.4205, + "step": 1862500 + }, + { + "epoch": 9.23, + "learning_rate": 4.538669659091472e-05, + "loss": 2.4233, + "step": 1863000 + }, + { + "epoch": 9.23, + "learning_rate": 4.538545800448864e-05, + "loss": 2.4394, + "step": 1863500 + }, + { + "epoch": 9.23, + "learning_rate": 4.5384219418062554e-05, + "loss": 2.4386, + "step": 1864000 + }, + { + "epoch": 9.24, + "learning_rate": 4.538298330880932e-05, + "loss": 2.4466, + "step": 1864500 + }, + { + "epoch": 9.24, + "learning_rate": 4.538174472238324e-05, + "loss": 2.4483, + "step": 1865000 + }, + { + "epoch": 9.24, + "learning_rate": 4.538050613595716e-05, + "loss": 2.454, + "step": 1865500 + }, + { + "epoch": 9.24, + "learning_rate": 4.5379267549531074e-05, + "loss": 2.4675, + "step": 1866000 + }, + { + "epoch": 9.25, + "learning_rate": 4.537803144027784e-05, + "loss": 2.4575, + "step": 1866500 + }, + { + "epoch": 9.25, + "learning_rate": 4.537679285385176e-05, + "loss": 2.4484, + "step": 1867000 + }, + { + "epoch": 9.25, + "learning_rate": 4.5375554267425676e-05, + "loss": 2.4436, + "step": 1867500 + }, + { + "epoch": 9.25, + "learning_rate": 4.537431568099959e-05, + "loss": 2.4271, + "step": 1868000 + }, + { + "epoch": 9.26, + "learning_rate": 4.537307709457351e-05, + "loss": 2.4645, + "step": 1868500 + }, + { + "epoch": 9.26, + "learning_rate": 4.537183850814743e-05, + "loss": 2.4648, + "step": 1869000 + }, + { + "epoch": 9.26, + "learning_rate": 4.537059992172134e-05, + "loss": 2.4415, + "step": 1869500 + }, + { + "epoch": 9.26, + "learning_rate": 4.5369363812468106e-05, + "loss": 2.4357, + "step": 1870000 + }, + { + "epoch": 9.27, + "learning_rate": 4.536812522604202e-05, + "loss": 2.4403, + "step": 1870500 + }, + { + "epoch": 9.27, + "learning_rate": 4.536688663961594e-05, + "loss": 2.4475, + "step": 1871000 + }, + { + "epoch": 9.27, + "learning_rate": 4.536564805318986e-05, + "loss": 2.4376, + "step": 1871500 + }, + { + "epoch": 9.27, + "learning_rate": 4.5364411943936626e-05, + "loss": 2.4186, + "step": 1872000 + }, + { + "epoch": 9.28, + "learning_rate": 4.536317335751054e-05, + "loss": 2.4547, + "step": 1872500 + }, + { + "epoch": 9.28, + "learning_rate": 4.536193477108446e-05, + "loss": 2.4273, + "step": 1873000 + }, + { + "epoch": 9.28, + "learning_rate": 4.5360696184658376e-05, + "loss": 2.453, + "step": 1873500 + }, + { + "epoch": 9.28, + "learning_rate": 4.5359457598232293e-05, + "loss": 2.4315, + "step": 1874000 + }, + { + "epoch": 9.29, + "learning_rate": 4.535821901180621e-05, + "loss": 2.4556, + "step": 1874500 + }, + { + "epoch": 9.29, + "learning_rate": 4.535698042538013e-05, + "loss": 2.4386, + "step": 1875000 + }, + { + "epoch": 9.29, + "learning_rate": 4.5355741838954044e-05, + "loss": 2.4383, + "step": 1875500 + }, + { + "epoch": 9.29, + "learning_rate": 4.535450325252796e-05, + "loss": 2.4127, + "step": 1876000 + }, + { + "epoch": 9.3, + "learning_rate": 4.535326466610187e-05, + "loss": 2.4339, + "step": 1876500 + }, + { + "epoch": 9.3, + "learning_rate": 4.535202607967579e-05, + "loss": 2.4542, + "step": 1877000 + }, + { + "epoch": 9.3, + "learning_rate": 4.5350787493249705e-05, + "loss": 2.4275, + "step": 1877500 + }, + { + "epoch": 9.3, + "learning_rate": 4.5349551383996474e-05, + "loss": 2.4308, + "step": 1878000 + }, + { + "epoch": 9.31, + "learning_rate": 4.534831279757039e-05, + "loss": 2.4147, + "step": 1878500 + }, + { + "epoch": 9.31, + "learning_rate": 4.534707421114431e-05, + "loss": 2.4667, + "step": 1879000 + }, + { + "epoch": 9.31, + "learning_rate": 4.5345835624718225e-05, + "loss": 2.4236, + "step": 1879500 + }, + { + "epoch": 9.31, + "learning_rate": 4.534459703829214e-05, + "loss": 2.4343, + "step": 1880000 + }, + { + "epoch": 9.32, + "learning_rate": 4.534335845186606e-05, + "loss": 2.4451, + "step": 1880500 + }, + { + "epoch": 9.32, + "learning_rate": 4.5342119865439975e-05, + "loss": 2.4541, + "step": 1881000 + }, + { + "epoch": 9.32, + "learning_rate": 4.534088127901389e-05, + "loss": 2.4212, + "step": 1881500 + }, + { + "epoch": 9.32, + "learning_rate": 4.533964516976066e-05, + "loss": 2.4321, + "step": 1882000 + }, + { + "epoch": 9.33, + "learning_rate": 4.533840658333458e-05, + "loss": 2.4481, + "step": 1882500 + }, + { + "epoch": 9.33, + "learning_rate": 4.533716799690849e-05, + "loss": 2.4062, + "step": 1883000 + }, + { + "epoch": 9.33, + "learning_rate": 4.5335929410482405e-05, + "loss": 2.4301, + "step": 1883500 + }, + { + "epoch": 9.33, + "learning_rate": 4.533469082405632e-05, + "loss": 2.4163, + "step": 1884000 + }, + { + "epoch": 9.34, + "learning_rate": 4.533345471480309e-05, + "loss": 2.4449, + "step": 1884500 + }, + { + "epoch": 9.34, + "learning_rate": 4.533221612837701e-05, + "loss": 2.4703, + "step": 1885000 + }, + { + "epoch": 9.34, + "learning_rate": 4.5330977541950925e-05, + "loss": 2.4336, + "step": 1885500 + }, + { + "epoch": 9.34, + "learning_rate": 4.5329741432697694e-05, + "loss": 2.4546, + "step": 1886000 + }, + { + "epoch": 9.35, + "learning_rate": 4.532850284627161e-05, + "loss": 2.4623, + "step": 1886500 + }, + { + "epoch": 9.35, + "learning_rate": 4.532726425984553e-05, + "loss": 2.4571, + "step": 1887000 + }, + { + "epoch": 9.35, + "learning_rate": 4.5326025673419444e-05, + "loss": 2.4461, + "step": 1887500 + }, + { + "epoch": 9.35, + "learning_rate": 4.532478708699336e-05, + "loss": 2.4349, + "step": 1888000 + }, + { + "epoch": 9.36, + "learning_rate": 4.532354850056728e-05, + "loss": 2.4261, + "step": 1888500 + }, + { + "epoch": 9.36, + "learning_rate": 4.5322309914141195e-05, + "loss": 2.4164, + "step": 1889000 + }, + { + "epoch": 9.36, + "learning_rate": 4.532107132771511e-05, + "loss": 2.462, + "step": 1889500 + }, + { + "epoch": 9.36, + "learning_rate": 4.5319835218461874e-05, + "loss": 2.4433, + "step": 1890000 + }, + { + "epoch": 9.37, + "learning_rate": 4.531859663203579e-05, + "loss": 2.4574, + "step": 1890500 + }, + { + "epoch": 9.37, + "learning_rate": 4.531735804560971e-05, + "loss": 2.4477, + "step": 1891000 + }, + { + "epoch": 9.37, + "learning_rate": 4.5316119459183625e-05, + "loss": 2.4444, + "step": 1891500 + }, + { + "epoch": 9.37, + "learning_rate": 4.531488087275754e-05, + "loss": 2.4569, + "step": 1892000 + }, + { + "epoch": 9.38, + "learning_rate": 4.531364476350431e-05, + "loss": 2.4576, + "step": 1892500 + }, + { + "epoch": 9.38, + "learning_rate": 4.531240617707823e-05, + "loss": 2.4366, + "step": 1893000 + }, + { + "epoch": 9.38, + "learning_rate": 4.5311167590652145e-05, + "loss": 2.455, + "step": 1893500 + }, + { + "epoch": 9.38, + "learning_rate": 4.530992900422606e-05, + "loss": 2.4634, + "step": 1894000 + }, + { + "epoch": 9.39, + "learning_rate": 4.530869041779998e-05, + "loss": 2.4578, + "step": 1894500 + }, + { + "epoch": 9.39, + "learning_rate": 4.530745430854674e-05, + "loss": 2.4391, + "step": 1895000 + }, + { + "epoch": 9.39, + "learning_rate": 4.530621572212066e-05, + "loss": 2.4376, + "step": 1895500 + }, + { + "epoch": 9.39, + "learning_rate": 4.5304977135694574e-05, + "loss": 2.4466, + "step": 1896000 + }, + { + "epoch": 9.4, + "learning_rate": 4.530373854926849e-05, + "loss": 2.429, + "step": 1896500 + }, + { + "epoch": 9.4, + "learning_rate": 4.530249996284241e-05, + "loss": 2.4456, + "step": 1897000 + }, + { + "epoch": 9.4, + "learning_rate": 4.5301261376416325e-05, + "loss": 2.4541, + "step": 1897500 + }, + { + "epoch": 9.4, + "learning_rate": 4.530002278999024e-05, + "loss": 2.4651, + "step": 1898000 + }, + { + "epoch": 9.41, + "learning_rate": 4.529878668073701e-05, + "loss": 2.4453, + "step": 1898500 + }, + { + "epoch": 9.41, + "learning_rate": 4.529754809431093e-05, + "loss": 2.4451, + "step": 1899000 + }, + { + "epoch": 9.41, + "learning_rate": 4.5296309507884845e-05, + "loss": 2.4266, + "step": 1899500 + }, + { + "epoch": 9.41, + "learning_rate": 4.529507092145876e-05, + "loss": 2.455, + "step": 1900000 + }, + { + "epoch": 9.42, + "learning_rate": 4.529383233503268e-05, + "loss": 2.4657, + "step": 1900500 + }, + { + "epoch": 9.42, + "learning_rate": 4.5292593748606595e-05, + "loss": 2.4469, + "step": 1901000 + }, + { + "epoch": 9.42, + "learning_rate": 4.529135516218051e-05, + "loss": 2.4411, + "step": 1901500 + }, + { + "epoch": 9.42, + "learning_rate": 4.5290121530100126e-05, + "loss": 2.4473, + "step": 1902000 + }, + { + "epoch": 9.43, + "learning_rate": 4.528888294367404e-05, + "loss": 2.4457, + "step": 1902500 + }, + { + "epoch": 9.43, + "learning_rate": 4.528764435724796e-05, + "loss": 2.4583, + "step": 1903000 + }, + { + "epoch": 9.43, + "learning_rate": 4.528640577082188e-05, + "loss": 2.4524, + "step": 1903500 + }, + { + "epoch": 9.43, + "learning_rate": 4.5285167184395794e-05, + "loss": 2.4532, + "step": 1904000 + }, + { + "epoch": 9.44, + "learning_rate": 4.528392859796971e-05, + "loss": 2.4809, + "step": 1904500 + }, + { + "epoch": 9.44, + "learning_rate": 4.528269001154363e-05, + "loss": 2.4212, + "step": 1905000 + }, + { + "epoch": 9.44, + "learning_rate": 4.5281451425117545e-05, + "loss": 2.4673, + "step": 1905500 + }, + { + "epoch": 9.44, + "learning_rate": 4.528021283869146e-05, + "loss": 2.4051, + "step": 1906000 + }, + { + "epoch": 9.45, + "learning_rate": 4.527897425226538e-05, + "loss": 2.4195, + "step": 1906500 + }, + { + "epoch": 9.45, + "learning_rate": 4.5277735665839296e-05, + "loss": 2.4623, + "step": 1907000 + }, + { + "epoch": 9.45, + "learning_rate": 4.527649955658606e-05, + "loss": 2.4388, + "step": 1907500 + }, + { + "epoch": 9.45, + "learning_rate": 4.5275260970159975e-05, + "loss": 2.4585, + "step": 1908000 + }, + { + "epoch": 9.46, + "learning_rate": 4.527402238373389e-05, + "loss": 2.4361, + "step": 1908500 + }, + { + "epoch": 9.46, + "learning_rate": 4.527278379730781e-05, + "loss": 2.442, + "step": 1909000 + }, + { + "epoch": 9.46, + "learning_rate": 4.5271545210881725e-05, + "loss": 2.4635, + "step": 1909500 + }, + { + "epoch": 9.46, + "learning_rate": 4.527030662445564e-05, + "loss": 2.4437, + "step": 1910000 + }, + { + "epoch": 9.47, + "learning_rate": 4.526906803802956e-05, + "loss": 2.4507, + "step": 1910500 + }, + { + "epoch": 9.47, + "learning_rate": 4.5267829451603476e-05, + "loss": 2.4323, + "step": 1911000 + }, + { + "epoch": 9.47, + "learning_rate": 4.526659086517739e-05, + "loss": 2.45, + "step": 1911500 + }, + { + "epoch": 9.47, + "learning_rate": 4.526535227875131e-05, + "loss": 2.4588, + "step": 1912000 + }, + { + "epoch": 9.48, + "learning_rate": 4.526411369232523e-05, + "loss": 2.4592, + "step": 1912500 + }, + { + "epoch": 9.48, + "learning_rate": 4.5262875105899144e-05, + "loss": 2.4493, + "step": 1913000 + }, + { + "epoch": 9.48, + "learning_rate": 4.526163899664591e-05, + "loss": 2.4565, + "step": 1913500 + }, + { + "epoch": 9.48, + "learning_rate": 4.5260402887392675e-05, + "loss": 2.4562, + "step": 1914000 + }, + { + "epoch": 9.49, + "learning_rate": 4.525916430096659e-05, + "loss": 2.463, + "step": 1914500 + }, + { + "epoch": 9.49, + "learning_rate": 4.525792819171336e-05, + "loss": 2.4384, + "step": 1915000 + }, + { + "epoch": 9.49, + "learning_rate": 4.525668960528728e-05, + "loss": 2.429, + "step": 1915500 + }, + { + "epoch": 9.49, + "learning_rate": 4.5255451018861194e-05, + "loss": 2.4265, + "step": 1916000 + }, + { + "epoch": 9.5, + "learning_rate": 4.525421243243511e-05, + "loss": 2.457, + "step": 1916500 + }, + { + "epoch": 9.5, + "learning_rate": 4.525297384600903e-05, + "loss": 2.4622, + "step": 1917000 + }, + { + "epoch": 9.5, + "learning_rate": 4.52517377367558e-05, + "loss": 2.4689, + "step": 1917500 + }, + { + "epoch": 9.5, + "learning_rate": 4.5250499150329714e-05, + "loss": 2.4288, + "step": 1918000 + }, + { + "epoch": 9.5, + "learning_rate": 4.524926304107648e-05, + "loss": 2.454, + "step": 1918500 + }, + { + "epoch": 9.51, + "learning_rate": 4.52480244546504e-05, + "loss": 2.4701, + "step": 1919000 + }, + { + "epoch": 9.51, + "learning_rate": 4.5246785868224317e-05, + "loss": 2.4518, + "step": 1919500 + }, + { + "epoch": 9.51, + "learning_rate": 4.5245547281798233e-05, + "loss": 2.4624, + "step": 1920000 + }, + { + "epoch": 9.51, + "learning_rate": 4.5244308695372144e-05, + "loss": 2.4445, + "step": 1920500 + }, + { + "epoch": 9.52, + "learning_rate": 4.524307010894606e-05, + "loss": 2.4495, + "step": 1921000 + }, + { + "epoch": 9.52, + "learning_rate": 4.524183152251998e-05, + "loss": 2.4558, + "step": 1921500 + }, + { + "epoch": 9.52, + "learning_rate": 4.5240592936093894e-05, + "loss": 2.4547, + "step": 1922000 + }, + { + "epoch": 9.52, + "learning_rate": 4.523935682684067e-05, + "loss": 2.441, + "step": 1922500 + }, + { + "epoch": 9.53, + "learning_rate": 4.523811824041459e-05, + "loss": 2.4597, + "step": 1923000 + }, + { + "epoch": 9.53, + "learning_rate": 4.5236879653988504e-05, + "loss": 2.444, + "step": 1923500 + }, + { + "epoch": 9.53, + "learning_rate": 4.523564602190812e-05, + "loss": 2.4567, + "step": 1924000 + }, + { + "epoch": 9.53, + "learning_rate": 4.5234407435482035e-05, + "loss": 2.4353, + "step": 1924500 + }, + { + "epoch": 9.54, + "learning_rate": 4.523316884905595e-05, + "loss": 2.4716, + "step": 1925000 + }, + { + "epoch": 9.54, + "learning_rate": 4.523193026262987e-05, + "loss": 2.4343, + "step": 1925500 + }, + { + "epoch": 9.54, + "learning_rate": 4.5230691676203785e-05, + "loss": 2.4463, + "step": 1926000 + }, + { + "epoch": 9.54, + "learning_rate": 4.52294530897777e-05, + "loss": 2.438, + "step": 1926500 + }, + { + "epoch": 9.55, + "learning_rate": 4.522821450335162e-05, + "loss": 2.4469, + "step": 1927000 + }, + { + "epoch": 9.55, + "learning_rate": 4.5226975916925536e-05, + "loss": 2.4329, + "step": 1927500 + }, + { + "epoch": 9.55, + "learning_rate": 4.522573733049945e-05, + "loss": 2.4451, + "step": 1928000 + }, + { + "epoch": 9.55, + "learning_rate": 4.522449874407337e-05, + "loss": 2.4711, + "step": 1928500 + }, + { + "epoch": 9.56, + "learning_rate": 4.522326015764729e-05, + "loss": 2.4351, + "step": 1929000 + }, + { + "epoch": 9.56, + "learning_rate": 4.5222021571221204e-05, + "loss": 2.4218, + "step": 1929500 + }, + { + "epoch": 9.56, + "learning_rate": 4.5220782984795114e-05, + "loss": 2.437, + "step": 1930000 + }, + { + "epoch": 9.56, + "learning_rate": 4.521954439836903e-05, + "loss": 2.4446, + "step": 1930500 + }, + { + "epoch": 9.57, + "learning_rate": 4.52183082891158e-05, + "loss": 2.4568, + "step": 1931000 + }, + { + "epoch": 9.57, + "learning_rate": 4.521706970268972e-05, + "loss": 2.4456, + "step": 1931500 + }, + { + "epoch": 9.57, + "learning_rate": 4.5215831116263634e-05, + "loss": 2.435, + "step": 1932000 + }, + { + "epoch": 9.57, + "learning_rate": 4.521459252983755e-05, + "loss": 2.4542, + "step": 1932500 + }, + { + "epoch": 9.58, + "learning_rate": 4.521335394341146e-05, + "loss": 2.4418, + "step": 1933000 + }, + { + "epoch": 9.58, + "learning_rate": 4.521211535698538e-05, + "loss": 2.4507, + "step": 1933500 + }, + { + "epoch": 9.58, + "learning_rate": 4.5210876770559295e-05, + "loss": 2.4544, + "step": 1934000 + }, + { + "epoch": 9.58, + "learning_rate": 4.520963818413321e-05, + "loss": 2.4652, + "step": 1934500 + }, + { + "epoch": 9.59, + "learning_rate": 4.520840207487999e-05, + "loss": 2.4531, + "step": 1935000 + }, + { + "epoch": 9.59, + "learning_rate": 4.5207163488453904e-05, + "loss": 2.4594, + "step": 1935500 + }, + { + "epoch": 9.59, + "learning_rate": 4.520592490202782e-05, + "loss": 2.453, + "step": 1936000 + }, + { + "epoch": 9.59, + "learning_rate": 4.5204693747120294e-05, + "loss": 2.4411, + "step": 1936500 + }, + { + "epoch": 9.6, + "learning_rate": 4.5203455160694204e-05, + "loss": 2.4588, + "step": 1937000 + }, + { + "epoch": 9.6, + "learning_rate": 4.520221657426812e-05, + "loss": 2.4649, + "step": 1937500 + }, + { + "epoch": 9.6, + "learning_rate": 4.520097798784204e-05, + "loss": 2.4384, + "step": 1938000 + }, + { + "epoch": 9.6, + "learning_rate": 4.5199739401415955e-05, + "loss": 2.4294, + "step": 1938500 + }, + { + "epoch": 9.61, + "learning_rate": 4.519850081498987e-05, + "loss": 2.4468, + "step": 1939000 + }, + { + "epoch": 9.61, + "learning_rate": 4.519726222856379e-05, + "loss": 2.451, + "step": 1939500 + }, + { + "epoch": 9.61, + "learning_rate": 4.5196023642137705e-05, + "loss": 2.4349, + "step": 1940000 + }, + { + "epoch": 9.61, + "learning_rate": 4.519478505571162e-05, + "loss": 2.439, + "step": 1940500 + }, + { + "epoch": 9.62, + "learning_rate": 4.519354646928554e-05, + "loss": 2.4513, + "step": 1941000 + }, + { + "epoch": 9.62, + "learning_rate": 4.519230788285945e-05, + "loss": 2.4451, + "step": 1941500 + }, + { + "epoch": 9.62, + "learning_rate": 4.5191069296433366e-05, + "loss": 2.4526, + "step": 1942000 + }, + { + "epoch": 9.62, + "learning_rate": 4.518983071000728e-05, + "loss": 2.4393, + "step": 1942500 + }, + { + "epoch": 9.63, + "learning_rate": 4.51885921235812e-05, + "loss": 2.4453, + "step": 1943000 + }, + { + "epoch": 9.63, + "learning_rate": 4.518735353715512e-05, + "loss": 2.4257, + "step": 1943500 + }, + { + "epoch": 9.63, + "learning_rate": 4.5186114950729034e-05, + "loss": 2.4565, + "step": 1944000 + }, + { + "epoch": 9.63, + "learning_rate": 4.51848788414758e-05, + "loss": 2.4816, + "step": 1944500 + }, + { + "epoch": 9.64, + "learning_rate": 4.518364273222257e-05, + "loss": 2.4714, + "step": 1945000 + }, + { + "epoch": 9.64, + "learning_rate": 4.518240662296934e-05, + "loss": 2.4386, + "step": 1945500 + }, + { + "epoch": 9.64, + "learning_rate": 4.518116803654326e-05, + "loss": 2.4524, + "step": 1946000 + }, + { + "epoch": 9.64, + "learning_rate": 4.517992945011717e-05, + "loss": 2.4481, + "step": 1946500 + }, + { + "epoch": 9.65, + "learning_rate": 4.5178690863691084e-05, + "loss": 2.4431, + "step": 1947000 + }, + { + "epoch": 9.65, + "learning_rate": 4.5177452277265e-05, + "loss": 2.4608, + "step": 1947500 + }, + { + "epoch": 9.65, + "learning_rate": 4.517621369083892e-05, + "loss": 2.4459, + "step": 1948000 + }, + { + "epoch": 9.65, + "learning_rate": 4.5174977581585694e-05, + "loss": 2.4347, + "step": 1948500 + }, + { + "epoch": 9.66, + "learning_rate": 4.517373899515961e-05, + "loss": 2.4307, + "step": 1949000 + }, + { + "epoch": 9.66, + "learning_rate": 4.517250040873352e-05, + "loss": 2.4474, + "step": 1949500 + }, + { + "epoch": 9.66, + "learning_rate": 4.517126182230744e-05, + "loss": 2.4829, + "step": 1950000 + }, + { + "epoch": 9.66, + "learning_rate": 4.5170023235881355e-05, + "loss": 2.4657, + "step": 1950500 + }, + { + "epoch": 9.67, + "learning_rate": 4.516878464945527e-05, + "loss": 2.4238, + "step": 1951000 + }, + { + "epoch": 9.67, + "learning_rate": 4.516754854020204e-05, + "loss": 2.4664, + "step": 1951500 + }, + { + "epoch": 9.67, + "learning_rate": 4.516630995377596e-05, + "loss": 2.4432, + "step": 1952000 + }, + { + "epoch": 9.67, + "learning_rate": 4.516507136734987e-05, + "loss": 2.4409, + "step": 1952500 + }, + { + "epoch": 9.68, + "learning_rate": 4.5163832780923785e-05, + "loss": 2.4568, + "step": 1953000 + }, + { + "epoch": 9.68, + "learning_rate": 4.51625941944977e-05, + "loss": 2.4436, + "step": 1953500 + }, + { + "epoch": 9.68, + "learning_rate": 4.516135560807162e-05, + "loss": 2.4369, + "step": 1954000 + }, + { + "epoch": 9.68, + "learning_rate": 4.5160117021645535e-05, + "loss": 2.4344, + "step": 1954500 + }, + { + "epoch": 9.69, + "learning_rate": 4.515887843521945e-05, + "loss": 2.4531, + "step": 1955000 + }, + { + "epoch": 9.69, + "learning_rate": 4.515764232596622e-05, + "loss": 2.4491, + "step": 1955500 + }, + { + "epoch": 9.69, + "learning_rate": 4.515640373954014e-05, + "loss": 2.4332, + "step": 1956000 + }, + { + "epoch": 9.69, + "learning_rate": 4.5155165153114055e-05, + "loss": 2.4424, + "step": 1956500 + }, + { + "epoch": 9.7, + "learning_rate": 4.515392656668797e-05, + "loss": 2.4608, + "step": 1957000 + }, + { + "epoch": 9.7, + "learning_rate": 4.515268798026189e-05, + "loss": 2.444, + "step": 1957500 + }, + { + "epoch": 9.7, + "learning_rate": 4.5151449393835806e-05, + "loss": 2.4751, + "step": 1958000 + }, + { + "epoch": 9.7, + "learning_rate": 4.5150213284582574e-05, + "loss": 2.4599, + "step": 1958500 + }, + { + "epoch": 9.71, + "learning_rate": 4.5148974698156485e-05, + "loss": 2.4676, + "step": 1959000 + }, + { + "epoch": 9.71, + "learning_rate": 4.51477361117304e-05, + "loss": 2.4294, + "step": 1959500 + }, + { + "epoch": 9.71, + "learning_rate": 4.514649752530432e-05, + "loss": 2.4494, + "step": 1960000 + }, + { + "epoch": 9.71, + "learning_rate": 4.5145263893223946e-05, + "loss": 2.4253, + "step": 1960500 + }, + { + "epoch": 9.72, + "learning_rate": 4.514402530679786e-05, + "loss": 2.4498, + "step": 1961000 + }, + { + "epoch": 9.72, + "learning_rate": 4.514278672037178e-05, + "loss": 2.4453, + "step": 1961500 + }, + { + "epoch": 9.72, + "learning_rate": 4.51415481339457e-05, + "loss": 2.4619, + "step": 1962000 + }, + { + "epoch": 9.72, + "learning_rate": 4.514030954751961e-05, + "loss": 2.4357, + "step": 1962500 + }, + { + "epoch": 9.73, + "learning_rate": 4.5139070961093524e-05, + "loss": 2.4439, + "step": 1963000 + }, + { + "epoch": 9.73, + "learning_rate": 4.513783485184029e-05, + "loss": 2.4647, + "step": 1963500 + }, + { + "epoch": 9.73, + "learning_rate": 4.513659626541421e-05, + "loss": 2.4701, + "step": 1964000 + }, + { + "epoch": 9.73, + "learning_rate": 4.5135357678988126e-05, + "loss": 2.4486, + "step": 1964500 + }, + { + "epoch": 9.74, + "learning_rate": 4.5134119092562043e-05, + "loss": 2.4749, + "step": 1965000 + }, + { + "epoch": 9.74, + "learning_rate": 4.513288050613596e-05, + "loss": 2.4462, + "step": 1965500 + }, + { + "epoch": 9.74, + "learning_rate": 4.513164191970988e-05, + "loss": 2.452, + "step": 1966000 + }, + { + "epoch": 9.74, + "learning_rate": 4.5130403333283794e-05, + "loss": 2.4414, + "step": 1966500 + }, + { + "epoch": 9.75, + "learning_rate": 4.512916474685771e-05, + "loss": 2.4396, + "step": 1967000 + }, + { + "epoch": 9.75, + "learning_rate": 4.512792616043163e-05, + "loss": 2.4279, + "step": 1967500 + }, + { + "epoch": 9.75, + "learning_rate": 4.512668757400554e-05, + "loss": 2.4615, + "step": 1968000 + }, + { + "epoch": 9.75, + "learning_rate": 4.5125451464752314e-05, + "loss": 2.4715, + "step": 1968500 + }, + { + "epoch": 9.76, + "learning_rate": 4.512421287832623e-05, + "loss": 2.4397, + "step": 1969000 + }, + { + "epoch": 9.76, + "learning_rate": 4.512297429190014e-05, + "loss": 2.4726, + "step": 1969500 + }, + { + "epoch": 9.76, + "learning_rate": 4.512173570547406e-05, + "loss": 2.4296, + "step": 1970000 + }, + { + "epoch": 9.76, + "learning_rate": 4.5120497119047975e-05, + "loss": 2.4543, + "step": 1970500 + }, + { + "epoch": 9.77, + "learning_rate": 4.511925853262189e-05, + "loss": 2.4283, + "step": 1971000 + }, + { + "epoch": 9.77, + "learning_rate": 4.511801994619581e-05, + "loss": 2.4433, + "step": 1971500 + }, + { + "epoch": 9.77, + "learning_rate": 4.511678135976972e-05, + "loss": 2.431, + "step": 1972000 + }, + { + "epoch": 9.77, + "learning_rate": 4.5115542773343636e-05, + "loss": 2.4314, + "step": 1972500 + }, + { + "epoch": 9.77, + "learning_rate": 4.511430666409041e-05, + "loss": 2.4454, + "step": 1973000 + }, + { + "epoch": 9.78, + "learning_rate": 4.511306807766433e-05, + "loss": 2.4631, + "step": 1973500 + }, + { + "epoch": 9.78, + "learning_rate": 4.5111829491238245e-05, + "loss": 2.4612, + "step": 1974000 + }, + { + "epoch": 9.78, + "learning_rate": 4.5110590904812155e-05, + "loss": 2.4322, + "step": 1974500 + }, + { + "epoch": 9.78, + "learning_rate": 4.510935231838607e-05, + "loss": 2.4259, + "step": 1975000 + }, + { + "epoch": 9.79, + "learning_rate": 4.510811620913285e-05, + "loss": 2.4497, + "step": 1975500 + }, + { + "epoch": 9.79, + "learning_rate": 4.510687762270676e-05, + "loss": 2.4183, + "step": 1976000 + }, + { + "epoch": 9.79, + "learning_rate": 4.5105639036280675e-05, + "loss": 2.4897, + "step": 1976500 + }, + { + "epoch": 9.79, + "learning_rate": 4.510440044985459e-05, + "loss": 2.4587, + "step": 1977000 + }, + { + "epoch": 9.8, + "learning_rate": 4.510316434060136e-05, + "loss": 2.4401, + "step": 1977500 + }, + { + "epoch": 9.8, + "learning_rate": 4.510192575417528e-05, + "loss": 2.4357, + "step": 1978000 + }, + { + "epoch": 9.8, + "learning_rate": 4.5100687167749194e-05, + "loss": 2.4482, + "step": 1978500 + }, + { + "epoch": 9.8, + "learning_rate": 4.509944858132311e-05, + "loss": 2.4574, + "step": 1979000 + }, + { + "epoch": 9.81, + "learning_rate": 4.509821247206988e-05, + "loss": 2.4316, + "step": 1979500 + }, + { + "epoch": 9.81, + "learning_rate": 4.50969738856438e-05, + "loss": 2.4638, + "step": 1980000 + }, + { + "epoch": 9.81, + "learning_rate": 4.5095735299217714e-05, + "loss": 2.4208, + "step": 1980500 + }, + { + "epoch": 9.81, + "learning_rate": 4.509449671279163e-05, + "loss": 2.4633, + "step": 1981000 + }, + { + "epoch": 9.82, + "learning_rate": 4.5093263080711245e-05, + "loss": 2.4471, + "step": 1981500 + }, + { + "epoch": 9.82, + "learning_rate": 4.509202697145802e-05, + "loss": 2.4612, + "step": 1982000 + }, + { + "epoch": 9.82, + "learning_rate": 4.509078838503194e-05, + "loss": 2.4536, + "step": 1982500 + }, + { + "epoch": 9.82, + "learning_rate": 4.50895522757787e-05, + "loss": 2.4203, + "step": 1983000 + }, + { + "epoch": 9.83, + "learning_rate": 4.5088313689352616e-05, + "loss": 2.4689, + "step": 1983500 + }, + { + "epoch": 9.83, + "learning_rate": 4.508707510292653e-05, + "loss": 2.4635, + "step": 1984000 + }, + { + "epoch": 9.83, + "learning_rate": 4.508583651650045e-05, + "loss": 2.4427, + "step": 1984500 + }, + { + "epoch": 9.83, + "learning_rate": 4.508459793007437e-05, + "loss": 2.4488, + "step": 1985000 + }, + { + "epoch": 9.84, + "learning_rate": 4.5083359343648284e-05, + "loss": 2.4687, + "step": 1985500 + }, + { + "epoch": 9.84, + "learning_rate": 4.50821207572222e-05, + "loss": 2.4482, + "step": 1986000 + }, + { + "epoch": 9.84, + "learning_rate": 4.508088217079612e-05, + "loss": 2.4457, + "step": 1986500 + }, + { + "epoch": 9.84, + "learning_rate": 4.5079643584370035e-05, + "loss": 2.4492, + "step": 1987000 + }, + { + "epoch": 9.85, + "learning_rate": 4.5078404997943945e-05, + "loss": 2.468, + "step": 1987500 + }, + { + "epoch": 9.85, + "learning_rate": 4.507716888869072e-05, + "loss": 2.4591, + "step": 1988000 + }, + { + "epoch": 9.85, + "learning_rate": 4.507593030226464e-05, + "loss": 2.4664, + "step": 1988500 + }, + { + "epoch": 9.85, + "learning_rate": 4.5074691715838554e-05, + "loss": 2.4511, + "step": 1989000 + }, + { + "epoch": 9.86, + "learning_rate": 4.507345312941247e-05, + "loss": 2.4497, + "step": 1989500 + }, + { + "epoch": 9.86, + "learning_rate": 4.507221454298639e-05, + "loss": 2.4432, + "step": 1990000 + }, + { + "epoch": 9.86, + "learning_rate": 4.50709759565603e-05, + "loss": 2.4519, + "step": 1990500 + }, + { + "epoch": 9.86, + "learning_rate": 4.5069737370134215e-05, + "loss": 2.4516, + "step": 1991000 + }, + { + "epoch": 9.87, + "learning_rate": 4.506849878370813e-05, + "loss": 2.4417, + "step": 1991500 + }, + { + "epoch": 9.87, + "learning_rate": 4.506726019728205e-05, + "loss": 2.464, + "step": 1992000 + }, + { + "epoch": 9.87, + "learning_rate": 4.5066021610855966e-05, + "loss": 2.4332, + "step": 1992500 + }, + { + "epoch": 9.87, + "learning_rate": 4.5064783024429876e-05, + "loss": 2.4693, + "step": 1993000 + }, + { + "epoch": 9.88, + "learning_rate": 4.506354443800379e-05, + "loss": 2.4477, + "step": 1993500 + }, + { + "epoch": 9.88, + "learning_rate": 4.506230585157771e-05, + "loss": 2.4455, + "step": 1994000 + }, + { + "epoch": 9.88, + "learning_rate": 4.506106974232448e-05, + "loss": 2.4597, + "step": 1994500 + }, + { + "epoch": 9.88, + "learning_rate": 4.5059833633071255e-05, + "loss": 2.446, + "step": 1995000 + }, + { + "epoch": 9.89, + "learning_rate": 4.505859504664517e-05, + "loss": 2.4577, + "step": 1995500 + }, + { + "epoch": 9.89, + "learning_rate": 4.505735646021909e-05, + "loss": 2.4678, + "step": 1996000 + }, + { + "epoch": 9.89, + "learning_rate": 4.5056117873793005e-05, + "loss": 2.4562, + "step": 1996500 + }, + { + "epoch": 9.89, + "learning_rate": 4.5054879287366916e-05, + "loss": 2.4657, + "step": 1997000 + }, + { + "epoch": 9.9, + "learning_rate": 4.505364070094083e-05, + "loss": 2.433, + "step": 1997500 + }, + { + "epoch": 9.9, + "learning_rate": 4.505240211451475e-05, + "loss": 2.4243, + "step": 1998000 + }, + { + "epoch": 9.9, + "learning_rate": 4.5051163528088666e-05, + "loss": 2.4324, + "step": 1998500 + }, + { + "epoch": 9.9, + "learning_rate": 4.504992494166258e-05, + "loss": 2.4494, + "step": 1999000 + }, + { + "epoch": 9.91, + "learning_rate": 4.50486863552365e-05, + "loss": 2.4347, + "step": 1999500 + }, + { + "epoch": 9.91, + "learning_rate": 4.504744776881041e-05, + "loss": 2.4345, + "step": 2000000 + }, + { + "epoch": 9.91, + "learning_rate": 4.504620918238433e-05, + "loss": 2.4564, + "step": 2000500 + }, + { + "epoch": 9.91, + "learning_rate": 4.5044973073131096e-05, + "loss": 2.4482, + "step": 2001000 + }, + { + "epoch": 9.92, + "learning_rate": 4.504373448670501e-05, + "loss": 2.4592, + "step": 2001500 + }, + { + "epoch": 9.92, + "learning_rate": 4.504249590027893e-05, + "loss": 2.4584, + "step": 2002000 + }, + { + "epoch": 9.92, + "learning_rate": 4.504125731385285e-05, + "loss": 2.4937, + "step": 2002500 + }, + { + "epoch": 9.92, + "learning_rate": 4.5040018727426764e-05, + "loss": 2.4234, + "step": 2003000 + }, + { + "epoch": 9.93, + "learning_rate": 4.503878014100068e-05, + "loss": 2.4364, + "step": 2003500 + }, + { + "epoch": 9.93, + "learning_rate": 4.50375415545746e-05, + "loss": 2.4408, + "step": 2004000 + }, + { + "epoch": 9.93, + "learning_rate": 4.5036302968148515e-05, + "loss": 2.4403, + "step": 2004500 + }, + { + "epoch": 9.93, + "learning_rate": 4.503506685889528e-05, + "loss": 2.4583, + "step": 2005000 + }, + { + "epoch": 9.94, + "learning_rate": 4.503383074964205e-05, + "loss": 2.4657, + "step": 2005500 + }, + { + "epoch": 9.94, + "learning_rate": 4.503259216321597e-05, + "loss": 2.4634, + "step": 2006000 + }, + { + "epoch": 9.94, + "learning_rate": 4.503135357678988e-05, + "loss": 2.4743, + "step": 2006500 + }, + { + "epoch": 9.94, + "learning_rate": 4.5030114990363796e-05, + "loss": 2.4383, + "step": 2007000 + }, + { + "epoch": 9.95, + "learning_rate": 4.502887640393771e-05, + "loss": 2.4592, + "step": 2007500 + }, + { + "epoch": 9.95, + "learning_rate": 4.502764029468449e-05, + "loss": 2.458, + "step": 2008000 + }, + { + "epoch": 9.95, + "learning_rate": 4.5026401708258406e-05, + "loss": 2.4377, + "step": 2008500 + }, + { + "epoch": 9.95, + "learning_rate": 4.502516312183232e-05, + "loss": 2.4445, + "step": 2009000 + }, + { + "epoch": 9.96, + "learning_rate": 4.502392453540623e-05, + "loss": 2.4318, + "step": 2009500 + }, + { + "epoch": 9.96, + "learning_rate": 4.502268594898015e-05, + "loss": 2.4583, + "step": 2010000 + }, + { + "epoch": 9.96, + "learning_rate": 4.502144983972692e-05, + "loss": 2.4609, + "step": 2010500 + }, + { + "epoch": 9.96, + "learning_rate": 4.5020211253300835e-05, + "loss": 2.4503, + "step": 2011000 + }, + { + "epoch": 9.97, + "learning_rate": 4.501897266687475e-05, + "loss": 2.4607, + "step": 2011500 + }, + { + "epoch": 9.97, + "learning_rate": 4.501773408044867e-05, + "loss": 2.4503, + "step": 2012000 + }, + { + "epoch": 9.97, + "learning_rate": 4.501649549402258e-05, + "loss": 2.4397, + "step": 2012500 + }, + { + "epoch": 9.97, + "learning_rate": 4.5015256907596496e-05, + "loss": 2.4582, + "step": 2013000 + }, + { + "epoch": 9.98, + "learning_rate": 4.501401832117041e-05, + "loss": 2.438, + "step": 2013500 + }, + { + "epoch": 9.98, + "learning_rate": 4.501277973474433e-05, + "loss": 2.462, + "step": 2014000 + }, + { + "epoch": 9.98, + "learning_rate": 4.5011543625491106e-05, + "loss": 2.4676, + "step": 2014500 + }, + { + "epoch": 9.98, + "learning_rate": 4.501030751623787e-05, + "loss": 2.4411, + "step": 2015000 + }, + { + "epoch": 9.99, + "learning_rate": 4.5009068929811785e-05, + "loss": 2.4521, + "step": 2015500 + }, + { + "epoch": 9.99, + "learning_rate": 4.50078303433857e-05, + "loss": 2.4713, + "step": 2016000 + }, + { + "epoch": 9.99, + "learning_rate": 4.500659175695962e-05, + "loss": 2.432, + "step": 2016500 + }, + { + "epoch": 9.99, + "learning_rate": 4.5005353170533535e-05, + "loss": 2.4529, + "step": 2017000 + }, + { + "epoch": 10.0, + "learning_rate": 4.500411458410745e-05, + "loss": 2.4451, + "step": 2017500 + }, + { + "epoch": 10.0, + "learning_rate": 4.500287847485422e-05, + "loss": 2.4712, + "step": 2018000 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.6423499793751883, + "eval_accuracy_mlm": 0.5954648160103292, + "eval_accuracy_nsp": 0.8633937221278716, + "eval_loss": 2.427269697189331, + "eval_runtime": 146.0474, + "eval_samples_per_second": 1745.728, + "eval_steps_per_second": 72.743, + "step": 2018430 + } + ], + "max_steps": 20184300, + "num_train_epochs": 100, + "total_flos": 2.6129529363964503e+18, + "trial_name": null, + "trial_params": null +}