diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 80.0, - "global_step": 16147440, + "epoch": 91.0, + "global_step": 18367713, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -194650,11 +194650,26778 @@ "eval_samples_per_second": 1735.915, "eval_steps_per_second": 72.335, "step": 16147440 + }, + { + "epoch": 80.0, + "learning_rate": 1.0014144656985876e-05, + "loss": 1.8567, + "step": 16147500 + }, + { + "epoch": 80.0, + "learning_rate": 1.0012906070559793e-05, + "loss": 1.8219, + "step": 16148000 + }, + { + "epoch": 80.01, + "learning_rate": 1.0011667484133708e-05, + "loss": 1.8452, + "step": 16148500 + }, + { + "epoch": 80.01, + "learning_rate": 1.0010428897707625e-05, + "loss": 1.8474, + "step": 16149000 + }, + { + "epoch": 80.01, + "learning_rate": 1.0009192788454392e-05, + "loss": 1.8379, + "step": 16149500 + }, + { + "epoch": 80.01, + "learning_rate": 1.0007956679201163e-05, + "loss": 1.8515, + "step": 16150000 + }, + { + "epoch": 80.02, + "learning_rate": 1.0006718092775078e-05, + "loss": 1.8533, + "step": 16150500 + }, + { + "epoch": 80.02, + "learning_rate": 1.0005479506348995e-05, + "loss": 1.8512, + "step": 16151000 + }, + { + "epoch": 80.02, + "learning_rate": 1.0004243397095762e-05, + "loss": 1.8749, + "step": 16151500 + }, + { + "epoch": 80.02, + "learning_rate": 1.0003004810669679e-05, + "loss": 1.8567, + "step": 16152000 + }, + { + "epoch": 80.03, + "learning_rate": 1.0001766224243596e-05, + "loss": 1.8374, + "step": 16152500 + }, + { + "epoch": 80.03, + "learning_rate": 1.0000530114990364e-05, + "loss": 1.8657, + "step": 16153000 + }, + { + "epoch": 80.03, + "learning_rate": 9.999291528564281e-06, + "loss": 1.8686, + "step": 16153500 + }, + { + "epoch": 80.03, + "learning_rate": 9.998052942138197e-06, + "loss": 1.8543, + "step": 16154000 + }, + { + "epoch": 80.03, + "learning_rate": 9.996814355712114e-06, + "loss": 1.8617, + "step": 16154500 + }, + { + "epoch": 80.04, + "learning_rate": 9.995575769286029e-06, + "loss": 1.8379, + "step": 16155000 + }, + { + "epoch": 80.04, + "learning_rate": 9.994337182859946e-06, + "loss": 1.8506, + "step": 16155500 + }, + { + "epoch": 80.04, + "learning_rate": 9.993098596433863e-06, + "loss": 1.8616, + "step": 16156000 + }, + { + "epoch": 80.04, + "learning_rate": 9.99186001000778e-06, + "loss": 1.8386, + "step": 16156500 + }, + { + "epoch": 80.05, + "learning_rate": 9.990621423581695e-06, + "loss": 1.8456, + "step": 16157000 + }, + { + "epoch": 80.05, + "learning_rate": 9.989382837155612e-06, + "loss": 1.8553, + "step": 16157500 + }, + { + "epoch": 80.05, + "learning_rate": 9.988144250729529e-06, + "loss": 1.854, + "step": 16158000 + }, + { + "epoch": 80.05, + "learning_rate": 9.986905664303446e-06, + "loss": 1.8643, + "step": 16158500 + }, + { + "epoch": 80.06, + "learning_rate": 9.98566707787736e-06, + "loss": 1.8764, + "step": 16159000 + }, + { + "epoch": 80.06, + "learning_rate": 9.984428491451276e-06, + "loss": 1.864, + "step": 16159500 + }, + { + "epoch": 80.06, + "learning_rate": 9.983189905025193e-06, + "loss": 1.8666, + "step": 16160000 + }, + { + "epoch": 80.06, + "learning_rate": 9.98195131859911e-06, + "loss": 1.8476, + "step": 16160500 + }, + { + "epoch": 80.07, + "learning_rate": 9.980712732173025e-06, + "loss": 1.8501, + "step": 16161000 + }, + { + "epoch": 80.07, + "learning_rate": 9.979474145746942e-06, + "loss": 1.8603, + "step": 16161500 + }, + { + "epoch": 80.07, + "learning_rate": 9.978235559320859e-06, + "loss": 1.8719, + "step": 16162000 + }, + { + "epoch": 80.07, + "learning_rate": 9.976999450067628e-06, + "loss": 1.8545, + "step": 16162500 + }, + { + "epoch": 80.08, + "learning_rate": 9.975760863641543e-06, + "loss": 1.8542, + "step": 16163000 + }, + { + "epoch": 80.08, + "learning_rate": 9.97452227721546e-06, + "loss": 1.8463, + "step": 16163500 + }, + { + "epoch": 80.08, + "learning_rate": 9.973283690789377e-06, + "loss": 1.864, + "step": 16164000 + }, + { + "epoch": 80.08, + "learning_rate": 9.972045104363292e-06, + "loss": 1.8247, + "step": 16164500 + }, + { + "epoch": 80.09, + "learning_rate": 9.970808995110063e-06, + "loss": 1.8746, + "step": 16165000 + }, + { + "epoch": 80.09, + "learning_rate": 9.969570408683978e-06, + "loss": 1.8555, + "step": 16165500 + }, + { + "epoch": 80.09, + "learning_rate": 9.968331822257893e-06, + "loss": 1.8539, + "step": 16166000 + }, + { + "epoch": 80.09, + "learning_rate": 9.96709323583181e-06, + "loss": 1.8777, + "step": 16166500 + }, + { + "epoch": 80.1, + "learning_rate": 9.965854649405727e-06, + "loss": 1.8657, + "step": 16167000 + }, + { + "epoch": 80.1, + "learning_rate": 9.964618540152496e-06, + "loss": 1.8729, + "step": 16167500 + }, + { + "epoch": 80.1, + "learning_rate": 9.963379953726413e-06, + "loss": 1.8602, + "step": 16168000 + }, + { + "epoch": 80.1, + "learning_rate": 9.962141367300328e-06, + "loss": 1.8433, + "step": 16168500 + }, + { + "epoch": 80.11, + "learning_rate": 9.960902780874245e-06, + "loss": 1.8453, + "step": 16169000 + }, + { + "epoch": 80.11, + "learning_rate": 9.95966419444816e-06, + "loss": 1.8517, + "step": 16169500 + }, + { + "epoch": 80.11, + "learning_rate": 9.958425608022077e-06, + "loss": 1.848, + "step": 16170000 + }, + { + "epoch": 80.11, + "learning_rate": 9.957187021595992e-06, + "loss": 1.8432, + "step": 16170500 + }, + { + "epoch": 80.12, + "learning_rate": 9.95594843516991e-06, + "loss": 1.8417, + "step": 16171000 + }, + { + "epoch": 80.12, + "learning_rate": 9.954709848743826e-06, + "loss": 1.854, + "step": 16171500 + }, + { + "epoch": 80.12, + "learning_rate": 9.953471262317743e-06, + "loss": 1.84, + "step": 16172000 + }, + { + "epoch": 80.12, + "learning_rate": 9.952235153064512e-06, + "loss": 1.8241, + "step": 16172500 + }, + { + "epoch": 80.13, + "learning_rate": 9.950996566638427e-06, + "loss": 1.8818, + "step": 16173000 + }, + { + "epoch": 80.13, + "learning_rate": 9.949757980212342e-06, + "loss": 1.8549, + "step": 16173500 + }, + { + "epoch": 80.13, + "learning_rate": 9.948524348131965e-06, + "loss": 1.8464, + "step": 16174000 + }, + { + "epoch": 80.13, + "learning_rate": 9.947288238878732e-06, + "loss": 1.853, + "step": 16174500 + }, + { + "epoch": 80.14, + "learning_rate": 9.946049652452649e-06, + "loss": 1.8404, + "step": 16175000 + }, + { + "epoch": 80.14, + "learning_rate": 9.944811066026566e-06, + "loss": 1.8608, + "step": 16175500 + }, + { + "epoch": 80.14, + "learning_rate": 9.943572479600483e-06, + "loss": 1.8723, + "step": 16176000 + }, + { + "epoch": 80.14, + "learning_rate": 9.942333893174398e-06, + "loss": 1.8535, + "step": 16176500 + }, + { + "epoch": 80.15, + "learning_rate": 9.941095306748315e-06, + "loss": 1.892, + "step": 16177000 + }, + { + "epoch": 80.15, + "learning_rate": 9.939856720322232e-06, + "loss": 1.8673, + "step": 16177500 + }, + { + "epoch": 80.15, + "learning_rate": 9.938618133896149e-06, + "loss": 1.83, + "step": 16178000 + }, + { + "epoch": 80.15, + "learning_rate": 9.937379547470064e-06, + "loss": 1.8411, + "step": 16178500 + }, + { + "epoch": 80.16, + "learning_rate": 9.936140961043979e-06, + "loss": 1.8558, + "step": 16179000 + }, + { + "epoch": 80.16, + "learning_rate": 9.934904851790748e-06, + "loss": 1.8509, + "step": 16179500 + }, + { + "epoch": 80.16, + "learning_rate": 9.933666265364665e-06, + "loss": 1.8621, + "step": 16180000 + }, + { + "epoch": 80.16, + "learning_rate": 9.932427678938582e-06, + "loss": 1.8471, + "step": 16180500 + }, + { + "epoch": 80.17, + "learning_rate": 9.931189092512499e-06, + "loss": 1.8531, + "step": 16181000 + }, + { + "epoch": 80.17, + "learning_rate": 9.929950506086416e-06, + "loss": 1.8706, + "step": 16181500 + }, + { + "epoch": 80.17, + "learning_rate": 9.92871191966033e-06, + "loss": 1.8732, + "step": 16182000 + }, + { + "epoch": 80.17, + "learning_rate": 9.927473333234246e-06, + "loss": 1.8836, + "step": 16182500 + }, + { + "epoch": 80.18, + "learning_rate": 9.926234746808163e-06, + "loss": 1.8849, + "step": 16183000 + }, + { + "epoch": 80.18, + "learning_rate": 9.92499616038208e-06, + "loss": 1.8486, + "step": 16183500 + }, + { + "epoch": 80.18, + "learning_rate": 9.923760051128849e-06, + "loss": 1.8524, + "step": 16184000 + }, + { + "epoch": 80.18, + "learning_rate": 9.922521464702766e-06, + "loss": 1.8461, + "step": 16184500 + }, + { + "epoch": 80.19, + "learning_rate": 9.921282878276681e-06, + "loss": 1.8548, + "step": 16185000 + }, + { + "epoch": 80.19, + "learning_rate": 9.920044291850598e-06, + "loss": 1.8478, + "step": 16185500 + }, + { + "epoch": 80.19, + "learning_rate": 9.918805705424513e-06, + "loss": 1.8469, + "step": 16186000 + }, + { + "epoch": 80.19, + "learning_rate": 9.917569596171282e-06, + "loss": 1.8833, + "step": 16186500 + }, + { + "epoch": 80.2, + "learning_rate": 9.916331009745199e-06, + "loss": 1.8541, + "step": 16187000 + }, + { + "epoch": 80.2, + "learning_rate": 9.915092423319116e-06, + "loss": 1.8583, + "step": 16187500 + }, + { + "epoch": 80.2, + "learning_rate": 9.913853836893031e-06, + "loss": 1.8281, + "step": 16188000 + }, + { + "epoch": 80.2, + "learning_rate": 9.912615250466948e-06, + "loss": 1.8405, + "step": 16188500 + }, + { + "epoch": 80.21, + "learning_rate": 9.911376664040863e-06, + "loss": 1.8555, + "step": 16189000 + }, + { + "epoch": 80.21, + "learning_rate": 9.910140554787632e-06, + "loss": 1.8478, + "step": 16189500 + }, + { + "epoch": 80.21, + "learning_rate": 9.908901968361549e-06, + "loss": 1.8548, + "step": 16190000 + }, + { + "epoch": 80.21, + "learning_rate": 9.907665859108318e-06, + "loss": 1.8585, + "step": 16190500 + }, + { + "epoch": 80.22, + "learning_rate": 9.906429749855085e-06, + "loss": 1.8682, + "step": 16191000 + }, + { + "epoch": 80.22, + "learning_rate": 9.905191163429002e-06, + "loss": 1.8621, + "step": 16191500 + }, + { + "epoch": 80.22, + "learning_rate": 9.903952577002919e-06, + "loss": 1.849, + "step": 16192000 + }, + { + "epoch": 80.22, + "learning_rate": 9.902713990576836e-06, + "loss": 1.8578, + "step": 16192500 + }, + { + "epoch": 80.23, + "learning_rate": 9.90147540415075e-06, + "loss": 1.8613, + "step": 16193000 + }, + { + "epoch": 80.23, + "learning_rate": 9.900236817724668e-06, + "loss": 1.8503, + "step": 16193500 + }, + { + "epoch": 80.23, + "learning_rate": 9.898998231298585e-06, + "loss": 1.8685, + "step": 16194000 + }, + { + "epoch": 80.23, + "learning_rate": 9.8977596448725e-06, + "loss": 1.8628, + "step": 16194500 + }, + { + "epoch": 80.24, + "learning_rate": 9.896521058446417e-06, + "loss": 1.8277, + "step": 16195000 + }, + { + "epoch": 80.24, + "learning_rate": 9.895284949193186e-06, + "loss": 1.8307, + "step": 16195500 + }, + { + "epoch": 80.24, + "learning_rate": 9.894046362767101e-06, + "loss": 1.8427, + "step": 16196000 + }, + { + "epoch": 80.24, + "learning_rate": 9.892807776341018e-06, + "loss": 1.8571, + "step": 16196500 + }, + { + "epoch": 80.25, + "learning_rate": 9.891569189914935e-06, + "loss": 1.8503, + "step": 16197000 + }, + { + "epoch": 80.25, + "learning_rate": 9.890330603488852e-06, + "loss": 1.8503, + "step": 16197500 + }, + { + "epoch": 80.25, + "learning_rate": 9.889092017062767e-06, + "loss": 1.8594, + "step": 16198000 + }, + { + "epoch": 80.25, + "learning_rate": 9.887853430636682e-06, + "loss": 1.8416, + "step": 16198500 + }, + { + "epoch": 80.26, + "learning_rate": 9.886614844210599e-06, + "loss": 1.8605, + "step": 16199000 + }, + { + "epoch": 80.26, + "learning_rate": 9.885378734957368e-06, + "loss": 1.8392, + "step": 16199500 + }, + { + "epoch": 80.26, + "learning_rate": 9.884140148531285e-06, + "loss": 1.8402, + "step": 16200000 + }, + { + "epoch": 80.26, + "learning_rate": 9.882901562105202e-06, + "loss": 1.8775, + "step": 16200500 + }, + { + "epoch": 80.27, + "learning_rate": 9.881662975679119e-06, + "loss": 1.8515, + "step": 16201000 + }, + { + "epoch": 80.27, + "learning_rate": 9.880424389253034e-06, + "loss": 1.859, + "step": 16201500 + }, + { + "epoch": 80.27, + "learning_rate": 9.879188279999803e-06, + "loss": 1.8549, + "step": 16202000 + }, + { + "epoch": 80.27, + "learning_rate": 9.877949693573718e-06, + "loss": 1.8368, + "step": 16202500 + }, + { + "epoch": 80.28, + "learning_rate": 9.876711107147635e-06, + "loss": 1.8407, + "step": 16203000 + }, + { + "epoch": 80.28, + "learning_rate": 9.875472520721552e-06, + "loss": 1.8619, + "step": 16203500 + }, + { + "epoch": 80.28, + "learning_rate": 9.874236411468319e-06, + "loss": 1.8607, + "step": 16204000 + }, + { + "epoch": 80.28, + "learning_rate": 9.872997825042236e-06, + "loss": 1.8585, + "step": 16204500 + }, + { + "epoch": 80.29, + "learning_rate": 9.871759238616153e-06, + "loss": 1.8578, + "step": 16205000 + }, + { + "epoch": 80.29, + "learning_rate": 9.870520652190068e-06, + "loss": 1.8557, + "step": 16205500 + }, + { + "epoch": 80.29, + "learning_rate": 9.869282065763985e-06, + "loss": 1.8808, + "step": 16206000 + }, + { + "epoch": 80.29, + "learning_rate": 9.868043479337902e-06, + "loss": 1.8394, + "step": 16206500 + }, + { + "epoch": 80.3, + "learning_rate": 9.866804892911819e-06, + "loss": 1.8698, + "step": 16207000 + }, + { + "epoch": 80.3, + "learning_rate": 9.865566306485736e-06, + "loss": 1.8627, + "step": 16207500 + }, + { + "epoch": 80.3, + "learning_rate": 9.864327720059651e-06, + "loss": 1.8573, + "step": 16208000 + }, + { + "epoch": 80.3, + "learning_rate": 9.863089133633568e-06, + "loss": 1.8556, + "step": 16208500 + }, + { + "epoch": 80.3, + "learning_rate": 9.861850547207483e-06, + "loss": 1.8435, + "step": 16209000 + }, + { + "epoch": 80.31, + "learning_rate": 9.860614437954252e-06, + "loss": 1.8294, + "step": 16209500 + }, + { + "epoch": 80.31, + "learning_rate": 9.85937832870102e-06, + "loss": 1.8748, + "step": 16210000 + }, + { + "epoch": 80.31, + "learning_rate": 9.858142219447788e-06, + "loss": 1.8448, + "step": 16210500 + }, + { + "epoch": 80.31, + "learning_rate": 9.856903633021705e-06, + "loss": 1.8571, + "step": 16211000 + }, + { + "epoch": 80.32, + "learning_rate": 9.855665046595622e-06, + "loss": 1.8417, + "step": 16211500 + }, + { + "epoch": 80.32, + "learning_rate": 9.85442893734239e-06, + "loss": 1.8652, + "step": 16212000 + }, + { + "epoch": 80.32, + "learning_rate": 9.853190350916307e-06, + "loss": 1.863, + "step": 16212500 + }, + { + "epoch": 80.32, + "learning_rate": 9.851951764490223e-06, + "loss": 1.8433, + "step": 16213000 + }, + { + "epoch": 80.33, + "learning_rate": 9.85071317806414e-06, + "loss": 1.8516, + "step": 16213500 + }, + { + "epoch": 80.33, + "learning_rate": 9.849474591638055e-06, + "loss": 1.8293, + "step": 16214000 + }, + { + "epoch": 80.33, + "learning_rate": 9.848236005211972e-06, + "loss": 1.8571, + "step": 16214500 + }, + { + "epoch": 80.33, + "learning_rate": 9.846997418785889e-06, + "loss": 1.8249, + "step": 16215000 + }, + { + "epoch": 80.34, + "learning_rate": 9.845758832359806e-06, + "loss": 1.8578, + "step": 16215500 + }, + { + "epoch": 80.34, + "learning_rate": 9.84452024593372e-06, + "loss": 1.8292, + "step": 16216000 + }, + { + "epoch": 80.34, + "learning_rate": 9.84328413668049e-06, + "loss": 1.8248, + "step": 16216500 + }, + { + "epoch": 80.34, + "learning_rate": 9.842048027427258e-06, + "loss": 1.8403, + "step": 16217000 + }, + { + "epoch": 80.35, + "learning_rate": 9.840809441001175e-06, + "loss": 1.8637, + "step": 16217500 + }, + { + "epoch": 80.35, + "learning_rate": 9.83957085457509e-06, + "loss": 1.8561, + "step": 16218000 + }, + { + "epoch": 80.35, + "learning_rate": 9.838332268149008e-06, + "loss": 1.8679, + "step": 16218500 + }, + { + "epoch": 80.35, + "learning_rate": 9.837093681722924e-06, + "loss": 1.8613, + "step": 16219000 + }, + { + "epoch": 80.36, + "learning_rate": 9.835855095296841e-06, + "loss": 1.8659, + "step": 16219500 + }, + { + "epoch": 80.36, + "learning_rate": 9.834618986043609e-06, + "loss": 1.8507, + "step": 16220000 + }, + { + "epoch": 80.36, + "learning_rate": 9.833380399617525e-06, + "loss": 1.8467, + "step": 16220500 + }, + { + "epoch": 80.36, + "learning_rate": 9.83214181319144e-06, + "loss": 1.8743, + "step": 16221000 + }, + { + "epoch": 80.37, + "learning_rate": 9.830903226765358e-06, + "loss": 1.8489, + "step": 16221500 + }, + { + "epoch": 80.37, + "learning_rate": 9.829664640339275e-06, + "loss": 1.8583, + "step": 16222000 + }, + { + "epoch": 80.37, + "learning_rate": 9.828426053913191e-06, + "loss": 1.8415, + "step": 16222500 + }, + { + "epoch": 80.37, + "learning_rate": 9.827189944659959e-06, + "loss": 1.8789, + "step": 16223000 + }, + { + "epoch": 80.38, + "learning_rate": 9.825951358233876e-06, + "loss": 1.856, + "step": 16223500 + }, + { + "epoch": 80.38, + "learning_rate": 9.82471277180779e-06, + "loss": 1.8565, + "step": 16224000 + }, + { + "epoch": 80.38, + "learning_rate": 9.823474185381708e-06, + "loss": 1.858, + "step": 16224500 + }, + { + "epoch": 80.38, + "learning_rate": 9.822238076128476e-06, + "loss": 1.8736, + "step": 16225000 + }, + { + "epoch": 80.39, + "learning_rate": 9.820999489702393e-06, + "loss": 1.8432, + "step": 16225500 + }, + { + "epoch": 80.39, + "learning_rate": 9.819760903276309e-06, + "loss": 1.85, + "step": 16226000 + }, + { + "epoch": 80.39, + "learning_rate": 9.818522316850226e-06, + "loss": 1.8382, + "step": 16226500 + }, + { + "epoch": 80.39, + "learning_rate": 9.81728373042414e-06, + "loss": 1.8716, + "step": 16227000 + }, + { + "epoch": 80.4, + "learning_rate": 9.816045143998058e-06, + "loss": 1.8408, + "step": 16227500 + }, + { + "epoch": 80.4, + "learning_rate": 9.814806557571975e-06, + "loss": 1.8599, + "step": 16228000 + }, + { + "epoch": 80.4, + "learning_rate": 9.813567971145892e-06, + "loss": 1.8699, + "step": 16228500 + }, + { + "epoch": 80.4, + "learning_rate": 9.812329384719809e-06, + "loss": 1.847, + "step": 16229000 + }, + { + "epoch": 80.41, + "learning_rate": 9.811090798293724e-06, + "loss": 1.8661, + "step": 16229500 + }, + { + "epoch": 80.41, + "learning_rate": 9.80985221186764e-06, + "loss": 1.8668, + "step": 16230000 + }, + { + "epoch": 80.41, + "learning_rate": 9.808613625441556e-06, + "loss": 1.8639, + "step": 16230500 + }, + { + "epoch": 80.41, + "learning_rate": 9.807375039015473e-06, + "loss": 1.8536, + "step": 16231000 + }, + { + "epoch": 80.42, + "learning_rate": 9.806136452589388e-06, + "loss": 1.8707, + "step": 16231500 + }, + { + "epoch": 80.42, + "learning_rate": 9.804900343336159e-06, + "loss": 1.8668, + "step": 16232000 + }, + { + "epoch": 80.42, + "learning_rate": 9.803661756910074e-06, + "loss": 1.8561, + "step": 16232500 + }, + { + "epoch": 80.42, + "learning_rate": 9.802425647656843e-06, + "loss": 1.8616, + "step": 16233000 + }, + { + "epoch": 80.43, + "learning_rate": 9.801187061230758e-06, + "loss": 1.858, + "step": 16233500 + }, + { + "epoch": 80.43, + "learning_rate": 9.799948474804675e-06, + "loss": 1.8609, + "step": 16234000 + }, + { + "epoch": 80.43, + "learning_rate": 9.798709888378592e-06, + "loss": 1.8494, + "step": 16234500 + }, + { + "epoch": 80.43, + "learning_rate": 9.797471301952509e-06, + "loss": 1.8494, + "step": 16235000 + }, + { + "epoch": 80.44, + "learning_rate": 9.796235192699277e-06, + "loss": 1.8684, + "step": 16235500 + }, + { + "epoch": 80.44, + "learning_rate": 9.794996606273193e-06, + "loss": 1.8566, + "step": 16236000 + }, + { + "epoch": 80.44, + "learning_rate": 9.79375801984711e-06, + "loss": 1.8497, + "step": 16236500 + }, + { + "epoch": 80.44, + "learning_rate": 9.792519433421025e-06, + "loss": 1.8616, + "step": 16237000 + }, + { + "epoch": 80.45, + "learning_rate": 9.791280846994942e-06, + "loss": 1.8295, + "step": 16237500 + }, + { + "epoch": 80.45, + "learning_rate": 9.790042260568859e-06, + "loss": 1.8624, + "step": 16238000 + }, + { + "epoch": 80.45, + "learning_rate": 9.788806151315628e-06, + "loss": 1.8626, + "step": 16238500 + }, + { + "epoch": 80.45, + "learning_rate": 9.787567564889544e-06, + "loss": 1.8683, + "step": 16239000 + }, + { + "epoch": 80.46, + "learning_rate": 9.78632897846346e-06, + "loss": 1.8697, + "step": 16239500 + }, + { + "epoch": 80.46, + "learning_rate": 9.785090392037375e-06, + "loss": 1.8523, + "step": 16240000 + }, + { + "epoch": 80.46, + "learning_rate": 9.783851805611292e-06, + "loss": 1.8489, + "step": 16240500 + }, + { + "epoch": 80.46, + "learning_rate": 9.782613219185209e-06, + "loss": 1.8855, + "step": 16241000 + }, + { + "epoch": 80.47, + "learning_rate": 9.781374632759126e-06, + "loss": 1.8642, + "step": 16241500 + }, + { + "epoch": 80.47, + "learning_rate": 9.780136046333041e-06, + "loss": 1.8488, + "step": 16242000 + }, + { + "epoch": 80.47, + "learning_rate": 9.778897459906958e-06, + "loss": 1.8923, + "step": 16242500 + }, + { + "epoch": 80.47, + "learning_rate": 9.777658873480875e-06, + "loss": 1.8776, + "step": 16243000 + }, + { + "epoch": 80.48, + "learning_rate": 9.776422764227642e-06, + "loss": 1.853, + "step": 16243500 + }, + { + "epoch": 80.48, + "learning_rate": 9.775184177801559e-06, + "loss": 1.883, + "step": 16244000 + }, + { + "epoch": 80.48, + "learning_rate": 9.773945591375476e-06, + "loss": 1.8654, + "step": 16244500 + }, + { + "epoch": 80.48, + "learning_rate": 9.772709482122245e-06, + "loss": 1.8644, + "step": 16245000 + }, + { + "epoch": 80.49, + "learning_rate": 9.771470895696161e-06, + "loss": 1.8476, + "step": 16245500 + }, + { + "epoch": 80.49, + "learning_rate": 9.770232309270077e-06, + "loss": 1.8479, + "step": 16246000 + }, + { + "epoch": 80.49, + "learning_rate": 9.768993722843994e-06, + "loss": 1.8547, + "step": 16246500 + }, + { + "epoch": 80.49, + "learning_rate": 9.767755136417909e-06, + "loss": 1.8778, + "step": 16247000 + }, + { + "epoch": 80.5, + "learning_rate": 9.766516549991826e-06, + "loss": 1.8325, + "step": 16247500 + }, + { + "epoch": 80.5, + "learning_rate": 9.765282917911447e-06, + "loss": 1.8624, + "step": 16248000 + }, + { + "epoch": 80.5, + "learning_rate": 9.764044331485363e-06, + "loss": 1.8544, + "step": 16248500 + }, + { + "epoch": 80.5, + "learning_rate": 9.762805745059279e-06, + "loss": 1.8484, + "step": 16249000 + }, + { + "epoch": 80.51, + "learning_rate": 9.761567158633196e-06, + "loss": 1.8484, + "step": 16249500 + }, + { + "epoch": 80.51, + "learning_rate": 9.76032857220711e-06, + "loss": 1.8696, + "step": 16250000 + }, + { + "epoch": 80.51, + "learning_rate": 9.759089985781028e-06, + "loss": 1.8575, + "step": 16250500 + }, + { + "epoch": 80.51, + "learning_rate": 9.757851399354945e-06, + "loss": 1.8633, + "step": 16251000 + }, + { + "epoch": 80.52, + "learning_rate": 9.756612812928862e-06, + "loss": 1.8445, + "step": 16251500 + }, + { + "epoch": 80.52, + "learning_rate": 9.755374226502777e-06, + "loss": 1.8566, + "step": 16252000 + }, + { + "epoch": 80.52, + "learning_rate": 9.754135640076694e-06, + "loss": 1.8681, + "step": 16252500 + }, + { + "epoch": 80.52, + "learning_rate": 9.752899530823461e-06, + "loss": 1.8865, + "step": 16253000 + }, + { + "epoch": 80.53, + "learning_rate": 9.751660944397378e-06, + "loss": 1.8497, + "step": 16253500 + }, + { + "epoch": 80.53, + "learning_rate": 9.750422357971295e-06, + "loss": 1.8422, + "step": 16254000 + }, + { + "epoch": 80.53, + "learning_rate": 9.749183771545212e-06, + "loss": 1.8627, + "step": 16254500 + }, + { + "epoch": 80.53, + "learning_rate": 9.747945185119129e-06, + "loss": 1.8215, + "step": 16255000 + }, + { + "epoch": 80.54, + "learning_rate": 9.746711553038748e-06, + "loss": 1.8765, + "step": 16255500 + }, + { + "epoch": 80.54, + "learning_rate": 9.745472966612665e-06, + "loss": 1.854, + "step": 16256000 + }, + { + "epoch": 80.54, + "learning_rate": 9.744234380186581e-06, + "loss": 1.8784, + "step": 16256500 + }, + { + "epoch": 80.54, + "learning_rate": 9.742995793760497e-06, + "loss": 1.8508, + "step": 16257000 + }, + { + "epoch": 80.55, + "learning_rate": 9.741757207334414e-06, + "loss": 1.8662, + "step": 16257500 + }, + { + "epoch": 80.55, + "learning_rate": 9.74051862090833e-06, + "loss": 1.8665, + "step": 16258000 + }, + { + "epoch": 80.55, + "learning_rate": 9.739280034482247e-06, + "loss": 1.8602, + "step": 16258500 + }, + { + "epoch": 80.55, + "learning_rate": 9.738041448056163e-06, + "loss": 1.8425, + "step": 16259000 + }, + { + "epoch": 80.56, + "learning_rate": 9.73680286163008e-06, + "loss": 1.8375, + "step": 16259500 + }, + { + "epoch": 80.56, + "learning_rate": 9.735564275203995e-06, + "loss": 1.8619, + "step": 16260000 + }, + { + "epoch": 80.56, + "learning_rate": 9.734325688777912e-06, + "loss": 1.8729, + "step": 16260500 + }, + { + "epoch": 80.56, + "learning_rate": 9.733087102351829e-06, + "loss": 1.8559, + "step": 16261000 + }, + { + "epoch": 80.57, + "learning_rate": 9.731848515925744e-06, + "loss": 1.8604, + "step": 16261500 + }, + { + "epoch": 80.57, + "learning_rate": 9.730614883845365e-06, + "loss": 1.869, + "step": 16262000 + }, + { + "epoch": 80.57, + "learning_rate": 9.729376297419282e-06, + "loss": 1.8567, + "step": 16262500 + }, + { + "epoch": 80.57, + "learning_rate": 9.728137710993199e-06, + "loss": 1.8523, + "step": 16263000 + }, + { + "epoch": 80.58, + "learning_rate": 9.726899124567114e-06, + "loss": 1.8744, + "step": 16263500 + }, + { + "epoch": 80.58, + "learning_rate": 9.72566053814103e-06, + "loss": 1.8366, + "step": 16264000 + }, + { + "epoch": 80.58, + "learning_rate": 9.7244244288878e-06, + "loss": 1.8469, + "step": 16264500 + }, + { + "epoch": 80.58, + "learning_rate": 9.723185842461716e-06, + "loss": 1.8693, + "step": 16265000 + }, + { + "epoch": 80.58, + "learning_rate": 9.721947256035632e-06, + "loss": 1.8421, + "step": 16265500 + }, + { + "epoch": 80.59, + "learning_rate": 9.720708669609549e-06, + "loss": 1.8556, + "step": 16266000 + }, + { + "epoch": 80.59, + "learning_rate": 9.719470083183464e-06, + "loss": 1.8516, + "step": 16266500 + }, + { + "epoch": 80.59, + "learning_rate": 9.71823149675738e-06, + "loss": 1.8726, + "step": 16267000 + }, + { + "epoch": 80.59, + "learning_rate": 9.716992910331298e-06, + "loss": 1.8458, + "step": 16267500 + }, + { + "epoch": 80.6, + "learning_rate": 9.715756801078066e-06, + "loss": 1.8765, + "step": 16268000 + }, + { + "epoch": 80.6, + "learning_rate": 9.714518214651982e-06, + "loss": 1.8442, + "step": 16268500 + }, + { + "epoch": 80.6, + "learning_rate": 9.71328210539875e-06, + "loss": 1.8641, + "step": 16269000 + }, + { + "epoch": 80.6, + "learning_rate": 9.712043518972667e-06, + "loss": 1.851, + "step": 16269500 + }, + { + "epoch": 80.61, + "learning_rate": 9.710804932546584e-06, + "loss": 1.8716, + "step": 16270000 + }, + { + "epoch": 80.61, + "learning_rate": 9.7095663461205e-06, + "loss": 1.8736, + "step": 16270500 + }, + { + "epoch": 80.61, + "learning_rate": 9.708330236867268e-06, + "loss": 1.878, + "step": 16271000 + }, + { + "epoch": 80.61, + "learning_rate": 9.707091650441184e-06, + "loss": 1.8424, + "step": 16271500 + }, + { + "epoch": 80.62, + "learning_rate": 9.7058530640151e-06, + "loss": 1.862, + "step": 16272000 + }, + { + "epoch": 80.62, + "learning_rate": 9.704614477589018e-06, + "loss": 1.8703, + "step": 16272500 + }, + { + "epoch": 80.62, + "learning_rate": 9.703375891162934e-06, + "loss": 1.8696, + "step": 16273000 + }, + { + "epoch": 80.62, + "learning_rate": 9.702137304736851e-06, + "loss": 1.854, + "step": 16273500 + }, + { + "epoch": 80.63, + "learning_rate": 9.700898718310767e-06, + "loss": 1.883, + "step": 16274000 + }, + { + "epoch": 80.63, + "learning_rate": 9.699660131884684e-06, + "loss": 1.8629, + "step": 16274500 + }, + { + "epoch": 80.63, + "learning_rate": 9.69842402263145e-06, + "loss": 1.8737, + "step": 16275000 + }, + { + "epoch": 80.63, + "learning_rate": 9.697185436205368e-06, + "loss": 1.909, + "step": 16275500 + }, + { + "epoch": 80.64, + "learning_rate": 9.695946849779285e-06, + "loss": 1.8513, + "step": 16276000 + }, + { + "epoch": 80.64, + "learning_rate": 9.694708263353201e-06, + "loss": 1.8663, + "step": 16276500 + }, + { + "epoch": 80.64, + "learning_rate": 9.693469676927117e-06, + "loss": 1.8557, + "step": 16277000 + }, + { + "epoch": 80.64, + "learning_rate": 9.692231090501034e-06, + "loss": 1.8544, + "step": 16277500 + }, + { + "epoch": 80.65, + "learning_rate": 9.69099250407495e-06, + "loss": 1.8757, + "step": 16278000 + }, + { + "epoch": 80.65, + "learning_rate": 9.689756394821718e-06, + "loss": 1.8616, + "step": 16278500 + }, + { + "epoch": 80.65, + "learning_rate": 9.688517808395635e-06, + "loss": 1.8659, + "step": 16279000 + }, + { + "epoch": 80.65, + "learning_rate": 9.687281699142403e-06, + "loss": 1.8334, + "step": 16279500 + }, + { + "epoch": 80.66, + "learning_rate": 9.68604311271632e-06, + "loss": 1.8415, + "step": 16280000 + }, + { + "epoch": 80.66, + "learning_rate": 9.684804526290237e-06, + "loss": 1.8586, + "step": 16280500 + }, + { + "epoch": 80.66, + "learning_rate": 9.683565939864152e-06, + "loss": 1.8493, + "step": 16281000 + }, + { + "epoch": 80.66, + "learning_rate": 9.682327353438068e-06, + "loss": 1.8823, + "step": 16281500 + }, + { + "epoch": 80.67, + "learning_rate": 9.681088767011985e-06, + "loss": 1.8433, + "step": 16282000 + }, + { + "epoch": 80.67, + "learning_rate": 9.679850180585902e-06, + "loss": 1.8352, + "step": 16282500 + }, + { + "epoch": 80.67, + "learning_rate": 9.678611594159817e-06, + "loss": 1.8746, + "step": 16283000 + }, + { + "epoch": 80.67, + "learning_rate": 9.677373007733734e-06, + "loss": 1.832, + "step": 16283500 + }, + { + "epoch": 80.68, + "learning_rate": 9.67613442130765e-06, + "loss": 1.8763, + "step": 16284000 + }, + { + "epoch": 80.68, + "learning_rate": 9.674895834881568e-06, + "loss": 1.8474, + "step": 16284500 + }, + { + "epoch": 80.68, + "learning_rate": 9.673657248455485e-06, + "loss": 1.8559, + "step": 16285000 + }, + { + "epoch": 80.68, + "learning_rate": 9.6724186620294e-06, + "loss": 1.8684, + "step": 16285500 + }, + { + "epoch": 80.69, + "learning_rate": 9.671180075603315e-06, + "loss": 1.854, + "step": 16286000 + }, + { + "epoch": 80.69, + "learning_rate": 9.669943966350084e-06, + "loss": 1.8621, + "step": 16286500 + }, + { + "epoch": 80.69, + "learning_rate": 9.668705379924e-06, + "loss": 1.8743, + "step": 16287000 + }, + { + "epoch": 80.69, + "learning_rate": 9.667466793497918e-06, + "loss": 1.8349, + "step": 16287500 + }, + { + "epoch": 80.7, + "learning_rate": 9.666228207071835e-06, + "loss": 1.8564, + "step": 16288000 + }, + { + "epoch": 80.7, + "learning_rate": 9.664992097818602e-06, + "loss": 1.8684, + "step": 16288500 + }, + { + "epoch": 80.7, + "learning_rate": 9.663753511392519e-06, + "loss": 1.8512, + "step": 16289000 + }, + { + "epoch": 80.7, + "learning_rate": 9.662514924966434e-06, + "loss": 1.8349, + "step": 16289500 + }, + { + "epoch": 80.71, + "learning_rate": 9.66127633854035e-06, + "loss": 1.8724, + "step": 16290000 + }, + { + "epoch": 80.71, + "learning_rate": 9.660037752114268e-06, + "loss": 1.8413, + "step": 16290500 + }, + { + "epoch": 80.71, + "learning_rate": 9.658799165688185e-06, + "loss": 1.8455, + "step": 16291000 + }, + { + "epoch": 80.71, + "learning_rate": 9.6575605792621e-06, + "loss": 1.8472, + "step": 16291500 + }, + { + "epoch": 80.72, + "learning_rate": 9.656321992836017e-06, + "loss": 1.8863, + "step": 16292000 + }, + { + "epoch": 80.72, + "learning_rate": 9.655088360755637e-06, + "loss": 1.8658, + "step": 16292500 + }, + { + "epoch": 80.72, + "learning_rate": 9.653849774329554e-06, + "loss": 1.8657, + "step": 16293000 + }, + { + "epoch": 80.72, + "learning_rate": 9.65261118790347e-06, + "loss": 1.853, + "step": 16293500 + }, + { + "epoch": 80.73, + "learning_rate": 9.651372601477387e-06, + "loss": 1.8412, + "step": 16294000 + }, + { + "epoch": 80.73, + "learning_rate": 9.650134015051303e-06, + "loss": 1.8561, + "step": 16294500 + }, + { + "epoch": 80.73, + "learning_rate": 9.648895428625219e-06, + "loss": 1.8929, + "step": 16295000 + }, + { + "epoch": 80.73, + "learning_rate": 9.647656842199134e-06, + "loss": 1.8664, + "step": 16295500 + }, + { + "epoch": 80.74, + "learning_rate": 9.646418255773051e-06, + "loss": 1.8274, + "step": 16296000 + }, + { + "epoch": 80.74, + "learning_rate": 9.645179669346968e-06, + "loss": 1.8727, + "step": 16296500 + }, + { + "epoch": 80.74, + "learning_rate": 9.643941082920885e-06, + "loss": 1.8552, + "step": 16297000 + }, + { + "epoch": 80.74, + "learning_rate": 9.642704973667654e-06, + "loss": 1.8548, + "step": 16297500 + }, + { + "epoch": 80.75, + "learning_rate": 9.64146638724157e-06, + "loss": 1.8244, + "step": 16298000 + }, + { + "epoch": 80.75, + "learning_rate": 9.640227800815486e-06, + "loss": 1.8368, + "step": 16298500 + }, + { + "epoch": 80.75, + "learning_rate": 9.638989214389401e-06, + "loss": 1.8784, + "step": 16299000 + }, + { + "epoch": 80.75, + "learning_rate": 9.637750627963318e-06, + "loss": 1.8466, + "step": 16299500 + }, + { + "epoch": 80.76, + "learning_rate": 9.636512041537235e-06, + "loss": 1.8602, + "step": 16300000 + }, + { + "epoch": 80.76, + "learning_rate": 9.635273455111152e-06, + "loss": 1.8432, + "step": 16300500 + }, + { + "epoch": 80.76, + "learning_rate": 9.634034868685067e-06, + "loss": 1.8636, + "step": 16301000 + }, + { + "epoch": 80.76, + "learning_rate": 9.632796282258984e-06, + "loss": 1.8879, + "step": 16301500 + }, + { + "epoch": 80.77, + "learning_rate": 9.6315576958329e-06, + "loss": 1.847, + "step": 16302000 + }, + { + "epoch": 80.77, + "learning_rate": 9.630321586579668e-06, + "loss": 1.8439, + "step": 16302500 + }, + { + "epoch": 80.77, + "learning_rate": 9.629083000153585e-06, + "loss": 1.8649, + "step": 16303000 + }, + { + "epoch": 80.77, + "learning_rate": 9.627844413727502e-06, + "loss": 1.8697, + "step": 16303500 + }, + { + "epoch": 80.78, + "learning_rate": 9.62660830447427e-06, + "loss": 1.8628, + "step": 16304000 + }, + { + "epoch": 80.78, + "learning_rate": 9.625369718048188e-06, + "loss": 1.8432, + "step": 16304500 + }, + { + "epoch": 80.78, + "learning_rate": 9.624131131622103e-06, + "loss": 1.8709, + "step": 16305000 + }, + { + "epoch": 80.78, + "learning_rate": 9.62289254519602e-06, + "loss": 1.8555, + "step": 16305500 + }, + { + "epoch": 80.79, + "learning_rate": 9.621653958769935e-06, + "loss": 1.883, + "step": 16306000 + }, + { + "epoch": 80.79, + "learning_rate": 9.620417849516704e-06, + "loss": 1.8423, + "step": 16306500 + }, + { + "epoch": 80.79, + "learning_rate": 9.61917926309062e-06, + "loss": 1.8349, + "step": 16307000 + }, + { + "epoch": 80.79, + "learning_rate": 9.617940676664538e-06, + "loss": 1.8495, + "step": 16307500 + }, + { + "epoch": 80.8, + "learning_rate": 9.616702090238453e-06, + "loss": 1.8532, + "step": 16308000 + }, + { + "epoch": 80.8, + "learning_rate": 9.61546350381237e-06, + "loss": 1.8749, + "step": 16308500 + }, + { + "epoch": 80.8, + "learning_rate": 9.614227394559137e-06, + "loss": 1.837, + "step": 16309000 + }, + { + "epoch": 80.8, + "learning_rate": 9.612988808133054e-06, + "loss": 1.8647, + "step": 16309500 + }, + { + "epoch": 80.81, + "learning_rate": 9.61175022170697e-06, + "loss": 1.855, + "step": 16310000 + }, + { + "epoch": 80.81, + "learning_rate": 9.610511635280888e-06, + "loss": 1.8557, + "step": 16310500 + }, + { + "epoch": 80.81, + "learning_rate": 9.609273048854805e-06, + "loss": 1.8863, + "step": 16311000 + }, + { + "epoch": 80.81, + "learning_rate": 9.60803446242872e-06, + "loss": 1.8678, + "step": 16311500 + }, + { + "epoch": 80.82, + "learning_rate": 9.606795876002637e-06, + "loss": 1.8687, + "step": 16312000 + }, + { + "epoch": 80.82, + "learning_rate": 9.605559766749404e-06, + "loss": 1.8478, + "step": 16312500 + }, + { + "epoch": 80.82, + "learning_rate": 9.604323657496173e-06, + "loss": 1.886, + "step": 16313000 + }, + { + "epoch": 80.82, + "learning_rate": 9.60308507107009e-06, + "loss": 1.8556, + "step": 16313500 + }, + { + "epoch": 80.83, + "learning_rate": 9.601846484644007e-06, + "loss": 1.8355, + "step": 16314000 + }, + { + "epoch": 80.83, + "learning_rate": 9.600607898217922e-06, + "loss": 1.8552, + "step": 16314500 + }, + { + "epoch": 80.83, + "learning_rate": 9.599369311791839e-06, + "loss": 1.8299, + "step": 16315000 + }, + { + "epoch": 80.83, + "learning_rate": 9.598130725365754e-06, + "loss": 1.8403, + "step": 16315500 + }, + { + "epoch": 80.84, + "learning_rate": 9.596892138939671e-06, + "loss": 1.8561, + "step": 16316000 + }, + { + "epoch": 80.84, + "learning_rate": 9.59565602968644e-06, + "loss": 1.8603, + "step": 16316500 + }, + { + "epoch": 80.84, + "learning_rate": 9.594417443260357e-06, + "loss": 1.8689, + "step": 16317000 + }, + { + "epoch": 80.84, + "learning_rate": 9.593178856834274e-06, + "loss": 1.8525, + "step": 16317500 + }, + { + "epoch": 80.85, + "learning_rate": 9.591940270408189e-06, + "loss": 1.8525, + "step": 16318000 + }, + { + "epoch": 80.85, + "learning_rate": 9.590701683982104e-06, + "loss": 1.8659, + "step": 16318500 + }, + { + "epoch": 80.85, + "learning_rate": 9.589463097556021e-06, + "loss": 1.8608, + "step": 16319000 + }, + { + "epoch": 80.85, + "learning_rate": 9.588224511129938e-06, + "loss": 1.8531, + "step": 16319500 + }, + { + "epoch": 80.85, + "learning_rate": 9.586985924703855e-06, + "loss": 1.88, + "step": 16320000 + }, + { + "epoch": 80.86, + "learning_rate": 9.58574733827777e-06, + "loss": 1.851, + "step": 16320500 + }, + { + "epoch": 80.86, + "learning_rate": 9.58451370619739e-06, + "loss": 1.8751, + "step": 16321000 + }, + { + "epoch": 80.86, + "learning_rate": 9.583275119771308e-06, + "loss": 1.8492, + "step": 16321500 + }, + { + "epoch": 80.86, + "learning_rate": 9.582036533345225e-06, + "loss": 1.8692, + "step": 16322000 + }, + { + "epoch": 80.87, + "learning_rate": 9.58079794691914e-06, + "loss": 1.8411, + "step": 16322500 + }, + { + "epoch": 80.87, + "learning_rate": 9.579559360493057e-06, + "loss": 1.8464, + "step": 16323000 + }, + { + "epoch": 80.87, + "learning_rate": 9.578320774066974e-06, + "loss": 1.8664, + "step": 16323500 + }, + { + "epoch": 80.87, + "learning_rate": 9.57708218764089e-06, + "loss": 1.8587, + "step": 16324000 + }, + { + "epoch": 80.88, + "learning_rate": 9.575843601214806e-06, + "loss": 1.857, + "step": 16324500 + }, + { + "epoch": 80.88, + "learning_rate": 9.574605014788723e-06, + "loss": 1.8763, + "step": 16325000 + }, + { + "epoch": 80.88, + "learning_rate": 9.573366428362638e-06, + "loss": 1.8501, + "step": 16325500 + }, + { + "epoch": 80.88, + "learning_rate": 9.572130319109407e-06, + "loss": 1.8542, + "step": 16326000 + }, + { + "epoch": 80.89, + "learning_rate": 9.570891732683324e-06, + "loss": 1.8302, + "step": 16326500 + }, + { + "epoch": 80.89, + "learning_rate": 9.56965314625724e-06, + "loss": 1.85, + "step": 16327000 + }, + { + "epoch": 80.89, + "learning_rate": 9.568417037004008e-06, + "loss": 1.844, + "step": 16327500 + }, + { + "epoch": 80.89, + "learning_rate": 9.567178450577925e-06, + "loss": 1.8618, + "step": 16328000 + }, + { + "epoch": 80.9, + "learning_rate": 9.565942341324693e-06, + "loss": 1.8805, + "step": 16328500 + }, + { + "epoch": 80.9, + "learning_rate": 9.56470375489861e-06, + "loss": 1.8716, + "step": 16329000 + }, + { + "epoch": 80.9, + "learning_rate": 9.563465168472527e-06, + "loss": 1.8759, + "step": 16329500 + }, + { + "epoch": 80.9, + "learning_rate": 9.562226582046443e-06, + "loss": 1.8443, + "step": 16330000 + }, + { + "epoch": 80.91, + "learning_rate": 9.56098799562036e-06, + "loss": 1.853, + "step": 16330500 + }, + { + "epoch": 80.91, + "learning_rate": 9.559751886367127e-06, + "loss": 1.8726, + "step": 16331000 + }, + { + "epoch": 80.91, + "learning_rate": 9.558513299941044e-06, + "loss": 1.8638, + "step": 16331500 + }, + { + "epoch": 80.91, + "learning_rate": 9.55727471351496e-06, + "loss": 1.8561, + "step": 16332000 + }, + { + "epoch": 80.92, + "learning_rate": 9.556036127088877e-06, + "loss": 1.8596, + "step": 16332500 + }, + { + "epoch": 80.92, + "learning_rate": 9.554797540662793e-06, + "loss": 1.8585, + "step": 16333000 + }, + { + "epoch": 80.92, + "learning_rate": 9.55355895423671e-06, + "loss": 1.8482, + "step": 16333500 + }, + { + "epoch": 80.92, + "learning_rate": 9.552320367810627e-06, + "loss": 1.8677, + "step": 16334000 + }, + { + "epoch": 80.93, + "learning_rate": 9.551081781384542e-06, + "loss": 1.8685, + "step": 16334500 + }, + { + "epoch": 80.93, + "learning_rate": 9.549843194958457e-06, + "loss": 1.8665, + "step": 16335000 + }, + { + "epoch": 80.93, + "learning_rate": 9.548604608532374e-06, + "loss": 1.8552, + "step": 16335500 + }, + { + "epoch": 80.93, + "learning_rate": 9.54736602210629e-06, + "loss": 1.8852, + "step": 16336000 + }, + { + "epoch": 80.94, + "learning_rate": 9.54612991285306e-06, + "loss": 1.8763, + "step": 16336500 + }, + { + "epoch": 80.94, + "learning_rate": 9.544891326426977e-06, + "loss": 1.8319, + "step": 16337000 + }, + { + "epoch": 80.94, + "learning_rate": 9.543655217173744e-06, + "loss": 1.8453, + "step": 16337500 + }, + { + "epoch": 80.94, + "learning_rate": 9.54241663074766e-06, + "loss": 1.8636, + "step": 16338000 + }, + { + "epoch": 80.95, + "learning_rate": 9.541178044321578e-06, + "loss": 1.8555, + "step": 16338500 + }, + { + "epoch": 80.95, + "learning_rate": 9.539939457895493e-06, + "loss": 1.8523, + "step": 16339000 + }, + { + "epoch": 80.95, + "learning_rate": 9.53870087146941e-06, + "loss": 1.892, + "step": 16339500 + }, + { + "epoch": 80.95, + "learning_rate": 9.537462285043327e-06, + "loss": 1.8648, + "step": 16340000 + }, + { + "epoch": 80.96, + "learning_rate": 9.536223698617244e-06, + "loss": 1.8731, + "step": 16340500 + }, + { + "epoch": 80.96, + "learning_rate": 9.534985112191159e-06, + "loss": 1.8589, + "step": 16341000 + }, + { + "epoch": 80.96, + "learning_rate": 9.533746525765076e-06, + "loss": 1.8465, + "step": 16341500 + }, + { + "epoch": 80.96, + "learning_rate": 9.532507939338991e-06, + "loss": 1.8587, + "step": 16342000 + }, + { + "epoch": 80.97, + "learning_rate": 9.53127183008576e-06, + "loss": 1.8435, + "step": 16342500 + }, + { + "epoch": 80.97, + "learning_rate": 9.530033243659677e-06, + "loss": 1.8301, + "step": 16343000 + }, + { + "epoch": 80.97, + "learning_rate": 9.528794657233594e-06, + "loss": 1.8405, + "step": 16343500 + }, + { + "epoch": 80.97, + "learning_rate": 9.527561025153213e-06, + "loss": 1.8699, + "step": 16344000 + }, + { + "epoch": 80.98, + "learning_rate": 9.52632243872713e-06, + "loss": 1.8542, + "step": 16344500 + }, + { + "epoch": 80.98, + "learning_rate": 9.525083852301046e-06, + "loss": 1.873, + "step": 16345000 + }, + { + "epoch": 80.98, + "learning_rate": 9.523845265874963e-06, + "loss": 1.8798, + "step": 16345500 + }, + { + "epoch": 80.98, + "learning_rate": 9.52260667944888e-06, + "loss": 1.872, + "step": 16346000 + }, + { + "epoch": 80.99, + "learning_rate": 9.521368093022796e-06, + "loss": 1.8563, + "step": 16346500 + }, + { + "epoch": 80.99, + "learning_rate": 9.520129506596712e-06, + "loss": 1.871, + "step": 16347000 + }, + { + "epoch": 80.99, + "learning_rate": 9.518890920170628e-06, + "loss": 1.8514, + "step": 16347500 + }, + { + "epoch": 80.99, + "learning_rate": 9.517652333744545e-06, + "loss": 1.8463, + "step": 16348000 + }, + { + "epoch": 81.0, + "learning_rate": 9.51641374731846e-06, + "loss": 1.8568, + "step": 16348500 + }, + { + "epoch": 81.0, + "learning_rate": 9.515175160892377e-06, + "loss": 1.8477, + "step": 16349000 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.6823237633030192, + "eval_accuracy_mlm": 0.6431353760390075, + "eval_accuracy_nsp": 0.8671119670221487, + "eval_loss": 2.299555540084839, + "eval_runtime": 147.0471, + "eval_samples_per_second": 1733.86, + "eval_steps_per_second": 72.249, + "step": 16349283 + }, + { + "epoch": 81.0, + "learning_rate": 9.513936574466294e-06, + "loss": 1.8504, + "step": 16349500 + }, + { + "epoch": 81.0, + "learning_rate": 9.51269798804021e-06, + "loss": 1.859, + "step": 16350000 + }, + { + "epoch": 81.01, + "learning_rate": 9.511459401614126e-06, + "loss": 1.8733, + "step": 16350500 + }, + { + "epoch": 81.01, + "learning_rate": 9.510223292360895e-06, + "loss": 1.8509, + "step": 16351000 + }, + { + "epoch": 81.01, + "learning_rate": 9.50898470593481e-06, + "loss": 1.8299, + "step": 16351500 + }, + { + "epoch": 81.01, + "learning_rate": 9.50774859668158e-06, + "loss": 1.8378, + "step": 16352000 + }, + { + "epoch": 81.02, + "learning_rate": 9.506510010255496e-06, + "loss": 1.8377, + "step": 16352500 + }, + { + "epoch": 81.02, + "learning_rate": 9.505271423829413e-06, + "loss": 1.8208, + "step": 16353000 + }, + { + "epoch": 81.02, + "learning_rate": 9.50403283740333e-06, + "loss": 1.8333, + "step": 16353500 + }, + { + "epoch": 81.02, + "learning_rate": 9.502794250977245e-06, + "loss": 1.8631, + "step": 16354000 + }, + { + "epoch": 81.03, + "learning_rate": 9.501555664551162e-06, + "loss": 1.824, + "step": 16354500 + }, + { + "epoch": 81.03, + "learning_rate": 9.500317078125077e-06, + "loss": 1.8413, + "step": 16355000 + }, + { + "epoch": 81.03, + "learning_rate": 9.499078491698994e-06, + "loss": 1.822, + "step": 16355500 + }, + { + "epoch": 81.03, + "learning_rate": 9.49783990527291e-06, + "loss": 1.8574, + "step": 16356000 + }, + { + "epoch": 81.04, + "learning_rate": 9.49660379601968e-06, + "loss": 1.8475, + "step": 16356500 + }, + { + "epoch": 81.04, + "learning_rate": 9.495365209593597e-06, + "loss": 1.8267, + "step": 16357000 + }, + { + "epoch": 81.04, + "learning_rate": 9.494126623167512e-06, + "loss": 1.8588, + "step": 16357500 + }, + { + "epoch": 81.04, + "learning_rate": 9.492888036741427e-06, + "loss": 1.848, + "step": 16358000 + }, + { + "epoch": 81.05, + "learning_rate": 9.491649450315344e-06, + "loss": 1.8549, + "step": 16358500 + }, + { + "epoch": 81.05, + "learning_rate": 9.490410863889261e-06, + "loss": 1.8489, + "step": 16359000 + }, + { + "epoch": 81.05, + "learning_rate": 9.489172277463178e-06, + "loss": 1.8721, + "step": 16359500 + }, + { + "epoch": 81.05, + "learning_rate": 9.487933691037093e-06, + "loss": 1.8358, + "step": 16360000 + }, + { + "epoch": 81.06, + "learning_rate": 9.48669510461101e-06, + "loss": 1.8215, + "step": 16360500 + }, + { + "epoch": 81.06, + "learning_rate": 9.485456518184927e-06, + "loss": 1.8464, + "step": 16361000 + }, + { + "epoch": 81.06, + "learning_rate": 9.484220408931694e-06, + "loss": 1.8461, + "step": 16361500 + }, + { + "epoch": 81.06, + "learning_rate": 9.482984299678463e-06, + "loss": 1.8307, + "step": 16362000 + }, + { + "epoch": 81.07, + "learning_rate": 9.48174571325238e-06, + "loss": 1.8461, + "step": 16362500 + }, + { + "epoch": 81.07, + "learning_rate": 9.480507126826297e-06, + "loss": 1.8717, + "step": 16363000 + }, + { + "epoch": 81.07, + "learning_rate": 9.479268540400214e-06, + "loss": 1.8443, + "step": 16363500 + }, + { + "epoch": 81.07, + "learning_rate": 9.478029953974129e-06, + "loss": 1.8266, + "step": 16364000 + }, + { + "epoch": 81.08, + "learning_rate": 9.476793844720898e-06, + "loss": 1.8489, + "step": 16364500 + }, + { + "epoch": 81.08, + "learning_rate": 9.475555258294813e-06, + "loss": 1.8441, + "step": 16365000 + }, + { + "epoch": 81.08, + "learning_rate": 9.47431667186873e-06, + "loss": 1.8274, + "step": 16365500 + }, + { + "epoch": 81.08, + "learning_rate": 9.473078085442647e-06, + "loss": 1.8442, + "step": 16366000 + }, + { + "epoch": 81.09, + "learning_rate": 9.471839499016564e-06, + "loss": 1.8569, + "step": 16366500 + }, + { + "epoch": 81.09, + "learning_rate": 9.47060091259048e-06, + "loss": 1.8396, + "step": 16367000 + }, + { + "epoch": 81.09, + "learning_rate": 9.469362326164396e-06, + "loss": 1.8753, + "step": 16367500 + }, + { + "epoch": 81.09, + "learning_rate": 9.468123739738311e-06, + "loss": 1.8638, + "step": 16368000 + }, + { + "epoch": 81.1, + "learning_rate": 9.46688763048508e-06, + "loss": 1.86, + "step": 16368500 + }, + { + "epoch": 81.1, + "learning_rate": 9.465649044058997e-06, + "loss": 1.845, + "step": 16369000 + }, + { + "epoch": 81.1, + "learning_rate": 9.464410457632914e-06, + "loss": 1.8441, + "step": 16369500 + }, + { + "epoch": 81.1, + "learning_rate": 9.46317187120683e-06, + "loss": 1.8489, + "step": 16370000 + }, + { + "epoch": 81.11, + "learning_rate": 9.461933284780746e-06, + "loss": 1.8582, + "step": 16370500 + }, + { + "epoch": 81.11, + "learning_rate": 9.460694698354663e-06, + "loss": 1.8532, + "step": 16371000 + }, + { + "epoch": 81.11, + "learning_rate": 9.459456111928578e-06, + "loss": 1.8362, + "step": 16371500 + }, + { + "epoch": 81.11, + "learning_rate": 9.458217525502495e-06, + "loss": 1.8328, + "step": 16372000 + }, + { + "epoch": 81.12, + "learning_rate": 9.45697893907641e-06, + "loss": 1.8374, + "step": 16372500 + }, + { + "epoch": 81.12, + "learning_rate": 9.455745306996033e-06, + "loss": 1.8647, + "step": 16373000 + }, + { + "epoch": 81.12, + "learning_rate": 9.454506720569948e-06, + "loss": 1.845, + "step": 16373500 + }, + { + "epoch": 81.12, + "learning_rate": 9.453268134143865e-06, + "loss": 1.8434, + "step": 16374000 + }, + { + "epoch": 81.12, + "learning_rate": 9.452034502063485e-06, + "loss": 1.8677, + "step": 16374500 + }, + { + "epoch": 81.13, + "learning_rate": 9.450795915637402e-06, + "loss": 1.8433, + "step": 16375000 + }, + { + "epoch": 81.13, + "learning_rate": 9.44955732921132e-06, + "loss": 1.8422, + "step": 16375500 + }, + { + "epoch": 81.13, + "learning_rate": 9.448318742785235e-06, + "loss": 1.8594, + "step": 16376000 + }, + { + "epoch": 81.13, + "learning_rate": 9.44708015635915e-06, + "loss": 1.8588, + "step": 16376500 + }, + { + "epoch": 81.14, + "learning_rate": 9.445841569933067e-06, + "loss": 1.85, + "step": 16377000 + }, + { + "epoch": 81.14, + "learning_rate": 9.444605460679835e-06, + "loss": 1.8502, + "step": 16377500 + }, + { + "epoch": 81.14, + "learning_rate": 9.443369351426604e-06, + "loss": 1.8472, + "step": 16378000 + }, + { + "epoch": 81.14, + "learning_rate": 9.44213076500052e-06, + "loss": 1.8434, + "step": 16378500 + }, + { + "epoch": 81.15, + "learning_rate": 9.440892178574436e-06, + "loss": 1.8431, + "step": 16379000 + }, + { + "epoch": 81.15, + "learning_rate": 9.439653592148353e-06, + "loss": 1.8783, + "step": 16379500 + }, + { + "epoch": 81.15, + "learning_rate": 9.43841500572227e-06, + "loss": 1.8408, + "step": 16380000 + }, + { + "epoch": 81.15, + "learning_rate": 9.437178896469039e-06, + "loss": 1.8283, + "step": 16380500 + }, + { + "epoch": 81.16, + "learning_rate": 9.435940310042956e-06, + "loss": 1.8497, + "step": 16381000 + }, + { + "epoch": 81.16, + "learning_rate": 9.434701723616871e-06, + "loss": 1.8412, + "step": 16381500 + }, + { + "epoch": 81.16, + "learning_rate": 9.433463137190787e-06, + "loss": 1.8544, + "step": 16382000 + }, + { + "epoch": 81.16, + "learning_rate": 9.432224550764703e-06, + "loss": 1.8585, + "step": 16382500 + }, + { + "epoch": 81.17, + "learning_rate": 9.43098596433862e-06, + "loss": 1.8762, + "step": 16383000 + }, + { + "epoch": 81.17, + "learning_rate": 9.429747377912536e-06, + "loss": 1.8441, + "step": 16383500 + }, + { + "epoch": 81.17, + "learning_rate": 9.428508791486453e-06, + "loss": 1.8552, + "step": 16384000 + }, + { + "epoch": 81.17, + "learning_rate": 9.42727020506037e-06, + "loss": 1.8305, + "step": 16384500 + }, + { + "epoch": 81.18, + "learning_rate": 9.426034095807138e-06, + "loss": 1.839, + "step": 16385000 + }, + { + "epoch": 81.18, + "learning_rate": 9.424795509381054e-06, + "loss": 1.855, + "step": 16385500 + }, + { + "epoch": 81.18, + "learning_rate": 9.42355692295497e-06, + "loss": 1.8519, + "step": 16386000 + }, + { + "epoch": 81.18, + "learning_rate": 9.422318336528886e-06, + "loss": 1.8529, + "step": 16386500 + }, + { + "epoch": 81.19, + "learning_rate": 9.421079750102803e-06, + "loss": 1.8447, + "step": 16387000 + }, + { + "epoch": 81.19, + "learning_rate": 9.419843640849571e-06, + "loss": 1.8342, + "step": 16387500 + }, + { + "epoch": 81.19, + "learning_rate": 9.418605054423488e-06, + "loss": 1.8729, + "step": 16388000 + }, + { + "epoch": 81.19, + "learning_rate": 9.417366467997404e-06, + "loss": 1.8399, + "step": 16388500 + }, + { + "epoch": 81.2, + "learning_rate": 9.41612788157132e-06, + "loss": 1.8562, + "step": 16389000 + }, + { + "epoch": 81.2, + "learning_rate": 9.414889295145237e-06, + "loss": 1.8523, + "step": 16389500 + }, + { + "epoch": 81.2, + "learning_rate": 9.413650708719153e-06, + "loss": 1.8528, + "step": 16390000 + }, + { + "epoch": 81.2, + "learning_rate": 9.41241212229307e-06, + "loss": 1.8161, + "step": 16390500 + }, + { + "epoch": 81.21, + "learning_rate": 9.411173535866987e-06, + "loss": 1.8705, + "step": 16391000 + }, + { + "epoch": 81.21, + "learning_rate": 9.409937426613755e-06, + "loss": 1.8482, + "step": 16391500 + }, + { + "epoch": 81.21, + "learning_rate": 9.40869884018767e-06, + "loss": 1.8368, + "step": 16392000 + }, + { + "epoch": 81.21, + "learning_rate": 9.407460253761587e-06, + "loss": 1.856, + "step": 16392500 + }, + { + "epoch": 81.22, + "learning_rate": 9.406221667335503e-06, + "loss": 1.8389, + "step": 16393000 + }, + { + "epoch": 81.22, + "learning_rate": 9.40498308090942e-06, + "loss": 1.8347, + "step": 16393500 + }, + { + "epoch": 81.22, + "learning_rate": 9.403744494483337e-06, + "loss": 1.825, + "step": 16394000 + }, + { + "epoch": 81.22, + "learning_rate": 9.402505908057254e-06, + "loss": 1.8552, + "step": 16394500 + }, + { + "epoch": 81.23, + "learning_rate": 9.401267321631169e-06, + "loss": 1.8486, + "step": 16395000 + }, + { + "epoch": 81.23, + "learning_rate": 9.400028735205086e-06, + "loss": 1.8709, + "step": 16395500 + }, + { + "epoch": 81.23, + "learning_rate": 9.398792625951853e-06, + "loss": 1.8543, + "step": 16396000 + }, + { + "epoch": 81.23, + "learning_rate": 9.39755403952577e-06, + "loss": 1.8651, + "step": 16396500 + }, + { + "epoch": 81.24, + "learning_rate": 9.396315453099687e-06, + "loss": 1.8425, + "step": 16397000 + }, + { + "epoch": 81.24, + "learning_rate": 9.395076866673604e-06, + "loss": 1.8569, + "step": 16397500 + }, + { + "epoch": 81.24, + "learning_rate": 9.393838280247519e-06, + "loss": 1.8297, + "step": 16398000 + }, + { + "epoch": 81.24, + "learning_rate": 9.392599693821436e-06, + "loss": 1.8417, + "step": 16398500 + }, + { + "epoch": 81.25, + "learning_rate": 9.391361107395353e-06, + "loss": 1.8335, + "step": 16399000 + }, + { + "epoch": 81.25, + "learning_rate": 9.39012252096927e-06, + "loss": 1.866, + "step": 16399500 + }, + { + "epoch": 81.25, + "learning_rate": 9.388883934543185e-06, + "loss": 1.8393, + "step": 16400000 + }, + { + "epoch": 81.25, + "learning_rate": 9.3876453481171e-06, + "loss": 1.852, + "step": 16400500 + }, + { + "epoch": 81.26, + "learning_rate": 9.38640923886387e-06, + "loss": 1.8258, + "step": 16401000 + }, + { + "epoch": 81.26, + "learning_rate": 9.385170652437786e-06, + "loss": 1.8603, + "step": 16401500 + }, + { + "epoch": 81.26, + "learning_rate": 9.383932066011703e-06, + "loss": 1.8343, + "step": 16402000 + }, + { + "epoch": 81.26, + "learning_rate": 9.38269347958562e-06, + "loss": 1.8685, + "step": 16402500 + }, + { + "epoch": 81.27, + "learning_rate": 9.381454893159537e-06, + "loss": 1.8378, + "step": 16403000 + }, + { + "epoch": 81.27, + "learning_rate": 9.380216306733452e-06, + "loss": 1.8667, + "step": 16403500 + }, + { + "epoch": 81.27, + "learning_rate": 9.37898019748022e-06, + "loss": 1.8459, + "step": 16404000 + }, + { + "epoch": 81.27, + "learning_rate": 9.37774408822699e-06, + "loss": 1.8339, + "step": 16404500 + }, + { + "epoch": 81.28, + "learning_rate": 9.376505501800906e-06, + "loss": 1.8524, + "step": 16405000 + }, + { + "epoch": 81.28, + "learning_rate": 9.375266915374822e-06, + "loss": 1.8647, + "step": 16405500 + }, + { + "epoch": 81.28, + "learning_rate": 9.374028328948737e-06, + "loss": 1.8548, + "step": 16406000 + }, + { + "epoch": 81.28, + "learning_rate": 9.372792219695506e-06, + "loss": 1.8693, + "step": 16406500 + }, + { + "epoch": 81.29, + "learning_rate": 9.371553633269423e-06, + "loss": 1.8598, + "step": 16407000 + }, + { + "epoch": 81.29, + "learning_rate": 9.37031504684334e-06, + "loss": 1.8463, + "step": 16407500 + }, + { + "epoch": 81.29, + "learning_rate": 9.369076460417256e-06, + "loss": 1.8527, + "step": 16408000 + }, + { + "epoch": 81.29, + "learning_rate": 9.367837873991172e-06, + "loss": 1.8512, + "step": 16408500 + }, + { + "epoch": 81.3, + "learning_rate": 9.36660176473794e-06, + "loss": 1.8562, + "step": 16409000 + }, + { + "epoch": 81.3, + "learning_rate": 9.36536565548471e-06, + "loss": 1.8316, + "step": 16409500 + }, + { + "epoch": 81.3, + "learning_rate": 9.364127069058626e-06, + "loss": 1.8531, + "step": 16410000 + }, + { + "epoch": 81.3, + "learning_rate": 9.362888482632541e-06, + "loss": 1.8485, + "step": 16410500 + }, + { + "epoch": 81.31, + "learning_rate": 9.361649896206458e-06, + "loss": 1.8272, + "step": 16411000 + }, + { + "epoch": 81.31, + "learning_rate": 9.360413786953225e-06, + "loss": 1.8312, + "step": 16411500 + }, + { + "epoch": 81.31, + "learning_rate": 9.359177677699996e-06, + "loss": 1.8603, + "step": 16412000 + }, + { + "epoch": 81.31, + "learning_rate": 9.357939091273911e-06, + "loss": 1.8486, + "step": 16412500 + }, + { + "epoch": 81.32, + "learning_rate": 9.356700504847828e-06, + "loss": 1.8608, + "step": 16413000 + }, + { + "epoch": 81.32, + "learning_rate": 9.355461918421745e-06, + "loss": 1.8348, + "step": 16413500 + }, + { + "epoch": 81.32, + "learning_rate": 9.35422333199566e-06, + "loss": 1.8385, + "step": 16414000 + }, + { + "epoch": 81.32, + "learning_rate": 9.352984745569576e-06, + "loss": 1.8295, + "step": 16414500 + }, + { + "epoch": 81.33, + "learning_rate": 9.351746159143492e-06, + "loss": 1.8712, + "step": 16415000 + }, + { + "epoch": 81.33, + "learning_rate": 9.35050757271741e-06, + "loss": 1.8712, + "step": 16415500 + }, + { + "epoch": 81.33, + "learning_rate": 9.349268986291326e-06, + "loss": 1.8696, + "step": 16416000 + }, + { + "epoch": 81.33, + "learning_rate": 9.348030399865242e-06, + "loss": 1.865, + "step": 16416500 + }, + { + "epoch": 81.34, + "learning_rate": 9.346791813439158e-06, + "loss": 1.8575, + "step": 16417000 + }, + { + "epoch": 81.34, + "learning_rate": 9.345553227013075e-06, + "loss": 1.8258, + "step": 16417500 + }, + { + "epoch": 81.34, + "learning_rate": 9.344314640586992e-06, + "loss": 1.8678, + "step": 16418000 + }, + { + "epoch": 81.34, + "learning_rate": 9.343076054160908e-06, + "loss": 1.8414, + "step": 16418500 + }, + { + "epoch": 81.35, + "learning_rate": 9.341837467734823e-06, + "loss": 1.85, + "step": 16419000 + }, + { + "epoch": 81.35, + "learning_rate": 9.34059888130874e-06, + "loss": 1.8627, + "step": 16419500 + }, + { + "epoch": 81.35, + "learning_rate": 9.339360294882657e-06, + "loss": 1.8441, + "step": 16420000 + }, + { + "epoch": 81.35, + "learning_rate": 9.338121708456574e-06, + "loss": 1.8738, + "step": 16420500 + }, + { + "epoch": 81.36, + "learning_rate": 9.336883122030489e-06, + "loss": 1.8716, + "step": 16421000 + }, + { + "epoch": 81.36, + "learning_rate": 9.33564701277726e-06, + "loss": 1.8599, + "step": 16421500 + }, + { + "epoch": 81.36, + "learning_rate": 9.334410903524026e-06, + "loss": 1.8569, + "step": 16422000 + }, + { + "epoch": 81.36, + "learning_rate": 9.333172317097943e-06, + "loss": 1.8408, + "step": 16422500 + }, + { + "epoch": 81.37, + "learning_rate": 9.331933730671859e-06, + "loss": 1.8299, + "step": 16423000 + }, + { + "epoch": 81.37, + "learning_rate": 9.330695144245776e-06, + "loss": 1.8387, + "step": 16423500 + }, + { + "epoch": 81.37, + "learning_rate": 9.329456557819692e-06, + "loss": 1.8597, + "step": 16424000 + }, + { + "epoch": 81.37, + "learning_rate": 9.32821797139361e-06, + "loss": 1.8683, + "step": 16424500 + }, + { + "epoch": 81.38, + "learning_rate": 9.326979384967525e-06, + "loss": 1.8576, + "step": 16425000 + }, + { + "epoch": 81.38, + "learning_rate": 9.325740798541442e-06, + "loss": 1.8562, + "step": 16425500 + }, + { + "epoch": 81.38, + "learning_rate": 9.324504689288209e-06, + "loss": 1.8405, + "step": 16426000 + }, + { + "epoch": 81.38, + "learning_rate": 9.323266102862126e-06, + "loss": 1.8417, + "step": 16426500 + }, + { + "epoch": 81.39, + "learning_rate": 9.322027516436043e-06, + "loss": 1.835, + "step": 16427000 + }, + { + "epoch": 81.39, + "learning_rate": 9.32078893000996e-06, + "loss": 1.8688, + "step": 16427500 + }, + { + "epoch": 81.39, + "learning_rate": 9.319552820756727e-06, + "loss": 1.8656, + "step": 16428000 + }, + { + "epoch": 81.39, + "learning_rate": 9.318314234330644e-06, + "loss": 1.8425, + "step": 16428500 + }, + { + "epoch": 81.39, + "learning_rate": 9.317075647904559e-06, + "loss": 1.8397, + "step": 16429000 + }, + { + "epoch": 81.4, + "learning_rate": 9.315837061478476e-06, + "loss": 1.8377, + "step": 16429500 + }, + { + "epoch": 81.4, + "learning_rate": 9.314598475052393e-06, + "loss": 1.8547, + "step": 16430000 + }, + { + "epoch": 81.4, + "learning_rate": 9.31335988862631e-06, + "loss": 1.8531, + "step": 16430500 + }, + { + "epoch": 81.4, + "learning_rate": 9.312121302200226e-06, + "loss": 1.8401, + "step": 16431000 + }, + { + "epoch": 81.41, + "learning_rate": 9.310882715774142e-06, + "loss": 1.831, + "step": 16431500 + }, + { + "epoch": 81.41, + "learning_rate": 9.309644129348059e-06, + "loss": 1.8528, + "step": 16432000 + }, + { + "epoch": 81.41, + "learning_rate": 9.30841049726768e-06, + "loss": 1.8363, + "step": 16432500 + }, + { + "epoch": 81.41, + "learning_rate": 9.307171910841596e-06, + "loss": 1.8718, + "step": 16433000 + }, + { + "epoch": 81.42, + "learning_rate": 9.305933324415511e-06, + "loss": 1.8639, + "step": 16433500 + }, + { + "epoch": 81.42, + "learning_rate": 9.304694737989428e-06, + "loss": 1.8315, + "step": 16434000 + }, + { + "epoch": 81.42, + "learning_rate": 9.303456151563345e-06, + "loss": 1.8388, + "step": 16434500 + }, + { + "epoch": 81.42, + "learning_rate": 9.30221756513726e-06, + "loss": 1.8425, + "step": 16435000 + }, + { + "epoch": 81.43, + "learning_rate": 9.300978978711176e-06, + "loss": 1.851, + "step": 16435500 + }, + { + "epoch": 81.43, + "learning_rate": 9.299740392285093e-06, + "loss": 1.8444, + "step": 16436000 + }, + { + "epoch": 81.43, + "learning_rate": 9.298504283031862e-06, + "loss": 1.8798, + "step": 16436500 + }, + { + "epoch": 81.43, + "learning_rate": 9.297265696605778e-06, + "loss": 1.8405, + "step": 16437000 + }, + { + "epoch": 81.44, + "learning_rate": 9.296027110179695e-06, + "loss": 1.8701, + "step": 16437500 + }, + { + "epoch": 81.44, + "learning_rate": 9.29478852375361e-06, + "loss": 1.8626, + "step": 16438000 + }, + { + "epoch": 81.44, + "learning_rate": 9.293549937327528e-06, + "loss": 1.8472, + "step": 16438500 + }, + { + "epoch": 81.44, + "learning_rate": 9.292311350901443e-06, + "loss": 1.8484, + "step": 16439000 + }, + { + "epoch": 81.45, + "learning_rate": 9.29107276447536e-06, + "loss": 1.8682, + "step": 16439500 + }, + { + "epoch": 81.45, + "learning_rate": 9.289834178049277e-06, + "loss": 1.8525, + "step": 16440000 + }, + { + "epoch": 81.45, + "learning_rate": 9.288595591623192e-06, + "loss": 1.8558, + "step": 16440500 + }, + { + "epoch": 81.45, + "learning_rate": 9.287359482369962e-06, + "loss": 1.8382, + "step": 16441000 + }, + { + "epoch": 81.46, + "learning_rate": 9.286120895943878e-06, + "loss": 1.833, + "step": 16441500 + }, + { + "epoch": 81.46, + "learning_rate": 9.284884786690646e-06, + "loss": 1.8386, + "step": 16442000 + }, + { + "epoch": 81.46, + "learning_rate": 9.283646200264562e-06, + "loss": 1.8441, + "step": 16442500 + }, + { + "epoch": 81.46, + "learning_rate": 9.282407613838479e-06, + "loss": 1.8478, + "step": 16443000 + }, + { + "epoch": 81.47, + "learning_rate": 9.281169027412396e-06, + "loss": 1.8617, + "step": 16443500 + }, + { + "epoch": 81.47, + "learning_rate": 9.279930440986312e-06, + "loss": 1.8555, + "step": 16444000 + }, + { + "epoch": 81.47, + "learning_rate": 9.27869185456023e-06, + "loss": 1.845, + "step": 16444500 + }, + { + "epoch": 81.47, + "learning_rate": 9.277453268134145e-06, + "loss": 1.8472, + "step": 16445000 + }, + { + "epoch": 81.48, + "learning_rate": 9.27621468170806e-06, + "loss": 1.8677, + "step": 16445500 + }, + { + "epoch": 81.48, + "learning_rate": 9.274976095281977e-06, + "loss": 1.8504, + "step": 16446000 + }, + { + "epoch": 81.48, + "learning_rate": 9.273737508855894e-06, + "loss": 1.87, + "step": 16446500 + }, + { + "epoch": 81.48, + "learning_rate": 9.272503876775514e-06, + "loss": 1.8429, + "step": 16447000 + }, + { + "epoch": 81.49, + "learning_rate": 9.27126529034943e-06, + "loss": 1.8533, + "step": 16447500 + }, + { + "epoch": 81.49, + "learning_rate": 9.270026703923347e-06, + "loss": 1.8614, + "step": 16448000 + }, + { + "epoch": 81.49, + "learning_rate": 9.268790594670115e-06, + "loss": 1.8379, + "step": 16448500 + }, + { + "epoch": 81.49, + "learning_rate": 9.267552008244032e-06, + "loss": 1.8497, + "step": 16449000 + }, + { + "epoch": 81.5, + "learning_rate": 9.26631342181795e-06, + "loss": 1.8716, + "step": 16449500 + }, + { + "epoch": 81.5, + "learning_rate": 9.265077312564716e-06, + "loss": 1.8359, + "step": 16450000 + }, + { + "epoch": 81.5, + "learning_rate": 9.263838726138633e-06, + "loss": 1.8619, + "step": 16450500 + }, + { + "epoch": 81.5, + "learning_rate": 9.262600139712548e-06, + "loss": 1.8531, + "step": 16451000 + }, + { + "epoch": 81.51, + "learning_rate": 9.261361553286465e-06, + "loss": 1.8792, + "step": 16451500 + }, + { + "epoch": 81.51, + "learning_rate": 9.260122966860382e-06, + "loss": 1.8534, + "step": 16452000 + }, + { + "epoch": 81.51, + "learning_rate": 9.2588843804343e-06, + "loss": 1.8729, + "step": 16452500 + }, + { + "epoch": 81.51, + "learning_rate": 9.257645794008215e-06, + "loss": 1.8572, + "step": 16453000 + }, + { + "epoch": 81.52, + "learning_rate": 9.256407207582131e-06, + "loss": 1.8365, + "step": 16453500 + }, + { + "epoch": 81.52, + "learning_rate": 9.255168621156048e-06, + "loss": 1.861, + "step": 16454000 + }, + { + "epoch": 81.52, + "learning_rate": 9.253930034729964e-06, + "loss": 1.8337, + "step": 16454500 + }, + { + "epoch": 81.52, + "learning_rate": 9.252691448303879e-06, + "loss": 1.8746, + "step": 16455000 + }, + { + "epoch": 81.53, + "learning_rate": 9.251452861877796e-06, + "loss": 1.8344, + "step": 16455500 + }, + { + "epoch": 81.53, + "learning_rate": 9.250214275451713e-06, + "loss": 1.8484, + "step": 16456000 + }, + { + "epoch": 81.53, + "learning_rate": 9.248978166198482e-06, + "loss": 1.8609, + "step": 16456500 + }, + { + "epoch": 81.53, + "learning_rate": 9.247739579772398e-06, + "loss": 1.8971, + "step": 16457000 + }, + { + "epoch": 81.54, + "learning_rate": 9.246500993346315e-06, + "loss": 1.8691, + "step": 16457500 + }, + { + "epoch": 81.54, + "learning_rate": 9.24526240692023e-06, + "loss": 1.8516, + "step": 16458000 + }, + { + "epoch": 81.54, + "learning_rate": 9.244023820494146e-06, + "loss": 1.8457, + "step": 16458500 + }, + { + "epoch": 81.54, + "learning_rate": 9.242785234068063e-06, + "loss": 1.8678, + "step": 16459000 + }, + { + "epoch": 81.55, + "learning_rate": 9.241549124814832e-06, + "loss": 1.8304, + "step": 16459500 + }, + { + "epoch": 81.55, + "learning_rate": 9.240310538388748e-06, + "loss": 1.8637, + "step": 16460000 + }, + { + "epoch": 81.55, + "learning_rate": 9.239071951962665e-06, + "loss": 1.8506, + "step": 16460500 + }, + { + "epoch": 81.55, + "learning_rate": 9.23783336553658e-06, + "loss": 1.8532, + "step": 16461000 + }, + { + "epoch": 81.56, + "learning_rate": 9.236594779110498e-06, + "loss": 1.8629, + "step": 16461500 + }, + { + "epoch": 81.56, + "learning_rate": 9.235356192684413e-06, + "loss": 1.8369, + "step": 16462000 + }, + { + "epoch": 81.56, + "learning_rate": 9.23411760625833e-06, + "loss": 1.8635, + "step": 16462500 + }, + { + "epoch": 81.56, + "learning_rate": 9.232879019832247e-06, + "loss": 1.8625, + "step": 16463000 + }, + { + "epoch": 81.57, + "learning_rate": 9.231640433406162e-06, + "loss": 1.8705, + "step": 16463500 + }, + { + "epoch": 81.57, + "learning_rate": 9.230404324152932e-06, + "loss": 1.8455, + "step": 16464000 + }, + { + "epoch": 81.57, + "learning_rate": 9.229165737726848e-06, + "loss": 1.8681, + "step": 16464500 + }, + { + "epoch": 81.57, + "learning_rate": 9.227927151300763e-06, + "loss": 1.819, + "step": 16465000 + }, + { + "epoch": 81.58, + "learning_rate": 9.22668856487468e-06, + "loss": 1.86, + "step": 16465500 + }, + { + "epoch": 81.58, + "learning_rate": 9.225449978448597e-06, + "loss": 1.8526, + "step": 16466000 + }, + { + "epoch": 81.58, + "learning_rate": 9.224211392022512e-06, + "loss": 1.8731, + "step": 16466500 + }, + { + "epoch": 81.58, + "learning_rate": 9.222975282769282e-06, + "loss": 1.8787, + "step": 16467000 + }, + { + "epoch": 81.59, + "learning_rate": 9.221736696343198e-06, + "loss": 1.8436, + "step": 16467500 + }, + { + "epoch": 81.59, + "learning_rate": 9.220498109917115e-06, + "loss": 1.8398, + "step": 16468000 + }, + { + "epoch": 81.59, + "learning_rate": 9.21925952349103e-06, + "loss": 1.8627, + "step": 16468500 + }, + { + "epoch": 81.59, + "learning_rate": 9.218020937064947e-06, + "loss": 1.8602, + "step": 16469000 + }, + { + "epoch": 81.6, + "learning_rate": 9.216782350638864e-06, + "loss": 1.8427, + "step": 16469500 + }, + { + "epoch": 81.6, + "learning_rate": 9.215543764212779e-06, + "loss": 1.8143, + "step": 16470000 + }, + { + "epoch": 81.6, + "learning_rate": 9.214305177786696e-06, + "loss": 1.8603, + "step": 16470500 + }, + { + "epoch": 81.6, + "learning_rate": 9.213069068533465e-06, + "loss": 1.8549, + "step": 16471000 + }, + { + "epoch": 81.61, + "learning_rate": 9.211830482107382e-06, + "loss": 1.8561, + "step": 16471500 + }, + { + "epoch": 81.61, + "learning_rate": 9.210591895681297e-06, + "loss": 1.8693, + "step": 16472000 + }, + { + "epoch": 81.61, + "learning_rate": 9.209353309255214e-06, + "loss": 1.8525, + "step": 16472500 + }, + { + "epoch": 81.61, + "learning_rate": 9.208114722829129e-06, + "loss": 1.8436, + "step": 16473000 + }, + { + "epoch": 81.62, + "learning_rate": 9.206876136403046e-06, + "loss": 1.8269, + "step": 16473500 + }, + { + "epoch": 81.62, + "learning_rate": 9.205637549976963e-06, + "loss": 1.8548, + "step": 16474000 + }, + { + "epoch": 81.62, + "learning_rate": 9.20439896355088e-06, + "loss": 1.8514, + "step": 16474500 + }, + { + "epoch": 81.62, + "learning_rate": 9.203162854297649e-06, + "loss": 1.8514, + "step": 16475000 + }, + { + "epoch": 81.63, + "learning_rate": 9.201924267871564e-06, + "loss": 1.8663, + "step": 16475500 + }, + { + "epoch": 81.63, + "learning_rate": 9.200685681445479e-06, + "loss": 1.8449, + "step": 16476000 + }, + { + "epoch": 81.63, + "learning_rate": 9.199447095019396e-06, + "loss": 1.8359, + "step": 16476500 + }, + { + "epoch": 81.63, + "learning_rate": 9.198208508593313e-06, + "loss": 1.8322, + "step": 16477000 + }, + { + "epoch": 81.64, + "learning_rate": 9.19696992216723e-06, + "loss": 1.8357, + "step": 16477500 + }, + { + "epoch": 81.64, + "learning_rate": 9.195731335741145e-06, + "loss": 1.8564, + "step": 16478000 + }, + { + "epoch": 81.64, + "learning_rate": 9.194492749315062e-06, + "loss": 1.859, + "step": 16478500 + }, + { + "epoch": 81.64, + "learning_rate": 9.193254162888979e-06, + "loss": 1.8318, + "step": 16479000 + }, + { + "epoch": 81.65, + "learning_rate": 9.192015576462896e-06, + "loss": 1.8469, + "step": 16479500 + }, + { + "epoch": 81.65, + "learning_rate": 9.190779467209663e-06, + "loss": 1.8646, + "step": 16480000 + }, + { + "epoch": 81.65, + "learning_rate": 9.18954088078358e-06, + "loss": 1.8451, + "step": 16480500 + }, + { + "epoch": 81.65, + "learning_rate": 9.188302294357497e-06, + "loss": 1.8567, + "step": 16481000 + }, + { + "epoch": 81.66, + "learning_rate": 9.187066185104266e-06, + "loss": 1.8338, + "step": 16481500 + }, + { + "epoch": 81.66, + "learning_rate": 9.185830075851033e-06, + "loss": 1.8524, + "step": 16482000 + }, + { + "epoch": 81.66, + "learning_rate": 9.18459148942495e-06, + "loss": 1.8456, + "step": 16482500 + }, + { + "epoch": 81.66, + "learning_rate": 9.183352902998867e-06, + "loss": 1.8758, + "step": 16483000 + }, + { + "epoch": 81.66, + "learning_rate": 9.182114316572782e-06, + "loss": 1.865, + "step": 16483500 + }, + { + "epoch": 81.67, + "learning_rate": 9.180875730146699e-06, + "loss": 1.8536, + "step": 16484000 + }, + { + "epoch": 81.67, + "learning_rate": 9.179639620893468e-06, + "loss": 1.8709, + "step": 16484500 + }, + { + "epoch": 81.67, + "learning_rate": 9.178401034467383e-06, + "loss": 1.8304, + "step": 16485000 + }, + { + "epoch": 81.67, + "learning_rate": 9.1771624480413e-06, + "loss": 1.8332, + "step": 16485500 + }, + { + "epoch": 81.68, + "learning_rate": 9.175923861615217e-06, + "loss": 1.8666, + "step": 16486000 + }, + { + "epoch": 81.68, + "learning_rate": 9.174685275189132e-06, + "loss": 1.85, + "step": 16486500 + }, + { + "epoch": 81.68, + "learning_rate": 9.173446688763049e-06, + "loss": 1.869, + "step": 16487000 + }, + { + "epoch": 81.68, + "learning_rate": 9.172208102336966e-06, + "loss": 1.842, + "step": 16487500 + }, + { + "epoch": 81.69, + "learning_rate": 9.170969515910883e-06, + "loss": 1.852, + "step": 16488000 + }, + { + "epoch": 81.69, + "learning_rate": 9.169730929484798e-06, + "loss": 1.8519, + "step": 16488500 + }, + { + "epoch": 81.69, + "learning_rate": 9.168492343058715e-06, + "loss": 1.868, + "step": 16489000 + }, + { + "epoch": 81.69, + "learning_rate": 9.167256233805482e-06, + "loss": 1.8733, + "step": 16489500 + }, + { + "epoch": 81.7, + "learning_rate": 9.166017647379399e-06, + "loss": 1.8378, + "step": 16490000 + }, + { + "epoch": 81.7, + "learning_rate": 9.164779060953316e-06, + "loss": 1.8418, + "step": 16490500 + }, + { + "epoch": 81.7, + "learning_rate": 9.163540474527233e-06, + "loss": 1.8535, + "step": 16491000 + }, + { + "epoch": 81.7, + "learning_rate": 9.162301888101148e-06, + "loss": 1.8735, + "step": 16491500 + }, + { + "epoch": 81.71, + "learning_rate": 9.161063301675065e-06, + "loss": 1.8559, + "step": 16492000 + }, + { + "epoch": 81.71, + "learning_rate": 9.159827192421832e-06, + "loss": 1.8567, + "step": 16492500 + }, + { + "epoch": 81.71, + "learning_rate": 9.158591083168603e-06, + "loss": 1.8419, + "step": 16493000 + }, + { + "epoch": 81.71, + "learning_rate": 9.157352496742518e-06, + "loss": 1.866, + "step": 16493500 + }, + { + "epoch": 81.72, + "learning_rate": 9.156113910316435e-06, + "loss": 1.8645, + "step": 16494000 + }, + { + "epoch": 81.72, + "learning_rate": 9.154875323890352e-06, + "loss": 1.831, + "step": 16494500 + }, + { + "epoch": 81.72, + "learning_rate": 9.153636737464267e-06, + "loss": 1.8672, + "step": 16495000 + }, + { + "epoch": 81.72, + "learning_rate": 9.152398151038184e-06, + "loss": 1.8379, + "step": 16495500 + }, + { + "epoch": 81.73, + "learning_rate": 9.151159564612099e-06, + "loss": 1.85, + "step": 16496000 + }, + { + "epoch": 81.73, + "learning_rate": 9.149920978186016e-06, + "loss": 1.8615, + "step": 16496500 + }, + { + "epoch": 81.73, + "learning_rate": 9.148684868932785e-06, + "loss": 1.8389, + "step": 16497000 + }, + { + "epoch": 81.73, + "learning_rate": 9.147446282506702e-06, + "loss": 1.8567, + "step": 16497500 + }, + { + "epoch": 81.74, + "learning_rate": 9.146207696080619e-06, + "loss": 1.8584, + "step": 16498000 + }, + { + "epoch": 81.74, + "learning_rate": 9.144969109654534e-06, + "loss": 1.8345, + "step": 16498500 + }, + { + "epoch": 81.74, + "learning_rate": 9.143730523228449e-06, + "loss": 1.8571, + "step": 16499000 + }, + { + "epoch": 81.74, + "learning_rate": 9.142496891148071e-06, + "loss": 1.8623, + "step": 16499500 + }, + { + "epoch": 81.75, + "learning_rate": 9.141258304721988e-06, + "loss": 1.8685, + "step": 16500000 + }, + { + "epoch": 81.75, + "learning_rate": 9.140019718295904e-06, + "loss": 1.8676, + "step": 16500500 + }, + { + "epoch": 81.75, + "learning_rate": 9.138781131869819e-06, + "loss": 1.8581, + "step": 16501000 + }, + { + "epoch": 81.75, + "learning_rate": 9.137542545443736e-06, + "loss": 1.8527, + "step": 16501500 + }, + { + "epoch": 81.76, + "learning_rate": 9.136306436190505e-06, + "loss": 1.8673, + "step": 16502000 + }, + { + "epoch": 81.76, + "learning_rate": 9.135067849764422e-06, + "loss": 1.8974, + "step": 16502500 + }, + { + "epoch": 81.76, + "learning_rate": 9.133829263338338e-06, + "loss": 1.869, + "step": 16503000 + }, + { + "epoch": 81.76, + "learning_rate": 9.132590676912255e-06, + "loss": 1.862, + "step": 16503500 + }, + { + "epoch": 81.77, + "learning_rate": 9.13135209048617e-06, + "loss": 1.8712, + "step": 16504000 + }, + { + "epoch": 81.77, + "learning_rate": 9.13011598123294e-06, + "loss": 1.8666, + "step": 16504500 + }, + { + "epoch": 81.77, + "learning_rate": 9.128877394806855e-06, + "loss": 1.8411, + "step": 16505000 + }, + { + "epoch": 81.77, + "learning_rate": 9.127638808380772e-06, + "loss": 1.8418, + "step": 16505500 + }, + { + "epoch": 81.78, + "learning_rate": 9.126400221954689e-06, + "loss": 1.8288, + "step": 16506000 + }, + { + "epoch": 81.78, + "learning_rate": 9.125161635528605e-06, + "loss": 1.8566, + "step": 16506500 + }, + { + "epoch": 81.78, + "learning_rate": 9.12392304910252e-06, + "loss": 1.8414, + "step": 16507000 + }, + { + "epoch": 81.78, + "learning_rate": 9.122684462676438e-06, + "loss": 1.8667, + "step": 16507500 + }, + { + "epoch": 81.79, + "learning_rate": 9.121445876250353e-06, + "loss": 1.8479, + "step": 16508000 + }, + { + "epoch": 81.79, + "learning_rate": 9.12020728982427e-06, + "loss": 1.8433, + "step": 16508500 + }, + { + "epoch": 81.79, + "learning_rate": 9.118968703398185e-06, + "loss": 1.8363, + "step": 16509000 + }, + { + "epoch": 81.79, + "learning_rate": 9.117732594144956e-06, + "loss": 1.8537, + "step": 16509500 + }, + { + "epoch": 81.8, + "learning_rate": 9.11649400771887e-06, + "loss": 1.8393, + "step": 16510000 + }, + { + "epoch": 81.8, + "learning_rate": 9.115255421292788e-06, + "loss": 1.8388, + "step": 16510500 + }, + { + "epoch": 81.8, + "learning_rate": 9.114016834866703e-06, + "loss": 1.877, + "step": 16511000 + }, + { + "epoch": 81.8, + "learning_rate": 9.11277824844062e-06, + "loss": 1.8524, + "step": 16511500 + }, + { + "epoch": 81.81, + "learning_rate": 9.111542139187389e-06, + "loss": 1.8696, + "step": 16512000 + }, + { + "epoch": 81.81, + "learning_rate": 9.110303552761306e-06, + "loss": 1.8666, + "step": 16512500 + }, + { + "epoch": 81.81, + "learning_rate": 9.109064966335223e-06, + "loss": 1.8692, + "step": 16513000 + }, + { + "epoch": 81.81, + "learning_rate": 9.107826379909138e-06, + "loss": 1.8471, + "step": 16513500 + }, + { + "epoch": 81.82, + "learning_rate": 9.106587793483055e-06, + "loss": 1.8607, + "step": 16514000 + }, + { + "epoch": 81.82, + "learning_rate": 9.10534920705697e-06, + "loss": 1.8535, + "step": 16514500 + }, + { + "epoch": 81.82, + "learning_rate": 9.104110620630887e-06, + "loss": 1.8309, + "step": 16515000 + }, + { + "epoch": 81.82, + "learning_rate": 9.102872034204802e-06, + "loss": 1.8616, + "step": 16515500 + }, + { + "epoch": 81.83, + "learning_rate": 9.101633447778719e-06, + "loss": 1.847, + "step": 16516000 + }, + { + "epoch": 81.83, + "learning_rate": 9.100394861352636e-06, + "loss": 1.8373, + "step": 16516500 + }, + { + "epoch": 81.83, + "learning_rate": 9.099158752099405e-06, + "loss": 1.8534, + "step": 16517000 + }, + { + "epoch": 81.83, + "learning_rate": 9.097920165673322e-06, + "loss": 1.8449, + "step": 16517500 + }, + { + "epoch": 81.84, + "learning_rate": 9.096684056420089e-06, + "loss": 1.8744, + "step": 16518000 + }, + { + "epoch": 81.84, + "learning_rate": 9.095447947166858e-06, + "loss": 1.8456, + "step": 16518500 + }, + { + "epoch": 81.84, + "learning_rate": 9.094209360740775e-06, + "loss": 1.855, + "step": 16519000 + }, + { + "epoch": 81.84, + "learning_rate": 9.092970774314691e-06, + "loss": 1.8644, + "step": 16519500 + }, + { + "epoch": 81.85, + "learning_rate": 9.091732187888607e-06, + "loss": 1.8455, + "step": 16520000 + }, + { + "epoch": 81.85, + "learning_rate": 9.090493601462522e-06, + "loss": 1.8495, + "step": 16520500 + }, + { + "epoch": 81.85, + "learning_rate": 9.089255015036439e-06, + "loss": 1.8663, + "step": 16521000 + }, + { + "epoch": 81.85, + "learning_rate": 9.088016428610356e-06, + "loss": 1.8418, + "step": 16521500 + }, + { + "epoch": 81.86, + "learning_rate": 9.086777842184273e-06, + "loss": 1.85, + "step": 16522000 + }, + { + "epoch": 81.86, + "learning_rate": 9.085539255758188e-06, + "loss": 1.8596, + "step": 16522500 + }, + { + "epoch": 81.86, + "learning_rate": 9.084300669332105e-06, + "loss": 1.8548, + "step": 16523000 + }, + { + "epoch": 81.86, + "learning_rate": 9.083064560078874e-06, + "loss": 1.8546, + "step": 16523500 + }, + { + "epoch": 81.87, + "learning_rate": 9.081825973652789e-06, + "loss": 1.8376, + "step": 16524000 + }, + { + "epoch": 81.87, + "learning_rate": 9.080587387226706e-06, + "loss": 1.864, + "step": 16524500 + }, + { + "epoch": 81.87, + "learning_rate": 9.079348800800623e-06, + "loss": 1.8429, + "step": 16525000 + }, + { + "epoch": 81.87, + "learning_rate": 9.07811021437454e-06, + "loss": 1.845, + "step": 16525500 + }, + { + "epoch": 81.88, + "learning_rate": 9.076871627948455e-06, + "loss": 1.8535, + "step": 16526000 + }, + { + "epoch": 81.88, + "learning_rate": 9.075635518695224e-06, + "loss": 1.8786, + "step": 16526500 + }, + { + "epoch": 81.88, + "learning_rate": 9.07439693226914e-06, + "loss": 1.8687, + "step": 16527000 + }, + { + "epoch": 81.88, + "learning_rate": 9.073158345843056e-06, + "loss": 1.8389, + "step": 16527500 + }, + { + "epoch": 81.89, + "learning_rate": 9.071919759416973e-06, + "loss": 1.8348, + "step": 16528000 + }, + { + "epoch": 81.89, + "learning_rate": 9.07068117299089e-06, + "loss": 1.8651, + "step": 16528500 + }, + { + "epoch": 81.89, + "learning_rate": 9.069442586564805e-06, + "loss": 1.8412, + "step": 16529000 + }, + { + "epoch": 81.89, + "learning_rate": 9.068204000138722e-06, + "loss": 1.852, + "step": 16529500 + }, + { + "epoch": 81.9, + "learning_rate": 9.066965413712639e-06, + "loss": 1.8568, + "step": 16530000 + }, + { + "epoch": 81.9, + "learning_rate": 9.065729304459408e-06, + "loss": 1.8447, + "step": 16530500 + }, + { + "epoch": 81.9, + "learning_rate": 9.064490718033323e-06, + "loss": 1.8765, + "step": 16531000 + }, + { + "epoch": 81.9, + "learning_rate": 9.06325213160724e-06, + "loss": 1.8546, + "step": 16531500 + }, + { + "epoch": 81.91, + "learning_rate": 9.062016022354009e-06, + "loss": 1.8728, + "step": 16532000 + }, + { + "epoch": 81.91, + "learning_rate": 9.060777435927926e-06, + "loss": 1.8389, + "step": 16532500 + }, + { + "epoch": 81.91, + "learning_rate": 9.05953884950184e-06, + "loss": 1.8571, + "step": 16533000 + }, + { + "epoch": 81.91, + "learning_rate": 9.058300263075758e-06, + "loss": 1.8469, + "step": 16533500 + }, + { + "epoch": 81.92, + "learning_rate": 9.057061676649673e-06, + "loss": 1.8536, + "step": 16534000 + }, + { + "epoch": 81.92, + "learning_rate": 9.05582309022359e-06, + "loss": 1.8679, + "step": 16534500 + }, + { + "epoch": 81.92, + "learning_rate": 9.054584503797505e-06, + "loss": 1.8639, + "step": 16535000 + }, + { + "epoch": 81.92, + "learning_rate": 9.053345917371422e-06, + "loss": 1.8898, + "step": 16535500 + }, + { + "epoch": 81.93, + "learning_rate": 9.052107330945339e-06, + "loss": 1.8431, + "step": 16536000 + }, + { + "epoch": 81.93, + "learning_rate": 9.050868744519256e-06, + "loss": 1.8496, + "step": 16536500 + }, + { + "epoch": 81.93, + "learning_rate": 9.049630158093173e-06, + "loss": 1.8561, + "step": 16537000 + }, + { + "epoch": 81.93, + "learning_rate": 9.048391571667088e-06, + "loss": 1.8823, + "step": 16537500 + }, + { + "epoch": 81.93, + "learning_rate": 9.047157939586709e-06, + "loss": 1.8551, + "step": 16538000 + }, + { + "epoch": 81.94, + "learning_rate": 9.045919353160626e-06, + "loss": 1.8532, + "step": 16538500 + }, + { + "epoch": 81.94, + "learning_rate": 9.044680766734541e-06, + "loss": 1.8586, + "step": 16539000 + }, + { + "epoch": 81.94, + "learning_rate": 9.043444657481311e-06, + "loss": 1.8648, + "step": 16539500 + }, + { + "epoch": 81.94, + "learning_rate": 9.042206071055227e-06, + "loss": 1.9117, + "step": 16540000 + }, + { + "epoch": 81.95, + "learning_rate": 9.040967484629142e-06, + "loss": 1.8653, + "step": 16540500 + }, + { + "epoch": 81.95, + "learning_rate": 9.039728898203059e-06, + "loss": 1.8735, + "step": 16541000 + }, + { + "epoch": 81.95, + "learning_rate": 9.038492788949828e-06, + "loss": 1.8511, + "step": 16541500 + }, + { + "epoch": 81.95, + "learning_rate": 9.037254202523745e-06, + "loss": 1.8472, + "step": 16542000 + }, + { + "epoch": 81.96, + "learning_rate": 9.036015616097661e-06, + "loss": 1.861, + "step": 16542500 + }, + { + "epoch": 81.96, + "learning_rate": 9.034777029671577e-06, + "loss": 1.8567, + "step": 16543000 + }, + { + "epoch": 81.96, + "learning_rate": 9.033538443245494e-06, + "loss": 1.851, + "step": 16543500 + }, + { + "epoch": 81.96, + "learning_rate": 9.032299856819409e-06, + "loss": 1.8614, + "step": 16544000 + }, + { + "epoch": 81.97, + "learning_rate": 9.031061270393326e-06, + "loss": 1.8458, + "step": 16544500 + }, + { + "epoch": 81.97, + "learning_rate": 9.029822683967243e-06, + "loss": 1.8592, + "step": 16545000 + }, + { + "epoch": 81.97, + "learning_rate": 9.028584097541158e-06, + "loss": 1.8715, + "step": 16545500 + }, + { + "epoch": 81.97, + "learning_rate": 9.027345511115075e-06, + "loss": 1.8309, + "step": 16546000 + }, + { + "epoch": 81.98, + "learning_rate": 9.026106924688992e-06, + "loss": 1.8376, + "step": 16546500 + }, + { + "epoch": 81.98, + "learning_rate": 9.024868338262909e-06, + "loss": 1.8418, + "step": 16547000 + }, + { + "epoch": 81.98, + "learning_rate": 9.023629751836824e-06, + "loss": 1.8264, + "step": 16547500 + }, + { + "epoch": 81.98, + "learning_rate": 9.022391165410741e-06, + "loss": 1.8431, + "step": 16548000 + }, + { + "epoch": 81.99, + "learning_rate": 9.021152578984656e-06, + "loss": 1.846, + "step": 16548500 + }, + { + "epoch": 81.99, + "learning_rate": 9.019916469731425e-06, + "loss": 1.8811, + "step": 16549000 + }, + { + "epoch": 81.99, + "learning_rate": 9.018677883305342e-06, + "loss": 1.859, + "step": 16549500 + }, + { + "epoch": 81.99, + "learning_rate": 9.01744177405211e-06, + "loss": 1.8586, + "step": 16550000 + }, + { + "epoch": 82.0, + "learning_rate": 9.016203187626026e-06, + "loss": 1.8525, + "step": 16550500 + }, + { + "epoch": 82.0, + "learning_rate": 9.014964601199943e-06, + "loss": 1.8585, + "step": 16551000 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.6816934349850013, + "eval_accuracy_mlm": 0.6424203151956372, + "eval_accuracy_nsp": 0.8668727128675591, + "eval_loss": 2.301812171936035, + "eval_runtime": 146.9181, + "eval_samples_per_second": 1735.382, + "eval_steps_per_second": 72.312, + "step": 16551126 + }, + { + "epoch": 82.0, + "learning_rate": 9.013726014773858e-06, + "loss": 1.8204, + "step": 16551500 + }, + { + "epoch": 82.0, + "learning_rate": 9.012487428347775e-06, + "loss": 1.8453, + "step": 16552000 + }, + { + "epoch": 82.01, + "learning_rate": 9.011248841921692e-06, + "loss": 1.8564, + "step": 16552500 + }, + { + "epoch": 82.01, + "learning_rate": 9.010010255495609e-06, + "loss": 1.8552, + "step": 16553000 + }, + { + "epoch": 82.01, + "learning_rate": 9.008771669069526e-06, + "loss": 1.8445, + "step": 16553500 + }, + { + "epoch": 82.01, + "learning_rate": 9.007533082643441e-06, + "loss": 1.8789, + "step": 16554000 + }, + { + "epoch": 82.02, + "learning_rate": 9.006294496217358e-06, + "loss": 1.8592, + "step": 16554500 + }, + { + "epoch": 82.02, + "learning_rate": 9.005055909791273e-06, + "loss": 1.8356, + "step": 16555000 + }, + { + "epoch": 82.02, + "learning_rate": 9.00381732336519e-06, + "loss": 1.8557, + "step": 16555500 + }, + { + "epoch": 82.02, + "learning_rate": 9.002581214111959e-06, + "loss": 1.8196, + "step": 16556000 + }, + { + "epoch": 82.03, + "learning_rate": 9.001342627685876e-06, + "loss": 1.8315, + "step": 16556500 + }, + { + "epoch": 82.03, + "learning_rate": 9.000104041259791e-06, + "loss": 1.86, + "step": 16557000 + }, + { + "epoch": 82.03, + "learning_rate": 8.998865454833708e-06, + "loss": 1.8318, + "step": 16557500 + }, + { + "epoch": 82.03, + "learning_rate": 8.997626868407625e-06, + "loss": 1.8446, + "step": 16558000 + }, + { + "epoch": 82.04, + "learning_rate": 8.99638828198154e-06, + "loss": 1.83, + "step": 16558500 + }, + { + "epoch": 82.04, + "learning_rate": 8.995152172728309e-06, + "loss": 1.8272, + "step": 16559000 + }, + { + "epoch": 82.04, + "learning_rate": 8.993913586302226e-06, + "loss": 1.8207, + "step": 16559500 + }, + { + "epoch": 82.04, + "learning_rate": 8.992674999876141e-06, + "loss": 1.8319, + "step": 16560000 + }, + { + "epoch": 82.05, + "learning_rate": 8.991436413450058e-06, + "loss": 1.8493, + "step": 16560500 + }, + { + "epoch": 82.05, + "learning_rate": 8.990197827023975e-06, + "loss": 1.8647, + "step": 16561000 + }, + { + "epoch": 82.05, + "learning_rate": 8.988961717770742e-06, + "loss": 1.8332, + "step": 16561500 + }, + { + "epoch": 82.05, + "learning_rate": 8.987723131344659e-06, + "loss": 1.8257, + "step": 16562000 + }, + { + "epoch": 82.06, + "learning_rate": 8.986484544918576e-06, + "loss": 1.8238, + "step": 16562500 + }, + { + "epoch": 82.06, + "learning_rate": 8.985245958492493e-06, + "loss": 1.8459, + "step": 16563000 + }, + { + "epoch": 82.06, + "learning_rate": 8.984009849239262e-06, + "loss": 1.8709, + "step": 16563500 + }, + { + "epoch": 82.06, + "learning_rate": 8.982773739986029e-06, + "loss": 1.8644, + "step": 16564000 + }, + { + "epoch": 82.07, + "learning_rate": 8.981535153559946e-06, + "loss": 1.8644, + "step": 16564500 + }, + { + "epoch": 82.07, + "learning_rate": 8.980301521479566e-06, + "loss": 1.8348, + "step": 16565000 + }, + { + "epoch": 82.07, + "learning_rate": 8.979062935053482e-06, + "loss": 1.8436, + "step": 16565500 + }, + { + "epoch": 82.07, + "learning_rate": 8.977824348627399e-06, + "loss": 1.8441, + "step": 16566000 + }, + { + "epoch": 82.08, + "learning_rate": 8.976585762201316e-06, + "loss": 1.847, + "step": 16566500 + }, + { + "epoch": 82.08, + "learning_rate": 8.97534717577523e-06, + "loss": 1.8288, + "step": 16567000 + }, + { + "epoch": 82.08, + "learning_rate": 8.974108589349148e-06, + "loss": 1.8433, + "step": 16567500 + }, + { + "epoch": 82.08, + "learning_rate": 8.972870002923065e-06, + "loss": 1.824, + "step": 16568000 + }, + { + "epoch": 82.09, + "learning_rate": 8.971631416496982e-06, + "loss": 1.8303, + "step": 16568500 + }, + { + "epoch": 82.09, + "learning_rate": 8.970392830070897e-06, + "loss": 1.8559, + "step": 16569000 + }, + { + "epoch": 82.09, + "learning_rate": 8.969154243644814e-06, + "loss": 1.8544, + "step": 16569500 + }, + { + "epoch": 82.09, + "learning_rate": 8.967915657218729e-06, + "loss": 1.8211, + "step": 16570000 + }, + { + "epoch": 82.1, + "learning_rate": 8.966677070792646e-06, + "loss": 1.8442, + "step": 16570500 + }, + { + "epoch": 82.1, + "learning_rate": 8.965438484366563e-06, + "loss": 1.8445, + "step": 16571000 + }, + { + "epoch": 82.1, + "learning_rate": 8.964199897940478e-06, + "loss": 1.8589, + "step": 16571500 + }, + { + "epoch": 82.1, + "learning_rate": 8.962963788687249e-06, + "loss": 1.8562, + "step": 16572000 + }, + { + "epoch": 82.11, + "learning_rate": 8.961727679434016e-06, + "loss": 1.8324, + "step": 16572500 + }, + { + "epoch": 82.11, + "learning_rate": 8.960489093007933e-06, + "loss": 1.8511, + "step": 16573000 + }, + { + "epoch": 82.11, + "learning_rate": 8.959250506581848e-06, + "loss": 1.865, + "step": 16573500 + }, + { + "epoch": 82.11, + "learning_rate": 8.958011920155765e-06, + "loss": 1.8283, + "step": 16574000 + }, + { + "epoch": 82.12, + "learning_rate": 8.956773333729682e-06, + "loss": 1.8481, + "step": 16574500 + }, + { + "epoch": 82.12, + "learning_rate": 8.955534747303599e-06, + "loss": 1.8367, + "step": 16575000 + }, + { + "epoch": 82.12, + "learning_rate": 8.954296160877514e-06, + "loss": 1.8435, + "step": 16575500 + }, + { + "epoch": 82.12, + "learning_rate": 8.95305757445143e-06, + "loss": 1.8499, + "step": 16576000 + }, + { + "epoch": 82.13, + "learning_rate": 8.951821465198198e-06, + "loss": 1.8325, + "step": 16576500 + }, + { + "epoch": 82.13, + "learning_rate": 8.950582878772115e-06, + "loss": 1.8605, + "step": 16577000 + }, + { + "epoch": 82.13, + "learning_rate": 8.949344292346032e-06, + "loss": 1.8316, + "step": 16577500 + }, + { + "epoch": 82.13, + "learning_rate": 8.948105705919949e-06, + "loss": 1.8415, + "step": 16578000 + }, + { + "epoch": 82.14, + "learning_rate": 8.946867119493864e-06, + "loss": 1.8508, + "step": 16578500 + }, + { + "epoch": 82.14, + "learning_rate": 8.945628533067781e-06, + "loss": 1.8499, + "step": 16579000 + }, + { + "epoch": 82.14, + "learning_rate": 8.944389946641698e-06, + "loss": 1.8263, + "step": 16579500 + }, + { + "epoch": 82.14, + "learning_rate": 8.943151360215615e-06, + "loss": 1.844, + "step": 16580000 + }, + { + "epoch": 82.15, + "learning_rate": 8.941915250962382e-06, + "loss": 1.8234, + "step": 16580500 + }, + { + "epoch": 82.15, + "learning_rate": 8.940676664536299e-06, + "loss": 1.8373, + "step": 16581000 + }, + { + "epoch": 82.15, + "learning_rate": 8.939438078110214e-06, + "loss": 1.8584, + "step": 16581500 + }, + { + "epoch": 82.15, + "learning_rate": 8.938199491684131e-06, + "loss": 1.8317, + "step": 16582000 + }, + { + "epoch": 82.16, + "learning_rate": 8.936960905258048e-06, + "loss": 1.8473, + "step": 16582500 + }, + { + "epoch": 82.16, + "learning_rate": 8.935724796004815e-06, + "loss": 1.8462, + "step": 16583000 + }, + { + "epoch": 82.16, + "learning_rate": 8.934486209578732e-06, + "loss": 1.8378, + "step": 16583500 + }, + { + "epoch": 82.16, + "learning_rate": 8.933247623152649e-06, + "loss": 1.8374, + "step": 16584000 + }, + { + "epoch": 82.17, + "learning_rate": 8.932011513899418e-06, + "loss": 1.83, + "step": 16584500 + }, + { + "epoch": 82.17, + "learning_rate": 8.930772927473335e-06, + "loss": 1.8527, + "step": 16585000 + }, + { + "epoch": 82.17, + "learning_rate": 8.929534341047251e-06, + "loss": 1.8508, + "step": 16585500 + }, + { + "epoch": 82.17, + "learning_rate": 8.928295754621167e-06, + "loss": 1.8673, + "step": 16586000 + }, + { + "epoch": 82.18, + "learning_rate": 8.927057168195082e-06, + "loss": 1.8385, + "step": 16586500 + }, + { + "epoch": 82.18, + "learning_rate": 8.925818581768999e-06, + "loss": 1.8474, + "step": 16587000 + }, + { + "epoch": 82.18, + "learning_rate": 8.924579995342916e-06, + "loss": 1.8534, + "step": 16587500 + }, + { + "epoch": 82.18, + "learning_rate": 8.923341408916831e-06, + "loss": 1.8351, + "step": 16588000 + }, + { + "epoch": 82.19, + "learning_rate": 8.922102822490748e-06, + "loss": 1.8433, + "step": 16588500 + }, + { + "epoch": 82.19, + "learning_rate": 8.920866713237517e-06, + "loss": 1.852, + "step": 16589000 + }, + { + "epoch": 82.19, + "learning_rate": 8.919628126811434e-06, + "loss": 1.831, + "step": 16589500 + }, + { + "epoch": 82.19, + "learning_rate": 8.918389540385349e-06, + "loss": 1.845, + "step": 16590000 + }, + { + "epoch": 82.2, + "learning_rate": 8.917153431132118e-06, + "loss": 1.827, + "step": 16590500 + }, + { + "epoch": 82.2, + "learning_rate": 8.915914844706035e-06, + "loss": 1.8405, + "step": 16591000 + }, + { + "epoch": 82.2, + "learning_rate": 8.914676258279952e-06, + "loss": 1.8538, + "step": 16591500 + }, + { + "epoch": 82.2, + "learning_rate": 8.913437671853867e-06, + "loss": 1.8451, + "step": 16592000 + }, + { + "epoch": 82.2, + "learning_rate": 8.912199085427784e-06, + "loss": 1.8571, + "step": 16592500 + }, + { + "epoch": 82.21, + "learning_rate": 8.910960499001699e-06, + "loss": 1.8308, + "step": 16593000 + }, + { + "epoch": 82.21, + "learning_rate": 8.909721912575616e-06, + "loss": 1.8372, + "step": 16593500 + }, + { + "epoch": 82.21, + "learning_rate": 8.908483326149533e-06, + "loss": 1.8647, + "step": 16594000 + }, + { + "epoch": 82.21, + "learning_rate": 8.907244739723448e-06, + "loss": 1.8507, + "step": 16594500 + }, + { + "epoch": 82.22, + "learning_rate": 8.906006153297365e-06, + "loss": 1.8461, + "step": 16595000 + }, + { + "epoch": 82.22, + "learning_rate": 8.904767566871282e-06, + "loss": 1.8143, + "step": 16595500 + }, + { + "epoch": 82.22, + "learning_rate": 8.903528980445199e-06, + "loss": 1.8524, + "step": 16596000 + }, + { + "epoch": 82.22, + "learning_rate": 8.902290394019114e-06, + "loss": 1.8603, + "step": 16596500 + }, + { + "epoch": 82.23, + "learning_rate": 8.901051807593031e-06, + "loss": 1.8173, + "step": 16597000 + }, + { + "epoch": 82.23, + "learning_rate": 8.899815698339798e-06, + "loss": 1.8397, + "step": 16597500 + }, + { + "epoch": 82.23, + "learning_rate": 8.898577111913715e-06, + "loss": 1.8346, + "step": 16598000 + }, + { + "epoch": 82.23, + "learning_rate": 8.897341002660484e-06, + "loss": 1.8538, + "step": 16598500 + }, + { + "epoch": 82.24, + "learning_rate": 8.8961024162344e-06, + "loss": 1.8689, + "step": 16599000 + }, + { + "epoch": 82.24, + "learning_rate": 8.894863829808318e-06, + "loss": 1.832, + "step": 16599500 + }, + { + "epoch": 82.24, + "learning_rate": 8.893625243382233e-06, + "loss": 1.8391, + "step": 16600000 + }, + { + "epoch": 82.24, + "learning_rate": 8.892386656956148e-06, + "loss": 1.8503, + "step": 16600500 + }, + { + "epoch": 82.25, + "learning_rate": 8.891148070530065e-06, + "loss": 1.8179, + "step": 16601000 + }, + { + "epoch": 82.25, + "learning_rate": 8.889909484103982e-06, + "loss": 1.8688, + "step": 16601500 + }, + { + "epoch": 82.25, + "learning_rate": 8.888670897677899e-06, + "loss": 1.857, + "step": 16602000 + }, + { + "epoch": 82.25, + "learning_rate": 8.887432311251814e-06, + "loss": 1.8474, + "step": 16602500 + }, + { + "epoch": 82.26, + "learning_rate": 8.886193724825731e-06, + "loss": 1.8267, + "step": 16603000 + }, + { + "epoch": 82.26, + "learning_rate": 8.8849576155725e-06, + "loss": 1.8236, + "step": 16603500 + }, + { + "epoch": 82.26, + "learning_rate": 8.883719029146415e-06, + "loss": 1.8558, + "step": 16604000 + }, + { + "epoch": 82.26, + "learning_rate": 8.882480442720332e-06, + "loss": 1.8506, + "step": 16604500 + }, + { + "epoch": 82.27, + "learning_rate": 8.881244333467101e-06, + "loss": 1.8396, + "step": 16605000 + }, + { + "epoch": 82.27, + "learning_rate": 8.880005747041018e-06, + "loss": 1.8443, + "step": 16605500 + }, + { + "epoch": 82.27, + "learning_rate": 8.878767160614935e-06, + "loss": 1.8475, + "step": 16606000 + }, + { + "epoch": 82.27, + "learning_rate": 8.877531051361702e-06, + "loss": 1.8445, + "step": 16606500 + }, + { + "epoch": 82.28, + "learning_rate": 8.876292464935619e-06, + "loss": 1.8355, + "step": 16607000 + }, + { + "epoch": 82.28, + "learning_rate": 8.875053878509534e-06, + "loss": 1.8223, + "step": 16607500 + }, + { + "epoch": 82.28, + "learning_rate": 8.873815292083451e-06, + "loss": 1.8605, + "step": 16608000 + }, + { + "epoch": 82.28, + "learning_rate": 8.872576705657368e-06, + "loss": 1.8682, + "step": 16608500 + }, + { + "epoch": 82.29, + "learning_rate": 8.871338119231285e-06, + "loss": 1.8352, + "step": 16609000 + }, + { + "epoch": 82.29, + "learning_rate": 8.870099532805202e-06, + "loss": 1.8461, + "step": 16609500 + }, + { + "epoch": 82.29, + "learning_rate": 8.868860946379117e-06, + "loss": 1.876, + "step": 16610000 + }, + { + "epoch": 82.29, + "learning_rate": 8.867622359953032e-06, + "loss": 1.8655, + "step": 16610500 + }, + { + "epoch": 82.3, + "learning_rate": 8.866386250699801e-06, + "loss": 1.8531, + "step": 16611000 + }, + { + "epoch": 82.3, + "learning_rate": 8.865147664273718e-06, + "loss": 1.8588, + "step": 16611500 + }, + { + "epoch": 82.3, + "learning_rate": 8.863909077847635e-06, + "loss": 1.822, + "step": 16612000 + }, + { + "epoch": 82.3, + "learning_rate": 8.862670491421552e-06, + "loss": 1.8271, + "step": 16612500 + }, + { + "epoch": 82.31, + "learning_rate": 8.861431904995467e-06, + "loss": 1.8523, + "step": 16613000 + }, + { + "epoch": 82.31, + "learning_rate": 8.860193318569384e-06, + "loss": 1.8444, + "step": 16613500 + }, + { + "epoch": 82.31, + "learning_rate": 8.8589547321433e-06, + "loss": 1.8513, + "step": 16614000 + }, + { + "epoch": 82.31, + "learning_rate": 8.857716145717216e-06, + "loss": 1.8698, + "step": 16614500 + }, + { + "epoch": 82.32, + "learning_rate": 8.856480036463985e-06, + "loss": 1.8324, + "step": 16615000 + }, + { + "epoch": 82.32, + "learning_rate": 8.855241450037902e-06, + "loss": 1.843, + "step": 16615500 + }, + { + "epoch": 82.32, + "learning_rate": 8.854002863611817e-06, + "loss": 1.8692, + "step": 16616000 + }, + { + "epoch": 82.32, + "learning_rate": 8.852764277185734e-06, + "loss": 1.8636, + "step": 16616500 + }, + { + "epoch": 82.33, + "learning_rate": 8.851525690759651e-06, + "loss": 1.8529, + "step": 16617000 + }, + { + "epoch": 82.33, + "learning_rate": 8.850287104333566e-06, + "loss": 1.8269, + "step": 16617500 + }, + { + "epoch": 82.33, + "learning_rate": 8.849048517907483e-06, + "loss": 1.8455, + "step": 16618000 + }, + { + "epoch": 82.33, + "learning_rate": 8.847812408654252e-06, + "loss": 1.8575, + "step": 16618500 + }, + { + "epoch": 82.34, + "learning_rate": 8.846573822228169e-06, + "loss": 1.8539, + "step": 16619000 + }, + { + "epoch": 82.34, + "learning_rate": 8.845335235802084e-06, + "loss": 1.841, + "step": 16619500 + }, + { + "epoch": 82.34, + "learning_rate": 8.844096649376001e-06, + "loss": 1.8509, + "step": 16620000 + }, + { + "epoch": 82.34, + "learning_rate": 8.842860540122768e-06, + "loss": 1.8553, + "step": 16620500 + }, + { + "epoch": 82.35, + "learning_rate": 8.841621953696685e-06, + "loss": 1.8219, + "step": 16621000 + }, + { + "epoch": 82.35, + "learning_rate": 8.840383367270602e-06, + "loss": 1.843, + "step": 16621500 + }, + { + "epoch": 82.35, + "learning_rate": 8.839144780844519e-06, + "loss": 1.8598, + "step": 16622000 + }, + { + "epoch": 82.35, + "learning_rate": 8.837908671591288e-06, + "loss": 1.8239, + "step": 16622500 + }, + { + "epoch": 82.36, + "learning_rate": 8.836670085165203e-06, + "loss": 1.8549, + "step": 16623000 + }, + { + "epoch": 82.36, + "learning_rate": 8.835431498739118e-06, + "loss": 1.8318, + "step": 16623500 + }, + { + "epoch": 82.36, + "learning_rate": 8.834192912313035e-06, + "loss": 1.8529, + "step": 16624000 + }, + { + "epoch": 82.36, + "learning_rate": 8.832954325886952e-06, + "loss": 1.8572, + "step": 16624500 + }, + { + "epoch": 82.37, + "learning_rate": 8.831715739460869e-06, + "loss": 1.852, + "step": 16625000 + }, + { + "epoch": 82.37, + "learning_rate": 8.830477153034784e-06, + "loss": 1.8574, + "step": 16625500 + }, + { + "epoch": 82.37, + "learning_rate": 8.829238566608701e-06, + "loss": 1.8253, + "step": 16626000 + }, + { + "epoch": 82.37, + "learning_rate": 8.827999980182618e-06, + "loss": 1.8441, + "step": 16626500 + }, + { + "epoch": 82.38, + "learning_rate": 8.826761393756535e-06, + "loss": 1.8552, + "step": 16627000 + }, + { + "epoch": 82.38, + "learning_rate": 8.825525284503302e-06, + "loss": 1.821, + "step": 16627500 + }, + { + "epoch": 82.38, + "learning_rate": 8.824286698077219e-06, + "loss": 1.8761, + "step": 16628000 + }, + { + "epoch": 82.38, + "learning_rate": 8.823048111651134e-06, + "loss": 1.8517, + "step": 16628500 + }, + { + "epoch": 82.39, + "learning_rate": 8.821812002397905e-06, + "loss": 1.8314, + "step": 16629000 + }, + { + "epoch": 82.39, + "learning_rate": 8.82057341597182e-06, + "loss": 1.853, + "step": 16629500 + }, + { + "epoch": 82.39, + "learning_rate": 8.819337306718589e-06, + "loss": 1.8534, + "step": 16630000 + }, + { + "epoch": 82.39, + "learning_rate": 8.818101197465358e-06, + "loss": 1.8504, + "step": 16630500 + }, + { + "epoch": 82.4, + "learning_rate": 8.816862611039275e-06, + "loss": 1.8522, + "step": 16631000 + }, + { + "epoch": 82.4, + "learning_rate": 8.81562402461319e-06, + "loss": 1.8578, + "step": 16631500 + }, + { + "epoch": 82.4, + "learning_rate": 8.814385438187107e-06, + "loss": 1.8495, + "step": 16632000 + }, + { + "epoch": 82.4, + "learning_rate": 8.813146851761022e-06, + "loss": 1.8355, + "step": 16632500 + }, + { + "epoch": 82.41, + "learning_rate": 8.811908265334939e-06, + "loss": 1.8334, + "step": 16633000 + }, + { + "epoch": 82.41, + "learning_rate": 8.810669678908854e-06, + "loss": 1.8617, + "step": 16633500 + }, + { + "epoch": 82.41, + "learning_rate": 8.809431092482771e-06, + "loss": 1.859, + "step": 16634000 + }, + { + "epoch": 82.41, + "learning_rate": 8.808192506056688e-06, + "loss": 1.8652, + "step": 16634500 + }, + { + "epoch": 82.42, + "learning_rate": 8.806953919630605e-06, + "loss": 1.8603, + "step": 16635000 + }, + { + "epoch": 82.42, + "learning_rate": 8.805717810377374e-06, + "loss": 1.8312, + "step": 16635500 + }, + { + "epoch": 82.42, + "learning_rate": 8.804481701124141e-06, + "loss": 1.8507, + "step": 16636000 + }, + { + "epoch": 82.42, + "learning_rate": 8.803243114698058e-06, + "loss": 1.8346, + "step": 16636500 + }, + { + "epoch": 82.43, + "learning_rate": 8.802004528271975e-06, + "loss": 1.8207, + "step": 16637000 + }, + { + "epoch": 82.43, + "learning_rate": 8.80076594184589e-06, + "loss": 1.8166, + "step": 16637500 + }, + { + "epoch": 82.43, + "learning_rate": 8.799527355419807e-06, + "loss": 1.8308, + "step": 16638000 + }, + { + "epoch": 82.43, + "learning_rate": 8.798288768993724e-06, + "loss": 1.8244, + "step": 16638500 + }, + { + "epoch": 82.44, + "learning_rate": 8.797050182567639e-06, + "loss": 1.8691, + "step": 16639000 + }, + { + "epoch": 82.44, + "learning_rate": 8.795811596141556e-06, + "loss": 1.8349, + "step": 16639500 + }, + { + "epoch": 82.44, + "learning_rate": 8.794573009715471e-06, + "loss": 1.8336, + "step": 16640000 + }, + { + "epoch": 82.44, + "learning_rate": 8.793334423289388e-06, + "loss": 1.8584, + "step": 16640500 + }, + { + "epoch": 82.45, + "learning_rate": 8.792095836863305e-06, + "loss": 1.855, + "step": 16641000 + }, + { + "epoch": 82.45, + "learning_rate": 8.790859727610074e-06, + "loss": 1.8169, + "step": 16641500 + }, + { + "epoch": 82.45, + "learning_rate": 8.78962114118399e-06, + "loss": 1.8205, + "step": 16642000 + }, + { + "epoch": 82.45, + "learning_rate": 8.788382554757906e-06, + "loss": 1.8477, + "step": 16642500 + }, + { + "epoch": 82.46, + "learning_rate": 8.787143968331821e-06, + "loss": 1.8443, + "step": 16643000 + }, + { + "epoch": 82.46, + "learning_rate": 8.785905381905738e-06, + "loss": 1.8265, + "step": 16643500 + }, + { + "epoch": 82.46, + "learning_rate": 8.784666795479655e-06, + "loss": 1.8425, + "step": 16644000 + }, + { + "epoch": 82.46, + "learning_rate": 8.783428209053572e-06, + "loss": 1.8719, + "step": 16644500 + }, + { + "epoch": 82.47, + "learning_rate": 8.782189622627487e-06, + "loss": 1.8275, + "step": 16645000 + }, + { + "epoch": 82.47, + "learning_rate": 8.780951036201404e-06, + "loss": 1.819, + "step": 16645500 + }, + { + "epoch": 82.47, + "learning_rate": 8.779712449775321e-06, + "loss": 1.8275, + "step": 16646000 + }, + { + "epoch": 82.47, + "learning_rate": 8.778473863349238e-06, + "loss": 1.8475, + "step": 16646500 + }, + { + "epoch": 82.47, + "learning_rate": 8.777235276923153e-06, + "loss": 1.8581, + "step": 16647000 + }, + { + "epoch": 82.48, + "learning_rate": 8.775999167669922e-06, + "loss": 1.8677, + "step": 16647500 + }, + { + "epoch": 82.48, + "learning_rate": 8.774760581243839e-06, + "loss": 1.8408, + "step": 16648000 + }, + { + "epoch": 82.48, + "learning_rate": 8.773521994817754e-06, + "loss": 1.831, + "step": 16648500 + }, + { + "epoch": 82.48, + "learning_rate": 8.772283408391671e-06, + "loss": 1.8516, + "step": 16649000 + }, + { + "epoch": 82.49, + "learning_rate": 8.771044821965588e-06, + "loss": 1.8301, + "step": 16649500 + }, + { + "epoch": 82.49, + "learning_rate": 8.769806235539505e-06, + "loss": 1.8379, + "step": 16650000 + }, + { + "epoch": 82.49, + "learning_rate": 8.76856764911342e-06, + "loss": 1.8381, + "step": 16650500 + }, + { + "epoch": 82.49, + "learning_rate": 8.767331539860189e-06, + "loss": 1.8579, + "step": 16651000 + }, + { + "epoch": 82.5, + "learning_rate": 8.766095430606958e-06, + "loss": 1.834, + "step": 16651500 + }, + { + "epoch": 82.5, + "learning_rate": 8.764856844180875e-06, + "loss": 1.8756, + "step": 16652000 + }, + { + "epoch": 82.5, + "learning_rate": 8.76361825775479e-06, + "loss": 1.8439, + "step": 16652500 + }, + { + "epoch": 82.5, + "learning_rate": 8.762379671328707e-06, + "loss": 1.846, + "step": 16653000 + }, + { + "epoch": 82.51, + "learning_rate": 8.761141084902622e-06, + "loss": 1.8149, + "step": 16653500 + }, + { + "epoch": 82.51, + "learning_rate": 8.75990249847654e-06, + "loss": 1.821, + "step": 16654000 + }, + { + "epoch": 82.51, + "learning_rate": 8.758663912050454e-06, + "loss": 1.8492, + "step": 16654500 + }, + { + "epoch": 82.51, + "learning_rate": 8.757425325624371e-06, + "loss": 1.8487, + "step": 16655000 + }, + { + "epoch": 82.52, + "learning_rate": 8.756186739198288e-06, + "loss": 1.8446, + "step": 16655500 + }, + { + "epoch": 82.52, + "learning_rate": 8.754948152772205e-06, + "loss": 1.8555, + "step": 16656000 + }, + { + "epoch": 82.52, + "learning_rate": 8.753709566346122e-06, + "loss": 1.8509, + "step": 16656500 + }, + { + "epoch": 82.52, + "learning_rate": 8.752470979920037e-06, + "loss": 1.8523, + "step": 16657000 + }, + { + "epoch": 82.53, + "learning_rate": 8.751232393493954e-06, + "loss": 1.8373, + "step": 16657500 + }, + { + "epoch": 82.53, + "learning_rate": 8.74999380706787e-06, + "loss": 1.8418, + "step": 16658000 + }, + { + "epoch": 82.53, + "learning_rate": 8.748755220641786e-06, + "loss": 1.8261, + "step": 16658500 + }, + { + "epoch": 82.53, + "learning_rate": 8.747516634215702e-06, + "loss": 1.8565, + "step": 16659000 + }, + { + "epoch": 82.54, + "learning_rate": 8.746280524962472e-06, + "loss": 1.8394, + "step": 16659500 + }, + { + "epoch": 82.54, + "learning_rate": 8.74504441570924e-06, + "loss": 1.8485, + "step": 16660000 + }, + { + "epoch": 82.54, + "learning_rate": 8.743805829283156e-06, + "loss": 1.8437, + "step": 16660500 + }, + { + "epoch": 82.54, + "learning_rate": 8.742569720029925e-06, + "loss": 1.8781, + "step": 16661000 + }, + { + "epoch": 82.55, + "learning_rate": 8.741331133603842e-06, + "loss": 1.8348, + "step": 16661500 + }, + { + "epoch": 82.55, + "learning_rate": 8.740092547177757e-06, + "loss": 1.8591, + "step": 16662000 + }, + { + "epoch": 82.55, + "learning_rate": 8.738853960751674e-06, + "loss": 1.8469, + "step": 16662500 + }, + { + "epoch": 82.55, + "learning_rate": 8.737615374325591e-06, + "loss": 1.8495, + "step": 16663000 + }, + { + "epoch": 82.56, + "learning_rate": 8.736379265072358e-06, + "loss": 1.8496, + "step": 16663500 + }, + { + "epoch": 82.56, + "learning_rate": 8.735140678646275e-06, + "loss": 1.8593, + "step": 16664000 + }, + { + "epoch": 82.56, + "learning_rate": 8.733902092220192e-06, + "loss": 1.8223, + "step": 16664500 + }, + { + "epoch": 82.56, + "learning_rate": 8.732663505794107e-06, + "loss": 1.8388, + "step": 16665000 + }, + { + "epoch": 82.57, + "learning_rate": 8.731427396540876e-06, + "loss": 1.8474, + "step": 16665500 + }, + { + "epoch": 82.57, + "learning_rate": 8.730188810114793e-06, + "loss": 1.8267, + "step": 16666000 + }, + { + "epoch": 82.57, + "learning_rate": 8.728950223688708e-06, + "loss": 1.8401, + "step": 16666500 + }, + { + "epoch": 82.57, + "learning_rate": 8.727711637262625e-06, + "loss": 1.8425, + "step": 16667000 + }, + { + "epoch": 82.58, + "learning_rate": 8.726473050836542e-06, + "loss": 1.8253, + "step": 16667500 + }, + { + "epoch": 82.58, + "learning_rate": 8.725234464410457e-06, + "loss": 1.849, + "step": 16668000 + }, + { + "epoch": 82.58, + "learning_rate": 8.723995877984374e-06, + "loss": 1.8494, + "step": 16668500 + }, + { + "epoch": 82.58, + "learning_rate": 8.722757291558291e-06, + "loss": 1.8518, + "step": 16669000 + }, + { + "epoch": 82.59, + "learning_rate": 8.721518705132208e-06, + "loss": 1.8521, + "step": 16669500 + }, + { + "epoch": 82.59, + "learning_rate": 8.720282595878975e-06, + "loss": 1.8459, + "step": 16670000 + }, + { + "epoch": 82.59, + "learning_rate": 8.719044009452892e-06, + "loss": 1.8418, + "step": 16670500 + }, + { + "epoch": 82.59, + "learning_rate": 8.717805423026807e-06, + "loss": 1.8345, + "step": 16671000 + }, + { + "epoch": 82.6, + "learning_rate": 8.716566836600724e-06, + "loss": 1.8387, + "step": 16671500 + }, + { + "epoch": 82.6, + "learning_rate": 8.715328250174641e-06, + "loss": 1.8517, + "step": 16672000 + }, + { + "epoch": 82.6, + "learning_rate": 8.714089663748558e-06, + "loss": 1.8486, + "step": 16672500 + }, + { + "epoch": 82.6, + "learning_rate": 8.712851077322475e-06, + "loss": 1.8673, + "step": 16673000 + }, + { + "epoch": 82.61, + "learning_rate": 8.71161249089639e-06, + "loss": 1.853, + "step": 16673500 + }, + { + "epoch": 82.61, + "learning_rate": 8.710378858816011e-06, + "loss": 1.8467, + "step": 16674000 + }, + { + "epoch": 82.61, + "learning_rate": 8.70914274956278e-06, + "loss": 1.8369, + "step": 16674500 + }, + { + "epoch": 82.61, + "learning_rate": 8.707904163136695e-06, + "loss": 1.8448, + "step": 16675000 + }, + { + "epoch": 82.62, + "learning_rate": 8.706665576710612e-06, + "loss": 1.8705, + "step": 16675500 + }, + { + "epoch": 82.62, + "learning_rate": 8.705426990284527e-06, + "loss": 1.8561, + "step": 16676000 + }, + { + "epoch": 82.62, + "learning_rate": 8.704188403858444e-06, + "loss": 1.84, + "step": 16676500 + }, + { + "epoch": 82.62, + "learning_rate": 8.702949817432361e-06, + "loss": 1.8699, + "step": 16677000 + }, + { + "epoch": 82.63, + "learning_rate": 8.701711231006278e-06, + "loss": 1.8739, + "step": 16677500 + }, + { + "epoch": 82.63, + "learning_rate": 8.700472644580195e-06, + "loss": 1.8448, + "step": 16678000 + }, + { + "epoch": 82.63, + "learning_rate": 8.69923405815411e-06, + "loss": 1.8449, + "step": 16678500 + }, + { + "epoch": 82.63, + "learning_rate": 8.697997948900879e-06, + "loss": 1.8347, + "step": 16679000 + }, + { + "epoch": 82.64, + "learning_rate": 8.696759362474794e-06, + "loss": 1.8514, + "step": 16679500 + }, + { + "epoch": 82.64, + "learning_rate": 8.695523253221563e-06, + "loss": 1.8566, + "step": 16680000 + }, + { + "epoch": 82.64, + "learning_rate": 8.69428466679548e-06, + "loss": 1.8611, + "step": 16680500 + }, + { + "epoch": 82.64, + "learning_rate": 8.693046080369397e-06, + "loss": 1.8434, + "step": 16681000 + }, + { + "epoch": 82.65, + "learning_rate": 8.691807493943314e-06, + "loss": 1.8486, + "step": 16681500 + }, + { + "epoch": 82.65, + "learning_rate": 8.690568907517229e-06, + "loss": 1.8505, + "step": 16682000 + }, + { + "epoch": 82.65, + "learning_rate": 8.689330321091144e-06, + "loss": 1.8748, + "step": 16682500 + }, + { + "epoch": 82.65, + "learning_rate": 8.688091734665061e-06, + "loss": 1.848, + "step": 16683000 + }, + { + "epoch": 82.66, + "learning_rate": 8.686853148238978e-06, + "loss": 1.8582, + "step": 16683500 + }, + { + "epoch": 82.66, + "learning_rate": 8.685614561812895e-06, + "loss": 1.827, + "step": 16684000 + }, + { + "epoch": 82.66, + "learning_rate": 8.68437597538681e-06, + "loss": 1.8413, + "step": 16684500 + }, + { + "epoch": 82.66, + "learning_rate": 8.68313986613358e-06, + "loss": 1.8242, + "step": 16685000 + }, + { + "epoch": 82.67, + "learning_rate": 8.681903756880348e-06, + "loss": 1.8598, + "step": 16685500 + }, + { + "epoch": 82.67, + "learning_rate": 8.680665170454265e-06, + "loss": 1.8236, + "step": 16686000 + }, + { + "epoch": 82.67, + "learning_rate": 8.67942658402818e-06, + "loss": 1.8421, + "step": 16686500 + }, + { + "epoch": 82.67, + "learning_rate": 8.678187997602097e-06, + "loss": 1.8623, + "step": 16687000 + }, + { + "epoch": 82.68, + "learning_rate": 8.676949411176014e-06, + "loss": 1.8569, + "step": 16687500 + }, + { + "epoch": 82.68, + "learning_rate": 8.675713301922781e-06, + "loss": 1.8482, + "step": 16688000 + }, + { + "epoch": 82.68, + "learning_rate": 8.674474715496698e-06, + "loss": 1.8401, + "step": 16688500 + }, + { + "epoch": 82.68, + "learning_rate": 8.673236129070615e-06, + "loss": 1.8347, + "step": 16689000 + }, + { + "epoch": 82.69, + "learning_rate": 8.67199754264453e-06, + "loss": 1.8505, + "step": 16689500 + }, + { + "epoch": 82.69, + "learning_rate": 8.670758956218447e-06, + "loss": 1.8225, + "step": 16690000 + }, + { + "epoch": 82.69, + "learning_rate": 8.669520369792364e-06, + "loss": 1.8477, + "step": 16690500 + }, + { + "epoch": 82.69, + "learning_rate": 8.668281783366281e-06, + "loss": 1.8289, + "step": 16691000 + }, + { + "epoch": 82.7, + "learning_rate": 8.667043196940198e-06, + "loss": 1.8648, + "step": 16691500 + }, + { + "epoch": 82.7, + "learning_rate": 8.665804610514113e-06, + "loss": 1.8546, + "step": 16692000 + }, + { + "epoch": 82.7, + "learning_rate": 8.66456850126088e-06, + "loss": 1.8621, + "step": 16692500 + }, + { + "epoch": 82.7, + "learning_rate": 8.66333239200765e-06, + "loss": 1.8425, + "step": 16693000 + }, + { + "epoch": 82.71, + "learning_rate": 8.662093805581566e-06, + "loss": 1.8612, + "step": 16693500 + }, + { + "epoch": 82.71, + "learning_rate": 8.660855219155483e-06, + "loss": 1.8241, + "step": 16694000 + }, + { + "epoch": 82.71, + "learning_rate": 8.6596166327294e-06, + "loss": 1.8428, + "step": 16694500 + }, + { + "epoch": 82.71, + "learning_rate": 8.658378046303315e-06, + "loss": 1.8414, + "step": 16695000 + }, + { + "epoch": 82.72, + "learning_rate": 8.657139459877232e-06, + "loss": 1.8509, + "step": 16695500 + }, + { + "epoch": 82.72, + "learning_rate": 8.655903350624e-06, + "loss": 1.8526, + "step": 16696000 + }, + { + "epoch": 82.72, + "learning_rate": 8.654664764197918e-06, + "loss": 1.8545, + "step": 16696500 + }, + { + "epoch": 82.72, + "learning_rate": 8.653426177771833e-06, + "loss": 1.8641, + "step": 16697000 + }, + { + "epoch": 82.73, + "learning_rate": 8.65218759134575e-06, + "loss": 1.8423, + "step": 16697500 + }, + { + "epoch": 82.73, + "learning_rate": 8.650949004919665e-06, + "loss": 1.8456, + "step": 16698000 + }, + { + "epoch": 82.73, + "learning_rate": 8.649710418493582e-06, + "loss": 1.8355, + "step": 16698500 + }, + { + "epoch": 82.73, + "learning_rate": 8.648471832067497e-06, + "loss": 1.8611, + "step": 16699000 + }, + { + "epoch": 82.74, + "learning_rate": 8.647233245641414e-06, + "loss": 1.8534, + "step": 16699500 + }, + { + "epoch": 82.74, + "learning_rate": 8.645997136388183e-06, + "loss": 1.8471, + "step": 16700000 + }, + { + "epoch": 82.74, + "learning_rate": 8.644761027134952e-06, + "loss": 1.8676, + "step": 16700500 + }, + { + "epoch": 82.74, + "learning_rate": 8.643522440708867e-06, + "loss": 1.8479, + "step": 16701000 + }, + { + "epoch": 82.75, + "learning_rate": 8.642283854282784e-06, + "loss": 1.859, + "step": 16701500 + }, + { + "epoch": 82.75, + "learning_rate": 8.641045267856701e-06, + "loss": 1.8395, + "step": 16702000 + }, + { + "epoch": 82.75, + "learning_rate": 8.639806681430618e-06, + "loss": 1.8554, + "step": 16702500 + }, + { + "epoch": 82.75, + "learning_rate": 8.638568095004533e-06, + "loss": 1.8257, + "step": 16703000 + }, + { + "epoch": 82.75, + "learning_rate": 8.63732950857845e-06, + "loss": 1.8565, + "step": 16703500 + }, + { + "epoch": 82.76, + "learning_rate": 8.636090922152367e-06, + "loss": 1.8654, + "step": 16704000 + }, + { + "epoch": 82.76, + "learning_rate": 8.634852335726284e-06, + "loss": 1.8496, + "step": 16704500 + }, + { + "epoch": 82.76, + "learning_rate": 8.633613749300199e-06, + "loss": 1.8481, + "step": 16705000 + }, + { + "epoch": 82.76, + "learning_rate": 8.632375162874114e-06, + "loss": 1.8745, + "step": 16705500 + }, + { + "epoch": 82.77, + "learning_rate": 8.631139053620883e-06, + "loss": 1.8399, + "step": 16706000 + }, + { + "epoch": 82.77, + "learning_rate": 8.6299004671948e-06, + "loss": 1.8657, + "step": 16706500 + }, + { + "epoch": 82.77, + "learning_rate": 8.628661880768717e-06, + "loss": 1.8385, + "step": 16707000 + }, + { + "epoch": 82.77, + "learning_rate": 8.627425771515484e-06, + "loss": 1.8528, + "step": 16707500 + }, + { + "epoch": 82.78, + "learning_rate": 8.626187185089401e-06, + "loss": 1.8448, + "step": 16708000 + }, + { + "epoch": 82.78, + "learning_rate": 8.624948598663318e-06, + "loss": 1.8479, + "step": 16708500 + }, + { + "epoch": 82.78, + "learning_rate": 8.623710012237235e-06, + "loss": 1.8464, + "step": 16709000 + }, + { + "epoch": 82.78, + "learning_rate": 8.62247142581115e-06, + "loss": 1.8539, + "step": 16709500 + }, + { + "epoch": 82.79, + "learning_rate": 8.62123531655792e-06, + "loss": 1.8528, + "step": 16710000 + }, + { + "epoch": 82.79, + "learning_rate": 8.619996730131836e-06, + "loss": 1.8603, + "step": 16710500 + }, + { + "epoch": 82.79, + "learning_rate": 8.618758143705751e-06, + "loss": 1.8632, + "step": 16711000 + }, + { + "epoch": 82.79, + "learning_rate": 8.617519557279668e-06, + "loss": 1.8593, + "step": 16711500 + }, + { + "epoch": 82.8, + "learning_rate": 8.616280970853585e-06, + "loss": 1.8527, + "step": 16712000 + }, + { + "epoch": 82.8, + "learning_rate": 8.6150423844275e-06, + "loss": 1.861, + "step": 16712500 + }, + { + "epoch": 82.8, + "learning_rate": 8.613803798001417e-06, + "loss": 1.8555, + "step": 16713000 + }, + { + "epoch": 82.8, + "learning_rate": 8.612565211575334e-06, + "loss": 1.8448, + "step": 16713500 + }, + { + "epoch": 82.81, + "learning_rate": 8.611326625149251e-06, + "loss": 1.8488, + "step": 16714000 + }, + { + "epoch": 82.81, + "learning_rate": 8.610088038723166e-06, + "loss": 1.8516, + "step": 16714500 + }, + { + "epoch": 82.81, + "learning_rate": 8.608849452297083e-06, + "loss": 1.8199, + "step": 16715000 + }, + { + "epoch": 82.81, + "learning_rate": 8.607610865870998e-06, + "loss": 1.871, + "step": 16715500 + }, + { + "epoch": 82.82, + "learning_rate": 8.606372279444915e-06, + "loss": 1.8239, + "step": 16716000 + }, + { + "epoch": 82.82, + "learning_rate": 8.605136170191684e-06, + "loss": 1.8645, + "step": 16716500 + }, + { + "epoch": 82.82, + "learning_rate": 8.603897583765601e-06, + "loss": 1.857, + "step": 16717000 + }, + { + "epoch": 82.82, + "learning_rate": 8.602658997339516e-06, + "loss": 1.8583, + "step": 16717500 + }, + { + "epoch": 82.83, + "learning_rate": 8.601420410913433e-06, + "loss": 1.8636, + "step": 16718000 + }, + { + "epoch": 82.83, + "learning_rate": 8.60018182448735e-06, + "loss": 1.8488, + "step": 16718500 + }, + { + "epoch": 82.83, + "learning_rate": 8.598943238061265e-06, + "loss": 1.8395, + "step": 16719000 + }, + { + "epoch": 82.83, + "learning_rate": 8.597704651635182e-06, + "loss": 1.8761, + "step": 16719500 + }, + { + "epoch": 82.84, + "learning_rate": 8.596466065209098e-06, + "loss": 1.8222, + "step": 16720000 + }, + { + "epoch": 82.84, + "learning_rate": 8.595229955955868e-06, + "loss": 1.8598, + "step": 16720500 + }, + { + "epoch": 82.84, + "learning_rate": 8.593991369529783e-06, + "loss": 1.8488, + "step": 16721000 + }, + { + "epoch": 82.84, + "learning_rate": 8.592755260276552e-06, + "loss": 1.8285, + "step": 16721500 + }, + { + "epoch": 82.85, + "learning_rate": 8.591516673850467e-06, + "loss": 1.8316, + "step": 16722000 + }, + { + "epoch": 82.85, + "learning_rate": 8.590278087424384e-06, + "loss": 1.8113, + "step": 16722500 + }, + { + "epoch": 82.85, + "learning_rate": 8.589039500998301e-06, + "loss": 1.8356, + "step": 16723000 + }, + { + "epoch": 82.85, + "learning_rate": 8.587800914572218e-06, + "loss": 1.8306, + "step": 16723500 + }, + { + "epoch": 82.86, + "learning_rate": 8.586562328146133e-06, + "loss": 1.848, + "step": 16724000 + }, + { + "epoch": 82.86, + "learning_rate": 8.585326218892902e-06, + "loss": 1.8299, + "step": 16724500 + }, + { + "epoch": 82.86, + "learning_rate": 8.584087632466817e-06, + "loss": 1.8791, + "step": 16725000 + }, + { + "epoch": 82.86, + "learning_rate": 8.582849046040734e-06, + "loss": 1.852, + "step": 16725500 + }, + { + "epoch": 82.87, + "learning_rate": 8.581610459614651e-06, + "loss": 1.8298, + "step": 16726000 + }, + { + "epoch": 82.87, + "learning_rate": 8.58037435036142e-06, + "loss": 1.8341, + "step": 16726500 + }, + { + "epoch": 82.87, + "learning_rate": 8.579135763935337e-06, + "loss": 1.8457, + "step": 16727000 + }, + { + "epoch": 82.87, + "learning_rate": 8.577897177509254e-06, + "loss": 1.8674, + "step": 16727500 + }, + { + "epoch": 82.88, + "learning_rate": 8.576658591083169e-06, + "loss": 1.8436, + "step": 16728000 + }, + { + "epoch": 82.88, + "learning_rate": 8.575420004657084e-06, + "loss": 1.8449, + "step": 16728500 + }, + { + "epoch": 82.88, + "learning_rate": 8.574181418231001e-06, + "loss": 1.8591, + "step": 16729000 + }, + { + "epoch": 82.88, + "learning_rate": 8.572942831804918e-06, + "loss": 1.8457, + "step": 16729500 + }, + { + "epoch": 82.89, + "learning_rate": 8.571704245378833e-06, + "loss": 1.8603, + "step": 16730000 + }, + { + "epoch": 82.89, + "learning_rate": 8.57046565895275e-06, + "loss": 1.8525, + "step": 16730500 + }, + { + "epoch": 82.89, + "learning_rate": 8.569227072526667e-06, + "loss": 1.8592, + "step": 16731000 + }, + { + "epoch": 82.89, + "learning_rate": 8.567990963273436e-06, + "loss": 1.87, + "step": 16731500 + }, + { + "epoch": 82.9, + "learning_rate": 8.566752376847351e-06, + "loss": 1.8415, + "step": 16732000 + }, + { + "epoch": 82.9, + "learning_rate": 8.565513790421268e-06, + "loss": 1.8498, + "step": 16732500 + }, + { + "epoch": 82.9, + "learning_rate": 8.564275203995185e-06, + "loss": 1.8351, + "step": 16733000 + }, + { + "epoch": 82.9, + "learning_rate": 8.5630366175691e-06, + "loss": 1.881, + "step": 16733500 + }, + { + "epoch": 82.91, + "learning_rate": 8.561798031143017e-06, + "loss": 1.8498, + "step": 16734000 + }, + { + "epoch": 82.91, + "learning_rate": 8.560559444716934e-06, + "loss": 1.8495, + "step": 16734500 + }, + { + "epoch": 82.91, + "learning_rate": 8.559320858290851e-06, + "loss": 1.8615, + "step": 16735000 + }, + { + "epoch": 82.91, + "learning_rate": 8.558082271864766e-06, + "loss": 1.8476, + "step": 16735500 + }, + { + "epoch": 82.92, + "learning_rate": 8.556846162611535e-06, + "loss": 1.8632, + "step": 16736000 + }, + { + "epoch": 82.92, + "learning_rate": 8.55560757618545e-06, + "loss": 1.8395, + "step": 16736500 + }, + { + "epoch": 82.92, + "learning_rate": 8.554371466932221e-06, + "loss": 1.8445, + "step": 16737000 + }, + { + "epoch": 82.92, + "learning_rate": 8.553135357678988e-06, + "loss": 1.8539, + "step": 16737500 + }, + { + "epoch": 82.93, + "learning_rate": 8.551896771252905e-06, + "loss": 1.8423, + "step": 16738000 + }, + { + "epoch": 82.93, + "learning_rate": 8.55065818482682e-06, + "loss": 1.8353, + "step": 16738500 + }, + { + "epoch": 82.93, + "learning_rate": 8.549419598400737e-06, + "loss": 1.8535, + "step": 16739000 + }, + { + "epoch": 82.93, + "learning_rate": 8.548181011974654e-06, + "loss": 1.836, + "step": 16739500 + }, + { + "epoch": 82.94, + "learning_rate": 8.546942425548571e-06, + "loss": 1.8393, + "step": 16740000 + }, + { + "epoch": 82.94, + "learning_rate": 8.54570631629534e-06, + "loss": 1.8439, + "step": 16740500 + }, + { + "epoch": 82.94, + "learning_rate": 8.544467729869255e-06, + "loss": 1.8277, + "step": 16741000 + }, + { + "epoch": 82.94, + "learning_rate": 8.54322914344317e-06, + "loss": 1.8348, + "step": 16741500 + }, + { + "epoch": 82.95, + "learning_rate": 8.541990557017087e-06, + "loss": 1.8787, + "step": 16742000 + }, + { + "epoch": 82.95, + "learning_rate": 8.540751970591004e-06, + "loss": 1.8728, + "step": 16742500 + }, + { + "epoch": 82.95, + "learning_rate": 8.539513384164921e-06, + "loss": 1.8555, + "step": 16743000 + }, + { + "epoch": 82.95, + "learning_rate": 8.53827727491169e-06, + "loss": 1.8468, + "step": 16743500 + }, + { + "epoch": 82.96, + "learning_rate": 8.537041165658457e-06, + "loss": 1.8606, + "step": 16744000 + }, + { + "epoch": 82.96, + "learning_rate": 8.535802579232374e-06, + "loss": 1.8442, + "step": 16744500 + }, + { + "epoch": 82.96, + "learning_rate": 8.534563992806291e-06, + "loss": 1.852, + "step": 16745000 + }, + { + "epoch": 82.96, + "learning_rate": 8.533325406380206e-06, + "loss": 1.8583, + "step": 16745500 + }, + { + "epoch": 82.97, + "learning_rate": 8.532086819954123e-06, + "loss": 1.8112, + "step": 16746000 + }, + { + "epoch": 82.97, + "learning_rate": 8.53084823352804e-06, + "loss": 1.8681, + "step": 16746500 + }, + { + "epoch": 82.97, + "learning_rate": 8.529612124274807e-06, + "loss": 1.8655, + "step": 16747000 + }, + { + "epoch": 82.97, + "learning_rate": 8.528373537848724e-06, + "loss": 1.8341, + "step": 16747500 + }, + { + "epoch": 82.98, + "learning_rate": 8.527137428595493e-06, + "loss": 1.8588, + "step": 16748000 + }, + { + "epoch": 82.98, + "learning_rate": 8.52589884216941e-06, + "loss": 1.8441, + "step": 16748500 + }, + { + "epoch": 82.98, + "learning_rate": 8.524660255743327e-06, + "loss": 1.8379, + "step": 16749000 + }, + { + "epoch": 82.98, + "learning_rate": 8.523424146490094e-06, + "loss": 1.8512, + "step": 16749500 + }, + { + "epoch": 82.99, + "learning_rate": 8.52218556006401e-06, + "loss": 1.867, + "step": 16750000 + }, + { + "epoch": 82.99, + "learning_rate": 8.520946973637926e-06, + "loss": 1.8567, + "step": 16750500 + }, + { + "epoch": 82.99, + "learning_rate": 8.519708387211843e-06, + "loss": 1.8348, + "step": 16751000 + }, + { + "epoch": 82.99, + "learning_rate": 8.51846980078576e-06, + "loss": 1.8436, + "step": 16751500 + }, + { + "epoch": 83.0, + "learning_rate": 8.517231214359677e-06, + "loss": 1.867, + "step": 16752000 + }, + { + "epoch": 83.0, + "learning_rate": 8.515992627933594e-06, + "loss": 1.8462, + "step": 16752500 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.6826751215643115, + "eval_accuracy_mlm": 0.6434286902286902, + "eval_accuracy_nsp": 0.8677787408955949, + "eval_loss": 2.30057954788208, + "eval_runtime": 146.6226, + "eval_samples_per_second": 1738.879, + "eval_steps_per_second": 72.458, + "step": 16752969 + }, + { + "epoch": 83.0, + "learning_rate": 8.514754041507509e-06, + "loss": 1.8418, + "step": 16753000 + }, + { + "epoch": 83.0, + "learning_rate": 8.513515455081426e-06, + "loss": 1.8145, + "step": 16753500 + }, + { + "epoch": 83.01, + "learning_rate": 8.512276868655341e-06, + "loss": 1.844, + "step": 16754000 + }, + { + "epoch": 83.01, + "learning_rate": 8.511038282229258e-06, + "loss": 1.8424, + "step": 16754500 + }, + { + "epoch": 83.01, + "learning_rate": 8.509802172976027e-06, + "loss": 1.8356, + "step": 16755000 + }, + { + "epoch": 83.01, + "learning_rate": 8.508563586549944e-06, + "loss": 1.8465, + "step": 16755500 + }, + { + "epoch": 83.02, + "learning_rate": 8.507325000123859e-06, + "loss": 1.8213, + "step": 16756000 + }, + { + "epoch": 83.02, + "learning_rate": 8.506086413697776e-06, + "loss": 1.8547, + "step": 16756500 + }, + { + "epoch": 83.02, + "learning_rate": 8.504847827271691e-06, + "loss": 1.8493, + "step": 16757000 + }, + { + "epoch": 83.02, + "learning_rate": 8.503609240845608e-06, + "loss": 1.8501, + "step": 16757500 + }, + { + "epoch": 83.02, + "learning_rate": 8.502370654419523e-06, + "loss": 1.8442, + "step": 16758000 + }, + { + "epoch": 83.03, + "learning_rate": 8.501134545166294e-06, + "loss": 1.8341, + "step": 16758500 + }, + { + "epoch": 83.03, + "learning_rate": 8.499898435913063e-06, + "loss": 1.8506, + "step": 16759000 + }, + { + "epoch": 83.03, + "learning_rate": 8.498659849486978e-06, + "loss": 1.8367, + "step": 16759500 + }, + { + "epoch": 83.03, + "learning_rate": 8.497421263060893e-06, + "loss": 1.8336, + "step": 16760000 + }, + { + "epoch": 83.04, + "learning_rate": 8.49618267663481e-06, + "loss": 1.8266, + "step": 16760500 + }, + { + "epoch": 83.04, + "learning_rate": 8.494944090208727e-06, + "loss": 1.8323, + "step": 16761000 + }, + { + "epoch": 83.04, + "learning_rate": 8.493705503782644e-06, + "loss": 1.8396, + "step": 16761500 + }, + { + "epoch": 83.04, + "learning_rate": 8.492466917356559e-06, + "loss": 1.8538, + "step": 16762000 + }, + { + "epoch": 83.05, + "learning_rate": 8.491228330930476e-06, + "loss": 1.8569, + "step": 16762500 + }, + { + "epoch": 83.05, + "learning_rate": 8.489989744504393e-06, + "loss": 1.8235, + "step": 16763000 + }, + { + "epoch": 83.05, + "learning_rate": 8.48875115807831e-06, + "loss": 1.8255, + "step": 16763500 + }, + { + "epoch": 83.05, + "learning_rate": 8.487512571652225e-06, + "loss": 1.8203, + "step": 16764000 + }, + { + "epoch": 83.06, + "learning_rate": 8.48627398522614e-06, + "loss": 1.8374, + "step": 16764500 + }, + { + "epoch": 83.06, + "learning_rate": 8.485035398800057e-06, + "loss": 1.8128, + "step": 16765000 + }, + { + "epoch": 83.06, + "learning_rate": 8.483796812373974e-06, + "loss": 1.8524, + "step": 16765500 + }, + { + "epoch": 83.06, + "learning_rate": 8.482558225947891e-06, + "loss": 1.8449, + "step": 16766000 + }, + { + "epoch": 83.07, + "learning_rate": 8.48132211669466e-06, + "loss": 1.842, + "step": 16766500 + }, + { + "epoch": 83.07, + "learning_rate": 8.480083530268577e-06, + "loss": 1.8491, + "step": 16767000 + }, + { + "epoch": 83.07, + "learning_rate": 8.478844943842492e-06, + "loss": 1.8189, + "step": 16767500 + }, + { + "epoch": 83.07, + "learning_rate": 8.477608834589261e-06, + "loss": 1.8566, + "step": 16768000 + }, + { + "epoch": 83.08, + "learning_rate": 8.476370248163176e-06, + "loss": 1.8596, + "step": 16768500 + }, + { + "epoch": 83.08, + "learning_rate": 8.475131661737093e-06, + "loss": 1.8333, + "step": 16769000 + }, + { + "epoch": 83.08, + "learning_rate": 8.47389307531101e-06, + "loss": 1.8331, + "step": 16769500 + }, + { + "epoch": 83.08, + "learning_rate": 8.472654488884927e-06, + "loss": 1.8459, + "step": 16770000 + }, + { + "epoch": 83.09, + "learning_rate": 8.471415902458842e-06, + "loss": 1.8549, + "step": 16770500 + }, + { + "epoch": 83.09, + "learning_rate": 8.470177316032759e-06, + "loss": 1.8225, + "step": 16771000 + }, + { + "epoch": 83.09, + "learning_rate": 8.468938729606674e-06, + "loss": 1.8608, + "step": 16771500 + }, + { + "epoch": 83.09, + "learning_rate": 8.467700143180591e-06, + "loss": 1.8353, + "step": 16772000 + }, + { + "epoch": 83.1, + "learning_rate": 8.466461556754508e-06, + "loss": 1.8381, + "step": 16772500 + }, + { + "epoch": 83.1, + "learning_rate": 8.465225447501277e-06, + "loss": 1.8474, + "step": 16773000 + }, + { + "epoch": 83.1, + "learning_rate": 8.463986861075192e-06, + "loss": 1.8376, + "step": 16773500 + }, + { + "epoch": 83.1, + "learning_rate": 8.46274827464911e-06, + "loss": 1.8248, + "step": 16774000 + }, + { + "epoch": 83.11, + "learning_rate": 8.461509688223024e-06, + "loss": 1.8438, + "step": 16774500 + }, + { + "epoch": 83.11, + "learning_rate": 8.460271101796941e-06, + "loss": 1.8448, + "step": 16775000 + }, + { + "epoch": 83.11, + "learning_rate": 8.459032515370858e-06, + "loss": 1.8512, + "step": 16775500 + }, + { + "epoch": 83.11, + "learning_rate": 8.457793928944773e-06, + "loss": 1.8285, + "step": 16776000 + }, + { + "epoch": 83.12, + "learning_rate": 8.45655534251869e-06, + "loss": 1.8258, + "step": 16776500 + }, + { + "epoch": 83.12, + "learning_rate": 8.455316756092607e-06, + "loss": 1.8485, + "step": 16777000 + }, + { + "epoch": 83.12, + "learning_rate": 8.454078169666524e-06, + "loss": 1.8516, + "step": 16777500 + }, + { + "epoch": 83.12, + "learning_rate": 8.452842060413291e-06, + "loss": 1.8299, + "step": 16778000 + }, + { + "epoch": 83.13, + "learning_rate": 8.451603473987208e-06, + "loss": 1.8275, + "step": 16778500 + }, + { + "epoch": 83.13, + "learning_rate": 8.450364887561124e-06, + "loss": 1.8226, + "step": 16779000 + }, + { + "epoch": 83.13, + "learning_rate": 8.44912630113504e-06, + "loss": 1.8531, + "step": 16779500 + }, + { + "epoch": 83.13, + "learning_rate": 8.447887714708957e-06, + "loss": 1.8372, + "step": 16780000 + }, + { + "epoch": 83.14, + "learning_rate": 8.446649128282874e-06, + "loss": 1.8598, + "step": 16780500 + }, + { + "epoch": 83.14, + "learning_rate": 8.44541054185679e-06, + "loss": 1.8383, + "step": 16781000 + }, + { + "epoch": 83.14, + "learning_rate": 8.444171955430707e-06, + "loss": 1.8365, + "step": 16781500 + }, + { + "epoch": 83.14, + "learning_rate": 8.442935846177474e-06, + "loss": 1.8314, + "step": 16782000 + }, + { + "epoch": 83.15, + "learning_rate": 8.44169725975139e-06, + "loss": 1.8377, + "step": 16782500 + }, + { + "epoch": 83.15, + "learning_rate": 8.440458673325307e-06, + "loss": 1.8382, + "step": 16783000 + }, + { + "epoch": 83.15, + "learning_rate": 8.439220086899224e-06, + "loss": 1.8299, + "step": 16783500 + }, + { + "epoch": 83.15, + "learning_rate": 8.437981500473141e-06, + "loss": 1.823, + "step": 16784000 + }, + { + "epoch": 83.16, + "learning_rate": 8.43674539121991e-06, + "loss": 1.8485, + "step": 16784500 + }, + { + "epoch": 83.16, + "learning_rate": 8.435509281966677e-06, + "loss": 1.8517, + "step": 16785000 + }, + { + "epoch": 83.16, + "learning_rate": 8.434270695540594e-06, + "loss": 1.8268, + "step": 16785500 + }, + { + "epoch": 83.16, + "learning_rate": 8.43303210911451e-06, + "loss": 1.8334, + "step": 16786000 + }, + { + "epoch": 83.17, + "learning_rate": 8.431793522688426e-06, + "loss": 1.8439, + "step": 16786500 + }, + { + "epoch": 83.17, + "learning_rate": 8.430554936262343e-06, + "loss": 1.8156, + "step": 16787000 + }, + { + "epoch": 83.17, + "learning_rate": 8.42931634983626e-06, + "loss": 1.8533, + "step": 16787500 + }, + { + "epoch": 83.17, + "learning_rate": 8.428077763410175e-06, + "loss": 1.8391, + "step": 16788000 + }, + { + "epoch": 83.18, + "learning_rate": 8.426839176984092e-06, + "loss": 1.84, + "step": 16788500 + }, + { + "epoch": 83.18, + "learning_rate": 8.425600590558008e-06, + "loss": 1.8414, + "step": 16789000 + }, + { + "epoch": 83.18, + "learning_rate": 8.424362004131925e-06, + "loss": 1.8744, + "step": 16789500 + }, + { + "epoch": 83.18, + "learning_rate": 8.423123417705841e-06, + "loss": 1.8423, + "step": 16790000 + }, + { + "epoch": 83.19, + "learning_rate": 8.421884831279757e-06, + "loss": 1.8338, + "step": 16790500 + }, + { + "epoch": 83.19, + "learning_rate": 8.420651199199377e-06, + "loss": 1.8449, + "step": 16791000 + }, + { + "epoch": 83.19, + "learning_rate": 8.419412612773294e-06, + "loss": 1.8644, + "step": 16791500 + }, + { + "epoch": 83.19, + "learning_rate": 8.418174026347211e-06, + "loss": 1.8664, + "step": 16792000 + }, + { + "epoch": 83.2, + "learning_rate": 8.416935439921126e-06, + "loss": 1.8374, + "step": 16792500 + }, + { + "epoch": 83.2, + "learning_rate": 8.415696853495043e-06, + "loss": 1.8554, + "step": 16793000 + }, + { + "epoch": 83.2, + "learning_rate": 8.41445826706896e-06, + "loss": 1.8332, + "step": 16793500 + }, + { + "epoch": 83.2, + "learning_rate": 8.413219680642877e-06, + "loss": 1.8302, + "step": 16794000 + }, + { + "epoch": 83.21, + "learning_rate": 8.411981094216792e-06, + "loss": 1.8371, + "step": 16794500 + }, + { + "epoch": 83.21, + "learning_rate": 8.41074250779071e-06, + "loss": 1.8372, + "step": 16795000 + }, + { + "epoch": 83.21, + "learning_rate": 8.409503921364625e-06, + "loss": 1.8487, + "step": 16795500 + }, + { + "epoch": 83.21, + "learning_rate": 8.408267812111393e-06, + "loss": 1.8346, + "step": 16796000 + }, + { + "epoch": 83.22, + "learning_rate": 8.40702922568531e-06, + "loss": 1.8162, + "step": 16796500 + }, + { + "epoch": 83.22, + "learning_rate": 8.40579311643208e-06, + "loss": 1.8418, + "step": 16797000 + }, + { + "epoch": 83.22, + "learning_rate": 8.404554530005994e-06, + "loss": 1.8236, + "step": 16797500 + }, + { + "epoch": 83.22, + "learning_rate": 8.403315943579911e-06, + "loss": 1.8636, + "step": 16798000 + }, + { + "epoch": 83.23, + "learning_rate": 8.402077357153827e-06, + "loss": 1.827, + "step": 16798500 + }, + { + "epoch": 83.23, + "learning_rate": 8.400838770727744e-06, + "loss": 1.84, + "step": 16799000 + }, + { + "epoch": 83.23, + "learning_rate": 8.39960018430166e-06, + "loss": 1.8334, + "step": 16799500 + }, + { + "epoch": 83.23, + "learning_rate": 8.398361597875577e-06, + "loss": 1.8479, + "step": 16800000 + }, + { + "epoch": 83.24, + "learning_rate": 8.397125488622346e-06, + "loss": 1.8316, + "step": 16800500 + }, + { + "epoch": 83.24, + "learning_rate": 8.395886902196261e-06, + "loss": 1.8339, + "step": 16801000 + }, + { + "epoch": 83.24, + "learning_rate": 8.394648315770178e-06, + "loss": 1.829, + "step": 16801500 + }, + { + "epoch": 83.24, + "learning_rate": 8.393409729344094e-06, + "loss": 1.8458, + "step": 16802000 + }, + { + "epoch": 83.25, + "learning_rate": 8.39217114291801e-06, + "loss": 1.8505, + "step": 16802500 + }, + { + "epoch": 83.25, + "learning_rate": 8.390932556491927e-06, + "loss": 1.8375, + "step": 16803000 + }, + { + "epoch": 83.25, + "learning_rate": 8.389693970065844e-06, + "loss": 1.8523, + "step": 16803500 + }, + { + "epoch": 83.25, + "learning_rate": 8.38845538363976e-06, + "loss": 1.8611, + "step": 16804000 + }, + { + "epoch": 83.26, + "learning_rate": 8.387216797213677e-06, + "loss": 1.8307, + "step": 16804500 + }, + { + "epoch": 83.26, + "learning_rate": 8.385980687960444e-06, + "loss": 1.8601, + "step": 16805000 + }, + { + "epoch": 83.26, + "learning_rate": 8.384744578707214e-06, + "loss": 1.8494, + "step": 16805500 + }, + { + "epoch": 83.26, + "learning_rate": 8.38350599228113e-06, + "loss": 1.823, + "step": 16806000 + }, + { + "epoch": 83.27, + "learning_rate": 8.382269883027898e-06, + "loss": 1.8268, + "step": 16806500 + }, + { + "epoch": 83.27, + "learning_rate": 8.381031296601813e-06, + "loss": 1.8533, + "step": 16807000 + }, + { + "epoch": 83.27, + "learning_rate": 8.37979271017573e-06, + "loss": 1.8536, + "step": 16807500 + }, + { + "epoch": 83.27, + "learning_rate": 8.378554123749647e-06, + "loss": 1.8299, + "step": 16808000 + }, + { + "epoch": 83.28, + "learning_rate": 8.377315537323564e-06, + "loss": 1.8519, + "step": 16808500 + }, + { + "epoch": 83.28, + "learning_rate": 8.37607695089748e-06, + "loss": 1.8439, + "step": 16809000 + }, + { + "epoch": 83.28, + "learning_rate": 8.374838364471396e-06, + "loss": 1.8432, + "step": 16809500 + }, + { + "epoch": 83.28, + "learning_rate": 8.373599778045313e-06, + "loss": 1.8429, + "step": 16810000 + }, + { + "epoch": 83.29, + "learning_rate": 8.37236119161923e-06, + "loss": 1.8459, + "step": 16810500 + }, + { + "epoch": 83.29, + "learning_rate": 8.371122605193145e-06, + "loss": 1.8344, + "step": 16811000 + }, + { + "epoch": 83.29, + "learning_rate": 8.369884018767062e-06, + "loss": 1.8315, + "step": 16811500 + }, + { + "epoch": 83.29, + "learning_rate": 8.368645432340978e-06, + "loss": 1.8333, + "step": 16812000 + }, + { + "epoch": 83.29, + "learning_rate": 8.367409323087746e-06, + "loss": 1.8142, + "step": 16812500 + }, + { + "epoch": 83.3, + "learning_rate": 8.366170736661663e-06, + "loss": 1.8287, + "step": 16813000 + }, + { + "epoch": 83.3, + "learning_rate": 8.36493215023558e-06, + "loss": 1.8284, + "step": 16813500 + }, + { + "epoch": 83.3, + "learning_rate": 8.363693563809497e-06, + "loss": 1.83, + "step": 16814000 + }, + { + "epoch": 83.3, + "learning_rate": 8.362454977383412e-06, + "loss": 1.8447, + "step": 16814500 + }, + { + "epoch": 83.31, + "learning_rate": 8.361216390957328e-06, + "loss": 1.85, + "step": 16815000 + }, + { + "epoch": 83.31, + "learning_rate": 8.359980281704097e-06, + "loss": 1.8505, + "step": 16815500 + }, + { + "epoch": 83.31, + "learning_rate": 8.358741695278013e-06, + "loss": 1.8298, + "step": 16816000 + }, + { + "epoch": 83.31, + "learning_rate": 8.357505586024782e-06, + "loss": 1.8376, + "step": 16816500 + }, + { + "epoch": 83.32, + "learning_rate": 8.3562669995987e-06, + "loss": 1.8317, + "step": 16817000 + }, + { + "epoch": 83.32, + "learning_rate": 8.355028413172614e-06, + "loss": 1.8385, + "step": 16817500 + }, + { + "epoch": 83.32, + "learning_rate": 8.353789826746531e-06, + "loss": 1.8626, + "step": 16818000 + }, + { + "epoch": 83.32, + "learning_rate": 8.352551240320447e-06, + "loss": 1.8396, + "step": 16818500 + }, + { + "epoch": 83.33, + "learning_rate": 8.351312653894363e-06, + "loss": 1.8575, + "step": 16819000 + }, + { + "epoch": 83.33, + "learning_rate": 8.350076544641132e-06, + "loss": 1.8483, + "step": 16819500 + }, + { + "epoch": 83.33, + "learning_rate": 8.34883795821505e-06, + "loss": 1.8495, + "step": 16820000 + }, + { + "epoch": 83.33, + "learning_rate": 8.347599371788964e-06, + "loss": 1.8478, + "step": 16820500 + }, + { + "epoch": 83.34, + "learning_rate": 8.346360785362881e-06, + "loss": 1.8481, + "step": 16821000 + }, + { + "epoch": 83.34, + "learning_rate": 8.345122198936797e-06, + "loss": 1.8542, + "step": 16821500 + }, + { + "epoch": 83.34, + "learning_rate": 8.343883612510714e-06, + "loss": 1.8544, + "step": 16822000 + }, + { + "epoch": 83.34, + "learning_rate": 8.342647503257482e-06, + "loss": 1.832, + "step": 16822500 + }, + { + "epoch": 83.35, + "learning_rate": 8.3414089168314e-06, + "loss": 1.8451, + "step": 16823000 + }, + { + "epoch": 83.35, + "learning_rate": 8.340170330405316e-06, + "loss": 1.8391, + "step": 16823500 + }, + { + "epoch": 83.35, + "learning_rate": 8.338931743979231e-06, + "loss": 1.8498, + "step": 16824000 + }, + { + "epoch": 83.35, + "learning_rate": 8.337693157553147e-06, + "loss": 1.8509, + "step": 16824500 + }, + { + "epoch": 83.36, + "learning_rate": 8.336454571127064e-06, + "loss": 1.8277, + "step": 16825000 + }, + { + "epoch": 83.36, + "learning_rate": 8.335218461873832e-06, + "loss": 1.8535, + "step": 16825500 + }, + { + "epoch": 83.36, + "learning_rate": 8.333982352620601e-06, + "loss": 1.8545, + "step": 16826000 + }, + { + "epoch": 83.36, + "learning_rate": 8.332743766194518e-06, + "loss": 1.838, + "step": 16826500 + }, + { + "epoch": 83.37, + "learning_rate": 8.331505179768433e-06, + "loss": 1.8317, + "step": 16827000 + }, + { + "epoch": 83.37, + "learning_rate": 8.33026659334235e-06, + "loss": 1.8399, + "step": 16827500 + }, + { + "epoch": 83.37, + "learning_rate": 8.329028006916267e-06, + "loss": 1.8131, + "step": 16828000 + }, + { + "epoch": 83.37, + "learning_rate": 8.327789420490182e-06, + "loss": 1.8819, + "step": 16828500 + }, + { + "epoch": 83.38, + "learning_rate": 8.326553311236953e-06, + "loss": 1.8396, + "step": 16829000 + }, + { + "epoch": 83.38, + "learning_rate": 8.325314724810868e-06, + "loss": 1.8224, + "step": 16829500 + }, + { + "epoch": 83.38, + "learning_rate": 8.324076138384783e-06, + "loss": 1.8528, + "step": 16830000 + }, + { + "epoch": 83.38, + "learning_rate": 8.3228375519587e-06, + "loss": 1.8524, + "step": 16830500 + }, + { + "epoch": 83.39, + "learning_rate": 8.321598965532617e-06, + "loss": 1.8528, + "step": 16831000 + }, + { + "epoch": 83.39, + "learning_rate": 8.320360379106534e-06, + "loss": 1.851, + "step": 16831500 + }, + { + "epoch": 83.39, + "learning_rate": 8.31912179268045e-06, + "loss": 1.8598, + "step": 16832000 + }, + { + "epoch": 83.39, + "learning_rate": 8.317883206254366e-06, + "loss": 1.822, + "step": 16832500 + }, + { + "epoch": 83.4, + "learning_rate": 8.316644619828283e-06, + "loss": 1.8302, + "step": 16833000 + }, + { + "epoch": 83.4, + "learning_rate": 8.3154060334022e-06, + "loss": 1.8316, + "step": 16833500 + }, + { + "epoch": 83.4, + "learning_rate": 8.314169924148967e-06, + "loss": 1.8534, + "step": 16834000 + }, + { + "epoch": 83.4, + "learning_rate": 8.312931337722884e-06, + "loss": 1.838, + "step": 16834500 + }, + { + "epoch": 83.41, + "learning_rate": 8.3116927512968e-06, + "loss": 1.847, + "step": 16835000 + }, + { + "epoch": 83.41, + "learning_rate": 8.310454164870716e-06, + "loss": 1.8337, + "step": 16835500 + }, + { + "epoch": 83.41, + "learning_rate": 8.309215578444633e-06, + "loss": 1.8482, + "step": 16836000 + }, + { + "epoch": 83.41, + "learning_rate": 8.30797699201855e-06, + "loss": 1.8256, + "step": 16836500 + }, + { + "epoch": 83.42, + "learning_rate": 8.306738405592466e-06, + "loss": 1.837, + "step": 16837000 + }, + { + "epoch": 83.42, + "learning_rate": 8.305502296339234e-06, + "loss": 1.8523, + "step": 16837500 + }, + { + "epoch": 83.42, + "learning_rate": 8.30426370991315e-06, + "loss": 1.8745, + "step": 16838000 + }, + { + "epoch": 83.42, + "learning_rate": 8.303025123487067e-06, + "loss": 1.8518, + "step": 16838500 + }, + { + "epoch": 83.43, + "learning_rate": 8.301786537060983e-06, + "loss": 1.8413, + "step": 16839000 + }, + { + "epoch": 83.43, + "learning_rate": 8.3005479506349e-06, + "loss": 1.831, + "step": 16839500 + }, + { + "epoch": 83.43, + "learning_rate": 8.299309364208817e-06, + "loss": 1.8637, + "step": 16840000 + }, + { + "epoch": 83.43, + "learning_rate": 8.298073254955584e-06, + "loss": 1.858, + "step": 16840500 + }, + { + "epoch": 83.44, + "learning_rate": 8.296837145702353e-06, + "loss": 1.8741, + "step": 16841000 + }, + { + "epoch": 83.44, + "learning_rate": 8.29559855927627e-06, + "loss": 1.8378, + "step": 16841500 + }, + { + "epoch": 83.44, + "learning_rate": 8.294359972850185e-06, + "loss": 1.8574, + "step": 16842000 + }, + { + "epoch": 83.44, + "learning_rate": 8.293121386424102e-06, + "loss": 1.8394, + "step": 16842500 + }, + { + "epoch": 83.45, + "learning_rate": 8.29188279999802e-06, + "loss": 1.8349, + "step": 16843000 + }, + { + "epoch": 83.45, + "learning_rate": 8.290644213571934e-06, + "loss": 1.8464, + "step": 16843500 + }, + { + "epoch": 83.45, + "learning_rate": 8.289405627145851e-06, + "loss": 1.8019, + "step": 16844000 + }, + { + "epoch": 83.45, + "learning_rate": 8.288167040719767e-06, + "loss": 1.8351, + "step": 16844500 + }, + { + "epoch": 83.46, + "learning_rate": 8.286928454293684e-06, + "loss": 1.8516, + "step": 16845000 + }, + { + "epoch": 83.46, + "learning_rate": 8.2856898678676e-06, + "loss": 1.8611, + "step": 16845500 + }, + { + "epoch": 83.46, + "learning_rate": 8.28445375861437e-06, + "loss": 1.8407, + "step": 16846000 + }, + { + "epoch": 83.46, + "learning_rate": 8.283215172188286e-06, + "loss": 1.8291, + "step": 16846500 + }, + { + "epoch": 83.47, + "learning_rate": 8.281976585762201e-06, + "loss": 1.8401, + "step": 16847000 + }, + { + "epoch": 83.47, + "learning_rate": 8.28074047650897e-06, + "loss": 1.8547, + "step": 16847500 + }, + { + "epoch": 83.47, + "learning_rate": 8.279501890082887e-06, + "loss": 1.862, + "step": 16848000 + }, + { + "epoch": 83.47, + "learning_rate": 8.278263303656802e-06, + "loss": 1.8676, + "step": 16848500 + }, + { + "epoch": 83.48, + "learning_rate": 8.27702471723072e-06, + "loss": 1.8565, + "step": 16849000 + }, + { + "epoch": 83.48, + "learning_rate": 8.275788607977488e-06, + "loss": 1.8217, + "step": 16849500 + }, + { + "epoch": 83.48, + "learning_rate": 8.274550021551403e-06, + "loss": 1.8426, + "step": 16850000 + }, + { + "epoch": 83.48, + "learning_rate": 8.27331143512532e-06, + "loss": 1.8237, + "step": 16850500 + }, + { + "epoch": 83.49, + "learning_rate": 8.272072848699237e-06, + "loss": 1.8488, + "step": 16851000 + }, + { + "epoch": 83.49, + "learning_rate": 8.270834262273153e-06, + "loss": 1.8382, + "step": 16851500 + }, + { + "epoch": 83.49, + "learning_rate": 8.26959567584707e-06, + "loss": 1.8471, + "step": 16852000 + }, + { + "epoch": 83.49, + "learning_rate": 8.268357089420986e-06, + "loss": 1.8416, + "step": 16852500 + }, + { + "epoch": 83.5, + "learning_rate": 8.267118502994903e-06, + "loss": 1.8504, + "step": 16853000 + }, + { + "epoch": 83.5, + "learning_rate": 8.265879916568819e-06, + "loss": 1.8319, + "step": 16853500 + }, + { + "epoch": 83.5, + "learning_rate": 8.264641330142735e-06, + "loss": 1.826, + "step": 16854000 + }, + { + "epoch": 83.5, + "learning_rate": 8.263405220889503e-06, + "loss": 1.8421, + "step": 16854500 + }, + { + "epoch": 83.51, + "learning_rate": 8.26216663446342e-06, + "loss": 1.8178, + "step": 16855000 + }, + { + "epoch": 83.51, + "learning_rate": 8.260928048037336e-06, + "loss": 1.853, + "step": 16855500 + }, + { + "epoch": 83.51, + "learning_rate": 8.259689461611253e-06, + "loss": 1.852, + "step": 16856000 + }, + { + "epoch": 83.51, + "learning_rate": 8.25845087518517e-06, + "loss": 1.8472, + "step": 16856500 + }, + { + "epoch": 83.52, + "learning_rate": 8.257214765931937e-06, + "loss": 1.8332, + "step": 16857000 + }, + { + "epoch": 83.52, + "learning_rate": 8.255976179505854e-06, + "loss": 1.869, + "step": 16857500 + }, + { + "epoch": 83.52, + "learning_rate": 8.25473759307977e-06, + "loss": 1.8459, + "step": 16858000 + }, + { + "epoch": 83.52, + "learning_rate": 8.253499006653686e-06, + "loss": 1.827, + "step": 16858500 + }, + { + "epoch": 83.53, + "learning_rate": 8.252260420227603e-06, + "loss": 1.8254, + "step": 16859000 + }, + { + "epoch": 83.53, + "learning_rate": 8.25102183380152e-06, + "loss": 1.8504, + "step": 16859500 + }, + { + "epoch": 83.53, + "learning_rate": 8.249783247375436e-06, + "loss": 1.8466, + "step": 16860000 + }, + { + "epoch": 83.53, + "learning_rate": 8.248544660949353e-06, + "loss": 1.8184, + "step": 16860500 + }, + { + "epoch": 83.54, + "learning_rate": 8.247306074523268e-06, + "loss": 1.8469, + "step": 16861000 + }, + { + "epoch": 83.54, + "learning_rate": 8.246067488097185e-06, + "loss": 1.8305, + "step": 16861500 + }, + { + "epoch": 83.54, + "learning_rate": 8.244831378843953e-06, + "loss": 1.8439, + "step": 16862000 + }, + { + "epoch": 83.54, + "learning_rate": 8.24359279241787e-06, + "loss": 1.8378, + "step": 16862500 + }, + { + "epoch": 83.55, + "learning_rate": 8.242354205991786e-06, + "loss": 1.8458, + "step": 16863000 + }, + { + "epoch": 83.55, + "learning_rate": 8.241118096738554e-06, + "loss": 1.8558, + "step": 16863500 + }, + { + "epoch": 83.55, + "learning_rate": 8.239884464658175e-06, + "loss": 1.8557, + "step": 16864000 + }, + { + "epoch": 83.55, + "learning_rate": 8.238645878232092e-06, + "loss": 1.8254, + "step": 16864500 + }, + { + "epoch": 83.56, + "learning_rate": 8.237407291806009e-06, + "loss": 1.8321, + "step": 16865000 + }, + { + "epoch": 83.56, + "learning_rate": 8.236168705379924e-06, + "loss": 1.8203, + "step": 16865500 + }, + { + "epoch": 83.56, + "learning_rate": 8.23493011895384e-06, + "loss": 1.8411, + "step": 16866000 + }, + { + "epoch": 83.56, + "learning_rate": 8.233691532527756e-06, + "loss": 1.8454, + "step": 16866500 + }, + { + "epoch": 83.56, + "learning_rate": 8.232452946101673e-06, + "loss": 1.8568, + "step": 16867000 + }, + { + "epoch": 83.57, + "learning_rate": 8.23121435967559e-06, + "loss": 1.8645, + "step": 16867500 + }, + { + "epoch": 83.57, + "learning_rate": 8.229975773249505e-06, + "loss": 1.8206, + "step": 16868000 + }, + { + "epoch": 83.57, + "learning_rate": 8.228737186823422e-06, + "loss": 1.8414, + "step": 16868500 + }, + { + "epoch": 83.57, + "learning_rate": 8.22749860039734e-06, + "loss": 1.8377, + "step": 16869000 + }, + { + "epoch": 83.58, + "learning_rate": 8.226260013971256e-06, + "loss": 1.8607, + "step": 16869500 + }, + { + "epoch": 83.58, + "learning_rate": 8.225021427545172e-06, + "loss": 1.8547, + "step": 16870000 + }, + { + "epoch": 83.58, + "learning_rate": 8.22378531829194e-06, + "loss": 1.8576, + "step": 16870500 + }, + { + "epoch": 83.58, + "learning_rate": 8.222546731865857e-06, + "loss": 1.8566, + "step": 16871000 + }, + { + "epoch": 83.59, + "learning_rate": 8.221310622612626e-06, + "loss": 1.8361, + "step": 16871500 + }, + { + "epoch": 83.59, + "learning_rate": 8.220072036186541e-06, + "loss": 1.8539, + "step": 16872000 + }, + { + "epoch": 83.59, + "learning_rate": 8.218833449760458e-06, + "loss": 1.8637, + "step": 16872500 + }, + { + "epoch": 83.59, + "learning_rate": 8.217594863334373e-06, + "loss": 1.8486, + "step": 16873000 + }, + { + "epoch": 83.6, + "learning_rate": 8.21635627690829e-06, + "loss": 1.867, + "step": 16873500 + }, + { + "epoch": 83.6, + "learning_rate": 8.21512016765506e-06, + "loss": 1.8524, + "step": 16874000 + }, + { + "epoch": 83.6, + "learning_rate": 8.213881581228976e-06, + "loss": 1.8424, + "step": 16874500 + }, + { + "epoch": 83.6, + "learning_rate": 8.212642994802893e-06, + "loss": 1.8401, + "step": 16875000 + }, + { + "epoch": 83.61, + "learning_rate": 8.211404408376808e-06, + "loss": 1.829, + "step": 16875500 + }, + { + "epoch": 83.61, + "learning_rate": 8.210165821950725e-06, + "loss": 1.8472, + "step": 16876000 + }, + { + "epoch": 83.61, + "learning_rate": 8.20892723552464e-06, + "loss": 1.856, + "step": 16876500 + }, + { + "epoch": 83.61, + "learning_rate": 8.207688649098557e-06, + "loss": 1.8502, + "step": 16877000 + }, + { + "epoch": 83.62, + "learning_rate": 8.206450062672473e-06, + "loss": 1.8319, + "step": 16877500 + }, + { + "epoch": 83.62, + "learning_rate": 8.20521147624639e-06, + "loss": 1.8534, + "step": 16878000 + }, + { + "epoch": 83.62, + "learning_rate": 8.203972889820306e-06, + "loss": 1.8465, + "step": 16878500 + }, + { + "epoch": 83.62, + "learning_rate": 8.202734303394223e-06, + "loss": 1.83, + "step": 16879000 + }, + { + "epoch": 83.63, + "learning_rate": 8.20149819414099e-06, + "loss": 1.856, + "step": 16879500 + }, + { + "epoch": 83.63, + "learning_rate": 8.200259607714907e-06, + "loss": 1.8419, + "step": 16880000 + }, + { + "epoch": 83.63, + "learning_rate": 8.199021021288823e-06, + "loss": 1.849, + "step": 16880500 + }, + { + "epoch": 83.63, + "learning_rate": 8.19778243486274e-06, + "loss": 1.8435, + "step": 16881000 + }, + { + "epoch": 83.64, + "learning_rate": 8.196543848436657e-06, + "loss": 1.8433, + "step": 16881500 + }, + { + "epoch": 83.64, + "learning_rate": 8.195305262010573e-06, + "loss": 1.8504, + "step": 16882000 + }, + { + "epoch": 83.64, + "learning_rate": 8.19406667558449e-06, + "loss": 1.8433, + "step": 16882500 + }, + { + "epoch": 83.64, + "learning_rate": 8.192830566331257e-06, + "loss": 1.8472, + "step": 16883000 + }, + { + "epoch": 83.65, + "learning_rate": 8.191591979905174e-06, + "loss": 1.8073, + "step": 16883500 + }, + { + "epoch": 83.65, + "learning_rate": 8.19035339347909e-06, + "loss": 1.8609, + "step": 16884000 + }, + { + "epoch": 83.65, + "learning_rate": 8.189114807053007e-06, + "loss": 1.8588, + "step": 16884500 + }, + { + "epoch": 83.65, + "learning_rate": 8.187876220626924e-06, + "loss": 1.8786, + "step": 16885000 + }, + { + "epoch": 83.66, + "learning_rate": 8.186642588546544e-06, + "loss": 1.8361, + "step": 16885500 + }, + { + "epoch": 83.66, + "learning_rate": 8.18540400212046e-06, + "loss": 1.8433, + "step": 16886000 + }, + { + "epoch": 83.66, + "learning_rate": 8.184165415694376e-06, + "loss": 1.8312, + "step": 16886500 + }, + { + "epoch": 83.66, + "learning_rate": 8.182926829268293e-06, + "loss": 1.8364, + "step": 16887000 + }, + { + "epoch": 83.67, + "learning_rate": 8.18168824284221e-06, + "loss": 1.8341, + "step": 16887500 + }, + { + "epoch": 83.67, + "learning_rate": 8.180449656416125e-06, + "loss": 1.8374, + "step": 16888000 + }, + { + "epoch": 83.67, + "learning_rate": 8.179211069990042e-06, + "loss": 1.8544, + "step": 16888500 + }, + { + "epoch": 83.67, + "learning_rate": 8.17797248356396e-06, + "loss": 1.8527, + "step": 16889000 + }, + { + "epoch": 83.68, + "learning_rate": 8.176733897137876e-06, + "loss": 1.8377, + "step": 16889500 + }, + { + "epoch": 83.68, + "learning_rate": 8.175497787884643e-06, + "loss": 1.8473, + "step": 16890000 + }, + { + "epoch": 83.68, + "learning_rate": 8.17425920145856e-06, + "loss": 1.8462, + "step": 16890500 + }, + { + "epoch": 83.68, + "learning_rate": 8.173020615032476e-06, + "loss": 1.8251, + "step": 16891000 + }, + { + "epoch": 83.69, + "learning_rate": 8.171782028606392e-06, + "loss": 1.8398, + "step": 16891500 + }, + { + "epoch": 83.69, + "learning_rate": 8.17054344218031e-06, + "loss": 1.8418, + "step": 16892000 + }, + { + "epoch": 83.69, + "learning_rate": 8.169307332927076e-06, + "loss": 1.8455, + "step": 16892500 + }, + { + "epoch": 83.69, + "learning_rate": 8.168068746500993e-06, + "loss": 1.851, + "step": 16893000 + }, + { + "epoch": 83.7, + "learning_rate": 8.16683016007491e-06, + "loss": 1.8684, + "step": 16893500 + }, + { + "epoch": 83.7, + "learning_rate": 8.165591573648826e-06, + "loss": 1.8381, + "step": 16894000 + }, + { + "epoch": 83.7, + "learning_rate": 8.164352987222743e-06, + "loss": 1.8518, + "step": 16894500 + }, + { + "epoch": 83.7, + "learning_rate": 8.16311440079666e-06, + "loss": 1.8514, + "step": 16895000 + }, + { + "epoch": 83.71, + "learning_rate": 8.161875814370576e-06, + "loss": 1.8348, + "step": 16895500 + }, + { + "epoch": 83.71, + "learning_rate": 8.160637227944493e-06, + "loss": 1.8334, + "step": 16896000 + }, + { + "epoch": 83.71, + "learning_rate": 8.159398641518409e-06, + "loss": 1.8742, + "step": 16896500 + }, + { + "epoch": 83.71, + "learning_rate": 8.158162532265176e-06, + "loss": 1.8473, + "step": 16897000 + }, + { + "epoch": 83.72, + "learning_rate": 8.156926423011946e-06, + "loss": 1.852, + "step": 16897500 + }, + { + "epoch": 83.72, + "learning_rate": 8.155687836585861e-06, + "loss": 1.86, + "step": 16898000 + }, + { + "epoch": 83.72, + "learning_rate": 8.154449250159778e-06, + "loss": 1.8337, + "step": 16898500 + }, + { + "epoch": 83.72, + "learning_rate": 8.153213140906545e-06, + "loss": 1.8493, + "step": 16899000 + }, + { + "epoch": 83.73, + "learning_rate": 8.151974554480462e-06, + "loss": 1.8585, + "step": 16899500 + }, + { + "epoch": 83.73, + "learning_rate": 8.15073596805438e-06, + "loss": 1.8479, + "step": 16900000 + }, + { + "epoch": 83.73, + "learning_rate": 8.149497381628296e-06, + "loss": 1.8502, + "step": 16900500 + }, + { + "epoch": 83.73, + "learning_rate": 8.148258795202213e-06, + "loss": 1.8234, + "step": 16901000 + }, + { + "epoch": 83.74, + "learning_rate": 8.147020208776128e-06, + "loss": 1.8306, + "step": 16901500 + }, + { + "epoch": 83.74, + "learning_rate": 8.145781622350045e-06, + "loss": 1.8683, + "step": 16902000 + }, + { + "epoch": 83.74, + "learning_rate": 8.14454303592396e-06, + "loss": 1.8479, + "step": 16902500 + }, + { + "epoch": 83.74, + "learning_rate": 8.143304449497877e-06, + "loss": 1.8506, + "step": 16903000 + }, + { + "epoch": 83.75, + "learning_rate": 8.142065863071793e-06, + "loss": 1.8552, + "step": 16903500 + }, + { + "epoch": 83.75, + "learning_rate": 8.14082727664571e-06, + "loss": 1.8356, + "step": 16904000 + }, + { + "epoch": 83.75, + "learning_rate": 8.139588690219627e-06, + "loss": 1.8356, + "step": 16904500 + }, + { + "epoch": 83.75, + "learning_rate": 8.138350103793543e-06, + "loss": 1.8653, + "step": 16905000 + }, + { + "epoch": 83.76, + "learning_rate": 8.137111517367459e-06, + "loss": 1.8383, + "step": 16905500 + }, + { + "epoch": 83.76, + "learning_rate": 8.135872930941376e-06, + "loss": 1.84, + "step": 16906000 + }, + { + "epoch": 83.76, + "learning_rate": 8.134634344515293e-06, + "loss": 1.8584, + "step": 16906500 + }, + { + "epoch": 83.76, + "learning_rate": 8.13339823526206e-06, + "loss": 1.8601, + "step": 16907000 + }, + { + "epoch": 83.77, + "learning_rate": 8.132162126008828e-06, + "loss": 1.8467, + "step": 16907500 + }, + { + "epoch": 83.77, + "learning_rate": 8.130926016755597e-06, + "loss": 1.8695, + "step": 16908000 + }, + { + "epoch": 83.77, + "learning_rate": 8.129687430329514e-06, + "loss": 1.8648, + "step": 16908500 + }, + { + "epoch": 83.77, + "learning_rate": 8.12844884390343e-06, + "loss": 1.8657, + "step": 16909000 + }, + { + "epoch": 83.78, + "learning_rate": 8.127210257477346e-06, + "loss": 1.8561, + "step": 16909500 + }, + { + "epoch": 83.78, + "learning_rate": 8.125971671051263e-06, + "loss": 1.8567, + "step": 16910000 + }, + { + "epoch": 83.78, + "learning_rate": 8.124733084625179e-06, + "loss": 1.8307, + "step": 16910500 + }, + { + "epoch": 83.78, + "learning_rate": 8.123501929717653e-06, + "loss": 1.8383, + "step": 16911000 + }, + { + "epoch": 83.79, + "learning_rate": 8.122263343291568e-06, + "loss": 1.854, + "step": 16911500 + }, + { + "epoch": 83.79, + "learning_rate": 8.121024756865485e-06, + "loss": 1.8451, + "step": 16912000 + }, + { + "epoch": 83.79, + "learning_rate": 8.119786170439402e-06, + "loss": 1.8435, + "step": 16912500 + }, + { + "epoch": 83.79, + "learning_rate": 8.118547584013319e-06, + "loss": 1.8408, + "step": 16913000 + }, + { + "epoch": 83.8, + "learning_rate": 8.117308997587234e-06, + "loss": 1.8564, + "step": 16913500 + }, + { + "epoch": 83.8, + "learning_rate": 8.116070411161151e-06, + "loss": 1.8348, + "step": 16914000 + }, + { + "epoch": 83.8, + "learning_rate": 8.114831824735066e-06, + "loss": 1.8354, + "step": 16914500 + }, + { + "epoch": 83.8, + "learning_rate": 8.113593238308983e-06, + "loss": 1.8702, + "step": 16915000 + }, + { + "epoch": 83.81, + "learning_rate": 8.112354651882898e-06, + "loss": 1.8578, + "step": 16915500 + }, + { + "epoch": 83.81, + "learning_rate": 8.111116065456815e-06, + "loss": 1.8633, + "step": 16916000 + }, + { + "epoch": 83.81, + "learning_rate": 8.109877479030732e-06, + "loss": 1.8547, + "step": 16916500 + }, + { + "epoch": 83.81, + "learning_rate": 8.10863889260465e-06, + "loss": 1.8517, + "step": 16917000 + }, + { + "epoch": 83.82, + "learning_rate": 8.107400306178566e-06, + "loss": 1.8376, + "step": 16917500 + }, + { + "epoch": 83.82, + "learning_rate": 8.106161719752481e-06, + "loss": 1.8346, + "step": 16918000 + }, + { + "epoch": 83.82, + "learning_rate": 8.104923133326398e-06, + "loss": 1.8532, + "step": 16918500 + }, + { + "epoch": 83.82, + "learning_rate": 8.103684546900314e-06, + "loss": 1.8254, + "step": 16919000 + }, + { + "epoch": 83.83, + "learning_rate": 8.10244596047423e-06, + "loss": 1.8793, + "step": 16919500 + }, + { + "epoch": 83.83, + "learning_rate": 8.101209851221e-06, + "loss": 1.8455, + "step": 16920000 + }, + { + "epoch": 83.83, + "learning_rate": 8.099971264794916e-06, + "loss": 1.8434, + "step": 16920500 + }, + { + "epoch": 83.83, + "learning_rate": 8.098732678368831e-06, + "loss": 1.8476, + "step": 16921000 + }, + { + "epoch": 83.83, + "learning_rate": 8.0974965691156e-06, + "loss": 1.8527, + "step": 16921500 + }, + { + "epoch": 83.84, + "learning_rate": 8.096257982689515e-06, + "loss": 1.8433, + "step": 16922000 + }, + { + "epoch": 83.84, + "learning_rate": 8.095019396263432e-06, + "loss": 1.8571, + "step": 16922500 + }, + { + "epoch": 83.84, + "learning_rate": 8.093788241355905e-06, + "loss": 1.8259, + "step": 16923000 + }, + { + "epoch": 83.84, + "learning_rate": 8.092549654929822e-06, + "loss": 1.8381, + "step": 16923500 + }, + { + "epoch": 83.85, + "learning_rate": 8.091311068503739e-06, + "loss": 1.8427, + "step": 16924000 + }, + { + "epoch": 83.85, + "learning_rate": 8.090072482077654e-06, + "loss": 1.836, + "step": 16924500 + }, + { + "epoch": 83.85, + "learning_rate": 8.088833895651571e-06, + "loss": 1.8502, + "step": 16925000 + }, + { + "epoch": 83.85, + "learning_rate": 8.087595309225488e-06, + "loss": 1.8739, + "step": 16925500 + }, + { + "epoch": 83.86, + "learning_rate": 8.086356722799405e-06, + "loss": 1.8357, + "step": 16926000 + }, + { + "epoch": 83.86, + "learning_rate": 8.08511813637332e-06, + "loss": 1.8489, + "step": 16926500 + }, + { + "epoch": 83.86, + "learning_rate": 8.083882027120089e-06, + "loss": 1.8469, + "step": 16927000 + }, + { + "epoch": 83.86, + "learning_rate": 8.082643440694006e-06, + "loss": 1.8407, + "step": 16927500 + }, + { + "epoch": 83.87, + "learning_rate": 8.081404854267921e-06, + "loss": 1.8313, + "step": 16928000 + }, + { + "epoch": 83.87, + "learning_rate": 8.080166267841838e-06, + "loss": 1.8471, + "step": 16928500 + }, + { + "epoch": 83.87, + "learning_rate": 8.078927681415755e-06, + "loss": 1.8345, + "step": 16929000 + }, + { + "epoch": 83.87, + "learning_rate": 8.077689094989672e-06, + "loss": 1.8277, + "step": 16929500 + }, + { + "epoch": 83.88, + "learning_rate": 8.076450508563587e-06, + "loss": 1.8468, + "step": 16930000 + }, + { + "epoch": 83.88, + "learning_rate": 8.075211922137502e-06, + "loss": 1.8429, + "step": 16930500 + }, + { + "epoch": 83.88, + "learning_rate": 8.07397333571142e-06, + "loss": 1.8404, + "step": 16931000 + }, + { + "epoch": 83.88, + "learning_rate": 8.072734749285336e-06, + "loss": 1.808, + "step": 16931500 + }, + { + "epoch": 83.89, + "learning_rate": 8.071496162859251e-06, + "loss": 1.8741, + "step": 16932000 + }, + { + "epoch": 83.89, + "learning_rate": 8.070257576433168e-06, + "loss": 1.8432, + "step": 16932500 + }, + { + "epoch": 83.89, + "learning_rate": 8.069018990007085e-06, + "loss": 1.8767, + "step": 16933000 + }, + { + "epoch": 83.89, + "learning_rate": 8.067780403581002e-06, + "loss": 1.8349, + "step": 16933500 + }, + { + "epoch": 83.9, + "learning_rate": 8.066541817154919e-06, + "loss": 1.8528, + "step": 16934000 + }, + { + "epoch": 83.9, + "learning_rate": 8.065303230728834e-06, + "loss": 1.8572, + "step": 16934500 + }, + { + "epoch": 83.9, + "learning_rate": 8.064067121475603e-06, + "loss": 1.8533, + "step": 16935000 + }, + { + "epoch": 83.9, + "learning_rate": 8.062828535049518e-06, + "loss": 1.826, + "step": 16935500 + }, + { + "epoch": 83.91, + "learning_rate": 8.061589948623435e-06, + "loss": 1.8313, + "step": 16936000 + }, + { + "epoch": 83.91, + "learning_rate": 8.060351362197352e-06, + "loss": 1.8327, + "step": 16936500 + }, + { + "epoch": 83.91, + "learning_rate": 8.05911277577127e-06, + "loss": 1.8474, + "step": 16937000 + }, + { + "epoch": 83.91, + "learning_rate": 8.057876666518036e-06, + "loss": 1.8312, + "step": 16937500 + }, + { + "epoch": 83.92, + "learning_rate": 8.056640557264805e-06, + "loss": 1.8432, + "step": 16938000 + }, + { + "epoch": 83.92, + "learning_rate": 8.055401970838722e-06, + "loss": 1.8431, + "step": 16938500 + }, + { + "epoch": 83.92, + "learning_rate": 8.054163384412639e-06, + "loss": 1.8459, + "step": 16939000 + }, + { + "epoch": 83.92, + "learning_rate": 8.052927275159406e-06, + "loss": 1.8402, + "step": 16939500 + }, + { + "epoch": 83.93, + "learning_rate": 8.051688688733323e-06, + "loss": 1.8666, + "step": 16940000 + }, + { + "epoch": 83.93, + "learning_rate": 8.050450102307238e-06, + "loss": 1.8427, + "step": 16940500 + }, + { + "epoch": 83.93, + "learning_rate": 8.049211515881155e-06, + "loss": 1.8508, + "step": 16941000 + }, + { + "epoch": 83.93, + "learning_rate": 8.047972929455072e-06, + "loss": 1.8265, + "step": 16941500 + }, + { + "epoch": 83.94, + "learning_rate": 8.046734343028989e-06, + "loss": 1.8547, + "step": 16942000 + }, + { + "epoch": 83.94, + "learning_rate": 8.045495756602904e-06, + "loss": 1.849, + "step": 16942500 + }, + { + "epoch": 83.94, + "learning_rate": 8.044257170176821e-06, + "loss": 1.8449, + "step": 16943000 + }, + { + "epoch": 83.94, + "learning_rate": 8.043021060923588e-06, + "loss": 1.8706, + "step": 16943500 + }, + { + "epoch": 83.95, + "learning_rate": 8.041782474497505e-06, + "loss": 1.8436, + "step": 16944000 + }, + { + "epoch": 83.95, + "learning_rate": 8.040543888071422e-06, + "loss": 1.8417, + "step": 16944500 + }, + { + "epoch": 83.95, + "learning_rate": 8.039305301645339e-06, + "loss": 1.8551, + "step": 16945000 + }, + { + "epoch": 83.95, + "learning_rate": 8.038066715219254e-06, + "loss": 1.8351, + "step": 16945500 + }, + { + "epoch": 83.96, + "learning_rate": 8.036828128793171e-06, + "loss": 1.8591, + "step": 16946000 + }, + { + "epoch": 83.96, + "learning_rate": 8.035589542367088e-06, + "loss": 1.8235, + "step": 16946500 + }, + { + "epoch": 83.96, + "learning_rate": 8.034350955941005e-06, + "loss": 1.8494, + "step": 16947000 + }, + { + "epoch": 83.96, + "learning_rate": 8.03311236951492e-06, + "loss": 1.8261, + "step": 16947500 + }, + { + "epoch": 83.97, + "learning_rate": 8.031873783088836e-06, + "loss": 1.8361, + "step": 16948000 + }, + { + "epoch": 83.97, + "learning_rate": 8.030637673835606e-06, + "loss": 1.8627, + "step": 16948500 + }, + { + "epoch": 83.97, + "learning_rate": 8.029399087409521e-06, + "loss": 1.8427, + "step": 16949000 + }, + { + "epoch": 83.97, + "learning_rate": 8.028160500983438e-06, + "loss": 1.8329, + "step": 16949500 + }, + { + "epoch": 83.98, + "learning_rate": 8.026921914557355e-06, + "loss": 1.8539, + "step": 16950000 + }, + { + "epoch": 83.98, + "learning_rate": 8.025685805304122e-06, + "loss": 1.846, + "step": 16950500 + }, + { + "epoch": 83.98, + "learning_rate": 8.02444721887804e-06, + "loss": 1.8302, + "step": 16951000 + }, + { + "epoch": 83.98, + "learning_rate": 8.023211109624808e-06, + "loss": 1.8428, + "step": 16951500 + }, + { + "epoch": 83.99, + "learning_rate": 8.021972523198725e-06, + "loss": 1.8301, + "step": 16952000 + }, + { + "epoch": 83.99, + "learning_rate": 8.020733936772642e-06, + "loss": 1.8319, + "step": 16952500 + }, + { + "epoch": 83.99, + "learning_rate": 8.019497827519409e-06, + "loss": 1.8183, + "step": 16953000 + }, + { + "epoch": 83.99, + "learning_rate": 8.018259241093326e-06, + "loss": 1.8426, + "step": 16953500 + }, + { + "epoch": 84.0, + "learning_rate": 8.017020654667241e-06, + "loss": 1.8615, + "step": 16954000 + }, + { + "epoch": 84.0, + "learning_rate": 8.015784545414012e-06, + "loss": 1.8561, + "step": 16954500 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.6832028692490457, + "eval_accuracy_mlm": 0.6442947493110244, + "eval_accuracy_nsp": 0.866656991908503, + "eval_loss": 2.302316665649414, + "eval_runtime": 147.5928, + "eval_samples_per_second": 1727.449, + "eval_steps_per_second": 71.982, + "step": 16954812 + }, + { + "epoch": 84.0, + "learning_rate": 8.014545958987927e-06, + "loss": 1.8462, + "step": 16955000 + }, + { + "epoch": 84.0, + "learning_rate": 8.013307372561842e-06, + "loss": 1.8221, + "step": 16955500 + }, + { + "epoch": 84.01, + "learning_rate": 8.012068786135759e-06, + "loss": 1.8519, + "step": 16956000 + }, + { + "epoch": 84.01, + "learning_rate": 8.010830199709676e-06, + "loss": 1.8213, + "step": 16956500 + }, + { + "epoch": 84.01, + "learning_rate": 8.009591613283591e-06, + "loss": 1.8156, + "step": 16957000 + }, + { + "epoch": 84.01, + "learning_rate": 8.008353026857508e-06, + "loss": 1.8342, + "step": 16957500 + }, + { + "epoch": 84.02, + "learning_rate": 8.007114440431425e-06, + "loss": 1.8267, + "step": 16958000 + }, + { + "epoch": 84.02, + "learning_rate": 8.005875854005342e-06, + "loss": 1.8188, + "step": 16958500 + }, + { + "epoch": 84.02, + "learning_rate": 8.004637267579257e-06, + "loss": 1.8403, + "step": 16959000 + }, + { + "epoch": 84.02, + "learning_rate": 8.003398681153174e-06, + "loss": 1.8336, + "step": 16959500 + }, + { + "epoch": 84.03, + "learning_rate": 8.002160094727091e-06, + "loss": 1.833, + "step": 16960000 + }, + { + "epoch": 84.03, + "learning_rate": 8.000921508301006e-06, + "loss": 1.8152, + "step": 16960500 + }, + { + "epoch": 84.03, + "learning_rate": 7.999682921874923e-06, + "loss": 1.8553, + "step": 16961000 + }, + { + "epoch": 84.03, + "learning_rate": 7.998444335448838e-06, + "loss": 1.8138, + "step": 16961500 + }, + { + "epoch": 84.04, + "learning_rate": 7.997205749022755e-06, + "loss": 1.8316, + "step": 16962000 + }, + { + "epoch": 84.04, + "learning_rate": 7.995967162596672e-06, + "loss": 1.8521, + "step": 16962500 + }, + { + "epoch": 84.04, + "learning_rate": 7.99472857617059e-06, + "loss": 1.8573, + "step": 16963000 + }, + { + "epoch": 84.04, + "learning_rate": 7.993489989744504e-06, + "loss": 1.8387, + "step": 16963500 + }, + { + "epoch": 84.05, + "learning_rate": 7.992251403318421e-06, + "loss": 1.8343, + "step": 16964000 + }, + { + "epoch": 84.05, + "learning_rate": 7.991012816892338e-06, + "loss": 1.8275, + "step": 16964500 + }, + { + "epoch": 84.05, + "learning_rate": 7.989776707639105e-06, + "loss": 1.8634, + "step": 16965000 + }, + { + "epoch": 84.05, + "learning_rate": 7.988543075558728e-06, + "loss": 1.8361, + "step": 16965500 + }, + { + "epoch": 84.06, + "learning_rate": 7.987304489132643e-06, + "loss": 1.8258, + "step": 16966000 + }, + { + "epoch": 84.06, + "learning_rate": 7.986065902706558e-06, + "loss": 1.8084, + "step": 16966500 + }, + { + "epoch": 84.06, + "learning_rate": 7.984827316280475e-06, + "loss": 1.8412, + "step": 16967000 + }, + { + "epoch": 84.06, + "learning_rate": 7.983588729854392e-06, + "loss": 1.8305, + "step": 16967500 + }, + { + "epoch": 84.07, + "learning_rate": 7.982350143428309e-06, + "loss": 1.8343, + "step": 16968000 + }, + { + "epoch": 84.07, + "learning_rate": 7.981111557002224e-06, + "loss": 1.8553, + "step": 16968500 + }, + { + "epoch": 84.07, + "learning_rate": 7.979872970576141e-06, + "loss": 1.8563, + "step": 16969000 + }, + { + "epoch": 84.07, + "learning_rate": 7.978634384150058e-06, + "loss": 1.8452, + "step": 16969500 + }, + { + "epoch": 84.08, + "learning_rate": 7.977395797723975e-06, + "loss": 1.848, + "step": 16970000 + }, + { + "epoch": 84.08, + "learning_rate": 7.97615721129789e-06, + "loss": 1.8385, + "step": 16970500 + }, + { + "epoch": 84.08, + "learning_rate": 7.974918624871806e-06, + "loss": 1.8237, + "step": 16971000 + }, + { + "epoch": 84.08, + "learning_rate": 7.973680038445723e-06, + "loss": 1.8452, + "step": 16971500 + }, + { + "epoch": 84.09, + "learning_rate": 7.972443929192491e-06, + "loss": 1.8191, + "step": 16972000 + }, + { + "epoch": 84.09, + "learning_rate": 7.971205342766408e-06, + "loss": 1.8295, + "step": 16972500 + }, + { + "epoch": 84.09, + "learning_rate": 7.969966756340325e-06, + "loss": 1.8397, + "step": 16973000 + }, + { + "epoch": 84.09, + "learning_rate": 7.968730647087092e-06, + "loss": 1.844, + "step": 16973500 + }, + { + "epoch": 84.1, + "learning_rate": 7.96749206066101e-06, + "loss": 1.8406, + "step": 16974000 + }, + { + "epoch": 84.1, + "learning_rate": 7.966253474234926e-06, + "loss": 1.8337, + "step": 16974500 + }, + { + "epoch": 84.1, + "learning_rate": 7.965014887808841e-06, + "loss": 1.8407, + "step": 16975000 + }, + { + "epoch": 84.1, + "learning_rate": 7.96377877855561e-06, + "loss": 1.8369, + "step": 16975500 + }, + { + "epoch": 84.1, + "learning_rate": 7.962540192129527e-06, + "loss": 1.8464, + "step": 16976000 + }, + { + "epoch": 84.11, + "learning_rate": 7.961301605703442e-06, + "loss": 1.8368, + "step": 16976500 + }, + { + "epoch": 84.11, + "learning_rate": 7.96006301927736e-06, + "loss": 1.8383, + "step": 16977000 + }, + { + "epoch": 84.11, + "learning_rate": 7.958824432851276e-06, + "loss": 1.8046, + "step": 16977500 + }, + { + "epoch": 84.11, + "learning_rate": 7.957588323598045e-06, + "loss": 1.8281, + "step": 16978000 + }, + { + "epoch": 84.12, + "learning_rate": 7.956349737171962e-06, + "loss": 1.8391, + "step": 16978500 + }, + { + "epoch": 84.12, + "learning_rate": 7.955111150745877e-06, + "loss": 1.8242, + "step": 16979000 + }, + { + "epoch": 84.12, + "learning_rate": 7.953872564319794e-06, + "loss": 1.8437, + "step": 16979500 + }, + { + "epoch": 84.12, + "learning_rate": 7.95263397789371e-06, + "loss": 1.854, + "step": 16980000 + }, + { + "epoch": 84.13, + "learning_rate": 7.951395391467626e-06, + "loss": 1.8382, + "step": 16980500 + }, + { + "epoch": 84.13, + "learning_rate": 7.950156805041541e-06, + "loss": 1.8546, + "step": 16981000 + }, + { + "epoch": 84.13, + "learning_rate": 7.948918218615458e-06, + "loss": 1.8489, + "step": 16981500 + }, + { + "epoch": 84.13, + "learning_rate": 7.947679632189375e-06, + "loss": 1.8371, + "step": 16982000 + }, + { + "epoch": 84.14, + "learning_rate": 7.946441045763292e-06, + "loss": 1.856, + "step": 16982500 + }, + { + "epoch": 84.14, + "learning_rate": 7.945202459337208e-06, + "loss": 1.8513, + "step": 16983000 + }, + { + "epoch": 84.14, + "learning_rate": 7.943963872911124e-06, + "loss": 1.8511, + "step": 16983500 + }, + { + "epoch": 84.14, + "learning_rate": 7.942727763657892e-06, + "loss": 1.8295, + "step": 16984000 + }, + { + "epoch": 84.15, + "learning_rate": 7.941489177231808e-06, + "loss": 1.8399, + "step": 16984500 + }, + { + "epoch": 84.15, + "learning_rate": 7.940250590805725e-06, + "loss": 1.8302, + "step": 16985000 + }, + { + "epoch": 84.15, + "learning_rate": 7.939012004379642e-06, + "loss": 1.8155, + "step": 16985500 + }, + { + "epoch": 84.15, + "learning_rate": 7.93777341795356e-06, + "loss": 1.8365, + "step": 16986000 + }, + { + "epoch": 84.16, + "learning_rate": 7.936534831527475e-06, + "loss": 1.8189, + "step": 16986500 + }, + { + "epoch": 84.16, + "learning_rate": 7.935296245101391e-06, + "loss": 1.8391, + "step": 16987000 + }, + { + "epoch": 84.16, + "learning_rate": 7.934060135848159e-06, + "loss": 1.8209, + "step": 16987500 + }, + { + "epoch": 84.16, + "learning_rate": 7.932821549422075e-06, + "loss": 1.8231, + "step": 16988000 + }, + { + "epoch": 84.17, + "learning_rate": 7.931582962995992e-06, + "loss": 1.8484, + "step": 16988500 + }, + { + "epoch": 84.17, + "learning_rate": 7.93034437656991e-06, + "loss": 1.8321, + "step": 16989000 + }, + { + "epoch": 84.17, + "learning_rate": 7.929108267316678e-06, + "loss": 1.8361, + "step": 16989500 + }, + { + "epoch": 84.17, + "learning_rate": 7.927869680890593e-06, + "loss": 1.8285, + "step": 16990000 + }, + { + "epoch": 84.18, + "learning_rate": 7.926633571637362e-06, + "loss": 1.8154, + "step": 16990500 + }, + { + "epoch": 84.18, + "learning_rate": 7.925394985211279e-06, + "loss": 1.8295, + "step": 16991000 + }, + { + "epoch": 84.18, + "learning_rate": 7.924156398785194e-06, + "loss": 1.8502, + "step": 16991500 + }, + { + "epoch": 84.18, + "learning_rate": 7.922917812359111e-06, + "loss": 1.8271, + "step": 16992000 + }, + { + "epoch": 84.19, + "learning_rate": 7.921679225933028e-06, + "loss": 1.844, + "step": 16992500 + }, + { + "epoch": 84.19, + "learning_rate": 7.920440639506945e-06, + "loss": 1.8533, + "step": 16993000 + }, + { + "epoch": 84.19, + "learning_rate": 7.91920205308086e-06, + "loss": 1.8416, + "step": 16993500 + }, + { + "epoch": 84.19, + "learning_rate": 7.917963466654776e-06, + "loss": 1.8207, + "step": 16994000 + }, + { + "epoch": 84.2, + "learning_rate": 7.916724880228693e-06, + "loss": 1.826, + "step": 16994500 + }, + { + "epoch": 84.2, + "learning_rate": 7.915488770975461e-06, + "loss": 1.8372, + "step": 16995000 + }, + { + "epoch": 84.2, + "learning_rate": 7.914250184549378e-06, + "loss": 1.8446, + "step": 16995500 + }, + { + "epoch": 84.2, + "learning_rate": 7.913011598123295e-06, + "loss": 1.8221, + "step": 16996000 + }, + { + "epoch": 84.21, + "learning_rate": 7.91177301169721e-06, + "loss": 1.8195, + "step": 16996500 + }, + { + "epoch": 84.21, + "learning_rate": 7.910534425271127e-06, + "loss": 1.8499, + "step": 16997000 + }, + { + "epoch": 84.21, + "learning_rate": 7.909295838845043e-06, + "loss": 1.8661, + "step": 16997500 + }, + { + "epoch": 84.21, + "learning_rate": 7.90805725241896e-06, + "loss": 1.8447, + "step": 16998000 + }, + { + "epoch": 84.22, + "learning_rate": 7.906818665992876e-06, + "loss": 1.8378, + "step": 16998500 + }, + { + "epoch": 84.22, + "learning_rate": 7.905580079566792e-06, + "loss": 1.8235, + "step": 16999000 + }, + { + "epoch": 84.22, + "learning_rate": 7.904341493140709e-06, + "loss": 1.8206, + "step": 16999500 + }, + { + "epoch": 84.22, + "learning_rate": 7.903102906714626e-06, + "loss": 1.8221, + "step": 17000000 + }, + { + "epoch": 84.23, + "learning_rate": 7.901864320288542e-06, + "loss": 1.8462, + "step": 17000500 + }, + { + "epoch": 84.23, + "learning_rate": 7.900625733862458e-06, + "loss": 1.8407, + "step": 17001000 + }, + { + "epoch": 84.23, + "learning_rate": 7.899389624609227e-06, + "loss": 1.8141, + "step": 17001500 + }, + { + "epoch": 84.23, + "learning_rate": 7.898153515355995e-06, + "loss": 1.8363, + "step": 17002000 + }, + { + "epoch": 84.24, + "learning_rate": 7.896914928929912e-06, + "loss": 1.8402, + "step": 17002500 + }, + { + "epoch": 84.24, + "learning_rate": 7.895676342503827e-06, + "loss": 1.8264, + "step": 17003000 + }, + { + "epoch": 84.24, + "learning_rate": 7.894440233250596e-06, + "loss": 1.8196, + "step": 17003500 + }, + { + "epoch": 84.24, + "learning_rate": 7.893201646824512e-06, + "loss": 1.8387, + "step": 17004000 + }, + { + "epoch": 84.25, + "learning_rate": 7.891963060398428e-06, + "loss": 1.8212, + "step": 17004500 + }, + { + "epoch": 84.25, + "learning_rate": 7.890724473972345e-06, + "loss": 1.8424, + "step": 17005000 + }, + { + "epoch": 84.25, + "learning_rate": 7.889485887546262e-06, + "loss": 1.8423, + "step": 17005500 + }, + { + "epoch": 84.25, + "learning_rate": 7.888247301120178e-06, + "loss": 1.861, + "step": 17006000 + }, + { + "epoch": 84.26, + "learning_rate": 7.887008714694094e-06, + "loss": 1.8443, + "step": 17006500 + }, + { + "epoch": 84.26, + "learning_rate": 7.885770128268011e-06, + "loss": 1.8155, + "step": 17007000 + }, + { + "epoch": 84.26, + "learning_rate": 7.884531541841927e-06, + "loss": 1.8188, + "step": 17007500 + }, + { + "epoch": 84.26, + "learning_rate": 7.883295432588695e-06, + "loss": 1.8421, + "step": 17008000 + }, + { + "epoch": 84.27, + "learning_rate": 7.882056846162612e-06, + "loss": 1.8375, + "step": 17008500 + }, + { + "epoch": 84.27, + "learning_rate": 7.880823214082231e-06, + "loss": 1.8535, + "step": 17009000 + }, + { + "epoch": 84.27, + "learning_rate": 7.879584627656148e-06, + "loss": 1.8275, + "step": 17009500 + }, + { + "epoch": 84.27, + "learning_rate": 7.878346041230065e-06, + "loss": 1.8321, + "step": 17010000 + }, + { + "epoch": 84.28, + "learning_rate": 7.877107454803982e-06, + "loss": 1.8381, + "step": 17010500 + }, + { + "epoch": 84.28, + "learning_rate": 7.875868868377897e-06, + "loss": 1.8361, + "step": 17011000 + }, + { + "epoch": 84.28, + "learning_rate": 7.874630281951814e-06, + "loss": 1.8647, + "step": 17011500 + }, + { + "epoch": 84.28, + "learning_rate": 7.873391695525731e-06, + "loss": 1.8336, + "step": 17012000 + }, + { + "epoch": 84.29, + "learning_rate": 7.872155586272498e-06, + "loss": 1.8077, + "step": 17012500 + }, + { + "epoch": 84.29, + "learning_rate": 7.870916999846415e-06, + "loss": 1.8053, + "step": 17013000 + }, + { + "epoch": 84.29, + "learning_rate": 7.869678413420332e-06, + "loss": 1.8485, + "step": 17013500 + }, + { + "epoch": 84.29, + "learning_rate": 7.868439826994247e-06, + "loss": 1.8127, + "step": 17014000 + }, + { + "epoch": 84.3, + "learning_rate": 7.867201240568164e-06, + "loss": 1.8356, + "step": 17014500 + }, + { + "epoch": 84.3, + "learning_rate": 7.865962654142081e-06, + "loss": 1.8355, + "step": 17015000 + }, + { + "epoch": 84.3, + "learning_rate": 7.86472654488885e-06, + "loss": 1.8082, + "step": 17015500 + }, + { + "epoch": 84.3, + "learning_rate": 7.863487958462765e-06, + "loss": 1.8357, + "step": 17016000 + }, + { + "epoch": 84.31, + "learning_rate": 7.862249372036682e-06, + "loss": 1.8336, + "step": 17016500 + }, + { + "epoch": 84.31, + "learning_rate": 7.8610107856106e-06, + "loss": 1.8199, + "step": 17017000 + }, + { + "epoch": 84.31, + "learning_rate": 7.859772199184514e-06, + "loss": 1.8521, + "step": 17017500 + }, + { + "epoch": 84.31, + "learning_rate": 7.858533612758431e-06, + "loss": 1.8288, + "step": 17018000 + }, + { + "epoch": 84.32, + "learning_rate": 7.857295026332348e-06, + "loss": 1.8476, + "step": 17018500 + }, + { + "epoch": 84.32, + "learning_rate": 7.856056439906265e-06, + "loss": 1.8548, + "step": 17019000 + }, + { + "epoch": 84.32, + "learning_rate": 7.85481785348018e-06, + "loss": 1.8474, + "step": 17019500 + }, + { + "epoch": 84.32, + "learning_rate": 7.853579267054097e-06, + "loss": 1.8384, + "step": 17020000 + }, + { + "epoch": 84.33, + "learning_rate": 7.852340680628013e-06, + "loss": 1.8482, + "step": 17020500 + }, + { + "epoch": 84.33, + "learning_rate": 7.851104571374781e-06, + "loss": 1.8128, + "step": 17021000 + }, + { + "epoch": 84.33, + "learning_rate": 7.849865984948698e-06, + "loss": 1.8552, + "step": 17021500 + }, + { + "epoch": 84.33, + "learning_rate": 7.848627398522615e-06, + "loss": 1.8474, + "step": 17022000 + }, + { + "epoch": 84.34, + "learning_rate": 7.84738881209653e-06, + "loss": 1.8386, + "step": 17022500 + }, + { + "epoch": 84.34, + "learning_rate": 7.846150225670447e-06, + "loss": 1.8323, + "step": 17023000 + }, + { + "epoch": 84.34, + "learning_rate": 7.844911639244364e-06, + "loss": 1.842, + "step": 17023500 + }, + { + "epoch": 84.34, + "learning_rate": 7.84367305281828e-06, + "loss": 1.8414, + "step": 17024000 + }, + { + "epoch": 84.35, + "learning_rate": 7.842434466392197e-06, + "loss": 1.8498, + "step": 17024500 + }, + { + "epoch": 84.35, + "learning_rate": 7.841195879966112e-06, + "loss": 1.8335, + "step": 17025000 + }, + { + "epoch": 84.35, + "learning_rate": 7.839957293540029e-06, + "loss": 1.8652, + "step": 17025500 + }, + { + "epoch": 84.35, + "learning_rate": 7.838718707113946e-06, + "loss": 1.8454, + "step": 17026000 + }, + { + "epoch": 84.36, + "learning_rate": 7.837480120687863e-06, + "loss": 1.8364, + "step": 17026500 + }, + { + "epoch": 84.36, + "learning_rate": 7.836241534261778e-06, + "loss": 1.8356, + "step": 17027000 + }, + { + "epoch": 84.36, + "learning_rate": 7.835005425008547e-06, + "loss": 1.8091, + "step": 17027500 + }, + { + "epoch": 84.36, + "learning_rate": 7.833771792928167e-06, + "loss": 1.8163, + "step": 17028000 + }, + { + "epoch": 84.37, + "learning_rate": 7.832533206502084e-06, + "loss": 1.8489, + "step": 17028500 + }, + { + "epoch": 84.37, + "learning_rate": 7.831294620076001e-06, + "loss": 1.827, + "step": 17029000 + }, + { + "epoch": 84.37, + "learning_rate": 7.830056033649916e-06, + "loss": 1.8415, + "step": 17029500 + }, + { + "epoch": 84.37, + "learning_rate": 7.828817447223832e-06, + "loss": 1.8473, + "step": 17030000 + }, + { + "epoch": 84.37, + "learning_rate": 7.827578860797749e-06, + "loss": 1.8372, + "step": 17030500 + }, + { + "epoch": 84.38, + "learning_rate": 7.826340274371665e-06, + "loss": 1.8355, + "step": 17031000 + }, + { + "epoch": 84.38, + "learning_rate": 7.825101687945582e-06, + "loss": 1.8416, + "step": 17031500 + }, + { + "epoch": 84.38, + "learning_rate": 7.823863101519498e-06, + "loss": 1.8416, + "step": 17032000 + }, + { + "epoch": 84.38, + "learning_rate": 7.822626992266268e-06, + "loss": 1.8583, + "step": 17032500 + }, + { + "epoch": 84.39, + "learning_rate": 7.821388405840183e-06, + "loss": 1.8285, + "step": 17033000 + }, + { + "epoch": 84.39, + "learning_rate": 7.820154773759804e-06, + "loss": 1.8481, + "step": 17033500 + }, + { + "epoch": 84.39, + "learning_rate": 7.818916187333721e-06, + "loss": 1.8426, + "step": 17034000 + }, + { + "epoch": 84.39, + "learning_rate": 7.817680078080488e-06, + "loss": 1.8573, + "step": 17034500 + }, + { + "epoch": 84.4, + "learning_rate": 7.816441491654405e-06, + "loss": 1.8361, + "step": 17035000 + }, + { + "epoch": 84.4, + "learning_rate": 7.81520290522832e-06, + "loss": 1.8154, + "step": 17035500 + }, + { + "epoch": 84.4, + "learning_rate": 7.813964318802237e-06, + "loss": 1.842, + "step": 17036000 + }, + { + "epoch": 84.4, + "learning_rate": 7.812725732376154e-06, + "loss": 1.8334, + "step": 17036500 + }, + { + "epoch": 84.41, + "learning_rate": 7.811487145950071e-06, + "loss": 1.8513, + "step": 17037000 + }, + { + "epoch": 84.41, + "learning_rate": 7.810248559523988e-06, + "loss": 1.8297, + "step": 17037500 + }, + { + "epoch": 84.41, + "learning_rate": 7.809009973097903e-06, + "loss": 1.8457, + "step": 17038000 + }, + { + "epoch": 84.41, + "learning_rate": 7.80777138667182e-06, + "loss": 1.8334, + "step": 17038500 + }, + { + "epoch": 84.42, + "learning_rate": 7.806532800245735e-06, + "loss": 1.8272, + "step": 17039000 + }, + { + "epoch": 84.42, + "learning_rate": 7.805294213819652e-06, + "loss": 1.8413, + "step": 17039500 + }, + { + "epoch": 84.42, + "learning_rate": 7.804058104566421e-06, + "loss": 1.8488, + "step": 17040000 + }, + { + "epoch": 84.42, + "learning_rate": 7.802819518140338e-06, + "loss": 1.8612, + "step": 17040500 + }, + { + "epoch": 84.43, + "learning_rate": 7.801583408887105e-06, + "loss": 1.8332, + "step": 17041000 + }, + { + "epoch": 84.43, + "learning_rate": 7.800344822461022e-06, + "loss": 1.8456, + "step": 17041500 + }, + { + "epoch": 84.43, + "learning_rate": 7.799106236034937e-06, + "loss": 1.8381, + "step": 17042000 + }, + { + "epoch": 84.43, + "learning_rate": 7.797867649608854e-06, + "loss": 1.8566, + "step": 17042500 + }, + { + "epoch": 84.44, + "learning_rate": 7.796629063182771e-06, + "loss": 1.8477, + "step": 17043000 + }, + { + "epoch": 84.44, + "learning_rate": 7.795390476756688e-06, + "loss": 1.8529, + "step": 17043500 + }, + { + "epoch": 84.44, + "learning_rate": 7.794151890330603e-06, + "loss": 1.8672, + "step": 17044000 + }, + { + "epoch": 84.44, + "learning_rate": 7.79291330390452e-06, + "loss": 1.8464, + "step": 17044500 + }, + { + "epoch": 84.45, + "learning_rate": 7.791674717478437e-06, + "loss": 1.839, + "step": 17045000 + }, + { + "epoch": 84.45, + "learning_rate": 7.790436131052352e-06, + "loss": 1.8245, + "step": 17045500 + }, + { + "epoch": 84.45, + "learning_rate": 7.789200021799121e-06, + "loss": 1.8538, + "step": 17046000 + }, + { + "epoch": 84.45, + "learning_rate": 7.78796391254589e-06, + "loss": 1.8323, + "step": 17046500 + }, + { + "epoch": 84.46, + "learning_rate": 7.786725326119807e-06, + "loss": 1.8598, + "step": 17047000 + }, + { + "epoch": 84.46, + "learning_rate": 7.785486739693724e-06, + "loss": 1.8355, + "step": 17047500 + }, + { + "epoch": 84.46, + "learning_rate": 7.784248153267639e-06, + "loss": 1.8116, + "step": 17048000 + }, + { + "epoch": 84.46, + "learning_rate": 7.783009566841554e-06, + "loss": 1.8604, + "step": 17048500 + }, + { + "epoch": 84.47, + "learning_rate": 7.781770980415471e-06, + "loss": 1.8285, + "step": 17049000 + }, + { + "epoch": 84.47, + "learning_rate": 7.780532393989388e-06, + "loss": 1.8307, + "step": 17049500 + }, + { + "epoch": 84.47, + "learning_rate": 7.779293807563305e-06, + "loss": 1.8472, + "step": 17050000 + }, + { + "epoch": 84.47, + "learning_rate": 7.77805522113722e-06, + "loss": 1.8473, + "step": 17050500 + }, + { + "epoch": 84.48, + "learning_rate": 7.776816634711137e-06, + "loss": 1.8434, + "step": 17051000 + }, + { + "epoch": 84.48, + "learning_rate": 7.775578048285054e-06, + "loss": 1.8244, + "step": 17051500 + }, + { + "epoch": 84.48, + "learning_rate": 7.774339461858971e-06, + "loss": 1.8367, + "step": 17052000 + }, + { + "epoch": 84.48, + "learning_rate": 7.773100875432886e-06, + "loss": 1.8354, + "step": 17052500 + }, + { + "epoch": 84.49, + "learning_rate": 7.771862289006802e-06, + "loss": 1.8472, + "step": 17053000 + }, + { + "epoch": 84.49, + "learning_rate": 7.770623702580719e-06, + "loss": 1.8338, + "step": 17053500 + }, + { + "epoch": 84.49, + "learning_rate": 7.769385116154636e-06, + "loss": 1.839, + "step": 17054000 + }, + { + "epoch": 84.49, + "learning_rate": 7.768149006901404e-06, + "loss": 1.8231, + "step": 17054500 + }, + { + "epoch": 84.5, + "learning_rate": 7.766910420475321e-06, + "loss": 1.8009, + "step": 17055000 + }, + { + "epoch": 84.5, + "learning_rate": 7.765671834049236e-06, + "loss": 1.8313, + "step": 17055500 + }, + { + "epoch": 84.5, + "learning_rate": 7.764433247623153e-06, + "loss": 1.8371, + "step": 17056000 + }, + { + "epoch": 84.5, + "learning_rate": 7.76319713836992e-06, + "loss": 1.8367, + "step": 17056500 + }, + { + "epoch": 84.51, + "learning_rate": 7.761961029116691e-06, + "loss": 1.8234, + "step": 17057000 + }, + { + "epoch": 84.51, + "learning_rate": 7.760722442690606e-06, + "loss": 1.8304, + "step": 17057500 + }, + { + "epoch": 84.51, + "learning_rate": 7.759483856264523e-06, + "loss": 1.824, + "step": 17058000 + }, + { + "epoch": 84.51, + "learning_rate": 7.758245269838438e-06, + "loss": 1.874, + "step": 17058500 + }, + { + "epoch": 84.52, + "learning_rate": 7.757009160585207e-06, + "loss": 1.8442, + "step": 17059000 + }, + { + "epoch": 84.52, + "learning_rate": 7.755770574159124e-06, + "loss": 1.8528, + "step": 17059500 + }, + { + "epoch": 84.52, + "learning_rate": 7.754531987733041e-06, + "loss": 1.8433, + "step": 17060000 + }, + { + "epoch": 84.52, + "learning_rate": 7.753293401306956e-06, + "loss": 1.8345, + "step": 17060500 + }, + { + "epoch": 84.53, + "learning_rate": 7.752054814880873e-06, + "loss": 1.8379, + "step": 17061000 + }, + { + "epoch": 84.53, + "learning_rate": 7.750821182800494e-06, + "loss": 1.8521, + "step": 17061500 + }, + { + "epoch": 84.53, + "learning_rate": 7.749582596374411e-06, + "loss": 1.8272, + "step": 17062000 + }, + { + "epoch": 84.53, + "learning_rate": 7.748344009948326e-06, + "loss": 1.8369, + "step": 17062500 + }, + { + "epoch": 84.54, + "learning_rate": 7.747105423522243e-06, + "loss": 1.8372, + "step": 17063000 + }, + { + "epoch": 84.54, + "learning_rate": 7.74586683709616e-06, + "loss": 1.8453, + "step": 17063500 + }, + { + "epoch": 84.54, + "learning_rate": 7.744628250670075e-06, + "loss": 1.8467, + "step": 17064000 + }, + { + "epoch": 84.54, + "learning_rate": 7.743389664243992e-06, + "loss": 1.8753, + "step": 17064500 + }, + { + "epoch": 84.55, + "learning_rate": 7.742151077817907e-06, + "loss": 1.8608, + "step": 17065000 + }, + { + "epoch": 84.55, + "learning_rate": 7.740914968564676e-06, + "loss": 1.8318, + "step": 17065500 + }, + { + "epoch": 84.55, + "learning_rate": 7.739676382138593e-06, + "loss": 1.8178, + "step": 17066000 + }, + { + "epoch": 84.55, + "learning_rate": 7.73843779571251e-06, + "loss": 1.8384, + "step": 17066500 + }, + { + "epoch": 84.56, + "learning_rate": 7.737201686459277e-06, + "loss": 1.8309, + "step": 17067000 + }, + { + "epoch": 84.56, + "learning_rate": 7.735963100033194e-06, + "loss": 1.8419, + "step": 17067500 + }, + { + "epoch": 84.56, + "learning_rate": 7.734724513607111e-06, + "loss": 1.8465, + "step": 17068000 + }, + { + "epoch": 84.56, + "learning_rate": 7.733485927181028e-06, + "loss": 1.8397, + "step": 17068500 + }, + { + "epoch": 84.57, + "learning_rate": 7.732249817927797e-06, + "loss": 1.8555, + "step": 17069000 + }, + { + "epoch": 84.57, + "learning_rate": 7.731011231501712e-06, + "loss": 1.8209, + "step": 17069500 + }, + { + "epoch": 84.57, + "learning_rate": 7.729772645075627e-06, + "loss": 1.8135, + "step": 17070000 + }, + { + "epoch": 84.57, + "learning_rate": 7.728534058649544e-06, + "loss": 1.8428, + "step": 17070500 + }, + { + "epoch": 84.58, + "learning_rate": 7.727295472223461e-06, + "loss": 1.8458, + "step": 17071000 + }, + { + "epoch": 84.58, + "learning_rate": 7.726056885797378e-06, + "loss": 1.856, + "step": 17071500 + }, + { + "epoch": 84.58, + "learning_rate": 7.724818299371293e-06, + "loss": 1.8359, + "step": 17072000 + }, + { + "epoch": 84.58, + "learning_rate": 7.72357971294521e-06, + "loss": 1.8637, + "step": 17072500 + }, + { + "epoch": 84.59, + "learning_rate": 7.722341126519127e-06, + "loss": 1.8506, + "step": 17073000 + }, + { + "epoch": 84.59, + "learning_rate": 7.721102540093044e-06, + "loss": 1.8365, + "step": 17073500 + }, + { + "epoch": 84.59, + "learning_rate": 7.71986395366696e-06, + "loss": 1.8213, + "step": 17074000 + }, + { + "epoch": 84.59, + "learning_rate": 7.718627844413728e-06, + "loss": 1.857, + "step": 17074500 + }, + { + "epoch": 84.6, + "learning_rate": 7.717389257987643e-06, + "loss": 1.852, + "step": 17075000 + }, + { + "epoch": 84.6, + "learning_rate": 7.71615067156156e-06, + "loss": 1.837, + "step": 17075500 + }, + { + "epoch": 84.6, + "learning_rate": 7.714914562308329e-06, + "loss": 1.8548, + "step": 17076000 + }, + { + "epoch": 84.6, + "learning_rate": 7.713675975882246e-06, + "loss": 1.8406, + "step": 17076500 + }, + { + "epoch": 84.61, + "learning_rate": 7.712437389456161e-06, + "loss": 1.8264, + "step": 17077000 + }, + { + "epoch": 84.61, + "learning_rate": 7.711198803030078e-06, + "loss": 1.8159, + "step": 17077500 + }, + { + "epoch": 84.61, + "learning_rate": 7.709960216603995e-06, + "loss": 1.8427, + "step": 17078000 + }, + { + "epoch": 84.61, + "learning_rate": 7.70872163017791e-06, + "loss": 1.8392, + "step": 17078500 + }, + { + "epoch": 84.62, + "learning_rate": 7.707485520924679e-06, + "loss": 1.8153, + "step": 17079000 + }, + { + "epoch": 84.62, + "learning_rate": 7.706246934498596e-06, + "loss": 1.8228, + "step": 17079500 + }, + { + "epoch": 84.62, + "learning_rate": 7.705008348072513e-06, + "loss": 1.8609, + "step": 17080000 + }, + { + "epoch": 84.62, + "learning_rate": 7.70377223881928e-06, + "loss": 1.8299, + "step": 17080500 + }, + { + "epoch": 84.63, + "learning_rate": 7.702533652393197e-06, + "loss": 1.8111, + "step": 17081000 + }, + { + "epoch": 84.63, + "learning_rate": 7.701295065967114e-06, + "loss": 1.8449, + "step": 17081500 + }, + { + "epoch": 84.63, + "learning_rate": 7.70005647954103e-06, + "loss": 1.8364, + "step": 17082000 + }, + { + "epoch": 84.63, + "learning_rate": 7.698817893114946e-06, + "loss": 1.8152, + "step": 17082500 + }, + { + "epoch": 84.64, + "learning_rate": 7.697579306688863e-06, + "loss": 1.8639, + "step": 17083000 + }, + { + "epoch": 84.64, + "learning_rate": 7.69634072026278e-06, + "loss": 1.8277, + "step": 17083500 + }, + { + "epoch": 84.64, + "learning_rate": 7.695104611009547e-06, + "loss": 1.8601, + "step": 17084000 + }, + { + "epoch": 84.64, + "learning_rate": 7.693866024583464e-06, + "loss": 1.8484, + "step": 17084500 + }, + { + "epoch": 84.64, + "learning_rate": 7.692627438157381e-06, + "loss": 1.8473, + "step": 17085000 + }, + { + "epoch": 84.65, + "learning_rate": 7.691388851731296e-06, + "loss": 1.832, + "step": 17085500 + }, + { + "epoch": 84.65, + "learning_rate": 7.690150265305213e-06, + "loss": 1.8255, + "step": 17086000 + }, + { + "epoch": 84.65, + "learning_rate": 7.68891167887913e-06, + "loss": 1.8459, + "step": 17086500 + }, + { + "epoch": 84.65, + "learning_rate": 7.687673092453045e-06, + "loss": 1.8387, + "step": 17087000 + }, + { + "epoch": 84.66, + "learning_rate": 7.686434506026962e-06, + "loss": 1.8355, + "step": 17087500 + }, + { + "epoch": 84.66, + "learning_rate": 7.685195919600877e-06, + "loss": 1.8354, + "step": 17088000 + }, + { + "epoch": 84.66, + "learning_rate": 7.683959810347646e-06, + "loss": 1.8374, + "step": 17088500 + }, + { + "epoch": 84.66, + "learning_rate": 7.682721223921563e-06, + "loss": 1.855, + "step": 17089000 + }, + { + "epoch": 84.67, + "learning_rate": 7.68148263749548e-06, + "loss": 1.8378, + "step": 17089500 + }, + { + "epoch": 84.67, + "learning_rate": 7.680244051069397e-06, + "loss": 1.8314, + "step": 17090000 + }, + { + "epoch": 84.67, + "learning_rate": 7.679005464643312e-06, + "loss": 1.8417, + "step": 17090500 + }, + { + "epoch": 84.67, + "learning_rate": 7.677766878217227e-06, + "loss": 1.8242, + "step": 17091000 + }, + { + "epoch": 84.68, + "learning_rate": 7.676528291791144e-06, + "loss": 1.8317, + "step": 17091500 + }, + { + "epoch": 84.68, + "learning_rate": 7.675289705365061e-06, + "loss": 1.8242, + "step": 17092000 + }, + { + "epoch": 84.68, + "learning_rate": 7.674051118938978e-06, + "loss": 1.8445, + "step": 17092500 + }, + { + "epoch": 84.68, + "learning_rate": 7.672812532512893e-06, + "loss": 1.8343, + "step": 17093000 + }, + { + "epoch": 84.69, + "learning_rate": 7.67157394608681e-06, + "loss": 1.8387, + "step": 17093500 + }, + { + "epoch": 84.69, + "learning_rate": 7.670335359660727e-06, + "loss": 1.831, + "step": 17094000 + }, + { + "epoch": 84.69, + "learning_rate": 7.669099250407494e-06, + "loss": 1.8534, + "step": 17094500 + }, + { + "epoch": 84.69, + "learning_rate": 7.667860663981411e-06, + "loss": 1.8372, + "step": 17095000 + }, + { + "epoch": 84.7, + "learning_rate": 7.666622077555328e-06, + "loss": 1.8479, + "step": 17095500 + }, + { + "epoch": 84.7, + "learning_rate": 7.665383491129244e-06, + "loss": 1.8408, + "step": 17096000 + }, + { + "epoch": 84.7, + "learning_rate": 7.66414490470316e-06, + "loss": 1.8408, + "step": 17096500 + }, + { + "epoch": 84.7, + "learning_rate": 7.662906318277077e-06, + "loss": 1.8068, + "step": 17097000 + }, + { + "epoch": 84.71, + "learning_rate": 7.661670209023846e-06, + "loss": 1.828, + "step": 17097500 + }, + { + "epoch": 84.71, + "learning_rate": 7.660431622597761e-06, + "loss": 1.8304, + "step": 17098000 + }, + { + "epoch": 84.71, + "learning_rate": 7.659193036171678e-06, + "loss": 1.8379, + "step": 17098500 + }, + { + "epoch": 84.71, + "learning_rate": 7.657956926918447e-06, + "loss": 1.8449, + "step": 17099000 + }, + { + "epoch": 84.72, + "learning_rate": 7.656720817665216e-06, + "loss": 1.8525, + "step": 17099500 + }, + { + "epoch": 84.72, + "learning_rate": 7.655482231239131e-06, + "loss": 1.8489, + "step": 17100000 + }, + { + "epoch": 84.72, + "learning_rate": 7.6542461219859e-06, + "loss": 1.8611, + "step": 17100500 + }, + { + "epoch": 84.72, + "learning_rate": 7.653007535559817e-06, + "loss": 1.833, + "step": 17101000 + }, + { + "epoch": 84.73, + "learning_rate": 7.651768949133734e-06, + "loss": 1.8469, + "step": 17101500 + }, + { + "epoch": 84.73, + "learning_rate": 7.650530362707649e-06, + "loss": 1.8489, + "step": 17102000 + }, + { + "epoch": 84.73, + "learning_rate": 7.649291776281566e-06, + "loss": 1.8617, + "step": 17102500 + }, + { + "epoch": 84.73, + "learning_rate": 7.648053189855483e-06, + "loss": 1.8169, + "step": 17103000 + }, + { + "epoch": 84.74, + "learning_rate": 7.646814603429398e-06, + "loss": 1.8282, + "step": 17103500 + }, + { + "epoch": 84.74, + "learning_rate": 7.645576017003313e-06, + "loss": 1.8326, + "step": 17104000 + }, + { + "epoch": 84.74, + "learning_rate": 7.64433743057723e-06, + "loss": 1.8637, + "step": 17104500 + }, + { + "epoch": 84.74, + "learning_rate": 7.643098844151147e-06, + "loss": 1.8482, + "step": 17105000 + }, + { + "epoch": 84.75, + "learning_rate": 7.641860257725064e-06, + "loss": 1.8558, + "step": 17105500 + }, + { + "epoch": 84.75, + "learning_rate": 7.640621671298981e-06, + "loss": 1.8512, + "step": 17106000 + }, + { + "epoch": 84.75, + "learning_rate": 7.639383084872896e-06, + "loss": 1.8263, + "step": 17106500 + }, + { + "epoch": 84.75, + "learning_rate": 7.638144498446813e-06, + "loss": 1.8581, + "step": 17107000 + }, + { + "epoch": 84.76, + "learning_rate": 7.63690591202073e-06, + "loss": 1.8461, + "step": 17107500 + }, + { + "epoch": 84.76, + "learning_rate": 7.635667325594645e-06, + "loss": 1.8221, + "step": 17108000 + }, + { + "epoch": 84.76, + "learning_rate": 7.63442873916856e-06, + "loss": 1.8433, + "step": 17108500 + }, + { + "epoch": 84.76, + "learning_rate": 7.633190152742478e-06, + "loss": 1.8556, + "step": 17109000 + }, + { + "epoch": 84.77, + "learning_rate": 7.631954043489246e-06, + "loss": 1.836, + "step": 17109500 + }, + { + "epoch": 84.77, + "learning_rate": 7.630715457063163e-06, + "loss": 1.8501, + "step": 17110000 + }, + { + "epoch": 84.77, + "learning_rate": 7.629479347809932e-06, + "loss": 1.8697, + "step": 17110500 + }, + { + "epoch": 84.77, + "learning_rate": 7.628243238556701e-06, + "loss": 1.8034, + "step": 17111000 + }, + { + "epoch": 84.78, + "learning_rate": 7.627004652130616e-06, + "loss": 1.8467, + "step": 17111500 + }, + { + "epoch": 84.78, + "learning_rate": 7.625766065704533e-06, + "loss": 1.836, + "step": 17112000 + }, + { + "epoch": 84.78, + "learning_rate": 7.624527479278449e-06, + "loss": 1.844, + "step": 17112500 + }, + { + "epoch": 84.78, + "learning_rate": 7.623288892852366e-06, + "loss": 1.8546, + "step": 17113000 + }, + { + "epoch": 84.79, + "learning_rate": 7.622050306426281e-06, + "loss": 1.8288, + "step": 17113500 + }, + { + "epoch": 84.79, + "learning_rate": 7.620811720000198e-06, + "loss": 1.8292, + "step": 17114000 + }, + { + "epoch": 84.79, + "learning_rate": 7.619575610746966e-06, + "loss": 1.834, + "step": 17114500 + }, + { + "epoch": 84.79, + "learning_rate": 7.618337024320883e-06, + "loss": 1.8301, + "step": 17115000 + }, + { + "epoch": 84.8, + "learning_rate": 7.6170984378948e-06, + "loss": 1.8657, + "step": 17115500 + }, + { + "epoch": 84.8, + "learning_rate": 7.615859851468716e-06, + "loss": 1.8221, + "step": 17116000 + }, + { + "epoch": 84.8, + "learning_rate": 7.6146212650426315e-06, + "loss": 1.8328, + "step": 17116500 + }, + { + "epoch": 84.8, + "learning_rate": 7.613382678616548e-06, + "loss": 1.8473, + "step": 17117000 + }, + { + "epoch": 84.81, + "learning_rate": 7.612146569363317e-06, + "loss": 1.8516, + "step": 17117500 + }, + { + "epoch": 84.81, + "learning_rate": 7.610907982937233e-06, + "loss": 1.828, + "step": 17118000 + }, + { + "epoch": 84.81, + "learning_rate": 7.60966939651115e-06, + "loss": 1.8725, + "step": 17118500 + }, + { + "epoch": 84.81, + "learning_rate": 7.608430810085067e-06, + "loss": 1.8447, + "step": 17119000 + }, + { + "epoch": 84.82, + "learning_rate": 7.607192223658983e-06, + "loss": 1.8382, + "step": 17119500 + }, + { + "epoch": 84.82, + "learning_rate": 7.6059536372328984e-06, + "loss": 1.839, + "step": 17120000 + }, + { + "epoch": 84.82, + "learning_rate": 7.604715050806815e-06, + "loss": 1.8459, + "step": 17120500 + }, + { + "epoch": 84.82, + "learning_rate": 7.603476464380732e-06, + "loss": 1.8075, + "step": 17121000 + }, + { + "epoch": 84.83, + "learning_rate": 7.602237877954649e-06, + "loss": 1.8664, + "step": 17121500 + }, + { + "epoch": 84.83, + "learning_rate": 7.6009992915285645e-06, + "loss": 1.8421, + "step": 17122000 + }, + { + "epoch": 84.83, + "learning_rate": 7.5997607051024805e-06, + "loss": 1.8534, + "step": 17122500 + }, + { + "epoch": 84.83, + "learning_rate": 7.5985221186763975e-06, + "loss": 1.8419, + "step": 17123000 + }, + { + "epoch": 84.84, + "learning_rate": 7.597283532250314e-06, + "loss": 1.837, + "step": 17123500 + }, + { + "epoch": 84.84, + "learning_rate": 7.596047422997082e-06, + "loss": 1.8308, + "step": 17124000 + }, + { + "epoch": 84.84, + "learning_rate": 7.594813790916704e-06, + "loss": 1.8053, + "step": 17124500 + }, + { + "epoch": 84.84, + "learning_rate": 7.593575204490619e-06, + "loss": 1.8255, + "step": 17125000 + }, + { + "epoch": 84.85, + "learning_rate": 7.592336618064535e-06, + "loss": 1.8466, + "step": 17125500 + }, + { + "epoch": 84.85, + "learning_rate": 7.591098031638452e-06, + "loss": 1.8477, + "step": 17126000 + }, + { + "epoch": 84.85, + "learning_rate": 7.589859445212369e-06, + "loss": 1.8628, + "step": 17126500 + }, + { + "epoch": 84.85, + "learning_rate": 7.588620858786284e-06, + "loss": 1.8467, + "step": 17127000 + }, + { + "epoch": 84.86, + "learning_rate": 7.587384749533054e-06, + "loss": 1.8426, + "step": 17127500 + }, + { + "epoch": 84.86, + "learning_rate": 7.586148640279822e-06, + "loss": 1.8541, + "step": 17128000 + }, + { + "epoch": 84.86, + "learning_rate": 7.584910053853739e-06, + "loss": 1.852, + "step": 17128500 + }, + { + "epoch": 84.86, + "learning_rate": 7.583671467427654e-06, + "loss": 1.8469, + "step": 17129000 + }, + { + "epoch": 84.87, + "learning_rate": 7.582432881001571e-06, + "loss": 1.8311, + "step": 17129500 + }, + { + "epoch": 84.87, + "learning_rate": 7.581194294575487e-06, + "loss": 1.8328, + "step": 17130000 + }, + { + "epoch": 84.87, + "learning_rate": 7.579955708149404e-06, + "loss": 1.8431, + "step": 17130500 + }, + { + "epoch": 84.87, + "learning_rate": 7.578717121723319e-06, + "loss": 1.8358, + "step": 17131000 + }, + { + "epoch": 84.88, + "learning_rate": 7.577478535297236e-06, + "loss": 1.841, + "step": 17131500 + }, + { + "epoch": 84.88, + "learning_rate": 7.576239948871153e-06, + "loss": 1.8425, + "step": 17132000 + }, + { + "epoch": 84.88, + "learning_rate": 7.575003839617921e-06, + "loss": 1.8482, + "step": 17132500 + }, + { + "epoch": 84.88, + "learning_rate": 7.573765253191838e-06, + "loss": 1.8651, + "step": 17133000 + }, + { + "epoch": 84.89, + "learning_rate": 7.572526666765754e-06, + "loss": 1.8467, + "step": 17133500 + }, + { + "epoch": 84.89, + "learning_rate": 7.571288080339669e-06, + "loss": 1.8358, + "step": 17134000 + }, + { + "epoch": 84.89, + "learning_rate": 7.570051971086439e-06, + "loss": 1.8634, + "step": 17134500 + }, + { + "epoch": 84.89, + "learning_rate": 7.568813384660354e-06, + "loss": 1.8635, + "step": 17135000 + }, + { + "epoch": 84.9, + "learning_rate": 7.567574798234271e-06, + "loss": 1.8383, + "step": 17135500 + }, + { + "epoch": 84.9, + "learning_rate": 7.566336211808188e-06, + "loss": 1.819, + "step": 17136000 + }, + { + "epoch": 84.9, + "learning_rate": 7.565097625382105e-06, + "loss": 1.8434, + "step": 17136500 + }, + { + "epoch": 84.9, + "learning_rate": 7.563859038956021e-06, + "loss": 1.8229, + "step": 17137000 + }, + { + "epoch": 84.91, + "learning_rate": 7.562620452529936e-06, + "loss": 1.8372, + "step": 17137500 + }, + { + "epoch": 84.91, + "learning_rate": 7.561381866103853e-06, + "loss": 1.8278, + "step": 17138000 + }, + { + "epoch": 84.91, + "learning_rate": 7.560145756850621e-06, + "loss": 1.8293, + "step": 17138500 + }, + { + "epoch": 84.91, + "learning_rate": 7.558907170424538e-06, + "loss": 1.8411, + "step": 17139000 + }, + { + "epoch": 84.92, + "learning_rate": 7.557668583998455e-06, + "loss": 1.8758, + "step": 17139500 + }, + { + "epoch": 84.92, + "learning_rate": 7.556429997572371e-06, + "loss": 1.8611, + "step": 17140000 + }, + { + "epoch": 84.92, + "learning_rate": 7.555191411146287e-06, + "loss": 1.8389, + "step": 17140500 + }, + { + "epoch": 84.92, + "learning_rate": 7.553952824720203e-06, + "loss": 1.8537, + "step": 17141000 + }, + { + "epoch": 84.92, + "learning_rate": 7.55271423829412e-06, + "loss": 1.8407, + "step": 17141500 + }, + { + "epoch": 84.93, + "learning_rate": 7.551475651868037e-06, + "loss": 1.8213, + "step": 17142000 + }, + { + "epoch": 84.93, + "learning_rate": 7.550239542614805e-06, + "loss": 1.8584, + "step": 17142500 + }, + { + "epoch": 84.93, + "learning_rate": 7.549000956188722e-06, + "loss": 1.8345, + "step": 17143000 + }, + { + "epoch": 84.93, + "learning_rate": 7.54776484693549e-06, + "loss": 1.8287, + "step": 17143500 + }, + { + "epoch": 84.94, + "learning_rate": 7.546526260509407e-06, + "loss": 1.856, + "step": 17144000 + }, + { + "epoch": 84.94, + "learning_rate": 7.545287674083322e-06, + "loss": 1.8237, + "step": 17144500 + }, + { + "epoch": 84.94, + "learning_rate": 7.544049087657238e-06, + "loss": 1.8205, + "step": 17145000 + }, + { + "epoch": 84.94, + "learning_rate": 7.542810501231155e-06, + "loss": 1.8394, + "step": 17145500 + }, + { + "epoch": 84.95, + "learning_rate": 7.541571914805072e-06, + "loss": 1.8511, + "step": 17146000 + }, + { + "epoch": 84.95, + "learning_rate": 7.540333328378987e-06, + "loss": 1.8517, + "step": 17146500 + }, + { + "epoch": 84.95, + "learning_rate": 7.539097219125757e-06, + "loss": 1.8334, + "step": 17147000 + }, + { + "epoch": 84.95, + "learning_rate": 7.537858632699672e-06, + "loss": 1.8417, + "step": 17147500 + }, + { + "epoch": 84.96, + "learning_rate": 7.536620046273589e-06, + "loss": 1.8171, + "step": 17148000 + }, + { + "epoch": 84.96, + "learning_rate": 7.535381459847505e-06, + "loss": 1.8325, + "step": 17148500 + }, + { + "epoch": 84.96, + "learning_rate": 7.534142873421422e-06, + "loss": 1.8293, + "step": 17149000 + }, + { + "epoch": 84.96, + "learning_rate": 7.532904286995339e-06, + "loss": 1.8134, + "step": 17149500 + }, + { + "epoch": 84.97, + "learning_rate": 7.531665700569254e-06, + "loss": 1.8466, + "step": 17150000 + }, + { + "epoch": 84.97, + "learning_rate": 7.530427114143171e-06, + "loss": 1.8339, + "step": 17150500 + }, + { + "epoch": 84.97, + "learning_rate": 7.529188527717087e-06, + "loss": 1.8346, + "step": 17151000 + }, + { + "epoch": 84.97, + "learning_rate": 7.527949941291004e-06, + "loss": 1.819, + "step": 17151500 + }, + { + "epoch": 84.98, + "learning_rate": 7.5267113548649195e-06, + "loss": 1.8339, + "step": 17152000 + }, + { + "epoch": 84.98, + "learning_rate": 7.5254727684388364e-06, + "loss": 1.8473, + "step": 17152500 + }, + { + "epoch": 84.98, + "learning_rate": 7.5242341820127525e-06, + "loss": 1.8107, + "step": 17153000 + }, + { + "epoch": 84.98, + "learning_rate": 7.5229955955866694e-06, + "loss": 1.8374, + "step": 17153500 + }, + { + "epoch": 84.99, + "learning_rate": 7.521761963506289e-06, + "loss": 1.8161, + "step": 17154000 + }, + { + "epoch": 84.99, + "learning_rate": 7.520525854253059e-06, + "loss": 1.816, + "step": 17154500 + }, + { + "epoch": 84.99, + "learning_rate": 7.519287267826974e-06, + "loss": 1.8282, + "step": 17155000 + }, + { + "epoch": 84.99, + "learning_rate": 7.518048681400891e-06, + "loss": 1.8309, + "step": 17155500 + }, + { + "epoch": 85.0, + "learning_rate": 7.516810094974808e-06, + "loss": 1.8578, + "step": 17156000 + }, + { + "epoch": 85.0, + "learning_rate": 7.515571508548724e-06, + "loss": 1.8237, + "step": 17156500 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.6835737992703733, + "eval_accuracy_mlm": 0.6444747494641911, + "eval_accuracy_nsp": 0.8681082056330626, + "eval_loss": 2.307166576385498, + "eval_runtime": 147.0237, + "eval_samples_per_second": 1734.135, + "eval_steps_per_second": 72.26, + "step": 17156655 + }, + { + "epoch": 85.0, + "learning_rate": 7.514332922122639e-06, + "loss": 1.8319, + "step": 17157000 + }, + { + "epoch": 85.0, + "learning_rate": 7.513094335696556e-06, + "loss": 1.8298, + "step": 17157500 + }, + { + "epoch": 85.01, + "learning_rate": 7.511855749270473e-06, + "loss": 1.838, + "step": 17158000 + }, + { + "epoch": 85.01, + "learning_rate": 7.510619640017241e-06, + "loss": 1.8402, + "step": 17158500 + }, + { + "epoch": 85.01, + "learning_rate": 7.509383530764009e-06, + "loss": 1.8335, + "step": 17159000 + }, + { + "epoch": 85.01, + "learning_rate": 7.508144944337926e-06, + "loss": 1.8294, + "step": 17159500 + }, + { + "epoch": 85.02, + "learning_rate": 7.506906357911843e-06, + "loss": 1.859, + "step": 17160000 + }, + { + "epoch": 85.02, + "learning_rate": 7.50566777148576e-06, + "loss": 1.8442, + "step": 17160500 + }, + { + "epoch": 85.02, + "learning_rate": 7.504431662232528e-06, + "loss": 1.827, + "step": 17161000 + }, + { + "epoch": 85.02, + "learning_rate": 7.503193075806445e-06, + "loss": 1.8486, + "step": 17161500 + }, + { + "epoch": 85.03, + "learning_rate": 7.50195448938036e-06, + "loss": 1.816, + "step": 17162000 + }, + { + "epoch": 85.03, + "learning_rate": 7.500715902954276e-06, + "loss": 1.8251, + "step": 17162500 + }, + { + "epoch": 85.03, + "learning_rate": 7.499477316528193e-06, + "loss": 1.8123, + "step": 17163000 + }, + { + "epoch": 85.03, + "learning_rate": 7.498243684447815e-06, + "loss": 1.8077, + "step": 17163500 + }, + { + "epoch": 85.04, + "learning_rate": 7.49700509802173e-06, + "loss": 1.8269, + "step": 17164000 + }, + { + "epoch": 85.04, + "learning_rate": 7.495766511595646e-06, + "loss": 1.8317, + "step": 17164500 + }, + { + "epoch": 85.04, + "learning_rate": 7.494527925169563e-06, + "loss": 1.819, + "step": 17165000 + }, + { + "epoch": 85.04, + "learning_rate": 7.49328933874348e-06, + "loss": 1.8228, + "step": 17165500 + }, + { + "epoch": 85.05, + "learning_rate": 7.492050752317395e-06, + "loss": 1.819, + "step": 17166000 + }, + { + "epoch": 85.05, + "learning_rate": 7.490814643064165e-06, + "loss": 1.8215, + "step": 17166500 + }, + { + "epoch": 85.05, + "learning_rate": 7.48957605663808e-06, + "loss": 1.8359, + "step": 17167000 + }, + { + "epoch": 85.05, + "learning_rate": 7.488337470211997e-06, + "loss": 1.8357, + "step": 17167500 + }, + { + "epoch": 85.06, + "learning_rate": 7.487098883785913e-06, + "loss": 1.8377, + "step": 17168000 + }, + { + "epoch": 85.06, + "learning_rate": 7.48586029735983e-06, + "loss": 1.8324, + "step": 17168500 + }, + { + "epoch": 85.06, + "learning_rate": 7.484621710933745e-06, + "loss": 1.8262, + "step": 17169000 + }, + { + "epoch": 85.06, + "learning_rate": 7.483383124507662e-06, + "loss": 1.8456, + "step": 17169500 + }, + { + "epoch": 85.07, + "learning_rate": 7.482144538081579e-06, + "loss": 1.8599, + "step": 17170000 + }, + { + "epoch": 85.07, + "learning_rate": 7.480905951655495e-06, + "loss": 1.8466, + "step": 17170500 + }, + { + "epoch": 85.07, + "learning_rate": 7.479669842402264e-06, + "loss": 1.8151, + "step": 17171000 + }, + { + "epoch": 85.07, + "learning_rate": 7.47843125597618e-06, + "loss": 1.8346, + "step": 17171500 + }, + { + "epoch": 85.08, + "learning_rate": 7.477192669550097e-06, + "loss": 1.8281, + "step": 17172000 + }, + { + "epoch": 85.08, + "learning_rate": 7.475956560296865e-06, + "loss": 1.8194, + "step": 17172500 + }, + { + "epoch": 85.08, + "learning_rate": 7.474717973870782e-06, + "loss": 1.831, + "step": 17173000 + }, + { + "epoch": 85.08, + "learning_rate": 7.473479387444697e-06, + "loss": 1.8464, + "step": 17173500 + }, + { + "epoch": 85.09, + "learning_rate": 7.472240801018614e-06, + "loss": 1.847, + "step": 17174000 + }, + { + "epoch": 85.09, + "learning_rate": 7.471002214592531e-06, + "loss": 1.8518, + "step": 17174500 + }, + { + "epoch": 85.09, + "learning_rate": 7.469763628166447e-06, + "loss": 1.8338, + "step": 17175000 + }, + { + "epoch": 85.09, + "learning_rate": 7.468525041740362e-06, + "loss": 1.8478, + "step": 17175500 + }, + { + "epoch": 85.1, + "learning_rate": 7.467286455314279e-06, + "loss": 1.8275, + "step": 17176000 + }, + { + "epoch": 85.1, + "learning_rate": 7.466047868888196e-06, + "loss": 1.8377, + "step": 17176500 + }, + { + "epoch": 85.1, + "learning_rate": 7.464809282462112e-06, + "loss": 1.8272, + "step": 17177000 + }, + { + "epoch": 85.1, + "learning_rate": 7.463570696036028e-06, + "loss": 1.8443, + "step": 17177500 + }, + { + "epoch": 85.11, + "learning_rate": 7.462332109609944e-06, + "loss": 1.8383, + "step": 17178000 + }, + { + "epoch": 85.11, + "learning_rate": 7.461096000356713e-06, + "loss": 1.8355, + "step": 17178500 + }, + { + "epoch": 85.11, + "learning_rate": 7.459857413930629e-06, + "loss": 1.8188, + "step": 17179000 + }, + { + "epoch": 85.11, + "learning_rate": 7.458618827504546e-06, + "loss": 1.822, + "step": 17179500 + }, + { + "epoch": 85.12, + "learning_rate": 7.457380241078463e-06, + "loss": 1.8419, + "step": 17180000 + }, + { + "epoch": 85.12, + "learning_rate": 7.456141654652379e-06, + "loss": 1.8434, + "step": 17180500 + }, + { + "epoch": 85.12, + "learning_rate": 7.454903068226294e-06, + "loss": 1.8255, + "step": 17181000 + }, + { + "epoch": 85.12, + "learning_rate": 7.453664481800211e-06, + "loss": 1.8486, + "step": 17181500 + }, + { + "epoch": 85.13, + "learning_rate": 7.452425895374128e-06, + "loss": 1.8505, + "step": 17182000 + }, + { + "epoch": 85.13, + "learning_rate": 7.451189786120896e-06, + "loss": 1.844, + "step": 17182500 + }, + { + "epoch": 85.13, + "learning_rate": 7.449951199694813e-06, + "loss": 1.8327, + "step": 17183000 + }, + { + "epoch": 85.13, + "learning_rate": 7.44871261326873e-06, + "loss": 1.848, + "step": 17183500 + }, + { + "epoch": 85.14, + "learning_rate": 7.447474026842645e-06, + "loss": 1.8341, + "step": 17184000 + }, + { + "epoch": 85.14, + "learning_rate": 7.446235440416561e-06, + "loss": 1.8524, + "step": 17184500 + }, + { + "epoch": 85.14, + "learning_rate": 7.444996853990478e-06, + "loss": 1.8438, + "step": 17185000 + }, + { + "epoch": 85.14, + "learning_rate": 7.443758267564395e-06, + "loss": 1.8644, + "step": 17185500 + }, + { + "epoch": 85.15, + "learning_rate": 7.44251968113831e-06, + "loss": 1.8267, + "step": 17186000 + }, + { + "epoch": 85.15, + "learning_rate": 7.441281094712227e-06, + "loss": 1.8374, + "step": 17186500 + }, + { + "epoch": 85.15, + "learning_rate": 7.440042508286143e-06, + "loss": 1.8282, + "step": 17187000 + }, + { + "epoch": 85.15, + "learning_rate": 7.438806399032912e-06, + "loss": 1.8088, + "step": 17187500 + }, + { + "epoch": 85.16, + "learning_rate": 7.437567812606828e-06, + "loss": 1.826, + "step": 17188000 + }, + { + "epoch": 85.16, + "learning_rate": 7.436331703353597e-06, + "loss": 1.8534, + "step": 17188500 + }, + { + "epoch": 85.16, + "learning_rate": 7.435093116927513e-06, + "loss": 1.8418, + "step": 17189000 + }, + { + "epoch": 85.16, + "learning_rate": 7.43385453050143e-06, + "loss": 1.8289, + "step": 17189500 + }, + { + "epoch": 85.17, + "learning_rate": 7.432618421248198e-06, + "loss": 1.8272, + "step": 17190000 + }, + { + "epoch": 85.17, + "learning_rate": 7.431379834822115e-06, + "loss": 1.8333, + "step": 17190500 + }, + { + "epoch": 85.17, + "learning_rate": 7.43014124839603e-06, + "loss": 1.8353, + "step": 17191000 + }, + { + "epoch": 85.17, + "learning_rate": 7.4289051391428e-06, + "loss": 1.8403, + "step": 17191500 + }, + { + "epoch": 85.18, + "learning_rate": 7.427666552716715e-06, + "loss": 1.8134, + "step": 17192000 + }, + { + "epoch": 85.18, + "learning_rate": 7.426427966290632e-06, + "loss": 1.8502, + "step": 17192500 + }, + { + "epoch": 85.18, + "learning_rate": 7.425189379864549e-06, + "loss": 1.8465, + "step": 17193000 + }, + { + "epoch": 85.18, + "learning_rate": 7.423950793438465e-06, + "loss": 1.8388, + "step": 17193500 + }, + { + "epoch": 85.19, + "learning_rate": 7.422714684185234e-06, + "loss": 1.8686, + "step": 17194000 + }, + { + "epoch": 85.19, + "learning_rate": 7.42147609775915e-06, + "loss": 1.8356, + "step": 17194500 + }, + { + "epoch": 85.19, + "learning_rate": 7.420237511333065e-06, + "loss": 1.8434, + "step": 17195000 + }, + { + "epoch": 85.19, + "learning_rate": 7.418998924906982e-06, + "loss": 1.8174, + "step": 17195500 + }, + { + "epoch": 85.19, + "learning_rate": 7.417760338480899e-06, + "loss": 1.8183, + "step": 17196000 + }, + { + "epoch": 85.2, + "learning_rate": 7.416521752054816e-06, + "loss": 1.8348, + "step": 17196500 + }, + { + "epoch": 85.2, + "learning_rate": 7.415283165628732e-06, + "loss": 1.8351, + "step": 17197000 + }, + { + "epoch": 85.2, + "learning_rate": 7.414044579202647e-06, + "loss": 1.8469, + "step": 17197500 + }, + { + "epoch": 85.2, + "learning_rate": 7.412805992776564e-06, + "loss": 1.8662, + "step": 17198000 + }, + { + "epoch": 85.21, + "learning_rate": 7.411567406350481e-06, + "loss": 1.8184, + "step": 17198500 + }, + { + "epoch": 85.21, + "learning_rate": 7.410328819924397e-06, + "loss": 1.8379, + "step": 17199000 + }, + { + "epoch": 85.21, + "learning_rate": 7.409090233498312e-06, + "loss": 1.8415, + "step": 17199500 + }, + { + "epoch": 85.21, + "learning_rate": 7.407851647072229e-06, + "loss": 1.8162, + "step": 17200000 + }, + { + "epoch": 85.22, + "learning_rate": 7.406613060646146e-06, + "loss": 1.8436, + "step": 17200500 + }, + { + "epoch": 85.22, + "learning_rate": 7.405374474220063e-06, + "loss": 1.8203, + "step": 17201000 + }, + { + "epoch": 85.22, + "learning_rate": 7.404138364966831e-06, + "loss": 1.8586, + "step": 17201500 + }, + { + "epoch": 85.22, + "learning_rate": 7.402899778540748e-06, + "loss": 1.8673, + "step": 17202000 + }, + { + "epoch": 85.23, + "learning_rate": 7.401661192114663e-06, + "loss": 1.8345, + "step": 17202500 + }, + { + "epoch": 85.23, + "learning_rate": 7.400422605688579e-06, + "loss": 1.866, + "step": 17203000 + }, + { + "epoch": 85.23, + "learning_rate": 7.399184019262496e-06, + "loss": 1.8446, + "step": 17203500 + }, + { + "epoch": 85.23, + "learning_rate": 7.397947910009264e-06, + "loss": 1.8414, + "step": 17204000 + }, + { + "epoch": 85.24, + "learning_rate": 7.396709323583181e-06, + "loss": 1.8575, + "step": 17204500 + }, + { + "epoch": 85.24, + "learning_rate": 7.395470737157098e-06, + "loss": 1.834, + "step": 17205000 + }, + { + "epoch": 85.24, + "learning_rate": 7.394232150731015e-06, + "loss": 1.7968, + "step": 17205500 + }, + { + "epoch": 85.24, + "learning_rate": 7.39299356430493e-06, + "loss": 1.8282, + "step": 17206000 + }, + { + "epoch": 85.25, + "learning_rate": 7.391754977878846e-06, + "loss": 1.8332, + "step": 17206500 + }, + { + "epoch": 85.25, + "learning_rate": 7.390516391452763e-06, + "loss": 1.8257, + "step": 17207000 + }, + { + "epoch": 85.25, + "learning_rate": 7.38927780502668e-06, + "loss": 1.8395, + "step": 17207500 + }, + { + "epoch": 85.25, + "learning_rate": 7.3880392186005955e-06, + "loss": 1.8224, + "step": 17208000 + }, + { + "epoch": 85.26, + "learning_rate": 7.386800632174512e-06, + "loss": 1.8642, + "step": 17208500 + }, + { + "epoch": 85.26, + "learning_rate": 7.38556452292128e-06, + "loss": 1.8075, + "step": 17209000 + }, + { + "epoch": 85.26, + "learning_rate": 7.384325936495197e-06, + "loss": 1.8447, + "step": 17209500 + }, + { + "epoch": 85.26, + "learning_rate": 7.383087350069113e-06, + "loss": 1.8361, + "step": 17210000 + }, + { + "epoch": 85.27, + "learning_rate": 7.38184876364303e-06, + "loss": 1.8215, + "step": 17210500 + }, + { + "epoch": 85.27, + "learning_rate": 7.380612654389798e-06, + "loss": 1.874, + "step": 17211000 + }, + { + "epoch": 85.27, + "learning_rate": 7.379374067963715e-06, + "loss": 1.854, + "step": 17211500 + }, + { + "epoch": 85.27, + "learning_rate": 7.3781354815376304e-06, + "loss": 1.8422, + "step": 17212000 + }, + { + "epoch": 85.28, + "learning_rate": 7.3768993722844e-06, + "loss": 1.8006, + "step": 17212500 + }, + { + "epoch": 85.28, + "learning_rate": 7.375660785858315e-06, + "loss": 1.8313, + "step": 17213000 + }, + { + "epoch": 85.28, + "learning_rate": 7.374424676605085e-06, + "loss": 1.8318, + "step": 17213500 + }, + { + "epoch": 85.28, + "learning_rate": 7.373186090179e-06, + "loss": 1.8374, + "step": 17214000 + }, + { + "epoch": 85.29, + "learning_rate": 7.37194998092577e-06, + "loss": 1.8436, + "step": 17214500 + }, + { + "epoch": 85.29, + "learning_rate": 7.370711394499685e-06, + "loss": 1.8032, + "step": 17215000 + }, + { + "epoch": 85.29, + "learning_rate": 7.369472808073602e-06, + "loss": 1.8288, + "step": 17215500 + }, + { + "epoch": 85.29, + "learning_rate": 7.368234221647519e-06, + "loss": 1.8436, + "step": 17216000 + }, + { + "epoch": 85.3, + "learning_rate": 7.366995635221435e-06, + "loss": 1.8186, + "step": 17216500 + }, + { + "epoch": 85.3, + "learning_rate": 7.36575704879535e-06, + "loss": 1.828, + "step": 17217000 + }, + { + "epoch": 85.3, + "learning_rate": 7.364518462369267e-06, + "loss": 1.8463, + "step": 17217500 + }, + { + "epoch": 85.3, + "learning_rate": 7.363279875943184e-06, + "loss": 1.8603, + "step": 17218000 + }, + { + "epoch": 85.31, + "learning_rate": 7.362041289517101e-06, + "loss": 1.8348, + "step": 17218500 + }, + { + "epoch": 85.31, + "learning_rate": 7.360802703091017e-06, + "loss": 1.8222, + "step": 17219000 + }, + { + "epoch": 85.31, + "learning_rate": 7.359564116664932e-06, + "loss": 1.8296, + "step": 17219500 + }, + { + "epoch": 85.31, + "learning_rate": 7.358325530238849e-06, + "loss": 1.8284, + "step": 17220000 + }, + { + "epoch": 85.32, + "learning_rate": 7.357089420985617e-06, + "loss": 1.8242, + "step": 17220500 + }, + { + "epoch": 85.32, + "learning_rate": 7.355850834559534e-06, + "loss": 1.8464, + "step": 17221000 + }, + { + "epoch": 85.32, + "learning_rate": 7.354612248133451e-06, + "loss": 1.814, + "step": 17221500 + }, + { + "epoch": 85.32, + "learning_rate": 7.353373661707367e-06, + "loss": 1.8454, + "step": 17222000 + }, + { + "epoch": 85.33, + "learning_rate": 7.352135075281283e-06, + "loss": 1.8251, + "step": 17222500 + }, + { + "epoch": 85.33, + "learning_rate": 7.350896488855199e-06, + "loss": 1.8134, + "step": 17223000 + }, + { + "epoch": 85.33, + "learning_rate": 7.349657902429116e-06, + "loss": 1.837, + "step": 17223500 + }, + { + "epoch": 85.33, + "learning_rate": 7.348419316003033e-06, + "loss": 1.8134, + "step": 17224000 + }, + { + "epoch": 85.34, + "learning_rate": 7.347183206749801e-06, + "loss": 1.8336, + "step": 17224500 + }, + { + "epoch": 85.34, + "learning_rate": 7.345944620323718e-06, + "loss": 1.8511, + "step": 17225000 + }, + { + "epoch": 85.34, + "learning_rate": 7.344706033897633e-06, + "loss": 1.8096, + "step": 17225500 + }, + { + "epoch": 85.34, + "learning_rate": 7.3434674474715494e-06, + "loss": 1.8422, + "step": 17226000 + }, + { + "epoch": 85.35, + "learning_rate": 7.342228861045466e-06, + "loss": 1.8268, + "step": 17226500 + }, + { + "epoch": 85.35, + "learning_rate": 7.340992751792234e-06, + "loss": 1.8306, + "step": 17227000 + }, + { + "epoch": 85.35, + "learning_rate": 7.339754165366151e-06, + "loss": 1.8731, + "step": 17227500 + }, + { + "epoch": 85.35, + "learning_rate": 7.338515578940068e-06, + "loss": 1.8287, + "step": 17228000 + }, + { + "epoch": 85.36, + "learning_rate": 7.337276992513983e-06, + "loss": 1.8453, + "step": 17228500 + }, + { + "epoch": 85.36, + "learning_rate": 7.3360384060879e-06, + "loss": 1.8485, + "step": 17229000 + }, + { + "epoch": 85.36, + "learning_rate": 7.334799819661816e-06, + "loss": 1.819, + "step": 17229500 + }, + { + "epoch": 85.36, + "learning_rate": 7.333561233235733e-06, + "loss": 1.8378, + "step": 17230000 + }, + { + "epoch": 85.37, + "learning_rate": 7.33232264680965e-06, + "loss": 1.8246, + "step": 17230500 + }, + { + "epoch": 85.37, + "learning_rate": 7.33108901472927e-06, + "loss": 1.8348, + "step": 17231000 + }, + { + "epoch": 85.37, + "learning_rate": 7.329850428303186e-06, + "loss": 1.8282, + "step": 17231500 + }, + { + "epoch": 85.37, + "learning_rate": 7.328611841877103e-06, + "loss": 1.8418, + "step": 17232000 + }, + { + "epoch": 85.38, + "learning_rate": 7.327373255451018e-06, + "loss": 1.8355, + "step": 17232500 + }, + { + "epoch": 85.38, + "learning_rate": 7.326134669024935e-06, + "loss": 1.8324, + "step": 17233000 + }, + { + "epoch": 85.38, + "learning_rate": 7.324896082598852e-06, + "loss": 1.8427, + "step": 17233500 + }, + { + "epoch": 85.38, + "learning_rate": 7.323657496172768e-06, + "loss": 1.8543, + "step": 17234000 + }, + { + "epoch": 85.39, + "learning_rate": 7.322418909746685e-06, + "loss": 1.8194, + "step": 17234500 + }, + { + "epoch": 85.39, + "learning_rate": 7.321182800493453e-06, + "loss": 1.8597, + "step": 17235000 + }, + { + "epoch": 85.39, + "learning_rate": 7.31994421406737e-06, + "loss": 1.8645, + "step": 17235500 + }, + { + "epoch": 85.39, + "learning_rate": 7.318705627641285e-06, + "loss": 1.8323, + "step": 17236000 + }, + { + "epoch": 85.4, + "learning_rate": 7.317469518388055e-06, + "loss": 1.8322, + "step": 17236500 + }, + { + "epoch": 85.4, + "learning_rate": 7.31623093196197e-06, + "loss": 1.8602, + "step": 17237000 + }, + { + "epoch": 85.4, + "learning_rate": 7.314992345535887e-06, + "loss": 1.841, + "step": 17237500 + }, + { + "epoch": 85.4, + "learning_rate": 7.313753759109804e-06, + "loss": 1.8267, + "step": 17238000 + }, + { + "epoch": 85.41, + "learning_rate": 7.31251517268372e-06, + "loss": 1.8493, + "step": 17238500 + }, + { + "epoch": 85.41, + "learning_rate": 7.311276586257635e-06, + "loss": 1.8473, + "step": 17239000 + }, + { + "epoch": 85.41, + "learning_rate": 7.310037999831552e-06, + "loss": 1.8605, + "step": 17239500 + }, + { + "epoch": 85.41, + "learning_rate": 7.308799413405469e-06, + "loss": 1.8265, + "step": 17240000 + }, + { + "epoch": 85.42, + "learning_rate": 7.307560826979385e-06, + "loss": 1.8468, + "step": 17240500 + }, + { + "epoch": 85.42, + "learning_rate": 7.306324717726154e-06, + "loss": 1.8286, + "step": 17241000 + }, + { + "epoch": 85.42, + "learning_rate": 7.305086131300071e-06, + "loss": 1.8447, + "step": 17241500 + }, + { + "epoch": 85.42, + "learning_rate": 7.303847544873986e-06, + "loss": 1.8432, + "step": 17242000 + }, + { + "epoch": 85.43, + "learning_rate": 7.302608958447902e-06, + "loss": 1.8299, + "step": 17242500 + }, + { + "epoch": 85.43, + "learning_rate": 7.301370372021819e-06, + "loss": 1.8341, + "step": 17243000 + }, + { + "epoch": 85.43, + "learning_rate": 7.300131785595736e-06, + "loss": 1.8567, + "step": 17243500 + }, + { + "epoch": 85.43, + "learning_rate": 7.298893199169652e-06, + "loss": 1.8089, + "step": 17244000 + }, + { + "epoch": 85.44, + "learning_rate": 7.297657089916421e-06, + "loss": 1.8165, + "step": 17244500 + }, + { + "epoch": 85.44, + "learning_rate": 7.296418503490336e-06, + "loss": 1.8527, + "step": 17245000 + }, + { + "epoch": 85.44, + "learning_rate": 7.295179917064253e-06, + "loss": 1.8452, + "step": 17245500 + }, + { + "epoch": 85.44, + "learning_rate": 7.293943807811021e-06, + "loss": 1.8481, + "step": 17246000 + }, + { + "epoch": 85.45, + "learning_rate": 7.292705221384938e-06, + "loss": 1.8307, + "step": 17246500 + }, + { + "epoch": 85.45, + "learning_rate": 7.291466634958854e-06, + "loss": 1.8199, + "step": 17247000 + }, + { + "epoch": 85.45, + "learning_rate": 7.290228048532771e-06, + "loss": 1.8195, + "step": 17247500 + }, + { + "epoch": 85.45, + "learning_rate": 7.288989462106688e-06, + "loss": 1.8287, + "step": 17248000 + }, + { + "epoch": 85.46, + "learning_rate": 7.287750875680603e-06, + "loss": 1.8608, + "step": 17248500 + }, + { + "epoch": 85.46, + "learning_rate": 7.2865122892545195e-06, + "loss": 1.8551, + "step": 17249000 + }, + { + "epoch": 85.46, + "learning_rate": 7.285273702828436e-06, + "loss": 1.8421, + "step": 17249500 + }, + { + "epoch": 85.46, + "learning_rate": 7.284035116402353e-06, + "loss": 1.85, + "step": 17250000 + }, + { + "epoch": 85.46, + "learning_rate": 7.282799007149121e-06, + "loss": 1.8476, + "step": 17250500 + }, + { + "epoch": 85.47, + "learning_rate": 7.281560420723038e-06, + "loss": 1.838, + "step": 17251000 + }, + { + "epoch": 85.47, + "learning_rate": 7.2803218342969534e-06, + "loss": 1.8188, + "step": 17251500 + }, + { + "epoch": 85.47, + "learning_rate": 7.27908324787087e-06, + "loss": 1.8644, + "step": 17252000 + }, + { + "epoch": 85.47, + "learning_rate": 7.2778446614447864e-06, + "loss": 1.857, + "step": 17252500 + }, + { + "epoch": 85.48, + "learning_rate": 7.276608552191555e-06, + "loss": 1.8316, + "step": 17253000 + }, + { + "epoch": 85.48, + "learning_rate": 7.275372442938323e-06, + "loss": 1.852, + "step": 17253500 + }, + { + "epoch": 85.48, + "learning_rate": 7.27413385651224e-06, + "loss": 1.846, + "step": 17254000 + }, + { + "epoch": 85.48, + "learning_rate": 7.272895270086156e-06, + "loss": 1.8581, + "step": 17254500 + }, + { + "epoch": 85.49, + "learning_rate": 7.271656683660073e-06, + "loss": 1.8195, + "step": 17255000 + }, + { + "epoch": 85.49, + "learning_rate": 7.270420574406841e-06, + "loss": 1.8333, + "step": 17255500 + }, + { + "epoch": 85.49, + "learning_rate": 7.269181987980758e-06, + "loss": 1.8668, + "step": 17256000 + }, + { + "epoch": 85.49, + "learning_rate": 7.267943401554673e-06, + "loss": 1.8504, + "step": 17256500 + }, + { + "epoch": 85.5, + "learning_rate": 7.26670481512859e-06, + "loss": 1.8311, + "step": 17257000 + }, + { + "epoch": 85.5, + "learning_rate": 7.265466228702507e-06, + "loss": 1.8212, + "step": 17257500 + }, + { + "epoch": 85.5, + "learning_rate": 7.264227642276423e-06, + "loss": 1.8658, + "step": 17258000 + }, + { + "epoch": 85.5, + "learning_rate": 7.2629890558503385e-06, + "loss": 1.8504, + "step": 17258500 + }, + { + "epoch": 85.51, + "learning_rate": 7.261750469424255e-06, + "loss": 1.821, + "step": 17259000 + }, + { + "epoch": 85.51, + "learning_rate": 7.260511882998172e-06, + "loss": 1.828, + "step": 17259500 + }, + { + "epoch": 85.51, + "learning_rate": 7.259273296572089e-06, + "loss": 1.8385, + "step": 17260000 + }, + { + "epoch": 85.51, + "learning_rate": 7.258034710146005e-06, + "loss": 1.8173, + "step": 17260500 + }, + { + "epoch": 85.52, + "learning_rate": 7.2567961237199206e-06, + "loss": 1.8434, + "step": 17261000 + }, + { + "epoch": 85.52, + "learning_rate": 7.2555575372938375e-06, + "loss": 1.8223, + "step": 17261500 + }, + { + "epoch": 85.52, + "learning_rate": 7.2543214280406054e-06, + "loss": 1.8228, + "step": 17262000 + }, + { + "epoch": 85.52, + "learning_rate": 7.253082841614522e-06, + "loss": 1.846, + "step": 17262500 + }, + { + "epoch": 85.53, + "learning_rate": 7.251844255188439e-06, + "loss": 1.8147, + "step": 17263000 + }, + { + "epoch": 85.53, + "learning_rate": 7.250605668762356e-06, + "loss": 1.8561, + "step": 17263500 + }, + { + "epoch": 85.53, + "learning_rate": 7.2493670823362715e-06, + "loss": 1.8279, + "step": 17264000 + }, + { + "epoch": 85.53, + "learning_rate": 7.248130973083041e-06, + "loss": 1.8157, + "step": 17264500 + }, + { + "epoch": 85.54, + "learning_rate": 7.246892386656956e-06, + "loss": 1.8308, + "step": 17265000 + }, + { + "epoch": 85.54, + "learning_rate": 7.2456538002308724e-06, + "loss": 1.82, + "step": 17265500 + }, + { + "epoch": 85.54, + "learning_rate": 7.244417690977641e-06, + "loss": 1.8315, + "step": 17266000 + }, + { + "epoch": 85.54, + "learning_rate": 7.243179104551557e-06, + "loss": 1.8434, + "step": 17266500 + }, + { + "epoch": 85.55, + "learning_rate": 7.241940518125474e-06, + "loss": 1.8283, + "step": 17267000 + }, + { + "epoch": 85.55, + "learning_rate": 7.240701931699391e-06, + "loss": 1.8491, + "step": 17267500 + }, + { + "epoch": 85.55, + "learning_rate": 7.239463345273306e-06, + "loss": 1.8383, + "step": 17268000 + }, + { + "epoch": 85.55, + "learning_rate": 7.238224758847223e-06, + "loss": 1.8432, + "step": 17268500 + }, + { + "epoch": 85.56, + "learning_rate": 7.2369861724211394e-06, + "loss": 1.8431, + "step": 17269000 + }, + { + "epoch": 85.56, + "learning_rate": 7.235750063167908e-06, + "loss": 1.8335, + "step": 17269500 + }, + { + "epoch": 85.56, + "learning_rate": 7.234511476741824e-06, + "loss": 1.806, + "step": 17270000 + }, + { + "epoch": 85.56, + "learning_rate": 7.233272890315741e-06, + "loss": 1.8155, + "step": 17270500 + }, + { + "epoch": 85.57, + "learning_rate": 7.2320343038896565e-06, + "loss": 1.8275, + "step": 17271000 + }, + { + "epoch": 85.57, + "learning_rate": 7.230795717463573e-06, + "loss": 1.8278, + "step": 17271500 + }, + { + "epoch": 85.57, + "learning_rate": 7.2295571310374895e-06, + "loss": 1.8458, + "step": 17272000 + }, + { + "epoch": 85.57, + "learning_rate": 7.228318544611406e-06, + "loss": 1.8185, + "step": 17272500 + }, + { + "epoch": 85.58, + "learning_rate": 7.227079958185323e-06, + "loss": 1.8311, + "step": 17273000 + }, + { + "epoch": 85.58, + "learning_rate": 7.225841371759239e-06, + "loss": 1.8193, + "step": 17273500 + }, + { + "epoch": 85.58, + "learning_rate": 7.224605262506008e-06, + "loss": 1.8444, + "step": 17274000 + }, + { + "epoch": 85.58, + "learning_rate": 7.2233666760799235e-06, + "loss": 1.849, + "step": 17274500 + }, + { + "epoch": 85.59, + "learning_rate": 7.2221305668266914e-06, + "loss": 1.8468, + "step": 17275000 + }, + { + "epoch": 85.59, + "learning_rate": 7.220891980400608e-06, + "loss": 1.8242, + "step": 17275500 + }, + { + "epoch": 85.59, + "learning_rate": 7.219653393974525e-06, + "loss": 1.8485, + "step": 17276000 + }, + { + "epoch": 85.59, + "learning_rate": 7.218414807548441e-06, + "loss": 1.8516, + "step": 17276500 + }, + { + "epoch": 85.6, + "learning_rate": 7.217176221122358e-06, + "loss": 1.8137, + "step": 17277000 + }, + { + "epoch": 85.6, + "learning_rate": 7.215940111869126e-06, + "loss": 1.8349, + "step": 17277500 + }, + { + "epoch": 85.6, + "learning_rate": 7.214701525443043e-06, + "loss": 1.8606, + "step": 17278000 + }, + { + "epoch": 85.6, + "learning_rate": 7.213462939016958e-06, + "loss": 1.8288, + "step": 17278500 + }, + { + "epoch": 85.61, + "learning_rate": 7.212224352590875e-06, + "loss": 1.8272, + "step": 17279000 + }, + { + "epoch": 85.61, + "learning_rate": 7.210985766164792e-06, + "loss": 1.8326, + "step": 17279500 + }, + { + "epoch": 85.61, + "learning_rate": 7.209747179738708e-06, + "loss": 1.8642, + "step": 17280000 + }, + { + "epoch": 85.61, + "learning_rate": 7.208508593312624e-06, + "loss": 1.8288, + "step": 17280500 + }, + { + "epoch": 85.62, + "learning_rate": 7.207272484059393e-06, + "loss": 1.8436, + "step": 17281000 + }, + { + "epoch": 85.62, + "learning_rate": 7.2060338976333085e-06, + "loss": 1.8397, + "step": 17281500 + }, + { + "epoch": 85.62, + "learning_rate": 7.204795311207225e-06, + "loss": 1.8484, + "step": 17282000 + }, + { + "epoch": 85.62, + "learning_rate": 7.203556724781142e-06, + "loss": 1.8334, + "step": 17282500 + }, + { + "epoch": 85.63, + "learning_rate": 7.202318138355059e-06, + "loss": 1.8308, + "step": 17283000 + }, + { + "epoch": 85.63, + "learning_rate": 7.2010795519289745e-06, + "loss": 1.835, + "step": 17283500 + }, + { + "epoch": 85.63, + "learning_rate": 7.199840965502891e-06, + "loss": 1.8554, + "step": 17284000 + }, + { + "epoch": 85.63, + "learning_rate": 7.1986023790768075e-06, + "loss": 1.8274, + "step": 17284500 + }, + { + "epoch": 85.64, + "learning_rate": 7.1973637926507244e-06, + "loss": 1.8413, + "step": 17285000 + }, + { + "epoch": 85.64, + "learning_rate": 7.196127683397492e-06, + "loss": 1.8585, + "step": 17285500 + }, + { + "epoch": 85.64, + "learning_rate": 7.194896528489965e-06, + "loss": 1.8399, + "step": 17286000 + }, + { + "epoch": 85.64, + "learning_rate": 7.193657942063882e-06, + "loss": 1.8197, + "step": 17286500 + }, + { + "epoch": 85.65, + "learning_rate": 7.192419355637799e-06, + "loss": 1.8279, + "step": 17287000 + }, + { + "epoch": 85.65, + "learning_rate": 7.191180769211714e-06, + "loss": 1.8573, + "step": 17287500 + }, + { + "epoch": 85.65, + "learning_rate": 7.189942182785631e-06, + "loss": 1.8356, + "step": 17288000 + }, + { + "epoch": 85.65, + "learning_rate": 7.188703596359547e-06, + "loss": 1.8293, + "step": 17288500 + }, + { + "epoch": 85.66, + "learning_rate": 7.187465009933464e-06, + "loss": 1.8382, + "step": 17289000 + }, + { + "epoch": 85.66, + "learning_rate": 7.186228900680232e-06, + "loss": 1.8331, + "step": 17289500 + }, + { + "epoch": 85.66, + "learning_rate": 7.184990314254149e-06, + "loss": 1.8242, + "step": 17290000 + }, + { + "epoch": 85.66, + "learning_rate": 7.183751727828064e-06, + "loss": 1.8189, + "step": 17290500 + }, + { + "epoch": 85.67, + "learning_rate": 7.182513141401981e-06, + "loss": 1.8186, + "step": 17291000 + }, + { + "epoch": 85.67, + "learning_rate": 7.181274554975897e-06, + "loss": 1.8436, + "step": 17291500 + }, + { + "epoch": 85.67, + "learning_rate": 7.180035968549814e-06, + "loss": 1.8368, + "step": 17292000 + }, + { + "epoch": 85.67, + "learning_rate": 7.178797382123729e-06, + "loss": 1.8472, + "step": 17292500 + }, + { + "epoch": 85.68, + "learning_rate": 7.177558795697646e-06, + "loss": 1.8571, + "step": 17293000 + }, + { + "epoch": 85.68, + "learning_rate": 7.176320209271563e-06, + "loss": 1.8261, + "step": 17293500 + }, + { + "epoch": 85.68, + "learning_rate": 7.175081622845479e-06, + "loss": 1.8179, + "step": 17294000 + }, + { + "epoch": 85.68, + "learning_rate": 7.173843036419396e-06, + "loss": 1.8381, + "step": 17294500 + }, + { + "epoch": 85.69, + "learning_rate": 7.172604449993311e-06, + "loss": 1.8539, + "step": 17295000 + }, + { + "epoch": 85.69, + "learning_rate": 7.171365863567228e-06, + "loss": 1.829, + "step": 17295500 + }, + { + "epoch": 85.69, + "learning_rate": 7.170127277141145e-06, + "loss": 1.8561, + "step": 17296000 + }, + { + "epoch": 85.69, + "learning_rate": 7.168888690715061e-06, + "loss": 1.798, + "step": 17296500 + }, + { + "epoch": 85.7, + "learning_rate": 7.16765258146183e-06, + "loss": 1.8345, + "step": 17297000 + }, + { + "epoch": 85.7, + "learning_rate": 7.166416472208598e-06, + "loss": 1.8428, + "step": 17297500 + }, + { + "epoch": 85.7, + "learning_rate": 7.165177885782515e-06, + "loss": 1.8481, + "step": 17298000 + }, + { + "epoch": 85.7, + "learning_rate": 7.163939299356431e-06, + "loss": 1.8577, + "step": 17298500 + }, + { + "epoch": 85.71, + "learning_rate": 7.162700712930346e-06, + "loss": 1.8259, + "step": 17299000 + }, + { + "epoch": 85.71, + "learning_rate": 7.161462126504263e-06, + "loss": 1.8421, + "step": 17299500 + }, + { + "epoch": 85.71, + "learning_rate": 7.16022354007818e-06, + "loss": 1.8061, + "step": 17300000 + }, + { + "epoch": 85.71, + "learning_rate": 7.158984953652096e-06, + "loss": 1.8401, + "step": 17300500 + }, + { + "epoch": 85.72, + "learning_rate": 7.157746367226012e-06, + "loss": 1.8367, + "step": 17301000 + }, + { + "epoch": 85.72, + "learning_rate": 7.1565077807999285e-06, + "loss": 1.806, + "step": 17301500 + }, + { + "epoch": 85.72, + "learning_rate": 7.155269194373845e-06, + "loss": 1.8248, + "step": 17302000 + }, + { + "epoch": 85.72, + "learning_rate": 7.154030607947762e-06, + "loss": 1.8443, + "step": 17302500 + }, + { + "epoch": 85.73, + "learning_rate": 7.152792021521678e-06, + "loss": 1.8538, + "step": 17303000 + }, + { + "epoch": 85.73, + "learning_rate": 7.151553435095594e-06, + "loss": 1.8381, + "step": 17303500 + }, + { + "epoch": 85.73, + "learning_rate": 7.150317325842363e-06, + "loss": 1.858, + "step": 17304000 + }, + { + "epoch": 85.73, + "learning_rate": 7.1490787394162785e-06, + "loss": 1.833, + "step": 17304500 + }, + { + "epoch": 85.73, + "learning_rate": 7.1478401529901954e-06, + "loss": 1.8428, + "step": 17305000 + }, + { + "epoch": 85.74, + "learning_rate": 7.146601566564112e-06, + "loss": 1.8144, + "step": 17305500 + }, + { + "epoch": 85.74, + "learning_rate": 7.145362980138029e-06, + "loss": 1.8495, + "step": 17306000 + }, + { + "epoch": 85.74, + "learning_rate": 7.144126870884797e-06, + "loss": 1.8719, + "step": 17306500 + }, + { + "epoch": 85.74, + "learning_rate": 7.142888284458714e-06, + "loss": 1.8387, + "step": 17307000 + }, + { + "epoch": 85.75, + "learning_rate": 7.141649698032629e-06, + "loss": 1.8394, + "step": 17307500 + }, + { + "epoch": 85.75, + "learning_rate": 7.1404111116065455e-06, + "loss": 1.8671, + "step": 17308000 + }, + { + "epoch": 85.75, + "learning_rate": 7.1391725251804624e-06, + "loss": 1.8147, + "step": 17308500 + }, + { + "epoch": 85.75, + "learning_rate": 7.137933938754379e-06, + "loss": 1.8458, + "step": 17309000 + }, + { + "epoch": 85.76, + "learning_rate": 7.136697829501147e-06, + "loss": 1.8437, + "step": 17309500 + }, + { + "epoch": 85.76, + "learning_rate": 7.135459243075064e-06, + "loss": 1.8667, + "step": 17310000 + }, + { + "epoch": 85.76, + "learning_rate": 7.134225610994684e-06, + "loss": 1.8455, + "step": 17310500 + }, + { + "epoch": 85.76, + "learning_rate": 7.132987024568601e-06, + "loss": 1.8237, + "step": 17311000 + }, + { + "epoch": 85.77, + "learning_rate": 7.131748438142517e-06, + "loss": 1.8286, + "step": 17311500 + }, + { + "epoch": 85.77, + "learning_rate": 7.130509851716434e-06, + "loss": 1.862, + "step": 17312000 + }, + { + "epoch": 85.77, + "learning_rate": 7.129271265290349e-06, + "loss": 1.82, + "step": 17312500 + }, + { + "epoch": 85.77, + "learning_rate": 7.128032678864266e-06, + "loss": 1.8185, + "step": 17313000 + }, + { + "epoch": 85.78, + "learning_rate": 7.126794092438182e-06, + "loss": 1.8623, + "step": 17313500 + }, + { + "epoch": 85.78, + "learning_rate": 7.125555506012099e-06, + "loss": 1.8251, + "step": 17314000 + }, + { + "epoch": 85.78, + "learning_rate": 7.1243169195860144e-06, + "loss": 1.8288, + "step": 17314500 + }, + { + "epoch": 85.78, + "learning_rate": 7.123080810332784e-06, + "loss": 1.8105, + "step": 17315000 + }, + { + "epoch": 85.79, + "learning_rate": 7.121842223906699e-06, + "loss": 1.8345, + "step": 17315500 + }, + { + "epoch": 85.79, + "learning_rate": 7.120603637480616e-06, + "loss": 1.8474, + "step": 17316000 + }, + { + "epoch": 85.79, + "learning_rate": 7.119365051054533e-06, + "loss": 1.832, + "step": 17316500 + }, + { + "epoch": 85.79, + "learning_rate": 7.118126464628449e-06, + "loss": 1.8221, + "step": 17317000 + }, + { + "epoch": 85.8, + "learning_rate": 7.1168878782023645e-06, + "loss": 1.7926, + "step": 17317500 + }, + { + "epoch": 85.8, + "learning_rate": 7.1156492917762814e-06, + "loss": 1.8238, + "step": 17318000 + }, + { + "epoch": 85.8, + "learning_rate": 7.114410705350198e-06, + "loss": 1.8216, + "step": 17318500 + }, + { + "epoch": 85.8, + "learning_rate": 7.113172118924115e-06, + "loss": 1.8526, + "step": 17319000 + }, + { + "epoch": 85.81, + "learning_rate": 7.111933532498031e-06, + "loss": 1.8288, + "step": 17319500 + }, + { + "epoch": 85.81, + "learning_rate": 7.110694946071947e-06, + "loss": 1.8389, + "step": 17320000 + }, + { + "epoch": 85.81, + "learning_rate": 7.1094563596458635e-06, + "loss": 1.8511, + "step": 17320500 + }, + { + "epoch": 85.81, + "learning_rate": 7.1082177732197805e-06, + "loss": 1.8207, + "step": 17321000 + }, + { + "epoch": 85.82, + "learning_rate": 7.106981663966548e-06, + "loss": 1.8354, + "step": 17321500 + }, + { + "epoch": 85.82, + "learning_rate": 7.105743077540465e-06, + "loss": 1.8343, + "step": 17322000 + }, + { + "epoch": 85.82, + "learning_rate": 7.1045044911143814e-06, + "loss": 1.834, + "step": 17322500 + }, + { + "epoch": 85.82, + "learning_rate": 7.1032659046882975e-06, + "loss": 1.8251, + "step": 17323000 + }, + { + "epoch": 85.83, + "learning_rate": 7.102032272607918e-06, + "loss": 1.8302, + "step": 17323500 + }, + { + "epoch": 85.83, + "learning_rate": 7.100793686181835e-06, + "loss": 1.8328, + "step": 17324000 + }, + { + "epoch": 85.83, + "learning_rate": 7.099555099755752e-06, + "loss": 1.8233, + "step": 17324500 + }, + { + "epoch": 85.83, + "learning_rate": 7.098316513329667e-06, + "loss": 1.8218, + "step": 17325000 + }, + { + "epoch": 85.84, + "learning_rate": 7.097077926903583e-06, + "loss": 1.8443, + "step": 17325500 + }, + { + "epoch": 85.84, + "learning_rate": 7.0958393404775e-06, + "loss": 1.8168, + "step": 17326000 + }, + { + "epoch": 85.84, + "learning_rate": 7.094600754051417e-06, + "loss": 1.8479, + "step": 17326500 + }, + { + "epoch": 85.84, + "learning_rate": 7.0933621676253325e-06, + "loss": 1.8246, + "step": 17327000 + }, + { + "epoch": 85.85, + "learning_rate": 7.092123581199249e-06, + "loss": 1.8252, + "step": 17327500 + }, + { + "epoch": 85.85, + "learning_rate": 7.0908849947731655e-06, + "loss": 1.8281, + "step": 17328000 + }, + { + "epoch": 85.85, + "learning_rate": 7.089646408347082e-06, + "loss": 1.8189, + "step": 17328500 + }, + { + "epoch": 85.85, + "learning_rate": 7.088407821920999e-06, + "loss": 1.8389, + "step": 17329000 + }, + { + "epoch": 85.86, + "learning_rate": 7.0871692354949146e-06, + "loss": 1.839, + "step": 17329500 + }, + { + "epoch": 85.86, + "learning_rate": 7.085930649068831e-06, + "loss": 1.8327, + "step": 17330000 + }, + { + "epoch": 85.86, + "learning_rate": 7.0846945398155994e-06, + "loss": 1.815, + "step": 17330500 + }, + { + "epoch": 85.86, + "learning_rate": 7.0834559533895155e-06, + "loss": 1.8128, + "step": 17331000 + }, + { + "epoch": 85.87, + "learning_rate": 7.0822173669634325e-06, + "loss": 1.8478, + "step": 17331500 + }, + { + "epoch": 85.87, + "learning_rate": 7.080983734883052e-06, + "loss": 1.8552, + "step": 17332000 + }, + { + "epoch": 85.87, + "learning_rate": 7.079745148456969e-06, + "loss": 1.8301, + "step": 17332500 + }, + { + "epoch": 85.87, + "learning_rate": 7.078506562030886e-06, + "loss": 1.8299, + "step": 17333000 + }, + { + "epoch": 85.88, + "learning_rate": 7.077270452777654e-06, + "loss": 1.8426, + "step": 17333500 + }, + { + "epoch": 85.88, + "learning_rate": 7.076031866351571e-06, + "loss": 1.8408, + "step": 17334000 + }, + { + "epoch": 85.88, + "learning_rate": 7.074793279925487e-06, + "loss": 1.8456, + "step": 17334500 + }, + { + "epoch": 85.88, + "learning_rate": 7.073554693499404e-06, + "loss": 1.84, + "step": 17335000 + }, + { + "epoch": 85.89, + "learning_rate": 7.072321061419024e-06, + "loss": 1.8319, + "step": 17335500 + }, + { + "epoch": 85.89, + "learning_rate": 7.071082474992941e-06, + "loss": 1.8338, + "step": 17336000 + }, + { + "epoch": 85.89, + "learning_rate": 7.069843888566857e-06, + "loss": 1.8501, + "step": 17336500 + }, + { + "epoch": 85.89, + "learning_rate": 7.068605302140772e-06, + "loss": 1.8261, + "step": 17337000 + }, + { + "epoch": 85.9, + "learning_rate": 7.067366715714689e-06, + "loss": 1.8384, + "step": 17337500 + }, + { + "epoch": 85.9, + "learning_rate": 7.066128129288606e-06, + "loss": 1.8441, + "step": 17338000 + }, + { + "epoch": 85.9, + "learning_rate": 7.064889542862523e-06, + "loss": 1.8228, + "step": 17338500 + }, + { + "epoch": 85.9, + "learning_rate": 7.063650956436439e-06, + "loss": 1.8454, + "step": 17339000 + }, + { + "epoch": 85.91, + "learning_rate": 7.062412370010354e-06, + "loss": 1.8641, + "step": 17339500 + }, + { + "epoch": 85.91, + "learning_rate": 7.061176260757124e-06, + "loss": 1.8288, + "step": 17340000 + }, + { + "epoch": 85.91, + "learning_rate": 7.059937674331039e-06, + "loss": 1.8238, + "step": 17340500 + }, + { + "epoch": 85.91, + "learning_rate": 7.058699087904956e-06, + "loss": 1.8308, + "step": 17341000 + }, + { + "epoch": 85.92, + "learning_rate": 7.057460501478873e-06, + "loss": 1.8293, + "step": 17341500 + }, + { + "epoch": 85.92, + "learning_rate": 7.056221915052789e-06, + "loss": 1.8412, + "step": 17342000 + }, + { + "epoch": 85.92, + "learning_rate": 7.054983328626705e-06, + "loss": 1.8485, + "step": 17342500 + }, + { + "epoch": 85.92, + "learning_rate": 7.053744742200621e-06, + "loss": 1.8502, + "step": 17343000 + }, + { + "epoch": 85.93, + "learning_rate": 7.052506155774538e-06, + "loss": 1.8336, + "step": 17343500 + }, + { + "epoch": 85.93, + "learning_rate": 7.051270046521306e-06, + "loss": 1.8437, + "step": 17344000 + }, + { + "epoch": 85.93, + "learning_rate": 7.050031460095223e-06, + "loss": 1.859, + "step": 17344500 + }, + { + "epoch": 85.93, + "learning_rate": 7.04879287366914e-06, + "loss": 1.8207, + "step": 17345000 + }, + { + "epoch": 85.94, + "learning_rate": 7.047554287243055e-06, + "loss": 1.8432, + "step": 17345500 + }, + { + "epoch": 85.94, + "learning_rate": 7.046315700816971e-06, + "loss": 1.8425, + "step": 17346000 + }, + { + "epoch": 85.94, + "learning_rate": 7.045077114390888e-06, + "loss": 1.8029, + "step": 17346500 + }, + { + "epoch": 85.94, + "learning_rate": 7.043838527964805e-06, + "loss": 1.8298, + "step": 17347000 + }, + { + "epoch": 85.95, + "learning_rate": 7.042599941538722e-06, + "loss": 1.8307, + "step": 17347500 + }, + { + "epoch": 85.95, + "learning_rate": 7.041361355112637e-06, + "loss": 1.835, + "step": 17348000 + }, + { + "epoch": 85.95, + "learning_rate": 7.040125245859405e-06, + "loss": 1.8252, + "step": 17348500 + }, + { + "epoch": 85.95, + "learning_rate": 7.038886659433322e-06, + "loss": 1.8685, + "step": 17349000 + }, + { + "epoch": 85.96, + "learning_rate": 7.037648073007238e-06, + "loss": 1.8361, + "step": 17349500 + }, + { + "epoch": 85.96, + "learning_rate": 7.036409486581155e-06, + "loss": 1.8106, + "step": 17350000 + }, + { + "epoch": 85.96, + "learning_rate": 7.035170900155072e-06, + "loss": 1.8237, + "step": 17350500 + }, + { + "epoch": 85.96, + "learning_rate": 7.033937268074692e-06, + "loss": 1.8431, + "step": 17351000 + }, + { + "epoch": 85.97, + "learning_rate": 7.032698681648608e-06, + "loss": 1.8194, + "step": 17351500 + }, + { + "epoch": 85.97, + "learning_rate": 7.031460095222525e-06, + "loss": 1.8067, + "step": 17352000 + }, + { + "epoch": 85.97, + "learning_rate": 7.030221508796442e-06, + "loss": 1.8397, + "step": 17352500 + }, + { + "epoch": 85.97, + "learning_rate": 7.02898539954321e-06, + "loss": 1.845, + "step": 17353000 + }, + { + "epoch": 85.98, + "learning_rate": 7.027746813117125e-06, + "loss": 1.8467, + "step": 17353500 + }, + { + "epoch": 85.98, + "learning_rate": 7.026508226691042e-06, + "loss": 1.848, + "step": 17354000 + }, + { + "epoch": 85.98, + "learning_rate": 7.025269640264959e-06, + "loss": 1.8063, + "step": 17354500 + }, + { + "epoch": 85.98, + "learning_rate": 7.024031053838875e-06, + "loss": 1.8237, + "step": 17355000 + }, + { + "epoch": 85.99, + "learning_rate": 7.022792467412792e-06, + "loss": 1.8413, + "step": 17355500 + }, + { + "epoch": 85.99, + "learning_rate": 7.021553880986707e-06, + "loss": 1.8265, + "step": 17356000 + }, + { + "epoch": 85.99, + "learning_rate": 7.020315294560624e-06, + "loss": 1.8597, + "step": 17356500 + }, + { + "epoch": 85.99, + "learning_rate": 7.019076708134541e-06, + "loss": 1.8376, + "step": 17357000 + }, + { + "epoch": 86.0, + "learning_rate": 7.017840598881309e-06, + "loss": 1.8219, + "step": 17357500 + }, + { + "epoch": 86.0, + "learning_rate": 7.016602012455226e-06, + "loss": 1.8388, + "step": 17358000 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.6832753959057551, + "eval_accuracy_mlm": 0.6441211393524955, + "eval_accuracy_nsp": 0.8679670064598621, + "eval_loss": 2.304426670074463, + "eval_runtime": 146.8495, + "eval_samples_per_second": 1736.192, + "eval_steps_per_second": 72.346, + "step": 17358498 + }, + { + "epoch": 86.0, + "learning_rate": 7.015363426029142e-06, + "loss": 1.8407, + "step": 17358500 + }, + { + "epoch": 86.0, + "learning_rate": 7.014124839603057e-06, + "loss": 1.8185, + "step": 17359000 + }, + { + "epoch": 86.0, + "learning_rate": 7.012886253176974e-06, + "loss": 1.8266, + "step": 17359500 + }, + { + "epoch": 86.01, + "learning_rate": 7.011647666750891e-06, + "loss": 1.8605, + "step": 17360000 + }, + { + "epoch": 86.01, + "learning_rate": 7.010411557497659e-06, + "loss": 1.8516, + "step": 17360500 + }, + { + "epoch": 86.01, + "learning_rate": 7.009172971071576e-06, + "loss": 1.8252, + "step": 17361000 + }, + { + "epoch": 86.01, + "learning_rate": 7.007934384645493e-06, + "loss": 1.8192, + "step": 17361500 + }, + { + "epoch": 86.02, + "learning_rate": 7.006695798219408e-06, + "loss": 1.8228, + "step": 17362000 + }, + { + "epoch": 86.02, + "learning_rate": 7.005457211793324e-06, + "loss": 1.8112, + "step": 17362500 + }, + { + "epoch": 86.02, + "learning_rate": 7.004218625367241e-06, + "loss": 1.8103, + "step": 17363000 + }, + { + "epoch": 86.02, + "learning_rate": 7.002980038941158e-06, + "loss": 1.8337, + "step": 17363500 + }, + { + "epoch": 86.03, + "learning_rate": 7.001741452515074e-06, + "loss": 1.823, + "step": 17364000 + }, + { + "epoch": 86.03, + "learning_rate": 7.0005028660889894e-06, + "loss": 1.8246, + "step": 17364500 + }, + { + "epoch": 86.03, + "learning_rate": 6.999264279662906e-06, + "loss": 1.8334, + "step": 17365000 + }, + { + "epoch": 86.03, + "learning_rate": 6.998025693236823e-06, + "loss": 1.8229, + "step": 17365500 + }, + { + "epoch": 86.04, + "learning_rate": 6.996789583983591e-06, + "loss": 1.8296, + "step": 17366000 + }, + { + "epoch": 86.04, + "learning_rate": 6.995550997557508e-06, + "loss": 1.8493, + "step": 17366500 + }, + { + "epoch": 86.04, + "learning_rate": 6.994312411131425e-06, + "loss": 1.831, + "step": 17367000 + }, + { + "epoch": 86.04, + "learning_rate": 6.99307382470534e-06, + "loss": 1.8236, + "step": 17367500 + }, + { + "epoch": 86.05, + "learning_rate": 6.99183771545211e-06, + "loss": 1.8236, + "step": 17368000 + }, + { + "epoch": 86.05, + "learning_rate": 6.990599129026025e-06, + "loss": 1.8235, + "step": 17368500 + }, + { + "epoch": 86.05, + "learning_rate": 6.989363019772795e-06, + "loss": 1.8086, + "step": 17369000 + }, + { + "epoch": 86.05, + "learning_rate": 6.98812443334671e-06, + "loss": 1.824, + "step": 17369500 + }, + { + "epoch": 86.06, + "learning_rate": 6.986885846920627e-06, + "loss": 1.8297, + "step": 17370000 + }, + { + "epoch": 86.06, + "learning_rate": 6.985647260494543e-06, + "loss": 1.8249, + "step": 17370500 + }, + { + "epoch": 86.06, + "learning_rate": 6.98440867406846e-06, + "loss": 1.8453, + "step": 17371000 + }, + { + "epoch": 86.06, + "learning_rate": 6.983170087642375e-06, + "loss": 1.8381, + "step": 17371500 + }, + { + "epoch": 86.07, + "learning_rate": 6.981931501216292e-06, + "loss": 1.8534, + "step": 17372000 + }, + { + "epoch": 86.07, + "learning_rate": 6.980692914790208e-06, + "loss": 1.8471, + "step": 17372500 + }, + { + "epoch": 86.07, + "learning_rate": 6.979454328364125e-06, + "loss": 1.8339, + "step": 17373000 + }, + { + "epoch": 86.07, + "learning_rate": 6.978218219110893e-06, + "loss": 1.8397, + "step": 17373500 + }, + { + "epoch": 86.08, + "learning_rate": 6.976982109857662e-06, + "loss": 1.82, + "step": 17374000 + }, + { + "epoch": 86.08, + "learning_rate": 6.975743523431578e-06, + "loss": 1.8305, + "step": 17374500 + }, + { + "epoch": 86.08, + "learning_rate": 6.974504937005495e-06, + "loss": 1.8265, + "step": 17375000 + }, + { + "epoch": 86.08, + "learning_rate": 6.97326635057941e-06, + "loss": 1.8178, + "step": 17375500 + }, + { + "epoch": 86.09, + "learning_rate": 6.972027764153327e-06, + "loss": 1.8482, + "step": 17376000 + }, + { + "epoch": 86.09, + "learning_rate": 6.970791654900095e-06, + "loss": 1.8244, + "step": 17376500 + }, + { + "epoch": 86.09, + "learning_rate": 6.969553068474012e-06, + "loss": 1.8399, + "step": 17377000 + }, + { + "epoch": 86.09, + "learning_rate": 6.968314482047929e-06, + "loss": 1.7937, + "step": 17377500 + }, + { + "epoch": 86.1, + "learning_rate": 6.967075895621845e-06, + "loss": 1.8177, + "step": 17378000 + }, + { + "epoch": 86.1, + "learning_rate": 6.96583730919576e-06, + "loss": 1.8197, + "step": 17378500 + }, + { + "epoch": 86.1, + "learning_rate": 6.964598722769677e-06, + "loss": 1.8268, + "step": 17379000 + }, + { + "epoch": 86.1, + "learning_rate": 6.963360136343594e-06, + "loss": 1.8371, + "step": 17379500 + }, + { + "epoch": 86.11, + "learning_rate": 6.962121549917511e-06, + "loss": 1.8163, + "step": 17380000 + }, + { + "epoch": 86.11, + "learning_rate": 6.960882963491427e-06, + "loss": 1.823, + "step": 17380500 + }, + { + "epoch": 86.11, + "learning_rate": 6.959644377065342e-06, + "loss": 1.8427, + "step": 17381000 + }, + { + "epoch": 86.11, + "learning_rate": 6.958410744984964e-06, + "loss": 1.8098, + "step": 17381500 + }, + { + "epoch": 86.12, + "learning_rate": 6.957172158558881e-06, + "loss": 1.8542, + "step": 17382000 + }, + { + "epoch": 86.12, + "learning_rate": 6.955933572132797e-06, + "loss": 1.8284, + "step": 17382500 + }, + { + "epoch": 86.12, + "learning_rate": 6.954694985706712e-06, + "loss": 1.8308, + "step": 17383000 + }, + { + "epoch": 86.12, + "learning_rate": 6.953456399280629e-06, + "loss": 1.8364, + "step": 17383500 + }, + { + "epoch": 86.13, + "learning_rate": 6.952217812854546e-06, + "loss": 1.8135, + "step": 17384000 + }, + { + "epoch": 86.13, + "learning_rate": 6.950979226428463e-06, + "loss": 1.8389, + "step": 17384500 + }, + { + "epoch": 86.13, + "learning_rate": 6.949743117175231e-06, + "loss": 1.857, + "step": 17385000 + }, + { + "epoch": 86.13, + "learning_rate": 6.948504530749148e-06, + "loss": 1.8235, + "step": 17385500 + }, + { + "epoch": 86.14, + "learning_rate": 6.947265944323063e-06, + "loss": 1.8143, + "step": 17386000 + }, + { + "epoch": 86.14, + "learning_rate": 6.946027357896979e-06, + "loss": 1.8354, + "step": 17386500 + }, + { + "epoch": 86.14, + "learning_rate": 6.944788771470896e-06, + "loss": 1.8369, + "step": 17387000 + }, + { + "epoch": 86.14, + "learning_rate": 6.943550185044813e-06, + "loss": 1.8106, + "step": 17387500 + }, + { + "epoch": 86.15, + "learning_rate": 6.942316552964433e-06, + "loss": 1.821, + "step": 17388000 + }, + { + "epoch": 86.15, + "learning_rate": 6.941077966538349e-06, + "loss": 1.8531, + "step": 17388500 + }, + { + "epoch": 86.15, + "learning_rate": 6.939839380112266e-06, + "loss": 1.8344, + "step": 17389000 + }, + { + "epoch": 86.15, + "learning_rate": 6.938600793686183e-06, + "loss": 1.811, + "step": 17389500 + }, + { + "epoch": 86.16, + "learning_rate": 6.937362207260098e-06, + "loss": 1.8324, + "step": 17390000 + }, + { + "epoch": 86.16, + "learning_rate": 6.936123620834015e-06, + "loss": 1.8572, + "step": 17390500 + }, + { + "epoch": 86.16, + "learning_rate": 6.934885034407931e-06, + "loss": 1.8622, + "step": 17391000 + }, + { + "epoch": 86.16, + "learning_rate": 6.933646447981848e-06, + "loss": 1.8291, + "step": 17391500 + }, + { + "epoch": 86.17, + "learning_rate": 6.932407861555763e-06, + "loss": 1.8356, + "step": 17392000 + }, + { + "epoch": 86.17, + "learning_rate": 6.931171752302533e-06, + "loss": 1.8197, + "step": 17392500 + }, + { + "epoch": 86.17, + "learning_rate": 6.929933165876448e-06, + "loss": 1.8273, + "step": 17393000 + }, + { + "epoch": 86.17, + "learning_rate": 6.928694579450365e-06, + "loss": 1.817, + "step": 17393500 + }, + { + "epoch": 86.18, + "learning_rate": 6.927458470197133e-06, + "loss": 1.8294, + "step": 17394000 + }, + { + "epoch": 86.18, + "learning_rate": 6.92621988377105e-06, + "loss": 1.8517, + "step": 17394500 + }, + { + "epoch": 86.18, + "learning_rate": 6.924981297344967e-06, + "loss": 1.8289, + "step": 17395000 + }, + { + "epoch": 86.18, + "learning_rate": 6.923742710918883e-06, + "loss": 1.8325, + "step": 17395500 + }, + { + "epoch": 86.19, + "learning_rate": 6.922506601665652e-06, + "loss": 1.844, + "step": 17396000 + }, + { + "epoch": 86.19, + "learning_rate": 6.921268015239568e-06, + "loss": 1.8231, + "step": 17396500 + }, + { + "epoch": 86.19, + "learning_rate": 6.920029428813483e-06, + "loss": 1.8177, + "step": 17397000 + }, + { + "epoch": 86.19, + "learning_rate": 6.9187908423874e-06, + "loss": 1.8187, + "step": 17397500 + }, + { + "epoch": 86.2, + "learning_rate": 6.917552255961317e-06, + "loss": 1.8377, + "step": 17398000 + }, + { + "epoch": 86.2, + "learning_rate": 6.916313669535234e-06, + "loss": 1.838, + "step": 17398500 + }, + { + "epoch": 86.2, + "learning_rate": 6.91507508310915e-06, + "loss": 1.8205, + "step": 17399000 + }, + { + "epoch": 86.2, + "learning_rate": 6.913836496683065e-06, + "loss": 1.8152, + "step": 17399500 + }, + { + "epoch": 86.21, + "learning_rate": 6.912597910256982e-06, + "loss": 1.809, + "step": 17400000 + }, + { + "epoch": 86.21, + "learning_rate": 6.91136180100375e-06, + "loss": 1.841, + "step": 17400500 + }, + { + "epoch": 86.21, + "learning_rate": 6.91012569175052e-06, + "loss": 1.8196, + "step": 17401000 + }, + { + "epoch": 86.21, + "learning_rate": 6.908887105324435e-06, + "loss": 1.8041, + "step": 17401500 + }, + { + "epoch": 86.22, + "learning_rate": 6.907648518898352e-06, + "loss": 1.8124, + "step": 17402000 + }, + { + "epoch": 86.22, + "learning_rate": 6.906409932472269e-06, + "loss": 1.8303, + "step": 17402500 + }, + { + "epoch": 86.22, + "learning_rate": 6.905171346046185e-06, + "loss": 1.8286, + "step": 17403000 + }, + { + "epoch": 86.22, + "learning_rate": 6.903932759620101e-06, + "loss": 1.8424, + "step": 17403500 + }, + { + "epoch": 86.23, + "learning_rate": 6.902694173194017e-06, + "loss": 1.8183, + "step": 17404000 + }, + { + "epoch": 86.23, + "learning_rate": 6.901455586767934e-06, + "loss": 1.8673, + "step": 17404500 + }, + { + "epoch": 86.23, + "learning_rate": 6.900217000341851e-06, + "loss": 1.8424, + "step": 17405000 + }, + { + "epoch": 86.23, + "learning_rate": 6.898978413915766e-06, + "loss": 1.8231, + "step": 17405500 + }, + { + "epoch": 86.24, + "learning_rate": 6.897739827489682e-06, + "loss": 1.857, + "step": 17406000 + }, + { + "epoch": 86.24, + "learning_rate": 6.896501241063599e-06, + "loss": 1.8287, + "step": 17406500 + }, + { + "epoch": 86.24, + "learning_rate": 6.895262654637516e-06, + "loss": 1.8184, + "step": 17407000 + }, + { + "epoch": 86.24, + "learning_rate": 6.894024068211433e-06, + "loss": 1.8253, + "step": 17407500 + }, + { + "epoch": 86.25, + "learning_rate": 6.892785481785348e-06, + "loss": 1.8448, + "step": 17408000 + }, + { + "epoch": 86.25, + "learning_rate": 6.891546895359264e-06, + "loss": 1.8447, + "step": 17408500 + }, + { + "epoch": 86.25, + "learning_rate": 6.890308308933181e-06, + "loss": 1.8412, + "step": 17409000 + }, + { + "epoch": 86.25, + "learning_rate": 6.889069722507098e-06, + "loss": 1.8036, + "step": 17409500 + }, + { + "epoch": 86.26, + "learning_rate": 6.887831136081013e-06, + "loss": 1.8277, + "step": 17410000 + }, + { + "epoch": 86.26, + "learning_rate": 6.886595026827783e-06, + "loss": 1.8312, + "step": 17410500 + }, + { + "epoch": 86.26, + "learning_rate": 6.885358917574551e-06, + "loss": 1.8313, + "step": 17411000 + }, + { + "epoch": 86.26, + "learning_rate": 6.884120331148468e-06, + "loss": 1.8383, + "step": 17411500 + }, + { + "epoch": 86.27, + "learning_rate": 6.882881744722383e-06, + "loss": 1.8335, + "step": 17412000 + }, + { + "epoch": 86.27, + "learning_rate": 6.8816431582963e-06, + "loss": 1.8264, + "step": 17412500 + }, + { + "epoch": 86.27, + "learning_rate": 6.880409526215921e-06, + "loss": 1.8349, + "step": 17413000 + }, + { + "epoch": 86.27, + "learning_rate": 6.879170939789838e-06, + "loss": 1.8412, + "step": 17413500 + }, + { + "epoch": 86.27, + "learning_rate": 6.877932353363753e-06, + "loss": 1.8229, + "step": 17414000 + }, + { + "epoch": 86.28, + "learning_rate": 6.87669376693767e-06, + "loss": 1.8512, + "step": 17414500 + }, + { + "epoch": 86.28, + "learning_rate": 6.875455180511586e-06, + "loss": 1.8095, + "step": 17415000 + }, + { + "epoch": 86.28, + "learning_rate": 6.874216594085503e-06, + "loss": 1.8252, + "step": 17415500 + }, + { + "epoch": 86.28, + "learning_rate": 6.872978007659418e-06, + "loss": 1.8247, + "step": 17416000 + }, + { + "epoch": 86.29, + "learning_rate": 6.871739421233335e-06, + "loss": 1.8262, + "step": 17416500 + }, + { + "epoch": 86.29, + "learning_rate": 6.870503311980103e-06, + "loss": 1.8301, + "step": 17417000 + }, + { + "epoch": 86.29, + "learning_rate": 6.86926472555402e-06, + "loss": 1.8277, + "step": 17417500 + }, + { + "epoch": 86.29, + "learning_rate": 6.8680310934736415e-06, + "loss": 1.8328, + "step": 17418000 + }, + { + "epoch": 86.3, + "learning_rate": 6.866792507047558e-06, + "loss": 1.8308, + "step": 17418500 + }, + { + "epoch": 86.3, + "learning_rate": 6.865556397794326e-06, + "loss": 1.8469, + "step": 17419000 + }, + { + "epoch": 86.3, + "learning_rate": 6.864317811368242e-06, + "loss": 1.8413, + "step": 17419500 + }, + { + "epoch": 86.3, + "learning_rate": 6.863079224942158e-06, + "loss": 1.8372, + "step": 17420000 + }, + { + "epoch": 86.31, + "learning_rate": 6.861840638516075e-06, + "loss": 1.838, + "step": 17420500 + }, + { + "epoch": 86.31, + "learning_rate": 6.8606020520899916e-06, + "loss": 1.8385, + "step": 17421000 + }, + { + "epoch": 86.31, + "learning_rate": 6.859363465663908e-06, + "loss": 1.8044, + "step": 17421500 + }, + { + "epoch": 86.31, + "learning_rate": 6.858124879237824e-06, + "loss": 1.8489, + "step": 17422000 + }, + { + "epoch": 86.32, + "learning_rate": 6.85688629281174e-06, + "loss": 1.8446, + "step": 17422500 + }, + { + "epoch": 86.32, + "learning_rate": 6.855647706385657e-06, + "loss": 1.8276, + "step": 17423000 + }, + { + "epoch": 86.32, + "learning_rate": 6.854409119959574e-06, + "loss": 1.8323, + "step": 17423500 + }, + { + "epoch": 86.32, + "learning_rate": 6.853170533533489e-06, + "loss": 1.8413, + "step": 17424000 + }, + { + "epoch": 86.33, + "learning_rate": 6.8519344242802586e-06, + "loss": 1.8254, + "step": 17424500 + }, + { + "epoch": 86.33, + "learning_rate": 6.850695837854174e-06, + "loss": 1.8347, + "step": 17425000 + }, + { + "epoch": 86.33, + "learning_rate": 6.84945725142809e-06, + "loss": 1.8187, + "step": 17425500 + }, + { + "epoch": 86.33, + "learning_rate": 6.848218665002007e-06, + "loss": 1.8338, + "step": 17426000 + }, + { + "epoch": 86.34, + "learning_rate": 6.846980078575924e-06, + "loss": 1.8333, + "step": 17426500 + }, + { + "epoch": 86.34, + "learning_rate": 6.845741492149839e-06, + "loss": 1.8276, + "step": 17427000 + }, + { + "epoch": 86.34, + "learning_rate": 6.844502905723756e-06, + "loss": 1.8295, + "step": 17427500 + }, + { + "epoch": 86.34, + "learning_rate": 6.843264319297672e-06, + "loss": 1.8297, + "step": 17428000 + }, + { + "epoch": 86.35, + "learning_rate": 6.842025732871589e-06, + "loss": 1.8125, + "step": 17428500 + }, + { + "epoch": 86.35, + "learning_rate": 6.840787146445506e-06, + "loss": 1.8429, + "step": 17429000 + }, + { + "epoch": 86.35, + "learning_rate": 6.839551037192274e-06, + "loss": 1.7966, + "step": 17429500 + }, + { + "epoch": 86.35, + "learning_rate": 6.838312450766191e-06, + "loss": 1.8272, + "step": 17430000 + }, + { + "epoch": 86.36, + "learning_rate": 6.837073864340106e-06, + "loss": 1.8345, + "step": 17430500 + }, + { + "epoch": 86.36, + "learning_rate": 6.835835277914023e-06, + "loss": 1.8413, + "step": 17431000 + }, + { + "epoch": 86.36, + "learning_rate": 6.834596691487939e-06, + "loss": 1.8149, + "step": 17431500 + }, + { + "epoch": 86.36, + "learning_rate": 6.833358105061856e-06, + "loss": 1.8009, + "step": 17432000 + }, + { + "epoch": 86.37, + "learning_rate": 6.832119518635771e-06, + "loss": 1.8636, + "step": 17432500 + }, + { + "epoch": 86.37, + "learning_rate": 6.830880932209688e-06, + "loss": 1.8351, + "step": 17433000 + }, + { + "epoch": 86.37, + "learning_rate": 6.829642345783604e-06, + "loss": 1.8209, + "step": 17433500 + }, + { + "epoch": 86.37, + "learning_rate": 6.828403759357521e-06, + "loss": 1.84, + "step": 17434000 + }, + { + "epoch": 86.38, + "learning_rate": 6.827165172931436e-06, + "loss": 1.8468, + "step": 17434500 + }, + { + "epoch": 86.38, + "learning_rate": 6.825926586505353e-06, + "loss": 1.8616, + "step": 17435000 + }, + { + "epoch": 86.38, + "learning_rate": 6.824690477252121e-06, + "loss": 1.853, + "step": 17435500 + }, + { + "epoch": 86.38, + "learning_rate": 6.823451890826038e-06, + "loss": 1.8276, + "step": 17436000 + }, + { + "epoch": 86.39, + "learning_rate": 6.822213304399955e-06, + "loss": 1.8331, + "step": 17436500 + }, + { + "epoch": 86.39, + "learning_rate": 6.820974717973871e-06, + "loss": 1.8022, + "step": 17437000 + }, + { + "epoch": 86.39, + "learning_rate": 6.819736131547788e-06, + "loss": 1.8085, + "step": 17437500 + }, + { + "epoch": 86.39, + "learning_rate": 6.818497545121703e-06, + "loss": 1.8169, + "step": 17438000 + }, + { + "epoch": 86.4, + "learning_rate": 6.81725895869562e-06, + "loss": 1.8239, + "step": 17438500 + }, + { + "epoch": 86.4, + "learning_rate": 6.816020372269537e-06, + "loss": 1.8324, + "step": 17439000 + }, + { + "epoch": 86.4, + "learning_rate": 6.814781785843453e-06, + "loss": 1.8449, + "step": 17439500 + }, + { + "epoch": 86.4, + "learning_rate": 6.8135431994173685e-06, + "loss": 1.8342, + "step": 17440000 + }, + { + "epoch": 86.41, + "learning_rate": 6.812307090164138e-06, + "loss": 1.8178, + "step": 17440500 + }, + { + "epoch": 86.41, + "learning_rate": 6.811068503738053e-06, + "loss": 1.824, + "step": 17441000 + }, + { + "epoch": 86.41, + "learning_rate": 6.80982991731197e-06, + "loss": 1.8386, + "step": 17441500 + }, + { + "epoch": 86.41, + "learning_rate": 6.808593808058738e-06, + "loss": 1.8192, + "step": 17442000 + }, + { + "epoch": 86.42, + "learning_rate": 6.807355221632655e-06, + "loss": 1.8348, + "step": 17442500 + }, + { + "epoch": 86.42, + "learning_rate": 6.806116635206572e-06, + "loss": 1.844, + "step": 17443000 + }, + { + "epoch": 86.42, + "learning_rate": 6.804878048780489e-06, + "loss": 1.8307, + "step": 17443500 + }, + { + "epoch": 86.42, + "learning_rate": 6.803639462354404e-06, + "loss": 1.8455, + "step": 17444000 + }, + { + "epoch": 86.43, + "learning_rate": 6.80240087592832e-06, + "loss": 1.8484, + "step": 17444500 + }, + { + "epoch": 86.43, + "learning_rate": 6.801164766675089e-06, + "loss": 1.8212, + "step": 17445000 + }, + { + "epoch": 86.43, + "learning_rate": 6.799926180249005e-06, + "loss": 1.8243, + "step": 17445500 + }, + { + "epoch": 86.43, + "learning_rate": 6.798687593822922e-06, + "loss": 1.803, + "step": 17446000 + }, + { + "epoch": 86.44, + "learning_rate": 6.797449007396839e-06, + "loss": 1.8436, + "step": 17446500 + }, + { + "epoch": 86.44, + "learning_rate": 6.796210420970754e-06, + "loss": 1.8237, + "step": 17447000 + }, + { + "epoch": 86.44, + "learning_rate": 6.794971834544671e-06, + "loss": 1.8344, + "step": 17447500 + }, + { + "epoch": 86.44, + "learning_rate": 6.793733248118587e-06, + "loss": 1.8262, + "step": 17448000 + }, + { + "epoch": 86.45, + "learning_rate": 6.792494661692504e-06, + "loss": 1.8389, + "step": 17448500 + }, + { + "epoch": 86.45, + "learning_rate": 6.791258552439272e-06, + "loss": 1.8126, + "step": 17449000 + }, + { + "epoch": 86.45, + "learning_rate": 6.790019966013189e-06, + "loss": 1.8391, + "step": 17449500 + }, + { + "epoch": 86.45, + "learning_rate": 6.788781379587106e-06, + "loss": 1.8257, + "step": 17450000 + }, + { + "epoch": 86.46, + "learning_rate": 6.787542793161021e-06, + "loss": 1.8249, + "step": 17450500 + }, + { + "epoch": 86.46, + "learning_rate": 6.786304206734937e-06, + "loss": 1.8362, + "step": 17451000 + }, + { + "epoch": 86.46, + "learning_rate": 6.785065620308854e-06, + "loss": 1.8107, + "step": 17451500 + }, + { + "epoch": 86.46, + "learning_rate": 6.783829511055622e-06, + "loss": 1.8226, + "step": 17452000 + }, + { + "epoch": 86.47, + "learning_rate": 6.782590924629539e-06, + "loss": 1.8292, + "step": 17452500 + }, + { + "epoch": 86.47, + "learning_rate": 6.781352338203456e-06, + "loss": 1.8178, + "step": 17453000 + }, + { + "epoch": 86.47, + "learning_rate": 6.780113751777371e-06, + "loss": 1.8404, + "step": 17453500 + }, + { + "epoch": 86.47, + "learning_rate": 6.778875165351288e-06, + "loss": 1.8183, + "step": 17454000 + }, + { + "epoch": 86.48, + "learning_rate": 6.777636578925204e-06, + "loss": 1.8061, + "step": 17454500 + }, + { + "epoch": 86.48, + "learning_rate": 6.776400469671973e-06, + "loss": 1.8319, + "step": 17455000 + }, + { + "epoch": 86.48, + "learning_rate": 6.775164360418741e-06, + "loss": 1.8383, + "step": 17455500 + }, + { + "epoch": 86.48, + "learning_rate": 6.773925773992658e-06, + "loss": 1.8219, + "step": 17456000 + }, + { + "epoch": 86.49, + "learning_rate": 6.772687187566574e-06, + "loss": 1.8369, + "step": 17456500 + }, + { + "epoch": 86.49, + "learning_rate": 6.771448601140491e-06, + "loss": 1.849, + "step": 17457000 + }, + { + "epoch": 86.49, + "learning_rate": 6.770210014714406e-06, + "loss": 1.8156, + "step": 17457500 + }, + { + "epoch": 86.49, + "learning_rate": 6.768971428288323e-06, + "loss": 1.8098, + "step": 17458000 + }, + { + "epoch": 86.5, + "learning_rate": 6.76773284186224e-06, + "loss": 1.8219, + "step": 17458500 + }, + { + "epoch": 86.5, + "learning_rate": 6.766494255436156e-06, + "loss": 1.8024, + "step": 17459000 + }, + { + "epoch": 86.5, + "learning_rate": 6.7652556690100715e-06, + "loss": 1.8094, + "step": 17459500 + }, + { + "epoch": 86.5, + "learning_rate": 6.7640170825839884e-06, + "loss": 1.8287, + "step": 17460000 + }, + { + "epoch": 86.51, + "learning_rate": 6.762778496157905e-06, + "loss": 1.8364, + "step": 17460500 + }, + { + "epoch": 86.51, + "learning_rate": 6.761539909731822e-06, + "loss": 1.8268, + "step": 17461000 + }, + { + "epoch": 86.51, + "learning_rate": 6.760301323305738e-06, + "loss": 1.8222, + "step": 17461500 + }, + { + "epoch": 86.51, + "learning_rate": 6.759065214052507e-06, + "loss": 1.8344, + "step": 17462000 + }, + { + "epoch": 86.52, + "learning_rate": 6.757826627626423e-06, + "loss": 1.8223, + "step": 17462500 + }, + { + "epoch": 86.52, + "learning_rate": 6.7565880412003385e-06, + "loss": 1.8139, + "step": 17463000 + }, + { + "epoch": 86.52, + "learning_rate": 6.755351931947108e-06, + "loss": 1.8051, + "step": 17463500 + }, + { + "epoch": 86.52, + "learning_rate": 6.754113345521023e-06, + "loss": 1.8283, + "step": 17464000 + }, + { + "epoch": 86.53, + "learning_rate": 6.75287475909494e-06, + "loss": 1.8236, + "step": 17464500 + }, + { + "epoch": 86.53, + "learning_rate": 6.751636172668857e-06, + "loss": 1.8186, + "step": 17465000 + }, + { + "epoch": 86.53, + "learning_rate": 6.750397586242773e-06, + "loss": 1.8569, + "step": 17465500 + }, + { + "epoch": 86.53, + "learning_rate": 6.749161476989542e-06, + "loss": 1.8521, + "step": 17466000 + }, + { + "epoch": 86.54, + "learning_rate": 6.747927844909162e-06, + "loss": 1.7978, + "step": 17466500 + }, + { + "epoch": 86.54, + "learning_rate": 6.746689258483078e-06, + "loss": 1.8375, + "step": 17467000 + }, + { + "epoch": 86.54, + "learning_rate": 6.745450672056995e-06, + "loss": 1.8379, + "step": 17467500 + }, + { + "epoch": 86.54, + "learning_rate": 6.744212085630912e-06, + "loss": 1.8215, + "step": 17468000 + }, + { + "epoch": 86.54, + "learning_rate": 6.742973499204829e-06, + "loss": 1.8266, + "step": 17468500 + }, + { + "epoch": 86.55, + "learning_rate": 6.741734912778744e-06, + "loss": 1.818, + "step": 17469000 + }, + { + "epoch": 86.55, + "learning_rate": 6.74049632635266e-06, + "loss": 1.8221, + "step": 17469500 + }, + { + "epoch": 86.55, + "learning_rate": 6.739257739926577e-06, + "loss": 1.8143, + "step": 17470000 + }, + { + "epoch": 86.55, + "learning_rate": 6.738019153500494e-06, + "loss": 1.8133, + "step": 17470500 + }, + { + "epoch": 86.56, + "learning_rate": 6.736783044247262e-06, + "loss": 1.8359, + "step": 17471000 + }, + { + "epoch": 86.56, + "learning_rate": 6.735544457821179e-06, + "loss": 1.8403, + "step": 17471500 + }, + { + "epoch": 86.56, + "learning_rate": 6.734305871395094e-06, + "loss": 1.8162, + "step": 17472000 + }, + { + "epoch": 86.56, + "learning_rate": 6.733067284969011e-06, + "loss": 1.8197, + "step": 17472500 + }, + { + "epoch": 86.57, + "learning_rate": 6.731831175715779e-06, + "loss": 1.8333, + "step": 17473000 + }, + { + "epoch": 86.57, + "learning_rate": 6.730592589289696e-06, + "loss": 1.8446, + "step": 17473500 + }, + { + "epoch": 86.57, + "learning_rate": 6.729354002863612e-06, + "loss": 1.8411, + "step": 17474000 + }, + { + "epoch": 86.57, + "learning_rate": 6.728117893610381e-06, + "loss": 1.8396, + "step": 17474500 + }, + { + "epoch": 86.58, + "learning_rate": 6.726879307184297e-06, + "loss": 1.8239, + "step": 17475000 + }, + { + "epoch": 86.58, + "learning_rate": 6.725640720758214e-06, + "loss": 1.8401, + "step": 17475500 + }, + { + "epoch": 86.58, + "learning_rate": 6.724402134332129e-06, + "loss": 1.8277, + "step": 17476000 + }, + { + "epoch": 86.58, + "learning_rate": 6.723166025078899e-06, + "loss": 1.8338, + "step": 17476500 + }, + { + "epoch": 86.59, + "learning_rate": 6.721927438652814e-06, + "loss": 1.8331, + "step": 17477000 + }, + { + "epoch": 86.59, + "learning_rate": 6.720688852226731e-06, + "loss": 1.807, + "step": 17477500 + }, + { + "epoch": 86.59, + "learning_rate": 6.719450265800648e-06, + "loss": 1.81, + "step": 17478000 + }, + { + "epoch": 86.59, + "learning_rate": 6.718211679374564e-06, + "loss": 1.7972, + "step": 17478500 + }, + { + "epoch": 86.6, + "learning_rate": 6.716973092948479e-06, + "loss": 1.8122, + "step": 17479000 + }, + { + "epoch": 86.6, + "learning_rate": 6.715736983695249e-06, + "loss": 1.8305, + "step": 17479500 + }, + { + "epoch": 86.6, + "learning_rate": 6.714498397269164e-06, + "loss": 1.8527, + "step": 17480000 + }, + { + "epoch": 86.6, + "learning_rate": 6.713259810843081e-06, + "loss": 1.8252, + "step": 17480500 + }, + { + "epoch": 86.61, + "learning_rate": 6.712021224416998e-06, + "loss": 1.8254, + "step": 17481000 + }, + { + "epoch": 86.61, + "learning_rate": 6.710782637990915e-06, + "loss": 1.8281, + "step": 17481500 + }, + { + "epoch": 86.61, + "learning_rate": 6.709546528737683e-06, + "loss": 1.8467, + "step": 17482000 + }, + { + "epoch": 86.61, + "learning_rate": 6.7083079423116e-06, + "loss": 1.8361, + "step": 17482500 + }, + { + "epoch": 86.62, + "learning_rate": 6.707069355885515e-06, + "loss": 1.8496, + "step": 17483000 + }, + { + "epoch": 86.62, + "learning_rate": 6.705830769459431e-06, + "loss": 1.8194, + "step": 17483500 + }, + { + "epoch": 86.62, + "learning_rate": 6.704592183033348e-06, + "loss": 1.8152, + "step": 17484000 + }, + { + "epoch": 86.62, + "learning_rate": 6.703353596607265e-06, + "loss": 1.808, + "step": 17484500 + }, + { + "epoch": 86.63, + "learning_rate": 6.702115010181181e-06, + "loss": 1.824, + "step": 17485000 + }, + { + "epoch": 86.63, + "learning_rate": 6.700876423755097e-06, + "loss": 1.8299, + "step": 17485500 + }, + { + "epoch": 86.63, + "learning_rate": 6.699637837329013e-06, + "loss": 1.8092, + "step": 17486000 + }, + { + "epoch": 86.63, + "learning_rate": 6.69839925090293e-06, + "loss": 1.8326, + "step": 17486500 + }, + { + "epoch": 86.64, + "learning_rate": 6.697160664476847e-06, + "loss": 1.8161, + "step": 17487000 + }, + { + "epoch": 86.64, + "learning_rate": 6.695922078050762e-06, + "loss": 1.8504, + "step": 17487500 + }, + { + "epoch": 86.64, + "learning_rate": 6.694683491624678e-06, + "loss": 1.8489, + "step": 17488000 + }, + { + "epoch": 86.64, + "learning_rate": 6.693444905198595e-06, + "loss": 1.8352, + "step": 17488500 + }, + { + "epoch": 86.65, + "learning_rate": 6.692211273118217e-06, + "loss": 1.8319, + "step": 17489000 + }, + { + "epoch": 86.65, + "learning_rate": 6.690972686692132e-06, + "loss": 1.8136, + "step": 17489500 + }, + { + "epoch": 86.65, + "learning_rate": 6.689734100266049e-06, + "loss": 1.8222, + "step": 17490000 + }, + { + "epoch": 86.65, + "learning_rate": 6.688495513839965e-06, + "loss": 1.8199, + "step": 17490500 + }, + { + "epoch": 86.66, + "learning_rate": 6.687256927413882e-06, + "loss": 1.8235, + "step": 17491000 + }, + { + "epoch": 86.66, + "learning_rate": 6.686018340987797e-06, + "loss": 1.8095, + "step": 17491500 + }, + { + "epoch": 86.66, + "learning_rate": 6.684779754561714e-06, + "loss": 1.8245, + "step": 17492000 + }, + { + "epoch": 86.66, + "learning_rate": 6.68354116813563e-06, + "loss": 1.844, + "step": 17492500 + }, + { + "epoch": 86.67, + "learning_rate": 6.682302581709547e-06, + "loss": 1.8517, + "step": 17493000 + }, + { + "epoch": 86.67, + "learning_rate": 6.681063995283464e-06, + "loss": 1.834, + "step": 17493500 + }, + { + "epoch": 86.67, + "learning_rate": 6.679827886030232e-06, + "loss": 1.8419, + "step": 17494000 + }, + { + "epoch": 86.67, + "learning_rate": 6.678591776777e-06, + "loss": 1.8287, + "step": 17494500 + }, + { + "epoch": 86.68, + "learning_rate": 6.677353190350917e-06, + "loss": 1.855, + "step": 17495000 + }, + { + "epoch": 86.68, + "learning_rate": 6.676114603924832e-06, + "loss": 1.8442, + "step": 17495500 + }, + { + "epoch": 86.68, + "learning_rate": 6.674876017498749e-06, + "loss": 1.8211, + "step": 17496000 + }, + { + "epoch": 86.68, + "learning_rate": 6.673637431072666e-06, + "loss": 1.8162, + "step": 17496500 + }, + { + "epoch": 86.69, + "learning_rate": 6.672398844646582e-06, + "loss": 1.8487, + "step": 17497000 + }, + { + "epoch": 86.69, + "learning_rate": 6.671160258220499e-06, + "loss": 1.8251, + "step": 17497500 + }, + { + "epoch": 86.69, + "learning_rate": 6.669921671794414e-06, + "loss": 1.8353, + "step": 17498000 + }, + { + "epoch": 86.69, + "learning_rate": 6.668685562541184e-06, + "loss": 1.8231, + "step": 17498500 + }, + { + "epoch": 86.7, + "learning_rate": 6.667446976115099e-06, + "loss": 1.8325, + "step": 17499000 + }, + { + "epoch": 86.7, + "learning_rate": 6.666208389689016e-06, + "loss": 1.8285, + "step": 17499500 + }, + { + "epoch": 86.7, + "learning_rate": 6.664969803262933e-06, + "loss": 1.8307, + "step": 17500000 + }, + { + "epoch": 86.7, + "learning_rate": 6.663731216836849e-06, + "loss": 1.839, + "step": 17500500 + }, + { + "epoch": 86.71, + "learning_rate": 6.662492630410764e-06, + "loss": 1.855, + "step": 17501000 + }, + { + "epoch": 86.71, + "learning_rate": 6.661254043984681e-06, + "loss": 1.8189, + "step": 17501500 + }, + { + "epoch": 86.71, + "learning_rate": 6.660015457558598e-06, + "loss": 1.8132, + "step": 17502000 + }, + { + "epoch": 86.71, + "learning_rate": 6.658776871132514e-06, + "loss": 1.831, + "step": 17502500 + }, + { + "epoch": 86.72, + "learning_rate": 6.657540761879283e-06, + "loss": 1.8281, + "step": 17503000 + }, + { + "epoch": 86.72, + "learning_rate": 6.6563021754532e-06, + "loss": 1.8125, + "step": 17503500 + }, + { + "epoch": 86.72, + "learning_rate": 6.655063589027115e-06, + "loss": 1.8245, + "step": 17504000 + }, + { + "epoch": 86.72, + "learning_rate": 6.653825002601031e-06, + "loss": 1.8438, + "step": 17504500 + }, + { + "epoch": 86.73, + "learning_rate": 6.6525888933478e-06, + "loss": 1.8264, + "step": 17505000 + }, + { + "epoch": 86.73, + "learning_rate": 6.651350306921716e-06, + "loss": 1.8249, + "step": 17505500 + }, + { + "epoch": 86.73, + "learning_rate": 6.650111720495633e-06, + "loss": 1.8603, + "step": 17506000 + }, + { + "epoch": 86.73, + "learning_rate": 6.64887313406955e-06, + "loss": 1.8166, + "step": 17506500 + }, + { + "epoch": 86.74, + "learning_rate": 6.647634547643465e-06, + "loss": 1.8222, + "step": 17507000 + }, + { + "epoch": 86.74, + "learning_rate": 6.646395961217382e-06, + "loss": 1.8135, + "step": 17507500 + }, + { + "epoch": 86.74, + "learning_rate": 6.645157374791298e-06, + "loss": 1.8296, + "step": 17508000 + }, + { + "epoch": 86.74, + "learning_rate": 6.643921265538067e-06, + "loss": 1.8447, + "step": 17508500 + }, + { + "epoch": 86.75, + "learning_rate": 6.642682679111983e-06, + "loss": 1.8627, + "step": 17509000 + }, + { + "epoch": 86.75, + "learning_rate": 6.6414440926859e-06, + "loss": 1.8295, + "step": 17509500 + }, + { + "epoch": 86.75, + "learning_rate": 6.640205506259817e-06, + "loss": 1.8232, + "step": 17510000 + }, + { + "epoch": 86.75, + "learning_rate": 6.638966919833732e-06, + "loss": 1.8393, + "step": 17510500 + }, + { + "epoch": 86.76, + "learning_rate": 6.637730810580502e-06, + "loss": 1.8067, + "step": 17511000 + }, + { + "epoch": 86.76, + "learning_rate": 6.636492224154417e-06, + "loss": 1.8404, + "step": 17511500 + }, + { + "epoch": 86.76, + "learning_rate": 6.635253637728333e-06, + "loss": 1.8447, + "step": 17512000 + }, + { + "epoch": 86.76, + "learning_rate": 6.63401505130225e-06, + "loss": 1.8147, + "step": 17512500 + }, + { + "epoch": 86.77, + "learning_rate": 6.632776464876167e-06, + "loss": 1.8434, + "step": 17513000 + }, + { + "epoch": 86.77, + "learning_rate": 6.631537878450082e-06, + "loss": 1.8375, + "step": 17513500 + }, + { + "epoch": 86.77, + "learning_rate": 6.630299292023999e-06, + "loss": 1.8327, + "step": 17514000 + }, + { + "epoch": 86.77, + "learning_rate": 6.629060705597915e-06, + "loss": 1.8181, + "step": 17514500 + }, + { + "epoch": 86.78, + "learning_rate": 6.627827073517537e-06, + "loss": 1.8245, + "step": 17515000 + }, + { + "epoch": 86.78, + "learning_rate": 6.626588487091452e-06, + "loss": 1.8517, + "step": 17515500 + }, + { + "epoch": 86.78, + "learning_rate": 6.625349900665369e-06, + "loss": 1.8271, + "step": 17516000 + }, + { + "epoch": 86.78, + "learning_rate": 6.624111314239285e-06, + "loss": 1.8408, + "step": 17516500 + }, + { + "epoch": 86.79, + "learning_rate": 6.622872727813202e-06, + "loss": 1.8367, + "step": 17517000 + }, + { + "epoch": 86.79, + "learning_rate": 6.621634141387117e-06, + "loss": 1.8314, + "step": 17517500 + }, + { + "epoch": 86.79, + "learning_rate": 6.620395554961034e-06, + "loss": 1.847, + "step": 17518000 + }, + { + "epoch": 86.79, + "learning_rate": 6.619156968534951e-06, + "loss": 1.8114, + "step": 17518500 + }, + { + "epoch": 86.8, + "learning_rate": 6.617918382108867e-06, + "loss": 1.8666, + "step": 17519000 + }, + { + "epoch": 86.8, + "learning_rate": 6.616682272855636e-06, + "loss": 1.8299, + "step": 17519500 + }, + { + "epoch": 86.8, + "learning_rate": 6.615443686429552e-06, + "loss": 1.8375, + "step": 17520000 + }, + { + "epoch": 86.8, + "learning_rate": 6.614205100003467e-06, + "loss": 1.8505, + "step": 17520500 + }, + { + "epoch": 86.81, + "learning_rate": 6.612966513577384e-06, + "loss": 1.8054, + "step": 17521000 + }, + { + "epoch": 86.81, + "learning_rate": 6.611727927151301e-06, + "loss": 1.8123, + "step": 17521500 + }, + { + "epoch": 86.81, + "learning_rate": 6.610489340725218e-06, + "loss": 1.8384, + "step": 17522000 + }, + { + "epoch": 86.81, + "learning_rate": 6.609250754299134e-06, + "loss": 1.7889, + "step": 17522500 + }, + { + "epoch": 86.81, + "learning_rate": 6.6080121678730494e-06, + "loss": 1.8521, + "step": 17523000 + }, + { + "epoch": 86.82, + "learning_rate": 6.606776058619819e-06, + "loss": 1.8461, + "step": 17523500 + }, + { + "epoch": 86.82, + "learning_rate": 6.605539949366588e-06, + "loss": 1.8341, + "step": 17524000 + }, + { + "epoch": 86.82, + "learning_rate": 6.604301362940503e-06, + "loss": 1.8373, + "step": 17524500 + }, + { + "epoch": 86.82, + "learning_rate": 6.603065253687273e-06, + "loss": 1.8496, + "step": 17525000 + }, + { + "epoch": 86.83, + "learning_rate": 6.601826667261188e-06, + "loss": 1.818, + "step": 17525500 + }, + { + "epoch": 86.83, + "learning_rate": 6.600588080835104e-06, + "loss": 1.81, + "step": 17526000 + }, + { + "epoch": 86.83, + "learning_rate": 6.599349494409021e-06, + "loss": 1.829, + "step": 17526500 + }, + { + "epoch": 86.83, + "learning_rate": 6.598110907982938e-06, + "loss": 1.8481, + "step": 17527000 + }, + { + "epoch": 86.84, + "learning_rate": 6.596872321556855e-06, + "loss": 1.8313, + "step": 17527500 + }, + { + "epoch": 86.84, + "learning_rate": 6.595636212303623e-06, + "loss": 1.8409, + "step": 17528000 + }, + { + "epoch": 86.84, + "learning_rate": 6.59439762587754e-06, + "loss": 1.8352, + "step": 17528500 + }, + { + "epoch": 86.84, + "learning_rate": 6.593159039451455e-06, + "loss": 1.8486, + "step": 17529000 + }, + { + "epoch": 86.85, + "learning_rate": 6.591920453025371e-06, + "loss": 1.8369, + "step": 17529500 + }, + { + "epoch": 86.85, + "learning_rate": 6.590681866599288e-06, + "loss": 1.8256, + "step": 17530000 + }, + { + "epoch": 86.85, + "learning_rate": 6.589443280173205e-06, + "loss": 1.8227, + "step": 17530500 + }, + { + "epoch": 86.85, + "learning_rate": 6.588207170919973e-06, + "loss": 1.8417, + "step": 17531000 + }, + { + "epoch": 86.86, + "learning_rate": 6.58696858449389e-06, + "loss": 1.8282, + "step": 17531500 + }, + { + "epoch": 86.86, + "learning_rate": 6.585729998067805e-06, + "loss": 1.8444, + "step": 17532000 + }, + { + "epoch": 86.86, + "learning_rate": 6.584491411641722e-06, + "loss": 1.8151, + "step": 17532500 + }, + { + "epoch": 86.86, + "learning_rate": 6.583252825215638e-06, + "loss": 1.8253, + "step": 17533000 + }, + { + "epoch": 86.87, + "learning_rate": 6.582016715962407e-06, + "loss": 1.8423, + "step": 17533500 + }, + { + "epoch": 86.87, + "learning_rate": 6.580778129536323e-06, + "loss": 1.8596, + "step": 17534000 + }, + { + "epoch": 86.87, + "learning_rate": 6.579542020283092e-06, + "loss": 1.8361, + "step": 17534500 + }, + { + "epoch": 86.87, + "learning_rate": 6.578303433857008e-06, + "loss": 1.8343, + "step": 17535000 + }, + { + "epoch": 86.88, + "learning_rate": 6.577064847430925e-06, + "loss": 1.8324, + "step": 17535500 + }, + { + "epoch": 86.88, + "learning_rate": 6.57582626100484e-06, + "loss": 1.8524, + "step": 17536000 + }, + { + "epoch": 86.88, + "learning_rate": 6.574587674578757e-06, + "loss": 1.8374, + "step": 17536500 + }, + { + "epoch": 86.88, + "learning_rate": 6.573349088152674e-06, + "loss": 1.8227, + "step": 17537000 + }, + { + "epoch": 86.89, + "learning_rate": 6.57211050172659e-06, + "loss": 1.8378, + "step": 17537500 + }, + { + "epoch": 86.89, + "learning_rate": 6.570871915300505e-06, + "loss": 1.832, + "step": 17538000 + }, + { + "epoch": 86.89, + "learning_rate": 6.569633328874422e-06, + "loss": 1.8391, + "step": 17538500 + }, + { + "epoch": 86.89, + "learning_rate": 6.568394742448339e-06, + "loss": 1.8332, + "step": 17539000 + }, + { + "epoch": 86.9, + "learning_rate": 6.567156156022255e-06, + "loss": 1.8283, + "step": 17539500 + }, + { + "epoch": 86.9, + "learning_rate": 6.565917569596172e-06, + "loss": 1.8679, + "step": 17540000 + }, + { + "epoch": 86.9, + "learning_rate": 6.564678983170087e-06, + "loss": 1.8285, + "step": 17540500 + }, + { + "epoch": 86.9, + "learning_rate": 6.563442873916857e-06, + "loss": 1.8373, + "step": 17541000 + }, + { + "epoch": 86.91, + "learning_rate": 6.562206764663626e-06, + "loss": 1.8208, + "step": 17541500 + }, + { + "epoch": 86.91, + "learning_rate": 6.560968178237542e-06, + "loss": 1.807, + "step": 17542000 + }, + { + "epoch": 86.91, + "learning_rate": 6.559729591811457e-06, + "loss": 1.8463, + "step": 17542500 + }, + { + "epoch": 86.91, + "learning_rate": 6.558491005385374e-06, + "loss": 1.815, + "step": 17543000 + }, + { + "epoch": 86.92, + "learning_rate": 6.557252418959291e-06, + "loss": 1.8357, + "step": 17543500 + }, + { + "epoch": 86.92, + "learning_rate": 6.556013832533207e-06, + "loss": 1.8441, + "step": 17544000 + }, + { + "epoch": 86.92, + "learning_rate": 6.554780200452827e-06, + "loss": 1.8308, + "step": 17544500 + }, + { + "epoch": 86.92, + "learning_rate": 6.553541614026744e-06, + "loss": 1.8306, + "step": 17545000 + }, + { + "epoch": 86.93, + "learning_rate": 6.552303027600661e-06, + "loss": 1.8153, + "step": 17545500 + }, + { + "epoch": 86.93, + "learning_rate": 6.551064441174578e-06, + "loss": 1.8271, + "step": 17546000 + }, + { + "epoch": 86.93, + "learning_rate": 6.549825854748493e-06, + "loss": 1.8444, + "step": 17546500 + }, + { + "epoch": 86.93, + "learning_rate": 6.548587268322409e-06, + "loss": 1.8103, + "step": 17547000 + }, + { + "epoch": 86.94, + "learning_rate": 6.547351159069178e-06, + "loss": 1.8337, + "step": 17547500 + }, + { + "epoch": 86.94, + "learning_rate": 6.546112572643094e-06, + "loss": 1.8296, + "step": 17548000 + }, + { + "epoch": 86.94, + "learning_rate": 6.544876463389863e-06, + "loss": 1.8338, + "step": 17548500 + }, + { + "epoch": 86.94, + "learning_rate": 6.543637876963779e-06, + "loss": 1.8376, + "step": 17549000 + }, + { + "epoch": 86.95, + "learning_rate": 6.542399290537696e-06, + "loss": 1.8178, + "step": 17549500 + }, + { + "epoch": 86.95, + "learning_rate": 6.541160704111613e-06, + "loss": 1.8449, + "step": 17550000 + }, + { + "epoch": 86.95, + "learning_rate": 6.539922117685528e-06, + "loss": 1.8344, + "step": 17550500 + }, + { + "epoch": 86.95, + "learning_rate": 6.538683531259445e-06, + "loss": 1.8389, + "step": 17551000 + }, + { + "epoch": 86.96, + "learning_rate": 6.537444944833361e-06, + "loss": 1.8272, + "step": 17551500 + }, + { + "epoch": 86.96, + "learning_rate": 6.536206358407278e-06, + "loss": 1.8534, + "step": 17552000 + }, + { + "epoch": 86.96, + "learning_rate": 6.534967771981193e-06, + "loss": 1.8391, + "step": 17552500 + }, + { + "epoch": 86.96, + "learning_rate": 6.53372918555511e-06, + "loss": 1.8303, + "step": 17553000 + }, + { + "epoch": 86.97, + "learning_rate": 6.532490599129026e-06, + "loss": 1.8188, + "step": 17553500 + }, + { + "epoch": 86.97, + "learning_rate": 6.531252012702943e-06, + "loss": 1.8174, + "step": 17554000 + }, + { + "epoch": 86.97, + "learning_rate": 6.530015903449711e-06, + "loss": 1.812, + "step": 17554500 + }, + { + "epoch": 86.97, + "learning_rate": 6.528777317023628e-06, + "loss": 1.8118, + "step": 17555000 + }, + { + "epoch": 86.98, + "learning_rate": 6.527538730597543e-06, + "loss": 1.8132, + "step": 17555500 + }, + { + "epoch": 86.98, + "learning_rate": 6.526305098517165e-06, + "loss": 1.8344, + "step": 17556000 + }, + { + "epoch": 86.98, + "learning_rate": 6.5250665120910815e-06, + "loss": 1.8046, + "step": 17556500 + }, + { + "epoch": 86.98, + "learning_rate": 6.523827925664998e-06, + "loss": 1.8146, + "step": 17557000 + }, + { + "epoch": 86.99, + "learning_rate": 6.522589339238913e-06, + "loss": 1.8227, + "step": 17557500 + }, + { + "epoch": 86.99, + "learning_rate": 6.52135075281283e-06, + "loss": 1.7993, + "step": 17558000 + }, + { + "epoch": 86.99, + "learning_rate": 6.520112166386747e-06, + "loss": 1.801, + "step": 17558500 + }, + { + "epoch": 86.99, + "learning_rate": 6.518873579960663e-06, + "loss": 1.8516, + "step": 17559000 + }, + { + "epoch": 87.0, + "learning_rate": 6.51763499353458e-06, + "loss": 1.8225, + "step": 17559500 + }, + { + "epoch": 87.0, + "learning_rate": 6.516396407108495e-06, + "loss": 1.8264, + "step": 17560000 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.683857368409135, + "eval_accuracy_mlm": 0.6449371183731448, + "eval_accuracy_nsp": 0.8674100541655717, + "eval_loss": 2.3212554454803467, + "eval_runtime": 146.8878, + "eval_samples_per_second": 1735.74, + "eval_steps_per_second": 72.327, + "step": 17560341 + }, + { + "epoch": 87.0, + "learning_rate": 6.515157820682412e-06, + "loss": 1.818, + "step": 17560500 + }, + { + "epoch": 87.0, + "learning_rate": 6.513919234256329e-06, + "loss": 1.8439, + "step": 17561000 + }, + { + "epoch": 87.01, + "learning_rate": 6.512680647830245e-06, + "loss": 1.851, + "step": 17561500 + }, + { + "epoch": 87.01, + "learning_rate": 6.51144206140416e-06, + "loss": 1.8094, + "step": 17562000 + }, + { + "epoch": 87.01, + "learning_rate": 6.510203474978077e-06, + "loss": 1.8383, + "step": 17562500 + }, + { + "epoch": 87.01, + "learning_rate": 6.508964888551994e-06, + "loss": 1.7896, + "step": 17563000 + }, + { + "epoch": 87.02, + "learning_rate": 6.507726302125911e-06, + "loss": 1.8189, + "step": 17563500 + }, + { + "epoch": 87.02, + "learning_rate": 6.506487715699826e-06, + "loss": 1.8202, + "step": 17564000 + }, + { + "epoch": 87.02, + "learning_rate": 6.505249129273742e-06, + "loss": 1.8272, + "step": 17564500 + }, + { + "epoch": 87.02, + "learning_rate": 6.504013020020511e-06, + "loss": 1.8043, + "step": 17565000 + }, + { + "epoch": 87.03, + "learning_rate": 6.502774433594427e-06, + "loss": 1.8251, + "step": 17565500 + }, + { + "epoch": 87.03, + "learning_rate": 6.501535847168344e-06, + "loss": 1.8336, + "step": 17566000 + }, + { + "epoch": 87.03, + "learning_rate": 6.5003022150879656e-06, + "loss": 1.8394, + "step": 17566500 + }, + { + "epoch": 87.03, + "learning_rate": 6.499063628661881e-06, + "loss": 1.8296, + "step": 17567000 + }, + { + "epoch": 87.04, + "learning_rate": 6.497825042235797e-06, + "loss": 1.8283, + "step": 17567500 + }, + { + "epoch": 87.04, + "learning_rate": 6.496586455809714e-06, + "loss": 1.8258, + "step": 17568000 + }, + { + "epoch": 87.04, + "learning_rate": 6.495347869383631e-06, + "loss": 1.8154, + "step": 17568500 + }, + { + "epoch": 87.04, + "learning_rate": 6.494109282957546e-06, + "loss": 1.8297, + "step": 17569000 + }, + { + "epoch": 87.05, + "learning_rate": 6.492870696531463e-06, + "loss": 1.8189, + "step": 17569500 + }, + { + "epoch": 87.05, + "learning_rate": 6.491632110105379e-06, + "loss": 1.8203, + "step": 17570000 + }, + { + "epoch": 87.05, + "learning_rate": 6.490393523679296e-06, + "loss": 1.8376, + "step": 17570500 + }, + { + "epoch": 87.05, + "learning_rate": 6.489157414426064e-06, + "loss": 1.8301, + "step": 17571000 + }, + { + "epoch": 87.06, + "learning_rate": 6.487918827999981e-06, + "loss": 1.7955, + "step": 17571500 + }, + { + "epoch": 87.06, + "learning_rate": 6.486682718746749e-06, + "loss": 1.8504, + "step": 17572000 + }, + { + "epoch": 87.06, + "learning_rate": 6.485444132320666e-06, + "loss": 1.8126, + "step": 17572500 + }, + { + "epoch": 87.06, + "learning_rate": 6.484205545894581e-06, + "loss": 1.829, + "step": 17573000 + }, + { + "epoch": 87.07, + "learning_rate": 6.482966959468498e-06, + "loss": 1.8227, + "step": 17573500 + }, + { + "epoch": 87.07, + "learning_rate": 6.481730850215266e-06, + "loss": 1.8236, + "step": 17574000 + }, + { + "epoch": 87.07, + "learning_rate": 6.480492263789183e-06, + "loss": 1.8271, + "step": 17574500 + }, + { + "epoch": 87.07, + "learning_rate": 6.4792536773631e-06, + "loss": 1.8016, + "step": 17575000 + }, + { + "epoch": 87.08, + "learning_rate": 6.478015090937016e-06, + "loss": 1.8333, + "step": 17575500 + }, + { + "epoch": 87.08, + "learning_rate": 6.476776504510933e-06, + "loss": 1.8083, + "step": 17576000 + }, + { + "epoch": 87.08, + "learning_rate": 6.475537918084848e-06, + "loss": 1.818, + "step": 17576500 + }, + { + "epoch": 87.08, + "learning_rate": 6.474299331658765e-06, + "loss": 1.8377, + "step": 17577000 + }, + { + "epoch": 87.09, + "learning_rate": 6.473060745232682e-06, + "loss": 1.8255, + "step": 17577500 + }, + { + "epoch": 87.09, + "learning_rate": 6.471822158806598e-06, + "loss": 1.8413, + "step": 17578000 + }, + { + "epoch": 87.09, + "learning_rate": 6.470586049553367e-06, + "loss": 1.8364, + "step": 17578500 + }, + { + "epoch": 87.09, + "learning_rate": 6.469347463127283e-06, + "loss": 1.8222, + "step": 17579000 + }, + { + "epoch": 87.09, + "learning_rate": 6.468108876701198e-06, + "loss": 1.8244, + "step": 17579500 + }, + { + "epoch": 87.1, + "learning_rate": 6.466870290275115e-06, + "loss": 1.8077, + "step": 17580000 + }, + { + "epoch": 87.1, + "learning_rate": 6.465631703849032e-06, + "loss": 1.8288, + "step": 17580500 + }, + { + "epoch": 87.1, + "learning_rate": 6.464393117422948e-06, + "loss": 1.8372, + "step": 17581000 + }, + { + "epoch": 87.1, + "learning_rate": 6.463154530996864e-06, + "loss": 1.7843, + "step": 17581500 + }, + { + "epoch": 87.11, + "learning_rate": 6.46191594457078e-06, + "loss": 1.8173, + "step": 17582000 + }, + { + "epoch": 87.11, + "learning_rate": 6.460677358144697e-06, + "loss": 1.8304, + "step": 17582500 + }, + { + "epoch": 87.11, + "learning_rate": 6.459438771718614e-06, + "loss": 1.8349, + "step": 17583000 + }, + { + "epoch": 87.11, + "learning_rate": 6.45820018529253e-06, + "loss": 1.842, + "step": 17583500 + }, + { + "epoch": 87.12, + "learning_rate": 6.456961598866445e-06, + "loss": 1.799, + "step": 17584000 + }, + { + "epoch": 87.12, + "learning_rate": 6.455723012440362e-06, + "loss": 1.829, + "step": 17584500 + }, + { + "epoch": 87.12, + "learning_rate": 6.454484426014279e-06, + "loss": 1.8211, + "step": 17585000 + }, + { + "epoch": 87.12, + "learning_rate": 6.453245839588196e-06, + "loss": 1.8474, + "step": 17585500 + }, + { + "epoch": 87.13, + "learning_rate": 6.452009730334964e-06, + "loss": 1.8251, + "step": 17586000 + }, + { + "epoch": 87.13, + "learning_rate": 6.450771143908881e-06, + "loss": 1.823, + "step": 17586500 + }, + { + "epoch": 87.13, + "learning_rate": 6.449535034655649e-06, + "loss": 1.835, + "step": 17587000 + }, + { + "epoch": 87.13, + "learning_rate": 6.448298925402417e-06, + "loss": 1.8291, + "step": 17587500 + }, + { + "epoch": 87.14, + "learning_rate": 6.447060338976334e-06, + "loss": 1.8377, + "step": 17588000 + }, + { + "epoch": 87.14, + "learning_rate": 6.445821752550251e-06, + "loss": 1.8135, + "step": 17588500 + }, + { + "epoch": 87.14, + "learning_rate": 6.444583166124166e-06, + "loss": 1.8175, + "step": 17589000 + }, + { + "epoch": 87.14, + "learning_rate": 6.443344579698082e-06, + "loss": 1.8296, + "step": 17589500 + }, + { + "epoch": 87.15, + "learning_rate": 6.442108470444851e-06, + "loss": 1.8379, + "step": 17590000 + }, + { + "epoch": 87.15, + "learning_rate": 6.440869884018767e-06, + "loss": 1.813, + "step": 17590500 + }, + { + "epoch": 87.15, + "learning_rate": 6.439633774765536e-06, + "loss": 1.8188, + "step": 17591000 + }, + { + "epoch": 87.15, + "learning_rate": 6.438395188339452e-06, + "loss": 1.8005, + "step": 17591500 + }, + { + "epoch": 87.16, + "learning_rate": 6.437156601913369e-06, + "loss": 1.8208, + "step": 17592000 + }, + { + "epoch": 87.16, + "learning_rate": 6.435918015487286e-06, + "loss": 1.8303, + "step": 17592500 + }, + { + "epoch": 87.16, + "learning_rate": 6.434679429061201e-06, + "loss": 1.8044, + "step": 17593000 + }, + { + "epoch": 87.16, + "learning_rate": 6.4334433198079706e-06, + "loss": 1.8231, + "step": 17593500 + }, + { + "epoch": 87.17, + "learning_rate": 6.432204733381886e-06, + "loss": 1.8019, + "step": 17594000 + }, + { + "epoch": 87.17, + "learning_rate": 6.430966146955803e-06, + "loss": 1.8388, + "step": 17594500 + }, + { + "epoch": 87.17, + "learning_rate": 6.429727560529719e-06, + "loss": 1.819, + "step": 17595000 + }, + { + "epoch": 87.17, + "learning_rate": 6.428488974103636e-06, + "loss": 1.8289, + "step": 17595500 + }, + { + "epoch": 87.18, + "learning_rate": 6.427250387677551e-06, + "loss": 1.8148, + "step": 17596000 + }, + { + "epoch": 87.18, + "learning_rate": 6.426011801251468e-06, + "loss": 1.8256, + "step": 17596500 + }, + { + "epoch": 87.18, + "learning_rate": 6.424773214825385e-06, + "loss": 1.8511, + "step": 17597000 + }, + { + "epoch": 87.18, + "learning_rate": 6.423534628399301e-06, + "loss": 1.816, + "step": 17597500 + }, + { + "epoch": 87.19, + "learning_rate": 6.422296041973216e-06, + "loss": 1.8207, + "step": 17598000 + }, + { + "epoch": 87.19, + "learning_rate": 6.421057455547133e-06, + "loss": 1.8347, + "step": 17598500 + }, + { + "epoch": 87.19, + "learning_rate": 6.41981886912105e-06, + "loss": 1.801, + "step": 17599000 + }, + { + "epoch": 87.19, + "learning_rate": 6.418580282694966e-06, + "loss": 1.8267, + "step": 17599500 + }, + { + "epoch": 87.2, + "learning_rate": 6.417341696268883e-06, + "loss": 1.8041, + "step": 17600000 + }, + { + "epoch": 87.2, + "learning_rate": 6.416103109842798e-06, + "loss": 1.8295, + "step": 17600500 + }, + { + "epoch": 87.2, + "learning_rate": 6.414864523416715e-06, + "loss": 1.8313, + "step": 17601000 + }, + { + "epoch": 87.2, + "learning_rate": 6.413625936990632e-06, + "loss": 1.8113, + "step": 17601500 + }, + { + "epoch": 87.21, + "learning_rate": 6.412387350564548e-06, + "loss": 1.8283, + "step": 17602000 + }, + { + "epoch": 87.21, + "learning_rate": 6.411151241311317e-06, + "loss": 1.8286, + "step": 17602500 + }, + { + "epoch": 87.21, + "learning_rate": 6.409912654885233e-06, + "loss": 1.7925, + "step": 17603000 + }, + { + "epoch": 87.21, + "learning_rate": 6.408674068459148e-06, + "loss": 1.8376, + "step": 17603500 + }, + { + "epoch": 87.22, + "learning_rate": 6.407435482033065e-06, + "loss": 1.804, + "step": 17604000 + }, + { + "epoch": 87.22, + "learning_rate": 6.406199372779834e-06, + "loss": 1.8032, + "step": 17604500 + }, + { + "epoch": 87.22, + "learning_rate": 6.40496078635375e-06, + "loss": 1.8221, + "step": 17605000 + }, + { + "epoch": 87.22, + "learning_rate": 6.403722199927667e-06, + "loss": 1.8384, + "step": 17605500 + }, + { + "epoch": 87.23, + "learning_rate": 6.402483613501584e-06, + "loss": 1.8033, + "step": 17606000 + }, + { + "epoch": 87.23, + "learning_rate": 6.401245027075499e-06, + "loss": 1.8314, + "step": 17606500 + }, + { + "epoch": 87.23, + "learning_rate": 6.400006440649415e-06, + "loss": 1.8284, + "step": 17607000 + }, + { + "epoch": 87.23, + "learning_rate": 6.398767854223332e-06, + "loss": 1.8194, + "step": 17607500 + }, + { + "epoch": 87.24, + "learning_rate": 6.3975317449701e-06, + "loss": 1.8365, + "step": 17608000 + }, + { + "epoch": 87.24, + "learning_rate": 6.396295635716869e-06, + "loss": 1.8224, + "step": 17608500 + }, + { + "epoch": 87.24, + "learning_rate": 6.395057049290785e-06, + "loss": 1.8385, + "step": 17609000 + }, + { + "epoch": 87.24, + "learning_rate": 6.393818462864702e-06, + "loss": 1.8077, + "step": 17609500 + }, + { + "epoch": 87.25, + "learning_rate": 6.392579876438619e-06, + "loss": 1.824, + "step": 17610000 + }, + { + "epoch": 87.25, + "learning_rate": 6.391341290012534e-06, + "loss": 1.8316, + "step": 17610500 + }, + { + "epoch": 87.25, + "learning_rate": 6.390102703586451e-06, + "loss": 1.8236, + "step": 17611000 + }, + { + "epoch": 87.25, + "learning_rate": 6.388864117160367e-06, + "loss": 1.8461, + "step": 17611500 + }, + { + "epoch": 87.26, + "learning_rate": 6.387625530734284e-06, + "loss": 1.8001, + "step": 17612000 + }, + { + "epoch": 87.26, + "learning_rate": 6.386389421481052e-06, + "loss": 1.8245, + "step": 17612500 + }, + { + "epoch": 87.26, + "learning_rate": 6.385150835054969e-06, + "loss": 1.816, + "step": 17613000 + }, + { + "epoch": 87.26, + "learning_rate": 6.383912248628886e-06, + "loss": 1.8167, + "step": 17613500 + }, + { + "epoch": 87.27, + "learning_rate": 6.382673662202801e-06, + "loss": 1.8098, + "step": 17614000 + }, + { + "epoch": 87.27, + "learning_rate": 6.381435075776718e-06, + "loss": 1.8224, + "step": 17614500 + }, + { + "epoch": 87.27, + "learning_rate": 6.380198966523486e-06, + "loss": 1.805, + "step": 17615000 + }, + { + "epoch": 87.27, + "learning_rate": 6.378960380097403e-06, + "loss": 1.8372, + "step": 17615500 + }, + { + "epoch": 87.28, + "learning_rate": 6.377721793671319e-06, + "loss": 1.8106, + "step": 17616000 + }, + { + "epoch": 87.28, + "learning_rate": 6.376483207245236e-06, + "loss": 1.8319, + "step": 17616500 + }, + { + "epoch": 87.28, + "learning_rate": 6.375244620819151e-06, + "loss": 1.8221, + "step": 17617000 + }, + { + "epoch": 87.28, + "learning_rate": 6.374006034393068e-06, + "loss": 1.8322, + "step": 17617500 + }, + { + "epoch": 87.29, + "learning_rate": 6.372767447966985e-06, + "loss": 1.8447, + "step": 17618000 + }, + { + "epoch": 87.29, + "learning_rate": 6.371528861540901e-06, + "loss": 1.8341, + "step": 17618500 + }, + { + "epoch": 87.29, + "learning_rate": 6.370290275114816e-06, + "loss": 1.7999, + "step": 17619000 + }, + { + "epoch": 87.29, + "learning_rate": 6.369054165861586e-06, + "loss": 1.8207, + "step": 17619500 + }, + { + "epoch": 87.3, + "learning_rate": 6.367815579435501e-06, + "loss": 1.7923, + "step": 17620000 + }, + { + "epoch": 87.3, + "learning_rate": 6.366576993009418e-06, + "loss": 1.7994, + "step": 17620500 + }, + { + "epoch": 87.3, + "learning_rate": 6.365340883756186e-06, + "loss": 1.8073, + "step": 17621000 + }, + { + "epoch": 87.3, + "learning_rate": 6.364102297330103e-06, + "loss": 1.8268, + "step": 17621500 + }, + { + "epoch": 87.31, + "learning_rate": 6.36286371090402e-06, + "loss": 1.815, + "step": 17622000 + }, + { + "epoch": 87.31, + "learning_rate": 6.361625124477936e-06, + "loss": 1.8005, + "step": 17622500 + }, + { + "epoch": 87.31, + "learning_rate": 6.360386538051852e-06, + "loss": 1.8321, + "step": 17623000 + }, + { + "epoch": 87.31, + "learning_rate": 6.359150428798622e-06, + "loss": 1.8417, + "step": 17623500 + }, + { + "epoch": 87.32, + "learning_rate": 6.357911842372537e-06, + "loss": 1.8456, + "step": 17624000 + }, + { + "epoch": 87.32, + "learning_rate": 6.356673255946453e-06, + "loss": 1.8491, + "step": 17624500 + }, + { + "epoch": 87.32, + "learning_rate": 6.35543466952037e-06, + "loss": 1.8234, + "step": 17625000 + }, + { + "epoch": 87.32, + "learning_rate": 6.354196083094287e-06, + "loss": 1.8194, + "step": 17625500 + }, + { + "epoch": 87.33, + "learning_rate": 6.352957496668203e-06, + "loss": 1.8501, + "step": 17626000 + }, + { + "epoch": 87.33, + "learning_rate": 6.351718910242118e-06, + "loss": 1.8387, + "step": 17626500 + }, + { + "epoch": 87.33, + "learning_rate": 6.350480323816035e-06, + "loss": 1.8361, + "step": 17627000 + }, + { + "epoch": 87.33, + "learning_rate": 6.349244214562804e-06, + "loss": 1.8276, + "step": 17627500 + }, + { + "epoch": 87.34, + "learning_rate": 6.348008105309572e-06, + "loss": 1.8277, + "step": 17628000 + }, + { + "epoch": 87.34, + "learning_rate": 6.346769518883489e-06, + "loss": 1.827, + "step": 17628500 + }, + { + "epoch": 87.34, + "learning_rate": 6.345530932457405e-06, + "loss": 1.8312, + "step": 17629000 + }, + { + "epoch": 87.34, + "learning_rate": 6.344292346031322e-06, + "loss": 1.8352, + "step": 17629500 + }, + { + "epoch": 87.35, + "learning_rate": 6.343053759605239e-06, + "loss": 1.8213, + "step": 17630000 + }, + { + "epoch": 87.35, + "learning_rate": 6.341815173179154e-06, + "loss": 1.809, + "step": 17630500 + }, + { + "epoch": 87.35, + "learning_rate": 6.34057658675307e-06, + "loss": 1.8297, + "step": 17631000 + }, + { + "epoch": 87.35, + "learning_rate": 6.339338000326987e-06, + "loss": 1.8297, + "step": 17631500 + }, + { + "epoch": 87.36, + "learning_rate": 6.338099413900904e-06, + "loss": 1.7992, + "step": 17632000 + }, + { + "epoch": 87.36, + "learning_rate": 6.336863304647672e-06, + "loss": 1.8039, + "step": 17632500 + }, + { + "epoch": 87.36, + "learning_rate": 6.335624718221589e-06, + "loss": 1.8383, + "step": 17633000 + }, + { + "epoch": 87.36, + "learning_rate": 6.334386131795504e-06, + "loss": 1.8289, + "step": 17633500 + }, + { + "epoch": 87.36, + "learning_rate": 6.333150022542274e-06, + "loss": 1.8353, + "step": 17634000 + }, + { + "epoch": 87.37, + "learning_rate": 6.331913913289042e-06, + "loss": 1.8247, + "step": 17634500 + }, + { + "epoch": 87.37, + "learning_rate": 6.330675326862959e-06, + "loss": 1.837, + "step": 17635000 + }, + { + "epoch": 87.37, + "learning_rate": 6.329436740436874e-06, + "loss": 1.8467, + "step": 17635500 + }, + { + "epoch": 87.37, + "learning_rate": 6.328198154010791e-06, + "loss": 1.7947, + "step": 17636000 + }, + { + "epoch": 87.38, + "learning_rate": 6.326959567584707e-06, + "loss": 1.8255, + "step": 17636500 + }, + { + "epoch": 87.38, + "learning_rate": 6.325720981158624e-06, + "loss": 1.8383, + "step": 17637000 + }, + { + "epoch": 87.38, + "learning_rate": 6.324482394732539e-06, + "loss": 1.8189, + "step": 17637500 + }, + { + "epoch": 87.38, + "learning_rate": 6.323243808306456e-06, + "loss": 1.8532, + "step": 17638000 + }, + { + "epoch": 87.39, + "learning_rate": 6.322007699053224e-06, + "loss": 1.8091, + "step": 17638500 + }, + { + "epoch": 87.39, + "learning_rate": 6.320769112627141e-06, + "loss": 1.7903, + "step": 17639000 + }, + { + "epoch": 87.39, + "learning_rate": 6.319530526201058e-06, + "loss": 1.8528, + "step": 17639500 + }, + { + "epoch": 87.39, + "learning_rate": 6.318291939774974e-06, + "loss": 1.8381, + "step": 17640000 + }, + { + "epoch": 87.4, + "learning_rate": 6.317055830521743e-06, + "loss": 1.8335, + "step": 17640500 + }, + { + "epoch": 87.4, + "learning_rate": 6.315817244095659e-06, + "loss": 1.8289, + "step": 17641000 + }, + { + "epoch": 87.4, + "learning_rate": 6.314578657669575e-06, + "loss": 1.8317, + "step": 17641500 + }, + { + "epoch": 87.4, + "learning_rate": 6.313340071243491e-06, + "loss": 1.8237, + "step": 17642000 + }, + { + "epoch": 87.41, + "learning_rate": 6.312101484817408e-06, + "loss": 1.7973, + "step": 17642500 + }, + { + "epoch": 87.41, + "learning_rate": 6.310862898391325e-06, + "loss": 1.8419, + "step": 17643000 + }, + { + "epoch": 87.41, + "learning_rate": 6.309624311965241e-06, + "loss": 1.8277, + "step": 17643500 + }, + { + "epoch": 87.41, + "learning_rate": 6.30838820271201e-06, + "loss": 1.8553, + "step": 17644000 + }, + { + "epoch": 87.42, + "learning_rate": 6.307149616285926e-06, + "loss": 1.8369, + "step": 17644500 + }, + { + "epoch": 87.42, + "learning_rate": 6.305911029859841e-06, + "loss": 1.841, + "step": 17645000 + }, + { + "epoch": 87.42, + "learning_rate": 6.304672443433758e-06, + "loss": 1.8297, + "step": 17645500 + }, + { + "epoch": 87.42, + "learning_rate": 6.303433857007675e-06, + "loss": 1.8384, + "step": 17646000 + }, + { + "epoch": 87.43, + "learning_rate": 6.302195270581592e-06, + "loss": 1.8188, + "step": 17646500 + }, + { + "epoch": 87.43, + "learning_rate": 6.30095916132836e-06, + "loss": 1.8157, + "step": 17647000 + }, + { + "epoch": 87.43, + "learning_rate": 6.299720574902277e-06, + "loss": 1.8075, + "step": 17647500 + }, + { + "epoch": 87.43, + "learning_rate": 6.298481988476192e-06, + "loss": 1.8276, + "step": 17648000 + }, + { + "epoch": 87.44, + "learning_rate": 6.297243402050108e-06, + "loss": 1.8364, + "step": 17648500 + }, + { + "epoch": 87.44, + "learning_rate": 6.296004815624025e-06, + "loss": 1.8179, + "step": 17649000 + }, + { + "epoch": 87.44, + "learning_rate": 6.294766229197942e-06, + "loss": 1.8319, + "step": 17649500 + }, + { + "epoch": 87.44, + "learning_rate": 6.293527642771857e-06, + "loss": 1.8431, + "step": 17650000 + }, + { + "epoch": 87.45, + "learning_rate": 6.292291533518627e-06, + "loss": 1.8237, + "step": 17650500 + }, + { + "epoch": 87.45, + "learning_rate": 6.291055424265395e-06, + "loss": 1.8352, + "step": 17651000 + }, + { + "epoch": 87.45, + "learning_rate": 6.289816837839312e-06, + "loss": 1.8184, + "step": 17651500 + }, + { + "epoch": 87.45, + "learning_rate": 6.288578251413227e-06, + "loss": 1.805, + "step": 17652000 + }, + { + "epoch": 87.46, + "learning_rate": 6.287339664987144e-06, + "loss": 1.8244, + "step": 17652500 + }, + { + "epoch": 87.46, + "learning_rate": 6.28610107856106e-06, + "loss": 1.8406, + "step": 17653000 + }, + { + "epoch": 87.46, + "learning_rate": 6.284862492134977e-06, + "loss": 1.8162, + "step": 17653500 + }, + { + "epoch": 87.46, + "learning_rate": 6.283623905708892e-06, + "loss": 1.8188, + "step": 17654000 + }, + { + "epoch": 87.47, + "learning_rate": 6.282385319282809e-06, + "loss": 1.8354, + "step": 17654500 + }, + { + "epoch": 87.47, + "learning_rate": 6.281146732856726e-06, + "loss": 1.82, + "step": 17655000 + }, + { + "epoch": 87.47, + "learning_rate": 6.279908146430642e-06, + "loss": 1.8203, + "step": 17655500 + }, + { + "epoch": 87.47, + "learning_rate": 6.278669560004559e-06, + "loss": 1.8182, + "step": 17656000 + }, + { + "epoch": 87.48, + "learning_rate": 6.277430973578474e-06, + "loss": 1.8306, + "step": 17656500 + }, + { + "epoch": 87.48, + "learning_rate": 6.276192387152391e-06, + "loss": 1.8369, + "step": 17657000 + }, + { + "epoch": 87.48, + "learning_rate": 6.274953800726307e-06, + "loss": 1.8211, + "step": 17657500 + }, + { + "epoch": 87.48, + "learning_rate": 6.273715214300224e-06, + "loss": 1.8152, + "step": 17658000 + }, + { + "epoch": 87.49, + "learning_rate": 6.272476627874139e-06, + "loss": 1.8045, + "step": 17658500 + }, + { + "epoch": 87.49, + "learning_rate": 6.271238041448056e-06, + "loss": 1.8145, + "step": 17659000 + }, + { + "epoch": 87.49, + "learning_rate": 6.269999455021973e-06, + "loss": 1.8145, + "step": 17659500 + }, + { + "epoch": 87.49, + "learning_rate": 6.268763345768741e-06, + "loss": 1.8164, + "step": 17660000 + }, + { + "epoch": 87.5, + "learning_rate": 6.267524759342658e-06, + "loss": 1.8566, + "step": 17660500 + }, + { + "epoch": 87.5, + "learning_rate": 6.266286172916574e-06, + "loss": 1.8144, + "step": 17661000 + }, + { + "epoch": 87.5, + "learning_rate": 6.2650475864904894e-06, + "loss": 1.855, + "step": 17661500 + }, + { + "epoch": 87.5, + "learning_rate": 6.263811477237259e-06, + "loss": 1.8113, + "step": 17662000 + }, + { + "epoch": 87.51, + "learning_rate": 6.262572890811174e-06, + "loss": 1.827, + "step": 17662500 + }, + { + "epoch": 87.51, + "learning_rate": 6.261334304385091e-06, + "loss": 1.811, + "step": 17663000 + }, + { + "epoch": 87.51, + "learning_rate": 6.260095717959008e-06, + "loss": 1.8157, + "step": 17663500 + }, + { + "epoch": 87.51, + "learning_rate": 6.258859608705776e-06, + "loss": 1.8394, + "step": 17664000 + }, + { + "epoch": 87.52, + "learning_rate": 6.257623499452545e-06, + "loss": 1.8479, + "step": 17664500 + }, + { + "epoch": 87.52, + "learning_rate": 6.256384913026461e-06, + "loss": 1.8138, + "step": 17665000 + }, + { + "epoch": 87.52, + "learning_rate": 6.255146326600378e-06, + "loss": 1.8185, + "step": 17665500 + }, + { + "epoch": 87.52, + "learning_rate": 6.253907740174295e-06, + "loss": 1.8483, + "step": 17666000 + }, + { + "epoch": 87.53, + "learning_rate": 6.25266915374821e-06, + "loss": 1.8258, + "step": 17666500 + }, + { + "epoch": 87.53, + "learning_rate": 6.251430567322126e-06, + "loss": 1.8305, + "step": 17667000 + }, + { + "epoch": 87.53, + "learning_rate": 6.250191980896043e-06, + "loss": 1.8099, + "step": 17667500 + }, + { + "epoch": 87.53, + "learning_rate": 6.24895339446996e-06, + "loss": 1.8516, + "step": 17668000 + }, + { + "epoch": 87.54, + "learning_rate": 6.247717285216728e-06, + "loss": 1.8094, + "step": 17668500 + }, + { + "epoch": 87.54, + "learning_rate": 6.246478698790645e-06, + "loss": 1.8285, + "step": 17669000 + }, + { + "epoch": 87.54, + "learning_rate": 6.245242589537413e-06, + "loss": 1.8336, + "step": 17669500 + }, + { + "epoch": 87.54, + "learning_rate": 6.244004003111329e-06, + "loss": 1.8099, + "step": 17670000 + }, + { + "epoch": 87.55, + "learning_rate": 6.242765416685246e-06, + "loss": 1.8227, + "step": 17670500 + }, + { + "epoch": 87.55, + "learning_rate": 6.241526830259162e-06, + "loss": 1.8264, + "step": 17671000 + }, + { + "epoch": 87.55, + "learning_rate": 6.240290721005931e-06, + "loss": 1.8328, + "step": 17671500 + }, + { + "epoch": 87.55, + "learning_rate": 6.239052134579848e-06, + "loss": 1.8252, + "step": 17672000 + }, + { + "epoch": 87.56, + "learning_rate": 6.237813548153763e-06, + "loss": 1.8154, + "step": 17672500 + }, + { + "epoch": 87.56, + "learning_rate": 6.23657496172768e-06, + "loss": 1.8381, + "step": 17673000 + }, + { + "epoch": 87.56, + "learning_rate": 6.235336375301596e-06, + "loss": 1.8492, + "step": 17673500 + }, + { + "epoch": 87.56, + "learning_rate": 6.234097788875513e-06, + "loss": 1.8323, + "step": 17674000 + }, + { + "epoch": 87.57, + "learning_rate": 6.232859202449429e-06, + "loss": 1.8543, + "step": 17674500 + }, + { + "epoch": 87.57, + "learning_rate": 6.231620616023345e-06, + "loss": 1.8258, + "step": 17675000 + }, + { + "epoch": 87.57, + "learning_rate": 6.230382029597261e-06, + "loss": 1.8496, + "step": 17675500 + }, + { + "epoch": 87.57, + "learning_rate": 6.229143443171178e-06, + "loss": 1.8106, + "step": 17676000 + }, + { + "epoch": 87.58, + "learning_rate": 6.227904856745094e-06, + "loss": 1.8108, + "step": 17676500 + }, + { + "epoch": 87.58, + "learning_rate": 6.22666627031901e-06, + "loss": 1.8448, + "step": 17677000 + }, + { + "epoch": 87.58, + "learning_rate": 6.225427683892926e-06, + "loss": 1.8243, + "step": 17677500 + }, + { + "epoch": 87.58, + "learning_rate": 6.224189097466843e-06, + "loss": 1.8525, + "step": 17678000 + }, + { + "epoch": 87.59, + "learning_rate": 6.222955465386464e-06, + "loss": 1.8224, + "step": 17678500 + }, + { + "epoch": 87.59, + "learning_rate": 6.221716878960381e-06, + "loss": 1.824, + "step": 17679000 + }, + { + "epoch": 87.59, + "learning_rate": 6.220478292534296e-06, + "loss": 1.807, + "step": 17679500 + }, + { + "epoch": 87.59, + "learning_rate": 6.219239706108213e-06, + "loss": 1.8383, + "step": 17680000 + }, + { + "epoch": 87.6, + "learning_rate": 6.218001119682129e-06, + "loss": 1.8336, + "step": 17680500 + }, + { + "epoch": 87.6, + "learning_rate": 6.216762533256046e-06, + "loss": 1.8166, + "step": 17681000 + }, + { + "epoch": 87.6, + "learning_rate": 6.215523946829962e-06, + "loss": 1.8116, + "step": 17681500 + }, + { + "epoch": 87.6, + "learning_rate": 6.214285360403878e-06, + "loss": 1.8015, + "step": 17682000 + }, + { + "epoch": 87.61, + "learning_rate": 6.213046773977795e-06, + "loss": 1.8086, + "step": 17682500 + }, + { + "epoch": 87.61, + "learning_rate": 6.211808187551711e-06, + "loss": 1.8385, + "step": 17683000 + }, + { + "epoch": 87.61, + "learning_rate": 6.21057207829848e-06, + "loss": 1.8326, + "step": 17683500 + }, + { + "epoch": 87.61, + "learning_rate": 6.209333491872396e-06, + "loss": 1.8071, + "step": 17684000 + }, + { + "epoch": 87.62, + "learning_rate": 6.208097382619165e-06, + "loss": 1.8232, + "step": 17684500 + }, + { + "epoch": 87.62, + "learning_rate": 6.206858796193081e-06, + "loss": 1.8327, + "step": 17685000 + }, + { + "epoch": 87.62, + "learning_rate": 6.205620209766998e-06, + "loss": 1.8176, + "step": 17685500 + }, + { + "epoch": 87.62, + "learning_rate": 6.204381623340914e-06, + "loss": 1.8322, + "step": 17686000 + }, + { + "epoch": 87.63, + "learning_rate": 6.20314303691483e-06, + "loss": 1.8365, + "step": 17686500 + }, + { + "epoch": 87.63, + "learning_rate": 6.201906927661599e-06, + "loss": 1.8139, + "step": 17687000 + }, + { + "epoch": 87.63, + "learning_rate": 6.200668341235515e-06, + "loss": 1.8421, + "step": 17687500 + }, + { + "epoch": 87.63, + "learning_rate": 6.199429754809431e-06, + "loss": 1.827, + "step": 17688000 + }, + { + "epoch": 87.63, + "learning_rate": 6.198191168383348e-06, + "loss": 1.8177, + "step": 17688500 + }, + { + "epoch": 87.64, + "learning_rate": 6.196952581957264e-06, + "loss": 1.8175, + "step": 17689000 + }, + { + "epoch": 87.64, + "learning_rate": 6.195716472704033e-06, + "loss": 1.8141, + "step": 17689500 + }, + { + "epoch": 87.64, + "learning_rate": 6.194477886277949e-06, + "loss": 1.851, + "step": 17690000 + }, + { + "epoch": 87.64, + "learning_rate": 6.193239299851866e-06, + "loss": 1.8221, + "step": 17690500 + }, + { + "epoch": 87.65, + "learning_rate": 6.192000713425781e-06, + "loss": 1.8374, + "step": 17691000 + }, + { + "epoch": 87.65, + "learning_rate": 6.190764604172551e-06, + "loss": 1.8158, + "step": 17691500 + }, + { + "epoch": 87.65, + "learning_rate": 6.189526017746467e-06, + "loss": 1.8315, + "step": 17692000 + }, + { + "epoch": 87.65, + "learning_rate": 6.188287431320383e-06, + "loss": 1.8254, + "step": 17692500 + }, + { + "epoch": 87.66, + "learning_rate": 6.187048844894299e-06, + "loss": 1.8333, + "step": 17693000 + }, + { + "epoch": 87.66, + "learning_rate": 6.185812735641068e-06, + "loss": 1.8367, + "step": 17693500 + }, + { + "epoch": 87.66, + "learning_rate": 6.184576626387837e-06, + "loss": 1.8256, + "step": 17694000 + }, + { + "epoch": 87.66, + "learning_rate": 6.183338039961753e-06, + "loss": 1.8391, + "step": 17694500 + }, + { + "epoch": 87.67, + "learning_rate": 6.182099453535669e-06, + "loss": 1.8085, + "step": 17695000 + }, + { + "epoch": 87.67, + "learning_rate": 6.180860867109586e-06, + "loss": 1.8249, + "step": 17695500 + }, + { + "epoch": 87.67, + "learning_rate": 6.179622280683502e-06, + "loss": 1.8194, + "step": 17696000 + }, + { + "epoch": 87.67, + "learning_rate": 6.178383694257418e-06, + "loss": 1.8082, + "step": 17696500 + }, + { + "epoch": 87.68, + "learning_rate": 6.177145107831334e-06, + "loss": 1.8342, + "step": 17697000 + }, + { + "epoch": 87.68, + "learning_rate": 6.175906521405251e-06, + "loss": 1.8154, + "step": 17697500 + }, + { + "epoch": 87.68, + "learning_rate": 6.174667934979167e-06, + "loss": 1.8239, + "step": 17698000 + }, + { + "epoch": 87.68, + "learning_rate": 6.173429348553084e-06, + "loss": 1.8173, + "step": 17698500 + }, + { + "epoch": 87.69, + "learning_rate": 6.172190762127e-06, + "loss": 1.8309, + "step": 17699000 + }, + { + "epoch": 87.69, + "learning_rate": 6.170952175700916e-06, + "loss": 1.8201, + "step": 17699500 + }, + { + "epoch": 87.69, + "learning_rate": 6.169713589274833e-06, + "loss": 1.8274, + "step": 17700000 + }, + { + "epoch": 87.69, + "learning_rate": 6.168477480021601e-06, + "loss": 1.8052, + "step": 17700500 + }, + { + "epoch": 87.7, + "learning_rate": 6.167238893595518e-06, + "loss": 1.8383, + "step": 17701000 + }, + { + "epoch": 87.7, + "learning_rate": 6.166000307169434e-06, + "loss": 1.8322, + "step": 17701500 + }, + { + "epoch": 87.7, + "learning_rate": 6.164761720743351e-06, + "loss": 1.8445, + "step": 17702000 + }, + { + "epoch": 87.7, + "learning_rate": 6.163523134317266e-06, + "loss": 1.8536, + "step": 17702500 + }, + { + "epoch": 87.71, + "learning_rate": 6.162284547891183e-06, + "loss": 1.819, + "step": 17703000 + }, + { + "epoch": 87.71, + "learning_rate": 6.161045961465099e-06, + "loss": 1.8164, + "step": 17703500 + }, + { + "epoch": 87.71, + "learning_rate": 6.159809852211868e-06, + "loss": 1.8247, + "step": 17704000 + }, + { + "epoch": 87.71, + "learning_rate": 6.158571265785784e-06, + "loss": 1.8331, + "step": 17704500 + }, + { + "epoch": 87.72, + "learning_rate": 6.157335156532553e-06, + "loss": 1.8169, + "step": 17705000 + }, + { + "epoch": 87.72, + "learning_rate": 6.156096570106469e-06, + "loss": 1.8158, + "step": 17705500 + }, + { + "epoch": 87.72, + "learning_rate": 6.154857983680386e-06, + "loss": 1.8217, + "step": 17706000 + }, + { + "epoch": 87.72, + "learning_rate": 6.153619397254302e-06, + "loss": 1.8286, + "step": 17706500 + }, + { + "epoch": 87.73, + "learning_rate": 6.152380810828218e-06, + "loss": 1.8158, + "step": 17707000 + }, + { + "epoch": 87.73, + "learning_rate": 6.151142224402134e-06, + "loss": 1.8172, + "step": 17707500 + }, + { + "epoch": 87.73, + "learning_rate": 6.149903637976051e-06, + "loss": 1.8198, + "step": 17708000 + }, + { + "epoch": 87.73, + "learning_rate": 6.148665051549967e-06, + "loss": 1.8277, + "step": 17708500 + }, + { + "epoch": 87.74, + "learning_rate": 6.147428942296736e-06, + "loss": 1.8256, + "step": 17709000 + }, + { + "epoch": 87.74, + "learning_rate": 6.146190355870652e-06, + "loss": 1.8526, + "step": 17709500 + }, + { + "epoch": 87.74, + "learning_rate": 6.144951769444569e-06, + "loss": 1.8437, + "step": 17710000 + }, + { + "epoch": 87.74, + "learning_rate": 6.143713183018485e-06, + "loss": 1.8341, + "step": 17710500 + }, + { + "epoch": 87.75, + "learning_rate": 6.142477073765254e-06, + "loss": 1.8153, + "step": 17711000 + }, + { + "epoch": 87.75, + "learning_rate": 6.14123848733917e-06, + "loss": 1.8296, + "step": 17711500 + }, + { + "epoch": 87.75, + "learning_rate": 6.139999900913086e-06, + "loss": 1.8123, + "step": 17712000 + }, + { + "epoch": 87.75, + "learning_rate": 6.138761314487002e-06, + "loss": 1.806, + "step": 17712500 + }, + { + "epoch": 87.76, + "learning_rate": 6.137525205233771e-06, + "loss": 1.8173, + "step": 17713000 + }, + { + "epoch": 87.76, + "learning_rate": 6.136286618807687e-06, + "loss": 1.8323, + "step": 17713500 + }, + { + "epoch": 87.76, + "learning_rate": 6.135048032381604e-06, + "loss": 1.8238, + "step": 17714000 + }, + { + "epoch": 87.76, + "learning_rate": 6.133809445955521e-06, + "loss": 1.8354, + "step": 17714500 + }, + { + "epoch": 87.77, + "learning_rate": 6.132570859529437e-06, + "loss": 1.8501, + "step": 17715000 + }, + { + "epoch": 87.77, + "learning_rate": 6.131334750276205e-06, + "loss": 1.8207, + "step": 17715500 + }, + { + "epoch": 87.77, + "learning_rate": 6.130096163850122e-06, + "loss": 1.8171, + "step": 17716000 + }, + { + "epoch": 87.77, + "learning_rate": 6.128857577424038e-06, + "loss": 1.8354, + "step": 17716500 + }, + { + "epoch": 87.78, + "learning_rate": 6.127618990997954e-06, + "loss": 1.8212, + "step": 17717000 + }, + { + "epoch": 87.78, + "learning_rate": 6.126380404571871e-06, + "loss": 1.8249, + "step": 17717500 + }, + { + "epoch": 87.78, + "learning_rate": 6.125141818145787e-06, + "loss": 1.8251, + "step": 17718000 + }, + { + "epoch": 87.78, + "learning_rate": 6.123903231719703e-06, + "loss": 1.8133, + "step": 17718500 + }, + { + "epoch": 87.79, + "learning_rate": 6.122664645293619e-06, + "loss": 1.8403, + "step": 17719000 + }, + { + "epoch": 87.79, + "learning_rate": 6.121426058867536e-06, + "loss": 1.8308, + "step": 17719500 + }, + { + "epoch": 87.79, + "learning_rate": 6.120189949614304e-06, + "loss": 1.8157, + "step": 17720000 + }, + { + "epoch": 87.79, + "learning_rate": 6.118951363188221e-06, + "loss": 1.8299, + "step": 17720500 + }, + { + "epoch": 87.8, + "learning_rate": 6.117712776762137e-06, + "loss": 1.797, + "step": 17721000 + }, + { + "epoch": 87.8, + "learning_rate": 6.116474190336054e-06, + "loss": 1.848, + "step": 17721500 + }, + { + "epoch": 87.8, + "learning_rate": 6.115238081082822e-06, + "loss": 1.8249, + "step": 17722000 + }, + { + "epoch": 87.8, + "learning_rate": 6.113999494656739e-06, + "loss": 1.8512, + "step": 17722500 + }, + { + "epoch": 87.81, + "learning_rate": 6.112760908230655e-06, + "loss": 1.822, + "step": 17723000 + }, + { + "epoch": 87.81, + "learning_rate": 6.111522321804571e-06, + "loss": 1.8389, + "step": 17723500 + }, + { + "epoch": 87.81, + "learning_rate": 6.110283735378487e-06, + "loss": 1.8311, + "step": 17724000 + }, + { + "epoch": 87.81, + "learning_rate": 6.109045148952404e-06, + "loss": 1.824, + "step": 17724500 + }, + { + "epoch": 87.82, + "learning_rate": 6.10780656252632e-06, + "loss": 1.8395, + "step": 17725000 + }, + { + "epoch": 87.82, + "learning_rate": 6.106570453273089e-06, + "loss": 1.8528, + "step": 17725500 + }, + { + "epoch": 87.82, + "learning_rate": 6.105331866847005e-06, + "loss": 1.8619, + "step": 17726000 + }, + { + "epoch": 87.82, + "learning_rate": 6.104093280420922e-06, + "loss": 1.8332, + "step": 17726500 + }, + { + "epoch": 87.83, + "learning_rate": 6.102854693994838e-06, + "loss": 1.8256, + "step": 17727000 + }, + { + "epoch": 87.83, + "learning_rate": 6.101616107568754e-06, + "loss": 1.8151, + "step": 17727500 + }, + { + "epoch": 87.83, + "learning_rate": 6.100377521142671e-06, + "loss": 1.8205, + "step": 17728000 + }, + { + "epoch": 87.83, + "learning_rate": 6.099138934716587e-06, + "loss": 1.825, + "step": 17728500 + }, + { + "epoch": 87.84, + "learning_rate": 6.097902825463356e-06, + "loss": 1.8067, + "step": 17729000 + }, + { + "epoch": 87.84, + "learning_rate": 6.096666716210124e-06, + "loss": 1.8157, + "step": 17729500 + }, + { + "epoch": 87.84, + "learning_rate": 6.095430606956893e-06, + "loss": 1.8216, + "step": 17730000 + }, + { + "epoch": 87.84, + "learning_rate": 6.094192020530809e-06, + "loss": 1.8084, + "step": 17730500 + }, + { + "epoch": 87.85, + "learning_rate": 6.092953434104725e-06, + "loss": 1.831, + "step": 17731000 + }, + { + "epoch": 87.85, + "learning_rate": 6.091714847678642e-06, + "loss": 1.8271, + "step": 17731500 + }, + { + "epoch": 87.85, + "learning_rate": 6.09047873842541e-06, + "loss": 1.8161, + "step": 17732000 + }, + { + "epoch": 87.85, + "learning_rate": 6.089240151999327e-06, + "loss": 1.8319, + "step": 17732500 + }, + { + "epoch": 87.86, + "learning_rate": 6.088001565573243e-06, + "loss": 1.838, + "step": 17733000 + }, + { + "epoch": 87.86, + "learning_rate": 6.086762979147159e-06, + "loss": 1.8328, + "step": 17733500 + }, + { + "epoch": 87.86, + "learning_rate": 6.085524392721076e-06, + "loss": 1.8399, + "step": 17734000 + }, + { + "epoch": 87.86, + "learning_rate": 6.084285806294992e-06, + "loss": 1.8016, + "step": 17734500 + }, + { + "epoch": 87.87, + "learning_rate": 6.083047219868909e-06, + "loss": 1.835, + "step": 17735000 + }, + { + "epoch": 87.87, + "learning_rate": 6.081808633442825e-06, + "loss": 1.8163, + "step": 17735500 + }, + { + "epoch": 87.87, + "learning_rate": 6.080570047016741e-06, + "loss": 1.8132, + "step": 17736000 + }, + { + "epoch": 87.87, + "learning_rate": 6.079331460590657e-06, + "loss": 1.8062, + "step": 17736500 + }, + { + "epoch": 87.88, + "learning_rate": 6.078095351337426e-06, + "loss": 1.8229, + "step": 17737000 + }, + { + "epoch": 87.88, + "learning_rate": 6.076856764911342e-06, + "loss": 1.8141, + "step": 17737500 + }, + { + "epoch": 87.88, + "learning_rate": 6.075618178485259e-06, + "loss": 1.8311, + "step": 17738000 + }, + { + "epoch": 87.88, + "learning_rate": 6.074379592059175e-06, + "loss": 1.8244, + "step": 17738500 + }, + { + "epoch": 87.89, + "learning_rate": 6.073141005633092e-06, + "loss": 1.8049, + "step": 17739000 + }, + { + "epoch": 87.89, + "learning_rate": 6.071902419207007e-06, + "loss": 1.8254, + "step": 17739500 + }, + { + "epoch": 87.89, + "learning_rate": 6.070663832780924e-06, + "loss": 1.8266, + "step": 17740000 + }, + { + "epoch": 87.89, + "learning_rate": 6.06942524635484e-06, + "loss": 1.8471, + "step": 17740500 + }, + { + "epoch": 87.9, + "learning_rate": 6.068189137101609e-06, + "loss": 1.8245, + "step": 17741000 + }, + { + "epoch": 87.9, + "learning_rate": 6.066950550675525e-06, + "loss": 1.8169, + "step": 17741500 + }, + { + "epoch": 87.9, + "learning_rate": 6.065711964249442e-06, + "loss": 1.8129, + "step": 17742000 + }, + { + "epoch": 87.9, + "learning_rate": 6.064473377823359e-06, + "loss": 1.828, + "step": 17742500 + }, + { + "epoch": 87.9, + "learning_rate": 6.063234791397274e-06, + "loss": 1.8073, + "step": 17743000 + }, + { + "epoch": 87.91, + "learning_rate": 6.061996204971191e-06, + "loss": 1.812, + "step": 17743500 + }, + { + "epoch": 87.91, + "learning_rate": 6.060757618545107e-06, + "loss": 1.8256, + "step": 17744000 + }, + { + "epoch": 87.91, + "learning_rate": 6.059519032119024e-06, + "loss": 1.839, + "step": 17744500 + }, + { + "epoch": 87.91, + "learning_rate": 6.05828044569294e-06, + "loss": 1.8202, + "step": 17745000 + }, + { + "epoch": 87.92, + "learning_rate": 6.05704681361256e-06, + "loss": 1.839, + "step": 17745500 + }, + { + "epoch": 87.92, + "learning_rate": 6.055808227186477e-06, + "loss": 1.8244, + "step": 17746000 + }, + { + "epoch": 87.92, + "learning_rate": 6.054569640760394e-06, + "loss": 1.8062, + "step": 17746500 + }, + { + "epoch": 87.92, + "learning_rate": 6.05333105433431e-06, + "loss": 1.8275, + "step": 17747000 + }, + { + "epoch": 87.93, + "learning_rate": 6.052092467908226e-06, + "loss": 1.813, + "step": 17747500 + }, + { + "epoch": 87.93, + "learning_rate": 6.050853881482142e-06, + "loss": 1.8204, + "step": 17748000 + }, + { + "epoch": 87.93, + "learning_rate": 6.049615295056059e-06, + "loss": 1.8407, + "step": 17748500 + }, + { + "epoch": 87.93, + "learning_rate": 6.048376708629975e-06, + "loss": 1.7993, + "step": 17749000 + }, + { + "epoch": 87.94, + "learning_rate": 6.047138122203892e-06, + "loss": 1.8447, + "step": 17749500 + }, + { + "epoch": 87.94, + "learning_rate": 6.045899535777807e-06, + "loss": 1.849, + "step": 17750000 + }, + { + "epoch": 87.94, + "learning_rate": 6.044660949351724e-06, + "loss": 1.8127, + "step": 17750500 + }, + { + "epoch": 87.94, + "learning_rate": 6.04342236292564e-06, + "loss": 1.8166, + "step": 17751000 + }, + { + "epoch": 87.95, + "learning_rate": 6.042186253672409e-06, + "loss": 1.8481, + "step": 17751500 + }, + { + "epoch": 87.95, + "learning_rate": 6.040947667246325e-06, + "loss": 1.8464, + "step": 17752000 + }, + { + "epoch": 87.95, + "learning_rate": 6.039709080820242e-06, + "loss": 1.8202, + "step": 17752500 + }, + { + "epoch": 87.95, + "learning_rate": 6.038470494394158e-06, + "loss": 1.826, + "step": 17753000 + }, + { + "epoch": 87.96, + "learning_rate": 6.037231907968074e-06, + "loss": 1.8203, + "step": 17753500 + }, + { + "epoch": 87.96, + "learning_rate": 6.035993321541991e-06, + "loss": 1.8098, + "step": 17754000 + }, + { + "epoch": 87.96, + "learning_rate": 6.034754735115907e-06, + "loss": 1.8252, + "step": 17754500 + }, + { + "epoch": 87.96, + "learning_rate": 6.033516148689824e-06, + "loss": 1.8411, + "step": 17755000 + }, + { + "epoch": 87.97, + "learning_rate": 6.032280039436592e-06, + "loss": 1.8252, + "step": 17755500 + }, + { + "epoch": 87.97, + "learning_rate": 6.03104393018336e-06, + "loss": 1.831, + "step": 17756000 + }, + { + "epoch": 87.97, + "learning_rate": 6.029805343757277e-06, + "loss": 1.8074, + "step": 17756500 + }, + { + "epoch": 87.97, + "learning_rate": 6.028566757331194e-06, + "loss": 1.812, + "step": 17757000 + }, + { + "epoch": 87.98, + "learning_rate": 6.02732817090511e-06, + "loss": 1.8283, + "step": 17757500 + }, + { + "epoch": 87.98, + "learning_rate": 6.026092061651878e-06, + "loss": 1.8396, + "step": 17758000 + }, + { + "epoch": 87.98, + "learning_rate": 6.024853475225795e-06, + "loss": 1.8174, + "step": 17758500 + }, + { + "epoch": 87.98, + "learning_rate": 6.023614888799711e-06, + "loss": 1.8255, + "step": 17759000 + }, + { + "epoch": 87.99, + "learning_rate": 6.022376302373627e-06, + "loss": 1.8351, + "step": 17759500 + }, + { + "epoch": 87.99, + "learning_rate": 6.021137715947544e-06, + "loss": 1.8403, + "step": 17760000 + }, + { + "epoch": 87.99, + "learning_rate": 6.01989912952146e-06, + "loss": 1.8434, + "step": 17760500 + }, + { + "epoch": 87.99, + "learning_rate": 6.018660543095377e-06, + "loss": 1.8383, + "step": 17761000 + }, + { + "epoch": 88.0, + "learning_rate": 6.017421956669292e-06, + "loss": 1.8302, + "step": 17761500 + }, + { + "epoch": 88.0, + "learning_rate": 6.016183370243209e-06, + "loss": 1.7973, + "step": 17762000 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.6831497678917225, + "eval_accuracy_mlm": 0.6442477508880258, + "eval_accuracy_nsp": 0.8669982232437372, + "eval_loss": 2.3037989139556885, + "eval_runtime": 146.7619, + "eval_samples_per_second": 1737.229, + "eval_steps_per_second": 72.389, + "step": 17762184 + }, + { + "epoch": 88.0, + "learning_rate": 6.014947260989977e-06, + "loss": 1.819, + "step": 17762500 + }, + { + "epoch": 88.0, + "learning_rate": 6.013708674563894e-06, + "loss": 1.8279, + "step": 17763000 + }, + { + "epoch": 88.01, + "learning_rate": 6.01247008813781e-06, + "loss": 1.83, + "step": 17763500 + }, + { + "epoch": 88.01, + "learning_rate": 6.011231501711727e-06, + "loss": 1.7914, + "step": 17764000 + }, + { + "epoch": 88.01, + "learning_rate": 6.009995392458495e-06, + "loss": 1.8097, + "step": 17764500 + }, + { + "epoch": 88.01, + "learning_rate": 6.008756806032412e-06, + "loss": 1.8406, + "step": 17765000 + }, + { + "epoch": 88.02, + "learning_rate": 6.007518219606328e-06, + "loss": 1.8153, + "step": 17765500 + }, + { + "epoch": 88.02, + "learning_rate": 6.006279633180244e-06, + "loss": 1.8438, + "step": 17766000 + }, + { + "epoch": 88.02, + "learning_rate": 6.00504104675416e-06, + "loss": 1.7921, + "step": 17766500 + }, + { + "epoch": 88.02, + "learning_rate": 6.003802460328077e-06, + "loss": 1.8244, + "step": 17767000 + }, + { + "epoch": 88.03, + "learning_rate": 6.002563873901994e-06, + "loss": 1.8271, + "step": 17767500 + }, + { + "epoch": 88.03, + "learning_rate": 6.00132528747591e-06, + "loss": 1.8028, + "step": 17768000 + }, + { + "epoch": 88.03, + "learning_rate": 6.000089178222678e-06, + "loss": 1.8089, + "step": 17768500 + }, + { + "epoch": 88.03, + "learning_rate": 5.998850591796595e-06, + "loss": 1.8148, + "step": 17769000 + }, + { + "epoch": 88.04, + "learning_rate": 5.997614482543363e-06, + "loss": 1.7916, + "step": 17769500 + }, + { + "epoch": 88.04, + "learning_rate": 5.99637589611728e-06, + "loss": 1.8056, + "step": 17770000 + }, + { + "epoch": 88.04, + "learning_rate": 5.995137309691196e-06, + "loss": 1.8231, + "step": 17770500 + }, + { + "epoch": 88.04, + "learning_rate": 5.993898723265112e-06, + "loss": 1.8121, + "step": 17771000 + }, + { + "epoch": 88.05, + "learning_rate": 5.992660136839029e-06, + "loss": 1.8225, + "step": 17771500 + }, + { + "epoch": 88.05, + "learning_rate": 5.991421550412945e-06, + "loss": 1.8176, + "step": 17772000 + }, + { + "epoch": 88.05, + "learning_rate": 5.990182963986862e-06, + "loss": 1.8288, + "step": 17772500 + }, + { + "epoch": 88.05, + "learning_rate": 5.988944377560777e-06, + "loss": 1.8452, + "step": 17773000 + }, + { + "epoch": 88.06, + "learning_rate": 5.987708268307547e-06, + "loss": 1.7982, + "step": 17773500 + }, + { + "epoch": 88.06, + "learning_rate": 5.986469681881462e-06, + "loss": 1.8291, + "step": 17774000 + }, + { + "epoch": 88.06, + "learning_rate": 5.985231095455379e-06, + "loss": 1.8449, + "step": 17774500 + }, + { + "epoch": 88.06, + "learning_rate": 5.983992509029295e-06, + "loss": 1.8175, + "step": 17775000 + }, + { + "epoch": 88.07, + "learning_rate": 5.982753922603212e-06, + "loss": 1.8107, + "step": 17775500 + }, + { + "epoch": 88.07, + "learning_rate": 5.98151781334998e-06, + "loss": 1.833, + "step": 17776000 + }, + { + "epoch": 88.07, + "learning_rate": 5.980279226923897e-06, + "loss": 1.8165, + "step": 17776500 + }, + { + "epoch": 88.07, + "learning_rate": 5.979040640497813e-06, + "loss": 1.8254, + "step": 17777000 + }, + { + "epoch": 88.08, + "learning_rate": 5.977802054071729e-06, + "loss": 1.8204, + "step": 17777500 + }, + { + "epoch": 88.08, + "learning_rate": 5.976563467645645e-06, + "loss": 1.8242, + "step": 17778000 + }, + { + "epoch": 88.08, + "learning_rate": 5.975324881219562e-06, + "loss": 1.824, + "step": 17778500 + }, + { + "epoch": 88.08, + "learning_rate": 5.974086294793478e-06, + "loss": 1.8159, + "step": 17779000 + }, + { + "epoch": 88.09, + "learning_rate": 5.972847708367395e-06, + "loss": 1.815, + "step": 17779500 + }, + { + "epoch": 88.09, + "learning_rate": 5.971611599114163e-06, + "loss": 1.8557, + "step": 17780000 + }, + { + "epoch": 88.09, + "learning_rate": 5.970375489860932e-06, + "loss": 1.8091, + "step": 17780500 + }, + { + "epoch": 88.09, + "learning_rate": 5.969136903434848e-06, + "loss": 1.8489, + "step": 17781000 + }, + { + "epoch": 88.1, + "learning_rate": 5.967898317008765e-06, + "loss": 1.8235, + "step": 17781500 + }, + { + "epoch": 88.1, + "learning_rate": 5.966662207755533e-06, + "loss": 1.8242, + "step": 17782000 + }, + { + "epoch": 88.1, + "learning_rate": 5.96542362132945e-06, + "loss": 1.8297, + "step": 17782500 + }, + { + "epoch": 88.1, + "learning_rate": 5.964185034903366e-06, + "loss": 1.8157, + "step": 17783000 + }, + { + "epoch": 88.11, + "learning_rate": 5.962946448477282e-06, + "loss": 1.8174, + "step": 17783500 + }, + { + "epoch": 88.11, + "learning_rate": 5.961707862051198e-06, + "loss": 1.8315, + "step": 17784000 + }, + { + "epoch": 88.11, + "learning_rate": 5.960469275625115e-06, + "loss": 1.8312, + "step": 17784500 + }, + { + "epoch": 88.11, + "learning_rate": 5.959230689199032e-06, + "loss": 1.7971, + "step": 17785000 + }, + { + "epoch": 88.12, + "learning_rate": 5.957992102772947e-06, + "loss": 1.8005, + "step": 17785500 + }, + { + "epoch": 88.12, + "learning_rate": 5.956753516346864e-06, + "loss": 1.8231, + "step": 17786000 + }, + { + "epoch": 88.12, + "learning_rate": 5.955517407093633e-06, + "loss": 1.8163, + "step": 17786500 + }, + { + "epoch": 88.12, + "learning_rate": 5.954278820667549e-06, + "loss": 1.8256, + "step": 17787000 + }, + { + "epoch": 88.13, + "learning_rate": 5.953040234241465e-06, + "loss": 1.8067, + "step": 17787500 + }, + { + "epoch": 88.13, + "learning_rate": 5.951801647815382e-06, + "loss": 1.8117, + "step": 17788000 + }, + { + "epoch": 88.13, + "learning_rate": 5.950563061389298e-06, + "loss": 1.8212, + "step": 17788500 + }, + { + "epoch": 88.13, + "learning_rate": 5.949324474963214e-06, + "loss": 1.8103, + "step": 17789000 + }, + { + "epoch": 88.14, + "learning_rate": 5.94808588853713e-06, + "loss": 1.8315, + "step": 17789500 + }, + { + "epoch": 88.14, + "learning_rate": 5.946847302111047e-06, + "loss": 1.8182, + "step": 17790000 + }, + { + "epoch": 88.14, + "learning_rate": 5.945611192857815e-06, + "loss": 1.7896, + "step": 17790500 + }, + { + "epoch": 88.14, + "learning_rate": 5.944372606431732e-06, + "loss": 1.8124, + "step": 17791000 + }, + { + "epoch": 88.15, + "learning_rate": 5.943134020005648e-06, + "loss": 1.8049, + "step": 17791500 + }, + { + "epoch": 88.15, + "learning_rate": 5.94190038792527e-06, + "loss": 1.8213, + "step": 17792000 + }, + { + "epoch": 88.15, + "learning_rate": 5.940661801499185e-06, + "loss": 1.8407, + "step": 17792500 + }, + { + "epoch": 88.15, + "learning_rate": 5.939423215073102e-06, + "loss": 1.8289, + "step": 17793000 + }, + { + "epoch": 88.16, + "learning_rate": 5.938184628647018e-06, + "loss": 1.8159, + "step": 17793500 + }, + { + "epoch": 88.16, + "learning_rate": 5.936946042220935e-06, + "loss": 1.8359, + "step": 17794000 + }, + { + "epoch": 88.16, + "learning_rate": 5.935707455794851e-06, + "loss": 1.8434, + "step": 17794500 + }, + { + "epoch": 88.16, + "learning_rate": 5.934468869368767e-06, + "loss": 1.8457, + "step": 17795000 + }, + { + "epoch": 88.17, + "learning_rate": 5.933230282942683e-06, + "loss": 1.8386, + "step": 17795500 + }, + { + "epoch": 88.17, + "learning_rate": 5.931994173689452e-06, + "loss": 1.844, + "step": 17796000 + }, + { + "epoch": 88.17, + "learning_rate": 5.930755587263368e-06, + "loss": 1.82, + "step": 17796500 + }, + { + "epoch": 88.17, + "learning_rate": 5.929517000837285e-06, + "loss": 1.8362, + "step": 17797000 + }, + { + "epoch": 88.17, + "learning_rate": 5.928278414411201e-06, + "loss": 1.8273, + "step": 17797500 + }, + { + "epoch": 88.18, + "learning_rate": 5.927039827985118e-06, + "loss": 1.8381, + "step": 17798000 + }, + { + "epoch": 88.18, + "learning_rate": 5.925801241559033e-06, + "loss": 1.8246, + "step": 17798500 + }, + { + "epoch": 88.18, + "learning_rate": 5.92456265513295e-06, + "loss": 1.822, + "step": 17799000 + }, + { + "epoch": 88.18, + "learning_rate": 5.923324068706867e-06, + "loss": 1.8181, + "step": 17799500 + }, + { + "epoch": 88.19, + "learning_rate": 5.922085482280783e-06, + "loss": 1.817, + "step": 17800000 + }, + { + "epoch": 88.19, + "learning_rate": 5.920846895854699e-06, + "loss": 1.8162, + "step": 17800500 + }, + { + "epoch": 88.19, + "learning_rate": 5.919608309428615e-06, + "loss": 1.8215, + "step": 17801000 + }, + { + "epoch": 88.19, + "learning_rate": 5.918369723002532e-06, + "loss": 1.8301, + "step": 17801500 + }, + { + "epoch": 88.2, + "learning_rate": 5.917131136576448e-06, + "loss": 1.825, + "step": 17802000 + }, + { + "epoch": 88.2, + "learning_rate": 5.915895027323217e-06, + "loss": 1.8223, + "step": 17802500 + }, + { + "epoch": 88.2, + "learning_rate": 5.914658918069985e-06, + "loss": 1.7827, + "step": 17803000 + }, + { + "epoch": 88.2, + "learning_rate": 5.913422808816754e-06, + "loss": 1.8293, + "step": 17803500 + }, + { + "epoch": 88.21, + "learning_rate": 5.91218422239067e-06, + "loss": 1.8175, + "step": 17804000 + }, + { + "epoch": 88.21, + "learning_rate": 5.910945635964587e-06, + "loss": 1.8173, + "step": 17804500 + }, + { + "epoch": 88.21, + "learning_rate": 5.909707049538503e-06, + "loss": 1.8331, + "step": 17805000 + }, + { + "epoch": 88.21, + "learning_rate": 5.90846846311242e-06, + "loss": 1.81, + "step": 17805500 + }, + { + "epoch": 88.22, + "learning_rate": 5.907229876686336e-06, + "loss": 1.7951, + "step": 17806000 + }, + { + "epoch": 88.22, + "learning_rate": 5.905991290260252e-06, + "loss": 1.8434, + "step": 17806500 + }, + { + "epoch": 88.22, + "learning_rate": 5.904752703834168e-06, + "loss": 1.8404, + "step": 17807000 + }, + { + "epoch": 88.22, + "learning_rate": 5.903514117408085e-06, + "loss": 1.7937, + "step": 17807500 + }, + { + "epoch": 88.23, + "learning_rate": 5.902280485327706e-06, + "loss": 1.8347, + "step": 17808000 + }, + { + "epoch": 88.23, + "learning_rate": 5.901041898901622e-06, + "loss": 1.8047, + "step": 17808500 + }, + { + "epoch": 88.23, + "learning_rate": 5.899803312475538e-06, + "loss": 1.8135, + "step": 17809000 + }, + { + "epoch": 88.23, + "learning_rate": 5.898564726049455e-06, + "loss": 1.8122, + "step": 17809500 + }, + { + "epoch": 88.24, + "learning_rate": 5.897328616796223e-06, + "loss": 1.8262, + "step": 17810000 + }, + { + "epoch": 88.24, + "learning_rate": 5.89609003037014e-06, + "loss": 1.8222, + "step": 17810500 + }, + { + "epoch": 88.24, + "learning_rate": 5.894851443944056e-06, + "loss": 1.8188, + "step": 17811000 + }, + { + "epoch": 88.24, + "learning_rate": 5.8936153346908245e-06, + "loss": 1.8085, + "step": 17811500 + }, + { + "epoch": 88.25, + "learning_rate": 5.892376748264741e-06, + "loss": 1.8282, + "step": 17812000 + }, + { + "epoch": 88.25, + "learning_rate": 5.8911381618386575e-06, + "loss": 1.8042, + "step": 17812500 + }, + { + "epoch": 88.25, + "learning_rate": 5.889899575412574e-06, + "loss": 1.8318, + "step": 17813000 + }, + { + "epoch": 88.25, + "learning_rate": 5.88866098898649e-06, + "loss": 1.8204, + "step": 17813500 + }, + { + "epoch": 88.26, + "learning_rate": 5.887422402560406e-06, + "loss": 1.8288, + "step": 17814000 + }, + { + "epoch": 88.26, + "learning_rate": 5.886183816134323e-06, + "loss": 1.8281, + "step": 17814500 + }, + { + "epoch": 88.26, + "learning_rate": 5.884945229708239e-06, + "loss": 1.8125, + "step": 17815000 + }, + { + "epoch": 88.26, + "learning_rate": 5.883706643282155e-06, + "loss": 1.8196, + "step": 17815500 + }, + { + "epoch": 88.27, + "learning_rate": 5.882468056856071e-06, + "loss": 1.8227, + "step": 17816000 + }, + { + "epoch": 88.27, + "learning_rate": 5.88123194760284e-06, + "loss": 1.8512, + "step": 17816500 + }, + { + "epoch": 88.27, + "learning_rate": 5.879993361176756e-06, + "loss": 1.8232, + "step": 17817000 + }, + { + "epoch": 88.27, + "learning_rate": 5.878754774750673e-06, + "loss": 1.8136, + "step": 17817500 + }, + { + "epoch": 88.28, + "learning_rate": 5.87751618832459e-06, + "loss": 1.8283, + "step": 17818000 + }, + { + "epoch": 88.28, + "learning_rate": 5.876277601898506e-06, + "loss": 1.8526, + "step": 17818500 + }, + { + "epoch": 88.28, + "learning_rate": 5.875041492645274e-06, + "loss": 1.8269, + "step": 17819000 + }, + { + "epoch": 88.28, + "learning_rate": 5.873802906219191e-06, + "loss": 1.831, + "step": 17819500 + }, + { + "epoch": 88.29, + "learning_rate": 5.872564319793107e-06, + "loss": 1.8123, + "step": 17820000 + }, + { + "epoch": 88.29, + "learning_rate": 5.871325733367023e-06, + "loss": 1.8251, + "step": 17820500 + }, + { + "epoch": 88.29, + "learning_rate": 5.87008714694094e-06, + "loss": 1.8031, + "step": 17821000 + }, + { + "epoch": 88.29, + "learning_rate": 5.868851037687708e-06, + "loss": 1.8029, + "step": 17821500 + }, + { + "epoch": 88.3, + "learning_rate": 5.867612451261625e-06, + "loss": 1.8264, + "step": 17822000 + }, + { + "epoch": 88.3, + "learning_rate": 5.866373864835541e-06, + "loss": 1.8131, + "step": 17822500 + }, + { + "epoch": 88.3, + "learning_rate": 5.865135278409458e-06, + "loss": 1.8447, + "step": 17823000 + }, + { + "epoch": 88.3, + "learning_rate": 5.863899169156226e-06, + "loss": 1.8269, + "step": 17823500 + }, + { + "epoch": 88.31, + "learning_rate": 5.8626605827301426e-06, + "loss": 1.8116, + "step": 17824000 + }, + { + "epoch": 88.31, + "learning_rate": 5.861421996304059e-06, + "loss": 1.8218, + "step": 17824500 + }, + { + "epoch": 88.31, + "learning_rate": 5.860183409877975e-06, + "loss": 1.8166, + "step": 17825000 + }, + { + "epoch": 88.31, + "learning_rate": 5.858944823451891e-06, + "loss": 1.8215, + "step": 17825500 + }, + { + "epoch": 88.32, + "learning_rate": 5.857706237025808e-06, + "loss": 1.8076, + "step": 17826000 + }, + { + "epoch": 88.32, + "learning_rate": 5.856467650599724e-06, + "loss": 1.8271, + "step": 17826500 + }, + { + "epoch": 88.32, + "learning_rate": 5.85522906417364e-06, + "loss": 1.8115, + "step": 17827000 + }, + { + "epoch": 88.32, + "learning_rate": 5.853990477747556e-06, + "loss": 1.8621, + "step": 17827500 + }, + { + "epoch": 88.33, + "learning_rate": 5.8527568456671775e-06, + "loss": 1.8303, + "step": 17828000 + }, + { + "epoch": 88.33, + "learning_rate": 5.851518259241094e-06, + "loss": 1.8369, + "step": 17828500 + }, + { + "epoch": 88.33, + "learning_rate": 5.8502796728150105e-06, + "loss": 1.8031, + "step": 17829000 + }, + { + "epoch": 88.33, + "learning_rate": 5.849041086388926e-06, + "loss": 1.8043, + "step": 17829500 + }, + { + "epoch": 88.34, + "learning_rate": 5.847802499962843e-06, + "loss": 1.8431, + "step": 17830000 + }, + { + "epoch": 88.34, + "learning_rate": 5.846563913536759e-06, + "loss": 1.825, + "step": 17830500 + }, + { + "epoch": 88.34, + "learning_rate": 5.845327804283528e-06, + "loss": 1.8071, + "step": 17831000 + }, + { + "epoch": 88.34, + "learning_rate": 5.844091695030296e-06, + "loss": 1.8157, + "step": 17831500 + }, + { + "epoch": 88.35, + "learning_rate": 5.8428531086042125e-06, + "loss": 1.8076, + "step": 17832000 + }, + { + "epoch": 88.35, + "learning_rate": 5.8416145221781285e-06, + "loss": 1.8364, + "step": 17832500 + }, + { + "epoch": 88.35, + "learning_rate": 5.8403759357520455e-06, + "loss": 1.8332, + "step": 17833000 + }, + { + "epoch": 88.35, + "learning_rate": 5.8391373493259616e-06, + "loss": 1.8171, + "step": 17833500 + }, + { + "epoch": 88.36, + "learning_rate": 5.837898762899878e-06, + "loss": 1.8198, + "step": 17834000 + }, + { + "epoch": 88.36, + "learning_rate": 5.836660176473794e-06, + "loss": 1.8362, + "step": 17834500 + }, + { + "epoch": 88.36, + "learning_rate": 5.8354240672205625e-06, + "loss": 1.8211, + "step": 17835000 + }, + { + "epoch": 88.36, + "learning_rate": 5.834185480794479e-06, + "loss": 1.8088, + "step": 17835500 + }, + { + "epoch": 88.37, + "learning_rate": 5.8329468943683955e-06, + "loss": 1.8061, + "step": 17836000 + }, + { + "epoch": 88.37, + "learning_rate": 5.831708307942312e-06, + "loss": 1.8115, + "step": 17836500 + }, + { + "epoch": 88.37, + "learning_rate": 5.8304697215162285e-06, + "loss": 1.8296, + "step": 17837000 + }, + { + "epoch": 88.37, + "learning_rate": 5.829231135090145e-06, + "loss": 1.8087, + "step": 17837500 + }, + { + "epoch": 88.38, + "learning_rate": 5.827992548664061e-06, + "loss": 1.8166, + "step": 17838000 + }, + { + "epoch": 88.38, + "learning_rate": 5.826753962237978e-06, + "loss": 1.8368, + "step": 17838500 + }, + { + "epoch": 88.38, + "learning_rate": 5.825517852984746e-06, + "loss": 1.8096, + "step": 17839000 + }, + { + "epoch": 88.38, + "learning_rate": 5.8242792665586625e-06, + "loss": 1.8164, + "step": 17839500 + }, + { + "epoch": 88.39, + "learning_rate": 5.823040680132579e-06, + "loss": 1.814, + "step": 17840000 + }, + { + "epoch": 88.39, + "learning_rate": 5.8218020937064955e-06, + "loss": 1.7898, + "step": 17840500 + }, + { + "epoch": 88.39, + "learning_rate": 5.820563507280411e-06, + "loss": 1.8119, + "step": 17841000 + }, + { + "epoch": 88.39, + "learning_rate": 5.819324920854328e-06, + "loss": 1.8127, + "step": 17841500 + }, + { + "epoch": 88.4, + "learning_rate": 5.818086334428244e-06, + "loss": 1.8111, + "step": 17842000 + }, + { + "epoch": 88.4, + "learning_rate": 5.816847748002161e-06, + "loss": 1.8091, + "step": 17842500 + }, + { + "epoch": 88.4, + "learning_rate": 5.815609161576077e-06, + "loss": 1.8114, + "step": 17843000 + }, + { + "epoch": 88.4, + "learning_rate": 5.814370575149993e-06, + "loss": 1.8277, + "step": 17843500 + }, + { + "epoch": 88.41, + "learning_rate": 5.813131988723909e-06, + "loss": 1.8366, + "step": 17844000 + }, + { + "epoch": 88.41, + "learning_rate": 5.811893402297826e-06, + "loss": 1.8297, + "step": 17844500 + }, + { + "epoch": 88.41, + "learning_rate": 5.810654815871743e-06, + "loss": 1.8186, + "step": 17845000 + }, + { + "epoch": 88.41, + "learning_rate": 5.809418706618511e-06, + "loss": 1.8225, + "step": 17845500 + }, + { + "epoch": 88.42, + "learning_rate": 5.808180120192428e-06, + "loss": 1.7926, + "step": 17846000 + }, + { + "epoch": 88.42, + "learning_rate": 5.806941533766344e-06, + "loss": 1.8039, + "step": 17846500 + }, + { + "epoch": 88.42, + "learning_rate": 5.80570294734026e-06, + "loss": 1.8215, + "step": 17847000 + }, + { + "epoch": 88.42, + "learning_rate": 5.804464360914176e-06, + "loss": 1.8165, + "step": 17847500 + }, + { + "epoch": 88.43, + "learning_rate": 5.803225774488093e-06, + "loss": 1.824, + "step": 17848000 + }, + { + "epoch": 88.43, + "learning_rate": 5.801987188062009e-06, + "loss": 1.8181, + "step": 17848500 + }, + { + "epoch": 88.43, + "learning_rate": 5.800751078808778e-06, + "loss": 1.8245, + "step": 17849000 + }, + { + "epoch": 88.43, + "learning_rate": 5.799512492382694e-06, + "loss": 1.8249, + "step": 17849500 + }, + { + "epoch": 88.44, + "learning_rate": 5.79827390595661e-06, + "loss": 1.804, + "step": 17850000 + }, + { + "epoch": 88.44, + "learning_rate": 5.797035319530526e-06, + "loss": 1.8039, + "step": 17850500 + }, + { + "epoch": 88.44, + "learning_rate": 5.795796733104443e-06, + "loss": 1.7986, + "step": 17851000 + }, + { + "epoch": 88.44, + "learning_rate": 5.794558146678359e-06, + "loss": 1.8313, + "step": 17851500 + }, + { + "epoch": 88.44, + "learning_rate": 5.793319560252276e-06, + "loss": 1.8445, + "step": 17852000 + }, + { + "epoch": 88.45, + "learning_rate": 5.792080973826192e-06, + "loss": 1.8059, + "step": 17852500 + }, + { + "epoch": 88.45, + "learning_rate": 5.790842387400108e-06, + "loss": 1.8289, + "step": 17853000 + }, + { + "epoch": 88.45, + "learning_rate": 5.789608755319729e-06, + "loss": 1.8163, + "step": 17853500 + }, + { + "epoch": 88.45, + "learning_rate": 5.788370168893646e-06, + "loss": 1.827, + "step": 17854000 + }, + { + "epoch": 88.46, + "learning_rate": 5.787131582467562e-06, + "loss": 1.8224, + "step": 17854500 + }, + { + "epoch": 88.46, + "learning_rate": 5.785892996041478e-06, + "loss": 1.7925, + "step": 17855000 + }, + { + "epoch": 88.46, + "learning_rate": 5.784654409615394e-06, + "loss": 1.8211, + "step": 17855500 + }, + { + "epoch": 88.46, + "learning_rate": 5.783415823189311e-06, + "loss": 1.8091, + "step": 17856000 + }, + { + "epoch": 88.47, + "learning_rate": 5.782177236763227e-06, + "loss": 1.8014, + "step": 17856500 + }, + { + "epoch": 88.47, + "learning_rate": 5.780943604682848e-06, + "loss": 1.8256, + "step": 17857000 + }, + { + "epoch": 88.47, + "learning_rate": 5.779705018256764e-06, + "loss": 1.8078, + "step": 17857500 + }, + { + "epoch": 88.47, + "learning_rate": 5.778466431830681e-06, + "loss": 1.8117, + "step": 17858000 + }, + { + "epoch": 88.48, + "learning_rate": 5.777227845404597e-06, + "loss": 1.8204, + "step": 17858500 + }, + { + "epoch": 88.48, + "learning_rate": 5.775989258978514e-06, + "loss": 1.8114, + "step": 17859000 + }, + { + "epoch": 88.48, + "learning_rate": 5.774750672552429e-06, + "loss": 1.8001, + "step": 17859500 + }, + { + "epoch": 88.48, + "learning_rate": 5.773512086126346e-06, + "loss": 1.8193, + "step": 17860000 + }, + { + "epoch": 88.49, + "learning_rate": 5.772275976873115e-06, + "loss": 1.8242, + "step": 17860500 + }, + { + "epoch": 88.49, + "learning_rate": 5.771037390447031e-06, + "loss": 1.825, + "step": 17861000 + }, + { + "epoch": 88.49, + "learning_rate": 5.769798804020947e-06, + "loss": 1.8296, + "step": 17861500 + }, + { + "epoch": 88.49, + "learning_rate": 5.768560217594864e-06, + "loss": 1.8315, + "step": 17862000 + }, + { + "epoch": 88.5, + "learning_rate": 5.767321631168781e-06, + "loss": 1.8139, + "step": 17862500 + }, + { + "epoch": 88.5, + "learning_rate": 5.766083044742696e-06, + "loss": 1.8038, + "step": 17863000 + }, + { + "epoch": 88.5, + "learning_rate": 5.764844458316613e-06, + "loss": 1.8179, + "step": 17863500 + }, + { + "epoch": 88.5, + "learning_rate": 5.763605871890529e-06, + "loss": 1.8144, + "step": 17864000 + }, + { + "epoch": 88.51, + "learning_rate": 5.762367285464446e-06, + "loss": 1.8318, + "step": 17864500 + }, + { + "epoch": 88.51, + "learning_rate": 5.761128699038362e-06, + "loss": 1.8166, + "step": 17865000 + }, + { + "epoch": 88.51, + "learning_rate": 5.759890112612278e-06, + "loss": 1.8224, + "step": 17865500 + }, + { + "epoch": 88.51, + "learning_rate": 5.758651526186194e-06, + "loss": 1.8251, + "step": 17866000 + }, + { + "epoch": 88.52, + "learning_rate": 5.757412939760111e-06, + "loss": 1.8245, + "step": 17866500 + }, + { + "epoch": 88.52, + "learning_rate": 5.756176830506879e-06, + "loss": 1.8155, + "step": 17867000 + }, + { + "epoch": 88.52, + "learning_rate": 5.754938244080796e-06, + "loss": 1.8397, + "step": 17867500 + }, + { + "epoch": 88.52, + "learning_rate": 5.753699657654712e-06, + "loss": 1.8287, + "step": 17868000 + }, + { + "epoch": 88.53, + "learning_rate": 5.752461071228629e-06, + "loss": 1.839, + "step": 17868500 + }, + { + "epoch": 88.53, + "learning_rate": 5.751224961975397e-06, + "loss": 1.8207, + "step": 17869000 + }, + { + "epoch": 88.53, + "learning_rate": 5.749986375549314e-06, + "loss": 1.833, + "step": 17869500 + }, + { + "epoch": 88.53, + "learning_rate": 5.748747789123229e-06, + "loss": 1.839, + "step": 17870000 + }, + { + "epoch": 88.54, + "learning_rate": 5.747509202697146e-06, + "loss": 1.8103, + "step": 17870500 + }, + { + "epoch": 88.54, + "learning_rate": 5.746270616271063e-06, + "loss": 1.8256, + "step": 17871000 + }, + { + "epoch": 88.54, + "learning_rate": 5.745032029844979e-06, + "loss": 1.8136, + "step": 17871500 + }, + { + "epoch": 88.54, + "learning_rate": 5.743793443418895e-06, + "loss": 1.8286, + "step": 17872000 + }, + { + "epoch": 88.55, + "learning_rate": 5.742557334165664e-06, + "loss": 1.8092, + "step": 17872500 + }, + { + "epoch": 88.55, + "learning_rate": 5.74131874773958e-06, + "loss": 1.8214, + "step": 17873000 + }, + { + "epoch": 88.55, + "learning_rate": 5.740080161313496e-06, + "loss": 1.8182, + "step": 17873500 + }, + { + "epoch": 88.55, + "learning_rate": 5.738841574887413e-06, + "loss": 1.8191, + "step": 17874000 + }, + { + "epoch": 88.56, + "learning_rate": 5.737602988461329e-06, + "loss": 1.8317, + "step": 17874500 + }, + { + "epoch": 88.56, + "learning_rate": 5.736364402035246e-06, + "loss": 1.8317, + "step": 17875000 + }, + { + "epoch": 88.56, + "learning_rate": 5.735125815609162e-06, + "loss": 1.8085, + "step": 17875500 + }, + { + "epoch": 88.56, + "learning_rate": 5.733889706355931e-06, + "loss": 1.8171, + "step": 17876000 + }, + { + "epoch": 88.57, + "learning_rate": 5.732651119929847e-06, + "loss": 1.7877, + "step": 17876500 + }, + { + "epoch": 88.57, + "learning_rate": 5.731412533503763e-06, + "loss": 1.8217, + "step": 17877000 + }, + { + "epoch": 88.57, + "learning_rate": 5.730173947077679e-06, + "loss": 1.8408, + "step": 17877500 + }, + { + "epoch": 88.57, + "learning_rate": 5.728935360651596e-06, + "loss": 1.819, + "step": 17878000 + }, + { + "epoch": 88.58, + "learning_rate": 5.727696774225512e-06, + "loss": 1.8102, + "step": 17878500 + }, + { + "epoch": 88.58, + "learning_rate": 5.726458187799428e-06, + "loss": 1.8194, + "step": 17879000 + }, + { + "epoch": 88.58, + "learning_rate": 5.725219601373344e-06, + "loss": 1.8199, + "step": 17879500 + }, + { + "epoch": 88.58, + "learning_rate": 5.723981014947261e-06, + "loss": 1.8073, + "step": 17880000 + }, + { + "epoch": 88.59, + "learning_rate": 5.722744905694029e-06, + "loss": 1.8607, + "step": 17880500 + }, + { + "epoch": 88.59, + "learning_rate": 5.721506319267946e-06, + "loss": 1.8264, + "step": 17881000 + }, + { + "epoch": 88.59, + "learning_rate": 5.720267732841862e-06, + "loss": 1.7987, + "step": 17881500 + }, + { + "epoch": 88.59, + "learning_rate": 5.719031623588631e-06, + "loss": 1.817, + "step": 17882000 + }, + { + "epoch": 88.6, + "learning_rate": 5.717793037162547e-06, + "loss": 1.8076, + "step": 17882500 + }, + { + "epoch": 88.6, + "learning_rate": 5.716554450736464e-06, + "loss": 1.8356, + "step": 17883000 + }, + { + "epoch": 88.6, + "learning_rate": 5.71531586431038e-06, + "loss": 1.8227, + "step": 17883500 + }, + { + "epoch": 88.6, + "learning_rate": 5.714079755057149e-06, + "loss": 1.8357, + "step": 17884000 + }, + { + "epoch": 88.61, + "learning_rate": 5.712841168631065e-06, + "loss": 1.8252, + "step": 17884500 + }, + { + "epoch": 88.61, + "learning_rate": 5.711605059377834e-06, + "loss": 1.8286, + "step": 17885000 + }, + { + "epoch": 88.61, + "learning_rate": 5.71036647295175e-06, + "loss": 1.8138, + "step": 17885500 + }, + { + "epoch": 88.61, + "learning_rate": 5.709127886525666e-06, + "loss": 1.8132, + "step": 17886000 + }, + { + "epoch": 88.62, + "learning_rate": 5.707891777272435e-06, + "loss": 1.8202, + "step": 17886500 + }, + { + "epoch": 88.62, + "learning_rate": 5.706653190846351e-06, + "loss": 1.8172, + "step": 17887000 + }, + { + "epoch": 88.62, + "learning_rate": 5.705414604420267e-06, + "loss": 1.8352, + "step": 17887500 + }, + { + "epoch": 88.62, + "learning_rate": 5.704176017994184e-06, + "loss": 1.8222, + "step": 17888000 + }, + { + "epoch": 88.63, + "learning_rate": 5.702937431568101e-06, + "loss": 1.844, + "step": 17888500 + }, + { + "epoch": 88.63, + "learning_rate": 5.701698845142017e-06, + "loss": 1.7932, + "step": 17889000 + }, + { + "epoch": 88.63, + "learning_rate": 5.700462735888785e-06, + "loss": 1.8336, + "step": 17889500 + }, + { + "epoch": 88.63, + "learning_rate": 5.699224149462702e-06, + "loss": 1.8129, + "step": 17890000 + }, + { + "epoch": 88.64, + "learning_rate": 5.697985563036618e-06, + "loss": 1.8362, + "step": 17890500 + }, + { + "epoch": 88.64, + "learning_rate": 5.696746976610534e-06, + "loss": 1.8236, + "step": 17891000 + }, + { + "epoch": 88.64, + "learning_rate": 5.695508390184451e-06, + "loss": 1.8329, + "step": 17891500 + }, + { + "epoch": 88.64, + "learning_rate": 5.694272280931219e-06, + "loss": 1.8116, + "step": 17892000 + }, + { + "epoch": 88.65, + "learning_rate": 5.693033694505136e-06, + "loss": 1.8354, + "step": 17892500 + }, + { + "epoch": 88.65, + "learning_rate": 5.691795108079052e-06, + "loss": 1.8343, + "step": 17893000 + }, + { + "epoch": 88.65, + "learning_rate": 5.690556521652969e-06, + "loss": 1.8426, + "step": 17893500 + }, + { + "epoch": 88.65, + "learning_rate": 5.689317935226885e-06, + "loss": 1.8043, + "step": 17894000 + }, + { + "epoch": 88.66, + "learning_rate": 5.688081825973654e-06, + "loss": 1.8111, + "step": 17894500 + }, + { + "epoch": 88.66, + "learning_rate": 5.68684323954757e-06, + "loss": 1.8266, + "step": 17895000 + }, + { + "epoch": 88.66, + "learning_rate": 5.685604653121486e-06, + "loss": 1.8108, + "step": 17895500 + }, + { + "epoch": 88.66, + "learning_rate": 5.684366066695402e-06, + "loss": 1.8199, + "step": 17896000 + }, + { + "epoch": 88.67, + "learning_rate": 5.683127480269319e-06, + "loss": 1.8155, + "step": 17896500 + }, + { + "epoch": 88.67, + "learning_rate": 5.681888893843235e-06, + "loss": 1.8136, + "step": 17897000 + }, + { + "epoch": 88.67, + "learning_rate": 5.680650307417151e-06, + "loss": 1.8373, + "step": 17897500 + }, + { + "epoch": 88.67, + "learning_rate": 5.679411720991067e-06, + "loss": 1.8161, + "step": 17898000 + }, + { + "epoch": 88.68, + "learning_rate": 5.678173134564984e-06, + "loss": 1.8374, + "step": 17898500 + }, + { + "epoch": 88.68, + "learning_rate": 5.6769345481389e-06, + "loss": 1.8392, + "step": 17899000 + }, + { + "epoch": 88.68, + "learning_rate": 5.675695961712817e-06, + "loss": 1.83, + "step": 17899500 + }, + { + "epoch": 88.68, + "learning_rate": 5.674457375286733e-06, + "loss": 1.8161, + "step": 17900000 + }, + { + "epoch": 88.69, + "learning_rate": 5.673218788860649e-06, + "loss": 1.8243, + "step": 17900500 + }, + { + "epoch": 88.69, + "learning_rate": 5.671980202434566e-06, + "loss": 1.8176, + "step": 17901000 + }, + { + "epoch": 88.69, + "learning_rate": 5.670741616008482e-06, + "loss": 1.8306, + "step": 17901500 + }, + { + "epoch": 88.69, + "learning_rate": 5.669503029582399e-06, + "loss": 1.8316, + "step": 17902000 + }, + { + "epoch": 88.7, + "learning_rate": 5.668264443156314e-06, + "loss": 1.8155, + "step": 17902500 + }, + { + "epoch": 88.7, + "learning_rate": 5.667028333903084e-06, + "loss": 1.7986, + "step": 17903000 + }, + { + "epoch": 88.7, + "learning_rate": 5.665789747476999e-06, + "loss": 1.8014, + "step": 17903500 + }, + { + "epoch": 88.7, + "learning_rate": 5.664551161050916e-06, + "loss": 1.8361, + "step": 17904000 + }, + { + "epoch": 88.71, + "learning_rate": 5.663312574624832e-06, + "loss": 1.8205, + "step": 17904500 + }, + { + "epoch": 88.71, + "learning_rate": 5.662073988198749e-06, + "loss": 1.842, + "step": 17905000 + }, + { + "epoch": 88.71, + "learning_rate": 5.660837878945517e-06, + "loss": 1.8654, + "step": 17905500 + }, + { + "epoch": 88.71, + "learning_rate": 5.659599292519434e-06, + "loss": 1.8121, + "step": 17906000 + }, + { + "epoch": 88.71, + "learning_rate": 5.65836070609335e-06, + "loss": 1.8154, + "step": 17906500 + }, + { + "epoch": 88.72, + "learning_rate": 5.657122119667266e-06, + "loss": 1.8637, + "step": 17907000 + }, + { + "epoch": 88.72, + "learning_rate": 5.655883533241182e-06, + "loss": 1.8303, + "step": 17907500 + }, + { + "epoch": 88.72, + "learning_rate": 5.654647423987951e-06, + "loss": 1.8426, + "step": 17908000 + }, + { + "epoch": 88.72, + "learning_rate": 5.653408837561867e-06, + "loss": 1.8187, + "step": 17908500 + }, + { + "epoch": 88.73, + "learning_rate": 5.652170251135784e-06, + "loss": 1.8227, + "step": 17909000 + }, + { + "epoch": 88.73, + "learning_rate": 5.6509316647097e-06, + "loss": 1.8449, + "step": 17909500 + }, + { + "epoch": 88.73, + "learning_rate": 5.649693078283617e-06, + "loss": 1.8182, + "step": 17910000 + }, + { + "epoch": 88.73, + "learning_rate": 5.648454491857533e-06, + "loss": 1.8108, + "step": 17910500 + }, + { + "epoch": 88.74, + "learning_rate": 5.647220859777154e-06, + "loss": 1.8149, + "step": 17911000 + }, + { + "epoch": 88.74, + "learning_rate": 5.64598227335107e-06, + "loss": 1.8157, + "step": 17911500 + }, + { + "epoch": 88.74, + "learning_rate": 5.644743686924987e-06, + "loss": 1.8115, + "step": 17912000 + }, + { + "epoch": 88.74, + "learning_rate": 5.643505100498903e-06, + "loss": 1.8093, + "step": 17912500 + }, + { + "epoch": 88.75, + "learning_rate": 5.642266514072819e-06, + "loss": 1.8178, + "step": 17913000 + }, + { + "epoch": 88.75, + "learning_rate": 5.641030404819588e-06, + "loss": 1.8119, + "step": 17913500 + }, + { + "epoch": 88.75, + "learning_rate": 5.639791818393504e-06, + "loss": 1.8189, + "step": 17914000 + }, + { + "epoch": 88.75, + "learning_rate": 5.63855323196742e-06, + "loss": 1.8111, + "step": 17914500 + }, + { + "epoch": 88.76, + "learning_rate": 5.637314645541337e-06, + "loss": 1.8286, + "step": 17915000 + }, + { + "epoch": 88.76, + "learning_rate": 5.636076059115254e-06, + "loss": 1.8248, + "step": 17915500 + }, + { + "epoch": 88.76, + "learning_rate": 5.634837472689169e-06, + "loss": 1.8143, + "step": 17916000 + }, + { + "epoch": 88.76, + "learning_rate": 5.633598886263086e-06, + "loss": 1.8227, + "step": 17916500 + }, + { + "epoch": 88.77, + "learning_rate": 5.632360299837002e-06, + "loss": 1.8199, + "step": 17917000 + }, + { + "epoch": 88.77, + "learning_rate": 5.631124190583771e-06, + "loss": 1.8241, + "step": 17917500 + }, + { + "epoch": 88.77, + "learning_rate": 5.629885604157687e-06, + "loss": 1.8428, + "step": 17918000 + }, + { + "epoch": 88.77, + "learning_rate": 5.628647017731604e-06, + "loss": 1.8032, + "step": 17918500 + }, + { + "epoch": 88.78, + "learning_rate": 5.627410908478372e-06, + "loss": 1.8299, + "step": 17919000 + }, + { + "epoch": 88.78, + "learning_rate": 5.626172322052289e-06, + "loss": 1.8304, + "step": 17919500 + }, + { + "epoch": 88.78, + "learning_rate": 5.624933735626205e-06, + "loss": 1.8171, + "step": 17920000 + }, + { + "epoch": 88.78, + "learning_rate": 5.623695149200121e-06, + "loss": 1.8361, + "step": 17920500 + }, + { + "epoch": 88.79, + "learning_rate": 5.622456562774037e-06, + "loss": 1.8332, + "step": 17921000 + }, + { + "epoch": 88.79, + "learning_rate": 5.621220453520806e-06, + "loss": 1.8261, + "step": 17921500 + }, + { + "epoch": 88.79, + "learning_rate": 5.619981867094722e-06, + "loss": 1.8115, + "step": 17922000 + }, + { + "epoch": 88.79, + "learning_rate": 5.618743280668639e-06, + "loss": 1.8277, + "step": 17922500 + }, + { + "epoch": 88.8, + "learning_rate": 5.617504694242555e-06, + "loss": 1.84, + "step": 17923000 + }, + { + "epoch": 88.8, + "learning_rate": 5.616266107816472e-06, + "loss": 1.8164, + "step": 17923500 + }, + { + "epoch": 88.8, + "learning_rate": 5.615027521390388e-06, + "loss": 1.8224, + "step": 17924000 + }, + { + "epoch": 88.8, + "learning_rate": 5.613791412137157e-06, + "loss": 1.8319, + "step": 17924500 + }, + { + "epoch": 88.81, + "learning_rate": 5.612552825711073e-06, + "loss": 1.8068, + "step": 17925000 + }, + { + "epoch": 88.81, + "learning_rate": 5.611314239284989e-06, + "loss": 1.8166, + "step": 17925500 + }, + { + "epoch": 88.81, + "learning_rate": 5.610075652858905e-06, + "loss": 1.8256, + "step": 17926000 + }, + { + "epoch": 88.81, + "learning_rate": 5.608837066432822e-06, + "loss": 1.8059, + "step": 17926500 + }, + { + "epoch": 88.82, + "learning_rate": 5.60760095717959e-06, + "loss": 1.8321, + "step": 17927000 + }, + { + "epoch": 88.82, + "learning_rate": 5.606362370753507e-06, + "loss": 1.8091, + "step": 17927500 + }, + { + "epoch": 88.82, + "learning_rate": 5.605123784327423e-06, + "loss": 1.8091, + "step": 17928000 + }, + { + "epoch": 88.82, + "learning_rate": 5.603887675074192e-06, + "loss": 1.8411, + "step": 17928500 + }, + { + "epoch": 88.83, + "learning_rate": 5.602649088648108e-06, + "loss": 1.8005, + "step": 17929000 + }, + { + "epoch": 88.83, + "learning_rate": 5.601410502222025e-06, + "loss": 1.8097, + "step": 17929500 + }, + { + "epoch": 88.83, + "learning_rate": 5.60017191579594e-06, + "loss": 1.8264, + "step": 17930000 + }, + { + "epoch": 88.83, + "learning_rate": 5.598933329369857e-06, + "loss": 1.8227, + "step": 17930500 + }, + { + "epoch": 88.84, + "learning_rate": 5.597694742943774e-06, + "loss": 1.8296, + "step": 17931000 + }, + { + "epoch": 88.84, + "learning_rate": 5.59645615651769e-06, + "loss": 1.8123, + "step": 17931500 + }, + { + "epoch": 88.84, + "learning_rate": 5.595217570091606e-06, + "loss": 1.8002, + "step": 17932000 + }, + { + "epoch": 88.84, + "learning_rate": 5.593981460838375e-06, + "loss": 1.8257, + "step": 17932500 + }, + { + "epoch": 88.85, + "learning_rate": 5.592742874412291e-06, + "loss": 1.8286, + "step": 17933000 + }, + { + "epoch": 88.85, + "learning_rate": 5.59150676515906e-06, + "loss": 1.8004, + "step": 17933500 + }, + { + "epoch": 88.85, + "learning_rate": 5.590268178732977e-06, + "loss": 1.8226, + "step": 17934000 + }, + { + "epoch": 88.85, + "learning_rate": 5.589029592306892e-06, + "loss": 1.8164, + "step": 17934500 + }, + { + "epoch": 88.86, + "learning_rate": 5.587791005880809e-06, + "loss": 1.8215, + "step": 17935000 + }, + { + "epoch": 88.86, + "learning_rate": 5.586552419454725e-06, + "loss": 1.8002, + "step": 17935500 + }, + { + "epoch": 88.86, + "learning_rate": 5.585313833028642e-06, + "loss": 1.8168, + "step": 17936000 + }, + { + "epoch": 88.86, + "learning_rate": 5.584075246602558e-06, + "loss": 1.8282, + "step": 17936500 + }, + { + "epoch": 88.87, + "learning_rate": 5.582836660176474e-06, + "loss": 1.8026, + "step": 17937000 + }, + { + "epoch": 88.87, + "learning_rate": 5.58159807375039e-06, + "loss": 1.8029, + "step": 17937500 + }, + { + "epoch": 88.87, + "learning_rate": 5.580359487324307e-06, + "loss": 1.8254, + "step": 17938000 + }, + { + "epoch": 88.87, + "learning_rate": 5.579120900898223e-06, + "loss": 1.8365, + "step": 17938500 + }, + { + "epoch": 88.88, + "learning_rate": 5.577882314472139e-06, + "loss": 1.7956, + "step": 17939000 + }, + { + "epoch": 88.88, + "learning_rate": 5.576646205218908e-06, + "loss": 1.8047, + "step": 17939500 + }, + { + "epoch": 88.88, + "learning_rate": 5.575407618792825e-06, + "loss": 1.823, + "step": 17940000 + }, + { + "epoch": 88.88, + "learning_rate": 5.57416903236674e-06, + "loss": 1.8365, + "step": 17940500 + }, + { + "epoch": 88.89, + "learning_rate": 5.57293292311351e-06, + "loss": 1.8356, + "step": 17941000 + }, + { + "epoch": 88.89, + "learning_rate": 5.571694336687425e-06, + "loss": 1.8298, + "step": 17941500 + }, + { + "epoch": 88.89, + "learning_rate": 5.570458227434195e-06, + "loss": 1.835, + "step": 17942000 + }, + { + "epoch": 88.89, + "learning_rate": 5.569219641008111e-06, + "loss": 1.8171, + "step": 17942500 + }, + { + "epoch": 88.9, + "learning_rate": 5.567981054582027e-06, + "loss": 1.81, + "step": 17943000 + }, + { + "epoch": 88.9, + "learning_rate": 5.566742468155943e-06, + "loss": 1.8299, + "step": 17943500 + }, + { + "epoch": 88.9, + "learning_rate": 5.56550388172986e-06, + "loss": 1.8144, + "step": 17944000 + }, + { + "epoch": 88.9, + "learning_rate": 5.564265295303776e-06, + "loss": 1.8233, + "step": 17944500 + }, + { + "epoch": 88.91, + "learning_rate": 5.563026708877692e-06, + "loss": 1.8114, + "step": 17945000 + }, + { + "epoch": 88.91, + "learning_rate": 5.561788122451609e-06, + "loss": 1.8293, + "step": 17945500 + }, + { + "epoch": 88.91, + "learning_rate": 5.560549536025525e-06, + "loss": 1.8131, + "step": 17946000 + }, + { + "epoch": 88.91, + "learning_rate": 5.559310949599442e-06, + "loss": 1.8294, + "step": 17946500 + }, + { + "epoch": 88.92, + "learning_rate": 5.558072363173358e-06, + "loss": 1.82, + "step": 17947000 + }, + { + "epoch": 88.92, + "learning_rate": 5.556833776747274e-06, + "loss": 1.8273, + "step": 17947500 + }, + { + "epoch": 88.92, + "learning_rate": 5.55559519032119e-06, + "loss": 1.82, + "step": 17948000 + }, + { + "epoch": 88.92, + "learning_rate": 5.554356603895107e-06, + "loss": 1.7961, + "step": 17948500 + }, + { + "epoch": 88.93, + "learning_rate": 5.553120494641875e-06, + "loss": 1.7996, + "step": 17949000 + }, + { + "epoch": 88.93, + "learning_rate": 5.551881908215792e-06, + "loss": 1.7929, + "step": 17949500 + }, + { + "epoch": 88.93, + "learning_rate": 5.550643321789708e-06, + "loss": 1.7922, + "step": 17950000 + }, + { + "epoch": 88.93, + "learning_rate": 5.549407212536477e-06, + "loss": 1.8064, + "step": 17950500 + }, + { + "epoch": 88.94, + "learning_rate": 5.548171103283245e-06, + "loss": 1.8032, + "step": 17951000 + }, + { + "epoch": 88.94, + "learning_rate": 5.546932516857162e-06, + "loss": 1.8438, + "step": 17951500 + }, + { + "epoch": 88.94, + "learning_rate": 5.545693930431078e-06, + "loss": 1.831, + "step": 17952000 + }, + { + "epoch": 88.94, + "learning_rate": 5.544455344004995e-06, + "loss": 1.8177, + "step": 17952500 + }, + { + "epoch": 88.95, + "learning_rate": 5.54321675757891e-06, + "loss": 1.8069, + "step": 17953000 + }, + { + "epoch": 88.95, + "learning_rate": 5.541978171152827e-06, + "loss": 1.8063, + "step": 17953500 + }, + { + "epoch": 88.95, + "learning_rate": 5.540739584726743e-06, + "loss": 1.7888, + "step": 17954000 + }, + { + "epoch": 88.95, + "learning_rate": 5.53950099830066e-06, + "loss": 1.796, + "step": 17954500 + }, + { + "epoch": 88.96, + "learning_rate": 5.538262411874576e-06, + "loss": 1.8277, + "step": 17955000 + }, + { + "epoch": 88.96, + "learning_rate": 5.537026302621345e-06, + "loss": 1.8307, + "step": 17955500 + }, + { + "epoch": 88.96, + "learning_rate": 5.535787716195261e-06, + "loss": 1.7994, + "step": 17956000 + }, + { + "epoch": 88.96, + "learning_rate": 5.534549129769177e-06, + "loss": 1.8331, + "step": 17956500 + }, + { + "epoch": 88.97, + "learning_rate": 5.533310543343093e-06, + "loss": 1.8306, + "step": 17957000 + }, + { + "epoch": 88.97, + "learning_rate": 5.53207195691701e-06, + "loss": 1.8195, + "step": 17957500 + }, + { + "epoch": 88.97, + "learning_rate": 5.530833370490927e-06, + "loss": 1.8226, + "step": 17958000 + }, + { + "epoch": 88.97, + "learning_rate": 5.529597261237695e-06, + "loss": 1.8326, + "step": 17958500 + }, + { + "epoch": 88.98, + "learning_rate": 5.528361151984463e-06, + "loss": 1.8098, + "step": 17959000 + }, + { + "epoch": 88.98, + "learning_rate": 5.52712256555838e-06, + "loss": 1.8395, + "step": 17959500 + }, + { + "epoch": 88.98, + "learning_rate": 5.525883979132296e-06, + "loss": 1.8268, + "step": 17960000 + }, + { + "epoch": 88.98, + "learning_rate": 5.524645392706213e-06, + "loss": 1.8269, + "step": 17960500 + }, + { + "epoch": 88.99, + "learning_rate": 5.523409283452981e-06, + "loss": 1.8308, + "step": 17961000 + }, + { + "epoch": 88.99, + "learning_rate": 5.522170697026898e-06, + "loss": 1.8203, + "step": 17961500 + }, + { + "epoch": 88.99, + "learning_rate": 5.520932110600814e-06, + "loss": 1.8335, + "step": 17962000 + }, + { + "epoch": 88.99, + "learning_rate": 5.51969352417473e-06, + "loss": 1.8158, + "step": 17962500 + }, + { + "epoch": 88.99, + "learning_rate": 5.518457414921499e-06, + "loss": 1.8211, + "step": 17963000 + }, + { + "epoch": 89.0, + "learning_rate": 5.517218828495415e-06, + "loss": 1.8075, + "step": 17963500 + }, + { + "epoch": 89.0, + "learning_rate": 5.515980242069332e-06, + "loss": 1.8106, + "step": 17964000 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.6833987428476566, + "eval_accuracy_mlm": 0.6444478624519863, + "eval_accuracy_nsp": 0.8671511890147043, + "eval_loss": 2.3323161602020264, + "eval_runtime": 146.6947, + "eval_samples_per_second": 1738.024, + "eval_steps_per_second": 72.423, + "step": 17964027 + }, + { + "epoch": 89.0, + "learning_rate": 5.514741655643248e-06, + "loss": 1.8245, + "step": 17964500 + }, + { + "epoch": 89.0, + "learning_rate": 5.513503069217165e-06, + "loss": 1.7993, + "step": 17965000 + }, + { + "epoch": 89.01, + "learning_rate": 5.512264482791081e-06, + "loss": 1.8223, + "step": 17965500 + }, + { + "epoch": 89.01, + "learning_rate": 5.511025896364997e-06, + "loss": 1.8125, + "step": 17966000 + }, + { + "epoch": 89.01, + "learning_rate": 5.509787309938913e-06, + "loss": 1.7958, + "step": 17966500 + }, + { + "epoch": 89.01, + "learning_rate": 5.50854872351283e-06, + "loss": 1.8266, + "step": 17967000 + }, + { + "epoch": 89.02, + "learning_rate": 5.507312614259598e-06, + "loss": 1.8546, + "step": 17967500 + }, + { + "epoch": 89.02, + "learning_rate": 5.506074027833515e-06, + "loss": 1.7994, + "step": 17968000 + }, + { + "epoch": 89.02, + "learning_rate": 5.504835441407431e-06, + "loss": 1.8112, + "step": 17968500 + }, + { + "epoch": 89.02, + "learning_rate": 5.503596854981347e-06, + "loss": 1.803, + "step": 17969000 + }, + { + "epoch": 89.03, + "learning_rate": 5.502358268555263e-06, + "loss": 1.8168, + "step": 17969500 + }, + { + "epoch": 89.03, + "learning_rate": 5.50111968212918e-06, + "loss": 1.8201, + "step": 17970000 + }, + { + "epoch": 89.03, + "learning_rate": 5.499881095703096e-06, + "loss": 1.8175, + "step": 17970500 + }, + { + "epoch": 89.03, + "learning_rate": 5.498642509277013e-06, + "loss": 1.8287, + "step": 17971000 + }, + { + "epoch": 89.04, + "learning_rate": 5.497403922850929e-06, + "loss": 1.8023, + "step": 17971500 + }, + { + "epoch": 89.04, + "learning_rate": 5.496165336424845e-06, + "loss": 1.8028, + "step": 17972000 + }, + { + "epoch": 89.04, + "learning_rate": 5.494926749998762e-06, + "loss": 1.7985, + "step": 17972500 + }, + { + "epoch": 89.04, + "learning_rate": 5.493688163572678e-06, + "loss": 1.7928, + "step": 17973000 + }, + { + "epoch": 89.05, + "learning_rate": 5.492452054319447e-06, + "loss": 1.8109, + "step": 17973500 + }, + { + "epoch": 89.05, + "learning_rate": 5.491213467893363e-06, + "loss": 1.8015, + "step": 17974000 + }, + { + "epoch": 89.05, + "learning_rate": 5.489977358640132e-06, + "loss": 1.809, + "step": 17974500 + }, + { + "epoch": 89.05, + "learning_rate": 5.488738772214048e-06, + "loss": 1.8124, + "step": 17975000 + }, + { + "epoch": 89.06, + "learning_rate": 5.487500185787965e-06, + "loss": 1.7905, + "step": 17975500 + }, + { + "epoch": 89.06, + "learning_rate": 5.48626159936188e-06, + "loss": 1.8287, + "step": 17976000 + }, + { + "epoch": 89.06, + "learning_rate": 5.485023012935797e-06, + "loss": 1.8064, + "step": 17976500 + }, + { + "epoch": 89.06, + "learning_rate": 5.483784426509713e-06, + "loss": 1.8166, + "step": 17977000 + }, + { + "epoch": 89.07, + "learning_rate": 5.48254584008363e-06, + "loss": 1.8049, + "step": 17977500 + }, + { + "epoch": 89.07, + "learning_rate": 5.481307253657546e-06, + "loss": 1.8222, + "step": 17978000 + }, + { + "epoch": 89.07, + "learning_rate": 5.480071144404315e-06, + "loss": 1.8035, + "step": 17978500 + }, + { + "epoch": 89.07, + "learning_rate": 5.478832557978231e-06, + "loss": 1.773, + "step": 17979000 + }, + { + "epoch": 89.08, + "learning_rate": 5.477593971552147e-06, + "loss": 1.8046, + "step": 17979500 + }, + { + "epoch": 89.08, + "learning_rate": 5.476355385126063e-06, + "loss": 1.7921, + "step": 17980000 + }, + { + "epoch": 89.08, + "learning_rate": 5.47511679869998e-06, + "loss": 1.8248, + "step": 17980500 + }, + { + "epoch": 89.08, + "learning_rate": 5.473878212273896e-06, + "loss": 1.7877, + "step": 17981000 + }, + { + "epoch": 89.09, + "learning_rate": 5.472642103020665e-06, + "loss": 1.7873, + "step": 17981500 + }, + { + "epoch": 89.09, + "learning_rate": 5.471405993767433e-06, + "loss": 1.7818, + "step": 17982000 + }, + { + "epoch": 89.09, + "learning_rate": 5.47016740734135e-06, + "loss": 1.8096, + "step": 17982500 + }, + { + "epoch": 89.09, + "learning_rate": 5.468928820915266e-06, + "loss": 1.8239, + "step": 17983000 + }, + { + "epoch": 89.1, + "learning_rate": 5.467690234489183e-06, + "loss": 1.7993, + "step": 17983500 + }, + { + "epoch": 89.1, + "learning_rate": 5.466451648063099e-06, + "loss": 1.8526, + "step": 17984000 + }, + { + "epoch": 89.1, + "learning_rate": 5.465213061637015e-06, + "loss": 1.8042, + "step": 17984500 + }, + { + "epoch": 89.1, + "learning_rate": 5.463974475210931e-06, + "loss": 1.793, + "step": 17985000 + }, + { + "epoch": 89.11, + "learning_rate": 5.462735888784848e-06, + "loss": 1.8012, + "step": 17985500 + }, + { + "epoch": 89.11, + "learning_rate": 5.461499779531616e-06, + "loss": 1.827, + "step": 17986000 + }, + { + "epoch": 89.11, + "learning_rate": 5.460261193105533e-06, + "loss": 1.8235, + "step": 17986500 + }, + { + "epoch": 89.11, + "learning_rate": 5.45902260667945e-06, + "loss": 1.8038, + "step": 17987000 + }, + { + "epoch": 89.12, + "learning_rate": 5.457786497426218e-06, + "loss": 1.8021, + "step": 17987500 + }, + { + "epoch": 89.12, + "learning_rate": 5.456547911000134e-06, + "loss": 1.8013, + "step": 17988000 + }, + { + "epoch": 89.12, + "learning_rate": 5.455309324574051e-06, + "loss": 1.8173, + "step": 17988500 + }, + { + "epoch": 89.12, + "learning_rate": 5.454070738147967e-06, + "loss": 1.8273, + "step": 17989000 + }, + { + "epoch": 89.13, + "learning_rate": 5.452832151721883e-06, + "loss": 1.8044, + "step": 17989500 + }, + { + "epoch": 89.13, + "learning_rate": 5.451596042468651e-06, + "loss": 1.8368, + "step": 17990000 + }, + { + "epoch": 89.13, + "learning_rate": 5.450357456042568e-06, + "loss": 1.8185, + "step": 17990500 + }, + { + "epoch": 89.13, + "learning_rate": 5.449118869616485e-06, + "loss": 1.8049, + "step": 17991000 + }, + { + "epoch": 89.14, + "learning_rate": 5.447880283190401e-06, + "loss": 1.8026, + "step": 17991500 + }, + { + "epoch": 89.14, + "learning_rate": 5.44664417393717e-06, + "loss": 1.8075, + "step": 17992000 + }, + { + "epoch": 89.14, + "learning_rate": 5.445408064683938e-06, + "loss": 1.7972, + "step": 17992500 + }, + { + "epoch": 89.14, + "learning_rate": 5.444169478257854e-06, + "loss": 1.8263, + "step": 17993000 + }, + { + "epoch": 89.15, + "learning_rate": 5.442930891831771e-06, + "loss": 1.8027, + "step": 17993500 + }, + { + "epoch": 89.15, + "learning_rate": 5.4416923054056876e-06, + "loss": 1.8106, + "step": 17994000 + }, + { + "epoch": 89.15, + "learning_rate": 5.440453718979603e-06, + "loss": 1.812, + "step": 17994500 + }, + { + "epoch": 89.15, + "learning_rate": 5.4392176097263724e-06, + "loss": 1.7973, + "step": 17995000 + }, + { + "epoch": 89.16, + "learning_rate": 5.437979023300288e-06, + "loss": 1.7963, + "step": 17995500 + }, + { + "epoch": 89.16, + "learning_rate": 5.436740436874205e-06, + "loss": 1.8057, + "step": 17996000 + }, + { + "epoch": 89.16, + "learning_rate": 5.435501850448121e-06, + "loss": 1.7706, + "step": 17996500 + }, + { + "epoch": 89.16, + "learning_rate": 5.434263264022038e-06, + "loss": 1.8192, + "step": 17997000 + }, + { + "epoch": 89.17, + "learning_rate": 5.433024677595954e-06, + "loss": 1.7985, + "step": 17997500 + }, + { + "epoch": 89.17, + "learning_rate": 5.43178609116987e-06, + "loss": 1.8003, + "step": 17998000 + }, + { + "epoch": 89.17, + "learning_rate": 5.430547504743786e-06, + "loss": 1.8373, + "step": 17998500 + }, + { + "epoch": 89.17, + "learning_rate": 5.429308918317703e-06, + "loss": 1.8082, + "step": 17999000 + }, + { + "epoch": 89.18, + "learning_rate": 5.428070331891619e-06, + "loss": 1.8125, + "step": 17999500 + }, + { + "epoch": 89.18, + "learning_rate": 5.426831745465536e-06, + "loss": 1.8137, + "step": 18000000 + }, + { + "epoch": 89.18, + "learning_rate": 5.425595636212304e-06, + "loss": 1.8204, + "step": 18000500 + }, + { + "epoch": 89.18, + "learning_rate": 5.424357049786221e-06, + "loss": 1.801, + "step": 18001000 + }, + { + "epoch": 89.19, + "learning_rate": 5.423118463360136e-06, + "loss": 1.7888, + "step": 18001500 + }, + { + "epoch": 89.19, + "learning_rate": 5.421879876934053e-06, + "loss": 1.8001, + "step": 18002000 + }, + { + "epoch": 89.19, + "learning_rate": 5.420643767680822e-06, + "loss": 1.803, + "step": 18002500 + }, + { + "epoch": 89.19, + "learning_rate": 5.419405181254738e-06, + "loss": 1.7915, + "step": 18003000 + }, + { + "epoch": 89.2, + "learning_rate": 5.418166594828654e-06, + "loss": 1.8105, + "step": 18003500 + }, + { + "epoch": 89.2, + "learning_rate": 5.416928008402571e-06, + "loss": 1.8207, + "step": 18004000 + }, + { + "epoch": 89.2, + "learning_rate": 5.415689421976488e-06, + "loss": 1.7934, + "step": 18004500 + }, + { + "epoch": 89.2, + "learning_rate": 5.414453312723256e-06, + "loss": 1.8087, + "step": 18005000 + }, + { + "epoch": 89.21, + "learning_rate": 5.413217203470024e-06, + "loss": 1.7825, + "step": 18005500 + }, + { + "epoch": 89.21, + "learning_rate": 5.4119786170439405e-06, + "loss": 1.8248, + "step": 18006000 + }, + { + "epoch": 89.21, + "learning_rate": 5.410740030617857e-06, + "loss": 1.8034, + "step": 18006500 + }, + { + "epoch": 89.21, + "learning_rate": 5.409501444191773e-06, + "loss": 1.8326, + "step": 18007000 + }, + { + "epoch": 89.22, + "learning_rate": 5.40826285776569e-06, + "loss": 1.8243, + "step": 18007500 + }, + { + "epoch": 89.22, + "learning_rate": 5.407024271339606e-06, + "loss": 1.8109, + "step": 18008000 + }, + { + "epoch": 89.22, + "learning_rate": 5.405785684913523e-06, + "loss": 1.8171, + "step": 18008500 + }, + { + "epoch": 89.22, + "learning_rate": 5.404547098487439e-06, + "loss": 1.8019, + "step": 18009000 + }, + { + "epoch": 89.23, + "learning_rate": 5.403308512061355e-06, + "loss": 1.8035, + "step": 18009500 + }, + { + "epoch": 89.23, + "learning_rate": 5.402072402808124e-06, + "loss": 1.8187, + "step": 18010000 + }, + { + "epoch": 89.23, + "learning_rate": 5.40083381638204e-06, + "loss": 1.8079, + "step": 18010500 + }, + { + "epoch": 89.23, + "learning_rate": 5.399595229955956e-06, + "loss": 1.8148, + "step": 18011000 + }, + { + "epoch": 89.24, + "learning_rate": 5.398356643529873e-06, + "loss": 1.8101, + "step": 18011500 + }, + { + "epoch": 89.24, + "learning_rate": 5.397118057103789e-06, + "loss": 1.8007, + "step": 18012000 + }, + { + "epoch": 89.24, + "learning_rate": 5.395879470677706e-06, + "loss": 1.8155, + "step": 18012500 + }, + { + "epoch": 89.24, + "learning_rate": 5.394640884251621e-06, + "loss": 1.7936, + "step": 18013000 + }, + { + "epoch": 89.25, + "learning_rate": 5.393404774998391e-06, + "loss": 1.801, + "step": 18013500 + }, + { + "epoch": 89.25, + "learning_rate": 5.392166188572307e-06, + "loss": 1.8237, + "step": 18014000 + }, + { + "epoch": 89.25, + "learning_rate": 5.390927602146223e-06, + "loss": 1.8043, + "step": 18014500 + }, + { + "epoch": 89.25, + "learning_rate": 5.3896914928929916e-06, + "loss": 1.8067, + "step": 18015000 + }, + { + "epoch": 89.26, + "learning_rate": 5.388452906466908e-06, + "loss": 1.8267, + "step": 18015500 + }, + { + "epoch": 89.26, + "learning_rate": 5.387214320040824e-06, + "loss": 1.7945, + "step": 18016000 + }, + { + "epoch": 89.26, + "learning_rate": 5.385975733614741e-06, + "loss": 1.8214, + "step": 18016500 + }, + { + "epoch": 89.26, + "learning_rate": 5.384737147188657e-06, + "loss": 1.8207, + "step": 18017000 + }, + { + "epoch": 89.26, + "learning_rate": 5.3835010379354255e-06, + "loss": 1.8205, + "step": 18017500 + }, + { + "epoch": 89.27, + "learning_rate": 5.382262451509342e-06, + "loss": 1.8253, + "step": 18018000 + }, + { + "epoch": 89.27, + "learning_rate": 5.381023865083258e-06, + "loss": 1.7856, + "step": 18018500 + }, + { + "epoch": 89.27, + "learning_rate": 5.379785278657174e-06, + "loss": 1.7876, + "step": 18019000 + }, + { + "epoch": 89.27, + "learning_rate": 5.378546692231091e-06, + "loss": 1.8259, + "step": 18019500 + }, + { + "epoch": 89.28, + "learning_rate": 5.377308105805008e-06, + "loss": 1.8029, + "step": 18020000 + }, + { + "epoch": 89.28, + "learning_rate": 5.376069519378924e-06, + "loss": 1.825, + "step": 18020500 + }, + { + "epoch": 89.28, + "learning_rate": 5.37483093295284e-06, + "loss": 1.7981, + "step": 18021000 + }, + { + "epoch": 89.28, + "learning_rate": 5.373594823699609e-06, + "loss": 1.817, + "step": 18021500 + }, + { + "epoch": 89.29, + "learning_rate": 5.372356237273525e-06, + "loss": 1.8175, + "step": 18022000 + }, + { + "epoch": 89.29, + "learning_rate": 5.371117650847441e-06, + "loss": 1.7963, + "step": 18022500 + }, + { + "epoch": 89.29, + "learning_rate": 5.369879064421358e-06, + "loss": 1.8203, + "step": 18023000 + }, + { + "epoch": 89.29, + "learning_rate": 5.368640477995274e-06, + "loss": 1.8112, + "step": 18023500 + }, + { + "epoch": 89.3, + "learning_rate": 5.367401891569191e-06, + "loss": 1.8215, + "step": 18024000 + }, + { + "epoch": 89.3, + "learning_rate": 5.366163305143106e-06, + "loss": 1.799, + "step": 18024500 + }, + { + "epoch": 89.3, + "learning_rate": 5.364924718717023e-06, + "loss": 1.801, + "step": 18025000 + }, + { + "epoch": 89.3, + "learning_rate": 5.363686132290939e-06, + "loss": 1.815, + "step": 18025500 + }, + { + "epoch": 89.31, + "learning_rate": 5.362447545864856e-06, + "loss": 1.8114, + "step": 18026000 + }, + { + "epoch": 89.31, + "learning_rate": 5.361208959438772e-06, + "loss": 1.8236, + "step": 18026500 + }, + { + "epoch": 89.31, + "learning_rate": 5.359970373012688e-06, + "loss": 1.8041, + "step": 18027000 + }, + { + "epoch": 89.31, + "learning_rate": 5.358734263759457e-06, + "loss": 1.8217, + "step": 18027500 + }, + { + "epoch": 89.32, + "learning_rate": 5.357498154506226e-06, + "loss": 1.81, + "step": 18028000 + }, + { + "epoch": 89.32, + "learning_rate": 5.356259568080142e-06, + "loss": 1.8129, + "step": 18028500 + }, + { + "epoch": 89.32, + "learning_rate": 5.355020981654058e-06, + "loss": 1.7922, + "step": 18029000 + }, + { + "epoch": 89.32, + "learning_rate": 5.353784872400827e-06, + "loss": 1.8033, + "step": 18029500 + }, + { + "epoch": 89.33, + "learning_rate": 5.352546285974743e-06, + "loss": 1.8144, + "step": 18030000 + }, + { + "epoch": 89.33, + "learning_rate": 5.351307699548659e-06, + "loss": 1.8074, + "step": 18030500 + }, + { + "epoch": 89.33, + "learning_rate": 5.350069113122576e-06, + "loss": 1.781, + "step": 18031000 + }, + { + "epoch": 89.33, + "learning_rate": 5.348830526696492e-06, + "loss": 1.8039, + "step": 18031500 + }, + { + "epoch": 89.34, + "learning_rate": 5.347591940270409e-06, + "loss": 1.8151, + "step": 18032000 + }, + { + "epoch": 89.34, + "learning_rate": 5.346353353844325e-06, + "loss": 1.8087, + "step": 18032500 + }, + { + "epoch": 89.34, + "learning_rate": 5.345114767418241e-06, + "loss": 1.8328, + "step": 18033000 + }, + { + "epoch": 89.34, + "learning_rate": 5.343876180992158e-06, + "loss": 1.829, + "step": 18033500 + }, + { + "epoch": 89.35, + "learning_rate": 5.342637594566074e-06, + "loss": 1.8044, + "step": 18034000 + }, + { + "epoch": 89.35, + "learning_rate": 5.341399008139991e-06, + "loss": 1.8232, + "step": 18034500 + }, + { + "epoch": 89.35, + "learning_rate": 5.340160421713906e-06, + "loss": 1.8102, + "step": 18035000 + }, + { + "epoch": 89.35, + "learning_rate": 5.338924312460676e-06, + "loss": 1.8294, + "step": 18035500 + }, + { + "epoch": 89.36, + "learning_rate": 5.337685726034591e-06, + "loss": 1.8195, + "step": 18036000 + }, + { + "epoch": 89.36, + "learning_rate": 5.336447139608508e-06, + "loss": 1.8065, + "step": 18036500 + }, + { + "epoch": 89.36, + "learning_rate": 5.335208553182424e-06, + "loss": 1.8318, + "step": 18037000 + }, + { + "epoch": 89.36, + "learning_rate": 5.333969966756341e-06, + "loss": 1.815, + "step": 18037500 + }, + { + "epoch": 89.37, + "learning_rate": 5.332731380330257e-06, + "loss": 1.8273, + "step": 18038000 + }, + { + "epoch": 89.37, + "learning_rate": 5.331492793904173e-06, + "loss": 1.7992, + "step": 18038500 + }, + { + "epoch": 89.37, + "learning_rate": 5.330254207478089e-06, + "loss": 1.8247, + "step": 18039000 + }, + { + "epoch": 89.37, + "learning_rate": 5.329018098224858e-06, + "loss": 1.7998, + "step": 18039500 + }, + { + "epoch": 89.38, + "learning_rate": 5.327779511798774e-06, + "loss": 1.7997, + "step": 18040000 + }, + { + "epoch": 89.38, + "learning_rate": 5.326540925372691e-06, + "loss": 1.7927, + "step": 18040500 + }, + { + "epoch": 89.38, + "learning_rate": 5.325302338946607e-06, + "loss": 1.8124, + "step": 18041000 + }, + { + "epoch": 89.38, + "learning_rate": 5.324063752520524e-06, + "loss": 1.8172, + "step": 18041500 + }, + { + "epoch": 89.39, + "learning_rate": 5.32282516609444e-06, + "loss": 1.8165, + "step": 18042000 + }, + { + "epoch": 89.39, + "learning_rate": 5.321586579668356e-06, + "loss": 1.8281, + "step": 18042500 + }, + { + "epoch": 89.39, + "learning_rate": 5.320347993242273e-06, + "loss": 1.8257, + "step": 18043000 + }, + { + "epoch": 89.39, + "learning_rate": 5.319114361161894e-06, + "loss": 1.8077, + "step": 18043500 + }, + { + "epoch": 89.4, + "learning_rate": 5.317878251908662e-06, + "loss": 1.8472, + "step": 18044000 + }, + { + "epoch": 89.4, + "learning_rate": 5.3166421426554305e-06, + "loss": 1.796, + "step": 18044500 + }, + { + "epoch": 89.4, + "learning_rate": 5.315403556229347e-06, + "loss": 1.8291, + "step": 18045000 + }, + { + "epoch": 89.4, + "learning_rate": 5.3141649698032635e-06, + "loss": 1.8103, + "step": 18045500 + }, + { + "epoch": 89.41, + "learning_rate": 5.31292638337718e-06, + "loss": 1.8115, + "step": 18046000 + }, + { + "epoch": 89.41, + "learning_rate": 5.311687796951096e-06, + "loss": 1.807, + "step": 18046500 + }, + { + "epoch": 89.41, + "learning_rate": 5.310449210525012e-06, + "loss": 1.7946, + "step": 18047000 + }, + { + "epoch": 89.41, + "learning_rate": 5.309210624098929e-06, + "loss": 1.8362, + "step": 18047500 + }, + { + "epoch": 89.42, + "learning_rate": 5.307972037672845e-06, + "loss": 1.8169, + "step": 18048000 + }, + { + "epoch": 89.42, + "learning_rate": 5.306733451246762e-06, + "loss": 1.8046, + "step": 18048500 + }, + { + "epoch": 89.42, + "learning_rate": 5.305494864820678e-06, + "loss": 1.825, + "step": 18049000 + }, + { + "epoch": 89.42, + "learning_rate": 5.304256278394594e-06, + "loss": 1.8518, + "step": 18049500 + }, + { + "epoch": 89.43, + "learning_rate": 5.303017691968511e-06, + "loss": 1.823, + "step": 18050000 + }, + { + "epoch": 89.43, + "learning_rate": 5.301779105542427e-06, + "loss": 1.802, + "step": 18050500 + }, + { + "epoch": 89.43, + "learning_rate": 5.300540519116343e-06, + "loss": 1.8022, + "step": 18051000 + }, + { + "epoch": 89.43, + "learning_rate": 5.299304409863112e-06, + "loss": 1.8095, + "step": 18051500 + }, + { + "epoch": 89.44, + "learning_rate": 5.298065823437028e-06, + "loss": 1.8109, + "step": 18052000 + }, + { + "epoch": 89.44, + "learning_rate": 5.296827237010944e-06, + "loss": 1.8196, + "step": 18052500 + }, + { + "epoch": 89.44, + "learning_rate": 5.295588650584861e-06, + "loss": 1.8031, + "step": 18053000 + }, + { + "epoch": 89.44, + "learning_rate": 5.294350064158777e-06, + "loss": 1.8051, + "step": 18053500 + }, + { + "epoch": 89.45, + "learning_rate": 5.293113954905546e-06, + "loss": 1.8126, + "step": 18054000 + }, + { + "epoch": 89.45, + "learning_rate": 5.291875368479462e-06, + "loss": 1.8396, + "step": 18054500 + }, + { + "epoch": 89.45, + "learning_rate": 5.290636782053379e-06, + "loss": 1.8312, + "step": 18055000 + }, + { + "epoch": 89.45, + "learning_rate": 5.289398195627295e-06, + "loss": 1.7971, + "step": 18055500 + }, + { + "epoch": 89.46, + "learning_rate": 5.288159609201211e-06, + "loss": 1.8041, + "step": 18056000 + }, + { + "epoch": 89.46, + "learning_rate": 5.286921022775127e-06, + "loss": 1.8021, + "step": 18056500 + }, + { + "epoch": 89.46, + "learning_rate": 5.285684913521896e-06, + "loss": 1.7924, + "step": 18057000 + }, + { + "epoch": 89.46, + "learning_rate": 5.284446327095812e-06, + "loss": 1.8341, + "step": 18057500 + }, + { + "epoch": 89.47, + "learning_rate": 5.2832126950154334e-06, + "loss": 1.8276, + "step": 18058000 + }, + { + "epoch": 89.47, + "learning_rate": 5.2819741085893495e-06, + "loss": 1.8108, + "step": 18058500 + }, + { + "epoch": 89.47, + "learning_rate": 5.280735522163266e-06, + "loss": 1.8266, + "step": 18059000 + }, + { + "epoch": 89.47, + "learning_rate": 5.279496935737182e-06, + "loss": 1.8282, + "step": 18059500 + }, + { + "epoch": 89.48, + "learning_rate": 5.278258349311099e-06, + "loss": 1.8266, + "step": 18060000 + }, + { + "epoch": 89.48, + "learning_rate": 5.277019762885015e-06, + "loss": 1.8082, + "step": 18060500 + }, + { + "epoch": 89.48, + "learning_rate": 5.2757836536317835e-06, + "loss": 1.8205, + "step": 18061000 + }, + { + "epoch": 89.48, + "learning_rate": 5.2745450672057e-06, + "loss": 1.7907, + "step": 18061500 + }, + { + "epoch": 89.49, + "learning_rate": 5.2733064807796165e-06, + "loss": 1.8052, + "step": 18062000 + }, + { + "epoch": 89.49, + "learning_rate": 5.272067894353533e-06, + "loss": 1.8218, + "step": 18062500 + }, + { + "epoch": 89.49, + "learning_rate": 5.270829307927449e-06, + "loss": 1.8156, + "step": 18063000 + }, + { + "epoch": 89.49, + "learning_rate": 5.269590721501365e-06, + "loss": 1.8041, + "step": 18063500 + }, + { + "epoch": 89.5, + "learning_rate": 5.268352135075282e-06, + "loss": 1.8251, + "step": 18064000 + }, + { + "epoch": 89.5, + "learning_rate": 5.267113548649199e-06, + "loss": 1.8088, + "step": 18064500 + }, + { + "epoch": 89.5, + "learning_rate": 5.265874962223114e-06, + "loss": 1.8342, + "step": 18065000 + }, + { + "epoch": 89.5, + "learning_rate": 5.2646388529698835e-06, + "loss": 1.8044, + "step": 18065500 + }, + { + "epoch": 89.51, + "learning_rate": 5.2634027437166515e-06, + "loss": 1.8115, + "step": 18066000 + }, + { + "epoch": 89.51, + "learning_rate": 5.2621641572905675e-06, + "loss": 1.8142, + "step": 18066500 + }, + { + "epoch": 89.51, + "learning_rate": 5.260925570864484e-06, + "loss": 1.8168, + "step": 18067000 + }, + { + "epoch": 89.51, + "learning_rate": 5.2596869844384006e-06, + "loss": 1.8283, + "step": 18067500 + }, + { + "epoch": 89.52, + "learning_rate": 5.258450875185169e-06, + "loss": 1.8167, + "step": 18068000 + }, + { + "epoch": 89.52, + "learning_rate": 5.257214765931937e-06, + "loss": 1.7899, + "step": 18068500 + }, + { + "epoch": 89.52, + "learning_rate": 5.255976179505854e-06, + "loss": 1.8392, + "step": 18069000 + }, + { + "epoch": 89.52, + "learning_rate": 5.2547375930797695e-06, + "loss": 1.8105, + "step": 18069500 + }, + { + "epoch": 89.53, + "learning_rate": 5.253499006653686e-06, + "loss": 1.8082, + "step": 18070000 + }, + { + "epoch": 89.53, + "learning_rate": 5.252260420227603e-06, + "loss": 1.7921, + "step": 18070500 + }, + { + "epoch": 89.53, + "learning_rate": 5.2510218338015194e-06, + "loss": 1.8176, + "step": 18071000 + }, + { + "epoch": 89.53, + "learning_rate": 5.2497832473754355e-06, + "loss": 1.7919, + "step": 18071500 + }, + { + "epoch": 89.53, + "learning_rate": 5.248544660949352e-06, + "loss": 1.7974, + "step": 18072000 + }, + { + "epoch": 89.54, + "learning_rate": 5.2473060745232685e-06, + "loss": 1.8044, + "step": 18072500 + }, + { + "epoch": 89.54, + "learning_rate": 5.246067488097185e-06, + "loss": 1.8303, + "step": 18073000 + }, + { + "epoch": 89.54, + "learning_rate": 5.2448289016711015e-06, + "loss": 1.8089, + "step": 18073500 + }, + { + "epoch": 89.54, + "learning_rate": 5.243590315245018e-06, + "loss": 1.8066, + "step": 18074000 + }, + { + "epoch": 89.55, + "learning_rate": 5.242351728818934e-06, + "loss": 1.7829, + "step": 18074500 + }, + { + "epoch": 89.55, + "learning_rate": 5.24111314239285e-06, + "loss": 1.8216, + "step": 18075000 + }, + { + "epoch": 89.55, + "learning_rate": 5.239877033139619e-06, + "loss": 1.791, + "step": 18075500 + }, + { + "epoch": 89.55, + "learning_rate": 5.238638446713535e-06, + "loss": 1.8273, + "step": 18076000 + }, + { + "epoch": 89.56, + "learning_rate": 5.237399860287452e-06, + "loss": 1.7968, + "step": 18076500 + }, + { + "epoch": 89.56, + "learning_rate": 5.2361637510342196e-06, + "loss": 1.786, + "step": 18077000 + }, + { + "epoch": 89.56, + "learning_rate": 5.2349251646081365e-06, + "loss": 1.8216, + "step": 18077500 + }, + { + "epoch": 89.56, + "learning_rate": 5.2336865781820526e-06, + "loss": 1.8046, + "step": 18078000 + }, + { + "epoch": 89.57, + "learning_rate": 5.232447991755969e-06, + "loss": 1.7938, + "step": 18078500 + }, + { + "epoch": 89.57, + "learning_rate": 5.231209405329885e-06, + "loss": 1.8553, + "step": 18079000 + }, + { + "epoch": 89.57, + "learning_rate": 5.229970818903802e-06, + "loss": 1.8306, + "step": 18079500 + }, + { + "epoch": 89.57, + "learning_rate": 5.228732232477719e-06, + "loss": 1.792, + "step": 18080000 + }, + { + "epoch": 89.58, + "learning_rate": 5.227493646051635e-06, + "loss": 1.7983, + "step": 18080500 + }, + { + "epoch": 89.58, + "learning_rate": 5.226255059625551e-06, + "loss": 1.828, + "step": 18081000 + }, + { + "epoch": 89.58, + "learning_rate": 5.225016473199467e-06, + "loss": 1.8171, + "step": 18081500 + }, + { + "epoch": 89.58, + "learning_rate": 5.223777886773384e-06, + "loss": 1.8094, + "step": 18082000 + }, + { + "epoch": 89.59, + "learning_rate": 5.2225393003473e-06, + "loss": 1.8321, + "step": 18082500 + }, + { + "epoch": 89.59, + "learning_rate": 5.221300713921217e-06, + "loss": 1.8137, + "step": 18083000 + }, + { + "epoch": 89.59, + "learning_rate": 5.220067081840837e-06, + "loss": 1.8259, + "step": 18083500 + }, + { + "epoch": 89.59, + "learning_rate": 5.2188284954147535e-06, + "loss": 1.8067, + "step": 18084000 + }, + { + "epoch": 89.6, + "learning_rate": 5.21758990898867e-06, + "loss": 1.8195, + "step": 18084500 + }, + { + "epoch": 89.6, + "learning_rate": 5.2163513225625865e-06, + "loss": 1.812, + "step": 18085000 + }, + { + "epoch": 89.6, + "learning_rate": 5.215112736136503e-06, + "loss": 1.8081, + "step": 18085500 + }, + { + "epoch": 89.6, + "learning_rate": 5.213874149710419e-06, + "loss": 1.796, + "step": 18086000 + }, + { + "epoch": 89.61, + "learning_rate": 5.212635563284335e-06, + "loss": 1.7888, + "step": 18086500 + }, + { + "epoch": 89.61, + "learning_rate": 5.211396976858252e-06, + "loss": 1.8039, + "step": 18087000 + }, + { + "epoch": 89.61, + "learning_rate": 5.210158390432168e-06, + "loss": 1.8238, + "step": 18087500 + }, + { + "epoch": 89.61, + "learning_rate": 5.208922281178937e-06, + "loss": 1.8063, + "step": 18088000 + }, + { + "epoch": 89.62, + "learning_rate": 5.2076861719257046e-06, + "loss": 1.8213, + "step": 18088500 + }, + { + "epoch": 89.62, + "learning_rate": 5.2064475854996215e-06, + "loss": 1.8184, + "step": 18089000 + }, + { + "epoch": 89.62, + "learning_rate": 5.205208999073538e-06, + "loss": 1.8159, + "step": 18089500 + }, + { + "epoch": 89.62, + "learning_rate": 5.203972889820306e-06, + "loss": 1.8118, + "step": 18090000 + }, + { + "epoch": 89.63, + "learning_rate": 5.2027343033942225e-06, + "loss": 1.8154, + "step": 18090500 + }, + { + "epoch": 89.63, + "learning_rate": 5.201495716968139e-06, + "loss": 1.8181, + "step": 18091000 + }, + { + "epoch": 89.63, + "learning_rate": 5.200257130542055e-06, + "loss": 1.8292, + "step": 18091500 + }, + { + "epoch": 89.63, + "learning_rate": 5.1990185441159716e-06, + "loss": 1.8064, + "step": 18092000 + }, + { + "epoch": 89.64, + "learning_rate": 5.197779957689888e-06, + "loss": 1.8245, + "step": 18092500 + }, + { + "epoch": 89.64, + "learning_rate": 5.1965413712638046e-06, + "loss": 1.8121, + "step": 18093000 + }, + { + "epoch": 89.64, + "learning_rate": 5.195302784837721e-06, + "loss": 1.8285, + "step": 18093500 + }, + { + "epoch": 89.64, + "learning_rate": 5.194064198411637e-06, + "loss": 1.811, + "step": 18094000 + }, + { + "epoch": 89.65, + "learning_rate": 5.192825611985554e-06, + "loss": 1.8067, + "step": 18094500 + }, + { + "epoch": 89.65, + "learning_rate": 5.19158702555947e-06, + "loss": 1.8123, + "step": 18095000 + }, + { + "epoch": 89.65, + "learning_rate": 5.190348439133387e-06, + "loss": 1.7984, + "step": 18095500 + }, + { + "epoch": 89.65, + "learning_rate": 5.189109852707302e-06, + "loss": 1.811, + "step": 18096000 + }, + { + "epoch": 89.66, + "learning_rate": 5.187871266281219e-06, + "loss": 1.811, + "step": 18096500 + }, + { + "epoch": 89.66, + "learning_rate": 5.186632679855135e-06, + "loss": 1.8042, + "step": 18097000 + }, + { + "epoch": 89.66, + "learning_rate": 5.185394093429052e-06, + "loss": 1.8255, + "step": 18097500 + }, + { + "epoch": 89.66, + "learning_rate": 5.184155507002968e-06, + "loss": 1.8302, + "step": 18098000 + }, + { + "epoch": 89.67, + "learning_rate": 5.182919397749737e-06, + "loss": 1.8246, + "step": 18098500 + }, + { + "epoch": 89.67, + "learning_rate": 5.181680811323653e-06, + "loss": 1.7989, + "step": 18099000 + }, + { + "epoch": 89.67, + "learning_rate": 5.180442224897569e-06, + "loss": 1.8102, + "step": 18099500 + }, + { + "epoch": 89.67, + "learning_rate": 5.179203638471485e-06, + "loss": 1.8126, + "step": 18100000 + }, + { + "epoch": 89.68, + "learning_rate": 5.177965052045402e-06, + "loss": 1.8446, + "step": 18100500 + }, + { + "epoch": 89.68, + "learning_rate": 5.176726465619318e-06, + "loss": 1.809, + "step": 18101000 + }, + { + "epoch": 89.68, + "learning_rate": 5.175490356366087e-06, + "loss": 1.8036, + "step": 18101500 + }, + { + "epoch": 89.68, + "learning_rate": 5.174251769940003e-06, + "loss": 1.8263, + "step": 18102000 + }, + { + "epoch": 89.69, + "learning_rate": 5.17301318351392e-06, + "loss": 1.8091, + "step": 18102500 + }, + { + "epoch": 89.69, + "learning_rate": 5.171777074260688e-06, + "loss": 1.7955, + "step": 18103000 + }, + { + "epoch": 89.69, + "learning_rate": 5.1705409650074566e-06, + "loss": 1.8212, + "step": 18103500 + }, + { + "epoch": 89.69, + "learning_rate": 5.169302378581373e-06, + "loss": 1.8061, + "step": 18104000 + }, + { + "epoch": 89.7, + "learning_rate": 5.16806379215529e-06, + "loss": 1.8048, + "step": 18104500 + }, + { + "epoch": 89.7, + "learning_rate": 5.166825205729206e-06, + "loss": 1.8171, + "step": 18105000 + }, + { + "epoch": 89.7, + "learning_rate": 5.165586619303122e-06, + "loss": 1.8195, + "step": 18105500 + }, + { + "epoch": 89.7, + "learning_rate": 5.164348032877038e-06, + "loss": 1.824, + "step": 18106000 + }, + { + "epoch": 89.71, + "learning_rate": 5.163109446450955e-06, + "loss": 1.7927, + "step": 18106500 + }, + { + "epoch": 89.71, + "learning_rate": 5.161870860024872e-06, + "loss": 1.8025, + "step": 18107000 + }, + { + "epoch": 89.71, + "learning_rate": 5.160632273598788e-06, + "loss": 1.8182, + "step": 18107500 + }, + { + "epoch": 89.71, + "learning_rate": 5.159393687172704e-06, + "loss": 1.8023, + "step": 18108000 + }, + { + "epoch": 89.72, + "learning_rate": 5.15815510074662e-06, + "loss": 1.8164, + "step": 18108500 + }, + { + "epoch": 89.72, + "learning_rate": 5.156918991493389e-06, + "loss": 1.8201, + "step": 18109000 + }, + { + "epoch": 89.72, + "learning_rate": 5.155680405067305e-06, + "loss": 1.8325, + "step": 18109500 + }, + { + "epoch": 89.72, + "learning_rate": 5.154441818641222e-06, + "loss": 1.806, + "step": 18110000 + }, + { + "epoch": 89.73, + "learning_rate": 5.1532081865608424e-06, + "loss": 1.8091, + "step": 18110500 + }, + { + "epoch": 89.73, + "learning_rate": 5.1519696001347585e-06, + "loss": 1.8245, + "step": 18111000 + }, + { + "epoch": 89.73, + "learning_rate": 5.150731013708675e-06, + "loss": 1.8018, + "step": 18111500 + }, + { + "epoch": 89.73, + "learning_rate": 5.1494924272825915e-06, + "loss": 1.8064, + "step": 18112000 + }, + { + "epoch": 89.74, + "learning_rate": 5.148253840856508e-06, + "loss": 1.7892, + "step": 18112500 + }, + { + "epoch": 89.74, + "learning_rate": 5.1470152544304245e-06, + "loss": 1.833, + "step": 18113000 + }, + { + "epoch": 89.74, + "learning_rate": 5.14577666800434e-06, + "loss": 1.8096, + "step": 18113500 + }, + { + "epoch": 89.74, + "learning_rate": 5.144538081578257e-06, + "loss": 1.8063, + "step": 18114000 + }, + { + "epoch": 89.75, + "learning_rate": 5.143299495152173e-06, + "loss": 1.8187, + "step": 18114500 + }, + { + "epoch": 89.75, + "learning_rate": 5.14206090872609e-06, + "loss": 1.8107, + "step": 18115000 + }, + { + "epoch": 89.75, + "learning_rate": 5.1408272766457095e-06, + "loss": 1.8275, + "step": 18115500 + }, + { + "epoch": 89.75, + "learning_rate": 5.1395886902196265e-06, + "loss": 1.8411, + "step": 18116000 + }, + { + "epoch": 89.76, + "learning_rate": 5.1383501037935426e-06, + "loss": 1.8393, + "step": 18116500 + }, + { + "epoch": 89.76, + "learning_rate": 5.1371115173674595e-06, + "loss": 1.8168, + "step": 18117000 + }, + { + "epoch": 89.76, + "learning_rate": 5.1358729309413756e-06, + "loss": 1.8134, + "step": 18117500 + }, + { + "epoch": 89.76, + "learning_rate": 5.134634344515292e-06, + "loss": 1.8007, + "step": 18118000 + }, + { + "epoch": 89.77, + "learning_rate": 5.133395758089208e-06, + "loss": 1.8131, + "step": 18118500 + }, + { + "epoch": 89.77, + "learning_rate": 5.132157171663125e-06, + "loss": 1.8172, + "step": 18119000 + }, + { + "epoch": 89.77, + "learning_rate": 5.130918585237041e-06, + "loss": 1.8129, + "step": 18119500 + }, + { + "epoch": 89.77, + "learning_rate": 5.129679998810958e-06, + "loss": 1.8111, + "step": 18120000 + }, + { + "epoch": 89.78, + "learning_rate": 5.128441412384874e-06, + "loss": 1.8211, + "step": 18120500 + }, + { + "epoch": 89.78, + "learning_rate": 5.12720282595879e-06, + "loss": 1.8285, + "step": 18121000 + }, + { + "epoch": 89.78, + "learning_rate": 5.125964239532707e-06, + "loss": 1.8144, + "step": 18121500 + }, + { + "epoch": 89.78, + "learning_rate": 5.124728130279475e-06, + "loss": 1.7972, + "step": 18122000 + }, + { + "epoch": 89.79, + "learning_rate": 5.123489543853392e-06, + "loss": 1.8187, + "step": 18122500 + }, + { + "epoch": 89.79, + "learning_rate": 5.122250957427308e-06, + "loss": 1.8296, + "step": 18123000 + }, + { + "epoch": 89.79, + "learning_rate": 5.121012371001224e-06, + "loss": 1.8155, + "step": 18123500 + }, + { + "epoch": 89.79, + "learning_rate": 5.11977378457514e-06, + "loss": 1.8215, + "step": 18124000 + }, + { + "epoch": 89.8, + "learning_rate": 5.118535198149057e-06, + "loss": 1.832, + "step": 18124500 + }, + { + "epoch": 89.8, + "learning_rate": 5.117299088895825e-06, + "loss": 1.8029, + "step": 18125000 + }, + { + "epoch": 89.8, + "learning_rate": 5.116060502469742e-06, + "loss": 1.8194, + "step": 18125500 + }, + { + "epoch": 89.8, + "learning_rate": 5.114821916043658e-06, + "loss": 1.8032, + "step": 18126000 + }, + { + "epoch": 89.8, + "learning_rate": 5.113583329617575e-06, + "loss": 1.8063, + "step": 18126500 + }, + { + "epoch": 89.81, + "learning_rate": 5.112344743191491e-06, + "loss": 1.813, + "step": 18127000 + }, + { + "epoch": 89.81, + "learning_rate": 5.111106156765407e-06, + "loss": 1.8114, + "step": 18127500 + }, + { + "epoch": 89.81, + "learning_rate": 5.109870047512176e-06, + "loss": 1.8339, + "step": 18128000 + }, + { + "epoch": 89.81, + "learning_rate": 5.108631461086092e-06, + "loss": 1.8098, + "step": 18128500 + }, + { + "epoch": 89.82, + "learning_rate": 5.107392874660008e-06, + "loss": 1.8151, + "step": 18129000 + }, + { + "epoch": 89.82, + "learning_rate": 5.106154288233925e-06, + "loss": 1.7935, + "step": 18129500 + }, + { + "epoch": 89.82, + "learning_rate": 5.104915701807841e-06, + "loss": 1.8254, + "step": 18130000 + }, + { + "epoch": 89.82, + "learning_rate": 5.10367959255461e-06, + "loss": 1.8302, + "step": 18130500 + }, + { + "epoch": 89.83, + "learning_rate": 5.102443483301378e-06, + "loss": 1.816, + "step": 18131000 + }, + { + "epoch": 89.83, + "learning_rate": 5.1012048968752946e-06, + "loss": 1.8133, + "step": 18131500 + }, + { + "epoch": 89.83, + "learning_rate": 5.099966310449211e-06, + "loss": 1.8307, + "step": 18132000 + }, + { + "epoch": 89.83, + "learning_rate": 5.0987277240231276e-06, + "loss": 1.8317, + "step": 18132500 + }, + { + "epoch": 89.84, + "learning_rate": 5.097489137597043e-06, + "loss": 1.809, + "step": 18133000 + }, + { + "epoch": 89.84, + "learning_rate": 5.09625055117096e-06, + "loss": 1.8089, + "step": 18133500 + }, + { + "epoch": 89.84, + "learning_rate": 5.095011964744876e-06, + "loss": 1.7961, + "step": 18134000 + }, + { + "epoch": 89.84, + "learning_rate": 5.093773378318793e-06, + "loss": 1.8161, + "step": 18134500 + }, + { + "epoch": 89.85, + "learning_rate": 5.092537269065561e-06, + "loss": 1.8423, + "step": 18135000 + }, + { + "epoch": 89.85, + "learning_rate": 5.091298682639478e-06, + "loss": 1.815, + "step": 18135500 + }, + { + "epoch": 89.85, + "learning_rate": 5.0900600962133946e-06, + "loss": 1.8, + "step": 18136000 + }, + { + "epoch": 89.85, + "learning_rate": 5.08882150978731e-06, + "loss": 1.8177, + "step": 18136500 + }, + { + "epoch": 89.86, + "learning_rate": 5.087585400534079e-06, + "loss": 1.8176, + "step": 18137000 + }, + { + "epoch": 89.86, + "learning_rate": 5.086346814107995e-06, + "loss": 1.8119, + "step": 18137500 + }, + { + "epoch": 89.86, + "learning_rate": 5.085108227681912e-06, + "loss": 1.8418, + "step": 18138000 + }, + { + "epoch": 89.86, + "learning_rate": 5.083869641255828e-06, + "loss": 1.8165, + "step": 18138500 + }, + { + "epoch": 89.87, + "learning_rate": 5.082631054829745e-06, + "loss": 1.8173, + "step": 18139000 + }, + { + "epoch": 89.87, + "learning_rate": 5.081392468403661e-06, + "loss": 1.8033, + "step": 18139500 + }, + { + "epoch": 89.87, + "learning_rate": 5.080153881977577e-06, + "loss": 1.8172, + "step": 18140000 + }, + { + "epoch": 89.87, + "learning_rate": 5.078917772724346e-06, + "loss": 1.8108, + "step": 18140500 + }, + { + "epoch": 89.88, + "learning_rate": 5.077679186298262e-06, + "loss": 1.8182, + "step": 18141000 + }, + { + "epoch": 89.88, + "learning_rate": 5.076440599872178e-06, + "loss": 1.8342, + "step": 18141500 + }, + { + "epoch": 89.88, + "learning_rate": 5.075202013446095e-06, + "loss": 1.816, + "step": 18142000 + }, + { + "epoch": 89.88, + "learning_rate": 5.073963427020011e-06, + "loss": 1.8196, + "step": 18142500 + }, + { + "epoch": 89.89, + "learning_rate": 5.072724840593928e-06, + "loss": 1.8296, + "step": 18143000 + }, + { + "epoch": 89.89, + "learning_rate": 5.071486254167843e-06, + "loss": 1.8096, + "step": 18143500 + }, + { + "epoch": 89.89, + "learning_rate": 5.07024766774176e-06, + "loss": 1.8164, + "step": 18144000 + }, + { + "epoch": 89.89, + "learning_rate": 5.069009081315676e-06, + "loss": 1.8076, + "step": 18144500 + }, + { + "epoch": 89.9, + "learning_rate": 5.067770494889593e-06, + "loss": 1.8227, + "step": 18145000 + }, + { + "epoch": 89.9, + "learning_rate": 5.066531908463509e-06, + "loss": 1.8215, + "step": 18145500 + }, + { + "epoch": 89.9, + "learning_rate": 5.065293322037425e-06, + "loss": 1.8241, + "step": 18146000 + }, + { + "epoch": 89.9, + "learning_rate": 5.064057212784194e-06, + "loss": 1.7957, + "step": 18146500 + }, + { + "epoch": 89.91, + "learning_rate": 5.06281862635811e-06, + "loss": 1.833, + "step": 18147000 + }, + { + "epoch": 89.91, + "learning_rate": 5.061582517104879e-06, + "loss": 1.8381, + "step": 18147500 + }, + { + "epoch": 89.91, + "learning_rate": 5.060343930678795e-06, + "loss": 1.8143, + "step": 18148000 + }, + { + "epoch": 89.91, + "learning_rate": 5.059105344252712e-06, + "loss": 1.8082, + "step": 18148500 + }, + { + "epoch": 89.92, + "learning_rate": 5.057866757826628e-06, + "loss": 1.8189, + "step": 18149000 + }, + { + "epoch": 89.92, + "learning_rate": 5.056628171400545e-06, + "loss": 1.8263, + "step": 18149500 + }, + { + "epoch": 89.92, + "learning_rate": 5.055392062147313e-06, + "loss": 1.811, + "step": 18150000 + }, + { + "epoch": 89.92, + "learning_rate": 5.05415347572123e-06, + "loss": 1.824, + "step": 18150500 + }, + { + "epoch": 89.93, + "learning_rate": 5.052917366467998e-06, + "loss": 1.8097, + "step": 18151000 + }, + { + "epoch": 89.93, + "learning_rate": 5.051678780041914e-06, + "loss": 1.8103, + "step": 18151500 + }, + { + "epoch": 89.93, + "learning_rate": 5.050440193615831e-06, + "loss": 1.8063, + "step": 18152000 + }, + { + "epoch": 89.93, + "learning_rate": 5.049201607189747e-06, + "loss": 1.8168, + "step": 18152500 + }, + { + "epoch": 89.94, + "learning_rate": 5.0479654979365155e-06, + "loss": 1.8129, + "step": 18153000 + }, + { + "epoch": 89.94, + "learning_rate": 5.046726911510432e-06, + "loss": 1.7993, + "step": 18153500 + }, + { + "epoch": 89.94, + "learning_rate": 5.045488325084348e-06, + "loss": 1.8129, + "step": 18154000 + }, + { + "epoch": 89.94, + "learning_rate": 5.044249738658265e-06, + "loss": 1.8026, + "step": 18154500 + }, + { + "epoch": 89.95, + "learning_rate": 5.043011152232181e-06, + "loss": 1.8317, + "step": 18155000 + }, + { + "epoch": 89.95, + "learning_rate": 5.041772565806098e-06, + "loss": 1.8234, + "step": 18155500 + }, + { + "epoch": 89.95, + "learning_rate": 5.040533979380014e-06, + "loss": 1.8079, + "step": 18156000 + }, + { + "epoch": 89.95, + "learning_rate": 5.03929539295393e-06, + "loss": 1.81, + "step": 18156500 + }, + { + "epoch": 89.96, + "learning_rate": 5.038056806527846e-06, + "loss": 1.8076, + "step": 18157000 + }, + { + "epoch": 89.96, + "learning_rate": 5.036818220101763e-06, + "loss": 1.8176, + "step": 18157500 + }, + { + "epoch": 89.96, + "learning_rate": 5.035579633675679e-06, + "loss": 1.8176, + "step": 18158000 + }, + { + "epoch": 89.96, + "learning_rate": 5.034341047249595e-06, + "loss": 1.7952, + "step": 18158500 + }, + { + "epoch": 89.97, + "learning_rate": 5.033102460823511e-06, + "loss": 1.8125, + "step": 18159000 + }, + { + "epoch": 89.97, + "learning_rate": 5.031863874397428e-06, + "loss": 1.8097, + "step": 18159500 + }, + { + "epoch": 89.97, + "learning_rate": 5.030625287971345e-06, + "loss": 1.827, + "step": 18160000 + }, + { + "epoch": 89.97, + "learning_rate": 5.029386701545261e-06, + "loss": 1.8262, + "step": 18160500 + }, + { + "epoch": 89.98, + "learning_rate": 5.02815059229203e-06, + "loss": 1.7961, + "step": 18161000 + }, + { + "epoch": 89.98, + "learning_rate": 5.026914483038798e-06, + "loss": 1.836, + "step": 18161500 + }, + { + "epoch": 89.98, + "learning_rate": 5.025675896612714e-06, + "loss": 1.8222, + "step": 18162000 + }, + { + "epoch": 89.98, + "learning_rate": 5.024439787359483e-06, + "loss": 1.8182, + "step": 18162500 + }, + { + "epoch": 89.99, + "learning_rate": 5.023201200933399e-06, + "loss": 1.7863, + "step": 18163000 + }, + { + "epoch": 89.99, + "learning_rate": 5.021962614507316e-06, + "loss": 1.8048, + "step": 18163500 + }, + { + "epoch": 89.99, + "learning_rate": 5.020724028081232e-06, + "loss": 1.8057, + "step": 18164000 + }, + { + "epoch": 89.99, + "learning_rate": 5.019485441655148e-06, + "loss": 1.809, + "step": 18164500 + }, + { + "epoch": 90.0, + "learning_rate": 5.018246855229065e-06, + "loss": 1.806, + "step": 18165000 + }, + { + "epoch": 90.0, + "learning_rate": 5.017008268802981e-06, + "loss": 1.8336, + "step": 18165500 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.6849343557473726, + "eval_accuracy_mlm": 0.646069626492843, + "eval_accuracy_nsp": 0.868159194223385, + "eval_loss": 2.3014326095581055, + "eval_runtime": 146.7505, + "eval_samples_per_second": 1737.364, + "eval_steps_per_second": 72.395, + "step": 18165870 + }, + { + "epoch": 90.0, + "learning_rate": 5.015769682376898e-06, + "loss": 1.8179, + "step": 18166000 + }, + { + "epoch": 90.0, + "learning_rate": 5.014531095950813e-06, + "loss": 1.7862, + "step": 18166500 + }, + { + "epoch": 90.01, + "learning_rate": 5.01329250952473e-06, + "loss": 1.7962, + "step": 18167000 + }, + { + "epoch": 90.01, + "learning_rate": 5.012053923098646e-06, + "loss": 1.7968, + "step": 18167500 + }, + { + "epoch": 90.01, + "learning_rate": 5.010815336672563e-06, + "loss": 1.8022, + "step": 18168000 + }, + { + "epoch": 90.01, + "learning_rate": 5.009576750246479e-06, + "loss": 1.808, + "step": 18168500 + }, + { + "epoch": 90.02, + "learning_rate": 5.008338163820395e-06, + "loss": 1.7877, + "step": 18169000 + }, + { + "epoch": 90.02, + "learning_rate": 5.007102054567164e-06, + "loss": 1.824, + "step": 18169500 + }, + { + "epoch": 90.02, + "learning_rate": 5.00586346814108e-06, + "loss": 1.8149, + "step": 18170000 + }, + { + "epoch": 90.02, + "learning_rate": 5.004624881714996e-06, + "loss": 1.7987, + "step": 18170500 + }, + { + "epoch": 90.03, + "learning_rate": 5.003386295288913e-06, + "loss": 1.8144, + "step": 18171000 + }, + { + "epoch": 90.03, + "learning_rate": 5.002150186035681e-06, + "loss": 1.802, + "step": 18171500 + }, + { + "epoch": 90.03, + "learning_rate": 5.000911599609598e-06, + "loss": 1.8138, + "step": 18172000 + }, + { + "epoch": 90.03, + "learning_rate": 4.999673013183514e-06, + "loss": 1.8218, + "step": 18172500 + }, + { + "epoch": 90.04, + "learning_rate": 4.998436903930283e-06, + "loss": 1.7874, + "step": 18173000 + }, + { + "epoch": 90.04, + "learning_rate": 4.997198317504199e-06, + "loss": 1.8196, + "step": 18173500 + }, + { + "epoch": 90.04, + "learning_rate": 4.995959731078116e-06, + "loss": 1.8387, + "step": 18174000 + }, + { + "epoch": 90.04, + "learning_rate": 4.994721144652032e-06, + "loss": 1.8084, + "step": 18174500 + }, + { + "epoch": 90.05, + "learning_rate": 4.993482558225948e-06, + "loss": 1.8254, + "step": 18175000 + }, + { + "epoch": 90.05, + "learning_rate": 4.992243971799865e-06, + "loss": 1.7775, + "step": 18175500 + }, + { + "epoch": 90.05, + "learning_rate": 4.991005385373781e-06, + "loss": 1.8232, + "step": 18176000 + }, + { + "epoch": 90.05, + "learning_rate": 4.989769276120549e-06, + "loss": 1.8091, + "step": 18176500 + }, + { + "epoch": 90.06, + "learning_rate": 4.988530689694466e-06, + "loss": 1.8032, + "step": 18177000 + }, + { + "epoch": 90.06, + "learning_rate": 4.987292103268383e-06, + "loss": 1.7951, + "step": 18177500 + }, + { + "epoch": 90.06, + "learning_rate": 4.986055994015151e-06, + "loss": 1.8072, + "step": 18178000 + }, + { + "epoch": 90.06, + "learning_rate": 4.984817407589068e-06, + "loss": 1.7949, + "step": 18178500 + }, + { + "epoch": 90.07, + "learning_rate": 4.983578821162984e-06, + "loss": 1.805, + "step": 18179000 + }, + { + "epoch": 90.07, + "learning_rate": 4.9823402347369e-06, + "loss": 1.8289, + "step": 18179500 + }, + { + "epoch": 90.07, + "learning_rate": 4.981101648310816e-06, + "loss": 1.8303, + "step": 18180000 + }, + { + "epoch": 90.07, + "learning_rate": 4.979863061884733e-06, + "loss": 1.7805, + "step": 18180500 + }, + { + "epoch": 90.07, + "learning_rate": 4.978624475458649e-06, + "loss": 1.8255, + "step": 18181000 + }, + { + "epoch": 90.08, + "learning_rate": 4.977385889032565e-06, + "loss": 1.8294, + "step": 18181500 + }, + { + "epoch": 90.08, + "learning_rate": 4.976147302606481e-06, + "loss": 1.8196, + "step": 18182000 + }, + { + "epoch": 90.08, + "learning_rate": 4.974908716180398e-06, + "loss": 1.8128, + "step": 18182500 + }, + { + "epoch": 90.08, + "learning_rate": 4.973672606927166e-06, + "loss": 1.7903, + "step": 18183000 + }, + { + "epoch": 90.09, + "learning_rate": 4.972434020501083e-06, + "loss": 1.7918, + "step": 18183500 + }, + { + "epoch": 90.09, + "learning_rate": 4.971195434074999e-06, + "loss": 1.8027, + "step": 18184000 + }, + { + "epoch": 90.09, + "learning_rate": 4.969956847648916e-06, + "loss": 1.8067, + "step": 18184500 + }, + { + "epoch": 90.09, + "learning_rate": 4.968718261222832e-06, + "loss": 1.809, + "step": 18185000 + }, + { + "epoch": 90.1, + "learning_rate": 4.967482151969601e-06, + "loss": 1.7957, + "step": 18185500 + }, + { + "epoch": 90.1, + "learning_rate": 4.966243565543517e-06, + "loss": 1.7825, + "step": 18186000 + }, + { + "epoch": 90.1, + "learning_rate": 4.965004979117433e-06, + "loss": 1.7995, + "step": 18186500 + }, + { + "epoch": 90.1, + "learning_rate": 4.963766392691349e-06, + "loss": 1.8031, + "step": 18187000 + }, + { + "epoch": 90.11, + "learning_rate": 4.962527806265266e-06, + "loss": 1.8063, + "step": 18187500 + }, + { + "epoch": 90.11, + "learning_rate": 4.961289219839183e-06, + "loss": 1.7914, + "step": 18188000 + }, + { + "epoch": 90.11, + "learning_rate": 4.960050633413098e-06, + "loss": 1.7758, + "step": 18188500 + }, + { + "epoch": 90.11, + "learning_rate": 4.958812046987015e-06, + "loss": 1.8163, + "step": 18189000 + }, + { + "epoch": 90.12, + "learning_rate": 4.957575937733783e-06, + "loss": 1.8002, + "step": 18189500 + }, + { + "epoch": 90.12, + "learning_rate": 4.9563373513077e-06, + "loss": 1.7904, + "step": 18190000 + }, + { + "epoch": 90.12, + "learning_rate": 4.955101242054469e-06, + "loss": 1.8159, + "step": 18190500 + }, + { + "epoch": 90.12, + "learning_rate": 4.953862655628385e-06, + "loss": 1.8057, + "step": 18191000 + }, + { + "epoch": 90.13, + "learning_rate": 4.952624069202301e-06, + "loss": 1.8229, + "step": 18191500 + }, + { + "epoch": 90.13, + "learning_rate": 4.951385482776218e-06, + "loss": 1.8029, + "step": 18192000 + }, + { + "epoch": 90.13, + "learning_rate": 4.950146896350134e-06, + "loss": 1.8169, + "step": 18192500 + }, + { + "epoch": 90.13, + "learning_rate": 4.94890830992405e-06, + "loss": 1.8042, + "step": 18193000 + }, + { + "epoch": 90.14, + "learning_rate": 4.947669723497966e-06, + "loss": 1.8239, + "step": 18193500 + }, + { + "epoch": 90.14, + "learning_rate": 4.946431137071883e-06, + "loss": 1.8053, + "step": 18194000 + }, + { + "epoch": 90.14, + "learning_rate": 4.945195027818651e-06, + "loss": 1.8054, + "step": 18194500 + }, + { + "epoch": 90.14, + "learning_rate": 4.943956441392568e-06, + "loss": 1.8208, + "step": 18195000 + }, + { + "epoch": 90.15, + "learning_rate": 4.942717854966484e-06, + "loss": 1.8148, + "step": 18195500 + }, + { + "epoch": 90.15, + "learning_rate": 4.941479268540401e-06, + "loss": 1.7933, + "step": 18196000 + }, + { + "epoch": 90.15, + "learning_rate": 4.940240682114317e-06, + "loss": 1.8162, + "step": 18196500 + }, + { + "epoch": 90.15, + "learning_rate": 4.939004572861086e-06, + "loss": 1.8032, + "step": 18197000 + }, + { + "epoch": 90.16, + "learning_rate": 4.937765986435002e-06, + "loss": 1.8092, + "step": 18197500 + }, + { + "epoch": 90.16, + "learning_rate": 4.936529877181771e-06, + "loss": 1.8088, + "step": 18198000 + }, + { + "epoch": 90.16, + "learning_rate": 4.935291290755687e-06, + "loss": 1.8166, + "step": 18198500 + }, + { + "epoch": 90.16, + "learning_rate": 4.934052704329603e-06, + "loss": 1.8061, + "step": 18199000 + }, + { + "epoch": 90.17, + "learning_rate": 4.932814117903519e-06, + "loss": 1.7963, + "step": 18199500 + }, + { + "epoch": 90.17, + "learning_rate": 4.931575531477436e-06, + "loss": 1.8258, + "step": 18200000 + }, + { + "epoch": 90.17, + "learning_rate": 4.930336945051352e-06, + "loss": 1.8203, + "step": 18200500 + }, + { + "epoch": 90.17, + "learning_rate": 4.929098358625268e-06, + "loss": 1.8208, + "step": 18201000 + }, + { + "epoch": 90.18, + "learning_rate": 4.927859772199184e-06, + "loss": 1.8307, + "step": 18201500 + }, + { + "epoch": 90.18, + "learning_rate": 4.926621185773101e-06, + "loss": 1.7993, + "step": 18202000 + }, + { + "epoch": 90.18, + "learning_rate": 4.925382599347018e-06, + "loss": 1.7984, + "step": 18202500 + }, + { + "epoch": 90.18, + "learning_rate": 4.924144012920934e-06, + "loss": 1.816, + "step": 18203000 + }, + { + "epoch": 90.19, + "learning_rate": 4.92290542649485e-06, + "loss": 1.8031, + "step": 18203500 + }, + { + "epoch": 90.19, + "learning_rate": 4.921669317241619e-06, + "loss": 1.7992, + "step": 18204000 + }, + { + "epoch": 90.19, + "learning_rate": 4.920433207988387e-06, + "loss": 1.7924, + "step": 18204500 + }, + { + "epoch": 90.19, + "learning_rate": 4.919194621562304e-06, + "loss": 1.797, + "step": 18205000 + }, + { + "epoch": 90.2, + "learning_rate": 4.91795603513622e-06, + "loss": 1.8209, + "step": 18205500 + }, + { + "epoch": 90.2, + "learning_rate": 4.916717448710136e-06, + "loss": 1.8099, + "step": 18206000 + }, + { + "epoch": 90.2, + "learning_rate": 4.915478862284053e-06, + "loss": 1.8292, + "step": 18206500 + }, + { + "epoch": 90.2, + "learning_rate": 4.914240275857969e-06, + "loss": 1.8056, + "step": 18207000 + }, + { + "epoch": 90.21, + "learning_rate": 4.913001689431886e-06, + "loss": 1.8048, + "step": 18207500 + }, + { + "epoch": 90.21, + "learning_rate": 4.911763103005802e-06, + "loss": 1.7902, + "step": 18208000 + }, + { + "epoch": 90.21, + "learning_rate": 4.910524516579718e-06, + "loss": 1.8158, + "step": 18208500 + }, + { + "epoch": 90.21, + "learning_rate": 4.909288407326487e-06, + "loss": 1.8056, + "step": 18209000 + }, + { + "epoch": 90.22, + "learning_rate": 4.908049820900403e-06, + "loss": 1.7931, + "step": 18209500 + }, + { + "epoch": 90.22, + "learning_rate": 4.906813711647172e-06, + "loss": 1.8007, + "step": 18210000 + }, + { + "epoch": 90.22, + "learning_rate": 4.905575125221088e-06, + "loss": 1.7942, + "step": 18210500 + }, + { + "epoch": 90.22, + "learning_rate": 4.904336538795004e-06, + "loss": 1.8083, + "step": 18211000 + }, + { + "epoch": 90.23, + "learning_rate": 4.903097952368921e-06, + "loss": 1.8205, + "step": 18211500 + }, + { + "epoch": 90.23, + "learning_rate": 4.901859365942837e-06, + "loss": 1.8253, + "step": 18212000 + }, + { + "epoch": 90.23, + "learning_rate": 4.900623256689606e-06, + "loss": 1.8133, + "step": 18212500 + }, + { + "epoch": 90.23, + "learning_rate": 4.899387147436374e-06, + "loss": 1.7914, + "step": 18213000 + }, + { + "epoch": 90.24, + "learning_rate": 4.898148561010291e-06, + "loss": 1.7736, + "step": 18213500 + }, + { + "epoch": 90.24, + "learning_rate": 4.896909974584207e-06, + "loss": 1.8165, + "step": 18214000 + }, + { + "epoch": 90.24, + "learning_rate": 4.895671388158124e-06, + "loss": 1.8191, + "step": 18214500 + }, + { + "epoch": 90.24, + "learning_rate": 4.894435278904892e-06, + "loss": 1.8029, + "step": 18215000 + }, + { + "epoch": 90.25, + "learning_rate": 4.8931966924788085e-06, + "loss": 1.8228, + "step": 18215500 + }, + { + "epoch": 90.25, + "learning_rate": 4.891958106052725e-06, + "loss": 1.8139, + "step": 18216000 + }, + { + "epoch": 90.25, + "learning_rate": 4.890719519626641e-06, + "loss": 1.8257, + "step": 18216500 + }, + { + "epoch": 90.25, + "learning_rate": 4.889480933200557e-06, + "loss": 1.826, + "step": 18217000 + }, + { + "epoch": 90.26, + "learning_rate": 4.888242346774474e-06, + "loss": 1.7964, + "step": 18217500 + }, + { + "epoch": 90.26, + "learning_rate": 4.88700376034839e-06, + "loss": 1.8298, + "step": 18218000 + }, + { + "epoch": 90.26, + "learning_rate": 4.885765173922306e-06, + "loss": 1.8171, + "step": 18218500 + }, + { + "epoch": 90.26, + "learning_rate": 4.884526587496223e-06, + "loss": 1.8096, + "step": 18219000 + }, + { + "epoch": 90.27, + "learning_rate": 4.883288001070139e-06, + "loss": 1.8024, + "step": 18219500 + }, + { + "epoch": 90.27, + "learning_rate": 4.882049414644056e-06, + "loss": 1.8036, + "step": 18220000 + }, + { + "epoch": 90.27, + "learning_rate": 4.880810828217972e-06, + "loss": 1.8143, + "step": 18220500 + }, + { + "epoch": 90.27, + "learning_rate": 4.879572241791888e-06, + "loss": 1.786, + "step": 18221000 + }, + { + "epoch": 90.28, + "learning_rate": 4.878336132538657e-06, + "loss": 1.8269, + "step": 18221500 + }, + { + "epoch": 90.28, + "learning_rate": 4.877097546112573e-06, + "loss": 1.8319, + "step": 18222000 + }, + { + "epoch": 90.28, + "learning_rate": 4.875858959686489e-06, + "loss": 1.8299, + "step": 18222500 + }, + { + "epoch": 90.28, + "learning_rate": 4.874620373260406e-06, + "loss": 1.8198, + "step": 18223000 + }, + { + "epoch": 90.29, + "learning_rate": 4.873381786834322e-06, + "loss": 1.796, + "step": 18223500 + }, + { + "epoch": 90.29, + "learning_rate": 4.872143200408239e-06, + "loss": 1.8028, + "step": 18224000 + }, + { + "epoch": 90.29, + "learning_rate": 4.870904613982154e-06, + "loss": 1.7918, + "step": 18224500 + }, + { + "epoch": 90.29, + "learning_rate": 4.869668504728924e-06, + "loss": 1.7997, + "step": 18225000 + }, + { + "epoch": 90.3, + "learning_rate": 4.868429918302839e-06, + "loss": 1.8007, + "step": 18225500 + }, + { + "epoch": 90.3, + "learning_rate": 4.867191331876756e-06, + "loss": 1.8049, + "step": 18226000 + }, + { + "epoch": 90.3, + "learning_rate": 4.865952745450672e-06, + "loss": 1.8177, + "step": 18226500 + }, + { + "epoch": 90.3, + "learning_rate": 4.8647191133702935e-06, + "loss": 1.8141, + "step": 18227000 + }, + { + "epoch": 90.31, + "learning_rate": 4.8634830041170615e-06, + "loss": 1.8114, + "step": 18227500 + }, + { + "epoch": 90.31, + "learning_rate": 4.8622444176909784e-06, + "loss": 1.8099, + "step": 18228000 + }, + { + "epoch": 90.31, + "learning_rate": 4.8610058312648945e-06, + "loss": 1.8051, + "step": 18228500 + }, + { + "epoch": 90.31, + "learning_rate": 4.859769722011663e-06, + "loss": 1.8062, + "step": 18229000 + }, + { + "epoch": 90.32, + "learning_rate": 4.858531135585579e-06, + "loss": 1.8008, + "step": 18229500 + }, + { + "epoch": 90.32, + "learning_rate": 4.8572925491594955e-06, + "loss": 1.8024, + "step": 18230000 + }, + { + "epoch": 90.32, + "learning_rate": 4.8560539627334116e-06, + "loss": 1.7889, + "step": 18230500 + }, + { + "epoch": 90.32, + "learning_rate": 4.8548153763073285e-06, + "loss": 1.7929, + "step": 18231000 + }, + { + "epoch": 90.33, + "learning_rate": 4.853576789881245e-06, + "loss": 1.8005, + "step": 18231500 + }, + { + "epoch": 90.33, + "learning_rate": 4.852338203455161e-06, + "loss": 1.8031, + "step": 18232000 + }, + { + "epoch": 90.33, + "learning_rate": 4.851099617029077e-06, + "loss": 1.8129, + "step": 18232500 + }, + { + "epoch": 90.33, + "learning_rate": 4.849861030602994e-06, + "loss": 1.8066, + "step": 18233000 + }, + { + "epoch": 90.34, + "learning_rate": 4.848624921349762e-06, + "loss": 1.7935, + "step": 18233500 + }, + { + "epoch": 90.34, + "learning_rate": 4.8473863349236786e-06, + "loss": 1.8082, + "step": 18234000 + }, + { + "epoch": 90.34, + "learning_rate": 4.846147748497595e-06, + "loss": 1.8037, + "step": 18234500 + }, + { + "epoch": 90.34, + "learning_rate": 4.8449091620715116e-06, + "loss": 1.8088, + "step": 18235000 + }, + { + "epoch": 90.34, + "learning_rate": 4.843670575645428e-06, + "loss": 1.7887, + "step": 18235500 + }, + { + "epoch": 90.35, + "learning_rate": 4.842431989219344e-06, + "loss": 1.8054, + "step": 18236000 + }, + { + "epoch": 90.35, + "learning_rate": 4.841193402793261e-06, + "loss": 1.7989, + "step": 18236500 + }, + { + "epoch": 90.35, + "learning_rate": 4.839954816367177e-06, + "loss": 1.8229, + "step": 18237000 + }, + { + "epoch": 90.35, + "learning_rate": 4.838716229941094e-06, + "loss": 1.8145, + "step": 18237500 + }, + { + "epoch": 90.36, + "learning_rate": 4.837477643515009e-06, + "loss": 1.7975, + "step": 18238000 + }, + { + "epoch": 90.36, + "learning_rate": 4.836239057088926e-06, + "loss": 1.8107, + "step": 18238500 + }, + { + "epoch": 90.36, + "learning_rate": 4.835000470662842e-06, + "loss": 1.8078, + "step": 18239000 + }, + { + "epoch": 90.36, + "learning_rate": 4.833761884236759e-06, + "loss": 1.8025, + "step": 18239500 + }, + { + "epoch": 90.37, + "learning_rate": 4.832525774983527e-06, + "loss": 1.7816, + "step": 18240000 + }, + { + "epoch": 90.37, + "learning_rate": 4.831287188557444e-06, + "loss": 1.8004, + "step": 18240500 + }, + { + "epoch": 90.37, + "learning_rate": 4.83004860213136e-06, + "loss": 1.7955, + "step": 18241000 + }, + { + "epoch": 90.37, + "learning_rate": 4.828810015705276e-06, + "loss": 1.8193, + "step": 18241500 + }, + { + "epoch": 90.38, + "learning_rate": 4.827573906452045e-06, + "loss": 1.8242, + "step": 18242000 + }, + { + "epoch": 90.38, + "learning_rate": 4.826335320025961e-06, + "loss": 1.8034, + "step": 18242500 + }, + { + "epoch": 90.38, + "learning_rate": 4.825096733599877e-06, + "loss": 1.8048, + "step": 18243000 + }, + { + "epoch": 90.38, + "learning_rate": 4.823858147173794e-06, + "loss": 1.8283, + "step": 18243500 + }, + { + "epoch": 90.39, + "learning_rate": 4.822622037920562e-06, + "loss": 1.8032, + "step": 18244000 + }, + { + "epoch": 90.39, + "learning_rate": 4.821385928667331e-06, + "loss": 1.8052, + "step": 18244500 + }, + { + "epoch": 90.39, + "learning_rate": 4.820147342241247e-06, + "loss": 1.7902, + "step": 18245000 + }, + { + "epoch": 90.39, + "learning_rate": 4.818908755815164e-06, + "loss": 1.7931, + "step": 18245500 + }, + { + "epoch": 90.4, + "learning_rate": 4.81767016938908e-06, + "loss": 1.8327, + "step": 18246000 + }, + { + "epoch": 90.4, + "learning_rate": 4.816431582962997e-06, + "loss": 1.7908, + "step": 18246500 + }, + { + "epoch": 90.4, + "learning_rate": 4.815192996536913e-06, + "loss": 1.8085, + "step": 18247000 + }, + { + "epoch": 90.4, + "learning_rate": 4.813954410110829e-06, + "loss": 1.8054, + "step": 18247500 + }, + { + "epoch": 90.41, + "learning_rate": 4.812715823684745e-06, + "loss": 1.8137, + "step": 18248000 + }, + { + "epoch": 90.41, + "learning_rate": 4.811482191604366e-06, + "loss": 1.8124, + "step": 18248500 + }, + { + "epoch": 90.41, + "learning_rate": 4.8102436051782824e-06, + "loss": 1.8209, + "step": 18249000 + }, + { + "epoch": 90.41, + "learning_rate": 4.8090050187521985e-06, + "loss": 1.8188, + "step": 18249500 + }, + { + "epoch": 90.42, + "learning_rate": 4.807766432326115e-06, + "loss": 1.8033, + "step": 18250000 + }, + { + "epoch": 90.42, + "learning_rate": 4.8065278459000315e-06, + "loss": 1.817, + "step": 18250500 + }, + { + "epoch": 90.42, + "learning_rate": 4.805289259473948e-06, + "loss": 1.8004, + "step": 18251000 + }, + { + "epoch": 90.42, + "learning_rate": 4.8040506730478645e-06, + "loss": 1.8001, + "step": 18251500 + }, + { + "epoch": 90.43, + "learning_rate": 4.802812086621781e-06, + "loss": 1.8235, + "step": 18252000 + }, + { + "epoch": 90.43, + "learning_rate": 4.801573500195697e-06, + "loss": 1.8186, + "step": 18252500 + }, + { + "epoch": 90.43, + "learning_rate": 4.800334913769614e-06, + "loss": 1.8046, + "step": 18253000 + }, + { + "epoch": 90.43, + "learning_rate": 4.79909632734353e-06, + "loss": 1.8027, + "step": 18253500 + }, + { + "epoch": 90.44, + "learning_rate": 4.797857740917446e-06, + "loss": 1.8083, + "step": 18254000 + }, + { + "epoch": 90.44, + "learning_rate": 4.796619154491362e-06, + "loss": 1.766, + "step": 18254500 + }, + { + "epoch": 90.44, + "learning_rate": 4.7953830452381315e-06, + "loss": 1.8085, + "step": 18255000 + }, + { + "epoch": 90.44, + "learning_rate": 4.7941469359848995e-06, + "loss": 1.8157, + "step": 18255500 + }, + { + "epoch": 90.45, + "learning_rate": 4.7929083495588164e-06, + "loss": 1.8204, + "step": 18256000 + }, + { + "epoch": 90.45, + "learning_rate": 4.791669763132732e-06, + "loss": 1.7865, + "step": 18256500 + }, + { + "epoch": 90.45, + "learning_rate": 4.790431176706649e-06, + "loss": 1.8123, + "step": 18257000 + }, + { + "epoch": 90.45, + "learning_rate": 4.789192590280565e-06, + "loss": 1.8007, + "step": 18257500 + }, + { + "epoch": 90.46, + "learning_rate": 4.7879564810273335e-06, + "loss": 1.7966, + "step": 18258000 + }, + { + "epoch": 90.46, + "learning_rate": 4.7867178946012496e-06, + "loss": 1.7935, + "step": 18258500 + }, + { + "epoch": 90.46, + "learning_rate": 4.7854793081751665e-06, + "loss": 1.8015, + "step": 18259000 + }, + { + "epoch": 90.46, + "learning_rate": 4.7842407217490826e-06, + "loss": 1.8175, + "step": 18259500 + }, + { + "epoch": 90.47, + "learning_rate": 4.783004612495851e-06, + "loss": 1.8008, + "step": 18260000 + }, + { + "epoch": 90.47, + "learning_rate": 4.7817660260697675e-06, + "loss": 1.8087, + "step": 18260500 + }, + { + "epoch": 90.47, + "learning_rate": 4.7805274396436835e-06, + "loss": 1.8229, + "step": 18261000 + }, + { + "epoch": 90.47, + "learning_rate": 4.7792888532176e-06, + "loss": 1.797, + "step": 18261500 + }, + { + "epoch": 90.48, + "learning_rate": 4.7780502667915166e-06, + "loss": 1.7996, + "step": 18262000 + }, + { + "epoch": 90.48, + "learning_rate": 4.776811680365433e-06, + "loss": 1.8242, + "step": 18262500 + }, + { + "epoch": 90.48, + "learning_rate": 4.7755755711122014e-06, + "loss": 1.7948, + "step": 18263000 + }, + { + "epoch": 90.48, + "learning_rate": 4.7743369846861175e-06, + "loss": 1.7996, + "step": 18263500 + }, + { + "epoch": 90.49, + "learning_rate": 4.7730983982600345e-06, + "loss": 1.8265, + "step": 18264000 + }, + { + "epoch": 90.49, + "learning_rate": 4.7718598118339505e-06, + "loss": 1.8108, + "step": 18264500 + }, + { + "epoch": 90.49, + "learning_rate": 4.770623702580719e-06, + "loss": 1.7945, + "step": 18265000 + }, + { + "epoch": 90.49, + "learning_rate": 4.7693851161546354e-06, + "loss": 1.7917, + "step": 18265500 + }, + { + "epoch": 90.5, + "learning_rate": 4.7681465297285515e-06, + "loss": 1.8192, + "step": 18266000 + }, + { + "epoch": 90.5, + "learning_rate": 4.766907943302468e-06, + "loss": 1.8, + "step": 18266500 + }, + { + "epoch": 90.5, + "learning_rate": 4.7656693568763845e-06, + "loss": 1.8055, + "step": 18267000 + }, + { + "epoch": 90.5, + "learning_rate": 4.764430770450301e-06, + "loss": 1.8244, + "step": 18267500 + }, + { + "epoch": 90.51, + "learning_rate": 4.763192184024217e-06, + "loss": 1.7996, + "step": 18268000 + }, + { + "epoch": 90.51, + "learning_rate": 4.761953597598134e-06, + "loss": 1.7984, + "step": 18268500 + }, + { + "epoch": 90.51, + "learning_rate": 4.76071501117205e-06, + "loss": 1.8001, + "step": 18269000 + }, + { + "epoch": 90.51, + "learning_rate": 4.7594789019188185e-06, + "loss": 1.8157, + "step": 18269500 + }, + { + "epoch": 90.52, + "learning_rate": 4.758240315492735e-06, + "loss": 1.8143, + "step": 18270000 + }, + { + "epoch": 90.52, + "learning_rate": 4.7570017290666515e-06, + "loss": 1.7854, + "step": 18270500 + }, + { + "epoch": 90.52, + "learning_rate": 4.755763142640568e-06, + "loss": 1.8105, + "step": 18271000 + }, + { + "epoch": 90.52, + "learning_rate": 4.754524556214484e-06, + "loss": 1.8203, + "step": 18271500 + }, + { + "epoch": 90.53, + "learning_rate": 4.7532859697884e-06, + "loss": 1.8097, + "step": 18272000 + }, + { + "epoch": 90.53, + "learning_rate": 4.752047383362317e-06, + "loss": 1.8117, + "step": 18272500 + }, + { + "epoch": 90.53, + "learning_rate": 4.750811274109085e-06, + "loss": 1.815, + "step": 18273000 + }, + { + "epoch": 90.53, + "learning_rate": 4.7495726876830016e-06, + "loss": 1.8196, + "step": 18273500 + }, + { + "epoch": 90.54, + "learning_rate": 4.7483365784297695e-06, + "loss": 1.8141, + "step": 18274000 + }, + { + "epoch": 90.54, + "learning_rate": 4.7470979920036865e-06, + "loss": 1.8196, + "step": 18274500 + }, + { + "epoch": 90.54, + "learning_rate": 4.7458594055776025e-06, + "loss": 1.7863, + "step": 18275000 + }, + { + "epoch": 90.54, + "learning_rate": 4.7446208191515195e-06, + "loss": 1.8266, + "step": 18275500 + }, + { + "epoch": 90.55, + "learning_rate": 4.7433822327254355e-06, + "loss": 1.8023, + "step": 18276000 + }, + { + "epoch": 90.55, + "learning_rate": 4.742143646299352e-06, + "loss": 1.7958, + "step": 18276500 + }, + { + "epoch": 90.55, + "learning_rate": 4.740905059873268e-06, + "loss": 1.8107, + "step": 18277000 + }, + { + "epoch": 90.55, + "learning_rate": 4.739666473447185e-06, + "loss": 1.7953, + "step": 18277500 + }, + { + "epoch": 90.56, + "learning_rate": 4.738427887021101e-06, + "loss": 1.8274, + "step": 18278000 + }, + { + "epoch": 90.56, + "learning_rate": 4.737189300595017e-06, + "loss": 1.8099, + "step": 18278500 + }, + { + "epoch": 90.56, + "learning_rate": 4.735950714168934e-06, + "loss": 1.8226, + "step": 18279000 + }, + { + "epoch": 90.56, + "learning_rate": 4.73471212774285e-06, + "loss": 1.8105, + "step": 18279500 + }, + { + "epoch": 90.57, + "learning_rate": 4.733476018489618e-06, + "loss": 1.8173, + "step": 18280000 + }, + { + "epoch": 90.57, + "learning_rate": 4.732237432063535e-06, + "loss": 1.8091, + "step": 18280500 + }, + { + "epoch": 90.57, + "learning_rate": 4.730998845637452e-06, + "loss": 1.801, + "step": 18281000 + }, + { + "epoch": 90.57, + "learning_rate": 4.729760259211368e-06, + "loss": 1.8096, + "step": 18281500 + }, + { + "epoch": 90.58, + "learning_rate": 4.728521672785284e-06, + "loss": 1.7963, + "step": 18282000 + }, + { + "epoch": 90.58, + "learning_rate": 4.7272830863592e-06, + "loss": 1.8243, + "step": 18282500 + }, + { + "epoch": 90.58, + "learning_rate": 4.726046977105969e-06, + "loss": 1.8017, + "step": 18283000 + }, + { + "epoch": 90.58, + "learning_rate": 4.724808390679885e-06, + "loss": 1.8015, + "step": 18283500 + }, + { + "epoch": 90.59, + "learning_rate": 4.723569804253802e-06, + "loss": 1.8178, + "step": 18284000 + }, + { + "epoch": 90.59, + "learning_rate": 4.722331217827718e-06, + "loss": 1.7948, + "step": 18284500 + }, + { + "epoch": 90.59, + "learning_rate": 4.721092631401635e-06, + "loss": 1.8089, + "step": 18285000 + }, + { + "epoch": 90.59, + "learning_rate": 4.719856522148403e-06, + "loss": 1.8206, + "step": 18285500 + }, + { + "epoch": 90.6, + "learning_rate": 4.7186204128951715e-06, + "loss": 1.7969, + "step": 18286000 + }, + { + "epoch": 90.6, + "learning_rate": 4.7173818264690876e-06, + "loss": 1.8092, + "step": 18286500 + }, + { + "epoch": 90.6, + "learning_rate": 4.7161432400430045e-06, + "loss": 1.8077, + "step": 18287000 + }, + { + "epoch": 90.6, + "learning_rate": 4.7149071307897724e-06, + "loss": 1.7946, + "step": 18287500 + }, + { + "epoch": 90.61, + "learning_rate": 4.713668544363689e-06, + "loss": 1.8092, + "step": 18288000 + }, + { + "epoch": 90.61, + "learning_rate": 4.7124299579376055e-06, + "loss": 1.8017, + "step": 18288500 + }, + { + "epoch": 90.61, + "learning_rate": 4.7111913715115215e-06, + "loss": 1.8014, + "step": 18289000 + }, + { + "epoch": 90.61, + "learning_rate": 4.709952785085438e-06, + "loss": 1.7659, + "step": 18289500 + }, + { + "epoch": 90.61, + "learning_rate": 4.7087141986593545e-06, + "loss": 1.8046, + "step": 18290000 + }, + { + "epoch": 90.62, + "learning_rate": 4.707475612233271e-06, + "loss": 1.8134, + "step": 18290500 + }, + { + "epoch": 90.62, + "learning_rate": 4.706237025807187e-06, + "loss": 1.8018, + "step": 18291000 + }, + { + "epoch": 90.62, + "learning_rate": 4.704998439381103e-06, + "loss": 1.8017, + "step": 18291500 + }, + { + "epoch": 90.62, + "learning_rate": 4.703762330127872e-06, + "loss": 1.8167, + "step": 18292000 + }, + { + "epoch": 90.63, + "learning_rate": 4.702523743701788e-06, + "loss": 1.8119, + "step": 18292500 + }, + { + "epoch": 90.63, + "learning_rate": 4.701285157275705e-06, + "loss": 1.7988, + "step": 18293000 + }, + { + "epoch": 90.63, + "learning_rate": 4.700046570849621e-06, + "loss": 1.7994, + "step": 18293500 + }, + { + "epoch": 90.63, + "learning_rate": 4.698807984423538e-06, + "loss": 1.829, + "step": 18294000 + }, + { + "epoch": 90.64, + "learning_rate": 4.697569397997454e-06, + "loss": 1.8139, + "step": 18294500 + }, + { + "epoch": 90.64, + "learning_rate": 4.6963332887442225e-06, + "loss": 1.8189, + "step": 18295000 + }, + { + "epoch": 90.64, + "learning_rate": 4.695094702318139e-06, + "loss": 1.7967, + "step": 18295500 + }, + { + "epoch": 90.64, + "learning_rate": 4.693858593064907e-06, + "loss": 1.8039, + "step": 18296000 + }, + { + "epoch": 90.65, + "learning_rate": 4.6926200066388235e-06, + "loss": 1.814, + "step": 18296500 + }, + { + "epoch": 90.65, + "learning_rate": 4.6913814202127396e-06, + "loss": 1.786, + "step": 18297000 + }, + { + "epoch": 90.65, + "learning_rate": 4.6901428337866565e-06, + "loss": 1.7753, + "step": 18297500 + }, + { + "epoch": 90.65, + "learning_rate": 4.6889042473605726e-06, + "loss": 1.7923, + "step": 18298000 + }, + { + "epoch": 90.66, + "learning_rate": 4.6876656609344895e-06, + "loss": 1.8209, + "step": 18298500 + }, + { + "epoch": 90.66, + "learning_rate": 4.6864295516812575e-06, + "loss": 1.817, + "step": 18299000 + }, + { + "epoch": 90.66, + "learning_rate": 4.685190965255174e-06, + "loss": 1.8288, + "step": 18299500 + }, + { + "epoch": 90.66, + "learning_rate": 4.6839523788290905e-06, + "loss": 1.8173, + "step": 18300000 + }, + { + "epoch": 90.67, + "learning_rate": 4.6827137924030065e-06, + "loss": 1.7865, + "step": 18300500 + }, + { + "epoch": 90.67, + "learning_rate": 4.681475205976923e-06, + "loss": 1.8151, + "step": 18301000 + }, + { + "epoch": 90.67, + "learning_rate": 4.6802366195508396e-06, + "loss": 1.837, + "step": 18301500 + }, + { + "epoch": 90.67, + "learning_rate": 4.678998033124756e-06, + "loss": 1.8028, + "step": 18302000 + }, + { + "epoch": 90.68, + "learning_rate": 4.677759446698672e-06, + "loss": 1.8193, + "step": 18302500 + }, + { + "epoch": 90.68, + "learning_rate": 4.676520860272588e-06, + "loss": 1.7748, + "step": 18303000 + }, + { + "epoch": 90.68, + "learning_rate": 4.675282273846505e-06, + "loss": 1.8019, + "step": 18303500 + }, + { + "epoch": 90.68, + "learning_rate": 4.674043687420421e-06, + "loss": 1.7899, + "step": 18304000 + }, + { + "epoch": 90.69, + "learning_rate": 4.672805100994338e-06, + "loss": 1.8277, + "step": 18304500 + }, + { + "epoch": 90.69, + "learning_rate": 4.671566514568254e-06, + "loss": 1.8004, + "step": 18305000 + }, + { + "epoch": 90.69, + "learning_rate": 4.67032792814217e-06, + "loss": 1.8164, + "step": 18305500 + }, + { + "epoch": 90.69, + "learning_rate": 4.669089341716087e-06, + "loss": 1.788, + "step": 18306000 + }, + { + "epoch": 90.7, + "learning_rate": 4.667853232462855e-06, + "loss": 1.7758, + "step": 18306500 + }, + { + "epoch": 90.7, + "learning_rate": 4.666614646036772e-06, + "loss": 1.8045, + "step": 18307000 + }, + { + "epoch": 90.7, + "learning_rate": 4.665376059610688e-06, + "loss": 1.7876, + "step": 18307500 + }, + { + "epoch": 90.7, + "learning_rate": 4.664137473184605e-06, + "loss": 1.8139, + "step": 18308000 + }, + { + "epoch": 90.71, + "learning_rate": 4.662901363931373e-06, + "loss": 1.7885, + "step": 18308500 + }, + { + "epoch": 90.71, + "learning_rate": 4.66166277750529e-06, + "loss": 1.8049, + "step": 18309000 + }, + { + "epoch": 90.71, + "learning_rate": 4.660424191079205e-06, + "loss": 1.7931, + "step": 18309500 + }, + { + "epoch": 90.71, + "learning_rate": 4.659185604653122e-06, + "loss": 1.8313, + "step": 18310000 + }, + { + "epoch": 90.72, + "learning_rate": 4.657949495399891e-06, + "loss": 1.8114, + "step": 18310500 + }, + { + "epoch": 90.72, + "learning_rate": 4.656710908973807e-06, + "loss": 1.8188, + "step": 18311000 + }, + { + "epoch": 90.72, + "learning_rate": 4.655472322547723e-06, + "loss": 1.8077, + "step": 18311500 + }, + { + "epoch": 90.72, + "learning_rate": 4.65423373612164e-06, + "loss": 1.783, + "step": 18312000 + }, + { + "epoch": 90.73, + "learning_rate": 4.652995149695556e-06, + "loss": 1.8074, + "step": 18312500 + }, + { + "epoch": 90.73, + "learning_rate": 4.6517590404423246e-06, + "loss": 1.8064, + "step": 18313000 + }, + { + "epoch": 90.73, + "learning_rate": 4.650520454016241e-06, + "loss": 1.8138, + "step": 18313500 + }, + { + "epoch": 90.73, + "learning_rate": 4.649281867590157e-06, + "loss": 1.8092, + "step": 18314000 + }, + { + "epoch": 90.74, + "learning_rate": 4.648043281164073e-06, + "loss": 1.784, + "step": 18314500 + }, + { + "epoch": 90.74, + "learning_rate": 4.6468071719108425e-06, + "loss": 1.8023, + "step": 18315000 + }, + { + "epoch": 90.74, + "learning_rate": 4.645568585484758e-06, + "loss": 1.8535, + "step": 18315500 + }, + { + "epoch": 90.74, + "learning_rate": 4.644329999058675e-06, + "loss": 1.8075, + "step": 18316000 + }, + { + "epoch": 90.75, + "learning_rate": 4.643091412632591e-06, + "loss": 1.825, + "step": 18316500 + }, + { + "epoch": 90.75, + "learning_rate": 4.641852826206508e-06, + "loss": 1.8092, + "step": 18317000 + }, + { + "epoch": 90.75, + "learning_rate": 4.640614239780424e-06, + "loss": 1.8088, + "step": 18317500 + }, + { + "epoch": 90.75, + "learning_rate": 4.63937565335434e-06, + "loss": 1.813, + "step": 18318000 + }, + { + "epoch": 90.76, + "learning_rate": 4.638137066928256e-06, + "loss": 1.8129, + "step": 18318500 + }, + { + "epoch": 90.76, + "learning_rate": 4.636898480502173e-06, + "loss": 1.8395, + "step": 18319000 + }, + { + "epoch": 90.76, + "learning_rate": 4.63565989407609e-06, + "loss": 1.8269, + "step": 18319500 + }, + { + "epoch": 90.76, + "learning_rate": 4.634421307650005e-06, + "loss": 1.7891, + "step": 18320000 + }, + { + "epoch": 90.77, + "learning_rate": 4.633182721223922e-06, + "loss": 1.8157, + "step": 18320500 + }, + { + "epoch": 90.77, + "learning_rate": 4.631946611970691e-06, + "loss": 1.7996, + "step": 18321000 + }, + { + "epoch": 90.77, + "learning_rate": 4.630708025544607e-06, + "loss": 1.7884, + "step": 18321500 + }, + { + "epoch": 90.77, + "learning_rate": 4.629469439118523e-06, + "loss": 1.8123, + "step": 18322000 + }, + { + "epoch": 90.78, + "learning_rate": 4.62823085269244e-06, + "loss": 1.7983, + "step": 18322500 + }, + { + "epoch": 90.78, + "learning_rate": 4.626994743439208e-06, + "loss": 1.8105, + "step": 18323000 + }, + { + "epoch": 90.78, + "learning_rate": 4.625756157013125e-06, + "loss": 1.8023, + "step": 18323500 + }, + { + "epoch": 90.78, + "learning_rate": 4.624517570587041e-06, + "loss": 1.8075, + "step": 18324000 + }, + { + "epoch": 90.79, + "learning_rate": 4.623278984160957e-06, + "loss": 1.8105, + "step": 18324500 + }, + { + "epoch": 90.79, + "learning_rate": 4.622040397734873e-06, + "loss": 1.7873, + "step": 18325000 + }, + { + "epoch": 90.79, + "learning_rate": 4.62080181130879e-06, + "loss": 1.8052, + "step": 18325500 + }, + { + "epoch": 90.79, + "learning_rate": 4.619563224882706e-06, + "loss": 1.8277, + "step": 18326000 + }, + { + "epoch": 90.8, + "learning_rate": 4.618324638456623e-06, + "loss": 1.8258, + "step": 18326500 + }, + { + "epoch": 90.8, + "learning_rate": 4.617088529203391e-06, + "loss": 1.8095, + "step": 18327000 + }, + { + "epoch": 90.8, + "learning_rate": 4.615849942777308e-06, + "loss": 1.8037, + "step": 18327500 + }, + { + "epoch": 90.8, + "learning_rate": 4.614611356351224e-06, + "loss": 1.8034, + "step": 18328000 + }, + { + "epoch": 90.81, + "learning_rate": 4.61337276992514e-06, + "loss": 1.8142, + "step": 18328500 + }, + { + "epoch": 90.81, + "learning_rate": 4.612134183499056e-06, + "loss": 1.8196, + "step": 18329000 + }, + { + "epoch": 90.81, + "learning_rate": 4.610895597072973e-06, + "loss": 1.8038, + "step": 18329500 + }, + { + "epoch": 90.81, + "learning_rate": 4.609661964992594e-06, + "loss": 1.842, + "step": 18330000 + }, + { + "epoch": 90.82, + "learning_rate": 4.60842337856651e-06, + "loss": 1.8073, + "step": 18330500 + }, + { + "epoch": 90.82, + "learning_rate": 4.607184792140426e-06, + "loss": 1.8007, + "step": 18331000 + }, + { + "epoch": 90.82, + "learning_rate": 4.605946205714343e-06, + "loss": 1.8159, + "step": 18331500 + }, + { + "epoch": 90.82, + "learning_rate": 4.604707619288259e-06, + "loss": 1.8174, + "step": 18332000 + }, + { + "epoch": 90.83, + "learning_rate": 4.603471510035028e-06, + "loss": 1.7862, + "step": 18332500 + }, + { + "epoch": 90.83, + "learning_rate": 4.602232923608944e-06, + "loss": 1.8226, + "step": 18333000 + }, + { + "epoch": 90.83, + "learning_rate": 4.600994337182861e-06, + "loss": 1.8028, + "step": 18333500 + }, + { + "epoch": 90.83, + "learning_rate": 4.599755750756776e-06, + "loss": 1.8088, + "step": 18334000 + }, + { + "epoch": 90.84, + "learning_rate": 4.598517164330693e-06, + "loss": 1.8195, + "step": 18334500 + }, + { + "epoch": 90.84, + "learning_rate": 4.59727857790461e-06, + "loss": 1.8069, + "step": 18335000 + }, + { + "epoch": 90.84, + "learning_rate": 4.596039991478526e-06, + "loss": 1.7964, + "step": 18335500 + }, + { + "epoch": 90.84, + "learning_rate": 4.594801405052442e-06, + "loss": 1.7984, + "step": 18336000 + }, + { + "epoch": 90.85, + "learning_rate": 4.593562818626358e-06, + "loss": 1.8194, + "step": 18336500 + }, + { + "epoch": 90.85, + "learning_rate": 4.592324232200275e-06, + "loss": 1.7832, + "step": 18337000 + }, + { + "epoch": 90.85, + "learning_rate": 4.5910906001198956e-06, + "loss": 1.8468, + "step": 18337500 + }, + { + "epoch": 90.85, + "learning_rate": 4.589852013693812e-06, + "loss": 1.8182, + "step": 18338000 + }, + { + "epoch": 90.86, + "learning_rate": 4.588613427267728e-06, + "loss": 1.7848, + "step": 18338500 + }, + { + "epoch": 90.86, + "learning_rate": 4.587374840841645e-06, + "loss": 1.8179, + "step": 18339000 + }, + { + "epoch": 90.86, + "learning_rate": 4.586136254415561e-06, + "loss": 1.7919, + "step": 18339500 + }, + { + "epoch": 90.86, + "learning_rate": 4.5849001451623296e-06, + "loss": 1.7867, + "step": 18340000 + }, + { + "epoch": 90.87, + "learning_rate": 4.583661558736246e-06, + "loss": 1.822, + "step": 18340500 + }, + { + "epoch": 90.87, + "learning_rate": 4.5824229723101626e-06, + "loss": 1.7947, + "step": 18341000 + }, + { + "epoch": 90.87, + "learning_rate": 4.581184385884079e-06, + "loss": 1.7998, + "step": 18341500 + }, + { + "epoch": 90.87, + "learning_rate": 4.5799482766308475e-06, + "loss": 1.8149, + "step": 18342000 + }, + { + "epoch": 90.88, + "learning_rate": 4.5787096902047635e-06, + "loss": 1.8176, + "step": 18342500 + }, + { + "epoch": 90.88, + "learning_rate": 4.57747110377868e-06, + "loss": 1.8263, + "step": 18343000 + }, + { + "epoch": 90.88, + "learning_rate": 4.576232517352596e-06, + "loss": 1.8009, + "step": 18343500 + }, + { + "epoch": 90.88, + "learning_rate": 4.574993930926513e-06, + "loss": 1.8154, + "step": 18344000 + }, + { + "epoch": 90.88, + "learning_rate": 4.573755344500429e-06, + "loss": 1.8063, + "step": 18344500 + }, + { + "epoch": 90.89, + "learning_rate": 4.572516758074346e-06, + "loss": 1.8041, + "step": 18345000 + }, + { + "epoch": 90.89, + "learning_rate": 4.571278171648261e-06, + "loss": 1.824, + "step": 18345500 + }, + { + "epoch": 90.89, + "learning_rate": 4.570039585222178e-06, + "loss": 1.8405, + "step": 18346000 + }, + { + "epoch": 90.89, + "learning_rate": 4.568800998796094e-06, + "loss": 1.7983, + "step": 18346500 + }, + { + "epoch": 90.9, + "learning_rate": 4.567564889542863e-06, + "loss": 1.7993, + "step": 18347000 + }, + { + "epoch": 90.9, + "learning_rate": 4.566326303116779e-06, + "loss": 1.7946, + "step": 18347500 + }, + { + "epoch": 90.9, + "learning_rate": 4.565087716690696e-06, + "loss": 1.8412, + "step": 18348000 + }, + { + "epoch": 90.9, + "learning_rate": 4.563849130264612e-06, + "loss": 1.8159, + "step": 18348500 + }, + { + "epoch": 90.91, + "learning_rate": 4.5626154981842325e-06, + "loss": 1.8094, + "step": 18349000 + }, + { + "epoch": 90.91, + "learning_rate": 4.561379388931001e-06, + "loss": 1.8011, + "step": 18349500 + }, + { + "epoch": 90.91, + "learning_rate": 4.560140802504917e-06, + "loss": 1.7923, + "step": 18350000 + }, + { + "epoch": 90.91, + "learning_rate": 4.5589022160788334e-06, + "loss": 1.7969, + "step": 18350500 + }, + { + "epoch": 90.92, + "learning_rate": 4.55766362965275e-06, + "loss": 1.8211, + "step": 18351000 + }, + { + "epoch": 90.92, + "learning_rate": 4.5564250432266664e-06, + "loss": 1.8165, + "step": 18351500 + }, + { + "epoch": 90.92, + "learning_rate": 4.5551864568005825e-06, + "loss": 1.8117, + "step": 18352000 + }, + { + "epoch": 90.92, + "learning_rate": 4.553947870374499e-06, + "loss": 1.8149, + "step": 18352500 + }, + { + "epoch": 90.93, + "learning_rate": 4.5527092839484155e-06, + "loss": 1.8189, + "step": 18353000 + }, + { + "epoch": 90.93, + "learning_rate": 4.551470697522332e-06, + "loss": 1.7963, + "step": 18353500 + }, + { + "epoch": 90.93, + "learning_rate": 4.5502345882691004e-06, + "loss": 1.8233, + "step": 18354000 + }, + { + "epoch": 90.93, + "learning_rate": 4.548998479015868e-06, + "loss": 1.8284, + "step": 18354500 + }, + { + "epoch": 90.94, + "learning_rate": 4.547759892589785e-06, + "loss": 1.8077, + "step": 18355000 + }, + { + "epoch": 90.94, + "learning_rate": 4.546521306163701e-06, + "loss": 1.8061, + "step": 18355500 + }, + { + "epoch": 90.94, + "learning_rate": 4.545282719737618e-06, + "loss": 1.7957, + "step": 18356000 + }, + { + "epoch": 90.94, + "learning_rate": 4.544044133311534e-06, + "loss": 1.827, + "step": 18356500 + }, + { + "epoch": 90.95, + "learning_rate": 4.542808024058303e-06, + "loss": 1.8064, + "step": 18357000 + }, + { + "epoch": 90.95, + "learning_rate": 4.541569437632219e-06, + "loss": 1.8092, + "step": 18357500 + }, + { + "epoch": 90.95, + "learning_rate": 4.540330851206135e-06, + "loss": 1.8177, + "step": 18358000 + }, + { + "epoch": 90.95, + "learning_rate": 4.5390922647800515e-06, + "loss": 1.7928, + "step": 18358500 + }, + { + "epoch": 90.96, + "learning_rate": 4.537853678353968e-06, + "loss": 1.7991, + "step": 18359000 + }, + { + "epoch": 90.96, + "learning_rate": 4.536615091927885e-06, + "loss": 1.8116, + "step": 18359500 + }, + { + "epoch": 90.96, + "learning_rate": 4.535376505501801e-06, + "loss": 1.8123, + "step": 18360000 + }, + { + "epoch": 90.96, + "learning_rate": 4.5341379190757175e-06, + "loss": 1.8009, + "step": 18360500 + }, + { + "epoch": 90.97, + "learning_rate": 4.532901809822486e-06, + "loss": 1.8307, + "step": 18361000 + }, + { + "epoch": 90.97, + "learning_rate": 4.531663223396402e-06, + "loss": 1.7919, + "step": 18361500 + }, + { + "epoch": 90.97, + "learning_rate": 4.5304246369703185e-06, + "loss": 1.8063, + "step": 18362000 + }, + { + "epoch": 90.97, + "learning_rate": 4.529186050544235e-06, + "loss": 1.8141, + "step": 18362500 + }, + { + "epoch": 90.98, + "learning_rate": 4.527949941291003e-06, + "loss": 1.8052, + "step": 18363000 + }, + { + "epoch": 90.98, + "learning_rate": 4.52671135486492e-06, + "loss": 1.8132, + "step": 18363500 + }, + { + "epoch": 90.98, + "learning_rate": 4.525472768438836e-06, + "loss": 1.8025, + "step": 18364000 + }, + { + "epoch": 90.98, + "learning_rate": 4.524234182012753e-06, + "loss": 1.8281, + "step": 18364500 + }, + { + "epoch": 90.99, + "learning_rate": 4.5229955955866685e-06, + "loss": 1.8091, + "step": 18365000 + }, + { + "epoch": 90.99, + "learning_rate": 4.5217570091605854e-06, + "loss": 1.8154, + "step": 18365500 + }, + { + "epoch": 90.99, + "learning_rate": 4.520520899907353e-06, + "loss": 1.8202, + "step": 18366000 + }, + { + "epoch": 90.99, + "learning_rate": 4.51928231348127e-06, + "loss": 1.8234, + "step": 18366500 + }, + { + "epoch": 91.0, + "learning_rate": 4.518043727055186e-06, + "loss": 1.8396, + "step": 18367000 + }, + { + "epoch": 91.0, + "learning_rate": 4.516805140629103e-06, + "loss": 1.8214, + "step": 18367500 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.6842650430172879, + "eval_accuracy_mlm": 0.6455284093223714, + "eval_accuracy_nsp": 0.8670139120407595, + "eval_loss": 2.3278305530548096, + "eval_runtime": 146.7271, + "eval_samples_per_second": 1737.64, + "eval_steps_per_second": 72.407, + "step": 18367713 } ], "max_steps": 20184300, "num_train_epochs": 100, - "total_flos": 2.0905538460921295e+19, + "total_flos": 2.3780546655767814e+19, "trial_name": null, "trial_params": null }