{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 1260555, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9992066986367118e-05, "loss": 3.3636, "step": 500 }, { "epoch": 0.0, "learning_rate": 1.9984133972734233e-05, "loss": 2.8247, "step": 1000 }, { "epoch": 0.0, "learning_rate": 1.997620095910135e-05, "loss": 2.6434, "step": 1500 }, { "epoch": 0.0, "learning_rate": 1.996826794546847e-05, "loss": 2.5482, "step": 2000 }, { "epoch": 0.01, "learning_rate": 1.996033493183558e-05, "loss": 2.4555, "step": 2500 }, { "epoch": 0.01, "learning_rate": 1.9952401918202697e-05, "loss": 2.4086, "step": 3000 }, { "epoch": 0.01, "learning_rate": 1.9944468904569813e-05, "loss": 2.348, "step": 3500 }, { "epoch": 0.01, "learning_rate": 1.993653589093693e-05, "loss": 2.3079, "step": 4000 }, { "epoch": 0.01, "learning_rate": 1.9928602877304048e-05, "loss": 2.263, "step": 4500 }, { "epoch": 0.01, "learning_rate": 1.9920669863671164e-05, "loss": 2.2407, "step": 5000 }, { "epoch": 0.01, "learning_rate": 1.991273685003828e-05, "loss": 2.2031, "step": 5500 }, { "epoch": 0.01, "learning_rate": 1.9904803836405395e-05, "loss": 2.1797, "step": 6000 }, { "epoch": 0.02, "learning_rate": 1.989687082277251e-05, "loss": 2.1896, "step": 6500 }, { "epoch": 0.02, "learning_rate": 1.9888937809139627e-05, "loss": 2.14, "step": 7000 }, { "epoch": 0.02, "learning_rate": 1.9881004795506743e-05, "loss": 2.1279, "step": 7500 }, { "epoch": 0.02, "learning_rate": 1.987307178187386e-05, "loss": 2.0861, "step": 8000 }, { "epoch": 0.02, "learning_rate": 1.9865138768240975e-05, "loss": 2.0768, "step": 8500 }, { "epoch": 0.02, "learning_rate": 1.985720575460809e-05, "loss": 2.0827, "step": 9000 }, { "epoch": 0.02, "learning_rate": 1.9849272740975206e-05, "loss": 2.0539, "step": 9500 }, { "epoch": 0.02, "learning_rate": 1.9841339727342322e-05, "loss": 2.03, "step": 10000 }, { "epoch": 0.02, "learning_rate": 1.983340671370944e-05, "loss": 2.0105, "step": 10500 }, { "epoch": 0.03, "learning_rate": 1.9825473700076557e-05, "loss": 1.9989, "step": 11000 }, { "epoch": 0.03, "learning_rate": 1.981754068644367e-05, "loss": 1.9886, "step": 11500 }, { "epoch": 0.03, "learning_rate": 1.9809607672810786e-05, "loss": 1.9745, "step": 12000 }, { "epoch": 0.03, "learning_rate": 1.98016746591779e-05, "loss": 1.9839, "step": 12500 }, { "epoch": 0.03, "learning_rate": 1.979374164554502e-05, "loss": 1.9611, "step": 13000 }, { "epoch": 0.03, "learning_rate": 1.9785808631912137e-05, "loss": 1.9685, "step": 13500 }, { "epoch": 0.03, "learning_rate": 1.9777875618279253e-05, "loss": 1.9465, "step": 14000 }, { "epoch": 0.03, "learning_rate": 1.976994260464637e-05, "loss": 1.9532, "step": 14500 }, { "epoch": 0.04, "learning_rate": 1.9762009591013484e-05, "loss": 1.9243, "step": 15000 }, { "epoch": 0.04, "learning_rate": 1.97540765773806e-05, "loss": 1.9132, "step": 15500 }, { "epoch": 0.04, "learning_rate": 1.9746143563747716e-05, "loss": 1.907, "step": 16000 }, { "epoch": 0.04, "learning_rate": 1.9738210550114832e-05, "loss": 1.9201, "step": 16500 }, { "epoch": 0.04, "learning_rate": 1.9730277536481948e-05, "loss": 1.9033, "step": 17000 }, { "epoch": 0.04, "learning_rate": 1.9722344522849064e-05, "loss": 1.9114, "step": 17500 }, { "epoch": 0.04, "learning_rate": 1.971441150921618e-05, "loss": 1.8667, "step": 18000 }, { "epoch": 0.04, "learning_rate": 1.9706478495583295e-05, "loss": 1.8737, "step": 18500 }, { "epoch": 0.05, "learning_rate": 1.969854548195041e-05, "loss": 1.8696, "step": 19000 }, { "epoch": 0.05, "learning_rate": 1.969061246831753e-05, "loss": 1.871, "step": 19500 }, { "epoch": 0.05, "learning_rate": 1.9682679454684646e-05, "loss": 1.8694, "step": 20000 }, { "epoch": 0.05, "learning_rate": 1.967474644105176e-05, "loss": 1.8408, "step": 20500 }, { "epoch": 0.05, "learning_rate": 1.9666813427418875e-05, "loss": 1.8475, "step": 21000 }, { "epoch": 0.05, "learning_rate": 1.9658880413785994e-05, "loss": 1.8506, "step": 21500 }, { "epoch": 0.05, "learning_rate": 1.965094740015311e-05, "loss": 1.8373, "step": 22000 }, { "epoch": 0.05, "learning_rate": 1.9643014386520226e-05, "loss": 1.8109, "step": 22500 }, { "epoch": 0.05, "learning_rate": 1.963508137288734e-05, "loss": 1.819, "step": 23000 }, { "epoch": 0.06, "learning_rate": 1.9627148359254457e-05, "loss": 1.8175, "step": 23500 }, { "epoch": 0.06, "learning_rate": 1.9619215345621573e-05, "loss": 1.806, "step": 24000 }, { "epoch": 0.06, "learning_rate": 1.961128233198869e-05, "loss": 1.824, "step": 24500 }, { "epoch": 0.06, "learning_rate": 1.9603349318355805e-05, "loss": 1.8055, "step": 25000 }, { "epoch": 0.06, "learning_rate": 1.959541630472292e-05, "loss": 1.7914, "step": 25500 }, { "epoch": 0.06, "learning_rate": 1.9587483291090037e-05, "loss": 1.7943, "step": 26000 }, { "epoch": 0.06, "learning_rate": 1.9579550277457153e-05, "loss": 1.7821, "step": 26500 }, { "epoch": 0.06, "learning_rate": 1.957161726382427e-05, "loss": 1.7968, "step": 27000 }, { "epoch": 0.07, "learning_rate": 1.9563684250191384e-05, "loss": 1.7688, "step": 27500 }, { "epoch": 0.07, "learning_rate": 1.9555751236558504e-05, "loss": 1.7674, "step": 28000 }, { "epoch": 0.07, "learning_rate": 1.954781822292562e-05, "loss": 1.7623, "step": 28500 }, { "epoch": 0.07, "learning_rate": 1.9539885209292732e-05, "loss": 1.7549, "step": 29000 }, { "epoch": 0.07, "learning_rate": 1.9531952195659848e-05, "loss": 1.7584, "step": 29500 }, { "epoch": 0.07, "learning_rate": 1.9524019182026964e-05, "loss": 1.7497, "step": 30000 }, { "epoch": 0.07, "learning_rate": 1.9516086168394083e-05, "loss": 1.7464, "step": 30500 }, { "epoch": 0.07, "learning_rate": 1.95081531547612e-05, "loss": 1.7458, "step": 31000 }, { "epoch": 0.07, "learning_rate": 1.9500220141128315e-05, "loss": 1.7666, "step": 31500 }, { "epoch": 0.08, "learning_rate": 1.949228712749543e-05, "loss": 1.7518, "step": 32000 }, { "epoch": 0.08, "learning_rate": 1.9484354113862546e-05, "loss": 1.7312, "step": 32500 }, { "epoch": 0.08, "learning_rate": 1.9476421100229662e-05, "loss": 1.7569, "step": 33000 }, { "epoch": 0.08, "learning_rate": 1.9468488086596778e-05, "loss": 1.7352, "step": 33500 }, { "epoch": 0.08, "learning_rate": 1.9460555072963894e-05, "loss": 1.723, "step": 34000 }, { "epoch": 0.08, "learning_rate": 1.945262205933101e-05, "loss": 1.7439, "step": 34500 }, { "epoch": 0.08, "learning_rate": 1.9444689045698126e-05, "loss": 1.7154, "step": 35000 }, { "epoch": 0.08, "learning_rate": 1.943675603206524e-05, "loss": 1.7245, "step": 35500 }, { "epoch": 0.09, "learning_rate": 1.9428823018432357e-05, "loss": 1.7139, "step": 36000 }, { "epoch": 0.09, "learning_rate": 1.9420890004799477e-05, "loss": 1.7167, "step": 36500 }, { "epoch": 0.09, "learning_rate": 1.9412956991166593e-05, "loss": 1.7218, "step": 37000 }, { "epoch": 0.09, "learning_rate": 1.940502397753371e-05, "loss": 1.7136, "step": 37500 }, { "epoch": 0.09, "learning_rate": 1.939709096390082e-05, "loss": 1.6848, "step": 38000 }, { "epoch": 0.09, "learning_rate": 1.9389157950267937e-05, "loss": 1.7024, "step": 38500 }, { "epoch": 0.09, "learning_rate": 1.9381224936635056e-05, "loss": 1.7083, "step": 39000 }, { "epoch": 0.09, "learning_rate": 1.9373291923002172e-05, "loss": 1.6827, "step": 39500 }, { "epoch": 0.1, "learning_rate": 1.9365358909369288e-05, "loss": 1.6955, "step": 40000 }, { "epoch": 0.1, "learning_rate": 1.9357425895736404e-05, "loss": 1.6975, "step": 40500 }, { "epoch": 0.1, "learning_rate": 1.934949288210352e-05, "loss": 1.6946, "step": 41000 }, { "epoch": 0.1, "learning_rate": 1.9341559868470635e-05, "loss": 1.6997, "step": 41500 }, { "epoch": 0.1, "learning_rate": 1.933362685483775e-05, "loss": 1.6943, "step": 42000 }, { "epoch": 0.1, "learning_rate": 1.9325693841204867e-05, "loss": 1.6775, "step": 42500 }, { "epoch": 0.1, "learning_rate": 1.9317760827571983e-05, "loss": 1.6608, "step": 43000 }, { "epoch": 0.1, "learning_rate": 1.93098278139391e-05, "loss": 1.6745, "step": 43500 }, { "epoch": 0.1, "learning_rate": 1.9301894800306215e-05, "loss": 1.6841, "step": 44000 }, { "epoch": 0.11, "learning_rate": 1.929396178667333e-05, "loss": 1.6698, "step": 44500 }, { "epoch": 0.11, "learning_rate": 1.928602877304045e-05, "loss": 1.6706, "step": 45000 }, { "epoch": 0.11, "learning_rate": 1.9278095759407566e-05, "loss": 1.6591, "step": 45500 }, { "epoch": 0.11, "learning_rate": 1.927016274577468e-05, "loss": 1.6406, "step": 46000 }, { "epoch": 0.11, "learning_rate": 1.9262229732141797e-05, "loss": 1.6599, "step": 46500 }, { "epoch": 0.11, "learning_rate": 1.925429671850891e-05, "loss": 1.6872, "step": 47000 }, { "epoch": 0.11, "learning_rate": 1.924636370487603e-05, "loss": 1.6721, "step": 47500 }, { "epoch": 0.11, "learning_rate": 1.9238430691243145e-05, "loss": 1.6741, "step": 48000 }, { "epoch": 0.12, "learning_rate": 1.923049767761026e-05, "loss": 1.6763, "step": 48500 }, { "epoch": 0.12, "learning_rate": 1.9222564663977377e-05, "loss": 1.64, "step": 49000 }, { "epoch": 0.12, "learning_rate": 1.9214631650344493e-05, "loss": 1.6618, "step": 49500 }, { "epoch": 0.12, "learning_rate": 1.920669863671161e-05, "loss": 1.6495, "step": 50000 }, { "epoch": 0.12, "learning_rate": 1.9198765623078724e-05, "loss": 1.6656, "step": 50500 }, { "epoch": 0.12, "learning_rate": 1.9190832609445844e-05, "loss": 1.6539, "step": 51000 }, { "epoch": 0.12, "learning_rate": 1.9182899595812956e-05, "loss": 1.6522, "step": 51500 }, { "epoch": 0.12, "learning_rate": 1.9174966582180072e-05, "loss": 1.6519, "step": 52000 }, { "epoch": 0.12, "learning_rate": 1.9167033568547188e-05, "loss": 1.6273, "step": 52500 }, { "epoch": 0.13, "learning_rate": 1.9159100554914304e-05, "loss": 1.6323, "step": 53000 }, { "epoch": 0.13, "learning_rate": 1.9151167541281423e-05, "loss": 1.6291, "step": 53500 }, { "epoch": 0.13, "learning_rate": 1.914323452764854e-05, "loss": 1.6351, "step": 54000 }, { "epoch": 0.13, "learning_rate": 1.9135301514015655e-05, "loss": 1.6392, "step": 54500 }, { "epoch": 0.13, "learning_rate": 1.912736850038277e-05, "loss": 1.631, "step": 55000 }, { "epoch": 0.13, "learning_rate": 1.9119435486749886e-05, "loss": 1.6256, "step": 55500 }, { "epoch": 0.13, "learning_rate": 1.9111502473117002e-05, "loss": 1.6261, "step": 56000 }, { "epoch": 0.13, "learning_rate": 1.9103569459484118e-05, "loss": 1.64, "step": 56500 }, { "epoch": 0.14, "learning_rate": 1.9095636445851234e-05, "loss": 1.6334, "step": 57000 }, { "epoch": 0.14, "learning_rate": 1.908770343221835e-05, "loss": 1.6164, "step": 57500 }, { "epoch": 0.14, "learning_rate": 1.9079770418585466e-05, "loss": 1.6201, "step": 58000 }, { "epoch": 0.14, "learning_rate": 1.907183740495258e-05, "loss": 1.6119, "step": 58500 }, { "epoch": 0.14, "learning_rate": 1.9063904391319697e-05, "loss": 1.6112, "step": 59000 }, { "epoch": 0.14, "learning_rate": 1.9055971377686813e-05, "loss": 1.608, "step": 59500 }, { "epoch": 0.14, "learning_rate": 1.9048038364053933e-05, "loss": 1.61, "step": 60000 }, { "epoch": 0.14, "learning_rate": 1.9040105350421045e-05, "loss": 1.6156, "step": 60500 }, { "epoch": 0.15, "learning_rate": 1.903217233678816e-05, "loss": 1.5982, "step": 61000 }, { "epoch": 0.15, "learning_rate": 1.9024239323155277e-05, "loss": 1.6265, "step": 61500 }, { "epoch": 0.15, "learning_rate": 1.9016306309522396e-05, "loss": 1.5894, "step": 62000 }, { "epoch": 0.15, "learning_rate": 1.9008373295889512e-05, "loss": 1.619, "step": 62500 }, { "epoch": 0.15, "learning_rate": 1.9000440282256628e-05, "loss": 1.5871, "step": 63000 }, { "epoch": 0.15, "learning_rate": 1.8992507268623744e-05, "loss": 1.5939, "step": 63500 }, { "epoch": 0.15, "learning_rate": 1.898457425499086e-05, "loss": 1.5893, "step": 64000 }, { "epoch": 0.15, "learning_rate": 1.8976641241357975e-05, "loss": 1.5878, "step": 64500 }, { "epoch": 0.15, "learning_rate": 1.896870822772509e-05, "loss": 1.5954, "step": 65000 }, { "epoch": 0.16, "learning_rate": 1.8960775214092207e-05, "loss": 1.5962, "step": 65500 }, { "epoch": 0.16, "learning_rate": 1.8952842200459323e-05, "loss": 1.6119, "step": 66000 }, { "epoch": 0.16, "learning_rate": 1.894490918682644e-05, "loss": 1.5776, "step": 66500 }, { "epoch": 0.16, "learning_rate": 1.8936976173193555e-05, "loss": 1.5796, "step": 67000 }, { "epoch": 0.16, "learning_rate": 1.892904315956067e-05, "loss": 1.6101, "step": 67500 }, { "epoch": 0.16, "learning_rate": 1.8921110145927786e-05, "loss": 1.5797, "step": 68000 }, { "epoch": 0.16, "learning_rate": 1.8913177132294906e-05, "loss": 1.5989, "step": 68500 }, { "epoch": 0.16, "learning_rate": 1.8905244118662018e-05, "loss": 1.5874, "step": 69000 }, { "epoch": 0.17, "learning_rate": 1.8897311105029134e-05, "loss": 1.5918, "step": 69500 }, { "epoch": 0.17, "learning_rate": 1.888937809139625e-05, "loss": 1.5872, "step": 70000 }, { "epoch": 0.17, "learning_rate": 1.8881445077763366e-05, "loss": 1.6041, "step": 70500 }, { "epoch": 0.17, "learning_rate": 1.8873512064130485e-05, "loss": 1.5861, "step": 71000 }, { "epoch": 0.17, "learning_rate": 1.88655790504976e-05, "loss": 1.5676, "step": 71500 }, { "epoch": 0.17, "learning_rate": 1.8857646036864717e-05, "loss": 1.5859, "step": 72000 }, { "epoch": 0.17, "learning_rate": 1.8849713023231833e-05, "loss": 1.5694, "step": 72500 }, { "epoch": 0.17, "learning_rate": 1.884178000959895e-05, "loss": 1.5606, "step": 73000 }, { "epoch": 0.17, "learning_rate": 1.8833846995966064e-05, "loss": 1.5768, "step": 73500 }, { "epoch": 0.18, "learning_rate": 1.882591398233318e-05, "loss": 1.5834, "step": 74000 }, { "epoch": 0.18, "learning_rate": 1.8817980968700296e-05, "loss": 1.5815, "step": 74500 }, { "epoch": 0.18, "learning_rate": 1.8810047955067412e-05, "loss": 1.569, "step": 75000 }, { "epoch": 0.18, "learning_rate": 1.8802114941434528e-05, "loss": 1.5839, "step": 75500 }, { "epoch": 0.18, "learning_rate": 1.8794181927801644e-05, "loss": 1.577, "step": 76000 }, { "epoch": 0.18, "learning_rate": 1.878624891416876e-05, "loss": 1.5707, "step": 76500 }, { "epoch": 0.18, "learning_rate": 1.877831590053588e-05, "loss": 1.5722, "step": 77000 }, { "epoch": 0.18, "learning_rate": 1.8770382886902995e-05, "loss": 1.5634, "step": 77500 }, { "epoch": 0.19, "learning_rate": 1.8762449873270107e-05, "loss": 1.5612, "step": 78000 }, { "epoch": 0.19, "learning_rate": 1.8754516859637223e-05, "loss": 1.5566, "step": 78500 }, { "epoch": 0.19, "learning_rate": 1.874658384600434e-05, "loss": 1.5693, "step": 79000 }, { "epoch": 0.19, "learning_rate": 1.8738650832371458e-05, "loss": 1.5542, "step": 79500 }, { "epoch": 0.19, "learning_rate": 1.8730717818738574e-05, "loss": 1.5495, "step": 80000 }, { "epoch": 0.19, "learning_rate": 1.872278480510569e-05, "loss": 1.5419, "step": 80500 }, { "epoch": 0.19, "learning_rate": 1.8714851791472806e-05, "loss": 1.5529, "step": 81000 }, { "epoch": 0.19, "learning_rate": 1.870691877783992e-05, "loss": 1.5448, "step": 81500 }, { "epoch": 0.2, "learning_rate": 1.8698985764207037e-05, "loss": 1.5696, "step": 82000 }, { "epoch": 0.2, "learning_rate": 1.8691052750574153e-05, "loss": 1.5589, "step": 82500 }, { "epoch": 0.2, "learning_rate": 1.868311973694127e-05, "loss": 1.5483, "step": 83000 }, { "epoch": 0.2, "learning_rate": 1.8675186723308385e-05, "loss": 1.5546, "step": 83500 }, { "epoch": 0.2, "learning_rate": 1.86672537096755e-05, "loss": 1.5535, "step": 84000 }, { "epoch": 0.2, "learning_rate": 1.8659320696042617e-05, "loss": 1.5528, "step": 84500 }, { "epoch": 0.2, "learning_rate": 1.8651387682409733e-05, "loss": 1.5478, "step": 85000 }, { "epoch": 0.2, "learning_rate": 1.8643454668776852e-05, "loss": 1.5418, "step": 85500 }, { "epoch": 0.2, "learning_rate": 1.8635521655143968e-05, "loss": 1.5414, "step": 86000 }, { "epoch": 0.21, "learning_rate": 1.8627588641511084e-05, "loss": 1.542, "step": 86500 }, { "epoch": 0.21, "learning_rate": 1.8619655627878196e-05, "loss": 1.5685, "step": 87000 }, { "epoch": 0.21, "learning_rate": 1.8611722614245312e-05, "loss": 1.5616, "step": 87500 }, { "epoch": 0.21, "learning_rate": 1.860378960061243e-05, "loss": 1.5236, "step": 88000 }, { "epoch": 0.21, "learning_rate": 1.8595856586979547e-05, "loss": 1.5412, "step": 88500 }, { "epoch": 0.21, "learning_rate": 1.8587923573346663e-05, "loss": 1.5395, "step": 89000 }, { "epoch": 0.21, "learning_rate": 1.857999055971378e-05, "loss": 1.537, "step": 89500 }, { "epoch": 0.21, "learning_rate": 1.8572057546080895e-05, "loss": 1.5405, "step": 90000 }, { "epoch": 0.22, "learning_rate": 1.856412453244801e-05, "loss": 1.5417, "step": 90500 }, { "epoch": 0.22, "learning_rate": 1.8556191518815126e-05, "loss": 1.5448, "step": 91000 }, { "epoch": 0.22, "learning_rate": 1.8548258505182242e-05, "loss": 1.5352, "step": 91500 }, { "epoch": 0.22, "learning_rate": 1.8540325491549358e-05, "loss": 1.531, "step": 92000 }, { "epoch": 0.22, "learning_rate": 1.8532392477916474e-05, "loss": 1.528, "step": 92500 }, { "epoch": 0.22, "learning_rate": 1.852445946428359e-05, "loss": 1.5332, "step": 93000 }, { "epoch": 0.22, "learning_rate": 1.8516526450650706e-05, "loss": 1.523, "step": 93500 }, { "epoch": 0.22, "learning_rate": 1.8508593437017825e-05, "loss": 1.5142, "step": 94000 }, { "epoch": 0.22, "learning_rate": 1.850066042338494e-05, "loss": 1.5457, "step": 94500 }, { "epoch": 0.23, "learning_rate": 1.8492727409752057e-05, "loss": 1.5238, "step": 95000 }, { "epoch": 0.23, "learning_rate": 1.8484794396119172e-05, "loss": 1.5247, "step": 95500 }, { "epoch": 0.23, "learning_rate": 1.8476861382486285e-05, "loss": 1.5395, "step": 96000 }, { "epoch": 0.23, "learning_rate": 1.8468928368853404e-05, "loss": 1.5273, "step": 96500 }, { "epoch": 0.23, "learning_rate": 1.846099535522052e-05, "loss": 1.5271, "step": 97000 }, { "epoch": 0.23, "learning_rate": 1.8453062341587636e-05, "loss": 1.5232, "step": 97500 }, { "epoch": 0.23, "learning_rate": 1.8445129327954752e-05, "loss": 1.5027, "step": 98000 }, { "epoch": 0.23, "learning_rate": 1.8437196314321868e-05, "loss": 1.5073, "step": 98500 }, { "epoch": 0.24, "learning_rate": 1.8429263300688984e-05, "loss": 1.5218, "step": 99000 }, { "epoch": 0.24, "learning_rate": 1.84213302870561e-05, "loss": 1.5144, "step": 99500 }, { "epoch": 0.24, "learning_rate": 1.8413397273423215e-05, "loss": 1.5173, "step": 100000 }, { "epoch": 0.24, "learning_rate": 1.840546425979033e-05, "loss": 1.5381, "step": 100500 }, { "epoch": 0.24, "learning_rate": 1.8397531246157447e-05, "loss": 1.5068, "step": 101000 }, { "epoch": 0.24, "learning_rate": 1.8389598232524563e-05, "loss": 1.5072, "step": 101500 }, { "epoch": 0.24, "learning_rate": 1.838166521889168e-05, "loss": 1.5171, "step": 102000 }, { "epoch": 0.24, "learning_rate": 1.8373732205258798e-05, "loss": 1.5188, "step": 102500 }, { "epoch": 0.25, "learning_rate": 1.8365799191625914e-05, "loss": 1.5285, "step": 103000 }, { "epoch": 0.25, "learning_rate": 1.835786617799303e-05, "loss": 1.5022, "step": 103500 }, { "epoch": 0.25, "learning_rate": 1.8349933164360146e-05, "loss": 1.5106, "step": 104000 }, { "epoch": 0.25, "learning_rate": 1.834200015072726e-05, "loss": 1.5098, "step": 104500 }, { "epoch": 0.25, "learning_rate": 1.8334067137094377e-05, "loss": 1.4992, "step": 105000 }, { "epoch": 0.25, "learning_rate": 1.8326134123461493e-05, "loss": 1.4907, "step": 105500 }, { "epoch": 0.25, "learning_rate": 1.831820110982861e-05, "loss": 1.5323, "step": 106000 }, { "epoch": 0.25, "learning_rate": 1.8310268096195725e-05, "loss": 1.5156, "step": 106500 }, { "epoch": 0.25, "learning_rate": 1.830233508256284e-05, "loss": 1.5154, "step": 107000 }, { "epoch": 0.26, "learning_rate": 1.8294402068929957e-05, "loss": 1.5203, "step": 107500 }, { "epoch": 0.26, "learning_rate": 1.8286469055297072e-05, "loss": 1.4912, "step": 108000 }, { "epoch": 0.26, "learning_rate": 1.827853604166419e-05, "loss": 1.5087, "step": 108500 }, { "epoch": 0.26, "learning_rate": 1.8270603028031308e-05, "loss": 1.4957, "step": 109000 }, { "epoch": 0.26, "learning_rate": 1.826267001439842e-05, "loss": 1.5215, "step": 109500 }, { "epoch": 0.26, "learning_rate": 1.8254737000765536e-05, "loss": 1.5169, "step": 110000 }, { "epoch": 0.26, "learning_rate": 1.8246803987132652e-05, "loss": 1.495, "step": 110500 }, { "epoch": 0.26, "learning_rate": 1.8238870973499768e-05, "loss": 1.4967, "step": 111000 }, { "epoch": 0.27, "learning_rate": 1.8230937959866887e-05, "loss": 1.5037, "step": 111500 }, { "epoch": 0.27, "learning_rate": 1.8223004946234003e-05, "loss": 1.4955, "step": 112000 }, { "epoch": 0.27, "learning_rate": 1.821507193260112e-05, "loss": 1.5085, "step": 112500 }, { "epoch": 0.27, "learning_rate": 1.8207138918968235e-05, "loss": 1.503, "step": 113000 }, { "epoch": 0.27, "learning_rate": 1.8199205905335347e-05, "loss": 1.4851, "step": 113500 }, { "epoch": 0.27, "learning_rate": 1.8191272891702466e-05, "loss": 1.4809, "step": 114000 }, { "epoch": 0.27, "learning_rate": 1.8183339878069582e-05, "loss": 1.492, "step": 114500 }, { "epoch": 0.27, "learning_rate": 1.8175406864436698e-05, "loss": 1.4864, "step": 115000 }, { "epoch": 0.27, "learning_rate": 1.8167473850803814e-05, "loss": 1.51, "step": 115500 }, { "epoch": 0.28, "learning_rate": 1.815954083717093e-05, "loss": 1.4747, "step": 116000 }, { "epoch": 0.28, "learning_rate": 1.8151607823538046e-05, "loss": 1.4872, "step": 116500 }, { "epoch": 0.28, "learning_rate": 1.814367480990516e-05, "loss": 1.5075, "step": 117000 }, { "epoch": 0.28, "learning_rate": 1.813574179627228e-05, "loss": 1.4848, "step": 117500 }, { "epoch": 0.28, "learning_rate": 1.8127808782639393e-05, "loss": 1.5053, "step": 118000 }, { "epoch": 0.28, "learning_rate": 1.811987576900651e-05, "loss": 1.4734, "step": 118500 }, { "epoch": 0.28, "learning_rate": 1.8111942755373625e-05, "loss": 1.4905, "step": 119000 }, { "epoch": 0.28, "learning_rate": 1.810400974174074e-05, "loss": 1.5025, "step": 119500 }, { "epoch": 0.29, "learning_rate": 1.809607672810786e-05, "loss": 1.4873, "step": 120000 }, { "epoch": 0.29, "learning_rate": 1.8088143714474976e-05, "loss": 1.4957, "step": 120500 }, { "epoch": 0.29, "learning_rate": 1.8080210700842092e-05, "loss": 1.5084, "step": 121000 }, { "epoch": 0.29, "learning_rate": 1.8072277687209208e-05, "loss": 1.4878, "step": 121500 }, { "epoch": 0.29, "learning_rate": 1.8064344673576323e-05, "loss": 1.4849, "step": 122000 }, { "epoch": 0.29, "learning_rate": 1.805641165994344e-05, "loss": 1.4744, "step": 122500 }, { "epoch": 0.29, "learning_rate": 1.8048478646310555e-05, "loss": 1.4895, "step": 123000 }, { "epoch": 0.29, "learning_rate": 1.804054563267767e-05, "loss": 1.4884, "step": 123500 }, { "epoch": 0.3, "learning_rate": 1.8032612619044787e-05, "loss": 1.4721, "step": 124000 }, { "epoch": 0.3, "learning_rate": 1.8024679605411903e-05, "loss": 1.481, "step": 124500 }, { "epoch": 0.3, "learning_rate": 1.801674659177902e-05, "loss": 1.4799, "step": 125000 }, { "epoch": 0.3, "learning_rate": 1.8008813578146135e-05, "loss": 1.4843, "step": 125500 }, { "epoch": 0.3, "learning_rate": 1.8000880564513254e-05, "loss": 1.4828, "step": 126000 }, { "epoch": 0.3, "learning_rate": 1.799294755088037e-05, "loss": 1.4799, "step": 126500 }, { "epoch": 0.3, "learning_rate": 1.7985014537247482e-05, "loss": 1.4707, "step": 127000 }, { "epoch": 0.3, "learning_rate": 1.7977081523614598e-05, "loss": 1.4809, "step": 127500 }, { "epoch": 0.3, "learning_rate": 1.7969148509981714e-05, "loss": 1.4778, "step": 128000 }, { "epoch": 0.31, "learning_rate": 1.7961215496348833e-05, "loss": 1.4675, "step": 128500 }, { "epoch": 0.31, "learning_rate": 1.795328248271595e-05, "loss": 1.4854, "step": 129000 }, { "epoch": 0.31, "learning_rate": 1.7945349469083065e-05, "loss": 1.4501, "step": 129500 }, { "epoch": 0.31, "learning_rate": 1.793741645545018e-05, "loss": 1.4603, "step": 130000 }, { "epoch": 0.31, "learning_rate": 1.7929483441817297e-05, "loss": 1.4748, "step": 130500 }, { "epoch": 0.31, "learning_rate": 1.7921550428184412e-05, "loss": 1.4578, "step": 131000 }, { "epoch": 0.31, "learning_rate": 1.791361741455153e-05, "loss": 1.4679, "step": 131500 }, { "epoch": 0.31, "learning_rate": 1.7905684400918644e-05, "loss": 1.4745, "step": 132000 }, { "epoch": 0.32, "learning_rate": 1.789775138728576e-05, "loss": 1.4779, "step": 132500 }, { "epoch": 0.32, "learning_rate": 1.7889818373652876e-05, "loss": 1.4708, "step": 133000 }, { "epoch": 0.32, "learning_rate": 1.7881885360019992e-05, "loss": 1.4761, "step": 133500 }, { "epoch": 0.32, "learning_rate": 1.7873952346387108e-05, "loss": 1.4682, "step": 134000 }, { "epoch": 0.32, "learning_rate": 1.7866019332754227e-05, "loss": 1.47, "step": 134500 }, { "epoch": 0.32, "learning_rate": 1.7858086319121343e-05, "loss": 1.4355, "step": 135000 }, { "epoch": 0.32, "learning_rate": 1.785015330548846e-05, "loss": 1.448, "step": 135500 }, { "epoch": 0.32, "learning_rate": 1.784222029185557e-05, "loss": 1.4724, "step": 136000 }, { "epoch": 0.32, "learning_rate": 1.7834287278222687e-05, "loss": 1.4684, "step": 136500 }, { "epoch": 0.33, "learning_rate": 1.7826354264589806e-05, "loss": 1.4582, "step": 137000 }, { "epoch": 0.33, "learning_rate": 1.7818421250956922e-05, "loss": 1.4742, "step": 137500 }, { "epoch": 0.33, "learning_rate": 1.7810488237324038e-05, "loss": 1.4583, "step": 138000 }, { "epoch": 0.33, "learning_rate": 1.7802555223691154e-05, "loss": 1.465, "step": 138500 }, { "epoch": 0.33, "learning_rate": 1.779462221005827e-05, "loss": 1.4817, "step": 139000 }, { "epoch": 0.33, "learning_rate": 1.7786689196425386e-05, "loss": 1.4492, "step": 139500 }, { "epoch": 0.33, "learning_rate": 1.77787561827925e-05, "loss": 1.4506, "step": 140000 }, { "epoch": 0.33, "learning_rate": 1.7770823169159617e-05, "loss": 1.4632, "step": 140500 }, { "epoch": 0.34, "learning_rate": 1.7762890155526733e-05, "loss": 1.4393, "step": 141000 }, { "epoch": 0.34, "learning_rate": 1.775495714189385e-05, "loss": 1.4658, "step": 141500 }, { "epoch": 0.34, "learning_rate": 1.7747024128260965e-05, "loss": 1.4482, "step": 142000 }, { "epoch": 0.34, "learning_rate": 1.773909111462808e-05, "loss": 1.4754, "step": 142500 }, { "epoch": 0.34, "learning_rate": 1.77311581009952e-05, "loss": 1.464, "step": 143000 }, { "epoch": 0.34, "learning_rate": 1.7723225087362316e-05, "loss": 1.4376, "step": 143500 }, { "epoch": 0.34, "learning_rate": 1.7715292073729432e-05, "loss": 1.4426, "step": 144000 }, { "epoch": 0.34, "learning_rate": 1.7707359060096548e-05, "loss": 1.453, "step": 144500 }, { "epoch": 0.35, "learning_rate": 1.769942604646366e-05, "loss": 1.4593, "step": 145000 }, { "epoch": 0.35, "learning_rate": 1.769149303283078e-05, "loss": 1.4472, "step": 145500 }, { "epoch": 0.35, "learning_rate": 1.7683560019197895e-05, "loss": 1.4566, "step": 146000 }, { "epoch": 0.35, "learning_rate": 1.767562700556501e-05, "loss": 1.4435, "step": 146500 }, { "epoch": 0.35, "learning_rate": 1.7667693991932127e-05, "loss": 1.4489, "step": 147000 }, { "epoch": 0.35, "learning_rate": 1.7659760978299243e-05, "loss": 1.4362, "step": 147500 }, { "epoch": 0.35, "learning_rate": 1.765182796466636e-05, "loss": 1.4713, "step": 148000 }, { "epoch": 0.35, "learning_rate": 1.7643894951033474e-05, "loss": 1.4414, "step": 148500 }, { "epoch": 0.35, "learning_rate": 1.763596193740059e-05, "loss": 1.4431, "step": 149000 }, { "epoch": 0.36, "learning_rate": 1.7628028923767706e-05, "loss": 1.4597, "step": 149500 }, { "epoch": 0.36, "learning_rate": 1.7620095910134822e-05, "loss": 1.454, "step": 150000 }, { "epoch": 0.36, "learning_rate": 1.7612162896501938e-05, "loss": 1.4594, "step": 150500 }, { "epoch": 0.36, "learning_rate": 1.7604229882869054e-05, "loss": 1.4496, "step": 151000 }, { "epoch": 0.36, "learning_rate": 1.759629686923617e-05, "loss": 1.4593, "step": 151500 }, { "epoch": 0.36, "learning_rate": 1.758836385560329e-05, "loss": 1.462, "step": 152000 }, { "epoch": 0.36, "learning_rate": 1.7580430841970405e-05, "loss": 1.4471, "step": 152500 }, { "epoch": 0.36, "learning_rate": 1.757249782833752e-05, "loss": 1.4455, "step": 153000 }, { "epoch": 0.37, "learning_rate": 1.7564564814704637e-05, "loss": 1.4595, "step": 153500 }, { "epoch": 0.37, "learning_rate": 1.755663180107175e-05, "loss": 1.4522, "step": 154000 }, { "epoch": 0.37, "learning_rate": 1.7548698787438868e-05, "loss": 1.4458, "step": 154500 }, { "epoch": 0.37, "learning_rate": 1.7540765773805984e-05, "loss": 1.443, "step": 155000 }, { "epoch": 0.37, "learning_rate": 1.75328327601731e-05, "loss": 1.4546, "step": 155500 }, { "epoch": 0.37, "learning_rate": 1.7524899746540216e-05, "loss": 1.466, "step": 156000 }, { "epoch": 0.37, "learning_rate": 1.7516966732907332e-05, "loss": 1.4545, "step": 156500 }, { "epoch": 0.37, "learning_rate": 1.7509033719274448e-05, "loss": 1.4423, "step": 157000 }, { "epoch": 0.37, "learning_rate": 1.7501100705641563e-05, "loss": 1.4351, "step": 157500 }, { "epoch": 0.38, "learning_rate": 1.7493167692008683e-05, "loss": 1.4404, "step": 158000 }, { "epoch": 0.38, "learning_rate": 1.7485234678375795e-05, "loss": 1.4345, "step": 158500 }, { "epoch": 0.38, "learning_rate": 1.747730166474291e-05, "loss": 1.4435, "step": 159000 }, { "epoch": 0.38, "learning_rate": 1.7469368651110027e-05, "loss": 1.4248, "step": 159500 }, { "epoch": 0.38, "learning_rate": 1.7461435637477143e-05, "loss": 1.4276, "step": 160000 }, { "epoch": 0.38, "learning_rate": 1.7453502623844262e-05, "loss": 1.4206, "step": 160500 }, { "epoch": 0.38, "learning_rate": 1.7445569610211378e-05, "loss": 1.4408, "step": 161000 }, { "epoch": 0.38, "learning_rate": 1.7437636596578494e-05, "loss": 1.4219, "step": 161500 }, { "epoch": 0.39, "learning_rate": 1.742970358294561e-05, "loss": 1.4568, "step": 162000 }, { "epoch": 0.39, "learning_rate": 1.7421770569312722e-05, "loss": 1.4342, "step": 162500 }, { "epoch": 0.39, "learning_rate": 1.741383755567984e-05, "loss": 1.443, "step": 163000 }, { "epoch": 0.39, "learning_rate": 1.7405904542046957e-05, "loss": 1.4288, "step": 163500 }, { "epoch": 0.39, "learning_rate": 1.7397971528414073e-05, "loss": 1.4399, "step": 164000 }, { "epoch": 0.39, "learning_rate": 1.739003851478119e-05, "loss": 1.4421, "step": 164500 }, { "epoch": 0.39, "learning_rate": 1.7382105501148305e-05, "loss": 1.4323, "step": 165000 }, { "epoch": 0.39, "learning_rate": 1.737417248751542e-05, "loss": 1.434, "step": 165500 }, { "epoch": 0.4, "learning_rate": 1.7366239473882537e-05, "loss": 1.4261, "step": 166000 }, { "epoch": 0.4, "learning_rate": 1.7358306460249656e-05, "loss": 1.4221, "step": 166500 }, { "epoch": 0.4, "learning_rate": 1.7350373446616768e-05, "loss": 1.4418, "step": 167000 }, { "epoch": 0.4, "learning_rate": 1.7342440432983884e-05, "loss": 1.4297, "step": 167500 }, { "epoch": 0.4, "learning_rate": 1.7334507419351e-05, "loss": 1.419, "step": 168000 }, { "epoch": 0.4, "learning_rate": 1.7326574405718116e-05, "loss": 1.4328, "step": 168500 }, { "epoch": 0.4, "learning_rate": 1.7318641392085235e-05, "loss": 1.4227, "step": 169000 }, { "epoch": 0.4, "learning_rate": 1.731070837845235e-05, "loss": 1.432, "step": 169500 }, { "epoch": 0.4, "learning_rate": 1.7302775364819467e-05, "loss": 1.4383, "step": 170000 }, { "epoch": 0.41, "learning_rate": 1.7294842351186583e-05, "loss": 1.4363, "step": 170500 }, { "epoch": 0.41, "learning_rate": 1.72869093375537e-05, "loss": 1.438, "step": 171000 }, { "epoch": 0.41, "learning_rate": 1.7278976323920814e-05, "loss": 1.4065, "step": 171500 }, { "epoch": 0.41, "learning_rate": 1.727104331028793e-05, "loss": 1.4143, "step": 172000 }, { "epoch": 0.41, "learning_rate": 1.7263110296655046e-05, "loss": 1.4316, "step": 172500 }, { "epoch": 0.41, "learning_rate": 1.7255177283022162e-05, "loss": 1.4189, "step": 173000 }, { "epoch": 0.41, "learning_rate": 1.7247244269389278e-05, "loss": 1.4218, "step": 173500 }, { "epoch": 0.41, "learning_rate": 1.7239311255756394e-05, "loss": 1.4213, "step": 174000 }, { "epoch": 0.42, "learning_rate": 1.723137824212351e-05, "loss": 1.4286, "step": 174500 }, { "epoch": 0.42, "learning_rate": 1.722344522849063e-05, "loss": 1.4231, "step": 175000 }, { "epoch": 0.42, "learning_rate": 1.7215512214857745e-05, "loss": 1.4295, "step": 175500 }, { "epoch": 0.42, "learning_rate": 1.7207579201224857e-05, "loss": 1.4329, "step": 176000 }, { "epoch": 0.42, "learning_rate": 1.7199646187591973e-05, "loss": 1.413, "step": 176500 }, { "epoch": 0.42, "learning_rate": 1.719171317395909e-05, "loss": 1.4222, "step": 177000 }, { "epoch": 0.42, "learning_rate": 1.7183780160326208e-05, "loss": 1.4188, "step": 177500 }, { "epoch": 0.42, "learning_rate": 1.7175847146693324e-05, "loss": 1.4304, "step": 178000 }, { "epoch": 0.42, "learning_rate": 1.716791413306044e-05, "loss": 1.4011, "step": 178500 }, { "epoch": 0.43, "learning_rate": 1.7159981119427556e-05, "loss": 1.4098, "step": 179000 }, { "epoch": 0.43, "learning_rate": 1.715204810579467e-05, "loss": 1.4229, "step": 179500 }, { "epoch": 0.43, "learning_rate": 1.7144115092161788e-05, "loss": 1.4314, "step": 180000 }, { "epoch": 0.43, "learning_rate": 1.7136182078528903e-05, "loss": 1.437, "step": 180500 }, { "epoch": 0.43, "learning_rate": 1.712824906489602e-05, "loss": 1.4239, "step": 181000 }, { "epoch": 0.43, "learning_rate": 1.7120316051263135e-05, "loss": 1.4092, "step": 181500 }, { "epoch": 0.43, "learning_rate": 1.711238303763025e-05, "loss": 1.4114, "step": 182000 }, { "epoch": 0.43, "learning_rate": 1.7104450023997367e-05, "loss": 1.4131, "step": 182500 }, { "epoch": 0.44, "learning_rate": 1.7096517010364483e-05, "loss": 1.4138, "step": 183000 }, { "epoch": 0.44, "learning_rate": 1.7088583996731602e-05, "loss": 1.4207, "step": 183500 }, { "epoch": 0.44, "learning_rate": 1.7080650983098718e-05, "loss": 1.4036, "step": 184000 }, { "epoch": 0.44, "learning_rate": 1.7072717969465834e-05, "loss": 1.4177, "step": 184500 }, { "epoch": 0.44, "learning_rate": 1.7064784955832946e-05, "loss": 1.4006, "step": 185000 }, { "epoch": 0.44, "learning_rate": 1.7056851942200062e-05, "loss": 1.4258, "step": 185500 }, { "epoch": 0.44, "learning_rate": 1.704891892856718e-05, "loss": 1.4067, "step": 186000 }, { "epoch": 0.44, "learning_rate": 1.7040985914934297e-05, "loss": 1.4066, "step": 186500 }, { "epoch": 0.45, "learning_rate": 1.7033052901301413e-05, "loss": 1.4097, "step": 187000 }, { "epoch": 0.45, "learning_rate": 1.702511988766853e-05, "loss": 1.3993, "step": 187500 }, { "epoch": 0.45, "learning_rate": 1.7017186874035645e-05, "loss": 1.3967, "step": 188000 }, { "epoch": 0.45, "learning_rate": 1.700925386040276e-05, "loss": 1.3943, "step": 188500 }, { "epoch": 0.45, "learning_rate": 1.7001320846769876e-05, "loss": 1.4084, "step": 189000 }, { "epoch": 0.45, "learning_rate": 1.6993387833136992e-05, "loss": 1.4102, "step": 189500 }, { "epoch": 0.45, "learning_rate": 1.6985454819504108e-05, "loss": 1.3981, "step": 190000 }, { "epoch": 0.45, "learning_rate": 1.6977521805871224e-05, "loss": 1.4251, "step": 190500 }, { "epoch": 0.45, "learning_rate": 1.696958879223834e-05, "loss": 1.4113, "step": 191000 }, { "epoch": 0.46, "learning_rate": 1.6961655778605456e-05, "loss": 1.3979, "step": 191500 }, { "epoch": 0.46, "learning_rate": 1.695372276497257e-05, "loss": 1.4171, "step": 192000 }, { "epoch": 0.46, "learning_rate": 1.694578975133969e-05, "loss": 1.4015, "step": 192500 }, { "epoch": 0.46, "learning_rate": 1.6937856737706807e-05, "loss": 1.4109, "step": 193000 }, { "epoch": 0.46, "learning_rate": 1.6929923724073923e-05, "loss": 1.4151, "step": 193500 }, { "epoch": 0.46, "learning_rate": 1.6921990710441035e-05, "loss": 1.4171, "step": 194000 }, { "epoch": 0.46, "learning_rate": 1.691405769680815e-05, "loss": 1.4033, "step": 194500 }, { "epoch": 0.46, "learning_rate": 1.690612468317527e-05, "loss": 1.4259, "step": 195000 }, { "epoch": 0.47, "learning_rate": 1.6898191669542386e-05, "loss": 1.3964, "step": 195500 }, { "epoch": 0.47, "learning_rate": 1.6890258655909502e-05, "loss": 1.4096, "step": 196000 }, { "epoch": 0.47, "learning_rate": 1.6882325642276618e-05, "loss": 1.3983, "step": 196500 }, { "epoch": 0.47, "learning_rate": 1.6874392628643734e-05, "loss": 1.396, "step": 197000 }, { "epoch": 0.47, "learning_rate": 1.686645961501085e-05, "loss": 1.3992, "step": 197500 }, { "epoch": 0.47, "learning_rate": 1.6858526601377965e-05, "loss": 1.3996, "step": 198000 }, { "epoch": 0.47, "learning_rate": 1.685059358774508e-05, "loss": 1.3947, "step": 198500 }, { "epoch": 0.47, "learning_rate": 1.6842660574112197e-05, "loss": 1.4166, "step": 199000 }, { "epoch": 0.47, "learning_rate": 1.6834727560479313e-05, "loss": 1.403, "step": 199500 }, { "epoch": 0.48, "learning_rate": 1.682679454684643e-05, "loss": 1.4067, "step": 200000 }, { "epoch": 0.48, "learning_rate": 1.6818861533213545e-05, "loss": 1.3818, "step": 200500 }, { "epoch": 0.48, "learning_rate": 1.6810928519580664e-05, "loss": 1.425, "step": 201000 }, { "epoch": 0.48, "learning_rate": 1.680299550594778e-05, "loss": 1.4004, "step": 201500 }, { "epoch": 0.48, "learning_rate": 1.6795062492314896e-05, "loss": 1.3936, "step": 202000 }, { "epoch": 0.48, "learning_rate": 1.678712947868201e-05, "loss": 1.3815, "step": 202500 }, { "epoch": 0.48, "learning_rate": 1.6779196465049124e-05, "loss": 1.3987, "step": 203000 }, { "epoch": 0.48, "learning_rate": 1.6771263451416243e-05, "loss": 1.4018, "step": 203500 }, { "epoch": 0.49, "learning_rate": 1.676333043778336e-05, "loss": 1.3852, "step": 204000 }, { "epoch": 0.49, "learning_rate": 1.6755397424150475e-05, "loss": 1.4024, "step": 204500 }, { "epoch": 0.49, "learning_rate": 1.674746441051759e-05, "loss": 1.3827, "step": 205000 }, { "epoch": 0.49, "learning_rate": 1.6739531396884707e-05, "loss": 1.4046, "step": 205500 }, { "epoch": 0.49, "learning_rate": 1.6731598383251823e-05, "loss": 1.3966, "step": 206000 }, { "epoch": 0.49, "learning_rate": 1.672366536961894e-05, "loss": 1.3957, "step": 206500 }, { "epoch": 0.49, "learning_rate": 1.6715732355986054e-05, "loss": 1.4029, "step": 207000 }, { "epoch": 0.49, "learning_rate": 1.670779934235317e-05, "loss": 1.4036, "step": 207500 }, { "epoch": 0.5, "learning_rate": 1.6699866328720286e-05, "loss": 1.3773, "step": 208000 }, { "epoch": 0.5, "learning_rate": 1.6691933315087402e-05, "loss": 1.4042, "step": 208500 }, { "epoch": 0.5, "learning_rate": 1.6684000301454518e-05, "loss": 1.395, "step": 209000 }, { "epoch": 0.5, "learning_rate": 1.6676067287821637e-05, "loss": 1.3992, "step": 209500 }, { "epoch": 0.5, "learning_rate": 1.6668134274188753e-05, "loss": 1.404, "step": 210000 }, { "epoch": 0.5, "learning_rate": 1.666020126055587e-05, "loss": 1.4033, "step": 210500 }, { "epoch": 0.5, "learning_rate": 1.6652268246922985e-05, "loss": 1.3797, "step": 211000 }, { "epoch": 0.5, "learning_rate": 1.6644335233290097e-05, "loss": 1.3864, "step": 211500 }, { "epoch": 0.5, "learning_rate": 1.6636402219657216e-05, "loss": 1.4064, "step": 212000 }, { "epoch": 0.51, "learning_rate": 1.6628469206024332e-05, "loss": 1.3973, "step": 212500 }, { "epoch": 0.51, "learning_rate": 1.6620536192391448e-05, "loss": 1.3933, "step": 213000 }, { "epoch": 0.51, "learning_rate": 1.6612603178758564e-05, "loss": 1.3928, "step": 213500 }, { "epoch": 0.51, "learning_rate": 1.660467016512568e-05, "loss": 1.4047, "step": 214000 }, { "epoch": 0.51, "learning_rate": 1.6596737151492796e-05, "loss": 1.4135, "step": 214500 }, { "epoch": 0.51, "learning_rate": 1.658880413785991e-05, "loss": 1.3821, "step": 215000 }, { "epoch": 0.51, "learning_rate": 1.658087112422703e-05, "loss": 1.3826, "step": 215500 }, { "epoch": 0.51, "learning_rate": 1.6572938110594143e-05, "loss": 1.3868, "step": 216000 }, { "epoch": 0.52, "learning_rate": 1.656500509696126e-05, "loss": 1.407, "step": 216500 }, { "epoch": 0.52, "learning_rate": 1.6557072083328375e-05, "loss": 1.3846, "step": 217000 }, { "epoch": 0.52, "learning_rate": 1.654913906969549e-05, "loss": 1.3727, "step": 217500 }, { "epoch": 0.52, "learning_rate": 1.654120605606261e-05, "loss": 1.3815, "step": 218000 }, { "epoch": 0.52, "learning_rate": 1.6533273042429726e-05, "loss": 1.3885, "step": 218500 }, { "epoch": 0.52, "learning_rate": 1.6525340028796842e-05, "loss": 1.3858, "step": 219000 }, { "epoch": 0.52, "learning_rate": 1.6517407015163958e-05, "loss": 1.3936, "step": 219500 }, { "epoch": 0.52, "learning_rate": 1.6509474001531074e-05, "loss": 1.3923, "step": 220000 }, { "epoch": 0.52, "learning_rate": 1.650154098789819e-05, "loss": 1.3787, "step": 220500 }, { "epoch": 0.53, "learning_rate": 1.6493607974265305e-05, "loss": 1.3872, "step": 221000 }, { "epoch": 0.53, "learning_rate": 1.648567496063242e-05, "loss": 1.3694, "step": 221500 }, { "epoch": 0.53, "learning_rate": 1.6477741946999537e-05, "loss": 1.389, "step": 222000 }, { "epoch": 0.53, "learning_rate": 1.6469808933366653e-05, "loss": 1.3849, "step": 222500 }, { "epoch": 0.53, "learning_rate": 1.646187591973377e-05, "loss": 1.385, "step": 223000 }, { "epoch": 0.53, "learning_rate": 1.6453942906100885e-05, "loss": 1.3686, "step": 223500 }, { "epoch": 0.53, "learning_rate": 1.6446009892468004e-05, "loss": 1.3916, "step": 224000 }, { "epoch": 0.53, "learning_rate": 1.643807687883512e-05, "loss": 1.375, "step": 224500 }, { "epoch": 0.54, "learning_rate": 1.6430143865202232e-05, "loss": 1.3811, "step": 225000 }, { "epoch": 0.54, "learning_rate": 1.6422210851569348e-05, "loss": 1.3755, "step": 225500 }, { "epoch": 0.54, "learning_rate": 1.6414277837936464e-05, "loss": 1.381, "step": 226000 }, { "epoch": 0.54, "learning_rate": 1.6406344824303583e-05, "loss": 1.3781, "step": 226500 }, { "epoch": 0.54, "learning_rate": 1.63984118106707e-05, "loss": 1.3714, "step": 227000 }, { "epoch": 0.54, "learning_rate": 1.6390478797037815e-05, "loss": 1.3936, "step": 227500 }, { "epoch": 0.54, "learning_rate": 1.638254578340493e-05, "loss": 1.3711, "step": 228000 }, { "epoch": 0.54, "learning_rate": 1.6374612769772047e-05, "loss": 1.3872, "step": 228500 }, { "epoch": 0.54, "learning_rate": 1.6366679756139163e-05, "loss": 1.3851, "step": 229000 }, { "epoch": 0.55, "learning_rate": 1.635874674250628e-05, "loss": 1.3924, "step": 229500 }, { "epoch": 0.55, "learning_rate": 1.6350813728873394e-05, "loss": 1.381, "step": 230000 }, { "epoch": 0.55, "learning_rate": 1.634288071524051e-05, "loss": 1.3746, "step": 230500 }, { "epoch": 0.55, "learning_rate": 1.6334947701607626e-05, "loss": 1.3714, "step": 231000 }, { "epoch": 0.55, "learning_rate": 1.6327014687974742e-05, "loss": 1.386, "step": 231500 }, { "epoch": 0.55, "learning_rate": 1.6319081674341858e-05, "loss": 1.3823, "step": 232000 }, { "epoch": 0.55, "learning_rate": 1.6311148660708974e-05, "loss": 1.3791, "step": 232500 }, { "epoch": 0.55, "learning_rate": 1.6303215647076093e-05, "loss": 1.3683, "step": 233000 }, { "epoch": 0.56, "learning_rate": 1.629528263344321e-05, "loss": 1.3786, "step": 233500 }, { "epoch": 0.56, "learning_rate": 1.628734961981032e-05, "loss": 1.3739, "step": 234000 }, { "epoch": 0.56, "learning_rate": 1.6279416606177437e-05, "loss": 1.3765, "step": 234500 }, { "epoch": 0.56, "learning_rate": 1.6271483592544553e-05, "loss": 1.3808, "step": 235000 }, { "epoch": 0.56, "learning_rate": 1.6263550578911672e-05, "loss": 1.3843, "step": 235500 }, { "epoch": 0.56, "learning_rate": 1.6255617565278788e-05, "loss": 1.3607, "step": 236000 }, { "epoch": 0.56, "learning_rate": 1.6247684551645904e-05, "loss": 1.3586, "step": 236500 }, { "epoch": 0.56, "learning_rate": 1.623975153801302e-05, "loss": 1.4029, "step": 237000 }, { "epoch": 0.57, "learning_rate": 1.6231818524380136e-05, "loss": 1.3793, "step": 237500 }, { "epoch": 0.57, "learning_rate": 1.622388551074725e-05, "loss": 1.3644, "step": 238000 }, { "epoch": 0.57, "learning_rate": 1.6215952497114367e-05, "loss": 1.3749, "step": 238500 }, { "epoch": 0.57, "learning_rate": 1.6208019483481483e-05, "loss": 1.3771, "step": 239000 }, { "epoch": 0.57, "learning_rate": 1.62000864698486e-05, "loss": 1.3759, "step": 239500 }, { "epoch": 0.57, "learning_rate": 1.6192153456215715e-05, "loss": 1.3703, "step": 240000 }, { "epoch": 0.57, "learning_rate": 1.618422044258283e-05, "loss": 1.3821, "step": 240500 }, { "epoch": 0.57, "learning_rate": 1.6176287428949947e-05, "loss": 1.363, "step": 241000 }, { "epoch": 0.57, "learning_rate": 1.6168354415317066e-05, "loss": 1.3758, "step": 241500 }, { "epoch": 0.58, "learning_rate": 1.6160421401684182e-05, "loss": 1.3875, "step": 242000 }, { "epoch": 0.58, "learning_rate": 1.6152488388051298e-05, "loss": 1.3751, "step": 242500 }, { "epoch": 0.58, "learning_rate": 1.614455537441841e-05, "loss": 1.3703, "step": 243000 }, { "epoch": 0.58, "learning_rate": 1.6136622360785526e-05, "loss": 1.3672, "step": 243500 }, { "epoch": 0.58, "learning_rate": 1.6128689347152645e-05, "loss": 1.3743, "step": 244000 }, { "epoch": 0.58, "learning_rate": 1.612075633351976e-05, "loss": 1.3427, "step": 244500 }, { "epoch": 0.58, "learning_rate": 1.6112823319886877e-05, "loss": 1.3816, "step": 245000 }, { "epoch": 0.58, "learning_rate": 1.6104890306253993e-05, "loss": 1.381, "step": 245500 }, { "epoch": 0.59, "learning_rate": 1.609695729262111e-05, "loss": 1.3709, "step": 246000 }, { "epoch": 0.59, "learning_rate": 1.6089024278988225e-05, "loss": 1.3632, "step": 246500 }, { "epoch": 0.59, "learning_rate": 1.608109126535534e-05, "loss": 1.3776, "step": 247000 }, { "epoch": 0.59, "learning_rate": 1.6073158251722456e-05, "loss": 1.3771, "step": 247500 }, { "epoch": 0.59, "learning_rate": 1.6065225238089572e-05, "loss": 1.3684, "step": 248000 }, { "epoch": 0.59, "learning_rate": 1.6057292224456688e-05, "loss": 1.3799, "step": 248500 }, { "epoch": 0.59, "learning_rate": 1.6049359210823804e-05, "loss": 1.3803, "step": 249000 }, { "epoch": 0.59, "learning_rate": 1.604142619719092e-05, "loss": 1.3725, "step": 249500 }, { "epoch": 0.59, "learning_rate": 1.603349318355804e-05, "loss": 1.3825, "step": 250000 }, { "epoch": 0.6, "learning_rate": 1.6025560169925155e-05, "loss": 1.3642, "step": 250500 }, { "epoch": 0.6, "learning_rate": 1.601762715629227e-05, "loss": 1.3714, "step": 251000 }, { "epoch": 0.6, "learning_rate": 1.6009694142659383e-05, "loss": 1.3651, "step": 251500 }, { "epoch": 0.6, "learning_rate": 1.60017611290265e-05, "loss": 1.3768, "step": 252000 }, { "epoch": 0.6, "learning_rate": 1.599382811539362e-05, "loss": 1.3662, "step": 252500 }, { "epoch": 0.6, "learning_rate": 1.5985895101760734e-05, "loss": 1.3668, "step": 253000 }, { "epoch": 0.6, "learning_rate": 1.597796208812785e-05, "loss": 1.3642, "step": 253500 }, { "epoch": 0.6, "learning_rate": 1.5970029074494966e-05, "loss": 1.3711, "step": 254000 }, { "epoch": 0.61, "learning_rate": 1.5962096060862082e-05, "loss": 1.3713, "step": 254500 }, { "epoch": 0.61, "learning_rate": 1.5954163047229198e-05, "loss": 1.3604, "step": 255000 }, { "epoch": 0.61, "learning_rate": 1.5946230033596314e-05, "loss": 1.3728, "step": 255500 }, { "epoch": 0.61, "learning_rate": 1.593829701996343e-05, "loss": 1.3526, "step": 256000 }, { "epoch": 0.61, "learning_rate": 1.5930364006330545e-05, "loss": 1.3693, "step": 256500 }, { "epoch": 0.61, "learning_rate": 1.592243099269766e-05, "loss": 1.3555, "step": 257000 }, { "epoch": 0.61, "learning_rate": 1.5914497979064777e-05, "loss": 1.3622, "step": 257500 }, { "epoch": 0.61, "learning_rate": 1.5906564965431893e-05, "loss": 1.355, "step": 258000 }, { "epoch": 0.62, "learning_rate": 1.5898631951799012e-05, "loss": 1.3632, "step": 258500 }, { "epoch": 0.62, "learning_rate": 1.5890698938166128e-05, "loss": 1.3706, "step": 259000 }, { "epoch": 0.62, "learning_rate": 1.5882765924533244e-05, "loss": 1.3667, "step": 259500 }, { "epoch": 0.62, "learning_rate": 1.587483291090036e-05, "loss": 1.359, "step": 260000 }, { "epoch": 0.62, "learning_rate": 1.5866899897267472e-05, "loss": 1.3596, "step": 260500 }, { "epoch": 0.62, "learning_rate": 1.585896688363459e-05, "loss": 1.3723, "step": 261000 }, { "epoch": 0.62, "learning_rate": 1.5851033870001707e-05, "loss": 1.3712, "step": 261500 }, { "epoch": 0.62, "learning_rate": 1.5843100856368823e-05, "loss": 1.3611, "step": 262000 }, { "epoch": 0.62, "learning_rate": 1.583516784273594e-05, "loss": 1.3625, "step": 262500 }, { "epoch": 0.63, "learning_rate": 1.5827234829103055e-05, "loss": 1.3692, "step": 263000 }, { "epoch": 0.63, "learning_rate": 1.581930181547017e-05, "loss": 1.3352, "step": 263500 }, { "epoch": 0.63, "learning_rate": 1.5811368801837287e-05, "loss": 1.3738, "step": 264000 }, { "epoch": 0.63, "learning_rate": 1.5803435788204403e-05, "loss": 1.3665, "step": 264500 }, { "epoch": 0.63, "learning_rate": 1.579550277457152e-05, "loss": 1.3579, "step": 265000 }, { "epoch": 0.63, "learning_rate": 1.5787569760938634e-05, "loss": 1.38, "step": 265500 }, { "epoch": 0.63, "learning_rate": 1.577963674730575e-05, "loss": 1.3522, "step": 266000 }, { "epoch": 0.63, "learning_rate": 1.5771703733672866e-05, "loss": 1.3583, "step": 266500 }, { "epoch": 0.64, "learning_rate": 1.5763770720039985e-05, "loss": 1.3458, "step": 267000 }, { "epoch": 0.64, "learning_rate": 1.57558377064071e-05, "loss": 1.3676, "step": 267500 }, { "epoch": 0.64, "learning_rate": 1.5747904692774217e-05, "loss": 1.3463, "step": 268000 }, { "epoch": 0.64, "learning_rate": 1.5739971679141333e-05, "loss": 1.3685, "step": 268500 }, { "epoch": 0.64, "learning_rate": 1.573203866550845e-05, "loss": 1.3594, "step": 269000 }, { "epoch": 0.64, "learning_rate": 1.5724105651875565e-05, "loss": 1.355, "step": 269500 }, { "epoch": 0.64, "learning_rate": 1.571617263824268e-05, "loss": 1.3491, "step": 270000 }, { "epoch": 0.64, "learning_rate": 1.5708239624609796e-05, "loss": 1.3462, "step": 270500 }, { "epoch": 0.64, "learning_rate": 1.5700306610976912e-05, "loss": 1.3533, "step": 271000 }, { "epoch": 0.65, "learning_rate": 1.5692373597344028e-05, "loss": 1.3695, "step": 271500 }, { "epoch": 0.65, "learning_rate": 1.5684440583711144e-05, "loss": 1.3666, "step": 272000 }, { "epoch": 0.65, "learning_rate": 1.567650757007826e-05, "loss": 1.3446, "step": 272500 }, { "epoch": 0.65, "learning_rate": 1.5668574556445376e-05, "loss": 1.3747, "step": 273000 }, { "epoch": 0.65, "learning_rate": 1.5660641542812495e-05, "loss": 1.3456, "step": 273500 }, { "epoch": 0.65, "learning_rate": 1.5652708529179607e-05, "loss": 1.3613, "step": 274000 }, { "epoch": 0.65, "learning_rate": 1.5644775515546723e-05, "loss": 1.3537, "step": 274500 }, { "epoch": 0.65, "learning_rate": 1.563684250191384e-05, "loss": 1.3617, "step": 275000 }, { "epoch": 0.66, "learning_rate": 1.5628909488280955e-05, "loss": 1.3369, "step": 275500 }, { "epoch": 0.66, "learning_rate": 1.5620976474648074e-05, "loss": 1.3451, "step": 276000 }, { "epoch": 0.66, "learning_rate": 1.561304346101519e-05, "loss": 1.3559, "step": 276500 }, { "epoch": 0.66, "learning_rate": 1.5605110447382306e-05, "loss": 1.3487, "step": 277000 }, { "epoch": 0.66, "learning_rate": 1.5597177433749422e-05, "loss": 1.3382, "step": 277500 }, { "epoch": 0.66, "learning_rate": 1.5589244420116538e-05, "loss": 1.3394, "step": 278000 }, { "epoch": 0.66, "learning_rate": 1.5581311406483654e-05, "loss": 1.342, "step": 278500 }, { "epoch": 0.66, "learning_rate": 1.557337839285077e-05, "loss": 1.3467, "step": 279000 }, { "epoch": 0.67, "learning_rate": 1.5565445379217885e-05, "loss": 1.3409, "step": 279500 }, { "epoch": 0.67, "learning_rate": 1.5557512365585e-05, "loss": 1.355, "step": 280000 }, { "epoch": 0.67, "learning_rate": 1.5549579351952117e-05, "loss": 1.3444, "step": 280500 }, { "epoch": 0.67, "learning_rate": 1.5541646338319233e-05, "loss": 1.3609, "step": 281000 }, { "epoch": 0.67, "learning_rate": 1.553371332468635e-05, "loss": 1.3515, "step": 281500 }, { "epoch": 0.67, "learning_rate": 1.5525780311053468e-05, "loss": 1.3388, "step": 282000 }, { "epoch": 0.67, "learning_rate": 1.5517847297420584e-05, "loss": 1.3392, "step": 282500 }, { "epoch": 0.67, "learning_rate": 1.5509914283787696e-05, "loss": 1.3389, "step": 283000 }, { "epoch": 0.67, "learning_rate": 1.5501981270154812e-05, "loss": 1.3493, "step": 283500 }, { "epoch": 0.68, "learning_rate": 1.5494048256521928e-05, "loss": 1.3489, "step": 284000 }, { "epoch": 0.68, "learning_rate": 1.5486115242889047e-05, "loss": 1.3458, "step": 284500 }, { "epoch": 0.68, "learning_rate": 1.5478182229256163e-05, "loss": 1.346, "step": 285000 }, { "epoch": 0.68, "learning_rate": 1.547024921562328e-05, "loss": 1.3303, "step": 285500 }, { "epoch": 0.68, "learning_rate": 1.5462316201990395e-05, "loss": 1.3384, "step": 286000 }, { "epoch": 0.68, "learning_rate": 1.545438318835751e-05, "loss": 1.3537, "step": 286500 }, { "epoch": 0.68, "learning_rate": 1.5446450174724627e-05, "loss": 1.3511, "step": 287000 }, { "epoch": 0.68, "learning_rate": 1.5438517161091743e-05, "loss": 1.3256, "step": 287500 }, { "epoch": 0.69, "learning_rate": 1.543058414745886e-05, "loss": 1.3465, "step": 288000 }, { "epoch": 0.69, "learning_rate": 1.5422651133825974e-05, "loss": 1.3329, "step": 288500 }, { "epoch": 0.69, "learning_rate": 1.541471812019309e-05, "loss": 1.3549, "step": 289000 }, { "epoch": 0.69, "learning_rate": 1.5406785106560206e-05, "loss": 1.3458, "step": 289500 }, { "epoch": 0.69, "learning_rate": 1.5398852092927322e-05, "loss": 1.3359, "step": 290000 }, { "epoch": 0.69, "learning_rate": 1.539091907929444e-05, "loss": 1.3343, "step": 290500 }, { "epoch": 0.69, "learning_rate": 1.5382986065661557e-05, "loss": 1.3617, "step": 291000 }, { "epoch": 0.69, "learning_rate": 1.5375053052028673e-05, "loss": 1.3335, "step": 291500 }, { "epoch": 0.69, "learning_rate": 1.5367120038395785e-05, "loss": 1.3508, "step": 292000 }, { "epoch": 0.7, "learning_rate": 1.53591870247629e-05, "loss": 1.3492, "step": 292500 }, { "epoch": 0.7, "learning_rate": 1.535125401113002e-05, "loss": 1.3382, "step": 293000 }, { "epoch": 0.7, "learning_rate": 1.5343320997497136e-05, "loss": 1.3423, "step": 293500 }, { "epoch": 0.7, "learning_rate": 1.5335387983864252e-05, "loss": 1.3388, "step": 294000 }, { "epoch": 0.7, "learning_rate": 1.5327454970231368e-05, "loss": 1.3606, "step": 294500 }, { "epoch": 0.7, "learning_rate": 1.5319521956598484e-05, "loss": 1.3439, "step": 295000 }, { "epoch": 0.7, "learning_rate": 1.53115889429656e-05, "loss": 1.3292, "step": 295500 }, { "epoch": 0.7, "learning_rate": 1.5303655929332716e-05, "loss": 1.341, "step": 296000 }, { "epoch": 0.71, "learning_rate": 1.529572291569983e-05, "loss": 1.3289, "step": 296500 }, { "epoch": 0.71, "learning_rate": 1.5287789902066947e-05, "loss": 1.3151, "step": 297000 }, { "epoch": 0.71, "learning_rate": 1.5279856888434063e-05, "loss": 1.3513, "step": 297500 }, { "epoch": 0.71, "learning_rate": 1.527192387480118e-05, "loss": 1.3459, "step": 298000 }, { "epoch": 0.71, "learning_rate": 1.5263990861168295e-05, "loss": 1.3493, "step": 298500 }, { "epoch": 0.71, "learning_rate": 1.5256057847535413e-05, "loss": 1.331, "step": 299000 }, { "epoch": 0.71, "learning_rate": 1.5248124833902528e-05, "loss": 1.342, "step": 299500 }, { "epoch": 0.71, "learning_rate": 1.5240191820269646e-05, "loss": 1.3355, "step": 300000 }, { "epoch": 0.72, "learning_rate": 1.523225880663676e-05, "loss": 1.3206, "step": 300500 }, { "epoch": 0.72, "learning_rate": 1.5224325793003876e-05, "loss": 1.3463, "step": 301000 }, { "epoch": 0.72, "learning_rate": 1.5216392779370992e-05, "loss": 1.3243, "step": 301500 }, { "epoch": 0.72, "learning_rate": 1.5208459765738108e-05, "loss": 1.3299, "step": 302000 }, { "epoch": 0.72, "learning_rate": 1.5200526752105225e-05, "loss": 1.3552, "step": 302500 }, { "epoch": 0.72, "learning_rate": 1.5192593738472341e-05, "loss": 1.3497, "step": 303000 }, { "epoch": 0.72, "learning_rate": 1.5184660724839457e-05, "loss": 1.335, "step": 303500 }, { "epoch": 0.72, "learning_rate": 1.5176727711206573e-05, "loss": 1.3517, "step": 304000 }, { "epoch": 0.72, "learning_rate": 1.516879469757369e-05, "loss": 1.3291, "step": 304500 }, { "epoch": 0.73, "learning_rate": 1.5160861683940805e-05, "loss": 1.351, "step": 305000 }, { "epoch": 0.73, "learning_rate": 1.515292867030792e-05, "loss": 1.3385, "step": 305500 }, { "epoch": 0.73, "learning_rate": 1.5144995656675036e-05, "loss": 1.3322, "step": 306000 }, { "epoch": 0.73, "learning_rate": 1.5137062643042152e-05, "loss": 1.3398, "step": 306500 }, { "epoch": 0.73, "learning_rate": 1.512912962940927e-05, "loss": 1.3344, "step": 307000 }, { "epoch": 0.73, "learning_rate": 1.5121196615776386e-05, "loss": 1.3396, "step": 307500 }, { "epoch": 0.73, "learning_rate": 1.5113263602143501e-05, "loss": 1.3425, "step": 308000 }, { "epoch": 0.73, "learning_rate": 1.5105330588510619e-05, "loss": 1.3207, "step": 308500 }, { "epoch": 0.74, "learning_rate": 1.5097397574877735e-05, "loss": 1.3348, "step": 309000 }, { "epoch": 0.74, "learning_rate": 1.5089464561244849e-05, "loss": 1.3415, "step": 309500 }, { "epoch": 0.74, "learning_rate": 1.5081531547611965e-05, "loss": 1.3427, "step": 310000 }, { "epoch": 0.74, "learning_rate": 1.507359853397908e-05, "loss": 1.3465, "step": 310500 }, { "epoch": 0.74, "learning_rate": 1.5065665520346198e-05, "loss": 1.329, "step": 311000 }, { "epoch": 0.74, "learning_rate": 1.5057732506713314e-05, "loss": 1.3409, "step": 311500 }, { "epoch": 0.74, "learning_rate": 1.504979949308043e-05, "loss": 1.3252, "step": 312000 }, { "epoch": 0.74, "learning_rate": 1.5041866479447546e-05, "loss": 1.3378, "step": 312500 }, { "epoch": 0.74, "learning_rate": 1.5033933465814663e-05, "loss": 1.3381, "step": 313000 }, { "epoch": 0.75, "learning_rate": 1.502600045218178e-05, "loss": 1.3467, "step": 313500 }, { "epoch": 0.75, "learning_rate": 1.5018067438548894e-05, "loss": 1.3331, "step": 314000 }, { "epoch": 0.75, "learning_rate": 1.501013442491601e-05, "loss": 1.3487, "step": 314500 }, { "epoch": 0.75, "learning_rate": 1.5002201411283125e-05, "loss": 1.3421, "step": 315000 }, { "epoch": 0.75, "learning_rate": 1.4994268397650243e-05, "loss": 1.3232, "step": 315500 }, { "epoch": 0.75, "learning_rate": 1.4986335384017359e-05, "loss": 1.3304, "step": 316000 }, { "epoch": 0.75, "learning_rate": 1.4978402370384475e-05, "loss": 1.3374, "step": 316500 }, { "epoch": 0.75, "learning_rate": 1.4970469356751592e-05, "loss": 1.333, "step": 317000 }, { "epoch": 0.76, "learning_rate": 1.4962536343118708e-05, "loss": 1.3221, "step": 317500 }, { "epoch": 0.76, "learning_rate": 1.4954603329485824e-05, "loss": 1.3317, "step": 318000 }, { "epoch": 0.76, "learning_rate": 1.4946670315852938e-05, "loss": 1.3269, "step": 318500 }, { "epoch": 0.76, "learning_rate": 1.4938737302220054e-05, "loss": 1.318, "step": 319000 }, { "epoch": 0.76, "learning_rate": 1.4930804288587171e-05, "loss": 1.3188, "step": 319500 }, { "epoch": 0.76, "learning_rate": 1.4922871274954287e-05, "loss": 1.3333, "step": 320000 }, { "epoch": 0.76, "learning_rate": 1.4914938261321403e-05, "loss": 1.3207, "step": 320500 }, { "epoch": 0.76, "learning_rate": 1.4907005247688519e-05, "loss": 1.3166, "step": 321000 }, { "epoch": 0.77, "learning_rate": 1.4899072234055637e-05, "loss": 1.3143, "step": 321500 }, { "epoch": 0.77, "learning_rate": 1.4891139220422752e-05, "loss": 1.3376, "step": 322000 }, { "epoch": 0.77, "learning_rate": 1.4883206206789868e-05, "loss": 1.3167, "step": 322500 }, { "epoch": 0.77, "learning_rate": 1.4875273193156982e-05, "loss": 1.3148, "step": 323000 }, { "epoch": 0.77, "learning_rate": 1.4867340179524098e-05, "loss": 1.3274, "step": 323500 }, { "epoch": 0.77, "learning_rate": 1.4859407165891216e-05, "loss": 1.3247, "step": 324000 }, { "epoch": 0.77, "learning_rate": 1.4851474152258332e-05, "loss": 1.3306, "step": 324500 }, { "epoch": 0.77, "learning_rate": 1.4843541138625448e-05, "loss": 1.3114, "step": 325000 }, { "epoch": 0.77, "learning_rate": 1.4835608124992564e-05, "loss": 1.342, "step": 325500 }, { "epoch": 0.78, "learning_rate": 1.4827675111359681e-05, "loss": 1.3312, "step": 326000 }, { "epoch": 0.78, "learning_rate": 1.4819742097726797e-05, "loss": 1.3318, "step": 326500 }, { "epoch": 0.78, "learning_rate": 1.4811809084093913e-05, "loss": 1.328, "step": 327000 }, { "epoch": 0.78, "learning_rate": 1.4803876070461027e-05, "loss": 1.3375, "step": 327500 }, { "epoch": 0.78, "learning_rate": 1.4795943056828145e-05, "loss": 1.3197, "step": 328000 }, { "epoch": 0.78, "learning_rate": 1.478801004319526e-05, "loss": 1.3103, "step": 328500 }, { "epoch": 0.78, "learning_rate": 1.4780077029562376e-05, "loss": 1.3349, "step": 329000 }, { "epoch": 0.78, "learning_rate": 1.4772144015929492e-05, "loss": 1.3162, "step": 329500 }, { "epoch": 0.79, "learning_rate": 1.476421100229661e-05, "loss": 1.3056, "step": 330000 }, { "epoch": 0.79, "learning_rate": 1.4756277988663726e-05, "loss": 1.3276, "step": 330500 }, { "epoch": 0.79, "learning_rate": 1.4748344975030841e-05, "loss": 1.3215, "step": 331000 }, { "epoch": 0.79, "learning_rate": 1.4740411961397957e-05, "loss": 1.3295, "step": 331500 }, { "epoch": 0.79, "learning_rate": 1.4732478947765071e-05, "loss": 1.3234, "step": 332000 }, { "epoch": 0.79, "learning_rate": 1.4724545934132189e-05, "loss": 1.3274, "step": 332500 }, { "epoch": 0.79, "learning_rate": 1.4716612920499305e-05, "loss": 1.3364, "step": 333000 }, { "epoch": 0.79, "learning_rate": 1.470867990686642e-05, "loss": 1.3036, "step": 333500 }, { "epoch": 0.79, "learning_rate": 1.4700746893233537e-05, "loss": 1.3256, "step": 334000 }, { "epoch": 0.8, "learning_rate": 1.4692813879600654e-05, "loss": 1.3096, "step": 334500 }, { "epoch": 0.8, "learning_rate": 1.468488086596777e-05, "loss": 1.339, "step": 335000 }, { "epoch": 0.8, "learning_rate": 1.4676947852334886e-05, "loss": 1.328, "step": 335500 }, { "epoch": 0.8, "learning_rate": 1.4669014838702003e-05, "loss": 1.3228, "step": 336000 }, { "epoch": 0.8, "learning_rate": 1.4661081825069116e-05, "loss": 1.3213, "step": 336500 }, { "epoch": 0.8, "learning_rate": 1.4653148811436233e-05, "loss": 1.332, "step": 337000 }, { "epoch": 0.8, "learning_rate": 1.464521579780335e-05, "loss": 1.3262, "step": 337500 }, { "epoch": 0.8, "learning_rate": 1.4637282784170465e-05, "loss": 1.3213, "step": 338000 }, { "epoch": 0.81, "learning_rate": 1.4629349770537583e-05, "loss": 1.3289, "step": 338500 }, { "epoch": 0.81, "learning_rate": 1.4621416756904699e-05, "loss": 1.3137, "step": 339000 }, { "epoch": 0.81, "learning_rate": 1.4613483743271814e-05, "loss": 1.3191, "step": 339500 }, { "epoch": 0.81, "learning_rate": 1.460555072963893e-05, "loss": 1.3242, "step": 340000 }, { "epoch": 0.81, "learning_rate": 1.4597617716006045e-05, "loss": 1.3244, "step": 340500 }, { "epoch": 0.81, "learning_rate": 1.4589684702373162e-05, "loss": 1.3339, "step": 341000 }, { "epoch": 0.81, "learning_rate": 1.4581751688740278e-05, "loss": 1.3387, "step": 341500 }, { "epoch": 0.81, "learning_rate": 1.4573818675107394e-05, "loss": 1.317, "step": 342000 }, { "epoch": 0.82, "learning_rate": 1.456588566147451e-05, "loss": 1.3094, "step": 342500 }, { "epoch": 0.82, "learning_rate": 1.4557952647841627e-05, "loss": 1.322, "step": 343000 }, { "epoch": 0.82, "learning_rate": 1.4550019634208743e-05, "loss": 1.3226, "step": 343500 }, { "epoch": 0.82, "learning_rate": 1.4542086620575859e-05, "loss": 1.3209, "step": 344000 }, { "epoch": 0.82, "learning_rate": 1.4534153606942975e-05, "loss": 1.3332, "step": 344500 }, { "epoch": 0.82, "learning_rate": 1.4526220593310089e-05, "loss": 1.3088, "step": 345000 }, { "epoch": 0.82, "learning_rate": 1.4518287579677207e-05, "loss": 1.3244, "step": 345500 }, { "epoch": 0.82, "learning_rate": 1.4510354566044322e-05, "loss": 1.323, "step": 346000 }, { "epoch": 0.82, "learning_rate": 1.4502421552411438e-05, "loss": 1.3268, "step": 346500 }, { "epoch": 0.83, "learning_rate": 1.4494488538778554e-05, "loss": 1.3168, "step": 347000 }, { "epoch": 0.83, "learning_rate": 1.4486555525145672e-05, "loss": 1.3126, "step": 347500 }, { "epoch": 0.83, "learning_rate": 1.4478622511512788e-05, "loss": 1.3337, "step": 348000 }, { "epoch": 0.83, "learning_rate": 1.4470689497879903e-05, "loss": 1.3058, "step": 348500 }, { "epoch": 0.83, "learning_rate": 1.4462756484247021e-05, "loss": 1.3144, "step": 349000 }, { "epoch": 0.83, "learning_rate": 1.4454823470614135e-05, "loss": 1.2985, "step": 349500 }, { "epoch": 0.83, "learning_rate": 1.4446890456981251e-05, "loss": 1.3195, "step": 350000 }, { "epoch": 0.83, "learning_rate": 1.4438957443348367e-05, "loss": 1.3055, "step": 350500 }, { "epoch": 0.84, "learning_rate": 1.4431024429715483e-05, "loss": 1.3234, "step": 351000 }, { "epoch": 0.84, "learning_rate": 1.44230914160826e-05, "loss": 1.314, "step": 351500 }, { "epoch": 0.84, "learning_rate": 1.4415158402449716e-05, "loss": 1.3196, "step": 352000 }, { "epoch": 0.84, "learning_rate": 1.4407225388816832e-05, "loss": 1.3052, "step": 352500 }, { "epoch": 0.84, "learning_rate": 1.4399292375183948e-05, "loss": 1.3182, "step": 353000 }, { "epoch": 0.84, "learning_rate": 1.4391359361551065e-05, "loss": 1.3213, "step": 353500 }, { "epoch": 0.84, "learning_rate": 1.438342634791818e-05, "loss": 1.2952, "step": 354000 }, { "epoch": 0.84, "learning_rate": 1.4375493334285296e-05, "loss": 1.3257, "step": 354500 }, { "epoch": 0.84, "learning_rate": 1.4367560320652411e-05, "loss": 1.3322, "step": 355000 }, { "epoch": 0.85, "learning_rate": 1.4359627307019527e-05, "loss": 1.3058, "step": 355500 }, { "epoch": 0.85, "learning_rate": 1.4351694293386645e-05, "loss": 1.2913, "step": 356000 }, { "epoch": 0.85, "learning_rate": 1.434376127975376e-05, "loss": 1.3154, "step": 356500 }, { "epoch": 0.85, "learning_rate": 1.4335828266120877e-05, "loss": 1.3069, "step": 357000 }, { "epoch": 0.85, "learning_rate": 1.4327895252487994e-05, "loss": 1.3171, "step": 357500 }, { "epoch": 0.85, "learning_rate": 1.431996223885511e-05, "loss": 1.3136, "step": 358000 }, { "epoch": 0.85, "learning_rate": 1.4312029225222224e-05, "loss": 1.3125, "step": 358500 }, { "epoch": 0.85, "learning_rate": 1.430409621158934e-05, "loss": 1.3019, "step": 359000 }, { "epoch": 0.86, "learning_rate": 1.4296163197956456e-05, "loss": 1.3075, "step": 359500 }, { "epoch": 0.86, "learning_rate": 1.4288230184323573e-05, "loss": 1.2988, "step": 360000 }, { "epoch": 0.86, "learning_rate": 1.428029717069069e-05, "loss": 1.307, "step": 360500 }, { "epoch": 0.86, "learning_rate": 1.4272364157057805e-05, "loss": 1.3014, "step": 361000 }, { "epoch": 0.86, "learning_rate": 1.4264431143424921e-05, "loss": 1.2961, "step": 361500 }, { "epoch": 0.86, "learning_rate": 1.4256498129792039e-05, "loss": 1.293, "step": 362000 }, { "epoch": 0.86, "learning_rate": 1.4248565116159154e-05, "loss": 1.3178, "step": 362500 }, { "epoch": 0.86, "learning_rate": 1.4240632102526269e-05, "loss": 1.3029, "step": 363000 }, { "epoch": 0.87, "learning_rate": 1.4232699088893384e-05, "loss": 1.2999, "step": 363500 }, { "epoch": 0.87, "learning_rate": 1.42247660752605e-05, "loss": 1.312, "step": 364000 }, { "epoch": 0.87, "learning_rate": 1.4216833061627618e-05, "loss": 1.3113, "step": 364500 }, { "epoch": 0.87, "learning_rate": 1.4208900047994734e-05, "loss": 1.3054, "step": 365000 }, { "epoch": 0.87, "learning_rate": 1.420096703436185e-05, "loss": 1.3112, "step": 365500 }, { "epoch": 0.87, "learning_rate": 1.4193034020728966e-05, "loss": 1.3033, "step": 366000 }, { "epoch": 0.87, "learning_rate": 1.4185101007096083e-05, "loss": 1.3126, "step": 366500 }, { "epoch": 0.87, "learning_rate": 1.4177167993463199e-05, "loss": 1.2953, "step": 367000 }, { "epoch": 0.87, "learning_rate": 1.4169234979830313e-05, "loss": 1.3032, "step": 367500 }, { "epoch": 0.88, "learning_rate": 1.4161301966197429e-05, "loss": 1.3037, "step": 368000 }, { "epoch": 0.88, "learning_rate": 1.4153368952564547e-05, "loss": 1.3037, "step": 368500 }, { "epoch": 0.88, "learning_rate": 1.4145435938931662e-05, "loss": 1.3157, "step": 369000 }, { "epoch": 0.88, "learning_rate": 1.4137502925298778e-05, "loss": 1.2962, "step": 369500 }, { "epoch": 0.88, "learning_rate": 1.4129569911665894e-05, "loss": 1.3041, "step": 370000 }, { "epoch": 0.88, "learning_rate": 1.4121636898033012e-05, "loss": 1.3162, "step": 370500 }, { "epoch": 0.88, "learning_rate": 1.4113703884400128e-05, "loss": 1.3037, "step": 371000 }, { "epoch": 0.88, "learning_rate": 1.4105770870767243e-05, "loss": 1.3072, "step": 371500 }, { "epoch": 0.89, "learning_rate": 1.4097837857134358e-05, "loss": 1.2938, "step": 372000 }, { "epoch": 0.89, "learning_rate": 1.4089904843501473e-05, "loss": 1.3014, "step": 372500 }, { "epoch": 0.89, "learning_rate": 1.4081971829868591e-05, "loss": 1.3023, "step": 373000 }, { "epoch": 0.89, "learning_rate": 1.4074038816235707e-05, "loss": 1.3017, "step": 373500 }, { "epoch": 0.89, "learning_rate": 1.4066105802602823e-05, "loss": 1.3143, "step": 374000 }, { "epoch": 0.89, "learning_rate": 1.4058172788969939e-05, "loss": 1.31, "step": 374500 }, { "epoch": 0.89, "learning_rate": 1.4050239775337056e-05, "loss": 1.3034, "step": 375000 }, { "epoch": 0.89, "learning_rate": 1.4042306761704172e-05, "loss": 1.3064, "step": 375500 }, { "epoch": 0.89, "learning_rate": 1.4034373748071288e-05, "loss": 1.3006, "step": 376000 }, { "epoch": 0.9, "learning_rate": 1.4026440734438402e-05, "loss": 1.3022, "step": 376500 }, { "epoch": 0.9, "learning_rate": 1.4018507720805518e-05, "loss": 1.2904, "step": 377000 }, { "epoch": 0.9, "learning_rate": 1.4010574707172635e-05, "loss": 1.3114, "step": 377500 }, { "epoch": 0.9, "learning_rate": 1.4002641693539751e-05, "loss": 1.3129, "step": 378000 }, { "epoch": 0.9, "learning_rate": 1.3994708679906867e-05, "loss": 1.3128, "step": 378500 }, { "epoch": 0.9, "learning_rate": 1.3986775666273985e-05, "loss": 1.3276, "step": 379000 }, { "epoch": 0.9, "learning_rate": 1.39788426526411e-05, "loss": 1.3104, "step": 379500 }, { "epoch": 0.9, "learning_rate": 1.3970909639008216e-05, "loss": 1.3133, "step": 380000 }, { "epoch": 0.91, "learning_rate": 1.3962976625375332e-05, "loss": 1.3067, "step": 380500 }, { "epoch": 0.91, "learning_rate": 1.3955043611742447e-05, "loss": 1.288, "step": 381000 }, { "epoch": 0.91, "learning_rate": 1.3947110598109564e-05, "loss": 1.2915, "step": 381500 }, { "epoch": 0.91, "learning_rate": 1.393917758447668e-05, "loss": 1.3016, "step": 382000 }, { "epoch": 0.91, "learning_rate": 1.3931244570843796e-05, "loss": 1.3032, "step": 382500 }, { "epoch": 0.91, "learning_rate": 1.3923311557210912e-05, "loss": 1.3005, "step": 383000 }, { "epoch": 0.91, "learning_rate": 1.391537854357803e-05, "loss": 1.2996, "step": 383500 }, { "epoch": 0.91, "learning_rate": 1.3907445529945145e-05, "loss": 1.2896, "step": 384000 }, { "epoch": 0.92, "learning_rate": 1.3899512516312261e-05, "loss": 1.3101, "step": 384500 }, { "epoch": 0.92, "learning_rate": 1.3891579502679375e-05, "loss": 1.2886, "step": 385000 }, { "epoch": 0.92, "learning_rate": 1.3883646489046491e-05, "loss": 1.295, "step": 385500 }, { "epoch": 0.92, "learning_rate": 1.3875713475413609e-05, "loss": 1.3001, "step": 386000 }, { "epoch": 0.92, "learning_rate": 1.3867780461780724e-05, "loss": 1.3173, "step": 386500 }, { "epoch": 0.92, "learning_rate": 1.385984744814784e-05, "loss": 1.3049, "step": 387000 }, { "epoch": 0.92, "learning_rate": 1.3851914434514956e-05, "loss": 1.2951, "step": 387500 }, { "epoch": 0.92, "learning_rate": 1.3843981420882074e-05, "loss": 1.3051, "step": 388000 }, { "epoch": 0.92, "learning_rate": 1.383604840724919e-05, "loss": 1.2801, "step": 388500 }, { "epoch": 0.93, "learning_rate": 1.3828115393616305e-05, "loss": 1.2945, "step": 389000 }, { "epoch": 0.93, "learning_rate": 1.382018237998342e-05, "loss": 1.3094, "step": 389500 }, { "epoch": 0.93, "learning_rate": 1.3812249366350537e-05, "loss": 1.2932, "step": 390000 }, { "epoch": 0.93, "learning_rate": 1.3804316352717653e-05, "loss": 1.3115, "step": 390500 }, { "epoch": 0.93, "learning_rate": 1.3796383339084769e-05, "loss": 1.2897, "step": 391000 }, { "epoch": 0.93, "learning_rate": 1.3788450325451885e-05, "loss": 1.3055, "step": 391500 }, { "epoch": 0.93, "learning_rate": 1.3780517311819002e-05, "loss": 1.2972, "step": 392000 }, { "epoch": 0.93, "learning_rate": 1.3772584298186118e-05, "loss": 1.306, "step": 392500 }, { "epoch": 0.94, "learning_rate": 1.3764651284553234e-05, "loss": 1.2972, "step": 393000 }, { "epoch": 0.94, "learning_rate": 1.375671827092035e-05, "loss": 1.3171, "step": 393500 }, { "epoch": 0.94, "learning_rate": 1.3748785257287464e-05, "loss": 1.2894, "step": 394000 }, { "epoch": 0.94, "learning_rate": 1.3740852243654582e-05, "loss": 1.2987, "step": 394500 }, { "epoch": 0.94, "learning_rate": 1.3732919230021698e-05, "loss": 1.3164, "step": 395000 }, { "epoch": 0.94, "learning_rate": 1.3724986216388813e-05, "loss": 1.2989, "step": 395500 }, { "epoch": 0.94, "learning_rate": 1.371705320275593e-05, "loss": 1.2899, "step": 396000 }, { "epoch": 0.94, "learning_rate": 1.3709120189123047e-05, "loss": 1.2895, "step": 396500 }, { "epoch": 0.94, "learning_rate": 1.3701187175490163e-05, "loss": 1.3058, "step": 397000 }, { "epoch": 0.95, "learning_rate": 1.3693254161857279e-05, "loss": 1.3147, "step": 397500 }, { "epoch": 0.95, "learning_rate": 1.3685321148224396e-05, "loss": 1.3051, "step": 398000 }, { "epoch": 0.95, "learning_rate": 1.3677388134591509e-05, "loss": 1.3063, "step": 398500 }, { "epoch": 0.95, "learning_rate": 1.3669455120958626e-05, "loss": 1.2834, "step": 399000 }, { "epoch": 0.95, "learning_rate": 1.3661522107325742e-05, "loss": 1.3036, "step": 399500 }, { "epoch": 0.95, "learning_rate": 1.3653589093692858e-05, "loss": 1.2919, "step": 400000 }, { "epoch": 0.95, "learning_rate": 1.3645656080059975e-05, "loss": 1.2906, "step": 400500 }, { "epoch": 0.95, "learning_rate": 1.3637723066427091e-05, "loss": 1.3035, "step": 401000 }, { "epoch": 0.96, "learning_rate": 1.3629790052794207e-05, "loss": 1.3112, "step": 401500 }, { "epoch": 0.96, "learning_rate": 1.3621857039161323e-05, "loss": 1.3068, "step": 402000 }, { "epoch": 0.96, "learning_rate": 1.361392402552844e-05, "loss": 1.2963, "step": 402500 }, { "epoch": 0.96, "learning_rate": 1.3605991011895555e-05, "loss": 1.283, "step": 403000 }, { "epoch": 0.96, "learning_rate": 1.359805799826267e-05, "loss": 1.2828, "step": 403500 }, { "epoch": 0.96, "learning_rate": 1.3590124984629786e-05, "loss": 1.306, "step": 404000 }, { "epoch": 0.96, "learning_rate": 1.3582191970996902e-05, "loss": 1.2986, "step": 404500 }, { "epoch": 0.96, "learning_rate": 1.357425895736402e-05, "loss": 1.2872, "step": 405000 }, { "epoch": 0.97, "learning_rate": 1.3566325943731136e-05, "loss": 1.2923, "step": 405500 }, { "epoch": 0.97, "learning_rate": 1.3558392930098252e-05, "loss": 1.2967, "step": 406000 }, { "epoch": 0.97, "learning_rate": 1.3550459916465367e-05, "loss": 1.2779, "step": 406500 }, { "epoch": 0.97, "learning_rate": 1.3542526902832485e-05, "loss": 1.2828, "step": 407000 }, { "epoch": 0.97, "learning_rate": 1.35345938891996e-05, "loss": 1.2953, "step": 407500 }, { "epoch": 0.97, "learning_rate": 1.3526660875566715e-05, "loss": 1.2748, "step": 408000 }, { "epoch": 0.97, "learning_rate": 1.3518727861933831e-05, "loss": 1.2979, "step": 408500 }, { "epoch": 0.97, "learning_rate": 1.3510794848300949e-05, "loss": 1.2942, "step": 409000 }, { "epoch": 0.97, "learning_rate": 1.3502861834668064e-05, "loss": 1.3034, "step": 409500 }, { "epoch": 0.98, "learning_rate": 1.349492882103518e-05, "loss": 1.3038, "step": 410000 }, { "epoch": 0.98, "learning_rate": 1.3486995807402296e-05, "loss": 1.3012, "step": 410500 }, { "epoch": 0.98, "learning_rate": 1.3479062793769414e-05, "loss": 1.307, "step": 411000 }, { "epoch": 0.98, "learning_rate": 1.347112978013653e-05, "loss": 1.296, "step": 411500 }, { "epoch": 0.98, "learning_rate": 1.3463196766503644e-05, "loss": 1.3014, "step": 412000 }, { "epoch": 0.98, "learning_rate": 1.345526375287076e-05, "loss": 1.2976, "step": 412500 }, { "epoch": 0.98, "learning_rate": 1.3447330739237875e-05, "loss": 1.3056, "step": 413000 }, { "epoch": 0.98, "learning_rate": 1.3439397725604993e-05, "loss": 1.2974, "step": 413500 }, { "epoch": 0.99, "learning_rate": 1.3431464711972109e-05, "loss": 1.287, "step": 414000 }, { "epoch": 0.99, "learning_rate": 1.3423531698339225e-05, "loss": 1.2929, "step": 414500 }, { "epoch": 0.99, "learning_rate": 1.341559868470634e-05, "loss": 1.2903, "step": 415000 }, { "epoch": 0.99, "learning_rate": 1.3407665671073458e-05, "loss": 1.2967, "step": 415500 }, { "epoch": 0.99, "learning_rate": 1.3399732657440574e-05, "loss": 1.2881, "step": 416000 }, { "epoch": 0.99, "learning_rate": 1.3391799643807688e-05, "loss": 1.2844, "step": 416500 }, { "epoch": 0.99, "learning_rate": 1.3383866630174804e-05, "loss": 1.2803, "step": 417000 }, { "epoch": 0.99, "learning_rate": 1.337593361654192e-05, "loss": 1.2975, "step": 417500 }, { "epoch": 0.99, "learning_rate": 1.3368000602909037e-05, "loss": 1.2977, "step": 418000 }, { "epoch": 1.0, "learning_rate": 1.3360067589276153e-05, "loss": 1.2968, "step": 418500 }, { "epoch": 1.0, "learning_rate": 1.335213457564327e-05, "loss": 1.2808, "step": 419000 }, { "epoch": 1.0, "learning_rate": 1.3344201562010387e-05, "loss": 1.291, "step": 419500 }, { "epoch": 1.0, "learning_rate": 1.3336268548377503e-05, "loss": 1.2857, "step": 420000 }, { "epoch": 1.0, "eval_loss": 1.2632273435592651, "eval_runtime": 3622.7986, "eval_samples_per_second": 366.442, "eval_steps_per_second": 22.903, "step": 420185 }, { "epoch": 1.0, "learning_rate": 1.3328335534744618e-05, "loss": 1.2928, "step": 420500 }, { "epoch": 1.0, "learning_rate": 1.3320402521111733e-05, "loss": 1.274, "step": 421000 }, { "epoch": 1.0, "learning_rate": 1.3312469507478849e-05, "loss": 1.2829, "step": 421500 }, { "epoch": 1.0, "learning_rate": 1.3304536493845966e-05, "loss": 1.2867, "step": 422000 }, { "epoch": 1.01, "learning_rate": 1.3296603480213082e-05, "loss": 1.2886, "step": 422500 }, { "epoch": 1.01, "learning_rate": 1.3288670466580198e-05, "loss": 1.2908, "step": 423000 }, { "epoch": 1.01, "learning_rate": 1.3280737452947314e-05, "loss": 1.2776, "step": 423500 }, { "epoch": 1.01, "learning_rate": 1.3272804439314431e-05, "loss": 1.2855, "step": 424000 }, { "epoch": 1.01, "learning_rate": 1.3264871425681547e-05, "loss": 1.2843, "step": 424500 }, { "epoch": 1.01, "learning_rate": 1.3256938412048663e-05, "loss": 1.276, "step": 425000 }, { "epoch": 1.01, "learning_rate": 1.3249005398415777e-05, "loss": 1.2771, "step": 425500 }, { "epoch": 1.01, "learning_rate": 1.3241072384782893e-05, "loss": 1.291, "step": 426000 }, { "epoch": 1.02, "learning_rate": 1.323313937115001e-05, "loss": 1.2731, "step": 426500 }, { "epoch": 1.02, "learning_rate": 1.3225206357517126e-05, "loss": 1.2979, "step": 427000 }, { "epoch": 1.02, "learning_rate": 1.3217273343884242e-05, "loss": 1.3006, "step": 427500 }, { "epoch": 1.02, "learning_rate": 1.3209340330251358e-05, "loss": 1.2872, "step": 428000 }, { "epoch": 1.02, "learning_rate": 1.3201407316618476e-05, "loss": 1.2708, "step": 428500 }, { "epoch": 1.02, "learning_rate": 1.3193474302985592e-05, "loss": 1.2794, "step": 429000 }, { "epoch": 1.02, "learning_rate": 1.3185541289352707e-05, "loss": 1.2916, "step": 429500 }, { "epoch": 1.02, "learning_rate": 1.3177608275719822e-05, "loss": 1.2796, "step": 430000 }, { "epoch": 1.02, "learning_rate": 1.316967526208694e-05, "loss": 1.2867, "step": 430500 }, { "epoch": 1.03, "learning_rate": 1.3161742248454055e-05, "loss": 1.2937, "step": 431000 }, { "epoch": 1.03, "learning_rate": 1.3153809234821171e-05, "loss": 1.2805, "step": 431500 }, { "epoch": 1.03, "learning_rate": 1.3145876221188287e-05, "loss": 1.2908, "step": 432000 }, { "epoch": 1.03, "learning_rate": 1.3137943207555404e-05, "loss": 1.2829, "step": 432500 }, { "epoch": 1.03, "learning_rate": 1.313001019392252e-05, "loss": 1.2626, "step": 433000 }, { "epoch": 1.03, "learning_rate": 1.3122077180289636e-05, "loss": 1.2799, "step": 433500 }, { "epoch": 1.03, "learning_rate": 1.311414416665675e-05, "loss": 1.282, "step": 434000 }, { "epoch": 1.03, "learning_rate": 1.3106211153023866e-05, "loss": 1.2944, "step": 434500 }, { "epoch": 1.04, "learning_rate": 1.3098278139390984e-05, "loss": 1.2696, "step": 435000 }, { "epoch": 1.04, "learning_rate": 1.30903451257581e-05, "loss": 1.2904, "step": 435500 }, { "epoch": 1.04, "learning_rate": 1.3082412112125215e-05, "loss": 1.2845, "step": 436000 }, { "epoch": 1.04, "learning_rate": 1.3074479098492331e-05, "loss": 1.2837, "step": 436500 }, { "epoch": 1.04, "learning_rate": 1.3066546084859449e-05, "loss": 1.2837, "step": 437000 }, { "epoch": 1.04, "learning_rate": 1.3058613071226565e-05, "loss": 1.2696, "step": 437500 }, { "epoch": 1.04, "learning_rate": 1.305068005759368e-05, "loss": 1.2887, "step": 438000 }, { "epoch": 1.04, "learning_rate": 1.3042747043960795e-05, "loss": 1.2719, "step": 438500 }, { "epoch": 1.04, "learning_rate": 1.303481403032791e-05, "loss": 1.2717, "step": 439000 }, { "epoch": 1.05, "learning_rate": 1.3026881016695028e-05, "loss": 1.2922, "step": 439500 }, { "epoch": 1.05, "learning_rate": 1.3018948003062144e-05, "loss": 1.2643, "step": 440000 }, { "epoch": 1.05, "learning_rate": 1.301101498942926e-05, "loss": 1.2637, "step": 440500 }, { "epoch": 1.05, "learning_rate": 1.3003081975796377e-05, "loss": 1.2732, "step": 441000 }, { "epoch": 1.05, "learning_rate": 1.2995148962163493e-05, "loss": 1.2595, "step": 441500 }, { "epoch": 1.05, "learning_rate": 1.2987215948530609e-05, "loss": 1.2757, "step": 442000 }, { "epoch": 1.05, "learning_rate": 1.2979282934897725e-05, "loss": 1.3007, "step": 442500 }, { "epoch": 1.05, "learning_rate": 1.297134992126484e-05, "loss": 1.2722, "step": 443000 }, { "epoch": 1.06, "learning_rate": 1.2963416907631957e-05, "loss": 1.2739, "step": 443500 }, { "epoch": 1.06, "learning_rate": 1.2955483893999073e-05, "loss": 1.267, "step": 444000 }, { "epoch": 1.06, "learning_rate": 1.2947550880366188e-05, "loss": 1.2832, "step": 444500 }, { "epoch": 1.06, "learning_rate": 1.2939617866733304e-05, "loss": 1.2718, "step": 445000 }, { "epoch": 1.06, "learning_rate": 1.2931684853100422e-05, "loss": 1.2563, "step": 445500 }, { "epoch": 1.06, "learning_rate": 1.2923751839467538e-05, "loss": 1.28, "step": 446000 }, { "epoch": 1.06, "learning_rate": 1.2915818825834654e-05, "loss": 1.2692, "step": 446500 }, { "epoch": 1.06, "learning_rate": 1.290788581220177e-05, "loss": 1.278, "step": 447000 }, { "epoch": 1.07, "learning_rate": 1.2899952798568884e-05, "loss": 1.288, "step": 447500 }, { "epoch": 1.07, "learning_rate": 1.2892019784936001e-05, "loss": 1.2656, "step": 448000 }, { "epoch": 1.07, "learning_rate": 1.2884086771303117e-05, "loss": 1.28, "step": 448500 }, { "epoch": 1.07, "learning_rate": 1.2876153757670233e-05, "loss": 1.2734, "step": 449000 }, { "epoch": 1.07, "learning_rate": 1.2868220744037349e-05, "loss": 1.2654, "step": 449500 }, { "epoch": 1.07, "learning_rate": 1.2860287730404466e-05, "loss": 1.2963, "step": 450000 }, { "epoch": 1.07, "learning_rate": 1.2852354716771582e-05, "loss": 1.2817, "step": 450500 }, { "epoch": 1.07, "learning_rate": 1.2844421703138698e-05, "loss": 1.2841, "step": 451000 }, { "epoch": 1.07, "learning_rate": 1.2836488689505816e-05, "loss": 1.2771, "step": 451500 }, { "epoch": 1.08, "learning_rate": 1.282855567587293e-05, "loss": 1.2715, "step": 452000 }, { "epoch": 1.08, "learning_rate": 1.2820622662240046e-05, "loss": 1.2824, "step": 452500 }, { "epoch": 1.08, "learning_rate": 1.2812689648607162e-05, "loss": 1.2809, "step": 453000 }, { "epoch": 1.08, "learning_rate": 1.2804756634974277e-05, "loss": 1.2755, "step": 453500 }, { "epoch": 1.08, "learning_rate": 1.2796823621341395e-05, "loss": 1.2732, "step": 454000 }, { "epoch": 1.08, "learning_rate": 1.2788890607708511e-05, "loss": 1.3004, "step": 454500 }, { "epoch": 1.08, "learning_rate": 1.2780957594075627e-05, "loss": 1.268, "step": 455000 }, { "epoch": 1.08, "learning_rate": 1.2773024580442743e-05, "loss": 1.2652, "step": 455500 }, { "epoch": 1.09, "learning_rate": 1.276509156680986e-05, "loss": 1.2742, "step": 456000 }, { "epoch": 1.09, "learning_rate": 1.2757158553176974e-05, "loss": 1.2517, "step": 456500 }, { "epoch": 1.09, "learning_rate": 1.274922553954409e-05, "loss": 1.2721, "step": 457000 }, { "epoch": 1.09, "learning_rate": 1.2741292525911206e-05, "loss": 1.281, "step": 457500 }, { "epoch": 1.09, "learning_rate": 1.2733359512278322e-05, "loss": 1.272, "step": 458000 }, { "epoch": 1.09, "learning_rate": 1.272542649864544e-05, "loss": 1.2756, "step": 458500 }, { "epoch": 1.09, "learning_rate": 1.2717493485012555e-05, "loss": 1.2691, "step": 459000 }, { "epoch": 1.09, "learning_rate": 1.2709560471379671e-05, "loss": 1.2635, "step": 459500 }, { "epoch": 1.09, "learning_rate": 1.2701627457746789e-05, "loss": 1.2589, "step": 460000 }, { "epoch": 1.1, "learning_rate": 1.2693694444113905e-05, "loss": 1.2861, "step": 460500 }, { "epoch": 1.1, "learning_rate": 1.2685761430481019e-05, "loss": 1.2718, "step": 461000 }, { "epoch": 1.1, "learning_rate": 1.2677828416848135e-05, "loss": 1.2716, "step": 461500 }, { "epoch": 1.1, "learning_rate": 1.266989540321525e-05, "loss": 1.2627, "step": 462000 }, { "epoch": 1.1, "learning_rate": 1.2661962389582368e-05, "loss": 1.2708, "step": 462500 }, { "epoch": 1.1, "learning_rate": 1.2654029375949484e-05, "loss": 1.2742, "step": 463000 }, { "epoch": 1.1, "learning_rate": 1.26460963623166e-05, "loss": 1.2576, "step": 463500 }, { "epoch": 1.1, "learning_rate": 1.2638163348683716e-05, "loss": 1.2793, "step": 464000 }, { "epoch": 1.11, "learning_rate": 1.2630230335050833e-05, "loss": 1.2698, "step": 464500 }, { "epoch": 1.11, "learning_rate": 1.2622297321417949e-05, "loss": 1.2602, "step": 465000 }, { "epoch": 1.11, "learning_rate": 1.2614364307785063e-05, "loss": 1.2813, "step": 465500 }, { "epoch": 1.11, "learning_rate": 1.2606431294152179e-05, "loss": 1.2683, "step": 466000 }, { "epoch": 1.11, "learning_rate": 1.2598498280519295e-05, "loss": 1.2693, "step": 466500 }, { "epoch": 1.11, "learning_rate": 1.2590565266886413e-05, "loss": 1.2724, "step": 467000 }, { "epoch": 1.11, "learning_rate": 1.2582632253253528e-05, "loss": 1.272, "step": 467500 }, { "epoch": 1.11, "learning_rate": 1.2574699239620644e-05, "loss": 1.2723, "step": 468000 }, { "epoch": 1.11, "learning_rate": 1.256676622598776e-05, "loss": 1.2667, "step": 468500 }, { "epoch": 1.12, "learning_rate": 1.2558833212354878e-05, "loss": 1.2661, "step": 469000 }, { "epoch": 1.12, "learning_rate": 1.2550900198721994e-05, "loss": 1.2673, "step": 469500 }, { "epoch": 1.12, "learning_rate": 1.2542967185089108e-05, "loss": 1.2657, "step": 470000 }, { "epoch": 1.12, "learning_rate": 1.2535034171456224e-05, "loss": 1.2734, "step": 470500 }, { "epoch": 1.12, "learning_rate": 1.2527101157823341e-05, "loss": 1.2571, "step": 471000 }, { "epoch": 1.12, "learning_rate": 1.2519168144190457e-05, "loss": 1.2637, "step": 471500 }, { "epoch": 1.12, "learning_rate": 1.2511235130557573e-05, "loss": 1.2697, "step": 472000 }, { "epoch": 1.12, "learning_rate": 1.2503302116924689e-05, "loss": 1.2722, "step": 472500 }, { "epoch": 1.13, "learning_rate": 1.2495369103291806e-05, "loss": 1.2659, "step": 473000 }, { "epoch": 1.13, "learning_rate": 1.2487436089658922e-05, "loss": 1.2732, "step": 473500 }, { "epoch": 1.13, "learning_rate": 1.2479503076026038e-05, "loss": 1.2419, "step": 474000 }, { "epoch": 1.13, "learning_rate": 1.2471570062393152e-05, "loss": 1.2505, "step": 474500 }, { "epoch": 1.13, "learning_rate": 1.2463637048760268e-05, "loss": 1.2709, "step": 475000 }, { "epoch": 1.13, "learning_rate": 1.2455704035127386e-05, "loss": 1.2733, "step": 475500 }, { "epoch": 1.13, "learning_rate": 1.2447771021494502e-05, "loss": 1.2586, "step": 476000 }, { "epoch": 1.13, "learning_rate": 1.2439838007861617e-05, "loss": 1.2517, "step": 476500 }, { "epoch": 1.14, "learning_rate": 1.2431904994228733e-05, "loss": 1.2657, "step": 477000 }, { "epoch": 1.14, "learning_rate": 1.242397198059585e-05, "loss": 1.2724, "step": 477500 }, { "epoch": 1.14, "learning_rate": 1.2416038966962967e-05, "loss": 1.2481, "step": 478000 }, { "epoch": 1.14, "learning_rate": 1.240810595333008e-05, "loss": 1.2894, "step": 478500 }, { "epoch": 1.14, "learning_rate": 1.2400172939697197e-05, "loss": 1.2753, "step": 479000 }, { "epoch": 1.14, "learning_rate": 1.2392239926064313e-05, "loss": 1.254, "step": 479500 }, { "epoch": 1.14, "learning_rate": 1.238430691243143e-05, "loss": 1.2603, "step": 480000 }, { "epoch": 1.14, "learning_rate": 1.2376373898798546e-05, "loss": 1.2692, "step": 480500 }, { "epoch": 1.14, "learning_rate": 1.2368440885165662e-05, "loss": 1.2742, "step": 481000 }, { "epoch": 1.15, "learning_rate": 1.236050787153278e-05, "loss": 1.2838, "step": 481500 }, { "epoch": 1.15, "learning_rate": 1.2352574857899895e-05, "loss": 1.2536, "step": 482000 }, { "epoch": 1.15, "learning_rate": 1.2344641844267011e-05, "loss": 1.2669, "step": 482500 }, { "epoch": 1.15, "learning_rate": 1.2336708830634125e-05, "loss": 1.2685, "step": 483000 }, { "epoch": 1.15, "learning_rate": 1.2328775817001241e-05, "loss": 1.2623, "step": 483500 }, { "epoch": 1.15, "learning_rate": 1.2320842803368359e-05, "loss": 1.2721, "step": 484000 }, { "epoch": 1.15, "learning_rate": 1.2312909789735475e-05, "loss": 1.2683, "step": 484500 }, { "epoch": 1.15, "learning_rate": 1.230497677610259e-05, "loss": 1.2667, "step": 485000 }, { "epoch": 1.16, "learning_rate": 1.2297043762469706e-05, "loss": 1.2757, "step": 485500 }, { "epoch": 1.16, "learning_rate": 1.2289110748836824e-05, "loss": 1.2731, "step": 486000 }, { "epoch": 1.16, "learning_rate": 1.228117773520394e-05, "loss": 1.2847, "step": 486500 }, { "epoch": 1.16, "learning_rate": 1.2273244721571056e-05, "loss": 1.2712, "step": 487000 }, { "epoch": 1.16, "learning_rate": 1.226531170793817e-05, "loss": 1.2747, "step": 487500 }, { "epoch": 1.16, "learning_rate": 1.2257378694305286e-05, "loss": 1.2632, "step": 488000 }, { "epoch": 1.16, "learning_rate": 1.2249445680672403e-05, "loss": 1.2437, "step": 488500 }, { "epoch": 1.16, "learning_rate": 1.2241512667039519e-05, "loss": 1.2564, "step": 489000 }, { "epoch": 1.16, "learning_rate": 1.2233579653406635e-05, "loss": 1.2496, "step": 489500 }, { "epoch": 1.17, "learning_rate": 1.222564663977375e-05, "loss": 1.2671, "step": 490000 }, { "epoch": 1.17, "learning_rate": 1.2217713626140868e-05, "loss": 1.2625, "step": 490500 }, { "epoch": 1.17, "learning_rate": 1.2209780612507984e-05, "loss": 1.2625, "step": 491000 }, { "epoch": 1.17, "learning_rate": 1.22018475988751e-05, "loss": 1.2608, "step": 491500 }, { "epoch": 1.17, "learning_rate": 1.2193914585242214e-05, "loss": 1.2733, "step": 492000 }, { "epoch": 1.17, "learning_rate": 1.2185981571609332e-05, "loss": 1.269, "step": 492500 }, { "epoch": 1.17, "learning_rate": 1.2178048557976448e-05, "loss": 1.2539, "step": 493000 }, { "epoch": 1.17, "learning_rate": 1.2170115544343564e-05, "loss": 1.271, "step": 493500 }, { "epoch": 1.18, "learning_rate": 1.216218253071068e-05, "loss": 1.2579, "step": 494000 }, { "epoch": 1.18, "learning_rate": 1.2154249517077797e-05, "loss": 1.2536, "step": 494500 }, { "epoch": 1.18, "learning_rate": 1.2146316503444913e-05, "loss": 1.2563, "step": 495000 }, { "epoch": 1.18, "learning_rate": 1.2138383489812029e-05, "loss": 1.2678, "step": 495500 }, { "epoch": 1.18, "learning_rate": 1.2130450476179145e-05, "loss": 1.2557, "step": 496000 }, { "epoch": 1.18, "learning_rate": 1.2122517462546259e-05, "loss": 1.2654, "step": 496500 }, { "epoch": 1.18, "learning_rate": 1.2114584448913376e-05, "loss": 1.2754, "step": 497000 }, { "epoch": 1.18, "learning_rate": 1.2106651435280492e-05, "loss": 1.2644, "step": 497500 }, { "epoch": 1.19, "learning_rate": 1.2098718421647608e-05, "loss": 1.2749, "step": 498000 }, { "epoch": 1.19, "learning_rate": 1.2090785408014724e-05, "loss": 1.2735, "step": 498500 }, { "epoch": 1.19, "learning_rate": 1.2082852394381841e-05, "loss": 1.2462, "step": 499000 }, { "epoch": 1.19, "learning_rate": 1.2074919380748957e-05, "loss": 1.2446, "step": 499500 }, { "epoch": 1.19, "learning_rate": 1.2066986367116073e-05, "loss": 1.2712, "step": 500000 }, { "epoch": 1.19, "learning_rate": 1.205905335348319e-05, "loss": 1.2877, "step": 500500 }, { "epoch": 1.19, "learning_rate": 1.2051120339850303e-05, "loss": 1.2603, "step": 501000 }, { "epoch": 1.19, "learning_rate": 1.204318732621742e-05, "loss": 1.2627, "step": 501500 }, { "epoch": 1.19, "learning_rate": 1.2035254312584537e-05, "loss": 1.2672, "step": 502000 }, { "epoch": 1.2, "learning_rate": 1.2027321298951653e-05, "loss": 1.2648, "step": 502500 }, { "epoch": 1.2, "learning_rate": 1.201938828531877e-05, "loss": 1.2715, "step": 503000 }, { "epoch": 1.2, "learning_rate": 1.2011455271685886e-05, "loss": 1.2591, "step": 503500 }, { "epoch": 1.2, "learning_rate": 1.2003522258053002e-05, "loss": 1.2527, "step": 504000 }, { "epoch": 1.2, "learning_rate": 1.1995589244420118e-05, "loss": 1.2525, "step": 504500 }, { "epoch": 1.2, "learning_rate": 1.1987656230787235e-05, "loss": 1.2487, "step": 505000 }, { "epoch": 1.2, "learning_rate": 1.197972321715435e-05, "loss": 1.2802, "step": 505500 }, { "epoch": 1.2, "learning_rate": 1.1971790203521465e-05, "loss": 1.2523, "step": 506000 }, { "epoch": 1.21, "learning_rate": 1.1963857189888581e-05, "loss": 1.2546, "step": 506500 }, { "epoch": 1.21, "learning_rate": 1.1955924176255697e-05, "loss": 1.2496, "step": 507000 }, { "epoch": 1.21, "learning_rate": 1.1947991162622815e-05, "loss": 1.2648, "step": 507500 }, { "epoch": 1.21, "learning_rate": 1.194005814898993e-05, "loss": 1.2594, "step": 508000 }, { "epoch": 1.21, "learning_rate": 1.1932125135357046e-05, "loss": 1.2646, "step": 508500 }, { "epoch": 1.21, "learning_rate": 1.1924192121724162e-05, "loss": 1.2682, "step": 509000 }, { "epoch": 1.21, "learning_rate": 1.191625910809128e-05, "loss": 1.2853, "step": 509500 }, { "epoch": 1.21, "learning_rate": 1.1908326094458394e-05, "loss": 1.2447, "step": 510000 }, { "epoch": 1.21, "learning_rate": 1.190039308082551e-05, "loss": 1.2698, "step": 510500 }, { "epoch": 1.22, "learning_rate": 1.1892460067192626e-05, "loss": 1.261, "step": 511000 }, { "epoch": 1.22, "learning_rate": 1.1884527053559743e-05, "loss": 1.2583, "step": 511500 }, { "epoch": 1.22, "learning_rate": 1.1876594039926859e-05, "loss": 1.2381, "step": 512000 }, { "epoch": 1.22, "learning_rate": 1.1868661026293975e-05, "loss": 1.2632, "step": 512500 }, { "epoch": 1.22, "learning_rate": 1.186072801266109e-05, "loss": 1.2616, "step": 513000 }, { "epoch": 1.22, "learning_rate": 1.1852794999028208e-05, "loss": 1.2488, "step": 513500 }, { "epoch": 1.22, "learning_rate": 1.1844861985395324e-05, "loss": 1.2648, "step": 514000 }, { "epoch": 1.22, "learning_rate": 1.1836928971762438e-05, "loss": 1.2885, "step": 514500 }, { "epoch": 1.23, "learning_rate": 1.1828995958129554e-05, "loss": 1.2656, "step": 515000 }, { "epoch": 1.23, "learning_rate": 1.182106294449667e-05, "loss": 1.2588, "step": 515500 }, { "epoch": 1.23, "learning_rate": 1.1813129930863788e-05, "loss": 1.2548, "step": 516000 }, { "epoch": 1.23, "learning_rate": 1.1805196917230904e-05, "loss": 1.2548, "step": 516500 }, { "epoch": 1.23, "learning_rate": 1.179726390359802e-05, "loss": 1.2725, "step": 517000 }, { "epoch": 1.23, "learning_rate": 1.1789330889965135e-05, "loss": 1.2597, "step": 517500 }, { "epoch": 1.23, "learning_rate": 1.1781397876332253e-05, "loss": 1.2402, "step": 518000 }, { "epoch": 1.23, "learning_rate": 1.1773464862699369e-05, "loss": 1.2608, "step": 518500 }, { "epoch": 1.24, "learning_rate": 1.1765531849066483e-05, "loss": 1.2566, "step": 519000 }, { "epoch": 1.24, "learning_rate": 1.1757598835433599e-05, "loss": 1.2647, "step": 519500 }, { "epoch": 1.24, "learning_rate": 1.1749665821800715e-05, "loss": 1.2612, "step": 520000 }, { "epoch": 1.24, "learning_rate": 1.1741732808167832e-05, "loss": 1.2745, "step": 520500 }, { "epoch": 1.24, "learning_rate": 1.1733799794534948e-05, "loss": 1.2398, "step": 521000 }, { "epoch": 1.24, "learning_rate": 1.1725866780902064e-05, "loss": 1.2673, "step": 521500 }, { "epoch": 1.24, "learning_rate": 1.1717933767269181e-05, "loss": 1.2562, "step": 522000 }, { "epoch": 1.24, "learning_rate": 1.1710000753636297e-05, "loss": 1.2438, "step": 522500 }, { "epoch": 1.24, "learning_rate": 1.1702067740003411e-05, "loss": 1.2576, "step": 523000 }, { "epoch": 1.25, "learning_rate": 1.1694134726370527e-05, "loss": 1.2526, "step": 523500 }, { "epoch": 1.25, "learning_rate": 1.1686201712737643e-05, "loss": 1.2539, "step": 524000 }, { "epoch": 1.25, "learning_rate": 1.167826869910476e-05, "loss": 1.266, "step": 524500 }, { "epoch": 1.25, "learning_rate": 1.1670335685471877e-05, "loss": 1.2492, "step": 525000 }, { "epoch": 1.25, "learning_rate": 1.1662402671838992e-05, "loss": 1.2492, "step": 525500 }, { "epoch": 1.25, "learning_rate": 1.1654469658206108e-05, "loss": 1.2582, "step": 526000 }, { "epoch": 1.25, "learning_rate": 1.1646536644573226e-05, "loss": 1.255, "step": 526500 }, { "epoch": 1.25, "learning_rate": 1.1638603630940342e-05, "loss": 1.2504, "step": 527000 }, { "epoch": 1.26, "learning_rate": 1.1630670617307456e-05, "loss": 1.274, "step": 527500 }, { "epoch": 1.26, "learning_rate": 1.1622737603674572e-05, "loss": 1.2571, "step": 528000 }, { "epoch": 1.26, "learning_rate": 1.1614804590041688e-05, "loss": 1.2645, "step": 528500 }, { "epoch": 1.26, "learning_rate": 1.1606871576408805e-05, "loss": 1.2595, "step": 529000 }, { "epoch": 1.26, "learning_rate": 1.1598938562775921e-05, "loss": 1.2425, "step": 529500 }, { "epoch": 1.26, "learning_rate": 1.1591005549143037e-05, "loss": 1.2575, "step": 530000 }, { "epoch": 1.26, "learning_rate": 1.1583072535510153e-05, "loss": 1.2421, "step": 530500 }, { "epoch": 1.26, "learning_rate": 1.157513952187727e-05, "loss": 1.2479, "step": 531000 }, { "epoch": 1.26, "learning_rate": 1.1567206508244386e-05, "loss": 1.2727, "step": 531500 }, { "epoch": 1.27, "learning_rate": 1.15592734946115e-05, "loss": 1.2378, "step": 532000 }, { "epoch": 1.27, "learning_rate": 1.1551340480978616e-05, "loss": 1.2603, "step": 532500 }, { "epoch": 1.27, "learning_rate": 1.1543407467345734e-05, "loss": 1.2565, "step": 533000 }, { "epoch": 1.27, "learning_rate": 1.153547445371285e-05, "loss": 1.2526, "step": 533500 }, { "epoch": 1.27, "learning_rate": 1.1527541440079966e-05, "loss": 1.2603, "step": 534000 }, { "epoch": 1.27, "learning_rate": 1.1519608426447081e-05, "loss": 1.2542, "step": 534500 }, { "epoch": 1.27, "learning_rate": 1.1511675412814199e-05, "loss": 1.2574, "step": 535000 }, { "epoch": 1.27, "learning_rate": 1.1503742399181315e-05, "loss": 1.258, "step": 535500 }, { "epoch": 1.28, "learning_rate": 1.149580938554843e-05, "loss": 1.2556, "step": 536000 }, { "epoch": 1.28, "learning_rate": 1.1487876371915545e-05, "loss": 1.2515, "step": 536500 }, { "epoch": 1.28, "learning_rate": 1.147994335828266e-05, "loss": 1.2427, "step": 537000 }, { "epoch": 1.28, "learning_rate": 1.1472010344649778e-05, "loss": 1.2583, "step": 537500 }, { "epoch": 1.28, "learning_rate": 1.1464077331016894e-05, "loss": 1.2782, "step": 538000 }, { "epoch": 1.28, "learning_rate": 1.145614431738401e-05, "loss": 1.2524, "step": 538500 }, { "epoch": 1.28, "learning_rate": 1.1448211303751126e-05, "loss": 1.2447, "step": 539000 }, { "epoch": 1.28, "learning_rate": 1.1440278290118243e-05, "loss": 1.2417, "step": 539500 }, { "epoch": 1.29, "learning_rate": 1.143234527648536e-05, "loss": 1.2527, "step": 540000 }, { "epoch": 1.29, "learning_rate": 1.1424412262852475e-05, "loss": 1.2551, "step": 540500 }, { "epoch": 1.29, "learning_rate": 1.141647924921959e-05, "loss": 1.2426, "step": 541000 }, { "epoch": 1.29, "learning_rate": 1.1408546235586705e-05, "loss": 1.2493, "step": 541500 }, { "epoch": 1.29, "learning_rate": 1.1400613221953823e-05, "loss": 1.2722, "step": 542000 }, { "epoch": 1.29, "learning_rate": 1.1392680208320939e-05, "loss": 1.2572, "step": 542500 }, { "epoch": 1.29, "learning_rate": 1.1384747194688055e-05, "loss": 1.2443, "step": 543000 }, { "epoch": 1.29, "learning_rate": 1.1376814181055172e-05, "loss": 1.2579, "step": 543500 }, { "epoch": 1.29, "learning_rate": 1.1368881167422288e-05, "loss": 1.2437, "step": 544000 }, { "epoch": 1.3, "learning_rate": 1.1360948153789404e-05, "loss": 1.2388, "step": 544500 }, { "epoch": 1.3, "learning_rate": 1.135301514015652e-05, "loss": 1.2611, "step": 545000 }, { "epoch": 1.3, "learning_rate": 1.1345082126523634e-05, "loss": 1.2557, "step": 545500 }, { "epoch": 1.3, "learning_rate": 1.1337149112890751e-05, "loss": 1.2445, "step": 546000 }, { "epoch": 1.3, "learning_rate": 1.1329216099257867e-05, "loss": 1.2309, "step": 546500 }, { "epoch": 1.3, "learning_rate": 1.1321283085624983e-05, "loss": 1.2554, "step": 547000 }, { "epoch": 1.3, "learning_rate": 1.1313350071992099e-05, "loss": 1.2679, "step": 547500 }, { "epoch": 1.3, "learning_rate": 1.1305417058359217e-05, "loss": 1.2626, "step": 548000 }, { "epoch": 1.31, "learning_rate": 1.1297484044726332e-05, "loss": 1.2426, "step": 548500 }, { "epoch": 1.31, "learning_rate": 1.1289551031093448e-05, "loss": 1.2694, "step": 549000 }, { "epoch": 1.31, "learning_rate": 1.1281618017460564e-05, "loss": 1.2539, "step": 549500 }, { "epoch": 1.31, "learning_rate": 1.1273685003827678e-05, "loss": 1.2498, "step": 550000 }, { "epoch": 1.31, "learning_rate": 1.1265751990194796e-05, "loss": 1.2407, "step": 550500 }, { "epoch": 1.31, "learning_rate": 1.1257818976561912e-05, "loss": 1.2472, "step": 551000 }, { "epoch": 1.31, "learning_rate": 1.1249885962929028e-05, "loss": 1.2337, "step": 551500 }, { "epoch": 1.31, "learning_rate": 1.1241952949296145e-05, "loss": 1.2631, "step": 552000 }, { "epoch": 1.31, "learning_rate": 1.1234019935663261e-05, "loss": 1.2417, "step": 552500 }, { "epoch": 1.32, "learning_rate": 1.1226086922030377e-05, "loss": 1.2538, "step": 553000 }, { "epoch": 1.32, "learning_rate": 1.1218153908397493e-05, "loss": 1.2546, "step": 553500 }, { "epoch": 1.32, "learning_rate": 1.121022089476461e-05, "loss": 1.253, "step": 554000 }, { "epoch": 1.32, "learning_rate": 1.1202287881131724e-05, "loss": 1.2454, "step": 554500 }, { "epoch": 1.32, "learning_rate": 1.119435486749884e-05, "loss": 1.2478, "step": 555000 }, { "epoch": 1.32, "learning_rate": 1.1186421853865956e-05, "loss": 1.2628, "step": 555500 }, { "epoch": 1.32, "learning_rate": 1.1178488840233072e-05, "loss": 1.2592, "step": 556000 }, { "epoch": 1.32, "learning_rate": 1.117055582660019e-05, "loss": 1.258, "step": 556500 }, { "epoch": 1.33, "learning_rate": 1.1162622812967306e-05, "loss": 1.2573, "step": 557000 }, { "epoch": 1.33, "learning_rate": 1.1154689799334421e-05, "loss": 1.2372, "step": 557500 }, { "epoch": 1.33, "learning_rate": 1.1146756785701537e-05, "loss": 1.2416, "step": 558000 }, { "epoch": 1.33, "learning_rate": 1.1138823772068655e-05, "loss": 1.2381, "step": 558500 }, { "epoch": 1.33, "learning_rate": 1.1130890758435769e-05, "loss": 1.2492, "step": 559000 }, { "epoch": 1.33, "learning_rate": 1.1122957744802885e-05, "loss": 1.259, "step": 559500 }, { "epoch": 1.33, "learning_rate": 1.111502473117e-05, "loss": 1.2353, "step": 560000 }, { "epoch": 1.33, "learning_rate": 1.1107091717537117e-05, "loss": 1.2314, "step": 560500 }, { "epoch": 1.34, "learning_rate": 1.1099158703904234e-05, "loss": 1.2596, "step": 561000 }, { "epoch": 1.34, "learning_rate": 1.109122569027135e-05, "loss": 1.2457, "step": 561500 }, { "epoch": 1.34, "learning_rate": 1.1083292676638466e-05, "loss": 1.2315, "step": 562000 }, { "epoch": 1.34, "learning_rate": 1.1075359663005583e-05, "loss": 1.2322, "step": 562500 }, { "epoch": 1.34, "learning_rate": 1.10674266493727e-05, "loss": 1.2474, "step": 563000 }, { "epoch": 1.34, "learning_rate": 1.1059493635739813e-05, "loss": 1.251, "step": 563500 }, { "epoch": 1.34, "learning_rate": 1.105156062210693e-05, "loss": 1.2431, "step": 564000 }, { "epoch": 1.34, "learning_rate": 1.1043627608474045e-05, "loss": 1.2544, "step": 564500 }, { "epoch": 1.34, "learning_rate": 1.1035694594841163e-05, "loss": 1.2439, "step": 565000 }, { "epoch": 1.35, "learning_rate": 1.1027761581208279e-05, "loss": 1.2488, "step": 565500 }, { "epoch": 1.35, "learning_rate": 1.1019828567575394e-05, "loss": 1.2515, "step": 566000 }, { "epoch": 1.35, "learning_rate": 1.101189555394251e-05, "loss": 1.2324, "step": 566500 }, { "epoch": 1.35, "learning_rate": 1.1003962540309628e-05, "loss": 1.2294, "step": 567000 }, { "epoch": 1.35, "learning_rate": 1.0996029526676742e-05, "loss": 1.2316, "step": 567500 }, { "epoch": 1.35, "learning_rate": 1.0988096513043858e-05, "loss": 1.219, "step": 568000 }, { "epoch": 1.35, "learning_rate": 1.0980163499410974e-05, "loss": 1.2251, "step": 568500 }, { "epoch": 1.35, "learning_rate": 1.097223048577809e-05, "loss": 1.2195, "step": 569000 }, { "epoch": 1.36, "learning_rate": 1.0964297472145207e-05, "loss": 1.2508, "step": 569500 }, { "epoch": 1.36, "learning_rate": 1.0956364458512323e-05, "loss": 1.2491, "step": 570000 }, { "epoch": 1.36, "learning_rate": 1.0948431444879439e-05, "loss": 1.2378, "step": 570500 }, { "epoch": 1.36, "learning_rate": 1.0940498431246555e-05, "loss": 1.2378, "step": 571000 }, { "epoch": 1.36, "learning_rate": 1.0932565417613672e-05, "loss": 1.2436, "step": 571500 }, { "epoch": 1.36, "learning_rate": 1.0924632403980787e-05, "loss": 1.2443, "step": 572000 }, { "epoch": 1.36, "learning_rate": 1.0916699390347902e-05, "loss": 1.2383, "step": 572500 }, { "epoch": 1.36, "learning_rate": 1.0908766376715018e-05, "loss": 1.2309, "step": 573000 }, { "epoch": 1.36, "learning_rate": 1.0900833363082136e-05, "loss": 1.2478, "step": 573500 }, { "epoch": 1.37, "learning_rate": 1.0892900349449252e-05, "loss": 1.2505, "step": 574000 }, { "epoch": 1.37, "learning_rate": 1.0884967335816368e-05, "loss": 1.2439, "step": 574500 }, { "epoch": 1.37, "learning_rate": 1.0877034322183483e-05, "loss": 1.2237, "step": 575000 }, { "epoch": 1.37, "learning_rate": 1.0869101308550601e-05, "loss": 1.2199, "step": 575500 }, { "epoch": 1.37, "learning_rate": 1.0861168294917717e-05, "loss": 1.2324, "step": 576000 }, { "epoch": 1.37, "learning_rate": 1.0853235281284831e-05, "loss": 1.2398, "step": 576500 }, { "epoch": 1.37, "learning_rate": 1.0845302267651947e-05, "loss": 1.2273, "step": 577000 }, { "epoch": 1.37, "learning_rate": 1.0837369254019063e-05, "loss": 1.2428, "step": 577500 }, { "epoch": 1.38, "learning_rate": 1.082943624038618e-05, "loss": 1.255, "step": 578000 }, { "epoch": 1.38, "learning_rate": 1.0821503226753296e-05, "loss": 1.2402, "step": 578500 }, { "epoch": 1.38, "learning_rate": 1.0813570213120412e-05, "loss": 1.2374, "step": 579000 }, { "epoch": 1.38, "learning_rate": 1.0805637199487528e-05, "loss": 1.2142, "step": 579500 }, { "epoch": 1.38, "learning_rate": 1.0797704185854645e-05, "loss": 1.2368, "step": 580000 }, { "epoch": 1.38, "learning_rate": 1.0789771172221761e-05, "loss": 1.2328, "step": 580500 }, { "epoch": 1.38, "learning_rate": 1.0781838158588875e-05, "loss": 1.2355, "step": 581000 }, { "epoch": 1.38, "learning_rate": 1.0773905144955991e-05, "loss": 1.2521, "step": 581500 }, { "epoch": 1.39, "learning_rate": 1.0765972131323107e-05, "loss": 1.234, "step": 582000 }, { "epoch": 1.39, "learning_rate": 1.0758039117690225e-05, "loss": 1.2303, "step": 582500 }, { "epoch": 1.39, "learning_rate": 1.075010610405734e-05, "loss": 1.2484, "step": 583000 }, { "epoch": 1.39, "learning_rate": 1.0742173090424457e-05, "loss": 1.2394, "step": 583500 }, { "epoch": 1.39, "learning_rate": 1.0734240076791574e-05, "loss": 1.2242, "step": 584000 }, { "epoch": 1.39, "learning_rate": 1.072630706315869e-05, "loss": 1.2562, "step": 584500 }, { "epoch": 1.39, "learning_rate": 1.0718374049525806e-05, "loss": 1.2375, "step": 585000 }, { "epoch": 1.39, "learning_rate": 1.071044103589292e-05, "loss": 1.2331, "step": 585500 }, { "epoch": 1.39, "learning_rate": 1.0702508022260036e-05, "loss": 1.2309, "step": 586000 }, { "epoch": 1.4, "learning_rate": 1.0694575008627153e-05, "loss": 1.2495, "step": 586500 }, { "epoch": 1.4, "learning_rate": 1.068664199499427e-05, "loss": 1.2242, "step": 587000 }, { "epoch": 1.4, "learning_rate": 1.0678708981361385e-05, "loss": 1.2333, "step": 587500 }, { "epoch": 1.4, "learning_rate": 1.0670775967728501e-05, "loss": 1.2456, "step": 588000 }, { "epoch": 1.4, "learning_rate": 1.0662842954095619e-05, "loss": 1.2451, "step": 588500 }, { "epoch": 1.4, "learning_rate": 1.0654909940462734e-05, "loss": 1.2471, "step": 589000 }, { "epoch": 1.4, "learning_rate": 1.064697692682985e-05, "loss": 1.2343, "step": 589500 }, { "epoch": 1.4, "learning_rate": 1.0639043913196964e-05, "loss": 1.225, "step": 590000 }, { "epoch": 1.41, "learning_rate": 1.063111089956408e-05, "loss": 1.2223, "step": 590500 }, { "epoch": 1.41, "learning_rate": 1.0623177885931198e-05, "loss": 1.2502, "step": 591000 }, { "epoch": 1.41, "learning_rate": 1.0615244872298314e-05, "loss": 1.2455, "step": 591500 }, { "epoch": 1.41, "learning_rate": 1.060731185866543e-05, "loss": 1.2542, "step": 592000 }, { "epoch": 1.41, "learning_rate": 1.0599378845032547e-05, "loss": 1.2274, "step": 592500 }, { "epoch": 1.41, "learning_rate": 1.0591445831399663e-05, "loss": 1.2338, "step": 593000 }, { "epoch": 1.41, "learning_rate": 1.0583512817766779e-05, "loss": 1.2195, "step": 593500 }, { "epoch": 1.41, "learning_rate": 1.0575579804133895e-05, "loss": 1.2531, "step": 594000 }, { "epoch": 1.41, "learning_rate": 1.0567646790501009e-05, "loss": 1.2461, "step": 594500 }, { "epoch": 1.42, "learning_rate": 1.0559713776868126e-05, "loss": 1.2358, "step": 595000 }, { "epoch": 1.42, "learning_rate": 1.0551780763235242e-05, "loss": 1.2296, "step": 595500 }, { "epoch": 1.42, "learning_rate": 1.0543847749602358e-05, "loss": 1.228, "step": 596000 }, { "epoch": 1.42, "learning_rate": 1.0535914735969474e-05, "loss": 1.2335, "step": 596500 }, { "epoch": 1.42, "learning_rate": 1.0527981722336592e-05, "loss": 1.2414, "step": 597000 }, { "epoch": 1.42, "learning_rate": 1.0520048708703708e-05, "loss": 1.2255, "step": 597500 }, { "epoch": 1.42, "learning_rate": 1.0512115695070823e-05, "loss": 1.2497, "step": 598000 }, { "epoch": 1.42, "learning_rate": 1.050418268143794e-05, "loss": 1.2509, "step": 598500 }, { "epoch": 1.43, "learning_rate": 1.0496249667805053e-05, "loss": 1.2435, "step": 599000 }, { "epoch": 1.43, "learning_rate": 1.0488316654172171e-05, "loss": 1.2341, "step": 599500 }, { "epoch": 1.43, "learning_rate": 1.0480383640539287e-05, "loss": 1.2228, "step": 600000 }, { "epoch": 1.43, "learning_rate": 1.0472450626906403e-05, "loss": 1.24, "step": 600500 }, { "epoch": 1.43, "learning_rate": 1.0464517613273519e-05, "loss": 1.2094, "step": 601000 }, { "epoch": 1.43, "learning_rate": 1.0456584599640636e-05, "loss": 1.2489, "step": 601500 }, { "epoch": 1.43, "learning_rate": 1.0448651586007752e-05, "loss": 1.2388, "step": 602000 }, { "epoch": 1.43, "learning_rate": 1.0440718572374868e-05, "loss": 1.2268, "step": 602500 }, { "epoch": 1.44, "learning_rate": 1.0432785558741985e-05, "loss": 1.2429, "step": 603000 }, { "epoch": 1.44, "learning_rate": 1.0424852545109098e-05, "loss": 1.2336, "step": 603500 }, { "epoch": 1.44, "learning_rate": 1.0416919531476215e-05, "loss": 1.2319, "step": 604000 }, { "epoch": 1.44, "learning_rate": 1.0408986517843331e-05, "loss": 1.231, "step": 604500 }, { "epoch": 1.44, "learning_rate": 1.0401053504210447e-05, "loss": 1.2306, "step": 605000 }, { "epoch": 1.44, "learning_rate": 1.0393120490577565e-05, "loss": 1.2165, "step": 605500 }, { "epoch": 1.44, "learning_rate": 1.038518747694468e-05, "loss": 1.2401, "step": 606000 }, { "epoch": 1.44, "learning_rate": 1.0377254463311796e-05, "loss": 1.2291, "step": 606500 }, { "epoch": 1.44, "learning_rate": 1.0369321449678912e-05, "loss": 1.2372, "step": 607000 }, { "epoch": 1.45, "learning_rate": 1.036138843604603e-05, "loss": 1.2341, "step": 607500 }, { "epoch": 1.45, "learning_rate": 1.0353455422413144e-05, "loss": 1.2404, "step": 608000 }, { "epoch": 1.45, "learning_rate": 1.034552240878026e-05, "loss": 1.2312, "step": 608500 }, { "epoch": 1.45, "learning_rate": 1.0337589395147376e-05, "loss": 1.2478, "step": 609000 }, { "epoch": 1.45, "learning_rate": 1.0329656381514492e-05, "loss": 1.2271, "step": 609500 }, { "epoch": 1.45, "learning_rate": 1.032172336788161e-05, "loss": 1.2301, "step": 610000 }, { "epoch": 1.45, "learning_rate": 1.0313790354248725e-05, "loss": 1.219, "step": 610500 }, { "epoch": 1.45, "learning_rate": 1.0305857340615841e-05, "loss": 1.2283, "step": 611000 }, { "epoch": 1.46, "learning_rate": 1.0297924326982957e-05, "loss": 1.2512, "step": 611500 }, { "epoch": 1.46, "learning_rate": 1.0289991313350071e-05, "loss": 1.2383, "step": 612000 }, { "epoch": 1.46, "learning_rate": 1.0282058299717189e-05, "loss": 1.2406, "step": 612500 }, { "epoch": 1.46, "learning_rate": 1.0274125286084304e-05, "loss": 1.2417, "step": 613000 }, { "epoch": 1.46, "learning_rate": 1.026619227245142e-05, "loss": 1.2424, "step": 613500 }, { "epoch": 1.46, "learning_rate": 1.0258259258818538e-05, "loss": 1.2309, "step": 614000 }, { "epoch": 1.46, "learning_rate": 1.0250326245185654e-05, "loss": 1.2327, "step": 614500 }, { "epoch": 1.46, "learning_rate": 1.024239323155277e-05, "loss": 1.2523, "step": 615000 }, { "epoch": 1.46, "learning_rate": 1.0234460217919885e-05, "loss": 1.2256, "step": 615500 }, { "epoch": 1.47, "learning_rate": 1.0226527204287003e-05, "loss": 1.2333, "step": 616000 }, { "epoch": 1.47, "learning_rate": 1.0218594190654117e-05, "loss": 1.237, "step": 616500 }, { "epoch": 1.47, "learning_rate": 1.0210661177021233e-05, "loss": 1.2365, "step": 617000 }, { "epoch": 1.47, "learning_rate": 1.0202728163388349e-05, "loss": 1.2267, "step": 617500 }, { "epoch": 1.47, "learning_rate": 1.0194795149755465e-05, "loss": 1.2259, "step": 618000 }, { "epoch": 1.47, "learning_rate": 1.0186862136122582e-05, "loss": 1.2367, "step": 618500 }, { "epoch": 1.47, "learning_rate": 1.0178929122489698e-05, "loss": 1.2462, "step": 619000 }, { "epoch": 1.47, "learning_rate": 1.0170996108856814e-05, "loss": 1.2328, "step": 619500 }, { "epoch": 1.48, "learning_rate": 1.016306309522393e-05, "loss": 1.2295, "step": 620000 }, { "epoch": 1.48, "learning_rate": 1.0155130081591047e-05, "loss": 1.2077, "step": 620500 }, { "epoch": 1.48, "learning_rate": 1.0147197067958162e-05, "loss": 1.2405, "step": 621000 }, { "epoch": 1.48, "learning_rate": 1.0139264054325277e-05, "loss": 1.236, "step": 621500 }, { "epoch": 1.48, "learning_rate": 1.0131331040692393e-05, "loss": 1.2219, "step": 622000 }, { "epoch": 1.48, "learning_rate": 1.012339802705951e-05, "loss": 1.2249, "step": 622500 }, { "epoch": 1.48, "learning_rate": 1.0115465013426627e-05, "loss": 1.2262, "step": 623000 }, { "epoch": 1.48, "learning_rate": 1.0107531999793743e-05, "loss": 1.2271, "step": 623500 }, { "epoch": 1.49, "learning_rate": 1.0099598986160859e-05, "loss": 1.2369, "step": 624000 }, { "epoch": 1.49, "learning_rate": 1.0091665972527976e-05, "loss": 1.2349, "step": 624500 }, { "epoch": 1.49, "learning_rate": 1.0083732958895092e-05, "loss": 1.2275, "step": 625000 }, { "epoch": 1.49, "learning_rate": 1.0075799945262206e-05, "loss": 1.2324, "step": 625500 }, { "epoch": 1.49, "learning_rate": 1.0067866931629322e-05, "loss": 1.2136, "step": 626000 }, { "epoch": 1.49, "learning_rate": 1.0059933917996438e-05, "loss": 1.2246, "step": 626500 }, { "epoch": 1.49, "learning_rate": 1.0052000904363555e-05, "loss": 1.2136, "step": 627000 }, { "epoch": 1.49, "learning_rate": 1.0044067890730671e-05, "loss": 1.2296, "step": 627500 }, { "epoch": 1.49, "learning_rate": 1.0036134877097787e-05, "loss": 1.2207, "step": 628000 }, { "epoch": 1.5, "learning_rate": 1.0028201863464903e-05, "loss": 1.225, "step": 628500 }, { "epoch": 1.5, "learning_rate": 1.002026884983202e-05, "loss": 1.2222, "step": 629000 }, { "epoch": 1.5, "learning_rate": 1.0012335836199136e-05, "loss": 1.227, "step": 629500 }, { "epoch": 1.5, "learning_rate": 1.000440282256625e-05, "loss": 1.2424, "step": 630000 }, { "epoch": 1.5, "learning_rate": 9.996469808933368e-06, "loss": 1.2185, "step": 630500 }, { "epoch": 1.5, "learning_rate": 9.988536795300482e-06, "loss": 1.2252, "step": 631000 }, { "epoch": 1.5, "learning_rate": 9.9806037816676e-06, "loss": 1.2155, "step": 631500 }, { "epoch": 1.5, "learning_rate": 9.972670768034716e-06, "loss": 1.2481, "step": 632000 }, { "epoch": 1.51, "learning_rate": 9.964737754401832e-06, "loss": 1.2385, "step": 632500 }, { "epoch": 1.51, "learning_rate": 9.95680474076895e-06, "loss": 1.2393, "step": 633000 }, { "epoch": 1.51, "learning_rate": 9.948871727136063e-06, "loss": 1.2289, "step": 633500 }, { "epoch": 1.51, "learning_rate": 9.94093871350318e-06, "loss": 1.2412, "step": 634000 }, { "epoch": 1.51, "learning_rate": 9.933005699870297e-06, "loss": 1.2337, "step": 634500 }, { "epoch": 1.51, "learning_rate": 9.925072686237413e-06, "loss": 1.2283, "step": 635000 }, { "epoch": 1.51, "learning_rate": 9.917139672604528e-06, "loss": 1.2191, "step": 635500 }, { "epoch": 1.51, "learning_rate": 9.909206658971644e-06, "loss": 1.2281, "step": 636000 }, { "epoch": 1.51, "learning_rate": 9.90127364533876e-06, "loss": 1.2253, "step": 636500 }, { "epoch": 1.52, "learning_rate": 9.893340631705876e-06, "loss": 1.2368, "step": 637000 }, { "epoch": 1.52, "learning_rate": 9.885407618072994e-06, "loss": 1.221, "step": 637500 }, { "epoch": 1.52, "learning_rate": 9.877474604440108e-06, "loss": 1.2251, "step": 638000 }, { "epoch": 1.52, "learning_rate": 9.869541590807224e-06, "loss": 1.2191, "step": 638500 }, { "epoch": 1.52, "learning_rate": 9.861608577174341e-06, "loss": 1.2252, "step": 639000 }, { "epoch": 1.52, "learning_rate": 9.853675563541457e-06, "loss": 1.2376, "step": 639500 }, { "epoch": 1.52, "learning_rate": 9.845742549908573e-06, "loss": 1.2339, "step": 640000 }, { "epoch": 1.52, "learning_rate": 9.837809536275689e-06, "loss": 1.2308, "step": 640500 }, { "epoch": 1.53, "learning_rate": 9.829876522642805e-06, "loss": 1.2171, "step": 641000 }, { "epoch": 1.53, "learning_rate": 9.82194350900992e-06, "loss": 1.224, "step": 641500 }, { "epoch": 1.53, "learning_rate": 9.814010495377038e-06, "loss": 1.2415, "step": 642000 }, { "epoch": 1.53, "learning_rate": 9.806077481744152e-06, "loss": 1.2177, "step": 642500 }, { "epoch": 1.53, "learning_rate": 9.79814446811127e-06, "loss": 1.2285, "step": 643000 }, { "epoch": 1.53, "learning_rate": 9.790211454478386e-06, "loss": 1.2279, "step": 643500 }, { "epoch": 1.53, "learning_rate": 9.782278440845502e-06, "loss": 1.223, "step": 644000 }, { "epoch": 1.53, "learning_rate": 9.774345427212617e-06, "loss": 1.2278, "step": 644500 }, { "epoch": 1.54, "learning_rate": 9.766412413579733e-06, "loss": 1.2226, "step": 645000 }, { "epoch": 1.54, "learning_rate": 9.75847939994685e-06, "loss": 1.2169, "step": 645500 }, { "epoch": 1.54, "learning_rate": 9.750546386313967e-06, "loss": 1.216, "step": 646000 }, { "epoch": 1.54, "learning_rate": 9.742613372681081e-06, "loss": 1.2381, "step": 646500 }, { "epoch": 1.54, "learning_rate": 9.734680359048197e-06, "loss": 1.2404, "step": 647000 }, { "epoch": 1.54, "learning_rate": 9.726747345415314e-06, "loss": 1.2249, "step": 647500 }, { "epoch": 1.54, "learning_rate": 9.71881433178243e-06, "loss": 1.2213, "step": 648000 }, { "epoch": 1.54, "learning_rate": 9.710881318149546e-06, "loss": 1.2233, "step": 648500 }, { "epoch": 1.54, "learning_rate": 9.702948304516662e-06, "loss": 1.2286, "step": 649000 }, { "epoch": 1.55, "learning_rate": 9.695015290883778e-06, "loss": 1.2079, "step": 649500 }, { "epoch": 1.55, "learning_rate": 9.687082277250894e-06, "loss": 1.2364, "step": 650000 }, { "epoch": 1.55, "learning_rate": 9.679149263618011e-06, "loss": 1.2281, "step": 650500 }, { "epoch": 1.55, "learning_rate": 9.671216249985125e-06, "loss": 1.2287, "step": 651000 }, { "epoch": 1.55, "learning_rate": 9.663283236352243e-06, "loss": 1.2199, "step": 651500 }, { "epoch": 1.55, "learning_rate": 9.655350222719359e-06, "loss": 1.2197, "step": 652000 }, { "epoch": 1.55, "learning_rate": 9.647417209086475e-06, "loss": 1.2189, "step": 652500 }, { "epoch": 1.55, "learning_rate": 9.63948419545359e-06, "loss": 1.2235, "step": 653000 }, { "epoch": 1.56, "learning_rate": 9.631551181820706e-06, "loss": 1.2138, "step": 653500 }, { "epoch": 1.56, "learning_rate": 9.623618168187822e-06, "loss": 1.2133, "step": 654000 }, { "epoch": 1.56, "learning_rate": 9.61568515455494e-06, "loss": 1.2297, "step": 654500 }, { "epoch": 1.56, "learning_rate": 9.607752140922056e-06, "loss": 1.222, "step": 655000 }, { "epoch": 1.56, "learning_rate": 9.59981912728917e-06, "loss": 1.2225, "step": 655500 }, { "epoch": 1.56, "learning_rate": 9.591886113656287e-06, "loss": 1.214, "step": 656000 }, { "epoch": 1.56, "learning_rate": 9.583953100023403e-06, "loss": 1.2258, "step": 656500 }, { "epoch": 1.56, "learning_rate": 9.576020086390519e-06, "loss": 1.2062, "step": 657000 }, { "epoch": 1.56, "learning_rate": 9.568087072757635e-06, "loss": 1.2271, "step": 657500 }, { "epoch": 1.57, "learning_rate": 9.560154059124751e-06, "loss": 1.2179, "step": 658000 }, { "epoch": 1.57, "learning_rate": 9.552221045491867e-06, "loss": 1.2252, "step": 658500 }, { "epoch": 1.57, "learning_rate": 9.544288031858984e-06, "loss": 1.2442, "step": 659000 }, { "epoch": 1.57, "learning_rate": 9.5363550182261e-06, "loss": 1.2145, "step": 659500 }, { "epoch": 1.57, "learning_rate": 9.528422004593216e-06, "loss": 1.2347, "step": 660000 }, { "epoch": 1.57, "learning_rate": 9.520488990960332e-06, "loss": 1.2252, "step": 660500 }, { "epoch": 1.57, "learning_rate": 9.512555977327448e-06, "loss": 1.2191, "step": 661000 }, { "epoch": 1.57, "learning_rate": 9.504622963694564e-06, "loss": 1.2274, "step": 661500 }, { "epoch": 1.58, "learning_rate": 9.496689950061681e-06, "loss": 1.2219, "step": 662000 }, { "epoch": 1.58, "learning_rate": 9.488756936428795e-06, "loss": 1.2181, "step": 662500 }, { "epoch": 1.58, "learning_rate": 9.480823922795911e-06, "loss": 1.2131, "step": 663000 }, { "epoch": 1.58, "learning_rate": 9.472890909163029e-06, "loss": 1.2294, "step": 663500 }, { "epoch": 1.58, "learning_rate": 9.464957895530145e-06, "loss": 1.2429, "step": 664000 }, { "epoch": 1.58, "learning_rate": 9.45702488189726e-06, "loss": 1.2208, "step": 664500 }, { "epoch": 1.58, "learning_rate": 9.449091868264376e-06, "loss": 1.2231, "step": 665000 }, { "epoch": 1.58, "learning_rate": 9.441158854631492e-06, "loss": 1.2108, "step": 665500 }, { "epoch": 1.59, "learning_rate": 9.433225840998608e-06, "loss": 1.2158, "step": 666000 }, { "epoch": 1.59, "learning_rate": 9.425292827365726e-06, "loss": 1.2191, "step": 666500 }, { "epoch": 1.59, "learning_rate": 9.41735981373284e-06, "loss": 1.2236, "step": 667000 }, { "epoch": 1.59, "learning_rate": 9.409426800099957e-06, "loss": 1.2326, "step": 667500 }, { "epoch": 1.59, "learning_rate": 9.401493786467073e-06, "loss": 1.2071, "step": 668000 }, { "epoch": 1.59, "learning_rate": 9.393560772834189e-06, "loss": 1.2211, "step": 668500 }, { "epoch": 1.59, "learning_rate": 9.385627759201305e-06, "loss": 1.2259, "step": 669000 }, { "epoch": 1.59, "learning_rate": 9.377694745568421e-06, "loss": 1.2159, "step": 669500 }, { "epoch": 1.59, "learning_rate": 9.369761731935537e-06, "loss": 1.2172, "step": 670000 }, { "epoch": 1.6, "learning_rate": 9.361828718302654e-06, "loss": 1.2237, "step": 670500 }, { "epoch": 1.6, "learning_rate": 9.353895704669768e-06, "loss": 1.2084, "step": 671000 }, { "epoch": 1.6, "learning_rate": 9.345962691036884e-06, "loss": 1.2226, "step": 671500 }, { "epoch": 1.6, "learning_rate": 9.338029677404002e-06, "loss": 1.2274, "step": 672000 }, { "epoch": 1.6, "learning_rate": 9.330096663771118e-06, "loss": 1.2333, "step": 672500 }, { "epoch": 1.6, "learning_rate": 9.322163650138234e-06, "loss": 1.2306, "step": 673000 }, { "epoch": 1.6, "learning_rate": 9.31423063650535e-06, "loss": 1.2133, "step": 673500 }, { "epoch": 1.6, "learning_rate": 9.306297622872465e-06, "loss": 1.2164, "step": 674000 }, { "epoch": 1.61, "learning_rate": 9.298364609239581e-06, "loss": 1.2292, "step": 674500 }, { "epoch": 1.61, "learning_rate": 9.290431595606699e-06, "loss": 1.2242, "step": 675000 }, { "epoch": 1.61, "learning_rate": 9.282498581973813e-06, "loss": 1.2099, "step": 675500 }, { "epoch": 1.61, "learning_rate": 9.27456556834093e-06, "loss": 1.234, "step": 676000 }, { "epoch": 1.61, "learning_rate": 9.266632554708046e-06, "loss": 1.216, "step": 676500 }, { "epoch": 1.61, "learning_rate": 9.258699541075162e-06, "loss": 1.2322, "step": 677000 }, { "epoch": 1.61, "learning_rate": 9.250766527442278e-06, "loss": 1.2198, "step": 677500 }, { "epoch": 1.61, "learning_rate": 9.242833513809394e-06, "loss": 1.217, "step": 678000 }, { "epoch": 1.61, "learning_rate": 9.23490050017651e-06, "loss": 1.2182, "step": 678500 }, { "epoch": 1.62, "learning_rate": 9.226967486543626e-06, "loss": 1.2326, "step": 679000 }, { "epoch": 1.62, "learning_rate": 9.219034472910743e-06, "loss": 1.2211, "step": 679500 }, { "epoch": 1.62, "learning_rate": 9.211101459277857e-06, "loss": 1.2206, "step": 680000 }, { "epoch": 1.62, "learning_rate": 9.203168445644975e-06, "loss": 1.2312, "step": 680500 }, { "epoch": 1.62, "learning_rate": 9.19523543201209e-06, "loss": 1.2136, "step": 681000 }, { "epoch": 1.62, "learning_rate": 9.187302418379207e-06, "loss": 1.2123, "step": 681500 }, { "epoch": 1.62, "learning_rate": 9.179369404746323e-06, "loss": 1.2244, "step": 682000 }, { "epoch": 1.62, "learning_rate": 9.171436391113438e-06, "loss": 1.2188, "step": 682500 }, { "epoch": 1.63, "learning_rate": 9.163503377480554e-06, "loss": 1.2209, "step": 683000 }, { "epoch": 1.63, "learning_rate": 9.155570363847672e-06, "loss": 1.2239, "step": 683500 }, { "epoch": 1.63, "learning_rate": 9.147637350214788e-06, "loss": 1.2193, "step": 684000 }, { "epoch": 1.63, "learning_rate": 9.139704336581902e-06, "loss": 1.2076, "step": 684500 }, { "epoch": 1.63, "learning_rate": 9.13177132294902e-06, "loss": 1.2177, "step": 685000 }, { "epoch": 1.63, "learning_rate": 9.123838309316135e-06, "loss": 1.2086, "step": 685500 }, { "epoch": 1.63, "learning_rate": 9.115905295683251e-06, "loss": 1.2029, "step": 686000 }, { "epoch": 1.63, "learning_rate": 9.107972282050369e-06, "loss": 1.2213, "step": 686500 }, { "epoch": 1.63, "learning_rate": 9.100039268417483e-06, "loss": 1.2117, "step": 687000 }, { "epoch": 1.64, "learning_rate": 9.092106254784599e-06, "loss": 1.2133, "step": 687500 }, { "epoch": 1.64, "learning_rate": 9.084173241151716e-06, "loss": 1.2118, "step": 688000 }, { "epoch": 1.64, "learning_rate": 9.076240227518832e-06, "loss": 1.2065, "step": 688500 }, { "epoch": 1.64, "learning_rate": 9.068307213885948e-06, "loss": 1.2298, "step": 689000 }, { "epoch": 1.64, "learning_rate": 9.060374200253064e-06, "loss": 1.2306, "step": 689500 }, { "epoch": 1.64, "learning_rate": 9.05244118662018e-06, "loss": 1.2198, "step": 690000 }, { "epoch": 1.64, "learning_rate": 9.044508172987296e-06, "loss": 1.1961, "step": 690500 }, { "epoch": 1.64, "learning_rate": 9.036575159354412e-06, "loss": 1.2213, "step": 691000 }, { "epoch": 1.65, "learning_rate": 9.028642145721527e-06, "loss": 1.2063, "step": 691500 }, { "epoch": 1.65, "learning_rate": 9.020709132088645e-06, "loss": 1.2194, "step": 692000 }, { "epoch": 1.65, "learning_rate": 9.01277611845576e-06, "loss": 1.2069, "step": 692500 }, { "epoch": 1.65, "learning_rate": 9.004843104822877e-06, "loss": 1.2097, "step": 693000 }, { "epoch": 1.65, "learning_rate": 8.996910091189993e-06, "loss": 1.239, "step": 693500 }, { "epoch": 1.65, "learning_rate": 8.988977077557108e-06, "loss": 1.2312, "step": 694000 }, { "epoch": 1.65, "learning_rate": 8.981044063924224e-06, "loss": 1.2149, "step": 694500 }, { "epoch": 1.65, "learning_rate": 8.973111050291342e-06, "loss": 1.2192, "step": 695000 }, { "epoch": 1.66, "learning_rate": 8.965178036658456e-06, "loss": 1.212, "step": 695500 }, { "epoch": 1.66, "learning_rate": 8.957245023025572e-06, "loss": 1.2026, "step": 696000 }, { "epoch": 1.66, "learning_rate": 8.94931200939269e-06, "loss": 1.2288, "step": 696500 }, { "epoch": 1.66, "learning_rate": 8.941378995759805e-06, "loss": 1.2238, "step": 697000 }, { "epoch": 1.66, "learning_rate": 8.933445982126921e-06, "loss": 1.2216, "step": 697500 }, { "epoch": 1.66, "learning_rate": 8.925512968494037e-06, "loss": 1.2042, "step": 698000 }, { "epoch": 1.66, "learning_rate": 8.917579954861153e-06, "loss": 1.2156, "step": 698500 }, { "epoch": 1.66, "learning_rate": 8.909646941228269e-06, "loss": 1.2029, "step": 699000 }, { "epoch": 1.66, "learning_rate": 8.901713927595386e-06, "loss": 1.2186, "step": 699500 }, { "epoch": 1.67, "learning_rate": 8.8937809139625e-06, "loss": 1.2116, "step": 700000 }, { "epoch": 1.67, "learning_rate": 8.885847900329618e-06, "loss": 1.2051, "step": 700500 }, { "epoch": 1.67, "learning_rate": 8.877914886696734e-06, "loss": 1.2052, "step": 701000 }, { "epoch": 1.67, "learning_rate": 8.86998187306385e-06, "loss": 1.2049, "step": 701500 }, { "epoch": 1.67, "learning_rate": 8.862048859430966e-06, "loss": 1.2096, "step": 702000 }, { "epoch": 1.67, "learning_rate": 8.854115845798081e-06, "loss": 1.1989, "step": 702500 }, { "epoch": 1.67, "learning_rate": 8.846182832165197e-06, "loss": 1.2356, "step": 703000 }, { "epoch": 1.67, "learning_rate": 8.838249818532313e-06, "loss": 1.2042, "step": 703500 }, { "epoch": 1.68, "learning_rate": 8.83031680489943e-06, "loss": 1.2152, "step": 704000 }, { "epoch": 1.68, "learning_rate": 8.822383791266545e-06, "loss": 1.2113, "step": 704500 }, { "epoch": 1.68, "learning_rate": 8.814450777633663e-06, "loss": 1.218, "step": 705000 }, { "epoch": 1.68, "learning_rate": 8.806517764000778e-06, "loss": 1.2144, "step": 705500 }, { "epoch": 1.68, "learning_rate": 8.798584750367894e-06, "loss": 1.1974, "step": 706000 }, { "epoch": 1.68, "learning_rate": 8.79065173673501e-06, "loss": 1.2119, "step": 706500 }, { "epoch": 1.68, "learning_rate": 8.782718723102126e-06, "loss": 1.2106, "step": 707000 }, { "epoch": 1.68, "learning_rate": 8.774785709469242e-06, "loss": 1.2054, "step": 707500 }, { "epoch": 1.68, "learning_rate": 8.76685269583636e-06, "loss": 1.2112, "step": 708000 }, { "epoch": 1.69, "learning_rate": 8.758919682203475e-06, "loss": 1.1947, "step": 708500 }, { "epoch": 1.69, "learning_rate": 8.75098666857059e-06, "loss": 1.1973, "step": 709000 }, { "epoch": 1.69, "learning_rate": 8.743053654937707e-06, "loss": 1.2104, "step": 709500 }, { "epoch": 1.69, "learning_rate": 8.735120641304823e-06, "loss": 1.2197, "step": 710000 }, { "epoch": 1.69, "learning_rate": 8.727187627671939e-06, "loss": 1.2159, "step": 710500 }, { "epoch": 1.69, "learning_rate": 8.719254614039056e-06, "loss": 1.2197, "step": 711000 }, { "epoch": 1.69, "learning_rate": 8.71132160040617e-06, "loss": 1.2245, "step": 711500 }, { "epoch": 1.69, "learning_rate": 8.703388586773286e-06, "loss": 1.209, "step": 712000 }, { "epoch": 1.7, "learning_rate": 8.695455573140404e-06, "loss": 1.2146, "step": 712500 }, { "epoch": 1.7, "learning_rate": 8.68752255950752e-06, "loss": 1.2209, "step": 713000 }, { "epoch": 1.7, "learning_rate": 8.679589545874636e-06, "loss": 1.2176, "step": 713500 }, { "epoch": 1.7, "learning_rate": 8.671656532241751e-06, "loss": 1.1941, "step": 714000 }, { "epoch": 1.7, "learning_rate": 8.663723518608867e-06, "loss": 1.2102, "step": 714500 }, { "epoch": 1.7, "learning_rate": 8.655790504975983e-06, "loss": 1.211, "step": 715000 }, { "epoch": 1.7, "learning_rate": 8.647857491343099e-06, "loss": 1.2402, "step": 715500 }, { "epoch": 1.7, "learning_rate": 8.639924477710215e-06, "loss": 1.2039, "step": 716000 }, { "epoch": 1.71, "learning_rate": 8.631991464077332e-06, "loss": 1.2133, "step": 716500 }, { "epoch": 1.71, "learning_rate": 8.624058450444448e-06, "loss": 1.2174, "step": 717000 }, { "epoch": 1.71, "learning_rate": 8.616125436811564e-06, "loss": 1.2189, "step": 717500 }, { "epoch": 1.71, "learning_rate": 8.60819242317868e-06, "loss": 1.2109, "step": 718000 }, { "epoch": 1.71, "learning_rate": 8.600259409545796e-06, "loss": 1.2058, "step": 718500 }, { "epoch": 1.71, "learning_rate": 8.592326395912912e-06, "loss": 1.2209, "step": 719000 }, { "epoch": 1.71, "learning_rate": 8.584393382280028e-06, "loss": 1.2154, "step": 719500 }, { "epoch": 1.71, "learning_rate": 8.576460368647144e-06, "loss": 1.2254, "step": 720000 }, { "epoch": 1.71, "learning_rate": 8.56852735501426e-06, "loss": 1.2102, "step": 720500 }, { "epoch": 1.72, "learning_rate": 8.560594341381377e-06, "loss": 1.2163, "step": 721000 }, { "epoch": 1.72, "learning_rate": 8.552661327748493e-06, "loss": 1.2094, "step": 721500 }, { "epoch": 1.72, "learning_rate": 8.544728314115609e-06, "loss": 1.1982, "step": 722000 }, { "epoch": 1.72, "learning_rate": 8.536795300482725e-06, "loss": 1.2143, "step": 722500 }, { "epoch": 1.72, "learning_rate": 8.52886228684984e-06, "loss": 1.2127, "step": 723000 }, { "epoch": 1.72, "learning_rate": 8.520929273216956e-06, "loss": 1.2106, "step": 723500 }, { "epoch": 1.72, "learning_rate": 8.512996259584074e-06, "loss": 1.2127, "step": 724000 }, { "epoch": 1.72, "learning_rate": 8.505063245951188e-06, "loss": 1.2172, "step": 724500 }, { "epoch": 1.73, "learning_rate": 8.497130232318304e-06, "loss": 1.2115, "step": 725000 }, { "epoch": 1.73, "learning_rate": 8.489197218685421e-06, "loss": 1.1987, "step": 725500 }, { "epoch": 1.73, "learning_rate": 8.481264205052537e-06, "loss": 1.217, "step": 726000 }, { "epoch": 1.73, "learning_rate": 8.473331191419653e-06, "loss": 1.2072, "step": 726500 }, { "epoch": 1.73, "learning_rate": 8.465398177786769e-06, "loss": 1.2258, "step": 727000 }, { "epoch": 1.73, "learning_rate": 8.457465164153885e-06, "loss": 1.197, "step": 727500 }, { "epoch": 1.73, "learning_rate": 8.449532150521e-06, "loss": 1.2187, "step": 728000 }, { "epoch": 1.73, "learning_rate": 8.441599136888118e-06, "loss": 1.1952, "step": 728500 }, { "epoch": 1.73, "learning_rate": 8.433666123255232e-06, "loss": 1.1982, "step": 729000 }, { "epoch": 1.74, "learning_rate": 8.42573310962235e-06, "loss": 1.2218, "step": 729500 }, { "epoch": 1.74, "learning_rate": 8.417800095989466e-06, "loss": 1.214, "step": 730000 }, { "epoch": 1.74, "learning_rate": 8.409867082356582e-06, "loss": 1.2218, "step": 730500 }, { "epoch": 1.74, "learning_rate": 8.401934068723698e-06, "loss": 1.1943, "step": 731000 }, { "epoch": 1.74, "learning_rate": 8.394001055090814e-06, "loss": 1.2029, "step": 731500 }, { "epoch": 1.74, "learning_rate": 8.38606804145793e-06, "loss": 1.1989, "step": 732000 }, { "epoch": 1.74, "learning_rate": 8.378135027825047e-06, "loss": 1.2144, "step": 732500 }, { "epoch": 1.74, "learning_rate": 8.370202014192163e-06, "loss": 1.2149, "step": 733000 }, { "epoch": 1.75, "learning_rate": 8.362269000559277e-06, "loss": 1.1873, "step": 733500 }, { "epoch": 1.75, "learning_rate": 8.354335986926395e-06, "loss": 1.1982, "step": 734000 }, { "epoch": 1.75, "learning_rate": 8.34640297329351e-06, "loss": 1.1941, "step": 734500 }, { "epoch": 1.75, "learning_rate": 8.338469959660626e-06, "loss": 1.201, "step": 735000 }, { "epoch": 1.75, "learning_rate": 8.330536946027742e-06, "loss": 1.2132, "step": 735500 }, { "epoch": 1.75, "learning_rate": 8.322603932394858e-06, "loss": 1.2072, "step": 736000 }, { "epoch": 1.75, "learning_rate": 8.314670918761974e-06, "loss": 1.2003, "step": 736500 }, { "epoch": 1.75, "learning_rate": 8.306737905129091e-06, "loss": 1.2099, "step": 737000 }, { "epoch": 1.76, "learning_rate": 8.298804891496207e-06, "loss": 1.2126, "step": 737500 }, { "epoch": 1.76, "learning_rate": 8.290871877863323e-06, "loss": 1.2085, "step": 738000 }, { "epoch": 1.76, "learning_rate": 8.282938864230439e-06, "loss": 1.208, "step": 738500 }, { "epoch": 1.76, "learning_rate": 8.275005850597555e-06, "loss": 1.2003, "step": 739000 }, { "epoch": 1.76, "learning_rate": 8.26707283696467e-06, "loss": 1.2053, "step": 739500 }, { "epoch": 1.76, "learning_rate": 8.259139823331787e-06, "loss": 1.2025, "step": 740000 }, { "epoch": 1.76, "learning_rate": 8.251206809698902e-06, "loss": 1.2121, "step": 740500 }, { "epoch": 1.76, "learning_rate": 8.24327379606602e-06, "loss": 1.2019, "step": 741000 }, { "epoch": 1.76, "learning_rate": 8.235340782433136e-06, "loss": 1.2121, "step": 741500 }, { "epoch": 1.77, "learning_rate": 8.227407768800252e-06, "loss": 1.2208, "step": 742000 }, { "epoch": 1.77, "learning_rate": 8.219474755167368e-06, "loss": 1.2161, "step": 742500 }, { "epoch": 1.77, "learning_rate": 8.211541741534483e-06, "loss": 1.2203, "step": 743000 }, { "epoch": 1.77, "learning_rate": 8.2036087279016e-06, "loss": 1.2102, "step": 743500 }, { "epoch": 1.77, "learning_rate": 8.195675714268715e-06, "loss": 1.2069, "step": 744000 }, { "epoch": 1.77, "learning_rate": 8.187742700635831e-06, "loss": 1.2107, "step": 744500 }, { "epoch": 1.77, "learning_rate": 8.179809687002947e-06, "loss": 1.196, "step": 745000 }, { "epoch": 1.77, "learning_rate": 8.171876673370064e-06, "loss": 1.1884, "step": 745500 }, { "epoch": 1.78, "learning_rate": 8.16394365973718e-06, "loss": 1.2029, "step": 746000 }, { "epoch": 1.78, "learning_rate": 8.156010646104296e-06, "loss": 1.2077, "step": 746500 }, { "epoch": 1.78, "learning_rate": 8.148077632471412e-06, "loss": 1.2021, "step": 747000 }, { "epoch": 1.78, "learning_rate": 8.140144618838528e-06, "loss": 1.2097, "step": 747500 }, { "epoch": 1.78, "learning_rate": 8.132211605205644e-06, "loss": 1.2107, "step": 748000 }, { "epoch": 1.78, "learning_rate": 8.124278591572761e-06, "loss": 1.2107, "step": 748500 }, { "epoch": 1.78, "learning_rate": 8.116345577939876e-06, "loss": 1.2031, "step": 749000 }, { "epoch": 1.78, "learning_rate": 8.108412564306991e-06, "loss": 1.215, "step": 749500 }, { "epoch": 1.78, "learning_rate": 8.100479550674109e-06, "loss": 1.2002, "step": 750000 }, { "epoch": 1.79, "learning_rate": 8.092546537041225e-06, "loss": 1.2135, "step": 750500 }, { "epoch": 1.79, "learning_rate": 8.08461352340834e-06, "loss": 1.196, "step": 751000 }, { "epoch": 1.79, "learning_rate": 8.076680509775457e-06, "loss": 1.2067, "step": 751500 }, { "epoch": 1.79, "learning_rate": 8.068747496142572e-06, "loss": 1.2048, "step": 752000 }, { "epoch": 1.79, "learning_rate": 8.060814482509688e-06, "loss": 1.2176, "step": 752500 }, { "epoch": 1.79, "learning_rate": 8.052881468876806e-06, "loss": 1.2126, "step": 753000 }, { "epoch": 1.79, "learning_rate": 8.04494845524392e-06, "loss": 1.2093, "step": 753500 }, { "epoch": 1.79, "learning_rate": 8.037015441611038e-06, "loss": 1.206, "step": 754000 }, { "epoch": 1.8, "learning_rate": 8.029082427978153e-06, "loss": 1.1969, "step": 754500 }, { "epoch": 1.8, "learning_rate": 8.02114941434527e-06, "loss": 1.2162, "step": 755000 }, { "epoch": 1.8, "learning_rate": 8.013216400712385e-06, "loss": 1.1944, "step": 755500 }, { "epoch": 1.8, "learning_rate": 8.005283387079501e-06, "loss": 1.2028, "step": 756000 }, { "epoch": 1.8, "learning_rate": 7.997350373446617e-06, "loss": 1.1856, "step": 756500 }, { "epoch": 1.8, "learning_rate": 7.989417359813734e-06, "loss": 1.221, "step": 757000 }, { "epoch": 1.8, "learning_rate": 7.98148434618085e-06, "loss": 1.1955, "step": 757500 }, { "epoch": 1.8, "learning_rate": 7.973551332547965e-06, "loss": 1.1967, "step": 758000 }, { "epoch": 1.81, "learning_rate": 7.965618318915082e-06, "loss": 1.2161, "step": 758500 }, { "epoch": 1.81, "learning_rate": 7.957685305282198e-06, "loss": 1.203, "step": 759000 }, { "epoch": 1.81, "learning_rate": 7.949752291649314e-06, "loss": 1.2127, "step": 759500 }, { "epoch": 1.81, "learning_rate": 7.94181927801643e-06, "loss": 1.2106, "step": 760000 }, { "epoch": 1.81, "learning_rate": 7.933886264383546e-06, "loss": 1.2082, "step": 760500 }, { "epoch": 1.81, "learning_rate": 7.925953250750661e-06, "loss": 1.2047, "step": 761000 }, { "epoch": 1.81, "learning_rate": 7.918020237117779e-06, "loss": 1.1979, "step": 761500 }, { "epoch": 1.81, "learning_rate": 7.910087223484895e-06, "loss": 1.1888, "step": 762000 }, { "epoch": 1.81, "learning_rate": 7.90215420985201e-06, "loss": 1.2056, "step": 762500 }, { "epoch": 1.82, "learning_rate": 7.894221196219127e-06, "loss": 1.2081, "step": 763000 }, { "epoch": 1.82, "learning_rate": 7.886288182586242e-06, "loss": 1.183, "step": 763500 }, { "epoch": 1.82, "learning_rate": 7.878355168953358e-06, "loss": 1.1933, "step": 764000 }, { "epoch": 1.82, "learning_rate": 7.870422155320474e-06, "loss": 1.1922, "step": 764500 }, { "epoch": 1.82, "learning_rate": 7.86248914168759e-06, "loss": 1.2201, "step": 765000 }, { "epoch": 1.82, "learning_rate": 7.854556128054706e-06, "loss": 1.1971, "step": 765500 }, { "epoch": 1.82, "learning_rate": 7.846623114421823e-06, "loss": 1.205, "step": 766000 }, { "epoch": 1.82, "learning_rate": 7.83869010078894e-06, "loss": 1.2127, "step": 766500 }, { "epoch": 1.83, "learning_rate": 7.830757087156055e-06, "loss": 1.1925, "step": 767000 }, { "epoch": 1.83, "learning_rate": 7.822824073523171e-06, "loss": 1.2001, "step": 767500 }, { "epoch": 1.83, "learning_rate": 7.814891059890287e-06, "loss": 1.2059, "step": 768000 }, { "epoch": 1.83, "learning_rate": 7.806958046257403e-06, "loss": 1.1868, "step": 768500 }, { "epoch": 1.83, "learning_rate": 7.799025032624519e-06, "loss": 1.1982, "step": 769000 }, { "epoch": 1.83, "learning_rate": 7.791092018991634e-06, "loss": 1.1957, "step": 769500 }, { "epoch": 1.83, "learning_rate": 7.783159005358752e-06, "loss": 1.2069, "step": 770000 }, { "epoch": 1.83, "learning_rate": 7.775225991725868e-06, "loss": 1.191, "step": 770500 }, { "epoch": 1.83, "learning_rate": 7.767292978092984e-06, "loss": 1.2078, "step": 771000 }, { "epoch": 1.84, "learning_rate": 7.7593599644601e-06, "loss": 1.1973, "step": 771500 }, { "epoch": 1.84, "learning_rate": 7.751426950827216e-06, "loss": 1.1992, "step": 772000 }, { "epoch": 1.84, "learning_rate": 7.743493937194331e-06, "loss": 1.1951, "step": 772500 }, { "epoch": 1.84, "learning_rate": 7.735560923561449e-06, "loss": 1.1879, "step": 773000 }, { "epoch": 1.84, "learning_rate": 7.727627909928563e-06, "loss": 1.1847, "step": 773500 }, { "epoch": 1.84, "learning_rate": 7.719694896295679e-06, "loss": 1.2002, "step": 774000 }, { "epoch": 1.84, "learning_rate": 7.711761882662797e-06, "loss": 1.193, "step": 774500 }, { "epoch": 1.84, "learning_rate": 7.703828869029912e-06, "loss": 1.1985, "step": 775000 }, { "epoch": 1.85, "learning_rate": 7.695895855397028e-06, "loss": 1.1993, "step": 775500 }, { "epoch": 1.85, "learning_rate": 7.687962841764144e-06, "loss": 1.2016, "step": 776000 }, { "epoch": 1.85, "learning_rate": 7.68002982813126e-06, "loss": 1.2017, "step": 776500 }, { "epoch": 1.85, "learning_rate": 7.672096814498376e-06, "loss": 1.2132, "step": 777000 }, { "epoch": 1.85, "learning_rate": 7.664163800865493e-06, "loss": 1.2184, "step": 777500 }, { "epoch": 1.85, "learning_rate": 7.656230787232608e-06, "loss": 1.1996, "step": 778000 }, { "epoch": 1.85, "learning_rate": 7.648297773599725e-06, "loss": 1.2067, "step": 778500 }, { "epoch": 1.85, "learning_rate": 7.640364759966841e-06, "loss": 1.1858, "step": 779000 }, { "epoch": 1.86, "learning_rate": 7.632431746333957e-06, "loss": 1.1925, "step": 779500 }, { "epoch": 1.86, "learning_rate": 7.6244987327010736e-06, "loss": 1.1994, "step": 780000 }, { "epoch": 1.86, "learning_rate": 7.616565719068189e-06, "loss": 1.2115, "step": 780500 }, { "epoch": 1.86, "learning_rate": 7.6086327054353045e-06, "loss": 1.2029, "step": 781000 }, { "epoch": 1.86, "learning_rate": 7.600699691802421e-06, "loss": 1.202, "step": 781500 }, { "epoch": 1.86, "learning_rate": 7.592766678169538e-06, "loss": 1.1985, "step": 782000 }, { "epoch": 1.86, "learning_rate": 7.584833664536653e-06, "loss": 1.1924, "step": 782500 }, { "epoch": 1.86, "learning_rate": 7.576900650903769e-06, "loss": 1.199, "step": 783000 }, { "epoch": 1.86, "learning_rate": 7.5689676372708855e-06, "loss": 1.21, "step": 783500 }, { "epoch": 1.87, "learning_rate": 7.561034623638001e-06, "loss": 1.2042, "step": 784000 }, { "epoch": 1.87, "learning_rate": 7.553101610005117e-06, "loss": 1.202, "step": 784500 }, { "epoch": 1.87, "learning_rate": 7.545168596372233e-06, "loss": 1.1943, "step": 785000 }, { "epoch": 1.87, "learning_rate": 7.53723558273935e-06, "loss": 1.1947, "step": 785500 }, { "epoch": 1.87, "learning_rate": 7.529302569106466e-06, "loss": 1.2078, "step": 786000 }, { "epoch": 1.87, "learning_rate": 7.521369555473582e-06, "loss": 1.1961, "step": 786500 }, { "epoch": 1.87, "learning_rate": 7.513436541840697e-06, "loss": 1.1909, "step": 787000 }, { "epoch": 1.87, "learning_rate": 7.505503528207814e-06, "loss": 1.1897, "step": 787500 }, { "epoch": 1.88, "learning_rate": 7.49757051457493e-06, "loss": 1.1851, "step": 788000 }, { "epoch": 1.88, "learning_rate": 7.489637500942047e-06, "loss": 1.1985, "step": 788500 }, { "epoch": 1.88, "learning_rate": 7.481704487309162e-06, "loss": 1.2052, "step": 789000 }, { "epoch": 1.88, "learning_rate": 7.4737714736762775e-06, "loss": 1.1853, "step": 789500 }, { "epoch": 1.88, "learning_rate": 7.465838460043394e-06, "loss": 1.2105, "step": 790000 }, { "epoch": 1.88, "learning_rate": 7.45790544641051e-06, "loss": 1.2175, "step": 790500 }, { "epoch": 1.88, "learning_rate": 7.449972432777627e-06, "loss": 1.1808, "step": 791000 }, { "epoch": 1.88, "learning_rate": 7.442039419144742e-06, "loss": 1.1983, "step": 791500 }, { "epoch": 1.88, "learning_rate": 7.4341064055118586e-06, "loss": 1.2002, "step": 792000 }, { "epoch": 1.89, "learning_rate": 7.4261733918789744e-06, "loss": 1.2018, "step": 792500 }, { "epoch": 1.89, "learning_rate": 7.418240378246091e-06, "loss": 1.2046, "step": 793000 }, { "epoch": 1.89, "learning_rate": 7.410307364613206e-06, "loss": 1.2096, "step": 793500 }, { "epoch": 1.89, "learning_rate": 7.402374350980323e-06, "loss": 1.1899, "step": 794000 }, { "epoch": 1.89, "learning_rate": 7.394441337347439e-06, "loss": 1.2129, "step": 794500 }, { "epoch": 1.89, "learning_rate": 7.3865083237145554e-06, "loss": 1.2009, "step": 795000 }, { "epoch": 1.89, "learning_rate": 7.378575310081671e-06, "loss": 1.1873, "step": 795500 }, { "epoch": 1.89, "learning_rate": 7.370642296448786e-06, "loss": 1.1951, "step": 796000 }, { "epoch": 1.9, "learning_rate": 7.362709282815903e-06, "loss": 1.189, "step": 796500 }, { "epoch": 1.9, "learning_rate": 7.35477626918302e-06, "loss": 1.196, "step": 797000 }, { "epoch": 1.9, "learning_rate": 7.346843255550136e-06, "loss": 1.1971, "step": 797500 }, { "epoch": 1.9, "learning_rate": 7.338910241917251e-06, "loss": 1.1967, "step": 798000 }, { "epoch": 1.9, "learning_rate": 7.330977228284367e-06, "loss": 1.1899, "step": 798500 }, { "epoch": 1.9, "learning_rate": 7.323044214651483e-06, "loss": 1.1989, "step": 799000 }, { "epoch": 1.9, "learning_rate": 7.3151112010186e-06, "loss": 1.2093, "step": 799500 }, { "epoch": 1.9, "learning_rate": 7.307178187385716e-06, "loss": 1.1961, "step": 800000 }, { "epoch": 1.91, "learning_rate": 7.299245173752832e-06, "loss": 1.1913, "step": 800500 }, { "epoch": 1.91, "learning_rate": 7.2913121601199475e-06, "loss": 1.187, "step": 801000 }, { "epoch": 1.91, "learning_rate": 7.283379146487064e-06, "loss": 1.182, "step": 801500 }, { "epoch": 1.91, "learning_rate": 7.27544613285418e-06, "loss": 1.1837, "step": 802000 }, { "epoch": 1.91, "learning_rate": 7.267513119221296e-06, "loss": 1.1905, "step": 802500 }, { "epoch": 1.91, "learning_rate": 7.259580105588412e-06, "loss": 1.1965, "step": 803000 }, { "epoch": 1.91, "learning_rate": 7.2516470919555285e-06, "loss": 1.1928, "step": 803500 }, { "epoch": 1.91, "learning_rate": 7.243714078322644e-06, "loss": 1.1874, "step": 804000 }, { "epoch": 1.91, "learning_rate": 7.235781064689759e-06, "loss": 1.2034, "step": 804500 }, { "epoch": 1.92, "learning_rate": 7.227848051056876e-06, "loss": 1.1998, "step": 805000 }, { "epoch": 1.92, "learning_rate": 7.219915037423992e-06, "loss": 1.1794, "step": 805500 }, { "epoch": 1.92, "learning_rate": 7.211982023791109e-06, "loss": 1.2004, "step": 806000 }, { "epoch": 1.92, "learning_rate": 7.204049010158225e-06, "loss": 1.1982, "step": 806500 }, { "epoch": 1.92, "learning_rate": 7.1961159965253404e-06, "loss": 1.1876, "step": 807000 }, { "epoch": 1.92, "learning_rate": 7.188182982892456e-06, "loss": 1.2054, "step": 807500 }, { "epoch": 1.92, "learning_rate": 7.180249969259573e-06, "loss": 1.2039, "step": 808000 }, { "epoch": 1.92, "learning_rate": 7.172316955626689e-06, "loss": 1.1919, "step": 808500 }, { "epoch": 1.93, "learning_rate": 7.164383941993805e-06, "loss": 1.1786, "step": 809000 }, { "epoch": 1.93, "learning_rate": 7.156450928360921e-06, "loss": 1.1874, "step": 809500 }, { "epoch": 1.93, "learning_rate": 7.148517914728037e-06, "loss": 1.1925, "step": 810000 }, { "epoch": 1.93, "learning_rate": 7.140584901095153e-06, "loss": 1.2053, "step": 810500 }, { "epoch": 1.93, "learning_rate": 7.13265188746227e-06, "loss": 1.2005, "step": 811000 }, { "epoch": 1.93, "learning_rate": 7.124718873829385e-06, "loss": 1.204, "step": 811500 }, { "epoch": 1.93, "learning_rate": 7.116785860196501e-06, "loss": 1.214, "step": 812000 }, { "epoch": 1.93, "learning_rate": 7.1088528465636175e-06, "loss": 1.1993, "step": 812500 }, { "epoch": 1.93, "learning_rate": 7.100919832930734e-06, "loss": 1.1895, "step": 813000 }, { "epoch": 1.94, "learning_rate": 7.092986819297849e-06, "loss": 1.1815, "step": 813500 }, { "epoch": 1.94, "learning_rate": 7.085053805664965e-06, "loss": 1.1974, "step": 814000 }, { "epoch": 1.94, "learning_rate": 7.077120792032082e-06, "loss": 1.1966, "step": 814500 }, { "epoch": 1.94, "learning_rate": 7.069187778399198e-06, "loss": 1.2029, "step": 815000 }, { "epoch": 1.94, "learning_rate": 7.061254764766314e-06, "loss": 1.2016, "step": 815500 }, { "epoch": 1.94, "learning_rate": 7.053321751133429e-06, "loss": 1.191, "step": 816000 }, { "epoch": 1.94, "learning_rate": 7.045388737500546e-06, "loss": 1.1918, "step": 816500 }, { "epoch": 1.94, "learning_rate": 7.037455723867662e-06, "loss": 1.1887, "step": 817000 }, { "epoch": 1.95, "learning_rate": 7.029522710234779e-06, "loss": 1.2043, "step": 817500 }, { "epoch": 1.95, "learning_rate": 7.021589696601894e-06, "loss": 1.1876, "step": 818000 }, { "epoch": 1.95, "learning_rate": 7.01365668296901e-06, "loss": 1.1809, "step": 818500 }, { "epoch": 1.95, "learning_rate": 7.005723669336126e-06, "loss": 1.1993, "step": 819000 }, { "epoch": 1.95, "learning_rate": 6.997790655703243e-06, "loss": 1.1977, "step": 819500 }, { "epoch": 1.95, "learning_rate": 6.989857642070359e-06, "loss": 1.1895, "step": 820000 }, { "epoch": 1.95, "learning_rate": 6.981924628437474e-06, "loss": 1.1873, "step": 820500 }, { "epoch": 1.95, "learning_rate": 6.973991614804591e-06, "loss": 1.1772, "step": 821000 }, { "epoch": 1.96, "learning_rate": 6.9660586011717065e-06, "loss": 1.1851, "step": 821500 }, { "epoch": 1.96, "learning_rate": 6.958125587538823e-06, "loss": 1.1978, "step": 822000 }, { "epoch": 1.96, "learning_rate": 6.950192573905938e-06, "loss": 1.1879, "step": 822500 }, { "epoch": 1.96, "learning_rate": 6.942259560273055e-06, "loss": 1.1934, "step": 823000 }, { "epoch": 1.96, "learning_rate": 6.934326546640171e-06, "loss": 1.1992, "step": 823500 }, { "epoch": 1.96, "learning_rate": 6.9263935330072875e-06, "loss": 1.1991, "step": 824000 }, { "epoch": 1.96, "learning_rate": 6.918460519374403e-06, "loss": 1.2042, "step": 824500 }, { "epoch": 1.96, "learning_rate": 6.910527505741519e-06, "loss": 1.1911, "step": 825000 }, { "epoch": 1.96, "learning_rate": 6.902594492108635e-06, "loss": 1.1863, "step": 825500 }, { "epoch": 1.97, "learning_rate": 6.894661478475752e-06, "loss": 1.1915, "step": 826000 }, { "epoch": 1.97, "learning_rate": 6.886728464842868e-06, "loss": 1.1803, "step": 826500 }, { "epoch": 1.97, "learning_rate": 6.878795451209983e-06, "loss": 1.1952, "step": 827000 }, { "epoch": 1.97, "learning_rate": 6.870862437577099e-06, "loss": 1.178, "step": 827500 }, { "epoch": 1.97, "learning_rate": 6.862929423944216e-06, "loss": 1.1832, "step": 828000 }, { "epoch": 1.97, "learning_rate": 6.854996410311332e-06, "loss": 1.2055, "step": 828500 }, { "epoch": 1.97, "learning_rate": 6.847063396678447e-06, "loss": 1.1833, "step": 829000 }, { "epoch": 1.97, "learning_rate": 6.839130383045564e-06, "loss": 1.1813, "step": 829500 }, { "epoch": 1.98, "learning_rate": 6.8311973694126795e-06, "loss": 1.1941, "step": 830000 }, { "epoch": 1.98, "learning_rate": 6.823264355779796e-06, "loss": 1.1762, "step": 830500 }, { "epoch": 1.98, "learning_rate": 6.815331342146912e-06, "loss": 1.1858, "step": 831000 }, { "epoch": 1.98, "learning_rate": 6.807398328514028e-06, "loss": 1.1913, "step": 831500 }, { "epoch": 1.98, "learning_rate": 6.799465314881144e-06, "loss": 1.1903, "step": 832000 }, { "epoch": 1.98, "learning_rate": 6.7915323012482606e-06, "loss": 1.2029, "step": 832500 }, { "epoch": 1.98, "learning_rate": 6.7835992876153764e-06, "loss": 1.175, "step": 833000 }, { "epoch": 1.98, "learning_rate": 6.775666273982492e-06, "loss": 1.2037, "step": 833500 }, { "epoch": 1.98, "learning_rate": 6.767733260349608e-06, "loss": 1.204, "step": 834000 }, { "epoch": 1.99, "learning_rate": 6.759800246716725e-06, "loss": 1.1814, "step": 834500 }, { "epoch": 1.99, "learning_rate": 6.751867233083841e-06, "loss": 1.1863, "step": 835000 }, { "epoch": 1.99, "learning_rate": 6.7439342194509574e-06, "loss": 1.1878, "step": 835500 }, { "epoch": 1.99, "learning_rate": 6.7360012058180725e-06, "loss": 1.1881, "step": 836000 }, { "epoch": 1.99, "learning_rate": 6.728068192185188e-06, "loss": 1.1876, "step": 836500 }, { "epoch": 1.99, "learning_rate": 6.720135178552305e-06, "loss": 1.1892, "step": 837000 }, { "epoch": 1.99, "learning_rate": 6.712202164919422e-06, "loss": 1.1922, "step": 837500 }, { "epoch": 1.99, "learning_rate": 6.704269151286537e-06, "loss": 1.1981, "step": 838000 }, { "epoch": 2.0, "learning_rate": 6.696336137653653e-06, "loss": 1.1833, "step": 838500 }, { "epoch": 2.0, "learning_rate": 6.688403124020769e-06, "loss": 1.1869, "step": 839000 }, { "epoch": 2.0, "learning_rate": 6.680470110387885e-06, "loss": 1.1834, "step": 839500 }, { "epoch": 2.0, "learning_rate": 6.672537096755002e-06, "loss": 1.1937, "step": 840000 }, { "epoch": 2.0, "eval_loss": 1.169049859046936, "eval_runtime": 3623.1597, "eval_samples_per_second": 366.405, "eval_steps_per_second": 22.9, "step": 840370 }, { "epoch": 2.0, "learning_rate": 6.664604083122117e-06, "loss": 1.1996, "step": 840500 }, { "epoch": 2.0, "learning_rate": 6.656671069489234e-06, "loss": 1.1678, "step": 841000 }, { "epoch": 2.0, "learning_rate": 6.6487380558563495e-06, "loss": 1.1902, "step": 841500 }, { "epoch": 2.0, "learning_rate": 6.640805042223466e-06, "loss": 1.1885, "step": 842000 }, { "epoch": 2.01, "learning_rate": 6.632872028590581e-06, "loss": 1.1788, "step": 842500 }, { "epoch": 2.01, "learning_rate": 6.624939014957697e-06, "loss": 1.1884, "step": 843000 }, { "epoch": 2.01, "learning_rate": 6.617006001324814e-06, "loss": 1.1748, "step": 843500 }, { "epoch": 2.01, "learning_rate": 6.6090729876919305e-06, "loss": 1.1931, "step": 844000 }, { "epoch": 2.01, "learning_rate": 6.601139974059046e-06, "loss": 1.1791, "step": 844500 }, { "epoch": 2.01, "learning_rate": 6.593206960426161e-06, "loss": 1.1721, "step": 845000 }, { "epoch": 2.01, "learning_rate": 6.585273946793278e-06, "loss": 1.1864, "step": 845500 }, { "epoch": 2.01, "learning_rate": 6.577340933160394e-06, "loss": 1.1891, "step": 846000 }, { "epoch": 2.01, "learning_rate": 6.569407919527511e-06, "loss": 1.1789, "step": 846500 }, { "epoch": 2.02, "learning_rate": 6.561474905894626e-06, "loss": 1.1773, "step": 847000 }, { "epoch": 2.02, "learning_rate": 6.5535418922617424e-06, "loss": 1.1967, "step": 847500 }, { "epoch": 2.02, "learning_rate": 6.545608878628858e-06, "loss": 1.1974, "step": 848000 }, { "epoch": 2.02, "learning_rate": 6.537675864995975e-06, "loss": 1.1686, "step": 848500 }, { "epoch": 2.02, "learning_rate": 6.52974285136309e-06, "loss": 1.1928, "step": 849000 }, { "epoch": 2.02, "learning_rate": 6.521809837730207e-06, "loss": 1.1865, "step": 849500 }, { "epoch": 2.02, "learning_rate": 6.513876824097323e-06, "loss": 1.1695, "step": 850000 }, { "epoch": 2.02, "learning_rate": 6.505943810464439e-06, "loss": 1.1982, "step": 850500 }, { "epoch": 2.03, "learning_rate": 6.498010796831555e-06, "loss": 1.1748, "step": 851000 }, { "epoch": 2.03, "learning_rate": 6.49007778319867e-06, "loss": 1.1837, "step": 851500 }, { "epoch": 2.03, "learning_rate": 6.482144769565787e-06, "loss": 1.1869, "step": 852000 }, { "epoch": 2.03, "learning_rate": 6.474211755932903e-06, "loss": 1.178, "step": 852500 }, { "epoch": 2.03, "learning_rate": 6.4662787423000195e-06, "loss": 1.1664, "step": 853000 }, { "epoch": 2.03, "learning_rate": 6.4583457286671345e-06, "loss": 1.1672, "step": 853500 }, { "epoch": 2.03, "learning_rate": 6.450412715034251e-06, "loss": 1.1839, "step": 854000 }, { "epoch": 2.03, "learning_rate": 6.442479701401367e-06, "loss": 1.1904, "step": 854500 }, { "epoch": 2.03, "learning_rate": 6.434546687768484e-06, "loss": 1.1913, "step": 855000 }, { "epoch": 2.04, "learning_rate": 6.4266136741356e-06, "loss": 1.1941, "step": 855500 }, { "epoch": 2.04, "learning_rate": 6.4186806605027155e-06, "loss": 1.1875, "step": 856000 }, { "epoch": 2.04, "learning_rate": 6.410747646869831e-06, "loss": 1.1772, "step": 856500 }, { "epoch": 2.04, "learning_rate": 6.402814633236948e-06, "loss": 1.2005, "step": 857000 }, { "epoch": 2.04, "learning_rate": 6.394881619604064e-06, "loss": 1.1729, "step": 857500 }, { "epoch": 2.04, "learning_rate": 6.386948605971179e-06, "loss": 1.163, "step": 858000 }, { "epoch": 2.04, "learning_rate": 6.379015592338296e-06, "loss": 1.1781, "step": 858500 }, { "epoch": 2.04, "learning_rate": 6.371082578705412e-06, "loss": 1.1802, "step": 859000 }, { "epoch": 2.05, "learning_rate": 6.363149565072528e-06, "loss": 1.1739, "step": 859500 }, { "epoch": 2.05, "learning_rate": 6.355216551439645e-06, "loss": 1.1935, "step": 860000 }, { "epoch": 2.05, "learning_rate": 6.34728353780676e-06, "loss": 1.1873, "step": 860500 }, { "epoch": 2.05, "learning_rate": 6.339350524173876e-06, "loss": 1.1752, "step": 861000 }, { "epoch": 2.05, "learning_rate": 6.331417510540993e-06, "loss": 1.1839, "step": 861500 }, { "epoch": 2.05, "learning_rate": 6.3234844969081084e-06, "loss": 1.185, "step": 862000 }, { "epoch": 2.05, "learning_rate": 6.315551483275224e-06, "loss": 1.1716, "step": 862500 }, { "epoch": 2.05, "learning_rate": 6.30761846964234e-06, "loss": 1.1794, "step": 863000 }, { "epoch": 2.06, "learning_rate": 6.299685456009457e-06, "loss": 1.1826, "step": 863500 }, { "epoch": 2.06, "learning_rate": 6.291752442376573e-06, "loss": 1.1728, "step": 864000 }, { "epoch": 2.06, "learning_rate": 6.2838194287436895e-06, "loss": 1.1873, "step": 864500 }, { "epoch": 2.06, "learning_rate": 6.2758864151108045e-06, "loss": 1.1739, "step": 865000 }, { "epoch": 2.06, "learning_rate": 6.267953401477921e-06, "loss": 1.1873, "step": 865500 }, { "epoch": 2.06, "learning_rate": 6.260020387845037e-06, "loss": 1.1794, "step": 866000 }, { "epoch": 2.06, "learning_rate": 6.252087374212154e-06, "loss": 1.175, "step": 866500 }, { "epoch": 2.06, "learning_rate": 6.244154360579269e-06, "loss": 1.1816, "step": 867000 }, { "epoch": 2.06, "learning_rate": 6.236221346946385e-06, "loss": 1.1868, "step": 867500 }, { "epoch": 2.07, "learning_rate": 6.228288333313501e-06, "loss": 1.1741, "step": 868000 }, { "epoch": 2.07, "learning_rate": 6.220355319680618e-06, "loss": 1.1915, "step": 868500 }, { "epoch": 2.07, "learning_rate": 6.212422306047734e-06, "loss": 1.1641, "step": 869000 }, { "epoch": 2.07, "learning_rate": 6.204489292414849e-06, "loss": 1.1909, "step": 869500 }, { "epoch": 2.07, "learning_rate": 6.196556278781966e-06, "loss": 1.181, "step": 870000 }, { "epoch": 2.07, "learning_rate": 6.1886232651490815e-06, "loss": 1.1886, "step": 870500 }, { "epoch": 2.07, "learning_rate": 6.180690251516198e-06, "loss": 1.194, "step": 871000 }, { "epoch": 2.07, "learning_rate": 6.172757237883313e-06, "loss": 1.1887, "step": 871500 }, { "epoch": 2.08, "learning_rate": 6.16482422425043e-06, "loss": 1.1794, "step": 872000 }, { "epoch": 2.08, "learning_rate": 6.156891210617546e-06, "loss": 1.1913, "step": 872500 }, { "epoch": 2.08, "learning_rate": 6.1489581969846626e-06, "loss": 1.1855, "step": 873000 }, { "epoch": 2.08, "learning_rate": 6.1410251833517776e-06, "loss": 1.1831, "step": 873500 }, { "epoch": 2.08, "learning_rate": 6.133092169718894e-06, "loss": 1.1839, "step": 874000 }, { "epoch": 2.08, "learning_rate": 6.12515915608601e-06, "loss": 1.1855, "step": 874500 }, { "epoch": 2.08, "learning_rate": 6.117226142453127e-06, "loss": 1.1661, "step": 875000 }, { "epoch": 2.08, "learning_rate": 6.109293128820243e-06, "loss": 1.1778, "step": 875500 }, { "epoch": 2.08, "learning_rate": 6.101360115187358e-06, "loss": 1.1805, "step": 876000 }, { "epoch": 2.09, "learning_rate": 6.0934271015544745e-06, "loss": 1.1852, "step": 876500 }, { "epoch": 2.09, "learning_rate": 6.08549408792159e-06, "loss": 1.1708, "step": 877000 }, { "epoch": 2.09, "learning_rate": 6.077561074288707e-06, "loss": 1.1756, "step": 877500 }, { "epoch": 2.09, "learning_rate": 6.069628060655822e-06, "loss": 1.1901, "step": 878000 }, { "epoch": 2.09, "learning_rate": 6.061695047022939e-06, "loss": 1.1923, "step": 878500 }, { "epoch": 2.09, "learning_rate": 6.053762033390055e-06, "loss": 1.1793, "step": 879000 }, { "epoch": 2.09, "learning_rate": 6.045829019757171e-06, "loss": 1.1748, "step": 879500 }, { "epoch": 2.09, "learning_rate": 6.037896006124287e-06, "loss": 1.1789, "step": 880000 }, { "epoch": 2.1, "learning_rate": 6.029962992491403e-06, "loss": 1.1759, "step": 880500 }, { "epoch": 2.1, "learning_rate": 6.022029978858519e-06, "loss": 1.1656, "step": 881000 }, { "epoch": 2.1, "learning_rate": 6.014096965225636e-06, "loss": 1.1951, "step": 881500 }, { "epoch": 2.1, "learning_rate": 6.0061639515927515e-06, "loss": 1.1889, "step": 882000 }, { "epoch": 2.1, "learning_rate": 5.9982309379598665e-06, "loss": 1.1804, "step": 882500 }, { "epoch": 2.1, "learning_rate": 5.990297924326983e-06, "loss": 1.1807, "step": 883000 }, { "epoch": 2.1, "learning_rate": 5.982364910694099e-06, "loss": 1.1788, "step": 883500 }, { "epoch": 2.1, "learning_rate": 5.974431897061216e-06, "loss": 1.1859, "step": 884000 }, { "epoch": 2.11, "learning_rate": 5.9664988834283325e-06, "loss": 1.1857, "step": 884500 }, { "epoch": 2.11, "learning_rate": 5.9585658697954476e-06, "loss": 1.1705, "step": 885000 }, { "epoch": 2.11, "learning_rate": 5.950632856162563e-06, "loss": 1.1769, "step": 885500 }, { "epoch": 2.11, "learning_rate": 5.94269984252968e-06, "loss": 1.1752, "step": 886000 }, { "epoch": 2.11, "learning_rate": 5.934766828896796e-06, "loss": 1.172, "step": 886500 }, { "epoch": 2.11, "learning_rate": 5.926833815263912e-06, "loss": 1.1983, "step": 887000 }, { "epoch": 2.11, "learning_rate": 5.918900801631028e-06, "loss": 1.1735, "step": 887500 }, { "epoch": 2.11, "learning_rate": 5.9109677879981444e-06, "loss": 1.1638, "step": 888000 }, { "epoch": 2.11, "learning_rate": 5.90303477436526e-06, "loss": 1.1754, "step": 888500 }, { "epoch": 2.12, "learning_rate": 5.895101760732377e-06, "loss": 1.1755, "step": 889000 }, { "epoch": 2.12, "learning_rate": 5.887168747099492e-06, "loss": 1.1851, "step": 889500 }, { "epoch": 2.12, "learning_rate": 5.879235733466609e-06, "loss": 1.184, "step": 890000 }, { "epoch": 2.12, "learning_rate": 5.871302719833725e-06, "loss": 1.1843, "step": 890500 }, { "epoch": 2.12, "learning_rate": 5.863369706200841e-06, "loss": 1.1807, "step": 891000 }, { "epoch": 2.12, "learning_rate": 5.855436692567956e-06, "loss": 1.1601, "step": 891500 }, { "epoch": 2.12, "learning_rate": 5.847503678935072e-06, "loss": 1.185, "step": 892000 }, { "epoch": 2.12, "learning_rate": 5.839570665302189e-06, "loss": 1.1869, "step": 892500 }, { "epoch": 2.13, "learning_rate": 5.831637651669305e-06, "loss": 1.1678, "step": 893000 }, { "epoch": 2.13, "learning_rate": 5.823704638036421e-06, "loss": 1.1667, "step": 893500 }, { "epoch": 2.13, "learning_rate": 5.8157716244035365e-06, "loss": 1.1733, "step": 894000 }, { "epoch": 2.13, "learning_rate": 5.807838610770653e-06, "loss": 1.1803, "step": 894500 }, { "epoch": 2.13, "learning_rate": 5.799905597137769e-06, "loss": 1.177, "step": 895000 }, { "epoch": 2.13, "learning_rate": 5.791972583504886e-06, "loss": 1.1802, "step": 895500 }, { "epoch": 2.13, "learning_rate": 5.784039569872001e-06, "loss": 1.1784, "step": 896000 }, { "epoch": 2.13, "learning_rate": 5.7761065562391175e-06, "loss": 1.181, "step": 896500 }, { "epoch": 2.13, "learning_rate": 5.768173542606233e-06, "loss": 1.1664, "step": 897000 }, { "epoch": 2.14, "learning_rate": 5.76024052897335e-06, "loss": 1.1781, "step": 897500 }, { "epoch": 2.14, "learning_rate": 5.752307515340465e-06, "loss": 1.1857, "step": 898000 }, { "epoch": 2.14, "learning_rate": 5.744374501707581e-06, "loss": 1.1846, "step": 898500 }, { "epoch": 2.14, "learning_rate": 5.736441488074698e-06, "loss": 1.1582, "step": 899000 }, { "epoch": 2.14, "learning_rate": 5.728508474441814e-06, "loss": 1.1878, "step": 899500 }, { "epoch": 2.14, "learning_rate": 5.72057546080893e-06, "loss": 1.1752, "step": 900000 }, { "epoch": 2.14, "learning_rate": 5.712642447176045e-06, "loss": 1.1841, "step": 900500 }, { "epoch": 2.14, "learning_rate": 5.704709433543162e-06, "loss": 1.1902, "step": 901000 }, { "epoch": 2.15, "learning_rate": 5.696776419910278e-06, "loss": 1.1786, "step": 901500 }, { "epoch": 2.15, "learning_rate": 5.688843406277395e-06, "loss": 1.1815, "step": 902000 }, { "epoch": 2.15, "learning_rate": 5.68091039264451e-06, "loss": 1.193, "step": 902500 }, { "epoch": 2.15, "learning_rate": 5.672977379011626e-06, "loss": 1.1724, "step": 903000 }, { "epoch": 2.15, "learning_rate": 5.665044365378742e-06, "loss": 1.1818, "step": 903500 }, { "epoch": 2.15, "learning_rate": 5.657111351745859e-06, "loss": 1.1881, "step": 904000 }, { "epoch": 2.15, "learning_rate": 5.649178338112975e-06, "loss": 1.1733, "step": 904500 }, { "epoch": 2.15, "learning_rate": 5.641245324480091e-06, "loss": 1.1959, "step": 905000 }, { "epoch": 2.16, "learning_rate": 5.6333123108472065e-06, "loss": 1.1908, "step": 905500 }, { "epoch": 2.16, "learning_rate": 5.625379297214323e-06, "loss": 1.1697, "step": 906000 }, { "epoch": 2.16, "learning_rate": 5.617446283581439e-06, "loss": 1.1973, "step": 906500 }, { "epoch": 2.16, "learning_rate": 5.609513269948554e-06, "loss": 1.1845, "step": 907000 }, { "epoch": 2.16, "learning_rate": 5.601580256315671e-06, "loss": 1.1933, "step": 907500 }, { "epoch": 2.16, "learning_rate": 5.593647242682787e-06, "loss": 1.1832, "step": 908000 }, { "epoch": 2.16, "learning_rate": 5.585714229049903e-06, "loss": 1.1727, "step": 908500 }, { "epoch": 2.16, "learning_rate": 5.57778121541702e-06, "loss": 1.1759, "step": 909000 }, { "epoch": 2.16, "learning_rate": 5.569848201784135e-06, "loss": 1.162, "step": 909500 }, { "epoch": 2.17, "learning_rate": 5.561915188151251e-06, "loss": 1.1837, "step": 910000 }, { "epoch": 2.17, "learning_rate": 5.553982174518368e-06, "loss": 1.176, "step": 910500 }, { "epoch": 2.17, "learning_rate": 5.5460491608854835e-06, "loss": 1.1852, "step": 911000 }, { "epoch": 2.17, "learning_rate": 5.538116147252599e-06, "loss": 1.1746, "step": 911500 }, { "epoch": 2.17, "learning_rate": 5.530183133619715e-06, "loss": 1.1885, "step": 912000 }, { "epoch": 2.17, "learning_rate": 5.522250119986832e-06, "loss": 1.1591, "step": 912500 }, { "epoch": 2.17, "learning_rate": 5.514317106353948e-06, "loss": 1.1838, "step": 913000 }, { "epoch": 2.17, "learning_rate": 5.5063840927210646e-06, "loss": 1.1825, "step": 913500 }, { "epoch": 2.18, "learning_rate": 5.4984510790881796e-06, "loss": 1.1765, "step": 914000 }, { "epoch": 2.18, "learning_rate": 5.490518065455296e-06, "loss": 1.182, "step": 914500 }, { "epoch": 2.18, "learning_rate": 5.482585051822412e-06, "loss": 1.1725, "step": 915000 }, { "epoch": 2.18, "learning_rate": 5.474652038189529e-06, "loss": 1.1765, "step": 915500 }, { "epoch": 2.18, "learning_rate": 5.466719024556644e-06, "loss": 1.1798, "step": 916000 }, { "epoch": 2.18, "learning_rate": 5.45878601092376e-06, "loss": 1.1747, "step": 916500 }, { "epoch": 2.18, "learning_rate": 5.4508529972908765e-06, "loss": 1.1802, "step": 917000 }, { "epoch": 2.18, "learning_rate": 5.442919983657992e-06, "loss": 1.1751, "step": 917500 }, { "epoch": 2.18, "learning_rate": 5.434986970025108e-06, "loss": 1.1739, "step": 918000 }, { "epoch": 2.19, "learning_rate": 5.427053956392224e-06, "loss": 1.1917, "step": 918500 }, { "epoch": 2.19, "learning_rate": 5.419120942759341e-06, "loss": 1.177, "step": 919000 }, { "epoch": 2.19, "learning_rate": 5.411187929126457e-06, "loss": 1.1866, "step": 919500 }, { "epoch": 2.19, "learning_rate": 5.403254915493573e-06, "loss": 1.1553, "step": 920000 }, { "epoch": 2.19, "learning_rate": 5.395321901860688e-06, "loss": 1.1725, "step": 920500 }, { "epoch": 2.19, "learning_rate": 5.387388888227805e-06, "loss": 1.1702, "step": 921000 }, { "epoch": 2.19, "learning_rate": 5.379455874594921e-06, "loss": 1.1863, "step": 921500 }, { "epoch": 2.19, "learning_rate": 5.371522860962038e-06, "loss": 1.178, "step": 922000 }, { "epoch": 2.2, "learning_rate": 5.363589847329153e-06, "loss": 1.1789, "step": 922500 }, { "epoch": 2.2, "learning_rate": 5.3556568336962685e-06, "loss": 1.167, "step": 923000 }, { "epoch": 2.2, "learning_rate": 5.347723820063385e-06, "loss": 1.1732, "step": 923500 }, { "epoch": 2.2, "learning_rate": 5.339790806430501e-06, "loss": 1.1585, "step": 924000 }, { "epoch": 2.2, "learning_rate": 5.331857792797618e-06, "loss": 1.1692, "step": 924500 }, { "epoch": 2.2, "learning_rate": 5.323924779164733e-06, "loss": 1.1929, "step": 925000 }, { "epoch": 2.2, "learning_rate": 5.3159917655318495e-06, "loss": 1.1731, "step": 925500 }, { "epoch": 2.2, "learning_rate": 5.308058751898965e-06, "loss": 1.1743, "step": 926000 }, { "epoch": 2.2, "learning_rate": 5.300125738266082e-06, "loss": 1.1681, "step": 926500 }, { "epoch": 2.21, "learning_rate": 5.292192724633197e-06, "loss": 1.1739, "step": 927000 }, { "epoch": 2.21, "learning_rate": 5.284259711000314e-06, "loss": 1.1711, "step": 927500 }, { "epoch": 2.21, "learning_rate": 5.27632669736743e-06, "loss": 1.1751, "step": 928000 }, { "epoch": 2.21, "learning_rate": 5.2683936837345464e-06, "loss": 1.1826, "step": 928500 }, { "epoch": 2.21, "learning_rate": 5.260460670101662e-06, "loss": 1.1761, "step": 929000 }, { "epoch": 2.21, "learning_rate": 5.252527656468777e-06, "loss": 1.1812, "step": 929500 }, { "epoch": 2.21, "learning_rate": 5.244594642835894e-06, "loss": 1.1699, "step": 930000 }, { "epoch": 2.21, "learning_rate": 5.236661629203011e-06, "loss": 1.1799, "step": 930500 }, { "epoch": 2.22, "learning_rate": 5.228728615570127e-06, "loss": 1.1826, "step": 931000 }, { "epoch": 2.22, "learning_rate": 5.220795601937242e-06, "loss": 1.1762, "step": 931500 }, { "epoch": 2.22, "learning_rate": 5.212862588304358e-06, "loss": 1.1777, "step": 932000 }, { "epoch": 2.22, "learning_rate": 5.204929574671474e-06, "loss": 1.1801, "step": 932500 }, { "epoch": 2.22, "learning_rate": 5.196996561038591e-06, "loss": 1.1674, "step": 933000 }, { "epoch": 2.22, "learning_rate": 5.189063547405707e-06, "loss": 1.1584, "step": 933500 }, { "epoch": 2.22, "learning_rate": 5.181130533772823e-06, "loss": 1.1535, "step": 934000 }, { "epoch": 2.22, "learning_rate": 5.1731975201399385e-06, "loss": 1.1742, "step": 934500 }, { "epoch": 2.23, "learning_rate": 5.165264506507055e-06, "loss": 1.1859, "step": 935000 }, { "epoch": 2.23, "learning_rate": 5.157331492874171e-06, "loss": 1.1815, "step": 935500 }, { "epoch": 2.23, "learning_rate": 5.149398479241287e-06, "loss": 1.1693, "step": 936000 }, { "epoch": 2.23, "learning_rate": 5.141465465608403e-06, "loss": 1.169, "step": 936500 }, { "epoch": 2.23, "learning_rate": 5.1335324519755195e-06, "loss": 1.1595, "step": 937000 }, { "epoch": 2.23, "learning_rate": 5.125599438342635e-06, "loss": 1.1743, "step": 937500 }, { "epoch": 2.23, "learning_rate": 5.117666424709752e-06, "loss": 1.1576, "step": 938000 }, { "epoch": 2.23, "learning_rate": 5.109733411076867e-06, "loss": 1.1905, "step": 938500 }, { "epoch": 2.23, "learning_rate": 5.101800397443983e-06, "loss": 1.1705, "step": 939000 }, { "epoch": 2.24, "learning_rate": 5.0938673838111e-06, "loss": 1.1682, "step": 939500 }, { "epoch": 2.24, "learning_rate": 5.085934370178216e-06, "loss": 1.1637, "step": 940000 }, { "epoch": 2.24, "learning_rate": 5.078001356545331e-06, "loss": 1.1584, "step": 940500 }, { "epoch": 2.24, "learning_rate": 5.070068342912447e-06, "loss": 1.1787, "step": 941000 }, { "epoch": 2.24, "learning_rate": 5.062135329279564e-06, "loss": 1.1805, "step": 941500 }, { "epoch": 2.24, "learning_rate": 5.05420231564668e-06, "loss": 1.1684, "step": 942000 }, { "epoch": 2.24, "learning_rate": 5.046269302013796e-06, "loss": 1.1735, "step": 942500 }, { "epoch": 2.24, "learning_rate": 5.038336288380912e-06, "loss": 1.1674, "step": 943000 }, { "epoch": 2.25, "learning_rate": 5.030403274748028e-06, "loss": 1.1652, "step": 943500 }, { "epoch": 2.25, "learning_rate": 5.022470261115144e-06, "loss": 1.171, "step": 944000 }, { "epoch": 2.25, "learning_rate": 5.014537247482261e-06, "loss": 1.1813, "step": 944500 }, { "epoch": 2.25, "learning_rate": 5.006604233849376e-06, "loss": 1.1689, "step": 945000 }, { "epoch": 2.25, "learning_rate": 4.998671220216493e-06, "loss": 1.1807, "step": 945500 }, { "epoch": 2.25, "learning_rate": 4.9907382065836085e-06, "loss": 1.1766, "step": 946000 }, { "epoch": 2.25, "learning_rate": 4.982805192950724e-06, "loss": 1.1727, "step": 946500 }, { "epoch": 2.25, "learning_rate": 4.97487217931784e-06, "loss": 1.1717, "step": 947000 }, { "epoch": 2.25, "learning_rate": 4.966939165684956e-06, "loss": 1.1744, "step": 947500 }, { "epoch": 2.26, "learning_rate": 4.959006152052073e-06, "loss": 1.1776, "step": 948000 }, { "epoch": 2.26, "learning_rate": 4.951073138419189e-06, "loss": 1.1788, "step": 948500 }, { "epoch": 2.26, "learning_rate": 4.9431401247863045e-06, "loss": 1.1792, "step": 949000 }, { "epoch": 2.26, "learning_rate": 4.935207111153421e-06, "loss": 1.1628, "step": 949500 }, { "epoch": 2.26, "learning_rate": 4.927274097520537e-06, "loss": 1.176, "step": 950000 }, { "epoch": 2.26, "learning_rate": 4.919341083887653e-06, "loss": 1.1735, "step": 950500 }, { "epoch": 2.26, "learning_rate": 4.911408070254769e-06, "loss": 1.175, "step": 951000 }, { "epoch": 2.26, "learning_rate": 4.9034750566218855e-06, "loss": 1.1689, "step": 951500 }, { "epoch": 2.27, "learning_rate": 4.895542042989001e-06, "loss": 1.1726, "step": 952000 }, { "epoch": 2.27, "learning_rate": 4.887609029356117e-06, "loss": 1.1653, "step": 952500 }, { "epoch": 2.27, "learning_rate": 4.879676015723234e-06, "loss": 1.1516, "step": 953000 }, { "epoch": 2.27, "learning_rate": 4.87174300209035e-06, "loss": 1.1582, "step": 953500 }, { "epoch": 2.27, "learning_rate": 4.863809988457466e-06, "loss": 1.1732, "step": 954000 }, { "epoch": 2.27, "learning_rate": 4.8558769748245816e-06, "loss": 1.164, "step": 954500 }, { "epoch": 2.27, "learning_rate": 4.8479439611916974e-06, "loss": 1.1866, "step": 955000 }, { "epoch": 2.27, "learning_rate": 4.840010947558813e-06, "loss": 1.1639, "step": 955500 }, { "epoch": 2.28, "learning_rate": 4.83207793392593e-06, "loss": 1.1672, "step": 956000 }, { "epoch": 2.28, "learning_rate": 4.824144920293046e-06, "loss": 1.1631, "step": 956500 }, { "epoch": 2.28, "learning_rate": 4.816211906660162e-06, "loss": 1.1629, "step": 957000 }, { "epoch": 2.28, "learning_rate": 4.808278893027278e-06, "loss": 1.1709, "step": 957500 }, { "epoch": 2.28, "learning_rate": 4.800345879394394e-06, "loss": 1.1668, "step": 958000 }, { "epoch": 2.28, "learning_rate": 4.79241286576151e-06, "loss": 1.1825, "step": 958500 }, { "epoch": 2.28, "learning_rate": 4.784479852128626e-06, "loss": 1.1643, "step": 959000 }, { "epoch": 2.28, "learning_rate": 4.776546838495743e-06, "loss": 1.176, "step": 959500 }, { "epoch": 2.28, "learning_rate": 4.768613824862859e-06, "loss": 1.1738, "step": 960000 }, { "epoch": 2.29, "learning_rate": 4.7606808112299745e-06, "loss": 1.1737, "step": 960500 }, { "epoch": 2.29, "learning_rate": 4.75274779759709e-06, "loss": 1.1634, "step": 961000 }, { "epoch": 2.29, "learning_rate": 4.744814783964207e-06, "loss": 1.1711, "step": 961500 }, { "epoch": 2.29, "learning_rate": 4.736881770331322e-06, "loss": 1.1768, "step": 962000 }, { "epoch": 2.29, "learning_rate": 4.728948756698439e-06, "loss": 1.1614, "step": 962500 }, { "epoch": 2.29, "learning_rate": 4.7210157430655555e-06, "loss": 1.168, "step": 963000 }, { "epoch": 2.29, "learning_rate": 4.7130827294326705e-06, "loss": 1.1608, "step": 963500 }, { "epoch": 2.29, "learning_rate": 4.705149715799787e-06, "loss": 1.177, "step": 964000 }, { "epoch": 2.3, "learning_rate": 4.697216702166903e-06, "loss": 1.1692, "step": 964500 }, { "epoch": 2.3, "learning_rate": 4.689283688534019e-06, "loss": 1.1644, "step": 965000 }, { "epoch": 2.3, "learning_rate": 4.681350674901135e-06, "loss": 1.1563, "step": 965500 }, { "epoch": 2.3, "learning_rate": 4.6734176612682515e-06, "loss": 1.1629, "step": 966000 }, { "epoch": 2.3, "learning_rate": 4.665484647635367e-06, "loss": 1.1587, "step": 966500 }, { "epoch": 2.3, "learning_rate": 4.657551634002483e-06, "loss": 1.1762, "step": 967000 }, { "epoch": 2.3, "learning_rate": 4.649618620369599e-06, "loss": 1.1616, "step": 967500 }, { "epoch": 2.3, "learning_rate": 4.641685606736716e-06, "loss": 1.1682, "step": 968000 }, { "epoch": 2.3, "learning_rate": 4.633752593103832e-06, "loss": 1.1561, "step": 968500 }, { "epoch": 2.31, "learning_rate": 4.625819579470948e-06, "loss": 1.1736, "step": 969000 }, { "epoch": 2.31, "learning_rate": 4.617886565838064e-06, "loss": 1.161, "step": 969500 }, { "epoch": 2.31, "learning_rate": 4.609953552205179e-06, "loss": 1.1766, "step": 970000 }, { "epoch": 2.31, "learning_rate": 4.602020538572296e-06, "loss": 1.1677, "step": 970500 }, { "epoch": 2.31, "learning_rate": 4.594087524939412e-06, "loss": 1.1856, "step": 971000 }, { "epoch": 2.31, "learning_rate": 4.586154511306528e-06, "loss": 1.1703, "step": 971500 }, { "epoch": 2.31, "learning_rate": 4.578221497673644e-06, "loss": 1.1735, "step": 972000 }, { "epoch": 2.31, "learning_rate": 4.57028848404076e-06, "loss": 1.1751, "step": 972500 }, { "epoch": 2.32, "learning_rate": 4.562355470407876e-06, "loss": 1.1728, "step": 973000 }, { "epoch": 2.32, "learning_rate": 4.554422456774992e-06, "loss": 1.1845, "step": 973500 }, { "epoch": 2.32, "learning_rate": 4.546489443142109e-06, "loss": 1.1732, "step": 974000 }, { "epoch": 2.32, "learning_rate": 4.538556429509225e-06, "loss": 1.1657, "step": 974500 }, { "epoch": 2.32, "learning_rate": 4.5306234158763405e-06, "loss": 1.1675, "step": 975000 }, { "epoch": 2.32, "learning_rate": 4.522690402243456e-06, "loss": 1.1611, "step": 975500 }, { "epoch": 2.32, "learning_rate": 4.514757388610573e-06, "loss": 1.1631, "step": 976000 }, { "epoch": 2.32, "learning_rate": 4.506824374977689e-06, "loss": 1.1663, "step": 976500 }, { "epoch": 2.33, "learning_rate": 4.498891361344805e-06, "loss": 1.164, "step": 977000 }, { "epoch": 2.33, "learning_rate": 4.4909583477119215e-06, "loss": 1.1705, "step": 977500 }, { "epoch": 2.33, "learning_rate": 4.4830253340790365e-06, "loss": 1.1648, "step": 978000 }, { "epoch": 2.33, "learning_rate": 4.475092320446153e-06, "loss": 1.1627, "step": 978500 }, { "epoch": 2.33, "learning_rate": 4.467159306813269e-06, "loss": 1.1514, "step": 979000 }, { "epoch": 2.33, "learning_rate": 4.459226293180385e-06, "loss": 1.156, "step": 979500 }, { "epoch": 2.33, "learning_rate": 4.451293279547501e-06, "loss": 1.1699, "step": 980000 }, { "epoch": 2.33, "learning_rate": 4.4433602659146176e-06, "loss": 1.1801, "step": 980500 }, { "epoch": 2.33, "learning_rate": 4.435427252281733e-06, "loss": 1.1514, "step": 981000 }, { "epoch": 2.34, "learning_rate": 4.427494238648849e-06, "loss": 1.1701, "step": 981500 }, { "epoch": 2.34, "learning_rate": 4.419561225015965e-06, "loss": 1.1529, "step": 982000 }, { "epoch": 2.34, "learning_rate": 4.411628211383082e-06, "loss": 1.16, "step": 982500 }, { "epoch": 2.34, "learning_rate": 4.403695197750198e-06, "loss": 1.1811, "step": 983000 }, { "epoch": 2.34, "learning_rate": 4.395762184117314e-06, "loss": 1.1824, "step": 983500 }, { "epoch": 2.34, "learning_rate": 4.38782917048443e-06, "loss": 1.1603, "step": 984000 }, { "epoch": 2.34, "learning_rate": 4.379896156851546e-06, "loss": 1.1484, "step": 984500 }, { "epoch": 2.34, "learning_rate": 4.371963143218662e-06, "loss": 1.1672, "step": 985000 }, { "epoch": 2.35, "learning_rate": 4.364030129585778e-06, "loss": 1.172, "step": 985500 }, { "epoch": 2.35, "learning_rate": 4.356097115952895e-06, "loss": 1.1569, "step": 986000 }, { "epoch": 2.35, "learning_rate": 4.34816410232001e-06, "loss": 1.1726, "step": 986500 }, { "epoch": 2.35, "learning_rate": 4.340231088687126e-06, "loss": 1.1626, "step": 987000 }, { "epoch": 2.35, "learning_rate": 4.332298075054242e-06, "loss": 1.1794, "step": 987500 }, { "epoch": 2.35, "learning_rate": 4.324365061421358e-06, "loss": 1.1578, "step": 988000 }, { "epoch": 2.35, "learning_rate": 4.316432047788475e-06, "loss": 1.1586, "step": 988500 }, { "epoch": 2.35, "learning_rate": 4.308499034155591e-06, "loss": 1.1594, "step": 989000 }, { "epoch": 2.35, "learning_rate": 4.3005660205227065e-06, "loss": 1.1714, "step": 989500 }, { "epoch": 2.36, "learning_rate": 4.292633006889822e-06, "loss": 1.1657, "step": 990000 }, { "epoch": 2.36, "learning_rate": 4.284699993256939e-06, "loss": 1.1736, "step": 990500 }, { "epoch": 2.36, "learning_rate": 4.276766979624055e-06, "loss": 1.1616, "step": 991000 }, { "epoch": 2.36, "learning_rate": 4.268833965991171e-06, "loss": 1.1556, "step": 991500 }, { "epoch": 2.36, "learning_rate": 4.260900952358287e-06, "loss": 1.1796, "step": 992000 }, { "epoch": 2.36, "learning_rate": 4.252967938725403e-06, "loss": 1.1476, "step": 992500 }, { "epoch": 2.36, "learning_rate": 4.245034925092519e-06, "loss": 1.1616, "step": 993000 }, { "epoch": 2.36, "learning_rate": 4.237101911459635e-06, "loss": 1.1571, "step": 993500 }, { "epoch": 2.37, "learning_rate": 4.229168897826752e-06, "loss": 1.1862, "step": 994000 }, { "epoch": 2.37, "learning_rate": 4.221235884193867e-06, "loss": 1.1714, "step": 994500 }, { "epoch": 2.37, "learning_rate": 4.2133028705609836e-06, "loss": 1.1653, "step": 995000 }, { "epoch": 2.37, "learning_rate": 4.2053698569280994e-06, "loss": 1.159, "step": 995500 }, { "epoch": 2.37, "learning_rate": 4.197436843295215e-06, "loss": 1.1627, "step": 996000 }, { "epoch": 2.37, "learning_rate": 4.189503829662331e-06, "loss": 1.1754, "step": 996500 }, { "epoch": 2.37, "learning_rate": 4.181570816029448e-06, "loss": 1.1775, "step": 997000 }, { "epoch": 2.37, "learning_rate": 4.173637802396564e-06, "loss": 1.1588, "step": 997500 }, { "epoch": 2.38, "learning_rate": 4.16570478876368e-06, "loss": 1.1677, "step": 998000 }, { "epoch": 2.38, "learning_rate": 4.157771775130796e-06, "loss": 1.1699, "step": 998500 }, { "epoch": 2.38, "learning_rate": 4.149838761497912e-06, "loss": 1.1826, "step": 999000 }, { "epoch": 2.38, "learning_rate": 4.141905747865028e-06, "loss": 1.1611, "step": 999500 }, { "epoch": 2.38, "learning_rate": 4.133972734232144e-06, "loss": 1.1621, "step": 1000000 }, { "epoch": 2.38, "learning_rate": 4.126039720599261e-06, "loss": 1.1675, "step": 1000500 }, { "epoch": 2.38, "learning_rate": 4.118106706966376e-06, "loss": 1.1795, "step": 1001000 }, { "epoch": 2.38, "learning_rate": 4.110173693333492e-06, "loss": 1.1591, "step": 1001500 }, { "epoch": 2.38, "learning_rate": 4.102240679700608e-06, "loss": 1.1839, "step": 1002000 }, { "epoch": 2.39, "learning_rate": 4.094307666067724e-06, "loss": 1.1603, "step": 1002500 }, { "epoch": 2.39, "learning_rate": 4.086374652434841e-06, "loss": 1.1582, "step": 1003000 }, { "epoch": 2.39, "learning_rate": 4.078441638801957e-06, "loss": 1.1804, "step": 1003500 }, { "epoch": 2.39, "learning_rate": 4.0705086251690725e-06, "loss": 1.1517, "step": 1004000 }, { "epoch": 2.39, "learning_rate": 4.062575611536188e-06, "loss": 1.1675, "step": 1004500 }, { "epoch": 2.39, "learning_rate": 4.054642597903305e-06, "loss": 1.1616, "step": 1005000 }, { "epoch": 2.39, "learning_rate": 4.046709584270421e-06, "loss": 1.1809, "step": 1005500 }, { "epoch": 2.39, "learning_rate": 4.038776570637537e-06, "loss": 1.1575, "step": 1006000 }, { "epoch": 2.4, "learning_rate": 4.030843557004653e-06, "loss": 1.1477, "step": 1006500 }, { "epoch": 2.4, "learning_rate": 4.022910543371769e-06, "loss": 1.1603, "step": 1007000 }, { "epoch": 2.4, "learning_rate": 4.014977529738885e-06, "loss": 1.1656, "step": 1007500 }, { "epoch": 2.4, "learning_rate": 4.007044516106001e-06, "loss": 1.1509, "step": 1008000 }, { "epoch": 2.4, "learning_rate": 3.999111502473118e-06, "loss": 1.1531, "step": 1008500 }, { "epoch": 2.4, "learning_rate": 3.991178488840234e-06, "loss": 1.18, "step": 1009000 }, { "epoch": 2.4, "learning_rate": 3.9832454752073496e-06, "loss": 1.1605, "step": 1009500 }, { "epoch": 2.4, "learning_rate": 3.9753124615744654e-06, "loss": 1.1566, "step": 1010000 }, { "epoch": 2.4, "learning_rate": 3.967379447941581e-06, "loss": 1.1533, "step": 1010500 }, { "epoch": 2.41, "learning_rate": 3.959446434308697e-06, "loss": 1.172, "step": 1011000 }, { "epoch": 2.41, "learning_rate": 3.951513420675814e-06, "loss": 1.1685, "step": 1011500 }, { "epoch": 2.41, "learning_rate": 3.94358040704293e-06, "loss": 1.1672, "step": 1012000 }, { "epoch": 2.41, "learning_rate": 3.935647393410046e-06, "loss": 1.1782, "step": 1012500 }, { "epoch": 2.41, "learning_rate": 3.927714379777162e-06, "loss": 1.1696, "step": 1013000 }, { "epoch": 2.41, "learning_rate": 3.919781366144278e-06, "loss": 1.1686, "step": 1013500 }, { "epoch": 2.41, "learning_rate": 3.911848352511394e-06, "loss": 1.1603, "step": 1014000 }, { "epoch": 2.41, "learning_rate": 3.90391533887851e-06, "loss": 1.1643, "step": 1014500 }, { "epoch": 2.42, "learning_rate": 3.895982325245627e-06, "loss": 1.1517, "step": 1015000 }, { "epoch": 2.42, "learning_rate": 3.8880493116127425e-06, "loss": 1.1589, "step": 1015500 }, { "epoch": 2.42, "learning_rate": 3.880116297979858e-06, "loss": 1.1743, "step": 1016000 }, { "epoch": 2.42, "learning_rate": 3.872183284346974e-06, "loss": 1.162, "step": 1016500 }, { "epoch": 2.42, "learning_rate": 3.864250270714091e-06, "loss": 1.1791, "step": 1017000 }, { "epoch": 2.42, "learning_rate": 3.856317257081207e-06, "loss": 1.1741, "step": 1017500 }, { "epoch": 2.42, "learning_rate": 3.848384243448323e-06, "loss": 1.1521, "step": 1018000 }, { "epoch": 2.42, "learning_rate": 3.8404512298154385e-06, "loss": 1.1583, "step": 1018500 }, { "epoch": 2.43, "learning_rate": 3.832518216182554e-06, "loss": 1.1573, "step": 1019000 }, { "epoch": 2.43, "learning_rate": 3.824585202549671e-06, "loss": 1.1499, "step": 1019500 }, { "epoch": 2.43, "learning_rate": 3.816652188916787e-06, "loss": 1.1533, "step": 1020000 }, { "epoch": 2.43, "learning_rate": 3.8087191752839033e-06, "loss": 1.1518, "step": 1020500 }, { "epoch": 2.43, "learning_rate": 3.8007861616510187e-06, "loss": 1.1769, "step": 1021000 }, { "epoch": 2.43, "learning_rate": 3.792853148018135e-06, "loss": 1.1597, "step": 1021500 }, { "epoch": 2.43, "learning_rate": 3.7849201343852517e-06, "loss": 1.1713, "step": 1022000 }, { "epoch": 2.43, "learning_rate": 3.776987120752367e-06, "loss": 1.1516, "step": 1022500 }, { "epoch": 2.43, "learning_rate": 3.7690541071194834e-06, "loss": 1.1585, "step": 1023000 }, { "epoch": 2.44, "learning_rate": 3.7611210934865993e-06, "loss": 1.1438, "step": 1023500 }, { "epoch": 2.44, "learning_rate": 3.7531880798537156e-06, "loss": 1.1602, "step": 1024000 }, { "epoch": 2.44, "learning_rate": 3.7452550662208315e-06, "loss": 1.1579, "step": 1024500 }, { "epoch": 2.44, "learning_rate": 3.7373220525879477e-06, "loss": 1.1639, "step": 1025000 }, { "epoch": 2.44, "learning_rate": 3.7293890389550636e-06, "loss": 1.1612, "step": 1025500 }, { "epoch": 2.44, "learning_rate": 3.72145602532218e-06, "loss": 1.172, "step": 1026000 }, { "epoch": 2.44, "learning_rate": 3.7135230116892958e-06, "loss": 1.1449, "step": 1026500 }, { "epoch": 2.44, "learning_rate": 3.705589998056412e-06, "loss": 1.1701, "step": 1027000 }, { "epoch": 2.45, "learning_rate": 3.6976569844235283e-06, "loss": 1.1665, "step": 1027500 }, { "epoch": 2.45, "learning_rate": 3.689723970790644e-06, "loss": 1.1765, "step": 1028000 }, { "epoch": 2.45, "learning_rate": 3.6817909571577605e-06, "loss": 1.1455, "step": 1028500 }, { "epoch": 2.45, "learning_rate": 3.673857943524876e-06, "loss": 1.1789, "step": 1029000 }, { "epoch": 2.45, "learning_rate": 3.6659249298919926e-06, "loss": 1.1603, "step": 1029500 }, { "epoch": 2.45, "learning_rate": 3.657991916259108e-06, "loss": 1.1704, "step": 1030000 }, { "epoch": 2.45, "learning_rate": 3.6500589026262244e-06, "loss": 1.1654, "step": 1030500 }, { "epoch": 2.45, "learning_rate": 3.6421258889933402e-06, "loss": 1.1568, "step": 1031000 }, { "epoch": 2.45, "learning_rate": 3.6341928753604565e-06, "loss": 1.1505, "step": 1031500 }, { "epoch": 2.46, "learning_rate": 3.626259861727573e-06, "loss": 1.1562, "step": 1032000 }, { "epoch": 2.46, "learning_rate": 3.6183268480946887e-06, "loss": 1.163, "step": 1032500 }, { "epoch": 2.46, "learning_rate": 3.610393834461805e-06, "loss": 1.156, "step": 1033000 }, { "epoch": 2.46, "learning_rate": 3.602460820828921e-06, "loss": 1.1745, "step": 1033500 }, { "epoch": 2.46, "learning_rate": 3.594527807196037e-06, "loss": 1.1505, "step": 1034000 }, { "epoch": 2.46, "learning_rate": 3.586594793563153e-06, "loss": 1.1707, "step": 1034500 }, { "epoch": 2.46, "learning_rate": 3.5786617799302693e-06, "loss": 1.1594, "step": 1035000 }, { "epoch": 2.46, "learning_rate": 3.570728766297385e-06, "loss": 1.1612, "step": 1035500 }, { "epoch": 2.47, "learning_rate": 3.5627957526645014e-06, "loss": 1.1601, "step": 1036000 }, { "epoch": 2.47, "learning_rate": 3.554862739031617e-06, "loss": 1.1685, "step": 1036500 }, { "epoch": 2.47, "learning_rate": 3.5469297253987336e-06, "loss": 1.1675, "step": 1037000 }, { "epoch": 2.47, "learning_rate": 3.53899671176585e-06, "loss": 1.1472, "step": 1037500 }, { "epoch": 2.47, "learning_rate": 3.5310636981329653e-06, "loss": 1.1618, "step": 1038000 }, { "epoch": 2.47, "learning_rate": 3.5231306845000816e-06, "loss": 1.1558, "step": 1038500 }, { "epoch": 2.47, "learning_rate": 3.5151976708671975e-06, "loss": 1.1641, "step": 1039000 }, { "epoch": 2.47, "learning_rate": 3.5072646572343138e-06, "loss": 1.1571, "step": 1039500 }, { "epoch": 2.48, "learning_rate": 3.4993316436014296e-06, "loss": 1.1632, "step": 1040000 }, { "epoch": 2.48, "learning_rate": 3.491398629968546e-06, "loss": 1.1586, "step": 1040500 }, { "epoch": 2.48, "learning_rate": 3.4834656163356618e-06, "loss": 1.166, "step": 1041000 }, { "epoch": 2.48, "learning_rate": 3.475532602702778e-06, "loss": 1.1631, "step": 1041500 }, { "epoch": 2.48, "learning_rate": 3.4675995890698943e-06, "loss": 1.1777, "step": 1042000 }, { "epoch": 2.48, "learning_rate": 3.45966657543701e-06, "loss": 1.168, "step": 1042500 }, { "epoch": 2.48, "learning_rate": 3.4517335618041265e-06, "loss": 1.164, "step": 1043000 }, { "epoch": 2.48, "learning_rate": 3.4438005481712424e-06, "loss": 1.1374, "step": 1043500 }, { "epoch": 2.48, "learning_rate": 3.4358675345383587e-06, "loss": 1.1615, "step": 1044000 }, { "epoch": 2.49, "learning_rate": 3.427934520905474e-06, "loss": 1.1612, "step": 1044500 }, { "epoch": 2.49, "learning_rate": 3.420001507272591e-06, "loss": 1.1565, "step": 1045000 }, { "epoch": 2.49, "learning_rate": 3.4120684936397062e-06, "loss": 1.1597, "step": 1045500 }, { "epoch": 2.49, "learning_rate": 3.4041354800068225e-06, "loss": 1.1464, "step": 1046000 }, { "epoch": 2.49, "learning_rate": 3.3962024663739384e-06, "loss": 1.138, "step": 1046500 }, { "epoch": 2.49, "learning_rate": 3.3882694527410547e-06, "loss": 1.1457, "step": 1047000 }, { "epoch": 2.49, "learning_rate": 3.380336439108171e-06, "loss": 1.1774, "step": 1047500 }, { "epoch": 2.49, "learning_rate": 3.372403425475287e-06, "loss": 1.1501, "step": 1048000 }, { "epoch": 2.5, "learning_rate": 3.364470411842403e-06, "loss": 1.1656, "step": 1048500 }, { "epoch": 2.5, "learning_rate": 3.356537398209519e-06, "loss": 1.1614, "step": 1049000 }, { "epoch": 2.5, "learning_rate": 3.3486043845766353e-06, "loss": 1.1633, "step": 1049500 }, { "epoch": 2.5, "learning_rate": 3.340671370943751e-06, "loss": 1.1512, "step": 1050000 }, { "epoch": 2.5, "learning_rate": 3.3327383573108674e-06, "loss": 1.1414, "step": 1050500 }, { "epoch": 2.5, "learning_rate": 3.3248053436779833e-06, "loss": 1.1622, "step": 1051000 }, { "epoch": 2.5, "learning_rate": 3.3168723300450996e-06, "loss": 1.1565, "step": 1051500 }, { "epoch": 2.5, "learning_rate": 3.308939316412216e-06, "loss": 1.1566, "step": 1052000 }, { "epoch": 2.5, "learning_rate": 3.3010063027793317e-06, "loss": 1.1606, "step": 1052500 }, { "epoch": 2.51, "learning_rate": 3.293073289146448e-06, "loss": 1.1589, "step": 1053000 }, { "epoch": 2.51, "learning_rate": 3.2851402755135635e-06, "loss": 1.1599, "step": 1053500 }, { "epoch": 2.51, "learning_rate": 3.2772072618806798e-06, "loss": 1.1522, "step": 1054000 }, { "epoch": 2.51, "learning_rate": 3.2692742482477956e-06, "loss": 1.1559, "step": 1054500 }, { "epoch": 2.51, "learning_rate": 3.261341234614912e-06, "loss": 1.1468, "step": 1055000 }, { "epoch": 2.51, "learning_rate": 3.2534082209820278e-06, "loss": 1.1578, "step": 1055500 }, { "epoch": 2.51, "learning_rate": 3.245475207349144e-06, "loss": 1.1604, "step": 1056000 }, { "epoch": 2.51, "learning_rate": 3.2375421937162604e-06, "loss": 1.1723, "step": 1056500 }, { "epoch": 2.52, "learning_rate": 3.2296091800833762e-06, "loss": 1.155, "step": 1057000 }, { "epoch": 2.52, "learning_rate": 3.2216761664504925e-06, "loss": 1.1568, "step": 1057500 }, { "epoch": 2.52, "learning_rate": 3.2137431528176084e-06, "loss": 1.1568, "step": 1058000 }, { "epoch": 2.52, "learning_rate": 3.2058101391847247e-06, "loss": 1.1539, "step": 1058500 }, { "epoch": 2.52, "learning_rate": 3.1978771255518405e-06, "loss": 1.1582, "step": 1059000 }, { "epoch": 2.52, "learning_rate": 3.189944111918957e-06, "loss": 1.1531, "step": 1059500 }, { "epoch": 2.52, "learning_rate": 3.1820110982860723e-06, "loss": 1.1547, "step": 1060000 }, { "epoch": 2.52, "learning_rate": 3.174078084653189e-06, "loss": 1.1332, "step": 1060500 }, { "epoch": 2.53, "learning_rate": 3.1661450710203044e-06, "loss": 1.17, "step": 1061000 }, { "epoch": 2.53, "learning_rate": 3.1582120573874207e-06, "loss": 1.1534, "step": 1061500 }, { "epoch": 2.53, "learning_rate": 3.150279043754537e-06, "loss": 1.1561, "step": 1062000 }, { "epoch": 2.53, "learning_rate": 3.142346030121653e-06, "loss": 1.1375, "step": 1062500 }, { "epoch": 2.53, "learning_rate": 3.134413016488769e-06, "loss": 1.1432, "step": 1063000 }, { "epoch": 2.53, "learning_rate": 3.126480002855885e-06, "loss": 1.1522, "step": 1063500 }, { "epoch": 2.53, "learning_rate": 3.1185469892230013e-06, "loss": 1.1614, "step": 1064000 }, { "epoch": 2.53, "learning_rate": 3.110613975590117e-06, "loss": 1.1685, "step": 1064500 }, { "epoch": 2.53, "learning_rate": 3.1026809619572334e-06, "loss": 1.1501, "step": 1065000 }, { "epoch": 2.54, "learning_rate": 3.0947479483243493e-06, "loss": 1.1492, "step": 1065500 }, { "epoch": 2.54, "learning_rate": 3.0868149346914656e-06, "loss": 1.1668, "step": 1066000 }, { "epoch": 2.54, "learning_rate": 3.078881921058582e-06, "loss": 1.1526, "step": 1066500 }, { "epoch": 2.54, "learning_rate": 3.0709489074256978e-06, "loss": 1.1527, "step": 1067000 }, { "epoch": 2.54, "learning_rate": 3.063015893792814e-06, "loss": 1.1548, "step": 1067500 }, { "epoch": 2.54, "learning_rate": 3.05508288015993e-06, "loss": 1.1463, "step": 1068000 }, { "epoch": 2.54, "learning_rate": 3.047149866527046e-06, "loss": 1.1696, "step": 1068500 }, { "epoch": 2.54, "learning_rate": 3.0392168528941616e-06, "loss": 1.1562, "step": 1069000 }, { "epoch": 2.55, "learning_rate": 3.031283839261278e-06, "loss": 1.1556, "step": 1069500 }, { "epoch": 2.55, "learning_rate": 3.023350825628394e-06, "loss": 1.1552, "step": 1070000 }, { "epoch": 2.55, "learning_rate": 3.01541781199551e-06, "loss": 1.1538, "step": 1070500 }, { "epoch": 2.55, "learning_rate": 3.007484798362626e-06, "loss": 1.1532, "step": 1071000 }, { "epoch": 2.55, "learning_rate": 2.9995517847297422e-06, "loss": 1.1498, "step": 1071500 }, { "epoch": 2.55, "learning_rate": 2.9916187710968585e-06, "loss": 1.146, "step": 1072000 }, { "epoch": 2.55, "learning_rate": 2.9836857574639744e-06, "loss": 1.154, "step": 1072500 }, { "epoch": 2.55, "learning_rate": 2.9757527438310907e-06, "loss": 1.1584, "step": 1073000 }, { "epoch": 2.55, "learning_rate": 2.9678197301982065e-06, "loss": 1.1418, "step": 1073500 }, { "epoch": 2.56, "learning_rate": 2.959886716565323e-06, "loss": 1.1458, "step": 1074000 }, { "epoch": 2.56, "learning_rate": 2.9519537029324387e-06, "loss": 1.1503, "step": 1074500 }, { "epoch": 2.56, "learning_rate": 2.944020689299555e-06, "loss": 1.1475, "step": 1075000 }, { "epoch": 2.56, "learning_rate": 2.9360876756666704e-06, "loss": 1.1629, "step": 1075500 }, { "epoch": 2.56, "learning_rate": 2.928154662033787e-06, "loss": 1.1565, "step": 1076000 }, { "epoch": 2.56, "learning_rate": 2.9202216484009034e-06, "loss": 1.1727, "step": 1076500 }, { "epoch": 2.56, "learning_rate": 2.912288634768019e-06, "loss": 1.1451, "step": 1077000 }, { "epoch": 2.56, "learning_rate": 2.904355621135135e-06, "loss": 1.1485, "step": 1077500 }, { "epoch": 2.57, "learning_rate": 2.896422607502251e-06, "loss": 1.1484, "step": 1078000 }, { "epoch": 2.57, "learning_rate": 2.8884895938693673e-06, "loss": 1.1559, "step": 1078500 }, { "epoch": 2.57, "learning_rate": 2.880556580236483e-06, "loss": 1.1353, "step": 1079000 }, { "epoch": 2.57, "learning_rate": 2.8726235666035995e-06, "loss": 1.1427, "step": 1079500 }, { "epoch": 2.57, "learning_rate": 2.8646905529707153e-06, "loss": 1.1653, "step": 1080000 }, { "epoch": 2.57, "learning_rate": 2.8567575393378316e-06, "loss": 1.1541, "step": 1080500 }, { "epoch": 2.57, "learning_rate": 2.8488245257049475e-06, "loss": 1.1528, "step": 1081000 }, { "epoch": 2.57, "learning_rate": 2.8408915120720638e-06, "loss": 1.1531, "step": 1081500 }, { "epoch": 2.58, "learning_rate": 2.83295849843918e-06, "loss": 1.1671, "step": 1082000 }, { "epoch": 2.58, "learning_rate": 2.825025484806296e-06, "loss": 1.1541, "step": 1082500 }, { "epoch": 2.58, "learning_rate": 2.817092471173412e-06, "loss": 1.1451, "step": 1083000 }, { "epoch": 2.58, "learning_rate": 2.809159457540528e-06, "loss": 1.1609, "step": 1083500 }, { "epoch": 2.58, "learning_rate": 2.8012264439076444e-06, "loss": 1.1614, "step": 1084000 }, { "epoch": 2.58, "learning_rate": 2.79329343027476e-06, "loss": 1.1722, "step": 1084500 }, { "epoch": 2.58, "learning_rate": 2.785360416641876e-06, "loss": 1.1521, "step": 1085000 }, { "epoch": 2.58, "learning_rate": 2.777427403008992e-06, "loss": 1.1569, "step": 1085500 }, { "epoch": 2.58, "learning_rate": 2.7694943893761082e-06, "loss": 1.1467, "step": 1086000 }, { "epoch": 2.59, "learning_rate": 2.7615613757432245e-06, "loss": 1.1583, "step": 1086500 }, { "epoch": 2.59, "learning_rate": 2.7536283621103404e-06, "loss": 1.1665, "step": 1087000 }, { "epoch": 2.59, "learning_rate": 2.7456953484774567e-06, "loss": 1.1646, "step": 1087500 }, { "epoch": 2.59, "learning_rate": 2.7377623348445725e-06, "loss": 1.1541, "step": 1088000 }, { "epoch": 2.59, "learning_rate": 2.729829321211689e-06, "loss": 1.1588, "step": 1088500 }, { "epoch": 2.59, "learning_rate": 2.7218963075788047e-06, "loss": 1.1405, "step": 1089000 }, { "epoch": 2.59, "learning_rate": 2.713963293945921e-06, "loss": 1.1502, "step": 1089500 }, { "epoch": 2.59, "learning_rate": 2.706030280313037e-06, "loss": 1.1474, "step": 1090000 }, { "epoch": 2.6, "learning_rate": 2.698097266680153e-06, "loss": 1.1505, "step": 1090500 }, { "epoch": 2.6, "learning_rate": 2.6901642530472694e-06, "loss": 1.1579, "step": 1091000 }, { "epoch": 2.6, "learning_rate": 2.6822312394143853e-06, "loss": 1.1499, "step": 1091500 }, { "epoch": 2.6, "learning_rate": 2.6742982257815016e-06, "loss": 1.1668, "step": 1092000 }, { "epoch": 2.6, "learning_rate": 2.666365212148617e-06, "loss": 1.15, "step": 1092500 }, { "epoch": 2.6, "learning_rate": 2.6584321985157337e-06, "loss": 1.152, "step": 1093000 }, { "epoch": 2.6, "learning_rate": 2.650499184882849e-06, "loss": 1.1495, "step": 1093500 }, { "epoch": 2.6, "learning_rate": 2.6425661712499655e-06, "loss": 1.1559, "step": 1094000 }, { "epoch": 2.6, "learning_rate": 2.6346331576170813e-06, "loss": 1.1531, "step": 1094500 }, { "epoch": 2.61, "learning_rate": 2.6267001439841976e-06, "loss": 1.1509, "step": 1095000 }, { "epoch": 2.61, "learning_rate": 2.6187671303513135e-06, "loss": 1.1694, "step": 1095500 }, { "epoch": 2.61, "learning_rate": 2.6108341167184298e-06, "loss": 1.1527, "step": 1096000 }, { "epoch": 2.61, "learning_rate": 2.602901103085546e-06, "loss": 1.1524, "step": 1096500 }, { "epoch": 2.61, "learning_rate": 2.594968089452662e-06, "loss": 1.1582, "step": 1097000 }, { "epoch": 2.61, "learning_rate": 2.5870350758197782e-06, "loss": 1.1525, "step": 1097500 }, { "epoch": 2.61, "learning_rate": 2.579102062186894e-06, "loss": 1.1551, "step": 1098000 }, { "epoch": 2.61, "learning_rate": 2.5711690485540104e-06, "loss": 1.1551, "step": 1098500 }, { "epoch": 2.62, "learning_rate": 2.5632360349211262e-06, "loss": 1.1389, "step": 1099000 }, { "epoch": 2.62, "learning_rate": 2.5553030212882425e-06, "loss": 1.1448, "step": 1099500 }, { "epoch": 2.62, "learning_rate": 2.547370007655358e-06, "loss": 1.1498, "step": 1100000 }, { "epoch": 2.62, "learning_rate": 2.5394369940224743e-06, "loss": 1.1473, "step": 1100500 }, { "epoch": 2.62, "learning_rate": 2.531503980389591e-06, "loss": 1.1536, "step": 1101000 }, { "epoch": 2.62, "learning_rate": 2.5235709667567064e-06, "loss": 1.1498, "step": 1101500 }, { "epoch": 2.62, "learning_rate": 2.5156379531238227e-06, "loss": 1.1545, "step": 1102000 }, { "epoch": 2.62, "learning_rate": 2.5077049394909386e-06, "loss": 1.1536, "step": 1102500 }, { "epoch": 2.63, "learning_rate": 2.499771925858055e-06, "loss": 1.1677, "step": 1103000 }, { "epoch": 2.63, "learning_rate": 2.491838912225171e-06, "loss": 1.1516, "step": 1103500 }, { "epoch": 2.63, "learning_rate": 2.483905898592287e-06, "loss": 1.1645, "step": 1104000 }, { "epoch": 2.63, "learning_rate": 2.475972884959403e-06, "loss": 1.134, "step": 1104500 }, { "epoch": 2.63, "learning_rate": 2.468039871326519e-06, "loss": 1.1678, "step": 1105000 }, { "epoch": 2.63, "learning_rate": 2.460106857693635e-06, "loss": 1.1544, "step": 1105500 }, { "epoch": 2.63, "learning_rate": 2.4521738440607513e-06, "loss": 1.1343, "step": 1106000 }, { "epoch": 2.63, "learning_rate": 2.444240830427867e-06, "loss": 1.1478, "step": 1106500 }, { "epoch": 2.63, "learning_rate": 2.4363078167949835e-06, "loss": 1.152, "step": 1107000 }, { "epoch": 2.64, "learning_rate": 2.4283748031620993e-06, "loss": 1.1585, "step": 1107500 }, { "epoch": 2.64, "learning_rate": 2.420441789529215e-06, "loss": 1.1586, "step": 1108000 }, { "epoch": 2.64, "learning_rate": 2.412508775896332e-06, "loss": 1.1608, "step": 1108500 }, { "epoch": 2.64, "learning_rate": 2.4045757622634478e-06, "loss": 1.1614, "step": 1109000 }, { "epoch": 2.64, "learning_rate": 2.3966427486305636e-06, "loss": 1.1655, "step": 1109500 }, { "epoch": 2.64, "learning_rate": 2.38870973499768e-06, "loss": 1.162, "step": 1110000 }, { "epoch": 2.64, "learning_rate": 2.3807767213647958e-06, "loss": 1.157, "step": 1110500 }, { "epoch": 2.64, "learning_rate": 2.372843707731912e-06, "loss": 1.1472, "step": 1111000 }, { "epoch": 2.65, "learning_rate": 2.364910694099028e-06, "loss": 1.1376, "step": 1111500 }, { "epoch": 2.65, "learning_rate": 2.356977680466144e-06, "loss": 1.1512, "step": 1112000 }, { "epoch": 2.65, "learning_rate": 2.34904466683326e-06, "loss": 1.1498, "step": 1112500 }, { "epoch": 2.65, "learning_rate": 2.341111653200376e-06, "loss": 1.1588, "step": 1113000 }, { "epoch": 2.65, "learning_rate": 2.3331786395674922e-06, "loss": 1.1418, "step": 1113500 }, { "epoch": 2.65, "learning_rate": 2.3252456259346085e-06, "loss": 1.1655, "step": 1114000 }, { "epoch": 2.65, "learning_rate": 2.3173126123017244e-06, "loss": 1.151, "step": 1114500 }, { "epoch": 2.65, "learning_rate": 2.3093795986688407e-06, "loss": 1.141, "step": 1115000 }, { "epoch": 2.65, "learning_rate": 2.3014465850359566e-06, "loss": 1.1537, "step": 1115500 }, { "epoch": 2.66, "learning_rate": 2.2935135714030724e-06, "loss": 1.142, "step": 1116000 }, { "epoch": 2.66, "learning_rate": 2.2855805577701887e-06, "loss": 1.1444, "step": 1116500 }, { "epoch": 2.66, "learning_rate": 2.2776475441373046e-06, "loss": 1.1461, "step": 1117000 }, { "epoch": 2.66, "learning_rate": 2.269714530504421e-06, "loss": 1.1479, "step": 1117500 }, { "epoch": 2.66, "learning_rate": 2.2617815168715367e-06, "loss": 1.1634, "step": 1118000 }, { "epoch": 2.66, "learning_rate": 2.253848503238653e-06, "loss": 1.1528, "step": 1118500 }, { "epoch": 2.66, "learning_rate": 2.2459154896057693e-06, "loss": 1.1513, "step": 1119000 }, { "epoch": 2.66, "learning_rate": 2.237982475972885e-06, "loss": 1.1547, "step": 1119500 }, { "epoch": 2.67, "learning_rate": 2.2300494623400015e-06, "loss": 1.159, "step": 1120000 }, { "epoch": 2.67, "learning_rate": 2.2221164487071173e-06, "loss": 1.1388, "step": 1120500 }, { "epoch": 2.67, "learning_rate": 2.214183435074233e-06, "loss": 1.1616, "step": 1121000 }, { "epoch": 2.67, "learning_rate": 2.2062504214413495e-06, "loss": 1.1555, "step": 1121500 }, { "epoch": 2.67, "learning_rate": 2.1983174078084653e-06, "loss": 1.1521, "step": 1122000 }, { "epoch": 2.67, "learning_rate": 2.1903843941755816e-06, "loss": 1.1381, "step": 1122500 }, { "epoch": 2.67, "learning_rate": 2.1824513805426975e-06, "loss": 1.1495, "step": 1123000 }, { "epoch": 2.67, "learning_rate": 2.1745183669098138e-06, "loss": 1.1394, "step": 1123500 }, { "epoch": 2.68, "learning_rate": 2.16658535327693e-06, "loss": 1.1362, "step": 1124000 }, { "epoch": 2.68, "learning_rate": 2.158652339644046e-06, "loss": 1.1356, "step": 1124500 }, { "epoch": 2.68, "learning_rate": 2.150719326011162e-06, "loss": 1.1565, "step": 1125000 }, { "epoch": 2.68, "learning_rate": 2.142786312378278e-06, "loss": 1.1576, "step": 1125500 }, { "epoch": 2.68, "learning_rate": 2.134853298745394e-06, "loss": 1.1514, "step": 1126000 }, { "epoch": 2.68, "learning_rate": 2.1269202851125102e-06, "loss": 1.1508, "step": 1126500 }, { "epoch": 2.68, "learning_rate": 2.118987271479626e-06, "loss": 1.142, "step": 1127000 }, { "epoch": 2.68, "learning_rate": 2.111054257846742e-06, "loss": 1.1526, "step": 1127500 }, { "epoch": 2.68, "learning_rate": 2.1031212442138583e-06, "loss": 1.1457, "step": 1128000 }, { "epoch": 2.69, "learning_rate": 2.0951882305809745e-06, "loss": 1.1467, "step": 1128500 }, { "epoch": 2.69, "learning_rate": 2.0872552169480904e-06, "loss": 1.1466, "step": 1129000 }, { "epoch": 2.69, "learning_rate": 2.0793222033152067e-06, "loss": 1.1427, "step": 1129500 }, { "epoch": 2.69, "learning_rate": 2.0713891896823226e-06, "loss": 1.1412, "step": 1130000 }, { "epoch": 2.69, "learning_rate": 2.063456176049439e-06, "loss": 1.1154, "step": 1130500 }, { "epoch": 2.69, "learning_rate": 2.0555231624165547e-06, "loss": 1.1585, "step": 1131000 }, { "epoch": 2.69, "learning_rate": 2.047590148783671e-06, "loss": 1.1434, "step": 1131500 }, { "epoch": 2.69, "learning_rate": 2.039657135150787e-06, "loss": 1.1517, "step": 1132000 }, { "epoch": 2.7, "learning_rate": 2.0317241215179027e-06, "loss": 1.1624, "step": 1132500 }, { "epoch": 2.7, "learning_rate": 2.023791107885019e-06, "loss": 1.1429, "step": 1133000 }, { "epoch": 2.7, "learning_rate": 2.0158580942521353e-06, "loss": 1.1583, "step": 1133500 }, { "epoch": 2.7, "learning_rate": 2.007925080619251e-06, "loss": 1.1542, "step": 1134000 }, { "epoch": 2.7, "learning_rate": 1.9999920669863675e-06, "loss": 1.165, "step": 1134500 }, { "epoch": 2.7, "learning_rate": 1.9920590533534833e-06, "loss": 1.1383, "step": 1135000 }, { "epoch": 2.7, "learning_rate": 1.9841260397205996e-06, "loss": 1.1618, "step": 1135500 }, { "epoch": 2.7, "learning_rate": 1.9761930260877155e-06, "loss": 1.1569, "step": 1136000 }, { "epoch": 2.7, "learning_rate": 1.9682600124548313e-06, "loss": 1.1444, "step": 1136500 }, { "epoch": 2.71, "learning_rate": 1.9603269988219476e-06, "loss": 1.1771, "step": 1137000 }, { "epoch": 2.71, "learning_rate": 1.9523939851890635e-06, "loss": 1.1612, "step": 1137500 }, { "epoch": 2.71, "learning_rate": 1.94446097155618e-06, "loss": 1.1523, "step": 1138000 }, { "epoch": 2.71, "learning_rate": 1.936527957923296e-06, "loss": 1.1509, "step": 1138500 }, { "epoch": 2.71, "learning_rate": 1.928594944290412e-06, "loss": 1.1395, "step": 1139000 }, { "epoch": 2.71, "learning_rate": 1.9206619306575282e-06, "loss": 1.1454, "step": 1139500 }, { "epoch": 2.71, "learning_rate": 1.912728917024644e-06, "loss": 1.1569, "step": 1140000 }, { "epoch": 2.71, "learning_rate": 1.9047959033917602e-06, "loss": 1.1469, "step": 1140500 }, { "epoch": 2.72, "learning_rate": 1.8968628897588763e-06, "loss": 1.137, "step": 1141000 }, { "epoch": 2.72, "learning_rate": 1.8889298761259921e-06, "loss": 1.1388, "step": 1141500 }, { "epoch": 2.72, "learning_rate": 1.8809968624931082e-06, "loss": 1.1416, "step": 1142000 }, { "epoch": 2.72, "learning_rate": 1.8730638488602243e-06, "loss": 1.1556, "step": 1142500 }, { "epoch": 2.72, "learning_rate": 1.8651308352273406e-06, "loss": 1.1506, "step": 1143000 }, { "epoch": 2.72, "learning_rate": 1.8571978215944566e-06, "loss": 1.154, "step": 1143500 }, { "epoch": 2.72, "learning_rate": 1.8492648079615727e-06, "loss": 1.1644, "step": 1144000 }, { "epoch": 2.72, "learning_rate": 1.8413317943286888e-06, "loss": 1.1385, "step": 1144500 }, { "epoch": 2.72, "learning_rate": 1.8333987806958049e-06, "loss": 1.1396, "step": 1145000 }, { "epoch": 2.73, "learning_rate": 1.825465767062921e-06, "loss": 1.1317, "step": 1145500 }, { "epoch": 2.73, "learning_rate": 1.8175327534300368e-06, "loss": 1.1523, "step": 1146000 }, { "epoch": 2.73, "learning_rate": 1.8095997397971529e-06, "loss": 1.1546, "step": 1146500 }, { "epoch": 2.73, "learning_rate": 1.801666726164269e-06, "loss": 1.1568, "step": 1147000 }, { "epoch": 2.73, "learning_rate": 1.793733712531385e-06, "loss": 1.144, "step": 1147500 }, { "epoch": 2.73, "learning_rate": 1.7858006988985013e-06, "loss": 1.1573, "step": 1148000 }, { "epoch": 2.73, "learning_rate": 1.7778676852656174e-06, "loss": 1.1468, "step": 1148500 }, { "epoch": 2.73, "learning_rate": 1.7699346716327335e-06, "loss": 1.1495, "step": 1149000 }, { "epoch": 2.74, "learning_rate": 1.7620016579998496e-06, "loss": 1.14, "step": 1149500 }, { "epoch": 2.74, "learning_rate": 1.7540686443669654e-06, "loss": 1.1577, "step": 1150000 }, { "epoch": 2.74, "learning_rate": 1.7461356307340815e-06, "loss": 1.1323, "step": 1150500 }, { "epoch": 2.74, "learning_rate": 1.7382026171011976e-06, "loss": 1.1407, "step": 1151000 }, { "epoch": 2.74, "learning_rate": 1.7302696034683136e-06, "loss": 1.1487, "step": 1151500 }, { "epoch": 2.74, "learning_rate": 1.7223365898354297e-06, "loss": 1.1521, "step": 1152000 }, { "epoch": 2.74, "learning_rate": 1.7144035762025458e-06, "loss": 1.1614, "step": 1152500 }, { "epoch": 2.74, "learning_rate": 1.706470562569662e-06, "loss": 1.1469, "step": 1153000 }, { "epoch": 2.75, "learning_rate": 1.6985375489367782e-06, "loss": 1.155, "step": 1153500 }, { "epoch": 2.75, "learning_rate": 1.690604535303894e-06, "loss": 1.1583, "step": 1154000 }, { "epoch": 2.75, "learning_rate": 1.6826715216710101e-06, "loss": 1.1408, "step": 1154500 }, { "epoch": 2.75, "learning_rate": 1.6747385080381262e-06, "loss": 1.152, "step": 1155000 }, { "epoch": 2.75, "learning_rate": 1.6668054944052423e-06, "loss": 1.1549, "step": 1155500 }, { "epoch": 2.75, "learning_rate": 1.6588724807723583e-06, "loss": 1.1553, "step": 1156000 }, { "epoch": 2.75, "learning_rate": 1.6509394671394744e-06, "loss": 1.1451, "step": 1156500 }, { "epoch": 2.75, "learning_rate": 1.6430064535065903e-06, "loss": 1.1408, "step": 1157000 }, { "epoch": 2.75, "learning_rate": 1.6350734398737064e-06, "loss": 1.151, "step": 1157500 }, { "epoch": 2.76, "learning_rate": 1.6271404262408226e-06, "loss": 1.1568, "step": 1158000 }, { "epoch": 2.76, "learning_rate": 1.6192074126079387e-06, "loss": 1.1528, "step": 1158500 }, { "epoch": 2.76, "learning_rate": 1.6112743989750548e-06, "loss": 1.152, "step": 1159000 }, { "epoch": 2.76, "learning_rate": 1.6033413853421709e-06, "loss": 1.149, "step": 1159500 }, { "epoch": 2.76, "learning_rate": 1.595408371709287e-06, "loss": 1.1235, "step": 1160000 }, { "epoch": 2.76, "learning_rate": 1.587475358076403e-06, "loss": 1.1339, "step": 1160500 }, { "epoch": 2.76, "learning_rate": 1.579542344443519e-06, "loss": 1.1548, "step": 1161000 }, { "epoch": 2.76, "learning_rate": 1.571609330810635e-06, "loss": 1.1462, "step": 1161500 }, { "epoch": 2.77, "learning_rate": 1.563676317177751e-06, "loss": 1.1392, "step": 1162000 }, { "epoch": 2.77, "learning_rate": 1.5557433035448671e-06, "loss": 1.1461, "step": 1162500 }, { "epoch": 2.77, "learning_rate": 1.5478102899119834e-06, "loss": 1.1394, "step": 1163000 }, { "epoch": 2.77, "learning_rate": 1.5398772762790995e-06, "loss": 1.1375, "step": 1163500 }, { "epoch": 2.77, "learning_rate": 1.5319442626462156e-06, "loss": 1.1464, "step": 1164000 }, { "epoch": 2.77, "learning_rate": 1.5240112490133316e-06, "loss": 1.1496, "step": 1164500 }, { "epoch": 2.77, "learning_rate": 1.5160782353804477e-06, "loss": 1.156, "step": 1165000 }, { "epoch": 2.77, "learning_rate": 1.5081452217475636e-06, "loss": 1.1427, "step": 1165500 }, { "epoch": 2.77, "learning_rate": 1.5002122081146797e-06, "loss": 1.1399, "step": 1166000 }, { "epoch": 2.78, "learning_rate": 1.4922791944817957e-06, "loss": 1.1335, "step": 1166500 }, { "epoch": 2.78, "learning_rate": 1.4843461808489118e-06, "loss": 1.133, "step": 1167000 }, { "epoch": 2.78, "learning_rate": 1.4764131672160279e-06, "loss": 1.1617, "step": 1167500 }, { "epoch": 2.78, "learning_rate": 1.4684801535831442e-06, "loss": 1.1388, "step": 1168000 }, { "epoch": 2.78, "learning_rate": 1.4605471399502603e-06, "loss": 1.1378, "step": 1168500 }, { "epoch": 2.78, "learning_rate": 1.4526141263173763e-06, "loss": 1.1484, "step": 1169000 }, { "epoch": 2.78, "learning_rate": 1.4446811126844922e-06, "loss": 1.1449, "step": 1169500 }, { "epoch": 2.78, "learning_rate": 1.4367480990516083e-06, "loss": 1.1451, "step": 1170000 }, { "epoch": 2.79, "learning_rate": 1.4288150854187243e-06, "loss": 1.1632, "step": 1170500 }, { "epoch": 2.79, "learning_rate": 1.4208820717858404e-06, "loss": 1.1472, "step": 1171000 }, { "epoch": 2.79, "learning_rate": 1.4129490581529565e-06, "loss": 1.1508, "step": 1171500 }, { "epoch": 2.79, "learning_rate": 1.4050160445200726e-06, "loss": 1.1391, "step": 1172000 }, { "epoch": 2.79, "learning_rate": 1.3970830308871889e-06, "loss": 1.1599, "step": 1172500 }, { "epoch": 2.79, "learning_rate": 1.389150017254305e-06, "loss": 1.1427, "step": 1173000 }, { "epoch": 2.79, "learning_rate": 1.381217003621421e-06, "loss": 1.15, "step": 1173500 }, { "epoch": 2.79, "learning_rate": 1.3732839899885369e-06, "loss": 1.1542, "step": 1174000 }, { "epoch": 2.8, "learning_rate": 1.365350976355653e-06, "loss": 1.1528, "step": 1174500 }, { "epoch": 2.8, "learning_rate": 1.357417962722769e-06, "loss": 1.1424, "step": 1175000 }, { "epoch": 2.8, "learning_rate": 1.3494849490898851e-06, "loss": 1.1195, "step": 1175500 }, { "epoch": 2.8, "learning_rate": 1.3415519354570012e-06, "loss": 1.1542, "step": 1176000 }, { "epoch": 2.8, "learning_rate": 1.3336189218241173e-06, "loss": 1.1437, "step": 1176500 }, { "epoch": 2.8, "learning_rate": 1.3256859081912331e-06, "loss": 1.1434, "step": 1177000 }, { "epoch": 2.8, "learning_rate": 1.3177528945583496e-06, "loss": 1.1357, "step": 1177500 }, { "epoch": 2.8, "learning_rate": 1.3098198809254655e-06, "loss": 1.15, "step": 1178000 }, { "epoch": 2.8, "learning_rate": 1.3018868672925816e-06, "loss": 1.1593, "step": 1178500 }, { "epoch": 2.81, "learning_rate": 1.2939538536596977e-06, "loss": 1.1398, "step": 1179000 }, { "epoch": 2.81, "learning_rate": 1.2860208400268137e-06, "loss": 1.142, "step": 1179500 }, { "epoch": 2.81, "learning_rate": 1.2780878263939298e-06, "loss": 1.1422, "step": 1180000 }, { "epoch": 2.81, "learning_rate": 1.2701548127610459e-06, "loss": 1.1652, "step": 1180500 }, { "epoch": 2.81, "learning_rate": 1.2622217991281617e-06, "loss": 1.1397, "step": 1181000 }, { "epoch": 2.81, "learning_rate": 1.2542887854952778e-06, "loss": 1.1315, "step": 1181500 }, { "epoch": 2.81, "learning_rate": 1.2463557718623941e-06, "loss": 1.1462, "step": 1182000 }, { "epoch": 2.81, "learning_rate": 1.2384227582295102e-06, "loss": 1.1303, "step": 1182500 }, { "epoch": 2.82, "learning_rate": 1.230489744596626e-06, "loss": 1.1381, "step": 1183000 }, { "epoch": 2.82, "learning_rate": 1.2225567309637421e-06, "loss": 1.1645, "step": 1183500 }, { "epoch": 2.82, "learning_rate": 1.2146237173308584e-06, "loss": 1.1457, "step": 1184000 }, { "epoch": 2.82, "learning_rate": 1.2066907036979745e-06, "loss": 1.1397, "step": 1184500 }, { "epoch": 2.82, "learning_rate": 1.1987576900650904e-06, "loss": 1.1473, "step": 1185000 }, { "epoch": 2.82, "learning_rate": 1.1908246764322064e-06, "loss": 1.1547, "step": 1185500 }, { "epoch": 2.82, "learning_rate": 1.1828916627993225e-06, "loss": 1.1382, "step": 1186000 }, { "epoch": 2.82, "learning_rate": 1.1749586491664388e-06, "loss": 1.144, "step": 1186500 }, { "epoch": 2.82, "learning_rate": 1.1670256355335549e-06, "loss": 1.1461, "step": 1187000 }, { "epoch": 2.83, "learning_rate": 1.1590926219006707e-06, "loss": 1.1598, "step": 1187500 }, { "epoch": 2.83, "learning_rate": 1.1511596082677868e-06, "loss": 1.1352, "step": 1188000 }, { "epoch": 2.83, "learning_rate": 1.1432265946349031e-06, "loss": 1.1369, "step": 1188500 }, { "epoch": 2.83, "learning_rate": 1.1352935810020192e-06, "loss": 1.1548, "step": 1189000 }, { "epoch": 2.83, "learning_rate": 1.127360567369135e-06, "loss": 1.1429, "step": 1189500 }, { "epoch": 2.83, "learning_rate": 1.1194275537362511e-06, "loss": 1.1349, "step": 1190000 }, { "epoch": 2.83, "learning_rate": 1.1114945401033672e-06, "loss": 1.15, "step": 1190500 }, { "epoch": 2.83, "learning_rate": 1.1035615264704835e-06, "loss": 1.1365, "step": 1191000 }, { "epoch": 2.84, "learning_rate": 1.0956285128375994e-06, "loss": 1.1535, "step": 1191500 }, { "epoch": 2.84, "learning_rate": 1.0876954992047154e-06, "loss": 1.144, "step": 1192000 }, { "epoch": 2.84, "learning_rate": 1.0797624855718315e-06, "loss": 1.153, "step": 1192500 }, { "epoch": 2.84, "learning_rate": 1.0718294719389476e-06, "loss": 1.155, "step": 1193000 }, { "epoch": 2.84, "learning_rate": 1.0638964583060637e-06, "loss": 1.149, "step": 1193500 }, { "epoch": 2.84, "learning_rate": 1.0559634446731797e-06, "loss": 1.1501, "step": 1194000 }, { "epoch": 2.84, "learning_rate": 1.0480304310402958e-06, "loss": 1.1324, "step": 1194500 }, { "epoch": 2.84, "learning_rate": 1.0400974174074119e-06, "loss": 1.1415, "step": 1195000 }, { "epoch": 2.85, "learning_rate": 1.032164403774528e-06, "loss": 1.1606, "step": 1195500 }, { "epoch": 2.85, "learning_rate": 1.024231390141644e-06, "loss": 1.1448, "step": 1196000 }, { "epoch": 2.85, "learning_rate": 1.0162983765087601e-06, "loss": 1.1527, "step": 1196500 }, { "epoch": 2.85, "learning_rate": 1.0083653628758762e-06, "loss": 1.1392, "step": 1197000 }, { "epoch": 2.85, "learning_rate": 1.0004323492429923e-06, "loss": 1.1395, "step": 1197500 }, { "epoch": 2.85, "learning_rate": 9.924993356101084e-07, "loss": 1.1415, "step": 1198000 }, { "epoch": 2.85, "learning_rate": 9.845663219772244e-07, "loss": 1.1452, "step": 1198500 }, { "epoch": 2.85, "learning_rate": 9.766333083443405e-07, "loss": 1.1166, "step": 1199000 }, { "epoch": 2.85, "learning_rate": 9.687002947114566e-07, "loss": 1.1494, "step": 1199500 }, { "epoch": 2.86, "learning_rate": 9.607672810785727e-07, "loss": 1.1373, "step": 1200000 }, { "epoch": 2.86, "learning_rate": 9.528342674456886e-07, "loss": 1.1255, "step": 1200500 }, { "epoch": 2.86, "learning_rate": 9.449012538128048e-07, "loss": 1.1561, "step": 1201000 }, { "epoch": 2.86, "learning_rate": 9.369682401799209e-07, "loss": 1.1426, "step": 1201500 }, { "epoch": 2.86, "learning_rate": 9.290352265470369e-07, "loss": 1.1391, "step": 1202000 }, { "epoch": 2.86, "learning_rate": 9.211022129141529e-07, "loss": 1.1552, "step": 1202500 }, { "epoch": 2.86, "learning_rate": 9.13169199281269e-07, "loss": 1.1487, "step": 1203000 }, { "epoch": 2.86, "learning_rate": 9.052361856483852e-07, "loss": 1.1381, "step": 1203500 }, { "epoch": 2.87, "learning_rate": 8.973031720155012e-07, "loss": 1.1335, "step": 1204000 }, { "epoch": 2.87, "learning_rate": 8.893701583826172e-07, "loss": 1.1576, "step": 1204500 }, { "epoch": 2.87, "learning_rate": 8.814371447497333e-07, "loss": 1.1522, "step": 1205000 }, { "epoch": 2.87, "learning_rate": 8.735041311168493e-07, "loss": 1.1431, "step": 1205500 }, { "epoch": 2.87, "learning_rate": 8.655711174839655e-07, "loss": 1.1496, "step": 1206000 }, { "epoch": 2.87, "learning_rate": 8.576381038510815e-07, "loss": 1.1481, "step": 1206500 }, { "epoch": 2.87, "learning_rate": 8.497050902181976e-07, "loss": 1.1362, "step": 1207000 }, { "epoch": 2.87, "learning_rate": 8.417720765853137e-07, "loss": 1.1626, "step": 1207500 }, { "epoch": 2.87, "learning_rate": 8.338390629524297e-07, "loss": 1.1314, "step": 1208000 }, { "epoch": 2.88, "learning_rate": 8.259060493195459e-07, "loss": 1.1469, "step": 1208500 }, { "epoch": 2.88, "learning_rate": 8.179730356866619e-07, "loss": 1.1418, "step": 1209000 }, { "epoch": 2.88, "learning_rate": 8.10040022053778e-07, "loss": 1.1524, "step": 1209500 }, { "epoch": 2.88, "learning_rate": 8.02107008420894e-07, "loss": 1.1509, "step": 1210000 }, { "epoch": 2.88, "learning_rate": 7.941739947880101e-07, "loss": 1.1392, "step": 1210500 }, { "epoch": 2.88, "learning_rate": 7.862409811551262e-07, "loss": 1.1414, "step": 1211000 }, { "epoch": 2.88, "learning_rate": 7.783079675222423e-07, "loss": 1.1357, "step": 1211500 }, { "epoch": 2.88, "learning_rate": 7.703749538893583e-07, "loss": 1.1481, "step": 1212000 }, { "epoch": 2.89, "learning_rate": 7.624419402564744e-07, "loss": 1.1617, "step": 1212500 }, { "epoch": 2.89, "learning_rate": 7.545089266235904e-07, "loss": 1.1444, "step": 1213000 }, { "epoch": 2.89, "learning_rate": 7.465759129907066e-07, "loss": 1.155, "step": 1213500 }, { "epoch": 2.89, "learning_rate": 7.386428993578226e-07, "loss": 1.129, "step": 1214000 }, { "epoch": 2.89, "learning_rate": 7.307098857249387e-07, "loss": 1.154, "step": 1214500 }, { "epoch": 2.89, "learning_rate": 7.227768720920547e-07, "loss": 1.1496, "step": 1215000 }, { "epoch": 2.89, "learning_rate": 7.148438584591707e-07, "loss": 1.1322, "step": 1215500 }, { "epoch": 2.89, "learning_rate": 7.069108448262869e-07, "loss": 1.1548, "step": 1216000 }, { "epoch": 2.9, "learning_rate": 6.98977831193403e-07, "loss": 1.1403, "step": 1216500 }, { "epoch": 2.9, "learning_rate": 6.91044817560519e-07, "loss": 1.1508, "step": 1217000 }, { "epoch": 2.9, "learning_rate": 6.83111803927635e-07, "loss": 1.1455, "step": 1217500 }, { "epoch": 2.9, "learning_rate": 6.751787902947511e-07, "loss": 1.1381, "step": 1218000 }, { "epoch": 2.9, "learning_rate": 6.672457766618673e-07, "loss": 1.1509, "step": 1218500 }, { "epoch": 2.9, "learning_rate": 6.593127630289834e-07, "loss": 1.1373, "step": 1219000 }, { "epoch": 2.9, "learning_rate": 6.513797493960993e-07, "loss": 1.1482, "step": 1219500 }, { "epoch": 2.9, "learning_rate": 6.434467357632154e-07, "loss": 1.1388, "step": 1220000 }, { "epoch": 2.9, "learning_rate": 6.355137221303315e-07, "loss": 1.1422, "step": 1220500 }, { "epoch": 2.91, "learning_rate": 6.275807084974477e-07, "loss": 1.1342, "step": 1221000 }, { "epoch": 2.91, "learning_rate": 6.196476948645637e-07, "loss": 1.1466, "step": 1221500 }, { "epoch": 2.91, "learning_rate": 6.117146812316797e-07, "loss": 1.1359, "step": 1222000 }, { "epoch": 2.91, "learning_rate": 6.037816675987959e-07, "loss": 1.1504, "step": 1222500 }, { "epoch": 2.91, "learning_rate": 5.958486539659119e-07, "loss": 1.1369, "step": 1223000 }, { "epoch": 2.91, "learning_rate": 5.879156403330279e-07, "loss": 1.1404, "step": 1223500 }, { "epoch": 2.91, "learning_rate": 5.79982626700144e-07, "loss": 1.1438, "step": 1224000 }, { "epoch": 2.91, "learning_rate": 5.720496130672601e-07, "loss": 1.1355, "step": 1224500 }, { "epoch": 2.92, "learning_rate": 5.641165994343762e-07, "loss": 1.1538, "step": 1225000 }, { "epoch": 2.92, "learning_rate": 5.561835858014922e-07, "loss": 1.1396, "step": 1225500 }, { "epoch": 2.92, "learning_rate": 5.482505721686083e-07, "loss": 1.1504, "step": 1226000 }, { "epoch": 2.92, "learning_rate": 5.403175585357244e-07, "loss": 1.1428, "step": 1226500 }, { "epoch": 2.92, "learning_rate": 5.323845449028405e-07, "loss": 1.1513, "step": 1227000 }, { "epoch": 2.92, "learning_rate": 5.244515312699566e-07, "loss": 1.1489, "step": 1227500 }, { "epoch": 2.92, "learning_rate": 5.165185176370726e-07, "loss": 1.1469, "step": 1228000 }, { "epoch": 2.92, "learning_rate": 5.085855040041886e-07, "loss": 1.1604, "step": 1228500 }, { "epoch": 2.92, "learning_rate": 5.006524903713048e-07, "loss": 1.1446, "step": 1229000 }, { "epoch": 2.93, "learning_rate": 4.927194767384208e-07, "loss": 1.1386, "step": 1229500 }, { "epoch": 2.93, "learning_rate": 4.847864631055369e-07, "loss": 1.1435, "step": 1230000 }, { "epoch": 2.93, "learning_rate": 4.768534494726529e-07, "loss": 1.1456, "step": 1230500 }, { "epoch": 2.93, "learning_rate": 4.68920435839769e-07, "loss": 1.1422, "step": 1231000 }, { "epoch": 2.93, "learning_rate": 4.609874222068851e-07, "loss": 1.1365, "step": 1231500 }, { "epoch": 2.93, "learning_rate": 4.5305440857400114e-07, "loss": 1.1592, "step": 1232000 }, { "epoch": 2.93, "learning_rate": 4.4512139494111727e-07, "loss": 1.171, "step": 1232500 }, { "epoch": 2.93, "learning_rate": 4.371883813082333e-07, "loss": 1.1478, "step": 1233000 }, { "epoch": 2.94, "learning_rate": 4.2925536767534937e-07, "loss": 1.1486, "step": 1233500 }, { "epoch": 2.94, "learning_rate": 4.2132235404246545e-07, "loss": 1.1485, "step": 1234000 }, { "epoch": 2.94, "learning_rate": 4.133893404095815e-07, "loss": 1.131, "step": 1234500 }, { "epoch": 2.94, "learning_rate": 4.054563267766976e-07, "loss": 1.1415, "step": 1235000 }, { "epoch": 2.94, "learning_rate": 3.975233131438137e-07, "loss": 1.1492, "step": 1235500 }, { "epoch": 2.94, "learning_rate": 3.8959029951092975e-07, "loss": 1.1399, "step": 1236000 }, { "epoch": 2.94, "learning_rate": 3.8165728587804583e-07, "loss": 1.1325, "step": 1236500 }, { "epoch": 2.94, "learning_rate": 3.7372427224516185e-07, "loss": 1.1632, "step": 1237000 }, { "epoch": 2.95, "learning_rate": 3.65791258612278e-07, "loss": 1.1534, "step": 1237500 }, { "epoch": 2.95, "learning_rate": 3.57858244979394e-07, "loss": 1.1527, "step": 1238000 }, { "epoch": 2.95, "learning_rate": 3.4992523134651014e-07, "loss": 1.1318, "step": 1238500 }, { "epoch": 2.95, "learning_rate": 3.4199221771362616e-07, "loss": 1.1535, "step": 1239000 }, { "epoch": 2.95, "learning_rate": 3.3405920408074224e-07, "loss": 1.1368, "step": 1239500 }, { "epoch": 2.95, "learning_rate": 3.261261904478583e-07, "loss": 1.1448, "step": 1240000 }, { "epoch": 2.95, "learning_rate": 3.181931768149744e-07, "loss": 1.1597, "step": 1240500 }, { "epoch": 2.95, "learning_rate": 3.1026016318209047e-07, "loss": 1.149, "step": 1241000 }, { "epoch": 2.95, "learning_rate": 3.0232714954920654e-07, "loss": 1.1383, "step": 1241500 }, { "epoch": 2.96, "learning_rate": 2.943941359163226e-07, "loss": 1.1288, "step": 1242000 }, { "epoch": 2.96, "learning_rate": 2.864611222834387e-07, "loss": 1.1397, "step": 1242500 }, { "epoch": 2.96, "learning_rate": 2.785281086505547e-07, "loss": 1.1452, "step": 1243000 }, { "epoch": 2.96, "learning_rate": 2.705950950176708e-07, "loss": 1.1439, "step": 1243500 }, { "epoch": 2.96, "learning_rate": 2.626620813847869e-07, "loss": 1.144, "step": 1244000 }, { "epoch": 2.96, "learning_rate": 2.5472906775190295e-07, "loss": 1.139, "step": 1244500 }, { "epoch": 2.96, "learning_rate": 2.4679605411901903e-07, "loss": 1.138, "step": 1245000 }, { "epoch": 2.96, "learning_rate": 2.388630404861351e-07, "loss": 1.1524, "step": 1245500 }, { "epoch": 2.97, "learning_rate": 2.3093002685325115e-07, "loss": 1.145, "step": 1246000 }, { "epoch": 2.97, "learning_rate": 2.2299701322036723e-07, "loss": 1.1471, "step": 1246500 }, { "epoch": 2.97, "learning_rate": 2.150639995874833e-07, "loss": 1.1336, "step": 1247000 }, { "epoch": 2.97, "learning_rate": 2.0713098595459938e-07, "loss": 1.1451, "step": 1247500 }, { "epoch": 2.97, "learning_rate": 1.9919797232171543e-07, "loss": 1.1405, "step": 1248000 }, { "epoch": 2.97, "learning_rate": 1.912649586888315e-07, "loss": 1.1174, "step": 1248500 }, { "epoch": 2.97, "learning_rate": 1.833319450559476e-07, "loss": 1.1498, "step": 1249000 }, { "epoch": 2.97, "learning_rate": 1.7539893142306366e-07, "loss": 1.1536, "step": 1249500 }, { "epoch": 2.97, "learning_rate": 1.6746591779017974e-07, "loss": 1.1362, "step": 1250000 }, { "epoch": 2.98, "learning_rate": 1.5953290415729582e-07, "loss": 1.1453, "step": 1250500 }, { "epoch": 2.98, "learning_rate": 1.515998905244119e-07, "loss": 1.133, "step": 1251000 }, { "epoch": 2.98, "learning_rate": 1.4366687689152794e-07, "loss": 1.1544, "step": 1251500 }, { "epoch": 2.98, "learning_rate": 1.3573386325864402e-07, "loss": 1.1403, "step": 1252000 }, { "epoch": 2.98, "learning_rate": 1.2780084962576007e-07, "loss": 1.1533, "step": 1252500 }, { "epoch": 2.98, "learning_rate": 1.1986783599287617e-07, "loss": 1.1512, "step": 1253000 }, { "epoch": 2.98, "learning_rate": 1.1193482235999224e-07, "loss": 1.1531, "step": 1253500 }, { "epoch": 2.98, "learning_rate": 1.040018087271083e-07, "loss": 1.1366, "step": 1254000 }, { "epoch": 2.99, "learning_rate": 9.606879509422438e-08, "loss": 1.1419, "step": 1254500 }, { "epoch": 2.99, "learning_rate": 8.813578146134045e-08, "loss": 1.1226, "step": 1255000 }, { "epoch": 2.99, "learning_rate": 8.020276782845652e-08, "loss": 1.1279, "step": 1255500 }, { "epoch": 2.99, "learning_rate": 7.22697541955726e-08, "loss": 1.1539, "step": 1256000 }, { "epoch": 2.99, "learning_rate": 6.433674056268866e-08, "loss": 1.1498, "step": 1256500 }, { "epoch": 2.99, "learning_rate": 5.6403726929804734e-08, "loss": 1.133, "step": 1257000 }, { "epoch": 2.99, "learning_rate": 4.8470713296920804e-08, "loss": 1.1432, "step": 1257500 }, { "epoch": 2.99, "learning_rate": 4.053769966403688e-08, "loss": 1.1493, "step": 1258000 }, { "epoch": 3.0, "learning_rate": 3.260468603115295e-08, "loss": 1.1356, "step": 1258500 }, { "epoch": 3.0, "learning_rate": 2.4671672398269018e-08, "loss": 1.1405, "step": 1259000 }, { "epoch": 3.0, "learning_rate": 1.6738658765385088e-08, "loss": 1.148, "step": 1259500 }, { "epoch": 3.0, "learning_rate": 8.805645132501161e-09, "loss": 1.1409, "step": 1260000 }, { "epoch": 3.0, "learning_rate": 8.726314996172322e-10, "loss": 1.1348, "step": 1260500 }, { "epoch": 3.0, "eval_loss": 1.1232123374938965, "eval_runtime": 3625.9093, "eval_samples_per_second": 366.127, "eval_steps_per_second": 22.883, "step": 1260555 } ], "max_steps": 1260555, "num_train_epochs": 3, "total_flos": 2.738376510217363e+18, "trial_name": null, "trial_params": null }